diff --git a/spacy/lang/th/tokenizer_exceptions.py b/spacy/lang/th/tokenizer_exceptions.py
index 7e3967aed..c31595893 100644
--- a/spacy/lang/th/tokenizer_exceptions.py
+++ b/spacy/lang/th/tokenizer_exceptions.py
@@ -1,9 +1,7 @@
 # encoding: utf8
 from __future__ import unicode_literals
 
-from ..symbols import *
-from ..language_data import PRON_LEMMA
-
+from ...symbols import *
 
 TOKENIZER_EXCEPTIONS = {
     "ม.ค.": [
@@ -43,38 +41,3 @@ TOKENIZER_EXCEPTIONS = {
         {ORTH: "ธ.ค.", LEMMA: "ธันวาคม"}
     ]
 }
-
-
-# exceptions mapped to a single token containing only ORTH property
-# example: {"string": [{ORTH: "string"}]}
-# converted using strings_to_exc() util
-'''
-ORTH_ONLY = [
-    "a.",
-    "b.",
-    "c.",
-    "d.",
-    "e.",
-    "f.",
-    "g.",
-    "h.",
-    "i.",
-    "j.",
-    "k.",
-    "l.",
-    "m.",
-    "n.",
-    "o.",
-    "p.",
-    "q.",
-    "r.",
-    "s.",
-    "t.",
-    "u.",
-    "v.",
-    "w.",
-    "x.",
-    "y.",
-    "z."
-]
-'''
\ No newline at end of file