diff --git a/spacy/lang/ky/__init__.py b/spacy/lang/ky/__init__.py index 4656cfeb9..a333db035 100644 --- a/spacy/lang/ky/__init__.py +++ b/spacy/lang/ky/__init__.py @@ -1,25 +1,14 @@ -# coding: utf8 -from __future__ import unicode_literals - from .lex_attrs import LEX_ATTRS from .punctuation import TOKENIZER_INFIXES from .stop_words import STOP_WORDS from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS -from ..tokenizer_exceptions import BASE_EXCEPTIONS -from ...attrs import LANG from ...language import Language -from ...util import update_exc class KyrgyzDefaults(Language.Defaults): - lex_attr_getters = dict(Language.Defaults.lex_attr_getters) - lex_attr_getters[LANG] = lambda text: "ky" - - lex_attr_getters.update(LEX_ATTRS) - - tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS) - infixes = tuple(TOKENIZER_INFIXES) - + tokenizer_exceptions = TOKENIZER_EXCEPTIONS + infixes = TOKENIZER_INFIXES + lex_attr_getters = LEX_ATTRS stop_words = STOP_WORDS diff --git a/spacy/lang/ky/examples.py b/spacy/lang/ky/examples.py index f1f31e3ab..ba77ea975 100644 --- a/spacy/lang/ky/examples.py +++ b/spacy/lang/ky/examples.py @@ -1,6 +1,3 @@ -# coding: utf8 -from __future__ import unicode_literals - """ Example sentences to test spaCy and its language models. >>> from spacy.lang.ky.examples import sentences diff --git a/spacy/lang/ky/lex_attrs.py b/spacy/lang/ky/lex_attrs.py index af926b138..bdf993482 100644 --- a/spacy/lang/ky/lex_attrs.py +++ b/spacy/lang/ky/lex_attrs.py @@ -1,6 +1,3 @@ -# coding: utf8 -from __future__ import unicode_literals - from ...attrs import LIKE_NUM _num_words = [ diff --git a/spacy/lang/ky/punctuation.py b/spacy/lang/ky/punctuation.py index 22c2061ca..fa9819f80 100644 --- a/spacy/lang/ky/punctuation.py +++ b/spacy/lang/ky/punctuation.py @@ -1,6 +1,3 @@ -# coding: utf8 -from __future__ import unicode_literals - from ..char_classes import ALPHA, ALPHA_LOWER, ALPHA_UPPER, CONCAT_QUOTES, HYPHENS from ..char_classes import LIST_ELLIPSES, LIST_ICONS diff --git a/spacy/lang/ky/stop_words.py b/spacy/lang/ky/stop_words.py index eede62767..ea40bdfa2 100644 --- a/spacy/lang/ky/stop_words.py +++ b/spacy/lang/ky/stop_words.py @@ -1,8 +1,5 @@ -# encoding: utf8 -from __future__ import unicode_literals - STOP_WORDS = set( -""" + """ ага адам айтты айтымында айтып ал алар алардын алган алуу алып анда андан аны анын ар diff --git a/spacy/lang/ky/tokenizer_exceptions.py b/spacy/lang/ky/tokenizer_exceptions.py index be5e9530c..eb367aeef 100644 --- a/spacy/lang/ky/tokenizer_exceptions.py +++ b/spacy/lang/ky/tokenizer_exceptions.py @@ -1,7 +1,6 @@ -# coding: utf8 -from __future__ import unicode_literals - +from ..tokenizer_exceptions import BASE_EXCEPTIONS from ...symbols import ORTH, LEMMA, NORM +from ...util import update_exc _exc = {} @@ -52,4 +51,4 @@ for exc_data in [ # "etc." abbreviations exc_data[LEMMA] = exc_data[NORM] _exc[exc_data[ORTH]] = [exc_data] -TOKENIZER_EXCEPTIONS = _exc +TOKENIZER_EXCEPTIONS = update_exc(BASE_EXCEPTIONS, _exc)