diff --git a/spacy/lang/ta/__init__.py b/spacy/lang/ta/__init__.py index 81baaafe2..8b21b70c7 100644 --- a/spacy/lang/ta/__init__.py +++ b/spacy/lang/ta/__init__.py @@ -1,4 +1,6 @@ -# import language-specific data +# coding: utf8 +from __future__ import unicode_literals + from .stop_words import STOP_WORDS from .lex_attrs import LEX_ATTRS @@ -7,20 +9,16 @@ from ...language import Language from ...attrs import LANG from ...util import update_exc -# create Defaults class in the module scope (necessary for pickling!) + class TamilDefaults(Language.Defaults): lex_attr_getters = dict(Language.Defaults.lex_attr_getters) - lex_attr_getters[LANG] = lambda text: "ta" # language ISO code - - # optional: replace flags with custom functions, e.g. like_num() + lex_attr_getters[LANG] = lambda text: "ta" lex_attr_getters.update(LEX_ATTRS) -# create actual Language class class Tamil(Language): - lang = "ta" # language ISO code - Defaults = TamilDefaults # override defaults + lang = "ta" + Defaults = TamilDefaults -# set default export – this allows the language class to be lazy-loaded __all__ = ["Tamil"] diff --git a/spacy/lang/ta/lex_attrs.py b/spacy/lang/ta/lex_attrs.py index 0f17a5885..87def6162 100644 --- a/spacy/lang/ta/lex_attrs.py +++ b/spacy/lang/ta/lex_attrs.py @@ -1,5 +1,6 @@ # coding: utf8 from __future__ import unicode_literals + from ...attrs import LIKE_NUM @@ -81,4 +82,5 @@ def like_num(text): return False + LEX_ATTRS = {LIKE_NUM: like_num}