diff --git a/spacy/lang/xx/__init__.py b/spacy/lang/xx/__init__.py new file mode 100644 index 000000000..fef8c9d59 --- /dev/null +++ b/spacy/lang/xx/__init__.py @@ -0,0 +1,26 @@ +# coding: utf8 +from __future__ import unicode_literals + + +from ..tokenizer_exceptions import BASE_EXCEPTIONS +from ...language import Language +from ...attrs import LANG +from ...util import update_exc + + +class MultiLanguageDefaults(Language.Defaults): + lex_attr_getters = dict(Language.Defaults.lex_attr_getters) + lex_attr_getters[LANG] = lambda text: 'xx' + + tokenizer_exceptions = update_exc(BASE_EXCEPTIONS) + + +class MultiLanguage(Language): + """Language class to be used for models that support multiple languages. + This module allows models to specify their language ID as 'xx'. + """ + lang = 'xx' + Defaults = MultiLanguageDefaults + + +__all__ = ['MultiLanguage']