From 84189c1cab1f8534597cbdf740a8ba51ac1d086a Mon Sep 17 00:00:00 2001
From: ines
Date: Sun, 28 May 2017 00:58:59 +0200
Subject: [PATCH] Add 'xx' language ID for multi-language support

Allows models to specify their language ID as 'xx'.
---
 spacy/lang/xx/__init__.py | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)
 create mode 100644 spacy/lang/xx/__init__.py

diff --git a/spacy/lang/xx/__init__.py b/spacy/lang/xx/__init__.py
new file mode 100644
index 000000000..fef8c9d59
--- /dev/null
+++ b/spacy/lang/xx/__init__.py
@@ -0,0 +1,26 @@
+# coding: utf8
+from __future__ import unicode_literals
+
+
+from ..tokenizer_exceptions import BASE_EXCEPTIONS
+from ...language import Language
+from ...attrs import LANG
+from ...util import update_exc
+
+
+class MultiLanguageDefaults(Language.Defaults):  # defaults used by MultiLanguage
+    lex_attr_getters = dict(Language.Defaults.lex_attr_getters)  # copy; base dict is not mutated
+    lex_attr_getters[LANG] = lambda text: 'xx'  # LANG getter ignores the text, always 'xx'
+
+    tokenizer_exceptions = update_exc(BASE_EXCEPTIONS)  # built from BASE_EXCEPTIONS alone
+
+
+class MultiLanguage(Language):
+    """Language class to be used for models that support multiple languages.
+    This module allows models to specify their language ID as 'xx'.
+    """
+    lang = 'xx'  # same ID that the LANG getter in MultiLanguageDefaults returns
+    Defaults = MultiLanguageDefaults  # overrides the Defaults attribute of Language
+
+
+__all__ = ['MultiLanguage']  # MultiLanguageDefaults is left out of star-imports