diff --git a/spacy/de/__init__.py b/spacy/de/__init__.py index be5b3b0f0..7a8e5727c 100644 --- a/spacy/de/__init__.py +++ b/spacy/de/__init__.py @@ -3,6 +3,7 @@ from __future__ import unicode_literals, print_function from os import path from ..language import Language +from ..attrs import LANG from . import language_data @@ -11,6 +12,8 @@ class German(Language): class Defaults(Language.Defaults): tokenizer_exceptions = dict(language_data.TOKENIZER_EXCEPTIONS) + lex_attr_getters = dict(Language.Defaults.lex_attr_getters) + lex_attr_getters[LANG] = lambda text: 'de' prefixes = tuple(language_data.TOKENIZER_PREFIXES) diff --git a/spacy/en/__init__.py b/spacy/en/__init__.py index fdd17904f..ade3e8e7a 100644 --- a/spacy/en/__init__.py +++ b/spacy/en/__init__.py @@ -8,6 +8,7 @@ from .. import util from ..lemmatizer import Lemmatizer from ..vocab import Vocab from ..tokenizer import Tokenizer +from ..attrs import LANG class English(Language): @@ -15,13 +16,14 @@ class English(Language): class Defaults(Language.Defaults): lex_attr_getters = dict(Language.Defaults.lex_attr_getters) + lex_attr_getters[LANG] = lambda text: 'en' tokenizer_exceptions = dict(language_data.TOKENIZER_EXCEPTIONS) - + prefixes = tuple(language_data.TOKENIZER_PREFIXES) - + suffixes = tuple(language_data.TOKENIZER_SUFFIXES) - + infixes = tuple(language_data.TOKENIZER_INFIXES) tag_map = dict(language_data.TAG_MAP)