Add LANG attribute to English and German

This commit is contained in:
Matthew Honnibal 2016-10-18 18:52:48 +02:00
parent 05e2a589a4
commit 8c8f5c62c6
2 changed files with 8 additions and 3 deletions

View File

@@ -3,6 +3,7 @@ from __future__ import unicode_literals, print_function
 from os import path
 from ..language import Language
+from ..attrs import LANG
 from . import language_data
@@ -11,6 +12,8 @@ class German(Language):
     class Defaults(Language.Defaults):
         tokenizer_exceptions = dict(language_data.TOKENIZER_EXCEPTIONS)
+        lex_attr_getters = dict(Language.Defaults.lex_attr_getters)
+        lex_attr_getters[LANG] = lambda text: 'de'
         prefixes = tuple(language_data.TOKENIZER_PREFIXES)

View File

@@ -8,6 +8,7 @@ from .. import util
 from ..lemmatizer import Lemmatizer
 from ..vocab import Vocab
 from ..tokenizer import Tokenizer
+from ..attrs import LANG
 class English(Language):
@@ -15,13 +16,14 @@ class English(Language):
     class Defaults(Language.Defaults):
         lex_attr_getters = dict(Language.Defaults.lex_attr_getters)
+        lex_attr_getters[LANG] = lambda text: 'en'
         tokenizer_exceptions = dict(language_data.TOKENIZER_EXCEPTIONS)
         prefixes = tuple(language_data.TOKENIZER_PREFIXES)
         suffixes = tuple(language_data.TOKENIZER_SUFFIXES)
         infixes = tuple(language_data.TOKENIZER_INFIXES)
         tag_map = dict(language_data.TAG_MAP)