From 8c8f5c62c6680f2700d7488555916dd89d4befcd Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Tue, 18 Oct 2016 18:52:48 +0200 Subject: [PATCH] Add LANG attribute to English and German --- spacy/de/__init__.py | 3 +++ spacy/en/__init__.py | 8 +++++--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/spacy/de/__init__.py b/spacy/de/__init__.py index be5b3b0f0..7a8e5727c 100644 --- a/spacy/de/__init__.py +++ b/spacy/de/__init__.py @@ -3,6 +3,7 @@ from __future__ import unicode_literals, print_function from os import path from ..language import Language +from ..attrs import LANG from . import language_data @@ -11,6 +12,8 @@ class German(Language): class Defaults(Language.Defaults): tokenizer_exceptions = dict(language_data.TOKENIZER_EXCEPTIONS) + lex_attr_getters = dict(Language.Defaults.lex_attr_getters) + lex_attr_getters[LANG] = lambda text: 'de' prefixes = tuple(language_data.TOKENIZER_PREFIXES) diff --git a/spacy/en/__init__.py b/spacy/en/__init__.py index fdd17904f..ade3e8e7a 100644 --- a/spacy/en/__init__.py +++ b/spacy/en/__init__.py @@ -8,6 +8,7 @@ from .. import util from ..lemmatizer import Lemmatizer from ..vocab import Vocab from ..tokenizer import Tokenizer +from ..attrs import LANG class English(Language): @@ -15,13 +16,14 @@ class English(Language): class Defaults(Language.Defaults): lex_attr_getters = dict(Language.Defaults.lex_attr_getters) + lex_attr_getters[LANG] = lambda text: 'en' tokenizer_exceptions = dict(language_data.TOKENIZER_EXCEPTIONS) - + prefixes = tuple(language_data.TOKENIZER_PREFIXES) - + suffixes = tuple(language_data.TOKENIZER_SUFFIXES) - + infixes = tuple(language_data.TOKENIZER_INFIXES) tag_map = dict(language_data.TAG_MAP)