diff --git a/spacy/en/__init__.py b/spacy/en/__init__.py index 879c2ff43..7262f37fb 100644 --- a/spacy/en/__init__.py +++ b/spacy/en/__init__.py @@ -31,6 +31,7 @@ class English(Language): tag_map = TAG_MAP stop_words = STOP_WORDS + morph_rules = dict(MORPH_RULES) lemma_rules = dict(LEMMA_RULES) lemma_index = dict(LEMMA_INDEX) lemma_exc = dict(LEMMA_EXC) diff --git a/spacy/en/language_data.py b/spacy/en/language_data.py index 971d998f5..f53e6beda 100644 --- a/spacy/en/language_data.py +++ b/spacy/en/language_data.py @@ -9,6 +9,9 @@ from .tag_map import TAG_MAP from .word_sets import STOP_WORDS, NUM_WORDS from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS, ORTH_ONLY from .morph_rules import MORPH_RULES +from .lemmatizer import RULES as LEMMA_RULES +from .lemmatizer import INDEX as LEMMA_INDEX +from .lemmatizer import EXC as LEMMA_EXC TAG_MAP = dict(TAG_MAP) @@ -22,4 +25,5 @@ update_exc(TOKENIZER_EXCEPTIONS, strings_to_exc(base.EMOTICONS)) update_exc(TOKENIZER_EXCEPTIONS, strings_to_exc(base.ABBREVIATIONS)) -__all__ = ["TOKENIZER_EXCEPTIONS", "TAG_MAP", "STOP_WORDS", "MORPH_RULES"] +__all__ = ["TOKENIZER_EXCEPTIONS", "TAG_MAP", "STOP_WORDS", "MORPH_RULES", + "LEMMA_RULES", "LEMMA_INDEX", "LEMMA_EXC"] diff --git a/spacy/en/tokenizer_exceptions.py b/spacy/en/tokenizer_exceptions.py index 419d29f54..8ef862411 100644 --- a/spacy/en/tokenizer_exceptions.py +++ b/spacy/en/tokenizer_exceptions.py @@ -770,5 +770,5 @@ ORTH_ONLY = [ "Rev.", "Sen.", "St.", - "vs." + "vs.", ]