Wire up English lemma and morph rules.

Matthew Honnibal 2017-03-15 09:23:22 -05:00
parent f70be44746
commit 8dbff4f5f4
3 changed files with 7 additions and 2 deletions


@@ -31,6 +31,7 @@ class English(Language):
     tag_map = TAG_MAP
     stop_words = STOP_WORDS
     morph_rules = dict(MORPH_RULES)
+    lemma_rules = dict(LEMMA_RULES)
     lemma_index = dict(LEMMA_INDEX)
     lemma_exc = dict(LEMMA_EXC)


@@ -9,6 +9,9 @@ from .tag_map import TAG_MAP
 from .word_sets import STOP_WORDS, NUM_WORDS
 from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS, ORTH_ONLY
 from .morph_rules import MORPH_RULES
+from .lemmatizer import RULES as LEMMA_RULES
+from .lemmatizer import INDEX as LEMMA_INDEX
+from .lemmatizer import EXC as LEMMA_EXC

 TAG_MAP = dict(TAG_MAP)

@@ -22,4 +25,5 @@ update_exc(TOKENIZER_EXCEPTIONS, strings_to_exc(base.EMOTICONS))
 update_exc(TOKENIZER_EXCEPTIONS, strings_to_exc(base.ABBREVIATIONS))
-__all__ = ["TOKENIZER_EXCEPTIONS", "TAG_MAP", "STOP_WORDS", "MORPH_RULES"]
+__all__ = ["TOKENIZER_EXCEPTIONS", "TAG_MAP", "STOP_WORDS", "MORPH_RULES",
+           "LEMMA_RULES", "LEMMA_INDEX", "LEMMA_EXC"]


@@ -770,5 +770,5 @@ ORTH_ONLY = [
     "Rev.",
     "Sen.",
     "St.",
-    "vs."
+    "vs.",
 ]
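
For context, here is a minimal sketch of how suffix-rule tables like the ones wired up above (an index of known base forms, an exceptions table, and ordered suffix-rewrite rules) typically drive rule-based lemmatization. This is illustrative only, not the spaCy implementation: the table shapes and the lemmatize() helper are assumptions, though the INDEX / EXC / RULES names mirror the imports in the diff.

# Illustrative sketch of suffix-rule lemmatization -- not spaCy's actual code.
INDEX = {"verb": {"be", "go", "strive"}}            # attested base forms
EXC = {"verb": {"was": ("be",), "went": ("go",)}}   # irregulars bypass the rules
RULES = {"verb": [["ing", ""], ["ed", ""], ["s", ""]]}  # (old, new) suffix pairs

def lemmatize(string, pos):
    """Return candidate lemmas for `string` under part of speech `pos` (hypothetical helper)."""
    # Exceptions are checked first so irregular forms never reach the suffix rules.
    if string in EXC.get(pos, {}):
        return list(EXC[pos][string])
    forms = []
    for old, new in RULES.get(pos, []):
        if string.endswith(old):
            form = string[: len(string) - len(old)] + new
            # Keep a candidate only if the index attests it as a real base form.
            if form in INDEX.get(pos, set()):
                forms.append(form)
    return forms or [string]

print(lemmatize("went", "verb"))     # ['go']     via the exception table
print(lemmatize("going", "verb"))    # ['go']     via the "ing" rule + index check
print(lemmatize("strives", "verb"))  # ['strive'] via the "s" rule + index check

The design point this illustrates is why all three tables are exported together: the rules alone overgenerate, the index filters rule output down to attested lemmas, and the exceptions handle irregular forms the rules cannot reach.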