mirror of https://github.com/explosion/spaCy.git
Wire up English lemma and morph rules.
commit 8dbff4f5f4
parent f70be44746
@@ -31,6 +31,7 @@ class English(Language):
     tag_map = TAG_MAP
     stop_words = STOP_WORDS

+    morph_rules = dict(MORPH_RULES)
     lemma_rules = dict(LEMMA_RULES)
     lemma_index = dict(LEMMA_INDEX)
     lemma_exc = dict(LEMMA_EXC)
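The attributes above hook the English subclass up to data tables defined in the language package. As a rough illustration of the shapes involved (the entries below are invented for illustration, not the actual contents of spacy/en/lemmatizer.py or spacy/en/morph_rules.py):

# Illustrative shapes only; every entry here is an assumption, not module contents.
LEMMA_INDEX = {"noun": {"ship", "sheep"}, "verb": {"be", "see"}}   # known base forms per POS
LEMMA_EXC = {"verb": {"was": ("be",), "were": ("be",)}}            # irregular form -> lemma(s)
LEMMA_RULES = {"noun": [["ses", "s"], ["s", ""]],                  # suffix rewrites, tried in order
               "verb": [["ing", ""], ["ed", ""]]}
MORPH_RULES = {"PRP": {"I": {"PronType": "Prs", "Person": "1", "Number": "Sing"},
                       "me": {"PronType": "Prs", "Person": "1", "Case": "Acc"}}}  # per-tag, per-form features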
@@ -9,6 +9,9 @@ from .tag_map import TAG_MAP
 from .word_sets import STOP_WORDS, NUM_WORDS
 from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS, ORTH_ONLY
 from .morph_rules import MORPH_RULES
+from .lemmatizer import RULES as LEMMA_RULES
+from .lemmatizer import INDEX as LEMMA_INDEX
+from .lemmatizer import EXC as LEMMA_EXC


 TAG_MAP = dict(TAG_MAP)
@@ -22,4 +25,5 @@ update_exc(TOKENIZER_EXCEPTIONS, strings_to_exc(base.EMOTICONS))
 update_exc(TOKENIZER_EXCEPTIONS, strings_to_exc(base.ABBREVIATIONS))


-__all__ = ["TOKENIZER_EXCEPTIONS", "TAG_MAP", "STOP_WORDS", "MORPH_RULES"]
+__all__ = ["TOKENIZER_EXCEPTIONS", "TAG_MAP", "STOP_WORDS", "MORPH_RULES",
+           "LEMMA_RULES", "LEMMA_INDEX", "LEMMA_EXC"]
@@ -770,5 +770,5 @@ ORTH_ONLY = [
    "Rev.",
    "Sen.",
    "St.",
-   "vs."
+   "vs.",
 ]
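For orientation, here is a minimal sketch of how lemma tables with this layout are typically consumed: exceptions are checked first, then the index of known base forms, then the suffix rules. The function below is an illustration under those assumptions, not spaCy's Lemmatizer implementation.

def lemmatize(string, index, exceptions, rules):
    # Sketch only: irregular forms win, then known base forms, then suffix rewrites.
    string = string.lower()
    if string in exceptions:
        return list(exceptions[string])
    lemmas = set()
    if string in index:
        lemmas.add(string)
    for old, new in rules:
        if old and string.endswith(old):
            form = string[:len(string) - len(old)] + new
            if form in index:
                lemmas.add(form)
    return sorted(lemmas) or [string]

# e.g. lemmatize("ships", LEMMA_INDEX["noun"], LEMMA_EXC.get("noun", {}),
#                LEMMA_RULES["noun"]) gives ["ship"] with the toy tables sketched earlier.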