mirror of https://github.com/explosion/spaCy.git
Import and combine Portuguese tokenizer exceptions (see #943)
commit ad8bf1829f
parent f8b2d9c3b7
@@ -5,13 +5,15 @@ from .. import language_data as base
 from ..language_data import update_exc, strings_to_exc
 
 from .stop_words import STOP_WORDS
+from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS, ORTH_ONLY
 
 
 STOP_WORDS = set(STOP_WORDS)
 
 
-TOKENIZER_EXCEPTIONS = strings_to_exc(base.EMOTICONS)
-update_exc(TOKENIZER_EXCEPTIONS, strings_to_exc(base.ABBREVIATIONS))
+TOKENIZER_EXCEPTIONS = dict(TOKENIZER_EXCEPTIONS)
+update_exc(TOKENIZER_EXCEPTIONS, strings_to_exc(ORTH_ONLY))
+update_exc(TOKENIZER_EXCEPTIONS, strings_to_exc(base.EMOTICONS))
 
 
 __all__ = ["TOKENIZER_EXCEPTIONS", "STOP_WORDS"]
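For context, a minimal sketch of the layering pattern this diff uses. The helpers below are simplified stand-ins for spaCy v1's update_exc and strings_to_exc, and the sample entries are hypothetical, not the actual Portuguese data; the point is only to show how the exception dicts combine.

# Simplified stand-in: the real helper maps each bare string (abbreviation,
# emoticon, ...) to a one-token exception whose text is the string itself.
ORTH = "orth"  # stand-in for spacy.attrs.ORTH

def strings_to_exc(orths):
    return {orth: [{ORTH: orth}] for orth in orths}

# Simplified stand-in: merge additional exceptions into the base dict,
# later layers overwriting earlier ones on key collisions.
def update_exc(exc, additions):
    for orth, token_attrs in additions.items():
        exc[orth] = token_attrs

# Hypothetical sample data standing in for the Portuguese module's
# TOKENIZER_EXCEPTIONS / ORTH_ONLY and the shared base.EMOTICONS.
TOKENIZER_EXCEPTIONS = {"Sra.": [{ORTH: "Sra."}]}
ORTH_ONLY = ["Dr.", "Eng."]
EMOTICONS = [":)", ":("]

# Same layering order as the new language_data.py above.
exc = dict(TOKENIZER_EXCEPTIONS)
update_exc(exc, strings_to_exc(ORTH_ONLY))
update_exc(exc, strings_to_exc(EMOTICONS))
print(sorted(exc))  # [':(', ':)', 'Dr.', 'Eng.', 'Sra.']

Note that the dict(TOKENIZER_EXCEPTIONS) copy means the imported module-level dict is never mutated in place; each language builds its own combined table.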