spaCy/spacy/lang/lt/tokenizer_exceptions.py

16 lines
383 B
Python
Raw Normal View History

from ..tokenizer_exceptions import BASE_EXCEPTIONS
2019-07-08 08:25:22 +00:00
from ...symbols import ORTH
from ...util import update_exc
2019-07-08 08:25:22 +00:00
# Extra exception strings defined by this module. Each string is mapped to a
# one-element list holding a single attribute dict whose ORTH value is the
# string itself.
_exc = {}
for orth in ["n-tosios", "?!"]:
    _exc[orth] = [{ORTH: orth}]
# Start from the shared base exceptions, leaving out every entry whose key
# ends with a period.
mod_base_exceptions = {
    exc: val for exc, val in BASE_EXCEPTIONS.items() if not exc.endswith(".")
}
# Also drop the "8)" entry. pop() with a default is used instead of `del` so
# that importing this module does not raise KeyError if the base table ever
# stops containing that key.
mod_base_exceptions.pop("8)", None)
# NOTE(review): update_exc presumably combines the filtered base table with
# the module-specific entries in _exc -- confirm against its definition.
TOKENIZER_EXCEPTIONS = update_exc(mod_base_exceptions, _exc)