# Source: spaCy/spacy/lang/it/tokenizer_exceptions.py (Python, 10 lines, 173 B)

# coding: utf8
from __future__ import unicode_literals
from ...symbols import ORTH, LEMMA
# Tokenizer exceptions for Italian: map surface forms that contain an
# apostrophe (and would otherwise be split) to a single token with an
# explicit orthography and lemma.
_exc = {}

# "po'" is the apocopated form of "poco" ("a little"); keep it as one token.
_exc["po'"] = [{ORTH: "po'", LEMMA: "poco"}]

TOKENIZER_EXCEPTIONS = _exc