spaCy/spacy/it/language_data.py

34 lines
429 B
Python
Raw Normal View History

2016-11-02 19:37:55 +00:00
# encoding: utf8
from __future__ import unicode_literals
2016-12-08 18:52:18 +00:00
from ..symbols import *
from ..language_data import TOKENIZER_PREFIXES
from ..language_data import TOKENIZER_SUFFIXES
from ..language_data import TOKENIZER_INFIXES
2016-11-02 19:37:55 +00:00
2016-12-08 18:52:18 +00:00
def strings_to_exc(orths):
    """Build a tokenizer-exception table from an iterable of strings.

    Each string in ``orths`` becomes a key whose value is a one-element
    list holding a single ``{ORTH: string}`` dict.
    """
    exceptions = {}
    for form in orths:
        # One entry per string; a repeated string overwrites its earlier entry.
        exceptions[form] = [{ORTH: form}]
    return exceptions
2016-11-02 19:37:55 +00:00
2016-12-08 18:52:18 +00:00
# Lemma placeholder string.
# NOTE(review): presumably the shared lemma assigned to pronouns -- confirm
# against the code that consumes it.
PRON_LEMMA = "-PRON-"


# No tag mappings are defined in this module yet.
TAG_MAP = {
}


# Whitespace-separated stop words go between the triple quotes, one or more
# per line. The list is currently empty, so this evaluates to an empty set.
STOP_WORDS = set("""
""".split())


# No tokenizer exceptions are defined in this module yet.
TOKENIZER_EXCEPTIONS = {
}

# NOTE(review): an empty ``{}`` literal is a dict, not a set or list; if this
# is meant to hold plain strings (e.g. for strings_to_exc), confirm the type.
ORTH_ONLY = {
}