spaCy/spacy/pt/language_data.py

34 lines
429 B
Python
Raw Normal View History

# encoding: utf8
from __future__ import unicode_literals
2016-12-08 19:41:41 +00:00
from ..symbols import *
from ..language_data import TOKENIZER_PREFIXES
from ..language_data import TOKENIZER_SUFFIXES
from ..language_data import TOKENIZER_INFIXES
2016-12-08 19:41:41 +00:00
def strings_to_exc(orths):
    """Build an exceptions dict from an iterable of strings.

    Each string in ``orths`` becomes a key whose value is a one-element list
    containing the dict ``{ORTH: string}``.

    orths: iterable of strings.
    Returns: dict mapping every string to ``[{ORTH: string}]``; an empty
        iterable yields an empty dict.
    """
    return {orth: [{ORTH: orth}] for orth in orths}
2016-12-08 19:41:41 +00:00
# Placeholder lemma string. NOTE(review): presumably the lemma assigned to
# pronouns -- confirm against the shared language data.
PRON_LEMMA = "-PRON-"


# No tag mappings are defined in this module yet.
TAG_MAP = {
}


# Stop words go inside the string literal, separated by whitespace; the
# literal is split into a set. The list is currently empty.
STOP_WORDS = set("""
""".split())


# No tokenizer exceptions are defined in this module yet.
TOKENIZER_EXCEPTIONS = {
}


# Currently empty. NOTE(review): presumably a collection of strings meant to
# be passed to strings_to_exc() -- confirm against callers.
ORTH_ONLY = {
}