mirror of https://github.com/explosion/spaCy.git
34 lines
429 B
Python
34 lines
429 B
Python
# encoding: utf8
|
|
from __future__ import unicode_literals
|
|
|
|
from ..symbols import *
|
|
from ..language_data import TOKENIZER_PREFIXES
|
|
from ..language_data import TOKENIZER_SUFFIXES
|
|
from ..language_data import TOKENIZER_INFIXES
|
|
|
|
|
|
def strings_to_exc(orths):
    """Map each string in *orths* to a one-item list holding ``{ORTH: string}``.

    ``ORTH`` is not defined in this file; it is presumably supplied by the
    star import of ``..symbols``.

    orths: iterable of hashable strings.
    Returns a dict of the form ``{orth: [{ORTH: orth}]}``; duplicate strings
    collapse into a single key.
    """
    return {orth: [{ORTH: orth}] for orth in orths}
|
|
|
|
|
|
# Lemma placeholder string (by its name, for pronouns - not used elsewhere in
# this file, so confirm against the code that imports it).
PRON_LEMMA = "-PRON-"
|
|
|
|
|
|
# Tag map for this language; no entries are defined yet.
TAG_MAP = {
}
|
|
|
|
# Stop words are listed whitespace-separated inside the triple-quoted string.
# The string is currently empty, so the resulting set is empty.
STOP_WORDS = set("""
""".split())
|
|
|
|
|
|
# Tokenizer exceptions for this language; no entries are defined yet.
TOKENIZER_EXCEPTIONS = {
}
|
|
|
|
|
|
# Currently empty. Kept as the original empty ``{}`` literal (a dict).
# NOTE(review): the name suggests a plain collection of strings, e.g. input
# for strings_to_exc() - confirm whether a list or set was intended.
ORTH_ONLY = {
}
|