# spaCy/spacy/fr/language_data.py
#
# 34 lines
# 429 B
# Python

# encoding: utf8
from __future__ import unicode_literals
from ..symbols import *
from ..language_data import TOKENIZER_PREFIXES
from ..language_data import TOKENIZER_SUFFIXES
from ..language_data import TOKENIZER_INFIXES
def strings_to_exc(orths):
    """Map every item of ``orths`` to a one-element list ``[{ORTH: item}]``.

    Args:
        orths: Iterable of hashable items (used as dictionary keys).
            Duplicates collapse into a single key.

    Returns:
        A new dict with one entry per distinct item; each value is a list
        holding a single ``{ORTH: item}`` dict. An empty iterable yields
        an empty dict.
    """
    # NOTE(review): ORTH is not defined in the visible part of this file;
    # presumably it is supplied by ``from ..symbols import *`` -- confirm.
    return {orth: [{ORTH: orth}] for orth in orths}
# Judging by the name, presumably the lemma string shared by pronouns
# (TODO confirm against the code that reads it).
PRON_LEMMA = "-PRON-"

# No entries yet: empty dict.
TAG_MAP = {}

# Stop words are written as whitespace-separated text between the triple
# quotes and split into a set; the text is blank, so the set is empty.
STOP_WORDS = set(
    """
""".split()
)

# No entries yet: empty dict.
TOKENIZER_EXCEPTIONS = {}

# No entries yet. Note that ``{}`` creates an empty dict, not a set.
ORTH_ONLY = {}