Use base language data as default

This commit is contained in:
Ines Montani 2016-12-18 16:55:25 +01:00
parent bcc1d50d09
commit 753068f1d5
1 changed file with 7 additions and 6 deletions

View File

@@ -21,6 +21,7 @@ from .matcher import Matcher
from . import attrs from . import attrs
from . import orth from . import orth
from . import util from . import util
from . import language_data
from .lemmatizer import Lemmatizer from .lemmatizer import Lemmatizer
from .train import Trainer from .train import Trainer
@@ -141,13 +142,13 @@ class BaseDefaults(object):
pipeline.append(nlp.entity) pipeline.append(nlp.entity)
return pipeline return pipeline
prefixes = tuple() prefixes = tuple(language_data.TOKENIZER_PREFIXES)
suffixes = tuple() suffixes = tuple(language_data.TOKENIZER_SUFFIXES)
infixes = tuple() infixes = tuple(language_data.TOKENIZER_INFIXES)
tag_map = {} tag_map = dict(language_data.TAG_MAP)
tokenizer_exceptions = {} tokenizer_exceptions = {}