spaCy/spacy/__init__.py

62 lines
1.5 KiB
Python
Raw Normal View History

import pathlib
from .util import set_lang_class, get_lang_class
from . import en
from . import de
from . import zh
2016-09-24 20:09:21 +00:00
# Compatibility shim: when the name `basestring` is not defined (looking it up
# raises NameError), bind it to `str` so the isinstance() check in load()
# works either way.
try:
    basestring
except NameError:
    basestring = str
2016-09-24 20:09:21 +00:00
# Register each bundled language class under the value of its own `lang`
# attribute, in the order English, German, Chinese.
for _lang_cls in (en.English, de.German, zh.Chinese):
    set_lang_class(_lang_cls.lang, _lang_cls)
del _lang_cls  # keep the loop variable out of the package namespace
2016-03-25 17:54:45 +00:00
def blank(name, vocab=None, tokenizer=None, parser=None, tagger=None, entity=None,
          matcher=None, serializer=None, vectors=None, pipeline=None):
    """Instantiate the language class registered for `name` with no data path.

    `name` is split with `util.split_data_name`; the first element of the
    result is used to look up the class through `get_lang_class`. All other
    arguments are forwarded unchanged as keyword arguments of the same name,
    and `path` is always passed as None.

    Returns:
        Whatever calling the looked-up language class returns.

    NOTE(review): `util` is not imported by name in the visible import lines;
    presumably it resolves through the submodule binding created by
    `from .util import ...` -- confirm.
    """
    # Only the name half of the split is needed; the version half is ignored.
    lang_name, _version = util.split_data_name(name)
    lang_cls = get_lang_class(lang_name)
    components = dict(
        path=None,
        vectors=vectors,
        vocab=vocab,
        tokenizer=tokenizer,
        tagger=tagger,
        parser=parser,
        entity=entity,
        matcher=matcher,
        pipeline=pipeline,
        serializer=serializer,
    )
    return lang_cls(**components)
2016-09-24 18:26:17 +00:00
def load(name, vocab=True, tokenizer=True, parser=True, tagger=True, entity=True,
         matcher=True, serializer=True, vectors=True, pipeline=True, via=None):
    """Instantiate the language class for `name` with a path resolved under `via`.

    Steps, in order:
      1. If `via` is None, it is replaced by `util.get_data_path()`.
      2. `name` is split with `util.split_data_name` into a name and a version.
      3. `util.match_best_version(name, version, via)` yields the path that is
         passed to the language class as its first positional argument.
      4. If `vectors` is a string, it is replaced by
         `util.match_best_version(vectors, None, via)`; any other value is
         forwarded as given.
      5. The class returned by `get_lang_class` for the split name is called
         with that path plus the remaining arguments as keyword arguments.

    Returns:
        Whatever calling the looked-up language class returns.

    NOTE(review): what `match_best_version` returns when nothing matches is not
    visible here -- confirm how the language class handles that value.
    """
    data_dir = util.get_data_path() if via is None else via

    lang_name, wanted_version = util.split_data_name(name)
    model_path = util.match_best_version(lang_name, wanted_version, data_dir)

    # A string `vectors` is resolved against the same data directory.
    if isinstance(vectors, basestring):
        vectors = util.match_best_version(vectors, None, data_dir)

    lang_cls = get_lang_class(lang_name)
    overrides = dict(
        vectors=vectors,
        vocab=vocab,
        tokenizer=tokenizer,
        tagger=tagger,
        parser=parser,
        entity=entity,
        matcher=matcher,
        pipeline=pipeline,
        serializer=serializer,
    )
    return lang_cls(model_path, **overrides)