2016-09-24 13:42:01 +00:00
|
|
|
import pathlib
|
|
|
|
|
|
|
|
from .util import set_lang_class, get_lang_class
|
2016-01-15 17:01:02 +00:00
|
|
|
|
2016-04-12 14:31:16 +00:00
|
|
|
from . import en
|
|
|
|
from . import de
|
2016-04-24 16:45:16 +00:00
|
|
|
from . import zh
|
2016-01-15 17:01:02 +00:00
|
|
|
|
2016-03-24 10:19:43 +00:00
|
|
|
|
2016-09-24 20:09:21 +00:00
|
|
|
# Python 2/3 compatibility shim: ``basestring`` is a builtin only on Python 2.
# Evaluating the bare name raises NameError where it is undefined, in which
# case ``str`` is bound under that name so later code can refer to
# ``basestring`` on either interpreter.
try:
    basestring
except NameError:
    basestring = str
|
2016-09-24 20:09:21 +00:00
|
|
|
|
|
|
|
|
2016-04-12 14:31:16 +00:00
|
|
|
# Register each bundled language class under its own ``lang`` code so that
# get_lang_class() can look it up later. Registration order is English,
# German, Chinese.
for _language in (en.English, de.German, zh.Chinese):
    set_lang_class(_language.lang, _language)
# Drop the loop variable so it does not linger in the module namespace.
del _language
|
2016-03-25 17:54:45 +00:00
|
|
|
|
|
|
|
|
2016-10-18 17:23:31 +00:00
|
|
|
def load(name, **overrides):
    """Construct the language class registered for the data pack ``name``.

    ``name`` is split by ``util.split_data_name`` into a target name and a
    target version. The target name selects the class via
    ``get_lang_class``; the name/version pair is resolved to a location with
    ``util.match_best_version``.

    Recognised keys in ``overrides``:

    * ``path`` -- data directory to search instead of
      ``util.get_data_path()``. It is consumed here and NOT forwarded to the
      language class, which receives the resolved ``path`` instead.
    * ``add_vectors`` -- if present, no vector loader is installed here.
    * ``vectors`` -- name of the vectors data pack to use when the target
      name is ``'en'`` and ``add_vectors`` was not supplied.

    Every remaining override (including ``vectors``) is forwarded unchanged
    as a keyword argument to the language class.

    Raises:
        IOError: if ``vectors`` was given but no matching data pack is found
            under the data directory.

    Returns:
        The result of calling the looked-up language class with the resolved
        ``path`` and the remaining overrides.
    """
    # NOTE(review): ``util`` is not imported as a bare name in the visible
    # file header; presumably it is bound as a side effect of
    # ``from .util import ...`` in the package __init__ -- confirm.
    target_name, target_version = util.split_data_name(name)

    # Pop 'path' rather than just reading it: leaving it in ``overrides``
    # would pass ``path`` twice in the final call below and raise
    # "TypeError: got multiple values for keyword argument 'path'".
    if 'path' in overrides:
        data_path = overrides.pop('path')
    else:
        data_path = util.get_data_path()

    if target_name == 'en' and 'add_vectors' not in overrides:
        if 'vectors' in overrides:
            # Explicitly requested vectors must exist.
            vec_path = util.match_best_version(overrides['vectors'], None, data_path)
            if vec_path is None:
                raise IOError(
                    'Could not load data pack %s from %s' % (overrides['vectors'], data_path))
        else:
            # The default vectors pack is optional; a miss is silently skipped.
            vec_path = util.match_best_version('en_glove_cc_300_1m_vectors', None, data_path)
        if vec_path is not None:
            vec_path = vec_path / 'vocab' / 'vec.bin'
            # vec_path is not rebound after this point, so the closure is safe.
            overrides['add_vectors'] = lambda vocab: vocab.load_vectors_from_bin_loc(vec_path)

    path = util.match_best_version(target_name, target_version, data_path)
    cls = get_lang_class(target_name)
    return cls(path=path, **overrides)
|