mirror of https://github.com/explosion/spaCy.git
* Refactor init_model to accommodate other languages
This commit is contained in:
parent
494da25872
commit
dc13edd7cb
|
@ -151,7 +151,7 @@ def setup_vocab(get_lex_attr, src_dir, dst_dir):
|
||||||
write_binary_vectors(str(vectors_src), str(dst_dir / 'vec.bin'))
|
write_binary_vectors(str(vectors_src), str(dst_dir / 'vec.bin'))
|
||||||
else:
|
else:
|
||||||
print("Warning: Word vectors file not found")
|
print("Warning: Word vectors file not found")
|
||||||
vocab = Vocab(data_dir=None, get_lex_attr=get_lex_attr)
|
vocab = Vocab(get_lex_attr=get_lex_attr)
|
||||||
clusters = _read_clusters(src_dir / 'clusters.txt')
|
clusters = _read_clusters(src_dir / 'clusters.txt')
|
||||||
probs, oov_prob = _read_probs(src_dir / 'words.sgt.prob')
|
probs, oov_prob = _read_probs(src_dir / 'words.sgt.prob')
|
||||||
if not probs:
|
if not probs:
|
||||||
|
@ -183,8 +183,8 @@ def setup_vocab(get_lex_attr, src_dir, dst_dir):
|
||||||
|
|
||||||
def main(lang_id, lang_data_dir, corpora_dir, model_dir):
|
def main(lang_id, lang_data_dir, corpora_dir, model_dir):
|
||||||
languages = {
|
languages = {
|
||||||
'en': spacy.en.get_lex_attr,
|
'en': spacy.en.English.default_lex_attrs(),
|
||||||
'de': spacy.en.get_lex_attr
|
'de': spacy.de.Deutsch.default_lex_attrs()
|
||||||
}
|
}
|
||||||
|
|
||||||
model_dir = Path(model_dir)
|
model_dir = Path(model_dir)
|
||||||
|
|
Loading…
Reference in New Issue