From dc13edd7cb78e751d0954059173c09bc0ebf7394 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Wed, 26 Aug 2015 19:14:05 +0200 Subject: [PATCH] * Refactor init_model to accommodate other languages --- bin/init_model.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bin/init_model.py b/bin/init_model.py index 9a635f296..0badf71fc 100644 --- a/bin/init_model.py +++ b/bin/init_model.py @@ -151,7 +151,7 @@ def setup_vocab(get_lex_attr, src_dir, dst_dir): write_binary_vectors(str(vectors_src), str(dst_dir / 'vec.bin')) else: print("Warning: Word vectors file not found") - vocab = Vocab(data_dir=None, get_lex_attr=get_lex_attr) + vocab = Vocab(get_lex_attr=get_lex_attr) clusters = _read_clusters(src_dir / 'clusters.txt') probs, oov_prob = _read_probs(src_dir / 'words.sgt.prob') if not probs: @@ -183,8 +183,8 @@ def setup_vocab(get_lex_attr, src_dir, dst_dir): def main(lang_id, lang_data_dir, corpora_dir, model_dir): languages = { - 'en': spacy.en.get_lex_attr, - 'de': spacy.en.get_lex_attr + 'en': spacy.en.English.default_lex_attrs(), + 'de': spacy.de.Deutsch.default_lex_attrs() } model_dir = Path(model_dir)