From da4821fc149c3fc5d6d87661f583b1acce142189 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Thu, 23 Jul 2015 09:27:07 +0200
Subject: [PATCH] * Add cluster words to probs in init_model

---
 bin/init_model.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/bin/init_model.py b/bin/init_model.py
index d4e40d840..792ce21ca 100644
--- a/bin/init_model.py
+++ b/bin/init_model.py
@@ -115,6 +115,14 @@ def setup_vocab(src_dir, dst_dir):
     vocab = Vocab(data_dir=None, get_lex_props=get_lex_props)
     clusters = _read_clusters(src_dir / 'clusters.txt')
     probs = _read_probs(src_dir / 'words.sgt.prob')
+    if not probs:
+        min_prob = 0.0
+    else:
+        min_prob = min(probs.values())
+    for word in clusters:
+        if word not in probs:
+            probs[word] = min_prob
+
     lexicon = []
     for word, prob in reversed(sorted(probs.items(), key=lambda item: item[1])):
         entry = get_lex_props(word)