From 5b6bf4d4a6188e3215d990aa4a374808a6c2a58a Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Sat, 25 Jul 2015 23:05:51 +0200 Subject: [PATCH] * Remove probability cap on lexicon --- bin/init_model.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/bin/init_model.py b/bin/init_model.py index fe65cd309..3bd69b43c 100644 --- a/bin/init_model.py +++ b/bin/init_model.py @@ -158,14 +158,12 @@ def setup_vocab(src_dir, dst_dir): lexicon = [] for word, prob in reversed(sorted(list(probs.items()), key=lambda item: item[1])): entry = get_lex_props(word) - if word in clusters or float(prob) >= -17: + if word in clusters: entry['prob'] = float(prob) cluster = clusters.get(word, '0') # Decode as a little-endian string, so that we can do & 15 to get # the first 4 bits. See _parse_features.pyx entry['cluster'] = int(cluster[::-1], 2) - orth_senses = set() - lemmas = [] vocab[word] = entry vocab.dump(str(dst_dir / 'lexemes.bin')) vocab.strings.dump(str(dst_dir / 'strings.txt'))