From 5b6bf4d4a6188e3215d990aa4a374808a6c2a58a Mon Sep 17 00:00:00 2001
From: Matthew Honnibal <honnibal@gmail.com>
Date: Sat, 25 Jul 2015 23:05:51 +0200
Subject: [PATCH] * Remove probability cap on lexicon

---
 bin/init_model.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/bin/init_model.py b/bin/init_model.py
index fe65cd309..3bd69b43c 100644
--- a/bin/init_model.py
+++ b/bin/init_model.py
@@ -158,14 +158,12 @@ def setup_vocab(src_dir, dst_dir):
     lexicon = []
     for word, prob in reversed(sorted(list(probs.items()), key=lambda item: item[1])):
         entry = get_lex_props(word)
-        if word in clusters or float(prob) >= -17:
+        if word in clusters:
             entry['prob'] = float(prob)
             cluster = clusters.get(word, '0')
             # Decode as a little-endian string, so that we can do & 15 to get
             # the first 4 bits. See _parse_features.pyx
             entry['cluster'] = int(cluster[::-1], 2)
-            orth_senses = set()
-            lemmas = []
             vocab[word] = entry
     vocab.dump(str(dst_dir / 'lexemes.bin'))
     vocab.strings.dump(str(dst_dir / 'strings.txt'))