From 5ff4454177db14a899bfb4f0012287bc4c93e4a8 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Fri, 16 Oct 2015 04:31:15 +1100 Subject: [PATCH] * Update get_freqs.py script --- bin/get_freqs.py | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) diff --git a/bin/get_freqs.py b/bin/get_freqs.py index 123deca10..f8639d4a8 100755 --- a/bin/get_freqs.py +++ b/bin/get_freqs.py @@ -25,25 +25,10 @@ def iter_comments(loc): yield ujson.loads(line) -def null_props(string): - return { - 'flags': 0, - 'length': len(string), - 'orth': string, - 'lower': string, - 'norm': string, - 'shape': string, - 'prefix': string, - 'suffix': string, - 'cluster': 0, - 'prob': -22, - 'sentiment': 0 - } - - def count_freqs(input_loc, output_loc): print(output_loc) - tokenizer = Tokenizer.from_dir(Vocab(), spacy.en.English.default_data_dir()) + tokenizer = Tokenizer.from_dir(Vocab(), + path.join(spacy.en.English.default_data_dir(), 'tokenizer')) counts = PreshCounter() for json_comment in iter_comments(input_loc):