From 9d51e4d13c667a543478287e5661b67d4ae8ab96 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Wed, 2 Mar 2016 00:42:55 +1100
Subject: [PATCH] Delete gather_freqs.py

This script was in a broken state, and should be unnecessary. The functionality is subsumed by `get_freqs.py`
---
 bin/gather_freqs.py | 29 -----------------------------
 1 file changed, 29 deletions(-)
 delete mode 100644 bin/gather_freqs.py

diff --git a/bin/gather_freqs.py b/bin/gather_freqs.py
deleted file mode 100644
index fa3d61ee1..000000000
--- a/bin/gather_freqs.py
+++ /dev/null
@@ -1,29 +0,0 @@
-from __future__ import unicode_literals
-import plac
-import io
-
-def main(in_loc, out_loc):
-    this_key = None
-    this_freq = 0
-    df = 0
-    with io.open(out_loc, 'w', encoding='utf8') as out_file:
-        for line in io.open(in_loc, encoding='utf8'):
-            line = line.strip()
-            if not line:
-                continue
-            freq, key = line.split('\t', 1)
-            freq = int(freq)
-            if this_key is not None and key != this_key:
-                out_file.write('%d\t%d\t%s\n' % (this_freq, df, this_key))
-                this_key = key
-                this_freq = freq
-                df = 1
-            else:
-                this_freq += freq
-                df += 1
-                this_key = key
-        out_file.write('%d\t%d\t%s\n' % (this_freq, df, this_key))
-
-
-if __name__ == '__main__':
-    plac.call(main)