# =============================================================================
# Fog Cluster CLI Action
# =============================================================================
#
# CLI action clustering the distinct values found in one column of
# a CSV file and printing the result as a TOML report.
#
import csv
import json
import re
from collections import Counter
from datetime import datetime
from timeit import default_timer as timer

from fog.clustering import (
    key_collision
)
from fog.key import fingerprint

# Maps the algorithm identifier given on the command line to the callable
# implementing it ('fn') and the keyword arguments it is called with ('args').
CLUSTERING_ROUTINES = {
    'fingerprint_collision': {
        'name': 'Fingerpint collision',
        'fn': key_collision,
        'args': {
            'key': fingerprint
        }
    }
}


def escape_quote(string):
    """
    Return `string` with every single quote prefixed by a backslash.

    Note that the result is NOT safe to embed in a TOML literal string
    ('...'): TOML literal strings have no escape mechanism at all. The
    report below therefore relies on `_toml_string` instead; this helper
    is only kept for callers that may still import it.
    """
    return string.replace('\'', '\\\'')


def _toml_string(value):
    """
    Render `value` as a double-quoted TOML basic string.

    Args:
        value (str): text to serialize.

    Returns:
        str: the quoted and escaped representation, quotes included.
    """

    # The escapes produced for a JSON string (\", \\, \n, \t, \uXXXX, ...)
    # are all valid inside a TOML basic string. `ensure_ascii=False` keeps
    # non-ASCII text readable and avoids surrogate-pair escapes, which
    # TOML rejects.
    rendered = json.dumps(value, ensure_ascii=False)

    # DEL is the one control character JSON leaves raw but TOML forbids.
    return rendered.replace('\x7f', '\\u007f')


def print_toml_report(meta, values, clusters):
    """
    Print a TOML report describing the found clusters on stdout.

    Args:
        meta (dict): information about the run, with keys 'date'
            (datetime), 'algorithm' (str), 'lines' (int) and 'took'
            (float, as computed from `timeit.default_timer`).
        values (dict): mapping from each distinct value to the number of
            rows holding it (a Counter when called by `cluster_action`).
        clusters (list): the clusters, each one being a sized iterable
            of keys of `values`. Clusters are expected to be non-empty.
    """
    print('[info]')

    # Dropping microseconds rather than splitting on '.', so that a UTC
    # offset, if any, is not lost along with them.
    print('date = %s' % meta['date'].replace(microsecond=0).isoformat())
    print('algorithm = %s' % _toml_string(meta['algorithm']))
    print()
    print('[stats]')
    print('lines = %i' % meta['lines'])
    print('nb_distinct_values = %i' % len(values))
    print('nb_clusters = %i' % len(clusters))
    print('took = %.2f' % meta['took'])
    print()

    for i, cluster in enumerate(clusters):
        print('[[cluster]]')
        print('id = %i' % i)
        print('nb_values = %i' % len(cluster))

        # Sorting by affected rows
        sorted_values = sorted(cluster, key=lambda v: values[v], reverse=True)

        # Padding is computed on the rendered strings since escaping may
        # change their length.
        rendered_values = [_toml_string(value) for value in sorted_values]
        max_length = max(len(rendered) for rendered in rendered_values)

        # The most frequent value is proposed as the harmonized one
        print('harmonized = %s' % rendered_values[0])
        print('values = [')
        for value, rendered in zip(sorted_values, rendered_values):
            print(' [%s,%s %i],' % (
                rendered,
                ' ' * (max_length - len(rendered)),
                values[value]
            ))

        print(']')

        print('harmonize = false')

        print()


def cluster_action(namespace):
    """
    Cluster the values of a CSV column and print a TOML report.

    Args:
        namespace: parsed CLI arguments. The attributes read here are
            `algorithm` (a key of CLUSTERING_ROUTINES), `file` (path of
            the CSV file) and `column` (name of the column to cluster).

    Raises:
        KeyError: if `namespace.algorithm` is not a known routine or if
            `namespace.column` is not a column of the file.
    """
    routine = CLUSTERING_ROUTINES[namespace.algorithm]

    lines = 0
    values = Counter()

    # `newline=''` is what the csv module expects in order to correctly
    # handle newlines embedded in quoted fields.
    with open(namespace.file, 'r', newline='') as f:
        reader = csv.DictReader(f)

        for line in reader:
            lines += 1
            values[line[namespace.column]] += 1

    # The routine may be lazy (e.g. a generator), so the result must be
    # materialized before stopping the timer, else the clustering work
    # itself would not be measured.
    start = timer()
    clusters = list(routine['fn'](values.keys(), **routine['args']))
    took = timer() - start

    meta = {
        'algorithm': namespace.algorithm,
        'took': took,
        'date': datetime.now(),
        'lines': lines
    }

    print_toml_report(meta, values, clusters)