From aa7126f791a653af47041b033a3cb6d09c6a76b0 Mon Sep 17 00:00:00 2001 From: Yomguithereal Date: Fri, 6 Jul 2018 19:00:57 +0200 Subject: [PATCH] New bench --- experiments/benchmark.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/experiments/benchmark.py b/experiments/benchmark.py index 77fcdbb..730476e 100644 --- a/experiments/benchmark.py +++ b/experiments/benchmark.py @@ -3,7 +3,7 @@ from functools import partial from timeit import default_timer as timer from fog.clustering import * from fog.tokenizers import ngrams -from fog.key import fingerprint, omission_key +from fog.key import fingerprint, omission_key, skeleton_key from Levenshtein import distance as levenshtein with open('./data/universities.csv', 'r') as f: @@ -37,6 +37,10 @@ with open('./data/universities.csv', 'r') as f: clusters = list(sorted_neighborhood(universities, key=omission_key, distance=levenshtein, radius=2)) print('SNM Omission (%i):' % len(clusters), timer() - start) + start = timer() + clusters = list(sorted_neighborhood(universities, key=skeleton_key, distance=levenshtein, radius=2)) + print('SNM Skeleton (%i):' % len(clusters), timer() - start) + print() with open('./data/musicians.csv', 'r') as f: reader = csv.DictReader(f) @@ -61,6 +65,10 @@ with open('./data/musicians.csv', 'r') as f: clusters = list(sorted_neighborhood(artists, key=omission_key, distance=levenshtein, radius=2)) print('SNM Omission (%i):' % len(clusters), timer() - start) + start = timer() + clusters = list(sorted_neighborhood(artists, key=skeleton_key, distance=levenshtein, radius=2)) + print('SNM Skeleton (%i):' % len(clusters), timer() - start) + start = timer() clusters = list(pairwise_fuzzy_clusters(artists, distance=levenshtein, radius=2, processes=8)) print('Parallel Fuzzy clusters (%i):' % len(clusters), timer() - start)