diff --git a/experiments/fagin.py b/experiments/fagin.py index 4295efe..7e59740 100644 --- a/experiments/fagin.py +++ b/experiments/fagin.py @@ -77,7 +77,7 @@ with Timer('sqrt indices'): neighbors = defaultdict(lambda: (0, None)) - for _ in range(5): + for _ in range(1): pairs = sqrt_indexation_pairs(VECTORS) for i, j in pairs: @@ -92,8 +92,8 @@ with Timer('sqrt indices'): T = 0 P = 0 for i in range(len(VECTORS)): - if GROUND_TRUTH[i][1] < 0.8: - continue + # if GROUND_TRUTH[i][1] < 0.8: + # continue T += 1 diff --git a/fog/clustering/fagin.py b/fog/clustering/fagin.py index 5182a41..118d50c 100644 --- a/fog/clustering/fagin.py +++ b/fog/clustering/fagin.py @@ -164,6 +164,8 @@ def sqrt_indexation_pairs(vectors): for i, v in enumerate(vectors): leader = min(leaders, key=lambda x: 1.0 - sparse_cosine_similarity(v, vectors[x])) + yield i, leader + l = proximities[leader] for j in l: