From e260e481cffbdda166e0574b5d710caf32919994 Mon Sep 17 00:00:00 2001 From: Yomguithereal Date: Wed, 20 Jun 2018 18:35:12 +0200 Subject: [PATCH] Improvements --- fog/clustering/minhash.py | 2 +- fog/lsh/minhash.py | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/fog/clustering/minhash.py b/fog/clustering/minhash.py index c6a4299..4712b70 100644 --- a/fog/clustering/minhash.py +++ b/fog/clustering/minhash.py @@ -11,7 +11,7 @@ from collections import defaultdict import math from fog.clustering.utils import merge_buckets_into_clusters -from fog.lsh.minhash import LSBMinHash, MinHash +from fog.lsh.minhash import MinHash from fog.metrics.jaccard import jaccard_similarity diff --git a/fog/lsh/minhash.py b/fog/lsh/minhash.py index e425004..db49150 100644 --- a/fog/lsh/minhash.py +++ b/fog/lsh/minhash.py @@ -33,10 +33,8 @@ def crc32(x): class MinHash(object): def __init__(self, h=256, seed=None): - # TODO: weighted # TODO: cheap_hashes # TODO: lsb - # TODO: superminhash rng = Random(seed) @@ -124,7 +122,7 @@ class SuperMinHash(object): while j <= a: r = rng.random() - k = rng.randint(j, m - 1) + k = j + math.floor(r * (m - j)) if q[j] != i: q[j] = i