mirror of https://github.com/Yomguithereal/fog.git
Notes for the future
This commit is contained in:
parent
de02bae69d
commit
50bed85577
|
@ -16,6 +16,13 @@ import hashlib
|
||||||
|
|
||||||
from fog.lsh.utils import is_power_of_two, popcount, popcount64
|
from fog.lsh.utils import is_power_of_two, popcount, popcount64
|
||||||
|
|
||||||
|
# TODO: reimplement soundly by following:
|
||||||
|
# https://github.com/reubano/changanya/blob/master/changanya/simhash.py
|
||||||
|
# Note: Simhash clustering methods I could find are too slow (Mozilla's one)
|
||||||
|
# and even Hamming space indexation ones.
|
||||||
|
# I should try again to use the same lsh method used by minhash to see if it
|
||||||
|
# yields similar results
|
||||||
|
|
||||||
|
|
||||||
def simhash(tokens, f=128):
|
def simhash(tokens, f=128):
|
||||||
assert f <= 512 and not is_power_of_two(f), 'fog.lsh.simhash: f should be a power of 2 <= 512'
|
assert f <= 512 and not is_power_of_two(f), 'fog.lsh.simhash: f should be a power of 2 <= 512'
|
||||||
|
|
Loading…
Reference in New Issue