# fog/test/lsh/minhash_test.py
# 2018-06-13 15:09:33 +00:00
# =============================================================================
# Fog MinHash LSH Unit Tests
# =============================================================================
from pytest import approx
# 2018-06-19 16:11:54 +00:00
from fog.lsh import MinHash, LSBMinHash
# 2018-06-13 15:09:33 +00:00
# Shared fixtures: (first string, second string, expected similarity).
# The expected scores equal the Jaccard index of the two strings' character
# sets, e.g. 'night' vs 'nacht' share {n, h, t} out of 7 distinct characters.
TESTS = [
    # Degenerate inputs: one or both strings empty.
    ('abc', '', 0),
    ('', 'abc', 0),
    ('', '', 1),

    # Identical and fully disjoint strings.
    ('abc', 'abc', 1),
    ('abc', 'xyz', 0),

    # Partial overlaps.
    ('night', 'nacht', 3 / 7),
    ('context', 'contact', 4 / 7),
    ('ht', 'nacht', 2 / 5),
]
class TestLSBMinHash(object):
    """Similarity checks for the MinHash and LSBMinHash signature schemes.

    Every test hashes both strings of each TESTS entry with a fixed seed and
    requires the estimated similarity to fall within 0.1 (absolute) of the
    expected score stored in the fixture.
    """

    def test_basics(self):
        """MinHash(512, seed=123) approximates each expected similarity."""
        # NOTE(review): 512 is presumably the signature size / number of
        # hash functions -- confirm against fog.lsh.MinHash.
        hasher = MinHash(512, seed=123)

        for left, right, expected in TESTS:
            left_signature = hasher.create_signature(left)
            right_signature = hasher.create_signature(right)
            estimate = hasher.similarity(left_signature, right_signature)

            # The estimate is probabilistic, hence the loose tolerance.
            assert estimate == approx(expected, abs=1e-1)

    def test_lsb(self):
        """LSBMinHash(precision=16, seed=123) approximates each expected similarity."""
        hasher = LSBMinHash(precision=16, seed=123)

        for left, right, expected in TESTS:
            left_signature = hasher.create_signature(left)
            right_signature = hasher.create_signature(right)
            estimate = hasher.similarity(left_signature, right_signature)

            # Same tolerance as the plain MinHash test.
            assert estimate == approx(expected, abs=1e-1)