fog/test/clustering/vp_tree_test.py

41 lines
968 B
Python
Raw Normal View History

2018-06-08 17:12:43 +00:00
# =============================================================================
# Fog VPTree Clustering Unit Tests
# =============================================================================
2018-06-11 15:00:59 +00:00
from test.clustering.utils import Clusters
2018-06-08 17:12:43 +00:00
from Levenshtein import distance as levenshtein
from fog.clustering import vp_tree
# Input strings handed to `vp_tree` by the test below: seven 3-letter
# windows sliding one character at a time over 'abcdefghi'.
DATA = [
    'abc',
    'bcd',
    'cde',
    'def',
    'efg',
    'fgh',
    'ghi',
]
2018-06-11 15:00:59 +00:00
# Expected clustering of DATA when `vp_tree` is called with the Levenshtein
# distance and radius=2 (compared against in `test_basics`).
FUZZY_CLUSTERS = Clusters([
    ('abc', 'bcd'),
    ('bcd', 'cde', 'def'),
    ('def', 'efg', 'fgh'),
    ('fgh', 'ghi'),
])
2018-06-11 15:00:59 +00:00
# Expected clustering of DATA when `vp_tree` is called with the Levenshtein
# distance, radius=2 and min_size=3 (compared against in `test_basics`).
MIN_FUZZY_CLUSTERS = Clusters([
    ('abc', 'bcd', 'cde'),
    ('cde', 'def', 'efg'),
    ('efg', 'fgh', 'ghi'),
])
2018-06-08 17:24:49 +00:00
class TestVPTreeClustering(object):
    """Unit tests for `vp_tree` clustering over the module-level DATA."""

    def test_basics(self):
        """Check `vp_tree` output with and without a `min_size` argument."""
        # Levenshtein distance with radius 2, no minimum cluster size passed.
        plain = vp_tree(DATA, distance=levenshtein, radius=2)
        assert Clusters(plain) == FUZZY_CLUSTERS

        # Same distance and radius, additionally passing min_size=3.
        sized = vp_tree(DATA, distance=levenshtein, radius=2, min_size=3)
        assert Clusters(sized) == MIN_FUZZY_CLUSTERS