2018-06-08 17:12:43 +00:00
|
|
|
# =============================================================================
# Fog VPTree Clustering Unit Tests
# =============================================================================
|
2018-06-11 15:00:59 +00:00
|
|
|
from test.clustering.utils import Clusters
|
2018-06-08 17:12:43 +00:00
|
|
|
from Levenshtein import distance as levenshtein
|
|
|
|
from fog.clustering import vp_tree
|
|
|
|
|
|
|
|
# Input strings fed to the clustering function under test. Each entry shares
# its last two characters with the first two characters of the next entry.
DATA = [
    'abc',
    'bcd',
    'cde',
    'def',
    'efg',
    'fgh',
    'ghi',
]
|
|
|
|
|
2018-06-11 15:00:59 +00:00
|
|
|
# Expected result of `vp_tree(DATA, distance=levenshtein, radius=2)` with no
# minimum cluster size (compared against in `test_basics`).
FUZZY_CLUSTERS = Clusters([
    ('abc', 'bcd'),
    ('bcd', 'cde', 'def'),
    ('def', 'efg', 'fgh'),
    ('fgh', 'ghi'),
])
|
|
|
|
|
2018-06-11 15:00:59 +00:00
|
|
|
# Expected result of `vp_tree(DATA, distance=levenshtein, radius=2, min_size=3)`
# (compared against in `test_basics`); every expected cluster has three items.
MIN_FUZZY_CLUSTERS = Clusters([
    ('abc', 'bcd', 'cde'),
    ('cde', 'def', 'efg'),
    ('efg', 'fgh', 'ghi'),
])
|
|
|
|
|
|
|
|
|
2018-06-08 17:24:49 +00:00
|
|
|
class TestVPTreeClustering(object):
    """Unit tests for `vp_tree` clustering, using Levenshtein distance on DATA."""

    def test_basics(self):
        """Check `vp_tree` output with and without a minimum cluster size.

        Results are wrapped in `Clusters` before being compared against the
        expected fixtures, so the comparison semantics are those of
        `Clusters.__eq__`.
        """
        # Default call: only the radius constrains the clusters.
        clusters = Clusters(vp_tree(DATA, distance=levenshtein, radius=2))

        assert clusters == FUZZY_CLUSTERS

        # Same radius, but with `min_size=3` passed to the clustering call.
        clusters = Clusters(
            vp_tree(DATA, distance=levenshtein, radius=2, min_size=3)
        )

        assert clusters == MIN_FUZZY_CLUSTERS
|