# =============================================================================
# Fog Levenshtein Distance Unit Tests
# =============================================================================
import math
|
|
|
|
from pytest import approx
|
|
|
|
from fog.metrics import levenshtein_distance
|
|
|
|
|
|
|
|
# Table of test cases. Each entry is a triple `(A, B, distance)` where
# `distance` is the expected Levenshtein distance between `A` and `B`.
BASIC_TESTS = [
    # NOTE(review): the commented-out entries in this table use tuple/list
    # inputs instead of strings. Presumably they are disabled until sequence
    # inputs are supported -- confirm before re-enabling.
    # (('b', 'o', 'o', 'k'), ('b', 'a', 'c', 'k'), 2),
    # (('the', 'cat', 'eats', 'mouse'), ('the', 'mouse', 'likes', 'mouse'), 2),

    # General word pairs mixing substitutions, insertions and deletions.
    ('book', 'back', 2),
    ('bbbbookkkk', 'bbbbackkkk', 2),
    ('hello', 'helo', 1),
    ('good sir', 'baal', 8),
    ('say', 'shiver', 5),
    ('feature', 'get-project-features', 13),
    ('example', 'samples', 3),
    ('sturgeon', 'urgently', 6),
    ('levenshtein', 'frankenstein', 6),
    ('distance', 'difference', 5),

    # Same-length pairs: substitutions only.
    ('a', 'b', 1),
    ('ab', 'ac', 1),
    ('ac', 'bc', 1),
    ('abc', 'axc', 1),
    ('xabxcdxxefxgx', '1ab2cd34ef5g6', 6),

    # Second string is the first with characters removed: deletions only.
    ('a', '', 1),
    ('ab', 'a', 1),
    ('ab', 'b', 1),
    ('abc', 'ac', 1),
    ('xabxcdxxefxgx', 'abcdefg', 6),

    # Second string is the first with characters added: insertions only.
    ('', 'a', 1),
    ('a', 'ab', 1),
    ('b', 'ab', 1),
    ('ac', 'abc', 1),
    ('abcdefg', 'xabxcdxxefxgx', 6),

    # Identical strings.
    ('', '', 0),
    ('a', 'a', 0),
    ('abc', 'abc', 0),

    # Empty string on at least one side.
    ('', '', 0),
    ('a', '', 1),
    ('', 'a', 1),
    ('abc', '', 3),
    ('', 'abc', 3),

    # Non-ASCII input: distance is counted in characters, not bytes.
    ('因為我是中國人所以我會說中文', '因為我是英國人所以我會說英文', 2),
    # (list('因為我是中國人所以我會說中文'), list('因為我是英國人所以我會說英文'), 2)
]
|
|
|
|
|
|
|
|
|
|
|
|
class TestLevenshteinSimilarity(object):
    """Unit tests for `levenshtein_distance`."""

    def test_basics(self):
        """Check every `(A, B, distance)` triple listed in `BASIC_TESTS`.

        The loop stops at the first mismatch; the assertion message shows the
        two inputs and the expected distance for the failing case.
        """
        for A, B, distance in BASIC_TESTS:
            assert levenshtein_distance(A, B) == distance, '%s // %s => %i' % (A, B, distance)
|