fix some issues in hamming distance
This commit is contained in:
parent
531512e0fa
commit
72e2ca7d95
|
@ -9,6 +9,9 @@
|
|||
|
||||
#### Fixed
|
||||
- fuzz.partial_ratio_alignment ignored the score_cutoff
|
||||
- fix implementation of Hamming.normalized_similarity
|
||||
- fix default score_cutoff of Hamming.similarity
|
||||
- fix implementation of LCSseq.distance when used in the process module
|
||||
|
||||
### [2.0.15] - 2022-06-24
|
||||
#### Fixed
|
||||
|
|
|
@ -1 +1 @@
|
|||
Subproject commit db5ac864bcb2890556c0e5b3c0a13335e65ac3e3
|
||||
Subproject commit 13d7c7751b23dd2ef1efb340572661972941675c
|
|
@ -103,7 +103,7 @@ def similarity(s1, s2, *, processor=None, score_cutoff=None):
|
|||
ValueError
|
||||
If s1 and s2 have a different length
|
||||
"""
|
||||
cdef int64_t c_score_cutoff = INT64_MAX if score_cutoff is None else score_cutoff
|
||||
cdef int64_t c_score_cutoff = 0 if score_cutoff is None else score_cutoff
|
||||
cdef RF_StringWrapper s1_proc, s2_proc
|
||||
|
||||
if c_score_cutoff < 0:
|
||||
|
@ -187,7 +187,7 @@ def normalized_similarity(s1, s2, *, processor=None, score_cutoff=None):
|
|||
raise ValueError("score_cutoff has to be >= 0")
|
||||
|
||||
preprocess_strings(s1, s2, processor, &s1_proc, &s2_proc, None)
|
||||
return hamming_normalized_distance_func(s1_proc.string, s2_proc.string, c_score_cutoff)
|
||||
return hamming_normalized_similarity_func(s1_proc.string, s2_proc.string, c_score_cutoff)
|
||||
|
||||
|
||||
cdef bool NoKwargsInit(RF_Kwargs* self, dict kwargs) except False:
|
||||
|
|
|
@ -0,0 +1,83 @@
|
|||
import unittest
|
||||
|
||||
from rapidfuzz.distance import Hamming_cpp, Hamming_py
|
||||
|
||||
|
||||
def isclose(a, b, rel_tol=1e-09, abs_tol=0.0):
    """Return True when ``a`` and ``b`` are approximately equal.

    Thin wrapper around :func:`math.isclose` that keeps this helper's
    positional signature (``math.isclose`` takes the tolerances as
    keyword-only arguments).

    Parameters
    ----------
    a, b : float
        Values to compare.
    rel_tol : float, optional
        Maximum allowed difference relative to the larger magnitude of
        ``a`` and ``b``. Default is 1e-09.
    abs_tol : float, optional
        Minimum absolute tolerance, useful for comparisons near zero.
        Default is 0.0.

    Returns
    -------
    bool
        True if the values are within the given tolerances.

    Raises
    ------
    ValueError
        If either tolerance is negative (raised by ``math.isclose``).
    """
    # Imported locally so this helper does not depend on a module-level import.
    import math

    # The stdlib version also treats two equal infinities as close, which
    # the previous hand-rolled formula got wrong (inf - inf is NaN).
    return math.isclose(a, b, rel_tol=rel_tol, abs_tol=abs_tol)
|
||||
|
||||
|
||||
class Hamming:
    """Test shim that cross-checks ``Hamming_cpp`` against ``Hamming_py``.

    Every method forwards its arguments unchanged to the same-named
    function of ``Hamming_cpp`` and then of ``Hamming_py``, asserts that
    both results agree, and returns the ``Hamming_cpp`` result.
    """

    @staticmethod
    def distance(*args, **kwargs):
        """Return ``distance`` after checking both implementations agree exactly."""
        cpp_result = Hamming_cpp.distance(*args, **kwargs)
        py_result = Hamming_py.distance(*args, **kwargs)
        assert cpp_result == py_result
        return cpp_result

    @staticmethod
    def similarity(*args, **kwargs):
        """Return ``similarity`` after checking both implementations agree exactly."""
        cpp_result = Hamming_cpp.similarity(*args, **kwargs)
        py_result = Hamming_py.similarity(*args, **kwargs)
        assert cpp_result == py_result
        return cpp_result

    @staticmethod
    def normalized_distance(*args, **kwargs):
        """Return ``normalized_distance``; results are compared with ``isclose``."""
        cpp_result = Hamming_cpp.normalized_distance(*args, **kwargs)
        py_result = Hamming_py.normalized_distance(*args, **kwargs)
        # Floating-point results: compare with a tolerance, not ``==``.
        assert isclose(cpp_result, py_result)
        return cpp_result

    @staticmethod
    def normalized_similarity(*args, **kwargs):
        """Return ``normalized_similarity``; results are compared with ``isclose``."""
        cpp_result = Hamming_cpp.normalized_similarity(*args, **kwargs)
        py_result = Hamming_py.normalized_similarity(*args, **kwargs)
        # Floating-point results: compare with a tolerance, not ``==``.
        assert isclose(cpp_result, py_result)
        return cpp_result
|
||||
|
||||
|
||||
def test_empty_string():
    """
    Two empty strings are a perfect match: zero distance, zero
    similarity, and a normalized similarity of 1.0.
    """
    empty = ""
    assert Hamming.distance(empty, empty) == 0
    assert Hamming.similarity(empty, empty) == 0
    assert Hamming.normalized_distance(empty, empty) == 0.0
    assert Hamming.normalized_similarity(empty, empty) == 1.0
|
||||
|
||||
|
||||
def test_similar_strings():
    """
    Identical strings: zero distance and a similarity equal to the
    string length.
    """
    word = "test"
    assert Hamming.distance(word, word) == 0
    assert Hamming.similarity(word, word) == 4
    assert Hamming.normalized_distance(word, word) == 0
    assert Hamming.normalized_similarity(word, word) == 1.0
|
||||
|
||||
|
||||
def test_different_strings():
    """
    Completely different strings of equal length: every position
    mismatches, so the distance equals the length and the similarity is 0.
    """
    left, right = "aaaa", "bbbb"
    assert Hamming.distance(left, right) == 4
    assert Hamming.similarity(left, right) == 0
    assert Hamming.normalized_distance(left, right) == 1.0
    assert Hamming.normalized_similarity(left, right) == 0.0
|
||||
|
||||
|
||||
def test_score_cutoff():
    """
    Check how ``score_cutoff`` affects the reported Hamming distance.

    The strings differ in two positions. The expectations below pin a
    result of 2 for cutoffs 4 down to 1 and a result of 1 for cutoff 0.
    NOTE(review): this looks like "cutoff + 1 is returned once the real
    distance exceeds the cutoff" - confirm against the library docs.
    """
    first, second = "South Korea", "North Korea"
    assert Hamming.distance(first, second) == 2

    # (score_cutoff, expected distance), checked in descending cutoff order.
    expectations = ((4, 2), (3, 2), (2, 2), (1, 2), (0, 1))
    for cutoff, expected in expectations:
        assert Hamming.distance(first, second, score_cutoff=cutoff) == expected
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # The tests in this file are plain ``test_*`` functions, not
    # ``unittest.TestCase`` classes, so a bare ``unittest.main()`` would
    # discover and run nothing. Wrap each function in a FunctionTestCase so
    # executing the file directly actually runs them.
    suite = unittest.TestSuite(
        unittest.FunctionTestCase(candidate)
        for name, candidate in sorted(globals().items())
        if name.startswith("test_") and callable(candidate)
    )
    outcome = unittest.TextTestRunner().run(suite)
    # Mirror unittest.main(): non-zero exit status when any test failed.
    raise SystemExit(0 if outcome.wasSuccessful() else 1)
|
Loading…
Reference in New Issue