Fix incorrect Indel distance for score_cutoff=1 (#196)

When using score_cutoff=1, the Indel distance returned an incorrect
result when the two strings were not similar.
This commit is contained in:
Max Bachmann 2022-02-18 09:02:01 +01:00
parent bf8dd83f2f
commit b7cc7e2e29
4 changed files with 30 additions and 2 deletions

View File

@ -1,12 +1,21 @@
## Changelog
### [2.0.3] - 2022-02-18
#### Added
- Added fuzz.partial_ratio_alignment, which returns the result of fuzz.partial_ratio
combined with the alignment this result stems from
#### Fixed
- Fix Indel distance returning an incorrect result when score_cutoff=1 is used and the strings
are not equal. This affected other scorers like fuzz.WRatio, which use the Indel distance
as well.
### [2.0.2] - 2022-02-12
#### Fixed
- fix type hints
- Add back transpiled cython files to the sdist to simplify builds in package builders
like FreeBSD port build or conda-forge
### [2.0.1] - 2022-02-11
#### Fixed
- fix type hints

@ -1 +1 @@
Subproject commit 4f0b73d6bbd7eb46647c43bff961de400ffc07b1
Subproject commit c44c5dc82b15abb65aa20c2c3f23ac52cdd12432

View File

@ -20,6 +20,17 @@ def test_different_strings():
assert Indel.normalized_distance("aaaa", "bbbb") == 1.0
assert Indel.normalized_similarity("aaaa", "bbbb") == 0.0
def testIssue196():
    """
    Regression test for issue #196: Indel.distance combined with score_cutoff.

    The distance between the two strings without a cutoff is 4. The
    expectations below pin the result for every cutoff from 4 down to 0:
    a cutoff of 4 still yields 4, and each smaller cutoff yields
    ``score_cutoff + 1``.
    """
    first = 'South Korea'
    second = 'North Korea'

    assert Indel.distance(first, second) == 4

    # (score_cutoff, expected result), checked in descending cutoff order.
    expectations = (
        (4, 4),
        (3, 4),
        (2, 3),
        (1, 2),
        (0, 1),
    )
    for cutoff, expected in expectations:
        assert Indel.distance(first, second, score_cutoff=cutoff) == expected
# Hand control to unittest's runner only when this file is executed directly.
if __name__ == "__main__":
    unittest.main()

View File

@ -107,6 +107,14 @@ class RatioTest(unittest.TestCase):
ScoreAlignment(100, 2, 2 + len(s), 0, len(s))
)
def testIssue196(self):
    """
    Regression test for issue #196: fuzz.WRatio combined with score_cutoff.

    Without a cutoff the two strings score approximately 81.81818. With a
    cutoff of 85.4 or 85.5, both above that score, the expected result
    is 0.0.
    """
    first = 'South Korea'
    second = 'North Korea'

    self.assertAlmostEqual(fuzz.WRatio(first, second), 81.81818, places=4)

    # Use TestCase assertions rather than bare ``assert`` so the checks are
    # not stripped under ``python -O`` and failures report both values.
    self.assertEqual(fuzz.WRatio(first, second, score_cutoff=85.4), 0.0)
    self.assertEqual(fuzz.WRatio(first, second, score_cutoff=85.5), 0.0)
def test_empty_string():
"""
when both strings are empty this is either a perfect match or no match