From b7cc7e2e29bf0aa770e5822d4e62500df6f8e00d Mon Sep 17 00:00:00 2001 From: Max Bachmann Date: Fri, 18 Feb 2022 09:02:01 +0100 Subject: [PATCH] Fix incorrect Indel distance for score_cutoff=1 (#196) When using score_cutoff=1 the Indel distance returned an incorrect result when the two strings are not similar. --- CHANGELOG.md | 11 ++++++++++- extern/rapidfuzz-cpp | 2 +- tests/distance/test_Indel.py | 11 +++++++++++ tests/test_fuzz.py | 8 ++++++++ 4 files changed, 30 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4ce7a8e..efe8a32 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,12 +1,21 @@ ## Changelog +### [2.0.3] - 2022-02-18 +#### Added +- Added fuzz.partial_ratio_alignment, which returns the result of fuzz.partial_ratio + combined with the alignment this result stems from + +#### Fixed +- Fix Indel distance returning incorrect result when using score_cutoff=1, when the strings + are not equal. This affected other scorers like fuzz.WRatio, which use the Indel distance + as well. 
+ ### [2.0.2] - 2022-02-12 #### Fixed - fix type hints - Add back transpiled cython files to the sdist to simplify builds in package builders like FreeBSD port build or conda-forge - ### [2.0.1] - 2022-02-11 #### Fixed - fix type hints diff --git a/extern/rapidfuzz-cpp b/extern/rapidfuzz-cpp index 4f0b73d..c44c5dc 160000 --- a/extern/rapidfuzz-cpp +++ b/extern/rapidfuzz-cpp @@ -1 +1 @@ -Subproject commit 4f0b73d6bbd7eb46647c43bff961de400ffc07b1 +Subproject commit c44c5dc82b15abb65aa20c2c3f23ac52cdd12432 diff --git a/tests/distance/test_Indel.py b/tests/distance/test_Indel.py index 11af578..2344a85 100644 --- a/tests/distance/test_Indel.py +++ b/tests/distance/test_Indel.py @@ -20,6 +20,17 @@ def test_different_strings(): assert Indel.normalized_distance("aaaa", "bbbb") == 1.0 assert Indel.normalized_similarity("aaaa", "bbbb") == 0.0 +def testIssue196(): + """ + Indel distance did not work correctly for score_cutoff=1 + """ + assert Indel.distance('South Korea', 'North Korea') == 4 + assert Indel.distance('South Korea', 'North Korea', score_cutoff=4) == 4 + assert Indel.distance('South Korea', 'North Korea', score_cutoff=3) == 4 + assert Indel.distance('South Korea', 'North Korea', score_cutoff=2) == 3 + assert Indel.distance('South Korea', 'North Korea', score_cutoff=1) == 2 + assert Indel.distance('South Korea', 'North Korea', score_cutoff=0) == 1 + if __name__ == '__main__': unittest.main() diff --git a/tests/test_fuzz.py b/tests/test_fuzz.py index fb04bbd..0830334 100644 --- a/tests/test_fuzz.py +++ b/tests/test_fuzz.py @@ -107,6 +107,14 @@ class RatioTest(unittest.TestCase): ScoreAlignment(100, 2, 2 + len(s), 0, len(s)) ) + def testIssue196(self): + """ + fuzz.WRatio did not work correctly with score_cutoffs + """ + self.assertAlmostEqual(fuzz.WRatio('South Korea', 'North Korea'), 81.81818, places=4) + assert fuzz.WRatio('South Korea', 'North Korea', score_cutoff=85.4) == 0.0 + assert fuzz.WRatio('South Korea', 'North Korea', score_cutoff=85.5) == 0.0 + def 
test_empty_string(): """ when both strings are empty this is either a perfect match or no match