test preprocessing function with distance scorers
parent 41dfce987a
commit cc1e9a11ea
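For context, the tests added below all follow one pattern: a distance scorer is called with a processor argument, so both inputs are normalized (lower-cased, trimmed, non-alphanumeric characters removed) by default_process before the distance is computed, and a pure case difference therefore scores as zero edits. A minimal sketch of that pattern against the public rapidfuzz API (the tests themselves import the internal utils_cpp/utils_py variants to cover both implementations):

    # Sketch only; uses the public rapidfuzz API rather than the internal
    # utils_cpp/utils_py modules exercised by the tests in this commit.
    from rapidfuzz import utils
    from rapidfuzz.distance import Levenshtein

    # Without preprocessing, "york" vs "YORK" counts as four substitutions.
    assert Levenshtein.distance("new york mets", "new YORK mets") == 4

    # With a processor, both strings are normalized first, so the distance is 0.
    assert Levenshtein.distance(
        "new york mets", "new YORK mets", processor=utils.default_process
    ) == 0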
@@ -1,3 +1 @@
"C'est la vie",
"c est la vie",
>>> s2 = "cetain"
@@ -111,7 +111,7 @@ repos:
    rev: "v2.2.4"
    hooks:
      - id: codespell
        exclude: ".supp$"
        exclude: ".*/test_.*.py"
        args: ["-x", ".codespell-ignore-lines"]

  # Check for common shell mistakes
@@ -2,6 +2,7 @@ from __future__ import annotations

import pytest

from rapidfuzz import utils_cpp, utils_py
from tests.distance.common import DamerauLevenshtein

@@ -28,3 +29,8 @@ from tests.distance.common import DamerauLevenshtein
)
def test_distance(left, right, distance):
    assert DamerauLevenshtein.distance(left, right) == distance


def testCaseInsensitive():
    assert DamerauLevenshtein.distance("new york mets", "new YORK mets", processor=utils_cpp.default_process) == 0
    assert DamerauLevenshtein.distance("new york mets", "new YORK mets", processor=utils_py.default_process) == 0
@@ -2,6 +2,7 @@ from __future__ import annotations

import pytest

from rapidfuzz import utils_cpp, utils_py
from rapidfuzz.distance import metrics_cpp, metrics_py
from tests.distance.common import Hamming

@@ -45,6 +46,12 @@ def test_disable_padding():
    with pytest.raises(ValueError, match="Sequences are not the same length."):
        Hamming.distance("aaaa", "aaaaa", pad=False)

    with pytest.raises(ValueError, match="Sequences are not the same length."):
        metrics_cpp.hamming_editops("aaaa", "aaaaa", pad=False)

    with pytest.raises(ValueError, match="Sequences are not the same length."):
        metrics_py.hamming_editops("aaaa", "aaaaa", pad=False)


def test_score_cutoff():
    """
@@ -80,3 +87,8 @@ def test_Editops():
    ops = hamming_editops("aaabaaa", "abbaaabba")
    assert ops.src_len == 7
    assert ops.dest_len == 9


def testCaseInsensitive():
    assert Hamming.distance("new york mets", "new YORK mets", processor=utils_cpp.default_process) == 0
    assert Hamming.distance("new york mets", "new YORK mets", processor=utils_py.default_process) == 0
@@ -1,5 +1,6 @@
from __future__ import annotations

from rapidfuzz import utils_cpp, utils_py
from tests.distance.common import Indel

@@ -53,3 +54,8 @@ def test_Editops():
    ops = Indel.editops("aaabaaa", "abbaaabba")
    assert ops.src_len == 7
    assert ops.dest_len == 9


def testCaseInsensitive():
    assert Indel.distance("new york mets", "new YORK mets", processor=utils_cpp.default_process) == 0
    assert Indel.distance("new york mets", "new YORK mets", processor=utils_py.default_process) == 0
@@ -2,6 +2,7 @@ from __future__ import annotations

import pytest

from rapidfuzz import utils_cpp, utils_py
from tests.distance.common import Jaro

@@ -33,3 +34,8 @@ def test_edge_case_lengths():
        "00000000000000000000000000000000000000000000000000000000000000"
    )
    assert pytest.approx(Jaro.similarity(s2, s1)) == 0.8359375


def testCaseInsensitive():
    assert pytest.approx(Jaro.similarity("new york mets", "new YORK mets", processor=utils_cpp.default_process)) == 1.0
    assert pytest.approx(Jaro.similarity("new york mets", "new YORK mets", processor=utils_py.default_process)) == 1.0
@@ -2,6 +2,7 @@ from __future__ import annotations

import pytest

from rapidfuzz import utils_cpp, utils_py
from tests.distance.common import JaroWinkler

@@ -33,3 +34,14 @@ def test_edge_case_lengths():
        "00000000000000000000000000000000000000000000000000000000000000"
    )
    assert pytest.approx(JaroWinkler.similarity(s2, s1)) == 0.852344


def testCaseInsensitive():
    assert (
        pytest.approx(JaroWinkler.similarity("new york mets", "new YORK mets", processor=utils_cpp.default_process))
        == 1.0
    )
    assert (
        pytest.approx(JaroWinkler.similarity("new york mets", "new YORK mets", processor=utils_py.default_process))
        == 1.0
    )
@@ -1,5 +1,6 @@
from __future__ import annotations

from rapidfuzz import utils_cpp, utils_py
from tests.distance.common import LCSseq

@@ -41,3 +42,8 @@ def test_Editops():
    ops = LCSseq.editops("aaabaaa", "abbaaabba")
    assert ops.src_len == 7
    assert ops.dest_len == 9


def testCaseInsensitive():
    assert LCSseq.distance("new york mets", "new YORK mets", processor=utils_cpp.default_process) == 0
    assert LCSseq.distance("new york mets", "new YORK mets", processor=utils_py.default_process) == 0
@@ -1,5 +1,6 @@
from __future__ import annotations

from rapidfuzz import utils_cpp, utils_py
from tests.distance.common import Levenshtein

@@ -123,3 +124,8 @@ def test_mbleven():
    assert Levenshtein.distance("0", "101", score_cutoff=1) == 2
    assert Levenshtein.distance("0", "101", score_cutoff=2) == 2
    assert Levenshtein.distance("0", "101", score_cutoff=3) == 2


def testCaseInsensitive():
    assert Levenshtein.distance("new york mets", "new YORK mets", processor=utils_cpp.default_process) == 0
    assert Levenshtein.distance("new york mets", "new YORK mets", processor=utils_py.default_process) == 0
@@ -1,5 +1,6 @@
from __future__ import annotations

from rapidfuzz import utils_cpp, utils_py
from rapidfuzz.distance import metrics_cpp
from tests.distance.common import OSA

@@ -73,3 +74,8 @@ def test_simple_unicode_tests():
    s2 = "ABCD"
    assert OSA.distance(s1, s2) == 4
    assert OSA.distance(s1, s1) == 0


def testCaseInsensitive():
    assert OSA.distance("new york mets", "new YORK mets", processor=utils_cpp.default_process) == 0
    assert OSA.distance("new york mets", "new YORK mets", processor=utils_py.default_process) == 0
@@ -1,5 +1,6 @@
from __future__ import annotations

from rapidfuzz import utils_cpp, utils_py
from tests.distance.common import Postfix

@@ -19,3 +20,8 @@ def test_score_cutoff():
    assert Postfix.distance("abcd", "eebcd", score_cutoff=2) == 2
    assert Postfix.distance("abcd", "eebcd", score_cutoff=1) == 2
    assert Postfix.distance("abcd", "eebcd", score_cutoff=0) == 1


def testCaseInsensitive():
    assert Postfix.distance("new york mets", "new YORK mets", processor=utils_cpp.default_process) == 0
    assert Postfix.distance("new york mets", "new YORK mets", processor=utils_py.default_process) == 0
@@ -1,5 +1,6 @@
from __future__ import annotations

from rapidfuzz import utils_cpp, utils_py
from tests.distance.common import Prefix

@@ -19,3 +20,8 @@ def test_score_cutoff():
    assert Prefix.distance("abcd", "abcee", score_cutoff=2) == 2
    assert Prefix.distance("abcd", "abcee", score_cutoff=1) == 2
    assert Prefix.distance("abcd", "abcee", score_cutoff=0) == 1


def testCaseInsensitive():
    assert Prefix.distance("new york mets", "new YORK mets", processor=utils_cpp.default_process) == 0
    assert Prefix.distance("new york mets", "new YORK mets", processor=utils_py.default_process) == 0
@@ -308,7 +308,15 @@ def test_simple_unicode_tests(scorer):
    assert scorer(s1, s1) == 100


@pytest.mark.parametrize("processor", [utils_cpp.default_process, lambda s: utils_cpp.default_process(s), utils_py.default_process, lambda s: utils_py.default_process(s)])
@pytest.mark.parametrize(
    "processor",
    [
        utils_cpp.default_process,
        lambda s: utils_cpp.default_process(s),
        utils_py.default_process,
        lambda s: utils_py.default_process(s),
    ],
)
@pytest.mark.parametrize("scorer", scorers)
def test_scorer_case_insensitive(processor, scorer):
    """
@@ -332,6 +332,17 @@ def test_result_order():
    assert best[0][2] == 0


def test_extract_limits():
    """
    test process.extract with special limits
    """
    bests = process.extract("test", ["tes", "tes"], limit=1, score_cutoff=100)
    assert bests == []

    bests = process.extract("test", ["te", "test"], limit=None, scorer=Levenshtein.distance)
    assert bests == [("test", 0, 1), ("te", 2, 0)]


def test_empty_strings():
    choices = [
        "",