release v2.10.0

2022-09-18 16:17:29 +02:00 · 2022-09-18 16:17:29 +02:00 · f69be93ca8
parent 2cb4ec71f1
commit f69be93ca8
15 changed files with 131 additions and 9 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -1,5 +1,15 @@
 ## Changelog

+### [2.10.0] - 2022-09-18
+#### Added
+- add editops and opcodes to Hamming distance
+
+#### Performance
+- strip common affix in OSA distance
+
+#### Fixed
+- ignore missing pandas in Python 3.11 tests
+
 ### [2.9.0] - 2022-09-16
 #### Added
 - add optimal string alignment (OSA)
@ -151,7 +161,7 @@
 #### Changed
 - add tests to sdist
 - remove cython dependency for sdist
-  
+
 ### [2.0.11] - 2022-04-23
 #### Changed
 - relax version requirements of dependencies to simplify packaging
@ -538,7 +548,7 @@ The old algorithm is used again until this bug is fixed.

 ### [0.11.2] - 2020-09-12
 #### Added
- added rapidfuzz.\_\_author\_\_, rapidfuzz.\_\_license\_\_ and rapidfuzz.\_\_version\_\_ 
+- added rapidfuzz.\_\_author\_\_, rapidfuzz.\_\_license\_\_ and rapidfuzz.\_\_version\_\_

 ### [0.11.1] - 2020-09-01
 #### Fixed
@ -546,7 +556,7 @@ The old algorithm is used again until this bug is fixed.

 ### [0.11.0] - 2020-08-22
 #### Changed
- support for python 2.7 added #40 
+- support for python 2.7 added #40
 - add wheels for python2.7 (both pypy and cpython) on MacOS and Linux

 ### [0.10.0] - 2020-08-17
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -34,7 +34,7 @@ else()
    add_library(Taskflow::Taskflow ALIAS Taskflow)
 endif()

-find_package(rapidfuzz 1.5.0 QUIET)
+find_package(rapidfuzz 1.7.0 QUIET)
 if (rapidfuzz_FOUND)
    message("Using system supplied version of rapidfuzz-cpp")
 else()
--- a/docs/conf.py
+++ b/docs/conf.py
@ -22,7 +22,7 @@ copyright = '2021, Max Bachmann'
 author = 'Max Bachmann'

 # The full version, including alpha/beta/rc tags
-release = '2.9.0'
+release = '2.10.0'


 # -- General configuration ---------------------------------------------------
--- a/extern/rapidfuzz-cpp
+++ b/extern/rapidfuzz-cpp
@ -1 +1 @@
-Subproject commit 06c582124a33f4642132137c621af8abad3dea0e
+Subproject commit 75e10756124a2805752d8f4713918466cd67902f
--- a/setup.py
+++ b/setup.py
@ -11,7 +11,7 @@ with open('README.md', 'rt', encoding="utf8") as f:

 setup_args = {
    "name": "rapidfuzz",
-    "version": "2.9.0",
+    "version": "2.10.0",
    "install_requires": ["jarowinkler >= 1.2.0, < 2.0.0"],
    "extras_require": {'full': ['numpy']},
    "url": "https://github.com/maxbachmann/RapidFuzz",
--- a/src/rapidfuzz/init.py
+++ b/src/rapidfuzz/init.py
@ -3,6 +3,6 @@ rapid string matching library
 """
 __author__: str = "Max Bachmann"
 __license__: str = "MIT"
-__version__: str = "2.9.0"
+__version__: str = "2.10.0"

 from rapidfuzz import process, distance, fuzz, string_metric, utils
--- a/src/rapidfuzz/distance/Hamming.py
+++ b/src/rapidfuzz/distance/Hamming.py
@ -14,6 +14,8 @@ distance = _fallback_import(_mod, "distance")
 similarity = _fallback_import(_mod, "similarity")
 normalized_distance = _fallback_import(_mod, "normalized_distance")
 normalized_similarity = _fallback_import(_mod, "normalized_similarity")
+editops = _fallback_import(_mod, "editops")
+opcodes = _fallback_import(_mod, "opcodes")

 distance._RF_ScorerPy = _dist_attr
 similarity._RF_ScorerPy = _sim_attr
--- a/src/rapidfuzz/distance/Hamming.pyi
+++ b/src/rapidfuzz/distance/Hamming.pyi
@ -1,4 +1,5 @@
 from typing import Callable, Hashable, Sequence, Optional, TypeVar
+from rapidfuzz.distance import Editops, Opcodes

 _StringType = Sequence[Hashable]
 _S1 = TypeVar("_S1")
@ -32,3 +33,15 @@ def normalized_similarity(
    processor: Optional[Callable[..., _StringType]] = None,
    score_cutoff: Optional[float] = 0
 ) -> float: ...
+def editops(
+    s1: _S1,
+    s2: _S2,
+    *,
+    processor: Optional[Callable[..., _StringType]] = None
+) -> Editops: ...
+def opcodes(
+    s1: _S1,
+    s2: _S2,
+    *,
+    processor: Optional[Callable[..., _StringType]] = None
+) -> Opcodes: ...
--- a/src/rapidfuzz/distance/Hamming_cpp.py
+++ b/src/rapidfuzz/distance/Hamming_cpp.py
@ -6,4 +6,6 @@ from rapidfuzz.distance.metrics_cpp import (
    hamming_similarity as similarity,
    hamming_normalized_distance as normalized_distance,
    hamming_normalized_similarity as normalized_similarity,
+    hamming_editops as editops,
+    hamming_opcodes as opcodes
 )
--- a/src/rapidfuzz/distance/Hamming_py.py
+++ b/src/rapidfuzz/distance/Hamming_py.py
@ -155,3 +155,46 @@ def normalized_similarity(s1, s2, *, processor=None, score_cutoff=None):
    norm_sim = 1 - norm_dist

    return norm_sim if (score_cutoff is None or norm_dist >= score_cutoff) else 0.0
+
+def editops(s1, s2, *, processor=None):
+    """
+    Return Editops describing how to turn s1 into s2 (not implemented yet).
+
+    Parameters
+    ----------
+    s1 : Sequence[Hashable]
+        First string to compare.
+    s2 : Sequence[Hashable]
+        Second string to compare.
+    processor: callable, optional
+        Optional callable that is used to preprocess the strings before
+        comparing them. Default is None, which deactivates this behaviour.
+
+    Returns
+    -------
+    editops : Editops
+        edit operations required to turn s1 into s2 (currently never returned)
+    """
+    raise NotImplementedError  # placeholder: always raises, arguments are unused
+
+
+def opcodes(s1, s2, *, processor=None):
+    """
+    Return Opcodes describing how to turn s1 into s2 (not implemented yet).
+
+    Parameters
+    ----------
+    s1 : Sequence[Hashable]
+        First string to compare.
+    s2 : Sequence[Hashable]
+        Second string to compare.
+    processor: callable, optional
+        Optional callable that is used to preprocess the strings before
+        comparing them. Default is None, which deactivates this behaviour.
+
+    Returns
+    -------
+    opcodes : Opcodes
+        edit operations required to turn s1 into s2 (currently never returned)
+    """
+    raise NotImplementedError  # placeholder: always raises, arguments are unused
--- a/src/rapidfuzz/distance/Hamming_py.pyi
+++ b/src/rapidfuzz/distance/Hamming_py.pyi
@ -1,5 +1,6 @@
 from typing import Callable, Hashable, Sequence, Optional, TypeVar, Any, Dict
 from typing_extensions import Protocol
+from rapidfuzz.distance import Editops, Opcodes

 class _ScorerAttributes(Protocol):
    _RF_ScorerPy: Dict
@ -43,3 +44,15 @@ def normalized_similarity(
    processor: Optional[Callable[..., _StringType]] = None,
    score_cutoff: Optional[float] = 0
 ) -> float: ...
+def editops(
+    s1: _S1,
+    s2: _S2,
+    *,
+    processor: Optional[Callable[..., _StringType]] = None
+) -> Editops: ...
+def opcodes(
+    s1: _S1,
+    s2: _S2,
+    *,
+    processor: Optional[Callable[..., _StringType]] = None
+) -> Opcodes: ...
--- a/src/rapidfuzz/distance/metrics.hpp
+++ b/src/rapidfuzz/distance/metrics.hpp
@ -278,6 +278,13 @@ static inline bool LCSseqNormalizedSimilarityInit(RF_ScorerFunc* self, const RF_
    return normalized_similarity_init<rapidfuzz::CachedLCSseq, double>(self, str_count, str);
 }

+static inline rapidfuzz::Editops hamming_editops_func(const RF_String& str1, const RF_String& str2)
+{ // Returns the result of rapidfuzz::hamming_editops for str1/str2, dispatched through visitor().
+    return visitor(str1, str2, [&](auto s1, auto s2) {
+        return rapidfuzz::hamming_editops(s1, s2);
+    });
+}
+
 static inline rapidfuzz::Editops levenshtein_editops_func(const RF_String& str1, const RF_String& str2,
                                                          int64_t score_hint)
 {
--- a/src/rapidfuzz/distance/metrics_cpp.pyi
+++ b/src/rapidfuzz/distance/metrics_cpp.pyi
@ -147,6 +147,18 @@ def hamming_normalized_similarity(
    processor: Optional[Callable[..., _StringType]] = None,
    score_cutoff: Optional[float] = 0
 ) -> float: ...
+def hamming_editops(
+    s1: _S1,
+    s2: _S2,
+    *,
+    processor: Optional[Callable[..., _StringType]] = None
+) -> Editops: ...
+def hamming_opcodes(
+    s1: _S1,
+    s2: _S2,
+    *,
+    processor: Optional[Callable[..., _StringType]] = None
+) -> Opcodes: ...
 def damerau_levenshtein_distance(
    s1: _S1,
    s2: _S2,
@ -203,4 +215,3 @@ def osa_normalized_similarity(
    processor: Optional[Callable[..., _StringType]] = None,
    score_cutoff: Optional[float] = 0
 ) -> float: ...
-
--- a/src/rapidfuzz/distance/metrics_cpp.pyx
+++ b/src/rapidfuzz/distance/metrics_cpp.pyx
@ -86,6 +86,8 @@ cdef extern from "metrics.hpp":
    bool HammingSimilarityInit(          RF_ScorerFunc*, const RF_Kwargs*, int64_t, const RF_String*) nogil except False
    bool HammingNormalizedSimilarityInit(RF_ScorerFunc*, const RF_Kwargs*, int64_t, const RF_String*) nogil except False

+    RfEditops hamming_editops_func(const RF_String&, const RF_String&) nogil except +
+
    # Damerau Levenshtein
    double osa_normalized_distance_func(  const RF_String&, const RF_String&, double) nogil except +
    int64_t osa_distance_func(            const RF_String&, const RF_String&, int64_t) nogil except +
@ -563,6 +565,24 @@ def hamming_normalized_similarity(s1, s2, *, processor=None, score_cutoff=None):
    return hamming_normalized_similarity_func(s1_proc.string, s2_proc.string, c_score_cutoff)


+def hamming_editops(s1, s2, *, processor=None):
+    cdef RF_StringWrapper s1_proc, s2_proc
+    cdef Editops ops = Editops.__new__(Editops)  # result object, created via __new__ so __init__ is not run
+
+    preprocess_strings(s1, s2, processor, &s1_proc, &s2_proc, None)  # fills s1_proc/s2_proc; presumably applies processor when given - confirm
+    ops.editops = hamming_editops_func(s1_proc.string, s2_proc.string)  # store the C++ result on the Python-level wrapper
+    return ops
+
+
+def hamming_opcodes(s1, s2, *, processor=None):
+    cdef RF_StringWrapper s1_proc, s2_proc
+    cdef Editops ops = Editops.__new__(Editops)  # intermediate Editops, created via __new__ so __init__ is not run
+
+    preprocess_strings(s1, s2, processor, &s1_proc, &s2_proc, None)  # fills s1_proc/s2_proc; presumably applies processor when given - confirm
+    ops.editops = hamming_editops_func(s1_proc.string, s2_proc.string)  # same C++ call as hamming_editops
+    return ops.as_opcodes()  # return the as_opcodes() conversion instead of the Editops object itself
+
+
 cdef bool GetScorerFlagsHammingDistance(const RF_Kwargs* self, RF_ScorerFlags* scorer_flags) nogil except False:
    dereference(scorer_flags).flags = RF_SCORER_FLAG_RESULT_I64 | RF_SCORER_FLAG_SYMMETRIC
    dereference(scorer_flags).optimal_score.i64 = 0
--- a/src/rapidfuzz/utils.hpp
+++ b/src/rapidfuzz/utils.hpp
@ -10,6 +10,7 @@
 #include <cwctype>
 #include <limits>
 #include <stdint.h>
+#include <string>

 uint32_t UnicodeDefaultProcess(uint32_t ch);