release v2.10.0

This commit is contained in:
Max Bachmann 2022-09-18 16:17:29 +02:00
parent 2cb4ec71f1
commit f69be93ca8
15 changed files with 131 additions and 9 deletions

View File

@ -1,5 +1,15 @@
## Changelog
### [2.10.0] - 2022-09-18
#### Added
- add editops to hamming distance
#### Performance
- strip common affix in osa distance
#### Fixed
- ignore missing pandas in Python3.11 tests
### [2.9.0] - 2022-09-16
#### Added
- add optimal string alignment (OSA)
@ -151,7 +161,7 @@
#### Changed
- add tests to sdist
- remove cython dependency for sdist
### [2.0.11] - 2022-04-23
#### Changed
- relax version requirements of dependencies to simplify packaging
@ -538,7 +548,7 @@ The old algorithm is used again until this bug is fixed.
### [0.11.2] - 2020-09-12
#### Added
- added rapidfuzz.\_\_author\_\_, rapidfuzz.\_\_license\_\_ and rapidfuzz.\_\_version\_\_
- added rapidfuzz.\_\_author\_\_, rapidfuzz.\_\_license\_\_ and rapidfuzz.\_\_version\_\_
### [0.11.1] - 2020-09-01
#### Fixed
@ -546,7 +556,7 @@ The old algorithm is used again until this bug is fixed.
### [0.11.0] - 2020-08-22
#### Changed
- support for python 2.7 added #40
- support for python 2.7 added #40
- add wheels for python2.7 (both pypy and cpython) on MacOS and Linux
### [0.10.0] - 2020-08-17

View File

@ -34,7 +34,7 @@ else()
add_library(Taskflow::Taskflow ALIAS Taskflow)
endif()
find_package(rapidfuzz 1.5.0 QUIET)
find_package(rapidfuzz 1.7.0 QUIET)
if (rapidfuzz_FOUND)
message("Using system supplied version of rapidfuzz-cpp")
else()

View File

@ -22,7 +22,7 @@ copyright = '2021, Max Bachmann'
author = 'Max Bachmann'
# The full version, including alpha/beta/rc tags
release = '2.9.0'
release = '2.10.0'
# -- General configuration ---------------------------------------------------

@ -1 +1 @@
Subproject commit 06c582124a33f4642132137c621af8abad3dea0e
Subproject commit 75e10756124a2805752d8f4713918466cd67902f

View File

@ -11,7 +11,7 @@ with open('README.md', 'rt', encoding="utf8") as f:
setup_args = {
"name": "rapidfuzz",
"version": "2.9.0",
"version": "2.10.0",
"install_requires": ["jarowinkler >= 1.2.0, < 2.0.0"],
"extras_require": {'full': ['numpy']},
"url": "https://github.com/maxbachmann/RapidFuzz",

View File

@ -3,6 +3,6 @@ rapid string matching library
"""
__author__: str = "Max Bachmann"
__license__: str = "MIT"
__version__: str = "2.9.0"
__version__: str = "2.10.0"
from rapidfuzz import process, distance, fuzz, string_metric, utils

View File

@ -14,6 +14,8 @@ distance = _fallback_import(_mod, "distance")
similarity = _fallback_import(_mod, "similarity")
normalized_distance = _fallback_import(_mod, "normalized_distance")
normalized_similarity = _fallback_import(_mod, "normalized_similarity")
editops = _fallback_import(_mod, "editops")
opcodes = _fallback_import(_mod, "opcodes")
distance._RF_ScorerPy = _dist_attr
similarity._RF_ScorerPy = _sim_attr

View File

@ -1,4 +1,5 @@
from typing import Callable, Hashable, Sequence, Optional, TypeVar
from rapidfuzz.distance import Editops, Opcodes
_StringType = Sequence[Hashable]
_S1 = TypeVar("_S1")
@ -32,3 +33,15 @@ def normalized_similarity(
processor: Optional[Callable[..., _StringType]] = None,
score_cutoff: Optional[float] = 0
) -> float: ...
# Typing stub for editops(): two positional inputs plus an optional
# keyword-only `processor` callable; the result is annotated as Editops.
def editops(
s1: _S1,
s2: _S2,
*,
processor: Optional[Callable[..., _StringType]] = None
) -> Editops: ...
# Typing stub for opcodes(): two positional inputs plus an optional
# keyword-only `processor` callable; the result is annotated as Opcodes.
def opcodes(
s1: _S1,
s2: _S2,
*,
processor: Optional[Callable[..., _StringType]] = None
) -> Opcodes: ...

View File

@ -6,4 +6,6 @@ from rapidfuzz.distance.metrics_cpp import (
hamming_similarity as similarity,
hamming_normalized_distance as normalized_distance,
hamming_normalized_similarity as normalized_similarity,
hamming_editops as editops,
hamming_opcodes as opcodes
)

View File

@ -155,3 +155,46 @@ def normalized_similarity(s1, s2, *, processor=None, score_cutoff=None):
norm_sim = 1 - norm_dist
return norm_sim if (score_cutoff is None or norm_dist >= score_cutoff) else 0.0
def editops(s1, s2, *, processor=None):
"""
Return Editops describing how to turn s1 into s2.
This implementation is a placeholder: it does not inspect its arguments
and unconditionally raises NotImplementedError.
Parameters
----------
s1 : Sequence[Hashable]
First string to compare.
s2 : Sequence[Hashable]
Second string to compare.
processor: callable, optional
Optional callable that is used to preprocess the strings before
comparing them. Default is None, which deactivates this behaviour.
Returns
-------
editops : Editops
edit operations required to turn s1 into s2
Raises
------
NotImplementedError
Always raised; no computation is implemented in this function.
"""
raise NotImplementedError
def opcodes(s1, s2, *, processor=None):
"""
Return Opcodes describing how to turn s1 into s2.
This implementation is a placeholder: it does not inspect its arguments
and unconditionally raises NotImplementedError.
Parameters
----------
s1 : Sequence[Hashable]
First string to compare.
s2 : Sequence[Hashable]
Second string to compare.
processor: callable, optional
Optional callable that is used to preprocess the strings before
comparing them. Default is None, which deactivates this behaviour.
Returns
-------
opcodes : Opcodes
edit operations required to turn s1 into s2
Raises
------
NotImplementedError
Always raised; no computation is implemented in this function.
"""
raise NotImplementedError

View File

@ -1,5 +1,6 @@
from typing import Callable, Hashable, Sequence, Optional, TypeVar, Any, Dict
from typing_extensions import Protocol
from rapidfuzz.distance import Editops, Opcodes
class _ScorerAttributes(Protocol):
_RF_ScorerPy: Dict
@ -43,3 +44,15 @@ def normalized_similarity(
processor: Optional[Callable[..., _StringType]] = None,
score_cutoff: Optional[float] = 0
) -> float: ...
# Typing stub for editops(): two positional inputs plus an optional
# keyword-only `processor` callable; the result is annotated as Editops.
def editops(
s1: _S1,
s2: _S2,
*,
processor: Optional[Callable[..., _StringType]] = None
) -> Editops: ...
# Typing stub for opcodes(): two positional inputs plus an optional
# keyword-only `processor` callable; the result is annotated as Opcodes.
def opcodes(
s1: _S1,
s2: _S2,
*,
processor: Optional[Callable[..., _StringType]] = None
) -> Opcodes: ...

View File

@ -278,6 +278,13 @@ static inline bool LCSseqNormalizedSimilarityInit(RF_ScorerFunc* self, const RF_
return normalized_similarity_init<rapidfuzz::CachedLCSseq, double>(self, str_count, str);
}
/* Compute the Hamming edit operations for two RF_String inputs.
 *
 * Both strings are passed to visitor() (defined elsewhere; presumably it
 * resolves the concrete character types -- confirm against its definition),
 * which invokes the lambda with the two resolved string views. The lambda
 * forwards them unchanged to rapidfuzz::hamming_editops and its result is
 * returned to the caller.
 */
static inline rapidfuzz::Editops hamming_editops_func(const RF_String& str1, const RF_String& str2)
{
    // Nothing from the enclosing scope is needed, so the capture list is empty.
    auto compute_editops = [](auto lhs, auto rhs) {
        return rapidfuzz::hamming_editops(lhs, rhs);
    };

    return visitor(str1, str2, compute_editops);
}
static inline rapidfuzz::Editops levenshtein_editops_func(const RF_String& str1, const RF_String& str2,
int64_t score_hint)
{

View File

@ -147,6 +147,18 @@ def hamming_normalized_similarity(
processor: Optional[Callable[..., _StringType]] = None,
score_cutoff: Optional[float] = 0
) -> float: ...
# Typing stub for hamming_editops(): two positional inputs plus an optional
# keyword-only `processor` callable; the result is annotated as Editops.
def hamming_editops(
s1: _S1,
s2: _S2,
*,
processor: Optional[Callable[..., _StringType]] = None
) -> Editops: ...
# Typing stub for hamming_opcodes(): two positional inputs plus an optional
# keyword-only `processor` callable; the result is annotated as Opcodes.
def hamming_opcodes(
s1: _S1,
s2: _S2,
*,
processor: Optional[Callable[..., _StringType]] = None
) -> Opcodes: ...
def damerau_levenshtein_distance(
s1: _S1,
s2: _S2,
@ -203,4 +215,3 @@ def osa_normalized_similarity(
processor: Optional[Callable[..., _StringType]] = None,
score_cutoff: Optional[float] = 0
) -> float: ...

View File

@ -86,6 +86,8 @@ cdef extern from "metrics.hpp":
bool HammingSimilarityInit( RF_ScorerFunc*, const RF_Kwargs*, int64_t, const RF_String*) nogil except False
bool HammingNormalizedSimilarityInit(RF_ScorerFunc*, const RF_Kwargs*, int64_t, const RF_String*) nogil except False
RfEditops hamming_editops_func(const RF_String&, const RF_String&) nogil except +
# Damerau Levenshtein
double osa_normalized_distance_func( const RF_String&, const RF_String&, double) nogil except +
int64_t osa_distance_func( const RF_String&, const RF_String&, int64_t) nogil except +
@ -563,6 +565,24 @@ def hamming_normalized_similarity(s1, s2, *, processor=None, score_cutoff=None):
return hamming_normalized_similarity_func(s1_proc.string, s2_proc.string, c_score_cutoff)
def hamming_editops(s1, s2, *, processor=None):
"""
Return the Editops produced by hamming_editops_func for s1 and s2.
Both inputs and the keyword-only `processor` are handed to
preprocess_strings, and the resulting wrapped strings are passed to the
C++ routine. Its result is stored on a new Editops object, which is
returned.
"""
cdef RF_StringWrapper s1_proc, s2_proc
# Allocate the result object via __new__; its `editops` member is assigned below.
cdef Editops ops = Editops.__new__(Editops)
# Fills s1_proc / s2_proc; presumably this is where `processor` is applied
# (helper is defined elsewhere -- confirm).
preprocess_strings(s1, s2, processor, &s1_proc, &s2_proc, None)
ops.editops = hamming_editops_func(s1_proc.string, s2_proc.string)
return ops
def hamming_opcodes(s1, s2, *, processor=None):
"""
Return the Hamming edit operations for s1 and s2 converted via as_opcodes().
The Editops object is built with the same steps as in hamming_editops
(preprocess_strings followed by hamming_editops_func); the value returned
is the result of calling as_opcodes() on it.
"""
cdef RF_StringWrapper s1_proc, s2_proc
# Allocate the intermediate Editops via __new__; its `editops` member is assigned below.
cdef Editops ops = Editops.__new__(Editops)
# Fills s1_proc / s2_proc; presumably this is where `processor` is applied
# (helper is defined elsewhere -- confirm).
preprocess_strings(s1, s2, processor, &s1_proc, &s2_proc, None)
ops.editops = hamming_editops_func(s1_proc.string, s2_proc.string)
# Convert the editops representation into opcodes before returning.
return ops.as_opcodes()
cdef bool GetScorerFlagsHammingDistance(const RF_Kwargs* self, RF_ScorerFlags* scorer_flags) nogil except False:
dereference(scorer_flags).flags = RF_SCORER_FLAG_RESULT_I64 | RF_SCORER_FLAG_SYMMETRIC
dereference(scorer_flags).optimal_score.i64 = 0

View File

@ -10,6 +10,7 @@
#include <cwctype>
#include <limits>
#include <stdint.h>
#include <string>
uint32_t UnicodeDefaultProcess(uint32_t ch);