release v2.10.0
parent 2cb4ec71f1
commit f69be93ca8
CHANGELOG.md (16 changes)
@@ -1,5 +1,15 @@
 ## Changelog

+### [2.10.0] - 2022-09-18
+#### Added
+- add editops to hamming distance
+
+#### Performance
+- strip common affix in osa distance
+
+#### Fixed
+- ignore missing pandas in Python3.11 tests
+
 ### [2.9.0] - 2022-09-16
 #### Added
 - add optimal string alignment (OSA)
@@ -151,7 +161,7 @@
 #### Changed
 - add tests to sdist
 - remove cython dependency for sdist
-
+
 ### [2.0.11] - 2022-04-23
 #### Changed
 - relax version requirements of dependencies to simplify packaging
@@ -538,7 +548,7 @@ The old algorithm is used again until this bug is fixed.

 ### [0.11.2] - 2020-09-12
 #### Added
-- added rapidfuzz.\_\_author\_\_, rapidfuzz.\_\_license\_\_ and rapidfuzz.\_\_version\_\_
+- added rapidfuzz.\_\_author\_\_, rapidfuzz.\_\_license\_\_ and rapidfuzz.\_\_version\_\_

 ### [0.11.1] - 2020-09-01
 #### Fixed
@@ -546,7 +556,7 @@ The old algorithm is used again until this bug is fixed.

 ### [0.11.0] - 2020-08-22
 #### Changed
-- support for python 2.7 added #40
+- support for python 2.7 added #40
 - add wheels for python2.7 (both pypy and cpython) on MacOS and Linux

 ### [0.10.0] - 2020-08-17
@@ -34,7 +34,7 @@ else()
     add_library(Taskflow::Taskflow ALIAS Taskflow)
 endif()

-find_package(rapidfuzz 1.5.0 QUIET)
+find_package(rapidfuzz 1.7.0 QUIET)
 if (rapidfuzz_FOUND)
     message("Using system supplied version of rapidfuzz-cpp")
 else()
@@ -22,7 +22,7 @@ copyright = '2021, Max Bachmann'
 author = 'Max Bachmann'

 # The full version, including alpha/beta/rc tags
-release = '2.9.0'
+release = '2.10.0'


 # -- General configuration ---------------------------------------------------
@@ -1 +1 @@
-Subproject commit 06c582124a33f4642132137c621af8abad3dea0e
+Subproject commit 75e10756124a2805752d8f4713918466cd67902f
setup.py (2 changes)
@@ -11,7 +11,7 @@ with open('README.md', 'rt', encoding="utf8") as f:

 setup_args = {
     "name": "rapidfuzz",
-    "version": "2.9.0",
+    "version": "2.10.0",
     "install_requires": ["jarowinkler >= 1.2.0, < 2.0.0"],
     "extras_require": {'full': ['numpy']},
     "url": "https://github.com/maxbachmann/RapidFuzz",
@@ -3,6 +3,6 @@ rapid string matching library
 """
 __author__: str = "Max Bachmann"
 __license__: str = "MIT"
-__version__: str = "2.9.0"
+__version__: str = "2.10.0"

 from rapidfuzz import process, distance, fuzz, string_metric, utils
@@ -14,6 +14,8 @@ distance = _fallback_import(_mod, "distance")
 similarity = _fallback_import(_mod, "similarity")
 normalized_distance = _fallback_import(_mod, "normalized_distance")
 normalized_similarity = _fallback_import(_mod, "normalized_similarity")
+editops = _fallback_import(_mod, "editops")
+opcodes = _fallback_import(_mod, "opcodes")

 distance._RF_ScorerPy = _dist_attr
 similarity._RF_ScorerPy = _sim_attr
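For context, the _fallback_import wiring above is the usual compiled-first pattern: take the attribute from the compiled backend when it is importable, otherwise fall back to the pure-Python implementation. A minimal sketch of that idea, with hypothetical module names (this is not RapidFuzz's actual helper):

import importlib

def fallback_import(module_base, name):
    # Prefer the compiled backend; fall back to the pure-Python one on ImportError.
    try:
        backend = importlib.import_module(module_base + "_cpp")
    except ImportError:
        backend = importlib.import_module(module_base + "_py")
    return getattr(backend, name)

# Usage in the spirit of the lines above (illustrative names only):
# editops = fallback_import("rapidfuzz.distance.metrics", "editops")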
@@ -1,4 +1,5 @@
 from typing import Callable, Hashable, Sequence, Optional, TypeVar
+from rapidfuzz.distance import Editops, Opcodes

 _StringType = Sequence[Hashable]
 _S1 = TypeVar("_S1")
@@ -32,3 +33,15 @@ def normalized_similarity(
     processor: Optional[Callable[..., _StringType]] = None,
     score_cutoff: Optional[float] = 0
 ) -> float: ...
+def editops(
+    s1: _S1,
+    s2: _S2,
+    *,
+    processor: Optional[Callable[..., _StringType]] = None
+) -> Editops: ...
+def opcodes(
+    s1: _S1,
+    s2: _S2,
+    *,
+    processor: Optional[Callable[..., _StringType]] = None
+) -> Opcodes: ...
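These stubs appear to belong to the user-facing rapidfuzz.distance.Hamming module, so the feature added in this release can be used roughly as follows (a hedged sketch; Hamming comparisons assume equal-length inputs, and the comments only indicate the kind of output):

from rapidfuzz.distance import Hamming

s1, s2 = "qabxcd", "dabxcq"        # equal length, mismatches at both ends
ops = Hamming.editops(s1, s2)      # one substitution per mismatching position
print(ops)
print(Hamming.opcodes(s1, s2))     # the same edits, grouped into blocks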
@@ -6,4 +6,6 @@ from rapidfuzz.distance.metrics_cpp import (
     hamming_similarity as similarity,
     hamming_normalized_distance as normalized_distance,
     hamming_normalized_similarity as normalized_similarity,
+    hamming_editops as editops,
+    hamming_opcodes as opcodes
 )
@@ -155,3 +155,46 @@ def normalized_similarity(s1, s2, *, processor=None, score_cutoff=None):
     norm_sim = 1 - norm_dist

     return norm_sim if (score_cutoff is None or norm_dist >= score_cutoff) else 0.0
+
+def editops(s1, s2, *, processor=None):
+    """
+    Return Editops describing how to turn s1 into s2.
+
+    Parameters
+    ----------
+    s1 : Sequence[Hashable]
+        First string to compare.
+    s2 : Sequence[Hashable]
+        Second string to compare.
+    processor: callable, optional
+        Optional callable that is used to preprocess the strings before
+        comparing them. Default is None, which deactivates this behaviour.
+
+    Returns
+    -------
+    editops : Editops
+        edit operations required to turn s1 into s2
+    """
+    raise NotImplementedError
+
+
+def opcodes(s1, s2, *, processor=None):
+    """
+    Return Opcodes describing how to turn s1 into s2.
+
+    Parameters
+    ----------
+    s1 : Sequence[Hashable]
+        First string to compare.
+    s2 : Sequence[Hashable]
+        Second string to compare.
+    processor: callable, optional
+        Optional callable that is used to preprocess the strings before
+        comparing them. Default is None, which deactivates this behaviour.
+
+    Returns
+    -------
+    opcodes : Opcodes
+        edit operations required to turn s1 into s2
+    """
+    raise NotImplementedError
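The fallback bodies above only document the API and raise NotImplementedError; in this release the actual computation lives in the compiled backend. Conceptually, Hamming editops reduce to a substitution at every index where the two equal-length sequences differ. A self-contained sketch of that idea, using plain tuples instead of the library's Editops type:

def hamming_editops_sketch(s1, s2):
    # Illustrative only: one ('replace', i, i) entry per mismatching position.
    if len(s1) != len(s2):
        raise ValueError("Hamming comparison assumes sequences of equal length")
    return [("replace", i, i) for i, (a, b) in enumerate(zip(s1, s2)) if a != b]

print(hamming_editops_sketch("qabxcd", "dabxcq"))
# [('replace', 0, 0), ('replace', 5, 5)]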
@@ -1,5 +1,6 @@
 from typing import Callable, Hashable, Sequence, Optional, TypeVar, Any, Dict
 from typing_extensions import Protocol
+from rapidfuzz.distance import Editops, Opcodes

 class _ScorerAttributes(Protocol):
     _RF_ScorerPy: Dict

@@ -43,3 +44,15 @@ def normalized_similarity(
     processor: Optional[Callable[..., _StringType]] = None,
     score_cutoff: Optional[float] = 0
 ) -> float: ...
+def editops(
+    s1: _S1,
+    s2: _S2,
+    *,
+    processor: Optional[Callable[..., _StringType]] = None
+) -> Editops: ...
+def opcodes(
+    s1: _S1,
+    s2: _S2,
+    *,
+    processor: Optional[Callable[..., _StringType]] = None
+) -> Opcodes: ...
@@ -278,6 +278,13 @@ static inline bool LCSseqNormalizedSimilarityInit(RF_ScorerFunc* self, const RF_
     return normalized_similarity_init<rapidfuzz::CachedLCSseq, double>(self, str_count, str);
 }

+static inline rapidfuzz::Editops hamming_editops_func(const RF_String& str1, const RF_String& str2)
+{
+    return visitor(str1, str2, [&](auto s1, auto s2) {
+        return rapidfuzz::hamming_editops(s1, s2);
+    });
+}
+
 static inline rapidfuzz::Editops levenshtein_editops_func(const RF_String& str1, const RF_String& str2,
                                                           int64_t score_hint)
 {
@@ -147,6 +147,18 @@ def hamming_normalized_similarity(
     processor: Optional[Callable[..., _StringType]] = None,
     score_cutoff: Optional[float] = 0
 ) -> float: ...
+def hamming_editops(
+    s1: _S1,
+    s2: _S2,
+    *,
+    processor: Optional[Callable[..., _StringType]] = None
+) -> Editops: ...
+def hamming_opcodes(
+    s1: _S1,
+    s2: _S2,
+    *,
+    processor: Optional[Callable[..., _StringType]] = None
+) -> Opcodes: ...
 def damerau_levenshtein_distance(
     s1: _S1,
     s2: _S2,

@@ -203,4 +215,3 @@ def osa_normalized_similarity(
     processor: Optional[Callable[..., _StringType]] = None,
     score_cutoff: Optional[float] = 0
 ) -> float: ...
-
@@ -86,6 +86,8 @@ cdef extern from "metrics.hpp":
     bool HammingSimilarityInit( RF_ScorerFunc*, const RF_Kwargs*, int64_t, const RF_String*) nogil except False
     bool HammingNormalizedSimilarityInit(RF_ScorerFunc*, const RF_Kwargs*, int64_t, const RF_String*) nogil except False

+    RfEditops hamming_editops_func(const RF_String&, const RF_String&) nogil except +
+
     # Damerau Levenshtein
     double osa_normalized_distance_func( const RF_String&, const RF_String&, double) nogil except +
     int64_t osa_distance_func( const RF_String&, const RF_String&, int64_t) nogil except +

@@ -563,6 +565,24 @@ def hamming_normalized_similarity(s1, s2, *, processor=None, score_cutoff=None):
     return hamming_normalized_similarity_func(s1_proc.string, s2_proc.string, c_score_cutoff)


+def hamming_editops(s1, s2, *, processor=None):
+    cdef RF_StringWrapper s1_proc, s2_proc
+    cdef Editops ops = Editops.__new__(Editops)
+
+    preprocess_strings(s1, s2, processor, &s1_proc, &s2_proc, None)
+    ops.editops = hamming_editops_func(s1_proc.string, s2_proc.string)
+    return ops
+
+
+def hamming_opcodes(s1, s2, *, processor=None):
+    cdef RF_StringWrapper s1_proc, s2_proc
+    cdef Editops ops = Editops.__new__(Editops)
+
+    preprocess_strings(s1, s2, processor, &s1_proc, &s2_proc, None)
+    ops.editops = hamming_editops_func(s1_proc.string, s2_proc.string)
+    return ops.as_opcodes()
+
+
 cdef bool GetScorerFlagsHammingDistance(const RF_Kwargs* self, RF_ScorerFlags* scorer_flags) nogil except False:
     dereference(scorer_flags).flags = RF_SCORER_FLAG_RESULT_I64 | RF_SCORER_FLAG_SYMMETRIC
     dereference(scorer_flags).optimal_score.i64 = 0
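Note the design in the Cython wrapper above: hamming_opcodes reuses the same hamming_editops_func and merely converts the result with as_opcodes(), so both functions expose one computation in two formats. A hedged sketch of how that looks from Python, assuming the 2.10.0 API added in this commit:

from rapidfuzz.distance import Hamming

s1, s2 = "karolin", "kathrin"
ops = Hamming.editops(s1, s2)      # per-position substitutions
print(ops.as_opcodes())            # grouped, difflib-style opcode blocks
print(Hamming.opcodes(s1, s2))     # should describe the same edits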
@@ -10,6 +10,7 @@
 #include <cwctype>
+#include <limits>
 #include <stdint.h>
 #include <string>

 uint32_t UnicodeDefaultProcess(uint32_t ch);
