add cdist implementation
This commit is contained in:
parent
1aed654d4f
commit
56f062b063
|
@ -101,7 +101,8 @@ jobs:
|
||||||
fail-fast: false
|
fail-fast: false
|
||||||
matrix:
|
matrix:
|
||||||
python_tag: [ "pp36-*", "pp37-*"]
|
python_tag: [ "pp36-*", "pp37-*"]
|
||||||
os: [ubuntu-latest, windows-latest, macos-latest]
|
# NumPy ships no wheels for PyPy on macOS
|
||||||
|
os: [ubuntu-latest, windows-latest]
|
||||||
env:
|
env:
|
||||||
CIBW_BUILD: ${{matrix.python_tag}}
|
CIBW_BUILD: ${{matrix.python_tag}}
|
||||||
# activate tests when the fix for
|
# activate tests when the fix for
|
||||||
|
|
|
@ -1,6 +1,10 @@
|
||||||
process module
|
process module
|
||||||
==============
|
==============
|
||||||
|
|
||||||
|
cdist
|
||||||
|
----------
|
||||||
|
.. autofunction:: rapidfuzz.process.cdist
|
||||||
|
|
||||||
extract
|
extract
|
||||||
-------
|
-------
|
||||||
.. autofunction:: rapidfuzz.process.extract
|
.. autofunction:: rapidfuzz.process.extract
|
||||||
|
|
|
@ -2,5 +2,6 @@
|
||||||
requires = [
|
requires = [
|
||||||
"setuptools",
|
"setuptools",
|
||||||
"wheel",
|
"wheel",
|
||||||
|
"oldest-supported-numpy"
|
||||||
]
|
]
|
||||||
build-backend = "setuptools.build_meta"
|
build-backend = "setuptools.build_meta"
|
|
@ -26,6 +26,8 @@ package_dir=
|
||||||
=src
|
=src
|
||||||
packages = find:
|
packages = find:
|
||||||
python_requires = >=3.5
|
python_requires = >=3.5
|
||||||
|
install_requires =
|
||||||
|
numpy
|
||||||
|
|
||||||
[options.packages.find]
|
[options.packages.find]
|
||||||
where=src
|
where=src
|
||||||
|
|
3
setup.py
3
setup.py
|
@ -2,6 +2,7 @@ from setuptools import setup, Extension
|
||||||
from setuptools.command.build_ext import build_ext
|
from setuptools.command.build_ext import build_ext
|
||||||
import sys
|
import sys
|
||||||
import os
|
import os
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
# use with export RAPIDFUZZ_TRACE=1
|
# use with export RAPIDFUZZ_TRACE=1
|
||||||
RAPIDFUZZ_TRACE = os.environ.get("RAPIDFUZZ_TRACE", False)
|
RAPIDFUZZ_TRACE = os.environ.get("RAPIDFUZZ_TRACE", False)
|
||||||
|
@ -46,7 +47,7 @@ ext_modules = [
|
||||||
'src/cpp_process.cpp',
|
'src/cpp_process.cpp',
|
||||||
'src/rapidfuzz-cpp/rapidfuzz/details/unicode.cpp'
|
'src/rapidfuzz-cpp/rapidfuzz/details/unicode.cpp'
|
||||||
],
|
],
|
||||||
include_dirs=["src/rapidfuzz-cpp/"],
|
include_dirs=["src/rapidfuzz-cpp/", np.get_include()],
|
||||||
language='c++',
|
language='c++',
|
||||||
),
|
),
|
||||||
Extension(
|
Extension(
|
||||||
|
|
|
@ -204,8 +204,9 @@ double RATIO##_impl_inner_##PROCESSOR(const proc_string& s1, const Sentence& s2,
|
||||||
{ \
|
{ \
|
||||||
switch(s1.kind){ \
|
switch(s1.kind){ \
|
||||||
LIST_OF_CASES(RATIO_FUNC, PROCESSOR) \
|
LIST_OF_CASES(RATIO_FUNC, PROCESSOR) \
|
||||||
|
default: \
|
||||||
|
throw std::logic_error("Reached end of control flow in " #RATIO "_impl_inner_" #PROCESSOR); \
|
||||||
} \
|
} \
|
||||||
assert(false); /* silence any warnings about missing return value */ \
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* generate <ratio_name>_impl_<processor> functions which are used internally
|
/* generate <ratio_name>_impl_<processor> functions which are used internally
|
||||||
|
@ -217,8 +218,9 @@ double RATIO##_impl_##PROCESSOR(const proc_string& s1, const proc_string& s2, Ar
|
||||||
{ \
|
{ \
|
||||||
switch(s1.kind){ \
|
switch(s1.kind){ \
|
||||||
LIST_OF_CASES(RATIO##_impl_inner_##PROCESSOR, PROCESSOR) \
|
LIST_OF_CASES(RATIO##_impl_inner_##PROCESSOR, PROCESSOR) \
|
||||||
|
default: \
|
||||||
|
throw std::logic_error("Reached end of control flow in " #RATIO "_impl_" #PROCESSOR); \
|
||||||
} \
|
} \
|
||||||
assert(false); /* silence any warnings about missing return value */ \
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#define RATIO_IMPL_DEF(RATIO, RATIO_FUNC) \
|
#define RATIO_IMPL_DEF(RATIO, RATIO_FUNC) \
|
||||||
|
@ -236,8 +238,9 @@ size_t RATIO##_impl_inner_##PROCESSOR(const proc_string& s1, const Sentence& s2,
|
||||||
{ \
|
{ \
|
||||||
switch(s1.kind){ \
|
switch(s1.kind){ \
|
||||||
LIST_OF_CASES(RATIO_FUNC, PROCESSOR) \
|
LIST_OF_CASES(RATIO_FUNC, PROCESSOR) \
|
||||||
|
default: \
|
||||||
|
throw std::logic_error("Reached end of control flow in " #RATIO "_impl_inner_" #PROCESSOR); \
|
||||||
} \
|
} \
|
||||||
assert(false); /* silence any warnings about missing return value */ \
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* generate <ratio_name>_impl_<processor> functions which are used internally
|
/* generate <ratio_name>_impl_<processor> functions which are used internally
|
||||||
|
@ -249,8 +252,9 @@ size_t RATIO##_impl_##PROCESSOR(const proc_string& s1, const proc_string& s2, Ar
|
||||||
{ \
|
{ \
|
||||||
switch(s1.kind){ \
|
switch(s1.kind){ \
|
||||||
LIST_OF_CASES(RATIO##_impl_inner_##PROCESSOR, PROCESSOR) \
|
LIST_OF_CASES(RATIO##_impl_inner_##PROCESSOR, PROCESSOR) \
|
||||||
|
default: \
|
||||||
|
throw std::logic_error("Reached end of control flow in " #RATIO "_impl_" #PROCESSOR); \
|
||||||
} \
|
} \
|
||||||
assert(false); /* silence any warnings about missing return value */ \
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#define DISTANCE_IMPL_DEF(RATIO, RATIO_FUNC) \
|
#define DISTANCE_IMPL_DEF(RATIO, RATIO_FUNC) \
|
||||||
|
@ -310,3 +314,36 @@ PyObject* RATIO##_default_process(const proc_string& s1, const proc_string& s2,
|
||||||
size_t result = RATIO##_impl_default_process(s1, s2, max); \
|
size_t result = RATIO##_impl_default_process(s1, s2, max); \
|
||||||
return dist_to_long(result); \
|
return dist_to_long(result); \
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
template <typename CharT>
|
||||||
|
proc_string default_process_func_impl(proc_string sentence) {
|
||||||
|
CharT* str = static_cast<CharT*>(sentence.data);
|
||||||
|
if (!sentence.allocated)
|
||||||
|
{
|
||||||
|
CharT* temp_str = (CharT*)malloc(sentence.length * sizeof(CharT));
|
||||||
|
if (temp_str == NULL)
|
||||||
|
{
|
||||||
|
throw std::bad_alloc();
|
||||||
|
}
|
||||||
|
std::copy(str, str + sentence.length, temp_str);
|
||||||
|
str = temp_str;
|
||||||
|
}
|
||||||
|
|
||||||
|
sentence.allocated = true;
|
||||||
|
sentence.data = str;
|
||||||
|
sentence.kind = sentence.kind;
|
||||||
|
sentence.length = utils::default_process(str, sentence.length);
|
||||||
|
|
||||||
|
return sentence;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Runtime dispatcher for default_process_func_impl.
 *
 * Switches on `sentence.kind` and forwards `sentence` to the
 * default_process_func_impl<TYPE> instantiation that LIST_OF_CASES pairs with
 * that kind.
 *
 * @param sentence  string descriptor, taken by value and moved into the callee
 * @return whatever the selected default_process_func_impl instantiation returns
 * @throws std::logic_error if `sentence.kind` matches none of the generated cases
 */
proc_string default_process_func(proc_string sentence)
{
/* one `case` label per entry of LIST_OF_CASES; MSVC_TUPLE is not needed here */
#define X_ENUM(KIND, TYPE, MSVC_TUPLE) \
    case KIND:                         \
        return default_process_func_impl<TYPE>(std::move(sentence));

    switch (sentence.kind) {
    LIST_OF_CASES()
    default:
        throw std::logic_error("Reached end of control flow in default_process_func");
    }

#undef X_ENUM
}
|
||||||
|
|
|
@ -23,6 +23,7 @@ cdef extern from "cpp_common.hpp":
|
||||||
int is_valid_string(object py_str) except +
|
int is_valid_string(object py_str) except +
|
||||||
proc_string convert_string(object py_str)
|
proc_string convert_string(object py_str)
|
||||||
void validate_string(object py_str, const char* err) except +
|
void validate_string(object py_str, const char* err) except +
|
||||||
|
proc_string default_process_func(proc_string sentence) except +
|
||||||
|
|
||||||
cdef inline proc_string hash_array(arr) except *:
|
cdef inline proc_string hash_array(arr) except *:
|
||||||
# TODO on Cpython this does not require any copies
|
# TODO on Cpython this does not require any copies
|
||||||
|
|
|
@ -1976,11 +1976,11 @@ static const char __pyx_k_partial_token_set_ratio[] = "partial_token_set_ratio";
|
||||||
static const char __pyx_k_partial_token_sort_ratio[] = "partial_token_sort_ratio";
|
static const char __pyx_k_partial_token_sort_ratio[] = "partial_token_sort_ratio";
|
||||||
static const char __pyx_k_token_set_ratio_line_217[] = "token_set_ratio (line 217)";
|
static const char __pyx_k_token_set_ratio_line_217[] = "token_set_ratio (line 217)";
|
||||||
static const char __pyx_k_token_sort_ratio_line_170[] = "token_sort_ratio (line 170)";
|
static const char __pyx_k_token_sort_ratio_line_170[] = "token_sort_ratio (line 170)";
|
||||||
static const char __pyx_k_Sorts_the_words_in_the_strings[] = "\n Sorts the words in the strings and calculates the fuzz.ratio between them\n\n Parameters\n ----------\n s1 : str\n First string to compare.\n s2 : str\n Second string to compare.\n processor: bool or callable, optional\n Optional callable that is used to preprocess the strings before\n comparing them. When processor is True ``utils.default_process``\n is used. Default is True.\n score_cutoff : float, optional\n Optional argument for a score threshold as a float between 0 and 100.\n For ratio < score_cutoff 0 is returned instead. Default is 0,\n which deactivates this behaviour.\n\n Returns\n -------\n similarity : float\n similarity between s1 and s2 as a float between 0 and 100\n\n Notes\n -----\n .. image:: img/token_sort_ratio.svg\n\n Examples\n --------\n >>> fuzz.token_sort_ratio(\"fuzzy wuzzy was a bear\", \"wuzzy fuzzy was a bear\")\n 100.0\n ";
|
static const char __pyx_k_Sorts_the_words_in_the_strings[] = "\n Sorts the words in the strings and calculates the fuzz.ratio between them\n\n Parameters\n ----------\n s1 : Sequence[Hashable]\n First string to compare.\n s2 : Sequence[Hashable]\n Second string to compare.\n processor: bool or callable, optional\n Optional callable that is used to preprocess the strings before\n comparing them. When processor is True ``utils.default_process``\n is used. Default is True.\n score_cutoff : float, optional\n Optional argument for a score threshold as a float between 0 and 100.\n For ratio < score_cutoff 0 is returned instead. Default is 0,\n which deactivates this behaviour.\n\n Returns\n -------\n similarity : float\n similarity between s1 and s2 as a float between 0 and 100\n\n Notes\n -----\n .. image:: img/token_sort_ratio.svg\n\n Examples\n --------\n >>> fuzz.token_sort_ratio(\"fuzzy wuzzy was a bear\", \"wuzzy fuzzy was a bear\")\n 100.0\n ";
|
||||||
static const char __pyx_k_Calculates_a_quick_ratio_betwee[] = "\n Calculates a quick ratio between two strings using fuzz.ratio.\n The only difference to fuzz.ratio is, that this preprocesses\n the strings by default.\n\n Parameters\n ----------\n s1 : str\n First string to compare.\n s2 : str\n Second string to compare.\n processor: bool or callable, optional\n Optional callable that is used to preprocess the strings before\n comparing them. When processor is True ``utils.default_process``\n is used. Default is True.\n score_cutoff : float, optional\n Optional argument for a score threshold as a float between 0 and 100.\n For ratio < score_cutoff 0 is returned instead. Default is 0,\n which deactivates this behaviour.\n\n Returns\n -------\n similarity : float\n similarity between s1 and s2 as a float between 0 and 100\n\n Examples\n --------\n >>> fuzz.QRatio(\"this is a test\", \"THIS is a test!\")\n 100.0\n ";
|
static const char __pyx_k_Calculates_a_quick_ratio_betwee[] = "\n Calculates a quick ratio between two strings using fuzz.ratio.\n The only difference to fuzz.ratio is, that this preprocesses\n the strings by default.\n\n Parameters\n ----------\n s1 : Sequence[Hashable]\n First string to compare.\n s2 : Sequence[Hashable]\n Second string to compare.\n processor: bool or callable, optional\n Optional callable that is used to preprocess the strings before\n comparing them. When processor is True ``utils.default_process``\n is used. Default is True.\n score_cutoff : float, optional\n Optional argument for a score threshold as a float between 0 and 100.\n For ratio < score_cutoff 0 is returned instead. Default is 0,\n which deactivates this behaviour.\n\n Returns\n -------\n similarity : float\n similarity between s1 and s2 as a float between 0 and 100\n\n Examples\n --------\n >>> fuzz.QRatio(\"this is a test\", \"THIS is a test!\")\n 100.0\n ";
|
||||||
static const char __pyx_k_Calculates_the_normalized_InDel[] = "\n Calculates the normalized InDel distance.\n\n Parameters\n ----------\n s1 : str\n First string to compare.\n s2 : str\n Second string to compare.\n processor: bool or callable, optional\n Optional callable that is used to preprocess the strings before\n comparing them. When processor is True ``utils.default_process``\n is used. Default is None, which deactivates this behaviour.\n score_cutoff : float, optional\n Optional argument for a score threshold as a float between 0 and 100.\n For ratio < score_cutoff 0 is returned instead. Default is 0,\n which deactivates this behaviour.\n\n Returns\n -------\n similarity : float\n similarity between s1 and s2 as a float between 0 and 100\n\n See Also\n --------\n rapidfuzz.string_metric.normalized_levenshtein : Normalized levenshtein distance\n\n Notes\n -----\n .. image:: img/ratio.svg\n\n Examples\n --------\n >>> fuzz.ratio(\"this is a test\", \"this is a test!\")\n 96.55171966552734\n ";
|
static const char __pyx_k_Calculates_the_normalized_InDel[] = "\n Calculates the normalized InDel distance.\n\n Parameters\n ----------\n s1 : Sequence[Hashable]\n First string to compare.\n s2 : Sequence[Hashable]\n Second string to compare.\n processor: bool or callable, optional\n Optional callable that is used to preprocess the strings before\n comparing them. When processor is True ``utils.default_process``\n is used. Default is None, which deactivates this behaviour.\n score_cutoff : float, optional\n Optional argument for a score threshold as a float between 0 and 100.\n For ratio < score_cutoff 0 is returned instead. Default is 0,\n which deactivates this behaviour.\n\n Returns\n -------\n similarity : float\n similarity between s1 and s2 as a float between 0 and 100\n\n See Also\n --------\n rapidfuzz.string_metric.normalized_levenshtein : Normalized levenshtein distance\n\n Notes\n -----\n .. image:: img/ratio.svg\n\n Examples\n --------\n >>> fuzz.ratio(\"this is a test\", \"this is a test!\")\n 96.55171966552734\n ";
|
||||||
static const char __pyx_k_Compares_the_words_in_the_strin[] = "\n Compares the words in the strings based on unique and common words between them\n using fuzz.ratio\n\n Parameters\n ----------\n s1 : str\n First string to compare.\n s2 : str\n Second string to compare.\n processor: bool or callable, optional\n Optional callable that is used to preprocess the strings before\n comparing them. When processor is True ``utils.default_process``\n is used. Default is True.\n score_cutoff : float, optional\n Optional argument for a score threshold as a float between 0 and 100.\n For ratio < score_cutoff 0 is returned instead. Default is 0,\n which deactivates this behaviour.\n\n Returns\n -------\n similarity : float\n similarity between s1 and s2 as a float between 0 and 100\n\n Notes\n -----\n .. image:: img/token_set_ratio.svg\n\n Examples\n --------\n >>> fuzz.token_sort_ratio(\"fuzzy was a bear\", \"fuzzy fuzzy was a bear\")\n 83.8709716796875\n >>> fuzz.token_set_ratio(\"fuzzy was a bear\", \"fuzzy fuzzy was a bear\")\n 100.0\n ";
|
static const char __pyx_k_Compares_the_words_in_the_strin[] = "\n Compares the words in the strings based on unique and common words between them\n using fuzz.ratio\n\n Parameters\n ----------\n s1 : Sequence[Hashable]\n First string to compare.\n s2 : Sequence[Hashable]\n Second string to compare.\n processor: bool or callable, optional\n Optional callable that is used to preprocess the strings before\n comparing them. When processor is True ``utils.default_process``\n is used. Default is True.\n score_cutoff : float, optional\n Optional argument for a score threshold as a float between 0 and 100.\n For ratio < score_cutoff 0 is returned instead. Default is 0,\n which deactivates this behaviour.\n\n Returns\n -------\n similarity : float\n similarity between s1 and s2 as a float between 0 and 100\n\n Notes\n -----\n .. image:: img/token_set_ratio.svg\n\n Examples\n --------\n >>> fuzz.token_sort_ratio(\"fuzzy was a bear\", \"fuzzy fuzzy was a bear\")\n 83.8709716796875\n >>> fuzz.token_set_ratio(\"fuzzy was a bear\", \"fuzzy fuzzy was a bear\")\n 100.0\n ";
|
||||||
static const char __pyx_k_Searches_for_the_optimal_alignm[] = "\n Searches for the optimal alignment of the shorter string in the\n longer string and returns the fuzz.ratio for this alignment.\n\n Parameters\n ----------\n s1 : str\n First string to compare.\n s2 : str\n Second string to compare.\n processor: bool or callable, optional\n Optional callable that is used to preprocess the strings before\n comparing them. When processor is True ``utils.default_process``\n is used. Default is None, which deactivates this behaviour.\n score_cutoff : float, optional\n Optional argument for a score threshold as a float between 0 and 100.\n For ratio < score_cutoff 0 is returned instead. Default is 0,\n which deactivates this behaviour.\n\n Returns\n -------\n similarity : float\n similarity between s1 and s2 as a float between 0 and 100\n\n Notes\n -----\n Depending on the length of the needle (shorter string) different\n implementations are used to improve the performance.\n\n short needle (length \342\211\244 64):\n When using a short needle length the fuzz.ratio is calculated for all\n alignments that could result in an optimal alignment. It is\n guaranteed to find the optimal alignment. For short needles this is very\n fast, since for them fuzz.ratio runs in ``O(N)`` time. This results in a worst\n case performance of ``O(NM)``.\n \n .. image:: img/partial_ratio_short_needle.svg\n\n long needle (length > 64):\n For long needles a similar implementation to FuzzyWuzzy is used.\n This implementation only considers alignments which start at one\n of the longest common substrings. This results in a worst case performance\n of ``O(N[N/64]M)``. However usually most of the alignments can be skipped.\n The following Python code shows the concept:\n\n .. 
code-block:: python\n\n blocks = SequenceMatcher(None, needle, longer, False).get_ma""tching_blocks()\n score = 0\n for block in blocks:\n long_start = block[1] - block[0] if (block[1] - block[0]) > 0 else 0\n long_end = long_start + len(shorter)\n long_substr = longer[long_start:long_end]\n score = max(score, fuzz.ratio(needle, long_substr))\n\n This is a lot faster than checking all possible alignments. However it\n only finds one of the best alignments and not necessarily the optimal one.\n\n .. image:: img/partial_ratio_long_needle.svg\n\n Examples\n --------\n >>> fuzz.partial_ratio(\"this is a test\", \"this is a test!\")\n 100.0\n ";
|
static const char __pyx_k_Searches_for_the_optimal_alignm[] = "\n Searches for the optimal alignment of the shorter string in the\n longer string and returns the fuzz.ratio for this alignment.\n\n Parameters\n ----------\n s1 : Sequence[Hashable]\n First string to compare.\n s2 : Sequence[Hashable]\n Second string to compare.\n processor: bool or callable, optional\n Optional callable that is used to preprocess the strings before\n comparing them. When processor is True ``utils.default_process``\n is used. Default is None, which deactivates this behaviour.\n score_cutoff : float, optional\n Optional argument for a score threshold as a float between 0 and 100.\n For ratio < score_cutoff 0 is returned instead. Default is 0,\n which deactivates this behaviour.\n\n Returns\n -------\n similarity : float\n similarity between s1 and s2 as a float between 0 and 100\n\n Notes\n -----\n Depending on the length of the needle (shorter string) different\n implementations are used to improve the performance.\n\n short needle (length \342\211\244 64):\n When using a short needle length the fuzz.ratio is calculated for all\n alignments that could result in an optimal alignment. It is\n guaranteed to find the optimal alignment. For short needles this is very\n fast, since for them fuzz.ratio runs in ``O(N)`` time. This results in a worst\n case performance of ``O(NM)``.\n \n .. image:: img/partial_ratio_short_needle.svg\n\n long needle (length > 64):\n For long needles a similar implementation to FuzzyWuzzy is used.\n This implementation only considers alignments which start at one\n of the longest common substrings. This results in a worst case performance\n of ``O(N[N/64]M)``. However usually most of the alignments can be skipped.\n The following Python code shows the concept:\n\n .. 
code-block:: python\n\n blocks = SequenceMatcher(None,"" needle, longer, False).get_matching_blocks()\n score = 0\n for block in blocks:\n long_start = block[1] - block[0] if (block[1] - block[0]) > 0 else 0\n long_end = long_start + len(shorter)\n long_substr = longer[long_start:long_end]\n score = max(score, fuzz.ratio(needle, long_substr))\n\n This is a lot faster than checking all possible alignments. However it\n only finds one of the best alignments and not necessarily the optimal one.\n\n .. image:: img/partial_ratio_long_needle.svg\n\n Examples\n --------\n >>> fuzz.partial_ratio(\"this is a test\", \"this is a test!\")\n 100.0\n ";
|
||||||
#if !CYTHON_USE_MODULE_STATE
|
#if !CYTHON_USE_MODULE_STATE
|
||||||
static PyObject *__pyx_kp_u_Calculates_a_quick_ratio_betwee;
|
static PyObject *__pyx_kp_u_Calculates_a_quick_ratio_betwee;
|
||||||
static PyObject *__pyx_kp_u_Calculates_the_normalized_InDel;
|
static PyObject *__pyx_kp_u_Calculates_the_normalized_InDel;
|
||||||
|
@ -2542,7 +2542,7 @@ PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds
|
||||||
PyObject *__pyx_args, PyObject *__pyx_kwds
|
PyObject *__pyx_args, PyObject *__pyx_kwds
|
||||||
#endif
|
#endif
|
||||||
); /*proto*/
|
); /*proto*/
|
||||||
PyDoc_STRVAR(__pyx_doc_8cpp_fuzz_ratio, "\n Calculates the normalized InDel distance.\n\n Parameters\n ----------\n s1 : str\n First string to compare.\n s2 : str\n Second string to compare.\n processor: bool or callable, optional\n Optional callable that is used to preprocess the strings before\n comparing them. When processor is True ``utils.default_process``\n is used. Default is None, which deactivates this behaviour.\n score_cutoff : float, optional\n Optional argument for a score threshold as a float between 0 and 100.\n For ratio < score_cutoff 0 is returned instead. Default is 0,\n which deactivates this behaviour.\n\n Returns\n -------\n similarity : float\n similarity between s1 and s2 as a float between 0 and 100\n\n See Also\n --------\n rapidfuzz.string_metric.normalized_levenshtein : Normalized levenshtein distance\n\n Notes\n -----\n .. image:: img/ratio.svg\n\n Examples\n --------\n >>> fuzz.ratio(\"this is a test\", \"this is a test!\")\n 96.55171966552734\n ");
|
PyDoc_STRVAR(__pyx_doc_8cpp_fuzz_ratio, "\n Calculates the normalized InDel distance.\n\n Parameters\n ----------\n s1 : Sequence[Hashable]\n First string to compare.\n s2 : Sequence[Hashable]\n Second string to compare.\n processor: bool or callable, optional\n Optional callable that is used to preprocess the strings before\n comparing them. When processor is True ``utils.default_process``\n is used. Default is None, which deactivates this behaviour.\n score_cutoff : float, optional\n Optional argument for a score threshold as a float between 0 and 100.\n For ratio < score_cutoff 0 is returned instead. Default is 0,\n which deactivates this behaviour.\n\n Returns\n -------\n similarity : float\n similarity between s1 and s2 as a float between 0 and 100\n\n See Also\n --------\n rapidfuzz.string_metric.normalized_levenshtein : Normalized levenshtein distance\n\n Notes\n -----\n .. image:: img/ratio.svg\n\n Examples\n --------\n >>> fuzz.ratio(\"this is a test\", \"this is a test!\")\n 96.55171966552734\n ");
|
||||||
static PyMethodDef __pyx_mdef_8cpp_fuzz_1ratio = {"ratio", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_8cpp_fuzz_1ratio, __Pyx_METH_FASTCALL|METH_KEYWORDS, __pyx_doc_8cpp_fuzz_ratio};
|
static PyMethodDef __pyx_mdef_8cpp_fuzz_1ratio = {"ratio", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_8cpp_fuzz_1ratio, __Pyx_METH_FASTCALL|METH_KEYWORDS, __pyx_doc_8cpp_fuzz_ratio};
|
||||||
static PyObject *__pyx_pw_8cpp_fuzz_1ratio(PyObject *__pyx_self,
|
static PyObject *__pyx_pw_8cpp_fuzz_1ratio(PyObject *__pyx_self,
|
||||||
#if CYTHON_METH_FASTCALL
|
#if CYTHON_METH_FASTCALL
|
||||||
|
@ -2926,7 +2926,7 @@ PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds
|
||||||
PyObject *__pyx_args, PyObject *__pyx_kwds
|
PyObject *__pyx_args, PyObject *__pyx_kwds
|
||||||
#endif
|
#endif
|
||||||
); /*proto*/
|
); /*proto*/
|
||||||
PyDoc_STRVAR(__pyx_doc_8cpp_fuzz_2partial_ratio, "\n Searches for the optimal alignment of the shorter string in the\n longer string and returns the fuzz.ratio for this alignment.\n\n Parameters\n ----------\n s1 : str\n First string to compare.\n s2 : str\n Second string to compare.\n processor: bool or callable, optional\n Optional callable that is used to preprocess the strings before\n comparing them. When processor is True ``utils.default_process``\n is used. Default is None, which deactivates this behaviour.\n score_cutoff : float, optional\n Optional argument for a score threshold as a float between 0 and 100.\n For ratio < score_cutoff 0 is returned instead. Default is 0,\n which deactivates this behaviour.\n\n Returns\n -------\n similarity : float\n similarity between s1 and s2 as a float between 0 and 100\n\n Notes\n -----\n Depending on the length of the needle (shorter string) different\n implementations are used to improve the performance.\n\n short needle (length \342\211\244 64):\n When using a short needle length the fuzz.ratio is calculated for all\n alignments that could result in an optimal alignment. It is\n guaranteed to find the optimal alignment. For short needles this is very\n fast, since for them fuzz.ratio runs in ``O(N)`` time. This results in a worst\n case performance of ``O(NM)``.\n \n .. image:: img/partial_ratio_short_needle.svg\n\n long needle (length > 64):\n For long needles a similar implementation to FuzzyWuzzy is used.\n This implementation only considers alignments which start at one\n of the longest common substrings. This results in a worst case performance\n of ``O(N[N/64]M)``. However usually most of the alignments can be skipped.\n The following Python code shows the concept:\n\n .. 
code-block:: python\n\n blocks = SequenceMatcher(None, needle, longer, False).get_ma""tching_blocks()\n score = 0\n for block in blocks:\n long_start = block[1] - block[0] if (block[1] - block[0]) > 0 else 0\n long_end = long_start + len(shorter)\n long_substr = longer[long_start:long_end]\n score = max(score, fuzz.ratio(needle, long_substr))\n\n This is a lot faster than checking all possible alignments. However it\n only finds one of the best alignments and not necessarily the optimal one.\n\n .. image:: img/partial_ratio_long_needle.svg\n\n Examples\n --------\n >>> fuzz.partial_ratio(\"this is a test\", \"this is a test!\")\n 100.0\n ");
|
PyDoc_STRVAR(__pyx_doc_8cpp_fuzz_2partial_ratio, "\n Searches for the optimal alignment of the shorter string in the\n longer string and returns the fuzz.ratio for this alignment.\n\n Parameters\n ----------\n s1 : Sequence[Hashable]\n First string to compare.\n s2 : Sequence[Hashable]\n Second string to compare.\n processor: bool or callable, optional\n Optional callable that is used to preprocess the strings before\n comparing them. When processor is True ``utils.default_process``\n is used. Default is None, which deactivates this behaviour.\n score_cutoff : float, optional\n Optional argument for a score threshold as a float between 0 and 100.\n For ratio < score_cutoff 0 is returned instead. Default is 0,\n which deactivates this behaviour.\n\n Returns\n -------\n similarity : float\n similarity between s1 and s2 as a float between 0 and 100\n\n Notes\n -----\n Depending on the length of the needle (shorter string) different\n implementations are used to improve the performance.\n\n short needle (length \342\211\244 64):\n When using a short needle length the fuzz.ratio is calculated for all\n alignments that could result in an optimal alignment. It is\n guaranteed to find the optimal alignment. For short needles this is very\n fast, since for them fuzz.ratio runs in ``O(N)`` time. This results in a worst\n case performance of ``O(NM)``.\n \n .. image:: img/partial_ratio_short_needle.svg\n\n long needle (length > 64):\n For long needles a similar implementation to FuzzyWuzzy is used.\n This implementation only considers alignments which start at one\n of the longest common substrings. This results in a worst case performance\n of ``O(N[N/64]M)``. However usually most of the alignments can be skipped.\n The following Python code shows the concept:\n\n .. 
code-block:: python\n\n blocks = SequenceMatcher(None,"" needle, longer, False).get_matching_blocks()\n score = 0\n for block in blocks:\n long_start = block[1] - block[0] if (block[1] - block[0]) > 0 else 0\n long_end = long_start + len(shorter)\n long_substr = longer[long_start:long_end]\n score = max(score, fuzz.ratio(needle, long_substr))\n\n This is a lot faster than checking all possible alignments. However it\n only finds one of the best alignments and not necessarily the optimal one.\n\n .. image:: img/partial_ratio_long_needle.svg\n\n Examples\n --------\n >>> fuzz.partial_ratio(\"this is a test\", \"this is a test!\")\n 100.0\n ");
|
||||||
static PyMethodDef __pyx_mdef_8cpp_fuzz_3partial_ratio = {"partial_ratio", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_8cpp_fuzz_3partial_ratio, __Pyx_METH_FASTCALL|METH_KEYWORDS, __pyx_doc_8cpp_fuzz_2partial_ratio};
|
static PyMethodDef __pyx_mdef_8cpp_fuzz_3partial_ratio = {"partial_ratio", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_8cpp_fuzz_3partial_ratio, __Pyx_METH_FASTCALL|METH_KEYWORDS, __pyx_doc_8cpp_fuzz_2partial_ratio};
|
||||||
static PyObject *__pyx_pw_8cpp_fuzz_3partial_ratio(PyObject *__pyx_self,
|
static PyObject *__pyx_pw_8cpp_fuzz_3partial_ratio(PyObject *__pyx_self,
|
||||||
#if CYTHON_METH_FASTCALL
|
#if CYTHON_METH_FASTCALL
|
||||||
|
@ -3310,7 +3310,7 @@ PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds
|
||||||
PyObject *__pyx_args, PyObject *__pyx_kwds
|
PyObject *__pyx_args, PyObject *__pyx_kwds
|
||||||
#endif
|
#endif
|
||||||
); /*proto*/
|
); /*proto*/
|
||||||
PyDoc_STRVAR(__pyx_doc_8cpp_fuzz_4token_sort_ratio, "\n Sorts the words in the strings and calculates the fuzz.ratio between them\n\n Parameters\n ----------\n s1 : str\n First string to compare.\n s2 : str\n Second string to compare.\n processor: bool or callable, optional\n Optional callable that is used to preprocess the strings before\n comparing them. When processor is True ``utils.default_process``\n is used. Default is True.\n score_cutoff : float, optional\n Optional argument for a score threshold as a float between 0 and 100.\n For ratio < score_cutoff 0 is returned instead. Default is 0,\n which deactivates this behaviour.\n\n Returns\n -------\n similarity : float\n similarity between s1 and s2 as a float between 0 and 100\n\n Notes\n -----\n .. image:: img/token_sort_ratio.svg\n\n Examples\n --------\n >>> fuzz.token_sort_ratio(\"fuzzy wuzzy was a bear\", \"wuzzy fuzzy was a bear\")\n 100.0\n ");
|
PyDoc_STRVAR(__pyx_doc_8cpp_fuzz_4token_sort_ratio, "\n Sorts the words in the strings and calculates the fuzz.ratio between them\n\n Parameters\n ----------\n s1 : Sequence[Hashable]\n First string to compare.\n s2 : Sequence[Hashable]\n Second string to compare.\n processor: bool or callable, optional\n Optional callable that is used to preprocess the strings before\n comparing them. When processor is True ``utils.default_process``\n is used. Default is True.\n score_cutoff : float, optional\n Optional argument for a score threshold as a float between 0 and 100.\n For ratio < score_cutoff 0 is returned instead. Default is 0,\n which deactivates this behaviour.\n\n Returns\n -------\n similarity : float\n similarity between s1 and s2 as a float between 0 and 100\n\n Notes\n -----\n .. image:: img/token_sort_ratio.svg\n\n Examples\n --------\n >>> fuzz.token_sort_ratio(\"fuzzy wuzzy was a bear\", \"wuzzy fuzzy was a bear\")\n 100.0\n ");
|
||||||
static PyMethodDef __pyx_mdef_8cpp_fuzz_5token_sort_ratio = {"token_sort_ratio", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_8cpp_fuzz_5token_sort_ratio, __Pyx_METH_FASTCALL|METH_KEYWORDS, __pyx_doc_8cpp_fuzz_4token_sort_ratio};
|
static PyMethodDef __pyx_mdef_8cpp_fuzz_5token_sort_ratio = {"token_sort_ratio", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_8cpp_fuzz_5token_sort_ratio, __Pyx_METH_FASTCALL|METH_KEYWORDS, __pyx_doc_8cpp_fuzz_4token_sort_ratio};
|
||||||
static PyObject *__pyx_pw_8cpp_fuzz_5token_sort_ratio(PyObject *__pyx_self,
|
static PyObject *__pyx_pw_8cpp_fuzz_5token_sort_ratio(PyObject *__pyx_self,
|
||||||
#if CYTHON_METH_FASTCALL
|
#if CYTHON_METH_FASTCALL
|
||||||
|
@ -3694,7 +3694,7 @@ PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds
|
||||||
PyObject *__pyx_args, PyObject *__pyx_kwds
|
PyObject *__pyx_args, PyObject *__pyx_kwds
|
||||||
#endif
|
#endif
|
||||||
); /*proto*/
|
); /*proto*/
|
||||||
PyDoc_STRVAR(__pyx_doc_8cpp_fuzz_6token_set_ratio, "\n Compares the words in the strings based on unique and common words between them\n using fuzz.ratio\n\n Parameters\n ----------\n s1 : str\n First string to compare.\n s2 : str\n Second string to compare.\n processor: bool or callable, optional\n Optional callable that is used to preprocess the strings before\n comparing them. When processor is True ``utils.default_process``\n is used. Default is True.\n score_cutoff : float, optional\n Optional argument for a score threshold as a float between 0 and 100.\n For ratio < score_cutoff 0 is returned instead. Default is 0,\n which deactivates this behaviour.\n\n Returns\n -------\n similarity : float\n similarity between s1 and s2 as a float between 0 and 100\n\n Notes\n -----\n .. image:: img/token_set_ratio.svg\n\n Examples\n --------\n >>> fuzz.token_sort_ratio(\"fuzzy was a bear\", \"fuzzy fuzzy was a bear\")\n 83.8709716796875\n >>> fuzz.token_set_ratio(\"fuzzy was a bear\", \"fuzzy fuzzy was a bear\")\n 100.0\n ");
|
PyDoc_STRVAR(__pyx_doc_8cpp_fuzz_6token_set_ratio, "\n Compares the words in the strings based on unique and common words between them\n using fuzz.ratio\n\n Parameters\n ----------\n s1 : Sequence[Hashable]\n First string to compare.\n s2 : Sequence[Hashable]\n Second string to compare.\n processor: bool or callable, optional\n Optional callable that is used to preprocess the strings before\n comparing them. When processor is True ``utils.default_process``\n is used. Default is True.\n score_cutoff : float, optional\n Optional argument for a score threshold as a float between 0 and 100.\n For ratio < score_cutoff 0 is returned instead. Default is 0,\n which deactivates this behaviour.\n\n Returns\n -------\n similarity : float\n similarity between s1 and s2 as a float between 0 and 100\n\n Notes\n -----\n .. image:: img/token_set_ratio.svg\n\n Examples\n --------\n >>> fuzz.token_sort_ratio(\"fuzzy was a bear\", \"fuzzy fuzzy was a bear\")\n 83.8709716796875\n >>> fuzz.token_set_ratio(\"fuzzy was a bear\", \"fuzzy fuzzy was a bear\")\n 100.0\n ");
|
||||||
static PyMethodDef __pyx_mdef_8cpp_fuzz_7token_set_ratio = {"token_set_ratio", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_8cpp_fuzz_7token_set_ratio, __Pyx_METH_FASTCALL|METH_KEYWORDS, __pyx_doc_8cpp_fuzz_6token_set_ratio};
|
static PyMethodDef __pyx_mdef_8cpp_fuzz_7token_set_ratio = {"token_set_ratio", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_8cpp_fuzz_7token_set_ratio, __Pyx_METH_FASTCALL|METH_KEYWORDS, __pyx_doc_8cpp_fuzz_6token_set_ratio};
|
||||||
static PyObject *__pyx_pw_8cpp_fuzz_7token_set_ratio(PyObject *__pyx_self,
|
static PyObject *__pyx_pw_8cpp_fuzz_7token_set_ratio(PyObject *__pyx_self,
|
||||||
#if CYTHON_METH_FASTCALL
|
#if CYTHON_METH_FASTCALL
|
||||||
|
@ -4078,7 +4078,7 @@ PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds
|
||||||
PyObject *__pyx_args, PyObject *__pyx_kwds
|
PyObject *__pyx_args, PyObject *__pyx_kwds
|
||||||
#endif
|
#endif
|
||||||
); /*proto*/
|
); /*proto*/
|
||||||
PyDoc_STRVAR(__pyx_doc_8cpp_fuzz_8token_ratio, "\n Helper method that returns the maximum of fuzz.token_set_ratio and fuzz.token_sort_ratio\n (faster than manually executing the two functions)\n\n Parameters\n ----------\n s1 : str\n First string to compare.\n s2 : str\n Second string to compare.\n processor: bool or callable, optional\n Optional callable that is used to preprocess the strings before\n comparing them. When processor is True ``utils.default_process``\n is used. Default is True.\n score_cutoff : float, optional\n Optional argument for a score threshold as a float between 0 and 100.\n For ratio < score_cutoff 0 is returned instead. Default is 0,\n which deactivates this behaviour.\n\n Returns\n -------\n similarity : float\n similarity between s1 and s2 as a float between 0 and 100\n\n Notes\n -----\n .. image:: img/token_ratio.svg\n ");
|
PyDoc_STRVAR(__pyx_doc_8cpp_fuzz_8token_ratio, "\n Helper method that returns the maximum of fuzz.token_set_ratio and fuzz.token_sort_ratio\n (faster than manually executing the two functions)\n\n Parameters\n ----------\n s1 : Sequence[Hashable]\n First string to compare.\n s2 : Sequence[Hashable]\n Second string to compare.\n processor: bool or callable, optional\n Optional callable that is used to preprocess the strings before\n comparing them. When processor is True ``utils.default_process``\n is used. Default is True.\n score_cutoff : float, optional\n Optional argument for a score threshold as a float between 0 and 100.\n For ratio < score_cutoff 0 is returned instead. Default is 0,\n which deactivates this behaviour.\n\n Returns\n -------\n similarity : float\n similarity between s1 and s2 as a float between 0 and 100\n\n Notes\n -----\n .. image:: img/token_ratio.svg\n ");
|
||||||
static PyMethodDef __pyx_mdef_8cpp_fuzz_9token_ratio = {"token_ratio", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_8cpp_fuzz_9token_ratio, __Pyx_METH_FASTCALL|METH_KEYWORDS, __pyx_doc_8cpp_fuzz_8token_ratio};
|
static PyMethodDef __pyx_mdef_8cpp_fuzz_9token_ratio = {"token_ratio", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_8cpp_fuzz_9token_ratio, __Pyx_METH_FASTCALL|METH_KEYWORDS, __pyx_doc_8cpp_fuzz_8token_ratio};
|
||||||
static PyObject *__pyx_pw_8cpp_fuzz_9token_ratio(PyObject *__pyx_self,
|
static PyObject *__pyx_pw_8cpp_fuzz_9token_ratio(PyObject *__pyx_self,
|
||||||
#if CYTHON_METH_FASTCALL
|
#if CYTHON_METH_FASTCALL
|
||||||
|
@ -4462,7 +4462,7 @@ PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds
|
||||||
PyObject *__pyx_args, PyObject *__pyx_kwds
|
PyObject *__pyx_args, PyObject *__pyx_kwds
|
||||||
#endif
|
#endif
|
||||||
); /*proto*/
|
); /*proto*/
|
||||||
PyDoc_STRVAR(__pyx_doc_8cpp_fuzz_10partial_token_sort_ratio, "\n sorts the words in the strings and calculates the fuzz.partial_ratio between them\n\n Parameters\n ----------\n s1 : str\n First string to compare.\n s2 : str\n Second string to compare.\n processor: bool or callable, optional\n Optional callable that is used to preprocess the strings before\n comparing them. When processor is True ``utils.default_process``\n is used. Default is True.\n score_cutoff : float, optional\n Optional argument for a score threshold as a float between 0 and 100.\n For ratio < score_cutoff 0 is returned instead. Default is 0,\n which deactivates this behaviour.\n\n Returns\n -------\n similarity : float\n similarity between s1 and s2 as a float between 0 and 100\n\n Notes\n -----\n .. image:: img/partial_token_sort_ratio.svg\n ");
|
PyDoc_STRVAR(__pyx_doc_8cpp_fuzz_10partial_token_sort_ratio, "\n sorts the words in the strings and calculates the fuzz.partial_ratio between them\n\n Parameters\n ----------\n s1 : Sequence[Hashable]\n First string to compare.\n s2 : Sequence[Hashable]\n Second string to compare.\n processor: bool or callable, optional\n Optional callable that is used to preprocess the strings before\n comparing them. When processor is True ``utils.default_process``\n is used. Default is True.\n score_cutoff : float, optional\n Optional argument for a score threshold as a float between 0 and 100.\n For ratio < score_cutoff 0 is returned instead. Default is 0,\n which deactivates this behaviour.\n\n Returns\n -------\n similarity : float\n similarity between s1 and s2 as a float between 0 and 100\n\n Notes\n -----\n .. image:: img/partial_token_sort_ratio.svg\n ");
|
||||||
static PyMethodDef __pyx_mdef_8cpp_fuzz_11partial_token_sort_ratio = {"partial_token_sort_ratio", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_8cpp_fuzz_11partial_token_sort_ratio, __Pyx_METH_FASTCALL|METH_KEYWORDS, __pyx_doc_8cpp_fuzz_10partial_token_sort_ratio};
|
static PyMethodDef __pyx_mdef_8cpp_fuzz_11partial_token_sort_ratio = {"partial_token_sort_ratio", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_8cpp_fuzz_11partial_token_sort_ratio, __Pyx_METH_FASTCALL|METH_KEYWORDS, __pyx_doc_8cpp_fuzz_10partial_token_sort_ratio};
|
||||||
static PyObject *__pyx_pw_8cpp_fuzz_11partial_token_sort_ratio(PyObject *__pyx_self,
|
static PyObject *__pyx_pw_8cpp_fuzz_11partial_token_sort_ratio(PyObject *__pyx_self,
|
||||||
#if CYTHON_METH_FASTCALL
|
#if CYTHON_METH_FASTCALL
|
||||||
|
@ -4846,7 +4846,7 @@ PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds
|
||||||
PyObject *__pyx_args, PyObject *__pyx_kwds
|
PyObject *__pyx_args, PyObject *__pyx_kwds
|
||||||
#endif
|
#endif
|
||||||
); /*proto*/
|
); /*proto*/
|
||||||
PyDoc_STRVAR(__pyx_doc_8cpp_fuzz_12partial_token_set_ratio, "\n Compares the words in the strings based on unique and common words between them\n using fuzz.partial_ratio\n\n Parameters\n ----------\n s1 : str\n First string to compare.\n s2 : str\n Second string to compare.\n processor: bool or callable, optional\n Optional callable that is used to preprocess the strings before\n comparing them. When processor is True ``utils.default_process``\n is used. Default is True.\n score_cutoff : float, optional\n Optional argument for a score threshold as a float between 0 and 100.\n For ratio < score_cutoff 0 is returned instead. Default is 0,\n which deactivates this behaviour.\n\n Returns\n -------\n similarity : float\n similarity between s1 and s2 as a float between 0 and 100\n\n Notes\n -----\n .. image:: img/partial_token_set_ratio.svg\n ");
|
PyDoc_STRVAR(__pyx_doc_8cpp_fuzz_12partial_token_set_ratio, "\n Compares the words in the strings based on unique and common words between them\n using fuzz.partial_ratio\n\n Parameters\n ----------\n s1 : Sequence[Hashable]\n First string to compare.\n s2 : Sequence[Hashable]\n Second string to compare.\n processor: bool or callable, optional\n Optional callable that is used to preprocess the strings before\n comparing them. When processor is True ``utils.default_process``\n is used. Default is True.\n score_cutoff : float, optional\n Optional argument for a score threshold as a float between 0 and 100.\n For ratio < score_cutoff 0 is returned instead. Default is 0,\n which deactivates this behaviour.\n\n Returns\n -------\n similarity : float\n similarity between s1 and s2 as a float between 0 and 100\n\n Notes\n -----\n .. image:: img/partial_token_set_ratio.svg\n ");
|
||||||
static PyMethodDef __pyx_mdef_8cpp_fuzz_13partial_token_set_ratio = {"partial_token_set_ratio", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_8cpp_fuzz_13partial_token_set_ratio, __Pyx_METH_FASTCALL|METH_KEYWORDS, __pyx_doc_8cpp_fuzz_12partial_token_set_ratio};
|
static PyMethodDef __pyx_mdef_8cpp_fuzz_13partial_token_set_ratio = {"partial_token_set_ratio", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_8cpp_fuzz_13partial_token_set_ratio, __Pyx_METH_FASTCALL|METH_KEYWORDS, __pyx_doc_8cpp_fuzz_12partial_token_set_ratio};
|
||||||
static PyObject *__pyx_pw_8cpp_fuzz_13partial_token_set_ratio(PyObject *__pyx_self,
|
static PyObject *__pyx_pw_8cpp_fuzz_13partial_token_set_ratio(PyObject *__pyx_self,
|
||||||
#if CYTHON_METH_FASTCALL
|
#if CYTHON_METH_FASTCALL
|
||||||
|
@ -5230,7 +5230,7 @@ PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds
|
||||||
PyObject *__pyx_args, PyObject *__pyx_kwds
|
PyObject *__pyx_args, PyObject *__pyx_kwds
|
||||||
#endif
|
#endif
|
||||||
); /*proto*/
|
); /*proto*/
|
||||||
PyDoc_STRVAR(__pyx_doc_8cpp_fuzz_14partial_token_ratio, "\n Helper method that returns the maximum of fuzz.partial_token_set_ratio and\n fuzz.partial_token_sort_ratio (faster than manually executing the two functions)\n\n Parameters\n ----------\n s1 : str\n First string to compare.\n s2 : str\n Second string to compare.\n processor: bool or callable, optional\n Optional callable that is used to preprocess the strings before\n comparing them. When processor is True ``utils.default_process``\n is used. Default is True.\n score_cutoff : float, optional\n Optional argument for a score threshold as a float between 0 and 100.\n For ratio < score_cutoff 0 is returned instead. Default is 0,\n which deactivates this behaviour.\n\n Returns\n -------\n similarity : float\n similarity between s1 and s2 as a float between 0 and 100\n\n Notes\n -----\n .. image:: img/partial_token_ratio.svg\n ");
|
PyDoc_STRVAR(__pyx_doc_8cpp_fuzz_14partial_token_ratio, "\n Helper method that returns the maximum of fuzz.partial_token_set_ratio and\n fuzz.partial_token_sort_ratio (faster than manually executing the two functions)\n\n Parameters\n ----------\n s1 : Sequence[Hashable]\n First string to compare.\n s2 : Sequence[Hashable]\n Second string to compare.\n processor: bool or callable, optional\n Optional callable that is used to preprocess the strings before\n comparing them. When processor is True ``utils.default_process``\n is used. Default is True.\n score_cutoff : float, optional\n Optional argument for a score threshold as a float between 0 and 100.\n For ratio < score_cutoff 0 is returned instead. Default is 0,\n which deactivates this behaviour.\n\n Returns\n -------\n similarity : float\n similarity between s1 and s2 as a float between 0 and 100\n\n Notes\n -----\n .. image:: img/partial_token_ratio.svg\n ");
|
||||||
static PyMethodDef __pyx_mdef_8cpp_fuzz_15partial_token_ratio = {"partial_token_ratio", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_8cpp_fuzz_15partial_token_ratio, __Pyx_METH_FASTCALL|METH_KEYWORDS, __pyx_doc_8cpp_fuzz_14partial_token_ratio};
|
static PyMethodDef __pyx_mdef_8cpp_fuzz_15partial_token_ratio = {"partial_token_ratio", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_8cpp_fuzz_15partial_token_ratio, __Pyx_METH_FASTCALL|METH_KEYWORDS, __pyx_doc_8cpp_fuzz_14partial_token_ratio};
|
||||||
static PyObject *__pyx_pw_8cpp_fuzz_15partial_token_ratio(PyObject *__pyx_self,
|
static PyObject *__pyx_pw_8cpp_fuzz_15partial_token_ratio(PyObject *__pyx_self,
|
||||||
#if CYTHON_METH_FASTCALL
|
#if CYTHON_METH_FASTCALL
|
||||||
|
@ -5614,7 +5614,7 @@ PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds
|
||||||
PyObject *__pyx_args, PyObject *__pyx_kwds
|
PyObject *__pyx_args, PyObject *__pyx_kwds
|
||||||
#endif
|
#endif
|
||||||
); /*proto*/
|
); /*proto*/
|
||||||
PyDoc_STRVAR(__pyx_doc_8cpp_fuzz_16WRatio, "\n Calculates a weighted ratio based on the other ratio algorithms\n\n Parameters\n ----------\n s1 : str\n First string to compare.\n s2 : str\n Second string to compare.\n processor: bool or callable, optional\n Optional callable that is used to preprocess the strings before\n comparing them. When processor is True ``utils.default_process``\n is used. Default is True.\n score_cutoff : float, optional\n Optional argument for a score threshold as a float between 0 and 100.\n For ratio < score_cutoff 0 is returned instead. Default is 0,\n which deactivates this behaviour.\n\n Returns\n -------\n similarity : float\n similarity between s1 and s2 as a float between 0 and 100\n\n Notes\n -----\n .. image:: img/WRatio.svg\n ");
|
PyDoc_STRVAR(__pyx_doc_8cpp_fuzz_16WRatio, "\n Calculates a weighted ratio based on the other ratio algorithms\n\n Parameters\n ----------\n s1 : Sequence[Hashable]\n First string to compare.\n s2 : Sequence[Hashable]\n Second string to compare.\n processor: bool or callable, optional\n Optional callable that is used to preprocess the strings before\n comparing them. When processor is True ``utils.default_process``\n is used. Default is True.\n score_cutoff : float, optional\n Optional argument for a score threshold as a float between 0 and 100.\n For ratio < score_cutoff 0 is returned instead. Default is 0,\n which deactivates this behaviour.\n\n Returns\n -------\n similarity : float\n similarity between s1 and s2 as a float between 0 and 100\n\n Notes\n -----\n .. image:: img/WRatio.svg\n ");
|
||||||
static PyMethodDef __pyx_mdef_8cpp_fuzz_17WRatio = {"WRatio", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_8cpp_fuzz_17WRatio, __Pyx_METH_FASTCALL|METH_KEYWORDS, __pyx_doc_8cpp_fuzz_16WRatio};
|
static PyMethodDef __pyx_mdef_8cpp_fuzz_17WRatio = {"WRatio", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_8cpp_fuzz_17WRatio, __Pyx_METH_FASTCALL|METH_KEYWORDS, __pyx_doc_8cpp_fuzz_16WRatio};
|
||||||
static PyObject *__pyx_pw_8cpp_fuzz_17WRatio(PyObject *__pyx_self,
|
static PyObject *__pyx_pw_8cpp_fuzz_17WRatio(PyObject *__pyx_self,
|
||||||
#if CYTHON_METH_FASTCALL
|
#if CYTHON_METH_FASTCALL
|
||||||
|
@ -5998,7 +5998,7 @@ PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds
|
||||||
PyObject *__pyx_args, PyObject *__pyx_kwds
|
PyObject *__pyx_args, PyObject *__pyx_kwds
|
||||||
#endif
|
#endif
|
||||||
); /*proto*/
|
); /*proto*/
|
||||||
PyDoc_STRVAR(__pyx_doc_8cpp_fuzz_18QRatio, "\n Calculates a quick ratio between two strings using fuzz.ratio.\n The only difference to fuzz.ratio is, that this preprocesses\n the strings by default.\n\n Parameters\n ----------\n s1 : str\n First string to compare.\n s2 : str\n Second string to compare.\n processor: bool or callable, optional\n Optional callable that is used to preprocess the strings before\n comparing them. When processor is True ``utils.default_process``\n is used. Default is True.\n score_cutoff : float, optional\n Optional argument for a score threshold as a float between 0 and 100.\n For ratio < score_cutoff 0 is returned instead. Default is 0,\n which deactivates this behaviour.\n\n Returns\n -------\n similarity : float\n similarity between s1 and s2 as a float between 0 and 100\n\n Examples\n --------\n >>> fuzz.QRatio(\"this is a test\", \"THIS is a test!\")\n 100.0\n ");
|
PyDoc_STRVAR(__pyx_doc_8cpp_fuzz_18QRatio, "\n Calculates a quick ratio between two strings using fuzz.ratio.\n The only difference to fuzz.ratio is, that this preprocesses\n the strings by default.\n\n Parameters\n ----------\n s1 : Sequence[Hashable]\n First string to compare.\n s2 : Sequence[Hashable]\n Second string to compare.\n processor: bool or callable, optional\n Optional callable that is used to preprocess the strings before\n comparing them. When processor is True ``utils.default_process``\n is used. Default is True.\n score_cutoff : float, optional\n Optional argument for a score threshold as a float between 0 and 100.\n For ratio < score_cutoff 0 is returned instead. Default is 0,\n which deactivates this behaviour.\n\n Returns\n -------\n similarity : float\n similarity between s1 and s2 as a float between 0 and 100\n\n Examples\n --------\n >>> fuzz.QRatio(\"this is a test\", \"THIS is a test!\")\n 100.0\n ");
|
||||||
static PyMethodDef __pyx_mdef_8cpp_fuzz_19QRatio = {"QRatio", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_8cpp_fuzz_19QRatio, __Pyx_METH_FASTCALL|METH_KEYWORDS, __pyx_doc_8cpp_fuzz_18QRatio};
|
static PyMethodDef __pyx_mdef_8cpp_fuzz_19QRatio = {"QRatio", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_8cpp_fuzz_19QRatio, __Pyx_METH_FASTCALL|METH_KEYWORDS, __pyx_doc_8cpp_fuzz_18QRatio};
|
||||||
static PyObject *__pyx_pw_8cpp_fuzz_19QRatio(PyObject *__pyx_self,
|
static PyObject *__pyx_pw_8cpp_fuzz_19QRatio(PyObject *__pyx_self,
|
||||||
#if CYTHON_METH_FASTCALL
|
#if CYTHON_METH_FASTCALL
|
||||||
|
@ -6364,8 +6364,8 @@ static PyObject *__pyx_pf_8cpp_fuzz_18QRatio(CYTHON_UNUSED PyObject *__pyx_self,
|
||||||
return __pyx_r;
|
return __pyx_r;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* "cpp_common.pxd":27
|
/* "cpp_common.pxd":28
|
||||||
* void validate_string(object py_str, const char* err) except +
|
* proc_string default_process_func(proc_string sentence) except +
|
||||||
*
|
*
|
||||||
* cdef inline proc_string hash_array(arr) except *: # <<<<<<<<<<<<<<
|
* cdef inline proc_string hash_array(arr) except *: # <<<<<<<<<<<<<<
|
||||||
* # TODO on Cpython this does not require any copies
|
* # TODO on Cpython this does not require any copies
|
||||||
|
@ -6407,30 +6407,30 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
|
||||||
int __pyx_clineno = 0;
|
int __pyx_clineno = 0;
|
||||||
__Pyx_RefNannySetupContext("hash_array", 0);
|
__Pyx_RefNannySetupContext("hash_array", 0);
|
||||||
|
|
||||||
/* "cpp_common.pxd":30
|
/* "cpp_common.pxd":31
|
||||||
* # TODO on Cpython this does not require any copies
|
* # TODO on Cpython this does not require any copies
|
||||||
* cdef proc_string s_proc
|
* cdef proc_string s_proc
|
||||||
* cdef Py_UCS4 typecode = <Py_UCS4>arr.typecode # <<<<<<<<<<<<<<
|
* cdef Py_UCS4 typecode = <Py_UCS4>arr.typecode # <<<<<<<<<<<<<<
|
||||||
* s_proc.length = <size_t>len(arr)
|
* s_proc.length = <size_t>len(arr)
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
__pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_arr, __pyx_n_s_typecode); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 30, __pyx_L1_error)
|
__pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_arr, __pyx_n_s_typecode); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 31, __pyx_L1_error)
|
||||||
__Pyx_GOTREF(__pyx_t_1);
|
__Pyx_GOTREF(__pyx_t_1);
|
||||||
__pyx_t_2 = __Pyx_PyObject_AsPy_UCS4(__pyx_t_1); if (unlikely((__pyx_t_2 == (Py_UCS4)-1) && PyErr_Occurred())) __PYX_ERR(1, 30, __pyx_L1_error)
|
__pyx_t_2 = __Pyx_PyObject_AsPy_UCS4(__pyx_t_1); if (unlikely((__pyx_t_2 == (Py_UCS4)-1) && PyErr_Occurred())) __PYX_ERR(1, 31, __pyx_L1_error)
|
||||||
__Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
|
__Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
|
||||||
__pyx_v_typecode = ((Py_UCS4)__pyx_t_2);
|
__pyx_v_typecode = ((Py_UCS4)__pyx_t_2);
|
||||||
|
|
||||||
/* "cpp_common.pxd":31
|
/* "cpp_common.pxd":32
|
||||||
* cdef proc_string s_proc
|
* cdef proc_string s_proc
|
||||||
* cdef Py_UCS4 typecode = <Py_UCS4>arr.typecode
|
* cdef Py_UCS4 typecode = <Py_UCS4>arr.typecode
|
||||||
* s_proc.length = <size_t>len(arr) # <<<<<<<<<<<<<<
|
* s_proc.length = <size_t>len(arr) # <<<<<<<<<<<<<<
|
||||||
*
|
*
|
||||||
* s_proc.data = malloc(s_proc.length * sizeof(uint64_t))
|
* s_proc.data = malloc(s_proc.length * sizeof(uint64_t))
|
||||||
*/
|
*/
|
||||||
__pyx_t_3 = PyObject_Length(__pyx_v_arr); if (unlikely(__pyx_t_3 == ((Py_ssize_t)-1))) __PYX_ERR(1, 31, __pyx_L1_error)
|
__pyx_t_3 = PyObject_Length(__pyx_v_arr); if (unlikely(__pyx_t_3 == ((Py_ssize_t)-1))) __PYX_ERR(1, 32, __pyx_L1_error)
|
||||||
__pyx_v_s_proc.length = ((size_t)__pyx_t_3);
|
__pyx_v_s_proc.length = ((size_t)__pyx_t_3);
|
||||||
|
|
||||||
/* "cpp_common.pxd":33
|
/* "cpp_common.pxd":34
|
||||||
* s_proc.length = <size_t>len(arr)
|
* s_proc.length = <size_t>len(arr)
|
||||||
*
|
*
|
||||||
* s_proc.data = malloc(s_proc.length * sizeof(uint64_t)) # <<<<<<<<<<<<<<
|
* s_proc.data = malloc(s_proc.length * sizeof(uint64_t)) # <<<<<<<<<<<<<<
|
||||||
|
@ -6439,7 +6439,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
|
||||||
*/
|
*/
|
||||||
__pyx_v_s_proc.data = malloc((__pyx_v_s_proc.length * (sizeof(uint64_t))));
|
__pyx_v_s_proc.data = malloc((__pyx_v_s_proc.length * (sizeof(uint64_t))));
|
||||||
|
|
||||||
/* "cpp_common.pxd":35
|
/* "cpp_common.pxd":36
|
||||||
* s_proc.data = malloc(s_proc.length * sizeof(uint64_t))
|
* s_proc.data = malloc(s_proc.length * sizeof(uint64_t))
|
||||||
*
|
*
|
||||||
* if s_proc.data == NULL: # <<<<<<<<<<<<<<
|
* if s_proc.data == NULL: # <<<<<<<<<<<<<<
|
||||||
|
@ -6449,16 +6449,16 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
|
||||||
__pyx_t_4 = ((__pyx_v_s_proc.data == NULL) != 0);
|
__pyx_t_4 = ((__pyx_v_s_proc.data == NULL) != 0);
|
||||||
if (unlikely(__pyx_t_4)) {
|
if (unlikely(__pyx_t_4)) {
|
||||||
|
|
||||||
/* "cpp_common.pxd":36
|
/* "cpp_common.pxd":37
|
||||||
*
|
*
|
||||||
* if s_proc.data == NULL:
|
* if s_proc.data == NULL:
|
||||||
* raise MemoryError # <<<<<<<<<<<<<<
|
* raise MemoryError # <<<<<<<<<<<<<<
|
||||||
*
|
*
|
||||||
* try:
|
* try:
|
||||||
*/
|
*/
|
||||||
PyErr_NoMemory(); __PYX_ERR(1, 36, __pyx_L1_error)
|
PyErr_NoMemory(); __PYX_ERR(1, 37, __pyx_L1_error)
|
||||||
|
|
||||||
/* "cpp_common.pxd":35
|
/* "cpp_common.pxd":36
|
||||||
* s_proc.data = malloc(s_proc.length * sizeof(uint64_t))
|
* s_proc.data = malloc(s_proc.length * sizeof(uint64_t))
|
||||||
*
|
*
|
||||||
* if s_proc.data == NULL: # <<<<<<<<<<<<<<
|
* if s_proc.data == NULL: # <<<<<<<<<<<<<<
|
||||||
|
@ -6467,7 +6467,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
|
||||||
*/
|
*/
|
||||||
}
|
}
|
||||||
|
|
||||||
/* "cpp_common.pxd":38
|
/* "cpp_common.pxd":39
|
||||||
* raise MemoryError
|
* raise MemoryError
|
||||||
*
|
*
|
||||||
* try: # <<<<<<<<<<<<<<
|
* try: # <<<<<<<<<<<<<<
|
||||||
|
@ -6483,7 +6483,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
|
||||||
__Pyx_XGOTREF(__pyx_t_7);
|
__Pyx_XGOTREF(__pyx_t_7);
|
||||||
/*try:*/ {
|
/*try:*/ {
|
||||||
|
|
||||||
/* "cpp_common.pxd":40
|
/* "cpp_common.pxd":41
|
||||||
* try:
|
* try:
|
||||||
* # ignore signed/unsigned, since it is not relevant in any of the algorithms
|
* # ignore signed/unsigned, since it is not relevant in any of the algorithms
|
||||||
* if typecode in {'b', 'B'}: # signed/unsigned char # <<<<<<<<<<<<<<
|
* if typecode in {'b', 'B'}: # signed/unsigned char # <<<<<<<<<<<<<<
|
||||||
|
@ -6494,7 +6494,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
|
||||||
case 98:
|
case 98:
|
||||||
case 66:
|
case 66:
|
||||||
|
|
||||||
/* "cpp_common.pxd":41
|
/* "cpp_common.pxd":42
|
||||||
* # ignore signed/unsigned, since it is not relevant in any of the algorithms
|
* # ignore signed/unsigned, since it is not relevant in any of the algorithms
|
||||||
* if typecode in {'b', 'B'}: # signed/unsigned char
|
* if typecode in {'b', 'B'}: # signed/unsigned char
|
||||||
* s_proc.kind = RAPIDFUZZ_UINT64 # <<<<<<<<<<<<<<
|
* s_proc.kind = RAPIDFUZZ_UINT64 # <<<<<<<<<<<<<<
|
||||||
|
@ -6503,7 +6503,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
|
||||||
*/
|
*/
|
||||||
__pyx_v_s_proc.kind = RAPIDFUZZ_UINT64;
|
__pyx_v_s_proc.kind = RAPIDFUZZ_UINT64;
|
||||||
|
|
||||||
/* "cpp_common.pxd":42
|
/* "cpp_common.pxd":43
|
||||||
* if typecode in {'b', 'B'}: # signed/unsigned char
|
* if typecode in {'b', 'B'}: # signed/unsigned char
|
||||||
* s_proc.kind = RAPIDFUZZ_UINT64
|
* s_proc.kind = RAPIDFUZZ_UINT64
|
||||||
* for i in range(s_proc.length): # <<<<<<<<<<<<<<
|
* for i in range(s_proc.length): # <<<<<<<<<<<<<<
|
||||||
|
@ -6515,21 +6515,21 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
|
||||||
for (__pyx_t_10 = 0; __pyx_t_10 < __pyx_t_9; __pyx_t_10+=1) {
|
for (__pyx_t_10 = 0; __pyx_t_10 < __pyx_t_9; __pyx_t_10+=1) {
|
||||||
__pyx_v_i = __pyx_t_10;
|
__pyx_v_i = __pyx_t_10;
|
||||||
|
|
||||||
/* "cpp_common.pxd":43
|
/* "cpp_common.pxd":44
|
||||||
* s_proc.kind = RAPIDFUZZ_UINT64
|
* s_proc.kind = RAPIDFUZZ_UINT64
|
||||||
* for i in range(s_proc.length):
|
* for i in range(s_proc.length):
|
||||||
* (<uint64_t*>s_proc.data)[i] = <uint64_t>arr[i] # <<<<<<<<<<<<<<
|
* (<uint64_t*>s_proc.data)[i] = <uint64_t>arr[i] # <<<<<<<<<<<<<<
|
||||||
* elif typecode == 'u': # 'u' wchar_t
|
* elif typecode == 'u': # 'u' wchar_t
|
||||||
* s_proc.kind = RAPIDFUZZ_UINT64
|
* s_proc.kind = RAPIDFUZZ_UINT64
|
||||||
*/
|
*/
|
||||||
__pyx_t_1 = __Pyx_GetItemInt(__pyx_v_arr, __pyx_v_i, size_t, 0, __Pyx_PyInt_FromSize_t, 0, 0, 1); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 43, __pyx_L4_error)
|
__pyx_t_1 = __Pyx_GetItemInt(__pyx_v_arr, __pyx_v_i, size_t, 0, __Pyx_PyInt_FromSize_t, 0, 0, 1); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 44, __pyx_L4_error)
|
||||||
__Pyx_GOTREF(__pyx_t_1);
|
__Pyx_GOTREF(__pyx_t_1);
|
||||||
__pyx_t_11 = __Pyx_PyInt_As_uint64_t(__pyx_t_1); if (unlikely((__pyx_t_11 == ((uint64_t)-1)) && PyErr_Occurred())) __PYX_ERR(1, 43, __pyx_L4_error)
|
__pyx_t_11 = __Pyx_PyInt_As_uint64_t(__pyx_t_1); if (unlikely((__pyx_t_11 == ((uint64_t)-1)) && PyErr_Occurred())) __PYX_ERR(1, 44, __pyx_L4_error)
|
||||||
__Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
|
__Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
|
||||||
(((uint64_t *)__pyx_v_s_proc.data)[__pyx_v_i]) = ((uint64_t)__pyx_t_11);
|
(((uint64_t *)__pyx_v_s_proc.data)[__pyx_v_i]) = ((uint64_t)__pyx_t_11);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* "cpp_common.pxd":40
|
/* "cpp_common.pxd":41
|
||||||
* try:
|
* try:
|
||||||
* # ignore signed/unsigned, since it is not relevant in any of the algorithms
|
* # ignore signed/unsigned, since it is not relevant in any of the algorithms
|
||||||
* if typecode in {'b', 'B'}: # signed/unsigned char # <<<<<<<<<<<<<<
|
* if typecode in {'b', 'B'}: # signed/unsigned char # <<<<<<<<<<<<<<
|
||||||
|
@ -6539,7 +6539,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
|
||||||
break;
|
break;
|
||||||
case 0x75:
|
case 0x75:
|
||||||
|
|
||||||
/* "cpp_common.pxd":45
|
/* "cpp_common.pxd":46
|
||||||
* (<uint64_t*>s_proc.data)[i] = <uint64_t>arr[i]
|
* (<uint64_t*>s_proc.data)[i] = <uint64_t>arr[i]
|
||||||
* elif typecode == 'u': # 'u' wchar_t
|
* elif typecode == 'u': # 'u' wchar_t
|
||||||
* s_proc.kind = RAPIDFUZZ_UINT64 # <<<<<<<<<<<<<<
|
* s_proc.kind = RAPIDFUZZ_UINT64 # <<<<<<<<<<<<<<
|
||||||
|
@ -6548,7 +6548,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
|
||||||
*/
|
*/
|
||||||
__pyx_v_s_proc.kind = RAPIDFUZZ_UINT64;
|
__pyx_v_s_proc.kind = RAPIDFUZZ_UINT64;
|
||||||
|
|
||||||
/* "cpp_common.pxd":46
|
/* "cpp_common.pxd":47
|
||||||
* elif typecode == 'u': # 'u' wchar_t
|
* elif typecode == 'u': # 'u' wchar_t
|
||||||
* s_proc.kind = RAPIDFUZZ_UINT64
|
* s_proc.kind = RAPIDFUZZ_UINT64
|
||||||
* for i in range(s_proc.length): # <<<<<<<<<<<<<<
|
* for i in range(s_proc.length): # <<<<<<<<<<<<<<
|
||||||
|
@ -6560,21 +6560,21 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
|
||||||
for (__pyx_t_10 = 0; __pyx_t_10 < __pyx_t_9; __pyx_t_10+=1) {
|
for (__pyx_t_10 = 0; __pyx_t_10 < __pyx_t_9; __pyx_t_10+=1) {
|
||||||
__pyx_v_i = __pyx_t_10;
|
__pyx_v_i = __pyx_t_10;
|
||||||
|
|
||||||
/* "cpp_common.pxd":47
|
/* "cpp_common.pxd":48
|
||||||
* s_proc.kind = RAPIDFUZZ_UINT64
|
* s_proc.kind = RAPIDFUZZ_UINT64
|
||||||
* for i in range(s_proc.length):
|
* for i in range(s_proc.length):
|
||||||
* (<uint64_t*>s_proc.data)[i] = <uint64_t><Py_UCS4>arr[i] # <<<<<<<<<<<<<<
|
* (<uint64_t*>s_proc.data)[i] = <uint64_t><Py_UCS4>arr[i] # <<<<<<<<<<<<<<
|
||||||
* elif typecode in {'h', 'H'}: # signed/unsigned short
|
* elif typecode in {'h', 'H'}: # signed/unsigned short
|
||||||
* s_proc.kind = RAPIDFUZZ_UINT64
|
* s_proc.kind = RAPIDFUZZ_UINT64
|
||||||
*/
|
*/
|
||||||
__pyx_t_1 = __Pyx_GetItemInt(__pyx_v_arr, __pyx_v_i, size_t, 0, __Pyx_PyInt_FromSize_t, 0, 0, 1); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 47, __pyx_L4_error)
|
__pyx_t_1 = __Pyx_GetItemInt(__pyx_v_arr, __pyx_v_i, size_t, 0, __Pyx_PyInt_FromSize_t, 0, 0, 1); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 48, __pyx_L4_error)
|
||||||
__Pyx_GOTREF(__pyx_t_1);
|
__Pyx_GOTREF(__pyx_t_1);
|
||||||
__pyx_t_2 = __Pyx_PyObject_AsPy_UCS4(__pyx_t_1); if (unlikely((__pyx_t_2 == (Py_UCS4)-1) && PyErr_Occurred())) __PYX_ERR(1, 47, __pyx_L4_error)
|
__pyx_t_2 = __Pyx_PyObject_AsPy_UCS4(__pyx_t_1); if (unlikely((__pyx_t_2 == (Py_UCS4)-1) && PyErr_Occurred())) __PYX_ERR(1, 48, __pyx_L4_error)
|
||||||
__Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
|
__Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
|
||||||
(((uint64_t *)__pyx_v_s_proc.data)[__pyx_v_i]) = ((uint64_t)((Py_UCS4)__pyx_t_2));
|
(((uint64_t *)__pyx_v_s_proc.data)[__pyx_v_i]) = ((uint64_t)((Py_UCS4)__pyx_t_2));
|
||||||
}
|
}
|
||||||
|
|
||||||
/* "cpp_common.pxd":44
|
/* "cpp_common.pxd":45
|
||||||
* for i in range(s_proc.length):
|
* for i in range(s_proc.length):
|
||||||
* (<uint64_t*>s_proc.data)[i] = <uint64_t>arr[i]
|
* (<uint64_t*>s_proc.data)[i] = <uint64_t>arr[i]
|
||||||
* elif typecode == 'u': # 'u' wchar_t # <<<<<<<<<<<<<<
|
* elif typecode == 'u': # 'u' wchar_t # <<<<<<<<<<<<<<
|
||||||
|
@ -6584,7 +6584,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
|
||||||
break;
|
break;
|
||||||
case 0x68:
|
case 0x68:
|
||||||
|
|
||||||
/* "cpp_common.pxd":48
|
/* "cpp_common.pxd":49
|
||||||
* for i in range(s_proc.length):
|
* for i in range(s_proc.length):
|
||||||
* (<uint64_t*>s_proc.data)[i] = <uint64_t><Py_UCS4>arr[i]
|
* (<uint64_t*>s_proc.data)[i] = <uint64_t><Py_UCS4>arr[i]
|
||||||
* elif typecode in {'h', 'H'}: # signed/unsigned short # <<<<<<<<<<<<<<
|
* elif typecode in {'h', 'H'}: # signed/unsigned short # <<<<<<<<<<<<<<
|
||||||
|
@ -6593,7 +6593,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
|
||||||
*/
|
*/
|
||||||
case 72:
|
case 72:
|
||||||
|
|
||||||
/* "cpp_common.pxd":49
|
/* "cpp_common.pxd":50
|
||||||
* (<uint64_t*>s_proc.data)[i] = <uint64_t><Py_UCS4>arr[i]
|
* (<uint64_t*>s_proc.data)[i] = <uint64_t><Py_UCS4>arr[i]
|
||||||
* elif typecode in {'h', 'H'}: # signed/unsigned short
|
* elif typecode in {'h', 'H'}: # signed/unsigned short
|
||||||
* s_proc.kind = RAPIDFUZZ_UINT64 # <<<<<<<<<<<<<<
|
* s_proc.kind = RAPIDFUZZ_UINT64 # <<<<<<<<<<<<<<
|
||||||
|
@ -6602,7 +6602,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
|
||||||
*/
|
*/
|
||||||
__pyx_v_s_proc.kind = RAPIDFUZZ_UINT64;
|
__pyx_v_s_proc.kind = RAPIDFUZZ_UINT64;
|
||||||
|
|
||||||
/* "cpp_common.pxd":50
|
/* "cpp_common.pxd":51
|
||||||
* elif typecode in {'h', 'H'}: # signed/unsigned short
|
* elif typecode in {'h', 'H'}: # signed/unsigned short
|
||||||
* s_proc.kind = RAPIDFUZZ_UINT64
|
* s_proc.kind = RAPIDFUZZ_UINT64
|
||||||
* for i in range(s_proc.length): # <<<<<<<<<<<<<<
|
* for i in range(s_proc.length): # <<<<<<<<<<<<<<
|
||||||
|
@ -6614,21 +6614,21 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
|
||||||
for (__pyx_t_10 = 0; __pyx_t_10 < __pyx_t_9; __pyx_t_10+=1) {
|
for (__pyx_t_10 = 0; __pyx_t_10 < __pyx_t_9; __pyx_t_10+=1) {
|
||||||
__pyx_v_i = __pyx_t_10;
|
__pyx_v_i = __pyx_t_10;
|
||||||
|
|
||||||
/* "cpp_common.pxd":51
|
/* "cpp_common.pxd":52
|
||||||
* s_proc.kind = RAPIDFUZZ_UINT64
|
* s_proc.kind = RAPIDFUZZ_UINT64
|
||||||
* for i in range(s_proc.length):
|
* for i in range(s_proc.length):
|
||||||
* (<uint64_t*>s_proc.data)[i] = <uint64_t>arr[i] # <<<<<<<<<<<<<<
|
* (<uint64_t*>s_proc.data)[i] = <uint64_t>arr[i] # <<<<<<<<<<<<<<
|
||||||
* elif typecode in {'i', 'I'}: # signed/unsigned int
|
* elif typecode in {'i', 'I'}: # signed/unsigned int
|
||||||
* s_proc.kind = RAPIDFUZZ_UINT64
|
* s_proc.kind = RAPIDFUZZ_UINT64
|
||||||
*/
|
*/
|
||||||
__pyx_t_1 = __Pyx_GetItemInt(__pyx_v_arr, __pyx_v_i, size_t, 0, __Pyx_PyInt_FromSize_t, 0, 0, 1); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 51, __pyx_L4_error)
|
__pyx_t_1 = __Pyx_GetItemInt(__pyx_v_arr, __pyx_v_i, size_t, 0, __Pyx_PyInt_FromSize_t, 0, 0, 1); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 52, __pyx_L4_error)
|
||||||
__Pyx_GOTREF(__pyx_t_1);
|
__Pyx_GOTREF(__pyx_t_1);
|
||||||
__pyx_t_11 = __Pyx_PyInt_As_uint64_t(__pyx_t_1); if (unlikely((__pyx_t_11 == ((uint64_t)-1)) && PyErr_Occurred())) __PYX_ERR(1, 51, __pyx_L4_error)
|
__pyx_t_11 = __Pyx_PyInt_As_uint64_t(__pyx_t_1); if (unlikely((__pyx_t_11 == ((uint64_t)-1)) && PyErr_Occurred())) __PYX_ERR(1, 52, __pyx_L4_error)
|
||||||
__Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
|
__Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
|
||||||
(((uint64_t *)__pyx_v_s_proc.data)[__pyx_v_i]) = ((uint64_t)__pyx_t_11);
|
(((uint64_t *)__pyx_v_s_proc.data)[__pyx_v_i]) = ((uint64_t)__pyx_t_11);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* "cpp_common.pxd":48
|
/* "cpp_common.pxd":49
|
||||||
* for i in range(s_proc.length):
|
* for i in range(s_proc.length):
|
||||||
* (<uint64_t*>s_proc.data)[i] = <uint64_t><Py_UCS4>arr[i]
|
* (<uint64_t*>s_proc.data)[i] = <uint64_t><Py_UCS4>arr[i]
|
||||||
* elif typecode in {'h', 'H'}: # signed/unsigned short # <<<<<<<<<<<<<<
|
* elif typecode in {'h', 'H'}: # signed/unsigned short # <<<<<<<<<<<<<<
|
||||||
|
@ -6638,7 +6638,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
|
||||||
break;
|
break;
|
||||||
case 0x69:
|
case 0x69:
|
||||||
|
|
||||||
/* "cpp_common.pxd":52
|
/* "cpp_common.pxd":53
|
||||||
* for i in range(s_proc.length):
|
* for i in range(s_proc.length):
|
||||||
* (<uint64_t*>s_proc.data)[i] = <uint64_t>arr[i]
|
* (<uint64_t*>s_proc.data)[i] = <uint64_t>arr[i]
|
||||||
* elif typecode in {'i', 'I'}: # signed/unsigned int # <<<<<<<<<<<<<<
|
* elif typecode in {'i', 'I'}: # signed/unsigned int # <<<<<<<<<<<<<<
|
||||||
|
@ -6647,7 +6647,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
|
||||||
*/
|
*/
|
||||||
case 73:
|
case 73:
|
||||||
|
|
||||||
/* "cpp_common.pxd":53
|
/* "cpp_common.pxd":54
|
||||||
* (<uint64_t*>s_proc.data)[i] = <uint64_t>arr[i]
|
* (<uint64_t*>s_proc.data)[i] = <uint64_t>arr[i]
|
||||||
* elif typecode in {'i', 'I'}: # signed/unsigned int
|
* elif typecode in {'i', 'I'}: # signed/unsigned int
|
||||||
* s_proc.kind = RAPIDFUZZ_UINT64 # <<<<<<<<<<<<<<
|
* s_proc.kind = RAPIDFUZZ_UINT64 # <<<<<<<<<<<<<<
|
||||||
|
@ -6656,7 +6656,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
|
||||||
*/
|
*/
|
||||||
__pyx_v_s_proc.kind = RAPIDFUZZ_UINT64;
|
__pyx_v_s_proc.kind = RAPIDFUZZ_UINT64;
|
||||||
|
|
||||||
/* "cpp_common.pxd":54
|
/* "cpp_common.pxd":55
|
||||||
* elif typecode in {'i', 'I'}: # signed/unsigned int
|
* elif typecode in {'i', 'I'}: # signed/unsigned int
|
||||||
* s_proc.kind = RAPIDFUZZ_UINT64
|
* s_proc.kind = RAPIDFUZZ_UINT64
|
||||||
* for i in range(s_proc.length): # <<<<<<<<<<<<<<
|
* for i in range(s_proc.length): # <<<<<<<<<<<<<<
|
||||||
|
@ -6668,21 +6668,21 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
|
||||||
for (__pyx_t_10 = 0; __pyx_t_10 < __pyx_t_9; __pyx_t_10+=1) {
|
for (__pyx_t_10 = 0; __pyx_t_10 < __pyx_t_9; __pyx_t_10+=1) {
|
||||||
__pyx_v_i = __pyx_t_10;
|
__pyx_v_i = __pyx_t_10;
|
||||||
|
|
||||||
/* "cpp_common.pxd":55
|
/* "cpp_common.pxd":56
|
||||||
* s_proc.kind = RAPIDFUZZ_UINT64
|
* s_proc.kind = RAPIDFUZZ_UINT64
|
||||||
* for i in range(s_proc.length):
|
* for i in range(s_proc.length):
|
||||||
* (<uint64_t*>s_proc.data)[i] = <uint64_t>arr[i] # <<<<<<<<<<<<<<
|
* (<uint64_t*>s_proc.data)[i] = <uint64_t>arr[i] # <<<<<<<<<<<<<<
|
||||||
* elif typecode in {'l', 'L'}: # signed/unsigned long
|
* elif typecode in {'l', 'L'}: # signed/unsigned long
|
||||||
* s_proc.kind = RAPIDFUZZ_UINT64
|
* s_proc.kind = RAPIDFUZZ_UINT64
|
||||||
*/
|
*/
|
||||||
__pyx_t_1 = __Pyx_GetItemInt(__pyx_v_arr, __pyx_v_i, size_t, 0, __Pyx_PyInt_FromSize_t, 0, 0, 1); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 55, __pyx_L4_error)
|
__pyx_t_1 = __Pyx_GetItemInt(__pyx_v_arr, __pyx_v_i, size_t, 0, __Pyx_PyInt_FromSize_t, 0, 0, 1); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 56, __pyx_L4_error)
|
||||||
__Pyx_GOTREF(__pyx_t_1);
|
__Pyx_GOTREF(__pyx_t_1);
|
||||||
__pyx_t_11 = __Pyx_PyInt_As_uint64_t(__pyx_t_1); if (unlikely((__pyx_t_11 == ((uint64_t)-1)) && PyErr_Occurred())) __PYX_ERR(1, 55, __pyx_L4_error)
|
__pyx_t_11 = __Pyx_PyInt_As_uint64_t(__pyx_t_1); if (unlikely((__pyx_t_11 == ((uint64_t)-1)) && PyErr_Occurred())) __PYX_ERR(1, 56, __pyx_L4_error)
|
||||||
__Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
|
__Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
|
||||||
(((uint64_t *)__pyx_v_s_proc.data)[__pyx_v_i]) = ((uint64_t)__pyx_t_11);
|
(((uint64_t *)__pyx_v_s_proc.data)[__pyx_v_i]) = ((uint64_t)__pyx_t_11);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* "cpp_common.pxd":52
|
/* "cpp_common.pxd":53
|
||||||
* for i in range(s_proc.length):
|
* for i in range(s_proc.length):
|
||||||
* (<uint64_t*>s_proc.data)[i] = <uint64_t>arr[i]
|
* (<uint64_t*>s_proc.data)[i] = <uint64_t>arr[i]
|
||||||
* elif typecode in {'i', 'I'}: # signed/unsigned int # <<<<<<<<<<<<<<
|
* elif typecode in {'i', 'I'}: # signed/unsigned int # <<<<<<<<<<<<<<
|
||||||
|
@ -6692,7 +6692,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
|
||||||
break;
|
break;
|
||||||
case 0x6C:
|
case 0x6C:
|
||||||
|
|
||||||
/* "cpp_common.pxd":56
|
/* "cpp_common.pxd":57
|
||||||
* for i in range(s_proc.length):
|
* for i in range(s_proc.length):
|
||||||
* (<uint64_t*>s_proc.data)[i] = <uint64_t>arr[i]
|
* (<uint64_t*>s_proc.data)[i] = <uint64_t>arr[i]
|
||||||
* elif typecode in {'l', 'L'}: # signed/unsigned long # <<<<<<<<<<<<<<
|
* elif typecode in {'l', 'L'}: # signed/unsigned long # <<<<<<<<<<<<<<
|
||||||
|
@ -6701,7 +6701,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
|
||||||
*/
|
*/
|
||||||
case 76:
|
case 76:
|
||||||
|
|
||||||
/* "cpp_common.pxd":57
|
/* "cpp_common.pxd":58
|
||||||
* (<uint64_t*>s_proc.data)[i] = <uint64_t>arr[i]
|
* (<uint64_t*>s_proc.data)[i] = <uint64_t>arr[i]
|
||||||
* elif typecode in {'l', 'L'}: # signed/unsigned long
|
* elif typecode in {'l', 'L'}: # signed/unsigned long
|
||||||
* s_proc.kind = RAPIDFUZZ_UINT64 # <<<<<<<<<<<<<<
|
* s_proc.kind = RAPIDFUZZ_UINT64 # <<<<<<<<<<<<<<
|
||||||
|
@ -6710,7 +6710,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
|
||||||
*/
|
*/
|
||||||
__pyx_v_s_proc.kind = RAPIDFUZZ_UINT64;
|
__pyx_v_s_proc.kind = RAPIDFUZZ_UINT64;
|
||||||
|
|
||||||
/* "cpp_common.pxd":58
|
/* "cpp_common.pxd":59
|
||||||
* elif typecode in {'l', 'L'}: # signed/unsigned long
|
* elif typecode in {'l', 'L'}: # signed/unsigned long
|
||||||
* s_proc.kind = RAPIDFUZZ_UINT64
|
* s_proc.kind = RAPIDFUZZ_UINT64
|
||||||
* for i in range(s_proc.length): # <<<<<<<<<<<<<<
|
* for i in range(s_proc.length): # <<<<<<<<<<<<<<
|
||||||
|
@ -6722,21 +6722,21 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
|
||||||
for (__pyx_t_10 = 0; __pyx_t_10 < __pyx_t_9; __pyx_t_10+=1) {
|
for (__pyx_t_10 = 0; __pyx_t_10 < __pyx_t_9; __pyx_t_10+=1) {
|
||||||
__pyx_v_i = __pyx_t_10;
|
__pyx_v_i = __pyx_t_10;
|
||||||
|
|
||||||
/* "cpp_common.pxd":59
|
/* "cpp_common.pxd":60
|
||||||
* s_proc.kind = RAPIDFUZZ_UINT64
|
* s_proc.kind = RAPIDFUZZ_UINT64
|
||||||
* for i in range(s_proc.length):
|
* for i in range(s_proc.length):
|
||||||
* (<uint64_t*>s_proc.data)[i] = <uint64_t>arr[i] # <<<<<<<<<<<<<<
|
* (<uint64_t*>s_proc.data)[i] = <uint64_t>arr[i] # <<<<<<<<<<<<<<
|
||||||
* elif typecode in {'q', 'Q'}: # signed/unsigned long long
|
* elif typecode in {'q', 'Q'}: # signed/unsigned long long
|
||||||
* s_proc.kind = RAPIDFUZZ_UINT64
|
* s_proc.kind = RAPIDFUZZ_UINT64
|
||||||
*/
|
*/
|
||||||
__pyx_t_1 = __Pyx_GetItemInt(__pyx_v_arr, __pyx_v_i, size_t, 0, __Pyx_PyInt_FromSize_t, 0, 0, 1); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 59, __pyx_L4_error)
|
__pyx_t_1 = __Pyx_GetItemInt(__pyx_v_arr, __pyx_v_i, size_t, 0, __Pyx_PyInt_FromSize_t, 0, 0, 1); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 60, __pyx_L4_error)
|
||||||
__Pyx_GOTREF(__pyx_t_1);
|
__Pyx_GOTREF(__pyx_t_1);
|
||||||
__pyx_t_11 = __Pyx_PyInt_As_uint64_t(__pyx_t_1); if (unlikely((__pyx_t_11 == ((uint64_t)-1)) && PyErr_Occurred())) __PYX_ERR(1, 59, __pyx_L4_error)
|
__pyx_t_11 = __Pyx_PyInt_As_uint64_t(__pyx_t_1); if (unlikely((__pyx_t_11 == ((uint64_t)-1)) && PyErr_Occurred())) __PYX_ERR(1, 60, __pyx_L4_error)
|
||||||
__Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
|
__Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
|
||||||
(((uint64_t *)__pyx_v_s_proc.data)[__pyx_v_i]) = ((uint64_t)__pyx_t_11);
|
(((uint64_t *)__pyx_v_s_proc.data)[__pyx_v_i]) = ((uint64_t)__pyx_t_11);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* "cpp_common.pxd":56
|
/* "cpp_common.pxd":57
|
||||||
* for i in range(s_proc.length):
|
* for i in range(s_proc.length):
|
||||||
* (<uint64_t*>s_proc.data)[i] = <uint64_t>arr[i]
|
* (<uint64_t*>s_proc.data)[i] = <uint64_t>arr[i]
|
||||||
* elif typecode in {'l', 'L'}: # signed/unsigned long # <<<<<<<<<<<<<<
|
* elif typecode in {'l', 'L'}: # signed/unsigned long # <<<<<<<<<<<<<<
|
||||||
|
@ -6746,7 +6746,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
|
||||||
break;
|
break;
|
||||||
case 0x71:
|
case 0x71:
|
||||||
|
|
||||||
/* "cpp_common.pxd":60
|
/* "cpp_common.pxd":61
|
||||||
* for i in range(s_proc.length):
|
* for i in range(s_proc.length):
|
||||||
* (<uint64_t*>s_proc.data)[i] = <uint64_t>arr[i]
|
* (<uint64_t*>s_proc.data)[i] = <uint64_t>arr[i]
|
||||||
* elif typecode in {'q', 'Q'}: # signed/unsigned long long # <<<<<<<<<<<<<<
|
* elif typecode in {'q', 'Q'}: # signed/unsigned long long # <<<<<<<<<<<<<<
|
||||||
|
@ -6755,7 +6755,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
|
||||||
*/
|
*/
|
||||||
case 81:
|
case 81:
|
||||||
|
|
||||||
/* "cpp_common.pxd":61
|
/* "cpp_common.pxd":62
|
||||||
* (<uint64_t*>s_proc.data)[i] = <uint64_t>arr[i]
|
* (<uint64_t*>s_proc.data)[i] = <uint64_t>arr[i]
|
||||||
* elif typecode in {'q', 'Q'}: # signed/unsigned long long
|
* elif typecode in {'q', 'Q'}: # signed/unsigned long long
|
||||||
* s_proc.kind = RAPIDFUZZ_UINT64 # <<<<<<<<<<<<<<
|
* s_proc.kind = RAPIDFUZZ_UINT64 # <<<<<<<<<<<<<<
|
||||||
|
@ -6764,7 +6764,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
|
||||||
*/
|
*/
|
||||||
__pyx_v_s_proc.kind = RAPIDFUZZ_UINT64;
|
__pyx_v_s_proc.kind = RAPIDFUZZ_UINT64;
|
||||||
|
|
||||||
/* "cpp_common.pxd":62
|
/* "cpp_common.pxd":63
|
||||||
* elif typecode in {'q', 'Q'}: # signed/unsigned long long
|
* elif typecode in {'q', 'Q'}: # signed/unsigned long long
|
||||||
* s_proc.kind = RAPIDFUZZ_UINT64
|
* s_proc.kind = RAPIDFUZZ_UINT64
|
||||||
* for i in range(s_proc.length): # <<<<<<<<<<<<<<
|
* for i in range(s_proc.length): # <<<<<<<<<<<<<<
|
||||||
|
@ -6776,21 +6776,21 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
|
||||||
for (__pyx_t_10 = 0; __pyx_t_10 < __pyx_t_9; __pyx_t_10+=1) {
|
for (__pyx_t_10 = 0; __pyx_t_10 < __pyx_t_9; __pyx_t_10+=1) {
|
||||||
__pyx_v_i = __pyx_t_10;
|
__pyx_v_i = __pyx_t_10;
|
||||||
|
|
||||||
/* "cpp_common.pxd":63
|
/* "cpp_common.pxd":64
|
||||||
* s_proc.kind = RAPIDFUZZ_UINT64
|
* s_proc.kind = RAPIDFUZZ_UINT64
|
||||||
* for i in range(s_proc.length):
|
* for i in range(s_proc.length):
|
||||||
* (<uint64_t*>s_proc.data)[i] = <uint64_t>arr[i] # <<<<<<<<<<<<<<
|
* (<uint64_t*>s_proc.data)[i] = <uint64_t>arr[i] # <<<<<<<<<<<<<<
|
||||||
* else: # float/double are hashed
|
* else: # float/double are hashed
|
||||||
* s_proc.kind = RAPIDFUZZ_INT64
|
* s_proc.kind = RAPIDFUZZ_INT64
|
||||||
*/
|
*/
|
||||||
__pyx_t_1 = __Pyx_GetItemInt(__pyx_v_arr, __pyx_v_i, size_t, 0, __Pyx_PyInt_FromSize_t, 0, 0, 1); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 63, __pyx_L4_error)
|
__pyx_t_1 = __Pyx_GetItemInt(__pyx_v_arr, __pyx_v_i, size_t, 0, __Pyx_PyInt_FromSize_t, 0, 0, 1); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 64, __pyx_L4_error)
|
||||||
__Pyx_GOTREF(__pyx_t_1);
|
__Pyx_GOTREF(__pyx_t_1);
|
||||||
__pyx_t_11 = __Pyx_PyInt_As_uint64_t(__pyx_t_1); if (unlikely((__pyx_t_11 == ((uint64_t)-1)) && PyErr_Occurred())) __PYX_ERR(1, 63, __pyx_L4_error)
|
__pyx_t_11 = __Pyx_PyInt_As_uint64_t(__pyx_t_1); if (unlikely((__pyx_t_11 == ((uint64_t)-1)) && PyErr_Occurred())) __PYX_ERR(1, 64, __pyx_L4_error)
|
||||||
__Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
|
__Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
|
||||||
(((uint64_t *)__pyx_v_s_proc.data)[__pyx_v_i]) = ((uint64_t)__pyx_t_11);
|
(((uint64_t *)__pyx_v_s_proc.data)[__pyx_v_i]) = ((uint64_t)__pyx_t_11);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* "cpp_common.pxd":60
|
/* "cpp_common.pxd":61
|
||||||
* for i in range(s_proc.length):
|
* for i in range(s_proc.length):
|
||||||
* (<uint64_t*>s_proc.data)[i] = <uint64_t>arr[i]
|
* (<uint64_t*>s_proc.data)[i] = <uint64_t>arr[i]
|
||||||
* elif typecode in {'q', 'Q'}: # signed/unsigned long long # <<<<<<<<<<<<<<
|
* elif typecode in {'q', 'Q'}: # signed/unsigned long long # <<<<<<<<<<<<<<
|
||||||
|
@ -6800,7 +6800,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
|
|
||||||
/* "cpp_common.pxd":65
|
/* "cpp_common.pxd":66
|
||||||
* (<uint64_t*>s_proc.data)[i] = <uint64_t>arr[i]
|
* (<uint64_t*>s_proc.data)[i] = <uint64_t>arr[i]
|
||||||
* else: # float/double are hashed
|
* else: # float/double are hashed
|
||||||
* s_proc.kind = RAPIDFUZZ_INT64 # <<<<<<<<<<<<<<
|
* s_proc.kind = RAPIDFUZZ_INT64 # <<<<<<<<<<<<<<
|
||||||
|
@ -6809,7 +6809,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
|
||||||
*/
|
*/
|
||||||
__pyx_v_s_proc.kind = RAPIDFUZZ_INT64;
|
__pyx_v_s_proc.kind = RAPIDFUZZ_INT64;
|
||||||
|
|
||||||
/* "cpp_common.pxd":66
|
/* "cpp_common.pxd":67
|
||||||
* else: # float/double are hashed
|
* else: # float/double are hashed
|
||||||
* s_proc.kind = RAPIDFUZZ_INT64
|
* s_proc.kind = RAPIDFUZZ_INT64
|
||||||
* for i in range(s_proc.length): # <<<<<<<<<<<<<<
|
* for i in range(s_proc.length): # <<<<<<<<<<<<<<
|
||||||
|
@ -6821,23 +6821,23 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
|
||||||
for (__pyx_t_10 = 0; __pyx_t_10 < __pyx_t_9; __pyx_t_10+=1) {
|
for (__pyx_t_10 = 0; __pyx_t_10 < __pyx_t_9; __pyx_t_10+=1) {
|
||||||
__pyx_v_i = __pyx_t_10;
|
__pyx_v_i = __pyx_t_10;
|
||||||
|
|
||||||
/* "cpp_common.pxd":67
|
/* "cpp_common.pxd":68
|
||||||
* s_proc.kind = RAPIDFUZZ_INT64
|
* s_proc.kind = RAPIDFUZZ_INT64
|
||||||
* for i in range(s_proc.length):
|
* for i in range(s_proc.length):
|
||||||
* (<uint64_t*>s_proc.data)[i] = <uint64_t>hash(arr[i]) # <<<<<<<<<<<<<<
|
* (<uint64_t*>s_proc.data)[i] = <uint64_t>hash(arr[i]) # <<<<<<<<<<<<<<
|
||||||
* except Exception as e:
|
* except Exception as e:
|
||||||
* free(s_proc.data)
|
* free(s_proc.data)
|
||||||
*/
|
*/
|
||||||
__pyx_t_1 = __Pyx_GetItemInt(__pyx_v_arr, __pyx_v_i, size_t, 0, __Pyx_PyInt_FromSize_t, 0, 0, 1); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 67, __pyx_L4_error)
|
__pyx_t_1 = __Pyx_GetItemInt(__pyx_v_arr, __pyx_v_i, size_t, 0, __Pyx_PyInt_FromSize_t, 0, 0, 1); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 68, __pyx_L4_error)
|
||||||
__Pyx_GOTREF(__pyx_t_1);
|
__Pyx_GOTREF(__pyx_t_1);
|
||||||
__pyx_t_12 = PyObject_Hash(__pyx_t_1); if (unlikely(__pyx_t_12 == ((Py_hash_t)-1))) __PYX_ERR(1, 67, __pyx_L4_error)
|
__pyx_t_12 = PyObject_Hash(__pyx_t_1); if (unlikely(__pyx_t_12 == ((Py_hash_t)-1))) __PYX_ERR(1, 68, __pyx_L4_error)
|
||||||
__Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
|
__Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
|
||||||
(((uint64_t *)__pyx_v_s_proc.data)[__pyx_v_i]) = ((uint64_t)__pyx_t_12);
|
(((uint64_t *)__pyx_v_s_proc.data)[__pyx_v_i]) = ((uint64_t)__pyx_t_12);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* "cpp_common.pxd":38
|
/* "cpp_common.pxd":39
|
||||||
* raise MemoryError
|
* raise MemoryError
|
||||||
*
|
*
|
||||||
* try: # <<<<<<<<<<<<<<
|
* try: # <<<<<<<<<<<<<<
|
||||||
|
@ -6852,7 +6852,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
|
||||||
__pyx_L4_error:;
|
__pyx_L4_error:;
|
||||||
__Pyx_XDECREF(__pyx_t_1); __pyx_t_1 = 0;
|
__Pyx_XDECREF(__pyx_t_1); __pyx_t_1 = 0;
|
||||||
|
|
||||||
/* "cpp_common.pxd":68
|
/* "cpp_common.pxd":69
|
||||||
* for i in range(s_proc.length):
|
* for i in range(s_proc.length):
|
||||||
* (<uint64_t*>s_proc.data)[i] = <uint64_t>hash(arr[i])
|
* (<uint64_t*>s_proc.data)[i] = <uint64_t>hash(arr[i])
|
||||||
* except Exception as e: # <<<<<<<<<<<<<<
|
* except Exception as e: # <<<<<<<<<<<<<<
|
||||||
|
@ -6862,7 +6862,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
|
||||||
__pyx_t_13 = __Pyx_PyErr_ExceptionMatches(((PyObject *)(&((PyTypeObject*)PyExc_Exception)[0])));
|
__pyx_t_13 = __Pyx_PyErr_ExceptionMatches(((PyObject *)(&((PyTypeObject*)PyExc_Exception)[0])));
|
||||||
if (__pyx_t_13) {
|
if (__pyx_t_13) {
|
||||||
__Pyx_AddTraceback("cpp_common.hash_array", __pyx_clineno, __pyx_lineno, __pyx_filename);
|
__Pyx_AddTraceback("cpp_common.hash_array", __pyx_clineno, __pyx_lineno, __pyx_filename);
|
||||||
if (__Pyx_GetException(&__pyx_t_1, &__pyx_t_14, &__pyx_t_15) < 0) __PYX_ERR(1, 68, __pyx_L6_except_error)
|
if (__Pyx_GetException(&__pyx_t_1, &__pyx_t_14, &__pyx_t_15) < 0) __PYX_ERR(1, 69, __pyx_L6_except_error)
|
||||||
__Pyx_GOTREF(__pyx_t_1);
|
__Pyx_GOTREF(__pyx_t_1);
|
||||||
__Pyx_GOTREF(__pyx_t_14);
|
__Pyx_GOTREF(__pyx_t_14);
|
||||||
__Pyx_GOTREF(__pyx_t_15);
|
__Pyx_GOTREF(__pyx_t_15);
|
||||||
|
@ -6870,7 +6870,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
|
||||||
__pyx_v_e = __pyx_t_14;
|
__pyx_v_e = __pyx_t_14;
|
||||||
/*try:*/ {
|
/*try:*/ {
|
||||||
|
|
||||||
/* "cpp_common.pxd":69
|
/* "cpp_common.pxd":70
|
||||||
* (<uint64_t*>s_proc.data)[i] = <uint64_t>hash(arr[i])
|
* (<uint64_t*>s_proc.data)[i] = <uint64_t>hash(arr[i])
|
||||||
* except Exception as e:
|
* except Exception as e:
|
||||||
* free(s_proc.data) # <<<<<<<<<<<<<<
|
* free(s_proc.data) # <<<<<<<<<<<<<<
|
||||||
|
@ -6879,7 +6879,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
|
||||||
*/
|
*/
|
||||||
free(__pyx_v_s_proc.data);
|
free(__pyx_v_s_proc.data);
|
||||||
|
|
||||||
/* "cpp_common.pxd":70
|
/* "cpp_common.pxd":71
|
||||||
* except Exception as e:
|
* except Exception as e:
|
||||||
* free(s_proc.data)
|
* free(s_proc.data)
|
||||||
* s_proc.data = NULL # <<<<<<<<<<<<<<
|
* s_proc.data = NULL # <<<<<<<<<<<<<<
|
||||||
|
@ -6888,7 +6888,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
|
||||||
*/
|
*/
|
||||||
__pyx_v_s_proc.data = NULL;
|
__pyx_v_s_proc.data = NULL;
|
||||||
|
|
||||||
/* "cpp_common.pxd":71
|
/* "cpp_common.pxd":72
|
||||||
* free(s_proc.data)
|
* free(s_proc.data)
|
||||||
* s_proc.data = NULL
|
* s_proc.data = NULL
|
||||||
* raise # <<<<<<<<<<<<<<
|
* raise # <<<<<<<<<<<<<<
|
||||||
|
@ -6900,10 +6900,10 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
|
||||||
__Pyx_XGIVEREF(__pyx_t_15);
|
__Pyx_XGIVEREF(__pyx_t_15);
|
||||||
__Pyx_ErrRestoreWithState(__pyx_t_1, __pyx_t_14, __pyx_t_15);
|
__Pyx_ErrRestoreWithState(__pyx_t_1, __pyx_t_14, __pyx_t_15);
|
||||||
__pyx_t_1 = 0; __pyx_t_14 = 0; __pyx_t_15 = 0;
|
__pyx_t_1 = 0; __pyx_t_14 = 0; __pyx_t_15 = 0;
|
||||||
__PYX_ERR(1, 71, __pyx_L29_error)
|
__PYX_ERR(1, 72, __pyx_L29_error)
|
||||||
}
|
}
|
||||||
|
|
||||||
/* "cpp_common.pxd":68
|
/* "cpp_common.pxd":69
|
||||||
* for i in range(s_proc.length):
|
* for i in range(s_proc.length):
|
||||||
* (<uint64_t*>s_proc.data)[i] = <uint64_t>hash(arr[i])
|
* (<uint64_t*>s_proc.data)[i] = <uint64_t>hash(arr[i])
|
||||||
* except Exception as e: # <<<<<<<<<<<<<<
|
* except Exception as e: # <<<<<<<<<<<<<<
|
||||||
|
@ -6947,7 +6947,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
|
||||||
goto __pyx_L6_except_error;
|
goto __pyx_L6_except_error;
|
||||||
__pyx_L6_except_error:;
|
__pyx_L6_except_error:;
|
||||||
|
|
||||||
/* "cpp_common.pxd":38
|
/* "cpp_common.pxd":39
|
||||||
* raise MemoryError
|
* raise MemoryError
|
||||||
*
|
*
|
||||||
* try: # <<<<<<<<<<<<<<
|
* try: # <<<<<<<<<<<<<<
|
||||||
|
@ -6962,7 +6962,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
|
||||||
__pyx_L9_try_end:;
|
__pyx_L9_try_end:;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* "cpp_common.pxd":73
|
/* "cpp_common.pxd":74
|
||||||
* raise
|
* raise
|
||||||
*
|
*
|
||||||
* s_proc.allocated = True # <<<<<<<<<<<<<<
|
* s_proc.allocated = True # <<<<<<<<<<<<<<
|
||||||
|
@ -6971,7 +6971,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
|
||||||
*/
|
*/
|
||||||
__pyx_v_s_proc.allocated = 1;
|
__pyx_v_s_proc.allocated = 1;
|
||||||
|
|
||||||
/* "cpp_common.pxd":74
|
/* "cpp_common.pxd":75
|
||||||
*
|
*
|
||||||
* s_proc.allocated = True
|
* s_proc.allocated = True
|
||||||
* return move(s_proc) # <<<<<<<<<<<<<<
|
* return move(s_proc) # <<<<<<<<<<<<<<
|
||||||
|
@ -6981,8 +6981,8 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
|
||||||
__pyx_r = cython_std::move<proc_string>(__pyx_v_s_proc);
|
__pyx_r = cython_std::move<proc_string>(__pyx_v_s_proc);
|
||||||
goto __pyx_L0;
|
goto __pyx_L0;
|
||||||
|
|
||||||
/* "cpp_common.pxd":27
|
/* "cpp_common.pxd":28
|
||||||
* void validate_string(object py_str, const char* err) except +
|
* proc_string default_process_func(proc_string sentence) except +
|
||||||
*
|
*
|
||||||
* cdef inline proc_string hash_array(arr) except *: # <<<<<<<<<<<<<<
|
* cdef inline proc_string hash_array(arr) except *: # <<<<<<<<<<<<<<
|
||||||
* # TODO on Cpython this does not require any copies
|
* # TODO on Cpython this does not require any copies
|
||||||
|
@ -7002,7 +7002,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
|
||||||
return __pyx_r;
|
return __pyx_r;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* "cpp_common.pxd":77
|
/* "cpp_common.pxd":78
|
||||||
*
|
*
|
||||||
*
|
*
|
||||||
* cdef inline proc_string hash_sequence(seq) except *: # <<<<<<<<<<<<<<
|
* cdef inline proc_string hash_sequence(seq) except *: # <<<<<<<<<<<<<<
|
||||||
|
@ -7046,17 +7046,17 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_sequence(PyObject *__
|
||||||
int __pyx_clineno = 0;
|
int __pyx_clineno = 0;
|
||||||
__Pyx_RefNannySetupContext("hash_sequence", 0);
|
__Pyx_RefNannySetupContext("hash_sequence", 0);
|
||||||
|
|
||||||
/* "cpp_common.pxd":79
|
/* "cpp_common.pxd":80
|
||||||
* cdef inline proc_string hash_sequence(seq) except *:
|
* cdef inline proc_string hash_sequence(seq) except *:
|
||||||
* cdef proc_string s_proc
|
* cdef proc_string s_proc
|
||||||
* s_proc.length = <size_t>len(seq) # <<<<<<<<<<<<<<
|
* s_proc.length = <size_t>len(seq) # <<<<<<<<<<<<<<
|
||||||
*
|
*
|
||||||
* s_proc.data = malloc(s_proc.length * sizeof(uint64_t))
|
* s_proc.data = malloc(s_proc.length * sizeof(uint64_t))
|
||||||
*/
|
*/
|
||||||
__pyx_t_1 = PyObject_Length(__pyx_v_seq); if (unlikely(__pyx_t_1 == ((Py_ssize_t)-1))) __PYX_ERR(1, 79, __pyx_L1_error)
|
__pyx_t_1 = PyObject_Length(__pyx_v_seq); if (unlikely(__pyx_t_1 == ((Py_ssize_t)-1))) __PYX_ERR(1, 80, __pyx_L1_error)
|
||||||
__pyx_v_s_proc.length = ((size_t)__pyx_t_1);
|
__pyx_v_s_proc.length = ((size_t)__pyx_t_1);
|
||||||
|
|
||||||
/* "cpp_common.pxd":81
|
/* "cpp_common.pxd":82
|
||||||
* s_proc.length = <size_t>len(seq)
|
* s_proc.length = <size_t>len(seq)
|
||||||
*
|
*
|
||||||
* s_proc.data = malloc(s_proc.length * sizeof(uint64_t)) # <<<<<<<<<<<<<<
|
* s_proc.data = malloc(s_proc.length * sizeof(uint64_t)) # <<<<<<<<<<<<<<
|
||||||
|
@ -7065,7 +7065,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_sequence(PyObject *__
|
||||||
*/
|
*/
|
||||||
__pyx_v_s_proc.data = malloc((__pyx_v_s_proc.length * (sizeof(uint64_t))));
|
__pyx_v_s_proc.data = malloc((__pyx_v_s_proc.length * (sizeof(uint64_t))));
|
||||||
|
|
||||||
/* "cpp_common.pxd":83
|
/* "cpp_common.pxd":84
|
||||||
* s_proc.data = malloc(s_proc.length * sizeof(uint64_t))
|
* s_proc.data = malloc(s_proc.length * sizeof(uint64_t))
|
||||||
*
|
*
|
||||||
* if s_proc.data == NULL: # <<<<<<<<<<<<<<
|
* if s_proc.data == NULL: # <<<<<<<<<<<<<<
|
||||||
|
@ -7075,16 +7075,16 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_sequence(PyObject *__
|
||||||
__pyx_t_2 = ((__pyx_v_s_proc.data == NULL) != 0);
|
__pyx_t_2 = ((__pyx_v_s_proc.data == NULL) != 0);
|
||||||
if (unlikely(__pyx_t_2)) {
|
if (unlikely(__pyx_t_2)) {
|
||||||
|
|
||||||
/* "cpp_common.pxd":84
|
/* "cpp_common.pxd":85
|
||||||
*
|
*
|
||||||
* if s_proc.data == NULL:
|
* if s_proc.data == NULL:
|
||||||
* raise MemoryError # <<<<<<<<<<<<<<
|
* raise MemoryError # <<<<<<<<<<<<<<
|
||||||
*
|
*
|
||||||
* try:
|
* try:
|
||||||
*/
|
*/
|
||||||
PyErr_NoMemory(); __PYX_ERR(1, 84, __pyx_L1_error)
|
PyErr_NoMemory(); __PYX_ERR(1, 85, __pyx_L1_error)
|
||||||
|
|
||||||
/* "cpp_common.pxd":83
|
/* "cpp_common.pxd":84
|
||||||
* s_proc.data = malloc(s_proc.length * sizeof(uint64_t))
|
* s_proc.data = malloc(s_proc.length * sizeof(uint64_t))
|
||||||
*
|
*
|
||||||
* if s_proc.data == NULL: # <<<<<<<<<<<<<<
|
* if s_proc.data == NULL: # <<<<<<<<<<<<<<
|
||||||
|
@ -7093,7 +7093,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_sequence(PyObject *__
|
||||||
*/
|
*/
|
||||||
}
|
}
|
||||||
|
|
||||||
/* "cpp_common.pxd":86
|
/* "cpp_common.pxd":87
|
||||||
* raise MemoryError
|
* raise MemoryError
|
||||||
*
|
*
|
||||||
* try: # <<<<<<<<<<<<<<
|
* try: # <<<<<<<<<<<<<<
|
||||||
|
@ -7109,7 +7109,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_sequence(PyObject *__
|
||||||
__Pyx_XGOTREF(__pyx_t_5);
|
__Pyx_XGOTREF(__pyx_t_5);
|
||||||
/*try:*/ {
|
/*try:*/ {
|
||||||
|
|
||||||
/* "cpp_common.pxd":87
|
/* "cpp_common.pxd":88
|
||||||
*
|
*
|
||||||
* try:
|
* try:
|
||||||
* s_proc.kind = RAPIDFUZZ_INT64 # <<<<<<<<<<<<<<
|
* s_proc.kind = RAPIDFUZZ_INT64 # <<<<<<<<<<<<<<
|
||||||
|
@ -7118,7 +7118,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_sequence(PyObject *__
|
||||||
*/
|
*/
|
||||||
__pyx_v_s_proc.kind = RAPIDFUZZ_INT64;
|
__pyx_v_s_proc.kind = RAPIDFUZZ_INT64;
|
||||||
|
|
||||||
/* "cpp_common.pxd":88
|
/* "cpp_common.pxd":89
|
||||||
* try:
|
* try:
|
||||||
* s_proc.kind = RAPIDFUZZ_INT64
|
* s_proc.kind = RAPIDFUZZ_INT64
|
||||||
* for i in range(s_proc.length): # <<<<<<<<<<<<<<
|
* for i in range(s_proc.length): # <<<<<<<<<<<<<<
|
||||||
|
@ -7130,19 +7130,19 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_sequence(PyObject *__
|
||||||
for (__pyx_t_8 = 0; __pyx_t_8 < __pyx_t_7; __pyx_t_8+=1) {
|
for (__pyx_t_8 = 0; __pyx_t_8 < __pyx_t_7; __pyx_t_8+=1) {
|
||||||
__pyx_v_i = __pyx_t_8;
|
__pyx_v_i = __pyx_t_8;
|
||||||
|
|
||||||
/* "cpp_common.pxd":89
|
/* "cpp_common.pxd":90
|
||||||
* s_proc.kind = RAPIDFUZZ_INT64
|
* s_proc.kind = RAPIDFUZZ_INT64
|
||||||
* for i in range(s_proc.length):
|
* for i in range(s_proc.length):
|
||||||
* elem = seq[i] # <<<<<<<<<<<<<<
|
* elem = seq[i] # <<<<<<<<<<<<<<
|
||||||
* # this is required so e.g. a list of char can be compared to a string
|
* # this is required so e.g. a list of char can be compared to a string
|
||||||
* if isinstance(elem, str) and len(elem) == 1:
|
* if isinstance(elem, str) and len(elem) == 1:
|
||||||
*/
|
*/
|
||||||
__pyx_t_9 = __Pyx_GetItemInt(__pyx_v_seq, __pyx_v_i, size_t, 0, __Pyx_PyInt_FromSize_t, 0, 0, 1); if (unlikely(!__pyx_t_9)) __PYX_ERR(1, 89, __pyx_L4_error)
|
__pyx_t_9 = __Pyx_GetItemInt(__pyx_v_seq, __pyx_v_i, size_t, 0, __Pyx_PyInt_FromSize_t, 0, 0, 1); if (unlikely(!__pyx_t_9)) __PYX_ERR(1, 90, __pyx_L4_error)
|
||||||
__Pyx_GOTREF(__pyx_t_9);
|
__Pyx_GOTREF(__pyx_t_9);
|
||||||
__Pyx_XDECREF_SET(__pyx_v_elem, __pyx_t_9);
|
__Pyx_XDECREF_SET(__pyx_v_elem, __pyx_t_9);
|
||||||
__pyx_t_9 = 0;
|
__pyx_t_9 = 0;
|
||||||
|
|
||||||
/* "cpp_common.pxd":91
|
/* "cpp_common.pxd":92
|
||||||
* elem = seq[i]
|
* elem = seq[i]
|
||||||
* # this is required so e.g. a list of char can be compared to a string
|
* # this is required so e.g. a list of char can be compared to a string
|
||||||
* if isinstance(elem, str) and len(elem) == 1: # <<<<<<<<<<<<<<
|
* if isinstance(elem, str) and len(elem) == 1: # <<<<<<<<<<<<<<
|
||||||
|
@ -7156,23 +7156,23 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_sequence(PyObject *__
|
||||||
__pyx_t_2 = __pyx_t_11;
|
__pyx_t_2 = __pyx_t_11;
|
||||||
goto __pyx_L13_bool_binop_done;
|
goto __pyx_L13_bool_binop_done;
|
||||||
}
|
}
|
||||||
__pyx_t_1 = PyObject_Length(__pyx_v_elem); if (unlikely(__pyx_t_1 == ((Py_ssize_t)-1))) __PYX_ERR(1, 91, __pyx_L4_error)
|
__pyx_t_1 = PyObject_Length(__pyx_v_elem); if (unlikely(__pyx_t_1 == ((Py_ssize_t)-1))) __PYX_ERR(1, 92, __pyx_L4_error)
|
||||||
__pyx_t_11 = ((__pyx_t_1 == 1) != 0);
|
__pyx_t_11 = ((__pyx_t_1 == 1) != 0);
|
||||||
__pyx_t_2 = __pyx_t_11;
|
__pyx_t_2 = __pyx_t_11;
|
||||||
__pyx_L13_bool_binop_done:;
|
__pyx_L13_bool_binop_done:;
|
||||||
if (__pyx_t_2) {
|
if (__pyx_t_2) {
|
||||||
|
|
||||||
/* "cpp_common.pxd":92
|
/* "cpp_common.pxd":93
|
||||||
* # this is required so e.g. a list of char can be compared to a string
|
* # this is required so e.g. a list of char can be compared to a string
|
||||||
* if isinstance(elem, str) and len(elem) == 1:
|
* if isinstance(elem, str) and len(elem) == 1:
|
||||||
* (<uint64_t*>s_proc.data)[i] = <uint64_t><Py_UCS4>elem # <<<<<<<<<<<<<<
|
* (<uint64_t*>s_proc.data)[i] = <uint64_t><Py_UCS4>elem # <<<<<<<<<<<<<<
|
||||||
* else:
|
* else:
|
||||||
* (<uint64_t*>s_proc.data)[i] = <uint64_t>hash(elem)
|
* (<uint64_t*>s_proc.data)[i] = <uint64_t>hash(elem)
|
||||||
*/
|
*/
|
||||||
__pyx_t_12 = __Pyx_PyObject_AsPy_UCS4(__pyx_v_elem); if (unlikely((__pyx_t_12 == (Py_UCS4)-1) && PyErr_Occurred())) __PYX_ERR(1, 92, __pyx_L4_error)
|
__pyx_t_12 = __Pyx_PyObject_AsPy_UCS4(__pyx_v_elem); if (unlikely((__pyx_t_12 == (Py_UCS4)-1) && PyErr_Occurred())) __PYX_ERR(1, 93, __pyx_L4_error)
|
||||||
(((uint64_t *)__pyx_v_s_proc.data)[__pyx_v_i]) = ((uint64_t)((Py_UCS4)__pyx_t_12));
|
(((uint64_t *)__pyx_v_s_proc.data)[__pyx_v_i]) = ((uint64_t)((Py_UCS4)__pyx_t_12));
|
||||||
|
|
||||||
/* "cpp_common.pxd":91
|
/* "cpp_common.pxd":92
|
||||||
* elem = seq[i]
|
* elem = seq[i]
|
||||||
* # this is required so e.g. a list of char can be compared to a string
|
* # this is required so e.g. a list of char can be compared to a string
|
||||||
* if isinstance(elem, str) and len(elem) == 1: # <<<<<<<<<<<<<<
|
* if isinstance(elem, str) and len(elem) == 1: # <<<<<<<<<<<<<<
|
||||||
|
@ -7182,7 +7182,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_sequence(PyObject *__
|
||||||
goto __pyx_L12;
|
goto __pyx_L12;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* "cpp_common.pxd":94
|
/* "cpp_common.pxd":95
|
||||||
* (<uint64_t*>s_proc.data)[i] = <uint64_t><Py_UCS4>elem
|
* (<uint64_t*>s_proc.data)[i] = <uint64_t><Py_UCS4>elem
|
||||||
* else:
|
* else:
|
||||||
* (<uint64_t*>s_proc.data)[i] = <uint64_t>hash(elem) # <<<<<<<<<<<<<<
|
* (<uint64_t*>s_proc.data)[i] = <uint64_t>hash(elem) # <<<<<<<<<<<<<<
|
||||||
|
@ -7190,13 +7190,13 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_sequence(PyObject *__
|
||||||
* free(s_proc.data)
|
* free(s_proc.data)
|
||||||
*/
|
*/
|
||||||
/*else*/ {
|
/*else*/ {
|
||||||
__pyx_t_13 = PyObject_Hash(__pyx_v_elem); if (unlikely(__pyx_t_13 == ((Py_hash_t)-1))) __PYX_ERR(1, 94, __pyx_L4_error)
|
__pyx_t_13 = PyObject_Hash(__pyx_v_elem); if (unlikely(__pyx_t_13 == ((Py_hash_t)-1))) __PYX_ERR(1, 95, __pyx_L4_error)
|
||||||
(((uint64_t *)__pyx_v_s_proc.data)[__pyx_v_i]) = ((uint64_t)__pyx_t_13);
|
(((uint64_t *)__pyx_v_s_proc.data)[__pyx_v_i]) = ((uint64_t)__pyx_t_13);
|
||||||
}
|
}
|
||||||
__pyx_L12:;
|
__pyx_L12:;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* "cpp_common.pxd":86
|
/* "cpp_common.pxd":87
|
||||||
* raise MemoryError
|
* raise MemoryError
|
||||||
*
|
*
|
||||||
* try: # <<<<<<<<<<<<<<
|
* try: # <<<<<<<<<<<<<<
|
||||||
|
@ -7211,7 +7211,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_sequence(PyObject *__
|
||||||
__pyx_L4_error:;
|
__pyx_L4_error:;
|
||||||
__Pyx_XDECREF(__pyx_t_9); __pyx_t_9 = 0;
|
__Pyx_XDECREF(__pyx_t_9); __pyx_t_9 = 0;
|
||||||
|
|
||||||
/* "cpp_common.pxd":95
|
/* "cpp_common.pxd":96
|
||||||
* else:
|
* else:
|
||||||
* (<uint64_t*>s_proc.data)[i] = <uint64_t>hash(elem)
|
* (<uint64_t*>s_proc.data)[i] = <uint64_t>hash(elem)
|
||||||
* except Exception as e: # <<<<<<<<<<<<<<
|
* except Exception as e: # <<<<<<<<<<<<<<
|
||||||
|
@ -7221,7 +7221,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_sequence(PyObject *__
|
||||||
__pyx_t_14 = __Pyx_PyErr_ExceptionMatches(((PyObject *)(&((PyTypeObject*)PyExc_Exception)[0])));
|
__pyx_t_14 = __Pyx_PyErr_ExceptionMatches(((PyObject *)(&((PyTypeObject*)PyExc_Exception)[0])));
|
||||||
if (__pyx_t_14) {
|
if (__pyx_t_14) {
|
||||||
__Pyx_AddTraceback("cpp_common.hash_sequence", __pyx_clineno, __pyx_lineno, __pyx_filename);
|
__Pyx_AddTraceback("cpp_common.hash_sequence", __pyx_clineno, __pyx_lineno, __pyx_filename);
|
||||||
if (__Pyx_GetException(&__pyx_t_9, &__pyx_t_15, &__pyx_t_16) < 0) __PYX_ERR(1, 95, __pyx_L6_except_error)
|
if (__Pyx_GetException(&__pyx_t_9, &__pyx_t_15, &__pyx_t_16) < 0) __PYX_ERR(1, 96, __pyx_L6_except_error)
|
||||||
__Pyx_GOTREF(__pyx_t_9);
|
__Pyx_GOTREF(__pyx_t_9);
|
||||||
__Pyx_GOTREF(__pyx_t_15);
|
__Pyx_GOTREF(__pyx_t_15);
|
||||||
__Pyx_GOTREF(__pyx_t_16);
|
__Pyx_GOTREF(__pyx_t_16);
|
||||||
|
@ -7229,7 +7229,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_sequence(PyObject *__
|
||||||
__pyx_v_e = __pyx_t_15;
|
__pyx_v_e = __pyx_t_15;
|
||||||
/*try:*/ {
|
/*try:*/ {
|
||||||
|
|
||||||
/* "cpp_common.pxd":96
|
/* "cpp_common.pxd":97
|
||||||
* (<uint64_t*>s_proc.data)[i] = <uint64_t>hash(elem)
|
* (<uint64_t*>s_proc.data)[i] = <uint64_t>hash(elem)
|
||||||
* except Exception as e:
|
* except Exception as e:
|
||||||
* free(s_proc.data) # <<<<<<<<<<<<<<
|
* free(s_proc.data) # <<<<<<<<<<<<<<
|
||||||
|
@ -7238,7 +7238,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_sequence(PyObject *__
|
||||||
*/
|
*/
|
||||||
free(__pyx_v_s_proc.data);
|
free(__pyx_v_s_proc.data);
|
||||||
|
|
||||||
/* "cpp_common.pxd":97
|
/* "cpp_common.pxd":98
|
||||||
* except Exception as e:
|
* except Exception as e:
|
||||||
* free(s_proc.data)
|
* free(s_proc.data)
|
||||||
* s_proc.data = NULL # <<<<<<<<<<<<<<
|
* s_proc.data = NULL # <<<<<<<<<<<<<<
|
||||||
|
@ -7247,7 +7247,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_sequence(PyObject *__
|
||||||
*/
|
*/
|
||||||
__pyx_v_s_proc.data = NULL;
|
__pyx_v_s_proc.data = NULL;
|
||||||
|
|
||||||
/* "cpp_common.pxd":98
|
/* "cpp_common.pxd":99
|
||||||
* free(s_proc.data)
|
* free(s_proc.data)
|
||||||
* s_proc.data = NULL
|
* s_proc.data = NULL
|
||||||
* raise # <<<<<<<<<<<<<<
|
* raise # <<<<<<<<<<<<<<
|
||||||
|
@ -7259,10 +7259,10 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_sequence(PyObject *__
|
||||||
__Pyx_XGIVEREF(__pyx_t_16);
|
__Pyx_XGIVEREF(__pyx_t_16);
|
||||||
__Pyx_ErrRestoreWithState(__pyx_t_9, __pyx_t_15, __pyx_t_16);
|
__Pyx_ErrRestoreWithState(__pyx_t_9, __pyx_t_15, __pyx_t_16);
|
||||||
__pyx_t_9 = 0; __pyx_t_15 = 0; __pyx_t_16 = 0;
|
__pyx_t_9 = 0; __pyx_t_15 = 0; __pyx_t_16 = 0;
|
||||||
__PYX_ERR(1, 98, __pyx_L20_error)
|
__PYX_ERR(1, 99, __pyx_L20_error)
|
||||||
}
|
}
|
||||||
|
|
||||||
/* "cpp_common.pxd":95
|
/* "cpp_common.pxd":96
|
||||||
* else:
|
* else:
|
||||||
* (<uint64_t*>s_proc.data)[i] = <uint64_t>hash(elem)
|
* (<uint64_t*>s_proc.data)[i] = <uint64_t>hash(elem)
|
||||||
* except Exception as e: # <<<<<<<<<<<<<<
|
* except Exception as e: # <<<<<<<<<<<<<<
|
||||||
|
@ -7306,7 +7306,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_sequence(PyObject *__
|
||||||
goto __pyx_L6_except_error;
|
goto __pyx_L6_except_error;
|
||||||
__pyx_L6_except_error:;
|
__pyx_L6_except_error:;
|
||||||
|
|
||||||
/* "cpp_common.pxd":86
|
/* "cpp_common.pxd":87
|
||||||
* raise MemoryError
|
* raise MemoryError
|
||||||
*
|
*
|
||||||
* try: # <<<<<<<<<<<<<<
|
* try: # <<<<<<<<<<<<<<
|
||||||
|
@ -7321,7 +7321,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_sequence(PyObject *__
|
||||||
__pyx_L9_try_end:;
|
__pyx_L9_try_end:;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* "cpp_common.pxd":100
|
/* "cpp_common.pxd":101
|
||||||
* raise
|
* raise
|
||||||
*
|
*
|
||||||
* s_proc.allocated = True # <<<<<<<<<<<<<<
|
* s_proc.allocated = True # <<<<<<<<<<<<<<
|
||||||
|
@ -7329,7 +7329,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_sequence(PyObject *__
|
||||||
*/
|
*/
|
||||||
__pyx_v_s_proc.allocated = 1;
|
__pyx_v_s_proc.allocated = 1;
|
||||||
|
|
||||||
/* "cpp_common.pxd":101
|
/* "cpp_common.pxd":102
|
||||||
*
|
*
|
||||||
* s_proc.allocated = True
|
* s_proc.allocated = True
|
||||||
* return move(s_proc) # <<<<<<<<<<<<<<
|
* return move(s_proc) # <<<<<<<<<<<<<<
|
||||||
|
@ -7337,7 +7337,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_sequence(PyObject *__
|
||||||
__pyx_r = cython_std::move<proc_string>(__pyx_v_s_proc);
|
__pyx_r = cython_std::move<proc_string>(__pyx_v_s_proc);
|
||||||
goto __pyx_L0;
|
goto __pyx_L0;
|
||||||
|
|
||||||
/* "cpp_common.pxd":77
|
/* "cpp_common.pxd":78
|
||||||
*
|
*
|
||||||
*
|
*
|
||||||
* cdef inline proc_string hash_sequence(seq) except *: # <<<<<<<<<<<<<<
|
* cdef inline proc_string hash_sequence(seq) except *: # <<<<<<<<<<<<<<
|
||||||
|
@ -7463,8 +7463,8 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = {
|
||||||
};
|
};
|
||||||
/* #### Code section: cached_builtins ### */
|
/* #### Code section: cached_builtins ### */
|
||||||
static CYTHON_SMALL_CODE int __Pyx_InitCachedBuiltins(void) {
|
static CYTHON_SMALL_CODE int __Pyx_InitCachedBuiltins(void) {
|
||||||
__pyx_builtin_MemoryError = __Pyx_GetBuiltinName(__pyx_n_s_MemoryError); if (!__pyx_builtin_MemoryError) __PYX_ERR(1, 36, __pyx_L1_error)
|
__pyx_builtin_MemoryError = __Pyx_GetBuiltinName(__pyx_n_s_MemoryError); if (!__pyx_builtin_MemoryError) __PYX_ERR(1, 37, __pyx_L1_error)
|
||||||
__pyx_builtin_range = __Pyx_GetBuiltinName(__pyx_n_s_range); if (!__pyx_builtin_range) __PYX_ERR(1, 42, __pyx_L1_error)
|
__pyx_builtin_range = __Pyx_GetBuiltinName(__pyx_n_s_range); if (!__pyx_builtin_range) __PYX_ERR(1, 43, __pyx_L1_error)
|
||||||
return 0;
|
return 0;
|
||||||
__pyx_L1_error:;
|
__pyx_L1_error:;
|
||||||
return -1;
|
return -1;
|
||||||
|
@ -8252,7 +8252,7 @@ if (!__Pyx_RefNanny) {
|
||||||
if (PyDict_SetItem(__pyx_d, __pyx_n_s_test, __pyx_t_1) < 0) __PYX_ERR(0, 1, __pyx_L1_error)
|
if (PyDict_SetItem(__pyx_d, __pyx_n_s_test, __pyx_t_1) < 0) __PYX_ERR(0, 1, __pyx_L1_error)
|
||||||
__Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
|
__Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
|
||||||
|
|
||||||
/* "cpp_common.pxd":77
|
/* "cpp_common.pxd":78
|
||||||
*
|
*
|
||||||
*
|
*
|
||||||
* cdef inline proc_string hash_sequence(seq) except *: # <<<<<<<<<<<<<<
|
* cdef inline proc_string hash_sequence(seq) except *: # <<<<<<<<<<<<<<
|
||||||
|
|
|
@ -42,9 +42,9 @@ def ratio(s1, s2, *, processor=None, score_cutoff=None):
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
s1 : str
|
s1 : Sequence[Hashable]
|
||||||
First string to compare.
|
First string to compare.
|
||||||
s2 : str
|
s2 : Sequence[Hashable]
|
||||||
Second string to compare.
|
Second string to compare.
|
||||||
processor: bool or callable, optional
|
processor: bool or callable, optional
|
||||||
Optional callable that is used to preprocess the strings before
|
Optional callable that is used to preprocess the strings before
|
||||||
|
@ -94,9 +94,9 @@ def partial_ratio(s1, s2, *, processor=None, score_cutoff=None):
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
s1 : str
|
s1 : Sequence[Hashable]
|
||||||
First string to compare.
|
First string to compare.
|
||||||
s2 : str
|
s2 : Sequence[Hashable]
|
||||||
Second string to compare.
|
Second string to compare.
|
||||||
processor: bool or callable, optional
|
processor: bool or callable, optional
|
||||||
Optional callable that is used to preprocess the strings before
|
Optional callable that is used to preprocess the strings before
|
||||||
|
@ -173,9 +173,9 @@ def token_sort_ratio(s1, s2, *, processor=True, score_cutoff=None):
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
s1 : str
|
s1 : Sequence[Hashable]
|
||||||
First string to compare.
|
First string to compare.
|
||||||
s2 : str
|
s2 : Sequence[Hashable]
|
||||||
Second string to compare.
|
Second string to compare.
|
||||||
processor: bool or callable, optional
|
processor: bool or callable, optional
|
||||||
Optional callable that is used to preprocess the strings before
|
Optional callable that is used to preprocess the strings before
|
||||||
|
@ -221,9 +221,9 @@ def token_set_ratio(s1, s2, *, processor=True, score_cutoff=None):
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
s1 : str
|
s1 : Sequence[Hashable]
|
||||||
First string to compare.
|
First string to compare.
|
||||||
s2 : str
|
s2 : Sequence[Hashable]
|
||||||
Second string to compare.
|
Second string to compare.
|
||||||
processor: bool or callable, optional
|
processor: bool or callable, optional
|
||||||
Optional callable that is used to preprocess the strings before
|
Optional callable that is used to preprocess the strings before
|
||||||
|
@ -271,9 +271,9 @@ def token_ratio(s1, s2, *, processor=True, score_cutoff=None):
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
s1 : str
|
s1 : Sequence[Hashable]
|
||||||
First string to compare.
|
First string to compare.
|
||||||
s2 : str
|
s2 : Sequence[Hashable]
|
||||||
Second string to compare.
|
Second string to compare.
|
||||||
processor: bool or callable, optional
|
processor: bool or callable, optional
|
||||||
Optional callable that is used to preprocess the strings before
|
Optional callable that is used to preprocess the strings before
|
||||||
|
@ -313,9 +313,9 @@ def partial_token_sort_ratio(s1, s2, *, processor=True, score_cutoff=None):
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
s1 : str
|
s1 : Sequence[Hashable]
|
||||||
First string to compare.
|
First string to compare.
|
||||||
s2 : str
|
s2 : Sequence[Hashable]
|
||||||
Second string to compare.
|
Second string to compare.
|
||||||
processor: bool or callable, optional
|
processor: bool or callable, optional
|
||||||
Optional callable that is used to preprocess the strings before
|
Optional callable that is used to preprocess the strings before
|
||||||
|
@ -356,9 +356,9 @@ def partial_token_set_ratio(s1, s2, *, processor=True, score_cutoff=None):
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
s1 : str
|
s1 : Sequence[Hashable]
|
||||||
First string to compare.
|
First string to compare.
|
||||||
s2 : str
|
s2 : Sequence[Hashable]
|
||||||
Second string to compare.
|
Second string to compare.
|
||||||
processor: bool or callable, optional
|
processor: bool or callable, optional
|
||||||
Optional callable that is used to preprocess the strings before
|
Optional callable that is used to preprocess the strings before
|
||||||
|
@ -399,9 +399,9 @@ def partial_token_ratio(s1, s2, *, processor=True, score_cutoff=None):
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
s1 : str
|
s1 : Sequence[Hashable]
|
||||||
First string to compare.
|
First string to compare.
|
||||||
s2 : str
|
s2 : Sequence[Hashable]
|
||||||
Second string to compare.
|
Second string to compare.
|
||||||
processor: bool or callable, optional
|
processor: bool or callable, optional
|
||||||
Optional callable that is used to preprocess the strings before
|
Optional callable that is used to preprocess the strings before
|
||||||
|
@ -441,9 +441,9 @@ def WRatio(s1, s2, *, processor=True, score_cutoff=None):
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
s1 : str
|
s1 : Sequence[Hashable]
|
||||||
First string to compare.
|
First string to compare.
|
||||||
s2 : str
|
s2 : Sequence[Hashable]
|
||||||
Second string to compare.
|
Second string to compare.
|
||||||
processor: bool or callable, optional
|
processor: bool or callable, optional
|
||||||
Optional callable that is used to preprocess the strings before
|
Optional callable that is used to preprocess the strings before
|
||||||
|
@ -485,9 +485,9 @@ def QRatio(s1, s2, *, processor=True, score_cutoff=None):
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
s1 : str
|
s1 : Sequence[Hashable]
|
||||||
First string to compare.
|
First string to compare.
|
||||||
s2 : str
|
s2 : Sequence[Hashable]
|
||||||
Second string to compare.
|
Second string to compare.
|
||||||
processor: bool or callable, optional
|
processor: bool or callable, optional
|
||||||
Optional callable that is used to preprocess the strings before
|
Optional callable that is used to preprocess the strings before
|
||||||
|
|
File diff suppressed because one or more lines are too long
|
@ -28,16 +28,21 @@ from rapidfuzz.fuzz import (
|
||||||
from libcpp.vector cimport vector
|
from libcpp.vector cimport vector
|
||||||
from libcpp cimport algorithm
|
from libcpp cimport algorithm
|
||||||
from libcpp.utility cimport move
|
from libcpp.utility cimport move
|
||||||
|
from libc.stdint cimport uint8_t, int32_t
|
||||||
|
from libc.math cimport floor
|
||||||
|
|
||||||
from cpython.list cimport PyList_New, PyList_SET_ITEM
|
from cpython.list cimport PyList_New, PyList_SET_ITEM
|
||||||
from cpython.object cimport PyObject
|
from cpython.object cimport PyObject
|
||||||
from cpython.ref cimport Py_INCREF, Py_DECREF
|
from cpython.ref cimport Py_INCREF, Py_DECREF
|
||||||
|
|
||||||
from cpp_common cimport proc_string, is_valid_string, convert_string, hash_array, hash_sequence
|
from cpp_common cimport proc_string, is_valid_string, convert_string, hash_array, hash_sequence, default_process_func
|
||||||
|
|
||||||
import heapq
|
import heapq
|
||||||
from array import array
|
from array import array
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
cimport numpy as np
|
||||||
|
cimport cython
|
||||||
|
|
||||||
cdef inline proc_string conv_sequence(seq) except *:
|
cdef inline proc_string conv_sequence(seq) except *:
|
||||||
if is_valid_string(seq):
|
if is_valid_string(seq):
|
||||||
|
@ -50,15 +55,15 @@ cdef inline proc_string conv_sequence(seq) except *:
|
||||||
cdef extern from "cpp_process.hpp":
|
cdef extern from "cpp_process.hpp":
|
||||||
cdef cppclass CachedScorerContext:
|
cdef cppclass CachedScorerContext:
|
||||||
CachedScorerContext()
|
CachedScorerContext()
|
||||||
double ratio(const proc_string&, double) except +
|
double ratio(const proc_string&, double) nogil except +
|
||||||
|
|
||||||
cdef cppclass CachedDistanceContext:
|
cdef cppclass CachedDistanceContext:
|
||||||
CachedDistanceContext()
|
CachedDistanceContext()
|
||||||
size_t ratio(const proc_string&, size_t) except +
|
size_t ratio(const proc_string&, size_t) nogil except +
|
||||||
|
|
||||||
# normalized distances
|
# normalized distances
|
||||||
# fuzz
|
# fuzz
|
||||||
CachedScorerContext cached_ratio_init( const proc_string&, int) except +
|
CachedScorerContext cached_ratio_init( const proc_string&, int) nogil except +
|
||||||
CachedScorerContext cached_partial_ratio_init( const proc_string&, int) except +
|
CachedScorerContext cached_partial_ratio_init( const proc_string&, int) except +
|
||||||
CachedScorerContext cached_token_sort_ratio_init( const proc_string&, int) except +
|
CachedScorerContext cached_token_sort_ratio_init( const proc_string&, int) except +
|
||||||
CachedScorerContext cached_token_set_ratio_init( const proc_string&, int) except +
|
CachedScorerContext cached_token_set_ratio_init( const proc_string&, int) except +
|
||||||
|
@ -226,7 +231,7 @@ cdef inline extractOne_dict(CachedScorerContext context, choices, processor, dou
|
||||||
for choice_key, choice in choices.items():
|
for choice_key, choice in choices.items():
|
||||||
if choice is None:
|
if choice is None:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
score = context.ratio(conv_sequence(choice), score_cutoff)
|
score = context.ratio(conv_sequence(choice), score_cutoff)
|
||||||
|
|
||||||
if score >= score_cutoff and score > result_score:
|
if score >= score_cutoff and score > result_score:
|
||||||
|
@ -400,8 +405,8 @@ cdef inline py_extractOne_dict(query, choices, scorer, processor, double score_c
|
||||||
score = scorer(query, processor(choice), **kwargs)
|
score = scorer(query, processor(choice), **kwargs)
|
||||||
|
|
||||||
if score >= score_cutoff and score > result_score:
|
if score >= score_cutoff and score > result_score:
|
||||||
kwargs["score_cutoff"] = score_cutoff
|
|
||||||
score_cutoff = score
|
score_cutoff = score
|
||||||
|
kwargs["score_cutoff"] = score
|
||||||
result_score = score
|
result_score = score
|
||||||
result_choice = choice
|
result_choice = choice
|
||||||
result_key = choice_key
|
result_key = choice_key
|
||||||
|
@ -416,8 +421,8 @@ cdef inline py_extractOne_dict(query, choices, scorer, processor, double score_c
|
||||||
score = scorer(query, choice, **kwargs)
|
score = scorer(query, choice, **kwargs)
|
||||||
|
|
||||||
if score >= score_cutoff and score > result_score:
|
if score >= score_cutoff and score > result_score:
|
||||||
kwargs["score_cutoff"] = score_cutoff
|
|
||||||
score_cutoff = score
|
score_cutoff = score
|
||||||
|
kwargs["score_cutoff"] = score
|
||||||
result_score = score
|
result_score = score
|
||||||
result_choice = choice
|
result_choice = choice
|
||||||
result_key = choice_key
|
result_key = choice_key
|
||||||
|
@ -445,8 +450,8 @@ cdef inline py_extractOne_list(query, choices, scorer, processor, double score_c
|
||||||
score = scorer(query, processor(choice), **kwargs)
|
score = scorer(query, processor(choice), **kwargs)
|
||||||
|
|
||||||
if score >= score_cutoff and score > result_score:
|
if score >= score_cutoff and score > result_score:
|
||||||
kwargs["score_cutoff"] = score_cutoff
|
|
||||||
score_cutoff = score
|
score_cutoff = score
|
||||||
|
kwargs["score_cutoff"] = score
|
||||||
result_score = score
|
result_score = score
|
||||||
result_choice = choice
|
result_choice = choice
|
||||||
result_index = i
|
result_index = i
|
||||||
|
@ -461,8 +466,8 @@ cdef inline py_extractOne_list(query, choices, scorer, processor, double score_c
|
||||||
score = scorer(query, choice, **kwargs)
|
score = scorer(query, choice, **kwargs)
|
||||||
|
|
||||||
if score >= score_cutoff and score > result_score:
|
if score >= score_cutoff and score > result_score:
|
||||||
kwargs["score_cutoff"] = score_cutoff
|
|
||||||
score_cutoff = score
|
score_cutoff = score
|
||||||
|
kwargs["score_cutoff"] = score
|
||||||
result_score = score
|
result_score = score
|
||||||
result_choice = choice
|
result_choice = choice
|
||||||
result_index = i
|
result_index = i
|
||||||
|
@ -480,9 +485,9 @@ def extractOne(query, choices, *, scorer=WRatio, processor=default_process, scor
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
query : str
|
query : Sequence[Hashable]
|
||||||
string we want to find
|
string we want to find
|
||||||
choices : Iterable
|
choices : Iterable[Sequence[Hashable]] | Mapping[Sequence[Hashable]]
|
||||||
list of all strings the query should be compared with or dict with a mapping
|
list of all strings the query should be compared with or dict with a mapping
|
||||||
{<result>: <string to compare>}
|
{<result>: <string to compare>}
|
||||||
scorer : Callable, optional
|
scorer : Callable, optional
|
||||||
|
@ -506,7 +511,7 @@ def extractOne(query, choices, *, scorer=WRatio, processor=default_process, scor
|
||||||
|
|
||||||
Returns
|
Returns
|
||||||
-------
|
-------
|
||||||
Tuple[str, Any, Any]
|
Tuple[Sequence[Hashable], Any, Any]
|
||||||
Returns the best match in form of a Tuple with 3 elements. The values stored in the
|
Returns the best match in form of a Tuple with 3 elements. The values stored in the
|
||||||
tuple depend on the types of the input arguments.
|
tuple depend on the types of the input arguments.
|
||||||
|
|
||||||
|
@ -634,7 +639,7 @@ def extractOne(query, choices, *, scorer=WRatio, processor=default_process, scor
|
||||||
return extractOne_dict(move(ScorerContext), choices, processor, c_score_cutoff)
|
return extractOne_dict(move(ScorerContext), choices, processor, c_score_cutoff)
|
||||||
else:
|
else:
|
||||||
return extractOne_list(move(ScorerContext), choices, processor, c_score_cutoff)
|
return extractOne_list(move(ScorerContext), choices, processor, c_score_cutoff)
|
||||||
|
|
||||||
if IsIntegratedDistance(scorer):
|
if IsIntegratedDistance(scorer):
|
||||||
# distance implemented in C++
|
# distance implemented in C++
|
||||||
query_context = conv_sequence(query)
|
query_context = conv_sequence(query)
|
||||||
|
@ -804,13 +809,13 @@ cdef inline extract_list(CachedScorerContext context, choices, processor, size_t
|
||||||
for i, choice in enumerate(choices):
|
for i, choice in enumerate(choices):
|
||||||
if choice is None:
|
if choice is None:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
proc_choice = processor(choice)
|
proc_choice = processor(choice)
|
||||||
if proc_choice is None:
|
if proc_choice is None:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
score = context.ratio(conv_sequence(proc_choice), score_cutoff)
|
score = context.ratio(conv_sequence(proc_choice), score_cutoff)
|
||||||
|
|
||||||
if score >= score_cutoff:
|
if score >= score_cutoff:
|
||||||
Py_INCREF(choice)
|
Py_INCREF(choice)
|
||||||
results.push_back(ListMatchScorerElem(score, i, <PyObject*>choice))
|
results.push_back(ListMatchScorerElem(score, i, <PyObject*>choice))
|
||||||
|
@ -818,23 +823,23 @@ cdef inline extract_list(CachedScorerContext context, choices, processor, size_t
|
||||||
for i, choice in enumerate(choices):
|
for i, choice in enumerate(choices):
|
||||||
if choice is None:
|
if choice is None:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
score = context.ratio(conv_sequence(choice), score_cutoff)
|
score = context.ratio(conv_sequence(choice), score_cutoff)
|
||||||
|
|
||||||
if score >= score_cutoff:
|
if score >= score_cutoff:
|
||||||
Py_INCREF(choice)
|
Py_INCREF(choice)
|
||||||
results.push_back(ListMatchScorerElem(score, i, <PyObject*>choice))
|
results.push_back(ListMatchScorerElem(score, i, <PyObject*>choice))
|
||||||
|
|
||||||
# due to score_cutoff not always completely filled
|
# due to score_cutoff not always completely filled
|
||||||
if limit > results.size():
|
if limit > results.size():
|
||||||
limit = results.size()
|
limit = results.size()
|
||||||
|
|
||||||
if limit >= results.size():
|
if limit >= results.size():
|
||||||
algorithm.sort(results.begin(), results.end(), ExtractScorerComp())
|
algorithm.sort(results.begin(), results.end(), ExtractScorerComp())
|
||||||
else:
|
else:
|
||||||
algorithm.partial_sort(results.begin(), results.begin() + <ptrdiff_t>limit, results.end(), ExtractScorerComp())
|
algorithm.partial_sort(results.begin(), results.begin() + <ptrdiff_t>limit, results.end(), ExtractScorerComp())
|
||||||
results.resize(limit)
|
results.resize(limit)
|
||||||
|
|
||||||
# copy elements into Python List
|
# copy elements into Python List
|
||||||
result_list = PyList_New(<Py_ssize_t>limit)
|
result_list = PyList_New(<Py_ssize_t>limit)
|
||||||
for i in range(limit):
|
for i in range(limit):
|
||||||
|
@ -863,13 +868,13 @@ cdef inline extract_distance_list(CachedDistanceContext context, choices, proces
|
||||||
for i, choice in enumerate(choices):
|
for i, choice in enumerate(choices):
|
||||||
if choice is None:
|
if choice is None:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
proc_choice = processor(choice)
|
proc_choice = processor(choice)
|
||||||
if proc_choice is None:
|
if proc_choice is None:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
distance = context.ratio(conv_sequence(proc_choice), max_)
|
distance = context.ratio(conv_sequence(proc_choice), max_)
|
||||||
|
|
||||||
if distance <= max_:
|
if distance <= max_:
|
||||||
Py_INCREF(choice)
|
Py_INCREF(choice)
|
||||||
results.push_back(ListMatchDistanceElem(distance, i, <PyObject*>choice))
|
results.push_back(ListMatchDistanceElem(distance, i, <PyObject*>choice))
|
||||||
|
@ -877,23 +882,23 @@ cdef inline extract_distance_list(CachedDistanceContext context, choices, proces
|
||||||
for i, choice in enumerate(choices):
|
for i, choice in enumerate(choices):
|
||||||
if choice is None:
|
if choice is None:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
distance = context.ratio(conv_sequence(choice), max_)
|
distance = context.ratio(conv_sequence(choice), max_)
|
||||||
|
|
||||||
if distance <= max_:
|
if distance <= max_:
|
||||||
Py_INCREF(choice)
|
Py_INCREF(choice)
|
||||||
results.push_back(ListMatchDistanceElem(distance, i, <PyObject*>choice))
|
results.push_back(ListMatchDistanceElem(distance, i, <PyObject*>choice))
|
||||||
|
|
||||||
# due to max_ not always completely filled
|
# due to max_ not always completely filled
|
||||||
if limit > results.size():
|
if limit > results.size():
|
||||||
limit = results.size()
|
limit = results.size()
|
||||||
|
|
||||||
if limit >= results.size():
|
if limit >= results.size():
|
||||||
algorithm.sort(results.begin(), results.end(), ExtractDistanceComp())
|
algorithm.sort(results.begin(), results.end(), ExtractDistanceComp())
|
||||||
else:
|
else:
|
||||||
algorithm.partial_sort(results.begin(), results.begin() + <ptrdiff_t>limit, results.end(), ExtractDistanceComp())
|
algorithm.partial_sort(results.begin(), results.begin() + <ptrdiff_t>limit, results.end(), ExtractDistanceComp())
|
||||||
results.resize(limit)
|
results.resize(limit)
|
||||||
|
|
||||||
# copy elements into Python List
|
# copy elements into Python List
|
||||||
result_list = PyList_New(<Py_ssize_t>limit)
|
result_list = PyList_New(<Py_ssize_t>limit)
|
||||||
for i in range(limit):
|
for i in range(limit):
|
||||||
|
@ -980,9 +985,9 @@ def extract(query, choices, *, scorer=WRatio, processor=default_process, limit=5
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
query : str
|
query : Sequence[Hashable]
|
||||||
string we want to find
|
string we want to find
|
||||||
choices : Iterable
|
choices : Collection[Sequence[Hashable]] | Mapping[Sequence[Hashable]]
|
||||||
list of all strings the query should be compared with or dict with a mapping
|
list of all strings the query should be compared with or dict with a mapping
|
||||||
{<result>: <string to compare>}
|
{<result>: <string to compare>}
|
||||||
scorer : Callable, optional
|
scorer : Callable, optional
|
||||||
|
@ -1008,7 +1013,7 @@ def extract(query, choices, *, scorer=WRatio, processor=default_process, limit=5
|
||||||
|
|
||||||
Returns
|
Returns
|
||||||
-------
|
-------
|
||||||
List[Tuple[str, Any, Any]]
|
List[Tuple[Sequence[Hashable], Any, Any]]
|
||||||
The return type is always a List of Tuples with 3 elements. However the values stored in the
|
The return type is always a List of Tuples with 3 elements. However the values stored in the
|
||||||
tuple depend on the types of the input arguments.
|
tuple depend on the types of the input arguments.
|
||||||
|
|
||||||
|
@ -1107,9 +1112,9 @@ def extract_iter(query, choices, *, scorer=WRatio, processor=default_process, sc
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
query : str
|
query : Sequence[Hashable]
|
||||||
string we want to find
|
string we want to find
|
||||||
choices : Iterable
|
choices : Iterable[Sequence[Hashable]] | Mapping[Sequence[Hashable]]
|
||||||
list of all strings the query should be compared with or dict with a mapping
|
list of all strings the query should be compared with or dict with a mapping
|
||||||
{<result>: <string to compare>}
|
{<result>: <string to compare>}
|
||||||
scorer : Callable, optional
|
scorer : Callable, optional
|
||||||
|
@ -1133,7 +1138,7 @@ def extract_iter(query, choices, *, scorer=WRatio, processor=default_process, sc
|
||||||
|
|
||||||
Yields
|
Yields
|
||||||
-------
|
-------
|
||||||
Tuple[str, Any, Any]
|
Tuple[Sequence[Hashable], Any, Any]
|
||||||
Yields similarity between the query and each choice in form of a Tuple with 3 elements.
|
Yields similarity between the query and each choice in form of a Tuple with 3 elements.
|
||||||
The values stored in the tuple depend on the types of the input arguments.
|
The values stored in the tuple depend on the types of the input arguments.
|
||||||
|
|
||||||
|
@ -1408,3 +1413,325 @@ def extract_iter(query, choices, *, scorer=WRatio, processor=default_process, sc
|
||||||
yield from py_extract_iter_dict()
|
yield from py_extract_iter_dict()
|
||||||
else:
|
else:
|
||||||
yield from py_extract_iter_list()
|
yield from py_extract_iter_list()
|
||||||
|
|
||||||
|
|
||||||
|
@cython.boundscheck(False)
@cython.wraparound(False)
cdef inline cdist_two_lists_similarity(
    const vector[proc_string]& queries,
    const vector[proc_string]& choices,
    scorer, score_cutoff, dict kwargs
):
    """
    Score every query against every choice with an integrated similarity scorer.

    Parameters
    ----------
    queries, choices : vector[proc_string]
        already converted (and, if requested, preprocessed) inputs
    scorer : Callable
        scorer handed to CachedScorerInit
    score_cutoff : Any
        minimal similarity; None is treated as 0
    kwargs : dict
        extra arguments handed to CachedScorerInit

    Returns
    -------
    ndarray
        uint8 matrix with one row per query and one column per choice,
        each entry being the floored scorer result

    Raises
    ------
    TypeError
        when score_cutoff lies outside of 0 - 100
    """
    cdef size_t n_rows = queries.size()
    cdef size_t n_cols = choices.size()
    cdef size_t row, col
    cdef double min_score = 0
    cdef np.ndarray[np.uint8_t, ndim=2] scores = np.empty((n_rows, n_cols), dtype=np.uint8)

    if score_cutoff is not None:
        min_score = score_cutoff
    if min_score < 0 or min_score > 100:
        raise TypeError("score_cutoff has to be in the range of 0.0 - 100.0")

    # results are stored floored, so the threshold is floored as well
    min_score = floor(min_score)

    for row in range(n_rows):
        # one cached scorer per query, reused for the whole row
        scorer_context = CachedScorerInit(scorer, queries[row], 0, kwargs)
        for col in range(n_cols):
            scores[row, col] = <uint8_t>floor(scorer_context.ratio(choices[col], min_score))

    return scores
|
||||||
|
|
||||||
|
@cython.boundscheck(False)
@cython.wraparound(False)
cdef inline cdist_two_lists_distance(
    const vector[proc_string]& queries, const vector[proc_string]& choices,
    scorer, score_cutoff, dict kwargs
):
    """
    Compute the distance of every query to every choice with an integrated
    distance scorer.

    Parameters
    ----------
    queries, choices : vector[proc_string]
        already converted (and, if requested, preprocessed) inputs
    scorer : Callable
        scorer handed to CachedDistanceInit
    score_cutoff : Any
        maximum distance; None and -1 both leave the limit at the largest size_t
    kwargs : dict
        extra arguments handed to CachedDistanceInit

    Returns
    -------
    ndarray
        int32 matrix with one row per query and one column per choice
    """
    cdef size_t n_rows = queries.size()
    cdef size_t n_cols = choices.size()
    cdef size_t row, col
    # largest size_t value is used as "no limit"
    cdef size_t max_distance = <size_t>-1
    cdef np.ndarray[np.int32_t, ndim=2] distances = np.empty((n_rows, n_cols), dtype=np.int32)

    if score_cutoff is not None and score_cutoff != -1:
        max_distance = score_cutoff

    for row in range(n_rows):
        # one cached scorer per query, reused for the whole row
        distance_context = CachedDistanceInit(scorer, queries[row], 0, kwargs)
        for col in range(n_cols):
            distances[row, col] = <int32_t>distance_context.ratio(choices[col], max_distance)

    return distances
|
||||||
|
|
||||||
|
@cython.boundscheck(False)
@cython.wraparound(False)
cdef inline py_cdist_two_lists(
    const vector[PyObject*]& queries, const vector[PyObject*]& choices,
    scorer, score_cutoff, dict kwargs
):
    """
    Score every query against every choice by calling the scorer through Python.

    Parameters
    ----------
    queries, choices : vector[PyObject*]
        Python objects to compare; the caller owns the references
    scorer : Callable
        called as ``scorer(query, choice, **kwargs)``
    score_cutoff : Any
        minimal similarity; None is treated as 0
    kwargs : dict
        keyword arguments for the scorer. ``processor`` and ``score_cutoff``
        are overwritten in this dict before the scorer is called.

    Returns
    -------
    ndarray
        uint8 matrix with one row per query and one column per choice,
        each entry being the floored scorer result

    Raises
    ------
    TypeError
        when score_cutoff lies outside of 0 - 100
    """
    cdef size_t n_rows = queries.size()
    cdef size_t n_cols = choices.size()
    cdef size_t row, col
    cdef double min_score = 0
    cdef np.ndarray[np.uint8_t, ndim=2] scores = np.empty((n_rows, n_cols), dtype=np.uint8)

    if score_cutoff is not None:
        min_score = score_cutoff
    if min_score < 0 or min_score > 100:
        raise TypeError("score_cutoff has to be in the range of 0.0 - 100.0")

    min_score = floor(min_score)

    # the inputs were already preprocessed by the caller, so the scorer
    # is told not to process them a second time
    kwargs["processor"] = None
    kwargs["score_cutoff"] = min_score

    for row in range(n_rows):
        for col in range(n_cols):
            scores[row, col] = <uint8_t>floor(
                <double>scorer(<object>queries[row], <object>choices[col], **kwargs))

    return scores
|
||||||
|
|
||||||
|
cdef cdist_two_lists(queries, choices, scorer, processor, score_cutoff, dict kwargs):
    """
    Preprocess both collections and dispatch to the matching matrix builder.

    Integrated similarity scorers go to cdist_two_lists_similarity, integrated
    distance scorers to cdist_two_lists_distance and every other callable to
    py_cdist_two_lists.

    Parameters
    ----------
    queries, choices : collections supporting len() and iteration
    scorer : Callable
    processor : None / False, True, default_process or another callable
    score_cutoff : Any
        forwarded unchanged to the matrix builder
    kwargs : dict
        forwarded unchanged to the matrix builder

    Returns
    -------
    ndarray
        matrix produced by the selected matrix builder
    """
    cdef vector[proc_string] query_strings
    cdef vector[proc_string] choice_strings
    cdef vector[PyObject*] query_refs
    cdef vector[PyObject*] choice_refs
    cdef size_t n_queries = <size_t>len(queries)
    cdef size_t n_choices = <size_t>len(choices)

    try:
        if IsIntegratedScorer(scorer) or IsIntegratedDistance(scorer):
            query_strings.reserve(n_queries)
            choice_strings.reserve(n_choices)

            if not processor:
                # no preprocessing requested (None / False)
                for query in queries:
                    query_strings.push_back(move(conv_sequence(query)))

                for choice in choices:
                    choice_strings.push_back(move(conv_sequence(choice)))
            elif processor is not default_process and callable(processor):
                # user supplied processor: has to be invoked through Python.
                # A reference to every processed object is held until the
                # finally block (presumably because the converted string
                # refers to the object's data - confirm in conv_sequence).
                query_refs.reserve(n_queries)
                for query in queries:
                    processed_query = processor(query)
                    Py_INCREF(processed_query)
                    query_refs.push_back(<PyObject*>processed_query)
                    query_strings.push_back(move(conv_sequence(processed_query)))

                choice_refs.reserve(n_choices)
                for choice in choices:
                    processed_choice = processor(choice)
                    Py_INCREF(processed_choice)
                    choice_refs.push_back(<PyObject*>processed_choice)
                    choice_strings.push_back(move(conv_sequence(processed_choice)))
            else:
                # processor is True / default_process: use the C++ implementation
                for query in queries:
                    query_strings.push_back(
                        move(default_process_func(move(conv_sequence(query))))
                    )

                for choice in choices:
                    choice_strings.push_back(
                        move(default_process_func(move(conv_sequence(choice))))
                    )

            if IsIntegratedScorer(scorer):
                return cdist_two_lists_similarity(query_strings, choice_strings, scorer, score_cutoff, kwargs)

            if IsIntegratedDistance(scorer):
                return cdist_two_lists_distance(query_strings, choice_strings, scorer, score_cutoff, kwargs)

        else:
            # scorer is not integrated: everything stays a Python object
            query_refs.reserve(n_queries)
            choice_refs.reserve(n_choices)

            if not processor:
                # no preprocessing requested (None / False)
                for query in queries:
                    Py_INCREF(query)
                    query_refs.push_back(<PyObject*>query)

                for choice in choices:
                    Py_INCREF(choice)
                    choice_refs.push_back(<PyObject*>choice)
            else:
                # any truthy non-callable value selects default_process
                if not callable(processor):
                    processor = default_process

                for query in queries:
                    processed_query = processor(query)
                    Py_INCREF(processed_query)
                    query_refs.push_back(<PyObject*>processed_query)

                for choice in choices:
                    processed_choice = processor(choice)
                    Py_INCREF(processed_choice)
                    choice_refs.push_back(<PyObject*>processed_choice)

            return py_cdist_two_lists(query_refs, choice_refs, scorer, score_cutoff, kwargs)

    finally:
        # release every reference taken above, on success and on error
        for held in query_refs:
            Py_DECREF(<object>held)

        for held in choice_refs:
            Py_DECREF(<object>held)
|
||||||
|
|
||||||
|
@cython.boundscheck(False)
@cython.wraparound(False)
cdef inline cdist_single_list_similarity(
    const vector[proc_string]& queries, scorer, score_cutoff, dict kwargs
):
    """
    Score every query against every other query with an integrated
    similarity scorer.

    Only the diagonal and the upper triangle are computed; the upper triangle
    is mirrored into the lower one, so the scorer is treated as symmetric.

    Parameters
    ----------
    queries : vector[proc_string]
        already converted (and, if requested, preprocessed) inputs
    scorer : Callable
        scorer handed to CachedScorerInit
    score_cutoff : Any
        minimal similarity; None is treated as 0
    kwargs : dict
        extra arguments handed to CachedScorerInit

    Returns
    -------
    ndarray
        square uint8 matrix holding the floored scorer results

    Raises
    ------
    TypeError
        when score_cutoff lies outside of 0 - 100
    """
    cdef size_t queries_len = queries.size()
    cdef size_t i, j
    cdef uint8_t score
    cdef double c_score_cutoff = 0
    cdef np.ndarray[np.uint8_t, ndim=2] matrix = np.empty((queries_len, queries_len), dtype=np.uint8)

    if score_cutoff is not None:
        c_score_cutoff = score_cutoff
    if c_score_cutoff < 0 or c_score_cutoff > 100:
        raise TypeError("score_cutoff has to be in the range of 0.0 - 100.0")

    c_score_cutoff = floor(c_score_cutoff)

    for i in range(queries_len):
        ScorerContext = CachedScorerInit(scorer, queries[i], 0, kwargs)

        # The diagonal is computed instead of being assumed to be 100, so the
        # result is the same as the two-list code path for every scorer,
        # whatever the scorer returns when an input is compared with itself.
        matrix[i, i] = <uint8_t>floor(ScorerContext.ratio(queries[i], c_score_cutoff))

        for j in range(i + 1, queries_len):
            score = <uint8_t>floor(ScorerContext.ratio(queries[j], c_score_cutoff))
            matrix[i, j] = score
            matrix[j, i] = score

    return matrix
|
||||||
|
|
||||||
|
@cython.boundscheck(False)
@cython.wraparound(False)
cdef inline cdist_single_list_distance(
    const vector[proc_string]& queries, scorer, score_cutoff, dict kwargs
):
    """
    Compute the distance of every query to every other query with an
    integrated distance scorer.

    The diagonal is set to 0 and only the upper triangle is computed; it is
    mirrored into the lower one, so the scorer is treated as symmetric.

    Parameters
    ----------
    queries : vector[proc_string]
        already converted (and, if requested, preprocessed) inputs
    scorer : Callable
        scorer handed to CachedDistanceInit
    score_cutoff : Any
        maximum distance; None and -1 both leave the limit at the largest size_t
    kwargs : dict
        extra arguments handed to CachedDistanceInit

    Returns
    -------
    ndarray
        square int32 matrix
    """
    cdef size_t n_queries = queries.size()
    cdef size_t row, col
    # largest size_t value is used as "no limit"
    cdef size_t max_distance = <size_t>-1
    cdef np.ndarray[np.int32_t, ndim=2] distances = np.empty((n_queries, n_queries), dtype=np.int32)

    if score_cutoff is not None and score_cutoff != -1:
        max_distance = score_cutoff

    for row in range(n_queries):
        distances[row, row] = 0
        distance_context = CachedDistanceInit(scorer, queries[row], 0, kwargs)
        for col in range(row + 1, n_queries):
            score = <int32_t>distance_context.ratio(queries[col], max_distance)
            # write the value to both halves of the matrix
            distances[row, col] = score
            distances[col, row] = score

    return distances
|
||||||
|
|
||||||
|
cdef cdist_single_list(queries, scorer, processor, score_cutoff, dict kwargs):
    """
    Preprocess a single collection and compare it with itself.

    Integrated similarity scorers go to cdist_single_list_similarity,
    integrated distance scorers to cdist_single_list_distance. Every other
    callable is evaluated for the full matrix through py_cdist_two_lists.

    Parameters
    ----------
    queries : collection supporting len() and iteration
    scorer : Callable
    processor : None / False, True, default_process or another callable
    score_cutoff : Any
        forwarded unchanged to the matrix builder
    kwargs : dict
        forwarded unchanged to the matrix builder

    Returns
    -------
    ndarray
        matrix produced by the selected matrix builder
    """
    cdef size_t n_queries = <size_t>len(queries)

    cdef vector[proc_string] query_strings
    cdef vector[PyObject*] query_refs

    try:
        if IsIntegratedScorer(scorer) or IsIntegratedDistance(scorer):
            query_strings.reserve(n_queries)

            if not processor:
                # no preprocessing requested (None / False)
                for query in queries:
                    query_strings.push_back(move(conv_sequence(query)))
            elif processor is not default_process and callable(processor):
                # user supplied processor: has to be invoked through Python.
                # A reference to every processed object is held until the
                # finally block (presumably because the converted string
                # refers to the object's data - confirm in conv_sequence).
                query_refs.reserve(n_queries)
                for query in queries:
                    processed_query = processor(query)
                    Py_INCREF(processed_query)
                    query_refs.push_back(<PyObject*>processed_query)
                    query_strings.push_back(move(conv_sequence(processed_query)))
            else:
                # processor is True / default_process: use the C++ implementation
                for query in queries:
                    query_strings.push_back(
                        move(default_process_func(move(conv_sequence(query))))
                    )

            if IsIntegratedScorer(scorer):
                return cdist_single_list_similarity(query_strings, scorer, score_cutoff, kwargs)

            if IsIntegratedDistance(scorer):
                return cdist_single_list_distance(query_strings, scorer, score_cutoff, kwargs)

        else:
            # scorer is not integrated: everything stays a Python object
            query_refs.reserve(n_queries)

            if not processor:
                # no preprocessing requested (None / False)
                for query in queries:
                    Py_INCREF(query)
                    query_refs.push_back(<PyObject*>query)
            else:
                # any truthy non-callable value selects default_process
                if not callable(processor):
                    processor = default_process

                for query in queries:
                    processed_query = processor(query)
                    Py_INCREF(processed_query)
                    query_refs.push_back(<PyObject*>processed_query)

            # scorer(a, b) might not be equal to scorer(b, a), so the full
            # matrix is evaluated instead of mirroring one triangle
            return py_cdist_two_lists(query_refs, query_refs, scorer, score_cutoff, kwargs)

    finally:
        # release every reference taken above, on success and on error
        for held in query_refs:
            Py_DECREF(<object>held)
|
||||||
|
|
||||||
|
def cdist(queries, choices, *, scorer=ratio, processor=None, score_cutoff=None, **kwargs):
    """
    Compute distance/similarity between each pair of the two collections of inputs.

    Parameters
    ----------
    queries : Collection[Sequence[Hashable]]
        list of all strings that are used as queries
    choices : Collection[Sequence[Hashable]]
        list of all strings the queries should be compared with
    scorer : Callable, optional
        Optional callable that is used to calculate the matching score between
        the query and each choice. This can be any of the scorers included in RapidFuzz
        (both scorers that calculate the edit distance or the normalized edit distance).
        Any other callable is called through Python as
        ``scorer(query, choice, processor=None, score_cutoff=score_cutoff, **kwargs)``
        and its result is floored and stored as uint8.
        fuzz.ratio is used by default.
    processor : Callable, optional
        Optional callable that is used to preprocess the strings before
        comparing them. When processor is True ``utils.default_process``
        is used. Default is None, which deactivates this behaviour.
    score_cutoff : Any, optional
        Optional argument for a score threshold. When an edit distance is used this represents the maximum
        edit distance and matches with a `distance > score_cutoff` are inserted as -1. When a
        normalized edit distance is used this represents the minimal similarity
        and matches with a `similarity < score_cutoff` are inserted as 0. For similarities
        the threshold has to be in the range of 0 - 100 and is floored before it is used.
        Default is None, which deactivates this behaviour.
    **kwargs : Any, optional
        any other named parameters are passed to the scorer. This can be used to pass
        e.g. weights to string_metric.levenshtein

    Returns
    -------
    ndarray
        Matrix with ``len(queries)`` rows and ``len(choices)`` columns. The element
        at ``[i, j]`` is the result of comparing ``queries[i]`` with ``choices[j]``.
        The dtype is int32 for the edit distances included in RapidFuzz and
        uint8 (floored scores) for every other scorer.

    Raises
    ------
    TypeError
        when a similarity is calculated and score_cutoff is outside of 0 - 100
    """
    # passing the very same object twice selects the single list
    # implementation, which exploits that both collections are identical
    if queries is choices:
        return cdist_single_list(queries, scorer, processor, score_cutoff, kwargs)
    else:
        return cdist_two_lists(queries, choices, scorer, processor, score_cutoff, kwargs)
|
File diff suppressed because one or more lines are too long
|
@ -55,9 +55,9 @@ def levenshtein(s1, s2, *, weights=(1,1,1), processor=None, max=None):
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
s1 : str
|
s1 : Sequence[Hashable]
|
||||||
First string to compare.
|
First string to compare.
|
||||||
s2 : str
|
s2 : Sequence[Hashable]
|
||||||
Second string to compare.
|
Second string to compare.
|
||||||
weights : Tuple[int, int, int] or None, optional
|
weights : Tuple[int, int, int] or None, optional
|
||||||
The weights for the three operations in the form
|
The weights for the three operations in the form
|
||||||
|
@ -259,9 +259,9 @@ def levenshtein_editops(s1, s2, *, processor=None):
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
s1 : str
|
s1 : Sequence[Hashable]
|
||||||
First string to compare.
|
First string to compare.
|
||||||
s2 : str
|
s2 : Sequence[Hashable]
|
||||||
Second string to compare.
|
Second string to compare.
|
||||||
processor: bool or callable, optional
|
processor: bool or callable, optional
|
||||||
Optional callable that is used to preprocess the strings before
|
Optional callable that is used to preprocess the strings before
|
||||||
|
@ -301,9 +301,9 @@ def normalized_levenshtein(s1, s2, *, weights=(1,1,1), processor=None, score_cut
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
s1 : str
|
s1 : Sequence[Hashable]
|
||||||
First string to compare.
|
First string to compare.
|
||||||
s2 : str
|
s2 : Sequence[Hashable]
|
||||||
Second string to compare.
|
Second string to compare.
|
||||||
weights : Tuple[int, int, int] or None, optional
|
weights : Tuple[int, int, int] or None, optional
|
||||||
The weights for the three operations in the form
|
The weights for the three operations in the form
|
||||||
|
@ -407,9 +407,9 @@ def hamming(s1, s2, *, processor=None, max=None):
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
s1 : str
|
s1 : Sequence[Hashable]
|
||||||
First string to compare.
|
First string to compare.
|
||||||
s2 : str
|
s2 : Sequence[Hashable]
|
||||||
Second string to compare.
|
Second string to compare.
|
||||||
processor: bool or callable, optional
|
processor: bool or callable, optional
|
||||||
Optional callable that is used to preprocess the strings before
|
Optional callable that is used to preprocess the strings before
|
||||||
|
@ -451,9 +451,9 @@ def normalized_hamming(s1, s2, *, processor=None, score_cutoff=None):
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
s1 : str
|
s1 : Sequence[Hashable]
|
||||||
First string to compare.
|
First string to compare.
|
||||||
s2 : str
|
s2 : Sequence[Hashable]
|
||||||
Second string to compare.
|
Second string to compare.
|
||||||
processor: bool or callable, optional
|
processor: bool or callable, optional
|
||||||
Optional callable that is used to preprocess the strings before
|
Optional callable that is used to preprocess the strings before
|
||||||
|
@ -498,9 +498,9 @@ def jaro_similarity(s1, s2, *, processor=None, score_cutoff=None):
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
s1 : str
|
s1 : Sequence[Hashable]
|
||||||
First string to compare.
|
First string to compare.
|
||||||
s2 : str
|
s2 : Sequence[Hashable]
|
||||||
Second string to compare.
|
Second string to compare.
|
||||||
processor: bool or callable, optional
|
processor: bool or callable, optional
|
||||||
Optional callable that is used to preprocess the strings before
|
Optional callable that is used to preprocess the strings before
|
||||||
|
@ -537,9 +537,9 @@ def jaro_winkler_similarity(s1, s2, *, double prefix_weight=0.1, processor=None,
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
s1 : str
|
s1 : Sequence[Hashable]
|
||||||
First string to compare.
|
First string to compare.
|
||||||
s2 : str
|
s2 : Sequence[Hashable]
|
||||||
Second string to compare.
|
Second string to compare.
|
||||||
prefix_weight : float, optional
|
prefix_weight : float, optional
|
||||||
Weight used for the common prefix of the two strings.
|
Weight used for the common prefix of the two strings.
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
# SPDX-License-Identifier: MIT
|
# SPDX-License-Identifier: MIT
|
||||||
# Copyright (C) 2021 Max Bachmann
|
# Copyright (C) 2021 Max Bachmann
|
||||||
|
|
||||||
from rapidfuzz.cpp_process import extract, extractOne, extract_iter
|
from rapidfuzz.cpp_process import extract, extractOne, extract_iter, cdist
|
||||||
|
|
|
@ -9,6 +9,7 @@ import pytest
|
||||||
from rapidfuzz import fuzz, process, utils, string_metric
|
from rapidfuzz import fuzz, process, utils, string_metric
|
||||||
import random
|
import random
|
||||||
from math import isclose
|
from math import isclose
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
def levenshtein(s1, s2, weights=(1, 1, 1)):
|
def levenshtein(s1, s2, weights=(1, 1, 1)):
|
||||||
"""
|
"""
|
||||||
|
@ -79,6 +80,24 @@ def partial_ratio_short_needle(s1, s2):
|
||||||
res = max(res, fuzz.ratio(s1, part))
|
res = max(res, fuzz.ratio(s1, part))
|
||||||
return res
|
return res
|
||||||
|
|
||||||
|
def cdist_scorer(queries, choices, scorer):
    """
    Reference implementation of cdist for similarity scorers.

    Calls ``scorer(query, choice)`` for every pair and stores the result in a
    uint8 matrix with one row per query and one column per choice.
    """
    shape = (len(queries), len(choices))
    scores = np.zeros(shape, dtype=np.uint8)

    for row, lhs in enumerate(queries):
        for col, rhs in enumerate(choices):
            scores[row, col] = scorer(lhs, rhs)

    return scores
|
||||||
|
|
||||||
|
def cdist_distance(queries, choices, scorer):
    """
    Reference implementation of cdist for distance scorers.

    Calls ``scorer(query, choice)`` for every pair and stores the result in an
    int32 matrix with one row per query and one column per choice.
    """
    shape = (len(queries), len(choices))
    distances = np.zeros(shape, dtype=np.int32)

    for row, lhs in enumerate(queries):
        for col, rhs in enumerate(choices):
            distances[row, col] = scorer(lhs, rhs)

    return distances
|
||||||
|
|
||||||
def extractOne_scorer(s1, s2, scorer, processor=None, **kwargs):
|
def extractOne_scorer(s1, s2, scorer, processor=None, **kwargs):
|
||||||
return process.extractOne(s1, [s2], processor=processor, scorer=scorer, **kwargs)[1]
|
return process.extractOne(s1, [s2], processor=processor, scorer=scorer, **kwargs)[1]
|
||||||
|
|
||||||
|
@ -294,3 +313,19 @@ def test_only_identical_strings_extracted(scorer, processor, choices):
|
||||||
|
|
||||||
for match in matches:
|
for match in matches:
|
||||||
assert processor(query) == processor(match[0])
|
assert processor(query) == processor(match[0])
|
||||||
|
|
||||||
|
|
||||||
|
@given(queries=st.lists(st.text(), min_size=1), choices=st.lists(st.text(), min_size=1))
@settings(max_examples=500, deadline=5000)
def test_cdist(queries, choices):
    """
    cdist has to agree with the pairwise reference implementation
    """
    metric = string_metric.levenshtein

    # two separate collections
    expected = cdist_distance(queries, choices, scorer=metric)
    actual = process.cdist(queries, choices, scorer=metric)
    assert (actual == expected).all()

    # the same object passed twice
    expected = cdist_distance(queries, queries, scorer=metric)
    actual = process.cdist(queries, queries, scorer=metric)
    assert (actual == expected).all()
|
||||||
|
|
Loading…
Reference in New Issue