fix handling of inputs in bytes-only C extension functions
This commit is contained in:
parent
ac48a1f1eb
commit
e6750e5170
|
@ -4,6 +4,7 @@ include HISTORY.rst
|
|||
include LICENSE
|
||||
include README.rst
|
||||
include src/fuzzysearch/memmem.h
|
||||
include src/fuzzysearch/_c_ext_base.h
|
||||
include src/fuzzysearch/_substitutions_only_lp_template.h
|
||||
include src/fuzzysearch/_substitutions_only_ngrams_template.h
|
||||
include src/fuzzysearch/wordlen_memmem.h
|
||||
|
|
|
@ -0,0 +1,40 @@
|
|||
#ifndef _FUZZYSEARCH_C_EXT_BASE_H
|
||||
#define _FUZZYSEARCH_C_EXT_BASE_H
|
||||
|
||||
#define PY_SSIZE_T_CLEAN
|
||||
#include <Python.h>
|
||||
|
||||
#if PY_MAJOR_VERSION >= 3
|
||||
#define IS_PY3K
|
||||
#endif
|
||||
|
||||
#ifndef unlikely
|
||||
#ifdef __GNUC__
|
||||
/* Test for GCC > 2.95 */
|
||||
#if __GNUC__ > 2 || (__GNUC__ == 2 && (__GNUC_MINOR__ > 95))
|
||||
#define likely(x) __builtin_expect(!!(x), 1)
|
||||
#define unlikely(x) __builtin_expect(!!(x), 0)
|
||||
#else /* __GNUC__ > 2 ... */
|
||||
#define likely(x) (x)
|
||||
#define unlikely(x) (x)
|
||||
#endif /* __GNUC__ > 2 ... */
|
||||
#else /* __GNUC__ */
|
||||
#define likely(x) (x)
|
||||
#define unlikely(x) (x)
|
||||
#endif /* __GNUC__ */
|
||||
#endif
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#define inline __inline
|
||||
#endif
|
||||
|
||||
/*
 * Report whether a buffer view can be treated as a flat run of bytes.
 *
 * Returns 1 when all of the following hold, and 0 otherwise:
 *   - each item is exactly one byte wide (itemsize == 1),
 *   - the view is one-dimensional (ndim == 1),
 *   - no strides array is given, or its first entry is 1,
 *   - no suboffsets array is given.
 */
inline static int is_simple_buffer(Py_buffer pybuf) {
    if (pybuf.itemsize != 1) {
        return 0;
    }
    if (pybuf.ndim != 1) {
        return 0;
    }
    /* strides[0] is only read once strides is known to be non-NULL */
    if (pybuf.strides != NULL && pybuf.strides[0] != 1) {
        return 0;
    }
    if (pybuf.suboffsets != NULL) {
        return 0;
    }
    return 1;
}
|
||||
|
||||
#endif
|
|
@ -1,82 +1,76 @@
|
|||
#define PY_SSIZE_T_CLEAN
|
||||
#include <Python.h>
|
||||
#include "src/fuzzysearch/_c_ext_base.h"
|
||||
#include "src/fuzzysearch/memmem.h"
|
||||
|
||||
#if PY_MAJOR_VERSION >= 3
|
||||
#define IS_PY3K
|
||||
#endif
|
||||
|
||||
#ifdef __GNUC__
|
||||
/* Test for GCC > 2.95 */
|
||||
#if __GNUC__ > 2 || (__GNUC__ == 2 && (__GNUC_MINOR__ > 95))
|
||||
#define likely(x) __builtin_expect(!!(x), 1)
|
||||
#define unlikely(x) __builtin_expect(!!(x), 0)
|
||||
#else /* __GNUC__ > 2 ... */
|
||||
#define likely(x) (x)
|
||||
#define unlikely(x) (x)
|
||||
#endif /* __GNUC__ > 2 ... */
|
||||
#else /* __GNUC__ */
|
||||
#define likely(x) (x)
|
||||
#define unlikely(x) (x)
|
||||
#endif /* __GNUC__ */
|
||||
|
||||
|
||||
#ifdef IS_PY3K
|
||||
#define ARG_TYPES_DEF "y#y#|ll:search_exact_byteslike"
|
||||
#else
|
||||
#if PY_HEX_VERSION >= 0x02070000
|
||||
#define ARG_TYPES_DEF "t#t#|ll:search_exact_byteslike"
|
||||
#else
|
||||
#define ARG_TYPES_DEF "s#s#|ll:search_exact_byteslike"
|
||||
#endif
|
||||
#endif
|
||||
|
||||
static PyObject *
|
||||
search_exact_byteslike(PyObject *self, PyObject *args, PyObject *kwdict) {
|
||||
/* input params */
|
||||
const char *subseq, *seq;
|
||||
Py_ssize_t subseq_len, seq_len;
|
||||
Py_buffer subseq_pybuf, seq_pybuf;
|
||||
Py_ssize_t start_index=0, end_index=-1;
|
||||
|
||||
static char *kwlist[] = {"subsequence", "sequence", "start_index", "end_index", NULL};
|
||||
|
||||
const char *subseq, *seq;
|
||||
Py_ssize_t subseq_len, seq_len;
|
||||
PyObject *results;
|
||||
PyObject *next_result;
|
||||
size_t next_match_index;
|
||||
int subseq_sum;
|
||||
char *next_match_ptr;
|
||||
|
||||
const char* argspec =
|
||||
#ifdef IS_PY3K
|
||||
"y*y*|ll:search_exact_byteslike";
|
||||
#else
|
||||
"s*s*|ll:search_exact_byteslike";
|
||||
#endif
|
||||
|
||||
if (unlikely(!PyArg_ParseTupleAndKeywords(
|
||||
args, kwdict, ARG_TYPES_DEF, kwlist,
|
||||
&subseq, &subseq_len,
|
||||
&seq, &seq_len,
|
||||
args, kwdict,
|
||||
argspec,
|
||||
kwlist,
|
||||
&subseq_pybuf,
|
||||
&seq_pybuf,
|
||||
&start_index,
|
||||
&end_index
|
||||
))) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (unlikely(!(
|
||||
is_simple_buffer(subseq_pybuf) &&
|
||||
is_simple_buffer(seq_pybuf)
|
||||
))) {
|
||||
PyErr_SetString(PyExc_TypeError, "only contiguous sequences of single-byte values are supported");
|
||||
goto error;
|
||||
}
|
||||
|
||||
subseq = (const char*)(subseq_pybuf.buf);
|
||||
seq = (const char*)(seq_pybuf.buf);
|
||||
subseq_len = subseq_pybuf.len;
|
||||
seq_len = seq_pybuf.len;
|
||||
|
||||
/* this is required because simple_memmem_with_needle_sum() returns the
|
||||
haystack if the needle is empty */
|
||||
if (unlikely(subseq_len == 0)) {
|
||||
PyErr_SetString(PyExc_ValueError, "subsequence must not be empty");
|
||||
return NULL;
|
||||
goto error;
|
||||
}
|
||||
|
||||
if (unlikely(start_index < 0)) {
|
||||
PyErr_SetString(PyExc_ValueError, "start_index must be non-negative");
|
||||
return NULL;
|
||||
goto error;
|
||||
}
|
||||
|
||||
if (end_index == -1) end_index = seq_len;
|
||||
if (unlikely(end_index < 0)) {
|
||||
PyErr_SetString(PyExc_ValueError, "end_index must be non-negative");
|
||||
return NULL;
|
||||
goto error;
|
||||
}
|
||||
|
||||
results = PyList_New(0);
|
||||
if (unlikely(!results)) {
|
||||
return NULL;
|
||||
goto error;
|
||||
}
|
||||
|
||||
seq_len = (end_index < seq_len ? end_index : seq_len);
|
||||
|
@ -84,13 +78,14 @@ search_exact_byteslike(PyObject *self, PyObject *args, PyObject *kwdict) {
|
|||
seq_len -= (start_index <= seq_len ? start_index : seq_len);
|
||||
|
||||
if (unlikely(seq_len < subseq_len)) {
|
||||
return results;
|
||||
next_match_ptr = NULL;
|
||||
} else {
|
||||
subseq_sum = calc_sum(subseq, subseq_len);
|
||||
next_match_ptr = simple_memmem_with_needle_sum(seq, seq_len,
|
||||
subseq, subseq_len,
|
||||
subseq_sum);
|
||||
}
|
||||
|
||||
subseq_sum = calc_sum(subseq, subseq_len);
|
||||
next_match_ptr = simple_memmem_with_needle_sum(seq, seq_len,
|
||||
subseq, subseq_len,
|
||||
subseq_sum);
|
||||
while (next_match_ptr != NULL) {
|
||||
next_match_index = (const char *)next_match_ptr - seq;
|
||||
#ifdef IS_PY3K
|
||||
|
@ -99,10 +94,12 @@ search_exact_byteslike(PyObject *self, PyObject *args, PyObject *kwdict) {
|
|||
next_result = PyInt_FromLong(next_match_index + start_index);
|
||||
#endif
|
||||
if (unlikely(next_result == NULL)) {
|
||||
Py_DECREF(results);
|
||||
goto error;
|
||||
}
|
||||
if (unlikely(PyList_Append(results, next_result) == -1)) {
|
||||
Py_DECREF(next_result);
|
||||
Py_DECREF(results);
|
||||
goto error;
|
||||
}
|
||||
Py_DECREF(next_result);
|
||||
|
@ -113,10 +110,13 @@ search_exact_byteslike(PyObject *self, PyObject *args, PyObject *kwdict) {
|
|||
subseq_sum);
|
||||
}
|
||||
|
||||
PyBuffer_Release(&subseq_pybuf);
|
||||
PyBuffer_Release(&seq_pybuf);
|
||||
return results;
|
||||
|
||||
error:
|
||||
Py_DECREF(results);
|
||||
PyBuffer_Release(&subseq_pybuf);
|
||||
PyBuffer_Release(&seq_pybuf);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
@ -125,35 +125,48 @@ static PyObject *
|
|||
count_differences_with_maximum_byteslike(PyObject *self, PyObject *args)
|
||||
{
|
||||
/* input params */
|
||||
const char *seq1, *seq2;
|
||||
Py_ssize_t seq1_len, seq2_len;
|
||||
Py_buffer seq1_pybuf, seq2_pybuf;
|
||||
int max_differences;
|
||||
|
||||
const char *seq1, *seq2;
|
||||
Py_ssize_t seq1_len, seq2_len;
|
||||
Py_ssize_t i;
|
||||
int n_differences;
|
||||
|
||||
const char* argspec =
|
||||
#ifdef IS_PY3K
|
||||
"y*y*i";
|
||||
#else
|
||||
"s*s*i";
|
||||
#endif
|
||||
|
||||
if (!PyArg_ParseTuple(
|
||||
args,
|
||||
#ifdef IS_PY3K
|
||||
"y#y#i",
|
||||
#else
|
||||
#if PY_HEX_VERSION >= 0x02070000
|
||||
"t#t#i",
|
||||
#else
|
||||
"s#s#i",
|
||||
#endif
|
||||
#endif
|
||||
&seq1, &seq1_len,
|
||||
&seq2, &seq2_len,
|
||||
argspec,
|
||||
&seq1_pybuf,
|
||||
&seq2_pybuf,
|
||||
&max_differences
|
||||
)) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (unlikely(!(
|
||||
is_simple_buffer(seq1_pybuf) &&
|
||||
is_simple_buffer(seq2_pybuf)
|
||||
))) {
|
||||
PyErr_SetString(PyExc_TypeError, "only contiguous sequences of single-byte values are supported");
|
||||
goto error;
|
||||
}
|
||||
|
||||
seq1 = (const char*)(seq1_pybuf.buf);
|
||||
seq2 = (const char*)(seq2_pybuf.buf);
|
||||
seq1_len = seq1_pybuf.len;
|
||||
seq2_len = seq2_pybuf.len;
|
||||
|
||||
if (seq1_len != seq2_len) {
|
||||
PyErr_SetString(PyExc_ValueError,
|
||||
"The lengths of the given sequences must be equal.");
|
||||
return NULL;
|
||||
goto error;
|
||||
}
|
||||
|
||||
n_differences = max_differences;
|
||||
|
@ -163,7 +176,14 @@ count_differences_with_maximum_byteslike(PyObject *self, PyObject *args)
|
|||
++seq2;
|
||||
}
|
||||
|
||||
PyBuffer_Release(&seq1_pybuf);
|
||||
PyBuffer_Release(&seq2_pybuf);
|
||||
return PyLong_FromLong((long) (max_differences - n_differences));
|
||||
|
||||
error:
|
||||
PyBuffer_Release(&seq1_pybuf);
|
||||
PyBuffer_Release(&seq2_pybuf);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static PyMethodDef _common_methods[] = {
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* Generated by Cython 0.29.14 */
|
||||
/* Generated by Cython 0.29.20 */
|
||||
|
||||
#define PY_SSIZE_T_CLEAN
|
||||
#include "Python.h"
|
||||
|
@ -7,8 +7,8 @@
|
|||
#elif PY_VERSION_HEX < 0x02060000 || (0x03000000 <= PY_VERSION_HEX && PY_VERSION_HEX < 0x03030000)
|
||||
#error Cython requires Python 2.6+ or Python 3.3+.
|
||||
#else
|
||||
#define CYTHON_ABI "0_29_14"
|
||||
#define CYTHON_HEX_VERSION 0x001D0EF0
|
||||
#define CYTHON_ABI "0_29_20"
|
||||
#define CYTHON_HEX_VERSION 0x001D14F0
|
||||
#define CYTHON_FUTURE_DIVISION 0
|
||||
#include <stddef.h>
|
||||
#ifndef offsetof
|
||||
|
@ -484,8 +484,10 @@ static CYTHON_INLINE void * PyThread_tss_get(Py_tss_t *key) {
|
|||
#define PyString_Type PyUnicode_Type
|
||||
#define PyString_Check PyUnicode_Check
|
||||
#define PyString_CheckExact PyUnicode_CheckExact
|
||||
#ifndef PyObject_Unicode
|
||||
#define PyObject_Unicode PyObject_Str
|
||||
#endif
|
||||
#endif
|
||||
#if PY_MAJOR_VERSION >= 3
|
||||
#define __Pyx_PyBaseString_Check(obj) PyUnicode_Check(obj)
|
||||
#define __Pyx_PyBaseString_CheckExact(obj) PyUnicode_CheckExact(obj)
|
||||
|
@ -496,6 +498,13 @@ static CYTHON_INLINE void * PyThread_tss_get(Py_tss_t *key) {
|
|||
#ifndef PySet_CheckExact
|
||||
#define PySet_CheckExact(obj) (Py_TYPE(obj) == &PySet_Type)
|
||||
#endif
|
||||
#if PY_VERSION_HEX >= 0x030900A4
|
||||
#define __Pyx_SET_REFCNT(obj, refcnt) Py_SET_REFCNT(obj, refcnt)
|
||||
#define __Pyx_SET_SIZE(obj, size) Py_SET_SIZE(obj, size)
|
||||
#else
|
||||
#define __Pyx_SET_REFCNT(obj, refcnt) Py_REFCNT(obj) = (refcnt)
|
||||
#define __Pyx_SET_SIZE(obj, size) Py_SIZE(obj) = (size)
|
||||
#endif
|
||||
#if CYTHON_ASSUME_SAFE_MACROS
|
||||
#define __Pyx_PySequence_SIZE(seq) Py_SIZE(seq)
|
||||
#else
|
||||
|
@ -576,11 +585,10 @@ static CYTHON_INLINE float __PYX_NAN() {
|
|||
#define __Pyx_truncl truncl
|
||||
#endif
|
||||
|
||||
|
||||
#define __PYX_MARK_ERR_POS(f_index, lineno) \
|
||||
{ __pyx_filename = __pyx_f[f_index]; (void)__pyx_filename; __pyx_lineno = lineno; (void)__pyx_lineno; __pyx_clineno = __LINE__; (void)__pyx_clineno; }
|
||||
#define __PYX_ERR(f_index, lineno, Ln_error) \
|
||||
{ \
|
||||
__pyx_filename = __pyx_f[f_index]; __pyx_lineno = lineno; __pyx_clineno = __LINE__; goto Ln_error; \
|
||||
}
|
||||
{ __PYX_MARK_ERR_POS(f_index, lineno) goto Ln_error; }
|
||||
|
||||
#ifndef __PYX_EXTERN_C
|
||||
#ifdef __cplusplus
|
||||
|
@ -1078,7 +1086,7 @@ static CYTHON_INLINE int __Pyx_PyList_Append(PyObject* list, PyObject* x) {
|
|||
if (likely(L->allocated > len) & likely(len > (L->allocated >> 1))) {
|
||||
Py_INCREF(x);
|
||||
PyList_SET_ITEM(list, len, x);
|
||||
Py_SIZE(list) = len+1;
|
||||
__Pyx_SET_SIZE(list, len + 1);
|
||||
return 0;
|
||||
}
|
||||
return PyList_Append(list, x);
|
||||
|
@ -1093,7 +1101,7 @@ static CYTHON_INLINE void __Pyx_RaiseClosureNameError(const char *varname);
|
|||
/* FetchCommonType.proto */
|
||||
static PyTypeObject* __Pyx_FetchCommonType(PyTypeObject* type);
|
||||
|
||||
/* CythonFunction.proto */
|
||||
/* CythonFunctionShared.proto */
|
||||
#define __Pyx_CyFunction_USED 1
|
||||
#define __Pyx_CYFUNCTION_STATICMETHOD 0x01
|
||||
#define __Pyx_CYFUNCTION_CLASSMETHOD 0x02
|
||||
|
@ -1121,6 +1129,7 @@ typedef struct {
|
|||
PyObject *func_classobj;
|
||||
void *defaults;
|
||||
int defaults_pyobjects;
|
||||
size_t defaults_size; // used by FusedFunction for copying defaults
|
||||
int flags;
|
||||
PyObject *defaults_tuple;
|
||||
PyObject *defaults_kwdict;
|
||||
|
@ -1129,9 +1138,7 @@ typedef struct {
|
|||
} __pyx_CyFunctionObject;
|
||||
static PyTypeObject *__pyx_CyFunctionType = 0;
|
||||
#define __Pyx_CyFunction_Check(obj) (__Pyx_TypeCheck(obj, __pyx_CyFunctionType))
|
||||
#define __Pyx_CyFunction_NewEx(ml, flags, qualname, self, module, globals, code)\
|
||||
__Pyx_CyFunction_New(__pyx_CyFunctionType, ml, flags, qualname, self, module, globals, code)
|
||||
static PyObject *__Pyx_CyFunction_New(PyTypeObject *, PyMethodDef *ml,
|
||||
static PyObject *__Pyx_CyFunction_Init(__pyx_CyFunctionObject* op, PyMethodDef *ml,
|
||||
int flags, PyObject* qualname,
|
||||
PyObject *self,
|
||||
PyObject *module, PyObject *globals,
|
||||
|
@ -1147,6 +1154,13 @@ static CYTHON_INLINE void __Pyx_CyFunction_SetAnnotationsDict(PyObject *m,
|
|||
PyObject *dict);
|
||||
static int __pyx_CyFunction_init(void);
|
||||
|
||||
/* CythonFunction.proto */
|
||||
static PyObject *__Pyx_CyFunction_New(PyMethodDef *ml,
|
||||
int flags, PyObject* qualname,
|
||||
PyObject *closure,
|
||||
PyObject *module, PyObject *globals,
|
||||
PyObject* code);
|
||||
|
||||
/* GetException.proto */
|
||||
#if CYTHON_FAST_THREAD_STATE
|
||||
#define __Pyx_GetException(type, value, tb) __Pyx__GetException(__pyx_tstate, type, value, tb)
|
||||
|
@ -1435,6 +1449,9 @@ static PyObject *__pyx_pw_11fuzzysearch_15_generic_search_1c_find_near_matches_g
|
|||
PyObject *__pyx_v_subsequence = 0;
|
||||
PyObject *__pyx_v_sequence = 0;
|
||||
PyObject *__pyx_v_search_params = 0;
|
||||
int __pyx_lineno = 0;
|
||||
const char *__pyx_filename = NULL;
|
||||
int __pyx_clineno = 0;
|
||||
PyObject *__pyx_r = 0;
|
||||
__Pyx_RefNannyDeclarations
|
||||
__Pyx_RefNannySetupContext("c_find_near_matches_generic_linear_programming (wrapper)", 0);
|
||||
|
@ -1528,6 +1545,9 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_c_find_near_matches_ge
|
|||
unsigned int __pyx_t_16;
|
||||
unsigned int __pyx_t_17;
|
||||
unsigned int __pyx_t_18;
|
||||
int __pyx_lineno = 0;
|
||||
const char *__pyx_filename = NULL;
|
||||
int __pyx_clineno = 0;
|
||||
__Pyx_RefNannySetupContext("c_find_near_matches_generic_linear_programming", 0);
|
||||
|
||||
/* "fuzzysearch/_generic_search.pyx":36
|
||||
|
@ -1881,6 +1901,9 @@ static PyObject *__pyx_pw_11fuzzysearch_15_generic_search_47_c_find_near_matches
|
|||
PyObject *__pyx_v_start = 0;
|
||||
PyObject *__pyx_v_end = 0;
|
||||
PyObject *__pyx_v_dist = 0;
|
||||
int __pyx_lineno = 0;
|
||||
const char *__pyx_filename = NULL;
|
||||
int __pyx_clineno = 0;
|
||||
PyObject *__pyx_r = 0;
|
||||
__Pyx_RefNannyDeclarations
|
||||
__Pyx_RefNannySetupContext("add_match (wrapper)", 0);
|
||||
|
@ -1961,6 +1984,9 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_47_c_find_near_matches
|
|||
Py_ssize_t __pyx_t_7;
|
||||
Py_ssize_t __pyx_t_8;
|
||||
int __pyx_t_9;
|
||||
int __pyx_lineno = 0;
|
||||
const char *__pyx_filename = NULL;
|
||||
int __pyx_clineno = 0;
|
||||
__Pyx_RefNannySetupContext("add_match", 0);
|
||||
__pyx_outer_scope = (struct __pyx_obj_11fuzzysearch_15_generic_search___pyx_scope_struct____pyx_f_11fuzzysearch_15_generic_search__c_find_near_matches_generic_linear_programming *) __Pyx_CyFunction_GetClosure(__pyx_self);
|
||||
__pyx_cur_scope = __pyx_outer_scope;
|
||||
|
@ -2101,6 +2127,9 @@ static PyObject *__pyx_f_11fuzzysearch_15_generic_search__c_find_near_matches_ge
|
|||
PyObject *__pyx_t_25 = NULL;
|
||||
PyObject *__pyx_t_26 = NULL;
|
||||
PyObject *__pyx_t_27 = NULL;
|
||||
int __pyx_lineno = 0;
|
||||
const char *__pyx_filename = NULL;
|
||||
int __pyx_clineno = 0;
|
||||
__Pyx_RefNannySetupContext("_c_find_near_matches_generic_linear_programming", 0);
|
||||
__pyx_cur_scope = (struct __pyx_obj_11fuzzysearch_15_generic_search___pyx_scope_struct____pyx_f_11fuzzysearch_15_generic_search__c_find_near_matches_generic_linear_programming *)__pyx_tp_new_11fuzzysearch_15_generic_search___pyx_scope_struct____pyx_f_11fuzzysearch_15_generic_search__c_find_near_matches_generic_linear_programming(__pyx_ptype_11fuzzysearch_15_generic_search___pyx_scope_struct____pyx_f_11fuzzysearch_15_generic_search__c_find_near_matches_generic_linear_programming, __pyx_empty_tuple, NULL);
|
||||
if (unlikely(!__pyx_cur_scope)) {
|
||||
|
@ -2258,7 +2287,7 @@ static PyObject *__pyx_f_11fuzzysearch_15_generic_search__c_find_near_matches_ge
|
|||
* matches.append(Match(start, end, dist, matched=sequence[start:end]))
|
||||
*
|
||||
*/
|
||||
__pyx_t_5 = __Pyx_CyFunction_NewEx(&__pyx_mdef_11fuzzysearch_15_generic_search_47_c_find_near_matches_generic_linear_programming_1add_match, 0, __pyx_n_s_c_find_near_matches_generic_lin, ((PyObject*)__pyx_cur_scope), __pyx_n_s_fuzzysearch__generic_search, __pyx_d, ((PyObject *)__pyx_codeobj__3)); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 90, __pyx_L1_error)
|
||||
__pyx_t_5 = __Pyx_CyFunction_New(&__pyx_mdef_11fuzzysearch_15_generic_search_47_c_find_near_matches_generic_linear_programming_1add_match, 0, __pyx_n_s_c_find_near_matches_generic_lin, ((PyObject*)__pyx_cur_scope), __pyx_n_s_fuzzysearch__generic_search, __pyx_d, ((PyObject *)__pyx_codeobj__3)); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 90, __pyx_L1_error)
|
||||
__Pyx_GOTREF(__pyx_t_5);
|
||||
__pyx_v_add_match = __pyx_t_5;
|
||||
__pyx_t_5 = 0;
|
||||
|
@ -3455,6 +3484,9 @@ static PyObject *__pyx_pw_11fuzzysearch_15_generic_search_3c_find_near_matches_g
|
|||
PyObject *__pyx_v_subsequence = 0;
|
||||
PyObject *__pyx_v_sequence = 0;
|
||||
PyObject *__pyx_v_search_params = 0;
|
||||
int __pyx_lineno = 0;
|
||||
const char *__pyx_filename = NULL;
|
||||
int __pyx_clineno = 0;
|
||||
PyObject *__pyx_r = 0;
|
||||
__Pyx_RefNannyDeclarations
|
||||
__Pyx_RefNannySetupContext("c_find_near_matches_generic_ngrams (wrapper)", 0);
|
||||
|
@ -3567,6 +3599,9 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_2c_find_near_matches_g
|
|||
PyObject *__pyx_t_20 = NULL;
|
||||
PyObject *__pyx_t_21 = NULL;
|
||||
int __pyx_t_22;
|
||||
int __pyx_lineno = 0;
|
||||
const char *__pyx_filename = NULL;
|
||||
int __pyx_clineno = 0;
|
||||
__Pyx_RefNannySetupContext("c_find_near_matches_generic_ngrams", 0);
|
||||
|
||||
/* "fuzzysearch/_generic_search.pyx":248
|
||||
|
@ -4722,6 +4757,9 @@ static int __Pyx_modinit_function_export_code(void) {
|
|||
|
||||
static int __Pyx_modinit_type_init_code(void) {
|
||||
__Pyx_RefNannyDeclarations
|
||||
int __pyx_lineno = 0;
|
||||
const char *__pyx_filename = NULL;
|
||||
int __pyx_clineno = 0;
|
||||
__Pyx_RefNannySetupContext("__Pyx_modinit_type_init_code", 0);
|
||||
/*--- Type init code ---*/
|
||||
if (PyType_Ready(&__pyx_type_11fuzzysearch_15_generic_search___pyx_scope_struct____pyx_f_11fuzzysearch_15_generic_search__c_find_near_matches_generic_linear_programming) < 0) __PYX_ERR(0, 61, __pyx_L1_error)
|
||||
|
@ -4764,17 +4802,19 @@ static int __Pyx_modinit_function_import_code(void) {
|
|||
}
|
||||
|
||||
|
||||
#if PY_MAJOR_VERSION < 3
|
||||
#ifdef CYTHON_NO_PYINIT_EXPORT
|
||||
#define __Pyx_PyMODINIT_FUNC void
|
||||
#else
|
||||
#ifndef CYTHON_NO_PYINIT_EXPORT
|
||||
#define __Pyx_PyMODINIT_FUNC PyMODINIT_FUNC
|
||||
#elif PY_MAJOR_VERSION < 3
|
||||
#ifdef __cplusplus
|
||||
#define __Pyx_PyMODINIT_FUNC extern "C" void
|
||||
#else
|
||||
#define __Pyx_PyMODINIT_FUNC void
|
||||
#endif
|
||||
#else
|
||||
#ifdef CYTHON_NO_PYINIT_EXPORT
|
||||
#define __Pyx_PyMODINIT_FUNC PyObject *
|
||||
#ifdef __cplusplus
|
||||
#define __Pyx_PyMODINIT_FUNC extern "C" PyObject *
|
||||
#else
|
||||
#define __Pyx_PyMODINIT_FUNC PyMODINIT_FUNC
|
||||
#define __Pyx_PyMODINIT_FUNC PyObject *
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
@ -4857,6 +4897,9 @@ static CYTHON_SMALL_CODE int __pyx_pymod_exec__generic_search(PyObject *__pyx_py
|
|||
{
|
||||
PyObject *__pyx_t_1 = NULL;
|
||||
PyObject *__pyx_t_2 = NULL;
|
||||
int __pyx_lineno = 0;
|
||||
const char *__pyx_filename = NULL;
|
||||
int __pyx_clineno = 0;
|
||||
__Pyx_RefNannyDeclarations
|
||||
#if CYTHON_PEP489_MULTI_PHASE_INIT
|
||||
if (__pyx_m) {
|
||||
|
@ -4945,14 +4988,14 @@ if (!__Pyx_RefNanny) {
|
|||
}
|
||||
#endif
|
||||
/*--- Builtin init code ---*/
|
||||
if (__Pyx_InitCachedBuiltins() < 0) goto __pyx_L1_error;
|
||||
if (__Pyx_InitCachedBuiltins() < 0) __PYX_ERR(0, 1, __pyx_L1_error)
|
||||
/*--- Constants init code ---*/
|
||||
if (__Pyx_InitCachedConstants() < 0) goto __pyx_L1_error;
|
||||
if (__Pyx_InitCachedConstants() < 0) __PYX_ERR(0, 1, __pyx_L1_error)
|
||||
/*--- Global type/function init code ---*/
|
||||
(void)__Pyx_modinit_global_init_code();
|
||||
(void)__Pyx_modinit_variable_export_code();
|
||||
(void)__Pyx_modinit_function_export_code();
|
||||
if (unlikely(__Pyx_modinit_type_init_code() != 0)) goto __pyx_L1_error;
|
||||
if (unlikely(__Pyx_modinit_type_init_code() < 0)) __PYX_ERR(0, 1, __pyx_L1_error)
|
||||
(void)__Pyx_modinit_type_import_code();
|
||||
(void)__Pyx_modinit_variable_import_code();
|
||||
(void)__Pyx_modinit_function_import_code();
|
||||
|
@ -5194,7 +5237,7 @@ static int __Pyx_ParseOptionalKeywords(
|
|||
}
|
||||
name = first_kw_arg;
|
||||
#if PY_MAJOR_VERSION < 3
|
||||
if (likely(PyString_CheckExact(key)) || likely(PyString_Check(key))) {
|
||||
if (likely(PyString_Check(key))) {
|
||||
while (*name) {
|
||||
if ((CYTHON_COMPILING_IN_PYPY || PyString_GET_SIZE(**name) == PyString_GET_SIZE(key))
|
||||
&& _PyString_Eq(**name, key)) {
|
||||
|
@ -5221,7 +5264,7 @@ static int __Pyx_ParseOptionalKeywords(
|
|||
while (*name) {
|
||||
int cmp = (**name == key) ? 0 :
|
||||
#if !CYTHON_COMPILING_IN_PYPY && PY_MAJOR_VERSION >= 3
|
||||
(PyUnicode_GET_SIZE(**name) != PyUnicode_GET_SIZE(key)) ? 1 :
|
||||
(__Pyx_PyUnicode_GET_LENGTH(**name) != __Pyx_PyUnicode_GET_LENGTH(key)) ? 1 :
|
||||
#endif
|
||||
PyUnicode_Compare(**name, key);
|
||||
if (cmp < 0 && unlikely(PyErr_Occurred())) goto bad;
|
||||
|
@ -5237,7 +5280,7 @@ static int __Pyx_ParseOptionalKeywords(
|
|||
while (argname != first_kw_arg) {
|
||||
int cmp = (**argname == key) ? 0 :
|
||||
#if !CYTHON_COMPILING_IN_PYPY && PY_MAJOR_VERSION >= 3
|
||||
(PyUnicode_GET_SIZE(**argname) != PyUnicode_GET_SIZE(key)) ? 1 :
|
||||
(__Pyx_PyUnicode_GET_LENGTH(**argname) != __Pyx_PyUnicode_GET_LENGTH(key)) ? 1 :
|
||||
#endif
|
||||
PyUnicode_Compare(**argname, key);
|
||||
if (cmp < 0 && unlikely(PyErr_Occurred())) goto bad;
|
||||
|
@ -5844,7 +5887,7 @@ bad:
|
|||
goto done;
|
||||
}
|
||||
|
||||
/* CythonFunction */
|
||||
/* CythonFunctionShared */
|
||||
#include <structmember.h>
|
||||
static PyObject *
|
||||
__Pyx_CyFunction_get_doc(__pyx_CyFunctionObject *op, CYTHON_UNUSED void *closure)
|
||||
|
@ -6151,10 +6194,9 @@ static PyMethodDef __pyx_CyFunction_methods[] = {
|
|||
#else
|
||||
#define __Pyx_CyFunction_weakreflist(cyfunc) ((cyfunc)->func.m_weakreflist)
|
||||
#endif
|
||||
static PyObject *__Pyx_CyFunction_New(PyTypeObject *type, PyMethodDef *ml, int flags, PyObject* qualname,
|
||||
PyObject *closure, PyObject *module, PyObject* globals, PyObject* code) {
|
||||
__pyx_CyFunctionObject *op = PyObject_GC_New(__pyx_CyFunctionObject, type);
|
||||
if (op == NULL)
|
||||
static PyObject *__Pyx_CyFunction_Init(__pyx_CyFunctionObject *op, PyMethodDef *ml, int flags, PyObject* qualname,
|
||||
PyObject *closure, PyObject *module, PyObject* globals, PyObject* code) {
|
||||
if (unlikely(op == NULL))
|
||||
return NULL;
|
||||
op->flags = flags;
|
||||
__Pyx_CyFunction_weakreflist(op) = NULL;
|
||||
|
@ -6175,12 +6217,12 @@ static PyObject *__Pyx_CyFunction_New(PyTypeObject *type, PyMethodDef *ml, int f
|
|||
Py_XINCREF(code);
|
||||
op->func_code = code;
|
||||
op->defaults_pyobjects = 0;
|
||||
op->defaults_size = 0;
|
||||
op->defaults = NULL;
|
||||
op->defaults_tuple = NULL;
|
||||
op->defaults_kwdict = NULL;
|
||||
op->defaults_getter = NULL;
|
||||
op->func_annotations = NULL;
|
||||
PyObject_GC_Track(op);
|
||||
return (PyObject *) op;
|
||||
}
|
||||
static int
|
||||
|
@ -6428,6 +6470,7 @@ static CYTHON_INLINE void *__Pyx_CyFunction_InitDefaults(PyObject *func, size_t
|
|||
return PyErr_NoMemory();
|
||||
memset(m->defaults, 0, size);
|
||||
m->defaults_pyobjects = pyobjects;
|
||||
m->defaults_size = size;
|
||||
return m->defaults;
|
||||
}
|
||||
static CYTHON_INLINE void __Pyx_CyFunction_SetDefaultsTuple(PyObject *func, PyObject *tuple) {
|
||||
|
@ -6446,6 +6489,19 @@ static CYTHON_INLINE void __Pyx_CyFunction_SetAnnotationsDict(PyObject *func, Py
|
|||
Py_INCREF(dict);
|
||||
}
|
||||
|
||||
/* CythonFunction */
|
||||
static PyObject *__Pyx_CyFunction_New(PyMethodDef *ml, int flags, PyObject* qualname,
|
||||
PyObject *closure, PyObject *module, PyObject* globals, PyObject* code) {
|
||||
PyObject *op = __Pyx_CyFunction_Init(
|
||||
PyObject_GC_New(__pyx_CyFunctionObject, __pyx_CyFunctionType),
|
||||
ml, flags, qualname, closure, module, globals, code
|
||||
);
|
||||
if (likely(op)) {
|
||||
PyObject_GC_Track(op);
|
||||
}
|
||||
return op;
|
||||
}
|
||||
|
||||
/* GetException */
|
||||
#if CYTHON_FAST_THREAD_STATE
|
||||
static int __Pyx__GetException(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb)
|
||||
|
@ -6681,7 +6737,7 @@ static PyObject *__Pyx_Import(PyObject *name, PyObject *from_list, int level) {
|
|||
{
|
||||
#if PY_MAJOR_VERSION >= 3
|
||||
if (level == -1) {
|
||||
if (strchr(__Pyx_MODULE_NAME, '.')) {
|
||||
if ((1) && (strchr(__Pyx_MODULE_NAME, '.'))) {
|
||||
module = PyImport_ImportModuleLevelObject(
|
||||
name, global_dict, empty_dict, list, 1);
|
||||
if (!module) {
|
||||
|
@ -6732,7 +6788,7 @@ static PyObject* __Pyx_ImportFrom(PyObject* module, PyObject* name) {
|
|||
|
||||
/* CLineInTraceback */
|
||||
#ifndef CYTHON_CLINE_IN_TRACEBACK
|
||||
static int __Pyx_CLineForTraceback(PyThreadState *tstate, int c_line) {
|
||||
static int __Pyx_CLineForTraceback(CYTHON_NCP_UNUSED PyThreadState *tstate, int c_line) {
|
||||
PyObject *use_cline;
|
||||
PyObject *ptype, *pvalue, *ptraceback;
|
||||
#if CYTHON_COMPILING_IN_CPYTHON
|
||||
|
@ -6836,7 +6892,7 @@ static void __pyx_insert_code_object(int code_line, PyCodeObject* code_object) {
|
|||
if (__pyx_code_cache.count == __pyx_code_cache.max_count) {
|
||||
int new_max = __pyx_code_cache.max_count + 64;
|
||||
entries = (__Pyx_CodeObjectCacheEntry*)PyMem_Realloc(
|
||||
__pyx_code_cache.entries, (size_t)new_max*sizeof(__Pyx_CodeObjectCacheEntry));
|
||||
__pyx_code_cache.entries, ((size_t)new_max) * sizeof(__Pyx_CodeObjectCacheEntry));
|
||||
if (unlikely(!entries)) {
|
||||
return;
|
||||
}
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* Generated by Cython 0.29.14 */
|
||||
/* Generated by Cython 0.29.20 */
|
||||
|
||||
#define PY_SSIZE_T_CLEAN
|
||||
#include "Python.h"
|
||||
|
@ -7,8 +7,8 @@
|
|||
#elif PY_VERSION_HEX < 0x02060000 || (0x03000000 <= PY_VERSION_HEX && PY_VERSION_HEX < 0x03030000)
|
||||
#error Cython requires Python 2.6+ or Python 3.3+.
|
||||
#else
|
||||
#define CYTHON_ABI "0_29_14"
|
||||
#define CYTHON_HEX_VERSION 0x001D0EF0
|
||||
#define CYTHON_ABI "0_29_20"
|
||||
#define CYTHON_HEX_VERSION 0x001D14F0
|
||||
#define CYTHON_FUTURE_DIVISION 0
|
||||
#include <stddef.h>
|
||||
#ifndef offsetof
|
||||
|
@ -484,8 +484,10 @@ static CYTHON_INLINE void * PyThread_tss_get(Py_tss_t *key) {
|
|||
#define PyString_Type PyUnicode_Type
|
||||
#define PyString_Check PyUnicode_Check
|
||||
#define PyString_CheckExact PyUnicode_CheckExact
|
||||
#ifndef PyObject_Unicode
|
||||
#define PyObject_Unicode PyObject_Str
|
||||
#endif
|
||||
#endif
|
||||
#if PY_MAJOR_VERSION >= 3
|
||||
#define __Pyx_PyBaseString_Check(obj) PyUnicode_Check(obj)
|
||||
#define __Pyx_PyBaseString_CheckExact(obj) PyUnicode_CheckExact(obj)
|
||||
|
@ -496,6 +498,13 @@ static CYTHON_INLINE void * PyThread_tss_get(Py_tss_t *key) {
|
|||
#ifndef PySet_CheckExact
|
||||
#define PySet_CheckExact(obj) (Py_TYPE(obj) == &PySet_Type)
|
||||
#endif
|
||||
#if PY_VERSION_HEX >= 0x030900A4
|
||||
#define __Pyx_SET_REFCNT(obj, refcnt) Py_SET_REFCNT(obj, refcnt)
|
||||
#define __Pyx_SET_SIZE(obj, size) Py_SET_SIZE(obj, size)
|
||||
#else
|
||||
#define __Pyx_SET_REFCNT(obj, refcnt) Py_REFCNT(obj) = (refcnt)
|
||||
#define __Pyx_SET_SIZE(obj, size) Py_SIZE(obj) = (size)
|
||||
#endif
|
||||
#if CYTHON_ASSUME_SAFE_MACROS
|
||||
#define __Pyx_PySequence_SIZE(seq) Py_SIZE(seq)
|
||||
#else
|
||||
|
@ -576,11 +585,10 @@ static CYTHON_INLINE float __PYX_NAN() {
|
|||
#define __Pyx_truncl truncl
|
||||
#endif
|
||||
|
||||
|
||||
#define __PYX_MARK_ERR_POS(f_index, lineno) \
|
||||
{ __pyx_filename = __pyx_f[f_index]; (void)__pyx_filename; __pyx_lineno = lineno; (void)__pyx_lineno; __pyx_clineno = __LINE__; (void)__pyx_clineno; }
|
||||
#define __PYX_ERR(f_index, lineno, Ln_error) \
|
||||
{ \
|
||||
__pyx_filename = __pyx_f[f_index]; __pyx_lineno = lineno; __pyx_clineno = __LINE__; goto Ln_error; \
|
||||
}
|
||||
{ __PYX_MARK_ERR_POS(f_index, lineno) goto Ln_error; }
|
||||
|
||||
#ifndef __PYX_EXTERN_C
|
||||
#ifdef __cplusplus
|
||||
|
@ -1243,6 +1251,9 @@ static PyObject *__pyx_pw_11fuzzysearch_19_levenshtein_ngrams_1c_expand_short(Py
|
|||
PyObject *__pyx_v_subsequence = 0;
|
||||
PyObject *__pyx_v_sequence = 0;
|
||||
PyObject *__pyx_v_max_l_dist = 0;
|
||||
int __pyx_lineno = 0;
|
||||
const char *__pyx_filename = NULL;
|
||||
int __pyx_clineno = 0;
|
||||
PyObject *__pyx_r = 0;
|
||||
__Pyx_RefNannyDeclarations
|
||||
__Pyx_RefNannySetupContext("c_expand_short (wrapper)", 0);
|
||||
|
@ -1349,6 +1360,9 @@ static PyObject *__pyx_pf_11fuzzysearch_19_levenshtein_ngrams_c_expand_short(CYT
|
|||
PyObject *__pyx_t_23 = NULL;
|
||||
PyObject *__pyx_t_24 = NULL;
|
||||
PyObject *__pyx_t_25 = NULL;
|
||||
int __pyx_lineno = 0;
|
||||
const char *__pyx_filename = NULL;
|
||||
int __pyx_clineno = 0;
|
||||
__Pyx_RefNannySetupContext("c_expand_short", 0);
|
||||
|
||||
/* "fuzzysearch/_levenshtein_ngrams.pyx":28
|
||||
|
@ -1938,6 +1952,9 @@ static PyObject *__pyx_pw_11fuzzysearch_19_levenshtein_ngrams_3c_expand_long(PyO
|
|||
PyObject *__pyx_v_subsequence = 0;
|
||||
PyObject *__pyx_v_sequence = 0;
|
||||
PyObject *__pyx_v_max_l_dist = 0;
|
||||
int __pyx_lineno = 0;
|
||||
const char *__pyx_filename = NULL;
|
||||
int __pyx_clineno = 0;
|
||||
PyObject *__pyx_r = 0;
|
||||
__Pyx_RefNannyDeclarations
|
||||
__Pyx_RefNannySetupContext("c_expand_long (wrapper)", 0);
|
||||
|
@ -2051,6 +2068,9 @@ static PyObject *__pyx_pf_11fuzzysearch_19_levenshtein_ngrams_2c_expand_long(CYT
|
|||
PyObject *__pyx_t_26 = NULL;
|
||||
PyObject *__pyx_t_27 = NULL;
|
||||
PyObject *__pyx_t_28 = NULL;
|
||||
int __pyx_lineno = 0;
|
||||
const char *__pyx_filename = NULL;
|
||||
int __pyx_clineno = 0;
|
||||
__Pyx_RefNannySetupContext("c_expand_long", 0);
|
||||
|
||||
/* "fuzzysearch/_levenshtein_ngrams.pyx":87
|
||||
|
@ -3123,17 +3143,19 @@ static int __Pyx_modinit_function_import_code(void) {
|
|||
}
|
||||
|
||||
|
||||
#if PY_MAJOR_VERSION < 3
|
||||
#ifdef CYTHON_NO_PYINIT_EXPORT
|
||||
#define __Pyx_PyMODINIT_FUNC void
|
||||
#else
|
||||
#ifndef CYTHON_NO_PYINIT_EXPORT
|
||||
#define __Pyx_PyMODINIT_FUNC PyMODINIT_FUNC
|
||||
#elif PY_MAJOR_VERSION < 3
|
||||
#ifdef __cplusplus
|
||||
#define __Pyx_PyMODINIT_FUNC extern "C" void
|
||||
#else
|
||||
#define __Pyx_PyMODINIT_FUNC void
|
||||
#endif
|
||||
#else
|
||||
#ifdef CYTHON_NO_PYINIT_EXPORT
|
||||
#define __Pyx_PyMODINIT_FUNC PyObject *
|
||||
#ifdef __cplusplus
|
||||
#define __Pyx_PyMODINIT_FUNC extern "C" PyObject *
|
||||
#else
|
||||
#define __Pyx_PyMODINIT_FUNC PyMODINIT_FUNC
|
||||
#define __Pyx_PyMODINIT_FUNC PyObject *
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
@ -3215,6 +3237,9 @@ static CYTHON_SMALL_CODE int __pyx_pymod_exec__levenshtein_ngrams(PyObject *__py
|
|||
#endif
|
||||
{
|
||||
PyObject *__pyx_t_1 = NULL;
|
||||
int __pyx_lineno = 0;
|
||||
const char *__pyx_filename = NULL;
|
||||
int __pyx_clineno = 0;
|
||||
__Pyx_RefNannyDeclarations
|
||||
#if CYTHON_PEP489_MULTI_PHASE_INIT
|
||||
if (__pyx_m) {
|
||||
|
@ -3303,9 +3328,9 @@ if (!__Pyx_RefNanny) {
|
|||
}
|
||||
#endif
|
||||
/*--- Builtin init code ---*/
|
||||
if (__Pyx_InitCachedBuiltins() < 0) goto __pyx_L1_error;
|
||||
if (__Pyx_InitCachedBuiltins() < 0) __PYX_ERR(0, 1, __pyx_L1_error)
|
||||
/*--- Constants init code ---*/
|
||||
if (__Pyx_InitCachedConstants() < 0) goto __pyx_L1_error;
|
||||
if (__Pyx_InitCachedConstants() < 0) __PYX_ERR(0, 1, __pyx_L1_error)
|
||||
/*--- Global type/function init code ---*/
|
||||
(void)__Pyx_modinit_global_init_code();
|
||||
(void)__Pyx_modinit_variable_export_code();
|
||||
|
@ -3503,7 +3528,7 @@ static int __Pyx_ParseOptionalKeywords(
|
|||
}
|
||||
name = first_kw_arg;
|
||||
#if PY_MAJOR_VERSION < 3
|
||||
if (likely(PyString_CheckExact(key)) || likely(PyString_Check(key))) {
|
||||
if (likely(PyString_Check(key))) {
|
||||
while (*name) {
|
||||
if ((CYTHON_COMPILING_IN_PYPY || PyString_GET_SIZE(**name) == PyString_GET_SIZE(key))
|
||||
&& _PyString_Eq(**name, key)) {
|
||||
|
@ -3530,7 +3555,7 @@ static int __Pyx_ParseOptionalKeywords(
|
|||
while (*name) {
|
||||
int cmp = (**name == key) ? 0 :
|
||||
#if !CYTHON_COMPILING_IN_PYPY && PY_MAJOR_VERSION >= 3
|
||||
(PyUnicode_GET_SIZE(**name) != PyUnicode_GET_SIZE(key)) ? 1 :
|
||||
(__Pyx_PyUnicode_GET_LENGTH(**name) != __Pyx_PyUnicode_GET_LENGTH(key)) ? 1 :
|
||||
#endif
|
||||
PyUnicode_Compare(**name, key);
|
||||
if (cmp < 0 && unlikely(PyErr_Occurred())) goto bad;
|
||||
|
@ -3546,7 +3571,7 @@ static int __Pyx_ParseOptionalKeywords(
|
|||
while (argname != first_kw_arg) {
|
||||
int cmp = (**argname == key) ? 0 :
|
||||
#if !CYTHON_COMPILING_IN_PYPY && PY_MAJOR_VERSION >= 3
|
||||
(PyUnicode_GET_SIZE(**argname) != PyUnicode_GET_SIZE(key)) ? 1 :
|
||||
(__Pyx_PyUnicode_GET_LENGTH(**argname) != __Pyx_PyUnicode_GET_LENGTH(key)) ? 1 :
|
||||
#endif
|
||||
PyUnicode_Compare(**argname, key);
|
||||
if (cmp < 0 && unlikely(PyErr_Occurred())) goto bad;
|
||||
|
@ -4262,7 +4287,7 @@ static CYTHON_INLINE int __Pyx_object_dict_version_matches(PyObject* obj, PY_UIN
|
|||
|
||||
/* CLineInTraceback */
|
||||
#ifndef CYTHON_CLINE_IN_TRACEBACK
|
||||
static int __Pyx_CLineForTraceback(PyThreadState *tstate, int c_line) {
|
||||
static int __Pyx_CLineForTraceback(CYTHON_NCP_UNUSED PyThreadState *tstate, int c_line) {
|
||||
PyObject *use_cline;
|
||||
PyObject *ptype, *pvalue, *ptraceback;
|
||||
#if CYTHON_COMPILING_IN_CPYTHON
|
||||
|
@ -4366,7 +4391,7 @@ static void __pyx_insert_code_object(int code_line, PyCodeObject* code_object) {
|
|||
if (__pyx_code_cache.count == __pyx_code_cache.max_count) {
|
||||
int new_max = __pyx_code_cache.max_count + 64;
|
||||
entries = (__Pyx_CodeObjectCacheEntry*)PyMem_Realloc(
|
||||
__pyx_code_cache.entries, (size_t)new_max*sizeof(__Pyx_CodeObjectCacheEntry));
|
||||
__pyx_code_cache.entries, ((size_t)new_max) * sizeof(__Pyx_CodeObjectCacheEntry));
|
||||
if (unlikely(!entries)) {
|
||||
return;
|
||||
}
|
||||
|
|
|
@ -1,15 +1,10 @@
|
|||
#define PY_SSIZE_T_CLEAN
|
||||
#include <Python.h>
|
||||
|
||||
#if PY_MAJOR_VERSION >= 3
|
||||
#define IS_PY3K
|
||||
#endif
|
||||
#include "src/fuzzysearch/_c_ext_base.h"
|
||||
|
||||
|
||||
#define DECLARE_VARS
|
||||
#define DECLARE_VARS int found = 0
|
||||
#define PREPARE
|
||||
#define OUTPUT_VALUE(x) DO_FREES; Py_RETURN_TRUE
|
||||
#define RETURN_AT_END Py_RETURN_FALSE
|
||||
#define OUTPUT_VALUE(x) found = 1; break
|
||||
#define RETURN_AT_END if (found) { Py_RETURN_TRUE; } else { Py_RETURN_FALSE; }
|
||||
#define FUNCTION_NAME substitutions_only_has_near_matches_lp_byteslike
|
||||
#include "src/fuzzysearch/_substitutions_only_lp_template.h"
|
||||
#undef FUNCTION_NAME
|
||||
|
@ -31,17 +26,17 @@
|
|||
#define PREPARE \
|
||||
results = PyList_New(0); \
|
||||
if (unlikely(!results)) \
|
||||
return NULL;
|
||||
goto error;
|
||||
#define OUTPUT_VALUE(x) do { \
|
||||
next_result = PyInt_FromSsize_t((x)); \
|
||||
if (unlikely(next_result == NULL)) { \
|
||||
Py_DECREF(results); \
|
||||
return NULL; \
|
||||
goto error; \
|
||||
} \
|
||||
if (unlikely(PyList_Append(results, next_result) == -1)) { \
|
||||
Py_DECREF(next_result); \
|
||||
Py_DECREF(results); \
|
||||
return NULL; \
|
||||
goto error; \
|
||||
} \
|
||||
Py_DECREF(next_result); \
|
||||
} while(0)
|
||||
|
|
|
@ -1,89 +1,89 @@
|
|||
#ifdef __GNUC__
|
||||
/* Test for GCC > 2.95 */
|
||||
#if __GNUC__ > 2 || (__GNUC__ == 2 && (__GNUC_MINOR__ > 95))
|
||||
#define likely(x) __builtin_expect(!!(x), 1)
|
||||
#define unlikely(x) __builtin_expect(!!(x), 0)
|
||||
#else /* __GNUC__ > 2 ... */
|
||||
#define likely(x) (x)
|
||||
#define unlikely(x) (x)
|
||||
#endif /* __GNUC__ > 2 ... */
|
||||
#else /* __GNUC__ */
|
||||
#define likely(x) (x)
|
||||
#define unlikely(x) (x)
|
||||
#endif /* __GNUC__ */
|
||||
#define RELEASE_BUFFERS \
|
||||
PyBuffer_Release(&subseq_pybuf); \
|
||||
PyBuffer_Release(&seq_pybuf)
|
||||
|
||||
|
||||
#define DO_FREES free(sub_counts)
|
||||
|
||||
static PyObject *
|
||||
FUNCTION_NAME(PyObject *self, PyObject *args)
|
||||
{
|
||||
/* input params */
|
||||
Py_buffer subseq_pybuf, seq_pybuf;
|
||||
int max_substitutions;
|
||||
|
||||
const char *subsequence;
|
||||
const char *sequence;
|
||||
Py_ssize_t subseq_len, seq_len;
|
||||
int max_substitutions_input;
|
||||
unsigned int max_substitutions;
|
||||
|
||||
unsigned int *sub_counts;
|
||||
int *sub_counts = NULL;
|
||||
Py_ssize_t seq_idx, subseq_idx, count_idx;
|
||||
|
||||
DECLARE_VARS;
|
||||
|
||||
const char* argspec =
|
||||
#ifdef IS_PY3K
|
||||
#define ARGSPEC "y#y#i"
|
||||
"y*y*i";
|
||||
#else
|
||||
#if PY_HEX_VERSION >= 0x02070000
|
||||
#define ARGSPEC "t#t#i"
|
||||
#else
|
||||
#define ARGSPEC "s#s#i"
|
||||
#endif
|
||||
"s*s*i";
|
||||
#endif
|
||||
|
||||
if (unlikely(!PyArg_ParseTuple(
|
||||
args,
|
||||
ARGSPEC,
|
||||
&subsequence, &subseq_len,
|
||||
&sequence, &seq_len,
|
||||
&max_substitutions_input
|
||||
argspec,
|
||||
&subseq_pybuf,
|
||||
&seq_pybuf,
|
||||
&max_substitutions
|
||||
))) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (unlikely(max_substitutions_input < 0)) {
|
||||
if (unlikely(max_substitutions < 0)) {
|
||||
PyErr_SetString(PyExc_ValueError, "max_l_dist must be non-negative");
|
||||
return NULL;
|
||||
goto error;
|
||||
}
|
||||
max_substitutions = (unsigned int) max_substitutions_input;
|
||||
|
||||
if (unlikely(!(
|
||||
is_simple_buffer(subseq_pybuf) &&
|
||||
is_simple_buffer(seq_pybuf)
|
||||
))) {
|
||||
PyErr_SetString(PyExc_TypeError, "only contiguous sequences of single-byte values are supported");
|
||||
goto error;
|
||||
}
|
||||
|
||||
subsequence = (const char*)(subseq_pybuf.buf);
|
||||
sequence = (const char*)(seq_pybuf.buf);
|
||||
subseq_len = subseq_pybuf.len;
|
||||
seq_len = seq_pybuf.len;
|
||||
|
||||
if (unlikely(subseq_len < 0 || seq_len < 0)) {
|
||||
PyErr_SetString(PyExc_Exception, "an unknown error occurred");
|
||||
return NULL;
|
||||
goto error;
|
||||
}
|
||||
|
||||
if (unlikely(subseq_len == 0)) {
|
||||
PyErr_SetString(PyExc_ValueError, "subsequence must not be empty");
|
||||
return NULL;
|
||||
goto error;
|
||||
}
|
||||
|
||||
PREPARE;
|
||||
|
||||
if (unlikely(seq_len < subseq_len)) {
|
||||
RELEASE_BUFFERS;
|
||||
RETURN_AT_END;
|
||||
}
|
||||
|
||||
sub_counts = (unsigned int *) malloc (sizeof(unsigned int) * subseq_len);
|
||||
if (sub_counts == NULL) {
|
||||
return PyErr_NoMemory();
|
||||
}
|
||||
|
||||
if (unlikely(max_substitutions >= subseq_len)) {
|
||||
for (seq_idx = 0; seq_idx <= seq_len - subseq_len; ++seq_idx) {
|
||||
OUTPUT_VALUE(seq_idx);
|
||||
}
|
||||
RELEASE_BUFFERS;
|
||||
RETURN_AT_END;
|
||||
}
|
||||
|
||||
sub_counts = (int *) malloc (sizeof(int) * subseq_len);
|
||||
if (sub_counts == NULL) {
|
||||
RELEASE_BUFFERS;
|
||||
return PyErr_NoMemory();
|
||||
}
|
||||
|
||||
for (seq_idx = 0; seq_idx < subseq_len - 1; ++seq_idx) {
|
||||
sub_counts[seq_idx] = 0;
|
||||
for (subseq_idx = 0; subseq_idx <= seq_idx; ++subseq_idx) {
|
||||
|
@ -108,8 +108,13 @@ FUNCTION_NAME(PyObject *self, PyObject *args)
|
|||
sub_counts[count_idx] = 0;
|
||||
}
|
||||
|
||||
DO_FREES;
|
||||
free(sub_counts);
|
||||
RELEASE_BUFFERS;
|
||||
RETURN_AT_END;
|
||||
|
||||
error:
|
||||
RELEASE_BUFFERS;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
#undef DO_FREES
|
||||
#undef RELEASE_BUFFERS
|
||||
|
|
|
@ -1,76 +1,74 @@
|
|||
#include "src/fuzzysearch/_c_ext_base.h"
|
||||
#include "src/fuzzysearch/memmem.h"
|
||||
|
||||
|
||||
#ifdef __GNUC__
|
||||
/* Test for GCC > 2.95 */
|
||||
#if __GNUC__ > 2 || (__GNUC__ == 2 && (__GNUC_MINOR__ > 95))
|
||||
#define likely(x) __builtin_expect(!!(x), 1)
|
||||
#define unlikely(x) __builtin_expect(!!(x), 0)
|
||||
#else /* __GNUC__ > 2 ... */
|
||||
#define likely(x) (x)
|
||||
#define unlikely(x) (x)
|
||||
#endif /* __GNUC__ > 2 ... */
|
||||
#else /* __GNUC__ */
|
||||
#define likely(x) (x)
|
||||
#define unlikely(x) (x)
|
||||
#endif /* __GNUC__ */
|
||||
#define RELEASE_BUFFERS \
|
||||
PyBuffer_Release(&subseq_pybuf); \
|
||||
PyBuffer_Release(&seq_pybuf)
|
||||
|
||||
|
||||
#define DO_FREES
|
||||
|
||||
static PyObject *
|
||||
FUNCTION_NAME(PyObject *self, PyObject *args)
|
||||
{
|
||||
/* input params */
|
||||
Py_buffer subseq_pybuf, seq_pybuf;
|
||||
int max_substitutions;
|
||||
|
||||
const char *subsequence;
|
||||
const char *sequence;
|
||||
Py_ssize_t subseq_len, seq_len;
|
||||
int max_substitutions_input;
|
||||
unsigned int max_substitutions;
|
||||
|
||||
unsigned int ngram_len, ngram_start, subseq_len_after_ngram;
|
||||
Py_ssize_t ngram_len, ngram_start, subseq_len_after_ngram;
|
||||
const char *match_ptr, *seq_ptr, *subseq_ptr, *subseq_end;
|
||||
int subseq_sum;
|
||||
unsigned int n_differences;
|
||||
int n_differences;
|
||||
|
||||
DECLARE_VARS;
|
||||
|
||||
const char* argspec =
|
||||
#ifdef IS_PY3K
|
||||
#define ARGSPEC "y#y#i"
|
||||
"y*y*i";
|
||||
#else
|
||||
#if PY_HEX_VERSION >= 0x02070000
|
||||
#define ARGSPEC "t#t#i"
|
||||
#else
|
||||
#define ARGSPEC "s#s#i"
|
||||
#endif
|
||||
"s*s*i";
|
||||
#endif
|
||||
|
||||
if (unlikely(!PyArg_ParseTuple(
|
||||
args,
|
||||
ARGSPEC,
|
||||
&subsequence, &subseq_len,
|
||||
&sequence, &seq_len,
|
||||
&max_substitutions_input
|
||||
argspec,
|
||||
&subseq_pybuf,
|
||||
&seq_pybuf,
|
||||
&max_substitutions
|
||||
))) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (unlikely(max_substitutions_input < 0)) {
|
||||
if (unlikely(max_substitutions < 0)) {
|
||||
PyErr_SetString(PyExc_ValueError, "max_l_dist must be non-negative");
|
||||
return NULL;
|
||||
goto error;
|
||||
}
|
||||
max_substitutions = (unsigned int) max_substitutions_input;
|
||||
|
||||
if (unlikely(!(
|
||||
is_simple_buffer(subseq_pybuf) &&
|
||||
is_simple_buffer(seq_pybuf)
|
||||
))) {
|
||||
PyErr_SetString(PyExc_TypeError, "only contiguous sequences of single-byte values are supported");
|
||||
goto error;
|
||||
}
|
||||
|
||||
subsequence = (const char*)(subseq_pybuf.buf);
|
||||
sequence = (const char*)(seq_pybuf.buf);
|
||||
subseq_len = subseq_pybuf.len;
|
||||
seq_len = seq_pybuf.len;
|
||||
|
||||
if (unlikely(subseq_len < 0 || seq_len < 0)) {
|
||||
PyErr_SetString(PyExc_Exception, "an unknown error occurred");
|
||||
return NULL;
|
||||
goto error;
|
||||
}
|
||||
|
||||
/* this is required because simple_memmem_with_needle_sum() returns the
|
||||
haystack if the needle is empty */
|
||||
if (unlikely(subseq_len == 0)) {
|
||||
PyErr_SetString(PyExc_ValueError, "subsequence must not be empty");
|
||||
return NULL;
|
||||
goto error;
|
||||
}
|
||||
|
||||
PREPARE;
|
||||
|
@ -79,7 +77,7 @@ FUNCTION_NAME(PyObject *self, PyObject *args)
|
|||
RETURN_AT_END;
|
||||
}
|
||||
|
||||
ngram_len = ((unsigned long) subseq_len) / ((unsigned long) max_substitutions + 1);
|
||||
ngram_len = subseq_len / (max_substitutions + 1);
|
||||
if (unlikely(ngram_len <= 0)) {
|
||||
/* ngram_len <= 0 *
|
||||
* IFF *
|
||||
|
@ -138,6 +136,10 @@ FUNCTION_NAME(PyObject *self, PyObject *args)
|
|||
}
|
||||
|
||||
RETURN_AT_END;
|
||||
|
||||
error:
|
||||
RELEASE_BUFFERS;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
#undef DO_FREES
|
||||
#undef RELEASE_BUFFERS
|
||||
|
|
|
@ -1,10 +1,11 @@
|
|||
from collections import namedtuple
|
||||
from functools import wraps
|
||||
|
||||
import attr
|
||||
|
||||
from fuzzysearch.common import FuzzySearchBase, Match, \
|
||||
consolidate_overlapping_matches
|
||||
from fuzzysearch.compat import text_type, xrange
|
||||
from fuzzysearch.compat import xrange
|
||||
from fuzzysearch.search_exact import search_exact
|
||||
|
||||
|
||||
|
@ -184,20 +185,15 @@ except ImportError:
|
|||
find_near_matches_generic_linear_programming = \
|
||||
_find_near_matches_generic_linear_programming
|
||||
else:
|
||||
@wraps(_find_near_matches_generic_linear_programming)
|
||||
def find_near_matches_generic_linear_programming(subsequence, sequence, search_params):
|
||||
if not (
|
||||
isinstance(subsequence, text_type) or
|
||||
isinstance(sequence, text_type)
|
||||
):
|
||||
try:
|
||||
for match in c_fnm_generic_lp(subsequence, sequence, search_params):
|
||||
yield match
|
||||
except TypeError:
|
||||
pass
|
||||
|
||||
for match in _find_near_matches_generic_linear_programming(
|
||||
subsequence, sequence, search_params):
|
||||
yield match
|
||||
try:
|
||||
for match in c_fnm_generic_lp(subsequence, sequence, search_params):
|
||||
yield match
|
||||
except (TypeError, UnicodeEncodeError):
|
||||
for match in _find_near_matches_generic_linear_programming(
|
||||
subsequence, sequence, search_params):
|
||||
yield match
|
||||
|
||||
|
||||
def find_near_matches_generic_ngrams(subsequence, sequence, search_params):
|
||||
|
|
|
@ -4,7 +4,6 @@ from functools import wraps
|
|||
|
||||
from fuzzysearch.common import FuzzySearchBase, Match, \
|
||||
count_differences_with_maximum, get_best_match_in_group, group_matches
|
||||
from fuzzysearch.compat import text_type
|
||||
from fuzzysearch.search_exact import search_exact
|
||||
|
||||
|
||||
|
@ -246,51 +245,41 @@ else:
|
|||
@wraps(py_has_near_match_substitutions_ngrams)
|
||||
def has_near_match_substitutions_ngrams(subsequence, sequence,
|
||||
max_substitutions):
|
||||
if not (
|
||||
isinstance(subsequence, text_type) or
|
||||
isinstance(sequence, text_type)
|
||||
):
|
||||
try:
|
||||
return substitutions_only_has_near_matches_ngrams_byteslike(
|
||||
subsequence, sequence, max_substitutions)
|
||||
except TypeError:
|
||||
pass
|
||||
|
||||
return py_has_near_match_substitutions_ngrams(
|
||||
subsequence, sequence, max_substitutions)
|
||||
try:
|
||||
return substitutions_only_has_near_matches_ngrams_byteslike(
|
||||
subsequence, sequence, max_substitutions)
|
||||
except (TypeError, UnicodeEncodeError):
|
||||
return py_has_near_match_substitutions_ngrams(
|
||||
subsequence, sequence, max_substitutions)
|
||||
|
||||
py_find_near_matches_substitutions_ngrams = \
|
||||
find_near_matches_substitutions_ngrams
|
||||
@wraps(py_find_near_matches_substitutions_ngrams)
|
||||
def find_near_matches_substitutions_ngrams(subsequence, sequence,
|
||||
max_substitutions):
|
||||
if not (
|
||||
isinstance(subsequence, text_type) or
|
||||
isinstance(sequence, text_type)
|
||||
):
|
||||
try:
|
||||
results = _subs_only_fnm_ngram_byteslike(
|
||||
subsequence, sequence, max_substitutions)
|
||||
except TypeError:
|
||||
pass
|
||||
else:
|
||||
matches = [
|
||||
Match(
|
||||
index,
|
||||
index + len(subsequence),
|
||||
count_differences_with_maximum(
|
||||
sequence[index:index+len(subsequence)],
|
||||
subsequence,
|
||||
max_substitutions + 1,
|
||||
),
|
||||
matched=sequence[index:index + len(subsequence)],
|
||||
)
|
||||
for index in results
|
||||
]
|
||||
return [
|
||||
get_best_match_in_group(group)
|
||||
for group in group_matches(matches)
|
||||
]
|
||||
try:
|
||||
results = _subs_only_fnm_ngram_byteslike(
|
||||
subsequence, sequence, max_substitutions)
|
||||
except (TypeError, UnicodeEncodeError):
|
||||
pass
|
||||
else:
|
||||
matches = [
|
||||
Match(
|
||||
index,
|
||||
index + len(subsequence),
|
||||
count_differences_with_maximum(
|
||||
sequence[index:index+len(subsequence)],
|
||||
subsequence,
|
||||
max_substitutions + 1,
|
||||
),
|
||||
matched=sequence[index:index + len(subsequence)],
|
||||
)
|
||||
for index in results
|
||||
]
|
||||
return [
|
||||
get_best_match_in_group(group)
|
||||
for group in group_matches(matches)
|
||||
]
|
||||
|
||||
return py_find_near_matches_substitutions_ngrams(
|
||||
subsequence, sequence, max_substitutions)
|
||||
|
|
Loading…
Reference in New Issue