fix handling of inputs in bytes-only C extension functions

This commit is contained in:
Tal Einat 2020-06-27 22:17:57 +03:00
parent ac48a1f1eb
commit 8be3d79a00
No known key found for this signature in database
GPG Key ID: 613A98AF4C800CDA
10 changed files with 361 additions and 250 deletions

View File

@ -4,6 +4,7 @@ include HISTORY.rst
include LICENSE
include README.rst
include src/fuzzysearch/memmem.h
include src/fuzzysearch/_c_ext_base.h
include src/fuzzysearch/_substitutions_only_lp_template.h
include src/fuzzysearch/_substitutions_only_ngrams_template.h
include src/fuzzysearch/wordlen_memmem.h

View File

@ -0,0 +1,32 @@
#define PY_SSIZE_T_CLEAN
#include <Python.h>
#if PY_MAJOR_VERSION >= 3
#define IS_PY3K
#endif
/*
 * Branch-prediction hints: likely(x) / unlikely(x).
 *
 * On GCC newer than 2.95 these expand to __builtin_expect() applied to the
 * truth value of x (so the result is 0 or 1); on every other compiler they
 * expand to (x) unchanged.  Use them only in a boolean context, since the
 * resulting value differs between the two expansions.
 *
 * Each macro is guarded on its own name.  A single shared guard on
 * `unlikely` would leave `likely` undefined when only `unlikely` had been
 * defined earlier, and would redefine `likely` when only `likely` had been
 * defined earlier.
 */
#ifndef likely
#if defined(__GNUC__) && \
    (__GNUC__ > 2 || (__GNUC__ == 2 && (__GNUC_MINOR__ > 95)))
#define likely(x) __builtin_expect(!!(x), 1)
#else /* not GCC > 2.95 */
#define likely(x) (x)
#endif /* GCC > 2.95 */
#endif /* likely */
#ifndef unlikely
#if defined(__GNUC__) && \
    (__GNUC__ > 2 || (__GNUC__ == 2 && (__GNUC_MINOR__ > 95)))
#define unlikely(x) __builtin_expect(!!(x), 0)
#else /* not GCC > 2.95 */
#define unlikely(x) (x)
#endif /* GCC > 2.95 */
#endif /* unlikely */
/*
 * Check whether a buffer view can be read as a flat array of single bytes.
 *
 * Returns 1 when all of the following hold, and 0 otherwise:
 *   - each item is exactly one byte (itemsize == 1),
 *   - the view is one-dimensional (ndim == 1),
 *   - no suboffsets array is present,
 *   - either no strides array is present or the first stride is 1.
 */
inline static int is_simple_buffer(Py_buffer pybuf) {
    if (pybuf.itemsize != 1 || pybuf.ndim != 1) {
        return 0;
    }
    if (pybuf.suboffsets != NULL) {
        return 0;
    }
    /* strides[0] is only read when a strides array is actually present */
    return pybuf.strides == NULL || pybuf.strides[0] == 1;
}

View File

@ -1,45 +1,17 @@
#define PY_SSIZE_T_CLEAN
#include <Python.h>
#include "src/fuzzysearch/_c_ext_base.h"
#include "src/fuzzysearch/memmem.h"
#if PY_MAJOR_VERSION >= 3
#define IS_PY3K
#endif
#ifdef __GNUC__
/* Test for GCC > 2.95 */
#if __GNUC__ > 2 || (__GNUC__ == 2 && (__GNUC_MINOR__ > 95))
#define likely(x) __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)
#else /* __GNUC__ > 2 ... */
#define likely(x) (x)
#define unlikely(x) (x)
#endif /* __GNUC__ > 2 ... */
#else /* __GNUC__ */
#define likely(x) (x)
#define unlikely(x) (x)
#endif /* __GNUC__ */
#ifdef IS_PY3K
#define ARG_TYPES_DEF "y#y#|ll:search_exact_byteslike"
#else
#if PY_HEX_VERSION >= 0x02070000
#define ARG_TYPES_DEF "t#t#|ll:search_exact_byteslike"
#else
#define ARG_TYPES_DEF "s#s#|ll:search_exact_byteslike"
#endif
#endif
static PyObject *
search_exact_byteslike(PyObject *self, PyObject *args, PyObject *kwdict) {
/* input params */
const char *subseq, *seq;
Py_ssize_t subseq_len, seq_len;
Py_buffer subseq_pybuf, seq_pybuf;
Py_ssize_t start_index=0, end_index=-1;
static char *kwlist[] = {"subsequence", "sequence", "start_index", "end_index", NULL};
const char *subseq, *seq;
Py_ssize_t subseq_len, seq_len;
PyObject *results;
PyObject *next_result;
size_t next_match_index;
@ -47,36 +19,55 @@ search_exact_byteslike(PyObject *self, PyObject *args, PyObject *kwdict) {
char *next_match_ptr;
if (unlikely(!PyArg_ParseTupleAndKeywords(
args, kwdict, ARG_TYPES_DEF, kwlist,
&subseq, &subseq_len,
&seq, &seq_len,
args, kwdict,
#ifdef IS_PY3K
"y*y*|ll:search_exact_byteslike",
#else
"s*s*|ll:search_exact_byteslike",
#endif
kwlist,
&subseq_pybuf,
&seq_pybuf,
&start_index,
&end_index
))) {
return NULL;
}
if (unlikely(!(
is_simple_buffer(subseq_pybuf) &&
is_simple_buffer(seq_pybuf)
))) {
PyErr_SetString(PyExc_TypeError, "only contiguous sequences of single-byte values are supported");
goto error;
}
subseq = (const char*)(subseq_pybuf.buf);
seq = (const char*)(seq_pybuf.buf);
subseq_len = subseq_pybuf.len;
seq_len = seq_pybuf.len;
/* this is required because simple_memmem_with_needle_sum() returns the
haystack if the needle is empty */
if (unlikely(subseq_len == 0)) {
PyErr_SetString(PyExc_ValueError, "subsequence must not be empty");
return NULL;
goto error;
}
if (unlikely(start_index < 0)) {
PyErr_SetString(PyExc_ValueError, "start_index must be non-negative");
return NULL;
goto error;
}
if (end_index == -1) end_index = seq_len;
if (unlikely(end_index < 0)) {
PyErr_SetString(PyExc_ValueError, "end_index must be non-negative");
return NULL;
goto error;
}
results = PyList_New(0);
if (unlikely(!results)) {
return NULL;
goto error;
}
seq_len = (end_index < seq_len ? end_index : seq_len);
@ -84,13 +75,14 @@ search_exact_byteslike(PyObject *self, PyObject *args, PyObject *kwdict) {
seq_len -= (start_index <= seq_len ? start_index : seq_len);
if (unlikely(seq_len < subseq_len)) {
return results;
next_match_ptr = NULL;
} else {
subseq_sum = calc_sum(subseq, subseq_len);
next_match_ptr = simple_memmem_with_needle_sum(seq, seq_len,
subseq, subseq_len,
subseq_sum);
}
subseq_sum = calc_sum(subseq, subseq_len);
next_match_ptr = simple_memmem_with_needle_sum(seq, seq_len,
subseq, subseq_len,
subseq_sum);
while (next_match_ptr != NULL) {
next_match_index = (const char *)next_match_ptr - seq;
#ifdef IS_PY3K
@ -99,10 +91,12 @@ search_exact_byteslike(PyObject *self, PyObject *args, PyObject *kwdict) {
next_result = PyInt_FromLong(next_match_index + start_index);
#endif
if (unlikely(next_result == NULL)) {
Py_DECREF(results);
goto error;
}
if (unlikely(PyList_Append(results, next_result) == -1)) {
Py_DECREF(next_result);
Py_DECREF(results);
goto error;
}
Py_DECREF(next_result);
@ -113,10 +107,13 @@ search_exact_byteslike(PyObject *self, PyObject *args, PyObject *kwdict) {
subseq_sum);
}
PyBuffer_Release(&subseq_pybuf);
PyBuffer_Release(&seq_pybuf);
return results;
error:
Py_DECREF(results);
PyBuffer_Release(&subseq_pybuf);
PyBuffer_Release(&seq_pybuf);
return NULL;
}
@ -125,35 +122,45 @@ static PyObject *
count_differences_with_maximum_byteslike(PyObject *self, PyObject *args)
{
/* input params */
const char *seq1, *seq2;
Py_ssize_t seq1_len, seq2_len;
Py_buffer seq1_pybuf, seq2_pybuf;
int max_differences;
const char *seq1, *seq2;
Py_ssize_t seq1_len, seq2_len;
Py_ssize_t i;
int n_differences;
if (!PyArg_ParseTuple(
args,
#ifdef IS_PY3K
"y#y#i",
"y*y*i",
#else
#if PY_HEX_VERSION >= 0x02070000
"t#t#i",
#else
"s#s#i",
#endif
"s*s*i",
#endif
&seq1, &seq1_len,
&seq2, &seq2_len,
&seq1_pybuf,
&seq2_pybuf,
&max_differences
)) {
return NULL;
}
if (unlikely(!(
is_simple_buffer(seq1_pybuf) &&
is_simple_buffer(seq2_pybuf)
))) {
PyErr_SetString(PyExc_TypeError, "only contiguous sequences of single-byte values are supported");
goto error;
}
seq1 = (const char*)(seq1_pybuf.buf);
seq2 = (const char*)(seq2_pybuf.buf);
seq1_len = seq1_pybuf.len;
seq2_len = seq2_pybuf.len;
if (seq1_len != seq2_len) {
PyErr_SetString(PyExc_ValueError,
"The lengths of the given sequences must be equal.");
return NULL;
goto error;
}
n_differences = max_differences;
@ -163,7 +170,14 @@ count_differences_with_maximum_byteslike(PyObject *self, PyObject *args)
++seq2;
}
PyBuffer_Release(&seq1_pybuf);
PyBuffer_Release(&seq2_pybuf);
return PyLong_FromLong((long) (max_differences - n_differences));
error:
PyBuffer_Release(&seq1_pybuf);
PyBuffer_Release(&seq2_pybuf);
return NULL;
}
static PyMethodDef _common_methods[] = {

View File

@ -1,4 +1,4 @@
/* Generated by Cython 0.29.14 */
/* Generated by Cython 0.29.20 */
#define PY_SSIZE_T_CLEAN
#include "Python.h"
@ -7,8 +7,8 @@
#elif PY_VERSION_HEX < 0x02060000 || (0x03000000 <= PY_VERSION_HEX && PY_VERSION_HEX < 0x03030000)
#error Cython requires Python 2.6+ or Python 3.3+.
#else
#define CYTHON_ABI "0_29_14"
#define CYTHON_HEX_VERSION 0x001D0EF0
#define CYTHON_ABI "0_29_20"
#define CYTHON_HEX_VERSION 0x001D14F0
#define CYTHON_FUTURE_DIVISION 0
#include <stddef.h>
#ifndef offsetof
@ -484,8 +484,10 @@ static CYTHON_INLINE void * PyThread_tss_get(Py_tss_t *key) {
#define PyString_Type PyUnicode_Type
#define PyString_Check PyUnicode_Check
#define PyString_CheckExact PyUnicode_CheckExact
#ifndef PyObject_Unicode
#define PyObject_Unicode PyObject_Str
#endif
#endif
#if PY_MAJOR_VERSION >= 3
#define __Pyx_PyBaseString_Check(obj) PyUnicode_Check(obj)
#define __Pyx_PyBaseString_CheckExact(obj) PyUnicode_CheckExact(obj)
@ -496,6 +498,13 @@ static CYTHON_INLINE void * PyThread_tss_get(Py_tss_t *key) {
#ifndef PySet_CheckExact
#define PySet_CheckExact(obj) (Py_TYPE(obj) == &PySet_Type)
#endif
#if PY_VERSION_HEX >= 0x030900A4
#define __Pyx_SET_REFCNT(obj, refcnt) Py_SET_REFCNT(obj, refcnt)
#define __Pyx_SET_SIZE(obj, size) Py_SET_SIZE(obj, size)
#else
#define __Pyx_SET_REFCNT(obj, refcnt) Py_REFCNT(obj) = (refcnt)
#define __Pyx_SET_SIZE(obj, size) Py_SIZE(obj) = (size)
#endif
#if CYTHON_ASSUME_SAFE_MACROS
#define __Pyx_PySequence_SIZE(seq) Py_SIZE(seq)
#else
@ -576,11 +585,10 @@ static CYTHON_INLINE float __PYX_NAN() {
#define __Pyx_truncl truncl
#endif
#define __PYX_MARK_ERR_POS(f_index, lineno) \
{ __pyx_filename = __pyx_f[f_index]; (void)__pyx_filename; __pyx_lineno = lineno; (void)__pyx_lineno; __pyx_clineno = __LINE__; (void)__pyx_clineno; }
#define __PYX_ERR(f_index, lineno, Ln_error) \
{ \
__pyx_filename = __pyx_f[f_index]; __pyx_lineno = lineno; __pyx_clineno = __LINE__; goto Ln_error; \
}
{ __PYX_MARK_ERR_POS(f_index, lineno) goto Ln_error; }
#ifndef __PYX_EXTERN_C
#ifdef __cplusplus
@ -1078,7 +1086,7 @@ static CYTHON_INLINE int __Pyx_PyList_Append(PyObject* list, PyObject* x) {
if (likely(L->allocated > len) & likely(len > (L->allocated >> 1))) {
Py_INCREF(x);
PyList_SET_ITEM(list, len, x);
Py_SIZE(list) = len+1;
__Pyx_SET_SIZE(list, len + 1);
return 0;
}
return PyList_Append(list, x);
@ -1093,7 +1101,7 @@ static CYTHON_INLINE void __Pyx_RaiseClosureNameError(const char *varname);
/* FetchCommonType.proto */
static PyTypeObject* __Pyx_FetchCommonType(PyTypeObject* type);
/* CythonFunction.proto */
/* CythonFunctionShared.proto */
#define __Pyx_CyFunction_USED 1
#define __Pyx_CYFUNCTION_STATICMETHOD 0x01
#define __Pyx_CYFUNCTION_CLASSMETHOD 0x02
@ -1121,6 +1129,7 @@ typedef struct {
PyObject *func_classobj;
void *defaults;
int defaults_pyobjects;
size_t defaults_size; // used by FusedFunction for copying defaults
int flags;
PyObject *defaults_tuple;
PyObject *defaults_kwdict;
@ -1129,9 +1138,7 @@ typedef struct {
} __pyx_CyFunctionObject;
static PyTypeObject *__pyx_CyFunctionType = 0;
#define __Pyx_CyFunction_Check(obj) (__Pyx_TypeCheck(obj, __pyx_CyFunctionType))
#define __Pyx_CyFunction_NewEx(ml, flags, qualname, self, module, globals, code)\
__Pyx_CyFunction_New(__pyx_CyFunctionType, ml, flags, qualname, self, module, globals, code)
static PyObject *__Pyx_CyFunction_New(PyTypeObject *, PyMethodDef *ml,
static PyObject *__Pyx_CyFunction_Init(__pyx_CyFunctionObject* op, PyMethodDef *ml,
int flags, PyObject* qualname,
PyObject *self,
PyObject *module, PyObject *globals,
@ -1147,6 +1154,13 @@ static CYTHON_INLINE void __Pyx_CyFunction_SetAnnotationsDict(PyObject *m,
PyObject *dict);
static int __pyx_CyFunction_init(void);
/* CythonFunction.proto */
static PyObject *__Pyx_CyFunction_New(PyMethodDef *ml,
int flags, PyObject* qualname,
PyObject *closure,
PyObject *module, PyObject *globals,
PyObject* code);
/* GetException.proto */
#if CYTHON_FAST_THREAD_STATE
#define __Pyx_GetException(type, value, tb) __Pyx__GetException(__pyx_tstate, type, value, tb)
@ -1435,6 +1449,9 @@ static PyObject *__pyx_pw_11fuzzysearch_15_generic_search_1c_find_near_matches_g
PyObject *__pyx_v_subsequence = 0;
PyObject *__pyx_v_sequence = 0;
PyObject *__pyx_v_search_params = 0;
int __pyx_lineno = 0;
const char *__pyx_filename = NULL;
int __pyx_clineno = 0;
PyObject *__pyx_r = 0;
__Pyx_RefNannyDeclarations
__Pyx_RefNannySetupContext("c_find_near_matches_generic_linear_programming (wrapper)", 0);
@ -1528,6 +1545,9 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_c_find_near_matches_ge
unsigned int __pyx_t_16;
unsigned int __pyx_t_17;
unsigned int __pyx_t_18;
int __pyx_lineno = 0;
const char *__pyx_filename = NULL;
int __pyx_clineno = 0;
__Pyx_RefNannySetupContext("c_find_near_matches_generic_linear_programming", 0);
/* "fuzzysearch/_generic_search.pyx":36
@ -1881,6 +1901,9 @@ static PyObject *__pyx_pw_11fuzzysearch_15_generic_search_47_c_find_near_matches
PyObject *__pyx_v_start = 0;
PyObject *__pyx_v_end = 0;
PyObject *__pyx_v_dist = 0;
int __pyx_lineno = 0;
const char *__pyx_filename = NULL;
int __pyx_clineno = 0;
PyObject *__pyx_r = 0;
__Pyx_RefNannyDeclarations
__Pyx_RefNannySetupContext("add_match (wrapper)", 0);
@ -1961,6 +1984,9 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_47_c_find_near_matches
Py_ssize_t __pyx_t_7;
Py_ssize_t __pyx_t_8;
int __pyx_t_9;
int __pyx_lineno = 0;
const char *__pyx_filename = NULL;
int __pyx_clineno = 0;
__Pyx_RefNannySetupContext("add_match", 0);
__pyx_outer_scope = (struct __pyx_obj_11fuzzysearch_15_generic_search___pyx_scope_struct____pyx_f_11fuzzysearch_15_generic_search__c_find_near_matches_generic_linear_programming *) __Pyx_CyFunction_GetClosure(__pyx_self);
__pyx_cur_scope = __pyx_outer_scope;
@ -2101,6 +2127,9 @@ static PyObject *__pyx_f_11fuzzysearch_15_generic_search__c_find_near_matches_ge
PyObject *__pyx_t_25 = NULL;
PyObject *__pyx_t_26 = NULL;
PyObject *__pyx_t_27 = NULL;
int __pyx_lineno = 0;
const char *__pyx_filename = NULL;
int __pyx_clineno = 0;
__Pyx_RefNannySetupContext("_c_find_near_matches_generic_linear_programming", 0);
__pyx_cur_scope = (struct __pyx_obj_11fuzzysearch_15_generic_search___pyx_scope_struct____pyx_f_11fuzzysearch_15_generic_search__c_find_near_matches_generic_linear_programming *)__pyx_tp_new_11fuzzysearch_15_generic_search___pyx_scope_struct____pyx_f_11fuzzysearch_15_generic_search__c_find_near_matches_generic_linear_programming(__pyx_ptype_11fuzzysearch_15_generic_search___pyx_scope_struct____pyx_f_11fuzzysearch_15_generic_search__c_find_near_matches_generic_linear_programming, __pyx_empty_tuple, NULL);
if (unlikely(!__pyx_cur_scope)) {
@ -2258,7 +2287,7 @@ static PyObject *__pyx_f_11fuzzysearch_15_generic_search__c_find_near_matches_ge
* matches.append(Match(start, end, dist, matched=sequence[start:end]))
*
*/
__pyx_t_5 = __Pyx_CyFunction_NewEx(&__pyx_mdef_11fuzzysearch_15_generic_search_47_c_find_near_matches_generic_linear_programming_1add_match, 0, __pyx_n_s_c_find_near_matches_generic_lin, ((PyObject*)__pyx_cur_scope), __pyx_n_s_fuzzysearch__generic_search, __pyx_d, ((PyObject *)__pyx_codeobj__3)); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 90, __pyx_L1_error)
__pyx_t_5 = __Pyx_CyFunction_New(&__pyx_mdef_11fuzzysearch_15_generic_search_47_c_find_near_matches_generic_linear_programming_1add_match, 0, __pyx_n_s_c_find_near_matches_generic_lin, ((PyObject*)__pyx_cur_scope), __pyx_n_s_fuzzysearch__generic_search, __pyx_d, ((PyObject *)__pyx_codeobj__3)); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 90, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_5);
__pyx_v_add_match = __pyx_t_5;
__pyx_t_5 = 0;
@ -3455,6 +3484,9 @@ static PyObject *__pyx_pw_11fuzzysearch_15_generic_search_3c_find_near_matches_g
PyObject *__pyx_v_subsequence = 0;
PyObject *__pyx_v_sequence = 0;
PyObject *__pyx_v_search_params = 0;
int __pyx_lineno = 0;
const char *__pyx_filename = NULL;
int __pyx_clineno = 0;
PyObject *__pyx_r = 0;
__Pyx_RefNannyDeclarations
__Pyx_RefNannySetupContext("c_find_near_matches_generic_ngrams (wrapper)", 0);
@ -3567,6 +3599,9 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_2c_find_near_matches_g
PyObject *__pyx_t_20 = NULL;
PyObject *__pyx_t_21 = NULL;
int __pyx_t_22;
int __pyx_lineno = 0;
const char *__pyx_filename = NULL;
int __pyx_clineno = 0;
__Pyx_RefNannySetupContext("c_find_near_matches_generic_ngrams", 0);
/* "fuzzysearch/_generic_search.pyx":248
@ -4722,6 +4757,9 @@ static int __Pyx_modinit_function_export_code(void) {
static int __Pyx_modinit_type_init_code(void) {
__Pyx_RefNannyDeclarations
int __pyx_lineno = 0;
const char *__pyx_filename = NULL;
int __pyx_clineno = 0;
__Pyx_RefNannySetupContext("__Pyx_modinit_type_init_code", 0);
/*--- Type init code ---*/
if (PyType_Ready(&__pyx_type_11fuzzysearch_15_generic_search___pyx_scope_struct____pyx_f_11fuzzysearch_15_generic_search__c_find_near_matches_generic_linear_programming) < 0) __PYX_ERR(0, 61, __pyx_L1_error)
@ -4764,17 +4802,19 @@ static int __Pyx_modinit_function_import_code(void) {
}
#if PY_MAJOR_VERSION < 3
#ifdef CYTHON_NO_PYINIT_EXPORT
#define __Pyx_PyMODINIT_FUNC void
#else
#ifndef CYTHON_NO_PYINIT_EXPORT
#define __Pyx_PyMODINIT_FUNC PyMODINIT_FUNC
#elif PY_MAJOR_VERSION < 3
#ifdef __cplusplus
#define __Pyx_PyMODINIT_FUNC extern "C" void
#else
#define __Pyx_PyMODINIT_FUNC void
#endif
#else
#ifdef CYTHON_NO_PYINIT_EXPORT
#define __Pyx_PyMODINIT_FUNC PyObject *
#ifdef __cplusplus
#define __Pyx_PyMODINIT_FUNC extern "C" PyObject *
#else
#define __Pyx_PyMODINIT_FUNC PyMODINIT_FUNC
#define __Pyx_PyMODINIT_FUNC PyObject *
#endif
#endif
@ -4857,6 +4897,9 @@ static CYTHON_SMALL_CODE int __pyx_pymod_exec__generic_search(PyObject *__pyx_py
{
PyObject *__pyx_t_1 = NULL;
PyObject *__pyx_t_2 = NULL;
int __pyx_lineno = 0;
const char *__pyx_filename = NULL;
int __pyx_clineno = 0;
__Pyx_RefNannyDeclarations
#if CYTHON_PEP489_MULTI_PHASE_INIT
if (__pyx_m) {
@ -4945,14 +4988,14 @@ if (!__Pyx_RefNanny) {
}
#endif
/*--- Builtin init code ---*/
if (__Pyx_InitCachedBuiltins() < 0) goto __pyx_L1_error;
if (__Pyx_InitCachedBuiltins() < 0) __PYX_ERR(0, 1, __pyx_L1_error)
/*--- Constants init code ---*/
if (__Pyx_InitCachedConstants() < 0) goto __pyx_L1_error;
if (__Pyx_InitCachedConstants() < 0) __PYX_ERR(0, 1, __pyx_L1_error)
/*--- Global type/function init code ---*/
(void)__Pyx_modinit_global_init_code();
(void)__Pyx_modinit_variable_export_code();
(void)__Pyx_modinit_function_export_code();
if (unlikely(__Pyx_modinit_type_init_code() != 0)) goto __pyx_L1_error;
if (unlikely(__Pyx_modinit_type_init_code() < 0)) __PYX_ERR(0, 1, __pyx_L1_error)
(void)__Pyx_modinit_type_import_code();
(void)__Pyx_modinit_variable_import_code();
(void)__Pyx_modinit_function_import_code();
@ -5194,7 +5237,7 @@ static int __Pyx_ParseOptionalKeywords(
}
name = first_kw_arg;
#if PY_MAJOR_VERSION < 3
if (likely(PyString_CheckExact(key)) || likely(PyString_Check(key))) {
if (likely(PyString_Check(key))) {
while (*name) {
if ((CYTHON_COMPILING_IN_PYPY || PyString_GET_SIZE(**name) == PyString_GET_SIZE(key))
&& _PyString_Eq(**name, key)) {
@ -5221,7 +5264,7 @@ static int __Pyx_ParseOptionalKeywords(
while (*name) {
int cmp = (**name == key) ? 0 :
#if !CYTHON_COMPILING_IN_PYPY && PY_MAJOR_VERSION >= 3
(PyUnicode_GET_SIZE(**name) != PyUnicode_GET_SIZE(key)) ? 1 :
(__Pyx_PyUnicode_GET_LENGTH(**name) != __Pyx_PyUnicode_GET_LENGTH(key)) ? 1 :
#endif
PyUnicode_Compare(**name, key);
if (cmp < 0 && unlikely(PyErr_Occurred())) goto bad;
@ -5237,7 +5280,7 @@ static int __Pyx_ParseOptionalKeywords(
while (argname != first_kw_arg) {
int cmp = (**argname == key) ? 0 :
#if !CYTHON_COMPILING_IN_PYPY && PY_MAJOR_VERSION >= 3
(PyUnicode_GET_SIZE(**argname) != PyUnicode_GET_SIZE(key)) ? 1 :
(__Pyx_PyUnicode_GET_LENGTH(**argname) != __Pyx_PyUnicode_GET_LENGTH(key)) ? 1 :
#endif
PyUnicode_Compare(**argname, key);
if (cmp < 0 && unlikely(PyErr_Occurred())) goto bad;
@ -5844,7 +5887,7 @@ bad:
goto done;
}
/* CythonFunction */
/* CythonFunctionShared */
#include <structmember.h>
static PyObject *
__Pyx_CyFunction_get_doc(__pyx_CyFunctionObject *op, CYTHON_UNUSED void *closure)
@ -6151,10 +6194,9 @@ static PyMethodDef __pyx_CyFunction_methods[] = {
#else
#define __Pyx_CyFunction_weakreflist(cyfunc) ((cyfunc)->func.m_weakreflist)
#endif
static PyObject *__Pyx_CyFunction_New(PyTypeObject *type, PyMethodDef *ml, int flags, PyObject* qualname,
PyObject *closure, PyObject *module, PyObject* globals, PyObject* code) {
__pyx_CyFunctionObject *op = PyObject_GC_New(__pyx_CyFunctionObject, type);
if (op == NULL)
static PyObject *__Pyx_CyFunction_Init(__pyx_CyFunctionObject *op, PyMethodDef *ml, int flags, PyObject* qualname,
PyObject *closure, PyObject *module, PyObject* globals, PyObject* code) {
if (unlikely(op == NULL))
return NULL;
op->flags = flags;
__Pyx_CyFunction_weakreflist(op) = NULL;
@ -6175,12 +6217,12 @@ static PyObject *__Pyx_CyFunction_New(PyTypeObject *type, PyMethodDef *ml, int f
Py_XINCREF(code);
op->func_code = code;
op->defaults_pyobjects = 0;
op->defaults_size = 0;
op->defaults = NULL;
op->defaults_tuple = NULL;
op->defaults_kwdict = NULL;
op->defaults_getter = NULL;
op->func_annotations = NULL;
PyObject_GC_Track(op);
return (PyObject *) op;
}
static int
@ -6428,6 +6470,7 @@ static CYTHON_INLINE void *__Pyx_CyFunction_InitDefaults(PyObject *func, size_t
return PyErr_NoMemory();
memset(m->defaults, 0, size);
m->defaults_pyobjects = pyobjects;
m->defaults_size = size;
return m->defaults;
}
static CYTHON_INLINE void __Pyx_CyFunction_SetDefaultsTuple(PyObject *func, PyObject *tuple) {
@ -6446,6 +6489,19 @@ static CYTHON_INLINE void __Pyx_CyFunction_SetAnnotationsDict(PyObject *func, Py
Py_INCREF(dict);
}
/* CythonFunction */
static PyObject *__Pyx_CyFunction_New(PyMethodDef *ml, int flags, PyObject* qualname,
PyObject *closure, PyObject *module, PyObject* globals, PyObject* code) {
PyObject *op = __Pyx_CyFunction_Init(
PyObject_GC_New(__pyx_CyFunctionObject, __pyx_CyFunctionType),
ml, flags, qualname, closure, module, globals, code
);
if (likely(op)) {
PyObject_GC_Track(op);
}
return op;
}
/* GetException */
#if CYTHON_FAST_THREAD_STATE
static int __Pyx__GetException(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb)
@ -6681,7 +6737,7 @@ static PyObject *__Pyx_Import(PyObject *name, PyObject *from_list, int level) {
{
#if PY_MAJOR_VERSION >= 3
if (level == -1) {
if (strchr(__Pyx_MODULE_NAME, '.')) {
if ((1) && (strchr(__Pyx_MODULE_NAME, '.'))) {
module = PyImport_ImportModuleLevelObject(
name, global_dict, empty_dict, list, 1);
if (!module) {
@ -6732,7 +6788,7 @@ static PyObject* __Pyx_ImportFrom(PyObject* module, PyObject* name) {
/* CLineInTraceback */
#ifndef CYTHON_CLINE_IN_TRACEBACK
static int __Pyx_CLineForTraceback(PyThreadState *tstate, int c_line) {
static int __Pyx_CLineForTraceback(CYTHON_NCP_UNUSED PyThreadState *tstate, int c_line) {
PyObject *use_cline;
PyObject *ptype, *pvalue, *ptraceback;
#if CYTHON_COMPILING_IN_CPYTHON
@ -6836,7 +6892,7 @@ static void __pyx_insert_code_object(int code_line, PyCodeObject* code_object) {
if (__pyx_code_cache.count == __pyx_code_cache.max_count) {
int new_max = __pyx_code_cache.max_count + 64;
entries = (__Pyx_CodeObjectCacheEntry*)PyMem_Realloc(
__pyx_code_cache.entries, (size_t)new_max*sizeof(__Pyx_CodeObjectCacheEntry));
__pyx_code_cache.entries, ((size_t)new_max) * sizeof(__Pyx_CodeObjectCacheEntry));
if (unlikely(!entries)) {
return;
}

View File

@ -1,4 +1,4 @@
/* Generated by Cython 0.29.14 */
/* Generated by Cython 0.29.20 */
#define PY_SSIZE_T_CLEAN
#include "Python.h"
@ -7,8 +7,8 @@
#elif PY_VERSION_HEX < 0x02060000 || (0x03000000 <= PY_VERSION_HEX && PY_VERSION_HEX < 0x03030000)
#error Cython requires Python 2.6+ or Python 3.3+.
#else
#define CYTHON_ABI "0_29_14"
#define CYTHON_HEX_VERSION 0x001D0EF0
#define CYTHON_ABI "0_29_20"
#define CYTHON_HEX_VERSION 0x001D14F0
#define CYTHON_FUTURE_DIVISION 0
#include <stddef.h>
#ifndef offsetof
@ -484,8 +484,10 @@ static CYTHON_INLINE void * PyThread_tss_get(Py_tss_t *key) {
#define PyString_Type PyUnicode_Type
#define PyString_Check PyUnicode_Check
#define PyString_CheckExact PyUnicode_CheckExact
#ifndef PyObject_Unicode
#define PyObject_Unicode PyObject_Str
#endif
#endif
#if PY_MAJOR_VERSION >= 3
#define __Pyx_PyBaseString_Check(obj) PyUnicode_Check(obj)
#define __Pyx_PyBaseString_CheckExact(obj) PyUnicode_CheckExact(obj)
@ -496,6 +498,13 @@ static CYTHON_INLINE void * PyThread_tss_get(Py_tss_t *key) {
#ifndef PySet_CheckExact
#define PySet_CheckExact(obj) (Py_TYPE(obj) == &PySet_Type)
#endif
#if PY_VERSION_HEX >= 0x030900A4
#define __Pyx_SET_REFCNT(obj, refcnt) Py_SET_REFCNT(obj, refcnt)
#define __Pyx_SET_SIZE(obj, size) Py_SET_SIZE(obj, size)
#else
#define __Pyx_SET_REFCNT(obj, refcnt) Py_REFCNT(obj) = (refcnt)
#define __Pyx_SET_SIZE(obj, size) Py_SIZE(obj) = (size)
#endif
#if CYTHON_ASSUME_SAFE_MACROS
#define __Pyx_PySequence_SIZE(seq) Py_SIZE(seq)
#else
@ -576,11 +585,10 @@ static CYTHON_INLINE float __PYX_NAN() {
#define __Pyx_truncl truncl
#endif
#define __PYX_MARK_ERR_POS(f_index, lineno) \
{ __pyx_filename = __pyx_f[f_index]; (void)__pyx_filename; __pyx_lineno = lineno; (void)__pyx_lineno; __pyx_clineno = __LINE__; (void)__pyx_clineno; }
#define __PYX_ERR(f_index, lineno, Ln_error) \
{ \
__pyx_filename = __pyx_f[f_index]; __pyx_lineno = lineno; __pyx_clineno = __LINE__; goto Ln_error; \
}
{ __PYX_MARK_ERR_POS(f_index, lineno) goto Ln_error; }
#ifndef __PYX_EXTERN_C
#ifdef __cplusplus
@ -1243,6 +1251,9 @@ static PyObject *__pyx_pw_11fuzzysearch_19_levenshtein_ngrams_1c_expand_short(Py
PyObject *__pyx_v_subsequence = 0;
PyObject *__pyx_v_sequence = 0;
PyObject *__pyx_v_max_l_dist = 0;
int __pyx_lineno = 0;
const char *__pyx_filename = NULL;
int __pyx_clineno = 0;
PyObject *__pyx_r = 0;
__Pyx_RefNannyDeclarations
__Pyx_RefNannySetupContext("c_expand_short (wrapper)", 0);
@ -1349,6 +1360,9 @@ static PyObject *__pyx_pf_11fuzzysearch_19_levenshtein_ngrams_c_expand_short(CYT
PyObject *__pyx_t_23 = NULL;
PyObject *__pyx_t_24 = NULL;
PyObject *__pyx_t_25 = NULL;
int __pyx_lineno = 0;
const char *__pyx_filename = NULL;
int __pyx_clineno = 0;
__Pyx_RefNannySetupContext("c_expand_short", 0);
/* "fuzzysearch/_levenshtein_ngrams.pyx":28
@ -1938,6 +1952,9 @@ static PyObject *__pyx_pw_11fuzzysearch_19_levenshtein_ngrams_3c_expand_long(PyO
PyObject *__pyx_v_subsequence = 0;
PyObject *__pyx_v_sequence = 0;
PyObject *__pyx_v_max_l_dist = 0;
int __pyx_lineno = 0;
const char *__pyx_filename = NULL;
int __pyx_clineno = 0;
PyObject *__pyx_r = 0;
__Pyx_RefNannyDeclarations
__Pyx_RefNannySetupContext("c_expand_long (wrapper)", 0);
@ -2051,6 +2068,9 @@ static PyObject *__pyx_pf_11fuzzysearch_19_levenshtein_ngrams_2c_expand_long(CYT
PyObject *__pyx_t_26 = NULL;
PyObject *__pyx_t_27 = NULL;
PyObject *__pyx_t_28 = NULL;
int __pyx_lineno = 0;
const char *__pyx_filename = NULL;
int __pyx_clineno = 0;
__Pyx_RefNannySetupContext("c_expand_long", 0);
/* "fuzzysearch/_levenshtein_ngrams.pyx":87
@ -3123,17 +3143,19 @@ static int __Pyx_modinit_function_import_code(void) {
}
#if PY_MAJOR_VERSION < 3
#ifdef CYTHON_NO_PYINIT_EXPORT
#define __Pyx_PyMODINIT_FUNC void
#else
#ifndef CYTHON_NO_PYINIT_EXPORT
#define __Pyx_PyMODINIT_FUNC PyMODINIT_FUNC
#elif PY_MAJOR_VERSION < 3
#ifdef __cplusplus
#define __Pyx_PyMODINIT_FUNC extern "C" void
#else
#define __Pyx_PyMODINIT_FUNC void
#endif
#else
#ifdef CYTHON_NO_PYINIT_EXPORT
#define __Pyx_PyMODINIT_FUNC PyObject *
#ifdef __cplusplus
#define __Pyx_PyMODINIT_FUNC extern "C" PyObject *
#else
#define __Pyx_PyMODINIT_FUNC PyMODINIT_FUNC
#define __Pyx_PyMODINIT_FUNC PyObject *
#endif
#endif
@ -3215,6 +3237,9 @@ static CYTHON_SMALL_CODE int __pyx_pymod_exec__levenshtein_ngrams(PyObject *__py
#endif
{
PyObject *__pyx_t_1 = NULL;
int __pyx_lineno = 0;
const char *__pyx_filename = NULL;
int __pyx_clineno = 0;
__Pyx_RefNannyDeclarations
#if CYTHON_PEP489_MULTI_PHASE_INIT
if (__pyx_m) {
@ -3303,9 +3328,9 @@ if (!__Pyx_RefNanny) {
}
#endif
/*--- Builtin init code ---*/
if (__Pyx_InitCachedBuiltins() < 0) goto __pyx_L1_error;
if (__Pyx_InitCachedBuiltins() < 0) __PYX_ERR(0, 1, __pyx_L1_error)
/*--- Constants init code ---*/
if (__Pyx_InitCachedConstants() < 0) goto __pyx_L1_error;
if (__Pyx_InitCachedConstants() < 0) __PYX_ERR(0, 1, __pyx_L1_error)
/*--- Global type/function init code ---*/
(void)__Pyx_modinit_global_init_code();
(void)__Pyx_modinit_variable_export_code();
@ -3503,7 +3528,7 @@ static int __Pyx_ParseOptionalKeywords(
}
name = first_kw_arg;
#if PY_MAJOR_VERSION < 3
if (likely(PyString_CheckExact(key)) || likely(PyString_Check(key))) {
if (likely(PyString_Check(key))) {
while (*name) {
if ((CYTHON_COMPILING_IN_PYPY || PyString_GET_SIZE(**name) == PyString_GET_SIZE(key))
&& _PyString_Eq(**name, key)) {
@ -3530,7 +3555,7 @@ static int __Pyx_ParseOptionalKeywords(
while (*name) {
int cmp = (**name == key) ? 0 :
#if !CYTHON_COMPILING_IN_PYPY && PY_MAJOR_VERSION >= 3
(PyUnicode_GET_SIZE(**name) != PyUnicode_GET_SIZE(key)) ? 1 :
(__Pyx_PyUnicode_GET_LENGTH(**name) != __Pyx_PyUnicode_GET_LENGTH(key)) ? 1 :
#endif
PyUnicode_Compare(**name, key);
if (cmp < 0 && unlikely(PyErr_Occurred())) goto bad;
@ -3546,7 +3571,7 @@ static int __Pyx_ParseOptionalKeywords(
while (argname != first_kw_arg) {
int cmp = (**argname == key) ? 0 :
#if !CYTHON_COMPILING_IN_PYPY && PY_MAJOR_VERSION >= 3
(PyUnicode_GET_SIZE(**argname) != PyUnicode_GET_SIZE(key)) ? 1 :
(__Pyx_PyUnicode_GET_LENGTH(**argname) != __Pyx_PyUnicode_GET_LENGTH(key)) ? 1 :
#endif
PyUnicode_Compare(**argname, key);
if (cmp < 0 && unlikely(PyErr_Occurred())) goto bad;
@ -4262,7 +4287,7 @@ static CYTHON_INLINE int __Pyx_object_dict_version_matches(PyObject* obj, PY_UIN
/* CLineInTraceback */
#ifndef CYTHON_CLINE_IN_TRACEBACK
static int __Pyx_CLineForTraceback(PyThreadState *tstate, int c_line) {
static int __Pyx_CLineForTraceback(CYTHON_NCP_UNUSED PyThreadState *tstate, int c_line) {
PyObject *use_cline;
PyObject *ptype, *pvalue, *ptraceback;
#if CYTHON_COMPILING_IN_CPYTHON
@ -4366,7 +4391,7 @@ static void __pyx_insert_code_object(int code_line, PyCodeObject* code_object) {
if (__pyx_code_cache.count == __pyx_code_cache.max_count) {
int new_max = __pyx_code_cache.max_count + 64;
entries = (__Pyx_CodeObjectCacheEntry*)PyMem_Realloc(
__pyx_code_cache.entries, (size_t)new_max*sizeof(__Pyx_CodeObjectCacheEntry));
__pyx_code_cache.entries, ((size_t)new_max) * sizeof(__Pyx_CodeObjectCacheEntry));
if (unlikely(!entries)) {
return;
}

View File

@ -1,15 +1,10 @@
#define PY_SSIZE_T_CLEAN
#include <Python.h>
#if PY_MAJOR_VERSION >= 3
#define IS_PY3K
#endif
#include "src/fuzzysearch/_c_ext_base.h"
#define DECLARE_VARS
#define PREPARE
#define OUTPUT_VALUE(x) DO_FREES; Py_RETURN_TRUE
#define RETURN_AT_END Py_RETURN_FALSE
#define RETURN_AT_END DO_FREES; Py_RETURN_FALSE
#define FUNCTION_NAME substitutions_only_has_near_matches_lp_byteslike
#include "src/fuzzysearch/_substitutions_only_lp_template.h"
#undef FUNCTION_NAME
@ -31,21 +26,21 @@
#define PREPARE \
results = PyList_New(0); \
if (unlikely(!results)) \
return NULL;
goto error;
#define OUTPUT_VALUE(x) do { \
next_result = PyInt_FromSsize_t((x)); \
if (unlikely(next_result == NULL)) { \
Py_DECREF(results); \
return NULL; \
goto error; \
} \
if (unlikely(PyList_Append(results, next_result) == -1)) { \
Py_DECREF(next_result); \
Py_DECREF(results); \
return NULL; \
goto error; \
} \
Py_DECREF(next_result); \
} while(0)
#define RETURN_AT_END return results
#define RETURN_AT_END DO_FREES; return results
#define FUNCTION_NAME substitutions_only_find_near_matches_lp_byteslike
#include "src/fuzzysearch/_substitutions_only_lp_template.h"
#undef FUNCTION_NAME

View File

@ -1,50 +1,34 @@
#ifdef __GNUC__
/* Test for GCC > 2.95 */
#if __GNUC__ > 2 || (__GNUC__ == 2 && (__GNUC_MINOR__ > 95))
#define likely(x) __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)
#else /* __GNUC__ > 2 ... */
#define likely(x) (x)
#define unlikely(x) (x)
#endif /* __GNUC__ > 2 ... */
#else /* __GNUC__ */
#define likely(x) (x)
#define unlikely(x) (x)
#endif /* __GNUC__ */
#define DO_FREES \
free(sub_counts); \
PyBuffer_Release(&subseq_pybuf); \
PyBuffer_Release(&seq_pybuf)
#define DO_FREES free(sub_counts)
static PyObject *
FUNCTION_NAME(PyObject *self, PyObject *args)
{
/* input params */
Py_buffer subseq_pybuf, seq_pybuf;
int max_substitutions_input;
const char *subsequence;
const char *sequence;
Py_ssize_t subseq_len, seq_len;
int max_substitutions_input;
unsigned int max_substitutions;
unsigned int *sub_counts;
unsigned int *sub_counts = NULL;
Py_ssize_t seq_idx, subseq_idx, count_idx;
DECLARE_VARS;
#ifdef IS_PY3K
#define ARGSPEC "y#y#i"
#else
#if PY_HEX_VERSION >= 0x02070000
#define ARGSPEC "t#t#i"
#else
#define ARGSPEC "s#s#i"
#endif
#endif
if (unlikely(!PyArg_ParseTuple(
args,
ARGSPEC,
&subsequence, &subseq_len,
&sequence, &seq_len,
#ifdef IS_PY3K
"y*y*i",
#else
"s*s*i",
#endif
&subseq_pybuf,
&seq_pybuf,
&max_substitutions_input
))) {
return NULL;
@ -52,18 +36,32 @@ FUNCTION_NAME(PyObject *self, PyObject *args)
if (unlikely(max_substitutions_input < 0)) {
PyErr_SetString(PyExc_ValueError, "max_l_dist must be non-negative");
return NULL;
goto error;
}
/// TODO: check for overflow
max_substitutions = (unsigned int) max_substitutions_input;
if (unlikely(!(
is_simple_buffer(subseq_pybuf) &&
is_simple_buffer(seq_pybuf)
))) {
PyErr_SetString(PyExc_TypeError, "only contiguous sequences of single-byte values are supported");
goto error;
}
subsequence = (const char*)(subseq_pybuf.buf);
sequence = (const char*)(seq_pybuf.buf);
subseq_len = subseq_pybuf.len;
seq_len = seq_pybuf.len;
if (unlikely(subseq_len < 0 || seq_len < 0)) {
PyErr_SetString(PyExc_Exception, "an unknown error occurred");
return NULL;
goto error;
}
if (unlikely(subseq_len == 0)) {
PyErr_SetString(PyExc_ValueError, "subsequence must not be empty");
return NULL;
goto error;
}
PREPARE;
@ -72,11 +70,6 @@ FUNCTION_NAME(PyObject *self, PyObject *args)
RETURN_AT_END;
}
sub_counts = (unsigned int *) malloc (sizeof(unsigned int) * subseq_len);
if (sub_counts == NULL) {
return PyErr_NoMemory();
}
if (unlikely(max_substitutions >= subseq_len)) {
for (seq_idx = 0; seq_idx <= seq_len - subseq_len; ++seq_idx) {
OUTPUT_VALUE(seq_idx);
@ -84,6 +77,12 @@ FUNCTION_NAME(PyObject *self, PyObject *args)
RETURN_AT_END;
}
sub_counts = (unsigned int *) malloc (sizeof(unsigned int) * subseq_len);
if (sub_counts == NULL) {
DO_FREES;
return PyErr_NoMemory();
}
for (seq_idx = 0; seq_idx < subseq_len - 1; ++seq_idx) {
sub_counts[seq_idx] = 0;
for (subseq_idx = 0; subseq_idx <= seq_idx; ++subseq_idx) {
@ -108,8 +107,11 @@ FUNCTION_NAME(PyObject *self, PyObject *args)
sub_counts[count_idx] = 0;
}
DO_FREES;
RETURN_AT_END;
error:
DO_FREES;
return NULL;
}
#undef DO_FREES

View File

@ -1,33 +1,22 @@
#include "src/fuzzysearch/memmem.h"
/* Branch-prediction hints. With GCC newer than 2.95 they wrap
 * __builtin_expect(); with an older GCC or any other compiler they expand to
 * the bare expression. */
#ifdef __GNUC__
/* Test for GCC > 2.95 */
#if __GNUC__ > 2 || (__GNUC__ == 2 && (__GNUC_MINOR__ > 95))
#define likely(x) __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)
#else /* __GNUC__ > 2 ... */
#define likely(x) (x)
#define unlikely(x) (x)
#endif /* __GNUC__ > 2 ... */
#else /* __GNUC__ */
#define likely(x) (x)
#define unlikely(x) (x)
#endif /* __GNUC__ */
/* Cleanup for the templated function: releases the subseq_pybuf and seq_pybuf
 * buffer views. The expansion has no trailing semicolon, so call sites write
 * `DO_FREES;`. */
#define DO_FREES \
PyBuffer_Release(&subseq_pybuf); \
PyBuffer_Release(&seq_pybuf)
/* NOTE(review): second, empty definition of DO_FREES with no #undef in
 * between. It looks like the other side of a diff whose +/- markers were
 * lost - confirm which definition the real header keeps. */
#define DO_FREES
static PyObject *
FUNCTION_NAME(PyObject *self, PyObject *args)
{
/* input params */
Py_buffer subseq_pybuf, seq_pybuf;
int max_substitutions_input;
const char *subsequence;
const char *sequence;
Py_ssize_t subseq_len, seq_len;
int max_substitutions_input;
unsigned int max_substitutions;
unsigned int ngram_len, ngram_start, subseq_len_after_ngram;
const char *match_ptr, *seq_ptr, *subseq_ptr, *subseq_end;
int subseq_sum;
@ -35,21 +24,15 @@ FUNCTION_NAME(PyObject *self, PyObject *args)
DECLARE_VARS;
#ifdef IS_PY3K
#define ARGSPEC "y#y#i"
#else
#if PY_HEX_VERSION >= 0x02070000
#define ARGSPEC "t#t#i"
#else
#define ARGSPEC "s#s#i"
#endif
#endif
if (unlikely(!PyArg_ParseTuple(
args,
ARGSPEC,
&subsequence, &subseq_len,
&sequence, &seq_len,
#ifdef IS_PY3K
"y*y*i",
#else
"s*s*i",
#endif
&subseq_pybuf,
&seq_pybuf,
&max_substitutions_input
))) {
return NULL;
@ -57,20 +40,34 @@ FUNCTION_NAME(PyObject *self, PyObject *args)
if (unlikely(max_substitutions_input < 0)) {
PyErr_SetString(PyExc_ValueError, "max_l_dist must be non-negative");
return NULL;
goto error;
}
/// TODO: check for overflow
max_substitutions = (unsigned int) max_substitutions_input;
if (unlikely(!(
is_simple_buffer(subseq_pybuf) &&
is_simple_buffer(seq_pybuf)
))) {
PyErr_SetString(PyExc_TypeError, "only contiguous sequences of single-byte values are supported");
goto error;
}
subsequence = (const char*)(subseq_pybuf.buf);
sequence = (const char*)(seq_pybuf.buf);
subseq_len = subseq_pybuf.len;
seq_len = seq_pybuf.len;
if (unlikely(subseq_len < 0 || seq_len < 0)) {
PyErr_SetString(PyExc_Exception, "an unknown error occurred");
return NULL;
goto error;
}
/* this is required because simple_memmem_with_needle_sum() returns the
haystack if the needle is empty */
if (unlikely(subseq_len == 0)) {
PyErr_SetString(PyExc_ValueError, "subsequence must not be empty");
return NULL;
goto error;
}
PREPARE;
@ -138,6 +135,10 @@ FUNCTION_NAME(PyObject *self, PyObject *args)
}
RETURN_AT_END;
error:
DO_FREES;
return NULL;
}
#undef DO_FREES

View File

@ -1,10 +1,11 @@
from collections import namedtuple
from functools import wraps
import attr
from fuzzysearch.common import FuzzySearchBase, Match, \
consolidate_overlapping_matches
from fuzzysearch.compat import text_type, xrange
from fuzzysearch.compat import xrange
from fuzzysearch.search_exact import search_exact
@ -184,20 +185,15 @@ except ImportError:
find_near_matches_generic_linear_programming = \
_find_near_matches_generic_linear_programming
else:
# Generator wrapper that tries the C implementation (c_fnm_generic_lp) and
# keeps the pure-Python _find_near_matches_generic_linear_programming as the
# alternative.
# NOTE(review): the body below holds two alternative versions back to back;
# this text looks like a diff with its +/- markers and indentation stripped.
# Confirm against the repository which version is current before editing.
@wraps(_find_near_matches_generic_linear_programming)
def find_near_matches_generic_linear_programming(subsequence, sequence, search_params):
# Variant 1: only try the C function when neither argument is a text_type
# instance, and ignore a TypeError raised by it.
if not (
isinstance(subsequence, text_type) or
isinstance(sequence, text_type)
):
try:
for match in c_fnm_generic_lp(subsequence, sequence, search_params):
yield match
except TypeError:
pass
# Variant 1 continues with the pure-Python loop (its nesting level cannot be
# recovered from this text).
for match in _find_near_matches_generic_linear_programming(
subsequence, sequence, search_params):
yield match
# Variant 2: no type check up front; try the C function and switch to the
# pure-Python implementation on TypeError or UnicodeEncodeError.
try:
for match in c_fnm_generic_lp(subsequence, sequence, search_params):
yield match
except (TypeError, UnicodeEncodeError):
for match in _find_near_matches_generic_linear_programming(
subsequence, sequence, search_params):
yield match
def find_near_matches_generic_ngrams(subsequence, sequence, search_params):

View File

@ -4,7 +4,6 @@ from functools import wraps
from fuzzysearch.common import FuzzySearchBase, Match, \
count_differences_with_maximum, get_best_match_in_group, group_matches
from fuzzysearch.compat import text_type
from fuzzysearch.search_exact import search_exact
@ -246,51 +245,41 @@ else:
# Wrapper that tries substitutions_only_has_near_matches_ngrams_byteslike and
# keeps py_has_near_match_substitutions_ngrams as the alternative.
# NOTE(review): the body below holds two alternative versions back to back;
# this text looks like a diff with its +/- markers and indentation stripped.
# Confirm against the repository which version is current before editing.
@wraps(py_has_near_match_substitutions_ngrams)
def has_near_match_substitutions_ngrams(subsequence, sequence,
max_substitutions):
# Variant 1: only try the byteslike function when neither argument is a
# text_type instance, ignore a TypeError from it, then return the
# pure-Python result.
if not (
isinstance(subsequence, text_type) or
isinstance(sequence, text_type)
):
try:
return substitutions_only_has_near_matches_ngrams_byteslike(
subsequence, sequence, max_substitutions)
except TypeError:
pass
return py_has_near_match_substitutions_ngrams(
subsequence, sequence, max_substitutions)
# Variant 2: no type check up front; TypeError or UnicodeEncodeError from the
# byteslike function leads to the pure-Python implementation instead.
try:
return substitutions_only_has_near_matches_ngrams_byteslike(
subsequence, sequence, max_substitutions)
except (TypeError, UnicodeEncodeError):
return py_has_near_match_substitutions_ngrams(
subsequence, sequence, max_substitutions)
# Keep a reference to the existing find_near_matches_substitutions_ngrams under
# a py_ name before the def below rebinds the public name to the wrapper.
py_find_near_matches_substitutions_ngrams = \
find_near_matches_substitutions_ngrams
# Wrapper that tries _subs_only_fnm_ngram_byteslike and turns the indexes it
# returns into Match objects; py_find_near_matches_substitutions_ngrams is the
# alternative.
# NOTE(review): the body below holds two alternative versions back to back;
# this text looks like a diff with its +/- markers and indentation stripped.
# Confirm against the repository which version is current before editing.
@wraps(py_find_near_matches_substitutions_ngrams)
def find_near_matches_substitutions_ngrams(subsequence, sequence,
max_substitutions):
# Variant 1: only try the byteslike function when neither argument is a
# text_type instance; a TypeError from it is ignored.
if not (
isinstance(subsequence, text_type) or
isinstance(sequence, text_type)
):
try:
results = _subs_only_fnm_ngram_byteslike(
subsequence, sequence, max_substitutions)
except TypeError:
pass
else:
# One Match per returned index, covering len(subsequence) items from that
# index; the difference count is computed with a cap of
# max_substitutions + 1.
matches = [
Match(
index,
index + len(subsequence),
count_differences_with_maximum(
sequence[index:index+len(subsequence)],
subsequence,
max_substitutions + 1,
),
matched=sequence[index:index + len(subsequence)],
)
for index in results
]
# Group the matches and keep one match per group.
return [
get_best_match_in_group(group)
for group in group_matches(matches)
]
# Variant 2: same processing without the text_type check; TypeError and
# UnicodeEncodeError are both ignored here.
try:
results = _subs_only_fnm_ngram_byteslike(
subsequence, sequence, max_substitutions)
except (TypeError, UnicodeEncodeError):
pass
else:
matches = [
Match(
index,
index + len(subsequence),
count_differences_with_maximum(
sequence[index:index+len(subsequence)],
subsequence,
max_substitutions + 1,
),
matched=sequence[index:index + len(subsequence)],
)
for index in results
]
return [
get_best_match_in_group(group)
for group in group_matches(matches)
]
# Reached when the byteslike call did not produce a result: use the
# pure-Python implementation.
return py_find_near_matches_substitutions_ngrams(
subsequence, sequence, max_substitutions)