diff --git a/fuzzysearch/_generic_search.c b/fuzzysearch/_generic_search.c index ba43228..4672d65 100644 --- a/fuzzysearch/_generic_search.c +++ b/fuzzysearch/_generic_search.c @@ -1,4 +1,4 @@ -/* Generated by Cython 0.20.1 on Tue May 6 13:45:47 2014 */ +/* Generated by Cython 0.20.1 on Sun May 11 02:38:48 2014 */ #define PY_SSIZE_T_CLEAN #ifndef CYTHON_USE_PYLONG_INTERNALS @@ -342,6 +342,7 @@ static CYTHON_INLINE float __PYX_NAN() { #define __PYX_HAVE_API__fuzzysearch___generic_search #include "string.h" #include "stdlib.h" +#include "kmp.h" #ifdef _OPENMP #include #endif /* _OPENMP */ @@ -533,7 +534,7 @@ static const char *__pyx_f[] = { /*--- Type declarations ---*/ struct __pyx_t_11fuzzysearch_15_generic_search_GenericSearchCandidate; -/* "fuzzysearch/_generic_search.pyx":14 +/* "fuzzysearch/_generic_search.pyx":24 * * * cdef struct GenericSearchCandidate: # <<<<<<<<<<<<<< @@ -680,12 +681,12 @@ static PyObject* __Pyx_ImportFrom(PyObject* module, PyObject* name); /*proto*/ static PyObject *__Pyx_Import(PyObject *name, PyObject *from_list, int level); /*proto*/ -static CYTHON_INLINE size_t __Pyx_PyInt_As_size_t(PyObject *); - static CYTHON_INLINE unsigned int __Pyx_PyInt_As_unsigned_int(PyObject *); static CYTHON_INLINE char __Pyx_PyInt_As_char(PyObject *); +static CYTHON_INLINE size_t __Pyx_PyInt_As_size_t(PyObject *); + static CYTHON_INLINE PyObject* __Pyx_PyInt_From_int(int value); static CYTHON_INLINE PyObject* __Pyx_PyInt_From_long(long value); @@ -723,6 +724,7 @@ static int __Pyx_InitStrings(__Pyx_StringTabEntry *t); /*proto*/ /* Module declarations from 'libc.stdlib' */ /* Module declarations from 'fuzzysearch._generic_search' */ +static PyObject *__pyx_f_11fuzzysearch_15_generic_search__c_find_near_matches_generic_linear_programming(char const *, size_t, char const *, size_t, unsigned int, unsigned int, unsigned int, unsigned int); /*proto*/ #define __Pyx_MODULE_NAME "fuzzysearch._generic_search" int __pyx_module_is_main_fuzzysearch___generic_search = 0; @@ -733,15 +735,12 @@ static PyObject *__pyx_builtin_ValueError; static PyObject *__pyx_builtin_MemoryError; static PyObject *__pyx_builtin_xrange; static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_c_find_near_matches_generic_linear_programming(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_subsequence, PyObject *__pyx_v_sequence, PyObject *__pyx_v_max_substitutions, PyObject *__pyx_v_max_insertions, PyObject *__pyx_v_max_deletions, PyObject *__pyx_v_max_l_dist); /* proto */ -static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_2_c_find_near_matches_generic_linear_programming(CYTHON_UNUSED PyObject *__pyx_self, char *__pyx_v_subsequence, size_t __pyx_v_subseq_len, char *__pyx_v_sequence, size_t __pyx_v_seq_len, unsigned int __pyx_v_max_substitutions, unsigned int __pyx_v_max_insertions, unsigned int __pyx_v_max_deletions, unsigned int __pyx_v_max_l_dist); /* proto */ -static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_4c_find_near_matches_generic_ngrams(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_subsequence, PyObject *__pyx_v_sequence, PyObject *__pyx_v_max_substitutions, PyObject *__pyx_v_max_insertions, PyObject *__pyx_v_max_deletions, PyObject *__pyx_v_max_l_dist); /* proto */ +static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_2c_find_near_matches_generic_ngrams(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_subsequence, PyObject *__pyx_v_sequence, PyObject *__pyx_v_max_substitutions, PyObject *__pyx_v_max_insertions, PyObject *__pyx_v_max_deletions, PyObject *__pyx_v_max_l_dist); /* proto */ static char __pyx_k_Seq[] = "Seq"; static char __pyx_k_all[] = "__all__"; static char __pyx_k_end[] = "end"; static char __pyx_k_six[] = "six"; static char __pyx_k_sys[] = "sys"; -static char __pyx_k_tmp[] = "_tmp"; -static char __pyx_k_cand[] = "cand"; static char __pyx_k_main[] = "__main__"; static char __pyx_k_test[] = "__test__"; static char __pyx_k_Match[] = "Match"; @@ -753,58 +752,46 @@ static char __pyx_k_start[] = "start"; static char __pyx_k_import[] = "__import__"; static char __pyx_k_l_dist[] = "l_dist"; static char __pyx_k_maxint[] = "maxint"; -static char __pyx_k_n_cand[] = "n_cand"; static char __pyx_k_n_dels[] = "n_dels"; static char __pyx_k_n_subs[] = "n_subs"; static char __pyx_k_xrange[] = "xrange"; static char __pyx_k_Bio_Seq[] = "Bio.Seq"; +static char __pyx_k_kmpNext[] = "kmpNext"; static char __pyx_k_matches[] = "matches"; static char __pyx_k_replace[] = "_replace"; -static char __pyx_k_seq_len[] = "seq_len"; -static char __pyx_k_charchar[] = "charchar"; +static char __pyx_k_seq_len[] = "_seq_len"; static char __pyx_k_sequence[] = "sequence"; static char __pyx_k_TypeError[] = "TypeError"; +static char __pyx_k_kmp_state[] = "kmp_state"; static char __pyx_k_match_ptr[] = "match_ptr"; -static char __pyx_k_n_skipped[] = "n_skipped"; static char __pyx_k_ngram_len[] = "ngram_len"; -static char __pyx_k_ngram_str[] = "ngram_str"; -static char __pyx_k_seq_len_2[] = "_seq_len"; static char __pyx_k_ValueError[] = "ValueError"; -static char __pyx_k_alloc_size[] = "alloc_size"; static char __pyx_k_c_sequence[] = "c_sequence"; -static char __pyx_k_candidates[] = "candidates"; static char __pyx_k_max_l_dist[] = "max_l_dist"; -static char __pyx_k_subseq_len[] = "subseq_len"; +static char __pyx_k_subseq_len[] = "_subseq_len"; static char __pyx_k_ImportError[] = "ImportError"; static char __pyx_k_MemoryError[] = "MemoryError"; static char __pyx_k_binary_type[] = "binary_type"; static char __pyx_k_ngram_start[] = "ngram_start"; static char __pyx_k_subsequence[] = "subsequence"; static char __pyx_k_c_max_l_dist[] = "c_max_l_dist"; -static char __pyx_k_n_candidates[] = "n_candidates"; static char __pyx_k_subseq_index[] = "subseq_index"; -static char __pyx_k_subseq_len_2[] = "_subseq_len"; static char __pyx_k_ALLOWED_TYPES[] = "ALLOWED_TYPES"; static char __pyx_k_c_subsequence[] = "c_subsequence"; static char __pyx_k_max_deletions[] = "max_deletions"; -static char __pyx_k_have_realloced[] = "have_realloced"; static char __pyx_k_max_insertions[] = "max_insertions"; -static char __pyx_k_new_candidates[] = "new_candidates"; static char __pyx_k_c_max_deletions[] = "c_max_deletions"; static char __pyx_k_c_max_insertions[] = "c_max_insertions"; -static char __pyx_k_n_new_candidates[] = "n_new_candidates"; static char __pyx_k_max_substitutions[] = "max_substitutions"; static char __pyx_k_fuzzysearch_common[] = "fuzzysearch.common"; static char __pyx_k_c_max_substitutions[] = "c_max_substitutions"; static char __pyx_k_small_search_length[] = "small_search_length"; -static char __pyx_k_subseq_len_minus_one[] = "subseq_len_minus_one"; -static char __pyx_k_subseq_len_minus_one_2[] = "_subseq_len_minus_one"; +static char __pyx_k_subseq_len_minus_one[] = "_subseq_len_minus_one"; static char __pyx_k_small_search_start_index[] = "small_search_start_index"; static char __pyx_k_Given_subsequence_is_empty[] = "Given subsequence is empty!"; static char __pyx_k_fuzzysearch__generic_search[] = "fuzzysearch._generic_search"; static char __pyx_k_sequence_is_of_invalid_type_s[] = "sequence is of invalid type %s"; static char __pyx_k_Users_taleinat_dev_fuzzysearch[] = "/Users/taleinat/dev/fuzzysearch/fuzzysearch/_generic_search.pyx"; -static char __pyx_k_c_find_near_matches_generic_lin[] = "_c_find_near_matches_generic_linear_programming"; static char __pyx_k_c_find_near_matches_generic_line[] = "c_find_near_matches_generic_linear_programming"; static char __pyx_k_c_find_near_matches_generic_ngra[] = "c_find_near_matches_generic_ngrams"; static char __pyx_k_subsequence_is_of_invalid_type_s[] = "subsequence is of invalid type %s"; @@ -820,9 +807,7 @@ static PyObject *__pyx_n_s_TypeError; static PyObject *__pyx_kp_s_Users_taleinat_dev_fuzzysearch; static PyObject *__pyx_n_s_ValueError; static PyObject *__pyx_n_s_all; -static PyObject *__pyx_n_s_alloc_size; static PyObject *__pyx_n_s_binary_type; -static PyObject *__pyx_n_s_c_find_near_matches_generic_lin; static PyObject *__pyx_n_s_c_find_near_matches_generic_line; static PyObject *__pyx_n_s_c_find_near_matches_generic_ngra; static PyObject *__pyx_n_s_c_max_deletions; @@ -831,15 +816,13 @@ static PyObject *__pyx_n_s_c_max_l_dist; static PyObject *__pyx_n_s_c_max_substitutions; static PyObject *__pyx_n_s_c_sequence; static PyObject *__pyx_n_s_c_subsequence; -static PyObject *__pyx_n_s_cand; -static PyObject *__pyx_n_s_candidates; -static PyObject *__pyx_n_s_charchar; static PyObject *__pyx_n_s_end; static PyObject *__pyx_n_s_fuzzysearch__generic_search; static PyObject *__pyx_n_s_fuzzysearch_common; -static PyObject *__pyx_n_s_have_realloced; static PyObject *__pyx_n_s_import; static PyObject *__pyx_n_s_index; +static PyObject *__pyx_n_s_kmpNext; +static PyObject *__pyx_n_s_kmp_state; static PyObject *__pyx_n_s_l_dist; static PyObject *__pyx_n_s_main; static PyObject *__pyx_n_s_match; @@ -850,21 +833,14 @@ static PyObject *__pyx_n_s_max_insertions; static PyObject *__pyx_n_s_max_l_dist; static PyObject *__pyx_n_s_max_substitutions; static PyObject *__pyx_n_s_maxint; -static PyObject *__pyx_n_s_n_cand; -static PyObject *__pyx_n_s_n_candidates; static PyObject *__pyx_n_s_n_dels; static PyObject *__pyx_n_s_n_ins; -static PyObject *__pyx_n_s_n_new_candidates; -static PyObject *__pyx_n_s_n_skipped; static PyObject *__pyx_n_s_n_subs; -static PyObject *__pyx_n_s_new_candidates; static PyObject *__pyx_n_s_ngram_len; static PyObject *__pyx_n_s_ngram_start; -static PyObject *__pyx_n_s_ngram_str; static PyObject *__pyx_n_s_range; static PyObject *__pyx_n_s_replace; static PyObject *__pyx_n_s_seq_len; -static PyObject *__pyx_n_s_seq_len_2; static PyObject *__pyx_n_s_sequence; static PyObject *__pyx_kp_s_sequence_is_of_invalid_type_s; static PyObject *__pyx_n_s_six; @@ -873,15 +849,12 @@ static PyObject *__pyx_n_s_small_search_start_index; static PyObject *__pyx_n_s_start; static PyObject *__pyx_n_s_subseq_index; static PyObject *__pyx_n_s_subseq_len; -static PyObject *__pyx_n_s_subseq_len_2; static PyObject *__pyx_n_s_subseq_len_minus_one; -static PyObject *__pyx_n_s_subseq_len_minus_one_2; static PyObject *__pyx_n_s_subsequence; static PyObject *__pyx_kp_s_subsequence_is_of_invalid_type_s; static PyObject *__pyx_n_s_sys; static PyObject *__pyx_n_s_test; static PyObject *__pyx_kp_s_the_subsequence_length_must_be_g; -static PyObject *__pyx_n_s_tmp; static PyObject *__pyx_n_s_xrange; static PyObject *__pyx_int_0; static PyObject *__pyx_int_1; @@ -891,12 +864,10 @@ static PyObject *__pyx_tuple__2; static PyObject *__pyx_tuple__3; static PyObject *__pyx_tuple__4; static PyObject *__pyx_tuple__6; -static PyObject *__pyx_tuple__8; static PyObject *__pyx_codeobj__5; static PyObject *__pyx_codeobj__7; -static PyObject *__pyx_codeobj__9; -/* "fuzzysearch/_generic_search.pyx":27 +/* "fuzzysearch/_generic_search.pyx":37 * * * def c_find_near_matches_generic_linear_programming(subsequence, sequence, # <<<<<<<<<<<<<< @@ -925,7 +896,7 @@ static PyObject *__pyx_pw_11fuzzysearch_15_generic_search_1c_find_near_matches_g static PyObject **__pyx_pyargnames[] = {&__pyx_n_s_subsequence,&__pyx_n_s_sequence,&__pyx_n_s_max_substitutions,&__pyx_n_s_max_insertions,&__pyx_n_s_max_deletions,&__pyx_n_s_max_l_dist,0}; PyObject* values[6] = {0,0,0,0,0,0}; - /* "fuzzysearch/_generic_search.pyx":31 + /* "fuzzysearch/_generic_search.pyx":41 * max_insertions, * max_deletions, * max_l_dist=None): # <<<<<<<<<<<<<< @@ -954,22 +925,22 @@ static PyObject *__pyx_pw_11fuzzysearch_15_generic_search_1c_find_near_matches_g case 1: if (likely((values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_sequence)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("c_find_near_matches_generic_linear_programming", 0, 5, 6, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 27; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("c_find_near_matches_generic_linear_programming", 0, 5, 6, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 37; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 2: if (likely((values[2] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_max_substitutions)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("c_find_near_matches_generic_linear_programming", 0, 5, 6, 2); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 27; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("c_find_near_matches_generic_linear_programming", 0, 5, 6, 2); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 37; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 3: if (likely((values[3] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_max_insertions)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("c_find_near_matches_generic_linear_programming", 0, 5, 6, 3); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 27; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("c_find_near_matches_generic_linear_programming", 0, 5, 6, 3); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 37; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 4: if (likely((values[4] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_max_deletions)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("c_find_near_matches_generic_linear_programming", 0, 5, 6, 4); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 27; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("c_find_near_matches_generic_linear_programming", 0, 5, 6, 4); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 37; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 5: if (kw_args > 0) { @@ -978,7 +949,7 @@ static PyObject *__pyx_pw_11fuzzysearch_15_generic_search_1c_find_near_matches_g } } if (unlikely(kw_args > 0)) { - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "c_find_near_matches_generic_linear_programming") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 27; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "c_find_near_matches_generic_linear_programming") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 37; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } } else { switch (PyTuple_GET_SIZE(__pyx_args)) { @@ -1001,7 +972,7 @@ static PyObject *__pyx_pw_11fuzzysearch_15_generic_search_1c_find_near_matches_g } goto __pyx_L4_argument_unpacking_done; __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("c_find_near_matches_generic_linear_programming", 0, 5, 6, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 27; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("c_find_near_matches_generic_linear_programming", 0, 5, 6, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 37; __pyx_clineno = __LINE__; goto __pyx_L3_error;} __pyx_L3_error:; __Pyx_AddTraceback("fuzzysearch._generic_search.c_find_near_matches_generic_linear_programming", __pyx_clineno, __pyx_lineno, __pyx_filename); __Pyx_RefNannyFinishContext(); @@ -1009,7 +980,7 @@ static PyObject *__pyx_pw_11fuzzysearch_15_generic_search_1c_find_near_matches_g __pyx_L4_argument_unpacking_done:; __pyx_r = __pyx_pf_11fuzzysearch_15_generic_search_c_find_near_matches_generic_linear_programming(__pyx_self, __pyx_v_subsequence, __pyx_v_sequence, __pyx_v_max_substitutions, __pyx_v_max_insertions, __pyx_v_max_deletions, __pyx_v_max_l_dist); - /* "fuzzysearch/_generic_search.pyx":27 + /* "fuzzysearch/_generic_search.pyx":37 * * * def c_find_near_matches_generic_linear_programming(subsequence, sequence, # <<<<<<<<<<<<<< @@ -1023,184 +994,173 @@ static PyObject *__pyx_pw_11fuzzysearch_15_generic_search_1c_find_near_matches_g } static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_c_find_near_matches_generic_linear_programming(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_subsequence, PyObject *__pyx_v_sequence, PyObject *__pyx_v_max_substitutions, PyObject *__pyx_v_max_insertions, PyObject *__pyx_v_max_deletions, PyObject *__pyx_v_max_l_dist) { - char *__pyx_v_c_subsequence; - char *__pyx_v_c_sequence; + char const *__pyx_v_c_subsequence; + char const *__pyx_v_c_sequence; PyObject *__pyx_r = NULL; __Pyx_RefNannyDeclarations PyObject *__pyx_t_1 = NULL; int __pyx_t_2; int __pyx_t_3; PyObject *__pyx_t_4 = NULL; - char *__pyx_t_5; - Py_ssize_t __pyx_t_6; - PyObject *__pyx_t_7 = NULL; - PyObject *__pyx_t_8 = NULL; - PyObject *__pyx_t_9 = NULL; - PyObject *__pyx_t_10 = NULL; - PyObject *__pyx_t_11 = NULL; - PyObject *__pyx_t_12 = NULL; - PyObject *__pyx_t_13 = NULL; - PyObject *__pyx_t_14 = NULL; + char const *__pyx_t_5; + char const *__pyx_t_6; + Py_ssize_t __pyx_t_7; + Py_ssize_t __pyx_t_8; + unsigned int __pyx_t_9; + unsigned int __pyx_t_10; + unsigned int __pyx_t_11; + unsigned int __pyx_t_12; + unsigned int __pyx_t_13; int __pyx_lineno = 0; const char *__pyx_filename = NULL; int __pyx_clineno = 0; __Pyx_RefNannySetupContext("c_find_near_matches_generic_linear_programming", 0); - /* "fuzzysearch/_generic_search.pyx":42 + /* "fuzzysearch/_generic_search.pyx":52 * * the total number of substitutions, insertions and deletions * """ * if not isinstance(sequence, ALLOWED_TYPES): # <<<<<<<<<<<<<< * raise TypeError('sequence is of invalid type %s' % type(subsequence)) * if not isinstance(subsequence, ALLOWED_TYPES): */ - __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_ALLOWED_TYPES); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 42; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_ALLOWED_TYPES); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 52; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = PyObject_IsInstance(__pyx_v_sequence, __pyx_t_1); if (unlikely(__pyx_t_2 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 42; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyObject_IsInstance(__pyx_v_sequence, __pyx_t_1); if (unlikely(__pyx_t_2 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 52; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_t_3 = ((!(__pyx_t_2 != 0)) != 0); if (__pyx_t_3) { - /* "fuzzysearch/_generic_search.pyx":43 + /* "fuzzysearch/_generic_search.pyx":53 * """ * if not isinstance(sequence, ALLOWED_TYPES): * raise TypeError('sequence is of invalid type %s' % type(subsequence)) # <<<<<<<<<<<<<< * if not isinstance(subsequence, ALLOWED_TYPES): * raise TypeError('subsequence is of invalid type %s' % type(subsequence)) */ - __pyx_t_1 = __Pyx_PyString_Format(__pyx_kp_s_sequence_is_of_invalid_type_s, ((PyObject *)Py_TYPE(__pyx_v_subsequence))); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 43; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyString_Format(__pyx_kp_s_sequence_is_of_invalid_type_s, ((PyObject *)Py_TYPE(__pyx_v_subsequence))); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_4 = PyTuple_New(1); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 43; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyTuple_New(1); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); PyTuple_SET_ITEM(__pyx_t_4, 0, __pyx_t_1); __Pyx_GIVEREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyObject_Call(__pyx_builtin_TypeError, __pyx_t_4, NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 43; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_Call(__pyx_builtin_TypeError, __pyx_t_4, NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; __Pyx_Raise(__pyx_t_1, 0, 0, 0); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - {__pyx_filename = __pyx_f[0]; __pyx_lineno = 43; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + {__pyx_filename = __pyx_f[0]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } - /* "fuzzysearch/_generic_search.pyx":44 + /* "fuzzysearch/_generic_search.pyx":54 * if not isinstance(sequence, ALLOWED_TYPES): * raise TypeError('sequence is of invalid type %s' % type(subsequence)) * if not isinstance(subsequence, ALLOWED_TYPES): # <<<<<<<<<<<<<< * raise TypeError('subsequence is of invalid type %s' % type(subsequence)) * */ - __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_ALLOWED_TYPES); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 44; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_ALLOWED_TYPES); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 54; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_3 = PyObject_IsInstance(__pyx_v_subsequence, __pyx_t_1); if (unlikely(__pyx_t_3 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 44; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyObject_IsInstance(__pyx_v_subsequence, __pyx_t_1); if (unlikely(__pyx_t_3 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 54; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_t_2 = ((!(__pyx_t_3 != 0)) != 0); if (__pyx_t_2) { - /* "fuzzysearch/_generic_search.pyx":45 + /* "fuzzysearch/_generic_search.pyx":55 * raise TypeError('sequence is of invalid type %s' % type(subsequence)) * if not isinstance(subsequence, ALLOWED_TYPES): * raise TypeError('subsequence is of invalid type %s' % type(subsequence)) # <<<<<<<<<<<<<< * * if not subsequence: */ - __pyx_t_1 = __Pyx_PyString_Format(__pyx_kp_s_subsequence_is_of_invalid_type_s, ((PyObject *)Py_TYPE(__pyx_v_subsequence))); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 45; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyString_Format(__pyx_kp_s_subsequence_is_of_invalid_type_s, ((PyObject *)Py_TYPE(__pyx_v_subsequence))); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 55; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_4 = PyTuple_New(1); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 45; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyTuple_New(1); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 55; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); PyTuple_SET_ITEM(__pyx_t_4, 0, __pyx_t_1); __Pyx_GIVEREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyObject_Call(__pyx_builtin_TypeError, __pyx_t_4, NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 45; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_Call(__pyx_builtin_TypeError, __pyx_t_4, NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 55; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; __Pyx_Raise(__pyx_t_1, 0, 0, 0); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - {__pyx_filename = __pyx_f[0]; __pyx_lineno = 45; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + {__pyx_filename = __pyx_f[0]; __pyx_lineno = 55; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } - /* "fuzzysearch/_generic_search.pyx":47 + /* "fuzzysearch/_generic_search.pyx":57 * raise TypeError('subsequence is of invalid type %s' % type(subsequence)) * * if not subsequence: # <<<<<<<<<<<<<< * raise ValueError('Given subsequence is empty!') * */ - __pyx_t_2 = __Pyx_PyObject_IsTrue(__pyx_v_subsequence); if (unlikely(__pyx_t_2 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 47; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyObject_IsTrue(__pyx_v_subsequence); if (unlikely(__pyx_t_2 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 57; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_t_3 = ((!__pyx_t_2) != 0); if (__pyx_t_3) { - /* "fuzzysearch/_generic_search.pyx":48 + /* "fuzzysearch/_generic_search.pyx":58 * * if not subsequence: * raise ValueError('Given subsequence is empty!') # <<<<<<<<<<<<<< * - * c_subsequence = subsequence + * cdef const char *c_subsequence = subsequence */ - __pyx_t_1 = __Pyx_PyObject_Call(__pyx_builtin_ValueError, __pyx_tuple_, NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 48; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_Call(__pyx_builtin_ValueError, __pyx_tuple_, NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 58; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_Raise(__pyx_t_1, 0, 0, 0); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - {__pyx_filename = __pyx_f[0]; __pyx_lineno = 48; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + {__pyx_filename = __pyx_f[0]; __pyx_lineno = 58; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } - /* "fuzzysearch/_generic_search.pyx":50 + /* "fuzzysearch/_generic_search.pyx":60 * raise ValueError('Given subsequence is empty!') * - * c_subsequence = subsequence # <<<<<<<<<<<<<< - * c_sequence = sequence + * cdef const char *c_subsequence = subsequence # <<<<<<<<<<<<<< + * cdef const char *c_sequence = sequence * */ - __pyx_t_5 = __Pyx_PyObject_AsString(__pyx_v_subsequence); if (unlikely((!__pyx_t_5) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 50; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_v_c_subsequence = ((char *)__pyx_t_5); + __pyx_t_5 = __Pyx_PyObject_AsString(__pyx_v_subsequence); if (unlikely((!__pyx_t_5) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 60; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_v_c_subsequence = __pyx_t_5; - /* "fuzzysearch/_generic_search.pyx":51 + /* "fuzzysearch/_generic_search.pyx":61 * - * c_subsequence = subsequence - * c_sequence = sequence # <<<<<<<<<<<<<< + * cdef const char *c_subsequence = subsequence + * cdef const char *c_sequence = sequence # <<<<<<<<<<<<<< * * return _c_find_near_matches_generic_linear_programming( */ - __pyx_t_5 = __Pyx_PyObject_AsString(__pyx_v_sequence); if (unlikely((!__pyx_t_5) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 51; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_v_c_sequence = ((char *)__pyx_t_5); + __pyx_t_6 = __Pyx_PyObject_AsString(__pyx_v_sequence); if (unlikely((!__pyx_t_6) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 61; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_v_c_sequence = __pyx_t_6; - /* "fuzzysearch/_generic_search.pyx":53 - * c_sequence = sequence + /* "fuzzysearch/_generic_search.pyx":63 + * cdef const char *c_sequence = sequence * * return _c_find_near_matches_generic_linear_programming( # <<<<<<<<<<<<<< * c_subsequence, len(subsequence), * c_sequence, len(sequence), */ __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_c_find_near_matches_generic_lin); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - /* "fuzzysearch/_generic_search.pyx":54 + /* "fuzzysearch/_generic_search.pyx":64 * * return _c_find_near_matches_generic_linear_programming( * c_subsequence, len(subsequence), # <<<<<<<<<<<<<< * c_sequence, len(sequence), * max_substitutions if max_substitutions is not None else (1<<29), */ - __pyx_t_4 = __Pyx_PyBytes_FromString(__pyx_v_c_subsequence); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 54; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_4); - __pyx_t_6 = PyObject_Length(__pyx_v_subsequence); if (unlikely(__pyx_t_6 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 54; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_t_7 = PyInt_FromSsize_t(__pyx_t_6); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 54; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_7); + __pyx_t_7 = PyObject_Length(__pyx_v_subsequence); if (unlikely(__pyx_t_7 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 64; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - /* "fuzzysearch/_generic_search.pyx":55 + /* "fuzzysearch/_generic_search.pyx":65 * return _c_find_near_matches_generic_linear_programming( * c_subsequence, len(subsequence), * c_sequence, len(sequence), # <<<<<<<<<<<<<< * max_substitutions if max_substitutions is not None else (1<<29), * max_insertions if max_insertions is not None else (1<<29), */ - __pyx_t_8 = __Pyx_PyBytes_FromString(__pyx_v_c_sequence); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 55; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_8); - __pyx_t_6 = PyObject_Length(__pyx_v_sequence); if (unlikely(__pyx_t_6 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 55; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_t_9 = PyInt_FromSsize_t(__pyx_t_6); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 55; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_9); + __pyx_t_8 = PyObject_Length(__pyx_v_sequence); if (unlikely(__pyx_t_8 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 65; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - /* "fuzzysearch/_generic_search.pyx":56 + /* "fuzzysearch/_generic_search.pyx":66 * c_subsequence, len(subsequence), * c_sequence, len(sequence), * max_substitutions if max_substitutions is not None else (1<<29), # <<<<<<<<<<<<<< @@ -1209,14 +1169,13 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_c_find_near_matches_ge */ __pyx_t_3 = (__pyx_v_max_substitutions != Py_None); if ((__pyx_t_3 != 0)) { - __Pyx_INCREF(__pyx_v_max_substitutions); - __pyx_t_10 = __pyx_v_max_substitutions; + __pyx_t_10 = __Pyx_PyInt_As_unsigned_int(__pyx_v_max_substitutions); if (unlikely((__pyx_t_10 == (unsigned int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 66; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_9 = __pyx_t_10; } else { - __Pyx_INCREF(__pyx_int_536870912); - __pyx_t_10 = __pyx_int_536870912; + __pyx_t_9 = 536870912; } - /* "fuzzysearch/_generic_search.pyx":57 + /* "fuzzysearch/_generic_search.pyx":67 * c_sequence, len(sequence), * max_substitutions if max_substitutions is not None else (1<<29), * max_insertions if max_insertions is not None else (1<<29), # <<<<<<<<<<<<<< @@ -1225,14 +1184,13 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_c_find_near_matches_ge */ __pyx_t_3 = (__pyx_v_max_insertions != Py_None); if ((__pyx_t_3 != 0)) { - __Pyx_INCREF(__pyx_v_max_insertions); - __pyx_t_11 = __pyx_v_max_insertions; + __pyx_t_11 = __Pyx_PyInt_As_unsigned_int(__pyx_v_max_insertions); if (unlikely((__pyx_t_11 == (unsigned int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 67; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = __pyx_t_11; } else { - __Pyx_INCREF(__pyx_int_536870912); - __pyx_t_11 = __pyx_int_536870912; + __pyx_t_10 = 536870912; } - /* "fuzzysearch/_generic_search.pyx":58 + /* "fuzzysearch/_generic_search.pyx":68 * max_substitutions if max_substitutions is not None else (1<<29), * max_insertions if max_insertions is not None else (1<<29), * max_deletions if max_deletions is not None else (1<<29), # <<<<<<<<<<<<<< @@ -1241,14 +1199,13 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_c_find_near_matches_ge */ __pyx_t_3 = (__pyx_v_max_deletions != Py_None); if ((__pyx_t_3 != 0)) { - __Pyx_INCREF(__pyx_v_max_deletions); - __pyx_t_12 = __pyx_v_max_deletions; + __pyx_t_12 = __Pyx_PyInt_As_unsigned_int(__pyx_v_max_deletions); if (unlikely((__pyx_t_12 == (unsigned int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 68; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_11 = __pyx_t_12; } else { - __Pyx_INCREF(__pyx_int_536870912); - __pyx_t_12 = __pyx_int_536870912; + __pyx_t_11 = 536870912; } - /* "fuzzysearch/_generic_search.pyx":59 + /* "fuzzysearch/_generic_search.pyx":69 * max_insertions if max_insertions is not None else (1<<29), * max_deletions if max_deletions is not None else (1<<29), * max_l_dist if max_l_dist is not None else (1<<29), # <<<<<<<<<<<<<< @@ -1257,55 +1214,26 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_c_find_near_matches_ge */ __pyx_t_3 = (__pyx_v_max_l_dist != Py_None); if ((__pyx_t_3 != 0)) { - __Pyx_INCREF(__pyx_v_max_l_dist); - __pyx_t_13 = __pyx_v_max_l_dist; + __pyx_t_13 = __Pyx_PyInt_As_unsigned_int(__pyx_v_max_l_dist); if (unlikely((__pyx_t_13 == (unsigned int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 69; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_12 = __pyx_t_13; } else { - __Pyx_INCREF(__pyx_int_536870912); - __pyx_t_13 = __pyx_int_536870912; + __pyx_t_12 = 536870912; } - /* "fuzzysearch/_generic_search.pyx":53 - * c_sequence = sequence + /* "fuzzysearch/_generic_search.pyx":63 + * cdef const char *c_sequence = sequence * * return _c_find_near_matches_generic_linear_programming( # <<<<<<<<<<<<<< * c_subsequence, len(subsequence), * c_sequence, len(sequence), */ - __pyx_t_14 = PyTuple_New(8); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_14); - PyTuple_SET_ITEM(__pyx_t_14, 0, __pyx_t_4); - __Pyx_GIVEREF(__pyx_t_4); - PyTuple_SET_ITEM(__pyx_t_14, 1, __pyx_t_7); - __Pyx_GIVEREF(__pyx_t_7); - PyTuple_SET_ITEM(__pyx_t_14, 2, __pyx_t_8); - __Pyx_GIVEREF(__pyx_t_8); - PyTuple_SET_ITEM(__pyx_t_14, 3, __pyx_t_9); - __Pyx_GIVEREF(__pyx_t_9); - PyTuple_SET_ITEM(__pyx_t_14, 4, __pyx_t_10); - __Pyx_GIVEREF(__pyx_t_10); - PyTuple_SET_ITEM(__pyx_t_14, 5, __pyx_t_11); - __Pyx_GIVEREF(__pyx_t_11); - PyTuple_SET_ITEM(__pyx_t_14, 6, __pyx_t_12); - __Pyx_GIVEREF(__pyx_t_12); - PyTuple_SET_ITEM(__pyx_t_14, 7, __pyx_t_13); - __Pyx_GIVEREF(__pyx_t_13); - __pyx_t_4 = 0; - __pyx_t_7 = 0; - __pyx_t_8 = 0; - __pyx_t_9 = 0; - __pyx_t_10 = 0; - __pyx_t_11 = 0; - __pyx_t_12 = 0; - __pyx_t_13 = 0; - __pyx_t_13 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_t_14, NULL); if (unlikely(!__pyx_t_13)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_13); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; - __pyx_r = __pyx_t_13; - __pyx_t_13 = 0; + __pyx_t_1 = __pyx_f_11fuzzysearch_15_generic_search__c_find_near_matches_generic_linear_programming(__pyx_v_c_subsequence, __pyx_t_7, __pyx_v_c_sequence, __pyx_t_8, __pyx_t_9, __pyx_t_10, __pyx_t_11, __pyx_t_12); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 63; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + __pyx_r = __pyx_t_1; + __pyx_t_1 = 0; goto __pyx_L0; - /* "fuzzysearch/_generic_search.pyx":27 + /* "fuzzysearch/_generic_search.pyx":37 * * * def c_find_near_matches_generic_linear_programming(subsequence, sequence, # <<<<<<<<<<<<<< @@ -1317,14 +1245,6 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_c_find_near_matches_ge __pyx_L1_error:; __Pyx_XDECREF(__pyx_t_1); __Pyx_XDECREF(__pyx_t_4); - __Pyx_XDECREF(__pyx_t_7); - __Pyx_XDECREF(__pyx_t_8); - __Pyx_XDECREF(__pyx_t_9); - __Pyx_XDECREF(__pyx_t_10); - __Pyx_XDECREF(__pyx_t_11); - __Pyx_XDECREF(__pyx_t_12); - __Pyx_XDECREF(__pyx_t_13); - __Pyx_XDECREF(__pyx_t_14); __Pyx_AddTraceback("fuzzysearch._generic_search.c_find_near_matches_generic_linear_programming", __pyx_clineno, __pyx_lineno, __pyx_filename); __pyx_r = NULL; __pyx_L0:; @@ -1333,131 +1253,15 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_c_find_near_matches_ge return __pyx_r; } -/* "fuzzysearch/_generic_search.pyx":62 - * ) - * - * def _c_find_near_matches_generic_linear_programming( # <<<<<<<<<<<<<< - * char* subsequence, size_t subseq_len, - * char* sequence, size_t seq_len, +/* "fuzzysearch/_generic_search.pyx":75 + * # subsequence strings, which means if they contain null bytes the data after + * # the first null byte will not be copied. + * cdef _c_find_near_matches_generic_linear_programming( # <<<<<<<<<<<<<< + * const char* subsequence, size_t subseq_len, + * const char* sequence, size_t seq_len, */ -/* Python wrapper */ -static PyObject *__pyx_pw_11fuzzysearch_15_generic_search_3_c_find_near_matches_generic_linear_programming(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ -static PyMethodDef __pyx_mdef_11fuzzysearch_15_generic_search_3_c_find_near_matches_generic_linear_programming = {__Pyx_NAMESTR("_c_find_near_matches_generic_linear_programming"), (PyCFunction)__pyx_pw_11fuzzysearch_15_generic_search_3_c_find_near_matches_generic_linear_programming, METH_VARARGS|METH_KEYWORDS, __Pyx_DOCSTR(0)}; -static PyObject *__pyx_pw_11fuzzysearch_15_generic_search_3_c_find_near_matches_generic_linear_programming(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { - char *__pyx_v_subsequence; - size_t __pyx_v_subseq_len; - char *__pyx_v_sequence; - size_t __pyx_v_seq_len; - unsigned int __pyx_v_max_substitutions; - unsigned int __pyx_v_max_insertions; - unsigned int __pyx_v_max_deletions; - unsigned int __pyx_v_max_l_dist; - int __pyx_lineno = 0; - const char *__pyx_filename = NULL; - int __pyx_clineno = 0; - PyObject *__pyx_r = 0; - __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("_c_find_near_matches_generic_linear_programming (wrapper)", 0); - { - static PyObject **__pyx_pyargnames[] = {&__pyx_n_s_subsequence,&__pyx_n_s_subseq_len,&__pyx_n_s_sequence,&__pyx_n_s_seq_len,&__pyx_n_s_max_substitutions,&__pyx_n_s_max_insertions,&__pyx_n_s_max_deletions,&__pyx_n_s_max_l_dist,0}; - PyObject* values[8] = {0,0,0,0,0,0,0,0}; - if (unlikely(__pyx_kwds)) { - Py_ssize_t kw_args; - const Py_ssize_t pos_args = PyTuple_GET_SIZE(__pyx_args); - switch (pos_args) { - case 8: values[7] = PyTuple_GET_ITEM(__pyx_args, 7); - case 7: values[6] = PyTuple_GET_ITEM(__pyx_args, 6); - case 6: values[5] = PyTuple_GET_ITEM(__pyx_args, 5); - case 5: values[4] = PyTuple_GET_ITEM(__pyx_args, 4); - case 4: values[3] = PyTuple_GET_ITEM(__pyx_args, 3); - case 3: values[2] = PyTuple_GET_ITEM(__pyx_args, 2); - case 2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1); - case 1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0); - case 0: break; - default: goto __pyx_L5_argtuple_error; - } - kw_args = PyDict_Size(__pyx_kwds); - switch (pos_args) { - case 0: - if (likely((values[0] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_subsequence)) != 0)) kw_args--; - else goto __pyx_L5_argtuple_error; - case 1: - if (likely((values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_subseq_len)) != 0)) kw_args--; - else { - __Pyx_RaiseArgtupleInvalid("_c_find_near_matches_generic_linear_programming", 1, 8, 8, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 62; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - } - case 2: - if (likely((values[2] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_sequence)) != 0)) kw_args--; - else { - __Pyx_RaiseArgtupleInvalid("_c_find_near_matches_generic_linear_programming", 1, 8, 8, 2); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 62; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - } - case 3: - if (likely((values[3] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_seq_len)) != 0)) kw_args--; - else { - __Pyx_RaiseArgtupleInvalid("_c_find_near_matches_generic_linear_programming", 1, 8, 8, 3); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 62; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - } - case 4: - if (likely((values[4] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_max_substitutions)) != 0)) kw_args--; - else { - __Pyx_RaiseArgtupleInvalid("_c_find_near_matches_generic_linear_programming", 1, 8, 8, 4); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 62; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - } - case 5: - if (likely((values[5] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_max_insertions)) != 0)) kw_args--; - else { - __Pyx_RaiseArgtupleInvalid("_c_find_near_matches_generic_linear_programming", 1, 8, 8, 5); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 62; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - } - case 6: - if (likely((values[6] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_max_deletions)) != 0)) kw_args--; - else { - __Pyx_RaiseArgtupleInvalid("_c_find_near_matches_generic_linear_programming", 1, 8, 8, 6); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 62; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - } - case 7: - if (likely((values[7] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_max_l_dist)) != 0)) kw_args--; - else { - __Pyx_RaiseArgtupleInvalid("_c_find_near_matches_generic_linear_programming", 1, 8, 8, 7); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 62; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - } - } - if (unlikely(kw_args > 0)) { - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "_c_find_near_matches_generic_linear_programming") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 62; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - } - } else if (PyTuple_GET_SIZE(__pyx_args) != 8) { - goto __pyx_L5_argtuple_error; - } else { - values[0] = PyTuple_GET_ITEM(__pyx_args, 0); - values[1] = PyTuple_GET_ITEM(__pyx_args, 1); - values[2] = PyTuple_GET_ITEM(__pyx_args, 2); - values[3] = PyTuple_GET_ITEM(__pyx_args, 3); - values[4] = PyTuple_GET_ITEM(__pyx_args, 4); - values[5] = PyTuple_GET_ITEM(__pyx_args, 5); - values[6] = PyTuple_GET_ITEM(__pyx_args, 6); - values[7] = PyTuple_GET_ITEM(__pyx_args, 7); - } - __pyx_v_subsequence = __Pyx_PyObject_AsString(values[0]); if (unlikely((!__pyx_v_subsequence) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 63; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - __pyx_v_subseq_len = __Pyx_PyInt_As_size_t(values[1]); if (unlikely((__pyx_v_subseq_len == (size_t)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 63; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - __pyx_v_sequence = __Pyx_PyObject_AsString(values[2]); if (unlikely((!__pyx_v_sequence) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 64; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - __pyx_v_seq_len = __Pyx_PyInt_As_size_t(values[3]); if (unlikely((__pyx_v_seq_len == (size_t)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 64; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - __pyx_v_max_substitutions = __Pyx_PyInt_As_unsigned_int(values[4]); if (unlikely((__pyx_v_max_substitutions == (unsigned int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 65; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - __pyx_v_max_insertions = __Pyx_PyInt_As_unsigned_int(values[5]); if (unlikely((__pyx_v_max_insertions == (unsigned int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 66; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - __pyx_v_max_deletions = __Pyx_PyInt_As_unsigned_int(values[6]); if (unlikely((__pyx_v_max_deletions == (unsigned int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 67; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - __pyx_v_max_l_dist = __Pyx_PyInt_As_unsigned_int(values[7]); if (unlikely((__pyx_v_max_l_dist == (unsigned int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 68; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - } - goto __pyx_L4_argument_unpacking_done; - __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("_c_find_near_matches_generic_linear_programming", 1, 8, 8, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 62; __pyx_clineno = __LINE__; goto __pyx_L3_error;} - __pyx_L3_error:; - __Pyx_AddTraceback("fuzzysearch._generic_search._c_find_near_matches_generic_linear_programming", __pyx_clineno, __pyx_lineno, __pyx_filename); - __Pyx_RefNannyFinishContext(); - return NULL; - __pyx_L4_argument_unpacking_done:; - __pyx_r = __pyx_pf_11fuzzysearch_15_generic_search_2_c_find_near_matches_generic_linear_programming(__pyx_self, __pyx_v_subsequence, __pyx_v_subseq_len, __pyx_v_sequence, __pyx_v_seq_len, __pyx_v_max_substitutions, __pyx_v_max_insertions, __pyx_v_max_deletions, __pyx_v_max_l_dist); - - /* function exit code */ - __Pyx_RefNannyFinishContext(); - return __pyx_r; -} - -static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_2_c_find_near_matches_generic_linear_programming(CYTHON_UNUSED PyObject *__pyx_self, char *__pyx_v_subsequence, size_t __pyx_v_subseq_len, char *__pyx_v_sequence, size_t __pyx_v_seq_len, unsigned int __pyx_v_max_substitutions, unsigned int __pyx_v_max_insertions, unsigned int __pyx_v_max_deletions, unsigned int __pyx_v_max_l_dist) { +static PyObject *__pyx_f_11fuzzysearch_15_generic_search__c_find_near_matches_generic_linear_programming(char const *__pyx_v_subsequence, size_t __pyx_v_subseq_len, char const *__pyx_v_sequence, size_t __pyx_v_seq_len, unsigned int __pyx_v_max_substitutions, unsigned int __pyx_v_max_insertions, unsigned int __pyx_v_max_deletions, unsigned int __pyx_v_max_l_dist) { unsigned int __pyx_v_subseq_len_minus_one; size_t __pyx_v_alloc_size; struct __pyx_t_11fuzzysearch_15_generic_search_GenericSearchCandidate *__pyx_v_candidates; @@ -1469,7 +1273,7 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_2_c_find_near_matches_ size_t __pyx_v_n_cand; PyObject *__pyx_v_matches = NULL; size_t __pyx_v_index; - char __pyx_v_charchar; + char __pyx_v_seq_char; int __pyx_v_have_realloced; PyObject *__pyx_v_n_skipped = NULL; PyObject *__pyx_r = NULL; @@ -1514,7 +1318,7 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_2_c_find_near_matches_ int __pyx_clineno = 0; __Pyx_RefNannySetupContext("_c_find_near_matches_generic_linear_programming", 0); - /* "fuzzysearch/_generic_search.pyx":70 + /* "fuzzysearch/_generic_search.pyx":83 * unsigned int max_l_dist, * ): * cdef unsigned int subseq_len_minus_one = subseq_len - 1 # <<<<<<<<<<<<<< @@ -1523,7 +1327,7 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_2_c_find_near_matches_ */ __pyx_v_subseq_len_minus_one = (__pyx_v_subseq_len - 1); - /* "fuzzysearch/_generic_search.pyx":77 + /* "fuzzysearch/_generic_search.pyx":90 * cdef GenericSearchCandidate* _tmp * cdef GenericSearchCandidate cand * cdef size_t n_candidates = 0 # <<<<<<<<<<<<<< @@ -1532,7 +1336,7 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_2_c_find_near_matches_ */ __pyx_v_n_candidates = 0; - /* "fuzzysearch/_generic_search.pyx":78 + /* "fuzzysearch/_generic_search.pyx":91 * cdef GenericSearchCandidate cand * cdef size_t n_candidates = 0 * cdef size_t n_new_candidates = 0 # <<<<<<<<<<<<<< @@ -1541,7 +1345,7 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_2_c_find_near_matches_ */ __pyx_v_n_new_candidates = 0; - /* "fuzzysearch/_generic_search.pyx":81 + /* "fuzzysearch/_generic_search.pyx":94 * cdef size_t n_cand * * alloc_size = min(10, subseq_len * 3 + 1) # <<<<<<<<<<<<<< @@ -1557,7 +1361,7 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_2_c_find_near_matches_ } __pyx_v_alloc_size = __pyx_t_3; - /* "fuzzysearch/_generic_search.pyx":82 + /* "fuzzysearch/_generic_search.pyx":95 * * alloc_size = min(10, subseq_len * 3 + 1) * candidates = malloc(alloc_size * sizeof(GenericSearchCandidate)) # <<<<<<<<<<<<<< @@ -1566,7 +1370,7 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_2_c_find_near_matches_ */ __pyx_v_candidates = ((struct __pyx_t_11fuzzysearch_15_generic_search_GenericSearchCandidate *)malloc((__pyx_v_alloc_size * (sizeof(struct __pyx_t_11fuzzysearch_15_generic_search_GenericSearchCandidate))))); - /* "fuzzysearch/_generic_search.pyx":83 + /* "fuzzysearch/_generic_search.pyx":96 * alloc_size = min(10, subseq_len * 3 + 1) * candidates = malloc(alloc_size * sizeof(GenericSearchCandidate)) * if candidates is NULL: # <<<<<<<<<<<<<< @@ -1576,17 +1380,17 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_2_c_find_near_matches_ __pyx_t_4 = ((__pyx_v_candidates == NULL) != 0); if (__pyx_t_4) { - /* "fuzzysearch/_generic_search.pyx":84 + /* "fuzzysearch/_generic_search.pyx":97 * candidates = malloc(alloc_size * sizeof(GenericSearchCandidate)) * if candidates is NULL: * raise MemoryError() # <<<<<<<<<<<<<< * new_candidates = malloc(alloc_size * sizeof(GenericSearchCandidate)) * if candidates is NULL: */ - PyErr_NoMemory(); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 84; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + PyErr_NoMemory(); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 97; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } - /* "fuzzysearch/_generic_search.pyx":85 + /* "fuzzysearch/_generic_search.pyx":98 * if candidates is NULL: * raise MemoryError() * new_candidates = malloc(alloc_size * sizeof(GenericSearchCandidate)) # <<<<<<<<<<<<<< @@ -1595,7 +1399,7 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_2_c_find_near_matches_ */ __pyx_v_new_candidates = ((struct __pyx_t_11fuzzysearch_15_generic_search_GenericSearchCandidate *)malloc((__pyx_v_alloc_size * (sizeof(struct __pyx_t_11fuzzysearch_15_generic_search_GenericSearchCandidate))))); - /* "fuzzysearch/_generic_search.pyx":86 + /* "fuzzysearch/_generic_search.pyx":99 * raise MemoryError() * new_candidates = malloc(alloc_size * sizeof(GenericSearchCandidate)) * if candidates is NULL: # <<<<<<<<<<<<<< @@ -1605,7 +1409,7 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_2_c_find_near_matches_ __pyx_t_4 = ((__pyx_v_candidates == NULL) != 0); if (__pyx_t_4) { - /* "fuzzysearch/_generic_search.pyx":87 + /* "fuzzysearch/_generic_search.pyx":100 * new_candidates = malloc(alloc_size * sizeof(GenericSearchCandidate)) * if candidates is NULL: * free(candidates) # <<<<<<<<<<<<<< @@ -1614,73 +1418,73 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_2_c_find_near_matches_ */ free(__pyx_v_candidates); - /* "fuzzysearch/_generic_search.pyx":88 + /* "fuzzysearch/_generic_search.pyx":101 * if candidates is NULL: * free(candidates) * raise MemoryError() # <<<<<<<<<<<<<< * * matches = [] */ - PyErr_NoMemory(); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 88; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + PyErr_NoMemory(); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 101; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } - /* "fuzzysearch/_generic_search.pyx":90 + /* "fuzzysearch/_generic_search.pyx":103 * raise MemoryError() * * matches = [] # <<<<<<<<<<<<<< * * cdef size_t index */ - __pyx_t_5 = PyList_New(0); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 90; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyList_New(0); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 103; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_5); __pyx_v_matches = ((PyObject*)__pyx_t_5); __pyx_t_5 = 0; - /* "fuzzysearch/_generic_search.pyx":94 - * cdef size_t index - * cdef char charchar + /* "fuzzysearch/_generic_search.pyx":108 + * cdef char seq_char + * * try: # <<<<<<<<<<<<<< * index = 0 * have_realloced = False */ /*try:*/ { - /* "fuzzysearch/_generic_search.pyx":95 - * cdef char charchar + /* "fuzzysearch/_generic_search.pyx":109 + * * try: * index = 0 # <<<<<<<<<<<<<< * have_realloced = False - * for charchar in sequence[:seq_len]: + * for seq_char in sequence[:seq_len]: */ __pyx_v_index = 0; - /* "fuzzysearch/_generic_search.pyx":96 + /* "fuzzysearch/_generic_search.pyx":110 * try: * index = 0 * have_realloced = False # <<<<<<<<<<<<<< - * for charchar in sequence[:seq_len]: + * for seq_char in sequence[:seq_len]: * candidates[n_candidates] = GenericSearchCandidate(index, 0, 0, 0, 0, 0) */ __pyx_v_have_realloced = 0; - /* "fuzzysearch/_generic_search.pyx":97 + /* "fuzzysearch/_generic_search.pyx":111 * index = 0 * have_realloced = False - * for charchar in sequence[:seq_len]: # <<<<<<<<<<<<<< + * for seq_char in sequence[:seq_len]: # <<<<<<<<<<<<<< * candidates[n_candidates] = GenericSearchCandidate(index, 0, 0, 0, 0, 0) * n_candidates += 1 */ - __pyx_t_5 = __Pyx_PyBytes_FromStringAndSize(__pyx_v_sequence + 0, __pyx_v_seq_len - 0); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 97; __pyx_clineno = __LINE__; goto __pyx_L6_error;} + __pyx_t_5 = __Pyx_PyBytes_FromStringAndSize(__pyx_v_sequence + 0, __pyx_v_seq_len - 0); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 111; __pyx_clineno = __LINE__; goto __pyx_L6_error;} __Pyx_GOTREF(__pyx_t_5); __pyx_t_7 = PyBytes_AS_STRING(__pyx_t_5); __pyx_t_8 = (__pyx_t_7 + PyBytes_GET_SIZE(__pyx_t_5)); for (__pyx_t_9 = __pyx_t_7; __pyx_t_9 < __pyx_t_8; __pyx_t_9++) { __pyx_t_6 = __pyx_t_9; - __pyx_v_charchar = (__pyx_t_6[0]); + __pyx_v_seq_char = (__pyx_t_6[0]); - /* "fuzzysearch/_generic_search.pyx":98 + /* "fuzzysearch/_generic_search.pyx":112 * have_realloced = False - * for charchar in sequence[:seq_len]: + * for seq_char in sequence[:seq_len]: * candidates[n_candidates] = GenericSearchCandidate(index, 0, 0, 0, 0, 0) # <<<<<<<<<<<<<< * n_candidates += 1 * @@ -1693,8 +1497,8 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_2_c_find_near_matches_ __pyx_t_10.n_dels = 0; (__pyx_v_candidates[__pyx_v_n_candidates]) = __pyx_t_10; - /* "fuzzysearch/_generic_search.pyx":99 - * for charchar in sequence[:seq_len]: + /* "fuzzysearch/_generic_search.pyx":113 + * for seq_char in sequence[:seq_len]: * candidates[n_candidates] = GenericSearchCandidate(index, 0, 0, 0, 0, 0) * n_candidates += 1 # <<<<<<<<<<<<<< * @@ -1702,7 +1506,7 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_2_c_find_near_matches_ */ __pyx_v_n_candidates = (__pyx_v_n_candidates + 1); - /* "fuzzysearch/_generic_search.pyx":101 + /* "fuzzysearch/_generic_search.pyx":115 * n_candidates += 1 * * for n_cand in xrange(n_candidates): # <<<<<<<<<<<<<< @@ -1713,7 +1517,7 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_2_c_find_near_matches_ for (__pyx_t_1 = 0; __pyx_t_1 < __pyx_t_3; __pyx_t_1+=1) { __pyx_v_n_cand = __pyx_t_1; - /* "fuzzysearch/_generic_search.pyx":102 + /* "fuzzysearch/_generic_search.pyx":116 * * for n_cand in xrange(n_candidates): * cand = candidates[n_cand] # <<<<<<<<<<<<<< @@ -1722,7 +1526,7 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_2_c_find_near_matches_ */ __pyx_v_cand = (__pyx_v_candidates[__pyx_v_n_cand]); - /* "fuzzysearch/_generic_search.pyx":104 + /* "fuzzysearch/_generic_search.pyx":118 * cand = candidates[n_cand] * * if n_new_candidates + 4 > alloc_size: # <<<<<<<<<<<<<< @@ -1732,7 +1536,7 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_2_c_find_near_matches_ __pyx_t_4 = (((__pyx_v_n_new_candidates + 4) > __pyx_v_alloc_size) != 0); if (__pyx_t_4) { - /* "fuzzysearch/_generic_search.pyx":105 + /* "fuzzysearch/_generic_search.pyx":119 * * if n_new_candidates + 4 > alloc_size: * alloc_size *= 2 # <<<<<<<<<<<<<< @@ -1741,7 +1545,7 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_2_c_find_near_matches_ */ __pyx_v_alloc_size = (__pyx_v_alloc_size * 2); - /* "fuzzysearch/_generic_search.pyx":106 + /* "fuzzysearch/_generic_search.pyx":120 * if n_new_candidates + 4 > alloc_size: * alloc_size *= 2 * _tmp = realloc(new_candidates, alloc_size * sizeof(GenericSearchCandidate)) # <<<<<<<<<<<<<< @@ -1750,7 +1554,7 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_2_c_find_near_matches_ */ __pyx_v__tmp = ((struct __pyx_t_11fuzzysearch_15_generic_search_GenericSearchCandidate *)realloc(__pyx_v_new_candidates, (__pyx_v_alloc_size * (sizeof(struct __pyx_t_11fuzzysearch_15_generic_search_GenericSearchCandidate))))); - /* "fuzzysearch/_generic_search.pyx":107 + /* "fuzzysearch/_generic_search.pyx":121 * alloc_size *= 2 * _tmp = realloc(new_candidates, alloc_size * sizeof(GenericSearchCandidate)) * if _tmp is NULL: # <<<<<<<<<<<<<< @@ -1760,17 +1564,17 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_2_c_find_near_matches_ __pyx_t_4 = ((__pyx_v__tmp == NULL) != 0); if (__pyx_t_4) { - /* "fuzzysearch/_generic_search.pyx":108 + /* "fuzzysearch/_generic_search.pyx":122 * _tmp = realloc(new_candidates, alloc_size * sizeof(GenericSearchCandidate)) * if _tmp is NULL: * raise MemoryError() # <<<<<<<<<<<<<< * new_candidates = _tmp * have_realloced = True */ - PyErr_NoMemory(); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 108; __pyx_clineno = __LINE__; goto __pyx_L6_error;} + PyErr_NoMemory(); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 122; __pyx_clineno = __LINE__; goto __pyx_L6_error;} } - /* "fuzzysearch/_generic_search.pyx":109 + /* "fuzzysearch/_generic_search.pyx":123 * if _tmp is NULL: * raise MemoryError() * new_candidates = _tmp # <<<<<<<<<<<<<< @@ -1779,7 +1583,7 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_2_c_find_near_matches_ */ __pyx_v_new_candidates = __pyx_v__tmp; - /* "fuzzysearch/_generic_search.pyx":110 + /* "fuzzysearch/_generic_search.pyx":124 * raise MemoryError() * new_candidates = _tmp * have_realloced = True # <<<<<<<<<<<<<< @@ -1791,18 +1595,18 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_2_c_find_near_matches_ } __pyx_L12:; - /* "fuzzysearch/_generic_search.pyx":113 + /* "fuzzysearch/_generic_search.pyx":127 * * # if this sequence char is the candidate's next expected char - * if charchar == subsequence[cand.subseq_index]: # <<<<<<<<<<<<<< + * if seq_char == subsequence[cand.subseq_index]: # <<<<<<<<<<<<<< * # if reached the end of the subsequence, return a match * if cand.subseq_index == subseq_len_minus_one: */ - __pyx_t_4 = ((__pyx_v_charchar == (__pyx_v_subsequence[__pyx_v_cand.subseq_index])) != 0); + __pyx_t_4 = ((__pyx_v_seq_char == (__pyx_v_subsequence[__pyx_v_cand.subseq_index])) != 0); if (__pyx_t_4) { - /* "fuzzysearch/_generic_search.pyx":115 - * if charchar == subsequence[cand.subseq_index]: + /* "fuzzysearch/_generic_search.pyx":129 + * if seq_char == subsequence[cand.subseq_index]: * # if reached the end of the subsequence, return a match * if cand.subseq_index == subseq_len_minus_one: # <<<<<<<<<<<<<< * matches.append(Match(cand.start, index + 1, cand.l_dist)) @@ -1811,22 +1615,22 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_2_c_find_near_matches_ __pyx_t_4 = ((__pyx_v_cand.subseq_index == __pyx_v_subseq_len_minus_one) != 0); if (__pyx_t_4) { - /* "fuzzysearch/_generic_search.pyx":116 + /* "fuzzysearch/_generic_search.pyx":130 * # if reached the end of the subsequence, return a match * if cand.subseq_index == subseq_len_minus_one: * matches.append(Match(cand.start, index + 1, cand.l_dist)) # <<<<<<<<<<<<<< * # otherwise, update the candidate's subseq_index and keep it * else: */ - __pyx_t_11 = __Pyx_GetModuleGlobalName(__pyx_n_s_Match); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 116; __pyx_clineno = __LINE__; goto __pyx_L6_error;} + __pyx_t_11 = __Pyx_GetModuleGlobalName(__pyx_n_s_Match); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 130; __pyx_clineno = __LINE__; goto __pyx_L6_error;} __Pyx_GOTREF(__pyx_t_11); - __pyx_t_12 = __Pyx_PyInt_From_int(__pyx_v_cand.start); if (unlikely(!__pyx_t_12)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 116; __pyx_clineno = __LINE__; goto __pyx_L6_error;} + __pyx_t_12 = __Pyx_PyInt_From_int(__pyx_v_cand.start); if (unlikely(!__pyx_t_12)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 130; __pyx_clineno = __LINE__; goto __pyx_L6_error;} __Pyx_GOTREF(__pyx_t_12); - __pyx_t_13 = __Pyx_PyInt_FromSize_t((__pyx_v_index + 1)); if (unlikely(!__pyx_t_13)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 116; __pyx_clineno = __LINE__; goto __pyx_L6_error;} + __pyx_t_13 = __Pyx_PyInt_FromSize_t((__pyx_v_index + 1)); if (unlikely(!__pyx_t_13)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 130; __pyx_clineno = __LINE__; goto __pyx_L6_error;} __Pyx_GOTREF(__pyx_t_13); - __pyx_t_14 = __Pyx_PyInt_From_int(__pyx_v_cand.l_dist); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 116; __pyx_clineno = __LINE__; goto __pyx_L6_error;} + __pyx_t_14 = __Pyx_PyInt_From_int(__pyx_v_cand.l_dist); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 130; __pyx_clineno = __LINE__; goto __pyx_L6_error;} __Pyx_GOTREF(__pyx_t_14); - __pyx_t_15 = PyTuple_New(3); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 116; __pyx_clineno = __LINE__; goto __pyx_L6_error;} + __pyx_t_15 = PyTuple_New(3); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 130; __pyx_clineno = __LINE__; goto __pyx_L6_error;} __Pyx_GOTREF(__pyx_t_15); PyTuple_SET_ITEM(__pyx_t_15, 0, __pyx_t_12); __Pyx_GIVEREF(__pyx_t_12); @@ -1837,17 +1641,17 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_2_c_find_near_matches_ __pyx_t_12 = 0; __pyx_t_13 = 0; __pyx_t_14 = 0; - __pyx_t_14 = __Pyx_PyObject_Call(__pyx_t_11, __pyx_t_15, NULL); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 116; __pyx_clineno = __LINE__; goto __pyx_L6_error;} + __pyx_t_14 = __Pyx_PyObject_Call(__pyx_t_11, __pyx_t_15, NULL); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 130; __pyx_clineno = __LINE__; goto __pyx_L6_error;} __Pyx_GOTREF(__pyx_t_14); __Pyx_DECREF(__pyx_t_11); __pyx_t_11 = 0; __Pyx_DECREF(__pyx_t_15); __pyx_t_15 = 0; - __pyx_t_16 = __Pyx_PyList_Append(__pyx_v_matches, __pyx_t_14); if (unlikely(__pyx_t_16 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 116; __pyx_clineno = __LINE__; goto __pyx_L6_error;} + __pyx_t_16 = __Pyx_PyList_Append(__pyx_v_matches, __pyx_t_14); if (unlikely(__pyx_t_16 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 130; __pyx_clineno = __LINE__; goto __pyx_L6_error;} __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; goto __pyx_L15; } /*else*/ { - /* "fuzzysearch/_generic_search.pyx":120 + /* "fuzzysearch/_generic_search.pyx":134 * else: * new_candidates[n_new_candidates] = GenericSearchCandidate( * cand.start, cand.subseq_index + 1, # <<<<<<<<<<<<<< @@ -1857,7 +1661,7 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_2_c_find_near_matches_ __pyx_t_10.start = __pyx_v_cand.start; __pyx_t_10.subseq_index = (__pyx_v_cand.subseq_index + 1); - /* "fuzzysearch/_generic_search.pyx":121 + /* "fuzzysearch/_generic_search.pyx":135 * new_candidates[n_new_candidates] = GenericSearchCandidate( * cand.start, cand.subseq_index + 1, * cand.l_dist, cand.n_subs, # <<<<<<<<<<<<<< @@ -1867,7 +1671,7 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_2_c_find_near_matches_ __pyx_t_10.l_dist = __pyx_v_cand.l_dist; __pyx_t_10.n_subs = __pyx_v_cand.n_subs; - /* "fuzzysearch/_generic_search.pyx":122 + /* "fuzzysearch/_generic_search.pyx":136 * cand.start, cand.subseq_index + 1, * cand.l_dist, cand.n_subs, * cand.n_ins, cand.n_dels, # <<<<<<<<<<<<<< @@ -1877,7 +1681,7 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_2_c_find_near_matches_ __pyx_t_10.n_ins = __pyx_v_cand.n_ins; __pyx_t_10.n_dels = __pyx_v_cand.n_dels; - /* "fuzzysearch/_generic_search.pyx":119 + /* "fuzzysearch/_generic_search.pyx":133 * # otherwise, update the candidate's subseq_index and keep it * else: * new_candidates[n_new_candidates] = GenericSearchCandidate( # <<<<<<<<<<<<<< @@ -1886,7 +1690,7 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_2_c_find_near_matches_ */ (__pyx_v_new_candidates[__pyx_v_n_new_candidates]) = __pyx_t_10; - /* "fuzzysearch/_generic_search.pyx":124 + /* "fuzzysearch/_generic_search.pyx":138 * cand.n_ins, cand.n_dels, * ) * n_new_candidates += 1 # <<<<<<<<<<<<<< @@ -1900,7 +1704,7 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_2_c_find_near_matches_ } /*else*/ { - /* "fuzzysearch/_generic_search.pyx":131 + /* "fuzzysearch/_generic_search.pyx":145 * # unless this candidate has already skipped the maximum allowed * # number of characters * if cand.l_dist == max_l_dist: # <<<<<<<<<<<<<< @@ -1910,7 +1714,7 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_2_c_find_near_matches_ __pyx_t_4 = ((__pyx_v_cand.l_dist == __pyx_v_max_l_dist) != 0); if (__pyx_t_4) { - /* "fuzzysearch/_generic_search.pyx":132 + /* "fuzzysearch/_generic_search.pyx":146 * # number of characters * if cand.l_dist == max_l_dist: * continue # <<<<<<<<<<<<<< @@ -1920,7 +1724,7 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_2_c_find_near_matches_ goto __pyx_L10_continue; } - /* "fuzzysearch/_generic_search.pyx":134 + /* "fuzzysearch/_generic_search.pyx":148 * continue * * if cand.n_ins < max_insertions: # <<<<<<<<<<<<<< @@ -1930,7 +1734,7 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_2_c_find_near_matches_ __pyx_t_4 = ((__pyx_v_cand.n_ins < __pyx_v_max_insertions) != 0); if (__pyx_t_4) { - /* "fuzzysearch/_generic_search.pyx":137 + /* "fuzzysearch/_generic_search.pyx":151 * # add a candidate skipping a sequence char * new_candidates[n_new_candidates] = GenericSearchCandidate( * cand.start, cand.subseq_index, # <<<<<<<<<<<<<< @@ -1940,7 +1744,7 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_2_c_find_near_matches_ __pyx_t_10.start = __pyx_v_cand.start; __pyx_t_10.subseq_index = __pyx_v_cand.subseq_index; - /* "fuzzysearch/_generic_search.pyx":138 + /* "fuzzysearch/_generic_search.pyx":152 * new_candidates[n_new_candidates] = GenericSearchCandidate( * cand.start, cand.subseq_index, * cand.l_dist + 1, cand.n_subs, # <<<<<<<<<<<<<< @@ -1950,7 +1754,7 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_2_c_find_near_matches_ __pyx_t_10.l_dist = (__pyx_v_cand.l_dist + 1); __pyx_t_10.n_subs = __pyx_v_cand.n_subs; - /* "fuzzysearch/_generic_search.pyx":139 + /* "fuzzysearch/_generic_search.pyx":153 * cand.start, cand.subseq_index, * cand.l_dist + 1, cand.n_subs, * cand.n_ins + 1, cand.n_dels, # <<<<<<<<<<<<<< @@ -1960,7 +1764,7 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_2_c_find_near_matches_ __pyx_t_10.n_ins = (__pyx_v_cand.n_ins + 1); __pyx_t_10.n_dels = __pyx_v_cand.n_dels; - /* "fuzzysearch/_generic_search.pyx":136 + /* "fuzzysearch/_generic_search.pyx":150 * if cand.n_ins < max_insertions: * # add a candidate skipping a sequence char * new_candidates[n_new_candidates] = GenericSearchCandidate( # <<<<<<<<<<<<<< @@ -1969,7 +1773,7 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_2_c_find_near_matches_ */ (__pyx_v_new_candidates[__pyx_v_n_new_candidates]) = __pyx_t_10; - /* "fuzzysearch/_generic_search.pyx":141 + /* "fuzzysearch/_generic_search.pyx":155 * cand.n_ins + 1, cand.n_dels, * ) * n_new_candidates += 1 # <<<<<<<<<<<<<< @@ -1981,7 +1785,7 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_2_c_find_near_matches_ } __pyx_L17:; - /* "fuzzysearch/_generic_search.pyx":143 + /* "fuzzysearch/_generic_search.pyx":157 * n_new_candidates += 1 * * if cand.subseq_index + 1 < subseq_len: # <<<<<<<<<<<<<< @@ -1991,7 +1795,7 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_2_c_find_near_matches_ __pyx_t_4 = (((__pyx_v_cand.subseq_index + 1) < __pyx_v_subseq_len) != 0); if (__pyx_t_4) { - /* "fuzzysearch/_generic_search.pyx":144 + /* "fuzzysearch/_generic_search.pyx":158 * * if cand.subseq_index + 1 < subseq_len: * if cand.n_subs < max_substitutions: # <<<<<<<<<<<<<< @@ -2001,7 +1805,7 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_2_c_find_near_matches_ __pyx_t_4 = ((__pyx_v_cand.n_subs < __pyx_v_max_substitutions) != 0); if (__pyx_t_4) { - /* "fuzzysearch/_generic_search.pyx":148 + /* "fuzzysearch/_generic_search.pyx":162 * # subsequence char * new_candidates[n_new_candidates] = GenericSearchCandidate( * cand.start, cand.subseq_index + 1, # <<<<<<<<<<<<<< @@ -2011,7 +1815,7 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_2_c_find_near_matches_ __pyx_t_10.start = __pyx_v_cand.start; __pyx_t_10.subseq_index = (__pyx_v_cand.subseq_index + 1); - /* "fuzzysearch/_generic_search.pyx":149 + /* "fuzzysearch/_generic_search.pyx":163 * new_candidates[n_new_candidates] = GenericSearchCandidate( * cand.start, cand.subseq_index + 1, * cand.l_dist + 1, cand.n_subs + 1, # <<<<<<<<<<<<<< @@ -2021,7 +1825,7 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_2_c_find_near_matches_ __pyx_t_10.l_dist = (__pyx_v_cand.l_dist + 1); __pyx_t_10.n_subs = (__pyx_v_cand.n_subs + 1); - /* "fuzzysearch/_generic_search.pyx":150 + /* "fuzzysearch/_generic_search.pyx":164 * cand.start, cand.subseq_index + 1, * cand.l_dist + 1, cand.n_subs + 1, * cand.n_ins, cand.n_dels, # <<<<<<<<<<<<<< @@ -2031,7 +1835,7 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_2_c_find_near_matches_ __pyx_t_10.n_ins = __pyx_v_cand.n_ins; __pyx_t_10.n_dels = __pyx_v_cand.n_dels; - /* "fuzzysearch/_generic_search.pyx":147 + /* "fuzzysearch/_generic_search.pyx":161 * # add a candidate skipping both a sequence char and a * # subsequence char * new_candidates[n_new_candidates] = GenericSearchCandidate( # <<<<<<<<<<<<<< @@ -2040,7 +1844,7 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_2_c_find_near_matches_ */ (__pyx_v_new_candidates[__pyx_v_n_new_candidates]) = __pyx_t_10; - /* "fuzzysearch/_generic_search.pyx":152 + /* "fuzzysearch/_generic_search.pyx":166 * cand.n_ins, cand.n_dels, * ) * n_new_candidates += 1 # <<<<<<<<<<<<<< @@ -2051,7 +1855,7 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_2_c_find_near_matches_ goto __pyx_L19; } - /* "fuzzysearch/_generic_search.pyx":153 + /* "fuzzysearch/_generic_search.pyx":167 * ) * n_new_candidates += 1 * elif cand.n_dels < max_deletions and cand.n_ins < max_insertions: # <<<<<<<<<<<<<< @@ -2067,7 +1871,7 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_2_c_find_near_matches_ } if (__pyx_t_18) { - /* "fuzzysearch/_generic_search.pyx":157 + /* "fuzzysearch/_generic_search.pyx":171 * # subsequence char * new_candidates[n_new_candidates] = GenericSearchCandidate( * cand.start, cand.subseq_index + 1, # <<<<<<<<<<<<<< @@ -2077,7 +1881,7 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_2_c_find_near_matches_ __pyx_t_10.start = __pyx_v_cand.start; __pyx_t_10.subseq_index = (__pyx_v_cand.subseq_index + 1); - /* "fuzzysearch/_generic_search.pyx":158 + /* "fuzzysearch/_generic_search.pyx":172 * new_candidates[n_new_candidates] = GenericSearchCandidate( * cand.start, cand.subseq_index + 1, * cand.l_dist + 1, cand.n_subs, # <<<<<<<<<<<<<< @@ -2087,7 +1891,7 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_2_c_find_near_matches_ __pyx_t_10.l_dist = (__pyx_v_cand.l_dist + 1); __pyx_t_10.n_subs = __pyx_v_cand.n_subs; - /* "fuzzysearch/_generic_search.pyx":159 + /* "fuzzysearch/_generic_search.pyx":173 * cand.start, cand.subseq_index + 1, * cand.l_dist + 1, cand.n_subs, * cand.n_ins + 1, cand.n_dels + 1, # <<<<<<<<<<<<<< @@ -2097,7 +1901,7 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_2_c_find_near_matches_ __pyx_t_10.n_ins = (__pyx_v_cand.n_ins + 1); __pyx_t_10.n_dels = (__pyx_v_cand.n_dels + 1); - /* "fuzzysearch/_generic_search.pyx":156 + /* "fuzzysearch/_generic_search.pyx":170 * # add a candidate skipping both a sequence char and a * # subsequence char * new_candidates[n_new_candidates] = GenericSearchCandidate( # <<<<<<<<<<<<<< @@ -2106,7 +1910,7 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_2_c_find_near_matches_ */ (__pyx_v_new_candidates[__pyx_v_n_new_candidates]) = __pyx_t_10; - /* "fuzzysearch/_generic_search.pyx":161 + /* "fuzzysearch/_generic_search.pyx":175 * cand.n_ins + 1, cand.n_dels + 1, * ) * n_new_candidates += 1 # <<<<<<<<<<<<<< @@ -2121,7 +1925,7 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_2_c_find_near_matches_ } /*else*/ { - /* "fuzzysearch/_generic_search.pyx":165 + /* "fuzzysearch/_generic_search.pyx":179 * # cand.subseq_index == _subseq_len - 1 * if ( * cand.n_subs < max_substitutions or # <<<<<<<<<<<<<< @@ -2131,7 +1935,7 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_2_c_find_near_matches_ __pyx_t_18 = ((__pyx_v_cand.n_subs < __pyx_v_max_substitutions) != 0); if (!__pyx_t_18) { - /* "fuzzysearch/_generic_search.pyx":167 + /* "fuzzysearch/_generic_search.pyx":181 * cand.n_subs < max_substitutions or * ( * cand.n_dels < max_deletions and # <<<<<<<<<<<<<< @@ -2141,7 +1945,7 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_2_c_find_near_matches_ __pyx_t_4 = ((__pyx_v_cand.n_dels < __pyx_v_max_deletions) != 0); if (__pyx_t_4) { - /* "fuzzysearch/_generic_search.pyx":168 + /* "fuzzysearch/_generic_search.pyx":182 * ( * cand.n_dels < max_deletions and * cand.n_ins < max_insertions # <<<<<<<<<<<<<< @@ -2159,22 +1963,22 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_2_c_find_near_matches_ } if (__pyx_t_4) { - /* "fuzzysearch/_generic_search.pyx":171 + /* "fuzzysearch/_generic_search.pyx":185 * ) * ): * matches.append(Match(cand.start, index + 1, cand.l_dist + 1)) # <<<<<<<<<<<<<< * * # try skipping subsequence chars */ - __pyx_t_14 = __Pyx_GetModuleGlobalName(__pyx_n_s_Match); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 171; __pyx_clineno = __LINE__; goto __pyx_L6_error;} + __pyx_t_14 = __Pyx_GetModuleGlobalName(__pyx_n_s_Match); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 185; __pyx_clineno = __LINE__; goto __pyx_L6_error;} __Pyx_GOTREF(__pyx_t_14); - __pyx_t_15 = __Pyx_PyInt_From_int(__pyx_v_cand.start); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 171; __pyx_clineno = __LINE__; goto __pyx_L6_error;} + __pyx_t_15 = __Pyx_PyInt_From_int(__pyx_v_cand.start); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 185; __pyx_clineno = __LINE__; goto __pyx_L6_error;} __Pyx_GOTREF(__pyx_t_15); - __pyx_t_11 = __Pyx_PyInt_FromSize_t((__pyx_v_index + 1)); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 171; __pyx_clineno = __LINE__; goto __pyx_L6_error;} + __pyx_t_11 = __Pyx_PyInt_FromSize_t((__pyx_v_index + 1)); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 185; __pyx_clineno = __LINE__; goto __pyx_L6_error;} __Pyx_GOTREF(__pyx_t_11); - __pyx_t_13 = __Pyx_PyInt_From_long((__pyx_v_cand.l_dist + 1)); if (unlikely(!__pyx_t_13)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 171; __pyx_clineno = __LINE__; goto __pyx_L6_error;} + __pyx_t_13 = __Pyx_PyInt_From_long((__pyx_v_cand.l_dist + 1)); if (unlikely(!__pyx_t_13)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 185; __pyx_clineno = __LINE__; goto __pyx_L6_error;} __Pyx_GOTREF(__pyx_t_13); - __pyx_t_12 = PyTuple_New(3); if (unlikely(!__pyx_t_12)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 171; __pyx_clineno = __LINE__; goto __pyx_L6_error;} + __pyx_t_12 = PyTuple_New(3); if (unlikely(!__pyx_t_12)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 185; __pyx_clineno = __LINE__; goto __pyx_L6_error;} __Pyx_GOTREF(__pyx_t_12); PyTuple_SET_ITEM(__pyx_t_12, 0, __pyx_t_15); __Pyx_GIVEREF(__pyx_t_15); @@ -2185,11 +1989,11 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_2_c_find_near_matches_ __pyx_t_15 = 0; __pyx_t_11 = 0; __pyx_t_13 = 0; - __pyx_t_13 = __Pyx_PyObject_Call(__pyx_t_14, __pyx_t_12, NULL); if (unlikely(!__pyx_t_13)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 171; __pyx_clineno = __LINE__; goto __pyx_L6_error;} + __pyx_t_13 = __Pyx_PyObject_Call(__pyx_t_14, __pyx_t_12, NULL); if (unlikely(!__pyx_t_13)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 185; __pyx_clineno = __LINE__; goto __pyx_L6_error;} __Pyx_GOTREF(__pyx_t_13); __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; __Pyx_DECREF(__pyx_t_12); __pyx_t_12 = 0; - __pyx_t_16 = __Pyx_PyList_Append(__pyx_v_matches, __pyx_t_13); if (unlikely(__pyx_t_16 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 171; __pyx_clineno = __LINE__; goto __pyx_L6_error;} + __pyx_t_16 = __Pyx_PyList_Append(__pyx_v_matches, __pyx_t_13); if (unlikely(__pyx_t_16 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 185; __pyx_clineno = __LINE__; goto __pyx_L6_error;} __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; goto __pyx_L20; } @@ -2197,7 +2001,7 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_2_c_find_near_matches_ } __pyx_L18:; - /* "fuzzysearch/_generic_search.pyx":174 + /* "fuzzysearch/_generic_search.pyx":188 * * # try skipping subsequence chars * for n_skipped in xrange(1, min(max_deletions - cand.n_dels, max_l_dist - cand.l_dist) + 1): # <<<<<<<<<<<<<< @@ -2211,9 +2015,9 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_2_c_find_near_matches_ } else { __pyx_t_22 = __pyx_t_21; } - __pyx_t_13 = __Pyx_PyInt_From_long((__pyx_t_22 + 1)); if (unlikely(!__pyx_t_13)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 174; __pyx_clineno = __LINE__; goto __pyx_L6_error;} + __pyx_t_13 = __Pyx_PyInt_From_long((__pyx_t_22 + 1)); if (unlikely(!__pyx_t_13)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 188; __pyx_clineno = __LINE__; goto __pyx_L6_error;} __Pyx_GOTREF(__pyx_t_13); - __pyx_t_12 = PyTuple_New(2); if (unlikely(!__pyx_t_12)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 174; __pyx_clineno = __LINE__; goto __pyx_L6_error;} + __pyx_t_12 = PyTuple_New(2); if (unlikely(!__pyx_t_12)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 188; __pyx_clineno = __LINE__; goto __pyx_L6_error;} __Pyx_GOTREF(__pyx_t_12); __Pyx_INCREF(__pyx_int_1); PyTuple_SET_ITEM(__pyx_t_12, 0, __pyx_int_1); @@ -2221,14 +2025,14 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_2_c_find_near_matches_ PyTuple_SET_ITEM(__pyx_t_12, 1, __pyx_t_13); __Pyx_GIVEREF(__pyx_t_13); __pyx_t_13 = 0; - __pyx_t_13 = __Pyx_PyObject_Call(__pyx_builtin_xrange, __pyx_t_12, NULL); if (unlikely(!__pyx_t_13)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 174; __pyx_clineno = __LINE__; goto __pyx_L6_error;} + __pyx_t_13 = __Pyx_PyObject_Call(__pyx_builtin_xrange, __pyx_t_12, NULL); if (unlikely(!__pyx_t_13)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 188; __pyx_clineno = __LINE__; goto __pyx_L6_error;} __Pyx_GOTREF(__pyx_t_13); __Pyx_DECREF(__pyx_t_12); __pyx_t_12 = 0; if (PyList_CheckExact(__pyx_t_13) || PyTuple_CheckExact(__pyx_t_13)) { __pyx_t_12 = __pyx_t_13; __Pyx_INCREF(__pyx_t_12); __pyx_t_23 = 0; __pyx_t_24 = NULL; } else { - __pyx_t_23 = -1; __pyx_t_12 = PyObject_GetIter(__pyx_t_13); if (unlikely(!__pyx_t_12)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 174; __pyx_clineno = __LINE__; goto __pyx_L6_error;} + __pyx_t_23 = -1; __pyx_t_12 = PyObject_GetIter(__pyx_t_13); if (unlikely(!__pyx_t_12)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 188; __pyx_clineno = __LINE__; goto __pyx_L6_error;} __Pyx_GOTREF(__pyx_t_12); __pyx_t_24 = Py_TYPE(__pyx_t_12)->tp_iternext; } @@ -2237,16 +2041,16 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_2_c_find_near_matches_ if (!__pyx_t_24 && PyList_CheckExact(__pyx_t_12)) { if (__pyx_t_23 >= PyList_GET_SIZE(__pyx_t_12)) break; #if CYTHON_COMPILING_IN_CPYTHON - __pyx_t_13 = PyList_GET_ITEM(__pyx_t_12, __pyx_t_23); __Pyx_INCREF(__pyx_t_13); __pyx_t_23++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 174; __pyx_clineno = __LINE__; goto __pyx_L6_error;} + __pyx_t_13 = PyList_GET_ITEM(__pyx_t_12, __pyx_t_23); __Pyx_INCREF(__pyx_t_13); __pyx_t_23++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 188; __pyx_clineno = __LINE__; goto __pyx_L6_error;} #else - __pyx_t_13 = PySequence_ITEM(__pyx_t_12, __pyx_t_23); __pyx_t_23++; if (unlikely(!__pyx_t_13)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 174; __pyx_clineno = __LINE__; goto __pyx_L6_error;} + __pyx_t_13 = PySequence_ITEM(__pyx_t_12, __pyx_t_23); __pyx_t_23++; if (unlikely(!__pyx_t_13)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 188; __pyx_clineno = __LINE__; goto __pyx_L6_error;} #endif } else if (!__pyx_t_24 && PyTuple_CheckExact(__pyx_t_12)) { if (__pyx_t_23 >= PyTuple_GET_SIZE(__pyx_t_12)) break; #if CYTHON_COMPILING_IN_CPYTHON - __pyx_t_13 = PyTuple_GET_ITEM(__pyx_t_12, __pyx_t_23); __Pyx_INCREF(__pyx_t_13); __pyx_t_23++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 174; __pyx_clineno = __LINE__; goto __pyx_L6_error;} + __pyx_t_13 = PyTuple_GET_ITEM(__pyx_t_12, __pyx_t_23); __Pyx_INCREF(__pyx_t_13); __pyx_t_23++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 188; __pyx_clineno = __LINE__; goto __pyx_L6_error;} #else - __pyx_t_13 = PySequence_ITEM(__pyx_t_12, __pyx_t_23); __pyx_t_23++; if (unlikely(!__pyx_t_13)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 174; __pyx_clineno = __LINE__; goto __pyx_L6_error;} + __pyx_t_13 = PySequence_ITEM(__pyx_t_12, __pyx_t_23); __pyx_t_23++; if (unlikely(!__pyx_t_13)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 188; __pyx_clineno = __LINE__; goto __pyx_L6_error;} #endif } else { __pyx_t_13 = __pyx_t_24(__pyx_t_12); @@ -2254,7 +2058,7 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_2_c_find_near_matches_ PyObject* exc_type = PyErr_Occurred(); if (exc_type) { if (likely(exc_type == PyExc_StopIteration || PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else {__pyx_filename = __pyx_f[0]; __pyx_lineno = 174; __pyx_clineno = __LINE__; goto __pyx_L6_error;} + else {__pyx_filename = __pyx_f[0]; __pyx_lineno = 188; __pyx_clineno = __LINE__; goto __pyx_L6_error;} } break; } @@ -2263,62 +2067,62 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_2_c_find_near_matches_ __Pyx_XDECREF_SET(__pyx_v_n_skipped, __pyx_t_13); __pyx_t_13 = 0; - /* "fuzzysearch/_generic_search.pyx":177 + /* "fuzzysearch/_generic_search.pyx":191 * # if skipping n_dels sub-sequence chars reaches the end * # of the sub-sequence, yield a match * if cand.subseq_index + n_skipped == subseq_len: # <<<<<<<<<<<<<< * matches.append(Match(cand.start, index + 1, * cand.l_dist + n_skipped)) */ - __pyx_t_13 = __Pyx_PyInt_From_int(__pyx_v_cand.subseq_index); if (unlikely(!__pyx_t_13)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 177; __pyx_clineno = __LINE__; goto __pyx_L6_error;} + __pyx_t_13 = __Pyx_PyInt_From_int(__pyx_v_cand.subseq_index); if (unlikely(!__pyx_t_13)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 191; __pyx_clineno = __LINE__; goto __pyx_L6_error;} __Pyx_GOTREF(__pyx_t_13); - __pyx_t_14 = PyNumber_Add(__pyx_t_13, __pyx_v_n_skipped); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 177; __pyx_clineno = __LINE__; goto __pyx_L6_error;} + __pyx_t_14 = PyNumber_Add(__pyx_t_13, __pyx_v_n_skipped); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 191; __pyx_clineno = __LINE__; goto __pyx_L6_error;} __Pyx_GOTREF(__pyx_t_14); __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; - __pyx_t_13 = __Pyx_PyInt_FromSize_t(__pyx_v_subseq_len); if (unlikely(!__pyx_t_13)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 177; __pyx_clineno = __LINE__; goto __pyx_L6_error;} + __pyx_t_13 = __Pyx_PyInt_FromSize_t(__pyx_v_subseq_len); if (unlikely(!__pyx_t_13)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 191; __pyx_clineno = __LINE__; goto __pyx_L6_error;} __Pyx_GOTREF(__pyx_t_13); - __pyx_t_11 = PyObject_RichCompare(__pyx_t_14, __pyx_t_13, Py_EQ); __Pyx_XGOTREF(__pyx_t_11); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 177; __pyx_clineno = __LINE__; goto __pyx_L6_error;} + __pyx_t_11 = PyObject_RichCompare(__pyx_t_14, __pyx_t_13, Py_EQ); __Pyx_XGOTREF(__pyx_t_11); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 191; __pyx_clineno = __LINE__; goto __pyx_L6_error;} __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; - __pyx_t_4 = __Pyx_PyObject_IsTrue(__pyx_t_11); if (unlikely(__pyx_t_4 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 177; __pyx_clineno = __LINE__; goto __pyx_L6_error;} + __pyx_t_4 = __Pyx_PyObject_IsTrue(__pyx_t_11); if (unlikely(__pyx_t_4 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 191; __pyx_clineno = __LINE__; goto __pyx_L6_error;} __Pyx_DECREF(__pyx_t_11); __pyx_t_11 = 0; if (__pyx_t_4) { - /* "fuzzysearch/_generic_search.pyx":178 + /* "fuzzysearch/_generic_search.pyx":192 * # of the sub-sequence, yield a match * if cand.subseq_index + n_skipped == subseq_len: * matches.append(Match(cand.start, index + 1, # <<<<<<<<<<<<<< * cand.l_dist + n_skipped)) * break */ - __pyx_t_11 = __Pyx_GetModuleGlobalName(__pyx_n_s_Match); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 178; __pyx_clineno = __LINE__; goto __pyx_L6_error;} + __pyx_t_11 = __Pyx_GetModuleGlobalName(__pyx_n_s_Match); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 192; __pyx_clineno = __LINE__; goto __pyx_L6_error;} __Pyx_GOTREF(__pyx_t_11); - __pyx_t_13 = __Pyx_PyInt_From_int(__pyx_v_cand.start); if (unlikely(!__pyx_t_13)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 178; __pyx_clineno = __LINE__; goto __pyx_L6_error;} + __pyx_t_13 = __Pyx_PyInt_From_int(__pyx_v_cand.start); if (unlikely(!__pyx_t_13)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 192; __pyx_clineno = __LINE__; goto __pyx_L6_error;} __Pyx_GOTREF(__pyx_t_13); - __pyx_t_14 = __Pyx_PyInt_FromSize_t((__pyx_v_index + 1)); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 178; __pyx_clineno = __LINE__; goto __pyx_L6_error;} + __pyx_t_14 = __Pyx_PyInt_FromSize_t((__pyx_v_index + 1)); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 192; __pyx_clineno = __LINE__; goto __pyx_L6_error;} __Pyx_GOTREF(__pyx_t_14); - /* "fuzzysearch/_generic_search.pyx":179 + /* "fuzzysearch/_generic_search.pyx":193 * if cand.subseq_index + n_skipped == subseq_len: * matches.append(Match(cand.start, index + 1, * cand.l_dist + n_skipped)) # <<<<<<<<<<<<<< * break * # otherwise, if skipping n_skipped sub-sequence chars */ - __pyx_t_15 = __Pyx_PyInt_From_int(__pyx_v_cand.l_dist); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 179; __pyx_clineno = __LINE__; goto __pyx_L6_error;} + __pyx_t_15 = __Pyx_PyInt_From_int(__pyx_v_cand.l_dist); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 193; __pyx_clineno = __LINE__; goto __pyx_L6_error;} __Pyx_GOTREF(__pyx_t_15); - __pyx_t_25 = PyNumber_Add(__pyx_t_15, __pyx_v_n_skipped); if (unlikely(!__pyx_t_25)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 179; __pyx_clineno = __LINE__; goto __pyx_L6_error;} + __pyx_t_25 = PyNumber_Add(__pyx_t_15, __pyx_v_n_skipped); if (unlikely(!__pyx_t_25)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 193; __pyx_clineno = __LINE__; goto __pyx_L6_error;} __Pyx_GOTREF(__pyx_t_25); __Pyx_DECREF(__pyx_t_15); __pyx_t_15 = 0; - /* "fuzzysearch/_generic_search.pyx":178 + /* "fuzzysearch/_generic_search.pyx":192 * # of the sub-sequence, yield a match * if cand.subseq_index + n_skipped == subseq_len: * matches.append(Match(cand.start, index + 1, # <<<<<<<<<<<<<< * cand.l_dist + n_skipped)) * break */ - __pyx_t_15 = PyTuple_New(3); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 178; __pyx_clineno = __LINE__; goto __pyx_L6_error;} + __pyx_t_15 = PyTuple_New(3); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 192; __pyx_clineno = __LINE__; goto __pyx_L6_error;} __Pyx_GOTREF(__pyx_t_15); PyTuple_SET_ITEM(__pyx_t_15, 0, __pyx_t_13); __Pyx_GIVEREF(__pyx_t_13); @@ -2329,14 +2133,14 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_2_c_find_near_matches_ __pyx_t_13 = 0; __pyx_t_14 = 0; __pyx_t_25 = 0; - __pyx_t_25 = __Pyx_PyObject_Call(__pyx_t_11, __pyx_t_15, NULL); if (unlikely(!__pyx_t_25)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 178; __pyx_clineno = __LINE__; goto __pyx_L6_error;} + __pyx_t_25 = __Pyx_PyObject_Call(__pyx_t_11, __pyx_t_15, NULL); if (unlikely(!__pyx_t_25)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 192; __pyx_clineno = __LINE__; goto __pyx_L6_error;} __Pyx_GOTREF(__pyx_t_25); __Pyx_DECREF(__pyx_t_11); __pyx_t_11 = 0; __Pyx_DECREF(__pyx_t_15); __pyx_t_15 = 0; - __pyx_t_16 = __Pyx_PyList_Append(__pyx_v_matches, __pyx_t_25); if (unlikely(__pyx_t_16 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 178; __pyx_clineno = __LINE__; goto __pyx_L6_error;} + __pyx_t_16 = __Pyx_PyList_Append(__pyx_v_matches, __pyx_t_25); if (unlikely(__pyx_t_16 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 192; __pyx_clineno = __LINE__; goto __pyx_L6_error;} __Pyx_DECREF(__pyx_t_25); __pyx_t_25 = 0; - /* "fuzzysearch/_generic_search.pyx":180 + /* "fuzzysearch/_generic_search.pyx":194 * matches.append(Match(cand.start, index + 1, * cand.l_dist + n_skipped)) * break # <<<<<<<<<<<<<< @@ -2346,82 +2150,82 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_2_c_find_near_matches_ goto __pyx_L22_break; } - /* "fuzzysearch/_generic_search.pyx":184 + /* "fuzzysearch/_generic_search.pyx":198 * # reaches a sub-sequence char identical to this sequence * # char ... - * elif charchar == subsequence[cand.subseq_index + n_skipped]: # <<<<<<<<<<<<<< + * elif seq_char == subsequence[cand.subseq_index + n_skipped]: # <<<<<<<<<<<<<< * # if this is the last char of the sub-sequence, yield * # a match */ - __pyx_t_25 = __Pyx_PyInt_From_int(__pyx_v_cand.subseq_index); if (unlikely(!__pyx_t_25)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 184; __pyx_clineno = __LINE__; goto __pyx_L6_error;} + __pyx_t_25 = __Pyx_PyInt_From_int(__pyx_v_cand.subseq_index); if (unlikely(!__pyx_t_25)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 198; __pyx_clineno = __LINE__; goto __pyx_L6_error;} __Pyx_GOTREF(__pyx_t_25); - __pyx_t_15 = PyNumber_Add(__pyx_t_25, __pyx_v_n_skipped); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 184; __pyx_clineno = __LINE__; goto __pyx_L6_error;} + __pyx_t_15 = PyNumber_Add(__pyx_t_25, __pyx_v_n_skipped); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 198; __pyx_clineno = __LINE__; goto __pyx_L6_error;} __Pyx_GOTREF(__pyx_t_15); __Pyx_DECREF(__pyx_t_25); __pyx_t_25 = 0; - __pyx_t_26 = __Pyx_PyIndex_AsSsize_t(__pyx_t_15); if (unlikely((__pyx_t_26 == (Py_ssize_t)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 184; __pyx_clineno = __LINE__; goto __pyx_L6_error;} + __pyx_t_26 = __Pyx_PyIndex_AsSsize_t(__pyx_t_15); if (unlikely((__pyx_t_26 == (Py_ssize_t)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 198; __pyx_clineno = __LINE__; goto __pyx_L6_error;} __Pyx_DECREF(__pyx_t_15); __pyx_t_15 = 0; - __pyx_t_4 = ((__pyx_v_charchar == (__pyx_v_subsequence[__pyx_t_26])) != 0); + __pyx_t_4 = ((__pyx_v_seq_char == (__pyx_v_subsequence[__pyx_t_26])) != 0); if (__pyx_t_4) { - /* "fuzzysearch/_generic_search.pyx":187 + /* "fuzzysearch/_generic_search.pyx":201 * # if this is the last char of the sub-sequence, yield * # a match * if cand.subseq_index + n_skipped + 1 == subseq_len: # <<<<<<<<<<<<<< * matches.append(Match(cand.start, index + 1, * cand.l_dist + n_skipped)) */ - __pyx_t_15 = __Pyx_PyInt_From_int(__pyx_v_cand.subseq_index); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 187; __pyx_clineno = __LINE__; goto __pyx_L6_error;} + __pyx_t_15 = __Pyx_PyInt_From_int(__pyx_v_cand.subseq_index); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 201; __pyx_clineno = __LINE__; goto __pyx_L6_error;} __Pyx_GOTREF(__pyx_t_15); - __pyx_t_25 = PyNumber_Add(__pyx_t_15, __pyx_v_n_skipped); if (unlikely(!__pyx_t_25)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 187; __pyx_clineno = __LINE__; goto __pyx_L6_error;} + __pyx_t_25 = PyNumber_Add(__pyx_t_15, __pyx_v_n_skipped); if (unlikely(!__pyx_t_25)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 201; __pyx_clineno = __LINE__; goto __pyx_L6_error;} __Pyx_GOTREF(__pyx_t_25); __Pyx_DECREF(__pyx_t_15); __pyx_t_15 = 0; - __pyx_t_15 = PyNumber_Add(__pyx_t_25, __pyx_int_1); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 187; __pyx_clineno = __LINE__; goto __pyx_L6_error;} + __pyx_t_15 = PyNumber_Add(__pyx_t_25, __pyx_int_1); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 201; __pyx_clineno = __LINE__; goto __pyx_L6_error;} __Pyx_GOTREF(__pyx_t_15); __Pyx_DECREF(__pyx_t_25); __pyx_t_25 = 0; - __pyx_t_25 = __Pyx_PyInt_FromSize_t(__pyx_v_subseq_len); if (unlikely(!__pyx_t_25)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 187; __pyx_clineno = __LINE__; goto __pyx_L6_error;} + __pyx_t_25 = __Pyx_PyInt_FromSize_t(__pyx_v_subseq_len); if (unlikely(!__pyx_t_25)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 201; __pyx_clineno = __LINE__; goto __pyx_L6_error;} __Pyx_GOTREF(__pyx_t_25); - __pyx_t_11 = PyObject_RichCompare(__pyx_t_15, __pyx_t_25, Py_EQ); __Pyx_XGOTREF(__pyx_t_11); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 187; __pyx_clineno = __LINE__; goto __pyx_L6_error;} + __pyx_t_11 = PyObject_RichCompare(__pyx_t_15, __pyx_t_25, Py_EQ); __Pyx_XGOTREF(__pyx_t_11); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 201; __pyx_clineno = __LINE__; goto __pyx_L6_error;} __Pyx_DECREF(__pyx_t_15); __pyx_t_15 = 0; __Pyx_DECREF(__pyx_t_25); __pyx_t_25 = 0; - __pyx_t_4 = __Pyx_PyObject_IsTrue(__pyx_t_11); if (unlikely(__pyx_t_4 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 187; __pyx_clineno = __LINE__; goto __pyx_L6_error;} + __pyx_t_4 = __Pyx_PyObject_IsTrue(__pyx_t_11); if (unlikely(__pyx_t_4 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 201; __pyx_clineno = __LINE__; goto __pyx_L6_error;} __Pyx_DECREF(__pyx_t_11); __pyx_t_11 = 0; if (__pyx_t_4) { - /* "fuzzysearch/_generic_search.pyx":188 + /* "fuzzysearch/_generic_search.pyx":202 * # a match * if cand.subseq_index + n_skipped + 1 == subseq_len: * matches.append(Match(cand.start, index + 1, # <<<<<<<<<<<<<< * cand.l_dist + n_skipped)) * # otherwise add a candidate skipping n_skipped */ - __pyx_t_11 = __Pyx_GetModuleGlobalName(__pyx_n_s_Match); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 188; __pyx_clineno = __LINE__; goto __pyx_L6_error;} + __pyx_t_11 = __Pyx_GetModuleGlobalName(__pyx_n_s_Match); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 202; __pyx_clineno = __LINE__; goto __pyx_L6_error;} __Pyx_GOTREF(__pyx_t_11); - __pyx_t_25 = __Pyx_PyInt_From_int(__pyx_v_cand.start); if (unlikely(!__pyx_t_25)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 188; __pyx_clineno = __LINE__; goto __pyx_L6_error;} + __pyx_t_25 = __Pyx_PyInt_From_int(__pyx_v_cand.start); if (unlikely(!__pyx_t_25)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 202; __pyx_clineno = __LINE__; goto __pyx_L6_error;} __Pyx_GOTREF(__pyx_t_25); - __pyx_t_15 = __Pyx_PyInt_FromSize_t((__pyx_v_index + 1)); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 188; __pyx_clineno = __LINE__; goto __pyx_L6_error;} + __pyx_t_15 = __Pyx_PyInt_FromSize_t((__pyx_v_index + 1)); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 202; __pyx_clineno = __LINE__; goto __pyx_L6_error;} __Pyx_GOTREF(__pyx_t_15); - /* "fuzzysearch/_generic_search.pyx":189 + /* "fuzzysearch/_generic_search.pyx":203 * if cand.subseq_index + n_skipped + 1 == subseq_len: * matches.append(Match(cand.start, index + 1, * cand.l_dist + n_skipped)) # <<<<<<<<<<<<<< * # otherwise add a candidate skipping n_skipped * # subsequence chars */ - __pyx_t_14 = __Pyx_PyInt_From_int(__pyx_v_cand.l_dist); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 189; __pyx_clineno = __LINE__; goto __pyx_L6_error;} + __pyx_t_14 = __Pyx_PyInt_From_int(__pyx_v_cand.l_dist); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 203; __pyx_clineno = __LINE__; goto __pyx_L6_error;} __Pyx_GOTREF(__pyx_t_14); - __pyx_t_13 = PyNumber_Add(__pyx_t_14, __pyx_v_n_skipped); if (unlikely(!__pyx_t_13)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 189; __pyx_clineno = __LINE__; goto __pyx_L6_error;} + __pyx_t_13 = PyNumber_Add(__pyx_t_14, __pyx_v_n_skipped); if (unlikely(!__pyx_t_13)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 203; __pyx_clineno = __LINE__; goto __pyx_L6_error;} __Pyx_GOTREF(__pyx_t_13); __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; - /* "fuzzysearch/_generic_search.pyx":188 + /* "fuzzysearch/_generic_search.pyx":202 * # a match * if cand.subseq_index + n_skipped + 1 == subseq_len: * matches.append(Match(cand.start, index + 1, # <<<<<<<<<<<<<< * cand.l_dist + n_skipped)) * # otherwise add a candidate skipping n_skipped */ - __pyx_t_14 = PyTuple_New(3); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 188; __pyx_clineno = __LINE__; goto __pyx_L6_error;} + __pyx_t_14 = PyTuple_New(3); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 202; __pyx_clineno = __LINE__; goto __pyx_L6_error;} __Pyx_GOTREF(__pyx_t_14); PyTuple_SET_ITEM(__pyx_t_14, 0, __pyx_t_25); __Pyx_GIVEREF(__pyx_t_25); @@ -2432,17 +2236,17 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_2_c_find_near_matches_ __pyx_t_25 = 0; __pyx_t_15 = 0; __pyx_t_13 = 0; - __pyx_t_13 = __Pyx_PyObject_Call(__pyx_t_11, __pyx_t_14, NULL); if (unlikely(!__pyx_t_13)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 188; __pyx_clineno = __LINE__; goto __pyx_L6_error;} + __pyx_t_13 = __Pyx_PyObject_Call(__pyx_t_11, __pyx_t_14, NULL); if (unlikely(!__pyx_t_13)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 202; __pyx_clineno = __LINE__; goto __pyx_L6_error;} __Pyx_GOTREF(__pyx_t_13); __Pyx_DECREF(__pyx_t_11); __pyx_t_11 = 0; __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; - __pyx_t_16 = __Pyx_PyList_Append(__pyx_v_matches, __pyx_t_13); if (unlikely(__pyx_t_16 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 188; __pyx_clineno = __LINE__; goto __pyx_L6_error;} + __pyx_t_16 = __Pyx_PyList_Append(__pyx_v_matches, __pyx_t_13); if (unlikely(__pyx_t_16 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 202; __pyx_clineno = __LINE__; goto __pyx_L6_error;} __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; goto __pyx_L24; } /*else*/ { - /* "fuzzysearch/_generic_search.pyx":194 + /* "fuzzysearch/_generic_search.pyx":208 * else: * new_candidates[n_new_candidates] = GenericSearchCandidate( * cand.start, cand.subseq_index + 1 + n_skipped, # <<<<<<<<<<<<<< @@ -2450,33 +2254,33 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_2_c_find_near_matches_ * cand.n_ins, cand.n_dels + n_skipped, */ __pyx_t_10.start = __pyx_v_cand.start; - __pyx_t_13 = __Pyx_PyInt_From_long((__pyx_v_cand.subseq_index + 1)); if (unlikely(!__pyx_t_13)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 194; __pyx_clineno = __LINE__; goto __pyx_L6_error;} + __pyx_t_13 = __Pyx_PyInt_From_long((__pyx_v_cand.subseq_index + 1)); if (unlikely(!__pyx_t_13)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 208; __pyx_clineno = __LINE__; goto __pyx_L6_error;} __Pyx_GOTREF(__pyx_t_13); - __pyx_t_14 = PyNumber_Add(__pyx_t_13, __pyx_v_n_skipped); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 194; __pyx_clineno = __LINE__; goto __pyx_L6_error;} + __pyx_t_14 = PyNumber_Add(__pyx_t_13, __pyx_v_n_skipped); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 208; __pyx_clineno = __LINE__; goto __pyx_L6_error;} __Pyx_GOTREF(__pyx_t_14); __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; - __pyx_t_27 = __Pyx_PyInt_As_int(__pyx_t_14); if (unlikely((__pyx_t_27 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 194; __pyx_clineno = __LINE__; goto __pyx_L6_error;} + __pyx_t_27 = __Pyx_PyInt_As_int(__pyx_t_14); if (unlikely((__pyx_t_27 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 208; __pyx_clineno = __LINE__; goto __pyx_L6_error;} __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; __pyx_t_10.subseq_index = __pyx_t_27; - /* "fuzzysearch/_generic_search.pyx":195 + /* "fuzzysearch/_generic_search.pyx":209 * new_candidates[n_new_candidates] = GenericSearchCandidate( * cand.start, cand.subseq_index + 1 + n_skipped, * cand.l_dist + n_skipped, cand.n_subs, # <<<<<<<<<<<<<< * cand.n_ins, cand.n_dels + n_skipped, * ) */ - __pyx_t_14 = __Pyx_PyInt_From_int(__pyx_v_cand.l_dist); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 195; __pyx_clineno = __LINE__; goto __pyx_L6_error;} + __pyx_t_14 = __Pyx_PyInt_From_int(__pyx_v_cand.l_dist); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 209; __pyx_clineno = __LINE__; goto __pyx_L6_error;} __Pyx_GOTREF(__pyx_t_14); - __pyx_t_13 = PyNumber_Add(__pyx_t_14, __pyx_v_n_skipped); if (unlikely(!__pyx_t_13)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 195; __pyx_clineno = __LINE__; goto __pyx_L6_error;} + __pyx_t_13 = PyNumber_Add(__pyx_t_14, __pyx_v_n_skipped); if (unlikely(!__pyx_t_13)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 209; __pyx_clineno = __LINE__; goto __pyx_L6_error;} __Pyx_GOTREF(__pyx_t_13); __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; - __pyx_t_27 = __Pyx_PyInt_As_int(__pyx_t_13); if (unlikely((__pyx_t_27 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 195; __pyx_clineno = __LINE__; goto __pyx_L6_error;} + __pyx_t_27 = __Pyx_PyInt_As_int(__pyx_t_13); if (unlikely((__pyx_t_27 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 209; __pyx_clineno = __LINE__; goto __pyx_L6_error;} __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; __pyx_t_10.l_dist = __pyx_t_27; __pyx_t_10.n_subs = __pyx_v_cand.n_subs; - /* "fuzzysearch/_generic_search.pyx":196 + /* "fuzzysearch/_generic_search.pyx":210 * cand.start, cand.subseq_index + 1 + n_skipped, * cand.l_dist + n_skipped, cand.n_subs, * cand.n_ins, cand.n_dels + n_skipped, # <<<<<<<<<<<<<< @@ -2484,16 +2288,16 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_2_c_find_near_matches_ * n_new_candidates += 1 */ __pyx_t_10.n_ins = __pyx_v_cand.n_ins; - __pyx_t_13 = __Pyx_PyInt_From_int(__pyx_v_cand.n_dels); if (unlikely(!__pyx_t_13)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 196; __pyx_clineno = __LINE__; goto __pyx_L6_error;} + __pyx_t_13 = __Pyx_PyInt_From_int(__pyx_v_cand.n_dels); if (unlikely(!__pyx_t_13)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 210; __pyx_clineno = __LINE__; goto __pyx_L6_error;} __Pyx_GOTREF(__pyx_t_13); - __pyx_t_14 = PyNumber_Add(__pyx_t_13, __pyx_v_n_skipped); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 196; __pyx_clineno = __LINE__; goto __pyx_L6_error;} + __pyx_t_14 = PyNumber_Add(__pyx_t_13, __pyx_v_n_skipped); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 210; __pyx_clineno = __LINE__; goto __pyx_L6_error;} __Pyx_GOTREF(__pyx_t_14); __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; - __pyx_t_27 = __Pyx_PyInt_As_int(__pyx_t_14); if (unlikely((__pyx_t_27 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 196; __pyx_clineno = __LINE__; goto __pyx_L6_error;} + __pyx_t_27 = __Pyx_PyInt_As_int(__pyx_t_14); if (unlikely((__pyx_t_27 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 210; __pyx_clineno = __LINE__; goto __pyx_L6_error;} __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; __pyx_t_10.n_dels = __pyx_t_27; - /* "fuzzysearch/_generic_search.pyx":193 + /* "fuzzysearch/_generic_search.pyx":207 * # subsequence chars * else: * new_candidates[n_new_candidates] = GenericSearchCandidate( # <<<<<<<<<<<<<< @@ -2502,7 +2306,7 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_2_c_find_near_matches_ */ (__pyx_v_new_candidates[__pyx_v_n_new_candidates]) = __pyx_t_10; - /* "fuzzysearch/_generic_search.pyx":198 + /* "fuzzysearch/_generic_search.pyx":212 * cand.n_ins, cand.n_dels + n_skipped, * ) * n_new_candidates += 1 # <<<<<<<<<<<<<< @@ -2513,7 +2317,7 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_2_c_find_near_matches_ } __pyx_L24:; - /* "fuzzysearch/_generic_search.pyx":199 + /* "fuzzysearch/_generic_search.pyx":213 * ) * n_new_candidates += 1 * break # <<<<<<<<<<<<<< @@ -2530,7 +2334,7 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_2_c_find_near_matches_ __pyx_L10_continue:; } - /* "fuzzysearch/_generic_search.pyx":205 + /* "fuzzysearch/_generic_search.pyx":219 * * # new_candidates = candidates; candidates = [] * _tmp = candidates # <<<<<<<<<<<<<< @@ -2539,7 +2343,7 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_2_c_find_near_matches_ */ __pyx_v__tmp = __pyx_v_candidates; - /* "fuzzysearch/_generic_search.pyx":206 + /* "fuzzysearch/_generic_search.pyx":220 * # new_candidates = candidates; candidates = [] * _tmp = candidates * candidates = new_candidates # <<<<<<<<<<<<<< @@ -2548,7 +2352,7 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_2_c_find_near_matches_ */ __pyx_v_candidates = __pyx_v_new_candidates; - /* "fuzzysearch/_generic_search.pyx":207 + /* "fuzzysearch/_generic_search.pyx":221 * _tmp = candidates * candidates = new_candidates * new_candidates = _tmp # <<<<<<<<<<<<<< @@ -2557,7 +2361,7 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_2_c_find_near_matches_ */ __pyx_v_new_candidates = __pyx_v__tmp; - /* "fuzzysearch/_generic_search.pyx":208 + /* "fuzzysearch/_generic_search.pyx":222 * candidates = new_candidates * new_candidates = _tmp * n_candidates = n_new_candidates # <<<<<<<<<<<<<< @@ -2566,7 +2370,7 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_2_c_find_near_matches_ */ __pyx_v_n_candidates = __pyx_v_n_new_candidates; - /* "fuzzysearch/_generic_search.pyx":209 + /* "fuzzysearch/_generic_search.pyx":223 * new_candidates = _tmp * n_candidates = n_new_candidates * n_new_candidates = 0 # <<<<<<<<<<<<<< @@ -2575,7 +2379,7 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_2_c_find_near_matches_ */ __pyx_v_n_new_candidates = 0; - /* "fuzzysearch/_generic_search.pyx":211 + /* "fuzzysearch/_generic_search.pyx":225 * n_new_candidates = 0 * * if have_realloced: # <<<<<<<<<<<<<< @@ -2585,7 +2389,7 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_2_c_find_near_matches_ __pyx_t_4 = (__pyx_v_have_realloced != 0); if (__pyx_t_4) { - /* "fuzzysearch/_generic_search.pyx":212 + /* "fuzzysearch/_generic_search.pyx":226 * * if have_realloced: * have_realloced = False # <<<<<<<<<<<<<< @@ -2594,7 +2398,7 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_2_c_find_near_matches_ */ __pyx_v_have_realloced = 0; - /* "fuzzysearch/_generic_search.pyx":213 + /* "fuzzysearch/_generic_search.pyx":227 * if have_realloced: * have_realloced = False * _tmp = realloc(new_candidates, alloc_size * sizeof(GenericSearchCandidate)) # <<<<<<<<<<<<<< @@ -2603,7 +2407,7 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_2_c_find_near_matches_ */ __pyx_v__tmp = ((struct __pyx_t_11fuzzysearch_15_generic_search_GenericSearchCandidate *)realloc(__pyx_v_new_candidates, (__pyx_v_alloc_size * (sizeof(struct __pyx_t_11fuzzysearch_15_generic_search_GenericSearchCandidate))))); - /* "fuzzysearch/_generic_search.pyx":214 + /* "fuzzysearch/_generic_search.pyx":228 * have_realloced = False * _tmp = realloc(new_candidates, alloc_size * sizeof(GenericSearchCandidate)) * if _tmp is NULL: # <<<<<<<<<<<<<< @@ -2613,17 +2417,17 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_2_c_find_near_matches_ __pyx_t_4 = ((__pyx_v__tmp == NULL) != 0); if (__pyx_t_4) { - /* "fuzzysearch/_generic_search.pyx":215 + /* "fuzzysearch/_generic_search.pyx":229 * _tmp = realloc(new_candidates, alloc_size * sizeof(GenericSearchCandidate)) * if _tmp is NULL: * raise MemoryError() # <<<<<<<<<<<<<< * new_candidates = _tmp * */ - PyErr_NoMemory(); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 215; __pyx_clineno = __LINE__; goto __pyx_L6_error;} + PyErr_NoMemory(); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 229; __pyx_clineno = __LINE__; goto __pyx_L6_error;} } - /* "fuzzysearch/_generic_search.pyx":216 + /* "fuzzysearch/_generic_search.pyx":230 * if _tmp is NULL: * raise MemoryError() * new_candidates = _tmp # <<<<<<<<<<<<<< @@ -2635,7 +2439,7 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_2_c_find_near_matches_ } __pyx_L25:; - /* "fuzzysearch/_generic_search.pyx":218 + /* "fuzzysearch/_generic_search.pyx":232 * new_candidates = _tmp * * index += 1 # <<<<<<<<<<<<<< @@ -2646,7 +2450,7 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_2_c_find_near_matches_ } __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - /* "fuzzysearch/_generic_search.pyx":220 + /* "fuzzysearch/_generic_search.pyx":234 * index += 1 * * for n_cand in xrange(n_candidates): # <<<<<<<<<<<<<< @@ -2657,7 +2461,7 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_2_c_find_near_matches_ for (__pyx_t_1 = 0; __pyx_t_1 < __pyx_t_3; __pyx_t_1+=1) { __pyx_v_n_cand = __pyx_t_1; - /* "fuzzysearch/_generic_search.pyx":221 + /* "fuzzysearch/_generic_search.pyx":235 * * for n_cand in xrange(n_candidates): * cand = candidates[n_cand] # <<<<<<<<<<<<<< @@ -2666,57 +2470,57 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_2_c_find_near_matches_ */ __pyx_v_cand = (__pyx_v_candidates[__pyx_v_n_cand]); - /* "fuzzysearch/_generic_search.pyx":223 + /* "fuzzysearch/_generic_search.pyx":237 * cand = candidates[n_cand] * # note: index == length(sequence) * n_skipped = subseq_len - cand.subseq_index # <<<<<<<<<<<<<< * if cand.n_dels + n_skipped <= max_deletions and \ * cand.l_dist + n_skipped <= max_l_dist: */ - __pyx_t_5 = __Pyx_PyInt_FromSize_t((__pyx_v_subseq_len - __pyx_v_cand.subseq_index)); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 223; __pyx_clineno = __LINE__; goto __pyx_L6_error;} + __pyx_t_5 = __Pyx_PyInt_FromSize_t((__pyx_v_subseq_len - __pyx_v_cand.subseq_index)); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 237; __pyx_clineno = __LINE__; goto __pyx_L6_error;} __Pyx_GOTREF(__pyx_t_5); __Pyx_XDECREF_SET(__pyx_v_n_skipped, __pyx_t_5); __pyx_t_5 = 0; - /* "fuzzysearch/_generic_search.pyx":224 + /* "fuzzysearch/_generic_search.pyx":238 * # note: index == length(sequence) * n_skipped = subseq_len - cand.subseq_index * if cand.n_dels + n_skipped <= max_deletions and \ # <<<<<<<<<<<<<< * cand.l_dist + n_skipped <= max_l_dist: * matches.append(Match(cand.start, index, cand.l_dist + n_skipped)) */ - __pyx_t_5 = __Pyx_PyInt_From_int(__pyx_v_cand.n_dels); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 224; __pyx_clineno = __LINE__; goto __pyx_L6_error;} + __pyx_t_5 = __Pyx_PyInt_From_int(__pyx_v_cand.n_dels); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 238; __pyx_clineno = __LINE__; goto __pyx_L6_error;} __Pyx_GOTREF(__pyx_t_5); - __pyx_t_12 = PyNumber_Add(__pyx_t_5, __pyx_v_n_skipped); if (unlikely(!__pyx_t_12)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 224; __pyx_clineno = __LINE__; goto __pyx_L6_error;} + __pyx_t_12 = PyNumber_Add(__pyx_t_5, __pyx_v_n_skipped); if (unlikely(!__pyx_t_12)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 238; __pyx_clineno = __LINE__; goto __pyx_L6_error;} __Pyx_GOTREF(__pyx_t_12); __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - __pyx_t_5 = __Pyx_PyInt_From_unsigned_int(__pyx_v_max_deletions); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 224; __pyx_clineno = __LINE__; goto __pyx_L6_error;} + __pyx_t_5 = __Pyx_PyInt_From_unsigned_int(__pyx_v_max_deletions); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 238; __pyx_clineno = __LINE__; goto __pyx_L6_error;} __Pyx_GOTREF(__pyx_t_5); - __pyx_t_14 = PyObject_RichCompare(__pyx_t_12, __pyx_t_5, Py_LE); __Pyx_XGOTREF(__pyx_t_14); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 224; __pyx_clineno = __LINE__; goto __pyx_L6_error;} + __pyx_t_14 = PyObject_RichCompare(__pyx_t_12, __pyx_t_5, Py_LE); __Pyx_XGOTREF(__pyx_t_14); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 238; __pyx_clineno = __LINE__; goto __pyx_L6_error;} __Pyx_DECREF(__pyx_t_12); __pyx_t_12 = 0; __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - __pyx_t_4 = __Pyx_PyObject_IsTrue(__pyx_t_14); if (unlikely(__pyx_t_4 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 224; __pyx_clineno = __LINE__; goto __pyx_L6_error;} + __pyx_t_4 = __Pyx_PyObject_IsTrue(__pyx_t_14); if (unlikely(__pyx_t_4 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 238; __pyx_clineno = __LINE__; goto __pyx_L6_error;} __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; if (__pyx_t_4) { - /* "fuzzysearch/_generic_search.pyx":225 + /* "fuzzysearch/_generic_search.pyx":239 * n_skipped = subseq_len - cand.subseq_index * if cand.n_dels + n_skipped <= max_deletions and \ * cand.l_dist + n_skipped <= max_l_dist: # <<<<<<<<<<<<<< * matches.append(Match(cand.start, index, cand.l_dist + n_skipped)) * */ - __pyx_t_14 = __Pyx_PyInt_From_int(__pyx_v_cand.l_dist); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 225; __pyx_clineno = __LINE__; goto __pyx_L6_error;} + __pyx_t_14 = __Pyx_PyInt_From_int(__pyx_v_cand.l_dist); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 239; __pyx_clineno = __LINE__; goto __pyx_L6_error;} __Pyx_GOTREF(__pyx_t_14); - __pyx_t_5 = PyNumber_Add(__pyx_t_14, __pyx_v_n_skipped); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 225; __pyx_clineno = __LINE__; goto __pyx_L6_error;} + __pyx_t_5 = PyNumber_Add(__pyx_t_14, __pyx_v_n_skipped); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 239; __pyx_clineno = __LINE__; goto __pyx_L6_error;} __Pyx_GOTREF(__pyx_t_5); __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; - __pyx_t_14 = __Pyx_PyInt_From_unsigned_int(__pyx_v_max_l_dist); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 225; __pyx_clineno = __LINE__; goto __pyx_L6_error;} + __pyx_t_14 = __Pyx_PyInt_From_unsigned_int(__pyx_v_max_l_dist); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 239; __pyx_clineno = __LINE__; goto __pyx_L6_error;} __Pyx_GOTREF(__pyx_t_14); - __pyx_t_12 = PyObject_RichCompare(__pyx_t_5, __pyx_t_14, Py_LE); __Pyx_XGOTREF(__pyx_t_12); if (unlikely(!__pyx_t_12)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 225; __pyx_clineno = __LINE__; goto __pyx_L6_error;} + __pyx_t_12 = PyObject_RichCompare(__pyx_t_5, __pyx_t_14, Py_LE); __Pyx_XGOTREF(__pyx_t_12); if (unlikely(!__pyx_t_12)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 239; __pyx_clineno = __LINE__; goto __pyx_L6_error;} __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; __Pyx_DECREF(__pyx_t_14); __pyx_t_14 = 0; - __pyx_t_18 = __Pyx_PyObject_IsTrue(__pyx_t_12); if (unlikely(__pyx_t_18 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 225; __pyx_clineno = __LINE__; goto __pyx_L6_error;} + __pyx_t_18 = __Pyx_PyObject_IsTrue(__pyx_t_12); if (unlikely(__pyx_t_18 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 239; __pyx_clineno = __LINE__; goto __pyx_L6_error;} __Pyx_DECREF(__pyx_t_12); __pyx_t_12 = 0; __pyx_t_19 = __pyx_t_18; } else { @@ -2724,25 +2528,25 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_2_c_find_near_matches_ } if (__pyx_t_19) { - /* "fuzzysearch/_generic_search.pyx":226 + /* "fuzzysearch/_generic_search.pyx":240 * if cand.n_dels + n_skipped <= max_deletions and \ * cand.l_dist + n_skipped <= max_l_dist: * matches.append(Match(cand.start, index, cand.l_dist + n_skipped)) # <<<<<<<<<<<<<< * * finally: */ - __pyx_t_12 = __Pyx_GetModuleGlobalName(__pyx_n_s_Match); if (unlikely(!__pyx_t_12)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 226; __pyx_clineno = __LINE__; goto __pyx_L6_error;} + __pyx_t_12 = __Pyx_GetModuleGlobalName(__pyx_n_s_Match); if (unlikely(!__pyx_t_12)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 240; __pyx_clineno = __LINE__; goto __pyx_L6_error;} __Pyx_GOTREF(__pyx_t_12); - __pyx_t_14 = __Pyx_PyInt_From_int(__pyx_v_cand.start); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 226; __pyx_clineno = __LINE__; goto __pyx_L6_error;} + __pyx_t_14 = __Pyx_PyInt_From_int(__pyx_v_cand.start); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 240; __pyx_clineno = __LINE__; goto __pyx_L6_error;} __Pyx_GOTREF(__pyx_t_14); - __pyx_t_5 = __Pyx_PyInt_FromSize_t(__pyx_v_index); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 226; __pyx_clineno = __LINE__; goto __pyx_L6_error;} + __pyx_t_5 = __Pyx_PyInt_FromSize_t(__pyx_v_index); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 240; __pyx_clineno = __LINE__; goto __pyx_L6_error;} __Pyx_GOTREF(__pyx_t_5); - __pyx_t_13 = __Pyx_PyInt_From_int(__pyx_v_cand.l_dist); if (unlikely(!__pyx_t_13)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 226; __pyx_clineno = __LINE__; goto __pyx_L6_error;} + __pyx_t_13 = __Pyx_PyInt_From_int(__pyx_v_cand.l_dist); if (unlikely(!__pyx_t_13)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 240; __pyx_clineno = __LINE__; goto __pyx_L6_error;} __Pyx_GOTREF(__pyx_t_13); - __pyx_t_11 = PyNumber_Add(__pyx_t_13, __pyx_v_n_skipped); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 226; __pyx_clineno = __LINE__; goto __pyx_L6_error;} + __pyx_t_11 = PyNumber_Add(__pyx_t_13, __pyx_v_n_skipped); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 240; __pyx_clineno = __LINE__; goto __pyx_L6_error;} __Pyx_GOTREF(__pyx_t_11); __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; - __pyx_t_13 = PyTuple_New(3); if (unlikely(!__pyx_t_13)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 226; __pyx_clineno = __LINE__; goto __pyx_L6_error;} + __pyx_t_13 = PyTuple_New(3); if (unlikely(!__pyx_t_13)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 240; __pyx_clineno = __LINE__; goto __pyx_L6_error;} __Pyx_GOTREF(__pyx_t_13); PyTuple_SET_ITEM(__pyx_t_13, 0, __pyx_t_14); __Pyx_GIVEREF(__pyx_t_14); @@ -2753,11 +2557,11 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_2_c_find_near_matches_ __pyx_t_14 = 0; __pyx_t_5 = 0; __pyx_t_11 = 0; - __pyx_t_11 = __Pyx_PyObject_Call(__pyx_t_12, __pyx_t_13, NULL); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 226; __pyx_clineno = __LINE__; goto __pyx_L6_error;} + __pyx_t_11 = __Pyx_PyObject_Call(__pyx_t_12, __pyx_t_13, NULL); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 240; __pyx_clineno = __LINE__; goto __pyx_L6_error;} __Pyx_GOTREF(__pyx_t_11); __Pyx_DECREF(__pyx_t_12); __pyx_t_12 = 0; __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; - __pyx_t_16 = __Pyx_PyList_Append(__pyx_v_matches, __pyx_t_11); if (unlikely(__pyx_t_16 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 226; __pyx_clineno = __LINE__; goto __pyx_L6_error;} + __pyx_t_16 = __Pyx_PyList_Append(__pyx_v_matches, __pyx_t_11); if (unlikely(__pyx_t_16 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 240; __pyx_clineno = __LINE__; goto __pyx_L6_error;} __Pyx_DECREF(__pyx_t_11); __pyx_t_11 = 0; goto __pyx_L29; } @@ -2765,7 +2569,7 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_2_c_find_near_matches_ } } - /* "fuzzysearch/_generic_search.pyx":229 + /* "fuzzysearch/_generic_search.pyx":243 * * finally: * free(candidates) # <<<<<<<<<<<<<< @@ -2776,7 +2580,7 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_2_c_find_near_matches_ /*normal exit:*/{ free(__pyx_v_candidates); - /* "fuzzysearch/_generic_search.pyx":230 + /* "fuzzysearch/_generic_search.pyx":244 * finally: * free(candidates) * free(new_candidates) # <<<<<<<<<<<<<< @@ -2807,7 +2611,7 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_2_c_find_near_matches_ __pyx_t_27 = __pyx_lineno; __pyx_t_28 = __pyx_clineno; __pyx_t_29 = __pyx_filename; { - /* "fuzzysearch/_generic_search.pyx":229 + /* "fuzzysearch/_generic_search.pyx":243 * * finally: * free(candidates) # <<<<<<<<<<<<<< @@ -2816,7 +2620,7 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_2_c_find_near_matches_ */ free(__pyx_v_candidates); - /* "fuzzysearch/_generic_search.pyx":230 + /* "fuzzysearch/_generic_search.pyx":244 * finally: * free(candidates) * free(new_candidates) # <<<<<<<<<<<<<< @@ -2842,7 +2646,7 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_2_c_find_near_matches_ __pyx_L7:; } - /* "fuzzysearch/_generic_search.pyx":232 + /* "fuzzysearch/_generic_search.pyx":246 * free(new_candidates) * * return matches # <<<<<<<<<<<<<< @@ -2854,12 +2658,12 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_2_c_find_near_matches_ __pyx_r = __pyx_v_matches; goto __pyx_L0; - /* "fuzzysearch/_generic_search.pyx":62 - * ) - * - * def _c_find_near_matches_generic_linear_programming( # <<<<<<<<<<<<<< - * char* subsequence, size_t subseq_len, - * char* sequence, size_t seq_len, + /* "fuzzysearch/_generic_search.pyx":75 + * # subsequence strings, which means if they contain null bytes the data after + * # the first null byte will not be copied. + * cdef _c_find_near_matches_generic_linear_programming( # <<<<<<<<<<<<<< + * const char* subsequence, size_t subseq_len, + * const char* sequence, size_t seq_len, */ /* function exit code */ @@ -2872,7 +2676,7 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_2_c_find_near_matches_ __Pyx_XDECREF(__pyx_t_15); __Pyx_XDECREF(__pyx_t_25); __Pyx_AddTraceback("fuzzysearch._generic_search._c_find_near_matches_generic_linear_programming", __pyx_clineno, __pyx_lineno, __pyx_filename); - __pyx_r = NULL; + __pyx_r = 0; __pyx_L0:; __Pyx_XDECREF(__pyx_v_matches); __Pyx_XDECREF(__pyx_v_n_skipped); @@ -2881,7 +2685,7 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_2_c_find_near_matches_ return __pyx_r; } -/* "fuzzysearch/_generic_search.pyx":236 +/* "fuzzysearch/_generic_search.pyx":250 * * * def c_find_near_matches_generic_ngrams(subsequence, sequence, # <<<<<<<<<<<<<< @@ -2890,10 +2694,10 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_2_c_find_near_matches_ */ /* Python wrapper */ -static PyObject *__pyx_pw_11fuzzysearch_15_generic_search_5c_find_near_matches_generic_ngrams(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ -static char __pyx_doc_11fuzzysearch_15_generic_search_4c_find_near_matches_generic_ngrams[] = "search for near-matches of subsequence in sequence\n\n This searches for near-matches, where the nearly-matching parts of the\n sequence must meet the following limitations (relative to the subsequence):\n\n * the maximum allowed number of character substitutions\n * the maximum allowed number of new characters inserted\n * and the maximum allowed number of character deletions\n * the total number of substitutions, insertions and deletions\n "; -static PyMethodDef __pyx_mdef_11fuzzysearch_15_generic_search_5c_find_near_matches_generic_ngrams = {__Pyx_NAMESTR("c_find_near_matches_generic_ngrams"), (PyCFunction)__pyx_pw_11fuzzysearch_15_generic_search_5c_find_near_matches_generic_ngrams, METH_VARARGS|METH_KEYWORDS, __Pyx_DOCSTR(__pyx_doc_11fuzzysearch_15_generic_search_4c_find_near_matches_generic_ngrams)}; -static PyObject *__pyx_pw_11fuzzysearch_15_generic_search_5c_find_near_matches_generic_ngrams(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { +static PyObject *__pyx_pw_11fuzzysearch_15_generic_search_3c_find_near_matches_generic_ngrams(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ +static char __pyx_doc_11fuzzysearch_15_generic_search_2c_find_near_matches_generic_ngrams[] = "search for near-matches of subsequence in sequence\n\n This searches for near-matches, where the nearly-matching parts of the\n sequence must meet the following limitations (relative to the subsequence):\n\n * the maximum allowed number of character substitutions\n * the maximum allowed number of new characters inserted\n * and the maximum allowed number of character deletions\n * the total number of substitutions, insertions and deletions\n "; +static PyMethodDef __pyx_mdef_11fuzzysearch_15_generic_search_3c_find_near_matches_generic_ngrams = {__Pyx_NAMESTR("c_find_near_matches_generic_ngrams"), (PyCFunction)__pyx_pw_11fuzzysearch_15_generic_search_3c_find_near_matches_generic_ngrams, METH_VARARGS|METH_KEYWORDS, __Pyx_DOCSTR(__pyx_doc_11fuzzysearch_15_generic_search_2c_find_near_matches_generic_ngrams)}; +static PyObject *__pyx_pw_11fuzzysearch_15_generic_search_3c_find_near_matches_generic_ngrams(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { PyObject *__pyx_v_subsequence = 0; PyObject *__pyx_v_sequence = 0; PyObject *__pyx_v_max_substitutions = 0; @@ -2910,7 +2714,7 @@ static PyObject *__pyx_pw_11fuzzysearch_15_generic_search_5c_find_near_matches_g static PyObject **__pyx_pyargnames[] = {&__pyx_n_s_subsequence,&__pyx_n_s_sequence,&__pyx_n_s_max_substitutions,&__pyx_n_s_max_insertions,&__pyx_n_s_max_deletions,&__pyx_n_s_max_l_dist,0}; PyObject* values[6] = {0,0,0,0,0,0}; - /* "fuzzysearch/_generic_search.pyx":238 + /* "fuzzysearch/_generic_search.pyx":252 * def c_find_near_matches_generic_ngrams(subsequence, sequence, * max_substitutions, max_insertions, * max_deletions, max_l_dist=None): # <<<<<<<<<<<<<< @@ -2939,22 +2743,22 @@ static PyObject *__pyx_pw_11fuzzysearch_15_generic_search_5c_find_near_matches_g case 1: if (likely((values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_sequence)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("c_find_near_matches_generic_ngrams", 0, 5, 6, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 236; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("c_find_near_matches_generic_ngrams", 0, 5, 6, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 250; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 2: if (likely((values[2] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_max_substitutions)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("c_find_near_matches_generic_ngrams", 0, 5, 6, 2); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 236; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("c_find_near_matches_generic_ngrams", 0, 5, 6, 2); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 250; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 3: if (likely((values[3] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_max_insertions)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("c_find_near_matches_generic_ngrams", 0, 5, 6, 3); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 236; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("c_find_near_matches_generic_ngrams", 0, 5, 6, 3); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 250; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 4: if (likely((values[4] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_max_deletions)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("c_find_near_matches_generic_ngrams", 0, 5, 6, 4); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 236; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("c_find_near_matches_generic_ngrams", 0, 5, 6, 4); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 250; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 5: if (kw_args > 0) { @@ -2963,7 +2767,7 @@ static PyObject *__pyx_pw_11fuzzysearch_15_generic_search_5c_find_near_matches_g } } if (unlikely(kw_args > 0)) { - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "c_find_near_matches_generic_ngrams") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 236; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "c_find_near_matches_generic_ngrams") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 250; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } } else { switch (PyTuple_GET_SIZE(__pyx_args)) { @@ -2986,15 +2790,15 @@ static PyObject *__pyx_pw_11fuzzysearch_15_generic_search_5c_find_near_matches_g } goto __pyx_L4_argument_unpacking_done; __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("c_find_near_matches_generic_ngrams", 0, 5, 6, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 236; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("c_find_near_matches_generic_ngrams", 0, 5, 6, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 250; __pyx_clineno = __LINE__; goto __pyx_L3_error;} __pyx_L3_error:; __Pyx_AddTraceback("fuzzysearch._generic_search.c_find_near_matches_generic_ngrams", __pyx_clineno, __pyx_lineno, __pyx_filename); __Pyx_RefNannyFinishContext(); return NULL; __pyx_L4_argument_unpacking_done:; - __pyx_r = __pyx_pf_11fuzzysearch_15_generic_search_4c_find_near_matches_generic_ngrams(__pyx_self, __pyx_v_subsequence, __pyx_v_sequence, __pyx_v_max_substitutions, __pyx_v_max_insertions, __pyx_v_max_deletions, __pyx_v_max_l_dist); + __pyx_r = __pyx_pf_11fuzzysearch_15_generic_search_2c_find_near_matches_generic_ngrams(__pyx_self, __pyx_v_subsequence, __pyx_v_sequence, __pyx_v_max_substitutions, __pyx_v_max_insertions, __pyx_v_max_deletions, __pyx_v_max_l_dist); - /* "fuzzysearch/_generic_search.pyx":236 + /* "fuzzysearch/_generic_search.pyx":250 * * * def c_find_near_matches_generic_ngrams(subsequence, sequence, # <<<<<<<<<<<<<< @@ -3007,7 +2811,7 @@ static PyObject *__pyx_pw_11fuzzysearch_15_generic_search_5c_find_near_matches_g return __pyx_r; } -static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_4c_find_near_matches_generic_ngrams(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_subsequence, PyObject *__pyx_v_sequence, PyObject *__pyx_v_max_substitutions, PyObject *__pyx_v_max_insertions, PyObject *__pyx_v_max_deletions, PyObject *__pyx_v_max_l_dist) { +static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_2c_find_near_matches_generic_ngrams(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_subsequence, PyObject *__pyx_v_sequence, PyObject *__pyx_v_max_substitutions, PyObject *__pyx_v_max_insertions, PyObject *__pyx_v_max_deletions, PyObject *__pyx_v_max_l_dist) { size_t __pyx_v__subseq_len; CYTHON_UNUSED size_t __pyx_v__subseq_len_minus_one; size_t __pyx_v__seq_len; @@ -3015,14 +2819,14 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_4c_find_near_matches_g unsigned int __pyx_v_c_max_insertions; unsigned int __pyx_v_c_max_deletions; unsigned int __pyx_v_c_max_l_dist; - char *__pyx_v_c_sequence; - char *__pyx_v_c_subsequence; - char *__pyx_v_ngram_str; + char const *__pyx_v_c_sequence; + char const *__pyx_v_c_subsequence; size_t __pyx_v_ngram_len; - int __pyx_v_index; int __pyx_v_small_search_start_index; size_t __pyx_v_ngram_start; - char *__pyx_v_match_ptr; + char const *__pyx_v_match_ptr; + int *__pyx_v_kmpNext; + struct KMPstate __pyx_v_kmp_state; PyObject *__pyx_v_matches = NULL; PyObject *__pyx_v_small_search_length = NULL; PyObject *__pyx_v_match = NULL; @@ -3037,141 +2841,139 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_4c_find_near_matches_g unsigned int __pyx_t_7; PyObject *__pyx_t_8 = NULL; PyObject *__pyx_t_9 = NULL; - char *__pyx_t_10; - long __pyx_t_11; - PyObject *(*__pyx_t_12)(PyObject *); - size_t __pyx_t_13; - PyObject *__pyx_t_14 = NULL; - PyObject *__pyx_t_15 = NULL; - PyObject *__pyx_t_16 = NULL; + char const *__pyx_t_10; + char const *__pyx_t_11; + long __pyx_t_12; + PyObject *(*__pyx_t_13)(PyObject *); + size_t __pyx_t_14; + Py_ssize_t __pyx_t_15; + PyObject *(*__pyx_t_16)(PyObject *); PyObject *__pyx_t_17 = NULL; PyObject *__pyx_t_18 = NULL; PyObject *__pyx_t_19 = NULL; - Py_ssize_t __pyx_t_20; - PyObject *(*__pyx_t_21)(PyObject *); + int __pyx_t_20; + int __pyx_t_21; int __pyx_t_22; - int __pyx_t_23; - int __pyx_t_24; - char const *__pyx_t_25; + char const *__pyx_t_23; + PyObject *__pyx_t_24 = NULL; + PyObject *__pyx_t_25 = NULL; PyObject *__pyx_t_26 = NULL; PyObject *__pyx_t_27 = NULL; PyObject *__pyx_t_28 = NULL; PyObject *__pyx_t_29 = NULL; - PyObject *__pyx_t_30 = NULL; - PyObject *__pyx_t_31 = NULL; int __pyx_lineno = 0; const char *__pyx_filename = NULL; int __pyx_clineno = 0; __Pyx_RefNannySetupContext("c_find_near_matches_generic_ngrams", 0); - /* "fuzzysearch/_generic_search.pyx":249 + /* "fuzzysearch/_generic_search.pyx":263 * * the total number of substitutions, insertions and deletions * """ * if not isinstance(sequence, ALLOWED_TYPES): # <<<<<<<<<<<<<< * raise TypeError('sequence is of invalid type %s' % type(subsequence)) * if not isinstance(subsequence, ALLOWED_TYPES): */ - __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_ALLOWED_TYPES); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 249; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_ALLOWED_TYPES); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 263; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = PyObject_IsInstance(__pyx_v_sequence, __pyx_t_1); if (unlikely(__pyx_t_2 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 249; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyObject_IsInstance(__pyx_v_sequence, __pyx_t_1); if (unlikely(__pyx_t_2 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 263; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_t_3 = ((!(__pyx_t_2 != 0)) != 0); if (__pyx_t_3) { - /* "fuzzysearch/_generic_search.pyx":250 + /* "fuzzysearch/_generic_search.pyx":264 * """ * if not isinstance(sequence, ALLOWED_TYPES): * raise TypeError('sequence is of invalid type %s' % type(subsequence)) # <<<<<<<<<<<<<< * if not isinstance(subsequence, ALLOWED_TYPES): * raise TypeError('subsequence is of invalid type %s' % type(subsequence)) */ - __pyx_t_1 = __Pyx_PyString_Format(__pyx_kp_s_sequence_is_of_invalid_type_s, ((PyObject *)Py_TYPE(__pyx_v_subsequence))); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 250; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyString_Format(__pyx_kp_s_sequence_is_of_invalid_type_s, ((PyObject *)Py_TYPE(__pyx_v_subsequence))); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 264; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_4 = PyTuple_New(1); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 250; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyTuple_New(1); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 264; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); PyTuple_SET_ITEM(__pyx_t_4, 0, __pyx_t_1); __Pyx_GIVEREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyObject_Call(__pyx_builtin_TypeError, __pyx_t_4, NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 250; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_Call(__pyx_builtin_TypeError, __pyx_t_4, NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 264; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; __Pyx_Raise(__pyx_t_1, 0, 0, 0); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - {__pyx_filename = __pyx_f[0]; __pyx_lineno = 250; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + {__pyx_filename = __pyx_f[0]; __pyx_lineno = 264; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } - /* "fuzzysearch/_generic_search.pyx":251 + /* "fuzzysearch/_generic_search.pyx":265 * if not isinstance(sequence, ALLOWED_TYPES): * raise TypeError('sequence is of invalid type %s' % type(subsequence)) * if not isinstance(subsequence, ALLOWED_TYPES): # <<<<<<<<<<<<<< * raise TypeError('subsequence is of invalid type %s' % type(subsequence)) * */ - __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_ALLOWED_TYPES); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 251; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_ALLOWED_TYPES); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 265; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_3 = PyObject_IsInstance(__pyx_v_subsequence, __pyx_t_1); if (unlikely(__pyx_t_3 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 251; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyObject_IsInstance(__pyx_v_subsequence, __pyx_t_1); if (unlikely(__pyx_t_3 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 265; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_t_2 = ((!(__pyx_t_3 != 0)) != 0); if (__pyx_t_2) { - /* "fuzzysearch/_generic_search.pyx":252 + /* "fuzzysearch/_generic_search.pyx":266 * raise TypeError('sequence is of invalid type %s' % type(subsequence)) * if not isinstance(subsequence, ALLOWED_TYPES): * raise TypeError('subsequence is of invalid type %s' % type(subsequence)) # <<<<<<<<<<<<<< * * if not subsequence: */ - __pyx_t_1 = __Pyx_PyString_Format(__pyx_kp_s_subsequence_is_of_invalid_type_s, ((PyObject *)Py_TYPE(__pyx_v_subsequence))); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 252; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyString_Format(__pyx_kp_s_subsequence_is_of_invalid_type_s, ((PyObject *)Py_TYPE(__pyx_v_subsequence))); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 266; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_4 = PyTuple_New(1); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 252; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyTuple_New(1); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 266; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); PyTuple_SET_ITEM(__pyx_t_4, 0, __pyx_t_1); __Pyx_GIVEREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyObject_Call(__pyx_builtin_TypeError, __pyx_t_4, NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 252; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_Call(__pyx_builtin_TypeError, __pyx_t_4, NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 266; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; __Pyx_Raise(__pyx_t_1, 0, 0, 0); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - {__pyx_filename = __pyx_f[0]; __pyx_lineno = 252; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + {__pyx_filename = __pyx_f[0]; __pyx_lineno = 266; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } - /* "fuzzysearch/_generic_search.pyx":254 + /* "fuzzysearch/_generic_search.pyx":268 * raise TypeError('subsequence is of invalid type %s' % type(subsequence)) * * if not subsequence: # <<<<<<<<<<<<<< * raise ValueError('Given subsequence is empty!') * */ - __pyx_t_2 = __Pyx_PyObject_IsTrue(__pyx_v_subsequence); if (unlikely(__pyx_t_2 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 254; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyObject_IsTrue(__pyx_v_subsequence); if (unlikely(__pyx_t_2 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 268; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_t_3 = ((!__pyx_t_2) != 0); if (__pyx_t_3) { - /* "fuzzysearch/_generic_search.pyx":255 + /* "fuzzysearch/_generic_search.pyx":269 * * if not subsequence: * raise ValueError('Given subsequence is empty!') # <<<<<<<<<<<<<< * * # optimization: prepare some often used things in advance */ - __pyx_t_1 = __Pyx_PyObject_Call(__pyx_builtin_ValueError, __pyx_tuple__2, NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 255; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_Call(__pyx_builtin_ValueError, __pyx_tuple__2, NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 269; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_Raise(__pyx_t_1, 0, 0, 0); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - {__pyx_filename = __pyx_f[0]; __pyx_lineno = 255; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + {__pyx_filename = __pyx_f[0]; __pyx_lineno = 269; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } - /* "fuzzysearch/_generic_search.pyx":258 + /* "fuzzysearch/_generic_search.pyx":272 * * # optimization: prepare some often used things in advance * cdef size_t _subseq_len = len(subsequence) # <<<<<<<<<<<<<< * cdef size_t _subseq_len_minus_one = _subseq_len - 1 * cdef size_t _seq_len = len(sequence) */ - __pyx_t_5 = PyObject_Length(__pyx_v_subsequence); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 258; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyObject_Length(__pyx_v_subsequence); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 272; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__subseq_len = __pyx_t_5; - /* "fuzzysearch/_generic_search.pyx":259 + /* "fuzzysearch/_generic_search.pyx":273 * # optimization: prepare some often used things in advance * cdef size_t _subseq_len = len(subsequence) * cdef size_t _subseq_len_minus_one = _subseq_len - 1 # <<<<<<<<<<<<<< @@ -3180,17 +2982,17 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_4c_find_near_matches_g */ __pyx_v__subseq_len_minus_one = (__pyx_v__subseq_len - 1); - /* "fuzzysearch/_generic_search.pyx":260 + /* "fuzzysearch/_generic_search.pyx":274 * cdef size_t _subseq_len = len(subsequence) * cdef size_t _subseq_len_minus_one = _subseq_len - 1 * cdef size_t _seq_len = len(sequence) # <<<<<<<<<<<<<< * * cdef unsigned int c_max_substitutions = max_substitutions if max_substitutions is not None else (1<<29) */ - __pyx_t_5 = PyObject_Length(__pyx_v_sequence); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 260; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyObject_Length(__pyx_v_sequence); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 274; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__seq_len = __pyx_t_5; - /* "fuzzysearch/_generic_search.pyx":262 + /* "fuzzysearch/_generic_search.pyx":276 * cdef size_t _seq_len = len(sequence) * * cdef unsigned int c_max_substitutions = max_substitutions if max_substitutions is not None else (1<<29) # <<<<<<<<<<<<<< @@ -3199,14 +3001,14 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_4c_find_near_matches_g */ __pyx_t_3 = (__pyx_v_max_substitutions != Py_None); if ((__pyx_t_3 != 0)) { - __pyx_t_7 = __Pyx_PyInt_As_unsigned_int(__pyx_v_max_substitutions); if (unlikely((__pyx_t_7 == (unsigned int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 262; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = __Pyx_PyInt_As_unsigned_int(__pyx_v_max_substitutions); if (unlikely((__pyx_t_7 == (unsigned int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 276; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_t_6 = __pyx_t_7; } else { __pyx_t_6 = 536870912; } __pyx_v_c_max_substitutions = __pyx_t_6; - /* "fuzzysearch/_generic_search.pyx":263 + /* "fuzzysearch/_generic_search.pyx":277 * * cdef unsigned int c_max_substitutions = max_substitutions if max_substitutions is not None else (1<<29) * cdef unsigned int c_max_insertions = max_insertions if max_insertions is not None else (1<<29) # <<<<<<<<<<<<<< @@ -3215,14 +3017,14 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_4c_find_near_matches_g */ __pyx_t_3 = (__pyx_v_max_insertions != Py_None); if ((__pyx_t_3 != 0)) { - __pyx_t_7 = __Pyx_PyInt_As_unsigned_int(__pyx_v_max_insertions); if (unlikely((__pyx_t_7 == (unsigned int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 263; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = __Pyx_PyInt_As_unsigned_int(__pyx_v_max_insertions); if (unlikely((__pyx_t_7 == (unsigned int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 277; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_t_6 = __pyx_t_7; } else { __pyx_t_6 = 536870912; } __pyx_v_c_max_insertions = __pyx_t_6; - /* "fuzzysearch/_generic_search.pyx":264 + /* "fuzzysearch/_generic_search.pyx":278 * cdef unsigned int c_max_substitutions = max_substitutions if max_substitutions is not None else (1<<29) * cdef unsigned int c_max_insertions = max_insertions if max_insertions is not None else (1<<29) * cdef unsigned int c_max_deletions = max_deletions if max_deletions is not None else (1<<29) # <<<<<<<<<<<<<< @@ -3231,14 +3033,14 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_4c_find_near_matches_g */ __pyx_t_3 = (__pyx_v_max_deletions != Py_None); if ((__pyx_t_3 != 0)) { - __pyx_t_7 = __Pyx_PyInt_As_unsigned_int(__pyx_v_max_deletions); if (unlikely((__pyx_t_7 == (unsigned int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 264; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = __Pyx_PyInt_As_unsigned_int(__pyx_v_max_deletions); if (unlikely((__pyx_t_7 == (unsigned int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 278; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_t_6 = __pyx_t_7; } else { __pyx_t_6 = 536870912; } __pyx_v_c_max_deletions = __pyx_t_6; - /* "fuzzysearch/_generic_search.pyx":269 + /* "fuzzysearch/_generic_search.pyx":283 * cdef unsigned int c_max_l_dist = min( * max_l_dist if max_l_dist is not None else (1<<29), * c_max_substitutions + c_max_insertions + c_max_deletions, # <<<<<<<<<<<<<< @@ -3247,7 +3049,7 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_4c_find_near_matches_g */ __pyx_t_6 = ((__pyx_v_c_max_substitutions + __pyx_v_c_max_insertions) + __pyx_v_c_max_deletions); - /* "fuzzysearch/_generic_search.pyx":268 + /* "fuzzysearch/_generic_search.pyx":282 * # TODO: write a good comment * cdef unsigned int c_max_l_dist = min( * max_l_dist if max_l_dist is not None else (1<<29), # <<<<<<<<<<<<<< @@ -3263,21 +3065,21 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_4c_find_near_matches_g __pyx_t_1 = __pyx_int_536870912; } - /* "fuzzysearch/_generic_search.pyx":269 + /* "fuzzysearch/_generic_search.pyx":283 * cdef unsigned int c_max_l_dist = min( * max_l_dist if max_l_dist is not None else (1<<29), * c_max_substitutions + c_max_insertions + c_max_deletions, # <<<<<<<<<<<<<< * ) * */ - __pyx_t_8 = __Pyx_PyInt_From_unsigned_int(__pyx_t_6); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 269; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_8 = __Pyx_PyInt_From_unsigned_int(__pyx_t_6); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 283; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_8); - __pyx_t_9 = PyObject_RichCompare(__pyx_t_8, __pyx_t_1, Py_LT); __Pyx_XGOTREF(__pyx_t_9); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 269; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_9 = PyObject_RichCompare(__pyx_t_8, __pyx_t_1, Py_LT); __Pyx_XGOTREF(__pyx_t_9); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 283; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - __pyx_t_3 = __Pyx_PyObject_IsTrue(__pyx_t_9); if (unlikely(__pyx_t_3 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 269; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = __Pyx_PyObject_IsTrue(__pyx_t_9); if (unlikely(__pyx_t_3 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 283; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; if (__pyx_t_3) { - __pyx_t_9 = __Pyx_PyInt_From_unsigned_int(__pyx_t_6); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 269; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_9 = __Pyx_PyInt_From_unsigned_int(__pyx_t_6); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 283; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_9); __pyx_t_4 = __pyx_t_9; __pyx_t_9 = 0; @@ -3286,39 +3088,39 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_4c_find_near_matches_g __pyx_t_4 = __pyx_t_1; } __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_6 = __Pyx_PyInt_As_unsigned_int(__pyx_t_4); if (unlikely((__pyx_t_6 == (unsigned int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 269; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyInt_As_unsigned_int(__pyx_t_4); if (unlikely((__pyx_t_6 == (unsigned int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 283; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; __pyx_v_c_max_l_dist = __pyx_t_6; - /* "fuzzysearch/_generic_search.pyx":272 + /* "fuzzysearch/_generic_search.pyx":286 * ) * - * cdef char* c_sequence = sequence # <<<<<<<<<<<<<< - * cdef char* c_subsequence = subsequence - * cdef char* ngram_str + * cdef const char* c_sequence = sequence # <<<<<<<<<<<<<< + * cdef const char* c_subsequence = subsequence + * */ - __pyx_t_10 = __Pyx_PyObject_AsString(__pyx_v_sequence); if (unlikely((!__pyx_t_10) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 272; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = __Pyx_PyObject_AsString(__pyx_v_sequence); if (unlikely((!__pyx_t_10) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 286; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_c_sequence = __pyx_t_10; - /* "fuzzysearch/_generic_search.pyx":273 + /* "fuzzysearch/_generic_search.pyx":287 * - * cdef char* c_sequence = sequence - * cdef char* c_subsequence = subsequence # <<<<<<<<<<<<<< - * cdef char* ngram_str + * cdef const char* c_sequence = sequence + * cdef const char* c_subsequence = subsequence # <<<<<<<<<<<<<< * + * cdef size_t ngram_len = _subseq_len // (c_max_l_dist + 1) */ - __pyx_t_10 = __Pyx_PyObject_AsString(__pyx_v_subsequence); if (unlikely((!__pyx_t_10) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 273; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_v_c_subsequence = __pyx_t_10; + __pyx_t_11 = __Pyx_PyObject_AsString(__pyx_v_subsequence); if (unlikely((!__pyx_t_11) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 287; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_v_c_subsequence = __pyx_t_11; - /* "fuzzysearch/_generic_search.pyx":276 - * cdef char* ngram_str + /* "fuzzysearch/_generic_search.pyx":289 + * cdef const char* c_subsequence = subsequence * * cdef size_t ngram_len = _subseq_len // (c_max_l_dist + 1) # <<<<<<<<<<<<<< * if ngram_len == 0: * raise ValueError('the subsequence length must be greater than max_l_dist') */ - __pyx_t_11 = (__pyx_v_c_max_l_dist + 1); - if (unlikely(__pyx_t_11 == 0)) { + __pyx_t_12 = (__pyx_v_c_max_l_dist + 1); + if (unlikely(__pyx_t_12 == 0)) { #ifdef WITH_THREAD PyGILState_STATE __pyx_gilstate_save = PyGILState_Ensure(); #endif @@ -3326,11 +3128,11 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_4c_find_near_matches_g #ifdef WITH_THREAD PyGILState_Release(__pyx_gilstate_save); #endif - {__pyx_filename = __pyx_f[0]; __pyx_lineno = 276; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + {__pyx_filename = __pyx_f[0]; __pyx_lineno = 289; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } - __pyx_v_ngram_len = (__pyx_v__subseq_len / __pyx_t_11); + __pyx_v_ngram_len = (__pyx_v__subseq_len / __pyx_t_12); - /* "fuzzysearch/_generic_search.pyx":277 + /* "fuzzysearch/_generic_search.pyx":290 * * cdef size_t ngram_len = _subseq_len // (c_max_l_dist + 1) * if ngram_len == 0: # <<<<<<<<<<<<<< @@ -3340,91 +3142,82 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_4c_find_near_matches_g __pyx_t_3 = ((__pyx_v_ngram_len == 0) != 0); if (__pyx_t_3) { - /* "fuzzysearch/_generic_search.pyx":278 + /* "fuzzysearch/_generic_search.pyx":291 * cdef size_t ngram_len = _subseq_len // (c_max_l_dist + 1) * if ngram_len == 0: * raise ValueError('the subsequence length must be greater than max_l_dist') # <<<<<<<<<<<<<< * - * ngram_str = malloc((ngram_len + 1) * sizeof(char)) + * cdef int index, small_search_start_index */ - __pyx_t_4 = __Pyx_PyObject_Call(__pyx_builtin_ValueError, __pyx_tuple__3, NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 278; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = __Pyx_PyObject_Call(__pyx_builtin_ValueError, __pyx_tuple__3, NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 291; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); __Pyx_Raise(__pyx_t_4, 0, 0, 0); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - {__pyx_filename = __pyx_f[0]; __pyx_lineno = 278; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + {__pyx_filename = __pyx_f[0]; __pyx_lineno = 291; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } - /* "fuzzysearch/_generic_search.pyx":280 - * raise ValueError('the subsequence length must be greater than max_l_dist') - * - * ngram_str = malloc((ngram_len + 1) * sizeof(char)) # <<<<<<<<<<<<<< - * if ngram_str is NULL: + /* "fuzzysearch/_generic_search.pyx":299 + * cdef int *kmpNext + * cdef KMPstate kmp_state + * kmpNext = malloc(ngram_len * sizeof(int)) # <<<<<<<<<<<<<< + * if kmpNext is NULL: * raise MemoryError() */ - __pyx_v_ngram_str = ((char *)malloc(((__pyx_v_ngram_len + 1) * (sizeof(char))))); + __pyx_v_kmpNext = ((int *)malloc((__pyx_v_ngram_len * (sizeof(int))))); - /* "fuzzysearch/_generic_search.pyx":281 - * - * ngram_str = malloc((ngram_len + 1) * sizeof(char)) - * if ngram_str is NULL: # <<<<<<<<<<<<<< + /* "fuzzysearch/_generic_search.pyx":300 + * cdef KMPstate kmp_state + * kmpNext = malloc(ngram_len * sizeof(int)) + * if kmpNext is NULL: # <<<<<<<<<<<<<< * raise MemoryError() * */ - __pyx_t_3 = ((__pyx_v_ngram_str == NULL) != 0); + __pyx_t_3 = ((__pyx_v_kmpNext == NULL) != 0); if (__pyx_t_3) { - /* "fuzzysearch/_generic_search.pyx":282 - * ngram_str = malloc((ngram_len + 1) * sizeof(char)) - * if ngram_str is NULL: + /* "fuzzysearch/_generic_search.pyx":301 + * kmpNext = malloc(ngram_len * sizeof(int)) + * if kmpNext is NULL: * raise MemoryError() # <<<<<<<<<<<<<< * - * cdef int index, small_search_start_index - */ - PyErr_NoMemory(); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 282; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - } - - /* "fuzzysearch/_generic_search.pyx":288 - * cdef char *match_ptr - * - * matches = [] # <<<<<<<<<<<<<< - * * try: */ - __pyx_t_4 = PyList_New(0); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 288; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_4); - __pyx_v_matches = ((PyObject*)__pyx_t_4); - __pyx_t_4 = 0; + PyErr_NoMemory(); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 301; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + } - /* "fuzzysearch/_generic_search.pyx":290 - * matches = [] + /* "fuzzysearch/_generic_search.pyx":303 + * raise MemoryError() * * try: # <<<<<<<<<<<<<< - * ngram_str[ngram_len] = 0 - * + * matches = [] + * for ngram_start in xrange(0, _subseq_len - ngram_len + 1, ngram_len): */ /*try:*/ { - /* "fuzzysearch/_generic_search.pyx":291 + /* "fuzzysearch/_generic_search.pyx":304 * * try: - * ngram_str[ngram_len] = 0 # <<<<<<<<<<<<<< - * + * matches = [] # <<<<<<<<<<<<<< * for ngram_start in xrange(0, _subseq_len - ngram_len + 1, ngram_len): + * preKMP(c_subsequence + ngram_start, ngram_len, kmpNext) */ - (__pyx_v_ngram_str[__pyx_v_ngram_len]) = 0; - - /* "fuzzysearch/_generic_search.pyx":293 - * ngram_str[ngram_len] = 0 - * - * for ngram_start in xrange(0, _subseq_len - ngram_len + 1, ngram_len): # <<<<<<<<<<<<<< - * strncpy(ngram_str, c_subsequence + ngram_start, ngram_len) - * - */ - __pyx_t_4 = __Pyx_PyInt_FromSize_t(((__pyx_v__subseq_len - __pyx_v_ngram_len) + 1)); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 293; __pyx_clineno = __LINE__; goto __pyx_L9_error;} + __pyx_t_4 = PyList_New(0); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 304; __pyx_clineno = __LINE__; goto __pyx_L9_error;} __Pyx_GOTREF(__pyx_t_4); - __pyx_t_1 = __Pyx_PyInt_FromSize_t(__pyx_v_ngram_len); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 293; __pyx_clineno = __LINE__; goto __pyx_L9_error;} + __pyx_v_matches = ((PyObject*)__pyx_t_4); + __pyx_t_4 = 0; + + /* "fuzzysearch/_generic_search.pyx":305 + * try: + * matches = [] + * for ngram_start in xrange(0, _subseq_len - ngram_len + 1, ngram_len): # <<<<<<<<<<<<<< + * preKMP(c_subsequence + ngram_start, ngram_len, kmpNext) + * + */ + __pyx_t_4 = __Pyx_PyInt_FromSize_t(((__pyx_v__subseq_len - __pyx_v_ngram_len) + 1)); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 305; __pyx_clineno = __LINE__; goto __pyx_L9_error;} + __Pyx_GOTREF(__pyx_t_4); + __pyx_t_1 = __Pyx_PyInt_FromSize_t(__pyx_v_ngram_len); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 305; __pyx_clineno = __LINE__; goto __pyx_L9_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_9 = PyTuple_New(3); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 293; __pyx_clineno = __LINE__; goto __pyx_L9_error;} + __pyx_t_9 = PyTuple_New(3); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 305; __pyx_clineno = __LINE__; goto __pyx_L9_error;} __Pyx_GOTREF(__pyx_t_9); __Pyx_INCREF(__pyx_int_0); PyTuple_SET_ITEM(__pyx_t_9, 0, __pyx_int_0); @@ -3435,110 +3228,110 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_4c_find_near_matches_g __Pyx_GIVEREF(__pyx_t_1); __pyx_t_4 = 0; __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyObject_Call(__pyx_builtin_xrange, __pyx_t_9, NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 293; __pyx_clineno = __LINE__; goto __pyx_L9_error;} + __pyx_t_1 = __Pyx_PyObject_Call(__pyx_builtin_xrange, __pyx_t_9, NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 305; __pyx_clineno = __LINE__; goto __pyx_L9_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; if (PyList_CheckExact(__pyx_t_1) || PyTuple_CheckExact(__pyx_t_1)) { __pyx_t_9 = __pyx_t_1; __Pyx_INCREF(__pyx_t_9); __pyx_t_5 = 0; - __pyx_t_12 = NULL; + __pyx_t_13 = NULL; } else { - __pyx_t_5 = -1; __pyx_t_9 = PyObject_GetIter(__pyx_t_1); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 293; __pyx_clineno = __LINE__; goto __pyx_L9_error;} + __pyx_t_5 = -1; __pyx_t_9 = PyObject_GetIter(__pyx_t_1); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 305; __pyx_clineno = __LINE__; goto __pyx_L9_error;} __Pyx_GOTREF(__pyx_t_9); - __pyx_t_12 = Py_TYPE(__pyx_t_9)->tp_iternext; + __pyx_t_13 = Py_TYPE(__pyx_t_9)->tp_iternext; } __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; for (;;) { - if (!__pyx_t_12 && PyList_CheckExact(__pyx_t_9)) { + if (!__pyx_t_13 && PyList_CheckExact(__pyx_t_9)) { if (__pyx_t_5 >= PyList_GET_SIZE(__pyx_t_9)) break; #if CYTHON_COMPILING_IN_CPYTHON - __pyx_t_1 = PyList_GET_ITEM(__pyx_t_9, __pyx_t_5); __Pyx_INCREF(__pyx_t_1); __pyx_t_5++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 293; __pyx_clineno = __LINE__; goto __pyx_L9_error;} + __pyx_t_1 = PyList_GET_ITEM(__pyx_t_9, __pyx_t_5); __Pyx_INCREF(__pyx_t_1); __pyx_t_5++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 305; __pyx_clineno = __LINE__; goto __pyx_L9_error;} #else - __pyx_t_1 = PySequence_ITEM(__pyx_t_9, __pyx_t_5); __pyx_t_5++; if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 293; __pyx_clineno = __LINE__; goto __pyx_L9_error;} + __pyx_t_1 = PySequence_ITEM(__pyx_t_9, __pyx_t_5); __pyx_t_5++; if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 305; __pyx_clineno = __LINE__; goto __pyx_L9_error;} #endif - } else if (!__pyx_t_12 && PyTuple_CheckExact(__pyx_t_9)) { + } else if (!__pyx_t_13 && PyTuple_CheckExact(__pyx_t_9)) { if (__pyx_t_5 >= PyTuple_GET_SIZE(__pyx_t_9)) break; #if CYTHON_COMPILING_IN_CPYTHON - __pyx_t_1 = PyTuple_GET_ITEM(__pyx_t_9, __pyx_t_5); __Pyx_INCREF(__pyx_t_1); __pyx_t_5++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 293; __pyx_clineno = __LINE__; goto __pyx_L9_error;} + __pyx_t_1 = PyTuple_GET_ITEM(__pyx_t_9, __pyx_t_5); __Pyx_INCREF(__pyx_t_1); __pyx_t_5++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 305; __pyx_clineno = __LINE__; goto __pyx_L9_error;} #else - __pyx_t_1 = PySequence_ITEM(__pyx_t_9, __pyx_t_5); __pyx_t_5++; if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 293; __pyx_clineno = __LINE__; goto __pyx_L9_error;} + __pyx_t_1 = PySequence_ITEM(__pyx_t_9, __pyx_t_5); __pyx_t_5++; if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 305; __pyx_clineno = __LINE__; goto __pyx_L9_error;} #endif } else { - __pyx_t_1 = __pyx_t_12(__pyx_t_9); + __pyx_t_1 = __pyx_t_13(__pyx_t_9); if (unlikely(!__pyx_t_1)) { PyObject* exc_type = PyErr_Occurred(); if (exc_type) { if (likely(exc_type == PyExc_StopIteration || PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else {__pyx_filename = __pyx_f[0]; __pyx_lineno = 293; __pyx_clineno = __LINE__; goto __pyx_L9_error;} + else {__pyx_filename = __pyx_f[0]; __pyx_lineno = 305; __pyx_clineno = __LINE__; goto __pyx_L9_error;} } break; } __Pyx_GOTREF(__pyx_t_1); } - __pyx_t_13 = __Pyx_PyInt_As_size_t(__pyx_t_1); if (unlikely((__pyx_t_13 == (size_t)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 293; __pyx_clineno = __LINE__; goto __pyx_L9_error;} + __pyx_t_14 = __Pyx_PyInt_As_size_t(__pyx_t_1); if (unlikely((__pyx_t_14 == (size_t)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 305; __pyx_clineno = __LINE__; goto __pyx_L9_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_v_ngram_start = __pyx_t_13; + __pyx_v_ngram_start = __pyx_t_14; - /* "fuzzysearch/_generic_search.pyx":294 - * + /* "fuzzysearch/_generic_search.pyx":306 + * matches = [] * for ngram_start in xrange(0, _subseq_len - ngram_len + 1, ngram_len): - * strncpy(ngram_str, c_subsequence + ngram_start, ngram_len) # <<<<<<<<<<<<<< + * preKMP(c_subsequence + ngram_start, ngram_len, kmpNext) # <<<<<<<<<<<<<< * - * # TODO: handle null characters properly! + * kmp_state = KMP_init(c_subsequence + ngram_start, ngram_len, c_sequence, _seq_len, kmpNext) */ - strncpy(__pyx_v_ngram_str, (__pyx_v_c_subsequence + __pyx_v_ngram_start), __pyx_v_ngram_len); + preKMP((__pyx_v_c_subsequence + __pyx_v_ngram_start), __pyx_v_ngram_len, __pyx_v_kmpNext); - /* "fuzzysearch/_generic_search.pyx":297 + /* "fuzzysearch/_generic_search.pyx":308 + * preKMP(c_subsequence + ngram_start, ngram_len, kmpNext) * - * # TODO: handle null characters properly! - * match_ptr = strstr(c_sequence, ngram_str) # <<<<<<<<<<<<<< + * kmp_state = KMP_init(c_subsequence + ngram_start, ngram_len, c_sequence, _seq_len, kmpNext) # <<<<<<<<<<<<<< + * match_ptr = KMP_find_next(&kmp_state) * while match_ptr != NULL: - * index = (match_ptr - c_sequence) */ - __pyx_v_match_ptr = strstr(__pyx_v_c_sequence, __pyx_v_ngram_str); + __pyx_v_kmp_state = KMP_init((__pyx_v_c_subsequence + __pyx_v_ngram_start), __pyx_v_ngram_len, __pyx_v_c_sequence, __pyx_v__seq_len, __pyx_v_kmpNext); - /* "fuzzysearch/_generic_search.pyx":298 - * # TODO: handle null characters properly! - * match_ptr = strstr(c_sequence, ngram_str) + /* "fuzzysearch/_generic_search.pyx":309 + * + * kmp_state = KMP_init(c_subsequence + ngram_start, ngram_len, c_sequence, _seq_len, kmpNext) + * match_ptr = KMP_find_next(&kmp_state) # <<<<<<<<<<<<<< + * while match_ptr != NULL: + * small_search_start_index = (match_ptr - c_sequence) - ngram_start - c_max_l_dist + */ + __pyx_v_match_ptr = KMP_find_next((&__pyx_v_kmp_state)); + + /* "fuzzysearch/_generic_search.pyx":310 + * kmp_state = KMP_init(c_subsequence + ngram_start, ngram_len, c_sequence, _seq_len, kmpNext) + * match_ptr = KMP_find_next(&kmp_state) * while match_ptr != NULL: # <<<<<<<<<<<<<< - * index = (match_ptr - c_sequence) - * small_search_start_index = index - ngram_start - c_max_l_dist + * small_search_start_index = (match_ptr - c_sequence) - ngram_start - c_max_l_dist + * small_search_length = _subseq_len + (2 * c_max_l_dist) */ while (1) { __pyx_t_3 = ((__pyx_v_match_ptr != NULL) != 0); if (!__pyx_t_3) break; - /* "fuzzysearch/_generic_search.pyx":299 - * match_ptr = strstr(c_sequence, ngram_str) + /* "fuzzysearch/_generic_search.pyx":311 + * match_ptr = KMP_find_next(&kmp_state) * while match_ptr != NULL: - * index = (match_ptr - c_sequence) # <<<<<<<<<<<<<< - * small_search_start_index = index - ngram_start - c_max_l_dist - * small_search_length = _subseq_len + (2 * c_max_l_dist) - */ - __pyx_v_index = (__pyx_v_match_ptr - __pyx_v_c_sequence); - - /* "fuzzysearch/_generic_search.pyx":300 - * while match_ptr != NULL: - * index = (match_ptr - c_sequence) - * small_search_start_index = index - ngram_start - c_max_l_dist # <<<<<<<<<<<<<< + * small_search_start_index = (match_ptr - c_sequence) - ngram_start - c_max_l_dist # <<<<<<<<<<<<<< * small_search_length = _subseq_len + (2 * c_max_l_dist) * if small_search_start_index < 0: */ - __pyx_v_small_search_start_index = ((__pyx_v_index - __pyx_v_ngram_start) - __pyx_v_c_max_l_dist); + __pyx_v_small_search_start_index = (((__pyx_v_match_ptr - __pyx_v_c_sequence) - __pyx_v_ngram_start) - __pyx_v_c_max_l_dist); - /* "fuzzysearch/_generic_search.pyx":301 - * index = (match_ptr - c_sequence) - * small_search_start_index = index - ngram_start - c_max_l_dist + /* "fuzzysearch/_generic_search.pyx":312 + * while match_ptr != NULL: + * small_search_start_index = (match_ptr - c_sequence) - ngram_start - c_max_l_dist * small_search_length = _subseq_len + (2 * c_max_l_dist) # <<<<<<<<<<<<<< * if small_search_start_index < 0: * small_search_length += small_search_start_index */ - __pyx_t_1 = __Pyx_PyInt_FromSize_t((__pyx_v__subseq_len + (2 * __pyx_v_c_max_l_dist))); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 301; __pyx_clineno = __LINE__; goto __pyx_L9_error;} + __pyx_t_1 = __Pyx_PyInt_FromSize_t((__pyx_v__subseq_len + (2 * __pyx_v_c_max_l_dist))); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 312; __pyx_clineno = __LINE__; goto __pyx_L9_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_XDECREF_SET(__pyx_v_small_search_length, __pyx_t_1); __pyx_t_1 = 0; - /* "fuzzysearch/_generic_search.pyx":302 - * small_search_start_index = index - ngram_start - c_max_l_dist + /* "fuzzysearch/_generic_search.pyx":313 + * small_search_start_index = (match_ptr - c_sequence) - ngram_start - c_max_l_dist * small_search_length = _subseq_len + (2 * c_max_l_dist) * if small_search_start_index < 0: # <<<<<<<<<<<<<< * small_search_length += small_search_start_index @@ -3547,22 +3340,22 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_4c_find_near_matches_g __pyx_t_3 = ((__pyx_v_small_search_start_index < 0) != 0); if (__pyx_t_3) { - /* "fuzzysearch/_generic_search.pyx":303 + /* "fuzzysearch/_generic_search.pyx":314 * small_search_length = _subseq_len + (2 * c_max_l_dist) * if small_search_start_index < 0: * small_search_length += small_search_start_index # <<<<<<<<<<<<<< * small_search_start_index = 0 * if small_search_start_index + small_search_length > _seq_len: */ - __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_small_search_start_index); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 303; __pyx_clineno = __LINE__; goto __pyx_L9_error;} + __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_small_search_start_index); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 314; __pyx_clineno = __LINE__; goto __pyx_L9_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_4 = PyNumber_InPlaceAdd(__pyx_v_small_search_length, __pyx_t_1); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 303; __pyx_clineno = __LINE__; goto __pyx_L9_error;} + __pyx_t_4 = PyNumber_InPlaceAdd(__pyx_v_small_search_length, __pyx_t_1); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 314; __pyx_clineno = __LINE__; goto __pyx_L9_error;} __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF_SET(__pyx_v_small_search_length, __pyx_t_4); __pyx_t_4 = 0; - /* "fuzzysearch/_generic_search.pyx":304 + /* "fuzzysearch/_generic_search.pyx":315 * if small_search_start_index < 0: * small_search_length += small_search_start_index * small_search_start_index = 0 # <<<<<<<<<<<<<< @@ -3574,35 +3367,35 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_4c_find_near_matches_g } __pyx_L15:; - /* "fuzzysearch/_generic_search.pyx":305 + /* "fuzzysearch/_generic_search.pyx":316 * small_search_length += small_search_start_index * small_search_start_index = 0 * if small_search_start_index + small_search_length > _seq_len: # <<<<<<<<<<<<<< * small_search_length = _seq_len - small_search_start_index * # try to expand left and/or right according to n_ngram */ - __pyx_t_4 = __Pyx_PyInt_From_int(__pyx_v_small_search_start_index); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 305; __pyx_clineno = __LINE__; goto __pyx_L9_error;} + __pyx_t_4 = __Pyx_PyInt_From_int(__pyx_v_small_search_start_index); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 316; __pyx_clineno = __LINE__; goto __pyx_L9_error;} __Pyx_GOTREF(__pyx_t_4); - __pyx_t_1 = PyNumber_Add(__pyx_t_4, __pyx_v_small_search_length); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 305; __pyx_clineno = __LINE__; goto __pyx_L9_error;} + __pyx_t_1 = PyNumber_Add(__pyx_t_4, __pyx_v_small_search_length); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 316; __pyx_clineno = __LINE__; goto __pyx_L9_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __pyx_t_4 = __Pyx_PyInt_FromSize_t(__pyx_v__seq_len); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 305; __pyx_clineno = __LINE__; goto __pyx_L9_error;} + __pyx_t_4 = __Pyx_PyInt_FromSize_t(__pyx_v__seq_len); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 316; __pyx_clineno = __LINE__; goto __pyx_L9_error;} __Pyx_GOTREF(__pyx_t_4); - __pyx_t_8 = PyObject_RichCompare(__pyx_t_1, __pyx_t_4, Py_GT); __Pyx_XGOTREF(__pyx_t_8); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 305; __pyx_clineno = __LINE__; goto __pyx_L9_error;} + __pyx_t_8 = PyObject_RichCompare(__pyx_t_1, __pyx_t_4, Py_GT); __Pyx_XGOTREF(__pyx_t_8); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 316; __pyx_clineno = __LINE__; goto __pyx_L9_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __pyx_t_3 = __Pyx_PyObject_IsTrue(__pyx_t_8); if (unlikely(__pyx_t_3 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 305; __pyx_clineno = __LINE__; goto __pyx_L9_error;} + __pyx_t_3 = __Pyx_PyObject_IsTrue(__pyx_t_8); if (unlikely(__pyx_t_3 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 316; __pyx_clineno = __LINE__; goto __pyx_L9_error;} __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; if (__pyx_t_3) { - /* "fuzzysearch/_generic_search.pyx":306 + /* "fuzzysearch/_generic_search.pyx":317 * small_search_start_index = 0 * if small_search_start_index + small_search_length > _seq_len: * small_search_length = _seq_len - small_search_start_index # <<<<<<<<<<<<<< * # try to expand left and/or right according to n_ngram * for match in _c_find_near_matches_generic_linear_programming( */ - __pyx_t_8 = __Pyx_PyInt_FromSize_t((__pyx_v__seq_len - __pyx_v_small_search_start_index)); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 306; __pyx_clineno = __LINE__; goto __pyx_L9_error;} + __pyx_t_8 = __Pyx_PyInt_FromSize_t((__pyx_v__seq_len - __pyx_v_small_search_start_index)); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 317; __pyx_clineno = __LINE__; goto __pyx_L9_error;} __Pyx_GOTREF(__pyx_t_8); __Pyx_DECREF_SET(__pyx_v_small_search_length, __pyx_t_8); __pyx_t_8 = 0; @@ -3610,263 +3403,193 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_4c_find_near_matches_g } __pyx_L16:; - /* "fuzzysearch/_generic_search.pyx":308 - * small_search_length = _seq_len - small_search_start_index - * # try to expand left and/or right according to n_ngram - * for match in _c_find_near_matches_generic_linear_programming( # <<<<<<<<<<<<<< + /* "fuzzysearch/_generic_search.pyx":322 * c_subsequence, _subseq_len, * c_sequence + small_search_start_index, - */ - __pyx_t_8 = __Pyx_GetModuleGlobalName(__pyx_n_s_c_find_near_matches_generic_lin); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 308; __pyx_clineno = __LINE__; goto __pyx_L9_error;} - __Pyx_GOTREF(__pyx_t_8); - - /* "fuzzysearch/_generic_search.pyx":309 - * # try to expand left and/or right according to n_ngram - * for match in _c_find_near_matches_generic_linear_programming( - * c_subsequence, _subseq_len, # <<<<<<<<<<<<<< - * c_sequence + small_search_start_index, - * small_search_length, - */ - __pyx_t_4 = __Pyx_PyBytes_FromString(__pyx_v_c_subsequence); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 309; __pyx_clineno = __LINE__; goto __pyx_L9_error;} - __Pyx_GOTREF(__pyx_t_4); - __pyx_t_1 = __Pyx_PyInt_FromSize_t(__pyx_v__subseq_len); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 309; __pyx_clineno = __LINE__; goto __pyx_L9_error;} - __Pyx_GOTREF(__pyx_t_1); - - /* "fuzzysearch/_generic_search.pyx":310 - * for match in _c_find_near_matches_generic_linear_programming( - * c_subsequence, _subseq_len, - * c_sequence + small_search_start_index, # <<<<<<<<<<<<<< - * small_search_length, + * small_search_length, # <<<<<<<<<<<<<< * c_max_substitutions, c_max_insertions, c_max_deletions, c_max_l_dist, - */ - __pyx_t_14 = __Pyx_PyBytes_FromString((__pyx_v_c_sequence + __pyx_v_small_search_start_index)); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 310; __pyx_clineno = __LINE__; goto __pyx_L9_error;} - __Pyx_GOTREF(__pyx_t_14); - - /* "fuzzysearch/_generic_search.pyx":312 - * c_sequence + small_search_start_index, - * small_search_length, - * c_max_substitutions, c_max_insertions, c_max_deletions, c_max_l_dist, # <<<<<<<<<<<<<< * ): - * matches.append(match._replace( */ - __pyx_t_15 = __Pyx_PyInt_From_unsigned_int(__pyx_v_c_max_substitutions); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 312; __pyx_clineno = __LINE__; goto __pyx_L9_error;} - __Pyx_GOTREF(__pyx_t_15); - __pyx_t_16 = __Pyx_PyInt_From_unsigned_int(__pyx_v_c_max_insertions); if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 312; __pyx_clineno = __LINE__; goto __pyx_L9_error;} - __Pyx_GOTREF(__pyx_t_16); - __pyx_t_17 = __Pyx_PyInt_From_unsigned_int(__pyx_v_c_max_deletions); if (unlikely(!__pyx_t_17)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 312; __pyx_clineno = __LINE__; goto __pyx_L9_error;} - __Pyx_GOTREF(__pyx_t_17); - __pyx_t_18 = __Pyx_PyInt_From_unsigned_int(__pyx_v_c_max_l_dist); if (unlikely(!__pyx_t_18)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 312; __pyx_clineno = __LINE__; goto __pyx_L9_error;} - __Pyx_GOTREF(__pyx_t_18); + __pyx_t_14 = __Pyx_PyInt_As_size_t(__pyx_v_small_search_length); if (unlikely((__pyx_t_14 == (size_t)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 322; __pyx_clineno = __LINE__; goto __pyx_L9_error;} - /* "fuzzysearch/_generic_search.pyx":308 + /* "fuzzysearch/_generic_search.pyx":319 * small_search_length = _seq_len - small_search_start_index * # try to expand left and/or right according to n_ngram * for match in _c_find_near_matches_generic_linear_programming( # <<<<<<<<<<<<<< * c_subsequence, _subseq_len, * c_sequence + small_search_start_index, */ - __pyx_t_19 = PyTuple_New(8); if (unlikely(!__pyx_t_19)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 308; __pyx_clineno = __LINE__; goto __pyx_L9_error;} - __Pyx_GOTREF(__pyx_t_19); - PyTuple_SET_ITEM(__pyx_t_19, 0, __pyx_t_4); - __Pyx_GIVEREF(__pyx_t_4); - PyTuple_SET_ITEM(__pyx_t_19, 1, __pyx_t_1); - __Pyx_GIVEREF(__pyx_t_1); - PyTuple_SET_ITEM(__pyx_t_19, 2, __pyx_t_14); - __Pyx_GIVEREF(__pyx_t_14); - __Pyx_INCREF(__pyx_v_small_search_length); - PyTuple_SET_ITEM(__pyx_t_19, 3, __pyx_v_small_search_length); - __Pyx_GIVEREF(__pyx_v_small_search_length); - PyTuple_SET_ITEM(__pyx_t_19, 4, __pyx_t_15); - __Pyx_GIVEREF(__pyx_t_15); - PyTuple_SET_ITEM(__pyx_t_19, 5, __pyx_t_16); - __Pyx_GIVEREF(__pyx_t_16); - PyTuple_SET_ITEM(__pyx_t_19, 6, __pyx_t_17); - __Pyx_GIVEREF(__pyx_t_17); - PyTuple_SET_ITEM(__pyx_t_19, 7, __pyx_t_18); - __Pyx_GIVEREF(__pyx_t_18); - __pyx_t_4 = 0; - __pyx_t_1 = 0; - __pyx_t_14 = 0; - __pyx_t_15 = 0; - __pyx_t_16 = 0; - __pyx_t_17 = 0; - __pyx_t_18 = 0; - __pyx_t_18 = __Pyx_PyObject_Call(__pyx_t_8, __pyx_t_19, NULL); if (unlikely(!__pyx_t_18)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 308; __pyx_clineno = __LINE__; goto __pyx_L9_error;} - __Pyx_GOTREF(__pyx_t_18); - __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - __Pyx_DECREF(__pyx_t_19); __pyx_t_19 = 0; - if (PyList_CheckExact(__pyx_t_18) || PyTuple_CheckExact(__pyx_t_18)) { - __pyx_t_19 = __pyx_t_18; __Pyx_INCREF(__pyx_t_19); __pyx_t_20 = 0; - __pyx_t_21 = NULL; + __pyx_t_8 = __pyx_f_11fuzzysearch_15_generic_search__c_find_near_matches_generic_linear_programming(__pyx_v_c_subsequence, __pyx_v__subseq_len, (__pyx_v_c_sequence + __pyx_v_small_search_start_index), __pyx_t_14, __pyx_v_c_max_substitutions, __pyx_v_c_max_insertions, __pyx_v_c_max_deletions, __pyx_v_c_max_l_dist); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 319; __pyx_clineno = __LINE__; goto __pyx_L9_error;} + __Pyx_GOTREF(__pyx_t_8); + if (PyList_CheckExact(__pyx_t_8) || PyTuple_CheckExact(__pyx_t_8)) { + __pyx_t_4 = __pyx_t_8; __Pyx_INCREF(__pyx_t_4); __pyx_t_15 = 0; + __pyx_t_16 = NULL; } else { - __pyx_t_20 = -1; __pyx_t_19 = PyObject_GetIter(__pyx_t_18); if (unlikely(!__pyx_t_19)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 308; __pyx_clineno = __LINE__; goto __pyx_L9_error;} - __Pyx_GOTREF(__pyx_t_19); - __pyx_t_21 = Py_TYPE(__pyx_t_19)->tp_iternext; + __pyx_t_15 = -1; __pyx_t_4 = PyObject_GetIter(__pyx_t_8); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 319; __pyx_clineno = __LINE__; goto __pyx_L9_error;} + __Pyx_GOTREF(__pyx_t_4); + __pyx_t_16 = Py_TYPE(__pyx_t_4)->tp_iternext; } - __Pyx_DECREF(__pyx_t_18); __pyx_t_18 = 0; + __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; for (;;) { - if (!__pyx_t_21 && PyList_CheckExact(__pyx_t_19)) { - if (__pyx_t_20 >= PyList_GET_SIZE(__pyx_t_19)) break; + if (!__pyx_t_16 && PyList_CheckExact(__pyx_t_4)) { + if (__pyx_t_15 >= PyList_GET_SIZE(__pyx_t_4)) break; #if CYTHON_COMPILING_IN_CPYTHON - __pyx_t_18 = PyList_GET_ITEM(__pyx_t_19, __pyx_t_20); __Pyx_INCREF(__pyx_t_18); __pyx_t_20++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 308; __pyx_clineno = __LINE__; goto __pyx_L9_error;} + __pyx_t_8 = PyList_GET_ITEM(__pyx_t_4, __pyx_t_15); __Pyx_INCREF(__pyx_t_8); __pyx_t_15++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 319; __pyx_clineno = __LINE__; goto __pyx_L9_error;} #else - __pyx_t_18 = PySequence_ITEM(__pyx_t_19, __pyx_t_20); __pyx_t_20++; if (unlikely(!__pyx_t_18)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 308; __pyx_clineno = __LINE__; goto __pyx_L9_error;} + __pyx_t_8 = PySequence_ITEM(__pyx_t_4, __pyx_t_15); __pyx_t_15++; if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 319; __pyx_clineno = __LINE__; goto __pyx_L9_error;} #endif - } else if (!__pyx_t_21 && PyTuple_CheckExact(__pyx_t_19)) { - if (__pyx_t_20 >= PyTuple_GET_SIZE(__pyx_t_19)) break; + } else if (!__pyx_t_16 && PyTuple_CheckExact(__pyx_t_4)) { + if (__pyx_t_15 >= PyTuple_GET_SIZE(__pyx_t_4)) break; #if CYTHON_COMPILING_IN_CPYTHON - __pyx_t_18 = PyTuple_GET_ITEM(__pyx_t_19, __pyx_t_20); __Pyx_INCREF(__pyx_t_18); __pyx_t_20++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 308; __pyx_clineno = __LINE__; goto __pyx_L9_error;} + __pyx_t_8 = PyTuple_GET_ITEM(__pyx_t_4, __pyx_t_15); __Pyx_INCREF(__pyx_t_8); __pyx_t_15++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 319; __pyx_clineno = __LINE__; goto __pyx_L9_error;} #else - __pyx_t_18 = PySequence_ITEM(__pyx_t_19, __pyx_t_20); __pyx_t_20++; if (unlikely(!__pyx_t_18)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 308; __pyx_clineno = __LINE__; goto __pyx_L9_error;} + __pyx_t_8 = PySequence_ITEM(__pyx_t_4, __pyx_t_15); __pyx_t_15++; if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 319; __pyx_clineno = __LINE__; goto __pyx_L9_error;} #endif } else { - __pyx_t_18 = __pyx_t_21(__pyx_t_19); - if (unlikely(!__pyx_t_18)) { + __pyx_t_8 = __pyx_t_16(__pyx_t_4); + if (unlikely(!__pyx_t_8)) { PyObject* exc_type = PyErr_Occurred(); if (exc_type) { if (likely(exc_type == PyExc_StopIteration || PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else {__pyx_filename = __pyx_f[0]; __pyx_lineno = 308; __pyx_clineno = __LINE__; goto __pyx_L9_error;} + else {__pyx_filename = __pyx_f[0]; __pyx_lineno = 319; __pyx_clineno = __LINE__; goto __pyx_L9_error;} } break; } - __Pyx_GOTREF(__pyx_t_18); + __Pyx_GOTREF(__pyx_t_8); } - __Pyx_XDECREF_SET(__pyx_v_match, __pyx_t_18); - __pyx_t_18 = 0; + __Pyx_XDECREF_SET(__pyx_v_match, __pyx_t_8); + __pyx_t_8 = 0; - /* "fuzzysearch/_generic_search.pyx":314 + /* "fuzzysearch/_generic_search.pyx":325 * c_max_substitutions, c_max_insertions, c_max_deletions, c_max_l_dist, * ): * matches.append(match._replace( # <<<<<<<<<<<<<< * start=match.start + small_search_start_index, * end=match.end + small_search_start_index, */ - __pyx_t_18 = __Pyx_PyObject_GetAttrStr(__pyx_v_match, __pyx_n_s_replace); if (unlikely(!__pyx_t_18)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 314; __pyx_clineno = __LINE__; goto __pyx_L9_error;} - __Pyx_GOTREF(__pyx_t_18); - __pyx_t_8 = PyDict_New(); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 314; __pyx_clineno = __LINE__; goto __pyx_L9_error;} + __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_v_match, __pyx_n_s_replace); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 325; __pyx_clineno = __LINE__; goto __pyx_L9_error;} __Pyx_GOTREF(__pyx_t_8); + __pyx_t_1 = PyDict_New(); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 325; __pyx_clineno = __LINE__; goto __pyx_L9_error;} + __Pyx_GOTREF(__pyx_t_1); - /* "fuzzysearch/_generic_search.pyx":315 + /* "fuzzysearch/_generic_search.pyx":326 * ): * matches.append(match._replace( * start=match.start + small_search_start_index, # <<<<<<<<<<<<<< * end=match.end + small_search_start_index, * )) */ - __pyx_t_17 = __Pyx_PyObject_GetAttrStr(__pyx_v_match, __pyx_n_s_start); if (unlikely(!__pyx_t_17)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 315; __pyx_clineno = __LINE__; goto __pyx_L9_error;} + __pyx_t_17 = __Pyx_PyObject_GetAttrStr(__pyx_v_match, __pyx_n_s_start); if (unlikely(!__pyx_t_17)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 326; __pyx_clineno = __LINE__; goto __pyx_L9_error;} __Pyx_GOTREF(__pyx_t_17); - __pyx_t_16 = __Pyx_PyInt_From_int(__pyx_v_small_search_start_index); if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 315; __pyx_clineno = __LINE__; goto __pyx_L9_error;} - __Pyx_GOTREF(__pyx_t_16); - __pyx_t_15 = PyNumber_Add(__pyx_t_17, __pyx_t_16); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 315; __pyx_clineno = __LINE__; goto __pyx_L9_error;} - __Pyx_GOTREF(__pyx_t_15); + __pyx_t_18 = __Pyx_PyInt_From_int(__pyx_v_small_search_start_index); if (unlikely(!__pyx_t_18)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 326; __pyx_clineno = __LINE__; goto __pyx_L9_error;} + __Pyx_GOTREF(__pyx_t_18); + __pyx_t_19 = PyNumber_Add(__pyx_t_17, __pyx_t_18); if (unlikely(!__pyx_t_19)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 326; __pyx_clineno = __LINE__; goto __pyx_L9_error;} + __Pyx_GOTREF(__pyx_t_19); __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0; - __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; - if (PyDict_SetItem(__pyx_t_8, __pyx_n_s_start, __pyx_t_15) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 314; __pyx_clineno = __LINE__; goto __pyx_L9_error;} - __Pyx_DECREF(__pyx_t_15); __pyx_t_15 = 0; + __Pyx_DECREF(__pyx_t_18); __pyx_t_18 = 0; + if (PyDict_SetItem(__pyx_t_1, __pyx_n_s_start, __pyx_t_19) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 325; __pyx_clineno = __LINE__; goto __pyx_L9_error;} + __Pyx_DECREF(__pyx_t_19); __pyx_t_19 = 0; - /* "fuzzysearch/_generic_search.pyx":316 + /* "fuzzysearch/_generic_search.pyx":327 * matches.append(match._replace( * start=match.start + small_search_start_index, * end=match.end + small_search_start_index, # <<<<<<<<<<<<<< * )) - * match_ptr = strstr(match_ptr + 1, ngram_str) + * match_ptr = KMP_find_next(&kmp_state) */ - __pyx_t_15 = __Pyx_PyObject_GetAttrStr(__pyx_v_match, __pyx_n_s_end); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 316; __pyx_clineno = __LINE__; goto __pyx_L9_error;} - __Pyx_GOTREF(__pyx_t_15); - __pyx_t_16 = __Pyx_PyInt_From_int(__pyx_v_small_search_start_index); if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 316; __pyx_clineno = __LINE__; goto __pyx_L9_error;} - __Pyx_GOTREF(__pyx_t_16); - __pyx_t_17 = PyNumber_Add(__pyx_t_15, __pyx_t_16); if (unlikely(!__pyx_t_17)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 316; __pyx_clineno = __LINE__; goto __pyx_L9_error;} + __pyx_t_19 = __Pyx_PyObject_GetAttrStr(__pyx_v_match, __pyx_n_s_end); if (unlikely(!__pyx_t_19)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 327; __pyx_clineno = __LINE__; goto __pyx_L9_error;} + __Pyx_GOTREF(__pyx_t_19); + __pyx_t_18 = __Pyx_PyInt_From_int(__pyx_v_small_search_start_index); if (unlikely(!__pyx_t_18)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 327; __pyx_clineno = __LINE__; goto __pyx_L9_error;} + __Pyx_GOTREF(__pyx_t_18); + __pyx_t_17 = PyNumber_Add(__pyx_t_19, __pyx_t_18); if (unlikely(!__pyx_t_17)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 327; __pyx_clineno = __LINE__; goto __pyx_L9_error;} __Pyx_GOTREF(__pyx_t_17); - __Pyx_DECREF(__pyx_t_15); __pyx_t_15 = 0; - __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; - if (PyDict_SetItem(__pyx_t_8, __pyx_n_s_end, __pyx_t_17) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 314; __pyx_clineno = __LINE__; goto __pyx_L9_error;} + __Pyx_DECREF(__pyx_t_19); __pyx_t_19 = 0; + __Pyx_DECREF(__pyx_t_18); __pyx_t_18 = 0; + if (PyDict_SetItem(__pyx_t_1, __pyx_n_s_end, __pyx_t_17) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 325; __pyx_clineno = __LINE__; goto __pyx_L9_error;} __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0; - /* "fuzzysearch/_generic_search.pyx":314 + /* "fuzzysearch/_generic_search.pyx":325 * c_max_substitutions, c_max_insertions, c_max_deletions, c_max_l_dist, * ): * matches.append(match._replace( # <<<<<<<<<<<<<< * start=match.start + small_search_start_index, * end=match.end + small_search_start_index, */ - __pyx_t_17 = __Pyx_PyObject_Call(__pyx_t_18, __pyx_empty_tuple, __pyx_t_8); if (unlikely(!__pyx_t_17)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 314; __pyx_clineno = __LINE__; goto __pyx_L9_error;} + __pyx_t_17 = __Pyx_PyObject_Call(__pyx_t_8, __pyx_empty_tuple, __pyx_t_1); if (unlikely(!__pyx_t_17)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 325; __pyx_clineno = __LINE__; goto __pyx_L9_error;} __Pyx_GOTREF(__pyx_t_17); - __Pyx_DECREF(__pyx_t_18); __pyx_t_18 = 0; __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - __pyx_t_22 = __Pyx_PyList_Append(__pyx_v_matches, __pyx_t_17); if (unlikely(__pyx_t_22 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 314; __pyx_clineno = __LINE__; goto __pyx_L9_error;} + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_20 = __Pyx_PyList_Append(__pyx_v_matches, __pyx_t_17); if (unlikely(__pyx_t_20 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 325; __pyx_clineno = __LINE__; goto __pyx_L9_error;} __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0; } - __Pyx_DECREF(__pyx_t_19); __pyx_t_19 = 0; + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - /* "fuzzysearch/_generic_search.pyx":318 + /* "fuzzysearch/_generic_search.pyx":329 * end=match.end + small_search_start_index, * )) - * match_ptr = strstr(match_ptr + 1, ngram_str) # <<<<<<<<<<<<<< + * match_ptr = KMP_find_next(&kmp_state) # <<<<<<<<<<<<<< * * finally: */ - __pyx_v_match_ptr = strstr((__pyx_v_match_ptr + 1), __pyx_v_ngram_str); + __pyx_v_match_ptr = KMP_find_next((&__pyx_v_kmp_state)); } } __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; } - /* "fuzzysearch/_generic_search.pyx":321 + /* "fuzzysearch/_generic_search.pyx":332 * * finally: - * free(ngram_str) # <<<<<<<<<<<<<< + * free(kmpNext) # <<<<<<<<<<<<<< * * return matches */ /*finally:*/ { /*normal exit:*/{ - free(__pyx_v_ngram_str); + free(__pyx_v_kmpNext); goto __pyx_L10; } /*exception exit:*/{ __pyx_L9_error:; - __pyx_t_26 = 0; __pyx_t_27 = 0; __pyx_t_28 = 0; __pyx_t_29 = 0; __pyx_t_30 = 0; __pyx_t_31 = 0; - __Pyx_XDECREF(__pyx_t_4); __pyx_t_4 = 0; - __Pyx_XDECREF(__pyx_t_1); __pyx_t_1 = 0; - __Pyx_XDECREF(__pyx_t_14); __pyx_t_14 = 0; - __Pyx_XDECREF(__pyx_t_15); __pyx_t_15 = 0; - __Pyx_XDECREF(__pyx_t_16); __pyx_t_16 = 0; + __pyx_t_24 = 0; __pyx_t_25 = 0; __pyx_t_26 = 0; __pyx_t_27 = 0; __pyx_t_28 = 0; __pyx_t_29 = 0; + __Pyx_XDECREF(__pyx_t_19); __pyx_t_19 = 0; __Pyx_XDECREF(__pyx_t_18); __pyx_t_18 = 0; __Pyx_XDECREF(__pyx_t_8); __pyx_t_8 = 0; + __Pyx_XDECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_XDECREF(__pyx_t_17); __pyx_t_17 = 0; - __Pyx_XDECREF(__pyx_t_19); __pyx_t_19 = 0; + __Pyx_XDECREF(__pyx_t_4); __pyx_t_4 = 0; __Pyx_XDECREF(__pyx_t_9); __pyx_t_9 = 0; - if (PY_MAJOR_VERSION >= 3) __Pyx_ExceptionSwap(&__pyx_t_29, &__pyx_t_30, &__pyx_t_31); - if ((PY_MAJOR_VERSION < 3) || unlikely(__Pyx_GetException(&__pyx_t_26, &__pyx_t_27, &__pyx_t_28) < 0)) __Pyx_ErrFetch(&__pyx_t_26, &__pyx_t_27, &__pyx_t_28); + if (PY_MAJOR_VERSION >= 3) __Pyx_ExceptionSwap(&__pyx_t_27, &__pyx_t_28, &__pyx_t_29); + if ((PY_MAJOR_VERSION < 3) || unlikely(__Pyx_GetException(&__pyx_t_24, &__pyx_t_25, &__pyx_t_26) < 0)) __Pyx_ErrFetch(&__pyx_t_24, &__pyx_t_25, &__pyx_t_26); + __Pyx_XGOTREF(__pyx_t_24); + __Pyx_XGOTREF(__pyx_t_25); __Pyx_XGOTREF(__pyx_t_26); __Pyx_XGOTREF(__pyx_t_27); __Pyx_XGOTREF(__pyx_t_28); __Pyx_XGOTREF(__pyx_t_29); - __Pyx_XGOTREF(__pyx_t_30); - __Pyx_XGOTREF(__pyx_t_31); - __pyx_t_23 = __pyx_lineno; __pyx_t_24 = __pyx_clineno; __pyx_t_25 = __pyx_filename; + __pyx_t_21 = __pyx_lineno; __pyx_t_22 = __pyx_clineno; __pyx_t_23 = __pyx_filename; { - free(__pyx_v_ngram_str); + free(__pyx_v_kmpNext); } if (PY_MAJOR_VERSION >= 3) { + __Pyx_XGIVEREF(__pyx_t_27); + __Pyx_XGIVEREF(__pyx_t_28); __Pyx_XGIVEREF(__pyx_t_29); - __Pyx_XGIVEREF(__pyx_t_30); - __Pyx_XGIVEREF(__pyx_t_31); - __Pyx_ExceptionReset(__pyx_t_29, __pyx_t_30, __pyx_t_31); + __Pyx_ExceptionReset(__pyx_t_27, __pyx_t_28, __pyx_t_29); } + __Pyx_XGIVEREF(__pyx_t_24); + __Pyx_XGIVEREF(__pyx_t_25); __Pyx_XGIVEREF(__pyx_t_26); - __Pyx_XGIVEREF(__pyx_t_27); - __Pyx_XGIVEREF(__pyx_t_28); - __Pyx_ErrRestore(__pyx_t_26, __pyx_t_27, __pyx_t_28); - __pyx_t_26 = 0; __pyx_t_27 = 0; __pyx_t_28 = 0; __pyx_t_29 = 0; __pyx_t_30 = 0; __pyx_t_31 = 0; - __pyx_lineno = __pyx_t_23; __pyx_clineno = __pyx_t_24; __pyx_filename = __pyx_t_25; + __Pyx_ErrRestore(__pyx_t_24, __pyx_t_25, __pyx_t_26); + __pyx_t_24 = 0; __pyx_t_25 = 0; __pyx_t_26 = 0; __pyx_t_27 = 0; __pyx_t_28 = 0; __pyx_t_29 = 0; + __pyx_lineno = __pyx_t_21; __pyx_clineno = __pyx_t_22; __pyx_filename = __pyx_t_23; goto __pyx_L1_error; } __pyx_L10:; } - /* "fuzzysearch/_generic_search.pyx":323 - * free(ngram_str) + /* "fuzzysearch/_generic_search.pyx":334 + * free(kmpNext) * * return matches # <<<<<<<<<<<<<< */ @@ -3875,7 +3598,7 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_4c_find_near_matches_g __pyx_r = __pyx_v_matches; goto __pyx_L0; - /* "fuzzysearch/_generic_search.pyx":236 + /* "fuzzysearch/_generic_search.pyx":250 * * * def c_find_near_matches_generic_ngrams(subsequence, sequence, # <<<<<<<<<<<<<< @@ -3889,9 +3612,6 @@ static PyObject *__pyx_pf_11fuzzysearch_15_generic_search_4c_find_near_matches_g __Pyx_XDECREF(__pyx_t_4); __Pyx_XDECREF(__pyx_t_8); __Pyx_XDECREF(__pyx_t_9); - __Pyx_XDECREF(__pyx_t_14); - __Pyx_XDECREF(__pyx_t_15); - __Pyx_XDECREF(__pyx_t_16); __Pyx_XDECREF(__pyx_t_17); __Pyx_XDECREF(__pyx_t_18); __Pyx_XDECREF(__pyx_t_19); @@ -3940,9 +3660,7 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { {&__pyx_kp_s_Users_taleinat_dev_fuzzysearch, __pyx_k_Users_taleinat_dev_fuzzysearch, sizeof(__pyx_k_Users_taleinat_dev_fuzzysearch), 0, 0, 1, 0}, {&__pyx_n_s_ValueError, __pyx_k_ValueError, sizeof(__pyx_k_ValueError), 0, 0, 1, 1}, {&__pyx_n_s_all, __pyx_k_all, sizeof(__pyx_k_all), 0, 0, 1, 1}, - {&__pyx_n_s_alloc_size, __pyx_k_alloc_size, sizeof(__pyx_k_alloc_size), 0, 0, 1, 1}, {&__pyx_n_s_binary_type, __pyx_k_binary_type, sizeof(__pyx_k_binary_type), 0, 0, 1, 1}, - {&__pyx_n_s_c_find_near_matches_generic_lin, __pyx_k_c_find_near_matches_generic_lin, sizeof(__pyx_k_c_find_near_matches_generic_lin), 0, 0, 1, 1}, {&__pyx_n_s_c_find_near_matches_generic_line, __pyx_k_c_find_near_matches_generic_line, sizeof(__pyx_k_c_find_near_matches_generic_line), 0, 0, 1, 1}, {&__pyx_n_s_c_find_near_matches_generic_ngra, __pyx_k_c_find_near_matches_generic_ngra, sizeof(__pyx_k_c_find_near_matches_generic_ngra), 0, 0, 1, 1}, {&__pyx_n_s_c_max_deletions, __pyx_k_c_max_deletions, sizeof(__pyx_k_c_max_deletions), 0, 0, 1, 1}, @@ -3951,15 +3669,13 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { {&__pyx_n_s_c_max_substitutions, __pyx_k_c_max_substitutions, sizeof(__pyx_k_c_max_substitutions), 0, 0, 1, 1}, {&__pyx_n_s_c_sequence, __pyx_k_c_sequence, sizeof(__pyx_k_c_sequence), 0, 0, 1, 1}, {&__pyx_n_s_c_subsequence, __pyx_k_c_subsequence, sizeof(__pyx_k_c_subsequence), 0, 0, 1, 1}, - {&__pyx_n_s_cand, __pyx_k_cand, sizeof(__pyx_k_cand), 0, 0, 1, 1}, - {&__pyx_n_s_candidates, __pyx_k_candidates, sizeof(__pyx_k_candidates), 0, 0, 1, 1}, - {&__pyx_n_s_charchar, __pyx_k_charchar, sizeof(__pyx_k_charchar), 0, 0, 1, 1}, {&__pyx_n_s_end, __pyx_k_end, sizeof(__pyx_k_end), 0, 0, 1, 1}, {&__pyx_n_s_fuzzysearch__generic_search, __pyx_k_fuzzysearch__generic_search, sizeof(__pyx_k_fuzzysearch__generic_search), 0, 0, 1, 1}, {&__pyx_n_s_fuzzysearch_common, __pyx_k_fuzzysearch_common, sizeof(__pyx_k_fuzzysearch_common), 0, 0, 1, 1}, - {&__pyx_n_s_have_realloced, __pyx_k_have_realloced, sizeof(__pyx_k_have_realloced), 0, 0, 1, 1}, {&__pyx_n_s_import, __pyx_k_import, sizeof(__pyx_k_import), 0, 0, 1, 1}, {&__pyx_n_s_index, __pyx_k_index, sizeof(__pyx_k_index), 0, 0, 1, 1}, + {&__pyx_n_s_kmpNext, __pyx_k_kmpNext, sizeof(__pyx_k_kmpNext), 0, 0, 1, 1}, + {&__pyx_n_s_kmp_state, __pyx_k_kmp_state, sizeof(__pyx_k_kmp_state), 0, 0, 1, 1}, {&__pyx_n_s_l_dist, __pyx_k_l_dist, sizeof(__pyx_k_l_dist), 0, 0, 1, 1}, {&__pyx_n_s_main, __pyx_k_main, sizeof(__pyx_k_main), 0, 0, 1, 1}, {&__pyx_n_s_match, __pyx_k_match, sizeof(__pyx_k_match), 0, 0, 1, 1}, @@ -3970,21 +3686,14 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { {&__pyx_n_s_max_l_dist, __pyx_k_max_l_dist, sizeof(__pyx_k_max_l_dist), 0, 0, 1, 1}, {&__pyx_n_s_max_substitutions, __pyx_k_max_substitutions, sizeof(__pyx_k_max_substitutions), 0, 0, 1, 1}, {&__pyx_n_s_maxint, __pyx_k_maxint, sizeof(__pyx_k_maxint), 0, 0, 1, 1}, - {&__pyx_n_s_n_cand, __pyx_k_n_cand, sizeof(__pyx_k_n_cand), 0, 0, 1, 1}, - {&__pyx_n_s_n_candidates, __pyx_k_n_candidates, sizeof(__pyx_k_n_candidates), 0, 0, 1, 1}, {&__pyx_n_s_n_dels, __pyx_k_n_dels, sizeof(__pyx_k_n_dels), 0, 0, 1, 1}, {&__pyx_n_s_n_ins, __pyx_k_n_ins, sizeof(__pyx_k_n_ins), 0, 0, 1, 1}, - {&__pyx_n_s_n_new_candidates, __pyx_k_n_new_candidates, sizeof(__pyx_k_n_new_candidates), 0, 0, 1, 1}, - {&__pyx_n_s_n_skipped, __pyx_k_n_skipped, sizeof(__pyx_k_n_skipped), 0, 0, 1, 1}, {&__pyx_n_s_n_subs, __pyx_k_n_subs, sizeof(__pyx_k_n_subs), 0, 0, 1, 1}, - {&__pyx_n_s_new_candidates, __pyx_k_new_candidates, sizeof(__pyx_k_new_candidates), 0, 0, 1, 1}, {&__pyx_n_s_ngram_len, __pyx_k_ngram_len, sizeof(__pyx_k_ngram_len), 0, 0, 1, 1}, {&__pyx_n_s_ngram_start, __pyx_k_ngram_start, sizeof(__pyx_k_ngram_start), 0, 0, 1, 1}, - {&__pyx_n_s_ngram_str, __pyx_k_ngram_str, sizeof(__pyx_k_ngram_str), 0, 0, 1, 1}, {&__pyx_n_s_range, __pyx_k_range, sizeof(__pyx_k_range), 0, 0, 1, 1}, {&__pyx_n_s_replace, __pyx_k_replace, sizeof(__pyx_k_replace), 0, 0, 1, 1}, {&__pyx_n_s_seq_len, __pyx_k_seq_len, sizeof(__pyx_k_seq_len), 0, 0, 1, 1}, - {&__pyx_n_s_seq_len_2, __pyx_k_seq_len_2, sizeof(__pyx_k_seq_len_2), 0, 0, 1, 1}, {&__pyx_n_s_sequence, __pyx_k_sequence, sizeof(__pyx_k_sequence), 0, 0, 1, 1}, {&__pyx_kp_s_sequence_is_of_invalid_type_s, __pyx_k_sequence_is_of_invalid_type_s, sizeof(__pyx_k_sequence_is_of_invalid_type_s), 0, 0, 1, 0}, {&__pyx_n_s_six, __pyx_k_six, sizeof(__pyx_k_six), 0, 0, 1, 1}, @@ -3993,27 +3702,24 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { {&__pyx_n_s_start, __pyx_k_start, sizeof(__pyx_k_start), 0, 0, 1, 1}, {&__pyx_n_s_subseq_index, __pyx_k_subseq_index, sizeof(__pyx_k_subseq_index), 0, 0, 1, 1}, {&__pyx_n_s_subseq_len, __pyx_k_subseq_len, sizeof(__pyx_k_subseq_len), 0, 0, 1, 1}, - {&__pyx_n_s_subseq_len_2, __pyx_k_subseq_len_2, sizeof(__pyx_k_subseq_len_2), 0, 0, 1, 1}, {&__pyx_n_s_subseq_len_minus_one, __pyx_k_subseq_len_minus_one, sizeof(__pyx_k_subseq_len_minus_one), 0, 0, 1, 1}, - {&__pyx_n_s_subseq_len_minus_one_2, __pyx_k_subseq_len_minus_one_2, sizeof(__pyx_k_subseq_len_minus_one_2), 0, 0, 1, 1}, {&__pyx_n_s_subsequence, __pyx_k_subsequence, sizeof(__pyx_k_subsequence), 0, 0, 1, 1}, {&__pyx_kp_s_subsequence_is_of_invalid_type_s, __pyx_k_subsequence_is_of_invalid_type_s, sizeof(__pyx_k_subsequence_is_of_invalid_type_s), 0, 0, 1, 0}, {&__pyx_n_s_sys, __pyx_k_sys, sizeof(__pyx_k_sys), 0, 0, 1, 1}, {&__pyx_n_s_test, __pyx_k_test, sizeof(__pyx_k_test), 0, 0, 1, 1}, {&__pyx_kp_s_the_subsequence_length_must_be_g, __pyx_k_the_subsequence_length_must_be_g, sizeof(__pyx_k_the_subsequence_length_must_be_g), 0, 0, 1, 0}, - {&__pyx_n_s_tmp, __pyx_k_tmp, sizeof(__pyx_k_tmp), 0, 0, 1, 1}, {&__pyx_n_s_xrange, __pyx_k_xrange, sizeof(__pyx_k_xrange), 0, 0, 1, 1}, {0, 0, 0, 0, 0, 0, 0} }; static int __Pyx_InitCachedBuiltins(void) { - __pyx_builtin_ImportError = __Pyx_GetBuiltinName(__pyx_n_s_ImportError); if (!__pyx_builtin_ImportError) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 21; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_builtin_TypeError = __Pyx_GetBuiltinName(__pyx_n_s_TypeError); if (!__pyx_builtin_TypeError) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 43; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_builtin_ValueError = __Pyx_GetBuiltinName(__pyx_n_s_ValueError); if (!__pyx_builtin_ValueError) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 48; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_builtin_MemoryError = __Pyx_GetBuiltinName(__pyx_n_s_MemoryError); if (!__pyx_builtin_MemoryError) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 84; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_builtin_ImportError = __Pyx_GetBuiltinName(__pyx_n_s_ImportError); if (!__pyx_builtin_ImportError) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 31; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_builtin_TypeError = __Pyx_GetBuiltinName(__pyx_n_s_TypeError); if (!__pyx_builtin_TypeError) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 53; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_builtin_ValueError = __Pyx_GetBuiltinName(__pyx_n_s_ValueError); if (!__pyx_builtin_ValueError) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 58; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_builtin_MemoryError = __Pyx_GetBuiltinName(__pyx_n_s_MemoryError); if (!__pyx_builtin_MemoryError) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 97; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #if PY_MAJOR_VERSION >= 3 - __pyx_builtin_xrange = __Pyx_GetBuiltinName(__pyx_n_s_range); if (!__pyx_builtin_xrange) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 101; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_builtin_xrange = __Pyx_GetBuiltinName(__pyx_n_s_range); if (!__pyx_builtin_xrange) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 115; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #else - __pyx_builtin_xrange = __Pyx_GetBuiltinName(__pyx_n_s_xrange); if (!__pyx_builtin_xrange) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 101; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_builtin_xrange = __Pyx_GetBuiltinName(__pyx_n_s_xrange); if (!__pyx_builtin_xrange) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 115; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #endif return 0; __pyx_L1_error:; @@ -4024,74 +3730,62 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("__Pyx_InitCachedConstants", 0); - /* "fuzzysearch/_generic_search.pyx":48 + /* "fuzzysearch/_generic_search.pyx":58 * * if not subsequence: * raise ValueError('Given subsequence is empty!') # <<<<<<<<<<<<<< * - * c_subsequence = subsequence + * cdef const char *c_subsequence = subsequence */ - __pyx_tuple_ = PyTuple_Pack(1, __pyx_kp_s_Given_subsequence_is_empty); if (unlikely(!__pyx_tuple_)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 48; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_tuple_ = PyTuple_Pack(1, __pyx_kp_s_Given_subsequence_is_empty); if (unlikely(!__pyx_tuple_)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 58; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple_); __Pyx_GIVEREF(__pyx_tuple_); - /* "fuzzysearch/_generic_search.pyx":255 + /* "fuzzysearch/_generic_search.pyx":269 * * if not subsequence: * raise ValueError('Given subsequence is empty!') # <<<<<<<<<<<<<< * * # optimization: prepare some often used things in advance */ - __pyx_tuple__2 = PyTuple_Pack(1, __pyx_kp_s_Given_subsequence_is_empty); if (unlikely(!__pyx_tuple__2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 255; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_tuple__2 = PyTuple_Pack(1, __pyx_kp_s_Given_subsequence_is_empty); if (unlikely(!__pyx_tuple__2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 269; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple__2); __Pyx_GIVEREF(__pyx_tuple__2); - /* "fuzzysearch/_generic_search.pyx":278 + /* "fuzzysearch/_generic_search.pyx":291 * cdef size_t ngram_len = _subseq_len // (c_max_l_dist + 1) * if ngram_len == 0: * raise ValueError('the subsequence length must be greater than max_l_dist') # <<<<<<<<<<<<<< * - * ngram_str = malloc((ngram_len + 1) * sizeof(char)) + * cdef int index, small_search_start_index */ - __pyx_tuple__3 = PyTuple_Pack(1, __pyx_kp_s_the_subsequence_length_must_be_g); if (unlikely(!__pyx_tuple__3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 278; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_tuple__3 = PyTuple_Pack(1, __pyx_kp_s_the_subsequence_length_must_be_g); if (unlikely(!__pyx_tuple__3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 291; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple__3); __Pyx_GIVEREF(__pyx_tuple__3); - /* "fuzzysearch/_generic_search.pyx":27 + /* "fuzzysearch/_generic_search.pyx":37 * * * def c_find_near_matches_generic_linear_programming(subsequence, sequence, # <<<<<<<<<<<<<< * max_substitutions, * max_insertions, */ - __pyx_tuple__4 = PyTuple_Pack(8, __pyx_n_s_subsequence, __pyx_n_s_sequence, __pyx_n_s_max_substitutions, __pyx_n_s_max_insertions, __pyx_n_s_max_deletions, __pyx_n_s_max_l_dist, __pyx_n_s_c_subsequence, __pyx_n_s_c_sequence); if (unlikely(!__pyx_tuple__4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 27; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_tuple__4 = PyTuple_Pack(8, __pyx_n_s_subsequence, __pyx_n_s_sequence, __pyx_n_s_max_substitutions, __pyx_n_s_max_insertions, __pyx_n_s_max_deletions, __pyx_n_s_max_l_dist, __pyx_n_s_c_subsequence, __pyx_n_s_c_sequence); if (unlikely(!__pyx_tuple__4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 37; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple__4); __Pyx_GIVEREF(__pyx_tuple__4); - __pyx_codeobj__5 = (PyObject*)__Pyx_PyCode_New(6, 0, 8, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__4, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_Users_taleinat_dev_fuzzysearch, __pyx_n_s_c_find_near_matches_generic_line, 27, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 27; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_codeobj__5 = (PyObject*)__Pyx_PyCode_New(6, 0, 8, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__4, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_Users_taleinat_dev_fuzzysearch, __pyx_n_s_c_find_near_matches_generic_line, 37, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 37; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - /* "fuzzysearch/_generic_search.pyx":62 - * ) - * - * def _c_find_near_matches_generic_linear_programming( # <<<<<<<<<<<<<< - * char* subsequence, size_t subseq_len, - * char* sequence, size_t seq_len, - */ - __pyx_tuple__6 = PyTuple_Pack(22, __pyx_n_s_subsequence, __pyx_n_s_subseq_len, __pyx_n_s_sequence, __pyx_n_s_seq_len, __pyx_n_s_max_substitutions, __pyx_n_s_max_insertions, __pyx_n_s_max_deletions, __pyx_n_s_max_l_dist, __pyx_n_s_subseq_len_minus_one, __pyx_n_s_alloc_size, __pyx_n_s_candidates, __pyx_n_s_new_candidates, __pyx_n_s_tmp, __pyx_n_s_cand, __pyx_n_s_n_candidates, __pyx_n_s_n_new_candidates, __pyx_n_s_n_cand, __pyx_n_s_matches, __pyx_n_s_index, __pyx_n_s_charchar, __pyx_n_s_have_realloced, __pyx_n_s_n_skipped); if (unlikely(!__pyx_tuple__6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 62; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_tuple__6); - __Pyx_GIVEREF(__pyx_tuple__6); - __pyx_codeobj__7 = (PyObject*)__Pyx_PyCode_New(8, 0, 22, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__6, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_Users_taleinat_dev_fuzzysearch, __pyx_n_s_c_find_near_matches_generic_lin, 62, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 62; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - - /* "fuzzysearch/_generic_search.pyx":236 + /* "fuzzysearch/_generic_search.pyx":250 * * * def c_find_near_matches_generic_ngrams(subsequence, sequence, # <<<<<<<<<<<<<< * max_substitutions, max_insertions, * max_deletions, max_l_dist=None): */ - __pyx_tuple__8 = PyTuple_Pack(24, __pyx_n_s_subsequence, __pyx_n_s_sequence, __pyx_n_s_max_substitutions, __pyx_n_s_max_insertions, __pyx_n_s_max_deletions, __pyx_n_s_max_l_dist, __pyx_n_s_subseq_len_2, __pyx_n_s_subseq_len_minus_one_2, __pyx_n_s_seq_len_2, __pyx_n_s_c_max_substitutions, __pyx_n_s_c_max_insertions, __pyx_n_s_c_max_deletions, __pyx_n_s_c_max_l_dist, __pyx_n_s_c_sequence, __pyx_n_s_c_subsequence, __pyx_n_s_ngram_str, __pyx_n_s_ngram_len, __pyx_n_s_index, __pyx_n_s_small_search_start_index, __pyx_n_s_ngram_start, __pyx_n_s_match_ptr, __pyx_n_s_matches, __pyx_n_s_small_search_length, __pyx_n_s_match); if (unlikely(!__pyx_tuple__8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 236; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_tuple__8); - __Pyx_GIVEREF(__pyx_tuple__8); - __pyx_codeobj__9 = (PyObject*)__Pyx_PyCode_New(6, 0, 24, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__8, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_Users_taleinat_dev_fuzzysearch, __pyx_n_s_c_find_near_matches_generic_ngra, 236, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__9)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 236; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_tuple__6 = PyTuple_Pack(25, __pyx_n_s_subsequence, __pyx_n_s_sequence, __pyx_n_s_max_substitutions, __pyx_n_s_max_insertions, __pyx_n_s_max_deletions, __pyx_n_s_max_l_dist, __pyx_n_s_subseq_len, __pyx_n_s_subseq_len_minus_one, __pyx_n_s_seq_len, __pyx_n_s_c_max_substitutions, __pyx_n_s_c_max_insertions, __pyx_n_s_c_max_deletions, __pyx_n_s_c_max_l_dist, __pyx_n_s_c_sequence, __pyx_n_s_c_subsequence, __pyx_n_s_ngram_len, __pyx_n_s_index, __pyx_n_s_small_search_start_index, __pyx_n_s_ngram_start, __pyx_n_s_match_ptr, __pyx_n_s_kmpNext, __pyx_n_s_kmp_state, __pyx_n_s_matches, __pyx_n_s_small_search_length, __pyx_n_s_match); if (unlikely(!__pyx_tuple__6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 250; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_tuple__6); + __Pyx_GIVEREF(__pyx_tuple__6); + __pyx_codeobj__7 = (PyObject*)__Pyx_PyCode_New(6, 0, 25, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__6, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_Users_taleinat_dev_fuzzysearch, __pyx_n_s_c_find_near_matches_generic_ngra, 250, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 250; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_RefNannyFinishContext(); return 0; __pyx_L1_error:; @@ -4235,7 +3929,7 @@ PyMODINIT_FUNC PyInit__generic_search(void) * import six * from fuzzysearch.common import Match # <<<<<<<<<<<<<< * from libc.stdlib cimport malloc, free, realloc - * from libc.string cimport strstr, strncpy + * */ __pyx_t_2 = PyList_New(1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 3; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); @@ -4251,14 +3945,14 @@ PyMODINIT_FUNC PyInit__generic_search(void) __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "fuzzysearch/_generic_search.pyx":8 - * + /* "fuzzysearch/_generic_search.pyx":18 + * const char* KMP_find_next(KMPstate *kmp_state) * * __all__ = [ # <<<<<<<<<<<<<< * 'c_find_near_matches_generic_linear_programming', * 'c_find_near_matches_generic_ngrams', */ - __pyx_t_1 = PyList_New(2); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 8; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyList_New(2); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 18; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_INCREF(__pyx_n_s_c_find_near_matches_generic_line); PyList_SET_ITEM(__pyx_t_1, 0, __pyx_n_s_c_find_near_matches_generic_line); @@ -4266,22 +3960,22 @@ PyMODINIT_FUNC PyInit__generic_search(void) __Pyx_INCREF(__pyx_n_s_c_find_near_matches_generic_ngra); PyList_SET_ITEM(__pyx_t_1, 1, __pyx_n_s_c_find_near_matches_generic_ngra); __Pyx_GIVEREF(__pyx_n_s_c_find_near_matches_generic_ngra); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_all, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 8; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_d, __pyx_n_s_all, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 18; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "fuzzysearch/_generic_search.pyx":18 + /* "fuzzysearch/_generic_search.pyx":28 * * * ALLOWED_TYPES = (six.binary_type, bytearray) # <<<<<<<<<<<<<< * try: * from Bio.Seq import Seq */ - __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_six); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 18; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_six); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 28; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_binary_type); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 18; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_binary_type); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 28; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = PyTuple_New(2); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 18; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyTuple_New(2); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 28; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); PyTuple_SET_ITEM(__pyx_t_1, 0, __pyx_t_2); __Pyx_GIVEREF(__pyx_t_2); @@ -4289,10 +3983,10 @@ PyMODINIT_FUNC PyInit__generic_search(void) PyTuple_SET_ITEM(__pyx_t_1, 1, ((PyObject *)((PyObject*)(&PyByteArray_Type)))); __Pyx_GIVEREF(((PyObject *)((PyObject*)(&PyByteArray_Type)))); __pyx_t_2 = 0; - if (PyDict_SetItem(__pyx_d, __pyx_n_s_ALLOWED_TYPES, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 18; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_d, __pyx_n_s_ALLOWED_TYPES, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 28; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "fuzzysearch/_generic_search.pyx":19 + /* "fuzzysearch/_generic_search.pyx":29 * * ALLOWED_TYPES = (six.binary_type, bytearray) * try: # <<<<<<<<<<<<<< @@ -4306,50 +4000,50 @@ PyMODINIT_FUNC PyInit__generic_search(void) __Pyx_XGOTREF(__pyx_t_5); /*try:*/ { - /* "fuzzysearch/_generic_search.pyx":20 + /* "fuzzysearch/_generic_search.pyx":30 * ALLOWED_TYPES = (six.binary_type, bytearray) * try: * from Bio.Seq import Seq # <<<<<<<<<<<<<< * except ImportError: * pass */ - __pyx_t_1 = PyList_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 20; __pyx_clineno = __LINE__; goto __pyx_L2_error;} + __pyx_t_1 = PyList_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 30; __pyx_clineno = __LINE__; goto __pyx_L2_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_INCREF(__pyx_n_s_Seq); PyList_SET_ITEM(__pyx_t_1, 0, __pyx_n_s_Seq); __Pyx_GIVEREF(__pyx_n_s_Seq); - __pyx_t_2 = __Pyx_Import(__pyx_n_s_Bio_Seq, __pyx_t_1, -1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 20; __pyx_clineno = __LINE__; goto __pyx_L2_error;} + __pyx_t_2 = __Pyx_Import(__pyx_n_s_Bio_Seq, __pyx_t_1, -1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 30; __pyx_clineno = __LINE__; goto __pyx_L2_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_ImportFrom(__pyx_t_2, __pyx_n_s_Seq); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 20; __pyx_clineno = __LINE__; goto __pyx_L2_error;} + __pyx_t_1 = __Pyx_ImportFrom(__pyx_t_2, __pyx_n_s_Seq); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 30; __pyx_clineno = __LINE__; goto __pyx_L2_error;} __Pyx_GOTREF(__pyx_t_1); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_Seq, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 20; __pyx_clineno = __LINE__; goto __pyx_L2_error;} + if (PyDict_SetItem(__pyx_d, __pyx_n_s_Seq, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 30; __pyx_clineno = __LINE__; goto __pyx_L2_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; } /*else:*/ { - /* "fuzzysearch/_generic_search.pyx":24 + /* "fuzzysearch/_generic_search.pyx":34 * pass * else: * ALLOWED_TYPES += (Seq,) # <<<<<<<<<<<<<< * * */ - __pyx_t_2 = __Pyx_GetModuleGlobalName(__pyx_n_s_ALLOWED_TYPES); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 24; __pyx_clineno = __LINE__; goto __pyx_L4_except_error;} + __pyx_t_2 = __Pyx_GetModuleGlobalName(__pyx_n_s_ALLOWED_TYPES); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 34; __pyx_clineno = __LINE__; goto __pyx_L4_except_error;} __Pyx_GOTREF(__pyx_t_2); - __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_Seq); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 24; __pyx_clineno = __LINE__; goto __pyx_L4_except_error;} + __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_Seq); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 34; __pyx_clineno = __LINE__; goto __pyx_L4_except_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_6 = PyTuple_New(1); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 24; __pyx_clineno = __LINE__; goto __pyx_L4_except_error;} + __pyx_t_6 = PyTuple_New(1); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 34; __pyx_clineno = __LINE__; goto __pyx_L4_except_error;} __Pyx_GOTREF(__pyx_t_6); PyTuple_SET_ITEM(__pyx_t_6, 0, __pyx_t_1); __Pyx_GIVEREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = PyNumber_InPlaceAdd(__pyx_t_2, __pyx_t_6); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 24; __pyx_clineno = __LINE__; goto __pyx_L4_except_error;} + __pyx_t_1 = PyNumber_InPlaceAdd(__pyx_t_2, __pyx_t_6); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 34; __pyx_clineno = __LINE__; goto __pyx_L4_except_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - if (PyDict_SetItem(__pyx_d, __pyx_n_s_ALLOWED_TYPES, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 24; __pyx_clineno = __LINE__; goto __pyx_L4_except_error;} + if (PyDict_SetItem(__pyx_d, __pyx_n_s_ALLOWED_TYPES, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 34; __pyx_clineno = __LINE__; goto __pyx_L4_except_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; } __Pyx_XDECREF(__pyx_t_3); __pyx_t_3 = 0; @@ -4360,7 +4054,7 @@ PyMODINIT_FUNC PyInit__generic_search(void) __Pyx_XDECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_XDECREF(__pyx_t_2); __pyx_t_2 = 0; - /* "fuzzysearch/_generic_search.pyx":21 + /* "fuzzysearch/_generic_search.pyx":31 * try: * from Bio.Seq import Seq * except ImportError: # <<<<<<<<<<<<<< @@ -4387,40 +4081,28 @@ PyMODINIT_FUNC PyInit__generic_search(void) __pyx_L9_try_end:; } - /* "fuzzysearch/_generic_search.pyx":27 + /* "fuzzysearch/_generic_search.pyx":37 * * * def c_find_near_matches_generic_linear_programming(subsequence, sequence, # <<<<<<<<<<<<<< * max_substitutions, * max_insertions, */ - __pyx_t_1 = PyCFunction_NewEx(&__pyx_mdef_11fuzzysearch_15_generic_search_1c_find_near_matches_generic_linear_programming, NULL, __pyx_n_s_fuzzysearch__generic_search); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 27; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyCFunction_NewEx(&__pyx_mdef_11fuzzysearch_15_generic_search_1c_find_near_matches_generic_linear_programming, NULL, __pyx_n_s_fuzzysearch__generic_search); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 37; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_c_find_near_matches_generic_line, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 27; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_d, __pyx_n_s_c_find_near_matches_generic_line, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 37; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "fuzzysearch/_generic_search.pyx":62 - * ) - * - * def _c_find_near_matches_generic_linear_programming( # <<<<<<<<<<<<<< - * char* subsequence, size_t subseq_len, - * char* sequence, size_t seq_len, - */ - __pyx_t_1 = PyCFunction_NewEx(&__pyx_mdef_11fuzzysearch_15_generic_search_3_c_find_near_matches_generic_linear_programming, NULL, __pyx_n_s_fuzzysearch__generic_search); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 62; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_c_find_near_matches_generic_lin, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 62; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - - /* "fuzzysearch/_generic_search.pyx":236 + /* "fuzzysearch/_generic_search.pyx":250 * * * def c_find_near_matches_generic_ngrams(subsequence, sequence, # <<<<<<<<<<<<<< * max_substitutions, max_insertions, * max_deletions, max_l_dist=None): */ - __pyx_t_1 = PyCFunction_NewEx(&__pyx_mdef_11fuzzysearch_15_generic_search_5c_find_near_matches_generic_ngrams, NULL, __pyx_n_s_fuzzysearch__generic_search); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 236; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyCFunction_NewEx(&__pyx_mdef_11fuzzysearch_15_generic_search_3c_find_near_matches_generic_ngrams, NULL, __pyx_n_s_fuzzysearch__generic_search); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 250; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_c_find_near_matches_generic_ngra, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 236; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_d, __pyx_n_s_c_find_near_matches_generic_ngra, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 250; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; /* "fuzzysearch/_generic_search.pyx":1 @@ -5075,106 +4757,6 @@ bad: return (target_type) value; \ } -#if CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3 - #if CYTHON_USE_PYLONG_INTERNALS - #include "longintrepr.h" - #endif -#endif -static CYTHON_INLINE size_t __Pyx_PyInt_As_size_t(PyObject *x) { - const size_t neg_one = (size_t) -1, const_zero = 0; - const int is_unsigned = neg_one > const_zero; -#if PY_MAJOR_VERSION < 3 - if (likely(PyInt_Check(x))) { - if (sizeof(size_t) < sizeof(long)) { - __PYX_VERIFY_RETURN_INT(size_t, long, PyInt_AS_LONG) - } else { - long val = PyInt_AS_LONG(x); - if (is_unsigned && unlikely(val < 0)) { - PyErr_SetString(PyExc_OverflowError, - "can't convert negative value to size_t"); - return (size_t) -1; - } - return (size_t) val; - } - } else -#endif - if (likely(PyLong_Check(x))) { - if (is_unsigned) { -#if CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3 - #if CYTHON_USE_PYLONG_INTERNALS - if (sizeof(digit) <= sizeof(size_t)) { - switch (Py_SIZE(x)) { - case 0: return 0; - case 1: return (size_t) ((PyLongObject*)x)->ob_digit[0]; - } - } - #endif -#endif - if (unlikely(Py_SIZE(x) < 0)) { - PyErr_SetString(PyExc_OverflowError, - "can't convert negative value to size_t"); - return (size_t) -1; - } - if (sizeof(size_t) <= sizeof(unsigned long)) { - __PYX_VERIFY_RETURN_INT(size_t, unsigned long, PyLong_AsUnsignedLong) - } else if (sizeof(size_t) <= sizeof(unsigned long long)) { - __PYX_VERIFY_RETURN_INT(size_t, unsigned long long, PyLong_AsUnsignedLongLong) - } - } else { -#if CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3 - #if CYTHON_USE_PYLONG_INTERNALS - if (sizeof(digit) <= sizeof(size_t)) { - switch (Py_SIZE(x)) { - case 0: return 0; - case 1: return +(size_t) ((PyLongObject*)x)->ob_digit[0]; - case -1: return -(size_t) ((PyLongObject*)x)->ob_digit[0]; - } - } - #endif -#endif - if (sizeof(size_t) <= sizeof(long)) { - __PYX_VERIFY_RETURN_INT(size_t, long, PyLong_AsLong) - } else if (sizeof(size_t) <= sizeof(long long)) { - __PYX_VERIFY_RETURN_INT(size_t, long long, PyLong_AsLongLong) - } - } - { -#if CYTHON_COMPILING_IN_PYPY && !defined(_PyLong_AsByteArray) - PyErr_SetString(PyExc_RuntimeError, - "_PyLong_AsByteArray() not available in PyPy, cannot convert large numbers"); -#else - size_t val; - PyObject *v = __Pyx_PyNumber_Int(x); - #if PY_MAJOR_VERSION < 3 - if (likely(v) && !PyLong_Check(v)) { - PyObject *tmp = v; - v = PyNumber_Long(tmp); - Py_DECREF(tmp); - } - #endif - if (likely(v)) { - int one = 1; int is_little = (int)*(unsigned char *)&one; - unsigned char *bytes = (unsigned char *)&val; - int ret = _PyLong_AsByteArray((PyLongObject *)v, - bytes, sizeof(val), - is_little, !is_unsigned); - Py_DECREF(v); - if (likely(!ret)) - return val; - } -#endif - return (size_t) -1; - } - } else { - size_t val; - PyObject *tmp = __Pyx_PyNumber_Int(x); - if (!tmp) return (size_t) -1; - val = __Pyx_PyInt_As_size_t(tmp); - Py_DECREF(tmp); - return val; - } -} - #if CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3 #if CYTHON_USE_PYLONG_INTERNALS #include "longintrepr.h" @@ -5375,6 +4957,106 @@ static CYTHON_INLINE char __Pyx_PyInt_As_char(PyObject *x) { } } +#if CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3 + #if CYTHON_USE_PYLONG_INTERNALS + #include "longintrepr.h" + #endif +#endif +static CYTHON_INLINE size_t __Pyx_PyInt_As_size_t(PyObject *x) { + const size_t neg_one = (size_t) -1, const_zero = 0; + const int is_unsigned = neg_one > const_zero; +#if PY_MAJOR_VERSION < 3 + if (likely(PyInt_Check(x))) { + if (sizeof(size_t) < sizeof(long)) { + __PYX_VERIFY_RETURN_INT(size_t, long, PyInt_AS_LONG) + } else { + long val = PyInt_AS_LONG(x); + if (is_unsigned && unlikely(val < 0)) { + PyErr_SetString(PyExc_OverflowError, + "can't convert negative value to size_t"); + return (size_t) -1; + } + return (size_t) val; + } + } else +#endif + if (likely(PyLong_Check(x))) { + if (is_unsigned) { +#if CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3 + #if CYTHON_USE_PYLONG_INTERNALS + if (sizeof(digit) <= sizeof(size_t)) { + switch (Py_SIZE(x)) { + case 0: return 0; + case 1: return (size_t) ((PyLongObject*)x)->ob_digit[0]; + } + } + #endif +#endif + if (unlikely(Py_SIZE(x) < 0)) { + PyErr_SetString(PyExc_OverflowError, + "can't convert negative value to size_t"); + return (size_t) -1; + } + if (sizeof(size_t) <= sizeof(unsigned long)) { + __PYX_VERIFY_RETURN_INT(size_t, unsigned long, PyLong_AsUnsignedLong) + } else if (sizeof(size_t) <= sizeof(unsigned long long)) { + __PYX_VERIFY_RETURN_INT(size_t, unsigned long long, PyLong_AsUnsignedLongLong) + } + } else { +#if CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3 + #if CYTHON_USE_PYLONG_INTERNALS + if (sizeof(digit) <= sizeof(size_t)) { + switch (Py_SIZE(x)) { + case 0: return 0; + case 1: return +(size_t) ((PyLongObject*)x)->ob_digit[0]; + case -1: return -(size_t) ((PyLongObject*)x)->ob_digit[0]; + } + } + #endif +#endif + if (sizeof(size_t) <= sizeof(long)) { + __PYX_VERIFY_RETURN_INT(size_t, long, PyLong_AsLong) + } else if (sizeof(size_t) <= sizeof(long long)) { + __PYX_VERIFY_RETURN_INT(size_t, long long, PyLong_AsLongLong) + } + } + { +#if CYTHON_COMPILING_IN_PYPY && !defined(_PyLong_AsByteArray) + PyErr_SetString(PyExc_RuntimeError, + "_PyLong_AsByteArray() not available in PyPy, cannot convert large numbers"); +#else + size_t val; + PyObject *v = __Pyx_PyNumber_Int(x); + #if PY_MAJOR_VERSION < 3 + if (likely(v) && !PyLong_Check(v)) { + PyObject *tmp = v; + v = PyNumber_Long(tmp); + Py_DECREF(tmp); + } + #endif + if (likely(v)) { + int one = 1; int is_little = (int)*(unsigned char *)&one; + unsigned char *bytes = (unsigned char *)&val; + int ret = _PyLong_AsByteArray((PyLongObject *)v, + bytes, sizeof(val), + is_little, !is_unsigned); + Py_DECREF(v); + if (likely(!ret)) + return val; + } +#endif + return (size_t) -1; + } + } else { + size_t val; + PyObject *tmp = __Pyx_PyNumber_Int(x); + if (!tmp) return (size_t) -1; + val = __Pyx_PyInt_As_size_t(tmp); + Py_DECREF(tmp); + return val; + } +} + static CYTHON_INLINE PyObject* __Pyx_PyInt_From_int(int value) { const int neg_one = (int) -1, const_zero = 0; const int is_unsigned = neg_one > const_zero; diff --git a/fuzzysearch/_generic_search.pyx b/fuzzysearch/_generic_search.pyx index 814de6e..d1c5060 100644 --- a/fuzzysearch/_generic_search.pyx +++ b/fuzzysearch/_generic_search.pyx @@ -2,8 +2,18 @@ from sys import maxint import six from fuzzysearch.common import Match from libc.stdlib cimport malloc, free, realloc -from libc.string cimport strstr, strncpy +cdef extern from "kmp.h": + struct KMPstate: + pass # no need to specify the fields if they aren't accessed directly + + void preKMP(const char *subsequence, int subsequence_len, int *kmpNext) + + KMPstate KMP_init(const char *subseq, int subseq_len, + const char *seq, int seq_len, + int *kmpNext) + + const char* KMP_find_next(KMPstate *kmp_state) __all__ = [ 'c_find_near_matches_generic_linear_programming', @@ -47,8 +57,8 @@ def c_find_near_matches_generic_linear_programming(subsequence, sequence, if not subsequence: raise ValueError('Given subsequence is empty!') - c_subsequence = subsequence - c_sequence = sequence + cdef const char *c_subsequence = subsequence + cdef const char *c_sequence = sequence return _c_find_near_matches_generic_linear_programming( c_subsequence, len(subsequence), @@ -59,9 +69,12 @@ def c_find_near_matches_generic_linear_programming(subsequence, sequence, max_l_dist if max_l_dist is not None else (1<<29), ) -def _c_find_near_matches_generic_linear_programming( - char* subsequence, size_t subseq_len, - char* sequence, size_t seq_len, +# The following MUST be a cdef, otherwise Cython copies the sequence and +# subsequence strings, which means if they contain null bytes the data after +# the first null byte will not be copied. +cdef _c_find_near_matches_generic_linear_programming( + const char* subsequence, size_t subseq_len, + const char* sequence, size_t seq_len, unsigned int max_substitutions, unsigned int max_insertions, unsigned int max_deletions, @@ -90,11 +103,12 @@ def _c_find_near_matches_generic_linear_programming( matches = [] cdef size_t index - cdef char charchar + cdef char seq_char + try: index = 0 have_realloced = False - for charchar in sequence[:seq_len]: + for seq_char in sequence[:seq_len]: candidates[n_candidates] = GenericSearchCandidate(index, 0, 0, 0, 0, 0) n_candidates += 1 @@ -110,7 +124,7 @@ def _c_find_near_matches_generic_linear_programming( have_realloced = True # if this sequence char is the candidate's next expected char - if charchar == subsequence[cand.subseq_index]: + if seq_char == subsequence[cand.subseq_index]: # if reached the end of the subsequence, return a match if cand.subseq_index == subseq_len_minus_one: matches.append(Match(cand.start, index + 1, cand.l_dist)) @@ -181,7 +195,7 @@ def _c_find_near_matches_generic_linear_programming( # otherwise, if skipping n_skipped sub-sequence chars # reaches a sub-sequence char identical to this sequence # char ... - elif charchar == subsequence[cand.subseq_index + n_skipped]: + elif seq_char == subsequence[cand.subseq_index + n_skipped]: # if this is the last char of the sub-sequence, yield # a match if cand.subseq_index + n_skipped + 1 == subseq_len: @@ -269,35 +283,32 @@ def c_find_near_matches_generic_ngrams(subsequence, sequence, c_max_substitutions + c_max_insertions + c_max_deletions, ) - cdef char* c_sequence = sequence - cdef char* c_subsequence = subsequence - cdef char* ngram_str + cdef const char* c_sequence = sequence + cdef const char* c_subsequence = subsequence cdef size_t ngram_len = _subseq_len // (c_max_l_dist + 1) if ngram_len == 0: raise ValueError('the subsequence length must be greater than max_l_dist') - ngram_str = malloc((ngram_len + 1) * sizeof(char)) - if ngram_str is NULL: - raise MemoryError() - cdef int index, small_search_start_index cdef size_t ngram_start - cdef char *match_ptr - matches = [] + cdef const char *match_ptr + cdef int *kmpNext + cdef KMPstate kmp_state + kmpNext = malloc(ngram_len * sizeof(int)) + if kmpNext is NULL: + raise MemoryError() try: - ngram_str[ngram_len] = 0 - + matches = [] for ngram_start in xrange(0, _subseq_len - ngram_len + 1, ngram_len): - strncpy(ngram_str, c_subsequence + ngram_start, ngram_len) + preKMP(c_subsequence + ngram_start, ngram_len, kmpNext) - # TODO: handle null characters properly! - match_ptr = strstr(c_sequence, ngram_str) + kmp_state = KMP_init(c_subsequence + ngram_start, ngram_len, c_sequence, _seq_len, kmpNext) + match_ptr = KMP_find_next(&kmp_state) while match_ptr != NULL: - index = (match_ptr - c_sequence) - small_search_start_index = index - ngram_start - c_max_l_dist + small_search_start_index = (match_ptr - c_sequence) - ngram_start - c_max_l_dist small_search_length = _subseq_len + (2 * c_max_l_dist) if small_search_start_index < 0: small_search_length += small_search_start_index @@ -315,9 +326,9 @@ def c_find_near_matches_generic_ngrams(subsequence, sequence, start=match.start + small_search_start_index, end=match.end + small_search_start_index, )) - match_ptr = strstr(match_ptr + 1, ngram_str) + match_ptr = KMP_find_next(&kmp_state) finally: - free(ngram_str) + free(kmpNext) return matches \ No newline at end of file diff --git a/setup.py b/setup.py index d4ec900..f858422 100644 --- a/setup.py +++ b/setup.py @@ -27,7 +27,8 @@ _common_module = Extension( ) _generic_search_module = Extension( 'fuzzysearch._generic_search', - sources=['fuzzysearch/_generic_search.c'], + sources=['fuzzysearch/_generic_search.c', 'fuzzysearch/kmp.c'], + include_dirs=['.'], ) setup( diff --git a/tests/test_generic_search.py b/tests/test_generic_search.py index 86efe07..46d7b24 100644 --- a/tests/test_generic_search.py +++ b/tests/test_generic_search.py @@ -130,6 +130,17 @@ class TestGenericSearchBase(object): [Match(start=3, end=5, dist=1)], ) + def test_null_bytes(self): + self.assertEqual( + self.search('abc', 'xx\0abcxx', 0, 0, 0, 0), + [Match(start=3, end=6, dist=0)], + ) + + self.assertEqual( + self.search('a\0b', 'xxa\0bcxx', 0, 0, 0, 0), + [Match(start=2, end=5, dist=0)], + ) + class TestGenericSearch(TestGenericSearchBase, unittest.TestCase): def search(self, pattern, sequence, max_subs, max_ins, max_dels,