diff --git a/src/fuzzysearch/_common.c b/src/fuzzysearch/_common.c index b8044f8..a93b4e6 100644 --- a/src/fuzzysearch/_common.c +++ b/src/fuzzysearch/_common.c @@ -1,3 +1,4 @@ +#define PY_SSIZE_T_CLEAN #include <Python.h> #include "src/fuzzysearch/memmem.h" @@ -34,8 +35,8 @@ static PyObject * search_exact_byteslike(PyObject *self, PyObject *args, PyObject *kwdict) { /* input params */ const char *subseq, *seq; - int subseq_len, seq_len; - long int start_index=0, end_index=-1; + Py_ssize_t subseq_len, seq_len; + Py_ssize_t start_index=0, end_index=-1; static char *kwlist[] = {"subsequence", "sequence", "start_index", "end_index", NULL}; @@ -125,9 +126,11 @@ count_differences_with_maximum_byteslike(PyObject *self, PyObject *args) { /* input params */ const char *seq1, *seq2; - int seq1_len, seq2_len, max_differences; + Py_ssize_t seq1_len, seq2_len; + int max_differences; - int i, n_differences; + Py_ssize_t i; + int n_differences; if (!PyArg_ParseTuple( args, diff --git a/src/fuzzysearch/_substitutions_only.c b/src/fuzzysearch/_substitutions_only.c index 10ec17d..ddf7437 100644 --- a/src/fuzzysearch/_substitutions_only.c +++ b/src/fuzzysearch/_substitutions_only.c @@ -1,3 +1,4 @@ +#define PY_SSIZE_T_CLEAN #include <Python.h> #if PY_MAJOR_VERSION >= 3 @@ -22,7 +23,7 @@ #ifdef IS_PY3K -#define PyInt_FromLong(x) PyLong_FromLong(x) +#define PyInt_FromSsize_t(x) PyLong_FromSsize_t(x) #endif #define DECLARE_VARS \ PyObject *results; \ @@ -32,7 +33,7 @@ if (unlikely(!results)) \ return NULL; #define OUTPUT_VALUE(x) do { \ - next_result = PyInt_FromLong((x)); \ + next_result = PyInt_FromSsize_t((x)); \ if (unlikely(next_result == NULL)) { \ Py_DECREF(results); \ return NULL; \ diff --git a/src/fuzzysearch/_substitutions_only_lp_template.h b/src/fuzzysearch/_substitutions_only_lp_template.h index ac2509b..982a1db 100644 --- a/src/fuzzysearch/_substitutions_only_lp_template.h +++ b/src/fuzzysearch/_substitutions_only_lp_template.h @@ -21,11 +21,12 @@ FUNCTION_NAME(PyObject *self, PyObject *args) 
/* input params */ const char *subsequence; const char *sequence; - int subseq_len_input, seq_len_input, max_substitutions_input; - unsigned int subseq_len, seq_len, max_substitutions; + Py_ssize_t subseq_len, seq_len; + int max_substitutions_input; + unsigned int max_substitutions; unsigned int *sub_counts; - unsigned int seq_idx, subseq_idx, count_idx; + Py_ssize_t seq_idx, subseq_idx, count_idx; DECLARE_VARS; @@ -42,8 +43,8 @@ FUNCTION_NAME(PyObject *self, PyObject *args) if (unlikely(!PyArg_ParseTuple( args, ARGSPEC, - &subsequence, &subseq_len_input, - &sequence, &seq_len_input, + &subsequence, &subseq_len, + &sequence, &seq_len, &max_substitutions_input ))) { return NULL; @@ -55,15 +56,11 @@ FUNCTION_NAME(PyObject *self, PyObject *args) } max_substitutions = (unsigned int) max_substitutions_input; - if (unlikely(subseq_len_input < 0 || seq_len_input < 0)) { + if (unlikely(subseq_len < 0 || seq_len < 0)) { PyErr_SetString(PyExc_Exception, "an unknown error occurred"); return NULL; } - subseq_len = (unsigned int) subseq_len_input; - seq_len = (unsigned int) seq_len_input; - /* this is required because simple_memmem_with_needle_sum() returns the - haystack if the needle is empty */ if (unlikely(subseq_len == 0)) { PyErr_SetString(PyExc_ValueError, "subsequence must not be empty"); return NULL; diff --git a/src/fuzzysearch/_substitutions_only_ngrams_template.h b/src/fuzzysearch/_substitutions_only_ngrams_template.h index 3df36b8..0bf0b10 100644 --- a/src/fuzzysearch/_substitutions_only_ngrams_template.h +++ b/src/fuzzysearch/_substitutions_only_ngrams_template.h @@ -24,8 +24,9 @@ FUNCTION_NAME(PyObject *self, PyObject *args) /* input params */ const char *subsequence; const char *sequence; - int subseq_len_input, seq_len_input, max_substitutions_input; - unsigned int subseq_len, seq_len, max_substitutions; + Py_ssize_t subseq_len, seq_len; + int max_substitutions_input; + unsigned int max_substitutions; unsigned int ngram_len, ngram_start, subseq_len_after_ngram; 
const char *match_ptr, *seq_ptr, *subseq_ptr, *subseq_end; @@ -47,8 +48,8 @@ FUNCTION_NAME(PyObject *self, PyObject *args) if (unlikely(!PyArg_ParseTuple( args, ARGSPEC, - &subsequence, &subseq_len_input, - &sequence, &seq_len_input, + &subsequence, &subseq_len, + &sequence, &seq_len, &max_substitutions_input ))) { return NULL; @@ -60,13 +61,13 @@ FUNCTION_NAME(PyObject *self, PyObject *args) } max_substitutions = (unsigned int) max_substitutions_input; - if (unlikely(subseq_len_input < 0 || seq_len_input < 0)) { + if (unlikely(subseq_len < 0 || seq_len < 0)) { PyErr_SetString(PyExc_Exception, "an unknown error occurred"); return NULL; } - subseq_len = (unsigned int) subseq_len_input; - seq_len = (unsigned int) seq_len_input; + /* this is required because simple_memmem_with_needle_sum() returns the + haystack if the needle is empty */ if (unlikely(subseq_len == 0)) { PyErr_SetString(PyExc_ValueError, "subsequence must not be empty"); return NULL; @@ -78,7 +79,7 @@ FUNCTION_NAME(PyObject *self, PyObject *args) RETURN_AT_END; } - ngram_len = subseq_len / (max_substitutions + 1); + ngram_len = ((unsigned long) subseq_len) / ((unsigned long) max_substitutions + 1); if (unlikely(ngram_len <= 0)) { /* ngram_len <= 0 * * IFF * @@ -87,7 +88,7 @@ FUNCTION_NAME(PyObject *self, PyObject *args) * max_substitutions >= subseq_len * * * * So the sub-sequence may be found at any index. 
*/ - for (ngram_start = 0; ngram_start <= seq_len - subseq_len; ngram_start++) { + for (ngram_start = 0; ngram_start + subseq_len <= seq_len; ngram_start++) { OUTPUT_VALUE(ngram_start); } RETURN_AT_END; @@ -95,7 +96,7 @@ FUNCTION_NAME(PyObject *self, PyObject *args) subseq_end = subsequence + subseq_len; - for (ngram_start = 0; ngram_start <= subseq_len - ngram_len; ngram_start += ngram_len) { + for (ngram_start = 0; ngram_start + ngram_len <= subseq_len; ngram_start += ngram_len) { subseq_len_after_ngram = subseq_len - (ngram_start + ngram_len); subseq_sum = calc_sum(subsequence + ngram_start, ngram_len);