fix incorrect ref counting
This commit is contained in:
parent
74e41ad716
commit
c6eebb70a5
|
@ -43,7 +43,7 @@ jobs:
|
|||
- name: Run Unit Tests
|
||||
run: |
|
||||
pip install .
|
||||
pip install pytest hypothesis
|
||||
pip install pytest hypothesis pandas
|
||||
pytest
|
||||
|
||||
|
||||
|
|
4
setup.py
4
setup.py
|
@ -38,8 +38,8 @@ class BuildExt(build_ext):
|
|||
elif ct == 'msvc':
|
||||
opts.append('/DVERSION_INFO=\\"%s\\"' % self.distribution.get_version())
|
||||
for ext in self.extensions:
|
||||
ext.extra_compile_args = opts
|
||||
ext.extra_link_args = link_opts
|
||||
ext.extra_compile_args += opts
|
||||
ext.extra_link_args += link_opts
|
||||
build_ext.build_extensions(self)
|
||||
|
||||
setup(
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -11,6 +11,7 @@ from cpython.list cimport PyList_New
|
|||
from cpython.list cimport PyList_SET_ITEM
|
||||
from cpython.object cimport PyObject
|
||||
from cpython.ref cimport Py_INCREF
|
||||
from cpython.ref cimport Py_DECREF
|
||||
|
||||
import heapq
|
||||
|
||||
|
@ -358,6 +359,10 @@ cdef inline extract_dict(scorer_context context, choices, processor, size_t limi
|
|||
score = context.scorer(context.context, choice, score_cutoff)
|
||||
|
||||
if score >= score_cutoff:
|
||||
# especially the key object might be created on the fly by e.g. pandas.DataFrame
|
||||
# so we need to ensure Python does not deallocate it
|
||||
Py_INCREF(choice)
|
||||
Py_INCREF(choice_key)
|
||||
results.push_back(DictMatchElem(score, i, <PyObject*>choice, <PyObject*>choice_key))
|
||||
index += 1
|
||||
|
||||
|
@ -379,10 +384,15 @@ cdef inline extract_dict(scorer_context context, choices, processor, size_t limi
|
|||
# https://stackoverflow.com/questions/43553763/cythonize-list-of-all-splits-of-a-string/43557675#43557675
|
||||
PyList_SET_ITEM(result_list, i,
|
||||
<object>Py_BuildValue("OdO",
|
||||
<PyObject*>choices[<object>results[i].key],
|
||||
<PyObject*>results[i].choice,
|
||||
results[i].score,
|
||||
<PyObject*>results[i].key))
|
||||
|
||||
# release the references that were taken with Py_INCREF when the matches were collected
|
||||
for i in range(results.size()):
|
||||
Py_DECREF(<object>results[i].choice)
|
||||
Py_DECREF(<object>results[i].key)
|
||||
|
||||
return result_list
|
||||
|
||||
|
||||
|
@ -393,7 +403,7 @@ cdef inline extract_list(scorer_context context, choices, processor, size_t limi
|
|||
# TODO: possibly a smaller vector would be good to reduce memory usage
|
||||
cdef vector[ListMatchElem] results
|
||||
results.reserve(<size_t>len(choices))
|
||||
cdef object result_list
|
||||
cdef list result_list
|
||||
|
||||
if processor is not None:
|
||||
for choice in choices:
|
||||
|
@ -751,4 +761,3 @@ def extract_iter(query, choices, scorer=fuzz.WRatio, processor=utils.default_pro
|
|||
if py_score >= score_cutoff:
|
||||
yield(choice, py_score, index)
|
||||
index += 1
|
||||
|
|
@ -1 +1 @@
|
|||
Subproject commit ea6f17dd4d3af1f15f46ff608da7cfa28625ed5a
|
||||
Subproject commit 91f20cd9930e620c7c250381bcca640570480dbd
|
|
@ -3,6 +3,6 @@ rapid string matching library
|
|||
"""
|
||||
__author__ = "Max Bachmann"
|
||||
__license__ = "MIT"
|
||||
__version__ = "1.1.1"
|
||||
__version__ = "1.1.2"
|
||||
|
||||
from rapidfuzz import process, fuzz, utils, levenshtein, string_metric
|
||||
|
|
|
@ -5,6 +5,7 @@ import unittest
|
|||
import pytest
|
||||
|
||||
from rapidfuzz import process, fuzz, utils
|
||||
import pandas as pd
|
||||
|
||||
class ProcessTest(unittest.TestCase):
|
||||
def setUp(self):
|
||||
|
@ -187,6 +188,12 @@ class ProcessTest(unittest.TestCase):
|
|||
best = process.extractOne(query, choices)
|
||||
self.assertEqual(best[0], choices[1])
|
||||
|
||||
def testIssue81(self):
|
||||
# this mostly tests whether this segfaults due to incorrect ref counting
|
||||
choices = pd.Series(['test color brightness', 'test lemon', 'test lavender'], index=[67478, 67479, 67480])
|
||||
matches = process.extract("test", choices)
|
||||
assert matches == [('test color brightness', 90.0, 67478), ('test lemon', 90.0, 67479), ('test lavender', 90.0, 67480)]
|
||||
|
||||
|
||||
def custom_scorer(s1, s2, processor=None, score_cutoff=0):
|
||||
return fuzz.ratio(s1, s2, processor=processor, score_cutoff=score_cutoff)
|
||||
|
|
Loading…
Reference in New Issue