reduce string copies and tarball size
This commit is contained in:
parent
f0f8247d02
commit
15c6dbb6fb
|
@ -5,4 +5,6 @@ include LICENSE
|
|||
recursive-include src/rapidfuzz-cpp/src *.hpp *.txx
|
||||
recursive-include src/rapidfuzz-cpp/extern/boost *
|
||||
recursive-include src/rapidfuzz-cpp/extern/difflib *
|
||||
recursive-include src/rapidfuzz-cpp/extern/nonstd *
|
||||
recursive-include extern/variant *
|
||||
include src/rapidfuzz-cpp/LICENSE
|
||||
|
|
|
@ -0,0 +1,7 @@
|
|||
Boost Software License - Version 1.0 - August 17th, 2003
|
||||
|
||||
Permission is hereby granted, free of charge, to any person or organization obtaining a copy of the software and accompanying documentation covered by this license (the "Software") to use, reproduce, display, distribute, execute, and transmit the Software, and to prepare derivative works of the Software, and to permit third-parties to whom the Software is furnished to do so, all subject to the following:
|
||||
|
||||
The copyright notices in the Software and this entire statement, including the above license grant, this restriction and the following disclaimer, must be included in all copies of the Software, in whole or in part, and all derivative works of the Software, unless such copies or derivative works are solely in the form of machine-executable object code generated by a source language processor.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
File diff suppressed because it is too large
Load Diff
6
setup.py
6
setup.py
|
@ -53,19 +53,19 @@ setup(
|
|||
Extension(
|
||||
'rapidfuzz.levenshtein',
|
||||
['src/py_levenshtein.cpp'],
|
||||
include_dirs=["src/rapidfuzz-cpp/src", "src/rapidfuzz-cpp/extern"],
|
||||
include_dirs=["src/rapidfuzz-cpp/src", "src/rapidfuzz-cpp/extern", "extern"],
|
||||
language='c++',
|
||||
),
|
||||
Extension(
|
||||
'rapidfuzz.fuzz',
|
||||
['src/py_fuzz.cpp'],
|
||||
include_dirs=["src/rapidfuzz-cpp/src", "src/rapidfuzz-cpp/extern"],
|
||||
include_dirs=["src/rapidfuzz-cpp/src", "src/rapidfuzz-cpp/extern", "extern"],
|
||||
language='c++',
|
||||
),
|
||||
Extension(
|
||||
'rapidfuzz.utils',
|
||||
['src/py_utils.cpp'],
|
||||
include_dirs=["src/rapidfuzz-cpp/src", "src/rapidfuzz-cpp/extern"],
|
||||
include_dirs=["src/rapidfuzz-cpp/src", "src/rapidfuzz-cpp/extern", "extern"],
|
||||
language='c++',
|
||||
),
|
||||
],
|
||||
|
|
274
src/py_fuzz.cpp
274
src/py_fuzz.cpp
|
@ -6,12 +6,13 @@
|
|||
#include <Python.h>
|
||||
#include <string>
|
||||
#include "fuzz.hpp"
|
||||
#include "string_utils.hpp"
|
||||
#include <boost/utility/string_view.hpp>
|
||||
#include "utils.hpp"
|
||||
#include "py_utils.hpp"
|
||||
#include <nonstd/string_view.hpp>
|
||||
#include <boost/optional.hpp>
|
||||
|
||||
namespace fuzz = rapidfuzz::fuzz;
|
||||
namespace string_utils = rapidfuzz::string_utils;
|
||||
namespace utils = rapidfuzz::utils;
|
||||
|
||||
boost::optional<std::pair<wchar_t*, Py_ssize_t>> PyString_AsBuffer(PyObject* str, PyObject *processor) {
|
||||
PyObject *proc_str = PyObject_CallFunctionObjArgs(processor, str, NULL);
|
||||
|
@ -69,17 +70,16 @@ static PyObject* fuzz_impl(T&& scorer, bool processor_default, PyObject* args, P
|
|||
return NULL;
|
||||
}
|
||||
auto result = scorer(
|
||||
boost::wstring_view(s1->first, s1->second),
|
||||
boost::wstring_view(s2->first, s2->second),
|
||||
nonstd::wstring_view(s1->first, s1->second),
|
||||
nonstd::wstring_view(s2->first, s2->second),
|
||||
score_cutoff);
|
||||
|
||||
PyMem_Free(s1->first);
|
||||
PyMem_Free(s2->first);
|
||||
|
||||
return PyFloat_FromDouble(result);
|
||||
|
||||
}
|
||||
|
||||
|
||||
auto s1 = PyString_AsBuffer(py_s1);
|
||||
if(!s1) {
|
||||
return NULL;
|
||||
|
@ -94,16 +94,16 @@ static PyObject* fuzz_impl(T&& scorer, bool processor_default, PyObject* args, P
|
|||
|
||||
if (use_preprocessing(processor, processor_default)) {
|
||||
result = scorer(
|
||||
string_utils::default_process(std::wstring(s1->first, s1->second)),
|
||||
string_utils::default_process(std::wstring(s2->first, s2->second)),
|
||||
utils::default_process(std::wstring(s1->first, s1->second)),
|
||||
utils::default_process(std::wstring(s2->first, s2->second)),
|
||||
score_cutoff);
|
||||
} else {
|
||||
result = scorer(
|
||||
boost::wstring_view(s1->first, s1->second),
|
||||
boost::wstring_view(s2->first, s2->second),
|
||||
nonstd::wstring_view(s1->first, s1->second),
|
||||
nonstd::wstring_view(s2->first, s2->second),
|
||||
score_cutoff);
|
||||
}
|
||||
|
||||
|
||||
PyMem_Free(s1->first);
|
||||
PyMem_Free(s2->first);
|
||||
|
||||
|
@ -130,7 +130,64 @@ PyDoc_STRVAR(ratio_docstring,
|
|||
);
|
||||
|
||||
/*
 * Python entry point for the `ratio` scorer.
 *
 * Arguments (parsed with the "UU|Od" format):
 *   s1, s2        two str objects to compare
 *   processor     optional object; when callable it is applied to s1 and s2
 *                 and the results are scored, otherwise it is forwarded to
 *                 use_preprocessing() together with the default `false`
 *   score_cutoff  optional double, forwarded unchanged to fuzz::ratio
 *
 * Returns a new Python float holding the score, or NULL when argument
 * parsing, PyUnicode_READY or the processor call fails.
 */
static PyObject* ratio(PyObject* /*self*/, PyObject* args, PyObject* keywds) {
    PyObject *py_s1;
    PyObject *py_s2;
    PyObject *processor = NULL;
    double score_cutoff = 0;
    static const char *kwlist[] = {"s1", "s2", "processor", "score_cutoff", NULL};

    if (!PyArg_ParseTupleAndKeywords(args, keywds, "UU|Od", const_cast<char **>(kwlist),
                                     &py_s1, &py_s2, &processor, &score_cutoff)) {
        return NULL;
    }

    if (PyUnicode_READY(py_s1) || PyUnicode_READY(py_s2)) {
        return NULL;
    }

    // processor stays NULL when the caller did not pass one, so guard explicitly.
    if (processor != NULL && PyCallable_Check(processor)) {
        // Each input is processed on its own: s1 -> proc_s1, s2 -> proc_s2.
        PyObject *proc_s1 = PyObject_CallFunctionObjArgs(processor, py_s1, NULL);
        if (proc_s1 == NULL) {
            return NULL;
        }

        PyObject *proc_s2 = PyObject_CallFunctionObjArgs(processor, py_s2, NULL);
        if (proc_s2 == NULL) {
            Py_DecRef(proc_s1);
            return NULL;
        }

        // NOTE(review): the processor results are passed to decode_python_string
        // without a type check; presumably they must be str objects - confirm.
        auto s1_view = decode_python_string(proc_s1);
        auto s2_view = decode_python_string(proc_s2);

        double result = mpark::visit([score_cutoff](auto&& val1, auto&& val2) {
            return fuzz::ratio(val1, val2, score_cutoff);
        }, s1_view, s2_view);

        // The views point into proc_s1/proc_s2, so release them only after scoring.
        Py_DecRef(proc_s1);
        Py_DecRef(proc_s2);

        return PyFloat_FromDouble(result);
    }

    auto s1_view = decode_python_string(py_s1);
    auto s2_view = decode_python_string(py_s2);

    double result;
    if (use_preprocessing(processor, false)) {
        result = mpark::visit([score_cutoff](auto&& val1, auto&& val2) {
            return fuzz::ratio(
                utils::default_process(val1),
                utils::default_process(val2),
                score_cutoff);
        }, s1_view, s2_view);
    } else {
        result = mpark::visit([score_cutoff](auto&& val1, auto&& val2) {
            return fuzz::ratio(val1, val2, score_cutoff);
        }, s1_view, s2_view);
    }

    return PyFloat_FromDouble(result);
}
|
||||
|
||||
|
||||
|
@ -153,7 +210,7 @@ PyDoc_STRVAR(partial_ratio_docstring,
|
|||
);
|
||||
|
||||
/*
 * Python entry point for `partial_ratio`.
 *
 * Forwards args/keywds to fuzz_impl with fuzz::partial_ratio instantiated for
 * nonstd::wstring_view; `false` is passed as fuzz_impl's processor_default.
 */
static PyObject* partial_ratio(PyObject* /*self*/, PyObject* args, PyObject* keywds) {
    return fuzz_impl(fuzz::partial_ratio<nonstd::wstring_view, nonstd::wstring_view>, false, args, keywds);
}
|
||||
|
||||
PyDoc_STRVAR(token_sort_ratio_docstring,
|
||||
|
@ -175,7 +232,64 @@ PyDoc_STRVAR(token_sort_ratio_docstring,
|
|||
);
|
||||
|
||||
/*
 * Python entry point for the `token_sort_ratio` scorer.
 *
 * Arguments (parsed with the "UU|Od" format):
 *   s1, s2        two str objects to compare
 *   processor     optional object; when callable it is applied to s1 and s2
 *                 and the results are scored, otherwise it is forwarded to
 *                 use_preprocessing() together with the default `true`
 *   score_cutoff  optional double, forwarded unchanged to fuzz::token_sort_ratio
 *
 * Returns a new Python float holding the score, or NULL when argument
 * parsing, PyUnicode_READY or the processor call fails.
 */
static PyObject* token_sort_ratio(PyObject* /*self*/, PyObject* args, PyObject* keywds) {
    PyObject *py_s1;
    PyObject *py_s2;
    PyObject *processor = NULL;
    double score_cutoff = 0;
    static const char *kwlist[] = {"s1", "s2", "processor", "score_cutoff", NULL};

    if (!PyArg_ParseTupleAndKeywords(args, keywds, "UU|Od", const_cast<char **>(kwlist),
                                     &py_s1, &py_s2, &processor, &score_cutoff)) {
        return NULL;
    }

    if (PyUnicode_READY(py_s1) || PyUnicode_READY(py_s2)) {
        return NULL;
    }

    // processor stays NULL when the caller did not pass one, so guard explicitly.
    if (processor != NULL && PyCallable_Check(processor)) {
        // Each input is processed on its own: s1 -> proc_s1, s2 -> proc_s2.
        PyObject *proc_s1 = PyObject_CallFunctionObjArgs(processor, py_s1, NULL);
        if (proc_s1 == NULL) {
            return NULL;
        }

        PyObject *proc_s2 = PyObject_CallFunctionObjArgs(processor, py_s2, NULL);
        if (proc_s2 == NULL) {
            Py_DecRef(proc_s1);
            return NULL;
        }

        // NOTE(review): the processor results are passed to decode_python_string
        // without a type check; presumably they must be str objects - confirm.
        auto s1_view = decode_python_string(proc_s1);
        auto s2_view = decode_python_string(proc_s2);

        double result = mpark::visit([score_cutoff](auto&& val1, auto&& val2) {
            return fuzz::token_sort_ratio(val1, val2, score_cutoff);
        }, s1_view, s2_view);

        // The views point into proc_s1/proc_s2, so release them only after scoring.
        Py_DecRef(proc_s1);
        Py_DecRef(proc_s2);

        return PyFloat_FromDouble(result);
    }

    auto s1_view = decode_python_string(py_s1);
    auto s2_view = decode_python_string(py_s2);

    double result;
    if (use_preprocessing(processor, true)) {
        result = mpark::visit([score_cutoff](auto&& val1, auto&& val2) {
            return fuzz::token_sort_ratio(
                utils::default_process(val1),
                utils::default_process(val2),
                score_cutoff);
        }, s1_view, s2_view);
    } else {
        result = mpark::visit([score_cutoff](auto&& val1, auto&& val2) {
            return fuzz::token_sort_ratio(val1, val2, score_cutoff);
        }, s1_view, s2_view);
    }

    return PyFloat_FromDouble(result);
}
|
||||
|
||||
PyDoc_STRVAR(partial_token_sort_ratio_docstring,
|
||||
|
@ -194,7 +308,7 @@ PyDoc_STRVAR(partial_token_sort_ratio_docstring,
|
|||
);
|
||||
|
||||
/*
 * Python entry point for `partial_token_sort_ratio`.
 *
 * Forwards args/keywds to fuzz_impl with fuzz::partial_token_sort_ratio
 * instantiated for nonstd::wstring_view; `true` is passed as fuzz_impl's
 * processor_default.
 */
static PyObject* partial_token_sort_ratio(PyObject* /*self*/, PyObject* args, PyObject* keywds) {
    return fuzz_impl(fuzz::partial_token_sort_ratio<nonstd::wstring_view, nonstd::wstring_view>, true, args, keywds);
}
|
||||
|
||||
PyDoc_STRVAR(token_set_ratio_docstring,
|
||||
|
@ -218,7 +332,64 @@ PyDoc_STRVAR(token_set_ratio_docstring,
|
|||
);
|
||||
|
||||
/*
 * Python entry point for the `token_set_ratio` scorer.
 *
 * Arguments (parsed with the "UU|Od" format):
 *   s1, s2        two str objects to compare
 *   processor     optional object; when callable it is applied to s1 and s2
 *                 and the results are scored, otherwise it is forwarded to
 *                 use_preprocessing() together with the default `true`
 *   score_cutoff  optional double, forwarded unchanged to fuzz::token_set_ratio
 *
 * Returns a new Python float holding the score, or NULL when argument
 * parsing, PyUnicode_READY or the processor call fails.
 */
static PyObject* token_set_ratio(PyObject* /*self*/, PyObject* args, PyObject* keywds) {
    PyObject *py_s1;
    PyObject *py_s2;
    PyObject *processor = NULL;
    double score_cutoff = 0;
    static const char *kwlist[] = {"s1", "s2", "processor", "score_cutoff", NULL};

    if (!PyArg_ParseTupleAndKeywords(args, keywds, "UU|Od", const_cast<char **>(kwlist),
                                     &py_s1, &py_s2, &processor, &score_cutoff)) {
        return NULL;
    }

    if (PyUnicode_READY(py_s1) || PyUnicode_READY(py_s2)) {
        return NULL;
    }

    // processor stays NULL when the caller did not pass one, so guard explicitly.
    if (processor != NULL && PyCallable_Check(processor)) {
        // Each input is processed on its own: s1 -> proc_s1, s2 -> proc_s2.
        PyObject *proc_s1 = PyObject_CallFunctionObjArgs(processor, py_s1, NULL);
        if (proc_s1 == NULL) {
            return NULL;
        }

        PyObject *proc_s2 = PyObject_CallFunctionObjArgs(processor, py_s2, NULL);
        if (proc_s2 == NULL) {
            Py_DecRef(proc_s1);
            return NULL;
        }

        // NOTE(review): the processor results are passed to decode_python_string
        // without a type check; presumably they must be str objects - confirm.
        auto s1_view = decode_python_string(proc_s1);
        auto s2_view = decode_python_string(proc_s2);

        double result = mpark::visit([score_cutoff](auto&& val1, auto&& val2) {
            return fuzz::token_set_ratio(val1, val2, score_cutoff);
        }, s1_view, s2_view);

        // The views point into proc_s1/proc_s2, so release them only after scoring.
        Py_DecRef(proc_s1);
        Py_DecRef(proc_s2);

        return PyFloat_FromDouble(result);
    }

    auto s1_view = decode_python_string(py_s1);
    auto s2_view = decode_python_string(py_s2);

    double result;
    if (use_preprocessing(processor, true)) {
        result = mpark::visit([score_cutoff](auto&& val1, auto&& val2) {
            return fuzz::token_set_ratio(
                utils::default_process(val1),
                utils::default_process(val2),
                score_cutoff);
        }, s1_view, s2_view);
    } else {
        result = mpark::visit([score_cutoff](auto&& val1, auto&& val2) {
            return fuzz::token_set_ratio(val1, val2, score_cutoff);
        }, s1_view, s2_view);
    }

    return PyFloat_FromDouble(result);
}
|
||||
|
||||
PyDoc_STRVAR(partial_token_set_ratio_docstring,
|
||||
|
@ -238,7 +409,7 @@ PyDoc_STRVAR(partial_token_set_ratio_docstring,
|
|||
|
||||
|
||||
/*
 * Python entry point for `partial_token_set_ratio`.
 *
 * Forwards args/keywds to fuzz_impl with fuzz::partial_token_set_ratio
 * instantiated for nonstd::wstring_view; `true` is passed as fuzz_impl's
 * processor_default.
 */
static PyObject* partial_token_set_ratio(PyObject* /*self*/, PyObject* args, PyObject* keywds) {
    return fuzz_impl(fuzz::partial_token_set_ratio<nonstd::wstring_view, nonstd::wstring_view>, true, args, keywds);
}
|
||||
|
||||
PyDoc_STRVAR(token_ratio_docstring,
|
||||
|
@ -258,7 +429,64 @@ PyDoc_STRVAR(token_ratio_docstring,
|
|||
);
|
||||
|
||||
/*
 * Python entry point for the `token_ratio` scorer.
 *
 * Arguments (parsed with the "UU|Od" format):
 *   s1, s2        two str objects to compare
 *   processor     optional object; when callable it is applied to s1 and s2
 *                 and the results are scored, otherwise it is forwarded to
 *                 use_preprocessing() together with the default `true`
 *   score_cutoff  optional double, forwarded unchanged to fuzz::token_ratio
 *
 * Returns a new Python float holding the score, or NULL when argument
 * parsing, PyUnicode_READY or the processor call fails.
 */
static PyObject* token_ratio(PyObject* /*self*/, PyObject* args, PyObject* keywds) {
    PyObject *py_s1;
    PyObject *py_s2;
    PyObject *processor = NULL;
    double score_cutoff = 0;
    static const char *kwlist[] = {"s1", "s2", "processor", "score_cutoff", NULL};

    if (!PyArg_ParseTupleAndKeywords(args, keywds, "UU|Od", const_cast<char **>(kwlist),
                                     &py_s1, &py_s2, &processor, &score_cutoff)) {
        return NULL;
    }

    if (PyUnicode_READY(py_s1) || PyUnicode_READY(py_s2)) {
        return NULL;
    }

    // processor stays NULL when the caller did not pass one, so guard explicitly.
    if (processor != NULL && PyCallable_Check(processor)) {
        // Each input is processed on its own: s1 -> proc_s1, s2 -> proc_s2.
        PyObject *proc_s1 = PyObject_CallFunctionObjArgs(processor, py_s1, NULL);
        if (proc_s1 == NULL) {
            return NULL;
        }

        PyObject *proc_s2 = PyObject_CallFunctionObjArgs(processor, py_s2, NULL);
        if (proc_s2 == NULL) {
            Py_DecRef(proc_s1);
            return NULL;
        }

        // NOTE(review): the processor results are passed to decode_python_string
        // without a type check; presumably they must be str objects - confirm.
        auto s1_view = decode_python_string(proc_s1);
        auto s2_view = decode_python_string(proc_s2);

        double result = mpark::visit([score_cutoff](auto&& val1, auto&& val2) {
            return fuzz::token_ratio(val1, val2, score_cutoff);
        }, s1_view, s2_view);

        // The views point into proc_s1/proc_s2, so release them only after scoring.
        Py_DecRef(proc_s1);
        Py_DecRef(proc_s2);

        return PyFloat_FromDouble(result);
    }

    auto s1_view = decode_python_string(py_s1);
    auto s2_view = decode_python_string(py_s2);

    double result;
    if (use_preprocessing(processor, true)) {
        result = mpark::visit([score_cutoff](auto&& val1, auto&& val2) {
            return fuzz::token_ratio(
                utils::default_process(val1),
                utils::default_process(val2),
                score_cutoff);
        }, s1_view, s2_view);
    } else {
        result = mpark::visit([score_cutoff](auto&& val1, auto&& val2) {
            return fuzz::token_ratio(val1, val2, score_cutoff);
        }, s1_view, s2_view);
    }

    return PyFloat_FromDouble(result);
}
|
||||
|
||||
PyDoc_STRVAR(partial_token_ratio_docstring,
|
||||
|
@ -278,7 +506,7 @@ PyDoc_STRVAR(partial_token_ratio_docstring,
|
|||
);
|
||||
|
||||
/*
 * Python entry point for `partial_token_ratio`.
 *
 * Forwards args/keywds to fuzz_impl with fuzz::partial_token_ratio
 * instantiated for nonstd::wstring_view; `true` is passed as fuzz_impl's
 * processor_default.
 */
static PyObject* partial_token_ratio(PyObject* /*self*/, PyObject* args, PyObject* keywds) {
    return fuzz_impl(fuzz::partial_token_ratio<nonstd::wstring_view, nonstd::wstring_view>, true, args, keywds);
}
|
||||
|
||||
PyDoc_STRVAR(WRatio_docstring,
|
||||
|
@ -297,7 +525,7 @@ PyDoc_STRVAR(WRatio_docstring,
|
|||
);
|
||||
|
||||
/*
 * Python entry point for `WRatio`.
 *
 * Forwards args/keywds to fuzz_impl with fuzz::WRatio instantiated for
 * nonstd::wstring_view; `true` is passed as fuzz_impl's processor_default.
 */
static PyObject* WRatio(PyObject* /*self*/, PyObject* args, PyObject* keywds) {
    return fuzz_impl(fuzz::WRatio<nonstd::wstring_view, nonstd::wstring_view>, true, args, keywds);
}
|
||||
|
||||
PyDoc_STRVAR(QRatio_docstring,
|
||||
|
@ -319,7 +547,7 @@ PyDoc_STRVAR(QRatio_docstring,
|
|||
);
|
||||
|
||||
/*
 * Python entry point for `QRatio`.
 *
 * Forwards args/keywds to fuzz_impl with fuzz::ratio instantiated for
 * nonstd::wstring_view; `false` is passed as fuzz_impl's processor_default.
 */
static PyObject* QRatio(PyObject* /*self*/, PyObject* args, PyObject* keywds) {
    return fuzz_impl(fuzz::ratio<nonstd::wstring_view, nonstd::wstring_view>, false, args, keywds);
}
|
||||
|
||||
PyDoc_STRVAR(quick_lev_ratio_docstring,
|
||||
|
@ -340,7 +568,7 @@ PyDoc_STRVAR(quick_lev_ratio_docstring,
|
|||
);
|
||||
|
||||
/*
 * Python entry point for `quick_lev_ratio`.
 *
 * Forwards args/keywds to fuzz_impl with fuzz::quick_lev_ratio instantiated
 * for nonstd::wstring_view; `true` is passed as fuzz_impl's processor_default.
 */
static PyObject* quick_lev_ratio(PyObject* /*self*/, PyObject* args, PyObject* keywds) {
    return fuzz_impl(fuzz::quick_lev_ratio<nonstd::wstring_view, nonstd::wstring_view>, true, args, keywds);
}
|
||||
|
||||
/* The cast of the function is necessary since PyCFunction values
|
||||
|
|
|
@ -3,8 +3,8 @@
|
|||
|
||||
#define PY_SSIZE_T_CLEAN
|
||||
#include <Python.h>
|
||||
#include <string>
|
||||
#include "levenshtein.hpp"
|
||||
#include "py_utils.hpp"
|
||||
|
||||
namespace levenshtein = rapidfuzz::levenshtein;
|
||||
|
||||
|
@ -34,18 +34,11 @@ PyObject* distance(PyObject* /*self*/, PyObject* args, PyObject* keywds) {
|
|||
return NULL;
|
||||
}
|
||||
|
||||
Py_ssize_t len_s1 = PyUnicode_GET_LENGTH(py_s1);
|
||||
wchar_t* buffer_s1 = PyUnicode_AsWideCharString(py_s1, &len_s1);
|
||||
boost::wstring_view s1(buffer_s1, len_s1);
|
||||
|
||||
Py_ssize_t len_s2 = PyUnicode_GET_LENGTH(py_s2);
|
||||
wchar_t* buffer_s2 = PyUnicode_AsWideCharString(py_s2, &len_s2);
|
||||
boost::wstring_view s2(buffer_s2, len_s2);
|
||||
|
||||
std::size_t result = levenshtein::distance(s1, s2);
|
||||
|
||||
PyMem_Free(buffer_s1);
|
||||
PyMem_Free(buffer_s2);
|
||||
auto s1_view = decode_python_string(py_s1);
|
||||
auto s2_view = decode_python_string(py_s2);
|
||||
std::size_t result = mpark::visit([](auto&& val1, auto&& val2) {
|
||||
return levenshtein::distance(val1, val2);
|
||||
}, s1_view, s2_view);
|
||||
|
||||
return PyLong_FromSize_t(result);
|
||||
}
|
||||
|
@ -79,18 +72,11 @@ PyObject* normalized_distance(PyObject* /*self*/, PyObject* args, PyObject* keyw
|
|||
return NULL;
|
||||
}
|
||||
|
||||
Py_ssize_t len_s1 = PyUnicode_GET_LENGTH(py_s1);
|
||||
wchar_t* buffer_s1 = PyUnicode_AsWideCharString(py_s1, &len_s1);
|
||||
boost::wstring_view s1(buffer_s1, len_s1);
|
||||
|
||||
Py_ssize_t len_s2 = PyUnicode_GET_LENGTH(py_s2);
|
||||
wchar_t* buffer_s2 = PyUnicode_AsWideCharString(py_s2, &len_s2);
|
||||
boost::wstring_view s2(buffer_s2, len_s2);
|
||||
|
||||
double result = levenshtein::normalized_distance(s1, s2, score_cutoff/100);
|
||||
|
||||
PyMem_Free(buffer_s1);
|
||||
PyMem_Free(buffer_s2);
|
||||
auto s1_view = decode_python_string(py_s1);
|
||||
auto s2_view = decode_python_string(py_s2);
|
||||
double result = mpark::visit([score_cutoff](auto&& val1, auto&& val2) {
|
||||
return levenshtein::normalized_distance(val1, val2, score_cutoff/100);
|
||||
}, s1_view, s2_view);
|
||||
|
||||
return PyFloat_FromDouble(result*100);
|
||||
}
|
||||
|
@ -129,33 +115,33 @@ PyObject* weighted_distance(PyObject* /*self*/, PyObject* args, PyObject* keywds
|
|||
return NULL;
|
||||
}
|
||||
|
||||
Py_ssize_t len_s1 = PyUnicode_GET_LENGTH(py_s1);
|
||||
wchar_t* buffer_s1 = PyUnicode_AsWideCharString(py_s1, &len_s1);
|
||||
boost::wstring_view s1(buffer_s1, len_s1);
|
||||
|
||||
Py_ssize_t len_s2 = PyUnicode_GET_LENGTH(py_s2);
|
||||
wchar_t* buffer_s2 = PyUnicode_AsWideCharString(py_s2, &len_s2);
|
||||
boost::wstring_view s2(buffer_s2, len_s2);
|
||||
auto s1_view = decode_python_string(py_s1);
|
||||
auto s2_view = decode_python_string(py_s2);
|
||||
|
||||
std::size_t result = 0;
|
||||
if (insert_cost == 1 && delete_cost == 1) {
|
||||
if (replace_cost == 1) {
|
||||
result = levenshtein::distance(s1, s2);
|
||||
result = mpark::visit([](auto&& val1, auto&& val2) {
|
||||
return levenshtein::distance(val1, val2);
|
||||
}, s1_view, s2_view);
|
||||
} else if (replace_cost == 2) {
|
||||
result = levenshtein::weighted_distance(s1, s2);
|
||||
result = mpark::visit([](auto&& val1, auto&& val2) {
|
||||
return levenshtein::weighted_distance(val1, val2);
|
||||
}, s1_view, s2_view);
|
||||
} else {
|
||||
result = levenshtein::generic_distance(s1, s2, {insert_cost, delete_cost, replace_cost});
|
||||
result = mpark::visit([insert_cost, delete_cost, replace_cost](auto&& val1, auto&& val2) {
|
||||
return levenshtein::generic_distance(val1, val2, {insert_cost, delete_cost, replace_cost});
|
||||
}, s1_view, s2_view);
|
||||
}
|
||||
} else {
|
||||
result = levenshtein::generic_distance(s1, s2, {insert_cost, delete_cost, replace_cost});
|
||||
result = mpark::visit([insert_cost, delete_cost, replace_cost](auto&& val1, auto&& val2) {
|
||||
return levenshtein::generic_distance(val1, val2, {insert_cost, delete_cost, replace_cost});
|
||||
}, s1_view, s2_view);
|
||||
}
|
||||
|
||||
PyMem_Free(buffer_s1);
|
||||
PyMem_Free(buffer_s2);
|
||||
return PyLong_FromSize_t(result);
|
||||
}
|
||||
|
||||
|
||||
constexpr const char * normalized_weighted_distance_docstring = R"(
|
||||
Calculates a normalized levenshtein distance based on levenshtein.weighted_distance
|
||||
It uses the following costs for edit operations:
|
||||
|
@ -191,19 +177,12 @@ PyObject* normalized_weighted_distance(PyObject* /*self*/, PyObject* args, PyObj
|
|||
return NULL;
|
||||
}
|
||||
|
||||
Py_ssize_t len_s1 = PyUnicode_GET_LENGTH(py_s1);
|
||||
wchar_t* buffer_s1 = PyUnicode_AsWideCharString(py_s1, &len_s1);
|
||||
boost::wstring_view s1(buffer_s1, len_s1);
|
||||
auto s1_view = decode_python_string(py_s1);
|
||||
auto s2_view = decode_python_string(py_s2);
|
||||
double result = mpark::visit([score_cutoff](auto&& val1, auto&& val2) {
|
||||
return levenshtein::normalized_weighted_distance(val1, val2, score_cutoff/100);
|
||||
}, s1_view, s2_view);
|
||||
|
||||
Py_ssize_t len_s2 = PyUnicode_GET_LENGTH(py_s2);
|
||||
wchar_t* buffer_s2 = PyUnicode_AsWideCharString(py_s2, &len_s2);
|
||||
boost::wstring_view s2(buffer_s2, len_s2);
|
||||
|
||||
double result = levenshtein::normalized_weighted_distance(s1, s2, score_cutoff/100);
|
||||
|
||||
PyMem_Free(buffer_s1);
|
||||
PyMem_Free(buffer_s2);
|
||||
|
||||
return PyFloat_FromDouble(result*100);
|
||||
}
|
||||
|
||||
|
|
|
@ -5,10 +5,8 @@
|
|||
#include <Python.h>
|
||||
#include <string>
|
||||
#include "utils.hpp"
|
||||
#include "string_utils.hpp"
|
||||
|
||||
namespace utils = rapidfuzz::utils;
|
||||
namespace string_utils = rapidfuzz::string_utils;
|
||||
|
||||
constexpr const char * default_process_docstring = R"()";
|
||||
|
||||
|
@ -26,11 +24,35 @@ static PyObject* default_process(PyObject* /*self*/, PyObject* args, PyObject* k
|
|||
}
|
||||
|
||||
Py_ssize_t len = PyUnicode_GET_LENGTH(py_sentence);
|
||||
wchar_t* buffer = PyUnicode_AsWideCharString(py_sentence, &len);
|
||||
std::wstring result = string_utils::default_process(std::wstring(buffer, len));
|
||||
PyMem_Free(buffer);
|
||||
void* str = PyUnicode_DATA(py_sentence);
|
||||
|
||||
int str_kind = PyUnicode_KIND(py_sentence);
|
||||
|
||||
PyObject* result;
|
||||
|
||||
switch (str_kind) {
|
||||
case PyUnicode_1BYTE_KIND:
|
||||
{
|
||||
auto proc_str = utils::default_process(nonstd::basic_string_view<uint8_t>(static_cast<uint8_t*>(str), len));
|
||||
result = PyUnicode_FromKindAndData(PyUnicode_1BYTE_KIND, proc_str.data(), proc_str.size());
|
||||
break;
|
||||
}
|
||||
case PyUnicode_2BYTE_KIND:
|
||||
{
|
||||
auto proc_str = utils::default_process(nonstd::basic_string_view<uint16_t>(static_cast<uint16_t*>(str), len));
|
||||
result = PyUnicode_FromKindAndData(PyUnicode_2BYTE_KIND, proc_str.data(), proc_str.size());
|
||||
break;
|
||||
}
|
||||
default:
|
||||
{
|
||||
auto proc_str = utils::default_process(nonstd::basic_string_view<uint32_t>(static_cast<uint32_t*>(str), len));
|
||||
result = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, proc_str.data(), proc_str.size());
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
|
||||
return PyUnicode_FromWideChar(result.c_str(), result.length());
|
||||
}
|
||||
|
||||
/* The cast of the function is necessary since PyCFunction values
|
||||
|
|
|
@ -0,0 +1,30 @@
|
|||
/* SPDX-License-Identifier: MIT */
|
||||
/* Copyright © 2020 Max Bachmann */
|
||||
|
||||
#define PY_SSIZE_T_CLEAN
|
||||
#include <Python.h>
|
||||
#include <nonstd/string_view.hpp>
|
||||
#include <variant/variant.hpp>
|
||||
|
||||
|
||||
using python_string_view = mpark::variant<
|
||||
nonstd::basic_string_view<uint8_t>,
|
||||
nonstd::basic_string_view<uint16_t>,
|
||||
nonstd::basic_string_view<uint32_t>
|
||||
>;
|
||||
|
||||
python_string_view decode_python_string(PyObject* py_str) {
|
||||
Py_ssize_t len = PyUnicode_GET_LENGTH(py_str);
|
||||
void* str = PyUnicode_DATA(py_str);
|
||||
|
||||
int str_kind = PyUnicode_KIND(py_str);
|
||||
|
||||
switch (str_kind) {
|
||||
case PyUnicode_1BYTE_KIND:
|
||||
return nonstd::basic_string_view<uint8_t>(static_cast<uint8_t*>(str), len);
|
||||
case PyUnicode_2BYTE_KIND:
|
||||
return nonstd::basic_string_view<uint16_t>(static_cast<uint16_t*>(str), len);
|
||||
default:
|
||||
return nonstd::basic_string_view<uint32_t>(static_cast<uint32_t*>(str), len);
|
||||
}
|
||||
}
|
|
@ -1 +1 @@
|
|||
Subproject commit 8f4528ea9427c5222c866152c64d2046d080226a
|
||||
Subproject commit 43f16b2dc50bc98aa40deb6689246e388f97a254
|
Loading…
Reference in New Issue