reduce string copies and tarball size

This commit is contained in:
maxbachmann 2020-05-22 13:27:05 +02:00
parent f0f8247d02
commit 15c6dbb6fb
No known key found for this signature in database
GPG Key ID: 60334E83C23820B8
9 changed files with 3112 additions and 84 deletions

View File

@ -5,4 +5,6 @@ include LICENSE
recursive-include src/rapidfuzz-cpp/src *.hpp *.txx
recursive-include src/rapidfuzz-cpp/extern/boost *
recursive-include src/rapidfuzz-cpp/extern/difflib *
recursive-include src/rapidfuzz-cpp/extern/nonstd *
recursive-include extern/variant *
include src/rapidfuzz-cpp/LICENSE

7
extern/variant/LICENSE.md vendored Normal file
View File

@ -0,0 +1,7 @@
Boost Software License - Version 1.0 - August 17th, 2003
Permission is hereby granted, free of charge, to any person or organization obtaining a copy of the software and accompanying documentation covered by this license (the "Software") to use, reproduce, display, distribute, execute, and transmit the Software, and to prepare derivative works of the Software, and to permit third-parties to whom the Software is furnished to do so, all subject to the following:
The copyright notices in the Software and this entire statement, including the above license grant, this restriction and the following disclaimer, must be included in all copies of the Software, in whole or in part, and all derivative works of the Software, unless such copies or derivative works are solely in the form of machine-executable object code generated by a source language processor.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

2760
extern/variant/variant.hpp vendored Normal file

File diff suppressed because it is too large Load Diff

View File

@ -53,19 +53,19 @@ setup(
Extension(
'rapidfuzz.levenshtein',
['src/py_levenshtein.cpp'],
include_dirs=["src/rapidfuzz-cpp/src", "src/rapidfuzz-cpp/extern"],
include_dirs=["src/rapidfuzz-cpp/src", "src/rapidfuzz-cpp/extern", "extern"],
language='c++',
),
Extension(
'rapidfuzz.fuzz',
['src/py_fuzz.cpp'],
include_dirs=["src/rapidfuzz-cpp/src", "src/rapidfuzz-cpp/extern"],
include_dirs=["src/rapidfuzz-cpp/src", "src/rapidfuzz-cpp/extern", "extern"],
language='c++',
),
Extension(
'rapidfuzz.utils',
['src/py_utils.cpp'],
include_dirs=["src/rapidfuzz-cpp/src", "src/rapidfuzz-cpp/extern"],
include_dirs=["src/rapidfuzz-cpp/src", "src/rapidfuzz-cpp/extern", "extern"],
language='c++',
),
],

View File

@ -6,12 +6,13 @@
#include <Python.h>
#include <string>
#include "fuzz.hpp"
#include "string_utils.hpp"
#include <boost/utility/string_view.hpp>
#include "utils.hpp"
#include "py_utils.hpp"
#include <nonstd/string_view.hpp>
#include <boost/optional.hpp>
namespace fuzz = rapidfuzz::fuzz;
namespace string_utils = rapidfuzz::string_utils;
namespace utils = rapidfuzz::utils;
boost::optional<std::pair<wchar_t*, Py_ssize_t>> PyString_AsBuffer(PyObject* str, PyObject *processor) {
PyObject *proc_str = PyObject_CallFunctionObjArgs(processor, str, NULL);
@ -69,17 +70,16 @@ static PyObject* fuzz_impl(T&& scorer, bool processor_default, PyObject* args, P
return NULL;
}
auto result = scorer(
boost::wstring_view(s1->first, s1->second),
boost::wstring_view(s2->first, s2->second),
nonstd::wstring_view(s1->first, s1->second),
nonstd::wstring_view(s2->first, s2->second),
score_cutoff);
PyMem_Free(s1->first);
PyMem_Free(s2->first);
return PyFloat_FromDouble(result);
}
auto s1 = PyString_AsBuffer(py_s1);
if(!s1) {
return NULL;
@ -94,16 +94,16 @@ static PyObject* fuzz_impl(T&& scorer, bool processor_default, PyObject* args, P
if (use_preprocessing(processor, processor_default)) {
result = scorer(
string_utils::default_process(std::wstring(s1->first, s1->second)),
string_utils::default_process(std::wstring(s2->first, s2->second)),
utils::default_process(std::wstring(s1->first, s1->second)),
utils::default_process(std::wstring(s2->first, s2->second)),
score_cutoff);
} else {
result = scorer(
boost::wstring_view(s1->first, s1->second),
boost::wstring_view(s2->first, s2->second),
nonstd::wstring_view(s1->first, s1->second),
nonstd::wstring_view(s2->first, s2->second),
score_cutoff);
}
PyMem_Free(s1->first);
PyMem_Free(s2->first);
@ -130,7 +130,64 @@ PyDoc_STRVAR(ratio_docstring,
);
/* Python entry point for fuzz.ratio.
 *
 * Parses (s1, s2, processor, score_cutoff) where s1 and s2 must be str objects
 * and processor / score_cutoff are optional (score_cutoff defaults to 0).
 *
 * - If processor is callable, it is applied to s1 and to s2 and the scorer
 *   runs on the two returned objects.
 * - Otherwise use_preprocessing(processor, false) selects between scoring the
 *   utils::default_process output and scoring the raw strings.
 *
 * Returns a new Python float, or NULL with an exception set on failure.
 */
static PyObject* ratio(PyObject* /*self*/, PyObject* args, PyObject* keywds) {
    PyObject *py_s1;
    PyObject *py_s2;
    PyObject *processor = NULL;
    double score_cutoff = 0;
    static const char *kwlist[] = {"s1", "s2", "processor", "score_cutoff", NULL};

    if (!PyArg_ParseTupleAndKeywords(args, keywds, "UU|Od", const_cast<char **>(kwlist),
                                     &py_s1, &py_s2, &processor, &score_cutoff)) {
        return NULL;
    }

    if (PyUnicode_READY(py_s1) || PyUnicode_READY(py_s2)) {
        return NULL;
    }

    if (PyCallable_Check(processor)) {
        /* Each input is run through the processor separately: s1 first ... */
        PyObject *proc_s1 = PyObject_CallFunctionObjArgs(processor, py_s1, NULL);
        if (proc_s1 == NULL) {
            return NULL;
        }

        /* ... then s2; on failure the first result must be released. */
        PyObject *proc_s2 = PyObject_CallFunctionObjArgs(processor, py_s2, NULL);
        if (proc_s2 == NULL) {
            Py_DecRef(proc_s1);
            return NULL;
        }

        /* NOTE(review): the processor results are not checked to be ready str
         * objects before being decoded - confirm whether validation is needed. */
        auto s1_view = decode_python_string(proc_s1);
        auto s2_view = decode_python_string(proc_s2);

        double result = mpark::visit([score_cutoff](auto&& val1, auto&& val2) {
            return fuzz::ratio(val1, val2, score_cutoff);
        }, s1_view, s2_view);

        /* The views borrow from the processed objects, so release them only
         * after scoring has finished. */
        Py_DecRef(proc_s1);
        Py_DecRef(proc_s2);

        return PyFloat_FromDouble(result);
    }

    auto s1_view = decode_python_string(py_s1);
    auto s2_view = decode_python_string(py_s2);

    double result;
    if (use_preprocessing(processor, false)) {
        result = mpark::visit([score_cutoff](auto&& val1, auto&& val2) {
            return fuzz::ratio(
                utils::default_process(val1),
                utils::default_process(val2),
                score_cutoff);
        }, s1_view, s2_view);
    } else {
        result = mpark::visit([score_cutoff](auto&& val1, auto&& val2) {
            return fuzz::ratio(val1, val2, score_cutoff);
        }, s1_view, s2_view);
    }

    return PyFloat_FromDouble(result);
}
@ -153,7 +210,7 @@ PyDoc_STRVAR(partial_ratio_docstring,
);
/* Python entry point for fuzz.partial_ratio: forwards the call to fuzz_impl
 * with the nonstd::wstring_view scorer and processor_default = false. */
static PyObject* partial_ratio(PyObject* /*self*/, PyObject* args, PyObject* keywds) {
    return fuzz_impl(fuzz::partial_ratio<nonstd::wstring_view, nonstd::wstring_view>, false, args, keywds);
}
PyDoc_STRVAR(token_sort_ratio_docstring,
@ -175,7 +232,64 @@ PyDoc_STRVAR(token_sort_ratio_docstring,
);
/* Python entry point for fuzz.token_sort_ratio.
 *
 * Parses (s1, s2, processor, score_cutoff) where s1 and s2 must be str objects
 * and processor / score_cutoff are optional (score_cutoff defaults to 0).
 *
 * - If processor is callable, it is applied to s1 and to s2 and the scorer
 *   runs on the two returned objects.
 * - Otherwise use_preprocessing(processor, true) selects between scoring the
 *   utils::default_process output and scoring the raw strings.
 *
 * Returns a new Python float, or NULL with an exception set on failure.
 */
static PyObject* token_sort_ratio(PyObject* /*self*/, PyObject* args, PyObject* keywds) {
    PyObject *py_s1;
    PyObject *py_s2;
    PyObject *processor = NULL;
    double score_cutoff = 0;
    static const char *kwlist[] = {"s1", "s2", "processor", "score_cutoff", NULL};

    if (!PyArg_ParseTupleAndKeywords(args, keywds, "UU|Od", const_cast<char **>(kwlist),
                                     &py_s1, &py_s2, &processor, &score_cutoff)) {
        return NULL;
    }

    if (PyUnicode_READY(py_s1) || PyUnicode_READY(py_s2)) {
        return NULL;
    }

    if (PyCallable_Check(processor)) {
        /* Each input is run through the processor separately: s1 first ... */
        PyObject *proc_s1 = PyObject_CallFunctionObjArgs(processor, py_s1, NULL);
        if (proc_s1 == NULL) {
            return NULL;
        }

        /* ... then s2; on failure the first result must be released. */
        PyObject *proc_s2 = PyObject_CallFunctionObjArgs(processor, py_s2, NULL);
        if (proc_s2 == NULL) {
            Py_DecRef(proc_s1);
            return NULL;
        }

        /* NOTE(review): the processor results are not checked to be ready str
         * objects before being decoded - confirm whether validation is needed. */
        auto s1_view = decode_python_string(proc_s1);
        auto s2_view = decode_python_string(proc_s2);

        double result = mpark::visit([score_cutoff](auto&& val1, auto&& val2) {
            return fuzz::token_sort_ratio(val1, val2, score_cutoff);
        }, s1_view, s2_view);

        /* The views borrow from the processed objects, so release them only
         * after scoring has finished. */
        Py_DecRef(proc_s1);
        Py_DecRef(proc_s2);

        return PyFloat_FromDouble(result);
    }

    auto s1_view = decode_python_string(py_s1);
    auto s2_view = decode_python_string(py_s2);

    double result;
    if (use_preprocessing(processor, true)) {
        result = mpark::visit([score_cutoff](auto&& val1, auto&& val2) {
            return fuzz::token_sort_ratio(
                utils::default_process(val1),
                utils::default_process(val2),
                score_cutoff);
        }, s1_view, s2_view);
    } else {
        result = mpark::visit([score_cutoff](auto&& val1, auto&& val2) {
            return fuzz::token_sort_ratio(val1, val2, score_cutoff);
        }, s1_view, s2_view);
    }

    return PyFloat_FromDouble(result);
}
PyDoc_STRVAR(partial_token_sort_ratio_docstring,
@ -194,7 +308,7 @@ PyDoc_STRVAR(partial_token_sort_ratio_docstring,
);
/* Python entry point for fuzz.partial_token_sort_ratio: forwards the call to
 * fuzz_impl with the nonstd::wstring_view scorer and processor_default = true. */
static PyObject* partial_token_sort_ratio(PyObject* /*self*/, PyObject* args, PyObject* keywds) {
    return fuzz_impl(fuzz::partial_token_sort_ratio<nonstd::wstring_view, nonstd::wstring_view>, true, args, keywds);
}
PyDoc_STRVAR(token_set_ratio_docstring,
@ -218,7 +332,64 @@ PyDoc_STRVAR(token_set_ratio_docstring,
);
/* Python entry point for fuzz.token_set_ratio.
 *
 * Parses (s1, s2, processor, score_cutoff) where s1 and s2 must be str objects
 * and processor / score_cutoff are optional (score_cutoff defaults to 0).
 *
 * - If processor is callable, it is applied to s1 and to s2 and the scorer
 *   runs on the two returned objects.
 * - Otherwise use_preprocessing(processor, true) selects between scoring the
 *   utils::default_process output and scoring the raw strings.
 *
 * Returns a new Python float, or NULL with an exception set on failure.
 */
static PyObject* token_set_ratio(PyObject* /*self*/, PyObject* args, PyObject* keywds) {
    PyObject *py_s1;
    PyObject *py_s2;
    PyObject *processor = NULL;
    double score_cutoff = 0;
    static const char *kwlist[] = {"s1", "s2", "processor", "score_cutoff", NULL};

    if (!PyArg_ParseTupleAndKeywords(args, keywds, "UU|Od", const_cast<char **>(kwlist),
                                     &py_s1, &py_s2, &processor, &score_cutoff)) {
        return NULL;
    }

    if (PyUnicode_READY(py_s1) || PyUnicode_READY(py_s2)) {
        return NULL;
    }

    if (PyCallable_Check(processor)) {
        /* Each input is run through the processor separately: s1 first ... */
        PyObject *proc_s1 = PyObject_CallFunctionObjArgs(processor, py_s1, NULL);
        if (proc_s1 == NULL) {
            return NULL;
        }

        /* ... then s2; on failure the first result must be released. */
        PyObject *proc_s2 = PyObject_CallFunctionObjArgs(processor, py_s2, NULL);
        if (proc_s2 == NULL) {
            Py_DecRef(proc_s1);
            return NULL;
        }

        /* NOTE(review): the processor results are not checked to be ready str
         * objects before being decoded - confirm whether validation is needed. */
        auto s1_view = decode_python_string(proc_s1);
        auto s2_view = decode_python_string(proc_s2);

        double result = mpark::visit([score_cutoff](auto&& val1, auto&& val2) {
            return fuzz::token_set_ratio(val1, val2, score_cutoff);
        }, s1_view, s2_view);

        /* The views borrow from the processed objects, so release them only
         * after scoring has finished. */
        Py_DecRef(proc_s1);
        Py_DecRef(proc_s2);

        return PyFloat_FromDouble(result);
    }

    auto s1_view = decode_python_string(py_s1);
    auto s2_view = decode_python_string(py_s2);

    double result;
    if (use_preprocessing(processor, true)) {
        result = mpark::visit([score_cutoff](auto&& val1, auto&& val2) {
            return fuzz::token_set_ratio(
                utils::default_process(val1),
                utils::default_process(val2),
                score_cutoff);
        }, s1_view, s2_view);
    } else {
        result = mpark::visit([score_cutoff](auto&& val1, auto&& val2) {
            return fuzz::token_set_ratio(val1, val2, score_cutoff);
        }, s1_view, s2_view);
    }

    return PyFloat_FromDouble(result);
}
PyDoc_STRVAR(partial_token_set_ratio_docstring,
@ -238,7 +409,7 @@ PyDoc_STRVAR(partial_token_set_ratio_docstring,
/* Python entry point for fuzz.partial_token_set_ratio: forwards the call to
 * fuzz_impl with the nonstd::wstring_view scorer and processor_default = true. */
static PyObject* partial_token_set_ratio(PyObject* /*self*/, PyObject* args, PyObject* keywds) {
    return fuzz_impl(fuzz::partial_token_set_ratio<nonstd::wstring_view, nonstd::wstring_view>, true, args, keywds);
}
PyDoc_STRVAR(token_ratio_docstring,
@ -258,7 +429,64 @@ PyDoc_STRVAR(token_ratio_docstring,
);
/* Python entry point for fuzz.token_ratio.
 *
 * Parses (s1, s2, processor, score_cutoff) where s1 and s2 must be str objects
 * and processor / score_cutoff are optional (score_cutoff defaults to 0).
 *
 * - If processor is callable, it is applied to s1 and to s2 and the scorer
 *   runs on the two returned objects.
 * - Otherwise use_preprocessing(processor, true) selects between scoring the
 *   utils::default_process output and scoring the raw strings.
 *
 * Returns a new Python float, or NULL with an exception set on failure.
 */
static PyObject* token_ratio(PyObject* /*self*/, PyObject* args, PyObject* keywds) {
    PyObject *py_s1;
    PyObject *py_s2;
    PyObject *processor = NULL;
    double score_cutoff = 0;
    static const char *kwlist[] = {"s1", "s2", "processor", "score_cutoff", NULL};

    if (!PyArg_ParseTupleAndKeywords(args, keywds, "UU|Od", const_cast<char **>(kwlist),
                                     &py_s1, &py_s2, &processor, &score_cutoff)) {
        return NULL;
    }

    if (PyUnicode_READY(py_s1) || PyUnicode_READY(py_s2)) {
        return NULL;
    }

    if (PyCallable_Check(processor)) {
        /* Each input is run through the processor separately: s1 first ... */
        PyObject *proc_s1 = PyObject_CallFunctionObjArgs(processor, py_s1, NULL);
        if (proc_s1 == NULL) {
            return NULL;
        }

        /* ... then s2; on failure the first result must be released. */
        PyObject *proc_s2 = PyObject_CallFunctionObjArgs(processor, py_s2, NULL);
        if (proc_s2 == NULL) {
            Py_DecRef(proc_s1);
            return NULL;
        }

        /* NOTE(review): the processor results are not checked to be ready str
         * objects before being decoded - confirm whether validation is needed. */
        auto s1_view = decode_python_string(proc_s1);
        auto s2_view = decode_python_string(proc_s2);

        double result = mpark::visit([score_cutoff](auto&& val1, auto&& val2) {
            return fuzz::token_ratio(val1, val2, score_cutoff);
        }, s1_view, s2_view);

        /* The views borrow from the processed objects, so release them only
         * after scoring has finished. */
        Py_DecRef(proc_s1);
        Py_DecRef(proc_s2);

        return PyFloat_FromDouble(result);
    }

    auto s1_view = decode_python_string(py_s1);
    auto s2_view = decode_python_string(py_s2);

    double result;
    if (use_preprocessing(processor, true)) {
        result = mpark::visit([score_cutoff](auto&& val1, auto&& val2) {
            return fuzz::token_ratio(
                utils::default_process(val1),
                utils::default_process(val2),
                score_cutoff);
        }, s1_view, s2_view);
    } else {
        result = mpark::visit([score_cutoff](auto&& val1, auto&& val2) {
            return fuzz::token_ratio(val1, val2, score_cutoff);
        }, s1_view, s2_view);
    }

    return PyFloat_FromDouble(result);
}
PyDoc_STRVAR(partial_token_ratio_docstring,
@ -278,7 +506,7 @@ PyDoc_STRVAR(partial_token_ratio_docstring,
);
/* Python entry point for fuzz.partial_token_ratio: forwards the call to
 * fuzz_impl with the nonstd::wstring_view scorer and processor_default = true. */
static PyObject* partial_token_ratio(PyObject* /*self*/, PyObject* args, PyObject* keywds) {
    return fuzz_impl(fuzz::partial_token_ratio<nonstd::wstring_view, nonstd::wstring_view>, true, args, keywds);
}
PyDoc_STRVAR(WRatio_docstring,
@ -297,7 +525,7 @@ PyDoc_STRVAR(WRatio_docstring,
);
/* Python entry point for fuzz.WRatio: forwards the call to fuzz_impl with the
 * nonstd::wstring_view scorer and processor_default = true. */
static PyObject* WRatio(PyObject* /*self*/, PyObject* args, PyObject* keywds) {
    return fuzz_impl(fuzz::WRatio<nonstd::wstring_view, nonstd::wstring_view>, true, args, keywds);
}
PyDoc_STRVAR(QRatio_docstring,
@ -319,7 +547,7 @@ PyDoc_STRVAR(QRatio_docstring,
);
/* Python entry point for fuzz.QRatio: forwards the call to fuzz_impl using the
 * fuzz::ratio scorer on nonstd::wstring_view, with processor_default = false. */
static PyObject* QRatio(PyObject* /*self*/, PyObject* args, PyObject* keywds) {
    return fuzz_impl(fuzz::ratio<nonstd::wstring_view, nonstd::wstring_view>, false, args, keywds);
}
PyDoc_STRVAR(quick_lev_ratio_docstring,
@ -340,7 +568,7 @@ PyDoc_STRVAR(quick_lev_ratio_docstring,
);
/* Python entry point for fuzz.quick_lev_ratio: forwards the call to fuzz_impl
 * with the nonstd::wstring_view scorer and processor_default = true. */
static PyObject* quick_lev_ratio(PyObject* /*self*/, PyObject* args, PyObject* keywds) {
    return fuzz_impl(fuzz::quick_lev_ratio<nonstd::wstring_view, nonstd::wstring_view>, true, args, keywds);
}
/* The cast of the function is necessary since PyCFunction values

View File

@ -3,8 +3,8 @@
#define PY_SSIZE_T_CLEAN
#include <Python.h>
#include <string>
#include "levenshtein.hpp"
#include "py_utils.hpp"
namespace levenshtein = rapidfuzz::levenshtein;
@ -34,18 +34,11 @@ PyObject* distance(PyObject* /*self*/, PyObject* args, PyObject* keywds) {
return NULL;
}
Py_ssize_t len_s1 = PyUnicode_GET_LENGTH(py_s1);
wchar_t* buffer_s1 = PyUnicode_AsWideCharString(py_s1, &len_s1);
boost::wstring_view s1(buffer_s1, len_s1);
Py_ssize_t len_s2 = PyUnicode_GET_LENGTH(py_s2);
wchar_t* buffer_s2 = PyUnicode_AsWideCharString(py_s2, &len_s2);
boost::wstring_view s2(buffer_s2, len_s2);
std::size_t result = levenshtein::distance(s1, s2);
PyMem_Free(buffer_s1);
PyMem_Free(buffer_s2);
auto s1_view = decode_python_string(py_s1);
auto s2_view = decode_python_string(py_s2);
std::size_t result = mpark::visit([](auto&& val1, auto&& val2) {
return levenshtein::distance(val1, val2);
}, s1_view, s2_view);
return PyLong_FromSize_t(result);
}
@ -79,18 +72,11 @@ PyObject* normalized_distance(PyObject* /*self*/, PyObject* args, PyObject* keyw
return NULL;
}
Py_ssize_t len_s1 = PyUnicode_GET_LENGTH(py_s1);
wchar_t* buffer_s1 = PyUnicode_AsWideCharString(py_s1, &len_s1);
boost::wstring_view s1(buffer_s1, len_s1);
Py_ssize_t len_s2 = PyUnicode_GET_LENGTH(py_s2);
wchar_t* buffer_s2 = PyUnicode_AsWideCharString(py_s2, &len_s2);
boost::wstring_view s2(buffer_s2, len_s2);
double result = levenshtein::normalized_distance(s1, s2, score_cutoff/100);
PyMem_Free(buffer_s1);
PyMem_Free(buffer_s2);
auto s1_view = decode_python_string(py_s1);
auto s2_view = decode_python_string(py_s2);
double result = mpark::visit([score_cutoff](auto&& val1, auto&& val2) {
return levenshtein::normalized_distance(val1, val2, score_cutoff/100);
}, s1_view, s2_view);
return PyFloat_FromDouble(result*100);
}
@ -129,33 +115,33 @@ PyObject* weighted_distance(PyObject* /*self*/, PyObject* args, PyObject* keywds
return NULL;
}
Py_ssize_t len_s1 = PyUnicode_GET_LENGTH(py_s1);
wchar_t* buffer_s1 = PyUnicode_AsWideCharString(py_s1, &len_s1);
boost::wstring_view s1(buffer_s1, len_s1);
Py_ssize_t len_s2 = PyUnicode_GET_LENGTH(py_s2);
wchar_t* buffer_s2 = PyUnicode_AsWideCharString(py_s2, &len_s2);
boost::wstring_view s2(buffer_s2, len_s2);
auto s1_view = decode_python_string(py_s1);
auto s2_view = decode_python_string(py_s2);
std::size_t result = 0;
if (insert_cost == 1 && delete_cost == 1) {
if (replace_cost == 1) {
result = levenshtein::distance(s1, s2);
result = mpark::visit([](auto&& val1, auto&& val2) {
return levenshtein::distance(val1, val2);
}, s1_view, s2_view);
} else if (replace_cost == 2) {
result = levenshtein::weighted_distance(s1, s2);
result = mpark::visit([](auto&& val1, auto&& val2) {
return levenshtein::weighted_distance(val1, val2);
}, s1_view, s2_view);
} else {
result = levenshtein::generic_distance(s1, s2, {insert_cost, delete_cost, replace_cost});
result = mpark::visit([insert_cost, delete_cost, replace_cost](auto&& val1, auto&& val2) {
return levenshtein::generic_distance(val1, val2, {insert_cost, delete_cost, replace_cost});
}, s1_view, s2_view);
}
} else {
result = levenshtein::generic_distance(s1, s2, {insert_cost, delete_cost, replace_cost});
result = mpark::visit([insert_cost, delete_cost, replace_cost](auto&& val1, auto&& val2) {
return levenshtein::generic_distance(val1, val2, {insert_cost, delete_cost, replace_cost});
}, s1_view, s2_view);
}
PyMem_Free(buffer_s1);
PyMem_Free(buffer_s2);
return PyLong_FromSize_t(result);
}
constexpr const char * normalized_weighted_distance_docstring = R"(
Calculates a normalized levenshtein distance based on levenshtein.weighted_distance
It uses the following costs for edit operations:
@ -191,19 +177,12 @@ PyObject* normalized_weighted_distance(PyObject* /*self*/, PyObject* args, PyObj
return NULL;
}
Py_ssize_t len_s1 = PyUnicode_GET_LENGTH(py_s1);
wchar_t* buffer_s1 = PyUnicode_AsWideCharString(py_s1, &len_s1);
boost::wstring_view s1(buffer_s1, len_s1);
auto s1_view = decode_python_string(py_s1);
auto s2_view = decode_python_string(py_s2);
double result = mpark::visit([score_cutoff](auto&& val1, auto&& val2) {
return levenshtein::normalized_weighted_distance(val1, val2, score_cutoff/100);
}, s1_view, s2_view);
Py_ssize_t len_s2 = PyUnicode_GET_LENGTH(py_s2);
wchar_t* buffer_s2 = PyUnicode_AsWideCharString(py_s2, &len_s2);
boost::wstring_view s2(buffer_s2, len_s2);
double result = levenshtein::normalized_weighted_distance(s1, s2, score_cutoff/100);
PyMem_Free(buffer_s1);
PyMem_Free(buffer_s2);
return PyFloat_FromDouble(result*100);
}

View File

@ -5,10 +5,8 @@
#include <Python.h>
#include <string>
#include "utils.hpp"
#include "string_utils.hpp"
namespace utils = rapidfuzz::utils;
namespace string_utils = rapidfuzz::string_utils;
constexpr const char * default_process_docstring = R"()";
@ -26,11 +24,35 @@ static PyObject* default_process(PyObject* /*self*/, PyObject* args, PyObject* k
}
Py_ssize_t len = PyUnicode_GET_LENGTH(py_sentence);
wchar_t* buffer = PyUnicode_AsWideCharString(py_sentence, &len);
std::wstring result = string_utils::default_process(std::wstring(buffer, len));
PyMem_Free(buffer);
void* str = PyUnicode_DATA(py_sentence);
int str_kind = PyUnicode_KIND(py_sentence);
PyObject* result;
switch (str_kind) {
case PyUnicode_1BYTE_KIND:
{
auto proc_str = utils::default_process(nonstd::basic_string_view<uint8_t>(static_cast<uint8_t*>(str), len));
result = PyUnicode_FromKindAndData(PyUnicode_1BYTE_KIND, proc_str.data(), proc_str.size());
break;
}
case PyUnicode_2BYTE_KIND:
{
auto proc_str = utils::default_process(nonstd::basic_string_view<uint16_t>(static_cast<uint16_t*>(str), len));
result = PyUnicode_FromKindAndData(PyUnicode_2BYTE_KIND, proc_str.data(), proc_str.size());
break;
}
default:
{
auto proc_str = utils::default_process(nonstd::basic_string_view<uint32_t>(static_cast<uint32_t*>(str), len));
result = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, proc_str.data(), proc_str.size());
break;
}
}
return result;
return PyUnicode_FromWideChar(result.c_str(), result.length());
}
/* The cast of the function is necessary since PyCFunction values

30
src/py_utils.hpp Normal file
View File

@ -0,0 +1,30 @@
/* SPDX-License-Identifier: MIT */
/* Copyright © 2020 Max Bachmann */
#define PY_SSIZE_T_CLEAN
#include <Python.h>
#include <nonstd/string_view.hpp>
#include <variant/variant.hpp>
/* A string view over one of three code-unit widths (8, 16 or 32 bit).
 * decode_python_string picks the alternative matching the PyUnicode kind
 * of the string it wraps. */
using python_string_view = mpark::variant<
nonstd::basic_string_view<uint8_t>,
nonstd::basic_string_view<uint16_t>,
nonstd::basic_string_view<uint32_t>
>;
/* Wraps the character buffer of a Python str in a python_string_view without
 * copying it.
 *
 * The variant alternative is chosen from PyUnicode_KIND: 1-byte and 2-byte
 * kinds map to uint8_t / uint16_t views, every other kind is treated as
 * 4-byte. The returned view borrows the buffer owned by py_str, so it must not
 * be used after the last reference to py_str has been released.
 *
 * NOTE(review): py_str is presumably expected to be a str that has already
 * passed PyUnicode_READY - confirm at the call sites.
 *
 * Declared inline because this definition lives in a header; a non-inline
 * definition would clash at link time if the header were included from more
 * than one translation unit of the same binary.
 */
inline python_string_view decode_python_string(PyObject* py_str) {
    Py_ssize_t len = PyUnicode_GET_LENGTH(py_str);
    void* str = PyUnicode_DATA(py_str);

    switch (PyUnicode_KIND(py_str)) {
    case PyUnicode_1BYTE_KIND:
        return nonstd::basic_string_view<uint8_t>(static_cast<uint8_t*>(str), len);
    case PyUnicode_2BYTE_KIND:
        return nonstd::basic_string_view<uint16_t>(static_cast<uint16_t*>(str), len);
    default:
        return nonstd::basic_string_view<uint32_t>(static_cast<uint32_t*>(str), len);
    }
}

@ -1 +1 @@
Subproject commit 8f4528ea9427c5222c866152c64d2046d080226a
Subproject commit 43f16b2dc50bc98aa40deb6689246e388f97a254