properly link to subprojects
This commit is contained in:
parent
143b394566
commit
dd26483b5f
|
@ -11,11 +11,16 @@ project(rapidfuzz LANGUAGES C CXX)
|
|||
|
||||
find_package(NumPy REQUIRED)
|
||||
find_package(PythonExtensions REQUIRED)
|
||||
find_package(Threads REQUIRED)
|
||||
find_package(Python COMPONENTS Interpreter Development)
|
||||
find_package(Cython REQUIRED)
|
||||
|
||||
set(RAPIDFUZZ_BASE_DIR ${CMAKE_CURRENT_SOURCE_DIR})
|
||||
set(RF_BASE_DIR ${CMAKE_CURRENT_SOURCE_DIR})
|
||||
|
||||
set(TF_BUILD_CUDA OFF CACHE BOOL "Enables build of CUDA code")
|
||||
set(TF_BUILD_TESTS OFF CACHE BOOL "Enables build of tests")
|
||||
set(TF_BUILD_EXAMPLES OFF CACHE BOOL "Enables build of examples")
|
||||
add_subdirectory(extern/taskflow)
|
||||
add_subdirectory(extern/rapidfuzz-cpp)
|
||||
|
||||
add_subdirectory(src/cython)
|
||||
add_subdirectory(src/cython/distance)
|
||||
|
|
|
@ -8,6 +8,7 @@ include src/rapidfuzz/py.typed
|
|||
|
||||
recursive-include src/cython CMakeLists.txt
|
||||
recursive-include src/cython *.hpp
|
||||
recursive-include src/cython *.cpp
|
||||
recursive-include src/cython *.pyx
|
||||
recursive-include src/cython *.pxd
|
||||
|
||||
|
|
|
@ -1 +1 @@
|
|||
Subproject commit 66c100ca849974efcd1e58dcefd197e1c95db20e
|
||||
Subproject commit cdedc00c0ab3d6b9c8a258dc208448d2eb3f0009
|
2
setup.py
2
setup.py
|
@ -32,5 +32,5 @@ setup(
|
|||
include_package_data=True,
|
||||
python_requires=">=3.6",
|
||||
|
||||
cmake_args=[f'-DRAPIDFUZZ_CAPI_PATH:STRING={rapidfuzz_capi.get_include()}']
|
||||
cmake_args=[f'-DRF_CAPI_PATH:STRING={rapidfuzz_capi.get_include()}']
|
||||
)
|
||||
|
|
|
@ -1,37 +1,27 @@
|
|||
# should use target_include_directories once this is supported by scikit-build
|
||||
include_directories(${RAPIDFUZZ_BASE_DIR}/src/cython)
|
||||
include_directories(${RF_BASE_DIR}/src/cython)
|
||||
|
||||
add_cython_target(cpp_utils CXX)
|
||||
add_library(cpp_utils MODULE
|
||||
${cpp_utils}
|
||||
${RAPIDFUZZ_BASE_DIR}/extern/rapidfuzz-cpp/rapidfuzz/details/unicode.cpp)
|
||||
add_library(cpp_utils MODULE ${cpp_utils} ${RF_BASE_DIR}/src/cython/utils.cpp)
|
||||
target_compile_features(cpp_utils PUBLIC cxx_std_14)
|
||||
target_include_directories(cpp_utils PRIVATE
|
||||
${RAPIDFUZZ_BASE_DIR}/extern/rapidfuzz-cpp/
|
||||
${RAPIDFUZZ_CAPI_PATH}
|
||||
${RAPIDFUZZ_BASE_DIR}/src/cython)
|
||||
target_include_directories(cpp_utils PRIVATE ${RF_CAPI_PATH} ${RF_BASE_DIR}/src/cython)
|
||||
target_link_libraries(cpp_utils rapidfuzz::rapidfuzz)
|
||||
python_extension_module(cpp_utils)
|
||||
install(TARGETS cpp_utils LIBRARY DESTINATION src/rapidfuzz)
|
||||
|
||||
add_cython_target(cpp_fuzz CXX)
|
||||
add_library(cpp_fuzz MODULE
|
||||
${cpp_fuzz}
|
||||
${RAPIDFUZZ_BASE_DIR}/extern/rapidfuzz-cpp/rapidfuzz/details/unicode.cpp)
|
||||
add_library(cpp_fuzz MODULE ${cpp_fuzz})
|
||||
target_compile_features(cpp_fuzz PUBLIC cxx_std_14)
|
||||
target_include_directories(cpp_fuzz PRIVATE
|
||||
${RAPIDFUZZ_BASE_DIR}/extern/rapidfuzz-cpp/
|
||||
${RAPIDFUZZ_CAPI_PATH}
|
||||
${RAPIDFUZZ_BASE_DIR}/src/cython)
|
||||
target_include_directories(cpp_fuzz PRIVATE ${RF_CAPI_PATH} ${RF_BASE_DIR}/src/cython)
|
||||
target_link_libraries(cpp_fuzz rapidfuzz::rapidfuzz)
|
||||
python_extension_module(cpp_fuzz)
|
||||
install(TARGETS cpp_fuzz LIBRARY DESTINATION src/rapidfuzz)
|
||||
|
||||
add_cython_target(cpp_string_metric CXX)
|
||||
add_library(cpp_string_metric MODULE ${cpp_string_metric})
|
||||
target_compile_features(cpp_string_metric PUBLIC cxx_std_14)
|
||||
target_include_directories(cpp_string_metric PRIVATE
|
||||
${RAPIDFUZZ_BASE_DIR}/extern/rapidfuzz-cpp/
|
||||
${RAPIDFUZZ_CAPI_PATH}
|
||||
${RAPIDFUZZ_BASE_DIR}/src/cython)
|
||||
target_include_directories(cpp_string_metric PRIVATE ${RF_CAPI_PATH} ${RF_BASE_DIR}/src/cython)
|
||||
target_link_libraries(cpp_string_metric rapidfuzz::rapidfuzz)
|
||||
python_extension_module(cpp_string_metric)
|
||||
install(TARGETS cpp_string_metric LIBRARY DESTINATION src/rapidfuzz)
|
||||
|
||||
|
@ -39,10 +29,9 @@ add_cython_target(cpp_process CXX)
|
|||
add_library(cpp_process MODULE ${cpp_process})
|
||||
target_compile_features(cpp_process PUBLIC cxx_std_14)
|
||||
target_include_directories(cpp_process PRIVATE
|
||||
${RAPIDFUZZ_BASE_DIR}/extern/rapidfuzz-cpp/
|
||||
${RAPIDFUZZ_BASE_DIR}/extern/optional-lite/include
|
||||
${RAPIDFUZZ_CAPI_PATH}
|
||||
${RAPIDFUZZ_BASE_DIR}/src/cython)
|
||||
${RF_BASE_DIR}/extern/optional-lite/include
|
||||
${RF_CAPI_PATH} ${RF_BASE_DIR}/src/cython)
|
||||
target_link_libraries(cpp_process rapidfuzz::rapidfuzz)
|
||||
python_extension_module(cpp_process)
|
||||
install(TARGETS cpp_process LIBRARY DESTINATION src/rapidfuzz)
|
||||
|
||||
|
@ -50,14 +39,8 @@ add_cython_target(cpp_process_cdist CXX)
|
|||
add_library(cpp_process_cdist MODULE ${cpp_process_cdist})
|
||||
target_compile_features(cpp_process_cdist PUBLIC cxx_std_14)
|
||||
target_include_directories(cpp_process_cdist PRIVATE
|
||||
${RAPIDFUZZ_BASE_DIR}/extern/rapidfuzz-cpp/
|
||||
${RAPIDFUZZ_BASE_DIR}/extern/optional-lite/include
|
||||
${RAPIDFUZZ_CAPI_PATH}
|
||||
${RAPIDFUZZ_BASE_DIR}/src/cython
|
||||
${RAPIDFUZZ_BASE_DIR}/extern/taskflow/
|
||||
${NumPy_INCLUDE_DIR})
|
||||
# this fails with All uses of target_link_libraries with a target must be either all-keyword or all-plain.
|
||||
# target_link_libraries(cpp_process_cdist PRIVATE Threads::Threads)
|
||||
target_link_libraries(cpp_process_cdist Threads::Threads)
|
||||
${RF_BASE_DIR}/extern/optional-lite/include
|
||||
${RF_CAPI_PATH} ${RF_BASE_DIR}/src/cython ${NumPy_INCLUDE_DIR})
|
||||
target_link_libraries(cpp_process_cdist Taskflow rapidfuzz::rapidfuzz)
|
||||
python_extension_module(cpp_process_cdist)
|
||||
install(TARGETS cpp_process_cdist LIBRARY DESTINATION src/rapidfuzz)
|
||||
|
|
|
@ -1,8 +1,6 @@
|
|||
#pragma once
|
||||
#include "cpp_common.hpp"
|
||||
#include <rapidfuzz/utils.hpp>
|
||||
|
||||
namespace utils = rapidfuzz::utils;
|
||||
#include "utils.hpp"
|
||||
|
||||
PyObject* default_process_impl(PyObject* sentence) {
|
||||
RF_String c_sentence = convert_string(sentence);
|
||||
|
@ -10,19 +8,19 @@ PyObject* default_process_impl(PyObject* sentence) {
|
|||
switch (c_sentence.kind) {
|
||||
case RF_UINT8:
|
||||
{
|
||||
auto proc_str = utils::default_process(
|
||||
auto proc_str = default_process(
|
||||
rapidfuzz::basic_string_view<uint8_t>(static_cast<uint8_t*>(c_sentence.data), c_sentence.length));
|
||||
return PyUnicode_FromKindAndData(PyUnicode_1BYTE_KIND, proc_str.data(), (Py_ssize_t)proc_str.size());
|
||||
}
|
||||
case RF_UINT16:
|
||||
{
|
||||
auto proc_str = utils::default_process(
|
||||
auto proc_str = default_process(
|
||||
rapidfuzz::basic_string_view<uint16_t>(static_cast<uint16_t*>(c_sentence.data), c_sentence.length));
|
||||
return PyUnicode_FromKindAndData(PyUnicode_2BYTE_KIND, proc_str.data(), (Py_ssize_t)proc_str.size());
|
||||
}
|
||||
case RF_UINT32:
|
||||
{
|
||||
auto proc_str = utils::default_process(
|
||||
auto proc_str = default_process(
|
||||
rapidfuzz::basic_string_view<uint32_t>(static_cast<uint32_t*>(c_sentence.data), c_sentence.length));
|
||||
return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, proc_str.data(), (Py_ssize_t)proc_str.size());
|
||||
}
|
||||
|
@ -50,7 +48,7 @@ RF_String default_process_func_impl(RF_String sentence) {
|
|||
sentence.dtor = default_string_deinit;
|
||||
sentence.data = str;
|
||||
sentence.kind = sentence.kind;
|
||||
sentence.length = utils::default_process(str, sentence.length);
|
||||
sentence.length = default_process(str, sentence.length);
|
||||
|
||||
return sentence;
|
||||
}
|
||||
|
|
|
@ -1,68 +1,50 @@
|
|||
# should use target_include_directories once this is supported by scikit-build
|
||||
include_directories(${RAPIDFUZZ_BASE_DIR}/src/cython)
|
||||
include_directories(${RF_BASE_DIR}/src/cython)
|
||||
|
||||
add_cython_target(_initialize CXX)
|
||||
add_library(_initialize MODULE ${_initialize})
|
||||
target_compile_features(_initialize PUBLIC cxx_std_14)
|
||||
target_include_directories(_initialize PRIVATE
|
||||
${RAPIDFUZZ_BASE_DIR}/extern/rapidfuzz-cpp/
|
||||
${RAPIDFUZZ_CAPI_PATH}
|
||||
${RAPIDFUZZ_BASE_DIR}/src/cython
|
||||
${RAPIDFUZZ_BASE_DIR}/src/cython/distance)
|
||||
target_include_directories(_initialize PRIVATE ${RF_CAPI_PATH} ${RF_BASE_DIR}/src/cython ${RF_BASE_DIR}/src/cython/distance)
|
||||
target_link_libraries(_initialize rapidfuzz::rapidfuzz)
|
||||
python_extension_module(_initialize)
|
||||
install(TARGETS _initialize LIBRARY DESTINATION src/rapidfuzz/distance)
|
||||
|
||||
add_cython_target(Hamming CXX)
|
||||
add_library(Hamming MODULE ${Hamming})
|
||||
target_compile_features(Hamming PUBLIC cxx_std_14)
|
||||
target_include_directories(Hamming PRIVATE
|
||||
${RAPIDFUZZ_BASE_DIR}/extern/rapidfuzz-cpp/
|
||||
${RAPIDFUZZ_CAPI_PATH}
|
||||
${RAPIDFUZZ_BASE_DIR}/src/cython
|
||||
${RAPIDFUZZ_BASE_DIR}/src/cython/distance)
|
||||
target_include_directories(Hamming PRIVATE ${RF_CAPI_PATH} ${RF_BASE_DIR}/src/cython ${RF_BASE_DIR}/src/cython/distance)
|
||||
target_link_libraries(Hamming rapidfuzz::rapidfuzz)
|
||||
python_extension_module(Hamming)
|
||||
install(TARGETS Hamming LIBRARY DESTINATION src/rapidfuzz/distance)
|
||||
|
||||
add_cython_target(Levenshtein CXX)
|
||||
add_library(Levenshtein MODULE ${Levenshtein})
|
||||
target_compile_features(Levenshtein PUBLIC cxx_std_14)
|
||||
target_include_directories(Levenshtein PRIVATE
|
||||
${RAPIDFUZZ_BASE_DIR}/extern/rapidfuzz-cpp/
|
||||
${RAPIDFUZZ_CAPI_PATH}
|
||||
${RAPIDFUZZ_BASE_DIR}/src/cython
|
||||
${RAPIDFUZZ_BASE_DIR}/src/cython/distance)
|
||||
target_include_directories(Levenshtein PRIVATE ${RF_CAPI_PATH} ${RF_BASE_DIR}/src/cython ${RF_BASE_DIR}/src/cython/distance)
|
||||
target_link_libraries(Levenshtein rapidfuzz::rapidfuzz)
|
||||
python_extension_module(Levenshtein)
|
||||
install(TARGETS Levenshtein LIBRARY DESTINATION src/rapidfuzz/distance)
|
||||
|
||||
add_cython_target(Indel CXX)
|
||||
add_library(Indel MODULE ${Indel})
|
||||
target_compile_features(Indel PUBLIC cxx_std_14)
|
||||
target_include_directories(Indel PRIVATE
|
||||
${RAPIDFUZZ_BASE_DIR}/extern/rapidfuzz-cpp/
|
||||
${RAPIDFUZZ_CAPI_PATH}
|
||||
${RAPIDFUZZ_BASE_DIR}/src/cython
|
||||
${RAPIDFUZZ_BASE_DIR}/src/cython/distance)
|
||||
target_include_directories(Indel PRIVATE ${RF_CAPI_PATH} ${RF_BASE_DIR}/src/cython ${RF_BASE_DIR}/src/cython/distance)
|
||||
target_link_libraries(Indel rapidfuzz::rapidfuzz)
|
||||
python_extension_module(Indel)
|
||||
install(TARGETS Indel LIBRARY DESTINATION src/rapidfuzz/distance)
|
||||
|
||||
add_cython_target(Jaro CXX)
|
||||
add_library(Jaro MODULE ${Jaro})
|
||||
target_compile_features(Jaro PUBLIC cxx_std_14)
|
||||
target_include_directories(Jaro PRIVATE
|
||||
${RAPIDFUZZ_BASE_DIR}/extern/rapidfuzz-cpp/
|
||||
${RAPIDFUZZ_CAPI_PATH}
|
||||
${RAPIDFUZZ_BASE_DIR}/src/cython
|
||||
${RAPIDFUZZ_BASE_DIR}/src/cython/distance)
|
||||
target_include_directories(Jaro PRIVATE ${RF_CAPI_PATH} ${RF_BASE_DIR}/src/cython ${RF_BASE_DIR}/src/cython/distance)
|
||||
target_link_libraries(Jaro rapidfuzz::rapidfuzz)
|
||||
python_extension_module(Jaro)
|
||||
install(TARGETS Jaro LIBRARY DESTINATION src/rapidfuzz/distance)
|
||||
|
||||
add_cython_target(JaroWinkler CXX)
|
||||
add_library(JaroWinkler MODULE ${JaroWinkler})
|
||||
target_compile_features(JaroWinkler PUBLIC cxx_std_14)
|
||||
target_include_directories(JaroWinkler PRIVATE
|
||||
${RAPIDFUZZ_BASE_DIR}/extern/rapidfuzz-cpp/
|
||||
${RAPIDFUZZ_CAPI_PATH}
|
||||
${RAPIDFUZZ_BASE_DIR}/src/cython
|
||||
${RAPIDFUZZ_BASE_DIR}/src/cython/distance)
|
||||
target_include_directories(JaroWinkler PRIVATE ${RF_CAPI_PATH} ${RF_BASE_DIR}/src/cython ${RF_BASE_DIR}/src/cython/distance)
|
||||
target_link_libraries(JaroWinkler rapidfuzz::rapidfuzz)
|
||||
python_extension_module(JaroWinkler)
|
||||
install(TARGETS JaroWinkler LIBRARY DESTINATION src/rapidfuzz/distance)
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,85 @@
|
|||
#include "unicode.hpp"
|
||||
|
||||
namespace rapidfuzz {
|
||||
namespace Unicode {
|
||||
|
||||
/* Bit masks tested against the `flags` member of a type record. */
#define ALPHA_MASK 0x01
#define DECIMAL_MASK 0x02
#define DIGIT_MASK 0x04
#define LOWER_MASK 0x08
#define LINEBREAK_MASK 0x10
#define SPACE_MASK 0x20
#define TITLE_MASK 0x40
#define UPPER_MASK 0x80
#define XID_START_MASK 0x100
#define XID_CONTINUE_MASK 0x200
#define PRINTABLE_MASK 0x400
#define NUMERIC_MASK 0x800
#define CASE_IGNORABLE_MASK 0x1000
#define CASED_MASK 0x2000
#define EXTENDED_CASE_MASK 0x4000

/**
 * @brief checks whether a flag word describes an alphanumeric character
 *
 * @param flags bit set built from the *_MASK constants above
 *
 * @return true if at least one of the alpha, decimal, digit or numeric
 *   bits is set, false otherwise
 */
constexpr static bool is_alnum(const unsigned short flags)
{
    /* a single test against the union of the four masks is equivalent to
     * testing each mask separately and or-ing the results */
    return (flags & (ALPHA_MASK | DECIMAL_MASK | DIGIT_MASK | NUMERIC_MASK)) != 0;
}
|
||||
|
||||
/**
 * @brief one entry of the character type table
 *
 * The member order matters: records are expected to be brace-initialized
 * in exactly this order.
 */
struct _PyUnicode_TypeRecord {
    /* These are either deltas to the character or offsets in
     * _PyUnicode_ExtendedCase. */
    const int upper;
    const int lower;
    const int title;

    /* Note if more flag space is needed, decimal and digit could be unified. */
    const unsigned char decimal;
    const unsigned char digit;

    /* combination of the *_MASK bits */
    const unsigned short flags;
};
|
||||
|
||||
#include "unicodetype_db.h"
|
||||
|
||||
/**
 * @brief looks up the type record that belongs to a code point
 *
 * The lookup goes through two index tables: `index1` selects a block from
 * the high bits of the code point, `index2` selects the record number from
 * that block and the low SHIFT bits.
 *
 * @param code code point to look up
 *
 * @return pointer to the matching entry of _PyUnicode_TypeRecords. Values
 *   of 0x110000 and above always map to entry 0.
 */
static inline const _PyUnicode_TypeRecord* gettyperecord(uint32_t code)
{
    /* out of range values share the first record */
    if (code >= 0x110000) {
        return &_PyUnicode_TypeRecords[0];
    }

    const unsigned int block = index1[(code >> SHIFT)];
    const unsigned int record = index2[(block << SHIFT) + (code & ((1 << SHIFT) - 1))];

    return &_PyUnicode_TypeRecords[record];
}
|
||||
|
||||
uint32_t UnicodeDefaultProcess(uint32_t ch)
|
||||
{
|
||||
/* todo capital sigma not handled
|
||||
* see Python implementation
|
||||
*/
|
||||
const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
|
||||
|
||||
/* non alphanumeric characyers are replaces with whitespaces */
|
||||
if (!is_alnum(ctype->flags)) {
|
||||
return ' ';
|
||||
}
|
||||
|
||||
if (ctype->flags & EXTENDED_CASE_MASK) {
|
||||
int index = ctype->lower & 0xFFFF;
|
||||
/*int n = ctype->lower >> 24;
|
||||
int i;
|
||||
for (i = 0; i < n; i++)
|
||||
res[i] = _PyUnicode_ExtendedCase[index + i];*/
|
||||
/* for now ignore extended cases. The only exisiting
|
||||
* on is U+0130 anyways */
|
||||
return _PyUnicode_ExtendedCase[index];
|
||||
}
|
||||
return ch + static_cast<uint32_t>(ctype->lower);
|
||||
}
|
||||
|
||||
} // namespace Unicode
|
||||
} // namespace rapidfuzz
|
|
@ -0,0 +1,98 @@
|
|||
/* SPDX-License-Identifier: MIT */
|
||||
/* Copyright © 2020-present Max Bachmann */
|
||||
|
||||
#pragma once
|
||||
#include <cstddef>
|
||||
#include <cmath>
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <cctype>
|
||||
#include <cwctype>
|
||||
#include <limits>
|
||||
|
||||
#include <rapidfuzz/details/common.hpp>
|
||||
|
||||
uint32_t UnicodeDefaultProcess(uint32_t ch);
|
||||
|
||||
/**
|
||||
* @brief removes any non alphanumeric characters, trim whitespaces from
|
||||
* beginning/end and lowercase the string. Currently this only supports
|
||||
* Ascii. Characters outside of the ascii spec are not changed. This
|
||||
* will be changed in the future to support full unicode. In case this has
|
||||
* has a noticable effect on the performance an additional `ascii_default_process`
|
||||
* function will be provided, that keeps this behaviour
|
||||
*
|
||||
* @tparam CharT char type of the string
|
||||
*
|
||||
* @param s string to process
|
||||
*
|
||||
* @return returns the processed string
|
||||
*/
|
||||
template <typename CharT>
|
||||
size_t default_process(CharT* str, size_t len)
|
||||
{
|
||||
/* mapping converting
|
||||
* - non alphanumeric characters to whitespace (32)
|
||||
* - alphanumeric characters to lowercase
|
||||
*
|
||||
* generated using
|
||||
* `[ord(chr(x).lower()) if chr(x).isalnum() else 0x20 for x in range(256)]`
|
||||
* in Python3.9
|
||||
*/
|
||||
static const int extended_ascii_mapping[256] = {
|
||||
32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
|
||||
32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
|
||||
32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 48, 49, 50, 51, 52, 53,
|
||||
54, 55, 56, 57, 32, 32, 32, 32, 32, 32, 32, 97, 98, 99, 100, 101, 102, 103,
|
||||
104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121,
|
||||
122, 32, 32, 32, 32, 32, 32, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107,
|
||||
108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 32, 32, 32,
|
||||
32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
|
||||
32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
|
||||
32, 32, 32, 32, 32, 32, 32, 32, 170, 32, 32, 32, 32, 32, 32, 32, 178, 179,
|
||||
32, 181, 32, 32, 32, 185, 186, 32, 188, 189, 190, 32, 224, 225, 226, 227, 228, 229,
|
||||
230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 32,
|
||||
248, 249, 250, 251, 252, 253, 254, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233,
|
||||
234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 32, 248, 249, 250, 251,
|
||||
252, 253, 254, 255};
|
||||
|
||||
std::transform(str, str + len, str, [](CharT ch) {
|
||||
/* irrelevant cases for a given char type are removed at compile time by any decent compiler
|
||||
*/
|
||||
if (ch < 0 || ch > std::numeric_limits<uint32_t>::max()) {
|
||||
return ch;
|
||||
}
|
||||
else if (ch < 256) {
|
||||
return static_cast<CharT>(extended_ascii_mapping[ch]);
|
||||
}
|
||||
else {
|
||||
return static_cast<CharT>(UnicodeDefaultProcess(static_cast<uint32_t>(ch)));
|
||||
}
|
||||
});
|
||||
|
||||
while (len > 0 && str[len - 1] == ' ') {
|
||||
len--;
|
||||
}
|
||||
|
||||
size_t prefix = 0;
|
||||
while (len > 0 && str[prefix] == ' ') {
|
||||
len--;
|
||||
prefix++;
|
||||
}
|
||||
|
||||
if (prefix != 0) {
|
||||
std::copy(str + prefix, str + prefix + len, str);
|
||||
}
|
||||
|
||||
return len;
|
||||
}
|
||||
|
||||
template <typename CharT>
|
||||
std::basic_string<CharT> default_process(rapidfuzz::basic_string_view<CharT> s)
|
||||
{
|
||||
std::basic_string<CharT> str(s);
|
||||
|
||||
size_t len = default_process(&str[0], str.size());
|
||||
str.resize(len);
|
||||
return str;
|
||||
}
|
Loading…
Reference in New Issue