allow generating cython files (#194)

This commit is contained in:
Max Bachmann 2022-02-12 18:58:10 +01:00 committed by GitHub
parent 567141402d
commit cd8af8cad2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 112 additions and 45 deletions

View File

@ -6,9 +6,34 @@ on:
- main
jobs:
no_cython_install:
name: "Test install with generated cython files"
runs-on: "ubuntu-latest"
steps:
- uses: "actions/checkout@v2"
with:
submodules: 'true'
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install pytest hypothesis pandas mypy rapidfuzz_capi Cython==3.0.0a10
- name: Generate cython
run: |
chmod +x ./src/cython/generate.sh
./src/cython/generate.sh
- name: build
run: |
pip install build; python -m build --sdist
# test whether tarball contains all files required for compiling
pip install dist/rapidfuzz-*.tar.gz
tests:
name: "Python ${{ matrix.python-version }}"
runs-on: "ubuntu-latest"
runs-on: ${{matrix.os}}
strategy:
fail-fast: false
matrix:
@ -30,9 +55,7 @@ jobs:
- name: build
run: |
pip install build; python -m build --sdist
# test whether tarball contains all files required for compiling
pip install dist/rapidfuzz-*.tar.gz
pip install .
- name: Test type stubs
if: matrix.python-version != '3.6'

View File

@ -150,6 +150,12 @@ jobs:
python -m pip install --upgrade pip
pip install pytest hypothesis pandas mypy
# The cythonized files allow installation from the sdist without cython
- name: Generate cython
run: |
chmod +x ./src/cython/generate.sh
./src/cython/generate.sh
- name: Build sdist
run: |
pip install build

4
.gitignore vendored
View File

@ -24,6 +24,6 @@ bench_results/
# Hypothesis results
.hypothesis/
# Cython
src/*.cpp
# Cython generated files
*.cxx

View File

@ -19,7 +19,6 @@ endif()
find_package(NumPy REQUIRED)
find_package(PythonExtensions REQUIRED)
find_package(Python COMPONENTS Interpreter Development)
find_package(Cython REQUIRED)
set(RF_BASE_DIR ${CMAKE_CURRENT_SOURCE_DIR})

View File

@ -9,7 +9,7 @@ include src/rapidfuzz/**/*.pyi
include src/rapidfuzz/py.typed
recursive-include src/cython CMakeLists.txt
recursive-include src/cython *.hpp *.h *.cpp *.pyx *.pxd
recursive-include src/cython *.hpp *.h *.cpp *.cxx *.pyx *.pxd
include extern/rapidfuzz-cpp/LICENSE
include extern/rapidfuzz-cpp/CMakeLists.txt

View File

@ -1,15 +1,24 @@
# should use target_include_directories once this is supported by scikit-build
include_directories(${RF_BASE_DIR}/src/cython)
# Resolve the C++ source for the Cython module `_name` and hand it back to the
# caller in a variable that is itself named `${_name}`.
#
# If a pre-generated `<_name>.cxx` exists next to this CMakeLists.txt (the file
# src/cython/generate.sh writes), that file is used and Cython is never looked
# up. Otherwise Cython is required and add_cython_target() is asked to generate
# the source as part of the build.
#
# Example:
#   create_cython_target(cpp_utils)
#   add_library(cpp_utils MODULE ${cpp_utils})
function(create_cython_target _name)
  if(EXISTS "${CMAKE_CURRENT_LIST_DIR}/${_name}.cxx")
    set(${_name} "${CMAKE_CURRENT_LIST_DIR}/${_name}.cxx" PARENT_SCOPE)
  else()
    find_package(Cython REQUIRED)
    # should use target_include_directories once this is supported by scikit-build
    include_directories(${RF_BASE_DIR}/src/cython)
    add_cython_target(${_name} CXX)
    # Per scikit-build's UseCython documentation, add_cython_target() defines
    # `${_name}` (in this function's scope) as the path of the generated source.
    # Forward that path rather than the bare target name, so the caller does not
    # depend on CMake guessing the file extension (policy CMP0115).
    set(${_name} "${${_name}}" PARENT_SCOPE)
  endif()
endfunction()
add_cython_target(cpp_utils CXX)
add_library(cpp_utils MODULE ${cpp_utils} ${RF_BASE_DIR}/src/cython/utils.cpp)
create_cython_target(cpp_utils)
add_library(cpp_utils MODULE ${cpp_utils} ${CMAKE_CURRENT_LIST_DIR}/utils.cpp)
target_compile_features(cpp_utils PUBLIC cxx_std_14)
target_include_directories(cpp_utils PRIVATE ${RF_CAPI_PATH} ${RF_BASE_DIR}/src/cython)
target_link_libraries(cpp_utils PRIVATE rapidfuzz::rapidfuzz)
python_extension_module(cpp_utils)
install(TARGETS cpp_utils LIBRARY DESTINATION src/rapidfuzz)
add_cython_target(cpp_fuzz CXX)
create_cython_target(cpp_fuzz)
add_library(cpp_fuzz MODULE ${cpp_fuzz})
target_compile_features(cpp_fuzz PUBLIC cxx_std_14)
target_include_directories(cpp_fuzz PRIVATE ${RF_CAPI_PATH} ${RF_BASE_DIR}/src/cython)
@ -17,7 +26,7 @@ target_link_libraries(cpp_fuzz PRIVATE rapidfuzz::rapidfuzz)
python_extension_module(cpp_fuzz)
install(TARGETS cpp_fuzz LIBRARY DESTINATION src/rapidfuzz)
add_cython_target(cpp_string_metric CXX)
create_cython_target(cpp_string_metric)
add_library(cpp_string_metric MODULE ${cpp_string_metric})
target_compile_features(cpp_string_metric PUBLIC cxx_std_14)
target_include_directories(cpp_string_metric PRIVATE ${RF_CAPI_PATH} ${RF_BASE_DIR}/src/cython)
@ -25,7 +34,7 @@ target_link_libraries(cpp_string_metric PRIVATE rapidfuzz::rapidfuzz jaro_winkle
python_extension_module(cpp_string_metric)
install(TARGETS cpp_string_metric LIBRARY DESTINATION src/rapidfuzz)
add_cython_target(cpp_process CXX)
create_cython_target(cpp_process)
add_library(cpp_process MODULE ${cpp_process})
target_compile_features(cpp_process PUBLIC cxx_std_14)
target_include_directories(cpp_process PRIVATE ${RF_CAPI_PATH} ${RF_BASE_DIR}/src/cython)
@ -33,7 +42,7 @@ target_link_libraries(cpp_process PRIVATE rapidfuzz::rapidfuzz)
python_extension_module(cpp_process)
install(TARGETS cpp_process LIBRARY DESTINATION src/rapidfuzz)
add_cython_target(cpp_process_cdist CXX)
create_cython_target(cpp_process_cdist)
add_library(cpp_process_cdist MODULE ${cpp_process_cdist})
target_compile_features(cpp_process_cdist PUBLIC cxx_std_14)
target_include_directories(cpp_process_cdist PRIVATE ${RF_CAPI_PATH} ${RF_BASE_DIR}/src/cython ${NumPy_INCLUDE_DIR})

View File

@ -678,10 +678,10 @@ cdef inline extract_dict_f64(query, choices, RF_Scorer* scorer, const RF_ScorerF
get_score_cutoff_f64(score_cutoff, scorer_flags))
# due to score_cutoff not always completely filled
if limit > results.size():
limit = results.size()
if limit > <int64_t>results.size():
limit = <int64_t>results.size()
if limit >= results.size():
if limit >= <int64_t>results.size():
algorithm.sort(results.begin(), results.end(), ExtractComp(scorer_flags))
else:
algorithm.partial_sort(results.begin(), results.begin() + <ptrdiff_t>limit, results.end(), ExtractComp(scorer_flags))
@ -706,10 +706,10 @@ cdef inline extract_dict_i64(query, choices, RF_Scorer* scorer, const RF_ScorerF
get_score_cutoff_i64(score_cutoff, scorer_flags))
# due to score_cutoff not always completely filled
if limit > results.size():
limit = results.size()
if limit > <int64_t>results.size():
limit = <int64_t>results.size()
if limit >= results.size():
if limit >= <int64_t>results.size():
algorithm.sort(results.begin(), results.end(), ExtractComp(scorer_flags))
else:
algorithm.partial_sort(results.begin(), results.begin() + <ptrdiff_t>limit, results.end(), ExtractComp(scorer_flags))
@ -749,10 +749,10 @@ cdef inline extract_list_f64(query, choices, RF_Scorer* scorer, const RF_ScorerF
get_score_cutoff_f64(score_cutoff, scorer_flags))
# due to score_cutoff not always completely filled
if limit > results.size():
limit = results.size()
if limit > <int64_t>results.size():
limit = <int64_t>results.size()
if limit >= results.size():
if limit >= <int64_t>results.size():
algorithm.sort(results.begin(), results.end(), ExtractComp(scorer_flags))
else:
algorithm.partial_sort(results.begin(), results.begin() + <ptrdiff_t>limit, results.end(), ExtractComp(scorer_flags))
@ -777,10 +777,10 @@ cdef inline extract_list_i64(query, choices, RF_Scorer* scorer, const RF_ScorerF
get_score_cutoff_i64(score_cutoff, scorer_flags))
# due to score_cutoff not always completely filled
if limit > results.size():
limit = results.size()
if limit > <int64_t>results.size():
limit = <int64_t>results.size()
if limit >= results.size():
if limit >= <int64_t>results.size():
algorithm.sort(results.begin(), results.end(), ExtractComp(scorer_flags))
else:
algorithm.partial_sort(results.begin(), results.begin() + <ptrdiff_t>limit, results.end(), ExtractComp(scorer_flags))

View File

@ -250,7 +250,7 @@ def cdist(queries, choices, *, scorer=ratio, processor=None, score_cutoff=None,
of the two collections of inputs.
"""
cdef RF_Scorer* scorer_context
cdef RF_Scorer* scorer_context = NULL
cdef RF_ScorerFlags scorer_flags
if processor is True:

View File

@ -1,7 +1,16 @@
# should use target_include_directories once this is supported by scikit-build
include_directories(${RF_BASE_DIR}/src/cython)
# Resolve the C++ source for the Cython module `_name` and hand it back to the
# caller in a variable that is itself named `${_name}`.
#
# If a pre-generated `<_name>.cxx` exists next to this CMakeLists.txt, that file
# is used and Cython is never looked up. Otherwise Cython is required and
# add_cython_target() is asked to generate the source as part of the build.
#
# Example:
#   create_cython_target(Hamming)
#   add_library(Hamming MODULE ${Hamming})
function(create_cython_target _name)
  if(EXISTS "${CMAKE_CURRENT_LIST_DIR}/${_name}.cxx")
    set(${_name} "${CMAKE_CURRENT_LIST_DIR}/${_name}.cxx" PARENT_SCOPE)
  else()
    find_package(Cython REQUIRED)
    # should use target_include_directories once this is supported by scikit-build
    include_directories(${RF_BASE_DIR}/src/cython)
    add_cython_target(${_name} CXX)
    # Per scikit-build's UseCython documentation, add_cython_target() defines
    # `${_name}` (in this function's scope) as the path of the generated source.
    # Forward that path rather than the bare target name, so the caller does not
    # depend on CMake guessing the file extension (policy CMP0115).
    set(${_name} "${${_name}}" PARENT_SCOPE)
  endif()
endfunction()
add_cython_target(_initialize CXX)
create_cython_target(_initialize)
add_library(_initialize MODULE ${_initialize})
target_compile_features(_initialize PUBLIC cxx_std_14)
target_include_directories(_initialize PRIVATE ${RF_CAPI_PATH} ${RF_BASE_DIR}/src/cython ${RF_BASE_DIR}/src/cython/distance)
@ -9,7 +18,7 @@ target_link_libraries(_initialize PRIVATE rapidfuzz::rapidfuzz jaro_winkler::jar
python_extension_module(_initialize)
install(TARGETS _initialize LIBRARY DESTINATION src/rapidfuzz/distance)
add_cython_target(Hamming CXX)
create_cython_target(Hamming)
add_library(Hamming MODULE ${Hamming})
target_compile_features(Hamming PUBLIC cxx_std_14)
target_include_directories(Hamming PRIVATE ${RF_CAPI_PATH} ${RF_BASE_DIR}/src/cython ${RF_BASE_DIR}/src/cython/distance)
@ -17,7 +26,7 @@ target_link_libraries(Hamming PRIVATE rapidfuzz::rapidfuzz jaro_winkler::jaro_wi
python_extension_module(Hamming)
install(TARGETS Hamming LIBRARY DESTINATION src/rapidfuzz/distance)
add_cython_target(Levenshtein CXX)
create_cython_target(Levenshtein)
add_library(Levenshtein MODULE ${Levenshtein})
target_compile_features(Levenshtein PUBLIC cxx_std_14)
target_include_directories(Levenshtein PRIVATE ${RF_CAPI_PATH} ${RF_BASE_DIR}/src/cython ${RF_BASE_DIR}/src/cython/distance)
@ -25,7 +34,7 @@ target_link_libraries(Levenshtein PRIVATE rapidfuzz::rapidfuzz jaro_winkler::jar
python_extension_module(Levenshtein)
install(TARGETS Levenshtein LIBRARY DESTINATION src/rapidfuzz/distance)
add_cython_target(Indel CXX)
create_cython_target(Indel)
add_library(Indel MODULE ${Indel})
target_compile_features(Indel PUBLIC cxx_std_14)
target_include_directories(Indel PRIVATE ${RF_CAPI_PATH} ${RF_BASE_DIR}/src/cython ${RF_BASE_DIR}/src/cython/distance)
@ -33,7 +42,7 @@ target_link_libraries(Indel PRIVATE rapidfuzz::rapidfuzz jaro_winkler::jaro_wink
python_extension_module(Indel)
install(TARGETS Indel LIBRARY DESTINATION src/rapidfuzz/distance)
add_cython_target(Jaro CXX)
create_cython_target(Jaro)
add_library(Jaro MODULE ${Jaro})
target_compile_features(Jaro PUBLIC cxx_std_14)
target_include_directories(Jaro PRIVATE ${RF_CAPI_PATH} ${RF_BASE_DIR}/src/cython ${RF_BASE_DIR}/src/cython/distance)
@ -41,7 +50,7 @@ target_link_libraries(Jaro PRIVATE rapidfuzz::rapidfuzz jaro_winkler::jaro_winkl
python_extension_module(Jaro)
install(TARGETS Jaro LIBRARY DESTINATION src/rapidfuzz/distance)
add_cython_target(JaroWinkler CXX)
create_cython_target(JaroWinkler)
add_library(JaroWinkler MODULE ${JaroWinkler})
target_compile_features(JaroWinkler PUBLIC cxx_std_14)
target_include_directories(JaroWinkler PRIVATE ${RF_CAPI_PATH} ${RF_BASE_DIR}/src/cython ${RF_BASE_DIR}/src/cython/distance)

View File

@ -194,7 +194,7 @@ cdef class Editop:
and other[1] == self.src_pos
and other[2] == self.dest_pos)
def __getitem__(self, i):
def __getitem__(self, Py_ssize_t i):
if i==0 or i==-3: return self.tag
if i==1 or i==-2: return self.src_pos
if i==2 or i==-1: return self.dest_pos
@ -314,14 +314,14 @@ cdef class Editops:
return self.editops.size()
def __getitem__(self, key):
cdef int index
cdef Py_ssize_t index
if isinstance(key, int):
index = key
if index < 0:
index += self.editops.size()
index += <Py_ssize_t>self.editops.size()
if index < 0 or index >= self.editops.size():
if index < 0 or index >= <Py_ssize_t>self.editops.size():
raise IndexError("Editops index out of range")
return Editop(
@ -389,7 +389,7 @@ cdef class Opcode:
and other[3] == self.dest_start
and other[4] == self.dest_end)
def __getitem__(self, i):
def __getitem__(self, Py_ssize_t i):
if i==0 or i==-5: return self.tag
if i==1 or i==-4: return self.src_start
if i==2 or i==-3: return self.src_end
@ -517,14 +517,14 @@ cdef class Opcodes:
return self.opcodes.size()
def __getitem__(self, key):
cdef int index
cdef Py_ssize_t index
if isinstance(key, int):
index = key
if index < 0:
index += self.opcodes.size()
index += <Py_ssize_t>self.opcodes.size()
if index < 0 or index >= self.opcodes.size():
if index < 0 or index >= <Py_ssize_t>self.opcodes.size():
raise IndexError("Opcodes index out of range")
return Opcode(

21
src/cython/generate.sh Executable file
View File

@ -0,0 +1,21 @@
#!/bin/sh
# Pre-generate the C++ (.cxx) sources for every Cython module so that the
# package can be built from an sdist without Cython being installed.
#
# Usage: ./src/cython/generate.sh   (may be invoked from any directory)

# Abort on the first failing command (and on unset variables) so that a broken
# Cython run fails the calling CI step instead of being reported as "Generated".
set -eu

# Directory containing this script. dirname also handles an invocation without
# any slash in $0 (e.g. `sh generate.sh`), where "${0%/*}" would wrongly yield
# the script's own file name.
curdir=$(dirname -- "$0")

# generate_cython <module>
#   <module> is the path of a .pyx file relative to this directory, without the
#   extension (e.g. "distance/Hamming"). Writes <module>.cxx next to it.
generate_cython()
{
    python -m cython -I "$curdir" --cplus "$curdir/$1.pyx" -o "$curdir/$1.cxx"
    echo "Generated $curdir/$1.cxx"
}

generate_cython cpp_fuzz
generate_cython cpp_process_cdist
generate_cython cpp_process
generate_cython cpp_string_metric
generate_cython cpp_utils

generate_cython distance/_initialize
generate_cython distance/Hamming
generate_cython distance/Indel
generate_cython distance/Jaro
generate_cython distance/JaroWinkler
generate_cython distance/Levenshtein