From cd8af8cad24aa4b212336eb30d6479432bb3f42a Mon Sep 17 00:00:00 2001 From: Max Bachmann Date: Sat, 12 Feb 2022 18:58:10 +0100 Subject: [PATCH] allow generating cython files (#194) --- .github/workflows/branchbuild.yml | 31 +++++++++++++++++++++++++---- .github/workflows/releasebuild.yml | 6 ++++++ .gitignore | 4 ++-- CMakeLists.txt | 1 - MANIFEST.in | 2 +- src/cython/CMakeLists.txt | 25 +++++++++++++++-------- src/cython/cpp_process.pyx | 24 +++++++++++----------- src/cython/cpp_process_cdist.pyx | 2 +- src/cython/distance/CMakeLists.txt | 25 +++++++++++++++-------- src/cython/distance/_initialize.pyx | 16 +++++++-------- src/cython/generate.sh | 21 +++++++++++++++++++ 11 files changed, 112 insertions(+), 45 deletions(-) create mode 100755 src/cython/generate.sh diff --git a/.github/workflows/branchbuild.yml b/.github/workflows/branchbuild.yml index bc0e995..bf67f6d 100644 --- a/.github/workflows/branchbuild.yml +++ b/.github/workflows/branchbuild.yml @@ -6,9 +6,34 @@ on: - main jobs: + no_cython_install: + name: "Test install with generated cython files" + runs-on: "ubuntu-latest" + + steps: + - uses: "actions/checkout@v2" + with: + submodules: 'true' + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install pytest hypothesis pandas mypy rapidfuzz_capi Cython==3.0.0a10 + + - name: Generate cython + run: | + chmod +x ./src/cython/generate.sh + ./src/cython/generate.sh + + - name: build + run: | + pip install build; python -m build --sdist + # test whether tarball contains all files required for compiling + pip install dist/rapidfuzz-*.tar.gz + tests: name: "Python ${{ matrix.python-version }}" - runs-on: "ubuntu-latest" + runs-on: ${{matrix.os}} strategy: fail-fast: false matrix: @@ -30,9 +55,7 @@ jobs: - name: build run: | - pip install build; python -m build --sdist - # test whether tarball contains all files required for compiling - pip install dist/rapidfuzz-*.tar.gz + pip install . - name: Test type stubs if: matrix.python-version != '3.6' diff --git a/.github/workflows/releasebuild.yml b/.github/workflows/releasebuild.yml index bc55f0a..f2256b6 100644 --- a/.github/workflows/releasebuild.yml +++ b/.github/workflows/releasebuild.yml @@ -150,6 +150,12 @@ jobs: python -m pip install --upgrade pip pip install pytest hypothesis pandas mypy + # The cythonized files allow installation from the sdist without cython + - name: Generate cython + run: | + chmod +x ./src/cython/generate.sh + ./src/cython/generate.sh + - name: Build sdist run: | pip install build diff --git a/.gitignore b/.gitignore index 10f8261..6c7f760 100644 --- a/.gitignore +++ b/.gitignore @@ -24,6 +24,6 @@ bench_results/ # Hypothesis results .hypothesis/ -# Cython -src/*.cpp +# Cython generated files +*.cxx diff --git a/CMakeLists.txt b/CMakeLists.txt index 8a5927c..2641178 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -19,7 +19,6 @@ endif() find_package(NumPy REQUIRED) find_package(PythonExtensions REQUIRED) find_package(Python COMPONENTS Interpreter Development) -find_package(Cython REQUIRED) set(RF_BASE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) diff --git a/MANIFEST.in b/MANIFEST.in index efc3dcb..2b891a0 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -9,7 +9,7 @@ include src/rapidfuzz/**/*.pyi include src/rapidfuzz/py.typed recursive-include src/cython CMakeLists.txt -recursive-include src/cython *.hpp *.h *.cpp *.pyx *.pxd +recursive-include src/cython *.hpp *.h *.cpp *.cxx *.pyx *.pxd include extern/rapidfuzz-cpp/LICENSE include extern/rapidfuzz-cpp/CMakeLists.txt diff --git a/src/cython/CMakeLists.txt b/src/cython/CMakeLists.txt index 4ce729c..31aa3d0 100644 --- a/src/cython/CMakeLists.txt +++ b/src/cython/CMakeLists.txt @@ -1,15 +1,24 @@ -# should use target_include_directories once this is supported by scikit-build -include_directories(${RF_BASE_DIR}/src/cython) +function(create_cython_target _name) + if(EXISTS ${CMAKE_CURRENT_LIST_DIR}/${_name}.cxx) + set(${_name} ${CMAKE_CURRENT_LIST_DIR}/${_name}.cxx PARENT_SCOPE) + else() + find_package(Cython REQUIRED) + # should use target_include_directories once this is supported by scikit-build + include_directories(${RF_BASE_DIR}/src/cython) + add_cython_target(${_name} CXX) + set(${_name} ${_name} PARENT_SCOPE) + endif() +endfunction(create_cython_target) -add_cython_target(cpp_utils CXX) -add_library(cpp_utils MODULE ${cpp_utils} ${RF_BASE_DIR}/src/cython/utils.cpp) +create_cython_target(cpp_utils) +add_library(cpp_utils MODULE ${cpp_utils} ${CMAKE_CURRENT_LIST_DIR}/utils.cpp) target_compile_features(cpp_utils PUBLIC cxx_std_14) target_include_directories(cpp_utils PRIVATE ${RF_CAPI_PATH} ${RF_BASE_DIR}/src/cython) target_link_libraries(cpp_utils PRIVATE rapidfuzz::rapidfuzz) python_extension_module(cpp_utils) install(TARGETS cpp_utils LIBRARY DESTINATION src/rapidfuzz) -add_cython_target(cpp_fuzz CXX) +create_cython_target(cpp_fuzz) add_library(cpp_fuzz MODULE ${cpp_fuzz}) target_compile_features(cpp_fuzz PUBLIC cxx_std_14) target_include_directories(cpp_fuzz PRIVATE ${RF_CAPI_PATH} ${RF_BASE_DIR}/src/cython) @@ -17,7 +26,7 @@ target_link_libraries(cpp_fuzz PRIVATE rapidfuzz::rapidfuzz) python_extension_module(cpp_fuzz) install(TARGETS cpp_fuzz LIBRARY DESTINATION src/rapidfuzz) -add_cython_target(cpp_string_metric CXX) +create_cython_target(cpp_string_metric) add_library(cpp_string_metric MODULE ${cpp_string_metric}) target_compile_features(cpp_string_metric PUBLIC cxx_std_14) target_include_directories(cpp_string_metric PRIVATE ${RF_CAPI_PATH} ${RF_BASE_DIR}/src/cython) @@ -25,7 +34,7 @@ target_link_libraries(cpp_string_metric PRIVATE rapidfuzz::rapidfuzz jaro_winkle python_extension_module(cpp_string_metric) install(TARGETS cpp_string_metric LIBRARY DESTINATION src/rapidfuzz) -add_cython_target(cpp_process CXX) +create_cython_target(cpp_process) add_library(cpp_process MODULE ${cpp_process}) target_compile_features(cpp_process PUBLIC cxx_std_14) target_include_directories(cpp_process PRIVATE ${RF_CAPI_PATH} ${RF_BASE_DIR}/src/cython) @@ -33,7 +42,7 @@ target_link_libraries(cpp_process PRIVATE rapidfuzz::rapidfuzz) python_extension_module(cpp_process) install(TARGETS cpp_process LIBRARY DESTINATION src/rapidfuzz) -add_cython_target(cpp_process_cdist CXX) +create_cython_target(cpp_process_cdist) add_library(cpp_process_cdist MODULE ${cpp_process_cdist}) target_compile_features(cpp_process_cdist PUBLIC cxx_std_14) target_include_directories(cpp_process_cdist PRIVATE ${RF_CAPI_PATH} ${RF_BASE_DIR}/src/cython ${NumPy_INCLUDE_DIR}) diff --git a/src/cython/cpp_process.pyx b/src/cython/cpp_process.pyx index 2ddc5cd..0201633 100644 --- a/src/cython/cpp_process.pyx +++ b/src/cython/cpp_process.pyx @@ -678,10 +678,10 @@ cdef inline extract_dict_f64(query, choices, RF_Scorer* scorer, const RF_ScorerF get_score_cutoff_f64(score_cutoff, scorer_flags)) # due to score_cutoff not always completely filled - if limit > results.size(): - limit = results.size() + if limit > results.size(): + limit = results.size() - if limit >= results.size(): + if limit >= results.size(): algorithm.sort(results.begin(), results.end(), ExtractComp(scorer_flags)) else: algorithm.partial_sort(results.begin(), results.begin() + limit, results.end(), ExtractComp(scorer_flags)) @@ -706,10 +706,10 @@ cdef inline extract_dict_i64(query, choices, RF_Scorer* scorer, const RF_ScorerF get_score_cutoff_i64(score_cutoff, scorer_flags)) # due to score_cutoff not always completely filled - if limit > results.size(): - limit = results.size() + if limit > results.size(): + limit = results.size() - if limit >= results.size(): + if limit >= results.size(): algorithm.sort(results.begin(), results.end(), ExtractComp(scorer_flags)) else: algorithm.partial_sort(results.begin(), results.begin() + limit, results.end(), ExtractComp(scorer_flags)) @@ -749,10 +749,10 @@ cdef inline extract_list_f64(query, choices, RF_Scorer* scorer, const RF_ScorerF get_score_cutoff_f64(score_cutoff, scorer_flags)) # due to score_cutoff not always completely filled - if limit > results.size(): - limit = results.size() + if limit > results.size(): + limit = results.size() - if limit >= results.size(): + if limit >= results.size(): algorithm.sort(results.begin(), results.end(), ExtractComp(scorer_flags)) else: algorithm.partial_sort(results.begin(), results.begin() + limit, results.end(), ExtractComp(scorer_flags)) @@ -777,10 +777,10 @@ cdef inline extract_list_i64(query, choices, RF_Scorer* scorer, const RF_ScorerF get_score_cutoff_i64(score_cutoff, scorer_flags)) # due to score_cutoff not always completely filled - if limit > results.size(): - limit = results.size() + if limit > results.size(): + limit = results.size() - if limit >= results.size(): + if limit >= results.size(): algorithm.sort(results.begin(), results.end(), ExtractComp(scorer_flags)) else: algorithm.partial_sort(results.begin(), results.begin() + limit, results.end(), ExtractComp(scorer_flags)) diff --git a/src/cython/cpp_process_cdist.pyx b/src/cython/cpp_process_cdist.pyx index ee47edf..e80015d 100644 --- a/src/cython/cpp_process_cdist.pyx +++ b/src/cython/cpp_process_cdist.pyx @@ -250,7 +250,7 @@ def cdist(queries, choices, *, scorer=ratio, processor=None, score_cutoff=None, of the two collections of inputs. """ - cdef RF_Scorer* scorer_context + cdef RF_Scorer* scorer_context = NULL cdef RF_ScorerFlags scorer_flags if processor is True: diff --git a/src/cython/distance/CMakeLists.txt b/src/cython/distance/CMakeLists.txt index be50aed..6e1fb1a 100644 --- a/src/cython/distance/CMakeLists.txt +++ b/src/cython/distance/CMakeLists.txt @@ -1,7 +1,16 @@ -# should use target_include_directories once this is supported by scikit-build -include_directories(${RF_BASE_DIR}/src/cython) +function(create_cython_target _name) + if(EXISTS ${CMAKE_CURRENT_LIST_DIR}/${_name}.cxx) + set(${_name} ${CMAKE_CURRENT_LIST_DIR}/${_name}.cxx PARENT_SCOPE) + else() + find_package(Cython REQUIRED) + # should use target_include_directories once this is supported by scikit-build + include_directories(${RF_BASE_DIR}/src/cython) + add_cython_target(${_name} CXX) + set(${_name} ${_name} PARENT_SCOPE) + endif() +endfunction(create_cython_target) -add_cython_target(_initialize CXX) +create_cython_target(_initialize) add_library(_initialize MODULE ${_initialize}) target_compile_features(_initialize PUBLIC cxx_std_14) target_include_directories(_initialize PRIVATE ${RF_CAPI_PATH} ${RF_BASE_DIR}/src/cython ${RF_BASE_DIR}/src/cython/distance) @@ -9,7 +18,7 @@ target_link_libraries(_initialize PRIVATE rapidfuzz::rapidfuzz jaro_winkler::jar python_extension_module(_initialize) install(TARGETS _initialize LIBRARY DESTINATION src/rapidfuzz/distance) -add_cython_target(Hamming CXX) +create_cython_target(Hamming) add_library(Hamming MODULE ${Hamming}) target_compile_features(Hamming PUBLIC cxx_std_14) target_include_directories(Hamming PRIVATE ${RF_CAPI_PATH} ${RF_BASE_DIR}/src/cython ${RF_BASE_DIR}/src/cython/distance) @@ -17,7 +26,7 @@ target_link_libraries(Hamming PRIVATE rapidfuzz::rapidfuzz jaro_winkler::jaro_wi python_extension_module(Hamming) install(TARGETS Hamming LIBRARY DESTINATION src/rapidfuzz/distance) -add_cython_target(Levenshtein CXX) +create_cython_target(Levenshtein) add_library(Levenshtein MODULE ${Levenshtein}) target_compile_features(Levenshtein PUBLIC cxx_std_14) target_include_directories(Levenshtein PRIVATE ${RF_CAPI_PATH} ${RF_BASE_DIR}/src/cython ${RF_BASE_DIR}/src/cython/distance) @@ -25,7 +34,7 @@ target_link_libraries(Levenshtein PRIVATE rapidfuzz::rapidfuzz jaro_winkler::jar python_extension_module(Levenshtein) install(TARGETS Levenshtein LIBRARY DESTINATION src/rapidfuzz/distance) -add_cython_target(Indel CXX) +create_cython_target(Indel) add_library(Indel MODULE ${Indel}) target_compile_features(Indel PUBLIC cxx_std_14) target_include_directories(Indel PRIVATE ${RF_CAPI_PATH} ${RF_BASE_DIR}/src/cython ${RF_BASE_DIR}/src/cython/distance) @@ -33,7 +42,7 @@ target_link_libraries(Indel PRIVATE rapidfuzz::rapidfuzz jaro_winkler::jaro_wink python_extension_module(Indel) install(TARGETS Indel LIBRARY DESTINATION src/rapidfuzz/distance) -add_cython_target(Jaro CXX) +create_cython_target(Jaro) add_library(Jaro MODULE ${Jaro}) target_compile_features(Jaro PUBLIC cxx_std_14) target_include_directories(Jaro PRIVATE ${RF_CAPI_PATH} ${RF_BASE_DIR}/src/cython ${RF_BASE_DIR}/src/cython/distance) @@ -41,7 +50,7 @@ target_link_libraries(Jaro PRIVATE rapidfuzz::rapidfuzz jaro_winkler::jaro_winkl python_extension_module(Jaro) install(TARGETS Jaro LIBRARY DESTINATION src/rapidfuzz/distance) -add_cython_target(JaroWinkler CXX) +create_cython_target(JaroWinkler) add_library(JaroWinkler MODULE ${JaroWinkler}) target_compile_features(JaroWinkler PUBLIC cxx_std_14) target_include_directories(JaroWinkler PRIVATE ${RF_CAPI_PATH} ${RF_BASE_DIR}/src/cython ${RF_BASE_DIR}/src/cython/distance) diff --git a/src/cython/distance/_initialize.pyx b/src/cython/distance/_initialize.pyx index cf75025..302089d 100644 --- a/src/cython/distance/_initialize.pyx +++ b/src/cython/distance/_initialize.pyx @@ -194,7 +194,7 @@ cdef class Editop: and other[1] == self.src_pos and other[2] == self.dest_pos) - def __getitem__(self, i): + def __getitem__(self, Py_ssize_t i): if i==0 or i==-3: return self.tag if i==1 or i==-2: return self.src_pos if i==2 or i==-1: return self.dest_pos @@ -314,14 +314,14 @@ cdef class Editops: return self.editops.size() def __getitem__(self, key): - cdef int index + cdef Py_ssize_t index if isinstance(key, int): index = key if index < 0: - index += self.editops.size() + index += self.editops.size() - if index < 0 or index >= self.editops.size(): + if index < 0 or index >= self.editops.size(): raise IndexError("Editops index out of range") return Editop( @@ -389,7 +389,7 @@ cdef class Opcode: and other[3] == self.dest_start and other[4] == self.dest_end) - def __getitem__(self, i): + def __getitem__(self, Py_ssize_t i): if i==0 or i==-5: return self.tag if i==1 or i==-4: return self.src_start if i==2 or i==-3: return self.src_end @@ -517,14 +517,14 @@ cdef class Opcodes: return self.opcodes.size() def __getitem__(self, key): - cdef int index + cdef Py_ssize_t index if isinstance(key, int): index = key if index < 0: - index += self.opcodes.size() + index += self.opcodes.size() - if index < 0 or index >= self.opcodes.size(): + if index < 0 or index >= self.opcodes.size(): raise IndexError("Opcodes index out of range") return Opcode( diff --git a/src/cython/generate.sh b/src/cython/generate.sh new file mode 100755 index 0000000..7e1e2bd --- /dev/null +++ b/src/cython/generate.sh @@ -0,0 +1,21 @@ +#!/bin/sh +curdir="${0%/*}" + +generate_cython() +{ + python -m cython -I "$curdir" --cplus "$curdir"/"$1".pyx -o "$curdir"/"$1".cxx + echo "Generated $curdir/$1.cxx" +} + +generate_cython cpp_fuzz +generate_cython cpp_process_cdist +generate_cython cpp_process +generate_cython cpp_string_metric +generate_cython cpp_utils + +generate_cython distance/_initialize +generate_cython distance/Hamming +generate_cython distance/Indel +generate_cython distance/Jaro +generate_cython distance/JaroWinkler +generate_cython distance/Levenshtein