add cdist implementation

2021-09-10 12:44:54 +02:00 · 2021-09-10 12:44:54 +02:00 · 56f062b063
parent 1aed654d4f
commit 56f062b063
15 changed files with 11706 additions and 3583 deletions
--- a/.github/workflows/pythonbuild.yml
+++ b/.github/workflows/pythonbuild.yml
@ -101,7 +101,8 @@ jobs:
      fail-fast: false
      matrix:
        python_tag: [ "pp36-*", "pp37-*"]
-        os: [ubuntu-latest, windows-latest, macos-latest]
+        # numpy ships no wheels for PyPy on macOS
+        os: [ubuntu-latest, windows-latest]
    env:
      CIBW_BUILD: ${{matrix.python_tag}}
      # activate tests when the fix for
--- a/docs/process.rst
+++ b/docs/process.rst
@ -1,6 +1,10 @@
 process module
 ==============

+cdist
+-----
+.. autofunction:: rapidfuzz.process.cdist
+
 extract
 -------
 .. autofunction:: rapidfuzz.process.extract
--- a/pyproject.toml
+++ b/pyproject.toml
@ -2,5 +2,6 @@
 requires = [
    "setuptools",
    "wheel",
+    "oldest-supported-numpy"
 ]
 build-backend = "setuptools.build_meta"
--- a/setup.cfg
+++ b/setup.cfg
@ -26,6 +26,8 @@ package_dir=
    =src
 packages = find:
 python_requires = >=3.5
+install_requires =
+    numpy

 [options.packages.find]
 where=src
--- a/setup.py
+++ b/setup.py
@ -2,6 +2,7 @@ from setuptools import setup, Extension
 from setuptools.command.build_ext import build_ext
 import sys
 import os
+import numpy as np

 # use with export RAPIDFUZZ_TRACE=1
 RAPIDFUZZ_TRACE = os.environ.get("RAPIDFUZZ_TRACE", False)
@ -46,7 +47,7 @@ ext_modules = [
            'src/cpp_process.cpp',
            'src/rapidfuzz-cpp/rapidfuzz/details/unicode.cpp'
        ],
-        include_dirs=["src/rapidfuzz-cpp/"],
+        include_dirs=["src/rapidfuzz-cpp/", np.get_include()],
        language='c++',
    ),
    Extension(
--- a/src/cpp_common.hpp
+++ b/src/cpp_common.hpp
@ -204,8 +204,9 @@ double RATIO##_impl_inner_##PROCESSOR(const proc_string& s1, const Sentence& s2,
 {                                                                                                  \
    switch(s1.kind){                                                                               \
    LIST_OF_CASES(RATIO_FUNC, PROCESSOR)                                                           \
+    default:                                                                                       \
+       throw std::logic_error("Reached end of control flow in " #RATIO "_impl_inner_" #PROCESSOR); \
    }                                                                                              \
-    assert(false); /* silence any warnings about missing return value */                           \
 }

 /* generate <ratio_name>_impl_<processor> functions which are used internally
@ -217,8 +218,9 @@ double RATIO##_impl_##PROCESSOR(const proc_string& s1, const proc_string& s2, Ar
 {                                                                                            \
    switch(s1.kind){                                                                         \
    LIST_OF_CASES(RATIO##_impl_inner_##PROCESSOR, PROCESSOR)                                 \
+    default:                                                                                 \
+       throw std::logic_error("Reached end of control flow in " #RATIO "_impl_" #PROCESSOR); \
    }                                                                                        \
-    assert(false); /* silence any warnings about missing return value */                     \
 }

 #define RATIO_IMPL_DEF(RATIO, RATIO_FUNC)            \
@ -236,8 +238,9 @@ size_t RATIO##_impl_inner_##PROCESSOR(const proc_string& s1, const Sentence& s2,
 {                                                                                                  \
    switch(s1.kind){                                                                               \
    LIST_OF_CASES(RATIO_FUNC, PROCESSOR)                                                           \
+    default:                                                                                       \
+       throw std::logic_error("Reached end of control flow in " #RATIO "_impl_inner_" #PROCESSOR); \
    }                                                                                              \
-    assert(false); /* silence any warnings about missing return value */                           \
 }

 /* generate <ratio_name>_impl_<processor> functions which are used internally
@ -249,8 +252,9 @@ size_t RATIO##_impl_##PROCESSOR(const proc_string& s1, const proc_string& s2, Ar
 {                                                                                            \
    switch(s1.kind){                                                                         \
    LIST_OF_CASES(RATIO##_impl_inner_##PROCESSOR, PROCESSOR)                                 \
+    default:                                                                                 \
+       throw std::logic_error("Reached end of control flow in " #RATIO "_impl_" #PROCESSOR); \
    }                                                                                        \
-    assert(false); /* silence any warnings about missing return value */                     \
 }

 #define DISTANCE_IMPL_DEF(RATIO, RATIO_FUNC)            \
@ -310,3 +314,36 @@ PyObject* RATIO##_default_process(const proc_string& s1, const proc_string& s2,
    size_t result = RATIO##_impl_default_process(s1, s2, max); \
    return dist_to_long(result);                                              \
 }
+
+
+/**
+ * Apply utils::default_process to the characters of `sentence`, interpreted
+ * as an array of CharT.
+ *
+ * When `sentence` does not own its buffer (`allocated` is false) the
+ * characters are first copied into a newly malloc'ed buffer, so the buffer
+ * handed to utils::default_process is never the borrowed one.
+ *
+ * @tparam CharT   character type matching `sentence.kind`
+ * @param sentence string descriptor (taken by value)
+ * @return descriptor with `allocated` set, `data` pointing at the processed
+ *         buffer and `length` set to the value returned by
+ *         utils::default_process
+ * @throws std::bad_alloc when the temporary buffer can not be allocated
+ *
+ * NOTE(review): the returned buffer presumably has to be released with
+ * free() by whoever consumes the proc_string - confirm against the callers.
+ */
+template <typename CharT>
+proc_string default_process_func_impl(proc_string sentence) {
+    CharT* str = static_cast<CharT*>(sentence.data);
+    if (!sentence.allocated)
+    {
+      /* malloc(0) is allowed to return NULL, which must not be mistaken for
+       * an allocation failure on empty input: always request >= 1 element */
+      size_t alloc_len = sentence.length ? sentence.length : 1;
+      CharT* temp_str = static_cast<CharT*>(malloc(alloc_len * sizeof(CharT)));
+      if (temp_str == NULL)
+      {
+          throw std::bad_alloc();
+      }
+      std::copy(str, str + sentence.length, temp_str);
+      str = temp_str;
+    }
+
+    /* `kind` is left untouched: processing does not change the char type */
+    sentence.allocated = true;
+    sentence.data = str;
+    sentence.length = utils::default_process(str, sentence.length);
+
+    return sentence;
+}
+
+/**
+ * Dispatch on `sentence.kind` to the matching
+ * default_process_func_impl<CharT> instantiation.
+ *
+ * Declared `inline` because this non-template function is defined in a
+ * header; without it, including the header from more than one translation
+ * unit of the same module yields multiple-definition link errors.
+ *
+ * @param sentence string descriptor (taken by value and moved into the impl)
+ * @return the processed descriptor returned by the selected implementation
+ * @throws std::logic_error when `sentence.kind` matches none of the cases
+ *         generated by LIST_OF_CASES
+ * @throws std::bad_alloc   propagated from default_process_func_impl
+ */
+inline proc_string default_process_func(proc_string sentence) {
+    switch (sentence.kind) {
+    /* X_ENUM expands each (KIND, TYPE) entry of LIST_OF_CASES into a case */
+    # define X_ENUM(KIND, TYPE, MSVC_TUPLE) case KIND: return default_process_func_impl<TYPE>(std::move(sentence));
+    LIST_OF_CASES()
+    default:
+       throw std::logic_error("Reached end of control flow in default_process_func");
+    # undef X_ENUM
+    }
+}
--- a/src/cpp_common.pxd
+++ b/src/cpp_common.pxd
@ -23,6 +23,7 @@ cdef extern from "cpp_common.hpp":
    int is_valid_string(object py_str) except +
    proc_string convert_string(object py_str)
    void validate_string(object py_str, const char* err) except +
+    proc_string default_process_func(proc_string sentence) except +

 cdef inline proc_string hash_array(arr) except *:
    # TODO on Cpython this does not require any copies
--- a/src/cpp_fuzz.cpp
+++ b/src/cpp_fuzz.cpp
@ -1976,11 +1976,11 @@ static const char __pyx_k_partial_token_set_ratio[] = "partial_token_set_ratio";
 static const char __pyx_k_partial_token_sort_ratio[] = "partial_token_sort_ratio";
 static const char __pyx_k_token_set_ratio_line_217[] = "token_set_ratio (line 217)";
 static const char __pyx_k_token_sort_ratio_line_170[] = "token_sort_ratio (line 170)";
-static const char __pyx_k_Sorts_the_words_in_the_strings[] = "\n    Sorts the words in the strings and calculates the fuzz.ratio between them\n\n    Parameters\n    ----------\n    s1 : str\n        First string to compare.\n    s2 : str\n        Second string to compare.\n    processor: bool or callable, optional\n        Optional callable that is used to preprocess the strings before\n        comparing them. When processor is True ``utils.default_process``\n        is used. Default is True.\n    score_cutoff : float, optional\n        Optional argument for a score threshold as a float between 0 and 100.\n        For ratio < score_cutoff 0 is returned instead. Default is 0,\n        which deactivates this behaviour.\n\n    Returns\n    -------\n    similarity : float\n        similarity between s1 and s2 as a float between 0 and 100\n\n    Notes\n    -----\n    .. image:: img/token_sort_ratio.svg\n\n    Examples\n    --------\n    >>> fuzz.token_sort_ratio(\"fuzzy wuzzy was a bear\", \"wuzzy fuzzy was a bear\")\n    100.0\n    ";
-static const char __pyx_k_Calculates_a_quick_ratio_betwee[] = "\n    Calculates a quick ratio between two strings using fuzz.ratio.\n    The only difference to fuzz.ratio is, that this preprocesses\n    the strings by default.\n\n    Parameters\n    ----------\n    s1 : str\n        First string to compare.\n    s2 : str\n        Second string to compare.\n    processor: bool or callable, optional\n        Optional callable that is used to preprocess the strings before\n        comparing them. When processor is True ``utils.default_process``\n        is used. Default is True.\n    score_cutoff : float, optional\n        Optional argument for a score threshold as a float between 0 and 100.\n        For ratio < score_cutoff 0 is returned instead. Default is 0,\n        which deactivates this behaviour.\n\n    Returns\n    -------\n    similarity : float\n        similarity between s1 and s2 as a float between 0 and 100\n\n    Examples\n    --------\n    >>> fuzz.QRatio(\"this is a test\", \"THIS is a test!\")\n    100.0\n    ";
-static const char __pyx_k_Calculates_the_normalized_InDel[] = "\n    Calculates the normalized InDel distance.\n\n    Parameters\n    ----------\n    s1 : str\n        First string to compare.\n    s2 : str\n        Second string to compare.\n    processor: bool or callable, optional\n        Optional callable that is used to preprocess the strings before\n        comparing them. When processor is True ``utils.default_process``\n        is used. Default is None, which deactivates this behaviour.\n    score_cutoff : float, optional\n        Optional argument for a score threshold as a float between 0 and 100.\n        For ratio < score_cutoff 0 is returned instead. Default is 0,\n        which deactivates this behaviour.\n\n    Returns\n    -------\n    similarity : float\n        similarity between s1 and s2 as a float between 0 and 100\n\n    See Also\n    --------\n    rapidfuzz.string_metric.normalized_levenshtein : Normalized levenshtein distance\n\n    Notes\n    -----\n    .. image:: img/ratio.svg\n\n    Examples\n    --------\n    >>> fuzz.ratio(\"this is a test\", \"this is a test!\")\n    96.55171966552734\n    ";
-static const char __pyx_k_Compares_the_words_in_the_strin[] = "\n    Compares the words in the strings based on unique and common words between them\n    using fuzz.ratio\n\n    Parameters\n    ----------\n    s1 : str\n        First string to compare.\n    s2 : str\n        Second string to compare.\n    processor: bool or callable, optional\n        Optional callable that is used to preprocess the strings before\n        comparing them. When processor is True ``utils.default_process``\n        is used. Default is True.\n    score_cutoff : float, optional\n        Optional argument for a score threshold as a float between 0 and 100.\n        For ratio < score_cutoff 0 is returned instead. Default is 0,\n        which deactivates this behaviour.\n\n    Returns\n    -------\n    similarity : float\n        similarity between s1 and s2 as a float between 0 and 100\n\n    Notes\n    -----\n    .. image:: img/token_set_ratio.svg\n\n    Examples\n    --------\n    >>> fuzz.token_sort_ratio(\"fuzzy was a bear\", \"fuzzy fuzzy was a bear\")\n    83.8709716796875\n    >>> fuzz.token_set_ratio(\"fuzzy was a bear\", \"fuzzy fuzzy was a bear\")\n    100.0\n    ";
-static const char __pyx_k_Searches_for_the_optimal_alignm[] = "\n    Searches for the optimal alignment of the shorter string in the\n    longer string and returns the fuzz.ratio for this alignment.\n\n    Parameters\n    ----------\n    s1 : str\n        First string to compare.\n    s2 : str\n        Second string to compare.\n    processor: bool or callable, optional\n        Optional callable that is used to preprocess the strings before\n        comparing them. When processor is True ``utils.default_process``\n        is used. Default is None, which deactivates this behaviour.\n    score_cutoff : float, optional\n        Optional argument for a score threshold as a float between 0 and 100.\n        For ratio < score_cutoff 0 is returned instead. Default is 0,\n        which deactivates this behaviour.\n\n    Returns\n    -------\n    similarity : float\n        similarity between s1 and s2 as a float between 0 and 100\n\n    Notes\n    -----\n    Depending on the length of the needle (shorter string) different\n    implementations are used to improve the performance.\n\n    short needle (length \342\211\244 64):\n        When using a short needle length the fuzz.ratio is calculated for all\n        alignments that could result in an optimal alignment. It is\n        guaranteed to find the optimal alignment. For short needles this is very\n        fast, since for them fuzz.ratio runs in ``O(N)`` time. This results in a worst\n        case performance of ``O(NM)``.\n    \n    .. image:: img/partial_ratio_short_needle.svg\n\n    long needle (length > 64):\n        For long needles a similar implementation to FuzzyWuzzy is used.\n        This implementation only considers alignments which start at one\n        of the longest common substrings. This results in a worst case performance\n        of ``O(N[N/64]M)``. However usually most of the alignments can be skipped.\n        The following Python code shows the concept:\n\n        .. 
code-block:: python\n\n            blocks = SequenceMatcher(None, needle, longer, False).get_ma""tching_blocks()\n            score = 0\n            for block in blocks:\n                long_start = block[1] - block[0] if (block[1] - block[0]) > 0 else 0\n                long_end = long_start + len(shorter)\n                long_substr = longer[long_start:long_end]\n                score = max(score, fuzz.ratio(needle, long_substr))\n\n        This is a lot faster than checking all possible alignments. However it\n        only finds one of the best alignments and not necessarily the optimal one.\n\n    .. image:: img/partial_ratio_long_needle.svg\n\n    Examples\n    --------\n    >>> fuzz.partial_ratio(\"this is a test\", \"this is a test!\")\n    100.0\n    ";
+static const char __pyx_k_Sorts_the_words_in_the_strings[] = "\n    Sorts the words in the strings and calculates the fuzz.ratio between them\n\n    Parameters\n    ----------\n    s1 : Sequence[Hashable]\n        First string to compare.\n    s2 : Sequence[Hashable]\n        Second string to compare.\n    processor: bool or callable, optional\n        Optional callable that is used to preprocess the strings before\n        comparing them. When processor is True ``utils.default_process``\n        is used. Default is True.\n    score_cutoff : float, optional\n        Optional argument for a score threshold as a float between 0 and 100.\n        For ratio < score_cutoff 0 is returned instead. Default is 0,\n        which deactivates this behaviour.\n\n    Returns\n    -------\n    similarity : float\n        similarity between s1 and s2 as a float between 0 and 100\n\n    Notes\n    -----\n    .. image:: img/token_sort_ratio.svg\n\n    Examples\n    --------\n    >>> fuzz.token_sort_ratio(\"fuzzy wuzzy was a bear\", \"wuzzy fuzzy was a bear\")\n    100.0\n    ";
+static const char __pyx_k_Calculates_a_quick_ratio_betwee[] = "\n    Calculates a quick ratio between two strings using fuzz.ratio.\n    The only difference to fuzz.ratio is, that this preprocesses\n    the strings by default.\n\n    Parameters\n    ----------\n    s1 : Sequence[Hashable]\n        First string to compare.\n    s2 : Sequence[Hashable]\n        Second string to compare.\n    processor: bool or callable, optional\n        Optional callable that is used to preprocess the strings before\n        comparing them. When processor is True ``utils.default_process``\n        is used. Default is True.\n    score_cutoff : float, optional\n        Optional argument for a score threshold as a float between 0 and 100.\n        For ratio < score_cutoff 0 is returned instead. Default is 0,\n        which deactivates this behaviour.\n\n    Returns\n    -------\n    similarity : float\n        similarity between s1 and s2 as a float between 0 and 100\n\n    Examples\n    --------\n    >>> fuzz.QRatio(\"this is a test\", \"THIS is a test!\")\n    100.0\n    ";
+static const char __pyx_k_Calculates_the_normalized_InDel[] = "\n    Calculates the normalized InDel distance.\n\n    Parameters\n    ----------\n    s1 : Sequence[Hashable]\n        First string to compare.\n    s2 : Sequence[Hashable]\n        Second string to compare.\n    processor: bool or callable, optional\n        Optional callable that is used to preprocess the strings before\n        comparing them. When processor is True ``utils.default_process``\n        is used. Default is None, which deactivates this behaviour.\n    score_cutoff : float, optional\n        Optional argument for a score threshold as a float between 0 and 100.\n        For ratio < score_cutoff 0 is returned instead. Default is 0,\n        which deactivates this behaviour.\n\n    Returns\n    -------\n    similarity : float\n        similarity between s1 and s2 as a float between 0 and 100\n\n    See Also\n    --------\n    rapidfuzz.string_metric.normalized_levenshtein : Normalized levenshtein distance\n\n    Notes\n    -----\n    .. image:: img/ratio.svg\n\n    Examples\n    --------\n    >>> fuzz.ratio(\"this is a test\", \"this is a test!\")\n    96.55171966552734\n    ";
+static const char __pyx_k_Compares_the_words_in_the_strin[] = "\n    Compares the words in the strings based on unique and common words between them\n    using fuzz.ratio\n\n    Parameters\n    ----------\n    s1 : Sequence[Hashable]\n        First string to compare.\n    s2 : Sequence[Hashable]\n        Second string to compare.\n    processor: bool or callable, optional\n        Optional callable that is used to preprocess the strings before\n        comparing them. When processor is True ``utils.default_process``\n        is used. Default is True.\n    score_cutoff : float, optional\n        Optional argument for a score threshold as a float between 0 and 100.\n        For ratio < score_cutoff 0 is returned instead. Default is 0,\n        which deactivates this behaviour.\n\n    Returns\n    -------\n    similarity : float\n        similarity between s1 and s2 as a float between 0 and 100\n\n    Notes\n    -----\n    .. image:: img/token_set_ratio.svg\n\n    Examples\n    --------\n    >>> fuzz.token_sort_ratio(\"fuzzy was a bear\", \"fuzzy fuzzy was a bear\")\n    83.8709716796875\n    >>> fuzz.token_set_ratio(\"fuzzy was a bear\", \"fuzzy fuzzy was a bear\")\n    100.0\n    ";
+static const char __pyx_k_Searches_for_the_optimal_alignm[] = "\n    Searches for the optimal alignment of the shorter string in the\n    longer string and returns the fuzz.ratio for this alignment.\n\n    Parameters\n    ----------\n    s1 : Sequence[Hashable]\n        First string to compare.\n    s2 : Sequence[Hashable]\n        Second string to compare.\n    processor: bool or callable, optional\n        Optional callable that is used to preprocess the strings before\n        comparing them. When processor is True ``utils.default_process``\n        is used. Default is None, which deactivates this behaviour.\n    score_cutoff : float, optional\n        Optional argument for a score threshold as a float between 0 and 100.\n        For ratio < score_cutoff 0 is returned instead. Default is 0,\n        which deactivates this behaviour.\n\n    Returns\n    -------\n    similarity : float\n        similarity between s1 and s2 as a float between 0 and 100\n\n    Notes\n    -----\n    Depending on the length of the needle (shorter string) different\n    implementations are used to improve the performance.\n\n    short needle (length \342\211\244 64):\n        When using a short needle length the fuzz.ratio is calculated for all\n        alignments that could result in an optimal alignment. It is\n        guaranteed to find the optimal alignment. For short needles this is very\n        fast, since for them fuzz.ratio runs in ``O(N)`` time. This results in a worst\n        case performance of ``O(NM)``.\n    \n    .. image:: img/partial_ratio_short_needle.svg\n\n    long needle (length > 64):\n        For long needles a similar implementation to FuzzyWuzzy is used.\n        This implementation only considers alignments which start at one\n        of the longest common substrings. This results in a worst case performance\n        of ``O(N[N/64]M)``. However usually most of the alignments can be skipped.\n        The following Python code shows the concept:\n\n        .. 
code-block:: python\n\n            blocks = SequenceMatcher(None,"" needle, longer, False).get_matching_blocks()\n            score = 0\n            for block in blocks:\n                long_start = block[1] - block[0] if (block[1] - block[0]) > 0 else 0\n                long_end = long_start + len(shorter)\n                long_substr = longer[long_start:long_end]\n                score = max(score, fuzz.ratio(needle, long_substr))\n\n        This is a lot faster than checking all possible alignments. However it\n        only finds one of the best alignments and not necessarily the optimal one.\n\n    .. image:: img/partial_ratio_long_needle.svg\n\n    Examples\n    --------\n    >>> fuzz.partial_ratio(\"this is a test\", \"this is a test!\")\n    100.0\n    ";
 #if !CYTHON_USE_MODULE_STATE
 static PyObject *__pyx_kp_u_Calculates_a_quick_ratio_betwee;
 static PyObject *__pyx_kp_u_Calculates_the_normalized_InDel;
@ -2542,7 +2542,7 @@ PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds
 PyObject *__pyx_args, PyObject *__pyx_kwds
 #endif
 ); /*proto*/
-PyDoc_STRVAR(__pyx_doc_8cpp_fuzz_ratio, "\n    Calculates the normalized InDel distance.\n\n    Parameters\n    ----------\n    s1 : str\n        First string to compare.\n    s2 : str\n        Second string to compare.\n    processor: bool or callable, optional\n        Optional callable that is used to preprocess the strings before\n        comparing them. When processor is True ``utils.default_process``\n        is used. Default is None, which deactivates this behaviour.\n    score_cutoff : float, optional\n        Optional argument for a score threshold as a float between 0 and 100.\n        For ratio < score_cutoff 0 is returned instead. Default is 0,\n        which deactivates this behaviour.\n\n    Returns\n    -------\n    similarity : float\n        similarity between s1 and s2 as a float between 0 and 100\n\n    See Also\n    --------\n    rapidfuzz.string_metric.normalized_levenshtein : Normalized levenshtein distance\n\n    Notes\n    -----\n    .. image:: img/ratio.svg\n\n    Examples\n    --------\n    >>> fuzz.ratio(\"this is a test\", \"this is a test!\")\n    96.55171966552734\n    ");
+PyDoc_STRVAR(__pyx_doc_8cpp_fuzz_ratio, "\n    Calculates the normalized InDel distance.\n\n    Parameters\n    ----------\n    s1 : Sequence[Hashable]\n        First string to compare.\n    s2 : Sequence[Hashable]\n        Second string to compare.\n    processor: bool or callable, optional\n        Optional callable that is used to preprocess the strings before\n        comparing them. When processor is True ``utils.default_process``\n        is used. Default is None, which deactivates this behaviour.\n    score_cutoff : float, optional\n        Optional argument for a score threshold as a float between 0 and 100.\n        For ratio < score_cutoff 0 is returned instead. Default is 0,\n        which deactivates this behaviour.\n\n    Returns\n    -------\n    similarity : float\n        similarity between s1 and s2 as a float between 0 and 100\n\n    See Also\n    --------\n    rapidfuzz.string_metric.normalized_levenshtein : Normalized levenshtein distance\n\n    Notes\n    -----\n    .. image:: img/ratio.svg\n\n    Examples\n    --------\n    >>> fuzz.ratio(\"this is a test\", \"this is a test!\")\n    96.55171966552734\n    ");
 static PyMethodDef __pyx_mdef_8cpp_fuzz_1ratio = {"ratio", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_8cpp_fuzz_1ratio, __Pyx_METH_FASTCALL|METH_KEYWORDS, __pyx_doc_8cpp_fuzz_ratio};
 static PyObject *__pyx_pw_8cpp_fuzz_1ratio(PyObject *__pyx_self, 
 #if CYTHON_METH_FASTCALL
@ -2926,7 +2926,7 @@ PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds
 PyObject *__pyx_args, PyObject *__pyx_kwds
 #endif
 ); /*proto*/
-PyDoc_STRVAR(__pyx_doc_8cpp_fuzz_2partial_ratio, "\n    Searches for the optimal alignment of the shorter string in the\n    longer string and returns the fuzz.ratio for this alignment.\n\n    Parameters\n    ----------\n    s1 : str\n        First string to compare.\n    s2 : str\n        Second string to compare.\n    processor: bool or callable, optional\n        Optional callable that is used to preprocess the strings before\n        comparing them. When processor is True ``utils.default_process``\n        is used. Default is None, which deactivates this behaviour.\n    score_cutoff : float, optional\n        Optional argument for a score threshold as a float between 0 and 100.\n        For ratio < score_cutoff 0 is returned instead. Default is 0,\n        which deactivates this behaviour.\n\n    Returns\n    -------\n    similarity : float\n        similarity between s1 and s2 as a float between 0 and 100\n\n    Notes\n    -----\n    Depending on the length of the needle (shorter string) different\n    implementations are used to improve the performance.\n\n    short needle (length \342\211\244 64):\n        When using a short needle length the fuzz.ratio is calculated for all\n        alignments that could result in an optimal alignment. It is\n        guaranteed to find the optimal alignment. For short needles this is very\n        fast, since for them fuzz.ratio runs in ``O(N)`` time. This results in a worst\n        case performance of ``O(NM)``.\n    \n    .. image:: img/partial_ratio_short_needle.svg\n\n    long needle (length > 64):\n        For long needles a similar implementation to FuzzyWuzzy is used.\n        This implementation only considers alignments which start at one\n        of the longest common substrings. This results in a worst case performance\n        of ``O(N[N/64]M)``. However usually most of the alignments can be skipped.\n        The following Python code shows the concept:\n\n        .. 
code-block:: python\n\n            blocks = SequenceMatcher(None, needle, longer, False).get_ma""tching_blocks()\n            score = 0\n            for block in blocks:\n                long_start = block[1] - block[0] if (block[1] - block[0]) > 0 else 0\n                long_end = long_start + len(shorter)\n                long_substr = longer[long_start:long_end]\n                score = max(score, fuzz.ratio(needle, long_substr))\n\n        This is a lot faster than checking all possible alignments. However it\n        only finds one of the best alignments and not necessarily the optimal one.\n\n    .. image:: img/partial_ratio_long_needle.svg\n\n    Examples\n    --------\n    >>> fuzz.partial_ratio(\"this is a test\", \"this is a test!\")\n    100.0\n    ");
+PyDoc_STRVAR(__pyx_doc_8cpp_fuzz_2partial_ratio, "\n    Searches for the optimal alignment of the shorter string in the\n    longer string and returns the fuzz.ratio for this alignment.\n\n    Parameters\n    ----------\n    s1 : Sequence[Hashable]\n        First string to compare.\n    s2 : Sequence[Hashable]\n        Second string to compare.\n    processor: bool or callable, optional\n        Optional callable that is used to preprocess the strings before\n        comparing them. When processor is True ``utils.default_process``\n        is used. Default is None, which deactivates this behaviour.\n    score_cutoff : float, optional\n        Optional argument for a score threshold as a float between 0 and 100.\n        For ratio < score_cutoff 0 is returned instead. Default is 0,\n        which deactivates this behaviour.\n\n    Returns\n    -------\n    similarity : float\n        similarity between s1 and s2 as a float between 0 and 100\n\n    Notes\n    -----\n    Depending on the length of the needle (shorter string) different\n    implementations are used to improve the performance.\n\n    short needle (length \342\211\244 64):\n        When using a short needle length the fuzz.ratio is calculated for all\n        alignments that could result in an optimal alignment. It is\n        guaranteed to find the optimal alignment. For short needles this is very\n        fast, since for them fuzz.ratio runs in ``O(N)`` time. This results in a worst\n        case performance of ``O(NM)``.\n    \n    .. image:: img/partial_ratio_short_needle.svg\n\n    long needle (length > 64):\n        For long needles a similar implementation to FuzzyWuzzy is used.\n        This implementation only considers alignments which start at one\n        of the longest common substrings. This results in a worst case performance\n        of ``O(N[N/64]M)``. However usually most of the alignments can be skipped.\n        The following Python code shows the concept:\n\n        .. 
code-block:: python\n\n            blocks = SequenceMatcher(None,"" needle, longer, False).get_matching_blocks()\n            score = 0\n            for block in blocks:\n                long_start = block[1] - block[0] if (block[1] - block[0]) > 0 else 0\n                long_end = long_start + len(shorter)\n                long_substr = longer[long_start:long_end]\n                score = max(score, fuzz.ratio(needle, long_substr))\n\n        This is a lot faster than checking all possible alignments. However it\n        only finds one of the best alignments and not necessarily the optimal one.\n\n    .. image:: img/partial_ratio_long_needle.svg\n\n    Examples\n    --------\n    >>> fuzz.partial_ratio(\"this is a test\", \"this is a test!\")\n    100.0\n    ");
 static PyMethodDef __pyx_mdef_8cpp_fuzz_3partial_ratio = {"partial_ratio", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_8cpp_fuzz_3partial_ratio, __Pyx_METH_FASTCALL|METH_KEYWORDS, __pyx_doc_8cpp_fuzz_2partial_ratio};
 static PyObject *__pyx_pw_8cpp_fuzz_3partial_ratio(PyObject *__pyx_self, 
 #if CYTHON_METH_FASTCALL
@ -3310,7 +3310,7 @@ PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds
 PyObject *__pyx_args, PyObject *__pyx_kwds
 #endif
 ); /*proto*/
-PyDoc_STRVAR(__pyx_doc_8cpp_fuzz_4token_sort_ratio, "\n    Sorts the words in the strings and calculates the fuzz.ratio between them\n\n    Parameters\n    ----------\n    s1 : str\n        First string to compare.\n    s2 : str\n        Second string to compare.\n    processor: bool or callable, optional\n        Optional callable that is used to preprocess the strings before\n        comparing them. When processor is True ``utils.default_process``\n        is used. Default is True.\n    score_cutoff : float, optional\n        Optional argument for a score threshold as a float between 0 and 100.\n        For ratio < score_cutoff 0 is returned instead. Default is 0,\n        which deactivates this behaviour.\n\n    Returns\n    -------\n    similarity : float\n        similarity between s1 and s2 as a float between 0 and 100\n\n    Notes\n    -----\n    .. image:: img/token_sort_ratio.svg\n\n    Examples\n    --------\n    >>> fuzz.token_sort_ratio(\"fuzzy wuzzy was a bear\", \"wuzzy fuzzy was a bear\")\n    100.0\n    ");
+PyDoc_STRVAR(__pyx_doc_8cpp_fuzz_4token_sort_ratio, "\n    Sorts the words in the strings and calculates the fuzz.ratio between them\n\n    Parameters\n    ----------\n    s1 : Sequence[Hashable]\n        First string to compare.\n    s2 : Sequence[Hashable]\n        Second string to compare.\n    processor: bool or callable, optional\n        Optional callable that is used to preprocess the strings before\n        comparing them. When processor is True ``utils.default_process``\n        is used. Default is True.\n    score_cutoff : float, optional\n        Optional argument for a score threshold as a float between 0 and 100.\n        For ratio < score_cutoff 0 is returned instead. Default is 0,\n        which deactivates this behaviour.\n\n    Returns\n    -------\n    similarity : float\n        similarity between s1 and s2 as a float between 0 and 100\n\n    Notes\n    -----\n    .. image:: img/token_sort_ratio.svg\n\n    Examples\n    --------\n    >>> fuzz.token_sort_ratio(\"fuzzy wuzzy was a bear\", \"wuzzy fuzzy was a bear\")\n    100.0\n    ");
 static PyMethodDef __pyx_mdef_8cpp_fuzz_5token_sort_ratio = {"token_sort_ratio", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_8cpp_fuzz_5token_sort_ratio, __Pyx_METH_FASTCALL|METH_KEYWORDS, __pyx_doc_8cpp_fuzz_4token_sort_ratio};
 static PyObject *__pyx_pw_8cpp_fuzz_5token_sort_ratio(PyObject *__pyx_self, 
 #if CYTHON_METH_FASTCALL
@ -3694,7 +3694,7 @@ PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds
 PyObject *__pyx_args, PyObject *__pyx_kwds
 #endif
 ); /*proto*/
-PyDoc_STRVAR(__pyx_doc_8cpp_fuzz_6token_set_ratio, "\n    Compares the words in the strings based on unique and common words between them\n    using fuzz.ratio\n\n    Parameters\n    ----------\n    s1 : str\n        First string to compare.\n    s2 : str\n        Second string to compare.\n    processor: bool or callable, optional\n        Optional callable that is used to preprocess the strings before\n        comparing them. When processor is True ``utils.default_process``\n        is used. Default is True.\n    score_cutoff : float, optional\n        Optional argument for a score threshold as a float between 0 and 100.\n        For ratio < score_cutoff 0 is returned instead. Default is 0,\n        which deactivates this behaviour.\n\n    Returns\n    -------\n    similarity : float\n        similarity between s1 and s2 as a float between 0 and 100\n\n    Notes\n    -----\n    .. image:: img/token_set_ratio.svg\n\n    Examples\n    --------\n    >>> fuzz.token_sort_ratio(\"fuzzy was a bear\", \"fuzzy fuzzy was a bear\")\n    83.8709716796875\n    >>> fuzz.token_set_ratio(\"fuzzy was a bear\", \"fuzzy fuzzy was a bear\")\n    100.0\n    ");
+PyDoc_STRVAR(__pyx_doc_8cpp_fuzz_6token_set_ratio, "\n    Compares the words in the strings based on unique and common words between them\n    using fuzz.ratio\n\n    Parameters\n    ----------\n    s1 : Sequence[Hashable]\n        First string to compare.\n    s2 : Sequence[Hashable]\n        Second string to compare.\n    processor: bool or callable, optional\n        Optional callable that is used to preprocess the strings before\n        comparing them. When processor is True ``utils.default_process``\n        is used. Default is True.\n    score_cutoff : float, optional\n        Optional argument for a score threshold as a float between 0 and 100.\n        For ratio < score_cutoff 0 is returned instead. Default is 0,\n        which deactivates this behaviour.\n\n    Returns\n    -------\n    similarity : float\n        similarity between s1 and s2 as a float between 0 and 100\n\n    Notes\n    -----\n    .. image:: img/token_set_ratio.svg\n\n    Examples\n    --------\n    >>> fuzz.token_sort_ratio(\"fuzzy was a bear\", \"fuzzy fuzzy was a bear\")\n    83.8709716796875\n    >>> fuzz.token_set_ratio(\"fuzzy was a bear\", \"fuzzy fuzzy was a bear\")\n    100.0\n    ");
 static PyMethodDef __pyx_mdef_8cpp_fuzz_7token_set_ratio = {"token_set_ratio", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_8cpp_fuzz_7token_set_ratio, __Pyx_METH_FASTCALL|METH_KEYWORDS, __pyx_doc_8cpp_fuzz_6token_set_ratio};
 static PyObject *__pyx_pw_8cpp_fuzz_7token_set_ratio(PyObject *__pyx_self, 
 #if CYTHON_METH_FASTCALL
@ -4078,7 +4078,7 @@ PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds
 PyObject *__pyx_args, PyObject *__pyx_kwds
 #endif
 ); /*proto*/
-PyDoc_STRVAR(__pyx_doc_8cpp_fuzz_8token_ratio, "\n    Helper method that returns the maximum of fuzz.token_set_ratio and fuzz.token_sort_ratio\n    (faster than manually executing the two functions)\n\n    Parameters\n    ----------\n    s1 : str\n        First string to compare.\n    s2 : str\n        Second string to compare.\n    processor: bool or callable, optional\n        Optional callable that is used to preprocess the strings before\n        comparing them. When processor is True ``utils.default_process``\n        is used. Default is True.\n    score_cutoff : float, optional\n        Optional argument for a score threshold as a float between 0 and 100.\n        For ratio < score_cutoff 0 is returned instead. Default is 0,\n        which deactivates this behaviour.\n\n    Returns\n    -------\n    similarity : float\n        similarity between s1 and s2 as a float between 0 and 100\n\n    Notes\n    -----\n    .. image:: img/token_ratio.svg\n    ");
+PyDoc_STRVAR(__pyx_doc_8cpp_fuzz_8token_ratio, "\n    Helper method that returns the maximum of fuzz.token_set_ratio and fuzz.token_sort_ratio\n    (faster than manually executing the two functions)\n\n    Parameters\n    ----------\n    s1 : Sequence[Hashable]\n        First string to compare.\n    s2 : Sequence[Hashable]\n        Second string to compare.\n    processor: bool or callable, optional\n        Optional callable that is used to preprocess the strings before\n        comparing them. When processor is True ``utils.default_process``\n        is used. Default is True.\n    score_cutoff : float, optional\n        Optional argument for a score threshold as a float between 0 and 100.\n        For ratio < score_cutoff 0 is returned instead. Default is 0,\n        which deactivates this behaviour.\n\n    Returns\n    -------\n    similarity : float\n        similarity between s1 and s2 as a float between 0 and 100\n\n    Notes\n    -----\n    .. image:: img/token_ratio.svg\n    ");
 static PyMethodDef __pyx_mdef_8cpp_fuzz_9token_ratio = {"token_ratio", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_8cpp_fuzz_9token_ratio, __Pyx_METH_FASTCALL|METH_KEYWORDS, __pyx_doc_8cpp_fuzz_8token_ratio};
 static PyObject *__pyx_pw_8cpp_fuzz_9token_ratio(PyObject *__pyx_self, 
 #if CYTHON_METH_FASTCALL
@ -4462,7 +4462,7 @@ PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds
 PyObject *__pyx_args, PyObject *__pyx_kwds
 #endif
 ); /*proto*/
-PyDoc_STRVAR(__pyx_doc_8cpp_fuzz_10partial_token_sort_ratio, "\n    sorts the words in the strings and calculates the fuzz.partial_ratio between them\n\n    Parameters\n    ----------\n    s1 : str\n        First string to compare.\n    s2 : str\n        Second string to compare.\n    processor: bool or callable, optional\n        Optional callable that is used to preprocess the strings before\n        comparing them. When processor is True ``utils.default_process``\n        is used. Default is True.\n    score_cutoff : float, optional\n        Optional argument for a score threshold as a float between 0 and 100.\n        For ratio < score_cutoff 0 is returned instead. Default is 0,\n        which deactivates this behaviour.\n\n    Returns\n    -------\n    similarity : float\n        similarity between s1 and s2 as a float between 0 and 100\n\n    Notes\n    -----\n    .. image:: img/partial_token_sort_ratio.svg\n    ");
+PyDoc_STRVAR(__pyx_doc_8cpp_fuzz_10partial_token_sort_ratio, "\n    sorts the words in the strings and calculates the fuzz.partial_ratio between them\n\n    Parameters\n    ----------\n    s1 : Sequence[Hashable]\n        First string to compare.\n    s2 : Sequence[Hashable]\n        Second string to compare.\n    processor: bool or callable, optional\n        Optional callable that is used to preprocess the strings before\n        comparing them. When processor is True ``utils.default_process``\n        is used. Default is True.\n    score_cutoff : float, optional\n        Optional argument for a score threshold as a float between 0 and 100.\n        For ratio < score_cutoff 0 is returned instead. Default is 0,\n        which deactivates this behaviour.\n\n    Returns\n    -------\n    similarity : float\n        similarity between s1 and s2 as a float between 0 and 100\n\n    Notes\n    -----\n    .. image:: img/partial_token_sort_ratio.svg\n    ");
 static PyMethodDef __pyx_mdef_8cpp_fuzz_11partial_token_sort_ratio = {"partial_token_sort_ratio", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_8cpp_fuzz_11partial_token_sort_ratio, __Pyx_METH_FASTCALL|METH_KEYWORDS, __pyx_doc_8cpp_fuzz_10partial_token_sort_ratio};
 static PyObject *__pyx_pw_8cpp_fuzz_11partial_token_sort_ratio(PyObject *__pyx_self, 
 #if CYTHON_METH_FASTCALL
@ -4846,7 +4846,7 @@ PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds
 PyObject *__pyx_args, PyObject *__pyx_kwds
 #endif
 ); /*proto*/
-PyDoc_STRVAR(__pyx_doc_8cpp_fuzz_12partial_token_set_ratio, "\n    Compares the words in the strings based on unique and common words between them\n    using fuzz.partial_ratio\n\n    Parameters\n    ----------\n    s1 : str\n        First string to compare.\n    s2 : str\n        Second string to compare.\n    processor: bool or callable, optional\n        Optional callable that is used to preprocess the strings before\n        comparing them. When processor is True ``utils.default_process``\n        is used. Default is True.\n    score_cutoff : float, optional\n        Optional argument for a score threshold as a float between 0 and 100.\n        For ratio < score_cutoff 0 is returned instead. Default is 0,\n        which deactivates this behaviour.\n\n    Returns\n    -------\n    similarity : float\n        similarity between s1 and s2 as a float between 0 and 100\n\n    Notes\n    -----\n    .. image:: img/partial_token_set_ratio.svg\n    ");
+PyDoc_STRVAR(__pyx_doc_8cpp_fuzz_12partial_token_set_ratio, "\n    Compares the words in the strings based on unique and common words between them\n    using fuzz.partial_ratio\n\n    Parameters\n    ----------\n    s1 : Sequence[Hashable]\n        First string to compare.\n    s2 : Sequence[Hashable]\n        Second string to compare.\n    processor: bool or callable, optional\n        Optional callable that is used to preprocess the strings before\n        comparing them. When processor is True ``utils.default_process``\n        is used. Default is True.\n    score_cutoff : float, optional\n        Optional argument for a score threshold as a float between 0 and 100.\n        For ratio < score_cutoff 0 is returned instead. Default is 0,\n        which deactivates this behaviour.\n\n    Returns\n    -------\n    similarity : float\n        similarity between s1 and s2 as a float between 0 and 100\n\n    Notes\n    -----\n    .. image:: img/partial_token_set_ratio.svg\n    ");
 static PyMethodDef __pyx_mdef_8cpp_fuzz_13partial_token_set_ratio = {"partial_token_set_ratio", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_8cpp_fuzz_13partial_token_set_ratio, __Pyx_METH_FASTCALL|METH_KEYWORDS, __pyx_doc_8cpp_fuzz_12partial_token_set_ratio};
 static PyObject *__pyx_pw_8cpp_fuzz_13partial_token_set_ratio(PyObject *__pyx_self, 
 #if CYTHON_METH_FASTCALL
@ -5230,7 +5230,7 @@ PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds
 PyObject *__pyx_args, PyObject *__pyx_kwds
 #endif
 ); /*proto*/
-PyDoc_STRVAR(__pyx_doc_8cpp_fuzz_14partial_token_ratio, "\n    Helper method that returns the maximum of fuzz.partial_token_set_ratio and\n    fuzz.partial_token_sort_ratio (faster than manually executing the two functions)\n\n    Parameters\n    ----------\n    s1 : str\n        First string to compare.\n    s2 : str\n        Second string to compare.\n    processor: bool or callable, optional\n        Optional callable that is used to preprocess the strings before\n        comparing them. When processor is True ``utils.default_process``\n        is used. Default is True.\n    score_cutoff : float, optional\n        Optional argument for a score threshold as a float between 0 and 100.\n        For ratio < score_cutoff 0 is returned instead. Default is 0,\n        which deactivates this behaviour.\n\n    Returns\n    -------\n    similarity : float\n        similarity between s1 and s2 as a float between 0 and 100\n\n    Notes\n    -----\n    .. image:: img/partial_token_ratio.svg\n    ");
+PyDoc_STRVAR(__pyx_doc_8cpp_fuzz_14partial_token_ratio, "\n    Helper method that returns the maximum of fuzz.partial_token_set_ratio and\n    fuzz.partial_token_sort_ratio (faster than manually executing the two functions)\n\n    Parameters\n    ----------\n    s1 : Sequence[Hashable]\n        First string to compare.\n    s2 : Sequence[Hashable]\n        Second string to compare.\n    processor: bool or callable, optional\n        Optional callable that is used to preprocess the strings before\n        comparing them. When processor is True ``utils.default_process``\n        is used. Default is True.\n    score_cutoff : float, optional\n        Optional argument for a score threshold as a float between 0 and 100.\n        For ratio < score_cutoff 0 is returned instead. Default is 0,\n        which deactivates this behaviour.\n\n    Returns\n    -------\n    similarity : float\n        similarity between s1 and s2 as a float between 0 and 100\n\n    Notes\n    -----\n    .. image:: img/partial_token_ratio.svg\n    ");
 static PyMethodDef __pyx_mdef_8cpp_fuzz_15partial_token_ratio = {"partial_token_ratio", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_8cpp_fuzz_15partial_token_ratio, __Pyx_METH_FASTCALL|METH_KEYWORDS, __pyx_doc_8cpp_fuzz_14partial_token_ratio};
 static PyObject *__pyx_pw_8cpp_fuzz_15partial_token_ratio(PyObject *__pyx_self, 
 #if CYTHON_METH_FASTCALL
@ -5614,7 +5614,7 @@ PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds
 PyObject *__pyx_args, PyObject *__pyx_kwds
 #endif
 ); /*proto*/
-PyDoc_STRVAR(__pyx_doc_8cpp_fuzz_16WRatio, "\n    Calculates a weighted ratio based on the other ratio algorithms\n\n    Parameters\n    ----------\n    s1 : str\n        First string to compare.\n    s2 : str\n        Second string to compare.\n    processor: bool or callable, optional\n        Optional callable that is used to preprocess the strings before\n        comparing them. When processor is True ``utils.default_process``\n        is used. Default is True.\n    score_cutoff : float, optional\n        Optional argument for a score threshold as a float between 0 and 100.\n        For ratio < score_cutoff 0 is returned instead. Default is 0,\n        which deactivates this behaviour.\n\n    Returns\n    -------\n    similarity : float\n        similarity between s1 and s2 as a float between 0 and 100\n\n    Notes\n    -----\n    .. image:: img/WRatio.svg\n    ");
+PyDoc_STRVAR(__pyx_doc_8cpp_fuzz_16WRatio, "\n    Calculates a weighted ratio based on the other ratio algorithms\n\n    Parameters\n    ----------\n    s1 : Sequence[Hashable]\n        First string to compare.\n    s2 : Sequence[Hashable]\n        Second string to compare.\n    processor: bool or callable, optional\n        Optional callable that is used to preprocess the strings before\n        comparing them. When processor is True ``utils.default_process``\n        is used. Default is True.\n    score_cutoff : float, optional\n        Optional argument for a score threshold as a float between 0 and 100.\n        For ratio < score_cutoff 0 is returned instead. Default is 0,\n        which deactivates this behaviour.\n\n    Returns\n    -------\n    similarity : float\n        similarity between s1 and s2 as a float between 0 and 100\n\n    Notes\n    -----\n    .. image:: img/WRatio.svg\n    ");
 static PyMethodDef __pyx_mdef_8cpp_fuzz_17WRatio = {"WRatio", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_8cpp_fuzz_17WRatio, __Pyx_METH_FASTCALL|METH_KEYWORDS, __pyx_doc_8cpp_fuzz_16WRatio};
 static PyObject *__pyx_pw_8cpp_fuzz_17WRatio(PyObject *__pyx_self, 
 #if CYTHON_METH_FASTCALL
@ -5998,7 +5998,7 @@ PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds
 PyObject *__pyx_args, PyObject *__pyx_kwds
 #endif
 ); /*proto*/
-PyDoc_STRVAR(__pyx_doc_8cpp_fuzz_18QRatio, "\n    Calculates a quick ratio between two strings using fuzz.ratio.\n    The only difference to fuzz.ratio is, that this preprocesses\n    the strings by default.\n\n    Parameters\n    ----------\n    s1 : str\n        First string to compare.\n    s2 : str\n        Second string to compare.\n    processor: bool or callable, optional\n        Optional callable that is used to preprocess the strings before\n        comparing them. When processor is True ``utils.default_process``\n        is used. Default is True.\n    score_cutoff : float, optional\n        Optional argument for a score threshold as a float between 0 and 100.\n        For ratio < score_cutoff 0 is returned instead. Default is 0,\n        which deactivates this behaviour.\n\n    Returns\n    -------\n    similarity : float\n        similarity between s1 and s2 as a float between 0 and 100\n\n    Examples\n    --------\n    >>> fuzz.QRatio(\"this is a test\", \"THIS is a test!\")\n    100.0\n    ");
+PyDoc_STRVAR(__pyx_doc_8cpp_fuzz_18QRatio, "\n    Calculates a quick ratio between two strings using fuzz.ratio.\n    The only difference to fuzz.ratio is, that this preprocesses\n    the strings by default.\n\n    Parameters\n    ----------\n    s1 : Sequence[Hashable]\n        First string to compare.\n    s2 : Sequence[Hashable]\n        Second string to compare.\n    processor: bool or callable, optional\n        Optional callable that is used to preprocess the strings before\n        comparing them. When processor is True ``utils.default_process``\n        is used. Default is True.\n    score_cutoff : float, optional\n        Optional argument for a score threshold as a float between 0 and 100.\n        For ratio < score_cutoff 0 is returned instead. Default is 0,\n        which deactivates this behaviour.\n\n    Returns\n    -------\n    similarity : float\n        similarity between s1 and s2 as a float between 0 and 100\n\n    Examples\n    --------\n    >>> fuzz.QRatio(\"this is a test\", \"THIS is a test!\")\n    100.0\n    ");
 static PyMethodDef __pyx_mdef_8cpp_fuzz_19QRatio = {"QRatio", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_8cpp_fuzz_19QRatio, __Pyx_METH_FASTCALL|METH_KEYWORDS, __pyx_doc_8cpp_fuzz_18QRatio};
 static PyObject *__pyx_pw_8cpp_fuzz_19QRatio(PyObject *__pyx_self, 
 #if CYTHON_METH_FASTCALL
@ -6364,8 +6364,8 @@ static PyObject *__pyx_pf_8cpp_fuzz_18QRatio(CYTHON_UNUSED PyObject *__pyx_self,
  return __pyx_r;
 }

-/* "cpp_common.pxd":27
- *     void validate_string(object py_str, const char* err) except +
+/* "cpp_common.pxd":28
+ *     proc_string default_process_func(proc_string sentence) except +
 * 
 * cdef inline proc_string hash_array(arr) except *:             # <<<<<<<<<<<<<<
 *     # TODO on Cpython this does not require any copies
@ -6407,30 +6407,30 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
  int __pyx_clineno = 0;
  __Pyx_RefNannySetupContext("hash_array", 0);

-  /* "cpp_common.pxd":30
+  /* "cpp_common.pxd":31
 *     # TODO on Cpython this does not require any copies
 *     cdef proc_string s_proc
 *     cdef Py_UCS4 typecode = <Py_UCS4>arr.typecode             # <<<<<<<<<<<<<<
 *     s_proc.length = <size_t>len(arr)
 * 
 */
-  __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_arr, __pyx_n_s_typecode); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 30, __pyx_L1_error)
+  __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_arr, __pyx_n_s_typecode); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 31, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
-  __pyx_t_2 = __Pyx_PyObject_AsPy_UCS4(__pyx_t_1); if (unlikely((__pyx_t_2 == (Py_UCS4)-1) && PyErr_Occurred())) __PYX_ERR(1, 30, __pyx_L1_error)
+  __pyx_t_2 = __Pyx_PyObject_AsPy_UCS4(__pyx_t_1); if (unlikely((__pyx_t_2 == (Py_UCS4)-1) && PyErr_Occurred())) __PYX_ERR(1, 31, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
  __pyx_v_typecode = ((Py_UCS4)__pyx_t_2);

-  /* "cpp_common.pxd":31
+  /* "cpp_common.pxd":32
 *     cdef proc_string s_proc
 *     cdef Py_UCS4 typecode = <Py_UCS4>arr.typecode
 *     s_proc.length = <size_t>len(arr)             # <<<<<<<<<<<<<<
 * 
 *     s_proc.data = malloc(s_proc.length * sizeof(uint64_t))
 */
-  __pyx_t_3 = PyObject_Length(__pyx_v_arr); if (unlikely(__pyx_t_3 == ((Py_ssize_t)-1))) __PYX_ERR(1, 31, __pyx_L1_error)
+  __pyx_t_3 = PyObject_Length(__pyx_v_arr); if (unlikely(__pyx_t_3 == ((Py_ssize_t)-1))) __PYX_ERR(1, 32, __pyx_L1_error)
  __pyx_v_s_proc.length = ((size_t)__pyx_t_3);

-  /* "cpp_common.pxd":33
+  /* "cpp_common.pxd":34
 *     s_proc.length = <size_t>len(arr)
 * 
 *     s_proc.data = malloc(s_proc.length * sizeof(uint64_t))             # <<<<<<<<<<<<<<
@ -6439,7 +6439,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
 */
  __pyx_v_s_proc.data = malloc((__pyx_v_s_proc.length * (sizeof(uint64_t))));

-  /* "cpp_common.pxd":35
+  /* "cpp_common.pxd":36
 *     s_proc.data = malloc(s_proc.length * sizeof(uint64_t))
 * 
 *     if s_proc.data == NULL:             # <<<<<<<<<<<<<<
@ -6449,16 +6449,16 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
  __pyx_t_4 = ((__pyx_v_s_proc.data == NULL) != 0);
  if (unlikely(__pyx_t_4)) {

-    /* "cpp_common.pxd":36
+    /* "cpp_common.pxd":37
 * 
 *     if s_proc.data == NULL:
 *         raise MemoryError             # <<<<<<<<<<<<<<
 * 
 *     try:
 */
-    PyErr_NoMemory(); __PYX_ERR(1, 36, __pyx_L1_error)
+    PyErr_NoMemory(); __PYX_ERR(1, 37, __pyx_L1_error)

-    /* "cpp_common.pxd":35
+    /* "cpp_common.pxd":36
 *     s_proc.data = malloc(s_proc.length * sizeof(uint64_t))
 * 
 *     if s_proc.data == NULL:             # <<<<<<<<<<<<<<
@ -6467,7 +6467,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
 */
  }

-  /* "cpp_common.pxd":38
+  /* "cpp_common.pxd":39
 *         raise MemoryError
 * 
 *     try:             # <<<<<<<<<<<<<<
@ -6483,7 +6483,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
    __Pyx_XGOTREF(__pyx_t_7);
    /*try:*/ {

-      /* "cpp_common.pxd":40
+      /* "cpp_common.pxd":41
 *     try:
 *         # ignore signed/unsigned, since it is not relevant in any of the algorithms
 *         if typecode in {'b', 'B'}: # signed/unsigned char             # <<<<<<<<<<<<<<
@ -6494,7 +6494,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
        case 98:
        case 66:

-        /* "cpp_common.pxd":41
+        /* "cpp_common.pxd":42
 *         # ignore signed/unsigned, since it is not relevant in any of the algorithms
 *         if typecode in {'b', 'B'}: # signed/unsigned char
 *             s_proc.kind = RAPIDFUZZ_UINT64             # <<<<<<<<<<<<<<
@ -6503,7 +6503,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
 */
        __pyx_v_s_proc.kind = RAPIDFUZZ_UINT64;

-        /* "cpp_common.pxd":42
+        /* "cpp_common.pxd":43
 *         if typecode in {'b', 'B'}: # signed/unsigned char
 *             s_proc.kind = RAPIDFUZZ_UINT64
 *             for i in range(s_proc.length):             # <<<<<<<<<<<<<<
@ -6515,21 +6515,21 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
        for (__pyx_t_10 = 0; __pyx_t_10 < __pyx_t_9; __pyx_t_10+=1) {
          __pyx_v_i = __pyx_t_10;

-          /* "cpp_common.pxd":43
+          /* "cpp_common.pxd":44
 *             s_proc.kind = RAPIDFUZZ_UINT64
 *             for i in range(s_proc.length):
 *                 (<uint64_t*>s_proc.data)[i] = <uint64_t>arr[i]             # <<<<<<<<<<<<<<
 *         elif typecode == 'u': # 'u' wchar_t
 *             s_proc.kind = RAPIDFUZZ_UINT64
 */
-          __pyx_t_1 = __Pyx_GetItemInt(__pyx_v_arr, __pyx_v_i, size_t, 0, __Pyx_PyInt_FromSize_t, 0, 0, 1); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 43, __pyx_L4_error)
+          __pyx_t_1 = __Pyx_GetItemInt(__pyx_v_arr, __pyx_v_i, size_t, 0, __Pyx_PyInt_FromSize_t, 0, 0, 1); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 44, __pyx_L4_error)
          __Pyx_GOTREF(__pyx_t_1);
-          __pyx_t_11 = __Pyx_PyInt_As_uint64_t(__pyx_t_1); if (unlikely((__pyx_t_11 == ((uint64_t)-1)) && PyErr_Occurred())) __PYX_ERR(1, 43, __pyx_L4_error)
+          __pyx_t_11 = __Pyx_PyInt_As_uint64_t(__pyx_t_1); if (unlikely((__pyx_t_11 == ((uint64_t)-1)) && PyErr_Occurred())) __PYX_ERR(1, 44, __pyx_L4_error)
          __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
          (((uint64_t *)__pyx_v_s_proc.data)[__pyx_v_i]) = ((uint64_t)__pyx_t_11);
        }

-        /* "cpp_common.pxd":40
+        /* "cpp_common.pxd":41
 *     try:
 *         # ignore signed/unsigned, since it is not relevant in any of the algorithms
 *         if typecode in {'b', 'B'}: # signed/unsigned char             # <<<<<<<<<<<<<<
@ -6539,7 +6539,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
        break;
        case 0x75:

-        /* "cpp_common.pxd":45
+        /* "cpp_common.pxd":46
 *                 (<uint64_t*>s_proc.data)[i] = <uint64_t>arr[i]
 *         elif typecode == 'u': # 'u' wchar_t
 *             s_proc.kind = RAPIDFUZZ_UINT64             # <<<<<<<<<<<<<<
@ -6548,7 +6548,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
 */
        __pyx_v_s_proc.kind = RAPIDFUZZ_UINT64;

-        /* "cpp_common.pxd":46
+        /* "cpp_common.pxd":47
 *         elif typecode == 'u': # 'u' wchar_t
 *             s_proc.kind = RAPIDFUZZ_UINT64
 *             for i in range(s_proc.length):             # <<<<<<<<<<<<<<
@ -6560,21 +6560,21 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
        for (__pyx_t_10 = 0; __pyx_t_10 < __pyx_t_9; __pyx_t_10+=1) {
          __pyx_v_i = __pyx_t_10;

-          /* "cpp_common.pxd":47
+          /* "cpp_common.pxd":48
 *             s_proc.kind = RAPIDFUZZ_UINT64
 *             for i in range(s_proc.length):
 *                 (<uint64_t*>s_proc.data)[i] = <uint64_t><Py_UCS4>arr[i]             # <<<<<<<<<<<<<<
 *         elif typecode in {'h', 'H'}: #  signed/unsigned short
 *             s_proc.kind = RAPIDFUZZ_UINT64
 */
-          __pyx_t_1 = __Pyx_GetItemInt(__pyx_v_arr, __pyx_v_i, size_t, 0, __Pyx_PyInt_FromSize_t, 0, 0, 1); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 47, __pyx_L4_error)
+          __pyx_t_1 = __Pyx_GetItemInt(__pyx_v_arr, __pyx_v_i, size_t, 0, __Pyx_PyInt_FromSize_t, 0, 0, 1); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 48, __pyx_L4_error)
          __Pyx_GOTREF(__pyx_t_1);
-          __pyx_t_2 = __Pyx_PyObject_AsPy_UCS4(__pyx_t_1); if (unlikely((__pyx_t_2 == (Py_UCS4)-1) && PyErr_Occurred())) __PYX_ERR(1, 47, __pyx_L4_error)
+          __pyx_t_2 = __Pyx_PyObject_AsPy_UCS4(__pyx_t_1); if (unlikely((__pyx_t_2 == (Py_UCS4)-1) && PyErr_Occurred())) __PYX_ERR(1, 48, __pyx_L4_error)
          __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
          (((uint64_t *)__pyx_v_s_proc.data)[__pyx_v_i]) = ((uint64_t)((Py_UCS4)__pyx_t_2));
        }

-        /* "cpp_common.pxd":44
+        /* "cpp_common.pxd":45
 *             for i in range(s_proc.length):
 *                 (<uint64_t*>s_proc.data)[i] = <uint64_t>arr[i]
 *         elif typecode == 'u': # 'u' wchar_t             # <<<<<<<<<<<<<<
@ -6584,7 +6584,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
        break;
        case 0x68:

-        /* "cpp_common.pxd":48
+        /* "cpp_common.pxd":49
 *             for i in range(s_proc.length):
 *                 (<uint64_t*>s_proc.data)[i] = <uint64_t><Py_UCS4>arr[i]
 *         elif typecode in {'h', 'H'}: #  signed/unsigned short             # <<<<<<<<<<<<<<
@ -6593,7 +6593,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
 */
        case 72:

-        /* "cpp_common.pxd":49
+        /* "cpp_common.pxd":50
 *                 (<uint64_t*>s_proc.data)[i] = <uint64_t><Py_UCS4>arr[i]
 *         elif typecode in {'h', 'H'}: #  signed/unsigned short
 *             s_proc.kind = RAPIDFUZZ_UINT64             # <<<<<<<<<<<<<<
@ -6602,7 +6602,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
 */
        __pyx_v_s_proc.kind = RAPIDFUZZ_UINT64;

-        /* "cpp_common.pxd":50
+        /* "cpp_common.pxd":51
 *         elif typecode in {'h', 'H'}: #  signed/unsigned short
 *             s_proc.kind = RAPIDFUZZ_UINT64
 *             for i in range(s_proc.length):             # <<<<<<<<<<<<<<
@ -6614,21 +6614,21 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
        for (__pyx_t_10 = 0; __pyx_t_10 < __pyx_t_9; __pyx_t_10+=1) {
          __pyx_v_i = __pyx_t_10;

-          /* "cpp_common.pxd":51
+          /* "cpp_common.pxd":52
 *             s_proc.kind = RAPIDFUZZ_UINT64
 *             for i in range(s_proc.length):
 *                 (<uint64_t*>s_proc.data)[i] = <uint64_t>arr[i]             # <<<<<<<<<<<<<<
 *         elif typecode in {'i', 'I'}: # signed/unsigned int
 *             s_proc.kind = RAPIDFUZZ_UINT64
 */
-          __pyx_t_1 = __Pyx_GetItemInt(__pyx_v_arr, __pyx_v_i, size_t, 0, __Pyx_PyInt_FromSize_t, 0, 0, 1); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 51, __pyx_L4_error)
+          __pyx_t_1 = __Pyx_GetItemInt(__pyx_v_arr, __pyx_v_i, size_t, 0, __Pyx_PyInt_FromSize_t, 0, 0, 1); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 52, __pyx_L4_error)
          __Pyx_GOTREF(__pyx_t_1);
-          __pyx_t_11 = __Pyx_PyInt_As_uint64_t(__pyx_t_1); if (unlikely((__pyx_t_11 == ((uint64_t)-1)) && PyErr_Occurred())) __PYX_ERR(1, 51, __pyx_L4_error)
+          __pyx_t_11 = __Pyx_PyInt_As_uint64_t(__pyx_t_1); if (unlikely((__pyx_t_11 == ((uint64_t)-1)) && PyErr_Occurred())) __PYX_ERR(1, 52, __pyx_L4_error)
          __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
          (((uint64_t *)__pyx_v_s_proc.data)[__pyx_v_i]) = ((uint64_t)__pyx_t_11);
        }

-        /* "cpp_common.pxd":48
+        /* "cpp_common.pxd":49
 *             for i in range(s_proc.length):
 *                 (<uint64_t*>s_proc.data)[i] = <uint64_t><Py_UCS4>arr[i]
 *         elif typecode in {'h', 'H'}: #  signed/unsigned short             # <<<<<<<<<<<<<<
@ -6638,7 +6638,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
        break;
        case 0x69:

-        /* "cpp_common.pxd":52
+        /* "cpp_common.pxd":53
 *             for i in range(s_proc.length):
 *                 (<uint64_t*>s_proc.data)[i] = <uint64_t>arr[i]
 *         elif typecode in {'i', 'I'}: # signed/unsigned int             # <<<<<<<<<<<<<<
@ -6647,7 +6647,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
 */
        case 73:

-        /* "cpp_common.pxd":53
+        /* "cpp_common.pxd":54
 *                 (<uint64_t*>s_proc.data)[i] = <uint64_t>arr[i]
 *         elif typecode in {'i', 'I'}: # signed/unsigned int
 *             s_proc.kind = RAPIDFUZZ_UINT64             # <<<<<<<<<<<<<<
@ -6656,7 +6656,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
 */
        __pyx_v_s_proc.kind = RAPIDFUZZ_UINT64;

-        /* "cpp_common.pxd":54
+        /* "cpp_common.pxd":55
 *         elif typecode in {'i', 'I'}: # signed/unsigned int
 *             s_proc.kind = RAPIDFUZZ_UINT64
 *             for i in range(s_proc.length):             # <<<<<<<<<<<<<<
@ -6668,21 +6668,21 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
        for (__pyx_t_10 = 0; __pyx_t_10 < __pyx_t_9; __pyx_t_10+=1) {
          __pyx_v_i = __pyx_t_10;

-          /* "cpp_common.pxd":55
+          /* "cpp_common.pxd":56
 *             s_proc.kind = RAPIDFUZZ_UINT64
 *             for i in range(s_proc.length):
 *                 (<uint64_t*>s_proc.data)[i] = <uint64_t>arr[i]             # <<<<<<<<<<<<<<
 *         elif typecode in {'l', 'L'}: # signed/unsigned long
 *             s_proc.kind = RAPIDFUZZ_UINT64
 */
-          __pyx_t_1 = __Pyx_GetItemInt(__pyx_v_arr, __pyx_v_i, size_t, 0, __Pyx_PyInt_FromSize_t, 0, 0, 1); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 55, __pyx_L4_error)
+          __pyx_t_1 = __Pyx_GetItemInt(__pyx_v_arr, __pyx_v_i, size_t, 0, __Pyx_PyInt_FromSize_t, 0, 0, 1); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 56, __pyx_L4_error)
          __Pyx_GOTREF(__pyx_t_1);
-          __pyx_t_11 = __Pyx_PyInt_As_uint64_t(__pyx_t_1); if (unlikely((__pyx_t_11 == ((uint64_t)-1)) && PyErr_Occurred())) __PYX_ERR(1, 55, __pyx_L4_error)
+          __pyx_t_11 = __Pyx_PyInt_As_uint64_t(__pyx_t_1); if (unlikely((__pyx_t_11 == ((uint64_t)-1)) && PyErr_Occurred())) __PYX_ERR(1, 56, __pyx_L4_error)
          __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
          (((uint64_t *)__pyx_v_s_proc.data)[__pyx_v_i]) = ((uint64_t)__pyx_t_11);
        }

-        /* "cpp_common.pxd":52
+        /* "cpp_common.pxd":53
 *             for i in range(s_proc.length):
 *                 (<uint64_t*>s_proc.data)[i] = <uint64_t>arr[i]
 *         elif typecode in {'i', 'I'}: # signed/unsigned int             # <<<<<<<<<<<<<<
@ -6692,7 +6692,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
        break;
        case 0x6C:

-        /* "cpp_common.pxd":56
+        /* "cpp_common.pxd":57
 *             for i in range(s_proc.length):
 *                 (<uint64_t*>s_proc.data)[i] = <uint64_t>arr[i]
 *         elif typecode in {'l', 'L'}: # signed/unsigned long             # <<<<<<<<<<<<<<
@ -6701,7 +6701,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
 */
        case 76:

-        /* "cpp_common.pxd":57
+        /* "cpp_common.pxd":58
 *                 (<uint64_t*>s_proc.data)[i] = <uint64_t>arr[i]
 *         elif typecode in {'l', 'L'}: # signed/unsigned long
 *             s_proc.kind = RAPIDFUZZ_UINT64             # <<<<<<<<<<<<<<
@ -6710,7 +6710,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
 */
        __pyx_v_s_proc.kind = RAPIDFUZZ_UINT64;

-        /* "cpp_common.pxd":58
+        /* "cpp_common.pxd":59
 *         elif typecode in {'l', 'L'}: # signed/unsigned long
 *             s_proc.kind = RAPIDFUZZ_UINT64
 *             for i in range(s_proc.length):             # <<<<<<<<<<<<<<
@ -6722,21 +6722,21 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
        for (__pyx_t_10 = 0; __pyx_t_10 < __pyx_t_9; __pyx_t_10+=1) {
          __pyx_v_i = __pyx_t_10;

-          /* "cpp_common.pxd":59
+          /* "cpp_common.pxd":60
 *             s_proc.kind = RAPIDFUZZ_UINT64
 *             for i in range(s_proc.length):
 *                 (<uint64_t*>s_proc.data)[i] = <uint64_t>arr[i]             # <<<<<<<<<<<<<<
 *         elif typecode in {'q', 'Q'}: # signed/unsigned long long
 *             s_proc.kind = RAPIDFUZZ_UINT64
 */
-          __pyx_t_1 = __Pyx_GetItemInt(__pyx_v_arr, __pyx_v_i, size_t, 0, __Pyx_PyInt_FromSize_t, 0, 0, 1); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 59, __pyx_L4_error)
+          __pyx_t_1 = __Pyx_GetItemInt(__pyx_v_arr, __pyx_v_i, size_t, 0, __Pyx_PyInt_FromSize_t, 0, 0, 1); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 60, __pyx_L4_error)
          __Pyx_GOTREF(__pyx_t_1);
-          __pyx_t_11 = __Pyx_PyInt_As_uint64_t(__pyx_t_1); if (unlikely((__pyx_t_11 == ((uint64_t)-1)) && PyErr_Occurred())) __PYX_ERR(1, 59, __pyx_L4_error)
+          __pyx_t_11 = __Pyx_PyInt_As_uint64_t(__pyx_t_1); if (unlikely((__pyx_t_11 == ((uint64_t)-1)) && PyErr_Occurred())) __PYX_ERR(1, 60, __pyx_L4_error)
          __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
          (((uint64_t *)__pyx_v_s_proc.data)[__pyx_v_i]) = ((uint64_t)__pyx_t_11);
        }

-        /* "cpp_common.pxd":56
+        /* "cpp_common.pxd":57
 *             for i in range(s_proc.length):
 *                 (<uint64_t*>s_proc.data)[i] = <uint64_t>arr[i]
 *         elif typecode in {'l', 'L'}: # signed/unsigned long             # <<<<<<<<<<<<<<
@ -6746,7 +6746,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
        break;
        case 0x71:

-        /* "cpp_common.pxd":60
+        /* "cpp_common.pxd":61
 *             for i in range(s_proc.length):
 *                 (<uint64_t*>s_proc.data)[i] = <uint64_t>arr[i]
 *         elif typecode in {'q', 'Q'}: # signed/unsigned long long             # <<<<<<<<<<<<<<
@ -6755,7 +6755,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
 */
        case 81:

-        /* "cpp_common.pxd":61
+        /* "cpp_common.pxd":62
 *                 (<uint64_t*>s_proc.data)[i] = <uint64_t>arr[i]
 *         elif typecode in {'q', 'Q'}: # signed/unsigned long long
 *             s_proc.kind = RAPIDFUZZ_UINT64             # <<<<<<<<<<<<<<
@ -6764,7 +6764,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
 */
        __pyx_v_s_proc.kind = RAPIDFUZZ_UINT64;

-        /* "cpp_common.pxd":62
+        /* "cpp_common.pxd":63
 *         elif typecode in {'q', 'Q'}: # signed/unsigned long long
 *             s_proc.kind = RAPIDFUZZ_UINT64
 *             for i in range(s_proc.length):             # <<<<<<<<<<<<<<
@ -6776,21 +6776,21 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
        for (__pyx_t_10 = 0; __pyx_t_10 < __pyx_t_9; __pyx_t_10+=1) {
          __pyx_v_i = __pyx_t_10;

-          /* "cpp_common.pxd":63
+          /* "cpp_common.pxd":64
 *             s_proc.kind = RAPIDFUZZ_UINT64
 *             for i in range(s_proc.length):
 *                 (<uint64_t*>s_proc.data)[i] = <uint64_t>arr[i]             # <<<<<<<<<<<<<<
 *         else: # float/double are hashed
 *             s_proc.kind = RAPIDFUZZ_INT64
 */
-          __pyx_t_1 = __Pyx_GetItemInt(__pyx_v_arr, __pyx_v_i, size_t, 0, __Pyx_PyInt_FromSize_t, 0, 0, 1); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 63, __pyx_L4_error)
+          __pyx_t_1 = __Pyx_GetItemInt(__pyx_v_arr, __pyx_v_i, size_t, 0, __Pyx_PyInt_FromSize_t, 0, 0, 1); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 64, __pyx_L4_error)
          __Pyx_GOTREF(__pyx_t_1);
-          __pyx_t_11 = __Pyx_PyInt_As_uint64_t(__pyx_t_1); if (unlikely((__pyx_t_11 == ((uint64_t)-1)) && PyErr_Occurred())) __PYX_ERR(1, 63, __pyx_L4_error)
+          __pyx_t_11 = __Pyx_PyInt_As_uint64_t(__pyx_t_1); if (unlikely((__pyx_t_11 == ((uint64_t)-1)) && PyErr_Occurred())) __PYX_ERR(1, 64, __pyx_L4_error)
          __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
          (((uint64_t *)__pyx_v_s_proc.data)[__pyx_v_i]) = ((uint64_t)__pyx_t_11);
        }

-        /* "cpp_common.pxd":60
+        /* "cpp_common.pxd":61
 *             for i in range(s_proc.length):
 *                 (<uint64_t*>s_proc.data)[i] = <uint64_t>arr[i]
 *         elif typecode in {'q', 'Q'}: # signed/unsigned long long             # <<<<<<<<<<<<<<
@ -6800,7 +6800,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
        break;
        default:

-        /* "cpp_common.pxd":65
+        /* "cpp_common.pxd":66
 *                 (<uint64_t*>s_proc.data)[i] = <uint64_t>arr[i]
 *         else: # float/double are hashed
 *             s_proc.kind = RAPIDFUZZ_INT64             # <<<<<<<<<<<<<<
@ -6809,7 +6809,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
 */
        __pyx_v_s_proc.kind = RAPIDFUZZ_INT64;

-        /* "cpp_common.pxd":66
+        /* "cpp_common.pxd":67
 *         else: # float/double are hashed
 *             s_proc.kind = RAPIDFUZZ_INT64
 *             for i in range(s_proc.length):             # <<<<<<<<<<<<<<
@ -6821,23 +6821,23 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
        for (__pyx_t_10 = 0; __pyx_t_10 < __pyx_t_9; __pyx_t_10+=1) {
          __pyx_v_i = __pyx_t_10;

-          /* "cpp_common.pxd":67
+          /* "cpp_common.pxd":68
 *             s_proc.kind = RAPIDFUZZ_INT64
 *             for i in range(s_proc.length):
 *                 (<uint64_t*>s_proc.data)[i] = <uint64_t>hash(arr[i])             # <<<<<<<<<<<<<<
 *     except Exception as e:
 *         free(s_proc.data)
 */
-          __pyx_t_1 = __Pyx_GetItemInt(__pyx_v_arr, __pyx_v_i, size_t, 0, __Pyx_PyInt_FromSize_t, 0, 0, 1); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 67, __pyx_L4_error)
+          __pyx_t_1 = __Pyx_GetItemInt(__pyx_v_arr, __pyx_v_i, size_t, 0, __Pyx_PyInt_FromSize_t, 0, 0, 1); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 68, __pyx_L4_error)
          __Pyx_GOTREF(__pyx_t_1);
-          __pyx_t_12 = PyObject_Hash(__pyx_t_1); if (unlikely(__pyx_t_12 == ((Py_hash_t)-1))) __PYX_ERR(1, 67, __pyx_L4_error)
+          __pyx_t_12 = PyObject_Hash(__pyx_t_1); if (unlikely(__pyx_t_12 == ((Py_hash_t)-1))) __PYX_ERR(1, 68, __pyx_L4_error)
          __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
          (((uint64_t *)__pyx_v_s_proc.data)[__pyx_v_i]) = ((uint64_t)__pyx_t_12);
        }
        break;
      }

-      /* "cpp_common.pxd":38
+      /* "cpp_common.pxd":39
 *         raise MemoryError
 * 
 *     try:             # <<<<<<<<<<<<<<
@ -6852,7 +6852,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
    __pyx_L4_error:;
    __Pyx_XDECREF(__pyx_t_1); __pyx_t_1 = 0;

-    /* "cpp_common.pxd":68
+    /* "cpp_common.pxd":69
 *             for i in range(s_proc.length):
 *                 (<uint64_t*>s_proc.data)[i] = <uint64_t>hash(arr[i])
 *     except Exception as e:             # <<<<<<<<<<<<<<
@ -6862,7 +6862,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
    __pyx_t_13 = __Pyx_PyErr_ExceptionMatches(((PyObject *)(&((PyTypeObject*)PyExc_Exception)[0])));
    if (__pyx_t_13) {
      __Pyx_AddTraceback("cpp_common.hash_array", __pyx_clineno, __pyx_lineno, __pyx_filename);
-      if (__Pyx_GetException(&__pyx_t_1, &__pyx_t_14, &__pyx_t_15) < 0) __PYX_ERR(1, 68, __pyx_L6_except_error)
+      if (__Pyx_GetException(&__pyx_t_1, &__pyx_t_14, &__pyx_t_15) < 0) __PYX_ERR(1, 69, __pyx_L6_except_error)
      __Pyx_GOTREF(__pyx_t_1);
      __Pyx_GOTREF(__pyx_t_14);
      __Pyx_GOTREF(__pyx_t_15);
@ -6870,7 +6870,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
      __pyx_v_e = __pyx_t_14;
      /*try:*/ {

-        /* "cpp_common.pxd":69
+        /* "cpp_common.pxd":70
 *                 (<uint64_t*>s_proc.data)[i] = <uint64_t>hash(arr[i])
 *     except Exception as e:
 *         free(s_proc.data)             # <<<<<<<<<<<<<<
@ -6879,7 +6879,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
 */
        free(__pyx_v_s_proc.data);

-        /* "cpp_common.pxd":70
+        /* "cpp_common.pxd":71
 *     except Exception as e:
 *         free(s_proc.data)
 *         s_proc.data = NULL             # <<<<<<<<<<<<<<
@ -6888,7 +6888,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
 */
        __pyx_v_s_proc.data = NULL;

-        /* "cpp_common.pxd":71
+        /* "cpp_common.pxd":72
 *         free(s_proc.data)
 *         s_proc.data = NULL
 *         raise             # <<<<<<<<<<<<<<
@ -6900,10 +6900,10 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
        __Pyx_XGIVEREF(__pyx_t_15);
        __Pyx_ErrRestoreWithState(__pyx_t_1, __pyx_t_14, __pyx_t_15);
        __pyx_t_1 = 0; __pyx_t_14 = 0; __pyx_t_15 = 0; 
-        __PYX_ERR(1, 71, __pyx_L29_error)
+        __PYX_ERR(1, 72, __pyx_L29_error)
      }

-      /* "cpp_common.pxd":68
+      /* "cpp_common.pxd":69
 *             for i in range(s_proc.length):
 *                 (<uint64_t*>s_proc.data)[i] = <uint64_t>hash(arr[i])
 *     except Exception as e:             # <<<<<<<<<<<<<<
@ -6947,7 +6947,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
    goto __pyx_L6_except_error;
    __pyx_L6_except_error:;

-    /* "cpp_common.pxd":38
+    /* "cpp_common.pxd":39
 *         raise MemoryError
 * 
 *     try:             # <<<<<<<<<<<<<<
@ -6962,7 +6962,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
    __pyx_L9_try_end:;
  }

-  /* "cpp_common.pxd":73
+  /* "cpp_common.pxd":74
 *         raise
 * 
 *     s_proc.allocated = True             # <<<<<<<<<<<<<<
@ -6971,7 +6971,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
 */
  __pyx_v_s_proc.allocated = 1;

-  /* "cpp_common.pxd":74
+  /* "cpp_common.pxd":75
 * 
 *     s_proc.allocated = True
 *     return move(s_proc)             # <<<<<<<<<<<<<<
@ -6981,8 +6981,8 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
  __pyx_r = cython_std::move<proc_string>(__pyx_v_s_proc);
  goto __pyx_L0;

-  /* "cpp_common.pxd":27
- *     void validate_string(object py_str, const char* err) except +
+  /* "cpp_common.pxd":28
+ *     proc_string default_process_func(proc_string sentence) except +
 * 
 * cdef inline proc_string hash_array(arr) except *:             # <<<<<<<<<<<<<<
 *     # TODO on Cpython this does not require any copies
@ -7002,7 +7002,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
  return __pyx_r;
 }

-/* "cpp_common.pxd":77
+/* "cpp_common.pxd":78
 * 
 * 
 * cdef inline proc_string hash_sequence(seq) except *:             # <<<<<<<<<<<<<<
@ -7046,17 +7046,17 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_sequence(PyObject *__
  int __pyx_clineno = 0;
  __Pyx_RefNannySetupContext("hash_sequence", 0);

-  /* "cpp_common.pxd":79
+  /* "cpp_common.pxd":80
 * cdef inline proc_string hash_sequence(seq) except *:
 *     cdef proc_string s_proc
 *     s_proc.length = <size_t>len(seq)             # <<<<<<<<<<<<<<
 * 
 *     s_proc.data = malloc(s_proc.length * sizeof(uint64_t))
 */
-  __pyx_t_1 = PyObject_Length(__pyx_v_seq); if (unlikely(__pyx_t_1 == ((Py_ssize_t)-1))) __PYX_ERR(1, 79, __pyx_L1_error)
+  __pyx_t_1 = PyObject_Length(__pyx_v_seq); if (unlikely(__pyx_t_1 == ((Py_ssize_t)-1))) __PYX_ERR(1, 80, __pyx_L1_error)
  __pyx_v_s_proc.length = ((size_t)__pyx_t_1);

-  /* "cpp_common.pxd":81
+  /* "cpp_common.pxd":82
 *     s_proc.length = <size_t>len(seq)
 * 
 *     s_proc.data = malloc(s_proc.length * sizeof(uint64_t))             # <<<<<<<<<<<<<<
@ -7065,7 +7065,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_sequence(PyObject *__
 */
  __pyx_v_s_proc.data = malloc((__pyx_v_s_proc.length * (sizeof(uint64_t))));

-  /* "cpp_common.pxd":83
+  /* "cpp_common.pxd":84
 *     s_proc.data = malloc(s_proc.length * sizeof(uint64_t))
 * 
 *     if s_proc.data == NULL:             # <<<<<<<<<<<<<<
@ -7075,16 +7075,16 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_sequence(PyObject *__
  __pyx_t_2 = ((__pyx_v_s_proc.data == NULL) != 0);
  if (unlikely(__pyx_t_2)) {

-    /* "cpp_common.pxd":84
+    /* "cpp_common.pxd":85
 * 
 *     if s_proc.data == NULL:
 *         raise MemoryError             # <<<<<<<<<<<<<<
 * 
 *     try:
 */
-    PyErr_NoMemory(); __PYX_ERR(1, 84, __pyx_L1_error)
+    PyErr_NoMemory(); __PYX_ERR(1, 85, __pyx_L1_error)

-    /* "cpp_common.pxd":83
+    /* "cpp_common.pxd":84
 *     s_proc.data = malloc(s_proc.length * sizeof(uint64_t))
 * 
 *     if s_proc.data == NULL:             # <<<<<<<<<<<<<<
@ -7093,7 +7093,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_sequence(PyObject *__
 */
  }

-  /* "cpp_common.pxd":86
+  /* "cpp_common.pxd":87
 *         raise MemoryError
 * 
 *     try:             # <<<<<<<<<<<<<<
@ -7109,7 +7109,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_sequence(PyObject *__
    __Pyx_XGOTREF(__pyx_t_5);
    /*try:*/ {

-      /* "cpp_common.pxd":87
+      /* "cpp_common.pxd":88
 * 
 *     try:
 *         s_proc.kind = RAPIDFUZZ_INT64             # <<<<<<<<<<<<<<
@ -7118,7 +7118,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_sequence(PyObject *__
 */
      __pyx_v_s_proc.kind = RAPIDFUZZ_INT64;

-      /* "cpp_common.pxd":88
+      /* "cpp_common.pxd":89
 *     try:
 *         s_proc.kind = RAPIDFUZZ_INT64
 *         for i in range(s_proc.length):             # <<<<<<<<<<<<<<
@ -7130,19 +7130,19 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_sequence(PyObject *__
      for (__pyx_t_8 = 0; __pyx_t_8 < __pyx_t_7; __pyx_t_8+=1) {
        __pyx_v_i = __pyx_t_8;

-        /* "cpp_common.pxd":89
+        /* "cpp_common.pxd":90
 *         s_proc.kind = RAPIDFUZZ_INT64
 *         for i in range(s_proc.length):
 *             elem = seq[i]             # <<<<<<<<<<<<<<
 *             # this is required so e.g. a list of char can be compared to a string
 *             if isinstance(elem, str) and len(elem) == 1:
 */
-        __pyx_t_9 = __Pyx_GetItemInt(__pyx_v_seq, __pyx_v_i, size_t, 0, __Pyx_PyInt_FromSize_t, 0, 0, 1); if (unlikely(!__pyx_t_9)) __PYX_ERR(1, 89, __pyx_L4_error)
+        __pyx_t_9 = __Pyx_GetItemInt(__pyx_v_seq, __pyx_v_i, size_t, 0, __Pyx_PyInt_FromSize_t, 0, 0, 1); if (unlikely(!__pyx_t_9)) __PYX_ERR(1, 90, __pyx_L4_error)
        __Pyx_GOTREF(__pyx_t_9);
        __Pyx_XDECREF_SET(__pyx_v_elem, __pyx_t_9);
        __pyx_t_9 = 0;

-        /* "cpp_common.pxd":91
+        /* "cpp_common.pxd":92
 *             elem = seq[i]
 *             # this is required so e.g. a list of char can be compared to a string
 *             if isinstance(elem, str) and len(elem) == 1:             # <<<<<<<<<<<<<<
@ -7156,23 +7156,23 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_sequence(PyObject *__
          __pyx_t_2 = __pyx_t_11;
          goto __pyx_L13_bool_binop_done;
        }
-        __pyx_t_1 = PyObject_Length(__pyx_v_elem); if (unlikely(__pyx_t_1 == ((Py_ssize_t)-1))) __PYX_ERR(1, 91, __pyx_L4_error)
+        __pyx_t_1 = PyObject_Length(__pyx_v_elem); if (unlikely(__pyx_t_1 == ((Py_ssize_t)-1))) __PYX_ERR(1, 92, __pyx_L4_error)
        __pyx_t_11 = ((__pyx_t_1 == 1) != 0);
        __pyx_t_2 = __pyx_t_11;
        __pyx_L13_bool_binop_done:;
        if (__pyx_t_2) {

-          /* "cpp_common.pxd":92
+          /* "cpp_common.pxd":93
 *             # this is required so e.g. a list of char can be compared to a string
 *             if isinstance(elem, str) and len(elem) == 1:
 *                 (<uint64_t*>s_proc.data)[i] = <uint64_t><Py_UCS4>elem             # <<<<<<<<<<<<<<
 *             else:
 *                 (<uint64_t*>s_proc.data)[i] = <uint64_t>hash(elem)
 */
-          __pyx_t_12 = __Pyx_PyObject_AsPy_UCS4(__pyx_v_elem); if (unlikely((__pyx_t_12 == (Py_UCS4)-1) && PyErr_Occurred())) __PYX_ERR(1, 92, __pyx_L4_error)
+          __pyx_t_12 = __Pyx_PyObject_AsPy_UCS4(__pyx_v_elem); if (unlikely((__pyx_t_12 == (Py_UCS4)-1) && PyErr_Occurred())) __PYX_ERR(1, 93, __pyx_L4_error)
          (((uint64_t *)__pyx_v_s_proc.data)[__pyx_v_i]) = ((uint64_t)((Py_UCS4)__pyx_t_12));

-          /* "cpp_common.pxd":91
+          /* "cpp_common.pxd":92
 *             elem = seq[i]
 *             # this is required so e.g. a list of char can be compared to a string
 *             if isinstance(elem, str) and len(elem) == 1:             # <<<<<<<<<<<<<<
@ -7182,7 +7182,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_sequence(PyObject *__
          goto __pyx_L12;
        }

-        /* "cpp_common.pxd":94
+        /* "cpp_common.pxd":95
 *                 (<uint64_t*>s_proc.data)[i] = <uint64_t><Py_UCS4>elem
 *             else:
 *                 (<uint64_t*>s_proc.data)[i] = <uint64_t>hash(elem)             # <<<<<<<<<<<<<<
@ -7190,13 +7190,13 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_sequence(PyObject *__
 *         free(s_proc.data)
 */
        /*else*/ {
-          __pyx_t_13 = PyObject_Hash(__pyx_v_elem); if (unlikely(__pyx_t_13 == ((Py_hash_t)-1))) __PYX_ERR(1, 94, __pyx_L4_error)
+          __pyx_t_13 = PyObject_Hash(__pyx_v_elem); if (unlikely(__pyx_t_13 == ((Py_hash_t)-1))) __PYX_ERR(1, 95, __pyx_L4_error)
          (((uint64_t *)__pyx_v_s_proc.data)[__pyx_v_i]) = ((uint64_t)__pyx_t_13);
        }
        __pyx_L12:;
      }

-      /* "cpp_common.pxd":86
+      /* "cpp_common.pxd":87
 *         raise MemoryError
 * 
 *     try:             # <<<<<<<<<<<<<<
@ -7211,7 +7211,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_sequence(PyObject *__
    __pyx_L4_error:;
    __Pyx_XDECREF(__pyx_t_9); __pyx_t_9 = 0;

-    /* "cpp_common.pxd":95
+    /* "cpp_common.pxd":96
 *             else:
 *                 (<uint64_t*>s_proc.data)[i] = <uint64_t>hash(elem)
 *     except Exception as e:             # <<<<<<<<<<<<<<
@ -7221,7 +7221,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_sequence(PyObject *__
    __pyx_t_14 = __Pyx_PyErr_ExceptionMatches(((PyObject *)(&((PyTypeObject*)PyExc_Exception)[0])));
    if (__pyx_t_14) {
      __Pyx_AddTraceback("cpp_common.hash_sequence", __pyx_clineno, __pyx_lineno, __pyx_filename);
-      if (__Pyx_GetException(&__pyx_t_9, &__pyx_t_15, &__pyx_t_16) < 0) __PYX_ERR(1, 95, __pyx_L6_except_error)
+      if (__Pyx_GetException(&__pyx_t_9, &__pyx_t_15, &__pyx_t_16) < 0) __PYX_ERR(1, 96, __pyx_L6_except_error)
      __Pyx_GOTREF(__pyx_t_9);
      __Pyx_GOTREF(__pyx_t_15);
      __Pyx_GOTREF(__pyx_t_16);
@ -7229,7 +7229,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_sequence(PyObject *__
      __pyx_v_e = __pyx_t_15;
      /*try:*/ {

-        /* "cpp_common.pxd":96
+        /* "cpp_common.pxd":97
 *                 (<uint64_t*>s_proc.data)[i] = <uint64_t>hash(elem)
 *     except Exception as e:
 *         free(s_proc.data)             # <<<<<<<<<<<<<<
@ -7238,7 +7238,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_sequence(PyObject *__
 */
        free(__pyx_v_s_proc.data);

-        /* "cpp_common.pxd":97
+        /* "cpp_common.pxd":98
 *     except Exception as e:
 *         free(s_proc.data)
 *         s_proc.data = NULL             # <<<<<<<<<<<<<<
@ -7247,7 +7247,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_sequence(PyObject *__
 */
        __pyx_v_s_proc.data = NULL;

-        /* "cpp_common.pxd":98
+        /* "cpp_common.pxd":99
 *         free(s_proc.data)
 *         s_proc.data = NULL
 *         raise             # <<<<<<<<<<<<<<
@ -7259,10 +7259,10 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_sequence(PyObject *__
        __Pyx_XGIVEREF(__pyx_t_16);
        __Pyx_ErrRestoreWithState(__pyx_t_9, __pyx_t_15, __pyx_t_16);
        __pyx_t_9 = 0; __pyx_t_15 = 0; __pyx_t_16 = 0; 
-        __PYX_ERR(1, 98, __pyx_L20_error)
+        __PYX_ERR(1, 99, __pyx_L20_error)
      }

-      /* "cpp_common.pxd":95
+      /* "cpp_common.pxd":96
 *             else:
 *                 (<uint64_t*>s_proc.data)[i] = <uint64_t>hash(elem)
 *     except Exception as e:             # <<<<<<<<<<<<<<
@ -7306,7 +7306,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_sequence(PyObject *__
    goto __pyx_L6_except_error;
    __pyx_L6_except_error:;

-    /* "cpp_common.pxd":86
+    /* "cpp_common.pxd":87
 *         raise MemoryError
 * 
 *     try:             # <<<<<<<<<<<<<<
@ -7321,7 +7321,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_sequence(PyObject *__
    __pyx_L9_try_end:;
  }

-  /* "cpp_common.pxd":100
+  /* "cpp_common.pxd":101
 *         raise
 * 
 *     s_proc.allocated = True             # <<<<<<<<<<<<<<
@ -7329,7 +7329,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_sequence(PyObject *__
 */
  __pyx_v_s_proc.allocated = 1;

-  /* "cpp_common.pxd":101
+  /* "cpp_common.pxd":102
 * 
 *     s_proc.allocated = True
 *     return move(s_proc)             # <<<<<<<<<<<<<<
@ -7337,7 +7337,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_sequence(PyObject *__
  __pyx_r = cython_std::move<proc_string>(__pyx_v_s_proc);
  goto __pyx_L0;

-  /* "cpp_common.pxd":77
+  /* "cpp_common.pxd":78
 * 
 * 
 * cdef inline proc_string hash_sequence(seq) except *:             # <<<<<<<<<<<<<<
@ -7463,8 +7463,8 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = {
 };
 /* #### Code section: cached_builtins ### */
 static CYTHON_SMALL_CODE int __Pyx_InitCachedBuiltins(void) {
-  __pyx_builtin_MemoryError = __Pyx_GetBuiltinName(__pyx_n_s_MemoryError); if (!__pyx_builtin_MemoryError) __PYX_ERR(1, 36, __pyx_L1_error)
-  __pyx_builtin_range = __Pyx_GetBuiltinName(__pyx_n_s_range); if (!__pyx_builtin_range) __PYX_ERR(1, 42, __pyx_L1_error)
+  __pyx_builtin_MemoryError = __Pyx_GetBuiltinName(__pyx_n_s_MemoryError); if (!__pyx_builtin_MemoryError) __PYX_ERR(1, 37, __pyx_L1_error)
+  __pyx_builtin_range = __Pyx_GetBuiltinName(__pyx_n_s_range); if (!__pyx_builtin_range) __PYX_ERR(1, 43, __pyx_L1_error)
  return 0;
  __pyx_L1_error:;
  return -1;
@ -8252,7 +8252,7 @@ if (!__Pyx_RefNanny) {
  if (PyDict_SetItem(__pyx_d, __pyx_n_s_test, __pyx_t_1) < 0) __PYX_ERR(0, 1, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;

-  /* "cpp_common.pxd":77
+  /* "cpp_common.pxd":78
 * 
 * 
 * cdef inline proc_string hash_sequence(seq) except *:             # <<<<<<<<<<<<<<
--- a/src/cpp_fuzz.pyx
+++ b/src/cpp_fuzz.pyx
@ -42,9 +42,9 @@ def ratio(s1, s2, *, processor=None, score_cutoff=None):

    Parameters
    ----------
-    s1 : str
+    s1 : Sequence[Hashable]
        First string to compare.
-    s2 : str
+    s2 : Sequence[Hashable]
        Second string to compare.
    processor: bool or callable, optional
        Optional callable that is used to preprocess the strings before
@ -94,9 +94,9 @@ def partial_ratio(s1, s2, *, processor=None, score_cutoff=None):

    Parameters
    ----------
-    s1 : str
+    s1 : Sequence[Hashable]
        First string to compare.
-    s2 : str
+    s2 : Sequence[Hashable]
        Second string to compare.
    processor: bool or callable, optional
        Optional callable that is used to preprocess the strings before
@ -173,9 +173,9 @@ def token_sort_ratio(s1, s2, *, processor=True, score_cutoff=None):

    Parameters
    ----------
-    s1 : str
+    s1 : Sequence[Hashable]
        First string to compare.
-    s2 : str
+    s2 : Sequence[Hashable]
        Second string to compare.
    processor: bool or callable, optional
        Optional callable that is used to preprocess the strings before
@ -221,9 +221,9 @@ def token_set_ratio(s1, s2, *, processor=True, score_cutoff=None):

    Parameters
    ----------
-    s1 : str
+    s1 : Sequence[Hashable]
        First string to compare.
-    s2 : str
+    s2 : Sequence[Hashable]
        Second string to compare.
    processor: bool or callable, optional
        Optional callable that is used to preprocess the strings before
@ -271,9 +271,9 @@ def token_ratio(s1, s2, *, processor=True, score_cutoff=None):

    Parameters
    ----------
-    s1 : str
+    s1 : Sequence[Hashable]
        First string to compare.
-    s2 : str
+    s2 : Sequence[Hashable]
        Second string to compare.
    processor: bool or callable, optional
        Optional callable that is used to preprocess the strings before
@ -313,9 +313,9 @@ def partial_token_sort_ratio(s1, s2, *, processor=True, score_cutoff=None):

    Parameters
    ----------
-    s1 : str
+    s1 : Sequence[Hashable]
        First string to compare.
-    s2 : str
+    s2 : Sequence[Hashable]
        Second string to compare.
    processor: bool or callable, optional
        Optional callable that is used to preprocess the strings before
@ -356,9 +356,9 @@ def partial_token_set_ratio(s1, s2, *, processor=True, score_cutoff=None):

    Parameters
    ----------
-    s1 : str
+    s1 : Sequence[Hashable]
        First string to compare.
-    s2 : str
+    s2 : Sequence[Hashable]
        Second string to compare.
    processor: bool or callable, optional
        Optional callable that is used to preprocess the strings before
@ -399,9 +399,9 @@ def partial_token_ratio(s1, s2, *, processor=True, score_cutoff=None):

    Parameters
    ----------
-    s1 : str
+    s1 : Sequence[Hashable]
        First string to compare.
-    s2 : str
+    s2 : Sequence[Hashable]
        Second string to compare.
    processor: bool or callable, optional
        Optional callable that is used to preprocess the strings before
@ -441,9 +441,9 @@ def WRatio(s1, s2, *, processor=True, score_cutoff=None):

    Parameters
    ----------
-    s1 : str
+    s1 : Sequence[Hashable]
        First string to compare.
-    s2 : str
+    s2 : Sequence[Hashable]
        Second string to compare.
    processor: bool or callable, optional
        Optional callable that is used to preprocess the strings before
@ -485,9 +485,9 @@ def QRatio(s1, s2, *, processor=True, score_cutoff=None):

    Parameters
    ----------
-    s1 : str
+    s1 : Sequence[Hashable]
        First string to compare.
-    s2 : str
+    s2 : Sequence[Hashable]
        Second string to compare.
    processor: bool or callable, optional
        Optional callable that is used to preprocess the strings before
--- a/src/cpp_process.cpp
+++ b/src/cpp_process.cpp
--- a/src/cpp_process.pyx
+++ b/src/cpp_process.pyx
@ -28,16 +28,21 @@ from rapidfuzz.fuzz import (
 from libcpp.vector cimport vector
 from libcpp cimport algorithm
 from libcpp.utility cimport move
+from libc.stdint cimport uint8_t, int32_t
+from libc.math cimport floor

 from cpython.list cimport PyList_New, PyList_SET_ITEM
 from cpython.object cimport PyObject
 from cpython.ref cimport Py_INCREF, Py_DECREF

-from cpp_common cimport proc_string, is_valid_string, convert_string, hash_array, hash_sequence
+from cpp_common cimport proc_string, is_valid_string, convert_string, hash_array, hash_sequence, default_process_func

 import heapq
 from array import array

+import numpy as np
+cimport numpy as np
+cimport cython

 cdef inline proc_string conv_sequence(seq) except *:
    if is_valid_string(seq):
@ -50,15 +55,15 @@ cdef inline proc_string conv_sequence(seq) except *:
 cdef extern from "cpp_process.hpp":
    cdef cppclass CachedScorerContext:
        CachedScorerContext()
-        double ratio(const proc_string&, double) except +
+        double ratio(const proc_string&, double) nogil except +

    cdef cppclass CachedDistanceContext:
        CachedDistanceContext()
-        size_t ratio(const proc_string&, size_t) except +
+        size_t ratio(const proc_string&, size_t) nogil except +

    # normalized distances
    # fuzz
-    CachedScorerContext cached_ratio_init(                   const proc_string&, int) except +
+    CachedScorerContext cached_ratio_init(                   const proc_string&, int) nogil except +
    CachedScorerContext cached_partial_ratio_init(           const proc_string&, int) except +
    CachedScorerContext cached_token_sort_ratio_init(        const proc_string&, int) except +
    CachedScorerContext cached_token_set_ratio_init(         const proc_string&, int) except +
@ -226,7 +231,7 @@ cdef inline extractOne_dict(CachedScorerContext context, choices, processor, dou
        for choice_key, choice in choices.items():
            if choice is None:
                continue
-            
+
            score = context.ratio(conv_sequence(choice), score_cutoff)

            if score >= score_cutoff and score > result_score:
@ -400,8 +405,8 @@ cdef inline py_extractOne_dict(query, choices, scorer, processor, double score_c
            score = scorer(query, processor(choice), **kwargs)

            if score >= score_cutoff and score > result_score:
-                kwargs["score_cutoff"] = score_cutoff
                score_cutoff = score
+                kwargs["score_cutoff"] = score
                result_score = score
                result_choice = choice
                result_key = choice_key
@ -416,8 +421,8 @@ cdef inline py_extractOne_dict(query, choices, scorer, processor, double score_c
            score = scorer(query, choice, **kwargs)

            if score >= score_cutoff and score > result_score:
-                kwargs["score_cutoff"] = score_cutoff
                score_cutoff = score
+                kwargs["score_cutoff"] = score
                result_score = score
                result_choice = choice
                result_key = choice_key
@ -445,8 +450,8 @@ cdef inline py_extractOne_list(query, choices, scorer, processor, double score_c
            score = scorer(query, processor(choice), **kwargs)

            if score >= score_cutoff and score > result_score:
-                kwargs["score_cutoff"] = score_cutoff
                score_cutoff = score
+                kwargs["score_cutoff"] = score
                result_score = score
                result_choice = choice
                result_index = i
@ -461,8 +466,8 @@ cdef inline py_extractOne_list(query, choices, scorer, processor, double score_c
            score = scorer(query, choice, **kwargs)

            if score >= score_cutoff and score > result_score:
-                kwargs["score_cutoff"] = score_cutoff
                score_cutoff = score
+                kwargs["score_cutoff"] = score
                result_score = score
                result_choice = choice
                result_index = i
@ -480,9 +485,9 @@ def extractOne(query, choices, *, scorer=WRatio, processor=default_process, scor

    Parameters
    ----------
-    query : str
+    query : Sequence[Hashable]
        string we want to find
-    choices : Iterable
+    choices : Iterable[Sequence[Hashable]] | Mapping[Sequence[Hashable]]
        list of all strings the query should be compared with or dict with a mapping
        {<result>: <string to compare>}
    scorer : Callable, optional
@ -506,7 +511,7 @@ def extractOne(query, choices, *, scorer=WRatio, processor=default_process, scor

    Returns
    -------
-    Tuple[str, Any, Any]
+    Tuple[Sequence[Hashable], Any, Any]
        Returns the best match in form of a Tuple with 3 elements. The values stored in the
        tuple depend on the types of the input arguments.

@ -634,7 +639,7 @@ def extractOne(query, choices, *, scorer=WRatio, processor=default_process, scor
            return extractOne_dict(move(ScorerContext), choices, processor, c_score_cutoff)
        else:
            return extractOne_list(move(ScorerContext), choices, processor, c_score_cutoff)
-    
+
    if IsIntegratedDistance(scorer):
        # distance implemented in C++
        query_context = conv_sequence(query)
@ -804,13 +809,13 @@ cdef inline extract_list(CachedScorerContext context, choices, processor, size_t
            for i, choice in enumerate(choices):
                if choice is None:
                    continue
-    
+
                proc_choice = processor(choice)
                if proc_choice is None:
                    continue
-    
+
                score = context.ratio(conv_sequence(proc_choice), score_cutoff)
-    
+
                if score >= score_cutoff:
                    Py_INCREF(choice)
                    results.push_back(ListMatchScorerElem(score, i, <PyObject*>choice))
@ -818,23 +823,23 @@ cdef inline extract_list(CachedScorerContext context, choices, processor, size_t
            for i, choice in enumerate(choices):
                if choice is None:
                    continue
-    
+
                score = context.ratio(conv_sequence(choice), score_cutoff)
-    
+
                if score >= score_cutoff:
                    Py_INCREF(choice)
                    results.push_back(ListMatchScorerElem(score, i, <PyObject*>choice))
-    
+
        # due to score_cutoff not always completely filled
        if limit > results.size():
            limit = results.size()
-    
+
        if limit >= results.size():
            algorithm.sort(results.begin(), results.end(), ExtractScorerComp())
        else:
            algorithm.partial_sort(results.begin(), results.begin() + <ptrdiff_t>limit, results.end(), ExtractScorerComp())
            results.resize(limit)
-    
+
        # copy elements into Python List
        result_list = PyList_New(<Py_ssize_t>limit)
        for i in range(limit):
@ -863,13 +868,13 @@ cdef inline extract_distance_list(CachedDistanceContext context, choices, proces
            for i, choice in enumerate(choices):
                if choice is None:
                    continue
-    
+
                proc_choice = processor(choice)
                if proc_choice is None:
                    continue
-    
+
                distance = context.ratio(conv_sequence(proc_choice), max_)
-    
+
                if distance <= max_:
                    Py_INCREF(choice)
                    results.push_back(ListMatchDistanceElem(distance, i, <PyObject*>choice))
@ -877,23 +882,23 @@ cdef inline extract_distance_list(CachedDistanceContext context, choices, proces
            for i, choice in enumerate(choices):
                if choice is None:
                    continue
-    
+
                distance = context.ratio(conv_sequence(choice), max_)
-    
+
                if distance <= max_:
                    Py_INCREF(choice)
                    results.push_back(ListMatchDistanceElem(distance, i, <PyObject*>choice))
-    
+
        # due to max_ not always completely filled
        if limit > results.size():
            limit = results.size()
-    
+
        if limit >= results.size():
            algorithm.sort(results.begin(), results.end(), ExtractDistanceComp())
        else:
            algorithm.partial_sort(results.begin(), results.begin() + <ptrdiff_t>limit, results.end(), ExtractDistanceComp())
            results.resize(limit)
-    
+
        # copy elements into Python List
        result_list = PyList_New(<Py_ssize_t>limit)
        for i in range(limit):
@ -980,9 +985,9 @@ def extract(query, choices, *, scorer=WRatio, processor=default_process, limit=5

    Parameters
    ----------
-    query : str
+    query : Sequence[Hashable]
        string we want to find
-    choices : Iterable
+    choices : Collection[Sequence[Hashable]] | Mapping[Sequence[Hashable]]
        list of all strings the query should be compared with or dict with a mapping
        {<result>: <string to compare>}
    scorer : Callable, optional
@ -1008,7 +1013,7 @@ def extract(query, choices, *, scorer=WRatio, processor=default_process, limit=5

    Returns
    -------
-    List[Tuple[str, Any, Any]]
+    List[Tuple[Sequence[Hashable], Any, Any]]
        The return type is always a List of Tuples with 3 elements. However the values stored in the
        tuple depend on the types of the input arguments.

@ -1107,9 +1112,9 @@ def extract_iter(query, choices, *, scorer=WRatio, processor=default_process, sc

    Parameters
    ----------
-    query : str
+    query : Sequence[Hashable]
        string we want to find
-    choices : Iterable
+    choices : Iterable[Sequence[Hashable]] | Mapping[Sequence[Hashable]]
        list of all strings the query should be compared with or dict with a mapping
        {<result>: <string to compare>}
    scorer : Callable, optional
@ -1133,7 +1138,7 @@ def extract_iter(query, choices, *, scorer=WRatio, processor=default_process, sc

    Yields
    -------
-    Tuple[str, Any, Any]
+    Tuple[Sequence[Hashable], Any, Any]
        Yields similarity between the query and each choice in form of a Tuple with 3 elements.
        The values stored in the tuple depend on the types of the input arguments.

@ -1408,3 +1413,325 @@ def extract_iter(query, choices, *, scorer=WRatio, processor=default_process, sc
        yield from py_extract_iter_dict()
    else:
        yield from py_extract_iter_list()
+
+
+@cython.boundscheck(False)
+@cython.wraparound(False)
+cdef inline cdist_two_lists_similarity(
+    const vector[proc_string]& queries,
+    const vector[proc_string]& choices,
+    scorer, score_cutoff, dict kwargs
+):
+    # Build a queries.size() x choices.size() uint8 matrix that holds the floored
+    # similarity of every (query, choice) pair.
+    #
+    # Raises TypeError when score_cutoff lies outside of 0 - 100.
+    cdef size_t row_count = queries.size()
+    cdef size_t col_count = choices.size()
+    cdef size_t row, col
+    cdef double min_score = 0
+    cdef np.ndarray[np.uint8_t, ndim=2] scores = np.empty((row_count, col_count), dtype=np.uint8)
+
+    # without an explicit cutoff the default of 0 stays in place
+    if score_cutoff is not None:
+        min_score = score_cutoff
+    if min_score > 100 or min_score < 0:
+        raise TypeError("score_cutoff has to be in the range of 0.0 - 100.0")
+
+    # the cutoff is floored, just like the scores written into the matrix
+    min_score = floor(min_score)
+
+    for row in range(row_count):
+        # one cached context per query, reused for the whole row
+        cached_scorer = CachedScorerInit(scorer, queries[row], 0, kwargs)
+        for col in range(col_count):
+            scores[row, col] = <uint8_t>floor(cached_scorer.ratio(choices[col], min_score))
+
+    return scores
+
+@cython.boundscheck(False)
+@cython.wraparound(False)
+cdef inline cdist_two_lists_distance(
+    const vector[proc_string]& queries, const vector[proc_string]& choices,
+    scorer, score_cutoff, dict kwargs
+):
+    # Build a queries.size() x choices.size() int32 matrix that holds the distance of
+    # every (query, choice) pair.
+    cdef size_t row_count = queries.size()
+    cdef size_t col_count = choices.size()
+    cdef size_t row, col
+    # largest size_t value is used as the limit when no cutoff was requested
+    cdef size_t max_dist = <size_t>-1
+    cdef np.ndarray[np.int32_t, ndim=2] distances = np.empty((row_count, col_count), dtype=np.int32)
+
+    # both None and -1 leave the limit untouched
+    if score_cutoff is not None:
+        if score_cutoff != -1:
+            max_dist = score_cutoff
+
+    for row in range(row_count):
+        # one cached context per query, reused for the whole row
+        cached_distance = CachedDistanceInit(scorer, queries[row], 0, kwargs)
+        for col in range(col_count):
+            distances[row, col] = <int32_t>cached_distance.ratio(choices[col], max_dist)
+
+    return distances
+
+@cython.boundscheck(False)
+@cython.wraparound(False)
+cdef inline py_cdist_two_lists(
+    const vector[PyObject*]& queries, const vector[PyObject*]& choices,
+    scorer, score_cutoff, dict kwargs
+):
+    # Build a queries.size() x choices.size() uint8 matrix by calling the Python callable
+    # `scorer` for every (query, choice) pair and flooring its result.
+    #
+    # `kwargs` is modified in place: "processor" and "score_cutoff" are overwritten.
+    # Raises TypeError when score_cutoff lies outside of 0 - 100.
+    cdef size_t row_count = queries.size()
+    cdef size_t col_count = choices.size()
+    cdef size_t row, col
+    cdef double min_score = 0
+    cdef np.ndarray[np.uint8_t, ndim=2] scores = np.empty((row_count, col_count), dtype=np.uint8)
+
+    # without an explicit cutoff the default of 0 stays in place
+    if score_cutoff is not None:
+        min_score = score_cutoff
+    if min_score > 100 or min_score < 0:
+        raise TypeError("score_cutoff has to be in the range of 0.0 - 100.0")
+
+    min_score = floor(min_score)
+
+    # the scorer is always called with processor=None and the floored cutoff
+    kwargs["processor"] = None
+    kwargs["score_cutoff"] = min_score
+
+    for row in range(row_count):
+        for col in range(col_count):
+            raw_score = scorer(<object>queries[row], <object>choices[col], **kwargs)
+            scores[row, col] = <uint8_t>floor(<double>raw_score)
+
+    return scores
+
+cdef cdist_two_lists(queries, choices, scorer, processor, score_cutoff, dict kwargs):
+    # Preprocess `queries` and `choices` and hand them to the matrix routine that matches
+    # the scorer: cdist_two_lists_similarity / cdist_two_lists_distance for integrated
+    # scorers, py_cdist_two_lists for every other callable.
+    #
+    # Both inputs need to support len() and iteration.
+    #
+    # Every object stored in proc_py_queries / proc_py_choices carries one extra reference
+    # (taken with Py_INCREF right before the push_back). The `finally` block releases these
+    # references again, both on success and when an exception is raised.
+    cdef vector[proc_string] proc_queries
+    cdef vector[proc_string] proc_choices
+    cdef vector[PyObject*] proc_py_queries
+    cdef vector[PyObject*] proc_py_choices
+    cdef size_t queries_len = <size_t>len(queries)
+    cdef size_t choices_len = <size_t>len(choices)
+
+    try:
+        if IsIntegratedScorer(scorer) or IsIntegratedDistance(scorer):
+            proc_queries.reserve(queries_len)
+            proc_choices.reserve(choices_len)
+
+            # processor None/False
+            if not processor:
+                for query in queries:
+                    proc_queries.push_back(move(conv_sequence(query)))
+
+                for choice in choices:
+                    proc_choices.push_back(move(conv_sequence(choice)))
+            # processor has to be called through python
+            elif processor is not default_process and callable(processor):
+                proc_py_queries.reserve(queries_len)
+                for query in queries:
+                    proc_query = processor(query)
+                    # hold a reference to the processed object until `finally`
+                    # NOTE(review): presumably required because the proc_string created below
+                    # refers to data owned by proc_query - confirm against conv_sequence
+                    Py_INCREF(proc_query)
+                    proc_py_queries.push_back(<PyObject*>proc_query)
+                    proc_queries.push_back(move(conv_sequence(proc_query)))
+
+                proc_py_choices.reserve(choices_len)
+                for choice in choices:
+                    proc_choice = processor(choice)
+                    # same reference handling as for the queries above
+                    Py_INCREF(proc_choice)
+                    proc_py_choices.push_back(<PyObject*>proc_choice)
+                    proc_choices.push_back(move(conv_sequence(proc_choice)))
+
+            # processor is True / default_process
+            else:
+                for query in queries:
+                    proc_queries.push_back(
+                        move(default_process_func(move(conv_sequence(query))))
+                    )
+
+                for choice in choices:
+                    proc_choices.push_back(
+                        move(default_process_func(move(conv_sequence(choice))))
+                    )
+
+            # similarity scorers produce a uint8 matrix, distances an int32 matrix
+            if IsIntegratedScorer(scorer):
+                return cdist_two_lists_similarity(proc_queries, proc_choices, scorer, score_cutoff, kwargs)
+
+            if IsIntegratedDistance(scorer):
+                return cdist_two_lists_distance(proc_queries, proc_choices, scorer, score_cutoff, kwargs)
+
+        else:
+            # any other scorer is called through Python for every pair
+            proc_py_queries.reserve(queries_len)
+            proc_py_choices.reserve(choices_len)
+
+            # processor None/False
+            if not processor:
+                for query in queries:
+                    # the unprocessed inputs are stored directly, with an extra reference
+                    Py_INCREF(query)
+                    proc_py_queries.push_back(<PyObject*>query)
+
+                for choice in choices:
+                    Py_INCREF(choice)
+                    proc_py_choices.push_back(<PyObject*>choice)
+            # processor has to be called through python
+            else:
+                # a truthy value that is not callable (e.g. True) selects default_process
+                if not callable(processor):
+                    processor = default_process
+
+                for query in queries:
+                    proc_query = processor(query)
+                    Py_INCREF(proc_query)
+                    proc_py_queries.push_back(<PyObject*>proc_query)
+
+                for choice in choices:
+                    proc_choice = processor(choice)
+                    Py_INCREF(proc_choice)
+                    proc_py_choices.push_back(<PyObject*>proc_choice)
+
+            return py_cdist_two_lists(proc_py_queries, proc_py_choices, scorer, score_cutoff, kwargs)
+
+    finally:
+        # decref all reference counts
+        for item in proc_py_queries:
+            Py_DECREF(<object>item)
+
+        for item in proc_py_choices:
+            Py_DECREF(<object>item)
+
+@cython.boundscheck(False)
+@cython.wraparound(False)
+cdef inline cdist_single_list_similarity(
+    const vector[proc_string]& queries, scorer, score_cutoff, dict kwargs
+):
+    # Build a square uint8 similarity matrix for `queries` compared with itself.
+    # Only pairs with column > row are scored; each result is mirrored to the other
+    # triangle and the diagonal is set to 100 without calling the scorer.
+    #
+    # Raises TypeError when score_cutoff lies outside of 0 - 100.
+    cdef size_t count = queries.size()
+    cdef size_t row, col
+    cdef double min_score = 0
+    cdef np.ndarray[np.uint8_t, ndim=2] scores = np.empty((count, count), dtype=np.uint8)
+
+    # without an explicit cutoff the default of 0 stays in place
+    if score_cutoff is not None:
+        min_score = score_cutoff
+    if min_score > 100 or min_score < 0:
+        raise TypeError("score_cutoff has to be in the range of 0.0 - 100.0")
+
+    min_score = floor(min_score)
+
+    for row in range(count):
+        scores[row, row] = 100
+        # one cached context per query, reused for the rest of the row
+        cached_scorer = CachedScorerInit(scorer, queries[row], 0, kwargs)
+        for col in range(row + 1, count):
+            pair_score = <uint8_t>floor(cached_scorer.ratio(queries[col], min_score))
+            scores[col, row] = pair_score
+            scores[row, col] = pair_score
+
+    return scores
+
+@cython.boundscheck(False)
+@cython.wraparound(False)
+cdef inline cdist_single_list_distance(
+    const vector[proc_string]& queries, scorer, score_cutoff, dict kwargs
+):
+    # Build a square int32 distance matrix for `queries` compared with itself.
+    # Only pairs with column > row are scored; each result is mirrored to the other
+    # triangle and the diagonal is set to 0 without calling the scorer.
+    cdef size_t count = queries.size()
+    cdef size_t row, col
+    # largest size_t value is used as the limit when no cutoff was requested
+    cdef size_t max_dist = <size_t>-1
+    cdef np.ndarray[np.int32_t, ndim=2] distances = np.empty((count, count), dtype=np.int32)
+
+    # both None and -1 leave the limit untouched
+    if score_cutoff is not None:
+        if score_cutoff != -1:
+            max_dist = score_cutoff
+
+    for row in range(count):
+        distances[row, row] = 0
+        # one cached context per query, reused for the rest of the row
+        cached_distance = CachedDistanceInit(scorer, queries[row], 0, kwargs)
+        for col in range(row + 1, count):
+            pair_distance = <int32_t>cached_distance.ratio(queries[col], max_dist)
+            distances[col, row] = pair_distance
+            distances[row, col] = pair_distance
+
+    return distances
+
+cdef cdist_single_list(queries, scorer, processor, score_cutoff, dict kwargs):
+    # Variant of the pairwise comparison for the case where one collection is compared
+    # with itself. `queries` is preprocessed once and then passed to
+    # cdist_single_list_similarity / cdist_single_list_distance for integrated scorers,
+    # or to py_cdist_two_lists (as both arguments) for every other callable.
+    #
+    # `queries` needs to support len() and iteration.
+    #
+    # Every object stored in proc_py_queries carries one extra reference (taken with
+    # Py_INCREF right before the push_back); the `finally` block releases it again, both
+    # on success and when an exception is raised.
+    cdef size_t queries_len = <size_t>len(queries)
+
+    cdef vector[proc_string] proc_queries
+    cdef vector[PyObject*] proc_py_queries
+
+    try:
+        if IsIntegratedScorer(scorer) or IsIntegratedDistance(scorer):
+            proc_queries.reserve(queries_len)
+
+            # processor None/False
+            if not processor:
+                for query in queries:
+                    proc_queries.push_back(move(conv_sequence(query)))
+            # processor has to be called through python
+            elif processor is not default_process and callable(processor):
+                proc_py_queries.reserve(queries_len)
+                for query in queries:
+                    proc_query = processor(query)
+                    # hold a reference to the processed object until `finally`
+                    # NOTE(review): presumably required because the proc_string created below
+                    # refers to data owned by proc_query - confirm against conv_sequence
+                    Py_INCREF(proc_query)
+                    proc_py_queries.push_back(<PyObject*>proc_query)
+                    proc_queries.push_back(move(conv_sequence(proc_query)))
+
+            # processor is True / default_process
+            else:
+                for query in queries:
+                    proc_queries.push_back(
+                        move(default_process_func(move(conv_sequence(query))))
+                    )
+
+            # similarity scorers produce a uint8 matrix, distances an int32 matrix
+            if IsIntegratedScorer(scorer):
+                return cdist_single_list_similarity(proc_queries, scorer, score_cutoff, kwargs)
+
+            if IsIntegratedDistance(scorer):
+                return cdist_single_list_distance(proc_queries, scorer, score_cutoff, kwargs)
+
+        else:
+            # any other scorer is called through Python for every pair
+            proc_py_queries.reserve(queries_len)
+
+            # processor None/False
+            if not processor:
+                for query in queries:
+                    # the unprocessed inputs are stored directly, with an extra reference
+                    Py_INCREF(query)
+                    proc_py_queries.push_back(<PyObject*>query)
+            # processor has to be called through python
+            else:
+                # a truthy value that is not callable (e.g. True) selects default_process
+                if not callable(processor):
+                    processor = default_process
+
+                for query in queries:
+                    proc_query = processor(query)
+                    Py_INCREF(proc_query)
+                    proc_py_queries.push_back(<PyObject*>proc_query)
+
+            # scorer(a, b) might not be equal to scorer(b, a)
+            # so the full matrix is computed instead of mirroring one triangle
+            return py_cdist_two_lists(proc_py_queries, proc_py_queries, scorer, score_cutoff, kwargs)
+
+    finally:
+        # decref all reference counts
+        for item in proc_py_queries:
+            Py_DECREF(<object>item)
+
+def cdist(queries, choices, *, scorer=ratio, processor=None, score_cutoff=None, **kwargs):
+    """
+    Compute distance/similarity between each pair of the two collections of inputs.
+
+    Parameters
+    ----------
+    queries : Collection[Sequence[Hashable]]
+        list of all strings that are used as queries
+    choices : Collection[Sequence[Hashable]]
+        list of all strings the queries should be compared with. When the same
+        object is passed as ``queries`` and ``choices`` the collection is compared
+        with itself and only preprocessed once.
+    scorer : Callable, optional
+        Optional callable that is used to calculate the matching score between
+        the query and each choice. This can be any of the scorers included in RapidFuzz
+        (both scorers that calculate the edit distance or the normalized edit distance).
+        Any other callable is invoked as
+        ``scorer(query, choice, processor=None, score_cutoff=score_cutoff, **kwargs)``
+        and its result is floored and stored as uint8.
+        fuzz.ratio is used by default.
+    processor : Callable, optional
+        Optional callable that is used to preprocess the strings before
+        comparing them. When processor is True ``utils.default_process``
+        is used. Default is None, which deactivates this behaviour.
+    score_cutoff : Any, optional
+        Optional argument for a score threshold. When an edit distance is used this represents the maximum
+        edit distance and matches with a `distance > score_cutoff` are inserted as -1. When a
+        normalized edit distance is used this represents the minimal similarity
+        and matches with a `similarity < score_cutoff` are inserted as 0.
+        For similarities the value has to be in the range 0 - 100 and is rounded down.
+        Default is None, which deactivates this behaviour.
+    **kwargs : Any, optional
+        any other named parameters are passed to the scorer. This can be used to pass
+        e.g. weights to string_metric.levenshtein
+
+    Returns
+    -------
+    numpy.ndarray
+        Matrix of shape ``(len(queries), len(choices))`` where the element ``[i, j]`` holds
+        the result for ``queries[i]`` and ``choices[j]``. The dtype is int32 for edit
+        distances and uint8 (similarity rounded down) for all other scorers.
+
+    Raises
+    ------
+    TypeError
+        When a similarity scorer is used and ``score_cutoff`` is outside of the range 0 - 100.
+    """
+    # identity (not equality) selects the single collection code path
+    if queries is choices:
+        return cdist_single_list(queries, scorer, processor, score_cutoff, kwargs)
+    else:
+        return cdist_two_lists(queries, choices, scorer, processor, score_cutoff, kwargs)
--- a/src/cpp_string_metric.cpp
+++ b/src/cpp_string_metric.cpp
--- a/src/cpp_string_metric.pyx
+++ b/src/cpp_string_metric.pyx
@ -55,9 +55,9 @@ def levenshtein(s1, s2, *, weights=(1,1,1), processor=None, max=None):

    Parameters
    ----------
-    s1 : str
+    s1 : Sequence[Hashable]
        First string to compare.
-    s2 : str
+    s2 : Sequence[Hashable]
        Second string to compare.
    weights : Tuple[int, int, int] or None, optional
        The weights for the three operations in the form
@ -259,9 +259,9 @@ def levenshtein_editops(s1, s2, *, processor=None):

    Parameters
    ----------
-    s1 : str
+    s1 : Sequence[Hashable]
        First string to compare.
-    s2 : str
+    s2 : Sequence[Hashable]
        Second string to compare.
    processor: bool or callable, optional
        Optional callable that is used to preprocess the strings before
@ -301,9 +301,9 @@ def normalized_levenshtein(s1, s2, *, weights=(1,1,1), processor=None, score_cut

    Parameters
    ----------
-    s1 : str
+    s1 : Sequence[Hashable]
        First string to compare.
-    s2 : str
+    s2 : Sequence[Hashable]
        Second string to compare.
    weights : Tuple[int, int, int] or None, optional
        The weights for the three operations in the form
@ -407,9 +407,9 @@ def hamming(s1, s2, *, processor=None, max=None):

    Parameters
    ----------
-    s1 : str
+    s1 : Sequence[Hashable]
        First string to compare.
-    s2 : str
+    s2 : Sequence[Hashable]
        Second string to compare.
    processor: bool or callable, optional
        Optional callable that is used to preprocess the strings before
@ -451,9 +451,9 @@ def normalized_hamming(s1, s2, *, processor=None, score_cutoff=None):

    Parameters
    ----------
-    s1 : str
+    s1 : Sequence[Hashable]
        First string to compare.
-    s2 : str
+    s2 : Sequence[Hashable]
        Second string to compare.
    processor: bool or callable, optional
        Optional callable that is used to preprocess the strings before
@ -498,9 +498,9 @@ def jaro_similarity(s1, s2, *, processor=None, score_cutoff=None):

    Parameters
    ----------
-    s1 : str
+    s1 : Sequence[Hashable]
        First string to compare.
-    s2 : str
+    s2 : Sequence[Hashable]
        Second string to compare.
    processor: bool or callable, optional
        Optional callable that is used to preprocess the strings before
@ -537,9 +537,9 @@ def jaro_winkler_similarity(s1, s2, *, double prefix_weight=0.1, processor=None,

    Parameters
    ----------
-    s1 : str
+    s1 : Sequence[Hashable]
        First string to compare.
-    s2 : str
+    s2 : Sequence[Hashable]
        Second string to compare.
    prefix_weight : float, optional
        Weight used for the common prefix of the two strings.
--- a/src/rapidfuzz/process.py
+++ b/src/rapidfuzz/process.py
@ -1,4 +1,4 @@
 # SPDX-License-Identifier: MIT
 # Copyright (C) 2021 Max Bachmann

-from rapidfuzz.cpp_process import extract, extractOne, extract_iter
+from rapidfuzz.cpp_process import extract, extractOne, extract_iter, cdist
--- a/tests/test_hypothesis.py
+++ b/tests/test_hypothesis.py
@ -9,6 +9,7 @@ import pytest
 from rapidfuzz import fuzz, process, utils, string_metric
 import random
 from math import isclose
+import numpy as np

 def levenshtein(s1, s2, weights=(1, 1, 1)):
    """
@ -79,6 +80,24 @@ def partial_ratio_short_needle(s1, s2):
        res = max(res, fuzz.ratio(s1, part))
    return res

+def cdist_scorer(queries, choices, scorer):
+    """
+    Reference implementation: uint8 matrix with scorer(query, choice) for every pair.
+    """
+    shape = (len(queries), len(choices))
+    expected = np.zeros(shape, dtype=np.uint8)
+
+    for row_idx, query in enumerate(queries):
+        # row is a view, so writing into it fills the matrix
+        row = expected[row_idx]
+        for col_idx, choice in enumerate(choices):
+            row[col_idx] = scorer(query, choice)
+
+    return expected
+
+def cdist_distance(queries, choices, scorer):
+    """
+    Reference implementation: int32 matrix with scorer(query, choice) for every pair.
+    """
+    shape = (len(queries), len(choices))
+    expected = np.zeros(shape, dtype=np.int32)
+
+    for row_idx, query in enumerate(queries):
+        # row is a view, so writing into it fills the matrix
+        row = expected[row_idx]
+        for col_idx, choice in enumerate(choices):
+            row[col_idx] = scorer(query, choice)
+
+    return expected
+
 def extractOne_scorer(s1, s2, scorer, processor=None, **kwargs):
    return process.extractOne(s1, [s2], processor=processor, scorer=scorer, **kwargs)[1]

@ -294,3 +313,19 @@ def test_only_identical_strings_extracted(scorer, processor, choices):

    for match in matches:
        assert processor(query) == processor(match[0])
+
+
+@given(queries=st.lists(st.text(), min_size=1), choices=st.lists(st.text(), min_size=1))
+@settings(max_examples=500, deadline=5000)
+def test_cdist(queries, choices):
+    """
+    Check process.cdist against the pure Python reference, first with two distinct
+    lists and then with the very same list object on both sides
+    """
+
+    for targets in (choices, queries):
+        expected = cdist_distance(queries, targets, scorer=string_metric.levenshtein)
+        actual = process.cdist(queries, targets, scorer=string_metric.levenshtein)
+        assert (actual == expected).all()