add cdist implementation

2021-09-10 12:44:54 +02:00 · 2021-09-10 12:44:54 +02:00 · 56f062b063
parent 1aed654d4f
commit 56f062b063
15 changed files with 11706 additions and 3583 deletions
--- a/.github/workflows/pythonbuild.yml
+++ b/.github/workflows/pythonbuild.yml
@ -101,7 +101,8 @@ jobs:
      fail-fast: false
      matrix:
        python_tag: [ "pp36-*", "pp37-*"]
-        os: [ubuntu-latest, windows-latest, macos-latest]
+        # NumPy ships no wheels for PyPy on macOS
        os: [ubuntu-latest, windows-latest]
    env:
      CIBW_BUILD: ${{matrix.python_tag}}
      # activate tests when the fix for
--- a/docs/process.rst
+++ b/docs/process.rst
@ -1,6 +1,10 @@
 process module
 ==============
 cdist
 ----------
 .. autofunction:: rapidfuzz.process.cdist
 extract
 -------
 .. autofunction:: rapidfuzz.process.extract
--- a/pyproject.toml
+++ b/pyproject.toml
@ -2,5 +2,6 @@
 requires = [
    "setuptools",
    "wheel",
    "oldest-supported-numpy"
 ]
 build-backend = "setuptools.build_meta"
--- a/setup.cfg
+++ b/setup.cfg
@ -26,6 +26,8 @@ package_dir=
    =src
 packages = find:
 python_requires = >=3.5
 install_requires =
    numpy
 [options.packages.find]
 where=src
--- a/setup.py
+++ b/setup.py
@ -2,6 +2,7 @@ from setuptools import setup, Extension
 from setuptools.command.build_ext import build_ext
 import sys
 import os
 import numpy as np
 # use with export RAPIDFUZZ_TRACE=1
 RAPIDFUZZ_TRACE = os.environ.get("RAPIDFUZZ_TRACE", False)
@ -46,7 +47,7 @@ ext_modules = [
            'src/cpp_process.cpp',
            'src/rapidfuzz-cpp/rapidfuzz/details/unicode.cpp'
        ],
-        include_dirs=["src/rapidfuzz-cpp/"],
+        include_dirs=["src/rapidfuzz-cpp/", np.get_include()],
        language='c++',
    ),
    Extension(
--- a/src/cpp_common.hpp
+++ b/src/cpp_common.hpp
@ -204,8 +204,9 @@ double RATIO##_impl_inner_##PROCESSOR(const proc_string& s1, const Sentence& s2,
 {                                                                                                  \
    switch(s1.kind){                                                                               \
    LIST_OF_CASES(RATIO_FUNC, PROCESSOR)                                                           \
    default:                                                                                       \
       throw std::logic_error("Reached end of control flow in " #RATIO "_impl_inner_" #PROCESSOR); \
    }                                                                                              \
    assert(false); /* silence any warnings about missing return value */                           \
 }
 /* generate <ratio_name>_impl_<processor> functions which are used internally
@ -217,8 +218,9 @@ double RATIO##_impl_##PROCESSOR(const proc_string& s1, const proc_string& s2, Ar
 {                                                                                            \
    switch(s1.kind){                                                                         \
    LIST_OF_CASES(RATIO##_impl_inner_##PROCESSOR, PROCESSOR)                                 \
    default:                                                                                 \
       throw std::logic_error("Reached end of control flow in " #RATIO "_impl_" #PROCESSOR); \
    }                                                                                        \
    assert(false); /* silence any warnings about missing return value */                     \
 }
 #define RATIO_IMPL_DEF(RATIO, RATIO_FUNC)            \
@ -236,8 +238,9 @@ size_t RATIO##_impl_inner_##PROCESSOR(const proc_string& s1, const Sentence& s2,
 {                                                                                                  \
    switch(s1.kind){                                                                               \
    LIST_OF_CASES(RATIO_FUNC, PROCESSOR)                                                           \
    default:                                                                                       \
       throw std::logic_error("Reached end of control flow in " #RATIO "_impl_inner_" #PROCESSOR); \
    }                                                                                              \
    assert(false); /* silence any warnings about missing return value */                           \
 }
 /* generate <ratio_name>_impl_<processor> functions which are used internally
@ -249,8 +252,9 @@ size_t RATIO##_impl_##PROCESSOR(const proc_string& s1, const proc_string& s2, Ar
 {                                                                                            \
    switch(s1.kind){                                                                         \
    LIST_OF_CASES(RATIO##_impl_inner_##PROCESSOR, PROCESSOR)                                 \
    default:                                                                                 \
       throw std::logic_error("Reached end of control flow in " #RATIO "_impl_" #PROCESSOR); \
    }                                                                                        \
    assert(false); /* silence any warnings about missing return value */                     \
 }
 #define DISTANCE_IMPL_DEF(RATIO, RATIO_FUNC)            \
@ -310,3 +314,36 @@ PyObject* RATIO##_default_process(const proc_string& s1, const proc_string& s2,
    size_t result = RATIO##_impl_default_process(s1, s2, max); \
    return dist_to_long(result);                                              \
 }
 /**
  * Run utils::default_process on the characters of `sentence`, interpreting
  * the buffer as an array of CharT.
  *
  * When `sentence` does not own its buffer (`allocated` is false) the characters
  * are first copied into a freshly malloc'ed buffer, so the caller's original
  * data is never handed to utils::default_process. The returned proc_string
  * always has `allocated == true`, and its `length` is whatever
  * utils::default_process returned.
  *
  * NOTE(review): this presumably relies on proc_string releasing `data` with
  * free() when `allocated` is set - confirm against the proc_string definition.
  *
  * @tparam CharT   character type matching `sentence.kind`
  * @param sentence string to preprocess (taken by value)
  * @return the preprocessed string, owning its buffer
  * @throws std::bad_alloc if the copy buffer cannot be allocated
  */
 template <typename CharT>
 proc_string default_process_func_impl(proc_string sentence) {
    CharT* str = static_cast<CharT*>(sentence.data);

    if (!sentence.allocated)
    {
       /* malloc(0) is allowed to return NULL, which would be misreported as an
        * out-of-memory condition for empty input, so request at least one element */
       const size_t alloc_len = (sentence.length != 0) ? sentence.length : 1;
       CharT* temp_str = static_cast<CharT*>(malloc(alloc_len * sizeof(CharT)));
       if (temp_str == NULL)
       {
           throw std::bad_alloc();
       }
       std::copy(str, str + sentence.length, temp_str);
       str = temp_str;
    }

    /* take ownership before calling into utils::default_process, so the buffer
     * is already tracked by `sentence` if that call throws */
    sentence.allocated = true;
    sentence.data = str;
    sentence.length = utils::default_process(str, sentence.length);
    return sentence;
 }
 /**
  * Dispatch to default_process_func_impl<TYPE> based on `sentence.kind`.
  *
  * `sentence` is moved into the selected implementation and the result of that
  * call is returned.
  *
  * NOTE(review): this is a non-inline function defined in a header; that is only
  * safe while the header is included by a single translation unit per linked
  * binary - confirm, or consider marking it inline.
  *
  * @param sentence string to preprocess (taken by value)
  * @return the proc_string produced by default_process_func_impl
  * @throws std::logic_error if `sentence.kind` matches none of the generated cases
  */
 proc_string default_process_func(proc_string sentence) {
    switch (sentence.kind) {
    /* X_ENUM is defined only for the LIST_OF_CASES() expansion below; presumably
     * LIST_OF_CASES invokes it once per supported kind, yielding one `case` label
     * each (LIST_OF_CASES is defined outside this view - TODO confirm) */
    # define X_ENUM(KIND, TYPE, MSVC_TUPLE) case KIND: return default_process_func_impl<TYPE>(std::move(sentence));
    LIST_OF_CASES()
    default:
       throw std::logic_error("Reached end of control flow in default_process_func");
    /* drop the helper macro again so it does not leak past this function */
    # undef X_ENUM
    }
 }
--- a/src/cpp_common.pxd
+++ b/src/cpp_common.pxd
@ -23,6 +23,7 @@ cdef extern from "cpp_common.hpp":
    int is_valid_string(object py_str) except +
    proc_string convert_string(object py_str)
    void validate_string(object py_str, const char* err) except +
    proc_string default_process_func(proc_string sentence) except +
 cdef inline proc_string hash_array(arr) except *:
    # TODO on Cpython this does not require any copies
--- a/src/cpp_fuzz.cpp
+++ b/src/cpp_fuzz.cpp
@ -1976,11 +1976,11 @@ static const char __pyx_k_partial_token_set_ratio[] = "partial_token_set_ratio";
 static const char __pyx_k_partial_token_sort_ratio[] = "partial_token_sort_ratio";
 static const char __pyx_k_token_set_ratio_line_217[] = "token_set_ratio (line 217)";
 static const char __pyx_k_token_sort_ratio_line_170[] = "token_sort_ratio (line 170)";
-static const char __pyx_k_Sorts_the_words_in_the_strings[] = "\n    Sorts the words in the strings and calculates the fuzz.ratio between them\n\n    Parameters\n    ----------\n    s1 : str\n        First string to compare.\n    s2 : str\n        Second string to compare.\n    processor: bool or callable, optional\n        Optional callable that is used to preprocess the strings before\n        comparing them. When processor is True ``utils.default_process``\n        is used. Default is True.\n    score_cutoff : float, optional\n        Optional argument for a score threshold as a float between 0 and 100.\n        For ratio < score_cutoff 0 is returned instead. Default is 0,\n        which deactivates this behaviour.\n\n    Returns\n    -------\n    similarity : float\n        similarity between s1 and s2 as a float between 0 and 100\n\n    Notes\n    -----\n    .. image:: img/token_sort_ratio.svg\n\n    Examples\n    --------\n    >>> fuzz.token_sort_ratio(\"fuzzy wuzzy was a bear\", \"wuzzy fuzzy was a bear\")\n    100.0\n    ";
+static const char __pyx_k_Sorts_the_words_in_the_strings[] = "\n    Sorts the words in the strings and calculates the fuzz.ratio between them\n\n    Parameters\n    ----------\n    s1 : Sequence[Hashable]\n        First string to compare.\n    s2 : Sequence[Hashable]\n        Second string to compare.\n    processor: bool or callable, optional\n        Optional callable that is used to preprocess the strings before\n        comparing them. When processor is True ``utils.default_process``\n        is used. Default is True.\n    score_cutoff : float, optional\n        Optional argument for a score threshold as a float between 0 and 100.\n        For ratio < score_cutoff 0 is returned instead. Default is 0,\n        which deactivates this behaviour.\n\n    Returns\n    -------\n    similarity : float\n        similarity between s1 and s2 as a float between 0 and 100\n\n    Notes\n    -----\n    .. image:: img/token_sort_ratio.svg\n\n    Examples\n    --------\n    >>> fuzz.token_sort_ratio(\"fuzzy wuzzy was a bear\", \"wuzzy fuzzy was a bear\")\n    100.0\n    ";
-static const char __pyx_k_Calculates_a_quick_ratio_betwee[] = "\n    Calculates a quick ratio between two strings using fuzz.ratio.\n    The only difference to fuzz.ratio is, that this preprocesses\n    the strings by default.\n\n    Parameters\n    ----------\n    s1 : str\n        First string to compare.\n    s2 : str\n        Second string to compare.\n    processor: bool or callable, optional\n        Optional callable that is used to preprocess the strings before\n        comparing them. When processor is True ``utils.default_process``\n        is used. Default is True.\n    score_cutoff : float, optional\n        Optional argument for a score threshold as a float between 0 and 100.\n        For ratio < score_cutoff 0 is returned instead. Default is 0,\n        which deactivates this behaviour.\n\n    Returns\n    -------\n    similarity : float\n        similarity between s1 and s2 as a float between 0 and 100\n\n    Examples\n    --------\n    >>> fuzz.QRatio(\"this is a test\", \"THIS is a test!\")\n    100.0\n    ";
+static const char __pyx_k_Calculates_a_quick_ratio_betwee[] = "\n    Calculates a quick ratio between two strings using fuzz.ratio.\n    The only difference to fuzz.ratio is, that this preprocesses\n    the strings by default.\n\n    Parameters\n    ----------\n    s1 : Sequence[Hashable]\n        First string to compare.\n    s2 : Sequence[Hashable]\n        Second string to compare.\n    processor: bool or callable, optional\n        Optional callable that is used to preprocess the strings before\n        comparing them. When processor is True ``utils.default_process``\n        is used. Default is True.\n    score_cutoff : float, optional\n        Optional argument for a score threshold as a float between 0 and 100.\n        For ratio < score_cutoff 0 is returned instead. Default is 0,\n        which deactivates this behaviour.\n\n    Returns\n    -------\n    similarity : float\n        similarity between s1 and s2 as a float between 0 and 100\n\n    Examples\n    --------\n    >>> fuzz.QRatio(\"this is a test\", \"THIS is a test!\")\n    100.0\n    ";
-static const char __pyx_k_Calculates_the_normalized_InDel[] = "\n    Calculates the normalized InDel distance.\n\n    Parameters\n    ----------\n    s1 : str\n        First string to compare.\n    s2 : str\n        Second string to compare.\n    processor: bool or callable, optional\n        Optional callable that is used to preprocess the strings before\n        comparing them. When processor is True ``utils.default_process``\n        is used. Default is None, which deactivates this behaviour.\n    score_cutoff : float, optional\n        Optional argument for a score threshold as a float between 0 and 100.\n        For ratio < score_cutoff 0 is returned instead. Default is 0,\n        which deactivates this behaviour.\n\n    Returns\n    -------\n    similarity : float\n        similarity between s1 and s2 as a float between 0 and 100\n\n    See Also\n    --------\n    rapidfuzz.string_metric.normalized_levenshtein : Normalized levenshtein distance\n\n    Notes\n    -----\n    .. image:: img/ratio.svg\n\n    Examples\n    --------\n    >>> fuzz.ratio(\"this is a test\", \"this is a test!\")\n    96.55171966552734\n    ";
+static const char __pyx_k_Calculates_the_normalized_InDel[] = "\n    Calculates the normalized InDel distance.\n\n    Parameters\n    ----------\n    s1 : Sequence[Hashable]\n        First string to compare.\n    s2 : Sequence[Hashable]\n        Second string to compare.\n    processor: bool or callable, optional\n        Optional callable that is used to preprocess the strings before\n        comparing them. When processor is True ``utils.default_process``\n        is used. Default is None, which deactivates this behaviour.\n    score_cutoff : float, optional\n        Optional argument for a score threshold as a float between 0 and 100.\n        For ratio < score_cutoff 0 is returned instead. Default is 0,\n        which deactivates this behaviour.\n\n    Returns\n    -------\n    similarity : float\n        similarity between s1 and s2 as a float between 0 and 100\n\n    See Also\n    --------\n    rapidfuzz.string_metric.normalized_levenshtein : Normalized levenshtein distance\n\n    Notes\n    -----\n    .. image:: img/ratio.svg\n\n    Examples\n    --------\n    >>> fuzz.ratio(\"this is a test\", \"this is a test!\")\n    96.55171966552734\n    ";
-static const char __pyx_k_Compares_the_words_in_the_strin[] = "\n    Compares the words in the strings based on unique and common words between them\n    using fuzz.ratio\n\n    Parameters\n    ----------\n    s1 : str\n        First string to compare.\n    s2 : str\n        Second string to compare.\n    processor: bool or callable, optional\n        Optional callable that is used to preprocess the strings before\n        comparing them. When processor is True ``utils.default_process``\n        is used. Default is True.\n    score_cutoff : float, optional\n        Optional argument for a score threshold as a float between 0 and 100.\n        For ratio < score_cutoff 0 is returned instead. Default is 0,\n        which deactivates this behaviour.\n\n    Returns\n    -------\n    similarity : float\n        similarity between s1 and s2 as a float between 0 and 100\n\n    Notes\n    -----\n    .. image:: img/token_set_ratio.svg\n\n    Examples\n    --------\n    >>> fuzz.token_sort_ratio(\"fuzzy was a bear\", \"fuzzy fuzzy was a bear\")\n    83.8709716796875\n    >>> fuzz.token_set_ratio(\"fuzzy was a bear\", \"fuzzy fuzzy was a bear\")\n    100.0\n    ";
+static const char __pyx_k_Compares_the_words_in_the_strin[] = "\n    Compares the words in the strings based on unique and common words between them\n    using fuzz.ratio\n\n    Parameters\n    ----------\n    s1 : Sequence[Hashable]\n        First string to compare.\n    s2 : Sequence[Hashable]\n        Second string to compare.\n    processor: bool or callable, optional\n        Optional callable that is used to preprocess the strings before\n        comparing them. When processor is True ``utils.default_process``\n        is used. Default is True.\n    score_cutoff : float, optional\n        Optional argument for a score threshold as a float between 0 and 100.\n        For ratio < score_cutoff 0 is returned instead. Default is 0,\n        which deactivates this behaviour.\n\n    Returns\n    -------\n    similarity : float\n        similarity between s1 and s2 as a float between 0 and 100\n\n    Notes\n    -----\n    .. image:: img/token_set_ratio.svg\n\n    Examples\n    --------\n    >>> fuzz.token_sort_ratio(\"fuzzy was a bear\", \"fuzzy fuzzy was a bear\")\n    83.8709716796875\n    >>> fuzz.token_set_ratio(\"fuzzy was a bear\", \"fuzzy fuzzy was a bear\")\n    100.0\n    ";
-static const char __pyx_k_Searches_for_the_optimal_alignm[] = "\n    Searches for the optimal alignment of the shorter string in the\n    longer string and returns the fuzz.ratio for this alignment.\n\n    Parameters\n    ----------\n    s1 : str\n        First string to compare.\n    s2 : str\n        Second string to compare.\n    processor: bool or callable, optional\n        Optional callable that is used to preprocess the strings before\n        comparing them. When processor is True ``utils.default_process``\n        is used. Default is None, which deactivates this behaviour.\n    score_cutoff : float, optional\n        Optional argument for a score threshold as a float between 0 and 100.\n        For ratio < score_cutoff 0 is returned instead. Default is 0,\n        which deactivates this behaviour.\n\n    Returns\n    -------\n    similarity : float\n        similarity between s1 and s2 as a float between 0 and 100\n\n    Notes\n    -----\n    Depending on the length of the needle (shorter string) different\n    implementations are used to improve the performance.\n\n    short needle (length \342\211\244 64):\n        When using a short needle length the fuzz.ratio is calculated for all\n        alignments that could result in an optimal alignment. It is\n        guaranteed to find the optimal alignment. For short needles this is very\n        fast, since for them fuzz.ratio runs in ``O(N)`` time. This results in a worst\n        case performance of ``O(NM)``.\n    \n    .. image:: img/partial_ratio_short_needle.svg\n\n    long needle (length > 64):\n        For long needles a similar implementation to FuzzyWuzzy is used.\n        This implementation only considers alignments which start at one\n        of the longest common substrings. This results in a worst case performance\n        of ``O(N[N/64]M)``. However usually most of the alignments can be skipped.\n        The following Python code shows the concept:\n\n        .. 
code-block:: python\n\n            blocks = SequenceMatcher(None, needle, longer, False).get_ma""tching_blocks()\n            score = 0\n            for block in blocks:\n                long_start = block[1] - block[0] if (block[1] - block[0]) > 0 else 0\n                long_end = long_start + len(shorter)\n                long_substr = longer[long_start:long_end]\n                score = max(score, fuzz.ratio(needle, long_substr))\n\n        This is a lot faster than checking all possible alignments. However it\n        only finds one of the best alignments and not necessarily the optimal one.\n\n    .. image:: img/partial_ratio_long_needle.svg\n\n    Examples\n    --------\n    >>> fuzz.partial_ratio(\"this is a test\", \"this is a test!\")\n    100.0\n    ";
+static const char __pyx_k_Searches_for_the_optimal_alignm[] = "\n    Searches for the optimal alignment of the shorter string in the\n    longer string and returns the fuzz.ratio for this alignment.\n\n    Parameters\n    ----------\n    s1 : Sequence[Hashable]\n        First string to compare.\n    s2 : Sequence[Hashable]\n        Second string to compare.\n    processor: bool or callable, optional\n        Optional callable that is used to preprocess the strings before\n        comparing them. When processor is True ``utils.default_process``\n        is used. Default is None, which deactivates this behaviour.\n    score_cutoff : float, optional\n        Optional argument for a score threshold as a float between 0 and 100.\n        For ratio < score_cutoff 0 is returned instead. Default is 0,\n        which deactivates this behaviour.\n\n    Returns\n    -------\n    similarity : float\n        similarity between s1 and s2 as a float between 0 and 100\n\n    Notes\n    -----\n    Depending on the length of the needle (shorter string) different\n    implementations are used to improve the performance.\n\n    short needle (length \342\211\244 64):\n        When using a short needle length the fuzz.ratio is calculated for all\n        alignments that could result in an optimal alignment. It is\n        guaranteed to find the optimal alignment. For short needles this is very\n        fast, since for them fuzz.ratio runs in ``O(N)`` time. This results in a worst\n        case performance of ``O(NM)``.\n    \n    .. image:: img/partial_ratio_short_needle.svg\n\n    long needle (length > 64):\n        For long needles a similar implementation to FuzzyWuzzy is used.\n        This implementation only considers alignments which start at one\n        of the longest common substrings. This results in a worst case performance\n        of ``O(N[N/64]M)``. However usually most of the alignments can be skipped.\n        The following Python code shows the concept:\n\n        .. 
code-block:: python\n\n            blocks = SequenceMatcher(None,"" needle, longer, False).get_matching_blocks()\n            score = 0\n            for block in blocks:\n                long_start = block[1] - block[0] if (block[1] - block[0]) > 0 else 0\n                long_end = long_start + len(shorter)\n                long_substr = longer[long_start:long_end]\n                score = max(score, fuzz.ratio(needle, long_substr))\n\n        This is a lot faster than checking all possible alignments. However it\n        only finds one of the best alignments and not necessarily the optimal one.\n\n    .. image:: img/partial_ratio_long_needle.svg\n\n    Examples\n    --------\n    >>> fuzz.partial_ratio(\"this is a test\", \"this is a test!\")\n    100.0\n    ";
 #if !CYTHON_USE_MODULE_STATE
 static PyObject *__pyx_kp_u_Calculates_a_quick_ratio_betwee;
 static PyObject *__pyx_kp_u_Calculates_the_normalized_InDel;
@ -2542,7 +2542,7 @@ PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds
 PyObject *__pyx_args, PyObject *__pyx_kwds
 #endif
 ); /*proto*/
-PyDoc_STRVAR(__pyx_doc_8cpp_fuzz_ratio, "\n    Calculates the normalized InDel distance.\n\n    Parameters\n    ----------\n    s1 : str\n        First string to compare.\n    s2 : str\n        Second string to compare.\n    processor: bool or callable, optional\n        Optional callable that is used to preprocess the strings before\n        comparing them. When processor is True ``utils.default_process``\n        is used. Default is None, which deactivates this behaviour.\n    score_cutoff : float, optional\n        Optional argument for a score threshold as a float between 0 and 100.\n        For ratio < score_cutoff 0 is returned instead. Default is 0,\n        which deactivates this behaviour.\n\n    Returns\n    -------\n    similarity : float\n        similarity between s1 and s2 as a float between 0 and 100\n\n    See Also\n    --------\n    rapidfuzz.string_metric.normalized_levenshtein : Normalized levenshtein distance\n\n    Notes\n    -----\n    .. image:: img/ratio.svg\n\n    Examples\n    --------\n    >>> fuzz.ratio(\"this is a test\", \"this is a test!\")\n    96.55171966552734\n    ");
+PyDoc_STRVAR(__pyx_doc_8cpp_fuzz_ratio, "\n    Calculates the normalized InDel distance.\n\n    Parameters\n    ----------\n    s1 : Sequence[Hashable]\n        First string to compare.\n    s2 : Sequence[Hashable]\n        Second string to compare.\n    processor: bool or callable, optional\n        Optional callable that is used to preprocess the strings before\n        comparing them. When processor is True ``utils.default_process``\n        is used. Default is None, which deactivates this behaviour.\n    score_cutoff : float, optional\n        Optional argument for a score threshold as a float between 0 and 100.\n        For ratio < score_cutoff 0 is returned instead. Default is 0,\n        which deactivates this behaviour.\n\n    Returns\n    -------\n    similarity : float\n        similarity between s1 and s2 as a float between 0 and 100\n\n    See Also\n    --------\n    rapidfuzz.string_metric.normalized_levenshtein : Normalized levenshtein distance\n\n    Notes\n    -----\n    .. image:: img/ratio.svg\n\n    Examples\n    --------\n    >>> fuzz.ratio(\"this is a test\", \"this is a test!\")\n    96.55171966552734\n    ");
 static PyMethodDef __pyx_mdef_8cpp_fuzz_1ratio = {"ratio", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_8cpp_fuzz_1ratio, __Pyx_METH_FASTCALL|METH_KEYWORDS, __pyx_doc_8cpp_fuzz_ratio};
 static PyObject *__pyx_pw_8cpp_fuzz_1ratio(PyObject *__pyx_self, 
 #if CYTHON_METH_FASTCALL
@ -2926,7 +2926,7 @@ PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds
 PyObject *__pyx_args, PyObject *__pyx_kwds
 #endif
 ); /*proto*/
-PyDoc_STRVAR(__pyx_doc_8cpp_fuzz_2partial_ratio, "\n    Searches for the optimal alignment of the shorter string in the\n    longer string and returns the fuzz.ratio for this alignment.\n\n    Parameters\n    ----------\n    s1 : str\n        First string to compare.\n    s2 : str\n        Second string to compare.\n    processor: bool or callable, optional\n        Optional callable that is used to preprocess the strings before\n        comparing them. When processor is True ``utils.default_process``\n        is used. Default is None, which deactivates this behaviour.\n    score_cutoff : float, optional\n        Optional argument for a score threshold as a float between 0 and 100.\n        For ratio < score_cutoff 0 is returned instead. Default is 0,\n        which deactivates this behaviour.\n\n    Returns\n    -------\n    similarity : float\n        similarity between s1 and s2 as a float between 0 and 100\n\n    Notes\n    -----\n    Depending on the length of the needle (shorter string) different\n    implementations are used to improve the performance.\n\n    short needle (length \342\211\244 64):\n        When using a short needle length the fuzz.ratio is calculated for all\n        alignments that could result in an optimal alignment. It is\n        guaranteed to find the optimal alignment. For short needles this is very\n        fast, since for them fuzz.ratio runs in ``O(N)`` time. This results in a worst\n        case performance of ``O(NM)``.\n    \n    .. image:: img/partial_ratio_short_needle.svg\n\n    long needle (length > 64):\n        For long needles a similar implementation to FuzzyWuzzy is used.\n        This implementation only considers alignments which start at one\n        of the longest common substrings. This results in a worst case performance\n        of ``O(N[N/64]M)``. However usually most of the alignments can be skipped.\n        The following Python code shows the concept:\n\n        .. 
code-block:: python\n\n            blocks = SequenceMatcher(None, needle, longer, False).get_ma""tching_blocks()\n            score = 0\n            for block in blocks:\n                long_start = block[1] - block[0] if (block[1] - block[0]) > 0 else 0\n                long_end = long_start + len(shorter)\n                long_substr = longer[long_start:long_end]\n                score = max(score, fuzz.ratio(needle, long_substr))\n\n        This is a lot faster than checking all possible alignments. However it\n        only finds one of the best alignments and not necessarily the optimal one.\n\n    .. image:: img/partial_ratio_long_needle.svg\n\n    Examples\n    --------\n    >>> fuzz.partial_ratio(\"this is a test\", \"this is a test!\")\n    100.0\n    ");
+PyDoc_STRVAR(__pyx_doc_8cpp_fuzz_2partial_ratio, "\n    Searches for the optimal alignment of the shorter string in the\n    longer string and returns the fuzz.ratio for this alignment.\n\n    Parameters\n    ----------\n    s1 : Sequence[Hashable]\n        First string to compare.\n    s2 : Sequence[Hashable]\n        Second string to compare.\n    processor: bool or callable, optional\n        Optional callable that is used to preprocess the strings before\n        comparing them. When processor is True ``utils.default_process``\n        is used. Default is None, which deactivates this behaviour.\n    score_cutoff : float, optional\n        Optional argument for a score threshold as a float between 0 and 100.\n        For ratio < score_cutoff 0 is returned instead. Default is 0,\n        which deactivates this behaviour.\n\n    Returns\n    -------\n    similarity : float\n        similarity between s1 and s2 as a float between 0 and 100\n\n    Notes\n    -----\n    Depending on the length of the needle (shorter string) different\n    implementations are used to improve the performance.\n\n    short needle (length \342\211\244 64):\n        When using a short needle length the fuzz.ratio is calculated for all\n        alignments that could result in an optimal alignment. It is\n        guaranteed to find the optimal alignment. For short needles this is very\n        fast, since for them fuzz.ratio runs in ``O(N)`` time. This results in a worst\n        case performance of ``O(NM)``.\n    \n    .. image:: img/partial_ratio_short_needle.svg\n\n    long needle (length > 64):\n        For long needles a similar implementation to FuzzyWuzzy is used.\n        This implementation only considers alignments which start at one\n        of the longest common substrings. This results in a worst case performance\n        of ``O(N[N/64]M)``. However usually most of the alignments can be skipped.\n        The following Python code shows the concept:\n\n        .. 
code-block:: python\n\n            blocks = SequenceMatcher(None,"" needle, longer, False).get_matching_blocks()\n            score = 0\n            for block in blocks:\n                long_start = block[1] - block[0] if (block[1] - block[0]) > 0 else 0\n                long_end = long_start + len(shorter)\n                long_substr = longer[long_start:long_end]\n                score = max(score, fuzz.ratio(needle, long_substr))\n\n        This is a lot faster than checking all possible alignments. However it\n        only finds one of the best alignments and not necessarily the optimal one.\n\n    .. image:: img/partial_ratio_long_needle.svg\n\n    Examples\n    --------\n    >>> fuzz.partial_ratio(\"this is a test\", \"this is a test!\")\n    100.0\n    ");
 static PyMethodDef __pyx_mdef_8cpp_fuzz_3partial_ratio = {"partial_ratio", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_8cpp_fuzz_3partial_ratio, __Pyx_METH_FASTCALL|METH_KEYWORDS, __pyx_doc_8cpp_fuzz_2partial_ratio};
 static PyObject *__pyx_pw_8cpp_fuzz_3partial_ratio(PyObject *__pyx_self, 
 #if CYTHON_METH_FASTCALL
@ -3310,7 +3310,7 @@ PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds
 PyObject *__pyx_args, PyObject *__pyx_kwds
 #endif
 ); /*proto*/
-PyDoc_STRVAR(__pyx_doc_8cpp_fuzz_4token_sort_ratio, "\n    Sorts the words in the strings and calculates the fuzz.ratio between them\n\n    Parameters\n    ----------\n    s1 : str\n        First string to compare.\n    s2 : str\n        Second string to compare.\n    processor: bool or callable, optional\n        Optional callable that is used to preprocess the strings before\n        comparing them. When processor is True ``utils.default_process``\n        is used. Default is True.\n    score_cutoff : float, optional\n        Optional argument for a score threshold as a float between 0 and 100.\n        For ratio < score_cutoff 0 is returned instead. Default is 0,\n        which deactivates this behaviour.\n\n    Returns\n    -------\n    similarity : float\n        similarity between s1 and s2 as a float between 0 and 100\n\n    Notes\n    -----\n    .. image:: img/token_sort_ratio.svg\n\n    Examples\n    --------\n    >>> fuzz.token_sort_ratio(\"fuzzy wuzzy was a bear\", \"wuzzy fuzzy was a bear\")\n    100.0\n    ");
+PyDoc_STRVAR(__pyx_doc_8cpp_fuzz_4token_sort_ratio, "\n    Sorts the words in the strings and calculates the fuzz.ratio between them\n\n    Parameters\n    ----------\n    s1 : Sequence[Hashable]\n        First string to compare.\n    s2 : Sequence[Hashable]\n        Second string to compare.\n    processor: bool or callable, optional\n        Optional callable that is used to preprocess the strings before\n        comparing them. When processor is True ``utils.default_process``\n        is used. Default is True.\n    score_cutoff : float, optional\n        Optional argument for a score threshold as a float between 0 and 100.\n        For ratio < score_cutoff 0 is returned instead. Default is 0,\n        which deactivates this behaviour.\n\n    Returns\n    -------\n    similarity : float\n        similarity between s1 and s2 as a float between 0 and 100\n\n    Notes\n    -----\n    .. image:: img/token_sort_ratio.svg\n\n    Examples\n    --------\n    >>> fuzz.token_sort_ratio(\"fuzzy wuzzy was a bear\", \"wuzzy fuzzy was a bear\")\n    100.0\n    ");
 static PyMethodDef __pyx_mdef_8cpp_fuzz_5token_sort_ratio = {"token_sort_ratio", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_8cpp_fuzz_5token_sort_ratio, __Pyx_METH_FASTCALL|METH_KEYWORDS, __pyx_doc_8cpp_fuzz_4token_sort_ratio};
 static PyObject *__pyx_pw_8cpp_fuzz_5token_sort_ratio(PyObject *__pyx_self, 
 #if CYTHON_METH_FASTCALL
@ -3694,7 +3694,7 @@ PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds
 PyObject *__pyx_args, PyObject *__pyx_kwds
 #endif
 ); /*proto*/
-PyDoc_STRVAR(__pyx_doc_8cpp_fuzz_6token_set_ratio, "\n    Compares the words in the strings based on unique and common words between them\n    using fuzz.ratio\n\n    Parameters\n    ----------\n    s1 : str\n        First string to compare.\n    s2 : str\n        Second string to compare.\n    processor: bool or callable, optional\n        Optional callable that is used to preprocess the strings before\n        comparing them. When processor is True ``utils.default_process``\n        is used. Default is True.\n    score_cutoff : float, optional\n        Optional argument for a score threshold as a float between 0 and 100.\n        For ratio < score_cutoff 0 is returned instead. Default is 0,\n        which deactivates this behaviour.\n\n    Returns\n    -------\n    similarity : float\n        similarity between s1 and s2 as a float between 0 and 100\n\n    Notes\n    -----\n    .. image:: img/token_set_ratio.svg\n\n    Examples\n    --------\n    >>> fuzz.token_sort_ratio(\"fuzzy was a bear\", \"fuzzy fuzzy was a bear\")\n    83.8709716796875\n    >>> fuzz.token_set_ratio(\"fuzzy was a bear\", \"fuzzy fuzzy was a bear\")\n    100.0\n    ");
+PyDoc_STRVAR(__pyx_doc_8cpp_fuzz_6token_set_ratio, "\n    Compares the words in the strings based on unique and common words between them\n    using fuzz.ratio\n\n    Parameters\n    ----------\n    s1 : Sequence[Hashable]\n        First string to compare.\n    s2 : Sequence[Hashable]\n        Second string to compare.\n    processor: bool or callable, optional\n        Optional callable that is used to preprocess the strings before\n        comparing them. When processor is True ``utils.default_process``\n        is used. Default is True.\n    score_cutoff : float, optional\n        Optional argument for a score threshold as a float between 0 and 100.\n        For ratio < score_cutoff 0 is returned instead. Default is 0,\n        which deactivates this behaviour.\n\n    Returns\n    -------\n    similarity : float\n        similarity between s1 and s2 as a float between 0 and 100\n\n    Notes\n    -----\n    .. image:: img/token_set_ratio.svg\n\n    Examples\n    --------\n    >>> fuzz.token_sort_ratio(\"fuzzy was a bear\", \"fuzzy fuzzy was a bear\")\n    83.8709716796875\n    >>> fuzz.token_set_ratio(\"fuzzy was a bear\", \"fuzzy fuzzy was a bear\")\n    100.0\n    ");
 static PyMethodDef __pyx_mdef_8cpp_fuzz_7token_set_ratio = {"token_set_ratio", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_8cpp_fuzz_7token_set_ratio, __Pyx_METH_FASTCALL|METH_KEYWORDS, __pyx_doc_8cpp_fuzz_6token_set_ratio};
 static PyObject *__pyx_pw_8cpp_fuzz_7token_set_ratio(PyObject *__pyx_self, 
 #if CYTHON_METH_FASTCALL
@ -4078,7 +4078,7 @@ PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds
 PyObject *__pyx_args, PyObject *__pyx_kwds
 #endif
 ); /*proto*/
-PyDoc_STRVAR(__pyx_doc_8cpp_fuzz_8token_ratio, "\n    Helper method that returns the maximum of fuzz.token_set_ratio and fuzz.token_sort_ratio\n    (faster than manually executing the two functions)\n\n    Parameters\n    ----------\n    s1 : str\n        First string to compare.\n    s2 : str\n        Second string to compare.\n    processor: bool or callable, optional\n        Optional callable that is used to preprocess the strings before\n        comparing them. When processor is True ``utils.default_process``\n        is used. Default is True.\n    score_cutoff : float, optional\n        Optional argument for a score threshold as a float between 0 and 100.\n        For ratio < score_cutoff 0 is returned instead. Default is 0,\n        which deactivates this behaviour.\n\n    Returns\n    -------\n    similarity : float\n        similarity between s1 and s2 as a float between 0 and 100\n\n    Notes\n    -----\n    .. image:: img/token_ratio.svg\n    ");
+PyDoc_STRVAR(__pyx_doc_8cpp_fuzz_8token_ratio, "\n    Helper method that returns the maximum of fuzz.token_set_ratio and fuzz.token_sort_ratio\n    (faster than manually executing the two functions)\n\n    Parameters\n    ----------\n    s1 : Sequence[Hashable]\n        First string to compare.\n    s2 : Sequence[Hashable]\n        Second string to compare.\n    processor: bool or callable, optional\n        Optional callable that is used to preprocess the strings before\n        comparing them. When processor is True ``utils.default_process``\n        is used. Default is True.\n    score_cutoff : float, optional\n        Optional argument for a score threshold as a float between 0 and 100.\n        For ratio < score_cutoff 0 is returned instead. Default is 0,\n        which deactivates this behaviour.\n\n    Returns\n    -------\n    similarity : float\n        similarity between s1 and s2 as a float between 0 and 100\n\n    Notes\n    -----\n    .. image:: img/token_ratio.svg\n    ");
 static PyMethodDef __pyx_mdef_8cpp_fuzz_9token_ratio = {"token_ratio", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_8cpp_fuzz_9token_ratio, __Pyx_METH_FASTCALL|METH_KEYWORDS, __pyx_doc_8cpp_fuzz_8token_ratio};
 static PyObject *__pyx_pw_8cpp_fuzz_9token_ratio(PyObject *__pyx_self, 
 #if CYTHON_METH_FASTCALL
@ -4462,7 +4462,7 @@ PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds
 PyObject *__pyx_args, PyObject *__pyx_kwds
 #endif
 ); /*proto*/
-PyDoc_STRVAR(__pyx_doc_8cpp_fuzz_10partial_token_sort_ratio, "\n    sorts the words in the strings and calculates the fuzz.partial_ratio between them\n\n    Parameters\n    ----------\n    s1 : str\n        First string to compare.\n    s2 : str\n        Second string to compare.\n    processor: bool or callable, optional\n        Optional callable that is used to preprocess the strings before\n        comparing them. When processor is True ``utils.default_process``\n        is used. Default is True.\n    score_cutoff : float, optional\n        Optional argument for a score threshold as a float between 0 and 100.\n        For ratio < score_cutoff 0 is returned instead. Default is 0,\n        which deactivates this behaviour.\n\n    Returns\n    -------\n    similarity : float\n        similarity between s1 and s2 as a float between 0 and 100\n\n    Notes\n    -----\n    .. image:: img/partial_token_sort_ratio.svg\n    ");
+PyDoc_STRVAR(__pyx_doc_8cpp_fuzz_10partial_token_sort_ratio, "\n    sorts the words in the strings and calculates the fuzz.partial_ratio between them\n\n    Parameters\n    ----------\n    s1 : Sequence[Hashable]\n        First string to compare.\n    s2 : Sequence[Hashable]\n        Second string to compare.\n    processor: bool or callable, optional\n        Optional callable that is used to preprocess the strings before\n        comparing them. When processor is True ``utils.default_process``\n        is used. Default is True.\n    score_cutoff : float, optional\n        Optional argument for a score threshold as a float between 0 and 100.\n        For ratio < score_cutoff 0 is returned instead. Default is 0,\n        which deactivates this behaviour.\n\n    Returns\n    -------\n    similarity : float\n        similarity between s1 and s2 as a float between 0 and 100\n\n    Notes\n    -----\n    .. image:: img/partial_token_sort_ratio.svg\n    ");
 static PyMethodDef __pyx_mdef_8cpp_fuzz_11partial_token_sort_ratio = {"partial_token_sort_ratio", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_8cpp_fuzz_11partial_token_sort_ratio, __Pyx_METH_FASTCALL|METH_KEYWORDS, __pyx_doc_8cpp_fuzz_10partial_token_sort_ratio};
 static PyObject *__pyx_pw_8cpp_fuzz_11partial_token_sort_ratio(PyObject *__pyx_self, 
 #if CYTHON_METH_FASTCALL
@ -4846,7 +4846,7 @@ PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds
 PyObject *__pyx_args, PyObject *__pyx_kwds
 #endif
 ); /*proto*/
-PyDoc_STRVAR(__pyx_doc_8cpp_fuzz_12partial_token_set_ratio, "\n    Compares the words in the strings based on unique and common words between them\n    using fuzz.partial_ratio\n\n    Parameters\n    ----------\n    s1 : str\n        First string to compare.\n    s2 : str\n        Second string to compare.\n    processor: bool or callable, optional\n        Optional callable that is used to preprocess the strings before\n        comparing them. When processor is True ``utils.default_process``\n        is used. Default is True.\n    score_cutoff : float, optional\n        Optional argument for a score threshold as a float between 0 and 100.\n        For ratio < score_cutoff 0 is returned instead. Default is 0,\n        which deactivates this behaviour.\n\n    Returns\n    -------\n    similarity : float\n        similarity between s1 and s2 as a float between 0 and 100\n\n    Notes\n    -----\n    .. image:: img/partial_token_set_ratio.svg\n    ");
+PyDoc_STRVAR(__pyx_doc_8cpp_fuzz_12partial_token_set_ratio, "\n    Compares the words in the strings based on unique and common words between them\n    using fuzz.partial_ratio\n\n    Parameters\n    ----------\n    s1 : Sequence[Hashable]\n        First string to compare.\n    s2 : Sequence[Hashable]\n        Second string to compare.\n    processor: bool or callable, optional\n        Optional callable that is used to preprocess the strings before\n        comparing them. When processor is True ``utils.default_process``\n        is used. Default is True.\n    score_cutoff : float, optional\n        Optional argument for a score threshold as a float between 0 and 100.\n        For ratio < score_cutoff 0 is returned instead. Default is 0,\n        which deactivates this behaviour.\n\n    Returns\n    -------\n    similarity : float\n        similarity between s1 and s2 as a float between 0 and 100\n\n    Notes\n    -----\n    .. image:: img/partial_token_set_ratio.svg\n    ");
 static PyMethodDef __pyx_mdef_8cpp_fuzz_13partial_token_set_ratio = {"partial_token_set_ratio", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_8cpp_fuzz_13partial_token_set_ratio, __Pyx_METH_FASTCALL|METH_KEYWORDS, __pyx_doc_8cpp_fuzz_12partial_token_set_ratio};
 static PyObject *__pyx_pw_8cpp_fuzz_13partial_token_set_ratio(PyObject *__pyx_self, 
 #if CYTHON_METH_FASTCALL
@ -5230,7 +5230,7 @@ PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds
 PyObject *__pyx_args, PyObject *__pyx_kwds
 #endif
 ); /*proto*/
-PyDoc_STRVAR(__pyx_doc_8cpp_fuzz_14partial_token_ratio, "\n    Helper method that returns the maximum of fuzz.partial_token_set_ratio and\n    fuzz.partial_token_sort_ratio (faster than manually executing the two functions)\n\n    Parameters\n    ----------\n    s1 : str\n        First string to compare.\n    s2 : str\n        Second string to compare.\n    processor: bool or callable, optional\n        Optional callable that is used to preprocess the strings before\n        comparing them. When processor is True ``utils.default_process``\n        is used. Default is True.\n    score_cutoff : float, optional\n        Optional argument for a score threshold as a float between 0 and 100.\n        For ratio < score_cutoff 0 is returned instead. Default is 0,\n        which deactivates this behaviour.\n\n    Returns\n    -------\n    similarity : float\n        similarity between s1 and s2 as a float between 0 and 100\n\n    Notes\n    -----\n    .. image:: img/partial_token_ratio.svg\n    ");
+PyDoc_STRVAR(__pyx_doc_8cpp_fuzz_14partial_token_ratio, "\n    Helper method that returns the maximum of fuzz.partial_token_set_ratio and\n    fuzz.partial_token_sort_ratio (faster than manually executing the two functions)\n\n    Parameters\n    ----------\n    s1 : Sequence[Hashable]\n        First string to compare.\n    s2 : Sequence[Hashable]\n        Second string to compare.\n    processor: bool or callable, optional\n        Optional callable that is used to preprocess the strings before\n        comparing them. When processor is True ``utils.default_process``\n        is used. Default is True.\n    score_cutoff : float, optional\n        Optional argument for a score threshold as a float between 0 and 100.\n        For ratio < score_cutoff 0 is returned instead. Default is 0,\n        which deactivates this behaviour.\n\n    Returns\n    -------\n    similarity : float\n        similarity between s1 and s2 as a float between 0 and 100\n\n    Notes\n    -----\n    .. image:: img/partial_token_ratio.svg\n    ");
 static PyMethodDef __pyx_mdef_8cpp_fuzz_15partial_token_ratio = {"partial_token_ratio", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_8cpp_fuzz_15partial_token_ratio, __Pyx_METH_FASTCALL|METH_KEYWORDS, __pyx_doc_8cpp_fuzz_14partial_token_ratio};
 static PyObject *__pyx_pw_8cpp_fuzz_15partial_token_ratio(PyObject *__pyx_self, 
 #if CYTHON_METH_FASTCALL
@ -5614,7 +5614,7 @@ PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds
 PyObject *__pyx_args, PyObject *__pyx_kwds
 #endif
 ); /*proto*/
-PyDoc_STRVAR(__pyx_doc_8cpp_fuzz_16WRatio, "\n    Calculates a weighted ratio based on the other ratio algorithms\n\n    Parameters\n    ----------\n    s1 : str\n        First string to compare.\n    s2 : str\n        Second string to compare.\n    processor: bool or callable, optional\n        Optional callable that is used to preprocess the strings before\n        comparing them. When processor is True ``utils.default_process``\n        is used. Default is True.\n    score_cutoff : float, optional\n        Optional argument for a score threshold as a float between 0 and 100.\n        For ratio < score_cutoff 0 is returned instead. Default is 0,\n        which deactivates this behaviour.\n\n    Returns\n    -------\n    similarity : float\n        similarity between s1 and s2 as a float between 0 and 100\n\n    Notes\n    -----\n    .. image:: img/WRatio.svg\n    ");
+PyDoc_STRVAR(__pyx_doc_8cpp_fuzz_16WRatio, "\n    Calculates a weighted ratio based on the other ratio algorithms\n\n    Parameters\n    ----------\n    s1 : Sequence[Hashable]\n        First string to compare.\n    s2 : Sequence[Hashable]\n        Second string to compare.\n    processor: bool or callable, optional\n        Optional callable that is used to preprocess the strings before\n        comparing them. When processor is True ``utils.default_process``\n        is used. Default is True.\n    score_cutoff : float, optional\n        Optional argument for a score threshold as a float between 0 and 100.\n        For ratio < score_cutoff 0 is returned instead. Default is 0,\n        which deactivates this behaviour.\n\n    Returns\n    -------\n    similarity : float\n        similarity between s1 and s2 as a float between 0 and 100\n\n    Notes\n    -----\n    .. image:: img/WRatio.svg\n    ");
 static PyMethodDef __pyx_mdef_8cpp_fuzz_17WRatio = {"WRatio", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_8cpp_fuzz_17WRatio, __Pyx_METH_FASTCALL|METH_KEYWORDS, __pyx_doc_8cpp_fuzz_16WRatio};
 static PyObject *__pyx_pw_8cpp_fuzz_17WRatio(PyObject *__pyx_self, 
 #if CYTHON_METH_FASTCALL
@ -5998,7 +5998,7 @@ PyObject *const *__pyx_args, Py_ssize_t __pyx_nargs, PyObject *__pyx_kwds
 PyObject *__pyx_args, PyObject *__pyx_kwds
 #endif
 ); /*proto*/
-PyDoc_STRVAR(__pyx_doc_8cpp_fuzz_18QRatio, "\n    Calculates a quick ratio between two strings using fuzz.ratio.\n    The only difference to fuzz.ratio is, that this preprocesses\n    the strings by default.\n\n    Parameters\n    ----------\n    s1 : str\n        First string to compare.\n    s2 : str\n        Second string to compare.\n    processor: bool or callable, optional\n        Optional callable that is used to preprocess the strings before\n        comparing them. When processor is True ``utils.default_process``\n        is used. Default is True.\n    score_cutoff : float, optional\n        Optional argument for a score threshold as a float between 0 and 100.\n        For ratio < score_cutoff 0 is returned instead. Default is 0,\n        which deactivates this behaviour.\n\n    Returns\n    -------\n    similarity : float\n        similarity between s1 and s2 as a float between 0 and 100\n\n    Examples\n    --------\n    >>> fuzz.QRatio(\"this is a test\", \"THIS is a test!\")\n    100.0\n    ");
+PyDoc_STRVAR(__pyx_doc_8cpp_fuzz_18QRatio, "\n    Calculates a quick ratio between two strings using fuzz.ratio.\n    The only difference to fuzz.ratio is, that this preprocesses\n    the strings by default.\n\n    Parameters\n    ----------\n    s1 : Sequence[Hashable]\n        First string to compare.\n    s2 : Sequence[Hashable]\n        Second string to compare.\n    processor: bool or callable, optional\n        Optional callable that is used to preprocess the strings before\n        comparing them. When processor is True ``utils.default_process``\n        is used. Default is True.\n    score_cutoff : float, optional\n        Optional argument for a score threshold as a float between 0 and 100.\n        For ratio < score_cutoff 0 is returned instead. Default is 0,\n        which deactivates this behaviour.\n\n    Returns\n    -------\n    similarity : float\n        similarity between s1 and s2 as a float between 0 and 100\n\n    Examples\n    --------\n    >>> fuzz.QRatio(\"this is a test\", \"THIS is a test!\")\n    100.0\n    ");
 static PyMethodDef __pyx_mdef_8cpp_fuzz_19QRatio = {"QRatio", (PyCFunction)(void*)(__Pyx_PyCFunction_FastCallWithKeywords)__pyx_pw_8cpp_fuzz_19QRatio, __Pyx_METH_FASTCALL|METH_KEYWORDS, __pyx_doc_8cpp_fuzz_18QRatio};
 static PyObject *__pyx_pw_8cpp_fuzz_19QRatio(PyObject *__pyx_self, 
 #if CYTHON_METH_FASTCALL
@ -6364,8 +6364,8 @@ static PyObject *__pyx_pf_8cpp_fuzz_18QRatio(CYTHON_UNUSED PyObject *__pyx_self,
  return __pyx_r;
 }
-/* "cpp_common.pxd":27
+/* "cpp_common.pxd":28
- *     void validate_string(object py_str, const char* err) except +
+ *     proc_string default_process_func(proc_string sentence) except +
 * 
 * cdef inline proc_string hash_array(arr) except *:             # <<<<<<<<<<<<<<
 *     # TODO on Cpython this does not require any copies
@ -6407,30 +6407,30 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
  int __pyx_clineno = 0;
  __Pyx_RefNannySetupContext("hash_array", 0);
-  /* "cpp_common.pxd":30
+  /* "cpp_common.pxd":31
 *     # TODO on Cpython this does not require any copies
 *     cdef proc_string s_proc
 *     cdef Py_UCS4 typecode = <Py_UCS4>arr.typecode             # <<<<<<<<<<<<<<
 *     s_proc.length = <size_t>len(arr)
 * 
 */
-  __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_arr, __pyx_n_s_typecode); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 30, __pyx_L1_error)
+  __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_arr, __pyx_n_s_typecode); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 31, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
-  __pyx_t_2 = __Pyx_PyObject_AsPy_UCS4(__pyx_t_1); if (unlikely((__pyx_t_2 == (Py_UCS4)-1) && PyErr_Occurred())) __PYX_ERR(1, 30, __pyx_L1_error)
+  __pyx_t_2 = __Pyx_PyObject_AsPy_UCS4(__pyx_t_1); if (unlikely((__pyx_t_2 == (Py_UCS4)-1) && PyErr_Occurred())) __PYX_ERR(1, 31, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
  __pyx_v_typecode = ((Py_UCS4)__pyx_t_2);
-  /* "cpp_common.pxd":31
+  /* "cpp_common.pxd":32
 *     cdef proc_string s_proc
 *     cdef Py_UCS4 typecode = <Py_UCS4>arr.typecode
 *     s_proc.length = <size_t>len(arr)             # <<<<<<<<<<<<<<
 * 
 *     s_proc.data = malloc(s_proc.length * sizeof(uint64_t))
 */
-  __pyx_t_3 = PyObject_Length(__pyx_v_arr); if (unlikely(__pyx_t_3 == ((Py_ssize_t)-1))) __PYX_ERR(1, 31, __pyx_L1_error)
+  __pyx_t_3 = PyObject_Length(__pyx_v_arr); if (unlikely(__pyx_t_3 == ((Py_ssize_t)-1))) __PYX_ERR(1, 32, __pyx_L1_error)
  __pyx_v_s_proc.length = ((size_t)__pyx_t_3);
-  /* "cpp_common.pxd":33
+  /* "cpp_common.pxd":34
 *     s_proc.length = <size_t>len(arr)
 * 
 *     s_proc.data = malloc(s_proc.length * sizeof(uint64_t))             # <<<<<<<<<<<<<<
@ -6439,7 +6439,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
 */
  __pyx_v_s_proc.data = malloc((__pyx_v_s_proc.length * (sizeof(uint64_t))));
-  /* "cpp_common.pxd":35
+  /* "cpp_common.pxd":36
 *     s_proc.data = malloc(s_proc.length * sizeof(uint64_t))
 * 
 *     if s_proc.data == NULL:             # <<<<<<<<<<<<<<
@ -6449,16 +6449,16 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
  __pyx_t_4 = ((__pyx_v_s_proc.data == NULL) != 0);
  if (unlikely(__pyx_t_4)) {
-    /* "cpp_common.pxd":36
+    /* "cpp_common.pxd":37
 * 
 *     if s_proc.data == NULL:
 *         raise MemoryError             # <<<<<<<<<<<<<<
 * 
 *     try:
 */
-    PyErr_NoMemory(); __PYX_ERR(1, 36, __pyx_L1_error)
+    PyErr_NoMemory(); __PYX_ERR(1, 37, __pyx_L1_error)
-    /* "cpp_common.pxd":35
+    /* "cpp_common.pxd":36
 *     s_proc.data = malloc(s_proc.length * sizeof(uint64_t))
 * 
 *     if s_proc.data == NULL:             # <<<<<<<<<<<<<<
@ -6467,7 +6467,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
 */
  }
-  /* "cpp_common.pxd":38
+  /* "cpp_common.pxd":39
 *         raise MemoryError
 * 
 *     try:             # <<<<<<<<<<<<<<
@ -6483,7 +6483,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
    __Pyx_XGOTREF(__pyx_t_7);
    /*try:*/ {
-      /* "cpp_common.pxd":40
+      /* "cpp_common.pxd":41
 *     try:
 *         # ignore signed/unsigned, since it is not relevant in any of the algorithms
 *         if typecode in {'b', 'B'}: # signed/unsigned char             # <<<<<<<<<<<<<<
@ -6494,7 +6494,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
        case 98:
        case 66:
-        /* "cpp_common.pxd":41
+        /* "cpp_common.pxd":42
 *         # ignore signed/unsigned, since it is not relevant in any of the algorithms
 *         if typecode in {'b', 'B'}: # signed/unsigned char
 *             s_proc.kind = RAPIDFUZZ_UINT64             # <<<<<<<<<<<<<<
@ -6503,7 +6503,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
 */
        __pyx_v_s_proc.kind = RAPIDFUZZ_UINT64;
-        /* "cpp_common.pxd":42
+        /* "cpp_common.pxd":43
 *         if typecode in {'b', 'B'}: # signed/unsigned char
 *             s_proc.kind = RAPIDFUZZ_UINT64
 *             for i in range(s_proc.length):             # <<<<<<<<<<<<<<
@ -6515,21 +6515,21 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
        for (__pyx_t_10 = 0; __pyx_t_10 < __pyx_t_9; __pyx_t_10+=1) {
          __pyx_v_i = __pyx_t_10;
-          /* "cpp_common.pxd":43
+          /* "cpp_common.pxd":44
 *             s_proc.kind = RAPIDFUZZ_UINT64
 *             for i in range(s_proc.length):
 *                 (<uint64_t*>s_proc.data)[i] = <uint64_t>arr[i]             # <<<<<<<<<<<<<<
 *         elif typecode == 'u': # 'u' wchar_t
 *             s_proc.kind = RAPIDFUZZ_UINT64
 */
-          __pyx_t_1 = __Pyx_GetItemInt(__pyx_v_arr, __pyx_v_i, size_t, 0, __Pyx_PyInt_FromSize_t, 0, 0, 1); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 43, __pyx_L4_error)
+          __pyx_t_1 = __Pyx_GetItemInt(__pyx_v_arr, __pyx_v_i, size_t, 0, __Pyx_PyInt_FromSize_t, 0, 0, 1); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 44, __pyx_L4_error)
          __Pyx_GOTREF(__pyx_t_1);
-          __pyx_t_11 = __Pyx_PyInt_As_uint64_t(__pyx_t_1); if (unlikely((__pyx_t_11 == ((uint64_t)-1)) && PyErr_Occurred())) __PYX_ERR(1, 43, __pyx_L4_error)
+          __pyx_t_11 = __Pyx_PyInt_As_uint64_t(__pyx_t_1); if (unlikely((__pyx_t_11 == ((uint64_t)-1)) && PyErr_Occurred())) __PYX_ERR(1, 44, __pyx_L4_error)
          __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
          (((uint64_t *)__pyx_v_s_proc.data)[__pyx_v_i]) = ((uint64_t)__pyx_t_11);
        }
-        /* "cpp_common.pxd":40
+        /* "cpp_common.pxd":41
 *     try:
 *         # ignore signed/unsigned, since it is not relevant in any of the algorithms
 *         if typecode in {'b', 'B'}: # signed/unsigned char             # <<<<<<<<<<<<<<
@ -6539,7 +6539,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
        break;
        case 0x75:
-        /* "cpp_common.pxd":45
+        /* "cpp_common.pxd":46
 *                 (<uint64_t*>s_proc.data)[i] = <uint64_t>arr[i]
 *         elif typecode == 'u': # 'u' wchar_t
 *             s_proc.kind = RAPIDFUZZ_UINT64             # <<<<<<<<<<<<<<
@ -6548,7 +6548,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
 */
        __pyx_v_s_proc.kind = RAPIDFUZZ_UINT64;
-        /* "cpp_common.pxd":46
+        /* "cpp_common.pxd":47
 *         elif typecode == 'u': # 'u' wchar_t
 *             s_proc.kind = RAPIDFUZZ_UINT64
 *             for i in range(s_proc.length):             # <<<<<<<<<<<<<<
@ -6560,21 +6560,21 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
        for (__pyx_t_10 = 0; __pyx_t_10 < __pyx_t_9; __pyx_t_10+=1) {
          __pyx_v_i = __pyx_t_10;
-          /* "cpp_common.pxd":47
+          /* "cpp_common.pxd":48
 *             s_proc.kind = RAPIDFUZZ_UINT64
 *             for i in range(s_proc.length):
 *                 (<uint64_t*>s_proc.data)[i] = <uint64_t><Py_UCS4>arr[i]             # <<<<<<<<<<<<<<
 *         elif typecode in {'h', 'H'}: #  signed/unsigned short
 *             s_proc.kind = RAPIDFUZZ_UINT64
 */
-          __pyx_t_1 = __Pyx_GetItemInt(__pyx_v_arr, __pyx_v_i, size_t, 0, __Pyx_PyInt_FromSize_t, 0, 0, 1); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 47, __pyx_L4_error)
+          __pyx_t_1 = __Pyx_GetItemInt(__pyx_v_arr, __pyx_v_i, size_t, 0, __Pyx_PyInt_FromSize_t, 0, 0, 1); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 48, __pyx_L4_error)
          __Pyx_GOTREF(__pyx_t_1);
-          __pyx_t_2 = __Pyx_PyObject_AsPy_UCS4(__pyx_t_1); if (unlikely((__pyx_t_2 == (Py_UCS4)-1) && PyErr_Occurred())) __PYX_ERR(1, 47, __pyx_L4_error)
+          __pyx_t_2 = __Pyx_PyObject_AsPy_UCS4(__pyx_t_1); if (unlikely((__pyx_t_2 == (Py_UCS4)-1) && PyErr_Occurred())) __PYX_ERR(1, 48, __pyx_L4_error)
          __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
          (((uint64_t *)__pyx_v_s_proc.data)[__pyx_v_i]) = ((uint64_t)((Py_UCS4)__pyx_t_2));
        }
-        /* "cpp_common.pxd":44
+        /* "cpp_common.pxd":45
 *             for i in range(s_proc.length):
 *                 (<uint64_t*>s_proc.data)[i] = <uint64_t>arr[i]
 *         elif typecode == 'u': # 'u' wchar_t             # <<<<<<<<<<<<<<
@ -6584,7 +6584,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
        break;
        case 0x68:
-        /* "cpp_common.pxd":48
+        /* "cpp_common.pxd":49
 *             for i in range(s_proc.length):
 *                 (<uint64_t*>s_proc.data)[i] = <uint64_t><Py_UCS4>arr[i]
 *         elif typecode in {'h', 'H'}: #  signed/unsigned short             # <<<<<<<<<<<<<<
@ -6593,7 +6593,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
 */
        case 72:
-        /* "cpp_common.pxd":49
+        /* "cpp_common.pxd":50
 *                 (<uint64_t*>s_proc.data)[i] = <uint64_t><Py_UCS4>arr[i]
 *         elif typecode in {'h', 'H'}: #  signed/unsigned short
 *             s_proc.kind = RAPIDFUZZ_UINT64             # <<<<<<<<<<<<<<
@ -6602,7 +6602,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
 */
        __pyx_v_s_proc.kind = RAPIDFUZZ_UINT64;
-        /* "cpp_common.pxd":50
+        /* "cpp_common.pxd":51
 *         elif typecode in {'h', 'H'}: #  signed/unsigned short
 *             s_proc.kind = RAPIDFUZZ_UINT64
 *             for i in range(s_proc.length):             # <<<<<<<<<<<<<<
@ -6614,21 +6614,21 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
        for (__pyx_t_10 = 0; __pyx_t_10 < __pyx_t_9; __pyx_t_10+=1) {
          __pyx_v_i = __pyx_t_10;
-          /* "cpp_common.pxd":51
+          /* "cpp_common.pxd":52
 *             s_proc.kind = RAPIDFUZZ_UINT64
 *             for i in range(s_proc.length):
 *                 (<uint64_t*>s_proc.data)[i] = <uint64_t>arr[i]             # <<<<<<<<<<<<<<
 *         elif typecode in {'i', 'I'}: # signed/unsigned int
 *             s_proc.kind = RAPIDFUZZ_UINT64
 */
-          __pyx_t_1 = __Pyx_GetItemInt(__pyx_v_arr, __pyx_v_i, size_t, 0, __Pyx_PyInt_FromSize_t, 0, 0, 1); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 51, __pyx_L4_error)
+          __pyx_t_1 = __Pyx_GetItemInt(__pyx_v_arr, __pyx_v_i, size_t, 0, __Pyx_PyInt_FromSize_t, 0, 0, 1); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 52, __pyx_L4_error)
          __Pyx_GOTREF(__pyx_t_1);
-          __pyx_t_11 = __Pyx_PyInt_As_uint64_t(__pyx_t_1); if (unlikely((__pyx_t_11 == ((uint64_t)-1)) && PyErr_Occurred())) __PYX_ERR(1, 51, __pyx_L4_error)
+          __pyx_t_11 = __Pyx_PyInt_As_uint64_t(__pyx_t_1); if (unlikely((__pyx_t_11 == ((uint64_t)-1)) && PyErr_Occurred())) __PYX_ERR(1, 52, __pyx_L4_error)
          __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
          (((uint64_t *)__pyx_v_s_proc.data)[__pyx_v_i]) = ((uint64_t)__pyx_t_11);
        }
-        /* "cpp_common.pxd":48
+        /* "cpp_common.pxd":49
 *             for i in range(s_proc.length):
 *                 (<uint64_t*>s_proc.data)[i] = <uint64_t><Py_UCS4>arr[i]
 *         elif typecode in {'h', 'H'}: #  signed/unsigned short             # <<<<<<<<<<<<<<
@ -6638,7 +6638,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
        break;
        case 0x69:
-        /* "cpp_common.pxd":52
+        /* "cpp_common.pxd":53
 *             for i in range(s_proc.length):
 *                 (<uint64_t*>s_proc.data)[i] = <uint64_t>arr[i]
 *         elif typecode in {'i', 'I'}: # signed/unsigned int             # <<<<<<<<<<<<<<
@ -6647,7 +6647,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
 */
        case 73:
-        /* "cpp_common.pxd":53
+        /* "cpp_common.pxd":54
 *                 (<uint64_t*>s_proc.data)[i] = <uint64_t>arr[i]
 *         elif typecode in {'i', 'I'}: # signed/unsigned int
 *             s_proc.kind = RAPIDFUZZ_UINT64             # <<<<<<<<<<<<<<
@ -6656,7 +6656,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
 */
        __pyx_v_s_proc.kind = RAPIDFUZZ_UINT64;
-        /* "cpp_common.pxd":54
+        /* "cpp_common.pxd":55
 *         elif typecode in {'i', 'I'}: # signed/unsigned int
 *             s_proc.kind = RAPIDFUZZ_UINT64
 *             for i in range(s_proc.length):             # <<<<<<<<<<<<<<
@ -6668,21 +6668,21 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
        for (__pyx_t_10 = 0; __pyx_t_10 < __pyx_t_9; __pyx_t_10+=1) {
          __pyx_v_i = __pyx_t_10;
-          /* "cpp_common.pxd":55
+          /* "cpp_common.pxd":56
 *             s_proc.kind = RAPIDFUZZ_UINT64
 *             for i in range(s_proc.length):
 *                 (<uint64_t*>s_proc.data)[i] = <uint64_t>arr[i]             # <<<<<<<<<<<<<<
 *         elif typecode in {'l', 'L'}: # signed/unsigned long
 *             s_proc.kind = RAPIDFUZZ_UINT64
 */
-          __pyx_t_1 = __Pyx_GetItemInt(__pyx_v_arr, __pyx_v_i, size_t, 0, __Pyx_PyInt_FromSize_t, 0, 0, 1); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 55, __pyx_L4_error)
+          __pyx_t_1 = __Pyx_GetItemInt(__pyx_v_arr, __pyx_v_i, size_t, 0, __Pyx_PyInt_FromSize_t, 0, 0, 1); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 56, __pyx_L4_error)
          __Pyx_GOTREF(__pyx_t_1);
-          __pyx_t_11 = __Pyx_PyInt_As_uint64_t(__pyx_t_1); if (unlikely((__pyx_t_11 == ((uint64_t)-1)) && PyErr_Occurred())) __PYX_ERR(1, 55, __pyx_L4_error)
+          __pyx_t_11 = __Pyx_PyInt_As_uint64_t(__pyx_t_1); if (unlikely((__pyx_t_11 == ((uint64_t)-1)) && PyErr_Occurred())) __PYX_ERR(1, 56, __pyx_L4_error)
          __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
          (((uint64_t *)__pyx_v_s_proc.data)[__pyx_v_i]) = ((uint64_t)__pyx_t_11);
        }
-        /* "cpp_common.pxd":52
+        /* "cpp_common.pxd":53
 *             for i in range(s_proc.length):
 *                 (<uint64_t*>s_proc.data)[i] = <uint64_t>arr[i]
 *         elif typecode in {'i', 'I'}: # signed/unsigned int             # <<<<<<<<<<<<<<
@ -6692,7 +6692,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
        break;
        case 0x6C:
-        /* "cpp_common.pxd":56
+        /* "cpp_common.pxd":57
 *             for i in range(s_proc.length):
 *                 (<uint64_t*>s_proc.data)[i] = <uint64_t>arr[i]
 *         elif typecode in {'l', 'L'}: # signed/unsigned long             # <<<<<<<<<<<<<<
@ -6701,7 +6701,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
 */
        case 76:
-        /* "cpp_common.pxd":57
+        /* "cpp_common.pxd":58
 *                 (<uint64_t*>s_proc.data)[i] = <uint64_t>arr[i]
 *         elif typecode in {'l', 'L'}: # signed/unsigned long
 *             s_proc.kind = RAPIDFUZZ_UINT64             # <<<<<<<<<<<<<<
@ -6710,7 +6710,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
 */
        __pyx_v_s_proc.kind = RAPIDFUZZ_UINT64;
-        /* "cpp_common.pxd":58
+        /* "cpp_common.pxd":59
 *         elif typecode in {'l', 'L'}: # signed/unsigned long
 *             s_proc.kind = RAPIDFUZZ_UINT64
 *             for i in range(s_proc.length):             # <<<<<<<<<<<<<<
@ -6722,21 +6722,21 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
        for (__pyx_t_10 = 0; __pyx_t_10 < __pyx_t_9; __pyx_t_10+=1) {
          __pyx_v_i = __pyx_t_10;
-          /* "cpp_common.pxd":59
+          /* "cpp_common.pxd":60
 *             s_proc.kind = RAPIDFUZZ_UINT64
 *             for i in range(s_proc.length):
 *                 (<uint64_t*>s_proc.data)[i] = <uint64_t>arr[i]             # <<<<<<<<<<<<<<
 *         elif typecode in {'q', 'Q'}: # signed/unsigned long long
 *             s_proc.kind = RAPIDFUZZ_UINT64
 */
-          __pyx_t_1 = __Pyx_GetItemInt(__pyx_v_arr, __pyx_v_i, size_t, 0, __Pyx_PyInt_FromSize_t, 0, 0, 1); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 59, __pyx_L4_error)
+          __pyx_t_1 = __Pyx_GetItemInt(__pyx_v_arr, __pyx_v_i, size_t, 0, __Pyx_PyInt_FromSize_t, 0, 0, 1); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 60, __pyx_L4_error)
          __Pyx_GOTREF(__pyx_t_1);
-          __pyx_t_11 = __Pyx_PyInt_As_uint64_t(__pyx_t_1); if (unlikely((__pyx_t_11 == ((uint64_t)-1)) && PyErr_Occurred())) __PYX_ERR(1, 59, __pyx_L4_error)
+          __pyx_t_11 = __Pyx_PyInt_As_uint64_t(__pyx_t_1); if (unlikely((__pyx_t_11 == ((uint64_t)-1)) && PyErr_Occurred())) __PYX_ERR(1, 60, __pyx_L4_error)
          __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
          (((uint64_t *)__pyx_v_s_proc.data)[__pyx_v_i]) = ((uint64_t)__pyx_t_11);
        }
-        /* "cpp_common.pxd":56
+        /* "cpp_common.pxd":57
 *             for i in range(s_proc.length):
 *                 (<uint64_t*>s_proc.data)[i] = <uint64_t>arr[i]
 *         elif typecode in {'l', 'L'}: # signed/unsigned long             # <<<<<<<<<<<<<<
@ -6746,7 +6746,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
        break;
        case 0x71:
-        /* "cpp_common.pxd":60
+        /* "cpp_common.pxd":61
 *             for i in range(s_proc.length):
 *                 (<uint64_t*>s_proc.data)[i] = <uint64_t>arr[i]
 *         elif typecode in {'q', 'Q'}: # signed/unsigned long long             # <<<<<<<<<<<<<<
@ -6755,7 +6755,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
 */
        case 81:
-        /* "cpp_common.pxd":61
+        /* "cpp_common.pxd":62
 *                 (<uint64_t*>s_proc.data)[i] = <uint64_t>arr[i]
 *         elif typecode in {'q', 'Q'}: # signed/unsigned long long
 *             s_proc.kind = RAPIDFUZZ_UINT64             # <<<<<<<<<<<<<<
@ -6764,7 +6764,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
 */
        __pyx_v_s_proc.kind = RAPIDFUZZ_UINT64;
-        /* "cpp_common.pxd":62
+        /* "cpp_common.pxd":63
 *         elif typecode in {'q', 'Q'}: # signed/unsigned long long
 *             s_proc.kind = RAPIDFUZZ_UINT64
 *             for i in range(s_proc.length):             # <<<<<<<<<<<<<<
@ -6776,21 +6776,21 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
        for (__pyx_t_10 = 0; __pyx_t_10 < __pyx_t_9; __pyx_t_10+=1) {
          __pyx_v_i = __pyx_t_10;
-          /* "cpp_common.pxd":63
+          /* "cpp_common.pxd":64
 *             s_proc.kind = RAPIDFUZZ_UINT64
 *             for i in range(s_proc.length):
 *                 (<uint64_t*>s_proc.data)[i] = <uint64_t>arr[i]             # <<<<<<<<<<<<<<
 *         else: # float/double are hashed
 *             s_proc.kind = RAPIDFUZZ_INT64
 */
-          __pyx_t_1 = __Pyx_GetItemInt(__pyx_v_arr, __pyx_v_i, size_t, 0, __Pyx_PyInt_FromSize_t, 0, 0, 1); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 63, __pyx_L4_error)
+          __pyx_t_1 = __Pyx_GetItemInt(__pyx_v_arr, __pyx_v_i, size_t, 0, __Pyx_PyInt_FromSize_t, 0, 0, 1); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 64, __pyx_L4_error)
          __Pyx_GOTREF(__pyx_t_1);
-          __pyx_t_11 = __Pyx_PyInt_As_uint64_t(__pyx_t_1); if (unlikely((__pyx_t_11 == ((uint64_t)-1)) && PyErr_Occurred())) __PYX_ERR(1, 63, __pyx_L4_error)
+          __pyx_t_11 = __Pyx_PyInt_As_uint64_t(__pyx_t_1); if (unlikely((__pyx_t_11 == ((uint64_t)-1)) && PyErr_Occurred())) __PYX_ERR(1, 64, __pyx_L4_error)
          __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
          (((uint64_t *)__pyx_v_s_proc.data)[__pyx_v_i]) = ((uint64_t)__pyx_t_11);
        }
-        /* "cpp_common.pxd":60
+        /* "cpp_common.pxd":61
 *             for i in range(s_proc.length):
 *                 (<uint64_t*>s_proc.data)[i] = <uint64_t>arr[i]
 *         elif typecode in {'q', 'Q'}: # signed/unsigned long long             # <<<<<<<<<<<<<<
@ -6800,7 +6800,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
        break;
        default:
-        /* "cpp_common.pxd":65
+        /* "cpp_common.pxd":66
 *                 (<uint64_t*>s_proc.data)[i] = <uint64_t>arr[i]
 *         else: # float/double are hashed
 *             s_proc.kind = RAPIDFUZZ_INT64             # <<<<<<<<<<<<<<
@ -6809,7 +6809,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
 */
        __pyx_v_s_proc.kind = RAPIDFUZZ_INT64;
-        /* "cpp_common.pxd":66
+        /* "cpp_common.pxd":67
 *         else: # float/double are hashed
 *             s_proc.kind = RAPIDFUZZ_INT64
 *             for i in range(s_proc.length):             # <<<<<<<<<<<<<<
@ -6821,23 +6821,23 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
        for (__pyx_t_10 = 0; __pyx_t_10 < __pyx_t_9; __pyx_t_10+=1) {
          __pyx_v_i = __pyx_t_10;
-          /* "cpp_common.pxd":67
+          /* "cpp_common.pxd":68
 *             s_proc.kind = RAPIDFUZZ_INT64
 *             for i in range(s_proc.length):
 *                 (<uint64_t*>s_proc.data)[i] = <uint64_t>hash(arr[i])             # <<<<<<<<<<<<<<
 *     except Exception as e:
 *         free(s_proc.data)
 */
-          __pyx_t_1 = __Pyx_GetItemInt(__pyx_v_arr, __pyx_v_i, size_t, 0, __Pyx_PyInt_FromSize_t, 0, 0, 1); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 67, __pyx_L4_error)
+          __pyx_t_1 = __Pyx_GetItemInt(__pyx_v_arr, __pyx_v_i, size_t, 0, __Pyx_PyInt_FromSize_t, 0, 0, 1); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 68, __pyx_L4_error)
          __Pyx_GOTREF(__pyx_t_1);
-          __pyx_t_12 = PyObject_Hash(__pyx_t_1); if (unlikely(__pyx_t_12 == ((Py_hash_t)-1))) __PYX_ERR(1, 67, __pyx_L4_error)
+          __pyx_t_12 = PyObject_Hash(__pyx_t_1); if (unlikely(__pyx_t_12 == ((Py_hash_t)-1))) __PYX_ERR(1, 68, __pyx_L4_error)
          __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
          (((uint64_t *)__pyx_v_s_proc.data)[__pyx_v_i]) = ((uint64_t)__pyx_t_12);
        }
        break;
      }
-      /* "cpp_common.pxd":38
+      /* "cpp_common.pxd":39
 *         raise MemoryError
 * 
 *     try:             # <<<<<<<<<<<<<<
@ -6852,7 +6852,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
    __pyx_L4_error:;
    __Pyx_XDECREF(__pyx_t_1); __pyx_t_1 = 0;
-    /* "cpp_common.pxd":68
+    /* "cpp_common.pxd":69
 *             for i in range(s_proc.length):
 *                 (<uint64_t*>s_proc.data)[i] = <uint64_t>hash(arr[i])
 *     except Exception as e:             # <<<<<<<<<<<<<<
@ -6862,7 +6862,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
    __pyx_t_13 = __Pyx_PyErr_ExceptionMatches(((PyObject *)(&((PyTypeObject*)PyExc_Exception)[0])));
    if (__pyx_t_13) {
      __Pyx_AddTraceback("cpp_common.hash_array", __pyx_clineno, __pyx_lineno, __pyx_filename);
-      if (__Pyx_GetException(&__pyx_t_1, &__pyx_t_14, &__pyx_t_15) < 0) __PYX_ERR(1, 68, __pyx_L6_except_error)
+      if (__Pyx_GetException(&__pyx_t_1, &__pyx_t_14, &__pyx_t_15) < 0) __PYX_ERR(1, 69, __pyx_L6_except_error)
      __Pyx_GOTREF(__pyx_t_1);
      __Pyx_GOTREF(__pyx_t_14);
      __Pyx_GOTREF(__pyx_t_15);
@ -6870,7 +6870,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
      __pyx_v_e = __pyx_t_14;
      /*try:*/ {
-        /* "cpp_common.pxd":69
+        /* "cpp_common.pxd":70
 *                 (<uint64_t*>s_proc.data)[i] = <uint64_t>hash(arr[i])
 *     except Exception as e:
 *         free(s_proc.data)             # <<<<<<<<<<<<<<
@ -6879,7 +6879,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
 */
        free(__pyx_v_s_proc.data);
-        /* "cpp_common.pxd":70
+        /* "cpp_common.pxd":71
 *     except Exception as e:
 *         free(s_proc.data)
 *         s_proc.data = NULL             # <<<<<<<<<<<<<<
@ -6888,7 +6888,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
 */
        __pyx_v_s_proc.data = NULL;
-        /* "cpp_common.pxd":71
+        /* "cpp_common.pxd":72
 *         free(s_proc.data)
 *         s_proc.data = NULL
 *         raise             # <<<<<<<<<<<<<<
@ -6900,10 +6900,10 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
        __Pyx_XGIVEREF(__pyx_t_15);
        __Pyx_ErrRestoreWithState(__pyx_t_1, __pyx_t_14, __pyx_t_15);
        __pyx_t_1 = 0; __pyx_t_14 = 0; __pyx_t_15 = 0; 
-        __PYX_ERR(1, 71, __pyx_L29_error)
+        __PYX_ERR(1, 72, __pyx_L29_error)
      }
-      /* "cpp_common.pxd":68
+      /* "cpp_common.pxd":69
 *             for i in range(s_proc.length):
 *                 (<uint64_t*>s_proc.data)[i] = <uint64_t>hash(arr[i])
 *     except Exception as e:             # <<<<<<<<<<<<<<
@ -6947,7 +6947,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
    goto __pyx_L6_except_error;
    __pyx_L6_except_error:;
-    /* "cpp_common.pxd":38
+    /* "cpp_common.pxd":39
 *         raise MemoryError
 * 
 *     try:             # <<<<<<<<<<<<<<
@ -6962,7 +6962,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
    __pyx_L9_try_end:;
  }
-  /* "cpp_common.pxd":73
+  /* "cpp_common.pxd":74
 *         raise
 * 
 *     s_proc.allocated = True             # <<<<<<<<<<<<<<
@ -6971,7 +6971,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
 */
  __pyx_v_s_proc.allocated = 1;
-  /* "cpp_common.pxd":74
+  /* "cpp_common.pxd":75
 * 
 *     s_proc.allocated = True
 *     return move(s_proc)             # <<<<<<<<<<<<<<
@ -6981,8 +6981,8 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
  __pyx_r = cython_std::move<proc_string>(__pyx_v_s_proc);
  goto __pyx_L0;
-  /* "cpp_common.pxd":27
+  /* "cpp_common.pxd":28
- *     void validate_string(object py_str, const char* err) except +
+ *     proc_string default_process_func(proc_string sentence) except +
 * 
 * cdef inline proc_string hash_array(arr) except *:             # <<<<<<<<<<<<<<
 *     # TODO on Cpython this does not require any copies
@ -7002,7 +7002,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_array(PyObject *__pyx
  return __pyx_r;
 }
-/* "cpp_common.pxd":77
+/* "cpp_common.pxd":78
 * 
 * 
 * cdef inline proc_string hash_sequence(seq) except *:             # <<<<<<<<<<<<<<
@ -7046,17 +7046,17 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_sequence(PyObject *__
  int __pyx_clineno = 0;
  __Pyx_RefNannySetupContext("hash_sequence", 0);
-  /* "cpp_common.pxd":79
+  /* "cpp_common.pxd":80
 * cdef inline proc_string hash_sequence(seq) except *:
 *     cdef proc_string s_proc
 *     s_proc.length = <size_t>len(seq)             # <<<<<<<<<<<<<<
 * 
 *     s_proc.data = malloc(s_proc.length * sizeof(uint64_t))
 */
-  __pyx_t_1 = PyObject_Length(__pyx_v_seq); if (unlikely(__pyx_t_1 == ((Py_ssize_t)-1))) __PYX_ERR(1, 79, __pyx_L1_error)
+  __pyx_t_1 = PyObject_Length(__pyx_v_seq); if (unlikely(__pyx_t_1 == ((Py_ssize_t)-1))) __PYX_ERR(1, 80, __pyx_L1_error)
  __pyx_v_s_proc.length = ((size_t)__pyx_t_1);
-  /* "cpp_common.pxd":81
+  /* "cpp_common.pxd":82
 *     s_proc.length = <size_t>len(seq)
 * 
 *     s_proc.data = malloc(s_proc.length * sizeof(uint64_t))             # <<<<<<<<<<<<<<
@ -7065,7 +7065,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_sequence(PyObject *__
 */
  __pyx_v_s_proc.data = malloc((__pyx_v_s_proc.length * (sizeof(uint64_t))));
-  /* "cpp_common.pxd":83
+  /* "cpp_common.pxd":84
 *     s_proc.data = malloc(s_proc.length * sizeof(uint64_t))
 * 
 *     if s_proc.data == NULL:             # <<<<<<<<<<<<<<
@ -7075,16 +7075,16 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_sequence(PyObject *__
  __pyx_t_2 = ((__pyx_v_s_proc.data == NULL) != 0);
  if (unlikely(__pyx_t_2)) {
-    /* "cpp_common.pxd":84
+    /* "cpp_common.pxd":85
 * 
 *     if s_proc.data == NULL:
 *         raise MemoryError             # <<<<<<<<<<<<<<
 * 
 *     try:
 */
-    PyErr_NoMemory(); __PYX_ERR(1, 84, __pyx_L1_error)
+    PyErr_NoMemory(); __PYX_ERR(1, 85, __pyx_L1_error)
-    /* "cpp_common.pxd":83
+    /* "cpp_common.pxd":84
 *     s_proc.data = malloc(s_proc.length * sizeof(uint64_t))
 * 
 *     if s_proc.data == NULL:             # <<<<<<<<<<<<<<
@ -7093,7 +7093,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_sequence(PyObject *__
 */
  }
-  /* "cpp_common.pxd":86
+  /* "cpp_common.pxd":87
 *         raise MemoryError
 * 
 *     try:             # <<<<<<<<<<<<<<
@ -7109,7 +7109,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_sequence(PyObject *__
    __Pyx_XGOTREF(__pyx_t_5);
    /*try:*/ {
-      /* "cpp_common.pxd":87
+      /* "cpp_common.pxd":88
 * 
 *     try:
 *         s_proc.kind = RAPIDFUZZ_INT64             # <<<<<<<<<<<<<<
@ -7118,7 +7118,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_sequence(PyObject *__
 */
      __pyx_v_s_proc.kind = RAPIDFUZZ_INT64;
-      /* "cpp_common.pxd":88
+      /* "cpp_common.pxd":89
 *     try:
 *         s_proc.kind = RAPIDFUZZ_INT64
 *         for i in range(s_proc.length):             # <<<<<<<<<<<<<<
@ -7130,19 +7130,19 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_sequence(PyObject *__
      for (__pyx_t_8 = 0; __pyx_t_8 < __pyx_t_7; __pyx_t_8+=1) {
        __pyx_v_i = __pyx_t_8;
-        /* "cpp_common.pxd":89
+        /* "cpp_common.pxd":90
 *         s_proc.kind = RAPIDFUZZ_INT64
 *         for i in range(s_proc.length):
 *             elem = seq[i]             # <<<<<<<<<<<<<<
 *             # this is required so e.g. a list of char can be compared to a string
 *             if isinstance(elem, str) and len(elem) == 1:
 */
-        __pyx_t_9 = __Pyx_GetItemInt(__pyx_v_seq, __pyx_v_i, size_t, 0, __Pyx_PyInt_FromSize_t, 0, 0, 1); if (unlikely(!__pyx_t_9)) __PYX_ERR(1, 89, __pyx_L4_error)
+        __pyx_t_9 = __Pyx_GetItemInt(__pyx_v_seq, __pyx_v_i, size_t, 0, __Pyx_PyInt_FromSize_t, 0, 0, 1); if (unlikely(!__pyx_t_9)) __PYX_ERR(1, 90, __pyx_L4_error)
        __Pyx_GOTREF(__pyx_t_9);
        __Pyx_XDECREF_SET(__pyx_v_elem, __pyx_t_9);
        __pyx_t_9 = 0;
-        /* "cpp_common.pxd":91
+        /* "cpp_common.pxd":92
 *             elem = seq[i]
 *             # this is required so e.g. a list of char can be compared to a string
 *             if isinstance(elem, str) and len(elem) == 1:             # <<<<<<<<<<<<<<
@ -7156,23 +7156,23 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_sequence(PyObject *__
          __pyx_t_2 = __pyx_t_11;
          goto __pyx_L13_bool_binop_done;
        }
-        __pyx_t_1 = PyObject_Length(__pyx_v_elem); if (unlikely(__pyx_t_1 == ((Py_ssize_t)-1))) __PYX_ERR(1, 91, __pyx_L4_error)
+        __pyx_t_1 = PyObject_Length(__pyx_v_elem); if (unlikely(__pyx_t_1 == ((Py_ssize_t)-1))) __PYX_ERR(1, 92, __pyx_L4_error)
        __pyx_t_11 = ((__pyx_t_1 == 1) != 0);
        __pyx_t_2 = __pyx_t_11;
        __pyx_L13_bool_binop_done:;
        if (__pyx_t_2) {
-          /* "cpp_common.pxd":92
+          /* "cpp_common.pxd":93
 *             # this is required so e.g. a list of char can be compared to a string
 *             if isinstance(elem, str) and len(elem) == 1:
 *                 (<uint64_t*>s_proc.data)[i] = <uint64_t><Py_UCS4>elem             # <<<<<<<<<<<<<<
 *             else:
 *                 (<uint64_t*>s_proc.data)[i] = <uint64_t>hash(elem)
 */
-          __pyx_t_12 = __Pyx_PyObject_AsPy_UCS4(__pyx_v_elem); if (unlikely((__pyx_t_12 == (Py_UCS4)-1) && PyErr_Occurred())) __PYX_ERR(1, 92, __pyx_L4_error)
+          __pyx_t_12 = __Pyx_PyObject_AsPy_UCS4(__pyx_v_elem); if (unlikely((__pyx_t_12 == (Py_UCS4)-1) && PyErr_Occurred())) __PYX_ERR(1, 93, __pyx_L4_error)
          (((uint64_t *)__pyx_v_s_proc.data)[__pyx_v_i]) = ((uint64_t)((Py_UCS4)__pyx_t_12));
-          /* "cpp_common.pxd":91
+          /* "cpp_common.pxd":92
 *             elem = seq[i]
 *             # this is required so e.g. a list of char can be compared to a string
 *             if isinstance(elem, str) and len(elem) == 1:             # <<<<<<<<<<<<<<
@ -7182,7 +7182,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_sequence(PyObject *__
          goto __pyx_L12;
        }
-        /* "cpp_common.pxd":94
+        /* "cpp_common.pxd":95
 *                 (<uint64_t*>s_proc.data)[i] = <uint64_t><Py_UCS4>elem
 *             else:
 *                 (<uint64_t*>s_proc.data)[i] = <uint64_t>hash(elem)             # <<<<<<<<<<<<<<
@ -7190,13 +7190,13 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_sequence(PyObject *__
 *         free(s_proc.data)
 */
        /*else*/ {
-          __pyx_t_13 = PyObject_Hash(__pyx_v_elem); if (unlikely(__pyx_t_13 == ((Py_hash_t)-1))) __PYX_ERR(1, 94, __pyx_L4_error)
+          __pyx_t_13 = PyObject_Hash(__pyx_v_elem); if (unlikely(__pyx_t_13 == ((Py_hash_t)-1))) __PYX_ERR(1, 95, __pyx_L4_error)
          (((uint64_t *)__pyx_v_s_proc.data)[__pyx_v_i]) = ((uint64_t)__pyx_t_13);
        }
        __pyx_L12:;
      }
-      /* "cpp_common.pxd":86
+      /* "cpp_common.pxd":87
 *         raise MemoryError
 * 
 *     try:             # <<<<<<<<<<<<<<
@ -7211,7 +7211,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_sequence(PyObject *__
    __pyx_L4_error:;
    __Pyx_XDECREF(__pyx_t_9); __pyx_t_9 = 0;
-    /* "cpp_common.pxd":95
+    /* "cpp_common.pxd":96
 *             else:
 *                 (<uint64_t*>s_proc.data)[i] = <uint64_t>hash(elem)
 *     except Exception as e:             # <<<<<<<<<<<<<<
@ -7221,7 +7221,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_sequence(PyObject *__
    __pyx_t_14 = __Pyx_PyErr_ExceptionMatches(((PyObject *)(&((PyTypeObject*)PyExc_Exception)[0])));
    if (__pyx_t_14) {
      __Pyx_AddTraceback("cpp_common.hash_sequence", __pyx_clineno, __pyx_lineno, __pyx_filename);
-      if (__Pyx_GetException(&__pyx_t_9, &__pyx_t_15, &__pyx_t_16) < 0) __PYX_ERR(1, 95, __pyx_L6_except_error)
+      if (__Pyx_GetException(&__pyx_t_9, &__pyx_t_15, &__pyx_t_16) < 0) __PYX_ERR(1, 96, __pyx_L6_except_error)
      __Pyx_GOTREF(__pyx_t_9);
      __Pyx_GOTREF(__pyx_t_15);
      __Pyx_GOTREF(__pyx_t_16);
@ -7229,7 +7229,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_sequence(PyObject *__
      __pyx_v_e = __pyx_t_15;
      /*try:*/ {
-        /* "cpp_common.pxd":96
+        /* "cpp_common.pxd":97
 *                 (<uint64_t*>s_proc.data)[i] = <uint64_t>hash(elem)
 *     except Exception as e:
 *         free(s_proc.data)             # <<<<<<<<<<<<<<
@ -7238,7 +7238,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_sequence(PyObject *__
 */
        free(__pyx_v_s_proc.data);
-        /* "cpp_common.pxd":97
+        /* "cpp_common.pxd":98
 *     except Exception as e:
 *         free(s_proc.data)
 *         s_proc.data = NULL             # <<<<<<<<<<<<<<
@ -7247,7 +7247,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_sequence(PyObject *__
 */
        __pyx_v_s_proc.data = NULL;
-        /* "cpp_common.pxd":98
+        /* "cpp_common.pxd":99
 *         free(s_proc.data)
 *         s_proc.data = NULL
 *         raise             # <<<<<<<<<<<<<<
@ -7259,10 +7259,10 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_sequence(PyObject *__
        __Pyx_XGIVEREF(__pyx_t_16);
        __Pyx_ErrRestoreWithState(__pyx_t_9, __pyx_t_15, __pyx_t_16);
        __pyx_t_9 = 0; __pyx_t_15 = 0; __pyx_t_16 = 0; 
-        __PYX_ERR(1, 98, __pyx_L20_error)
+        __PYX_ERR(1, 99, __pyx_L20_error)
      }
-      /* "cpp_common.pxd":95
+      /* "cpp_common.pxd":96
 *             else:
 *                 (<uint64_t*>s_proc.data)[i] = <uint64_t>hash(elem)
 *     except Exception as e:             # <<<<<<<<<<<<<<
@ -7306,7 +7306,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_sequence(PyObject *__
    goto __pyx_L6_except_error;
    __pyx_L6_except_error:;
-    /* "cpp_common.pxd":86
+    /* "cpp_common.pxd":87
 *         raise MemoryError
 * 
 *     try:             # <<<<<<<<<<<<<<
@ -7321,7 +7321,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_sequence(PyObject *__
    __pyx_L9_try_end:;
  }
-  /* "cpp_common.pxd":100
+  /* "cpp_common.pxd":101
 *         raise
 * 
 *     s_proc.allocated = True             # <<<<<<<<<<<<<<
@ -7329,7 +7329,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_sequence(PyObject *__
 */
  __pyx_v_s_proc.allocated = 1;
-  /* "cpp_common.pxd":101
+  /* "cpp_common.pxd":102
 * 
 *     s_proc.allocated = True
 *     return move(s_proc)             # <<<<<<<<<<<<<<
@ -7337,7 +7337,7 @@ static CYTHON_INLINE proc_string __pyx_f_10cpp_common_hash_sequence(PyObject *__
  __pyx_r = cython_std::move<proc_string>(__pyx_v_s_proc);
  goto __pyx_L0;
-  /* "cpp_common.pxd":77
+  /* "cpp_common.pxd":78
 * 
 * 
 * cdef inline proc_string hash_sequence(seq) except *:             # <<<<<<<<<<<<<<
@ -7463,8 +7463,8 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = {
 };
 /* #### Code section: cached_builtins ### */
 static CYTHON_SMALL_CODE int __Pyx_InitCachedBuiltins(void) {
-  __pyx_builtin_MemoryError = __Pyx_GetBuiltinName(__pyx_n_s_MemoryError); if (!__pyx_builtin_MemoryError) __PYX_ERR(1, 36, __pyx_L1_error)
+  __pyx_builtin_MemoryError = __Pyx_GetBuiltinName(__pyx_n_s_MemoryError); if (!__pyx_builtin_MemoryError) __PYX_ERR(1, 37, __pyx_L1_error)
-  __pyx_builtin_range = __Pyx_GetBuiltinName(__pyx_n_s_range); if (!__pyx_builtin_range) __PYX_ERR(1, 42, __pyx_L1_error)
+  __pyx_builtin_range = __Pyx_GetBuiltinName(__pyx_n_s_range); if (!__pyx_builtin_range) __PYX_ERR(1, 43, __pyx_L1_error)
  return 0;
  __pyx_L1_error:;
  return -1;
@ -8252,7 +8252,7 @@ if (!__Pyx_RefNanny) {
  if (PyDict_SetItem(__pyx_d, __pyx_n_s_test, __pyx_t_1) < 0) __PYX_ERR(0, 1, __pyx_L1_error)
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
-  /* "cpp_common.pxd":77
+  /* "cpp_common.pxd":78
 * 
 * 
 * cdef inline proc_string hash_sequence(seq) except *:             # <<<<<<<<<<<<<<
--- a/src/cpp_fuzz.pyx
+++ b/src/cpp_fuzz.pyx
@ -42,9 +42,9 @@ def ratio(s1, s2, *, processor=None, score_cutoff=None):
    Parameters
    ----------
-    s1 : str
+    s1 : Sequence[Hashable]
        First string to compare.
-    s2 : str
+    s2 : Sequence[Hashable]
        Second string to compare.
    processor: bool or callable, optional
        Optional callable that is used to preprocess the strings before
@ -94,9 +94,9 @@ def partial_ratio(s1, s2, *, processor=None, score_cutoff=None):
    Parameters
    ----------
-    s1 : str
+    s1 : Sequence[Hashable]
        First string to compare.
-    s2 : str
+    s2 : Sequence[Hashable]
        Second string to compare.
    processor: bool or callable, optional
        Optional callable that is used to preprocess the strings before
@ -173,9 +173,9 @@ def token_sort_ratio(s1, s2, *, processor=True, score_cutoff=None):
    Parameters
    ----------
-    s1 : str
+    s1 : Sequence[Hashable]
        First string to compare.
-    s2 : str
+    s2 : Sequence[Hashable]
        Second string to compare.
    processor: bool or callable, optional
        Optional callable that is used to preprocess the strings before
@ -221,9 +221,9 @@ def token_set_ratio(s1, s2, *, processor=True, score_cutoff=None):
    Parameters
    ----------
-    s1 : str
+    s1 : Sequence[Hashable]
        First string to compare.
-    s2 : str
+    s2 : Sequence[Hashable]
        Second string to compare.
    processor: bool or callable, optional
        Optional callable that is used to preprocess the strings before
@ -271,9 +271,9 @@ def token_ratio(s1, s2, *, processor=True, score_cutoff=None):
    Parameters
    ----------
-    s1 : str
+    s1 : Sequence[Hashable]
        First string to compare.
-    s2 : str
+    s2 : Sequence[Hashable]
        Second string to compare.
    processor: bool or callable, optional
        Optional callable that is used to preprocess the strings before
@ -313,9 +313,9 @@ def partial_token_sort_ratio(s1, s2, *, processor=True, score_cutoff=None):
    Parameters
    ----------
-    s1 : str
+    s1 : Sequence[Hashable]
        First string to compare.
-    s2 : str
+    s2 : Sequence[Hashable]
        Second string to compare.
    processor: bool or callable, optional
        Optional callable that is used to preprocess the strings before
@ -356,9 +356,9 @@ def partial_token_set_ratio(s1, s2, *, processor=True, score_cutoff=None):
    Parameters
    ----------
-    s1 : str
+    s1 : Sequence[Hashable]
        First string to compare.
-    s2 : str
+    s2 : Sequence[Hashable]
        Second string to compare.
    processor: bool or callable, optional
        Optional callable that is used to preprocess the strings before
@ -399,9 +399,9 @@ def partial_token_ratio(s1, s2, *, processor=True, score_cutoff=None):
    Parameters
    ----------
-    s1 : str
+    s1 : Sequence[Hashable]
        First string to compare.
-    s2 : str
+    s2 : Sequence[Hashable]
        Second string to compare.
    processor: bool or callable, optional
        Optional callable that is used to preprocess the strings before
@ -441,9 +441,9 @@ def WRatio(s1, s2, *, processor=True, score_cutoff=None):
    Parameters
    ----------
-    s1 : str
+    s1 : Sequence[Hashable]
        First string to compare.
-    s2 : str
+    s2 : Sequence[Hashable]
        Second string to compare.
    processor: bool or callable, optional
        Optional callable that is used to preprocess the strings before
@ -485,9 +485,9 @@ def QRatio(s1, s2, *, processor=True, score_cutoff=None):
    Parameters
    ----------
-    s1 : str
+    s1 : Sequence[Hashable]
        First string to compare.
-    s2 : str
+    s2 : Sequence[Hashable]
        Second string to compare.
    processor: bool or callable, optional
        Optional callable that is used to preprocess the strings before
--- a/src/cpp_process.cpp
+++ b/src/cpp_process.cpp
--- a/src/cpp_process.pyx
+++ b/src/cpp_process.pyx
@ -28,16 +28,21 @@ from rapidfuzz.fuzz import (
 from libcpp.vector cimport vector
 from libcpp cimport algorithm
 from libcpp.utility cimport move
 from libc.stdint cimport uint8_t, int32_t
 from libc.math cimport floor
 from cpython.list cimport PyList_New, PyList_SET_ITEM
 from cpython.object cimport PyObject
 from cpython.ref cimport Py_INCREF, Py_DECREF
-from cpp_common cimport proc_string, is_valid_string, convert_string, hash_array, hash_sequence
+from cpp_common cimport proc_string, is_valid_string, convert_string, hash_array, hash_sequence, default_process_func
 import heapq
 from array import array
 import numpy as np
 cimport numpy as np
 cimport cython
 cdef inline proc_string conv_sequence(seq) except *:
    if is_valid_string(seq):
@ -50,15 +55,15 @@ cdef inline proc_string conv_sequence(seq) except *:
 cdef extern from "cpp_process.hpp":
    cdef cppclass CachedScorerContext:
        CachedScorerContext()
-        double ratio(const proc_string&, double) except +
+        double ratio(const proc_string&, double) nogil except +
    cdef cppclass CachedDistanceContext:
        CachedDistanceContext()
-        size_t ratio(const proc_string&, size_t) except +
+        size_t ratio(const proc_string&, size_t) nogil except +
    # normalized distances
    # fuzz
-    CachedScorerContext cached_ratio_init(                   const proc_string&, int) except +
+    CachedScorerContext cached_ratio_init(                   const proc_string&, int) nogil except +
    CachedScorerContext cached_partial_ratio_init(           const proc_string&, int) except +
    CachedScorerContext cached_token_sort_ratio_init(        const proc_string&, int) except +
    CachedScorerContext cached_token_set_ratio_init(         const proc_string&, int) except +
@ -226,7 +231,7 @@ cdef inline extractOne_dict(CachedScorerContext context, choices, processor, dou
        for choice_key, choice in choices.items():
            if choice is None:
                continue
-            
+
            score = context.ratio(conv_sequence(choice), score_cutoff)
            if score >= score_cutoff and score > result_score:
@ -400,8 +405,8 @@ cdef inline py_extractOne_dict(query, choices, scorer, processor, double score_c
            score = scorer(query, processor(choice), **kwargs)
            if score >= score_cutoff and score > result_score:
                kwargs["score_cutoff"] = score_cutoff
                score_cutoff = score
                kwargs["score_cutoff"] = score
                result_score = score
                result_choice = choice
                result_key = choice_key
@ -416,8 +421,8 @@ cdef inline py_extractOne_dict(query, choices, scorer, processor, double score_c
            score = scorer(query, choice, **kwargs)
            if score >= score_cutoff and score > result_score:
                kwargs["score_cutoff"] = score_cutoff
                score_cutoff = score
                kwargs["score_cutoff"] = score
                result_score = score
                result_choice = choice
                result_key = choice_key
@ -445,8 +450,8 @@ cdef inline py_extractOne_list(query, choices, scorer, processor, double score_c
            score = scorer(query, processor(choice), **kwargs)
            if score >= score_cutoff and score > result_score:
                kwargs["score_cutoff"] = score_cutoff
                score_cutoff = score
                kwargs["score_cutoff"] = score
                result_score = score
                result_choice = choice
                result_index = i
@ -461,8 +466,8 @@ cdef inline py_extractOne_list(query, choices, scorer, processor, double score_c
            score = scorer(query, choice, **kwargs)
            if score >= score_cutoff and score > result_score:
                kwargs["score_cutoff"] = score_cutoff
                score_cutoff = score
                kwargs["score_cutoff"] = score
                result_score = score
                result_choice = choice
                result_index = i
@ -480,9 +485,9 @@ def extractOne(query, choices, *, scorer=WRatio, processor=default_process, scor
    Parameters
    ----------
-    query : str
+    query : Sequence[Hashable]
        string we want to find
-    choices : Iterable
+    choices : Iterable[Sequence[Hashable]] | Mapping[Sequence[Hashable]]
        list of all strings the query should be compared with or dict with a mapping
        {<result>: <string to compare>}
    scorer : Callable, optional
@ -506,7 +511,7 @@ def extractOne(query, choices, *, scorer=WRatio, processor=default_process, scor
    Returns
    -------
-    Tuple[str, Any, Any]
+    Tuple[Sequence[Hashable], Any, Any]
        Returns the best match in form of a Tuple with 3 elements. The values stored in the
        tuple depend on the types of the input arguments.
@ -634,7 +639,7 @@ def extractOne(query, choices, *, scorer=WRatio, processor=default_process, scor
            return extractOne_dict(move(ScorerContext), choices, processor, c_score_cutoff)
        else:
            return extractOne_list(move(ScorerContext), choices, processor, c_score_cutoff)
-    
+
    if IsIntegratedDistance(scorer):
        # distance implemented in C++
        query_context = conv_sequence(query)
@ -804,13 +809,13 @@ cdef inline extract_list(CachedScorerContext context, choices, processor, size_t
            for i, choice in enumerate(choices):
                if choice is None:
                    continue
-    
+
                proc_choice = processor(choice)
                if proc_choice is None:
                    continue
-    
+
                score = context.ratio(conv_sequence(proc_choice), score_cutoff)
-    
+
                if score >= score_cutoff:
                    Py_INCREF(choice)
                    results.push_back(ListMatchScorerElem(score, i, <PyObject*>choice))
@ -818,23 +823,23 @@ cdef inline extract_list(CachedScorerContext context, choices, processor, size_t
            for i, choice in enumerate(choices):
                if choice is None:
                    continue
-    
+
                score = context.ratio(conv_sequence(choice), score_cutoff)
-    
+
                if score >= score_cutoff:
                    Py_INCREF(choice)
                    results.push_back(ListMatchScorerElem(score, i, <PyObject*>choice))
-    
+
        # due to score_cutoff not always completely filled
        if limit > results.size():
            limit = results.size()
-    
+
        if limit >= results.size():
            algorithm.sort(results.begin(), results.end(), ExtractScorerComp())
        else:
            algorithm.partial_sort(results.begin(), results.begin() + <ptrdiff_t>limit, results.end(), ExtractScorerComp())
            results.resize(limit)
-    
+
        # copy elements into Python List
        result_list = PyList_New(<Py_ssize_t>limit)
        for i in range(limit):
@ -863,13 +868,13 @@ cdef inline extract_distance_list(CachedDistanceContext context, choices, proces
            for i, choice in enumerate(choices):
                if choice is None:
                    continue
-    
+
                proc_choice = processor(choice)
                if proc_choice is None:
                    continue
-    
+
                distance = context.ratio(conv_sequence(proc_choice), max_)
-    
+
                if distance <= max_:
                    Py_INCREF(choice)
                    results.push_back(ListMatchDistanceElem(distance, i, <PyObject*>choice))
@ -877,23 +882,23 @@ cdef inline extract_distance_list(CachedDistanceContext context, choices, proces
            for i, choice in enumerate(choices):
                if choice is None:
                    continue
-    
+
                distance = context.ratio(conv_sequence(choice), max_)
-    
+
                if distance <= max_:
                    Py_INCREF(choice)
                    results.push_back(ListMatchDistanceElem(distance, i, <PyObject*>choice))
-    
+
        # due to max_ not always completely filled
        if limit > results.size():
            limit = results.size()
-    
+
        if limit >= results.size():
            algorithm.sort(results.begin(), results.end(), ExtractDistanceComp())
        else:
            algorithm.partial_sort(results.begin(), results.begin() + <ptrdiff_t>limit, results.end(), ExtractDistanceComp())
            results.resize(limit)
-    
+
        # copy elements into Python List
        result_list = PyList_New(<Py_ssize_t>limit)
        for i in range(limit):
@ -980,9 +985,9 @@ def extract(query, choices, *, scorer=WRatio, processor=default_process, limit=5
    Parameters
    ----------
-    query : str
+    query : Sequence[Hashable]
        string we want to find
-    choices : Iterable
+    choices : Collection[Sequence[Hashable]] | Mapping[Sequence[Hashable]]
        list of all strings the query should be compared with or dict with a mapping
        {<result>: <string to compare>}
    scorer : Callable, optional
@ -1008,7 +1013,7 @@ def extract(query, choices, *, scorer=WRatio, processor=default_process, limit=5
    Returns
    -------
-    List[Tuple[str, Any, Any]]
+    List[Tuple[Sequence[Hashable], Any, Any]]
        The return type is always a List of Tuples with 3 elements. However the values stored in the
        tuple depend on the types of the input arguments.
@ -1107,9 +1112,9 @@ def extract_iter(query, choices, *, scorer=WRatio, processor=default_process, sc
    Parameters
    ----------
-    query : str
+    query : Sequence[Hashable]
        string we want to find
-    choices : Iterable
+    choices : Iterable[Sequence[Hashable]] | Mapping[Sequence[Hashable]]
        list of all strings the query should be compared with or dict with a mapping
        {<result>: <string to compare>}
    scorer : Callable, optional
@ -1133,7 +1138,7 @@ def extract_iter(query, choices, *, scorer=WRatio, processor=default_process, sc
    Yields
    -------
-    Tuple[str, Any, Any]
+    Tuple[Sequence[Hashable], Any, Any]
        Yields similarity between the query and each choice in form of a Tuple with 3 elements.
        The values stored in the tuple depend on the types of the input arguments.
@ -1408,3 +1413,325 @@ def extract_iter(query, choices, *, scorer=WRatio, processor=default_process, sc
        yield from py_extract_iter_dict()
    else:
        yield from py_extract_iter_list()
@cython.boundscheck(False)
@cython.wraparound(False)
cdef inline cdist_two_lists_similarity(
    const vector[proc_string]& queries,
    const vector[proc_string]& choices,
    scorer, score_cutoff, dict kwargs
):
    """
    Score every query against every choice with a similarity scorer.
    The result is a uint8 matrix with one row per query and one column per
    choice, each cell holding the floored score. ``score_cutoff`` may be None
    (treated as 0); a value outside 0 - 100 raises a TypeError. The floored
    cutoff is handed to the scorer for every pair.
    """
    cdef size_t n_queries = queries.size()
    cdef size_t n_choices = choices.size()
    cdef size_t row, col
    cdef double min_score = 0
    cdef np.ndarray[np.uint8_t, ndim=2] scores = np.empty((n_queries, n_choices), dtype=np.uint8)
    if score_cutoff is not None:
        min_score = score_cutoff
    if min_score > 100 or min_score < 0:
        raise TypeError("score_cutoff has to be in the range of 0.0 - 100.0")
    min_score = floor(min_score)
    for row in range(n_queries):
        # the cached scorer is built once per query and reused for the whole row
        ScorerContext = CachedScorerInit(scorer, queries[row], 0, kwargs)
        for col in range(n_choices):
            scores[row, col] = <uint8_t>floor(ScorerContext.ratio(choices[col], min_score))
    return scores
@cython.boundscheck(False)
@cython.wraparound(False)
cdef inline cdist_two_lists_distance(
    const vector[proc_string]& queries, const vector[proc_string]& choices,
    scorer, score_cutoff, dict kwargs
):
    """
    Compute the distance of every query to every choice.
    The result is an int32 matrix with one row per query and one column per
    choice. ``score_cutoff`` is used as the maximum distance handed to the
    scorer; None and -1 both leave the maximum at ``<size_t>-1``.
    """
    cdef size_t n_queries = queries.size()
    cdef size_t n_choices = choices.size()
    cdef size_t row, col
    cdef size_t max_dist = <size_t>-1
    cdef np.ndarray[np.int32_t, ndim=2] distances = np.empty((n_queries, n_choices), dtype=np.int32)
    if score_cutoff is not None:
        if score_cutoff != -1:
            max_dist = score_cutoff
    for row in range(n_queries):
        # the cached distance context is built once per query and reused for the whole row
        DistanceContext = CachedDistanceInit(scorer, queries[row], 0, kwargs)
        for col in range(n_choices):
            distances[row, col] = <int32_t>DistanceContext.ratio(choices[col], max_dist)
    return distances
@cython.boundscheck(False)
@cython.wraparound(False)
cdef inline py_cdist_two_lists(
    const vector[PyObject*]& queries, const vector[PyObject*]& choices,
    scorer, score_cutoff, dict kwargs
):
    """
    Score every query against every choice by calling ``scorer`` as a
    Python callable.
    The scorer receives ``processor=None`` and the floored ``score_cutoff``
    as keyword arguments (both are written into ``kwargs``). Each result is
    converted to double, floored and stored in a uint8 matrix.
    ``score_cutoff`` may be None (treated as 0); a value outside 0 - 100
    raises a TypeError.
    """
    cdef size_t n_queries = queries.size()
    cdef size_t n_choices = choices.size()
    cdef size_t row, col
    cdef double min_score = 0
    cdef np.ndarray[np.uint8_t, ndim=2] scores = np.empty((n_queries, n_choices), dtype=np.uint8)
    if score_cutoff is not None:
        min_score = score_cutoff
    if min_score > 100 or min_score < 0:
        raise TypeError("score_cutoff has to be in the range of 0.0 - 100.0")
    min_score = floor(min_score)
    kwargs["processor"] = None
    kwargs["score_cutoff"] = min_score
    for row in range(n_queries):
        for col in range(n_choices):
            scores[row, col] = <uint8_t>floor(
                <double>scorer(<object>queries[row], <object>choices[col], **kwargs)
            )
    return scores
 cdef cdist_two_lists(queries, choices, scorer, processor, score_cutoff, dict kwargs):
    """
    Preprocess ``queries`` and ``choices`` and dispatch to the matching kernel.
    Scorers recognised by IsIntegratedScorer / IsIntegratedDistance are
    evaluated on ``proc_string`` values through cdist_two_lists_similarity /
    cdist_two_lists_distance. Every other scorer is called as a Python
    callable through py_cdist_two_lists.
    ``processor`` handling:
    * falsy: the inputs are used unchanged
    * a callable other than ``default_process``: called through Python
    * anything else truthy: ``default_process`` (the C++ ``default_process_func``
      on the integrated path)
    Every Python object stored in ``proc_py_queries`` / ``proc_py_choices`` is
    INCREF'd when it is stored and DECREF'd in the ``finally`` block, so the
    references are released on success as well as on an exception.
    Both inputs need to support ``len()``.
    """
    cdef vector[proc_string] proc_queries
    cdef vector[proc_string] proc_choices
    cdef vector[PyObject*] proc_py_queries
    cdef vector[PyObject*] proc_py_choices
    cdef size_t queries_len = <size_t>len(queries)
    cdef size_t choices_len = <size_t>len(choices)
    try:
        if IsIntegratedScorer(scorer) or IsIntegratedDistance(scorer):
            proc_queries.reserve(queries_len)
            proc_choices.reserve(choices_len)
            # processor None/False: convert the raw inputs directly
            if not processor:
                for query in queries:
                    proc_queries.push_back(move(conv_sequence(query)))
                for choice in choices:
                    proc_choices.push_back(move(conv_sequence(choice)))
            # processor has to be called through python
            elif processor is not default_process and callable(processor):
                proc_py_queries.reserve(queries_len)
                for query in queries:
                    proc_query = processor(query)
                    # hold a reference until the finally block; presumably the
                    # proc_string refers to data owned by proc_query and must
                    # not outlive it - TODO confirm against conv_sequence
                    Py_INCREF(proc_query)
                    proc_py_queries.push_back(<PyObject*>proc_query)
                    proc_queries.push_back(move(conv_sequence(proc_query)))
                proc_py_choices.reserve(choices_len)
                for choice in choices:
                    proc_choice = processor(choice)
                    # same ownership handling as for the queries above
                    Py_INCREF(proc_choice)
                    proc_py_choices.push_back(<PyObject*>proc_choice)
                    proc_choices.push_back(move(conv_sequence(proc_choice)))
            # processor is True / default_process: preprocess without calling into Python
            else:
                for query in queries:
                    proc_queries.push_back(
                        move(default_process_func(move(conv_sequence(query))))
                    )
                for choice in choices:
                    proc_choices.push_back(
                        move(default_process_func(move(conv_sequence(choice))))
                    )
            # exactly one of the two checks is expected to hold inside this branch
            if IsIntegratedScorer(scorer):
                return cdist_two_lists_similarity(proc_queries, proc_choices, scorer, score_cutoff, kwargs)
            if IsIntegratedDistance(scorer):
                return cdist_two_lists_distance(proc_queries, proc_choices, scorer, score_cutoff, kwargs)
        else:
            # scorer is an arbitrary Python callable: keep the inputs as Python objects
            proc_py_queries.reserve(queries_len)
            proc_py_choices.reserve(choices_len)
            # processor None/False: store the inputs themselves
            if not processor:
                for query in queries:
                    Py_INCREF(query)
                    proc_py_queries.push_back(<PyObject*>query)
                for choice in choices:
                    Py_INCREF(choice)
                    proc_py_choices.push_back(<PyObject*>choice)
            # processor has to be called through python
            else:
                # a truthy non-callable value (e.g. True) selects default_process
                if not callable(processor):
                    processor = default_process
                for query in queries:
                    proc_query = processor(query)
                    Py_INCREF(proc_query)
                    proc_py_queries.push_back(<PyObject*>proc_query)
                for choice in choices:
                    proc_choice = processor(choice)
                    Py_INCREF(proc_choice)
                    proc_py_choices.push_back(<PyObject*>proc_choice)
            return py_cdist_two_lists(proc_py_queries, proc_py_choices, scorer, score_cutoff, kwargs)
    finally:
        # decref all reference counts taken above, also when an exception is propagating
        for item in proc_py_queries:
            Py_DECREF(<object>item)
        for item in proc_py_choices:
            Py_DECREF(<object>item)
@cython.boundscheck(False)
@cython.wraparound(False)
cdef inline cdist_single_list_similarity(
    const vector[proc_string]& queries, scorer, score_cutoff, dict kwargs
):
    """
    Score every element of ``queries`` against every other element.
    Only the pairs above the diagonal are scored; each floored score is
    written to both mirrored cells and the diagonal is set to 100. The result
    is a square uint8 matrix. ``score_cutoff`` may be None (treated as 0);
    a value outside 0 - 100 raises a TypeError.
    """
    cdef size_t n_queries = queries.size()
    cdef size_t row, col
    cdef double min_score = 0
    cdef np.ndarray[np.uint8_t, ndim=2] scores = np.empty((n_queries, n_queries), dtype=np.uint8)
    if score_cutoff is not None:
        min_score = score_cutoff
    if min_score > 100 or min_score < 0:
        raise TypeError("score_cutoff has to be in the range of 0.0 - 100.0")
    min_score = floor(min_score)
    for row in range(n_queries):
        scores[row, row] = 100
        ScorerContext = CachedScorerInit(scorer, queries[row], 0, kwargs)
        for col in range(row + 1, n_queries):
            cell = <uint8_t>floor(ScorerContext.ratio(queries[col], min_score))
            # mirror the value across the diagonal
            scores[row, col] = cell
            scores[col, row] = cell
    return scores
@cython.boundscheck(False)
@cython.wraparound(False)
cdef inline cdist_single_list_distance(
    const vector[proc_string]& queries, scorer, score_cutoff, dict kwargs
):
    """
    Compute the distance of every element of ``queries`` to every other element.
    Only the pairs above the diagonal are computed; each distance is written
    to both mirrored cells and the diagonal is set to 0. The result is a
    square int32 matrix. ``score_cutoff`` is used as the maximum distance
    handed to the scorer; None and -1 both leave the maximum at ``<size_t>-1``.
    """
    cdef size_t n_queries = queries.size()
    cdef size_t row, col
    cdef size_t max_dist = <size_t>-1
    cdef np.ndarray[np.int32_t, ndim=2] distances = np.empty((n_queries, n_queries), dtype=np.int32)
    if score_cutoff is not None:
        if score_cutoff != -1:
            max_dist = score_cutoff
    for row in range(n_queries):
        distances[row, row] = 0
        DistanceContext = CachedDistanceInit(scorer, queries[row], 0, kwargs)
        for col in range(row + 1, n_queries):
            cell = <int32_t>DistanceContext.ratio(queries[col], max_dist)
            # mirror the value across the diagonal
            distances[row, col] = cell
            distances[col, row] = cell
    return distances
 cdef cdist_single_list(queries, scorer, processor, score_cutoff, dict kwargs):
    """
    Preprocess ``queries`` once and compare the collection with itself.
    Scorers recognised by IsIntegratedScorer / IsIntegratedDistance are
    evaluated on ``proc_string`` values through cdist_single_list_similarity /
    cdist_single_list_distance, which only compute one half of the matrix.
    Every other scorer is called as a Python callable for all pairs through
    py_cdist_two_lists.
    ``processor`` handling:
    * falsy: the inputs are used unchanged
    * a callable other than ``default_process``: called through Python
    * anything else truthy: ``default_process`` (the C++ ``default_process_func``
      on the integrated path)
    Every Python object stored in ``proc_py_queries`` is INCREF'd when it is
    stored and DECREF'd in the ``finally`` block, so the references are
    released on success as well as on an exception.
    ``queries`` needs to support ``len()``.
    """
    cdef size_t queries_len = <size_t>len(queries)
    cdef vector[proc_string] proc_queries
    cdef vector[PyObject*] proc_py_queries
    try:
        if IsIntegratedScorer(scorer) or IsIntegratedDistance(scorer):
            proc_queries.reserve(queries_len)
            # processor None/False: convert the raw inputs directly
            if not processor:
                for query in queries:
                    proc_queries.push_back(move(conv_sequence(query)))
            # processor has to be called through python
            elif processor is not default_process and callable(processor):
                proc_py_queries.reserve(queries_len)
                for query in queries:
                    proc_query = processor(query)
                    # hold a reference until the finally block; presumably the
                    # proc_string refers to data owned by proc_query and must
                    # not outlive it - TODO confirm against conv_sequence
                    Py_INCREF(proc_query)
                    proc_py_queries.push_back(<PyObject*>proc_query)
                    proc_queries.push_back(move(conv_sequence(proc_query)))
            # processor is True / default_process: preprocess without calling into Python
            else:
                for query in queries:
                    proc_queries.push_back(
                        move(default_process_func(move(conv_sequence(query))))
                    )
            # exactly one of the two checks is expected to hold inside this branch
            if IsIntegratedScorer(scorer):
                return cdist_single_list_similarity(proc_queries, scorer, score_cutoff, kwargs)
            if IsIntegratedDistance(scorer):
                return cdist_single_list_distance(proc_queries, scorer, score_cutoff, kwargs)
        else:
            # scorer is an arbitrary Python callable: keep the inputs as Python objects
            proc_py_queries.reserve(queries_len)
            # processor None/False: store the inputs themselves
            if not processor:
                for query in queries:
                    Py_INCREF(query)
                    proc_py_queries.push_back(<PyObject*>query)
            # processor has to be called through python
            else:
                # a truthy non-callable value (e.g. True) selects default_process
                if not callable(processor):
                    processor = default_process
                for query in queries:
                    proc_query = processor(query)
                    Py_INCREF(proc_query)
                    proc_py_queries.push_back(<PyObject*>proc_query)
            # scorer(a, b) might not be equal to scorer(b, a), so the full matrix
            # is computed by passing the same vector as queries and as choices
            return py_cdist_two_lists(proc_py_queries, proc_py_queries, scorer, score_cutoff, kwargs)
    finally:
        # decref all reference counts taken above, also when an exception is propagating
        for item in proc_py_queries:
            Py_DECREF(<object>item)
 def cdist(queries, choices, *, scorer=ratio, processor=None, score_cutoff=None, **kwargs):
    """
    Compute distance/similarity between each pair of the two collections of inputs.

    Parameters
    ----------
    queries : Collection[Sequence[Hashable]]
        list of all strings that are used as queries
    choices : Collection[Sequence[Hashable]]
        list of all strings the queries should be compared with. When the very
        same object is passed as ``queries`` and ``choices`` a dedicated
        single-list code path is used.
    scorer : Callable, optional
        Optional callable that is used to calculate the matching score between
        the query and each choice. This can be any of the scorers included in RapidFuzz
        (both scorers that calculate the edit distance or the normalized edit distance).
        Custom functions are not officially supported so far! They are currently
        called through Python with ``processor=None`` and ``score_cutoff`` as
        additional keyword arguments and their floored result is stored as uint8.
        fuzz.ratio is used by default.
    processor : Callable, optional
        Optional callable that is used to preprocess the strings before
        comparing them. When processor is True ``utils.default_process``
        is used. Default is None, which deactivates this behaviour.
    score_cutoff : Any, optional
        Optional argument for a score threshold. When an edit distance is used this represents the maximum
        edit distance and pairs with a `distance > score_cutoff` are inserted as -1. When a
        normalized edit distance is used this represents the minimal similarity
        and pairs with a `similarity < score_cutoff` are inserted as 0. In the
        latter case the value has to be in the range 0 - 100.
        Default is None, which deactivates this behaviour.
    **kwargs : Any, optional
        any other named parameters are passed to the scorer. This can be used to pass
        e.g. weights to string_metric.levenshtein

    Returns
    -------
    np.ndarray
        Matrix of shape ``(len(queries), len(choices))`` holding the result for
        each pair. The dtype is ``np.uint8`` for similarity scores and
        ``np.int32`` for edit distances.

    Raises
    ------
    TypeError
        When a similarity is calculated and ``score_cutoff`` is outside the
        range 0 - 100.
    """
    # identity (not equality) selects the single-list implementation
    if queries is choices:
        return cdist_single_list(queries, scorer, processor, score_cutoff, kwargs)
    return cdist_two_lists(queries, choices, scorer, processor, score_cutoff, kwargs)
--- a/src/cpp_string_metric.cpp
+++ b/src/cpp_string_metric.cpp
--- a/src/cpp_string_metric.pyx
+++ b/src/cpp_string_metric.pyx
@ -55,9 +55,9 @@ def levenshtein(s1, s2, *, weights=(1,1,1), processor=None, max=None):
    Parameters
    ----------
-    s1 : str
+    s1 : Sequence[Hashable]
        First string to compare.
-    s2 : str
+    s2 : Sequence[Hashable]
        Second string to compare.
    weights : Tuple[int, int, int] or None, optional
        The weights for the three operations in the form
@ -259,9 +259,9 @@ def levenshtein_editops(s1, s2, *, processor=None):
    Parameters
    ----------
-    s1 : str
+    s1 : Sequence[Hashable]
        First string to compare.
-    s2 : str
+    s2 : Sequence[Hashable]
        Second string to compare.
    processor: bool or callable, optional
        Optional callable that is used to preprocess the strings before
@ -301,9 +301,9 @@ def normalized_levenshtein(s1, s2, *, weights=(1,1,1), processor=None, score_cut
    Parameters
    ----------
-    s1 : str
+    s1 : Sequence[Hashable]
        First string to compare.
-    s2 : str
+    s2 : Sequence[Hashable]
        Second string to compare.
    weights : Tuple[int, int, int] or None, optional
        The weights for the three operations in the form
@ -407,9 +407,9 @@ def hamming(s1, s2, *, processor=None, max=None):
    Parameters
    ----------
-    s1 : str
+    s1 : Sequence[Hashable]
        First string to compare.
-    s2 : str
+    s2 : Sequence[Hashable]
        Second string to compare.
    processor: bool or callable, optional
        Optional callable that is used to preprocess the strings before
@ -451,9 +451,9 @@ def normalized_hamming(s1, s2, *, processor=None, score_cutoff=None):
    Parameters
    ----------
-    s1 : str
+    s1 : Sequence[Hashable]
        First string to compare.
-    s2 : str
+    s2 : Sequence[Hashable]
        Second string to compare.
    processor: bool or callable, optional
        Optional callable that is used to preprocess the strings before
@ -498,9 +498,9 @@ def jaro_similarity(s1, s2, *, processor=None, score_cutoff=None):
    Parameters
    ----------
-    s1 : str
+    s1 : Sequence[Hashable]
        First string to compare.
-    s2 : str
+    s2 : Sequence[Hashable]
        Second string to compare.
    processor: bool or callable, optional
        Optional callable that is used to preprocess the strings before
@ -537,9 +537,9 @@ def jaro_winkler_similarity(s1, s2, *, double prefix_weight=0.1, processor=None,
    Parameters
    ----------
-    s1 : str
+    s1 : Sequence[Hashable]
        First string to compare.
-    s2 : str
+    s2 : Sequence[Hashable]
        Second string to compare.
    prefix_weight : float, optional
        Weight used for the common prefix of the two strings.
--- a/src/rapidfuzz/process.py
+++ b/src/rapidfuzz/process.py
@ -1,4 +1,4 @@
 # SPDX-License-Identifier: MIT
 # Copyright (C) 2021 Max Bachmann
-from rapidfuzz.cpp_process import extract, extractOne, extract_iter
+from rapidfuzz.cpp_process import extract, extractOne, extract_iter, cdist
--- a/tests/test_hypothesis.py
+++ b/tests/test_hypothesis.py
@ -9,6 +9,7 @@ import pytest
 from rapidfuzz import fuzz, process, utils, string_metric
 import random
 from math import isclose
 import numpy as np
 def levenshtein(s1, s2, weights=(1, 1, 1)):
    """
@ -79,6 +80,24 @@ def partial_ratio_short_needle(s1, s2):
        res = max(res, fuzz.ratio(s1, part))
    return res
 def cdist_scorer(queries, choices, scorer):
    """
    Reference implementation: uint8 matrix holding scorer(query, choice)
    for every query (rows) and every choice (columns).
    """
    shape = (len(queries), len(choices))
    scores = np.zeros(shape, dtype=np.uint8)
    for row, query in enumerate(queries):
        for col, choice in enumerate(choices):
            scores[row, col] = scorer(query, choice)
    return scores
 def cdist_distance(queries, choices, scorer):
    """
    Reference implementation: int32 matrix holding scorer(query, choice)
    for every query (rows) and every choice (columns).
    """
    shape = (len(queries), len(choices))
    distances = np.zeros(shape, dtype=np.int32)
    for row, query in enumerate(queries):
        for col, choice in enumerate(choices):
            distances[row, col] = scorer(query, choice)
    return distances
 def extractOne_scorer(s1, s2, scorer, processor=None, **kwargs):
    """
    Score ``s1`` against ``s2`` by running process.extractOne on a
    one-element choice list and returning element 1 of its result.
    """
    best_match = process.extractOne(s1, [s2], processor=processor, scorer=scorer, **kwargs)
    return best_match[1]
@ -294,3 +313,19 @@ def test_only_identical_strings_extracted(scorer, processor, choices):
    for match in matches:
        assert processor(query) == processor(match[0])
@given(queries=st.lists(st.text(), min_size=1), choices=st.lists(st.text(), min_size=1))
@settings(max_examples=500, deadline=5000)
def test_cdist(queries, choices):
    """
    process.cdist has to agree with the pairwise reference computation, both
    for two different lists and for the same list passed twice.
    """
    scorer = string_metric.levenshtein
    # the second round passes the very same list object for both arguments
    for targets in (choices, queries):
        expected = cdist_distance(queries, targets, scorer=scorer)
        actual = process.cdist(queries, targets, scorer=scorer)
        assert (actual == expected).all()