diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
index 3ba1e5f..c179db3 100644
--- a/.github/workflows/docs.yml
+++ b/.github/workflows/docs.yml
@@ -17,7 +17,7 @@ jobs:
- name: Install dependencies
run: |
- python -m pip install Sphinx sphinx_rtd_theme numpy
+ python -m pip install Sphinx furo numpy
python -m pip install .
- name: Build Site
diff --git a/LICENSE b/LICENSE
index 5b55a39..a1313a3 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,4 +1,4 @@
-Copyright © 2020 maxbachmann
+Copyright © 2020-present Max Bachmann
Copyright © 2011 Adam Cohen
Permission is hereby granted, free of charge, to any person obtaining
diff --git a/docs/contributing.rst b/docs/Contributing.rst
similarity index 100%
rename from docs/contributing.rst
rename to docs/Contributing.rst
diff --git a/docs/installation.rst b/docs/Installation.rst
similarity index 100%
rename from docs/installation.rst
rename to docs/Installation.rst
diff --git a/docs/license.rst b/docs/License.rst
similarity index 100%
rename from docs/license.rst
rename to docs/License.rst
diff --git a/docs/Usage/distance/Hamming.rst b/docs/Usage/distance/Hamming.rst
new file mode 100644
index 0000000..aaf2749
--- /dev/null
+++ b/docs/Usage/distance/Hamming.rst
@@ -0,0 +1,10 @@
+Hamming
+--------------------------
+
+distance
+~~~~~~~~
+.. autofunction:: rapidfuzz.distance.Hamming.distance
+
+normalized_distance
+~~~~~~~~~~~~~~~~~~~
+.. autofunction:: rapidfuzz.distance.Hamming.normalized_distance
diff --git a/docs/Usage/distance/Indel.rst b/docs/Usage/distance/Indel.rst
new file mode 100644
index 0000000..3c6c406
--- /dev/null
+++ b/docs/Usage/distance/Indel.rst
@@ -0,0 +1,18 @@
+Indel
+------------------------
+
+distance
+~~~~~~~~
+.. autofunction:: rapidfuzz.distance.Indel.distance
+
+normalized_distance
+~~~~~~~~~~~~~~~~~~~
+.. autofunction:: rapidfuzz.distance.Indel.normalized_distance
+
+editops
+~~~~~~~
+.. autofunction:: rapidfuzz.distance.Indel.editops
+
+opcodes
+~~~~~~~
+.. autofunction:: rapidfuzz.distance.Indel.opcodes
diff --git a/docs/Usage/distance/Jaro.rst b/docs/Usage/distance/Jaro.rst
new file mode 100644
index 0000000..e03250a
--- /dev/null
+++ b/docs/Usage/distance/Jaro.rst
@@ -0,0 +1,6 @@
+Jaro
+-----------------------
+
+similarity
+~~~~~~~~~~
+.. autofunction:: rapidfuzz.distance.Jaro.similarity
diff --git a/docs/Usage/distance/JaroWinkler.rst b/docs/Usage/distance/JaroWinkler.rst
new file mode 100644
index 0000000..0c852ea
--- /dev/null
+++ b/docs/Usage/distance/JaroWinkler.rst
@@ -0,0 +1,6 @@
+JaroWinkler
+------------------------------
+
+similarity
+~~~~~~~~~~
+.. autofunction:: rapidfuzz.distance.JaroWinkler.similarity
\ No newline at end of file
diff --git a/docs/Usage/distance/Levenshtein.rst b/docs/Usage/distance/Levenshtein.rst
new file mode 100644
index 0000000..28c7a7c
--- /dev/null
+++ b/docs/Usage/distance/Levenshtein.rst
@@ -0,0 +1,25 @@
+Levenshtein
+------------------------------
+distance
+~~~~~~~~
+.. autofunction:: rapidfuzz.distance.Levenshtein.distance
+
+normalized_distance
+~~~~~~~~~~~~~~~~~~~
+.. autofunction:: rapidfuzz.distance.Levenshtein.normalized_distance
+
+similarity
+~~~~~~~~~~
+.. autofunction:: rapidfuzz.distance.Levenshtein.similarity
+
+normalized_similarity
+~~~~~~~~~~~~~~~~~~~~~
+.. autofunction:: rapidfuzz.distance.Levenshtein.normalized_similarity
+
+editops
+~~~~~~~
+.. autofunction:: rapidfuzz.distance.Levenshtein.editops
+
+opcodes
+~~~~~~~
+.. autofunction:: rapidfuzz.distance.Levenshtein.opcodes
\ No newline at end of file
diff --git a/docs/img/indel_levenshtein.svg b/docs/Usage/distance/img/indel_levenshtein.svg
similarity index 100%
rename from docs/img/indel_levenshtein.svg
rename to docs/Usage/distance/img/indel_levenshtein.svg
diff --git a/docs/img/uniform_levenshtein.svg b/docs/Usage/distance/img/uniform_levenshtein.svg
similarity index 100%
rename from docs/img/uniform_levenshtein.svg
rename to docs/Usage/distance/img/uniform_levenshtein.svg
diff --git a/docs/Usage/distance/index.rst b/docs/Usage/distance/index.rst
new file mode 100644
index 0000000..85fe325
--- /dev/null
+++ b/docs/Usage/distance/index.rst
@@ -0,0 +1,21 @@
+distance
+==================
+
+rapidfuzz.distance.Editops
+------------------------------
+.. autoclass:: rapidfuzz.distance.Editops
+ :members:
+
+rapidfuzz.distance.Opcodes
+------------------------------
+.. autoclass:: rapidfuzz.distance.Opcodes
+ :members:
+
+.. toctree::
+ :maxdepth: 1
+
+ Levenshtein
+ Indel
+ Hamming
+ Jaro
+ JaroWinkler
diff --git a/docs/fuzz.rst b/docs/Usage/fuzz.rst
similarity index 96%
rename from docs/fuzz.rst
rename to docs/Usage/fuzz.rst
index b53fb94..bf74f2b 100644
--- a/docs/fuzz.rst
+++ b/docs/Usage/fuzz.rst
@@ -1,5 +1,5 @@
-fuzz module
-===========
+rapidfuzz.fuzz
+==============
ratio
-----
diff --git a/docs/img/RapidFuzz.svg b/docs/Usage/img/RapidFuzz.svg
similarity index 100%
rename from docs/img/RapidFuzz.svg
rename to docs/Usage/img/RapidFuzz.svg
diff --git a/docs/img/WRatio.svg b/docs/Usage/img/WRatio.svg
similarity index 100%
rename from docs/img/WRatio.svg
rename to docs/Usage/img/WRatio.svg
diff --git a/docs/img/extractOne.svg b/docs/Usage/img/extractOne.svg
similarity index 100%
rename from docs/img/extractOne.svg
rename to docs/Usage/img/extractOne.svg
diff --git a/docs/Usage/img/indel_levenshtein.svg b/docs/Usage/img/indel_levenshtein.svg
new file mode 100644
index 0000000..3977ae5
--- /dev/null
+++ b/docs/Usage/img/indel_levenshtein.svg
@@ -0,0 +1,1597 @@
+
+
+
diff --git a/docs/img/partial_ratio_long_needle.svg b/docs/Usage/img/partial_ratio_long_needle.svg
similarity index 100%
rename from docs/img/partial_ratio_long_needle.svg
rename to docs/Usage/img/partial_ratio_long_needle.svg
diff --git a/docs/img/partial_ratio_short_needle.svg b/docs/Usage/img/partial_ratio_short_needle.svg
similarity index 100%
rename from docs/img/partial_ratio_short_needle.svg
rename to docs/Usage/img/partial_ratio_short_needle.svg
diff --git a/docs/img/partial_token_ratio.svg b/docs/Usage/img/partial_token_ratio.svg
similarity index 100%
rename from docs/img/partial_token_ratio.svg
rename to docs/Usage/img/partial_token_ratio.svg
diff --git a/docs/img/partial_token_set_ratio.svg b/docs/Usage/img/partial_token_set_ratio.svg
similarity index 100%
rename from docs/img/partial_token_set_ratio.svg
rename to docs/Usage/img/partial_token_set_ratio.svg
diff --git a/docs/img/partial_token_sort_ratio.svg b/docs/Usage/img/partial_token_sort_ratio.svg
similarity index 100%
rename from docs/img/partial_token_sort_ratio.svg
rename to docs/Usage/img/partial_token_sort_ratio.svg
diff --git a/docs/img/ratio.svg b/docs/Usage/img/ratio.svg
similarity index 100%
rename from docs/img/ratio.svg
rename to docs/Usage/img/ratio.svg
diff --git a/docs/img/scorer.svg b/docs/Usage/img/scorer.svg
similarity index 100%
rename from docs/img/scorer.svg
rename to docs/Usage/img/scorer.svg
diff --git a/docs/img/token_ratio.svg b/docs/Usage/img/token_ratio.svg
similarity index 100%
rename from docs/img/token_ratio.svg
rename to docs/Usage/img/token_ratio.svg
diff --git a/docs/img/token_set_ratio.svg b/docs/Usage/img/token_set_ratio.svg
similarity index 100%
rename from docs/img/token_set_ratio.svg
rename to docs/Usage/img/token_set_ratio.svg
diff --git a/docs/img/token_sort_ratio.svg b/docs/Usage/img/token_sort_ratio.svg
similarity index 100%
rename from docs/img/token_sort_ratio.svg
rename to docs/Usage/img/token_sort_ratio.svg
diff --git a/docs/Usage/img/uniform_levenshtein.svg b/docs/Usage/img/uniform_levenshtein.svg
new file mode 100644
index 0000000..36d6685
--- /dev/null
+++ b/docs/Usage/img/uniform_levenshtein.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/docs/Usage/index.rst b/docs/Usage/index.rst
new file mode 100644
index 0000000..e9e46db
--- /dev/null
+++ b/docs/Usage/index.rst
@@ -0,0 +1,12 @@
+Usage
+=====
+
+.. toctree::
+ :maxdepth: 3
+
+ process
+ distance/index
+ fuzz
+ string_metric
+ utils
+
diff --git a/docs/process.rst b/docs/Usage/process.rst
similarity index 88%
rename from docs/process.rst
rename to docs/Usage/process.rst
index 314cd20..95a6a6f 100644
--- a/docs/process.rst
+++ b/docs/Usage/process.rst
@@ -1,5 +1,5 @@
-process module
-==============
+rapidfuzz.process
+=================
cdist
----------
diff --git a/docs/string_metric.rst b/docs/Usage/string_metric.rst
similarity index 91%
rename from docs/string_metric.rst
rename to docs/Usage/string_metric.rst
index 5bb756c..e9d730a 100644
--- a/docs/string_metric.rst
+++ b/docs/Usage/string_metric.rst
@@ -1,5 +1,5 @@
-string_metric module
-====================
+rapidfuzz.string_metric
+=======================
levenshtein
-----------
diff --git a/docs/utils.rst b/docs/Usage/utils.rst
similarity index 72%
rename from docs/utils.rst
rename to docs/Usage/utils.rst
index e8a0872..b1f6493 100644
--- a/docs/utils.rst
+++ b/docs/Usage/utils.rst
@@ -1,5 +1,5 @@
-utils module
-==============
+rapidfuzz.utils
+===============
default_process
---------------
diff --git a/docs/conf.py b/docs/conf.py
index 6869e4f..7eddbba 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -48,7 +48,9 @@ exclude_patterns = []
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
-html_theme = 'sphinx_rtd_theme'
+html_theme = 'furo'
+pygments_style = "monokai"
+pygments_dark_style = "monokai"
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
diff --git a/docs/index.rst b/docs/index.rst
index 48af0ea..34fd81d 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -1,37 +1,25 @@
-.. RapidFuzz documentation master file, created by
- sphinx-quickstart on Fri Jan 1 19:02:29 2021.
- You can adapt this file completely to your liking, but it should at least
- contain the root `toctree` directive.
-
Welcome to RapidFuzz's documentation!
=====================================
.. toctree::
:maxdepth: 2
- :caption: Installation:
- installation
+ Installation
.. toctree::
- :maxdepth: 2
- :caption: Usage:
+ :maxdepth: 3
- fuzz
- string_metric
- process
- utils
+ Usage/index
.. toctree::
:maxdepth: 1
- :caption: Contributing:
- contributing
+ Contributing
.. toctree::
:maxdepth: 2
- :caption: License:
- license
+ License
Indices and tables
==================
diff --git a/src/cython/cpp_process_cdist.pyx b/src/cython/cpp_process_cdist.pyx
index 0eed650..991e170 100644
--- a/src/cython/cpp_process_cdist.pyx
+++ b/src/cython/cpp_process_cdist.pyx
@@ -207,10 +207,12 @@ def cdist(queries, choices, *, scorer=ratio, processor=None, score_cutoff=None,
scorer : Callable, optional
Optional callable that is used to calculate the matching score between
the query and each choice. This can be:
+
- a scorer using the RapidFuzz C-API like the builtin scorers in RapidFuzz,
which can return a distance or similarity between two strings. Further details can be found here.
- a Python function which returns a similarity between two strings in the range 0-100. This is not
recommended, since it is far slower than a scorer using the RapidFuzz C-API.
+
fuzz.ratio is used by default.
processor : Callable, optional
Optional callable that is used to preprocess the strings before
@@ -224,11 +226,13 @@ def cdist(queries, choices, *, scorer=ratio, processor=None, score_cutoff=None,
dtype : data-type, optional
The desired data-type for the result array.Depending on the scorer type the following
dtypes are supported:
+
- similarity:
- - np.float32, np.float64
- - np.uint8 -> stores fixed point representation of the result scaled to a range 0-100
+ - np.float32, np.float64
+ - np.uint8 -> stores fixed point representation of the result scaled to a range 0-100
- distance:
- - np.int8, np.int16, np.int32, np.int64
+ - np.int8, np.int16, np.int32, np.int64
+
If not given, then the type will be np.float32 for similarities and np.int32 for distances.
workers : int, optional
The calculation is subdivided into workers sections and evaluated in parallel.
diff --git a/src/cython/distance/Indel.pyx b/src/cython/distance/Indel.pyx
index bb87b01..0dd73ac 100644
--- a/src/cython/distance/Indel.pyx
+++ b/src/cython/distance/Indel.pyx
@@ -108,14 +108,14 @@ def distance(s1, s2, *, weights=(1,1,1), processor=None, score_cutoff=None):
- If the length of the shorter string is ≤ 64 after removing the common affix
Hyyrös' lcs algorithm is used, which calculates the Indel distance in
- parallel. The algorithm is described by [1]_ and is extended with support
+ parallel. The algorithm is described by [5]_ and is extended with support
for UTF32 in this implementation. The time complexity of this
algorithm is ``O(N)``.
- If the length of the shorter string is ≥ 64 after removing the common affix
a blockwise implementation of the Hyyrös' lcs algorithm is used, which calculates
the Levenshtein distance in parallel (64 characters at a time).
- The algorithm is described by [1]_. The time complexity of this
+ The algorithm is described by [5]_. The time complexity of this
algorithm is ``O([N/64]M)``.
The following image shows a benchmark of the Indel distance in RapidFuzz
@@ -128,7 +128,7 @@ def distance(s1, s2, *, weights=(1,1,1), processor=None, score_cutoff=None):
References
----------
- .. [4] Hyyrö, Heikki. "Bit-Parallel LCS-length Computation Revisited"
+ .. [5] Hyyrö, Heikki. "Bit-Parallel LCS-length Computation Revisited"
Proc. 15th Australasian Workshop on Combinatorial Algorithms (AWOCA 2004).
Examples
@@ -243,11 +243,11 @@ def editops(s1, s2, *, processor=None):
Notes
-----
The alignment is calculated using an algorithm of Heikki Hyyrö, which is
- described [1]_. It has a time complexity and memory usage of ``O([N/64] * M)``.
+ described in [6]_. It has a time complexity and memory usage of ``O([N/64] * M)``.
References
----------
- .. [1] Hyyrö, Heikki. "A Note on Bit-Parallel Alignment Computation."
+ .. [6] Hyyrö, Heikki. "A Note on Bit-Parallel Alignment Computation."
Stringology (2004).
Examples
@@ -289,11 +289,11 @@ def opcodes(s1, s2, *, processor=None):
Notes
-----
The alignment is calculated using an algorithm of Heikki Hyyrö, which is
- described [1]_. It has a time complexity and memory usage of ``O([N/64] * M)``.
+ described in [7]_. It has a time complexity and memory usage of ``O([N/64] * M)``.
References
----------
- .. [1] Hyyrö, Heikki. "A Note on Bit-Parallel Alignment Computation."
+ .. [7] Hyyrö, Heikki. "A Note on Bit-Parallel Alignment Computation."
Stringology (2004).
Examples
diff --git a/src/cython/distance/Levenshtein.pyx b/src/cython/distance/Levenshtein.pyx
index 5a79af3..351a153 100644
--- a/src/cython/distance/Levenshtein.pyx
+++ b/src/cython/distance/Levenshtein.pyx
@@ -450,11 +450,11 @@ def editops(s1, s2, *, processor=None):
Notes
-----
The alignment is calculated using an algorithm of Heikki Hyyrö, which is
- described [1]_. It has a time complexity and memory usage of ``O([N/64] * M)``.
+ described in [8]_. It has a time complexity and memory usage of ``O([N/64] * M)``.
References
----------
- .. [1] Hyyrö, Heikki. "A Note on Bit-Parallel Alignment Computation."
+ .. [8] Hyyrö, Heikki. "A Note on Bit-Parallel Alignment Computation."
Stringology (2004).
Examples
@@ -496,11 +496,11 @@ def opcodes(s1, s2, *, processor=None):
Notes
-----
The alignment is calculated using an algorithm of Heikki Hyyrö, which is
- described [1]_. It has a time complexity and memory usage of ``O([N/64] * M)``.
+ described in [9]_. It has a time complexity and memory usage of ``O([N/64] * M)``.
References
----------
- .. [1] Hyyrö, Heikki. "A Note on Bit-Parallel Alignment Computation."
+ .. [9] Hyyrö, Heikki. "A Note on Bit-Parallel Alignment Computation."
Stringology (2004).
Examples
diff --git a/src/cython/distance/_initialize.pyx b/src/cython/distance/_initialize.pyx
index 0fd7eeb..a16d6cd 100644
--- a/src/cython/distance/_initialize.pyx
+++ b/src/cython/distance/_initialize.pyx
@@ -174,9 +174,10 @@ cdef class Editops:
Each tuple is of the form (tag, src_pos, dest_pos).
The tags are strings, with these meanings:
- 'replace': s1[src_pos] should be replaced by s2[dest_pos]
- 'delete': s1[src_pos] should be deleted.
- 'insert': s2[dest_pos] should be inserted at s1[src_pos].
+
+ - 'replace': s1[src_pos] should be replaced by s2[dest_pos]
+ - 'delete': s1[src_pos] should be deleted
+ - 'insert': s2[dest_pos] should be inserted at s1[src_pos]
"""
def __init__(self, editops=None, src_len=0, dest_len=0):
@@ -308,15 +309,14 @@ cdef class Opcodes:
tuple preceding it, and likewise for j1 == the previous j2.
The tags are strings, with these meanings:
- 'replace': s1[i1:i2] should be replaced by s2[j1:j2]
- 'delete': s1[i1:i2] should be deleted.
- Note that j1==j2 in this case.
- 'insert': s2[j1:j2] should be inserted at s1[i1:i1].
- Note that i1==i2 in this case.
- 'equal': s1[i1:i2] == s2[j1:j2]
+
+ - 'replace': s1[i1:i2] should be replaced by s2[j1:j2]
+ - 'delete': s1[i1:i2] should be deleted. Note that j1==j2 in this case.
+ - 'insert': s2[j1:j2] should be inserted at s1[i1:i1]. Note that i1==i2 in this case.
+ - 'equal': s1[i1:i2] == s2[j1:j2]
Note
- --------
+ ----
Opcodes uses tuples similar to difflib's SequenceMatcher to make them
interoperable
"""