update documentation theme
|
@ -17,7 +17,7 @@ jobs:
|
|||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
python -m pip install Sphinx sphinx_rtd_theme numpy
|
||||
python -m pip install Sphinx furo numpy
|
||||
python -m pip install .
|
||||
|
||||
- name: Build Site
|
||||
|
|
2
LICENSE
|
@ -1,4 +1,4 @@
|
|||
Copyright © 2020 maxbachmann
|
||||
Copyright © 2020-present Max Bachmann
|
||||
Copyright © 2011 Adam Cohen
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining
|
||||
|
|
|
@ -0,0 +1,10 @@
|
|||
Hamming
|
||||
--------------------------
|
||||
|
||||
distance
|
||||
~~~~~~~~
|
||||
.. autofunction:: rapidfuzz.distance.Hamming.distance
|
||||
|
||||
normalized_distance
|
||||
~~~~~~~~~~~~~~~~~~~
|
||||
.. autofunction:: rapidfuzz.distance.Hamming.normalized_distance
|
|
@ -0,0 +1,18 @@
|
|||
Indel
|
||||
------------------------
|
||||
|
||||
distance
|
||||
~~~~~~~~
|
||||
.. autofunction:: rapidfuzz.distance.Indel.distance
|
||||
|
||||
normalized_distance
|
||||
~~~~~~~~~~~~~~~~~~~
|
||||
.. autofunction:: rapidfuzz.distance.Indel.normalized_distance
|
||||
|
||||
editops
|
||||
~~~~~~~
|
||||
.. autofunction:: rapidfuzz.distance.Indel.editops
|
||||
|
||||
opcodes
|
||||
~~~~~~~
|
||||
.. autofunction:: rapidfuzz.distance.Indel.opcodes
|
|
@ -0,0 +1,6 @@
|
|||
Jaro
|
||||
-----------------------
|
||||
|
||||
similarity
|
||||
~~~~~~~~~~
|
||||
.. autofunction:: rapidfuzz.distance.Jaro.similarity
|
|
@ -0,0 +1,6 @@
|
|||
JaroWinkler
|
||||
------------------------------
|
||||
|
||||
similarity
|
||||
~~~~~~~~~~
|
||||
.. autofunction:: rapidfuzz.distance.JaroWinkler.similarity
|
|
@ -0,0 +1,25 @@
|
|||
Levenshtein
|
||||
------------------------------
|
||||
distance
|
||||
~~~~~~~~
|
||||
.. autofunction:: rapidfuzz.distance.Levenshtein.distance
|
||||
|
||||
normalized_distance
|
||||
~~~~~~~~~~~~~~~~~~~
|
||||
.. autofunction:: rapidfuzz.distance.Levenshtein.normalized_distance
|
||||
|
||||
similarity
|
||||
~~~~~~~~~~
|
||||
.. autofunction:: rapidfuzz.distance.Levenshtein.similarity
|
||||
|
||||
normalized_similarity
|
||||
~~~~~~~~~~~~~~~~~~~~~
|
||||
.. autofunction:: rapidfuzz.distance.Levenshtein.normalized_similarity
|
||||
|
||||
editops
|
||||
~~~~~~~
|
||||
.. autofunction:: rapidfuzz.distance.Levenshtein.editops
|
||||
|
||||
opcodes
|
||||
~~~~~~~
|
||||
.. autofunction:: rapidfuzz.distance.Levenshtein.opcodes
|
Before Width: | Height: | Size: 42 KiB After Width: | Height: | Size: 42 KiB |
Before Width: | Height: | Size: 32 KiB After Width: | Height: | Size: 32 KiB |
|
@ -0,0 +1,21 @@
|
|||
distance
|
||||
==================
|
||||
|
||||
rapidfuzz.distance.Editops
|
||||
------------------------------
|
||||
.. autoclass:: rapidfuzz.distance.Editops
|
||||
:members:
|
||||
|
||||
rapidfuzz.distance.Opcodes
|
||||
------------------------------
|
||||
.. autoclass:: rapidfuzz.distance.Opcodes
|
||||
:members:
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
|
||||
Levenshtein
|
||||
Indel
|
||||
Hamming
|
||||
Jaro
|
||||
JaroWinkler
|
|
@ -1,5 +1,5 @@
|
|||
fuzz module
|
||||
===========
|
||||
rapidfuzz.fuzz
|
||||
==============
|
||||
|
||||
ratio
|
||||
-----
|
Before Width: | Height: | Size: 5.4 KiB After Width: | Height: | Size: 5.4 KiB |
Before Width: | Height: | Size: 25 KiB After Width: | Height: | Size: 25 KiB |
Before Width: | Height: | Size: 54 KiB After Width: | Height: | Size: 54 KiB |
After Width: | Height: | Size: 42 KiB |
Before Width: | Height: | Size: 42 KiB After Width: | Height: | Size: 42 KiB |
Before Width: | Height: | Size: 41 KiB After Width: | Height: | Size: 41 KiB |
Before Width: | Height: | Size: 28 KiB After Width: | Height: | Size: 28 KiB |
Before Width: | Height: | Size: 24 KiB After Width: | Height: | Size: 24 KiB |
Before Width: | Height: | Size: 25 KiB After Width: | Height: | Size: 25 KiB |
Before Width: | Height: | Size: 24 KiB After Width: | Height: | Size: 24 KiB |
Before Width: | Height: | Size: 54 KiB After Width: | Height: | Size: 54 KiB |
Before Width: | Height: | Size: 27 KiB After Width: | Height: | Size: 27 KiB |
Before Width: | Height: | Size: 25 KiB After Width: | Height: | Size: 25 KiB |
Before Width: | Height: | Size: 26 KiB After Width: | Height: | Size: 26 KiB |
After Width: | Height: | Size: 32 KiB |
|
@ -0,0 +1,12 @@
|
|||
Usage
|
||||
=====
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 3
|
||||
|
||||
process
|
||||
distance/index
|
||||
fuzz
|
||||
string_metric
|
||||
utils
|
||||
|
|
@ -1,5 +1,5 @@
|
|||
process module
|
||||
==============
|
||||
rapidfuzz.process
|
||||
=================
|
||||
|
||||
cdist
|
||||
----------
|
|
@ -1,5 +1,5 @@
|
|||
string_metric module
|
||||
====================
|
||||
rapidfuzz.string_metric
|
||||
=======================
|
||||
|
||||
levenshtein
|
||||
-----------
|
|
@ -1,5 +1,5 @@
|
|||
utils module
|
||||
==============
|
||||
rapidfuzz.utils
|
||||
===============
|
||||
|
||||
default_process
|
||||
---------------
|
|
@ -48,7 +48,9 @@ exclude_patterns = []
|
|||
# The theme to use for HTML and HTML Help pages. See the documentation for
|
||||
# a list of builtin themes.
|
||||
#
|
||||
html_theme = 'sphinx_rtd_theme'
|
||||
html_theme = 'furo'
|
||||
pygments_style = "monokai"
|
||||
pygments_dark_style = "monokai"
|
||||
|
||||
# Add any paths that contain custom static files (such as style sheets) here,
|
||||
# relative to this directory. They are copied after the builtin static files,
|
||||
|
|
|
@ -1,37 +1,25 @@
|
|||
.. RapidFuzz documentation master file, created by
|
||||
sphinx-quickstart on Fri Jan 1 19:02:29 2021.
|
||||
You can adapt this file completely to your liking, but it should at least
|
||||
contain the root `toctree` directive.
|
||||
|
||||
Welcome to RapidFuzz's documentation!
|
||||
=====================================
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
:caption: Installation:
|
||||
|
||||
installation
|
||||
Installation
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
:caption: Usage:
|
||||
:maxdepth: 3
|
||||
|
||||
fuzz
|
||||
string_metric
|
||||
process
|
||||
utils
|
||||
Usage/index
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
:caption: Contributing:
|
||||
|
||||
contributing
|
||||
Contributing
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
:caption: License:
|
||||
|
||||
license
|
||||
License
|
||||
|
||||
Indices and tables
|
||||
==================
|
||||
|
|
|
@ -207,10 +207,12 @@ def cdist(queries, choices, *, scorer=ratio, processor=None, score_cutoff=None,
|
|||
scorer : Callable, optional
|
||||
Optional callable that is used to calculate the matching score between
|
||||
the query and each choice. This can be:
|
||||
|
||||
- a scorer using the RapidFuzz C-API like the builtin scorers in RapidFuzz,
|
||||
which can return a distance or similarity between two strings. Further details can be found here.
|
||||
- a Python function which returns a similarity between two strings in the range 0-100. This is not
|
||||
recommended, since it is far slower than a scorer using the RapidFuzz C-API.
|
||||
|
||||
fuzz.ratio is used by default.
|
||||
processor : Callable, optional
|
||||
Optional callable that is used to preprocess the strings before
|
||||
|
@ -224,11 +226,13 @@ def cdist(queries, choices, *, scorer=ratio, processor=None, score_cutoff=None,
|
|||
dtype : data-type, optional
|
||||
The desired data-type for the result array. Depending on the scorer type the following
|
||||
dtypes are supported:
|
||||
|
||||
- similarity:
|
||||
- np.float32, np.float64
|
||||
- np.uint8 -> stores fixed point representation of the result scaled to a range 0-100
|
||||
- np.float32, np.float64
|
||||
- np.uint8 -> stores fixed point representation of the result scaled to a range 0-100
|
||||
- distance:
|
||||
- np.int8, np.int16, np.int32, np.int64
|
||||
- np.int8, np.int16, np.int32, np.int64
|
||||
|
||||
If not given, then the type will be np.float32 for similarities and np.int32 for distances.
|
||||
workers : int, optional
|
||||
The calculation is subdivided into workers sections and evaluated in parallel.
|
||||
|
|
|
@ -108,14 +108,14 @@ def distance(s1, s2, *, weights=(1,1,1), processor=None, score_cutoff=None):
|
|||
|
||||
- If the length of the shorter string is ≤ 64 after removing the common affix
|
||||
Hyyrö's LCS algorithm is used, which calculates the Indel distance in
|
||||
parallel. The algorithm is described by [1]_ and is extended with support
|
||||
parallel. The algorithm is described by [5]_ and is extended with support
|
||||
for UTF32 in this implementation. The time complexity of this
|
||||
algorithm is ``O(N)``.
|
||||
|
||||
- If the length of the shorter string is > 64 after removing the common affix
|
||||
a blockwise implementation of Hyyrö's LCS algorithm is used, which calculates
|
||||
the Levenshtein distance in parallel (64 characters at a time).
|
||||
The algorithm is described by [1]_. The time complexity of this
|
||||
The algorithm is described by [5]_. The time complexity of this
|
||||
algorithm is ``O([N/64]M)``.
|
||||
|
||||
The following image shows a benchmark of the Indel distance in RapidFuzz
|
||||
|
@ -128,7 +128,7 @@ def distance(s1, s2, *, weights=(1,1,1), processor=None, score_cutoff=None):
|
|||
|
||||
References
|
||||
----------
|
||||
.. [4] Hyyrö, Heikki. "Bit-Parallel LCS-length Computation Revisited"
|
||||
.. [5] Hyyrö, Heikki. "Bit-Parallel LCS-length Computation Revisited"
|
||||
Proc. 15th Australasian Workshop on Combinatorial Algorithms (AWOCA 2004).
|
||||
|
||||
Examples
|
||||
|
@ -243,11 +243,11 @@ def editops(s1, s2, *, processor=None):
|
|||
Notes
|
||||
-----
|
||||
The alignment is calculated using an algorithm of Heikki Hyyrö, which is
|
||||
described [1]_. It has a time complexity and memory usage of ``O([N/64] * M)``.
|
||||
described in [6]_. It has a time complexity and memory usage of ``O([N/64] * M)``.
|
||||
|
||||
References
|
||||
----------
|
||||
.. [1] Hyyrö, Heikki. "A Note on Bit-Parallel Alignment Computation."
|
||||
.. [6] Hyyrö, Heikki. "A Note on Bit-Parallel Alignment Computation."
|
||||
Stringology (2004).
|
||||
|
||||
Examples
|
||||
|
@ -289,11 +289,11 @@ def opcodes(s1, s2, *, processor=None):
|
|||
Notes
|
||||
-----
|
||||
The alignment is calculated using an algorithm of Heikki Hyyrö, which is
|
||||
described [1]_. It has a time complexity and memory usage of ``O([N/64] * M)``.
|
||||
described in [7]_. It has a time complexity and memory usage of ``O([N/64] * M)``.
|
||||
|
||||
References
|
||||
----------
|
||||
.. [1] Hyyrö, Heikki. "A Note on Bit-Parallel Alignment Computation."
|
||||
.. [7] Hyyrö, Heikki. "A Note on Bit-Parallel Alignment Computation."
|
||||
Stringology (2004).
|
||||
|
||||
Examples
|
||||
|
|
|
@ -450,11 +450,11 @@ def editops(s1, s2, *, processor=None):
|
|||
Notes
|
||||
-----
|
||||
The alignment is calculated using an algorithm of Heikki Hyyrö, which is
|
||||
described [1]_. It has a time complexity and memory usage of ``O([N/64] * M)``.
|
||||
described in [8]_. It has a time complexity and memory usage of ``O([N/64] * M)``.
|
||||
|
||||
References
|
||||
----------
|
||||
.. [1] Hyyrö, Heikki. "A Note on Bit-Parallel Alignment Computation."
|
||||
.. [8] Hyyrö, Heikki. "A Note on Bit-Parallel Alignment Computation."
|
||||
Stringology (2004).
|
||||
|
||||
Examples
|
||||
|
@ -496,11 +496,11 @@ def opcodes(s1, s2, *, processor=None):
|
|||
Notes
|
||||
-----
|
||||
The alignment is calculated using an algorithm of Heikki Hyyrö, which is
|
||||
described [1]_. It has a time complexity and memory usage of ``O([N/64] * M)``.
|
||||
described in [9]_. It has a time complexity and memory usage of ``O([N/64] * M)``.
|
||||
|
||||
References
|
||||
----------
|
||||
.. [1] Hyyrö, Heikki. "A Note on Bit-Parallel Alignment Computation."
|
||||
.. [9] Hyyrö, Heikki. "A Note on Bit-Parallel Alignment Computation."
|
||||
Stringology (2004).
|
||||
|
||||
Examples
|
||||
|
|
|
@ -174,9 +174,10 @@ cdef class Editops:
|
|||
Each tuple is of the form (tag, src_pos, dest_pos).
|
||||
|
||||
The tags are strings, with these meanings:
|
||||
'replace': s1[src_pos] should be replaced by s2[dest_pos]
|
||||
'delete': s1[src_pos] should be deleted.
|
||||
'insert': s2[dest_pos] should be inserted at s1[src_pos].
|
||||
|
||||
'replace': s1[src_pos] should be replaced by s2[dest_pos]
|
||||
'delete': s1[src_pos] should be deleted
|
||||
'insert': s2[dest_pos] should be inserted at s1[src_pos]
|
||||
"""
|
||||
|
||||
def __init__(self, editops=None, src_len=0, dest_len=0):
|
||||
|
@ -308,15 +309,14 @@ cdef class Opcodes:
|
|||
tuple preceding it, and likewise for j1 == the previous j2.
|
||||
|
||||
The tags are strings, with these meanings:
|
||||
'replace': s1[i1:i2] should be replaced by s2[j1:j2]
|
||||
'delete': s1[i1:i2] should be deleted.
|
||||
Note that j1==j2 in this case.
|
||||
'insert': s2[j1:j2] should be inserted at s1[i1:i1].
|
||||
Note that i1==i2 in this case.
|
||||
'equal': s1[i1:i2] == s2[j1:j2]
|
||||
|
||||
'replace': s1[i1:i2] should be replaced by s2[j1:j2]
|
||||
'delete': s1[i1:i2] should be deleted. Note that j1==j2 in this case.
|
||||
'insert': s2[j1:j2] should be inserted at s1[i1:i1]. Note that i1==i2 in this case.
|
||||
'equal': s1[i1:i2] == s2[j1:j2]
|
||||
|
||||
Note
|
||||
--------
|
||||
----
|
||||
Opcodes uses tuples similar to difflib's SequenceMatcher to make them
|
||||
interoperable
|
||||
"""
|
||||
|
|