mirror of https://github.com/Yomguithereal/fog.git
Levenshtein experiments
This commit is contained in:
parent
8402e092ab
commit
ee0ff5c8f6
|
@ -0,0 +1,59 @@
|
|||
from Levenshtein import distance
|
||||
from fog.metrics import levenshtein_distance, limited_levenshtein_distance
|
||||
from experiments.utils import Timer
|
||||
|
||||
BASIC_TESTS = [
|
||||
('book', 'back', 2),
|
||||
('bbbbookkkk', 'bbbbackkkk', 2),
|
||||
('hello', 'helo', 1),
|
||||
('good sir', 'baal', 8),
|
||||
('say', 'shiver', 5),
|
||||
('feature', 'get-project-features', 13),
|
||||
('example', 'samples', 3),
|
||||
('sturgeon', 'urgently', 6),
|
||||
('levenshtein', 'frankenstein', 6),
|
||||
('distance', 'difference', 5),
|
||||
('a', 'b', 1),
|
||||
('ab', 'ac', 1),
|
||||
('ac', 'bc', 1),
|
||||
('abc', 'axc', 1),
|
||||
('xabxcdxxefxgx', '1ab2cd34ef5g6', 6),
|
||||
('a', '', 1),
|
||||
('ab', 'a', 1),
|
||||
('ab', 'b', 1),
|
||||
('abc', 'ac', 1),
|
||||
('xabxcdxxefxgx', 'abcdefg', 6),
|
||||
('', 'a', 1),
|
||||
('a', 'ab', 1),
|
||||
('b', 'ab', 1),
|
||||
('ac', 'abc', 1),
|
||||
('abcdefg', 'xabxcdxxefxgx', 6),
|
||||
('', '', 0),
|
||||
('a', 'a', 0),
|
||||
('abc', 'abc', 0),
|
||||
('', '', 0),
|
||||
('a', '', 1),
|
||||
('', 'a', 1),
|
||||
('abc', '', 3),
|
||||
('', 'abc', 3),
|
||||
('因為我是中國人所以我會說中文', '因為我是英國人所以我會說英文', 2)
|
||||
]
|
||||
|
||||
RUNS = 100_000
|
||||
SIZE_MULTIPLICATOR = 1
|
||||
|
||||
with Timer('python-Levenshtein'):
|
||||
for _ in range(RUNS):
|
||||
for A, B, _ in BASIC_TESTS:
|
||||
distance(A * SIZE_MULTIPLICATOR, B * SIZE_MULTIPLICATOR)
|
||||
|
||||
with Timer('fog'):
|
||||
for _ in range(RUNS):
|
||||
for A, B, _ in BASIC_TESTS:
|
||||
levenshtein_distance(A * SIZE_MULTIPLICATOR, B * SIZE_MULTIPLICATOR)
|
||||
|
||||
with Timer('fog-limited'):
|
||||
for _ in range(RUNS):
|
||||
for A, B, _ in BASIC_TESTS:
|
||||
limited_levenshtein_distance(1, A * SIZE_MULTIPLICATOR, B * SIZE_MULTIPLICATOR)
|
||||
|
|
@ -63,6 +63,12 @@ def levenshtein_distance(str A, str B):
|
|||
if LA == 0:
|
||||
return LB
|
||||
|
||||
if LA == 1:
|
||||
if A[start] in B:
|
||||
return LB - 1
|
||||
|
||||
return LB
|
||||
|
||||
cdef unsigned short *codes = <unsigned short *> malloc(LB * sizeof(unsigned short))
|
||||
cdef unsigned int *vector = <unsigned int *> malloc(LB * sizeof(unsigned int))
|
||||
|
||||
|
@ -118,6 +124,7 @@ def levenshtein_distance(str A, str B):
|
|||
|
||||
return current
|
||||
|
||||
|
||||
@cython.boundscheck(False)
|
||||
def limited_levenshtein_distance(unsigned int max_distance, str A, str B):
|
||||
"""
|
||||
|
|
Loading…
Reference in New Issue