From 779e26b6a0c45ef1458432bd30a852900330e148 Mon Sep 17 00:00:00 2001 From: Max Bachmann Date: Sat, 6 Mar 2021 12:27:32 +0100 Subject: [PATCH] add some benchmarks to Levenshtein distance --- .gitattributes | 1 - bench/benchmark_indel_levenshtein.py | 46 ++++ bench/benchmark_uniform_levenshtein.py | 63 ++++++ bench/results/levenshtein_indel.csv | 257 +++++++++++++++++++++ bench/results/levenshtein_uniform.csv | 257 +++++++++++++++++++++ src/cpp_string_metric.cpp | 302 ++++++++++++------------- src/cpp_string_metric.pyx | 34 +++ 7 files changed, 808 insertions(+), 152 deletions(-) create mode 100644 bench/benchmark_indel_levenshtein.py create mode 100644 bench/benchmark_uniform_levenshtein.py create mode 100644 bench/results/levenshtein_indel.csv create mode 100644 bench/results/levenshtein_uniform.csv diff --git a/.gitattributes b/.gitattributes index 0a708bd..0c1be8b 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,4 +1,3 @@ -extern/variant/* linguist-vendored src/cpp_process.cpp linguist-vendored src/cpp_fuzz.cpp linguist-vendored src/cpp_string_metric.cpp linguist-vendored diff --git a/bench/benchmark_indel_levenshtein.py b/bench/benchmark_indel_levenshtein.py new file mode 100644 index 0000000..eb4d817 --- /dev/null +++ b/bench/benchmark_indel_levenshtein.py @@ -0,0 +1,46 @@ +# todo combine benchmarks of scorers into common code base +import timeit +import pandas +import numpy as np + +def benchmark(name, func, setup, lengths, count): + print(f"starting {name}") + start = timeit.default_timer() + results = [] + for length in lengths: + test = timeit.Timer(func, setup=setup.format(length, count)) + results.append(min(test.timeit(number=1) for _ in range(7)) / count) + stop = timeit.default_timer() + print(f"finished {name}, Runtime: ", stop - start) + return results + +setup =""" +from rapidfuzz import string_metric, process, fuzz +import Levenshtein +import string +import random +random.seed(18) +characters = string.ascii_letters + string.digits + 
string.whitespace + string.punctuation +a = ''.join(random.choice(characters) for _ in range({0})) +b_list = [''.join(random.choice(characters) for _ in range({0})) for _ in range({1})] +""" + +lengths = list(range(1,512,2)) +count = 1000 + +time_rapidfuzz = benchmark("rapidfuzz", + '[string_metric.levenshtein(a, b, (1,1,2)) for b in b_list]', + setup, lengths, count) + +# this gets very slow, so only benchmark it for smaller values +time_python_levenshtein = benchmark("python-Levenshtein", + '[Levenshtein.ratio(a, b) for b in b_list]', + setup, list(range(1,256,2)), count) + [np.NaN] * 128 + +df = pandas.DataFrame(data={ + "length": lengths, + "rapidfuzz": time_rapidfuzz, + "python-Levenshtein": time_python_levenshtein, +}) + +df.to_csv("results/levenshtein_indel.csv", sep=',',index=False) diff --git a/bench/benchmark_uniform_levenshtein.py b/bench/benchmark_uniform_levenshtein.py new file mode 100644 index 0000000..68bd3e3 --- /dev/null +++ b/bench/benchmark_uniform_levenshtein.py @@ -0,0 +1,63 @@ +import timeit +import pandas +import numpy as np + +def benchmark(name, func, setup, lengths, count): + print(f"starting {name}") + start = timeit.default_timer() + results = [] + for length in lengths: + test = timeit.Timer(func, setup=setup.format(length, count)) + results.append(min(test.timeit(number=1) for _ in range(7)) / count) + stop = timeit.default_timer() + print(f"finished {name}, Runtime: ", stop - start) + return results + +setup =""" +from rapidfuzz import string_metric +import Levenshtein +import polyleven +import edlib +import editdistance +import string +import random +random.seed(18) +characters = string.ascii_letters + string.digits + string.whitespace + string.punctuation +a = ''.join(random.choice(characters) for _ in range({0})) +b_list = [''.join(random.choice(characters) for _ in range({0})) for _ in range({1})] +""" + +lengths = list(range(1,512,2)) +count = 2000 + +time_rapidfuzz = benchmark("rapidfuzz", + '[string_metric.levenshtein(a, b) 
for b in b_list]', + setup, lengths, count) + +time_polyleven = benchmark("polyleven", + '[polyleven.levenshtein(a, b) for b in b_list]', + setup, lengths, count) + +# this gets very slow, so only benchmark it for smaller values +time_python_levenshtein = benchmark("python-Levenshtein", + '[Levenshtein.distance(a, b) for b in b_list]', + setup, list(range(1,256,2)), count) + [np.NaN] * 128 + +time_edlib = benchmark("edlib", + '[edlib.align(a, b) for b in b_list]', + setup, lengths, count) + +time_editdistance = benchmark("editdistance", + '[editdistance.eval(a, b) for b in b_list]', + setup, lengths, count) + +df = pandas.DataFrame(data={ + "length": lengths, + "rapidfuzz": time_rapidfuzz, + "polyleven": time_polyleven, + "python-Levenshtein": time_python_levenshtein, + "edlib": time_edlib, + "editdistance": time_editdistance +}) + +df.to_csv("results/levenshtein_uniform.csv", sep=',',index=False) diff --git a/bench/results/levenshtein_indel.csv b/bench/results/levenshtein_indel.csv new file mode 100644 index 0000000..397ad79 --- /dev/null +++ b/bench/results/levenshtein_indel.csv @@ -0,0 +1,257 @@ +length,rapidfuzz,python-Levenshtein +1,1.2468099885154517e-07,1.0524900062591769e-07 +3,1.3070699787931517e-07,1.4172100054565818e-07 +5,1.376649997837376e-07,1.6557599883526564e-07 +7,1.423840003553778e-07,1.9732100190594793e-07 +9,1.4730299881193786e-07,2.3082799816620536e-07 +11,1.5495200204895808e-07,2.8296399977989493e-07 +13,1.6093800149974413e-07,3.063600015593693e-07 +15,1.6728799892007374e-07,4.2989499706891366e-07 +17,1.73566000739811e-07,4.978910001227632e-07 +19,1.8033800006378442e-07,5.746030001319014e-07 +21,1.869509978860151e-07,5.730039993068204e-07 +23,1.9280099877505563e-07,6.615389975195284e-07 +25,1.9892199998139403e-07,7.551790004072245e-07 +27,2.0476099962252193e-07,8.575210013077595e-07 +29,2.1009199917898513e-07,9.70551001955755e-07 +31,2.1750599989900364e-07,1.1884669984283391e-06 +33,2.2445500144385732e-07,1.3859749997209292e-06 
+35,2.465420002408791e-07,1.5269529976649209e-06 +37,2.3682500250288286e-07,1.694680999207776e-06 +39,2.4279399804072457e-07,1.7842809975263662e-06 +41,2.494870022928808e-07,2.0377930013637525e-06 +43,2.5495099907857365e-07,2.22583299910184e-06 +45,2.6129300022148525e-07,2.4161880028259475e-06 +47,2.674850002222229e-07,2.5204009980370755e-06 +49,2.7427799795987084e-07,2.8303500002948568e-06 +51,2.8008099980070255e-07,2.9500000018742867e-06 +53,2.858629995898809e-07,3.175680001731962e-06 +55,2.914520009653643e-07,3.448655999818584e-06 +57,2.981799989356659e-07,3.7015720008639617e-06 +59,3.03971999528585e-07,4.022116998385172e-06 +61,3.098369998042472e-07,4.168318002484739e-06 +63,3.158210020046681e-07,4.442164001375204e-06 +65,1.0491019966138993e-06,4.725435999716865e-06 +67,1.0886290001508314e-06,5.015237999032252e-06 +69,1.1150500031362752e-06,5.302932000631699e-06 +71,1.1387689992261585e-06,5.6032380016404205e-06 +73,1.1649619991658255e-06,5.913788998441305e-06 +75,1.181684001494432e-06,6.205016001331387e-06 +77,1.2148690002504736e-06,6.7056199986836874e-06 +79,1.2383309986034873e-06,6.861327998194611e-06 +81,1.3082799996482208e-06,7.202180000604131e-06 +83,1.4248540028347634e-06,7.767548002448165e-06 +85,1.422738001565449e-06,7.941087998915464e-06 +87,1.450416999432491e-06,8.280346999526955e-06 +89,1.5645239982404746e-06,8.693921998201404e-06 +91,1.5476300031878053e-06,9.015847001137445e-06 +93,1.5512939971813466e-06,9.27098000101978e-06 +95,1.6289170016534626e-06,9.883511997031746e-06 +97,1.6473529976792634e-06,1.0306655000022148e-05 +99,1.6678770007274579e-06,1.0720752001361688e-05 +101,1.701962999504758e-06,1.1172553000506014e-05 +103,1.7424110010324511e-06,1.1588848999963375e-05 +105,1.7684420017758384e-06,1.2029535999317887e-05 +107,1.796177002688637e-06,1.2479967997933272e-05 +109,1.81574499947601e-06,1.29443390032975e-05 +111,1.8369239987805487e-06,1.3434120999590959e-05 +113,1.8487369998183567e-06,1.3986853002279531e-05 
+115,1.8867140024667605e-06,1.4473731000180123e-05 +117,1.9749730017792897e-06,1.497004900011234e-05 +119,1.949879002495436e-06,1.589433499975712e-05 +121,1.976345000002766e-06,1.6349208999599794e-05 +123,2.0034359986311756e-06,1.6501529000379376e-05 +125,2.031471998634515e-06,1.7031809999025427e-05 +127,2.0568289983202702e-06,1.757243300016853e-05 +129,2.660693000507308e-06,1.8120761997124645e-05 +131,2.704915001231711e-06,1.8666838001081488e-05 +133,2.742181000940036e-06,1.9250337001722074e-05 +135,2.7654069999698547e-06,1.9798255001660435e-05 +137,2.8146959994046483e-06,2.0371348000480793e-05 +139,2.848131000064313e-06,2.0973023001715774e-05 +141,2.908540998760145e-06,2.1568209998804378e-05 +143,2.9293380030139815e-06,2.2170842999912565e-05 +145,2.980754998134216e-06,2.2787788999266922e-05 +147,3.0067059997236357e-06,2.34046160003345e-05 +149,3.0448510005953722e-06,2.404278999892995e-05 +151,3.0942239973228424e-06,2.4670007998793152e-05 +153,3.138435000437312e-06,2.5332846998935565e-05 +155,3.1191550006042233e-06,2.5980982001783558e-05 +157,3.1581880029989407e-06,2.664249900044524e-05 +159,3.2115840003825725e-06,2.732840700264205e-05 +161,3.324657998746261e-06,2.801514800012228e-05 +163,3.272826001193607e-06,2.8701378996629502e-05 +165,3.308176997961709e-06,2.9406192999886115e-05 +167,3.3442149979237e-06,3.0100380001385928e-05 +169,3.3808389998739584e-06,3.081379200011725e-05 +171,3.4166340010415296e-06,3.154944199923193e-05 +173,3.4526390008977615e-06,3.228768099870649e-05 +175,3.488081001705723e-06,3.3011422998242776e-05 +177,3.5223129998485093e-06,3.376738900260534e-05 +179,3.55820800177753e-06,3.452633800043259e-05 +181,3.5955040002590975e-06,3.5284777000924806e-05 +183,3.630714003520552e-06,3.606981000120868e-05 +185,3.666762997454498e-06,3.6843775000306774e-05 +187,3.7032309992355297e-06,3.764745199805475e-05 +189,3.7365140015026556e-06,3.842952999912086e-05 +191,3.7778189980599565e-06,3.9245997002581135e-05 +193,4.630024999642046e-06,4.009805099849473e-05 
+195,4.686315998696955e-06,4.0887466999265596e-05 +197,4.731885001092451e-06,4.172541099978844e-05 +199,4.772701002366375e-06,4.257207100090454e-05 +201,4.818712000997039e-06,4.3414261999714656e-05 +203,4.864718001044821e-06,4.42741710030532e-05 +205,4.907221002213191e-06,4.515596399869537e-05 +207,4.953144998580683e-06,4.603339999812306e-05 +209,4.997805997845717e-06,4.690771599780419e-05 +211,5.037921000621281e-06,4.780510400087223e-05 +213,5.079448001197306e-06,4.8702468997362304e-05 +215,5.1418200018815695e-06,4.962222600079258e-05 +217,5.174026999156922e-06,5.053158100054134e-05 +219,5.216950998146785e-06,5.146839900044142e-05 +221,5.259593999653589e-06,5.241342899898882e-05 +223,5.309487001795788e-06,5.334128300091834e-05 +225,5.352066000341438e-06,5.429771399940364e-05 +227,5.389450001530349e-06,5.5271369998081354e-05 +229,5.4432690012617964e-06,5.6246666998049474e-05 +231,5.492337000760017e-06,5.722174499896937e-05 +233,5.5285480011662e-06,5.816311300077359e-05 +235,5.581846999120898e-06,5.906109100033063e-05 +237,5.616723999992246e-06,6.012580199967488e-05 +239,5.662220999511191e-06,6.120757900134776e-05 +241,5.706366002414143e-06,6.210174300213111e-05 +243,5.763371998909861e-06,6.32610519969603e-05 +245,5.806315999507206e-06,6.418973600011668e-05 +247,5.847345000802307e-06,6.517587800044566e-05 +249,5.879340998944827e-06,6.639412399817956e-05 +251,5.920961997617269e-06,6.737035399783053e-05 +253,5.971488000795943e-06,6.849385800160235e-05 +255,6.028040999808582e-06,6.939535399942543e-05 +257,7.119267997040879e-06, +259,7.1898960013641045e-06, +261,7.247834000736475e-06, +263,7.307443000172498e-06, +265,7.3486049986968286e-06, +267,7.404593001410831e-06, +269,7.451663001120324e-06, +271,7.502079999540001e-06, +273,7.5532590017246545e-06, +275,7.6157079965923915e-06, +277,7.657873000425752e-06, +279,7.720159999735187e-06, +281,7.764944999507861e-06, +283,7.815608998498646e-06, +285,7.864227001846302e-06, +287,7.933565000712406e-06, +289,7.9909129999578e-06, 
+291,8.04641300055664e-06, +293,8.095223998680012e-06, +295,8.19476600008784e-06, +297,8.254046999354615e-06, +299,8.268542998848716e-06, +301,8.319330001540948e-06, +303,8.365203000721522e-06, +305,8.4094029989501e-06, +307,8.452086000033886e-06, +309,8.512714997777948e-06, +311,8.562089002225547e-06, +313,8.613975001935615e-06, +315,8.672142001159955e-06, +317,8.70904999828781e-06, +319,8.760986998822772e-06, +321,1.0181099998590071e-05, +323,1.0254421998979523e-05, +325,1.0315794999769422e-05, +327,1.0376272002758925e-05, +329,1.0433391998958541e-05, +331,1.043399699847214e-05, +333,1.0558438996667974e-05, +335,1.0616043000482023e-05, +337,1.0701788000005763e-05, +339,1.0763944999780505e-05, +341,1.0751557001640321e-05, +343,1.0883609000302386e-05, +345,1.094224099870189e-05, +347,1.1006179000105477e-05, +349,1.0990069997205865e-05, +351,1.105769700006931e-05, +353,1.1175416999321897e-05, +355,1.1232648001168854e-05, +357,1.129243000104907e-05, +359,1.1349045002134517e-05, +361,1.1347469000611454e-05, +363,1.139406799848075e-05, +365,1.1534793000464561e-05, +367,1.1586165997869102e-05, +369,1.1695573000906733e-05, +371,1.1749377998057753e-05, +373,1.1812402000941802e-05, +375,1.1869988997204928e-05, +377,1.193034699826967e-05, +379,1.198369100166019e-05, +381,1.204505900022923e-05, +383,1.2121160998503911e-05, +385,1.3734405998548028e-05, +387,1.3839148003171431e-05, +389,1.390762899973197e-05, +391,1.3978423001390184e-05, +393,1.4064621002034983e-05, +395,1.4136107998638181e-05, +397,1.4195920000929617e-05, +399,1.4260307998483769e-05, +401,1.4319512996735284e-05, +403,1.440900200032047e-05, +405,1.447246899988386e-05, +407,1.4431284002057509e-05, +409,1.4521236000291538e-05, +411,1.4674283000204014e-05, +413,1.4649940003437224e-05, +415,1.4816418002737919e-05, +417,1.4894330000970513e-05, +419,1.4982695996877737e-05, +421,1.5038404999359045e-05, +423,1.5136540998355486e-05, +425,1.5166949000558816e-05, +427,1.5262089000316337e-05, +429,1.5313507999962893e-05, 
+431,1.5405965998070315e-05, +433,1.5482713999517728e-05, +435,1.5537671999481974e-05, +437,1.56186969979899e-05, +439,1.569520900011412e-05, +441,1.577032400018652e-05, +443,1.583094300076482e-05, +445,1.5882118997978976e-05, +447,1.5964442998665618e-05, +449,1.7917952001880622e-05, +451,1.7928388002474095e-05, +453,1.8133491001208313e-05, +455,1.819837400034885e-05, +457,1.826715300194337e-05, +459,1.8355690997850616e-05, +461,1.842843100166647e-05, +463,1.8504235998989317e-05, +465,1.857958199980203e-05, +467,1.8573443998320728e-05, +469,1.876819199969759e-05, +471,1.8853860998206075e-05, +473,1.8885140998463613e-05, +475,1.894054399963352e-05, +477,1.904286900025909e-05, +479,1.910568500170484e-05, +481,1.9185843997547635e-05, +483,1.927839099880657e-05, +485,1.9331005998537877e-05, +487,1.9440864998614416e-05, +489,1.9505702999595088e-05, +491,1.959123100095894e-05, +493,1.9700485998328076e-05, +495,1.9776149998506297e-05, +497,1.983789099904243e-05, +499,1.992908499960322e-05, +501,2.000021700223442e-05, +503,2.0075207998161206e-05, +505,2.0127668001805433e-05, +507,2.022459599902504e-05, +509,2.0276803999877302e-05, +511,2.0369763999042335e-05, diff --git a/bench/results/levenshtein_uniform.csv b/bench/results/levenshtein_uniform.csv new file mode 100644 index 0000000..26fa167 --- /dev/null +++ b/bench/results/levenshtein_uniform.csv @@ -0,0 +1,257 @@ +length,rapidfuzz,polyleven,python-Levenshtein,edlib,editdistance +1,1.1868500000389304e-07,1.3419399965641786e-07,8.809399969322838e-08,1.173390500025562e-06,1.8403750073048286e-07 +3,1.2571299998853648e-07,1.4306399998531561e-07,1.2584300020535009e-07,1.4431384000090475e-06,3.3988450013566764e-07 +5,1.3181900000347463e-07,1.4947499994377722e-07,1.436219999959576e-07,1.8077366999932568e-06,5.73024000004807e-07 +7,1.406230000071673e-07,1.5495699972234432e-07,1.7044500009433249e-07,2.098987499994109e-06,8.09755999398476e-07 
+9,1.476800000119738e-07,1.6333499979737098e-07,2.0917000028930488e-07,2.3607205999724103e-06,1.0465779996593482e-06 +11,1.5763999999762746e-07,1.720569998724386e-07,2.5574199980837874e-07,2.608982299989293e-06,1.30297400028212e-06 +13,1.6653200000860125e-07,1.8100299985235323e-07,3.209369997421163e-07,2.906105999954889e-06,1.675448999776563e-06 +15,1.7319299999485338e-07,1.902110002447444e-07,3.7229500003377325e-07,3.191073200014216e-06,1.7951925001398194e-06 +17,1.8051499999671703e-07,1.9893099988621547e-07,4.4757600016964724e-07,3.412432500044815e-06,1.9984339996881317e-06 +19,1.8910199999311322e-07,2.0717100005640527e-07,5.366800000956573e-07,3.5666306000166517e-06,2.1883409999645664e-06 +21,1.9711999999572074e-07,2.1562500023719625e-07,6.417539998437861e-07,3.7767147000522523e-06,2.4684364998392994e-06 +23,2.058199999908084e-07,2.2377799996320392e-07,7.551230000899522e-07,4.053098700023838e-06,2.643716499733273e-06 +25,2.134079999933647e-07,2.3179399977379952e-07,8.772119999775896e-07,4.257835400039766e-06,2.8498005003712024e-06 +27,2.2175399999468933e-07,2.400359999228385e-07,1.014301999930467e-06,4.469676400003664e-06,3.0511919994751224e-06 +29,2.3199199999623947e-07,2.4800300025162874e-07,1.1565340000743165e-06,4.308660399965447e-06,3.2511130002603748e-06 +31,2.4083000000985064e-07,2.5590399991415317e-07,1.3454350000756675e-06,4.535376800049562e-06,3.4754979997160262e-06 +33,2.4826300000313487e-07,2.65549999767245e-07,1.509656000052928e-06,4.675932100053615e-06,3.6364609995871432e-06 +35,2.5635700001203075e-07,2.734009999585396e-07,1.6890759998204883e-06,4.768740300005446e-06,3.831261999948765e-06 +37,2.6371199999175585e-07,2.815040002133173e-07,1.890184999865596e-06,4.906963299981726e-06,4.03323550017376e-06 +39,2.7284499999780113e-07,2.894290000767796e-07,2.083845000015572e-06,5.093585799932043e-06,4.2792960002771e-06 +41,2.794010000002345e-07,3.084219997617765e-07,2.34501300019474e-06,5.2612680000493125e-06,4.461288999664248e-06 
+43,2.892069999944624e-07,3.057859998989443e-07,2.57303399985176e-06,5.478563199994824e-06,4.628238500117732e-06 +45,2.96037999987675e-07,3.140540002277703e-07,2.8007300002173e-06,5.701822799983347e-06,4.81603800017183e-06 +47,3.0560300000104234e-07,3.2302299996445074e-07,3.0499849999614526e-06,6.218689399975119e-06,5.034949999753735e-06 +49,3.126829999899883e-07,3.306520002297475e-07,3.314623999813193e-06,6.5288057000543626e-06,5.1712425001824155e-06 +51,3.2278000000474093e-07,3.3951300019907644e-07,3.5934249999627367e-06,6.6469786999732595e-06,5.366241000047012e-06 +53,3.2946000000322326e-07,3.4659700031625104e-07,3.875131000313558e-06,6.670764599948598e-06,5.472506999467441e-06 +55,3.3899800000369847e-07,3.5553899988371994e-07,4.241133000050467e-06,6.910302599953866e-06,5.631097999867052e-06 +57,3.4524999999518964e-07,3.626060001806764e-07,4.625921999831917e-06,7.145072399998752e-06,6.501988999843888e-06 +59,3.5378299999422325e-07,3.7095000016051933e-07,5.163817999800813e-06,7.431629999973665e-06,6.486221500381362e-06 +61,3.622729999932517e-07,3.8035199986552467e-07,5.525670999759313e-06,7.566961300017282e-06,6.520599999930709e-06 +63,3.711600000002591e-07,3.8820600002509314e-07,6.038356000317435e-06,7.725835500059476e-06,6.897806500091974e-06 +65,1.1409370000023955e-06,1.6252649998023114e-06,6.646771999839984e-06,1.2074574799953552e-05,7.180683999649773e-06 +67,1.210356999990836e-06,1.7160059996967903e-06,6.865887999992993e-06,1.3006635000056122e-05,7.4547684998833575e-06 +69,1.2598089999897863e-06,1.7802259999371017e-06,7.0848210002623094e-06,1.3396385499981987e-05,7.388586000161012e-06 +71,1.308710999992968e-06,1.8511069997657608e-06,7.65203700029815e-06,1.370664050000414e-05,7.888875999924495e-06 +73,1.3478010000085303e-06,1.9054930003221669e-06,7.918969999991533e-06,1.3841394300015964e-05,7.939535000332398e-06 +75,1.520093999999972e-06,1.9843579998450884e-06,8.163473999957204e-06,1.4223668600061499e-05,8.12262700037536e-06 
+77,1.6031359999999497e-06,2.0496810002441638e-06,8.728575000077395e-06,1.4264137999998638e-05,8.276972999738064e-06 +79,1.6138769999969324e-06,2.1083169999656084e-06,9.063925000191375e-06,1.4691883900013636e-05,8.264962500106776e-06 +81,1.6521539999985182e-06,2.186359000006633e-06,9.564869000314502e-06,1.4475649099949805e-05,8.42381500024203e-06 +83,1.7847780000010972e-06,2.2352390001287857e-06,9.994752000238802e-06,1.468018799996571e-05,8.575667499826522e-06 +85,1.7791410000000954e-06,2.309813000010764e-06,1.046966199965027e-05,1.4694576599958967e-05,8.770901999923807e-06 +87,1.772411000004581e-06,2.3600920003445936e-06,1.096699899972009e-05,1.4836430300056236e-05,8.900858500055619e-06 +89,1.798901000000796e-06,2.4182890001611668e-06,1.1469862000012656e-05,1.502379599996857e-05,9.024816999954055e-06 +91,1.8439629999988936e-06,2.4808409998513526e-06,1.1987027000031958e-05,1.5219286699993972e-05,9.391939999659371e-06 +93,1.8715719999988773e-06,2.551053999923169e-06,1.2587943999733395e-05,1.5345884899943485e-05,9.358370500194724e-06 +95,1.905643000000623e-06,2.603822999844852e-06,1.3137269000253582e-05,1.547855899998467e-05,9.567823499310179e-06 +97,1.9452800000010483e-06,2.6780799998959997e-06,1.3715703999878316e-05,1.563046579994989e-05,9.836441500738147e-06 +99,1.971807000003878e-06,2.727709999817308e-06,1.4294856000105936e-05,1.561794440003723e-05,9.895374500047182e-06 +101,2.0100020000057843e-06,2.8158400000393164e-06,1.4862864999940939e-05,1.571535250004672e-05,1.026750350047223e-05 +103,2.0546340000038297e-06,2.872343999570148e-06,1.5465368999684868e-05,1.5828928900009488e-05,1.0430514000290713e-05 +105,2.085660999995298e-06,2.9513709996535907e-06,1.6059492000294994e-05,1.5912442999979252e-05,1.0626714500176604e-05 +107,2.1173180000033654e-06,3.0016430000614488e-06,1.666508900007102e-05,1.599805309997464e-05,1.0656286999619624e-05 +109,2.1542160000080918e-06,3.074543999900925e-06,1.7276913999921818e-05,1.6094980300022142e-05,1.0867476999919745e-05 
+111,2.1918850000020027e-06,3.1456659999093977e-06,1.7893164999804875e-05,1.6154133700001694e-05,1.0865902499972435e-05 +113,2.233150999998656e-06,3.2208540001192884e-06,1.8548525999904087e-05,1.636539740002263e-05,1.1213955000130226e-05 +115,2.2712720000015444e-06,3.2817430001159664e-06,1.918714400017052e-05,1.6334746400025323e-05,1.1453830000391462e-05 +117,2.2984609999809893e-06,3.343966000102228e-06,1.9843149000280392e-05,1.654471279998688e-05,1.1611259000346763e-05 +119,2.3540550000120675e-06,3.416932000163797e-06,2.0531699000002845e-05,1.6769442499935394e-05,1.1683505500514003e-05 +121,2.3876219999863225e-06,3.5052809998887827e-06,2.1235878999959824e-05,1.7061671699957518e-05,1.1758080499930657e-05 +123,2.4399799999912377e-06,3.5566229998948984e-06,2.1899153000049413e-05,1.6963570400002937e-05,1.20360194996465e-05 +125,2.488616000022148e-06,3.6531010000544477e-06,2.262432099996659e-05,1.7134647599959866e-05,1.2012328000309935e-05 +127,2.5490269999863807e-06,3.736258999651909e-06,2.333958500003064e-05,1.7361669699948832e-05,1.2281267000616935e-05 +129,3.5103289999938173e-06,4.663819999677799e-06,2.4050677999639452e-05,2.2247314299966088e-05,1.3443910500427591e-05 +131,3.615252999992435e-06,4.759237000143911e-06,2.4785272999906746e-05,2.356447459997071e-05,1.6381731999899786e-05 +133,3.690820999992184e-06,4.8775949999253495e-06,2.554028799977459e-05,2.474819810004192e-05,1.6565407499911088e-05 +135,3.7384620000011635e-06,4.954021000230569e-06,2.629304399988541e-05,2.515236550007103e-05,1.681198500045866e-05 +137,3.8112900000157882e-06,5.080449000161026e-06,2.708617400003277e-05,2.530308839995996e-05,1.6837157500049215e-05 +139,3.872915000016518e-06,5.179206999855524e-06,2.7870433000316553e-05,2.549675330001265e-05,1.7037541500030786e-05 +141,3.934725000021899e-06,5.876413999885699e-06,2.8624325000237154e-05,2.5630653899952452e-05,1.71746810001423e-05 
+143,3.9858179999896485e-06,5.817812000259438e-06,2.942745599966656e-05,2.5587768300010794e-05,1.733695450002415e-05 +145,4.0532529999950385e-06,5.941325999629042e-06,3.024347399968974e-05,2.570756410004833e-05,1.7486718000327526e-05 +147,4.108688000002303e-06,6.020863000230747e-06,3.10270740001215e-05,2.580618920001143e-05,1.7675985500318347e-05 +149,4.163400000010141e-06,6.128498000180116e-06,3.1857814999966654e-05,2.591060770000695e-05,1.7675048000455718e-05 +151,4.230379000006223e-06,6.223307000254862e-06,3.271246300027997e-05,2.596277700004066e-05,1.7832233999797608e-05 +153,4.281233999989809e-06,6.3640520002081746e-06,3.356230500003221e-05,2.6084718300080568e-05,1.8080191999615635e-05 +155,4.3402960000094026e-06,6.4442749999216175e-06,3.4384571999908066e-05,2.624103640000612e-05,1.8250333000651152e-05 +157,4.404476999980034e-06,6.576811999821075e-06,3.5257034000096614e-05,2.635952199998428e-05,1.834267100002762e-05 +159,4.438622000009218e-06,6.657796000126836e-06,3.623611100010749e-05,2.635107299993252e-05,1.851089999945543e-05 +161,4.4866679999984175e-06,6.7962600001010275e-06,3.70947299998079e-05,2.6446089799992482e-05,1.863353550015745e-05 +163,4.556137999998098e-06,6.869682999877114e-06,3.804532200001631e-05,2.646513409999897e-05,1.8801155500113964e-05 +165,4.596227000007502e-06,7.002756000019873e-06,3.896553100003075e-05,2.6506888499989145e-05,1.8943417499940552e-05 +167,4.661907999974346e-06,7.095271999787655e-06,3.996101600023394e-05,2.6489891499932128e-05,1.9166645500263256e-05 +169,4.707229999979745e-06,7.209048999811784e-06,4.076316699956806e-05,2.656393670004036e-05,1.9303191999824776e-05 +171,4.757991999980505e-06,7.302135999907477e-06,4.173142799982088e-05,2.657363389998864e-05,1.9381161999262986e-05 +173,4.817724000020006e-06,7.444138000209932e-06,4.268075000027238e-05,2.6628140499997243e-05,1.9563834500331724e-05 +175,4.871601999980157e-06,7.4961330001315234e-06,4.362822600023719e-05,2.669399839996913e-05,1.97225525007525e-05 
+177,4.920072999993863e-06,7.6424229996519e-06,4.4626334999975374e-05,2.6757840499976738e-05,1.9793204000052357e-05 +179,4.981022999999141e-06,7.717531999787752e-06,4.561113699992347e-05,2.680855299995528e-05,1.997033800034842e-05 +181,5.051306999973804e-06,7.85036700017372e-06,4.660313499971379e-05,2.6977438900030397e-05,2.011062800011132e-05 +183,5.099084999983461e-06,7.94323899981464e-06,4.769112600024528e-05,2.712066000003688e-05,2.0277561499824514e-05 +185,5.169604000002437e-06,8.074896999914927e-06,4.867781200027821e-05,2.71955809001156e-05,2.0450016500035417e-05 +187,5.235115000004953e-06,8.156995000263124e-06,4.975785199985694e-05,2.727097409988346e-05,2.0673109499512065e-05 +189,5.3020860000003715e-06,8.268025999768725e-06,5.104821799977799e-05,2.739153069996973e-05,2.075331149990234e-05 +191,5.3935669999987126e-06,8.394967000185715e-06,5.184024199979831e-05,2.7571585700025024e-05,2.101696899990202e-05 +193,6.795562999997174e-06,9.895142999994278e-06,5.298614599996654e-05,3.243740660000185e-05,2.1817500500219467e-05 +195,6.909876000008807e-06,1.004762299999129e-05,5.428638799958208e-05,3.264271629996074e-05,2.2154690000206756e-05 +197,7.024921999999379e-06,1.0231634999854576e-05,5.541178199973729e-05,3.270464900015213e-05,2.2184527999343117e-05 +199,7.128632999979346e-06,1.0373865000019575e-05,5.646551399968304e-05,3.286992590001319e-05,2.2443230000135374e-05 +201,7.171497000001636e-06,1.0517668999909802e-05,5.744309200008502e-05,3.2913186400037375e-05,2.248834350029938e-05 +203,7.279723999999988e-06,1.0678781000024174e-05,5.861460799997076e-05,3.3034673200018005e-05,2.2885386500092864e-05 +205,7.355028999995739e-06,1.0818396000104255e-05,5.9885380999730836e-05,3.313494499998341e-05,2.2806776500146952e-05 +207,7.436175999998795e-06,1.0951607999686527e-05,6.091342199988503e-05,3.312673999989784e-05,2.3045066499435052e-05 +209,7.512593000001288e-06,1.1139744000047356e-05,6.198954899991804e-05,3.324090090009121e-05,2.3186731999885522e-05 
+211,7.582385000006297e-06,1.1291579000044294e-05,6.314900000006673e-05,3.324467399997957e-05,2.3547735499960252e-05 +213,7.662374000005907e-06,1.1416781999741944e-05,6.439513200029978e-05,3.3287432399993125e-05,2.361175500027457e-05 +215,7.757905999994818e-06,1.1580817000321985e-05,6.555293500014159e-05,3.3334836099857056e-05,2.3827626499951293e-05 +217,7.811227000019017e-06,1.1707784999998696e-05,6.671023999979298e-05,3.343614309997065e-05,2.3923315499814636e-05 +219,7.866030999991835e-06,1.185596200002692e-05,6.788391800000682e-05,3.349219850006193e-05,2.427017700028955e-05 +221,7.919067000017322e-06,1.1993348000032711e-05,6.911634700009017e-05,3.3557663499959745e-05,2.4128734500663995e-05 +223,8.009917000009637e-06,1.2137200999859488e-05,7.034868000027927e-05,3.365103809992433e-05,2.453250950020447e-05 +225,8.07584400001815e-06,1.2270020999949334e-05,7.156570500001179e-05,3.3624974799931796e-05,2.4438113499854808e-05 +227,8.158194999992931e-06,1.2455217000024275e-05,7.281896799986498e-05,3.377987999992911e-05,2.5158597500194445e-05 +229,8.223411999978226e-06,1.256955899998502e-05,7.415176299991801e-05,3.3737511599974826e-05,2.4709719999918888e-05 +231,8.300086999980748e-06,1.2721528999918518e-05,7.538581500011787e-05,3.385432300001412e-05,2.539340600014839e-05 +233,8.374773000014102e-06,1.285602000007202e-05,7.492427900024267e-05,3.3965842100042215e-05,2.5280521999775373e-05 +235,8.442121999991059e-06,1.2995034999676134e-05,7.722381099983977e-05,3.402543980009796e-05,2.559828350058524e-05 +237,8.518306999974357e-06,1.3061558000117655e-05,7.925191499998618e-05,3.4100303699960934e-05,2.5548944000547636e-05 +239,8.57802400000196e-06,1.330828299978748e-05,8.052223899994715e-05,3.421797650007647e-05,2.6101423000000068e-05 +241,8.659747999985257e-06,1.3390172000072199e-05,8.182751300000745e-05,3.4356965900042276e-05,2.5954169999749865e-05 +243,8.719455999994353e-06,1.3546659999974507e-05,8.315562500001761e-05,3.4487164199890694e-05,2.6275525499841025e-05 
+245,8.793838000002552e-06,1.3659475999702408e-05,8.454959999971835e-05,3.4666111000115054e-05,2.634458700049436e-05 +247,8.88029899999765e-06,1.3811194000027172e-05,8.585265299961975e-05,3.4624762299972636e-05,2.665732700006629e-05 +249,8.973362000006089e-06,1.3973442999940743e-05,8.72854690001077e-05,3.475139410002157e-05,2.669932749995496e-05 +251,9.04906599998867e-06,1.4072542000121755e-05,8.856035799999516e-05,3.482194080006593e-05,2.7160594499946454e-05 +253,9.153697999977342e-06,1.4240521999909107e-05,8.990764899999703e-05,3.494407540001703e-05,2.723350599990226e-05 +255,9.258233999986487e-06,1.4427650000016001e-05,9.137486100007664e-05,3.5472384400054575e-05,2.7416934999564545e-05 +257,1.109320599999819e-05,1.6279750999729002e-05,,4.0078424200146396e-05,3.0842187000416746e-05 +259,1.1291284000009227e-05,1.657038300027125e-05,,4.024790010007564e-05,3.110012150045805e-05 +261,1.143758300000286e-05,1.6744278999794917e-05,,4.112411070000235e-05,3.134457950000069e-05 +263,1.1533052999993745e-05,1.6968128999906186e-05,,4.333411409988912e-05,3.165600600004836e-05 +265,1.1654933999977856e-05,1.7144414000085818e-05,,4.5324285099923136e-05,3.1819752500268804e-05 +267,1.1746720000019196e-05,1.7257866999898396e-05,,4.610846679988754e-05,3.1863536999480856e-05 +269,1.1856280000017705e-05,1.7423151999992114e-05,,4.64117677998729e-05,3.2222365000052376e-05 +271,1.1968281999998e-05,1.7692169999918405e-05,,4.633251140003267e-05,3.229285999987042e-05 +273,1.2042036999986294e-05,1.7878705999919476e-05,,4.6558167900002445e-05,3.254276400002709e-05 +275,1.214604199998348e-05,1.804113200023494e-05,,4.659846960003051e-05,3.2702343500204734e-05 +277,1.2238410000009026e-05,1.82639869999548e-05,,4.6595473599882114e-05,3.295667549991776e-05 +279,1.2350554999983388e-05,1.8402392000098188e-05,,4.671673530010594e-05,3.3117990500613815e-05 +281,1.2419846000000236e-05,1.860434999980498e-05,,4.678626400000212e-05,3.335526899991237e-05 
+283,1.2527865999999222e-05,1.878022499977305e-05,,4.67671734999385e-05,3.35218909995092e-05 +285,1.2632690000003776e-05,1.8948439999803667e-05,,4.6993393699995065e-05,3.392029650058248e-05 +287,1.2688796999981378e-05,1.912892199970884e-05,,4.694131989999733e-05,3.396506050012249e-05 +289,1.2817135999995345e-05,1.9282528999610805e-05,,4.709835120011121e-05,3.424038350021874e-05 +291,1.2891703000008193e-05,1.949619300012273e-05,,4.719288079995749e-05,3.442173350049416e-05 +293,1.3012441999990187e-05,1.9664942999952474e-05,,4.725964859990199e-05,3.463347050001175e-05 +295,1.30799540000055e-05,1.9857891999890855e-05,,4.715466630004812e-05,3.48634285001026e-05 +297,1.3167272999993428e-05,1.9889738000074425e-05,,4.7219540400146794e-05,3.5145335000379416e-05 +299,1.3276182999987894e-05,2.0119482999689352e-05,,4.720380590006243e-05,3.527650550040562e-05 +301,1.335929499998656e-05,2.033260500002143e-05,,4.715729819999978e-05,3.582457950051321e-05 +303,1.3452744000005624e-05,2.0539813000141294e-05,,4.742078599992965e-05,3.56606314999226e-05 +305,1.3527363999997988e-05,2.0560716000090905e-05,,4.750693200003298e-05,3.5971684499600086e-05 +307,1.362342999999555e-05,2.0865698999841696e-05,,4.756764760004444e-05,3.6155496500214215e-05 +309,1.3706313000000134e-05,2.0955006000349382e-05,,4.766778520006483e-05,3.650117249981122e-05 +311,1.3805463000011288e-05,2.10980209999434e-05,,4.773251210008311e-05,3.6671089999799736e-05 +313,1.3904494000001932e-05,2.1338193000246974e-05,,4.787442209999426e-05,3.6873856999591224e-05 +315,1.4004397000007885e-05,2.135833800002729e-05,,4.787121119989024e-05,3.703504199984309e-05 +317,1.412347600000885e-05,2.1719423999911668e-05,,4.81441851999989e-05,3.739714450057363e-05 +319,1.4259206999980731e-05,2.1920882999893364e-05,,4.840966960000514e-05,3.75748205005948e-05 +321,1.6571241000008284e-05,2.418560400019487e-05,,5.320086789997731e-05,4.002799250065436e-05 
+323,1.6760218999991138e-05,2.4605007999980445e-05,,5.3405299299993204e-05,3.984390550067474e-05 +325,1.690808199998628e-05,2.4799152000014145e-05,,5.342963010007224e-05,4.035070749978331e-05 +327,1.7006000999998606e-05,2.493004899997686e-05,,5.353471059988806e-05,4.044400949987903e-05 +329,1.7155619999982714e-05,2.532599400001345e-05,,5.36890201999995e-05,4.098322599929816e-05 +331,1.7307810000005473e-05,2.548047600021164e-05,,5.366199820000474e-05,4.0902536499743295e-05 +333,1.741246999998225e-05,2.5685450000310085e-05,,5.388023920004343e-05,4.135747300006187e-05 +335,1.7478904999990164e-05,2.5886772999911045e-05,,5.3968838500077256e-05,4.1320302000713124e-05 +337,1.7636052000000292e-05,2.6074174999848764e-05,,5.385340829998313e-05,4.179780149934231e-05 +339,1.776942899999767e-05,2.637939899977937e-05,,5.3902563200063016e-05,4.178428350041941e-05 +341,1.789130500000624e-05,2.652449600009277e-05,,5.385439479996421e-05,4.235052049989463e-05 +343,1.7996374999995624e-05,2.6876000999891408e-05,,5.408674600002996e-05,4.2320073999690064e-05 +345,1.809443400000533e-05,2.70490240000072e-05,,5.4184780300056436e-05,4.275410100035515e-05 +347,1.821040900000525e-05,2.7119013000174165e-05,,5.429341300005035e-05,4.268682549991354e-05 +349,1.8313470999999025e-05,2.7418285999829096e-05,,5.4320748099962655e-05,4.303412199988088e-05 +351,1.8399142000021128e-05,2.747021499999392e-05,,5.436418760000378e-05,4.303049000009196e-05 +353,1.853103899998132e-05,2.7959989999999376e-05,,5.443688160012243e-05,4.3458881500555434e-05 +355,1.8632321000012548e-05,2.8087966999919453e-05,,5.4551434600034556e-05,4.3436328000098005e-05 +357,1.873782200001984e-05,2.8200835999996345e-05,,5.459612410013506e-05,4.3792313000267317e-05 +359,1.8839035000013386e-05,2.840676599998915e-05,,5.454124150001007e-05,4.399722950074647e-05 +361,1.892681200001789e-05,2.86477989998275e-05,,5.465676919993712e-05,4.4310500499705084e-05 
+363,1.905813900000908e-05,2.8954981999959272e-05,,5.4741559099966245e-05,4.441664149999269e-05 +365,1.9118246999994424e-05,2.91266250001172e-05,,5.478005740005756e-05,4.476469500059466e-05 +367,1.9268799000002444e-05,2.9243446999771552e-05,,5.494924659997195e-05,4.493476700008614e-05 +369,1.9363011000024246e-05,2.952158299967777e-05,,5.5065845100034497e-05,4.528392150041327e-05 +371,1.9471745999993574e-05,2.958924599988677e-05,,5.525929510004062e-05,4.524546700031351e-05 +373,1.9582520000000158e-05,2.9943561999971284e-05,,5.539336109995929e-05,4.581996700017044e-05 +375,1.971806900002093e-05,2.994532299999264e-05,,5.5440843799988206e-05,4.5830755000679346e-05 +377,1.9825994000001403e-05,3.0265079999935552e-05,,5.5485567300092944e-05,4.6101895500214595e-05 +379,1.993333999999436e-05,3.029921100005595e-05,,5.563547809997546e-05,4.5943817499392023e-05 +381,2.0073537000001807e-05,3.067433299975164e-05,,5.5831068599945875e-05,4.6646144499391084e-05 +383,2.0246181000004527e-05,3.080504299987297e-05,,5.644827529995382e-05,4.6622882000519897e-05 +385,2.2980462000020905e-05,3.3578229999875485e-05,,6.098124030013423e-05,4.930046150002454e-05 +387,2.316204000001676e-05,3.4176908000063124e-05,,6.125216079999516e-05,4.961065650059027e-05 +389,2.3369731000002506e-05,3.4528282999872314e-05,,6.118583639999998e-05,4.9973804500041296e-05 +391,2.35083629999906e-05,3.4675143000185934e-05,,6.136202719990251e-05,4.989375500008464e-05 +393,2.365131999999903e-05,3.510468300009961e-05,,6.148874350001278e-05,5.0763263000590085e-05 +395,2.375129300000367e-05,3.5219742000208505e-05,,6.153614220002055e-05,5.070214050010691e-05 +397,2.394928600000413e-05,3.558097800032556e-05,,6.171069350002652e-05,5.092248950040812e-05 +399,2.4057702999982666e-05,3.561898599991764e-05,,6.164522889994259e-05,5.131039649950253e-05 +401,2.41848169999912e-05,3.601219400025002e-05,,6.183610249991034e-05,5.155986950012448e-05 
+403,2.428869899998176e-05,3.598346599983415e-05,,6.190940690012212e-05,5.1552625500335125e-05 +405,2.4464819999991504e-05,3.651467700001376e-05,,6.181639510014065e-05,5.188454000017373e-05 +407,2.4506658000007066e-05,3.6466360999838786e-05,,6.185912109995116e-05,5.2302760000202394e-05 +409,2.472715900000821e-05,3.7058816999888224e-05,,6.200726739989478e-05,5.251504299940279e-05 +411,2.486268899997413e-05,3.7179432999892016e-05,,6.1936995600081e-05,5.268927899942355e-05 +413,2.498605400000997e-05,3.717658100003973e-05,,6.206541179999476e-05,5.2807500500421155e-05 +415,2.510999599999764e-05,3.764449099980993e-05,,6.209758780005357e-05,5.3076476000569525e-05 +417,2.523934100000247e-05,3.787568000007013e-05,,6.216939059995638e-05,5.366225400030089e-05 +419,2.535589299998264e-05,3.810839500010843e-05,,6.232799490007892e-05,5.372596600045654e-05 +421,2.547493399998757e-05,3.8095099000202025e-05,,6.223583269984375e-05,5.411172399999487e-05 +423,2.5608584000025306e-05,3.86273600001914e-05,,6.240265129999898e-05,5.415566050032794e-05 +425,2.568974700000126e-05,3.88332510001419e-05,,6.241767870014883e-05,5.448702749981749e-05 +427,2.5847646999977772e-05,3.874143500024729e-05,,6.257362340002146e-05,5.466328900001827e-05 +429,2.5969867000014803e-05,3.934265300040352e-05,,6.259217699989677e-05,5.500244850009039e-05 +431,2.6048656000000393e-05,3.964892500016504e-05,,6.27965536999909e-05,5.530678549985168e-05 +433,2.620645399997557e-05,3.981365600020581e-05,,6.300613069997779e-05,5.576178600040294e-05 +435,2.6289307000013195e-05,4.0031074000125956e-05,,6.297886829997879e-05,5.583795450002072e-05 +437,2.649120099999891e-05,4.02854210001351e-05,,6.321917140012374e-05,5.613353099943197e-05 +439,2.656470699997726e-05,4.038523600002008e-05,,6.327236580000319e-05,5.614412900013122e-05 +441,2.6730138999994325e-05,4.068574800021452e-05,,6.337684199988871e-05,5.64931244998661e-05 +443,2.6830756000038035e-05,4.0837893000116306e-05,,6.348795819994847e-05,5.650883850012178e-05 
+445,2.6994894999972987e-05,4.098672600002829e-05,,6.359127490013634e-05,5.698729999949137e-05 +447,2.7193979999992732e-05,4.136010000001989e-05,,6.419779259995268e-05,5.711569349932688e-05 +449,3.037892100002182e-05,4.452321000007942e-05,,6.886446019998402e-05,6.105917399963801e-05 +451,3.0662639000013316e-05,4.487769099978323e-05,,6.909542619996501e-05,6.114174800040928e-05 +453,3.082121899996082e-05,4.533448699976361e-05,,6.912210760001472e-05,6.142202899991389e-05 +455,3.1004032999987884e-05,4.556388099990727e-05,,6.925471899994591e-05,6.174264350011072e-05 +457,3.113929500000268e-05,4.5910112000001395e-05,,6.937000569996599e-05,6.220378250054636e-05 +459,3.1341420999979165e-05,4.596033999996507e-05,,6.946433940011047e-05,6.179874149984244e-05 +461,3.144988900004364e-05,4.650317699997686e-05,,6.965121060002276e-05,6.212368599972251e-05 +463,3.16053399999987e-05,4.701457400005893e-05,,6.968682690003332e-05,6.258292799975606e-05 +465,3.173726699998269e-05,4.712764399982916e-05,,6.959654330003104e-05,6.295023300026514e-05 +467,3.1881915999974814e-05,4.717966399994112e-05,,6.957229130002816e-05,6.339502149967303e-05 +469,3.206901999999445e-05,4.769093899994914e-05,,6.966278240015527e-05,6.341633099964384e-05 +471,3.218361699998695e-05,4.785621100018034e-05,,6.975435130007099e-05,6.365726749936584e-05 +473,3.233288399997036e-05,4.830055599995832e-05,,6.970227929996326e-05,6.34794609995879e-05 +475,3.24865780000323e-05,4.838001899997835e-05,,6.991995490006957e-05,6.372634500075947e-05 +477,3.2596264000005705e-05,4.856058399991525e-05,,6.995761250000214e-05,6.410864650024451e-05 +479,3.2742199999972854e-05,4.9151995000102036e-05,,7.001146660004451e-05,6.438287150012911e-05 +481,3.292377299999316e-05,4.931360600039626e-05,,7.019085499996436e-05,6.483184849912505e-05 +483,3.299801399998614e-05,4.964082600008623e-05,,7.02185993999592e-05,6.502312300017365e-05 +485,3.3141932000035016e-05,4.995582800029297e-05,,7.036133999990852e-05,6.523342099990259e-05 
+487,3.3302762999994675e-05,5.000761600012993e-05,,7.031938869986334e-05,6.551909599966166e-05 +489,3.3464127999991435e-05,5.029823399991074e-05,,7.058074020005734e-05,6.557789449925621e-05 +491,3.3553594000011344e-05,5.069046600010552e-05,,7.054368720000638e-05,6.617511449985614e-05 +493,3.370228500000394e-05,5.104721299994708e-05,,7.054038859987485e-05,6.650590249955712e-05 +495,3.388215100000025e-05,5.124169000009715e-05,,7.089727659986238e-05,6.64643125001021e-05 +497,3.3965432000002235e-05,5.137543999990157e-05,,7.088362819995381e-05,6.644193000011e-05 +499,3.412805900001104e-05,5.169260000002396e-05,,7.093735140006176e-05,6.65725585004111e-05 +501,3.431646100000307e-05,5.228185199985092e-05,,7.116033019992756e-05,6.70534860000771e-05 +503,3.438569799999414e-05,5.204082600039328e-05,,7.136115960001916e-05,6.732430500051124e-05 +505,3.456698700000516e-05,5.2604458000132584e-05,,7.163654120013235e-05,6.769553200047085e-05 +507,3.474381199998788e-05,5.280265400006101e-05,,7.160754239994276e-05,6.763535150003008e-05 +509,3.4888624000018346e-05,5.305266799996389e-05,,7.179135480000695e-05,6.808248850029485e-05 +511,3.512258299997484e-05,5.325088099971253e-05,,7.254744040001242e-05,6.862808899950324e-05 diff --git a/src/cpp_string_metric.cpp b/src/cpp_string_metric.cpp index 09556f8..6bd80a1 100644 --- a/src/cpp_string_metric.cpp +++ b/src/cpp_string_metric.cpp @@ -1274,8 +1274,8 @@ static const char __pyx_k_levenshtein_line_24[] = "levenshtein (line 24)"; static const char __pyx_k_normalized_levenshtein[] = "normalized_levenshtein"; static const char __pyx_k_src_cpp_string_metric_pyx[] = "src/cpp_string_metric.pyx"; static const char __pyx_k_Calculates_a_normalized_levensh[] = "\n Calculates a normalized levenshtein distance using custom\n costs for insertion, deletion and substitution.\n\n Parameters\n ----------\n s1 : str\n First string to compare.\n s2 : str\n Second string to compare.\n weights : Tuple[int, int, int] or None, optional\n The weights for the 
three operations in the form\n (insertion, deletion, substitution). Default is (1, 1, 1),\n which gives all three operations a weight of 1.\n processor: bool or callable, optional\n Optional callable that is used to preprocess the strings before\n comparing them. When processor is True ``utils.default_process``\n is used. Default is None, which deactivates this behaviour.\n score_cutoff : float, optional\n Optional argument for a score threshold as a float between 0 and 100.\n For ratio < score_cutoff 0 is returned instead. Default is 0,\n which deactivates this behaviour.\n\n Returns\n -------\n ratio : float\n Normalized weighted levenshtein distance between s1 and s2\n as a float between 0 and 100\n\n Raises\n ------\n ValueError\n If unsupported weights are provided a ValueError is thrown\n\n See Also\n --------\n levenshtein : Levenshtein distance\n\n Notes\n -----\n The normalization of the Levenshtein distance is performed in the following way:\n\n .. math::\n :nowrap:\n\n \\begin{align*}\n dist_{max} &= \\begin{cases}\n min(len(s1), len(s2)) \\cdot sub, & \\text{if } sub \\leq ins + del \\\\\n len(s1) \\cdot del + len(s2) \\cdot ins, & \\text{otherwise}\n \\end{cases}\\\\[10pt]\n\n dist_{max} &= \\begin{cases}\n dist_{max} + (len(s1) - len(s2)) \\cdot del, & \\text{if } len(s1) > len(s2) \\\\\n dist_{max} + (len(s2) - len(s1)) \\cdot ins, & \\text{if } len(s1) < len(s2) \\\\\n dist_{max}, & \\text{if } len(s""1) = len(s2)\n \\end{cases}\\\\[10pt]\n\n ratio &= 100 \\cdot \\frac{distance(s1, s2)}{dist_{max}}\n \\end{align*}\n\n Examples\n --------\n Find the normalized Levenshtein distance between two strings:\n\n >>> from rapidfuzz.string_metric import normalized_levenshtein\n >>> normalized_levenshtein(\"lewenstein\", \"levenshtein\")\n 81.81818181818181\n\n Setting a score_cutoff allows the implementation to select\n a more efficient implementation:\n\n >>> normalized_levenshtein(\"lewenstein\", \"levenshtein\", score_cutoff=85)\n 0.0\n\n It is possible to 
select different weights by passing a `weight`\n tuple.\n\n >>> normalized_levenshtein(\"lewenstein\", \"levenshtein\", weights=(1,1,2))\n 85.71428571428571\n\n When a different processor is used s1 and s2 do not have to be strings\n\n >>> normalized_levenshtein([\"lewenstein\"], [\"levenshtein\"], processor=lambda s: s[0])\n 81.81818181818181\n "; -static const char __pyx_k_Calculates_the_minimum_number_o[] = "\n Calculates the minimum number of insertions, deletions, and substitutions\n required to change one sequence into the other according to Levenshtein with custom\n costs for insertion, deletion and substitution\n\n Parameters\n ----------\n s1 : str\n First string to compare\n s2 : str\n Second string to compare\n weights : Tuple[int, int, int] or None, optional\n The weights for the three operations in the form\n (insertion, deletion, substitution). Default is (1, 1, 1),\n which gives all three operations a weight of 1.\n max : int or None, optional\n Maximum Levenshtein distance between s1 and s2, that is\n considered as a result. If the distance is bigger than max,\n -1 is returned instead. Default is None, which deactivates\n this behaviour.\n\n Returns\n -------\n distance : int\n levenshtein distance between s1 and s2\n\n Notes\n -----\n Depending on the input parameters different optimized implementation are used\n to improve the performance.\n\n Insertion = Deletion = Substitution:\n This is known as uniform Levenshtein distance and is the distance most commonly\n referred to as Levenshtein distance. The following implementation is used\n with a worst-case performance of ``O([N/64]M)``.\n\n - if max is 0 the similarity can be calculated using a direct comparision,\n since no difference between the strings is allowed. 
The time complexity of\n this algorithm is ``O(N)``.\n\n - A common prefix/suffix of the two compared strings does not affect\n the Levenshtein distance, so the affix is removed before calculating the\n similarity.\n\n - If max is \342\211\244 3 the mbleven algorithm is used. This algorithm\n checks all possible edit operations that are possible under\n the threshold `max`. The time complexity of this algorithm is ``O(N)``.\n\n - If the length of th""e shorter string is \342\211\244 64 after removing the common affix\n Hyyr\303\266s' algorithm is used, which calculates the Levenshtein distance in\n parallel. The algorithm is described by [1]_. The time complexity of this\n algorithm is ``O(N)``.\n\n - If the length of the shorter string is \342\211\245 64 after removing the common affix\n a blockwise implementation of Myers' algorithm is used, which calculates\n the Levenshtein distance in parallel (64 characters at a time).\n The algorithm is described by [3]_. The time complexity of this\n algorithm is ``O([N/64]M)``.\n\n\n Insertion = Deletion, Substitution >= Insertion + Deletion:\n Since every Substitution can be performed as Insertion + Deletion, this variant\n of the Levenshtein distance only uses Insertions and Deletions. Therefore this\n variant is often referred to as InDel-Distance. The following implementation\n is used with a worst-case performance of ``O([N/64]M)``.\n\n - if max is 0 the similarity can be calculated using a direct comparision,\n since no difference between the strings is allowed. The time complexity of\n this algorithm is ``O(N)``.\n\n - if max is 1 and the two strings have a similar length, the similarity can be\n calculated using a direct comparision aswell, since a substitution would cause\n a edit distance higher than max. 
The time complexity of this algorithm\n is ``O(N)``.\n\n - A common prefix/suffix of the two compared strings does not affect\n the Levenshtein distance, so the affix is removed before calculating the\n similarity.\n\n - If max is \342\211\244 4 the mbleven algorithm is used. This algorithm\n checks all possible edit operations that are possible under\n the threshold `max`. As a difference to the normal Levenshtein distance this\n algorithm can even be used up to a threshold of 4 here, since t""he higher weight\n of substitutions decreases the amount of possible edit operations.\n The time complexity of this algorithm is ``O(N)``.\n\n - If the length of the shorter string is \342\211\244 64 after removing the common affix\n the BitPAl algorithm is used, which calculates the Levenshtein distance in\n parallel. The algorithm is described by [4]_ and is extended with support\n for UTF32 in this implementation. The time complexity of this\n algorithm is ``O(N)``.\n\n - If the length of the shorter string is \342\211\245 64 after removing the common affix\n a blockwise implementation of the BitPAl algorithm is used, which calculates\n the Levenshtein distance in parallel (64 characters at a time).\n The algorithm is described by [4]_. The time complexity of this\n algorithm is ``O([N/64]M)``.\n\n Other weights:\n The implementation for other weights is based on Wagner-Fischer.\n It has a performance of ``O(N * M)`` and has a memory usage of ``O(N)``.\n Further details can be found in [2]_.\n\n\n References\n ----------\n .. [1] Hyyr\303\266, Heikki. \"A Bit-Vector Algorithm for Computing\n Levenshtein and Damerau Edit Distances.\"\n Nordic Journal of Computing, Volume 10 (2003): 29-39.\n .. [2] Wagner, Robert & Fischer, Michael\n \"The String-to-String Correction Problem.\"\n J. ACM. 21. (1974): 168-173\n .. [3] Myers, Gene. 
\"A fast bit-vector algorithm for approximate\n string matching based on dynamic programming.\"\n Journal of the ACM (JACM) 46.3 (1999): 395-415.\n .. [4] Loving, Joshua & Hern\303\241ndez, Y\303\266zen & Benson, Gary.\n \"BitPAl: A Bit-Parallel, General Integer-Scoring Sequence\n Alignment Algorithm. Bioinformatics\"\n Bioinformatics, Volume 30 (2014): 3166\342\200\2233173\n\n Examples\n --------\n Find the Levenshtein distance ""between two strings:\n\n >>> from rapidfuzz.string_metric import levenshtein\n >>> levenshtein(\"lewenstein\", \"levenshtein\")\n 2\n\n Setting a maximum distance allows the implementation to select\n a more efficient implementation:\n\n >>> levenshtein(\"lewenstein\", \"levenshtein\", max=1)\n -1\n\n It is possible to select different weights by passing a `weight`\n tuple.\n\n >>> levenshtein(\"lewenstein\", \"levenshtein\", weights=(1,1,2))\n 3\n "; -static const char __pyx_k_normalized_levenshtein_line_179[] = "normalized_levenshtein (line 179)"; +static const char __pyx_k_Calculates_the_minimum_number_o[] = "\n Calculates the minimum number of insertions, deletions, and substitutions\n required to change one sequence into the other according to Levenshtein with custom\n costs for insertion, deletion and substitution\n\n Parameters\n ----------\n s1 : str\n First string to compare\n s2 : str\n Second string to compare\n weights : Tuple[int, int, int] or None, optional\n The weights for the three operations in the form\n (insertion, deletion, substitution). Default is (1, 1, 1),\n which gives all three operations a weight of 1.\n max : int or None, optional\n Maximum Levenshtein distance between s1 and s2, that is\n considered as a result. If the distance is bigger than max,\n -1 is returned instead. 
Default is None, which deactivates\n this behaviour.\n\n Returns\n -------\n distance : int\n levenshtein distance between s1 and s2\n\n Notes\n -----\n Depending on the input parameters different optimized implementation are used\n to improve the performance.\n\n Insertion = Deletion = Substitution:\n This is known as uniform Levenshtein distance and is the distance most commonly\n referred to as Levenshtein distance. The following implementation is used\n with a worst-case performance of ``O([N/64]M)``.\n\n - if max is 0 the similarity can be calculated using a direct comparision,\n since no difference between the strings is allowed. The time complexity of\n this algorithm is ``O(N)``.\n\n - A common prefix/suffix of the two compared strings does not affect\n the Levenshtein distance, so the affix is removed before calculating the\n similarity.\n\n - If max is \342\211\244 3 the mbleven algorithm is used. This algorithm\n checks all possible edit operations that are possible under\n the threshold `max`. The time complexity of this algorithm is ``O(N)``.\n\n - If the length of th""e shorter string is \342\211\244 64 after removing the common affix\n Hyyr\303\266s' algorithm is used, which calculates the Levenshtein distance in\n parallel. The algorithm is described by [1]_. The time complexity of this\n algorithm is ``O(N)``.\n\n - If the length of the shorter string is \342\211\245 64 after removing the common affix\n a blockwise implementation of Myers' algorithm is used, which calculates\n the Levenshtein distance in parallel (64 characters at a time).\n The algorithm is described by [3]_. The time complexity of this\n algorithm is ``O([N/64]M)``.\n\n The following image shows a benchmark of the Levenshtein distance in multiple\n Python libraries. 
All of them are implemented either in C/C++ or Cython.\n The graph shows, that python-Levenshtein is the only library with a time\n complexity of ``O(NM)``, while all other libraries have a time complexity of\n ``O([N/64]M)``. Especially for long strings RapidFuzz is a lot faster than\n all the other tested libraries.\n\n .. image:: img/uniform_levenshtein.svg\n\n\n Insertion = Deletion, Substitution >= Insertion + Deletion:\n Since every Substitution can be performed as Insertion + Deletion, this variant\n of the Levenshtein distance only uses Insertions and Deletions. Therefore this\n variant is often referred to as InDel-Distance. The following implementation\n is used with a worst-case performance of ``O([N/64]M)``.\n\n - if max is 0 the similarity can be calculated using a direct comparision,\n since no difference between the strings is allowed. The time complexity of\n this algorithm is ``O(N)``.\n\n - if max is 1 and the two strings have a similar length, the similarity can be\n calculated using a direct comparision aswell, since a substitution would cause\n a edit distance higher than max. The time complexity of this algorithm\n is ``O(N)``.\n""\n - A common prefix/suffix of the two compared strings does not affect\n the Levenshtein distance, so the affix is removed before calculating the\n similarity.\n\n - If max is \342\211\244 4 the mbleven algorithm is used. This algorithm\n checks all possible edit operations that are possible under\n the threshold `max`. As a difference to the normal Levenshtein distance this\n algorithm can even be used up to a threshold of 4 here, since the higher weight\n of substitutions decreases the amount of possible edit operations.\n The time complexity of this algorithm is ``O(N)``.\n\n - If the length of the shorter string is \342\211\244 64 after removing the common affix\n the BitPAl algorithm is used, which calculates the Levenshtein distance in\n parallel. 
The algorithm is described by [4]_ and is extended with support\n for UTF32 in this implementation. The time complexity of this\n algorithm is ``O(N)``.\n\n - If the length of the shorter string is \342\211\245 64 after removing the common affix\n a blockwise implementation of the BitPAl algorithm is used, which calculates\n the Levenshtein distance in parallel (64 characters at a time).\n The algorithm is described by [4]_. The time complexity of this\n algorithm is ``O([N/64]M)``.\n\n The following image shows a benchmark of the InDel distance in RapidFuzz\n and python-Levenshtein. Similar to the normal Levenshtein distance\n python-Levenshtein uses a implementation with a time complexity of ``O(NM)``,\n while RapidFuzz has a time complexity of ``O([N/64]M)``.\n\n .. image:: img/indel_levenshtein.svg\n\n\n Other weights:\n The implementation for other weights is based on Wagner-Fischer.\n It has a performance of ``O(N * M)`` and has a memory usage of ``O(N)``.\n Further details can be found in [2]_.\n\n\n References\n ----------\n .. [1] Hyyr\303""\266, Heikki. \"A Bit-Vector Algorithm for Computing\n Levenshtein and Damerau Edit Distances.\"\n Nordic Journal of Computing, Volume 10 (2003): 29-39.\n .. [2] Wagner, Robert & Fischer, Michael\n \"The String-to-String Correction Problem.\"\n J. ACM. 21. (1974): 168-173\n .. [3] Myers, Gene. \"A fast bit-vector algorithm for approximate\n string matching based on dynamic programming.\"\n Journal of the ACM (JACM) 46.3 (1999): 395-415.\n .. [4] Loving, Joshua & Hern\303\241ndez, Y\303\266zen & Benson, Gary.\n \"BitPAl: A Bit-Parallel, General Integer-Scoring Sequence\n Alignment Algorithm. 
Bioinformatics\"\n Bioinformatics, Volume 30 (2014): 3166\342\200\2233173\n\n Examples\n --------\n Find the Levenshtein distance between two strings:\n\n >>> from rapidfuzz.string_metric import levenshtein\n >>> levenshtein(\"lewenstein\", \"levenshtein\")\n 2\n\n Setting a maximum distance allows the implementation to select\n a more efficient implementation:\n\n >>> levenshtein(\"lewenstein\", \"levenshtein\", max=1)\n -1\n\n It is possible to select different weights by passing a `weight`\n tuple.\n\n >>> levenshtein(\"lewenstein\", \"levenshtein\", weights=(1,1,2))\n 3\n "; +static const char __pyx_k_normalized_levenshtein_line_196[] = "normalized_levenshtein (line 196)"; static PyObject *__pyx_kp_u_Calculates_a_normalized_levensh; static PyObject *__pyx_kp_u_Calculates_the_minimum_number_o; static PyObject *__pyx_n_s_cline_in_traceback; @@ -1293,7 +1293,7 @@ static PyObject *__pyx_n_s_max_2; static PyObject *__pyx_n_s_name; static PyObject *__pyx_n_s_normalized_hamming; static PyObject *__pyx_n_s_normalized_levenshtein; -static PyObject *__pyx_kp_u_normalized_levenshtein_line_179; +static PyObject *__pyx_kp_u_normalized_levenshtein_line_196; static PyObject *__pyx_n_s_processor; static PyObject *__pyx_n_s_rapidfuzz_utils; static PyObject *__pyx_n_s_s1; @@ -1387,7 +1387,7 @@ static PyObject *__pyx_f_17cpp_string_metric_dummy(void) { /* Python wrapper */ static PyObject *__pyx_pw_17cpp_string_metric_1levenshtein(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ -static char __pyx_doc_17cpp_string_metric_levenshtein[] = "\n Calculates the minimum number of insertions, deletions, and substitutions\n required to change one sequence into the other according to Levenshtein with custom\n costs for insertion, deletion and substitution\n\n Parameters\n ----------\n s1 : str\n First string to compare\n s2 : str\n Second string to compare\n weights : Tuple[int, int, int] or None, optional\n The weights for the three operations in the form\n 
(insertion, deletion, substitution). Default is (1, 1, 1),\n which gives all three operations a weight of 1.\n max : int or None, optional\n Maximum Levenshtein distance between s1 and s2, that is\n considered as a result. If the distance is bigger than max,\n -1 is returned instead. Default is None, which deactivates\n this behaviour.\n\n Returns\n -------\n distance : int\n levenshtein distance between s1 and s2\n\n Notes\n -----\n Depending on the input parameters different optimized implementation are used\n to improve the performance.\n\n Insertion = Deletion = Substitution:\n This is known as uniform Levenshtein distance and is the distance most commonly\n referred to as Levenshtein distance. The following implementation is used\n with a worst-case performance of ``O([N/64]M)``.\n\n - if max is 0 the similarity can be calculated using a direct comparision,\n since no difference between the strings is allowed. The time complexity of\n this algorithm is ``O(N)``.\n\n - A common prefix/suffix of the two compared strings does not affect\n the Levenshtein distance, so the affix is removed before calculating the\n similarity.\n\n - If max is \342\211\244 3 the mbleven algorithm is used. This algorithm\n checks all possible edit operations that are possible under\n the threshold `max`. The time complexity of this algorithm is ``O(N)``.\n\n - If the length of th""e shorter string is \342\211\244 64 after removing the common affix\n Hyyr\303\266s' algorithm is used, which calculates the Levenshtein distance in\n parallel. The algorithm is described by [1]_. The time complexity of this\n algorithm is ``O(N)``.\n\n - If the length of the shorter string is \342\211\245 64 after removing the common affix\n a blockwise implementation of Myers' algorithm is used, which calculates\n the Levenshtein distance in parallel (64 characters at a time).\n The algorithm is described by [3]_. 
The time complexity of this\n algorithm is ``O([N/64]M)``.\n\n\n Insertion = Deletion, Substitution >= Insertion + Deletion:\n Since every Substitution can be performed as Insertion + Deletion, this variant\n of the Levenshtein distance only uses Insertions and Deletions. Therefore this\n variant is often referred to as InDel-Distance. The following implementation\n is used with a worst-case performance of ``O([N/64]M)``.\n\n - if max is 0 the similarity can be calculated using a direct comparision,\n since no difference between the strings is allowed. The time complexity of\n this algorithm is ``O(N)``.\n\n - if max is 1 and the two strings have a similar length, the similarity can be\n calculated using a direct comparision aswell, since a substitution would cause\n a edit distance higher than max. The time complexity of this algorithm\n is ``O(N)``.\n\n - A common prefix/suffix of the two compared strings does not affect\n the Levenshtein distance, so the affix is removed before calculating the\n similarity.\n\n - If max is \342\211\244 4 the mbleven algorithm is used. This algorithm\n checks all possible edit operations that are possible under\n the threshold `max`. As a difference to the normal Levenshtein distance this\n algorithm can even be used up to a threshold of 4 here, since t""he higher weight\n of substitutions decreases the amount of possible edit operations.\n The time complexity of this algorithm is ``O(N)``.\n\n - If the length of the shorter string is \342\211\244 64 after removing the common affix\n the BitPAl algorithm is used, which calculates the Levenshtein distance in\n parallel. The algorithm is described by [4]_ and is extended with support\n for UTF32 in this implementation. 
The time complexity of this\n algorithm is ``O(N)``.\n\n - If the length of the shorter string is \342\211\245 64 after removing the common affix\n a blockwise implementation of the BitPAl algorithm is used, which calculates\n the Levenshtein distance in parallel (64 characters at a time).\n The algorithm is described by [4]_. The time complexity of this\n algorithm is ``O([N/64]M)``.\n\n Other weights:\n The implementation for other weights is based on Wagner-Fischer.\n It has a performance of ``O(N * M)`` and has a memory usage of ``O(N)``.\n Further details can be found in [2]_.\n\n\n References\n ----------\n .. [1] Hyyr\303\266, Heikki. \"A Bit-Vector Algorithm for Computing\n Levenshtein and Damerau Edit Distances.\"\n Nordic Journal of Computing, Volume 10 (2003): 29-39.\n .. [2] Wagner, Robert & Fischer, Michael\n \"The String-to-String Correction Problem.\"\n J. ACM. 21. (1974): 168-173\n .. [3] Myers, Gene. \"A fast bit-vector algorithm for approximate\n string matching based on dynamic programming.\"\n Journal of the ACM (JACM) 46.3 (1999): 395-415.\n .. [4] Loving, Joshua & Hern\303\241ndez, Y\303\266zen & Benson, Gary.\n \"BitPAl: A Bit-Parallel, General Integer-Scoring Sequence\n Alignment Algorithm. 
Bioinformatics\"\n Bioinformatics, Volume 30 (2014): 3166\342\200\2233173\n\n Examples\n --------\n Find the Levenshtein distance ""between two strings:\n\n >>> from rapidfuzz.string_metric import levenshtein\n >>> levenshtein(\"lewenstein\", \"levenshtein\")\n 2\n\n Setting a maximum distance allows the implementation to select\n a more efficient implementation:\n\n >>> levenshtein(\"lewenstein\", \"levenshtein\", max=1)\n -1\n\n It is possible to select different weights by passing a `weight`\n tuple.\n\n >>> levenshtein(\"lewenstein\", \"levenshtein\", weights=(1,1,2))\n 3\n "; +static char __pyx_doc_17cpp_string_metric_levenshtein[] = "\n Calculates the minimum number of insertions, deletions, and substitutions\n required to change one sequence into the other according to Levenshtein with custom\n costs for insertion, deletion and substitution\n\n Parameters\n ----------\n s1 : str\n First string to compare\n s2 : str\n Second string to compare\n weights : Tuple[int, int, int] or None, optional\n The weights for the three operations in the form\n (insertion, deletion, substitution). Default is (1, 1, 1),\n which gives all three operations a weight of 1.\n max : int or None, optional\n Maximum Levenshtein distance between s1 and s2, that is\n considered as a result. If the distance is bigger than max,\n -1 is returned instead. Default is None, which deactivates\n this behaviour.\n\n Returns\n -------\n distance : int\n levenshtein distance between s1 and s2\n\n Notes\n -----\n Depending on the input parameters different optimized implementation are used\n to improve the performance.\n\n Insertion = Deletion = Substitution:\n This is known as uniform Levenshtein distance and is the distance most commonly\n referred to as Levenshtein distance. The following implementation is used\n with a worst-case performance of ``O([N/64]M)``.\n\n - if max is 0 the similarity can be calculated using a direct comparision,\n since no difference between the strings is allowed. 
The time complexity of\n this algorithm is ``O(N)``.\n\n - A common prefix/suffix of the two compared strings does not affect\n the Levenshtein distance, so the affix is removed before calculating the\n similarity.\n\n - If max is \342\211\244 3 the mbleven algorithm is used. This algorithm\n checks all possible edit operations that are possible under\n the threshold `max`. The time complexity of this algorithm is ``O(N)``.\n\n - If the length of th""e shorter string is \342\211\244 64 after removing the common affix\n Hyyr\303\266s' algorithm is used, which calculates the Levenshtein distance in\n parallel. The algorithm is described by [1]_. The time complexity of this\n algorithm is ``O(N)``.\n\n - If the length of the shorter string is \342\211\245 64 after removing the common affix\n a blockwise implementation of Myers' algorithm is used, which calculates\n the Levenshtein distance in parallel (64 characters at a time).\n The algorithm is described by [3]_. The time complexity of this\n algorithm is ``O([N/64]M)``.\n\n The following image shows a benchmark of the Levenshtein distance in multiple\n Python libraries. All of them are implemented either in C/C++ or Cython.\n The graph shows, that python-Levenshtein is the only library with a time\n complexity of ``O(NM)``, while all other libraries have a time complexity of\n ``O([N/64]M)``. Especially for long strings RapidFuzz is a lot faster than\n all the other tested libraries.\n\n .. image:: img/uniform_levenshtein.svg\n\n\n Insertion = Deletion, Substitution >= Insertion + Deletion:\n Since every Substitution can be performed as Insertion + Deletion, this variant\n of the Levenshtein distance only uses Insertions and Deletions. Therefore this\n variant is often referred to as InDel-Distance. 
The following implementation\n is used with a worst-case performance of ``O([N/64]M)``.\n\n - if max is 0 the similarity can be calculated using a direct comparision,\n since no difference between the strings is allowed. The time complexity of\n this algorithm is ``O(N)``.\n\n - if max is 1 and the two strings have a similar length, the similarity can be\n calculated using a direct comparision aswell, since a substitution would cause\n a edit distance higher than max. The time complexity of this algorithm\n is ``O(N)``.\n""\n - A common prefix/suffix of the two compared strings does not affect\n the Levenshtein distance, so the affix is removed before calculating the\n similarity.\n\n - If max is \342\211\244 4 the mbleven algorithm is used. This algorithm\n checks all possible edit operations that are possible under\n the threshold `max`. As a difference to the normal Levenshtein distance this\n algorithm can even be used up to a threshold of 4 here, since the higher weight\n of substitutions decreases the amount of possible edit operations.\n The time complexity of this algorithm is ``O(N)``.\n\n - If the length of the shorter string is \342\211\244 64 after removing the common affix\n the BitPAl algorithm is used, which calculates the Levenshtein distance in\n parallel. The algorithm is described by [4]_ and is extended with support\n for UTF32 in this implementation. The time complexity of this\n algorithm is ``O(N)``.\n\n - If the length of the shorter string is \342\211\245 64 after removing the common affix\n a blockwise implementation of the BitPAl algorithm is used, which calculates\n the Levenshtein distance in parallel (64 characters at a time).\n The algorithm is described by [4]_. The time complexity of this\n algorithm is ``O([N/64]M)``.\n\n The following image shows a benchmark of the InDel distance in RapidFuzz\n and python-Levenshtein. 
Similar to the normal Levenshtein distance\n python-Levenshtein uses a implementation with a time complexity of ``O(NM)``,\n while RapidFuzz has a time complexity of ``O([N/64]M)``.\n\n .. image:: img/indel_levenshtein.svg\n\n\n Other weights:\n The implementation for other weights is based on Wagner-Fischer.\n It has a performance of ``O(N * M)`` and has a memory usage of ``O(N)``.\n Further details can be found in [2]_.\n\n\n References\n ----------\n .. [1] Hyyr\303""\266, Heikki. \"A Bit-Vector Algorithm for Computing\n Levenshtein and Damerau Edit Distances.\"\n Nordic Journal of Computing, Volume 10 (2003): 29-39.\n .. [2] Wagner, Robert & Fischer, Michael\n \"The String-to-String Correction Problem.\"\n J. ACM. 21. (1974): 168-173\n .. [3] Myers, Gene. \"A fast bit-vector algorithm for approximate\n string matching based on dynamic programming.\"\n Journal of the ACM (JACM) 46.3 (1999): 395-415.\n .. [4] Loving, Joshua & Hern\303\241ndez, Y\303\266zen & Benson, Gary.\n \"BitPAl: A Bit-Parallel, General Integer-Scoring Sequence\n Alignment Algorithm. 
Bioinformatics\"\n Bioinformatics, Volume 30 (2014): 3166\342\200\2233173\n\n Examples\n --------\n Find the Levenshtein distance between two strings:\n\n >>> from rapidfuzz.string_metric import levenshtein\n >>> levenshtein(\"lewenstein\", \"levenshtein\")\n 2\n\n Setting a maximum distance allows the implementation to select\n a more efficient implementation:\n\n >>> levenshtein(\"lewenstein\", \"levenshtein\", max=1)\n -1\n\n It is possible to select different weights by passing a `weight`\n tuple.\n\n >>> levenshtein(\"lewenstein\", \"levenshtein\", weights=(1,1,2))\n 3\n "; static PyMethodDef __pyx_mdef_17cpp_string_metric_1levenshtein = {"levenshtein", (PyCFunction)(void*)(PyCFunctionWithKeywords)__pyx_pw_17cpp_string_metric_1levenshtein, METH_VARARGS|METH_KEYWORDS, __pyx_doc_17cpp_string_metric_levenshtein}; static PyObject *__pyx_pw_17cpp_string_metric_1levenshtein(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { PyObject *__pyx_v_s1 = 0; @@ -1501,7 +1501,7 @@ static PyObject *__pyx_pf_17cpp_string_metric_levenshtein(CYTHON_UNUSED PyObject int __pyx_clineno = 0; __Pyx_RefNannySetupContext("levenshtein", 0); - /* "cpp_string_metric.pyx":165 + /* "cpp_string_metric.pyx":182 * 3 * """ * cdef size_t insertion = 1 # <<<<<<<<<<<<<< @@ -1510,7 +1510,7 @@ static PyObject *__pyx_pf_17cpp_string_metric_levenshtein(CYTHON_UNUSED PyObject */ __pyx_v_insertion = 1; - /* "cpp_string_metric.pyx":166 + /* "cpp_string_metric.pyx":183 * """ * cdef size_t insertion = 1 * cdef size_t deletion = 1 # <<<<<<<<<<<<<< @@ -1519,7 +1519,7 @@ static PyObject *__pyx_pf_17cpp_string_metric_levenshtein(CYTHON_UNUSED PyObject */ __pyx_v_deletion = 1; - /* "cpp_string_metric.pyx":167 + /* "cpp_string_metric.pyx":184 * cdef size_t insertion = 1 * cdef size_t deletion = 1 * cdef size_t substitution = 1 # <<<<<<<<<<<<<< @@ -1528,7 +1528,7 @@ static PyObject *__pyx_pf_17cpp_string_metric_levenshtein(CYTHON_UNUSED PyObject */ __pyx_v_substitution = 1; - /* 
"cpp_string_metric.pyx":168 + /* "cpp_string_metric.pyx":185 * cdef size_t deletion = 1 * cdef size_t substitution = 1 * cdef size_t max_ = -1 # <<<<<<<<<<<<<< @@ -1537,17 +1537,17 @@ static PyObject *__pyx_pf_17cpp_string_metric_levenshtein(CYTHON_UNUSED PyObject */ __pyx_v_max_ = -1L; - /* "cpp_string_metric.pyx":170 + /* "cpp_string_metric.pyx":187 * cdef size_t max_ = -1 * * if weights: # <<<<<<<<<<<<<< * insertion, deletion, substitution = weights * */ - __pyx_t_1 = __Pyx_PyObject_IsTrue(__pyx_v_weights); if (unlikely(__pyx_t_1 < 0)) __PYX_ERR(0, 170, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_IsTrue(__pyx_v_weights); if (unlikely(__pyx_t_1 < 0)) __PYX_ERR(0, 187, __pyx_L1_error) if (__pyx_t_1) { - /* "cpp_string_metric.pyx":171 + /* "cpp_string_metric.pyx":188 * * if weights: * insertion, deletion, substitution = weights # <<<<<<<<<<<<<< @@ -1560,7 +1560,7 @@ static PyObject *__pyx_pf_17cpp_string_metric_levenshtein(CYTHON_UNUSED PyObject if (unlikely(size != 3)) { if (size > 3) __Pyx_RaiseTooManyValuesError(3); else if (size >= 0) __Pyx_RaiseNeedMoreValuesError(size); - __PYX_ERR(0, 171, __pyx_L1_error) + __PYX_ERR(0, 188, __pyx_L1_error) } #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS if (likely(PyTuple_CheckExact(sequence))) { @@ -1576,16 +1576,16 @@ static PyObject *__pyx_pf_17cpp_string_metric_levenshtein(CYTHON_UNUSED PyObject __Pyx_INCREF(__pyx_t_3); __Pyx_INCREF(__pyx_t_4); #else - __pyx_t_2 = PySequence_ITEM(sequence, 0); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 171, __pyx_L1_error) + __pyx_t_2 = PySequence_ITEM(sequence, 0); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 188, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); - __pyx_t_3 = PySequence_ITEM(sequence, 1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 171, __pyx_L1_error) + __pyx_t_3 = PySequence_ITEM(sequence, 1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 188, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - __pyx_t_4 = PySequence_ITEM(sequence, 2); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 171, 
__pyx_L1_error) + __pyx_t_4 = PySequence_ITEM(sequence, 2); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 188, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); #endif } else { Py_ssize_t index = -1; - __pyx_t_5 = PyObject_GetIter(__pyx_v_weights); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 171, __pyx_L1_error) + __pyx_t_5 = PyObject_GetIter(__pyx_v_weights); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 188, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); __pyx_t_6 = Py_TYPE(__pyx_t_5)->tp_iternext; index = 0; __pyx_t_2 = __pyx_t_6(__pyx_t_5); if (unlikely(!__pyx_t_2)) goto __pyx_L4_unpacking_failed; @@ -1594,7 +1594,7 @@ static PyObject *__pyx_pf_17cpp_string_metric_levenshtein(CYTHON_UNUSED PyObject __Pyx_GOTREF(__pyx_t_3); index = 2; __pyx_t_4 = __pyx_t_6(__pyx_t_5); if (unlikely(!__pyx_t_4)) goto __pyx_L4_unpacking_failed; __Pyx_GOTREF(__pyx_t_4); - if (__Pyx_IternextUnpackEndCheck(__pyx_t_6(__pyx_t_5), 3) < 0) __PYX_ERR(0, 171, __pyx_L1_error) + if (__Pyx_IternextUnpackEndCheck(__pyx_t_6(__pyx_t_5), 3) < 0) __PYX_ERR(0, 188, __pyx_L1_error) __pyx_t_6 = NULL; __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; goto __pyx_L5_unpacking_done; @@ -1602,20 +1602,20 @@ static PyObject *__pyx_pf_17cpp_string_metric_levenshtein(CYTHON_UNUSED PyObject __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; __pyx_t_6 = NULL; if (__Pyx_IterFinish() == 0) __Pyx_RaiseNeedMoreValuesError(index); - __PYX_ERR(0, 171, __pyx_L1_error) + __PYX_ERR(0, 188, __pyx_L1_error) __pyx_L5_unpacking_done:; } - __pyx_t_7 = __Pyx_PyInt_As_size_t(__pyx_t_2); if (unlikely((__pyx_t_7 == (size_t)-1) && PyErr_Occurred())) __PYX_ERR(0, 171, __pyx_L1_error) + __pyx_t_7 = __Pyx_PyInt_As_size_t(__pyx_t_2); if (unlikely((__pyx_t_7 == (size_t)-1) && PyErr_Occurred())) __PYX_ERR(0, 188, __pyx_L1_error) __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_t_8 = __Pyx_PyInt_As_size_t(__pyx_t_3); if (unlikely((__pyx_t_8 == (size_t)-1) && PyErr_Occurred())) __PYX_ERR(0, 171, __pyx_L1_error) + __pyx_t_8 = __Pyx_PyInt_As_size_t(__pyx_t_3); if (unlikely((__pyx_t_8 == 
(size_t)-1) && PyErr_Occurred())) __PYX_ERR(0, 188, __pyx_L1_error) __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_9 = __Pyx_PyInt_As_size_t(__pyx_t_4); if (unlikely((__pyx_t_9 == (size_t)-1) && PyErr_Occurred())) __PYX_ERR(0, 171, __pyx_L1_error) + __pyx_t_9 = __Pyx_PyInt_As_size_t(__pyx_t_4); if (unlikely((__pyx_t_9 == (size_t)-1) && PyErr_Occurred())) __PYX_ERR(0, 188, __pyx_L1_error) __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; __pyx_v_insertion = __pyx_t_7; __pyx_v_deletion = __pyx_t_8; __pyx_v_substitution = __pyx_t_9; - /* "cpp_string_metric.pyx":170 + /* "cpp_string_metric.pyx":187 * cdef size_t max_ = -1 * * if weights: # <<<<<<<<<<<<<< @@ -1624,7 +1624,7 @@ static PyObject *__pyx_pf_17cpp_string_metric_levenshtein(CYTHON_UNUSED PyObject */ } - /* "cpp_string_metric.pyx":173 + /* "cpp_string_metric.pyx":190 * insertion, deletion, substitution = weights * * if max is not None: # <<<<<<<<<<<<<< @@ -1635,17 +1635,17 @@ static PyObject *__pyx_pf_17cpp_string_metric_levenshtein(CYTHON_UNUSED PyObject __pyx_t_10 = (__pyx_t_1 != 0); if (__pyx_t_10) { - /* "cpp_string_metric.pyx":174 + /* "cpp_string_metric.pyx":191 * * if max is not None: * max_ = max # <<<<<<<<<<<<<< * * return levenshtein_impl(s1, s2, insertion, deletion, substitution, max_) */ - __pyx_t_9 = __Pyx_PyInt_As_size_t(__pyx_v_max); if (unlikely((__pyx_t_9 == (size_t)-1) && PyErr_Occurred())) __PYX_ERR(0, 174, __pyx_L1_error) + __pyx_t_9 = __Pyx_PyInt_As_size_t(__pyx_v_max); if (unlikely((__pyx_t_9 == (size_t)-1) && PyErr_Occurred())) __PYX_ERR(0, 191, __pyx_L1_error) __pyx_v_max_ = __pyx_t_9; - /* "cpp_string_metric.pyx":173 + /* "cpp_string_metric.pyx":190 * insertion, deletion, substitution = weights * * if max is not None: # <<<<<<<<<<<<<< @@ -1654,7 +1654,7 @@ static PyObject *__pyx_pf_17cpp_string_metric_levenshtein(CYTHON_UNUSED PyObject */ } - /* "cpp_string_metric.pyx":176 + /* "cpp_string_metric.pyx":193 * max_ = max * * return levenshtein_impl(s1, s2, insertion, deletion, substitution, max_) 
# <<<<<<<<<<<<<< @@ -1664,11 +1664,11 @@ static PyObject *__pyx_pf_17cpp_string_metric_levenshtein(CYTHON_UNUSED PyObject __Pyx_XDECREF(__pyx_r); try { __pyx_t_4 = levenshtein_impl(__pyx_v_s1, __pyx_v_s2, __pyx_v_insertion, __pyx_v_deletion, __pyx_v_substitution, __pyx_v_max_); - if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 176, __pyx_L1_error) - if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 176, __pyx_L1_error) + if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 193, __pyx_L1_error) + if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 193, __pyx_L1_error) } catch(...) { __Pyx_CppExn2PyErr(); - __PYX_ERR(0, 176, __pyx_L1_error) + __PYX_ERR(0, 193, __pyx_L1_error) } __Pyx_GOTREF(__pyx_t_4); __pyx_r = __pyx_t_4; @@ -1697,7 +1697,7 @@ static PyObject *__pyx_pf_17cpp_string_metric_levenshtein(CYTHON_UNUSED PyObject return __pyx_r; } -/* "cpp_string_metric.pyx":179 +/* "cpp_string_metric.pyx":196 * * * def normalized_levenshtein(s1, s2, weights=(1,1,1), processor=None, double score_cutoff=0.0): # <<<<<<<<<<<<<< @@ -1752,7 +1752,7 @@ static PyObject *__pyx_pw_17cpp_string_metric_3normalized_levenshtein(PyObject * case 1: if (likely((values[1] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_s2)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("normalized_levenshtein", 0, 2, 5, 1); __PYX_ERR(0, 179, __pyx_L3_error) + __Pyx_RaiseArgtupleInvalid("normalized_levenshtein", 0, 2, 5, 1); __PYX_ERR(0, 196, __pyx_L3_error) } CYTHON_FALLTHROUGH; case 2: @@ -1774,7 +1774,7 @@ static PyObject *__pyx_pw_17cpp_string_metric_3normalized_levenshtein(PyObject * } } if (unlikely(kw_args > 0)) { - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "normalized_levenshtein") < 0)) __PYX_ERR(0, 179, __pyx_L3_error) + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "normalized_levenshtein") < 0)) __PYX_ERR(0, 196, __pyx_L3_error) } } else { switch (PyTuple_GET_SIZE(__pyx_args)) { @@ -1795,14 +1795,14 @@ static PyObject 
*__pyx_pw_17cpp_string_metric_3normalized_levenshtein(PyObject * __pyx_v_weights = values[2]; __pyx_v_processor = values[3]; if (values[4]) { - __pyx_v_score_cutoff = __pyx_PyFloat_AsDouble(values[4]); if (unlikely((__pyx_v_score_cutoff == (double)-1) && PyErr_Occurred())) __PYX_ERR(0, 179, __pyx_L3_error) + __pyx_v_score_cutoff = __pyx_PyFloat_AsDouble(values[4]); if (unlikely((__pyx_v_score_cutoff == (double)-1) && PyErr_Occurred())) __PYX_ERR(0, 196, __pyx_L3_error) } else { __pyx_v_score_cutoff = ((double)((double)0.0)); } } goto __pyx_L4_argument_unpacking_done; __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("normalized_levenshtein", 0, 2, 5, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 179, __pyx_L3_error) + __Pyx_RaiseArgtupleInvalid("normalized_levenshtein", 0, 2, 5, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 196, __pyx_L3_error) __pyx_L3_error:; __Pyx_AddTraceback("cpp_string_metric.normalized_levenshtein", __pyx_clineno, __pyx_lineno, __pyx_filename); __Pyx_RefNannyFinishContext(); @@ -1840,7 +1840,7 @@ static PyObject *__pyx_pf_17cpp_string_metric_2normalized_levenshtein(CYTHON_UNU __Pyx_INCREF(__pyx_v_s1); __Pyx_INCREF(__pyx_v_s2); - /* "cpp_string_metric.pyx":265 + /* "cpp_string_metric.pyx":282 * 81.81818181818181 * """ * cdef size_t insertion = 1 # <<<<<<<<<<<<<< @@ -1849,7 +1849,7 @@ static PyObject *__pyx_pf_17cpp_string_metric_2normalized_levenshtein(CYTHON_UNU */ __pyx_v_insertion = 1; - /* "cpp_string_metric.pyx":266 + /* "cpp_string_metric.pyx":283 * """ * cdef size_t insertion = 1 * cdef size_t deletion = 1 # <<<<<<<<<<<<<< @@ -1858,7 +1858,7 @@ static PyObject *__pyx_pf_17cpp_string_metric_2normalized_levenshtein(CYTHON_UNU */ __pyx_v_deletion = 1; - /* "cpp_string_metric.pyx":267 + /* "cpp_string_metric.pyx":284 * cdef size_t insertion = 1 * cdef size_t deletion = 1 * cdef size_t substitution = 1 # <<<<<<<<<<<<<< @@ -1867,7 +1867,7 @@ static PyObject *__pyx_pf_17cpp_string_metric_2normalized_levenshtein(CYTHON_UNU */ 
__pyx_v_substitution = 1; - /* "cpp_string_metric.pyx":269 + /* "cpp_string_metric.pyx":286 * cdef size_t substitution = 1 * * if s1 is None or s2 is None: # <<<<<<<<<<<<<< @@ -1887,7 +1887,7 @@ static PyObject *__pyx_pf_17cpp_string_metric_2normalized_levenshtein(CYTHON_UNU __pyx_L4_bool_binop_done:; if (__pyx_t_1) { - /* "cpp_string_metric.pyx":270 + /* "cpp_string_metric.pyx":287 * * if s1 is None or s2 is None: * return 0 # <<<<<<<<<<<<<< @@ -1899,7 +1899,7 @@ static PyObject *__pyx_pf_17cpp_string_metric_2normalized_levenshtein(CYTHON_UNU __pyx_r = __pyx_int_0; goto __pyx_L0; - /* "cpp_string_metric.pyx":269 + /* "cpp_string_metric.pyx":286 * cdef size_t substitution = 1 * * if s1 is None or s2 is None: # <<<<<<<<<<<<<< @@ -1908,17 +1908,17 @@ static PyObject *__pyx_pf_17cpp_string_metric_2normalized_levenshtein(CYTHON_UNU */ } - /* "cpp_string_metric.pyx":272 + /* "cpp_string_metric.pyx":289 * return 0 * * if weights: # <<<<<<<<<<<<<< * insertion, deletion, substitution = weights * */ - __pyx_t_1 = __Pyx_PyObject_IsTrue(__pyx_v_weights); if (unlikely(__pyx_t_1 < 0)) __PYX_ERR(0, 272, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_IsTrue(__pyx_v_weights); if (unlikely(__pyx_t_1 < 0)) __PYX_ERR(0, 289, __pyx_L1_error) if (__pyx_t_1) { - /* "cpp_string_metric.pyx":273 + /* "cpp_string_metric.pyx":290 * * if weights: * insertion, deletion, substitution = weights # <<<<<<<<<<<<<< @@ -1931,7 +1931,7 @@ static PyObject *__pyx_pf_17cpp_string_metric_2normalized_levenshtein(CYTHON_UNU if (unlikely(size != 3)) { if (size > 3) __Pyx_RaiseTooManyValuesError(3); else if (size >= 0) __Pyx_RaiseNeedMoreValuesError(size); - __PYX_ERR(0, 273, __pyx_L1_error) + __PYX_ERR(0, 290, __pyx_L1_error) } #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS if (likely(PyTuple_CheckExact(sequence))) { @@ -1947,16 +1947,16 @@ static PyObject *__pyx_pf_17cpp_string_metric_2normalized_levenshtein(CYTHON_UNU __Pyx_INCREF(__pyx_t_5); __Pyx_INCREF(__pyx_t_6); #else - __pyx_t_4 = 
PySequence_ITEM(sequence, 0); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 273, __pyx_L1_error) + __pyx_t_4 = PySequence_ITEM(sequence, 0); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 290, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); - __pyx_t_5 = PySequence_ITEM(sequence, 1); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 273, __pyx_L1_error) + __pyx_t_5 = PySequence_ITEM(sequence, 1); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 290, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); - __pyx_t_6 = PySequence_ITEM(sequence, 2); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 273, __pyx_L1_error) + __pyx_t_6 = PySequence_ITEM(sequence, 2); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 290, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_6); #endif } else { Py_ssize_t index = -1; - __pyx_t_7 = PyObject_GetIter(__pyx_v_weights); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 273, __pyx_L1_error) + __pyx_t_7 = PyObject_GetIter(__pyx_v_weights); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 290, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_7); __pyx_t_8 = Py_TYPE(__pyx_t_7)->tp_iternext; index = 0; __pyx_t_4 = __pyx_t_8(__pyx_t_7); if (unlikely(!__pyx_t_4)) goto __pyx_L7_unpacking_failed; @@ -1965,7 +1965,7 @@ static PyObject *__pyx_pf_17cpp_string_metric_2normalized_levenshtein(CYTHON_UNU __Pyx_GOTREF(__pyx_t_5); index = 2; __pyx_t_6 = __pyx_t_8(__pyx_t_7); if (unlikely(!__pyx_t_6)) goto __pyx_L7_unpacking_failed; __Pyx_GOTREF(__pyx_t_6); - if (__Pyx_IternextUnpackEndCheck(__pyx_t_8(__pyx_t_7), 3) < 0) __PYX_ERR(0, 273, __pyx_L1_error) + if (__Pyx_IternextUnpackEndCheck(__pyx_t_8(__pyx_t_7), 3) < 0) __PYX_ERR(0, 290, __pyx_L1_error) __pyx_t_8 = NULL; __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; goto __pyx_L8_unpacking_done; @@ -1973,20 +1973,20 @@ static PyObject *__pyx_pf_17cpp_string_metric_2normalized_levenshtein(CYTHON_UNU __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; __pyx_t_8 = NULL; if (__Pyx_IterFinish() == 0) __Pyx_RaiseNeedMoreValuesError(index); - __PYX_ERR(0, 273, __pyx_L1_error) + __PYX_ERR(0, 290, __pyx_L1_error) __pyx_L8_unpacking_done:; } - __pyx_t_9 
= __Pyx_PyInt_As_size_t(__pyx_t_4); if (unlikely((__pyx_t_9 == (size_t)-1) && PyErr_Occurred())) __PYX_ERR(0, 273, __pyx_L1_error) + __pyx_t_9 = __Pyx_PyInt_As_size_t(__pyx_t_4); if (unlikely((__pyx_t_9 == (size_t)-1) && PyErr_Occurred())) __PYX_ERR(0, 290, __pyx_L1_error) __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __pyx_t_10 = __Pyx_PyInt_As_size_t(__pyx_t_5); if (unlikely((__pyx_t_10 == (size_t)-1) && PyErr_Occurred())) __PYX_ERR(0, 273, __pyx_L1_error) + __pyx_t_10 = __Pyx_PyInt_As_size_t(__pyx_t_5); if (unlikely((__pyx_t_10 == (size_t)-1) && PyErr_Occurred())) __PYX_ERR(0, 290, __pyx_L1_error) __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - __pyx_t_11 = __Pyx_PyInt_As_size_t(__pyx_t_6); if (unlikely((__pyx_t_11 == (size_t)-1) && PyErr_Occurred())) __PYX_ERR(0, 273, __pyx_L1_error) + __pyx_t_11 = __Pyx_PyInt_As_size_t(__pyx_t_6); if (unlikely((__pyx_t_11 == (size_t)-1) && PyErr_Occurred())) __PYX_ERR(0, 290, __pyx_L1_error) __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; __pyx_v_insertion = __pyx_t_9; __pyx_v_deletion = __pyx_t_10; __pyx_v_substitution = __pyx_t_11; - /* "cpp_string_metric.pyx":272 + /* "cpp_string_metric.pyx":289 * return 0 * * if weights: # <<<<<<<<<<<<<< @@ -1995,7 +1995,7 @@ static PyObject *__pyx_pf_17cpp_string_metric_2normalized_levenshtein(CYTHON_UNU */ } - /* "cpp_string_metric.pyx":275 + /* "cpp_string_metric.pyx":292 * insertion, deletion, substitution = weights * * if processor is True or processor == default_process: # <<<<<<<<<<<<<< @@ -2009,17 +2009,17 @@ static PyObject *__pyx_pf_17cpp_string_metric_2normalized_levenshtein(CYTHON_UNU __pyx_t_1 = __pyx_t_3; goto __pyx_L10_bool_binop_done; } - __Pyx_GetModuleGlobalName(__pyx_t_6, __pyx_n_s_default_process); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 275, __pyx_L1_error) + __Pyx_GetModuleGlobalName(__pyx_t_6, __pyx_n_s_default_process); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 292, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_6); - __pyx_t_5 = PyObject_RichCompare(__pyx_v_processor, __pyx_t_6, Py_EQ); 
__Pyx_XGOTREF(__pyx_t_5); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 275, __pyx_L1_error) + __pyx_t_5 = PyObject_RichCompare(__pyx_v_processor, __pyx_t_6, Py_EQ); __Pyx_XGOTREF(__pyx_t_5); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 292, __pyx_L1_error) __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __pyx_t_3 = __Pyx_PyObject_IsTrue(__pyx_t_5); if (unlikely(__pyx_t_3 < 0)) __PYX_ERR(0, 275, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_IsTrue(__pyx_t_5); if (unlikely(__pyx_t_3 < 0)) __PYX_ERR(0, 292, __pyx_L1_error) __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; __pyx_t_1 = __pyx_t_3; __pyx_L10_bool_binop_done:; if (__pyx_t_1) { - /* "cpp_string_metric.pyx":276 + /* "cpp_string_metric.pyx":293 * * if processor is True or processor == default_process: * return normalized_levenshtein_impl_default_process( # <<<<<<<<<<<<<< @@ -2028,7 +2028,7 @@ static PyObject *__pyx_pf_17cpp_string_metric_2normalized_levenshtein(CYTHON_UNU */ __Pyx_XDECREF(__pyx_r); - /* "cpp_string_metric.pyx":277 + /* "cpp_string_metric.pyx":294 * if processor is True or processor == default_process: * return normalized_levenshtein_impl_default_process( * s1, s2, insertion, deletion, substitution, score_cutoff) # <<<<<<<<<<<<<< @@ -2037,26 +2037,26 @@ static PyObject *__pyx_pf_17cpp_string_metric_2normalized_levenshtein(CYTHON_UNU */ try { __pyx_t_12 = normalized_levenshtein_impl_default_process(__pyx_v_s1, __pyx_v_s2, __pyx_v_insertion, __pyx_v_deletion, __pyx_v_substitution, __pyx_v_score_cutoff); - if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 276, __pyx_L1_error) + if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 293, __pyx_L1_error) } catch(...) 
{ __Pyx_CppExn2PyErr(); - __PYX_ERR(0, 276, __pyx_L1_error) + __PYX_ERR(0, 293, __pyx_L1_error) } - /* "cpp_string_metric.pyx":276 + /* "cpp_string_metric.pyx":293 * * if processor is True or processor == default_process: * return normalized_levenshtein_impl_default_process( # <<<<<<<<<<<<<< * s1, s2, insertion, deletion, substitution, score_cutoff) * elif callable(processor): */ - __pyx_t_5 = PyFloat_FromDouble(__pyx_t_12); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 276, __pyx_L1_error) + __pyx_t_5 = PyFloat_FromDouble(__pyx_t_12); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 293, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); __pyx_r = __pyx_t_5; __pyx_t_5 = 0; goto __pyx_L0; - /* "cpp_string_metric.pyx":275 + /* "cpp_string_metric.pyx":292 * insertion, deletion, substitution = weights * * if processor is True or processor == default_process: # <<<<<<<<<<<<<< @@ -2065,18 +2065,18 @@ static PyObject *__pyx_pf_17cpp_string_metric_2normalized_levenshtein(CYTHON_UNU */ } - /* "cpp_string_metric.pyx":278 + /* "cpp_string_metric.pyx":295 * return normalized_levenshtein_impl_default_process( * s1, s2, insertion, deletion, substitution, score_cutoff) * elif callable(processor): # <<<<<<<<<<<<<< * s1 = processor(s1) * s2 = processor(s2) */ - __pyx_t_1 = __Pyx_PyCallable_Check(__pyx_v_processor); if (unlikely(__pyx_t_1 == ((int)-1))) __PYX_ERR(0, 278, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyCallable_Check(__pyx_v_processor); if (unlikely(__pyx_t_1 == ((int)-1))) __PYX_ERR(0, 295, __pyx_L1_error) __pyx_t_3 = (__pyx_t_1 != 0); if (__pyx_t_3) { - /* "cpp_string_metric.pyx":279 + /* "cpp_string_metric.pyx":296 * s1, s2, insertion, deletion, substitution, score_cutoff) * elif callable(processor): * s1 = processor(s1) # <<<<<<<<<<<<<< @@ -2096,13 +2096,13 @@ static PyObject *__pyx_pf_17cpp_string_metric_2normalized_levenshtein(CYTHON_UNU } __pyx_t_5 = (__pyx_t_4) ? 
__Pyx_PyObject_Call2Args(__pyx_t_6, __pyx_t_4, __pyx_v_s1) : __Pyx_PyObject_CallOneArg(__pyx_t_6, __pyx_v_s1); __Pyx_XDECREF(__pyx_t_4); __pyx_t_4 = 0; - if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 279, __pyx_L1_error) + if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 296, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; __Pyx_DECREF_SET(__pyx_v_s1, __pyx_t_5); __pyx_t_5 = 0; - /* "cpp_string_metric.pyx":280 + /* "cpp_string_metric.pyx":297 * elif callable(processor): * s1 = processor(s1) * s2 = processor(s2) # <<<<<<<<<<<<<< @@ -2122,13 +2122,13 @@ static PyObject *__pyx_pf_17cpp_string_metric_2normalized_levenshtein(CYTHON_UNU } __pyx_t_5 = (__pyx_t_4) ? __Pyx_PyObject_Call2Args(__pyx_t_6, __pyx_t_4, __pyx_v_s2) : __Pyx_PyObject_CallOneArg(__pyx_t_6, __pyx_v_s2); __Pyx_XDECREF(__pyx_t_4); __pyx_t_4 = 0; - if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 280, __pyx_L1_error) + if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 297, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; __Pyx_DECREF_SET(__pyx_v_s2, __pyx_t_5); __pyx_t_5 = 0; - /* "cpp_string_metric.pyx":278 + /* "cpp_string_metric.pyx":295 * return normalized_levenshtein_impl_default_process( * s1, s2, insertion, deletion, substitution, score_cutoff) * elif callable(processor): # <<<<<<<<<<<<<< @@ -2137,7 +2137,7 @@ static PyObject *__pyx_pf_17cpp_string_metric_2normalized_levenshtein(CYTHON_UNU */ } - /* "cpp_string_metric.pyx":282 + /* "cpp_string_metric.pyx":299 * s2 = processor(s2) * * return normalized_levenshtein_impl(s1, s2, insertion, deletion, substitution, score_cutoff) # <<<<<<<<<<<<<< @@ -2147,18 +2147,18 @@ static PyObject *__pyx_pf_17cpp_string_metric_2normalized_levenshtein(CYTHON_UNU __Pyx_XDECREF(__pyx_r); try { __pyx_t_12 = normalized_levenshtein_impl(__pyx_v_s1, __pyx_v_s2, __pyx_v_insertion, __pyx_v_deletion, __pyx_v_substitution, __pyx_v_score_cutoff); - if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 282, __pyx_L1_error) + if (unlikely(PyErr_Occurred())) 
__PYX_ERR(0, 299, __pyx_L1_error) } catch(...) { __Pyx_CppExn2PyErr(); - __PYX_ERR(0, 282, __pyx_L1_error) + __PYX_ERR(0, 299, __pyx_L1_error) } - __pyx_t_5 = PyFloat_FromDouble(__pyx_t_12); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 282, __pyx_L1_error) + __pyx_t_5 = PyFloat_FromDouble(__pyx_t_12); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 299, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); __pyx_r = __pyx_t_5; __pyx_t_5 = 0; goto __pyx_L0; - /* "cpp_string_metric.pyx":179 + /* "cpp_string_metric.pyx":196 * * * def normalized_levenshtein(s1, s2, weights=(1,1,1), processor=None, double score_cutoff=0.0): # <<<<<<<<<<<<<< @@ -2182,7 +2182,7 @@ static PyObject *__pyx_pf_17cpp_string_metric_2normalized_levenshtein(CYTHON_UNU return __pyx_r; } -/* "cpp_string_metric.pyx":285 +/* "cpp_string_metric.pyx":302 * * * def hamming(s1, s2, max=None): # <<<<<<<<<<<<<< @@ -2192,7 +2192,7 @@ static PyObject *__pyx_pf_17cpp_string_metric_2normalized_levenshtein(CYTHON_UNU /* Python wrapper */ static PyObject *__pyx_pw_17cpp_string_metric_5hamming(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ -static char __pyx_doc_17cpp_string_metric_4hamming[] = "\n Calculates the Hamming distance between two strings.\n\n Parameters\n ----------\n s1 : str\n First string to compare.\n s2 : str\n Second string to compare.\n max : int or None, optional\n Maximum Hamming distance between s1 and s2, that is\n considered as a result. If the distance is bigger than max,\n -1 is returned instead. Default is None, which deactivates\n this behaviour.\n\n Returns\n -------\n distance : int\n Hamming distance between s1 and s2\n "; +static char __pyx_doc_17cpp_string_metric_4hamming[] = "\n Calculates the Hamming distance between two strings.\n The hamming distance is defined as the number of positions \n where the two strings differ. 
It describes the minimum\n amount of substitutions required to transform s1 into s2.\n\n Parameters\n ----------\n s1 : str\n First string to compare.\n s2 : str\n Second string to compare.\n max : int or None, optional\n Maximum Hamming distance between s1 and s2, that is\n considered as a result. If the distance is bigger than max,\n -1 is returned instead. Default is None, which deactivates\n this behaviour.\n\n Returns\n -------\n distance : int\n Hamming distance between s1 and s2\n\n Raises\n ------\n ValueError\n If s1 and s2 have a different length\n "; static PyMethodDef __pyx_mdef_17cpp_string_metric_5hamming = {"hamming", (PyCFunction)(void*)(PyCFunctionWithKeywords)__pyx_pw_17cpp_string_metric_5hamming, METH_VARARGS|METH_KEYWORDS, __pyx_doc_17cpp_string_metric_4hamming}; static PyObject *__pyx_pw_17cpp_string_metric_5hamming(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { PyObject *__pyx_v_s1 = 0; @@ -2230,7 +2230,7 @@ static PyObject *__pyx_pw_17cpp_string_metric_5hamming(PyObject *__pyx_self, PyO case 1: if (likely((values[1] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_s2)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("hamming", 0, 2, 3, 1); __PYX_ERR(0, 285, __pyx_L3_error) + __Pyx_RaiseArgtupleInvalid("hamming", 0, 2, 3, 1); __PYX_ERR(0, 302, __pyx_L3_error) } CYTHON_FALLTHROUGH; case 2: @@ -2240,7 +2240,7 @@ static PyObject *__pyx_pw_17cpp_string_metric_5hamming(PyObject *__pyx_self, PyO } } if (unlikely(kw_args > 0)) { - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "hamming") < 0)) __PYX_ERR(0, 285, __pyx_L3_error) + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "hamming") < 0)) __PYX_ERR(0, 302, __pyx_L3_error) } } else { switch (PyTuple_GET_SIZE(__pyx_args)) { @@ -2258,7 +2258,7 @@ static PyObject *__pyx_pw_17cpp_string_metric_5hamming(PyObject *__pyx_self, PyO } goto __pyx_L4_argument_unpacking_done; 
__pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("hamming", 0, 2, 3, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 285, __pyx_L3_error) + __Pyx_RaiseArgtupleInvalid("hamming", 0, 2, 3, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 302, __pyx_L3_error) __pyx_L3_error:; __Pyx_AddTraceback("cpp_string_metric.hamming", __pyx_clineno, __pyx_lineno, __pyx_filename); __Pyx_RefNannyFinishContext(); @@ -2284,8 +2284,8 @@ static PyObject *__pyx_pf_17cpp_string_metric_4hamming(CYTHON_UNUSED PyObject *_ int __pyx_clineno = 0; __Pyx_RefNannySetupContext("hamming", 0); - /* "cpp_string_metric.pyx":306 - * Hamming distance between s1 and s2 + /* "cpp_string_metric.pyx":331 + * If s1 and s2 have a different length * """ * cdef size_t max_ = -1 # <<<<<<<<<<<<<< * @@ -2293,7 +2293,7 @@ static PyObject *__pyx_pf_17cpp_string_metric_4hamming(CYTHON_UNUSED PyObject *_ */ __pyx_v_max_ = -1L; - /* "cpp_string_metric.pyx":308 + /* "cpp_string_metric.pyx":333 * cdef size_t max_ = -1 * * if max is not None: # <<<<<<<<<<<<<< @@ -2304,17 +2304,17 @@ static PyObject *__pyx_pf_17cpp_string_metric_4hamming(CYTHON_UNUSED PyObject *_ __pyx_t_2 = (__pyx_t_1 != 0); if (__pyx_t_2) { - /* "cpp_string_metric.pyx":309 + /* "cpp_string_metric.pyx":334 * * if max is not None: * max_ = max # <<<<<<<<<<<<<< * * return hamming_impl(s1, s2, max_) */ - __pyx_t_3 = __Pyx_PyInt_As_size_t(__pyx_v_max); if (unlikely((__pyx_t_3 == (size_t)-1) && PyErr_Occurred())) __PYX_ERR(0, 309, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyInt_As_size_t(__pyx_v_max); if (unlikely((__pyx_t_3 == (size_t)-1) && PyErr_Occurred())) __PYX_ERR(0, 334, __pyx_L1_error) __pyx_v_max_ = __pyx_t_3; - /* "cpp_string_metric.pyx":308 + /* "cpp_string_metric.pyx":333 * cdef size_t max_ = -1 * * if max is not None: # <<<<<<<<<<<<<< @@ -2323,7 +2323,7 @@ static PyObject *__pyx_pf_17cpp_string_metric_4hamming(CYTHON_UNUSED PyObject *_ */ } - /* "cpp_string_metric.pyx":311 + /* "cpp_string_metric.pyx":336 * max_ = max * * return hamming_impl(s1, s2, 
max_) # <<<<<<<<<<<<<< @@ -2333,18 +2333,18 @@ static PyObject *__pyx_pf_17cpp_string_metric_4hamming(CYTHON_UNUSED PyObject *_ __Pyx_XDECREF(__pyx_r); try { __pyx_t_4 = hamming_impl(__pyx_v_s1, __pyx_v_s2, __pyx_v_max_); - if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 311, __pyx_L1_error) - if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 311, __pyx_L1_error) + if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 336, __pyx_L1_error) + if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 336, __pyx_L1_error) } catch(...) { __Pyx_CppExn2PyErr(); - __PYX_ERR(0, 311, __pyx_L1_error) + __PYX_ERR(0, 336, __pyx_L1_error) } __Pyx_GOTREF(__pyx_t_4); __pyx_r = __pyx_t_4; __pyx_t_4 = 0; goto __pyx_L0; - /* "cpp_string_metric.pyx":285 + /* "cpp_string_metric.pyx":302 * * * def hamming(s1, s2, max=None): # <<<<<<<<<<<<<< @@ -2363,7 +2363,7 @@ static PyObject *__pyx_pf_17cpp_string_metric_4hamming(CYTHON_UNUSED PyObject *_ return __pyx_r; } -/* "cpp_string_metric.pyx":314 +/* "cpp_string_metric.pyx":339 * * * def normalized_hamming(s1, s2, processor=None, double score_cutoff=0.0): # <<<<<<<<<<<<<< @@ -2373,7 +2373,7 @@ static PyObject *__pyx_pf_17cpp_string_metric_4hamming(CYTHON_UNUSED PyObject *_ /* Python wrapper */ static PyObject *__pyx_pw_17cpp_string_metric_7normalized_hamming(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ -static char __pyx_doc_17cpp_string_metric_6normalized_hamming[] = "\n Calculates a normalized hamming distance\n\n Parameters\n ----------\n s1 : str\n First string to compare.\n s2 : str\n Second string to compare.\n processor: bool or callable, optional\n Optional callable that is used to preprocess the strings before\n comparing them. When processor is True ``utils.default_process``\n is used. Default is None, which deactivates this behaviour.\n score_cutoff : float, optional\n Optional argument for a score threshold as a float between 0 and 100.\n For ratio < score_cutoff 0 is returned instead. 
Default is 0,\n which deactivates this behaviour.\n\n Returns\n -------\n ratio : float\n Normalized hamming distance between s1 and s2\n as a float between 0 and 100\n "; +static char __pyx_doc_17cpp_string_metric_6normalized_hamming[] = "\n Calculates a normalized hamming distance\n\n Parameters\n ----------\n s1 : str\n First string to compare.\n s2 : str\n Second string to compare.\n processor: bool or callable, optional\n Optional callable that is used to preprocess the strings before\n comparing them. When processor is True ``utils.default_process``\n is used. Default is None, which deactivates this behaviour.\n score_cutoff : float, optional\n Optional argument for a score threshold as a float between 0 and 100.\n For ratio < score_cutoff 0 is returned instead. Default is 0,\n which deactivates this behaviour.\n\n Returns\n -------\n ratio : float\n Normalized hamming distance between s1 and s2\n as a float between 0 and 100\n\n Raises\n ------\n ValueError\n If s1 and s2 have a different length\n\n See Also\n --------\n hamming : Hamming distance\n "; static PyMethodDef __pyx_mdef_17cpp_string_metric_7normalized_hamming = {"normalized_hamming", (PyCFunction)(void*)(PyCFunctionWithKeywords)__pyx_pw_17cpp_string_metric_7normalized_hamming, METH_VARARGS|METH_KEYWORDS, __pyx_doc_17cpp_string_metric_6normalized_hamming}; static PyObject *__pyx_pw_17cpp_string_metric_7normalized_hamming(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { PyObject *__pyx_v_s1 = 0; @@ -2414,7 +2414,7 @@ static PyObject *__pyx_pw_17cpp_string_metric_7normalized_hamming(PyObject *__py case 1: if (likely((values[1] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_s2)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("normalized_hamming", 0, 2, 4, 1); __PYX_ERR(0, 314, __pyx_L3_error) + __Pyx_RaiseArgtupleInvalid("normalized_hamming", 0, 2, 4, 1); __PYX_ERR(0, 339, __pyx_L3_error) } CYTHON_FALLTHROUGH; case 2: @@ -2430,7 +2430,7 @@ static PyObject 
*__pyx_pw_17cpp_string_metric_7normalized_hamming(PyObject *__py } } if (unlikely(kw_args > 0)) { - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "normalized_hamming") < 0)) __PYX_ERR(0, 314, __pyx_L3_error) + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "normalized_hamming") < 0)) __PYX_ERR(0, 339, __pyx_L3_error) } } else { switch (PyTuple_GET_SIZE(__pyx_args)) { @@ -2448,14 +2448,14 @@ static PyObject *__pyx_pw_17cpp_string_metric_7normalized_hamming(PyObject *__py __pyx_v_s2 = values[1]; __pyx_v_processor = values[2]; if (values[3]) { - __pyx_v_score_cutoff = __pyx_PyFloat_AsDouble(values[3]); if (unlikely((__pyx_v_score_cutoff == (double)-1) && PyErr_Occurred())) __PYX_ERR(0, 314, __pyx_L3_error) + __pyx_v_score_cutoff = __pyx_PyFloat_AsDouble(values[3]); if (unlikely((__pyx_v_score_cutoff == (double)-1) && PyErr_Occurred())) __PYX_ERR(0, 339, __pyx_L3_error) } else { __pyx_v_score_cutoff = ((double)((double)0.0)); } } goto __pyx_L4_argument_unpacking_done; __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("normalized_hamming", 0, 2, 4, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 314, __pyx_L3_error) + __Pyx_RaiseArgtupleInvalid("normalized_hamming", 0, 2, 4, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 339, __pyx_L3_error) __pyx_L3_error:; __Pyx_AddTraceback("cpp_string_metric.normalized_hamming", __pyx_clineno, __pyx_lineno, __pyx_filename); __Pyx_RefNannyFinishContext(); @@ -2485,8 +2485,8 @@ static PyObject *__pyx_pf_17cpp_string_metric_6normalized_hamming(CYTHON_UNUSED __Pyx_INCREF(__pyx_v_s1); __Pyx_INCREF(__pyx_v_s2); - /* "cpp_string_metric.pyx":339 - * as a float between 0 and 100 + /* "cpp_string_metric.pyx":373 + * hamming : Hamming distance * """ * if s1 is None or s2 is None: # <<<<<<<<<<<<<< * return 0 @@ -2505,7 +2505,7 @@ static PyObject *__pyx_pf_17cpp_string_metric_6normalized_hamming(CYTHON_UNUSED __pyx_L4_bool_binop_done:; if (__pyx_t_1) { - /* 
"cpp_string_metric.pyx":340 + /* "cpp_string_metric.pyx":374 * """ * if s1 is None or s2 is None: * return 0 # <<<<<<<<<<<<<< @@ -2517,8 +2517,8 @@ static PyObject *__pyx_pf_17cpp_string_metric_6normalized_hamming(CYTHON_UNUSED __pyx_r = __pyx_int_0; goto __pyx_L0; - /* "cpp_string_metric.pyx":339 - * as a float between 0 and 100 + /* "cpp_string_metric.pyx":373 + * hamming : Hamming distance * """ * if s1 is None or s2 is None: # <<<<<<<<<<<<<< * return 0 @@ -2526,7 +2526,7 @@ static PyObject *__pyx_pf_17cpp_string_metric_6normalized_hamming(CYTHON_UNUSED */ } - /* "cpp_string_metric.pyx":342 + /* "cpp_string_metric.pyx":376 * return 0 * * if processor is True or processor == default_process: # <<<<<<<<<<<<<< @@ -2540,17 +2540,17 @@ static PyObject *__pyx_pf_17cpp_string_metric_6normalized_hamming(CYTHON_UNUSED __pyx_t_1 = __pyx_t_3; goto __pyx_L7_bool_binop_done; } - __Pyx_GetModuleGlobalName(__pyx_t_4, __pyx_n_s_default_process); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 342, __pyx_L1_error) + __Pyx_GetModuleGlobalName(__pyx_t_4, __pyx_n_s_default_process); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 376, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); - __pyx_t_5 = PyObject_RichCompare(__pyx_v_processor, __pyx_t_4, Py_EQ); __Pyx_XGOTREF(__pyx_t_5); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 342, __pyx_L1_error) + __pyx_t_5 = PyObject_RichCompare(__pyx_v_processor, __pyx_t_4, Py_EQ); __Pyx_XGOTREF(__pyx_t_5); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 376, __pyx_L1_error) __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __pyx_t_3 = __Pyx_PyObject_IsTrue(__pyx_t_5); if (unlikely(__pyx_t_3 < 0)) __PYX_ERR(0, 342, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_IsTrue(__pyx_t_5); if (unlikely(__pyx_t_3 < 0)) __PYX_ERR(0, 376, __pyx_L1_error) __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; __pyx_t_1 = __pyx_t_3; __pyx_L7_bool_binop_done:; if (__pyx_t_1) { - /* "cpp_string_metric.pyx":343 + /* "cpp_string_metric.pyx":377 * * if processor is True or processor == default_process: * return 
normalized_hamming_impl_default_process(s1, s2, score_cutoff) # <<<<<<<<<<<<<< @@ -2560,18 +2560,18 @@ static PyObject *__pyx_pf_17cpp_string_metric_6normalized_hamming(CYTHON_UNUSED __Pyx_XDECREF(__pyx_r); try { __pyx_t_6 = normalized_hamming_impl_default_process(__pyx_v_s1, __pyx_v_s2, __pyx_v_score_cutoff); - if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 343, __pyx_L1_error) + if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 377, __pyx_L1_error) } catch(...) { __Pyx_CppExn2PyErr(); - __PYX_ERR(0, 343, __pyx_L1_error) + __PYX_ERR(0, 377, __pyx_L1_error) } - __pyx_t_5 = PyFloat_FromDouble(__pyx_t_6); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 343, __pyx_L1_error) + __pyx_t_5 = PyFloat_FromDouble(__pyx_t_6); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 377, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); __pyx_r = __pyx_t_5; __pyx_t_5 = 0; goto __pyx_L0; - /* "cpp_string_metric.pyx":342 + /* "cpp_string_metric.pyx":376 * return 0 * * if processor is True or processor == default_process: # <<<<<<<<<<<<<< @@ -2580,18 +2580,18 @@ static PyObject *__pyx_pf_17cpp_string_metric_6normalized_hamming(CYTHON_UNUSED */ } - /* "cpp_string_metric.pyx":344 + /* "cpp_string_metric.pyx":378 * if processor is True or processor == default_process: * return normalized_hamming_impl_default_process(s1, s2, score_cutoff) * elif callable(processor): # <<<<<<<<<<<<<< * s1 = processor(s1) * s2 = processor(s2) */ - __pyx_t_1 = __Pyx_PyCallable_Check(__pyx_v_processor); if (unlikely(__pyx_t_1 == ((int)-1))) __PYX_ERR(0, 344, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyCallable_Check(__pyx_v_processor); if (unlikely(__pyx_t_1 == ((int)-1))) __PYX_ERR(0, 378, __pyx_L1_error) __pyx_t_3 = (__pyx_t_1 != 0); if (__pyx_t_3) { - /* "cpp_string_metric.pyx":345 + /* "cpp_string_metric.pyx":379 * return normalized_hamming_impl_default_process(s1, s2, score_cutoff) * elif callable(processor): * s1 = processor(s1) # <<<<<<<<<<<<<< @@ -2611,13 +2611,13 @@ static PyObject 
*__pyx_pf_17cpp_string_metric_6normalized_hamming(CYTHON_UNUSED } __pyx_t_5 = (__pyx_t_7) ? __Pyx_PyObject_Call2Args(__pyx_t_4, __pyx_t_7, __pyx_v_s1) : __Pyx_PyObject_CallOneArg(__pyx_t_4, __pyx_v_s1); __Pyx_XDECREF(__pyx_t_7); __pyx_t_7 = 0; - if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 345, __pyx_L1_error) + if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 379, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; __Pyx_DECREF_SET(__pyx_v_s1, __pyx_t_5); __pyx_t_5 = 0; - /* "cpp_string_metric.pyx":346 + /* "cpp_string_metric.pyx":380 * elif callable(processor): * s1 = processor(s1) * s2 = processor(s2) # <<<<<<<<<<<<<< @@ -2637,13 +2637,13 @@ static PyObject *__pyx_pf_17cpp_string_metric_6normalized_hamming(CYTHON_UNUSED } __pyx_t_5 = (__pyx_t_7) ? __Pyx_PyObject_Call2Args(__pyx_t_4, __pyx_t_7, __pyx_v_s2) : __Pyx_PyObject_CallOneArg(__pyx_t_4, __pyx_v_s2); __Pyx_XDECREF(__pyx_t_7); __pyx_t_7 = 0; - if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 346, __pyx_L1_error) + if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 380, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; __Pyx_DECREF_SET(__pyx_v_s2, __pyx_t_5); __pyx_t_5 = 0; - /* "cpp_string_metric.pyx":344 + /* "cpp_string_metric.pyx":378 * if processor is True or processor == default_process: * return normalized_hamming_impl_default_process(s1, s2, score_cutoff) * elif callable(processor): # <<<<<<<<<<<<<< @@ -2652,7 +2652,7 @@ static PyObject *__pyx_pf_17cpp_string_metric_6normalized_hamming(CYTHON_UNUSED */ } - /* "cpp_string_metric.pyx":348 + /* "cpp_string_metric.pyx":382 * s2 = processor(s2) * * return normalized_hamming_impl(s1, s2, score_cutoff) # <<<<<<<<<<<<<< @@ -2660,18 +2660,18 @@ static PyObject *__pyx_pf_17cpp_string_metric_6normalized_hamming(CYTHON_UNUSED __Pyx_XDECREF(__pyx_r); try { __pyx_t_6 = normalized_hamming_impl(__pyx_v_s1, __pyx_v_s2, __pyx_v_score_cutoff); - if (unlikely(PyErr_Occurred())) __PYX_ERR(0, 348, __pyx_L1_error) + if (unlikely(PyErr_Occurred())) 
__PYX_ERR(0, 382, __pyx_L1_error) } catch(...) { __Pyx_CppExn2PyErr(); - __PYX_ERR(0, 348, __pyx_L1_error) + __PYX_ERR(0, 382, __pyx_L1_error) } - __pyx_t_5 = PyFloat_FromDouble(__pyx_t_6); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 348, __pyx_L1_error) + __pyx_t_5 = PyFloat_FromDouble(__pyx_t_6); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 382, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); __pyx_r = __pyx_t_5; __pyx_t_5 = 0; goto __pyx_L0; - /* "cpp_string_metric.pyx":314 + /* "cpp_string_metric.pyx":339 * * * def normalized_hamming(s1, s2, processor=None, double score_cutoff=0.0): # <<<<<<<<<<<<<< @@ -2757,7 +2757,7 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { {&__pyx_n_s_name, __pyx_k_name, sizeof(__pyx_k_name), 0, 0, 1, 1}, {&__pyx_n_s_normalized_hamming, __pyx_k_normalized_hamming, sizeof(__pyx_k_normalized_hamming), 0, 0, 1, 1}, {&__pyx_n_s_normalized_levenshtein, __pyx_k_normalized_levenshtein, sizeof(__pyx_k_normalized_levenshtein), 0, 0, 1, 1}, - {&__pyx_kp_u_normalized_levenshtein_line_179, __pyx_k_normalized_levenshtein_line_179, sizeof(__pyx_k_normalized_levenshtein_line_179), 0, 1, 0, 0}, + {&__pyx_kp_u_normalized_levenshtein_line_196, __pyx_k_normalized_levenshtein_line_196, sizeof(__pyx_k_normalized_levenshtein_line_196), 0, 1, 0, 0}, {&__pyx_n_s_processor, __pyx_k_processor, sizeof(__pyx_k_processor), 0, 0, 1, 1}, {&__pyx_n_s_rapidfuzz_utils, __pyx_k_rapidfuzz_utils, sizeof(__pyx_k_rapidfuzz_utils), 0, 0, 1, 1}, {&__pyx_n_s_s1, __pyx_k_s1, sizeof(__pyx_k_s1), 0, 0, 1, 1}, @@ -2795,44 +2795,44 @@ static CYTHON_SMALL_CODE int __Pyx_InitCachedConstants(void) { __Pyx_GOTREF(__pyx_tuple__4); __Pyx_GIVEREF(__pyx_tuple__4); - /* "cpp_string_metric.pyx":179 + /* "cpp_string_metric.pyx":196 * * * def normalized_levenshtein(s1, s2, weights=(1,1,1), processor=None, double score_cutoff=0.0): # <<<<<<<<<<<<<< * """ * Calculates a normalized levenshtein distance using custom */ - __pyx_tuple__5 = PyTuple_Pack(8, __pyx_n_s_s1, __pyx_n_s_s2, __pyx_n_s_weights, 
__pyx_n_s_processor, __pyx_n_s_score_cutoff, __pyx_n_s_insertion, __pyx_n_s_deletion, __pyx_n_s_substitution); if (unlikely(!__pyx_tuple__5)) __PYX_ERR(0, 179, __pyx_L1_error) + __pyx_tuple__5 = PyTuple_Pack(8, __pyx_n_s_s1, __pyx_n_s_s2, __pyx_n_s_weights, __pyx_n_s_processor, __pyx_n_s_score_cutoff, __pyx_n_s_insertion, __pyx_n_s_deletion, __pyx_n_s_substitution); if (unlikely(!__pyx_tuple__5)) __PYX_ERR(0, 196, __pyx_L1_error) __Pyx_GOTREF(__pyx_tuple__5); __Pyx_GIVEREF(__pyx_tuple__5); - __pyx_codeobj__6 = (PyObject*)__Pyx_PyCode_New(5, 0, 8, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__5, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_src_cpp_string_metric_pyx, __pyx_n_s_normalized_levenshtein, 179, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__6)) __PYX_ERR(0, 179, __pyx_L1_error) + __pyx_codeobj__6 = (PyObject*)__Pyx_PyCode_New(5, 0, 8, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__5, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_src_cpp_string_metric_pyx, __pyx_n_s_normalized_levenshtein, 196, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__6)) __PYX_ERR(0, 196, __pyx_L1_error) - /* "cpp_string_metric.pyx":285 + /* "cpp_string_metric.pyx":302 * * * def hamming(s1, s2, max=None): # <<<<<<<<<<<<<< * """ * Calculates the Hamming distance between two strings. 
*/ - __pyx_tuple__7 = PyTuple_Pack(4, __pyx_n_s_s1, __pyx_n_s_s2, __pyx_n_s_max, __pyx_n_s_max_2); if (unlikely(!__pyx_tuple__7)) __PYX_ERR(0, 285, __pyx_L1_error) + __pyx_tuple__7 = PyTuple_Pack(4, __pyx_n_s_s1, __pyx_n_s_s2, __pyx_n_s_max, __pyx_n_s_max_2); if (unlikely(!__pyx_tuple__7)) __PYX_ERR(0, 302, __pyx_L1_error) __Pyx_GOTREF(__pyx_tuple__7); __Pyx_GIVEREF(__pyx_tuple__7); - __pyx_codeobj__8 = (PyObject*)__Pyx_PyCode_New(3, 0, 4, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__7, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_src_cpp_string_metric_pyx, __pyx_n_s_hamming, 285, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__8)) __PYX_ERR(0, 285, __pyx_L1_error) - __pyx_tuple__9 = PyTuple_Pack(1, ((PyObject *)Py_None)); if (unlikely(!__pyx_tuple__9)) __PYX_ERR(0, 285, __pyx_L1_error) + __pyx_codeobj__8 = (PyObject*)__Pyx_PyCode_New(3, 0, 4, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__7, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_src_cpp_string_metric_pyx, __pyx_n_s_hamming, 302, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__8)) __PYX_ERR(0, 302, __pyx_L1_error) + __pyx_tuple__9 = PyTuple_Pack(1, ((PyObject *)Py_None)); if (unlikely(!__pyx_tuple__9)) __PYX_ERR(0, 302, __pyx_L1_error) __Pyx_GOTREF(__pyx_tuple__9); __Pyx_GIVEREF(__pyx_tuple__9); - /* "cpp_string_metric.pyx":314 + /* "cpp_string_metric.pyx":339 * * * def normalized_hamming(s1, s2, processor=None, double score_cutoff=0.0): # <<<<<<<<<<<<<< * """ * Calculates a normalized hamming distance */ - __pyx_tuple__10 = PyTuple_Pack(4, __pyx_n_s_s1, __pyx_n_s_s2, __pyx_n_s_processor, __pyx_n_s_score_cutoff); if (unlikely(!__pyx_tuple__10)) __PYX_ERR(0, 314, __pyx_L1_error) + __pyx_tuple__10 = PyTuple_Pack(4, __pyx_n_s_s1, __pyx_n_s_s2, __pyx_n_s_processor, __pyx_n_s_score_cutoff); if (unlikely(!__pyx_tuple__10)) __PYX_ERR(0, 339, __pyx_L1_error) __Pyx_GOTREF(__pyx_tuple__10); 
__Pyx_GIVEREF(__pyx_tuple__10); - __pyx_codeobj__11 = (PyObject*)__Pyx_PyCode_New(4, 0, 4, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__10, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_src_cpp_string_metric_pyx, __pyx_n_s_normalized_hamming, 314, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__11)) __PYX_ERR(0, 314, __pyx_L1_error) + __pyx_codeobj__11 = (PyObject*)__Pyx_PyCode_New(4, 0, 4, 0, CO_OPTIMIZED|CO_NEWLOCALS, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__10, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_src_cpp_string_metric_pyx, __pyx_n_s_normalized_hamming, 339, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__11)) __PYX_ERR(0, 339, __pyx_L1_error) __Pyx_RefNannyFinishContext(); return 0; __pyx_L1_error:; @@ -3150,16 +3150,16 @@ if (!__Pyx_RefNanny) { if (PyDict_SetItem(__pyx_d, __pyx_n_s_levenshtein, __pyx_t_2) < 0) __PYX_ERR(0, 24, __pyx_L1_error) __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - /* "cpp_string_metric.pyx":179 + /* "cpp_string_metric.pyx":196 * * * def normalized_levenshtein(s1, s2, weights=(1,1,1), processor=None, double score_cutoff=0.0): # <<<<<<<<<<<<<< * """ * Calculates a normalized levenshtein distance using custom */ - __pyx_t_2 = PyFloat_FromDouble(((double)0.0)); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 179, __pyx_L1_error) + __pyx_t_2 = PyFloat_FromDouble(((double)0.0)); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 196, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); - __pyx_t_1 = PyTuple_New(3); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 179, __pyx_L1_error) + __pyx_t_1 = PyTuple_New(3); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 196, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_INCREF(((PyObject*)__pyx_tuple_)); __Pyx_GIVEREF(((PyObject*)__pyx_tuple_)); @@ -3170,36 +3170,36 @@ if (!__Pyx_RefNanny) { __Pyx_GIVEREF(__pyx_t_2); PyTuple_SET_ITEM(__pyx_t_1, 2, __pyx_t_2); __pyx_t_2 = 0; - __pyx_t_2 = 
__Pyx_CyFunction_New(&__pyx_mdef_17cpp_string_metric_3normalized_levenshtein, 0, __pyx_n_s_normalized_levenshtein, NULL, __pyx_n_s_cpp_string_metric, __pyx_d, ((PyObject *)__pyx_codeobj__6)); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 179, __pyx_L1_error) + __pyx_t_2 = __Pyx_CyFunction_New(&__pyx_mdef_17cpp_string_metric_3normalized_levenshtein, 0, __pyx_n_s_normalized_levenshtein, NULL, __pyx_n_s_cpp_string_metric, __pyx_d, ((PyObject *)__pyx_codeobj__6)); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 196, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); __Pyx_CyFunction_SetDefaultsTuple(__pyx_t_2, __pyx_t_1); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - if (PyDict_SetItem(__pyx_d, __pyx_n_s_normalized_levenshtein, __pyx_t_2) < 0) __PYX_ERR(0, 179, __pyx_L1_error) + if (PyDict_SetItem(__pyx_d, __pyx_n_s_normalized_levenshtein, __pyx_t_2) < 0) __PYX_ERR(0, 196, __pyx_L1_error) __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - /* "cpp_string_metric.pyx":285 + /* "cpp_string_metric.pyx":302 * * * def hamming(s1, s2, max=None): # <<<<<<<<<<<<<< * """ * Calculates the Hamming distance between two strings. 
*/ - __pyx_t_2 = __Pyx_CyFunction_New(&__pyx_mdef_17cpp_string_metric_5hamming, 0, __pyx_n_s_hamming, NULL, __pyx_n_s_cpp_string_metric, __pyx_d, ((PyObject *)__pyx_codeobj__8)); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 285, __pyx_L1_error) + __pyx_t_2 = __Pyx_CyFunction_New(&__pyx_mdef_17cpp_string_metric_5hamming, 0, __pyx_n_s_hamming, NULL, __pyx_n_s_cpp_string_metric, __pyx_d, ((PyObject *)__pyx_codeobj__8)); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 302, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); __Pyx_CyFunction_SetDefaultsTuple(__pyx_t_2, __pyx_tuple__9); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_hamming, __pyx_t_2) < 0) __PYX_ERR(0, 285, __pyx_L1_error) + if (PyDict_SetItem(__pyx_d, __pyx_n_s_hamming, __pyx_t_2) < 0) __PYX_ERR(0, 302, __pyx_L1_error) __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - /* "cpp_string_metric.pyx":314 + /* "cpp_string_metric.pyx":339 * * * def normalized_hamming(s1, s2, processor=None, double score_cutoff=0.0): # <<<<<<<<<<<<<< * """ * Calculates a normalized hamming distance */ - __pyx_t_2 = PyFloat_FromDouble(((double)0.0)); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 314, __pyx_L1_error) + __pyx_t_2 = PyFloat_FromDouble(((double)0.0)); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 339, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); - __pyx_t_1 = PyTuple_New(2); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 314, __pyx_L1_error) + __pyx_t_1 = PyTuple_New(2); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 339, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_INCREF(((PyObject *)Py_None)); __Pyx_GIVEREF(((PyObject *)Py_None)); @@ -3207,11 +3207,11 @@ if (!__Pyx_RefNanny) { __Pyx_GIVEREF(__pyx_t_2); PyTuple_SET_ITEM(__pyx_t_1, 1, __pyx_t_2); __pyx_t_2 = 0; - __pyx_t_2 = __Pyx_CyFunction_New(&__pyx_mdef_17cpp_string_metric_7normalized_hamming, 0, __pyx_n_s_normalized_hamming, NULL, __pyx_n_s_cpp_string_metric, __pyx_d, ((PyObject *)__pyx_codeobj__11)); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 314, __pyx_L1_error) + __pyx_t_2 = 
__Pyx_CyFunction_New(&__pyx_mdef_17cpp_string_metric_7normalized_hamming, 0, __pyx_n_s_normalized_hamming, NULL, __pyx_n_s_cpp_string_metric, __pyx_d, ((PyObject *)__pyx_codeobj__11)); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 339, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); __Pyx_CyFunction_SetDefaultsTuple(__pyx_t_2, __pyx_t_1); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - if (PyDict_SetItem(__pyx_d, __pyx_n_s_normalized_hamming, __pyx_t_2) < 0) __PYX_ERR(0, 314, __pyx_L1_error) + if (PyDict_SetItem(__pyx_d, __pyx_n_s_normalized_hamming, __pyx_t_2) < 0) __PYX_ERR(0, 339, __pyx_L1_error) __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; /* "cpp_string_metric.pyx":1 @@ -3222,7 +3222,7 @@ if (!__Pyx_RefNanny) { __pyx_t_2 = __Pyx_PyDict_NewPresized(2); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 1, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); if (PyDict_SetItem(__pyx_t_2, __pyx_kp_u_levenshtein_line_24, __pyx_kp_u_Calculates_the_minimum_number_o) < 0) __PYX_ERR(0, 1, __pyx_L1_error) - if (PyDict_SetItem(__pyx_t_2, __pyx_kp_u_normalized_levenshtein_line_179, __pyx_kp_u_Calculates_a_normalized_levensh) < 0) __PYX_ERR(0, 1, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_2, __pyx_kp_u_normalized_levenshtein_line_196, __pyx_kp_u_Calculates_a_normalized_levensh) < 0) __PYX_ERR(0, 1, __pyx_L1_error) if (PyDict_SetItem(__pyx_d, __pyx_n_s_test, __pyx_t_2) < 0) __PYX_ERR(0, 1, __pyx_L1_error) __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; diff --git a/src/cpp_string_metric.pyx b/src/cpp_string_metric.pyx index 0a67991..2eed4d6 100644 --- a/src/cpp_string_metric.pyx +++ b/src/cpp_string_metric.pyx @@ -81,6 +81,15 @@ def levenshtein(s1, s2, weights=(1,1,1), max=None): The algorithm is described by [3]_. The time complexity of this algorithm is ``O([N/64]M)``. + The following image shows a benchmark of the Levenshtein distance in multiple + Python libraries. All of them are implemented either in C/C++ or Cython. 
+ The graph shows that python-Levenshtein is the only library with a time + complexity of ``O(NM)``, while all other libraries have a time complexity of + ``O([N/64]M)``. Especially for long strings RapidFuzz is a lot faster than + all the other tested libraries. + + .. image:: img/uniform_levenshtein.svg + Insertion = Deletion, Substitution >= Insertion + Deletion: Since every Substitution can be performed as Insertion + Deletion, this variant @@ -120,6 +129,14 @@ def levenshtein(s1, s2, weights=(1,1,1), max=None): The algorithm is described by [4]_. The time complexity of this algorithm is ``O([N/64]M)``. + The following image shows a benchmark of the InDel distance in RapidFuzz + and python-Levenshtein. Similar to the normal Levenshtein distance, + python-Levenshtein uses an implementation with a time complexity of ``O(NM)``, + while RapidFuzz has a time complexity of ``O([N/64]M)``. + + .. image:: img/indel_levenshtein.svg + + Other weights: The implementation for other weights is based on Wagner-Fischer. It has a performance of ``O(N * M)`` and has a memory usage of ``O(N)``. @@ -285,6 +302,9 @@ def normalized_levenshtein(s1, s2, weights=(1,1,1), processor=None, double score def hamming(s1, s2, max=None): """ Calculates the Hamming distance between two strings. + The hamming distance is defined as the number of positions + where the two strings differ. It describes the minimum + amount of substitutions required to transform s1 into s2. 
Parameters ---------- @@ -302,6 +322,11 @@ def hamming(s1, s2, max=None): ------- distance : int Hamming distance between s1 and s2 + + Raises + ------ + ValueError + If s1 and s2 have a different length """ cdef size_t max_ = -1 @@ -335,6 +360,15 @@ def normalized_hamming(s1, s2, processor=None, double score_cutoff=0.0): ratio : float Normalized hamming distance between s1 and s2 as a float between 0 and 100 + + Raises + ------ + ValueError + If s1 and s2 have a different length + + See Also + -------- + hamming : Hamming distance """ if s1 is None or s2 is None: return 0