jellyfish/benchmarks/timedruns.py

87 lines
2.1 KiB
Python

import sys
import timeit
import csv
open_kwargs = {"encoding": "utf8"}
def _load_data(name):
with open("./testdata/{}.csv".format(name), **open_kwargs) as f:
yield from csv.reader(f)
def _load_n(name, n):
data = []
iterator = _load_data(name)
while n > 0:
try:
data.append(next(iterator))
n -= 1
except StopIteration:
iterator = _load_data(name)
return data
def time_func(funcname, name, params, ftype):
TEST_N = 100
TEST_ITERATIONS = 10000
if params == 1:
run = "[{}(x) for x, y in data]".format(funcname)
elif params == 2:
run = "[{}(x, y) for x, y, z in data]".format(funcname)
if ftype == "python":
path = "_jellyfish"
elif ftype == "c":
path = "cjellyfish"
elif ftype == "rust":
path = "_rustyfish"
return (
timeit.timeit(
run,
setup="""from __main__ import _load_n
from jellyfish.{} import {}
data = _load_n('{}', {})
""".format(
path, funcname, name, TEST_N
),
number=TEST_ITERATIONS,
)
/ (TEST_N * TEST_ITERATIONS)
)
testing = [
("damerau_levenshtein_distance", "damerau_levenshtein", 2),
("hamming_distance", "hamming", 2),
("jaro_similarity", "jaro_distance", 2),
("jaro_winkler_similarity", "jaro_winkler", 2),
("levenshtein_distance", "levenshtein", 2),
("match_rating_codex", "match_rating_codex", 1),
("match_rating_comparison", "match_rating_comparison", 2),
("metaphone", "metaphone", 1),
("nysiis", "nysiis", 1),
("soundex", "soundex", 1),
]
def main():
py_version = "{}.{}.{}".format(*sys.version_info[0:3])
if sys.argv[1] == "old":
jf_version = "0.10"
ftypes = ("c", "python")
elif sys.argv[1] == "new":
jf_version = "dev"
ftypes = ("rust",)
for ftype in ftypes:
for funcname, name, params in testing:
result = time_func(funcname, name, params, ftype)
print(f"{py_version},{jf_version},{ftype},{funcname},{result}")
if __name__ == "__main__":
main()