# fuzzysearch/benchmarks/__init__.py
#
# 47 lines
# 1.6 KiB
# Python
# Raw Normal View History

from fuzzysearch.levenshtein import \
find_near_matches_levenshtein_linear_programming
from fuzzysearch.levenshtein_ngram import \
find_near_matches_levenshtein_ngrams as fnm_levenshtein_ngrams
from fuzzysearch.susbstitutions_only import \
find_near_matches_substitutions_ngrams as fnm_substitutions_ngrams, \
find_near_matches_substitutions_linear_programming
def fnm_levenshtein_lp(subsequence, sequence, max_l_dist):
    """Run the linear-programming Levenshtein search and return a list.

    The three arguments are forwarded positionally, unchanged, to
    ``find_near_matches_levenshtein_linear_programming``; whatever that
    call yields is collected into a ``list`` before being returned.
    """
    matches = find_near_matches_levenshtein_linear_programming(
        subsequence,
        sequence,
        max_l_dist,
    )
    # Materialize the result so callers always receive a concrete list.
    return list(matches)
def fnm_substitutions_lp(subsequence, sequence, max_substitutions):
    """Run the linear-programming substitutions search and return a list.

    The three arguments are forwarded positionally, unchanged, to
    ``find_near_matches_substitutions_linear_programming``; whatever that
    call yields is collected into a ``list`` before being returned.
    """
    matches = find_near_matches_substitutions_linear_programming(
        subsequence,
        sequence,
        max_substitutions,
    )
    # Materialize the result so callers always receive a concrete list.
    return list(matches)
# Registry of the search implementations that can be benchmarked, keyed by
# the short name accepted by run_benchmark().
search_functions = dict(
    levenshtein_lp=fnm_levenshtein_lp,
    levenshtein_ngrams=fnm_levenshtein_ngrams,
    substitutions_lp=fnm_substitutions_lp,
    substitutions_ngrams=fnm_substitutions_ngrams,
)
# Benchmark scenarios, keyed by name.  Each entry holds the arguments for a
# search call; the generic 'max_dist' key is renamed by run_benchmark() to
# the keyword expected by the selected search function.
#
# NOTE(review): 'sequence' is stored as the *text* of a Python expression,
# not as its evaluated result, and run_benchmark() passes it on unchanged.
# Confirm whether some caller is expected to evaluate it first.
benchmarks = {
    'dna_no_match': {
        'subsequence': 'GCTAGCTAGCTA',
        'sequence': '"ATCG" * (10**3)',
        'max_dist': 1,
    },
}
def run_benchmark(search_func_name, benchmark_name):
    """Run one named benchmark with one named search function.

    ``search_func_name`` is looked up in ``search_functions`` and
    ``benchmark_name`` in ``benchmarks``; an unknown name in either table
    raises ``KeyError``.  The benchmark's generic ``max_dist`` entry is
    renamed to the keyword the chosen function takes (``max_l_dist`` for
    the Levenshtein functions, ``max_substitutions`` for the
    substitutions-only ones), and the function is then called with the
    resulting keyword arguments.

    Returns whatever the search function returns.  Raises ``Exception`` if
    the function belongs to neither known family.
    """
    func = search_functions[search_func_name]
    # Work on a shallow copy so the shared benchmark table is not mutated.
    call_kwargs = dict(benchmarks[benchmark_name])

    # Pick the function-specific name for the distance limit.
    if func in (fnm_levenshtein_ngrams, fnm_levenshtein_lp):
        distance_kwarg = 'max_l_dist'
    elif func in (fnm_substitutions_ngrams, fnm_substitutions_lp):
        distance_kwarg = 'max_substitutions'
    else:
        raise Exception('Unsupported search function: %r' % func)

    call_kwargs[distance_kwarg] = call_kwargs.pop('max_dist')
    return func(**call_kwargs)