From 2f89e5d139fdb73ad54867878e383584bfeb6941 Mon Sep 17 00:00:00 2001 From: Yomguithereal Date: Fri, 31 Aug 2018 18:42:35 +0200 Subject: [PATCH] Flag customizable for levenshtein_1d --- fog/key/levenshtein_1d.py | 26 ++++++++++++-------------- test/key/levenshtein_1d_test.py | 10 +++------- 2 files changed, 15 insertions(+), 21 deletions(-) diff --git a/fog/key/levenshtein_1d.py b/fog/key/levenshtein_1d.py index daf72da..850666b 100644 --- a/fog/key/levenshtein_1d.py +++ b/fog/key/levenshtein_1d.py @@ -13,10 +13,8 @@ # from functools import partial -FLAG = '\x00' - -def levenshtein_1d_keys(string, transpositions=False): +def levenshtein_1d_keys(string, transpositions=False, flag='\x00'): """ Function returning an iterator over Levenshtein 1D keys, being the series of keys colliding with other strings being at a Levenshtein distance of @@ -43,7 +41,7 @@ def levenshtein_1d_keys(string, transpositions=False): for i in range(n): # Substitution - yield string[:i] + FLAG + string[i + 1:] + yield string[:i] + flag + string[i + 1:] # Transpositions if i > 0 and transpositions: @@ -56,16 +54,16 @@ def levenshtein_1d_keys(string, transpositions=False): if i > 0 and string[i - 1] == string[i]: continue - yield string[:i] + FLAG + string[i:] + yield string[:i] + flag + string[i:] # Last addition - yield string + FLAG + yield string + flag damerau_levenshtein_1d_keys = partial(levenshtein_1d_keys, transpositions=True) -def levenshtein_1d_blocks(string, transpositions=False): +def levenshtein_1d_blocks(string, transpositions=False, flag='\x00'): """ Function returning the minimal set of longest Levenshtein distance <= 1 blocking keys of target string. Under the hood, this splits the given @@ -88,24 +86,24 @@ def levenshtein_1d_blocks(string, transpositions=False): n = len(string) if n == 1: - return (FLAG + string, string + FLAG, '\x00') + return (flag + string, string + flag, '\x00') h = n // 2 # String has even length, we just split in half if n % 2 == 0 and not transpositions: - first_half = FLAG + string[:h] - second_half = string[h:] + FLAG + first_half = flag + string[:h] + second_half = string[h:] + flag return (first_half, second_half) # String has odd length, we split twice h1 = h + 1 - first_half = FLAG + string[:h] - second_half = string[h:] + FLAG - first_half1 = FLAG + string[:h1] - second_half1 = string[h1:] + FLAG + first_half = flag + string[:h] + second_half = string[h:] + flag + first_half1 = flag + string[:h1] + second_half1 = string[h1:] + flag return (first_half, second_half, first_half1, second_half1) diff --git a/test/key/levenshtein_1d_test.py b/test/key/levenshtein_1d_test.py index da690df..85bef4a 100644 --- a/test/key/levenshtein_1d_test.py +++ b/test/key/levenshtein_1d_test.py @@ -62,22 +62,18 @@ SECOND_TRANSPOSITIONS_TEST = [ ] -def prettify(s): - return s.replace('\x00', '!') - - class TestLevenshtein1D(object): def test_keys(self): - keys = set(prettify(k) for k in levenshtein_1d_keys('hello')) + keys = set(levenshtein_1d_keys('hello', flag='!')) assert keys == HELLO_KEYS - keys_with_transpositions = set(prettify(k) for k in levenshtein_1d_keys('hello', transpositions=True)) + keys_with_transpositions = set(levenshtein_1d_keys('hello', transpositions=True, flag='!')) assert keys_with_transpositions == HELLO_KEYS | HELLO_TRANSPOSITION_KEYS - keys_with_transpositions = set(prettify(k) for k in damerau_levenshtein_1d_keys('hello')) + keys_with_transpositions = set(damerau_levenshtein_1d_keys('hello', flag='!')) assert keys_with_transpositions == HELLO_KEYS | HELLO_TRANSPOSITION_KEYS