improve hash function
This commit is contained in:
parent
72e2ca7d95
commit
53434ca085
|
@ -12,6 +12,7 @@
|
|||
- fix implementation of Hamming.normalized_similarity
|
||||
- fix default score_cutoff of Hamming.similarity
|
||||
- fix implementation of LCSseq.distance when used in the process module
|
||||
- hash -1 and -2 to different values, so sequences containing them are no longer treated as equal
|
||||
|
||||
### [2.0.15] - 2022-06-24
|
||||
#### Fixed
|
||||
|
|
|
@ -116,6 +116,11 @@ cdef extern from "cpp_common.hpp":
|
|||
|
||||
vector[T] vector_slice[T](const vector[T]& vec, int start, int stop, int step) except +
|
||||
|
||||
# Hash a Python object to a uint64_t, special-casing values equal to -1.
# On CPython hash(-1) == hash(-2) == -2, so plain hash() cannot tell the two
# values apart; mapping -1 to <uint64_t>-1 keeps them distinct.
# `except *` tells Cython to check for a raised Python exception after every
# call, since no return value is reserved as an error marker.
cdef inline uint64_t rf_hash(val) except *:
|
||||
# compared by value, so any object that equals -1 takes this branch
if val == -1:
|
||||
return <uint64_t>-1
|
||||
# every other value uses Python's built-in hash, reinterpreted as unsigned
return <uint64_t>hash(val)
|
||||
|
||||
cdef inline RF_String hash_array(arr) except *:
|
||||
# TODO on Cpython this does not require any copies
|
||||
cdef RF_String s_proc
|
||||
|
@ -156,7 +161,7 @@ cdef inline RF_String hash_array(arr) except *:
|
|||
else: # float/double are hashed
|
||||
s_proc.kind = RF_StringType.RF_UINT64
|
||||
for i in range(s_proc.length):
|
||||
(<uint64_t*>s_proc.data)[i] = <uint64_t>hash(arr[i])
|
||||
(<uint64_t*>s_proc.data)[i] = rf_hash(arr[i])
|
||||
except Exception as e:
|
||||
free(s_proc.data)
|
||||
s_proc.data = NULL
|
||||
|
@ -183,7 +188,7 @@ cdef inline RF_String hash_sequence(seq) except *:
|
|||
if isinstance(elem, str) and len(elem) == 1:
|
||||
(<uint64_t*>s_proc.data)[i] = <uint64_t><Py_UCS4>elem
|
||||
else:
|
||||
(<uint64_t*>s_proc.data)[i] = <uint64_t>hash(elem)
|
||||
(<uint64_t*>s_proc.data)[i] = rf_hash(elem)
|
||||
except Exception as e:
|
||||
free(s_proc.data)
|
||||
s_proc.data = NULL
|
||||
|
|
|
@ -60,6 +60,7 @@ def test_cross_type_matching():
|
|||
assert Levenshtein.distance("aaaa", ["a", "a", "a", "a"]) == 0
|
||||
# todo add support in pure python
|
||||
assert Levenshtein_cpp.distance("aaaa", [ord("a"), ord("a"), "a", "a"]) == 0
|
||||
assert Levenshtein_cpp.distance([0, -1], [0, -2]) == 1
|
||||
|
||||
|
||||
def test_word_error_rate():
|
||||
|
|
Loading…
Reference in New Issue