mirror of https://github.com/explosion/spaCy.git
hash_string() should not depend on Python's internal Unicode representation; this also fixes https://github.com/spacy-io/sense2vec/issues/5 on Python 2
This commit is contained in:
parent
7adbd7a785
commit
b740f20191
|
@ -23,10 +23,8 @@ import ujson as json
|
||||||
|
|
||||||
|
|
||||||
cpdef hash_t hash_string(unicode string) except 0:
|
cpdef hash_t hash_string(unicode string) except 0:
|
||||||
# This has to be like this for
|
chars = string.encode('utf8')
|
||||||
chars = <char*>PyUnicode_AS_DATA(string)
|
return hash64(<char*>chars, len(chars), 1)
|
||||||
size = PyUnicode_GET_DATA_SIZE(string)
|
|
||||||
return hash64(chars, size, 1)
|
|
||||||
|
|
||||||
|
|
||||||
cdef unicode _decode(const Utf8Str* string):
|
cdef unicode _decode(const Utf8Str* string):
|
||||||
|
|
Loading…
Reference in New Issue