mirror of https://github.com/explosion/spaCy.git
* Add string decode and encode helpers to string_tools
This commit is contained in:
parent
7fd9b2f1f8
commit
213a440ffc
|
@ -1,3 +1,7 @@
|
||||||
|
# Declaration: convert a unicode string to bytes (the implementation encodes as UTF-8).
cpdef bytes to_bytes(unicode string)
|
||||||
|
|
||||||
|
# Declaration: convert bytes to a unicode string (the implementation decodes as UTF-8).
cpdef unicode from_bytes(bytes string)
|
||||||
|
|
||||||
cpdef unicode substr(unicode string, int start, int end, size_t length)
|
cpdef unicode substr(unicode string, int start, int end, size_t length)
|
||||||
|
|
||||||
cdef bint is_whitespace(Py_UNICODE c)
|
cdef bint is_whitespace(Py_UNICODE c)
|
||||||
|
|
|
@ -1,6 +1,14 @@
|
||||||
# cython: profile=True
|
# cython: profile=True
|
||||||
|
|
||||||
|
|
||||||
|
cpdef bytes to_bytes(unicode string):
    """Encode a unicode string as UTF-8 and return the resulting bytes."""
    cdef bytes encoded = string.encode('utf8')
    return encoded
|
||||||
|
|
||||||
|
|
||||||
|
cpdef unicode from_bytes(bytes string):
    """Decode UTF-8 encoded bytes and return the resulting unicode string."""
    cdef unicode decoded = string.decode('utf8')
    return decoded
|
||||||
|
|
||||||
|
|
||||||
cpdef unicode substr(unicode string, int start, int end, size_t length):
|
cpdef unicode substr(unicode string, int start, int end, size_t length):
|
||||||
if end >= length:
|
if end >= length:
|
||||||
end = -1
|
end = -1
|
||||||
|
|
Loading…
Reference in New Issue