From 213a440ffc2ded6e41a40faa12bde954bcaf4d1b Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Fri, 15 Aug 2014 23:57:27 +0200 Subject: [PATCH] * Add string decode and encode helpers to string_tools --- spacy/string_tools.pxd | 4 ++++ spacy/string_tools.pyx | 8 ++++++++ 2 files changed, 12 insertions(+) diff --git a/spacy/string_tools.pxd b/spacy/string_tools.pxd index 7f27c19e5..a034bf230 100644 --- a/spacy/string_tools.pxd +++ b/spacy/string_tools.pxd @@ -1,3 +1,7 @@ +cpdef bytes to_bytes(unicode string) + +cpdef unicode from_bytes(bytes string) + cpdef unicode substr(unicode string, int start, int end, size_t length) cdef bint is_whitespace(Py_UNICODE c) diff --git a/spacy/string_tools.pyx b/spacy/string_tools.pyx index 2f199766f..f1854a0b0 100644 --- a/spacy/string_tools.pyx +++ b/spacy/string_tools.pyx @@ -1,6 +1,14 @@ # cython: profile=True +cpdef bytes to_bytes(unicode string): + return string.encode('utf8') + + +cpdef unicode from_bytes(bytes string): + return string.decode('utf8') + + cpdef unicode substr(unicode string, int start, int end, size_t length): if end >= length: end = -1