From 1406e24327d1825f8b8d1bd6c4f212223e53c30d Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Thu, 23 Jul 2015 19:36:21 +0200
Subject: [PATCH] * Fix unicode error for Python3

---
Review notes (free-form commentary placed after the "---" separator; it is
not part of the commit message and not part of the diff below):

* Hunk 1 adds `from __future__ import unicode_literals` at the top of
  spacy/serialize/bits.pyx.
* Hunk 2 types the `data` argument of BitArray.__init__ as `bytes`; the
  default value b'' is unchanged.
* Hunk 3 rewrites BitArray.as_bytes. NOTE(review): the added code looks
  unfinished -- please confirm it compiles and behaves as intended:
  - `byte` is declared `cdef unsigned char`, yet it is assigned
    `chr(self.byte)` and then tested with `isinstance(byte, unicode)`.
  - `byte_char` is assigned and has its address taken, but no declaration
    for it appears anywhere in the method as shown in this hunk.
  - `self.data + &byte_char` adds the address of a variable to `self.data`.
  - the inner `else` branch still returns `self.data + chr(self.byte)`,
    i.e. the same expression this hunk removes.

 spacy/serialize/bits.pyx | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/spacy/serialize/bits.pyx b/spacy/serialize/bits.pyx
index 3b879b2ee..105db4269 100644
--- a/spacy/serialize/bits.pyx
+++ b/spacy/serialize/bits.pyx
@@ -1,3 +1,5 @@
+from __future__ import unicode_literals
+
 from libc.string cimport memcpy
 
 # Note that we're setting the most significant bits here first, when in practice
@@ -14,7 +16,7 @@ cdef Code bit_append(Code code, bint bit) nogil:
 
 
 cdef class BitArray:
-    def __init__(self, data=b''):
+    def __init__(self, bytes data=b''):
         self.data = data
         self.byte = 0
         self.bit_of_byte = 0
@@ -78,8 +80,15 @@ cdef class BitArray:
         return output
 
     def as_bytes(self):
+        cdef unsigned char byte
         if self.bit_of_byte != 0:
-            return self.data + chr(self.byte)
+            byte = chr(self.byte)
+            # Jump through some hoops for Python3
+            if isinstance(byte, unicode):
+                byte_char = byte
+                return self.data + &byte_char
+            else:
+                return self.data + chr(self.byte)
         else:
             return self.data
 