From 1d18cedae846d93215042242e0d7eac9e18edcaa Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Thu, 1 Jun 2017 10:48:43 -0500
Subject: [PATCH] Fiddle with msgpack bytes vs unicode

---
 spacy/util.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/spacy/util.py b/spacy/util.py
index 087a43881..a70e3883b 100644
--- a/spacy/util.py
+++ b/spacy/util.py
@@ -423,11 +423,11 @@ def to_bytes(getters, exclude):
     for key, getter in getters.items():
         if key not in exclude:
             serialized[key] = getter()
-    return msgpack.dumps(serialized)
+    return msgpack.dumps(serialized, use_bin_type=True, encoding='utf8')
 
 
 def from_bytes(bytes_data, setters, exclude):
-    msg = msgpack.loads(bytes_data)
+    msg = msgpack.loads(bytes_data, encoding='utf8')
     for key, setter in setters.items():
         if key not in exclude:
             setter(msg[key])