From 92f9e5cc9afc6c3efd6c43048e734a64845606b3 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Wed, 31 May 2017 07:14:11 -0500 Subject: [PATCH 1/2] Silence env_opt, and fix serialization for GPU --- spacy/util.py | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/spacy/util.py b/spacy/util.py index df66b59a8..ff5c33aab 100644 --- a/spacy/util.py +++ b/spacy/util.py @@ -21,6 +21,7 @@ import ujson from .symbols import ORTH from .compat import cupy, CudaStream, path2str, basestring_, input_, unicode_ +from .compat import copy_array, normalize_string_keys LANGUAGES = {} @@ -242,6 +243,12 @@ def itershuffle(iterable, bufsize=1000): raise StopIteration +_PRINT_ENV = False +def set_env_log(value): + global _PRINT_ENV + _PRINT_ENV = value + + def env_opt(name, default=None): if type(default) is float: type_convert = float @@ -249,14 +256,17 @@ def env_opt(name, default=None): type_convert = int if 'SPACY_' + name.upper() in os.environ: value = type_convert(os.environ['SPACY_' + name.upper()]) - print(name, "=", repr(value), "via", "$SPACY_" + name.upper()) + if _PRINT_ENV: + print(name, "=", repr(value), "via", "$SPACY_" + name.upper()) return value elif name in os.environ: value = type_convert(os.environ[name]) - print(name, "=", repr(value), "via", '$' + name) + if _PRINT_ENV: + print(name, "=", repr(value), "via", '$' + name) return value else: - print(name, '=', repr(default), "by default") + if _PRINT_ENV: + print(name, '=', repr(default), "by default") return default @@ -432,7 +442,9 @@ def model_to_bytes(model): i = 0 for layer in queue: if hasattr(layer, '_mem'): - weights.append({'dims': dict(getattr(layer, '_dims', {})), 'params': []}) + weights.append({ + 'dims': normalize_string_keys(getattr(layer, '_dims', {})), + 'params': []}) if hasattr(layer, 'seed'): weights[-1]['seed'] = layer.seed @@ -469,7 +481,7 @@ def model_from_bytes(model, bytes_data): setattr(layer, dim, value) for param in weights[i]['params']: dest = 
getattr(layer, param['name']) - dest[:] = param['value'] + copy_array(dest, param['value']) i += 1 if hasattr(layer, '_layers'): queue.extend(layer._layers) From 480ef8bfc8b92b2de6c4960070c9269cfa505c4f Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Wed, 31 May 2017 07:14:29 -0500 Subject: [PATCH 2/2] Add compat function to normalize dict keys --- spacy/compat.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/spacy/compat.py b/spacy/compat.py index 2a551a831..b3019f45b 100644 --- a/spacy/compat.py +++ b/spacy/compat.py @@ -6,6 +6,8 @@ import ftfy import sys import ujson +import thinc.neural.util + try: import cPickle as pickle except ImportError: @@ -32,6 +34,7 @@ copy_reg = copy_reg CudaStream = CudaStream cupy = cupy fix_text = ftfy.fix_text +copy_array = thinc.neural.util.copy_array is_python2 = six.PY2 is_python3 = six.PY3 @@ -71,3 +74,16 @@ def is_config(python2=None, python3=None, windows=None, linux=None, osx=None): (windows == None or windows == is_windows) and (linux == None or linux == is_linux) and (osx == None or osx == is_osx)) + + +def normalize_string_keys(old): + '''Given a dictionary, make sure keys are unicode strings, not bytes.''' + new = {} + for key, value in old.items(): + if isinstance(key, bytes_): + new[key.decode('utf8')] = value + else: + new[key] = value + return new + +