Merge branch 'develop' of https://github.com/explosion/spaCy into develop

This commit is contained in:
Matthew Honnibal 2017-05-31 15:06:56 +02:00
commit 6c51cd77b4
2 changed files with 33 additions and 5 deletions

View File

@ -6,6 +6,8 @@ import ftfy
import sys import sys
import ujson import ujson
import thinc.neural.util
try: try:
import cPickle as pickle import cPickle as pickle
except ImportError: except ImportError:
@ -32,6 +34,7 @@ copy_reg = copy_reg
CudaStream = CudaStream CudaStream = CudaStream
cupy = cupy cupy = cupy
fix_text = ftfy.fix_text fix_text = ftfy.fix_text
copy_array = thinc.neural.util.copy_array
is_python2 = six.PY2 is_python2 = six.PY2
is_python3 = six.PY3 is_python3 = six.PY3
@ -71,3 +74,16 @@ def is_config(python2=None, python3=None, windows=None, linux=None, osx=None):
(windows == None or windows == is_windows) and (windows == None or windows == is_windows) and
(linux == None or linux == is_linux) and (linux == None or linux == is_linux) and
(osx == None or osx == is_osx)) (osx == None or osx == is_osx))
def normalize_string_keys(old):
'''Given a dictionary, make sure keys are unicode strings, not bytes.'''
new = {}
for key, value in old:
if isinstance(key, bytes_):
new[key.decode('utf8')] = value
else:
new[key] = value
return new

View File

@ -22,6 +22,7 @@ import ujson
from .symbols import ORTH from .symbols import ORTH
from .compat import cupy, CudaStream, path2str, basestring_, input_, unicode_ from .compat import cupy, CudaStream, path2str, basestring_, input_, unicode_
from .compat import copy_array, normalize_string_keys
LANGUAGES = {} LANGUAGES = {}
@ -243,6 +244,12 @@ def itershuffle(iterable, bufsize=1000):
raise StopIteration raise StopIteration
_PRINT_ENV = False
def set_env_log(value):
global _PRINT_ENV
_PRINT_ENV = value
def env_opt(name, default=None): def env_opt(name, default=None):
if type(default) is float: if type(default) is float:
type_convert = float type_convert = float
@ -250,13 +257,16 @@ def env_opt(name, default=None):
type_convert = int type_convert = int
if 'SPACY_' + name.upper() in os.environ: if 'SPACY_' + name.upper() in os.environ:
value = type_convert(os.environ['SPACY_' + name.upper()]) value = type_convert(os.environ['SPACY_' + name.upper()])
if _PRINT_ENV:
print(name, "=", repr(value), "via", "$SPACY_" + name.upper()) print(name, "=", repr(value), "via", "$SPACY_" + name.upper())
return value return value
elif name in os.environ: elif name in os.environ:
value = type_convert(os.environ[name]) value = type_convert(os.environ[name])
if _PRINT_ENV:
print(name, "=", repr(value), "via", '$' + name) print(name, "=", repr(value), "via", '$' + name)
return value return value
else: else:
if _PRINT_ENV:
print(name, '=', repr(default), "by default") print(name, '=', repr(default), "by default")
return default return default
@ -451,7 +461,9 @@ def model_to_bytes(model):
i = 0 i = 0
for layer in queue: for layer in queue:
if hasattr(layer, '_mem'): if hasattr(layer, '_mem'):
weights.append({'dims': dict(getattr(layer, '_dims', {})), 'params': []}) weights.append({
'dims': normalize_string_keys(getattr(layer, '_dims', {})),
'params': []})
if hasattr(layer, 'seed'): if hasattr(layer, 'seed'):
weights[-1]['seed'] = layer.seed weights[-1]['seed'] = layer.seed
@ -488,7 +500,7 @@ def model_from_bytes(model, bytes_data):
setattr(layer, dim, value) setattr(layer, dim, value)
for param in weights[i]['params']: for param in weights[i]['params']:
dest = getattr(layer, param['name']) dest = getattr(layer, param['name'])
dest[:] = param['value'] copy_array(dest, param['value'])
i += 1 i += 1
if hasattr(layer, '_layers'): if hasattr(layer, '_layers'):
queue.extend(layer._layers) queue.extend(layer._layers)