diff --git a/spacy/compat.py b/spacy/compat.py
index c2ab27d7e..4ef24cd8b 100644
--- a/spacy/compat.py
+++ b/spacy/compat.py
@@ -61,6 +61,19 @@ elif is_python3:
     json_dumps = lambda data: ujson.dumps(data, indent=2)
     path2str = lambda path: str(path)
 
+
+def b_to_str(b_str):
+    """Convert a byte string to the interpreter's native `str` type.
+
+    On Python 2 the value is returned unchanged; on Python 3 it is decoded
+    as UTF-8.
+    """
+    if is_python2:
+        return b_str
+    # important: if no encoding is set, string becomes "b'...'"
+    return str(b_str, encoding='utf8')
+
+
 def getattr_(obj, name, *default):
     if is_python3 and isinstance(name, bytes):
         name = name.decode('utf8')
diff --git a/spacy/displacy/__init__.py b/spacy/displacy/__init__.py
index 8468720cd..7c479f94c 100644
--- a/spacy/displacy/__init__.py
+++ b/spacy/displacy/__init__.py
@@ -3,6 +3,7 @@ from __future__ import unicode_literals
 
 from .render import DependencyRenderer, EntityRenderer
 from ..tokens import Doc
+from ..compat import b_to_str
 from ..util import prints, is_in_jupyter
 
 
@@ -65,7 +66,9 @@ def serve(docs, style='dep', page=True, minify=False, options={}, manual=False,
 
 
 def app(environ, start_response):
-    start_response('200 OK', [('Content-type', 'text/html; charset=utf-8')])
+    # headers and status need to be bytes in Python 2, see #1227
+    headers = [(b_to_str(b'Content-type'), b_to_str(b'text/html; charset=utf-8'))]
+    start_response(b_to_str(b'200 OK'), headers)
     res = _html['parsed'].encode(encoding='utf-8')
     return [res]
 
diff --git a/spacy/language.py b/spacy/language.py
index fad2e2119..0284c4636 100644
--- a/spacy/language.py
+++ b/spacy/language.py
@@ -292,6 +292,11 @@ class Language(object):
             >>>        for docs, golds in epoch:
             >>>            state = nlp.update(docs, golds, sgd=optimizer)
         """
+        if len(docs) != len(golds):
+            raise IndexError("Update expects same number of docs and golds. "
+                             "Got: %d, %d" % (len(docs), len(golds)))
+        if len(docs) == 0:
+            return
         tok2vec = self.pipeline[0]
         feats = tok2vec.doc2feats(docs)
         grads = {}
diff --git a/spacy/syntax/arc_eager.pyx b/spacy/syntax/arc_eager.pyx
index d4367c6df..29e8de0aa 100644
--- a/spacy/syntax/arc_eager.pyx
+++ b/spacy/syntax/arc_eager.pyx
@@ -362,7 +362,7 @@ cdef class ArcEager(TransitionSystem):
         if not self.has_gold(gold):
             return None
         for i in range(gold.length):
-            if gold.heads[i] is None: # Missing values
+            if gold.heads[i] is None or gold.labels[i] is None: # Missing values
                 gold.c.heads[i] = i
                 gold.c.has_dep[i] = False
             else: