Merge remote-tracking branch 'upstream/develop' into indonesian

This commit is contained in:
Jim Geovedi 2017-08-03 12:40:19 +07:00
commit 4705ae19ba
4 changed files with 18 additions and 2 deletions

View File

@ -61,6 +61,14 @@ elif is_python3:
json_dumps = lambda data: ujson.dumps(data, indent=2)
path2str = lambda path: str(path)
def b_to_str(b_str):
    """Convert a bytestring into the interpreter's native ``str`` type.

    b_str: The bytestring to convert.
    RETURNS: ``b_str`` itself when running under Python 2, otherwise the
        result of decoding ``b_str`` as UTF-8.
    """
    if not is_python2:
        # The encoding must be passed explicitly: without it, str() yields
        # the repr-like text "b'...'" instead of the decoded content.
        return str(b_str, encoding='utf8')
    return b_str
def getattr_(obj, name, *default):
if is_python3 and isinstance(name, bytes):
name = name.decode('utf8')

View File

@ -3,6 +3,7 @@ from __future__ import unicode_literals
from .render import DependencyRenderer, EntityRenderer
from ..tokens import Doc
from ..compat import b_to_str
from ..util import prints, is_in_jupyter
@ -65,7 +66,9 @@ def serve(docs, style='dep', page=True, minify=False, options={}, manual=False,
def app(environ, start_response):
start_response('200 OK', [('Content-type', 'text/html; charset=utf-8')])
# headers and status need to be bytes in Python 2, see #1227
headers = [(b_to_str(b'Content-type'), b_to_str(b'text/html; charset=utf-8'))]
start_response(b_to_str(b'200 OK'), headers)
res = _html['parsed'].encode(encoding='utf-8')
return [res]

View File

@ -292,6 +292,11 @@ class Language(object):
>>> for docs, golds in epoch:
>>> state = nlp.update(docs, golds, sgd=optimizer)
"""
if len(docs) != len(golds):
raise IndexError("Update expects same number of docs and golds "
"Got: %d, %d" % (len(docs), len(golds)))
if len(docs) == 0:
return
tok2vec = self.pipeline[0]
feats = tok2vec.doc2feats(docs)
grads = {}

View File

@ -362,7 +362,7 @@ cdef class ArcEager(TransitionSystem):
if not self.has_gold(gold):
return None
for i in range(gold.length):
if gold.heads[i] is None: # Missing values
if gold.heads[i] is None or gold.labels[i] is None: # Missing values
gold.c.heads[i] = i
gold.c.has_dep[i] = False
else: