Merge from develop

2020-05-20 12:27:31 +02:00 · 2020-05-20 12:27:31 +02:00 · 24efd54a42
parent 7f5715a081
commit 24efd54a42
3 changed files with 12 additions and 12 deletions
--- a/spacy/cli/train_from_config.py
+++ b/spacy/cli/train_from_config.py
@@ -224,8 +224,9 @@ def train_from_config(
 def create_train_batches(nlp, corpus, cfg):
    is_first = True
    while True:
-        train_examples = list(corpus.train_dataset(
+        train_examples = corpus.train_dataset(
            nlp,
            noise_level=0.0,
            orth_variant_level=cfg["orth_variant_level"],
@@ -323,6 +324,8 @@ def train_while_improving(
            for subbatch in subdivide_batch(batch, accumulate_gradient):
                nlp.update(subbatch, drop=dropout, losses=losses, sgd=False)
            for name, proc in nlp.pipeline:
        for name, proc in nlp.pipeline:
            if hasattr(proc, "model"):
                proc.model.finish_update(optimizer)
        optimizer.step_schedules()
        if not (step % eval_frequency):
--- a/spacy/syntax/_parser_model.pyx
+++ b/spacy/syntax/_parser_model.pyx
@@ -474,7 +474,11 @@ cdef class precompute_hiddens:
            # This will usually be on GPU
            d_best = ops.asarray(d_best)
            # Fix nans (which can occur from unseen classes.)
-            d_best[ops.xp.isnan(d_best)] = 0.
+            try:
                d_best[ops.xp.isnan(d_best)] = 0.
            except:
                print(ops.xp.isnan(d_best))
                raise
            if self.activation == "maxout":
                mask_ = ops.asarray(mask)
                return ops.backprop_maxout(d_best, mask_, self.nP)
--- a/spacy/util.py
+++ b/spacy/util.py
@@ -598,16 +598,9 @@ def minibatch_by_words(examples, size, tuples=True, count_words=len, tolerance=0
            try:
                example = next(examples)
            except StopIteration:
-                if oversize:
+                if batch:
-                    examples = iter(oversize)
+                    yield batch
-                    oversize = []
+                return
                    if batch:
                        yield batch
                    break
                else:
                    if batch:
                        yield batch
                    return
            n_words = count_words(example.doc)
            if n_words < (batch_size + tol_size):
                batch_size -= n_words