diff --git a/spacy/cli/train_from_config.py b/spacy/cli/train_from_config.py
index 96c5b676e..54eedf69e 100644
--- a/spacy/cli/train_from_config.py
+++ b/spacy/cli/train_from_config.py
@@ -224,8 +224,9 @@ def train_from_config(
 
 
 def create_train_batches(nlp, corpus, cfg):
+    is_first = True
     while True:
-        train_examples = list(corpus.train_dataset(
+        train_examples = corpus.train_dataset(
             nlp,
             noise_level=0.0,
             orth_variant_level=cfg["orth_variant_level"],
@@ -323,6 +324,7 @@ def train_while_improving(
         for subbatch in subdivide_batch(batch, accumulate_gradient):
             nlp.update(subbatch, drop=dropout, losses=losses, sgd=False)
         for name, proc in nlp.pipeline:
-            proc.model.finish_update(optimizer)
+            if hasattr(proc, "model"):
+                proc.model.finish_update(optimizer)
         optimizer.step_schedules()
         if not (step % eval_frequency):
diff --git a/spacy/syntax/_parser_model.pyx b/spacy/syntax/_parser_model.pyx
index 69f5bd6f6..60d22a1ab 100644
--- a/spacy/syntax/_parser_model.pyx
+++ b/spacy/syntax/_parser_model.pyx
@@ -474,7 +474,11 @@ cdef class precompute_hiddens:
             # This will usually be on GPU
             d_best = ops.asarray(d_best)
             # Fix nans (which can occur from unseen classes.)
-            d_best[ops.xp.isnan(d_best)] = 0.
+            try:
+                d_best[ops.xp.isnan(d_best)] = 0.
+            except Exception:
+                print(ops.xp.isnan(d_best))
+                raise
             if self.activation == "maxout":
                 mask_ = ops.asarray(mask)
                 return ops.backprop_maxout(d_best, mask_, self.nP)
diff --git a/spacy/util.py b/spacy/util.py
index f39813694..7f35c2f7c 100644
--- a/spacy/util.py
+++ b/spacy/util.py
@@ -598,16 +598,9 @@ def minibatch_by_words(examples, size, tuples=True, count_words=len, tolerance=0
         try:
             example = next(examples)
         except StopIteration:
-            if oversize:
-                examples = iter(oversize)
-                oversize = []
-                if batch:
-                    yield batch
-                break
-            else:
-                if batch:
-                    yield batch
-                return
+            if batch:
+                yield batch
+            return
         n_words = count_words(example.doc)
         if n_words < (batch_size + tol_size):
             batch_size -= n_words