mirror of https://github.com/explosion/spaCy.git
Revert "Remove peeking from Parser.begin_training (#5456)"
This reverts commit 9393253b66.
The model shouldn't need to see all the examples, and in v3 there's actually
no equivalent step. All examples are provided to the component, so that the
component can do things like figure out the labels. The model itself just
needs to do things like shape inference.
This commit is contained in:
parent
85f1acfaa0
commit
64adda3202
|
@ -9,6 +9,7 @@ import numpy
|
|||
cimport cython.parallel
|
||||
import numpy.random
|
||||
cimport numpy as np
|
||||
from itertools import islice
|
||||
from cpython.ref cimport PyObject, Py_XDECREF
|
||||
from cpython.exc cimport PyErr_CheckSignals, PyErr_SetFromErrno
|
||||
from libc.math cimport exp
|
||||
|
@ -620,15 +621,15 @@ cdef class Parser:
|
|||
self.model, cfg = self.Model(self.moves.n_moves, **cfg)
|
||||
if sgd is None:
|
||||
sgd = self.create_optimizer()
|
||||
docs = []
|
||||
golds = []
|
||||
for raw_text, annots_brackets in get_gold_tuples():
|
||||
doc_sample = []
|
||||
gold_sample = []
|
||||
for raw_text, annots_brackets in islice(get_gold_tuples(), 1000):
|
||||
for annots, brackets in annots_brackets:
|
||||
ids, words, tags, heads, deps, ents = annots
|
||||
docs.append(Doc(self.vocab, words=words))
|
||||
golds.append(GoldParse(docs[-1], words=words, tags=tags,
|
||||
doc_sample.append(Doc(self.vocab, words=words))
|
||||
gold_sample.append(GoldParse(doc_sample[-1], words=words, tags=tags,
|
||||
heads=heads, deps=deps, entities=ents))
|
||||
self.model.begin_training(docs, golds)
|
||||
self.model.begin_training(doc_sample, gold_sample)
|
||||
if pipeline is not None:
|
||||
self.init_multitask_objectives(get_gold_tuples, pipeline, sgd=sgd, **cfg)
|
||||
link_vectors_to_models(self.vocab)
|
||||
|
|
Loading…
Reference in New Issue