mirror of https://github.com/explosion/spaCy.git
Pass kwargs into pipeline components during begin_training
This commit is contained in:
parent
ab35ac4e6f
commit
d7c9b53120
|
@ -144,7 +144,8 @@ class Pipe(object):
|
||||||
return create_default_optimizer(self.model.ops,
|
return create_default_optimizer(self.model.ops,
|
||||||
**self.cfg.get('optimizer', {}))
|
**self.cfg.get('optimizer', {}))
|
||||||
|
|
||||||
def begin_training(self, gold_tuples=tuple(), pipeline=None, sgd=None):
|
def begin_training(self, gold_tuples=tuple(), pipeline=None, sgd=None,
|
||||||
|
**kwargs):
|
||||||
"""Initialize the pipe for training, using data examples if available.
|
"""Initialize the pipe for training, using data examples if available.
|
||||||
If no model has been initialized yet, the model is added."""
|
If no model has been initialized yet, the model is added."""
|
||||||
if self.model is True:
|
if self.model is True:
|
||||||
|
@ -344,7 +345,8 @@ class Tensorizer(Pipe):
|
||||||
loss = (d_scores**2).sum()
|
loss = (d_scores**2).sum()
|
||||||
return loss, d_scores
|
return loss, d_scores
|
||||||
|
|
||||||
def begin_training(self, gold_tuples=tuple(), pipeline=None, sgd=None):
|
def begin_training(self, gold_tuples=tuple(), pipeline=None, sgd=None,
|
||||||
|
**kwargs):
|
||||||
"""Allocate models, pre-process training data and acquire an
|
"""Allocate models, pre-process training data and acquire an
|
||||||
optimizer.
|
optimizer.
|
||||||
|
|
||||||
|
@ -467,7 +469,8 @@ class Tagger(Pipe):
|
||||||
d_scores = self.model.ops.unflatten(d_scores, [len(d) for d in docs])
|
d_scores = self.model.ops.unflatten(d_scores, [len(d) for d in docs])
|
||||||
return float(loss), d_scores
|
return float(loss), d_scores
|
||||||
|
|
||||||
def begin_training(self, gold_tuples=tuple(), pipeline=None, sgd=None):
|
def begin_training(self, gold_tuples=tuple(), pipeline=None, sgd=None,
|
||||||
|
**kwargs):
|
||||||
orig_tag_map = dict(self.vocab.morphology.tag_map)
|
orig_tag_map = dict(self.vocab.morphology.tag_map)
|
||||||
new_tag_map = OrderedDict()
|
new_tag_map = OrderedDict()
|
||||||
for raw_text, annots_brackets in gold_tuples:
|
for raw_text, annots_brackets in gold_tuples:
|
||||||
|
@ -641,7 +644,7 @@ class MultitaskObjective(Tagger):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def begin_training(self, gold_tuples=tuple(), pipeline=None, tok2vec=None,
|
def begin_training(self, gold_tuples=tuple(), pipeline=None, tok2vec=None,
|
||||||
sgd=None):
|
sgd=None, **kwargs):
|
||||||
gold_tuples = nonproj.preprocess_training_data(gold_tuples)
|
gold_tuples = nonproj.preprocess_training_data(gold_tuples)
|
||||||
for raw_text, annots_brackets in gold_tuples:
|
for raw_text, annots_brackets in gold_tuples:
|
||||||
for annots, brackets in annots_brackets:
|
for annots, brackets in annots_brackets:
|
||||||
|
@ -766,7 +769,7 @@ class SimilarityHook(Pipe):
|
||||||
def update(self, doc1_doc2, golds, sgd=None, drop=0.):
|
def update(self, doc1_doc2, golds, sgd=None, drop=0.):
|
||||||
sims, bp_sims = self.model.begin_update(doc1_doc2, drop=drop)
|
sims, bp_sims = self.model.begin_update(doc1_doc2, drop=drop)
|
||||||
|
|
||||||
def begin_training(self, _=tuple(), pipeline=None, sgd=None):
|
def begin_training(self, _=tuple(), pipeline=None, sgd=None, **kwargs):
|
||||||
"""Allocate model, using width from tensorizer in pipeline.
|
"""Allocate model, using width from tensorizer in pipeline.
|
||||||
|
|
||||||
gold_tuples (iterable): Gold-standard training data.
|
gold_tuples (iterable): Gold-standard training data.
|
||||||
|
@ -887,6 +890,7 @@ cdef class DependencyParser(Parser):
|
||||||
self._multitasks.append(labeller)
|
self._multitasks.append(labeller)
|
||||||
|
|
||||||
def init_multitask_objectives(self, gold_tuples, pipeline, sgd=None, **cfg):
|
def init_multitask_objectives(self, gold_tuples, pipeline, sgd=None, **cfg):
|
||||||
|
self.add_multitask_objective('tag')
|
||||||
for labeller in self._multitasks:
|
for labeller in self._multitasks:
|
||||||
tok2vec = self.model[0]
|
tok2vec = self.model[0]
|
||||||
labeller.begin_training(gold_tuples, pipeline=pipeline,
|
labeller.begin_training(gold_tuples, pipeline=pipeline,
|
||||||
|
|
Loading…
Reference in New Issue