mirror of https://github.com/explosion/spaCy.git
Copy the Example objects (and their predicted Doc) in nlp.evaluate() and nlp.update() (#6765)
* Make copy of examples in nlp.update and nlp.evaluate
* Avoid circular import
* Fix evaluate
This commit is contained in:
parent
bfc212e68f
commit
88acbfc050
|
@ -1084,6 +1084,7 @@ class Language:
|
||||||
if len(examples) == 0:
|
if len(examples) == 0:
|
||||||
return losses
|
return losses
|
||||||
validate_examples(examples, "Language.update")
|
validate_examples(examples, "Language.update")
|
||||||
|
examples = _copy_examples(examples)
|
||||||
if sgd is None:
|
if sgd is None:
|
||||||
if self._optimizer is None:
|
if self._optimizer is None:
|
||||||
self._optimizer = self.create_optimizer()
|
self._optimizer = self.create_optimizer()
|
||||||
|
@ -1093,7 +1094,6 @@ class Language:
|
||||||
for i, (name, proc) in enumerate(self.pipeline):
|
for i, (name, proc) in enumerate(self.pipeline):
|
||||||
component_cfg.setdefault(name, {})
|
component_cfg.setdefault(name, {})
|
||||||
component_cfg[name].setdefault("drop", drop)
|
component_cfg[name].setdefault("drop", drop)
|
||||||
component_cfg[name].setdefault("set_annotations", False)
|
|
||||||
for name, proc in self.pipeline:
|
for name, proc in self.pipeline:
|
||||||
if name in exclude or not hasattr(proc, "update"):
|
if name in exclude or not hasattr(proc, "update"):
|
||||||
continue
|
continue
|
||||||
|
@ -1299,6 +1299,7 @@ class Language:
|
||||||
"""
|
"""
|
||||||
examples = list(examples)
|
examples = list(examples)
|
||||||
validate_examples(examples, "Language.evaluate")
|
validate_examples(examples, "Language.evaluate")
|
||||||
|
examples = _copy_examples(examples)
|
||||||
if batch_size is None:
|
if batch_size is None:
|
||||||
batch_size = self.batch_size
|
batch_size = self.batch_size
|
||||||
if component_cfg is None:
|
if component_cfg is None:
|
||||||
|
@ -1311,8 +1312,6 @@ class Language:
|
||||||
scorer = Scorer(**kwargs)
|
scorer = Scorer(**kwargs)
|
||||||
# reset annotation in predicted docs and time tokenization
|
# reset annotation in predicted docs and time tokenization
|
||||||
start_time = timer()
|
start_time = timer()
|
||||||
for eg in examples:
|
|
||||||
eg.predicted = self.make_doc(eg.reference.text)
|
|
||||||
# apply all pipeline components
|
# apply all pipeline components
|
||||||
for name, pipe in self.pipeline:
|
for name, pipe in self.pipeline:
|
||||||
kwargs = component_cfg.get(name, {})
|
kwargs = component_cfg.get(name, {})
|
||||||
|
@ -1821,6 +1820,15 @@ class DisabledPipes(list):
|
||||||
self[:] = []
|
self[:] = []
|
||||||
|
|
||||||
|
|
||||||
|
def _copy_examples(examples: Iterable[Example]) -> List[Example]:
    """Return new Example objects built from a batch of examples.

    Each returned Example pairs a copy of the original's predicted Doc
    (`x`) with the original's `y` Doc, which is passed through uncopied.
    This lets callers such as Language.evaluate and Language.update take
    ownership of the batch and mutate it.
    """
    copied = []
    for example in examples:
        # Copy only the predicted side; the `y` Doc is reused as-is.
        predicted = example.x.copy()
        copied.append(Example(predicted, example.y))
    return copied
|
||||||
|
|
||||||
|
|
||||||
def _apply_pipes(
|
def _apply_pipes(
|
||||||
make_doc: Callable[[str], Doc],
|
make_doc: Callable[[str], Doc],
|
||||||
pipes: Iterable[Callable[[Doc], Doc]],
|
pipes: Iterable[Callable[[Doc], Doc]],
|
||||||
|
|
Loading…
Reference in New Issue