Copy the Example objects (and their predicted Doc) in nlp.evaluate() and nlp.update() (#6765)

* Make copy of examples in nlp.update and nlp.evaluate

* Avoid circular import

* Fix evaluate
This commit is contained in:
Matthew Honnibal 2021-01-20 02:47:44 +11:00 committed by GitHub
parent bfc212e68f
commit 88acbfc050
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 11 additions and 3 deletions

View File

@ -1084,6 +1084,7 @@ class Language:
if len(examples) == 0: if len(examples) == 0:
return losses return losses
validate_examples(examples, "Language.update") validate_examples(examples, "Language.update")
examples = _copy_examples(examples)
if sgd is None: if sgd is None:
if self._optimizer is None: if self._optimizer is None:
self._optimizer = self.create_optimizer() self._optimizer = self.create_optimizer()
@ -1093,7 +1094,6 @@ class Language:
for i, (name, proc) in enumerate(self.pipeline): for i, (name, proc) in enumerate(self.pipeline):
component_cfg.setdefault(name, {}) component_cfg.setdefault(name, {})
component_cfg[name].setdefault("drop", drop) component_cfg[name].setdefault("drop", drop)
component_cfg[name].setdefault("set_annotations", False)
for name, proc in self.pipeline: for name, proc in self.pipeline:
if name in exclude or not hasattr(proc, "update"): if name in exclude or not hasattr(proc, "update"):
continue continue
@ -1299,6 +1299,7 @@ class Language:
""" """
examples = list(examples) examples = list(examples)
validate_examples(examples, "Language.evaluate") validate_examples(examples, "Language.evaluate")
examples = _copy_examples(examples)
if batch_size is None: if batch_size is None:
batch_size = self.batch_size batch_size = self.batch_size
if component_cfg is None: if component_cfg is None:
@ -1311,8 +1312,6 @@ class Language:
scorer = Scorer(**kwargs) scorer = Scorer(**kwargs)
# reset annotation in predicted docs and time tokenization # reset annotation in predicted docs and time tokenization
start_time = timer() start_time = timer()
for eg in examples:
eg.predicted = self.make_doc(eg.reference.text)
# apply all pipeline components # apply all pipeline components
for name, pipe in self.pipeline: for name, pipe in self.pipeline:
kwargs = component_cfg.get(name, {}) kwargs = component_cfg.get(name, {})
@ -1821,6 +1820,15 @@ class DisabledPipes(list):
self[:] = [] self[:] = []
def _copy_examples(examples: Iterable[Example]) -> List[Example]:
    """Build new Example objects that the caller is free to mutate.

    For every incoming example a new Example is constructed around a copy
    of its predicted Doc (``eg.x``); the ``eg.y`` Doc is handed over as-is
    and is not copied. Language.update and Language.evaluate call this so
    that their changes to the predicted Docs do not leak back into the
    examples supplied by the caller.

    examples (Iterable[Example]): The batch of examples to duplicate.
    RETURNS (List[Example]): One new Example per input, in the same order.
    """
    copies = []
    for original in examples:
        # Only the predicted side is duplicated; y is shared with the input.
        predicted = original.x.copy()
        copies.append(Example(predicted, original.y))
    return copies
def _apply_pipes( def _apply_pipes(
make_doc: Callable[[str], Doc], make_doc: Callable[[str], Doc],
pipes: Iterable[Callable[[Doc], Doc]], pipes: Iterable[Callable[[Doc], Doc]],