From 88acbfc050a63e4c245f62419212e44d9062e8e1 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Wed, 20 Jan 2021 02:47:44 +1100
Subject: [PATCH] Copy the Example objects (and their predicted Doc) in nlp.evaluate() and nlp.update() (#6765)

* Make copy of examples in nlp.update and nlp.evaluate

* Avoid circular import

* Fix evaluate
---
 spacy/language.py | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/spacy/language.py b/spacy/language.py
index 3c4899fdc..d98a0e7a1 100644
--- a/spacy/language.py
+++ b/spacy/language.py
@@ -1084,6 +1084,7 @@ class Language:
         if len(examples) == 0:
             return losses
         validate_examples(examples, "Language.update")
+        examples = _copy_examples(examples)
         if sgd is None:
             if self._optimizer is None:
                 self._optimizer = self.create_optimizer()
@@ -1093,7 +1094,6 @@ class Language:
         for i, (name, proc) in enumerate(self.pipeline):
             component_cfg.setdefault(name, {})
             component_cfg[name].setdefault("drop", drop)
-            component_cfg[name].setdefault("set_annotations", False)
         for name, proc in self.pipeline:
             if name in exclude or not hasattr(proc, "update"):
                 continue
@@ -1299,6 +1299,7 @@ class Language:
         """
         examples = list(examples)
         validate_examples(examples, "Language.evaluate")
+        examples = _copy_examples(examples)
         if batch_size is None:
             batch_size = self.batch_size
         if component_cfg is None:
@@ -1311,8 +1312,6 @@ class Language:
             scorer = Scorer(**kwargs)
         # reset annotation in predicted docs and time tokenization
         start_time = timer()
-        for eg in examples:
-            eg.predicted = self.make_doc(eg.reference.text)
         # apply all pipeline components
         for name, pipe in self.pipeline:
             kwargs = component_cfg.get(name, {})
@@ -1821,6 +1820,15 @@ class DisabledPipes(list):
         self[:] = []
 
 
+def _copy_examples(examples: Iterable[Example]) -> List[Example]:
+    """Make a copy of a batch of examples, copying the predicted Doc as well.
+    This is used in contexts where we need to take ownership of the examples
+    so that they can be mutated, for instance during Language.evaluate and
+    Language.update.
+    """
+    return [Example(eg.x.copy(), eg.y) for eg in examples]
+
+
 def _apply_pipes(
     make_doc: Callable[[str], Doc],
     pipes: Iterable[Callable[[Doc], Doc]],