From bbdb5f62b70e9e12c6d4a8d9581e064ce846d19c Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Fri, 18 Sep 2020 14:26:42 +0200 Subject: [PATCH] Temporary work-around for scoring a subset of components (#6090) * Try hacking the scorer to work around sentence boundaries * Upd scorer * Set dev version * Upd scorer hack * Fix version * Improve comment on hack --- spacy/scorer.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/spacy/scorer.py b/spacy/scorer.py index 7f7418237..da22d59d4 100644 --- a/spacy/scorer.py +++ b/spacy/scorer.py @@ -270,6 +270,18 @@ class Scorer: for example in examples: pred_doc = example.predicted gold_doc = example.reference + # TODO + # This is a temporary hack to work around the problem that the scorer + # fails if you have examples that are not fully annotated for all + # the tasks in your pipeline. For instance, you might have a corpus + # of NER annotations that does not set sentence boundaries, but the + # pipeline includes a parser or senter, and then the score_weights + # are used to evaluate that component. When the scorer attempts + # to read the sentences from the gold document, it fails. + try: + list(getter(gold_doc, attr)) + except ValueError: + continue # Find all labels in gold and doc labels = set( [k.label_ for k in getter(gold_doc, attr)]