From bbdb5f62b70e9e12c6d4a8d9581e064ce846d19c Mon Sep 17 00:00:00 2001
From: Matthew Honnibal <honnibal+gh@gmail.com>
Date: Fri, 18 Sep 2020 14:26:42 +0200
Subject: [PATCH] Temporary work-around for scoring a subset of components
 (#6090)

* Try hacking the scorer to work around sentence boundaries

* Upd scorer

* Set dev version

* Upd scorer hack

* Fix version

* Improve comment on hack
---
 spacy/scorer.py | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/spacy/scorer.py b/spacy/scorer.py
index 7f7418237..da22d59d4 100644
--- a/spacy/scorer.py
+++ b/spacy/scorer.py
@@ -270,6 +270,18 @@ class Scorer:
         for example in examples:
             pred_doc = example.predicted
             gold_doc = example.reference
+            # TODO: Remove this hack once the scorer handles partial annotations.
+            # This is a temporary hack to work around the problem that the scorer
+            # fails if you have examples that are not fully annotated for all
+            # the tasks in your pipeline. For instance, you might have a corpus
+            # of NER annotations that does not set sentence boundaries, but the
+            # pipeline includes a parser or senter, and then the score_weights
+            # are used to evaluate that component. Reading the sentences from
+            # the gold document then raises a ValueError, so skip the example.
+            try:
+                list(getter(gold_doc, attr))
+            except ValueError:
+                continue
             # Find all labels in gold and doc
             labels = set(
                 [k.label_ for k in getter(gold_doc, attr)]