From 6b2e5c4b8a5818920a9dac7f692d34474f4768ae Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Thu, 28 May 2015 22:39:08 +0200 Subject: [PATCH] * Avoid NER scoring for sentences with some missing NER values. --- spacy/scorer.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/spacy/scorer.py b/spacy/scorer.py index 8a912a9fe..a91f37a1d 100644 --- a/spacy/scorer.py +++ b/spacy/scorer.py @@ -95,16 +95,16 @@ class Scorer(object): self.tags.fp += 1 else: cand_tags.add((gold_i, token.tag_)) - cand_ents = set() - for ent in tokens.ents: - first = gold.cand_to_gold[ent.start] - last = gold.cand_to_gold[ent.end-1] - if first is None or last is None: - self.ner.fp += 1 - else: - cand_ents.add((ent.label_, first, last)) - - self.ner.score_set(cand_ents, gold_ents) + if '-' not in [token[-1] for token in gold.orig_annot]: + cand_ents = set() + for ent in tokens.ents: + first = gold.cand_to_gold[ent.start] + last = gold.cand_to_gold[ent.end-1] + if first is None or last is None: + self.ner.fp += 1 + else: + cand_ents.add((ent.label_, first, last)) + self.ner.score_set(cand_ents, gold_ents) self.tags.score_set(cand_tags, gold_tags) self.labelled.score_set(cand_deps, gold_deps) self.unlabelled.score_set(