From 968dabdde405efcbe703ac82766059ceda8548ac Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Fri, 23 Feb 2018 23:48:09 +0100 Subject: [PATCH] Fix bug in multi-task objective --- spacy/pipeline.pyx | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/spacy/pipeline.pyx b/spacy/pipeline.pyx index 8405e1310..6fbf95eea 100644 --- a/spacy/pipeline.pyx +++ b/spacy/pipeline.pyx @@ -690,11 +690,7 @@ class MultitaskObjective(Tagger): for i, gold in enumerate(golds): for j in range(len(docs[i])): # Handes alignment for tokenization differences - gold_idx = gold.cand_to_gold[j] - if gold_idx is None: - idx += 1 - continue - label = self.make_label(gold_idx, gold.words, gold.tags, + label = self.make_label(j, gold.words, gold.tags, gold.heads, gold.labels, gold.ents) if label is None or label not in self.labels: correct[idx] = guesses[idx] @@ -749,6 +745,8 @@ class MultitaskObjective(Tagger): of gold data. You can pass cache=False if you know the cache will do the wrong thing. ''' + assert len(words) == len(heads) + assert target < len(words), (target, len(words)) if cache: if id(heads) in _cache: return _cache[id(heads)][target] @@ -783,8 +781,6 @@ class MultitaskObjective(Tagger): return sent_tags[target] - - class SimilarityHook(Pipe): """ Experimental: A pipeline component to install a hook for supervised