From d4fdb97c8752038d9641d33af5ac73049b6e8262 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Sun, 25 Feb 2018 14:55:00 +0100 Subject: [PATCH] Fix alignment for words with spaces --- spacy/gold.pyx | 2 ++ 1 file changed, 2 insertions(+) diff --git a/spacy/gold.pyx b/spacy/gold.pyx index f6bf38700..d9fa8eb93 100644 --- a/spacy/gold.pyx +++ b/spacy/gold.pyx @@ -66,6 +66,8 @@ def align(cand_words, gold_words): if cand_words == gold_words: alignment = numpy.arange(len(cand_words)) return 0, alignment, alignment, {}, {} + cand_words = [w.replace(' ', '') for w in cand_words] + gold_words = [w.replace(' ', '') for w in gold_words] cost, i2j, j2i, matrix = _align.align(cand_words, gold_words) i2j_multi, j2i_multi = _align.multi_align(i2j, j2i, [len(w) for w in cand_words], [len(w) for w in gold_words])