From 66a3f2ba21610025848d0c965121825e452638d1 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Thu, 16 Aug 2018 00:42:36 +0200
Subject: [PATCH] Lower-case text before alignment

---
 spacy/gold.pyx | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/spacy/gold.pyx b/spacy/gold.pyx
index 2dd2fe728..20a319f5d 100644
--- a/spacy/gold.pyx
+++ b/spacy/gold.pyx
@@ -76,8 +76,8 @@ def align(cand_words, gold_words):
     if cand_words == gold_words:
         alignment = numpy.arange(len(cand_words))
         return 0, alignment, alignment, {}, {}
-    cand_words = [w.replace(' ', '') for w in cand_words]
-    gold_words = [w.replace(' ', '') for w in gold_words]
+    cand_words = [w.replace(' ', '').lower() for w in cand_words]
+    gold_words = [w.replace(' ', '').lower() for w in gold_words]
     cost, i2j, j2i, matrix = _align.align(cand_words, gold_words)
     i2j_multi, j2i_multi = _align.multi_align(i2j, j2i, [len(w) for w in cand_words],
                                 [len(w) for w in gold_words])