From a32b033b8c30fa038ce8845333c1560059475f39 Mon Sep 17 00:00:00 2001
From: Ines Montani
Date: Mon, 22 Jul 2019 14:18:24 +0200
Subject: [PATCH] Add regression test for #4002

Test that the PhraseMatcher can match on overwritten NORM attributes.
---
 spacy/tests/regression/test_issue4002.py | 28 ++++++++++++++++++++++++
 1 file changed, 28 insertions(+)
 create mode 100644 spacy/tests/regression/test_issue4002.py

diff --git a/spacy/tests/regression/test_issue4002.py b/spacy/tests/regression/test_issue4002.py
new file mode 100644
index 000000000..d9b509a30
--- /dev/null
+++ b/spacy/tests/regression/test_issue4002.py
@@ -0,0 +1,28 @@
+# coding: utf8
+from __future__ import unicode_literals
+
+import pytest
+from spacy.matcher import PhraseMatcher
+from spacy.tokens import Doc
+
+
+@pytest.mark.xfail
+def test_issue4002(en_vocab):
+    """Test that the PhraseMatcher can match on overwritten NORM attributes.
+    """
+    matcher = PhraseMatcher(en_vocab, attr="NORM")
+    pattern1 = Doc(en_vocab, words=["c", "d"])
+    assert [t.norm_ for t in pattern1] == ["c", "d"]
+    matcher.add("TEST", None, pattern1)
+    doc = Doc(en_vocab, words=["a", "b", "c", "d"])
+    assert [t.norm_ for t in doc] == ["a", "b", "c", "d"]
+    matches = matcher(doc)
+    assert len(matches) == 1
+    matcher = PhraseMatcher(en_vocab, attr="NORM")
+    pattern2 = Doc(en_vocab, words=["1", "2"])
+    pattern2[0].norm_ = "c"
+    pattern2[1].norm_ = "d"
+    assert [t.norm_ for t in pattern2] == ["c", "d"]
+    matcher.add("TEST", None, pattern2)
+    matches = matcher(doc)
+    assert len(matches) == 1