From a32b033b8c30fa038ce8845333c1560059475f39 Mon Sep 17 00:00:00 2001
From: Ines Montani <ines@ines.io>
Date: Mon, 22 Jul 2019 14:18:24 +0200
Subject: [PATCH] Add regression test for #4002

Test that the PhraseMatcher can match on overwritten NORM attributes.
---
 spacy/tests/regression/test_issue4002.py | 28 ++++++++++++++++++++++++
 1 file changed, 28 insertions(+)
 create mode 100644 spacy/tests/regression/test_issue4002.py

diff --git a/spacy/tests/regression/test_issue4002.py b/spacy/tests/regression/test_issue4002.py
new file mode 100644
index 000000000..d9b509a30
--- /dev/null
+++ b/spacy/tests/regression/test_issue4002.py
@@ -0,0 +1,28 @@
+# coding: utf8
+from __future__ import unicode_literals
+
+import pytest
+from spacy.matcher import PhraseMatcher
+from spacy.tokens import Doc
+
+
+@pytest.mark.xfail
+def test_issue4002(en_vocab):
+    """PhraseMatcher(attr="NORM") should match tokens whose NORM was overwritten."""
+    # Baseline: the pattern's NORM values are left untouched and equal its words.
+    baseline_matcher = PhraseMatcher(en_vocab, attr="NORM")
+    plain_pattern = Doc(en_vocab, words=["c", "d"])
+    assert [token.norm_ for token in plain_pattern] == ["c", "d"]
+    baseline_matcher.add("TEST", None, plain_pattern)
+    target = Doc(en_vocab, words=["a", "b", "c", "d"])
+    assert [token.norm_ for token in target] == ["a", "b", "c", "d"]
+    assert len(baseline_matcher(target)) == 1
+    # Same NORM values, but assigned by hand to tokens whose words are "1" and "2".
+    override_matcher = PhraseMatcher(en_vocab, attr="NORM")
+    overridden_pattern = Doc(en_vocab, words=["1", "2"])
+    for index, norm in enumerate(["c", "d"]):
+        overridden_pattern[index].norm_ = norm
+    assert [token.norm_ for token in overridden_pattern] == ["c", "d"]
+    override_matcher.add("TEST", None, overridden_pattern)
+    # The target document is reused as-is; only the pattern's NORMs were changed.
+    assert len(override_matcher(target)) == 1