From 414a69b736546b3f5adfb1a6f8dccf5b91160694 Mon Sep 17 00:00:00 2001
From: Ines Montani <ines@ines.io>
Date: Fri, 8 Feb 2019 20:50:01 +0100
Subject: [PATCH] Add xfailing test (see #1971, #2675, #2671)

---
 spacy/tests/regression/test_issue1971.py | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)
 create mode 100644 spacy/tests/regression/test_issue1971.py

diff --git a/spacy/tests/regression/test_issue1971.py b/spacy/tests/regression/test_issue1971.py
new file mode 100644
index 000000000..93bfc7410
--- /dev/null
+++ b/spacy/tests/regression/test_issue1971.py
@@ -0,0 +1,23 @@
+# coding: utf8
+from __future__ import unicode_literals
+
+from spacy.matcher import Matcher
+from spacy.tokens import Token, Doc
+
+
+def test_issue1971(en_vocab):
+    """Regression test for #1971 (possibly related to #2675 and #2671)."""
+    matcher = Matcher(en_vocab)
+    pattern = [
+        {"ORTH": "Doe"},
+        {"ORTH": "!", "OP": "?"},
+        {"_": {"optional": True}, "OP": "?"},  # custom attribute, registered below
+        {"ORTH": "!", "OP": "?"},
+    ]
+    Token.set_extension("optional", default=False)
+    matcher.add("TEST", None, pattern)
+    doc = Doc(en_vocab, words=["Hello", "John", "Doe", "!"])
+    # Asserting a length of 1 would also work, but this check is more
+    # conclusive: the real problem is that the matcher returns a duplicate
+    # match under a match_id that is not actually in the vocab's strings!
+    assert all(match_id in en_vocab.strings for match_id, start, end in matcher(doc))