From 414a69b736546b3f5adfb1a6f8dccf5b91160694 Mon Sep 17 00:00:00 2001
From: Ines Montani
Date: Fri, 8 Feb 2019 20:50:01 +0100
Subject: [PATCH] Add xfailing test (see #1971, #2675, #2671)

---
v2 (review): the subject announces an xfailing test, but v1 added the test
without an xfail marker. Added `import pytest` and `@pytest.mark.xfail`, plus
a docstring. Insertion counts and the hunk header are updated to 26 lines;
the blob id on the `index` line is carried over from v1 and not regenerated.

 spacy/tests/regression/test_issue1971.py | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)
 create mode 100644 spacy/tests/regression/test_issue1971.py

diff --git a/spacy/tests/regression/test_issue1971.py b/spacy/tests/regression/test_issue1971.py
new file mode 100644
index 000000000..93bfc7410
--- /dev/null
+++ b/spacy/tests/regression/test_issue1971.py
@@ -0,0 +1,26 @@
+# coding: utf8
+from __future__ import unicode_literals
+
+import pytest
+from spacy.matcher import Matcher
+from spacy.tokens import Token, Doc
+
+
+@pytest.mark.xfail
+def test_issue1971(en_vocab):
+    """Check that every match_id the matcher returns is in en_vocab.strings."""
+    # Possibly related to #2675 and #2671?
+    matcher = Matcher(en_vocab)
+    pattern = [
+        {"ORTH": "Doe"},
+        {"ORTH": "!", "OP": "?"},
+        {"_": {"optional": True}, "OP": "?"},
+        {"ORTH": "!", "OP": "?"},
+    ]
+    Token.set_extension("optional", default=False)
+    matcher.add("TEST", None, pattern)
+    doc = Doc(en_vocab, words=["Hello", "John", "Doe", "!"])
+    # We could also assert length 1 here, but this is more conclusive, because
+    # the real problem here is that it returns a duplicate match for a match_id
+    # that's not actually in the vocab!
+    assert all(match_id in en_vocab.strings for match_id, start, end in matcher(doc))