# coding: utf8
# Regression test for issue #3839
# (file: spacy/tests/regression/test_issue3839.py).
from __future__ import unicode_literals

import pytest
from spacy.matcher import Matcher
from spacy.tokens import Doc


@pytest.mark.xfail(reason="Regression test for issue #3839")
def test_issue3839(en_vocab):
    """Test that match IDs returned by the matcher are correct.

    For each pattern, a fresh ``Matcher`` is built with the pattern
    registered under the key ``"PATTERN"`` and run over the doc
    "terrific group of people". The ID in the first returned match must
    equal the value that ``en_vocab.strings`` gives for that key.

    The two patterns differ only in how many ``{"OP": "?"}`` tokens sit
    between "terrific" and "group" (one vs. two).

    en_vocab: vocab fixture supplied by the test suite.
    """
    doc = Doc(en_vocab, words=["terrific", "group", "of", "people"])
    match_id = "PATTERN"
    patterns = [
        [{"LOWER": "terrific"}, {"OP": "?"}, {"LOWER": "group"}],
        [{"LOWER": "terrific"}, {"OP": "?"}, {"OP": "?"}, {"LOWER": "group"}],
    ]
    for pattern in patterns:
        # Use a fresh matcher per pattern so each one is checked in isolation.
        matcher = Matcher(en_vocab)
        matcher.add(match_id, None, pattern)
        matches = matcher(doc)
        # Fail with a readable message instead of an IndexError if nothing
        # matched at all.
        assert matches, "no matches returned for pattern %r" % (pattern,)
        assert matches[0][0] == en_vocab.strings[match_id]