diff --git a/spacy/tests/regression/test_issue3328.py b/spacy/tests/regression/test_issue3328.py new file mode 100644 index 000000000..fce25ca1c --- /dev/null +++ b/spacy/tests/regression/test_issue3328.py @@ -0,0 +1,21 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import pytest +from spacy.matcher import Matcher +from spacy.tokens import Doc + + +@pytest.mark.xfail +def test_issue3328(en_vocab): + doc = Doc(en_vocab, words=["Hello", ",", "how", "are", "you", "doing", "?"]) + matcher = Matcher(en_vocab) + patterns = [ + [{"LOWER": {"IN": ["hello", "how"]}}], + [{"LOWER": {"IN": ["you", "doing"]}}], + ] + matcher.add("TEST", None, *patterns) + matches = matcher(doc) + assert len(matches) == 4 + matched_texts = [doc[start:end].text for _, start, end in matches] + assert matched_texts == ["Hello", "how", "you", "doing"]