# coding: utf-8
# Source: mirror of https://github.com/explosion/spaCy.git
from __future__ import unicode_literals

import pytest
from spacy.matcher import Matcher
from spacy.tokens import Doc


# Matcher patterns used to parametrize test_issue3009, stored as
# (match key, list of token-pattern sequences) pairs. Every entry starts with
# the lemma "have" and ends with "to", "do" and an adposition; entries "2" and
# "3" additionally place one quantified filler token (ASCII, not punctuation)
# directly after "have".
PATTERNS = [
    # No quantified token at all.
    ("1", [[{"LEMMA": "have"}, {"LOWER": "to"}, {"LOWER": "do"}, {"POS": "ADP"}]]),
    # Filler token carries the "*" operator.
    (
        "2",
        [
            [
                {"LEMMA": "have"},
                {"IS_ASCII": True, "IS_PUNCT": False, "OP": "*"},
                {"LOWER": "to"},
                {"LOWER": "do"},
                {"POS": "ADP"},
            ]
        ],
    ),
    # Filler token carries the "?" operator.
    (
        "3",
        [
            [
                {"LEMMA": "have"},
                {"IS_ASCII": True, "IS_PUNCT": False, "OP": "?"},
                {"LOWER": "to"},
                {"LOWER": "do"},
                {"POS": "ADP"},
            ]
        ],
    ),
]


@pytest.fixture
def doc(en_tokenizer):
    """Return the tokenized text "also has to do with" with tags set by hand.

    ``en_tokenizer`` is not defined in this file (presumably a shared
    conftest fixture -- confirm). The fine-grained tags are assigned
    manually to the first five tokens; NOTE(review): this is presumably what
    populates the LEMMA/POS attributes that PATTERNS matches on in the spaCy
    version under test -- verify before porting to another version.
    """
    tagged = en_tokenizer("also has to do with")
    # One tag per token position, applied in order.
    for index, tag in enumerate(("RB", "VBZ", "TO", "VB", "IN")):
        tagged[index].tag_ = tag
    return tagged


@pytest.fixture
def matcher(en_tokenizer):
    """Return a new Matcher constructed from the ``en_tokenizer`` vocab.

    No patterns are registered here; each test adds its own.
    """
    return Matcher(en_tokenizer.vocab)


@pytest.mark.parametrize("pattern", PATTERNS)
def test_issue3009(doc, matcher, pattern):
    """Test problem with matcher quantifiers"""
    # Each PATTERNS entry is a (match key, token-pattern sequences) pair.
    key, token_patterns = pattern
    # The second positional argument is passed as None, as in every
    # ``matcher.add`` call in this file.
    matcher.add(key, None, *token_patterns)
    matches = matcher(doc)
    # Every parametrized pattern must produce at least one match.
    assert matches


def test_issue2464(matcher):
    """Test problem with successive ?. This is the same bug, so putting it here."""
    doc = Doc(matcher.vocab, words=["a", "b"])
    # Two consecutive token specs that carry only the "?" operator.
    matcher.add("4", None, [{"OP": "?"}, {"OP": "?"}])
    matches = matcher(doc)
    # Exactly three matches are expected on the two-word doc.
    assert len(matches) == 3