spaCy/spacy/tests/regression/test_issue2671.py

29 lines
869 B
Python
Raw Normal View History

# coding: utf-8
from __future__ import unicode_literals
2018-09-27 14:41:57 +00:00
from spacy.lang.en import English
from spacy.matcher import Matcher
def test_issue2671():
    """Ensure the correct entity ID is returned for matches with quantifiers.

    The pattern contains an optional ("?") punctuation token between two
    LOWER tokens and is run against two texts: one with a hyphen between
    the two words and one with a plain space. Every match reported for
    either text must resolve back to the ID the pattern was registered under.

    See also #2675
    """
    nlp = English()
    matcher = Matcher(nlp.vocab)
    pattern_id = "test_pattern"
    pattern = [
        {"LOWER": "high"},
        {"IS_PUNCT": True, "OP": "?"},
        {"LOWER": "adrenaline"},
    ]
    matcher.add(pattern_id, None, pattern)
    doc1 = nlp("This is a high-adrenaline situation.")
    doc2 = nlp("This is a high adrenaline situation.")
    for doc in (doc1, doc2):
        matches = matcher(doc)
        # Without this check an empty result would skip the loop below and
        # the test would pass without ever comparing an ID.
        assert matches
        for match_id, _start, _end in matches:
            # match_id is looked up in the vocab's string store to recover
            # the key that was passed to matcher.add().
            assert nlp.vocab.strings[match_id] == pattern_id