spaCy/spacy/tests/regression/test_issue850.py

38 lines
1.3 KiB
Python
Raw Normal View History

2017-05-22 11:54:20 +00:00
# coding: utf-8
2017-06-05 00:26:13 +00:00
from __future__ import unicode_literals
import pytest
2018-02-15 14:39:47 +00:00
from ...matcher import Matcher
from ...vocab import Vocab
from ...attrs import LOWER
from ...tokens import Doc
def test_basic_case():
    """Check that a '*' operator on a LOWER token spec gives one full match.

    The pattern is 'bob', then an 'and' spec carrying OP '*', then 'frank',
    and it is run over the words ['bob', 'and', 'and', 'frank']. An
    always-true flag is also registered on the vocab, although this
    particular pattern does not reference it.
    """
    vocab = Vocab(lex_attr_getters={LOWER: lambda string: string.lower()})
    matcher = Matcher(vocab)
    # Registered for parity with test_issue850; unused by the pattern below.
    IS_ANY_TOKEN = matcher.vocab.add_flag(lambda x: True)

    pattern = [
        {'LOWER': "bob"},
        {'OP': '*', 'LOWER': 'and'},
        {'LOWER': 'frank'},
    ]
    matcher.add('FarAway', None, pattern)

    doc = Doc(matcher.vocab, words=['bob', 'and', 'and', 'frank'])
    matches = matcher(doc)

    # Exactly one match, covering every token of the doc.
    assert len(matches) == 1
    _match_id, start, end = matches[0]
    assert start == 0
    assert end == 4
2017-03-07 16:16:26 +00:00
def test_issue850():
    """Regression test for issue 850.

    The variable-length ('*') token spec can also match the token that
    follows it ('frank'). Check that this ambiguity is resolved to a single
    match spanning the whole of ['bob', 'and', 'and', 'frank'].
    """
    vocab = Vocab(lex_attr_getters={LOWER: lambda string: string.lower()})
    matcher = Matcher(vocab)
    # Flag whose getter returns True for every input.
    IS_ANY_TOKEN = matcher.vocab.add_flag(lambda x: True)

    pattern = [
        {'LOWER': "bob"},
        {'OP': '*', 'IS_ANY_TOKEN': True},
        {'LOWER': 'frank'},
    ]
    matcher.add('FarAway', None, pattern)

    doc = Doc(matcher.vocab, words=['bob', 'and', 'and', 'frank'])
    matches = matcher(doc)

    # Exactly one match, covering every token of the doc.
    assert len(matches) == 1
    _match_id, start, end = matches[0]
    assert start == 0
    assert end == 4