2017-01-10 18:24:10 +00:00
|
|
|
# coding: utf-8
|
2016-11-06 10:21:36 +00:00
|
|
|
from __future__ import unicode_literals
|
2017-01-10 18:24:10 +00:00
|
|
|
|
2016-11-06 10:21:36 +00:00
|
|
|
from ...attrs import *
|
|
|
|
from ...matcher import Matcher
|
|
|
|
from ...tokens import Doc
|
|
|
|
from ...en import English
|
|
|
|
|
2017-01-10 18:24:10 +00:00
|
|
|
|
2016-11-06 10:21:36 +00:00
|
|
|
def test_overlapping_matches():
    """Register two patterns under one entity key and expect two matches.

    A Doc is built directly from a fixed word list, two token patterns are
    added to the same "ab" entity with labels 'a' and 'b', and the matcher
    is run over the Doc. The test only checks the number of matches.
    """
    vocab = English.Defaults.create_vocab()
    words = ['n', '=', '1', ';', 'a', ':', '5', '%']
    doc = Doc(vocab, words=words)

    matcher = Matcher(vocab)
    matcher.add_entity("ab", acceptor=None, on_match=None)

    # (label, token specs) pairs, added in this order under the shared key.
    # Presumably 'a' is meant to hit "a : 5 %" and 'b' to hit "n = 1" --
    # the assertion below only verifies the total count.
    labelled_specs = (
        ('a', [
            {IS_ALPHA: True},
            {ORTH: ':'},
            {LIKE_NUM: True},
            {ORTH: '%'},
        ]),
        ('b', [
            {IS_ALPHA: True},
            {ORTH: '='},
            {LIKE_NUM: True},
        ]),
    )
    for label, token_specs in labelled_specs:
        matcher.add_pattern('ab', token_specs, label=label)

    found = matcher(doc)
    assert len(found) == 2
|