mirror of https://github.com/explosion/spaCy.git
35 lines
805 B
Python
35 lines
805 B
Python
|
from __future__ import unicode_literals
|
||
|
from ...attrs import *
|
||
|
from ...matcher import Matcher
|
||
|
from ...tokens import Doc
|
||
|
from ...en import English
|
||
|
|
||
|
def test_overlapping_matches():
    """Check that two patterns sharing one entity key yield two matches.

    Builds an eight-token ``Doc`` by hand, registers the patterns labelled
    'a' and 'b' under the entity key 'ab', runs the matcher over the doc
    and asserts that exactly two matches are reported.
    """
    en_vocab = English.Defaults.create_vocab()
    tokens = ['n', '=', '1', ';', 'a', ':', '5', '%']
    doc = Doc(en_vocab, words=tokens)

    entity_key = 'ab'
    # (label, token specs) pairs, registered below in this order.
    # NOTE(review): presumably 'a' targets "a : 5 %" and 'b' targets
    # "n = 1" -- confirm against the Matcher attribute semantics.
    labelled_patterns = (
        ('a', [{IS_ALPHA: True}, {ORTH: ':'}, {LIKE_NUM: True}, {ORTH: '%'}]),
        ('b', [{IS_ALPHA: True}, {ORTH: '='}, {LIKE_NUM: True}]),
    )

    matcher = Matcher(en_vocab)
    matcher.add_entity(entity_key, acceptor=None, on_match=None)
    for label, token_specs in labelled_patterns:
        matcher.add_pattern(entity_key, token_specs, label=label)

    found = matcher(doc)
    assert len(found) == 2
|