spaCy/spacy/tests/regression/test_issue590.py

# coding: utf-8
from __future__ import unicode_literals

from ...attrs import *
from ...matcher import Matcher
from ...tokens import Doc
from ...en import English


def test_overlapping_matches():
    """Regression test (issue 590): two patterns under one entity both fire.

    Builds an eight-token ``Doc`` from a fresh English vocab, registers the
    entity ``ab`` with two token patterns of different lengths (labelled
    ``a`` and ``b``), runs the matcher over the doc and checks that exactly
    two matches come back.
    """
    shared_vocab = English.Defaults.create_vocab()
    words = ['n', '=', '1', ';', 'a', ':', '5', '%']
    tokens = Doc(shared_vocab, words=words)

    rule_matcher = Matcher(shared_vocab)
    # No acceptor / callback: every raw match is reported as-is.
    rule_matcher.add_entity("ab", acceptor=None, on_match=None)

    # Four-token pattern: <alpha> ':' <number-like> '%'
    percent_spec = [
        {IS_ALPHA: True},
        {ORTH: ':'},
        {LIKE_NUM: True},
        {ORTH: '%'},
    ]
    # Three-token pattern: <alpha> '=' <number-like>
    assign_spec = [
        {IS_ALPHA: True},
        {ORTH: '='},
        {LIKE_NUM: True},
    ]

    # Registration order is kept: label 'a' first, then label 'b'.
    for tag, spec in (('a', percent_spec), ('b', assign_spec)):
        rule_matcher.add_pattern('ab', spec, label=tag)

    found = rule_matcher(tokens)
    assert len(found) == 2