2017-01-10 18:24:10 +00:00
|
|
|
# coding: utf-8
|
|
|
|
from __future__ import unicode_literals
|
|
|
|
|
2016-10-28 14:38:32 +00:00
|
|
|
import spacy
|
|
|
|
import spacy.matcher
|
2016-10-28 15:41:16 +00:00
|
|
|
from spacy.attrs import IS_PUNCT, ORTH
|
2016-10-28 14:38:32 +00:00
|
|
|
|
|
|
|
import pytest
|
|
|
|
|
2017-01-10 18:24:10 +00:00
|
|
|
|
2016-10-28 14:38:32 +00:00
|
|
|
@pytest.mark.models
def test_matcher_segfault():
    """Grow a Matcher one pattern at a time, re-running it after each addition.

    There are no assertions: the test passes as long as every ``add`` call
    and every matcher call returns. Judging by the test name, the failure
    being guarded against is a hard crash (segfault) inside the matcher.
    """
    pipeline = spacy.load('en', parser=False, entity=False)
    token_matcher = spacy.matcher.Matcher(pipeline.vocab)
    text = 'a b; c'

    # (entity key, token specs) pairs, registered in this order. All three
    # patterns share the 'a', 'b' prefix; the last one ends in 'd', which
    # does not occur in ``text``.
    patterns = (
        ('1', [{ORTH: 'a'}, {ORTH: 'b'}]),
        ('2', [{ORTH: 'a'}, {ORTH: 'b'}, {IS_PUNCT: True}, {ORTH: 'c'}]),
        ('3', [{ORTH: 'a'}, {ORTH: 'b'}, {IS_PUNCT: True}, {ORTH: 'd'}]),
    )
    for key, token_specs in patterns:
        token_matcher.add(entity_key=key, label='TEST', attrs={}, specs=[token_specs])
        # Re-parse and re-match the same text after each new pattern.
        token_matcher(pipeline(text))
|