mirror of https://github.com/explosion/spaCy.git
Modernise matcher tests and split into two files
This commit is contained in:
parent
27482ebed8
commit
b438cfddbc
|
@ -1,59 +1,53 @@
|
|||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
import spacy
|
||||
from spacy.vocab import Vocab
|
||||
from spacy.matcher import Matcher
|
||||
from spacy.tokens.doc import Doc
|
||||
from spacy.attrs import *
|
||||
|
||||
from ...matcher import Matcher
|
||||
from ...attrs import ORTH
|
||||
from ..util import get_doc
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def en_vocab():
|
||||
return spacy.get_lang_class('en').Defaults.create_vocab()
|
||||
|
||||
|
||||
def test_init_matcher(en_vocab):
|
||||
@pytest.mark.parametrize('words,entity', [
|
||||
(["Test", "Entity"], "TestEntity")])
|
||||
def test_matcher_add_empty_entity(en_vocab, words, entity):
|
||||
matcher = Matcher(en_vocab)
|
||||
matcher.add_entity(entity)
|
||||
doc = get_doc(en_vocab, words)
|
||||
assert matcher.n_patterns == 0
|
||||
assert matcher(Doc(en_vocab, words=[u'Some', u'words'])) == []
|
||||
assert matcher(doc) == []
|
||||
|
||||
|
||||
def test_add_empty_entity(en_vocab):
|
||||
@pytest.mark.parametrize('entity1,entity2,attrs', [
|
||||
("TestEntity", "TestEntity2", {"Hello": "World"})])
|
||||
def test_matcher_get_entity_attrs(en_vocab, entity1, entity2, attrs):
|
||||
matcher = Matcher(en_vocab)
|
||||
matcher.add_entity('TestEntity')
|
||||
matcher.add_entity(entity1)
|
||||
assert matcher.get_entity(entity1) == {}
|
||||
matcher.add_entity(entity2, attrs=attrs)
|
||||
assert matcher.get_entity(entity2) == attrs
|
||||
assert matcher.get_entity(entity1) == {}
|
||||
|
||||
|
||||
@pytest.mark.parametrize('words,entity,attrs',
|
||||
[(["Test", "Entity"], "TestEntity", {"Hello": "World"})])
|
||||
def test_matcher_get_entity_via_match(en_vocab, words, entity, attrs):
|
||||
matcher = Matcher(en_vocab)
|
||||
matcher.add_entity(entity, attrs=attrs)
|
||||
doc = get_doc(en_vocab, words)
|
||||
assert matcher.n_patterns == 0
|
||||
assert matcher(Doc(en_vocab, words=[u'Test', u'Entity'])) == []
|
||||
assert matcher(doc) == []
|
||||
|
||||
|
||||
def test_get_entity_attrs(en_vocab):
|
||||
matcher = Matcher(en_vocab)
|
||||
matcher.add_entity('TestEntity')
|
||||
entity = matcher.get_entity('TestEntity')
|
||||
assert entity == {}
|
||||
matcher.add_entity('TestEntity2', attrs={'Hello': 'World'})
|
||||
entity = matcher.get_entity('TestEntity2')
|
||||
assert entity == {'Hello': 'World'}
|
||||
assert matcher.get_entity('TestEntity') == {}
|
||||
|
||||
|
||||
def test_get_entity_via_match(en_vocab):
|
||||
matcher = Matcher(en_vocab)
|
||||
matcher.add_entity('TestEntity', attrs={u'Hello': u'World'})
|
||||
assert matcher.n_patterns == 0
|
||||
assert matcher(Doc(en_vocab, words=[u'Test', u'Entity'])) == []
|
||||
matcher.add_pattern(u'TestEntity', [{ORTH: u'Test'}, {ORTH: u'Entity'}])
|
||||
matcher.add_pattern(entity, [{ORTH: words[0]}, {ORTH: words[1]}])
|
||||
assert matcher.n_patterns == 1
|
||||
matches = matcher(Doc(en_vocab, words=[u'Test', u'Entity']))
|
||||
|
||||
matches = matcher(doc)
|
||||
assert len(matches) == 1
|
||||
assert len(matches[0]) == 4
|
||||
|
||||
ent_id, label, start, end = matches[0]
|
||||
assert ent_id == matcher.vocab.strings[u'TestEntity']
|
||||
assert ent_id == matcher.vocab.strings[entity]
|
||||
assert label == 0
|
||||
assert start == 0
|
||||
assert end == 2
|
||||
attrs = matcher.get_entity(ent_id)
|
||||
assert attrs == {u'Hello': u'World'}
|
||||
|
||||
|
||||
|
||||
assert matcher.get_entity(ent_id) == attrs
|
||||
|
|
|
@ -0,0 +1,15 @@
|
|||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from ...matcher import Matcher
|
||||
from ..util import get_doc
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.mark.parametrize('words', [["Some", "words"]])
|
||||
def test_matcher_init(en_vocab, words):
|
||||
matcher = Matcher(en_vocab)
|
||||
doc = get_doc(en_vocab, words)
|
||||
assert matcher.n_patterns == 0
|
||||
assert matcher(doc) == []
|
Loading…
Reference in New Issue