Fix remove pattern from matcher (#4454)

* raise specific error when removing a matcher rule that doesn't exist

* rephrasing

* bugfix in remove matcher + extended unit test
This commit is contained in:
Sofie Van Landeghem 2019-10-16 13:34:58 +02:00 committed by Matthew Honnibal
parent 2d249a9502
commit 7d1efac4eb
2 changed files with 16 additions and 3 deletions

View File

@ -141,7 +141,7 @@ cdef class Matcher:
cdef int i = 0 cdef int i = 0
while i < self.patterns.size(): while i < self.patterns.size():
pattern_key = get_ent_id(self.patterns.at(i)) pattern_key = get_ent_id(self.patterns.at(i))
if pattern_key == key: if pattern_key == norm_key:
self.patterns.erase(self.patterns.begin()+i) self.patterns.erase(self.patterns.begin()+i)
else: else:
i += 1 i += 1

View File

@ -3,6 +3,8 @@ from __future__ import unicode_literals
import pytest import pytest
import re import re
from spacy.lang.en import English
from spacy.matcher import Matcher from spacy.matcher import Matcher
from spacy.tokens import Doc, Span from spacy.tokens import Doc, Span
@ -145,16 +147,27 @@ def test_matcher_sets_return_correct_tokens(en_vocab):
assert texts == ["zero", "one", "two"] assert texts == ["zero", "one", "two"]
def test_matcher_remove(en_vocab): def test_matcher_remove():
matcher = Matcher(en_vocab) nlp = English()
matcher = Matcher(nlp.vocab)
text = "This is a test case."
pattern = [{"ORTH": "test"}, {"OP": "?"}] pattern = [{"ORTH": "test"}, {"OP": "?"}]
assert len(matcher) == 0 assert len(matcher) == 0
matcher.add("Rule", None, pattern) matcher.add("Rule", None, pattern)
assert "Rule" in matcher assert "Rule" in matcher
# should give two matches
results1 = matcher(nlp(text))
assert(len(results1) == 2)
# removing once should work # removing once should work
matcher.remove("Rule") matcher.remove("Rule")
# should not return any matches anymore
results2 = matcher(nlp(text))
assert (len(results2) == 0)
# removing again should throw an error # removing again should throw an error
with pytest.raises(ValueError): with pytest.raises(ValueError):
matcher.remove("Rule") matcher.remove("Rule")