mirror of https://github.com/explosion/spaCy.git
Fix PhraseMatcher.remove for overlapping patterns (#4437)
This commit is contained in:
parent
f8f68bb062
commit
98a961a60e
|
@ -102,8 +102,10 @@ cdef class PhraseMatcher:
|
|||
cdef vector[MapStruct*] path_nodes
|
||||
cdef vector[key_t] path_keys
|
||||
cdef key_t key_to_remove
|
||||
for keyword in self._docs[key]:
|
||||
for keyword in sorted(self._docs[key], key=lambda x: len(x), reverse=True):
|
||||
current_node = self.c_map
|
||||
path_nodes.clear()
|
||||
path_keys.clear()
|
||||
for token in keyword:
|
||||
result = map_get(current_node, token)
|
||||
if result:
|
||||
|
|
|
@ -226,3 +226,13 @@ def test_phrase_matcher_callback(en_vocab):
|
|||
matcher.add("COMPANY", mock, pattern)
|
||||
matches = matcher(doc)
|
||||
mock.assert_called_once_with(matcher, doc, 0, matches)
|
||||
|
||||
|
||||
def test_phrase_matcher_remove_overlapping_patterns(en_vocab):
    """Regression test for removing a key with overlapping patterns (#4437).

    Four patterns, each a prefix of the next, are registered under the single
    key "THIS". Removing that key must not raise, and afterwards the matcher
    must no longer report the key.
    """
    matcher = PhraseMatcher(en_vocab)
    pattern1 = Doc(en_vocab, words=["this"])
    pattern2 = Doc(en_vocab, words=["this", "is"])
    pattern3 = Doc(en_vocab, words=["this", "is", "a"])
    pattern4 = Doc(en_vocab, words=["this", "is", "a", "word"])
    matcher.add("THIS", None, pattern1, pattern2, pattern3, pattern4)
    # Sanity check: the key is registered before we try to remove it.
    assert "THIS" in matcher
    matcher.remove("THIS")
    # "THIS" was the only key added, so the matcher should now be empty.
    assert "THIS" not in matcher
    assert len(matcher) == 0
|
||||
|
|
Loading…
Reference in New Issue