Fix PhraseMatcher to work with updated Matcher

#613
This commit is contained in:
Dmitry Sadovnychyi 2016-11-09 00:14:26 +08:00
parent 86c056ba64
commit 9488222e79
1 changed file with 8 additions and 8 deletions

View File

@ -492,7 +492,7 @@ cdef class PhraseMatcher:
abstract_patterns = [] abstract_patterns = []
for length in range(1, max_length): for length in range(1, max_length):
abstract_patterns.append([{tag: True} for tag in get_bilou(length)]) abstract_patterns.append([{tag: True} for tag in get_bilou(length)])
self.matcher.add('Candidate', 'MWE', {}, abstract_patterns) self.matcher.add('Candidate', 'MWE', {}, abstract_patterns, acceptor=self.accept_match)
def add(self, Doc tokens): def add(self, Doc tokens):
cdef int length = tokens.length cdef int length = tokens.length
@ -512,7 +512,7 @@ cdef class PhraseMatcher:
def __call__(self, Doc doc): def __call__(self, Doc doc):
matches = [] matches = []
for label, start, end in self.matcher(doc, acceptor=self.accept_match): for ent_id, label, start, end in self.matcher(doc):
cand = doc[start : end] cand = doc[start : end]
start = cand[0].idx start = cand[0].idx
end = cand[-1].idx + len(cand[-1]) end = cand[-1].idx + len(cand[-1])
@ -526,7 +526,7 @@ cdef class PhraseMatcher:
self(doc) self(doc)
yield doc yield doc
def accept_match(self, Doc doc, int label, int start, int end): def accept_match(self, Doc doc, int ent_id, int label, int start, int end):
assert (end - start) < self.max_length assert (end - start) < self.max_length
cdef int i, j cdef int i, j
for i in range(self.max_length): for i in range(self.max_length):
@ -535,6 +535,6 @@ cdef class PhraseMatcher:
self._phrase_key[i] = doc.c[j].lex.orth self._phrase_key[i] = doc.c[j].lex.orth
cdef hash_t key = hash64(self._phrase_key, self.max_length * sizeof(attr_t), 0) cdef hash_t key = hash64(self._phrase_key, self.max_length * sizeof(attr_t), 0)
if self.phrase_ids.get(key): if self.phrase_ids.get(key):
return True return (ent_id, label, start, end)
else: else:
return False return False