mirror of https://github.com/explosion/spaCy.git
parent
86c056ba64
commit
9488222e79
|
@ -492,7 +492,7 @@ cdef class PhraseMatcher:
|
|||
abstract_patterns = []
|
||||
for length in range(1, max_length):
|
||||
abstract_patterns.append([{tag: True} for tag in get_bilou(length)])
|
||||
self.matcher.add('Candidate', 'MWE', {}, abstract_patterns)
|
||||
self.matcher.add('Candidate', 'MWE', {}, abstract_patterns, acceptor=self.accept_match)
|
||||
|
||||
def add(self, Doc tokens):
|
||||
cdef int length = tokens.length
|
||||
|
@ -512,7 +512,7 @@ cdef class PhraseMatcher:
|
|||
|
||||
def __call__(self, Doc doc):
|
||||
matches = []
|
||||
for label, start, end in self.matcher(doc, acceptor=self.accept_match):
|
||||
for ent_id, label, start, end in self.matcher(doc):
|
||||
cand = doc[start : end]
|
||||
start = cand[0].idx
|
||||
end = cand[-1].idx + len(cand[-1])
|
||||
|
@ -526,7 +526,7 @@ cdef class PhraseMatcher:
|
|||
self(doc)
|
||||
yield doc
|
||||
|
||||
def accept_match(self, Doc doc, int label, int start, int end):
|
||||
def accept_match(self, Doc doc, int ent_id, int label, int start, int end):
|
||||
assert (end - start) < self.max_length
|
||||
cdef int i, j
|
||||
for i in range(self.max_length):
|
||||
|
@ -535,6 +535,6 @@ cdef class PhraseMatcher:
|
|||
self._phrase_key[i] = doc.c[j].lex.orth
|
||||
cdef hash_t key = hash64(self._phrase_key, self.max_length * sizeof(attr_t), 0)
|
||||
if self.phrase_ids.get(key):
|
||||
return True
|
||||
return (ent_id, label, start, end)
|
||||
else:
|
||||
return False
|
||||
|
|
Loading…
Reference in New Issue