mirror of https://github.com/explosion/spaCy.git
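Don't automatically write newly matched entities into `doc.ents` in the Matcher; it now only returns the matches, so callers that relied on the old behaviour must update `doc.ents` themselves. This fixes a long-standing wart, but introduces a *backwards incompatibility.*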
This commit is contained in:
parent
e48df859b5
commit
55f1f7edaf
|
@ -294,17 +294,6 @@ cdef class Matcher:
|
||||||
label = pattern[1].attrs[1].value
|
label = pattern[1].attrs[1].value
|
||||||
if acceptor is None or acceptor(doc, ent_id, label, start, end):
|
if acceptor is None or acceptor(doc, ent_id, label, start, end):
|
||||||
matches.append((ent_id, label, start, end))
|
matches.append((ent_id, label, start, end))
|
||||||
seen = set()
|
|
||||||
filtered = []
|
|
||||||
for ent_id, label, start, end in sorted(matches,
|
|
||||||
key=lambda m: (m[2],-(m[2]-m[3]))):
|
|
||||||
if all(i in seen for i in range(start, end)):
|
|
||||||
continue
|
|
||||||
else:
|
|
||||||
for i in range(start, end):
|
|
||||||
seen.add(i)
|
|
||||||
filtered.append((label, start, end))
|
|
||||||
doc.ents = [(e.label, e.start, e.end) for e in doc.ents] + filtered
|
|
||||||
return matches
|
return matches
|
||||||
|
|
||||||
def pipe(self, docs, batch_size=1000, n_threads=2):
|
def pipe(self, docs, batch_size=1000, n_threads=2):
|
||||||
|
|
Loading…
Reference in New Issue