mirror of https://github.com/explosion/spaCy.git
Fix Span.sents for edge case of Span being the only Span in the last sentence of a Doc. (#12484)
This commit is contained in:
parent
372a90885e
commit
d85df9d577
|
@ -716,3 +716,18 @@ def test_for_partial_ent_sents():
|
||||||
# equal to the sentences referenced in ent.sents.
|
# equal to the sentences referenced in ent.sents.
|
||||||
for doc_sent, ent_sent in zip(doc.sents, doc.ents[0].sents):
|
for doc_sent, ent_sent in zip(doc.sents, doc.ents[0].sents):
|
||||||
assert doc_sent == ent_sent
|
assert doc_sent == ent_sent
|
||||||
|
|
||||||
|
|
||||||
|
def test_for_no_ent_sents():
    """Check Span.sents / Span.sent for a trailing, single-token entity.

    The entity covers only the last token of the Doc, and that token is marked
    as the start of its own sentence. Iterating Span.sents must produce exactly
    one sentence, and its text must match both Span.sent and the entity token.
    """
    tokens = ["This", "is", "a", "test.", "ENTITY"]
    # The final token opens a new sentence, so the entity is the only content
    # of the last sentence in the Doc.
    starts = [1, 0, 0, 0, 1]
    doc = Doc(English().vocab, words=tokens, sent_starts=starts)
    doc.set_ents([Span(doc, 4, 5, "WORK")])

    entity = doc.ents[0]
    sents = list(entity.sents)

    assert len(sents) == 1
    assert str(sents[0]) == str(entity.sent) == "ENTITY"
|
||||||
|
|
|
@ -463,6 +463,10 @@ cdef class Span:
|
||||||
elif i == self.doc.length - 1:
|
elif i == self.doc.length - 1:
|
||||||
yield Span(self.doc, start, self.doc.length)
|
yield Span(self.doc, start, self.doc.length)
|
||||||
|
|
||||||
|
# Ensure that trailing parts of the Span instance are included in last element of .sents.
|
||||||
|
if start == self.doc.length - 1:
|
||||||
|
yield Span(self.doc, start, self.doc.length)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def ents(self):
|
def ents(self):
|
||||||
"""The named entities that fall completely within the span. Returns
|
"""The named entities that fall completely within the span. Returns
|
||||||
|
|
Loading…
Reference in New Issue