mirror of https://github.com/explosion/spaCy.git
add char_span to Span (#4793)
This commit is contained in:
parent
f9b541f9ef
commit
1707e77c5e
|
@ -32,6 +32,24 @@ def doc_not_parsed(en_tokenizer):
|
||||||
return doc
|
return doc
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
    "i_sent,i,j,text",
    [
        (0, 0, len("This is a"), "This is a"),
        (1, 0, len("This is another"), "This is another"),
        (2, len("And "), len("And ") + len("a third"), "a third"),
        (0, 1, 2, None),
    ],
)
def test_char_span(doc, i_sent, i, j, text):
    """Check `Span.char_span` with character offsets relative to a sentence.

    Each case picks sentence number `i_sent` from the `doc` fixture and asks
    for the characters `[i, j)` of that sentence. When `text` is set, the
    resulting span must have exactly that text; when it is None, the call is
    expected to produce a falsy result.
    """
    sentence = list(doc.sents)[i_sent]
    result = sentence.char_span(i, j)
    if text:
        assert result.text == text
    else:
        assert not result
|
||||||
|
|
||||||
|
|
||||||
def test_spans_sent_spans(doc):
|
def test_spans_sent_spans(doc):
|
||||||
sents = list(doc.sents)
|
sents = list(doc.sents)
|
||||||
assert sents[0].start == 0
|
assert sents[0].start == 0
|
||||||
|
|
|
@ -584,6 +584,22 @@ cdef class Span:
|
||||||
else:
|
else:
|
||||||
return self.doc[root]
|
return self.doc[root]
|
||||||
|
|
||||||
|
def char_span(self, int start_idx, int end_idx, label=0, kb_id=0, vector=None):
    """Create a `Span` object from the slice `span.text[start_idx : end_idx]`.

    The offsets are relative to the start of this span, not to the start of
    the parent document.

    start_idx (int): The index of the first character of the new span,
        counted from the first character of this span.
    end_idx (int): The index of the first character after the new span,
        counted from the first character of this span.
    label (uint64 or string): A label to attach to the Span, e.g. for
        named entities.
    kb_id (uint64 or string): An ID from a KB to capture the meaning of a
        named entity.
    vector (ndarray[ndim=1, dtype='float32']): A meaning representation of
        the span.
    RETURNS (Span): The object produced by `Doc.char_span` for the shifted
        offsets. NOTE(review): presumably None when the offsets do not line
        up with token boundaries -- confirm against `Doc.char_span`.
    """
    # Translate span-relative offsets into document-level character offsets.
    start_idx += self.start_char
    end_idx += self.start_char
    # Forward the optional arguments too; previously they were accepted but
    # silently dropped, so the caller's label/kb_id/vector never took effect.
    return self.doc.char_span(
        start_idx,
        end_idx,
        label=label,
        kb_id=kb_id,
        vector=vector,
    )
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def conjuncts(self):
|
def conjuncts(self):
|
||||||
"""Tokens that are conjoined to the span's root.
|
"""Tokens that are conjoined to the span's root.
|
||||||
|
|
Loading…
Reference in New Issue