From 26095f9722e54763484f30764bffdcfb34b3abaf Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Fri, 6 May 2016 00:17:38 +0200
Subject: [PATCH] * Add span.sent property, re Issue #366

---
 spacy/tests/spans/test_span.py | 13 +++++++++++++
 spacy/tokens/span.pyx          | 22 ++++++++++++++++++++++
 2 files changed, 35 insertions(+)

diff --git a/spacy/tests/spans/test_span.py b/spacy/tests/spans/test_span.py
index 3619eb88a..ad877df4c 100644
--- a/spacy/tests/spans/test_span.py
+++ b/spacy/tests/spans/test_span.py
@@ -37,3 +37,16 @@ def test_root2():
     doc.from_array([HEAD], heads.T)
     south_carolina = doc[-2:]
     assert south_carolina.root.text == 'Carolina'
+
+
+def test_sent(doc):
+    '''Test new span.sent property'''
+    # Fixture text is presumably: 'This is a sentence. This is another sentence. And a third.'
+    heads = np.asarray([[1, 0, -1, -1, -1, 1, 0, -1, -1, -1, 2, 1, 0, -1]], dtype='int32')
+    doc.from_array([HEAD], heads.T)
+    assert len(list(doc.sents))
+    span = doc[:2]
+    assert span.sent.root.text == 'is'
+    assert span.sent.text == 'This is a sentence.'
+    span = doc[6:7]
+    assert span.sent.root.left_edge.text == 'This'
diff --git a/spacy/tokens/span.pyx b/spacy/tokens/span.pyx
index 801c98523..f8329578b 100644
--- a/spacy/tokens/span.pyx
+++ b/spacy/tokens/span.pyx
@@ -98,6 +98,28 @@ cdef class Span:
 
             self.start = start
             self.end = end + 1
+
+    property sent:
+        '''Get the sentence span that this span is a part of.
+
+        Walks head offsets from the span's first token to its root and
+        returns the span from the root's l_edge to its r_edge, inclusive.
+        Raises RuntimeError if no root is found within doc.length steps.
+        '''
+        def __get__(self):
+            # Intended to raise if the document is not parsed.
+            # NOTE(review): confirm that merely accessing .sents triggers that check.
+            self.doc.sents
+            cdef int n = 0
+            root = &self.doc.c[self.start]
+            # A head offset of 0 marks the root token.
+            while root.head != 0:
+                root += root.head
+                n += 1
+                # Bail out rather than loop forever if the heads never reach a root.
+                if n >= self.doc.length:
+                    raise RuntimeError
+            return self.doc[root.l_edge : root.r_edge + 1]
 
     property vector:
         def __get__(self):