diff --git a/spacy/tests/tokens/test_tokens_api.py b/spacy/tests/tokens/test_tokens_api.py
index 29c559a07..04281bd14 100644
--- a/spacy/tests/tokens/test_tokens_api.py
+++ b/spacy/tests/tokens/test_tokens_api.py
@@ -164,6 +164,12 @@ def test_merge_hang():
     doc.merge(8, 32, '', '', 'ORG')
 
 
+def test_sents_empty_string(EN):
+    doc = EN(u'')
+    sents = list(doc.sents)
+    assert len(sents) == 0
+
+
 @pytest.mark.models
 def test_runtime_error(EN):
     # Example that caused run-time error while parsing Reddit
@@ -199,4 +205,3 @@ def test_right_edge(EN):
 def test_has_vector(EN):
     doc = EN(u'''apple orange pear''')
     assert doc.has_vector
-
diff --git a/spacy/tokens/doc.pyx b/spacy/tokens/doc.pyx
index e1d90e921..418c3f8f0 100644
--- a/spacy/tokens/doc.pyx
+++ b/spacy/tokens/doc.pyx
@@ -299,7 +299,8 @@ cdef class Doc:
                 if self.c[i].sent_start:
                     yield Span(self, start, i)
                     start = i
-            yield Span(self, start, self.length)
+            if start != self.length:
+                yield Span(self, start, self.length)
 
     cdef int push_back(self, LexemeOrToken lex_or_tok, bint has_space) except -1:
         if self.length == self.max_length: