Guard Span.text against zero-length spans.

Span.text inspects the span's last token (self[-1]) to decide whether to
strip trailing whitespace. A zero-length span such as doc[:0] has no last
token, so the lookup is now only performed when len(self) > 0. The new
regression test checks that both text_with_ws and text are "" for an
empty span.

diff --git a/spacy/tests/regression/test_issue6755.py b/spacy/tests/regression/test_issue6755.py
new file mode 100644
index 000000000..4c735b1ff
--- /dev/null
+++ b/spacy/tests/regression/test_issue6755.py
@@ -0,0 +1,9 @@
+# coding: utf8
+from __future__ import unicode_literals
+
+
+def test_issue6755(en_tokenizer):
+    doc = en_tokenizer("This is a magnificent sentence.")
+    span = doc[:0]
+    assert span.text_with_ws == ""
+    assert span.text == ""
diff --git a/spacy/tokens/span.pyx b/spacy/tokens/span.pyx
index cf0775bae..2ac8af9e4 100644
--- a/spacy/tokens/span.pyx
+++ b/spacy/tokens/span.pyx
@@ -500,6 +500,6 @@ cdef class Span:
     def text(self):
         """RETURNS (unicode): The original verbatim text of the span."""
         text = self.text_with_ws
-        if self[-1].whitespace_:
+        if len(self) > 0 and self[-1].whitespace_:
             text = text[:-1]
         return text