From 5ace559201c714ab89b3092b87d791e16973f31d Mon Sep 17 00:00:00 2001
From: Sofie Van Landeghem
Date: Thu, 21 Jan 2021 16:18:46 +0100
Subject: [PATCH] ensure span.text works for an empty span (#6772)

---
Note: Span.text previously evaluated self[-1].whitespace_ unconditionally.
The added len(self) > 0 check skips that lookup for a zero-token span such
as doc[:0], which the new regression test exercises via both text_with_ws
and text.

 spacy/tests/regression/test_issue6755.py | 9 +++++++++
 spacy/tokens/span.pyx                    | 2 +-
 2 files changed, 10 insertions(+), 1 deletion(-)
 create mode 100644 spacy/tests/regression/test_issue6755.py

diff --git a/spacy/tests/regression/test_issue6755.py b/spacy/tests/regression/test_issue6755.py
new file mode 100644
index 000000000..4c735b1ff
--- /dev/null
+++ b/spacy/tests/regression/test_issue6755.py
@@ -0,0 +1,9 @@
+# coding: utf8
+from __future__ import unicode_literals
+
+
+def test_issue6755(en_tokenizer):
+    doc = en_tokenizer("This is a magnificent sentence.")
+    span = doc[:0]
+    assert span.text_with_ws == ""
+    assert span.text == ""
diff --git a/spacy/tokens/span.pyx b/spacy/tokens/span.pyx
index cf0775bae..2ac8af9e4 100644
--- a/spacy/tokens/span.pyx
+++ b/spacy/tokens/span.pyx
@@ -500,7 +500,7 @@ cdef class Span:
     def text(self):
         """RETURNS (unicode): The original verbatim text of the span."""
         text = self.text_with_ws
-        if self[-1].whitespace_:
+        if len(self) > 0 and self[-1].whitespace_:
             text = text[:-1]
         return text
 