From 97685aecb735289de32c992e3659e503412aeeb5 Mon Sep 17 00:00:00 2001 From: "Yubing (Tom) Dong" Date: Tue, 6 Oct 2015 02:45:49 -0700 Subject: [PATCH] Add slicing support to Span --- spacy/tokens/spans.pyx | 21 ++++++++++++++++++++- tests/tokens/test_tokens_api.py | 18 ++++++++++++++++++ 2 files changed, 38 insertions(+), 1 deletion(-) diff --git a/spacy/tokens/spans.pyx b/spacy/tokens/spans.pyx index 99efad4b9..955d24ad4 100644 --- a/spacy/tokens/spans.pyx +++ b/spacy/tokens/spans.pyx @@ -50,7 +50,26 @@ cdef class Span: return 0 return self.end - self.start - def __getitem__(self, int i): + def __getitem__(self, object i): + if isinstance(i, slice): + start, end, step = i.start, i.stop, i.step + if start is None: + start = 0 + elif start < 0: + start += len(self) + start = min(len(self), max(0, start)) + + if end is None: + end = len(self) + elif end < 0: + end += len(self) + end = min(len(self), max(start, end)) + + start += self.start + end += self.start + + return self.doc[start:end:i.step] + if i < 0: return self.doc[self.end + i] else: diff --git a/tests/tokens/test_tokens_api.py b/tests/tokens/test_tokens_api.py index 34e54a2af..675f00235 100644 --- a/tests/tokens/test_tokens_api.py +++ b/tests/tokens/test_tokens_api.py @@ -59,6 +59,24 @@ def test_getitem(EN): span = tokens[1:4] assert span[0].orth_ == 'it' + subspan = span[:] + assert to_str(subspan) == 'it/back/!' + subspan = span[:2] + assert to_str(subspan) == 'it/back' + subspan = span[1:] + assert to_str(subspan) == 'back/!' + subspan = span[:-1] + assert to_str(subspan) == 'it/back' + subspan = span[-2:] + assert to_str(subspan) == 'back/!' + subspan = span[1:2] + assert to_str(subspan) == 'back' + subspan = span[-2:-1] + assert to_str(subspan) == 'back' + subspan = span[-50:50] + assert to_str(subspan) == 'it/back/!' + subspan = span[50:-50] + assert subspan.start == subspan.end == 4 and not to_str(subspan) @pytest.mark.models