Fix index boundaries in Span

This commit is contained in:
Adriane Boyd 2020-07-31 14:18:55 +02:00
parent 4f0843e0ec
commit 8f8a5c3386
2 changed files with 11 additions and 2 deletions

View File

@@ -606,6 +606,7 @@ class Errors:
'nlp = Chinese(config=cfg)') 'nlp = Chinese(config=cfg)')
E1001 = ("Target token outside of matched span for match with tokens " E1001 = ("Target token outside of matched span for match with tokens "
"'{span}' and offset '{index}' matched by patterns '{patterns}'.") "'{span}' and offset '{index}' matched by patterns '{patterns}'.")
E1002 = ("Span index out of range.")
@add_codes @add_codes

View File

@@ -177,9 +177,17 @@ cdef class Span:
return Span(self.doc, start + self.start, end + self.start) return Span(self.doc, start + self.start, end + self.start)
else: else:
if i < 0: if i < 0:
return self.doc[self.end + i] token = self.doc[self.end + i]
if self.start <= token.i < self.end:
return token
else: else:
return self.doc[self.start + i] raise IndexError(Errors.E1002)
else:
token = self.doc[self.start + i]
if self.start <= token.i < self.end:
return token
else:
raise IndexError(Errors.E1002)
def __iter__(self): def __iter__(self):
"""Iterate over `Token` objects. """Iterate over `Token` objects.