mirror of https://github.com/explosion/spaCy.git
* Fix negative indices in tokens
This commit is contained in:
parent
b65b0c07bf
commit
f149259bf5
|
@ -81,12 +81,29 @@ cdef class Tokens:
|
||||||
self.max_length = size
|
self.max_length = size
|
||||||
self.length = 0
|
self.length = 0
|
||||||
|
|
||||||
|
def sentences(self):
    """Split the tokens into sentences at '.', '?' and '!' tokens.

    Every token is copied, in order, into a fresh ``Tokens`` container
    built from the same vocab.  A container is closed right after a token
    whose ``lex.sic`` equals the string id of one of the three
    terminators, so the terminator stays attached to its sentence.
    Tokens that follow the last terminator are returned as a final,
    unterminated sentence instead of being dropped.

    Returns:
        sentences (list): ``Tokens`` objects in document order; empty
            when this container holds no tokens.
    """
    cdef int i
    # Typed explicitly so the C-level push_back call (pointer argument)
    # does not depend on type inference.
    cdef Tokens sent = Tokens(self.vocab)
    cdef attr_t period = self.vocab.strings['.']
    cdef attr_t question = self.vocab.strings['?']
    cdef attr_t exclamation = self.vocab.strings['!']
    sentences = []
    # push_back takes the running offset and returns the next one, so it
    # needs a defined starting value before the first call.
    # NOTE(review): the offset keeps counting across sentence boundaries
    # here -- confirm whether it should restart at 0 for each sentence.
    idx = 0
    for i in range(self.length):
        idx = sent.push_back(idx, &self.data[i])
        if self.data[i].lex.sic == period or \
                self.data[i].lex.sic == question or \
                self.data[i].lex.sic == exclamation:
            sentences.append(sent)
            sent = Tokens(self.vocab)
    # Flush trailing tokens that were not closed by a terminator.
    if sent.length != 0:
        sentences.append(sent)
    return sentences
|
||||||
|
|
||||||
def __getitem__(self, i):
    """Retrieve a token by position.

    Negative positions count back from the end, following the usual
    Python sequence convention: ``tokens[-1]`` is the last token.

    Args:
        i (int): Position of the token; may be negative.

    Returns:
        token (Token): View of the token at the resolved position.
    """
    if i < 0:
        # Translate an end-relative index into an absolute one:
        # -1 -> length - 1, -length -> 0.
        i = self.length + i
    # NOTE(review): bounds_check presumably rejects out-of-range
    # positions -- confirm which exception it raises.
    bounds_check(i, self.length, PADDING)
    return Token(self, i)
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue