mirror of https://github.com/explosion/spaCy.git
* Switch to using a heap-allocated vector in tokens
This commit is contained in:
parent
f77b7098c0
commit
08cef75ffd
|
@@ -219,7 +219,7 @@ cdef class Language:
|
|||
lexemes = <LexemeC**>calloc(len(tokens) - first_token, sizeof(LexemeC*))
|
||||
cdef size_t j
|
||||
for i, j in enumerate(range(first_token, tokens.v.size())):
|
||||
lexemes[i] = tokens.v[j]
|
||||
lexemes[i] = tokens.v[0][j]
|
||||
self.cache.set(key, lexemes)
|
||||
|
||||
cdef int _split_one(self, Py_UNICODE* characters, size_t length):
|
||||
|
|
|
@@ -3,7 +3,7 @@ from libcpp.vector cimport vector
|
|||
|
||||
|
||||
cdef class Tokens:
|
||||
cdef vector[LexemeC*] v
|
||||
cdef vector[LexemeC*] *v
|
||||
|
||||
cpdef size_t id(self, size_t i) except 0
|
||||
cpdef unicode string(self, size_t i)
|
||||
|
|
|
@@ -65,6 +65,7 @@ cdef class Tokens:
|
|||
"""
|
||||
def __cinit__(self, string_length=0):
|
||||
size = int(string_length / 3) if string_length >= 3 else 1
|
||||
self.v = new vector[LexemeC*]()
|
||||
self.v.reserve(size)
|
||||
|
||||
def __getitem__(self, i):
|
||||
|
@@ -73,11 +74,14 @@ cdef class Tokens:
|
|||
def __len__(self):
|
||||
return self.v.size()
|
||||
|
||||
def __dealloc__(self):
|
||||
del self.v
|
||||
|
||||
def append(self, Lexeme lexeme):
|
||||
self.v.push_back(lexeme._c)
|
||||
|
||||
cpdef unicode string(self, size_t i):
|
||||
cdef bytes utf8_string = self.v[i].string[:self.v[i].length]
|
||||
cdef bytes utf8_string = self.v.at(i).string[:self.v.at(i).length]
|
||||
cdef unicode string = utf8_string.decode('utf8')
|
||||
return string
|
||||
|
||||
|
@@ -91,7 +95,7 @@ cdef class Tokens:
|
|||
return self.v.at(i).cluster
|
||||
|
||||
cpdef bint check_flag(self, size_t i, size_t flag_id) except *:
|
||||
return lexeme_check_flag(self.v[i], flag_id)
|
||||
return lexeme_check_flag(self.v.at(i), flag_id)
|
||||
|
||||
cpdef unicode string_view(self, size_t i, size_t view_id):
|
||||
return lexeme_string_view(self.v.at(i), view_id)
|
||||
|
|
Loading…
Reference in New Issue