mirror of https://github.com/explosion/spaCy.git
* Fix tokenizer
This commit is contained in:
parent
2ae0b439b2
commit
3b5baa660f
|
@ -110,16 +110,12 @@ cdef class Tokenizer:
|
||||||
if cached == NULL:
|
if cached == NULL:
|
||||||
return False
|
return False
|
||||||
cdef int i
|
cdef int i
|
||||||
cdef int less_one = cached.length-1
|
|
||||||
if cached.is_lex:
|
if cached.is_lex:
|
||||||
for i in range(less_one):
|
for i in range(cached.length):
|
||||||
# There's a space at the end of the chunk.
|
|
||||||
tokens.push_back(cached.data.lexemes[i], False)
|
tokens.push_back(cached.data.lexemes[i], False)
|
||||||
tokens.push_back(cached.data.lexemes[less_one], False)
|
|
||||||
else:
|
else:
|
||||||
for i in range(less_one):
|
for i in range(cached.length):
|
||||||
tokens.push_back(&cached.data.tokens[i], False)
|
tokens.push_back(&cached.data.tokens[i], False)
|
||||||
tokens.push_back(&cached.data.tokens[less_one], False)
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
cdef int _tokenize(self, Doc tokens, UniStr* span, int start, int end) except -1:
|
cdef int _tokenize(self, Doc tokens, UniStr* span, int start, int end) except -1:
|
||||||
|
|
Loading…
Reference in New Issue