mirror of https://github.com/explosion/spaCy.git
Fix caching in tokenizer
parent 8d692771f6
commit fe3c42a06b
@@ -1,4 +1,5 @@
 # cython: embedsignature=True
+# cython: profile=True
 # coding: utf8
 from __future__ import unicode_literals
 
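The added # cython: profile=True directive makes Cython-compiled functions visible to Python's profilers. A minimal sketch of how one might profile the tokenizer after rebuilding with this directive; the model name 'en' and the stats file 'tok.prof' are assumptions for illustration, not part of this commit:

# Hypothetical profiling session; assumes spaCy was rebuilt with
# profile=True so Cython-level tokenizer calls appear in the output.
from __future__ import unicode_literals
import cProfile
import pstats

import spacy

nlp = spacy.load('en')  # assumes an English model is installed
cProfile.runctx("nlp('Profile the tokenizer on this sentence.')",
                globals(), locals(), 'tok.prof')
pstats.Stats('tok.prof').sort_stats('cumulative').print_stats(10)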
@@ -268,7 +269,7 @@ cdef class Tokenizer:
                           int has_special, int n) except -1:
         cdef int i
         for i in range(n):
-            if tokens[i].lex.id == 0:
+            if self.vocab._by_hash.get(tokens[i].lex.orth) == NULL:
                 return 0
         # See https://github.com/explosion/spaCy/issues/1250
         if has_special:
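For context, a minimal pure-Python sketch of the guard this hunk changes; the function save_cached and the dict vocab_by_hash are illustrative stand-ins, not spaCy's C-level API. The old test treated a zero lexeme id as "unsafe to cache", while the new test asks the vocab's hash table directly whether it owns each token's lexeme, which also covers lexemes that carry a nonzero id without being stored in the vocab:

# Assumption-level sketch of Tokenizer._save_cached, not spaCy's real
# code: a plain dict plays the role of vocab._by_hash, and TokenC
# pointers are replaced by dicts.

def save_cached(tokens, vocab_by_hash, has_special, cache, key):
    # Only cache a chunk if every token's lexeme is owned by the vocab;
    # otherwise a cached entry could outlive the lexeme it points to.
    for token in tokens:
        if token['orth'] not in vocab_by_hash:
            return False
    # See https://github.com/explosion/spaCy/issues/1250: chunks built
    # from special-case rules are never cached.
    if has_special:
        return False
    cache[key] = list(tokens)
    return True

Usage under the same assumptions:

cache = {}
vocab = {'hello': 1, 'world': 2}
tokens = [{'orth': 'hello'}, {'orth': 'world'}]
assert save_cached(tokens, vocab, has_special=False, cache=cache, key='hello world')
assert not save_cached(tokens, vocab, has_special=True, cache=cache, key='hello!')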