mirror of https://github.com/explosion/spaCy.git
Fix caching in tokenizer
This commit is contained in:
parent 8d692771f6
commit fe3c42a06b
@@ -1,4 +1,5 @@
 # cython: embedsignature=True
+# cython: profile=True
 # coding: utf8
 from __future__ import unicode_literals
 
@@ -268,7 +269,7 @@ cdef class Tokenizer:
                           int has_special, int n) except -1:
         cdef int i
         for i in range(n):
-            if tokens[i].lex.id == 0:
+            if self.vocab._by_hash.get(tokens[i].lex.orth) == NULL:
                 return 0
         # See https://github.com/explosion/spaCy/issues/1250
         if has_special:
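Editorial note (not part of the commit): the second hunk replaces the old tokens[i].lex.id == 0 guard with a direct lookup in the vocab's internal hash table, so a tokenization is only saved to the cache when every token's lexeme is actually stored in the vocab, and (per issue #1250) never when special-case rules were involved. Below is a minimal, runnable Python sketch of that decision logic only; a plain dict stands in for the vocab's hash map, and the names save_cached_sketch and vocab_by_hash are hypothetical, not spaCy API.

# Sketch of the cache-saving guard, assuming tokens are (orth, text) pairs
# and vocab_by_hash is a dict standing in for the vocab's hash map.
def save_cached_sketch(cache, vocab_by_hash, key, tokens, has_special):
    # Only cache if every token's orth key is already stored in the vocab;
    # a missing entry means the lexeme is not safely retrievable later.
    for orth, _ in tokens:
        if vocab_by_hash.get(orth) is None:
            return False
    # See https://github.com/explosion/spaCy/issues/1250: results produced
    # via special-case rules are not cached either.
    if has_special:
        return False
    cache[key] = [orth for orth, _ in tokens]
    return True

if __name__ == "__main__":
    vocab = {hash("hello"): "hello", hash("world"): "world"}
    cache = {}
    toks = [(hash("hello"), "hello"), (hash("world"), "world")]
    print(save_cached_sketch(cache, vocab, 1, toks, has_special=False))  # True
    print(save_cached_sketch(cache, vocab, 2, toks, has_special=True))   # False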