Fix caching in tokenizer

Matthew Honnibal 2017-11-15 13:55:46 +01:00
parent 8d692771f6
commit fe3c42a06b
1 changed file with 2 additions and 1 deletion

@@ -1,4 +1,5 @@
 # cython: embedsignature=True
+# cython: profile=True
 # coding: utf8
 from __future__ import unicode_literals
@@ -268,7 +269,7 @@ cdef class Tokenizer:
                           int has_special, int n) except -1:
         cdef int i
         for i in range(n):
-            if tokens[i].lex.id == 0:
+            if self.vocab._by_hash.get(tokens[i].lex.orth) == NULL:
                 return 0
         # See https://github.com/explosion/spaCy/issues/1250
         if has_special:
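
For context, _save_cached stores a tokenization in the tokenizer's cache, keyed by the hash of the chunk. The new guard checks each token's lexeme directly against the vocabulary's hash table before caching, instead of inferring ownership from lex.id, so the cache only ever references lexemes the vocab actually owns. A minimal pure-Python sketch of that rule, assuming dict-backed structures (save_cached, vocab_by_hash, and cache are illustrative names, not spaCy's API):

    def save_cached(tokens, key, vocab_by_hash, cache):
        # Refuse to cache unless every lexeme is present in the
        # vocabulary's hash table; a cached entry backed by a lexeme
        # the vocab does not own could become stale or dangle
        # (the analogue of the NULL check against _by_hash above).
        for token in tokens:
            if token.orth not in vocab_by_hash:
                return False
        cache[key] = [token.orth for token in tokens]
        return True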