diff --git a/spacy/tokenizer.pyx b/spacy/tokenizer.pyx index 67ff47743..095fbf4ad 100644 --- a/spacy/tokenizer.pyx +++ b/spacy/tokenizer.pyx @@ -1,4 +1,5 @@ # cython: embedsignature=True +# cython: profile=True # coding: utf8 from __future__ import unicode_literals @@ -268,7 +269,7 @@ cdef class Tokenizer: int has_special, int n) except -1: cdef int i for i in range(n): - if tokens[i].lex.id == 0: + if self.vocab._by_hash.get(tokens[i].lex.orth) == NULL: return 0 # See https://github.com/explosion/spaCy/issues/1250 if has_special: