mirror of https://github.com/explosion/spaCy.git
Fix caching in tokenizer
parent 8d692771f6
commit fe3c42a06b
@@ -1,4 +1,5 @@
 # cython: embedsignature=True
+# cython: profile=True
 # coding: utf8
 from __future__ import unicode_literals
 
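The added # cython: profile=True directive makes Cython-compiled functions visible to Python's profilers. A minimal sketch of how one might profile the tokenizer after rebuilding with this directive; the model name 'en' and the stats file 'tok.prof' are assumptions for illustration, not part of this commit:

# Hypothetical profiling session; assumes spaCy was rebuilt with
# profile=True so Cython-level tokenizer calls appear in the output.
from __future__ import unicode_literals
import cProfile
import pstats

import spacy

nlp = spacy.load('en')  # assumes an English model is installed
cProfile.runctx("nlp('Profile the tokenizer on this sentence.')",
                globals(), locals(), 'tok.prof')
pstats.Stats('tok.prof').sort_stats('cumulative').print_stats(10)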
@@ -268,7 +269,7 @@ cdef class Tokenizer:
                           int has_special, int n) except -1:
         cdef int i
         for i in range(n):
-            if tokens[i].lex.id == 0:
+            if self.vocab._by_hash.get(tokens[i].lex.orth) == NULL:
                 return 0
         # See https://github.com/explosion/spaCy/issues/1250
         if has_special:
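For context, a minimal pure-Python sketch of the guard this hunk changes; the function save_cached and the dict vocab_by_hash are illustrative stand-ins, not spaCy's C-level API. The old test treated a zero lexeme id as "unsafe to cache", while the new test asks the vocab's hash table directly whether it owns each token's lexeme, which also covers lexemes that carry a nonzero id without being stored in the vocab:

# Assumption-level sketch of Tokenizer._save_cached, not spaCy's real
# code: a plain dict plays the role of vocab._by_hash, and TokenC
# pointers are replaced by dicts.

def save_cached(tokens, vocab_by_hash, has_special, cache, key):
    # Only cache a chunk if every token's lexeme is owned by the vocab;
    # otherwise a cached entry could outlive the lexeme it points to.
    for token in tokens:
        if token['orth'] not in vocab_by_hash:
            return False
    # See https://github.com/explosion/spaCy/issues/1250: chunks built
    # from special-case rules are never cached.
    if has_special:
        return False
    cache[key] = list(tokens)
    return True

Usage under the same assumptions:

cache = {}
vocab = {'hello': 1, 'world': 2}
tokens = [{'orth': 'hello'}, {'orth': 'world'}]
assert save_cached(tokens, vocab, has_special=False, cache=cache, key='hello world')
assert not save_cached(tokens, vocab, has_special=True, cache=cache, key='hello!')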