From fe3c42a06b89dd819c2329af6a3bbc46b51a869f Mon Sep 17 00:00:00 2001
From: Matthew Honnibal <honnibal+gh@gmail.com>
Date: Wed, 15 Nov 2017 13:55:46 +0100
Subject: [PATCH] Fix caching in tokenizer

---
 spacy/tokenizer.pyx | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/spacy/tokenizer.pyx b/spacy/tokenizer.pyx
index 67ff47743..095fbf4ad 100644
--- a/spacy/tokenizer.pyx
+++ b/spacy/tokenizer.pyx
@@ -1,4 +1,5 @@
 # cython: embedsignature=True
+# cython: profile=True
 # coding: utf8
 from __future__ import unicode_literals
 
@@ -268,7 +269,7 @@ cdef class Tokenizer:
                           int has_special, int n) except -1:
         cdef int i
         for i in range(n):
-            if tokens[i].lex.id == 0:
+            if self.vocab._by_hash.get(tokens[i].lex.orth) == NULL:
                 return 0
         # See https://github.com/explosion/spaCy/issues/1250
         if has_special: