diff --git a/spacy/tokenizer.pxd b/spacy/tokenizer.pxd
index 828f4550b..9c1398a17 100644
--- a/spacy/tokenizer.pxd
+++ b/spacy/tokenizer.pxd
@@ -34,9 +34,9 @@ cdef class Tokenizer:
                                        vector[SpanC] &filtered)
     cdef int _retokenize_special_spans(self, Doc doc, TokenC* tokens,
                                        object span_data)
-    cdef int _try_cache(self, hash_t key, Doc tokens) except -1
-    cdef int _try_specials(self, hash_t key, Doc tokens,
-                           int* has_special) except -1
+    cdef int _try_specials_and_cache(self, hash_t key, Doc tokens,
+                                     int* has_special,
+                                     bint with_special_cases) except -1
     cdef int _tokenize(self, Doc tokens, unicode span, hash_t key,
                        int* has_special, bint with_special_cases) except -1
     cdef unicode _split_affixes(self, Pool mem, unicode string,
diff --git a/spacy/tokenizer.pyx b/spacy/tokenizer.pyx
index 12c634e61..759de90d3 100644
--- a/spacy/tokenizer.pyx
+++ b/spacy/tokenizer.pyx
@@ -169,8 +169,6 @@ cdef class Tokenizer:
         cdef int i = 0
         cdef int start = 0
         cdef int has_special = 0
-        cdef bint specials_hit = 0
-        cdef bint cache_hit = 0
         cdef bint in_ws = string[0].isspace()
         cdef unicode span
         # The task here is much like string.split, but not quite
@@ -186,13 +184,7 @@ cdef class Tokenizer:
                     # we don't have to create the slice when we hit the cache.
                     span = string[start:i]
                     key = hash_string(span)
-                    specials_hit = 0
-                    cache_hit = 0
-                    if with_special_cases:
-                        specials_hit = self._try_specials(key, doc, &has_special)
-                    if not specials_hit:
-                        cache_hit = self._try_cache(key, doc)
-                    if not specials_hit and not cache_hit:
+                    if not self._try_specials_and_cache(key, doc, &has_special, with_special_cases):
                         self._tokenize(doc, span, key, &has_special, with_special_cases)
                 if uc == ' ':
                     doc.c[doc.length - 1].spacy = True
@@ -204,13 +196,7 @@ cdef class Tokenizer:
         if start < i:
             span = string[start:]
             key = hash_string(span)
-            specials_hit = 0
-            cache_hit = 0
-            if with_special_cases:
-                specials_hit = self._try_specials(key, doc, &has_special)
-            if not specials_hit:
-                cache_hit = self._try_cache(key, doc)
-            if not specials_hit and not cache_hit:
+            if not self._try_specials_and_cache(key, doc, &has_special, with_special_cases):
                 self._tokenize(doc, span, key, &has_special, with_special_cases)
         doc.c[doc.length - 1].spacy = string[-1] == " " and not in_ws
         return doc
@@ -364,27 +350,33 @@ cdef class Tokenizer:
             offset += span[3]
         return offset
 
-    cdef int _try_cache(self, hash_t key, Doc tokens) except -1:
-        cached = <_Cached*>self._cache.get(key)
-        if cached == NULL:
-            return False
+    cdef int _try_specials_and_cache(self, hash_t key, Doc tokens, int* has_special, bint with_special_cases) except -1:
+        cdef bint specials_hit = 0
+        cdef bint cache_hit = 0
         cdef int i
-        if cached.is_lex:
-            for i in range(cached.length):
-                tokens.push_back(cached.data.lexemes[i], False)
-        else:
-            for i in range(cached.length):
-                tokens.push_back(&cached.data.tokens[i], False)
-        return True
-
-    cdef int _try_specials(self, hash_t key, Doc tokens, int* has_special) except -1:
-        cached = <_Cached*>self._specials.get(key)
-        if cached == NULL:
+        if with_special_cases:
+            cached = <_Cached*>self._specials.get(key)
+            if cached == NULL:
+                specials_hit = False
+            else:
+                for i in range(cached.length):
+                    tokens.push_back(&cached.data.tokens[i], False)
+                has_special[0] = 1
+                specials_hit = True
+        if not specials_hit:
+            cached = <_Cached*>self._cache.get(key)
+            if cached == NULL:
+                cache_hit = False
+            else:
+                if cached.is_lex:
+                    for i in range(cached.length):
+                        tokens.push_back(cached.data.lexemes[i], False)
+                else:
+                    for i in range(cached.length):
+                        tokens.push_back(&cached.data.tokens[i], False)
+                cache_hit = True
+        if not specials_hit and not cache_hit:
             return False
-        cdef int i
-        for i in range(cached.length):
-            tokens.push_back(&cached.data.tokens[i], False)
-        has_special[0] = 1
         return True
 
     cdef int _tokenize(self, Doc tokens, unicode span, hash_t orig_key, int* has_special, bint with_special_cases) except -1:
@@ -462,12 +454,7 @@ cdef class Tokenizer:
             for i in range(prefixes.size()):
                 tokens.push_back(prefixes[0][i], False)
         if string:
-            if with_special_cases:
-                specials_hit = self._try_specials(hash_string(string), tokens,
-                                                  has_special)
-            if not specials_hit:
-                cache_hit = self._try_cache(hash_string(string), tokens)
-            if specials_hit or cache_hit:
+            if self._try_specials_and_cache(hash_string(string), tokens, has_special, with_special_cases):
                 pass
             elif (self.token_match and self.token_match(string)) or \
                     (self.url_match and \
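Note (not part of the diff): the consolidated `_try_specials_and_cache` consults the special-cases table first, and only when `with_special_cases` is set, falling back to the ordinary cache on a miss, so special cases keep precedence over cached tokenizations. A minimal sketch of the user-visible behaviour this preserves, via spaCy's public `add_special_case` API; the `"gimme"` rule below is purely illustrative and not part of this change:

```python
import spacy
from spacy.symbols import ORTH

nlp = spacy.blank("en")
# Special-case rules live in the tokenizer's _specials table and are
# checked before the general cache when with_special_cases is enabled.
nlp.tokenizer.add_special_case("gimme", [{ORTH: "gim"}, {ORTH: "me"}])

print([t.text for t in nlp("gimme that")])   # ['gim', 'me', 'that']  (specials path)
print([t.text for t in nlp("hello world")])  # ['hello', 'world']     (cache/affix path)
```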