From cf693f0eae487badbfea4f66ec8610c075d67a73 Mon Sep 17 00:00:00 2001
From: Adriane Boyd
Date: Wed, 25 Nov 2020 11:43:05 +0100
Subject: [PATCH] Fix token_match in tokenizer

---
 spacy/tokenizer.pyx | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/spacy/tokenizer.pyx b/spacy/tokenizer.pyx
index 17714940d..24536face 100644
--- a/spacy/tokenizer.pyx
+++ b/spacy/tokenizer.pyx
@@ -404,9 +404,7 @@ cdef class Tokenizer:
         cdef unicode minus_suf
         cdef size_t last_size = 0
         while string and len(string) != last_size:
-            if self.token_match and self.token_match(string) \
-                    and not self.find_prefix(string) \
-                    and not self.find_suffix(string):
+            if self.token_match and self.token_match(string):
                 break
             if with_special_cases and self._specials.get(hash_string(string)) != NULL:
                 break
@@ -679,6 +677,8 @@ cdef class Tokenizer:
                         break
                     suffixes.append(("SUFFIX", substring[split:]))
                     substring = substring[:split]
+                if len(substring) == 0:
+                    continue
                 if token_match(substring):
                     tokens.append(("TOKEN_MATCH", substring))
                     substring = ''
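
Note (not part of the patch): a minimal sketch of how the changed token_match
precedence can be exercised. The regex, the sample string, and the expected
outputs are assumptions inferred from the hunks above, not taken from spaCy's
test suite:

    import re

    import spacy

    nlp = spacy.blank("en")
    # Hypothetical token_match pattern: "(" and ")" are also default English
    # prefix/suffix characters, so the affix rules and token_match compete.
    nlp.tokenizer.token_match = re.compile(r"^\(hello\)$").match

    # First hunk: token_match previously applied only when no prefix or suffix
    # matched, so "(hello)" was split into "(", "hello", ")"; with the fix,
    # token_match is checked first and the string is kept as one token.
    print([t.text for t in nlp("(hello)")])  # expected: ['(hello)']

    # Second hunk keeps explain() in line with the tokenizer: once a substring
    # is reduced to "", the loop restarts instead of running token_match on
    # the empty string (which could emit a spurious ("TOKEN_MATCH", "")).
    print(nlp.tokenizer.explain("(hello)"))  # expected: [('TOKEN_MATCH', '(hello)')]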