From 33b0f86de3a0c1f45ae4c2e7abdad4ac4ef316d1 Mon Sep 17 00:00:00 2001
From: Felix Sonntag <felix.sonntag@outlook.com>
Date: Sun, 19 Nov 2017 15:14:40 +0100
Subject: [PATCH] Changed tokenizer to add infix even when infix_start equals the start offset

---
 spacy/tokenizer.pyx | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/spacy/tokenizer.pyx b/spacy/tokenizer.pyx
index 095fbf4ad..600c81fff 100644
--- a/spacy/tokenizer.pyx
+++ b/spacy/tokenizer.pyx
@@ -241,11 +241,10 @@ cdef class Tokenizer:
                     for match in matches:
                         infix_start = match.start()
                         infix_end = match.end()
-                        if infix_start == start:
-                            continue
 
-                        span = string[start:infix_start]
-                        tokens.push_back(self.vocab.get(tokens.mem, span), False)
+                        if infix_start != start:
+                            span = string[start:infix_start]
+                            tokens.push_back(self.vocab.get(tokens.mem, span), False)
 
                         if infix_start != infix_end:
                             # If infix_start != infix_end, it means the infix