From 3b5baa660fa4176c4fd603de2f6ccd966973439e Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Tue, 14 Jul 2015 00:10:51 +0200
Subject: [PATCH] * Fix tokenizer

---
 spacy/tokenizer.pyx | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/spacy/tokenizer.pyx b/spacy/tokenizer.pyx
index df10c35df..d174ca71a 100644
--- a/spacy/tokenizer.pyx
+++ b/spacy/tokenizer.pyx
@@ -110,16 +110,12 @@ cdef class Tokenizer:
         if cached == NULL:
             return False
         cdef int i
-        cdef int less_one = cached.length-1
         if cached.is_lex:
-            for i in range(less_one):
-                # There's a space at the end of the chunk.
+            for i in range(cached.length):
                 tokens.push_back(cached.data.lexemes[i], False)
-            tokens.push_back(cached.data.lexemes[less_one], False)
         else:
-            for i in range(less_one):
+            for i in range(cached.length):
                 tokens.push_back(&cached.data.tokens[i], False)
-            tokens.push_back(&cached.data.tokens[less_one], False)
         return True
 
     cdef int _tokenize(self, Doc tokens, UniStr* span, int start, int end) except -1:
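
Note: for readers outside the Cython source, here is a minimal pure-Python
sketch of the cache-lookup behaviour after this patch. The names (Cached,
Doc.push_back, try_cache) only loosely mirror the C-level structs and are
illustrative assumptions, not spaCy's API. The point of the fix: every
cached entry is now pushed back by one uniform loop, instead of iterating
to length - 1 and pushing the final entry in a separate, special-cased call.

    from dataclasses import dataclass, field

    @dataclass
    class Cached:
        """Loose stand-in for the C-level _Cached struct."""
        entries: list  # pre-tokenized pieces for one chunk of text

    @dataclass
    class Doc:
        """Loose stand-in: collects (token, trailing_space) pairs."""
        tokens: list = field(default_factory=list)

        def push_back(self, entry, has_space):
            self.tokens.append((entry, has_space))

    def try_cache(cache, key, doc):
        # After the patch: one uniform loop over all cached entries.
        # The old code looped to length - 1 and pushed the last entry
        # in a separate call with the same arguments, which this
        # commit removes.
        cached = cache.get(key)
        if cached is None:
            return False
        for entry in cached.entries:
            doc.push_back(entry, False)  # False: no trailing space recorded
        return True

    # Usage sketch:
    cache = {"don't": Cached(entries=["do", "n't"])}
    doc = Doc()
    assert try_cache(cache, "don't", doc)
    assert doc.tokens == [("do", False), ("n't", False)]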