diff --git a/spacy/lang.pxd b/spacy/lang.pxd
index d948dd5bb..9f8362b90 100644
--- a/spacy/lang.pxd
+++ b/spacy/lang.pxd
@@ -30,7 +30,7 @@ cdef class Language:
     cpdef readonly Lexicon lexicon
     cpdef readonly object tokens_class
 
-    cpdef list tokenize(self, unicode text)
+    cpdef Tokens tokenize(self, unicode text)
     cpdef Lexeme lookup(self, unicode text)
 
     cdef _tokenize(self, Tokens tokens, unicode string)
diff --git a/spacy/lang.pyx b/spacy/lang.pyx
index 96ec6797c..a31c3925e 100644
--- a/spacy/lang.pyx
+++ b/spacy/lang.pyx
@@ -62,7 +62,7 @@ cdef class Language:
         """
         return self.lexicon.lookup(string)
 
-    cpdef list tokenize(self, unicode string):
+    cpdef Tokens tokenize(self, unicode string):
         """Tokenize a string.
 
         The tokenization rules are defined in two places:
@@ -78,12 +78,12 @@ cdef class Language:
            tokens (Tokens): A Tokens object, giving access to a sequence of LexIDs.
         """
         cdef size_t length = len(string)
+        cdef Tokens tokens = self.tokens_class(length)
         if length == 0:
-            return []
+            return tokens
 
         cdef size_t start = 0
         cdef size_t i = 0
-        cdef Tokens tokens = self.tokens_class()
         for c in string:
             if c == ' ':
                 if start < i:
@@ -92,11 +92,7 @@ cdef class Language:
             i += 1
         if start < i:
             self._tokenize(tokens, string[start:i])
-        assert tokens
-        output = []
-        for i in range(tokens.length):
-            output.append(Lexeme(tokens.lexemes[i]))
-        return output
+        return tokens
 
     cdef _tokenize(self, Tokens tokens, unicode string):
         cdef list lexemes
diff --git a/spacy/tokens.pyx b/spacy/tokens.pyx
index 8495dbae0..75816bebe 100644
--- a/spacy/tokens.pyx
+++ b/spacy/tokens.pyx
@@ -30,6 +30,14 @@ cdef class Tokens:
         self.size = size
         self.length = 0
 
+    def __getitem__(self, i):
+        if i >= self.length:
+            raise IndexError
+        return Lexeme(self.lexemes[i])
+
+    def __len__(self):
+        return self.length
+
     def append(self, Lexeme lexeme):
         self.push_back(lexeme._c)
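
Taken together, these changes make tokenize() return the Tokens container directly, and give Tokens the __getitem__/__len__ sequence protocol, so callers keep list-like access without the old per-call copy into a Python list of Lexeme objects. A rough usage sketch follows; the concrete Language subclass (here called English) is an assumption for illustration and not confirmed by the diff:

    lang = English()                        # hypothetical Language subclass
    tokens = lang.tokenize(u'Hello world')  # now returns a Tokens object, not a list

    print(len(tokens))                      # __len__ reports the token count
    first = tokens[0]                       # __getitem__ wraps the C lexeme in a Lexeme
    for lexeme in tokens:                   # plain for-loops also work: Python calls
        pass                                # __getitem__ with 0, 1, ... until IndexError

Because __getitem__ raises IndexError once the index reaches self.length, ordinary iteration works through Python's legacy iteration protocol even though no __iter__ is defined, and the Lexeme wrappers are only constructed for the items a caller actually touches.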