From 5928d158ce4a719d6a3343bbb0ddd005885c4027 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Thu, 22 Jan 2015 02:04:58 +1100 Subject: [PATCH] * Pass the string to Tokens --- spacy/tokenizer.pyx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/spacy/tokenizer.pyx b/spacy/tokenizer.pyx index fef6916b8..f540eeb88 100644 --- a/spacy/tokenizer.pyx +++ b/spacy/tokenizer.pyx @@ -33,7 +33,7 @@ cdef class Tokenizer: cpdef Tokens tokens_from_list(self, list strings): cdef int length = sum([len(s) for s in strings]) - cdef Tokens tokens = Tokens(self.vocab, length) + cdef Tokens tokens = Tokens(self.vocab, ' '.join(strings)) if length == 0: return tokens cdef UniStr string_struct @@ -66,7 +66,7 @@ cdef class Tokenizer: tokens (Tokens): A Tokens object, giving access to a sequence of LexemeCs. """ cdef int length = len(string) - cdef Tokens tokens = Tokens(self.vocab, length) + cdef Tokens tokens = Tokens(self.vocab, string) if length == 0: return tokens cdef int i = 0