diff --git a/spacy/tokenizer.pyx b/spacy/tokenizer.pyx
index b6dd9f7f5..ad3a500a3 100644
--- a/spacy/tokenizer.pyx
+++ b/spacy/tokenizer.pyx
@@ -133,6 +133,10 @@ cdef class Tokenizer:
         tokens.c[tokens.length - 1].spacy = string[-1] == ' ' and not in_ws
         return tokens
 
+    def pipe(self, texts, batch_size=1000, n_threads=2):
+        for text in texts:
+            yield self(text)
+
     cdef int _try_cache(self, hash_t key, Doc tokens) except -1:
         cached = <_Cached*>self._cache.get(key)
         if cached == NULL:
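For context, a minimal usage sketch of the new generator. The loader call and model name below are assumptions for illustration, not part of the diff:

```python
# Sketch only: assumes spaCy with an English model installed.
import spacy

nlp = spacy.load('en')  # assumption: loader/model name, not from the diff
texts = ["First document.", "Second document."]

# Tokenizer.pipe() lazily yields one Doc per input text. batch_size and
# n_threads are accepted for signature compatibility with other pipeline
# components, but this implementation tokenizes texts one at a time.
for doc in nlp.tokenizer.pipe(texts, batch_size=1000, n_threads=2):
    print([token.text for token in doc])
```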