* Add pipe() method to tokenizer

This commit is contained in:
Matthew Honnibal 2016-02-03 02:32:37 +01:00
parent 4cbad510ff
commit f9e765cae7
1 changed file with 4 additions and 0 deletions

View File

@@ -133,6 +133,10 @@ cdef class Tokenizer:
tokens.c[tokens.length - 1].spacy = string[-1] == ' ' and not in_ws tokens.c[tokens.length - 1].spacy = string[-1] == ' ' and not in_ws
return tokens return tokens
def pipe(self, texts, batch_size=1000, n_threads=2):
    """Lazily tokenize a stream of texts, yielding one result per input.

    Each item of ``texts`` is passed to ``self(...)`` in order, and the
    result is yielded before the next item is consumed.

    ``batch_size`` and ``n_threads`` are accepted but not used by this
    implementation; items are processed one at a time.
    """
    for raw_text in texts:
        tokenized = self(raw_text)
        yield tokenized
cdef int _try_cache(self, hash_t key, Doc tokens) except -1: cdef int _try_cache(self, hash_t key, Doc tokens) except -1:
cached = <_Cached*>self._cache.get(key) cached = <_Cached*>self._cache.get(key)
if cached == NULL: if cached == NULL: