mirror of https://github.com/explosion/spaCy.git
* Add pipe() method to tokenizer
This commit is contained in:
parent
4cbad510ff
commit
f9e765cae7
|
@ -133,6 +133,10 @@ cdef class Tokenizer:
|
||||||
tokens.c[tokens.length - 1].spacy = string[-1] == ' ' and not in_ws
|
tokens.c[tokens.length - 1].spacy = string[-1] == ' ' and not in_ws
|
||||||
return tokens
|
return tokens
|
||||||
|
|
||||||
|
def pipe(self, texts, batch_size=1000, n_threads=2):
    """Lazily apply this object to each item of an iterable.

    Items are processed one at a time, in the order `texts` produces them;
    nothing is consumed from `texts` until the returned generator is
    advanced.

    texts: Iterable of inputs, each passed unchanged to `self(...)`.
    batch_size: Accepted but not used by this implementation.
    n_threads: Accepted but not used by this implementation.

    Yields: The result of `self(item)` for each item of `texts`.
    """
    # NOTE(review): batch_size / n_threads are presumably kept so the
    # signature lines up with other pipe()-style methods -- confirm.
    for string in texts:
        result = self(string)
        yield result
|
||||||
|
|
||||||
cdef int _try_cache(self, hash_t key, Doc tokens) except -1:
|
cdef int _try_cache(self, hash_t key, Doc tokens) except -1:
|
||||||
cached = <_Cached*>self._cache.get(key)
|
cached = <_Cached*>self._cache.get(key)
|
||||||
if cached == NULL:
|
if cached == NULL:
|
||||||
|
|
Loading…
Reference in New Issue