Document as_tuples keyword arg of Language.pipe

This commit is contained in:
Matthew Honnibal 2017-08-19 12:21:33 +02:00
parent 80236116a6
commit 97aabafb5f
1 changed file with 7 additions and 2 deletions

View File

@ -430,11 +430,16 @@ class Language(object):
except StopIteration:
pass
def pipe(self, texts, tuples=False, n_threads=2, batch_size=1000, disable=[]):
def pipe(self, texts, as_tuples=False, n_threads=2, batch_size=1000,
disable=[]):
"""Process texts as a stream, and yield `Doc` objects in order. Supports
GIL-free multi-threading.
texts (iterator): A sequence of texts to process.
as_tuples (bool):
If set to True, inputs should be a sequence of
(text, context) tuples. Output will then be a sequence of
(doc, context) tuples. Defaults to False.
n_threads (int): The number of worker threads to use. If -1, OpenMP will
decide how many to use at run time. Default is 2.
batch_size (int): The number of texts to buffer.
@ -446,7 +451,7 @@ class Language(object):
>>> for doc in nlp.pipe(texts, batch_size=50, n_threads=4):
>>> assert doc.is_parsed
"""
if tuples:
if as_tuples:
text_context1, text_context2 = itertools.tee(texts)
texts = (tc[0] for tc in text_context1)
contexts = (tc[1] for tc in text_context2)