From 582be8746c41ef4ea5bd7bdf07f15ebc5bcd6d09 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Thu, 21 Feb 2019 10:33:16 +0100
Subject: [PATCH] Update multi_processing example

---
 examples/pipeline/multi_processing.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/examples/pipeline/multi_processing.py b/examples/pipeline/multi_processing.py
index 07d3165f9..4c4d29c13 100644
--- a/examples/pipeline/multi_processing.py
+++ b/examples/pipeline/multi_processing.py
@@ -10,12 +10,13 @@ Compatible with: spaCy v2.0.0+
 """
 from __future__ import print_function, unicode_literals
 
-from toolz import partition_all
 from pathlib import Path
 from joblib import Parallel, delayed
+from functools import partial
 import thinc.extra.datasets
 import plac
 import spacy
+from spacy.util import minibatch
 
 
 @plac.annotations(
@@ -35,10 +36,10 @@ def main(output_dir, model="en_core_web_sm", n_jobs=4, batch_size=1000, limit=10
     data, _ = thinc.extra.datasets.imdb()
     texts, _ = zip(*data[-limit:])
     print("Processing texts...")
-    partitions = partition_all(batch_size, texts)
-    executor = Parallel(n_jobs=n_jobs)
-    do = delayed(transform_texts)
-    tasks = (do(nlp, i, batch, output_dir) for i, batch in enumerate(partitions))
+    partitions = minibatch(texts, size=batch_size)
+    executor = Parallel(n_jobs=n_jobs, backend="multiprocessing", prefer="processes")
+    do = delayed(partial(transform_texts, nlp))
+    tasks = (do(i, batch, output_dir) for i, batch in enumerate(partitions))
     executor(tasks)