From d185927998c72fb8163a2456520826fd15907059 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Sat, 4 Nov 2017 23:07:03 +0100
Subject: [PATCH 1/2] Undo harmful pickling hacks on Language class

---
 spacy/language.py | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/spacy/language.py b/spacy/language.py
index bcdb93ef2..c7de79424 100644
--- a/spacy/language.py
+++ b/spacy/language.py
@@ -135,10 +135,6 @@ class Language(object):
         self.pipeline = []
         self._optimizer = None
 
-    def __reduce__(self):
-        bytes_data = self.to_bytes(vocab=False)
-        return (unpickle_language, (self.vocab, self.meta, bytes_data))
-
     @property
     def path(self):
         return self._path
@@ -724,12 +720,6 @@ class DisabledPipes(list):
         self[:] = []
 
 
-def unpickle_language(vocab, meta, bytes_data):
-    lang = Language(vocab=vocab)
-    lang.from_bytes(bytes_data)
-    return lang
-
-
 def _pipe(func, docs):
     for doc in docs:
         func(doc)

From ba0201de072c0be4e353586e4faef387c2d49be0 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Sat, 4 Nov 2017 23:07:57 +0100
Subject: [PATCH 2/2] Update multiprocessing example

---
 examples/pipeline/multi_processing.py | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/examples/pipeline/multi_processing.py b/examples/pipeline/multi_processing.py
index 99bb9c53f..0efb00099 100644
--- a/examples/pipeline/multi_processing.py
+++ b/examples/pipeline/multi_processing.py
@@ -34,21 +34,24 @@ def main(output_dir, model='en_core_web_sm', n_jobs=4, batch_size=1000,
     data, _ = thinc.extra.datasets.imdb()
     texts, _ = zip(*data[-limit:])
     partitions = partition_all(batch_size, texts)
-    items = ((i, [nlp(text) for text in texts], output_dir) for i, texts
-             in enumerate(partitions))
-    Parallel(n_jobs=n_jobs)(delayed(transform_texts)(*item) for item in items)
+    executor = Parallel(n_jobs=n_jobs)
+    do = delayed(transform_texts)
+    tasks = (do(nlp, i, batch, output_dir)
+             for i, batch in enumerate(partitions))
+    executor(tasks)
 
 
-def transform_texts(batch_id, docs, output_dir):
+def transform_texts(nlp, batch_id, texts, output_dir):
+    print(nlp.pipe_names)
     out_path = Path(output_dir) / ('%d.txt' % batch_id)
     if out_path.exists():  # return None in case same batch is called again
         return None
     print('Processing batch', batch_id)
     with out_path.open('w', encoding='utf8') as f:
-        for doc in docs:
+        for doc in nlp.pipe(texts):
             f.write(' '.join(represent_word(w) for w in doc if not w.is_space))
             f.write('\n')
-    print('Saved {} texts to {}.txt'.format(len(docs), batch_id))
+    print('Saved {} texts to {}.txt'.format(len(texts), batch_id))
 
 
 def represent_word(word):
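
Note (not part of the patches above): a minimal, self-contained sketch of the pattern PATCH 2/2 moves the example to, where each joblib worker receives the raw texts plus the nlp object and runs nlp.pipe() itself, instead of pickling pre-processed Doc objects across the process boundary (which the removal of __reduce__ in PATCH 1/2 no longer supports). The model name, the 'outputs' directory, the tiny text list, and the use of plain token text in place of the example's represent_word() helper are all illustrative assumptions, not part of the patched code.

# Sketch under stated assumptions: spaCy 2.x, joblib, and toolz installed.
from pathlib import Path

import spacy
from joblib import Parallel, delayed
from toolz import partition_all


def transform_texts(nlp, batch_id, texts, output_dir):
    # The worker runs the pipeline itself via nlp.pipe(), so no Doc
    # objects ever cross the process boundary.
    out_path = Path(output_dir) / ('%d.txt' % batch_id)
    if out_path.exists():  # skip a batch that was already written
        return
    with out_path.open('w', encoding='utf8') as f:
        for doc in nlp.pipe(texts):
            # The patched example uses represent_word(); plain token
            # text keeps this sketch short.
            f.write(' '.join(w.text for w in doc if not w.is_space))
            f.write('\n')


if __name__ == '__main__':
    nlp = spacy.load('en_core_web_sm')  # assumed model name
    texts = ['An example review.', 'Another example review.']
    Path('outputs').mkdir(exist_ok=True)  # assumed output directory
    executor = Parallel(n_jobs=2)
    do = delayed(transform_texts)
    executor(do(nlp, i, batch, 'outputs')
             for i, batch in enumerate(partition_all(1, texts)))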