From ecbb9c4b9f89120ba04642852780d592c024b6ef Mon Sep 17 00:00:00 2001
From: svlandeg <sofie.vanlandeghem@gmail.com>
Date: Wed, 12 Feb 2020 11:50:42 +0100
Subject: [PATCH 1/8] load Underscore state when multiprocessing

---
 spacy/language.py          | 11 ++++++++---
 spacy/tokens/underscore.py |  8 ++++++++
 2 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/spacy/language.py b/spacy/language.py
index 5544b6341..71180a65d 100644
--- a/spacy/language.py
+++ b/spacy/language.py
@@ -15,6 +15,7 @@ import multiprocessing as mp
 from itertools import chain, cycle
 
 from .tokenizer import Tokenizer
+from .tokens.underscore import Underscore
 from .vocab import Vocab
 from .lemmatizer import Lemmatizer
 from .lookups import Lookups
@@ -852,7 +853,10 @@ class Language(object):
         sender.send()
 
         procs = [
-            mp.Process(target=_apply_pipes, args=(self.make_doc, pipes, rch, sch))
+            mp.Process(
+                target=_apply_pipes,
+                args=(self.make_doc, pipes, rch, sch, Underscore.get_state()),
+            )
             for rch, sch in zip(texts_q, bytedocs_send_ch)
         ]
         for proc in procs:
@@ -1107,7 +1111,7 @@ def _pipe(docs, proc, kwargs):
         yield doc
 
 
-def _apply_pipes(make_doc, pipes, reciever, sender):
+def _apply_pipes(make_doc, pipes, receiver, sender, underscore_state):
     """Worker for Language.pipe
 
     receiver (multiprocessing.Connection): Pipe to receive text. Usually
@@ -1115,8 +1119,9 @@ def _apply_pipes(make_doc, pipes, reciever, sender):
     sender (multiprocessing.Connection): Pipe to send doc. Usually created by
         `multiprocessing.Pipe()`
     """
+    Underscore.load_state(underscore_state)
     while True:
-        texts = reciever.get()
+        texts = receiver.get()
         docs = (make_doc(text) for text in texts)
         for pipe in pipes:
             docs = pipe(docs)
diff --git a/spacy/tokens/underscore.py b/spacy/tokens/underscore.py
index b36fe9294..8dac8526e 100644
--- a/spacy/tokens/underscore.py
+++ b/spacy/tokens/underscore.py
@@ -79,6 +79,14 @@ class Underscore(object):
     def _get_key(self, name):
         return ("._.", name, self._start, self._end)
 
+    @classmethod
+    def get_state(cls):
+        return cls.token_extensions, cls.span_extensions, cls.doc_extensions
+
+    @classmethod
+    def load_state(cls, state):
+        cls.token_extensions, cls.span_extensions, cls.doc_extensions = state
+
 
 def get_ext_args(**kwargs):
     """Validate and convert arguments. Reused in Doc, Token and Span."""

From 05dedaa2cf2e57469ac860fbd0af638c27c02148 Mon Sep 17 00:00:00 2001
From: svlandeg <sofie.vanlandeghem@gmail.com>
Date: Wed, 12 Feb 2020 12:00:13 +0100
Subject: [PATCH 2/8] add unit test

---
 spacy/tests/regression/test_issue4903.py | 40 ++++++++++++++++++++++++
 spacy/tests/regression/test_issue4924.py |  2 +-
 2 files changed, 41 insertions(+), 1 deletion(-)
 create mode 100644 spacy/tests/regression/test_issue4903.py

diff --git a/spacy/tests/regression/test_issue4903.py b/spacy/tests/regression/test_issue4903.py
new file mode 100644
index 000000000..97293aec7
--- /dev/null
+++ b/spacy/tests/regression/test_issue4903.py
@@ -0,0 +1,40 @@
+# coding: utf8
+from __future__ import unicode_literals
+
+import spacy
+from spacy.tokens import Span, Doc
+
+
+class CustomPipe:
+    name = "my_pipe"
+
+    def __init__(self):
+        Span.set_extension("my_ext", getter=self._get_my_ext)
+        Doc.set_extension("my_ext", default=None)
+
+    def __call__(self, doc):
+        gathered_ext = []
+        for sent in doc.sents:
+            sent_ext = self._get_my_ext(sent)
+            sent._.set("my_ext", sent_ext)
+            gathered_ext.append(sent_ext)
+
+        doc._.set("my_ext", "\n".join(gathered_ext))
+
+        return doc
+
+    @staticmethod
+    def _get_my_ext(span):
+        return str(span.end)
+
+
+def test_issue4903():
+    # ensures that this runs correctly and doesn't hang or crash on Windows / macOS
+    nlp = spacy.load("en_core_web_sm")
+    custom_component = CustomPipe()
+    nlp.add_pipe(custom_component, after="parser")
+
+    text = ["I like bananas.", "Do you like them?", "No, I prefer wasabi."]
+    # works without 'n_process'
+    for doc in nlp.pipe(text, n_process=2):
+        print(doc)
diff --git a/spacy/tests/regression/test_issue4924.py b/spacy/tests/regression/test_issue4924.py
index 8aea2c3d5..0e45291a9 100644
--- a/spacy/tests/regression/test_issue4924.py
+++ b/spacy/tests/regression/test_issue4924.py
@@ -11,6 +11,6 @@ def nlp():
     return spacy.blank("en")
 
 
-def test_evaluate(nlp):
+def test_issue4924(nlp):
     docs_golds = [("", {})]
     nlp.evaluate(docs_golds)

From 65f5b48b5db0e8e11e73e505469ccdb38e8f07af Mon Sep 17 00:00:00 2001
From: svlandeg <sofie.vanlandeghem@gmail.com>
Date: Wed, 12 Feb 2020 12:06:27 +0100
Subject: [PATCH 3/8] add comment

---
 spacy/language.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/spacy/language.py b/spacy/language.py
index 71180a65d..737e0bf3c 100644
--- a/spacy/language.py
+++ b/spacy/language.py
@@ -1118,6 +1118,7 @@ def _apply_pipes(make_doc, pipes, receiver, sender, underscore_state):
         created by `multiprocessing.Pipe()`
     sender (multiprocessing.Connection): Pipe to send doc. Usually created by
         `multiprocessing.Pipe()`
+    underscore_state (tuple): The data in the Underscore class of the parent
     """
     Underscore.load_state(underscore_state)
     while True:

From 51d37033c8b2f280cfc0ddf2b1ecf0537f347532 Mon Sep 17 00:00:00 2001
From: svlandeg <sofie.vanlandeghem@gmail.com>
Date: Wed, 12 Feb 2020 12:10:05 +0100
Subject: [PATCH 4/8] remove old comment

---
 spacy/tests/regression/test_issue4903.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/spacy/tests/regression/test_issue4903.py b/spacy/tests/regression/test_issue4903.py
index 97293aec7..d09b32849 100644
--- a/spacy/tests/regression/test_issue4903.py
+++ b/spacy/tests/regression/test_issue4903.py
@@ -35,6 +35,5 @@ def test_issue4903():
     nlp.add_pipe(custom_component, after="parser")
 
     text = ["I like bananas.", "Do you like them?", "No, I prefer wasabi."]
-    # works without 'n_process'
     for doc in nlp.pipe(text, n_process=2):
         print(doc)

From 46628d88903edaa2c3614339a0d464b9fcdcc690 Mon Sep 17 00:00:00 2001
From: svlandeg <sofie.vanlandeghem@gmail.com>
Date: Wed, 12 Feb 2020 12:12:52 +0100
Subject: [PATCH 5/8] add some asserts

---
 spacy/tests/regression/test_issue4903.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/spacy/tests/regression/test_issue4903.py b/spacy/tests/regression/test_issue4903.py
index d09b32849..0a255d9a8 100644
--- a/spacy/tests/regression/test_issue4903.py
+++ b/spacy/tests/regression/test_issue4903.py
@@ -35,5 +35,7 @@ def test_issue4903():
     nlp.add_pipe(custom_component, after="parser")
 
     text = ["I like bananas.", "Do you like them?", "No, I prefer wasabi."]
-    for doc in nlp.pipe(text, n_process=2):
-        print(doc)
+    docs = list(nlp.pipe(text, n_process=2))
+    assert docs[0].text == "I like bananas."
+    assert docs[1].text == "Do you like them?"
+    assert docs[2].text == "No, I prefer wasabi."

From 7939c6388656e1abb932b2deb1af90928c297aa2 Mon Sep 17 00:00:00 2001
From: svlandeg <sofie.vanlandeghem@gmail.com>
Date: Wed, 12 Feb 2020 12:26:27 +0100
Subject: [PATCH 6/8] use English instead of model

---
 spacy/tests/regression/test_issue4903.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/spacy/tests/regression/test_issue4903.py b/spacy/tests/regression/test_issue4903.py
index 0a255d9a8..82e21b79f 100644
--- a/spacy/tests/regression/test_issue4903.py
+++ b/spacy/tests/regression/test_issue4903.py
@@ -2,6 +2,7 @@
 from __future__ import unicode_literals
 
 import spacy
+from spacy.lang.en import English
 from spacy.tokens import Span, Doc
 
 
@@ -30,9 +31,10 @@ class CustomPipe:
 
 def test_issue4903():
     # ensures that this runs correctly and doesn't hang or crash on Windows / macOS
-    nlp = spacy.load("en_core_web_sm")
+    nlp = English()
     custom_component = CustomPipe()
-    nlp.add_pipe(custom_component, after="parser")
+    nlp.add_pipe(nlp.create_pipe("sentencizer"))
+    nlp.add_pipe(custom_component, after="sentencizer")
 
     text = ["I like bananas.", "Do you like them?", "No, I prefer wasabi."]
     docs = list(nlp.pipe(text, n_process=2))

From 6e717c62ed2d0407b37ae0e19c033964425419cc Mon Sep 17 00:00:00 2001
From: svlandeg <sofie.vanlandeghem@gmail.com>
Date: Wed, 12 Feb 2020 13:21:31 +0100
Subject: [PATCH 7/8] avoid the tests interacting with each other through the
 global Underscore variable

---
 spacy/tests/regression/test_issue4849.py | 6 ++++++
 spacy/tests/regression/test_issue4903.py | 7 +++++++
 2 files changed, 13 insertions(+)

diff --git a/spacy/tests/regression/test_issue4849.py b/spacy/tests/regression/test_issue4849.py
index 834219773..7e58243bc 100644
--- a/spacy/tests/regression/test_issue4849.py
+++ b/spacy/tests/regression/test_issue4849.py
@@ -3,11 +3,17 @@ from __future__ import unicode_literals
 
 from spacy.lang.en import English
 from spacy.pipeline import EntityRuler
+from spacy.tokens.underscore import Underscore
 
 
 def test_issue4849():
     nlp = English()
 
+    # reset the Underscore object because test_underscore has a lambda function that can't be pickled
+    Underscore.doc_extensions = {}
+    Underscore.span_extensions = {}
+    Underscore.token_extensions = {}
+
     ruler = EntityRuler(
         nlp, patterns=[
             {"label": "PERSON", "pattern": 'joe biden', "id": 'joe-biden'},
diff --git a/spacy/tests/regression/test_issue4903.py b/spacy/tests/regression/test_issue4903.py
index 82e21b79f..156845558 100644
--- a/spacy/tests/regression/test_issue4903.py
+++ b/spacy/tests/regression/test_issue4903.py
@@ -4,6 +4,7 @@ from __future__ import unicode_literals
 import spacy
 from spacy.lang.en import English
 from spacy.tokens import Span, Doc
+from spacy.tokens.underscore import Underscore
 
 
 class CustomPipe:
@@ -31,6 +32,12 @@ class CustomPipe:
 
 def test_issue4903():
     # ensures that this runs correctly and doesn't hang or crash on Windows / macOS
+
+    # reset the Underscore object because test_underscore has a lambda function that can't be pickled
+    Underscore.doc_extensions = {}
+    Underscore.span_extensions = {}
+    Underscore.token_extensions = {}
+
     nlp = English()
     custom_component = CustomPipe()
     nlp.add_pipe(nlp.create_pipe("sentencizer"))

From b49a3afd0cde67debd2128b2cf2c816322c6d0d7 Mon Sep 17 00:00:00 2001
From: svlandeg <sofie.vanlandeghem@gmail.com>
Date: Sun, 23 Feb 2020 15:49:20 +0100
Subject: [PATCH 8/8] use clean_underscore fixture

---
 spacy/tests/doc/test_underscore.py       | 9 +++++++++
 spacy/tests/matcher/test_matcher_api.py  | 2 ++
 spacy/tests/regression/test_issue4849.py | 5 -----
 spacy/tests/regression/test_issue4903.py | 5 -----
 4 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/spacy/tests/doc/test_underscore.py b/spacy/tests/doc/test_underscore.py
index 2877bfeea..c1eff2c20 100644
--- a/spacy/tests/doc/test_underscore.py
+++ b/spacy/tests/doc/test_underscore.py
@@ -7,6 +7,15 @@ from spacy.tokens import Doc, Span, Token
 from spacy.tokens.underscore import Underscore
 
 
+@pytest.fixture(scope="function", autouse=True)
+def clean_underscore():
+    # reset the Underscore object after the test, to avoid having state copied across tests
+    yield
+    Underscore.doc_extensions = {}
+    Underscore.span_extensions = {}
+    Underscore.token_extensions = {}
+
+
 def test_create_doc_underscore():
     doc = Mock()
     doc.doc = doc
diff --git a/spacy/tests/matcher/test_matcher_api.py b/spacy/tests/matcher/test_matcher_api.py
index e4584d03a..a826a0a0e 100644
--- a/spacy/tests/matcher/test_matcher_api.py
+++ b/spacy/tests/matcher/test_matcher_api.py
@@ -6,6 +6,7 @@ import re
 from mock import Mock
 from spacy.matcher import Matcher, DependencyMatcher
 from spacy.tokens import Doc, Token
+from ..doc.test_underscore import clean_underscore
 
 
 @pytest.fixture
@@ -200,6 +201,7 @@ def test_matcher_any_token_operator(en_vocab):
     assert matches[2] == "test hello world"
 
 
+@pytest.mark.usefixtures("clean_underscore")
 def test_matcher_extension_attribute(en_vocab):
     matcher = Matcher(en_vocab)
     get_is_fruit = lambda token: token.text in ("apple", "banana")
diff --git a/spacy/tests/regression/test_issue4849.py b/spacy/tests/regression/test_issue4849.py
index 7e58243bc..85d03fe9a 100644
--- a/spacy/tests/regression/test_issue4849.py
+++ b/spacy/tests/regression/test_issue4849.py
@@ -9,11 +9,6 @@ from spacy.tokens.underscore import Underscore
 def test_issue4849():
     nlp = English()
 
-    # reset the Underscore object because test_underscore has a lambda function that can't be pickled
-    Underscore.doc_extensions = {}
-    Underscore.span_extensions = {}
-    Underscore.token_extensions = {}
-
     ruler = EntityRuler(
         nlp, patterns=[
             {"label": "PERSON", "pattern": 'joe biden', "id": 'joe-biden'},
diff --git a/spacy/tests/regression/test_issue4903.py b/spacy/tests/regression/test_issue4903.py
index 156845558..9a3c10d61 100644
--- a/spacy/tests/regression/test_issue4903.py
+++ b/spacy/tests/regression/test_issue4903.py
@@ -33,11 +33,6 @@ class CustomPipe:
 def test_issue4903():
     # ensures that this runs correctly and doesn't hang or crash on Windows / macOS
 
-    # reset the Underscore object because test_underscore has a lambda function that can't be pickled
-    Underscore.doc_extensions = {}
-    Underscore.span_extensions = {}
-    Underscore.token_extensions = {}
-
     nlp = English()
     custom_component = CustomPipe()
     nlp.add_pipe(nlp.create_pipe("sentencizer"))