2020-02-12 11:00:13 +00:00
|
|
|
# coding: utf8
|
|
|
|
from __future__ import unicode_literals
|
|
|
|
|
2020-02-12 11:26:27 +00:00
|
|
|
from spacy.lang.en import English
|
2020-02-12 11:00:13 +00:00
|
|
|
from spacy.tokens import Span, Doc
|
|
|
|
|
|
|
|
|
|
|
|
class CustomPipe:
    """Minimal custom pipeline component used by the regression test below.

    Annotates every sentence of a doc with a ``my_ext`` extension value and
    stores the newline-joined values on the doc's own ``my_ext`` extension.
    """

    name = "my_pipe"

    def __init__(self):
        # Register the extension attributes this component reads/writes.
        # The span-level attribute is backed by a getter; the doc-level one
        # is a plain writable slot.
        Span.set_extension("my_ext", getter=self._get_my_ext)
        Doc.set_extension("my_ext", default=None)

    def __call__(self, doc):
        """Annotate each sentence, then aggregate the values onto the doc."""
        pieces = []
        for sentence in doc.sents:
            annotation = self._get_my_ext(sentence)
            sentence._.set("my_ext", annotation)
            pieces.append(annotation)
        doc._.set("my_ext", "\n".join(pieces))
        return doc

    @staticmethod
    def _get_my_ext(span):
        # Stringified end offset of the span — a trivial stand-in for a real
        # per-sentence computation.
        return str(span.end)
|
|
|
|
|
|
|
|
|
|
|
|
def test_issue4903():
    """Regression test: a multiprocess ``nlp.pipe`` with a custom component
    must run correctly and not hang or crash on Windows / macOS."""
    nlp = English()
    component = CustomPipe()
    nlp.add_pipe(nlp.create_pipe("sentencizer"))
    nlp.add_pipe(component, after="sentencizer")

    texts = ["I like bananas.", "Do you like them?", "No, I prefer wasabi."]
    docs = list(nlp.pipe(texts, n_process=2))
    # Every input text must round-trip through the multiprocess pipeline
    # unchanged and in order.
    for doc, expected in zip(docs, texts):
        assert doc.text == expected
|