displaCy: exclude doc.user_data when parse_deps copies the Doc.

parse_deps makes a working copy of the incoming Doc via a to_bytes /
from_bytes round trip. This patch passes exclude=["user_data"] to to_bytes
so that the contents of doc.user_data are not serialized for that copy.
The accompanying regression test (issue 3882) stores a set() in
doc.user_data and checks that parse_deps(doc) still runs.

diff --git a/spacy/displacy/__init__.py b/spacy/displacy/__init__.py
index b651c0996..d2ef21dbd 100644
--- a/spacy/displacy/__init__.py
+++ b/spacy/displacy/__init__.py
@@ -116,7 +116,7 @@ def parse_deps(orig_doc, options={}):
     doc (Doc): Document do parse.
     RETURNS (dict): Generated dependency parse keyed by words and arcs.
     """
-    doc = Doc(orig_doc.vocab).from_bytes(orig_doc.to_bytes())
+    doc = Doc(orig_doc.vocab).from_bytes(orig_doc.to_bytes(exclude=["user_data"]))
     if not doc.is_parsed:
         user_warning(Warnings.W005)
     if options.get("collapse_phrases", False):
diff --git a/spacy/tests/regression/test_issue3882.py b/spacy/tests/regression/test_issue3882.py
new file mode 100644
index 000000000..1b2dcea25
--- /dev/null
+++ b/spacy/tests/regression/test_issue3882.py
@@ -0,0 +1,15 @@
+# coding: utf8
+from __future__ import unicode_literals
+
+from spacy.displacy import parse_deps
+from spacy.tokens import Doc
+
+
+def test_issue3882(en_vocab):
+    """Test that displaCy doesn't serialize the doc.user_data when making a
+    copy of the Doc.
+    """
+    doc = Doc(en_vocab, words=["Hello", "world"])
+    doc.is_parsed = True
+    doc.user_data["test"] = set()
+    parse_deps(doc)