spaCy/spacy/tests/test_displacy.py

69 lines
2.4 KiB
Python
Raw Normal View History

2019-03-11 14:28:34 +00:00
# coding: utf-8
from __future__ import unicode_literals
import pytest
from spacy import displacy
from spacy.tokens import Span
from .util import get_doc
def test_displacy_parse_ents(en_vocab):
"""Test that named entities on a Doc are converted into displaCy's format."""
doc = get_doc(en_vocab, words=["But", "Google", "is", "starting", "from", "behind"])
doc.ents = [Span(doc, 1, 2, label=doc.vocab.strings["ORG"])]
ents = displacy.parse_ents(doc)
assert isinstance(ents, dict)
assert ents["text"] == "But Google is starting from behind "
assert ents["ents"] == [{"start": 4, "end": 10, "label": "ORG"}]
def test_displacy_parse_deps(en_vocab):
"""Test that deps and tags on a Doc are converted into displaCy's format."""
words = ["This", "is", "a", "sentence"]
heads = [1, 0, 1, -2]
pos = ["DET", "VERB", "DET", "NOUN"]
tags = ["DT", "VBZ", "DT", "NN"]
deps = ["nsubj", "ROOT", "det", "attr"]
doc = get_doc(en_vocab, words=words, heads=heads, pos=pos, tags=tags, deps=deps)
deps = displacy.parse_deps(doc)
assert isinstance(deps, dict)
assert deps["words"] == [
{"text": "This", "tag": "DET"},
{"text": "is", "tag": "VERB"},
{"text": "a", "tag": "DET"},
{"text": "sentence", "tag": "NOUN"},
]
assert deps["arcs"] == [
{"start": 0, "end": 1, "label": "nsubj", "dir": "left"},
{"start": 2, "end": 3, "label": "det", "dir": "left"},
{"start": 1, "end": 3, "label": "attr", "dir": "right"},
]
def test_displacy_spans(en_vocab):
"""Test that displaCy can render Spans."""
doc = get_doc(en_vocab, words=["But", "Google", "is", "starting", "from", "behind"])
doc.ents = [Span(doc, 1, 2, label=doc.vocab.strings["ORG"])]
html = displacy.render(doc[1:4], style="ent")
assert html.startswith("<div")
def test_displacy_render_wrapper(en_vocab):
"""Test that displaCy accepts custom rendering wrapper."""
def wrapper(html):
return "TEST" + html + "TEST"
displacy.set_render_wrapper(wrapper)
doc = get_doc(en_vocab, words=["But", "Google", "is", "starting", "from", "behind"])
doc.ents = [Span(doc, 1, 2, label=doc.vocab.strings["ORG"])]
html = displacy.render(doc, style="ent")
assert html.startswith("TEST<div")
assert html.endswith("/div>TEST")
def test_displacy_raises_for_wrong_type(en_vocab):
with pytest.raises(ValueError):
displacy.render("hello world")