import pytest from spacy import displacy from spacy.displacy.render import DependencyRenderer, EntityRenderer from spacy.lang.fa import Persian from spacy.tokens import Span, Doc def test_displacy_parse_ents(en_vocab): """Test that named entities on a Doc are converted into displaCy's format.""" doc = Doc(en_vocab, words=["But", "Google", "is", "starting", "from", "behind"]) doc.ents = [Span(doc, 1, 2, label=doc.vocab.strings["ORG"])] ents = displacy.parse_ents(doc) assert isinstance(ents, dict) assert ents["text"] == "But Google is starting from behind " assert ents["ents"] == [ {"start": 4, "end": 10, "label": "ORG", "kb_id": "", "kb_url": "#"} ] doc.ents = [Span(doc, 1, 2, label=doc.vocab.strings["ORG"], kb_id="Q95")] ents = displacy.parse_ents(doc) assert isinstance(ents, dict) assert ents["text"] == "But Google is starting from behind " assert ents["ents"] == [ {"start": 4, "end": 10, "label": "ORG", "kb_id": "Q95", "kb_url": "#"} ] def test_displacy_parse_ents_with_kb_id_options(en_vocab): """Test that named entities with kb_id on a Doc are converted into displaCy's format.""" doc = Doc(en_vocab, words=["But", "Google", "is", "starting", "from", "behind"]) doc.ents = [Span(doc, 1, 2, label=doc.vocab.strings["ORG"], kb_id="Q95")] ents = displacy.parse_ents( doc, {"kb_url_template": "https://www.wikidata.org/wiki/{}"} ) assert isinstance(ents, dict) assert ents["text"] == "But Google is starting from behind " assert ents["ents"] == [ { "start": 4, "end": 10, "label": "ORG", "kb_id": "Q95", "kb_url": "https://www.wikidata.org/wiki/Q95", } ] def test_displacy_parse_deps(en_vocab): """Test that deps and tags on a Doc are converted into displaCy's format.""" words = ["This", "is", "a", "sentence"] heads = [1, 1, 3, 1] pos = ["DET", "VERB", "DET", "NOUN"] tags = ["DT", "VBZ", "DT", "NN"] deps = ["nsubj", "ROOT", "det", "attr"] doc = Doc(en_vocab, words=words, heads=heads, pos=pos, tags=tags, deps=deps) deps = displacy.parse_deps(doc) assert isinstance(deps, dict) assert deps["words"] == [ {"lemma": None, "text": words[0], "tag": pos[0]}, {"lemma": None, "text": words[1], "tag": pos[1]}, {"lemma": None, "text": words[2], "tag": pos[2]}, {"lemma": None, "text": words[3], "tag": pos[3]}, ] assert deps["arcs"] == [ {"start": 0, "end": 1, "label": "nsubj", "dir": "left"}, {"start": 2, "end": 3, "label": "det", "dir": "left"}, {"start": 1, "end": 3, "label": "attr", "dir": "right"}, ] def test_displacy_invalid_arcs(): renderer = DependencyRenderer() words = [{"text": "This", "tag": "DET"}, {"text": "is", "tag": "VERB"}] arcs = [ {"start": 0, "end": 1, "label": "nsubj", "dir": "left"}, {"start": -1, "end": 2, "label": "det", "dir": "left"}, ] with pytest.raises(ValueError): renderer.render([{"words": words, "arcs": arcs}]) def test_displacy_spans(en_vocab): """Test that displaCy can render Spans.""" doc = Doc(en_vocab, words=["But", "Google", "is", "starting", "from", "behind"]) doc.ents = [Span(doc, 1, 2, label=doc.vocab.strings["ORG"])] html = displacy.render(doc[1:4], style="ent") assert html.startswith("