mirror of https://github.com/explosion/spaCy.git
Fix escaping of HTML in displacy ENT (closes #2728)
This commit is contained in:
parent
250e88ef55
commit
80bdcb99c5
|
@@ -253,10 +253,10 @@ class EntityRenderer(object):
|
||||||
label = span["label"]
|
label = span["label"]
|
||||||
start = span["start"]
|
start = span["start"]
|
||||||
end = span["end"]
|
end = span["end"]
|
||||||
entity = text[start:end]
|
entity = escape_html(text[start:end])
|
||||||
fragments = text[offset:start].split("\n")
|
fragments = text[offset:start].split("\n")
|
||||||
for i, fragment in enumerate(fragments):
|
for i, fragment in enumerate(fragments):
|
||||||
markup += fragment
|
markup += escape_html(fragment)
|
||||||
if len(fragments) > 1 and i != len(fragments) - 1:
|
if len(fragments) > 1 and i != len(fragments) - 1:
|
||||||
markup += "</br>"
|
markup += "</br>"
|
||||||
if self.ents is None or label.upper() in self.ents:
|
if self.ents is None or label.upper() in self.ents:
|
||||||
|
@@ -265,7 +265,7 @@ class EntityRenderer(object):
|
||||||
else:
|
else:
|
||||||
markup += entity
|
markup += entity
|
||||||
offset = end
|
offset = end
|
||||||
markup += text[offset:]
|
markup += escape_html(text[offset:])
|
||||||
markup = TPL_ENTS.format(content=markup, colors=self.colors)
|
markup = TPL_ENTS.format(content=markup, colors=self.colors)
|
||||||
if title:
|
if title:
|
||||||
markup = TPL_TITLE.format(title=title) + markup
|
markup = TPL_TITLE.format(title=title) + markup
|
||||||
|
|
|
@@ -0,0 +1,16 @@
|
||||||
|
# coding: utf8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from spacy import displacy
|
||||||
|
from spacy.tokens import Doc, Span
|
||||||
|
|
||||||
|
|
||||||
|
def test_issue2728(en_vocab):
    """Test that displaCy ENT visualizer escapes HTML correctly.

    The token "<RELEASE>" looks like an HTML tag. The rendered markup must
    contain it in escaped form, both when it sits in the plain text around
    an entity and when it is the entity itself.
    """
    doc = Doc(en_vocab, words=["test", "<RELEASE>", "test"])

    # Case 1: the tag-like token is outside the entity span (plain text).
    doc.ents = [Span(doc, 0, 1, label="TEST")]
    html = displacy.render(doc, style="ent")
    assert "&lt;RELEASE&gt;" in html
    # The raw token must not leak into the markup unescaped.
    assert "<RELEASE>" not in html

    # Case 2: the tag-like token is the entity itself.
    doc.ents = [Span(doc, 1, 2, label="TEST")]
    html = displacy.render(doc, style="ent")
    assert "&lt;RELEASE&gt;" in html
    assert "<RELEASE>" not in html
|
Loading…
Reference in New Issue