From 80bdcb99c54e5e84ee746a7633633d434cb3728b Mon Sep 17 00:00:00 2001
From: Ines Montani
Date: Thu, 21 Feb 2019 14:30:39 +0100
Subject: [PATCH] Fix escaping of HTML in displacy ENT (closes #2728)

---
 spacy/displacy/render.py                 |  6 +++---
 spacy/tests/regression/test_issue2728.py | 16 ++++++++++++++++
 2 files changed, 19 insertions(+), 3 deletions(-)
 create mode 100644 spacy/tests/regression/test_issue2728.py

diff --git a/spacy/displacy/render.py b/spacy/displacy/render.py
index 4f99e86d9..2b8e0c7d2 100644
--- a/spacy/displacy/render.py
+++ b/spacy/displacy/render.py
@@ -253,10 +253,10 @@ class EntityRenderer(object):
             label = span["label"]
             start = span["start"]
             end = span["end"]
-            entity = text[start:end]
+            entity = escape_html(text[start:end])
             fragments = text[offset:start].split("\n")
             for i, fragment in enumerate(fragments):
-                markup += fragment
+                markup += escape_html(fragment)
                 if len(fragments) > 1 and i != len(fragments) - 1:
                     markup += "</br>"
             if self.ents is None or label.upper() in self.ents:
@@ -265,7 +265,7 @@ class EntityRenderer(object):
             else:
                 markup += entity
             offset = end
-        markup += text[offset:]
+        markup += escape_html(text[offset:])
         markup = TPL_ENTS.format(content=markup, colors=self.colors)
         if title:
             markup = TPL_TITLE.format(title=title) + markup
diff --git a/spacy/tests/regression/test_issue2728.py b/spacy/tests/regression/test_issue2728.py
new file mode 100644
index 000000000..ac3cbc91c
--- /dev/null
+++ b/spacy/tests/regression/test_issue2728.py
@@ -0,0 +1,16 @@
+# coding: utf8
+from __future__ import unicode_literals
+
+from spacy import displacy
+from spacy.tokens import Doc, Span
+
+
+def test_issue2728(en_vocab):
+    """Test that displaCy ENT visualizer escapes HTML correctly."""
+    doc = Doc(en_vocab, words=["test", "<RELEASE>", "test"])
+    doc.ents = [Span(doc, 0, 1, label="TEST")]
+    html = displacy.render(doc, style="ent")
+    assert "&lt;RELEASE&gt;" in html
+    doc.ents = [Span(doc, 1, 2, label="TEST")]
+    html = displacy.render(doc, style="ent")
+    assert "&lt;RELEASE&gt;" in html