diff --git a/spacy/displacy/render.py b/spacy/displacy/render.py
index 86869e3b8..47407bcb7 100644
--- a/spacy/displacy/render.py
+++ b/spacy/displacy/render.py
@@ -1,4 +1,3 @@
-import itertools
 import uuid
 from typing import Any, Dict, List, Optional, Tuple, Union
 
@@ -218,7 +217,7 @@ class SpanRenderer:
                     + (self.offset_step * (len(entities) - 1))
                 )
                 markup += self.span_template.format(
-                    text=token["text"],
+                    text=escape_html(token["text"]),
                     span_slices=slices,
                     span_starts=starts,
                     total_height=total_height,
diff --git a/spacy/tests/test_displacy.py b/spacy/tests/test_displacy.py
index ce103068a..1570f8d09 100644
--- a/spacy/tests/test_displacy.py
+++ b/spacy/tests/test_displacy.py
@@ -377,3 +377,22 @@ def test_displacy_manual_sorted_entities():
 
     html = displacy.render(doc, style="ent", manual=True)
     assert html.find("FIRST") < html.find("SECOND")
+
+
+@pytest.mark.issue(12816)
+def test_issue12816(en_vocab) -> None:
+    """Test that displaCy's span visualizer escapes annotated HTML tags correctly."""
+    # Create a doc containing an annotated word and an unannotated HTML tag
+    doc = Doc(en_vocab, words=["test", "<TEST>"])
+    doc.spans["sc"] = [Span(doc, 0, 1, label="test")]
+
+    # Verify that the HTML tag is escaped when unannotated
+    html = displacy.render(doc, style="span")
+    assert "&lt;TEST&gt;" in html
+
+    # Annotate the HTML tag
+    doc.spans["sc"].append(Span(doc, 1, 2, label="test"))
+
+    # Verify that the HTML tag is still escaped
+    html = displacy.render(doc, style="span")
+    assert "&lt;TEST&gt;" in html