From c066fb8a4ee27bf0f90a60a863158ab12fc05fb3 Mon Sep 17 00:00:00 2001 From: harmbuisman Date: Wed, 27 Apr 2022 09:51:58 +0200 Subject: [PATCH] #10672: fixes displacy output for manual unsorted entities (#10673) * #10672: fixes displacy output for manual unsorted entities * #10672: removed unused import * fix prettier formatting Co-authored-by: Harm Buisman Co-authored-by: Sofie Van Landeghem --- .pre-commit-config.yaml | 1 + spacy/displacy/__init__.py | 6 +++++- spacy/tests/test_displacy.py | 15 +++++++++++++++ website/docs/api/top-level.md | 2 +- website/docs/usage/visualizers.md | 4 +--- 5 files changed, 23 insertions(+), 5 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a7a12fd24..bd1baf5f7 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -4,6 +4,7 @@ repos: hooks: - id: black language_version: python3.7 + additional_dependencies: ['click==8.0.4'] - repo: https://gitlab.com/pycqa/flake8 rev: 3.9.2 hooks: diff --git a/spacy/displacy/__init__.py b/spacy/displacy/__init__.py index aa00c95d8..5d49b6eb7 100644 --- a/spacy/displacy/__init__.py +++ b/spacy/displacy/__init__.py @@ -4,7 +4,7 @@ spaCy's built in visualization suite for dependencies and named entities. DOCS: https://spacy.io/api/top-level#displacy USAGE: https://spacy.io/usage/visualizers """ -from typing import List, Union, Iterable, Optional, Dict, Any, Callable +from typing import Union, Iterable, Optional, Dict, Any, Callable import warnings from .render import DependencyRenderer, EntityRenderer, SpanRenderer @@ -56,6 +56,10 @@ def render( renderer_func, converter = factories[style] renderer = renderer_func(options=options) parsed = [converter(doc, options) for doc in docs] if not manual else docs # type: ignore + if manual: + for doc in docs: + if isinstance(doc, dict) and "ents" in doc: + doc["ents"] = sorted(doc["ents"], key=lambda x: (x["start"], x["end"])) _html["parsed"] = renderer.render(parsed, page=page, minify=minify).strip() # type: ignore html = _html["parsed"] if RENDER_WRAPPER is not None: diff --git a/spacy/tests/test_displacy.py b/spacy/tests/test_displacy.py index f52c36889..ccc145b44 100644 --- a/spacy/tests/test_displacy.py +++ b/spacy/tests/test_displacy.py @@ -338,3 +338,18 @@ def test_displacy_options_case(): assert "green" in result[1] and "bar" in result[1] assert "red" in result[2] and "FOO" in result[2] assert "green" in result[3] and "BAR" in result[3] + + +@pytest.mark.issue(10672) +def test_displacy_manual_sorted_entities(): + doc = { + "text": "But Google is starting from behind.", + "ents": [ + {"start": 14, "end": 22, "label": "SECOND"}, + {"start": 4, "end": 10, "label": "FIRST"}, + ], + "title": None, + } + + html = displacy.render(doc, style="ent", manual=True) + assert html.find("FIRST") < html.find("SECOND") diff --git a/website/docs/api/top-level.md b/website/docs/api/top-level.md index 6d7431f28..f2fd1415f 100644 --- a/website/docs/api/top-level.md +++ b/website/docs/api/top-level.md @@ -263,7 +263,7 @@ Render a dependency parse tree or named entity visualization. | Name | Description | | ----------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `docs` | Document(s) or span(s) to visualize. ~~Union[Iterable[Union[Doc, Span]], Doc, Span]~~ | +| `docs` | Document(s) or span(s) to visualize. ~~Union[Iterable[Union[Doc, Span, dict]], Doc, Span, dict]~~ | | `style` | Visualization style, `"dep"` or `"ent"`. Defaults to `"dep"`. ~~str~~ | | `page` | Render markup as full HTML page. Defaults to `True`. ~~bool~~ | | `minify` | Minify HTML markup. Defaults to `False`. ~~bool~~ | diff --git a/website/docs/usage/visualizers.md b/website/docs/usage/visualizers.md index f98c43224..770448c5a 100644 --- a/website/docs/usage/visualizers.md +++ b/website/docs/usage/visualizers.md @@ -342,9 +342,7 @@ want to visualize output from other libraries, like [NLTK](http://www.nltk.org) or [SyntaxNet](https://github.com/tensorflow/models/tree/master/research/syntaxnet). If you set `manual=True` on either `render()` or `serve()`, you can pass in data -in displaCy's format (instead of `Doc` objects). When setting `ents` manually, -make sure to supply them in the right order, i.e. starting with the lowest start -position. +in displaCy's format as a dictionary (instead of `Doc` objects). > #### Example >