mirror of https://github.com/explosion/spaCy.git
* #10672: fixes displacy output for manual unsorted entities * #10672: removed unused import * fix prettier formatting Co-authored-by: Harm Buisman <h.buisman@iknl.nl> Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com>
This commit is contained in:
parent
b3717ba53a
commit
c066fb8a4e
|
@ -4,6 +4,7 @@ repos:
|
|||
hooks:
|
||||
- id: black
|
||||
language_version: python3.7
|
||||
additional_dependencies: ['click==8.0.4']
|
||||
- repo: https://gitlab.com/pycqa/flake8
|
||||
rev: 3.9.2
|
||||
hooks:
|
||||
|
|
|
@ -4,7 +4,7 @@ spaCy's built in visualization suite for dependencies and named entities.
|
|||
DOCS: https://spacy.io/api/top-level#displacy
|
||||
USAGE: https://spacy.io/usage/visualizers
|
||||
"""
|
||||
from typing import List, Union, Iterable, Optional, Dict, Any, Callable
|
||||
from typing import Union, Iterable, Optional, Dict, Any, Callable
|
||||
import warnings
|
||||
|
||||
from .render import DependencyRenderer, EntityRenderer, SpanRenderer
|
||||
|
@ -56,6 +56,10 @@ def render(
|
|||
renderer_func, converter = factories[style]
|
||||
renderer = renderer_func(options=options)
|
||||
parsed = [converter(doc, options) for doc in docs] if not manual else docs # type: ignore
|
||||
if manual:
|
||||
for doc in docs:
|
||||
if isinstance(doc, dict) and "ents" in doc:
|
||||
doc["ents"] = sorted(doc["ents"], key=lambda x: (x["start"], x["end"]))
|
||||
_html["parsed"] = renderer.render(parsed, page=page, minify=minify).strip() # type: ignore
|
||||
html = _html["parsed"]
|
||||
if RENDER_WRAPPER is not None:
|
||||
|
|
|
@ -338,3 +338,18 @@ def test_displacy_options_case():
|
|||
assert "green" in result[1] and "bar" in result[1]
|
||||
assert "red" in result[2] and "FOO" in result[2]
|
||||
assert "green" in result[3] and "BAR" in result[3]
|
||||
|
||||
|
||||
@pytest.mark.issue(10672)
|
||||
def test_displacy_manual_sorted_entities():
|
||||
doc = {
|
||||
"text": "But Google is starting from behind.",
|
||||
"ents": [
|
||||
{"start": 14, "end": 22, "label": "SECOND"},
|
||||
{"start": 4, "end": 10, "label": "FIRST"},
|
||||
],
|
||||
"title": None,
|
||||
}
|
||||
|
||||
html = displacy.render(doc, style="ent", manual=True)
|
||||
assert html.find("FIRST") < html.find("SECOND")
|
||||
|
|
|
@ -263,7 +263,7 @@ Render a dependency parse tree or named entity visualization.
|
|||
|
||||
| Name | Description |
|
||||
| ----------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `docs` | Document(s) or span(s) to visualize. ~~Union[Iterable[Union[Doc, Span]], Doc, Span]~~ |
|
||||
| `docs` | Document(s) or span(s) to visualize. ~~Union[Iterable[Union[Doc, Span, dict]], Doc, Span, dict]~~ |
|
||||
| `style` | Visualization style, `"dep"` or `"ent"`. Defaults to `"dep"`. ~~str~~ |
|
||||
| `page` | Render markup as full HTML page. Defaults to `True`. ~~bool~~ |
|
||||
| `minify` | Minify HTML markup. Defaults to `False`. ~~bool~~ |
|
||||
|
|
|
@ -342,9 +342,7 @@ want to visualize output from other libraries, like [NLTK](http://www.nltk.org)
|
|||
or
|
||||
[SyntaxNet](https://github.com/tensorflow/models/tree/master/research/syntaxnet).
|
||||
If you set `manual=True` on either `render()` or `serve()`, you can pass in data
|
||||
in displaCy's format (instead of `Doc` objects). When setting `ents` manually,
|
||||
make sure to supply them in the right order, i.e. starting with the lowest start
|
||||
position.
|
||||
in displaCy's format as a dictionary (instead of `Doc` objects).
|
||||
|
||||
> #### Example
|
||||
>
|
||||
|
|
Loading…
Reference in New Issue