diff --git a/spacy/displacy/__init__.py b/spacy/displacy/__init__.py
index d9418f675..25d530c83 100644
--- a/spacy/displacy/__init__.py
+++ b/spacy/displacy/__init__.py
@@ -181,11 +181,19 @@ def parse_deps(orig_doc: Doc, options: Dict[str, Any] = {}) -> Dict[str, Any]:
def parse_ents(doc: Doc, options: Dict[str, Any] = {}) -> Dict[str, Any]:
"""Generate named entities in [{start: i, end: i, label: 'label'}] format.
- doc (Doc): Document do parse.
+ doc (Doc): Document to parse.
+ options (Dict[str, Any]): NER-specific visualisation options.
RETURNS (dict): Generated entities keyed by text (original text) and ents.
"""
+ kb_url_template = options.get("kb_url_template", None)
ents = [
- {"start": ent.start_char, "end": ent.end_char, "label": ent.label_}
+ {
+ "start": ent.start_char,
+ "end": ent.end_char,
+ "label": ent.label_,
+ "kb_id": ent.kb_id_ if ent.kb_id_ else "",
+ "kb_url": kb_url_template.format(ent.kb_id_) if kb_url_template else "#",
+ }
for ent in doc.ents
]
if not ents:
diff --git a/spacy/tests/test_displacy.py b/spacy/tests/test_displacy.py
index 040dd657f..790925888 100644
--- a/spacy/tests/test_displacy.py
+++ b/spacy/tests/test_displacy.py
@@ -1,8 +1,9 @@
import pytest
+
from spacy import displacy
from spacy.displacy.render import DependencyRenderer, EntityRenderer
-from spacy.tokens import Span, Doc
from spacy.lang.fa import Persian
+from spacy.tokens import Span, Doc
def test_displacy_parse_ents(en_vocab):
@@ -12,7 +13,38 @@ def test_displacy_parse_ents(en_vocab):
ents = displacy.parse_ents(doc)
assert isinstance(ents, dict)
assert ents["text"] == "But Google is starting from behind "
- assert ents["ents"] == [{"start": 4, "end": 10, "label": "ORG"}]
+ assert ents["ents"] == [
+ {"start": 4, "end": 10, "label": "ORG", "kb_id": "", "kb_url": "#"}
+ ]
+
+ doc.ents = [Span(doc, 1, 2, label=doc.vocab.strings["ORG"], kb_id="Q95")]
+ ents = displacy.parse_ents(doc)
+ assert isinstance(ents, dict)
+ assert ents["text"] == "But Google is starting from behind "
+ assert ents["ents"] == [
+ {"start": 4, "end": 10, "label": "ORG", "kb_id": "Q95", "kb_url": "#"}
+ ]
+
+
+def test_displacy_parse_ents_with_kb_id_options(en_vocab):
+ """Test that parse_ents fills in kb_url from the kb_url_template option for an entity with a kb_id."""
+ doc = Doc(en_vocab, words=["But", "Google", "is", "starting", "from", "behind"])
+ doc.ents = [Span(doc, 1, 2, label=doc.vocab.strings["ORG"], kb_id="Q95")]
+
+ ents = displacy.parse_ents(
+ doc, {"kb_url_template": "https://www.wikidata.org/wiki/{}"}
+ )
+ assert isinstance(ents, dict)
+ assert ents["text"] == "But Google is starting from behind "
+ assert ents["ents"] == [
+ {
+ "start": 4,
+ "end": 10,
+ "label": "ORG",
+ "kb_id": "Q95",
+ "kb_url": "https://www.wikidata.org/wiki/Q95",
+ }
+ ]
def test_displacy_parse_deps(en_vocab):
diff --git a/website/docs/api/top-level.md b/website/docs/api/top-level.md
index 4361db4c0..be19f9c3a 100644
--- a/website/docs/api/top-level.md
+++ b/website/docs/api/top-level.md
@@ -313,11 +313,12 @@ If a setting is not present in the options, the default value will be used.
> displacy.serve(doc, style="ent", options=options)
> ```
-| Name | Description |
-| --------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `ents` | Entity types to highlight or `None` for all types (default). ~~Optional[List[str]]~~ |
-| `colors` | Color overrides. Entity types should be mapped to color names or values. ~~Dict[str, str]~~ |
-| `template` 2.2 | Optional template to overwrite the HTML used to render entity spans. Should be a format string and can use `{bg}`, `{text}` and `{label}`. See [`templates.py`](%%GITHUB_SPACY/spacy/displacy/templates.py) for examples. ~~Optional[str]~~ |
+| Name | Description |
+| ------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `ents` | Entity types to highlight or `None` for all types (default). ~~Optional[List[str]]~~ |
+| `colors` | Color overrides. Entity types should be mapped to color names or values. ~~Dict[str, str]~~ |
+| `template` 2.2 | Optional template to overwrite the HTML used to render entity spans. Should be a format string and can use `{bg}`, `{text}` and `{label}`. See [`templates.py`](%%GITHUB_SPACY/spacy/displacy/templates.py) for examples. ~~Optional[str]~~ |
+| `kb_url_template` 3.2.1 | Optional template to construct the KB url for the entity to link to. Expects a Python format string (as used by `str.format`) with a single field to fill in, e.g. `https://www.wikidata.org/wiki/{}`. ~~Optional[str]~~ |
By default, displaCy comes with colors for all entity types used by
[spaCy's trained pipelines](/models). If you're using custom entity types, you
@@ -326,6 +327,14 @@ or pipeline package can also expose a
[`spacy_displacy_colors` entry point](/usage/saving-loading#entry-points-displacy)
to add custom labels and their colors automatically.
+By default, displaCy links to `#` for entities without a `kb_id` set on their
+span. If you wish to link an entity to its URL, consider using the
+`kb_url_template` option from above. For example, if the `kb_id` on a span is
+`Q95` and this is a Wikidata identifier, then this option can be set to
+`https://www.wikidata.org/wiki/{}`. Clicking on your entity in the rendered HTML
+should then take you to its Wikidata page, in this case
+`https://www.wikidata.org/wiki/Q95`.
+
## registry {#registry source="spacy/util.py" new="3"}
spaCy's function registry extends
@@ -412,10 +421,10 @@ finished. To log each training step, a
and the accuracy scores on the development set.
The built-in, default logger is the ConsoleLogger, which prints results to the
-console in tabular format. The
+console in tabular format. The
[spacy-loggers](https://github.com/explosion/spacy-loggers) package, included as
-a dependency of spaCy, enables other loggers: currently it provides one that sends
-results to a [Weights & Biases](https://www.wandb.com/) dashboard.
+a dependency of spaCy, enables other loggers: currently it provides one that
+sends results to a [Weights & Biases](https://www.wandb.com/) dashboard.
Instead of using one of the built-in loggers, you can
[implement your own](/usage/training#custom-logging).
@@ -466,7 +475,6 @@ start decreasing across epochs.
-
## Readers {#readers}
### File readers {#file-readers source="github.com/explosion/srsly" new="3"}