From ea5829407660ef34838e7a2296ae47c321fd5e0d Mon Sep 17 00:00:00 2001
From: Renat Shigapov <57352291+shigapov@users.noreply.github.com>
Date: Thu, 9 Sep 2021 15:13:18 +0200
Subject: [PATCH 1/9] add spaCyOpenTapioca to universe
---
website/meta/universe.json | 26 ++++++++++++++++++++++++++
1 file changed, 26 insertions(+)
diff --git a/website/meta/universe.json b/website/meta/universe.json
index 993e900ff..c5e8017ab 100644
--- a/website/meta/universe.json
+++ b/website/meta/universe.json
@@ -642,6 +642,32 @@
"website": "https://ines.io"
}
},
+ {
+ "id": "spacyopentapioca",
+ "title": "spaCyOpenTapioca",
+ "slogan": "Named entity linking on Wikidata in spaCy via OpenTapioca",
+ "description": "A spaCy wrapper of OpenTapioca for named entity linking on Wikidata",
+ "github": "UB-Mannheim/spacyopentapioca",
+ "pip": "spacyopentapioca",
+ "code_example": [
+ "import spacy",
+ "nlp = spacy.blank('en')",
+ "nlp.add_pipe('opentapioca')",
+ "doc = nlp('Christian Drosten works in Germany.')",
+ "for span in doc.ents:",
+ " print((span.text, span.kb_id_, span.label_, span._.description, span._.score))",
+ "# ('Christian Drosten', 'Q1079331', 'PERSON', 'German virologist and university teacher', 3.6533377082098895)",
+ "# ('Germany', 'Q183', 'LOC', 'sovereign state in Central Europe', 2.1099332471902863)",
+ "## Check also span._.types, span._.aliases, span._.rank"
+ ],
+ "category": ["models", "pipeline"],
+ "tags": ["NER", "NEL"]
+ "author": "Renat Shigapov",
+ "author_links": {
+ "twitter": "_shigapov",
+ "github": "shigapov"
+ }
+ },
{
"id": "spacy_hunspell",
"slogan": "Add spellchecking and spelling suggestions to your spaCy pipeline using Hunspell",
From 8940e0baca769be457df976bfab9531eb5d86e40 Mon Sep 17 00:00:00 2001
From: Renat Shigapov <57352291+shigapov@users.noreply.github.com>
Date: Thu, 9 Sep 2021 15:33:29 +0200
Subject: [PATCH 2/9] add agreement
---
.github/contributors/shigapov.md | 106 +++++++++++++++++++++++++++++++
1 file changed, 106 insertions(+)
create mode 100644 .github/contributors/shigapov.md
diff --git a/.github/contributors/shigapov.md b/.github/contributors/shigapov.md
new file mode 100644
index 000000000..3c24c7982
--- /dev/null
+++ b/.github/contributors/shigapov.md
@@ -0,0 +1,106 @@
+# spaCy contributor agreement
+
+This spaCy Contributor Agreement (**"SCA"**) is based on the
+[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf).
+The SCA applies to any contribution that you make to any product or project
+managed by us (the **"project"**), and sets out the intellectual property rights
+you grant to us in the contributed materials. The term **"us"** shall mean
+[ExplosionAI GmbH](https://explosion.ai/legal). The term
+**"you"** shall mean the person or entity identified below.
+
+If you agree to be bound by these terms, fill in the information requested
+below and include the filled-in version with your first pull request, under the
+folder [`.github/contributors/`](/.github/contributors/). The name of the file
+should be your GitHub username, with the extension `.md`. For example, the user
+example_user would create the file `.github/contributors/example_user.md`.
+
+Read this agreement carefully before signing. These terms and conditions
+constitute a binding legal agreement.
+
+## Contributor Agreement
+
+1. The term "contribution" or "contributed materials" means any source code,
+object code, patch, tool, sample, graphic, specification, manual,
+documentation, or any other material posted or submitted by you to the project.
+
+2. With respect to any worldwide copyrights, or copyright applications and
+registrations, in your contribution:
+
+ * you hereby assign to us joint ownership, and to the extent that such
+ assignment is or becomes invalid, ineffective or unenforceable, you hereby
+ grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge,
+ royalty-free, unrestricted license to exercise all rights under those
+ copyrights. This includes, at our option, the right to sublicense these same
+ rights to third parties through multiple levels of sublicensees or other
+ licensing arrangements;
+
+ * you agree that each of us can do all things in relation to your
+ contribution as if each of us were the sole owners, and if one of us makes
+ a derivative work of your contribution, the one who makes the derivative
+ work (or has it made) will be the sole owner of that derivative work;
+
+ * you agree that you will not assert any moral rights in your contribution
+ against us, our licensees or transferees;
+
+ * you agree that we may register a copyright in your contribution and
+ exercise all ownership rights associated with it; and
+
+ * you agree that neither of us has any duty to consult with, obtain the
+ consent of, pay or render an accounting to the other for any use or
+ distribution of your contribution.
+
+3. With respect to any patents you own, or that you can license without payment
+to any third party, you hereby grant to us a perpetual, irrevocable,
+non-exclusive, worldwide, no-charge, royalty-free license to:
+
+ * make, have made, use, sell, offer to sell, import, and otherwise transfer
+ your contribution in whole or in part, alone or in combination with or
+ included in any product, work or materials arising out of the project to
+ which your contribution was submitted, and
+
+ * at our option, to sublicense these same rights to third parties through
+ multiple levels of sublicensees or other licensing arrangements.
+
+4. Except as set out above, you keep all right, title, and interest in your
+contribution. The rights that you grant to us under these terms are effective
+on the date you first submitted a contribution to us, even if your submission
+took place before the date you sign these terms.
+
+5. You covenant, represent, warrant and agree that:
+
+ * Each contribution that you submit is and shall be an original work of
+ authorship and you can legally grant the rights set out in this SCA;
+
+ * to the best of your knowledge, each contribution will not violate any
+ third party's copyrights, trademarks, patents, or other intellectual
+ property rights; and
+
+ * each contribution shall be in compliance with U.S. export control laws and
+ other applicable export and import laws. You agree to notify us if you
+ become aware of any circumstance which would make any of the foregoing
+ representations inaccurate in any respect. We may publicly disclose your
+ participation in the project, including the fact that you have signed the SCA.
+
+6. This SCA is governed by the laws of the State of California and applicable
+U.S. Federal law. Any choice of law rules will not apply.
+
+7. Please place an “x” on one of the applicable statements below. Please do NOT
+mark both statements:
+
+ * [x] I am signing on behalf of myself as an individual and no other person
+ or entity, including my employer, has or will have rights with respect to my
+ contributions.
+
+ * [ ] I am signing on behalf of my employer or a legal entity and I have the
+ actual authority to contractually bind that entity.
+
+## Contributor Details
+
+| Field | Entry |
+|------------------------------- | ------------------------ |
+| Name | Renat Shigapov |
+| Company name (if applicable) | |
+| Title or role (if applicable) | |
+| Date | 2021-09-09 |
+| GitHub username | shigapov |
+| Website (optional) | |
From c1927fe9943d236f6a839cd356afcae3a9e0adf7 Mon Sep 17 00:00:00 2001
From: Renat Shigapov <57352291+shigapov@users.noreply.github.com>
Date: Thu, 9 Sep 2021 15:37:34 +0200
Subject: [PATCH 3/9] fix misprint in tags
---
website/meta/universe.json | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/website/meta/universe.json b/website/meta/universe.json
index c5e8017ab..04163c61e 100644
--- a/website/meta/universe.json
+++ b/website/meta/universe.json
@@ -661,7 +661,7 @@
"## Check also span._.types, span._.aliases, span._.rank"
],
"category": ["models", "pipeline"],
- "tags": ["NER", "NEL"]
+ "tags": ["NER", "NEL"],
"author": "Renat Shigapov",
"author_links": {
"twitter": "_shigapov",
From 7562fb53545f396848bd55e774adf1a6b1686af8 Mon Sep 17 00:00:00 2001
From: Renat Shigapov <57352291+shigapov@users.noreply.github.com>
Date: Mon, 13 Sep 2021 08:06:54 +0200
Subject: [PATCH 4/9] add links to entities into the TPL_ENT-template
---
spacy/displacy/templates.py | 1 +
1 file changed, 1 insertion(+)
diff --git a/spacy/displacy/templates.py b/spacy/displacy/templates.py
index b9cbf717b..c44c1e2a6 100644
--- a/spacy/displacy/templates.py
+++ b/spacy/displacy/templates.py
@@ -52,6 +52,7 @@ TPL_ENT = """
{text}
{label}
+ {kb_id}
"""
From f4b5c4209d6cb3748534ad072784a3f9c4fa194c Mon Sep 17 00:00:00 2001
From: Renat Shigapov <57352291+shigapov@users.noreply.github.com>
Date: Mon, 13 Sep 2021 08:15:07 +0200
Subject: [PATCH 5/9] specify kb_id and kb_url for URL visualisation
---
spacy/displacy/render.py | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/spacy/displacy/render.py b/spacy/displacy/render.py
index ba56beca3..40269e74f 100644
--- a/spacy/displacy/render.py
+++ b/spacy/displacy/render.py
@@ -305,7 +305,7 @@ class EntityRenderer:
"""Render entities in text.
text (str): Original text.
- spans (list): Individual entity spans and their start, end and label.
+ spans (list): Individual entity spans and their start, end, label, kb_id and kb_url.
title (str / None): Document title set in Doc.user_data['title'].
"""
markup = ""
@@ -314,6 +314,8 @@ class EntityRenderer:
label = span["label"]
start = span["start"]
end = span["end"]
+ kb_id = str(span.get("kb_id") or "")
+ kb_url = str(span.get("kb_url") or "")
additional_params = span.get("params", {})
entity = escape_html(text[start:end])
fragments = text[offset:start].split("\n")
@@ -323,7 +325,7 @@ class EntityRenderer:
markup += ""
if self.ents is None or label.upper() in self.ents:
color = self.colors.get(label.upper(), self.default_color)
- ent_settings = {"label": label, "text": entity, "bg": color}
+ ent_settings = {"label": label, "text": entity, "bg": color, "kb_id": kb_id, "kb_url": kb_url}
ent_settings.update(additional_params)
markup += self.ent_template.format(**ent_settings)
else:
From e61d93f8c3715d74038c9bd952fe60418fc6f144 Mon Sep 17 00:00:00 2001
From: Renat Shigapov <57352291+shigapov@users.noreply.github.com>
Date: Mon, 13 Sep 2021 08:38:58 +0200
Subject: [PATCH 6/9] add NEL-visualisation to manual-usage
---
website/docs/usage/visualizers.md | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/website/docs/usage/visualizers.md b/website/docs/usage/visualizers.md
index cc73e7e67..cb643aed1 100644
--- a/website/docs/usage/visualizers.md
+++ b/website/docs/usage/visualizers.md
@@ -297,7 +297,7 @@ position.
>
> ```python
> ex = [{"text": "But Google is starting from behind.",
-> "ents": [{"start": 4, "end": 10, "label": "ORG"}],
+> "ents": [{"start": 4, "end": 10, "label": "ORG", "kb_id": "Q95", "kb_url": "https://www.wikidata.org/entity/Q95"}],
> "title": None}]
> html = displacy.render(ex, style="ent", manual=True)
> ```
@@ -323,7 +323,7 @@ position.
### ENT input
{
"text": "But Google is starting from behind.",
- "ents": [{"start": 4, "end": 10, "label": "ORG"}],
+ "ents": [{"start": 4, "end": 10, "label": "ORG", "kb_id": "Q95", "kb_url": "https://www.wikidata.org/entity/Q95"}],
"title": None
}
```
From 4bf2606adf4cdd2045a6aeec3a8258c5cb3915ed Mon Sep 17 00:00:00 2001
From: Sofie Van Landeghem
Date: Thu, 16 Sep 2021 09:22:38 +0200
Subject: [PATCH 7/9] Update spacy/displacy/render.py
Co-authored-by: Renat Shigapov <57352291+shigapov@users.noreply.github.com>
---
spacy/displacy/render.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/spacy/displacy/render.py b/spacy/displacy/render.py
index 40269e74f..bd7178724 100644
--- a/spacy/displacy/render.py
+++ b/spacy/displacy/render.py
@@ -314,8 +314,8 @@ class EntityRenderer:
label = span["label"]
start = span["start"]
end = span["end"]
- kb_id = str(span.get("kb_id") or "")
- kb_url = str(span.get("kb_url") or "")
+ kb_id = span.get("kb_id", "")
+ kb_url = span.get("kb_url", "")
additional_params = span.get("params", {})
entity = escape_html(text[start:end])
fragments = text[offset:start].split("\n")
From 00836c2d7d46b0aa405eb1569856f7923da6d1f0 Mon Sep 17 00:00:00 2001
From: Sofie Van Landeghem
Date: Thu, 16 Sep 2021 09:23:21 +0200
Subject: [PATCH 8/9] Update spacy/displacy/templates.py
---
spacy/displacy/templates.py | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/spacy/displacy/templates.py b/spacy/displacy/templates.py
index c44c1e2a6..051f72cb2 100644
--- a/spacy/displacy/templates.py
+++ b/spacy/displacy/templates.py
@@ -51,8 +51,9 @@ TPL_ENTS = """
TPL_ENT = """
{text}
- {label}
+ {label}
{kb_id}
+
"""
From 57b5fc1995ded2f4bb9dd6305c567cdd4c1151ee Mon Sep 17 00:00:00 2001
From: Ines Montani
Date: Thu, 23 Sep 2021 17:58:32 +1000
Subject: [PATCH 9/9] Apply suggestions from code review
Co-authored-by: Renat Shigapov <57352291+shigapov@users.noreply.github.com>
---
spacy/displacy/render.py | 6 +++++-
spacy/displacy/templates.py | 2 +-
2 files changed, 6 insertions(+), 2 deletions(-)
diff --git a/spacy/displacy/render.py b/spacy/displacy/render.py
index bd7178724..406824eb4 100644
--- a/spacy/displacy/render.py
+++ b/spacy/displacy/render.py
@@ -316,6 +316,10 @@ class EntityRenderer:
end = span["end"]
kb_id = span.get("kb_id", "")
kb_url = span.get("kb_url", "")
+ if kb_id:
+ kb_link = """<a href="{}">{}</a>""".format(kb_url, kb_id)
+ else:
+ kb_link = ""
additional_params = span.get("params", {})
entity = escape_html(text[start:end])
fragments = text[offset:start].split("\n")
@@ -325,7 +329,7 @@ class EntityRenderer:
markup += ""
if self.ents is None or label.upper() in self.ents:
color = self.colors.get(label.upper(), self.default_color)
- ent_settings = {"label": label, "text": entity, "bg": color, "kb_id": kb_id, "kb_url": kb_url}
+ ent_settings = {"label": label, "text": entity, "bg": color, "kb_link": kb_link}
ent_settings.update(additional_params)
markup += self.ent_template.format(**ent_settings)
else:
diff --git a/spacy/displacy/templates.py b/spacy/displacy/templates.py
index 051f72cb2..53fcc1873 100644
--- a/spacy/displacy/templates.py
+++ b/spacy/displacy/templates.py
@@ -52,7 +52,7 @@ TPL_ENT = """
{text}
{label}
- {kb_id}
+ {kb_link}
"""