From ea5829407660ef34838e7a2296ae47c321fd5e0d Mon Sep 17 00:00:00 2001 From: Renat Shigapov <57352291+shigapov@users.noreply.github.com> Date: Thu, 9 Sep 2021 15:13:18 +0200 Subject: [PATCH 1/9] add spaCyOpenTapioca to universe --- website/meta/universe.json | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/website/meta/universe.json b/website/meta/universe.json index 993e900ff..c5e8017ab 100644 --- a/website/meta/universe.json +++ b/website/meta/universe.json @@ -642,6 +642,32 @@ "website": "https://ines.io" } }, + { + "id": "spacyopentapioca", + "title": "spaCyOpenTapioca", + "slogan": "Named entity linking on Wikidata in spaCy via OpenTapioca", + "description": "A spaCy wrapper of OpenTapioca for named entity linking on Wikidata", + "github": "UB-Mannheim/spacyopentapioca", + "pip": "spacyopentapioca", + "code_example": [ + "import spacy", + "nlp = spacy.blank('en')", + "nlp.add_pipe('opentapioca')", + "doc = nlp('Christian Drosten works in Germany.')", + "for span in doc.ents:", + " print((span.text, span.kb_id_, span.label_, span._.description, span._.score))", + "# ('Christian Drosten', 'Q1079331', 'PERSON', 'German virologist and university teacher', 3.6533377082098895)", + "# ('Germany', 'Q183', 'LOC', 'sovereign state in Central Europe', 2.1099332471902863)", + "## Check also span._.types, span._.aliases, span._.rank" + ], + "category": ["models", "pipeline"], + "tags": ["NER", "NEL"] + "author": "Renat Shigapov", + "author_links": { + "twitter": "_shigapov", + "github": "shigapov" + } + }, { "id": "spacy_hunspell", "slogan": "Add spellchecking and spelling suggestions to your spaCy pipeline using Hunspell", From 8940e0baca769be457df976bfab9531eb5d86e40 Mon Sep 17 00:00:00 2001 From: Renat Shigapov <57352291+shigapov@users.noreply.github.com> Date: Thu, 9 Sep 2021 15:33:29 +0200 Subject: [PATCH 2/9] add agreement --- .github/contributors/shigapov.md | 106 +++++++++++++++++++++++++++++++ 1 file changed, 106 insertions(+) create 
mode 100644 .github/contributors/shigapov.md diff --git a/.github/contributors/shigapov.md b/.github/contributors/shigapov.md new file mode 100644 index 000000000..3c24c7982 --- /dev/null +++ b/.github/contributors/shigapov.md @@ -0,0 +1,106 @@ +# spaCy contributor agreement + +This spaCy Contributor Agreement (**"SCA"**) is based on the +[Oracle Contributor Agreement](http://www.oracle.com/technetwork/oca-405177.pdf). +The SCA applies to any contribution that you make to any product or project +managed by us (the **"project"**), and sets out the intellectual property rights +you grant to us in the contributed materials. The term **"us"** shall mean +[ExplosionAI GmbH](https://explosion.ai/legal). The term +**"you"** shall mean the person or entity identified below. + +If you agree to be bound by these terms, fill in the information requested +below and include the filled-in version with your first pull request, under the +folder [`.github/contributors/`](/.github/contributors/). The name of the file +should be your GitHub username, with the extension `.md`. For example, the user +example_user would create the file `.github/contributors/example_user.md`. + +Read this agreement carefully before signing. These terms and conditions +constitute a binding legal agreement. + +## Contributor Agreement + +1. The term "contribution" or "contributed materials" means any source code, +object code, patch, tool, sample, graphic, specification, manual, +documentation, or any other material posted or submitted by you to the project. + +2. With respect to any worldwide copyrights, or copyright applications and +registrations, in your contribution: + + * you hereby assign to us joint ownership, and to the extent that such + assignment is or becomes invalid, ineffective or unenforceable, you hereby + grant to us a perpetual, irrevocable, non-exclusive, worldwide, no-charge, + royalty-free, unrestricted license to exercise all rights under those + copyrights. 
This includes, at our option, the right to sublicense these same + rights to third parties through multiple levels of sublicensees or other + licensing arrangements; + + * you agree that each of us can do all things in relation to your + contribution as if each of us were the sole owners, and if one of us makes + a derivative work of your contribution, the one who makes the derivative + work (or has it made) will be the sole owner of that derivative work; + + * you agree that you will not assert any moral rights in your contribution + against us, our licensees or transferees; + + * you agree that we may register a copyright in your contribution and + exercise all ownership rights associated with it; and + + * you agree that neither of us has any duty to consult with, obtain the + consent of, pay or render an accounting to the other for any use or + distribution of your contribution. + +3. With respect to any patents you own, or that you can license without payment +to any third party, you hereby grant to us a perpetual, irrevocable, +non-exclusive, worldwide, no-charge, royalty-free license to: + + * make, have made, use, sell, offer to sell, import, and otherwise transfer + your contribution in whole or in part, alone or in combination with or + included in any product, work or materials arising out of the project to + which your contribution was submitted, and + + * at our option, to sublicense these same rights to third parties through + multiple levels of sublicensees or other licensing arrangements. + +4. Except as set out above, you keep all right, title, and interest in your +contribution. The rights that you grant to us under these terms are effective +on the date you first submitted a contribution to us, even if your submission +took place before the date you sign these terms. + +5. 
You covenant, represent, warrant and agree that: + + * Each contribution that you submit is and shall be an original work of + authorship and you can legally grant the rights set out in this SCA; + + * to the best of your knowledge, each contribution will not violate any + third party's copyrights, trademarks, patents, or other intellectual + property rights; and + + * each contribution shall be in compliance with U.S. export control laws and + other applicable export and import laws. You agree to notify us if you + become aware of any circumstance which would make any of the foregoing + representations inaccurate in any respect. We may publicly disclose your + participation in the project, including the fact that you have signed the SCA. + +6. This SCA is governed by the laws of the State of California and applicable +U.S. Federal law. Any choice of law rules will not apply. + +7. Please place an “x” on one of the applicable statements below. Please do NOT +mark both statements: + + * [x] I am signing on behalf of myself as an individual and no other person + or entity, including my employer, has or will have rights with respect to my + contributions. + + * [ ] I am signing on behalf of my employer or a legal entity and I have the + actual authority to contractually bind that entity. 
+ +## Contributor Details + +| Field | Entry | +|------------------------------- | ------------------------ | +| Name | Renat Shigapov | +| Company name (if applicable) | | +| Title or role (if applicable) | | +| Date | 2021-09-09 | +| GitHub username | shigapov | +| Website (optional) | | From c1927fe9943d236f6a839cd356afcae3a9e0adf7 Mon Sep 17 00:00:00 2001 From: Renat Shigapov <57352291+shigapov@users.noreply.github.com> Date: Thu, 9 Sep 2021 15:37:34 +0200 Subject: [PATCH 3/9] fix misprint in tags --- website/meta/universe.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/meta/universe.json b/website/meta/universe.json index c5e8017ab..04163c61e 100644 --- a/website/meta/universe.json +++ b/website/meta/universe.json @@ -661,7 +661,7 @@ "## Check also span._.types, span._.aliases, span._.rank" ], "category": ["models", "pipeline"], - "tags": ["NER", "NEL"] + "tags": ["NER", "NEL"], "author": "Renat Shigapov", "author_links": { "twitter": "_shigapov", From 7562fb53545f396848bd55e774adf1a6b1686af8 Mon Sep 17 00:00:00 2001 From: Renat Shigapov <57352291+shigapov@users.noreply.github.com> Date: Mon, 13 Sep 2021 08:06:54 +0200 Subject: [PATCH 4/9] add links to entities into the TPL_ENT-template --- spacy/displacy/templates.py | 1 + 1 file changed, 1 insertion(+) diff --git a/spacy/displacy/templates.py b/spacy/displacy/templates.py index b9cbf717b..c44c1e2a6 100644 --- a/spacy/displacy/templates.py +++ b/spacy/displacy/templates.py @@ -52,6 +52,7 @@ TPL_ENT = """ {text} {label} + {kb_id} """ From f4b5c4209d6cb3748534ad072784a3f9c4fa194c Mon Sep 17 00:00:00 2001 From: Renat Shigapov <57352291+shigapov@users.noreply.github.com> Date: Mon, 13 Sep 2021 08:15:07 +0200 Subject: [PATCH 5/9] specify kb_id and kb_url for URL visualisation --- spacy/displacy/render.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/spacy/displacy/render.py b/spacy/displacy/render.py index ba56beca3..40269e74f 100644 --- 
a/spacy/displacy/render.py +++ b/spacy/displacy/render.py @@ -305,7 +305,7 @@ class EntityRenderer: """Render entities in text. text (str): Original text. - spans (list): Individual entity spans and their start, end and label. + spans (list): Individual entity spans and their start, end, label, kb_id and kb_url. title (str / None): Document title set in Doc.user_data['title']. """ markup = "" @@ -314,6 +314,8 @@ class EntityRenderer: label = span["label"] start = span["start"] end = span["end"] + kb_id = str(span.get("kb_id") or "") + kb_url = str(span.get("kb_url") or "") additional_params = span.get("params", {}) entity = escape_html(text[start:end]) fragments = text[offset:start].split("\n") @@ -323,7 +325,7 @@ class EntityRenderer: markup += "
" if self.ents is None or label.upper() in self.ents: color = self.colors.get(label.upper(), self.default_color) - ent_settings = {"label": label, "text": entity, "bg": color} + ent_settings = {"label": label, "text": entity, "bg": color, "kb_id": kb_id, "kb_url": kb_url} ent_settings.update(additional_params) markup += self.ent_template.format(**ent_settings) else: From e61d93f8c3715d74038c9bd952fe60418fc6f144 Mon Sep 17 00:00:00 2001 From: Renat Shigapov <57352291+shigapov@users.noreply.github.com> Date: Mon, 13 Sep 2021 08:38:58 +0200 Subject: [PATCH 6/9] add NEL-visualisation to manual-usage --- website/docs/usage/visualizers.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/website/docs/usage/visualizers.md b/website/docs/usage/visualizers.md index cc73e7e67..cb643aed1 100644 --- a/website/docs/usage/visualizers.md +++ b/website/docs/usage/visualizers.md @@ -297,7 +297,7 @@ position. > > ```python > ex = [{"text": "But Google is starting from behind.", -> "ents": [{"start": 4, "end": 10, "label": "ORG"}], +> "ents": [{"start": 4, "end": 10, "label": "ORG", "kb_id": "Q95", "kb_url": "https://www.wikidata.org/entity/Q95"}], > "title": None}] > html = displacy.render(ex, style="ent", manual=True) > ``` @@ -323,7 +323,7 @@ position. 
### ENT input { "text": "But Google is starting from behind.", - "ents": [{"start": 4, "end": 10, "label": "ORG"}], + "ents": [{"start": 4, "end": 10, "label": "ORG", "kb_id": "Q95", "kb_url": "https://www.wikidata.org/entity/Q95"}], "title": None } ``` From 4bf2606adf4cdd2045a6aeec3a8258c5cb3915ed Mon Sep 17 00:00:00 2001 From: Sofie Van Landeghem Date: Thu, 16 Sep 2021 09:22:38 +0200 Subject: [PATCH 7/9] Update spacy/displacy/render.py Co-authored-by: Renat Shigapov <57352291+shigapov@users.noreply.github.com> --- spacy/displacy/render.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/spacy/displacy/render.py b/spacy/displacy/render.py index 40269e74f..bd7178724 100644 --- a/spacy/displacy/render.py +++ b/spacy/displacy/render.py @@ -314,8 +314,8 @@ class EntityRenderer: label = span["label"] start = span["start"] end = span["end"] - kb_id = str(span.get("kb_id") or "") - kb_url = str(span.get("kb_url") or "") + kb_id = span.get("kb_id", "") + kb_url = span.get("kb_url", "") additional_params = span.get("params", {}) entity = escape_html(text[start:end]) fragments = text[offset:start].split("\n") From 00836c2d7d46b0aa405eb1569856f7923da6d1f0 Mon Sep 17 00:00:00 2001 From: Sofie Van Landeghem Date: Thu, 16 Sep 2021 09:23:21 +0200 Subject: [PATCH 8/9] Update spacy/displacy/templates.py --- spacy/displacy/templates.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/spacy/displacy/templates.py b/spacy/displacy/templates.py index c44c1e2a6..051f72cb2 100644 --- a/spacy/displacy/templates.py +++ b/spacy/displacy/templates.py @@ -51,8 +51,9 @@ TPL_ENTS = """ TPL_ENT = """ {text} - {label} + {label} {kb_id} + """ From 57b5fc1995ded2f4bb9dd6305c567cdd4c1151ee Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Thu, 23 Sep 2021 17:58:32 +1000 Subject: [PATCH 9/9] Apply suggestions from code review Co-authored-by: Renat Shigapov <57352291+shigapov@users.noreply.github.com> --- spacy/displacy/render.py | 6 +++++- 
spacy/displacy/templates.py | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/spacy/displacy/render.py b/spacy/displacy/render.py index bd7178724..406824eb4 100644 --- a/spacy/displacy/render.py +++ b/spacy/displacy/render.py @@ -316,6 +316,10 @@ class EntityRenderer: end = span["end"] kb_id = span.get("kb_id", "") kb_url = span.get("kb_url", "") + if kb_id: + kb_link = """{}""".format(kb_url, kb_id) + else: + kb_link = "" additional_params = span.get("params", {}) entity = escape_html(text[start:end]) fragments = text[offset:start].split("\n") @@ -325,7 +329,7 @@ class EntityRenderer: markup += "
" if self.ents is None or label.upper() in self.ents: color = self.colors.get(label.upper(), self.default_color) - ent_settings = {"label": label, "text": entity, "bg": color, "kb_id": kb_id, "kb_url": kb_url} + ent_settings = {"label": label, "text": entity, "bg": color, "kb_link": kb_link} ent_settings.update(additional_params) markup += self.ent_template.format(**ent_settings) else: diff --git a/spacy/displacy/templates.py b/spacy/displacy/templates.py index 051f72cb2..53fcc1873 100644 --- a/spacy/displacy/templates.py +++ b/spacy/displacy/templates.py @@ -52,7 +52,7 @@ TPL_ENT = """ {text} {label} - {kb_id} + {kb_link} """