spaCy/examples/pipeline/dummy_entity_linking.py

72 lines
2.2 KiB
Python
Raw Normal View History

# coding: utf-8
from __future__ import unicode_literals
"""Demonstrate how to build a simple knowledge base and run an entity linking algorithm.

The algorithm is currently still a dummy: it simply takes the entity with the highest probability for a given alias.
"""
import spacy
from spacy.kb import KnowledgeBase
2019-03-21 22:17:25 +00:00
def create_kb(vocab):
    """Build a small demo knowledge base around the name "Douglas".

    Registers three entities (each with prob=0.5) and two aliases with their
    candidate entities and probabilities, printing progress as it goes and a
    final size summary.

    vocab: the vocabulary handed to the KnowledgeBase constructor.
    RETURNS: the populated KnowledgeBase.
    """
    kb = KnowledgeBase(vocab=vocab)

    # adding entities
    entities = (
        ("Q1004791", "Douglas"),
        ("Q42", "Douglas Adams"),
        ("Q5301561", "Douglas Haig"),
    )
    for qid, name in entities:
        print("adding entity", qid)
        kb.add_entity(entity_id=qid, entity_name=name, prob=0.5)

    # adding aliases
    print()
    aliases = (
        ("Douglas", ["Q1004791", "Q42", "Q5301561"], [0.1, 0.6, 0.2]),
        ("Douglas Adams", ["Q42"], [0.9]),
    )
    for alias, qids, probs in aliases:
        print("adding alias", alias)
        kb.add_alias(alias=alias, entities=qids, probabilities=probs)

    print()
    print("kb size:", len(kb), kb.get_size_entities(), kb.get_size_aliases())
    return kb
2019-03-21 22:17:25 +00:00
def add_el(kb, nlp):
    """Attach an entity linker backed by `kb` to `nlp` and print a demo run.

    Creates an 'entity_linker' pipe configured with the knowledge base, adds
    it as the last pipeline component, prints the candidates the linker's KB
    returns for two aliases, then processes a sample text and prints the
    entity annotations per token and per entity span.

    kb: the knowledge base passed to the linker via its config.
    nlp: the pipeline object the linker is added to and the text is run through.
    """
    linker_pipe = nlp.create_pipe(name='entity_linker', config={"kb": kb})
    nlp.add_pipe(linker_pipe, last=True)

    # Look the candidates up through the pipeline's linker, not `kb` directly.
    for alias in ("Douglas Adams", "Douglas"):
        found = nlp.linker.kb.get_candidates(alias)
        print()
        print(len(found), "candidate(s) for", alias, ":")
        for cand in found:
            print(" ", cand.entity_id_, cand.entity_name_, cand.prior_prob)

    text = (
        "In The Hitchhiker's Guide to the Galaxy, written by Douglas Adams, "
        "Douglas reminds us to always bring our towel. "
        "The main character in Doug's novel is called Arthur Dent."
    )
    doc = nlp(text)

    print()
    for tok in doc:
        print("token", tok.text, tok.ent_type_, tok.ent_kb_id_)

    print()
    for span in doc.ents:
        print("ent", span.text, span.label_, span.kb_id_)
if __name__ == "__main__":
    # Load the small English model, build the demo KB on its vocab,
    # then add the entity linker and print the demo output.
    nlp = spacy.load("en_core_web_sm")
    my_kb = create_kb(vocab=nlp.vocab)
    add_el(kb=my_kb, nlp=nlp)