diff --git a/examples/pipeline/wikidata_entity_linking.py b/examples/pipeline/wikidata_entity_linking.py index 287e4a50b..02a766d0f 100644 --- a/examples/pipeline/wikidata_entity_linking.py +++ b/examples/pipeline/wikidata_entity_linking.py @@ -61,13 +61,13 @@ def create_kb(vocab, max_entities_per_alias, min_occ, to_print=False): entity_frequencies = _get_entity_frequencies(entities=title_list) print() - print("3. _add_entities", datetime.datetime.now()) + print("3. adding", len(entity_list), "entities", datetime.datetime.now()) print() kb.set_entities(entity_list=entity_list, prob_list=entity_frequencies, vector_list=None, feature_list=None) # _add_entities(kb, entities=entity_list, probs=entity_frequencies, to_print=to_print) print() - print("4. _add_aliases", datetime.datetime.now()) + print("4. adding aliases", datetime.datetime.now()) print() _add_aliases(kb, title_to_id=title_to_id, max_entities_per_alias=max_entities_per_alias, min_occ=min_occ,) @@ -171,7 +171,10 @@ def _add_aliases(kb, title_to_id, max_entities_per_alias, min_occ, to_print=Fals prior_probs.append(p_entity_givenalias) if selected_entities: - kb.add_alias(alias=previous_alias, entities=selected_entities, probabilities=prior_probs) + try: + kb.add_alias(alias=previous_alias, entities=selected_entities, probabilities=prior_probs) + except ValueError as e: + print(e) total_count = 0 counts = list() entities = list() diff --git a/spacy/kb.pyx b/spacy/kb.pyx index ba870661d..d471130d0 100644 --- a/spacy/kb.pyx +++ b/spacy/kb.pyx @@ -179,9 +179,9 @@ cdef class KnowledgeBase: entities_length=len(entities), probabilities_length=len(probabilities))) - # Throw an error if the probabilities sum up to more than 1 + # Throw an error if the probabilities sum up to more than 1 (allow for some rounding errors) prob_sum = sum(probabilities) - if prob_sum > 1: + if prob_sum > 1.00001: raise ValueError(Errors.E133.format(alias=alias, sum=prob_sum)) cdef hash_t alias_hash = self.vocab.strings.add(alias)