diff --git a/spacy/errors.py b/spacy/errors.py index 6e7ec49ae..61ff5a037 100644 --- a/spacy/errors.py +++ b/spacy/errors.py @@ -556,6 +556,8 @@ class Errors(object): E979 = ("Cannot convert {type} to an Example object.") E980 = ("Each link annotation should refer to a dictionary with at most one " "identifier mapping to 1.0, and all others to 0.0.") + E981 = ("The offsets of the annotations for 'links' could not be aligned " + "to token boundaries.") E982 = ("The 'ent_iob' attribute of a Token should be an integer indexing " "into {values}, but found {value}.") E983 = ("Invalid key for '{dict}': {key}. Available keys: " diff --git a/spacy/gold/example.pyx b/spacy/gold/example.pyx index 841b233c4..2ecee1821 100644 --- a/spacy/gold/example.pyx +++ b/spacy/gold/example.pyx @@ -396,6 +396,8 @@ def _parse_links(vocab, words, spaces, links): start_char, end_char = index start_token = starts.get(start_char) end_token = ends.get(end_char) + if start_token is None or end_token is None: + raise ValueError(Errors.E981) for i in range(start_token, end_token+1): ent_kb_ids[i] = true_kb_ids[0] diff --git a/spacy/tests/test_new_example.py b/spacy/tests/test_new_example.py index b89654554..58eab4a54 100644 --- a/spacy/tests/test_new_example.py +++ b/spacy/tests/test_new_example.py @@ -230,8 +230,7 @@ def test_Example_from_dict_with_links(annots): [ { "words": ["I", "like", "New", "York", "and", "Berlin", "."], - "entities": [(7, 15, "LOC"), (20, 26, "LOC")], - "links": {(0, 1): {"Q7381115": 1.0, "Q2146908": 0.0}}, + "links": {(7, 14): {"Q7381115": 1.0, "Q2146908": 0.0}}, } ], )