diff --git a/spacy/tests/training/test_new_example.py b/spacy/tests/training/test_new_example.py index ba58ea96d..4dd90f416 100644 --- a/spacy/tests/training/test_new_example.py +++ b/spacy/tests/training/test_new_example.py @@ -182,6 +182,27 @@ def test_Example_from_dict_with_entities(annots): assert example.reference[5].ent_type_ == "LOC" +def test_Example_from_dict_with_empty_entities(): + annots = { + "words": ["I", "like", "New", "York", "and", "Berlin", "."], + "entities": [], + } + vocab = Vocab() + predicted = Doc(vocab, words=annots["words"]) + example = Example.from_dict(predicted, annots) + # entities as empty list sets everything to O + assert example.reference.has_annotation("ENT_IOB") + assert len(list(example.reference.ents)) == 0 + assert all(token.ent_iob_ == "O" for token in example.reference) + # various unset/missing entities leaves entities unset + annots["entities"] = None + example = Example.from_dict(predicted, annots) + assert not example.reference.has_annotation("ENT_IOB") + annots.pop("entities", None) + example = Example.from_dict(predicted, annots) + assert not example.reference.has_annotation("ENT_IOB") + + @pytest.mark.parametrize( "annots", [ diff --git a/spacy/training/example.pyx b/spacy/training/example.pyx index a1f3f98b3..732203e7b 100644 --- a/spacy/training/example.pyx +++ b/spacy/training/example.pyx @@ -420,7 +420,7 @@ def _fix_legacy_dict_data(example_dict): token_dict = example_dict.get("token_annotation", {}) doc_dict = example_dict.get("doc_annotation", {}) for key, value in example_dict.items(): - if value: + if value is not None: if key in ("token_annotation", "doc_annotation"): pass elif key == "ids":