From 30d4eb506a4ff19f77ba704d1dbed17931d03c8b Mon Sep 17 00:00:00 2001 From: Adriane Boyd Date: Fri, 18 Jun 2021 10:41:50 +0200 Subject: [PATCH] Fix setting empty entities in Example.from_dict (#8426) --- spacy/tests/training/test_new_example.py | 21 +++++++++++++++++++++ spacy/training/example.pyx | 2 +- 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/spacy/tests/training/test_new_example.py b/spacy/tests/training/test_new_example.py index ba58ea96d..4dd90f416 100644 --- a/spacy/tests/training/test_new_example.py +++ b/spacy/tests/training/test_new_example.py @@ -182,6 +182,27 @@ def test_Example_from_dict_with_entities(annots): assert example.reference[5].ent_type_ == "LOC" +def test_Example_from_dict_with_empty_entities(): + annots = { + "words": ["I", "like", "New", "York", "and", "Berlin", "."], + "entities": [], + } + vocab = Vocab() + predicted = Doc(vocab, words=annots["words"]) + example = Example.from_dict(predicted, annots) + # entities as empty list sets everything to O + assert example.reference.has_annotation("ENT_IOB") + assert len(list(example.reference.ents)) == 0 + assert all(token.ent_iob_ == "O" for token in example.reference) + # various unset/missing entities leaves entities unset + annots["entities"] = None + example = Example.from_dict(predicted, annots) + assert not example.reference.has_annotation("ENT_IOB") + annots.pop("entities", None) + example = Example.from_dict(predicted, annots) + assert not example.reference.has_annotation("ENT_IOB") + + @pytest.mark.parametrize( "annots", [ diff --git a/spacy/training/example.pyx b/spacy/training/example.pyx index a1f3f98b3..732203e7b 100644 --- a/spacy/training/example.pyx +++ b/spacy/training/example.pyx @@ -420,7 +420,7 @@ def _fix_legacy_dict_data(example_dict): token_dict = example_dict.get("token_annotation", {}) doc_dict = example_dict.get("doc_annotation", {}) for key, value in example_dict.items(): - if value: + if value is not None: if key in ("token_annotation", "doc_annotation"): pass elif key == "ids":