From 938436455a0533f46efea6552e7c53ce085a416d Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Sun, 25 Mar 2018 22:16:19 +0200
Subject: [PATCH] Add test for ent_iob during span merge

---
Reviewer notes (placed after the "---" separator, so they are not part of
the commit message):

  * Adds test_spans_entity_merge_iob to spacy/tests/doc/test_span_merge.py.
    The test takes no fixture arguments; it builds a five-word Doc
    ("a".."e") directly from a fresh Vocab, which is why Vocab and Doc are
    newly imported at the top of the test module.
  * Two entities are assigned through doc.ents as (label, start, end)
    tuples: 'ent-abc' with offsets (0, 3) and 'ent-d' with offsets (3, 4).
    The labels are registered with doc.vocab.strings.add() before use.
  * Before merging, the test asserts ent_iob_ is "B", "I", "I", "B" for
    tokens 0..3.
  * After doc[0:1].merge(), it asserts doc[0].ent_iob_ is still "B" and
    doc[1].ent_iob_ is still "I".
  * NOTE(review): the merged span doc[0:1] covers a single token, so the
    token count is presumably unchanged by the merge -- confirm this is the
    intended regression scenario rather than a multi-token merge.

 spacy/tests/doc/test_span_merge.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/spacy/tests/doc/test_span_merge.py b/spacy/tests/doc/test_span_merge.py
index 61f8ca50d..ae1f4f4a1 100644
--- a/spacy/tests/doc/test_span_merge.py
+++ b/spacy/tests/doc/test_span_merge.py
@@ -2,6 +2,8 @@
 from __future__ import unicode_literals
 
 from ..util import get_doc
+from ...vocab import Vocab
+from ...tokens import Doc
 
 import pytest
 
@@ -95,6 +97,21 @@ def test_spans_entity_merge(en_tokenizer):
     assert len(doc) == 15
 
 
+def test_spans_entity_merge_iob():
+    # Test entity IOB stays consistent after merging
+    words = ["a", "b", "c", "d", "e"]
+    doc = Doc(Vocab(), words=words)
+    doc.ents = [(doc.vocab.strings.add('ent-abc'), 0, 3),
+                (doc.vocab.strings.add('ent-d'), 3, 4)]
+    assert doc[0].ent_iob_ == "B"
+    assert doc[1].ent_iob_ == "I"
+    assert doc[2].ent_iob_ == "I"
+    assert doc[3].ent_iob_ == "B"
+    doc[0:1].merge()
+    assert doc[0].ent_iob_ == "B"
+    assert doc[1].ent_iob_ == "I"
+
+
 def test_spans_sentence_update_after_merge(en_tokenizer):
     text = "Stewart Lee is a stand up comedian. He lives in England and loves Joe Pasquale."
     heads = [1, 1, 0, 1, 2, -1, -4, -5, 1, 0, -1, -1, -3, -4, 1, -2, -7]