From 938436455a0533f46efea6552e7c53ce085a416d Mon Sep 17 00:00:00 2001
From: Matthew Honnibal <honnibal+gh@gmail.com>
Date: Sun, 25 Mar 2018 22:16:19 +0200
Subject: [PATCH] Add test for ent_iob during span merge

---
 spacy/tests/doc/test_span_merge.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/spacy/tests/doc/test_span_merge.py b/spacy/tests/doc/test_span_merge.py
index 61f8ca50d..ae1f4f4a1 100644
--- a/spacy/tests/doc/test_span_merge.py
+++ b/spacy/tests/doc/test_span_merge.py
@@ -2,6 +2,8 @@
 from __future__ import unicode_literals
 
 from ..util import get_doc
+from ...vocab import Vocab
+from ...tokens import Doc
 
 import pytest
 
@@ -95,6 +97,21 @@ def test_spans_entity_merge(en_tokenizer):
     assert len(doc) == 15
 
 
+def test_spans_entity_merge_iob():
+    # Check that the entity IOB tags stay consistent after merging a span.
+    words = ["a", "b", "c", "d", "e"]
+    doc = Doc(Vocab(), words=words)  # built on a fresh Vocab; no tokenizer fixture is used
+    doc.ents = [(doc.vocab.strings.add('ent-abc'), 0, 3),  # (label, start, end); end looks exclusive
+                (doc.vocab.strings.add('ent-d'), 3, 4)]
+    assert doc[0].ent_iob_ == "B"  # tokens 0-2 make up the first entity
+    assert doc[1].ent_iob_ == "I"
+    assert doc[2].ent_iob_ == "I"
+    assert doc[3].ent_iob_ == "B"  # token 3 begins the second entity
+    doc[0:1].merge()  # NOTE(review): 0:1 looks like a one-token span -- confirm 0:2 was not intended
+    assert doc[0].ent_iob_ == "B"  # the first entity must still begin at token 0
+    assert doc[1].ent_iob_ == "I"  # and the following token must still be inside it
+
+
 def test_spans_sentence_update_after_merge(en_tokenizer):
     text = "Stewart Lee is a stand up comedian. He lives in England and loves Joe Pasquale."
     heads = [1, 1, 0, 1, 2, -1, -4, -5, 1, 0, -1, -1, -3, -4, 1, -2, -7]