From fc3cb1fa9ebccc9d2604bcdaede3e7961efe29de Mon Sep 17 00:00:00 2001
From: Sofie Van Landeghem <svlandeg@users.noreply.github.com>
Date: Mon, 29 Jun 2020 13:59:17 +0200
Subject: [PATCH] NER align tests (#5656)

* one_to_man works better. misalignment doesn't yet.

* fix tests

* restore example

* xfail alignment tests
---
 spacy/gold/example.pyx   |  8 ++--
 spacy/tests/test_gold.py | 81 ++++++++++++++++++++++++++--------------
 2 files changed, 57 insertions(+), 32 deletions(-)

diff --git a/spacy/gold/example.pyx b/spacy/gold/example.pyx
index 169965c3d..505c2a633 100644
--- a/spacy/gold/example.pyx
+++ b/spacy/gold/example.pyx
@@ -47,7 +47,7 @@ cdef class Example:
 
         def __set__(self, doc):
             self.x = doc
-    
+
     property reference:
         def __get__(self):
             return self.y
@@ -60,7 +60,7 @@ cdef class Example:
             self.x.copy(),
             self.y.copy()
         )
- 
+
     @classmethod
     def from_dict(cls, Doc predicted, dict example_dict):
         if example_dict is None:
@@ -78,7 +78,7 @@ cdef class Example:
             predicted,
             annotations2doc(predicted.vocab, tok_dict, doc_dict)
         )
-    
+
     @property
     def alignment(self):
         if self._alignment is None:
@@ -151,7 +151,7 @@ cdef class Example:
                     x_text = self.x.text[end_char:]
                     x_text_offset = end_char
         x_tags = biluo_tags_from_offsets(
-            self.x, 
+            self.x,
             [(e.start_char, e.end_char, e.label_) for e in x_spans],
             missing=None
         )
diff --git a/spacy/tests/test_gold.py b/spacy/tests/test_gold.py
index 17f0933d1..96acb8982 100644
--- a/spacy/tests/test_gold.py
+++ b/spacy/tests/test_gold.py
@@ -230,14 +230,13 @@ def test_json2docs_no_ner(en_vocab):
         Doc(
             doc.vocab,
             words=[w.text for w in doc],
-            spaces=[bool(w.whitespace_) for w in doc]
+            spaces=[bool(w.whitespace_) for w in doc],
         ),
-        doc
+        doc,
     )
     ner_tags = eg.get_aligned_ner()
     assert ner_tags == [None, None, None, None, None]
 
-         
 
 def test_split_sentences(en_vocab):
     words = ["I", "flew", "to", "San Francisco Valley", "had", "loads of fun"]
@@ -283,8 +282,8 @@ def test_split_sentences(en_vocab):
     assert split_examples[1].text == "had loads of fun "
 
 
-def test_gold_biluo_different_tokenization(en_vocab, en_tokenizer):
-    # one-to-many
+@pytest.mark.xfail(reason="Alignment should be fixed after example refactor")
+def test_gold_biluo_one_to_many(en_vocab, en_tokenizer):
     words = ["I", "flew to", "San Francisco Valley", "."]
     spaces = [True, True, False, False]
     doc = Doc(en_vocab, words=words, spaces=spaces)
@@ -292,9 +291,28 @@ def test_gold_biluo_different_tokenization(en_vocab, en_tokenizer):
     gold_words = ["I", "flew", "to", "San", "Francisco", "Valley", "."]
     example = Example.from_dict(doc, {"words": gold_words, "entities": entities})
     ner_tags = example.get_aligned_ner()
+    assert ner_tags == ["O", "O", "U-LOC", "O"]
+
+    entities = [
+        (len("I "), len("I flew to"), "ORG"),
+        (len("I flew to "), len("I flew to San Francisco Valley"), "LOC"),
+    ]
+    gold_words = ["I", "flew", "to", "San", "Francisco", "Valley", "."]
+    example = Example.from_dict(doc, {"words": gold_words, "entities": entities})
+    ner_tags = example.get_aligned_ner()
+    assert ner_tags == ["O", "U-ORG", "U-LOC", "O"]
+
+    entities = [
+        (len("I "), len("I flew"), "ORG"),
+        (len("I flew to "), len("I flew to San Francisco Valley"), "LOC"),
+    ]
+    gold_words = ["I", "flew", "to", "San", "Francisco", "Valley", "."]
+    example = Example.from_dict(doc, {"words": gold_words, "entities": entities})
+    ner_tags = example.get_aligned_ner()
     assert ner_tags == ["O", None, "U-LOC", "O"]
-    
-    # many-to-one
+
+
+def test_gold_biluo_many_to_one(en_vocab, en_tokenizer):
     words = ["I", "flew", "to", "San", "Francisco", "Valley", "."]
     spaces = [True, True, True, True, True, False, False]
     doc = Doc(en_vocab, words=words, spaces=spaces)
@@ -304,31 +322,38 @@ def test_gold_biluo_different_tokenization(en_vocab, en_tokenizer):
     ner_tags = example.get_aligned_ner()
     assert ner_tags == ["O", "O", "O", "B-LOC", "I-LOC", "L-LOC", "O"]
 
-    # misaligned
+    entities = [
+        (len("I "), len("I flew to"), "ORG"),
+        (len("I flew to "), len("I flew to San Francisco Valley"), "LOC"),
+    ]
+    gold_words = ["I", "flew to", "San Francisco Valley", "."]
+    example = Example.from_dict(doc, {"words": gold_words, "entities": entities})
+    ner_tags = example.get_aligned_ner()
+    assert ner_tags == ["O", "B-ORG", "L-ORG", "B-LOC", "I-LOC", "L-LOC", "O"]
+
+
+@pytest.mark.xfail(reason="Alignment should be fixed after example refactor")
+def test_gold_biluo_misaligned(en_vocab, en_tokenizer):
     words = ["I flew", "to", "San Francisco", "Valley", "."]
     spaces = [True, True, True, False, False]
     doc = Doc(en_vocab, words=words, spaces=spaces)
-    offset_start = len("I flew to ")
-    offset_end = len("I flew to San Francisco Valley")
-    entities = [(offset_start, offset_end, "LOC")]
-    links = {(offset_start, offset_end): {"Q816843": 1.0}}
+    entities = [(len("I flew to "), len("I flew to San Francisco Valley"), "LOC")]
     gold_words = ["I", "flew to", "San", "Francisco Valley", "."]
-    example = Example.from_dict(
-        doc, {"words": gold_words, "entities": entities, "links": links}
-    )
+    example = Example.from_dict(doc, {"words": gold_words, "entities": entities})
     ner_tags = example.get_aligned_ner()
-    assert ner_tags == [None, "O", "B-LOC", "L-LOC", "O"]
-    #assert example.get_aligned("ENT_KB_ID", as_string=True) == [
-    #    "",
-    #    "",
-    #    "Q816843",
-    #    "Q816843",
-    #    "",
-    #]
-    #assert example.to_dict()["doc_annotation"]["links"][(offset_start, offset_end)] == {
-    #    "Q816843": 1.0
-    #}
+    assert ner_tags == ["O", "O", "B-LOC", "L-LOC", "O"]
 
+    entities = [
+        (len("I "), len("I flew to"), "ORG"),
+        (len("I flew to "), len("I flew to San Francisco Valley"), "LOC"),
+    ]
+    gold_words = ["I", "flew to", "San", "Francisco Valley", "."]
+    example = Example.from_dict(doc, {"words": gold_words, "entities": entities})
+    ner_tags = example.get_aligned_ner()
+    assert ner_tags == [None, None, "B-LOC", "L-LOC", "O"]
+
+
+def test_gold_biluo_additional_whitespace(en_vocab, en_tokenizer):
     # additional whitespace tokens in GoldParse words
     words, spaces = get_words_and_spaces(
         ["I", "flew", "to", "San Francisco", "Valley", "."],
@@ -344,7 +369,8 @@ def test_gold_biluo_different_tokenization(en_vocab, en_tokenizer):
     ner_tags = example.get_aligned_ner()
     assert ner_tags == ["O", "O", "O", "O", "B-LOC", "L-LOC", "O"]
 
-    # from issue #4791
+
+def test_gold_biluo_4791(en_vocab, en_tokenizer):
     doc = en_tokenizer("I'll return the ₹54 amount")
     gold_words = ["I", "'ll", "return", "the", "₹", "54", "amount"]
     gold_spaces = [False, True, True, True, False, True, False]
@@ -593,7 +619,6 @@ def test_tuple_format_implicit_invalid():
         _train(train_data)
 
 
-
 def _train(train_data):
     nlp = English()
     ner = nlp.create_pipe("ner")