From 5d56eb70d75dece9c9f485dd0d2fcc6c3e131309 Mon Sep 17 00:00:00 2001
From: Ines Montani <ines@ines.io>
Date: Thu, 27 Sep 2018 16:41:57 +0200
Subject: [PATCH] Tidy up tests

---
 spacy/tests/regression/test_issue2001-2500.py | 16 ++++++++
 spacy/tests/regression/test_issue2179.py      | 16 --------
 spacy/tests/regression/test_issue2626.py      | 11 +++---
 spacy/tests/regression/test_issue2671.py      | 37 ++++++++++---------
 spacy/tests/regression/test_issue2772.py      |  7 +++-
 5 files changed, 46 insertions(+), 41 deletions(-)
 delete mode 100644 spacy/tests/regression/test_issue2179.py

diff --git a/spacy/tests/regression/test_issue2001-2500.py b/spacy/tests/regression/test_issue2001-2500.py
index d9febb152..2c81651fd 100644
--- a/spacy/tests/regression/test_issue2001-2500.py
+++ b/spacy/tests/regression/test_issue2001-2500.py
@@ -5,10 +5,26 @@ import pytest
 from spacy.tokens import Doc
 from spacy.displacy import render
 from spacy.gold import iob_to_biluo
+from spacy.lang.it import Italian
 
 from ..util import add_vecs_to_vocab
 
 
+@pytest.mark.xfail
+def test_issue2179():
+    """Check that NER deserialization doesn't add spurious 'extra_labels'."""
+    source = Italian()
+    source_ner = source.create_pipe('ner')
+    source_ner.add_label('CITIZENSHIP')
+    source.add_pipe(source_ner)
+    source.begin_training()
+    restored = Italian()
+    restored.add_pipe(restored.create_pipe('ner'))
+    restored.from_bytes(source.to_bytes())
+    assert 'extra_labels' not in restored.get_pipe('ner').cfg
+    assert restored.get_pipe('ner').labels == ['CITIZENSHIP']
+
+
 def test_issue2219(en_vocab):
     vectors = [("a", [1, 2, 3]), ("letter", [4, 5, 6])]
     add_vecs_to_vocab(en_vocab, vectors)
diff --git a/spacy/tests/regression/test_issue2179.py b/spacy/tests/regression/test_issue2179.py
deleted file mode 100644
index d4f194c0e..000000000
--- a/spacy/tests/regression/test_issue2179.py
+++ /dev/null
@@ -1,16 +0,0 @@
-'''Test that spurious 'extra_labels' aren't created when initializing NER.'''
-import pytest
-from ... import blank
-
-@pytest.mark.xfail
-def test_issue2179():
-    nlp = blank('it')
-    ner = nlp.create_pipe('ner')
-    ner.add_label('CITIZENSHIP')
-    nlp.add_pipe(ner)
-    nlp.begin_training()
-    nlp2 = blank('it')
-    nlp2.add_pipe(nlp2.create_pipe('ner'))
-    nlp2.from_bytes(nlp.to_bytes())
-    assert 'extra_labels' not in nlp2.get_pipe('ner').cfg
-    assert nlp2.get_pipe('ner').labels == ['CITIZENSHIP']
diff --git a/spacy/tests/regression/test_issue2626.py b/spacy/tests/regression/test_issue2626.py
index 9580c3c4b..afe8bc055 100644
--- a/spacy/tests/regression/test_issue2626.py
+++ b/spacy/tests/regression/test_issue2626.py
@@ -1,11 +1,10 @@
+# coding: utf8
 from __future__ import unicode_literals
-import spacy
 
-def test_issue2626():
-    '''Check that this sentence doesn't cause an infinite loop in the tokenizer.'''
-    nlp = spacy.blank('en')
+
+def test_issue2626(en_tokenizer):
+    """Check that the text doesn't cause an infinite loop in the tokenizer."""
     text = """
     ABLEItemColumn IAcceptance Limits of ErrorIn-Service Limits of ErrorColumn IIColumn IIIColumn IVColumn VComputed VolumeUnder Registration of\xa0VolumeOver Registration of\xa0VolumeUnder Registration of\xa0VolumeOver Registration of\xa0VolumeCubic FeetCubic FeetCubic FeetCubic FeetCubic Feet1Up to 10.0100.0050.0100.005220.0200.0100.0200.010350.0360.0180.0360.0184100.0500.0250.0500.0255Over 100.5% of computed volume0.25% of computed volume0.5% of computed volume0.25% of computed volume TABLE ItemColumn IAcceptance Limits of ErrorIn-Service Limits of ErrorColumn IIColumn IIIColumn IVColumn VComputed VolumeUnder Registration of\xa0VolumeOver Registration of\xa0VolumeUnder Registration of\xa0VolumeOver Registration of\xa0VolumeCubic FeetCubic FeetCubic FeetCubic FeetCubic Feet1Up to 10.0100.0050.0100.005220.0200.0100.0200.010350.0360.0180.0360.0184100.0500.0250.0500.0255Over 100.5% of computed volume0.25% of computed volume0.5% of computed volume0.25% of computed volume ItemColumn IAcceptance Limits of ErrorIn-Service Limits of ErrorColumn IIColumn IIIColumn IVColumn VComputed VolumeUnder Registration of\xa0VolumeOver Registration of\xa0VolumeUnder Registration of\xa0VolumeOver Registration of\xa0VolumeCubic FeetCubic FeetCubic FeetCubic FeetCubic Feet1Up to 10.0100.0050.0100.005220.0200.0100.0200.010350.0360.0180.0360.0184100.0500.0250.0500.0255Over 100.5% of computed volume0.25% of computed volume0.5% of computed volume0.25% of computed volume
     """
-    doc = nlp.make_doc(text)
-
+    doc = en_tokenizer(text)
diff --git a/spacy/tests/regression/test_issue2671.py b/spacy/tests/regression/test_issue2671.py
index ea33d69c7..561cb2a9e 100644
--- a/spacy/tests/regression/test_issue2671.py
+++ b/spacy/tests/regression/test_issue2671.py
@@ -1,29 +1,30 @@
 # coding: utf-8
 from __future__ import unicode_literals
-import pytest
-from ...lang.en import English
-from ...matcher import Matcher
 
-def get_rule_id(nlp, matcher, doc):
-    matches = matcher(doc)
-    for match_id, start, end in matches:
-        rule_id = nlp.vocab.strings[match_id]
-        span = doc[start:end]
-        return rule_id
+import pytest
+from spacy.lang.en import English
+from spacy.matcher import Matcher
 
 
 def test_issue2671():
-    '''Ensure the correct entity ID is returned for matches with quantifiers.
+    """Ensure the correct entity ID is returned for matches with quantifiers.
     See also #2675
-    '''
+    """
+    def get_rule_id(nlp, matcher, doc):
+        """Return the string ID of the first match (None if no match)."""
+        # Only the first match is needed, so return on the first iteration.
+        for match_id, _start, _end in matcher(doc):
+            return nlp.vocab.strings[match_id]
+        return None
+
     nlp = English()
     matcher = Matcher(nlp.vocab)
-
-    pattern = [{'LOWER': 'high'}, {'IS_PUNCT': True, 'OP': '?'}, {'LOWER': 'adrenaline'}]
-    matcher.add("test_pattern", None, pattern)
-
+    pattern_id = 'test_pattern'
+    pattern = [{'LOWER': 'high'},
+               {'IS_PUNCT': True, 'OP': '?'},
+               {'LOWER': 'adrenaline'}]
+    matcher.add(pattern_id, None, pattern)
     doc1 = nlp("This is a high-adrenaline situation.")
     doc2 = nlp("This is a high adrenaline situation.")
-    # Works correctly
-    assert get_rule_id(nlp, matcher, doc1) == 'test_pattern'
-    assert get_rule_id(nlp, matcher, doc2) == 'test_pattern'
+    assert get_rule_id(nlp, matcher, doc1) == pattern_id
+    assert get_rule_id(nlp, matcher, doc2) == pattern_id
diff --git a/spacy/tests/regression/test_issue2772.py b/spacy/tests/regression/test_issue2772.py
index c9e0cf0f2..d8188c71c 100644
--- a/spacy/tests/regression/test_issue2772.py
+++ b/spacy/tests/regression/test_issue2772.py
@@ -1,8 +1,13 @@
-'''Test that deprojectivization doesn't mess up sentence boundaries.'''
+# coding: utf-8
+from __future__ import unicode_literals
+
 import pytest
+
 from ..util import get_doc
 
+
 def test_issue2772(en_vocab):
+    """Test that deprojectivization doesn't mess up sentence boundaries."""
     words = 'When we write or communicate virtually , we can hide our true feelings .'.split()
     # A tree with a non-projective (i.e. crossing) arc
     # The arcs (0, 4) and (2, 9) cross.