From 0f28418446581dd5df8807f44dcfa72371bdbd98 Mon Sep 17 00:00:00 2001
From: Adriane Boyd
Date: Wed, 4 Sep 2019 20:42:24 +0200
Subject: [PATCH] Add regression test for #1061 back to test suite

---
 spacy/tests/regression/test_issue1001-1500.py | 22 +++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/spacy/tests/regression/test_issue1001-1500.py b/spacy/tests/regression/test_issue1001-1500.py
index 15da1061b..9074b34b7 100644
--- a/spacy/tests/regression/test_issue1001-1500.py
+++ b/spacy/tests/regression/test_issue1001-1500.py
@@ -13,6 +13,28 @@ from spacy.lemmatizer import Lemmatizer
 from spacy.symbols import ORTH, LEMMA, POS, VERB, VerbForm_part
 
 
+@pytest.mark.xfail
+def test_issue1061():
+    '''Test special-case works after tokenizing. Was caching problem.'''
+    text = 'I like _MATH_ even _MATH_ when _MATH_, except when _MATH_ is _MATH_! but not _MATH_.'
+    tokenizer = English.Defaults.create_tokenizer()
+    doc = tokenizer(text)
+    assert 'MATH' in [w.text for w in doc]
+    assert '_MATH_' not in [w.text for w in doc]
+
+    tokenizer.add_special_case('_MATH_', [{ORTH: '_MATH_'}])
+    doc = tokenizer(text)
+    assert '_MATH_' in [w.text for w in doc]
+    assert 'MATH' not in [w.text for w in doc]
+
+    # For sanity, check it works when pipeline is clean.
+    tokenizer = English.Defaults.create_tokenizer()
+    tokenizer.add_special_case('_MATH_', [{ORTH: '_MATH_'}])
+    doc = tokenizer(text)
+    assert '_MATH_' in [w.text for w in doc]
+    assert 'MATH' not in [w.text for w in doc]
+
+
 @pytest.mark.xfail(
     reason="g is split of as a unit, as the suffix regular expression can not look back further (variable-width)"
 )