From 7ba4111554cd8637763947d21decbab25bd95369 Mon Sep 17 00:00:00 2001
From: Aaron Marquez <aaronmarquez@Aarons-MBP.home>
Date: Thu, 15 Feb 2018 12:46:22 -0800
Subject: [PATCH] Add test for issue-1959

---
 spacy/tests/regression/test_issue1959.py | 25 ++++++++++++++++++++++++
 1 file changed, 25 insertions(+)
 create mode 100644 spacy/tests/regression/test_issue1959.py

diff --git a/spacy/tests/regression/test_issue1959.py b/spacy/tests/regression/test_issue1959.py
new file mode 100644
index 000000000..2605b0088
--- /dev/null
+++ b/spacy/tests/regression/test_issue1959.py
@@ -0,0 +1,25 @@
+# coding: utf8
+from __future__ import unicode_literals
+import pytest
+
+from ..util import load_test_model
+
+
+@pytest.mark.models('en')
+def test_issue1959():
+    texts = ['Apple is looking at buying U.K. startup for $1 billion.']
+    nlp = load_test_model('en_core_web_sm')
+    nlp.add_pipe(clean_component, name='cleaner', after='ner')
+    doc = nlp(texts[0])
+    doc_pipe = [doc_pipe for doc_pipe in nlp.pipe(texts)]
+    assert doc == doc_pipe[0]
+
+
+def clean_component(doc):
+    """ Clean up text. Make lowercase and remove punctuation and stopwords """
+    # Remove punctuation, symbols (#) and stopwords
+    doc = [tok.text.lower() for tok in doc if (not tok.is_stop
+                                               and tok.pos_ != 'PUNCT' and
+                                               tok.pos_ != 'SYM')]
+    doc = ' '.join(doc)
+    return doc