From e4c84321a590c5e3b1004fe542db7d7f3cca031b Mon Sep 17 00:00:00 2001
From: latkins
Date: Tue, 31 Jan 2017 13:47:42 +0000
Subject: [PATCH 1/2] Added regression test for Issue #792.

---
 spacy/tests/regression/test_issue792.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)
 create mode 100644 spacy/tests/regression/test_issue792.py

diff --git a/spacy/tests/regression/test_issue792.py b/spacy/tests/regression/test_issue792.py
new file mode 100644
index 000000000..d5aef533f
--- /dev/null
+++ b/spacy/tests/regression/test_issue792.py
@@ -0,0 +1,13 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+
+def test_issue792(en_tokenizer):
+    """Test for Issue #792: Trailing whitespace is removed after parsing."""
+    text = "This is a string "
+    doc = en_tokenizer(text)
+    assert(doc.text_with_ws == text)
+
+    text_unicode = "This is a string\u0020"
+    doc_unicode = en_tokenizer(text_unicode)
+    assert(doc_unicode.text_with_ws == text_unicode)

From e6465b9ca35c0497b21a684ea4cc152df3b61334 Mon Sep 17 00:00:00 2001
From: Ines Montani
Date: Tue, 31 Jan 2017 15:14:42 +0100
Subject: [PATCH 2/2] Parametrize test cases and mark as xfail

---
Review note: this revision of the patch adds the `import pytest` that the
`@pytest.mark.xfail` and `@pytest.mark.parametrize` decorators below need.
Without it the module raises NameError when it is imported, so the test
errors out instead of being reported as an expected failure. The hunk header
and diffstat are updated to match. The post-image blob id on the `index`
line is carried over from the original patch and no longer matches the
amended file.

 spacy/tests/regression/test_issue792.py | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/spacy/tests/regression/test_issue792.py b/spacy/tests/regression/test_issue792.py
index d5aef533f..231261523 100644
--- a/spacy/tests/regression/test_issue792.py
+++ b/spacy/tests/regression/test_issue792.py
@@ -1,13 +1,12 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
+import pytest
+
 
-def test_issue792(en_tokenizer):
-    """Test for Issue #792: Trailing whitespace is removed after parsing."""
-    text = "This is a string "
-    doc = en_tokenizer(text)
-    assert(doc.text_with_ws == text)
-
-    text_unicode = "This is a string\u0020"
-    doc_unicode = en_tokenizer(text_unicode)
-    assert(doc_unicode.text_with_ws == text_unicode)
+@pytest.mark.xfail
+@pytest.mark.parametrize('text', ["This is a string ", "This is a string\u0020"])
+def test_issue792(en_tokenizer, text):
+    """Test for Issue #792: Trailing whitespace is removed after parsing."""
+    doc = en_tokenizer(text)
+    assert(doc.text_with_ws == text)