From 223d2b34841cad6847fe6d9e9d60cc5cce613052 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Sat, 16 Jan 2016 16:41:26 +0100
Subject: [PATCH] * Add test for Issue #154: Additional whitespace introduced when string ends with a whitespace token.

---
 spacy/tests/tokenizer/test_tokenizer.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/spacy/tests/tokenizer/test_tokenizer.py b/spacy/tests/tokenizer/test_tokenizer.py
index be93b9953..c900860c4 100644
--- a/spacy/tests/tokenizer/test_tokenizer.py
+++ b/spacy/tests/tokenizer/test_tokenizer.py
@@ -142,6 +142,12 @@ def test_ie(en_tokenizer):
     assert len(tokens) == 6
     assert tokens[3].orth_ == "i.e."
 
+
+def test_two_whitespace(en_tokenizer):
+    orig_str = u'there are 2 spaces after this  '
+    tokens = en_tokenizer(orig_str)
+    assert repr(tokens.text_with_ws) == repr(orig_str)
+
 #def test_cnts7():
 #    text = 'But then the 6,000-year ice age came...'
 #    tokens = EN.tokenize(text)