diff --git a/spacy/tests/tokenizer/test_tokenizer.py b/spacy/tests/tokenizer/test_tokenizer.py
index be93b9953..c900860c4 100644
--- a/spacy/tests/tokenizer/test_tokenizer.py
+++ b/spacy/tests/tokenizer/test_tokenizer.py
@@ -142,6 +142,13 @@ def test_ie(en_tokenizer):
     assert len(tokens) == 6
     assert tokens[3].orth_ == "i.e."
 
+
+def test_two_whitespace(en_tokenizer):
+    """Check that two trailing spaces survive in the doc's text_with_ws."""
+    orig_str = u'there are 2 spaces after this  '
+    tokens = en_tokenizer(orig_str)
+    assert repr(tokens.text_with_ws) == repr(orig_str)
+
 #def test_cnts7():
 #    text = 'But then the 6,000-year ice age came...'
 #    tokens = EN.tokenize(text)