diff --git a/spacy/tests/tokenizer/test_whitespace.py b/spacy/tests/tokenizer/test_whitespace.py
index ad34c8791..906ad310c 100644
--- a/spacy/tests/tokenizer/test_whitespace.py
+++ b/spacy/tests/tokenizer/test_whitespace.py
@@ -1,37 +1,46 @@
 """Test that tokens are created correctly for whitespace."""
+
+
 from __future__ import unicode_literals

 import pytest


-def test_single_space(en_tokenizer):
-    tokens = en_tokenizer('hello possums')
+@pytest.mark.parametrize('text', ["hello possums"])
+def test_tokenizer_splits_single_space(en_tokenizer, text):
+    tokens = en_tokenizer(text)
     assert len(tokens) == 2


-def test_double_space(en_tokenizer):
-    tokens = en_tokenizer('hello  possums')
+@pytest.mark.parametrize('text', ["hello  possums"])
+def test_tokenizer_splits_double_space(en_tokenizer, text):
+    tokens = en_tokenizer(text)
     assert len(tokens) == 3
-    assert tokens[1].orth_ == ' '
+    assert tokens[1].text == " "


-def test_newline(en_tokenizer):
-    tokens = en_tokenizer('hello\npossums')
+@pytest.mark.parametrize('text', ["hello\npossums"])
+def test_tokenizer_splits_newline(en_tokenizer, text):
+    tokens = en_tokenizer(text)
     assert len(tokens) == 3
+    assert tokens[1].text == "\n"


-def test_newline_space(en_tokenizer):
-    tokens = en_tokenizer('hello \npossums')
+@pytest.mark.parametrize('text', ["hello \npossums"])
+def test_tokenizer_splits_newline_space(en_tokenizer, text):
+    tokens = en_tokenizer(text)
     assert len(tokens) == 3


-def test_newline_double_space(en_tokenizer):
-    tokens = en_tokenizer('hello  \npossums')
+@pytest.mark.parametrize('text', ["hello  \npossums"])
+def test_tokenizer_splits_newline_double_space(en_tokenizer, text):
+    tokens = en_tokenizer(text)
     assert len(tokens) == 3


-def test_newline_space_wrap(en_tokenizer):
-    tokens = en_tokenizer('hello \n possums')
+@pytest.mark.parametrize('text', ["hello \n possums"])
+def test_tokenizer_splits_newline_space_wrap(en_tokenizer, text):
+    tokens = en_tokenizer(text)
     assert len(tokens) == 3