spaCy/spacy/tests/regression/test_issue792.py

19 lines
657 B
Python
Raw Normal View History

2017-01-31 13:47:42 +00:00
# coding: utf-8
from __future__ import unicode_literals
2017-01-31 14:18:30 +00:00
import pytest
2017-01-31 13:47:42 +00:00
2017-01-31 14:19:33 +00:00
@pytest.mark.parametrize('text', ["This is a string ", "This is a string\u0020"])
def test_issue792(en_tokenizer, text):
    """Regression test for Issue #792: trailing whitespace lost by tokenization.

    Tokenizes ``text`` with the ``en_tokenizer`` fixture and checks that
    concatenating every token's ``text_with_ws`` reproduces the input exactly.
    Both parametrized inputs end in a space; the second spells it as the
    U+0020 escape.
    """
    pieces = []
    for tok in en_tokenizer(text):
        # text_with_ws is read per token so the join below needs no separator.
        pieces.append(tok.text_with_ws)
    rebuilt = ''.join(pieces)
    assert rebuilt == text
@pytest.mark.parametrize('text', ["This is a string", "This is a string\n"])
def test_control_issue792(en_tokenizer, text):
    """Control case for Issue #792: inputs that do not end in a space.

    Runs the same round-trip check as the main regression test: the
    concatenation of each token's ``text_with_ws`` must equal the original
    ``text``. The inputs are a string with no trailing whitespace and one
    ending in a newline.
    """
    tokens = en_tokenizer(text)
    rebuilt = ''.join(tok.text_with_ws for tok in tokens)
    assert rebuilt == text