Update regression test for #801 to match current expected behaviour

This commit is contained in:
Ines Montani 2017-02-02 16:23:05 +01:00
parent 012f4820cb
commit afc6365388
1 changed files with 7 additions and 8 deletions

View File

@ -4,15 +4,14 @@ from __future__ import unicode_literals
import pytest import pytest
@pytest.mark.xfail
@pytest.mark.parametrize('text,tokens', [ @pytest.mark.parametrize('text,tokens', [
('"deserve,"--and', ['"', "deserve", ",", '"', "--", "and"]), ('"deserve,"--and', ['"', "deserve", ',"--', "and"]),
("exception;--exclusive", ["exception", ";", "--", "exclusive"]), ("exception;--exclusive", ["exception", ";--", "exclusive"]),
("day.--Is", ["day", ".", "--", "Is"]), ("day.--Is", ["day", ".--", "Is"]),
("refinement:--just", ["refinement", ":", "--", "just"]), ("refinement:--just", ["refinement", ":--", "just"]),
("memories?--To", ["memories", "?", "--", "To"]), ("memories?--To", ["memories", "?--", "To"]),
("Useful.=--Therefore", ["Useful", ".", "=", "--", "Therefore"]), ("Useful.=--Therefore", ["Useful", ".=--", "Therefore"]),
("=Hope.=--Pandora", ["=", "Hope", ".", "=", "--", "Pandora"])]) ("=Hope.=--Pandora", ["=", "Hope", ".=--", "Pandora"])])
def test_issue801(en_tokenizer, text, tokens): def test_issue801(en_tokenizer, text, tokens):
"""Test that special characters + hyphens are split correctly.""" """Test that special characters + hyphens are split correctly."""
doc = en_tokenizer(text) doc = en_tokenizer(text)