# coding: utf-8
from __future__ import unicode_literals

import pytest


@pytest.mark.parametrize('text', ["Shell", "shell"])
def test_issue775(en_tokenizer, text):
    """Regression test for issue #775.

    'Shell' and 'shell' must not be treated as contractions by the English
    tokenizer exceptions: each input has to come back as exactly one token
    whose text is the unchanged input string.

    Args:
        en_tokenizer: Tokenizer fixture; called with the raw text.
        text: The word under test, supplied by the parametrization.
    """
    doc = en_tokenizer(text)
    # Check the count first so a wrongly split word fails here, before
    # the single token is inspected.
    token_count = len(doc)
    assert token_count == 1
    sole_token = doc[0]
    assert sole_token.text == text