From 9c73983bdd8fcf3df15a00ef5066edd105ef2fd7 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Tue, 29 Mar 2016 14:27:13 +1100 Subject: [PATCH] * Add test for hyphenation problem in Issue #302 --- spacy/tests/tokenizer/test_infix.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/spacy/tests/tokenizer/test_infix.py b/spacy/tests/tokenizer/test_infix.py index d703682cf..eda4643a6 100644 --- a/spacy/tests/tokenizer/test_infix.py +++ b/spacy/tests/tokenizer/test_infix.py @@ -32,3 +32,9 @@ def test_email(en_tokenizer): assert len(tokens) == 1 +def test_double_hyphen(en_tokenizer): + tokens = en_tokenizer(u'No decent--let alone well-bred--people.') + assert tokens[0].text == u'No' + assert tokens[1].text == u'decent' + assert tokens[2].text == u'--' + assert tokens[3].text == u'let'