# coding: utf-8
# Regression test for issue #740. Added in commit
# e9e99a56706ab13d6b187b4c1fc409c986e447b9 (Ines Montani, 2017-01-12) as
# spacy/tests/regression/test_issue740.py.

from __future__ import unicode_literals

import pytest


@pytest.mark.parametrize('text', ["3/4/2012", "01/12/1900"])
def test_issue740(en_tokenizer, text):
    """Check that a slash-separated date is kept as a single token.

    This behaviour is currently inconsistent: dates separated by hyphens are
    still split, and that will be hard to prevent without causing clashes
    with numeric ranges.
    """
    # Tokenize once and compare the resulting token count directly.
    token_count = len(en_tokenizer(text))
    assert token_count == 1