spaCy/spacy/tests/regression/test_issue740.py

13 lines
466 B
Python
Raw Normal View History

2017-01-12 21:57:38 +00:00
# coding: utf-8
from __future__ import unicode_literals
import pytest
@pytest.mark.parametrize('text', ["3/4/2012", "01/12/1900"])
def test_issue740(en_tokenizer, text):
"""Test that dates are not split and kept as one token. This behaviour is currently inconsistent, since dates separated by hyphens are still split.
This will be hard to prevent without causing clashes with numeric ranges."""
tokens = en_tokenizer(text)
assert len(tokens) == 1