spaCy/spacy/tests/lang/ur/test_prefix_suffix_infix.py

12 lines
286 B
Python
Raw Normal View History

# coding: utf-8
from __future__ import unicode_literals
import pytest
2019-08-20 15:36:34 +00:00
@pytest.mark.parametrize("text", ["ہےں۔", "کیا۔"])
def test_contractions(ur_tokenizer, text):
    """Check that text ending in the Urdu punctuation mark "۔" yields two tokens.

    Each parametrized input is a whitespace-free string whose last character
    is "۔". The test only asserts the token count; presumably the two tokens
    are the leading text and the punctuation mark, but their contents are
    not checked here.

    Args:
        ur_tokenizer: Callable fixture applied to ``text``; it is defined
            outside this file (presumably the Urdu tokenizer -- confirm in
            the test suite's conftest).
        text: The parametrized input string to tokenize.
    """
    tokens = ur_tokenizer(text)
    # Exactly two tokens are expected for every parametrized input.
    assert len(tokens) == 2