# Source: mirror of https://github.com/explosion/spaCy.git
import pytest
|
|
|
|
|
|
@pytest.mark.parametrize(
    "text,lemma",
    [("aprox.", "aproximadament"), ("pàg.", "pàgina"), ("p.ex.", "per exemple")],
)
def test_ca_tokenizer_handles_abbr(ca_tokenizer, text, lemma):
    """Check that each abbreviation is tokenized as exactly one token.

    ``lemma`` is supplied by the parametrization alongside ``text`` but is
    not asserted on here; only the token count is verified.
    """
    # The trailing period must stay attached to the abbreviation, so the
    # whole input has to come back as a single token.
    assert len(ca_tokenizer(text)) == 1
|
|
|
|
|
|
def test_ca_tokenizer_handles_exc_in_text(ca_tokenizer):
    """Check the token texts produced for a sentence containing abbreviations.

    The expected output keeps "Dra." and "pl." as single tokens (period
    included), splits "dels" into "d" + "els", and separates the
    sentence-final period from "Til·lers".
    """
    sentence = "La Dra. Puig viu a la pl. dels Til·lers."
    # Expected token texts, in order.
    expected = [
        "La", "Dra.", "Puig", "viu", "a", "la",
        "pl.", "d", "els", "Til·lers", ".",
    ]
    observed = [token.text for token in ca_tokenizer(sentence)]
    assert observed == expected
|