spaCy/spacy/tests/lang/lb/test_exceptions.py

31 lines
892 B
Python
Raw Normal View History

# coding: utf-8
from __future__ import unicode_literals
import pytest
2019-12-21 18:04:17 +00:00
@pytest.mark.parametrize("text", ["z.B.", "Jan."])
def test_lb_tokenizer_handles_abbr(lb_tokenizer, text):
    """Each abbreviation sample must come back from the tokenizer as one token."""
    # A trailing period that was split off would raise the count above 1.
    assert len(lb_tokenizer(text)) == 1
2019-12-21 18:04:17 +00:00
@pytest.mark.parametrize(
    "text",
    ["d'Saach", "d'Kanner", "dWelt", "dSuen"],
)
def test_lb_tokenizer_splits_contractions(lb_tokenizer, text):
    """Each contraction sample must be split into exactly two tokens."""
    doc = lb_tokenizer(text)
    token_count = len(doc)
    assert token_count == 2
2019-12-21 18:04:17 +00:00
def test_lb_tokenizer_handles_exc_in_text(lb_tokenizer):
    """Tokenize a full sentence and check the exception token inside it.

    The sentence must yield nine tokens, and the second one must be the
    clitic "'t" carrying the lemma "et".
    """
    doc = lb_tokenizer("Mee 't ass net evident, d'Liewen.")
    assert len(doc) == 9
    second = doc[1]
    assert second.text == "'t"
    assert second.lemma_ == "et"
2019-12-21 18:04:17 +00:00
@pytest.mark.parametrize(
    "text,norm",
    [("dass", "datt"), ("viläicht", "vläicht")],
)
def test_lb_norm_exceptions(lb_tokenizer, text, norm):
    """The first token of each sample must expose the expected ``norm_`` value."""
    first = lb_tokenizer(text)[0]
    assert first.norm_ == norm