From c085c2d39136aa19e434d5a26d36152e70940562 Mon Sep 17 00:00:00 2001
From: ghoward
Date: Tue, 25 Apr 2017 17:44:16 +0200
Subject: [PATCH] Adding unit tests for French lemmatization

---
 spacy/tests/fr/test_lemmatization.py | 31 +++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)
 create mode 100644 spacy/tests/fr/test_lemmatization.py

diff --git a/spacy/tests/fr/test_lemmatization.py b/spacy/tests/fr/test_lemmatization.py
new file mode 100644
index 000000000..702ab81e3
--- /dev/null
+++ b/spacy/tests/fr/test_lemmatization.py
@@ -0,0 +1,31 @@
+# coding: utf-8
+
+from __future__ import unicode_literals
+
+import pytest
+
+
+@pytest.mark.xfail
+def test_lemmatizer_verb(fr_tokenizer):
+    text = "Je suis allé au mois de janv. aux prud’hommes."
+    tokens = fr_tokenizer(text)
+    assert len(tokens) == 10
+    assert tokens[2].lemma_ == "aller"
+
+
+@pytest.mark.xfail
+def test_lemmatizer_verb_exception(fr_tokenizer):
+    text = "Je dois manger ce soir"
+    tokens = fr_tokenizer(text)
+    assert len(tokens) == 5
+    assert tokens[1].lemma_ == "devoir"
+
+
+@pytest.mark.xfail
+def test_lemmatizer_noun_verb_ambiguity(fr_tokenizer):
+    # Tricky: "notes" is a NOUN here, but the same surface form also exists as a VERB
+    text = "Nous validerons vos notes plus tard"
+    tokens = fr_tokenizer(text)
+    assert len(tokens) == 6
+    assert tokens[1].lemma_ == "valider"
+    assert tokens[3].lemma_ == "note"
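
These tests rely on a shared fr_tokenizer pytest fixture provided by the test
suite's conftest.py rather than constructing a tokenizer themselves. A minimal
sketch of such a fixture, assuming spaCy's util.get_lang_class helper (the
project's actual conftest.py may differ):

    import pytest
    from spacy.util import get_lang_class

    @pytest.fixture
    def fr_tokenizer():
        # Build a French tokenizer from the language defaults; the lemma
        # data comes from the language package, so no trained model is needed.
        return get_lang_class('fr').Defaults.create_tokenizer()

With the fixture in place, the new tests can be run with, for example,
pytest spacy/tests/fr/test_lemmatization.py. The xfail marks keep known
lemmatizer gaps from failing the build; pytest's --runxfail flag forces
them to run and report as ordinary tests.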