From 644d6c9e1a763a421d436bb634da361e73c29ca9 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Mon, 4 Sep 2017 15:17:44 +0200
Subject: [PATCH] Improve lemmatization tests, re #1296

---
Reviewer notes (text in this section is not part of the commit message):

* The second hunk originally added another function named
  test_en_lemmatizer_noun_lemmas directly below the existing function of
  the same name (see the hunk heading). In Python the later `def` rebinds
  the module-level name, so only the newly added parametrization would be
  collected and the original cases ("aardwolves", ...) would silently stop
  running. The added test is renamed to
  test_en_lemmatizer_noun_lemmas_issue1296 so both sets of cases run.
* Line breaks were restored from the hunk line counts. The column of the
  parametrize continuation lines and the position of blank context lines
  are reconstructed -- TODO confirm against the target file before applying.
  The blob hashes on the `index` line are kept from the original patch and
  do not reflect the rename.

 spacy/tests/lang/en/test_lemmatizer.py | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/spacy/tests/lang/en/test_lemmatizer.py b/spacy/tests/lang/en/test_lemmatizer.py
index d02ae1700..00f02ccb4 100644
--- a/spacy/tests/lang/en/test_lemmatizer.py
+++ b/spacy/tests/lang/en/test_lemmatizer.py
@@ -2,12 +2,18 @@
 from __future__ import unicode_literals
 
 import pytest
+from ....tokens.doc import Doc
 
 
 @pytest.fixture
 def en_lemmatizer(EN):
     return EN.Defaults.create_lemmatizer()
 
+@pytest.mark.models('en')
+def test_doc_lemmatization(EN):
+    doc = Doc(EN.vocab, words=['bleed'])
+    doc[0].tag_ = 'VBP'
+    assert doc[0].lemma_ == 'bleed'
 
 @pytest.mark.models('en')
 @pytest.mark.parametrize('text,lemmas', [("aardwolves", ["aardwolf"]),
@@ -19,6 +25,16 @@ def test_en_lemmatizer_noun_lemmas(en_lemmatizer, text, lemmas):
     assert en_lemmatizer.noun(text) == set(lemmas)
 
 
+@pytest.mark.models('en')
+@pytest.mark.parametrize('text,lemmas', [("bleed", ["bleed"]),
+                                         ("feed", ["feed"]),
+                                         ("need", ["need"]),
+                                         ("ring", ["ring"]),
+                                         ("axes", ["axis", "axe", "ax"])])
+def test_en_lemmatizer_noun_lemmas_issue1296(en_lemmatizer, text, lemmas):
+    assert en_lemmatizer.noun(text) == set(lemmas)
+
+
 @pytest.mark.xfail
 @pytest.mark.models('en')
 def test_en_lemmatizer_base_forms(en_lemmatizer):