spaCy/tests/test_add_lemmas.py

35 lines
679 B
Python

from __future__ import unicode_literals
from spacy.en import English
import pytest
@pytest.fixture(scope="module")
def EN():
    """Provide the English pipeline used by the tests in this module.

    The fixture is module-scoped so that ``English()`` is constructed once
    and reused, instead of being rebuilt for every test that requests it.
    The tests visible here only call the pipeline on text and inspect the
    resulting tokens.
    """
    return English()
@pytest.fixture
def tagged(EN):
    """Return the sample sentence processed with ``tag=True, parse=False``."""
    sentence = u'Bananas in pyjamas are geese.'
    return EN(sentence, tag=True, parse=False)
@pytest.fixture
def lemmas(tagged):
    """Collect the ``lemma_`` of every token in ``tagged``, in token order."""
    collected = []
    for token in tagged:
        collected.append(token.lemma_)
    return collected
def test_lemmas(lemmas, tagged):
    """Check the lemmas produced for the tokens of the sample sentence.

    The first four lemmas are always checked. The fifth is checked only when
    the token at index 4 carries the same tag as the token at index 2.
    """
    expected_prefix = ('banana', 'in', 'pyjama', 'be')
    for position, expected in enumerate(expected_prefix):
        assert lemmas[position] == expected

    third_tag = tagged[2].tag
    fifth_tag = tagged[4].tag
    # Only assert on the fifth lemma when both tokens received the same tag.
    if third_tag == fifth_tag:
        assert lemmas[4] == 'goose'
def test_didnt(EN):
    """The token at index 1 of "I didn't do it" must have a non-empty lemma."""
    doc = EN(u"I didn't do it")
    second_token = doc[1]
    assert second_token.lemma_ != u""