spaCy/spacy/tests/regression/test_issue595.py

25 lines
660 B
Python
Raw Normal View History

2017-01-10 18:24:10 +00:00
# coding: utf-8
from __future__ import unicode_literals
from ...symbols import POS, VERB, VerbForm_inf
from ...vocab import Vocab
from ...lemmatizer import Lemmatizer
from ..util import get_doc
2017-01-10 18:24:10 +00:00
import pytest
def test_issue595():
    """Check that a verb tagged as a base form keeps its surface form as lemma.

    The token "feed" is tagged 'VB' (mapped to VERB with VerbForm_inf set)
    and its lemma is expected to stay "feed", even though the lemmatizer is
    configured with an ["ed", "e"] verb rule.
    """
    # Lemmatizer with empty 'verb' tables for its first two arguments and a
    # single verb rule pair.
    # NOTE(review): presumably the first two arguments are the index and
    # exception tables, and the rule rewrites the suffix "ed" to "e" --
    # confirm against the Lemmatizer implementation.
    verb_rules = {"verb": [["ed", "e"]]}
    lemmatizer = Lemmatizer({'verb': {}}, {'verb': {}}, verb_rules)

    vocab = Vocab(
        lemmatizer=lemmatizer,
        tag_map={'VB': {POS: VERB, VerbForm_inf: True}},
    )
    doc = get_doc(vocab, ["Do", "n't", "feed", "the", "dog"])

    # Index 2 is "feed"; assigning the tag is the step under test.
    token = doc[2]
    token.tag_ = 'VB'

    assert token.text == 'feed'
    assert token.lemma_ == 'feed'