2017-01-10 18:24:10 +00:00
|
|
|
# coding: utf-8
|
2016-12-18 12:28:51 +00:00
|
|
|
from __future__ import unicode_literals
|
2016-11-03 23:27:32 +00:00
|
|
|
|
2016-12-18 12:28:51 +00:00
|
|
|
from ...symbols import POS, VERB, VerbForm_inf
|
|
|
|
from ...tokens import Doc
|
|
|
|
from ...vocab import Vocab
|
|
|
|
from ...lemmatizer import Lemmatizer
|
2016-11-03 23:27:32 +00:00
|
|
|
|
2017-01-10 18:24:10 +00:00
|
|
|
import pytest
|
|
|
|
|
2016-11-03 23:27:32 +00:00
|
|
|
|
2016-12-18 12:28:51 +00:00
|
|
|
@pytest.fixture
def index():
    """Return the index mapping handed to ``Lemmatizer``.

    The mapping has a single key, 'verb', whose value is an empty dict.
    """
    verb_index = {}
    return {'verb': verb_index}
|
|
|
|
|
|
|
|
@pytest.fixture
def exceptions():
    """Return the exceptions mapping handed to ``Lemmatizer``.

    The mapping has a single key, 'verb', whose value is an empty dict.
    """
    verb_exceptions = {}
    return {'verb': verb_exceptions}
|
|
|
|
|
|
|
|
@pytest.fixture
def rules():
    """Return the rules mapping handed to ``Lemmatizer``.

    The 'verb' key holds a list with one two-item rule, ["ed", "e"].
    """
    # NOTE(review): presumably an (old suffix, new suffix) rewrite pair --
    # confirm against the Lemmatizer implementation.
    ed_rule = ["ed", "e"]
    return {"verb": [ed_rule]}
|
|
|
|
|
|
|
|
@pytest.fixture
def lemmatizer(index, exceptions, rules):
    """Build a ``Lemmatizer`` from the index, exceptions and rules fixtures.

    The three values are passed positionally, in that order.
    """
    instance = Lemmatizer(index, exceptions, rules)
    return instance
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.fixture
def tag_map():
    """Return a tag map with a single entry for the tag 'VB'.

    The 'VB' entry sets ``POS`` to ``VERB`` and 'morph' to ``VerbForm_inf``.
    """
    vb_attrs = {POS: VERB, 'morph': VerbForm_inf}
    return {'VB': vb_attrs}
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.fixture
def vocab(lemmatizer, tag_map):
    """Build a ``Vocab`` from the lemmatizer and tag_map fixtures.

    Both values are forwarded as keyword arguments of the same name.
    """
    built = Vocab(
        lemmatizer=lemmatizer,
        tag_map=tag_map,
    )
    return built
|
|
|
|
|
|
|
|
|
2016-12-18 12:33:40 +00:00
|
|
|
def test_not_lemmatize_base_forms(vocab):
    """Check that 'feed' tagged as 'VB' keeps 'feed' as its lemma.

    NOTE(review): presumably guards against the ["ed", "e"] verb rule from
    the ``rules`` fixture being applied to a base-form verb -- confirm.
    """
    words = ["Do", "n't", "feed", "the", "dog"]
    tokens = Doc(vocab, words=words)
    verb = tokens[2]
    # The tag must be assigned before the lemma is read.
    verb.tag_ = 'VB'
    assert verb.text == 'feed'
    assert verb.lemma_ == 'feed'
|