2015-01-05 00:53:30 +00:00
|
|
|
from __future__ import unicode_literals
|
|
|
|
|
2014-12-21 10:02:44 +00:00
|
|
|
from spacy.en.lemmatizer import Lemmatizer, read_index, read_exc
|
2015-01-02 14:59:56 +00:00
|
|
|
from spacy.en import DATA_DIR
|
2014-12-07 14:39:13 +00:00
|
|
|
from os import path
|
|
|
|
|
|
|
|
import pytest
|
|
|
|
|
|
|
|
|
|
|
|
def test_read_index():
    """Load the WordNet noun index and spot-check its membership.

    Reads ``index.noun`` from the ``wordnet`` directory under ``DATA_DIR``
    and asserts that 'man' and 'plant' are present while the misspelling
    'plantes' is absent.
    """
    noun_index_path = path.join(DATA_DIR, 'wordnet', 'index.noun')
    noun_index = read_index(noun_index_path)

    assert 'man' in noun_index
    assert 'plantes' not in noun_index
    assert 'plant' in noun_index
|
|
|
|
|
|
|
|
|
|
|
|
def test_read_exc():
    """Load the WordNet verb exception list and check the entry for 'was'.

    Reads ``verb.exc`` from the ``wordnet`` directory under ``DATA_DIR``
    and asserts that 'was' maps to the one-element tuple ``('be',)``.
    """
    verb_exc_path = path.join(DATA_DIR, 'wordnet', 'verb.exc')
    verb_exceptions = read_exc(verb_exc_path)

    assert verb_exceptions['was'] == ('be',)
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.fixture
def lemmatizer():
    """Provide a ``Lemmatizer`` built from the ``wordnet`` data directory."""
    wordnet_dir = path.join(DATA_DIR, 'wordnet')
    # NOTE(review): the three trailing zeros are positional arguments whose
    # meaning is defined by Lemmatizer's constructor, which is not visible
    # here -- confirm against spacy.en.lemmatizer.
    return Lemmatizer(wordnet_dir, 0, 0, 0)
|
2014-12-07 14:39:13 +00:00
|
|
|
|
|
|
|
|
|
|
|
def test_noun_lemmas(lemmatizer):
    """Check ``lemmatizer.noun`` against a table of expected lemma sets.

    Each word is passed to ``lemmatizer.noun`` in turn and the result must
    equal the expected set exactly.
    """
    # Bind the method once so every case goes through the same callable.
    noun_lemmas = lemmatizer.noun

    expected_by_word = (
        ('aardwolves', {'aardwolf'}),
        ('aardwolf', {'aardwolf'}),
        ('planets', {'planet'}),
        ('ring', {'ring'}),
        ('axes', {'axis', 'axe', 'ax'}),
    )

    for word, expected in expected_by_word:
        assert noun_lemmas(word) == expected
|