2014-11-12 12:24:32 +00:00
|
|
|
from __future__ import unicode_literals
|
2014-12-23 00:40:32 +00:00
|
|
|
import pytest
|
2014-11-12 12:24:32 +00:00
|
|
|
|
2014-12-23 00:40:32 +00:00
|
|
|
from spacy.en import English
|
2014-11-12 12:24:32 +00:00
|
|
|
|
2014-12-23 00:40:32 +00:00
|
|
|
|
|
|
|
@pytest.fixture
def EN():
    """Build an ``English`` instance and hand it to the requesting test."""
    nlp = English()
    return nlp
|
|
|
|
|
|
|
|
|
|
|
|
def test1(EN):
    """Tokens built from a pre-split word list keep their count and first text.

    Passes three upper-case words to ``EN.tokenizer.tokens_from_list`` and
    checks that three tokens come back and that the first token's ``orth_``
    is the first input word.
    """
    word_list = ['JAPAN', 'GET', 'LUCKY']
    doc = EN.tokenizer.tokens_from_list(word_list)

    # Same two checks, in the same order: overall length, then first token.
    assert len(doc) == 3
    first_token = doc[0]
    assert first_token.orth_ == 'JAPAN'
|