spaCy/spacy/tests/doc/test_creation.py

'''Test Doc sets up tokens correctly.'''
from __future__ import unicode_literals
import pytest

from ...vocab import Vocab
from ...tokens.doc import Doc
from ...lemmatizer import Lemmatizer


@pytest.fixture
def lemmatizer():
    '''Provide a Lemmatizer built from a small word-to-lemma lookup table.'''
    # Only these three surface forms have an entry in the table.
    lookup_table = {'dogs': 'dog', 'boxen': 'box', 'mice': 'mouse'}
    return Lemmatizer(lookup=lookup_table)


@pytest.fixture
def vocab(lemmatizer):
    '''Provide a Vocab constructed with the `lemmatizer` fixture.'''
    vocabulary = Vocab(lemmatizer=lemmatizer)
    return vocabulary


def test_empty_doc(vocab):
    '''A Doc created from a vocab alone is expected to hold no tokens.'''
    empty = Doc(vocab)
    n_tokens = len(empty)
    assert n_tokens == 0


def test_single_word(vocab):
    '''Check the text of a one-word Doc with and without explicit spaces.

    Without a `spaces` argument the text is expected to be the word
    followed by a single space; with `spaces=[False]` it is expected to be
    the bare word.
    '''
    # No `spaces` given: expect the word plus one trailing space.
    spaced = Doc(vocab, words=['a'])
    assert spaced.text == 'a '

    # `spaces=[False]`: expect the word with nothing after it.
    unspaced = Doc(vocab, words=['a'], spaces=[False])
    assert unspaced.text == 'a'


def test_lookup_lemmatization(vocab):
    '''Check token lemmas against the lookup table behind `vocab`.

    'dogs' is expected to lemmatize to 'dog'; 'dogses' is expected to keep
    its own text as its lemma.
    '''
    # (token text, expected lemma) pairs, in document order.
    expected = [('dogs', 'dog'), ('dogses', 'dogses')]
    doc = Doc(vocab, words=[text for text, _ in expected])
    for index, (text, lemma) in enumerate(expected):
        token = doc[index]
        assert token.text == text
        assert token.lemma_ == lemma
Add tests for Doc creation 2017-10-11 01:21:23 +00:00			`'''Test Doc sets up tokens correctly.'''`
			`from __future__ import unicode_literals`
			`import pytest`

			`from ...vocab import Vocab`
			`from ...tokens.doc import Doc`
Fix tests 2017-10-11 11:27:18 +00:00			`from ...lemmatizer import Lemmatizer`
Add tests for Doc creation 2017-10-11 01:21:23 +00:00

			`@pytest.fixture`
			`def lemmatizer():`
Fix tests 2017-10-11 11:27:18 +00:00			`return Lemmatizer(lookup={'dogs': 'dog', 'boxen': 'box', 'mice': 'mouse'})`
Add tests for Doc creation 2017-10-11 01:21:23 +00:00

			`@pytest.fixture`
			`def vocab(lemmatizer):`
			`return Vocab(lemmatizer=lemmatizer)`


			`def test_empty_doc(vocab):`
			`doc = Doc(vocab)`
			`assert len(doc) == 0`


			`def test_single_word(vocab):`
			`doc = Doc(vocab, words=['a'])`
			`assert doc.text == 'a '`
			`doc = Doc(vocab, words=['a'], spaces=[False])`
			`assert doc.text == 'a'`


			`def test_lookup_lemmatization(vocab):`
			`doc = Doc(vocab, words=['dogs', 'dogses'])`
			`assert doc[0].text == 'dogs'`
			`assert doc[0].lemma_ == 'dog'`
			`assert doc[1].text == 'dogses'`
			`assert doc[1].lemma_ == 'dogses'`