mirror of https://github.com/explosion/spaCy.git
41 lines
856 B
Python
41 lines
856 B
Python
|
from __future__ import unicode_literals
|
||
|
import pytest
|
||
|
|
||
|
|
||
|
@pytest.fixture(scope="session")
def nlp():
    """Session-scoped fixture providing an ``English`` instance.

    ``English`` is imported inside the fixture body, so the import only
    runs when the fixture is first requested.
    """
    from spacy.en import English

    pipeline = English()
    return pipeline
|
||
|
|
||
|
|
||
|
@pytest.fixture()
def doc(nlp):
    """Return the result of calling ``nlp`` on a fixed two-sentence text."""
    sample_text = 'Hello, world. Here are two sentences.'
    return nlp(sample_text)
|
||
|
|
||
|
|
||
|
@pytest.fixture()
def token(doc):
    """Return the item at index 0 of the ``doc`` fixture."""
    first_token = doc[0]
    return first_token
|
||
|
|
||
|
|
||
|
def test_load_resources_and_process_text():
    """Smoke test: construct ``English`` and run it over a sample text.

    The test contains no assertions; it passes as long as construction and
    processing complete without raising.
    """
    from spacy.en import English

    sample_text = 'Hello, world. Here are two sentences.'
    pipeline = English()
    pipeline(sample_text)  # result intentionally discarded
|
||
|
|
||
|
|
||
|
def test_get_tokens_and_sentences(doc):
    """Check that the first token and the first sentence of ``doc`` line up.

    Args:
        doc: The processed sample document supplied by the ``doc`` fixture.
    """
    token = doc[0]
    # Use the builtin ``next()`` instead of the ``.next()`` method: iterators
    # only expose ``.next()`` on Python 2, whereas the builtin works on both
    # Python 2.6+ and Python 3.
    sentence = next(doc.sents)

    # The first token of the document must be the very same object as the
    # first token of the first sentence.
    assert token is sentence[0]
    assert sentence.text == 'Hello, world.'
|
||
|
|
||
|
|
||
|
def test_use_integer_ids_for_any_strings(nlp, token):
    """Check the two-way string/ID lookup on ``nlp.vocab.strings``.

    ``'Hello'`` is looked up to obtain an ID, and that ID is looked up again
    to get the string back. Both values are compared against the ``orth`` and
    ``orth_`` attributes of ``token``.
    """
    string_store = nlp.vocab.strings
    hello_id = string_store['Hello']
    hello_str = string_store[hello_id]

    assert token.orth == hello_id
    # NOTE(review): 3404 is a hard-coded ID; presumably it depends on the
    # vocabulary data that gets loaded -- confirm before changing.
    assert hello_id == 3404

    assert token.orth_ == hello_str
    assert hello_str == 'Hello'
|