2015-01-31 02:46:11 +00:00
|
|
|
from __future__ import unicode_literals
|
|
|
|
from spacy.en import English
|
|
|
|
|
|
|
|
import pytest
|
|
|
|
|
|
|
|
@pytest.fixture
def EN():
    """Pytest fixture that builds and returns a new ``English()`` object."""
    pipeline = English()
    return pipeline
|
|
|
|
|
|
|
|
|
|
|
|
def test_single_period(EN):
    """Check the result of processing ``'A test sentence.'``.

    The processed result must have length 4, expose exactly one item
    through ``sents``, and the lengths of the ``sents`` items must add up
    to the length of the whole result.
    """
    doc = EN('A test sentence.')
    total = len(doc)
    assert total == 4
    assert len(list(doc.sents)) == 1
    # Every element of the result must be covered by the ``sents`` items.
    assert sum(map(len, doc.sents)) == len(doc)
|
2015-01-31 02:46:11 +00:00
|
|
|
|
|
|
|
|
|
|
|
def test_single_no_period(EN):
    """Check the result of processing ``'A test sentence'`` (no punctuation).

    The processed result must have length 3, expose exactly one item
    through ``sents``, and the lengths of the ``sents`` items must add up
    to the length of the whole result.
    """
    doc = EN('A test sentence')
    total = len(doc)
    assert total == 3
    assert len(list(doc.sents)) == 1
    # Every element of the result must be covered by the ``sents`` items.
    assert sum(map(len, doc.sents)) == len(doc)
|
2015-01-31 02:46:11 +00:00
|
|
|
|
|
|
|
|
|
|
|
def test_single_exclamation(EN):
    """Check the result of processing ``'A test sentence!'``.

    The processed result must have length 4, expose exactly one item
    through ``sents``, and the lengths of the ``sents`` items must add up
    to the length of the whole result.
    """
    doc = EN('A test sentence!')
    total = len(doc)
    assert total == 4
    assert len(list(doc.sents)) == 1
    # Every element of the result must be covered by the ``sents`` items.
    assert sum(map(len, doc.sents)) == len(doc)
|
2015-01-31 02:46:11 +00:00
|
|
|
|
|
|
|
|
|
|
|
def test_single_question(EN):
    """Check the result of processing ``'A test sentence?'``.

    Unlike the sibling tests, the call passes ``tag=False`` and
    ``parse=False``. The processed result must have length 4, expose
    exactly one item through ``sents``, and the lengths of the ``sents``
    items must add up to the length of the whole result.
    """
    # NOTE(review): only this test passes tag/parse flags; confirm whether
    # that difference from the other tests is intentional.
    doc = EN('A test sentence?', tag=False, parse=False)
    total = len(doc)
    assert total == 4
    assert len(list(doc.sents)) == 1
    # Every element of the result must be covered by the ``sents`` items.
    assert sum(map(len, doc.sents)) == len(doc)
|