mirror of https://github.com/explosion/spaCy.git
* Add provisional tests for sentence boundary detection
This commit is contained in:
parent
df749f257d
commit
0fd4a71bed
|
@ -0,0 +1,43 @@
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
from spacy.en import English
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def EN():
|
||||||
|
return English()
|
||||||
|
|
||||||
|
|
||||||
|
def test_single_period(EN):
|
||||||
|
string = 'A test sentence.'
|
||||||
|
words = EN(string)
|
||||||
|
assert len(words) == 4
|
||||||
|
assert list(words.sents) == [(0, 4)]
|
||||||
|
|
||||||
|
|
||||||
|
def test_single_no_period(EN):
|
||||||
|
string = 'A test sentence'
|
||||||
|
words = EN(string)
|
||||||
|
assert len(words) == 3
|
||||||
|
assert list(words.sents) == [(0, 3)]
|
||||||
|
|
||||||
|
|
||||||
|
def test_single_exclamation(EN):
|
||||||
|
string = 'A test sentence!'
|
||||||
|
words = EN(string)
|
||||||
|
assert len(words) == 4
|
||||||
|
assert list(words.sents) == [(0, 4)]
|
||||||
|
|
||||||
|
|
||||||
|
def test_single_question(EN):
|
||||||
|
string = 'A test sentence?'
|
||||||
|
words = EN(string, tag=False, parse=False)
|
||||||
|
assert len(words) == 4
|
||||||
|
assert list(words.sents) == [(0, 4)]
|
||||||
|
|
||||||
|
|
||||||
|
def test_(EN):
|
||||||
|
string = 'A test sentence?'
|
||||||
|
words = EN(string, tag=False, parse=False)
|
||||||
|
assert len(words) == 4
|
||||||
|
assert list(words.sents) == [(0, 4)]
|
Loading…
Reference in New Issue