mirror of https://github.com/explosion/spaCy.git
* Add test for Issue #184: Whitespace at sentence boundary causes sentence boundary error.
This commit is contained in:
parent
bba0a5e078
commit
7893de3203
|
@ -1,6 +1,9 @@
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
import numpy
|
||||||
|
from spacy.attrs import HEAD
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.models
|
@pytest.mark.models
|
||||||
def test_space_attachment(EN):
|
def test_space_attachment(EN):
|
||||||
|
@ -10,3 +13,15 @@ def test_space_attachment(EN):
|
||||||
for sent in doc.sents:
|
for sent in doc.sents:
|
||||||
if len(sent) == 1:
|
if len(sent) == 1:
|
||||||
assert not sent[-1].is_space
|
assert not sent[-1].is_space
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.xfail
def test_sentence_space(EN):
    """Regression test for Issue #184: whitespace at a sentence boundary.

    Parses a two-sentence text whose sentences are separated by extra
    whitespace, overwrites the syntactic heads with a fixed, hand-written
    analysis, and checks that exactly two sentences are produced.

    Marked ``xfail`` because the bug is still open when this test is added.
    """
    # The two spaces after "Thingamajig." are deliberate: the extra space is
    # the whitespace at the sentence boundary this test is about, and the
    # head array below only lines up if it is present as its own token
    # (assumes the tokenizer emits it as a separate token -- TODO confirm).
    text = ('''I look forward to using Thingamajig.  I've been told it will '''
            '''make my life easier...''')
    doc = EN(text)
    # One relative head offset per token (0 marks a root). Entry 7 (value 4)
    # belongs to the whitespace token and attaches it to "told", i.e. to the
    # second sentence.
    heads = [1, 0, -1, -2, -1, -1, -5,
             4, 3, 2, 1, 0, 2, 1, -3, 1, 1, -3, -7]
    # from_array is given one row per token and one column per attribute,
    # hence the transpose of the single-row array.
    doc.from_array([HEAD], numpy.asarray([heads], dtype='int32').T)
    assert len(list(doc.sents)) == 2
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue