* Add test for Issue #184: Whitespace at sentence boundary causes sentence boundary error.

This commit is contained in:
Matthew Honnibal 2016-01-18 23:04:38 +01:00
parent bba0a5e078
commit 7893de3203
1 changed file with 15 additions and 0 deletions

View File

@@ -1,6 +1,9 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import pytest import pytest
import numpy
from spacy.attrs import HEAD
@pytest.mark.models @pytest.mark.models
def test_space_attachment(EN): def test_space_attachment(EN):
@@ -10,3 +13,15 @@ def test_space_attachment(EN):
for sent in doc.sents: for sent in doc.sents:
if len(sent) == 1: if len(sent) == 1:
assert not sent[-1].is_space assert not sent[-1].is_space
@pytest.mark.xfail
def test_sentence_space(EN):
    """Regression test for Issue #184: whitespace at a sentence boundary.

    Runs the ``EN`` pipeline on a two-sentence text, overwrites the HEAD
    attribute of the resulting document from a fixed array, and asserts
    that ``doc.sents`` yields exactly two sentences.

    The test is marked ``xfail``, so it is expected to fail for now.
    """
    # NOTE(review): the sibling test in this file is marked
    # ``pytest.mark.models``; confirm whether this one should be as well.

    # The two spaces after the full stop are deliberate: the extra
    # whitespace at the sentence boundary is what this test is about, and
    # the HEAD array below has 19 rows, which presumably only matches the
    # token count when that whitespace produces a token of its own.
    text = ('''I look forward to using Thingamajig.  I've been told it will '''
            '''make my life easier...''')
    doc = EN(text)
    # One row per token after the transpose (shape 19 x 1). The values look
    # like relative head offsets, with 0 marking a root -- TODO confirm.
    heads = numpy.asarray(
        [[1, 0, -1, -2, -1, -1, -5,
          4, 3, 2, 1, 0, 2, 1, -3, 1, 1, -3, -7]],
        dtype='int32')
    doc.from_array([HEAD], heads.T)
    assert len(list(doc.sents)) == 2