This commit is contained in:
Matthew Honnibal 2017-03-18 16:17:28 +01:00
commit de0e6385b4
2 changed files with 33 additions and 0 deletions

View File

@@ -0,0 +1,19 @@
# coding: utf8
from __future__ import unicode_literals
import pytest
@pytest.mark.xfail
@pytest.mark.models
def test_issue693(EN):
    """Regression test for issue 693: noun chunks should cover the full text.

    Two inputs are parsed, each coordinating the same pair of multi-word
    names with "and" but in opposite order. Each parse is expected to
    produce exactly two noun chunks. The test is currently marked as an
    expected failure.

    EN: fixture that is called with a text and returns the parsed document.
    """
    airport_first = "the TopTown International Airport Board and the Goodwill Space Exploration Partnership."
    partnership_first = "the Goodwill Space Exploration Partnership and the TopTown International Airport Board."
    # Parse both inputs before touching noun_chunks on either document.
    airport_doc, partnership_doc = EN(airport_first), EN(partnership_first)
    airport_chunks = list(airport_doc.noun_chunks)
    partnership_chunks = list(partnership_doc.noun_chunks)
    assert len(airport_chunks) == 2
    assert len(partnership_chunks) == 2

View File

@@ -0,0 +1,14 @@
# coding: utf8
from __future__ import unicode_literals
import pytest
@pytest.mark.models
def test_issue704(EN):
    """Regression test for issue 704: sentence boundary detection.

    A quoted passage is parsed and the number of sentence spans the
    document yields is expected to be exactly three.

    EN: fixture that is called with a text and returns the parsed document.
    """
    # NOTE(review): the sample text looks like it lost its apostrophes
    # ("Id", "youll", "em", "its") -- confirm against the original issue
    # report before relying on this exact input.
    passage = '“Atticus said to Jem one day, “Id rather you shot at tin cans in the backyard, but I know youll go after birds. Shoot all the blue jays you want, if you can hit em, but remember its a sin to kill a mockingbird.”'
    sentences = list(EN(passage).sents)
    assert len(sentences) == 3