From f57c616830d31e206f7029d91102ee939d57c901 Mon Sep 17 00:00:00 2001
From: ines
Date: Sat, 18 Mar 2017 16:04:14 +0100
Subject: [PATCH 1/2] Add regression test for #704 and test new model (resolves #704) (using new English model)

---
 spacy/tests/regression/test_issue704.py | 14 ++++++++++++++
 1 file changed, 14 insertions(+)
 create mode 100644 spacy/tests/regression/test_issue704.py

diff --git a/spacy/tests/regression/test_issue704.py b/spacy/tests/regression/test_issue704.py
new file mode 100644
index 000000000..2cecf6219
--- /dev/null
+++ b/spacy/tests/regression/test_issue704.py
@@ -0,0 +1,14 @@
+# coding: utf8
+from __future__ import unicode_literals
+
+import pytest
+
+
+@pytest.mark.models
+def test_issue704(EN):
+    """Regression test for #704: the quoted passage must be split into exactly three sentences."""
+
+    text = '“Atticus said to Jem one day, “I’d rather you shot at tin cans in the backyard, but I know you’ll go after birds. Shoot all the blue jays you want, if you can hit ‘em, but remember it’s a sin to kill a mockingbird.”'
+    doc = EN(text)
+    sents = list(doc.sents)
+    assert len(sents) == 3

From ad934a9abd973141434a12eb346a339d876b5baf Mon Sep 17 00:00:00 2001
From: ines
Date: Sat, 18 Mar 2017 16:12:30 +0100
Subject: [PATCH 2/2] Add regression test for #693

---
 spacy/tests/regression/test_issue693.py | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)
 create mode 100644 spacy/tests/regression/test_issue693.py

diff --git a/spacy/tests/regression/test_issue693.py b/spacy/tests/regression/test_issue693.py
new file mode 100644
index 000000000..00ba23331
--- /dev/null
+++ b/spacy/tests/regression/test_issue693.py
@@ -0,0 +1,19 @@
+# coding: utf8
+from __future__ import unicode_literals
+
+import pytest
+
+
+@pytest.mark.xfail
+@pytest.mark.models
+def test_issue693(EN):
+    """Regression test for #693: doc.noun_chunks must yield two chunks for either ordering of the two coordinated names."""
+
+    text1 = "the TopTown International Airport Board and the Goodwill Space Exploration Partnership."
+    text2 = "the Goodwill Space Exploration Partnership and the TopTown International Airport Board."
+    doc1 = EN(text1)
+    doc2 = EN(text2)
+    chunks1 = list(doc1.noun_chunks)
+    chunks2 = list(doc2.noun_chunks)
+    assert len(chunks1) == 2
+    assert len(chunks2) == 2