"""Tests for base noun phrase detection via ``noun_chunks``.

Every test receives ``EN``, a fixture defined outside this file. Here it is
called with a unicode string and the result is queried for ``noun_chunks``
(presumably a loaded English pipeline -- confirm against the conftest).
"""
import pytest


@pytest.mark.models
def test_nsubj(EN):
    """A sentence with a single noun phrase yields exactly one chunk."""
    sent = EN(u'A base phrase should be recognized.')
    base_nps = list(sent.noun_chunks)
    assert len(base_nps) == 1
    # The expected text keeps the whitespace that follows the chunk.
    assert base_nps[0].string == 'A base phrase '


@pytest.mark.models
def test_coord(EN):
    """Two coordinated noun phrases are reported as two separate chunks."""
    sent = EN(u'A base phrase and a good phrase are often the same.')
    base_nps = list(sent.noun_chunks)
    assert len(base_nps) == 2
    assert base_nps[0].string == 'A base phrase '
    assert base_nps[1].string == 'a good phrase '


@pytest.mark.models
def test_pp(EN):
    """A noun phrase inside a prepositional phrase is its own chunk."""
    sent = EN(u'A phrase with another phrase occurs')
    base_nps = list(sent.noun_chunks)
    assert len(base_nps) == 2
    assert base_nps[0].string == 'A phrase '
    assert base_nps[1].string == 'another phrase '


@pytest.mark.models
def test_merge_pp(EN):
    """Merging each noun chunk leaves one token per chunk in the document."""
    sent = EN(u'A phrase with another phrase occurs')
    # Record everything needed for the merge *before* merging: the loop
    # below changes the document's tokens (see the assertions), so the chunk
    # data is captured up front. The lemma is stored per chunk rather than
    # read from the comprehension variable afterwards, which is undefined on
    # Python 3 and is always the last chunk on Python 2.
    nps = [
        (
            chunk[0].idx,                     # character offset of chunk start
            chunk[-1].idx + len(chunk[-1]),   # character offset of chunk end
            chunk.lemma_,
            chunk[0].ent_type_,
        )
        for chunk in sent.noun_chunks
    ]

    for start, end, lemma, ent_type in nps:
        sent.merge(start, end, u'NP', lemma, ent_type)
    assert sent[0].string == 'A phrase '
    assert sent[1].string == 'with '
    assert sent[2].string == 'another phrase '
    assert sent[3].string == 'occurs'