Fix noun_chunk rules around coordination

Closes #693.
This commit is contained in:
Matthew Honnibal 2017-04-07 17:06:40 +02:00
parent 2dc0d28cc4
commit cc36c308f4
2 changed files with 6 additions and 3 deletions

View File

@@ -11,9 +11,11 @@ def english_noun_chunks(obj):
conj = doc.vocab.strings['conj']
np_label = doc.vocab.strings['NP']
for i, word in enumerate(obj):
if word.pos in (NOUN, PROPN, PRON) and word.dep in np_deps:
if word.pos not in (NOUN, PROPN, PRON):
continue
if word.dep in np_deps:
yield word.left_edge.i, word.i+1, np_label
elif word.pos == NOUN and word.dep == conj:
elif word.dep == conj:
head = word.head
while head.dep == conj and head.head.i < head.i:
head = head.head

View File

@@ -4,7 +4,6 @@ from __future__ import unicode_literals
import pytest
@pytest.mark.xfail
@pytest.mark.models
def test_issue693(EN):
"""Test that doc.noun_chunks parses the complete sentence."""
@@ -15,5 +14,7 @@ def test_issue693(EN):
doc2 = EN(text2)
chunks1 = [chunk for chunk in doc1.noun_chunks]
chunks2 = [chunk for chunk in doc2.noun_chunks]
for word in doc1:
print(word.text, word.dep_, word.head.text)
assert len(chunks1) == 2
assert len(chunks2) == 2