Fix noun_chunk rules around coordination

Closes #693.
2017-04-07 17:06:40 +02:00 · 2017-04-07 17:06:40 +02:00 · cc36c308f4
parent 2dc0d28cc4
commit cc36c308f4
2 changed files with 6 additions and 3 deletions
--- a/spacy/syntax/iterators.pyx
+++ b/spacy/syntax/iterators.pyx
@@ -11,9 +11,11 @@ def english_noun_chunks(obj):
    conj = doc.vocab.strings['conj']
    np_label = doc.vocab.strings['NP']
    for i, word in enumerate(obj):
-        if word.pos in (NOUN, PROPN, PRON) and word.dep in np_deps:
+        if word.pos not in (NOUN, PROPN, PRON):
+            continue
+        if word.dep in np_deps:
            yield word.left_edge.i, word.i+1, np_label
-        elif word.pos == NOUN and word.dep == conj:
+        elif word.dep == conj:
            head = word.head
            while head.dep == conj and head.head.i < head.i:
                head = head.head
--- a/spacy/tests/regression/test_issue693.py
+++ b/spacy/tests/regression/test_issue693.py
@@ -4,7 +4,6 @@ from __future__ import unicode_literals
 import pytest


-@pytest.mark.xfail
 @pytest.mark.models
 def test_issue693(EN):
    """Test that doc.noun_chunks parses the complete sentence."""
@@ -15,5 +14,7 @@ def test_issue693(EN):
    doc2 = EN(text2)
    chunks1 = [chunk for chunk in doc1.noun_chunks]
    chunks2 = [chunk for chunk in doc2.noun_chunks]
+    for word in doc1:
+        print(word.text, word.dep_, word.head.text)
    assert len(chunks1) == 2
    assert len(chunks2) == 2