diff --git a/spacy/tokens/doc.pyx b/spacy/tokens/doc.pyx index 1fbb796ca..1845a5562 100644 --- a/spacy/tokens/doc.pyx +++ b/spacy/tokens/doc.pyx @@ -241,13 +241,22 @@ cdef class Doc: "to install the data") cdef const TokenC* word - labels = ['nsubj', 'dobj', 'nsubjpass', 'pcomp', 'pobj', 'attr', 'root'] + labels = ['nsubj', 'dobj', 'nsubjpass', 'pcomp', 'pobj', + 'attr', 'root'] np_deps = [self.vocab.strings[label] for label in labels] + conj = self.vocab.strings['conj'] np_label = self.vocab.strings['NP'] for i in range(self.length): word = &self.c[i] if word.pos == NOUN and word.dep in np_deps: yield Span(self, word.l_edge, i+1, label=np_label) + elif word.pos == NOUN and word.dep == conj: + head = word+word.head + while head.dep == conj and head.head < 0: + head += head.head + # If the head is an NP, and we're coordinated to it, we're an NP + if head.dep in np_deps: + yield Span(self, word.l_edge, i+1, label=np_label) @property def sents(self):