From 3e3bda142d28d2b7b983869f839a770b8d48877c Mon Sep 17 00:00:00 2001 From: Pokey Rule Date: Thu, 24 Nov 2016 10:47:20 +0000 Subject: [PATCH 1/2] Add noun_chunks to Span --- spacy/syntax/iterators.pyx | 5 +++-- spacy/tokens/doc.pyx | 4 ++++ spacy/tokens/span.pyx | 25 +++++++++++++++++++++++++ 3 files changed, 32 insertions(+), 2 deletions(-) diff --git a/spacy/syntax/iterators.pyx b/spacy/syntax/iterators.pyx index aeb4e635c..f8951d039 100644 --- a/spacy/syntax/iterators.pyx +++ b/spacy/syntax/iterators.pyx @@ -1,13 +1,14 @@ from spacy.parts_of_speech cimport NOUN, PROPN, PRON -def english_noun_chunks(doc): +def english_noun_chunks(obj): labels = ['nsubj', 'dobj', 'nsubjpass', 'pcomp', 'pobj', 'attr', 'ROOT', 'root'] + doc = obj.doc np_deps = [doc.vocab.strings[label] for label in labels] conj = doc.vocab.strings['conj'] np_label = doc.vocab.strings['NP'] - for i, word in enumerate(doc): + for i, word in enumerate(obj): if word.pos in (NOUN, PROPN, PRON) and word.dep in np_deps: yield word.left_edge.i, word.i+1, np_label elif word.pos == NOUN and word.dep == conj: diff --git a/spacy/tokens/doc.pyx b/spacy/tokens/doc.pyx index 3d09b7ad0..8ce2c7fe4 100644 --- a/spacy/tokens/doc.pyx +++ b/spacy/tokens/doc.pyx @@ -223,6 +223,10 @@ cdef class Doc: def __repr__(self): return self.__str__() + @property + def doc(self): + return self + def similarity(self, other): '''Make a semantic similarity estimate. The default estimate is cosine similarity using an average of word vectors. diff --git a/spacy/tokens/span.pyx b/spacy/tokens/span.pyx index e645c1a6f..a4f49555a 100644 --- a/spacy/tokens/span.pyx +++ b/spacy/tokens/span.pyx @@ -190,6 +190,31 @@ cdef class Span: def __get__(self): return u''.join([t.text_with_ws for t in self]) + property noun_chunks: + ''' + Yields base noun-phrase #[code Span] objects, if the document + has been syntactically parsed. 
A base noun phrase, or + 'NP chunk', is a noun phrase that does not permit other NPs to + be nested within it – so no NP-level coordination, no prepositional + phrases, and no relative clauses. For example: + ''' + def __get__(self): + if not self.doc.is_parsed: + raise ValueError( + "noun_chunks requires the dependency parse, which " + "requires data to be installed. If you haven't done so, run: " + "\npython -m spacy.%s.download all\n" + "to install the data" % self.vocab.lang) + # Accumulate the result before beginning to iterate over it. This prevents + # the tokenisation from being changed out from under us during the iteration. + # The tricky thing here is that Span accepts its tokenisation changing, + # so it's okay once we have the Span objects. See Issue #375 + spans = [] + for start, end, label in self.doc.noun_chunks_iterator(self): + spans.append(Span(self, start, end, label=label)) + for span in spans: + yield span + property root: """The token within the span that's highest in the parse tree. If there's a tie, the earlist is prefered. From b8c4f5ea768126e46b138cc8d3e0c930fb6a5aba Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Thu, 24 Nov 2016 23:30:15 +1100 Subject: [PATCH 2/2] Allow German noun chunks to work on Span Update the German noun chunks iterator, so that it also works on Span objects. --- spacy/syntax/iterators.pyx | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/spacy/syntax/iterators.pyx b/spacy/syntax/iterators.pyx index f8951d039..ee5e818c1 100644 --- a/spacy/syntax/iterators.pyx +++ b/spacy/syntax/iterators.pyx @@ -2,9 +2,11 @@ from spacy.parts_of_speech cimport NOUN, PROPN, PRON def english_noun_chunks(obj): + '''Detect base noun phrases from a dependency parse. + Works on both Doc and Span.''' labels = ['nsubj', 'dobj', 'nsubjpass', 'pcomp', 'pobj', 'attr', 'ROOT', 'root'] - doc = obj.doc + doc = obj.doc # Ensure works on both Doc and Span.
np_deps = [doc.vocab.strings[label] for label in labels] conj = doc.vocab.strings['conj'] np_label = doc.vocab.strings['NP'] @@ -26,14 +28,15 @@ def english_noun_chunks(obj): # extended to the right of the NOUN # example: "eine Tasse Tee" (a cup (of) tea) returns "eine Tasse Tee" and not # just "eine Tasse", same for "das Thema Familie" -def german_noun_chunks(doc): +def german_noun_chunks(obj): labels = ['sb', 'oa', 'da', 'nk', 'mo', 'ag', 'ROOT', 'root', 'cj', 'pd', 'og', 'app'] + doc = obj.doc # Ensure works on both Doc and Span. np_label = doc.vocab.strings['NP'] np_deps = set(doc.vocab.strings[label] for label in labels) close_app = doc.vocab.strings['nk'] rbracket = 0 - for i, word in enumerate(doc): + for i, word in enumerate(obj): if i < rbracket: continue if word.pos in (NOUN, PROPN, PRON) and word.dep in np_deps: