diff --git a/setup.py b/setup.py
index 176434151..de7d95d22 100644
--- a/setup.py
+++ b/setup.py
@@ -56,6 +56,7 @@ MOD_NAMES = [
     'spacy.tokens.doc',
     'spacy.tokens.span',
     'spacy.tokens.token',
+    'spacy.tokens.npchunks',
     'spacy.serialize.packer',
     'spacy.serialize.huffman',
     'spacy.serialize.bits',
@@ -184,3 +185,4 @@ def setup_package():

 if __name__ == '__main__':
     setup_package()
+
diff --git a/spacy/tokens/doc.pyx b/spacy/tokens/doc.pyx
index 26088be0c..fa45c8b3e 100644
--- a/spacy/tokens/doc.pyx
+++ b/spacy/tokens/doc.pyx
@@ -23,6 +23,7 @@ from .token cimport Token
 from ..serialize.bits cimport BitArray

 from ..util import normalize_slice
+from . import npchunks

 DEF PADDING = 5
@@ -239,24 +240,15 @@ cdef class Doc:
                 "requires data to be installed. If you haven't done so, run: "
                 "\npython -m spacy.en.download all\n"
                 "to install the data")
-
-        cdef const TokenC* word
-        labels = ['nsubj', 'dobj', 'nsubjpass', 'pcomp', 'pobj',
-                  'attr', 'root']
-        np_deps = [self.vocab.strings[label] for label in labels]
-        conj = self.vocab.strings['conj']
-        np_label = self.vocab.strings['NP']
-        for i in range(self.length):
-            word = &self.c[i]
-            if word.pos == NOUN and word.dep in np_deps:
-                yield Span(self, word.l_edge, i+1, label=np_label)
-            elif word.pos == NOUN and word.dep == conj:
-                head = word + word.head
-                while head.dep == conj and head.head < 0:
-                    head += head.head
-                # If the head is an NP, and we're coordinated to it, we're an NP
-                if head.dep in np_deps:
-                    yield Span(self, word.l_edge, i+1, label=np_label)
+
+        chunk_rules = {'en': npchunks.english, 'de': npchunks.german}
+
+        for sent in self.sents:
+            lang = 'en'  # TODO: make dependent on the language of the root token
+            for chunk in chunk_rules.get(lang)(sent):
+                yield chunk
+
+
     @property
     def sents(self):
diff --git a/spacy/tokens/npchunks.pxd b/spacy/tokens/npchunks.pxd
new file mode 100644
index 000000000..e69de29bb
diff --git a/spacy/tokens/npchunks.pyx b/spacy/tokens/npchunks.pyx
new file mode 100644
index 000000000..0c5ca32a5
--- /dev/null
+++ b/spacy/tokens/npchunks.pyx
@@ -0,0 +1,54 @@
+
+from ..structs cimport TokenC
+from .doc cimport Doc
+from .span cimport Span
+
+from ..parts_of_speech cimport NOUN, PROPN, PRON
+
+def english(Span sent):
+    cdef const TokenC* word
+    strings = sent.doc.vocab.strings
+    labels = ['nsubj', 'dobj', 'nsubjpass', 'pcomp', 'pobj', 'attr', 'root']
+    np_deps = [strings[label] for label in labels]
+    conj = strings['conj']
+    np_label = strings['NP']
+    for i in range(sent.start, sent.end):
+        word = &sent.doc.c[i]
+        if word.pos == NOUN and word.dep in np_deps:
+            yield Span(sent.doc, word.l_edge, i+1, label=np_label)
+        elif word.pos == NOUN and word.dep == conj:
+            head = word + word.head
+            while head.dep == conj and head.head < 0:
+                head += head.head
+            # If the head is an NP, and we're coordinated to it, we're an NP
+            if head.dep in np_deps:
+                yield Span(sent.doc, word.l_edge, i+1, label=np_label)
+
+
+def german(Span sent):
+    # This function extracts spans headed by a NOUN, from the left-most
+    # syntactic dependent up to the NOUN itself. For close apposition and
+    # measurement constructions, the span is sometimes extended to the
+    # right of the NOUN.
+    # Example: "eine Tasse Tee" (a cup (of) tea) returns "eine Tasse Tee",
+    # not just "eine Tasse"; the same holds for "das Thema Familie".
+    cdef const TokenC* word
+    strings = sent.doc.vocab.strings
+    labels = ['sb', 'oa', 'da', 'nk', 'mo', 'ag', 'root', 'cj', 'pd', 'og', 'app']
+    close_app = strings['nk']
+    np_deps = [strings[label] for label in labels]
+    np_label = strings['NP']
+    for i in range(sent.start, sent.end):
+        word = &sent.doc.c[i]
+        if word.pos == NOUN and word.dep in np_deps:
+            rbracket = i+1
+            # try to extend the span to the right
+            # to capture close apposition/measurement constructions
+            for rdep in sent.doc[i].rights:
+                if rdep.pos == NOUN and rdep.dep == close_app:
+                    rbracket = rdep.i+1
+            yield Span(sent.doc, word.l_edge, rbracket, label=np_label)
+
+
+
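
---

Usage sketch for trying the patch (not part of the diff): iterating
Doc.noun_chunks after rebuilding. This assumes the pre-1.0 English
pipeline referenced by the error message in doc.pyx
(python -m spacy.en.download all); the sentence is made up, and the
dispatch still runs the English rules only, per the TODO above.

    from spacy.en import English

    nlp = English()
    doc = nlp(u'The quick brown fox jumped over the lazy dog.')
    for chunk in doc.noun_chunks:
        # each chunk is a Span labelled 'NP', yielded by npchunks.english
        print(chunk.text, chunk.label_)
    # expected: 'The quick brown fox' (nsubj) and 'the lazy dog' (pobj)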
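The close-apposition extension in german() is easiest to see on the example
from its own comments. A hypothetical session, assuming some German pipeline
nlp_de (none ships with this patch, and the 'de' branch is unreachable until
the TODO in doc.pyx is resolved):

    doc = nlp_de(u'Ich trinke eine Tasse Tee.')
    for chunk in doc.noun_chunks:
        print(chunk.text)
    # expected: 'eine Tasse Tee' -- 'Tee' attaches to 'Tasse' as a close
    # apposition ('nk'), so rbracket is extended past the head NOUN;
    # 'Ich' is PRON, and only NOUN-headed spans are yielded.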