diff --git a/spacy/gold.pyx b/spacy/gold.pyx
index 45b95b379..bc34290f4 100644
--- a/spacy/gold.pyx
+++ b/spacy/gold.pyx
@@ -142,9 +142,14 @@ def _min_edit_path(cand_words, gold_words):
 
 
 class GoldCorpus(object):
-    '''An annotated corpus, using the JSON file format. Manages
-    annotations for tagging, dependency parsing, NER.'''
+    """An annotated corpus, using the JSON file format. Manages
+    annotations for tagging, dependency parsing and NER."""
     def __init__(self, train_path, dev_path):
+        """Create a GoldCorpus.
+
+        train_path (unicode or Path): File or directory of training data.
+        dev_path (unicode or Path): File or directory of development data.
+        """
         self.train_path = util.ensure_path(train_path)
         self.dev_path = util.ensure_path(dev_path)
         self.train_locs = self.walk_corpus(self.train_path)
diff --git a/spacy/language.py b/spacy/language.py
index 58cee80ac..37f7ae207 100644
--- a/spacy/language.py
+++ b/spacy/language.py
@@ -236,6 +236,12 @@ class Language(object):
             doc.tensor = None
 
     def preprocess_gold(self, docs_golds):
+        """Can be called before training to pre-process gold data. By default,
+        it handles nonprojectivity and adds missing tags to the tag map.
+
+        docs_golds (iterable): Tuples of `Doc` and `GoldParse` objects.
+        YIELDS (tuple): Tuples of preprocessed `Doc` and `GoldParse` objects.
+        """
         for proc in self.pipeline:
             if hasattr(proc, 'preprocess_gold'):
                 docs_golds = proc.preprocess_gold(docs_golds)
diff --git a/website/docs/api/_data.json b/website/docs/api/_data.json
index 900a42553..443ee9a67 100644
--- a/website/docs/api/_data.json
+++ b/website/docs/api/_data.json
@@ -23,7 +23,8 @@
             "Lexeme": "lexeme",
             "Vocab": "vocab",
             "StringStore": "stringstore",
-            "GoldParse": "goldparse"
+            "GoldParse": "goldparse",
+            "GoldCorpus": "goldcorpus"
         },
         "Other": {
             "Annotation Specs": "annotation",
@@ -135,6 +136,11 @@
         "tag": "class"
     },
 
+    "goldcorpus": {
+        "title": "GoldCorpus",
+        "tag": "class"
+    },
+
     "annotation": {
         "title": "Annotation Specifications"
     },
diff --git a/website/docs/api/goldcorpus.jade b/website/docs/api/goldcorpus.jade
new file mode 100644
index 000000000..bfff92ad5
--- /dev/null
+++ b/website/docs/api/goldcorpus.jade
@@ -0,0 +1,23 @@
+//- 💫 DOCS > API > GOLDCORPUS
+
+include ../../_includes/_mixins
+
+p
+    |  An annotated corpus, using the JSON file format. Manages annotations for
+    |  tagging, dependency parsing and NER.
+
++h(2, "init") GoldCorpus.__init__
+    +tag method
+
+p Create a #[code GoldCorpus].
+
++table(["Name", "Type", "Description"])
+    +row
+        +cell #[code train_path]
+        +cell unicode or #[code Path]
+        +cell File or directory of training data.
+
+    +row
+        +cell #[code dev_path]
+        +cell unicode or #[code Path]
+        +cell File or directory of development data.
diff --git a/website/docs/api/goldparse.jade b/website/docs/api/goldparse.jade
index f39558b35..7818912c3 100644
--- a/website/docs/api/goldparse.jade
+++ b/website/docs/api/goldparse.jade
@@ -7,7 +7,7 @@ p Collection for training annotations.
 +h(2, "init") GoldParse.__init__
     +tag method
 
-p Create a GoldParse.
+p Create a #[code GoldParse].
 
 +table(["Name", "Type", "Description"])
     +row
diff --git a/website/docs/api/language.jade b/website/docs/api/language.jade
index 7f6e0829d..455165bca 100644
--- a/website/docs/api/language.jade
+++ b/website/docs/api/language.jade
@@ -82,6 +82,41 @@ p
         +cell #[code Doc]
         +cell A container for accessing the annotations.
 
++h(2, "pipe") Language.pipe
+    +tag method
+
+p
+    |  Process texts as a stream, and yield #[code Doc] objects in order.
+    |  Supports GIL-free multi-threading.
+
++aside-code("Example").
+    texts = [u'One document.', u'...', u'Lots of documents']
+    for doc in nlp.pipe(texts, batch_size=50, n_threads=4):
+        assert doc.is_parsed
+
++table(["Name", "Type", "Description"])
+    +row
+        +cell #[code texts]
+        +cell -
+        +cell A sequence of unicode objects.
+
+    +row
+        +cell #[code n_threads]
+        +cell int
+        +cell
+            |  The number of worker threads to use. If #[code -1], OpenMP will
+            |  decide how many to use at run time. Default is #[code 2].
+
+    +row
+        +cell #[code batch_size]
+        +cell int
+        +cell The number of texts to buffer.
+
+    +footrow
+        +cell yields
+        +cell #[code Doc]
+        +cell Documents in the order of the original text.
+
 +h(2, "update") Language.update
     +tag method
 
@@ -172,40 +207,24 @@ p
         +cell -
         +cell Config parameters.
 
-+h(2, "pipe") Language.pipe
-    +tag method
++h(2, "preprocess_gold") Language.preprocess_gold
++    +tag method
 
 p
-    |  Process texts as a stream, and yield #[code Doc] objects in order.
-    |  Supports GIL-free multi-threading.
+    |  Can be called before training to pre-process gold data. By default, it
+    |  handles nonprojectivity and adds missing tags to the tag map.
 
-+aside-code("Example").
-    texts = [u'One document.', u'...', u'Lots of documents']
-    for doc in nlp.pipe(texts, batch_size=50, n_threads=4):
-        assert doc.is_parsed
 
 +table(["Name", "Type", "Description"])
     +row
-        +cell #[code texts]
-        +cell -
-        +cell A sequence of unicode objects.
-
-    +row
-        +cell #[code n_threads]
-        +cell int
-        +cell
-            |  The number of worker threads to use. If #[code -1], OpenMP will
-            |  decide how many to use at run time. Default is #[code 2].
-
-    +row
-        +cell #[code batch_size]
-        +cell int
-        +cell The number of texts to buffer.
+        +cell #[code docs_golds]
+        +cell iterable
+        +cell Tuples of #[code Doc] and #[code GoldParse] objects.
 
     +footrow
         +cell yields
-        +cell #[code Doc]
-        +cell Documents in the order of the original text.
+        +cell tuple
+        +cell Tuples of preprocessed #[code Doc] and #[code GoldParse] objects.
 
 +h(2, "to_disk") Language.to_disk
     +tag method