diff --git a/spacy/pipeline/dep_parser.pyx b/spacy/pipeline/dep_parser.pyx
index f85b5626a..072c334e2 100644
--- a/spacy/pipeline/dep_parser.pyx
+++ b/spacy/pipeline/dep_parser.pyx
@@ -37,7 +37,6 @@ DEFAULT_PARSER_MODEL = Config().from_str(default_model_config)["model"]
     default_config={
         "moves": None,
         "update_with_oracle_cut_size": 100,
-        "multitasks": [],
         "learn_tokens": False,
         "min_action_freq": 30,
         "model": DEFAULT_PARSER_MODEL,
@@ -51,17 +50,47 @@ def make_parser(
     model: Model,
     moves: Optional[list],
     update_with_oracle_cut_size: int,
-    multitasks: Iterable,
     learn_tokens: bool,
     min_action_freq: int
 ):
+    """Create a transition-based DependencyParser component. The dependency parser
+    jointly learns sentence segmentation and labelled dependency parsing, and can
+    optionally learn to merge tokens that had been over-segmented by the tokenizer.
+
+    The parser uses a variant of the non-monotonic arc-eager transition-system
+    described by Honnibal and Johnson (2014), with the addition of a "break"
+    transition to perform the sentence segmentation. Nivre's pseudo-projective
+    dependency transformation is used to allow the parser to predict
+    non-projective parses.
+
+    The parser is trained using an imitation learning objective. The parser follows
+    the actions predicted by the current weights, and at each state, determines
+    which actions are compatible with the optimal parse that could be reached
+    from the current state. The weights are updated such that the scores assigned
+    to the set of optimal actions are increased, while the scores assigned to other
+    actions are decreased. Note that more than one action may be optimal for a
+    given state.
+
+    update_with_oracle_cut_size (int):
+        During training, cut long sequences into shorter segments by creating
+        intermediate states based on the gold-standard history. The model is
+        not very sensitive to this parameter, so you usually won't need to change
+        it. 100 is a good default.
+    learn_tokens (bool): Whether to learn to merge subtokens that are split
+        relative to the gold standard. Experimental.
+    min_action_freq (int): The minimum frequency of labelled actions to retain.
+        Rarer labelled actions have their label backed off to "dep". While this
+        primarily affects the label accuracy, it can also affect the attachment
+        structure, as the labels are used to represent the pseudo-projectivity
+        transformation.
+    """
     return DependencyParser(
         nlp.vocab,
         model,
         name,
         moves=moves,
         update_with_oracle_cut_size=update_with_oracle_cut_size,
-        multitasks=multitasks,
+        multitasks=[],
         learn_tokens=learn_tokens,
         min_action_freq=min_action_freq
     )
diff --git a/spacy/pipeline/ner.pyx b/spacy/pipeline/ner.pyx
index d13152a4f..a3bc3d920 100644
--- a/spacy/pipeline/ner.pyx
+++ b/spacy/pipeline/ner.pyx
@@ -35,9 +35,6 @@ DEFAULT_NER_MODEL = Config().from_str(default_model_config)["model"]
     default_config={
         "moves": None,
         "update_with_oracle_cut_size": 100,
-        "multitasks": [],
-        "learn_tokens": False,
-        "min_action_freq": 30,
         "model": DEFAULT_NER_MODEL,
     },
     scores=["ents_p", "ents_r", "ents_f", "ents_per_type"],
@@ -50,19 +47,40 @@ def make_ner(
     model: Model,
     moves: Optional[list],
     update_with_oracle_cut_size: int,
-    multitasks: Iterable,
-    learn_tokens: bool,
-    min_action_freq: int
 ):
+    """Create a transition-based EntityRecognizer component. The entity recognizer
+    identifies non-overlapping labelled spans of tokens.
+
+    The transition-based algorithm used encodes certain assumptions that are
+    effective for "traditional" named entity recognition tasks, but may not be
+    a good fit for every span identification problem. Specifically, the loss
+    function optimizes for whole-entity accuracy, so if your inter-annotator
+    agreement on boundary tokens is low, the component will likely perform poorly
+    on your problem. The transition-based algorithm also assumes that the most
+    decisive information about your entities will be close to their initial tokens.
+    If your entities are long and characterised by tokens in their middle, the
+    component will likely do poorly on your task.
+
+    model (Model): The model for the transition-based parser. The model needs
+        to have a specific substructure of named components --- see the
+        spacy.ml.tb_framework.TransitionModel for details.
+    moves (list[str]): A list of transition names. Inferred from the data if not
+        provided.
+    update_with_oracle_cut_size (int):
+        During training, cut long sequences into shorter segments by creating
+        intermediate states based on the gold-standard history. The model is
+        not very sensitive to this parameter, so you usually won't need to change
+        it. 100 is a good default.
+    """
     return EntityRecognizer(
         nlp.vocab,
         model,
         name,
         moves=moves,
         update_with_oracle_cut_size=update_with_oracle_cut_size,
-        multitasks=multitasks,
-        learn_tokens=learn_tokens,
-        min_action_freq=min_action_freq
+        multitasks=[],
+        min_action_freq=1,
+        learn_tokens=False,
     )
@@ -74,9 +92,11 @@ cdef class EntityRecognizer(Parser):
     TransitionSystem = BiluoPushDown
 
     def add_multitask_objective(self, mt_component):
+        """Register another component as a multi-task objective. Experimental."""
         self._multitasks.append(mt_component)
 
     def init_multitask_objectives(self, get_examples, pipeline, sgd=None, **cfg):
+        """Set up multi-task objective components. Experimental and internal."""
         # TODO: transfer self.model.get_ref("tok2vec") to the multitask's model?
         for labeller in self._multitasks:
             labeller.model.set_dim("nO", len(self.labels))
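As a quick illustration of the user-facing effect of this diff (a hypothetical sketch, not part of the change itself): the `parser` factory keeps its `learn_tokens` and `min_action_freq` settings, while the `ner` factory no longer exposes `multitasks`, `learn_tokens` or `min_action_freq`, which are now fixed internally to `[]`, `False` and `1`. Assuming a blank spaCy v3 pipeline:

```python
import spacy

# A blank English pipeline; "parser" and "ner" are the factory names
# registered by the modules touched in this diff.
nlp = spacy.blank("en")

# The parser factory still accepts learn_tokens and min_action_freq,
# matching the default_config shown above (values here are the defaults).
parser = nlp.add_pipe(
    "parser",
    config={
        "update_with_oracle_cut_size": 100,  # rarely needs changing
        "learn_tokens": False,               # experimental subtoken merging
        "min_action_freq": 30,               # back rare labels off to "dep"
    },
)

# The ner factory now only exposes moves, update_with_oracle_cut_size and
# model; multitasks=[], learn_tokens=False and min_action_freq=1 are fixed.
ner = nlp.add_pipe("ner", config={"update_with_oracle_cut_size": 100})
```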