Update docs and consistency

2020-07-29 15:14:07 +02:00 · 2020-07-29 15:14:07 +02:00 · b0f57a0cac
parent 62266fb828
commit b0f57a0cac
25 changed files with 646 additions and 298 deletions
--- a/spacy/language.py
+++ b/spacy/language.py
@ -49,6 +49,7 @@ class BaseDefaults:
    overwritten by language subclasses by defining their own subclasses of
    Language.Defaults.
    """
+
    config: Config = Config()
    tokenizer_exceptions: Dict[str, List[dict]] = BASE_EXCEPTIONS
    prefixes: Optional[List[Union[str, Pattern]]] = TOKENIZER_PREFIXES
@ -67,6 +68,7 @@ def create_tokenizer() -> Callable[["Language"], Tokenizer]:
    """Registered function to create a tokenizer. Returns a factory that takes
    the nlp object and returns a Tokenizer instance using the language defaults.
    """
+
    def tokenizer_factory(nlp: "Language") -> Tokenizer:
        prefixes = nlp.Defaults.prefixes
        suffixes = nlp.Defaults.suffixes
@ -1432,7 +1434,9 @@ class Language:
        nlp.resolved = resolved
        return nlp

-    def to_disk(self, path: Union[str, Path], exclude: Iterable[str] = tuple()) -> None:
+    def to_disk(
+        self, path: Union[str, Path], *, exclude: Iterable[str] = tuple()
+    ) -> None:
        """Save the current state to a directory.  If a model is loaded, this
        will include the model.

@ -1461,7 +1465,7 @@ class Language:
        util.to_disk(path, serializers, exclude)

    def from_disk(
-        self, path: Union[str, Path], exclude: Iterable[str] = tuple()
+        self, path: Union[str, Path], *, exclude: Iterable[str] = tuple()
    ) -> "Language":
        """Loads state from a directory. Modifies the object in place and
        returns it. If the saved `Language` object contains a model, the
@ -1512,7 +1516,7 @@ class Language:
        self._link_components()
        return self

-    def to_bytes(self, exclude: Iterable[str] = tuple()) -> bytes:
+    def to_bytes(self, *, exclude: Iterable[str] = tuple()) -> bytes:
        """Serialize the current state to a binary string.

        exclude (list): Names of components or serialization fields to exclude.
@ -1534,7 +1538,7 @@ class Language:
        return util.to_bytes(serializers, exclude)

    def from_bytes(
-        self, bytes_data: bytes, exclude: Iterable[str] = tuple()
+        self, bytes_data: bytes, *, exclude: Iterable[str] = tuple()
    ) -> "Language":
        """Load state from a binary string.

@ -1583,6 +1587,7 @@ class FactoryMeta:
    created whenever a component is defined and stored on the Language class for
    each component instance and factory instance.
    """
+
    factory: str
    default_config: Optional[Dict[str, Any]] = None  # noqa: E704
    assigns: Iterable[str] = tuple()
--- a/spacy/pipeline/entity_linker.py
+++ b/spacy/pipeline/entity_linker.py
@ -400,7 +400,9 @@ class EntityLinker(Pipe):
                for token in ent:
                    token.ent_kb_id_ = kb_id

-    def to_disk(self, path: Union[str, Path], exclude: Iterable[str] = tuple()) -> None:
+    def to_disk(
+        self, path: Union[str, Path], *, exclude: Iterable[str] = tuple()
+    ) -> None:
        """Serialize the pipe to disk.

        path (str / Path): Path to a directory.
@ -417,7 +419,7 @@ class EntityLinker(Pipe):
        util.to_disk(path, serialize, exclude)

    def from_disk(
-        self, path: Union[str, Path], exclude: Iterable[str] = tuple()
+        self, path: Union[str, Path], *, exclude: Iterable[str] = tuple()
    ) -> "EntityLinker":
        """Load the pipe from disk. Modifies the object in place and returns it.

--- a/spacy/pipeline/entityruler.py
+++ b/spacy/pipeline/entityruler.py
@ -315,7 +315,7 @@ class EntityRuler:
        return Scorer.score_spans(examples, "ents", **kwargs)

    def from_bytes(
-        self, patterns_bytes: bytes, exclude: Iterable[str] = tuple()
+        self, patterns_bytes: bytes, *, exclude: Iterable[str] = tuple()
    ) -> "EntityRuler":
        """Load the entity ruler from a bytestring.

@ -339,7 +339,7 @@ class EntityRuler:
            self.add_patterns(cfg)
        return self

-    def to_bytes(self, exclude: Iterable[str] = tuple()) -> bytes:
+    def to_bytes(self, *, exclude: Iterable[str] = tuple()) -> bytes:
        """Serialize the entity ruler patterns to a bytestring.

        RETURNS (bytes): The serialized patterns.
@ -355,7 +355,7 @@ class EntityRuler:
        return srsly.msgpack_dumps(serial)

    def from_disk(
-        self, path: Union[str, Path], exclude: Iterable[str] = tuple()
+        self, path: Union[str, Path], *, exclude: Iterable[str] = tuple()
    ) -> "EntityRuler":
        """Load the entity ruler from a file. Expects a file containing
        newline-delimited JSON (JSONL) with one entry per line.
@ -391,7 +391,9 @@ class EntityRuler:
            from_disk(path, deserializers_patterns, {})
        return self

-    def to_disk(self, path: Union[str, Path], exclude: Iterable[str] = tuple()) -> None:
+    def to_disk(
+        self, path: Union[str, Path], *, exclude: Iterable[str] = tuple()
+    ) -> None:
        """Save the entity ruler patterns to a directory. The patterns will be
        saved as newline-delimited JSON (JSONL).

--- a/spacy/pipeline/morphologizer.pyx
+++ b/spacy/pipeline/morphologizer.pyx
@ -230,7 +230,7 @@ class Morphologizer(Tagger):
            "morph", **kwargs))
        return results

-    def to_bytes(self, exclude=tuple()):
+    def to_bytes(self, *, exclude=tuple()):
        """Serialize the pipe to a bytestring.

        exclude (Iterable[str]): String names of serialization fields to exclude.
@ -244,7 +244,7 @@ class Morphologizer(Tagger):
        serialize["cfg"] = lambda: srsly.json_dumps(self.cfg)
        return util.to_bytes(serialize, exclude)

-    def from_bytes(self, bytes_data, exclude=tuple()):
+    def from_bytes(self, bytes_data, *, exclude=tuple()):
        """Load the pipe from a bytestring.

        bytes_data (bytes): The serialized pipe.
@ -267,7 +267,7 @@ class Morphologizer(Tagger):
        util.from_bytes(bytes_data, deserialize, exclude)
        return self

-    def to_disk(self, path, exclude=tuple()):
+    def to_disk(self, path, *, exclude=tuple()):
        """Serialize the pipe to disk.

        path (str / Path): Path to a directory.
@ -282,7 +282,7 @@ class Morphologizer(Tagger):
        }
        util.to_disk(path, serialize, exclude)

-    def from_disk(self, path, exclude=tuple()):
+    def from_disk(self, path, *, exclude=tuple()):
        """Load the pipe from disk. Modifies the object in place and returns it.

        path (str / Path): Path to a directory.
--- a/spacy/pipeline/pipe.pyx
+++ b/spacy/pipeline/pipe.pyx
@ -180,7 +180,7 @@ class Pipe:
        """
        return {}

-    def to_bytes(self, exclude=tuple()):
+    def to_bytes(self, *, exclude=tuple()):
        """Serialize the pipe to a bytestring.

        exclude (Iterable[str]): String names of serialization fields to exclude.
@ -195,7 +195,7 @@ class Pipe:
            serialize["vocab"] = self.vocab.to_bytes
        return util.to_bytes(serialize, exclude)

-    def from_bytes(self, bytes_data, exclude=tuple()):
+    def from_bytes(self, bytes_data, *, exclude=tuple()):
        """Load the pipe from a bytestring.

        exclude (Iterable[str]): String names of serialization fields to exclude.
@ -218,7 +218,7 @@ class Pipe:
        util.from_bytes(bytes_data, deserialize, exclude)
        return self

-    def to_disk(self, path, exclude=tuple()):
+    def to_disk(self, path, *, exclude=tuple()):
        """Serialize the pipe to disk.

        path (str / Path): Path to a directory.
@ -232,7 +232,7 @@ class Pipe:
        serialize["model"] = lambda p: self.model.to_disk(p)
        util.to_disk(path, serialize, exclude)

-    def from_disk(self, path, exclude=tuple()):
+    def from_disk(self, path, *, exclude=tuple()):
        """Load the pipe from disk.

        path (str / Path): Path to a directory.
--- a/spacy/pipeline/sentencizer.pyx
+++ b/spacy/pipeline/sentencizer.pyx
@ -162,7 +162,7 @@ class Sentencizer(Pipe):
        del results["sents_per_type"]
        return results

-    def to_bytes(self, exclude=tuple()):
+    def to_bytes(self, *, exclude=tuple()):
        """Serialize the sentencizer to a bytestring.

        RETURNS (bytes): The serialized object.
@ -171,7 +171,7 @@ class Sentencizer(Pipe):
        """
        return srsly.msgpack_dumps({"punct_chars": list(self.punct_chars)})

-    def from_bytes(self, bytes_data, exclude=tuple()):
+    def from_bytes(self, bytes_data, *, exclude=tuple()):
        """Load the sentencizer from a bytestring.

        bytes_data (bytes): The data to load.
@ -183,7 +183,7 @@ class Sentencizer(Pipe):
        self.punct_chars = set(cfg.get("punct_chars", self.default_punct_chars))
        return self

-    def to_disk(self, path, exclude=tuple()):
+    def to_disk(self, path, *, exclude=tuple()):
        """Serialize the sentencizer to disk.

        DOCS: https://spacy.io/api/sentencizer#to_disk
@ -193,7 +193,7 @@ class Sentencizer(Pipe):
        srsly.write_json(path, {"punct_chars": list(self.punct_chars)})


-    def from_disk(self, path, exclude=tuple()):
+    def from_disk(self, path, *, exclude=tuple()):
        """Load the sentencizer from disk.

        DOCS: https://spacy.io/api/sentencizer#from_disk
--- a/spacy/pipeline/senter.pyx
+++ b/spacy/pipeline/senter.pyx
@ -157,7 +157,7 @@ class SentenceRecognizer(Tagger):
        del results["sents_per_type"]
        return results

-    def to_bytes(self, exclude=tuple()):
+    def to_bytes(self, *, exclude=tuple()):
        """Serialize the pipe to a bytestring.

        exclude (Iterable[str]): String names of serialization fields to exclude.
@ -171,7 +171,7 @@ class SentenceRecognizer(Tagger):
        serialize["cfg"] = lambda: srsly.json_dumps(self.cfg)
        return util.to_bytes(serialize, exclude)

-    def from_bytes(self, bytes_data, exclude=tuple()):
+    def from_bytes(self, bytes_data, *, exclude=tuple()):
        """Load the pipe from a bytestring.

        bytes_data (bytes): The serialized pipe.
@ -194,7 +194,7 @@ class SentenceRecognizer(Tagger):
        util.from_bytes(bytes_data, deserialize, exclude)
        return self

-    def to_disk(self, path, exclude=tuple()):
+    def to_disk(self, path, *, exclude=tuple()):
        """Serialize the pipe to disk.

        path (str / Path): Path to a directory.
@ -209,7 +209,7 @@ class SentenceRecognizer(Tagger):
        }
        util.to_disk(path, serialize, exclude)

-    def from_disk(self, path, exclude=tuple()):
+    def from_disk(self, path, *, exclude=tuple()):
        """Load the pipe from disk. Modifies the object in place and returns it.

        path (str / Path): Path to a directory.
--- a/spacy/pipeline/tagger.pyx
+++ b/spacy/pipeline/tagger.pyx
@ -370,7 +370,7 @@ class Tagger(Pipe):
        scores.update(Scorer.score_token_attr(examples, "lemma", **kwargs))
        return scores

-    def to_bytes(self, exclude=tuple()):
+    def to_bytes(self, *, exclude=tuple()):
        """Serialize the pipe to a bytestring.

        exclude (Iterable[str]): String names of serialization fields to exclude.
@ -388,7 +388,7 @@ class Tagger(Pipe):
        serialize["morph_rules"] = lambda: srsly.msgpack_dumps(morph_rules)
        return util.to_bytes(serialize, exclude)

-    def from_bytes(self, bytes_data, exclude=tuple()):
+    def from_bytes(self, bytes_data, *, exclude=tuple()):
        """Load the pipe from a bytestring.

        bytes_data (bytes): The serialized pipe.
@ -424,7 +424,7 @@ class Tagger(Pipe):
        util.from_bytes(bytes_data, deserialize, exclude)
        return self

-    def to_disk(self, path, exclude=tuple()):
+    def to_disk(self, path, *, exclude=tuple()):
        """Serialize the pipe to disk.

        path (str / Path): Path to a directory.
@ -443,7 +443,7 @@ class Tagger(Pipe):
        }
        util.to_disk(path, serialize, exclude)

-    def from_disk(self, path, exclude=tuple()):
+    def from_disk(self, path, *, exclude=tuple()):
        """Load the pipe from disk. Modifies the object in place and returns it.

        path (str / Path): Path to a directory.
--- a/spacy/tokenizer.pyx
+++ b/spacy/tokenizer.pyx
@ -728,7 +728,7 @@ cdef class Tokenizer:
        with path.open("wb") as file_:
            file_.write(self.to_bytes(**kwargs))

-    def from_disk(self, path, **kwargs):
+    def from_disk(self, path, *, exclude=tuple()):
        """Loads state from a directory. Modifies the object in place and
        returns it.

@ -741,10 +741,10 @@ cdef class Tokenizer:
        path = util.ensure_path(path)
        with path.open("rb") as file_:
            bytes_data = file_.read()
-        self.from_bytes(bytes_data, **kwargs)
+        self.from_bytes(bytes_data, exclude=exclude)
        return self

-    def to_bytes(self, exclude=tuple()):
+    def to_bytes(self, *, exclude=tuple()):
        """Serialize the current state to a binary string.

        exclude (list): String names of serialization fields to exclude.
@ -763,7 +763,7 @@ cdef class Tokenizer:
        }
        return util.to_bytes(serializers, exclude)

-    def from_bytes(self, bytes_data, exclude=tuple()):
+    def from_bytes(self, bytes_data, *, exclude=tuple()):
        """Load state from a binary string.

        bytes_data (bytes): The data to load from.
--- a/spacy/tokens/doc.pyx
+++ b/spacy/tokens/doc.pyx
@ -987,20 +987,20 @@ cdef class Doc:
        other.c = &tokens[PADDING]
        return other

-    def to_disk(self, path, **kwargs):
+    def to_disk(self, path, *, exclude=tuple()):
        """Save the current state to a directory.

        path (str / Path): A path to a directory, which will be created if
            it doesn't exist. Paths may be either strings or Path-like objects.
-        exclude (list): String names of serialization fields to exclude.
+        exclude (Iterable[str]): String names of serialization fields to exclude.

        DOCS: https://spacy.io/api/doc#to_disk
        """
        path = util.ensure_path(path)
        with path.open("wb") as file_:
-            file_.write(self.to_bytes(**kwargs))
+            file_.write(self.to_bytes(exclude=exclude))

-    def from_disk(self, path, **kwargs):
+    def from_disk(self, path, *, exclude=tuple()):
        """Loads state from a directory. Modifies the object in place and
        returns it.

@ -1014,9 +1014,9 @@ cdef class Doc:
        path = util.ensure_path(path)
        with path.open("rb") as file_:
            bytes_data = file_.read()
-        return self.from_bytes(bytes_data, **kwargs)
+        return self.from_bytes(bytes_data, exclude=exclude)

-    def to_bytes(self, exclude=tuple(), **kwargs):
+    def to_bytes(self, *, exclude=tuple()):
        """Serialize, i.e. export the document contents to a binary string.

        exclude (list): String names of serialization fields to exclude.
@ -1025,9 +1025,9 @@ cdef class Doc:

        DOCS: https://spacy.io/api/doc#to_bytes
        """
-        return srsly.msgpack_dumps(self.to_dict(exclude=exclude, **kwargs))
+        return srsly.msgpack_dumps(self.to_dict(exclude=exclude))

-    def from_bytes(self, bytes_data, exclude=tuple(), **kwargs):
+    def from_bytes(self, bytes_data, *, exclude=tuple()):
        """Deserialize, i.e. import the document contents from a binary string.

        bytes_data (bytes): The string to load from.
@ -1036,13 +1036,9 @@ cdef class Doc:

        DOCS: https://spacy.io/api/doc#from_bytes
        """
-        return self.from_dict(
-            srsly.msgpack_loads(bytes_data),
-            exclude=exclude,
-            **kwargs
-        )
+        return self.from_dict(srsly.msgpack_loads(bytes_data), exclude=exclude)

-    def to_dict(self, exclude=tuple(), **kwargs):
+    def to_dict(self, *, exclude=tuple()):
        """Export the document contents to a dictionary for serialization.

        exclude (list): String names of serialization fields to exclude.
@ -1090,14 +1086,14 @@ cdef class Doc:
                serializers["user_data_values"] = lambda: srsly.msgpack_dumps(user_data_values)
        return util.to_dict(serializers, exclude)

-    def from_dict(self, msg, exclude=tuple(), **kwargs):
+    def from_dict(self, msg, *, exclude=tuple()):
        """Deserialize, i.e. import the document contents from a binary string.

        msg (dict): The dictionary to load from.
        exclude (list): String names of serialization fields to exclude.
        RETURNS (Doc): Itself.

-        DOCS: https://spacy.io/api/doc#from_bytes
+        DOCS: https://spacy.io/api/doc#from_dict
        """
        if self.length != 0:
            raise ValueError(Errors.E033.format(length=self.length))
--- a/spacy/vocab.pyx
+++ b/spacy/vocab.pyx
@ -439,7 +439,7 @@ cdef class Vocab:
            orth = self.strings.add(orth)
        return orth in self.vectors

-    def to_disk(self, path, exclude=tuple()):
+    def to_disk(self, path, *, exclude=tuple()):
        """Save the current state to a directory.

        path (unicode or Path): A path to a directory, which will be created if
@ -459,7 +459,7 @@ cdef class Vocab:
        if "lookups" not in "exclude" and self.lookups is not None:
            self.lookups.to_disk(path)

-    def from_disk(self, path, exclude=tuple()):
+    def from_disk(self, path, *, exclude=tuple()):
        """Loads state from a directory. Modifies the object in place and
        returns it.

@ -488,7 +488,7 @@ cdef class Vocab:
        self._by_orth = PreshMap()
        return self

-    def to_bytes(self, exclude=tuple()):
+    def to_bytes(self, *, exclude=tuple()):
        """Serialize the current state to a binary string.

        exclude (list): String names of serialization fields to exclude.
@ -509,7 +509,7 @@ cdef class Vocab:
        }
        return util.to_bytes(getters, exclude)

-    def from_bytes(self, bytes_data, exclude=tuple()):
+    def from_bytes(self, bytes_data, *, exclude=tuple()):
        """Load state from a binary string.

        bytes_data (bytes): The data to load from.
--- a/website/docs/api/dependencyparser.md
+++ b/website/docs/api/dependencyparser.md
@ -290,10 +290,11 @@ Serialize the pipe to disk.
 > parser.to_disk("/path/to/parser")
 > ```

-| Name      | Type            | Description                                                                                                           |
-| --------- | --------------- | --------------------------------------------------------------------------------------------------------------------- |
-| `path`    | str / `Path`    | A path to a directory, which will be created if it doesn't exist. Paths may be either strings or `Path`-like objects. |
-| `exclude` | `Iterable[str]` | String names of [serialization fields](#serialization-fields) to exclude.                                             |
+| Name           | Type            | Description                                                                                                           |
+| -------------- | --------------- | --------------------------------------------------------------------------------------------------------------------- |
+| `path`         | str / `Path`    | A path to a directory, which will be created if it doesn't exist. Paths may be either strings or `Path`-like objects. |
+| _keyword-only_ |                 |                                                                                                                       |
+| `exclude`      | `Iterable[str]` | String names of [serialization fields](#serialization-fields) to exclude.                                             |

 ## DependencyParser.from_disk {#from_disk tag="method"}

@ -306,11 +307,12 @@ Load the pipe from disk. Modifies the object in place and returns it.
 > parser.from_disk("/path/to/parser")
 > ```

-| Name        | Type               | Description                                                                |
-| ----------- | ------------------ | -------------------------------------------------------------------------- |
-| `path`      | str / `Path`       | A path to a directory. Paths may be either strings or `Path`-like objects. |
-| `exclude`   | `Iterable[str]`    | String names of [serialization fields](#serialization-fields) to exclude.  |
-| **RETURNS** | `DependencyParser` | The modified `DependencyParser` object.                                    |
+| Name           | Type               | Description                                                                |
+| -------------- | ------------------ | -------------------------------------------------------------------------- |
+| `path`         | str / `Path`       | A path to a directory. Paths may be either strings or `Path`-like objects. |
+| _keyword-only_ |                    |                                                                            |
+| `exclude`      | `Iterable[str]`    | String names of [serialization fields](#serialization-fields) to exclude.  |
+| **RETURNS**    | `DependencyParser` | The modified `DependencyParser` object.                                    |

 ## DependencyParser.to_bytes {#to_bytes tag="method"}

@ -323,10 +325,11 @@ Load the pipe from disk. Modifies the object in place and returns it.

 Serialize the pipe to a bytestring.

-| Name        | Type            | Description                                                               |
-| ----------- | --------------- | ------------------------------------------------------------------------- |
-| `exclude`   | `Iterable[str]` | String names of [serialization fields](#serialization-fields) to exclude. |
-| **RETURNS** | bytes           | The serialized form of the `DependencyParser` object.                     |
+| Name           | Type            | Description                                                               |
+| -------------- | --------------- | ------------------------------------------------------------------------- |
+| _keyword-only_ |                 |                                                                           |
+| `exclude`      | `Iterable[str]` | String names of [serialization fields](#serialization-fields) to exclude. |
+| **RETURNS**    | bytes           | The serialized form of the `DependencyParser` object.                     |

 ## DependencyParser.from_bytes {#from_bytes tag="method"}

@ -340,11 +343,12 @@ Load the pipe from a bytestring. Modifies the object in place and returns it.
 > parser.from_bytes(parser_bytes)
 > ```

-| Name         | Type               | Description                                                               |
-| ------------ | ------------------ | ------------------------------------------------------------------------- |
-| `bytes_data` | bytes              | The data to load from.                                                    |
-| `exclude`    | `Iterable[str]`    | String names of [serialization fields](#serialization-fields) to exclude. |
-| **RETURNS**  | `DependencyParser` | The `DependencyParser` object.                                            |
+| Name           | Type               | Description                                                               |
+| -------------- | ------------------ | ------------------------------------------------------------------------- |
+| `bytes_data`   | bytes              | The data to load from.                                                    |
+| _keyword-only_ |                    |                                                                           |
+| `exclude`      | `Iterable[str]`    | String names of [serialization fields](#serialization-fields) to exclude. |
+| **RETURNS**    | `DependencyParser` | The `DependencyParser` object.                                            |

 ## DependencyParser.labels {#labels tag="property"}

--- a/website/docs/api/doc.md
+++ b/website/docs/api/doc.md
@ -385,10 +385,11 @@ Save the current state to a directory.
 > doc.to_disk("/path/to/doc")
 > ```

-| Name      | Type         | Description                                                                                                           |
-| --------- | ------------ | --------------------------------------------------------------------------------------------------------------------- |
-| `path`    | str / `Path` | A path to a directory, which will be created if it doesn't exist. Paths may be either strings or `Path`-like objects. |
-| `exclude` | list         | String names of [serialization fields](#serialization-fields) to exclude.                                             |
+| Name           | Type            | Description                                                                                                           |
+| -------------- | --------------- | --------------------------------------------------------------------------------------------------------------------- |
+| `path`         | str / `Path`    | A path to a directory, which will be created if it doesn't exist. Paths may be either strings or `Path`-like objects. |
+| _keyword-only_ |                 |                                                                                                                       |
+| `exclude`      | `Iterable[str]` | String names of [serialization fields](#serialization-fields) to exclude.                                             |

 ## Doc.from_disk {#from_disk tag="method" new="2"}

@ -402,11 +403,12 @@ Loads state from a directory. Modifies the object in place and returns it.
 > doc = Doc(Vocab()).from_disk("/path/to/doc")
 > ```

-| Name        | Type         | Description                                                                |
-| ----------- | ------------ | -------------------------------------------------------------------------- |
-| `path`      | str / `Path` | A path to a directory. Paths may be either strings or `Path`-like objects. |
-| `exclude`   | list         | String names of [serialization fields](#serialization-fields) to exclude.  |
-| **RETURNS** | `Doc`        | The modified `Doc` object.                                                 |
+| Name           | Type            | Description                                                                |
+| -------------- | --------------- | -------------------------------------------------------------------------- |
+| `path`         | str / `Path`    | A path to a directory. Paths may be either strings or `Path`-like objects. |
+| _keyword-only_ |                 |                                                                            |
+| `exclude`      | `Iterable[str]` | String names of [serialization fields](#serialization-fields) to exclude.  |
+| **RETURNS**    | `Doc`           | The modified `Doc` object.                                                 |

 ## Doc.to_bytes {#to_bytes tag="method"}

@ -419,10 +421,11 @@ Serialize, i.e. export the document contents to a binary string.
 > doc_bytes = doc.to_bytes()
 > ```

-| Name        | Type  | Description                                                               |
-| ----------- | ----- | ------------------------------------------------------------------------- |
-| `exclude`   | list  | String names of [serialization fields](#serialization-fields) to exclude. |
-| **RETURNS** | bytes | A losslessly serialized copy of the `Doc`, including all annotations.     |
+| Name           | Type            | Description                                                               |
+| -------------- | --------------- | ------------------------------------------------------------------------- |
+| _keyword-only_ |                 |                                                                           |
+| `exclude`      | `Iterable[str]` | String names of [serialization fields](#serialization-fields) to exclude. |
+| **RETURNS**    | bytes           | A losslessly serialized copy of the `Doc`, including all annotations.     |

 ## Doc.from_bytes {#from_bytes tag="method"}

@ -438,11 +441,12 @@ Deserialize, i.e. import the document contents from a binary string.
 > assert doc.text == doc2.text
 > ```

-| Name        | Type  | Description                                                               |
-| ----------- | ----- | ------------------------------------------------------------------------- |
-| `data`      | bytes | The string to load from.                                                  |
-| `exclude`   | list  | String names of [serialization fields](#serialization-fields) to exclude. |
-| **RETURNS** | `Doc` | The `Doc` object.                                                         |
+| Name           | Type            | Description                                                               |
+| -------------- | --------------- | ------------------------------------------------------------------------- |
+| `bytes_data`   | bytes           | The string to load from.                                                  |
+| _keyword-only_ |                 |                                                                           |
+| `exclude`      | `Iterable[str]` | String names of [serialization fields](#serialization-fields) to exclude. |
+| **RETURNS**    | `Doc`           | The `Doc` object.                                                         |

 ## Doc.retokenize {#retokenize tag="contextmanager" new="2.1"}

--- a/website/docs/api/entitylinker.md
+++ b/website/docs/api/entitylinker.md
@ -265,10 +265,11 @@ Serialize the pipe to disk.
 > entity_linker.to_disk("/path/to/entity_linker")
 > ```

-| Name      | Type            | Description                                                                                                           |
-| --------- | --------------- | --------------------------------------------------------------------------------------------------------------------- |
-| `path`    | str / `Path`    | A path to a directory, which will be created if it doesn't exist. Paths may be either strings or `Path`-like objects. |
-| `exclude` | `Iterable[str]` | String names of [serialization fields](#serialization-fields) to exclude.                                             |
+| Name           | Type            | Description                                                                                                           |
+| -------------- | --------------- | --------------------------------------------------------------------------------------------------------------------- |
+| `path`         | str / `Path`    | A path to a directory, which will be created if it doesn't exist. Paths may be either strings or `Path`-like objects. |
+| _keyword-only_ |                 |                                                                                                                       |
+| `exclude`      | `Iterable[str]` | String names of [serialization fields](#serialization-fields) to exclude.                                             |

 ## EntityLinker.from_disk {#from_disk tag="method"}

@ -281,11 +282,12 @@ Load the pipe from disk. Modifies the object in place and returns it.
 > entity_linker.from_disk("/path/to/entity_linker")
 > ```

-| Name        | Type            | Description                                                                |
-| ----------- | --------------- | -------------------------------------------------------------------------- |
-| `path`      | str / `Path`    | A path to a directory. Paths may be either strings or `Path`-like objects. |
-| `exclude`   | `Iterable[str]` | String names of [serialization fields](#serialization-fields) to exclude.  |
-| **RETURNS** | `EntityLinker`  | The modified `EntityLinker` object.                                        |
+| Name           | Type            | Description                                                                |
+| -------------- | --------------- | -------------------------------------------------------------------------- |
+| `path`         | str / `Path`    | A path to a directory. Paths may be either strings or `Path`-like objects. |
+| _keyword-only_ |                 |                                                                            |
+| `exclude`      | `Iterable[str]` | String names of [serialization fields](#serialization-fields) to exclude.  |
+| **RETURNS**    | `EntityLinker`  | The modified `EntityLinker` object.                                        |

 ## Serialization fields {#serialization-fields}

--- a/website/docs/api/entityrecognizer.md
+++ b/website/docs/api/entityrecognizer.md
@ -289,10 +289,11 @@ Serialize the pipe to disk.
 > ner.to_disk("/path/to/ner")
 > ```

-| Name      | Type            | Description                                                                                                           |
-| --------- | --------------- | --------------------------------------------------------------------------------------------------------------------- |
-| `path`    | str / `Path`    | A path to a directory, which will be created if it doesn't exist. Paths may be either strings or `Path`-like objects. |
-| `exclude` | `Iterable[str]` | String names of [serialization fields](#serialization-fields) to exclude.                                             |
+| Name           | Type            | Description                                                                                                           |
+| -------------- | --------------- | --------------------------------------------------------------------------------------------------------------------- |
+| `path`         | str / `Path`    | A path to a directory, which will be created if it doesn't exist. Paths may be either strings or `Path`-like objects. |
+| _keyword-only_ |                 |                                                                                                                       |
+| `exclude`      | `Iterable[str]` | String names of [serialization fields](#serialization-fields) to exclude.                                             |

 ## EntityRecognizer.from_disk {#from_disk tag="method"}

@ -305,11 +306,12 @@ Load the pipe from disk. Modifies the object in place and returns it.
 > ner.from_disk("/path/to/ner")
 > ```

-| Name        | Type               | Description                                                                |
-| ----------- | ------------------ | -------------------------------------------------------------------------- |
-| `path`      | str / `Path`       | A path to a directory. Paths may be either strings or `Path`-like objects. |
-| `exclude`   | `Iterable[str]`    | String names of [serialization fields](#serialization-fields) to exclude.  |
-| **RETURNS** | `EntityRecognizer` | The modified `EntityRecognizer` object.                                    |
+| Name           | Type               | Description                                                                |
+| -------------- | ------------------ | -------------------------------------------------------------------------- |
+| `path`         | str / `Path`       | A path to a directory. Paths may be either strings or `Path`-like objects. |
+| _keyword-only_ |                    |                                                                            |
+| `exclude`      | `Iterable[str]`    | String names of [serialization fields](#serialization-fields) to exclude.  |
+| **RETURNS**    | `EntityRecognizer` | The modified `EntityRecognizer` object.                                    |

 ## EntityRecognizer.to_bytes {#to_bytes tag="method"}

@ -322,10 +324,11 @@ Load the pipe from disk. Modifies the object in place and returns it.

 Serialize the pipe to a bytestring.

-| Name        | Type            | Description                                                               |
-| ----------- | --------------- | ------------------------------------------------------------------------- |
-| `exclude`   | `Iterable[str]` | String names of [serialization fields](#serialization-fields) to exclude. |
-| **RETURNS** | bytes           | The serialized form of the `EntityRecognizer` object.                     |
+| Name           | Type            | Description                                                               |
+| -------------- | --------------- | ------------------------------------------------------------------------- |
+| _keyword-only_ |                 |                                                                           |
+| `exclude`      | `Iterable[str]` | String names of [serialization fields](#serialization-fields) to exclude. |
+| **RETURNS**    | bytes           | The serialized form of the `EntityRecognizer` object.                     |

 ## EntityRecognizer.from_bytes {#from_bytes tag="method"}

@ -339,11 +342,12 @@ Load the pipe from a bytestring. Modifies the object in place and returns it.
 > ner.from_bytes(ner_bytes)
 > ```

-| Name         | Type               | Description                                                               |
-| ------------ | ------------------ | ------------------------------------------------------------------------- |
-| `bytes_data` | bytes              | The data to load from.                                                    |
-| `exclude`    | `Iterable[str]`    | String names of [serialization fields](#serialization-fields) to exclude. |
-| **RETURNS**  | `EntityRecognizer` | The `EntityRecognizer` object.                                            |
+| Name           | Type               | Description                                                               |
+| -------------- | ------------------ | ------------------------------------------------------------------------- |
+| `bytes_data`   | bytes              | The data to load from.                                                    |
+| _keyword-only_ |                    |                                                                           |
+| `exclude`      | `Iterable[str]`    | String names of [serialization fields](#serialization-fields) to exclude. |
+| **RETURNS**    | `EntityRecognizer` | The modified `EntityRecognizer` object.                                   |

 ## EntityRecognizer.labels {#labels tag="property"}

--- a/website/docs/api/language.md
+++ b/website/docs/api/language.md
@ -645,10 +645,11 @@ the model**.
 > nlp.to_disk("/path/to/models")
 > ```

-| Name      | Type         | Description                                                                                                           |
-| --------- | ------------ | --------------------------------------------------------------------------------------------------------------------- |
-| `path`    | str / `Path` | A path to a directory, which will be created if it doesn't exist. Paths may be either strings or `Path`-like objects. |
-| `exclude` | list         | Names of pipeline components or [serialization fields](#serialization-fields) to exclude.                             |
+| Name           | Type            | Description                                                                                                           |
+| -------------- | --------------- | --------------------------------------------------------------------------------------------------------------------- |
+| `path`         | str / `Path`    | A path to a directory, which will be created if it doesn't exist. Paths may be either strings or `Path`-like objects. |
+| _keyword-only_ |                 |                                                                                                                       |
+| `exclude`      | `Iterable[str]` | Names of pipeline components or [serialization fields](#serialization-fields) to exclude.                             |

 ## Language.from_disk {#from_disk tag="method" new="2"}

@ -670,11 +671,12 @@ loaded object.
 > nlp = English().from_disk("/path/to/en_model")
 > ```

-| Name        | Type         | Description                                                                               |
-| ----------- | ------------ | ----------------------------------------------------------------------------------------- |
-| `path`      | str / `Path` | A path to a directory. Paths may be either strings or `Path`-like objects.                |
-| `exclude`   | list         | Names of pipeline components or [serialization fields](#serialization-fields) to exclude. |
-| **RETURNS** | `Language`   | The modified `Language` object.                                                           |
+| Name           | Type            | Description                                                                               |
+| -------------- | --------------- | ----------------------------------------------------------------------------------------- |
+| `path`         | str / `Path`    | A path to a directory. Paths may be either strings or `Path`-like objects.                |
+| _keyword-only_ |                 |                                                                                           |
+| `exclude`      | `Iterable[str]` | Names of pipeline components or [serialization fields](#serialization-fields) to exclude. |
+| **RETURNS**    | `Language`      | The modified `Language` object.                                                           |

 ## Language.to_bytes {#to_bytes tag="method"}

@ -686,10 +688,11 @@ Serialize the current state to a binary string.
 > nlp_bytes = nlp.to_bytes()
 > ```

-| Name        | Type  | Description                                                                               |
-| ----------- | ----- | ----------------------------------------------------------------------------------------- |
-| `exclude`   | list  | Names of pipeline components or [serialization fields](#serialization-fields) to exclude. |
-| **RETURNS** | bytes | The serialized form of the `Language` object.                                             |
+| Name           | Type            | Description                                                                               |
+| -------------- | --------------- | ----------------------------------------------------------------------------------------- |
+| _keyword-only_ |                 |                                                                                           |
+| `exclude`      | `Iterable[str]` | Names of pipeline components or [serialization fields](#serialization-fields) to exclude. |
+| **RETURNS**    | bytes           | The serialized form of the `Language` object.                                             |

 ## Language.from_bytes {#from_bytes tag="method"}

@ -707,11 +710,12 @@ available to the loaded object.
 > nlp2.from_bytes(nlp_bytes)
 > ```

-| Name         | Type       | Description                                                                               |
-| ------------ | ---------- | ----------------------------------------------------------------------------------------- |
-| `bytes_data` | bytes      | The data to load from.                                                                    |
-| `exclude`    | list       | Names of pipeline components or [serialization fields](#serialization-fields) to exclude. |
-| **RETURNS**  | `Language` | The `Language` object.                                                                    |
+| Name           | Type            | Description                                                                               |
+| -------------- | --------------- | ----------------------------------------------------------------------------------------- |
+| `bytes_data`   | bytes           | The data to load from.                                                                    |
+| _keyword-only_ |                 |                                                                                           |
+| `exclude`      | `Iterable[str]` | Names of pipeline components or [serialization fields](#serialization-fields) to exclude. |
+| **RETURNS**    | `Language`      | The `Language` object.                                                                    |

 ## Attributes {#attributes}

--- a/website/docs/api/morphologizer.md
+++ b/website/docs/api/morphologizer.md
@ -276,10 +276,11 @@ Serialize the pipe to disk.
 > morphologizer.to_disk("/path/to/morphologizer")
 > ```

-| Name      | Type            | Description                                                                                                           |
-| --------- | --------------- | --------------------------------------------------------------------------------------------------------------------- |
-| `path`    | str / `Path`    | A path to a directory, which will be created if it doesn't exist. Paths may be either strings or `Path`-like objects. |
-| `exclude` | `Iterable[str]` | String names of [serialization fields](#serialization-fields) to exclude.                                             |
+| Name           | Type            | Description                                                                                                           |
+| -------------- | --------------- | --------------------------------------------------------------------------------------------------------------------- |
+| `path`         | str / `Path`    | A path to a directory, which will be created if it doesn't exist. Paths may be either strings or `Path`-like objects. |
+| _keyword-only_ |                 |                                                                                                                       |
+| `exclude`      | `Iterable[str]` | String names of [serialization fields](#serialization-fields) to exclude.                                             |

 ## Morphologizer.from_disk {#from_disk tag="method"}

@ -292,11 +293,12 @@ Load the pipe from disk. Modifies the object in place and returns it.
 > morphologizer.from_disk("/path/to/morphologizer")
 > ```

-| Name        | Type            | Description                                                                |
-| ----------- | --------------- | -------------------------------------------------------------------------- |
-| `path`      | str / `Path`    | A path to a directory. Paths may be either strings or `Path`-like objects. |
-| `exclude`   | `Iterable[str]` | String names of [serialization fields](#serialization-fields) to exclude.  |
-| **RETURNS** | `Morphologizer` | The modified `Morphologizer` object.                                       |
+| Name           | Type            | Description                                                                |
+| -------------- | --------------- | -------------------------------------------------------------------------- |
+| `path`         | str / `Path`    | A path to a directory. Paths may be either strings or `Path`-like objects. |
+| _keyword-only_ |                 |                                                                            |
+| `exclude`      | `Iterable[str]` | String names of [serialization fields](#serialization-fields) to exclude.  |
+| **RETURNS**    | `Morphologizer` | The modified `Morphologizer` object.                                       |

 ## Morphologizer.to_bytes {#to_bytes tag="method"}

@ -309,10 +311,11 @@ Load the pipe from disk. Modifies the object in place and returns it.

 Serialize the pipe to a bytestring.

-| Name        | Type            | Description                                                               |
-| ----------- | --------------- | ------------------------------------------------------------------------- |
-| `exclude`   | `Iterable[str]` | String names of [serialization fields](#serialization-fields) to exclude. |
-| **RETURNS** | bytes           | The serialized form of the `Morphologizer` object.                        |
+| Name           | Type            | Description                                                               |
+| -------------- | --------------- | ------------------------------------------------------------------------- |
+| _keyword-only_ |                 |                                                                           |
+| `exclude`      | `Iterable[str]` | String names of [serialization fields](#serialization-fields) to exclude. |
+| **RETURNS**    | bytes           | The serialized form of the `Morphologizer` object.                        |

 ## Morphologizer.from_bytes {#from_bytes tag="method"}

@ -326,11 +329,12 @@ Load the pipe from a bytestring. Modifies the object in place and returns it.
 > morphologizer.from_bytes(morphologizer_bytes)
 > ```

-| Name         | Type            | Description                                                               |
-| ------------ | --------------- | ------------------------------------------------------------------------- |
-| `bytes_data` | bytes           | The data to load from.                                                    |
-| `exclude`    | `Iterable[str]` | String names of [serialization fields](#serialization-fields) to exclude. |
-| **RETURNS**  | `Morphologizer` | The `Morphologizer` object.                                               |
+| Name           | Type            | Description                                                               |
+| -------------- | --------------- | ------------------------------------------------------------------------- |
+| `bytes_data`   | bytes           | The data to load from.                                                    |
+| _keyword-only_ |                 |                                                                           |
+| `exclude`      | `Iterable[str]` | String names of [serialization fields](#serialization-fields) to exclude. |
+| **RETURNS**    | `Morphologizer` | The modified `Morphologizer` object.                                      |

 ## Morphologizer.labels {#labels tag="property"}

--- a/website/docs/api/pipe.md
+++ b/website/docs/api/pipe.md
@ -306,10 +306,11 @@ Serialize the pipe to disk.
 > pipe.to_disk("/path/to/pipe")
 > ```

-| Name      | Type            | Description                                                                                                           |
-| --------- | --------------- | --------------------------------------------------------------------------------------------------------------------- |
-| `path`    | str / `Path`    | A path to a directory, which will be created if it doesn't exist. Paths may be either strings or `Path`-like objects. |
-| `exclude` | `Iterable[str]` | String names of [serialization fields](#serialization-fields) to exclude.                                             |
+| Name           | Type            | Description                                                                                                           |
+| -------------- | --------------- | --------------------------------------------------------------------------------------------------------------------- |
+| `path`         | str / `Path`    | A path to a directory, which will be created if it doesn't exist. Paths may be either strings or `Path`-like objects. |
+| _keyword-only_ |                 |                                                                                                                       |
+| `exclude`      | `Iterable[str]` | String names of [serialization fields](#serialization-fields) to exclude.                                             |

 ## Pipe.from_disk {#from_disk tag="method"}

@ -322,11 +323,12 @@ Load the pipe from disk. Modifies the object in place and returns it.
 > pipe.from_disk("/path/to/pipe")
 > ```

-| Name        | Type            | Description                                                                |
-| ----------- | --------------- | -------------------------------------------------------------------------- |
-| `path`      | str / `Path`    | A path to a directory. Paths may be either strings or `Path`-like objects. |
-| `exclude`   | `Iterable[str]` | String names of [serialization fields](#serialization-fields) to exclude.  |
-| **RETURNS** | `Pipe`          | The modified pipe.                                                         |
+| Name           | Type            | Description                                                                |
+| -------------- | --------------- | -------------------------------------------------------------------------- |
+| `path`         | str / `Path`    | A path to a directory. Paths may be either strings or `Path`-like objects. |
+| _keyword-only_ |                 |                                                                            |
+| `exclude`      | `Iterable[str]` | String names of [serialization fields](#serialization-fields) to exclude.  |
+| **RETURNS**    | `Pipe`          | The modified pipe.                                                         |

 ## Pipe.to_bytes {#to_bytes tag="method"}

@ -339,10 +341,11 @@ Load the pipe from disk. Modifies the object in place and returns it.

 Serialize the pipe to a bytestring.

-| Name        | Type            | Description                                                               |
-| ----------- | --------------- | ------------------------------------------------------------------------- |
-| `exclude`   | `Iterable[str]` | String names of [serialization fields](#serialization-fields) to exclude. |
-| **RETURNS** | bytes           | The serialized form of the pipe.                                          |
+| Name           | Type            | Description                                                               |
+| -------------- | --------------- | ------------------------------------------------------------------------- |
+| _keyword-only_ |                 |                                                                           |
+| `exclude`      | `Iterable[str]` | String names of [serialization fields](#serialization-fields) to exclude. |
+| **RETURNS**    | bytes           | The serialized form of the pipe.                                          |

 ## Pipe.from_bytes {#from_bytes tag="method"}

@ -356,11 +359,12 @@ Load the pipe from a bytestring. Modifies the object in place and returns it.
 > pipe.from_bytes(pipe_bytes)
 > ```

-| Name         | Type            | Description                                                               |
-| ------------ | --------------- | ------------------------------------------------------------------------- |
-| `bytes_data` | bytes           | The data to load from.                                                    |
-| `exclude`    | `Iterable[str]` | String names of [serialization fields](#serialization-fields) to exclude. |
-| **RETURNS**  | `Pipe`          | The pipe.                                                                 |
+| Name           | Type            | Description                                                               |
+| -------------- | --------------- | ------------------------------------------------------------------------- |
+| `bytes_data`   | bytes           | The data to load from.                                                    |
+| _keyword-only_ |                 |                                                                           |
+| `exclude`      | `Iterable[str]` | String names of [serialization fields](#serialization-fields) to exclude. |
+| **RETURNS**    | `Pipe`          | The modified pipe.                                                        |

 ## Serialization fields {#serialization-fields}

--- a/website/docs/api/sentencerecognizer.md
+++ b/website/docs/api/sentencerecognizer.md
@ -291,10 +291,11 @@ Serialize the pipe to disk.
 > senter.to_disk("/path/to/senter")
 > ```

-| Name      | Type            | Description                                                                                                           |
-| --------- | --------------- | --------------------------------------------------------------------------------------------------------------------- |
-| `path`    | str / `Path`    | A path to a directory, which will be created if it doesn't exist. Paths may be either strings or `Path`-like objects. |
-| `exclude` | `Iterable[str]` | String names of [serialization fields](#serialization-fields) to exclude.                                             |
+| Name           | Type            | Description                                                                                                           |
+| -------------- | --------------- | --------------------------------------------------------------------------------------------------------------------- |
+| `path`         | str / `Path`    | A path to a directory, which will be created if it doesn't exist. Paths may be either strings or `Path`-like objects. |
+| _keyword-only_ |                 |                                                                                                                       |
+| `exclude`      | `Iterable[str]` | String names of [serialization fields](#serialization-fields) to exclude.                                             |

 ## SentenceRecognizer.from_disk {#from_disk tag="method"}

@ -307,11 +308,12 @@ Load the pipe from disk. Modifies the object in place and returns it.
 > senter.from_disk("/path/to/senter")
 > ```

-| Name        | Type                 | Description                                                                |
-| ----------- | -------------------- | -------------------------------------------------------------------------- |
-| `path`      | str / `Path`         | A path to a directory. Paths may be either strings or `Path`-like objects. |
-| `exclude`   | `Iterable[str]`      | String names of [serialization fields](#serialization-fields) to exclude.  |
-| **RETURNS** | `SentenceRecognizer` | The modified `SentenceRecognizer` object.                                  |
+| Name           | Type                 | Description                                                                |
+| -------------- | -------------------- | -------------------------------------------------------------------------- |
+| `path`         | str / `Path`         | A path to a directory. Paths may be either strings or `Path`-like objects. |
+| _keyword-only_ |                      |                                                                            |
+| `exclude`      | `Iterable[str]`      | String names of [serialization fields](#serialization-fields) to exclude.  |
+| **RETURNS**    | `SentenceRecognizer` | The modified `SentenceRecognizer` object.                                  |

 ## SentenceRecognizer.to_bytes {#to_bytes tag="method"}

@ -324,10 +326,11 @@ Load the pipe from disk. Modifies the object in place and returns it.

 Serialize the pipe to a bytestring.

-| Name        | Type            | Description                                                               |
-| ----------- | --------------- | ------------------------------------------------------------------------- |
-| `exclude`   | `Iterable[str]` | String names of [serialization fields](#serialization-fields) to exclude. |
-| **RETURNS** | bytes           | The serialized form of the `SentenceRecognizer` object.                   |
+| Name           | Type            | Description                                                               |
+| -------------- | --------------- | ------------------------------------------------------------------------- |
+| _keyword-only_ |                 |                                                                           |
+| `exclude`      | `Iterable[str]` | String names of [serialization fields](#serialization-fields) to exclude. |
+| **RETURNS**    | bytes           | The serialized form of the `SentenceRecognizer` object.                   |

 ## SentenceRecognizer.from_bytes {#from_bytes tag="method"}

@ -341,11 +344,12 @@ Load the pipe from a bytestring. Modifies the object in place and returns it.
 > senter.from_bytes(senter_bytes)
 > ```

-| Name         | Type                 | Description                                                               |
-| ------------ | -------------------- | ------------------------------------------------------------------------- |
-| `bytes_data` | bytes                | The data to load from.                                                    |
-| `exclude`    | `Iterable[str]`      | String names of [serialization fields](#serialization-fields) to exclude. |
-| **RETURNS**  | `SentenceRecognizer` | The `SentenceRecognizer` object.                                          |
+| Name           | Type                 | Description                                                               |
+| -------------- | -------------------- | ------------------------------------------------------------------------- |
+| `bytes_data`   | bytes                | The data to load from.                                                    |
+| _keyword-only_ |                      |                                                                           |
+| `exclude`      | `Iterable[str]`      | String names of [serialization fields](#serialization-fields) to exclude. |
+| **RETURNS**    | `SentenceRecognizer` | The modified `SentenceRecognizer` object.                                 |

 ## Serialization fields {#serialization-fields}

--- a/website/docs/api/tagger.md
+++ b/website/docs/api/tagger.md
@ -307,10 +307,11 @@ Serialize the pipe to disk.
 > tagger.to_disk("/path/to/tagger")
 > ```

-| Name      | Type            | Description                                                                                                           |
-| --------- | --------------- | --------------------------------------------------------------------------------------------------------------------- |
-| `path`    | str / `Path`    | A path to a directory, which will be created if it doesn't exist. Paths may be either strings or `Path`-like objects. |
-| `exclude` | `Iterable[str]` | String names of [serialization fields](#serialization-fields) to exclude.                                             |
+| Name           | Type            | Description                                                                                                           |
+| -------------- | --------------- | --------------------------------------------------------------------------------------------------------------------- |
+| `path`         | str / `Path`    | A path to a directory, which will be created if it doesn't exist. Paths may be either strings or `Path`-like objects. |
+| _keyword-only_ |                 |                                                                                                                       |
+| `exclude`      | `Iterable[str]` | String names of [serialization fields](#serialization-fields) to exclude.                                             |

 ## Tagger.from_disk {#from_disk tag="method"}

@ -323,11 +324,12 @@ Load the pipe from disk. Modifies the object in place and returns it.
 > tagger.from_disk("/path/to/tagger")
 > ```

-| Name        | Type            | Description                                                                |
-| ----------- | --------------- | -------------------------------------------------------------------------- |
-| `path`      | str / `Path`    | A path to a directory. Paths may be either strings or `Path`-like objects. |
-| `exclude`   | `Iterable[str]` | String names of [serialization fields](#serialization-fields) to exclude.  |
-| **RETURNS** | `Tagger`        | The modified `Tagger` object.                                              |
+| Name           | Type            | Description                                                                |
+| -------------- | --------------- | -------------------------------------------------------------------------- |
+| `path`         | str / `Path`    | A path to a directory. Paths may be either strings or `Path`-like objects. |
+| _keyword-only_ |                 |                                                                            |
+| `exclude`      | `Iterable[str]` | String names of [serialization fields](#serialization-fields) to exclude.  |
+| **RETURNS**    | `Tagger`        | The modified `Tagger` object.                                              |

 ## Tagger.to_bytes {#to_bytes tag="method"}

@ -340,10 +342,11 @@ Load the pipe from disk. Modifies the object in place and returns it.

 Serialize the pipe to a bytestring.

-| Name        | Type            | Description                                                               |
-| ----------- | --------------- | ------------------------------------------------------------------------- |
-| `exclude`   | `Iterable[str]` | String names of [serialization fields](#serialization-fields) to exclude. |
-| **RETURNS** | bytes           | The serialized form of the `Tagger` object.                               |
+| Name           | Type            | Description                                                               |
+| -------------- | --------------- | ------------------------------------------------------------------------- |
+| _keyword-only_ |                 |                                                                           |
+| `exclude`      | `Iterable[str]` | String names of [serialization fields](#serialization-fields) to exclude. |
+| **RETURNS**    | bytes           | The serialized form of the `Tagger` object.                               |

 ## Tagger.from_bytes {#from_bytes tag="method"}

@ -357,11 +360,12 @@ Load the pipe from a bytestring. Modifies the object in place and returns it.
 > tagger.from_bytes(tagger_bytes)
 > ```

-| Name         | Type            | Description                                                               |
-| ------------ | --------------- | ------------------------------------------------------------------------- |
-| `bytes_data` | bytes           | The data to load from.                                                    |
-| `exclude`    | `Iterable[str]` | String names of [serialization fields](#serialization-fields) to exclude. |
-| **RETURNS**  | `Tagger`        | The `Tagger` object.                                                      |
+| Name           | Type            | Description                                                               |
+| -------------- | --------------- | ------------------------------------------------------------------------- |
+| `bytes_data`   | bytes           | The data to load from.                                                    |
+| _keyword-only_ |                 |                                                                           |
+| `exclude`      | `Iterable[str]` | String names of [serialization fields](#serialization-fields) to exclude. |
+| **RETURNS**    | `Tagger`        | The `Tagger` object.                                                      |

 ## Tagger.labels {#labels tag="property"}

--- a/website/docs/api/textcategorizer.md
+++ b/website/docs/api/textcategorizer.md
@ -325,10 +325,11 @@ Serialize the pipe to disk.
 > textcat.to_disk("/path/to/textcat")
 > ```

-| Name      | Type            | Description                                                                                                           |
-| --------- | --------------- | --------------------------------------------------------------------------------------------------------------------- |
-| `path`    | str / `Path`    | A path to a directory, which will be created if it doesn't exist. Paths may be either strings or `Path`-like objects. |
-| `exclude` | `Iterable[str]` | String names of [serialization fields](#serialization-fields) to exclude.                                             |
+| Name           | Type            | Description                                                                                                           |
+| -------------- | --------------- | --------------------------------------------------------------------------------------------------------------------- |
+| `path`         | str / `Path`    | A path to a directory, which will be created if it doesn't exist. Paths may be either strings or `Path`-like objects. |
+| _keyword-only_ |                 |                                                                                                                       |
+| `exclude`      | `Iterable[str]` | String names of [serialization fields](#serialization-fields) to exclude.                                             |

 ## TextCategorizer.from_disk {#from_disk tag="method"}

@ -341,11 +342,12 @@ Load the pipe from disk. Modifies the object in place and returns it.
 > textcat.from_disk("/path/to/textcat")
 > ```

-| Name        | Type              | Description                                                                |
-| ----------- | ----------------- | -------------------------------------------------------------------------- |
-| `path`      | str / `Path`      | A path to a directory. Paths may be either strings or `Path`-like objects. |
-| `exclude`   | `Iterable[str]`   | String names of [serialization fields](#serialization-fields) to exclude.  |
-| **RETURNS** | `TextCategorizer` | The modified `TextCategorizer` object.                                     |
+| Name           | Type              | Description                                                                |
+| -------------- | ----------------- | -------------------------------------------------------------------------- |
+| `path`         | str / `Path`      | A path to a directory. Paths may be either strings or `Path`-like objects. |
+| _keyword-only_ |                   |                                                                            |
+| `exclude`      | `Iterable[str]`   | String names of [serialization fields](#serialization-fields) to exclude.  |
+| **RETURNS**    | `TextCategorizer` | The modified `TextCategorizer` object.                                     |

 ## TextCategorizer.to_bytes {#to_bytes tag="method"}

@ -358,10 +360,11 @@ Load the pipe from disk. Modifies the object in place and returns it.

 Serialize the pipe to a bytestring.

-| Name        | Type            | Description                                                               |
-| ----------- | --------------- | ------------------------------------------------------------------------- |
-| `exclude`   | `Iterable[str]` | String names of [serialization fields](#serialization-fields) to exclude. |
-| **RETURNS** | bytes           | The serialized form of the `TextCategorizer` object.                      |
+| Name           | Type            | Description                                                               |
+| -------------- | --------------- | ------------------------------------------------------------------------- |
+| _keyword-only_ |                 |                                                                           |
+| `exclude`      | `Iterable[str]` | String names of [serialization fields](#serialization-fields) to exclude. |
+| **RETURNS**    | bytes           | The serialized form of the `TextCategorizer` object.                      |

 ## TextCategorizer.from_bytes {#from_bytes tag="method"}

@ -375,11 +378,12 @@ Load the pipe from a bytestring. Modifies the object in place and returns it.
 > textcat.from_bytes(textcat_bytes)
 > ```

-| Name         | Type              | Description                                                               |
-| ------------ | ----------------- | ------------------------------------------------------------------------- |
-| `bytes_data` | bytes             | The data to load from.                                                    |
-| `exclude`    | `Iterable[str]`   | String names of [serialization fields](#serialization-fields) to exclude. |
-| **RETURNS**  | `TextCategorizer` | The `TextCategorizer` object.                                             |
+| Name           | Type              | Description                                                               |
+| -------------- | ----------------- | ------------------------------------------------------------------------- |
+| `bytes_data`   | bytes             | The data to load from.                                                    |
+| _keyword-only_ |                   |                                                                           |
+| `exclude`      | `Iterable[str]`   | String names of [serialization fields](#serialization-fields) to exclude. |
+| **RETURNS**    | `TextCategorizer` | The `TextCategorizer` object.                                             |

 ## TextCategorizer.labels {#labels tag="property"}

--- a/website/docs/api/tok2vec.md
+++ b/website/docs/api/tok2vec.md
@ -227,10 +227,11 @@ Serialize the pipe to disk.
 > tok2vec.to_disk("/path/to/tok2vec")
 > ```

-| Name      | Type            | Description                                                                                                           |
-| --------- | --------------- | --------------------------------------------------------------------------------------------------------------------- |
-| `path`    | str / `Path`    | A path to a directory, which will be created if it doesn't exist. Paths may be either strings or `Path`-like objects. |
-| `exclude` | `Iterable[str]` | String names of [serialization fields](#serialization-fields) to exclude.                                             |
+| Name           | Type            | Description                                                                                                           |
+| -------------- | --------------- | --------------------------------------------------------------------------------------------------------------------- |
+| `path`         | str / `Path`    | A path to a directory, which will be created if it doesn't exist. Paths may be either strings or `Path`-like objects. |
+| _keyword-only_ |                 |                                                                                                                       |
+| `exclude`      | `Iterable[str]` | String names of [serialization fields](#serialization-fields) to exclude.                                             |

 ## Tok2Vec.from_disk {#from_disk tag="method"}

@ -243,11 +244,12 @@ Load the pipe from disk. Modifies the object in place and returns it.
 > tok2vec.from_disk("/path/to/tok2vec")
 > ```

-| Name        | Type            | Description                                                                |
-| ----------- | --------------- | -------------------------------------------------------------------------- |
-| `path`      | str / `Path`    | A path to a directory. Paths may be either strings or `Path`-like objects. |
-| `exclude`   | `Iterable[str]` | String names of [serialization fields](#serialization-fields) to exclude.  |
-| **RETURNS** | `Tok2Vec`       | The modified `Tok2Vec` object.                                             |
+| Name           | Type            | Description                                                                |
+| -------------- | --------------- | -------------------------------------------------------------------------- |
+| `path`         | str / `Path`    | A path to a directory. Paths may be either strings or `Path`-like objects. |
+| _keyword-only_ |                 |                                                                            |
+| `exclude`      | `Iterable[str]` | String names of [serialization fields](#serialization-fields) to exclude.  |
+| **RETURNS**    | `Tok2Vec`       | The modified `Tok2Vec` object.                                             |

 ## Tok2Vec.to_bytes {#to_bytes tag="method"}

@ -260,10 +262,11 @@ Load the pipe from disk. Modifies the object in place and returns it.

 Serialize the pipe to a bytestring.

-| Name        | Type            | Description                                                               |
-| ----------- | --------------- | ------------------------------------------------------------------------- |
-| `exclude`   | `Iterable[str]` | String names of [serialization fields](#serialization-fields) to exclude. |
-| **RETURNS** | bytes           | The serialized form of the `Tok2Vec` object.                              |
+| Name           | Type            | Description                                                               |
+| -------------- | --------------- | ------------------------------------------------------------------------- |
+| _keyword-only_ |                 |                                                                           |
+| `exclude`      | `Iterable[str]` | String names of [serialization fields](#serialization-fields) to exclude. |
+| **RETURNS**    | bytes           | The serialized form of the `Tok2Vec` object.                              |

 ## Tok2Vec.from_bytes {#from_bytes tag="method"}

@ -277,11 +280,12 @@ Load the pipe from a bytestring. Modifies the object in place and returns it.
 > tok2vec.from_bytes(tok2vec_bytes)
 > ```

-| Name         | Type            | Description                                                               |
-| ------------ | --------------- | ------------------------------------------------------------------------- |
-| `bytes_data` | bytes           | The data to load from.                                                    |
-| `exclude`    | `Iterable[str]` | String names of [serialization fields](#serialization-fields) to exclude. |
-| **RETURNS**  | `Tok2Vec`       | The `Tok2Vec` object.                                                     |
+| Name           | Type            | Description                                                               |
+| -------------- | --------------- | ------------------------------------------------------------------------- |
+| `bytes_data`   | bytes           | The data to load from.                                                    |
+| _keyword-only_ |                 |                                                                           |
+| `exclude`      | `Iterable[str]` | String names of [serialization fields](#serialization-fields) to exclude. |
+| **RETURNS**    | `Tok2Vec`       | The `Tok2Vec` object.                                                     |

 ## Serialization fields {#serialization-fields}

--- a/website/docs/api/tokenizer.md
+++ b/website/docs/api/tokenizer.md
@ -158,10 +158,11 @@ Serialize the tokenizer to disk.
 > tokenizer.to_disk("/path/to/tokenizer")
 > ```

-| Name      | Type         | Description                                                                                                           |
-| --------- | ------------ | --------------------------------------------------------------------------------------------------------------------- |
-| `path`    | str / `Path` | A path to a directory, which will be created if it doesn't exist. Paths may be either strings or `Path`-like objects. |
-| `exclude` | list         | String names of [serialization fields](#serialization-fields) to exclude.                                             |
+| Name           | Type            | Description                                                                                                           |
+| -------------- | --------------- | --------------------------------------------------------------------------------------------------------------------- |
+| `path`         | str / `Path`    | A path to a directory, which will be created if it doesn't exist. Paths may be either strings or `Path`-like objects. |
+| _keyword-only_ |                 |                                                                                                                       |
+| `exclude`      | `Iterable[str]` | String names of [serialization fields](#serialization-fields) to exclude.                                             |

 ## Tokenizer.from_disk {#from_disk tag="method"}

@ -174,11 +175,12 @@ Load the tokenizer from disk. Modifies the object in place and returns it.
 > tokenizer.from_disk("/path/to/tokenizer")
 > ```

-| Name        | Type         | Description                                                                |
-| ----------- | ------------ | -------------------------------------------------------------------------- |
-| `path`      | str / `Path` | A path to a directory. Paths may be either strings or `Path`-like objects. |
-| `exclude`   | list         | String names of [serialization fields](#serialization-fields) to exclude.  |
-| **RETURNS** | `Tokenizer`  | The modified `Tokenizer` object.                                           |
+| Name           | Type            | Description                                                                |
+| -------------- | --------------- | -------------------------------------------------------------------------- |
+| `path`         | str / `Path`    | A path to a directory. Paths may be either strings or `Path`-like objects. |
+| _keyword-only_ |                 |                                                                            |
+| `exclude`      | `Iterable[str]` | String names of [serialization fields](#serialization-fields) to exclude.  |
+| **RETURNS**    | `Tokenizer`     | The modified `Tokenizer` object.                                           |

 ## Tokenizer.to_bytes {#to_bytes tag="method"}

@ -191,10 +193,11 @@ Load the tokenizer from disk. Modifies the object in place and returns it.

 Serialize the tokenizer to a bytestring.

-| Name        | Type  | Description                                                               |
-| ----------- | ----- | ------------------------------------------------------------------------- |
-| `exclude`   | list  | String names of [serialization fields](#serialization-fields) to exclude. |
-| **RETURNS** | bytes | The serialized form of the `Tokenizer` object.                            |
+| Name           | Type            | Description                                                               |
+| -------------- | --------------- | ------------------------------------------------------------------------- |
+| _keyword-only_ |                 |                                                                           |
+| `exclude`      | `Iterable[str]` | String names of [serialization fields](#serialization-fields) to exclude. |
+| **RETURNS**    | bytes           | The serialized form of the `Tokenizer` object.                            |

 ## Tokenizer.from_bytes {#from_bytes tag="method"}

@ -209,11 +212,12 @@ it.
 > tokenizer.from_bytes(tokenizer_bytes)
 > ```

-| Name         | Type        | Description                                                               |
-| ------------ | ----------- | ------------------------------------------------------------------------- |
-| `bytes_data` | bytes       | The data to load from.                                                    |
-| `exclude`    | list        | String names of [serialization fields](#serialization-fields) to exclude. |
-| **RETURNS**  | `Tokenizer` | The `Tokenizer` object.                                                   |
+| Name           | Type            | Description                                                               |
+| -------------- | --------------- | ------------------------------------------------------------------------- |
+| `bytes_data`   | bytes           | The data to load from.                                                    |
+| _keyword-only_ |                 |                                                                           |
+| `exclude`      | `Iterable[str]` | String names of [serialization fields](#serialization-fields) to exclude. |
+| **RETURNS**    | `Tokenizer`     | The `Tokenizer` object.                                                   |

 ## Attributes {#attributes}

--- a/website/docs/api/transformer.md
+++ b/website/docs/api/transformer.md
@ -51,11 +51,11 @@ architectures and their arguments and hyperparameters.
 > nlp.add_pipe("transformer", config=DEFAULT_CONFIG)
 > ```

-| Setting             | Type                                       | Description                     | Default                                                             |
-| ------------------- | ------------------------------------------ | ------------------------------- | ------------------------------------------------------------------- |
-| `max_batch_items`   | int                                        | Maximum size of a padded batch. | `4096`                                                              |
-| `annotation_setter` | Callable                                   | <!-- TODO: -->                  | [`null_annotation_setter`](/api/transformer#null_annotation_setter) |
-| `model`             | [`Model`](https://thinc.ai/docs/api-model) | The model to use.               | [TransformerModel](/api/architectures#TransformerModel)             |
+| Setting             | Type                                       | Description                                                                                                                                         | Default                                                 |
+| ------------------- | ------------------------------------------ | --------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------- |
+| `max_batch_items`   | int                                        | Maximum size of a padded batch.                                                                                                                     | `4096`                                                  |
+| `annotation_setter` | Callable                                   | Function that takes a batch of `Doc` objects and a [`FullTransformerBatch`](#fulltransformerbatch) and can set additional annotations on the `Doc`. | `null_annotation_setter`                                |
+| `model`             | [`Model`](https://thinc.ai/docs/api-model) | The model to use.                                                                                                                                   | [TransformerModel](/api/architectures#TransformerModel) |

 ```python
 https://github.com/explosion/spacy-transformers/blob/master/spacy_transformers/pipeline_component.py
@ -69,8 +69,14 @@ https://github.com/explosion/spacy-transformers/blob/master/spacy_transformers/p
 > # Construction via add_pipe with default model
 > trf = nlp.add_pipe("transformer")
 >
-> # Construction via add_pipe with custom model
-> config = {"model": {"@architectures": "my_transformer"}}
+> # Construction via add_pipe with custom config
+> config = {
+>     "model": {
+>         "@architectures": "spacy-transformers.TransformerModel.v1",
+>         "name": "bert-base-uncased",
+>         "tokenizer_config": {"use_fast": True}
+>     }
+> }
 > trf = nlp.add_pipe("transformer", config=config)
 >
 > # Construction from class
@ -82,26 +88,313 @@ Create a new pipeline instance. In your application, you would normally use a
 shortcut for this and instantiate the component using its string name and
 [`nlp.add_pipe`](/api/language#create_pipe).

-| Name                | Type                                       | Description                                                                                 |
-| ------------------- | ------------------------------------------ | ------------------------------------------------------------------------------------------- |
-| `vocab`             | `Vocab`                                    | The shared vocabulary.                                                                      |
-| `model`             | [`Model`](https://thinc.ai/docs/api-model) | The Thinc [`Model`](https://thinc.ai/docs/api-model) powering the pipeline component.       |
-| `annotation_setter` | `Callable`                                 | <!-- TODO: -->                                                                              |
-| _keyword-only_      |                                            |                                                                                             |
-| `name`              | str                                        | String name of the component instance. Used to add entries to the `losses` during training. |
-| `max_batch_items`   | int                                        | Maximum size of a padded batch. Defaults to `128*32`.                                       |
+| Name                | Type                                       | Description                                                                                                                                                                                                                             |
+| ------------------- | ------------------------------------------ | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `vocab`             | `Vocab`                                    | The shared vocabulary.                                                                                                                                                                                                                  |
+| `model`             | [`Model`](https://thinc.ai/docs/api-model) | The Thinc [`Model`](https://thinc.ai/docs/api-model) powering the pipeline component.                                                                                                                                                   |
+| `annotation_setter` | `Callable`                                 | Function that takes a batch of `Doc` objects and a [`FullTransformerBatch`](/api/transformer#fulltransformerbatch) and can set additional annotations on the `Doc`. Defaults to `null_annotation_setter`, a function that does nothing. |
+| _keyword-only_      |                                            |                                                                                                                                                                                                                                         |
+| `name`              | str                                        | String name of the component instance. Used to add entries to the `losses` during training.                                                                                                                                             |
+| `max_batch_items`   | int                                        | Maximum size of a padded batch. Defaults to `128*32`.                                                                                                                                                                                   |

-<!-- TODO: document rest -->
+## Transformer.\_\_call\_\_ {#call tag="method"}
+
+Apply the pipe to one document. The document is modified in place, and returned.
+This usually happens under the hood when the `nlp` object is called on a text
+and all pipeline components are applied to the `Doc` in order. Both
+[`__call__`](/api/transformer#call) and [`pipe`](/api/transformer#pipe) delegate
+to the [`predict`](/api/transformer#predict) and
+[`set_annotations`](/api/transformer#set_annotations) methods.
+
+> #### Example
+>
+> ```python
+> doc = nlp("This is a sentence.")
+> trf = nlp.add_pipe("transformer")
+> # This usually happens under the hood
+> processed = trf(doc)
+> ```
+
+| Name        | Type  | Description              |
+| ----------- | ----- | ------------------------ |
+| `doc`       | `Doc` | The document to process. |
+| **RETURNS** | `Doc` | The processed document.  |
+
+## Transformer.pipe {#pipe tag="method"}
+
+Apply the pipe to a stream of documents. This usually happens under the hood
+when the `nlp` object is called on a text and all pipeline components are
+applied to the `Doc` in order. Both [`__call__`](/api/transformer#call) and
+[`pipe`](/api/transformer#pipe) delegate to the
+[`predict`](/api/transformer#predict) and
+[`set_annotations`](/api/transformer#set_annotations) methods.
+
+> #### Example
+>
+> ```python
+> trf = nlp.add_pipe("transformer")
+> for doc in trf.pipe(docs, batch_size=50):
+>     pass
+> ```
+
+| Name           | Type            | Description                                           |
+| -------------- | --------------- | ----------------------------------------------------- |
+| `stream`       | `Iterable[Doc]` | A stream of documents.                                |
+| _keyword-only_ |                 |                                                       |
+| `batch_size`   | int             | The number of documents to buffer. Defaults to `128`. |
+| **YIELDS**     | `Doc`           | The processed documents in order.                     |
+
+## Transformer.begin_training {#begin_training tag="method"}
+
+Initialize the pipe for training, using data examples if available. Returns an
+[`Optimizer`](https://thinc.ai/docs/api-optimizers) object.
+
+> #### Example
+>
+> ```python
+> trf = nlp.add_pipe("transformer")
+> optimizer = trf.begin_training(pipeline=nlp.pipeline)
+> ```
+
+| Name           | Type                                                | Description                                                                                                    |
+| -------------- | --------------------------------------------------- | -------------------------------------------------------------------------------------------------------------- |
+| `get_examples` | `Callable[[], Iterable[Example]]`                   | Optional function that returns gold-standard annotations in the form of [`Example`](/api/example) objects.     |
+| _keyword-only_ |                                                     |                                                                                                                |
+| `pipeline`     | `List[Tuple[str, Callable]]`                        | Optional list of pipeline components that this component is part of.                                           |
+| `sgd`          | [`Optimizer`](https://thinc.ai/docs/api-optimizers) | An optional optimizer. Will be created via [`create_optimizer`](/api/transformer#create_optimizer) if not set. |
+| **RETURNS**    | [`Optimizer`](https://thinc.ai/docs/api-optimizers) | The optimizer.                                                                                                 |
+
+## Transformer.predict {#predict tag="method"}
+
+Apply the pipeline's model to a batch of docs, without modifying them.
+
+> #### Example
+>
+> ```python
+> trf = nlp.add_pipe("transformer")
+> scores = trf.predict([doc1, doc2])
+> ```
+
+| Name        | Type            | Description                               |
+| ----------- | --------------- | ----------------------------------------- |
+| `docs`      | `Iterable[Doc]` | The documents to predict.                 |
+| **RETURNS** | -               | The model's prediction for each document. |
+
+## Transformer.set_annotations {#set_annotations tag="method"}
+
+Modify a batch of documents, using pre-computed scores.
+
+> #### Example
+>
+> ```python
+> trf = nlp.add_pipe("transformer")
+> scores = trf.predict(docs)
+> trf.set_annotations(docs, scores)
+> ```
+
+| Name     | Type            | Description                                           |
+| -------- | --------------- | ----------------------------------------------------- |
+| `docs`   | `Iterable[Doc]` | The documents to modify.                              |
+| `scores` | -               | The scores to set, produced by `Transformer.predict`. |
+
+## Transformer.update {#update tag="method"}
+
+Learn from a batch of documents and gold-standard information, updating the
+pipe's model. Delegates to [`predict`](/api/transformer#predict).
+
+> #### Example
+>
+> ```python
+> trf = nlp.add_pipe("transformer")
+> optimizer = nlp.begin_training()
+> losses = trf.update(examples, sgd=optimizer)
+> ```
+
+| Name              | Type                                                | Description                                                                                                                               |
+| ----------------- | --------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------- |
+| `examples`        | `Iterable[Example]`                                 | A batch of [`Example`](/api/example) objects to learn from.                                                                               |
+| _keyword-only_    |                                                     |                                                                                                                                           |
+| `drop`            | float                                               | The dropout rate.                                                                                                                         |
+| `set_annotations` | bool                                                | Whether or not to update the `Example` objects with the predictions, delegating to [`set_annotations`](/api/transformer#set_annotations). |
+| `sgd`             | [`Optimizer`](https://thinc.ai/docs/api-optimizers) | The optimizer.                                                                                                                            |
+| `losses`          | `Dict[str, float]`                                  | Optional record of the loss during training. Updated using the component name as the key.                                                 |
+| **RETURNS**       | `Dict[str, float]`                                  | The updated `losses` dictionary.                                                                                                          |
+
+## Transformer.create_optimizer {#create_optimizer tag="method"}
+
+Create an optimizer for the pipeline component.
+
+> #### Example
+>
+> ```python
+> trf = nlp.add_pipe("transformer")
+> optimizer = trf.create_optimizer()
+> ```
+
+| Name        | Type                                                | Description    |
+| ----------- | --------------------------------------------------- | -------------- |
+| **RETURNS** | [`Optimizer`](https://thinc.ai/docs/api-optimizers) | The optimizer. |
+
+## Transformer.use_params {#use_params tag="method, contextmanager"}
+
+Modify the pipe's model, to use the given parameter values. At the end of the
+context, the original parameters are restored.
+
+> #### Example
+>
+> ```python
+> trf = nlp.add_pipe("transformer")
+> with trf.use_params(optimizer.averages):
+>     trf.to_disk("/best_model")
+> ```
+
+| Name     | Type | Description                               |
+| -------- | ---- | ----------------------------------------- |
+| `params` | dict | The parameter values to use in the model. |
+
+## Transformer.to_disk {#to_disk tag="method"}
+
+Serialize the pipe to disk.
+
+> #### Example
+>
+> ```python
+> trf = nlp.add_pipe("transformer")
+> trf.to_disk("/path/to/transformer")
+> ```
+
+| Name           | Type            | Description                                                                                                           |
+| -------------- | --------------- | --------------------------------------------------------------------------------------------------------------------- |
+| `path`         | str / `Path`    | A path to a directory, which will be created if it doesn't exist. Paths may be either strings or `Path`-like objects. |
+| _keyword-only_ |                 |                                                                                                                       |
+| `exclude`      | `Iterable[str]` | String names of [serialization fields](#serialization-fields) to exclude.                                             |
+
+## Transformer.from_disk {#from_disk tag="method"}
+
+Load the pipe from disk. Modifies the object in place and returns it.
+
+> #### Example
+>
+> ```python
+> trf = nlp.add_pipe("transformer")
+> trf.from_disk("/path/to/transformer")
+> ```
+
+| Name           | Type            | Description                                                                |
+| -------------- | --------------- | -------------------------------------------------------------------------- |
+| `path`         | str / `Path`    | A path to a directory. Paths may be either strings or `Path`-like objects. |
+| _keyword-only_ |                 |                                                                            |
+| `exclude`      | `Iterable[str]` | String names of [serialization fields](#serialization-fields) to exclude.  |
+| **RETURNS**    | `Transformer`   | The modified `Transformer` object.                                         |
+
+## Transformer.to_bytes {#to_bytes tag="method"}
+
+> #### Example
+>
+> ```python
+> trf = nlp.add_pipe("transformer")
+> trf_bytes = trf.to_bytes()
+> ```
+
+Serialize the pipe to a bytestring.
+
+| Name           | Type            | Description                                                               |
+| -------------- | --------------- | ------------------------------------------------------------------------- |
+| _keyword-only_ |                 |                                                                           |
+| `exclude`      | `Iterable[str]` | String names of [serialization fields](#serialization-fields) to exclude. |
+| **RETURNS**    | bytes           | The serialized form of the `Transformer` object.                         |
+
+## Transformer.from_bytes {#from_bytes tag="method"}
+
+Load the pipe from a bytestring. Modifies the object in place and returns it.
+
+> #### Example
+>
+> ```python
+> trf_bytes = trf.to_bytes()
+> trf = nlp.add_pipe("transformer")
+> trf.from_bytes(trf_bytes)
+> ```
+
+| Name           | Type            | Description                                                               |
+| -------------- | --------------- | ------------------------------------------------------------------------- |
+| `bytes_data`   | bytes           | The data to load from.                                                    |
+| _keyword-only_ |                 |                                                                           |
+| `exclude`      | `Iterable[str]` | String names of [serialization fields](#serialization-fields) to exclude. |
+| **RETURNS**    | `Transformer`   | The `Transformer` object.                                                 |
+
+## Serialization fields {#serialization-fields}
+
+During serialization, spaCy will export several data fields used to restore
+different aspects of the object. If needed, you can exclude them from
+serialization by passing in the string names via the `exclude` argument.
+
+> #### Example
+>
+> ```python
+> trf.to_disk("/path", exclude=["vocab"])
+> ```
+
+| Name    | Description                                                    |
+| ------- | -------------------------------------------------------------- |
+| `vocab` | The shared [`Vocab`](/api/vocab).                              |
+| `cfg`   | The config file. You usually don't want to exclude this.       |
+| `model` | The binary model data. You usually don't want to exclude this. |

 ## TransformerData {#transformerdata tag="dataclass"}

+Transformer tokens and outputs for one `Doc` object.
+
+| Name      | Type                                               | Description                               |
+| --------- | -------------------------------------------------- | ----------------------------------------- |
+| `tokens`  | `Dict`                                             | <!-- TODO: -->                            |
+| `tensors` | `List[FloatsXd]`                                   | <!-- TODO: -->                            |
+| `align`   | [`Ragged`](https://thinc.ai/docs/api-types#ragged) | <!-- TODO: -->                            |
+| `width`   | int                                                | <!-- TODO: also mention it's property --> |
+
+### TransformerData.empty {#transformerdata-empty tag="classmethod"}
+
+<!-- TODO: -->
+
+| Name        | Type              | Description    |
+| ----------- | ----------------- | -------------- |
+| **RETURNS** | `TransformerData` | <!-- TODO: --> |
+
 ## FullTransformerBatch {#fulltransformerbatch tag="dataclass"}

+<!-- TODO: -->
+
+| Name       | Type                                                                                                                                               | Description                               |
+| ---------- | -------------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------- |
+| `spans`    | `List[List[Span]]`                                                                                                                                 | <!-- TODO: -->                            |
+| `tokens`   | [`transformers.BatchEncoding`](https://huggingface.co/transformers/main_classes/tokenizer.html?highlight=batchencoding#transformers.BatchEncoding) | <!-- TODO: -->                            |
+| `tensors`  | `List[torch.Tensor]`                                                                                                                               | <!-- TODO: -->                            |
+| `align`    | [`Ragged`](https://thinc.ai/docs/api-types#ragged)                                                                                                 | <!-- TODO: -->                            |
+| `doc_data` | `List[TransformerData]`                                                                                                                            | <!-- TODO: also mention it's property --> |
+
+### FullTransformerBatch.unsplit_by_doc {#fulltransformerbatch-unsplit_by_doc tag="method"}
+
+<!-- TODO: -->
+
+| Name        | Type                   | Description    |
+| ----------- | ---------------------- | -------------- |
+| `arrays`    | `List[List[Floats3d]]` | <!-- TODO: --> |
+| **RETURNS** | `FullTransformerBatch` | <!-- TODO: --> |
+
+### FullTransformerBatch.split_by_doc {#fulltransformerbatch-split_by_doc tag="method"}
+
+Split a `TransformerData` object that represents a batch into a list with one
+`TransformerData` per `Doc`.
+
+| Name        | Type                    | Description    |
+| ----------- | ----------------------- | -------------- |
+| **RETURNS** | `List[TransformerData]` | <!-- TODO: --> |
+
 ## Custom attributes {#custom-attributes}

 The component sets the following
 [custom extension attributes](/usage/processing-pipeline#custom-components-attributes):

-| Name           | Type              | Description    |
-| -------------- | ----------------- | -------------- |
-| `Doc.trf_data` | `TransformerData` | <!-- TODO: --> |
+| Name           | Type                                                  | Description                                          |
+| -------------- | ----------------------------------------------------- | ---------------------------------------------------- |
+| `Doc.trf_data` | [`TransformerData`](/api/transformer#transformerdata) | Transformer tokens and outputs for the `Doc` object. |
--- a/website/docs/api/vocab.md
+++ b/website/docs/api/vocab.md
@ -230,10 +230,11 @@ Save the current state to a directory.
 > nlp.vocab.to_disk("/path/to/vocab")
 > ```

-| Name      | Type         | Description                                                                                                           |
-| --------- | ------------ | --------------------------------------------------------------------------------------------------------------------- |
-| `path`    | str / `Path` | A path to a directory, which will be created if it doesn't exist. Paths may be either strings or `Path`-like objects. |
-| `exclude` | list         | String names of [serialization fields](#serialization-fields) to exclude.                                             |
+| Name           | Type            | Description                                                                                                           |
+| -------------- | --------------- | --------------------------------------------------------------------------------------------------------------------- |
+| `path`         | str / `Path`    | A path to a directory, which will be created if it doesn't exist. Paths may be either strings or `Path`-like objects. |
+| _keyword-only_ |                 |                                                                                                                       |
+| `exclude`      | `Iterable[str]` | String names of [serialization fields](#serialization-fields) to exclude.                                             |

 ## Vocab.from_disk {#from_disk tag="method" new="2"}

@ -246,11 +247,12 @@ Loads state from a directory. Modifies the object in place and returns it.
 > vocab = Vocab().from_disk("/path/to/vocab")
 > ```

-| Name        | Type         | Description                                                                |
-| ----------- | ------------ | -------------------------------------------------------------------------- |
-| `path`      | str / `Path` | A path to a directory. Paths may be either strings or `Path`-like objects. |
-| `exclude`   | list         | String names of [serialization fields](#serialization-fields) to exclude.  |
-| **RETURNS** | `Vocab`      | The modified `Vocab` object.                                               |
+| Name           | Type            | Description                                                                |
+| -------------- | --------------- | -------------------------------------------------------------------------- |
+| `path`         | str / `Path`    | A path to a directory. Paths may be either strings or `Path`-like objects. |
+| _keyword-only_ |                 |                                                                            |
+| `exclude`      | `Iterable[str]` | String names of [serialization fields](#serialization-fields) to exclude.  |
+| **RETURNS**    | `Vocab`         | The modified `Vocab` object.                                               |

 ## Vocab.to_bytes {#to_bytes tag="method"}

@ -262,10 +264,11 @@ Serialize the current state to a binary string.
 > vocab_bytes = nlp.vocab.to_bytes()
 > ```

-| Name        | Type  | Description                                                               |
-| ----------- | ----- | ------------------------------------------------------------------------- |
-| `exclude`   | list  | String names of [serialization fields](#serialization-fields) to exclude. |
-| **RETURNS** | bytes | The serialized form of the `Vocab` object.                                |
+| Name           | Type            | Description                                                               |
+| -------------- | --------------- | ------------------------------------------------------------------------- |
+| _keyword-only_ |                 |                                                                           |
+| `exclude`      | `Iterable[str]` | String names of [serialization fields](#serialization-fields) to exclude. |
+| **RETURNS**    | bytes           | The serialized form of the `Vocab` object.                                |

 ## Vocab.from_bytes {#from_bytes tag="method"}

@ -280,11 +283,12 @@ Load state from a binary string.
 > vocab.from_bytes(vocab_bytes)
 > ```

-| Name         | Type    | Description                                                               |
-| ------------ | ------- | ------------------------------------------------------------------------- |
-| `bytes_data` | bytes   | The data to load from.                                                    |
-| `exclude`    | list    | String names of [serialization fields](#serialization-fields) to exclude. |
-| **RETURNS**  | `Vocab` | The `Vocab` object.                                                       |
+| Name           | Type            | Description                                                               |
+| -------------- | --------------- | ------------------------------------------------------------------------- |
+| `bytes_data`   | bytes           | The data to load from.                                                    |
+| _keyword-only_ |                 |                                                                           |
+| `exclude`      | `Iterable[str]` | String names of [serialization fields](#serialization-fields) to exclude. |
+| **RETURNS**    | `Vocab`         | The `Vocab` object.                                                       |

 ## Attributes {#attributes}