diff --git a/.gitignore b/.gitignore
index 136a8f26d..4dbcd67f7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -18,8 +18,6 @@ website/.npm
 website/logs
 *.log
 npm-debug.log*
-website/www/
-website/_deploy.sh
 quickstart-training-generator.js
 
 # Cython / C extensions
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 81cfbf8cb..0abde2abf 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -5,7 +5,7 @@
 Thanks for your interest in contributing to spaCy 🎉 The project is maintained
 by [@honnibal](https://github.com/honnibal) and [@ines](https://github.com/ines),
 and we'll do our best to help you get started. This page will give you a quick
-overview of how things are organised and most importantly, how to get involved.
+overview of how things are organized and most importantly, how to get involved.
 
 ## Table of contents
 
@@ -195,7 +195,7 @@ modules in `.py` files, not Cython modules in `.pyx` and `.pxd` files.**
 ### Code formatting
 
 [`black`](https://github.com/ambv/black) is an opinionated Python code
-formatter, optimised to produce readable code and small diffs. You can run
+formatter, optimized to produce readable code and small diffs. You can run
 `black` from the command-line, or via your code editor. For example, if you're
 using [Visual Studio Code](https://code.visualstudio.com/), you can add the
 following to your `settings.json` to use `black` for formatting and auto-format
@@ -286,7 +286,7 @@ Code that interacts with the file-system should accept objects that follow the
 If the function is user-facing and takes a path as an argument, it should check
 whether the path is provided as a string. Strings should be converted to
 `pathlib.Path` objects. Serialization and deserialization functions should always
-accept **file-like objects**, as it makes the library io-agnostic. Working on
+accept **file-like objects**, as it makes the library IO-agnostic. Working on
 buffers makes the code more general, easier to test, and compatible with Python
 3's asynchronous IO.
 
@@ -384,7 +384,7 @@ of Python and C++, with additional complexity and syntax from numpy. The
 many "traps for new players". Working in Cython is very rewarding once you're
 over the initial learning curve. As with C and C++, the first way you write
 something in Cython will often be the performance-optimal approach. In contrast,
-Python optimisation generally requires a lot of experimentation. Is it faster to
+Python optimization generally requires a lot of experimentation. Is it faster to
 have an `if item in my_dict` check, or to use `.get()`? What about `try`/`except`?
 Does this numpy operation create a copy? There's no way to guess the answers to
 these questions, and you'll usually be dissatisfied with your results — so
@@ -400,7 +400,7 @@ Python. If it's not fast enough the first time, just switch to Cython.
 - [PEP 8 Style Guide for Python Code](https://www.python.org/dev/peps/pep-0008/) (python.org)
 - [Official Cython documentation](http://docs.cython.org/en/latest/) (cython.org)
 - [Writing C in Cython](https://explosion.ai/blog/writing-c-in-cython) (explosion.ai)
-- [Multi-threading spaCy’s parser and named entity recogniser](https://explosion.ai/blog/multithreading-with-cython) (explosion.ai)
+- [Multi-threading spaCy’s parser and named entity recognizer](https://explosion.ai/blog/multithreading-with-cython) (explosion.ai)
 
 ## Adding tests
 
@@ -412,7 +412,7 @@ name. For example, tests for the `Tokenizer` can be found in
 all test files and test functions need to be prefixed with `test_`.
 
 When adding tests, make sure to use descriptive names, keep the code short and
-concise and only test for one behaviour at a time. Try to `parametrize` test
+concise and only test for one behavior at a time. Try to `parametrize` test
 cases wherever possible, use our pre-defined fixtures for spaCy components and
 avoid unnecessary imports.
 
diff --git a/README.md b/README.md
index 1fece1e5a..cef2a1fdd 100644
--- a/README.md
+++ b/README.md
@@ -49,9 +49,8 @@ It's commercial open-source software, released under the MIT license.
 
 ## 💬 Where to ask questions
 
-The spaCy project is maintained by [@honnibal](https://github.com/honnibal) and
-[@ines](https://github.com/ines), along with core contributors
-[@svlandeg](https://github.com/svlandeg) and
+The spaCy project is maintained by [@honnibal](https://github.com/honnibal),
+[@ines](https://github.com/ines), [@svlandeg](https://github.com/svlandeg) and
 [@adrianeboyd](https://github.com/adrianeboyd). Please understand that we won't
 be able to provide individual support via email. We also believe that help is
 much more valuable if it's shared publicly, so that more people can benefit from
diff --git a/spacy/cli/init_config.py b/spacy/cli/init_config.py
index 9b47dea14..94e0bd6fc 100644
--- a/spacy/cli/init_config.py
+++ b/spacy/cli/init_config.py
@@ -24,7 +24,7 @@ class Optimizations(str, Enum):
 @init_cli.command("config")
 def init_config_cli(
     # fmt: off
-    output_file: Path = Arg("-", help="File to save config.cfg to (or - for stdout)", allow_dash=True),
+    output_file: Path = Arg(..., help="File to save config.cfg to or - for stdout (will only output config and no additional logging info)", allow_dash=True),
     lang: Optional[str] = Opt("en", "--lang", "-l", help="Two-letter code of the language to use"),
     pipeline: Optional[str] = Opt("tagger,parser,ner", "--pipeline", "-p", help="Comma-separated names of trainable pipeline components to include in the model (without 'tok2vec' or 'transformer')"),
     optimize: Optimizations = Opt(Optimizations.efficiency.value, "--optimize", "-o", help="Whether to optimize for efficiency (faster inference, smaller model, lower memory consumption) or higher accuracy (potentially larger and slower model). This will impact the choice of architecture, pretrained weights and related hyperparameters."),
@@ -110,6 +110,13 @@ def init_config(
         "word_vectors": reco["word_vectors"],
         "has_letters": reco["has_letters"],
     }
+    if variables["transformer_data"] and not has_spacy_transformers():
+        msg.warn(
+            "To generate a more effective transformer-based config (GPU-only), "
+            "install the spacy-transformers package and re-run this command. "
+            "The config generated now does not use transformers."
+        )
+        variables["transformer_data"] = None
     base_template = template.render(variables).strip()
     # Giving up on getting the newlines right in jinja for now
     base_template = re.sub(r"\n\n\n+", "\n\n", base_template)
@@ -126,8 +133,6 @@ def init_config(
     for label, value in use_case.items():
         msg.text(f"- {label}: {value}")
     use_transformer = bool(template_vars.use_transformer)
-    if use_transformer:
-        require_spacy_transformers(msg)
     with show_validation_error(hint_fill=False):
         config = util.load_config_from_str(base_template)
         nlp, _ = util.load_model_from_config(config, auto_fill=True)
@@ -149,12 +154,10 @@ def save_config(config: Config, output_file: Path, is_stdout: bool = False) -> N
         print(f"{COMMAND} train {output_file.parts[-1]} {' '.join(variables)}")
 
 
-def require_spacy_transformers(msg: Printer) -> None:
+def has_spacy_transformers() -> bool:
     try:
         import spacy_transformers  # noqa: F401
+
+        return True
     except ImportError:
-        msg.fail(
-            "Using a transformer-based pipeline requires spacy-transformers "
-            "to be installed.",
-            exits=1,
-        )
+        return False
diff --git a/spacy/cli/templates/quickstart_training.jinja b/spacy/cli/templates/quickstart_training.jinja
index 674099abc..0071f1b1a 100644
--- a/spacy/cli/templates/quickstart_training.jinja
+++ b/spacy/cli/templates/quickstart_training.jinja
@@ -107,8 +107,8 @@ factory = "tok2vec"
 @architectures = "spacy.MultiHashEmbed.v1"
 width = ${components.tok2vec.model.encode.width}
 rows = {{ 2000 if optimize == "efficiency" else 7000 }}
-also_embed_subwords = {{ true if has_letters else false }}
-also_use_static_vectors = {{ true if optimize == "accuracy" else false }}
+also_embed_subwords = {{ "true" if has_letters else "false" }}
+also_use_static_vectors = {{ "true" if optimize == "accuracy" else "false" }}
 
 [components.tok2vec.model.encode]
 @architectures = "spacy.MaxoutWindowEncoder.v1"
@@ -195,7 +195,7 @@ initial_rate = 5e-5
 [training.train_corpus]
 @readers = "spacy.Corpus.v1"
 path = ${paths.train}
-max_length = {{ 500 if hardware == "gpu" else 0 }}
+max_length = {{ 500 if hardware == "gpu" else 2000 }}
 
 [training.dev_corpus]
 @readers = "spacy.Corpus.v1"
diff --git a/spacy/displacy/render.py b/spacy/displacy/render.py
index 69f6df8f0..07550f9aa 100644
--- a/spacy/displacy/render.py
+++ b/spacy/displacy/render.py
@@ -252,8 +252,10 @@ class EntityRenderer:
             colors.update(user_color)
         colors.update(options.get("colors", {}))
         self.default_color = DEFAULT_ENTITY_COLOR
-        self.colors = colors
+        self.colors = {label.upper(): color for label, color in colors.items()}
         self.ents = options.get("ents", None)
+        if self.ents is not None:
+            self.ents = [ent.upper() for ent in self.ents]
         self.direction = DEFAULT_DIR
         self.lang = DEFAULT_LANG
         template = options.get("template")
diff --git a/spacy/displacy/templates.py b/spacy/displacy/templates.py
index ff99000f4..b9cbf717b 100644
--- a/spacy/displacy/templates.py
+++ b/spacy/displacy/templates.py
@@ -51,14 +51,14 @@ TPL_ENTS = """
 TPL_ENT = """
 <mark class="entity" style="background: {bg}; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;">
     {text}
-    <span style="font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; text-transform: uppercase; vertical-align: middle; margin-left: 0.5rem">{label}</span>
+    <span style="font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem">{label}</span>
 </mark>
 """
 
 TPL_ENT_RTL = """
 <mark class="entity" style="background: {bg}; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em">
     {text}
-    <span style="font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; text-transform: uppercase; vertical-align: middle; margin-right: 0.5rem">{label}</span>
+    <span style="font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-right: 0.5rem">{label}</span>
 </mark>
 """
 
diff --git a/spacy/tests/test_displacy.py b/spacy/tests/test_displacy.py
index adac0f7c3..1fa0eeaa1 100644
--- a/spacy/tests/test_displacy.py
+++ b/spacy/tests/test_displacy.py
@@ -1,6 +1,6 @@
 import pytest
 from spacy import displacy
-from spacy.displacy.render import DependencyRenderer
+from spacy.displacy.render import DependencyRenderer, EntityRenderer
 from spacy.tokens import Span
 from spacy.lang.fa import Persian
 
@@ -97,3 +97,17 @@ def test_displacy_render_wrapper(en_vocab):
     assert html.endswith("/div>TEST")
     # Restore
     displacy.set_render_wrapper(lambda html: html)
+
+
+def test_displacy_options_case():
+    ents = ["foo", "BAR"]
+    colors = {"FOO": "red", "bar": "green"}
+    renderer = EntityRenderer({"ents": ents, "colors": colors})
+    text = "abcd"
+    labels = ["foo", "bar", "FOO", "BAR"]
+    spans = [{"start": i, "end": i + 1, "label": labels[i]} for i in range(len(text))]
+    result = renderer.render_ents("abcde", spans, None).split("\n\n")
+    assert "red" in result[0] and "foo" in result[0]
+    assert "green" in result[1] and "bar" in result[1]
+    assert "red" in result[2] and "FOO" in result[2]
+    assert "green" in result[3] and "BAR" in result[3]
diff --git a/spacy/tokenizer.pyx b/spacy/tokenizer.pyx
index a13299fff..9fda1800b 100644
--- a/spacy/tokenizer.pyx
+++ b/spacy/tokenizer.pyx
@@ -47,9 +47,9 @@ cdef class Tokenizer:
         `infix_finditer` (callable): A function matching the signature of
             `re.compile(string).finditer` to find infixes.
         token_match (callable): A boolean function matching strings to be
-            recognised as tokens.
+            recognized as tokens.
         url_match (callable): A boolean function matching strings to be
-            recognised as tokens after considering prefixes and suffixes.
+            recognized as tokens after considering prefixes and suffixes.
 
         EXAMPLE:
             >>> tokenizer = Tokenizer(nlp.vocab)
diff --git a/website/docs/api/architectures.md b/website/docs/api/architectures.md
index acdf4cb19..835815496 100644
--- a/website/docs/api/architectures.md
+++ b/website/docs/api/architectures.md
@@ -399,7 +399,7 @@ one component.
 > subword_features = true
 > ```
 
-Build a transition-based parser model. Can apply to NER or dependency-parsing.
+Build a transition-based parser model. Can apply to NER or dependency parsing.
 Transition-based parsing is an approach to structured prediction where the task
 of predicting the structure is mapped to a series of state transitions. You
 might find [this tutorial](https://explosion.ai/blog/parsing-english-in-python)
@@ -416,8 +416,6 @@ consists of either two or three subnetworks:
   state representation. If not present, the output from the lower model is used
   as action scores directly.
 
-<!-- TODO: model return type -->
-
 | Name                | Description                                                                                                                                                                                                                                                                                                                                                             |
 | ------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
 | `tok2vec`           | Subnetwork to map tokens into vector representations. ~~Model[List[Doc], List[Floats2d]]~~                                                                                                                                                                                                                                                                              |
@@ -426,7 +424,7 @@ consists of either two or three subnetworks:
 | `maxout_pieces`     | How many pieces to use in the state prediction layer. Recommended values are `1`, `2` or `3`. If `1`, the maxout non-linearity is replaced with a [`Relu`](https://thinc.ai/docs/api-layers#relu) non-linearity if `use_upper` is `True`, and no non-linearity if `False`. ~~int~~                                                                                      |
 | `use_upper`         | Whether to use an additional hidden layer after the state vector in order to predict the action scores. It is recommended to set this to `False` for large pretrained models such as transformers, and `True` for smaller networks. The upper layer is computed on CPU, which becomes a bottleneck on larger GPU-based models, where it's also less necessary. ~~bool~~ |
 | `nO`                | The number of actions the model will predict between. Usually inferred from data at the beginning of training, or loaded from disk. ~~int~~                                                                                                                                                                                                                             |
-| **CREATES**         | The model using the architecture. ~~Model~~                                                                                                                                                                                                                                                                                                                             |
+| **CREATES**         | The model using the architecture. ~~Model[List[Docs], List[List[Floats2d]]]~~                                                                                                                                                                                                                                                                                           |
 
 ### spacy.BILUOTagger.v1 {#BILUOTagger source="spacy/ml/models/simple_ner.py"}
 
diff --git a/website/docs/api/morphology.md b/website/docs/api/morphology.md
index 1b2e159d0..5d5324061 100644
--- a/website/docs/api/morphology.md
+++ b/website/docs/api/morphology.md
@@ -7,7 +7,7 @@ source: spacy/morphology.pyx
 Store the possible morphological analyses for a language, and index them by
 hash. To save space on each token, tokens only know the hash of their
 morphological analysis, so queries of morphological attributes are delegated to
-this class. See [`MorphAnalysis`](/api/morphology#morphansalysis) for the
+this class. See [`MorphAnalysis`](/api/morphology#morphanalysis) for the
 container storing a single morphological analysis.
 
 ## Morphology.\_\_init\_\_ {#init tag="method"}
diff --git a/website/docs/api/token.md b/website/docs/api/token.md
index 4a8e6eba7..0860797aa 100644
--- a/website/docs/api/token.md
+++ b/website/docs/api/token.md
@@ -450,8 +450,8 @@ The L2 norm of the token's vector representation.
 | `pos_`                                       | Coarse-grained part-of-speech from the [Universal POS tag set](https://universaldependencies.org/docs/u/pos/). ~~str~~                                                                                                                                                 |
 | `tag`                                        | Fine-grained part-of-speech. ~~int~~                                                                                                                                                                                                                                   |
 | `tag_`                                       | Fine-grained part-of-speech. ~~str~~                                                                                                                                                                                                                                   |
-| `morph`                                      | Morphological analysis. ~~MorphAnalysis~~                                                                                                                                                                                                                              |
-| `morph_`                                     | Morphological analysis in the Universal Dependencies [FEATS]https://universaldependencies.org/format.html#morphological-annotation format. ~~str~~                                                                                                                     |
+| `morph` <Tag variant="new">3</Tag>           | Morphological analysis. ~~MorphAnalysis~~                                                                                                                                                                                                                              |
+| `morph_` <Tag variant="new">3</Tag>          | Morphological analysis in the Universal Dependencies [FEATS](https://universaldependencies.org/format.html#morphological-annotation) format. ~~str~~                                                                                                                   |
 | `dep`                                        | Syntactic dependency relation. ~~int~~                                                                                                                                                                                                                                 |
 | `dep_`                                       | Syntactic dependency relation. ~~str~~                                                                                                                                                                                                                                 |
 | `lang`                                       | Language of the parent document's vocabulary. ~~int~~                                                                                                                                                                                                                  |
diff --git a/website/docs/api/top-level.md b/website/docs/api/top-level.md
index 61fca6ec5..9c65b2982 100644
--- a/website/docs/api/top-level.md
+++ b/website/docs/api/top-level.md
@@ -257,7 +257,7 @@ If a setting is not present in the options, the default value will be used.
 | Name                                    | Description                                                                                                                                                                                                                                                                 |
 | --------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
 | `ents`                                  | Entity types to highlight or `None` for all types (default). ~~Optional[List[str]]~~                                                                                                                                                                                        |
-| `colors`                                | Color overrides. Entity types in uppercase should be mapped to color names or values. ~~Dict[str, str]~~                                                                                                                                                                    |
+| `colors`                                | Color overrides. Entity types should be mapped to color names or values. ~~Dict[str, str]~~                                                                                                                                                                                 |
 | `template` <Tag variant="new">2.2</Tag> | Optional template to overwrite the HTML used to render entity spans. Should be a format string and can use `{bg}`, `{text}` and `{label}`. See [`templates.py`](https://github.com/explosion/spaCy/blob/master/spacy/displacy/templates.py) for examples. ~~Optional[str]~~ |
 
 By default, displaCy comes with colors for all entity types used by
@@ -632,6 +632,23 @@ validate its contents.
 | `path`      | Path to the model's `meta.json`. ~~Union[str, Path]~~ |
 | **RETURNS** | The model's meta data. ~~Dict[str, Any]~~             |
 
+### util.get_installed_models {#util.get_installed_models tag="function" new="3"}
+
+List all model packages installed in the current environment. This will include
+any spaCy model that was packaged with [`spacy package`](/api/cli#package).
+Under the hood, model packages expose a Python entry point that spaCy can check,
+without having to load the model.
+
+> #### Example
+>
+> ```python
+> model_names = util.get_installed_models()
+> ```
+
+| Name        | Description                                                                        |
+| ----------- | ---------------------------------------------------------------------------------- |
+| **RETURNS** | The string names of the models installed in the current environment. ~~List[str]~~ |
+
 ### util.is_package {#util.is_package tag="function"}
 
 Check if string maps to a package installed via pip. Mainly used to validate
diff --git a/website/docs/images/sense2vec.jpg b/website/docs/images/sense2vec.jpg
new file mode 100644
index 000000000..3a1772582
Binary files /dev/null and b/website/docs/images/sense2vec.jpg differ
diff --git a/website/docs/usage/101/_vectors-similarity.md b/website/docs/usage/101/_vectors-similarity.md
index a04c96236..92df1b331 100644
--- a/website/docs/usage/101/_vectors-similarity.md
+++ b/website/docs/usage/101/_vectors-similarity.md
@@ -80,25 +80,73 @@ duplicate if it's very similar to an already existing one.
 Each [`Doc`](/api/doc), [`Span`](/api/span), [`Token`](/api/token) and
 [`Lexeme`](/api/lexeme) comes with a [`.similarity`](/api/token#similarity)
 method that lets you compare it with another object, and determine the
-similarity. Of course similarity is always subjective – whether "dog" and "cat"
-are similar really depends on how you're looking at it. spaCy's similarity model
-usually assumes a pretty general-purpose definition of similarity.
+similarity. Of course similarity is always subjective – whether two words, spans
+or documents are similar really depends on how you're looking at it. spaCy's
+similarity model usually assumes a pretty general-purpose definition of
+similarity.
 
-<!-- TODO: use better example here -->
+> #### 📝 Things to try
+>
+> 1. Compare two different tokens and try to find the two most _dissimilar_
+>    tokens in the texts with the lowest similarity score (according to the
+>    vectors).
+> 2. Compare the similarity of two [`Lexeme`](/api/lexeme) objects, entries in
+>    the vocabulary. You can get a lexeme via the `.lex` attribute of a token.
+>    You should see that the similarity results are identical to the token
+>    similarity.
 
 ```python
 ### {executable="true"}
 import spacy
 
 nlp = spacy.load("en_core_web_md")  # make sure to use larger model!
-tokens = nlp("dog cat banana")
+doc1 = nlp("I like salty fries and hamburgers.")
+doc2 = nlp("Fast food tastes very good.")
 
-for token1 in tokens:
-    for token2 in tokens:
-        print(token1.text, token2.text, token1.similarity(token2))
+# Similarity of two documents
+print(doc1, "<->", doc2, doc1.similarity(doc2))
+# Similarity of tokens and spans
+french_fries = doc1[2:4]
+burgers = doc1[5]
+print(french_fries, "<->", burgers, french_fries.similarity(burgers))
 ```
 
-In this case, the model's predictions are pretty on point. A dog is very similar
-to a cat, whereas a banana is not very similar to either of them. Identical
-tokens are obviously 100% similar to each other (just not always exactly `1.0`,
-because of vector math and floating point imprecisions).
+### What to expect from similarity results {#similarity-expectations}
+
+Computing similarity scores can be helpful in many situations, but it's also
+important to maintain **realistic expectations** about what information it can
+provide. Words can be related to each other in many ways, so a single
+"similarity" score will always be a **mix of different signals**, and vectors
+trained on different data can produce very different results that may not be
+useful for your purpose. Here are some important considerations to keep in mind:
+
+- There's no objective definition of similarity. Whether "I like burgers" and "I
+  like pasta" are similar **depends on your application**. Both talk about food
+  preferences, which makes them very similar – but if you're analyzing mentions
+  of food, those sentences are pretty dissimilar, because they talk about very
+  different foods.
+- The similarity of [`Doc`](/api/doc) and [`Span`](/api/span) objects defaults
+  to the **average** of the token vectors. This means that the vector for "fast
+  food" is the average of the vectors for "fast" and "food", which isn't
+  necessarily representative of the phrase "fast food".
+- Vector averaging means that the vector of multiple tokens is **insensitive to
+  the order** of the words. Two documents expressing the same meaning with
+  dissimilar wording will return a lower similarity score than two documents
+  that happen to contain the same words while expressing different meanings.
+
+<Infobox title="Tip: Check out sense2vec" emoji="💡">
+
+[![](../../images/sense2vec.jpg)](https://github.com/explosion/sense2vec)
+
+[`sense2vec`](https://github.com/explosion/sense2vec) is a library developed by
+us that builds on top of spaCy and lets you train and query more interesting and
+detailed word vectors. It combines noun phrases like "fast food" or "fair game"
+and includes the part-of-speech tags and entity labels. The library also
+includes annotation recipes for our annotation tool [Prodigy](https://prodi.gy)
+that let you evaluate vector models and create terminology lists. For more
+details, check out
+[our blog post](https://explosion.ai/blog/sense2vec-reloaded). To explore the
+semantic similarities across all Reddit comments of 2015 and 2019, see the
+[interactive demo](https://explosion.ai/demos/sense2vec).
+
+</Infobox>
diff --git a/website/docs/usage/embeddings-transformers.md b/website/docs/usage/embeddings-transformers.md
index c2727f5b1..33385ff51 100644
--- a/website/docs/usage/embeddings-transformers.md
+++ b/website/docs/usage/embeddings-transformers.md
@@ -11,6 +11,10 @@ next: /usage/training
 
 <!-- TODO: intro, short explanation of embeddings/transformers, Tok2Vec and Transformer components, point user to processing pipelines docs for more general info that user should know first -->
 
+If you're looking for details on using word vectors and semantic similarity,
+check out the
+[linguistic features docs](/usage/linguistic-features#vectors-similarity).
+
 <Accordion title="What’s the difference between word vectors and language models?" id="vectors-vs-language-models">
 
 The key difference between [word vectors](#word-vectors) and contextual language
@@ -180,7 +184,7 @@ yourself. For details on how to get started with training your own model, check
 out the [training quickstart](/usage/training#quickstart).
 
 <!-- TODO:
-<Project id="en_core_bert">
+<Project id="en_core_trf_lg">
 
 The easiest way to get started is to clone a transformers-based project
 template. Swap in your data, edit the settings and hyperparameters and train,
diff --git a/website/docs/usage/index.md b/website/docs/usage/index.md
index c90c23b28..ede4ab6f9 100644
--- a/website/docs/usage/index.md
+++ b/website/docs/usage/index.md
@@ -169,7 +169,7 @@ $ python setup.py build_ext --inplace           # compile spaCy
 
 Compared to regular install via pip, the
 [`requirements.txt`](https://github.com/explosion/spaCy/tree/master/requirements.txt)
-additionally installs developer dependencies such as Cython. See the 
+additionally installs developer dependencies such as Cython. See the
 [quickstart widget](#quickstart) to get the right commands for your platform and
 Python version.
 
@@ -368,7 +368,7 @@ from is called `spacy`. So, when using spaCy, never call anything else `spacy`.
 
 </Accordion>
 
-<Accordion title="NER model doesn't recognise other entities anymore after training" id="catastrophic-forgetting">
+<Accordion title="NER model doesn't recognize other entities anymore after training" id="catastrophic-forgetting">
 
 If your training data only contained new entities and you didn't mix in any
 examples the model previously recognized, it can cause the model to "forget"
diff --git a/website/docs/usage/linguistic-features.md b/website/docs/usage/linguistic-features.md
index 10efcf875..f52c2b2ad 100644
--- a/website/docs/usage/linguistic-features.md
+++ b/website/docs/usage/linguistic-features.md
@@ -429,7 +429,7 @@ nlp = spacy.load("en_core_web_sm")
 doc = nlp("fb is hiring a new vice president of global policy")
 ents = [(e.text, e.start_char, e.end_char, e.label_) for e in doc.ents]
 print('Before', ents)
-# the model didn't recognise "fb" as an entity :(
+# The model didn't recognize "fb" as an entity :(
 
 fb_ent = Span(doc, 0, 1, label="ORG") # create a Span for the new entity
 doc.ents = list(doc.ents) + [fb_ent]
@@ -558,11 +558,11 @@ import spacy
 nlp = spacy.load("my_custom_el_model")
 doc = nlp("Ada Lovelace was born in London")
 
-# document level
+# Document level
 ents = [(e.text, e.label_, e.kb_id_) for e in doc.ents]
 print(ents)  # [('Ada Lovelace', 'PERSON', 'Q7259'), ('London', 'GPE', 'Q84')]
 
-# token level
+# Token level
 ent_ada_0 = [doc[0].text, doc[0].ent_type_, doc[0].ent_kb_id_]
 ent_ada_1 = [doc[1].text, doc[1].ent_type_, doc[1].ent_kb_id_]
 ent_london_5 = [doc[5].text, doc[5].ent_type_, doc[5].ent_kb_id_]
@@ -914,12 +914,12 @@ from spacy.lang.char_classes import ALPHA, ALPHA_LOWER, ALPHA_UPPER
 from spacy.lang.char_classes import CONCAT_QUOTES, LIST_ELLIPSES, LIST_ICONS
 from spacy.util import compile_infix_regex
 
-# default tokenizer
+# Default tokenizer
 nlp = spacy.load("en_core_web_sm")
 doc = nlp("mother-in-law")
 print([t.text for t in doc]) # ['mother', '-', 'in', '-', 'law']
 
-# modify tokenizer infix patterns
+# Modify tokenizer infix patterns
 infixes = (
     LIST_ELLIPSES
     + LIST_ICONS
@@ -929,8 +929,8 @@ infixes = (
             al=ALPHA_LOWER, au=ALPHA_UPPER, q=CONCAT_QUOTES
         ),
         r"(?<=[{a}]),(?=[{a}])".format(a=ALPHA),
-        # EDIT: commented out regex that splits on hyphens between letters:
-        #r"(?<=[{a}])(?:{h})(?=[{a}])".format(a=ALPHA, h=HYPHENS),
+        # ✅ Commented out regex that splits on hyphens between letters:
+        # r"(?<=[{a}])(?:{h})(?=[{a}])".format(a=ALPHA, h=HYPHENS),
         r"(?<=[{a}0-9])[:<>=/](?=[{a}])".format(a=ALPHA),
     ]
 )
@@ -1547,23 +1547,6 @@ import Vectors101 from 'usage/101/\_vectors-similarity.md'
 
 <Vectors101 />
 
-<Infobox title="What to expect from similarity results" variant="warning">
-
-Computing similarity scores can be helpful in many situations, but it's also
-important to maintain **realistic expectations** about what information it can
-provide. Words can be related to each over in many ways, so a single
-"similarity" score will always be a **mix of different signals**, and vectors
-trained on different data can produce very different results that may not be
-useful for your purpose.
-
-Also note that the similarity of `Doc` or `Span` objects defaults to the
-**average** of the token vectors. This means it's insensitive to the order of
-the words. Two documents expressing the same meaning with dissimilar wording
-will return a lower similarity score than two documents that happen to contain
-the same words while expressing different meanings.
-
-</Infobox>
-
 ### Adding word vectors {#adding-vectors}
 
 Custom word vectors can be trained using a number of open-source libraries, such
diff --git a/website/docs/usage/processing-pipelines.md b/website/docs/usage/processing-pipelines.md
index bc8c990e8..a863c6c32 100644
--- a/website/docs/usage/processing-pipelines.md
+++ b/website/docs/usage/processing-pipelines.md
@@ -108,11 +108,11 @@ class, or defined within a [model package](/usage/saving-loading#models).
 >
 > [components.tagger]
 > factory = "tagger"
-> # settings for the tagger component
+> # Settings for the tagger component
 >
 > [components.parser]
 > factory = "parser"
-> # settings for the parser component
+> # Settings for the parser component
 > ```
 
 When you load a model, spaCy first consults the model's
@@ -171,11 +171,11 @@ lang = "en"
 pipeline = ["tagger", "parser", "ner"]
 data_path = "path/to/en_core_web_sm/en_core_web_sm-2.0.0"
 
-cls = spacy.util.get_lang_class(lang)   # 1. Get Language instance, e.g. English()
-nlp = cls()                             # 2. Initialize it
+cls = spacy.util.get_lang_class(lang)  # 1. Get Language class, e.g. English
+nlp = cls()                            # 2. Initialize it
 for name in pipeline:
-    nlp.add_pipe(name)                  # 3. Add the component to the pipeline
-nlp.from_disk(model_data_path)          # 4. Load in the binary data
+    nlp.add_pipe(name)                 # 3. Add the component to the pipeline
+nlp.from_disk(data_path)               # 4. Load in the binary data
 ```
 
 When you call `nlp` on a text, spaCy will **tokenize** it and then **call each
@@ -187,9 +187,9 @@ which is then processed by the component next in the pipeline.
 
 ```python
 ### The pipeline under the hood
-doc = nlp.make_doc("This is a sentence")   # create a Doc from raw text
-for name, proc in nlp.pipeline:             # iterate over components in order
-    doc = proc(doc)                         # apply each component
+doc = nlp.make_doc("This is a sentence")  # Create a Doc from raw text
+for name, proc in nlp.pipeline:           # Iterate over components in order
+    doc = proc(doc)                       # Apply each component
 ```
 
 The current processing pipeline is available as `nlp.pipeline`, which returns a
@@ -473,7 +473,7 @@ only being able to modify it afterwards.
 >
 > @Language.component("my_component")
 > def my_component(doc):
->    # do something to the doc here
+>    # Do something to the doc here
 >    return doc
 > ```
 
diff --git a/website/docs/usage/rule-based-matching.md b/website/docs/usage/rule-based-matching.md
index ce6625897..7fdce032e 100644
--- a/website/docs/usage/rule-based-matching.md
+++ b/website/docs/usage/rule-based-matching.md
@@ -511,21 +511,21 @@ from spacy.language import Language
 from spacy.matcher import Matcher
 from spacy.tokens import Token
 
-# We're using a component factory because the component needs to be initialized
-# with the shared vocab via the nlp object
+# We're using a component factory because the component needs to be
+# initialized with the shared vocab via the nlp object
 @Language.factory("html_merger")
 def create_bad_html_merger(nlp, name):
-    return BadHTMLMerger(nlp)
+    return BadHTMLMerger(nlp.vocab)
 
 class BadHTMLMerger:
-    def __init__(self, nlp):
+    def __init__(self, vocab):
         patterns = [
             [{"ORTH": "<"}, {"LOWER": "br"}, {"ORTH": ">"}],
             [{"ORTH": "<"}, {"LOWER": "br/"}, {"ORTH": ">"}],
         ]
         # Register a new token extension to flag bad HTML
         Token.set_extension("bad_html", default=False)
-        self.matcher = Matcher(nlp.vocab)
+        self.matcher = Matcher(vocab)
         self.matcher.add("BAD_HTML", patterns)
 
     def __call__(self, doc):
diff --git a/website/docs/usage/training.md b/website/docs/usage/training.md
index 150dff280..116561cd2 100644
--- a/website/docs/usage/training.md
+++ b/website/docs/usage/training.md
@@ -104,10 +104,10 @@ workflows, from data preprocessing to training and packaging your model.
 
 ## Training config {#config}
 
-> #### Migration from spaCy v2.x
+<!-- > #### Migration from spaCy v2.x
 >
 > TODO: once we have an answer for how to update the training command
-> (`spacy migrate`?), add details here
+> (`spacy migrate`?), add details here -->
 
 Training config files include all **settings and hyperparameters** for training
 your model. Instead of providing lots of arguments on the command line, you only
@@ -404,11 +404,15 @@ recipe once the dish has already been prepared. You have to make a new one.
 spaCy includes a variety of built-in [architectures](/api/architectures) for
 different tasks. For example:
 
-<!-- TODO: select example architectures to showcase -->
+<!-- TODO: model return types -->
 
-| Architecture                                    | Description                                                                                                                                                            |
-| ----------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| [HashEmbedCNN](/api/architectures#HashEmbedCNN) | Build spaCy’s “standard” embedding layer, which uses hash embedding with subword features and a CNN with layer-normalized maxout. ~~Model[List[Doc], List[Floats2d]]~~ |
+| Architecture                                                      | Description                                                                                                                                                                                                                                               |
+| ----------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| [HashEmbedCNN](/api/architectures#HashEmbedCNN)                   | Build spaCy’s "standard" embedding layer, which uses hash embedding with subword features and a CNN with layer-normalized maxout. ~~Model[List[Doc], List[Floats2d]]~~                                                                                    |
+| [TransitionBasedParser](/api/architectures#TransitionBasedParser) | Build a [transition-based parser](https://explosion.ai/blog/parsing-english-in-python) model used in the default [`EntityRecognizer`](/api/entityrecognizer) and [`DependencyParser`](/api/dependencyparser). ~~Model[List[Doc], List[List[Floats2d]]]~~  |
+| [TextCatEnsemble](/api/architectures#TextCatEnsemble)             | Stacked ensemble of a bag-of-words model and a neural network model with an internal CNN embedding layer. Used in the default [`TextCategorizer`](/api/textcategorizer). ~~Model~~                                                                        |
+
+<!-- TODO: link to not yet existing usage page on custom architectures etc. -->
 
 ### Metrics, training output and weighted scores {#metrics}
 
@@ -788,7 +792,7 @@ you save the transformer outputs for later use.
 
 <!-- TODO:
 
-<Project id="en_core_bert">
+<Project id="en_core_trf_lg">
 
 Try out a BERT-based model pipeline using this project template: swap in your
 data, edit the settings and hyperparameters and train, evaluate, package and
diff --git a/website/docs/usage/v3.md b/website/docs/usage/v3.md
index 837818a83..d71ecba31 100644
--- a/website/docs/usage/v3.md
+++ b/website/docs/usage/v3.md
@@ -10,6 +10,32 @@ menu:
 
 ## Summary {#summary}
 
+<Grid cols={2}>
+
+<div>
+
+</div>
+
+<Infobox title="Table of Contents" id="toc">
+
+- [Summary](#summary)
+- [New features](#features)
+- [Training & config system](#features-training)
+- [Transformer-based pipelines](#features-transformers)
+- [Custom models](#features-custom-models)
+- [End-to-end project workflows](#features-projects)
+- [New built-in components](#features-pipeline-components)
+- [New custom component API](#features-components)
+- [Python type hints](#features-types)
+- [New methods & attributes](#new-methods)
+- [New & updated documentation](#new-docs)
+- [Backwards incompatibilities](#incompat)
+- [Migrating from spaCy v2.x](#migrating)
+
+</Infobox>
+
+</Grid>
+
 ## New Features {#features}
 
 ### New training workflow and config system {#features-training}
@@ -28,6 +54,8 @@ menu:
 
 ### Transformer-based pipelines {#features-transformers}
 
+![Pipeline components listening to shared embedding component](../images/tok2vec-listener.svg)
+
 <Infobox title="Details & Documentation" emoji="📖" list>
 
 - **Usage:** [Embeddings & Transformers](/usage/embeddings-transformers),
@@ -38,7 +66,7 @@ menu:
 - **Architectures: ** [TransformerModel](/api/architectures#TransformerModel),
   [Tok2VecListener](/api/architectures#transformers-Tok2VecListener),
   [Tok2VecTransformer](/api/architectures#Tok2VecTransformer)
-- **Models:** [`en_core_bert_sm`](/models/en)
+- **Models:** [`en_core_trf_lg`](/models/en)
 - **Implementation:**
   [`spacy-transformers`](https://github.com/explosion/spacy-transformers)
 
@@ -46,8 +74,53 @@ menu:
 
 ### Custom models using any framework {#features-custom-models}
 
+<Infobox title="Details & Documentation" emoji="📖" list>
+
+<!-- TODO: link to new custom models page -->
+
+- **Thinc:**
+  [Wrapping PyTorch, TensorFlow & MXNet](https://thinc.ai/docs/usage-frameworks)
+- **API:** [Model architectures](/api/architectures), [`Pipe`](/api/pipe)
+
+</Infobox>
+
 ### Manage end-to-end workflows with projects {#features-projects}
 
+<!-- TODO: update example -->
+
+> #### Example
+>
+> ```cli
+> # Clone a project template
+> $ python -m spacy project clone example
+> $ cd example
+> # Download data assets
+> $ python -m spacy project assets
+> # Run a workflow
+> $ python -m spacy project run train
+> ```
+
+spaCy projects let you manage and share **end-to-end spaCy workflows** for
+different **use cases and domains**, and orchestrate training, packaging and
+serving your custom models. You can start off by cloning a pre-defined project
+template, adjust it to fit your needs, load in your data, train a model, export
+it as a Python package and share the project templates with your team. spaCy
+projects also make it easy to **integrate with other tools** in the data science
+and machine learning ecosystem, including [DVC](/usage/projects#dvc) for data
+version control, [Prodigy](/usage/projects#prodigy) for creating labeled data,
+[Streamlit](/usage/projects#streamlit) for building interactive apps,
+[FastAPI](/usage/projects#fastapi) for serving models in production,
+[Ray](/usage/projects#ray) for parallel training,
+[Weights & Biases](/usage/projects#wandb) for experiment tracking, and more!
+
+<!-- <Project id="some_example_project">
+
+The easiest way to get started with an end-to-end training process is to clone a
+[project](/usage/projects) template. Projects let you manage multi-step
+workflows, from data preprocessing to training and packaging your model.
+
+</Project>-->
+
 <Infobox title="Details & Documentation" emoji="📖" list>
 
 - **Usage:** [spaCy projects](/usage/projects),
@@ -59,6 +132,16 @@ menu:
 
 ### New built-in pipeline components {#features-pipeline-components}
 
+spaCy v3.0 includes several new trainable and rule-based components that you can
+add to your pipeline and customize for your use case:
+
+> #### Example
+>
+> ```python
+> nlp = spacy.blank("en")
+> nlp.add_pipe("lemmatizer")
+> ```
+
 | Name                                            | Description                                                                                                                                                                                                             |
 | ----------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
 | [`SentenceRecognizer`](/api/sentencerecognizer) | Trainable component for sentence segmentation.                                                                                                                                                                          |
@@ -78,15 +161,37 @@ menu:
 
 ### New and improved pipeline component APIs {#features-components}
 
-- `Language.factory`, `Language.component`
-- `Language.analyze_pipes`
-- Adding components from other models
+> #### Example
+>
+> ```python
+> @Language.component("my_component")
+> def my_component(doc):
+>     return doc
+>
+> nlp.add_pipe("my_component")
+> nlp.add_pipe("ner", source=other_nlp)
+> nlp.analyze_pipes(pretty=True)
+> ```
+
+Defining, configuring, reusing, training and analyzing pipeline components is
+now easier and more convenient. The `@Language.component` and
+`@Language.factory` decorators let you register your component, define its
+default configuration and metadata, like the attribute values it assigns and
+requires. Any custom component can be included during training, and sourcing
+components from existing pretrained models lets you **mix and match custom
+pipelines**. The `nlp.analyze_pipes` method outputs structured information about
+the current pipeline and its components, including the attributes they assign,
+the scores they compute during training and whether any required attributes
+aren't set.
 
 <Infobox title="Details & Documentation" emoji="📖" list>
 
 - **Usage:** [Custom components](/usage/processing-pipelines#custom_components),
-  [Defining components during training](/usage/training#config-components)
-- **API:** [`Language`](/api/language)
+  [Defining components for training](/usage/training#config-components)
+- **API:** [`@Language.component`](/api/language#component),
+  [`@Language.factory`](/api/language#factory),
+  [`Language.add_pipe`](/api/language#add_pipe),
+  [`Language.analyze_pipes`](/api/language#analyze_pipes)
 - **Implementation:**
   [`spacy/language.py`](https://github.com/explosion/spaCy/tree/develop/spacy/language.py)
 
@@ -136,13 +241,14 @@ in your config and see validation errors if the argument values don't match.
 
 </Infobox>
 
-### New methods, attributes and commands
+### New methods, attributes and commands {#new-methods}
 
 The following methods, attributes and commands are new in spaCy v3.0.
 
 | Name                                                                                                                          | Description                                                                                                                                                                                      |
 | ----------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
 | [`Token.lex`](/api/token#attributes)                                                                                          | Access a token's [`Lexeme`](/api/lexeme).                                                                                                                                                        |
+| [`Token.morph`](/api/token#attributes) [`Token.morph_`](/api/token#attributes)                                                | Access a token's morphological analysis.                                                                                                                                                         |
 | [`Language.select_pipes`](/api/language#select_pipes)                                                                         | Contextmanager for enabling or disabling specific pipeline components for a block.                                                                                                               |
 | [`Language.analyze_pipes`](/api/language#analyze_pipes)                                                                       | [Analyze](/usage/processing-pipelines#analysis) components and their interdependencies.                                                                                                          |
 | [`Language.resume_training`](/api/language#resume_training)                                                                   | Experimental: continue training a pretrained model and initialize "rehearsal" for components that implement a `rehearse` method to prevent catastrophic forgetting.                              |
@@ -153,9 +259,53 @@ The following methods, attributes and commands are new in spaCy v3.0.
 | [`Pipe.score`](/api/pipe#score)                                                                                               | Method on trainable pipeline components that returns a dictionary of evaluation scores.                                                                                                          |
 | [`registry`](/api/top-level#registry)                                                                                         | Function registry to map functions to string names that can be referenced in [configs](/usage/training#config).                                                                                  |
 | [`util.load_meta`](/api/top-level#util.load_meta) [`util.load_config`](/api/top-level#util.load_config)                       | Updated helpers for loading a model's [`meta.json`](/api/data-formats#meta) and [`config.cfg`](/api/data-formats#config).                                                                        |
+| [`util.get_installed_models`](/api/top-level#util.get_installed_models)                                                       | Names of all models installed in the environment.                                                                                                                                                |
 | [`init config`](/api/cli#init-config) [`init fill-config`](/api/cli#init-fill-config) [`debug config`](/api/cli#debug-config) | CLI commands for initializing, auto-filling and debugging [training configs](/usage/training).                                                                                                   |
 | [`project`](/api/cli#project)                                                                                                 | Suite of CLI commands for cloning, running and managing [spaCy projects](/usage/projects).                                                                                                       |
 
+### New and updated documentation {#new-docs}
+
+<Grid cols={2} gutterBottom={false}>
+
+<div>
+
+To help you get started with spaCy v3.0 and the new features, we've added
+several new or rewritten documentation pages, including a new usage guide on
+[embeddings, transformers and transfer learning](/usage/embeddings-transformers),
+a guide on [training models](/usage/training) rewritten from scratch, a page
+explaining the new [spaCy projects](/usage/projects) and updated usage
+documentation on
+[custom pipeline components](/usage/processing-pipelines#custom-components).
+We've also added a bunch of new illustrations and new API reference pages
+documenting spaCy's machine learning [model architectures](/api/architectures)
+and the expected [data formats](/api/data-formats). API pages about
+[pipeline components](/api/#architecture-pipeline) now include more information,
+like the default config and implementation, and we've adopted a more detailed
+format for documenting argument and return types.
+
+</div>
+
+[![Library architecture](../images/architecture.svg)](/api)
+
+</Grid>
+
+<Infobox title="New or reworked documentation" emoji="📖" list>
+
+- **Usage:** [Embeddings & Transformers](/usage/embeddings-transformers),
+  [Training models](/usage/training), [Projects](/usage/projects),
+  [Custom pipeline components](/usage/processing-pipelines#custom-components),
+  [Custom tokenizers](/usage/linguistic-features#custom-tokenizer)
+- **API Reference:** [Library architecture](/api),
+  [Model architectures](/api/architectures), [Data formats](/api/data-formats)
+- **New Classes:** [`Example`](/api/example), [`Tok2Vec`](/api/tok2vec),
+  [`Transformer`](/api/transformer), [`Lemmatizer`](/api/lemmatizer),
+  [`Morphologizer`](/api/morphologizer),
+  [`AttributeRuler`](/api/attributeruler),
+  [`SentenceRecognizer`](/api/sentencerecognizer), [`Pipe`](/api/pipe),
+  [`Corpus`](/api/corpus)
+
+</Infobox>
+
 ## Backwards Incompatibilities {#incompat}
 
 As always, we've tried to keep the breaking changes to a minimum and focus on
@@ -212,15 +362,16 @@ Note that spaCy v3.0 now requires **Python 3.6+**.
 
 ### Removed or renamed API {#incompat-removed}
 
-| Removed                                                | Replacement                                                                               |
-| ------------------------------------------------------ | ----------------------------------------------------------------------------------------- |
-| `Language.disable_pipes`                               | [`Language.select_pipes`](/api/language#select_pipes)                                     |
-| `GoldParse`                                            | [`Example`](/api/example)                                                                 |
-| `GoldCorpus`                                           | [`Corpus`](/api/corpus)                                                                   |
-| `KnowledgeBase.load_bulk` `KnowledgeBase.dump`         | [`KnowledgeBase.from_disk`](/api/kb#from_disk) [`KnowledgeBase.to_disk`](/api/kb#to_disk) |
-| `spacy debug-data`                                     | [`spacy debug data`](/api/cli#debug-data)                                                 |
-| `spacy profile`                                        | [`spacy debug profile`](/api/cli#debug-profile)                                           |
-| `spacy link` `util.set_data_path` `util.get_data_path` | not needed, model symlinks are deprecated                                                 |
+| Removed                                                  | Replacement                                                                               |
+| -------------------------------------------------------- | ----------------------------------------------------------------------------------------- |
+| `Language.disable_pipes`                                 | [`Language.select_pipes`](/api/language#select_pipes)                                     |
+| `GoldParse`                                              | [`Example`](/api/example)                                                                 |
+| `GoldCorpus`                                             | [`Corpus`](/api/corpus)                                                                   |
+| `KnowledgeBase.load_bulk` `KnowledgeBase.dump`           | [`KnowledgeBase.from_disk`](/api/kb#from_disk) [`KnowledgeBase.to_disk`](/api/kb#to_disk) |
+| `spacy init-model`                                       | [`spacy init model`](/api/cli#init-model)                                                 |
+| `spacy debug-data`                                       | [`spacy debug data`](/api/cli#debug-data)                                                 |
+| `spacy profile`                                          | [`spacy debug profile`](/api/cli#debug-profile)                                           |
+| `spacy link`, `util.set_data_path`, `util.get_data_path` | not needed, model symlinks are deprecated                                                 |
 
 The following deprecated methods, attributes and arguments were removed in v3.0.
 Most of them have been **deprecated for a while** and many would previously
@@ -236,7 +387,7 @@ on them.
 | `Language.tagger`, `Language.parser`, `Language.entity`                                                                 | [`Language.get_pipe`](/api/language#get_pipe)                                                                                                              |
 | keyword-arguments like `vocab=False` on `to_disk`, `from_disk`, `to_bytes`, `from_bytes`                                | `exclude=["vocab"]`                                                                                                                                        |
 | `n_threads` argument on [`Tokenizer`](/api/tokenizer), [`Matcher`](/api/matcher), [`PhraseMatcher`](/api/phrasematcher) | `n_process`                                                                                                                                                |
-| `verbose` argument on [`Language.evaluate`]                                                                             | logging                                                                                                                                                    |
+| `verbose` argument on [`Language.evaluate`](/api/language#evaluate)                                                     | logging (`DEBUG`)                                                                                                                                          |
 | `SentenceSegmenter` hook, `SimilarityHook`                                                                              | [user hooks](/usage/processing-pipelines#custom-components-user-hooks), [`Sentencizer`](/api/sentencizer), [`SentenceRecognizer`](/api/sentenceregognizer) |
 
 ## Migrating from v2.x {#migrating}
diff --git a/website/docs/usage/visualizers.md b/website/docs/usage/visualizers.md
index f33340063..4ba0112b6 100644
--- a/website/docs/usage/visualizers.md
+++ b/website/docs/usage/visualizers.md
@@ -121,10 +121,10 @@ import DisplacyEntHtml from 'images/displacy-ent2.html'
 
 The entity visualizer lets you customize the following `options`:
 
-| Argument | Description                                                                                                                |
-| -------- | -------------------------------------------------------------------------------------------------------------------------- |
-| `ents`   | Entity types to highlight (`None` for all types). Defaults to `None`. ~~Optional[List[str]]~~                              | `None` |
-| `colors` | Color overrides. Entity types in uppercase should be mapped to color names or values. Defaults to `{}`. ~~Dict[str, str]~~ |
+| Argument | Description                                                                                                   |
+| -------- | ------------------------------------------------------------------------------------------------------------- |
+| `ents`   | Entity types to highlight (`None` for all types). Defaults to `None`. ~~Optional[List[str]]~~                 |
+| `colors` | Color overrides. Entity types should be mapped to color names or values. Defaults to `{}`. ~~Dict[str, str]~~ |
 
 If you specify a list of `ents`, only those entity types will be rendered – for
 example, you can choose to display `PERSON` entities. Internally, the visualizer
diff --git a/website/src/components/link.js b/website/src/components/link.js
index 3644479c5..acded7d0d 100644
--- a/website/src/components/link.js
+++ b/website/src/components/link.js
@@ -6,7 +6,7 @@ import classNames from 'classnames'
 
 import Icon from './icon'
 import classes from '../styles/link.module.sass'
-import { isString } from './util'
+import { isString, isImage } from './util'
 
 const internalRegex = /(http(s?)):\/\/(prodi.gy|spacy.io|irl.spacy.io|explosion.ai|course.spacy.io)/gi
 
@@ -39,7 +39,7 @@ export default function Link({
     const dest = to || href
     const external = forceExternal || /(http(s?)):\/\//gi.test(dest)
     const icon = getIcon(dest)
-    const withIcon = !hidden && !hideIcon && !!icon
+    const withIcon = !hidden && !hideIcon && !!icon && !isImage(children)
     const sourceWithText = withIcon && isString(children)
     const linkClassNames = classNames(classes.root, className, {
         [classes.hidden]: hidden,
diff --git a/website/src/components/util.js b/website/src/components/util.js
index 844f2c133..a9c6efcf5 100644
--- a/website/src/components/util.js
+++ b/website/src/components/util.js
@@ -46,6 +46,17 @@ export function isString(obj) {
     return typeof obj === 'string' || obj instanceof String
 }
 
+/**
+ * @param obj - The object to check.
+ * @returns {boolean} - Whether the object is a valid React element whose `name` prop is 'img' or whose `className` prop is 'gatsby-resp-image-wrapper'.
+ */
+export function isImage(obj) {
+    if (!obj || !React.isValidElement(obj)) {
+        return false
+    }
+    return obj.props.name == 'img' || obj.props.className == 'gatsby-resp-image-wrapper'
+}
+
 /**
  * @param obj - The object to check.
  * @returns {boolean} - Whether the object is empty.
diff --git a/website/src/styles/layout.sass b/website/src/styles/layout.sass
index 775523190..b71eccd80 100644
--- a/website/src/styles/layout.sass
+++ b/website/src/styles/layout.sass
@@ -363,7 +363,7 @@ body [id]:target
         color: var(--color-red-medium)
         background: var(--color-red-transparent)
 
-    &.italic
+    &.italic, &.comment
         font-style: italic
 
 
@@ -384,9 +384,11 @@ body [id]:target
 // Settings for ini syntax (config files)
 [class*="language-ini"]
     color: var(--syntax-comment)
+    font-style: italic !important
 
     .token
         color: var(--color-subtle)
+        font-style: normal !important
 
 
 .gatsby-highlight-code-line
@@ -424,6 +426,7 @@ body [id]:target
 
     .cm-comment
         color: var(--syntax-comment)
+        font-style: italic
 
     .cm-keyword
         color: var(--syntax-keyword)
diff --git a/website/src/widgets/quickstart-training-generator.js b/website/src/widgets/quickstart-training-generator.js
index f70aedc8c..e69de29bb 100644
--- a/website/src/widgets/quickstart-training-generator.js
+++ b/website/src/widgets/quickstart-training-generator.js
@@ -1,12 +0,0 @@
-// This file was auto-generated by jinja_to_js.py based on quickstart_training.jinja
-import jinjaToJS from "jinja-to-js";export default function templateQuickstartTraining(ctx) {
-    var __result = "";
-    var __tmp;
-    var __runtime = jinjaToJS.runtime;
-    var __filters = jinjaToJS.filters;
-    var __globals = jinjaToJS.globals;
-    var context = jinjaToJS.createContext(ctx);
-    var use_transformer = context.transformer_data && context.hardware!=="cpu";var transformer = (use_transformer ? context.transformer_data[context.optimize] : {});__result += "[paths]\ntrain = \"\"\ndev = \"\"\n\n[system]\nuse_pytorch_for_gpu_memory = ";__result += "" + __runtime.escape((__tmp = ((use_transformer ? "true" : "false"))) == null ? "" : __tmp);__result += "\n\n[nlp]\nlang = \"";__result += "" + __runtime.escape((__tmp = (context.lang)) == null ? "" : __tmp);__result += "\"";var full_pipeline = [(use_transformer ? "transformer" : "tok2vec")].concat(context.components);__result += "\npipeline = ";__result += "" + ((__tmp = (JSON.stringify(full_pipeline).split("'").join("\""))) == null ? "" : __tmp);__result += "\ntokenizer = {\"@tokenizers\": \"spacy.Tokenizer.v1\"}\n\n[components]\n\n";if(__runtime.boolean(use_transformer)){__result += "[components.transformer]\nfactory = \"transformer\"\n\n[components.transformer.model]\n@architectures = \"spacy-transformers.TransformerModel.v1\"\nname = \"";__result += "" + __runtime.escape((__tmp = (transformer["name"])) == null ? 
"" : __tmp);__result += "\"\ntokenizer_config = {\"use_fast\": true}\n\n[components.transformer.model.get_spans]\n@span_getters = \"strided_spans.v1\"\nwindow = 128\nstride = 96\n\n";if(context.components.includes("tagger")){__result += "\n[components.tagger]\nfactory = \"tagger\"\n\n[components.tagger.model]\n@architectures = \"spacy.Tagger.v1\"\nnO = null\n\n[components.tagger.model.tok2vec]\n@architectures = \"spacy-transformers.Tok2VecListener.v1\"\ngrad_factor = 1.0\n\n[components.tagger.model.tok2vec.pooling]\n@layers = \"reduce_mean.v1\"";}__result += "\n\n";if(context.components.includes("parser")){__result += "[components.parser]\nfactory = \"parser\"\n\n[components.parser.model]\n@architectures = \"spacy.TransitionBasedParser.v1\"\nnr_feature_tokens = 8\nhidden_width = 128\nmaxout_pieces = 3\nuse_upper = false\nnO = null\n\n[components.parser.model.tok2vec]\n@architectures = \"spacy-transformers.Tok2VecListener.v1\"\ngrad_factor = 1.0\n\n[components.parser.model.tok2vec.pooling]\n@layers = \"reduce_mean.v1\"";}__result += "\n\n";if(context.components.includes("ner")){__result += "[components.ner]\nfactory = \"ner\"\n\n[components.ner.model]\n@architectures = \"spacy.TransitionBasedParser.v1\"\nnr_feature_tokens = 3\nhidden_width = 64\nmaxout_pieces = 2\nuse_upper = false\nnO = null\n\n[components.ner.model.tok2vec]\n@architectures = \"spacy-transformers.Tok2VecListener.v1\"\ngrad_factor = 1.0\n\n[components.ner.model.tok2vec.pooling]\n@layers = \"reduce_mean.v1\"\n";}__result += "\n";} else {if(context.hardware==="gpu"){__result += "# There are no recommended transformer weights available for language '";__result += "" + __runtime.escape((__tmp = (context.lang)) == null ? 
"" : __tmp);__result += "'\n# yet, so the pipeline described here is not transformer-based.";}__result += "\n\n[components.tok2vec]\nfactory = \"tok2vec\"\n\n[components.tok2vec.model]\n@architectures = \"spacy.Tok2Vec.v1\"\n\n[components.tok2vec.model.embed]\n@architectures = \"spacy.MultiHashEmbed.v1\"\nwidth = ${components.tok2vec.model.encode.width}\nrows = ";__result += "" + __runtime.escape((__tmp = ((context.optimize==="efficiency" ? 2000 : 7000))) == null ? "" : __tmp);__result += "\nalso_embed_subwords = ";__result += "" + __runtime.escape((__tmp = ((context.has_letters ? true : false))) == null ? "" : __tmp);__result += "\nalso_use_static_vectors = ";__result += "" + __runtime.escape((__tmp = ((context.optimize==="accuracy" ? true : false))) == null ? "" : __tmp);__result += "\n\n[components.tok2vec.model.encode]\n@architectures = \"spacy.MaxoutWindowEncoder.v1\"\nwidth = ";__result += "" + __runtime.escape((__tmp = ((context.optimize==="efficiency" ? 96 : 256))) == null ? "" : __tmp);__result += "\ndepth = ";__result += "" + __runtime.escape((__tmp = ((context.optimize==="efficiency" ? 4 : 8))) == null ? 
"" : __tmp);__result += "\nwindow_size = 1\nmaxout_pieces = 3\n\n";if(context.components.includes("tagger")){__result += "\n[components.tagger]\nfactory = \"tagger\"\n\n[components.tagger.model]\n@architectures = \"spacy.Tagger.v1\"\nnO = null\n\n[components.tagger.model.tok2vec]\n@architectures = \"spacy.Tok2VecListener.v1\"\nwidth = ${components.tok2vec.model.encode.width}";}__result += "\n\n";if(context.components.includes("parser")){__result += "[components.parser]\nfactory = \"parser\"\n\n[components.parser.model]\n@architectures = \"spacy.TransitionBasedParser.v1\"\nnr_feature_tokens = 8\nhidden_width = 128\nmaxout_pieces = 3\nuse_upper = true\nnO = null\n\n[components.parser.model.tok2vec]\n@architectures = \"spacy.Tok2VecListener.v1\"\nwidth = ${components.tok2vec.model.encode.width}";}__result += "\n\n";if(context.components.includes("ner")){__result += "\n[components.ner]\nfactory = \"ner\"\n\n[components.ner.model]\n@architectures = \"spacy.TransitionBasedParser.v1\"\nnr_feature_tokens = 6\nhidden_width = 64\nmaxout_pieces = 2\nuse_upper = true\nnO = null\n\n[components.ner.model.tok2vec]\n@architectures = \"spacy.Tok2VecListener.v1\"\nwidth = ${components.tok2vec.model.encode.width}\n";}__result += "\n";}__result += "\n\n";__runtime.each(context.components,function(pipe){var __$0 = context.pipe;context.pipe = pipe;__result += "\n";if(!["tagger","parser","ner"].includes(pipe)){__result += "\n";__result += "\n[components.";__result += "" + __runtime.escape((__tmp = (pipe)) == null ? "" : __tmp);__result += "]\nfactory = \"";__result += "" + __runtime.escape((__tmp = (pipe)) == null ? "" : __tmp);__result += "\"\n";}__result += "\n";context.pipe = __$0;});__result += "\n\n[training]\n";if(__runtime.boolean(use_transformer) || context.optimize==="efficiency" || !__runtime.boolean(context.word_vectors)){__result += "vectors = null\n";} else {__result += "vectors = \"";__result += "" + __runtime.escape((__tmp = (context.word_vectors)) == null ? 
"" : __tmp);__result += "\"\n";}if(__runtime.boolean(use_transformer)){__result += "accumulate_gradient = ";__result += "" + __runtime.escape((__tmp = (transformer["size_factor"])) == null ? "" : __tmp);__result += "\n";}__result += "\n\n[training.optimizer]\n@optimizers = \"Adam.v1\"\n\n[training.optimizer.learn_rate]\n@schedules = \"warmup_linear.v1\"\nwarmup_steps = 250\ntotal_steps = 20000\ninitial_rate = 5e-5\n\n[training.train_corpus]\n@readers = \"spacy.Corpus.v1\"\npath = ${paths.train}\nmax_length = ";__result += "" + __runtime.escape((__tmp = ((context.hardware==="gpu" ? 500 : 0))) == null ? "" : __tmp);__result += "\n\n[training.dev_corpus]\n@readers = \"spacy.Corpus.v1\"\npath = ${paths.dev}\nmax_length = 0\n\n";if(__runtime.boolean(use_transformer)){__result += "\n[training.batcher]\n@batchers = \"batch_by_padded.v1\"\ndiscard_oversize = true\nsize = 2000\nbuffer = 256";} else {__result += "\n[training.batcher]\n@batchers = \"batch_by_words.v1\"\ndiscard_oversize = false\ntolerance = 0.2\n\n[training.batcher.size]\n@schedules = \"compounding.v1\"\nstart = 100\nstop = 1000\ncompound = 1.001\n";}__result += "\n\n[training.score_weights]";if(context.components.includes("tagger")){__result += "\ntag_acc = ";__result += "" + __runtime.escape((__tmp = (Math.round((1.0 / __filters.size(context.components)+ Number.EPSILON) * 10**2) / 10**2)) == null ? "" : __tmp);}if(context.components.includes("parser")){__result += "\ndep_uas = 0.0\ndep_las = ";__result += "" + __runtime.escape((__tmp = (Math.round((1.0 / __filters.size(context.components)+ Number.EPSILON) * 10**2) / 10**2)) == null ? "" : __tmp);__result += "\nsents_f = 0.0";}if(context.components.includes("ner")){__result += "\nents_f = ";__result += "" + __runtime.escape((__tmp = (Math.round((1.0 / __filters.size(context.components)+ Number.EPSILON) * 10**2) / 10**2)) == null ? "" : __tmp);__result += "\nents_p = 0.0\nents_r = 0.0";}
-    return __result;
-}
-export const DATA = {"en":{"word_vectors":"en_vectors_web_lg","transformer":{"efficiency":{"name":"roberta-base","size_factor":3},"accuracy":{"name":"roberta-base","size_factor":3}}},"de":{"word_vectors":null,"transformer":{"efficiency":{"name":"bert-base-german-cased","size_factor":3},"accuracy":{"name":"bert-base-german-cased","size_factor":3}}},"fr":{"word_vectors":null,"transformer":{"efficiency":{"name":"camembert-base","size_factor":3},"accuracy":{"name":"camembert-base","size_factor":3}}},"es":{"word_vectors":null,"transformer":{"efficiency":{"name":"mrm8488/RuPERTa-base","size_factor":3},"accuracy":{"name":"mrm8488/RuPERTa-base","size_factor":3}}},"sv":{"word_vectors":null,"transformer":{"efficiency":{"name":"KB/bert-base-swedish-cased","size_factor":3},"accuracy":{"name":"KB/bert-base-swedish-cased","size_factor":3}}},"fi":{"word_vectors":null,"transformer":{"efficiency":{"name":"TurkuNLP/bert-base-finnish-cased-v1","size_factor":3},"accuracy":{"name":"TurkuNLP/bert-base-finnish-cased-v1","size_factor":3}}},"el":{"word_vectors":null,"transformer":{"efficiency":{"name":"nlpaueb/bert-base-greek-uncased-v1","size_factor":3},"accuracy":{"name":"nlpaueb/bert-base-greek-uncased-v1","size_factor":3}}},"tr":{"word_vectors":null,"transformer":{"efficiency":{"name":"dbmdz/bert-base-turkish-cased","size_factor":3},"accuracy":{"name":"dbmdz/bert-base-turkish-cased","size_factor":3}}},"zh":{"word_vectors":null,"transformer":{"efficiency":{"name":"bert-base-chinese","size_factor":3},"accuracy":{"name":"bert-base-chinese","size_factor":3}},"has_letters":false},"ar":{"word_vectors":null,"transformer":{"efficiency":{"name":"asafaya/bert-base-arabic","size_factor":3},"accuracy":{"name":"asafaya/bert-base-arabic","size_factor":3}}},"pl":{"word_vectors":null,"transformer":{"efficiency":{"name":"dkleczek/bert-base-polish-cased-v1","size_factor":3},"accuracy":{"name":"dkleczek/bert-base-polish-cased-v1","size_factor":3}}}}
\ No newline at end of file