diff --git a/spacy/errors.py b/spacy/errors.py
index 453e98b59..7cf9e54e4 100644
--- a/spacy/errors.py
+++ b/spacy/errors.py
@@ -501,6 +501,9 @@ class Errors:
     E202 = ("Unsupported alignment mode '{mode}'. Supported modes: {modes}.")
 
     # New errors added in v3.x
+    E872 = ("Unable to copy tokenizer from base model due to different "
+            'tokenizer settings: current tokenizer config "{curr_config}" '
+            'vs. base model "{base_config}"')
     E873 = ("Unable to merge a span from doc.spans with key '{key}' and text "
             "'{text}'. This is likely a bug in spaCy, so feel free to open an "
             "issue: https://github.com/explosion/spaCy/issues")
diff --git a/spacy/training/__init__.py b/spacy/training/__init__.py
index 5111b80dc..055f30f42 100644
--- a/spacy/training/__init__.py
+++ b/spacy/training/__init__.py
@@ -8,3 +8,4 @@ from .iob_utils import biluo_tags_to_spans, tags_to_entities  # noqa: F401
 from .gold_io import docs_to_json, read_json_file  # noqa: F401
 from .batchers import minibatch_by_padded_size, minibatch_by_words  # noqa: F401
 from .loggers import console_logger, wandb_logger  # noqa: F401
+from .callbacks import create_copy_from_base_model  # noqa: F401
diff --git a/spacy/training/callbacks.py b/spacy/training/callbacks.py
new file mode 100644
index 000000000..2a21be98c
--- /dev/null
+++ b/spacy/training/callbacks.py
@@ -0,0 +1,47 @@
+from typing import Callable, Optional
+from ..errors import Errors
+from ..language import Language
+from ..util import load_model, registry, logger
+
+
+@registry.callbacks("spacy.copy_from_base_model.v1")
+def create_copy_from_base_model(
+    tokenizer: Optional[str] = None,
+    vocab: Optional[str] = None,
+) -> Callable[[Language], None]:
+    """Create a callback that copies the tokenizer and/or vocab from base
+    pipelines into the current nlp object.
+
+    tokenizer (Optional[str]): The pipeline to copy the tokenizer from. The
+        tokenizer is left untouched if this is not set.
+    vocab (Optional[str]): The pipeline to copy the vocab from. The vocab is
+        left untouched if this is not set.
+    RETURNS (Callable[[Language], None]): A callback that takes the nlp
+        object and modifies it in place. It raises a ValueError (E872) if the
+        tokenizer configs of the two pipelines differ.
+    """
+
+    def copy_from_base_model(nlp: Language) -> None:
+        if tokenizer:
+            logger.info("Copying tokenizer from: %s", tokenizer)
+            base_nlp = load_model(tokenizer)
+            curr_config = nlp.config["nlp"]["tokenizer"]
+            base_config = base_nlp.config["nlp"]["tokenizer"]
+            # Only copy when both pipelines use the same tokenizer config.
+            if curr_config != base_config:
+                raise ValueError(
+                    Errors.E872.format(
+                        curr_config=curr_config,
+                        base_config=base_config,
+                    )
+                )
+            nlp.tokenizer.from_bytes(base_nlp.tokenizer.to_bytes(exclude=["vocab"]))
+        if vocab:
+            logger.info("Copying vocab from: %s", vocab)
+            # Only reload if the vocab comes from a different pipeline than
+            # the tokenizer; otherwise base_nlp is already loaded above.
+            if tokenizer != vocab:
+                base_nlp = load_model(vocab)
+            nlp.vocab.from_bytes(base_nlp.vocab.to_bytes())
+
+    return copy_from_base_model
diff --git a/website/docs/api/top-level.md b/website/docs/api/top-level.md
index 38bc40b11..cfaa75bff 100644
--- a/website/docs/api/top-level.md
+++ b/website/docs/api/top-level.md
@@ -8,6 +8,7 @@ menu:
   - ['Readers', 'readers']
   - ['Batchers', 'batchers']
   - ['Augmenters', 'augmenters']
+  - ['Callbacks', 'callbacks']
   - ['Training & Alignment', 'gold']
   - ['Utility Functions', 'util']
 ---
@@ -785,6 +786,35 @@ useful for making the model less sensitive to capitalization.
 | `level`     | The percentage of texts that will be augmented. ~~float~~                                                                                                                    |
 | **CREATES** | A function that takes the current `nlp` object and an [`Example`](/api/example) and yields augmented `Example` objects. ~~Callable[[Language, Example], Iterator[Example]]~~ |
 
+## Callbacks {#callbacks source="spacy/training/callbacks.py" new="3"}
+
+The config supports [callbacks](/usage/training#custom-code-nlp-callbacks) at
+several points in the lifecycle that can be used to modify the `nlp` object.
+
+### spacy.copy_from_base_model.v1 {#copy_from_base_model tag="registered function"}
+
+> #### Example config
+>
+> ```ini
+> [initialize.before_init]
+> @callbacks = "spacy.copy_from_base_model.v1"
+> tokenizer = "en_core_sci_md"
+> vocab = "en_core_sci_md"
+> ```
+
+Copy the tokenizer and/or vocab from the specified models. It's similar to the
+v2 [base model](https://v2.spacy.io/api/cli#train) option and useful in
+combination with
+[sourced components](/usage/processing-pipelines#sourced-components) when
+fine-tuning an existing pipeline. The vocab includes the lookups and the vectors
+from the specified model. Intended for use in `[initialize.before_init]`.
+
+| Name        | Description                                                                                                             |
+| ----------- | ----------------------------------------------------------------------------------------------------------------------- |
+| `tokenizer` | The pipeline to copy the tokenizer from. Defaults to `None`. ~~Optional[str]~~                                          |
+| `vocab`     | The pipeline to copy the vocab from. The vocab includes the lookups and vectors. Defaults to `None`. ~~Optional[str]~~  |
+| **CREATES** | A function that takes the current `nlp` object and modifies its `tokenizer` and `vocab`. ~~Callable[[Language], None]~~ |
+
 ## Training data and alignment {#gold source="spacy/training"}
 
 ### training.offsets_to_biluo_tags {#offsets_to_biluo_tags tag="function"}