diff --git a/spacy/ml/__init__.py b/spacy/ml/__init__.py
index c382d915b..d44c0f850 100644
--- a/spacy/ml/__init__.py
+++ b/spacy/ml/__init__.py
@@ -1 +1,2 @@
+from .callbacks import create_models_with_nvtx_range # noqa: F401
 from .models import *  # noqa: F401, F403
diff --git a/spacy/ml/callbacks.py b/spacy/ml/callbacks.py
new file mode 100644
index 000000000..c09f911b7
--- /dev/null
+++ b/spacy/ml/callbacks.py
@@ -0,0 +1,37 @@
+from functools import partial
+from typing import Type, Callable, TYPE_CHECKING
+
+from thinc.layers import with_nvtx_range
+from thinc.model import Model, wrap_model_recursive
+
+from ..util import registry
+
+if TYPE_CHECKING:
+    # This lets us add type hints for mypy etc. without causing circular imports
+    from ..language import Language  # noqa: F401
+
+
+@registry.callbacks("spacy.models_with_nvtx_range.v1")
+def create_models_with_nvtx_range(
+    forward_color: int = -1, backprop_color: int = -1
+) -> Callable[["Language"], "Language"]:
+    """Build a callback that applies with_nvtx_range to all trainable pipe models."""
+
+    def models_with_nvtx_range(nlp):
+        # Walk all trainable models jointly so callbacks are not wrapped twice.
+        root = Model(
+            "wrap_with_nvtx_range",
+            forward=lambda model, X, is_train: ...,
+            layers=[
+                component.model
+                for _, component in nlp.components
+                if getattr(component, "is_trainable", False)
+            ],
+        )
+        for layer in root.walk():
+            with_nvtx_range(
+                layer, forward_color=forward_color, backprop_color=backprop_color
+            )
+        return nlp
+
+    return models_with_nvtx_range
diff --git a/website/docs/api/top-level.md b/website/docs/api/top-level.md
index 3cf81ae93..f6910bd5b 100644
--- a/website/docs/api/top-level.md
+++ b/website/docs/api/top-level.md
@@ -817,6 +817,26 @@ from the specified model. Intended for use in `[initialize.before_init]`.
 | `vocab`     | The pipeline to copy the vocab from. The vocab includes the lookups and vectors. Defaults to `None`. ~~Optional[str]~~  |
 | **CREATES** | A function that takes the current `nlp` object and modifies its `tokenizer` and `vocab`. ~~Callable[[Language], None]~~ |
 
+### spacy.models_with_nvtx_range.v1 {#models_with_nvtx_range tag="registered function"}
+
+> #### Example config
+>
+> ```ini
+> [nlp]
+> after_pipeline_creation = {"@callbacks":"spacy.models_with_nvtx_range.v1"}
+> ```
+
+Recursively wrap the models in each pipe using [NVTX](https://nvidia.github.io/NVTX/)
+range markers. These markers aid in GPU profiling by attributing specific operations
+to a ~~Model~~'s forward or backprop passes.
+
+| Name             | Description                                                                                                                  |
+|------------------|------------------------------------------------------------------------------------------------------------------------------|
+| `forward_color`  | Color identifier for forward passes. Defaults to `-1`. ~~int~~                                                               |
+| `backprop_color` | Color identifier for backpropagation passes. Defaults to `-1`. ~~int~~                                                       |
+| **CREATES**      | A function that takes the current `nlp` and wraps forward/backprop passes in NVTX ranges. ~~Callable[[Language], Language]~~ |
+
+
 ## Training data and alignment {#gold source="spacy/training"}
 
 ### training.offsets_to_biluo_tags {#offsets_to_biluo_tags tag="function"}