From f2caa01bb31f8c578e3d53fed727571217721587 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adrian=20W=C3=A4lchli?=
Date: Sun, 5 Mar 2023 18:03:57 +0100
Subject: [PATCH] Document gradient clipping in Fabric (#16943)

---
 docs/source-fabric/api/fabric_methods.rst | 35 +++++++++++++++++++++++
 src/lightning/fabric/fabric.py            | 13 +++++++++
 2 files changed, 48 insertions(+)

diff --git a/docs/source-fabric/api/fabric_methods.rst b/docs/source-fabric/api/fabric_methods.rst
index 650e525be4..b26c834f04 100644
--- a/docs/source-fabric/api/fabric_methods.rst
+++ b/docs/source-fabric/api/fabric_methods.rst
@@ -61,6 +61,41 @@ This replaces any occurrences of ``loss.backward()`` and makes your code acceler
     fabric.backward(loss)
 
 
+clip_gradients
+==============
+
+Clip the gradients of the model to a given max value or max norm.
+This is useful if your model experiences *exploding gradients* during training.
+
+.. code-block:: python
+
+    # Clip gradients to a max value of +/- 0.5
+    fabric.clip_gradients(model, clip_val=0.5)
+
+    # Clip gradients such that their total norm is no bigger than 2.0
+    fabric.clip_gradients(model, max_norm=2.0)
+
+    # By default, clipping by norm uses the 2-norm
+    fabric.clip_gradients(model, max_norm=2.0, norm_type=2)
+
+    # You can also choose the infinity-norm, which clips the largest
+    # element among all gradients
+    fabric.clip_gradients(model, max_norm=2.0, norm_type="inf")
+
+You can also restrict gradient clipping to a single layer, or to the parameters a particular optimizer is referencing (when using multiple optimizers):
+
+.. code-block:: python
+
+    # Clip gradients on a specific layer of your model
+    fabric.clip_gradients(model.fc3, clip_val=1.0)
+
+    # Clip gradients for a specific optimizer when using multiple optimizers
+    fabric.clip_gradients(model, optimizer1, clip_val=1.0)
+
+The :meth:`~lightning.fabric.fabric.Fabric.clip_gradients` method is agnostic to the precision and strategy being used.
+Note: Gradient clipping with FSDP is not yet fully supported.
+
+
 to_device
 =========
 
diff --git a/src/lightning/fabric/fabric.py b/src/lightning/fabric/fabric.py
index c6b9423b40..5819769e80 100644
--- a/src/lightning/fabric/fabric.py
+++ b/src/lightning/fabric/fabric.py
@@ -372,6 +372,19 @@ class Fabric:
         norm_type: Union[float, int] = 2.0,
         error_if_nonfinite: bool = True,
     ) -> Optional[torch.Tensor]:
+        """Clip the gradients of the model to a given max value or max norm.
+
+        Args:
+            module: The module whose parameters should be clipped. This can also be just one submodule of your model.
+            optimizer: Optional optimizer. If passed, clipping is applied only to the parameters that the
+                optimizer is referencing.
+            clip_val: If passed, gradients will be clipped to this value.
+            max_norm: If passed, clips the gradients such that their p-norm is no larger than the given value.
+            norm_type: The type of norm if ``max_norm`` was passed. Can be ``'inf'`` for the infinity norm.
+                Default is the 2-norm.
+            error_if_nonfinite: If ``True``, an error is raised if the total norm of the gradients is NaN or infinite.
+        """
         if clip_val is not None and max_norm is not None:
             raise ValueError(
                 "Only one of `clip_val` or `max_norm` can be set as this specifies the underlying clipping algorithm!"
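
For context, here is a minimal sketch (not part of the patch) of where the documented ``clip_gradients`` call fits in a Fabric training step: clip after ``fabric.backward(loss)`` and before ``optimizer.step()`` so the update uses the clipped gradients. The model, optimizer, and synthetic batches below are illustrative placeholders, not code from this repository.

.. code-block:: python

    import torch
    from lightning.fabric import Fabric

    fabric = Fabric(accelerator="cpu", devices=1)
    fabric.launch()

    # Placeholder model and optimizer; any torch.nn.Module and optimizer work the same way
    model = torch.nn.Linear(32, 2)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
    model, optimizer = fabric.setup(model, optimizer)

    # Synthetic batches stand in for a real dataloader
    batches = [(torch.randn(8, 32), torch.randint(0, 2, (8,))) for _ in range(4)]

    for inputs, target in batches:
        optimizer.zero_grad()
        loss = torch.nn.functional.cross_entropy(model(inputs), target)
        fabric.backward(loss)
        # Clip between backward and step; here by total 2-norm, as in the new docs section
        fabric.clip_gradients(model, optimizer, max_norm=2.0)
        optimizer.step()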