From 9ba76ce60c62f77ea729b3111d7eb79c16fdb7be Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Carlos=20Mochol=C3=AD?=
Date: Fri, 7 May 2021 16:10:24 +0200
Subject: [PATCH] Unify `configure_optimizers` docs (#7399)

---
 docs/source/common/optimizers.rst   | 119 ++++------------------------
 pytorch_lightning/core/lightning.py |  96 +++++++++++++++++-----
 2 files changed, 91 insertions(+), 124 deletions(-)

diff --git a/docs/source/common/optimizers.rst b/docs/source/common/optimizers.rst
index 9b12aff2c1..ca10dc97f1 100644
--- a/docs/source/common/optimizers.rst
+++ b/docs/source/common/optimizers.rst
@@ -166,17 +166,25 @@ Here is an example training a simple GAN with multiple optimizers.
 
 -----
 
+Learning rate scheduling
+------------------------
+Every optimizer you use can be paired with any
+`Learning Rate Scheduler `_. Please see the
+documentation of :meth:`~pytorch_lightning.core.lightning.LightningModule.configure_optimizers` for all the available options.
+
+-----
+
 Learning rate scheduling [manual]
 ---------------------------------
 You can call ``lr_scheduler.step()`` at arbitrary intervals.
-Use ``self.lr_schedulers()`` in your :class:`~pytorch_lightning.LightningModule` to access any learning rate schedulers
-defined in your :meth:`~pytorch_lightning.LightningModule.configure_optimizers`.
+Use ``self.lr_schedulers()`` in your :class:`~pytorch_lightning.core.lightning.LightningModule` to access any learning rate schedulers
+defined in your :meth:`~pytorch_lightning.core.lightning.LightningModule.configure_optimizers`.
 
 .. warning::
     * Before 1.3, Lightning automatically called ``lr_scheduler.step()`` in both automatic and manual optimization. From
      1.3, ``lr_scheduler.step()`` is now for the user to call at arbitrary intervals.
-    * Note that the lr_dict keys, such as ``"step"`` and ``""interval"``, will be ignored even if they are provided in
-      your ``configure_optimizers()`` during manual optimization.
+    * Note that the ``lr_dict`` keys, such as ``"step"`` and ``"interval"``, will be ignored even if they are provided in
+      your :meth:`~pytorch_lightning.core.lightning.LightningModule.configure_optimizers` during manual optimization.
 
 Here is an example calling ``lr_scheduler.step()`` every step.
 
@@ -341,7 +349,7 @@ Here is an example using a closure function.
 Access your own optimizer [manual]
 ----------------------------------
 ``optimizer`` is a :class:`~pytorch_lightning.core.optimizer.LightningOptimizer` object wrapping your own optimizer
-configured in your :meth:`~pytorch_lightning.LightningModule.configure_optimizers`. You can access your own optimizer
+configured in your :meth:`~pytorch_lightning.core.lightning.LightningModule.configure_optimizers`. You can access your own optimizer
 with ``optimizer.optimizer``. However, if you use your own optimizer to perform a step, Lightning won't be able to
 support accelerators and precision for you.
 
@@ -402,99 +410,6 @@ In the case of multiple optimizers, Lightning does the following:
 
 -----
 
-Learning rate scheduling
-------------------------
-Every optimizer you use can be paired with any
-`Learning Rate Scheduler `_. In the basic
-use-case, the scheduler(s) should be returned as the second output from the
-:meth:`~pytorch_lightning.LightningModule.configure_optimizers` method:
-
-.. testcode:: python
-
-    # no LR scheduler
-    def configure_optimizers(self):
-        return Adam(...)
-
-    # Adam + LR scheduler
-    def configure_optimizers(self):
-        optimizer = Adam(...)
-        scheduler = LambdaLR(optimizer, ...)
-        return [optimizer], [scheduler]
-
-    # Two optimizers each with a scheduler
-    def configure_optimizers(self):
-        optimizer1 = Adam(...)
-        optimizer2 = SGD(...)
-        scheduler1 = LambdaLR(optimizer1, ...)
-        scheduler2 = LambdaLR(optimizer2, ...)
-        return [optimizer1, optimizer2], [scheduler1, scheduler2]
-
-When there are schedulers in which the ``.step()`` method is conditioned on a metric value, such as the
-:class:`~torch.optim.lr_scheduler.ReduceLROnPlateau` scheduler, Lightning requires that the output from
-:meth:`~pytorch_lightning.LightningModule.configure_optimizers` should be dicts, one for each optimizer, with the
-keyword ``"monitor"`` set to metric that the scheduler should be conditioned on.
-
-.. testcode::
-
-    # The ReduceLROnPlateau scheduler requires a monitor
-    def configure_optimizers(self):
-        optimizer = Adam(...)
-        return {
-            'optimizer': optimizer,
-            'lr_scheduler': ReduceLROnPlateau(optimizer, ...),
-            'monitor': 'metric_to_track',
-        }
-
-    # In the case of two optimizers, only one using the ReduceLROnPlateau scheduler
-    def configure_optimizers(self):
-        optimizer1 = Adam(...)
-        optimizer2 = SGD(...)
-        scheduler1 = ReduceLROnPlateau(optimizer1, ...)
-        scheduler2 = LambdaLR(optimizer2, ...)
-        return (
-            {'optimizer': optimizer1, 'lr_scheduler': scheduler1, 'monitor': 'metric_to_track'},
-            {'optimizer': optimizer2, 'lr_scheduler': scheduler2},
-        )
-
-.. note::
-    Metrics can be made available to monitor by simply logging it using ``self.log('metric_to_track', metric_val)`` in
-    your :class:`~pytorch_lightning.LightningModule`.
-
-By default, all schedulers will be called after each epoch ends. To change this behaviour, a scheduler configuration
-should be returned as a dict which can contain the following keywords:
-
-* ``"scheduler"`` (required): the actual scheduler object
-* ``"monitor"`` (optional): metric to condition
-* ``"interval"`` (optional): either ``"epoch"`` (default) for stepping after each epoch ends or ``"step"`` for stepping
-  after each optimization step
-* ``"frequency"`` (optional): how many epochs/steps should pass between calls to ``scheduler.step()``. Default is 1,
-  corresponding to updating the learning rate after every epoch/step.
-* ``"strict"`` (optional): if set to ``True``, will enforce that value specified in ``"monitor"`` is available while
-  trying to call ``scheduler.step()``, and stop training if not found. If ``False``, it will only give a warning and
-  continue training without calling the scheduler.
-* ``"name"`` (optional): if using the :class:`~pytorch_lightning.callbacks.LearningRateMonitor` callback to monitor the
-  learning rate progress, this keyword can be used to specify a name the learning rate should be logged as.
-
-.. testcode:: python
-
-    # Same as the above example with additional params passed to the first scheduler
-    # In this case the ReduceLROnPlateau will step after every 10 processed batches
-    def configure_optimizers(self):
-        optimizers = [Adam(...), SGD(...)]
-        schedulers = [
-            {
-                'scheduler': ReduceLROnPlateau(optimizers[0], ...),
-                'monitor': 'metric_to_track',
-                'interval': 'step',
-                'frequency': 10,
-                'strict': True,
-            },
-            LambdaLR(optimizers[1], ...)
-        ]
-        return optimizers, schedulers
-
------
-
 Use multiple optimizers (like GANs)
 -----------------------------------
 To use multiple optimizers (optionally with learning rate schedulers), return two or more optimizers from
@@ -540,7 +455,7 @@ Under the hood, Lightning will call each optimizer sequentially:
 Step optimizers at arbitrary intervals
 --------------------------------------
 To do more interesting things with your optimizers such as learning rate warm-up or odd scheduling,
-override the :meth:`~pytorch_lightning.LightningModule.optimizer_step` function.
+override the :meth:`~pytorch_lightning.core.lightning.LightningModule.optimizer_step` function.
 
 .. warning::
     If you are overriding this method, make sure that you pass the ``optimizer_closure`` parameter to
@@ -591,9 +506,9 @@ Here we add a learning rate warm-up.
 Access your own optimizer
 -------------------------
 ``optimizer`` is a :class:`~pytorch_lightning.core.optimizer.LightningOptimizer` object wrapping your own optimizer
-configured in your :meth:`~pytorch_lightning.LightningModule.configure_optimizers`. You can access your own optimizer
-with ``optimizer.optimizer``. However, if you use your own optimizer to perform a step, Lightning won't be able to
-support accelerators and precision for you.
+configured in your :meth:`~pytorch_lightning.core.lightning.LightningModule.configure_optimizers`.
+You can access your own optimizer with ``optimizer.optimizer``. However, if you use your own optimizer
+to perform a step, Lightning won't be able to support accelerators and precision for you.
 
 .. testcode:: python
 
diff --git a/pytorch_lightning/core/lightning.py b/pytorch_lightning/core/lightning.py
index 109b8fd810..39eb70255d 100644
--- a/pytorch_lightning/core/lightning.py
+++ b/pytorch_lightning/core/lightning.py
@@ -1116,30 +1116,77 @@ class LightningModule(
 
             - **Single optimizer**.
             - **List or Tuple** of optimizers.
-            - **Two lists** - The first list has multiple optimizers, and the second has multiple LR schedulers (or
-              multiple lr_dict).
+            - **Two lists** - The first list has multiple optimizers, and the second has multiple LR schedulers
+              (or multiple ``lr_dict``).
             - **Dictionary**, with an ``"optimizer"`` key, and (optionally) a ``"lr_scheduler"``
-              key whose value is a single LR scheduler or lr_dict.
+              key whose value is a single LR scheduler or ``lr_dict``.
             - **Tuple of dictionaries** as described above, with an optional ``"frequency"`` key.
             - **None** - Fit will run without any optimizer.
 
-        Note:
-            The lr_dict is a dictionary which contains the scheduler and its associated configuration.
-            The default configuration is shown below.
+        The ``lr_dict`` is a dictionary which contains the scheduler and its associated configuration.
+        The default configuration is shown below.
 
-            .. code-block:: python
+        .. code-block:: python
 
-                lr_dict = {
-                    'scheduler': lr_scheduler,  # The LR scheduler instance (required)
-                    # The unit of the scheduler's step size, could also be 'step'
-                    'interval': 'epoch',
-                    'frequency': 1,  # The frequency of the scheduler
-                    'monitor': 'val_loss',  # Metric for `ReduceLROnPlateau` to monitor
-                    'strict': True,  # Whether to crash the training if `monitor` is not found
-                    'name': None,  # Custom name for `LearningRateMonitor` to use
+            lr_dict = {
+                # REQUIRED: The scheduler instance
+                'scheduler': lr_scheduler,
+                # The unit of the scheduler's step size, could also be 'step'.
+                # 'epoch' updates the scheduler on epoch end whereas 'step'
+                # updates it after an optimizer update.
+                'interval': 'epoch',
+                # How many epochs/steps should pass between calls to
+                # `scheduler.step()`. 1 corresponds to updating the learning
+                # rate after every epoch/step.
+                'frequency': 1,
+                # Metric to monitor for schedulers like `ReduceLROnPlateau`
+                'monitor': 'val_loss',
+                # If set to `True`, will enforce that the value specified in 'monitor'
+                # is available when the scheduler is updated, thus stopping
+                # training if not found. If set to `False`, it will only produce a warning.
+                'strict': True,
+                # If using the `LearningRateMonitor` callback to monitor the
+                # learning rate progress, this keyword can be used to specify
+                # a custom logged name
+                'name': None,
+            }
+
+        When there are schedulers in which the ``.step()`` method is conditioned on a value, such as the
+        :class:`torch.optim.lr_scheduler.ReduceLROnPlateau` scheduler, Lightning requires that the ``lr_dict``
+        contains the keyword ``"monitor"`` set to the metric name that the scheduler should be conditioned on.
+
+        .. testcode::
+
+            # The ReduceLROnPlateau scheduler requires a monitor
+            def configure_optimizers(self):
+                optimizer = Adam(...)
+                return {
+                    'optimizer': optimizer,
+                    'lr_scheduler': {
+                        'scheduler': ReduceLROnPlateau(optimizer, ...),
+                        'monitor': 'metric_to_track',
+                    }
                 }
 
-            Only the ``"scheduler"`` key is required, the rest will be set to the defaults above.
+            # In the case of two optimizers, only one using the ReduceLROnPlateau scheduler
+            def configure_optimizers(self):
+                optimizer1 = Adam(...)
+                optimizer2 = SGD(...)
+                scheduler1 = ReduceLROnPlateau(optimizer1, ...)
+                scheduler2 = LambdaLR(optimizer2, ...)
+                return (
+                    {
+                        'optimizer': optimizer1,
+                        'lr_scheduler': {
+                            'scheduler': scheduler1,
+                            'monitor': 'metric_to_track',
+                        }
+                    },
+                    {'optimizer': optimizer2, 'lr_scheduler': scheduler2}
+                )
+
+        Metrics can be made available to monitor by simply logging them using
+        ``self.log('metric_to_track', metric_val)`` in your :class:`~pytorch_lightning.core.lightning.LightningModule`.
 
         Note:
             The ``frequency`` value specified in a dict along with the ``optimizer`` key is an int corresponding
@@ -1147,9 +1194,11 @@ class LightningModule(
             It should be given to none or to all of the optimizers.
             There is a difference between passing multiple optimizers in a list,
             and passing multiple optimizers in dictionaries with a frequency of 1:
-            In the former case, all optimizers will operate on the given batch in each optimization step.
-            In the latter, only one optimizer will operate on the given batch at every step.
-            This is different from the ``frequency`` value specified in the lr_dict mentioned below.
+
+            - In the former case, all optimizers will operate on the given batch in each optimization step.
+            - In the latter, only one optimizer will operate on the given batch at every step.
+
+            This is different from the ``frequency`` value specified in the ``lr_dict`` mentioned above.
 
         .. code-block:: python
 
@@ -1168,7 +1217,7 @@ class LightningModule(
 
         Examples::
 
-            # most cases
+            # most cases. no learning rate scheduler
             def configure_optimizers(self):
                 return Adam(self.parameters(), lr=1e-3)
 
@@ -1186,11 +1235,14 @@ class LightningModule(
                 return [gen_opt, dis_opt], [dis_sch]
 
             # example with step-based learning rate schedulers
+            # each optimizer has its own scheduler
             def configure_optimizers(self):
                 gen_opt = Adam(self.model_gen.parameters(), lr=0.01)
                 dis_opt = Adam(self.model_dis.parameters(), lr=0.02)
-                gen_sch = {'scheduler': ExponentialLR(gen_opt, 0.99),
-                           'interval': 'step'}  # called after each training step
+                gen_sch = {
+                    'scheduler': ExponentialLR(gen_opt, 0.99),
+                    'interval': 'step'  # called after each training step
+                }
                 dis_sch = CosineAnnealing(dis_opt, T_max=10)  # called every epoch
                 return [gen_opt, dis_opt], [gen_sch, dis_sch]
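
The docstring examples in the patch elide the optimizer and scheduler arguments with ``Adam(...)``. As a minimal, self-contained sketch only (not part of the patch above), the snippet below shows how the documented ``lr_dict`` keys fit together end to end. The module name ``BoringClassifier``, the ``nn.Linear(32, 2)`` layer and the ``val_loss`` metric name are illustrative assumptions; only the ``configure_optimizers`` return structure and the ``lr_dict`` keys come from the documentation changed in this patch.

.. code-block:: python

    from torch import nn
    from torch.optim import Adam
    from torch.optim.lr_scheduler import ReduceLROnPlateau
    import pytorch_lightning as pl


    class BoringClassifier(pl.LightningModule):  # hypothetical module, for illustration only
        def __init__(self):
            super().__init__()
            self.layer = nn.Linear(32, 2)

        def training_step(self, batch, batch_idx):
            x, y = batch
            return nn.functional.cross_entropy(self.layer(x), y)

        def validation_step(self, batch, batch_idx):
            x, y = batch
            loss = nn.functional.cross_entropy(self.layer(x), y)
            # Log the metric referenced by the scheduler's `monitor` key below
            self.log('val_loss', loss)

        def configure_optimizers(self):
            optimizer = Adam(self.parameters(), lr=1e-3)
            return {
                'optimizer': optimizer,
                'lr_scheduler': {
                    # REQUIRED: the scheduler instance
                    'scheduler': ReduceLROnPlateau(optimizer, mode='min'),
                    # ReduceLROnPlateau needs a logged metric to condition on
                    'monitor': 'val_loss',
                    'interval': 'epoch',  # step the scheduler once per epoch
                    'frequency': 1,
                    'strict': True,  # stop if 'val_loss' was never logged
                },
            }

With ``'strict': True`` Lightning stops training if ``val_loss`` is not available when the scheduler is due to step; with ``False`` it only warns, as described in the ``lr_dict`` comments added by this patch.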