# lightning/pytorch_lightning/trainer/connectors/optimizer_connector.py

# Copyright The PyTorch Lightning team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import List, Optional
from weakref import proxy

import pytorch_lightning as pl
from pytorch_lightning.utilities import rank_zero_warn
from pytorch_lightning.utilities.exceptions import MisconfigurationException


class OptimizerConnector:
    """Connector that resets the trainer's optimizer containers and steps its LR schedulers.

    The trainer is stored as a weak proxy, so this connector does not hold a strong
    reference to it.
    """

    def __init__(self, trainer: 'pl.Trainer') -> None:
        self.trainer = proxy(trainer)

    def on_trainer_init(self) -> None:
        """Initialise the optimizer-related trainer attributes, each with its own empty list."""
        for attribute in ('lr_schedulers', 'optimizers', 'optimizer_frequencies'):
            setattr(self.trainer, attribute, [])

    def _report_missing_monitor(self, scheduler_config: dict, monitor_key) -> None:
        """Raise when the scheduler config is strict (the default), otherwise emit a warning.

        Called when the metric a ``ReduceLROnPlateau`` scheduler is conditioned on is
        absent from ``trainer.callback_metrics``.
        """
        if scheduler_config.get('strict', True):
            avail_metrics = list(self.trainer.callback_metrics)
            raise MisconfigurationException(
                f'ReduceLROnPlateau conditioned on metric {monitor_key}'
                f' which is not available. Available metrics are: {avail_metrics}.'
                ' Condition can be set using `monitor` key in lr scheduler dict'
            )
        rank_zero_warn(
            f'ReduceLROnPlateau conditioned on metric {monitor_key}'
            ' which is not available but strict is set to `False`.'
            ' Skipping learning rate update.',
            RuntimeWarning,
        )

    def update_learning_rates(
        self, interval: str, update_plateau_schedulers: bool, opt_indices: Optional[List[int]] = None
    ) -> None:
        """Step every LR scheduler that is due for the given interval.

        Nothing happens when the trainer has no schedulers or when the lightning module
        does not use automatic optimization.

        Args:
            interval: either 'epoch' or 'step'.
            update_plateau_schedulers: selects which family of schedulers is processed by this call.
                When ``True`` only schedulers flagged ``reduce_on_plateau`` are stepped, when ``False``
                only the remaining ones. The split exists so that non-plateau schedulers can be updated
                before validation runs (checkpoints are commonly saved during validation), while
                on-plateau schedulers, which might monitor a validation metric, are updated separately.
            opt_indices: indices of the optimizers to update. Schedulers whose ``opt_idx`` is an int
                not contained in this list are skipped; ``None`` is treated as an empty list.
        """
        trainer = self.trainer
        if not trainer.lr_schedulers:
            return
        if not trainer.lightning_module.automatic_optimization:
            return

        selected_opt_indices = [] if opt_indices is None else opt_indices

        for scheduler_idx, config in enumerate(trainer.lr_schedulers):
            # schedulers bound to a specific optimizer only run when that optimizer was selected
            opt_idx = config['opt_idx']
            if isinstance(opt_idx, int) and opt_idx not in selected_opt_indices:
                continue

            # process exactly one family (plateau / non-plateau) per call
            if update_plateau_schedulers ^ config["reduce_on_plateau"]:
                continue

            if interval == 'step':
                position = trainer.fit_loop.batch_idx
            else:
                position = trainer.current_epoch
            position += 1  # batch and epoch counters are both zero-based

            # only step when the scheduler's interval matches this call and
            # its frequency divides the (one-based) position
            if config['interval'] != interval:
                continue
            if position % config['frequency'] != 0:
                continue

            is_plateau = config['reduce_on_plateau']

            # a plateau scheduler needs the value of its monitored metric
            monitor_key = None
            monitor_val = None
            if is_plateau:
                monitor_key = config['monitor']
                monitor_val = trainer.callback_metrics.get(monitor_key)
                if monitor_val is None:
                    self._report_missing_monitor(config, monitor_key)
                    continue

            scheduler = config['scheduler']
            step_args = (monitor_val, ) if is_plateau else ()

            # record the first param group's LR around the step for the debugger
            old_lr = scheduler.optimizer.param_groups[0]['lr']
            scheduler.step(*step_args)
            new_lr = scheduler.optimizer.param_groups[0]['lr']

            debugger = trainer.dev_debugger
            if debugger.enabled:
                debugger.track_lr_schedulers_update(
                    trainer.fit_loop.batch_idx,
                    interval,
                    scheduler_idx,
                    old_lr,
                    new_lr,
                    monitor_key=monitor_key,
                    monitor_val=monitor_val
                )
ref: inner train loop (intermediate step) 17/n (#3376) * ref: inner train loop (intermediate step) 17/n * ref: inner train loop (intermediate step) 17/n * ref: inner train loop (intermediate step) 17/n 2020-09-07 13:31:42 +00:00			`# Copyright The PyTorch Lightning team.`
			`#`
			`# Licensed under the Apache License, Version 2.0 (the "License");`
			`# you may not use this file except in compliance with the License.`
			`# You may obtain a copy of the License at`
			`#`
			`# http://www.apache.org/licenses/LICENSE-2.0`
			`#`
			`# Unless required by applicable law or agreed to in writing, software`
			`# distributed under the License is distributed on an "AS IS" BASIS,`
			`# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`# See the License for the specific language governing permissions and`
			`# limitations under the License.`
Refactor some loops code and hook tests (#7682) 2021-05-25 11:27:54 +00:00			`from typing import List, Optional`
			`from weakref import proxy`
Update LR schedulers only when their corresponding Optimizer is being… (#4868) * Update LR schedulers only when their corresponding Optimizer is being used. In the case when optimizer frequencies are specified, the LR scheduler corresponding to a particular optimizer is updated only when that optimizer is being used in the training loop or epoch. * pep8speak fixes * Fix failing tests * Add docs * PR Feedback * Apply suggestions from code review Co-authored-by: Rohit Gupta <rohitgr1998@gmail.com> * formatting fix * PR Feedback - part 2 * More PR feedback * Apply suggestions from code review Co-authored-by: Rohit Gupta <rohitgr1998@gmail.com> * Add typing imports * Stronger tests and fixes related to that * Add more tests plus PR feedback * Make optimizer_freq_cumsum a cached property @cached_property is only available after Python 3.8 so had to do it manually. * Fix tests * Apply suggestions from code review Co-authored-by: Carlos Mocholí <carlossmocholi@gmail.com> * Avoid mutable defaults * Parametrize lr scheduling tests * PR feedback * Apply suggestions from code review * spell * Apply suggestions from code review * flake8 Co-authored-by: Rohit Gupta <rohitgr1998@gmail.com> Co-authored-by: chaton <thomas@grid.ai> Co-authored-by: Carlos Mocholí <carlossmocholi@gmail.com> Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com> 2021-05-04 09:37:40 +00:00
Refactor some loops code and hook tests (#7682) 2021-05-25 11:27:54 +00:00			`import pytorch_lightning as pl`
Add strict option to lr_scheduler dict (#3586) * Add strict option to lr_scheduler dict * Update docs * Unnecessary "else" after "raise" * Update CHANGELOG * Fix rebase 2020-10-21 12:14:37 +00:00			`from pytorch_lightning.utilities import rank_zero_warn`
ref: inner train loop (intermediate step) 16/n 2020-09-07 00:48:29 +00:00			`from pytorch_lightning.utilities.exceptions import MisconfigurationException`


ref: organize args 3/n (#3449) * ref: organize args 3/n * ref: organize args 3/n * ref: organize args 3/n * ref: organize args 3/n * ref: organize args 3/n * ref: organize args 3/n 2020-09-10 17:21:04 +00:00			`class OptimizerConnector:`
formatting 4/n: Trainer (#5720) * yapf trainer * Apply suggestions from code review Co-authored-by: Rohit Gupta <rohitgr1998@gmail.com> * . * fix Co-authored-by: Rohit Gupta <rohitgr1998@gmail.com> 2021-02-03 09:25:42 +00:00
Refactor some loops code and hook tests (#7682) 2021-05-25 11:27:54 +00:00			`def __init__(self, trainer: 'pl.Trainer') -> None:`
			`self.trainer = proxy(trainer)`
ref: inner train loop (intermediate step) 16/n 2020-09-07 00:48:29 +00:00
Refactor some loops code and hook tests (#7682) 2021-05-25 11:27:54 +00:00			`def on_trainer_init(self) -> None:`
ref: organize args 3/n (#3449) * ref: organize args 3/n * ref: organize args 3/n * ref: organize args 3/n * ref: organize args 3/n * ref: organize args 3/n * ref: organize args 3/n 2020-09-10 17:21:04 +00:00			`self.trainer.lr_schedulers = []`
disable optimizers setup during testing (#3059) * disable configure_optimizers during testing * minor changes * hvd and ddp * fix precision during testing * fix ddp * fix amp * fix cpu * update dp * simplify optimizers * add test * codefactor * ref optimizer setup * chlog * suggestions * isort * rebased with master 2020-09-28 23:09:04 +00:00			`self.trainer.optimizers = []`
ref: organize args 3/n (#3449) * ref: organize args 3/n * ref: organize args 3/n * ref: organize args 3/n * ref: organize args 3/n * ref: organize args 3/n * ref: organize args 3/n 2020-09-10 17:21:04 +00:00			`self.trainer.optimizer_frequencies = []`

Fix checkpointed state for lr_schedulers with step interval (#7877) Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com> Co-authored-by: Carlos Mocholi <carlossmocholi@gmail.com> Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> 2021-06-21 15:08:07 +00:00			`def update_learning_rates(`
			`self, interval: str, update_plateau_schedulers: bool, opt_indices: Optional[List[int]] = None`
			`) -> None:`
ref: inner train loop (intermediate step) 16/n 2020-09-07 00:48:29 +00:00			`"""Update learning rates.`

			`Args:`
			`interval: either 'epoch' or 'step'.`
Fix checkpointed state for lr_schedulers with step interval (#7877) Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com> Co-authored-by: Carlos Mocholi <carlossmocholi@gmail.com> Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> 2021-06-21 15:08:07 +00:00			update_plateau_schedulers: control whether ``ReduceLROnPlateau`` or non-plateau schedulers get updated.
			`This is used so non-plateau schedulers can be updated before running validation. Checkpoints are`
			`commonly saved during validation, however, on-plateau schedulers might monitor a validation metric`
			`so they have to be updated separately.`
Refactor some loops code and hook tests (#7682) 2021-05-25 11:27:54 +00:00			`opt_indices: indices of the optimizers to update.`
ref: inner train loop (intermediate step) 16/n 2020-09-07 00:48:29 +00:00			`"""`
Enforce Lightning module as source of truth for automatic optimization (#7130) * make lightning module source of truth for automatic optimization * Update configuration_validator.py * Update model_connector.py * rm-references * Update CHANGELOG.md * Update CHANGELOG.md Co-authored-by: jirka <jirka.borovec@seznam.cz> 2021-04-26 05:36:26 +00:00			`if not self.trainer.lr_schedulers or not self.trainer.lightning_module.automatic_optimization:`
ref: inner train loop (intermediate step) 16/n 2020-09-07 00:48:29 +00:00			`return`

Update LR schedulers only when their corresponding Optimizer is being… (#4868) * Update LR schedulers only when their corresponding Optimizer is being used. In the case when optimizer frequencies are specified, the LR scheduler corresponding to a particular optimizer is updated only when that optimizer is being used in the training loop or epoch. * pep8speak fixes * Fix failing tests * Add docs * PR Feedback * Apply suggestions from code review Co-authored-by: Rohit Gupta <rohitgr1998@gmail.com> * formatting fix * PR Feedback - part 2 * More PR feedback * Apply suggestions from code review Co-authored-by: Rohit Gupta <rohitgr1998@gmail.com> * Add typing imports * Stronger tests and fixes related to that * Add more tests plus PR feedback * Make optimizer_freq_cumsum a cached property @cached_property is only available after Python 3.8 so had to do it manually. * Fix tests * Apply suggestions from code review Co-authored-by: Carlos Mocholí <carlossmocholi@gmail.com> * Avoid mutable defaults * Parametrize lr scheduling tests * PR feedback * Apply suggestions from code review * spell * Apply suggestions from code review * flake8 Co-authored-by: Rohit Gupta <rohitgr1998@gmail.com> Co-authored-by: chaton <thomas@grid.ai> Co-authored-by: Carlos Mocholí <carlossmocholi@gmail.com> Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com> 2021-05-04 09:37:40 +00:00			`if opt_indices is None:`
			`opt_indices = []`

ref: inner train loop (intermediate step) 16/n 2020-09-07 00:48:29 +00:00			`for scheduler_idx, lr_scheduler in enumerate(self.trainer.lr_schedulers):`
Update LR schedulers only when their corresponding Optimizer is being… (#4868) * Update LR schedulers only when their corresponding Optimizer is being used. In the case when optimizer frequencies are specified, the LR scheduler corresponding to a particular optimizer is updated only when that optimizer is being used in the training loop or epoch. * pep8speak fixes * Fix failing tests * Add docs * PR Feedback * Apply suggestions from code review Co-authored-by: Rohit Gupta <rohitgr1998@gmail.com> * formatting fix * PR Feedback - part 2 * More PR feedback * Apply suggestions from code review Co-authored-by: Rohit Gupta <rohitgr1998@gmail.com> * Add typing imports * Stronger tests and fixes related to that * Add more tests plus PR feedback * Make optimizer_freq_cumsum a cached property @cached_property is only available after Python 3.8 so had to do it manually. * Fix tests * Apply suggestions from code review Co-authored-by: Carlos Mocholí <carlossmocholi@gmail.com> * Avoid mutable defaults * Parametrize lr scheduling tests * PR feedback * Apply suggestions from code review * spell * Apply suggestions from code review * flake8 Co-authored-by: Rohit Gupta <rohitgr1998@gmail.com> Co-authored-by: chaton <thomas@grid.ai> Co-authored-by: Carlos Mocholí <carlossmocholi@gmail.com> Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com> 2021-05-04 09:37:40 +00:00			`if isinstance(lr_scheduler['opt_idx'], int) and lr_scheduler['opt_idx'] not in opt_indices:`
			`continue`

Fix checkpointed state for lr_schedulers with step interval (#7877) Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com> Co-authored-by: Carlos Mocholi <carlossmocholi@gmail.com> Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> 2021-06-21 15:08:07 +00:00			`if update_plateau_schedulers ^ lr_scheduler["reduce_on_plateau"]:`
			`continue`

rename old `Trainer.train_loop` -> `Trainer.fit_loop` (#8025) 2021-06-22 09:49:32 +00:00			`current_idx = self.trainer.fit_loop.batch_idx if interval == 'step' else self.trainer.current_epoch`
ref: inner train loop (intermediate step) 16/n 2020-09-07 00:48:29 +00:00			`current_idx += 1 # account for both batch and epoch starts from 0`
			`# Take step if call to update_learning_rates matches the interval key and`
			`# the current step modulo the schedulers frequency is zero`
			`if lr_scheduler['interval'] == interval and current_idx % lr_scheduler['frequency'] == 0:`
Clean up optimizer code (#3587) * Update optimizer code * Update CHANGELOG * Fix tuple of one list case * Update docs * Fix pep issue * Minor typo [skip-ci] * Use minimal match Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com> * Apply suggestions from code review Co-authored-by: Rohit Gupta <rohitgr1998@gmail.com> Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com> Co-authored-by: Rohit Gupta <rohitgr1998@gmail.com> 2020-10-21 19:12:48 +00:00			`# If instance of ReduceLROnPlateau, we need a monitor`
			`monitor_key, monitor_val = None, None`
ref: inner train loop (intermediate step) 16/n 2020-09-07 00:48:29 +00:00			`if lr_scheduler['reduce_on_plateau']:`
Clean up optimizer code (#3587) * Update optimizer code * Update CHANGELOG * Fix tuple of one list case * Update docs * Fix pep issue * Minor typo [skip-ci] * Use minimal match Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com> * Apply suggestions from code review Co-authored-by: Rohit Gupta <rohitgr1998@gmail.com> Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com> Co-authored-by: Rohit Gupta <rohitgr1998@gmail.com> 2020-10-21 19:12:48 +00:00			`monitor_key = lr_scheduler['monitor']`
Simplify logger connector access (#8318) 2021-07-07 12:13:30 +00:00			`monitor_val = self.trainer.callback_metrics.get(monitor_key)`
ref: inner train loop (intermediate step) 16/n 2020-09-07 00:48:29 +00:00			`if monitor_val is None:`
Add strict option to lr_scheduler dict (#3586) * Add strict option to lr_scheduler dict * Update docs * Unnecessary "else" after "raise" * Update CHANGELOG * Fix rebase 2020-10-21 12:14:37 +00:00			`if lr_scheduler.get('strict', True):`
Simplify logger connector access (#8318) 2021-07-07 12:13:30 +00:00			`avail_metrics = list(self.trainer.callback_metrics)`
Add strict option to lr_scheduler dict (#3586) * Add strict option to lr_scheduler dict * Update docs * Unnecessary "else" after "raise" * Update CHANGELOG * Fix rebase 2020-10-21 12:14:37 +00:00			`raise MisconfigurationException(`
			`f'ReduceLROnPlateau conditioned on metric {monitor_key}'`
			`f' which is not available. Available metrics are: {avail_metrics}.'`
			' Condition can be set using `monitor` key in lr scheduler dict'
			`)`
			`rank_zero_warn(`
ref: inner train loop (intermediate step) 16/n 2020-09-07 00:48:29 +00:00			`f'ReduceLROnPlateau conditioned on metric {monitor_key}'`
Add strict option to lr_scheduler dict (#3586) * Add strict option to lr_scheduler dict * Update docs * Unnecessary "else" after "raise" * Update CHANGELOG * Fix rebase 2020-10-21 12:14:37 +00:00			' which is not available but strict is set to `False`.'
			`' Skipping learning rate update.',`
			`RuntimeWarning,`
ref: inner train loop (intermediate step) 16/n 2020-09-07 00:48:29 +00:00			`)`
Add strict option to lr_scheduler dict (#3586) * Add strict option to lr_scheduler dict * Update docs * Unnecessary "else" after "raise" * Update CHANGELOG * Fix rebase 2020-10-21 12:14:37 +00:00			`continue`
Clean up optimizer code (#3587) * Update optimizer code * Update CHANGELOG * Fix tuple of one list case * Update docs * Fix pep issue * Minor typo [skip-ci] * Use minimal match Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com> * Apply suggestions from code review Co-authored-by: Rohit Gupta <rohitgr1998@gmail.com> Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com> Co-authored-by: Rohit Gupta <rohitgr1998@gmail.com> 2020-10-21 19:12:48 +00:00			`# update LR`
			`old_lr = lr_scheduler['scheduler'].optimizer.param_groups[0]['lr']`
[Bugfix] Fixed epoch level schedulers not being called when val_check_interval < 1.0 (#6075) * fix bug * fix tests * changelog * fix pep8 * fix tests * fix and add some tests * add test for rlop * chlog * Update CHANGELOG.md Co-authored-by: rohitgr7 <rohitgr1998@gmail.com> 2021-02-24 11:16:33 +00:00
Clean up optimizer code (#3587) * Update optimizer code * Update CHANGELOG * Fix tuple of one list case * Update docs * Fix pep issue * Minor typo [skip-ci] * Use minimal match Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com> * Apply suggestions from code review Co-authored-by: Rohit Gupta <rohitgr1998@gmail.com> Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com> Co-authored-by: Rohit Gupta <rohitgr1998@gmail.com> 2020-10-21 19:12:48 +00:00			`if lr_scheduler['reduce_on_plateau']:`
ref: inner train loop (intermediate step) 16/n 2020-09-07 00:48:29 +00:00			`lr_scheduler['scheduler'].step(monitor_val)`
			`else:`
			`lr_scheduler['scheduler'].step()`
[Bugfix] Fixed epoch level schedulers not being called when val_check_interval < 1.0 (#6075) * fix bug * fix tests * changelog * fix pep8 * fix tests * fix and add some tests * add test for rlop * chlog * Update CHANGELOG.md Co-authored-by: rohitgr7 <rohitgr1998@gmail.com> 2021-02-24 11:16:33 +00:00
Clean up optimizer code (#3587) * Update optimizer code * Update CHANGELOG * Fix tuple of one list case * Update docs * Fix pep issue * Minor typo [skip-ci] * Use minimal match Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com> * Apply suggestions from code review Co-authored-by: Rohit Gupta <rohitgr1998@gmail.com> Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com> Co-authored-by: Rohit Gupta <rohitgr1998@gmail.com> 2020-10-21 19:12:48 +00:00			`new_lr = lr_scheduler['scheduler'].optimizer.param_groups[0]['lr']`
ref: inner train loop (intermediate step) 16/n 2020-09-07 00:48:29 +00:00
Clean up optimizer code (#3587) * Update optimizer code * Update CHANGELOG * Fix tuple of one list case * Update docs * Fix pep issue * Minor typo [skip-ci] * Use minimal match Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com> * Apply suggestions from code review Co-authored-by: Rohit Gupta <rohitgr1998@gmail.com> Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com> Co-authored-by: Rohit Gupta <rohitgr1998@gmail.com> 2020-10-21 19:12:48 +00:00			`if self.trainer.dev_debugger.enabled:`
			`self.trainer.dev_debugger.track_lr_schedulers_update(`
rename old `Trainer.train_loop` -> `Trainer.fit_loop` (#8025) 2021-06-22 09:49:32 +00:00			`self.trainer.fit_loop.batch_idx,`
[Bugfix] Fixed epoch level schedulers not being called when val_check_interval < 1.0 (#6075) * fix bug * fix tests * changelog * fix pep8 * fix tests * fix and add some tests * add test for rlop * chlog * Update CHANGELOG.md Co-authored-by: rohitgr7 <rohitgr1998@gmail.com> 2021-02-24 11:16:33 +00:00			`interval,`
			`scheduler_idx,`
			`old_lr,`
			`new_lr,`
			`monitor_key=monitor_key,`
			`monitor_val=monitor_val`
Clean up optimizer code (#3587) * Update optimizer code * Update CHANGELOG * Fix tuple of one list case * Update docs * Fix pep issue * Minor typo [skip-ci] * Use minimal match Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com> * Apply suggestions from code review Co-authored-by: Rohit Gupta <rohitgr1998@gmail.com> Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com> Co-authored-by: Rohit Gupta <rohitgr1998@gmail.com> 2020-10-21 19:12:48 +00:00			`)`