From 7c7182d3340f48928645a9cb75148aba3298a095 Mon Sep 17 00:00:00 2001
From: Sean Naren
Date: Fri, 4 Jun 2021 17:19:08 +0100
Subject: [PATCH] [IPU] Call accelerator hooks regardless if LM hook overridden 1/n (#7826)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Modify API to ensure hooks defined in the accelerator are called as expected

* handle step_end in dp

* Add changelog

* Update pytorch_lightning/trainer/trainer.py

Co-authored-by: Carlos Mocholí

* Add todo and explanation

Co-authored-by: Adrian Wälchli
Co-authored-by: Carlos Mocholí
---
 CHANGELOG.md                                  |  3 +++
 pytorch_lightning/plugins/training_type/dp.py | 13 ++++++++++---
 pytorch_lightning/trainer/trainer.py          | 11 +++++++----
 pytorch_lightning/trainer/training_loop.py    |  5 ++---
 4 files changed, 22 insertions(+), 10 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index ab42984161..6905cf4f35 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -122,6 +122,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 - `Trainer.fit` now raises an error when using manual optimization with unsupported features such as `gradient_clip_val` or `accumulate_grad_batches` ([#7788](https://github.com/PyTorchLightning/pytorch-lightning/pull/7788))
 
 
+- Accelerator hooks are called regardless if `LightningModule` overrides the same hooks ([#7826](https://github.com/PyTorchLightning/pytorch-lightning/pull/7826))
+
+
 - Moved profilers to their own file ([#7822](https://github.com/PyTorchLightning/pytorch-lightning/pull/7822))
 
 
diff --git a/pytorch_lightning/plugins/training_type/dp.py b/pytorch_lightning/plugins/training_type/dp.py
index 18aeb6a451..2787ab5644 100644
--- a/pytorch_lightning/plugins/training_type/dp.py
+++ b/pytorch_lightning/plugins/training_type/dp.py
@@ -19,6 +19,7 @@ from torch.nn import DataParallel
 from pytorch_lightning.overrides.data_parallel import LightningParallelModule
 from pytorch_lightning.plugins.training_type.parallel import ParallelPlugin
 from pytorch_lightning.utilities.apply_func import apply_to_collection
+from pytorch_lightning.utilities.model_helpers import is_overridden
 from pytorch_lightning.utilities.types import _METRIC_COLLECTION
 
 
@@ -101,10 +102,16 @@ class DataParallelPlugin(ParallelPlugin):
         return self.model(*args, **kwargs)
 
     def training_step_end(self, output):
-        return self.reduce(output)
+        if not is_overridden("training_step_end", self.lightning_module):
+            return self.reduce(output)
+        return output
 
     def validation_step_end(self, output):
-        return self.reduce(output)
+        if not is_overridden("validation_step_end", self.lightning_module):
+            return self.reduce(output)
+        return output
 
     def test_step_end(self, output):
-        return self.reduce(output)
+        if not is_overridden("test_step_end", self.lightning_module):
+            return self.reduce(output)
+        return output
diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py
index ded6e3395e..b9846af644 100644
--- a/pytorch_lightning/trainer/trainer.py
+++ b/pytorch_lightning/trainer/trainer.py
@@ -1237,11 +1237,14 @@ class Trainer(
                 hook_fx = getattr(model_ref, hook_name)
                 output = hook_fx(*args, **kwargs)
 
-            # if the PL module doesn't have the hook then call the accelerator
-            # used to auto-reduce things for the user with Results obj
-            elif hasattr(self.accelerator, hook_name):
+            # call the accelerator hook
+            if hasattr(self.accelerator, hook_name):
                 accelerator_hook = getattr(self.accelerator, hook_name)
-                output = accelerator_hook(*args, **kwargs)
+                accelerator_output = accelerator_hook(*args, **kwargs)
+                # Rely on the accelerator output if lightningModule hook returns nothing
+                # Required for cases such as DataParallel where we reduce the output for the user
+                # todo: move this data parallel logic into the data parallel plugin
+                output = accelerator_output if output is None else output
 
         if not skip:
             self._cache_logged_metrics()
diff --git a/pytorch_lightning/trainer/training_loop.py b/pytorch_lightning/trainer/training_loop.py
index 4b48cf0acb..81498cbe3b 100644
--- a/pytorch_lightning/trainer/training_loop.py
+++ b/pytorch_lightning/trainer/training_loop.py
@@ -634,9 +634,8 @@ class TrainLoop:
                 else:
                     model_ref.on_train_epoch_end()
 
-            # if the PL module doesn't have the hook then call the accelerator
-            # used to auto-reduce things for the user with Results obj
-            elif hasattr(self.trainer.accelerator, hook_name):
+            # call the accelerator hook
+            if hasattr(self.trainer.accelerator, hook_name):
                 accelerator_hook = getattr(self.trainer.accelerator, hook_name)
                 accelerator_hook()
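
Editor's note: below is a minimal, self-contained sketch of the dispatch pattern this patch introduces, not the actual Trainer/Accelerator code. The names BaseModule, MyModule, DPAcceleratorSketch, the simplified call_hook signature, and the local is_overridden helper are hypothetical stand-ins; only the control flow mirrors the changed call_hook and DataParallelPlugin.*_step_end behaviour: the accelerator hook now runs even when the LightningModule overrides the same hook, the user's return value takes precedence, and the DataParallel auto-reduce is skipped when the user supplies their own *_step_end.

# Hypothetical sketch of the hook dispatch in this patch; none of these classes
# are the real PyTorch Lightning API.

def is_overridden(method_name, instance, parent):
    # Rough local stand-in for pytorch_lightning.utilities.model_helpers.is_overridden
    return getattr(type(instance), method_name, None) is not getattr(parent, method_name, None)


class BaseModule:
    # Plays the role of the LightningModule base: the default hook passes output through.
    def training_step_end(self, output):
        return output


class MyModule(BaseModule):
    # A user module that overrides the step-end hook with its own reduction.
    def training_step_end(self, output):
        return max(output)


class DPAcceleratorSketch:
    # Mirrors the dp.py change: only auto-reduce when the user did NOT override the hook.
    def __init__(self, lightning_module):
        self.lightning_module = lightning_module

    def training_step_end(self, output):
        if not is_overridden("training_step_end", self.lightning_module, BaseModule):
            return sum(output) / len(output)  # stand-in for DataParallelPlugin.reduce
        return output


def call_hook(module, accelerator, hook_name, *args):
    # Mirrors the trainer.py change: the accelerator hook is always called,
    # and the module's return value wins whenever it returns something.
    output = None
    if is_overridden(hook_name, module, BaseModule):
        output = getattr(module, hook_name)(*args)
    if hasattr(accelerator, hook_name):
        accelerator_output = getattr(accelerator, hook_name)(*args)
        output = accelerator_output if output is None else output
    return output


if __name__ == "__main__":
    for module in (BaseModule(), MyModule()):
        accelerator = DPAcceleratorSketch(module)
        print(call_hook(module, accelerator, "training_step_end", [1.0, 3.0]))
    # -> 2.0 (accelerator auto-reduce), then 3.0 (user override takes precedence)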