diff --git a/CHANGELOG.md b/CHANGELOG.md
index b0d288f7ef..c2b5ca5c7a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -73,7 +73,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 
 ### Fixed
 
--
+- Avoid calling `average_parameters` multiple times per optimizer step ([#12452](https://github.com/PyTorchLightning/pytorch-lightning/pull/12452))
 
 -
 
diff --git a/pytorch_lightning/strategies/ddp.py b/pytorch_lightning/strategies/ddp.py
index 3e4742f0fc..67b8a22c8a 100644
--- a/pytorch_lightning/strategies/ddp.py
+++ b/pytorch_lightning/strategies/ddp.py
@@ -280,11 +280,8 @@ class DDPStrategy(ParallelStrategy):
         if not _TORCH_GREATER_EQUAL_1_10 or self._model_averager is None:
             return optimizer_output
 
-        for group in optimizer.param_groups:
-            for param in group["params"]:
-                if param.grad is None:
-                    continue
-                self._model_averager.average_parameters(iter(param))
+        params = [param for group in optimizer.param_groups for param in group["params"] if param.grad is not None]
+        self._model_averager.average_parameters(iter(params))
 
         return optimizer_output
 
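For context, the change replaces a per-parameter averaging loop with a single `average_parameters` call over all parameters that received a gradient. Below is a minimal sketch of that pattern using `torch.distributed`'s `PeriodicModelAverager` (the averager used for post-local SGD); it assumes a process group has already been initialized (e.g. via `torchrun`), and `model`, `optimizer`, and `step` are hypothetical stand-ins, not the strategy's actual code:

```python
import torch
from torch.distributed.algorithms.model_averaging.averagers import PeriodicModelAverager

# Assumes torch.distributed.init_process_group(...) has already run
# (e.g. the script was launched with torchrun). `model`, `optimizer`,
# and the averager settings here are illustrative placeholders.
model = torch.nn.Linear(32, 2)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
averager = PeriodicModelAverager(period=4, warmup_steps=100)

def step(optimizer: torch.optim.Optimizer, averager: PeriodicModelAverager) -> None:
    optimizer.step()
    # Collect every parameter that actually received a gradient ...
    params = [
        param
        for group in optimizer.param_groups
        for param in group["params"]
        if param.grad is not None
    ]
    # ... and average the whole set in one call. The removed code called
    # `average_parameters(iter(param))` once per parameter: iterating a
    # single Parameter yields its sub-tensors rather than the parameter
    # itself, and each call also advanced the averager's internal step
    # counter, so the period/warmup bookkeeping fired at the wrong times.
    averager.average_parameters(iter(params))
```

Passing one iterator of all eligible parameters keeps the averager's step count in sync with the optimizer: exactly one `average_parameters` call per optimizer step, regardless of how many parameter groups or parameters the optimizer holds.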