diff --git a/CHANGELOG.md b/CHANGELOG.md
index 85a827104d..84c7feb765 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -202,6 +202,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 
 ### Fixed
 
+- Fixed multi-node DDP sub-process launch by using `local_rank` instead of `global_rank` for main process assertion ([#7061](https://github.com/PyTorchLightning/pytorch-lightning/pull/7061))
+
+
 - Fixed incorrect removal of `WORLD_SIZE` environment variable in DDP training when launching with torch distributed/torchelastic ([#6942](https://github.com/PyTorchLightning/pytorch-lightning/pull/6942))
 
diff --git a/pytorch_lightning/plugins/training_type/ddp.py b/pytorch_lightning/plugins/training_type/ddp.py
index 977145a4cc..28910e9b77 100644
--- a/pytorch_lightning/plugins/training_type/ddp.py
+++ b/pytorch_lightning/plugins/training_type/ddp.py
@@ -110,7 +110,7 @@ class DDPPlugin(ParallelPlugin):
 
     def _call_children_scripts(self):
         # bookkeeping of spawned processes
-        assert self.global_rank == 0
+        assert self.local_rank == 0
         self._check_can_spawn_children()
         self._has_spawned_children = True
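
For context (not part of the patch), here is a minimal sketch of the rank arithmetic the fix relies on, assuming the conventional node-by-node rank layout; the helper functions are hypothetical, not PyTorch Lightning API. In a multi-node job, only node 0's main process has `global_rank == 0`, but every node's main process (`local_rank == 0`) must spawn that node's children, so the old assertion failed on all nodes except node 0.

```python
# Hypothetical sketch (not PyTorch Lightning code) illustrating why the
# assertion guarding child-process spawning must check local_rank, not
# global_rank, in a multi-node DDP run.

def global_rank(node_rank: int, local_rank: int, gpus_per_node: int) -> int:
    # Conventional layout: ranks are numbered node by node.
    return node_rank * gpus_per_node + local_rank


def can_spawn_children(node_rank: int, local_rank: int, gpus_per_node: int) -> None:
    # Old check (broken): only node 0's main process would pass.
    # assert global_rank(node_rank, local_rank, gpus_per_node) == 0
    # Fixed check: the per-node main process passes on every node.
    assert local_rank == 0, "only each node's main process may spawn children"


if __name__ == "__main__":
    gpus_per_node = 4
    for node_rank in range(2):      # a 2-node job
        local_rank = 0              # the main process on each node
        g = global_rank(node_rank, local_rank, gpus_per_node)
        print(f"node {node_rank}: local_rank=0, global_rank={g}")
        can_spawn_children(node_rank, local_rank, gpus_per_node)  # passes on both nodes
```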