[fix] Fix multi-node DDP launch by using local rank instead of global rank for main process (#7061)
* Update ddp.py
* Update CHANGELOG.md
This commit is contained in:
parent
6a7b4cf5d3
commit
8bcd169767
|
@@ -202,6 +202,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
|
||||||
|
|
||||||
### Fixed
|
### Fixed
|
||||||
|
|
||||||
|
- Fixed multi-node DDP sub-process launch by using `local_rank` instead of `global_rank` for main process assertion ([#7061](https://github.com/PyTorchLightning/pytorch-lightning/pull/7061))
|
||||||
|
|
||||||
|
|
||||||
- Fixed incorrect removal of `WORLD_SIZE` environment variable in DDP training when launching with torch distributed/torchelastic ([#6942](https://github.com/PyTorchLightning/pytorch-lightning/pull/6942))
|
- Fixed incorrect removal of `WORLD_SIZE` environment variable in DDP training when launching with torch distributed/torchelastic ([#6942](https://github.com/PyTorchLightning/pytorch-lightning/pull/6942))
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@@ -110,7 +110,7 @@ class DDPPlugin(ParallelPlugin):
|
||||||
def _call_children_scripts(self):
|
def _call_children_scripts(self):
|
||||||
|
|
||||||
# bookkeeping of spawned processes
|
# bookkeeping of spawned processes
|
||||||
assert self.global_rank == 0
|
assert self.local_rank == 0
|
||||||
self._check_can_spawn_children()
|
self._check_can_spawn_children()
|
||||||
self._has_spawned_children = True
|
self._has_spawned_children = True
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue