[DDP] Remove the outdated limitations of DDP communication hook since 1.9 (#8346)

* [DDP] Remove the outdated limitations of DDP communication hook since 1.9

1. Since PyTorch 1.9, DDP communication hooks can work on all backends, not only NCCL (a minimal sketch follows below).
2. SPMD (single process multiple devices) mode in DDP is completely retired in 1.9, so SPSD (single process single device) is the only option.
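
As a non-authoritative illustration of point 1, here is a minimal sketch of registering a built-in communication hook on a CPU/gloo backend, which PyTorch rejected before 1.9. The single-process gloo group and the `fp16_compress_hook` choice are assumptions made purely for the example, not part of this PR:

# Minimal sketch: register a DDP communication hook on a non-NCCL (gloo, CPU) backend.
# Assumes PyTorch >= 1.9; the world_size=1 process group is only for illustration.
import os
import torch
import torch.distributed as dist
from torch.distributed.algorithms.ddp_comm_hooks import default_hooks as default
from torch.nn.parallel import DistributedDataParallel as DDP

os.environ.setdefault("MASTER_ADDR", "127.0.0.1")
os.environ.setdefault("MASTER_PORT", "29500")
dist.init_process_group(backend="gloo", rank=0, world_size=1)

model = DDP(torch.nn.Linear(4, 4))  # CPU model, SPSD (single process single device) mode
# Before 1.9 this call required the NCCL backend; since 1.9 it works on any backend.
model.register_comm_hook(state=None, hook=default.fp16_compress_hook)

model(torch.randn(2, 4)).sum().backward()  # gradients are communicated through the hook
dist.destroy_process_group()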

* Update ddp.py

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Yi Wang 2021-07-19 04:55:42 -07:00 committed by GitHub
parent 2c5d94d98b
commit adaa32f47a
1 changed file with 6 additions and 3 deletions


@@ -37,6 +37,7 @@ from pytorch_lightning.utilities import (
     _HYDRA_AVAILABLE,
     _TORCH_GREATER_EQUAL_1_7,
     _TORCH_GREATER_EQUAL_1_8,
+    _TORCH_GREATER_EQUAL_1_9,
     rank_zero_deprecation,
     rank_zero_warn,
 )
@@ -289,9 +290,11 @@ class DDPPlugin(ParallelPlugin):
             self._ddp_kwargs["find_unused_parameters"] = True
 
     def _register_ddp_hooks(self) -> None:
-        # currently, DDP communication hooks only work with NCCL backend and SPSD (single process single device) mode
-        # https://github.com/pytorch/pytorch/blob/v1.8.0/torch/nn/parallel/distributed.py#L1080-L1084
-        if _TORCH_GREATER_EQUAL_1_8 and self.on_gpu and self._is_single_process_single_device:
+        # In 1.8, DDP communication hooks only work with NCCL backend and SPSD (single process single device) mode
+        # Since 1.9, DDP communication hooks can work on all backends.
+        if _TORCH_GREATER_EQUAL_1_9 or (
+            _TORCH_GREATER_EQUAL_1_8 and self.on_gpu and self._is_single_process_single_device
+        ):
             register_ddp_comm_hook(
                 model=self._model,
                 ddp_comm_state=self._ddp_comm_state,
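
For context, a hedged usage sketch of how this code path is exercised from the Trainer side: `DDPPlugin` forwards `ddp_comm_hook` (and the related `ddp_comm_state`/`ddp_comm_wrapper`) to `register_ddp_comm_hook` in `_register_ddp_hooks()` above. The Trainer arguments below are illustrative, not taken from this PR:

# Illustrative sketch (not part of this PR): passing a communication hook through
# Lightning's DDPPlugin, which registers it via _register_ddp_hooks() shown above.
from pytorch_lightning import Trainer
from pytorch_lightning.plugins import DDPPlugin
from torch.distributed.algorithms.ddp_comm_hooks import default_hooks as default

trainer = Trainer(
    gpus=2,  # illustrative; with torch >= 1.9 the hook no longer requires NCCL/GPU
    plugins=[DDPPlugin(ddp_comm_hook=default.fp16_compress_hook)],
)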