[DDP] Remove the outdated limitations of DDP communication hook since 1.9 (#8346)

* [DDP] Remove the outdated limitations of DDP communication hook since 1.9

1. Since PyTorch 1.9, DDP communication hooks can work on all backends, not only NCCL (a minimal sketch follows below).
2. SPMD (single process multiple devices) mode in DDP is completely retired in 1.9, so SPSD (single process single device) is the only option.
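
As a non-authoritative illustration of point 1, here is a minimal sketch of registering a built-in communication hook on a CPU/gloo backend, which PyTorch rejected before 1.9. The single-process gloo group and the `fp16_compress_hook` choice are assumptions made purely for the example, not part of this PR:

# Minimal sketch: register a DDP communication hook on a non-NCCL (gloo, CPU) backend.
# Assumes PyTorch >= 1.9; the world_size=1 process group is only for illustration.
import os
import torch
import torch.distributed as dist
from torch.distributed.algorithms.ddp_comm_hooks import default_hooks as default
from torch.nn.parallel import DistributedDataParallel as DDP

os.environ.setdefault("MASTER_ADDR", "127.0.0.1")
os.environ.setdefault("MASTER_PORT", "29500")
dist.init_process_group(backend="gloo", rank=0, world_size=1)

model = DDP(torch.nn.Linear(4, 4))  # CPU model, SPSD (single process single device) mode
# Before 1.9 this call required the NCCL backend; since 1.9 it works on any backend.
model.register_comm_hook(state=None, hook=default.fp16_compress_hook)

model(torch.randn(2, 4)).sum().backward()  # gradients are communicated through the hook
dist.destroy_process_group()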

* Update ddp.py

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Yi Wang 2021-07-19 04:55:42 -07:00 committed by GitHub
parent 2c5d94d98b
commit adaa32f47a
1 changed file with 6 additions and 3 deletions


@@ -37,6 +37,7 @@ from pytorch_lightning.utilities import (
     _HYDRA_AVAILABLE,
     _TORCH_GREATER_EQUAL_1_7,
     _TORCH_GREATER_EQUAL_1_8,
+    _TORCH_GREATER_EQUAL_1_9,
     rank_zero_deprecation,
     rank_zero_warn,
 )
@@ -289,9 +290,11 @@ class DDPPlugin(ParallelPlugin):
             self._ddp_kwargs["find_unused_parameters"] = True
 
     def _register_ddp_hooks(self) -> None:
-        # currently, DDP communication hooks only work with NCCL backend and SPSD (single process single device) mode
-        # https://github.com/pytorch/pytorch/blob/v1.8.0/torch/nn/parallel/distributed.py#L1080-L1084
-        if _TORCH_GREATER_EQUAL_1_8 and self.on_gpu and self._is_single_process_single_device:
+        # In 1.8, DDP communication hooks only work with NCCL backend and SPSD (single process single device) mode
+        # Since 1.9, DDP communication hooks can work on all backends.
+        if _TORCH_GREATER_EQUAL_1_9 or (
+            _TORCH_GREATER_EQUAL_1_8 and self.on_gpu and self._is_single_process_single_device
+        ):
             register_ddp_comm_hook(
                 model=self._model,
                 ddp_comm_state=self._ddp_comm_state,
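
For context, a hedged usage sketch of how this code path is exercised from the Trainer side: `DDPPlugin` forwards `ddp_comm_hook` (and the related `ddp_comm_state`/`ddp_comm_wrapper`) to `register_ddp_comm_hook` in `_register_ddp_hooks()` above. The Trainer arguments below are illustrative, not taken from this PR:

# Illustrative sketch (not part of this PR): passing a communication hook through
# Lightning's DDPPlugin, which registers it via _register_ddp_hooks() shown above.
from pytorch_lightning import Trainer
from pytorch_lightning.plugins import DDPPlugin
from torch.distributed.algorithms.ddp_comm_hooks import default_hooks as default

trainer = Trainer(
    gpus=2,  # illustrative; with torch >= 1.9 the hook no longer requires NCCL/GPU
    plugins=[DDPPlugin(ddp_comm_hook=default.fp16_compress_hook)],
)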