[fix] Add barriers before and after setup hook is run (#7202)
* Update data_connector.py
* move-barrier
* Update trainer.py
* Update ddp.py
* changelog
* Spacing

Co-authored-by: Carlos Mocholi <carlossmocholi@gmail.com>
parent f920ba29f2
commit bab7225507
CHANGELOG.md
@@ -9,6 +9,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 
 ### Added
 
+- Added synchronization points before and after `setup` hooks are run ([#7202](https://github.com/PyTorchLightning/pytorch-lightning/pull/7202))
+
 - Added a `teardown` hook to `ClusterEnvironment` ([#6942](https://github.com/PyTorchLightning/pytorch-lightning/pull/6942))
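For context, a hedged sketch of the kind of user-defined `setup` hook these synchronization points guard; the DataModule and its attributes are invented for illustration:

```python
import pytorch_lightning as pl


class MyDataModule(pl.LightningDataModule):
    def setup(self, stage=None):
        # Per-rank work such as building datasets and splits happens here.
        # With #7202, every rank enters and leaves this hook between the
        # "pre_setup" and "post_setup" barriers, so a slow rank can no
        # longer be left behind while the others move on to training.
        self.train_split = ...
```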
ddp.py
@@ -282,7 +282,7 @@ class DDPPlugin(ParallelPlugin):
         self.cluster_environment.teardown()
 
     def barrier(self, *args, **kwargs):
-        if torch_distrib.is_initialized():
+        if torch_distrib.is_available() and torch_distrib.is_initialized():
            torch_distrib.barrier()
 
     def broadcast(self, obj: object, src: int = 0) -> object:
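The added `is_available()` guard matters because PyTorch builds compiled without distributed support expose `torch.distributed.is_available()` (returning `False`) but not the rest of the distributed API, so calling `is_initialized()` alone could fail outright. A minimal standalone sketch of the guarded pattern; the helper name is ours, not part of the plugin:

```python
import torch.distributed as torch_distrib


def safe_barrier() -> None:
    # Check is_available() first: on builds without distributed support,
    # the rest of the torch.distributed API is not usable at all.
    # is_initialized() then makes single-process runs a silent no-op,
    # since no process group exists to synchronize.
    if torch_distrib.is_available() and torch_distrib.is_initialized():
        torch_distrib.barrier()
```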
trainer.py
@@ -1114,6 +1114,8 @@ class Trainer(
         assert self.state.running, f"TrainerState: {self.state}"
         state = self._setup_state
 
+        self.accelerator.barrier("pre_setup")
+
         if self.datamodule is not None:
             called = getattr(self.datamodule, f'has_setup_{state}')
             if not called:
@@ -1122,6 +1124,8 @@ class Trainer(
         self.setup(model, stage=state)
         model.setup(stage=state)
 
+        self.accelerator.barrier("post_setup")
+
     def call_configure_sharded_model(self, model: LightningModule) -> None:
         # Call configure sharded model hook if accelerator requests. In some cases
         # we will not call the hook; the hook has initialized the sharded model for example.
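To see what the two barriers buy, here is a self-contained sketch using raw `torch.distributed` with the gloo backend on CPU; the `worker` and `setup` functions are stand-ins for illustration, not Trainer internals. No rank can start `setup` until every rank has arrived at "pre_setup", and no rank can move on to training until `setup` has completed everywhere:

```python
import os

import torch.distributed as dist
import torch.multiprocessing as mp


def setup(rank: int) -> None:
    # Stand-in for the LightningModule/DataModule `setup` hook: imagine
    # per-rank dataset construction or file preparation happening here.
    print(f"rank {rank}: running setup")


def worker(rank: int, world_size: int) -> None:
    os.environ["MASTER_ADDR"] = "127.0.0.1"
    os.environ["MASTER_PORT"] = "29500"
    dist.init_process_group("gloo", rank=rank, world_size=world_size)

    dist.barrier()  # "pre_setup": all ranks reach this line before any runs setup
    setup(rank)
    dist.barrier()  # "post_setup": no rank proceeds until setup finished on all ranks

    print(f"rank {rank}: proceeding to training")
    dist.destroy_process_group()


if __name__ == "__main__":
    world_size = 2
    mp.spawn(worker, args=(world_size,), nprocs=world_size)
```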