[fix] Add barriers before and after setup hook is run (#7202)

* Update data_connector.py

* move-barrier

* Update trainer.py

* Update ddp.py

* changelog

* Spacing

Co-authored-by: Carlos Mocholi <carlossmocholi@gmail.com>
ananthsub authored on 2021-04-27 09:19:43 -07:00, committed by GitHub
parent f920ba29f2
commit bab7225507
3 changed files with 8 additions and 1 deletion

CHANGELOG.md

@@ -9,6 +9,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 ### Added
+- Added synchronization points before and after `setup` hooks are run ([#7202](https://github.com/PyTorchLightning/pytorch-lightning/pull/7202))
 - Added a `teardown` hook to `ClusterEnvironment` ([#6942](https://github.com/PyTorchLightning/pytorch-lightning/pull/6942))

pytorch_lightning/plugins/training_type/ddp.py

@@ -282,7 +282,7 @@ class DDPPlugin(ParallelPlugin):
         self.cluster_environment.teardown()

     def barrier(self, *args, **kwargs):
-        if torch_distrib.is_initialized():
+        if torch_distrib.is_available() and torch_distrib.is_initialized():
            torch_distrib.barrier()

     def broadcast(self, obj: object, src: int = 0) -> object:
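The extra `is_available()` check makes the barrier safe on PyTorch builds compiled without distributed support, where `is_initialized` may not even be defined, and keeps it a no-op in single-process runs. A minimal sketch of the same guard as a standalone helper (the `safe_barrier` name is illustrative, not part of this change):

```python
import torch.distributed as torch_distrib


def safe_barrier() -> None:
    """Synchronize all processes in the default process group, if there is one.

    ``is_available()`` keeps this a no-op on PyTorch builds without distributed
    support (where ``is_initialized`` may not even be defined), and
    ``is_initialized()`` keeps it a no-op when no process group was created.
    """
    if torch_distrib.is_available() and torch_distrib.is_initialized():
        torch_distrib.barrier()
```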

pytorch_lightning/trainer/trainer.py

@@ -1114,6 +1114,8 @@ class Trainer(
         assert self.state.running, f"TrainerState: {self.state}"
         state = self._setup_state

+        self.accelerator.barrier("pre_setup")
+
         if self.datamodule is not None:
             called = getattr(self.datamodule, f'has_setup_{state}')
             if not called:
@@ -1122,6 +1124,8 @@ class Trainer(
         self.setup(model, stage=state)
         model.setup(stage=state)

+        self.accelerator.barrier("post_setup")
+
     def call_configure_sharded_model(self, model: LightningModule) -> None:
         # Call configure sharded model hook if accelerator requests. In some cases
         # we will not call the hook; the hook has initialized the sharded model for example.
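The two barriers fence the whole `setup` phase: no process starts running the `setup` hooks until every process has reached that point, and none moves on until every process has finished its setup. A rough sketch of the pattern outside the Trainer, assuming a `torch.distributed` process group (the `fenced_setup` helper is illustrative, not Lightning API):

```python
from typing import Callable

import torch.distributed as dist


def _barrier() -> None:
    # Same guard as DDPPlugin.barrier above: only synchronize when a
    # process group is available and initialized.
    if dist.is_available() and dist.is_initialized():
        dist.barrier()


def fenced_setup(setup_fn: Callable[[], None]) -> None:
    """Run ``setup_fn`` with a barrier on each side, mirroring the
    "pre_setup"/"post_setup" barriers added around the Trainer's setup hooks."""
    _barrier()   # pre_setup: wait until every rank is ready to start setup
    setup_fn()   # e.g. rank 0 writes a cache that the other ranks read later
    _barrier()   # post_setup: wait until every rank has finished its setup
```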