Do not configure launcher if processes are launched externally (#12431)

This commit is contained in:
ananthsub 2022-03-24 02:40:34 -07:00 committed by GitHub
parent f740085678
commit d418cf23b2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 44 additions and 2 deletions

View File

@@ -137,8 +137,8 @@ class DDPStrategy(ParallelStrategy):
return self._process_group_backend
def _configure_launcher(self) -> None:
    """Set up the subprocess script launcher unless processes are created externally.

    When ``self.cluster_environment.creates_processes_externally`` is true,
    this method does nothing: ``self._launcher`` is left as it was and
    ``self._rank_0_will_call_children_scripts`` is not set here.
    """
    # An unconditional launcher assignment used to precede this guard, which
    # configured a launcher even when an external system had already spawned
    # the processes. Both statements now live under the guard.
    if not self.cluster_environment.creates_processes_externally:
        self._launcher = _SubprocessScriptLauncher(self.cluster_environment, self.num_processes, self.num_nodes)
        self._rank_0_will_call_children_scripts = True
def setup_environment(self) -> None:

View File

@@ -19,7 +19,7 @@ import torch
from torch.nn.parallel import DistributedDataParallel
from pytorch_lightning import LightningModule, Trainer
from pytorch_lightning.plugins.environments import LightningEnvironment
from pytorch_lightning.plugins.environments import ClusterEnvironment, LightningEnvironment
from pytorch_lightning.strategies import DDPStrategy
from pytorch_lightning.trainer.states import TrainerFn
from tests.helpers.boring_model import BoringModel
@@ -147,3 +147,45 @@ def test_ddp_dont_configure_sync_batchnorm(trainer_fn):
trainer.strategy.setup(trainer)
# because TrainerFn is not FITTING, model is not configured with sync batchnorm
assert not isinstance(trainer.strategy.model.layer, torch.nn.modules.batchnorm.SyncBatchNorm)
def test_configure_launcher_create_processes_externally():
    """The DDP strategy keeps ``launcher`` unset when processes are created externally.

    A stub cluster environment that reports ``creates_processes_externally``
    as true is handed to ``DDPStrategy``; ``launcher`` must be ``None`` both
    before and after ``_configure_launcher()`` is called.
    """

    class _ExternallyLaunchedEnvironment(ClusterEnvironment):
        """Stub environment whose processes are reported as created externally."""

        # The property under test: tells the strategy not to launch anything.
        @property
        def creates_processes_externally(self):
            return True

        @property
        def main_address(self):
            return ""

        @property
        def main_port(self):
            return 8080

        @staticmethod
        def detect():
            return True

        # Single-process, single-node topology: every rank query answers 0.
        def world_size(self):
            return 1

        def global_rank(self):
            return 0

        def local_rank(self):
            return 0

        def node_rank(self):
            return 0

        # The setters are intentionally no-ops for this stub.
        def set_world_size(self):
            return None

        def set_global_rank(self):
            return None

    strategy = DDPStrategy(cluster_environment=_ExternallyLaunchedEnvironment())
    # Nothing is configured at construction time ...
    assert strategy.launcher is None

    strategy._configure_launcher()
    # ... and configuring must not create a launcher either.
    assert strategy.launcher is None