From 460ab5485ef693a2f866de95b3505aec75ff2707 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Tue, 26 May 2020 19:02:30 -0400 Subject: [PATCH] Gen ddp support (#1961) * updated docs * added mixed * added mixed --- pytorch_lightning/trainer/trainer.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index 0cf10fe8cb..03efa27f09 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -856,17 +856,21 @@ class Trainer( if self.use_ddp2: if self.is_slurm_managing_tasks: task = int(os.environ['SLURM_LOCALID']) - elif 'WORLD_SIZE' in os.environ and 'GROUP_RANK' in os.environ: + + # torchelastic or general non_slurm ddp2 + elif 'WORLD_SIZE' in os.environ and ('GROUP_RANK' in os.environ or 'NODE_RANK' in os.environ): task = int(os.environ['LOCAL_RANK']) self.ddp_train(task, model) elif self.use_ddp: if self.is_slurm_managing_tasks: task = int(os.environ['SLURM_LOCALID']) self.ddp_train(task, model) - # torchelastic - elif 'WORLD_SIZE' in os.environ and 'GROUP_RANK' in os.environ: + + # torchelastic or general non_slurm ddp + elif 'WORLD_SIZE' in os.environ and ('GROUP_RANK' in os.environ or 'NODE_RANK' in os.environ): task = int(os.environ['LOCAL_RANK']) self.ddp_train(task, model) + else: self.__set_random_port() # track for predict