From 79d9adf0046f95d2206a52f4b6980cc84fbd4539 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Mon, 8 Jul 2019 09:36:27 -0400 Subject: [PATCH] added multi-node proc 0 ip reading --- pytorch_lightning/models/trainer.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pytorch_lightning/models/trainer.py b/pytorch_lightning/models/trainer.py index e473fb9644..cacc522143 100644 --- a/pytorch_lightning/models/trainer.py +++ b/pytorch_lightning/models/trainer.py @@ -315,13 +315,11 @@ class Trainer(TrainerIO): print(f"GPU: {gpu_nb} - Rank: {self.proc_rank}") # copy model to each gpu - print('starting DDP') torch.cuda.set_device(gpu_nb) model.cuda(gpu_nb) model = LightningDistributedDataParallel(model, device_ids=[gpu_nb]) # continue training routine - print('running pretrain') self.__run_pretrain_routine(model) def __get_root_node_ip(self, proc_rank, nb_gpu_nodes, ip_file_dir):