From 6462cab351ec731eda189f4d72c74a4155802a10 Mon Sep 17 00:00:00 2001
From: William Falcon
Date: Mon, 8 Jul 2019 12:39:49 -0400
Subject: [PATCH] added multi-node locked ip search

---
 pytorch_lightning/models/trainer.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/pytorch_lightning/models/trainer.py b/pytorch_lightning/models/trainer.py
index b8e0a67641..9f16f49ba7 100644
--- a/pytorch_lightning/models/trainer.py
+++ b/pytorch_lightning/models/trainer.py
@@ -305,6 +305,12 @@ class Trainer(TrainerIO):
         if self.on_gpu:
             rank = 0
             self.experiment = self.experiment.get_meta_copy()
+
+            # remove any ip tables we saved
+            ip_table_file = os.path.join(self.exp_save_path, '.ip_meta')
+            if os.path.exists(ip_table_file):
+                os.remove(ip_table_file)
+
             mp.spawn(self.dp_train, nprocs=len(self.data_parallel_device_ids), args=(rank, model))
         else:
             self.__run_pretrain_routine(model)