From a83d00456b24e9b4ea14a92f2533e1023bd8db58 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Mon, 8 Jul 2019 12:59:10 -0400 Subject: [PATCH] added multi-node locked ip search --- pytorch_lightning/models/trainer.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/pytorch_lightning/models/trainer.py b/pytorch_lightning/models/trainer.py index acb79dea09..c0e8c97295 100644 --- a/pytorch_lightning/models/trainer.py +++ b/pytorch_lightning/models/trainer.py @@ -366,8 +366,6 @@ class Trainer(TrainerIO): if nb_gpu_nodes == 1: return 0, '127.0.0.1' - - # on multi-node, every node rank > 0 waits until rank 0 # saves the ip to disk ip_file = os.path.join(ip_file_dir, '.ip_meta') @@ -376,10 +374,6 @@ class Trainer(TrainerIO): my_ip = subprocess.run(['hostname', '-I'], stdout=subprocess.PIPE).stdout.decode('utf-8') my_ip = my_ip.split(' ')[0] - test_name = f'{os.getpid()}_{my_ip}' - ip_dir = os.path.join(ip_file_dir, '.ips', test_name) - os.makedirs(ip_dir, exist_ok=True) - # save the ip to the file # block file so only one process can access at a time ip_dir = os.path.join(ip_file_dir, '.ips', my_ip)