using slurm flag to find node nb

This commit is contained in:
William Falcon 2019-07-08 14:14:36 -04:00
parent 553223334f
commit d2a717d31e
2 changed files with 2 additions and 1 deletions

View File

@@ -46,7 +46,6 @@ def main(hparams, cluster, results_dict):
# 1 INIT LIGHTNING MODEL
# ------------------------
print('loading model...')
print(os.environ['SLURM_SRUN_COMM_HOST'])
model = LightningTemplateModel(hparams)
print('model built')

View File

@@ -361,6 +361,8 @@ class Trainer(TrainerIO):
# saves the ip to disk
ip_table_name = f'.ip_meta_' + os.environ['SLURM_JOB_ID']
ip_file = os.path.join(ip_file_dir, ip_table_name)
os.makedirs(ip_file_dir, exist_ok=True)
if world_gpu_nb == 0:
# get the proc 0 IP
root_ip = subprocess.run(['hostname', '-I'], stdout=subprocess.PIPE).stdout.decode('utf-8')