using slurm flag to find node nb
This commit is contained in:
parent
553223334f
commit
d2a717d31e
|
@ -46,7 +46,6 @@ def main(hparams, cluster, results_dict):
|
|||
# 1 INIT LIGHTNING MODEL
|
||||
# ------------------------
|
||||
print('loading model...')
|
||||
print(os.environ['SLURM_SRUN_COMM_HOST'])
|
||||
model = LightningTemplateModel(hparams)
|
||||
print('model built')
|
||||
|
||||
|
|
|
@ -361,6 +361,8 @@ class Trainer(TrainerIO):
|
|||
# saves the ip to disk
|
||||
ip_table_name = f'.ip_meta_' + os.environ['SLURM_JOB_ID']
|
||||
ip_file = os.path.join(ip_file_dir, ip_table_name)
|
||||
os.makedirs(ip_file_dir, exist_ok=True)
|
||||
|
||||
if world_gpu_nb == 0:
|
||||
# get the proc 0 IP
|
||||
root_ip = subprocess.run(['hostname', '-I'], stdout=subprocess.PIPE).stdout.decode('utf-8')
|
||||
|
|
Loading…
Reference in New Issue