From 468bd141f46b4bf96ca01ca6ae9c6c909b8d2396 Mon Sep 17 00:00:00 2001 From: William Falcon Date: Sat, 20 Jul 2019 09:08:24 -0400 Subject: [PATCH] added slurm managed flag catch for non-slurm peeps --- pytorch_lightning/models/trainer.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pytorch_lightning/models/trainer.py b/pytorch_lightning/models/trainer.py index 12c501db4d..5b51828307 100644 --- a/pytorch_lightning/models/trainer.py +++ b/pytorch_lightning/models/trainer.py @@ -450,6 +450,13 @@ class Trainer(TrainerIO): # node rank using relative slurm id # otherwise default to node rank 0 try: + print('x'*100) + node_id = os.environ['SLURM_NODEID'] + local_id = os.environ['SLURM_LOCALID'] + n_nodes = os.environ['SLURM_JOB_NUM_NODES'] + + print(f'NODEID: {node_id}, LOCALID: {local_id}, N_NODES: {n_nodes}') + print('x'*100) self.node_rank = int(os.environ['SLURM_NODEID']) except Exception as e: self.node_rank = 0