Added SLURM-managed flag catch for non-SLURM users
This commit is contained in:
parent
00678c6053
commit
468bd141f4
|
@ -450,6 +450,13 @@ class Trainer(TrainerIO):
|
||||||
# node rank using relative slurm id
|
# node rank using relative slurm id
|
||||||
# otherwise default to node rank 0
|
# otherwise default to node rank 0
|
||||||
try:
|
try:
|
||||||
|
print('x'*100)
|
||||||
|
node_id = os.environ['SLURM_NODEID']
|
||||||
|
local_id = os.environ['SLURM_LOCALID']
|
||||||
|
n_nodes = os.environ['SLURM_JOB_NUM_NODES']
|
||||||
|
|
||||||
|
print(f'NODEID: {node_id}, LOCALID: {local_id}, N_NODES: {n_nodes}')
|
||||||
|
print('x'*100)
|
||||||
self.node_rank = int(os.environ['SLURM_NODEID'])
|
self.node_rank = int(os.environ['SLURM_NODEID'])
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.node_rank = 0
|
self.node_rank = 0
|
||||||
|
|
Loading…
Reference in New Issue