Add a fallback for the SLURM-managed flag when not running under SLURM

This commit is contained in:
William Falcon 2019-07-18 17:58:38 -04:00
parent c2e2298586
commit 6e12431e6b
1 changed file with 8 additions and 3 deletions

View File

@@ -377,9 +377,14 @@ class Trainer(TrainerIO):
# whenever we have the correct number of tasks, we let slurm manage processes
# otherwise we launch the required number of processes
nb_slurm_tasks = int(os.environ['SLURM_NTASKS'])
nb_requested_gpus = len(self.data_parallel_device_ids)
is_slurm_managing_tasks = nb_slurm_tasks == nb_requested_gpus
try:
nb_slurm_tasks = int(os.environ['SLURM_NTASKS'])
nb_requested_gpus = len(self.data_parallel_device_ids)
is_slurm_managing_tasks = nb_slurm_tasks == nb_requested_gpus
except Exception as e:
# likely not on slurm, so set the slurm managed flag to false
is_slurm_managing_tasks = False
if is_slurm_managing_tasks:
task = int(os.environ['SLURM_LOCALID'])
self.ddp_train(task, model)