From 6e12431e6bd935b5292811e38a3a734cd453befb Mon Sep 17 00:00:00 2001
From: William Falcon
Date: Thu, 18 Jul 2019 17:58:38 -0400
Subject: [PATCH] added slurm managed flag catch for non-slurm peeps

---
 pytorch_lightning/models/trainer.py | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/pytorch_lightning/models/trainer.py b/pytorch_lightning/models/trainer.py
index ba57ed76d6..2f07083b3c 100644
--- a/pytorch_lightning/models/trainer.py
+++ b/pytorch_lightning/models/trainer.py
@@ -377,9 +377,14 @@ class Trainer(TrainerIO):
 
         # whenever we have the correct number of tasks, we let slurm manage processes
         # otherwise we launch the required number of processes
-        nb_slurm_tasks = int(os.environ['SLURM_NTASKS'])
-        nb_requested_gpus = len(self.data_parallel_device_ids)
-        is_slurm_managing_tasks = nb_slurm_tasks == nb_requested_gpus
+        try:
+            nb_slurm_tasks = int(os.environ['SLURM_NTASKS'])
+            nb_requested_gpus = len(self.data_parallel_device_ids)
+            is_slurm_managing_tasks = nb_slurm_tasks == nb_requested_gpus
+        except Exception as e:
+            # likely not on slurm, so set the slurm managed flag to false
+            is_slurm_managing_tasks = False
+
         if is_slurm_managing_tasks:
             task = int(os.environ['SLURM_LOCALID'])
             self.ddp_train(task, model)
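
For context, a minimal standalone sketch of the detection logic this patch introduces: SLURM is treated as managing the tasks only when SLURM_NTASKS is set and matches the number of requested GPUs; a missing or malformed variable falls back to False. The function name and the `requested_gpu_ids` parameter are illustrative assumptions, standing in for self.data_parallel_device_ids; this is not part of the diff itself.

    import os


    def is_slurm_managing_tasks(requested_gpu_ids):
        """Return True only when SLURM launched exactly one task per requested GPU."""
        try:
            nb_slurm_tasks = int(os.environ['SLURM_NTASKS'])
        except (KeyError, ValueError):
            # SLURM_NTASKS is absent or not an integer: most likely not running
            # under SLURM, so the caller should launch processes itself.
            return False
        return nb_slurm_tasks == len(requested_gpu_ids)


    if __name__ == '__main__':
        # Prints False on a non-SLURM machine; prints True under e.g.
        # `srun --ntasks=2 ...` when two GPUs are requested.
        print(is_slurm_managing_tasks([0, 1]))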