From 3af3f37d43c69715b191512d396e3d13e89d3987 Mon Sep 17 00:00:00 2001
From: Andreas Kirsch
Date: Fri, 29 May 2020 06:57:50 +0100
Subject: [PATCH] Add toma comments to auto_scale_batch_size (#1994)

* Add source comments

* Update training_tricks.rst
---
 docs/source/training_tricks.rst       | 2 +-
 pytorch_lightning/utilities/memory.py | 5 +++++
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/docs/source/training_tricks.rst b/docs/source/training_tricks.rst
index 53cb95bf9f..9140f52aba 100644
--- a/docs/source/training_tricks.rst
+++ b/docs/source/training_tricks.rst
@@ -39,7 +39,7 @@ Auto scaling of batch size
 --------------------------
 Auto scaling of batch size may be enabled to find the largest batch size that fits into
 memory. Larger batch size often yields better estimates of gradients, but may also result in
-longer training time.
+longer training time. Inspired by https://github.com/BlackHC/toma.
 
 .. seealso::
     :class:`~pytorch_lightning.trainer.trainer.Trainer`
diff --git a/pytorch_lightning/utilities/memory.py b/pytorch_lightning/utilities/memory.py
index eed7a13ca9..19a473640e 100644
--- a/pytorch_lightning/utilities/memory.py
+++ b/pytorch_lightning/utilities/memory.py
@@ -32,24 +32,29 @@ def is_oom_error(exception):
         or is_out_of_cpu_memory(exception)
 
 
+# based on https://github.com/BlackHC/toma/blob/master/toma/torch_cuda_memory.py
 def is_cuda_out_of_memory(exception):
     return isinstance(exception, RuntimeError) \
         and len(exception.args) == 1 \
         and "CUDA out of memory." in exception.args[0]
 
 
+# based on https://github.com/BlackHC/toma/blob/master/toma/torch_cuda_memory.py
 def is_cudnn_snafu(exception):
+    # For/because of https://github.com/pytorch/pytorch/issues/4107
     return isinstance(exception, RuntimeError) \
         and len(exception.args) == 1 \
         and "cuDNN error: CUDNN_STATUS_NOT_SUPPORTED." in exception.args[0]
 
 
+# based on https://github.com/BlackHC/toma/blob/master/toma/cpu_memory.py
 def is_out_of_cpu_memory(exception):
     return isinstance(exception, RuntimeError) \
         and len(exception.args) == 1 \
         and "DefaultCPUAllocator: can't allocate memory" in exception.args[0]
 
 
+# based on https://github.com/BlackHC/toma/blob/master/toma/torch_cuda_memory.py
 def garbage_collection_cuda():
     """Garbage collection Torch (CUDA) memory."""
     gc.collect()