Add toma comments to auto_scale_batch_size (#1994)
* Add source comments
* Update training_tricks.rst
parent cd3fed03a2
commit 3af3f37d43
```diff
@@ -39,7 +39,7 @@ Auto scaling of batch size
 --------------------------
 Auto scaling of batch size may be enabled to find the largest batch size that fits into
 memory. Larger batch size often yields better estimates of gradients, but may also result in
-longer training time.
+longer training time. Inspired by https://github.com/BlackHC/toma.
 
 .. seealso:: :class:`~pytorch_lightning.trainer.trainer.Trainer`
 
```
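For context, the feature the docs describe is switched on with a single Trainer flag. A minimal sketch, assuming the API of this release; `LitModel` is a hypothetical LightningModule:

```python
from pytorch_lightning import Trainer

model = LitModel()  # hypothetical LightningModule exposing a batch size hyperparameter

# `auto_scale_batch_size=True` asks the trainer to search for the largest
# batch size that fits in memory before training begins.
trainer = Trainer(auto_scale_batch_size=True)
trainer.fit(model)
```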
```diff
@@ -32,24 +32,29 @@ def is_oom_error(exception):
         or is_out_of_cpu_memory(exception)
 
 
+# based on https://github.com/BlackHC/toma/blob/master/toma/torch_cuda_memory.py
 def is_cuda_out_of_memory(exception):
     return isinstance(exception, RuntimeError) \
         and len(exception.args) == 1 \
         and "CUDA out of memory." in exception.args[0]
 
 
+# based on https://github.com/BlackHC/toma/blob/master/toma/torch_cuda_memory.py
 def is_cudnn_snafu(exception):
     # For/because of https://github.com/pytorch/pytorch/issues/4107
     return isinstance(exception, RuntimeError) \
         and len(exception.args) == 1 \
         and "cuDNN error: CUDNN_STATUS_NOT_SUPPORTED." in exception.args[0]
 
 
+# based on https://github.com/BlackHC/toma/blob/master/toma/cpu_memory.py
 def is_out_of_cpu_memory(exception):
     return isinstance(exception, RuntimeError) \
         and len(exception.args) == 1 \
         and "DefaultCPUAllocator: can't allocate memory" in exception.args[0]
 
 
+# based on https://github.com/BlackHC/toma/blob/master/toma/torch_cuda_memory.py
 def garbage_collection_cuda():
     """Garbage collection Torch (CUDA) memory."""
     gc.collect()
```
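PyTorch reports out-of-memory conditions as plain `RuntimeError`s, so the predicates above classify an exception by its message text. A hedged sketch of the toma-style retry pattern they enable, assuming `is_oom_error` and `garbage_collection_cuda` from the hunk above are in scope and using a hypothetical `run_training_step`:

```python
def fits_in_memory(run_training_step, batch_size):
    """Return True if one step at `batch_size` runs without exhausting memory."""
    try:
        run_training_step(batch_size)
        return True
    except RuntimeError as exception:
        if is_oom_error(exception):   # CUDA, cuDNN, or CPU allocator OOM
            garbage_collection_cuda() # reclaim cached memory before retrying
            return False              # caller retries with a smaller batch size
        raise                         # not a memory error: propagate unchanged
```

On `False`, a batch-size search stops growing (or shrinks) the batch size and tries again, which is exactly what the OOM classification makes safe.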