diff --git a/CHANGELOG.md b/CHANGELOG.md
index 517b6d4709..e90168fe17 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -10,6 +10,12 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 ### Added

 - Added all DDP params to be exposed through hpu parallel strategy ([#13067](https://github.com/PyTorchLightning/pytorch-lightning/pull/13067))
+
+### Changed
+
+- Keep `torch.backends.cudnn.benchmark=False` by default (unlike in v1.6.{0-4}) after speed and memory problems depending on the data used. Please consider tuning `Trainer(benchmark)` manually. ([#13154](https://github.com/PyTorchLightning/pytorch-lightning/pull/13154))
+- Prevent modification of `torch.backends.cudnn.benchmark` when `Trainer(benchmark=...)` is not set ([#13154](https://github.com/PyTorchLightning/pytorch-lightning/pull/13154))
+
 ### Fixed

 - Fixed an issue causing zero-division error for empty dataloaders ([#12885](https://github.com/PyTorchLightning/pytorch-lightning/pull/12885))
diff --git a/docs/source/common/trainer.rst b/docs/source/common/trainer.rst
index 56f8cb07ee..c46a172e8b 100644
--- a/docs/source/common/trainer.rst
+++ b/docs/source/common/trainer.rst
@@ -437,21 +437,24 @@ benchmark

 |

-Defaults to ``True`` if :paramref:`~pytorch_lightning.trainer.Trainer.deterministic` is not set.
-This flag sets the ``torch.backends.cudnn.deterministic`` flag. You can read more about its impact
+The value (``True`` or ``False``) to set ``torch.backends.cudnn.benchmark`` to. The value for
+``torch.backends.cudnn.benchmark`` set in the current session will be used (``False`` if not manually set).
+If :paramref:`~pytorch_lightning.trainer.Trainer.deterministic` is set to ``True``, this will default to ``False``.
+You can read more about the interaction of ``torch.backends.cudnn.benchmark`` and ``torch.backends.cudnn.deterministic``
 `here `__

-This is likely to increase the speed of your system if your input sizes don't change. However, if they do, then it
-might make your system slower. The CUDNN auto-tuner will try to find the best algorithm for the hardware when a new
-input size is encountered. Read more about it `here `__.
+Setting this flag to ``True`` can increase the speed of your system if your input sizes don't
+change. However, if they do, then it might make your system slower. The CUDNN auto-tuner will try to find the best
+algorithm for the hardware when a new input size is encountered. This might also increase the memory usage.
+Read more about it `here `__.

 Example::

-    # defaults to True if not deterministic (which is False by default)
-    trainer = Trainer()
+    # Will use whatever the current value for torch.backends.cudnn.benchmark, normally False
+    trainer = Trainer(benchmark=None)  # default

     # you can overwrite the value
-    trainer = Trainer(benchmark=False)
+    trainer = Trainer(benchmark=True)

 deterministic
 ^^^^^^^^^^^^^
diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py
index ff4cfc4980..d242236e83 100644
--- a/pytorch_lightning/trainer/connectors/accelerator_connector.py
+++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py
@@ -143,14 +143,19 @@ class AcceleratorConnector:
             B. Strategy > Accelerator/precision/plugins
             C. TODO When multiple flag set to the same thing
         """
-        if benchmark and deterministic:
-            rank_zero_warn(
-                "You passed `deterministic=True` and `benchmark=True`. Note that PyTorch ignores"
-                " torch.backends.cudnn.deterministic=True when torch.backends.cudnn.benchmark=True.",
-            )
-        self.benchmark = not deterministic if benchmark is None else benchmark
+        if deterministic:
+            if benchmark is None:
+                # Set benchmark to False to ensure determinism
+                benchmark = False
+            elif benchmark:
+                rank_zero_warn(
+                    "You passed `deterministic=True` and `benchmark=True`. Note that PyTorch ignores"
+                    " torch.backends.cudnn.deterministic=True when torch.backends.cudnn.benchmark=True.",
+                )
         # TODO: move to gpu accelerator
-        torch.backends.cudnn.benchmark = self.benchmark
+        if benchmark is not None:
+            torch.backends.cudnn.benchmark = benchmark
+        self.benchmark = torch.backends.cudnn.benchmark
         self.replace_sampler_ddp = replace_sampler_ddp
         self._init_deterministic(deterministic)

@@ -211,10 +216,10 @@ class AcceleratorConnector:
         # 6. Instantiate Strategy - Part 2
         self._lazy_init_strategy()

-    def _init_deterministic(self, deterministic: bool) -> None:
-        self.deterministic = deterministic
-        torch.use_deterministic_algorithms(deterministic)
-        if deterministic:
+    def _init_deterministic(self, deterministic: Optional[bool]) -> None:
+        self.deterministic = deterministic or False  # default to False if not set
+        torch.use_deterministic_algorithms(self.deterministic)
+        if self.deterministic:
             # fixing non-deterministic part of horovod
             # https://github.com/PyTorchLightning/pytorch-lightning/pull/1572/files#r420279383
             os.environ["HOROVOD_FUSION_THRESHOLD"] = "0"
diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py
index 4d8402147d..2fd7806ae3 100644
--- a/pytorch_lightning/trainer/trainer.py
+++ b/pytorch_lightning/trainer/trainer.py
@@ -174,7 +174,7 @@ class Trainer(
         resume_from_checkpoint: Optional[Union[Path, str]] = None,
         profiler: Optional[Union[Profiler, str]] = None,
         benchmark: Optional[bool] = None,
-        deterministic: bool = False,
+        deterministic: Optional[bool] = None,
         reload_dataloaders_every_n_epochs: int = 0,
         auto_lr_find: Union[bool, str] = False,
         replace_sampler_ddp: bool = True,
@@ -229,9 +229,11 @@ class Trainer(
                 that only one process at a time can access them.
                 Default: ``False``.

-            benchmark: Sets ``torch.backends.cudnn.benchmark``.
-                Defaults to ``True`` if :paramref:`~pytorch_lightning.trainer.trainer.Trainer.deterministic`
-                is ``False``. Overwrite to manually set a different value. Default: ``None``.
+            benchmark: The value (``True`` or ``False``) to set ``torch.backends.cudnn.benchmark`` to.
+                The value for ``torch.backends.cudnn.benchmark`` set in the current session will be used
+                (``False`` if not manually set). If :paramref:`~pytorch_lightning.trainer.Trainer.deterministic` is set
+                to ``True``, this will default to ``False``. Override to manually set a different value.
+                Default: ``None``.

             callbacks: Add a callback or list of callbacks. Default: ``None``.

@@ -260,7 +262,8 @@ class Trainer(
                 Default: ``False``.

             deterministic: If ``True``, sets whether PyTorch operations must use deterministic algorithms.
-                Default: ``False``.
+                If not set, defaults to ``False``.
+                Default: ``None``.

             devices: Will be mapped to either `gpus`, `tpu_cores`, `num_processes` or `ipus`,
                 based on the accelerator type.
diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py
index 8124baee9e..c2ac5b2599 100644
--- a/tests/trainer/test_trainer.py
+++ b/tests/trainer/test_trainer.py
@@ -637,27 +637,32 @@ def test_trainer_max_steps_accumulate_batches(tmpdir):
     assert trainer.global_step == trainer.max_steps, "Model did not stop at max_steps"


+@pytest.mark.parametrize("cudnn_benchmark", (False, True))
 @pytest.mark.parametrize(
     ["benchmark_", "deterministic", "expected"],
     [
-        (None, False, True),
+        (None, False, None),
         (None, True, False),
+        (None, None, None),
         (True, False, True),
         (True, True, True),
-        (False, True, False),
+        (True, None, True),
         (False, False, False),
+        (False, True, False),
+        (False, None, False),
     ],
 )
-def test_benchmark_option(benchmark_, deterministic, expected):
+def test_benchmark_option(cudnn_benchmark, benchmark_, deterministic, expected):
     """Verify benchmark option."""
-    original_val = torch.backends.cudnn.benchmark
+    torch.backends.cudnn.benchmark = cudnn_benchmark

     if benchmark_ and deterministic:
         with pytest.warns(UserWarning, match="You passed `deterministic=True` and `benchmark=True`"):
             trainer = Trainer(benchmark=benchmark_, deterministic=deterministic)
     else:
         trainer = Trainer(benchmark=benchmark_, deterministic=deterministic)

+    expected = cudnn_benchmark if expected is None else expected
     assert torch.backends.cudnn.benchmark == expected
     assert trainer._accelerator_connector.benchmark == expected
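Reviewer note (not part of the patch): the parametrization in `test_benchmark_option` above encodes the new resolution rule for `torch.backends.cudnn.benchmark`. Below is a minimal, illustrative sketch of that rule; the helper name `resolve_benchmark` is hypothetical and exists only to mirror the `AcceleratorConnector` logic and the test expectations.

```python
from typing import Optional


def resolve_benchmark(
    benchmark: Optional[bool], deterministic: Optional[bool], cudnn_benchmark: bool
) -> bool:
    """Illustrative only: value torch.backends.cudnn.benchmark ends up with after Trainer is built."""
    if deterministic and benchmark is None:
        # determinism requested and benchmark not set explicitly -> force False
        benchmark = False
    if benchmark is None:
        # benchmark not passed -> keep whatever the session already uses
        return cudnn_benchmark
    # an explicit True/False always wins (True together with deterministic=True only warns)
    return benchmark


# mirrors a few rows of the parametrization above, with the session value set to True
assert resolve_benchmark(None, None, cudnn_benchmark=True) is True
assert resolve_benchmark(None, True, cudnn_benchmark=True) is False
assert resolve_benchmark(False, None, cudnn_benchmark=True) is False
```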