Avoid changing the current `cudnn.benchmark` value (#13154)

Carlos Mocholí 2022-05-31 21:22:19 +02:00 committed by lexierule
parent 3c06cd834d
commit 2acff1ce76
5 changed files with 50 additions and 28 deletions

CHANGELOG.md

@@ -10,6 +10,12 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).

### Added

- Added all DDP params to be exposed through hpu parallel strategy ([#13067](https://github.com/PyTorchLightning/pytorch-lightning/pull/13067))

### Changed

+- Keep `torch.backends.cudnn.benchmark=False` by default (unlike in v1.6.{0-4}) after speed and memory problems depending on the data used. Please consider tuning `Trainer(benchmark)` manually. ([#13154](https://github.com/PyTorchLightning/pytorch-lightning/pull/13154))
+- Prevent modification of `torch.backends.cudnn.benchmark` when `Trainer(benchmark=...)` is not set ([#13154](https://github.com/PyTorchLightning/pytorch-lightning/pull/13154))

### Fixed

- Fixed an issue causing zero-division error for empty dataloaders ([#12885](https://github.com/PyTorchLightning/pytorch-lightning/pull/12885))
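
The effect of these two entries is easiest to see at the interpreter. A minimal sketch of the intended behavior (assuming this version of PyTorch Lightning is installed; the asserts state the expectation, they are not taken from the changelog):

    import torch
    from pytorch_lightning import Trainer

    # PyTorch's own default for cuDNN benchmarking is False
    assert torch.backends.cudnn.benchmark is False

    # benchmark=None (the default) no longer flips the flag to True
    # as v1.6.0-v1.6.3 did; the session value is left untouched
    Trainer(benchmark=None)
    assert torch.backends.cudnn.benchmark is False

    # an explicit value is still written through to torch
    Trainer(benchmark=True)
    assert torch.backends.cudnn.benchmark is True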

docs/source/common/trainer.rst

@@ -437,21 +437,24 @@ benchmark

|

-Defaults to ``True`` if :paramref:`~pytorch_lightning.trainer.Trainer.deterministic` is not set.
-This flag sets the ``torch.backends.cudnn.deterministic`` flag. You can read more about its impact
+The value (``True`` or ``False``) to set ``torch.backends.cudnn.benchmark`` to. If not set, the value of
+``torch.backends.cudnn.benchmark`` in the current session is used (``False`` unless manually changed).
+If :paramref:`~pytorch_lightning.trainer.Trainer.deterministic` is set to ``True``, this will default to ``False``.
+You can read more about the interaction of ``torch.backends.cudnn.benchmark`` and ``torch.backends.cudnn.deterministic``
`here <https://pytorch.org/docs/stable/notes/randomness.html#cuda-convolution-benchmarking>`__

-This is likely to increase the speed of your system if your input sizes don't change. However, if they do, then it
-might make your system slower. The CUDNN auto-tuner will try to find the best algorithm for the hardware when a new
-input size is encountered. Read more about it `here <https://discuss.pytorch.org/t/what-does-torch-backends-cudnn-benchmark-do/5936>`__.
+Setting this flag to ``True`` can increase the speed of your system if your input sizes don't
+change. However, if they do, then it might make your system slower. The CUDNN auto-tuner will try to find the best
+algorithm for the hardware when a new input size is encountered. This might also increase the memory usage.
+Read more about it `here <https://discuss.pytorch.org/t/what-does-torch-backends-cudnn-benchmark-do/5936>`__.

Example::

-    # defaults to True if not deterministic (which is False by default)
-    trainer = Trainer()
+    # uses the current value of torch.backends.cudnn.benchmark (False unless manually set)
+    trainer = Trainer(benchmark=None)  # default

    # you can overwrite the value
    trainer = Trainer(benchmark=False)
    trainer = Trainer(benchmark=True)

deterministic
^^^^^^^^^^^^^
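
To make the auto-tuner trade-off described in the benchmark docs concrete, a rough sketch (hypothetical shapes; requires a CUDA device):

    import torch

    torch.backends.cudnn.benchmark = True
    conv = torch.nn.Conv2d(3, 64, kernel_size=3, padding=1).to("cuda")

    # the first pass at a given input shape is slower: cuDNN times several
    # algorithms and caches the fastest (the cache also costs memory)
    for size in (224, 224, 224):
        conv(torch.randn(8, 3, size, size, device="cuda"))

    # a new shape triggers another timing run, so pipelines with many
    # distinct input shapes can end up slower than with benchmark=False
    conv(torch.randn(8, 3, 320, 320, device="cuda"))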

pytorch_lightning/trainer/connectors/accelerator_connector.py

@@ -143,14 +143,19 @@ class AcceleratorConnector:
            B. Strategy > Accelerator/precision/plugins
            C. TODO When multiple flag set to the same thing
        """
-        if benchmark and deterministic:
-            rank_zero_warn(
-                "You passed `deterministic=True` and `benchmark=True`. Note that PyTorch ignores"
-                " torch.backends.cudnn.deterministic=True when torch.backends.cudnn.benchmark=True.",
-            )
-        self.benchmark = not deterministic if benchmark is None else benchmark
+        if deterministic:
+            if benchmark is None:
+                # Set benchmark to False to ensure determinism
+                benchmark = False
+            elif benchmark:
+                rank_zero_warn(
+                    "You passed `deterministic=True` and `benchmark=True`. Note that PyTorch ignores"
+                    " torch.backends.cudnn.deterministic=True when torch.backends.cudnn.benchmark=True.",
+                )
        # TODO: move to gpu accelerator
-        torch.backends.cudnn.benchmark = self.benchmark
+        if benchmark is not None:
+            torch.backends.cudnn.benchmark = benchmark
+        self.benchmark = torch.backends.cudnn.benchmark

        self.replace_sampler_ddp = replace_sampler_ddp
        self._init_deterministic(deterministic)
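
Read on its own, the new resolution order is: an explicit `benchmark` always wins, `deterministic=True` only fills in `False` when nothing was passed, and `None` defers to the session value. A hypothetical standalone paraphrase of the logic above (`resolve_benchmark` is not a Lightning API):

    import warnings
    from typing import Optional

    import torch

    def resolve_benchmark(benchmark: Optional[bool], deterministic: Optional[bool]) -> bool:
        if deterministic:
            if benchmark is None:
                benchmark = False  # keep the run reproducible by default
            elif benchmark:
                warnings.warn("cuDNN ignores deterministic=True when benchmark=True")
        if benchmark is not None:
            torch.backends.cudnn.benchmark = benchmark
        # None means: respect whatever the session already set
        return torch.backends.cudnn.benchmark
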
@@ -211,10 +216,10 @@ class AcceleratorConnector:
        # 6. Instantiate Strategy - Part 2
        self._lazy_init_strategy()

-    def _init_deterministic(self, deterministic: bool) -> None:
-        self.deterministic = deterministic
-        torch.use_deterministic_algorithms(deterministic)
-        if deterministic:
+    def _init_deterministic(self, deterministic: Optional[bool]) -> None:
+        self.deterministic = deterministic or False  # default to False if not set
+        torch.use_deterministic_algorithms(self.deterministic)
+        if self.deterministic:
            # fixing non-deterministic part of horovod
            # https://github.com/PyTorchLightning/pytorch-lightning/pull/1572/files#r420279383
            os.environ["HOROVOD_FUSION_THRESHOLD"] = "0"
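
For context on what `_init_deterministic` toggles: `torch.use_deterministic_algorithms(True)` makes PyTorch raise on ops that have no deterministic implementation instead of silently varying between runs. A small sketch (the failing call is kept in a comment so the block also runs on CPU-only machines):

    import torch

    torch.use_deterministic_algorithms(True)

    # on a CUDA device, ops without a deterministic implementation raise, e.g.:
    #   torch.arange(10.0, device="cuda").histc()  # RuntimeError
    assert torch.are_deterministic_algorithms_enabled()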

pytorch_lightning/trainer/trainer.py

@@ -174,7 +174,7 @@ class Trainer(
        resume_from_checkpoint: Optional[Union[Path, str]] = None,
        profiler: Optional[Union[Profiler, str]] = None,
        benchmark: Optional[bool] = None,
-        deterministic: bool = False,
+        deterministic: Optional[bool] = None,
        reload_dataloaders_every_n_epochs: int = 0,
        auto_lr_find: Union[bool, str] = False,
        replace_sampler_ddp: bool = True,
@@ -229,9 +229,11 @@
                that only one process at a time can access them.
                Default: ``False``.

-            benchmark: Sets ``torch.backends.cudnn.benchmark``.
-                Defaults to ``True`` if :paramref:`~pytorch_lightning.trainer.trainer.Trainer.deterministic`
-                is ``False``. Overwrite to manually set a different value. Default: ``None``.
+            benchmark: The value (``True`` or ``False``) to set ``torch.backends.cudnn.benchmark`` to.
+                If not set, the current session value of ``torch.backends.cudnn.benchmark`` is used
+                (``False`` unless changed manually). If :paramref:`~pytorch_lightning.trainer.Trainer.deterministic`
+                is set to ``True``, this defaults to ``False``. Override to manually set a different value.
+                Default: ``None``.

            callbacks: Add a callback or list of callbacks.
                Default: ``None``.
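
In practice the reworded docstring amounts to three states (a sketch; the comments summarize the documented behavior):

    from pytorch_lightning import Trainer

    Trainer(benchmark=None)   # default: torch.backends.cudnn.benchmark left as-is
    Trainer(benchmark=True)   # opt in to cuDNN auto-tuning
    Trainer(benchmark=False)  # force it off, even if the session enabled it
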
@@ -260,7 +262,8 @@
                Default: ``False``.

            deterministic: If ``True``, sets whether PyTorch operations must use deterministic algorithms.
-                Default: ``False``.
+                If not set, defaults to ``False``.
+                Default: ``None``.

            devices: Will be mapped to either `gpus`, `tpu_cores`, `num_processes` or `ipus`,
                based on the accelerator type.
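
Correspondingly for `deterministic`, which with this change is tri-state in the signature even though `None` is still treated as `False` internally (see `_init_deterministic` above):

    from pytorch_lightning import Trainer

    Trainer(deterministic=None)   # default; behaves like False
    Trainer(deterministic=False)  # explicit opt-out
    Trainer(deterministic=True)   # deterministic algorithms on; benchmark defaults to False unless set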

tests/trainer/test_trainer.py

@@ -637,27 +637,32 @@ def test_trainer_max_steps_accumulate_batches(tmpdir):
    assert trainer.global_step == trainer.max_steps, "Model did not stop at max_steps"


+@pytest.mark.parametrize("cudnn_benchmark", (False, True))
@pytest.mark.parametrize(
    ["benchmark_", "deterministic", "expected"],
    [
-        (None, False, True),
+        (None, False, None),
        (None, True, False),
+        (None, None, None),
        (True, False, True),
        (True, True, True),
-        (False, True, False),
+        (True, None, True),
        (False, False, False),
+        (False, True, False),
+        (False, None, False),
    ],
)
-def test_benchmark_option(benchmark_, deterministic, expected):
+def test_benchmark_option(cudnn_benchmark, benchmark_, deterministic, expected):
    """Verify benchmark option."""
    original_val = torch.backends.cudnn.benchmark

+    torch.backends.cudnn.benchmark = cudnn_benchmark
    if benchmark_ and deterministic:
        with pytest.warns(UserWarning, match="You passed `deterministic=True` and `benchmark=True`"):
            trainer = Trainer(benchmark=benchmark_, deterministic=deterministic)
    else:
        trainer = Trainer(benchmark=benchmark_, deterministic=deterministic)

+    expected = cudnn_benchmark if expected is None else expected
    assert torch.backends.cudnn.benchmark == expected
    assert trainer._accelerator_connector.benchmark == expected

    torch.backends.cudnn.benchmark = original_val
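
The save/set/restore dance around the global flag is worth copying whenever a test mutates `torch.backends.cudnn.benchmark`; a hypothetical pytest fixture achieving the same isolation (not part of this commit):

    import pytest
    import torch

    @pytest.fixture
    def preserve_cudnn_benchmark():
        # snapshot the global flag so a failing assert cannot leak state into later tests
        original = torch.backends.cudnn.benchmark
        yield
        torch.backends.cudnn.benchmark = original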