Remove `Trainer(move_metrics_to_cpu=True)` (#16358)
This commit is contained in:
parent 14933592f4
commit caa82d8600
@@ -68,7 +68,10 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 * Removed the `FitLoop.split_idx` property
 * Removed the `LoggerConnector.on_train_split_start` method
 
+- Removed the `Trainer(move_metrics_to_cpu=True)` argument ([#16358](https://github.com/Lightning-AI/lightning/pull/16358))
+
 - Removed the `LightningModule.precision` attribute ([#16203](https://github.com/Lightning-AI/lightning/pull/16203))
 
 - Removed the automatic addition of a moving average of the `training_step` loss in the progress bar. Use `self.log("loss", ..., prog_bar=True)` instead. ([#16192](https://github.com/Lightning-AI/lightning/issues/16192))
 
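Note for users migrating off this flag: Lightning no longer copies logged metrics to the CPU for you. A minimal, illustrative sketch of how to keep CPU copies yourself is shown below; the callback name and the choice of hook are assumptions, not part of this commit.

```python
import torch
from pytorch_lightning import Callback


class CPUMetricsCopy(Callback):
    """Illustrative stand-in for the removed flag: keep a CPU copy of the logged metrics."""

    def __init__(self) -> None:
        self.cpu_metrics = {}

    def on_validation_epoch_end(self, trainer, pl_module) -> None:
        # `trainer.callback_metrics` holds the aggregated values produced by `self.log(...)`
        self.cpu_metrics = {
            name: value.cpu() if isinstance(value, torch.Tensor) else value
            for name, value in trainer.callback_metrics.items()
        }


# usage sketch: Trainer(callbacks=[CPUMetricsCopy()]) instead of Trainer(move_metrics_to_cpu=True)
```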
@@ -154,11 +154,6 @@ class EvaluationEpochLoop(Loop):
         if self._should_track_batch_outputs_for_epoch_end() and output is not None:
             self._outputs.append(output)
 
-        if self.trainer.move_metrics_to_cpu:
-            # the evaluation step output is not moved as they are not considered "metrics"
-            assert self.trainer._results is not None
-            self.trainer._results.cpu()
-
         if not self.batch_progress.is_last_batch:
             # if fault tolerant is enabled and process has been notified, exit.
             self.trainer._exit_gracefully_on_signal()
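The deleted branch was where the flag acted during evaluation: after every batch, the tensors tracked by the trainer's internal results object were copied to CPU, while the step outputs collected just above stayed on the original device. A rough, self-contained illustration of that behaviour (the dictionary stands in for the internal results collection; it is not code from this repository):

```python
import torch

device = "cuda" if torch.cuda.is_available() else "cpu"

# tensors registered through `self.log(...)` during validation
logged = {"val_loss": torch.rand((), device=device), "val_acc": torch.rand((), device=device)}

# what the removed branch effectively did after each evaluation batch
logged = {name: value.cpu() for name, value in logged.items()}
assert all(value.device.type == "cpu" for value in logged.values())
```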
@@ -116,12 +116,6 @@ class ManualOptimization(Loop[_OUTPUTS_TYPE]):
 
         result = self.output_result_cls.from_training_step_output(training_step_output)
 
-        if self.trainer.move_metrics_to_cpu:
-            # training step output does not get moved because it is not considered a "metric"
-            # the user might need them on the correct device for an operation in `training_epoch_end`
-            assert self.trainer._results is not None
-            self.trainer._results.cpu()
-
         self._done = True
         self._output = result.asdict()
 
@@ -397,11 +397,6 @@ class OptimizerLoop(Loop[_OUTPUTS_TYPE]):
             training_step_output, self.trainer.accumulate_grad_batches
         )
 
-        if self.trainer.move_metrics_to_cpu:
-            # training step output does not get moved because it is not considered a "metric"
-            assert self.trainer._results is not None
-            self.trainer._results.cpu()
-
         return result
 
     def _build_kwargs(self, kwargs: OrderedDict, opt_idx: int) -> OrderedDict:
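The comments removed from both training loops point at the same distinction: only values registered via `self.log(...)` are "metrics" and were affected by the flag, whereas the value returned from `training_step` is a step output and was always left on the training device so that backward could run there. A small sketch of that distinction (an illustrative module, not code from this repository):

```python
import torch
from torch import nn
from pytorch_lightning import LightningModule


class Example(LightningModule):
    def __init__(self) -> None:
        super().__init__()
        self.layer = nn.Linear(32, 1)

    def training_step(self, batch, batch_idx):
        loss = self.layer(batch).sum()
        # registered as a "metric": this is what `move_metrics_to_cpu` used to relocate
        self.log("train_loss", loss)
        # step output: never moved, so backward always runs on the training device
        return loss

    def configure_optimizers(self):
        return torch.optim.SGD(self.parameters(), lr=0.1)
```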
@@ -42,11 +42,9 @@ class LoggerConnector:
         self,
         logger: Union[bool, Logger, Iterable[Logger]],
         log_every_n_steps: int,
-        move_metrics_to_cpu: bool,
     ) -> None:
         self.configure_logger(logger)
         self.trainer.log_every_n_steps = log_every_n_steps
-        self.trainer.move_metrics_to_cpu = move_metrics_to_cpu
 
     @property
     def should_update_logs(self) -> bool:
@@ -157,7 +157,6 @@ class Trainer:
         detect_anomaly: bool = False,
         auto_scale_batch_size: Union[str, bool] = False,
         plugins: Optional[Union[PLUGIN_INPUT, List[PLUGIN_INPUT]]] = None,
-        move_metrics_to_cpu: bool = False,
         multiple_trainloader_mode: str = "max_size_cycle",
         inference_mode: bool = True,
     ) -> None:
@@ -334,10 +333,6 @@ class Trainer:
             enable_model_summary: Whether to enable model summarization by default.
                 Default: ``True``.
 
-            move_metrics_to_cpu: Whether to force internal logged metrics to be moved to cpu.
-                This can save some gpu memory, but can make training slower. Use with attention.
-                Default: ``False``.
-
             multiple_trainloader_mode: How to loop over the datasets when there are multiple train loaders.
                 In 'max_size_cycle' mode, the trainer ends one epoch when the largest dataset is traversed,
                 and smaller datasets reload when running out of their data. In 'min_size' mode, all the datasets
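With the argument and its docstring gone, the memory/speed trade-off it described is left to the user, and aggregated metrics simply stay on the device they were logged from. A hedged way to observe this on a CUDA machine, mirroring the updated test further down (the `LoggingModel` subclass is illustrative and assumes the `BoringModel` demo class shipped with recent versions):

```python
import torch
from pytorch_lightning import Trainer
from pytorch_lightning.demos.boring_classes import BoringModel


class LoggingModel(BoringModel):
    def validation_step(self, batch, batch_idx):
        out = self.layer(batch).sum()
        # validation logging defaults to on_epoch=True, so "foo" lands in callback_metrics
        self.log("foo", out)


if torch.cuda.is_available():
    model = LoggingModel()
    trainer = Trainer(accelerator="gpu", devices=1, fast_dev_run=True)
    trainer.validate(model, verbose=False)
    # without `move_metrics_to_cpu`, the logged metric stays on the GPU
    assert trainer.callback_metrics["foo"].device.type == "cuda"
```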
@@ -451,7 +446,7 @@ class Trainer:
 
         # init logger flags
         self._loggers: List[Logger]
-        self._logger_connector.on_trainer_init(logger, log_every_n_steps, move_metrics_to_cpu)
+        self._logger_connector.on_trainer_init(logger, log_every_n_steps)
 
         # init debugging flags
         self.val_check_batch: Union[int, float]
@@ -177,7 +177,6 @@ def test_memory_consumption_validation(tmpdir):
         devices=1,
         default_root_dir=tmpdir,
         fast_dev_run=2,
-        move_metrics_to_cpu=True,
         enable_model_summary=False,
     )
     trainer.fit(BoringLargeBatchModel())
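The memory-consumption test keeps its purpose but no longer relies on the flag. For reference, the basic pattern such a check follows is comparing allocated CUDA memory across repeated evaluation passes; a self-contained, purely illustrative version (not the repository's test code):

```python
import torch

if torch.cuda.is_available():
    model = torch.nn.Linear(512, 512).cuda()
    data = torch.randn(64, 512, device="cuda")

    def allocated() -> int:
        torch.cuda.synchronize()
        return torch.cuda.memory_allocated()

    with torch.no_grad():
        model(data)
    baseline = allocated()
    with torch.no_grad():
        model(data)
    # a well-behaved evaluation pass should not keep growing GPU memory
    assert allocated() <= baseline
```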
@@ -677,12 +677,16 @@ def test_multiple_dataloaders_reset(val_check_interval, tmpdir):
 @pytest.mark.parametrize(
     "accelerator",
     [
-        pytest.param("gpu", marks=RunIf(min_cuda_gpus=1)),
+        pytest.param("cuda", marks=RunIf(min_cuda_gpus=1)),
         pytest.param("mps", marks=RunIf(mps=True)),
     ],
 )
-def test_evaluation_move_metrics_to_cpu_and_outputs(tmpdir, accelerator):
+def test_metrics_and_outputs_device(tmpdir, accelerator):
     class TestModel(BoringModel):
+        def on_before_backward(self, loss: Tensor) -> None:
+            # the loss should be on the correct device before backward
+            assert loss.device.type == accelerator
+
         def validation_step(self, *args):
             x = torch.tensor(2.0, requires_grad=True, device=self.device)
             y = x * 2
@@ -695,13 +699,12 @@ def test_evaluation_move_metrics_to_cpu_and_outputs(tmpdir, accelerator):
         def validation_epoch_end(self, outputs):
             # the step outputs were not moved
             assert all(o.device == self.device for o in outputs)
-            # but the logging results were
-            assert self.trainer.callback_metrics["foo"].device.type == "cpu"
+            # and the logged metrics aren't
+            assert self.trainer.callback_metrics["foo"].device.type == accelerator
 
     model = TestModel()
-    trainer = Trainer(
-        default_root_dir=tmpdir, limit_val_batches=2, move_metrics_to_cpu=True, accelerator=accelerator, devices=1
-    )
+    trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True, accelerator=accelerator, devices=1)
     trainer.fit(model)
     trainer.validate(model, verbose=False)
 
@@ -718,23 +718,6 @@ def test_sanity_metrics_are_reset(tmpdir):
     assert "val_loss" not in trainer.progress_bar_metrics
 
 
-@RunIf(min_cuda_gpus=1)
-def test_move_metrics_to_cpu(tmpdir):
-    class TestModel(BoringModel):
-        def on_before_backward(self, loss: Tensor) -> None:
-            assert loss.device.type == "cuda"
-
-    trainer = Trainer(
-        default_root_dir=tmpdir,
-        fast_dev_run=True,
-        precision=16,
-        move_metrics_to_cpu=True,
-        accelerator="gpu",
-        devices=1,
-    )
-    trainer.fit(TestModel())
-
-
 def test_on_epoch_logging_with_sum_and_on_batch_start(tmpdir):
     class TestModel(BoringModel):
         def on_train_epoch_end(self):