From 7fa1aebcc99297e4d7eb8dcf2deb22e6da814edf Mon Sep 17 00:00:00 2001
From: Danielle Pintz <38207072+daniellepintz@users.noreply.github.com>
Date: Tue, 4 Jan 2022 02:50:11 -0800
Subject: [PATCH] Remove `profile("training_step_and_backward")` (#11222)

---
 CHANGELOG.md                                  |  3 +++
 .../loops/optimization/optimizer_loop.py      | 26 ++++++-------------
 pytorch_lightning/profiler/__init__.py        |  6 ++---
 pytorch_lightning/profiler/pytorch.py         |  1 -
 pytorch_lightning/profiler/xla.py             |  3 +--
 tests/profiler/test_profiler.py               |  1 -
 6 files changed, 15 insertions(+), 25 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2649c878f9..96087fad69 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -363,6 +363,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 
 - Removed `Strategy.init_optimizers` in favor of `Strategy.setup_optimizers` ([#11236](https://github.com/PyTorchLightning/pytorch-lightning/pull/11236))
 
+- Removed `profile("training_step_and_backward")` in the `Closure` class since we already profile calls to `training_step` and `backward` ([#11222](https://github.com/PyTorchLightning/pytorch-lightning/pull/11222))
+
+
 - Removed `Strategy.optimizer_zero_grad` ([#11246](https://github.com/PyTorchLightning/pytorch-lightning/pull/11246))
 
 ### Fixed
diff --git a/pytorch_lightning/loops/optimization/optimizer_loop.py b/pytorch_lightning/loops/optimization/optimizer_loop.py
index 00e9b602c1..5a5625e285 100644
--- a/pytorch_lightning/loops/optimization/optimizer_loop.py
+++ b/pytorch_lightning/loops/optimization/optimizer_loop.py
@@ -28,7 +28,6 @@ from pytorch_lightning.loops.utilities import (
     _extract_hiddens,
     check_finite_loss,
 )
-from pytorch_lightning.profiler import BaseProfiler, PassThroughProfiler
 from pytorch_lightning.trainer.progress import OptimizationProgress
 from pytorch_lightning.utilities import _AcceleratorType, AMPType
 from pytorch_lightning.utilities.exceptions import MisconfigurationException
@@ -110,7 +109,6 @@ class Closure(AbstractClosure[ClosureResult]):
             Can be set to ``None`` to skip the backward operation.
         zero_grad_fn: A function that zeroes the gradients. Can be set to ``None`` to skip
             zero_grad, for example when accumulating gradients.
-        profiler: A profiler for profiling the actions of the passed in closure functions.
 
     Example:
 
@@ -126,28 +124,23 @@ class Closure(AbstractClosure[ClosureResult]):
         step_fn: Callable[[], ClosureResult],
         backward_fn: Optional[Callable[[Tensor], None]] = None,
         zero_grad_fn: Optional[Callable[[], None]] = None,
-        profiler: Optional[BaseProfiler] = None,
     ):
         super().__init__()
         self._step_fn = step_fn
         self._backward_fn = backward_fn
         self._zero_grad_fn = zero_grad_fn
-        self._profiler = PassThroughProfiler() if profiler is None else profiler
 
     def closure(self, *args: Any, **kwargs: Any) -> ClosureResult:
-        with self._profiler.profile("training_step_and_backward"):
-            step_output = self._step_fn()
+        step_output = self._step_fn()
 
-            if step_output.closure_loss is None:
-                self.warning_cache.warn(
-                    "`training_step` returned `None`. If this was on purpose, ignore this warning..."
-                )
+        if step_output.closure_loss is None:
+            self.warning_cache.warn("`training_step` returned `None`. If this was on purpose, ignore this warning...")
 
-            if self._zero_grad_fn is not None:
-                self._zero_grad_fn()
+        if self._zero_grad_fn is not None:
+            self._zero_grad_fn()
 
-            if self._backward_fn is not None and step_output.closure_loss is not None:
-                self._backward_fn(step_output.closure_loss)
+        if self._backward_fn is not None and step_output.closure_loss is not None:
+            self._backward_fn(step_output.closure_loss)
 
         return step_output
 
@@ -280,10 +273,7 @@ class OptimizerLoop(Loop[_OUTPUTS_TYPE]):
         step_fn = self._make_step_fn(split_batch, batch_idx, opt_idx)
         backward_fn = self._make_backward_fn(optimizer, opt_idx)
         zero_grad_fn = self._make_zero_grad_fn(batch_idx, opt_idx, optimizer)
-
-        return Closure(
-            step_fn=step_fn, backward_fn=backward_fn, zero_grad_fn=zero_grad_fn, profiler=self.trainer.profiler
-        )
+        return Closure(step_fn=step_fn, backward_fn=backward_fn, zero_grad_fn=zero_grad_fn)
 
     def _make_step_fn(self, split_batch: Any, batch_idx: int, opt_idx: int) -> Callable[[], ClosureResult]:
         """Build the step function that runs the `training_step` and processes its output."""
diff --git a/pytorch_lightning/profiler/__init__.py b/pytorch_lightning/profiler/__init__.py
index 58cee0c1d8..e67295c12b 100644
--- a/pytorch_lightning/profiler/__init__.py
+++ b/pytorch_lightning/profiler/__init__.py
@@ -141,9 +141,9 @@ output in your terminal. If no filename is given, it will be logged only on rank
 
 The profiler's results will be printed on the completion of ``{fit,validate,test,predict}``.
 
-This profiler will record ``training_step_and_backward``, ``training_step``, ``backward``,
+This profiler will record ``training_step``, ``backward``,
 ``validation_step``, ``test_step``, and ``predict_step`` by default.
-The output below shows the profiling for the action ``training_step_and_backward``.
+The output below shows the profiling for the action ``training_step``.
 The user can provide ``PyTorchProfiler(record_functions={...})`` to extend the scope of profiled functions.
 
 .. note::
@@ -156,7 +156,7 @@ The user can provide ``PyTorchProfiler(record_functions={...})`` to extend the s
 
     Profiler Report
 
-    Profile stats for: training_step_and_backward
+    Profile stats for: training_step
     ---------------------  ---------------  ---------------  ---------------  ---------------  ---------------
     Name                   Self CPU total %  Self CPU total  CPU total %      CPU total        CPU time avg
     ---------------------  ---------------  ---------------  ---------------  ---------------  ---------------
diff --git a/pytorch_lightning/profiler/pytorch.py b/pytorch_lightning/profiler/pytorch.py
index 042a70966a..8c542b8876 100644
--- a/pytorch_lightning/profiler/pytorch.py
+++ b/pytorch_lightning/profiler/pytorch.py
@@ -195,7 +195,6 @@ class ScheduleWrapper:
 class PyTorchProfiler(BaseProfiler):
 
     RECORD_FUNCTIONS = {
-        "training_step_and_backward",
         "training_step",
         "backward",
         "validation_step",
diff --git a/pytorch_lightning/profiler/xla.py b/pytorch_lightning/profiler/xla.py
index c89685bcad..be158f7be4 100644
--- a/pytorch_lightning/profiler/xla.py
+++ b/pytorch_lightning/profiler/xla.py
@@ -53,9 +53,8 @@ log = logging.getLogger(__name__)
 
 class XLAProfiler(BaseProfiler):
 
-    STEP_FUNCTIONS = {"training_step_and_backward", "validation_step", "test_step", "predict_step"}
+    STEP_FUNCTIONS = {"validation_step", "test_step", "predict_step"}
     RECORD_FUNCTIONS = {
-        "training_step_and_backward",
         "training_step",
         "backward",
         "validation_step",
diff --git a/tests/profiler/test_profiler.py b/tests/profiler/test_profiler.py
index 75d4fb47b4..e047e55fb2 100644
--- a/tests/profiler/test_profiler.py
+++ b/tests/profiler/test_profiler.py
@@ -313,7 +313,6 @@ def test_pytorch_profiler_trainer_ddp(tmpdir, pytorch_profiler):
     expected = {"[Strategy]DDPStrategy.validation_step"}
    if not _KINETO_AVAILABLE:
         expected |= {
-            "training_step_and_backward",
             "[Strategy]DDPStrategy.training_step",
             "[Strategy]DDPStrategy.backward",
         }
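
A minimal sketch of what this change means in practice, not part of the patch itself: with the
combined `training_step_and_backward` action removed, the profiler report simply lists
`training_step` and `backward` as separate entries. The sketch assumes the Trainer/profiler API
as of this commit; `MyModel` is a hypothetical placeholder for any LightningModule:

    from pytorch_lightning import Trainer
    from pytorch_lightning.profiler import PyTorchProfiler

    # "training_step" and "backward" remain in RECORD_FUNCTIONS by default,
    # so each gets its own "Profile stats for: ..." section in the report.
    profiler = PyTorchProfiler(dirpath=".", filename="perf_logs")
    trainer = Trainer(profiler=profiler, max_epochs=1)
    trainer.fit(MyModel())  # MyModel: hypothetical LightningModule defined elsewhere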