Sort simple profiler summary based on mean duration (#11671)

This commit is contained in:
Rohit Gupta 2022-02-03 02:14:42 +05:30 committed by GitHub
parent 76175217e4
commit 3eee8f18cf
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 56 additions and 41 deletions

View File

@ -222,6 +222,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
- Changed `MisconfigurationException` to `ModuleNotFoundError` when `rich` isn't available ([#11360](https://github.com/PyTorchLightning/pytorch-lightning/pull/11360))
- Sorted `SimpleProfiler(extended=False)` summary based on mean duration for each hook ([#11671](https://github.com/PyTorchLightning/pytorch-lightning/pull/11671))
### Deprecated
- Deprecated `ClusterEnvironment.master_{address,port}` in favor of `ClusterEnvironment.main_{address,port}` ([#10103](https://github.com/PyTorchLightning/pytorch-lightning/pull/10103))

View File

@ -63,23 +63,21 @@ The profiler's results will be printed at the completion of a training ``trainer
-----------------------------------------------------------------------------------------------
| Action | Mean duration (s) | Total time (s) |
-----------------------------------------------------------------------------------------------
| [LightningModule]BoringModel.prepare_data | 2.112e-06 | 2.112e-06 |
| [LightningModule]BoringModel.setup | 1.513e-06 | 1.513e-06 |
| [LightningModule]BoringModel.on_fit_start | 1.542e-06 | 1.542e-06 |
| [LightningModule]BoringModel.on_epoch_start | 1.051e-06 | 1.051e-06 |
| fetch_next_train_batch | 0.0003097 | 0.00061939 |
| get_train_batch | 0.0003287 | 0.00065739 |
| on_train_batch_start | 7.578e-05 | 7.578e-05 |
| [LightningModule]BoringModel.training_step_end | 1.556e-06 | 1.556e-06 |
| model_forward | 0.00028337 | 0.00028337 |
| [LightningModule]BoringModel.optimizer_step | 0.0011853 | 0.0011853 |
| run_training_batch | 0.0016311 | 0.0016311 |
| on_train_batch_end | 7.6117e-05 | 7.6117e-05 |
| run_training_epoch | 0.0036915 | 0.0036915 |
| [LightningModule]BoringModel.on_epoch_end | 1.079e-06 | 1.079e-06 |
| [LightningModule]BoringModel.on_train_end | 1.509e-06 | 1.509e-06 |
| [LightningModule]BoringModel.on_fit_end | 3.127e-06 | 3.127e-06 |
| [LightningModule]BoringModel.teardown | 2.581e-06 | 2.581e-06 |
| run_training_epoch | 6.1558 | 6.1558 |
| run_training_batch | 0.0022506 | 0.015754 |
| [LightningModule]BoringModel.optimizer_step | 0.0017477 | 0.012234 |
| model_forward | 0.00055868 | 0.0039108 |
| [LightningModule]BoringModel.val_dataloader | 0.00024388 | 0.00024388 |
| on_train_batch_start | 0.00014637 | 0.0010246 |
| [LightningModule]BoringModel.teardown | 2.15e-06 | 2.15e-06 |
| [LightningModule]BoringModel.prepare_data | 1.955e-06 | 1.955e-06 |
| [LightningModule]BoringModel.on_epoch_end | 1.8373e-06 | 5.512e-06 |
| [LightningModule]BoringModel.on_train_start | 1.644e-06 | 1.644e-06 |
| [LightningModule]BoringModel.on_train_end | 1.516e-06 | 1.516e-06 |
| [LightningModule]BoringModel.on_fit_end | 1.426e-06 | 1.426e-06 |
| [LightningModule]BoringModel.setup | 1.403e-06 | 1.403e-06 |
| [LightningModule]BoringModel.on_epoch_start | 1.2883e-06 | 3.865e-06 |
| [LightningModule]BoringModel.on_fit_start | 1.226e-06 | 1.226e-06 |
-----------------------------------------------------------------------------------------------
.. note:: Note that there are a lot more actions that will be present in the final report along with percentage and call count for each action.

View File

@ -17,7 +17,7 @@ import os
import time
from collections import defaultdict
from pathlib import Path
from typing import Dict, Optional, Tuple, Union
from typing import Dict, List, Optional, Tuple, Union
import numpy as np
@ -25,6 +25,11 @@ from pytorch_lightning.profiler.base import BaseProfiler
log = logging.getLogger(__name__)
_TABLE_ROW_EXTENDED = Tuple[str, float, int, float, float]
_TABLE_DATA_EXTENDED = List[_TABLE_ROW_EXTENDED]
_TABLE_ROW = Tuple[str, float, float]
_TABLE_DATA = List[_TABLE_ROW]
class SimpleProfiler(BaseProfiler):
"""This profiler simply records the duration of actions (in seconds) and reports the mean duration of each
@ -72,13 +77,21 @@ class SimpleProfiler(BaseProfiler):
duration = end_time - start_time
self.recorded_durations[action_name].append(duration)
def _make_report(self) -> Tuple[list, float]:
def _make_report_extended(self) -> Tuple[_TABLE_DATA_EXTENDED, float, float]:
total_duration = time.monotonic() - self.start_time
report = [[a, d, 100.0 * np.sum(d) / total_duration] for a, d in self.recorded_durations.items()]
report.sort(key=lambda x: x[2], reverse=True)
total_calls = sum(len(x[1]) for x in report)
report = [
(a, np.mean(d), len(d), np.sum(d), 100.0 * np.sum(d) / total_duration)
for a, d in self.recorded_durations.items()
]
report.sort(key=lambda x: x[4], reverse=True)
total_calls = sum(x[2] for x in report)
return report, total_calls, total_duration
def _make_report(self) -> _TABLE_DATA:
report = [(action, np.mean(d), np.sum(d)) for action, d in self.recorded_durations.items()]
report.sort(key=lambda x: x[1], reverse=True)
return report
def summary(self) -> str:
sep = os.linesep
output_string = ""
@ -100,15 +113,15 @@ class SimpleProfiler(BaseProfiler):
output_string_len = len(header_string.expandtabs())
sep_lines = f"{sep}{'-' * output_string_len}"
output_string += sep_lines + header_string + sep_lines
report, total_calls, total_duration = self._make_report()
report, total_calls, total_duration = self._make_report_extended()
output_string += log_row("Total", "-", f"{total_calls:}", f"{total_duration:.5}", "100 %")
output_string += sep_lines
for action, durations, duration_per in report:
for action, mean_duration, num_calls, total_duration, duration_per in report:
output_string += log_row(
action,
f"{np.mean(durations):.5}",
f"{len(durations):}",
f"{np.sum(durations):.5}",
f"{mean_duration:.5}",
f"{num_calls}",
f"{total_duration:.5}",
f"{duration_per:.5}",
)
output_string += sep_lines
@ -122,9 +135,10 @@ class SimpleProfiler(BaseProfiler):
output_string_len = len(header_string.expandtabs())
sep_lines = f"{sep}{'-' * output_string_len}"
output_string += sep_lines + header_string + sep_lines
report = self._make_report()
for action, durations in self.recorded_durations.items():
output_string += log_row(action, f"{np.mean(durations):.5}", f"{np.sum(durations):.5}")
for action, mean_duration, total_duration in report:
output_string += log_row(action, f"{mean_duration:.5}", f"{total_duration:.5}")
output_string += sep_lines
output_string += sep
return output_string

View File

@ -218,11 +218,11 @@ def test_simple_profiler_summary(tmpdir, extended):
sep = os.linesep
max_action_len = len("on_before_batch_transfer")
for hook in hooks:
for i, hook in enumerate(hooks):
with profiler.profile(hook):
pass
profiler.recorded_durations[hook] = [sometime]
profiler.recorded_durations[hook] = [sometime + i]
if extended:
header_string = (
@ -238,12 +238,12 @@ def test_simple_profiler_summary(tmpdir, extended):
f"{sep_lines}"
f"{sep}| Total | - | 6 | 7.0 | 100 % |" # noqa: E501
f"{sep_lines}"
f"{sep}| on_fit_start | 5.7734 | 1 | 5.7734 | 82.478 |" # noqa: E501
f"{sep}| on_before_batch_transfer | 4.7734 | 1 | 4.7734 | 68.192 |" # noqa: E501
f"{sep}| on_train_epoch_end | 3.7734 | 1 | 3.7734 | 53.906 |" # noqa: E501
f"{sep}| on_train_epoch_start | 2.7734 | 1 | 2.7734 | 39.62 |" # noqa: E501
f"{sep}| on_train_end | 1.7734 | 1 | 1.7734 | 25.335 |" # noqa: E501
f"{sep}| on_train_start | 0.77343 | 1 | 0.77343 | 11.049 |" # noqa: E501
f"{sep}| on_train_end | 0.77343 | 1 | 0.77343 | 11.049 |" # noqa: E501
f"{sep}| on_train_epoch_start | 0.77343 | 1 | 0.77343 | 11.049 |" # noqa: E501
f"{sep}| on_train_epoch_end | 0.77343 | 1 | 0.77343 | 11.049 |" # noqa: E501
f"{sep}| on_before_batch_transfer | 0.77343 | 1 | 0.77343 | 11.049 |" # noqa: E501
f"{sep}| on_fit_start | 0.77343 | 1 | 0.77343 | 11.049 |" # noqa: E501
f"{sep_lines}{sep}"
)
else:
@ -257,12 +257,12 @@ def test_simple_profiler_summary(tmpdir, extended):
f"{sep_lines}"
f"{sep}| Action | Mean duration (s) | Total time (s) |"
f"{sep_lines}"
f"{sep}| on_fit_start | 5.7734 | 5.7734 |"
f"{sep}| on_before_batch_transfer | 4.7734 | 4.7734 |"
f"{sep}| on_train_epoch_end | 3.7734 | 3.7734 |"
f"{sep}| on_train_epoch_start | 2.7734 | 2.7734 |"
f"{sep}| on_train_end | 1.7734 | 1.7734 |"
f"{sep}| on_train_start | 0.77343 | 0.77343 |"
f"{sep}| on_train_end | 0.77343 | 0.77343 |"
f"{sep}| on_train_epoch_start | 0.77343 | 0.77343 |"
f"{sep}| on_train_epoch_end | 0.77343 | 0.77343 |"
f"{sep}| on_before_batch_transfer | 0.77343 | 0.77343 |"
f"{sep}| on_fit_start | 0.77343 | 0.77343 |"
f"{sep_lines}{sep}"
)