[bug] Fix PyTorch profiler with emit_nvtx (#6260)

* resolve bug

* update changelog

* Update tests/trainer/test_trainer.py

* Update pytorch_lightning/profiler/profilers.py

Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com>

* resolve comments

* resolve flake8

Co-authored-by: Carlos Mocholí <carlossmocholi@gmail.com>
Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com>
thomas chaton 2021-03-05 20:12:03 +00:00 committed by GitHub
parent e84854264f
commit 2ec67a48b3
5 changed files with 34 additions and 3 deletions


@@ -83,6 +83,12 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
- Fixed DP reduction with collection ([#6324](https://github.com/PyTorchLightning/pytorch-lightning/pull/6324))
- Fixed PyTorch Profiler with `emit_nvtx` ([#6260](https://github.com/PyTorchLightning/pytorch-lightning/pull/6260))
- Fixed `trainer.test` from `best_path` hangs after calling `trainer.fit` ([#6272](https://github.com/PyTorchLightning/pytorch-lightning/pull/6272))
## [1.2.2] - 2021-03-02
### Added


@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""Profiler to check if there are any bottlenecks in your code."""
import cProfile
import io
import logging


@@ -205,7 +205,7 @@ class PyTorchProfiler(BaseProfiler):
     def _start(self, action_name: str) -> None:
         if self.emit_nvtx:
-            self._create_profiler(action_name, torch.cuda.profiler.profile, enter=False)
+            self._parent_profiler = self._create_profiler(action_name, torch.cuda.profiler.profile, enter=True)
             self._create_profiler(action_name, torch.autograd.profiler.emit_nvtx)
         else:
             self._create_profiler(action_name, torch.autograd.profiler.profile)
@@ -215,8 +215,11 @@
         profiler_args = {k: v for k, v in vars(self).items() if k in init_args}
         pr = profiler(**profiler_args)
         if enter:
-            pr = pr.__enter__()
+            out_pr = pr.__enter__()
+            if out_pr is not None:
+                pr = out_pr
         self.profiler = pr
+        return self.profiler

     def _stop(self, action_name: str) -> None:
         if self.profiler is None:
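A short sketch of why the `__enter__` return value is now checked: some context managers, such as `torch.cuda.profiler.profile`, may not return anything from `__enter__`, so the old `pr = pr.__enter__()` could silently replace the profiler with `None`. This is a minimal illustration under that assumption, not Lightning code; the `ReturnsNothing` class is hypothetical.

class ReturnsNothing:
    """Hypothetical context manager whose __enter__ does not return self."""

    def __enter__(self):
        pass  # implicitly returns None

    def __exit__(self, exc_type, exc_val, exc_tb):
        pass


pr = ReturnsNothing()
out_pr = pr.__enter__()
if out_pr is not None:  # keep the original object when __enter__ returns None
    pr = out_pr

assert pr is not None   # with the old `pr = pr.__enter__()`, pr would be None here
pr.__exit__(None, None, None)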
@@ -224,6 +227,12 @@
         self.profiler.__exit__(exc_type=None, exc_val=None, exc_tb=None)
+        if isinstance(self.profiler, torch.autograd.profiler.emit_nvtx):
+            # When running ``emit_nvtx``, PyTorch requires 2 context managers.
+            # The parent_profiler is being closed too.
+            self._parent_profiler.__exit__(None, None, None)
+            return
+
         function_events = self.profiler.function_events
         self.profiler = None
         for name in self.running_stack:
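For context, `emit_nvtx` only produces useful output while a CUDA profiler is active, which is why `_start` now keeps the `torch.cuda.profiler.profile` context open as `_parent_profiler` and `_stop` exits both. A rough standalone sketch of that nesting pattern, assuming a CUDA-enabled PyTorch build and that the script is launched under nvprof/Nsight:

import torch

x = torch.randn(8, 8, device="cuda", requires_grad=True)

with torch.cuda.profiler.profile():              # parent profiler, kept open for the whole region
    with torch.autograd.profiler.emit_nvtx():    # annotates each autograd op with an NVTX range
        y = (x @ x).sum()
        y.backward()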


@@ -32,3 +32,4 @@ python ${DEFAULTS} tests/trainer/test_trainer.py::test_pytorch_profiler_trainer_
 python ${DEFAULTS} tests/models/test_hooks.py::test_transfer_batch_hook_ddp
 python ${DEFAULTS} tests/trainer/test_data_loading.py::test_replace_distrubuted_sampler_custom_dataloader_custom_batch_sampler
 python ${DEFAULTS} tests/trainer/optimization/test_manual_optimization.py::test_step_with_optimizer_closure_with_different_frequencies_ddp_with_toggle_model
+nvprof --profile-from-start off -o trace_name.prof -- python ${DEFAULTS} tests/trainer/test_trainer.py::test_pytorch_profiler_nested_emit_nvtx
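The `nvprof` invocation above writes the recorded trace to `trace_name.prof` (the `-o` argument). One possible way to inspect it afterwards is `torch.autograd.profiler.load_nvprof`, assuming that helper is available in the installed PyTorch version:

import torch

# Load the nvprof output produced by the command above.
events = torch.autograd.profiler.load_nvprof("trace_name.prof")
print(events)  # an EventList summarising the recorded kernels/ops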


@@ -1554,6 +1554,22 @@ def test_pytorch_profiler_nested(tmpdir):
         assert pa[n] == expected_[n]


+@RunIf(min_gpus=1, special=True)
+def test_pytorch_profiler_nested_emit_nvtx(tmpdir):
+    """
+    This test checks that emit_nvtx is correctly supported.
+    """
+    profiler = PyTorchProfiler(use_cuda=True, emit_nvtx=True)
+    model = BoringModel()
+    trainer = Trainer(
+        fast_dev_run=True,
+        profiler=profiler,
+        gpus=1,
+    )
+    trainer.fit(model)
+
+
 @pytest.mark.parametrize(
     ["limit_train_batches", "global_step", "num_training_batches", "current_epoch", "should_train"],
     [(0.2, 0, 0, 0, False), (0.5, 10, 2, 4, True)],