Fix rich progress bar metric render on epoch end (#11689)

Co-authored-by: Carlos Mocholi <carlossmocholi@gmail.com>
Co-authored-by: Jirka <jirka.borovec@seznam.cz>
This commit is contained in:
Rohit Gupta 2022-02-03 07:13:48 +05:30 committed by GitHub
parent ec1379da2c
commit ee049e123d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 79 additions and 0 deletions

View File

@ -475,6 +475,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
- Disabled sampler replacement when using `IterableDataset` ([#11507](https://github.com/PyTorchLightning/pytorch-lightning/pull/11507))
- The Rich progress bar now correctly shows the `on_epoch` logged values on train epoch end ([#11689](https://github.com/PyTorchLightning/pytorch-lightning/pull/11689))
- Fixed an issue to avoid validation loop run on restart ([#11552](https://github.com/PyTorchLightning/pytorch-lightning/pull/11552))

View File

@ -16,6 +16,7 @@ from dataclasses import dataclass
from datetime import timedelta
from typing import Any, Dict, Optional, Union
import pytorch_lightning as pl
from pytorch_lightning.callbacks.progress.base import ProgressBarBase
from pytorch_lightning.utilities.imports import _RICH_AVAILABLE
@ -373,6 +374,10 @@ class RichProgressBar(ProgressBarBase):
if self.val_progress_bar_id is not None:
self._update(self.val_progress_bar_id, self.val_batch_idx, self.total_val_batches, visible=False)
def on_validation_end(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
    """Refresh the displayed metrics once validation ends, but only while fitting.

    Args:
        trainer: The trainer whose ``state.fn`` is compared against ``"fit"``.
        pl_module: The module forwarded to ``_update_metrics``.
    """
    # Keep the ``==`` comparison as written: ``state.fn`` may be an enum with custom equality.
    running_fit = trainer.state.fn == "fit"
    if running_fit:
        self._update_metrics(trainer, pl_module)
def on_test_epoch_start(self, trainer, pl_module):
self.test_progress_bar_id = self._add_task(self.total_test_batches, self.test_description)
self.refresh()
@ -386,6 +391,9 @@ class RichProgressBar(ProgressBarBase):
self._update_metrics(trainer, pl_module)
self.refresh()
def on_train_epoch_end(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
    """Refresh the displayed metrics when a training epoch ends.

    Args:
        trainer: The trainer forwarded to ``_update_metrics``.
        pl_module: The module forwarded to ``_update_metrics``.
    """
    self._update_metrics(trainer, pl_module)
def on_validation_batch_end(self, trainer, pl_module, outputs, batch, batch_idx, dataloader_idx):
if trainer.sanity_checking:
self._update(self.val_sanity_progress_bar_id, self.val_batch_idx, self.total_val_batches)

View File

@ -11,6 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from collections import defaultdict
from unittest import mock
from unittest.mock import DEFAULT, Mock
@ -223,3 +224,68 @@ def test_rich_progress_bar_num_sanity_val_steps(tmpdir, limit_val_batches: int):
trainer.fit(model)
assert progress_bar.progress.tasks[0].completed == min(num_sanity_val_steps, limit_val_batches)
@RunIf(rich=True)
def test_rich_progress_bar_correct_value_epoch_end(tmpdir):
    """Rich counterpart to test_tqdm_progress_bar::test_tqdm_progress_bar_correct_value_epoch_end."""

    class MockedProgressBar(RichProgressBar):
        """Progress bar that records every ``get_metrics`` call, keyed by ``trainer.state.fn``."""

        # class-level store, shared by all instances: ``{trainer.state.fn: [(stage, epoch, step, items), ...]}``
        calls = defaultdict(list)

        def get_metrics(self, trainer, pl_module):
            # forward the module passed to this hook instead of reaching for the
            # enclosing test's ``model`` variable through the closure
            items = super().get_metrics(trainer, pl_module)
            del items["v_num"]
            del items["loss"]
            # this is equivalent to mocking `set_postfix` as this method gets called every time
            self.calls[trainer.state.fn].append(
                (trainer.state.stage, trainer.current_epoch, trainer.global_step, items)
            )
            return items

    class MyModel(BoringModel):
        """Logs ``global_step`` under one epoch-level, max-reduced progress-bar key per loop."""

        def training_step(self, batch, batch_idx):
            self.log("a", self.global_step, prog_bar=True, on_step=False, on_epoch=True, reduce_fx=max)
            return super().training_step(batch, batch_idx)

        def validation_step(self, batch, batch_idx):
            self.log("b", self.global_step, prog_bar=True, on_step=False, on_epoch=True, reduce_fx=max)
            return super().validation_step(batch, batch_idx)

        def test_step(self, batch, batch_idx):
            self.log("c", self.global_step, prog_bar=True, on_step=False, on_epoch=True, reduce_fx=max)
            return super().test_step(batch, batch_idx)

    model = MyModel()
    pbar = MockedProgressBar()
    trainer = Trainer(
        default_root_dir=tmpdir,
        limit_train_batches=2,
        limit_val_batches=2,
        limit_test_batches=2,
        max_epochs=2,
        enable_model_summary=False,
        enable_checkpointing=False,
        log_every_n_steps=1,
        callbacks=pbar,
    )

    trainer.fit(model)
    assert pbar.calls["fit"] == [
        ("sanity_check", 0, 0, {"b": 0}),
        ("train", 0, 0, {}),
        ("train", 0, 1, {}),
        ("validate", 0, 1, {"b": 1}),  # validation end
        # epoch end over, `on_epoch=True` metrics are computed
        ("train", 0, 2, {"a": 1, "b": 1}),  # training epoch end
        ("train", 1, 2, {"a": 1, "b": 1}),
        ("train", 1, 3, {"a": 1, "b": 1}),
        ("validate", 1, 3, {"a": 1, "b": 3}),  # validation end
        ("train", 1, 4, {"a": 3, "b": 3}),  # training epoch end
    ]

    # standalone validate/test runs are expected to record no metric fetches
    trainer.validate(model, verbose=False)
    assert pbar.calls["validate"] == []

    trainer.test(model, verbose=False)
    assert pbar.calls["test"] == []

View File

@ -559,6 +559,8 @@ def test_get_progress_bar_metrics(tmpdir: str):
def test_tqdm_progress_bar_correct_value_epoch_end(tmpdir):
"""TQDM counterpart to test_rich_progress_bar::test_rich_progress_bar_correct_value_epoch_end."""
class MockedProgressBar(TQDMProgressBar):
calls = defaultdict(list)