# Copyright The PyTorch Lightning team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Test logging in the training loop."""

import collections
import itertools
from re import escape

import numpy as np
import pytest
import torch
from torchmetrics import Accuracy

from pytorch_lightning import callbacks, Trainer
from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint, ProgressBar
from pytorch_lightning.core.lightning import LightningModule
from pytorch_lightning.utilities.exceptions import MisconfigurationException
from tests.helpers.boring_model import BoringModel, RandomDictDataset
from tests.helpers.runif import RunIf


def test__training_step__log(tmpdir):
    """Tests logging from `training_step` with every combination of the on_step/on_epoch, logger and prog_bar flags."""

    class TestModel(BoringModel):
        def training_step(self, batch, batch_idx):
            out = super().training_step(batch, batch_idx)
            loss = out["loss"]

            # -----------
            # default
            # -----------
            self.log("default", loss)

            # -----------
            # logger
            # -----------
            # on_step T on_epoch F
            self.log("l_s", loss, on_step=True, on_epoch=False, prog_bar=False, logger=True)

            # on_step F on_epoch T
            self.log("l_e", loss, on_step=False, on_epoch=True, prog_bar=False, logger=True)

            # on_step T on_epoch T
            self.log("l_se", loss, on_step=True, on_epoch=True, prog_bar=False, logger=True)

            # -----------
            # pbar
            # -----------
            # on_step T on_epoch F
            self.log("p_s", loss, on_step=True, on_epoch=False, prog_bar=True, logger=False)

            # on_step F on_epoch T
            self.log("p_e", loss, on_step=False, on_epoch=True, prog_bar=True, logger=False)

            # on_step T on_epoch T
            self.log("p_se", loss, on_step=True, on_epoch=True, prog_bar=True, logger=False)

            return loss

    model = TestModel()
    model.val_dataloader = None
    trainer = Trainer(
        default_root_dir=tmpdir,
        limit_train_batches=2,
        limit_val_batches=2,
        max_epochs=2,
        log_every_n_steps=1,
        weights_summary=None,
        callbacks=[ModelCheckpoint(monitor="l_se")],
    )
    trainer.fit(model)

    logged_metrics = set(trainer.logged_metrics)
    assert logged_metrics == {"epoch", "default", "l_e", "l_s", "l_se_step", "l_se_epoch"}

    pbar_metrics = set(trainer.progress_bar_metrics)
    assert pbar_metrics == {"p_e", "p_s", "p_se_step", "p_se_epoch"}

    assert set(trainer.callback_metrics) == (logged_metrics | pbar_metrics | {"p_se", "l_se"}) - {"epoch"}
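# A minimal sketch of the key-suffixing behavior asserted above (names taken from
# the test): when a metric is logged with both `on_step=True` and `on_epoch=True`,
# it is split into two suffixed keys, while `callback_metrics` also keeps the
# unsuffixed name:
#
#     self.log("l_se", loss, on_step=True, on_epoch=True)
#     # trainer.logged_metrics   -> "l_se_step", "l_se_epoch"
#     # trainer.callback_metrics -> "l_se", "l_se_step", "l_se_epoch"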
"a1", "a2"} pbar_metrics = set(trainer.progress_bar_metrics) assert pbar_metrics == {"b"} assert set(trainer.callback_metrics) == (logged_metrics | pbar_metrics | {"a"}) - {"epoch"} @pytest.mark.parametrize(["batches", "log_interval", "max_epochs"], [(1, 1, 1), (64, 32, 2)]) def test__training_step__step_end__epoch_end__log(tmpdir, batches, log_interval, max_epochs): """Tests that training_step_end and training_epoch_end can log.""" class TestModel(BoringModel): def training_step(self, batch, batch_idx): loss = self.step(batch[0]) self.log("a", loss, on_step=True, on_epoch=True) return loss def training_step_end(self, out): self.log("b", out, on_step=True, on_epoch=True, prog_bar=True, logger=True) return out def training_epoch_end(self, outputs): self.log("c", outputs[0]["loss"], on_epoch=True, prog_bar=True, logger=True) self.log("d/e/f", 2) model = TestModel() model.val_dataloader = None trainer = Trainer( default_root_dir=tmpdir, limit_train_batches=batches, limit_val_batches=batches, max_epochs=max_epochs, log_every_n_steps=log_interval, weights_summary=None, ) trainer.fit(model) # make sure all the metrics are available for callbacks logged_metrics = set(trainer.logged_metrics) assert logged_metrics == {"a_step", "a_epoch", "b_step", "b_epoch", "c", "d/e/f", "epoch"} pbar_metrics = set(trainer.progress_bar_metrics) assert pbar_metrics == {"c", "b_epoch", "b_step"} assert set(trainer.callback_metrics) == (logged_metrics | pbar_metrics | {"a", "b"}) - {"epoch"} @pytest.mark.parametrize( ["batches", "fx", "result"], [(3, min, 0), (3, torch.max, 2), (11, max, 10), (5, "avg", 2), (5, "SUM", 10)] ) def test__training_step__log_max_reduce_fx(tmpdir, batches, fx, result): """Tests that log works correctly with different tensor types.""" class TestModel(BoringModel): def training_step(self, batch, batch_idx): acc = self.step(batch[0]) self.log("foo", torch.tensor(batch_idx, dtype=torch.long), on_step=False, on_epoch=True, reduce_fx=fx) return acc def validation_step(self, batch, batch_idx): output = self.layer(batch) loss = self.loss(batch, output) self.log("bar", torch.tensor(batch_idx).float(), on_step=False, on_epoch=True, reduce_fx=fx) return {"x": loss} model = TestModel() trainer = Trainer( default_root_dir=tmpdir, limit_train_batches=batches, limit_val_batches=batches, max_epochs=2, weights_summary=None, ) trainer.fit(model) # make sure types are correct assert trainer.logged_metrics["foo"] == result assert trainer.logged_metrics["bar"] == result def test_different_batch_types_for_sizing(tmpdir): class TestModel(BoringModel): def training_step(self, batch, batch_idx): assert isinstance(batch, dict) a = batch["a"] acc = self.step(a) self.log("a", {"d1": 2, "d2": torch.tensor(1)}, on_step=True, on_epoch=True) return acc def validation_step(self, batch, batch_idx): assert isinstance(batch, dict) a = batch["a"] output = self.layer(a) loss = self.loss(batch, output) self.log("n", {"d3": 2, "d4": torch.tensor(1)}, on_step=True, on_epoch=True) return {"x": loss} def train_dataloader(self): return torch.utils.data.DataLoader(RandomDictDataset(32, 64), batch_size=32) def val_dataloader(self): return torch.utils.data.DataLoader(RandomDictDataset(32, 64), batch_size=32) model = TestModel() trainer = Trainer( default_root_dir=tmpdir, limit_train_batches=1, limit_val_batches=2, max_epochs=1, weights_summary=None, fast_dev_run=True, ) trainer.fit(model) assert set(trainer.logged_metrics) == {"a_step", "a_epoch", "n_step", "n_epoch", "epoch"} def test_log_works_in_train_callback(tmpdir): """Tests 
def test_log_works_in_train_callback(tmpdir):
    """Tests that log can be called within callback."""

    class TestCallback(callbacks.Callback):

        count = 0
        choices = [False, True]

        # used to compute expected values
        logged_values = collections.defaultdict(list)
        call_counter = collections.Counter()
        logged_arguments = {}

        def make_logging(self, pl_module, func_name, on_steps, on_epochs, prob_bars):
            self.call_counter.update([func_name])
            for idx, (on_step, on_epoch, prog_bar) in enumerate(itertools.product(on_steps, on_epochs, prob_bars)):
                fx = f"{func_name}_{idx}"
                pl_module.log(fx, self.count, on_step=on_step, on_epoch=on_epoch, prog_bar=prog_bar)
                self.logged_values[fx].append(self.count)
                self.logged_arguments[fx] = {"on_step": on_step, "on_epoch": on_epoch, "prog_bar": prog_bar}
                self.count += 1

        def on_train_start(self, _, pl_module):
            self.make_logging(pl_module, "on_train_start", on_steps=[False], on_epochs=[True], prob_bars=self.choices)

        def on_epoch_start(self, _, pl_module):
            self.make_logging(
                pl_module, "on_epoch_start", on_steps=self.choices, on_epochs=[True], prob_bars=self.choices
            )

        def on_train_epoch_start(self, _, pl_module):
            self.make_logging(
                pl_module, "on_train_epoch_start", on_steps=self.choices, on_epochs=[True], prob_bars=self.choices
            )

        def on_batch_end(self, _, pl_module):
            self.make_logging(
                pl_module, "on_batch_end", on_steps=self.choices, on_epochs=self.choices, prob_bars=self.choices
            )

        def on_train_batch_end(self, _, pl_module, *__):
            self.make_logging(
                pl_module, "on_train_batch_end", on_steps=self.choices, on_epochs=self.choices, prob_bars=self.choices
            )

        def on_train_epoch_end(self, _, pl_module):
            self.make_logging(
                pl_module, "on_train_epoch_end", on_steps=[False], on_epochs=[True], prob_bars=self.choices
            )

        def on_epoch_end(self, _, pl_module):
            self.make_logging(pl_module, "on_epoch_end", on_steps=[False], on_epochs=[True], prob_bars=self.choices)

    class TestModel(BoringModel):

        seen_losses = []

        def training_step(self, batch, batch_idx):
            loss = super().training_step(batch, batch_idx)["loss"]
            self.seen_losses.append(loss)
            self.log("train_loss", loss, prog_bar=True)
            return {"loss": loss}

    model = TestModel()
    cb = TestCallback()
    trainer = Trainer(
        default_root_dir=tmpdir,
        limit_train_batches=2,
        limit_val_batches=0,
        num_sanity_val_steps=0,
        max_epochs=1,
        callbacks=[cb],
    )
    trainer.fit(model)

    # `train_loss` is logged on_step only, so callbacks and the progress bar see the last step's value
    assert trainer.progress_bar_callback.get_metrics(trainer, model)["train_loss"] == model.seen_losses[-1]
    assert trainer.callback_metrics["train_loss"] == model.seen_losses[-1]

    assert cb.call_counter == {
        "on_train_start": 1,
        "on_epoch_start": 1,
        "on_train_epoch_start": 1,
        "on_train_batch_end": 2,
        "on_batch_end": 2,
        "on_train_epoch_end": 1,
        "on_epoch_end": 1,
    }

    # make sure each output equals the reduction of all values logged under that key
    def get_expected(on_epoch, values):
        # epoch metrics are mean-reduced; step metrics keep the latest value, which equals
        # the max here because `self.count` is strictly increasing
        reduction = np.mean if on_epoch else np.max
        return reduction(values)

    for fx, value in trainer.callback_metrics.items():
        actual = value.item()
        if fx not in cb.logged_arguments:
            continue
        on_epoch = cb.logged_arguments[fx]["on_epoch"]
        values = cb.logged_values[fx]
        expected = get_expected(on_epoch, values)
        assert actual == expected

    for fx, attrs in cb.logged_arguments.items():
        # a metric appears unsuffixed in the progress bar only when exactly one of on_step/on_epoch is set
        should_include = attrs["prog_bar"] and (attrs["on_step"] ^ attrs["on_epoch"])
        is_included = fx in trainer.progress_bar_metrics
        assert is_included if should_include else not is_included
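# Worked example of the reduction above (hypothetical values): a metric logged with
# on_epoch=True from a hook called three times with 0, 1, 2 ends the epoch as
# (0 + 1 + 2) / 3 = 1.0 in `trainer.callback_metrics`; with on_epoch=False the last
# (here also the largest) value, 2, is kept instead.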
class LoggingSyncDistModel(BoringModel):
    def __init__(self, fake_result):
        super().__init__()
        self.fake_result = fake_result

    @property
    def rank(self) -> int:
        return self.trainer.global_rank

    def training_step(self, batch, batch_idx):
        value = self.fake_result + self.rank
        self.log("foo", value, on_step=True, on_epoch=False, sync_dist=True, reduce_fx="sum")
        self.log("foo_2", 2, on_step=True, on_epoch=False, sync_dist=True, reduce_fx="sum")
        self.log("foo_3", 2, on_step=True, on_epoch=False, sync_dist=True, reduce_fx="mean")
        self.log("foo_4", value, on_step=True, on_epoch=False, sync_dist=True, reduce_fx="mean")
        self.log("foo_5", batch_idx + self.rank, on_step=True, on_epoch=False, sync_dist=True, reduce_fx="max")

        self.log("foo_6", value, on_step=False, on_epoch=True, sync_dist=True, reduce_fx="sum")
        self.log("foo_7", 2, on_step=False, on_epoch=True, sync_dist=True, reduce_fx="sum")
        self.log("foo_8", 2, on_step=False, on_epoch=True, sync_dist=True, reduce_fx="mean")
        self.log("foo_9", value, on_step=False, on_epoch=True, sync_dist=True, reduce_fx="mean")
        self.log("foo_10", batch_idx, on_step=False, on_epoch=True, sync_dist=True, reduce_fx="max")
        return super().training_step(batch, batch_idx)

    def validation_step(self, batch, batch_idx):
        self.log("bar", self.fake_result, on_step=False, on_epoch=True, sync_dist=True, reduce_fx="sum")
        self.log("bar_2", self.fake_result, on_step=False, on_epoch=True, sync_dist=True, reduce_fx="mean")
        self.log("bar_3", batch_idx + self.rank, on_step=False, on_epoch=True, sync_dist=True, reduce_fx="max")
        return super().validation_step(batch, batch_idx)


@pytest.mark.parametrize(
    "gpus", [None, pytest.param(1, marks=RunIf(min_gpus=1)), pytest.param(2, marks=RunIf(min_gpus=2))]
)
def test_logging_sync_dist_true(tmpdir, gpus):
    """Tests that the sync_dist flag reduces values across devices (and is a no-op on a single device)."""
    fake_result = 1
    model = LoggingSyncDistModel(fake_result)
    trainer = Trainer(
        max_epochs=1,
        default_root_dir=tmpdir,
        limit_train_batches=3,
        limit_val_batches=3,
        weights_summary=None,
        gpus=gpus,
    )
    trainer.fit(model)

    num_devices = 1 if gpus is None else gpus
    use_multiple_devices = num_devices > 1
    total = fake_result * num_devices + 1

    metrics = trainer.callback_metrics
    # parentheses matter here: without them, `assert x == a if cond else b` asserts the
    # truthy fallback `b` when `cond` is False, and the check silently passes
    assert metrics["foo"] == (total if use_multiple_devices else fake_result)
    assert metrics["foo_2"] == 2 * num_devices
    assert metrics["foo_3"] == 2
    assert metrics["foo_4"] == (total / num_devices if use_multiple_devices else fake_result)
    assert metrics["foo_5"] == (fake_result * 2 + 1 if use_multiple_devices else fake_result * 2)
    assert metrics["foo_6"] == (fake_result * 3 * 2 + 3 if use_multiple_devices else fake_result * 3)
    assert metrics["foo_7"] == 2 * num_devices * 3
    assert metrics["foo_8"] == 2
    assert metrics["foo_9"] == ((fake_result * 2 + 1) / num_devices if use_multiple_devices else fake_result)
    assert metrics["foo_10"] == 2
    assert metrics["bar"] == fake_result * 3 * num_devices
    assert metrics["bar_2"] == fake_result
    assert metrics["bar_3"] == 2 + int(use_multiple_devices)
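# Worked example for the two-device case above (fake_result = 1, 3 batches):
# "foo_6" sums across batches and ranks; rank 0 logs 1 per batch and rank 1 logs 2,
# so the epoch total is 3 * 1 + 3 * 2 = 9 = fake_result * 3 * 2 + 3, matching the
# multi-device branch of the assertion. On one device the same sum is just 3 * 1.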
accelerator="ddp", gpus=2, profiler="pytorch", ) trainer.fit(model) assert trainer.logged_metrics["foo"] == 2 assert trainer.logged_metrics["bar"] == 2 def test_progress_bar_metrics_contains_values_on_train_epoch_end(tmpdir: str): class TestModel(BoringModel): def training_step(self, *args): self.log("foo", torch.tensor(self.current_epoch), on_step=False, on_epoch=True, prog_bar=True) return super().training_step(*args) def on_train_epoch_end(self, *_): self.log( "foo_2", torch.tensor(self.current_epoch), prog_bar=True, on_epoch=True, sync_dist=True, reduce_fx="sum" ) self.on_train_epoch_end_called = True class TestProgressBar(ProgressBar): def get_metrics(self, trainer: Trainer, model: LightningModule): items = super().get_metrics(trainer, model) items.pop("v_num", None) return items def on_epoch_end(self, trainer: Trainer, model: LightningModule): metrics = self.get_metrics(trainer, model) assert metrics["foo"] == self.trainer.current_epoch assert metrics["foo_2"] == self.trainer.current_epoch model.on_epoch_end_called = True progress_bar = TestProgressBar() trainer = Trainer( default_root_dir=tmpdir, callbacks=[progress_bar], max_epochs=2, limit_train_batches=1, limit_val_batches=0, checkpoint_callback=False, logger=False, weights_summary=None, ) model = TestModel() trainer.fit(model) assert model.on_train_epoch_end_called assert model.on_epoch_end_called def test_logging_in_callbacks_with_log_function(tmpdir): """Tests ensure self.log can be used directly in callbacks.""" class LoggingCallback(callbacks.Callback): def on_train_start(self, trainer, pl_module): self.log("on_train_start", 1) def on_train_epoch_start(self, trainer, pl_module): self.log("on_train_epoch_start", 2) def on_train_batch_end(self, trainer, pl_module, outputs, batch, batch_idx, dataloader_idx): self.log("on_train_batch_end", 3) def on_batch_end(self, trainer, pl_module): self.log("on_batch_end", 4) def on_epoch_end(self, trainer, pl_module): self.log("on_epoch_end", 5) def on_train_epoch_end(self, trainer, pl_module): self.log("on_train_epoch_end", 6) model = BoringModel() trainer = Trainer( default_root_dir=tmpdir, limit_train_batches=1, limit_val_batches=1, max_epochs=1, weights_summary=None, callbacks=[LoggingCallback()], ) trainer.fit(model) expected = { "on_train_start": 1, "on_train_epoch_start": 2, "on_train_batch_end": 3, "on_batch_end": 4, "on_epoch_end": 5, "on_train_epoch_end": 6, } assert trainer.callback_metrics == expected @RunIf(min_gpus=1) def test_metric_are_properly_reduced(tmpdir): class TestingModel(BoringModel): def __init__(self, *args, **kwargs) -> None: super().__init__() self.val_acc = Accuracy() def training_step(self, batch, batch_idx): output = super().training_step(batch, batch_idx) self.log("train_loss", output["loss"]) return output def validation_step(self, batch, batch_idx): preds = torch.tensor([[0.9, 0.1]], device=self.device) targets = torch.tensor([1], device=self.device) if batch_idx < 8: preds = torch.tensor([[0.1, 0.9]], device=self.device) self.val_acc(preds, targets) self.log("val_acc", self.val_acc, on_step=True, on_epoch=True) return super().validation_step(batch, batch_idx) early_stop = EarlyStopping(monitor="val_acc", mode="max") checkpoint = ModelCheckpoint(monitor="val_acc", save_last=True, save_top_k=2, mode="max") model = TestingModel() trainer = Trainer( default_root_dir=tmpdir, gpus=1, max_epochs=2, limit_train_batches=5, limit_val_batches=32, callbacks=[early_stop, checkpoint], ) trainer.fit(model) assert trainer.callback_metrics["val_acc"] == 8 / 32.0 assert 
"train_loss" in trainer.callback_metrics @pytest.mark.parametrize( "value", [None, dict(a=None), dict(a=dict(b=None)), dict(a=dict(b=1)), "foo", [1, 2, 3], (1, 2, 3), [[1, 2], 3]] ) def test_log_none_raises(tmpdir, value): class TestModel(BoringModel): def training_step(self, *args): self.log("foo", value) trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=1) model = TestModel() match = escape(f"self.log(foo, {value})` was called") with pytest.raises(ValueError, match=match): trainer.fit(model) def test_logging_raises(tmpdir): class TestModel(BoringModel): def training_step(self, batch, batch_idx): self.log("foo/dataloader_idx_0", -1) trainer = Trainer(default_root_dir=tmpdir) model = TestModel() with pytest.raises(MisconfigurationException, match="`self.log` with the key `foo/dataloader_idx_0`"): trainer.fit(model) class TestModel(BoringModel): def training_step(self, batch, batch_idx): self.log("foo", Accuracy()) trainer = Trainer(default_root_dir=tmpdir) model = TestModel() with pytest.raises(MisconfigurationException, match="fix this by setting an attribute for the metric in your"): trainer.fit(model) class TestModel(BoringModel): def __init__(self): super().__init__() self.bar = Accuracy() def training_step(self, batch, batch_idx): self.log("foo", Accuracy()) trainer = Trainer(default_root_dir=tmpdir) model = TestModel() with pytest.raises( MisconfigurationException, match=r"`self.log\(foo, ..., metric_attribute=name\)` where `name` is one of \['bar'\]", ): trainer.fit(model) class TestModel(BoringModel): def training_step(self, *args): self.log("foo", -1, prog_bar=False) self.log("foo", -1, prog_bar=True) return super().training_step(*args) trainer = Trainer(default_root_dir=tmpdir) model = TestModel() with pytest.raises(MisconfigurationException, match=r"self.log\(foo, ...\)` twice in `training_step`"): trainer.fit(model) class TestModel(BoringModel): def training_step(self, *args): self.log("foo", -1, reduce_fx=torch.argmax) return super().training_step(*args) trainer = Trainer(default_root_dir=tmpdir) model = TestModel() with pytest.raises(MisconfigurationException, match=r"reduce_fx={min,max,mean,sum}\)` are currently supported"): trainer.fit(model) def test_sanity_metrics_are_reset(tmpdir): class TestModel(BoringModel): def validation_step(self, batch, batch_idx): output = super().validation_step(batch, batch_idx) if self.trainer.sanity_checking: self.log("val_loss", output["x"], prog_bar=True, logger=True) return output def training_step(self, batch, batch_idx): loss = super().training_step(batch, batch_idx) if batch_idx == 0: assert self.trainer.progress_bar_metrics == {} assert self.trainer.logged_metrics == {} assert self.trainer.callback_metrics == {} self.log("train_loss", loss, prog_bar=True, logger=True) return loss trainer = Trainer( default_root_dir=tmpdir, max_epochs=1, limit_train_batches=1, limit_val_batches=2, num_sanity_val_steps=2 ) trainer.fit(TestModel()) assert "val_loss" not in trainer.progress_bar_metrics @RunIf(min_gpus=2) @pytest.mark.parametrize("log_gpu_memory", ["all", "min_max"]) def test_log_gpu_memory_without_logging_on_step(tmpdir, log_gpu_memory): model = BoringModel() trainer = Trainer( default_root_dir=tmpdir, max_epochs=1, limit_train_batches=1, limit_val_batches=0, log_gpu_memory=log_gpu_memory, log_every_n_steps=1, gpus=[1], ) trainer.fit(model) if log_gpu_memory == "min_max": assert "min_gpu_mem" in trainer.logged_metrics assert "max_gpu_mem" in trainer.logged_metrics else: assert "gpu_id: 1/memory.used (MB)" in trainer.logged_metrics 
@RunIf(min_gpus=1)
def test_move_metrics_to_cpu(tmpdir):
    class TestModel(BoringModel):
        def on_before_backward(self, loss: torch.Tensor) -> None:
            # even with `move_metrics_to_cpu=True`, the loss tensor used for the
            # backward pass must still live on the GPU
            assert loss.device.type == "cuda"

    trainer = Trainer(
        default_root_dir=tmpdir,
        fast_dev_run=True,
        amp_backend="native",
        precision=16,
        move_metrics_to_cpu=True,
        gpus=1,
    )
    trainer.fit(TestModel())