diff --git a/tests/tests_pytorch/helpers/pipelines.py b/tests/tests_pytorch/helpers/pipelines.py index 7cba60f9f6..8bc1f6bc07 100644 --- a/tests/tests_pytorch/helpers/pipelines.py +++ b/tests/tests_pytorch/helpers/pipelines.py @@ -59,14 +59,18 @@ def run_model_test( logger = get_default_logger(save_dir, version=version) trainer_options.update(logger=logger) trainer = Trainer(**trainer_options) - initial_values = torch.tensor([torch.sum(torch.abs(x)) for x in model.parameters()]) + with torch.no_grad(): + initial_values = torch.cat([x.view(-1) for x in model.parameters()]) trainer.fit(model, datamodule=data) - post_train_values = torch.tensor([torch.sum(torch.abs(x)) for x in model.parameters()]) + with torch.no_grad(): + post_train_values = torch.cat([x.view(-1) for x in model.parameters()]) - assert trainer.state.finished, f"Training failed with {trainer.state}" - # Check that the model is actually changed post-training - change_ratio = torch.norm(initial_values - post_train_values) - assert change_ratio >= min_change_ratio, f"the model is changed of {change_ratio} and shall be >={min_change_ratio}" + # Check that the model has changed post-training + change_ratio = torch.norm(initial_values - post_train_values) / torch.norm(initial_values) + assert change_ratio >= min_change_ratio, ( + f"The change in the model's parameter norm is {change_ratio:.1f}" + f" relative to the initial norm, but expected a change by >={min_change_ratio}" + ) # test model loading _ = load_model_from_checkpoint(trainer.checkpoint_callback.best_model_path, type(model)) diff --git a/tests/tests_pytorch/models/test_cpu.py b/tests/tests_pytorch/models/test_cpu.py index 123efd39b4..e79f014c7e 100644 --- a/tests/tests_pytorch/models/test_cpu.py +++ b/tests/tests_pytorch/models/test_cpu.py @@ -18,7 +18,7 @@ import torch import tests_pytorch.helpers.pipelines as tpipes import tests_pytorch.helpers.utils as tutils -from lightning.pytorch import Trainer +from lightning.pytorch import seed_everything, Trainer from lightning.pytorch.callbacks import Callback, EarlyStopping, ModelCheckpoint from lightning.pytorch.demos.boring_classes import BoringModel from tests_pytorch.helpers.datamodules import ClassifDataModule @@ -29,6 +29,8 @@ from tests_pytorch.helpers.simple_models import ClassificationModel @mock.patch("lightning.fabric.plugins.environments.slurm.SLURMEnvironment.detect", return_value=True) def test_cpu_slurm_save_load(_, tmpdir): """Verify model save/load/checkpoint on CPU.""" + seed_everything(42) + model = BoringModel() # logger file to get meta @@ -101,6 +103,8 @@ def test_cpu_slurm_save_load(_, tmpdir): def test_early_stopping_cpu_model(tmpdir): + seed_everything(42) + class ModelTrainVal(BoringModel): def validation_step(self, *args, **kwargs): output = super().validation_step(*args, **kwargs) @@ -129,6 +133,8 @@ def test_early_stopping_cpu_model(tmpdir): @RunIf(skip_windows=True, sklearn=True) def test_multi_cpu_model_ddp(tmpdir): """Make sure DDP works.""" + seed_everything(42) + trainer_options = { "default_root_dir": tmpdir, "enable_progress_bar": False, @@ -150,6 +156,7 @@ def test_lbfgs_cpu_model(tmpdir): Testing LBFGS optimizer """ + seed_everything(42) class ModelSpecifiedOptimizer(BoringModel): def __init__(self, optimizer_name, learning_rate): @@ -172,6 +179,8 @@ def test_lbfgs_cpu_model(tmpdir): def test_default_logger_callbacks_cpu_model(tmpdir): """Test each of the trainer options.""" + seed_everything(42) + trainer_options = { "default_root_dir": tmpdir, "max_epochs": 1, @@ -192,6 +201,7 @@ def test_default_logger_callbacks_cpu_model(tmpdir): def test_running_test_after_fitting(tmpdir): """Verify test() on fitted model.""" + seed_everything(42) class ModelTrainValTest(BoringModel): def validation_step(self, *args, **kwargs): @@ -238,6 +248,7 @@ def test_running_test_no_val(tmpdir): It performs train and test only """ + seed_everything(42) class ModelTrainTest(BoringModel): def test_step(self, *args, **kwargs): @@ -276,20 +287,9 @@ def test_running_test_no_val(tmpdir): tutils.assert_ok_model_acc(trainer, key="test_loss") -def test_simple_cpu(tmpdir): - """Verify continue training session on CPU.""" - model = BoringModel() - - # fit model - trainer = Trainer(default_root_dir=tmpdir, max_epochs=1, limit_val_batches=0.1, limit_train_batches=20) - trainer.fit(model) - - # traning complete - assert trainer.state.finished, "amp + ddp model failed to complete" - - def test_cpu_model(tmpdir): """Make sure model trains on CPU.""" + seed_everything(42) trainer_options = { "default_root_dir": tmpdir, "enable_progress_bar": False, @@ -304,6 +304,7 @@ def test_cpu_model(tmpdir): def test_all_features_cpu_model(tmpdir): """Test each of the trainer options.""" + seed_everything(42) trainer_options = { "default_root_dir": tmpdir, "gradient_clip_val": 1.0, @@ -316,5 +317,4 @@ def test_all_features_cpu_model(tmpdir): } model = BoringModel() - - tpipes.run_model_test(trainer_options, model, min_acc=0.01) + tpipes.run_model_test(trainer_options, model) diff --git a/tests/tests_pytorch/models/test_gpu.py b/tests/tests_pytorch/models/test_gpu.py index 542197fe91..e12bc0eb58 100644 --- a/tests/tests_pytorch/models/test_gpu.py +++ b/tests/tests_pytorch/models/test_gpu.py @@ -22,7 +22,7 @@ import torch import tests_pytorch.helpers.pipelines as tpipes from lightning.fabric.plugins.environments import TorchElasticEnvironment from lightning.fabric.utilities.device_parser import _parse_gpu_ids -from lightning.pytorch import Trainer +from lightning.pytorch import seed_everything, Trainer from lightning.pytorch.accelerators import CPUAccelerator, CUDAAccelerator from lightning.pytorch.demos.boring_classes import BoringModel from lightning.pytorch.utilities.exceptions import MisconfigurationException @@ -36,6 +36,7 @@ PRETEND_N_OF_GPUS = 16 @RunIf(min_cuda_gpus=2, sklearn=True) def test_multi_gpu_none_backend(tmpdir): """Make sure when using multiple GPUs the user can't use `accelerator = None`.""" + seed_everything(42) trainer_options = { "default_root_dir": tmpdir, "enable_progress_bar": False, @@ -55,6 +56,7 @@ def test_multi_gpu_none_backend(tmpdir): @RunIf(min_cuda_gpus=2) @pytest.mark.parametrize("devices", [1, [0], [1]]) def test_single_gpu_model(tmpdir, devices): + seed_everything(42) trainer_options = { "default_root_dir": tmpdir, "enable_progress_bar": False, diff --git a/tests/tests_pytorch/strategies/test_ddp_spawn.py b/tests/tests_pytorch/strategies/test_ddp_spawn.py index f368de4069..4c03af87fc 100644 --- a/tests/tests_pytorch/strategies/test_ddp_spawn.py +++ b/tests/tests_pytorch/strategies/test_ddp_spawn.py @@ -17,7 +17,7 @@ from torch.multiprocessing import ProcessRaisedException import tests_pytorch.helpers.pipelines as tpipes from lightning.pytorch.callbacks import EarlyStopping from lightning.pytorch.demos.boring_classes import BoringModel -from lightning.pytorch.trainer import Trainer +from lightning.pytorch.trainer import seed_everything, Trainer from tests_pytorch.helpers.datamodules import ClassifDataModule from tests_pytorch.helpers.runif import RunIf from tests_pytorch.helpers.simple_models import ClassificationModel @@ -26,6 +26,8 @@ from tests_pytorch.strategies.test_ddp_strategy import UnusedParametersModel @RunIf(min_cuda_gpus=2, sklearn=True) def test_multi_gpu_early_stop_ddp_spawn(tmpdir): + seed_everything(42) + trainer_options = { "default_root_dir": tmpdir, "callbacks": [EarlyStopping(monitor="train_acc")], @@ -44,6 +46,8 @@ def test_multi_gpu_early_stop_ddp_spawn(tmpdir): @RunIf(min_cuda_gpus=2) def test_multi_gpu_model_ddp_spawn(tmpdir): + seed_everything(42) + trainer_options = { "default_root_dir": tmpdir, "max_epochs": 1,