Make model test more robust (#18043)
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
This commit is contained in:
parent
69d7cfe5d8
commit
a97c559d92
|
@ -59,14 +59,18 @@ def run_model_test(
|
|||
logger = get_default_logger(save_dir, version=version)
|
||||
trainer_options.update(logger=logger)
|
||||
trainer = Trainer(**trainer_options)
|
||||
initial_values = torch.tensor([torch.sum(torch.abs(x)) for x in model.parameters()])
|
||||
with torch.no_grad():
|
||||
initial_values = torch.cat([x.view(-1) for x in model.parameters()])
|
||||
trainer.fit(model, datamodule=data)
|
||||
post_train_values = torch.tensor([torch.sum(torch.abs(x)) for x in model.parameters()])
|
||||
with torch.no_grad():
|
||||
post_train_values = torch.cat([x.view(-1) for x in model.parameters()])
|
||||
|
||||
assert trainer.state.finished, f"Training failed with {trainer.state}"
|
||||
# Check that the model is actually changed post-training
|
||||
change_ratio = torch.norm(initial_values - post_train_values)
|
||||
assert change_ratio >= min_change_ratio, f"the model is changed of {change_ratio} and shall be >={min_change_ratio}"
|
||||
# Check that the model has changed post-training
|
||||
change_ratio = torch.norm(initial_values - post_train_values) / torch.norm(initial_values)
|
||||
assert change_ratio >= min_change_ratio, (
|
||||
f"The change in the model's parameter norm is {change_ratio:.1f}"
|
||||
f" relative to the initial norm, but expected a change by >={min_change_ratio}"
|
||||
)
|
||||
|
||||
# test model loading
|
||||
_ = load_model_from_checkpoint(trainer.checkpoint_callback.best_model_path, type(model))
|
||||
|
|
|
@ -18,7 +18,7 @@ import torch
|
|||
|
||||
import tests_pytorch.helpers.pipelines as tpipes
|
||||
import tests_pytorch.helpers.utils as tutils
|
||||
from lightning.pytorch import Trainer
|
||||
from lightning.pytorch import seed_everything, Trainer
|
||||
from lightning.pytorch.callbacks import Callback, EarlyStopping, ModelCheckpoint
|
||||
from lightning.pytorch.demos.boring_classes import BoringModel
|
||||
from tests_pytorch.helpers.datamodules import ClassifDataModule
|
||||
|
@ -29,6 +29,8 @@ from tests_pytorch.helpers.simple_models import ClassificationModel
|
|||
@mock.patch("lightning.fabric.plugins.environments.slurm.SLURMEnvironment.detect", return_value=True)
|
||||
def test_cpu_slurm_save_load(_, tmpdir):
|
||||
"""Verify model save/load/checkpoint on CPU."""
|
||||
seed_everything(42)
|
||||
|
||||
model = BoringModel()
|
||||
|
||||
# logger file to get meta
|
||||
|
@ -101,6 +103,8 @@ def test_cpu_slurm_save_load(_, tmpdir):
|
|||
|
||||
|
||||
def test_early_stopping_cpu_model(tmpdir):
|
||||
seed_everything(42)
|
||||
|
||||
class ModelTrainVal(BoringModel):
|
||||
def validation_step(self, *args, **kwargs):
|
||||
output = super().validation_step(*args, **kwargs)
|
||||
|
@ -129,6 +133,8 @@ def test_early_stopping_cpu_model(tmpdir):
|
|||
@RunIf(skip_windows=True, sklearn=True)
|
||||
def test_multi_cpu_model_ddp(tmpdir):
|
||||
"""Make sure DDP works."""
|
||||
seed_everything(42)
|
||||
|
||||
trainer_options = {
|
||||
"default_root_dir": tmpdir,
|
||||
"enable_progress_bar": False,
|
||||
|
@ -150,6 +156,7 @@ def test_lbfgs_cpu_model(tmpdir):
|
|||
|
||||
Testing LBFGS optimizer
|
||||
"""
|
||||
seed_everything(42)
|
||||
|
||||
class ModelSpecifiedOptimizer(BoringModel):
|
||||
def __init__(self, optimizer_name, learning_rate):
|
||||
|
@ -172,6 +179,8 @@ def test_lbfgs_cpu_model(tmpdir):
|
|||
|
||||
def test_default_logger_callbacks_cpu_model(tmpdir):
|
||||
"""Test each of the trainer options."""
|
||||
seed_everything(42)
|
||||
|
||||
trainer_options = {
|
||||
"default_root_dir": tmpdir,
|
||||
"max_epochs": 1,
|
||||
|
@ -192,6 +201,7 @@ def test_default_logger_callbacks_cpu_model(tmpdir):
|
|||
|
||||
def test_running_test_after_fitting(tmpdir):
|
||||
"""Verify test() on fitted model."""
|
||||
seed_everything(42)
|
||||
|
||||
class ModelTrainValTest(BoringModel):
|
||||
def validation_step(self, *args, **kwargs):
|
||||
|
@ -238,6 +248,7 @@ def test_running_test_no_val(tmpdir):
|
|||
|
||||
It performs train and test only
|
||||
"""
|
||||
seed_everything(42)
|
||||
|
||||
class ModelTrainTest(BoringModel):
|
||||
def test_step(self, *args, **kwargs):
|
||||
|
@ -276,20 +287,9 @@ def test_running_test_no_val(tmpdir):
|
|||
tutils.assert_ok_model_acc(trainer, key="test_loss")
|
||||
|
||||
|
||||
def test_simple_cpu(tmpdir):
|
||||
"""Verify continue training session on CPU."""
|
||||
model = BoringModel()
|
||||
|
||||
# fit model
|
||||
trainer = Trainer(default_root_dir=tmpdir, max_epochs=1, limit_val_batches=0.1, limit_train_batches=20)
|
||||
trainer.fit(model)
|
||||
|
||||
# training complete
|
||||
assert trainer.state.finished, "amp + ddp model failed to complete"
|
||||
|
||||
|
||||
def test_cpu_model(tmpdir):
|
||||
"""Make sure model trains on CPU."""
|
||||
seed_everything(42)
|
||||
trainer_options = {
|
||||
"default_root_dir": tmpdir,
|
||||
"enable_progress_bar": False,
|
||||
|
@ -304,6 +304,7 @@ def test_cpu_model(tmpdir):
|
|||
|
||||
def test_all_features_cpu_model(tmpdir):
|
||||
"""Test each of the trainer options."""
|
||||
seed_everything(42)
|
||||
trainer_options = {
|
||||
"default_root_dir": tmpdir,
|
||||
"gradient_clip_val": 1.0,
|
||||
|
@ -316,5 +317,4 @@ def test_all_features_cpu_model(tmpdir):
|
|||
}
|
||||
|
||||
model = BoringModel()
|
||||
|
||||
tpipes.run_model_test(trainer_options, model, min_acc=0.01)
|
||||
tpipes.run_model_test(trainer_options, model)
|
||||
|
|
|
@ -22,7 +22,7 @@ import torch
|
|||
import tests_pytorch.helpers.pipelines as tpipes
|
||||
from lightning.fabric.plugins.environments import TorchElasticEnvironment
|
||||
from lightning.fabric.utilities.device_parser import _parse_gpu_ids
|
||||
from lightning.pytorch import Trainer
|
||||
from lightning.pytorch import seed_everything, Trainer
|
||||
from lightning.pytorch.accelerators import CPUAccelerator, CUDAAccelerator
|
||||
from lightning.pytorch.demos.boring_classes import BoringModel
|
||||
from lightning.pytorch.utilities.exceptions import MisconfigurationException
|
||||
|
@ -36,6 +36,7 @@ PRETEND_N_OF_GPUS = 16
|
|||
@RunIf(min_cuda_gpus=2, sklearn=True)
|
||||
def test_multi_gpu_none_backend(tmpdir):
|
||||
"""Make sure when using multiple GPUs the user can't use `accelerator = None`."""
|
||||
seed_everything(42)
|
||||
trainer_options = {
|
||||
"default_root_dir": tmpdir,
|
||||
"enable_progress_bar": False,
|
||||
|
@ -55,6 +56,7 @@ def test_multi_gpu_none_backend(tmpdir):
|
|||
@RunIf(min_cuda_gpus=2)
|
||||
@pytest.mark.parametrize("devices", [1, [0], [1]])
|
||||
def test_single_gpu_model(tmpdir, devices):
|
||||
seed_everything(42)
|
||||
trainer_options = {
|
||||
"default_root_dir": tmpdir,
|
||||
"enable_progress_bar": False,
|
||||
|
|
|
@ -17,7 +17,7 @@ from torch.multiprocessing import ProcessRaisedException
|
|||
import tests_pytorch.helpers.pipelines as tpipes
|
||||
from lightning.pytorch.callbacks import EarlyStopping
|
||||
from lightning.pytorch.demos.boring_classes import BoringModel
|
||||
from lightning.pytorch.trainer import Trainer
|
||||
from lightning.pytorch.trainer import seed_everything, Trainer
|
||||
from tests_pytorch.helpers.datamodules import ClassifDataModule
|
||||
from tests_pytorch.helpers.runif import RunIf
|
||||
from tests_pytorch.helpers.simple_models import ClassificationModel
|
||||
|
@ -26,6 +26,8 @@ from tests_pytorch.strategies.test_ddp_strategy import UnusedParametersModel
|
|||
|
||||
@RunIf(min_cuda_gpus=2, sklearn=True)
|
||||
def test_multi_gpu_early_stop_ddp_spawn(tmpdir):
|
||||
seed_everything(42)
|
||||
|
||||
trainer_options = {
|
||||
"default_root_dir": tmpdir,
|
||||
"callbacks": [EarlyStopping(monitor="train_acc")],
|
||||
|
@ -44,6 +46,8 @@ def test_multi_gpu_early_stop_ddp_spawn(tmpdir):
|
|||
|
||||
@RunIf(min_cuda_gpus=2)
|
||||
def test_multi_gpu_model_ddp_spawn(tmpdir):
|
||||
seed_everything(42)
|
||||
|
||||
trainer_options = {
|
||||
"default_root_dir": tmpdir,
|
||||
"max_epochs": 1,
|
||||
|
|
Loading…
Reference in New Issue