From 8e9a026bc34d8409faa572a7144c2d96a7c039ed Mon Sep 17 00:00:00 2001
From: Rohit Gupta
Date: Thu, 11 Feb 2021 20:02:07 +0530
Subject: [PATCH] [tests/models] refactor with BoringModel (#5507)

* update with BoringModel
* update with BoringModel
* step
* try TPU
* TPU
* update tests
* update tpu tests
* self
* fix
* dp
* update tests
* ref
* update tests
* fix tpu tests
* fix dp and run_prediction
* dp
* only dp
* Apply suggestions from code review
* Apply suggestions from code review
* Apply suggestions from code review
* Apply suggestions from code review

Co-authored-by: Jirka Borovec
---
 tests/core/test_datamodules.py           |  70 +++--
 tests/helpers/pipelines.py               |  12 +-
 tests/helpers/simple_models.py           |  14 +-
 .../data/horovod/train_default_model.py  |   6 +-
 tests/models/test_amp.py                 |  33 ++-
 tests/models/test_cpu.py                 |  39 ++-
 tests/models/test_grad_norm.py           |  12 +-
 tests/models/test_hooks.py               |  34 +--
 tests/models/test_horovod.py             |  21 +-
 tests/models/test_hparams.py             | 111 ++++----
 tests/models/test_onnx.py                |  14 +-
 tests/models/test_restore.py             | 252 +++++++++++-------
 tests/models/test_tpu.py                 |  82 +++---
 13 files changed, 397 insertions(+), 303 deletions(-)

diff --git a/tests/core/test_datamodules.py b/tests/core/test_datamodules.py
index 1bbbe7c40f..76fdca0fed 100644
--- a/tests/core/test_datamodules.py
+++ b/tests/core/test_datamodules.py
@@ -18,13 +18,16 @@ from unittest.mock import MagicMock
 
 import pytest
 import torch
+import torch.nn.functional as F
 
 from pytorch_lightning import LightningDataModule, Trainer
 from pytorch_lightning.accelerators.legacy.gpu_accelerator import GPUAccelerator
 from pytorch_lightning.callbacks import ModelCheckpoint
 from pytorch_lightning.trainer.states import TrainerState
 from tests.helpers import BoringDataModule, BoringModel
-from tests.helpers.utils import reset_seed
+from tests.helpers.datamodules import ClassifDataModule
+from tests.helpers.simple_models import ClassificationModel
+from tests.helpers.utils import reset_seed, set_random_master_port
 
 
 def test_can_prepare_data(tmpdir):
@@ -190,8 +193,8 @@ def test_dm_pickle_after_init(tmpdir):
 def test_train_loop_only(tmpdir):
     reset_seed()
 
-    dm = BoringDataModule()
-    model = BoringModel()
+    dm = ClassifDataModule()
+    model = ClassificationModel()
 
     model.validation_step = None
     model.validation_step_end = None
@@ -207,18 +210,17 @@
     )
 
     # fit model
-    result = trainer.fit(model, dm)
+    result = trainer.fit(model, datamodule=dm)
     assert trainer.state == TrainerState.FINISHED, f"Training failed with {trainer.state}"
     assert result
-    # TODO: add end-to-end test
-    # assert trainer.callback_metrics['loss'] < 0.6
+    assert trainer.callback_metrics['train_loss'] < 1.0
 
 
 def test_train_val_loop_only(tmpdir):
     reset_seed()
 
-    dm = BoringDataModule()
-    model = BoringModel()
+    dm = ClassifDataModule()
+    model = ClassificationModel()
 
     model.validation_step = None
     model.validation_step_end = None
@@ -231,11 +233,10 @@
     )
 
     # fit model
-    result = trainer.fit(model, dm)
+    result = trainer.fit(model, datamodule=dm)
     assert trainer.state == TrainerState.FINISHED, f"Training failed with {trainer.state}"
     assert result
-    # TODO: add end-to-end test
-    # assert trainer.callback_metrics['train_loss'] < 0.6
+    assert trainer.callback_metrics['train_loss'] < 1.0
 
 
 def test_dm_checkpoint_save(tmpdir):
@@ -294,8 +295,8 @@ def test_test_loop_only(tmpdir):
 def test_full_loop(tmpdir):
     reset_seed()
 
-    dm = BoringDataModule()
-    model = BoringModel()
+    dm = ClassifDataModule()
+    model = ClassificationModel()
 
     trainer = Trainer(
         default_root_dir=tmpdir,
@@ -311,8 +312,7 @@
 
     # test
     result = trainer.test(datamodule=dm)
-    # TODO: add end-to-end test
-    # assert result[0]['test_acc'] > 0.8
+    assert result[0]['test_acc'] > 0.6
 
 
 def test_trainer_attached_to_dm(tmpdir):
@@ -346,8 +346,8 @@ def test_trainer_attached_to_dm(tmpdir):
 def test_full_loop_single_gpu(tmpdir):
     reset_seed()
 
-    dm = BoringDataModule()
-    model = BoringModel()
+    dm = ClassifDataModule()
+    model = ClassificationModel()
 
     trainer = Trainer(
         default_root_dir=tmpdir,
@@ -364,16 +364,37 @@ def test_full_loop_single_gpu(tmpdir):
 
     # test
     result = trainer.test(datamodule=dm)
-    # TODO: add end-to-end test
-    # assert result[0]['test_acc'] > 0.8
+    assert result[0]['test_acc'] > 0.6
 
 
 @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
 def test_full_loop_dp(tmpdir):
-    reset_seed()
+    set_random_master_port()
 
-    dm = BoringDataModule()
-    model = BoringModel()
+    class CustomClassificationModelDP(ClassificationModel):
+
+        def _step(self, batch, batch_idx):
+            x, y = batch
+            logits = self(x)
+            return {'logits': logits, 'y': y}
+
+        def training_step(self, batch, batch_idx):
+            _, y = batch
+            out = self._step(batch, batch_idx)
+            out['loss'] = F.cross_entropy(out['logits'], y)
+            return out
+
+        def validation_step(self, batch, batch_idx):
+            return self._step(batch, batch_idx)
+
+        def test_step(self, batch, batch_idx):
+            return self._step(batch, batch_idx)
+
+        def test_step_end(self, outputs):
+            self.log('test_acc', self.test_acc(outputs['logits'], outputs['y']))
+
+    dm = ClassifDataModule()
+    model = CustomClassificationModelDP()
 
     trainer = Trainer(
         default_root_dir=tmpdir,
@@ -385,14 +406,13 @@
     )
 
     # fit model
-    result = trainer.fit(model, dm)
+    result = trainer.fit(model, datamodule=dm)
     assert trainer.state == TrainerState.FINISHED, f"Training failed with {trainer.state}"
     assert result
 
     # test
     result = trainer.test(datamodule=dm)
-    # TODO: add end-to-end test
-    # assert result[0]['test_acc'] > 0.8
+    assert result[0]['test_acc'] > 0.6
 
 
 @pytest.mark.skipif(not torch.cuda.is_available(), reason="test requires GPU machine")
diff --git a/tests/helpers/pipelines.py b/tests/helpers/pipelines.py
index f5d9823600..3f131ab055 100644
--- a/tests/helpers/pipelines.py
+++ b/tests/helpers/pipelines.py
@@ -102,9 +102,9 @@ def run_model_test(
 
 def run_prediction(trained_model, dataloader, dp=False, min_acc=0.25):
     if isinstance(trained_model, BoringModel):
-        return _boring_model_run_prediction(trained_model, dataloader, dp, min_acc)
+        return _boring_model_run_prediction(trained_model, dataloader, min_acc)
     else:
-        return _eval_model_template_run_prediction(trained_model, dataloader, dp, min_acc)
+        return _eval_model_template_run_prediction(trained_model, dataloader, dp, min_acc=min_acc)
 
 
 def _eval_model_template_run_prediction(trained_model, dataloader, dp=False, min_acc=0.50):
@@ -135,11 +135,15 @@ def _eval_model_template_run_prediction(trained_model, dataloader, dp=False, min
     assert acc >= min_acc, f"This model is expected to get > {min_acc} in test set (it got {acc})"
 
 
-def _boring_model_run_prediction(trained_model, dataloader, dp=False, min_acc=0.25):
+# TODO: This test compares a loss value with a min accuracy - complete non-sense!
+# create BoringModels that make actual predictions!
+def _boring_model_run_prediction(trained_model, dataloader, min_acc=0.25):
     # run prediction on 1 batch
+    trained_model.cpu()
     batch = next(iter(dataloader))
+
     with torch.no_grad():
         output = trained_model(batch)
-        acc = trained_model.loss(batch, output)
+    acc = trained_model.loss(batch, output)
     assert acc >= min_acc, f"This model is expected to get, {min_acc} in test set but got {acc}"
 
diff --git a/tests/helpers/simple_models.py b/tests/helpers/simple_models.py
index ebc70690f4..9288a3c802 100644
--- a/tests/helpers/simple_models.py
+++ b/tests/helpers/simple_models.py
@@ -51,18 +51,21 @@ class ClassificationModel(LightningModule):
         x, y = batch
         logits = self.forward(x)
         loss = F.cross_entropy(logits, y)
-        self.log('train_Acc', self.train_acc(logits, y), prog_bar=True)
+        self.log('train_loss', loss, prog_bar=True)
+        self.log('train_acc', self.train_acc(logits, y), prog_bar=True)
         return {"loss": loss}
 
     def validation_step(self, batch, batch_idx):
         x, y = batch
         logits = self.forward(x)
-        self.log('valid_Acc', self.valid_acc(logits, y), prog_bar=True)
+        self.log('val_loss', F.cross_entropy(logits, y), prog_bar=False)
+        self.log('val_acc', self.valid_acc(logits, y), prog_bar=True)
 
     def test_step(self, batch, batch_idx):
         x, y = batch
         logits = self.forward(x)
-        self.log('test_Acc', self.test_acc(logits, y), prog_bar=True)
+        self.log('test_loss', F.cross_entropy(logits, y), prog_bar=False)
+        self.log('test_acc', self.test_acc(logits, y), prog_bar=True)
 
 
 class RegressionModel(LightningModule):
@@ -98,15 +101,18 @@ class RegressionModel(LightningModule):
         x, y = batch
         out = self.forward(x)
         loss = F.mse_loss(out, y)
+        self.log('train_loss', loss, prog_bar=False)
         self.log('train_MSE', self.train_mse(out, y), prog_bar=True)
         return {"loss": loss}
 
     def validation_step(self, batch, batch_idx):
         x, y = batch
         out = self.forward(x)
-        self.log('valid_MSE', self.valid_mse(out, y), prog_bar=True)
+        self.log('val_loss', F.mse_loss(out, y), prog_bar=False)
+        self.log('val_MSE', self.valid_mse(out, y), prog_bar=True)
 
     def test_step(self, batch, batch_idx):
         x, y = batch
         out = self.forward(x)
+        self.log('test_loss', F.mse_loss(out, y), prog_bar=False)
         self.log('test_MSE', self.test_mse(out, y), prog_bar=True)
diff --git a/tests/models/data/horovod/train_default_model.py b/tests/models/data/horovod/train_default_model.py
index 24ddbd24c4..93a637dda1 100644
--- a/tests/models/data/horovod/train_default_model.py
+++ b/tests/models/data/horovod/train_default_model.py
@@ -36,7 +36,7 @@ if _HOROVOD_AVAILABLE:
 else:
     print('You requested to import Horovod which is missing or not supported for your OS.')
 
-from tests.base import EvalModelTemplate  # noqa: E402
+from tests.helpers import BoringModel  # noqa: E402
 from tests.helpers.pipelines import run_prediction  # noqa: E402
 from tests.helpers.utils import reset_seed, set_random_master_port  # noqa: E402
 
@@ -53,7 +53,7 @@ def run_test_from_config(trainer_options):
     ckpt_path = trainer_options['weights_save_path']
     trainer_options.update(callbacks=[ModelCheckpoint(dirpath=ckpt_path)])
 
-    model = EvalModelTemplate()
+    model = BoringModel()
     trainer = Trainer(**trainer_options)
     trainer.fit(model)
 
@@ -66,7 +66,7 @@ def run_test_from_config(trainer_options):
         return
 
     # test model loading
-    pretrained_model = EvalModelTemplate.load_from_checkpoint(trainer.checkpoint_callback.best_model_path)
+    pretrained_model = BoringModel.load_from_checkpoint(trainer.checkpoint_callback.best_model_path)
 
     # test new model accuracy
     test_loaders = model.test_dataloader()
diff --git a/tests/models/test_amp.py
b/tests/models/test_amp.py index 8a5b6d005c..8d620bb563 100644 --- a/tests/models/test_amp.py +++ b/tests/models/test_amp.py @@ -24,7 +24,7 @@ from pytorch_lightning import Trainer from pytorch_lightning.trainer.states import TrainerState from pytorch_lightning.utilities import _APEX_AVAILABLE from pytorch_lightning.utilities.exceptions import MisconfigurationException -from tests.base import EvalModelTemplate +from tests.helpers import BoringModel @pytest.mark.skip(reason='dp + amp not supported currently') # TODO @@ -41,7 +41,7 @@ def test_amp_single_gpu_dp(tmpdir): precision=16, ) - model = EvalModelTemplate() + model = BoringModel() # tutils.run_model_test(trainer_options, model) trainer.fit(model) @@ -60,7 +60,7 @@ def test_amp_single_gpu_ddp_spawn(tmpdir): precision=16, ) - model = EvalModelTemplate() + model = BoringModel() # tutils.run_model_test(trainer_options, model) trainer.fit(model) @@ -81,7 +81,7 @@ def test_amp_multi_gpu_dp(tmpdir): precision=16, ) - model = EvalModelTemplate() + model = BoringModel() # tutils.run_model_test(trainer_options, model) trainer.fit(model) @@ -100,7 +100,7 @@ def test_amp_multi_gpu_ddp_spawn(tmpdir): precision=16, ) - model = EvalModelTemplate() + model = BoringModel() # tutils.run_model_test(trainer_options, model) trainer.fit(model) @@ -108,13 +108,13 @@ def test_amp_multi_gpu_ddp_spawn(tmpdir): @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine") +@mock.patch.dict(os.environ, {"SLURM_LOCALID": "0"}) def test_amp_gpu_ddp_slurm_managed(tmpdir): """Make sure DDP + AMP work.""" # simulate setting slurm flags tutils.set_random_master_port() - os.environ['SLURM_LOCALID'] = str(0) - model = EvalModelTemplate() + model = BoringModel() # exp file to get meta logger = tutils.get_default_logger(tmpdir) @@ -156,7 +156,7 @@ def test_cpu_model_with_amp(tmpdir): precision=16, ) - model = EvalModelTemplate() + model = BoringModel() with pytest.raises((MisconfigurationException, ModuleNotFoundError)): tpipes.run_model_test(trainer_options, model, on_gpu=False) @@ -165,7 +165,7 @@ def test_cpu_model_with_amp(tmpdir): @mock.patch.dict(os.environ, {"PL_DEV_DEBUG": "1"}) def test_amp_without_apex(tmpdir): """Check that even with apex amp type without requesting precision=16 the amp backend is void.""" - model = EvalModelTemplate() + model = BoringModel() trainer = Trainer( default_root_dir=tmpdir, @@ -190,19 +190,24 @@ def test_amp_without_apex(tmpdir): def test_amp_with_apex(tmpdir): """Check calling apex scaling in training.""" - class CustomModel(EvalModelTemplate): + class CustomModel(BoringModel): + + def training_step(self, batch, batch_idx, optimizer_idx): + return super().training_step(batch, batch_idx) def configure_optimizers(self): - optimizer1 = optim.Adam(self.parameters(), lr=self.learning_rate) - optimizer2 = optim.SGD(self.parameters(), lr=self.learning_rate) + optimizer1 = optim.Adam(self.parameters(), lr=0.01) + optimizer2 = optim.SGD(self.parameters(), lr=0.01) lr_scheduler1 = optim.lr_scheduler.StepLR(optimizer1, 1, gamma=0.1) lr_scheduler2 = optim.lr_scheduler.StepLR(optimizer2, 1, gamma=0.1) return [optimizer1, optimizer2], [lr_scheduler1, lr_scheduler2] model = CustomModel() + model.training_epoch_end = None + trainer = Trainer( default_root_dir=tmpdir, - max_epochs=1, + max_steps=5, precision=16, amp_backend='apex', gpus=1, @@ -210,7 +215,7 @@ def test_amp_with_apex(tmpdir): assert str(trainer.amp_backend) == "AMPType.APEX" trainer.fit(model) assert trainer.state == TrainerState.FINISHED, f"Training 
failed with {trainer.state}" - assert trainer.dev_debugger.count_events('AMP') == 20 + assert trainer.dev_debugger.count_events('AMP') == 10 assert isinstance(trainer.lr_schedulers[0]['scheduler'].optimizer, optim.Adam) assert isinstance(trainer.lr_schedulers[1]['scheduler'].optimizer, optim.SGD) diff --git a/tests/models/test_cpu.py b/tests/models/test_cpu.py index c3ac26bdeb..e8970c2df4 100644 --- a/tests/models/test_cpu.py +++ b/tests/models/test_cpu.py @@ -23,7 +23,6 @@ import tests.helpers.utils as tutils from pytorch_lightning import Trainer from pytorch_lightning.callbacks import Callback, EarlyStopping, ModelCheckpoint from pytorch_lightning.trainer.states import TrainerState -from tests.base import EvalModelTemplate from tests.helpers import BoringModel from tests.helpers.datamodules import ClassifDataModule from tests.helpers.simple_models import ClassificationModel @@ -101,10 +100,12 @@ def test_early_stopping_cpu_model(tmpdir): class ModelTrainVal(BoringModel): - def validation_epoch_end(self, outputs) -> None: - val_loss = torch.stack([x["x"] for x in outputs]).mean() - self.log('val_loss', val_loss) + def validation_step(self, *args, **kwargs): + output = super().validation_step(*args, **kwargs) + self.log('val_loss', output['x']) + return output + tutils.reset_seed() stopping = EarlyStopping(monitor="val_loss", min_delta=0.1) trainer_options = dict( callbacks=[stopping], @@ -198,13 +199,15 @@ def test_running_test_after_fitting(tmpdir): class ModelTrainValTest(BoringModel): - def validation_epoch_end(self, outputs) -> None: - val_loss = torch.stack([x["x"] for x in outputs]).mean() - self.log('val_loss', val_loss) + def validation_step(self, *args, **kwargs): + output = super().validation_step(*args, **kwargs) + self.log('val_loss', output['x']) + return output - def test_epoch_end(self, outputs) -> None: - test_loss = torch.stack([x["y"] for x in outputs]).mean() - self.log('test_loss', test_loss) + def test_step(self, *args, **kwargs): + output = super().test_step(*args, **kwargs) + self.log('test_loss', output['y']) + return output model = ModelTrainValTest() @@ -244,9 +247,10 @@ def test_running_test_no_val(tmpdir): def val_dataloader(self): pass - def test_epoch_end(self, outputs) -> None: - test_loss = torch.stack([x["y"] for x in outputs]).mean() - self.log('test_loss', test_loss) + def test_step(self, *args, **kwargs): + output = super().test_step(*args, **kwargs) + self.log('test_loss', output['y']) + return output model = ModelTrainTest() @@ -297,15 +301,10 @@ def test_simple_cpu(tmpdir): def test_cpu_model(tmpdir): """Make sure model trains on CPU.""" trainer_options = dict( - default_root_dir=tmpdir, - progress_bar_refresh_rate=0, - max_epochs=1, - limit_train_batches=0.4, - limit_val_batches=0.4 + default_root_dir=tmpdir, progress_bar_refresh_rate=0, max_epochs=1, limit_train_batches=4, limit_val_batches=4 ) - model = EvalModelTemplate() - + model = BoringModel() tpipes.run_model_test(trainer_options, model, on_gpu=False) diff --git a/tests/models/test_grad_norm.py b/tests/models/test_grad_norm.py index 10cfa0cb9a..4d04911ffa 100644 --- a/tests/models/test_grad_norm.py +++ b/tests/models/test_grad_norm.py @@ -20,11 +20,11 @@ import pytest from pytorch_lightning import Trainer from pytorch_lightning.trainer.states import TrainerState -from tests.base import EvalModelTemplate +from tests.helpers import BoringModel from tests.helpers.utils import reset_seed -class ModelWithManualGradTracker(EvalModelTemplate): +class ModelWithManualGradTracker(BoringModel): def 
__init__(self, norm_type, *args, **kwargs): super().__init__(*args, **kwargs) @@ -36,9 +36,9 @@ class ModelWithManualGradTracker(EvalModelTemplate): def training_step(self, batch, batch_idx, optimizer_idx=None): # just return a loss, no log or progress bar meta - x, y = batch - loss_val = self.loss(y, self(x.flatten(1, -1))) - return {'loss': loss_val} + output = self(batch) + loss = self.loss(batch, output) + return {'loss': loss} def on_after_backward(self): out, norms = {}, [] @@ -102,7 +102,7 @@ def test_grad_tracking_interval(tmpdir, log_every_n_steps): ) with patch.object(trainer.logger, "log_metrics") as mocked: - model = EvalModelTemplate() + model = BoringModel() trainer.fit(model) expected = trainer.global_step // log_every_n_steps grad_norm_dicts = [] diff --git a/tests/models/test_hooks.py b/tests/models/test_hooks.py index 8e7615baa7..969597a10f 100644 --- a/tests/models/test_hooks.py +++ b/tests/models/test_hooks.py @@ -21,14 +21,13 @@ import torch from pytorch_lightning import Callback, Trainer from pytorch_lightning.accelerators.legacy.gpu_accelerator import GPUAccelerator from pytorch_lightning.trainer.states import TrainerState -from tests.base import EvalModelTemplate from tests.helpers import BoringModel, RandomDataset @pytest.mark.parametrize('max_steps', [1, 2, 3]) def test_on_before_zero_grad_called(tmpdir, max_steps): - class CurrentTestModel(EvalModelTemplate): + class CurrentTestModel(BoringModel): on_before_zero_grad_called = 0 def on_before_zero_grad(self, optimizer): @@ -40,7 +39,6 @@ def test_on_before_zero_grad_called(tmpdir, max_steps): default_root_dir=tmpdir, max_steps=max_steps, max_epochs=2, - num_sanity_val_steps=5, ) assert 0 == model.on_before_zero_grad_called trainer.fit(model) @@ -55,23 +53,24 @@ def test_training_epoch_end_metrics_collection(tmpdir): """ Test that progress bar metrics also get collected at the end of an epoch. 
""" num_epochs = 3 - class CurrentModel(EvalModelTemplate): + class CurrentModel(BoringModel): def training_step(self, *args, **kwargs): output = super().training_step(*args, **kwargs) - output['progress_bar'].update({'step_metric': torch.tensor(-1)}) - output['progress_bar'].update({'shared_metric': 100}) + self.log_dict({'step_metric': torch.tensor(-1), 'shared_metric': 100}, logger=False, prog_bar=True) return output def training_epoch_end(self, outputs): epoch = self.current_epoch # both scalar tensors and Python numbers are accepted - return { - 'progress_bar': { - f'epoch_metric_{epoch}': torch.tensor(epoch), # add a new metric key every epoch - 'shared_metric': 111, - } - } + self.log_dict( + { + f'epoch_metric_{epoch}': torch.tensor(epoch), + 'shared_metric': 111 + }, + logger=False, + prog_bar=True, + ) model = CurrentModel() trainer = Trainer( @@ -103,7 +102,7 @@ def test_training_epoch_end_metrics_collection_on_override(tmpdir): def on_train_epoch_end(self, trainer, pl_module, outputs): self.len_outputs = len(outputs[0]) - class OverriddenModel(EvalModelTemplate): + class OverriddenModel(BoringModel): def on_train_epoch_start(self): self.num_train_batches = 0 @@ -114,7 +113,7 @@ def test_training_epoch_end_metrics_collection_on_override(tmpdir): def on_train_batch_end(self, outputs, batch, batch_idx, dataloader_idx): self.num_train_batches += 1 - class NotOverriddenModel(EvalModelTemplate): + class NotOverriddenModel(BoringModel): def on_train_epoch_start(self): self.num_train_batches = 0 @@ -124,6 +123,7 @@ def test_training_epoch_end_metrics_collection_on_override(tmpdir): overridden_model = OverriddenModel() not_overridden_model = NotOverriddenModel() + not_overridden_model.training_epoch_end = None callback = LoggingCallback() trainer = Trainer( @@ -152,7 +152,7 @@ def test_transfer_batch_hook(): self.samples = data[0] self.targets = data[1] - class CurrentTestModel(EvalModelTemplate): + class CurrentTestModel(BoringModel): hook_called = False @@ -166,7 +166,7 @@ def test_transfer_batch_hook(): return data model = CurrentTestModel() - batch = CustomBatch((torch.zeros(5, 28), torch.ones(5, 1, dtype=torch.long))) + batch = CustomBatch((torch.zeros(5, 32), torch.ones(5, 1, dtype=torch.long))) trainer = Trainer(gpus=1) trainer.accelerator_backend = GPUAccelerator(trainer) @@ -226,7 +226,7 @@ def test_transfer_batch_hook_ddp(tmpdir): @pytest.mark.parametrize('max_epochs,batch_idx_', [(2, 5), (3, 8), (4, 12)]) def test_on_train_batch_start_hook(max_epochs, batch_idx_): - class CurrentModel(EvalModelTemplate): + class CurrentModel(BoringModel): def on_train_batch_start(self, batch, batch_idx, dataloader_idx): if batch_idx == batch_idx_: diff --git a/tests/models/test_horovod.py b/tests/models/test_horovod.py index 948fb0144d..19f39b3da4 100644 --- a/tests/models/test_horovod.py +++ b/tests/models/test_horovod.py @@ -30,9 +30,8 @@ from pytorch_lightning.accelerators.legacy.horovod_accelerator import HorovodAcc from pytorch_lightning.metrics.classification.accuracy import Accuracy from pytorch_lightning.trainer.states import TrainerState from pytorch_lightning.utilities import _APEX_AVAILABLE, _HOROVOD_AVAILABLE, _NATIVE_AMP_AVAILABLE -from tests.base import EvalModelTemplate +from tests.helpers import BoringModel from tests.helpers.advanced_models import BasicGAN -from tests.helpers.boring_model import BoringModel if _HOROVOD_AVAILABLE: import horovod @@ -173,22 +172,17 @@ def test_horovod_amp(tmpdir): @pytest.mark.skipif(not torch.cuda.is_available(), reason="test requires GPU 
machine") def test_horovod_transfer_batch_to_gpu(tmpdir): - class TestTrainingStepModel(EvalModelTemplate): + class TestTrainingStepModel(BoringModel): def training_step(self, batch, *args, **kwargs): - x, y = batch - assert str(x.device) != 'cpu' - assert str(y.device) != 'cpu' + assert str(batch.device) != 'cpu' return super(TestTrainingStepModel, self).training_step(batch, *args, **kwargs) def validation_step(self, batch, *args, **kwargs): - x, y = batch - assert str(x.device) != 'cpu' - assert str(y.device) != 'cpu' + assert str(batch.device) != 'cpu' return super(TestTrainingStepModel, self).validation_step(batch, *args, **kwargs) - hparams = EvalModelTemplate.get_default_hparams() - model = TestTrainingStepModel(**hparams) + model = TestTrainingStepModel() trainer_options = dict( default_root_dir=str(tmpdir), @@ -205,7 +199,7 @@ def test_horovod_transfer_batch_to_gpu(tmpdir): @pytest.mark.skipif(platform.system() == "Windows", reason="Horovod is not supported on Windows") def test_horovod_multi_optimizer(tmpdir): - model = BasicGAN(**EvalModelTemplate.get_default_hparams()) + model = BasicGAN() # fit model trainer = Trainer( @@ -342,8 +336,7 @@ def test_accuracy_metric_horovod(): # @pytest.mark.skipif(platform.system() == "Windows", reason="Horovod is not supported on Windows") # def test_horovod_multi_optimizer_with_scheduling_stepping(tmpdir): -# hparams = EvalModelTemplate.get_default_hparams() -# model = EvalModelTemplate(**hparams) +# model = BoringModel() # model.configure_optimizers = model.configure_optimizers__multiple_schedulers # # num_workers = 8 diff --git a/tests/models/test_hparams.py b/tests/models/test_hparams.py index 229c8128ae..0e32ebea09 100644 --- a/tests/models/test_hparams.py +++ b/tests/models/test_hparams.py @@ -21,15 +21,13 @@ import pytest import torch from fsspec.implementations.local import LocalFileSystem from omegaconf import Container, OmegaConf -from torch.nn import functional as F from torch.utils.data import DataLoader from pytorch_lightning import LightningModule, Trainer from pytorch_lightning.callbacks import ModelCheckpoint from pytorch_lightning.core.saving import load_hparams_from_yaml, save_hparams_to_yaml from pytorch_lightning.utilities import _HYDRA_EXPERIMENTAL_AVAILABLE, AttributeDict, is_picklable -from tests.base import EvalModelTemplate -from tests.helpers import BoringModel, TrialMNIST +from tests.helpers import BoringModel, RandomDataset if _HYDRA_EXPERIMENTAL_AVAILABLE: from hydra.experimental import compose, initialize @@ -162,7 +160,7 @@ def test_explicit_args_hparams(tmpdir): """ # define model - class LocalModel(EvalModelTemplate): + class LocalModel(BoringModel): def __init__(self, test_arg, test_arg2): super().__init__() @@ -184,7 +182,7 @@ def test_implicit_args_hparams(tmpdir): """ # define model - class LocalModel(EvalModelTemplate): + class LocalModel(BoringModel): def __init__(self, test_arg, test_arg2): super().__init__() @@ -206,7 +204,7 @@ def test_explicit_missing_args_hparams(tmpdir): """ # define model - class LocalModel(EvalModelTemplate): + class LocalModel(BoringModel): def __init__(self, test_arg, test_arg2): super().__init__() @@ -269,7 +267,14 @@ def test_class_nesting(): A().test() -class SubClassEvalModel(EvalModelTemplate): +class CustomBoringModel(BoringModel): + + def __init__(self, batch_size=64): + super().__init__() + self.save_hyperparameters() + + +class SubClassBoringModel(CustomBoringModel): any_other_loss = torch.nn.CrossEntropyLoss() def __init__(self, *args, subclass_arg=1200, **kwargs): @@ 
-277,18 +282,18 @@ class SubClassEvalModel(EvalModelTemplate): self.save_hyperparameters() -class SubSubClassEvalModel(SubClassEvalModel): +class SubSubClassBoringModel(SubClassBoringModel): pass -class AggSubClassEvalModel(SubClassEvalModel): +class AggSubClassBoringModel(SubClassBoringModel): def __init__(self, *args, my_loss=torch.nn.CrossEntropyLoss(), **kwargs): super().__init__(*args, **kwargs) self.save_hyperparameters() -class UnconventionalArgsEvalModel(EvalModelTemplate): +class UnconventionalArgsBoringModel(CustomBoringModel): """ A model that has unconventional names for "self", "*args" and "**kwargs". """ def __init__(obj, *more_args, other_arg=300, **more_kwargs): @@ -297,7 +302,7 @@ class UnconventionalArgsEvalModel(EvalModelTemplate): obj.save_hyperparameters() -class DictConfSubClassEvalModel(SubClassEvalModel): +class DictConfSubClassBoringModel(SubClassBoringModel): def __init__(self, *args, dict_conf=OmegaConf.create(dict(my_param='something')), **kwargs): super().__init__(*args, **kwargs) @@ -306,31 +311,31 @@ class DictConfSubClassEvalModel(SubClassEvalModel): @pytest.mark.parametrize( "cls", [ - EvalModelTemplate, - SubClassEvalModel, - SubSubClassEvalModel, - AggSubClassEvalModel, - UnconventionalArgsEvalModel, - DictConfSubClassEvalModel, + CustomBoringModel, + SubClassBoringModel, + SubSubClassBoringModel, + AggSubClassBoringModel, + UnconventionalArgsBoringModel, + DictConfSubClassBoringModel, ] ) def test_collect_init_arguments(tmpdir, cls): """ Test that the model automatically saves the arguments passed into the constructor """ extra_args = {} - if cls is AggSubClassEvalModel: + if cls is AggSubClassBoringModel: extra_args.update(my_loss=torch.nn.CosineEmbeddingLoss()) - elif cls is DictConfSubClassEvalModel: + elif cls is DictConfSubClassBoringModel: extra_args.update(dict_conf=OmegaConf.create(dict(my_param='anything'))) model = cls(**extra_args) - assert model.hparams.batch_size == 32 + assert model.hparams.batch_size == 64 model = cls(batch_size=179, **extra_args) assert model.hparams.batch_size == 179 - if isinstance(model, SubClassEvalModel): + if isinstance(model, SubClassBoringModel): assert model.hparams.subclass_arg == 1200 - if isinstance(model, AggSubClassEvalModel): + if isinstance(model, AggSubClassBoringModel): assert isinstance(model.hparams.my_loss, torch.nn.CosineEmbeddingLoss) # verify that the checkpoint saved the correct values @@ -347,10 +352,10 @@ def test_collect_init_arguments(tmpdir, cls): model = cls.load_from_checkpoint(raw_checkpoint_path) assert model.hparams.batch_size == 179 - if isinstance(model, AggSubClassEvalModel): + if isinstance(model, AggSubClassBoringModel): assert isinstance(model.hparams.my_loss, torch.nn.CosineEmbeddingLoss) - if isinstance(model, DictConfSubClassEvalModel): + if isinstance(model, DictConfSubClassBoringModel): assert isinstance(model.hparams.dict_conf, Container) assert model.hparams.dict_conf['my_param'] == 'anything' @@ -368,7 +373,7 @@ def _raw_checkpoint_path(trainer) -> str: return raw_checkpoint_path -class LocalVariableModelSuperLast(EvalModelTemplate): +class LocalVariableModelSuperLast(BoringModel): """ This model has the super().__init__() call at the end. 
""" def __init__(self, arg1, arg2, *args, **kwargs): @@ -378,7 +383,7 @@ class LocalVariableModelSuperLast(EvalModelTemplate): super().__init__(*args, **kwargs) # this is intentionally here at the end -class LocalVariableModelSuperFirst(EvalModelTemplate): +class LocalVariableModelSuperFirst(BoringModel): """ This model has the _auto_collect_arguments() call at the end. """ def __init__(self, arg1, arg2, *args, **kwargs): @@ -429,16 +434,17 @@ def test_collect_init_arguments_with_local_vars(cls): # assert model.hparams.my_arg == 42 -class AnotherArgModel(EvalModelTemplate): +class AnotherArgModel(BoringModel): def __init__(self, arg1): super().__init__() self.save_hyperparameters(arg1) -class OtherArgsModel(EvalModelTemplate): +class OtherArgsModel(BoringModel): def __init__(self, arg1, arg2): + super().__init__() self.save_hyperparameters(arg1, arg2) @@ -457,7 +463,7 @@ def test_single_config_models_fail(tmpdir, cls, config): @pytest.mark.parametrize("past_key", ['module_arguments']) def test_load_past_checkpoint(tmpdir, past_key): - model = EvalModelTemplate() + model = CustomBoringModel() # verify we can train trainer = Trainer(default_root_dir=tmpdir, max_epochs=1) @@ -474,7 +480,7 @@ def test_load_past_checkpoint(tmpdir, past_key): torch.save(raw_checkpoint, raw_checkpoint_path) # verify that model loads correctly - model2 = EvalModelTemplate.load_from_checkpoint(raw_checkpoint_path) + model2 = CustomBoringModel.load_from_checkpoint(raw_checkpoint_path) assert model2.hparams.batch_size == -17 @@ -486,7 +492,7 @@ def test_hparams_pickle(tmpdir): assert ad == pickle.loads(pkl) -class UnpickleableArgsEvalModel(EvalModelTemplate): +class UnpickleableArgsBoringModel(BoringModel): """ A model that has an attribute that cannot be pickled. """ def __init__(self, foo='bar', pickle_me=(lambda x: x + 1), **kwargs): @@ -496,7 +502,7 @@ class UnpickleableArgsEvalModel(EvalModelTemplate): def test_hparams_pickle_warning(tmpdir): - model = UnpickleableArgsEvalModel() + model = UnpickleableArgsBoringModel() trainer = Trainer(default_root_dir=tmpdir, max_steps=1) with pytest.warns(UserWarning, match="attribute 'pickle_me' removed from hparams because it cannot be pickled"): trainer.fit(model) @@ -522,38 +528,15 @@ def test_hparams_save_yaml(tmpdir): assert load_hparams_from_yaml(path_yaml) == hparams -class NoArgsSubClassEvalModel(EvalModelTemplate): +class NoArgsSubClassBoringModel(CustomBoringModel): def __init__(self): super().__init__() -class SimpleNoArgsModel(LightningModule): - - def __init__(self): - super().__init__() - self.l1 = torch.nn.Linear(28 * 28, 10) - - def forward(self, x): - return torch.relu(self.l1(x.view(x.size(0), -1))) - - def training_step(self, batch, batch_nb): - x, y = batch - loss = F.cross_entropy(self(x), y) - return {'loss': loss, 'log': {'train_loss': loss}} - - def test_step(self, batch, batch_nb): - x, y = batch - loss = F.cross_entropy(self(x), y) - return {'loss': loss, 'log': {'train_loss': loss}} - - def configure_optimizers(self): - return torch.optim.Adam(self.parameters(), lr=0.02) - - @pytest.mark.parametrize("cls", [ - SimpleNoArgsModel, - NoArgsSubClassEvalModel, + BoringModel, + NoArgsSubClassBoringModel, ]) def test_model_nohparams_train_test(tmpdir, cls): """Test models that do not tae any argument in init.""" @@ -564,20 +547,20 @@ def test_model_nohparams_train_test(tmpdir, cls): default_root_dir=tmpdir, ) - train_loader = DataLoader(TrialMNIST(os.getcwd(), train=True, download=True), batch_size=32) + train_loader = DataLoader(RandomDataset(32, 64), 
batch_size=32) trainer.fit(model, train_loader) - test_loader = DataLoader(TrialMNIST(os.getcwd(), train=False, download=True), batch_size=32) + test_loader = DataLoader(RandomDataset(32, 64), batch_size=32) trainer.test(test_dataloaders=test_loader) def test_model_ignores_non_exist_kwargument(tmpdir): """Test that the model takes only valid class arguments.""" - class LocalModel(EvalModelTemplate): + class LocalModel(BoringModel): def __init__(self, batch_size=15): - super().__init__(batch_size=batch_size) + super().__init__() self.save_hyperparameters() model = LocalModel() @@ -593,11 +576,11 @@ def test_model_ignores_non_exist_kwargument(tmpdir): assert 'non_exist_kwarg' not in model.hparams -class SuperClassPositionalArgs(EvalModelTemplate): +class SuperClassPositionalArgs(BoringModel): def __init__(self, hparams): super().__init__() - self._hparams = None # pretend EvalModelTemplate did not call self.save_hyperparameters() + self._hparams = None # pretend BoringModel did not call self.save_hyperparameters() self.hparams = hparams diff --git a/tests/models/test_onnx.py b/tests/models/test_onnx.py index e031494361..2bd3ebf9b6 100644 --- a/tests/models/test_onnx.py +++ b/tests/models/test_onnx.py @@ -21,14 +21,13 @@ import torch import tests.helpers.pipelines as tpipes import tests.helpers.utils as tutils from pytorch_lightning import Trainer -from tests.base import EvalModelTemplate from tests.helpers import BoringModel def test_model_saves_with_input_sample(tmpdir): """Test that ONNX model saves with input sample and size is greater than 3 MB""" model = BoringModel() - trainer = Trainer(max_epochs=1) + trainer = Trainer(fast_dev_run=True) trainer.fit(model) file_path = os.path.join(tmpdir, "model.onnx") @@ -42,7 +41,7 @@ def test_model_saves_with_input_sample(tmpdir): def test_model_saves_on_gpu(tmpdir): """Test that model saves on gpu""" model = BoringModel() - trainer = Trainer(gpus=1, max_epochs=1) + trainer = Trainer(gpus=1, fast_dev_run=True) trainer.fit(model) file_path = os.path.join(tmpdir, "model.onnx") @@ -55,7 +54,7 @@ def test_model_saves_on_gpu(tmpdir): def test_model_saves_with_example_output(tmpdir): """Test that ONNX model saves when provided with example output""" model = BoringModel() - trainer = Trainer(max_epochs=1) + trainer = Trainer(fast_dev_run=True) trainer.fit(model) file_path = os.path.join(tmpdir, "model.onnx") @@ -92,9 +91,10 @@ def test_model_saves_on_multi_gpu(tmpdir): progress_bar_refresh_rate=0, ) - model = EvalModelTemplate() + model = BoringModel() + model.example_input_array = torch.randn(5, 32) - tpipes.run_model_test(trainer_options, model) + tpipes.run_model_test(trainer_options, model, min_acc=0.08) file_path = os.path.join(tmpdir, "model.onnx") model.to_onnx(file_path) @@ -130,7 +130,7 @@ def test_if_inference_output_is_valid(tmpdir): model = BoringModel() model.example_input_array = torch.randn(5, 32) - trainer = Trainer(max_epochs=2) + trainer = Trainer(fast_dev_run=True) trainer.fit(model) model.eval() diff --git a/tests/models/test_restore.py b/tests/models/test_restore.py index 9420da74f0..114ebf3368 100644 --- a/tests/models/test_restore.py +++ b/tests/models/test_restore.py @@ -16,18 +16,21 @@ import logging as log import os import pickle from copy import deepcopy +from typing import Generic, TypeVar import cloudpickle import pytest import torch +import torch.nn.functional as F import tests.helpers.pipelines as tpipes import tests.helpers.utils as tutils from pytorch_lightning import Callback, Trainer from pytorch_lightning.callbacks 
import ModelCheckpoint from pytorch_lightning.trainer.states import RunningStage, TrainerState -from tests.base import EvalModelTemplate, GenericEvalModelTemplate from tests.helpers import BoringModel +from tests.helpers.datamodules import ClassifDataModule +from tests.helpers.simple_models import ClassificationModel class ModelTrainerPropertyParity(Callback): @@ -52,14 +55,48 @@ class ModelTrainerPropertyParity(Callback): self._check_properties(trainer, pl_module) +class ValTestLossBoringModel(BoringModel): + + def __init__(self, batch_size=4): + super().__init__() + self.save_hyperparameters() + + def validation_step(self, batch, batch_idx): + out = super().validation_step(batch, batch_idx) + self.log('val_loss', out['x']) + return out + + def test_step(self, batch, batch_idx): + out = super().test_step(batch, batch_idx) + self.log('test_loss', out['y']) + return out + + +T = TypeVar('T') + + +class GenericParentValTestLossBoringModel(Generic[T], ValTestLossBoringModel): + + def __init__(self, batch_size: int = 4): + super().__init__(batch_size=batch_size) + + +class GenericValTestLossBoringModel(GenericParentValTestLossBoringModel[int]): + pass + + def test_model_properties_resume_from_checkpoint(tmpdir): - """ Test that properties like `current_epoch` and `global_step` - in model and trainer are always the same. """ - model = EvalModelTemplate() - checkpoint_callback = ModelCheckpoint(dirpath=tmpdir, monitor="early_stop_on", save_last=True) + """ + Test that properties like `current_epoch` and `global_step` + in model and trainer are always the same. + """ + model = BoringModel() + checkpoint_callback = ModelCheckpoint(dirpath=tmpdir, monitor="val_loss", save_last=True) trainer_args = dict( default_root_dir=tmpdir, max_epochs=1, + limit_train_batches=2, + limit_val_batches=2, logger=False, callbacks=[checkpoint_callback, ModelTrainerPropertyParity()], # this performs the assertions ) @@ -73,18 +110,19 @@ def test_model_properties_resume_from_checkpoint(tmpdir): def test_try_resume_from_non_existing_checkpoint(tmpdir): """ Test that trying to resume from non-existing `resume_from_checkpoint` fail without error.""" - model = BoringModel() - checkpoint_cb = ModelCheckpoint(dirpath=tmpdir, monitor="early_stop_on", save_last=True) + dm = ClassifDataModule() + model = ClassificationModel() + checkpoint_cb = ModelCheckpoint(dirpath=tmpdir, monitor="val_loss", save_last=True) trainer = Trainer( default_root_dir=tmpdir, max_epochs=1, logger=False, callbacks=[checkpoint_cb], - limit_train_batches=0.1, - limit_val_batches=0.1, + limit_train_batches=2, + limit_val_batches=2, ) # Generate checkpoint `last.ckpt` with BoringModel - trainer.fit(model) + trainer.fit(model, datamodule=dm) # `True` if resume/restore successfully else `False` assert trainer.checkpoint_connector.restore(str(tmpdir / "last.ckpt"), trainer.on_gpu) assert not trainer.checkpoint_connector.restore(str(tmpdir / "last_non_existing.ckpt"), trainer.on_gpu) @@ -99,11 +137,12 @@ class CaptureCallbacksBeforeTraining(Callback): def test_callbacks_state_resume_from_checkpoint(tmpdir): """ Test that resuming from a checkpoint restores callbacks that persist state. 
""" - model = EvalModelTemplate() + dm = ClassifDataModule() + model = ClassificationModel() callback_capture = CaptureCallbacksBeforeTraining() def get_trainer_args(): - checkpoint = ModelCheckpoint(dirpath=tmpdir, monitor="early_stop_on", save_last=True) + checkpoint = ModelCheckpoint(dirpath=tmpdir, monitor="val_loss", save_last=True) trainer_args = dict( default_root_dir=tmpdir, max_steps=1, logger=False, callbacks=[ checkpoint, @@ -116,12 +155,12 @@ def test_callbacks_state_resume_from_checkpoint(tmpdir): # initial training trainer = Trainer(**get_trainer_args()) - trainer.fit(model) + trainer.fit(model, datamodule=dm) callbacks_before_resume = deepcopy(trainer.callbacks) # resumed training trainer = Trainer(**get_trainer_args(), resume_from_checkpoint=str(tmpdir / "last.ckpt")) - trainer.fit(model) + trainer.fit(model, datamodule=dm) assert len(callbacks_before_resume) == len(callback_capture.callbacks) @@ -133,23 +172,24 @@ def test_callbacks_state_resume_from_checkpoint(tmpdir): def test_callbacks_references_resume_from_checkpoint(tmpdir): """ Test that resuming from a checkpoint sets references as expected. """ - model = EvalModelTemplate() + dm = ClassifDataModule() + model = ClassificationModel() args = {'default_root_dir': tmpdir, 'max_steps': 1, 'logger': False} # initial training - checkpoint = ModelCheckpoint(dirpath=tmpdir, monitor="early_stop_on", save_last=True) + checkpoint = ModelCheckpoint(dirpath=tmpdir, monitor="val_loss", save_last=True) trainer = Trainer(**args, callbacks=[checkpoint]) assert checkpoint is trainer.callbacks[-1] is trainer.checkpoint_callback - trainer.fit(model) + trainer.fit(model, datamodule=dm) # resumed training - new_checkpoint = ModelCheckpoint(dirpath=tmpdir, monitor="early_stop_on", save_last=True) + new_checkpoint = ModelCheckpoint(dirpath=tmpdir, monitor="val_loss", save_last=True) # pass in a new checkpoint object, which should take # precedence over the one in the last.ckpt file trainer = Trainer(**args, callbacks=[new_checkpoint], resume_from_checkpoint=str(tmpdir / "last.ckpt")) assert checkpoint is not new_checkpoint assert new_checkpoint is trainer.callbacks[-1] is trainer.checkpoint_callback - trainer.fit(model) + trainer.fit(model, datamodule=dm) @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine") @@ -158,7 +198,30 @@ def test_running_test_pretrained_model_distrib_dp(tmpdir): tutils.set_random_master_port() - model = EvalModelTemplate() + class CustomClassificationModelDP(ClassificationModel): + + def _step(self, batch, batch_idx): + x, y = batch + logits = self(x) + return {'logits': logits, 'y': y} + + def training_step(self, batch, batch_idx): + _, y = batch + out = self._step(batch, batch_idx) + out['loss'] = F.cross_entropy(out['logits'], y) + return out + + def validation_step(self, batch, batch_idx): + return self._step(batch, batch_idx) + + def test_step(self, batch, batch_idx): + return self._step(batch, batch_idx) + + def validation_step_end(self, outputs): + self.log('val_acc', self.valid_acc(outputs['logits'], outputs['y'])) + + dm = ClassifDataModule() + model = CustomClassificationModelDP() # exp file to get meta logger = tutils.get_default_logger(tmpdir) @@ -169,8 +232,8 @@ def test_running_test_pretrained_model_distrib_dp(tmpdir): trainer_options = dict( progress_bar_refresh_rate=0, max_epochs=2, - limit_train_batches=0.4, - limit_val_batches=0.2, + limit_train_batches=5, + limit_val_batches=5, callbacks=[checkpoint], logger=logger, gpus=[0, 1], @@ -180,21 +243,17 @@ def 
test_running_test_pretrained_model_distrib_dp(tmpdir): # fit model trainer = Trainer(**trainer_options) - trainer.fit(model) + trainer.fit(model, datamodule=dm) # correct result and ok accuracy assert trainer.state == TrainerState.FINISHED, f"Training failed with {trainer.state}" - pretrained_model = EvalModelTemplate.load_from_checkpoint(trainer.checkpoint_callback.best_model_path) + pretrained_model = ClassificationModel.load_from_checkpoint(trainer.checkpoint_callback.best_model_path) # run test set new_trainer = Trainer(**trainer_options) - results = new_trainer.test(pretrained_model) + new_trainer.test(pretrained_model) pretrained_model.cpu() - # test we have good test accuracy - acc = results[0]['test_acc'] - assert acc > 0.5, f"Model failed to get expected {0.5} accuracy. test_acc = {acc}" - dataloaders = model.test_dataloader() if not isinstance(dataloaders, list): dataloaders = [dataloaders] @@ -207,8 +266,8 @@ def test_running_test_pretrained_model_distrib_dp(tmpdir): def test_running_test_pretrained_model_distrib_ddp_spawn(tmpdir): """Verify `test()` on pretrained model.""" tutils.set_random_master_port() - - model = EvalModelTemplate() + dm = ClassifDataModule() + model = ClassificationModel() # exp file to get meta logger = tutils.get_default_logger(tmpdir) @@ -219,8 +278,8 @@ def test_running_test_pretrained_model_distrib_ddp_spawn(tmpdir): trainer_options = dict( progress_bar_refresh_rate=0, max_epochs=2, - limit_train_batches=0.4, - limit_val_batches=0.2, + limit_train_batches=2, + limit_val_batches=2, callbacks=[checkpoint], logger=logger, gpus=[0, 1], @@ -230,33 +289,32 @@ def test_running_test_pretrained_model_distrib_ddp_spawn(tmpdir): # fit model trainer = Trainer(**trainer_options) - trainer.fit(model) + trainer.fit(model, datamodule=dm) log.info(os.listdir(tutils.get_data_path(logger, path_dir=tmpdir))) # correct result and ok accuracy assert trainer.state == TrainerState.FINISHED, f"Training failed with {trainer.state}" - pretrained_model = EvalModelTemplate.load_from_checkpoint(trainer.checkpoint_callback.best_model_path) + pretrained_model = ClassificationModel.load_from_checkpoint(trainer.checkpoint_callback.best_model_path) # run test set new_trainer = Trainer(**trainer_options) - results = new_trainer.test(pretrained_model) + new_trainer.test(pretrained_model) pretrained_model.cpu() - acc = results[0]['test_acc'] - assert acc > 0.5, f"Model failed to get expected {0.5} accuracy. 
test_acc = {acc}" - - dataloaders = model.test_dataloader() + dataloaders = dm.test_dataloader() if not isinstance(dataloaders, list): dataloaders = [dataloaders] for dataloader in dataloaders: - tpipes.run_prediction(pretrained_model, dataloader) + tpipes.run_prediction(pretrained_model, dataloader, min_acc=0.1) def test_running_test_pretrained_model_cpu(tmpdir): """Verify test() on pretrained model.""" - model = EvalModelTemplate() + tutils.reset_seed() + dm = ClassifDataModule() + model = ClassificationModel() # logger file to get meta logger = tutils.get_default_logger(tmpdir) @@ -266,9 +324,10 @@ def test_running_test_pretrained_model_cpu(tmpdir): trainer_options = dict( progress_bar_refresh_rate=0, - max_epochs=3, - limit_train_batches=0.4, - limit_val_batches=0.2, + max_epochs=2, + limit_train_batches=2, + limit_val_batches=2, + limit_test_batches=2, callbacks=[checkpoint], logger=logger, default_root_dir=tmpdir, @@ -276,31 +335,32 @@ def test_running_test_pretrained_model_cpu(tmpdir): # fit model trainer = Trainer(**trainer_options) - trainer.fit(model) + trainer.fit(model, datamodule=dm) # correct result and ok accuracy assert trainer.state == TrainerState.FINISHED, f"Training failed with {trainer.state}" - pretrained_model = EvalModelTemplate.load_from_checkpoint(trainer.checkpoint_callback.best_model_path) + pretrained_model = ClassificationModel.load_from_checkpoint(trainer.checkpoint_callback.best_model_path) new_trainer = Trainer(**trainer_options) - new_trainer.test(pretrained_model) + new_trainer.test(pretrained_model, datamodule=dm) # test we have good test accuracy - tutils.assert_ok_model_acc(new_trainer) + tutils.assert_ok_model_acc(new_trainer, key='test_acc', thr=0.45) -@pytest.mark.parametrize('model_template', [EvalModelTemplate, GenericEvalModelTemplate]) +@pytest.mark.parametrize('model_template', [ValTestLossBoringModel, GenericValTestLossBoringModel]) def test_load_model_from_checkpoint(tmpdir, model_template): """Verify test() on pretrained model.""" - hparams = model_template.get_default_hparams() - model = model_template(**hparams) + tutils.reset_seed() + model = model_template() trainer_options = dict( progress_bar_refresh_rate=0, max_epochs=2, - limit_train_batches=0.4, - limit_val_batches=0.2, - callbacks=[ModelCheckpoint(dirpath=tmpdir, monitor='early_stop_on', save_top_k=-1)], + limit_train_batches=2, + limit_val_batches=2, + limit_test_batches=2, + callbacks=[ModelCheckpoint(dirpath=tmpdir, monitor='val_loss', save_top_k=-1)], default_root_dir=tmpdir, ) @@ -315,7 +375,7 @@ def test_load_model_from_checkpoint(tmpdir, model_template): # load last checkpoint last_checkpoint = sorted(glob.glob(os.path.join(trainer.checkpoint_callback.dirpath, "*.ckpt")))[-1] - # Since `EvalModelTemplate` has `_save_hparams = True` by default, check that ckpt has hparams + # Since `BoringModel` has `_save_hparams = True` by default, check that ckpt has hparams ckpt = torch.load(last_checkpoint) assert model_template.CHECKPOINT_HYPER_PARAMS_KEY in ckpt.keys(), 'hyper_parameters missing from checkpoints' @@ -323,8 +383,8 @@ def test_load_model_from_checkpoint(tmpdir, model_template): pretrained_model = model_template.load_from_checkpoint(last_checkpoint) # test that hparams loaded correctly - for k, v in hparams.items(): - assert getattr(pretrained_model, k) == v + for k, v in model.hparams.items(): + assert getattr(pretrained_model.hparams, k) == v # assert weights are the same for (old_name, old_p), (new_name, new_p) in zip(model.named_parameters(), 
pretrained_model.named_parameters()): @@ -334,15 +394,11 @@ def test_load_model_from_checkpoint(tmpdir, model_template): new_trainer = Trainer(**trainer_options) new_trainer.test(pretrained_model) - # test we have good test accuracy - tutils.assert_ok_model_acc(new_trainer) - @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine") def test_dp_resume(tmpdir): """Make sure DP continues training correctly.""" - hparams = EvalModelTemplate.get_default_hparams() - model = EvalModelTemplate(**hparams) + model = BoringModel() trainer_options = dict(max_epochs=1, gpus=2, accelerator='dp', default_root_dir=tmpdir) @@ -355,7 +411,7 @@ def test_dp_resume(tmpdir): # add these to the trainer options trainer_options['logger'] = logger - trainer_options['checkpoint_callback'] = checkpoint + trainer_options['callbacks'] = [checkpoint] # fit model trainer = Trainer(**trainer_options) @@ -377,31 +433,38 @@ def test_dp_resume(tmpdir): # init new trainer new_logger = tutils.get_default_logger(tmpdir, version=logger.version) trainer_options['logger'] = new_logger - trainer_options['checkpoint_callback'] = ModelCheckpoint(dirpath=tmpdir) + trainer_options['callbacks'] = [ModelCheckpoint(dirpath=tmpdir)] trainer_options['limit_train_batches'] = 0.5 trainer_options['limit_val_batches'] = 0.2 trainer_options['max_epochs'] = 1 new_trainer = Trainer(**trainer_options) - # set the epoch start hook so we can predict before the model does the full training - def assert_good_acc(): - assert new_trainer.current_epoch == real_global_epoch and new_trainer.current_epoch > 0 + class CustomModel(BoringModel): - # if model and state loaded correctly, predictions will be good even though we - # haven't trained with the new loaded model - dp_model = new_trainer.model - dp_model.eval() - dp_model.module.module.running_stage = RunningStage.EVALUATING + def __init__(self): + super().__init__() + self.on_train_start_called = False - dataloader = trainer.train_dataloader - tpipes.run_prediction(dp_model, dataloader, dp=True) + # set the epoch start hook so we can predict before the model does the full training + def on_train_start(self): + assert self.trainer.current_epoch == real_global_epoch and self.trainer.current_epoch > 0 + + # if model and state loaded correctly, predictions will be good even though we + # haven't trained with the new loaded model + dp_model = new_trainer.model + dp_model.eval() + dp_model.module.module.running_stage = RunningStage.EVALUATING + + dataloader = self.train_dataloader() + tpipes.run_prediction(self.trainer.get_model(), dataloader) + self.on_train_start_called = True # new model - model = EvalModelTemplate(**hparams) - model.on_train_start = assert_good_acc + model = CustomModel() # fit new model which should load hpc weights new_trainer.fit(model) + assert model.on_train_start_called # test freeze on gpu model.freeze() @@ -410,7 +473,7 @@ def test_dp_resume(tmpdir): def test_model_saving_loading(tmpdir): """Tests use case where trainer saves the model, and user loads it from tags independently.""" - model = EvalModelTemplate() + model = BoringModel() # logger file to get meta logger = tutils.get_default_logger(tmpdir) @@ -418,6 +481,8 @@ def test_model_saving_loading(tmpdir): # fit model trainer = Trainer( max_epochs=1, + limit_train_batches=2, + limit_val_batches=2, logger=logger, callbacks=[ModelCheckpoint(dirpath=tmpdir)], default_root_dir=tmpdir, @@ -432,16 +497,11 @@ def test_model_saving_loading(tmpdir): if not isinstance(dataloaders, list): dataloaders = 
[dataloaders] - for dataloader in dataloaders: - for batch in dataloader: - break - - x, y = batch - x = x.view(x.size(0), -1) + batch = next(iter(dataloaders[0])) # generate preds before saving model model.eval() - pred_before_saving = model(x) + pred_before_saving = model(batch) # save model new_weights_path = os.path.join(tmpdir, 'save_test.ckpt') @@ -450,7 +510,7 @@ def test_model_saving_loading(tmpdir): # load new model hparams_path = tutils.get_data_path(logger, path_dir=tmpdir) hparams_path = os.path.join(hparams_path, 'hparams.yaml') - model_2 = EvalModelTemplate.load_from_checkpoint( + model_2 = BoringModel.load_from_checkpoint( checkpoint_path=new_weights_path, hparams_file=hparams_path, ) @@ -458,7 +518,7 @@ def test_model_saving_loading(tmpdir): # make prediction # assert that both predictions are the same - new_pred = model_2(x) + new_pred = model_2(batch) assert torch.all(torch.eq(pred_before_saving, new_pred)).item() == 1 @@ -468,9 +528,9 @@ def test_strict_model_load_more_params(monkeypatch, tmpdir, tmpdir_server, url_c # set $TORCH_HOME, which determines torch hub's cache path, to tmpdir monkeypatch.setenv('TORCH_HOME', tmpdir) - model = EvalModelTemplate() + model = BoringModel() # Extra layer - model.c_d3 = torch.nn.Linear(model.hidden_dim, model.hidden_dim) + model.c_d3 = torch.nn.Linear(32, 32) # logger file to get meta logger = tutils.get_default_logger(tmpdir) @@ -479,6 +539,8 @@ def test_strict_model_load_more_params(monkeypatch, tmpdir, tmpdir_server, url_c trainer = Trainer( default_root_dir=tmpdir, max_epochs=1, + limit_train_batches=2, + limit_val_batches=2, logger=logger, callbacks=[ModelCheckpoint(dirpath=tmpdir)], ) @@ -496,14 +558,14 @@ def test_strict_model_load_more_params(monkeypatch, tmpdir, tmpdir_server, url_c hparams_url = f'http://{tmpdir_server[0]}:{tmpdir_server[1]}/{os.path.basename(new_weights_path)}' ckpt_path = hparams_url if url_ckpt else new_weights_path - EvalModelTemplate.load_from_checkpoint( + BoringModel.load_from_checkpoint( checkpoint_path=ckpt_path, hparams_file=hparams_path, strict=False, ) with pytest.raises(RuntimeError, match=r'Unexpected key\(s\) in state_dict: "c_d3.weight", "c_d3.bias"'): - EvalModelTemplate.load_from_checkpoint( + BoringModel.load_from_checkpoint( checkpoint_path=ckpt_path, hparams_file=hparams_path, strict=True, @@ -516,7 +578,7 @@ def test_strict_model_load_less_params(monkeypatch, tmpdir, tmpdir_server, url_c # set $TORCH_HOME, which determines torch hub's cache path, to tmpdir monkeypatch.setenv('TORCH_HOME', tmpdir) - model = EvalModelTemplate() + model = BoringModel() # logger file to get meta logger = tutils.get_default_logger(tmpdir) @@ -525,6 +587,8 @@ def test_strict_model_load_less_params(monkeypatch, tmpdir, tmpdir_server, url_c trainer = Trainer( default_root_dir=tmpdir, max_epochs=1, + limit_train_batches=2, + limit_val_batches=2, logger=logger, callbacks=[ModelCheckpoint(dirpath=tmpdir)], ) @@ -542,7 +606,7 @@ def test_strict_model_load_less_params(monkeypatch, tmpdir, tmpdir_server, url_c hparams_url = f'http://{tmpdir_server[0]}:{tmpdir_server[1]}/{os.path.basename(new_weights_path)}' ckpt_path = hparams_url if url_ckpt else new_weights_path - class CurrentModel(EvalModelTemplate): + class CurrentModel(BoringModel): def __init__(self): super().__init__() @@ -563,6 +627,6 @@ def test_strict_model_load_less_params(monkeypatch, tmpdir, tmpdir_server, url_c def test_model_pickle(tmpdir): - model = EvalModelTemplate() + model = BoringModel() pickle.dumps(model) cloudpickle.dumps(model) diff --git 
a/tests/models/test_tpu.py b/tests/models/test_tpu.py index 98a02d730e..e5895d98b6 100644 --- a/tests/models/test_tpu.py +++ b/tests/models/test_tpu.py @@ -19,14 +19,14 @@ import pytest from torch.utils.data import DataLoader import tests.helpers.pipelines as tpipes +import tests.helpers.utils as tutils from pytorch_lightning import Trainer from pytorch_lightning.accelerators import TPUAccelerator from pytorch_lightning.callbacks import EarlyStopping from pytorch_lightning.trainer.states import TrainerState from pytorch_lightning.utilities import _TPU_AVAILABLE from pytorch_lightning.utilities.exceptions import MisconfigurationException -from tests.base import EvalModelTemplate -from tests.helpers.datasets import TrialMNIST +from tests.helpers import BoringModel, RandomDataset from tests.helpers.utils import pl_multi_process_test if _TPU_AVAILABLE: @@ -34,7 +34,7 @@ if _TPU_AVAILABLE: import torch_xla.distributed.xla_multiprocessing as xmp SERIAL_EXEC = xmp.MpSerialExecutor() -_LARGER_DATASET = TrialMNIST(download=True, num_samples=2000, digits=(0, 1, 2, 5, 8)) +_LARGER_DATASET = RandomDataset(32, 2000) # 8 cores needs a big dataset @@ -42,20 +42,30 @@ def _serial_train_loader(): return DataLoader(_LARGER_DATASET, batch_size=32) +class SerialLoaderBoringModel(BoringModel): + + def train_dataloader(self): + return DataLoader(RandomDataset(32, 2000), batch_size=32) + + def val_dataloader(self): + return DataLoader(RandomDataset(32, 2000), batch_size=32) + + @pytest.mark.skipif(not _TPU_AVAILABLE, reason="test requires TPU machine") @pl_multi_process_test def test_model_tpu_cores_1(tmpdir): """Make sure model trains on TPU.""" + tutils.reset_seed() trainer_options = dict( default_root_dir=tmpdir, progress_bar_refresh_rate=0, max_epochs=1, tpu_cores=1, - limit_train_batches=0.4, - limit_val_batches=0.4, + limit_train_batches=4, + limit_val_batches=4, ) - model = EvalModelTemplate() + model = BoringModel() tpipes.run_model_test(trainer_options, model, on_gpu=False, with_hpc=False) @@ -64,16 +74,17 @@ def test_model_tpu_cores_1(tmpdir): @pl_multi_process_test def test_model_tpu_index(tmpdir, tpu_core): """Make sure model trains on TPU.""" + tutils.reset_seed() trainer_options = dict( default_root_dir=tmpdir, progress_bar_refresh_rate=0, max_epochs=1, tpu_cores=[tpu_core], - limit_train_batches=0.4, - limit_val_batches=0.4, + limit_train_batches=4, + limit_val_batches=4, ) - model = EvalModelTemplate() + model = BoringModel() tpipes.run_model_test(trainer_options, model, on_gpu=False, with_hpc=False) assert torch_xla._XLAC._xla_get_default_device() == f'xla:{tpu_core}' @@ -82,6 +93,7 @@ def test_model_tpu_index(tmpdir, tpu_core): @pl_multi_process_test def test_model_tpu_cores_8(tmpdir): """Make sure model trains on TPU.""" + tutils.reset_seed() trainer_options = dict( default_root_dir=tmpdir, progress_bar_refresh_rate=0, @@ -91,29 +103,27 @@ def test_model_tpu_cores_8(tmpdir): limit_val_batches=0.4, ) - model = EvalModelTemplate() # 8 cores needs a big dataset - model.train_dataloader = _serial_train_loader - model.val_dataloader = _serial_train_loader - - tpipes.run_model_test(trainer_options, model, on_gpu=False, with_hpc=False) + model = SerialLoaderBoringModel() + tpipes.run_model_test(trainer_options, model, on_gpu=False, with_hpc=False, min_acc=0.05) @pytest.mark.skipif(not _TPU_AVAILABLE, reason="test requires TPU machine") @pl_multi_process_test def test_model_16bit_tpu_cores_1(tmpdir): """Make sure model trains on TPU.""" + tutils.reset_seed() trainer_options = dict( 
default_root_dir=tmpdir, precision=16, progress_bar_refresh_rate=0, max_epochs=1, tpu_cores=1, - limit_train_batches=0.4, - limit_val_batches=0.4, + limit_train_batches=4, + limit_val_batches=4, ) - model = EvalModelTemplate() + model = BoringModel() tpipes.run_model_test(trainer_options, model, on_gpu=False) assert os.environ.get('XLA_USE_BF16') == str(1), "XLA_USE_BF16 was not set in environment variables" @@ -123,17 +133,18 @@ def test_model_16bit_tpu_cores_1(tmpdir): @pl_multi_process_test def test_model_16bit_tpu_index(tmpdir, tpu_core): """Make sure model trains on TPU.""" + tutils.reset_seed() trainer_options = dict( default_root_dir=tmpdir, precision=16, progress_bar_refresh_rate=0, max_epochs=1, tpu_cores=[tpu_core], - limit_train_batches=0.4, - limit_val_batches=0.2, + limit_train_batches=4, + limit_val_batches=2, ) - model = EvalModelTemplate() + model = BoringModel() tpipes.run_model_test(trainer_options, model, on_gpu=False) assert torch_xla._XLAC._xla_get_default_device() == f'xla:{tpu_core}' assert os.environ.get('XLA_USE_BF16') == str(1), "XLA_USE_BF16 was not set in environment variables" @@ -143,6 +154,7 @@ def test_model_16bit_tpu_index(tmpdir, tpu_core): @pl_multi_process_test def test_model_16bit_tpu_cores_8(tmpdir): """Make sure model trains on TPU.""" + tutils.reset_seed() trainer_options = dict( default_root_dir=tmpdir, precision=16, @@ -153,26 +165,32 @@ def test_model_16bit_tpu_cores_8(tmpdir): limit_val_batches=0.4, ) - model = EvalModelTemplate() # 8 cores needs a big dataset - model.train_dataloader = _serial_train_loader - model.val_dataloader = _serial_train_loader - - tpipes.run_model_test(trainer_options, model, on_gpu=False, with_hpc=False) + model = SerialLoaderBoringModel() + tpipes.run_model_test(trainer_options, model, on_gpu=False, with_hpc=False, min_acc=0.05) @pytest.mark.skipif(not _TPU_AVAILABLE, reason="test requires TPU machine") @pl_multi_process_test def test_model_tpu_early_stop(tmpdir): """Test if single TPU core training works""" - model = EvalModelTemplate() + + class CustomBoringModel(BoringModel): + + def validation_step(self, *args, **kwargs): + out = super().validation_step(*args, **kwargs) + self.log('val_loss', out['x']) + return out + + tutils.reset_seed() + model = CustomBoringModel() trainer = Trainer( - callbacks=[EarlyStopping()], + callbacks=[EarlyStopping(monitor='val_loss')], default_root_dir=tmpdir, progress_bar_refresh_rate=0, max_epochs=50, - limit_train_batches=10, - limit_val_batches=10, + limit_train_batches=4, + limit_val_batches=4, tpu_cores=1, ) trainer.fit(model) @@ -182,6 +200,7 @@ def test_model_tpu_early_stop(tmpdir): @pl_multi_process_test def test_tpu_grad_norm(tmpdir): """Test if grad_norm works on TPU.""" + tutils.reset_seed() trainer_options = dict( default_root_dir=tmpdir, progress_bar_refresh_rate=0, @@ -192,7 +211,7 @@ def test_tpu_grad_norm(tmpdir): gradient_clip_val=0.1, ) - model = EvalModelTemplate() + model = BoringModel() tpipes.run_model_test(trainer_options, model, on_gpu=False, with_hpc=False) @@ -201,7 +220,8 @@ def test_tpu_grad_norm(tmpdir): def test_dataloaders_passed_to_fit(tmpdir): """Test if dataloaders passed to trainer works on TPU""" - model = EvalModelTemplate() + tutils.reset_seed() + model = BoringModel() trainer = Trainer(default_root_dir=tmpdir, max_epochs=1, tpu_cores=8) trainer.fit(model, train_dataloader=model.train_dataloader(), val_dataloaders=model.val_dataloader())
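
For reference, the BoringModel-based pattern that this patch standardizes on looks roughly like the sketch below. It relies only on pieces visible in the diff above (tests.helpers.BoringModel, Trainer's default_root_dir and fast_dev_run arguments, trainer.fit); the test name is illustrative and not part of the patch.

    from pytorch_lightning import Trainer
    from tests.helpers import BoringModel


    def test_boring_model_smoke(tmpdir):
        # minimal BoringModel run: fast_dev_run executes a single train/val batch
        model = BoringModel()
        trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True)
        trainer.fit(model)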