fixing misleading tested acc values (#5876)
* fixing tested values
* .
* tests
* yapf
* softmax
* hvd
* rename
* lr
* duplicate
* drop
* classif
* rm EvalModel
* Revert "rm EvalModel"
  This reverts commit 6c3fb39ebe.
* update tests
* fix
* azure
* azure
* self
* cpu
* Apply suggestions from code review
  Co-authored-by: rohitgr7 <rohitgr1998@gmail.com>
Parent: ebabe56f4e
Commit: 1c851b89e1
@@ -20,7 +20,8 @@ from argparse import ArgumentParser
 import torch
 
 from pytorch_lightning import seed_everything, Trainer
-from tests.base import EvalModelTemplate
+from tests.helpers.datamodules import ClassifDataModule
+from tests.helpers.simple_models import ClassificationModel
 
 
 def main():
@@ -35,24 +36,28 @@ def main():
     parser.set_defaults(accelerator="ddp")
     args = parser.parse_args()
 
-    model = EvalModelTemplate()
+    dm = ClassifDataModule()
+    model = ClassificationModel()
     trainer = Trainer.from_argparse_args(args)
 
-    result = {}
     if args.trainer_method == 'fit':
-        trainer.fit(model)
-        result = {'status': 'complete', 'method': args.trainer_method, 'result': None}
-    if args.trainer_method == 'test':
-        result = trainer.test(model)
-        result = {'status': 'complete', 'method': args.trainer_method, 'result': result}
-    if args.trainer_method == 'fit_test':
-        trainer.fit(model)
-        result = trainer.test(model)
-        result = {'status': 'complete', 'method': args.trainer_method, 'result': result}
+        trainer.fit(model, datamodule=dm)
+        result = None
+    elif args.trainer_method == 'test':
+        result = trainer.test(model, datamodule=dm)
+    elif args.trainer_method == 'fit_test':
+        trainer.fit(model, datamodule=dm)
+        result = trainer.test(model, datamodule=dm)
+    else:
+        raise ValueError(f'Unsupported: {args.trainer_method}')
 
-    if len(result) > 0:
-        file_path = os.path.join(args.tmpdir, 'ddp.result')
-        torch.save(result, file_path)
+    result_ext = {
+        'status': 'complete',
+        'method': args.trainer_method,
+        'result': result,
+    }
+    file_path = os.path.join(args.tmpdir, 'ddp.result')
+    torch.save(result_ext, file_path)
 
 
 if __name__ == '__main__':
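The script and the pytest process communicate only through this `ddp.result` file: the spawned DDP run serializes a status dict with torch.save, and the test deserializes it to assert on accuracy. A minimal sketch of that round trip (the helper names here are ours, not the repo's):

    import os
    import tempfile
    import torch

    def save_result(tmpdir, method, result):
        # what the spawned script does on completion: persist a status dict
        payload = {'status': 'complete', 'method': method, 'result': result}
        torch.save(payload, os.path.join(tmpdir, 'ddp.result'))

    def load_result(tmpdir):
        # what the test process does afterwards: read the dict back and check it
        payload = torch.load(os.path.join(tmpdir, 'ddp.result'))
        assert payload['status'] == 'complete'
        return payload['result']

    tmpdir = tempfile.mkdtemp()
    save_result(tmpdir, 'fit', None)
    print(load_result(tmpdir))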
@@ -23,14 +23,13 @@ from tests.accelerators import ddp_model, DDPLauncher
 from tests.helpers.boring_model import BoringModel
 from tests.utilities.distributed import call_training_script
 
+CLI_ARGS = '--max_epochs 1 --gpus 2 --accelerator ddp'
+
 
-@pytest.mark.parametrize('cli_args', [
-    pytest.param('--max_epochs 1 --gpus 2 --accelerator ddp'),
-])
 @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
-def test_multi_gpu_model_ddp_fit_only(tmpdir, cli_args):
+def test_multi_gpu_model_ddp_fit_only(tmpdir):
     # call the script
-    std, err = call_training_script(ddp_model, cli_args, 'fit', tmpdir, timeout=120)
+    call_training_script(ddp_model, CLI_ARGS, 'fit', tmpdir, timeout=120)
 
     # load the results of the script
     result_path = os.path.join(tmpdir, 'ddp.result')
@@ -40,13 +39,10 @@ def test_multi_gpu_model_ddp_fit_only(tmpdir, cli_args):
     assert result['status'] == 'complete'
 
 
-@pytest.mark.parametrize('cli_args', [
-    pytest.param('--max_epochs 1 --gpus 2 --accelerator ddp'),
-])
 @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
-def test_multi_gpu_model_ddp_test_only(tmpdir, cli_args):
+def test_multi_gpu_model_ddp_test_only(tmpdir):
     # call the script
-    call_training_script(ddp_model, cli_args, 'test', tmpdir)
+    call_training_script(ddp_model, CLI_ARGS, 'test', tmpdir)
 
     # load the results of the script
     result_path = os.path.join(tmpdir, 'ddp.result')
@@ -56,13 +52,10 @@ def test_multi_gpu_model_ddp_test_only(tmpdir, cli_args):
     assert result['status'] == 'complete'
 
 
-@pytest.mark.parametrize('cli_args', [
-    pytest.param('--max_epochs 1 --gpus 2 --accelerator ddp'),
-])
 @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
-def test_multi_gpu_model_ddp_fit_test(tmpdir, cli_args):
+def test_multi_gpu_model_ddp_fit_test(tmpdir):
     # call the script
-    call_training_script(ddp_model, cli_args, 'fit_test', tmpdir, timeout=20)
+    call_training_script(ddp_model, CLI_ARGS, 'fit_test', tmpdir, timeout=20)
 
     # load the results of the script
     result_path = os.path.join(tmpdir, 'ddp.result')
@@ -73,7 +66,7 @@ def test_multi_gpu_model_ddp_fit_test(tmpdir, cli_args):
 
     model_outs = result['result']
     for out in model_outs:
-        assert out['test_acc'] > 0.90
+        assert out['test_acc'] > 0.7
 
 
 @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
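Each of these tests was parametrized over a one-element list, which only added an unused fixture argument; a module-level CLI_ARGS constant says the same thing more directly. If several flag combinations ever need coverage again, the idiomatic route is to parametrize over multiple strings, sketched here with a hypothetical second config:

    import pytest

    @pytest.mark.parametrize('cli_args', [
        '--max_epochs 1 --gpus 2 --accelerator ddp',
        '--max_epochs 2 --gpus 2 --accelerator ddp',  # hypothetical second config
    ])
    def test_example(cli_args):
        assert '--accelerator ddp' in cli_args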
@@ -20,7 +20,9 @@ from pytorch_lightning.callbacks import EarlyStopping
 from pytorch_lightning.core import memory
 from pytorch_lightning.trainer import Trainer
 from pytorch_lightning.trainer.states import TrainerState
-from tests.base import EvalModelTemplate
+from tests.helpers import BoringModel
+from tests.helpers.datamodules import ClassifDataModule
+from tests.helpers.simple_models import ClassificationModel
 
 
 @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
@@ -29,7 +31,7 @@ def test_multi_gpu_early_stop_ddp_spawn(tmpdir):
 
     trainer_options = dict(
         default_root_dir=tmpdir,
-        callbacks=[EarlyStopping()],
+        callbacks=[EarlyStopping(monitor='train_acc')],
         max_epochs=50,
         limit_train_batches=10,
         limit_val_batches=10,
@@ -37,8 +39,9 @@ def test_multi_gpu_early_stop_ddp_spawn(tmpdir):
         accelerator='ddp_spawn',
     )
 
-    model = EvalModelTemplate()
-    tpipes.run_model_test(trainer_options, model)
+    dm = ClassifDataModule()
+    model = ClassificationModel()
+    tpipes.run_model_test(trainer_options, model, dm)
 
 
 @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
@@ -55,7 +58,7 @@ def test_multi_gpu_model_ddp_spawn(tmpdir):
         progress_bar_refresh_rate=0,
     )
 
-    model = EvalModelTemplate()
+    model = BoringModel()
 
     tpipes.run_model_test(trainer_options, model)
 
@@ -68,7 +71,7 @@ def test_ddp_all_dataloaders_passed_to_fit(tmpdir):
     """Make sure DDP works with dataloaders passed to fit()"""
     tutils.set_random_master_port()
 
-    model = EvalModelTemplate()
+    model = BoringModel()
     fit_options = dict(train_dataloader=model.train_dataloader(), val_dataloaders=model.val_dataloader())
 
     trainer = Trainer(
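EarlyStopping() with no arguments watches its default monitor key, which the new ClassificationModel never logs; the callback has to name a metric the model actually produces via self.log. A minimal sketch of the pairing (mode='max' and patience=3 are our additions for clarity; the test itself keeps the callback defaults):

    from pytorch_lightning import Trainer
    from pytorch_lightning.callbacks import EarlyStopping

    # EarlyStopping can only track a key that some *_step hook logs with
    # self.log(...); ClassificationModel logs 'train_acc', so name it here.
    trainer = Trainer(
        max_epochs=50,
        callbacks=[EarlyStopping(monitor='train_acc', mode='max', patience=3)],
    )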
@@ -11,27 +11,61 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import os
+from unittest import mock
 
 import pytest
 import torch
+import torch.nn.functional as F
 
 import pytorch_lightning as pl
 import tests.helpers.pipelines as tpipes
 import tests.helpers.utils as tutils
 from pytorch_lightning.callbacks import EarlyStopping
 from pytorch_lightning.core import memory
-from tests.base import EvalModelTemplate
 from tests.helpers import BoringModel
+from tests.helpers.datamodules import ClassifDataModule
+from tests.helpers.simple_models import ClassificationModel
 
 PRETEND_N_OF_GPUS = 16
 
 
+class CustomClassificationModelDP(ClassificationModel):
+
+    def _step(self, batch, batch_idx):
+        x, y = batch
+        logits = self(x)
+        return {'logits': logits, 'y': y}
+
+    def training_step(self, batch, batch_idx):
+        out = self._step(batch, batch_idx)
+        loss = F.cross_entropy(out['logits'], out['y'])
+        return loss
+
+    def validation_step(self, batch, batch_idx):
+        return self._step(batch, batch_idx)
+
+    def test_step(self, batch, batch_idx):
+        return self._step(batch, batch_idx)
+
+    def validation_step_end(self, outputs):
+        self.log('val_acc', self.valid_acc(outputs['logits'], outputs['y']))
+
+    def test_step_end(self, outputs):
+        self.log('test_acc', self.test_acc(outputs['logits'], outputs['y']))
+
+
 @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
 def test_multi_gpu_early_stop_dp(tmpdir):
     """Make sure DDP works. with early stopping"""
     tutils.set_random_master_port()
 
+    dm = ClassifDataModule()
+    model = CustomClassificationModelDP()
+
     trainer_options = dict(
         default_root_dir=tmpdir,
-        callbacks=[EarlyStopping()],
+        callbacks=[EarlyStopping(monitor='val_acc')],
         max_epochs=50,
         limit_train_batches=10,
         limit_val_batches=10,
@@ -39,8 +73,7 @@ def test_multi_gpu_early_stop_dp(tmpdir):
         accelerator='dp',
     )
 
-    model = EvalModelTemplate()
-    tpipes.run_model_test(trainer_options, model)
+    tpipes.run_model_test(trainer_options, model, dm)
 
 
 @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
@@ -57,7 +90,7 @@ def test_multi_gpu_model_dp(tmpdir):
         progress_bar_refresh_rate=0,
     )
 
-    model = EvalModelTemplate()
+    model = BoringModel()
 
     tpipes.run_model_test(trainer_options, model)
 
@@ -65,14 +98,13 @@ def test_multi_gpu_model_dp(tmpdir):
     memory.get_memory_profile('min_max')
 
 
+@mock.patch.dict(os.environ, {"CUDA_VISIBLE_DEVICES": "0,1"})
 @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
 def test_dp_test(tmpdir):
     tutils.set_random_master_port()
 
-    import os
-    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'
-
-    model = EvalModelTemplate()
+    dm = ClassifDataModule()
+    model = CustomClassificationModelDP()
     trainer = pl.Trainer(
         default_root_dir=tmpdir,
         max_epochs=2,
@@ -81,17 +113,17 @@ def test_dp_test(tmpdir):
         gpus=[0, 1],
         accelerator='dp',
     )
-    trainer.fit(model)
+    trainer.fit(model, datamodule=dm)
     assert 'ckpt' in trainer.checkpoint_callback.best_model_path
-    results = trainer.test()
+    results = trainer.test(datamodule=dm)
     assert 'test_acc' in results[0]
 
-    old_weights = model.c_d1.weight.clone().detach().cpu()
+    old_weights = model.layer_0.weight.clone().detach().cpu()
 
-    results = trainer.test(model)
+    results = trainer.test(model, datamodule=dm)
     assert 'test_acc' in results[0]
 
     # make sure weights didn't change
-    new_weights = model.c_d1.weight.clone().detach().cpu()
+    new_weights = model.layer_0.weight.clone().detach().cpu()
 
     assert torch.all(torch.eq(old_weights, new_weights))
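Under DataParallel the input batch is scattered across GPUs and each replica's *_step sees only its shard, so any metric computed inside validation_step or test_step would be a per-GPU value. That is why CustomClassificationModelDP returns raw logits and targets from the steps and computes accuracy in *_step_end, which runs once on the root device after the shards are gathered. A CPU-only sketch of the pattern (the class name and layer sizes are ours):

    import torch
    from pytorch_lightning import LightningModule

    class DPStyleModel(LightningModule):
        # sketch of the step/step_end split used above

        def __init__(self):
            super().__init__()
            self.layer = torch.nn.Linear(32, 3)

        def forward(self, x):
            return self.layer(x)

        def validation_step(self, batch, batch_idx):
            # runs once per device shard under DP: return raw tensors only
            x, y = batch
            return {'logits': self(x), 'y': y}

        def validation_step_end(self, outputs):
            # runs once with the shards concatenated, so the metric
            # sees the full batch instead of a per-GPU slice
            acc = (outputs['logits'].argmax(dim=1) == outputs['y']).float().mean()
            self.log('val_acc', acc)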
@@ -111,7 +111,7 @@ class EvalModelTemplate(
         x = self.c_d1_drop(x)
 
         x = self.c_d2(x)
-        logits = F.log_softmax(x, dim=1)
+        logits = F.softmax(x, dim=1)
 
         return logits
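For the accuracy checks in these tests the swap from log_softmax to softmax changes nothing: log is monotonic, so argmax-based predictions are identical. It only matters to consumers that care about the scale of the outputs, e.g. NLLLoss expects log-probabilities while probability-based metrics want softmax. A quick check:

    import torch
    import torch.nn.functional as F

    x = torch.randn(4, 10)
    # monotonic transform: identical predictions either way
    assert torch.equal(
        F.softmax(x, dim=1).argmax(dim=1),
        F.log_softmax(x, dim=1).argmax(dim=1),
    )
    # but the values differ: softmax rows sum to 1, log_softmax rows do not
    print(F.softmax(x, dim=1).sum(dim=1))
    print(F.log_softmax(x, dim=1).sum(dim=1))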
@@ -385,9 +385,8 @@ def test_full_loop_dp(tmpdir):
             return {'logits': logits, 'y': y}
 
         def training_step(self, batch, batch_idx):
-            _, y = batch
             out = self._step(batch, batch_idx)
-            loss = F.cross_entropy(out['logits'], y)
+            loss = F.cross_entropy(out['logits'], out['y'])
             return loss
 
         def validation_step(self, batch, batch_idx):
@@ -13,39 +13,41 @@
 # limitations under the License.
 import torch
 
-from pytorch_lightning import LightningDataModule, Trainer
+from pytorch_lightning import LightningDataModule, LightningModule, Trainer
+from pytorch_lightning.metrics.functional import accuracy
 from pytorch_lightning.trainer.states import TrainerState
 from pytorch_lightning.utilities import DistributedType
 from tests.helpers import BoringModel
 from tests.helpers.utils import get_default_logger, load_model_from_checkpoint, reset_seed
 
 
-def run_model_test_without_loggers(trainer_options, model, min_acc: float = 0.50):
+def run_model_test_without_loggers(
+    trainer_options: dict, model: LightningModule, data: LightningDataModule = None, min_acc: float = 0.50
+):
     reset_seed()
 
     # fit model
     trainer = Trainer(**trainer_options)
-    trainer.fit(model)
+    trainer.fit(model, datamodule=data)
 
     # correct result and ok accuracy
     assert trainer.state == TrainerState.FINISHED, f"Training failed with {trainer.state}"
 
-    pretrained_model = load_model_from_checkpoint(
-        trainer.logger, trainer.checkpoint_callback.best_model_path, type(model)
-    )
+    model2 = load_model_from_checkpoint(trainer.logger, trainer.checkpoint_callback.best_model_path, type(model))
 
     # test new model accuracy
-    test_loaders = model.test_dataloader()
+    test_loaders = model2.test_dataloader() if not data else data.test_dataloader()
     if not isinstance(test_loaders, list):
         test_loaders = [test_loaders]
 
-    for dataloader in test_loaders:
-        run_prediction(pretrained_model, dataloader, min_acc=min_acc)
+    if not isinstance(model2, BoringModel):
+        for dataloader in test_loaders:
+            run_prediction_eval_model_template(model2, dataloader, min_acc=min_acc)
 
 
 def run_model_test(
     trainer_options,
-    model,
+    model: LightningModule,
+    data: LightningDataModule = None,
     on_gpu: bool = True,
     version=None,
@@ -76,8 +78,9 @@ def run_model_test(
     if not isinstance(test_loaders, list):
         test_loaders = [test_loaders]
 
-    for dataloader in test_loaders:
-        run_prediction(pretrained_model, dataloader, min_acc=min_acc)
+    if not isinstance(model, BoringModel):
+        for dataloader in test_loaders:
+            run_prediction_eval_model_template(model, dataloader, min_acc=min_acc)
 
     if with_hpc:
         if trainer._distrib_type in (DistributedType.DDP, DistributedType.DDP_SPAWN, DistributedType.DDP2):
@@ -92,50 +95,17 @@ def run_model_test(
     trainer.checkpoint_connector.hpc_load(checkpoint_path, on_gpu=on_gpu)
 
 
-def run_prediction(trained_model, dataloader, dp=False, min_acc=0.25):
-    if isinstance(trained_model, BoringModel):
-        return _boring_model_run_prediction(trained_model, dataloader, min_acc)
-    else:
-        return _eval_model_template_run_prediction(trained_model, dataloader, dp, min_acc=min_acc)
-
-
-def _eval_model_template_run_prediction(trained_model, dataloader, dp=False, min_acc=0.50):
-    # run prediction on 1 batch
-    batch = next(iter(dataloader))
-    x, y = batch
-    x = x.view(x.size(0), -1)
-
-    if dp:
-        with torch.no_grad():
-            output = trained_model(batch, 0)
-        acc = output['val_acc']
-        acc = torch.mean(acc).item()
-
-    else:
-        with torch.no_grad():
-            y_hat = trained_model(x)
-        y_hat = y_hat.cpu()
-
-        # acc
-        labels_hat = torch.argmax(y_hat, dim=1)
-
-        y = y.cpu()
-        acc = torch.sum(y == labels_hat).item() / (len(y) * 1.0)
-        acc = torch.tensor(acc)
-        acc = acc.item()
-
-    assert acc >= min_acc, f"This model is expected to get > {min_acc} in test set (it got {acc})"
-
-
-# TODO: This test compares a loss value with a min accuracy - complete non-sense!
-# create BoringModels that make actual predictions!
-def _boring_model_run_prediction(trained_model, dataloader, min_acc=0.25):
+@torch.no_grad()
+def run_prediction_eval_model_template(trained_model, dataloader, min_acc=0.50):
     # run prediction on 1 batch
+    trained_model.cpu()
+    trained_model.eval()
+
     batch = next(iter(dataloader))
     x, y = batch
+    x = x.flatten(1)
 
-    with torch.no_grad():
-        output = trained_model(batch)
+    y_hat = trained_model(x)
+    acc = accuracy(y_hat.cpu(), y.cpu(), top_k=2).item()
 
-    acc = trained_model.loss(batch, output)
-    assert acc >= min_acc, f"This model is expected to get, {min_acc} in test set but got {acc}"
+    assert acc >= min_acc, f"This model is expected to get > {min_acc} in test set (it got {acc})"
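The rewritten helper scores with the functional accuracy metric at top_k=2: a sample counts as correct when the true class is among the two highest-scoring outputs, which keeps the threshold meaningful for a small, quickly trained classifier. A hand-rolled equivalent, as a sketch in plain torch:

    import torch

    def top_k_accuracy(logits, y, k=2):
        # correct when the target appears among the k best-scoring classes
        topk = logits.topk(k, dim=1).indices           # shape (N, k)
        correct = (topk == y.unsqueeze(1)).any(dim=1)  # shape (N,)
        return correct.float().mean().item()

    logits = torch.tensor([[0.1, 0.7, 0.2], [0.5, 0.3, 0.2]])
    y = torch.tensor([2, 0])
    print(top_k_accuracy(logits, y))  # 1.0 -- both targets land in the top-2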
@@ -0,0 +1,30 @@
+# Copyright The PyTorch Lightning team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from tests.models.data.horovod.train_default_model import run_test_from_config
+
+
+def test_horovod_model_script(tmpdir):
+    """This just for testing/debugging horovod script without horovod..."""
+    trainer_options = dict(
+        default_root_dir=str(tmpdir),
+        weights_save_path=str(tmpdir),
+        gradient_clip_val=1.0,
+        progress_bar_refresh_rate=0,
+        max_epochs=1,
+        limit_train_batches=0.4,
+        limit_val_batches=0.2,
+        deterministic=True,
+    )
+    run_test_from_config(trainer_options, check_size=False, on_gpu=False)
@@ -37,7 +37,6 @@ else:
     print('You requested to import Horovod which is missing or not supported for your OS.')
 
 from tests.helpers import BoringModel  # noqa: E402
-from tests.helpers.pipelines import run_prediction  # noqa: E402
 from tests.helpers.utils import reset_seed, set_random_master_port  # noqa: E402
 
 parser = argparse.ArgumentParser()
@@ -45,7 +44,7 @@ parser.add_argument('--trainer-options', required=True)
 parser.add_argument('--on-gpu', action='store_true', default=False)
 
 
-def run_test_from_config(trainer_options):
+def run_test_from_config(trainer_options, on_gpu, check_size=True):
     """Trains the default model with the given config."""
     set_random_master_port()
     reset_seed()
@@ -60,7 +59,8 @@ def run_test_from_config(trainer_options):
     assert trainer.state == TrainerState.FINISHED, f"Training failed with {trainer.state}"
 
     # Horovod should be initialized following training. If not, this will raise an exception.
-    assert hvd.size() == 2
+    if check_size:
+        assert hvd.size() == 2
 
     if trainer.global_rank > 0:
         return
@@ -74,15 +74,16 @@ def run_test_from_config(trainer_options):
         test_loaders = [test_loaders]
 
     for dataloader in test_loaders:
-        run_prediction(pretrained_model, dataloader)
+        batch = next(iter(dataloader))
+        pretrained_model(batch)
 
     # test HPC saving
     trainer.checkpoint_connector.hpc_save(ckpt_path, trainer.logger)
     # test HPC loading
     checkpoint_path = trainer.checkpoint_connector.get_max_ckpt_path_from_folder(ckpt_path)
-    trainer.checkpoint_connector.hpc_load(checkpoint_path, on_gpu=args.on_gpu)
+    trainer.checkpoint_connector.hpc_load(checkpoint_path, on_gpu=on_gpu)
 
-    if args.on_gpu:
+    if on_gpu:
         trainer = Trainer(gpus=1, accelerator='horovod', max_epochs=1)
         # Test the root_gpu property
         assert trainer.root_gpu == hvd.local_rank()
@@ -90,4 +91,4 @@ def run_test_from_config(trainer_options):
 
 if __name__ == "__main__":
     args = parser.parse_args()
-    run_test_from_config(json.loads(args.trainer_options))
+    run_test_from_config(json.loads(args.trainer_options), args.on_gpu)
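The original run_test_from_config reached for a module-level args that only exists after the __main__ block has called parse_args(); as soon as the new test_horovod_model_script imports the module and calls the function directly, that lookup raises NameError. Threading on_gpu (plus the new check_size escape hatch) through as parameters removes the hidden global. A stripped-down illustration of the failure mode, with hypothetical names:

    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--on-gpu', action='store_true', default=False)

    def run_before(config):
        # BUG: 'args' only exists when this file ran as a script, so
        # importing the module and calling this raises NameError
        return args.on_gpu  # noqa: F821

    def run_after(config, on_gpu=False):
        # FIX: pass the value in explicitly instead
        return on_gpu

    if __name__ == '__main__':
        args = parser.parse_args()
        print(run_after({}, args.on_gpu))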
@@ -24,6 +24,8 @@ from pytorch_lightning import Trainer
 from pytorch_lightning.utilities import device_parser
 from pytorch_lightning.utilities.exceptions import MisconfigurationException
 from tests.helpers import BoringModel
+from tests.helpers.datamodules import ClassifDataModule
+from tests.helpers.simple_models import ClassificationModel
 
 PRETEND_N_OF_GPUS = 16
 
@@ -41,8 +43,9 @@ def test_multi_gpu_none_backend(tmpdir):
         gpus=2,
     )
 
-    model = BoringModel()
-    tpipes.run_model_test(trainer_options, model, min_acc=0.20)
+    dm = ClassifDataModule()
+    model = ClassificationModel()
+    tpipes.run_model_test(trainer_options, model, dm)
 
 
 @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
@@ -85,6 +85,28 @@ class GenericValTestLossBoringModel(GenericParentValTestLossBoringModel[int]):
     pass
 
 
+class CustomClassificationModelDP(ClassificationModel):
+
+    def _step(self, batch, batch_idx):
+        x, y = batch
+        logits = self(x)
+        return {'logits': logits, 'y': y}
+
+    def training_step(self, batch, batch_idx):
+        out = self._step(batch, batch_idx)
+        loss = F.cross_entropy(out['logits'], out['y'])
+        return loss
+
+    def validation_step(self, batch, batch_idx):
+        return self._step(batch, batch_idx)
+
+    def test_step(self, batch, batch_idx):
+        return self._step(batch, batch_idx)
+
+    def validation_step_end(self, outputs):
+        self.log('val_acc', self.valid_acc(outputs['logits'], outputs['y']))
+
+
 def test_model_properties_resume_from_checkpoint(tmpdir):
     """
     Test that properties like `current_epoch` and `global_step`
@@ -198,28 +220,6 @@ def test_running_test_pretrained_model_distrib_dp(tmpdir):
 
     tutils.set_random_master_port()
 
-    class CustomClassificationModelDP(ClassificationModel):
-
-        def _step(self, batch, batch_idx):
-            x, y = batch
-            logits = self(x)
-            return {'logits': logits, 'y': y}
-
-        def training_step(self, batch, batch_idx):
-            _, y = batch
-            out = self._step(batch, batch_idx)
-            loss = F.cross_entropy(out['logits'], y)
-            return loss
-
-        def validation_step(self, batch, batch_idx):
-            return self._step(batch, batch_idx)
-
-        def test_step(self, batch, batch_idx):
-            return self._step(batch, batch_idx)
-
-        def validation_step_end(self, outputs):
-            self.log('val_acc', self.valid_acc(outputs['logits'], outputs['y']))
-
     dm = ClassifDataModule()
     model = CustomClassificationModelDP(lr=0.1)
 
@@ -259,7 +259,7 @@ def test_running_test_pretrained_model_distrib_dp(tmpdir):
         dataloaders = [dataloaders]
 
     for dataloader in dataloaders:
-        tpipes.run_prediction(pretrained_model, dataloader)
+        tpipes.run_prediction_eval_model_template(pretrained_model, dataloader)
 
 
 @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
@@ -307,7 +307,7 @@ def test_running_test_pretrained_model_distrib_ddp_spawn(tmpdir):
         dataloaders = [dataloaders]
 
     for dataloader in dataloaders:
-        tpipes.run_prediction(pretrained_model, dataloader, min_acc=0.1)
+        tpipes.run_prediction_eval_model_template(pretrained_model, dataloader, min_acc=0.1)
 
 
 def test_running_test_pretrained_model_cpu(tmpdir):
@@ -398,7 +398,8 @@ def test_load_model_from_checkpoint(tmpdir, model_template):
 @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
 def test_dp_resume(tmpdir):
     """Make sure DP continues training correctly."""
-    model = BoringModel()
+    model = CustomClassificationModelDP(lr=0.1)
+    dm = ClassifDataModule()
 
     trainer_options = dict(max_epochs=1, gpus=2, accelerator='dp', default_root_dir=tmpdir)
 
@@ -416,7 +417,7 @@ def test_dp_resume(tmpdir):
     # fit model
     trainer = Trainer(**trainer_options)
     trainer.is_slurm_managing_tasks = True
-    trainer.fit(model)
+    trainer.fit(model, datamodule=dm)
 
     # track epoch before saving. Increment since we finished the current epoch, don't want to rerun
     real_global_epoch = trainer.current_epoch + 1
@@ -439,7 +440,7 @@ def test_dp_resume(tmpdir):
     trainer_options['max_epochs'] = 1
     new_trainer = Trainer(**trainer_options)
 
-    class CustomModel(BoringModel):
+    class CustomModel(CustomClassificationModelDP):
 
         def __init__(self):
             super().__init__()
@@ -451,19 +452,17 @@ def test_dp_resume(tmpdir):
 
             # if model and state loaded correctly, predictions will be good even though we
             # haven't trained with the new loaded model
-            dp_model = new_trainer.model
-            dp_model.eval()
+            new_trainer._running_stage = RunningStage.EVALUATING
 
             dataloader = self.train_dataloader()
-            tpipes.run_prediction(self.trainer.lightning_module, dataloader)
+            tpipes.run_prediction_eval_model_template(self.trainer.lightning_module, dataloader=dataloader)
             self.on_train_start_called = True
 
     # new model
     model = CustomModel()
 
     # fit new model which should load hpc weights
-    new_trainer.fit(model)
+    new_trainer.fit(model, datamodule=dm)
     assert model.on_train_start_called
 
     # test freeze on gpu
@@ -223,12 +223,19 @@ def test_tpu_grad_norm(tmpdir):
 @pl_multi_process_test
 def test_dataloaders_passed_to_fit(tmpdir):
     """Test if dataloaders passed to trainer works on TPU"""
 
     tutils.reset_seed()
     model = BoringModel()
 
-    trainer = Trainer(default_root_dir=tmpdir, max_epochs=1, tpu_cores=8)
-    trainer.fit(model, train_dataloader=model.train_dataloader(), val_dataloaders=model.val_dataloader())
+    trainer = Trainer(
+        default_root_dir=tmpdir,
+        max_epochs=1,
+        tpu_cores=8,
+    )
+    trainer.fit(
+        model,
+        train_dataloader=model.train_dataloader(),
+        val_dataloaders=model.val_dataloader(),
+    )
     assert trainer.state == TrainerState.FINISHED, f"Training failed with {trainer.state}"
 
@@ -130,7 +130,7 @@ def test_multiple_val_dataloader(tmpdir):
 
     # make sure predictions are good for each val set
     for dataloader in trainer.val_dataloaders:
-        tpipes.run_prediction(trained_model=model, dataloader=dataloader)
+        tpipes.run_prediction_eval_model_template(trained_model=model, dataloader=dataloader)
 
 
 @pytest.mark.parametrize('ckpt_path', [None, 'best', 'specific'])
@@ -153,8 +153,8 @@ def test_multiple_test_dataloader(tmpdir, ckpt_path):
     trainer = Trainer(
         default_root_dir=tmpdir,
         max_epochs=1,
-        limit_val_batches=0.1,
-        limit_train_batches=0.2,
+        limit_val_batches=10,
+        limit_train_batches=100,
     )
     trainer.fit(model)
     if ckpt_path == 'specific':
@@ -162,12 +162,11 @@ def test_multiple_test_dataloader(tmpdir, ckpt_path):
         trainer.test(ckpt_path=ckpt_path)
 
     # verify there are 2 test loaders
-    assert len(trainer.test_dataloaders) == 2, \
-        'Multiple test_dataloaders not initiated properly'
+    assert len(trainer.test_dataloaders) == 2, 'Multiple test_dataloaders not initiated properly'
 
     # make sure predictions are good for each test set
     for dataloader in trainer.test_dataloaders:
-        tpipes.run_prediction(trainer.model, dataloader)
+        tpipes.run_prediction_eval_model_template(trainer.model, dataloader)
 
     # run the test method
     trainer.test(ckpt_path=ckpt_path)
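Note the type change on the limits: limit_train_batches and limit_val_batches interpret a float as a fraction of the available batches and an int as an absolute batch count, so 0.2 of a resized dataset drifts while 100 stays fixed. Both forms, side by side:

    from pytorch_lightning import Trainer

    # float => fraction of the dataloader's batches per epoch
    fractional = Trainer(limit_train_batches=0.2, limit_val_batches=0.1)

    # int => absolute number of batches per epoch, independent of dataset size
    absolute = Trainer(limit_train_batches=100, limit_val_batches=10)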
@@ -229,8 +229,8 @@ def test_accumulation_and_early_stopping(tmpdir):
 def test_suggestion_parameters_work(tmpdir):
     """ Test that default skipping does not alter results in basic case """
 
-    hparams = EvalModelTemplate.get_default_hparams()
-    model = EvalModelTemplate(**hparams)
+    dm = ClassifDataModule()
+    model = ClassificationModel()
 
     # logger file to get meta
     trainer = Trainer(
@@ -238,12 +238,11 @@ def test_suggestion_parameters_work(tmpdir):
         max_epochs=3,
     )
 
-    lrfinder = trainer.tuner.lr_find(model)
+    lrfinder = trainer.tuner.lr_find(model, datamodule=dm)
     lr1 = lrfinder.suggestion(skip_begin=10)  # default
-    lr2 = lrfinder.suggestion(skip_begin=80)  # way too high, should have an impact
+    lr2 = lrfinder.suggestion(skip_begin=150)  # way too high, should have an impact
 
-    assert lr1 != lr2, \
-        'Skipping parameter did not influence learning rate'
+    assert lr1 != lr2, 'Skipping parameter did not influence learning rate'
 
 
 def test_suggestion_with_non_finite_values(tmpdir):
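skip_begin tells the suggestion heuristic how many of the earliest points of the lr sweep to ignore before looking for the steepest drop in the loss curve; the deliberately-too-high value presumably moves from 80 to 150 because the sweep over the new datamodule records more points. A simplified stand-in for the heuristic (the real LRFinder.suggestion works on the recorded curve with numpy; this sketch uses plain differences):

    import torch

    def suggest_lr(lrs, losses, skip_begin=10, skip_end=1):
        # pick the lr where the loss falls fastest, ignoring the noisy
        # start (skip_begin) and tail (skip_end) of the sweep
        window = torch.tensor(losses[skip_begin:-skip_end])
        slope = window[1:] - window[:-1]
        idx = int(torch.argmin(slope)) + skip_begin
        return lrs[idx]

    lrs = [10 ** (-6 + 0.08 * i) for i in range(100)]
    losses = [2.0] * 40 + [2.0 - 0.09 * (i - 40) for i in range(40, 60)] \
        + [0.2 + 0.05 * (i - 60) for i in range(60, 100)]
    print(suggest_lr(lrs, losses))                 # lr from the steep region
    print(suggest_lr(lrs, losses, skip_begin=70))  # skipping too much skews the answer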
@@ -113,8 +113,8 @@ def test_lightning_getattr(tmpdir):
 
     for m in models:
         with pytest.raises(
-                AttributeError,
-                match="is neither stored in the model namespace nor the `hparams` namespace/dict, nor the datamodule."
+            AttributeError,
+            match="is neither stored in the model namespace nor the `hparams` namespace/dict, nor the datamodule."
         ):
             lightning_getattr(m, "this_attr_not_exist")
 
@@ -140,7 +140,7 @@ def test_lightning_setattr(tmpdir):
 
     for m in models:
         with pytest.raises(
-                AttributeError,
-                match="is neither stored in the model namespace nor the `hparams` namespace/dict, nor the datamodule."
+            AttributeError,
+            match="is neither stored in the model namespace nor the `hparams` namespace/dict, nor the datamodule."
         ):
             lightning_setattr(m, "this_attr_not_exist", None)