From 61ae35c37810ace87c47bfdea762d2c75b04082a Mon Sep 17 00:00:00 2001
From: Rohit Gupta <rohitgr1998@gmail.com>
Date: Tue, 1 Nov 2022 17:10:32 +0530
Subject: [PATCH] Use sklearn in runif (#15426)

* Use sklearn in runif
* test by removing sklearn dep
* remove repeated code
* seed
---
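
The thread running through every file below: tests that need the sklearn-backed
helpers now declare that via the RunIf marker instead of skipping lazily inside the
helper constructors. A minimal sketch of the resulting usage, assuming the helpers
are on the import path:

    from tests_pytorch.helpers.runif import RunIf

    @RunIf(sklearn=True)  # skipped with reason "Requires: [scikit-learn]" when sklearn is absent
    def test_needs_sklearn(tmpdir):
        ...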
 tests/legacy/simple_classif_training.py       | 131 +-----------------
 tests/tests_pytorch/accelerators/test_hpu.py  |   4 +-
 tests/tests_pytorch/accelerators/test_ipu.py  |   3 +-
 .../callbacks/test_early_stopping.py          |   5 +
 .../callbacks/test_lr_monitor.py              |   2 +
 .../callbacks/test_quantization.py            |  10 +-
 .../checkpointing/test_legacy_checkpoints.py  |  18 +--
 tests/tests_pytorch/core/test_datamodules.py  |   3 +
 tests/tests_pytorch/helpers/datamodules.py    |  24 +++-
 tests/tests_pytorch/helpers/runif.py          |   7 +
 tests/tests_pytorch/helpers/simple_models.py  |   6 +-
 tests/tests_pytorch/helpers/test_models.py    |   5 +-
 tests/tests_pytorch/loggers/test_csv.py       |   2 +
 tests/tests_pytorch/models/test_cpu.py        |   2 +-
 tests/tests_pytorch/models/test_gpu.py        |   2 +-
 tests/tests_pytorch/models/test_horovod.py    |   5 +-
 tests/tests_pytorch/models/test_restore.py    |  12 +-
 .../strategies/test_colossalai.py             |   4 +-
 tests/tests_pytorch/strategies/test_common.py |   1 +
 tests/tests_pytorch/strategies/test_ddp.py    |   6 +-
 .../strategies/test_ddp_spawn.py              |   2 +-
 .../strategies/test_deepspeed_strategy.py     |   8 +-
 tests/tests_pytorch/strategies/test_dp.py     |   2 +-
 .../trainer/flags/test_overfit_batches.py     |  20 ++-
 tests/tests_pytorch/trainer/test_trainer.py   |   1 +
 tests/tests_pytorch/tuner/test_lr_finder.py   |   6 +-
 26 files changed, 114 insertions(+), 177 deletions(-)

diff --git a/tests/legacy/simple_classif_training.py b/tests/legacy/simple_classif_training.py
index 440af1d92a..ab7b1fab9f 100644
--- a/tests/legacy/simple_classif_training.py
+++ b/tests/legacy/simple_classif_training.py
@@ -14,141 +14,16 @@
 import os
 
 import torch
-import torch.nn.functional as F
-from sklearn.datasets import make_classification
-from sklearn.model_selection import train_test_split
-from torch import nn
-from torch.utils.data import DataLoader, Dataset
-from torchmetrics import Accuracy
 
 import pytorch_lightning as pl
-from pytorch_lightning import LightningDataModule, LightningModule, seed_everything
+from pytorch_lightning import seed_everything
 from pytorch_lightning.callbacks import EarlyStopping
+from tests_pytorch.helpers.datamodules import ClassifDataModule
+from tests_pytorch.helpers.simple_models import ClassificationModel
 
 PATH_LEGACY = os.path.dirname(__file__)
 
 
-class SklearnDataset(Dataset):
-    def __init__(self, x, y, x_type, y_type):
-        self.x = x
-        self.y = y
-        self._x_type = x_type
-        self._y_type = y_type
-
-    def __getitem__(self, idx):
-        return torch.tensor(self.x[idx], dtype=self._x_type), torch.tensor(self.y[idx], dtype=self._y_type)
-
-    def __len__(self):
-        return len(self.y)
-
-
-class SklearnDataModule(LightningDataModule):
-    def __init__(self, sklearn_dataset, x_type, y_type, batch_size: int = 128):
-        super().__init__()
-        self.batch_size = batch_size
-        self._x, self._y = sklearn_dataset
-        self._split_data()
-        self._x_type = x_type
-        self._y_type = y_type
-
-    def _split_data(self):
-        self.x_train, self.x_test, self.y_train, self.y_test = train_test_split(
-            self._x, self._y, test_size=0.20, random_state=42
-        )
-        self.x_train, self.x_predict, self.y_train, self.y_predict = train_test_split(
-            self._x, self._y, test_size=0.20, random_state=42
-        )
-        self.x_train, self.x_valid, self.y_train, self.y_valid = train_test_split(
-            self.x_train, self.y_train, test_size=0.40, random_state=42
-        )
-
-    def train_dataloader(self):
-        return DataLoader(
-            SklearnDataset(self.x_train, self.y_train, self._x_type, self._y_type),
-            shuffle=True,
-            batch_size=self.batch_size,
-        )
-
-    def val_dataloader(self):
-        return DataLoader(
-            SklearnDataset(self.x_valid, self.y_valid, self._x_type, self._y_type), batch_size=self.batch_size
-        )
-
-    def test_dataloader(self):
-        return DataLoader(
-            SklearnDataset(self.x_test, self.y_test, self._x_type, self._y_type), batch_size=self.batch_size
-        )
-
-    def predict_dataloader(self):
-        return DataLoader(
-            SklearnDataset(self.x_predict, self.y_predict, self._x_type, self._y_type), batch_size=self.batch_size
-        )
-
-
-class ClassifDataModule(SklearnDataModule):
-    def __init__(self, num_features=24, length=6000, num_classes=3, batch_size=128):
-        data = make_classification(
-            n_samples=length,
-            n_features=num_features,
-            n_classes=num_classes,
-            n_clusters_per_class=2,
-            n_informative=int(num_features / num_classes),
-            random_state=42,
-        )
-        super().__init__(data, x_type=torch.float32, y_type=torch.long, batch_size=batch_size)
-
-
-class ClassificationModel(LightningModule):
-    def __init__(self, num_features=24, num_classes=3, lr=0.01):
-        super().__init__()
-        self.save_hyperparameters()
-
-        self.lr = lr
-        for i in range(3):
-            setattr(self, f"layer_{i}", nn.Linear(num_features, num_features))
-            setattr(self, f"layer_{i}a", torch.nn.ReLU())
-        setattr(self, "layer_end", nn.Linear(num_features, num_classes))
-
-        self.train_acc = Accuracy()
-        self.valid_acc = Accuracy()
-        self.test_acc = Accuracy()
-
-    def forward(self, x):
-        x = self.layer_0(x)
-        x = self.layer_0a(x)
-        x = self.layer_1(x)
-        x = self.layer_1a(x)
-        x = self.layer_2(x)
-        x = self.layer_2a(x)
-        x = self.layer_end(x)
-        logits = F.softmax(x, dim=1)
-        return logits
-
-    def configure_optimizers(self):
-        optimizer = torch.optim.Adam(self.parameters(), lr=self.lr)
-        return [optimizer], []
-
-    def training_step(self, batch, batch_idx):
-        x, y = batch
-        logits = self.forward(x)
-        loss = F.cross_entropy(logits, y)
-        self.log("train_loss", loss, prog_bar=True)
-        self.log("train_acc", self.train_acc(logits, y), prog_bar=True)
-        return {"loss": loss}
-
-    def validation_step(self, batch, batch_idx):
-        x, y = batch
-        logits = self.forward(x)
-        self.log("val_loss", F.cross_entropy(logits, y), prog_bar=False)
-        self.log("val_acc", self.valid_acc(logits, y), prog_bar=True)
-
-    def test_step(self, batch, batch_idx):
-        x, y = batch
-        logits = self.forward(x)
-        self.log("test_loss", F.cross_entropy(logits, y), prog_bar=False)
-        self.log("test_acc", self.test_acc(logits, y), prog_bar=True)
-
-
 def main_train(dir_path, max_epochs: int = 20):
     seed_everything(42)
     stopping = EarlyStopping(monitor="val_acc", mode="max", min_delta=0.005)
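
The classes deleted above duplicated tests_pytorch/helpers, but with different
defaults (num_features=24, length=6000, n_clusters_per_class=2, n_informative=8,
versus 32/800/1/2 in the shared helpers), which is why the legacy-checkpoint tests
further down pass those values explicitly, e.g.:

    dm = ClassifDataModule(num_features=24, length=6000, batch_size=128, n_clusters_per_class=2, n_informative=8)
    model = ClassificationModel(num_features=24)

Incidentally, the removed _split_data derived x_test and x_predict from two
identical train_test_split calls on the full data (same arrays, same random_state),
so those two splits were duplicates of each other.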
diff --git a/tests/tests_pytorch/accelerators/test_hpu.py b/tests/tests_pytorch/accelerators/test_hpu.py
index 405cd2161f..d6ca2b97f3 100644
--- a/tests/tests_pytorch/accelerators/test_hpu.py
+++ b/tests/tests_pytorch/accelerators/test_hpu.py
@@ -76,8 +76,8 @@ def test_all_stages(tmpdir, hpus):
     trainer.predict(model)
 
 
-@RunIf(hpu=True)
-@mock.patch.dict(os.environ, os.environ.copy())
+@RunIf(hpu=True, sklearn=True)
+@mock.patch.dict(os.environ, os.environ.copy(), clear=True)
 def test_optimization(tmpdir):
     seed_everything(42)
 
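The added decorator both isolates and restores the environment: clear=True resets
os.environ to the copy taken when the module was imported (shielding the test from
variables set by earlier tests), and patch.dict puts the pre-test contents back on
exit, so nothing leaks out either; presumably this matters here because
seed_everything(42) writes PL_GLOBAL_SEED into os.environ. A sketch of the pattern
in isolation:

    import os
    from unittest import mock

    @mock.patch.dict(os.environ, os.environ.copy(), clear=True)
    def test_something():
        os.environ["PL_GLOBAL_SEED"] = "42"  # e.g. written by seed_everything(42)
        # on exit, patch.dict restores the original os.environ contents
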
diff --git a/tests/tests_pytorch/accelerators/test_ipu.py b/tests/tests_pytorch/accelerators/test_ipu.py
index 35bb38794f..de60aa3a03 100644
--- a/tests/tests_pytorch/accelerators/test_ipu.py
+++ b/tests/tests_pytorch/accelerators/test_ipu.py
@@ -149,7 +149,8 @@ def test_inference_only(tmpdir, devices):
     trainer.predict(model)
 
 
-@RunIf(ipu=True)
+@RunIf(ipu=True, sklearn=True)
+@mock.patch.dict(os.environ, os.environ.copy(), clear=True)
 def test_optimization(tmpdir):
     seed_everything(42)
 
diff --git a/tests/tests_pytorch/callbacks/test_early_stopping.py b/tests/tests_pytorch/callbacks/test_early_stopping.py
index a3a98027cc..4940ed8c4a 100644
--- a/tests/tests_pytorch/callbacks/test_early_stopping.py
+++ b/tests/tests_pytorch/callbacks/test_early_stopping.py
@@ -13,6 +13,7 @@
 # limitations under the License.
 import logging
 import math
+import os
 import pickle
 from typing import List, Optional
 from unittest import mock
@@ -56,6 +57,8 @@ class EarlyStoppingTestRestore(EarlyStopping):
         self.saved_states.append(self.state_dict().copy())
 
 
+@RunIf(sklearn=True)
+@mock.patch.dict(os.environ, os.environ.copy(), clear=True)
 def test_resume_early_stopping_from_checkpoint(tmpdir):
     """Prevent regressions to bugs:
 
@@ -98,6 +101,7 @@ def test_resume_early_stopping_from_checkpoint(tmpdir):
         new_trainer.fit(model, datamodule=dm, ckpt_path=checkpoint_filepath)
 
 
+@RunIf(sklearn=True)
 def test_early_stopping_no_extraneous_invocations(tmpdir):
     """Test to ensure that callback methods aren't being invoked outside of the callback handler."""
     model = ClassificationModel()
@@ -195,6 +199,7 @@ def test_pickling(tmpdir):
     assert vars(early_stopping) == vars(early_stopping_loaded)
 
 
+@RunIf(sklearn=True)
 def test_early_stopping_no_val_step(tmpdir):
     """Test that early stopping callback falls back to training metrics when no validation defined."""
 
diff --git a/tests/tests_pytorch/callbacks/test_lr_monitor.py b/tests/tests_pytorch/callbacks/test_lr_monitor.py
index bac61f1123..90e2c0fa26 100644
--- a/tests/tests_pytorch/callbacks/test_lr_monitor.py
+++ b/tests/tests_pytorch/callbacks/test_lr_monitor.py
@@ -22,6 +22,7 @@ from pytorch_lightning.callbacks.finetuning import BackboneFinetuning
 from pytorch_lightning.demos.boring_classes import BoringModel
 from pytorch_lightning.utilities.exceptions import MisconfigurationException
 from tests_pytorch.helpers.datamodules import ClassifDataModule
+from tests_pytorch.helpers.runif import RunIf
 from tests_pytorch.helpers.simple_models import ClassificationModel
 
 
@@ -284,6 +285,7 @@ def test_lr_monitor_no_lr_scheduler_multi_lrs(tmpdir, logging_interval: str):
     assert all(len(lr) == expected_number_logged for lr in lr_monitor.lrs.values())
 
 
+@RunIf(sklearn=True)
 def test_lr_monitor_param_groups(tmpdir):
     """Test that learning rates are extracted and logged for single lr scheduler."""
 
diff --git a/tests/tests_pytorch/callbacks/test_quantization.py b/tests/tests_pytorch/callbacks/test_quantization.py
index 2906430c7d..ee5b3e750f 100644
--- a/tests/tests_pytorch/callbacks/test_quantization.py
+++ b/tests/tests_pytorch/callbacks/test_quantization.py
@@ -35,7 +35,7 @@ from tests_pytorch.helpers.simple_models import RegressionModel
 @pytest.mark.parametrize("observe", ["average", "histogram"])
 @pytest.mark.parametrize("fuse", [True, False])
 @pytest.mark.parametrize("convert", [True, False])
-@RunIf(quantization=True, max_torch="1.11")
+@RunIf(quantization=True, sklearn=True, max_torch="1.11")
 def test_quantization(tmpdir, observe: str, fuse: bool, convert: bool):
     """Parity test for quant model."""
     cuda_available = CUDAAccelerator.is_available()
@@ -100,7 +100,7 @@ def test_quantization(tmpdir, observe: str, fuse: bool, convert: bool):
     assert torch.allclose(org_score, quant2_score, atol=0.45)
 
 
-@RunIf(quantization=True)
+@RunIf(quantization=True, sklearn=True)
 def test_quantize_torchscript(tmpdir):
     """Test converting to torchscipt."""
     dm = RegressDataModule()
@@ -116,7 +116,7 @@ def test_quantize_torchscript(tmpdir):
     tsmodel(tsmodel.quant(batch[0]))
 
 
-@RunIf(quantization=True)
+@RunIf(quantization=True, sklearn=True)
 def test_quantization_exceptions(tmpdir):
     """Test wrong fuse layers."""
     with pytest.raises(MisconfigurationException, match="Unsupported qconfig"):
@@ -157,7 +157,7 @@ def custom_trigger_last(trainer):
     "trigger_fn,expected_count",
     [(None, 9), (3, 3), (custom_trigger_never, 0), (custom_trigger_even, 5), (custom_trigger_last, 2)],
 )
-@RunIf(quantization=True)
+@RunIf(quantization=True, sklearn=True)
 def test_quantization_triggers(tmpdir, trigger_fn: Union[None, int, Callable], expected_count: int):
     """Test  how many times the quant is called."""
     dm = RegressDataModule()
@@ -216,7 +216,7 @@ def test_quantization_disable_observers(tmpdir, observer_enabled_stages):
         )
 
 
-@RunIf(quantization=True)
+@RunIf(quantization=True, sklearn=True)
 def test_quantization_val_test_predict(tmpdir):
     """Test the default quantization aware training not affected by validating, testing and predicting."""
     seed_everything(42)
diff --git a/tests/tests_pytorch/checkpointing/test_legacy_checkpoints.py b/tests/tests_pytorch/checkpointing/test_legacy_checkpoints.py
index 5043ef6569..4a99accb06 100644
--- a/tests/tests_pytorch/checkpointing/test_legacy_checkpoints.py
+++ b/tests/tests_pytorch/checkpointing/test_legacy_checkpoints.py
@@ -23,6 +23,9 @@ import torch
 import pytorch_lightning as pl
 from pytorch_lightning import Callback, Trainer
 from tests_pytorch import _PATH_LEGACY
+from tests_pytorch.helpers.datamodules import ClassifDataModule
+from tests_pytorch.helpers.runif import RunIf
+from tests_pytorch.helpers.simple_models import ClassificationModel
 
 LEGACY_CHECKPOINTS_PATH = os.path.join(_PATH_LEGACY, "checkpoints")
 CHECKPOINT_EXTENSION = ".ckpt"
@@ -32,18 +35,17 @@ with open(os.path.join(_PATH_LEGACY, "back-compatible-versions.txt")) as fp:
 
 
 @pytest.mark.parametrize("pl_version", LEGACY_BACK_COMPATIBLE_PL_VERSIONS)
+@RunIf(sklearn=True)
 def test_load_legacy_checkpoints(tmpdir, pl_version: str):
     PATH_LEGACY = os.path.join(LEGACY_CHECKPOINTS_PATH, pl_version)
     with patch("sys.path", [PATH_LEGACY] + sys.path):
-        from simple_classif_training import ClassifDataModule, ClassificationModel
-
         path_ckpts = sorted(glob.glob(os.path.join(PATH_LEGACY, f"*{CHECKPOINT_EXTENSION}")))
         assert path_ckpts, f'No checkpoints found in folder "{PATH_LEGACY}"'
         path_ckpt = path_ckpts[-1]
 
-        model = ClassificationModel.load_from_checkpoint(path_ckpt)
+        model = ClassificationModel.load_from_checkpoint(path_ckpt, num_features=24)
         trainer = Trainer(default_root_dir=str(tmpdir))
-        dm = ClassifDataModule()
+        dm = ClassifDataModule(num_features=24, length=6000, batch_size=128, n_clusters_per_class=2, n_informative=8)
         res = trainer.test(model, datamodule=dm)
         assert res[0]["test_loss"] <= 0.7
         assert res[0]["test_acc"] >= 0.85
@@ -62,6 +64,7 @@ class LimitNbEpochs(Callback):
 
 
 @pytest.mark.parametrize("pl_version", LEGACY_BACK_COMPATIBLE_PL_VERSIONS)
+@RunIf(sklearn=True)
 def test_legacy_ckpt_threading(tmpdir, pl_version: str):
     def load_model():
         import torch
@@ -84,17 +87,16 @@ def test_legacy_ckpt_threading(tmpdir, pl_version: str):
 
 
 @pytest.mark.parametrize("pl_version", LEGACY_BACK_COMPATIBLE_PL_VERSIONS)
+@RunIf(sklearn=True)
 def test_resume_legacy_checkpoints(tmpdir, pl_version: str):
     PATH_LEGACY = os.path.join(LEGACY_CHECKPOINTS_PATH, pl_version)
     with patch("sys.path", [PATH_LEGACY] + sys.path):
-        from simple_classif_training import ClassifDataModule, ClassificationModel
-
         path_ckpts = sorted(glob.glob(os.path.join(PATH_LEGACY, f"*{CHECKPOINT_EXTENSION}")))
         assert path_ckpts, f'No checkpoints found in folder "{PATH_LEGACY}"'
         path_ckpt = path_ckpts[-1]
 
-        dm = ClassifDataModule()
-        model = ClassificationModel()
+        dm = ClassifDataModule(num_features=24, length=6000, batch_size=128, n_clusters_per_class=2, n_informative=8)
+        model = ClassificationModel(num_features=24)
         stop = LimitNbEpochs(1)
 
         trainer = Trainer(
diff --git a/tests/tests_pytorch/core/test_datamodules.py b/tests/tests_pytorch/core/test_datamodules.py
index 84370ade97..d951f2ad55 100644
--- a/tests/tests_pytorch/core/test_datamodules.py
+++ b/tests/tests_pytorch/core/test_datamodules.py
@@ -148,6 +148,7 @@ def test_dm_pickle_after_init():
     pickle.dumps(dm)
 
 
+@RunIf(sklearn=True)
 def test_train_loop_only(tmpdir):
     seed_everything(7)
 
@@ -169,6 +170,7 @@ def test_train_loop_only(tmpdir):
     assert trainer.callback_metrics["train_loss"] < 1.1
 
 
+@RunIf(sklearn=True)
 def test_train_val_loop_only(tmpdir):
     seed_everything(7)
 
@@ -226,6 +228,7 @@ def test_dm_checkpoint_save_and_load(tmpdir):
         assert dm.my_state_dict == {"my": "state_dict"}
 
 
+@RunIf(sklearn=True)
 def test_full_loop(tmpdir):
     seed_everything(7)
 
diff --git a/tests/tests_pytorch/helpers/datamodules.py b/tests/tests_pytorch/helpers/datamodules.py
index 0b12996e2b..c0915df0fb 100644
--- a/tests/tests_pytorch/helpers/datamodules.py
+++ b/tests/tests_pytorch/helpers/datamodules.py
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import pytest
 import torch
 from lightning_utilities.core.imports import RequirementCache
 from torch.utils.data import DataLoader
@@ -54,7 +53,8 @@ class MNISTDataModule(LightningDataModule):
 class SklearnDataModule(LightningDataModule):
     def __init__(self, sklearn_dataset, x_type, y_type, batch_size: int = 10):
         if not _SKLEARN_AVAILABLE:
-            pytest.skip(str(_SKLEARN_AVAILABLE))
+            raise ImportError(str(_SKLEARN_AVAILABLE))
+
         super().__init__()
         self.batch_size = batch_size
         self._x, self._y = sklearn_dataset
@@ -74,7 +74,8 @@ class SklearnDataModule(LightningDataModule):
 
     def train_dataloader(self):
         return DataLoader(
-            SklearnDataset(self.x_train, self.y_train, self._x_type, self._y_type), batch_size=self.batch_size
+            SklearnDataset(self.x_train, self.y_train, self._x_type, self._y_type),
+            batch_size=self.batch_size,
         )
 
     def val_dataloader(self):
@@ -98,13 +99,21 @@ class SklearnDataModule(LightningDataModule):
 
 
 class ClassifDataModule(SklearnDataModule):
-    def __init__(self, num_features=32, length=800, num_classes=3, batch_size=10):
+    def __init__(
+        self, num_features=32, length=800, num_classes=3, batch_size=10, n_clusters_per_class=1, n_informative=2
+    ):
         if not _SKLEARN_AVAILABLE:
-            pytest.skip(str(_SKLEARN_AVAILABLE))
+            raise ImportError(str(_SKLEARN_AVAILABLE))
+
         from sklearn.datasets import make_classification
 
         data = make_classification(
-            n_samples=length, n_features=num_features, n_classes=num_classes, n_clusters_per_class=1, random_state=42
+            n_samples=length,
+            n_features=num_features,
+            n_classes=num_classes,
+            n_clusters_per_class=n_clusters_per_class,
+            n_informative=n_informative,
+            random_state=42,
         )
         super().__init__(data, x_type=torch.float32, y_type=torch.long, batch_size=batch_size)
 
@@ -112,7 +121,8 @@ class ClassifDataModule(SklearnDataModule):
 class RegressDataModule(SklearnDataModule):
     def __init__(self, num_features=16, length=800, batch_size=10):
         if not _SKLEARN_AVAILABLE:
-            pytest.skip(str(_SKLEARN_AVAILABLE))
+            raise ImportError(str(_SKLEARN_AVAILABLE))
+
         from sklearn.datasets import make_regression
 
         x, y = make_regression(n_samples=length, n_features=num_features, random_state=42)
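
Two changes in this helper file: construction failures are now a hard ImportError
rather than a pytest.skip, so the helpers no longer depend on pytest and misuse
outside a RunIf(sklearn=True) test fails loudly; and make_classification's
n_clusters_per_class/n_informative are exposed as constructor arguments. The new
defaults (1 and 2) match the previous behavior: 1 was hard-coded, and 2 is
scikit-learn's default for n_informative. For example, the legacy-checkpoint tests
can now request the old 24-feature geometry:

    dm = ClassifDataModule(num_features=24, num_classes=3, n_clusters_per_class=2, n_informative=8)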
diff --git a/tests/tests_pytorch/helpers/runif.py b/tests/tests_pytorch/helpers/runif.py
index ba9ccfb3df..5323461d52 100644
--- a/tests/tests_pytorch/helpers/runif.py
+++ b/tests/tests_pytorch/helpers/runif.py
@@ -39,6 +39,7 @@ from pytorch_lightning.utilities.imports import (
     _TORCH_GREATER_EQUAL_1_10,
     _TORCH_QUANTIZE_AVAILABLE,
 )
+from tests_pytorch.helpers.datamodules import _SKLEARN_AVAILABLE
 
 _HOROVOD_NCCL_AVAILABLE = False
 if _HOROVOD_AVAILABLE:
@@ -90,6 +91,7 @@ class RunIf:
         colossalai: bool = False,
         psutil: bool = False,
         hivemind: bool = False,
+        sklearn: bool = False,
         **kwargs,
     ):
         """
@@ -121,6 +123,7 @@ class RunIf:
             bagua: Require that BaguaSys/bagua is installed.
             psutil: Require that psutil is installed.
             hivemind: Require that Hivemind is installed.
+            sklearn: Require that scikit-learn is installed.
             **kwargs: Any :class:`pytest.mark.skipif` keyword arguments.
         """
         conditions = []
@@ -257,6 +260,10 @@ class RunIf:
             conditions.append(not _HIVEMIND_AVAILABLE or sys.platform in ("win32", "darwin"))
             reasons.append("Hivemind")
 
+        if sklearn:
+            conditions.append(not _SKLEARN_AVAILABLE)
+            reasons.append("scikit-learn")
+
         reasons = [rs for cond, rs in zip(conditions, reasons) if cond]
         return pytest.mark.skipif(
             *args, condition=any(conditions), reason=f"Requires: [{' + '.join(reasons)}]", **kwargs
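
_SKLEARN_AVAILABLE is imported from the datamodules helper above, where it is
presumably a lightning_utilities RequirementCache (that module imports
RequirementCache at the top). A RequirementCache is falsy when the requirement
cannot be satisfied, and its str() carries the reason, which is what the helper
constructors put into the ImportError; a sketch of the assumed definition:

    from lightning_utilities.core.imports import RequirementCache

    _SKLEARN_AVAILABLE = RequirementCache("scikit-learn")  # assumed definition
    bool(_SKLEARN_AVAILABLE)  # False when scikit-learn cannot be imported
    str(_SKLEARN_AVAILABLE)   # human-readable explanation, reused in the ImportError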
diff --git a/tests/tests_pytorch/helpers/simple_models.py b/tests/tests_pytorch/helpers/simple_models.py
index 7484263909..8360819f1c 100644
--- a/tests/tests_pytorch/helpers/simple_models.py
+++ b/tests/tests_pytorch/helpers/simple_models.py
@@ -20,14 +20,14 @@ from pytorch_lightning import LightningModule
 
 
 class ClassificationModel(LightningModule):
-    def __init__(self, lr=0.01):
+    def __init__(self, num_features=32, num_classes=3, lr=0.01):
         super().__init__()
 
         self.lr = lr
         for i in range(3):
-            setattr(self, f"layer_{i}", nn.Linear(32, 32))
+            setattr(self, f"layer_{i}", nn.Linear(num_features, num_features))
             setattr(self, f"layer_{i}a", torch.nn.ReLU())
-        setattr(self, "layer_end", nn.Linear(32, 3))
+        setattr(self, "layer_end", nn.Linear(num_features, 3))
 
         self.train_acc = Accuracy()
         self.valid_acc = Accuracy()
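
ClassificationModel previously hard-coded 32 inputs; the legacy checkpoints were
trained with 24 (see the deleted legacy script), so parametrizing num_features lets
the checkpoint tests rebuild a matching architecture:

    # legacy checkpoints used 24 features, overriding the helper's new default of 32
    model = ClassificationModel.load_from_checkpoint(path_ckpt, num_features=24)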
diff --git a/tests/tests_pytorch/helpers/test_models.py b/tests/tests_pytorch/helpers/test_models.py
index 0b38e31e0a..547c9c1806 100644
--- a/tests/tests_pytorch/helpers/test_models.py
+++ b/tests/tests_pytorch/helpers/test_models.py
@@ -19,6 +19,7 @@ from pytorch_lightning import Trainer
 from pytorch_lightning.demos.boring_classes import BoringModel
 from tests_pytorch.helpers.advanced_models import BasicGAN, ParityModuleMNIST, ParityModuleRNN
 from tests_pytorch.helpers.datamodules import ClassifDataModule, RegressDataModule
+from tests_pytorch.helpers.runif import RunIf
 from tests_pytorch.helpers.simple_models import ClassificationModel, RegressionModel
 
 
@@ -29,8 +30,8 @@ from tests_pytorch.helpers.simple_models import ClassificationModel, RegressionM
         (None, BasicGAN),
         (None, ParityModuleRNN),
         (None, ParityModuleMNIST),
-        (ClassifDataModule, ClassificationModel),
-        (RegressDataModule, RegressionModel),
+        pytest.param(ClassifDataModule, ClassificationModel, marks=RunIf(sklearn=True)),
+        pytest.param(RegressDataModule, RegressionModel, marks=RunIf(sklearn=True)),
     ],
 )
 def test_models(tmpdir, data_class, model_class):
diff --git a/tests/tests_pytorch/loggers/test_csv.py b/tests/tests_pytorch/loggers/test_csv.py
index 6b57936700..8c963108b8 100644
--- a/tests/tests_pytorch/loggers/test_csv.py
+++ b/tests/tests_pytorch/loggers/test_csv.py
@@ -22,6 +22,7 @@ from pytorch_lightning.core.saving import load_hparams_from_yaml
 from pytorch_lightning.loggers import CSVLogger
 from pytorch_lightning.loggers.csv_logs import ExperimentWriter
 from tests_pytorch.helpers.datamodules import ClassifDataModule
+from tests_pytorch.helpers.runif import RunIf
 from tests_pytorch.helpers.simple_models import ClassificationModel
 
 
@@ -107,6 +108,7 @@ def test_file_logger_log_hyperparams(tmpdir):
     assert all(n in params for n in hparams)
 
 
+@RunIf(sklearn=True)
 def test_fit_csv_logger(tmpdir):
     dm = ClassifDataModule()
     model = ClassificationModel()
diff --git a/tests/tests_pytorch/models/test_cpu.py b/tests/tests_pytorch/models/test_cpu.py
index 10e9b6f916..d8e598ca6a 100644
--- a/tests/tests_pytorch/models/test_cpu.py
+++ b/tests/tests_pytorch/models/test_cpu.py
@@ -126,7 +126,7 @@ def test_early_stopping_cpu_model(tmpdir):
     model.unfreeze()
 
 
-@RunIf(skip_windows=True)
+@RunIf(skip_windows=True, sklearn=True)
 def test_multi_cpu_model_ddp(tmpdir):
     """Make sure DDP works."""
     trainer_options = dict(
diff --git a/tests/tests_pytorch/models/test_gpu.py b/tests/tests_pytorch/models/test_gpu.py
index d2cbec7c40..8c4dd78ef2 100644
--- a/tests/tests_pytorch/models/test_gpu.py
+++ b/tests/tests_pytorch/models/test_gpu.py
@@ -33,7 +33,7 @@ from tests_pytorch.helpers.simple_models import ClassificationModel
 PRETEND_N_OF_GPUS = 16
 
 
-@RunIf(min_cuda_gpus=2)
+@RunIf(min_cuda_gpus=2, sklearn=True)
 def test_multi_gpu_none_backend(tmpdir):
     """Make sure when using multiple GPUs the user can't use `accelerator = None`."""
     trainer_options = dict(
diff --git a/tests/tests_pytorch/models/test_horovod.py b/tests/tests_pytorch/models/test_horovod.py
index 87557db2a4..3d223ef93a 100644
--- a/tests/tests_pytorch/models/test_horovod.py
+++ b/tests/tests_pytorch/models/test_horovod.py
@@ -21,7 +21,6 @@ from unittest.mock import patch
 import numpy as np
 import pytest
 import torch
-from sklearn.metrics import accuracy_score
 from torch import optim
 from torchmetrics.classification.accuracy import Accuracy
 
@@ -368,8 +367,10 @@ def test_result_reduce_horovod(tmpdir):
 
 # todo: need to be fixed :]
 @pytest.mark.skip(reason="TODO: CI agent.jobstatus=Succeeded: Permission denied")
-@RunIf(horovod=True, skip_windows=True, num_gpus=2)
+@RunIf(horovod=True, skip_windows=True, num_gpus=2, sklearn=True)
 def test_accuracy_metric_horovod():
+    from sklearn.metrics import accuracy_score
+
     num_batches = 10
     batch_size = 16
     threshold = 0.5
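
Moving the sklearn import into the test body means merely importing (or collecting)
test_horovod.py no longer requires scikit-learn; combined with sklearn=True in
RunIf, the import only runs when the test is actually executed:

    def test_accuracy_metric_horovod():
        from sklearn.metrics import accuracy_score  # deferred: evaluated only when the test runs
        ...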
diff --git a/tests/tests_pytorch/models/test_restore.py b/tests/tests_pytorch/models/test_restore.py
index c640e2268a..8648d9ba1a 100644
--- a/tests/tests_pytorch/models/test_restore.py
+++ b/tests/tests_pytorch/models/test_restore.py
@@ -124,6 +124,7 @@ def test_model_properties_fit_ckpt_path(tmpdir):
     trainer.fit(model, ckpt_path=str(tmpdir / "last.ckpt"))
 
 
+@RunIf(sklearn=True)
 def test_trainer_properties_restore_ckpt_path(tmpdir):
     """Test that required trainer properties are set correctly when resuming from checkpoint in different
     phases."""
@@ -315,6 +316,7 @@ class CaptureCallbacksBeforeTraining(Callback):
         self.callbacks = deepcopy(trainer.callbacks)
 
 
+@RunIf(sklearn=True)
 def test_callbacks_state_fit_ckpt_path(tmpdir):
     """Test that resuming from a checkpoint restores callbacks that persist state."""
     dm = ClassifDataModule()
@@ -360,6 +362,7 @@ def test_callbacks_state_fit_ckpt_path(tmpdir):
                 assert getattr(before, attribute) == getattr(after, attribute)
 
 
+@RunIf(sklearn=True)
 def test_callbacks_references_fit_ckpt_path(tmpdir):
     """Test that resuming from a checkpoint sets references as expected."""
     dm = ClassifDataModule()
@@ -388,9 +391,11 @@ def test_callbacks_references_fit_ckpt_path(tmpdir):
     trainer.fit(model, datamodule=dm, ckpt_path=str(tmpdir / "last.ckpt"))
 
 
-@RunIf(min_cuda_gpus=2)
+@RunIf(min_cuda_gpus=2, sklearn=True)
 def test_running_test_pretrained_model_distrib_dp(tmpdir):
     """Verify `test()` on pretrained model."""
+    seed_everything(7)
+
     dm = ClassifDataModule()
     model = CustomClassificationModelDP(lr=0.1)
 
@@ -434,7 +439,7 @@ def test_running_test_pretrained_model_distrib_dp(tmpdir):
         tpipes.run_model_prediction(pretrained_model, dataloader)
 
 
-@RunIf(min_cuda_gpus=2)
+@RunIf(min_cuda_gpus=2, sklearn=True)
 def test_running_test_pretrained_model_distrib_ddp_spawn(tmpdir):
     """Verify `test()` on pretrained model."""
     dm = ClassifDataModule()
@@ -482,6 +487,7 @@ def test_running_test_pretrained_model_distrib_ddp_spawn(tmpdir):
         tpipes.run_model_prediction(pretrained_model, dataloader, min_acc=0.1)
 
 
+@RunIf(sklearn=True)
 def test_running_test_pretrained_model_cpu(tmpdir):
     """Verify test() on pretrained model."""
     seed_everything(1)
@@ -567,7 +573,7 @@ def test_load_model_from_checkpoint(tmpdir, model_template):
     new_trainer.test(pretrained_model)
 
 
-@RunIf(min_cuda_gpus=2)
+@RunIf(min_cuda_gpus=2, sklearn=True)
 def test_dp_resume(tmpdir):
     """Make sure DP continues training correctly."""
     model = CustomClassificationModelDP(lr=0.1)
diff --git a/tests/tests_pytorch/strategies/test_colossalai.py b/tests/tests_pytorch/strategies/test_colossalai.py
index 15ee03d441..608fcb25ab 100644
--- a/tests/tests_pytorch/strategies/test_colossalai.py
+++ b/tests/tests_pytorch/strategies/test_colossalai.py
@@ -239,7 +239,7 @@ class ModelParallelClassificationModel(LightningModule):
         return self.forward(x)
 
 
-@RunIf(min_cuda_gpus=2, standalone=True, colossalai=True)
+@RunIf(min_cuda_gpus=2, standalone=True, colossalai=True, sklearn=True)
 def test_multi_gpu_checkpointing(tmpdir):
     dm = ClassifDataModule()
     model = ModelParallelClassificationModel()
@@ -267,7 +267,7 @@ def test_multi_gpu_checkpointing(tmpdir):
     assert saved_results == results
 
 
-@RunIf(min_cuda_gpus=2, standalone=True, colossalai=True)
+@RunIf(min_cuda_gpus=2, standalone=True, colossalai=True, sklearn=True)
 def test_multi_gpu_model_colossalai_fit_test(tmpdir):
     seed_everything(7)
 
diff --git a/tests/tests_pytorch/strategies/test_common.py b/tests/tests_pytorch/strategies/test_common.py
index 25cc26d2ba..971bcc9bf2 100644
--- a/tests/tests_pytorch/strategies/test_common.py
+++ b/tests/tests_pytorch/strategies/test_common.py
@@ -30,6 +30,7 @@ from tests_pytorch.strategies.test_dp import CustomClassificationModelDP
         pytest.param(dict(accelerator="mps", devices=1), marks=RunIf(mps=True)),
     ),
 )
+@RunIf(sklearn=True)
 def test_evaluate(tmpdir, trainer_kwargs):
     dm = ClassifDataModule()
     model = CustomClassificationModelDP()
diff --git a/tests/tests_pytorch/strategies/test_ddp.py b/tests/tests_pytorch/strategies/test_ddp.py
index 561a00c193..d95c76e20d 100644
--- a/tests/tests_pytorch/strategies/test_ddp.py
+++ b/tests/tests_pytorch/strategies/test_ddp.py
@@ -27,7 +27,7 @@ from tests_pytorch.helpers.runif import RunIf
 from tests_pytorch.helpers.simple_models import ClassificationModel
 
 
-@RunIf(min_cuda_gpus=2, standalone=True)
+@RunIf(min_cuda_gpus=2, standalone=True, sklearn=True)
 def test_multi_gpu_model_ddp_fit_only(tmpdir):
     dm = ClassifDataModule()
     model = ClassificationModel()
@@ -35,7 +35,7 @@ def test_multi_gpu_model_ddp_fit_only(tmpdir):
     trainer.fit(model, datamodule=dm)
 
 
-@RunIf(min_cuda_gpus=2, standalone=True)
+@RunIf(min_cuda_gpus=2, standalone=True, sklearn=True)
 def test_multi_gpu_model_ddp_test_only(tmpdir):
     dm = ClassifDataModule()
     model = ClassificationModel()
@@ -43,7 +43,7 @@ def test_multi_gpu_model_ddp_test_only(tmpdir):
     trainer.test(model, datamodule=dm)
 
 
-@RunIf(min_cuda_gpus=2, standalone=True)
+@RunIf(min_cuda_gpus=2, standalone=True, sklearn=True)
 def test_multi_gpu_model_ddp_fit_test(tmpdir):
     seed_everything(4321)
     dm = ClassifDataModule()
diff --git a/tests/tests_pytorch/strategies/test_ddp_spawn.py b/tests/tests_pytorch/strategies/test_ddp_spawn.py
index fa20bbec19..153bea0234 100644
--- a/tests/tests_pytorch/strategies/test_ddp_spawn.py
+++ b/tests/tests_pytorch/strategies/test_ddp_spawn.py
@@ -20,7 +20,7 @@ from tests_pytorch.helpers.runif import RunIf
 from tests_pytorch.helpers.simple_models import ClassificationModel
 
 
-@RunIf(min_cuda_gpus=2)
+@RunIf(min_cuda_gpus=2, sklearn=True)
 def test_multi_gpu_early_stop_ddp_spawn(tmpdir):
     trainer_options = dict(
         default_root_dir=tmpdir,
diff --git a/tests/tests_pytorch/strategies/test_deepspeed_strategy.py b/tests/tests_pytorch/strategies/test_deepspeed_strategy.py
index 1da2a77e93..786cfd1ab1 100644
--- a/tests/tests_pytorch/strategies/test_deepspeed_strategy.py
+++ b/tests/tests_pytorch/strategies/test_deepspeed_strategy.py
@@ -690,7 +690,7 @@ def test_deepspeed_multigpu_stage_3_manual_optimization(tmpdir, deepspeed_config
 
 @pytest.mark.skip(reason="skipped due to deepspeed/#2449, keep track @rohitgr7")
 @pytest.mark.parametrize(("accumulate_grad_batches", "automatic_optimization"), [(1, False), (2, True)])
-@RunIf(min_cuda_gpus=2, standalone=True, deepspeed=True)
+@RunIf(min_cuda_gpus=2, standalone=True, deepspeed=True, sklearn=True)
 def test_deepspeed_multigpu_stage_3_checkpointing(tmpdir, automatic_optimization, accumulate_grad_batches):
     if automatic_optimization:
         model = ModelParallelClassificationModel()
@@ -732,7 +732,7 @@ def test_deepspeed_multigpu_stage_3_checkpointing(tmpdir, automatic_optimization
     trainer.test(model, datamodule=dm, ckpt_path=ck.best_model_path)
 
 
-@RunIf(min_cuda_gpus=1, standalone=True, deepspeed=True)
+@RunIf(min_cuda_gpus=1, standalone=True, deepspeed=True, sklearn=True)
 def test_deepspeed_multigpu_stage_3_warns_resume_training(tmpdir):
     """Test to ensure with Stage 3 and multiple GPUs that we can resume from training, throwing a warning that the
     optimizer state and scheduler states cannot be restored."""
@@ -767,7 +767,7 @@ def test_deepspeed_multigpu_stage_3_warns_resume_training(tmpdir):
         trainer.fit(model, datamodule=dm, ckpt_path=checkpoint_path)
 
 
-@RunIf(min_cuda_gpus=1, standalone=True, deepspeed=True)
+@RunIf(min_cuda_gpus=1, standalone=True, deepspeed=True, sklearn=True)
 def test_deepspeed_multigpu_stage_3_resume_training(tmpdir):
     """Test to ensure with Stage 3 and single GPU that we can resume training."""
     initial_model = ModelParallelClassificationModel()
@@ -833,7 +833,7 @@ def test_deepspeed_multigpu_stage_3_resume_training(tmpdir):
 
 
 @pytest.mark.parametrize("offload_optimizer", [False, True])
-@RunIf(min_cuda_gpus=2, standalone=True, deepspeed=True)
+@RunIf(min_cuda_gpus=2, standalone=True, deepspeed=True, sklearn=True)
 def test_deepspeed_multigpu_stage_2_accumulated_grad_batches(tmpdir, offload_optimizer):
     """Test to ensure with Stage 2 and multiple GPUs, accumulated grad batches works."""
 
diff --git a/tests/tests_pytorch/strategies/test_dp.py b/tests/tests_pytorch/strategies/test_dp.py
index 48b6e7577a..ed7e265857 100644
--- a/tests/tests_pytorch/strategies/test_dp.py
+++ b/tests/tests_pytorch/strategies/test_dp.py
@@ -49,7 +49,7 @@ class CustomClassificationModelDP(ClassificationModel):
         self.log("test_acc", self.test_acc(outputs["logits"], outputs["y"]))
 
 
-@RunIf(min_cuda_gpus=2)
+@RunIf(min_cuda_gpus=2, sklearn=True)
 def test_multi_gpu_early_stop_dp(tmpdir):
     """Make sure DDP works.
 
diff --git a/tests/tests_pytorch/trainer/flags/test_overfit_batches.py b/tests/tests_pytorch/trainer/flags/test_overfit_batches.py
index dc73e76cc3..bed0e58cc9 100644
--- a/tests/tests_pytorch/trainer/flags/test_overfit_batches.py
+++ b/tests/tests_pytorch/trainer/flags/test_overfit_batches.py
@@ -11,15 +11,20 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import os
+from unittest import mock
+
 import pytest
 import torch
-from legacy.simple_classif_training import ClassifDataModule, ClassificationModel
 from torch.utils.data import DataLoader, DistributedSampler, RandomSampler, Sampler, SequentialSampler
 
 from pytorch_lightning import Trainer
 from pytorch_lightning.demos.boring_classes import BoringModel, RandomDataset
 from pytorch_lightning.trainer.states import RunningStage
+from tests_pytorch.helpers.datamodules import ClassifDataModule
+from tests_pytorch.helpers.datasets import SklearnDataset
 from tests_pytorch.helpers.runif import RunIf
+from tests_pytorch.helpers.simple_models import ClassificationModel
 
 
 @pytest.mark.parametrize("overfit_batches", [1, 2, 0.1, 0.25, 1.0])
@@ -78,6 +83,8 @@ def test_overfit_batches_raises_warning_in_case_of_sequential_sampler(tmpdir):
     [(RunningStage.VALIDATING, "val"), (RunningStage.TESTING, "test"), (RunningStage.PREDICTING, "predict")],
 )
 @pytest.mark.parametrize("overfit_batches", [0.11, 4])
+@RunIf(sklearn=True)
+@mock.patch.dict(os.environ, os.environ.copy(), clear=True)
 def test_overfit_batch_limits_eval(stage, mode, overfit_batches):
     model = ClassificationModel()
     dm = ClassifDataModule()
@@ -99,9 +106,18 @@ def test_overfit_batch_limits_eval(stage, mode, overfit_batches):
 
 
 @pytest.mark.parametrize("overfit_batches", [0.11, 4])
+@RunIf(sklearn=True)
 def test_overfit_batch_limits_train(overfit_batches):
+    class CustomDataModule(ClassifDataModule):
+        def train_dataloader(self):
+            return DataLoader(
+                SklearnDataset(self.x_train, self.y_train, self._x_type, self._y_type),
+                batch_size=self.batch_size,
+                shuffle=True,
+            )
+
     model = ClassificationModel()
-    dm = ClassifDataModule()
+    dm = CustomDataModule()
 
     # original train loader which should be replaced in all methods
     train_loader = dm.train_dataloader()
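
The shared helper's train_dataloader (see the datamodules.py hunk earlier) does not
shuffle, while this test needs a shuffling loader, presumably so that the
overfit_batches logic has a RandomSampler to replace with a sequential one; hence
the local subclass re-enables shuffle=True. A hypothetical shape of the check this
enables (the actual asserts sit outside this hunk):

    assert isinstance(dm.train_dataloader().sampler, RandomSampler)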
diff --git a/tests/tests_pytorch/trainer/test_trainer.py b/tests/tests_pytorch/trainer/test_trainer.py
index 1bd619a0cb..21d2525f21 100644
--- a/tests/tests_pytorch/trainer/test_trainer.py
+++ b/tests/tests_pytorch/trainer/test_trainer.py
@@ -1099,6 +1099,7 @@ def test_on_exception_hook(tmpdir):
 
 
 @pytest.mark.parametrize("precision", [32, pytest.param(16, marks=RunIf(min_cuda_gpus=1))])
+@RunIf(sklearn=True)
 def test_gradient_clipping_by_norm(tmpdir, precision):
     """Test gradient clipping by norm."""
     trainer = Trainer(
diff --git a/tests/tests_pytorch/tuner/test_lr_finder.py b/tests/tests_pytorch/tuner/test_lr_finder.py
index f811d796d3..d88fe41b5c 100644
--- a/tests/tests_pytorch/tuner/test_lr_finder.py
+++ b/tests/tests_pytorch/tuner/test_lr_finder.py
@@ -14,6 +14,7 @@
 import logging
 import os
 from copy import deepcopy
+from unittest import mock
 
 import pytest
 import torch
@@ -24,6 +25,7 @@ from pytorch_lightning.demos.boring_classes import BoringModel
 from pytorch_lightning.tuner.lr_finder import _LRFinder
 from pytorch_lightning.utilities.exceptions import MisconfigurationException
 from tests_pytorch.helpers.datamodules import ClassifDataModule
+from tests_pytorch.helpers.runif import RunIf
 from tests_pytorch.helpers.simple_models import ClassificationModel
 from tests_pytorch.helpers.utils import getattr_recursive, no_warning_call
 
@@ -194,12 +196,14 @@ def test_call_to_trainer_method(tmpdir, opt):
     assert before_lr != after_lr, "Learning rate was not altered after running learning rate finder"
 
 
+@RunIf(sklearn=True)
+@mock.patch.dict(os.environ, os.environ.copy(), clear=True)
 def test_datamodule_parameter(tmpdir):
     """Test that the datamodule parameter works."""
     seed_everything(1)
 
     dm = ClassifDataModule()
-    model = ClassificationModel()
+    model = ClassificationModel(lr=1e-3)
 
     before_lr = model.lr
     # logger file to get meta