Use sklearn in runif (#15426)

* Use sklearn in runif
* test by removing sklearn dep
* remove repeated code
* seed
2022-11-01 17:10:32 +05:30 · 2022-11-01 17:10:32 +05:30 · 61ae35c378
parent 7ee0994bfc
commit 61ae35c378
26 changed files with 114 additions and 177 deletions
--- a/tests/legacy/simple_classif_training.py
+++ b/tests/legacy/simple_classif_training.py
@ -14,141 +14,16 @@
 import os

 import torch
-import torch.nn.functional as F
-from sklearn.datasets import make_classification
-from sklearn.model_selection import train_test_split
-from torch import nn
-from torch.utils.data import DataLoader, Dataset
-from torchmetrics import Accuracy

 import pytorch_lightning as pl
-from pytorch_lightning import LightningDataModule, LightningModule, seed_everything
+from pytorch_lightning import seed_everything
 from pytorch_lightning.callbacks import EarlyStopping
+from tests_pytorch.helpers.datamodules import ClassifDataModule
+from tests_pytorch.helpers.simple_models import ClassificationModel

 PATH_LEGACY = os.path.dirname(__file__)


-class SklearnDataset(Dataset):
-    def __init__(self, x, y, x_type, y_type):
-        self.x = x
-        self.y = y
-        self._x_type = x_type
-        self._y_type = y_type
-
-    def __getitem__(self, idx):
-        return torch.tensor(self.x[idx], dtype=self._x_type), torch.tensor(self.y[idx], dtype=self._y_type)
-
-    def __len__(self):
-        return len(self.y)
-
-
-class SklearnDataModule(LightningDataModule):
-    def __init__(self, sklearn_dataset, x_type, y_type, batch_size: int = 128):
-        super().__init__()
-        self.batch_size = batch_size
-        self._x, self._y = sklearn_dataset
-        self._split_data()
-        self._x_type = x_type
-        self._y_type = y_type
-
-    def _split_data(self):
-        self.x_train, self.x_test, self.y_train, self.y_test = train_test_split(
-            self._x, self._y, test_size=0.20, random_state=42
-        )
-        self.x_train, self.x_predict, self.y_train, self.y_predict = train_test_split(
-            self._x, self._y, test_size=0.20, random_state=42
-        )
-        self.x_train, self.x_valid, self.y_train, self.y_valid = train_test_split(
-            self.x_train, self.y_train, test_size=0.40, random_state=42
-        )
-
-    def train_dataloader(self):
-        return DataLoader(
-            SklearnDataset(self.x_train, self.y_train, self._x_type, self._y_type),
-            shuffle=True,
-            batch_size=self.batch_size,
-        )
-
-    def val_dataloader(self):
-        return DataLoader(
-            SklearnDataset(self.x_valid, self.y_valid, self._x_type, self._y_type), batch_size=self.batch_size
-        )
-
-    def test_dataloader(self):
-        return DataLoader(
-            SklearnDataset(self.x_test, self.y_test, self._x_type, self._y_type), batch_size=self.batch_size
-        )
-
-    def predict_dataloader(self):
-        return DataLoader(
-            SklearnDataset(self.x_predict, self.y_predict, self._x_type, self._y_type), batch_size=self.batch_size
-        )
-
-
-class ClassifDataModule(SklearnDataModule):
-    def __init__(self, num_features=24, length=6000, num_classes=3, batch_size=128):
-        data = make_classification(
-            n_samples=length,
-            n_features=num_features,
-            n_classes=num_classes,
-            n_clusters_per_class=2,
-            n_informative=int(num_features / num_classes),
-            random_state=42,
-        )
-        super().__init__(data, x_type=torch.float32, y_type=torch.long, batch_size=batch_size)
-
-
-class ClassificationModel(LightningModule):
-    def __init__(self, num_features=24, num_classes=3, lr=0.01):
-        super().__init__()
-        self.save_hyperparameters()
-
-        self.lr = lr
-        for i in range(3):
-            setattr(self, f"layer_{i}", nn.Linear(num_features, num_features))
-            setattr(self, f"layer_{i}a", torch.nn.ReLU())
-        setattr(self, "layer_end", nn.Linear(num_features, num_classes))
-
-        self.train_acc = Accuracy()
-        self.valid_acc = Accuracy()
-        self.test_acc = Accuracy()
-
-    def forward(self, x):
-        x = self.layer_0(x)
-        x = self.layer_0a(x)
-        x = self.layer_1(x)
-        x = self.layer_1a(x)
-        x = self.layer_2(x)
-        x = self.layer_2a(x)
-        x = self.layer_end(x)
-        logits = F.softmax(x, dim=1)
-        return logits
-
-    def configure_optimizers(self):
-        optimizer = torch.optim.Adam(self.parameters(), lr=self.lr)
-        return [optimizer], []
-
-    def training_step(self, batch, batch_idx):
-        x, y = batch
-        logits = self.forward(x)
-        loss = F.cross_entropy(logits, y)
-        self.log("train_loss", loss, prog_bar=True)
-        self.log("train_acc", self.train_acc(logits, y), prog_bar=True)
-        return {"loss": loss}
-
-    def validation_step(self, batch, batch_idx):
-        x, y = batch
-        logits = self.forward(x)
-        self.log("val_loss", F.cross_entropy(logits, y), prog_bar=False)
-        self.log("val_acc", self.valid_acc(logits, y), prog_bar=True)
-
-    def test_step(self, batch, batch_idx):
-        x, y = batch
-        logits = self.forward(x)
-        self.log("test_loss", F.cross_entropy(logits, y), prog_bar=False)
-        self.log("test_acc", self.test_acc(logits, y), prog_bar=True)
-
-
 def main_train(dir_path, max_epochs: int = 20):
    seed_everything(42)
    stopping = EarlyStopping(monitor="val_acc", mode="max", min_delta=0.005)
--- a/tests/tests_pytorch/accelerators/test_hpu.py
+++ b/tests/tests_pytorch/accelerators/test_hpu.py
@ -76,8 +76,8 @@ def test_all_stages(tmpdir, hpus):
    trainer.predict(model)


-@RunIf(hpu=True)
-@mock.patch.dict(os.environ, os.environ.copy())
+@RunIf(hpu=True, sklearn=True)
+@mock.patch.dict(os.environ, os.environ.copy(), clear=True)
 def test_optimization(tmpdir):
    seed_everything(42)

--- a/tests/tests_pytorch/accelerators/test_ipu.py
+++ b/tests/tests_pytorch/accelerators/test_ipu.py
@ -149,7 +149,8 @@ def test_inference_only(tmpdir, devices):
    trainer.predict(model)


-@RunIf(ipu=True)
+@RunIf(ipu=True, sklearn=True)
+@mock.patch.dict(os.environ, os.environ.copy(), clear=True)
 def test_optimization(tmpdir):
    seed_everything(42)

--- a/tests/tests_pytorch/callbacks/test_early_stopping.py
+++ b/tests/tests_pytorch/callbacks/test_early_stopping.py
@ -13,6 +13,7 @@
 # limitations under the License.
 import logging
 import math
+import os
 import pickle
 from typing import List, Optional
 from unittest import mock
@ -56,6 +57,8 @@ class EarlyStoppingTestRestore(EarlyStopping):
        self.saved_states.append(self.state_dict().copy())


+@RunIf(sklearn=True)
+@mock.patch.dict(os.environ, os.environ.copy(), clear=True)
 def test_resume_early_stopping_from_checkpoint(tmpdir):
    """Prevent regressions to bugs:

@ -98,6 +101,7 @@ def test_resume_early_stopping_from_checkpoint(tmpdir):
        new_trainer.fit(model, datamodule=dm, ckpt_path=checkpoint_filepath)


+@RunIf(sklearn=True)
 def test_early_stopping_no_extraneous_invocations(tmpdir):
    """Test to ensure that callback methods aren't being invoked outside of the callback handler."""
    model = ClassificationModel()
@ -195,6 +199,7 @@ def test_pickling(tmpdir):
    assert vars(early_stopping) == vars(early_stopping_loaded)


+@RunIf(sklearn=True)
 def test_early_stopping_no_val_step(tmpdir):
    """Test that early stopping callback falls back to training metrics when no validation defined."""

--- a/tests/tests_pytorch/callbacks/test_lr_monitor.py
+++ b/tests/tests_pytorch/callbacks/test_lr_monitor.py
@ -22,6 +22,7 @@ from pytorch_lightning.callbacks.finetuning import BackboneFinetuning
 from pytorch_lightning.demos.boring_classes import BoringModel
 from pytorch_lightning.utilities.exceptions import MisconfigurationException
 from tests_pytorch.helpers.datamodules import ClassifDataModule
+from tests_pytorch.helpers.runif import RunIf
 from tests_pytorch.helpers.simple_models import ClassificationModel


@ -284,6 +285,7 @@ def test_lr_monitor_no_lr_scheduler_multi_lrs(tmpdir, logging_interval: str):
    assert all(len(lr) == expected_number_logged for lr in lr_monitor.lrs.values())


+@RunIf(sklearn=True)
 def test_lr_monitor_param_groups(tmpdir):
    """Test that learning rates are extracted and logged for single lr scheduler."""

--- a/tests/tests_pytorch/callbacks/test_quantization.py
+++ b/tests/tests_pytorch/callbacks/test_quantization.py
@ -35,7 +35,7 @@ from tests_pytorch.helpers.simple_models import RegressionModel
@pytest.mark.parametrize("observe", ["average", "histogram"])
@pytest.mark.parametrize("fuse", [True, False])
@pytest.mark.parametrize("convert", [True, False])
-@RunIf(quantization=True, max_torch="1.11")
+@RunIf(quantization=True, sklearn=True, max_torch="1.11")
 def test_quantization(tmpdir, observe: str, fuse: bool, convert: bool):
    """Parity test for quant model."""
    cuda_available = CUDAAccelerator.is_available()
@ -100,7 +100,7 @@ def test_quantization(tmpdir, observe: str, fuse: bool, convert: bool):
    assert torch.allclose(org_score, quant2_score, atol=0.45)


-@RunIf(quantization=True)
+@RunIf(quantization=True, sklearn=True)
 def test_quantize_torchscript(tmpdir):
    """Test converting to torchscipt."""
    dm = RegressDataModule()
@ -116,7 +116,7 @@ def test_quantize_torchscript(tmpdir):
    tsmodel(tsmodel.quant(batch[0]))


-@RunIf(quantization=True)
+@RunIf(quantization=True, sklearn=True)
 def test_quantization_exceptions(tmpdir):
    """Test wrong fuse layers."""
    with pytest.raises(MisconfigurationException, match="Unsupported qconfig"):
@ -157,7 +157,7 @@ def custom_trigger_last(trainer):
    "trigger_fn,expected_count",
    [(None, 9), (3, 3), (custom_trigger_never, 0), (custom_trigger_even, 5), (custom_trigger_last, 2)],
 )
-@RunIf(quantization=True)
+@RunIf(quantization=True, sklearn=True)
 def test_quantization_triggers(tmpdir, trigger_fn: Union[None, int, Callable], expected_count: int):
    """Test  how many times the quant is called."""
    dm = RegressDataModule()
@ -216,7 +216,7 @@ def test_quantization_disable_observers(tmpdir, observer_enabled_stages):
        )


-@RunIf(quantization=True)
+@RunIf(quantization=True, sklearn=True)
 def test_quantization_val_test_predict(tmpdir):
    """Test the default quantization aware training not affected by validating, testing and predicting."""
    seed_everything(42)
--- a/tests/tests_pytorch/checkpointing/test_legacy_checkpoints.py
+++ b/tests/tests_pytorch/checkpointing/test_legacy_checkpoints.py
@ -23,6 +23,9 @@ import torch
 import pytorch_lightning as pl
 from pytorch_lightning import Callback, Trainer
 from tests_pytorch import _PATH_LEGACY
+from tests_pytorch.helpers.datamodules import ClassifDataModule
+from tests_pytorch.helpers.runif import RunIf
+from tests_pytorch.helpers.simple_models import ClassificationModel

 LEGACY_CHECKPOINTS_PATH = os.path.join(_PATH_LEGACY, "checkpoints")
 CHECKPOINT_EXTENSION = ".ckpt"
@ -32,18 +35,17 @@ with open(os.path.join(_PATH_LEGACY, "back-compatible-versions.txt")) as fp:


@pytest.mark.parametrize("pl_version", LEGACY_BACK_COMPATIBLE_PL_VERSIONS)
+@RunIf(sklearn=True)
 def test_load_legacy_checkpoints(tmpdir, pl_version: str):
    PATH_LEGACY = os.path.join(LEGACY_CHECKPOINTS_PATH, pl_version)
    with patch("sys.path", [PATH_LEGACY] + sys.path):
-        from simple_classif_training import ClassifDataModule, ClassificationModel
-
        path_ckpts = sorted(glob.glob(os.path.join(PATH_LEGACY, f"*{CHECKPOINT_EXTENSION}")))
        assert path_ckpts, f'No checkpoints found in folder "{PATH_LEGACY}"'
        path_ckpt = path_ckpts[-1]

-        model = ClassificationModel.load_from_checkpoint(path_ckpt)
+        model = ClassificationModel.load_from_checkpoint(path_ckpt, num_features=24)
        trainer = Trainer(default_root_dir=str(tmpdir))
-        dm = ClassifDataModule()
+        dm = ClassifDataModule(num_features=24, length=6000, batch_size=128, n_clusters_per_class=2, n_informative=8)
        res = trainer.test(model, datamodule=dm)
        assert res[0]["test_loss"] <= 0.7
        assert res[0]["test_acc"] >= 0.85
@ -62,6 +64,7 @@ class LimitNbEpochs(Callback):


@pytest.mark.parametrize("pl_version", LEGACY_BACK_COMPATIBLE_PL_VERSIONS)
+@RunIf(sklearn=True)
 def test_legacy_ckpt_threading(tmpdir, pl_version: str):
    def load_model():
        import torch
@ -84,17 +87,16 @@ def test_legacy_ckpt_threading(tmpdir, pl_version: str):


@pytest.mark.parametrize("pl_version", LEGACY_BACK_COMPATIBLE_PL_VERSIONS)
+@RunIf(sklearn=True)
 def test_resume_legacy_checkpoints(tmpdir, pl_version: str):
    PATH_LEGACY = os.path.join(LEGACY_CHECKPOINTS_PATH, pl_version)
    with patch("sys.path", [PATH_LEGACY] + sys.path):
-        from simple_classif_training import ClassifDataModule, ClassificationModel
-
        path_ckpts = sorted(glob.glob(os.path.join(PATH_LEGACY, f"*{CHECKPOINT_EXTENSION}")))
        assert path_ckpts, f'No checkpoints found in folder "{PATH_LEGACY}"'
        path_ckpt = path_ckpts[-1]

-        dm = ClassifDataModule()
-        model = ClassificationModel()
+        dm = ClassifDataModule(num_features=24, length=6000, batch_size=128, n_clusters_per_class=2, n_informative=8)
+        model = ClassificationModel(num_features=24)
        stop = LimitNbEpochs(1)

        trainer = Trainer(
--- a/tests/tests_pytorch/core/test_datamodules.py
+++ b/tests/tests_pytorch/core/test_datamodules.py
@ -148,6 +148,7 @@ def test_dm_pickle_after_init():
    pickle.dumps(dm)


+@RunIf(sklearn=True)
 def test_train_loop_only(tmpdir):
    seed_everything(7)

@ -169,6 +170,7 @@ def test_train_loop_only(tmpdir):
    assert trainer.callback_metrics["train_loss"] < 1.1


+@RunIf(sklearn=True)
 def test_train_val_loop_only(tmpdir):
    seed_everything(7)

@ -226,6 +228,7 @@ def test_dm_checkpoint_save_and_load(tmpdir):
        assert dm.my_state_dict == {"my": "state_dict"}


+@RunIf(sklearn=True)
 def test_full_loop(tmpdir):
    seed_everything(7)

--- a/tests/tests_pytorch/helpers/datamodules.py
+++ b/tests/tests_pytorch/helpers/datamodules.py
@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-import pytest
 import torch
 from lightning_utilities.core.imports import RequirementCache
 from torch.utils.data import DataLoader
@ -54,7 +53,8 @@ class MNISTDataModule(LightningDataModule):
 class SklearnDataModule(LightningDataModule):
    def __init__(self, sklearn_dataset, x_type, y_type, batch_size: int = 10):
        if not _SKLEARN_AVAILABLE:
-            pytest.skip(str(_SKLEARN_AVAILABLE))
+            raise ImportError(str(_SKLEARN_AVAILABLE))
+
        super().__init__()
        self.batch_size = batch_size
        self._x, self._y = sklearn_dataset
@ -74,7 +74,8 @@ class SklearnDataModule(LightningDataModule):

    def train_dataloader(self):
        return DataLoader(
-            SklearnDataset(self.x_train, self.y_train, self._x_type, self._y_type), batch_size=self.batch_size
+            SklearnDataset(self.x_train, self.y_train, self._x_type, self._y_type),
+            batch_size=self.batch_size,
        )

    def val_dataloader(self):
@ -98,13 +99,21 @@ class SklearnDataModule(LightningDataModule):


 class ClassifDataModule(SklearnDataModule):
-    def __init__(self, num_features=32, length=800, num_classes=3, batch_size=10):
+    def __init__(
+        self, num_features=32, length=800, num_classes=3, batch_size=10, n_clusters_per_class=1, n_informative=2
+    ):
        if not _SKLEARN_AVAILABLE:
-            pytest.skip(str(_SKLEARN_AVAILABLE))
+            raise ImportError(str(_SKLEARN_AVAILABLE))
+
        from sklearn.datasets import make_classification

        data = make_classification(
-            n_samples=length, n_features=num_features, n_classes=num_classes, n_clusters_per_class=1, random_state=42
+            n_samples=length,
+            n_features=num_features,
+            n_classes=num_classes,
+            n_clusters_per_class=n_clusters_per_class,
+            n_informative=n_informative,
+            random_state=42,
        )
        super().__init__(data, x_type=torch.float32, y_type=torch.long, batch_size=batch_size)

@ -112,7 +121,8 @@ class ClassifDataModule(SklearnDataModule):
 class RegressDataModule(SklearnDataModule):
    def __init__(self, num_features=16, length=800, batch_size=10):
        if not _SKLEARN_AVAILABLE:
-            pytest.skip(str(_SKLEARN_AVAILABLE))
+            raise ImportError(str(_SKLEARN_AVAILABLE))
+
        from sklearn.datasets import make_regression

        x, y = make_regression(n_samples=length, n_features=num_features, random_state=42)
--- a/tests/tests_pytorch/helpers/runif.py
+++ b/tests/tests_pytorch/helpers/runif.py
@ -39,6 +39,7 @@ from pytorch_lightning.utilities.imports import (
    _TORCH_GREATER_EQUAL_1_10,
    _TORCH_QUANTIZE_AVAILABLE,
 )
+from tests_pytorch.helpers.datamodules import _SKLEARN_AVAILABLE

 _HOROVOD_NCCL_AVAILABLE = False
 if _HOROVOD_AVAILABLE:
@ -90,6 +91,7 @@ class RunIf:
        colossalai: bool = False,
        psutil: bool = False,
        hivemind: bool = False,
+        sklearn: bool = False,
        **kwargs,
    ):
        """
@ -121,6 +123,7 @@ class RunIf:
            bagua: Require that BaguaSys/bagua is installed.
            psutil: Require that psutil is installed.
            hivemind: Require that Hivemind is installed.
+            sklearn: Require that scikit-learn is installed.
            **kwargs: Any :class:`pytest.mark.skipif` keyword arguments.
        """
        conditions = []
@ -257,6 +260,10 @@ class RunIf:
            conditions.append(not _HIVEMIND_AVAILABLE or sys.platform in ("win32", "darwin"))
            reasons.append("Hivemind")

+        if sklearn:
+            conditions.append(not _SKLEARN_AVAILABLE)
+            reasons.append("scikit-learn")
+
        reasons = [rs for cond, rs in zip(conditions, reasons) if cond]
        return pytest.mark.skipif(
            *args, condition=any(conditions), reason=f"Requires: [{' + '.join(reasons)}]", **kwargs
--- a/tests/tests_pytorch/helpers/simple_models.py
+++ b/tests/tests_pytorch/helpers/simple_models.py
@ -20,14 +20,14 @@ from pytorch_lightning import LightningModule


 class ClassificationModel(LightningModule):
-    def __init__(self, lr=0.01):
+    def __init__(self, num_features=32, num_classes=3, lr=0.01):
        super().__init__()

        self.lr = lr
        for i in range(3):
-            setattr(self, f"layer_{i}", nn.Linear(32, 32))
+            setattr(self, f"layer_{i}", nn.Linear(num_features, num_features))
            setattr(self, f"layer_{i}a", torch.nn.ReLU())
-        setattr(self, "layer_end", nn.Linear(32, 3))
+        setattr(self, "layer_end", nn.Linear(num_features, num_classes))  # output width follows num_classes

        self.train_acc = Accuracy()
        self.valid_acc = Accuracy()
--- a/tests/tests_pytorch/helpers/test_models.py
+++ b/tests/tests_pytorch/helpers/test_models.py
@ -19,6 +19,7 @@ from pytorch_lightning import Trainer
 from pytorch_lightning.demos.boring_classes import BoringModel
 from tests_pytorch.helpers.advanced_models import BasicGAN, ParityModuleMNIST, ParityModuleRNN
 from tests_pytorch.helpers.datamodules import ClassifDataModule, RegressDataModule
+from tests_pytorch.helpers.runif import RunIf
 from tests_pytorch.helpers.simple_models import ClassificationModel, RegressionModel


@ -29,8 +30,8 @@ from tests_pytorch.helpers.simple_models import ClassificationModel, RegressionM
        (None, BasicGAN),
        (None, ParityModuleRNN),
        (None, ParityModuleMNIST),
-        (ClassifDataModule, ClassificationModel),
-        (RegressDataModule, RegressionModel),
+        pytest.param(ClassifDataModule, ClassificationModel, marks=RunIf(sklearn=True)),
+        pytest.param(RegressDataModule, RegressionModel, marks=RunIf(sklearn=True)),
    ],
 )
 def test_models(tmpdir, data_class, model_class):
--- a/tests/tests_pytorch/loggers/test_csv.py
+++ b/tests/tests_pytorch/loggers/test_csv.py
@ -22,6 +22,7 @@ from pytorch_lightning.core.saving import load_hparams_from_yaml
 from pytorch_lightning.loggers import CSVLogger
 from pytorch_lightning.loggers.csv_logs import ExperimentWriter
 from tests_pytorch.helpers.datamodules import ClassifDataModule
+from tests_pytorch.helpers.runif import RunIf
 from tests_pytorch.helpers.simple_models import ClassificationModel


@ -107,6 +108,7 @@ def test_file_logger_log_hyperparams(tmpdir):
    assert all(n in params for n in hparams)


+@RunIf(sklearn=True)
 def test_fit_csv_logger(tmpdir):
    dm = ClassifDataModule()
    model = ClassificationModel()
--- a/tests/tests_pytorch/models/test_cpu.py
+++ b/tests/tests_pytorch/models/test_cpu.py
@ -126,7 +126,7 @@ def test_early_stopping_cpu_model(tmpdir):
    model.unfreeze()


-@RunIf(skip_windows=True)
+@RunIf(skip_windows=True, sklearn=True)
 def test_multi_cpu_model_ddp(tmpdir):
    """Make sure DDP works."""
    trainer_options = dict(
--- a/tests/tests_pytorch/models/test_gpu.py
+++ b/tests/tests_pytorch/models/test_gpu.py
@ -33,7 +33,7 @@ from tests_pytorch.helpers.simple_models import ClassificationModel
 PRETEND_N_OF_GPUS = 16


-@RunIf(min_cuda_gpus=2)
+@RunIf(min_cuda_gpus=2, sklearn=True)
 def test_multi_gpu_none_backend(tmpdir):
    """Make sure when using multiple GPUs the user can't use `accelerator = None`."""
    trainer_options = dict(
--- a/tests/tests_pytorch/models/test_horovod.py
+++ b/tests/tests_pytorch/models/test_horovod.py
@ -21,7 +21,6 @@ from unittest.mock import patch
 import numpy as np
 import pytest
 import torch
-from sklearn.metrics import accuracy_score
 from torch import optim
 from torchmetrics.classification.accuracy import Accuracy

@ -368,8 +367,10 @@ def test_result_reduce_horovod(tmpdir):

 # todo: need to be fixed :]
@pytest.mark.skip(reason="TODO: CI agent.jobstatus=Succeeded: Permission denied")
-@RunIf(horovod=True, skip_windows=True, num_gpus=2)
+@RunIf(horovod=True, skip_windows=True, min_cuda_gpus=2, sklearn=True)
 def test_accuracy_metric_horovod():
+    from sklearn.metrics import accuracy_score
+
    num_batches = 10
    batch_size = 16
    threshold = 0.5
--- a/tests/tests_pytorch/models/test_restore.py
+++ b/tests/tests_pytorch/models/test_restore.py
@ -124,6 +124,7 @@ def test_model_properties_fit_ckpt_path(tmpdir):
    trainer.fit(model, ckpt_path=str(tmpdir / "last.ckpt"))


+@RunIf(sklearn=True)
 def test_trainer_properties_restore_ckpt_path(tmpdir):
    """Test that required trainer properties are set correctly when resuming from checkpoint in different
    phases."""
@ -315,6 +316,7 @@ class CaptureCallbacksBeforeTraining(Callback):
        self.callbacks = deepcopy(trainer.callbacks)


+@RunIf(sklearn=True)
 def test_callbacks_state_fit_ckpt_path(tmpdir):
    """Test that resuming from a checkpoint restores callbacks that persist state."""
    dm = ClassifDataModule()
@ -360,6 +362,7 @@ def test_callbacks_state_fit_ckpt_path(tmpdir):
                assert getattr(before, attribute) == getattr(after, attribute)


+@RunIf(sklearn=True)
 def test_callbacks_references_fit_ckpt_path(tmpdir):
    """Test that resuming from a checkpoint sets references as expected."""
    dm = ClassifDataModule()
@ -388,9 +391,11 @@ def test_callbacks_references_fit_ckpt_path(tmpdir):
    trainer.fit(model, datamodule=dm, ckpt_path=str(tmpdir / "last.ckpt"))


-@RunIf(min_cuda_gpus=2)
+@RunIf(min_cuda_gpus=2, sklearn=True)
 def test_running_test_pretrained_model_distrib_dp(tmpdir):
    """Verify `test()` on pretrained model."""
+    seed_everything(7)
+
    dm = ClassifDataModule()
    model = CustomClassificationModelDP(lr=0.1)

@ -434,7 +439,7 @@ def test_running_test_pretrained_model_distrib_dp(tmpdir):
        tpipes.run_model_prediction(pretrained_model, dataloader)


-@RunIf(min_cuda_gpus=2)
+@RunIf(min_cuda_gpus=2, sklearn=True)
 def test_running_test_pretrained_model_distrib_ddp_spawn(tmpdir):
    """Verify `test()` on pretrained model."""
    dm = ClassifDataModule()
@ -482,6 +487,7 @@ def test_running_test_pretrained_model_distrib_ddp_spawn(tmpdir):
        tpipes.run_model_prediction(pretrained_model, dataloader, min_acc=0.1)


+@RunIf(sklearn=True)
 def test_running_test_pretrained_model_cpu(tmpdir):
    """Verify test() on pretrained model."""
    seed_everything(1)
@ -567,7 +573,7 @@ def test_load_model_from_checkpoint(tmpdir, model_template):
    new_trainer.test(pretrained_model)


-@RunIf(min_cuda_gpus=2)
+@RunIf(min_cuda_gpus=2, sklearn=True)
 def test_dp_resume(tmpdir):
    """Make sure DP continues training correctly."""
    model = CustomClassificationModelDP(lr=0.1)
--- a/tests/tests_pytorch/strategies/test_colossalai.py
+++ b/tests/tests_pytorch/strategies/test_colossalai.py
@ -239,7 +239,7 @@ class ModelParallelClassificationModel(LightningModule):
        return self.forward(x)


-@RunIf(min_cuda_gpus=2, standalone=True, colossalai=True)
+@RunIf(min_cuda_gpus=2, standalone=True, colossalai=True, sklearn=True)
 def test_multi_gpu_checkpointing(tmpdir):
    dm = ClassifDataModule()
    model = ModelParallelClassificationModel()
@ -267,7 +267,7 @@ def test_multi_gpu_checkpointing(tmpdir):
    assert saved_results == results


-@RunIf(min_cuda_gpus=2, standalone=True, colossalai=True)
+@RunIf(min_cuda_gpus=2, standalone=True, colossalai=True, sklearn=True)
 def test_multi_gpu_model_colossalai_fit_test(tmpdir):
    seed_everything(7)

--- a/tests/tests_pytorch/strategies/test_common.py
+++ b/tests/tests_pytorch/strategies/test_common.py
@ -30,6 +30,7 @@ from tests_pytorch.strategies.test_dp import CustomClassificationModelDP
        pytest.param(dict(accelerator="mps", devices=1), marks=RunIf(mps=True)),
    ),
 )
+@RunIf(sklearn=True)
 def test_evaluate(tmpdir, trainer_kwargs):
    dm = ClassifDataModule()
    model = CustomClassificationModelDP()
--- a/tests/tests_pytorch/strategies/test_ddp.py
+++ b/tests/tests_pytorch/strategies/test_ddp.py
@ -27,7 +27,7 @@ from tests_pytorch.helpers.runif import RunIf
 from tests_pytorch.helpers.simple_models import ClassificationModel


-@RunIf(min_cuda_gpus=2, standalone=True)
+@RunIf(min_cuda_gpus=2, standalone=True, sklearn=True)
 def test_multi_gpu_model_ddp_fit_only(tmpdir):
    dm = ClassifDataModule()
    model = ClassificationModel()
@ -35,7 +35,7 @@ def test_multi_gpu_model_ddp_fit_only(tmpdir):
    trainer.fit(model, datamodule=dm)


-@RunIf(min_cuda_gpus=2, standalone=True)
+@RunIf(min_cuda_gpus=2, standalone=True, sklearn=True)
 def test_multi_gpu_model_ddp_test_only(tmpdir):
    dm = ClassifDataModule()
    model = ClassificationModel()
@ -43,7 +43,7 @@ def test_multi_gpu_model_ddp_test_only(tmpdir):
    trainer.test(model, datamodule=dm)


-@RunIf(min_cuda_gpus=2, standalone=True)
+@RunIf(min_cuda_gpus=2, standalone=True, sklearn=True)
 def test_multi_gpu_model_ddp_fit_test(tmpdir):
    seed_everything(4321)
    dm = ClassifDataModule()
--- a/tests/tests_pytorch/strategies/test_ddp_spawn.py
+++ b/tests/tests_pytorch/strategies/test_ddp_spawn.py
@ -20,7 +20,7 @@ from tests_pytorch.helpers.runif import RunIf
 from tests_pytorch.helpers.simple_models import ClassificationModel


-@RunIf(min_cuda_gpus=2)
+@RunIf(min_cuda_gpus=2, sklearn=True)
 def test_multi_gpu_early_stop_ddp_spawn(tmpdir):
    trainer_options = dict(
        default_root_dir=tmpdir,
--- a/tests/tests_pytorch/strategies/test_deepspeed_strategy.py
+++ b/tests/tests_pytorch/strategies/test_deepspeed_strategy.py
@ -690,7 +690,7 @@ def test_deepspeed_multigpu_stage_3_manual_optimization(tmpdir, deepspeed_config

@pytest.mark.skip(reason="skipped due to deepspeed/#2449, keep track @rohitgr7")
@pytest.mark.parametrize(("accumulate_grad_batches", "automatic_optimization"), [(1, False), (2, True)])
-@RunIf(min_cuda_gpus=2, standalone=True, deepspeed=True)
+@RunIf(min_cuda_gpus=2, standalone=True, deepspeed=True, sklearn=True)
 def test_deepspeed_multigpu_stage_3_checkpointing(tmpdir, automatic_optimization, accumulate_grad_batches):
    if automatic_optimization:
        model = ModelParallelClassificationModel()
@ -732,7 +732,7 @@ def test_deepspeed_multigpu_stage_3_checkpointing(tmpdir, automatic_optimization
    trainer.test(model, datamodule=dm, ckpt_path=ck.best_model_path)


-@RunIf(min_cuda_gpus=1, standalone=True, deepspeed=True)
+@RunIf(min_cuda_gpus=1, standalone=True, deepspeed=True, sklearn=True)
 def test_deepspeed_multigpu_stage_3_warns_resume_training(tmpdir):
    """Test to ensure with Stage 3 and multiple GPUs that we can resume from training, throwing a warning that the
    optimizer state and scheduler states cannot be restored."""
@ -767,7 +767,7 @@ def test_deepspeed_multigpu_stage_3_warns_resume_training(tmpdir):
        trainer.fit(model, datamodule=dm, ckpt_path=checkpoint_path)


-@RunIf(min_cuda_gpus=1, standalone=True, deepspeed=True)
+@RunIf(min_cuda_gpus=1, standalone=True, deepspeed=True, sklearn=True)
 def test_deepspeed_multigpu_stage_3_resume_training(tmpdir):
    """Test to ensure with Stage 3 and single GPU that we can resume training."""
    initial_model = ModelParallelClassificationModel()
@ -833,7 +833,7 @@ def test_deepspeed_multigpu_stage_3_resume_training(tmpdir):


@pytest.mark.parametrize("offload_optimizer", [False, True])
-@RunIf(min_cuda_gpus=2, standalone=True, deepspeed=True)
+@RunIf(min_cuda_gpus=2, standalone=True, deepspeed=True, sklearn=True)
 def test_deepspeed_multigpu_stage_2_accumulated_grad_batches(tmpdir, offload_optimizer):
    """Test to ensure with Stage 2 and multiple GPUs, accumulated grad batches works."""

--- a/tests/tests_pytorch/strategies/test_dp.py
+++ b/tests/tests_pytorch/strategies/test_dp.py
@ -49,7 +49,7 @@ class CustomClassificationModelDP(ClassificationModel):
        self.log("test_acc", self.test_acc(outputs["logits"], outputs["y"]))


-@RunIf(min_cuda_gpus=2)
+@RunIf(min_cuda_gpus=2, sklearn=True)
 def test_multi_gpu_early_stop_dp(tmpdir):
    """Make sure DDP works.

--- a/tests/tests_pytorch/trainer/flags/test_overfit_batches.py
+++ b/tests/tests_pytorch/trainer/flags/test_overfit_batches.py
@ -11,15 +11,20 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import os
+from unittest import mock
+
 import pytest
 import torch
-from legacy.simple_classif_training import ClassifDataModule, ClassificationModel
 from torch.utils.data import DataLoader, DistributedSampler, RandomSampler, Sampler, SequentialSampler

 from pytorch_lightning import Trainer
 from pytorch_lightning.demos.boring_classes import BoringModel, RandomDataset
 from pytorch_lightning.trainer.states import RunningStage
+from tests_pytorch.helpers.datamodules import ClassifDataModule
+from tests_pytorch.helpers.datasets import SklearnDataset
 from tests_pytorch.helpers.runif import RunIf
+from tests_pytorch.helpers.simple_models import ClassificationModel


@pytest.mark.parametrize("overfit_batches", [1, 2, 0.1, 0.25, 1.0])
@ -78,6 +83,8 @@ def test_overfit_batches_raises_warning_in_case_of_sequential_sampler(tmpdir):
    [(RunningStage.VALIDATING, "val"), (RunningStage.TESTING, "test"), (RunningStage.PREDICTING, "predict")],
 )
@pytest.mark.parametrize("overfit_batches", [0.11, 4])
+@RunIf(sklearn=True)
+@mock.patch.dict(os.environ, os.environ.copy(), clear=True)
 def test_overfit_batch_limits_eval(stage, mode, overfit_batches):
    model = ClassificationModel()
    dm = ClassifDataModule()
@ -99,9 +106,18 @@ def test_overfit_batch_limits_eval(stage, mode, overfit_batches):


@pytest.mark.parametrize("overfit_batches", [0.11, 4])
+@RunIf(sklearn=True)
 def test_overfit_batch_limits_train(overfit_batches):
+    class CustomDataModule(ClassifDataModule):
+        def train_dataloader(self):
+            return DataLoader(
+                SklearnDataset(self.x_train, self.y_train, self._x_type, self._y_type),
+                batch_size=self.batch_size,
+                shuffle=True,
+            )
+
    model = ClassificationModel()
-    dm = ClassifDataModule()
+    dm = CustomDataModule()

    # original train loader which should be replaced in all methods
    train_loader = dm.train_dataloader()
--- a/tests/tests_pytorch/trainer/test_trainer.py
+++ b/tests/tests_pytorch/trainer/test_trainer.py
@ -1099,6 +1099,7 @@ def test_on_exception_hook(tmpdir):


@pytest.mark.parametrize("precision", [32, pytest.param(16, marks=RunIf(min_cuda_gpus=1))])
+@RunIf(sklearn=True)
 def test_gradient_clipping_by_norm(tmpdir, precision):
    """Test gradient clipping by norm."""
    trainer = Trainer(
--- a/tests/tests_pytorch/tuner/test_lr_finder.py
+++ b/tests/tests_pytorch/tuner/test_lr_finder.py
@ -14,6 +14,7 @@
 import logging
 import os
 from copy import deepcopy
+from unittest import mock

 import pytest
 import torch
@ -24,6 +25,7 @@ from pytorch_lightning.demos.boring_classes import BoringModel
 from pytorch_lightning.tuner.lr_finder import _LRFinder
 from pytorch_lightning.utilities.exceptions import MisconfigurationException
 from tests_pytorch.helpers.datamodules import ClassifDataModule
+from tests_pytorch.helpers.runif import RunIf
 from tests_pytorch.helpers.simple_models import ClassificationModel
 from tests_pytorch.helpers.utils import getattr_recursive, no_warning_call

@ -194,12 +196,14 @@ def test_call_to_trainer_method(tmpdir, opt):
    assert before_lr != after_lr, "Learning rate was not altered after running learning rate finder"


+@RunIf(sklearn=True)
+@mock.patch.dict(os.environ, os.environ.copy(), clear=True)
 def test_datamodule_parameter(tmpdir):
    """Test that the datamodule parameter works."""
    seed_everything(1)

    dm = ClassifDataModule()
-    model = ClassificationModel()
+    model = ClassificationModel(lr=1e-3)

    before_lr = model.lr
    # logger file to get meta