Refactor plugin tests whose assertions don't need to run in `on_fit_start` hook (#11149)

Adrian Wälchli 2021-12-18 23:38:40 +01:00 committed by GitHub
parent 3461af0ddb
commit a3e2ef2be0
2 changed files with 151 additions and 272 deletions
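The refactor applies one pattern across the accelerator connector tests: assertions that only inspect how the Trainer wires up its accelerator, training type plugin, and cluster environment do not need a running fit loop, so the `Callback.on_fit_start` + `SystemExit` scaffolding is dropped and the assertions run directly against the freshly constructed `Trainer`. A condensed before/after sketch of the pattern, taken from the hunks below (assertions abbreviated):

# Before: assertions deferred to fit time via a throwaway callback
class CB(Callback):
    def on_fit_start(self, trainer, pl_module):
        assert isinstance(trainer.training_type_plugin, DDPPlugin)
        raise SystemExit()  # abort the fit once the checks have run

model = BoringModel()
trainer = Trainer(fast_dev_run=True, accelerator="ddp", gpus=2, callbacks=[CB()])
with pytest.raises(SystemExit):
    trainer.fit(model)

# After: the same checks run right after Trainer construction; no model, no fit
trainer = Trainer(fast_dev_run=True, accelerator="ddp", gpus=2)
assert isinstance(trainer.training_type_plugin, DDPPlugin)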


@@ -24,7 +24,6 @@ from pytorch_lightning import Trainer
from pytorch_lightning.accelerators.accelerator import Accelerator
from pytorch_lightning.accelerators.cpu import CPUAccelerator
from pytorch_lightning.accelerators.gpu import GPUAccelerator
from pytorch_lightning.callbacks import Callback
from pytorch_lightning.plugins import (
DataParallelPlugin,
DDP2Plugin,
@@ -45,7 +44,6 @@ from pytorch_lightning.plugins.environments import (
)
from pytorch_lightning.utilities import _AcceleratorType, _StrategyType
from pytorch_lightning.utilities.exceptions import MisconfigurationException
from tests.helpers.boring_model import BoringModel
from tests.helpers.runif import RunIf
@@ -100,23 +98,15 @@ def test_accelerator_choice_ddp_spawn(cuda_available_mock, device_count_mock):
@mock.patch("torch.cuda.set_device")
@mock.patch("torch.cuda.device_count", return_value=2)
@mock.patch("pytorch_lightning.plugins.DDPPlugin.setup_distributed", autospec=True)
def test_accelerator_choice_ddp_slurm(set_device_mock, device_count_mock, setup_distributed_mock):
class CB(Callback):
def on_fit_start(self, trainer, pl_module):
assert trainer._accelerator_connector._is_slurm_managing_tasks()
assert isinstance(trainer.accelerator, GPUAccelerator)
assert isinstance(trainer.training_type_plugin, DDPPlugin)
assert isinstance(trainer.training_type_plugin.cluster_environment, SLURMEnvironment)
assert trainer.training_type_plugin.cluster_environment.local_rank() == 1
assert trainer.training_type_plugin.local_rank == 1
raise SystemExit()
model = BoringModel()
def test_accelerator_choice_ddp_slurm(*_):
with pytest.deprecated_call(match=r"accelerator='ddp'\)` has been deprecated in v1.5"):
trainer = Trainer(fast_dev_run=True, accelerator="ddp", gpus=2, callbacks=[CB()])
with pytest.raises(SystemExit):
trainer.fit(model)
trainer = Trainer(fast_dev_run=True, accelerator="ddp", gpus=2)
assert trainer._accelerator_connector._is_slurm_managing_tasks()
assert isinstance(trainer.accelerator, GPUAccelerator)
assert isinstance(trainer.training_type_plugin, DDPPlugin)
assert isinstance(trainer.training_type_plugin.cluster_environment, SLURMEnvironment)
assert trainer.training_type_plugin.cluster_environment.local_rank() == 1
assert trainer.training_type_plugin.local_rank == 1
@mock.patch.dict(
@@ -133,25 +123,15 @@ def test_accelerator_choice_ddp_slurm(set_device_mock, device_count_mock, setup_distributed_mock):
@mock.patch("torch.cuda.set_device")
@mock.patch("torch.cuda.device_count", return_value=2)
@mock.patch("pytorch_lightning.plugins.DDPPlugin.setup_distributed", autospec=True)
def test_accelerator_choice_ddp2_slurm(set_device_mock, device_count_mock, setup_distributed_mock):
class CB(Callback):
def on_fit_start(self, trainer, pl_module):
assert trainer._accelerator_connector._is_slurm_managing_tasks()
assert isinstance(trainer.accelerator, GPUAccelerator)
assert isinstance(trainer.training_type_plugin, DDP2Plugin)
assert isinstance(trainer.training_type_plugin.cluster_environment, SLURMEnvironment)
assert trainer.training_type_plugin.cluster_environment.local_rank() == 1
assert trainer.training_type_plugin.local_rank == 1
raise SystemExit()
model = BoringModel()
def test_accelerator_choice_ddp2_slurm(*_):
with pytest.deprecated_call(match=r"accelerator='ddp2'\)` has been deprecated in v1.5"):
trainer = Trainer(fast_dev_run=True, accelerator="ddp2", gpus=2, callbacks=[CB()])
with pytest.raises(SystemExit):
trainer.fit(model)
set_device_mock.assert_called_once()
trainer = Trainer(fast_dev_run=True, accelerator="ddp2", gpus=2)
assert trainer._accelerator_connector._is_slurm_managing_tasks()
assert isinstance(trainer.accelerator, GPUAccelerator)
assert isinstance(trainer.training_type_plugin, DDP2Plugin)
assert isinstance(trainer.training_type_plugin.cluster_environment, SLURMEnvironment)
assert trainer.training_type_plugin.cluster_environment.local_rank() == 1
assert trainer.training_type_plugin.local_rank == 1
@mock.patch.dict(
@@ -168,24 +148,14 @@ def test_accelerator_choice_ddp2_slurm(set_device_mock, device_count_mock, setup_distributed_mock):
@mock.patch("torch.cuda.set_device")
@mock.patch("torch.cuda.device_count", return_value=1)
@mock.patch("pytorch_lightning.plugins.DDPPlugin.setup_distributed", autospec=True)
def test_accelerator_choice_ddp_te(set_device_mock, device_count_mock, setup_distributed_mock):
class CB(Callback):
def on_fit_start(self, trainer, pl_module):
assert isinstance(trainer.accelerator, GPUAccelerator)
assert isinstance(trainer.training_type_plugin, DDPPlugin)
assert isinstance(trainer.training_type_plugin.cluster_environment, TorchElasticEnvironment)
assert trainer.training_type_plugin.cluster_environment.local_rank() == 1
assert trainer.training_type_plugin.local_rank == 1
raise SystemExit()
model = BoringModel()
def test_accelerator_choice_ddp_te(*_):
with pytest.deprecated_call(match=r"accelerator='ddp'\)` has been deprecated in v1.5"):
trainer = Trainer(fast_dev_run=True, accelerator="ddp", gpus=2, callbacks=[CB()])
with pytest.raises(SystemExit):
trainer.fit(model)
set_device_mock.assert_called_once()
trainer = Trainer(fast_dev_run=True, accelerator="ddp", gpus=2)
assert isinstance(trainer.accelerator, GPUAccelerator)
assert isinstance(trainer.training_type_plugin, DDPPlugin)
assert isinstance(trainer.training_type_plugin.cluster_environment, TorchElasticEnvironment)
assert trainer.training_type_plugin.cluster_environment.local_rank() == 1
assert trainer.training_type_plugin.local_rank == 1
@mock.patch.dict(
@@ -202,24 +172,14 @@ def test_accelerator_choice_ddp_te(set_device_mock, device_count_mock, setup_distributed_mock):
@mock.patch("torch.cuda.set_device")
@mock.patch("torch.cuda.device_count", return_value=1)
@mock.patch("pytorch_lightning.plugins.DDPPlugin.setup_distributed", autospec=True)
def test_accelerator_choice_ddp2_te(set_device_mock, device_count_mock, setup_distributed_mock):
class CB(Callback):
def on_fit_start(self, trainer, pl_module):
assert isinstance(trainer.accelerator, GPUAccelerator)
assert isinstance(trainer.training_type_plugin, DDP2Plugin)
assert isinstance(trainer.training_type_plugin.cluster_environment, TorchElasticEnvironment)
assert trainer.training_type_plugin.cluster_environment.local_rank() == 1
assert trainer.training_type_plugin.local_rank == 1
raise SystemExit()
model = BoringModel()
def test_accelerator_choice_ddp2_te(*_):
with pytest.deprecated_call(match=r"accelerator='ddp2'\)` has been deprecated in v1.5"):
trainer = Trainer(fast_dev_run=True, accelerator="ddp2", gpus=2, callbacks=[CB()])
with pytest.raises(SystemExit):
trainer.fit(model)
set_device_mock.assert_called_once()
trainer = Trainer(fast_dev_run=True, accelerator="ddp2", gpus=2)
assert isinstance(trainer.accelerator, GPUAccelerator)
assert isinstance(trainer.training_type_plugin, DDP2Plugin)
assert isinstance(trainer.training_type_plugin.cluster_environment, TorchElasticEnvironment)
assert trainer.training_type_plugin.cluster_environment.local_rank() == 1
assert trainer.training_type_plugin.local_rank == 1
@mock.patch.dict(
@@ -227,21 +187,13 @@ def test_accelerator_choice_ddp2_te(set_device_mock, device_count_mock, setup_distributed_mock):
)
@mock.patch("torch.cuda.device_count", return_value=0)
@mock.patch("pytorch_lightning.plugins.DDPPlugin.setup_distributed", autospec=True)
def test_accelerator_choice_ddp_cpu_te(device_count_mock, setup_distributed_mock):
class CB(Callback):
def on_fit_start(self, trainer, pl_module):
assert isinstance(trainer.accelerator, CPUAccelerator)
assert isinstance(trainer.training_type_plugin, DDPPlugin)
assert isinstance(trainer.training_type_plugin.cluster_environment, TorchElasticEnvironment)
assert trainer.training_type_plugin.cluster_environment.local_rank() == 1
assert trainer.training_type_plugin.local_rank == 1
raise SystemExit()
model = BoringModel()
trainer = Trainer(fast_dev_run=True, accelerator="ddp_cpu", num_processes=2, callbacks=[CB()])
with pytest.raises(SystemExit):
trainer.fit(model)
def test_accelerator_choice_ddp_cpu_te(*_):
trainer = Trainer(fast_dev_run=True, accelerator="ddp_cpu", num_processes=2)
assert isinstance(trainer.accelerator, CPUAccelerator)
assert isinstance(trainer.training_type_plugin, DDPPlugin)
assert isinstance(trainer.training_type_plugin.cluster_environment, TorchElasticEnvironment)
assert trainer.training_type_plugin.cluster_environment.local_rank() == 1
assert trainer.training_type_plugin.local_rank == 1
@mock.patch.dict(
@@ -258,24 +210,14 @@ def test_accelerator_choice_ddp_cpu_te(device_count_mock, setup_distributed_mock):
@mock.patch("torch.cuda.set_device")
@mock.patch("torch.cuda.device_count", return_value=1)
@mock.patch("pytorch_lightning.plugins.DDPPlugin.setup_distributed", autospec=True)
def test_accelerator_choice_ddp_kubeflow(set_device_mock, device_count_mock, setup_distributed_mock):
class CB(Callback):
def on_fit_start(self, trainer, pl_module):
assert isinstance(trainer.accelerator, GPUAccelerator)
assert isinstance(trainer.training_type_plugin, DDPPlugin)
assert isinstance(trainer.training_type_plugin.cluster_environment, KubeflowEnvironment)
assert trainer.training_type_plugin.cluster_environment.local_rank() == 0
assert trainer.training_type_plugin.local_rank == 0
raise SystemExit()
model = BoringModel()
def test_accelerator_choice_ddp_kubeflow(*_):
with pytest.deprecated_call(match=r"accelerator='ddp'\)` has been deprecated in v1.5"):
trainer = Trainer(fast_dev_run=True, accelerator="ddp", gpus=1, callbacks=[CB()])
with pytest.raises(SystemExit):
trainer.fit(model)
set_device_mock.assert_called_once()
trainer = Trainer(fast_dev_run=True, accelerator="ddp", gpus=1)
assert isinstance(trainer.accelerator, GPUAccelerator)
assert isinstance(trainer.training_type_plugin, DDPPlugin)
assert isinstance(trainer.training_type_plugin.cluster_environment, KubeflowEnvironment)
assert trainer.training_type_plugin.cluster_environment.local_rank() == 0
assert trainer.training_type_plugin.local_rank == 0
@mock.patch.dict(
@@ -290,21 +232,13 @@ def test_accelerator_choice_ddp_kubeflow(set_device_mock, device_count_mock, setup_distributed_mock):
)
@mock.patch("torch.cuda.device_count", return_value=0)
@mock.patch("pytorch_lightning.plugins.DDPPlugin.setup_distributed", autospec=True)
def test_accelerator_choice_ddp_cpu_kubeflow(device_count_mock, setup_distributed_mock):
class CB(Callback):
def on_fit_start(self, trainer, pl_module):
assert isinstance(trainer.accelerator, CPUAccelerator)
assert isinstance(trainer.training_type_plugin, DDPPlugin)
assert isinstance(trainer.training_type_plugin.cluster_environment, KubeflowEnvironment)
assert trainer.training_type_plugin.cluster_environment.local_rank() == 0
assert trainer.training_type_plugin.local_rank == 0
raise SystemExit()
model = BoringModel()
trainer = Trainer(fast_dev_run=True, accelerator="ddp_cpu", num_processes=1, callbacks=[CB()])
with pytest.raises(SystemExit):
trainer.fit(model)
def test_accelerator_choice_ddp_cpu_kubeflow(*_):
trainer = Trainer(fast_dev_run=True, accelerator="ddp_cpu", num_processes=1)
assert isinstance(trainer.accelerator, CPUAccelerator)
assert isinstance(trainer.training_type_plugin, DDPPlugin)
assert isinstance(trainer.training_type_plugin.cluster_environment, KubeflowEnvironment)
assert trainer.training_type_plugin.cluster_environment.local_rank() == 0
assert trainer.training_type_plugin.local_rank == 0
@mock.patch.dict(
@@ -320,21 +254,13 @@ def test_accelerator_choice_ddp_cpu_kubeflow(device_count_mock, setup_distributed_mock):
)
@mock.patch("torch.cuda.device_count", return_value=0)
@mock.patch("pytorch_lightning.plugins.DDPPlugin.setup_distributed", autospec=True)
def test_accelerator_choice_ddp_cpu_slurm(device_count_mock, setup_distributed_mock):
class CB(Callback):
def on_fit_start(self, trainer, pl_module):
assert trainer._accelerator_connector._is_slurm_managing_tasks()
assert isinstance(trainer.accelerator, CPUAccelerator)
assert isinstance(trainer.training_type_plugin, DDPPlugin)
assert isinstance(trainer.training_type_plugin.cluster_environment, SLURMEnvironment)
assert trainer.training_type_plugin.local_rank == 0
raise SystemExit()
model = BoringModel()
trainer = Trainer(fast_dev_run=True, accelerator="ddp_cpu", num_processes=2, callbacks=[CB()])
with pytest.raises(SystemExit):
trainer.fit(model)
def test_accelerator_choice_ddp_cpu_slurm(*_):
trainer = Trainer(fast_dev_run=True, accelerator="ddp_cpu", num_processes=2)
assert trainer._accelerator_connector._is_slurm_managing_tasks()
assert isinstance(trainer.accelerator, CPUAccelerator)
assert isinstance(trainer.training_type_plugin, DDPPlugin)
assert isinstance(trainer.training_type_plugin.cluster_environment, SLURMEnvironment)
assert trainer.training_type_plugin.local_rank == 0
@RunIf(skip_windows=True, standalone=True)
@@ -449,19 +375,11 @@ def test_custom_accelerator(device_count_mock, setup_distributed_mock):
)
@mock.patch("torch.cuda.device_count", return_value=0)
@mock.patch("pytorch_lightning.plugins.DDPPlugin.setup_distributed", autospec=True)
def test_dist_backend_accelerator_mapping(device_count_mock, setup_distributed_mock):
class CB(Callback):
def on_fit_start(self, trainer, pl_module):
assert isinstance(trainer.accelerator, CPUAccelerator)
assert isinstance(trainer.training_type_plugin, DDPPlugin)
assert trainer.training_type_plugin.local_rank == 0
raise SystemExit()
model = BoringModel()
trainer = Trainer(fast_dev_run=True, strategy="ddp_spawn", num_processes=2, callbacks=[CB()])
with pytest.raises(SystemExit):
trainer.fit(model)
def test_dist_backend_accelerator_mapping(*_):
trainer = Trainer(fast_dev_run=True, strategy="ddp_spawn", num_processes=2)
assert isinstance(trainer.accelerator, CPUAccelerator)
assert isinstance(trainer.training_type_plugin, DDPPlugin)
assert trainer.training_type_plugin.local_rank == 0
@mock.patch("pytorch_lightning.utilities._IS_INTERACTIVE", return_value=True)
@@ -792,21 +710,13 @@ def test_strategy_choice_ddp_spawn(cuda_available_mock, device_count_mock):
@mock.patch("pytorch_lightning.plugins.DDPPlugin.setup_distributed", autospec=True)
@pytest.mark.parametrize("strategy", ["ddp", DDPPlugin()])
def test_strategy_choice_ddp_slurm(setup_distributed_mock, strategy):
class CB(Callback):
def on_fit_start(self, trainer, pl_module):
assert trainer._accelerator_connector._is_slurm_managing_tasks()
assert isinstance(trainer.accelerator, GPUAccelerator)
assert isinstance(trainer.training_type_plugin, DDPPlugin)
assert isinstance(trainer.training_type_plugin.cluster_environment, SLURMEnvironment)
assert trainer.training_type_plugin.cluster_environment.local_rank() == 1
assert trainer.training_type_plugin.local_rank == 1
raise SystemExit()
model = BoringModel()
trainer = Trainer(fast_dev_run=True, strategy=strategy, gpus=2, callbacks=[CB()])
with pytest.raises(SystemExit):
trainer.fit(model)
trainer = Trainer(fast_dev_run=True, strategy=strategy, gpus=2)
assert trainer._accelerator_connector._is_slurm_managing_tasks()
assert isinstance(trainer.accelerator, GPUAccelerator)
assert isinstance(trainer.training_type_plugin, DDPPlugin)
assert isinstance(trainer.training_type_plugin.cluster_environment, SLURMEnvironment)
assert trainer.training_type_plugin.cluster_environment.local_rank() == 1
assert trainer.training_type_plugin.local_rank == 1
@mock.patch.dict(
@@ -825,23 +735,13 @@ def test_strategy_choice_ddp_slurm(setup_distributed_mock, strategy):
@mock.patch("pytorch_lightning.plugins.DDPPlugin.setup_distributed", autospec=True)
@pytest.mark.parametrize("strategy", ["ddp2", DDP2Plugin()])
def test_strategy_choice_ddp2_slurm(set_device_mock, device_count_mock, setup_distributed_mock, strategy):
class CB(Callback):
def on_fit_start(self, trainer, pl_module):
assert trainer._accelerator_connector._is_slurm_managing_tasks()
assert isinstance(trainer.accelerator, GPUAccelerator)
assert isinstance(trainer.training_type_plugin, DDP2Plugin)
assert isinstance(trainer.training_type_plugin.cluster_environment, SLURMEnvironment)
assert trainer.training_type_plugin.cluster_environment.local_rank() == 1
assert trainer.training_type_plugin.local_rank == 1
raise SystemExit()
model = BoringModel()
trainer = Trainer(fast_dev_run=True, strategy=strategy, gpus=2, callbacks=[CB()])
with pytest.raises(SystemExit):
trainer.fit(model)
set_device_mock.assert_called_once()
trainer = Trainer(fast_dev_run=True, strategy=strategy, gpus=2)
assert trainer._accelerator_connector._is_slurm_managing_tasks()
assert isinstance(trainer.accelerator, GPUAccelerator)
assert isinstance(trainer.training_type_plugin, DDP2Plugin)
assert isinstance(trainer.training_type_plugin.cluster_environment, SLURMEnvironment)
assert trainer.training_type_plugin.cluster_environment.local_rank() == 1
assert trainer.training_type_plugin.local_rank == 1
@mock.patch.dict(
@@ -858,23 +758,13 @@ def test_strategy_choice_ddp2_slurm(set_device_mock, device_count_mock, setup_distributed_mock, strategy):
@mock.patch("torch.cuda.set_device")
@mock.patch("torch.cuda.device_count", return_value=2)
@mock.patch("pytorch_lightning.plugins.DDPPlugin.setup_distributed", autospec=True)
def test_strategy_choice_ddp_te(set_device_mock, device_count_mock, setup_distributed_mock):
class CB(Callback):
def on_fit_start(self, trainer, pl_module):
assert isinstance(trainer.accelerator, GPUAccelerator)
assert isinstance(trainer.training_type_plugin, DDPPlugin)
assert isinstance(trainer.training_type_plugin.cluster_environment, TorchElasticEnvironment)
assert trainer.training_type_plugin.cluster_environment.local_rank() == 1
assert trainer.training_type_plugin.local_rank == 1
raise SystemExit()
model = BoringModel()
trainer = Trainer(fast_dev_run=True, strategy="ddp", gpus=2, callbacks=[CB()])
with pytest.raises(SystemExit):
trainer.fit(model)
set_device_mock.assert_called_once()
def test_strategy_choice_ddp_te(*_):
trainer = Trainer(fast_dev_run=True, strategy="ddp", gpus=2)
assert isinstance(trainer.accelerator, GPUAccelerator)
assert isinstance(trainer.training_type_plugin, DDPPlugin)
assert isinstance(trainer.training_type_plugin.cluster_environment, TorchElasticEnvironment)
assert trainer.training_type_plugin.cluster_environment.local_rank() == 1
assert trainer.training_type_plugin.local_rank == 1
@mock.patch.dict(
@@ -891,23 +781,13 @@ def test_strategy_choice_ddp_te(set_device_mock, device_count_mock, setup_distributed_mock):
@mock.patch("torch.cuda.set_device")
@mock.patch("torch.cuda.device_count", return_value=2)
@mock.patch("pytorch_lightning.plugins.DDPPlugin.setup_distributed", autospec=True)
def test_strategy_choice_ddp2_te(set_device_mock, device_count_mock, setup_distributed_mock):
class CB(Callback):
def on_fit_start(self, trainer, pl_module):
assert isinstance(trainer.accelerator, GPUAccelerator)
assert isinstance(trainer.training_type_plugin, DDP2Plugin)
assert isinstance(trainer.training_type_plugin.cluster_environment, TorchElasticEnvironment)
assert trainer.training_type_plugin.cluster_environment.local_rank() == 1
assert trainer.training_type_plugin.local_rank == 1
raise SystemExit()
model = BoringModel()
trainer = Trainer(fast_dev_run=True, strategy="ddp2", gpus=2, callbacks=[CB()])
with pytest.raises(SystemExit):
trainer.fit(model)
set_device_mock.assert_called_once()
def test_strategy_choice_ddp2_te(*_):
trainer = Trainer(fast_dev_run=True, strategy="ddp2", gpus=2)
assert isinstance(trainer.accelerator, GPUAccelerator)
assert isinstance(trainer.training_type_plugin, DDP2Plugin)
assert isinstance(trainer.training_type_plugin.cluster_environment, TorchElasticEnvironment)
assert trainer.training_type_plugin.cluster_environment.local_rank() == 1
assert trainer.training_type_plugin.local_rank == 1
@mock.patch.dict(
@@ -915,21 +795,13 @@ def test_strategy_choice_ddp2_te(set_device_mock, device_count_mock, setup_distributed_mock):
)
@mock.patch("torch.cuda.device_count", return_value=0)
@mock.patch("pytorch_lightning.plugins.DDPPlugin.setup_distributed", autospec=True)
def test_strategy_choice_ddp_cpu_te(device_count_mock, setup_distributed_mock):
class CB(Callback):
def on_fit_start(self, trainer, pl_module):
assert isinstance(trainer.accelerator, CPUAccelerator)
assert isinstance(trainer.training_type_plugin, DDPPlugin)
assert isinstance(trainer.training_type_plugin.cluster_environment, TorchElasticEnvironment)
assert trainer.training_type_plugin.cluster_environment.local_rank() == 1
assert trainer.training_type_plugin.local_rank == 1
raise SystemExit()
model = BoringModel()
trainer = Trainer(fast_dev_run=True, strategy="ddp_spawn", num_processes=2, callbacks=[CB()])
with pytest.raises(SystemExit):
trainer.fit(model)
def test_strategy_choice_ddp_cpu_te(*_):
trainer = Trainer(fast_dev_run=True, strategy="ddp_spawn", num_processes=2)
assert isinstance(trainer.accelerator, CPUAccelerator)
assert isinstance(trainer.training_type_plugin, DDPPlugin)
assert isinstance(trainer.training_type_plugin.cluster_environment, TorchElasticEnvironment)
assert trainer.training_type_plugin.cluster_environment.local_rank() == 1
assert trainer.training_type_plugin.local_rank == 1
@mock.patch.dict(
@@ -946,23 +818,13 @@ def test_strategy_choice_ddp_cpu_te(device_count_mock, setup_distributed_mock):
@mock.patch("torch.cuda.set_device")
@mock.patch("torch.cuda.device_count", return_value=1)
@mock.patch("pytorch_lightning.plugins.DDPPlugin.setup_distributed", autospec=True)
def test_strategy_choice_ddp_kubeflow(set_device_mock, device_count_mock, setup_distributed_mock):
class CB(Callback):
def on_fit_start(self, trainer, pl_module):
assert isinstance(trainer.accelerator, GPUAccelerator)
assert isinstance(trainer.training_type_plugin, DDPPlugin)
assert isinstance(trainer.training_type_plugin.cluster_environment, KubeflowEnvironment)
assert trainer.training_type_plugin.cluster_environment.local_rank() == 0
assert trainer.training_type_plugin.local_rank == 0
raise SystemExit()
model = BoringModel()
trainer = Trainer(fast_dev_run=True, strategy="ddp", gpus=1, callbacks=[CB()])
with pytest.raises(SystemExit):
trainer.fit(model)
set_device_mock.assert_called_once()
def test_strategy_choice_ddp_kubeflow(*_):
trainer = Trainer(fast_dev_run=True, strategy="ddp", gpus=1)
assert isinstance(trainer.accelerator, GPUAccelerator)
assert isinstance(trainer.training_type_plugin, DDPPlugin)
assert isinstance(trainer.training_type_plugin.cluster_environment, KubeflowEnvironment)
assert trainer.training_type_plugin.cluster_environment.local_rank() == 0
assert trainer.training_type_plugin.local_rank == 0
@mock.patch.dict(
@@ -977,21 +839,13 @@ def test_strategy_choice_ddp_kubeflow(set_device_mock, device_count_mock, setup_distributed_mock):
)
@mock.patch("torch.cuda.device_count", return_value=0)
@mock.patch("pytorch_lightning.plugins.DDPPlugin.setup_distributed", autospec=True)
def test_strategy_choice_ddp_cpu_kubeflow(device_count_mock, setup_distributed_mock):
class CB(Callback):
def on_fit_start(self, trainer, pl_module):
assert isinstance(trainer.accelerator, CPUAccelerator)
assert isinstance(trainer.training_type_plugin, DDPPlugin)
assert isinstance(trainer.training_type_plugin.cluster_environment, KubeflowEnvironment)
assert trainer.training_type_plugin.cluster_environment.local_rank() == 0
assert trainer.training_type_plugin.local_rank == 0
raise SystemExit()
model = BoringModel()
trainer = Trainer(fast_dev_run=True, strategy="ddp_spawn", num_processes=2, callbacks=[CB()])
with pytest.raises(SystemExit):
trainer.fit(model)
def test_strategy_choice_ddp_cpu_kubeflow(*_):
trainer = Trainer(fast_dev_run=True, strategy="ddp_spawn", num_processes=2)
assert isinstance(trainer.accelerator, CPUAccelerator)
assert isinstance(trainer.training_type_plugin, DDPPlugin)
assert isinstance(trainer.training_type_plugin.cluster_environment, KubeflowEnvironment)
assert trainer.training_type_plugin.cluster_environment.local_rank() == 0
assert trainer.training_type_plugin.local_rank == 0
@mock.patch.dict(
@@ -1009,20 +863,11 @@ def test_strategy_choice_ddp_cpu_kubeflow(device_count_mock, setup_distributed_mock):
@mock.patch("pytorch_lightning.plugins.DDPPlugin.setup_distributed", autospec=True)
@pytest.mark.parametrize("strategy", ["ddp", DDPPlugin()])
def test_strategy_choice_ddp_cpu_slurm(device_count_mock, setup_distributed_mock, strategy):
class CB(Callback):
def on_fit_start(self, trainer, pl_module):
assert trainer._accelerator_connector._is_slurm_managing_tasks()
assert isinstance(trainer.accelerator, CPUAccelerator)
assert isinstance(trainer.training_type_plugin, DDPPlugin)
assert isinstance(trainer.training_type_plugin.cluster_environment, SLURMEnvironment)
assert trainer.training_type_plugin.local_rank == 0
raise SystemExit()
model = BoringModel()
trainer = Trainer(fast_dev_run=True, strategy=strategy, num_processes=2, callbacks=[CB()])
with pytest.raises(SystemExit):
trainer.fit(model)
trainer = Trainer(fast_dev_run=True, strategy=strategy, num_processes=2)
assert isinstance(trainer.accelerator, CPUAccelerator)
assert isinstance(trainer.training_type_plugin, DDPPlugin)
assert isinstance(trainer.training_type_plugin.cluster_environment, SLURMEnvironment)
assert trainer.training_type_plugin.local_rank == 0
def test_unsupported_tpu_choice(monkeypatch):
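Note on the unchanged `@mock.patch.dict(` decorators above these tests: their contents lie outside the diff context, but they populate `os.environ` with fake cluster variables, which is why assertions such as `cluster_environment.local_rank() == 1` hold without a real SLURM or torchelastic launch. An illustrative sketch only, assuming typical SLURM variables (the exact dict in the test file is not shown in this diff and may differ):

@mock.patch.dict(
    os.environ,
    {
        "SLURM_NTASKS": "2",          # matches the two requested devices
        "SLURM_JOB_NAME": "SOME_NAME",
        "SLURM_NODEID": "0",
        "SLURM_PROCID": "1",          # global rank
        "SLURM_LOCALID": "1",         # local rank, asserted as 1 above
    },
)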


@@ -1,6 +1,23 @@
# Copyright The PyTorch Lightning team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from unittest import mock
import torch
from pytorch_lightning import Trainer
from pytorch_lightning.accelerators import GPUAccelerator
from tests.helpers import BoringModel
from tests.helpers.runif import RunIf
@@ -26,3 +43,20 @@ def test_get_nvidia_gpu_stats(tmpdir):
for f in fields:
assert any(f in h for h in gpu_stats.keys())
@RunIf(min_gpus=1)
@mock.patch("torch.cuda.set_device")
def test_set_cuda_device(set_device_mock, tmpdir):
model = BoringModel()
trainer = Trainer(
default_root_dir=tmpdir,
fast_dev_run=True,
accelerator="gpu",
devices=1,
enable_checkpointing=False,
enable_model_summary=False,
enable_progress_bar=False,
)
trainer.fit(model)
set_device_mock.assert_called_once()
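Design note: several of the removed connector tests verified `set_device_mock.assert_called_once()` inline; that check now lives in this single dedicated GPU test, which mocks `torch.cuda.set_device` and runs a one-batch fit on a single device. To run it in isolation (the file path is assumed here, since file names are not shown in this view):

pytest tests/accelerators/test_gpu.py -k test_set_cuda_device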