Rename min_gpus to min_cuda_gpus (#13133)

* rename min_gpus to min_cuda_gpus
Justus Schock 2022-05-24 14:54:05 +02:00 committed by GitHub
parent 880224d4e1
commit fbd887df9d
63 changed files with 223 additions and 217 deletions
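In short, every test that gated on GPU availability via @RunIf(min_gpus=N) now uses @RunIf(min_cuda_gpus=N); only the keyword is renamed, the skip behaviour is unchanged. A minimal before/after sketch of the pattern repeated throughout this diff (illustrative, not a file from the commit):

# before this change
@RunIf(min_gpus=2, standalone=True)
def test_transfer_batch_hook_ddp(tmpdir):
    ...

# after this change
@RunIf(min_cuda_gpus=2, standalone=True)
def test_transfer_batch_hook_ddp(tmpdir):
    ...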

View File

@ -291,7 +291,7 @@ def test_accelererator_invalid_type_devices(mock_is_available, mock_device_count
_ = Trainer(accelerator="gpu", devices=device_count)
@RunIf(min_gpus=1)
@RunIf(min_cuda_gpus=1)
def test_accelerator_gpu():
trainer = Trainer(accelerator="gpu", devices=1)
assert isinstance(trainer.accelerator, GPUAccelerator)
@ -311,7 +311,7 @@ def test_accelerator_cpu_with_devices(devices, plugin):
assert isinstance(trainer.accelerator, CPUAccelerator)
@RunIf(min_gpus=2)
@RunIf(min_cuda_gpus=2)
@pytest.mark.parametrize(
["devices", "plugin"], [(1, SingleDeviceStrategy), ([1], SingleDeviceStrategy), (2, DDPSpawnStrategy)]
)
@ -322,7 +322,7 @@ def test_accelerator_gpu_with_devices(devices, plugin):
assert isinstance(trainer.accelerator, GPUAccelerator)
@RunIf(min_gpus=1)
@RunIf(min_cuda_gpus=1)
def test_accelerator_auto_with_devices_gpu():
trainer = Trainer(accelerator="auto", devices=1)
assert isinstance(trainer.accelerator, GPUAccelerator)
@ -374,7 +374,7 @@ def test_strategy_choice_cpu_plugin(tmpdir, plugin):
assert isinstance(trainer.strategy, plugin)
@RunIf(min_gpus=2)
@RunIf(min_cuda_gpus=2)
@pytest.mark.parametrize(
["strategy", "plugin"],
[
@ -394,14 +394,14 @@ def test_strategy_choice_gpu_str(tmpdir, strategy, plugin):
assert isinstance(trainer.strategy, plugin)
@RunIf(min_gpus=2)
@RunIf(min_cuda_gpus=2)
@pytest.mark.parametrize("plugin", [DDPSpawnStrategy, DDPStrategy])
def test_strategy_choice_gpu_plugin(tmpdir, plugin):
trainer = Trainer(strategy=plugin(), accelerator="gpu", devices=2)
assert isinstance(trainer.strategy, plugin)
@RunIf(min_gpus=2)
@RunIf(min_cuda_gpus=2)
@pytest.mark.parametrize("plugin", [DDPSpawnStrategy, DDPStrategy])
def test_device_type_when_training_plugin_gpu_passed(tmpdir, plugin):
@ -449,7 +449,7 @@ def test_strategy_choice_ddp_spawn(cuda_available_mock, device_count_mock):
assert isinstance(trainer.strategy.cluster_environment, LightningEnvironment)
@RunIf(min_gpus=2)
@RunIf(min_cuda_gpus=2)
@mock.patch.dict(
os.environ,
{

View File

@ -23,7 +23,7 @@ from tests.helpers import BoringModel
from tests.helpers.runif import RunIf
@RunIf(min_gpus=1)
@RunIf(min_cuda_gpus=1)
def test_get_torch_gpu_stats(tmpdir):
current_device = torch.device(f"cuda:{torch.cuda.current_device()}")
gpu_stats = GPUAccelerator().get_device_stats(current_device)
@ -33,7 +33,7 @@ def test_get_torch_gpu_stats(tmpdir):
assert any(f in h for h in gpu_stats.keys())
@RunIf(min_gpus=1)
@RunIf(min_cuda_gpus=1)
def test_get_nvidia_gpu_stats(tmpdir):
current_device = torch.device(f"cuda:{torch.cuda.current_device()}")
gpu_stats = get_nvidia_gpu_stats(current_device)
@ -43,7 +43,7 @@ def test_get_nvidia_gpu_stats(tmpdir):
assert any(f in h for h in gpu_stats.keys())
@RunIf(min_gpus=1)
@RunIf(min_cuda_gpus=1)
@mock.patch("torch.cuda.set_device")
def test_set_cuda_device(set_device_mock, tmpdir):
model = BoringModel()
@ -60,12 +60,12 @@ def test_set_cuda_device(set_device_mock, tmpdir):
set_device_mock.assert_called_once()
@RunIf(min_gpus=1)
@RunIf(min_cuda_gpus=1)
def test_gpu_availability():
assert GPUAccelerator.is_available()
@RunIf(min_gpus=1)
@RunIf(min_cuda_gpus=1)
def test_warning_if_gpus_not_used():
with pytest.warns(UserWarning, match="GPU available but not used. Set `accelerator` and `devices`"):
Trainer()

View File

@ -184,25 +184,25 @@ def plugin_parity_test(
@pytest.mark.parametrize(
"kwargs",
[
pytest.param(dict(gpus=1, model_cls=SeedTrainLoaderModel), marks=RunIf(min_gpus=1)),
pytest.param(dict(gpus=1, model_cls=SeedTrainLoaderModel), marks=RunIf(min_cuda_gpus=1)),
pytest.param(
dict(gpus=1, precision=16, model_cls=SeedTrainLoaderModel), marks=RunIf(min_gpus=1, amp_native=True)
dict(gpus=1, precision=16, model_cls=SeedTrainLoaderModel), marks=RunIf(min_cuda_gpus=1, amp_native=True)
),
pytest.param(dict(gpus=2, model_cls=SeedTrainLoaderModel), marks=RunIf(min_gpus=2)),
pytest.param(dict(gpus=2, model_cls=SeedTrainLoaderModel), marks=RunIf(min_cuda_gpus=2)),
pytest.param(
dict(gpus=2, precision=16, model_cls=SeedTrainLoaderModel), marks=RunIf(min_gpus=2, amp_native=True)
dict(gpus=2, precision=16, model_cls=SeedTrainLoaderModel), marks=RunIf(min_cuda_gpus=2, amp_native=True)
),
pytest.param(
dict(gpus=2, model_cls=SeedTrainLoaderMultipleOptimizersModel),
marks=[
RunIf(min_gpus=2),
RunIf(min_cuda_gpus=2),
pytest.mark.skip(reason="TODO: Current issue with multiple optimizers and FairScale."),
],
),
pytest.param(
dict(gpus=2, model_cls=SeedTrainLoaderManualModel),
marks=[
RunIf(min_gpus=2),
RunIf(min_cuda_gpus=2),
pytest.mark.skip(reason="TODO: Current issue with multiple optimizers and FairScale."),
],
),

View File

@ -50,7 +50,7 @@ class SyncBNModule(LightningModule):
return DataLoader(dataset, sampler=sampler, batch_size=self.batch_size)
@RunIf(min_gpus=2, standalone=True)
@RunIf(min_cuda_gpus=2, standalone=True)
def test_sync_batchnorm_parity(tmpdir):
"""Test parity between 1) Training a synced batch-norm layer on 2 GPUs with batch size B per device 2) Training
a batch-norm layer on CPU with twice the batch size."""

View File

@ -29,7 +29,7 @@ from tests.helpers import BoringModel
from tests.helpers.runif import RunIf
@RunIf(min_gpus=1)
@RunIf(min_cuda_gpus=1)
def test_device_stats_gpu_from_torch(tmpdir):
"""Test GPU stats are logged using a logger."""
model = BoringModel()

View File

@ -160,7 +160,7 @@ def test_pruning_callback(
)
@RunIf(min_gpus=2, standalone=True)
@RunIf(min_cuda_gpus=2, standalone=True)
@pytest.mark.parametrize("parameters_to_prune", (False, True))
@pytest.mark.parametrize("use_global_unstructured", (False, True))
def test_pruning_callback_ddp(tmpdir, parameters_to_prune, use_global_unstructured):
@ -174,7 +174,7 @@ def test_pruning_callback_ddp(tmpdir, parameters_to_prune, use_global_unstructur
)
@RunIf(min_gpus=2, skip_windows=True)
@RunIf(min_cuda_gpus=2, skip_windows=True)
def test_pruning_callback_ddp_spawn(tmpdir):
train_with_pruning_callback(
tmpdir, use_global_unstructured=True, strategy="ddp_spawn", accelerator="gpu", devices=2

View File

@ -146,12 +146,12 @@ def train_with_swa(
assert trainer.lightning_module == model
@RunIf(min_gpus=2, standalone=True)
@RunIf(min_cuda_gpus=2, standalone=True)
def test_swa_callback_ddp(tmpdir):
train_with_swa(tmpdir, strategy="ddp", accelerator="gpu", devices=2)
@RunIf(min_gpus=2)
@RunIf(min_cuda_gpus=2)
def test_swa_callback_ddp_spawn(tmpdir):
train_with_swa(tmpdir, strategy="ddp_spawn", accelerator="gpu", devices=2)
@ -161,7 +161,7 @@ def test_swa_callback_ddp_cpu(tmpdir):
train_with_swa(tmpdir, strategy="ddp_spawn", accelerator="cpu", devices=2)
@RunIf(min_gpus=1)
@RunIf(min_cuda_gpus=1)
def test_swa_callback_1_gpu(tmpdir):
train_with_swa(tmpdir, accelerator="gpu", devices=1)

View File

@ -592,7 +592,7 @@ def test_progress_bar_max_val_check_interval(
assert pbar_callback.is_enabled
@RunIf(min_gpus=2, standalone=True)
@RunIf(min_cuda_gpus=2, standalone=True)
@pytest.mark.parametrize("val_check_interval", [0.2, 0.5])
def test_progress_bar_max_val_check_interval_ddp(tmpdir, val_check_interval):
world_size = 2

View File

@ -87,7 +87,7 @@ def test_top_k(save_mock, tmpdir, k: int, epochs: int, val_check_interval: float
@mock.patch("torch.save")
@RunIf(min_gpus=2, standalone=True)
@RunIf(min_cuda_gpus=2, standalone=True)
@pytest.mark.parametrize(["k", "epochs", "val_check_interval", "expected"], [(1, 1, 1.0, 1), (2, 2, 0.3, 4)])
def test_top_k_ddp(save_mock, tmpdir, k, epochs, val_check_interval, expected):
class TestModel(BoringModel):

View File

@ -50,7 +50,7 @@ def test_model_torch_save_ddp_cpu(tmpdir):
torch.save(trainer, temp_path)
@RunIf(min_gpus=2)
@RunIf(min_cuda_gpus=2)
def test_model_torch_save_ddp_cuda(tmpdir):
"""Test to ensure torch save does not fail for model and trainer using gpu ddp."""
model = BoringModel()

View File

@ -265,7 +265,7 @@ def test_full_loop(tmpdir):
assert result[0]["test_acc"] > 0.6
@RunIf(min_gpus=1)
@RunIf(min_cuda_gpus=1)
@mock.patch(
"pytorch_lightning.strategies.Strategy.lightning_module",
new_callable=PropertyMock,

View File

@ -276,7 +276,7 @@ def test_toggle_untoggle_3_optimizers_shared_parameters(tmpdir):
trainer.fit(model)
@RunIf(min_gpus=1)
@RunIf(min_cuda_gpus=1)
def test_device_placement(tmpdir):
model = BoringModel()

View File

@ -99,7 +99,7 @@ def _ddp_test_fn(rank, worldsize):
assert epoch_log == {"b": cumulative_sum * worldsize, "a_epoch": cumulative_sum * worldsize}
@RunIf(min_gpus=2, skip_windows=True)
@RunIf(min_cuda_gpus=2, skip_windows=True)
def test_result_reduce_ddp():
"""Make sure result logging works with DDP."""
tutils.set_random_main_port()
@ -298,7 +298,7 @@ def test_result_collection_restoration(tmpdir):
batch_idx = None
@pytest.mark.parametrize("device", ("cpu", pytest.param("cuda", marks=RunIf(min_gpus=1))))
@pytest.mark.parametrize("device", ("cpu", pytest.param("cuda", marks=RunIf(min_cuda_gpus=1))))
def test_lightning_module_logging_result_collection(tmpdir, device):
class LoggingModel(BoringModel):
def __init__(self):
@ -474,13 +474,13 @@ def test_result_collection_reload(tmpdir):
result_collection_reload(default_root_dir=tmpdir)
@RunIf(min_gpus=1)
@RunIf(min_cuda_gpus=1)
@mock.patch.dict(os.environ, {"PL_FAULT_TOLERANT_TRAINING": "1"})
def test_result_collection_reload_1_gpu_ddp(tmpdir):
result_collection_reload(default_root_dir=tmpdir, strategy="ddp", accelerator="gpu")
@RunIf(min_gpus=2, standalone=True)
@RunIf(min_cuda_gpus=2, standalone=True)
@mock.patch.dict(os.environ, {"PL_FAULT_TOLERANT_TRAINING": "1"})
def test_result_collection_reload_2_gpus(tmpdir):
result_collection_reload(default_root_dir=tmpdir, strategy="ddp", accelerator="gpu", devices=2)

View File

@ -64,7 +64,7 @@ class RunIf:
def __new__(
self,
*args,
min_gpus: int = 0,
min_cuda_gpus: int = 0,
min_torch: Optional[str] = None,
max_torch: Optional[str] = None,
min_python: Optional[str] = None,
@ -93,7 +93,7 @@ class RunIf:
"""
Args:
*args: Any :class:`pytest.mark.skipif` arguments.
min_gpus: Require this number of gpus.
min_cuda_gpus: Require this number of gpus.
min_torch: Require that PyTorch is greater or equal than this version.
max_torch: Require that PyTorch is less than this version.
min_python: Require that Python is greater or equal than this version.
@ -122,9 +122,9 @@ class RunIf:
conditions = []
reasons = []
if min_gpus:
conditions.append(torch.cuda.device_count() < min_gpus)
reasons.append(f"GPUs>={min_gpus}")
if min_cuda_gpus:
conditions.append(torch.cuda.device_count() < min_cuda_gpus)
reasons.append(f"GPUs>={min_cuda_gpus}")
if min_torch:
torch_version = get_distribution("torch").version
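As the hunk above shows, the renamed keyword still feeds the same skip condition, torch.cuda.device_count() < min_cuda_gpus. A condensed, self-contained sketch of that behaviour (the helper name run_if_min_cuda_gpus is hypothetical and used only for illustration; the real RunIf class combines this with the other conditions):

import pytest
import torch

def run_if_min_cuda_gpus(n: int):
    # hypothetical standalone helper mirroring the RunIf condition above:
    # skip unless at least `n` CUDA devices are visible
    return pytest.mark.skipif(torch.cuda.device_count() < n, reason=f"GPUs>={n}")

@run_if_min_cuda_gpus(2)
def test_requires_two_cuda_devices():
    assert torch.cuda.device_count() >= 2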

View File

@ -313,7 +313,7 @@ def test_setup_dataloaders_replace_standard_sampler(shuffle, strategy):
"accelerator, expected",
[
("cpu", torch.device("cpu")),
pytest.param("gpu", torch.device("cuda", 0), marks=RunIf(min_gpus=1)),
pytest.param("gpu", torch.device("cuda", 0), marks=RunIf(min_cuda_gpus=1)),
pytest.param("tpu", torch.device("xla", 0), marks=RunIf(tpu=True)),
],
)
@ -393,7 +393,7 @@ def test_autocast():
lite._precision_plugin.forward_context().__exit__.assert_called()
@RunIf(min_gpus=2, standalone=True, deepspeed=True)
@RunIf(min_cuda_gpus=2, standalone=True, deepspeed=True)
def test_deepspeed_multiple_models():
class Lite(LightningLite):
def run(self):

View File

@ -109,9 +109,9 @@ def precision_context(precision, accelerator) -> Generator[None, None, None]:
"precision, strategy, devices, accelerator",
[
pytest.param(32, None, 1, "cpu"),
pytest.param(32, None, 1, "gpu", marks=RunIf(min_gpus=1)),
pytest.param(16, None, 1, "gpu", marks=RunIf(min_gpus=1)),
pytest.param("bf16", None, 1, "gpu", marks=RunIf(min_gpus=1, min_torch="1.10", bf16_cuda=True)),
pytest.param(32, None, 1, "gpu", marks=RunIf(min_cuda_gpus=1)),
pytest.param(16, None, 1, "gpu", marks=RunIf(min_cuda_gpus=1)),
pytest.param("bf16", None, 1, "gpu", marks=RunIf(min_cuda_gpus=1, min_torch="1.10", bf16_cuda=True)),
],
)
def test_boring_lite_model_single_device(precision, strategy, devices, accelerator, tmpdir):
@ -159,7 +159,7 @@ def run(rank, model, train_dataloader, num_epochs, precision, accelerator, tmpdi
@pytest.mark.skipif(True, reason="Skipping as it takes 80 seconds.")
@RunIf(min_gpus=2)
@RunIf(min_cuda_gpus=2)
@pytest.mark.parametrize(
"precision, strategy, devices, accelerator",
[
@ -190,7 +190,7 @@ def test_boring_lite_model_ddp_spawn(precision, strategy, devices, accelerator,
assert torch.equal(w_pure.cpu(), w_lite.cpu())
@RunIf(min_gpus=2, standalone=True)
@RunIf(min_cuda_gpus=2, standalone=True)
@pytest.mark.parametrize(
"precision, strategy, devices, accelerator",
[

View File

@ -69,7 +69,7 @@ def test_lite_module_attribute_lookup():
_ = lite_module.not_exists
@RunIf(min_gpus=1)
@RunIf(min_cuda_gpus=1)
@pytest.mark.parametrize(
"precision, input_type, expected_type",
[
@ -102,7 +102,7 @@ def test_lite_module_forward_conversion(precision, input_type, expected_type):
@pytest.mark.parametrize(
"device", [torch.device("cpu"), pytest.param(torch.device("cuda", 0), marks=RunIf(min_gpus=1))]
"device", [torch.device("cpu"), pytest.param(torch.device("cuda", 0), marks=RunIf(min_cuda_gpus=1))]
)
@pytest.mark.parametrize("dtype", [torch.float32, torch.float16])
def test_lite_module_device_dtype_propagation(device, dtype):
@ -147,8 +147,8 @@ def test_lite_dataloader_iterator():
"src_device, dest_device",
[
(torch.device("cpu"), torch.device("cpu")),
pytest.param(torch.device("cpu"), torch.device("cuda", 0), marks=RunIf(min_gpus=1)),
pytest.param(torch.device("cuda", 0), torch.device("cpu"), marks=RunIf(min_gpus=1)),
pytest.param(torch.device("cpu"), torch.device("cuda", 0), marks=RunIf(min_cuda_gpus=1)),
pytest.param(torch.device("cuda", 0), torch.device("cpu"), marks=RunIf(min_cuda_gpus=1)),
],
)
def test_lite_dataloader_device_placement(src_device, dest_device):

View File

@ -68,7 +68,7 @@ class BatchHookObserverModel(BoringModel):
assert batch.device == self.device
@RunIf(min_gpus=1)
@RunIf(min_cuda_gpus=1)
def test_callback_batch_on_device(tmpdir):
"""Test that the batch object sent to the on_*_batch_start/end hooks is on the right device."""

View File

@ -91,7 +91,7 @@ def test_log_epoch_metrics_before_on_evaluation_end(update_eval_epoch_metrics_mo
assert order == ["log_epoch_metrics", "on_validation_end"]
@RunIf(min_gpus=1)
@RunIf(min_cuda_gpus=1)
def test_memory_consumption_validation(tmpdir):
"""Test that the training batch is no longer in GPU memory when running validation."""

View File

@ -99,7 +99,7 @@ def test_amp_cpus(tmpdir, strategy, precision, devices):
assert trainer.state.finished, f"Training failed with {trainer.state}"
@RunIf(min_gpus=2, min_torch="1.10")
@RunIf(min_cuda_gpus=2, min_torch="1.10")
@pytest.mark.parametrize("strategy", [None, "dp", "ddp_spawn"])
@pytest.mark.parametrize("precision", [16, pytest.param("bf16", marks=RunIf(bf16_cuda=True))])
@pytest.mark.parametrize("devices", [1, 2])
@ -124,7 +124,7 @@ def test_amp_gpus(tmpdir, strategy, precision, devices):
assert trainer.state.finished, f"Training failed with {trainer.state}"
@RunIf(min_gpus=2)
@RunIf(min_cuda_gpus=2)
@mock.patch.dict(
os.environ,
{
@ -189,7 +189,7 @@ def test_amp_without_apex(bwd_mock, tmpdir):
assert not bwd_mock.called
@RunIf(min_gpus=1, amp_apex=True)
@RunIf(min_cuda_gpus=1, amp_apex=True)
@mock.patch("pytorch_lightning.plugins.precision.apex_amp.ApexMixedPrecisionPlugin.backward")
def test_amp_with_apex(bwd_mock, tmpdir):
"""Check calling apex scaling in training."""

View File

@ -38,7 +38,7 @@ PL_VERSION_LT_1_5 = _compare_version("pytorch_lightning", operator.lt, "1.5")
PRETEND_N_OF_GPUS = 16
@RunIf(min_gpus=2)
@RunIf(min_cuda_gpus=2)
def test_multi_gpu_none_backend(tmpdir):
"""Make sure when using multiple GPUs the user can't use `accelerator = None`."""
tutils.set_random_main_port()
@ -57,7 +57,7 @@ def test_multi_gpu_none_backend(tmpdir):
tpipes.run_model_test(trainer_options, model, dm)
@RunIf(min_gpus=2)
@RunIf(min_cuda_gpus=2)
@pytest.mark.parametrize("devices", [1, [0], [1]])
def test_single_gpu_model(tmpdir, devices):
"""Make sure single GPU works (DP mode)."""
@ -206,7 +206,7 @@ def test_torchelastic_gpu_parsing(mocked_device_count, mocked_is_available, gpus
assert trainer.device_ids == [0]
@RunIf(min_gpus=1)
@RunIf(min_cuda_gpus=1)
def test_single_gpu_batch_parse():
trainer = Trainer(accelerator="gpu", devices=1)
@ -298,7 +298,7 @@ def test_single_gpu_batch_parse():
assert batch.label.type() == "torch.cuda.LongTensor"
@RunIf(min_gpus=1)
@RunIf(min_cuda_gpus=1)
def test_non_blocking():
"""Tests that non_blocking=True only gets passed on torch.Tensor.to, but not on other objects."""
trainer = Trainer()

View File

@ -111,7 +111,7 @@ def test_training_epoch_end_metrics_collection_on_override(tmpdir):
assert overridden_model.len_outputs == overridden_model.num_train_batches
@RunIf(min_gpus=1)
@RunIf(min_cuda_gpus=1)
@mock.patch(
"pytorch_lightning.strategies.Strategy.lightning_module",
new_callable=PropertyMock,
@ -170,7 +170,7 @@ def test_apply_batch_transfer_handler(model_getter_mock):
assert torch.allclose(batch_gpu.targets.cpu(), torch.ones(5, 1, dtype=torch.long) * 2)
@RunIf(min_gpus=2, standalone=True)
@RunIf(min_cuda_gpus=2, standalone=True)
def test_transfer_batch_hook_ddp(tmpdir):
"""Test custom data are properly moved to the right device using ddp."""
@ -439,13 +439,16 @@ class HookedModel(BoringModel):
[
{},
# these precision plugins modify the optimization flow, so testing them explicitly
pytest.param(dict(accelerator="gpu", devices=1, precision=16, amp_backend="native"), marks=RunIf(min_gpus=1)),
pytest.param(
dict(accelerator="gpu", devices=1, precision=16, amp_backend="apex"), marks=RunIf(min_gpus=1, amp_apex=True)
dict(accelerator="gpu", devices=1, precision=16, amp_backend="native"), marks=RunIf(min_cuda_gpus=1)
),
pytest.param(
dict(accelerator="gpu", devices=1, precision=16, amp_backend="apex"),
marks=RunIf(min_cuda_gpus=1, amp_apex=True),
),
pytest.param(
dict(accelerator="gpu", devices=1, precision=16, strategy="deepspeed"),
marks=RunIf(min_gpus=1, standalone=True, deepspeed=True),
marks=RunIf(min_cuda_gpus=1, standalone=True, deepspeed=True),
),
],
)

View File

@ -40,7 +40,7 @@ if _HOROVOD_AVAILABLE:
import horovod.torch as hvd
@RunIf(min_gpus=1, horovod=True)
@RunIf(min_cuda_gpus=1, horovod=True)
def test_nccl_is_available_on_gpu_environment():
from tests.helpers.runif import _HOROVOD_NCCL_AVAILABLE
@ -135,7 +135,7 @@ def test_horovod_cpu_implicit(tmpdir):
_run_horovod(trainer_options)
@RunIf(min_gpus=2, horovod_nccl=True, skip_windows=True)
@RunIf(min_cuda_gpus=2, horovod_nccl=True, skip_windows=True)
def test_horovod_multi_gpu(tmpdir):
"""Test Horovod with multi-GPU support."""
trainer_options = dict(
@ -152,7 +152,7 @@ def test_horovod_multi_gpu(tmpdir):
_run_horovod(trainer_options)
@RunIf(min_gpus=2, horovod_nccl=True, skip_windows=True)
@RunIf(min_cuda_gpus=2, horovod_nccl=True, skip_windows=True)
def test_horovod_multi_gpu_accumulate_grad_batches(tmpdir):
trainer_options = dict(
default_root_dir=str(tmpdir),
@ -168,7 +168,7 @@ def test_horovod_multi_gpu_accumulate_grad_batches(tmpdir):
_run_horovod(trainer_options)
@RunIf(horovod=True, skip_windows=True, min_gpus=2)
@RunIf(horovod=True, skip_windows=True, min_cuda_gpus=2)
def test_horovod_raises_unsupported_accumulate_grad_batches(tmpdir):
"""Ensure MisConfigurationException for different `accumulate_grad_batches` at different epochs for Horovod
Strategy on multi-gpus."""
@ -185,7 +185,7 @@ def test_horovod_raises_unsupported_accumulate_grad_batches(tmpdir):
trainer.fit(model)
@RunIf(min_gpus=2, horovod_nccl=True, skip_windows=True)
@RunIf(min_cuda_gpus=2, horovod_nccl=True, skip_windows=True)
def test_horovod_multi_gpu_grad_by_value(tmpdir):
"""Test Horovod with multi-GPU support."""
trainer_options = dict(
@ -207,7 +207,7 @@ def test_horovod_multi_gpu_grad_by_value(tmpdir):
# https://discuss.pytorch.org/t/torch-cuda-amp-vs-nvidia-apex/74994
# Check with (tgaddair) on Horovod issues if this feature is needed
@pytest.mark.skip(reason="TODO: Horovod currently doesn't work with Apex")
@RunIf(min_gpus=2, amp_apex=True, horovod_nccl=True, skip_windows=True)
@RunIf(min_cuda_gpus=2, amp_apex=True, horovod_nccl=True, skip_windows=True)
def test_horovod_apex(tmpdir):
"""Test Horovod with multi-GPU support using apex amp."""
trainer_options = dict(
@ -226,7 +226,7 @@ def test_horovod_apex(tmpdir):
_run_horovod(trainer_options)
@RunIf(min_gpus=2, horovod_nccl=True, skip_windows=True)
@RunIf(min_cuda_gpus=2, horovod_nccl=True, skip_windows=True)
def test_horovod_amp(tmpdir):
"""Test Horovod with multi-GPU support using native amp."""
trainer_options = dict(
@ -245,7 +245,7 @@ def test_horovod_amp(tmpdir):
_run_horovod(trainer_options)
@RunIf(min_gpus=2, horovod_nccl=True, skip_windows=True)
@RunIf(min_cuda_gpus=2, horovod_nccl=True, skip_windows=True)
def test_horovod_gather(tmpdir):
"""Test Horovod with multi-GPU support using native amp."""
trainer_options = dict(
@ -262,7 +262,7 @@ def test_horovod_gather(tmpdir):
_run_horovod(trainer_options)
@RunIf(min_gpus=2, skip_windows=True, horovod=True, horovod_nccl=True)
@RunIf(min_cuda_gpus=2, skip_windows=True, horovod=True, horovod_nccl=True)
def test_horovod_transfer_batch_to_gpu(tmpdir):
class TestTrainingStepModel(BoringModel):
def training_step(self, batch, *args, **kwargs):

View File

@ -39,7 +39,7 @@ def test_model_saves_with_input_sample(tmpdir):
assert os.path.getsize(file_path) > 4e2
@RunIf(min_gpus=1)
@RunIf(min_cuda_gpus=1)
def test_model_saves_on_gpu(tmpdir):
"""Test that model saves on gpu."""
model = BoringModel()
@ -86,7 +86,7 @@ def test_model_saves_with_example_input_array(tmpdir, modelclass, input_sample):
assert os.path.getsize(file_path) > 4e2
@RunIf(min_gpus=2)
@RunIf(min_cuda_gpus=2)
def test_model_saves_on_multi_gpu(tmpdir):
"""Test that ONNX model saves on a distributed backend."""
tutils.set_random_main_port()

View File

@ -375,7 +375,7 @@ def test_callbacks_references_fit_ckpt_path(tmpdir):
trainer.fit(model, datamodule=dm, ckpt_path=str(tmpdir / "last.ckpt"))
@RunIf(min_gpus=2)
@RunIf(min_cuda_gpus=2)
def test_running_test_pretrained_model_distrib_dp(tmpdir):
"""Verify `test()` on pretrained model."""
@ -424,7 +424,7 @@ def test_running_test_pretrained_model_distrib_dp(tmpdir):
tpipes.run_model_prediction(pretrained_model, dataloader)
@RunIf(min_gpus=2)
@RunIf(min_cuda_gpus=2)
def test_running_test_pretrained_model_distrib_ddp_spawn(tmpdir):
"""Verify `test()` on pretrained model."""
tutils.set_random_main_port()
@ -558,7 +558,7 @@ def test_load_model_from_checkpoint(tmpdir, model_template):
new_trainer.test(pretrained_model)
@RunIf(min_gpus=2)
@RunIf(min_cuda_gpus=2)
def test_dp_resume(tmpdir):
"""Make sure DP continues training correctly."""
model = CustomClassificationModelDP(lr=0.1)

View File

@ -78,7 +78,7 @@ def test_torchscript_input_output_trace():
assert torch.allclose(script_output, model_output)
@RunIf(min_gpus=1)
@RunIf(min_cuda_gpus=1)
@pytest.mark.parametrize("device", [torch.device("cpu"), torch.device("cuda", 0)])
def test_torchscript_device(device):
"""Test that scripted module is on the correct device."""

View File

@ -72,7 +72,7 @@ def test_unsqueeze_scalar_tensor(inp, expected):
assert torch.all(unsqueeze_scalar_tensor(inp).eq(expected))
@RunIf(min_gpus=2)
@RunIf(min_cuda_gpus=2)
def test_lightning_parallel_module_unsqueeze_scalar():
"""Test that LightningParallelModule takes care of un-squeezeing 0-dim tensors."""
@ -114,7 +114,7 @@ def test_python_scalar_to_tensor(inp, expected):
assert torch.all(python_scalar_to_tensor(inp).eq(expected))
@RunIf(min_gpus=1)
@RunIf(min_cuda_gpus=1)
@pytest.mark.parametrize("device", [torch.device("cpu"), torch.device("cuda", 0)])
def test_lightning_parallel_module_python_scalar_conversion(device):
"""Test that LightningParallelModule can convert Python scalars to tensors."""
@ -139,7 +139,7 @@ def test_lightning_parallel_module_python_scalar_conversion(device):
assert output["python scalar"] == torch.tensor([12.3], device=device)
@RunIf(min_gpus=2)
@RunIf(min_cuda_gpus=2)
@pytest.mark.parametrize(
"nest, unnest",
[
@ -181,7 +181,7 @@ def test_lightning_parallel_module_device_access(nest, unnest):
assert torch.all(output.cpu().eq(torch.tensor([1, 1])))
@RunIf(min_gpus=2)
@RunIf(min_cuda_gpus=2)
def test_lightning_parallel_module_device_access_warning():
"""Test that we show a warning when the device can't be inferred from the input."""

View File

@ -135,7 +135,7 @@ class TestPrecisionModel(BoringModel):
return TestClippingOptimizer(self.layer.parameters(), lr=0.1)
@RunIf(min_gpus=2)
@RunIf(min_cuda_gpus=2)
@pytest.mark.parametrize("accum", [1, 2])
def test_amp_gradient_unscale(tmpdir, accum: int):
model = TestPrecisionModel()
@ -161,7 +161,7 @@ def test_amp_gradient_unscale(tmpdir, accum: int):
trainer.fit(model)
@RunIf(min_gpus=1)
@RunIf(min_cuda_gpus=1)
def test_amp_skip_optimizer(tmpdir):
"""Test that optimizers can be skipped when using amp."""
@ -195,7 +195,7 @@ def test_amp_skip_optimizer(tmpdir):
trainer.fit(model)
@RunIf(min_gpus=2, amp_apex=True, standalone=True)
@RunIf(min_cuda_gpus=2, amp_apex=True, standalone=True)
@pytest.mark.parametrize("amp_level", ["O2"])
def test_amp_apex_ddp_fit(amp_level, tmpdir):
class CustomBoringModel(BoringModel):
@ -222,7 +222,7 @@ def test_amp_apex_ddp_fit(amp_level, tmpdir):
trainer.test(model)
@RunIf(min_gpus=2, amp_apex=True)
@RunIf(min_cuda_gpus=2, amp_apex=True)
@pytest.mark.parametrize("amp_level", ["O2"])
def test_amp_apex_ddp_spawn_fit(amp_level, tmpdir):
trainer = Trainer(

View File

@ -148,7 +148,7 @@ def test_double_precision(tmpdir, boring_model):
trainer.predict(model)
@RunIf(min_gpus=2)
@RunIf(min_cuda_gpus=2)
def test_double_precision_ddp(tmpdir):
model = DoublePrecisionBoringModel()

View File

@ -346,7 +346,7 @@ def test_advanced_profiler_cprofile_deepcopy(tmpdir):
trainer.fit(model)
@RunIf(min_gpus=2, standalone=True)
@RunIf(min_cuda_gpus=2, standalone=True)
def test_pytorch_profiler_trainer_ddp(tmpdir, pytorch_profiler):
"""Ensure that the profiler can be given to the training and default step are properly recorded."""
model = BoringModel()
@ -476,7 +476,7 @@ def test_pytorch_profiler_multiple_loggers(tmpdir):
assert look_for_trace(tmpdir)
@RunIf(min_gpus=1, standalone=True)
@RunIf(min_cuda_gpus=1, standalone=True)
def test_pytorch_profiler_nested_emit_nvtx(tmpdir):
"""This test check emit_nvtx is correctly supported."""
profiler = PyTorchProfiler(use_cuda=True, emit_nvtx=True)

View File

@ -33,7 +33,7 @@ class BoringModel4QAdam(BoringModel):
return [optimizer], [lr_scheduler]
@RunIf(min_gpus=1, bagua=True)
@RunIf(min_cuda_gpus=1, bagua=True)
def test_bagua_default(tmpdir):
trainer = Trainer(
default_root_dir=tmpdir,
@ -45,7 +45,7 @@ def test_bagua_default(tmpdir):
assert isinstance(trainer.strategy, BaguaStrategy)
@RunIf(min_gpus=2, standalone=True, bagua=True)
@RunIf(min_cuda_gpus=2, standalone=True, bagua=True)
def test_async_algorithm(tmpdir):
model = BoringModel()
bagua_strategy = BaguaStrategy(algorithm="async")
@ -64,7 +64,7 @@ def test_async_algorithm(tmpdir):
assert torch.norm(param) < 3
@RunIf(min_gpus=1, bagua=True)
@RunIf(min_cuda_gpus=1, bagua=True)
@pytest.mark.parametrize(
"algorithm", ["gradient_allreduce", "bytegrad", "qadam", "decentralized", "low_precision_decentralized"]
)
@ -92,7 +92,7 @@ def test_configuration(algorithm, tmpdir):
trainer.strategy.configure_ddp()
@RunIf(min_gpus=1, bagua=True)
@RunIf(min_cuda_gpus=1, bagua=True)
def test_qadam_configuration(tmpdir):
model = BoringModel4QAdam()
bagua_strategy = BaguaStrategy(algorithm="qadam")

View File

@ -337,7 +337,7 @@ def test_multiple_peers(num_processes, wait_seconds):
assert any(global_step > 0 for global_step in process_steps)
@RunIf(hivemind=True, min_gpus=1)
@RunIf(hivemind=True, min_cuda_gpus=1)
@mock.patch.dict(os.environ, {"HIVEMIND_MEMORY_SHARING_STRATEGY": "file_descriptor"}, clear=True)
def test_scaler_updated_precision_16():
class TestModel(BoringModel):

View File

@ -27,9 +27,9 @@ from tests.strategies.test_dp import CustomClassificationModelDP
@pytest.mark.parametrize(
"trainer_kwargs",
(
pytest.param(dict(accelerator="gpu", devices=1), marks=RunIf(min_gpus=1)),
pytest.param(dict(strategy="dp", accelerator="gpu", devices=2), marks=RunIf(min_gpus=2)),
pytest.param(dict(strategy="ddp_spawn", accelerator="gpu", devices=2), marks=RunIf(min_gpus=2)),
pytest.param(dict(accelerator="gpu", devices=1), marks=RunIf(min_cuda_gpus=1)),
pytest.param(dict(strategy="dp", accelerator="gpu", devices=2), marks=RunIf(min_cuda_gpus=2)),
pytest.param(dict(strategy="ddp_spawn", accelerator="gpu", devices=2), marks=RunIf(min_cuda_gpus=2)),
),
)
def test_evaluate(tmpdir, trainer_kwargs):

View File

@ -32,7 +32,7 @@ from tests.utilities.distributed import call_training_script
CLI_ARGS = "--max_epochs 1 --accelerator gpu --devices 2 --strategy ddp"
@RunIf(min_gpus=2)
@RunIf(min_cuda_gpus=2)
@pytest.mark.parametrize("as_module", [True, False])
def test_multi_gpu_model_ddp_fit_only(tmpdir, as_module):
# call the script
@ -46,7 +46,7 @@ def test_multi_gpu_model_ddp_fit_only(tmpdir, as_module):
assert result["status"] == "complete"
@RunIf(min_gpus=2)
@RunIf(min_cuda_gpus=2)
@pytest.mark.parametrize("as_module", [True, False])
def test_multi_gpu_model_ddp_test_only(tmpdir, as_module):
# call the script
@ -60,7 +60,7 @@ def test_multi_gpu_model_ddp_test_only(tmpdir, as_module):
assert result["status"] == "complete"
@RunIf(min_gpus=2)
@RunIf(min_cuda_gpus=2)
@pytest.mark.parametrize("as_module", [True, False])
def test_multi_gpu_model_ddp_fit_test(tmpdir, as_module):
# call the script
@ -122,7 +122,7 @@ def test_ddp_torch_dist_is_available_in_setup(
trainer.fit(model)
@RunIf(min_gpus=2, min_torch="1.8.1", standalone=True)
@RunIf(min_cuda_gpus=2, min_torch="1.8.1", standalone=True)
@pytest.mark.parametrize("precision", (16, 32))
def test_ddp_wrapper(tmpdir, precision):
"""Test parameters to ignore are carried over for DDP."""

View File

@ -102,7 +102,7 @@ class TestFSDPModel(BoringModel):
assert self.layer.module[2].reshard_after_forward is True
@RunIf(min_gpus=2, skip_windows=True, standalone=True, min_torch="1.12dev")
@RunIf(min_cuda_gpus=2, skip_windows=True, standalone=True, min_torch="1.12dev")
def test_fully_sharded_native_strategy_sync_batchnorm(tmpdir):
"""Test to ensure that sync_batchnorm works when using fsdp_native and GPU, and all stages can be run."""
@ -119,7 +119,7 @@ def test_fully_sharded_native_strategy_sync_batchnorm(tmpdir):
_run_multiple_stages(trainer, model, os.path.join(tmpdir, "last.ckpt"))
@RunIf(min_gpus=1, skip_windows=True, standalone=True, min_torch="1.12dev")
@RunIf(min_cuda_gpus=1, skip_windows=True, standalone=True, min_torch="1.12dev")
def test_fully_sharded_native_strategy_checkpoint(tmpdir):
"""Test to ensure that checkpoint is saved correctly when using a single GPU, and all stages can be run."""
@ -130,7 +130,7 @@ def test_fully_sharded_native_strategy_checkpoint(tmpdir):
_run_multiple_stages(trainer, model, os.path.join(tmpdir, "last.ckpt"))
@RunIf(min_gpus=2, skip_windows=True, standalone=True, min_torch="1.12dev")
@RunIf(min_cuda_gpus=2, skip_windows=True, standalone=True, min_torch="1.12dev")
def test_fully_sharded_native_strategy_checkpoint_multi_gpus(tmpdir):
"""Test to ensure that checkpoint is saved correctly when using multiple GPUs, and all stages can be run."""

View File

@ -96,7 +96,7 @@ class TestFSDPModel(BoringModel):
assert self.layer.module[2].mixed_precision
@RunIf(min_gpus=1, skip_windows=True, standalone=True, fairscale_fully_sharded=True)
@RunIf(min_cuda_gpus=1, skip_windows=True, standalone=True, fairscale_fully_sharded=True)
def test_fully_sharded_strategy_checkpoint(tmpdir):
"""Test to ensure that checkpoint is saved correctly when using a single GPU, and all stages can be run."""
@ -114,7 +114,7 @@ def test_fully_sharded_strategy_checkpoint(tmpdir):
_run_multiple_stages(trainer, model, os.path.join(tmpdir, "last.ckpt"))
@RunIf(min_gpus=2, skip_windows=True, standalone=True, fairscale_fully_sharded=True)
@RunIf(min_cuda_gpus=2, skip_windows=True, standalone=True, fairscale_fully_sharded=True)
def test_fully_sharded_strategy_checkpoint_multi_gpus(tmpdir):
"""Test to ensure that checkpoint is saved correctly when using multiple GPUs, and all stages can be run."""
@ -162,7 +162,7 @@ def _run_multiple_stages(trainer, model, model_path: Optional[str] = None):
trainer.test(ckpt_path=model_path)
@RunIf(min_gpus=1, skip_windows=True, standalone=True, fairscale_fully_sharded=True)
@RunIf(min_cuda_gpus=1, skip_windows=True, standalone=True, fairscale_fully_sharded=True)
def test_fsdp_gradient_clipping_raises(tmpdir):
"""Test to ensure that an exception is raised when clipping gradients by value with FSDP."""
model = BoringModel()

View File

@ -21,7 +21,7 @@ from tests.helpers.runif import RunIf
from tests.helpers.simple_models import ClassificationModel
@RunIf(min_gpus=2)
@RunIf(min_cuda_gpus=2)
def test_multi_gpu_early_stop_ddp_spawn(tmpdir):
tutils.set_random_main_port()
@ -41,7 +41,7 @@ def test_multi_gpu_early_stop_ddp_spawn(tmpdir):
tpipes.run_model_test(trainer_options, model, dm)
@RunIf(min_gpus=2)
@RunIf(min_cuda_gpus=2)
def test_multi_gpu_model_ddp_spawn(tmpdir):
tutils.set_random_main_port()
@ -61,7 +61,7 @@ def test_multi_gpu_model_ddp_spawn(tmpdir):
tpipes.run_model_test(trainer_options, model)
@RunIf(min_gpus=2)
@RunIf(min_cuda_gpus=2)
def test_ddp_all_dataloaders_passed_to_fit(tmpdir):
"""Make sure DDP works with dataloaders passed to fit()"""
tutils.set_random_main_port()

View File

@ -65,7 +65,7 @@ def test_ddp_cpu():
trainer.fit(model)
@RunIf(min_gpus=2)
@RunIf(min_cuda_gpus=2)
def test_ddp_spawn_extra_parameters(tmpdir):
"""Tests if device is set correctly when training for DDPSpawnStrategy and tests add_to_queue/get_from_queue
with Lightning Module (deprecated way)."""

View File

@ -33,7 +33,7 @@ class BoringModelGPU(BoringModel):
self.start_cuda_memory = torch.cuda.memory_allocated()
@RunIf(min_gpus=2, skip_windows=True, standalone=True)
@RunIf(min_cuda_gpus=2, skip_windows=True, standalone=True)
def test_ddp_with_2_gpus():
"""Tests if device is set correctly when training and after teardown for DDPStrategy."""
trainer = Trainer(
@ -69,7 +69,7 @@ class BarrierModel(BoringModel):
self.trainer.strategy.barrier("barrier after model is wrapped")
@RunIf(min_gpus=4, standalone=True)
@RunIf(min_cuda_gpus=4, standalone=True)
@mock.patch("torch.distributed.barrier")
def test_ddp_barrier_non_consecutive_device_ids(barrier_mock, tmpdir):
"""Test correct usage of barriers when device ids do not start at 0 or are not consecutive."""
@ -146,7 +146,7 @@ def test_ddp_configure_ddp():
assert isinstance(trainer.model, LightningModule)
@RunIf(min_gpus=1)
@RunIf(min_cuda_gpus=1)
@pytest.mark.parametrize(
"trainer_fn", (TrainerFn.VALIDATING, TrainerFn.TUNING, TrainerFn.TESTING, TrainerFn.PREDICTING)
)

View File

@ -42,7 +42,7 @@ class TestDDPStrategy(DDPStrategy):
return super().teardown()
@RunIf(min_gpus=2, min_torch="1.9.0", skip_windows=True, standalone=True)
@RunIf(min_cuda_gpus=2, min_torch="1.9.0", skip_windows=True, standalone=True)
def test_ddp_fp16_compress_comm_hook(tmpdir):
"""Test for DDP FP16 compress hook."""
model = BoringModel()
@ -65,7 +65,7 @@ def test_ddp_fp16_compress_comm_hook(tmpdir):
assert trainer.state.finished, f"Training failed with {trainer.state}"
@RunIf(min_gpus=2, min_torch="1.9.0", skip_windows=True, standalone=True)
@RunIf(min_cuda_gpus=2, min_torch="1.9.0", skip_windows=True, standalone=True)
def test_ddp_sgd_comm_hook(tmpdir):
"""Test for DDP FP16 compress hook."""
model = BoringModel()
@ -89,7 +89,7 @@ def test_ddp_sgd_comm_hook(tmpdir):
assert trainer.state.finished, f"Training failed with {trainer.state}"
@RunIf(min_gpus=2, min_torch="1.9.0", skip_windows=True, standalone=True)
@RunIf(min_cuda_gpus=2, min_torch="1.9.0", skip_windows=True, standalone=True)
def test_ddp_fp16_compress_wrap_sgd_comm_hook(tmpdir):
"""Test for DDP FP16 compress wrapper for SGD hook."""
model = BoringModel()
@ -114,7 +114,7 @@ def test_ddp_fp16_compress_wrap_sgd_comm_hook(tmpdir):
assert trainer.state.finished, f"Training failed with {trainer.state}"
@RunIf(min_gpus=2, min_torch="1.9.0", skip_windows=True, standalone=True)
@RunIf(min_cuda_gpus=2, min_torch="1.9.0", skip_windows=True, standalone=True)
def test_ddp_spawn_fp16_compress_comm_hook(tmpdir):
"""Test for DDP Spawn FP16 compress hook."""
model = BoringModel()
@ -134,7 +134,7 @@ def test_ddp_spawn_fp16_compress_comm_hook(tmpdir):
assert trainer.state.finished, f"Training failed with {trainer.state}"
@RunIf(min_gpus=2, min_torch="1.10.0", skip_windows=True, standalone=True)
@RunIf(min_cuda_gpus=2, min_torch="1.10.0", skip_windows=True, standalone=True)
def test_ddp_post_local_sgd_comm_hook(tmpdir):
"""Test for DDP post-localSGD hook."""
model = BoringModel()
@ -162,7 +162,7 @@ def test_ddp_post_local_sgd_comm_hook(tmpdir):
assert trainer.state.finished, f"Training failed with {trainer.state}"
@RunIf(skip_windows=True, min_torch="1.10.0", min_gpus=2, standalone=True)
@RunIf(skip_windows=True, min_torch="1.10.0", min_cuda_gpus=2, standalone=True)
@mock.patch("torch.distributed.algorithms.model_averaging.averagers.PeriodicModelAverager.average_parameters")
def test_post_local_sgd_model_averaging(average_parameters_mock, tmpdir):
"""Test that when using DDP with post-localSGD, model averaging is called."""
@ -207,7 +207,7 @@ def test_post_local_sgd_model_averaging(average_parameters_mock, tmpdir):
average_parameters_mock.assert_called()
@RunIf(skip_windows=True, min_torch="1.10.0", min_gpus=2, standalone=True)
@RunIf(skip_windows=True, min_torch="1.10.0", min_cuda_gpus=2, standalone=True)
@mock.patch("torch.distributed.algorithms.model_averaging.averagers.PeriodicModelAverager.average_parameters")
def test_post_local_sgd_model_averaging_value_error(average_parameters_mock, tmpdir):
"""Test that when using DDP with post-localSGD a ValueError is thrown when the optmizer is

View File

@ -108,7 +108,7 @@ def test_deepspeed_lightning_module(tmpdir):
assert model.dtype == torch.double
@RunIf(min_gpus=1)
@RunIf(min_cuda_gpus=1)
def test_deepspeed_lightning_module_precision(tmpdir):
"""Test to ensure that a model wrapped in `LightningDeepSpeedModule` moves tensors to half when precision
16."""
@ -232,7 +232,7 @@ def test_deepspeed_defaults(tmpdir):
assert isinstance(strategy.config["zero_optimization"], dict)
@RunIf(min_gpus=1, standalone=True, deepspeed=True)
@RunIf(min_cuda_gpus=1, standalone=True, deepspeed=True)
def test_warn_deepspeed_ignored(tmpdir):
class TestModel(BoringModel):
def backward(self, loss: Tensor, optimizer: Optimizer, optimizer_idx: int, *args, **kwargs) -> None:
@ -257,7 +257,7 @@ def test_warn_deepspeed_ignored(tmpdir):
assert any("track_grad_norm=2.0)' but this is not supported" in w for w in warning_cache)
@RunIf(min_gpus=1, deepspeed=True)
@RunIf(min_cuda_gpus=1, deepspeed=True)
@pytest.mark.parametrize(
["dataset_cls", "value"],
[(RandomDataset, "auto"), (RandomDataset, 10), (RandomIterableDataset, "auto"), (RandomIterableDataset, 10)],
@ -298,7 +298,7 @@ def test_deepspeed_auto_batch_size_config_select(mock_deepspeed_distributed, moc
trainer.fit(model)
@RunIf(min_gpus=1, standalone=True, deepspeed=True)
@RunIf(min_cuda_gpus=1, standalone=True, deepspeed=True)
def test_deepspeed_run_configure_optimizers(tmpdir):
"""Test end to end that deepspeed works with defaults (without ZeRO as that requires compilation), whilst using
configure_optimizers for optimizers and schedulers."""
@ -336,7 +336,7 @@ def test_deepspeed_run_configure_optimizers(tmpdir):
_assert_save_model_is_equal(model, tmpdir, trainer)
@RunIf(min_gpus=1, standalone=True, deepspeed=True)
@RunIf(min_cuda_gpus=1, standalone=True, deepspeed=True)
def test_deepspeed_config(tmpdir, deepspeed_zero_config):
"""Test to ensure deepspeed works correctly when passed a DeepSpeed config object including
optimizers/schedulers and saves the model weights to load correctly."""
@ -375,7 +375,7 @@ def test_deepspeed_config(tmpdir, deepspeed_zero_config):
assert len(set(lr_monitor.lrs["lr-SGD"])) == 8
@RunIf(min_gpus=1, standalone=True, deepspeed=True)
@RunIf(min_cuda_gpus=1, standalone=True, deepspeed=True)
def test_deepspeed_custom_precision_params(tmpdir):
"""Ensure if we modify the FP16 parameters via the DeepSpeedStrategy, the deepspeed config contains these
changes."""
@ -423,7 +423,7 @@ def test_deepspeed_custom_activation_checkpointing_params(tmpdir):
assert checkpoint_config["synchronize_checkpoint_boundary"]
@RunIf(min_gpus=1, standalone=True, deepspeed=True)
@RunIf(min_cuda_gpus=1, standalone=True, deepspeed=True)
def test_deepspeed_custom_activation_checkpointing_params_forwarded(tmpdir):
"""Ensure if we modify the activation checkpointing parameters, we pass these to
deepspeed.checkpointing.configure correctly."""
@ -455,7 +455,7 @@ def test_deepspeed_custom_activation_checkpointing_params_forwarded(tmpdir):
)
@RunIf(min_gpus=1, deepspeed=True)
@RunIf(min_cuda_gpus=1, deepspeed=True)
def test_deepspeed_assert_config_zero_offload_disabled(tmpdir, deepspeed_zero_config):
"""Ensure if we use a config and turn off offload_optimizer, that this is set to False within the config."""
@ -481,7 +481,7 @@ def test_deepspeed_assert_config_zero_offload_disabled(tmpdir, deepspeed_zero_co
trainer.fit(model)
@RunIf(min_gpus=2, standalone=True, deepspeed=True)
@RunIf(min_cuda_gpus=2, standalone=True, deepspeed=True)
def test_deepspeed_multigpu(tmpdir):
"""Test to ensure that DeepSpeed with multiple GPUs works and deepspeed distributed is initialized
correctly."""
@ -504,7 +504,7 @@ def test_deepspeed_multigpu(tmpdir):
_assert_save_model_is_equal(model, tmpdir, trainer)
@RunIf(min_gpus=1, standalone=True, deepspeed=True)
@RunIf(min_cuda_gpus=1, standalone=True, deepspeed=True)
def test_deepspeed_fp32_works(tmpdir):
model = BoringModel()
trainer = Trainer(
@ -519,7 +519,7 @@ def test_deepspeed_fp32_works(tmpdir):
trainer.fit(model)
@RunIf(min_gpus=2, standalone=True, deepspeed=True)
@RunIf(min_cuda_gpus=2, standalone=True, deepspeed=True)
def test_deepspeed_stage_3_save_warning(tmpdir):
"""Test to ensure that DeepSpeed Stage 3 gives a warning when saving on rank zero."""
model = BoringModel()
@ -546,7 +546,7 @@ def test_deepspeed_stage_3_save_warning(tmpdir):
trainer.save_checkpoint(checkpoint_path)
@RunIf(min_gpus=1, standalone=True, deepspeed=True)
@RunIf(min_cuda_gpus=1, standalone=True, deepspeed=True)
def test_deepspeed_multigpu_single_file(tmpdir):
"""Test to ensure that DeepSpeed loads from a single file checkpoint."""
model = BoringModel()
@ -669,7 +669,7 @@ class ManualModelParallelClassificationModel(ModelParallelClassificationModel):
opt.step()
@RunIf(min_gpus=2, standalone=True, deepspeed=True)
@RunIf(min_cuda_gpus=2, standalone=True, deepspeed=True)
def test_deepspeed_multigpu_stage_3(tmpdir, deepspeed_config):
"""Test to ensure ZeRO Stage 3 works with a parallel model."""
model = ModelParallelBoringModel()
@ -689,7 +689,7 @@ def test_deepspeed_multigpu_stage_3(tmpdir, deepspeed_config):
_assert_save_model_is_equal(model, tmpdir, trainer)
@RunIf(min_gpus=2, standalone=True, deepspeed=True)
@RunIf(min_cuda_gpus=2, standalone=True, deepspeed=True)
def test_deepspeed_multigpu_stage_3_manual_optimization(tmpdir, deepspeed_config):
"""Test to ensure ZeRO Stage 3 works with a parallel model."""
model = ModelParallelBoringModelManualOptim()
@ -711,7 +711,7 @@ def test_deepspeed_multigpu_stage_3_manual_optimization(tmpdir, deepspeed_config
@pytest.mark.parametrize(("accumulate_grad_batches", "automatic_optimization"), [(1, False), (2, True)])
@RunIf(min_gpus=2, standalone=True, deepspeed=True)
@RunIf(min_cuda_gpus=2, standalone=True, deepspeed=True)
def test_deepspeed_multigpu_stage_3_checkpointing(tmpdir, automatic_optimization, accumulate_grad_batches):
seed_everything(1)
if automatic_optimization:
@ -758,7 +758,7 @@ def test_deepspeed_multigpu_stage_3_checkpointing(tmpdir, automatic_optimization
assert results[0]["test_acc"] > 0.7
@RunIf(min_gpus=1, standalone=True, deepspeed=True)
@RunIf(min_cuda_gpus=1, standalone=True, deepspeed=True)
def test_deepspeed_multigpu_stage_3_warns_resume_training(tmpdir):
"""Test to ensure with Stage 3 and multiple GPUs that we can resume from training, throwing a warning that the
optimizer state and scheduler states cannot be restored."""
@ -793,7 +793,7 @@ def test_deepspeed_multigpu_stage_3_warns_resume_training(tmpdir):
trainer.fit(model, datamodule=dm, ckpt_path=checkpoint_path)
@RunIf(min_gpus=1, standalone=True, deepspeed=True)
@RunIf(min_cuda_gpus=1, standalone=True, deepspeed=True)
def test_deepspeed_multigpu_stage_3_resume_training(tmpdir):
"""Test to ensure with Stage 3 and single GPU that we can resume training."""
initial_model = ModelParallelClassificationModel()
@ -859,7 +859,7 @@ def test_deepspeed_multigpu_stage_3_resume_training(tmpdir):
@pytest.mark.parametrize("offload_optimizer", [False, True])
@RunIf(min_gpus=2, standalone=True, deepspeed=True)
@RunIf(min_cuda_gpus=2, standalone=True, deepspeed=True)
def test_deepspeed_multigpu_stage_2_accumulated_grad_batches(tmpdir, offload_optimizer):
"""Test to ensure with Stage 2 and multiple GPUs, accumulated grad batches works."""
seed_everything(42)
@ -898,7 +898,7 @@ def test_deepspeed_multigpu_stage_2_accumulated_grad_batches(tmpdir, offload_opt
assert verification_callback.on_train_batch_start_called
@RunIf(min_gpus=2, standalone=True, deepspeed=True)
@RunIf(min_cuda_gpus=2, standalone=True, deepspeed=True)
def test_deepspeed_multigpu_test(tmpdir):
"""Test to ensure we can use DeepSpeed with just test using ZeRO Stage 3."""
model = ModelParallelBoringModel()
@ -917,7 +917,7 @@ def test_deepspeed_multigpu_test(tmpdir):
# TODO(Sean): Once partial parameter partitioning is supported this test should be re-enabled
@pytest.mark.skip("Partial parameter partitioning for DeepSpeed is currently broken.")
@RunIf(min_gpus=1, standalone=True, deepspeed=True)
@RunIf(min_cuda_gpus=1, standalone=True, deepspeed=True)
def test_deepspeed_multigpu_partial_partition_parameters(tmpdir):
"""Test to ensure that a module that defines a layer inside the ``__init__`` and ``configure_sharded_model``
correctly converts all parameters to float16 when ``precision=16`` and runs successfully."""
@ -951,7 +951,7 @@ def test_deepspeed_multigpu_partial_partition_parameters(tmpdir):
trainer.fit(model)
@RunIf(min_gpus=1, standalone=True, deepspeed=True)
@RunIf(min_cuda_gpus=1, standalone=True, deepspeed=True)
def test_deepspeed_multigpu_test_rnn(tmpdir):
"""Test to ensure that turning off explicit partitioning of the entire module for ZeRO Stage 3 works when
training with certain layers which will crash with explicit partitioning."""
@ -1025,7 +1025,7 @@ def _assert_save_model_is_equal(model, tmpdir, trainer):
assert torch.equal(orig_param, saved_model_param)
@RunIf(min_gpus=2, standalone=True, deepspeed=True)
@RunIf(min_cuda_gpus=2, standalone=True, deepspeed=True)
def test_deepspeed_multigpu_no_schedulers(tmpdir):
"""Test to ensure ZeRO Stage 3 works with a parallel model and no schedulers."""
model = ModelParallelBoringModelNoSchedulers()
@ -1044,7 +1044,7 @@ def test_deepspeed_multigpu_no_schedulers(tmpdir):
_assert_save_model_is_equal(model, tmpdir, trainer)
@RunIf(min_gpus=1, standalone=True, deepspeed=True)
@RunIf(min_cuda_gpus=1, standalone=True, deepspeed=True)
def test_deepspeed_skip_backward_raises(tmpdir):
class TestModel(BoringModel):
def training_step(self, batch, batch_idx):
@ -1065,7 +1065,7 @@ def test_deepspeed_skip_backward_raises(tmpdir):
trainer.fit(model)
@RunIf(min_gpus=1, standalone=True, deepspeed=True)
@RunIf(min_cuda_gpus=1, standalone=True, deepspeed=True)
def test_deepspeed_setup_train_dataloader(tmpdir):
"""Test DeepSpeed works when setup is required to call in the DataModule."""
@ -1109,7 +1109,7 @@ def test_deepspeed_setup_train_dataloader(tmpdir):
@pytest.mark.parametrize("interval", ["step", "epoch"])
@pytest.mark.parametrize("max_epoch", [2])
@pytest.mark.parametrize("limit_train_batches", [2])
@RunIf(min_gpus=1, standalone=True, deepspeed=True)
@RunIf(min_cuda_gpus=1, standalone=True, deepspeed=True)
def test_scheduler_step_count(mock_step, max_epoch, limit_train_batches, interval):
"""Test to ensure that the scheduler is called the correct amount of times during training when scheduler is
set to step or epoch."""
@ -1144,7 +1144,7 @@ def test_scheduler_step_count(mock_step, max_epoch, limit_train_batches, interva
assert mock_step.call_count == 1 + (max_epoch * limit_train_batches)
@RunIf(min_gpus=1, standalone=True, deepspeed=True)
@RunIf(min_cuda_gpus=1, standalone=True, deepspeed=True)
def test_deepspeed_configure_gradient_clipping(tmpdir):
"""Test to ensure that a warning is raised when `LightningModule.configure_gradient_clipping` is overridden in
case of deepspeed."""
@ -1168,7 +1168,7 @@ def test_deepspeed_configure_gradient_clipping(tmpdir):
trainer.fit(model)
@RunIf(min_gpus=1, standalone=True, deepspeed=True)
@RunIf(min_cuda_gpus=1, standalone=True, deepspeed=True)
def test_deepspeed_gradient_clip_by_value(tmpdir):
"""Test to ensure that an exception is raised when using `gradient_clip_algorithm='value'`."""
model = BoringModel()
@ -1185,7 +1185,7 @@ def test_deepspeed_gradient_clip_by_value(tmpdir):
trainer.fit(model)
@RunIf(min_gpus=1, standalone=True, deepspeed=True)
@RunIf(min_cuda_gpus=1, standalone=True, deepspeed=True)
def test_different_accumulate_grad_batches_fails(tmpdir):
model = BoringModel()
trainer = Trainer(
@ -1203,7 +1203,7 @@ def test_different_accumulate_grad_batches_fails(tmpdir):
trainer.fit(model)
@RunIf(min_gpus=2, standalone=True, deepspeed=True)
@RunIf(min_cuda_gpus=2, standalone=True, deepspeed=True)
def test_specific_gpu_device_id(tmpdir):
class TestCallback(Callback):
def on_train_start(self, trainer: Trainer, pl_module: LightningModule) -> None:
@ -1246,7 +1246,7 @@ def test_specific_gpu_device_id(tmpdir):
trainer.test(model)
@RunIf(min_gpus=2, min_torch="1.10.0", standalone=True, deepspeed=True)
@RunIf(min_cuda_gpus=2, min_torch="1.10.0", standalone=True, deepspeed=True)
def test_deepspeed_with_meta_device(tmpdir):
with init_meta_context():
model = BoringModel()
@ -1265,7 +1265,7 @@ def test_deepspeed_with_meta_device(tmpdir):
assert model.layer.weight.device.type == "cpu"
@RunIf(min_gpus=2, standalone=True, deepspeed=True)
@RunIf(min_cuda_gpus=2, standalone=True, deepspeed=True)
def test_deepspeed_multi_save_same_filepath(tmpdir):
"""Test that verifies that deepspeed saves only latest checkpoint in the specified path and deletes the old
sharded checkpoints."""
@ -1300,7 +1300,7 @@ def test_deepspeed_multi_save_same_filepath(tmpdir):
assert expected == set(os.listdir(ckpt_path))
@RunIf(min_gpus=2, standalone=True, deepspeed=True)
@RunIf(min_cuda_gpus=2, standalone=True, deepspeed=True)
@pytest.mark.skipif(not _DEEPSPEED_GREATER_EQUAL_0_6, reason="requires deepspeed >= 0.6")
def test_deepspeed_with_bfloat16_precision(tmpdir):
"""Test that deepspeed works with bfloat16 precision."""

View File

@ -54,7 +54,7 @@ class CustomClassificationModelDP(ClassificationModel):
self.log("test_acc", self.test_acc(outputs["logits"], outputs["y"]))
@RunIf(min_gpus=2)
@RunIf(min_cuda_gpus=2)
def test_multi_gpu_early_stop_dp(tmpdir):
"""Make sure DDP works.
@ -79,7 +79,7 @@ def test_multi_gpu_early_stop_dp(tmpdir):
tpipes.run_model_test(trainer_options, model, dm)
@RunIf(min_gpus=2)
@RunIf(min_cuda_gpus=2)
def test_multi_gpu_model_dp(tmpdir):
tutils.set_random_main_port()
@ -195,7 +195,7 @@ def test_dp_raise_exception_with_batch_transfer_hooks(mock_is_available, mock_de
trainer.fit(model)
@RunIf(min_gpus=2)
@RunIf(min_cuda_gpus=2)
def test_dp_training_step_dict(tmpdir):
"""This test verifies that dp properly reduces dictionaries."""
model = ReductionTestModel()
@ -214,7 +214,7 @@ def test_dp_training_step_dict(tmpdir):
trainer.test(model)
@RunIf(min_gpus=2)
@RunIf(min_cuda_gpus=2)
def test_dp_batch_not_moved_to_device_explicitly(tmpdir):
"""Test that with DP, batch is not moved to the device explicitly."""

View File

@ -17,7 +17,7 @@ if _FAIRSCALE_AVAILABLE:
@pytest.mark.parametrize("clip_val", [0, 10])
@RunIf(min_gpus=1, skip_windows=True, fairscale=True)
@RunIf(min_cuda_gpus=1, skip_windows=True, fairscale=True)
@mock.patch("fairscale.optim.oss.OSS.clip_grad_norm")
def test_ddp_sharded_precision_16_clip_gradients(mock_oss_clip_grad_norm, clip_val, tmpdir):
"""Ensure that clip gradients is only called if the value is greater than 0."""
@ -47,7 +47,7 @@ def test_sharded_ddp_choice(tmpdir, strategy, expected):
assert isinstance(trainer.strategy, expected)
@RunIf(min_gpus=1, fairscale=True)
@RunIf(min_cuda_gpus=1, fairscale=True)
@pytest.mark.parametrize(
"strategy,expected", [("ddp_sharded", DDPShardedStrategy), ("ddp_sharded_spawn", DDPSpawnShardedStrategy)]
)
@ -74,7 +74,7 @@ def test_ddp_sharded_strategy_checkpoint_cpu(tmpdir):
assert torch.equal(ddp_param.to("cpu"), shard_param)
@RunIf(min_gpus=2, skip_windows=True, fairscale=True)
@RunIf(min_cuda_gpus=2, skip_windows=True, fairscale=True)
def test_ddp_sharded_strategy_checkpoint_multi_gpu(tmpdir):
"""Test to ensure that checkpoint is saved correctly when using multiple GPUs."""
model = BoringModel()
@ -91,7 +91,7 @@ def test_ddp_sharded_strategy_checkpoint_multi_gpu(tmpdir):
assert torch.equal(ddp_param.to("cpu"), shard_param)
@RunIf(min_gpus=2, skip_windows=True, fairscale=True)
@RunIf(min_cuda_gpus=2, skip_windows=True, fairscale=True)
def test_ddp_sharded_strategy_finetune(tmpdir):
"""Test to ensure that we can save and restart training (simulate fine-tuning)"""
model = BoringModel()
@ -126,7 +126,7 @@ def test_ddp_sharded_strategy_fit_ckpt_path(tmpdir):
@pytest.mark.skip(reason="Not a critical test, skip till drone CI performance improves.") # todo
@pytest.mark.skip(reason="Currently unsupported restarting training on different number of devices.")
@RunIf(min_gpus=2, skip_windows=True, fairscale=True)
@RunIf(min_cuda_gpus=2, skip_windows=True, fairscale=True)
def test_ddp_sharded_strategy_fit_ckpt_path_downsize_gpus(tmpdir):
"""Test to ensure that resuming from checkpoint works when downsizing number of GPUS."""
model = BoringModel()
@ -144,7 +144,7 @@ def test_ddp_sharded_strategy_fit_ckpt_path_downsize_gpus(tmpdir):
trainer.fit(model, ckpt_path=checkpoint_path)
@RunIf(min_gpus=1, skip_windows=True, fairscale=True)
@RunIf(min_cuda_gpus=1, skip_windows=True, fairscale=True)
def test_ddp_sharded_strategy_fit_ckpt_path_gpu_to_cpu(tmpdir):
"""Test to ensure that resuming from checkpoint works when going from GPUs- > CPU."""
model = BoringModel()
@ -165,7 +165,10 @@ def test_ddp_sharded_strategy_fit_ckpt_path_gpu_to_cpu(tmpdir):
@RunIf(skip_windows=True, standalone=True, fairscale=True)
@pytest.mark.parametrize(
"trainer_kwargs",
(dict(accelerator="cpu", devices=2), pytest.param(dict(accelerator="gpu", devices=2), marks=RunIf(min_gpus=2))),
(
dict(accelerator="cpu", devices=2),
pytest.param(dict(accelerator="gpu", devices=2), marks=RunIf(min_cuda_gpus=2)),
),
)
def test_ddp_sharded_strategy_test_multigpu(tmpdir, trainer_kwargs):
"""Test to ensure we can use validate and test without fit."""
@ -197,7 +200,7 @@ class ManualBoringModel(BoringModel):
return {"loss": loss}
@RunIf(min_gpus=2, skip_windows=True, standalone=True, fairscale=True)
@RunIf(min_cuda_gpus=2, skip_windows=True, standalone=True, fairscale=True)
def test_ddp_sharded_strategy_manual_optimization_spawn(tmpdir):
# todo (sean): this test has been split out as running both tests using parametrize causes "Address in use"
model = ManualBoringModel()
@ -213,7 +216,7 @@ def test_ddp_sharded_strategy_manual_optimization_spawn(tmpdir):
trainer.fit(model)
@RunIf(min_gpus=2, skip_windows=True, standalone=True, fairscale=True)
@RunIf(min_cuda_gpus=2, skip_windows=True, standalone=True, fairscale=True)
def test_ddp_sharded_strategy_manual_optimization(tmpdir):
model = ManualBoringModel()
trainer = Trainer(

View File

@ -36,7 +36,7 @@ class BoringModelGPU(BoringModel):
self.start_cuda_memory = torch.cuda.memory_allocated()
@RunIf(min_gpus=1, skip_windows=True)
@RunIf(min_cuda_gpus=1, skip_windows=True)
def test_single_gpu():
"""Tests if device is set correctly when training and after teardown for single GPU strategy."""
trainer = Trainer(accelerator="gpu", devices=1, fast_dev_run=True)

View File

@ -79,7 +79,7 @@ def test_all_rank_logging_ddp_cpu(tmpdir):
trainer.fit(model)
@RunIf(min_gpus=2)
@RunIf(min_cuda_gpus=2)
def test_all_rank_logging_ddp_spawn(tmpdir):
"""Check that all ranks can be logged from."""
model = TestModel()

View File

@ -691,7 +691,7 @@ def test_multiple_dataloaders_reset(val_check_interval, tmpdir):
trainer.fit(model)
@RunIf(min_gpus=1)
@RunIf(min_cuda_gpus=1)
def test_evaluation_move_metrics_to_cpu_and_outputs(tmpdir):
class TestModel(BoringModel):
def validation_step(self, *args):

View File

@ -293,7 +293,7 @@ def test_fx_validator_integration(tmpdir):
trainer.predict(model)
@RunIf(min_gpus=2)
@RunIf(min_cuda_gpus=2)
def test_epoch_results_cache_dp(tmpdir):
root_device = torch.device("cuda", 0)

View File

@ -395,7 +395,7 @@ class LoggingSyncDistModel(BoringModel):
return super().validation_step(batch, batch_idx)
@pytest.mark.parametrize("devices", [1, pytest.param(2, marks=RunIf(min_gpus=2, skip_windows=True))])
@pytest.mark.parametrize("devices", [1, pytest.param(2, marks=RunIf(min_cuda_gpus=2, skip_windows=True))])
def test_logging_sync_dist_true(tmpdir, devices):
"""Tests to ensure that the sync_dist flag works (should just return the original value)"""
fake_result = 1
@ -433,7 +433,7 @@ def test_logging_sync_dist_true(tmpdir, devices):
assert metrics["bar_3"] == 2 + int(use_multiple_devices)
@RunIf(min_gpus=2, standalone=True)
@RunIf(min_cuda_gpus=2, standalone=True)
def test_logging_sync_dist_true_ddp(tmpdir):
"""Tests to ensure that the sync_dist flag works with ddp."""
@ -553,7 +553,7 @@ def test_logging_in_callbacks_with_log_function(tmpdir):
assert trainer.callback_metrics == expected
@RunIf(min_gpus=1)
@RunIf(min_cuda_gpus=1)
def test_metric_are_properly_reduced(tmpdir):
class TestingModel(BoringModel):
def __init__(self, *args, **kwargs) -> None:
@ -691,7 +691,7 @@ def test_sanity_metrics_are_reset(tmpdir):
assert "val_loss" not in trainer.progress_bar_metrics
@RunIf(min_gpus=1)
@RunIf(min_cuda_gpus=1)
def test_move_metrics_to_cpu(tmpdir):
class TestModel(BoringModel):
def on_before_backward(self, loss: torch.Tensor) -> None:

View File

@ -68,11 +68,11 @@ class ManualOptModel(BoringModel):
[
{},
pytest.param(
{"accelerator": "gpu", "devices": 1, "precision": 16, "amp_backend": "native"}, marks=RunIf(min_gpus=1)
{"accelerator": "gpu", "devices": 1, "precision": 16, "amp_backend": "native"}, marks=RunIf(min_cuda_gpus=1)
),
pytest.param(
{"accelerator": "gpu", "devices": 1, "precision": 16, "amp_backend": "apex", "amp_level": "O2"},
marks=RunIf(min_gpus=1, amp_apex=True),
marks=RunIf(min_cuda_gpus=1, amp_apex=True),
),
],
)
@ -198,7 +198,7 @@ def test_multiple_optimizers_manual_log(tmpdir):
assert set(trainer.logged_metrics) == {"a_step", "a_epoch"}
@RunIf(min_gpus=1)
@RunIf(min_cuda_gpus=1)
def test_multiple_optimizers_manual_native_amp(tmpdir):
model = ManualOptModel()
model.val_dataloader = None
@ -282,7 +282,7 @@ class ManualOptimizationExtendedModel(BoringModel):
assert self.called["on_train_batch_end"] == 10
@RunIf(min_gpus=2)
@RunIf(min_cuda_gpus=2)
def test_manual_optimization_and_return_tensor(tmpdir):
"""This test verify that in `manual_optimization` we don't add gradient when the user return loss in
`training_step`"""
@ -306,7 +306,7 @@ def test_manual_optimization_and_return_tensor(tmpdir):
trainer.fit(model)
@RunIf(min_gpus=1)
@RunIf(min_cuda_gpus=1)
def test_manual_optimization_and_accumulated_gradient(tmpdir):
"""This test verify that in `automatic_optimization=False`, step is being called only when we shouldn't
accumulate."""
@ -394,7 +394,7 @@ def test_manual_optimization_and_accumulated_gradient(tmpdir):
trainer.fit(model)
@RunIf(min_gpus=1)
@RunIf(min_cuda_gpus=1)
def test_multiple_optimizers_step(tmpdir):
"""Tests that `step` works with several optimizers."""
@ -783,14 +783,14 @@ def train_manual_optimization(tmpdir, strategy, model_cls=TesManualOptimizationD
assert not torch.equal(param.cpu().data, param_copy.data)
@RunIf(min_gpus=2, standalone=True)
@RunIf(min_cuda_gpus=2, standalone=True)
def test_step_with_optimizer_closure_with_different_frequencies_ddp(tmpdir):
"""Tests that `step` works with optimizer_closure and different accumulated_gradient frequency."""
train_manual_optimization(tmpdir, "ddp")
@RunIf(min_gpus=2)
@RunIf(min_cuda_gpus=2)
def test_step_with_optimizer_closure_with_different_frequencies_ddp_spawn(tmpdir):
"""Tests that `step` works with optimizer_closure and different accumulated_gradient frequency."""
@ -853,7 +853,7 @@ class TestManualOptimizationDDPModelToggleModel(TesManualOptimizationDDPModel):
opt_dis.zero_grad()
@RunIf(min_gpus=2, standalone=True)
@RunIf(min_cuda_gpus=2, standalone=True)
def test_step_with_optimizer_closure_with_different_frequencies_ddp_with_toggle_model(tmpdir):
train_manual_optimization(
tmpdir, "ddp_find_unused_parameters_false", model_cls=TestManualOptimizationDDPModelToggleModel
@ -966,7 +966,7 @@ def test_lr_scheduler_step_not_called(tmpdir):
assert lr_step.call_count == 1
@RunIf(min_gpus=1)
@RunIf(min_cuda_gpus=1)
@pytest.mark.parametrize("precision", [16, 32])
def test_multiple_optimizers_logging(precision, tmpdir):
"""Tests that metrics are properly being logged."""

View File

@ -564,7 +564,7 @@ def test_warn_invalid_scheduler_key_in_manual_optimization(tmpdir):
trainer.fit(model)
@RunIf(min_gpus=2, standalone=True)
@RunIf(min_cuda_gpus=2, standalone=True)
def test_optimizer_state_on_device(tmpdir):
"""Test that optimizers that create state initially at instantiation still end up with the state on the GPU."""

View File

@ -23,7 +23,7 @@ from pytorch_lightning.utilities.exceptions import MisconfigurationException
from tests.helpers.runif import RunIf
@RunIf(min_gpus=2)
@RunIf(min_cuda_gpus=2)
@pytest.mark.parametrize(
["nb", "expected_gpu_idxs", "expected_error"],
[(0, [], MisconfigurationException), (-1, list(range(torch.cuda.device_count())), None), (1, [0], None)],

View File

@ -56,7 +56,7 @@ def test_get_model_ddp_cpu(tmpdir):
trainer.fit(model)
@RunIf(min_gpus=1)
@RunIf(min_cuda_gpus=1)
def test_get_model_gpu(tmpdir):
"""Tests that `trainer.lightning_module` extracts the model correctly when using GPU."""

View File

@ -685,7 +685,7 @@ class MultiProcessModel(BoringModel):
assert len(torch.unique(all_batches, dim=0)) == num_samples
@RunIf(min_gpus=2)
@RunIf(min_cuda_gpus=2)
def test_auto_add_worker_init_fn_distributed(tmpdir, monkeypatch):
"""Test that the lightning worker_init_fn takes care of dataloaders in multi-gpu/multi-node training."""
dataset = NumpyRandomDataset()
@ -810,7 +810,7 @@ class DistribSamplerCallback(Callback):
assert test_sampler.seed == self.expected_seed[2]
@RunIf(min_gpus=2, skip_windows=True)
@RunIf(min_cuda_gpus=2, skip_windows=True)
def test_dataloader_distributed_sampler(tmpdir):
"""Test DistributedSampler and it's arguments for DDP backend."""
seed_everything(123)
@ -835,7 +835,7 @@ class ModelWithDataLoaderDistributedSampler(BoringModel):
return DataLoader(dataloader.dataset, batch_size=32, drop_last=False, sampler=dist_sampler, shuffle=False)
@RunIf(min_gpus=2, skip_windows=True)
@RunIf(min_cuda_gpus=2, skip_windows=True)
def test_dataloader_distributed_sampler_already_attached(tmpdir):
"""Test DistributedSampler and it's arguments for DDP backend when DistSampler already included on
dataloader."""
@ -855,7 +855,7 @@ def test_dataloader_distributed_sampler_already_attached(tmpdir):
assert trainer.state.finished, "DDP Training failed"
@RunIf(min_gpus=3)
@RunIf(min_cuda_gpus=3)
def test_batch_size_smaller_than_num_gpus(tmpdir):
# we need at least 3 gpus for this test
num_gpus = 3

View File

@ -382,7 +382,7 @@ def test_combined_data_loader_validation_test(
apply_to_collection(dataloader.loaders, DataLoader, _assert_dataset)
@pytest.mark.parametrize("accelerator", ["cpu", pytest.param("gpu", marks=RunIf(min_gpus=2))])
@pytest.mark.parametrize("accelerator", ["cpu", pytest.param("gpu", marks=RunIf(min_cuda_gpus=2))])
@pytest.mark.parametrize("replace_sampler_ddp", [False, True])
def test_combined_data_loader_with_max_size_cycle_and_ddp(accelerator, replace_sampler_ddp):
"""This test makes sure distributed sampler has been properly injected in dataloaders when using CombinedLoader

View File

@ -1040,7 +1040,7 @@ def test_on_exception_hook(tmpdir):
assert isinstance(handle_interrupt_callback.exception, MisconfigurationException)
@pytest.mark.parametrize("precision", [32, pytest.param(16, marks=RunIf(min_gpus=1))])
@pytest.mark.parametrize("precision", [32, pytest.param(16, marks=RunIf(min_cuda_gpus=1))])
def test_gradient_clipping_by_norm(tmpdir, precision):
"""Test gradient clipping by norm."""
tutils.reset_seed()
@ -1070,7 +1070,7 @@ def test_gradient_clipping_by_norm(tmpdir, precision):
assert model.assertion_called
@pytest.mark.parametrize("precision", [32, pytest.param(16, marks=RunIf(min_gpus=1))])
@pytest.mark.parametrize("precision", [32, pytest.param(16, marks=RunIf(min_cuda_gpus=1))])
def test_gradient_clipping_by_value(tmpdir, precision):
"""Test gradient clipping by value."""
tutils.reset_seed()
@ -1111,7 +1111,7 @@ def test_invalid_gradient_clip_algo(tmpdir):
Trainer(default_root_dir=tmpdir, gradient_clip_algorithm="norm2")
@RunIf(min_gpus=1)
@RunIf(min_cuda_gpus=1)
def test_gpu_choice():
num_gpus = torch.cuda.device_count()
Trainer(accelerator="gpu", devices=num_gpus, auto_select_gpus=True)
@ -1422,7 +1422,7 @@ def test_trainer_predict_cpu(tmpdir, datamodule, enable_progress_bar):
predict(tmpdir, datamodule=datamodule, enable_progress_bar=enable_progress_bar)
@RunIf(min_gpus=2, standalone=True)
@RunIf(min_cuda_gpus=2, standalone=True)
@pytest.mark.parametrize(
"kwargs",
[
@ -1435,13 +1435,13 @@ def test_trainer_predict_standalone(tmpdir, kwargs):
predict(tmpdir, accelerator="gpu", **kwargs)
@RunIf(min_gpus=1)
@RunIf(min_cuda_gpus=1)
def test_trainer_predict_1_gpu(tmpdir):
predict(tmpdir, accelerator="gpu", devices=1)
@RunIf(skip_windows=True)
@pytest.mark.parametrize("accelerator", ["cpu", pytest.param("gpu", marks=RunIf(min_gpus=2))])
@pytest.mark.parametrize("accelerator", ["cpu", pytest.param("gpu", marks=RunIf(min_cuda_gpus=2))])
def test_trainer_predict_ddp_spawn(tmpdir, accelerator):
predict(tmpdir, strategy="ddp_spawn", accelerator=accelerator, devices=2)
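The parametrization above shows the pattern used throughout this commit: the CPU case always runs, while the GPU case is added only when enough CUDA devices are present. The snippet below reproduces that pattern with plain pytest marks; `two_gpu_marker` is a hypothetical stand-in for `RunIf(min_cuda_gpus=2)`, included only to illustrate how `pytest.param(..., marks=...)` gates a single parameter value.

# Illustrative sketch (assumption): gating one parametrize value on GPU count.
import pytest
import torch

two_gpu_marker = pytest.mark.skipif(
    torch.cuda.device_count() < 2, reason="requires 2 CUDA GPUs"
)


@pytest.mark.parametrize("accelerator", ["cpu", pytest.param("gpu", marks=two_gpu_marker)])
def test_runs_on_cpu_and_optionally_on_gpu(accelerator):
    # The CPU case is always collected; the GPU case is skipped on machines
    # with fewer than two CUDA devices.
    assert accelerator in ("cpu", "gpu")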
@ -1524,7 +1524,7 @@ def test_trainer_access_in_configure_optimizers(tmpdir):
trainer.fit(model, train_data)
@RunIf(min_gpus=1)
@RunIf(min_cuda_gpus=1)
def test_setup_hook_move_to_device_correctly(tmpdir):
"""Verify that if a user defines a layer in the setup hook function, this is moved to the correct device."""
@ -1747,7 +1747,7 @@ class CustomException(Exception):
pass
@RunIf(min_gpus=2, standalone=True)
@RunIf(min_cuda_gpus=2, standalone=True)
def test_ddp_terminate_when_deadlock_is_detected(tmpdir):
"""Test that DDP kills the remaining processes when only one rank is throwing an exception."""
@ -1778,7 +1778,7 @@ def test_ddp_terminate_when_deadlock_is_detected(tmpdir):
trainer.fit(model)
@RunIf(min_gpus=1)
@RunIf(min_cuda_gpus=1)
def test_multiple_trainer_constant_memory_allocated(tmpdir):
"""This tests ensures calling the trainer several times reset the memory back to 0."""

View File

@ -123,7 +123,7 @@ def test_trainer_reset_correctly(tmpdir):
assert actual == expected
@RunIf(min_gpus=1)
@RunIf(min_cuda_gpus=1)
@pytest.mark.parametrize("scale_arg", ["power", "binsearch", True])
def test_auto_scale_batch_size_trainer_arg(tmpdir, scale_arg):
"""Test possible values for 'batch size auto scaling' Trainer argument."""
@ -140,7 +140,7 @@ def test_auto_scale_batch_size_trainer_arg(tmpdir, scale_arg):
assert not os.path.exists(tmpdir / "scale_batch_size_temp_model.ckpt")
@RunIf(min_gpus=1)
@RunIf(min_cuda_gpus=1)
@pytest.mark.parametrize("use_hparams", [True, False])
def test_auto_scale_batch_size_set_model_attribute(tmpdir, use_hparams):
"""Test that new batch size gets written to the correct hyperparameter attribute."""
@ -240,7 +240,7 @@ def test_error_on_dataloader_passed_to_fit(tmpdir):
trainer.tune(model, **fit_options)
@RunIf(min_gpus=1)
@RunIf(min_cuda_gpus=1)
def test_auto_scale_batch_size_with_amp(tmpdir):
before_batch_size = 2
model = BatchSizeModel(batch_size=before_batch_size)

View File

@ -60,7 +60,7 @@ def test_all_gather_ddp_spawn():
torch.multiprocessing.spawn(_test_all_gather_ddp, args=(world_size,), nprocs=world_size)
@RunIf(min_gpus=2, skip_windows=True, standalone=True)
@RunIf(min_cuda_gpus=2, skip_windows=True, standalone=True)
def test_all_gather_collection(tmpdir):
class TestModel(BoringModel):
@ -114,7 +114,7 @@ def test_all_gather_collection(tmpdir):
assert model.training_epoch_end_called
@RunIf(min_gpus=2, skip_windows=True, standalone=True)
@RunIf(min_cuda_gpus=2, skip_windows=True, standalone=True)
def test_all_gather_sync_grads(tmpdir):
class TestModel(BoringModel):

View File

@ -23,7 +23,7 @@ from tests.helpers.torchtext_utils import get_dummy_torchtext_data_iterator
@pytest.mark.parametrize("include_lengths", [False, True])
@pytest.mark.parametrize("device", [torch.device("cuda", 0)])
@pytest.mark.skipif(not _TORCHTEXT_LEGACY, reason="torchtext.legacy is deprecated.")
@RunIf(min_gpus=1)
@RunIf(min_cuda_gpus=1)
def test_batch_move_data_to_device_torchtext_include_lengths(include_lengths, device):
data_iterator, _ = get_dummy_torchtext_data_iterator(num_samples=3, batch_size=3, include_lengths=include_lengths)
data_iter = iter(data_iterator)

View File

@ -22,7 +22,7 @@ from tests.helpers.boring_model import BoringModel
from tests.helpers.runif import RunIf
@RunIf(min_gpus=2, standalone=True, deepspeed=True)
@RunIf(min_cuda_gpus=2, standalone=True, deepspeed=True)
def test_deepspeed_collate_checkpoint(tmpdir):
"""Test to ensure that with DeepSpeed Stage 3 we can collate the sharded checkpoints into a single file."""
model = BoringModel()

View File

@ -34,7 +34,7 @@ def _test_collect_states(rank, world_size):
assert collected_state == {1: {"something": torch.tensor([1])}, 0: {"something": torch.tensor([0])}}
@RunIf(min_gpus=2, min_torch="1.10", skip_windows=True)
@RunIf(min_cuda_gpus=2, min_torch="1.10", skip_windows=True)
def test_collect_states():
"""This test ensures state are properly collected across processes.

View File

@ -48,7 +48,7 @@ class DeviceAssertCallback(Callback):
@pytest.mark.parametrize("dst_dtype", [torch.float, torch.double, torch.half])
@pytest.mark.parametrize("dst_device", [torch.device("cpu"), torch.device("cuda", 0)])
@RunIf(min_gpus=1)
@RunIf(min_cuda_gpus=1)
def test_submodules_device_and_dtype(dst_device, dst_dtype):
"""Test that the device and dtype property updates propagate through mixed nesting of regular nn.Modules and
the special modules of type DeviceDtypeModuleMixin (e.g. Metric or LightningModule)."""
@ -64,7 +64,7 @@ def test_submodules_device_and_dtype(dst_device, dst_dtype):
assert model.dtype == model.module.module.dtype == dst_dtype
@RunIf(min_gpus=2)
@RunIf(min_cuda_gpus=2)
def test_submodules_multi_gpu_dp(tmpdir):
model = TopModule()
trainer = Trainer(
@ -78,7 +78,7 @@ def test_submodules_multi_gpu_dp(tmpdir):
trainer.fit(model)
@RunIf(min_gpus=2)
@RunIf(min_cuda_gpus=2)
def test_submodules_multi_gpu_ddp_spawn(tmpdir):
model = TopModule()
trainer = Trainer(
@ -100,7 +100,7 @@ def test_submodules_multi_gpu_ddp_spawn(tmpdir):
torch.device("cuda", 0),
],
)
@RunIf(min_gpus=1)
@RunIf(min_cuda_gpus=1)
def test_gpu_cuda_device(device):
model = TopModule()

View File

@ -194,7 +194,7 @@ class RecommenderModel(BoringModel):
@pytest.mark.flaky(reruns=3)
@RunIf(min_gpus=1)
@RunIf(min_cuda_gpus=1)
def test_trainer_num_prefetch_batches(tmpdir):
model = RecommenderModel()

View File

@ -162,7 +162,7 @@ def test_empty_model_summary_shapes(max_depth):
assert summary.param_nums == []
@RunIf(min_gpus=1)
@RunIf(min_cuda_gpus=1)
@pytest.mark.parametrize("max_depth", [-1, 1])
@pytest.mark.parametrize("device", [torch.device("cpu"), torch.device("cuda", 0)])
def test_linear_model_summary_shapes(device, max_depth):
@ -295,7 +295,7 @@ def test_empty_model_size(max_depth):
assert 0.0 == summary.model_size
@RunIf(min_gpus=1)
@RunIf(min_cuda_gpus=1)
def test_model_size_precision(tmpdir):
"""Test model size for half and full precision."""
model = PreCalculatedModel()
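For completeness, the marker rename itself is mechanical and could be scripted; the sketch below is purely illustrative and not part of this commit (the `tests` root and the dry-run default are assumptions for the example).

# Hypothetical helper (assumption): apply the min_gpus -> min_cuda_gpus rename
# across a test suite. "min_gpus=" is not a substring of "min_cuda_gpus=",
# so the replacement is safe to run more than once.
from pathlib import Path


def rename_marker(root: str = "tests", dry_run: bool = True) -> int:
    changed = 0
    for path in Path(root).rglob("*.py"):
        text = path.read_text()
        new_text = text.replace("min_gpus=", "min_cuda_gpus=")
        if new_text != text:
            changed += 1
            if not dry_run:
                path.write_text(new_text)
    return changed


if __name__ == "__main__":
    print(f"{rename_marker()} file(s) would change")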