Rename min_gpus to min_cuda_gpus (#13133)
* rename min_gpus to min_cuda_gpus
parent 880224d4e1
commit fbd887df9d

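The change is mechanical at every call site: only the keyword passed to the RunIf test decorator is renamed, so the requirement explicitly refers to CUDA GPUs. A minimal before/after illustration follows; the test names and bodies are placeholders, not code from this commit.

from tests.helpers.runif import RunIf

# Before this commit: the requirement was spelled "min_gpus".
@RunIf(min_gpus=2)
def test_needs_two_gpus_old():
    ...

# After this commit: the same requirement is spelled "min_cuda_gpus".
@RunIf(min_cuda_gpus=2)
def test_needs_two_gpus_new():
    ...
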
@@ -291,7 +291,7 @@ def test_accelererator_invalid_type_devices(mock_is_available, mock_device_count
_ = Trainer(accelerator="gpu", devices=device_count)

-@RunIf(min_gpus=1)
+@RunIf(min_cuda_gpus=1)
def test_accelerator_gpu():
trainer = Trainer(accelerator="gpu", devices=1)
assert isinstance(trainer.accelerator, GPUAccelerator)

@@ -311,7 +311,7 @@ def test_accelerator_cpu_with_devices(devices, plugin):
assert isinstance(trainer.accelerator, CPUAccelerator)

-@RunIf(min_gpus=2)
+@RunIf(min_cuda_gpus=2)
@pytest.mark.parametrize(
["devices", "plugin"], [(1, SingleDeviceStrategy), ([1], SingleDeviceStrategy), (2, DDPSpawnStrategy)]
)

@@ -322,7 +322,7 @@ def test_accelerator_gpu_with_devices(devices, plugin):
assert isinstance(trainer.accelerator, GPUAccelerator)

-@RunIf(min_gpus=1)
+@RunIf(min_cuda_gpus=1)
def test_accelerator_auto_with_devices_gpu():
trainer = Trainer(accelerator="auto", devices=1)
assert isinstance(trainer.accelerator, GPUAccelerator)

@@ -374,7 +374,7 @@ def test_strategy_choice_cpu_plugin(tmpdir, plugin):
assert isinstance(trainer.strategy, plugin)

-@RunIf(min_gpus=2)
+@RunIf(min_cuda_gpus=2)
@pytest.mark.parametrize(
["strategy", "plugin"],
[

@@ -394,14 +394,14 @@ def test_strategy_choice_gpu_str(tmpdir, strategy, plugin):
assert isinstance(trainer.strategy, plugin)

-@RunIf(min_gpus=2)
+@RunIf(min_cuda_gpus=2)
@pytest.mark.parametrize("plugin", [DDPSpawnStrategy, DDPStrategy])
def test_strategy_choice_gpu_plugin(tmpdir, plugin):
trainer = Trainer(strategy=plugin(), accelerator="gpu", devices=2)
assert isinstance(trainer.strategy, plugin)

-@RunIf(min_gpus=2)
+@RunIf(min_cuda_gpus=2)
@pytest.mark.parametrize("plugin", [DDPSpawnStrategy, DDPStrategy])
def test_device_type_when_training_plugin_gpu_passed(tmpdir, plugin):

@@ -449,7 +449,7 @@ def test_strategy_choice_ddp_spawn(cuda_available_mock, device_count_mock):
assert isinstance(trainer.strategy.cluster_environment, LightningEnvironment)

-@RunIf(min_gpus=2)
+@RunIf(min_cuda_gpus=2)
@mock.patch.dict(
os.environ,
{

@@ -23,7 +23,7 @@ from tests.helpers import BoringModel
from tests.helpers.runif import RunIf

-@RunIf(min_gpus=1)
+@RunIf(min_cuda_gpus=1)
def test_get_torch_gpu_stats(tmpdir):
current_device = torch.device(f"cuda:{torch.cuda.current_device()}")
gpu_stats = GPUAccelerator().get_device_stats(current_device)

@@ -33,7 +33,7 @@ def test_get_torch_gpu_stats(tmpdir):
assert any(f in h for h in gpu_stats.keys())

-@RunIf(min_gpus=1)
+@RunIf(min_cuda_gpus=1)
def test_get_nvidia_gpu_stats(tmpdir):
current_device = torch.device(f"cuda:{torch.cuda.current_device()}")
gpu_stats = get_nvidia_gpu_stats(current_device)

@@ -43,7 +43,7 @@ def test_get_nvidia_gpu_stats(tmpdir):
assert any(f in h for h in gpu_stats.keys())

-@RunIf(min_gpus=1)
+@RunIf(min_cuda_gpus=1)
@mock.patch("torch.cuda.set_device")
def test_set_cuda_device(set_device_mock, tmpdir):
model = BoringModel()

@@ -60,12 +60,12 @@ def test_set_cuda_device(set_device_mock, tmpdir):
set_device_mock.assert_called_once()

-@RunIf(min_gpus=1)
+@RunIf(min_cuda_gpus=1)
def test_gpu_availability():
assert GPUAccelerator.is_available()

-@RunIf(min_gpus=1)
+@RunIf(min_cuda_gpus=1)
def test_warning_if_gpus_not_used():
with pytest.warns(UserWarning, match="GPU available but not used. Set `accelerator` and `devices`"):
Trainer()

@@ -184,25 +184,25 @@ def plugin_parity_test(
@pytest.mark.parametrize(
"kwargs",
[
-pytest.param(dict(gpus=1, model_cls=SeedTrainLoaderModel), marks=RunIf(min_gpus=1)),
+pytest.param(dict(gpus=1, model_cls=SeedTrainLoaderModel), marks=RunIf(min_cuda_gpus=1)),
pytest.param(
-dict(gpus=1, precision=16, model_cls=SeedTrainLoaderModel), marks=RunIf(min_gpus=1, amp_native=True)
+dict(gpus=1, precision=16, model_cls=SeedTrainLoaderModel), marks=RunIf(min_cuda_gpus=1, amp_native=True)
),
-pytest.param(dict(gpus=2, model_cls=SeedTrainLoaderModel), marks=RunIf(min_gpus=2)),
+pytest.param(dict(gpus=2, model_cls=SeedTrainLoaderModel), marks=RunIf(min_cuda_gpus=2)),
pytest.param(
-dict(gpus=2, precision=16, model_cls=SeedTrainLoaderModel), marks=RunIf(min_gpus=2, amp_native=True)
+dict(gpus=2, precision=16, model_cls=SeedTrainLoaderModel), marks=RunIf(min_cuda_gpus=2, amp_native=True)
),
pytest.param(
dict(gpus=2, model_cls=SeedTrainLoaderMultipleOptimizersModel),
marks=[
-RunIf(min_gpus=2),
+RunIf(min_cuda_gpus=2),
pytest.mark.skip(reason="TODO: Current issue with multiple optimizers and FairScale."),
],
),
pytest.param(
dict(gpus=2, model_cls=SeedTrainLoaderManualModel),
marks=[
-RunIf(min_gpus=2),
+RunIf(min_cuda_gpus=2),
pytest.mark.skip(reason="TODO: Current issue with multiple optimizers and FairScale."),
],
),

@@ -50,7 +50,7 @@ class SyncBNModule(LightningModule):
return DataLoader(dataset, sampler=sampler, batch_size=self.batch_size)

-@RunIf(min_gpus=2, standalone=True)
+@RunIf(min_cuda_gpus=2, standalone=True)
def test_sync_batchnorm_parity(tmpdir):
"""Test parity between 1) Training a synced batch-norm layer on 2 GPUs with batch size B per device 2) Training
a batch-norm layer on CPU with twice the batch size."""

@@ -29,7 +29,7 @@ from tests.helpers import BoringModel
from tests.helpers.runif import RunIf

-@RunIf(min_gpus=1)
+@RunIf(min_cuda_gpus=1)
def test_device_stats_gpu_from_torch(tmpdir):
"""Test GPU stats are logged using a logger."""
model = BoringModel()

@@ -160,7 +160,7 @@ def test_pruning_callback(
)

-@RunIf(min_gpus=2, standalone=True)
+@RunIf(min_cuda_gpus=2, standalone=True)
@pytest.mark.parametrize("parameters_to_prune", (False, True))
@pytest.mark.parametrize("use_global_unstructured", (False, True))
def test_pruning_callback_ddp(tmpdir, parameters_to_prune, use_global_unstructured):

@@ -174,7 +174,7 @@ def test_pruning_callback_ddp(tmpdir, parameters_to_prune, use_global_unstructur
)

-@RunIf(min_gpus=2, skip_windows=True)
+@RunIf(min_cuda_gpus=2, skip_windows=True)
def test_pruning_callback_ddp_spawn(tmpdir):
train_with_pruning_callback(
tmpdir, use_global_unstructured=True, strategy="ddp_spawn", accelerator="gpu", devices=2

@@ -146,12 +146,12 @@ def train_with_swa(
assert trainer.lightning_module == model

-@RunIf(min_gpus=2, standalone=True)
+@RunIf(min_cuda_gpus=2, standalone=True)
def test_swa_callback_ddp(tmpdir):
train_with_swa(tmpdir, strategy="ddp", accelerator="gpu", devices=2)

-@RunIf(min_gpus=2)
+@RunIf(min_cuda_gpus=2)
def test_swa_callback_ddp_spawn(tmpdir):
train_with_swa(tmpdir, strategy="ddp_spawn", accelerator="gpu", devices=2)

@@ -161,7 +161,7 @@ def test_swa_callback_ddp_cpu(tmpdir):
train_with_swa(tmpdir, strategy="ddp_spawn", accelerator="cpu", devices=2)

-@RunIf(min_gpus=1)
+@RunIf(min_cuda_gpus=1)
def test_swa_callback_1_gpu(tmpdir):
train_with_swa(tmpdir, accelerator="gpu", devices=1)

@@ -592,7 +592,7 @@ def test_progress_bar_max_val_check_interval(
assert pbar_callback.is_enabled

-@RunIf(min_gpus=2, standalone=True)
+@RunIf(min_cuda_gpus=2, standalone=True)
@pytest.mark.parametrize("val_check_interval", [0.2, 0.5])
def test_progress_bar_max_val_check_interval_ddp(tmpdir, val_check_interval):
world_size = 2

@@ -87,7 +87,7 @@ def test_top_k(save_mock, tmpdir, k: int, epochs: int, val_check_interval: float

@mock.patch("torch.save")
-@RunIf(min_gpus=2, standalone=True)
+@RunIf(min_cuda_gpus=2, standalone=True)
@pytest.mark.parametrize(["k", "epochs", "val_check_interval", "expected"], [(1, 1, 1.0, 1), (2, 2, 0.3, 4)])
def test_top_k_ddp(save_mock, tmpdir, k, epochs, val_check_interval, expected):
class TestModel(BoringModel):

@@ -50,7 +50,7 @@ def test_model_torch_save_ddp_cpu(tmpdir):
torch.save(trainer, temp_path)

-@RunIf(min_gpus=2)
+@RunIf(min_cuda_gpus=2)
def test_model_torch_save_ddp_cuda(tmpdir):
"""Test to ensure torch save does not fail for model and trainer using gpu ddp."""
model = BoringModel()

@@ -265,7 +265,7 @@ def test_full_loop(tmpdir):
assert result[0]["test_acc"] > 0.6

-@RunIf(min_gpus=1)
+@RunIf(min_cuda_gpus=1)
@mock.patch(
"pytorch_lightning.strategies.Strategy.lightning_module",
new_callable=PropertyMock,

@@ -276,7 +276,7 @@ def test_toggle_untoggle_3_optimizers_shared_parameters(tmpdir):
trainer.fit(model)

-@RunIf(min_gpus=1)
+@RunIf(min_cuda_gpus=1)
def test_device_placement(tmpdir):
model = BoringModel()

@@ -99,7 +99,7 @@ def _ddp_test_fn(rank, worldsize):
assert epoch_log == {"b": cumulative_sum * worldsize, "a_epoch": cumulative_sum * worldsize}

-@RunIf(min_gpus=2, skip_windows=True)
+@RunIf(min_cuda_gpus=2, skip_windows=True)
def test_result_reduce_ddp():
"""Make sure result logging works with DDP."""
tutils.set_random_main_port()

@@ -298,7 +298,7 @@ def test_result_collection_restoration(tmpdir):
batch_idx = None

-@pytest.mark.parametrize("device", ("cpu", pytest.param("cuda", marks=RunIf(min_gpus=1))))
+@pytest.mark.parametrize("device", ("cpu", pytest.param("cuda", marks=RunIf(min_cuda_gpus=1))))
def test_lightning_module_logging_result_collection(tmpdir, device):
class LoggingModel(BoringModel):
def __init__(self):

@@ -474,13 +474,13 @@ def test_result_collection_reload(tmpdir):
result_collection_reload(default_root_dir=tmpdir)

-@RunIf(min_gpus=1)
+@RunIf(min_cuda_gpus=1)
@mock.patch.dict(os.environ, {"PL_FAULT_TOLERANT_TRAINING": "1"})
def test_result_collection_reload_1_gpu_ddp(tmpdir):
result_collection_reload(default_root_dir=tmpdir, strategy="ddp", accelerator="gpu")

-@RunIf(min_gpus=2, standalone=True)
+@RunIf(min_cuda_gpus=2, standalone=True)
@mock.patch.dict(os.environ, {"PL_FAULT_TOLERANT_TRAINING": "1"})
def test_result_collection_reload_2_gpus(tmpdir):
result_collection_reload(default_root_dir=tmpdir, strategy="ddp", accelerator="gpu", devices=2)

@@ -64,7 +64,7 @@ class RunIf:
def __new__(
self,
*args,
-min_gpus: int = 0,
+min_cuda_gpus: int = 0,
min_torch: Optional[str] = None,
max_torch: Optional[str] = None,
min_python: Optional[str] = None,

@@ -93,7 +93,7 @@ class RunIf:
"""
Args:
*args: Any :class:`pytest.mark.skipif` arguments.
-min_gpus: Require this number of gpus.
+min_cuda_gpus: Require this number of gpus.
min_torch: Require that PyTorch is greater or equal than this version.
max_torch: Require that PyTorch is less than this version.
min_python: Require that Python is greater or equal than this version.

@@ -122,9 +122,9 @@ class RunIf:
conditions = []
reasons = []

-if min_gpus:
-conditions.append(torch.cuda.device_count() < min_gpus)
-reasons.append(f"GPUs>={min_gpus}")
+if min_cuda_gpus:
+conditions.append(torch.cuda.device_count() < min_cuda_gpus)
+reasons.append(f"GPUs>={min_cuda_gpus}")

if min_torch:
torch_version = get_distribution("torch").version

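For context, the hunks above are from tests/helpers/runif.py, where the renamed argument feeds the skip condition. Below is a minimal, self-contained sketch of that logic under the new name; the helper name require_cuda_gpus and the final mark construction are illustrative simplifications, not the full RunIf implementation, which supports many more requirements (min_torch, standalone, deepspeed, ...).

import pytest
import torch

def require_cuda_gpus(min_cuda_gpus: int = 0):
    # Illustrative stand-in for RunIf: collect conditions and reasons, then build one skipif mark.
    conditions = []
    reasons = []

    if min_cuda_gpus:
        # Skip when fewer CUDA devices are visible than the test requires.
        conditions.append(torch.cuda.device_count() < min_cuda_gpus)
        reasons.append(f"GPUs>={min_cuda_gpus}")

    return pytest.mark.skipif(condition=any(conditions), reason=f"Requires: [{' + '.join(reasons)}]")

@require_cuda_gpus(min_cuda_gpus=2)
def test_runs_only_with_two_cuda_devices():
    assert torch.cuda.device_count() >= 2
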
@@ -313,7 +313,7 @@ def test_setup_dataloaders_replace_standard_sampler(shuffle, strategy):
"accelerator, expected",
[
("cpu", torch.device("cpu")),
-pytest.param("gpu", torch.device("cuda", 0), marks=RunIf(min_gpus=1)),
+pytest.param("gpu", torch.device("cuda", 0), marks=RunIf(min_cuda_gpus=1)),
pytest.param("tpu", torch.device("xla", 0), marks=RunIf(tpu=True)),
],
)

@@ -393,7 +393,7 @@ def test_autocast():
lite._precision_plugin.forward_context().__exit__.assert_called()

-@RunIf(min_gpus=2, standalone=True, deepspeed=True)
+@RunIf(min_cuda_gpus=2, standalone=True, deepspeed=True)
def test_deepspeed_multiple_models():
class Lite(LightningLite):
def run(self):

@@ -109,9 +109,9 @@ def precision_context(precision, accelerator) -> Generator[None, None, None]:
"precision, strategy, devices, accelerator",
[
pytest.param(32, None, 1, "cpu"),
-pytest.param(32, None, 1, "gpu", marks=RunIf(min_gpus=1)),
-pytest.param(16, None, 1, "gpu", marks=RunIf(min_gpus=1)),
-pytest.param("bf16", None, 1, "gpu", marks=RunIf(min_gpus=1, min_torch="1.10", bf16_cuda=True)),
+pytest.param(32, None, 1, "gpu", marks=RunIf(min_cuda_gpus=1)),
+pytest.param(16, None, 1, "gpu", marks=RunIf(min_cuda_gpus=1)),
+pytest.param("bf16", None, 1, "gpu", marks=RunIf(min_cuda_gpus=1, min_torch="1.10", bf16_cuda=True)),
],
)
def test_boring_lite_model_single_device(precision, strategy, devices, accelerator, tmpdir):

@@ -159,7 +159,7 @@ def run(rank, model, train_dataloader, num_epochs, precision, accelerator, tmpdi

@pytest.mark.skipif(True, reason="Skipping as it takes 80 seconds.")
-@RunIf(min_gpus=2)
+@RunIf(min_cuda_gpus=2)
@pytest.mark.parametrize(
"precision, strategy, devices, accelerator",
[

@@ -190,7 +190,7 @@ def test_boring_lite_model_ddp_spawn(precision, strategy, devices, accelerator,
assert torch.equal(w_pure.cpu(), w_lite.cpu())

-@RunIf(min_gpus=2, standalone=True)
+@RunIf(min_cuda_gpus=2, standalone=True)
@pytest.mark.parametrize(
"precision, strategy, devices, accelerator",
[

@@ -69,7 +69,7 @@ def test_lite_module_attribute_lookup():
_ = lite_module.not_exists

-@RunIf(min_gpus=1)
+@RunIf(min_cuda_gpus=1)
@pytest.mark.parametrize(
"precision, input_type, expected_type",
[

@@ -102,7 +102,7 @@ def test_lite_module_forward_conversion(precision, input_type, expected_type):

@pytest.mark.parametrize(
-"device", [torch.device("cpu"), pytest.param(torch.device("cuda", 0), marks=RunIf(min_gpus=1))]
+"device", [torch.device("cpu"), pytest.param(torch.device("cuda", 0), marks=RunIf(min_cuda_gpus=1))]
)
@pytest.mark.parametrize("dtype", [torch.float32, torch.float16])
def test_lite_module_device_dtype_propagation(device, dtype):

@@ -147,8 +147,8 @@ def test_lite_dataloader_iterator():
"src_device, dest_device",
[
(torch.device("cpu"), torch.device("cpu")),
-pytest.param(torch.device("cpu"), torch.device("cuda", 0), marks=RunIf(min_gpus=1)),
-pytest.param(torch.device("cuda", 0), torch.device("cpu"), marks=RunIf(min_gpus=1)),
+pytest.param(torch.device("cpu"), torch.device("cuda", 0), marks=RunIf(min_cuda_gpus=1)),
+pytest.param(torch.device("cuda", 0), torch.device("cpu"), marks=RunIf(min_cuda_gpus=1)),
],
)
def test_lite_dataloader_device_placement(src_device, dest_device):

@@ -68,7 +68,7 @@ class BatchHookObserverModel(BoringModel):
assert batch.device == self.device

-@RunIf(min_gpus=1)
+@RunIf(min_cuda_gpus=1)
def test_callback_batch_on_device(tmpdir):
"""Test that the batch object sent to the on_*_batch_start/end hooks is on the right device."""

@@ -91,7 +91,7 @@ def test_log_epoch_metrics_before_on_evaluation_end(update_eval_epoch_metrics_mo
assert order == ["log_epoch_metrics", "on_validation_end"]

-@RunIf(min_gpus=1)
+@RunIf(min_cuda_gpus=1)
def test_memory_consumption_validation(tmpdir):
"""Test that the training batch is no longer in GPU memory when running validation."""

@@ -99,7 +99,7 @@ def test_amp_cpus(tmpdir, strategy, precision, devices):
assert trainer.state.finished, f"Training failed with {trainer.state}"

-@RunIf(min_gpus=2, min_torch="1.10")
+@RunIf(min_cuda_gpus=2, min_torch="1.10")
@pytest.mark.parametrize("strategy", [None, "dp", "ddp_spawn"])
@pytest.mark.parametrize("precision", [16, pytest.param("bf16", marks=RunIf(bf16_cuda=True))])
@pytest.mark.parametrize("devices", [1, 2])

@@ -124,7 +124,7 @@ def test_amp_gpus(tmpdir, strategy, precision, devices):
assert trainer.state.finished, f"Training failed with {trainer.state}"

-@RunIf(min_gpus=2)
+@RunIf(min_cuda_gpus=2)
@mock.patch.dict(
os.environ,
{

@@ -189,7 +189,7 @@ def test_amp_without_apex(bwd_mock, tmpdir):
assert not bwd_mock.called

-@RunIf(min_gpus=1, amp_apex=True)
+@RunIf(min_cuda_gpus=1, amp_apex=True)
@mock.patch("pytorch_lightning.plugins.precision.apex_amp.ApexMixedPrecisionPlugin.backward")
def test_amp_with_apex(bwd_mock, tmpdir):
"""Check calling apex scaling in training."""

@@ -38,7 +38,7 @@ PL_VERSION_LT_1_5 = _compare_version("pytorch_lightning", operator.lt, "1.5")
PRETEND_N_OF_GPUS = 16

-@RunIf(min_gpus=2)
+@RunIf(min_cuda_gpus=2)
def test_multi_gpu_none_backend(tmpdir):
"""Make sure when using multiple GPUs the user can't use `accelerator = None`."""
tutils.set_random_main_port()

@@ -57,7 +57,7 @@ def test_multi_gpu_none_backend(tmpdir):
tpipes.run_model_test(trainer_options, model, dm)

-@RunIf(min_gpus=2)
+@RunIf(min_cuda_gpus=2)
@pytest.mark.parametrize("devices", [1, [0], [1]])
def test_single_gpu_model(tmpdir, devices):
"""Make sure single GPU works (DP mode)."""

@@ -206,7 +206,7 @@ def test_torchelastic_gpu_parsing(mocked_device_count, mocked_is_available, gpus
assert trainer.device_ids == [0]

-@RunIf(min_gpus=1)
+@RunIf(min_cuda_gpus=1)
def test_single_gpu_batch_parse():
trainer = Trainer(accelerator="gpu", devices=1)

@@ -298,7 +298,7 @@ def test_single_gpu_batch_parse():
assert batch.label.type() == "torch.cuda.LongTensor"

-@RunIf(min_gpus=1)
+@RunIf(min_cuda_gpus=1)
def test_non_blocking():
"""Tests that non_blocking=True only gets passed on torch.Tensor.to, but not on other objects."""
trainer = Trainer()

@@ -111,7 +111,7 @@ def test_training_epoch_end_metrics_collection_on_override(tmpdir):
assert overridden_model.len_outputs == overridden_model.num_train_batches

-@RunIf(min_gpus=1)
+@RunIf(min_cuda_gpus=1)
@mock.patch(
"pytorch_lightning.strategies.Strategy.lightning_module",
new_callable=PropertyMock,

@@ -170,7 +170,7 @@ def test_apply_batch_transfer_handler(model_getter_mock):
assert torch.allclose(batch_gpu.targets.cpu(), torch.ones(5, 1, dtype=torch.long) * 2)

-@RunIf(min_gpus=2, standalone=True)
+@RunIf(min_cuda_gpus=2, standalone=True)
def test_transfer_batch_hook_ddp(tmpdir):
"""Test custom data are properly moved to the right device using ddp."""

@@ -439,13 +439,16 @@ class HookedModel(BoringModel):
[
{},
# these precision plugins modify the optimization flow, so testing them explicitly
-pytest.param(dict(accelerator="gpu", devices=1, precision=16, amp_backend="native"), marks=RunIf(min_gpus=1)),
pytest.param(
-dict(accelerator="gpu", devices=1, precision=16, amp_backend="apex"), marks=RunIf(min_gpus=1, amp_apex=True)
+dict(accelerator="gpu", devices=1, precision=16, amp_backend="native"), marks=RunIf(min_cuda_gpus=1)
),
pytest.param(
+dict(accelerator="gpu", devices=1, precision=16, amp_backend="apex"),
+marks=RunIf(min_cuda_gpus=1, amp_apex=True),
),
pytest.param(
dict(accelerator="gpu", devices=1, precision=16, strategy="deepspeed"),
-marks=RunIf(min_gpus=1, standalone=True, deepspeed=True),
+marks=RunIf(min_cuda_gpus=1, standalone=True, deepspeed=True),
),
],
)

@@ -40,7 +40,7 @@ if _HOROVOD_AVAILABLE:
import horovod.torch as hvd

-@RunIf(min_gpus=1, horovod=True)
+@RunIf(min_cuda_gpus=1, horovod=True)
def test_nccl_is_available_on_gpu_environment():
from tests.helpers.runif import _HOROVOD_NCCL_AVAILABLE

@@ -135,7 +135,7 @@ def test_horovod_cpu_implicit(tmpdir):
_run_horovod(trainer_options)

-@RunIf(min_gpus=2, horovod_nccl=True, skip_windows=True)
+@RunIf(min_cuda_gpus=2, horovod_nccl=True, skip_windows=True)
def test_horovod_multi_gpu(tmpdir):
"""Test Horovod with multi-GPU support."""
trainer_options = dict(

@@ -152,7 +152,7 @@ def test_horovod_multi_gpu(tmpdir):
_run_horovod(trainer_options)

-@RunIf(min_gpus=2, horovod_nccl=True, skip_windows=True)
+@RunIf(min_cuda_gpus=2, horovod_nccl=True, skip_windows=True)
def test_horovod_multi_gpu_accumulate_grad_batches(tmpdir):
trainer_options = dict(
default_root_dir=str(tmpdir),

@@ -168,7 +168,7 @@ def test_horovod_multi_gpu_accumulate_grad_batches(tmpdir):
_run_horovod(trainer_options)

-@RunIf(horovod=True, skip_windows=True, min_gpus=2)
+@RunIf(horovod=True, skip_windows=True, min_cuda_gpus=2)
def test_horovod_raises_unsupported_accumulate_grad_batches(tmpdir):
"""Ensure MisConfigurationException for different `accumulate_grad_batches` at different epochs for Horovod
Strategy on multi-gpus."""

@@ -185,7 +185,7 @@ def test_horovod_raises_unsupported_accumulate_grad_batches(tmpdir):
trainer.fit(model)

-@RunIf(min_gpus=2, horovod_nccl=True, skip_windows=True)
+@RunIf(min_cuda_gpus=2, horovod_nccl=True, skip_windows=True)
def test_horovod_multi_gpu_grad_by_value(tmpdir):
"""Test Horovod with multi-GPU support."""
trainer_options = dict(

@@ -207,7 +207,7 @@ def test_horovod_multi_gpu_grad_by_value(tmpdir):
# https://discuss.pytorch.org/t/torch-cuda-amp-vs-nvidia-apex/74994
# Check with (tgaddair) on Horovod issues if this feature is needed
@pytest.mark.skip(reason="TODO: Horovod currently doesn't work with Apex")
-@RunIf(min_gpus=2, amp_apex=True, horovod_nccl=True, skip_windows=True)
+@RunIf(min_cuda_gpus=2, amp_apex=True, horovod_nccl=True, skip_windows=True)
def test_horovod_apex(tmpdir):
"""Test Horovod with multi-GPU support using apex amp."""
trainer_options = dict(

@@ -226,7 +226,7 @@ def test_horovod_apex(tmpdir):
_run_horovod(trainer_options)

-@RunIf(min_gpus=2, horovod_nccl=True, skip_windows=True)
+@RunIf(min_cuda_gpus=2, horovod_nccl=True, skip_windows=True)
def test_horovod_amp(tmpdir):
"""Test Horovod with multi-GPU support using native amp."""
trainer_options = dict(

@@ -245,7 +245,7 @@ def test_horovod_amp(tmpdir):
_run_horovod(trainer_options)

-@RunIf(min_gpus=2, horovod_nccl=True, skip_windows=True)
+@RunIf(min_cuda_gpus=2, horovod_nccl=True, skip_windows=True)
def test_horovod_gather(tmpdir):
"""Test Horovod with multi-GPU support using native amp."""
trainer_options = dict(

@@ -262,7 +262,7 @@ def test_horovod_gather(tmpdir):
_run_horovod(trainer_options)

-@RunIf(min_gpus=2, skip_windows=True, horovod=True, horovod_nccl=True)
+@RunIf(min_cuda_gpus=2, skip_windows=True, horovod=True, horovod_nccl=True)
def test_horovod_transfer_batch_to_gpu(tmpdir):
class TestTrainingStepModel(BoringModel):
def training_step(self, batch, *args, **kwargs):

@@ -39,7 +39,7 @@ def test_model_saves_with_input_sample(tmpdir):
assert os.path.getsize(file_path) > 4e2

-@RunIf(min_gpus=1)
+@RunIf(min_cuda_gpus=1)
def test_model_saves_on_gpu(tmpdir):
"""Test that model saves on gpu."""
model = BoringModel()

@@ -86,7 +86,7 @@ def test_model_saves_with_example_input_array(tmpdir, modelclass, input_sample):
assert os.path.getsize(file_path) > 4e2

-@RunIf(min_gpus=2)
+@RunIf(min_cuda_gpus=2)
def test_model_saves_on_multi_gpu(tmpdir):
"""Test that ONNX model saves on a distributed backend."""
tutils.set_random_main_port()

@@ -375,7 +375,7 @@ def test_callbacks_references_fit_ckpt_path(tmpdir):
trainer.fit(model, datamodule=dm, ckpt_path=str(tmpdir / "last.ckpt"))

-@RunIf(min_gpus=2)
+@RunIf(min_cuda_gpus=2)
def test_running_test_pretrained_model_distrib_dp(tmpdir):
"""Verify `test()` on pretrained model."""

@@ -424,7 +424,7 @@ def test_running_test_pretrained_model_distrib_dp(tmpdir):
tpipes.run_model_prediction(pretrained_model, dataloader)

-@RunIf(min_gpus=2)
+@RunIf(min_cuda_gpus=2)
def test_running_test_pretrained_model_distrib_ddp_spawn(tmpdir):
"""Verify `test()` on pretrained model."""
tutils.set_random_main_port()

@@ -558,7 +558,7 @@ def test_load_model_from_checkpoint(tmpdir, model_template):
new_trainer.test(pretrained_model)

-@RunIf(min_gpus=2)
+@RunIf(min_cuda_gpus=2)
def test_dp_resume(tmpdir):
"""Make sure DP continues training correctly."""
model = CustomClassificationModelDP(lr=0.1)

@@ -78,7 +78,7 @@ def test_torchscript_input_output_trace():
assert torch.allclose(script_output, model_output)

-@RunIf(min_gpus=1)
+@RunIf(min_cuda_gpus=1)
@pytest.mark.parametrize("device", [torch.device("cpu"), torch.device("cuda", 0)])
def test_torchscript_device(device):
"""Test that scripted module is on the correct device."""

@@ -72,7 +72,7 @@ def test_unsqueeze_scalar_tensor(inp, expected):
assert torch.all(unsqueeze_scalar_tensor(inp).eq(expected))

-@RunIf(min_gpus=2)
+@RunIf(min_cuda_gpus=2)
def test_lightning_parallel_module_unsqueeze_scalar():
"""Test that LightningParallelModule takes care of un-squeezeing 0-dim tensors."""

@@ -114,7 +114,7 @@ def test_python_scalar_to_tensor(inp, expected):
assert torch.all(python_scalar_to_tensor(inp).eq(expected))

-@RunIf(min_gpus=1)
+@RunIf(min_cuda_gpus=1)
@pytest.mark.parametrize("device", [torch.device("cpu"), torch.device("cuda", 0)])
def test_lightning_parallel_module_python_scalar_conversion(device):
"""Test that LightningParallelModule can convert Python scalars to tensors."""

@@ -139,7 +139,7 @@ def test_lightning_parallel_module_python_scalar_conversion(device):
assert output["python scalar"] == torch.tensor([12.3], device=device)

-@RunIf(min_gpus=2)
+@RunIf(min_cuda_gpus=2)
@pytest.mark.parametrize(
"nest, unnest",
[

@@ -181,7 +181,7 @@ def test_lightning_parallel_module_device_access(nest, unnest):
assert torch.all(output.cpu().eq(torch.tensor([1, 1])))

-@RunIf(min_gpus=2)
+@RunIf(min_cuda_gpus=2)
def test_lightning_parallel_module_device_access_warning():
"""Test that we show a warning when the device can't be inferred from the input."""

@@ -135,7 +135,7 @@ class TestPrecisionModel(BoringModel):
return TestClippingOptimizer(self.layer.parameters(), lr=0.1)

-@RunIf(min_gpus=2)
+@RunIf(min_cuda_gpus=2)
@pytest.mark.parametrize("accum", [1, 2])
def test_amp_gradient_unscale(tmpdir, accum: int):
model = TestPrecisionModel()

@@ -161,7 +161,7 @@ def test_amp_gradient_unscale(tmpdir, accum: int):
trainer.fit(model)

-@RunIf(min_gpus=1)
+@RunIf(min_cuda_gpus=1)
def test_amp_skip_optimizer(tmpdir):
"""Test that optimizers can be skipped when using amp."""

@@ -195,7 +195,7 @@ def test_amp_skip_optimizer(tmpdir):
trainer.fit(model)

-@RunIf(min_gpus=2, amp_apex=True, standalone=True)
+@RunIf(min_cuda_gpus=2, amp_apex=True, standalone=True)
@pytest.mark.parametrize("amp_level", ["O2"])
def test_amp_apex_ddp_fit(amp_level, tmpdir):
class CustomBoringModel(BoringModel):

@@ -222,7 +222,7 @@ def test_amp_apex_ddp_fit(amp_level, tmpdir):
trainer.test(model)

-@RunIf(min_gpus=2, amp_apex=True)
+@RunIf(min_cuda_gpus=2, amp_apex=True)
@pytest.mark.parametrize("amp_level", ["O2"])
def test_amp_apex_ddp_spawn_fit(amp_level, tmpdir):
trainer = Trainer(

@@ -148,7 +148,7 @@ def test_double_precision(tmpdir, boring_model):
trainer.predict(model)

-@RunIf(min_gpus=2)
+@RunIf(min_cuda_gpus=2)
def test_double_precision_ddp(tmpdir):
model = DoublePrecisionBoringModel()

@@ -346,7 +346,7 @@ def test_advanced_profiler_cprofile_deepcopy(tmpdir):
trainer.fit(model)

-@RunIf(min_gpus=2, standalone=True)
+@RunIf(min_cuda_gpus=2, standalone=True)
def test_pytorch_profiler_trainer_ddp(tmpdir, pytorch_profiler):
"""Ensure that the profiler can be given to the training and default step are properly recorded."""
model = BoringModel()

@@ -476,7 +476,7 @@ def test_pytorch_profiler_multiple_loggers(tmpdir):
assert look_for_trace(tmpdir)

-@RunIf(min_gpus=1, standalone=True)
+@RunIf(min_cuda_gpus=1, standalone=True)
def test_pytorch_profiler_nested_emit_nvtx(tmpdir):
"""This test check emit_nvtx is correctly supported."""
profiler = PyTorchProfiler(use_cuda=True, emit_nvtx=True)

@@ -33,7 +33,7 @@ class BoringModel4QAdam(BoringModel):
return [optimizer], [lr_scheduler]

-@RunIf(min_gpus=1, bagua=True)
+@RunIf(min_cuda_gpus=1, bagua=True)
def test_bagua_default(tmpdir):
trainer = Trainer(
default_root_dir=tmpdir,

@@ -45,7 +45,7 @@ def test_bagua_default(tmpdir):
assert isinstance(trainer.strategy, BaguaStrategy)

-@RunIf(min_gpus=2, standalone=True, bagua=True)
+@RunIf(min_cuda_gpus=2, standalone=True, bagua=True)
def test_async_algorithm(tmpdir):
model = BoringModel()
bagua_strategy = BaguaStrategy(algorithm="async")

@@ -64,7 +64,7 @@ def test_async_algorithm(tmpdir):
assert torch.norm(param) < 3

-@RunIf(min_gpus=1, bagua=True)
+@RunIf(min_cuda_gpus=1, bagua=True)
@pytest.mark.parametrize(
"algorithm", ["gradient_allreduce", "bytegrad", "qadam", "decentralized", "low_precision_decentralized"]
)

@@ -92,7 +92,7 @@ def test_configuration(algorithm, tmpdir):
trainer.strategy.configure_ddp()

-@RunIf(min_gpus=1, bagua=True)
+@RunIf(min_cuda_gpus=1, bagua=True)
def test_qadam_configuration(tmpdir):
model = BoringModel4QAdam()
bagua_strategy = BaguaStrategy(algorithm="qadam")

@@ -337,7 +337,7 @@ def test_multiple_peers(num_processes, wait_seconds):
assert any(global_step > 0 for global_step in process_steps)

-@RunIf(hivemind=True, min_gpus=1)
+@RunIf(hivemind=True, min_cuda_gpus=1)
@mock.patch.dict(os.environ, {"HIVEMIND_MEMORY_SHARING_STRATEGY": "file_descriptor"}, clear=True)
def test_scaler_updated_precision_16():
class TestModel(BoringModel):

@@ -27,9 +27,9 @@ from tests.strategies.test_dp import CustomClassificationModelDP
@pytest.mark.parametrize(
"trainer_kwargs",
(
-pytest.param(dict(accelerator="gpu", devices=1), marks=RunIf(min_gpus=1)),
-pytest.param(dict(strategy="dp", accelerator="gpu", devices=2), marks=RunIf(min_gpus=2)),
-pytest.param(dict(strategy="ddp_spawn", accelerator="gpu", devices=2), marks=RunIf(min_gpus=2)),
+pytest.param(dict(accelerator="gpu", devices=1), marks=RunIf(min_cuda_gpus=1)),
+pytest.param(dict(strategy="dp", accelerator="gpu", devices=2), marks=RunIf(min_cuda_gpus=2)),
+pytest.param(dict(strategy="ddp_spawn", accelerator="gpu", devices=2), marks=RunIf(min_cuda_gpus=2)),
),
)
def test_evaluate(tmpdir, trainer_kwargs):

@@ -32,7 +32,7 @@ from tests.utilities.distributed import call_training_script
CLI_ARGS = "--max_epochs 1 --accelerator gpu --devices 2 --strategy ddp"

-@RunIf(min_gpus=2)
+@RunIf(min_cuda_gpus=2)
@pytest.mark.parametrize("as_module", [True, False])
def test_multi_gpu_model_ddp_fit_only(tmpdir, as_module):
# call the script

@@ -46,7 +46,7 @@ def test_multi_gpu_model_ddp_fit_only(tmpdir, as_module):
assert result["status"] == "complete"

-@RunIf(min_gpus=2)
+@RunIf(min_cuda_gpus=2)
@pytest.mark.parametrize("as_module", [True, False])
def test_multi_gpu_model_ddp_test_only(tmpdir, as_module):
# call the script

@@ -60,7 +60,7 @@ def test_multi_gpu_model_ddp_test_only(tmpdir, as_module):
assert result["status"] == "complete"

-@RunIf(min_gpus=2)
+@RunIf(min_cuda_gpus=2)
@pytest.mark.parametrize("as_module", [True, False])
def test_multi_gpu_model_ddp_fit_test(tmpdir, as_module):
# call the script

@@ -122,7 +122,7 @@ def test_ddp_torch_dist_is_available_in_setup(
trainer.fit(model)

-@RunIf(min_gpus=2, min_torch="1.8.1", standalone=True)
+@RunIf(min_cuda_gpus=2, min_torch="1.8.1", standalone=True)
@pytest.mark.parametrize("precision", (16, 32))
def test_ddp_wrapper(tmpdir, precision):
"""Test parameters to ignore are carried over for DDP."""

@@ -102,7 +102,7 @@ class TestFSDPModel(BoringModel):
assert self.layer.module[2].reshard_after_forward is True

-@RunIf(min_gpus=2, skip_windows=True, standalone=True, min_torch="1.12dev")
+@RunIf(min_cuda_gpus=2, skip_windows=True, standalone=True, min_torch="1.12dev")
def test_fully_sharded_native_strategy_sync_batchnorm(tmpdir):
"""Test to ensure that sync_batchnorm works when using fsdp_native and GPU, and all stages can be run."""

@@ -119,7 +119,7 @@ def test_fully_sharded_native_strategy_sync_batchnorm(tmpdir):
_run_multiple_stages(trainer, model, os.path.join(tmpdir, "last.ckpt"))

-@RunIf(min_gpus=1, skip_windows=True, standalone=True, min_torch="1.12dev")
+@RunIf(min_cuda_gpus=1, skip_windows=True, standalone=True, min_torch="1.12dev")
def test_fully_sharded_native_strategy_checkpoint(tmpdir):
"""Test to ensure that checkpoint is saved correctly when using a single GPU, and all stages can be run."""

@@ -130,7 +130,7 @@ def test_fully_sharded_native_strategy_checkpoint(tmpdir):
_run_multiple_stages(trainer, model, os.path.join(tmpdir, "last.ckpt"))

-@RunIf(min_gpus=2, skip_windows=True, standalone=True, min_torch="1.12dev")
+@RunIf(min_cuda_gpus=2, skip_windows=True, standalone=True, min_torch="1.12dev")
def test_fully_sharded_native_strategy_checkpoint_multi_gpus(tmpdir):
"""Test to ensure that checkpoint is saved correctly when using multiple GPUs, and all stages can be run."""

@@ -96,7 +96,7 @@ class TestFSDPModel(BoringModel):
assert self.layer.module[2].mixed_precision

-@RunIf(min_gpus=1, skip_windows=True, standalone=True, fairscale_fully_sharded=True)
+@RunIf(min_cuda_gpus=1, skip_windows=True, standalone=True, fairscale_fully_sharded=True)
def test_fully_sharded_strategy_checkpoint(tmpdir):
"""Test to ensure that checkpoint is saved correctly when using a single GPU, and all stages can be run."""

@@ -114,7 +114,7 @@ def test_fully_sharded_strategy_checkpoint(tmpdir):
_run_multiple_stages(trainer, model, os.path.join(tmpdir, "last.ckpt"))

-@RunIf(min_gpus=2, skip_windows=True, standalone=True, fairscale_fully_sharded=True)
+@RunIf(min_cuda_gpus=2, skip_windows=True, standalone=True, fairscale_fully_sharded=True)
def test_fully_sharded_strategy_checkpoint_multi_gpus(tmpdir):
"""Test to ensure that checkpoint is saved correctly when using multiple GPUs, and all stages can be run."""

@@ -162,7 +162,7 @@ def _run_multiple_stages(trainer, model, model_path: Optional[str] = None):
trainer.test(ckpt_path=model_path)

-@RunIf(min_gpus=1, skip_windows=True, standalone=True, fairscale_fully_sharded=True)
+@RunIf(min_cuda_gpus=1, skip_windows=True, standalone=True, fairscale_fully_sharded=True)
def test_fsdp_gradient_clipping_raises(tmpdir):
"""Test to ensure that an exception is raised when clipping gradients by value with FSDP."""
model = BoringModel()

@@ -21,7 +21,7 @@ from tests.helpers.runif import RunIf
from tests.helpers.simple_models import ClassificationModel

-@RunIf(min_gpus=2)
+@RunIf(min_cuda_gpus=2)
def test_multi_gpu_early_stop_ddp_spawn(tmpdir):
tutils.set_random_main_port()

@@ -41,7 +41,7 @@ def test_multi_gpu_early_stop_ddp_spawn(tmpdir):
tpipes.run_model_test(trainer_options, model, dm)

-@RunIf(min_gpus=2)
+@RunIf(min_cuda_gpus=2)
def test_multi_gpu_model_ddp_spawn(tmpdir):
tutils.set_random_main_port()

@@ -61,7 +61,7 @@ def test_multi_gpu_model_ddp_spawn(tmpdir):
tpipes.run_model_test(trainer_options, model)

-@RunIf(min_gpus=2)
+@RunIf(min_cuda_gpus=2)
def test_ddp_all_dataloaders_passed_to_fit(tmpdir):
"""Make sure DDP works with dataloaders passed to fit()"""
tutils.set_random_main_port()

@@ -65,7 +65,7 @@ def test_ddp_cpu():
trainer.fit(model)

-@RunIf(min_gpus=2)
+@RunIf(min_cuda_gpus=2)
def test_ddp_spawn_extra_parameters(tmpdir):
"""Tests if device is set correctly when training for DDPSpawnStrategy and tests add_to_queue/get_from_queue
with Lightning Module (deprecated way)."""

@@ -33,7 +33,7 @@ class BoringModelGPU(BoringModel):
self.start_cuda_memory = torch.cuda.memory_allocated()

-@RunIf(min_gpus=2, skip_windows=True, standalone=True)
+@RunIf(min_cuda_gpus=2, skip_windows=True, standalone=True)
def test_ddp_with_2_gpus():
"""Tests if device is set correctly when training and after teardown for DDPStrategy."""
trainer = Trainer(

@@ -69,7 +69,7 @@ class BarrierModel(BoringModel):
self.trainer.strategy.barrier("barrier after model is wrapped")

-@RunIf(min_gpus=4, standalone=True)
+@RunIf(min_cuda_gpus=4, standalone=True)
@mock.patch("torch.distributed.barrier")
def test_ddp_barrier_non_consecutive_device_ids(barrier_mock, tmpdir):
"""Test correct usage of barriers when device ids do not start at 0 or are not consecutive."""

@@ -146,7 +146,7 @@ def test_ddp_configure_ddp():
assert isinstance(trainer.model, LightningModule)

-@RunIf(min_gpus=1)
+@RunIf(min_cuda_gpus=1)
@pytest.mark.parametrize(
"trainer_fn", (TrainerFn.VALIDATING, TrainerFn.TUNING, TrainerFn.TESTING, TrainerFn.PREDICTING)
)

@@ -42,7 +42,7 @@ class TestDDPStrategy(DDPStrategy):
return super().teardown()

-@RunIf(min_gpus=2, min_torch="1.9.0", skip_windows=True, standalone=True)
+@RunIf(min_cuda_gpus=2, min_torch="1.9.0", skip_windows=True, standalone=True)
def test_ddp_fp16_compress_comm_hook(tmpdir):
"""Test for DDP FP16 compress hook."""
model = BoringModel()

@@ -65,7 +65,7 @@ def test_ddp_fp16_compress_comm_hook(tmpdir):
assert trainer.state.finished, f"Training failed with {trainer.state}"

-@RunIf(min_gpus=2, min_torch="1.9.0", skip_windows=True, standalone=True)
+@RunIf(min_cuda_gpus=2, min_torch="1.9.0", skip_windows=True, standalone=True)
def test_ddp_sgd_comm_hook(tmpdir):
"""Test for DDP FP16 compress hook."""
model = BoringModel()

@@ -89,7 +89,7 @@ def test_ddp_sgd_comm_hook(tmpdir):
assert trainer.state.finished, f"Training failed with {trainer.state}"

-@RunIf(min_gpus=2, min_torch="1.9.0", skip_windows=True, standalone=True)
+@RunIf(min_cuda_gpus=2, min_torch="1.9.0", skip_windows=True, standalone=True)
def test_ddp_fp16_compress_wrap_sgd_comm_hook(tmpdir):
"""Test for DDP FP16 compress wrapper for SGD hook."""
model = BoringModel()

@@ -114,7 +114,7 @@ def test_ddp_fp16_compress_wrap_sgd_comm_hook(tmpdir):
assert trainer.state.finished, f"Training failed with {trainer.state}"

-@RunIf(min_gpus=2, min_torch="1.9.0", skip_windows=True, standalone=True)
+@RunIf(min_cuda_gpus=2, min_torch="1.9.0", skip_windows=True, standalone=True)
def test_ddp_spawn_fp16_compress_comm_hook(tmpdir):
"""Test for DDP Spawn FP16 compress hook."""
model = BoringModel()

@@ -134,7 +134,7 @@ def test_ddp_spawn_fp16_compress_comm_hook(tmpdir):
assert trainer.state.finished, f"Training failed with {trainer.state}"

-@RunIf(min_gpus=2, min_torch="1.10.0", skip_windows=True, standalone=True)
+@RunIf(min_cuda_gpus=2, min_torch="1.10.0", skip_windows=True, standalone=True)
def test_ddp_post_local_sgd_comm_hook(tmpdir):
"""Test for DDP post-localSGD hook."""
model = BoringModel()

@@ -162,7 +162,7 @@ def test_ddp_post_local_sgd_comm_hook(tmpdir):
assert trainer.state.finished, f"Training failed with {trainer.state}"

-@RunIf(skip_windows=True, min_torch="1.10.0", min_gpus=2, standalone=True)
+@RunIf(skip_windows=True, min_torch="1.10.0", min_cuda_gpus=2, standalone=True)
@mock.patch("torch.distributed.algorithms.model_averaging.averagers.PeriodicModelAverager.average_parameters")
def test_post_local_sgd_model_averaging(average_parameters_mock, tmpdir):
"""Test that when using DDP with post-localSGD, model averaging is called."""

@@ -207,7 +207,7 @@ def test_post_local_sgd_model_averaging(average_parameters_mock, tmpdir):
average_parameters_mock.assert_called()

-@RunIf(skip_windows=True, min_torch="1.10.0", min_gpus=2, standalone=True)
+@RunIf(skip_windows=True, min_torch="1.10.0", min_cuda_gpus=2, standalone=True)
@mock.patch("torch.distributed.algorithms.model_averaging.averagers.PeriodicModelAverager.average_parameters")
def test_post_local_sgd_model_averaging_value_error(average_parameters_mock, tmpdir):
"""Test that when using DDP with post-localSGD a ValueError is thrown when the optmizer is

@@ -108,7 +108,7 @@ def test_deepspeed_lightning_module(tmpdir):
assert model.dtype == torch.double

-@RunIf(min_gpus=1)
+@RunIf(min_cuda_gpus=1)
def test_deepspeed_lightning_module_precision(tmpdir):
"""Test to ensure that a model wrapped in `LightningDeepSpeedModule` moves tensors to half when precision
16."""

@@ -232,7 +232,7 @@ def test_deepspeed_defaults(tmpdir):
assert isinstance(strategy.config["zero_optimization"], dict)

-@RunIf(min_gpus=1, standalone=True, deepspeed=True)
+@RunIf(min_cuda_gpus=1, standalone=True, deepspeed=True)
def test_warn_deepspeed_ignored(tmpdir):
class TestModel(BoringModel):
def backward(self, loss: Tensor, optimizer: Optimizer, optimizer_idx: int, *args, **kwargs) -> None:

@@ -257,7 +257,7 @@ def test_warn_deepspeed_ignored(tmpdir):
assert any("track_grad_norm=2.0)' but this is not supported" in w for w in warning_cache)

-@RunIf(min_gpus=1, deepspeed=True)
+@RunIf(min_cuda_gpus=1, deepspeed=True)
@pytest.mark.parametrize(
["dataset_cls", "value"],
[(RandomDataset, "auto"), (RandomDataset, 10), (RandomIterableDataset, "auto"), (RandomIterableDataset, 10)],

@@ -298,7 +298,7 @@ def test_deepspeed_auto_batch_size_config_select(mock_deepspeed_distributed, moc
trainer.fit(model)

-@RunIf(min_gpus=1, standalone=True, deepspeed=True)
+@RunIf(min_cuda_gpus=1, standalone=True, deepspeed=True)
def test_deepspeed_run_configure_optimizers(tmpdir):
"""Test end to end that deepspeed works with defaults (without ZeRO as that requires compilation), whilst using
configure_optimizers for optimizers and schedulers."""

@@ -336,7 +336,7 @@ def test_deepspeed_run_configure_optimizers(tmpdir):
_assert_save_model_is_equal(model, tmpdir, trainer)

-@RunIf(min_gpus=1, standalone=True, deepspeed=True)
+@RunIf(min_cuda_gpus=1, standalone=True, deepspeed=True)
def test_deepspeed_config(tmpdir, deepspeed_zero_config):
"""Test to ensure deepspeed works correctly when passed a DeepSpeed config object including
optimizers/schedulers and saves the model weights to load correctly."""

@@ -375,7 +375,7 @@ def test_deepspeed_config(tmpdir, deepspeed_zero_config):
assert len(set(lr_monitor.lrs["lr-SGD"])) == 8

-@RunIf(min_gpus=1, standalone=True, deepspeed=True)
+@RunIf(min_cuda_gpus=1, standalone=True, deepspeed=True)
def test_deepspeed_custom_precision_params(tmpdir):
"""Ensure if we modify the FP16 parameters via the DeepSpeedStrategy, the deepspeed config contains these
changes."""

@@ -423,7 +423,7 @@ def test_deepspeed_custom_activation_checkpointing_params(tmpdir):
assert checkpoint_config["synchronize_checkpoint_boundary"]

-@RunIf(min_gpus=1, standalone=True, deepspeed=True)
+@RunIf(min_cuda_gpus=1, standalone=True, deepspeed=True)
def test_deepspeed_custom_activation_checkpointing_params_forwarded(tmpdir):
"""Ensure if we modify the activation checkpointing parameters, we pass these to
deepspeed.checkpointing.configure correctly."""

@@ -455,7 +455,7 @@ def test_deepspeed_custom_activation_checkpointing_params_forwarded(tmpdir):
)

-@RunIf(min_gpus=1, deepspeed=True)
+@RunIf(min_cuda_gpus=1, deepspeed=True)
def test_deepspeed_assert_config_zero_offload_disabled(tmpdir, deepspeed_zero_config):
"""Ensure if we use a config and turn off offload_optimizer, that this is set to False within the config."""

@@ -481,7 +481,7 @@ def test_deepspeed_assert_config_zero_offload_disabled(tmpdir, deepspeed_zero_co
trainer.fit(model)

-@RunIf(min_gpus=2, standalone=True, deepspeed=True)
+@RunIf(min_cuda_gpus=2, standalone=True, deepspeed=True)
def test_deepspeed_multigpu(tmpdir):
"""Test to ensure that DeepSpeed with multiple GPUs works and deepspeed distributed is initialized
correctly."""

@@ -504,7 +504,7 @@ def test_deepspeed_multigpu(tmpdir):
_assert_save_model_is_equal(model, tmpdir, trainer)

-@RunIf(min_gpus=1, standalone=True, deepspeed=True)
+@RunIf(min_cuda_gpus=1, standalone=True, deepspeed=True)
def test_deepspeed_fp32_works(tmpdir):
model = BoringModel()
trainer = Trainer(

@@ -519,7 +519,7 @@ def test_deepspeed_fp32_works(tmpdir):
trainer.fit(model)

-@RunIf(min_gpus=2, standalone=True, deepspeed=True)
+@RunIf(min_cuda_gpus=2, standalone=True, deepspeed=True)
def test_deepspeed_stage_3_save_warning(tmpdir):
"""Test to ensure that DeepSpeed Stage 3 gives a warning when saving on rank zero."""
model = BoringModel()

@@ -546,7 +546,7 @@ def test_deepspeed_stage_3_save_warning(tmpdir):
trainer.save_checkpoint(checkpoint_path)

-@RunIf(min_gpus=1, standalone=True, deepspeed=True)
+@RunIf(min_cuda_gpus=1, standalone=True, deepspeed=True)
def test_deepspeed_multigpu_single_file(tmpdir):
"""Test to ensure that DeepSpeed loads from a single file checkpoint."""
model = BoringModel()

@@ -669,7 +669,7 @@ class ManualModelParallelClassificationModel(ModelParallelClassificationModel):
opt.step()

-@RunIf(min_gpus=2, standalone=True, deepspeed=True)
+@RunIf(min_cuda_gpus=2, standalone=True, deepspeed=True)
def test_deepspeed_multigpu_stage_3(tmpdir, deepspeed_config):
"""Test to ensure ZeRO Stage 3 works with a parallel model."""
model = ModelParallelBoringModel()

@@ -689,7 +689,7 @@ def test_deepspeed_multigpu_stage_3(tmpdir, deepspeed_config):
_assert_save_model_is_equal(model, tmpdir, trainer)

-@RunIf(min_gpus=2, standalone=True, deepspeed=True)
+@RunIf(min_cuda_gpus=2, standalone=True, deepspeed=True)
def test_deepspeed_multigpu_stage_3_manual_optimization(tmpdir, deepspeed_config):
"""Test to ensure ZeRO Stage 3 works with a parallel model."""
model = ModelParallelBoringModelManualOptim()

@@ -711,7 +711,7 @@ def test_deepspeed_multigpu_stage_3_manual_optimization(tmpdir, deepspeed_config

@pytest.mark.parametrize(("accumulate_grad_batches", "automatic_optimization"), [(1, False), (2, True)])
-@RunIf(min_gpus=2, standalone=True, deepspeed=True)
+@RunIf(min_cuda_gpus=2, standalone=True, deepspeed=True)
def test_deepspeed_multigpu_stage_3_checkpointing(tmpdir, automatic_optimization, accumulate_grad_batches):
seed_everything(1)
if automatic_optimization:

@@ -758,7 +758,7 @@ def test_deepspeed_multigpu_stage_3_checkpointing(tmpdir, automatic_optimization
assert results[0]["test_acc"] > 0.7

-@RunIf(min_gpus=1, standalone=True, deepspeed=True)
+@RunIf(min_cuda_gpus=1, standalone=True, deepspeed=True)
def test_deepspeed_multigpu_stage_3_warns_resume_training(tmpdir):
"""Test to ensure with Stage 3 and multiple GPUs that we can resume from training, throwing a warning that the
optimizer state and scheduler states cannot be restored."""

@@ -793,7 +793,7 @@ def test_deepspeed_multigpu_stage_3_warns_resume_training(tmpdir):
trainer.fit(model, datamodule=dm, ckpt_path=checkpoint_path)

-@RunIf(min_gpus=1, standalone=True, deepspeed=True)
+@RunIf(min_cuda_gpus=1, standalone=True, deepspeed=True)
def test_deepspeed_multigpu_stage_3_resume_training(tmpdir):
"""Test to ensure with Stage 3 and single GPU that we can resume training."""
initial_model = ModelParallelClassificationModel()

@@ -859,7 +859,7 @@ def test_deepspeed_multigpu_stage_3_resume_training(tmpdir):

@pytest.mark.parametrize("offload_optimizer", [False, True])
-@RunIf(min_gpus=2, standalone=True, deepspeed=True)
+@RunIf(min_cuda_gpus=2, standalone=True, deepspeed=True)
def test_deepspeed_multigpu_stage_2_accumulated_grad_batches(tmpdir, offload_optimizer):
"""Test to ensure with Stage 2 and multiple GPUs, accumulated grad batches works."""
seed_everything(42)

@@ -898,7 +898,7 @@ def test_deepspeed_multigpu_stage_2_accumulated_grad_batches(tmpdir, offload_opt
assert verification_callback.on_train_batch_start_called

-@RunIf(min_gpus=2, standalone=True, deepspeed=True)
+@RunIf(min_cuda_gpus=2, standalone=True, deepspeed=True)
def test_deepspeed_multigpu_test(tmpdir):
"""Test to ensure we can use DeepSpeed with just test using ZeRO Stage 3."""
model = ModelParallelBoringModel()

@@ -917,7 +917,7 @@ def test_deepspeed_multigpu_test(tmpdir):

# TODO(Sean): Once partial parameter partitioning is supported this test should be re-enabled
@pytest.mark.skip("Partial parameter partitioning for DeepSpeed is currently broken.")
-@RunIf(min_gpus=1, standalone=True, deepspeed=True)
+@RunIf(min_cuda_gpus=1, standalone=True, deepspeed=True)
def test_deepspeed_multigpu_partial_partition_parameters(tmpdir):
"""Test to ensure that a module that defines a layer inside the ``__init__`` and ``configure_sharded_model``
correctly converts all parameters to float16 when ``precision=16`` and runs successfully."""

@@ -951,7 +951,7 @@ def test_deepspeed_multigpu_partial_partition_parameters(tmpdir):
trainer.fit(model)

-@RunIf(min_gpus=1, standalone=True, deepspeed=True)
+@RunIf(min_cuda_gpus=1, standalone=True, deepspeed=True)
def test_deepspeed_multigpu_test_rnn(tmpdir):
"""Test to ensure that turning off explicit partitioning of the entire module for ZeRO Stage 3 works when
training with certain layers which will crash with explicit partitioning."""

@@ -1025,7 +1025,7 @@ def _assert_save_model_is_equal(model, tmpdir, trainer):
assert torch.equal(orig_param, saved_model_param)

-@RunIf(min_gpus=2, standalone=True, deepspeed=True)
+@RunIf(min_cuda_gpus=2, standalone=True, deepspeed=True)
def test_deepspeed_multigpu_no_schedulers(tmpdir):
"""Test to ensure ZeRO Stage 3 works with a parallel model and no schedulers."""
model = ModelParallelBoringModelNoSchedulers()

@@ -1044,7 +1044,7 @@ def test_deepspeed_multigpu_no_schedulers(tmpdir):
_assert_save_model_is_equal(model, tmpdir, trainer)

-@RunIf(min_gpus=1, standalone=True, deepspeed=True)
+@RunIf(min_cuda_gpus=1, standalone=True, deepspeed=True)
def test_deepspeed_skip_backward_raises(tmpdir):
class TestModel(BoringModel):
def training_step(self, batch, batch_idx):

@@ -1065,7 +1065,7 @@ def test_deepspeed_skip_backward_raises(tmpdir):
trainer.fit(model)

-@RunIf(min_gpus=1, standalone=True, deepspeed=True)
+@RunIf(min_cuda_gpus=1, standalone=True, deepspeed=True)
def test_deepspeed_setup_train_dataloader(tmpdir):
"""Test DeepSpeed works when setup is required to call in the DataModule."""

@@ -1109,7 +1109,7 @@ def test_deepspeed_setup_train_dataloader(tmpdir):
@pytest.mark.parametrize("interval", ["step", "epoch"])
@pytest.mark.parametrize("max_epoch", [2])
@pytest.mark.parametrize("limit_train_batches", [2])
-@RunIf(min_gpus=1, standalone=True, deepspeed=True)
+@RunIf(min_cuda_gpus=1, standalone=True, deepspeed=True)
def test_scheduler_step_count(mock_step, max_epoch, limit_train_batches, interval):
"""Test to ensure that the scheduler is called the correct amount of times during training when scheduler is
set to step or epoch."""

@ -1144,7 +1144,7 @@ def test_scheduler_step_count(mock_step, max_epoch, limit_train_batches, interva
|
|||
assert mock_step.call_count == 1 + (max_epoch * limit_train_batches)
|
||||
|
||||
|
||||
@RunIf(min_gpus=1, standalone=True, deepspeed=True)
|
||||
@RunIf(min_cuda_gpus=1, standalone=True, deepspeed=True)
|
||||
def test_deepspeed_configure_gradient_clipping(tmpdir):
|
||||
"""Test to ensure that a warning is raised when `LightningModule.configure_gradient_clipping` is overridden in
|
||||
case of deepspeed."""
|
||||
|
@ -1168,7 +1168,7 @@ def test_deepspeed_configure_gradient_clipping(tmpdir):
|
|||
trainer.fit(model)
|
||||
|
||||
|
||||
@RunIf(min_gpus=1, standalone=True, deepspeed=True)
|
||||
@RunIf(min_cuda_gpus=1, standalone=True, deepspeed=True)
|
||||
def test_deepspeed_gradient_clip_by_value(tmpdir):
|
||||
"""Test to ensure that an exception is raised when using `gradient_clip_algorithm='value'`."""
|
||||
model = BoringModel()
|
||||
|
@ -1185,7 +1185,7 @@ def test_deepspeed_gradient_clip_by_value(tmpdir):
|
|||
trainer.fit(model)
|
||||
|
||||
|
||||
@RunIf(min_gpus=1, standalone=True, deepspeed=True)
|
||||
@RunIf(min_cuda_gpus=1, standalone=True, deepspeed=True)
|
||||
def test_different_accumulate_grad_batches_fails(tmpdir):
|
||||
model = BoringModel()
|
||||
trainer = Trainer(
|
||||
|
@ -1203,7 +1203,7 @@ def test_different_accumulate_grad_batches_fails(tmpdir):
|
|||
trainer.fit(model)
|
||||
|
||||
|
||||
@RunIf(min_gpus=2, standalone=True, deepspeed=True)
|
||||
@RunIf(min_cuda_gpus=2, standalone=True, deepspeed=True)
|
||||
def test_specific_gpu_device_id(tmpdir):
|
||||
class TestCallback(Callback):
|
||||
def on_train_start(self, trainer: Trainer, pl_module: LightningModule) -> None:
|
||||
|
@ -1246,7 +1246,7 @@ def test_specific_gpu_device_id(tmpdir):
|
|||
trainer.test(model)
|
||||
|
||||
|
||||
@RunIf(min_gpus=2, min_torch="1.10.0", standalone=True, deepspeed=True)
|
||||
@RunIf(min_cuda_gpus=2, min_torch="1.10.0", standalone=True, deepspeed=True)
|
||||
def test_deepspeed_with_meta_device(tmpdir):
|
||||
with init_meta_context():
|
||||
model = BoringModel()
|
||||
|
@ -1265,7 +1265,7 @@ def test_deepspeed_with_meta_device(tmpdir):
|
|||
assert model.layer.weight.device.type == "cpu"
|
||||
|
||||
|
||||
@RunIf(min_gpus=2, standalone=True, deepspeed=True)
|
||||
@RunIf(min_cuda_gpus=2, standalone=True, deepspeed=True)
|
||||
def test_deepspeed_multi_save_same_filepath(tmpdir):
|
||||
"""Test that verifies that deepspeed saves only latest checkpoint in the specified path and deletes the old
|
||||
sharded checkpoints."""
|
||||
|
@ -1300,7 +1300,7 @@ def test_deepspeed_multi_save_same_filepath(tmpdir):
|
|||
assert expected == set(os.listdir(ckpt_path))
|
||||
|
||||
|
||||
@RunIf(min_gpus=2, standalone=True, deepspeed=True)
|
||||
@RunIf(min_cuda_gpus=2, standalone=True, deepspeed=True)
|
||||
@pytest.mark.skipif(not _DEEPSPEED_GREATER_EQUAL_0_6, reason="requires deepspeed >= 0.6")
|
||||
def test_deepspeed_with_bfloat16_precision(tmpdir):
|
||||
"""Test that deepspeed works with bfloat16 precision."""
|
||||
|
|
|
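
The RunIf(min_cuda_gpus=...) marker used throughout these hunks comes from tests.helpers.runif. Its implementation is not part of this commit, but as a rough, hypothetical sketch (an assumption, not the project's actual helper) the renamed keyword can be read as a pytest.mark.skipif condition on the number of visible CUDA devices:

import pytest
import torch


class RunIf:
    # Minimal sketch: build a skipif mark so the decorated test only runs
    # when at least min_cuda_gpus CUDA devices are visible on the machine.
    def __new__(cls, *args, min_cuda_gpus: int = 0, **kwargs):
        conditions = []
        reasons = []
        if min_cuda_gpus:
            conditions.append(torch.cuda.device_count() < min_cuda_gpus)
            reasons.append(f"GPUs>={min_cuda_gpus}")
        return pytest.mark.skipif(any(conditions), reason=f"Requires: [{' + '.join(reasons)}]")

With a helper along these lines, @RunIf(min_cuda_gpus=2) skips a test on machines exposing fewer than two CUDA devices, and the same object can be passed as marks= inside pytest.param(...), which is how the marker also appears in several of the hunks below.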

@@ -54,7 +54,7 @@ class CustomClassificationModelDP(ClassificationModel):
        self.log("test_acc", self.test_acc(outputs["logits"], outputs["y"]))


@RunIf(min_gpus=2)
@RunIf(min_cuda_gpus=2)
def test_multi_gpu_early_stop_dp(tmpdir):
    """Make sure DDP works.

@@ -79,7 +79,7 @@ def test_multi_gpu_early_stop_dp(tmpdir):
    tpipes.run_model_test(trainer_options, model, dm)


@RunIf(min_gpus=2)
@RunIf(min_cuda_gpus=2)
def test_multi_gpu_model_dp(tmpdir):
    tutils.set_random_main_port()

@@ -195,7 +195,7 @@ def test_dp_raise_exception_with_batch_transfer_hooks(mock_is_available, mock_de
    trainer.fit(model)


@RunIf(min_gpus=2)
@RunIf(min_cuda_gpus=2)
def test_dp_training_step_dict(tmpdir):
    """This test verifies that dp properly reduces dictionaries."""
    model = ReductionTestModel()

@@ -214,7 +214,7 @@ def test_dp_training_step_dict(tmpdir):
    trainer.test(model)


@RunIf(min_gpus=2)
@RunIf(min_cuda_gpus=2)
def test_dp_batch_not_moved_to_device_explicitly(tmpdir):
    """Test that with DP, batch is not moved to the device explicitly."""

@@ -17,7 +17,7 @@ if _FAIRSCALE_AVAILABLE:


@pytest.mark.parametrize("clip_val", [0, 10])
@RunIf(min_gpus=1, skip_windows=True, fairscale=True)
@RunIf(min_cuda_gpus=1, skip_windows=True, fairscale=True)
@mock.patch("fairscale.optim.oss.OSS.clip_grad_norm")
def test_ddp_sharded_precision_16_clip_gradients(mock_oss_clip_grad_norm, clip_val, tmpdir):
    """Ensure that clip gradients is only called if the value is greater than 0."""

@@ -47,7 +47,7 @@ def test_sharded_ddp_choice(tmpdir, strategy, expected):
    assert isinstance(trainer.strategy, expected)


@RunIf(min_gpus=1, fairscale=True)
@RunIf(min_cuda_gpus=1, fairscale=True)
@pytest.mark.parametrize(
    "strategy,expected", [("ddp_sharded", DDPShardedStrategy), ("ddp_sharded_spawn", DDPSpawnShardedStrategy)]
)

@@ -74,7 +74,7 @@ def test_ddp_sharded_strategy_checkpoint_cpu(tmpdir):
    assert torch.equal(ddp_param.to("cpu"), shard_param)


@RunIf(min_gpus=2, skip_windows=True, fairscale=True)
@RunIf(min_cuda_gpus=2, skip_windows=True, fairscale=True)
def test_ddp_sharded_strategy_checkpoint_multi_gpu(tmpdir):
    """Test to ensure that checkpoint is saved correctly when using multiple GPUs."""
    model = BoringModel()

@@ -91,7 +91,7 @@ def test_ddp_sharded_strategy_checkpoint_multi_gpu(tmpdir):
    assert torch.equal(ddp_param.to("cpu"), shard_param)


@RunIf(min_gpus=2, skip_windows=True, fairscale=True)
@RunIf(min_cuda_gpus=2, skip_windows=True, fairscale=True)
def test_ddp_sharded_strategy_finetune(tmpdir):
    """Test to ensure that we can save and restart training (simulate fine-tuning)"""
    model = BoringModel()

@@ -126,7 +126,7 @@ def test_ddp_sharded_strategy_fit_ckpt_path(tmpdir):

@pytest.mark.skip(reason="Not a critical test, skip till drone CI performance improves.") # todo
@pytest.mark.skip(reason="Currently unsupported restarting training on different number of devices.")
@RunIf(min_gpus=2, skip_windows=True, fairscale=True)
@RunIf(min_cuda_gpus=2, skip_windows=True, fairscale=True)
def test_ddp_sharded_strategy_fit_ckpt_path_downsize_gpus(tmpdir):
    """Test to ensure that resuming from checkpoint works when downsizing number of GPUS."""
    model = BoringModel()

@@ -144,7 +144,7 @@ def test_ddp_sharded_strategy_fit_ckpt_path_downsize_gpus(tmpdir):
    trainer.fit(model, ckpt_path=checkpoint_path)


@RunIf(min_gpus=1, skip_windows=True, fairscale=True)
@RunIf(min_cuda_gpus=1, skip_windows=True, fairscale=True)
def test_ddp_sharded_strategy_fit_ckpt_path_gpu_to_cpu(tmpdir):
    """Test to ensure that resuming from checkpoint works when going from GPUs- > CPU."""
    model = BoringModel()

@@ -165,7 +165,10 @@ def test_ddp_sharded_strategy_fit_ckpt_path_gpu_to_cpu(tmpdir):
@RunIf(skip_windows=True, standalone=True, fairscale=True)
@pytest.mark.parametrize(
    "trainer_kwargs",
    (dict(accelerator="cpu", devices=2), pytest.param(dict(accelerator="gpu", devices=2), marks=RunIf(min_gpus=2))),
    (
        dict(accelerator="cpu", devices=2),
        pytest.param(dict(accelerator="gpu", devices=2), marks=RunIf(min_cuda_gpus=2)),
    ),
)
def test_ddp_sharded_strategy_test_multigpu(tmpdir, trainer_kwargs):
    """Test to ensure we can use validate and test without fit."""

@@ -197,7 +200,7 @@ class ManualBoringModel(BoringModel):
        return {"loss": loss}


@RunIf(min_gpus=2, skip_windows=True, standalone=True, fairscale=True)
@RunIf(min_cuda_gpus=2, skip_windows=True, standalone=True, fairscale=True)
def test_ddp_sharded_strategy_manual_optimization_spawn(tmpdir):
    # todo (sean): this test has been split out as running both tests using parametrize causes "Address in use"
    model = ManualBoringModel()

@@ -213,7 +216,7 @@ def test_ddp_sharded_strategy_manual_optimization_spawn(tmpdir):
    trainer.fit(model)


@RunIf(min_gpus=2, skip_windows=True, standalone=True, fairscale=True)
@RunIf(min_cuda_gpus=2, skip_windows=True, standalone=True, fairscale=True)
def test_ddp_sharded_strategy_manual_optimization(tmpdir):
    model = ManualBoringModel()
    trainer = Trainer(

@@ -36,7 +36,7 @@ class BoringModelGPU(BoringModel):
        self.start_cuda_memory = torch.cuda.memory_allocated()


@RunIf(min_gpus=1, skip_windows=True)
@RunIf(min_cuda_gpus=1, skip_windows=True)
def test_single_gpu():
    """Tests if device is set correctly when training and after teardown for single GPU strategy."""
    trainer = Trainer(accelerator="gpu", devices=1, fast_dev_run=True)

@@ -79,7 +79,7 @@ def test_all_rank_logging_ddp_cpu(tmpdir):
    trainer.fit(model)


@RunIf(min_gpus=2)
@RunIf(min_cuda_gpus=2)
def test_all_rank_logging_ddp_spawn(tmpdir):
    """Check that all ranks can be logged from."""
    model = TestModel()

@@ -691,7 +691,7 @@ def test_multiple_dataloaders_reset(val_check_interval, tmpdir):
    trainer.fit(model)


@RunIf(min_gpus=1)
@RunIf(min_cuda_gpus=1)
def test_evaluation_move_metrics_to_cpu_and_outputs(tmpdir):
    class TestModel(BoringModel):
        def validation_step(self, *args):

@@ -293,7 +293,7 @@ def test_fx_validator_integration(tmpdir):
    trainer.predict(model)


@RunIf(min_gpus=2)
@RunIf(min_cuda_gpus=2)
def test_epoch_results_cache_dp(tmpdir):
    root_device = torch.device("cuda", 0)

@@ -395,7 +395,7 @@ class LoggingSyncDistModel(BoringModel):
        return super().validation_step(batch, batch_idx)


@pytest.mark.parametrize("devices", [1, pytest.param(2, marks=RunIf(min_gpus=2, skip_windows=True))])
@pytest.mark.parametrize("devices", [1, pytest.param(2, marks=RunIf(min_cuda_gpus=2, skip_windows=True))])
def test_logging_sync_dist_true(tmpdir, devices):
    """Tests to ensure that the sync_dist flag works (should just return the original value)"""
    fake_result = 1

@@ -433,7 +433,7 @@ def test_logging_sync_dist_true(tmpdir, devices):
    assert metrics["bar_3"] == 2 + int(use_multiple_devices)


@RunIf(min_gpus=2, standalone=True)
@RunIf(min_cuda_gpus=2, standalone=True)
def test_logging_sync_dist_true_ddp(tmpdir):
    """Tests to ensure that the sync_dist flag works with ddp."""

@@ -553,7 +553,7 @@ def test_logging_in_callbacks_with_log_function(tmpdir):
    assert trainer.callback_metrics == expected


@RunIf(min_gpus=1)
@RunIf(min_cuda_gpus=1)
def test_metric_are_properly_reduced(tmpdir):
    class TestingModel(BoringModel):
        def __init__(self, *args, **kwargs) -> None:

@@ -691,7 +691,7 @@ def test_sanity_metrics_are_reset(tmpdir):
    assert "val_loss" not in trainer.progress_bar_metrics


@RunIf(min_gpus=1)
@RunIf(min_cuda_gpus=1)
def test_move_metrics_to_cpu(tmpdir):
    class TestModel(BoringModel):
        def on_before_backward(self, loss: torch.Tensor) -> None:

@@ -68,11 +68,11 @@ class ManualOptModel(BoringModel):
    [
        {},
        pytest.param(
            {"accelerator": "gpu", "devices": 1, "precision": 16, "amp_backend": "native"}, marks=RunIf(min_gpus=1)
            {"accelerator": "gpu", "devices": 1, "precision": 16, "amp_backend": "native"}, marks=RunIf(min_cuda_gpus=1)
        ),
        pytest.param(
            {"accelerator": "gpu", "devices": 1, "precision": 16, "amp_backend": "apex", "amp_level": "O2"},
            marks=RunIf(min_gpus=1, amp_apex=True),
            marks=RunIf(min_cuda_gpus=1, amp_apex=True),
        ),
    ],
)

@@ -198,7 +198,7 @@ def test_multiple_optimizers_manual_log(tmpdir):
    assert set(trainer.logged_metrics) == {"a_step", "a_epoch"}


@RunIf(min_gpus=1)
@RunIf(min_cuda_gpus=1)
def test_multiple_optimizers_manual_native_amp(tmpdir):
    model = ManualOptModel()
    model.val_dataloader = None

@@ -282,7 +282,7 @@ class ManualOptimizationExtendedModel(BoringModel):
        assert self.called["on_train_batch_end"] == 10


@RunIf(min_gpus=2)
@RunIf(min_cuda_gpus=2)
def test_manual_optimization_and_return_tensor(tmpdir):
    """This test verify that in `manual_optimization` we don't add gradient when the user return loss in
    `training_step`"""

@@ -306,7 +306,7 @@ def test_manual_optimization_and_return_tensor(tmpdir):
    trainer.fit(model)


@RunIf(min_gpus=1)
@RunIf(min_cuda_gpus=1)
def test_manual_optimization_and_accumulated_gradient(tmpdir):
    """This test verify that in `automatic_optimization=False`, step is being called only when we shouldn't
    accumulate."""

@@ -394,7 +394,7 @@ def test_manual_optimization_and_accumulated_gradient(tmpdir):
    trainer.fit(model)


@RunIf(min_gpus=1)
@RunIf(min_cuda_gpus=1)
def test_multiple_optimizers_step(tmpdir):
    """Tests that `step` works with several optimizers."""

@@ -783,14 +783,14 @@ def train_manual_optimization(tmpdir, strategy, model_cls=TesManualOptimizationD
        assert not torch.equal(param.cpu().data, param_copy.data)


@RunIf(min_gpus=2, standalone=True)
@RunIf(min_cuda_gpus=2, standalone=True)
def test_step_with_optimizer_closure_with_different_frequencies_ddp(tmpdir):
    """Tests that `step` works with optimizer_closure and different accumulated_gradient frequency."""
    train_manual_optimization(tmpdir, "ddp")


@RunIf(min_gpus=2)
@RunIf(min_cuda_gpus=2)
def test_step_with_optimizer_closure_with_different_frequencies_ddp_spawn(tmpdir):
    """Tests that `step` works with optimizer_closure and different accumulated_gradient frequency."""

@@ -853,7 +853,7 @@ class TestManualOptimizationDDPModelToggleModel(TesManualOptimizationDDPModel):
            opt_dis.zero_grad()


@RunIf(min_gpus=2, standalone=True)
@RunIf(min_cuda_gpus=2, standalone=True)
def test_step_with_optimizer_closure_with_different_frequencies_ddp_with_toggle_model(tmpdir):
    train_manual_optimization(
        tmpdir, "ddp_find_unused_parameters_false", model_cls=TestManualOptimizationDDPModelToggleModel

@@ -966,7 +966,7 @@ def test_lr_scheduler_step_not_called(tmpdir):
    assert lr_step.call_count == 1


@RunIf(min_gpus=1)
@RunIf(min_cuda_gpus=1)
@pytest.mark.parametrize("precision", [16, 32])
def test_multiple_optimizers_logging(precision, tmpdir):
    """Tests that metrics are properly being logged."""

@@ -564,7 +564,7 @@ def test_warn_invalid_scheduler_key_in_manual_optimization(tmpdir):
    trainer.fit(model)


@RunIf(min_gpus=2, standalone=True)
@RunIf(min_cuda_gpus=2, standalone=True)
def test_optimizer_state_on_device(tmpdir):
    """Test that optimizers that create state initially at instantiation still end up with the state on the GPU."""

@@ -23,7 +23,7 @@ from pytorch_lightning.utilities.exceptions import MisconfigurationException
from tests.helpers.runif import RunIf


@RunIf(min_gpus=2)
@RunIf(min_cuda_gpus=2)
@pytest.mark.parametrize(
    ["nb", "expected_gpu_idxs", "expected_error"],
    [(0, [], MisconfigurationException), (-1, list(range(torch.cuda.device_count())), None), (1, [0], None)],

@@ -56,7 +56,7 @@ def test_get_model_ddp_cpu(tmpdir):
    trainer.fit(model)


@RunIf(min_gpus=1)
@RunIf(min_cuda_gpus=1)
def test_get_model_gpu(tmpdir):
    """Tests that `trainer.lightning_module` extracts the model correctly when using GPU."""

@@ -685,7 +685,7 @@ class MultiProcessModel(BoringModel):
        assert len(torch.unique(all_batches, dim=0)) == num_samples


@RunIf(min_gpus=2)
@RunIf(min_cuda_gpus=2)
def test_auto_add_worker_init_fn_distributed(tmpdir, monkeypatch):
    """Test that the lightning worker_init_fn takes care of dataloaders in multi-gpu/multi-node training."""
    dataset = NumpyRandomDataset()

@@ -810,7 +810,7 @@ class DistribSamplerCallback(Callback):
        assert test_sampler.seed == self.expected_seed[2]


@RunIf(min_gpus=2, skip_windows=True)
@RunIf(min_cuda_gpus=2, skip_windows=True)
def test_dataloader_distributed_sampler(tmpdir):
    """Test DistributedSampler and it's arguments for DDP backend."""
    seed_everything(123)

@@ -835,7 +835,7 @@ class ModelWithDataLoaderDistributedSampler(BoringModel):
        return DataLoader(dataloader.dataset, batch_size=32, drop_last=False, sampler=dist_sampler, shuffle=False)


@RunIf(min_gpus=2, skip_windows=True)
@RunIf(min_cuda_gpus=2, skip_windows=True)
def test_dataloader_distributed_sampler_already_attached(tmpdir):
    """Test DistributedSampler and it's arguments for DDP backend when DistSampler already included on
    dataloader."""

@@ -855,7 +855,7 @@ def test_dataloader_distributed_sampler_already_attached(tmpdir):
    assert trainer.state.finished, "DDP Training failed"


@RunIf(min_gpus=3)
@RunIf(min_cuda_gpus=3)
def test_batch_size_smaller_than_num_gpus(tmpdir):
    # we need at least 3 gpus for this test
    num_gpus = 3

@@ -382,7 +382,7 @@ def test_combined_data_loader_validation_test(
    apply_to_collection(dataloader.loaders, DataLoader, _assert_dataset)


@pytest.mark.parametrize("accelerator", ["cpu", pytest.param("gpu", marks=RunIf(min_gpus=2))])
@pytest.mark.parametrize("accelerator", ["cpu", pytest.param("gpu", marks=RunIf(min_cuda_gpus=2))])
@pytest.mark.parametrize("replace_sampler_ddp", [False, True])
def test_combined_data_loader_with_max_size_cycle_and_ddp(accelerator, replace_sampler_ddp):
    """This test makes sure distributed sampler has been properly injected in dataloaders when using CombinedLoader

@@ -1040,7 +1040,7 @@ def test_on_exception_hook(tmpdir):
    assert isinstance(handle_interrupt_callback.exception, MisconfigurationException)


@pytest.mark.parametrize("precision", [32, pytest.param(16, marks=RunIf(min_gpus=1))])
@pytest.mark.parametrize("precision", [32, pytest.param(16, marks=RunIf(min_cuda_gpus=1))])
def test_gradient_clipping_by_norm(tmpdir, precision):
    """Test gradient clipping by norm."""
    tutils.reset_seed()

@@ -1070,7 +1070,7 @@ def test_gradient_clipping_by_norm(tmpdir, precision):
    assert model.assertion_called


@pytest.mark.parametrize("precision", [32, pytest.param(16, marks=RunIf(min_gpus=1))])
@pytest.mark.parametrize("precision", [32, pytest.param(16, marks=RunIf(min_cuda_gpus=1))])
def test_gradient_clipping_by_value(tmpdir, precision):
    """Test gradient clipping by value."""
    tutils.reset_seed()

@@ -1111,7 +1111,7 @@ def test_invalid_gradient_clip_algo(tmpdir):
        Trainer(default_root_dir=tmpdir, gradient_clip_algorithm="norm2")


@RunIf(min_gpus=1)
@RunIf(min_cuda_gpus=1)
def test_gpu_choice():
    num_gpus = torch.cuda.device_count()
    Trainer(accelerator="gpu", devices=num_gpus, auto_select_gpus=True)

@@ -1422,7 +1422,7 @@ def test_trainer_predict_cpu(tmpdir, datamodule, enable_progress_bar):
    predict(tmpdir, datamodule=datamodule, enable_progress_bar=enable_progress_bar)


@RunIf(min_gpus=2, standalone=True)
@RunIf(min_cuda_gpus=2, standalone=True)
@pytest.mark.parametrize(
    "kwargs",
    [

@@ -1435,13 +1435,13 @@ def test_trainer_predict_standalone(tmpdir, kwargs):
    predict(tmpdir, accelerator="gpu", **kwargs)


@RunIf(min_gpus=1)
@RunIf(min_cuda_gpus=1)
def test_trainer_predict_1_gpu(tmpdir):
    predict(tmpdir, accelerator="gpu", devices=1)


@RunIf(skip_windows=True)
@pytest.mark.parametrize("accelerator", ["cpu", pytest.param("gpu", marks=RunIf(min_gpus=2))])
@pytest.mark.parametrize("accelerator", ["cpu", pytest.param("gpu", marks=RunIf(min_cuda_gpus=2))])
def test_trainer_predict_ddp_spawn(tmpdir, accelerator):
    predict(tmpdir, strategy="ddp_spawn", accelerator=accelerator, devices=2)

@@ -1524,7 +1524,7 @@ def test_trainer_access_in_configure_optimizers(tmpdir):
    trainer.fit(model, train_data)


@RunIf(min_gpus=1)
@RunIf(min_cuda_gpus=1)
def test_setup_hook_move_to_device_correctly(tmpdir):
    """Verify that if a user defines a layer in the setup hook function, this is moved to the correct device."""

@@ -1747,7 +1747,7 @@ class CustomException(Exception):
    pass


@RunIf(min_gpus=2, standalone=True)
@RunIf(min_cuda_gpus=2, standalone=True)
def test_ddp_terminate_when_deadlock_is_detected(tmpdir):
    """Test that DDP kills the remaining processes when only one rank is throwing an exception."""

@@ -1778,7 +1778,7 @@ def test_ddp_terminate_when_deadlock_is_detected(tmpdir):
    trainer.fit(model)


@RunIf(min_gpus=1)
@RunIf(min_cuda_gpus=1)
def test_multiple_trainer_constant_memory_allocated(tmpdir):
    """This tests ensures calling the trainer several times reset the memory back to 0."""

@@ -123,7 +123,7 @@ def test_trainer_reset_correctly(tmpdir):
    assert actual == expected


@RunIf(min_gpus=1)
@RunIf(min_cuda_gpus=1)
@pytest.mark.parametrize("scale_arg", ["power", "binsearch", True])
def test_auto_scale_batch_size_trainer_arg(tmpdir, scale_arg):
    """Test possible values for 'batch size auto scaling' Trainer argument."""

@@ -140,7 +140,7 @@ def test_auto_scale_batch_size_trainer_arg(tmpdir, scale_arg):
    assert not os.path.exists(tmpdir / "scale_batch_size_temp_model.ckpt")


@RunIf(min_gpus=1)
@RunIf(min_cuda_gpus=1)
@pytest.mark.parametrize("use_hparams", [True, False])
def test_auto_scale_batch_size_set_model_attribute(tmpdir, use_hparams):
    """Test that new batch size gets written to the correct hyperparameter attribute."""

@@ -240,7 +240,7 @@ def test_error_on_dataloader_passed_to_fit(tmpdir):
        trainer.tune(model, **fit_options)


@RunIf(min_gpus=1)
@RunIf(min_cuda_gpus=1)
def test_auto_scale_batch_size_with_amp(tmpdir):
    before_batch_size = 2
    model = BatchSizeModel(batch_size=before_batch_size)

@@ -60,7 +60,7 @@ def test_all_gather_ddp_spawn():
    torch.multiprocessing.spawn(_test_all_gather_ddp, args=(world_size,), nprocs=world_size)


@RunIf(min_gpus=2, skip_windows=True, standalone=True)
@RunIf(min_cuda_gpus=2, skip_windows=True, standalone=True)
def test_all_gather_collection(tmpdir):
    class TestModel(BoringModel):

@@ -114,7 +114,7 @@ def test_all_gather_collection(tmpdir):
    assert model.training_epoch_end_called


@RunIf(min_gpus=2, skip_windows=True, standalone=True)
@RunIf(min_cuda_gpus=2, skip_windows=True, standalone=True)
def test_all_gather_sync_grads(tmpdir):
    class TestModel(BoringModel):

@@ -23,7 +23,7 @@ from tests.helpers.torchtext_utils import get_dummy_torchtext_data_iterator
@pytest.mark.parametrize("include_lengths", [False, True])
@pytest.mark.parametrize("device", [torch.device("cuda", 0)])
@pytest.mark.skipif(not _TORCHTEXT_LEGACY, reason="torchtext.legacy is deprecated.")
@RunIf(min_gpus=1)
@RunIf(min_cuda_gpus=1)
def test_batch_move_data_to_device_torchtext_include_lengths(include_lengths, device):
    data_iterator, _ = get_dummy_torchtext_data_iterator(num_samples=3, batch_size=3, include_lengths=include_lengths)
    data_iter = iter(data_iterator)

@@ -22,7 +22,7 @@ from tests.helpers.boring_model import BoringModel
from tests.helpers.runif import RunIf


@RunIf(min_gpus=2, standalone=True, deepspeed=True)
@RunIf(min_cuda_gpus=2, standalone=True, deepspeed=True)
def test_deepspeed_collate_checkpoint(tmpdir):
    """Test to ensure that with DeepSpeed Stage 3 we can collate the sharded checkpoints into a single file."""
    model = BoringModel()

@@ -34,7 +34,7 @@ def _test_collect_states(rank, world_size):
    assert collected_state == {1: {"something": torch.tensor([1])}, 0: {"something": torch.tensor([0])}}


@RunIf(min_gpus=2, min_torch="1.10", skip_windows=True)
@RunIf(min_cuda_gpus=2, min_torch="1.10", skip_windows=True)
def test_collect_states():
    """This test ensures state are properly collected across processes.

@@ -48,7 +48,7 @@ class DeviceAssertCallback(Callback):

@pytest.mark.parametrize("dst_dtype", [torch.float, torch.double, torch.half])
@pytest.mark.parametrize("dst_device", [torch.device("cpu"), torch.device("cuda", 0)])
@RunIf(min_gpus=1)
@RunIf(min_cuda_gpus=1)
def test_submodules_device_and_dtype(dst_device, dst_dtype):
    """Test that the device and dtype property updates propagate through mixed nesting of regular nn.Modules and
    the special modules of type DeviceDtypeModuleMixin (e.g. Metric or LightningModule)."""

@@ -64,7 +64,7 @@ def test_submodules_device_and_dtype(dst_device, dst_dtype):
    assert model.dtype == model.module.module.dtype == dst_dtype


@RunIf(min_gpus=2)
@RunIf(min_cuda_gpus=2)
def test_submodules_multi_gpu_dp(tmpdir):
    model = TopModule()
    trainer = Trainer(

@@ -78,7 +78,7 @@ def test_submodules_multi_gpu_dp(tmpdir):
    trainer.fit(model)


@RunIf(min_gpus=2)
@RunIf(min_cuda_gpus=2)
def test_submodules_multi_gpu_ddp_spawn(tmpdir):
    model = TopModule()
    trainer = Trainer(

@@ -100,7 +100,7 @@ def test_submodules_multi_gpu_ddp_spawn(tmpdir):
        torch.device("cuda", 0),
    ],
)
@RunIf(min_gpus=1)
@RunIf(min_cuda_gpus=1)
def test_gpu_cuda_device(device):
    model = TopModule()

@@ -194,7 +194,7 @@ class RecommenderModel(BoringModel):


@pytest.mark.flaky(reruns=3)
@RunIf(min_gpus=1)
@RunIf(min_cuda_gpus=1)
def test_trainer_num_prefetch_batches(tmpdir):
    model = RecommenderModel()

@@ -162,7 +162,7 @@ def test_empty_model_summary_shapes(max_depth):
    assert summary.param_nums == []


@RunIf(min_gpus=1)
@RunIf(min_cuda_gpus=1)
@pytest.mark.parametrize("max_depth", [-1, 1])
@pytest.mark.parametrize("device", [torch.device("cpu"), torch.device("cuda", 0)])
def test_linear_model_summary_shapes(device, max_depth):

@@ -295,7 +295,7 @@ def test_empty_model_size(max_depth):
    assert 0.0 == summary.model_size


@RunIf(min_gpus=1)
@RunIf(min_cuda_gpus=1)
def test_model_size_precision(tmpdir):
    """Test model size for half and full precision."""
    model = PreCalculatedModel()
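
Because the change is a mechanical rename of one RunIf keyword across many test modules, the same effect could be obtained with a small script along the following lines. This is a sketch only; the tests/ path and the procedure actually used for this commit are assumptions.

import re
from pathlib import Path


def rename_runif_kwarg(root: str = "tests") -> None:
    # Rewrite every occurrence of the old keyword argument to its new name.
    pattern = re.compile(r"\bmin_gpus=")
    for path in Path(root).rglob("*.py"):
        text = path.read_text()
        new_text = pattern.sub("min_cuda_gpus=", text)
        if new_text != text:
            path.write_text(new_text)


if __name__ == "__main__":
    rename_runif_kwarg()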