diff --git a/tests/callbacks/test_quantization.py b/tests/callbacks/test_quantization.py
index 053a153ad9..dd39ddb35d 100644
--- a/tests/callbacks/test_quantization.py
+++ b/tests/callbacks/test_quantization.py
@@ -80,7 +80,7 @@ def test_quantization(tmpdir, observe: str, fuse: bool, convert: bool):
     # todo: make it work also with strict loading
     qmodel2 = RegressionModel.load_from_checkpoint(model_path, strict=False)
     quant2_score = torch.mean(torch.tensor([mape(qmodel2(x), y) for x, y in dm.test_dataloader()]))
-    assert torch.allclose(org_score, quant2_score, atol=0.45)
+    assert torch.allclose(org_score, quant2_score, atol=0.47)
 
     # test without and with QAT callback
     trainer_args.update(max_epochs=curr_epoch + 1)
diff --git a/tests/helpers/runif.py b/tests/helpers/runif.py
index fb404b54a1..5a2464f6fd 100644
--- a/tests/helpers/runif.py
+++ b/tests/helpers/runif.py
@@ -31,6 +31,7 @@ from pytorch_lightning.utilities import (
     _IPU_AVAILABLE,
     _OMEGACONF_AVAILABLE,
     _RICH_AVAILABLE,
+    _TORCH_GREATER_EQUAL_1_10,
     _TORCH_QUANTIZE_AVAILABLE,
     _TPU_AVAILABLE,
 )
@@ -67,6 +68,7 @@ class RunIf:
         min_python: Optional[str] = None,
         quantization: bool = False,
         amp_apex: bool = False,
+        bf16_cuda: bool = False,
         tpu: bool = False,
         ipu: bool = False,
         hpu: bool = False,
@@ -93,6 +95,7 @@ class RunIf:
             min_python: Require that Python is greater or equal than this version.
             quantization: Require that `torch.quantization` is available.
             amp_apex: Require that NVIDIA/apex is installed.
+            bf16_cuda: Require that CUDA device supports bf16.
             tpu: Require that TPU is available.
             ipu: Require that IPU is available.
             hpu: Require that HPU is available.
@@ -141,6 +144,21 @@ class RunIf:
             conditions.append(not _APEX_AVAILABLE)
             reasons.append("NVIDIA Apex")
 
+        if bf16_cuda:
+            try:
+                cond = not (torch.cuda.is_available() and _TORCH_GREATER_EQUAL_1_10 and torch.cuda.is_bf16_supported())
+            except (AssertionError, RuntimeError) as e:
+                # AssertionError: Torch not compiled with CUDA enabled
+                # RuntimeError: Found no NVIDIA driver on your system.
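+                # Any other failure is unrelated to CUDA availability and is re-raised below.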
+                is_unrelated = "Found no NVIDIA driver" not in str(e) and "Torch not compiled with CUDA" not in str(e)
+                if is_unrelated:
+                    raise e
+                cond = True
+
+            conditions.append(cond)
+            reasons.append("CUDA device bf16")
+
         if skip_windows:
             conditions.append(sys.platform == "win32")
             reasons.append("unimplemented on Windows")
diff --git a/tests/lite/test_parity.py b/tests/lite/test_parity.py
index d5009a6da5..d32cdc3d97 100644
--- a/tests/lite/test_parity.py
+++ b/tests/lite/test_parity.py
@@ -111,7 +111,7 @@ def precision_context(precision, accelerator) -> Generator[None, None, None]:
         pytest.param(32, None, 1, "cpu"),
         pytest.param(32, None, 1, "gpu", marks=RunIf(min_gpus=1)),
         pytest.param(16, None, 1, "gpu", marks=RunIf(min_gpus=1)),
-        pytest.param("bf16", None, 1, "gpu", marks=RunIf(min_gpus=1, min_torch="1.10")),
+        pytest.param("bf16", None, 1, "gpu", marks=RunIf(min_gpus=1, min_torch="1.10", bf16_cuda=True)),
     ],
 )
 def test_boring_lite_model_single_device(precision, strategy, devices, accelerator, tmpdir):
diff --git a/tests/lite/test_wrappers.py b/tests/lite/test_wrappers.py
index f70a6a9e38..609a2c37e5 100644
--- a/tests/lite/test_wrappers.py
+++ b/tests/lite/test_wrappers.py
@@ -45,9 +45,9 @@ def test_lite_module_wraps():
         (16, torch.float32, torch.float16),
         (16, torch.float64, torch.float16),
         (16, torch.long, torch.long),
-        pytest.param("bf16", torch.float32, torch.bfloat16, marks=RunIf(min_torch="1.10")),
-        pytest.param("bf16", torch.float64, torch.bfloat16, marks=RunIf(min_torch="1.10")),
-        pytest.param("bf16", torch.bool, torch.bool, marks=RunIf(min_torch="1.10")),
+        pytest.param("bf16", torch.float32, torch.bfloat16, marks=RunIf(min_torch="1.10", bf16_cuda=True)),
+        pytest.param("bf16", torch.float64, torch.bfloat16, marks=RunIf(min_torch="1.10", bf16_cuda=True)),
+        pytest.param("bf16", torch.bool, torch.bool, marks=RunIf(min_torch="1.10", bf16_cuda=True)),
     ],
 )
 def test_lite_module_forward_conversion(precision, input_type, expected_type):
diff --git a/tests/models/test_horovod.py b/tests/models/test_horovod.py
index f8d973d5cf..152d01aca9 100644
--- a/tests/models/test_horovod.py
+++ b/tests/models/test_horovod.py
@@ -262,7 +262,7 @@ def test_horovod_gather(tmpdir):
     _run_horovod(trainer_options)
 
 
-@RunIf(min_gpus=1, horovod_nccl=True, skip_windows=True)
+@RunIf(min_gpus=1, skip_windows=True, horovod=True, horovod_nccl=True)
 def test_horovod_transfer_batch_to_gpu(tmpdir):
     class TestTrainingStepModel(BoringModel):
         def training_step(self, batch, *args, **kwargs):
diff --git a/tests/trainer/logging_/test_train_loop_logging.py b/tests/trainer/logging_/test_train_loop_logging.py
index 5a8fabece7..ecf701f7ed 100644
--- a/tests/trainer/logging_/test_train_loop_logging.py
+++ b/tests/trainer/logging_/test_train_loop_logging.py
@@ -395,7 +395,7 @@ class LoggingSyncDistModel(BoringModel):
         return super().validation_step(batch, batch_idx)
 
 
-@pytest.mark.parametrize("devices", [1, pytest.param(2, marks=RunIf(skip_windows=True))])
+@pytest.mark.parametrize("devices", [1, pytest.param(2, marks=RunIf(min_gpus=2, skip_windows=True))])
 def test_logging_sync_dist_true(tmpdir, devices):
     """Tests to ensure that the sync_dist flag works (should just return the original value)"""
     fake_result = 1
diff --git a/tests/trainer/test_supporters.py b/tests/trainer/test_supporters.py
index 7088432e3b..c109c842ea 100644
--- a/tests/trainer/test_supporters.py
+++ b/tests/trainer/test_supporters.py
@@ -11,6 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import math
 import os
 from typing import Sequence
 from unittest import mock
@@ -36,6 +37,7 @@ from pytorch_lightning.utilities.auto_restart import CaptureMapDataset, FastForw
 from pytorch_lightning.utilities.data import get_len
 from pytorch_lightning.utilities.exceptions import MisconfigurationException
 from tests.helpers.boring_model import BoringModel, RandomDataset
+from tests.helpers.runif import RunIf
 
 
 def test_tensor_running_accum_reset():
@@ -381,11 +383,12 @@ def test_combined_data_loader_validation_test(
     apply_to_collection(dataloader.loaders, DataLoader, _assert_dataset)
 
 
+@pytest.mark.parametrize("accelerator", ["cpu", pytest.param("gpu", marks=RunIf(min_gpus=2))])
 @pytest.mark.parametrize("replace_sampler_ddp", [False, True])
-def test_combined_data_loader_with_max_size_cycle_and_ddp(replace_sampler_ddp):
+def test_combined_data_loader_with_max_size_cycle_and_ddp(accelerator, replace_sampler_ddp):
     """This test makes sure distributed sampler has been properly injected in dataloaders when using CombinedLoader
     with ddp and `max_size_cycle` mode."""
-    trainer = Trainer(strategy="ddp", accelerator="auto", devices=2, replace_sampler_ddp=replace_sampler_ddp)
+    trainer = Trainer(strategy="ddp", accelerator=accelerator, devices=2, replace_sampler_ddp=replace_sampler_ddp)
 
     dataloader = CombinedLoader(
         {"a": DataLoader(RandomDataset(32, 8), batch_size=1), "b": DataLoader(RandomDataset(32, 8), batch_size=1)},
@@ -452,19 +455,23 @@ def test_combined_dataloader_for_training_with_ddp(
     }
     if use_combined_loader:
         dataloader = CombinedLoader(dataloader, mode=mode)
-    expected_length_before_ddp = min(n1, n2) if is_min_size_mode else max(n1, n2)
-    expected_length_after_ddp = expected_length_before_ddp // 2 if replace_sampler_ddp else expected_length_before_ddp
     model = BoringModel()
     trainer = Trainer(
         strategy="ddp",
         accelerator="auto",
-        devices=2,
+        devices="auto",
         replace_sampler_ddp=replace_sampler_ddp,
         multiple_trainloader_mode="max_size_cycle" if use_combined_loader else mode,
     )
     trainer._data_connector.attach_data(
         model=model, train_dataloaders=dataloader, val_dataloaders=None, datamodule=None
     )
+    expected_length_before_ddp = min(n1, n2) if is_min_size_mode else max(n1, n2)
+    expected_length_after_ddp = (
+        math.ceil(expected_length_before_ddp / trainer.num_devices)
+        if replace_sampler_ddp
+        else expected_length_before_ddp
+    )
     trainer.reset_train_dataloader(model=model)
     assert trainer.train_dataloader is not None
     assert isinstance(trainer.train_dataloader, CombinedLoader)
diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py
index de495dd4c1..548b9df8b6 100644
--- a/tests/trainer/test_trainer.py
+++ b/tests/trainer/test_trainer.py
@@ -987,7 +987,7 @@ def test_gradient_clipping_by_norm(tmpdir, precision):
             # test that gradient is clipped correctly
             parameters = self.parameters()
             grad_norm = torch.norm(torch.stack([torch.norm(p.grad.detach(), 2) for p in parameters]), 2)
-            torch.testing.assert_allclose(grad_norm, torch.tensor(0.05))
+            torch.testing.assert_allclose(grad_norm, torch.tensor(0.05, device=self.device))
             self.assertion_called = True
 
     model = TestModel()
@@ -1018,7 +1018,7 @@ def test_gradient_clipping_by_value(tmpdir, precision):
             parameters = self.parameters()
             grad_max_list = [torch.max(p.grad.detach().abs()) for p in parameters]
             grad_max = torch.max(torch.stack(grad_max_list))
-            torch.testing.assert_allclose(grad_max.abs(), torch.tensor(1e-10))
+            torch.testing.assert_allclose(grad_max.abs(), torch.tensor(1e-10, device=self.device))
             self.assertion_called = True
 
     model = TestModel()
@@ -1406,8 +1406,9 @@ def test_trainer_predict_1_gpu(tmpdir):
 
 
 @RunIf(skip_windows=True)
-def test_trainer_predict_ddp_spawn(tmpdir):
-    predict(tmpdir, strategy="ddp_spawn", accelerator="auto", devices=2)
+@pytest.mark.parametrize("accelerator", ["cpu", pytest.param("gpu", marks=RunIf(min_gpus=2))])
+def test_trainer_predict_ddp_spawn(tmpdir, accelerator):
+    predict(tmpdir, strategy="ddp_spawn", accelerator=accelerator, devices=2)
 
 
 @pytest.mark.parametrize("dataset_cls", [RandomDataset, RandomIterableDatasetWithLen, RandomIterableDataset])