Fix tests failing on a single GPU (#11753)

Co-authored-by: akihiro@grid.ai <akihiro@grid.ai>
Co-authored-by: Carlos Mocholi <carlossmocholi@gmail.com>
This commit is contained in:
Akihiro Nitta 2022-04-10 13:23:16 +09:00 committed by GitHub
parent c233731b7c
commit 568710f2f2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 41 additions and 16 deletions

View File

@ -80,7 +80,7 @@ def test_quantization(tmpdir, observe: str, fuse: bool, convert: bool):
# todo: make it work also with strict loading
qmodel2 = RegressionModel.load_from_checkpoint(model_path, strict=False)
quant2_score = torch.mean(torch.tensor([mape(qmodel2(x), y) for x, y in dm.test_dataloader()]))
assert torch.allclose(org_score, quant2_score, atol=0.45)
assert torch.allclose(org_score, quant2_score, atol=0.47)
# test without and with QAT callback
trainer_args.update(max_epochs=curr_epoch + 1)

View File

@ -31,6 +31,7 @@ from pytorch_lightning.utilities import (
_IPU_AVAILABLE,
_OMEGACONF_AVAILABLE,
_RICH_AVAILABLE,
_TORCH_GREATER_EQUAL_1_10,
_TORCH_QUANTIZE_AVAILABLE,
_TPU_AVAILABLE,
)
@ -67,6 +68,7 @@ class RunIf:
min_python: Optional[str] = None,
quantization: bool = False,
amp_apex: bool = False,
bf16_cuda: bool = False,
tpu: bool = False,
ipu: bool = False,
hpu: bool = False,
@ -93,6 +95,7 @@ class RunIf:
min_python: Require that Python is greater or equal than this version.
quantization: Require that `torch.quantization` is available.
amp_apex: Require that NVIDIA/apex is installed.
bf16_cuda: Require that CUDA device supports bf16.
tpu: Require that TPU is available.
ipu: Require that IPU is available.
hpu: Require that HPU is available.
@ -141,6 +144,20 @@ class RunIf:
conditions.append(not _APEX_AVAILABLE)
reasons.append("NVIDIA Apex")
if bf16_cuda:
    # Skip the test unless a CUDA device with bf16 support is usable.
    try:
        cond = not (torch.cuda.is_available() and _TORCH_GREATER_EQUAL_1_10 and torch.cuda.is_bf16_supported())
    except (AssertionError, RuntimeError) as e:
        # Expected failures that simply mean "no CUDA here":
        # AssertionError: Torch not compiled with CUDA enabled
        # RuntimeError: Found no NVIDIA driver on your system.
        message = str(e)
        # The error is unrelated only when it matches NEITHER known message.
        # This must be `and`: with `or` the flag is true for any message that
        # lacks at least one of the two substrings, so the expected errors
        # above were re-raised too and `cond = True` was never reached.
        is_unrelated = "Found no NVIDIA driver" not in message and "Torch not compiled with CUDA" not in message
        if is_unrelated:
            # Bare `raise` re-raises the active exception with its original traceback.
            raise
        cond = True
    conditions.append(cond)
    reasons.append("CUDA device bf16")
if skip_windows:
conditions.append(sys.platform == "win32")
reasons.append("unimplemented on Windows")

View File

@ -111,7 +111,7 @@ def precision_context(precision, accelerator) -> Generator[None, None, None]:
pytest.param(32, None, 1, "cpu"),
pytest.param(32, None, 1, "gpu", marks=RunIf(min_gpus=1)),
pytest.param(16, None, 1, "gpu", marks=RunIf(min_gpus=1)),
pytest.param("bf16", None, 1, "gpu", marks=RunIf(min_gpus=1, min_torch="1.10")),
pytest.param("bf16", None, 1, "gpu", marks=RunIf(min_gpus=1, min_torch="1.10", bf16_cuda=True)),
],
)
def test_boring_lite_model_single_device(precision, strategy, devices, accelerator, tmpdir):

View File

@ -45,9 +45,9 @@ def test_lite_module_wraps():
(16, torch.float32, torch.float16),
(16, torch.float64, torch.float16),
(16, torch.long, torch.long),
pytest.param("bf16", torch.float32, torch.bfloat16, marks=RunIf(min_torch="1.10")),
pytest.param("bf16", torch.float64, torch.bfloat16, marks=RunIf(min_torch="1.10")),
pytest.param("bf16", torch.bool, torch.bool, marks=RunIf(min_torch="1.10")),
pytest.param("bf16", torch.float32, torch.bfloat16, marks=RunIf(min_torch="1.10", bf16_cuda=True)),
pytest.param("bf16", torch.float64, torch.bfloat16, marks=RunIf(min_torch="1.10", bf16_cuda=True)),
pytest.param("bf16", torch.bool, torch.bool, marks=RunIf(min_torch="1.10", bf16_cuda=True)),
],
)
def test_lite_module_forward_conversion(precision, input_type, expected_type):

View File

@ -262,7 +262,7 @@ def test_horovod_gather(tmpdir):
_run_horovod(trainer_options)
@RunIf(min_gpus=1, horovod_nccl=True, skip_windows=True)
@RunIf(min_gpus=1, skip_windows=True, horovod=True, horovod_nccl=True)
def test_horovod_transfer_batch_to_gpu(tmpdir):
class TestTrainingStepModel(BoringModel):
def training_step(self, batch, *args, **kwargs):

View File

@ -395,7 +395,7 @@ class LoggingSyncDistModel(BoringModel):
return super().validation_step(batch, batch_idx)
@pytest.mark.parametrize("devices", [1, pytest.param(2, marks=RunIf(skip_windows=True))])
@pytest.mark.parametrize("devices", [1, pytest.param(2, marks=RunIf(min_gpus=2, skip_windows=True))])
def test_logging_sync_dist_true(tmpdir, devices):
"""Tests to ensure that the sync_dist flag works (should just return the original value)"""
fake_result = 1

View File

@ -11,6 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import math
import os
from typing import Sequence
from unittest import mock
@ -36,6 +37,7 @@ from pytorch_lightning.utilities.auto_restart import CaptureMapDataset, FastForw
from pytorch_lightning.utilities.data import get_len
from pytorch_lightning.utilities.exceptions import MisconfigurationException
from tests.helpers.boring_model import BoringModel, RandomDataset
from tests.helpers.runif import RunIf
def test_tensor_running_accum_reset():
@ -381,11 +383,12 @@ def test_combined_data_loader_validation_test(
apply_to_collection(dataloader.loaders, DataLoader, _assert_dataset)
@pytest.mark.parametrize("accelerator", ["cpu", pytest.param("gpu", marks=RunIf(min_gpus=2))])
@pytest.mark.parametrize("replace_sampler_ddp", [False, True])
def test_combined_data_loader_with_max_size_cycle_and_ddp(replace_sampler_ddp):
def test_combined_data_loader_with_max_size_cycle_and_ddp(accelerator, replace_sampler_ddp):
"""This test makes sure distributed sampler has been properly injected in dataloaders when using CombinedLoader
with ddp and `max_size_cycle` mode."""
trainer = Trainer(strategy="ddp", accelerator="auto", devices=2, replace_sampler_ddp=replace_sampler_ddp)
trainer = Trainer(strategy="ddp", accelerator=accelerator, devices=2, replace_sampler_ddp=replace_sampler_ddp)
dataloader = CombinedLoader(
{"a": DataLoader(RandomDataset(32, 8), batch_size=1), "b": DataLoader(RandomDataset(32, 8), batch_size=1)},
@ -452,19 +455,23 @@ def test_combined_dataloader_for_training_with_ddp(
}
if use_combined_loader:
dataloader = CombinedLoader(dataloader, mode=mode)
expected_length_before_ddp = min(n1, n2) if is_min_size_mode else max(n1, n2)
expected_length_after_ddp = expected_length_before_ddp // 2 if replace_sampler_ddp else expected_length_before_ddp
model = BoringModel()
trainer = Trainer(
strategy="ddp",
accelerator="auto",
devices=2,
devices="auto",
replace_sampler_ddp=replace_sampler_ddp,
multiple_trainloader_mode="max_size_cycle" if use_combined_loader else mode,
)
trainer._data_connector.attach_data(
model=model, train_dataloaders=dataloader, val_dataloaders=None, datamodule=None
)
expected_length_before_ddp = min(n1, n2) if is_min_size_mode else max(n1, n2)
expected_length_after_ddp = (
math.ceil(expected_length_before_ddp / trainer.num_devices)
if replace_sampler_ddp
else expected_length_before_ddp
)
trainer.reset_train_dataloader(model=model)
assert trainer.train_dataloader is not None
assert isinstance(trainer.train_dataloader, CombinedLoader)

View File

@ -987,7 +987,7 @@ def test_gradient_clipping_by_norm(tmpdir, precision):
# test that gradient is clipped correctly
parameters = self.parameters()
grad_norm = torch.norm(torch.stack([torch.norm(p.grad.detach(), 2) for p in parameters]), 2)
torch.testing.assert_allclose(grad_norm, torch.tensor(0.05))
torch.testing.assert_allclose(grad_norm, torch.tensor(0.05, device=self.device))
self.assertion_called = True
model = TestModel()
@ -1018,7 +1018,7 @@ def test_gradient_clipping_by_value(tmpdir, precision):
parameters = self.parameters()
grad_max_list = [torch.max(p.grad.detach().abs()) for p in parameters]
grad_max = torch.max(torch.stack(grad_max_list))
torch.testing.assert_allclose(grad_max.abs(), torch.tensor(1e-10))
torch.testing.assert_allclose(grad_max.abs(), torch.tensor(1e-10, device=self.device))
self.assertion_called = True
model = TestModel()
@ -1406,8 +1406,9 @@ def test_trainer_predict_1_gpu(tmpdir):
@RunIf(skip_windows=True)
def test_trainer_predict_ddp_spawn(tmpdir):
predict(tmpdir, strategy="ddp_spawn", accelerator="auto", devices=2)
@pytest.mark.parametrize("accelerator", ["cpu", pytest.param("gpu", marks=RunIf(min_gpus=2))])
def test_trainer_predict_ddp_spawn(tmpdir, accelerator):
predict(tmpdir, strategy="ddp_spawn", accelerator=accelerator, devices=2)
@pytest.mark.parametrize("dataset_cls", [RandomDataset, RandomIterableDatasetWithLen, RandomIterableDataset])