Skip horovod tests with cuda errors (#16276)
This commit is contained in:
parent
9c3c819a94
commit
72e1f54dd9
|
@ -132,6 +132,7 @@ def test_horovod_cpu_implicit(tmpdir):
|
||||||
_run_horovod(trainer_options)
|
_run_horovod(trainer_options)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.xfail(raises=AssertionError, reason="unhandled cuda error")
|
||||||
@RunIf(min_cuda_gpus=2, horovod_nccl=True, skip_windows=True)
|
@RunIf(min_cuda_gpus=2, horovod_nccl=True, skip_windows=True)
|
||||||
def test_horovod_multi_gpu(tmpdir):
|
def test_horovod_multi_gpu(tmpdir):
|
||||||
"""Test Horovod with multi-GPU support."""
|
"""Test Horovod with multi-GPU support."""
|
||||||
|
@ -149,6 +150,7 @@ def test_horovod_multi_gpu(tmpdir):
|
||||||
_run_horovod(trainer_options)
|
_run_horovod(trainer_options)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.xfail(raises=AssertionError, reason="unhandled cuda error")
|
||||||
@RunIf(min_cuda_gpus=2, horovod_nccl=True, skip_windows=True)
|
@RunIf(min_cuda_gpus=2, horovod_nccl=True, skip_windows=True)
|
||||||
def test_horovod_multi_gpu_accumulate_grad_batches(tmpdir):
|
def test_horovod_multi_gpu_accumulate_grad_batches(tmpdir):
|
||||||
trainer_options = dict(
|
trainer_options = dict(
|
||||||
|
@ -165,10 +167,12 @@ def test_horovod_multi_gpu_accumulate_grad_batches(tmpdir):
|
||||||
_run_horovod(trainer_options)
|
_run_horovod(trainer_options)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.xfail(reason="unhandled cuda error")
|
||||||
@RunIf(horovod=True, skip_windows=True, min_cuda_gpus=1)
|
@RunIf(horovod=True, skip_windows=True, min_cuda_gpus=1)
|
||||||
def test_horovod_raises_unsupported_accumulate_grad_batches(tmpdir):
|
def test_horovod_raises_unsupported_accumulate_grad_batches(tmpdir):
|
||||||
"""Ensure MisConfigurationException for different `accumulate_grad_batches` at different epochs for Horovod
|
"""Ensure MisConfigurationException for different `accumulate_grad_batches` at different epochs for Horovod
|
||||||
Strategy on multi-gpus."""
|
Strategy on multi-gpus."""
|
||||||
|
|
||||||
model = BoringModel()
|
model = BoringModel()
|
||||||
with pytest.deprecated_call(match=r"horovod'\)` has been deprecated in v1.9"):
|
with pytest.deprecated_call(match=r"horovod'\)` has been deprecated in v1.9"):
|
||||||
trainer = Trainer(
|
trainer = Trainer(
|
||||||
|
@ -183,6 +187,7 @@ def test_horovod_raises_unsupported_accumulate_grad_batches(tmpdir):
|
||||||
trainer.fit(model)
|
trainer.fit(model)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.xfail(raises=AssertionError, reason="unhandled cuda error")
|
||||||
@RunIf(min_cuda_gpus=2, horovod_nccl=True, skip_windows=True)
|
@RunIf(min_cuda_gpus=2, horovod_nccl=True, skip_windows=True)
|
||||||
def test_horovod_multi_gpu_grad_by_value(tmpdir):
|
def test_horovod_multi_gpu_grad_by_value(tmpdir):
|
||||||
"""Test Horovod with multi-GPU support."""
|
"""Test Horovod with multi-GPU support."""
|
||||||
|
@ -201,6 +206,7 @@ def test_horovod_multi_gpu_grad_by_value(tmpdir):
|
||||||
_run_horovod(trainer_options)
|
_run_horovod(trainer_options)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.xfail(raises=AssertionError, reason="unhandled cuda error")
|
||||||
@RunIf(min_cuda_gpus=2, horovod_nccl=True, skip_windows=True)
|
@RunIf(min_cuda_gpus=2, horovod_nccl=True, skip_windows=True)
|
||||||
def test_horovod_amp(tmpdir):
|
def test_horovod_amp(tmpdir):
|
||||||
"""Test Horovod with multi-GPU support using native amp."""
|
"""Test Horovod with multi-GPU support using native amp."""
|
||||||
|
@ -220,6 +226,7 @@ def test_horovod_amp(tmpdir):
|
||||||
_run_horovod(trainer_options)
|
_run_horovod(trainer_options)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.xfail(raises=AssertionError, reason="unhandled cuda error")
|
||||||
@RunIf(min_cuda_gpus=2, horovod_nccl=True, skip_windows=True)
|
@RunIf(min_cuda_gpus=2, horovod_nccl=True, skip_windows=True)
|
||||||
def test_horovod_gather(tmpdir):
|
def test_horovod_gather(tmpdir):
|
||||||
"""Test Horovod with multi-GPU support using native amp."""
|
"""Test Horovod with multi-GPU support using native amp."""
|
||||||
|
@ -237,6 +244,7 @@ def test_horovod_gather(tmpdir):
|
||||||
_run_horovod(trainer_options)
|
_run_horovod(trainer_options)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.xfail(reason="unhandled cuda error")
|
||||||
@RunIf(min_cuda_gpus=2, skip_windows=True, horovod=True, horovod_nccl=True)
|
@RunIf(min_cuda_gpus=2, skip_windows=True, horovod=True, horovod_nccl=True)
|
||||||
def test_horovod_transfer_batch_to_gpu(tmpdir):
|
def test_horovod_transfer_batch_to_gpu(tmpdir):
|
||||||
class TestTrainingStepModel(BoringModel):
|
class TestTrainingStepModel(BoringModel):
|
||||||
|
|
Loading…
Reference in New Issue