Add fairscale & deepspeed to skipif 4/n (#6281)

* add fairscale & windows to skipif

* add deepspeed to runif

* fairscale

* deepspeed

* flake8

Co-authored-by: Jirka Borovec <jirka.borovec@seznam.cz>
Kaushik B 2021-03-03 01:15:13 +05:30 committed by GitHub
parent d1a03153f3
commit 4157b35062
5 changed files with 50 additions and 59 deletions

View File

@@ -34,7 +34,6 @@ from pytorch_lightning.plugins import (
SingleDevicePlugin,
)
from pytorch_lightning.plugins.environments import ClusterEnvironment, SLURMEnvironment, TorchElasticEnvironment
-from pytorch_lightning.utilities import _DEEPSPEED_AVAILABLE
from pytorch_lightning.utilities.exceptions import MisconfigurationException
from tests.helpers.boring_model import BoringModel
from tests.helpers.runif import RunIf
@@ -425,11 +424,7 @@ def test_plugin_accelerator_choice(accelerator, plugin):
('ddp_spawn', DDPSpawnPlugin),
('ddp_sharded', DDPShardedPlugin),
('ddp_sharded_spawn', DDPSpawnShardedPlugin),
-pytest.param(
-'deepspeed',
-DeepSpeedPlugin,
-marks=pytest.mark.skipif(not _DEEPSPEED_AVAILABLE, reason="DeepSpeed not available.")
-),
+pytest.param('deepspeed', DeepSpeedPlugin, marks=RunIf(deepspeed=True)),
])
@mock.patch('torch.cuda.is_available', return_value=True)
@mock.patch('torch.cuda.device_count', return_value=2)

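Note on the pattern above: `RunIf(...)` evaluates to an ordinary `pytest.mark.skipif` marker object, which is why the same expression can be used both as a `@RunIf(...)` decorator and as the `marks=` argument of `pytest.param`. The snippet below is an illustrative, plain-pytest sketch of that dual usage; the names `not_on_windows`, `test_posix_only`, and `test_modes` are made up for the example and are not part of this commit.

import sys

import pytest

# A skipif marker is just an object: it can decorate a whole test
# or be attached to a single parametrized case via `marks=`.
not_on_windows = pytest.mark.skipif(sys.platform == "win32", reason="POSIX only")


@not_on_windows  # applied as a decorator
def test_posix_only():
    assert True


@pytest.mark.parametrize("mode", [
    "simple",
    pytest.param("fancy", marks=not_on_windows),  # applied to one case only
])
def test_modes(mode):
    assert mode in ("simple", "fancy")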
View File

@@ -22,6 +22,9 @@ from pkg_resources import get_distribution
from pytorch_lightning.utilities import (
_APEX_AVAILABLE,
+_DEEPSPEED_AVAILABLE,
+_FAIRSCALE_AVAILABLE,
+_FAIRSCALE_PIPE_AVAILABLE,
_HOROVOD_AVAILABLE,
_NATIVE_AMP_AVAILABLE,
_RPC_AVAILABLE,
@@ -63,6 +66,9 @@ class RunIf:
skip_windows: bool = False,
special: bool = False,
rpc: bool = False,
+fairscale: bool = False,
+fairscale_pipe: bool = False,
+deepspeed: bool = False,
**kwargs
):
"""
@@ -80,6 +86,8 @@
skip_windows: skip test for Windows platform (typically for some limited torch functionality)
special: running in special mode, outside pytest suite
rpc: requires Remote Procedure Call (RPC)
+fairscale: if `fairscale` module is required to run the test
+deepspeed: if `deepspeed` module is required to run the test
kwargs: native pytest.mark.skipif keyword arguments
"""
conditions = []
@@ -137,6 +145,18 @@
conditions.append(not _RPC_AVAILABLE)
reasons.append("RPC")
+if fairscale:
+conditions.append(not _FAIRSCALE_AVAILABLE)
+reasons.append("Fairscale")
+if fairscale_pipe:
+conditions.append(not _FAIRSCALE_PIPE_AVAILABLE)
+reasons.append("Fairscale Pipe")
+if deepspeed:
+conditions.append(not _DEEPSPEED_AVAILABLE)
+reasons.append("Deepspeed")
reasons = [rs for cond, rs in zip(conditions, reasons) if cond]
return pytest.mark.skipif(
*args,

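For readers unfamiliar with the helper: the flags added above make `RunIf` collect one condition/reason pair per missing requirement and fold them into a single `pytest.mark.skipif` marker. Below is a minimal, self-contained sketch of that mechanism; the availability flags are stubbed with plain import checks, whereas the real constants come from `pytorch_lightning.utilities`, and the real class lives in `tests/helpers/runif.py` with many more options.

import pytest

# Stubbed availability flags for the sketch; the real constants are imported
# from pytorch_lightning.utilities.
try:
    import deepspeed  # noqa: F401
    _DEEPSPEED_AVAILABLE = True
except ImportError:
    _DEEPSPEED_AVAILABLE = False

try:
    import fairscale  # noqa: F401
    _FAIRSCALE_AVAILABLE = True
except ImportError:
    _FAIRSCALE_AVAILABLE = False


class RunIf:
    """Simplified stand-in: gathers unmet requirements and returns one skipif marker."""

    def __new__(cls, fairscale: bool = False, deepspeed: bool = False, **kwargs):
        conditions, reasons = [], []
        if fairscale:
            conditions.append(not _FAIRSCALE_AVAILABLE)
            reasons.append("Fairscale")
        if deepspeed:
            conditions.append(not _DEEPSPEED_AVAILABLE)
            reasons.append("Deepspeed")
        # keep only the reasons whose condition actually failed
        reasons = [rs for cond, rs in zip(conditions, reasons) if cond]
        return pytest.mark.skipif(any(conditions), reason=f"Requires: [{' + '.join(reasons)}]", **kwargs)


@RunIf(deepspeed=True)  # skipped unless deepspeed is importable
def test_needs_deepspeed():
    assert True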
View File

@@ -9,7 +9,6 @@ from torch.optim import Optimizer
from pytorch_lightning import Trainer
from pytorch_lightning.plugins import DeepSpeedPlugin, DeepSpeedPrecisionPlugin
from pytorch_lightning.plugins.training_type.deepspeed import LightningDeepSpeedModule
-from pytorch_lightning.utilities import _DEEPSPEED_AVAILABLE
from pytorch_lightning.utilities.exceptions import MisconfigurationException
from tests.helpers.boring_model import BoringModel
from tests.helpers.runif import RunIf
@@ -81,7 +80,7 @@ def deepspeed_zero_config(deepspeed_config):
return {**deepspeed_config, 'zero_allow_untested_optimizer': True, 'zero_optimization': {'stage': 2}}
-@pytest.mark.skipif(not _DEEPSPEED_AVAILABLE, reason="DeepSpeed not available.")
+@RunIf(deepspeed=True)
@pytest.mark.parametrize("input", ("deepspeed", DeepSpeedPlugin))
def test_deepspeed_plugin_string(tmpdir, input):
"""
@@ -98,7 +97,7 @@ def test_deepspeed_plugin_string(tmpdir, input):
assert trainer.accelerator.training_type_plugin.parallel_devices == [torch.device('cpu')]
-@pytest.mark.skipif(not _DEEPSPEED_AVAILABLE, reason="DeepSpeed not available.")
+@RunIf(deepspeed=True)
def test_deepspeed_plugin_env(tmpdir, monkeypatch, deepspeed_config):
"""
Test to ensure that the plugin can be passed via a string with an environment variable.
@@ -120,14 +119,13 @@ def test_deepspeed_plugin_env(tmpdir, monkeypatch, deepspeed_config):
assert plugin.config == deepspeed_config
+@RunIf(amp_native=True, deepspeed=True)
@pytest.mark.parametrize(
"amp_backend", [
pytest.param("native", marks=RunIf(amp_native=True)),
pytest.param("apex", marks=RunIf(amp_apex=True)),
]
)
-@pytest.mark.skipif(not _DEEPSPEED_AVAILABLE, reason="DeepSpeed not available.")
-@RunIf(amp_native=True)
def test_deepspeed_precision_choice(amp_backend, tmpdir):
"""
Test to ensure precision plugin is also correctly chosen.
@@ -143,7 +141,7 @@ def test_deepspeed_precision_choice(amp_backend, tmpdir):
assert trainer.accelerator.precision_plugin.precision == 16
-@pytest.mark.skipif(not _DEEPSPEED_AVAILABLE, reason="DeepSpeed not available.")
+@RunIf(deepspeed=True)
def test_deepspeed_with_invalid_config_path(tmpdir):
"""
Test to ensure if we pass an invalid config path we throw an exception.
@@ -155,7 +153,7 @@ def test_deepspeed_with_invalid_config_path(tmpdir):
DeepSpeedPlugin(config='invalid_path.json')
-@pytest.mark.skipif(not _DEEPSPEED_AVAILABLE, reason="DeepSpeed not available.")
+@RunIf(deepspeed=True)
def test_deepspeed_with_env_path(tmpdir, monkeypatch, deepspeed_config):
"""
Test to ensure if we pass an env variable, we load the config from the path.
@@ -168,7 +166,7 @@ def test_deepspeed_with_env_path(tmpdir, monkeypatch, deepspeed_config):
assert plugin.config == deepspeed_config
-@pytest.mark.skipif(not _DEEPSPEED_AVAILABLE, reason="DeepSpeed not available.")
+@RunIf(deepspeed=True)
def test_deepspeed_defaults(tmpdir):
"""
Ensure that defaults are correctly set as a config for DeepSpeed if no arguments are passed.
@@ -178,7 +176,7 @@ def test_deepspeed_defaults(tmpdir):
assert isinstance(plugin.config["zero_optimization"], dict)
-@pytest.mark.skipif(not _DEEPSPEED_AVAILABLE, reason="DeepSpeed not available.")
+@RunIf(deepspeed=True)
def test_invalid_deepspeed_defaults_no_precision(tmpdir):
"""
Test to ensure that using defaults, if precision is not set to 16, we throw an exception.
@@ -195,8 +193,7 @@ def test_invalid_deepspeed_defaults_no_precision(tmpdir):
trainer.fit(model)
-@RunIf(min_gpus=1)
-@pytest.mark.skipif(not _DEEPSPEED_AVAILABLE, reason="DeepSpeed not available.")
+@RunIf(min_gpus=1, deepspeed=True)
def test_warn_deepspeed_override_backward(tmpdir):
"""
Test to ensure that if the backward hook in the LightningModule is overridden, we throw a warning.
@@ -213,8 +210,7 @@ def test_warn_deepspeed_override_backward(tmpdir):
trainer.fit(model)
-@RunIf(min_gpus=1)
-@pytest.mark.skipif(not _DEEPSPEED_AVAILABLE, reason="DeepSpeed not available.")
+@RunIf(min_gpus=1, deepspeed=True)
def test_deepspeed_run_configure_optimizers(tmpdir):
"""
Test end to end that deepspeed works with defaults (without ZeRO as that requires compilation),
@@ -246,8 +242,7 @@ def test_deepspeed_run_configure_optimizers(tmpdir):
_assert_save_model_is_equal(model, tmpdir, trainer)
-@RunIf(min_gpus=1)
-@pytest.mark.skipif(not _DEEPSPEED_AVAILABLE, reason="DeepSpeed not available.")
+@RunIf(min_gpus=1, deepspeed=True)
def test_deepspeed_config(tmpdir, deepspeed_zero_config):
"""
Test to ensure deepspeed works correctly when passed a DeepSpeed config object including optimizers/schedulers
@@ -281,8 +276,7 @@ def test_deepspeed_config(tmpdir, deepspeed_zero_config):
_assert_save_model_is_equal(model, tmpdir, trainer)
-@pytest.mark.skipif(not _DEEPSPEED_AVAILABLE, reason="DeepSpeed not available.")
-@RunIf(min_gpus=1)
+@RunIf(min_gpus=1, deepspeed=True)
def test_deepspeed_custom_precision_params(tmpdir):
"""
Ensure if we modify the FP16 parameters via the DeepSpeedPlugin, the deepspeed config contains these changes.
@@ -312,8 +306,7 @@ def test_deepspeed_custom_precision_params(tmpdir):
trainer.fit(model)
-@pytest.mark.skipif(not _DEEPSPEED_AVAILABLE, reason="DeepSpeed not available.")
-@RunIf(min_gpus=1)
+@RunIf(min_gpus=1, deepspeed=True)
def test_deepspeed_assert_config_zero_offload_disabled(tmpdir, deepspeed_zero_config):
"""
Ensure if we use a config and turn off cpu_offload, that this is set to False within the config.
@@ -333,8 +326,7 @@ def test_deepspeed_assert_config_zero_offload_disabled(tmpdir, deepspeed_zero_config):
trainer.fit(model)
-@pytest.mark.skipif(not _DEEPSPEED_AVAILABLE, reason="DeepSpeed not available.")
-@RunIf(min_gpus=2, special=True)
+@RunIf(min_gpus=2, special=True, deepspeed=True)
def test_deepspeed_multigpu(tmpdir, deepspeed_config):
"""
Test to ensure that DeepSpeed with multiple GPUs works, without ZeRO Optimization as this requires compilation.

View File

@@ -21,15 +21,13 @@ from torch import nn
from pytorch_lightning import LightningModule, Trainer
from pytorch_lightning.plugins.training_type.rpc_sequential import RPCSequentialPlugin
-from pytorch_lightning.utilities import _FAIRSCALE_PIPE_AVAILABLE
from pytorch_lightning.utilities.exceptions import MisconfigurationException
from tests.helpers.boring_model import RandomDataset
from tests.helpers.runif import RunIf
-@pytest.mark.skipif(not _FAIRSCALE_PIPE_AVAILABLE, reason="test requires FairScale to be installed")
@mock.patch.dict(os.environ, {"PL_DEV_DEBUG": "1"})
-@RunIf(min_gpus=2, special=True)
+@RunIf(min_gpus=2, special=True, fairscale_pipe=True)
def test_rpc_sequential_plugin_manual(tmpdir, args=None):
model = SequentialModelRPCManual()
trainer = Trainer(
@@ -52,9 +50,8 @@ def test_rpc_sequential_plugin_manual(tmpdir, args=None):
trainer.accelerator.training_type_plugin.exit_rpc_process()
-@pytest.mark.skipif(not _FAIRSCALE_PIPE_AVAILABLE, reason="test requires FairScale to be installed")
@mock.patch.dict(os.environ, {"PL_DEV_DEBUG": "1"})
-@RunIf(min_gpus=2, special=True)
+@RunIf(min_gpus=2, special=True, fairscale_pipe=True)
def test_rpc_sequential_plugin_manual_amp(tmpdir, args=None):
model = SequentialModelRPCManual()
trainer = Trainer(
@@ -75,9 +72,8 @@ def test_rpc_sequential_plugin_manual_amp(tmpdir, args=None):
trainer.fit(model)
-@pytest.mark.skipif(not _FAIRSCALE_PIPE_AVAILABLE, reason="test requires FairScale to be installed")
@mock.patch.dict(os.environ, {"PL_DEV_DEBUG": "1"})
-@RunIf(min_gpus=2, special=True)
+@RunIf(min_gpus=2, special=True, fairscale_pipe=True)
def test_rpc_sequential_plugin_automatic(tmpdir, args=None):
model = SequentialModelRPCAutomatic()
trainer = Trainer(
@@ -100,9 +96,8 @@ def test_rpc_sequential_plugin_automatic(tmpdir, args=None):
trainer.accelerator.training_type_plugin.exit_rpc_process()
-@pytest.mark.skipif(not _FAIRSCALE_PIPE_AVAILABLE, reason="test requires FairScale to be installed")
@mock.patch.dict(os.environ, {"PL_DEV_DEBUG": "1"})
-@RunIf(min_gpus=2, special=True)
+@RunIf(min_gpus=2, special=True, fairscale_pipe=True)
def test_rpc_sequential_plugin_with_wrong_balance(tmpdir, args=None):
model = SequentialModelRPCAutomatic()
trainer = Trainer(

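The dedicated `fairscale_pipe` flag exists because these RPC sequential tests need fairscale's pipe support, gated by `_FAIRSCALE_PIPE_AVAILABLE`, rather than just a base fairscale install. As a rough illustration of the difference only (the actual flag definitions live in `pytorch_lightning.utilities` and may also check versions, so treat this as an assumption):

from importlib.util import find_spec

# Base package present vs. the pipe submodule used by the sequential/RPC plugin tests.
_FAIRSCALE_AVAILABLE = find_spec("fairscale") is not None
_FAIRSCALE_PIPE_AVAILABLE = _FAIRSCALE_AVAILABLE and find_spec("fairscale.nn.pipe") is not None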
View File

@@ -6,14 +6,13 @@ import torch
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import Callback
from pytorch_lightning.plugins import DDPShardedPlugin, DDPSpawnShardedPlugin
-from pytorch_lightning.utilities import _FAIRSCALE_AVAILABLE
from pytorch_lightning.utilities.exceptions import MisconfigurationException
from tests.helpers.boring_model import BoringModel
from tests.helpers.runif import RunIf
+@RunIf(fairscale=True)
@pytest.mark.parametrize(["accelerator"], [("ddp_sharded", ), ("ddp_sharded_spawn", )])
-@pytest.mark.skipif(not _FAIRSCALE_AVAILABLE, reason="Fairscale is not available")
def test_sharded_ddp_choice(tmpdir, accelerator):
"""
Test to ensure that plugin is correctly chosen
@@ -39,8 +38,7 @@ def test_sharded_ddp_choice(tmpdir, accelerator):
trainer.fit(model)
-@RunIf(amp_apex=True)
-@pytest.mark.skipif(not _FAIRSCALE_AVAILABLE, reason="Fairscale is not available")
+@RunIf(amp_apex=True, fairscale=True)
def test_invalid_apex_sharded(tmpdir):
"""
Test to ensure that we raise an error when we try to use apex and sharded
@@ -58,9 +56,8 @@ def test_invalid_apex_sharded(tmpdir):
trainer.fit(model)
-@RunIf(min_gpus=2, amp_native=True)
+@RunIf(min_gpus=2, amp_native=True, fairscale=True)
@pytest.mark.parametrize(["accelerator"], [("ddp_sharded", ), ("ddp_sharded_spawn", )])
-@pytest.mark.skipif(not _FAIRSCALE_AVAILABLE, reason="Fairscale is not available")
def test_ddp_choice_sharded_amp(tmpdir, accelerator):
"""
Test to ensure that plugin native amp plugin is correctly chosen when using sharded
@@ -88,8 +85,7 @@ def test_ddp_choice_sharded_amp(tmpdir, accelerator):
trainer.fit(model)
-@RunIf(skip_windows=True)
-@pytest.mark.skipif(not _FAIRSCALE_AVAILABLE, reason="Fairscale is not available")
+@RunIf(skip_windows=True, fairscale=True)
def test_ddp_sharded_plugin_checkpoint_cpu(tmpdir):
"""
Test to ensure that checkpoint is saved correctly
@@ -112,8 +108,7 @@ def test_ddp_sharded_plugin_checkpoint_cpu(tmpdir):
assert torch.equal(ddp_param.to("cpu"), shard_param)
-@RunIf(min_gpus=2, skip_windows=True)
-@pytest.mark.skipif(not _FAIRSCALE_AVAILABLE, reason="Fairscale is not available")
+@RunIf(min_gpus=2, skip_windows=True, fairscale=True)
def test_ddp_sharded_plugin_checkpoint_multi_gpu(tmpdir):
"""
Test to ensure that checkpoint is saved correctly when using multiple GPUs
@@ -136,8 +131,7 @@ def test_ddp_sharded_plugin_checkpoint_multi_gpu(tmpdir):
assert torch.equal(ddp_param.to("cpu"), shard_param)
-@RunIf(min_gpus=2, skip_windows=True)
-@pytest.mark.skipif(not _FAIRSCALE_AVAILABLE, reason="Fairscale is not available")
+@RunIf(min_gpus=2, skip_windows=True, fairscale=True)
def test_ddp_sharded_plugin_finetune(tmpdir):
"""
Test to ensure that we can save and restart training (simulate fine-tuning)
@@ -158,8 +152,7 @@ def test_ddp_sharded_plugin_finetune(tmpdir):
trainer.fit(saved_model)
-@RunIf(skip_windows=True)
-@pytest.mark.skipif(not _FAIRSCALE_AVAILABLE, reason="Fairscale is not available")
+@RunIf(skip_windows=True, fairscale=True)
def test_ddp_sharded_plugin_resume_from_checkpoint(tmpdir):
"""
Test to ensure that resuming from checkpoint works
@@ -188,10 +181,9 @@ def test_ddp_sharded_plugin_resume_from_checkpoint(tmpdir):
trainer.fit(model)
-@pytest.mark.skip(reason="Not a critical test, skip till drone CI performance improves.")
+@pytest.mark.skip(reason="Not a critical test, skip till drone CI performance improves.") # todo
@pytest.mark.skip(reason="Currently unsupported restarting training on different number of devices.")
-@RunIf(min_gpus=2, skip_windows=True)
-@pytest.mark.skipif(not _FAIRSCALE_AVAILABLE, reason="Fairscale is not available")
+@RunIf(min_gpus=2, skip_windows=True, fairscale=True)
def test_ddp_sharded_plugin_resume_from_checkpoint_downsize_gpus(tmpdir):
"""
Test to ensure that resuming from checkpoint works when downsizing number of GPUS
@@ -220,8 +212,7 @@ def test_ddp_sharded_plugin_resume_from_checkpoint_downsize_gpus(tmpdir):
trainer.fit(model)
-@RunIf(min_gpus=1, skip_windows=True)
-@pytest.mark.skipif(not _FAIRSCALE_AVAILABLE, reason="Fairscale is not available")
+@RunIf(min_gpus=1, skip_windows=True, fairscale=True)
def test_ddp_sharded_plugin_resume_from_checkpoint_gpu_to_cpu(tmpdir):
"""
Test to ensure that resuming from checkpoint works when going from GPUs- > CPU
@@ -250,8 +241,7 @@ def test_ddp_sharded_plugin_resume_from_checkpoint_gpu_to_cpu(tmpdir):
trainer.fit(model)
-@RunIf(skip_windows=True, special=True)
-@pytest.mark.skipif(not _FAIRSCALE_AVAILABLE, reason="Fairscale is not available")
+@RunIf(skip_windows=True, special=True, fairscale=True)
def test_ddp_sharded_plugin_test(tmpdir):
"""
Test to ensure we can use test without fit
@@ -266,8 +256,7 @@ def test_ddp_sharded_plugin_test(tmpdir):
trainer.test(model)
-@RunIf(min_gpus=2, skip_windows=True)
-@pytest.mark.skipif(not _FAIRSCALE_AVAILABLE, reason="Fairscale is not available")
+@RunIf(min_gpus=2, skip_windows=True, fairscale=True)
def test_ddp_sharded_plugin_test_multigpu(tmpdir):
"""
Test to ensure we can use test without fit