Add fairscale & deepspeed to skipif 4/n (#6281)
* add fairscale & windows to skipif
* add deepspeed to runif
* fairscale
* deepspeed
* flake8

Co-authored-by: Jirka Borovec <jirka.borovec@seznam.cz>
parent d1a03153f3 · commit 4157b35062
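The pattern applied throughout the touched test files: the hand-written `pytest.mark.skipif` guard (and, where present, a separate `@RunIf(...)` decorator) is folded into a single `RunIf` call. A minimal before/after sketch with a hypothetical test name and an elided body, not code from the commit itself:

# Before: the availability flag is imported and checked by hand in every test file.
import pytest

from pytorch_lightning.utilities import _DEEPSPEED_AVAILABLE
from tests.helpers.runif import RunIf


@pytest.mark.skipif(not _DEEPSPEED_AVAILABLE, reason="DeepSpeed not available.")
def test_something_old_style(tmpdir):
    ...


# After: the requirement is declared through RunIf and can be combined with
# other conditions the same helper already handles (e.g. min_gpus, special).
@RunIf(min_gpus=1, deepspeed=True)
def test_something_new_style(tmpdir):
    ...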
@@ -34,7 +34,6 @@ from pytorch_lightning.plugins import (
     SingleDevicePlugin,
 )
 from pytorch_lightning.plugins.environments import ClusterEnvironment, SLURMEnvironment, TorchElasticEnvironment
-from pytorch_lightning.utilities import _DEEPSPEED_AVAILABLE
 from pytorch_lightning.utilities.exceptions import MisconfigurationException
 from tests.helpers.boring_model import BoringModel
 from tests.helpers.runif import RunIf
@@ -425,11 +424,7 @@ def test_plugin_accelerator_choice(accelerator, plugin):
     ('ddp_spawn', DDPSpawnPlugin),
     ('ddp_sharded', DDPShardedPlugin),
     ('ddp_sharded_spawn', DDPSpawnShardedPlugin),
-    pytest.param(
-        'deepspeed',
-        DeepSpeedPlugin,
-        marks=pytest.mark.skipif(not _DEEPSPEED_AVAILABLE, reason="DeepSpeed not available.")
-    ),
+    pytest.param('deepspeed', DeepSpeedPlugin, marks=RunIf(deepspeed=True)),
 ])
 @mock.patch('torch.cuda.is_available', return_value=True)
 @mock.patch('torch.cuda.device_count', return_value=2)

@@ -22,6 +22,9 @@ from pkg_resources import get_distribution

 from pytorch_lightning.utilities import (
     _APEX_AVAILABLE,
+    _DEEPSPEED_AVAILABLE,
+    _FAIRSCALE_AVAILABLE,
+    _FAIRSCALE_PIPE_AVAILABLE,
     _HOROVOD_AVAILABLE,
     _NATIVE_AMP_AVAILABLE,
     _RPC_AVAILABLE,
@@ -63,6 +66,9 @@ class RunIf:
         skip_windows: bool = False,
         special: bool = False,
         rpc: bool = False,
+        fairscale: bool = False,
+        fairscale_pipe: bool = False,
+        deepspeed: bool = False,
         **kwargs
     ):
         """
@@ -80,6 +86,8 @@ class RunIf:
             skip_windows: skip test for Windows platform (typically fo some limited torch functionality)
             special: running in special mode, outside pytest suit
             rpc: requires Remote Procedure Call (RPC)
+            fairscale: if `fairscale` module is required to run the test
+            deepspeed: if `deepspeed` module is required to run the test
             kwargs: native pytest.mark.skipif keyword arguments
         """
         conditions = []
@@ -137,6 +145,18 @@ class RunIf:
             conditions.append(not _RPC_AVAILABLE)
             reasons.append("RPC")

+        if fairscale:
+            conditions.append(not _FAIRSCALE_AVAILABLE)
+            reasons.append("Fairscale")
+
+        if fairscale_pipe:
+            conditions.append(not _FAIRSCALE_PIPE_AVAILABLE)
+            reasons.append("Fairscale Pipe")
+
+        if deepspeed:
+            conditions.append(not _DEEPSPEED_AVAILABLE)
+            reasons.append("Deepspeed")
+
         reasons = [rs for cond, rs in zip(conditions, reasons) if cond]
         return pytest.mark.skipif(
             *args,

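For reference, a condensed, self-contained sketch of what the new flags do inside the helper, following the conditions/reasons logic visible in the hunk above; the availability probes and the final skipif call here are simplified assumptions, not the library's exact code:

import importlib.util

import pytest

# Simplified availability probes; pytorch_lightning derives its
# _DEEPSPEED_AVAILABLE / _FAIRSCALE_AVAILABLE flags differently.
_DEEPSPEED_AVAILABLE = importlib.util.find_spec("deepspeed") is not None
_FAIRSCALE_AVAILABLE = importlib.util.find_spec("fairscale") is not None


def run_if_sketch(*, fairscale: bool = False, deepspeed: bool = False):
    """Build a pytest skip mark the way the new RunIf flags do (sketch)."""
    conditions = []
    reasons = []

    if fairscale:
        conditions.append(not _FAIRSCALE_AVAILABLE)
        reasons.append("Fairscale")

    if deepspeed:
        conditions.append(not _DEEPSPEED_AVAILABLE)
        reasons.append("Deepspeed")

    # Keep only the reasons whose condition actually triggers.
    reasons = [rs for cond, rs in zip(conditions, reasons) if cond]
    return pytest.mark.skipif(any(conditions), reason=f"Requires: [{' + '.join(reasons)}]")


@run_if_sketch(deepspeed=True)  # skipped when deepspeed cannot be imported
def test_deepspeed_dependent_sketch():
    assert True

Collecting conditions and reasons in parallel lists lets one decorator report every unmet requirement in a single skip reason, which is why each new flag only needs the three lines added in the hunk above.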
@@ -9,7 +9,6 @@ from torch.optim import Optimizer
 from pytorch_lightning import Trainer
 from pytorch_lightning.plugins import DeepSpeedPlugin, DeepSpeedPrecisionPlugin
 from pytorch_lightning.plugins.training_type.deepspeed import LightningDeepSpeedModule
-from pytorch_lightning.utilities import _DEEPSPEED_AVAILABLE
 from pytorch_lightning.utilities.exceptions import MisconfigurationException
 from tests.helpers.boring_model import BoringModel
 from tests.helpers.runif import RunIf
@@ -81,7 +80,7 @@ def deepspeed_zero_config(deepspeed_config):
     return {**deepspeed_config, 'zero_allow_untested_optimizer': True, 'zero_optimization': {'stage': 2}}


-@pytest.mark.skipif(not _DEEPSPEED_AVAILABLE, reason="DeepSpeed not available.")
+@RunIf(deepspeed=True)
 @pytest.mark.parametrize("input", ("deepspeed", DeepSpeedPlugin))
 def test_deepspeed_plugin_string(tmpdir, input):
     """
@@ -98,7 +97,7 @@ def test_deepspeed_plugin_string(tmpdir, input):
     assert trainer.accelerator.training_type_plugin.parallel_devices == [torch.device('cpu')]


-@pytest.mark.skipif(not _DEEPSPEED_AVAILABLE, reason="DeepSpeed not available.")
+@RunIf(deepspeed=True)
 def test_deepspeed_plugin_env(tmpdir, monkeypatch, deepspeed_config):
     """
     Test to ensure that the plugin can be passed via a string with an environment variable.
@@ -120,14 +119,13 @@ def test_deepspeed_plugin_env(tmpdir, monkeypatch, deepspeed_config):
     assert plugin.config == deepspeed_config


+@RunIf(amp_native=True, deepspeed=True)
 @pytest.mark.parametrize(
     "amp_backend", [
         pytest.param("native", marks=RunIf(amp_native=True)),
         pytest.param("apex", marks=RunIf(amp_apex=True)),
     ]
 )
-@pytest.mark.skipif(not _DEEPSPEED_AVAILABLE, reason="DeepSpeed not available.")
-@RunIf(amp_native=True)
 def test_deepspeed_precision_choice(amp_backend, tmpdir):
     """
     Test to ensure precision plugin is also correctly chosen.
@@ -143,7 +141,7 @@ def test_deepspeed_precision_choice(amp_backend, tmpdir):
     assert trainer.accelerator.precision_plugin.precision == 16


-@pytest.mark.skipif(not _DEEPSPEED_AVAILABLE, reason="DeepSpeed not available.")
+@RunIf(deepspeed=True)
 def test_deepspeed_with_invalid_config_path(tmpdir):
     """
     Test to ensure if we pass an invalid config path we throw an exception.
@@ -155,7 +153,7 @@ def test_deepspeed_with_invalid_config_path(tmpdir):
     DeepSpeedPlugin(config='invalid_path.json')


-@pytest.mark.skipif(not _DEEPSPEED_AVAILABLE, reason="DeepSpeed not available.")
+@RunIf(deepspeed=True)
 def test_deepspeed_with_env_path(tmpdir, monkeypatch, deepspeed_config):
     """
     Test to ensure if we pass an env variable, we load the config from the path.
@@ -168,7 +166,7 @@ def test_deepspeed_with_env_path(tmpdir, monkeypatch, deepspeed_config):
     assert plugin.config == deepspeed_config


-@pytest.mark.skipif(not _DEEPSPEED_AVAILABLE, reason="DeepSpeed not available.")
+@RunIf(deepspeed=True)
 def test_deepspeed_defaults(tmpdir):
     """
     Ensure that defaults are correctly set as a config for DeepSpeed if no arguments are passed.
@@ -178,7 +176,7 @@ def test_deepspeed_defaults(tmpdir):
     assert isinstance(plugin.config["zero_optimization"], dict)


-@pytest.mark.skipif(not _DEEPSPEED_AVAILABLE, reason="DeepSpeed not available.")
+@RunIf(deepspeed=True)
 def test_invalid_deepspeed_defaults_no_precision(tmpdir):
     """
     Test to ensure that using defaults, if precision is not set to 16, we throw an exception.
@@ -195,8 +193,7 @@ def test_invalid_deepspeed_defaults_no_precision(tmpdir):
     trainer.fit(model)


-@RunIf(min_gpus=1)
-@pytest.mark.skipif(not _DEEPSPEED_AVAILABLE, reason="DeepSpeed not available.")
+@RunIf(min_gpus=1, deepspeed=True)
 def test_warn_deepspeed_override_backward(tmpdir):
     """
     Test to ensure that if the backward hook in the LightningModule is overridden, we throw a warning.
@@ -213,8 +210,7 @@ def test_warn_deepspeed_override_backward(tmpdir):
     trainer.fit(model)


-@RunIf(min_gpus=1)
-@pytest.mark.skipif(not _DEEPSPEED_AVAILABLE, reason="DeepSpeed not available.")
+@RunIf(min_gpus=1, deepspeed=True)
 def test_deepspeed_run_configure_optimizers(tmpdir):
     """
     Test end to end that deepspeed works with defaults (without ZeRO as that requires compilation),
@@ -246,8 +242,7 @@ def test_deepspeed_run_configure_optimizers(tmpdir):
     _assert_save_model_is_equal(model, tmpdir, trainer)


-@RunIf(min_gpus=1)
-@pytest.mark.skipif(not _DEEPSPEED_AVAILABLE, reason="DeepSpeed not available.")
+@RunIf(min_gpus=1, deepspeed=True)
 def test_deepspeed_config(tmpdir, deepspeed_zero_config):
     """
     Test to ensure deepspeed works correctly when passed a DeepSpeed config object including optimizers/schedulers
@@ -281,8 +276,7 @@ def test_deepspeed_config(tmpdir, deepspeed_zero_config):
     _assert_save_model_is_equal(model, tmpdir, trainer)


-@pytest.mark.skipif(not _DEEPSPEED_AVAILABLE, reason="DeepSpeed not available.")
-@RunIf(min_gpus=1)
+@RunIf(min_gpus=1, deepspeed=True)
 def test_deepspeed_custom_precision_params(tmpdir):
     """
     Ensure if we modify the FP16 parameters via the DeepSpeedPlugin, the deepspeed config contains these changes.
@@ -312,8 +306,7 @@ def test_deepspeed_custom_precision_params(tmpdir):
     trainer.fit(model)


-@pytest.mark.skipif(not _DEEPSPEED_AVAILABLE, reason="DeepSpeed not available.")
-@RunIf(min_gpus=1)
+@RunIf(min_gpus=1, deepspeed=True)
 def test_deepspeed_assert_config_zero_offload_disabled(tmpdir, deepspeed_zero_config):
     """
     Ensure if we use a config and turn off cpu_offload, that this is set to False within the config.
@@ -333,8 +326,7 @@ def test_deepspeed_assert_config_zero_offload_disabled(tmpdir, deepspeed_zero_co
     trainer.fit(model)


-@pytest.mark.skipif(not _DEEPSPEED_AVAILABLE, reason="DeepSpeed not available.")
-@RunIf(min_gpus=2, special=True)
+@RunIf(min_gpus=2, special=True, deepspeed=True)
 def test_deepspeed_multigpu(tmpdir, deepspeed_config):
     """
     Test to ensure that DeepSpeed with multiple GPUs works, without ZeRO Optimization as this requires compilation.

@@ -21,15 +21,13 @@ from torch import nn

 from pytorch_lightning import LightningModule, Trainer
 from pytorch_lightning.plugins.training_type.rpc_sequential import RPCSequentialPlugin
-from pytorch_lightning.utilities import _FAIRSCALE_PIPE_AVAILABLE
 from pytorch_lightning.utilities.exceptions import MisconfigurationException
 from tests.helpers.boring_model import RandomDataset
 from tests.helpers.runif import RunIf


-@pytest.mark.skipif(not _FAIRSCALE_PIPE_AVAILABLE, reason="test requires FairScale to be installed")
 @mock.patch.dict(os.environ, {"PL_DEV_DEBUG": "1"})
-@RunIf(min_gpus=2, special=True)
+@RunIf(min_gpus=2, special=True, fairscale_pipe=True)
 def test_rpc_sequential_plugin_manual(tmpdir, args=None):
     model = SequentialModelRPCManual()
     trainer = Trainer(
@@ -52,9 +50,8 @@ def test_rpc_sequential_plugin_manual(tmpdir, args=None):
     trainer.accelerator.training_type_plugin.exit_rpc_process()


-@pytest.mark.skipif(not _FAIRSCALE_PIPE_AVAILABLE, reason="test requires FairScale to be installed")
 @mock.patch.dict(os.environ, {"PL_DEV_DEBUG": "1"})
-@RunIf(min_gpus=2, special=True)
+@RunIf(min_gpus=2, special=True, fairscale_pipe=True)
 def test_rpc_sequential_plugin_manual_amp(tmpdir, args=None):
     model = SequentialModelRPCManual()
     trainer = Trainer(
@@ -75,9 +72,8 @@ def test_rpc_sequential_plugin_manual_amp(tmpdir, args=None):
     trainer.fit(model)


-@pytest.mark.skipif(not _FAIRSCALE_PIPE_AVAILABLE, reason="test requires FairScale to be installed")
 @mock.patch.dict(os.environ, {"PL_DEV_DEBUG": "1"})
-@RunIf(min_gpus=2, special=True)
+@RunIf(min_gpus=2, special=True, fairscale_pipe=True)
 def test_rpc_sequential_plugin_automatic(tmpdir, args=None):
     model = SequentialModelRPCAutomatic()
     trainer = Trainer(
@@ -100,9 +96,8 @@ def test_rpc_sequential_plugin_automatic(tmpdir, args=None):
     trainer.accelerator.training_type_plugin.exit_rpc_process()


-@pytest.mark.skipif(not _FAIRSCALE_PIPE_AVAILABLE, reason="test requires FairScale to be installed")
 @mock.patch.dict(os.environ, {"PL_DEV_DEBUG": "1"})
-@RunIf(min_gpus=2, special=True)
+@RunIf(min_gpus=2, special=True, fairscale_pipe=True)
 def test_rpc_sequential_plugin_with_wrong_balance(tmpdir, args=None):
     model = SequentialModelRPCAutomatic()
     trainer = Trainer(

@@ -6,14 +6,13 @@ import torch
 from pytorch_lightning import Trainer
 from pytorch_lightning.callbacks import Callback
 from pytorch_lightning.plugins import DDPShardedPlugin, DDPSpawnShardedPlugin
-from pytorch_lightning.utilities import _FAIRSCALE_AVAILABLE
 from pytorch_lightning.utilities.exceptions import MisconfigurationException
 from tests.helpers.boring_model import BoringModel
 from tests.helpers.runif import RunIf


+@RunIf(fairscale=True)
 @pytest.mark.parametrize(["accelerator"], [("ddp_sharded", ), ("ddp_sharded_spawn", )])
-@pytest.mark.skipif(not _FAIRSCALE_AVAILABLE, reason="Fairscale is not available")
 def test_sharded_ddp_choice(tmpdir, accelerator):
     """
     Test to ensure that plugin is correctly chosen
@@ -39,8 +38,7 @@ def test_sharded_ddp_choice(tmpdir, accelerator):
     trainer.fit(model)


-@RunIf(amp_apex=True)
-@pytest.mark.skipif(not _FAIRSCALE_AVAILABLE, reason="Fairscale is not available")
+@RunIf(amp_apex=True, fairscale=True)
 def test_invalid_apex_sharded(tmpdir):
     """
     Test to ensure that we raise an error when we try to use apex and sharded
@@ -58,9 +56,8 @@ def test_invalid_apex_sharded(tmpdir):
     trainer.fit(model)


-@RunIf(min_gpus=2, amp_native=True)
+@RunIf(min_gpus=2, amp_native=True, fairscale=True)
 @pytest.mark.parametrize(["accelerator"], [("ddp_sharded", ), ("ddp_sharded_spawn", )])
-@pytest.mark.skipif(not _FAIRSCALE_AVAILABLE, reason="Fairscale is not available")
 def test_ddp_choice_sharded_amp(tmpdir, accelerator):
     """
     Test to ensure that plugin native amp plugin is correctly chosen when using sharded
@@ -88,8 +85,7 @@ def test_ddp_choice_sharded_amp(tmpdir, accelerator):
     trainer.fit(model)


-@RunIf(skip_windows=True)
-@pytest.mark.skipif(not _FAIRSCALE_AVAILABLE, reason="Fairscale is not available")
+@RunIf(skip_windows=True, fairscale=True)
 def test_ddp_sharded_plugin_checkpoint_cpu(tmpdir):
     """
     Test to ensure that checkpoint is saved correctly
@@ -112,8 +108,7 @@ def test_ddp_sharded_plugin_checkpoint_cpu(tmpdir):
     assert torch.equal(ddp_param.to("cpu"), shard_param)


-@RunIf(min_gpus=2, skip_windows=True)
-@pytest.mark.skipif(not _FAIRSCALE_AVAILABLE, reason="Fairscale is not available")
+@RunIf(min_gpus=2, skip_windows=True, fairscale=True)
 def test_ddp_sharded_plugin_checkpoint_multi_gpu(tmpdir):
     """
     Test to ensure that checkpoint is saved correctly when using multiple GPUs
@@ -136,8 +131,7 @@ def test_ddp_sharded_plugin_checkpoint_multi_gpu(tmpdir):
     assert torch.equal(ddp_param.to("cpu"), shard_param)


-@RunIf(min_gpus=2, skip_windows=True)
-@pytest.mark.skipif(not _FAIRSCALE_AVAILABLE, reason="Fairscale is not available")
+@RunIf(min_gpus=2, skip_windows=True, fairscale=True)
 def test_ddp_sharded_plugin_finetune(tmpdir):
     """
     Test to ensure that we can save and restart training (simulate fine-tuning)
@@ -158,8 +152,7 @@ def test_ddp_sharded_plugin_finetune(tmpdir):
     trainer.fit(saved_model)


-@RunIf(skip_windows=True)
-@pytest.mark.skipif(not _FAIRSCALE_AVAILABLE, reason="Fairscale is not available")
+@RunIf(skip_windows=True, fairscale=True)
 def test_ddp_sharded_plugin_resume_from_checkpoint(tmpdir):
     """
     Test to ensure that resuming from checkpoint works
@@ -188,10 +181,9 @@ def test_ddp_sharded_plugin_resume_from_checkpoint(tmpdir):
     trainer.fit(model)


-@pytest.mark.skip(reason="Not a critical test, skip till drone CI performance improves.")
+@pytest.mark.skip(reason="Not a critical test, skip till drone CI performance improves.")  # todo
 @pytest.mark.skip(reason="Currently unsupported restarting training on different number of devices.")
-@RunIf(min_gpus=2, skip_windows=True)
-@pytest.mark.skipif(not _FAIRSCALE_AVAILABLE, reason="Fairscale is not available")
+@RunIf(min_gpus=2, skip_windows=True, fairscale=True)
 def test_ddp_sharded_plugin_resume_from_checkpoint_downsize_gpus(tmpdir):
     """
     Test to ensure that resuming from checkpoint works when downsizing number of GPUS
@@ -220,8 +212,7 @@ def test_ddp_sharded_plugin_resume_from_checkpoint_downsize_gpus(tmpdir):
     trainer.fit(model)


-@RunIf(min_gpus=1, skip_windows=True)
-@pytest.mark.skipif(not _FAIRSCALE_AVAILABLE, reason="Fairscale is not available")
+@RunIf(min_gpus=1, skip_windows=True, fairscale=True)
 def test_ddp_sharded_plugin_resume_from_checkpoint_gpu_to_cpu(tmpdir):
     """
     Test to ensure that resuming from checkpoint works when going from GPUs- > CPU
@@ -250,8 +241,7 @@ def test_ddp_sharded_plugin_resume_from_checkpoint_gpu_to_cpu(tmpdir):
     trainer.fit(model)


-@RunIf(skip_windows=True, special=True)
-@pytest.mark.skipif(not _FAIRSCALE_AVAILABLE, reason="Fairscale is not available")
+@RunIf(skip_windows=True, special=True, fairscale=True)
 def test_ddp_sharded_plugin_test(tmpdir):
     """
     Test to ensure we can use test without fit
@@ -266,8 +256,7 @@ def test_ddp_sharded_plugin_test(tmpdir):
     trainer.test(model)


-@RunIf(min_gpus=2, skip_windows=True)
-@pytest.mark.skipif(not _FAIRSCALE_AVAILABLE, reason="Fairscale is not available")
+@RunIf(min_gpus=2, skip_windows=True, fairscale=True)
 def test_ddp_sharded_plugin_test_multigpu(tmpdir):
     """
     Test to ensure we can use test without fit