Add fairscale & deepspeed to skipif 4/n (#6281)
* add fairscale & windows to skipif
* add deepspeed to runif
* fairscale
* deepspeed
* flake8

Co-authored-by: Jirka Borovec <jirka.borovec@seznam.cz>
parent d1a03153f3 · commit 4157b35062
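The pattern applied throughout the touched test files: the hand-written `pytest.mark.skipif` guard (and, where present, a separate `@RunIf(...)` decorator) is folded into a single `RunIf` call. A minimal before/after sketch with a hypothetical test name and an elided body, not code from the commit itself:

# Before: the availability flag is imported and checked by hand in every test file.
import pytest

from pytorch_lightning.utilities import _DEEPSPEED_AVAILABLE
from tests.helpers.runif import RunIf


@pytest.mark.skipif(not _DEEPSPEED_AVAILABLE, reason="DeepSpeed not available.")
def test_something_old_style(tmpdir):
    ...


# After: the requirement is declared through RunIf and can be combined with
# other conditions the same helper already handles (e.g. min_gpus, special).
@RunIf(min_gpus=1, deepspeed=True)
def test_something_new_style(tmpdir):
    ...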
@@ -34,7 +34,6 @@ from pytorch_lightning.plugins import (
     SingleDevicePlugin,
 )
 from pytorch_lightning.plugins.environments import ClusterEnvironment, SLURMEnvironment, TorchElasticEnvironment
-from pytorch_lightning.utilities import _DEEPSPEED_AVAILABLE
 from pytorch_lightning.utilities.exceptions import MisconfigurationException
 from tests.helpers.boring_model import BoringModel
 from tests.helpers.runif import RunIf
@@ -425,11 +424,7 @@ def test_plugin_accelerator_choice(accelerator, plugin):
     ('ddp_spawn', DDPSpawnPlugin),
     ('ddp_sharded', DDPShardedPlugin),
     ('ddp_sharded_spawn', DDPSpawnShardedPlugin),
-    pytest.param(
-        'deepspeed',
-        DeepSpeedPlugin,
-        marks=pytest.mark.skipif(not _DEEPSPEED_AVAILABLE, reason="DeepSpeed not available.")
-    ),
+    pytest.param('deepspeed', DeepSpeedPlugin, marks=RunIf(deepspeed=True)),
 ])
 @mock.patch('torch.cuda.is_available', return_value=True)
 @mock.patch('torch.cuda.device_count', return_value=2)

@@ -22,6 +22,9 @@ from pkg_resources import get_distribution

 from pytorch_lightning.utilities import (
     _APEX_AVAILABLE,
+    _DEEPSPEED_AVAILABLE,
+    _FAIRSCALE_AVAILABLE,
+    _FAIRSCALE_PIPE_AVAILABLE,
     _HOROVOD_AVAILABLE,
     _NATIVE_AMP_AVAILABLE,
     _RPC_AVAILABLE,
@@ -63,6 +66,9 @@ class RunIf:
         skip_windows: bool = False,
         special: bool = False,
         rpc: bool = False,
+        fairscale: bool = False,
+        fairscale_pipe: bool = False,
+        deepspeed: bool = False,
         **kwargs
     ):
         """
@@ -80,6 +86,8 @@ class RunIf:
             skip_windows: skip test for Windows platform (typically fo some limited torch functionality)
             special: running in special mode, outside pytest suit
             rpc: requires Remote Procedure Call (RPC)
+            fairscale: if `fairscale` module is required to run the test
+            deepspeed: if `deepspeed` module is required to run the test
             kwargs: native pytest.mark.skipif keyword arguments
         """
         conditions = []
@@ -137,6 +145,18 @@ class RunIf:
             conditions.append(not _RPC_AVAILABLE)
             reasons.append("RPC")

+        if fairscale:
+            conditions.append(not _FAIRSCALE_AVAILABLE)
+            reasons.append("Fairscale")
+
+        if fairscale_pipe:
+            conditions.append(not _FAIRSCALE_PIPE_AVAILABLE)
+            reasons.append("Fairscale Pipe")
+
+        if deepspeed:
+            conditions.append(not _DEEPSPEED_AVAILABLE)
+            reasons.append("Deepspeed")
+
         reasons = [rs for cond, rs in zip(conditions, reasons) if cond]
         return pytest.mark.skipif(
             *args,

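For reference, a condensed, self-contained sketch of what the new flags do inside the helper, following the conditions/reasons logic visible in the hunk above; the availability probes and the final skipif call here are simplified assumptions, not the library's exact code:

import importlib.util

import pytest

# Simplified availability probes; pytorch_lightning derives its
# _DEEPSPEED_AVAILABLE / _FAIRSCALE_AVAILABLE flags differently.
_DEEPSPEED_AVAILABLE = importlib.util.find_spec("deepspeed") is not None
_FAIRSCALE_AVAILABLE = importlib.util.find_spec("fairscale") is not None


def run_if_sketch(*, fairscale: bool = False, deepspeed: bool = False):
    """Build a pytest skip mark the way the new RunIf flags do (sketch)."""
    conditions = []
    reasons = []

    if fairscale:
        conditions.append(not _FAIRSCALE_AVAILABLE)
        reasons.append("Fairscale")

    if deepspeed:
        conditions.append(not _DEEPSPEED_AVAILABLE)
        reasons.append("Deepspeed")

    # Keep only the reasons whose condition actually triggers.
    reasons = [rs for cond, rs in zip(conditions, reasons) if cond]
    return pytest.mark.skipif(any(conditions), reason=f"Requires: [{' + '.join(reasons)}]")


@run_if_sketch(deepspeed=True)  # skipped when deepspeed cannot be imported
def test_deepspeed_dependent_sketch():
    assert True

Collecting conditions and reasons in parallel lists lets one decorator report every unmet requirement in a single skip reason, which is why each new flag only needs the three lines added in the hunk above.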
@@ -9,7 +9,6 @@ from torch.optim import Optimizer
 from pytorch_lightning import Trainer
 from pytorch_lightning.plugins import DeepSpeedPlugin, DeepSpeedPrecisionPlugin
 from pytorch_lightning.plugins.training_type.deepspeed import LightningDeepSpeedModule
-from pytorch_lightning.utilities import _DEEPSPEED_AVAILABLE
 from pytorch_lightning.utilities.exceptions import MisconfigurationException
 from tests.helpers.boring_model import BoringModel
 from tests.helpers.runif import RunIf
@@ -81,7 +80,7 @@ def deepspeed_zero_config(deepspeed_config):
     return {**deepspeed_config, 'zero_allow_untested_optimizer': True, 'zero_optimization': {'stage': 2}}


-@pytest.mark.skipif(not _DEEPSPEED_AVAILABLE, reason="DeepSpeed not available.")
+@RunIf(deepspeed=True)
 @pytest.mark.parametrize("input", ("deepspeed", DeepSpeedPlugin))
 def test_deepspeed_plugin_string(tmpdir, input):
     """
@@ -98,7 +97,7 @@ def test_deepspeed_plugin_string(tmpdir, input):
     assert trainer.accelerator.training_type_plugin.parallel_devices == [torch.device('cpu')]


-@pytest.mark.skipif(not _DEEPSPEED_AVAILABLE, reason="DeepSpeed not available.")
+@RunIf(deepspeed=True)
 def test_deepspeed_plugin_env(tmpdir, monkeypatch, deepspeed_config):
     """
     Test to ensure that the plugin can be passed via a string with an environment variable.
@@ -120,14 +119,13 @@ def test_deepspeed_plugin_env(tmpdir, monkeypatch, deepspeed_config):
     assert plugin.config == deepspeed_config


+@RunIf(amp_native=True, deepspeed=True)
 @pytest.mark.parametrize(
     "amp_backend", [
         pytest.param("native", marks=RunIf(amp_native=True)),
         pytest.param("apex", marks=RunIf(amp_apex=True)),
     ]
 )
-@pytest.mark.skipif(not _DEEPSPEED_AVAILABLE, reason="DeepSpeed not available.")
-@RunIf(amp_native=True)
 def test_deepspeed_precision_choice(amp_backend, tmpdir):
     """
     Test to ensure precision plugin is also correctly chosen.
@@ -143,7 +141,7 @@ def test_deepspeed_precision_choice(amp_backend, tmpdir):
     assert trainer.accelerator.precision_plugin.precision == 16


-@pytest.mark.skipif(not _DEEPSPEED_AVAILABLE, reason="DeepSpeed not available.")
+@RunIf(deepspeed=True)
 def test_deepspeed_with_invalid_config_path(tmpdir):
     """
     Test to ensure if we pass an invalid config path we throw an exception.
@@ -155,7 +153,7 @@ def test_deepspeed_with_invalid_config_path(tmpdir):
     DeepSpeedPlugin(config='invalid_path.json')


-@pytest.mark.skipif(not _DEEPSPEED_AVAILABLE, reason="DeepSpeed not available.")
+@RunIf(deepspeed=True)
 def test_deepspeed_with_env_path(tmpdir, monkeypatch, deepspeed_config):
     """
     Test to ensure if we pass an env variable, we load the config from the path.
@@ -168,7 +166,7 @@ def test_deepspeed_with_env_path(tmpdir, monkeypatch, deepspeed_config):
     assert plugin.config == deepspeed_config


-@pytest.mark.skipif(not _DEEPSPEED_AVAILABLE, reason="DeepSpeed not available.")
+@RunIf(deepspeed=True)
 def test_deepspeed_defaults(tmpdir):
     """
     Ensure that defaults are correctly set as a config for DeepSpeed if no arguments are passed.
@@ -178,7 +176,7 @@ def test_deepspeed_defaults(tmpdir):
     assert isinstance(plugin.config["zero_optimization"], dict)


-@pytest.mark.skipif(not _DEEPSPEED_AVAILABLE, reason="DeepSpeed not available.")
+@RunIf(deepspeed=True)
 def test_invalid_deepspeed_defaults_no_precision(tmpdir):
     """
     Test to ensure that using defaults, if precision is not set to 16, we throw an exception.
@@ -195,8 +193,7 @@ def test_invalid_deepspeed_defaults_no_precision(tmpdir):
     trainer.fit(model)


-@RunIf(min_gpus=1)
-@pytest.mark.skipif(not _DEEPSPEED_AVAILABLE, reason="DeepSpeed not available.")
+@RunIf(min_gpus=1, deepspeed=True)
 def test_warn_deepspeed_override_backward(tmpdir):
     """
     Test to ensure that if the backward hook in the LightningModule is overridden, we throw a warning.
@@ -213,8 +210,7 @@ def test_warn_deepspeed_override_backward(tmpdir):
     trainer.fit(model)


-@RunIf(min_gpus=1)
-@pytest.mark.skipif(not _DEEPSPEED_AVAILABLE, reason="DeepSpeed not available.")
+@RunIf(min_gpus=1, deepspeed=True)
 def test_deepspeed_run_configure_optimizers(tmpdir):
     """
     Test end to end that deepspeed works with defaults (without ZeRO as that requires compilation),
@@ -246,8 +242,7 @@ def test_deepspeed_run_configure_optimizers(tmpdir):
     _assert_save_model_is_equal(model, tmpdir, trainer)


-@RunIf(min_gpus=1)
-@pytest.mark.skipif(not _DEEPSPEED_AVAILABLE, reason="DeepSpeed not available.")
+@RunIf(min_gpus=1, deepspeed=True)
 def test_deepspeed_config(tmpdir, deepspeed_zero_config):
     """
     Test to ensure deepspeed works correctly when passed a DeepSpeed config object including optimizers/schedulers
@@ -281,8 +276,7 @@ def test_deepspeed_config(tmpdir, deepspeed_zero_config):
     _assert_save_model_is_equal(model, tmpdir, trainer)


-@pytest.mark.skipif(not _DEEPSPEED_AVAILABLE, reason="DeepSpeed not available.")
-@RunIf(min_gpus=1)
+@RunIf(min_gpus=1, deepspeed=True)
 def test_deepspeed_custom_precision_params(tmpdir):
     """
     Ensure if we modify the FP16 parameters via the DeepSpeedPlugin, the deepspeed config contains these changes.
@@ -312,8 +306,7 @@ def test_deepspeed_custom_precision_params(tmpdir):
     trainer.fit(model)


-@pytest.mark.skipif(not _DEEPSPEED_AVAILABLE, reason="DeepSpeed not available.")
-@RunIf(min_gpus=1)
+@RunIf(min_gpus=1, deepspeed=True)
 def test_deepspeed_assert_config_zero_offload_disabled(tmpdir, deepspeed_zero_config):
     """
     Ensure if we use a config and turn off cpu_offload, that this is set to False within the config.
@@ -333,8 +326,7 @@ def test_deepspeed_assert_config_zero_offload_disabled(tmpdir, deepspeed_zero_co
     trainer.fit(model)


-@pytest.mark.skipif(not _DEEPSPEED_AVAILABLE, reason="DeepSpeed not available.")
-@RunIf(min_gpus=2, special=True)
+@RunIf(min_gpus=2, special=True, deepspeed=True)
 def test_deepspeed_multigpu(tmpdir, deepspeed_config):
     """
     Test to ensure that DeepSpeed with multiple GPUs works, without ZeRO Optimization as this requires compilation.

@@ -21,15 +21,13 @@ from torch import nn

 from pytorch_lightning import LightningModule, Trainer
 from pytorch_lightning.plugins.training_type.rpc_sequential import RPCSequentialPlugin
-from pytorch_lightning.utilities import _FAIRSCALE_PIPE_AVAILABLE
 from pytorch_lightning.utilities.exceptions import MisconfigurationException
 from tests.helpers.boring_model import RandomDataset
 from tests.helpers.runif import RunIf


-@pytest.mark.skipif(not _FAIRSCALE_PIPE_AVAILABLE, reason="test requires FairScale to be installed")
 @mock.patch.dict(os.environ, {"PL_DEV_DEBUG": "1"})
-@RunIf(min_gpus=2, special=True)
+@RunIf(min_gpus=2, special=True, fairscale_pipe=True)
 def test_rpc_sequential_plugin_manual(tmpdir, args=None):
     model = SequentialModelRPCManual()
     trainer = Trainer(
@@ -52,9 +50,8 @@ def test_rpc_sequential_plugin_manual(tmpdir, args=None):
     trainer.accelerator.training_type_plugin.exit_rpc_process()


-@pytest.mark.skipif(not _FAIRSCALE_PIPE_AVAILABLE, reason="test requires FairScale to be installed")
 @mock.patch.dict(os.environ, {"PL_DEV_DEBUG": "1"})
-@RunIf(min_gpus=2, special=True)
+@RunIf(min_gpus=2, special=True, fairscale_pipe=True)
 def test_rpc_sequential_plugin_manual_amp(tmpdir, args=None):
     model = SequentialModelRPCManual()
     trainer = Trainer(
@@ -75,9 +72,8 @@ def test_rpc_sequential_plugin_manual_amp(tmpdir, args=None):
     trainer.fit(model)


-@pytest.mark.skipif(not _FAIRSCALE_PIPE_AVAILABLE, reason="test requires FairScale to be installed")
 @mock.patch.dict(os.environ, {"PL_DEV_DEBUG": "1"})
-@RunIf(min_gpus=2, special=True)
+@RunIf(min_gpus=2, special=True, fairscale_pipe=True)
 def test_rpc_sequential_plugin_automatic(tmpdir, args=None):
     model = SequentialModelRPCAutomatic()
     trainer = Trainer(
@@ -100,9 +96,8 @@ def test_rpc_sequential_plugin_automatic(tmpdir, args=None):
     trainer.accelerator.training_type_plugin.exit_rpc_process()


-@pytest.mark.skipif(not _FAIRSCALE_PIPE_AVAILABLE, reason="test requires FairScale to be installed")
 @mock.patch.dict(os.environ, {"PL_DEV_DEBUG": "1"})
-@RunIf(min_gpus=2, special=True)
+@RunIf(min_gpus=2, special=True, fairscale_pipe=True)
 def test_rpc_sequential_plugin_with_wrong_balance(tmpdir, args=None):
     model = SequentialModelRPCAutomatic()
     trainer = Trainer(

@@ -6,14 +6,13 @@ import torch
 from pytorch_lightning import Trainer
 from pytorch_lightning.callbacks import Callback
 from pytorch_lightning.plugins import DDPShardedPlugin, DDPSpawnShardedPlugin
-from pytorch_lightning.utilities import _FAIRSCALE_AVAILABLE
 from pytorch_lightning.utilities.exceptions import MisconfigurationException
 from tests.helpers.boring_model import BoringModel
 from tests.helpers.runif import RunIf


+@RunIf(fairscale=True)
 @pytest.mark.parametrize(["accelerator"], [("ddp_sharded", ), ("ddp_sharded_spawn", )])
-@pytest.mark.skipif(not _FAIRSCALE_AVAILABLE, reason="Fairscale is not available")
 def test_sharded_ddp_choice(tmpdir, accelerator):
     """
     Test to ensure that plugin is correctly chosen
@@ -39,8 +38,7 @@ def test_sharded_ddp_choice(tmpdir, accelerator):
     trainer.fit(model)


-@RunIf(amp_apex=True)
-@pytest.mark.skipif(not _FAIRSCALE_AVAILABLE, reason="Fairscale is not available")
+@RunIf(amp_apex=True, fairscale=True)
 def test_invalid_apex_sharded(tmpdir):
     """
     Test to ensure that we raise an error when we try to use apex and sharded
@@ -58,9 +56,8 @@ def test_invalid_apex_sharded(tmpdir):
     trainer.fit(model)


-@RunIf(min_gpus=2, amp_native=True)
+@RunIf(min_gpus=2, amp_native=True, fairscale=True)
 @pytest.mark.parametrize(["accelerator"], [("ddp_sharded", ), ("ddp_sharded_spawn", )])
-@pytest.mark.skipif(not _FAIRSCALE_AVAILABLE, reason="Fairscale is not available")
 def test_ddp_choice_sharded_amp(tmpdir, accelerator):
     """
     Test to ensure that plugin native amp plugin is correctly chosen when using sharded
@@ -88,8 +85,7 @@ def test_ddp_choice_sharded_amp(tmpdir, accelerator):
     trainer.fit(model)


-@RunIf(skip_windows=True)
-@pytest.mark.skipif(not _FAIRSCALE_AVAILABLE, reason="Fairscale is not available")
+@RunIf(skip_windows=True, fairscale=True)
 def test_ddp_sharded_plugin_checkpoint_cpu(tmpdir):
     """
     Test to ensure that checkpoint is saved correctly
@@ -112,8 +108,7 @@ def test_ddp_sharded_plugin_checkpoint_cpu(tmpdir):
     assert torch.equal(ddp_param.to("cpu"), shard_param)


-@RunIf(min_gpus=2, skip_windows=True)
-@pytest.mark.skipif(not _FAIRSCALE_AVAILABLE, reason="Fairscale is not available")
+@RunIf(min_gpus=2, skip_windows=True, fairscale=True)
 def test_ddp_sharded_plugin_checkpoint_multi_gpu(tmpdir):
     """
     Test to ensure that checkpoint is saved correctly when using multiple GPUs
@@ -136,8 +131,7 @@ def test_ddp_sharded_plugin_checkpoint_multi_gpu(tmpdir):
     assert torch.equal(ddp_param.to("cpu"), shard_param)


-@RunIf(min_gpus=2, skip_windows=True)
-@pytest.mark.skipif(not _FAIRSCALE_AVAILABLE, reason="Fairscale is not available")
+@RunIf(min_gpus=2, skip_windows=True, fairscale=True)
 def test_ddp_sharded_plugin_finetune(tmpdir):
     """
     Test to ensure that we can save and restart training (simulate fine-tuning)
@@ -158,8 +152,7 @@ def test_ddp_sharded_plugin_finetune(tmpdir):
     trainer.fit(saved_model)


-@RunIf(skip_windows=True)
-@pytest.mark.skipif(not _FAIRSCALE_AVAILABLE, reason="Fairscale is not available")
+@RunIf(skip_windows=True, fairscale=True)
 def test_ddp_sharded_plugin_resume_from_checkpoint(tmpdir):
     """
     Test to ensure that resuming from checkpoint works
@@ -188,10 +181,9 @@ def test_ddp_sharded_plugin_resume_from_checkpoint(tmpdir):
     trainer.fit(model)


-@pytest.mark.skip(reason="Not a critical test, skip till drone CI performance improves.")
+@pytest.mark.skip(reason="Not a critical test, skip till drone CI performance improves.")  # todo
 @pytest.mark.skip(reason="Currently unsupported restarting training on different number of devices.")
-@RunIf(min_gpus=2, skip_windows=True)
-@pytest.mark.skipif(not _FAIRSCALE_AVAILABLE, reason="Fairscale is not available")
+@RunIf(min_gpus=2, skip_windows=True, fairscale=True)
 def test_ddp_sharded_plugin_resume_from_checkpoint_downsize_gpus(tmpdir):
     """
     Test to ensure that resuming from checkpoint works when downsizing number of GPUS
@@ -220,8 +212,7 @@ def test_ddp_sharded_plugin_resume_from_checkpoint_downsize_gpus(tmpdir):
     trainer.fit(model)


-@RunIf(min_gpus=1, skip_windows=True)
-@pytest.mark.skipif(not _FAIRSCALE_AVAILABLE, reason="Fairscale is not available")
+@RunIf(min_gpus=1, skip_windows=True, fairscale=True)
 def test_ddp_sharded_plugin_resume_from_checkpoint_gpu_to_cpu(tmpdir):
     """
     Test to ensure that resuming from checkpoint works when going from GPUs- > CPU
@@ -250,8 +241,7 @@ def test_ddp_sharded_plugin_resume_from_checkpoint_gpu_to_cpu(tmpdir):
     trainer.fit(model)


-@RunIf(skip_windows=True, special=True)
-@pytest.mark.skipif(not _FAIRSCALE_AVAILABLE, reason="Fairscale is not available")
+@RunIf(skip_windows=True, special=True, fairscale=True)
 def test_ddp_sharded_plugin_test(tmpdir):
     """
     Test to ensure we can use test without fit
@@ -266,8 +256,7 @@ def test_ddp_sharded_plugin_test(tmpdir):
     trainer.test(model)


-@RunIf(min_gpus=2, skip_windows=True)
-@pytest.mark.skipif(not _FAIRSCALE_AVAILABLE, reason="Fairscale is not available")
+@RunIf(min_gpus=2, skip_windows=True, fairscale=True)
 def test_ddp_sharded_plugin_test_multigpu(tmpdir):
     """
     Test to ensure we can use test without fit