Remove DeepSpeed version restriction from Lite (#13967)

This commit is contained in:
Adrian Wälchli 2022-08-11 18:17:56 +02:00 committed by GitHub
parent 3b18da3eaf
commit 56533368af
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 6 additions and 29 deletions

View File

@ -75,7 +75,7 @@ jobs:
CUDA_VERSION_MM=$(python -c "import torch ; print(''.join(map(str, torch.version.cuda.split('.')[:2])))")
pip install "bagua-cuda$CUDA_VERSION_MM>=0.9.0"
pip install -e .[strategies]
pip install deepspeed>0.6.4 # TODO: remove when docker images are upgraded
pip install -U deepspeed # TODO: remove when docker images are upgraded
pip install --requirement requirements/pytorch/devel.txt
pip list
env:

View File

@ -2,7 +2,7 @@
# in case you want to preserve/enforce restrictions on the latest compatible version, add "strict" as an in-line comment
fairscale>=0.4.5, <=0.4.6
deepspeed>=0.6.0, <0.7.0
deepspeed>=0.6.0, <=0.7.0
# no need to install with [pytorch] as pytorch is already installed
horovod>=0.21.2, !=0.24.0, <0.25.1
hivemind>=1.0.1, <=1.0.1; sys_platform == 'linux'

View File

@ -22,6 +22,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
- Raised a `MisconfigurationException` if batch transfer hooks are overridden with `IPUAccelerator` ([#13961](https://github.com/Lightning-AI/lightning/pull/13961))
- Updated compatibility for LightningLite to run with the latest DeepSpeed 0.7.0 ([#13967](https://github.com/Lightning-AI/lightning/pull/13967))
### Deprecated
- Deprecated `LightningDeepSpeedModule` ([#14000](https://github.com/Lightning-AI/lightning/pull/14000))

View File

@ -40,7 +40,6 @@ from pytorch_lightning.utilities.data import (
has_iterable_dataset,
)
from pytorch_lightning.utilities.exceptions import MisconfigurationException
from pytorch_lightning.utilities.imports import _RequirementAvailable
from pytorch_lightning.utilities.seed import seed_everything
@ -106,8 +105,6 @@ class LightningLite(ABC):
self._precision_plugin = self._strategy.precision_plugin
self._models_setup: int = 0
self._check_deepspeed_support()
# wrap the run method so we can inject setup logic or spawn processes for the user
setattr(self, "run", partial(self._run_impl, self.run))
@ -459,18 +456,6 @@ class LightningLite(ABC):
f" Choose one of {supported} or pass in a `Strategy` instance."
)
def _check_deepspeed_support(self) -> None:
if (
isinstance(self._strategy, DeepSpeedStrategy)
and self._strategy.zero_stage_3
and _RequirementAvailable("deepspeed>=0.6.5")
):
# https://github.com/microsoft/DeepSpeed/issues/2139
raise RuntimeError(
"DeepSpeed ZeRO-3 is not supported with this version of Lightning Lite and `deepspeed>=0.6.5`."
" Please downgrade deepspeed to 0.6.4 or check if a newer version of Lightning is available."
)
@staticmethod
def _supported_device_types() -> Sequence[_AcceleratorType]:
return (

View File

@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import contextlib
import os
from copy import deepcopy
from unittest import mock
@ -30,7 +29,6 @@ from pytorch_lightning.plugins import PrecisionPlugin
from pytorch_lightning.strategies import DeepSpeedStrategy, Strategy
from pytorch_lightning.utilities import _StrategyType
from pytorch_lightning.utilities.exceptions import MisconfigurationException
from pytorch_lightning.utilities.imports import _RequirementAvailable
from pytorch_lightning.utilities.seed import pl_worker_init_function
from tests_pytorch.helpers.runif import RunIf
@ -480,13 +478,4 @@ def test_deepspeed_multiple_models():
assert self.broadcast(True)
assert self.is_global_zero == (self.local_rank == 0)
if _RequirementAvailable("deepspeed>=0.6.5"):
# https://github.com/microsoft/DeepSpeed/issues/2139
raise_if_deepspeed_incompatible = pytest.raises(
RuntimeError, match="DeepSpeed ZeRO-3 is not supported with this version of Lightning Lite"
)
else:
raise_if_deepspeed_incompatible = contextlib.suppress()
with raise_if_deepspeed_incompatible:
Lite(strategy=DeepSpeedStrategy(stage=3, logging_batch_size_per_gpu=1), devices=2, accelerator="gpu").run()
Lite(strategy=DeepSpeedStrategy(stage=3, logging_batch_size_per_gpu=1), devices=2, accelerator="gpu").run()