From 56533368afe14407867dc999a65b799d0f4bd89b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20W=C3=A4lchli?= Date: Thu, 11 Aug 2022 18:17:56 +0200 Subject: [PATCH] Remove DeepSpeed version restriction from Lite (#13967) --- .azure/gpu-tests.yml | 2 +- requirements/pytorch/strategies.txt | 2 +- src/pytorch_lightning/CHANGELOG.md | 3 +++ src/pytorch_lightning/lite/lite.py | 15 --------------- tests/tests_pytorch/lite/test_lite.py | 13 +------------ 5 files changed, 6 insertions(+), 29 deletions(-) diff --git a/.azure/gpu-tests.yml b/.azure/gpu-tests.yml index 68ba6974a3..8ae670d265 100644 --- a/.azure/gpu-tests.yml +++ b/.azure/gpu-tests.yml @@ -75,7 +75,7 @@ jobs: CUDA_VERSION_MM=$(python -c "import torch ; print(''.join(map(str, torch.version.cuda.split('.')[:2])))") pip install "bagua-cuda$CUDA_VERSION_MM>=0.9.0" pip install -e .[strategies] - pip install deepspeed>0.6.4 # TODO: remove when docker images are upgraded + pip install -U deepspeed # TODO: remove when docker images are upgraded pip install --requirement requirements/pytorch/devel.txt pip list env: diff --git a/requirements/pytorch/strategies.txt b/requirements/pytorch/strategies.txt index 4e916fbc6c..c5fc92a67a 100644 --- a/requirements/pytorch/strategies.txt +++ b/requirements/pytorch/strategies.txt @@ -2,7 +2,7 @@ # in case you want to preserve/enforce restrictions on the latest compatible version, add "strict" as an in-line comment fairscale>=0.4.5, <=0.4.6 -deepspeed>=0.6.0, <0.7.0 +deepspeed>=0.6.0, <=0.7.0 # no need to install with [pytorch] as pytorch is already installed horovod>=0.21.2, !=0.24.0, <0.25.1 hivemind>=1.0.1, <=1.0.1; sys_platform == 'linux' diff --git a/src/pytorch_lightning/CHANGELOG.md b/src/pytorch_lightning/CHANGELOG.md index 04db3d1908..6d67d2d586 100644 --- a/src/pytorch_lightning/CHANGELOG.md +++ b/src/pytorch_lightning/CHANGELOG.md @@ -22,6 +22,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Raised a `MisconfigurationException` if batch transfer hooks are overriden with `IPUAccelerator` ([13961](https://github.com/Lightning-AI/lightning/pull/13961)) +- Updated compatibility for LightningLite to run with the latest DeepSpeed 0.7.0 ([13967](https://github.com/Lightning-AI/lightning/pull/13967)) + + ### Deprecated - Deprecated `LightningDeepSpeedModule` ([#14000](https://github.com/Lightning-AI/lightning/pull/14000)) diff --git a/src/pytorch_lightning/lite/lite.py b/src/pytorch_lightning/lite/lite.py index 5125bf4486..981eed3063 100644 --- a/src/pytorch_lightning/lite/lite.py +++ b/src/pytorch_lightning/lite/lite.py @@ -40,7 +40,6 @@ from pytorch_lightning.utilities.data import ( has_iterable_dataset, ) from pytorch_lightning.utilities.exceptions import MisconfigurationException -from pytorch_lightning.utilities.imports import _RequirementAvailable from pytorch_lightning.utilities.seed import seed_everything @@ -106,8 +105,6 @@ class LightningLite(ABC): self._precision_plugin = self._strategy.precision_plugin self._models_setup: int = 0 - self._check_deepspeed_support() - # wrap the run method so we can inject setup logic or spawn processes for the user setattr(self, "run", partial(self._run_impl, self.run)) @@ -459,18 +456,6 @@ class LightningLite(ABC): f" Choose one of {supported} or pass in a `Strategy` instance." ) - def _check_deepspeed_support(self) -> None: - if ( - isinstance(self._strategy, DeepSpeedStrategy) - and self._strategy.zero_stage_3 - and _RequirementAvailable("deepspeed>=0.6.5") - ): - # https://github.com/microsoft/DeepSpeed/issues/2139 - raise RuntimeError( - "DeepSpeed ZeRO-3 is not supported with this version of Lightning Lite and `deepspeed>=0.6.5`." - " Please downgrade deepspeed to 0.6.4 or check if a newer version of Lightning is available." - ) - @staticmethod def _supported_device_types() -> Sequence[_AcceleratorType]: return ( diff --git a/tests/tests_pytorch/lite/test_lite.py b/tests/tests_pytorch/lite/test_lite.py index 2215ab3129..86a0a5a821 100644 --- a/tests/tests_pytorch/lite/test_lite.py +++ b/tests/tests_pytorch/lite/test_lite.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import contextlib import os from copy import deepcopy from unittest import mock @@ -30,7 +29,6 @@ from pytorch_lightning.plugins import PrecisionPlugin from pytorch_lightning.strategies import DeepSpeedStrategy, Strategy from pytorch_lightning.utilities import _StrategyType from pytorch_lightning.utilities.exceptions import MisconfigurationException -from pytorch_lightning.utilities.imports import _RequirementAvailable from pytorch_lightning.utilities.seed import pl_worker_init_function from tests_pytorch.helpers.runif import RunIf @@ -480,13 +478,4 @@ def test_deepspeed_multiple_models(): assert self.broadcast(True) assert self.is_global_zero == (self.local_rank == 0) - if _RequirementAvailable("deepspeed>=0.6.5"): - # https://github.com/microsoft/DeepSpeed/issues/2139 - raise_if_deepspeed_incompatible = pytest.raises( - RuntimeError, match="DeepSpeed ZeRO-3 is not supported with this version of Lightning Lite" - ) - else: - raise_if_deepspeed_incompatible = contextlib.suppress() - - with raise_if_deepspeed_incompatible: - Lite(strategy=DeepSpeedStrategy(stage=3, logging_batch_size_per_gpu=1), devices=2, accelerator="gpu").run() + Lite(strategy=DeepSpeedStrategy(stage=3, logging_batch_size_per_gpu=1), devices=2, accelerator="gpu").run()