Remove DeepSpeed version restriction from Lite (#13967)

This commit is contained in:
Adrian Wälchli 2022-08-11 18:17:56 +02:00 committed by GitHub
parent 3b18da3eaf
commit 56533368af
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 6 additions and 29 deletions

View File

@ -75,7 +75,7 @@ jobs:
CUDA_VERSION_MM=$(python -c "import torch ; print(''.join(map(str, torch.version.cuda.split('.')[:2])))")
pip install "bagua-cuda$CUDA_VERSION_MM>=0.9.0"
pip install -e .[strategies]
pip install deepspeed>0.6.4 # TODO: remove when docker images are upgraded
pip install -U deepspeed # TODO: remove when docker images are upgraded
pip install --requirement requirements/pytorch/devel.txt
pip list
env:

View File

@ -2,7 +2,7 @@
# in case you want to preserve/enforce restrictions on the latest compatible version, add "strict" as an in-line comment
fairscale>=0.4.5, <=0.4.6
deepspeed>=0.6.0, <0.7.0
deepspeed>=0.6.0, <=0.7.0
# no need to install with [pytorch] as pytorch is already installed
horovod>=0.21.2, !=0.24.0, <0.25.1
hivemind>=1.0.1, <=1.0.1; sys_platform == 'linux'

View File

@ -22,6 +22,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
- Raised a `MisconfigurationException` if batch transfer hooks are overridden with `IPUAccelerator` ([#13961](https://github.com/Lightning-AI/lightning/pull/13961))
- Updated compatibility for LightningLite to run with the latest DeepSpeed 0.7.0 ([#13967](https://github.com/Lightning-AI/lightning/pull/13967))
### Deprecated
- Deprecated `LightningDeepSpeedModule` ([#14000](https://github.com/Lightning-AI/lightning/pull/14000))

View File

@ -40,7 +40,6 @@ from pytorch_lightning.utilities.data import (
has_iterable_dataset,
)
from pytorch_lightning.utilities.exceptions import MisconfigurationException
from pytorch_lightning.utilities.imports import _RequirementAvailable
from pytorch_lightning.utilities.seed import seed_everything
@ -106,8 +105,6 @@ class LightningLite(ABC):
self._precision_plugin = self._strategy.precision_plugin
self._models_setup: int = 0
self._check_deepspeed_support()
# wrap the run method so we can inject setup logic or spawn processes for the user
setattr(self, "run", partial(self._run_impl, self.run))
@ -459,18 +456,6 @@ class LightningLite(ABC):
f" Choose one of {supported} or pass in a `Strategy` instance."
)
def _check_deepspeed_support(self) -> None:
if (
isinstance(self._strategy, DeepSpeedStrategy)
and self._strategy.zero_stage_3
and _RequirementAvailable("deepspeed>=0.6.5")
):
# https://github.com/microsoft/DeepSpeed/issues/2139
raise RuntimeError(
"DeepSpeed ZeRO-3 is not supported with this version of Lightning Lite and `deepspeed>=0.6.5`."
" Please downgrade deepspeed to 0.6.4 or check if a newer version of Lightning is available."
)
@staticmethod
def _supported_device_types() -> Sequence[_AcceleratorType]:
return (

View File

@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import contextlib
import os
from copy import deepcopy
from unittest import mock
@ -30,7 +29,6 @@ from pytorch_lightning.plugins import PrecisionPlugin
from pytorch_lightning.strategies import DeepSpeedStrategy, Strategy
from pytorch_lightning.utilities import _StrategyType
from pytorch_lightning.utilities.exceptions import MisconfigurationException
from pytorch_lightning.utilities.imports import _RequirementAvailable
from pytorch_lightning.utilities.seed import pl_worker_init_function
from tests_pytorch.helpers.runif import RunIf
@ -480,13 +478,4 @@ def test_deepspeed_multiple_models():
assert self.broadcast(True)
assert self.is_global_zero == (self.local_rank == 0)
if _RequirementAvailable("deepspeed>=0.6.5"):
# https://github.com/microsoft/DeepSpeed/issues/2139
raise_if_deepspeed_incompatible = pytest.raises(
RuntimeError, match="DeepSpeed ZeRO-3 is not supported with this version of Lightning Lite"
)
else:
raise_if_deepspeed_incompatible = contextlib.suppress()
with raise_if_deepspeed_incompatible:
Lite(strategy=DeepSpeedStrategy(stage=3, logging_batch_size_per_gpu=1), devices=2, accelerator="gpu").run()
Lite(strategy=DeepSpeedStrategy(stage=3, logging_batch_size_per_gpu=1), devices=2, accelerator="gpu").run()