Remove DeepSpeed version restriction from Lite (#13967)
This commit is contained in:
parent
3b18da3eaf
commit
56533368af
|
@ -75,7 +75,7 @@ jobs:
|
|||
CUDA_VERSION_MM=$(python -c "import torch ; print(''.join(map(str, torch.version.cuda.split('.')[:2])))")
|
||||
pip install "bagua-cuda$CUDA_VERSION_MM>=0.9.0"
|
||||
pip install -e .[strategies]
|
||||
pip install deepspeed>0.6.4 # TODO: remove when docker images are upgraded
|
||||
pip install -U deepspeed # TODO: remove when docker images are upgraded
|
||||
pip install --requirement requirements/pytorch/devel.txt
|
||||
pip list
|
||||
env:
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
# in case you want to preserve/enforce restrictions on the latest compatible version, add "strict" as an in-line comment
|
||||
|
||||
fairscale>=0.4.5, <=0.4.6
|
||||
deepspeed>=0.6.0, <0.7.0
|
||||
deepspeed>=0.6.0, <=0.7.0
|
||||
# no need to install with [pytorch] as pytorch is already installed
|
||||
horovod>=0.21.2, !=0.24.0, <0.25.1
|
||||
hivemind>=1.0.1, <=1.0.1; sys_platform == 'linux'
|
||||
|
|
|
@ -22,6 +22,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
|
|||
- Raised a `MisconfigurationException` if batch transfer hooks are overridden with `IPUAccelerator` ([#13961](https://github.com/Lightning-AI/lightning/pull/13961))
|
||||
|
||||
|
||||
- Updated compatibility for LightningLite to run with the latest DeepSpeed 0.7.0 ([#13967](https://github.com/Lightning-AI/lightning/pull/13967))
|
||||
|
||||
|
||||
### Deprecated
|
||||
|
||||
- Deprecated `LightningDeepSpeedModule` ([#14000](https://github.com/Lightning-AI/lightning/pull/14000))
|
||||
|
|
|
@ -40,7 +40,6 @@ from pytorch_lightning.utilities.data import (
|
|||
has_iterable_dataset,
|
||||
)
|
||||
from pytorch_lightning.utilities.exceptions import MisconfigurationException
|
||||
from pytorch_lightning.utilities.imports import _RequirementAvailable
|
||||
from pytorch_lightning.utilities.seed import seed_everything
|
||||
|
||||
|
||||
|
@ -106,8 +105,6 @@ class LightningLite(ABC):
|
|||
self._precision_plugin = self._strategy.precision_plugin
|
||||
self._models_setup: int = 0
|
||||
|
||||
self._check_deepspeed_support()
|
||||
|
||||
# wrap the run method so we can inject setup logic or spawn processes for the user
|
||||
setattr(self, "run", partial(self._run_impl, self.run))
|
||||
|
||||
|
@ -459,18 +456,6 @@ class LightningLite(ABC):
|
|||
f" Choose one of {supported} or pass in a `Strategy` instance."
|
||||
)
|
||||
|
||||
def _check_deepspeed_support(self) -> None:
    """Reject the DeepSpeed ZeRO stage 3 setup that this version of Lite cannot run.

    Nothing happens unless all three conditions hold, checked in this order:
    the configured strategy is a ``DeepSpeedStrategy``, its ``zero_stage_3``
    flag is truthy, and ``deepspeed>=0.6.5`` is available.

    Raises:
        RuntimeError: If all three conditions above are met.
    """
    strategy = self._strategy
    if not isinstance(strategy, DeepSpeedStrategy):
        return
    if not strategy.zero_stage_3:
        return
    if not _RequirementAvailable("deepspeed>=0.6.5"):
        return

    # https://github.com/microsoft/DeepSpeed/issues/2139
    message = (
        "DeepSpeed ZeRO-3 is not supported with this version of Lightning Lite and `deepspeed>=0.6.5`."
        " Please downgrade deepspeed to 0.6.4 or check if a newer version of Lightning is available."
    )
    raise RuntimeError(message)
|
||||
|
||||
@staticmethod
|
||||
def _supported_device_types() -> Sequence[_AcceleratorType]:
|
||||
return (
|
||||
|
|
|
@ -11,7 +11,6 @@
|
|||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
import contextlib
|
||||
import os
|
||||
from copy import deepcopy
|
||||
from unittest import mock
|
||||
|
@ -30,7 +29,6 @@ from pytorch_lightning.plugins import PrecisionPlugin
|
|||
from pytorch_lightning.strategies import DeepSpeedStrategy, Strategy
|
||||
from pytorch_lightning.utilities import _StrategyType
|
||||
from pytorch_lightning.utilities.exceptions import MisconfigurationException
|
||||
from pytorch_lightning.utilities.imports import _RequirementAvailable
|
||||
from pytorch_lightning.utilities.seed import pl_worker_init_function
|
||||
from tests_pytorch.helpers.runif import RunIf
|
||||
|
||||
|
@ -480,13 +478,4 @@ def test_deepspeed_multiple_models():
|
|||
assert self.broadcast(True)
|
||||
assert self.is_global_zero == (self.local_rank == 0)
|
||||
|
||||
if _RequirementAvailable("deepspeed>=0.6.5"):
|
||||
# https://github.com/microsoft/DeepSpeed/issues/2139
|
||||
raise_if_deepspeed_incompatible = pytest.raises(
|
||||
RuntimeError, match="DeepSpeed ZeRO-3 is not supported with this version of Lightning Lite"
|
||||
)
|
||||
else:
|
||||
raise_if_deepspeed_incompatible = contextlib.suppress()
|
||||
|
||||
with raise_if_deepspeed_incompatible:
|
||||
Lite(strategy=DeepSpeedStrategy(stage=3, logging_batch_size_per_gpu=1), devices=2, accelerator="gpu").run()
|
||||
Lite(strategy=DeepSpeedStrategy(stage=3, logging_batch_size_per_gpu=1), devices=2, accelerator="gpu").run()
|
||||
|
|
Loading…
Reference in New Issue