From 2e512d4b2e3599317d0c72f27f3efdab5dd91281 Mon Sep 17 00:00:00 2001 From: awaelchli Date: Mon, 26 Feb 2024 10:59:15 +0100 Subject: [PATCH] Remove the Colossal AI integration (#19528) --- .azure/gpu-tests-pytorch.yml | 9 +------- requirements/_integrations/strategies.txt | 4 ---- .../gradient_accumulation_scheduler.py | 9 +------- .../connectors/accelerator_connector.py | 21 +------------------ src/lightning/pytorch/utilities/imports.py | 1 - .../test_gradient_accumulation_scheduler.py | 17 +-------------- .../connectors/test_accelerator_connector.py | 16 -------------- 7 files changed, 4 insertions(+), 73 deletions(-) delete mode 100644 requirements/_integrations/strategies.txt diff --git a/.azure/gpu-tests-pytorch.yml b/.azure/gpu-tests-pytorch.yml index 697d717e63..b9ab6ead7f 100644 --- a/.azure/gpu-tests-pytorch.yml +++ b/.azure/gpu-tests-pytorch.yml @@ -105,16 +105,9 @@ jobs: done displayName: "Adjust dependencies" - - bash: | - pip install -q -r .actions/requirements.txt - python .actions/assistant.py requirements_prune_pkgs \ - --packages="[lightning-colossalai]" \ - --req_files="[requirements/_integrations/strategies.txt]" - displayName: "Prune packages" # these have installation issues - - bash: | extra=$(python -c "print({'lightning': 'pytorch-'}.get('$(PACKAGE_NAME)', ''))") - pip install -e ".[${extra}dev]" -r requirements/_integrations/strategies.txt pytest-timeout -U --find-links="${TORCH_URL}" + pip install -e ".[${extra}dev]" pytest-timeout -U --find-links="${TORCH_URL}" displayName: "Install package & dependencies" - bash: pip uninstall -y lightning diff --git a/requirements/_integrations/strategies.txt b/requirements/_integrations/strategies.txt deleted file mode 100644 index d7dd0524bd..0000000000 --- a/requirements/_integrations/strategies.txt +++ /dev/null @@ -1,4 +0,0 @@ -# NOTE: the upper bound for the package version is only set for CI stability, and it is dropped while installing this package -# in case you want to preserve/enforce restrictions on the latest compatible version, add "strict" as an in-line comment - -lightning-colossalai >=0.1.0 diff --git a/src/lightning/pytorch/callbacks/gradient_accumulation_scheduler.py b/src/lightning/pytorch/callbacks/gradient_accumulation_scheduler.py index 2f33867add..20b1df29d1 100644 --- a/src/lightning/pytorch/callbacks/gradient_accumulation_scheduler.py +++ b/src/lightning/pytorch/callbacks/gradient_accumulation_scheduler.py @@ -27,7 +27,6 @@ from typing_extensions import override import lightning.pytorch as pl from lightning.pytorch.callbacks.callback import Callback from lightning.pytorch.utilities.exceptions import MisconfigurationException -from lightning.pytorch.utilities.imports import _LIGHTNING_COLOSSALAI_AVAILABLE from lightning.pytorch.utilities.model_helpers import is_overridden from lightning.pytorch.utilities.rank_zero import rank_zero_warn @@ -125,13 +124,7 @@ class GradientAccumulationScheduler(Callback): # local import to avoid circular import from lightning.pytorch.strategies import DeepSpeedStrategy - unsupported_strategies = [DeepSpeedStrategy] - if _LIGHTNING_COLOSSALAI_AVAILABLE: - from lightning_colossalai import ColossalAIStrategy - - unsupported_strategies.append(ColossalAIStrategy) - - if isinstance(trainer.strategy, tuple(unsupported_strategies)): + if isinstance(trainer.strategy, DeepSpeedStrategy): raise RuntimeError( f"The `{type(trainer.strategy).__name__}` does not support `accumulate_grad_batches` changing" " between epochs." 
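With the ColossalAI special case removed, GradientAccumulationScheduler only rejects DeepSpeedStrategy. A minimal sketch of the resulting behavior, assuming DeepSpeed and a CUDA device are available (the exact hook that runs the check is not shown in this hunk, so the failure point is illustrative):

    from lightning.pytorch import Trainer
    from lightning.pytorch.callbacks import GradientAccumulationScheduler
    from lightning.pytorch.demos.boring_classes import BoringModel

    # A schedule that changes the accumulation factor between epochs.
    scheduler = GradientAccumulationScheduler(scheduling={0: 1, 4: 2})

    # DeepSpeed requires a fixed accumulation factor, so the callback raises
    # RuntimeError: "The `DeepSpeedStrategy` does not support
    # `accumulate_grad_batches` changing between epochs." once training starts.
    trainer = Trainer(strategy="deepspeed", accelerator="gpu", devices=1, callbacks=[scheduler])
    trainer.fit(BoringModel())
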
diff --git a/src/lightning/pytorch/trainer/connectors/accelerator_connector.py b/src/lightning/pytorch/trainer/connectors/accelerator_connector.py index fd2a5d413b..67adad64d1 100644 --- a/src/lightning/pytorch/trainer/connectors/accelerator_connector.py +++ b/src/lightning/pytorch/trainer/connectors/accelerator_connector.py @@ -62,10 +62,7 @@ from lightning.pytorch.strategies import ( ) from lightning.pytorch.strategies.ddp import _DDP_FORK_ALIASES from lightning.pytorch.utilities.exceptions import MisconfigurationException -from lightning.pytorch.utilities.imports import ( - _LIGHTNING_COLOSSALAI_AVAILABLE, - _habana_available_and_importable, -) +from lightning.pytorch.utilities.imports import _habana_available_and_importable from lightning.pytorch.utilities.rank_zero import rank_zero_info, rank_zero_warn log = logging.getLogger(__name__) @@ -191,9 +188,6 @@ class _AcceleratorConnector: self._strategy_flag = strategy - if strategy == "colossalai" and not _LIGHTNING_COLOSSALAI_AVAILABLE: - raise ModuleNotFoundError(str(_LIGHTNING_COLOSSALAI_AVAILABLE)) - if strategy != "auto" and strategy not in self._registered_strategies and not isinstance(strategy, Strategy): raise ValueError( f"You selected an invalid strategy name: `strategy={strategy!r}`." @@ -490,12 +484,6 @@ class _AcceleratorConnector: if isinstance(self.accelerator, HPUAccelerator): return HPUPrecisionPlugin(self._precision_flag) - if _LIGHTNING_COLOSSALAI_AVAILABLE: - from lightning_colossalai import ColossalAIPrecisionPlugin, ColossalAIStrategy - - if isinstance(self.strategy, ColossalAIStrategy): - return ColossalAIPrecisionPlugin(self._precision_flag) - if isinstance(self.strategy, (SingleDeviceXLAStrategy, XLAStrategy)): return XLAPrecision(self._precision_flag) # type: ignore if isinstance(self.strategy, DeepSpeedStrategy): @@ -648,13 +636,6 @@ def _set_torch_flags( def _register_external_accelerators_and_strategies() -> None: """Registers all known strategies in other packages.""" - if _LIGHTNING_COLOSSALAI_AVAILABLE: - from lightning_colossalai import ColossalAIStrategy - - # TODO: Prevent registering multiple times - if "colossalai" not in StrategyRegistry: - ColossalAIStrategy.register_strategies(StrategyRegistry) - if _habana_available_and_importable(): from lightning_habana import HPUAccelerator, HPUParallelStrategy, SingleHPUStrategy diff --git a/src/lightning/pytorch/utilities/imports.py b/src/lightning/pytorch/utilities/imports.py index 9f2d687314..6c0815a6af 100644 --- a/src/lightning/pytorch/utilities/imports.py +++ b/src/lightning/pytorch/utilities/imports.py @@ -28,7 +28,6 @@ _TORCHMETRICS_GREATER_EQUAL_1_0_0 = RequirementCache("torchmetrics>=1.0.0") _OMEGACONF_AVAILABLE = package_available("omegaconf") _TORCHVISION_AVAILABLE = RequirementCache("torchvision") -_LIGHTNING_COLOSSALAI_AVAILABLE = RequirementCache("lightning-colossalai") @functools.lru_cache(maxsize=128) diff --git a/tests/tests_pytorch/callbacks/test_gradient_accumulation_scheduler.py b/tests/tests_pytorch/callbacks/test_gradient_accumulation_scheduler.py index bf9f63d8f5..4f1074dc5c 100644 --- a/tests/tests_pytorch/callbacks/test_gradient_accumulation_scheduler.py +++ b/tests/tests_pytorch/callbacks/test_gradient_accumulation_scheduler.py @@ -20,12 +20,6 @@ from lightning.pytorch.callbacks import GradientAccumulationScheduler from lightning.pytorch.demos.boring_classes import BoringModel from lightning.pytorch.strategies import DeepSpeedStrategy from lightning.pytorch.utilities.exceptions import MisconfigurationException -from 
lightning.pytorch.utilities.imports import _LIGHTNING_COLOSSALAI_AVAILABLE - -if _LIGHTNING_COLOSSALAI_AVAILABLE: - from lightning_colossalai import ColossalAIStrategy -else: - ColossalAIStrategy = None @pytest.mark.parametrize("accumulate_grad_batches", [1, 2, 3]) @@ -94,16 +88,7 @@ def test_invalid_values_for_grad_accum_scheduler(scheduling): _ = GradientAccumulationScheduler(scheduling=scheduling) -@pytest.mark.parametrize( - "strategy_class", - [ - pytest.param( - ColossalAIStrategy, - marks=pytest.mark.skipif(not _LIGHTNING_COLOSSALAI_AVAILABLE, reason="Requires ColossalAI strategy"), - ), - DeepSpeedStrategy, - ], -) +@pytest.mark.parametrize("strategy_class", [DeepSpeedStrategy]) def test_unsupported_strategies(strategy_class): """Test that an error is raised for strategies that require the gradient accumulation factor to be fixed.""" scheduler = GradientAccumulationScheduler({1: 2}) diff --git a/tests/tests_pytorch/trainer/connectors/test_accelerator_connector.py b/tests/tests_pytorch/trainer/connectors/test_accelerator_connector.py index b1cafbf9dc..830c08ef41 100644 --- a/tests/tests_pytorch/trainer/connectors/test_accelerator_connector.py +++ b/tests/tests_pytorch/trainer/connectors/test_accelerator_connector.py @@ -59,7 +59,6 @@ from lightning.pytorch.utilities.exceptions import MisconfigurationException from lightning.pytorch.utilities.imports import ( _LIGHTNING_HABANA_AVAILABLE, ) -from lightning_utilities.core.imports import package_available from tests_pytorch.conftest import mock_cuda_count, mock_mps_count, mock_tpu_available, mock_xla_available from tests_pytorch.helpers.runif import RunIf @@ -845,21 +844,6 @@ def test_connector_defaults_match_trainer_defaults(): assert connector_default == trainer_defaults[name] -@RunIf(min_cuda_gpus=1) # trigger this test on our GPU pipeline, because we don't install the package on the CPU suite -@pytest.mark.xfail(raises=ImportError, reason="Not updated to latest API") -@pytest.mark.skipif(not package_available("lightning_colossalai"), reason="Requires Colossal AI Strategy") -def test_colossalai_external_strategy(monkeypatch): - with mock.patch( - "lightning.pytorch.trainer.connectors.accelerator_connector._LIGHTNING_COLOSSALAI_AVAILABLE", False - ), pytest.raises(ModuleNotFoundError): - Trainer(strategy="colossalai") - - from lightning_colossalai import ColossalAIStrategy - - trainer = Trainer(strategy="colossalai", precision="16-mixed") - assert isinstance(trainer.strategy, ColossalAIStrategy) - - class DeviceMock(Mock): def __instancecheck__(self, instance): return True
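
Because `_register_external_accelerators_and_strategies` no longer registers "colossalai" and the dedicated ModuleNotFoundError branch is gone, the name now falls through to the connector's generic invalid-strategy check. A minimal sketch, assuming no external plugin registers the name on its own:

    from lightning.pytorch import Trainer

    try:
        Trainer(strategy="colossalai")
    except ValueError as err:
        # The accelerator connector rejects any strategy name that is not in the
        # StrategyRegistry, e.g.
        # "You selected an invalid strategy name: `strategy='colossalai'`."
        print(err)

The external lightning-colossalai package can still be installed separately, though the removed test marked it as not updated to the latest API.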