diff --git a/src/lightning_lite/connector.py b/src/lightning_lite/connector.py
index 3f2299304f..1709f56f49 100644
--- a/src/lightning_lite/connector.py
+++ b/src/lightning_lite/connector.py
@@ -55,7 +55,7 @@ from lightning_lite.strategies import (
 from lightning_lite.strategies.ddp_spawn import _DDP_FORK_ALIASES
 from lightning_lite.utilities import _StrategyType, rank_zero_info, rank_zero_warn
 from lightning_lite.utilities.device_parser import determine_root_gpu_device
-from lightning_lite.utilities.imports import _HPU_AVAILABLE, _IPU_AVAILABLE, _IS_INTERACTIVE
+from lightning_lite.utilities.imports import _IS_INTERACTIVE

 _PLUGIN = Union[Precision, ClusterEnvironment, CheckpointIO]
 _PLUGIN_INPUT = Union[_PLUGIN, str]
@@ -109,7 +109,7 @@ class _Connector:

         # Raise an exception if there are conflicts between flags
         # Set each valid flag to `self._x_flag` after validation
-        # For devices: Assign gpus, ipus, etc. to the accelerator flag and devices flag
+        # For devices: Assign gpus, etc. to the accelerator flag and devices flag
         self._strategy_flag: Optional[Union[Strategy, str]] = None
         self._accelerator_flag: Optional[Union[Accelerator, str]] = None
         self._precision_input: Optional[_PRECISION_INPUT] = None
@@ -302,10 +302,6 @@ class _Connector:
         if self._accelerator_flag == "auto":
             if TPUAccelerator.is_available():
                 return "tpu"
-            if _IPU_AVAILABLE:
-                return "ipu"
-            if _HPU_AVAILABLE:
-                return "hpu"
             if MPSAccelerator.is_available():
                 return "mps"
             if CUDAAccelerator.is_available():
@@ -499,8 +495,6 @@ class _Connector:
         self.strategy.set_world_ranks()
         self.strategy._configure_launcher()

-        from lightning_lite.utilities import _IS_INTERACTIVE
-
         if _IS_INTERACTIVE and self.strategy.launcher and not self.strategy.launcher.is_interactive_compatible:
             raise RuntimeError(
                 f"`Lite(strategy={self._strategy_flag!r})` is not compatible with an interactive"
diff --git a/src/lightning_lite/plugins/io/xla_plugin.py b/src/lightning_lite/plugins/io/xla_plugin.py
index 75c13898eb..57e5fa7931 100644
--- a/src/lightning_lite/plugins/io/xla_plugin.py
+++ b/src/lightning_lite/plugins/io/xla_plugin.py
@@ -15,16 +15,13 @@ import os
 from typing import Any, Dict, Optional

 from lightning_utilities.core.apply_func import apply_to_collection
+from lightning_utilities.core.imports import RequirementCache

 from lightning_lite.accelerators.tpu import _XLA_AVAILABLE
 from lightning_lite.plugins.io.torch_plugin import TorchCheckpointIO
 from lightning_lite.utilities.cloud_io import get_filesystem
-from lightning_lite.utilities.imports import _OMEGACONF_AVAILABLE
 from lightning_lite.utilities.types import _PATH

-if _OMEGACONF_AVAILABLE:
-    from omegaconf import DictConfig, ListConfig, OmegaConf
-

 class XLACheckpointIO(TorchCheckpointIO):
     """CheckpointIO that utilizes :func:`xm.save` to save checkpoints for TPU training strategies."""
@@ -54,9 +51,10 @@ class XLACheckpointIO(TorchCheckpointIO):
             )
         fs = get_filesystem(path)
         fs.makedirs(os.path.dirname(path), exist_ok=True)
-        # Todo: TypeError: 'mappingproxy' object does not support item assignment
-        # Ref: https://github.com/pytorch/xla/issues/2773
-        if _OMEGACONF_AVAILABLE:
+        if RequirementCache("omegaconf"):
+            # workaround for https://github.com/pytorch/xla/issues/2773
+            from omegaconf import DictConfig, ListConfig, OmegaConf
+
             checkpoint = apply_to_collection(checkpoint, (DictConfig, ListConfig), OmegaConf.to_container)
         import torch_xla.core.xla_model as xm
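Note on the xla_plugin.py change above: the module-level `_OMEGACONF_AVAILABLE` flag is replaced by a `RequirementCache` check evaluated at save time, so `omegaconf` is only imported when it is actually installed. A minimal sketch of that pattern, outside the diff (the `maybe_to_container` helper name is illustrative, not part of this PR):

    from typing import Any, Dict

    from lightning_utilities.core.apply_func import apply_to_collection
    from lightning_utilities.core.imports import RequirementCache


    def maybe_to_container(checkpoint: Dict[str, Any]) -> Dict[str, Any]:
        # A RequirementCache instance is truthy when the requirement is installed,
        # and the omegaconf import below only runs inside this branch.
        if RequirementCache("omegaconf"):
            from omegaconf import DictConfig, ListConfig, OmegaConf

            checkpoint = apply_to_collection(checkpoint, (DictConfig, ListConfig), OmegaConf.to_container)
        return checkpoint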
diff --git a/src/lightning_lite/plugins/precision/deepspeed.py b/src/lightning_lite/plugins/precision/deepspeed.py
index 3817bf3aa3..265dfacfdb 100644
--- a/src/lightning_lite/plugins/precision/deepspeed.py
+++ b/src/lightning_lite/plugins/precision/deepspeed.py
@@ -21,10 +21,10 @@ from typing_extensions import Literal
 from lightning_lite.plugins.precision.precision import Precision
 from lightning_lite.plugins.precision.utils import _convert_fp_tensor
 from lightning_lite.utilities.enums import AMPType, PrecisionType
-from lightning_lite.utilities.imports import _APEX_AVAILABLE
 from lightning_lite.utilities.types import Steppable

 _DEEPSPEED_AVAILABLE = RequirementCache("deepspeed")
+_APEX_AVAILABLE = RequirementCache("apex")

 if TYPE_CHECKING and _DEEPSPEED_AVAILABLE:
     import deepspeed
@@ -49,7 +49,7 @@ class DeepSpeedPrecision(Precision):
     def __init__(self, precision: Literal[16, 32, "bf16"], amp_type: str, amp_level: Optional[str] = None) -> None:
         if amp_type == AMPType.APEX:
             if not _APEX_AVAILABLE:
-                raise ImportError(
+                raise ModuleNotFoundError(
                     "You have asked for Apex AMP but `apex` is not installed."
                     " Install `apex` using this guide: https://github.com/NVIDIA/apex"
                 )
diff --git a/src/lightning_lite/utilities/__init__.py b/src/lightning_lite/utilities/__init__.py
index 63e9049492..17f37679f2 100644
--- a/src/lightning_lite/utilities/__init__.py
+++ b/src/lightning_lite/utilities/__init__.py
@@ -15,20 +15,6 @@

 from lightning_lite.utilities.apply_func import move_data_to_device  # noqa: F401
 from lightning_lite.utilities.enums import _AcceleratorType, _StrategyType, AMPType, LightningEnum  # noqa: F401
-
-# TODO(lite): Avoid importing protected attributes in `__init__.py` files
-from lightning_lite.utilities.imports import (  # noqa: F401
-    _HIVEMIND_AVAILABLE,
-    _HOROVOD_AVAILABLE,
-    _HPU_AVAILABLE,
-    _IPU_AVAILABLE,
-    _IS_INTERACTIVE,
-    _IS_WINDOWS,
-    _POPTORCH_AVAILABLE,
-    _TORCH_GREATER_EQUAL_1_10,
-    _TORCH_GREATER_EQUAL_1_11,
-    _TORCH_GREATER_EQUAL_1_12,
-)
 from lightning_lite.utilities.rank_zero import (  # noqa: F401
     rank_zero_deprecation,
     rank_zero_info,
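Note on the deepspeed.py change above: `ModuleNotFoundError` is a subclass of `ImportError`, so callers (and the existing test, updated further down) that catch or match `ImportError` keep working after the exception type is narrowed. A quick self-contained check, not part of the diff:

    import pytest

    # ModuleNotFoundError subclasses ImportError, so pytest.raises(ImportError, ...)
    # still matches the narrower exception raised by DeepSpeedPrecision.
    assert issubclass(ModuleNotFoundError, ImportError)

    with pytest.raises(ImportError, match="`apex` is not installed"):
        raise ModuleNotFoundError("You have asked for Apex AMP but `apex` is not installed.")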
diff --git a/src/lightning_lite/utilities/data.py b/src/lightning_lite/utilities/data.py
index ca50344567..afa0e988ca 100644
--- a/src/lightning_lite/utilities/data.py
+++ b/src/lightning_lite/utilities/data.py
@@ -175,11 +175,7 @@ def _dataloader_init_kwargs_resolve_sampler(
     disallow_batch_sampler: bool = False,
 ) -> Dict[str, Any]:
     """This function is used to handle the sampler, batch_sampler arguments associated within a DataLoader for its
-    re-instantiation.
-
-    If there are multiple devices in IPU mode, it is necessary to disallow BatchSampler that isn't instantiated
-    automatically, since `poptorch.DataLoader` will try to increase the batch_size
-    """
+    re-instantiation."""

     batch_sampler = getattr(dataloader, "batch_sampler")
     if batch_sampler is not None:
diff --git a/src/lightning_lite/utilities/distributed.py b/src/lightning_lite/utilities/distributed.py
index 9e7ea0142a..767de75411 100644
--- a/src/lightning_lite/utilities/distributed.py
+++ b/src/lightning_lite/utilities/distributed.py
@@ -4,11 +4,11 @@ from typing import Any, Iterable, Iterator, List, Optional, Sized, Tuple, Union

 import torch
 import torch.nn.functional as F
+from lightning_utilities.core.imports import module_available
 from torch import Tensor
 from torch.utils.data import Dataset, DistributedSampler, Sampler

 from lightning_lite.plugins.environments.cluster_environment import ClusterEnvironment
-from lightning_lite.utilities.imports import _HPU_AVAILABLE
 from lightning_lite.utilities.rank_zero import rank_zero_info
 from lightning_lite.utilities.types import ReduceOp

@@ -135,12 +135,16 @@ def sync_ddp(result: Tensor, group: Optional[Any] = None, reduce_op: Optional[Un
         op = reduce_op

     # WA for HPU. HPU doesn't support Long types, forcefully set it to float
-    if _HPU_AVAILABLE:
-        is_hpu_backend = os.environ.get("HCCL_DISTRIBUTED_BACKEND") == "1"
-        if is_hpu_backend:
-            if (result.type() == "torch.LongTensor") or (result.type() == "torch.hpu.LongTensor"):
-                rank_zero_info("Long tensor unsupported on HPU, casting to float")
-                result = result.float()
+    if module_available("habana_frameworks.torch.utils.library_loader"):
+        from habana_frameworks.torch.utils.library_loader import is_habana_available
+
+        if (
+            is_habana_available()
+            and os.environ.get("HCCL_DISTRIBUTED_BACKEND") == "1"
+            and result.type() in ("torch.LongTensor", "torch.hpu.LongTensor")
+        ):
+            rank_zero_info("Long tensor unsupported on HPU, casting to float")
+            result = result.float()

     # Sync all processes before reduction
     torch.distributed.barrier(group=group)
diff --git a/src/lightning_lite/utilities/enums.py b/src/lightning_lite/utilities/enums.py
index 567483b1e5..c1bb015010 100644
--- a/src/lightning_lite/utilities/enums.py
+++ b/src/lightning_lite/utilities/enums.py
@@ -61,21 +61,16 @@ class _StrategyType(LightningEnum):
     DDP = "ddp"
     DDP_SPAWN = "ddp_spawn"
     DDP_FORK = "ddp_fork"
-    TPU_SPAWN = "tpu_spawn"
     DEEPSPEED = "deepspeed"
-    HOROVOD = "horovod"
     DDP_SHARDED = "ddp_sharded"
     DDP_SHARDED_SPAWN = "ddp_sharded_spawn"
     DDP_FULLY_SHARDED = "ddp_fully_sharded"
-    BAGUA = "bagua"
-    HPU_PARALLEL = "hpu_parallel"

     @staticmethod
     def interactive_compatible_types() -> list[_StrategyType]:
         """Returns a list containing interactive compatible _StrategyTypes."""
         return [
             _StrategyType.DP,
-            _StrategyType.TPU_SPAWN,
             _StrategyType.DDP_FORK,
         ]

@@ -89,7 +84,5 @@ class _AcceleratorType(LightningEnum):

     CPU = "CPU"
     CUDA = "CUDA"
-    IPU = "IPU"
     TPU = "TPU"
-    HPU = "HPU"
     MPS = "MPS"
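Note on the distributed.py change above: instead of a module-level `_HPU_AVAILABLE` constant computed when `lightning_lite.utilities.imports` is imported, `sync_ddp()` now probes for Habana support at call time via `module_available`. The standalone sketch below mirrors that control flow (the `maybe_cast_for_hpu` wrapper is illustrative only):

    import os

    import torch
    from lightning_utilities.core.imports import module_available


    def maybe_cast_for_hpu(result: torch.Tensor) -> torch.Tensor:
        # The habana_frameworks import is only attempted when the module path resolves,
        # so environments without Habana software never pay for (or fail on) the import.
        if module_available("habana_frameworks.torch.utils.library_loader"):
            from habana_frameworks.torch.utils.library_loader import is_habana_available

            if (
                is_habana_available()
                and os.environ.get("HCCL_DISTRIBUTED_BACKEND") == "1"
                and result.type() in ("torch.LongTensor", "torch.hpu.LongTensor")
            ):
                result = result.float()  # HPU collectives do not support Long tensors
        return result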
diff --git a/src/lightning_lite/utilities/imports.py b/src/lightning_lite/utilities/imports.py
index 737d1d7a4a..5ff3d656c7 100644
--- a/src/lightning_lite/utilities/imports.py
+++ b/src/lightning_lite/utilities/imports.py
@@ -16,7 +16,7 @@ import operator
 import platform
 import sys

-from lightning_utilities.core.imports import compare_version, module_available, package_available
+from lightning_utilities.core.imports import compare_version

 _IS_WINDOWS = platform.system() == "Windows"
 _IS_INTERACTIVE = hasattr(sys, "ps1")  # https://stackoverflow.com/a/64523765
@@ -28,24 +28,3 @@ _TORCH_LESSER_EQUAL_1_10_2 = compare_version("torch", operator.le, "1.10.2")
 _TORCH_GREATER_EQUAL_1_11 = compare_version("torch", operator.ge, "1.11.0")
 _TORCH_GREATER_EQUAL_1_12 = compare_version("torch", operator.ge, "1.12.0")
 _TORCH_GREATER_EQUAL_1_13 = compare_version("torch", operator.ge, "1.13.0", use_base_version=True)
-
-_APEX_AVAILABLE = module_available("apex.amp")
-_HABANA_FRAMEWORK_AVAILABLE = package_available("habana_frameworks")
-_HIVEMIND_AVAILABLE = package_available("hivemind")
-_HOROVOD_AVAILABLE = module_available("horovod.torch")
-_OMEGACONF_AVAILABLE = package_available("omegaconf")
-_POPTORCH_AVAILABLE = package_available("poptorch")
-
-if _POPTORCH_AVAILABLE:
-    import poptorch
-
-    _IPU_AVAILABLE = poptorch.ipuHardwareIsAvailable()
-else:
-    _IPU_AVAILABLE = False
-
-if _HABANA_FRAMEWORK_AVAILABLE:
-    from habana_frameworks.torch.utils.library_loader import is_habana_avaialble
-
-    _HPU_AVAILABLE = is_habana_avaialble()
-else:
-    _HPU_AVAILABLE = False
diff --git a/tests/tests_lite/plugins/precision/test_deepspeed.py b/tests/tests_lite/plugins/precision/test_deepspeed.py
index a0cee88b26..3a06956c31 100644
--- a/tests/tests_lite/plugins/precision/test_deepspeed.py
+++ b/tests/tests_lite/plugins/precision/test_deepspeed.py
@@ -27,9 +27,9 @@ def test_invalid_precision_with_deepspeed_precision():


 def test_deepspeed_precision_apex_not_installed(monkeypatch):
-    import lightning_lite.plugins.precision.deepspeed as deepspeed_apex
+    import lightning_lite.plugins.precision.deepspeed as deepspeed

-    monkeypatch.setattr(deepspeed_apex, "_APEX_AVAILABLE", False)
+    monkeypatch.setattr(deepspeed, "_APEX_AVAILABLE", False)
     with pytest.raises(ImportError, match="You have asked for Apex AMP but `apex` is not installed."):
         DeepSpeedPrecision(precision=16, amp_type="apex")

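Note on the test_connector.py changes below: `connector.py` binds `_IS_INTERACTIVE` into its own namespace via `from lightning_lite.utilities.imports import _IS_INTERACTIVE`, so the incompatible-backend test now patches the name where the connector actually looks it up (`lightning_lite.connector`). A tiny illustration with hypothetical modules, not part of the diff:

    import types

    # `from source import FLAG` copies the current binding into the importer's
    # namespace; patching the source module afterwards does not change the copy.
    source = types.ModuleType("source")
    source.FLAG = False

    consumer = types.ModuleType("consumer")
    consumer.FLAG = source.FLAG  # what `from source import FLAG` effectively does

    source.FLAG = True
    assert consumer.FLAG is False  # the consumer still sees the old value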
"_IS_INTERACTIVE", True) + monkeypatch.setattr(lightning_lite.utilities.imports, "_IS_INTERACTIVE", True) connector = _Connector(strategy="ddp_fork", accelerator="cpu") assert connector.strategy.launcher.is_interactive_compatible diff --git a/tests/tests_lite/utilities/test_imports.py b/tests/tests_lite/utilities/test_imports.py index 1b48e626ce..b9812516c1 100644 --- a/tests/tests_lite/utilities/test_imports.py +++ b/tests/tests_lite/utilities/test_imports.py @@ -13,22 +13,9 @@ # limitations under the License. from lightning_lite.strategies.deepspeed import _DEEPSPEED_AVAILABLE from lightning_lite.strategies.fairscale import _FAIRSCALE_AVAILABLE -from lightning_lite.utilities.imports import ( - _APEX_AVAILABLE, - _HOROVOD_AVAILABLE, - _OMEGACONF_AVAILABLE, - _POPTORCH_AVAILABLE, -) def test_imports(): - try: - import apex # noqa - except ModuleNotFoundError: - assert not _APEX_AVAILABLE - else: - assert _APEX_AVAILABLE - try: import deepspeed # noqa except ModuleNotFoundError: @@ -42,24 +29,3 @@ def test_imports(): assert not _FAIRSCALE_AVAILABLE else: assert _FAIRSCALE_AVAILABLE - - try: - import horovod.torch # noqa - except ModuleNotFoundError: - assert not _HOROVOD_AVAILABLE - else: - assert _HOROVOD_AVAILABLE - - try: - import omegaconf # noqa - except ModuleNotFoundError: - assert not _OMEGACONF_AVAILABLE - else: - assert _OMEGACONF_AVAILABLE - - try: - import poptorch # noqa - except ModuleNotFoundError: - assert not _POPTORCH_AVAILABLE - else: - assert _POPTORCH_AVAILABLE