From a35f1fe34bd0bba041a3579fb094802466ab00d6 Mon Sep 17 00:00:00 2001
From: Jirka B
Date: Tue, 12 Nov 2024 22:22:57 +0100
Subject: [PATCH] contextlib.AbstractContextManager

---
 src/lightning/fabric/fabric.py | 13 ++++++-------
 src/lightning/fabric/plugins/precision/amp.py | 5 +++--
 .../fabric/plugins/precision/bitsandbytes.py | 10 +++++-----
 src/lightning/fabric/plugins/precision/deepspeed.py | 8 ++++----
 src/lightning/fabric/plugins/precision/double.py | 9 +++++----
 src/lightning/fabric/plugins/precision/fsdp.py | 9 +++++----
 src/lightning/fabric/plugins/precision/half.py | 9 +++++----
 src/lightning/fabric/plugins/precision/precision.py | 10 +++++-----
 .../fabric/plugins/precision/transformer_engine.py | 10 +++++-----
 src/lightning/fabric/strategies/ddp.py | 6 +++---
 src/lightning/fabric/strategies/deepspeed.py | 8 ++++----
 src/lightning/fabric/strategies/fsdp.py | 9 ++++-----
 src/lightning/fabric/strategies/model_parallel.py | 10 +++++-----
 src/lightning/fabric/strategies/strategy.py | 12 ++++++------
 src/lightning/fabric/strategies/xla_fsdp.py | 10 +++++-----
 src/lightning/pytorch/loops/utilities.py | 6 +++---
 .../pytorch/plugins/precision/deepspeed.py | 8 ++++----
 src/lightning/pytorch/plugins/precision/double.py | 8 ++++----
 src/lightning/pytorch/plugins/precision/fsdp.py | 9 +++++----
 src/lightning/pytorch/plugins/precision/half.py | 8 ++++----
 src/lightning/pytorch/profilers/pytorch.py | 5 +++--
 .../callbacks/test_stochastic_weight_avg.py | 5 +++--
 22 files changed, 96 insertions(+), 91 deletions(-)

diff --git a/src/lightning/fabric/fabric.py b/src/lightning/fabric/fabric.py
index 5d32e9d96f..058e5e7c40 100644
--- a/src/lightning/fabric/fabric.py
+++ b/src/lightning/fabric/fabric.py
@@ -14,13 +14,12 @@
 import inspect
 import os
 from collections.abc import Generator, Mapping, Sequence
-from contextlib import contextmanager, nullcontext
+from contextlib import AbstractContextManager, contextmanager, nullcontext
 from functools import partial
 from pathlib import Path
 from typing import (
     Any,
     Callable,
-    ContextManager,
     Optional,
     Union,
     cast,
@@ -484,7 +483,7 @@ class Fabric:
         )
         raise ValueError("You have to specify either `clip_val` or `max_norm` to do gradient clipping!")

-    def autocast(self) -> ContextManager:
+    def autocast(self) -> AbstractContextManager:
         """A context manager to automatically convert operations for the chosen precision.

         Use this only if the `forward` method of your model does not cover all operations you wish to run with the
@@ -634,7 +633,7 @@ class Fabric:
         if rank == 0:
             barrier()

-    def no_backward_sync(self, module: _FabricModule, enabled: bool = True) -> ContextManager:
+    def no_backward_sync(self, module: _FabricModule, enabled: bool = True) -> AbstractContextManager:
         r"""Skip gradient synchronization during backward to avoid redundant communication overhead.

         Use this context manager when performing gradient accumulation to speed up training with multiple devices.
@@ -676,7 +675,7 @@ class Fabric:
         forward_module, _ = _unwrap_compiled(module._forward_module)
         return self._strategy._backward_sync_control.no_backward_sync(forward_module, enabled)

-    def sharded_model(self) -> ContextManager:
+    def sharded_model(self) -> AbstractContextManager:
         r"""Instantiate a model under this context manager to prepare it for model-parallel sharding.

         .. deprecated:: This context manager is deprecated in favor of :meth:`init_module`, use it instead.
@@ -688,12 +687,12 @@ class Fabric:
             return self.strategy.module_sharded_context()
         return nullcontext()

-    def init_tensor(self) -> ContextManager:
+    def init_tensor(self) -> AbstractContextManager:
         """Tensors that you instantiate under this context manager will be created on the device right away and have
         the right data type depending on the precision setting in Fabric."""
         return self._strategy.tensor_init_context()

-    def init_module(self, empty_init: Optional[bool] = None) -> ContextManager:
+    def init_module(self, empty_init: Optional[bool] = None) -> AbstractContextManager:
         """Instantiate the model and its parameters under this context manager to reduce peak memory usage.

         The parameters get created on the device and with the right data type right away without wasting memory being
diff --git a/src/lightning/fabric/plugins/precision/amp.py b/src/lightning/fabric/plugins/precision/amp.py
index c9205ea2b4..d5fc1f0c1c 100644
--- a/src/lightning/fabric/plugins/precision/amp.py
+++ b/src/lightning/fabric/plugins/precision/amp.py
@@ -11,7 +11,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from typing import Any, ContextManager, Literal, Optional
+from contextlib import AbstractContextManager
+from typing import Any, Literal, Optional

 import torch
 from lightning_utilities.core.apply_func import apply_to_collection
@@ -59,7 +60,7 @@ class MixedPrecision(Precision):
         self._desired_input_dtype = torch.bfloat16 if self.precision == "bf16-mixed" else torch.float16

     @override
-    def forward_context(self) -> ContextManager:
+    def forward_context(self) -> AbstractContextManager:
         return torch.autocast(self.device, dtype=self._desired_input_dtype)

     @override
diff --git a/src/lightning/fabric/plugins/precision/bitsandbytes.py b/src/lightning/fabric/plugins/precision/bitsandbytes.py
index f0e80cbd77..ecb1d8a442 100644
--- a/src/lightning/fabric/plugins/precision/bitsandbytes.py
+++ b/src/lightning/fabric/plugins/precision/bitsandbytes.py
@@ -17,10 +17,10 @@ import math
 import os
 import warnings
 from collections import OrderedDict
-from contextlib import ExitStack
+from contextlib import AbstractContextManager, ExitStack
 from functools import partial
 from types import ModuleType
-from typing import Any, Callable, ContextManager, Literal, Optional, cast
+from typing import Any, Callable, Literal, Optional, cast

 import torch
 from lightning_utilities import apply_to_collection
@@ -123,11 +123,11 @@ class BitsandbytesPrecision(Precision):
         return module

     @override
-    def tensor_init_context(self) -> ContextManager:
+    def tensor_init_context(self) -> AbstractContextManager:
         return _DtypeContextManager(self.dtype)

     @override
-    def module_init_context(self) -> ContextManager:
+    def module_init_context(self) -> AbstractContextManager:
         if self.ignore_modules:
             # cannot patch the Linear class if the user wants to skip some submodules
             raise RuntimeError(
@@ -145,7 +145,7 @@ class BitsandbytesPrecision(Precision):
         return stack

     @override
-    def forward_context(self) -> ContextManager:
+    def forward_context(self) -> AbstractContextManager:
         return _DtypeContextManager(self.dtype)

     @override
diff --git a/src/lightning/fabric/plugins/precision/deepspeed.py b/src/lightning/fabric/plugins/precision/deepspeed.py
index 2fcaa38258..526095008f 100644
--- a/src/lightning/fabric/plugins/precision/deepspeed.py
+++ b/src/lightning/fabric/plugins/precision/deepspeed.py
@@ -11,8 +11,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from contextlib import nullcontext
-from typing import TYPE_CHECKING, Any, ContextManager, Literal
+from contextlib import AbstractContextManager, nullcontext
+from typing import TYPE_CHECKING, Any, Literal

 import torch
 from lightning_utilities.core.apply_func import apply_to_collection
@@ -68,13 +68,13 @@ class DeepSpeedPrecision(Precision):
         return module

     @override
-    def tensor_init_context(self) -> ContextManager:
+    def tensor_init_context(self) -> AbstractContextManager:
         if "true" not in self.precision:
             return nullcontext()
         return _DtypeContextManager(self._desired_dtype)

     @override
-    def module_init_context(self) -> ContextManager:
+    def module_init_context(self) -> AbstractContextManager:
         return self.tensor_init_context()

     @override
diff --git a/src/lightning/fabric/plugins/precision/double.py b/src/lightning/fabric/plugins/precision/double.py
index 0a857499f3..9aa0365a55 100644
--- a/src/lightning/fabric/plugins/precision/double.py
+++ b/src/lightning/fabric/plugins/precision/double.py
@@ -11,7 +11,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from typing import Any, ContextManager, Literal
+from contextlib import AbstractContextManager
+from typing import Any, Literal

 import torch
 from lightning_utilities.core.apply_func import apply_to_collection
@@ -33,15 +34,15 @@ class DoublePrecision(Precision):
         return module.double()

     @override
-    def tensor_init_context(self) -> ContextManager:
+    def tensor_init_context(self) -> AbstractContextManager:
         return _DtypeContextManager(torch.double)

     @override
-    def module_init_context(self) -> ContextManager:
+    def module_init_context(self) -> AbstractContextManager:
         return self.tensor_init_context()

     @override
-    def forward_context(self) -> ContextManager:
+    def forward_context(self) -> AbstractContextManager:
         return self.tensor_init_context()

     @override
diff --git a/src/lightning/fabric/plugins/precision/fsdp.py b/src/lightning/fabric/plugins/precision/fsdp.py
index 7e3170c2c7..0b78ad72a4 100644
--- a/src/lightning/fabric/plugins/precision/fsdp.py
+++ b/src/lightning/fabric/plugins/precision/fsdp.py
@@ -11,7 +11,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from typing import TYPE_CHECKING, Any, ContextManager, Literal, Optional
+from contextlib import AbstractContextManager
+from typing import TYPE_CHECKING, Any, Literal, Optional

 import torch
 from lightning_utilities import apply_to_collection
@@ -100,15 +101,15 @@ class FSDPPrecision(Precision):
         )

     @override
-    def tensor_init_context(self) -> ContextManager:
+    def tensor_init_context(self) -> AbstractContextManager:
         return _DtypeContextManager(self._desired_input_dtype)

     @override
-    def module_init_context(self) -> ContextManager:
+    def module_init_context(self) -> AbstractContextManager:
         return _DtypeContextManager(self.mixed_precision_config.param_dtype or torch.float32)

     @override
-    def forward_context(self) -> ContextManager:
+    def forward_context(self) -> AbstractContextManager:
         if "mixed" in self.precision:
             return torch.autocast("cuda", dtype=(torch.bfloat16 if self.precision == "bf16-mixed" else torch.float16))
         return self.tensor_init_context()
diff --git a/src/lightning/fabric/plugins/precision/half.py b/src/lightning/fabric/plugins/precision/half.py
index 32ca7da815..fcb28ad332 100644
--- a/src/lightning/fabric/plugins/precision/half.py
+++ b/src/lightning/fabric/plugins/precision/half.py
@@ -11,7 +11,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from typing import Any, ContextManager, Literal
+from contextlib import AbstractContextManager
+from typing import Any, Literal

 import torch
 from lightning_utilities.core.apply_func import apply_to_collection
@@ -42,15 +43,15 @@ class HalfPrecision(Precision):
         return module.to(dtype=self._desired_input_dtype)

     @override
-    def tensor_init_context(self) -> ContextManager:
+    def tensor_init_context(self) -> AbstractContextManager:
         return _DtypeContextManager(self._desired_input_dtype)

     @override
-    def module_init_context(self) -> ContextManager:
+    def module_init_context(self) -> AbstractContextManager:
         return self.tensor_init_context()

     @override
-    def forward_context(self) -> ContextManager:
+    def forward_context(self) -> AbstractContextManager:
         return self.tensor_init_context()

     @override
diff --git a/src/lightning/fabric/plugins/precision/precision.py b/src/lightning/fabric/plugins/precision/precision.py
index d32ccbabb4..1dfab2a7bc 100644
--- a/src/lightning/fabric/plugins/precision/precision.py
+++ b/src/lightning/fabric/plugins/precision/precision.py
@@ -11,8 +11,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from contextlib import nullcontext
-from typing import Any, ContextManager, Literal, Optional, Union
+from contextlib import AbstractContextManager, nullcontext
+from typing import Any, Literal, Optional, Union

 from torch import Tensor
 from torch.nn import Module
@@ -53,11 +53,11 @@ class Precision:
         """
         return module

-    def tensor_init_context(self) -> ContextManager:
+    def tensor_init_context(self) -> AbstractContextManager:
         """Controls how tensors get created (device, dtype)."""
         return nullcontext()

-    def module_init_context(self) -> ContextManager:
+    def module_init_context(self) -> AbstractContextManager:
         """Instantiate module parameters or tensors in the precision type this plugin handles.

         This is optional and depends on the precision limitations during optimization.
@@ -65,7 +65,7 @@ class Precision:
         """
         return nullcontext()

-    def forward_context(self) -> ContextManager:
+    def forward_context(self) -> AbstractContextManager:
         """A contextmanager for managing model forward/training_step/evaluation_step/predict_step."""
         return nullcontext()

diff --git a/src/lightning/fabric/plugins/precision/transformer_engine.py b/src/lightning/fabric/plugins/precision/transformer_engine.py
index ddc30c2e1b..c3ef84a453 100644
--- a/src/lightning/fabric/plugins/precision/transformer_engine.py
+++ b/src/lightning/fabric/plugins/precision/transformer_engine.py
@@ -13,8 +13,8 @@
 # limitations under the License.
 import logging
 from collections.abc import Mapping
-from contextlib import ExitStack
-from typing import TYPE_CHECKING, Any, ContextManager, Literal, Optional, Union
+from contextlib import AbstractContextManager, ExitStack
+from typing import TYPE_CHECKING, Any, Literal, Optional, Union

 import torch
 from lightning_utilities import apply_to_collection
@@ -107,11 +107,11 @@ class TransformerEnginePrecision(Precision):
         return module

     @override
-    def tensor_init_context(self) -> ContextManager:
+    def tensor_init_context(self) -> AbstractContextManager:
         return _DtypeContextManager(self.weights_dtype)

     @override
-    def module_init_context(self) -> ContextManager:
+    def module_init_context(self) -> AbstractContextManager:
         dtype_ctx = self.tensor_init_context()
         stack = ExitStack()
         if self.replace_layers:
@@ -126,7 +126,7 @@ class TransformerEnginePrecision(Precision):
         return stack

     @override
-    def forward_context(self) -> ContextManager:
+    def forward_context(self) -> AbstractContextManager:
         dtype_ctx = _DtypeContextManager(self.weights_dtype)
         fallback_autocast_ctx = torch.autocast(device_type="cuda", dtype=self.fallback_compute_dtype)
         import transformer_engine.pytorch as te
diff --git a/src/lightning/fabric/strategies/ddp.py b/src/lightning/fabric/strategies/ddp.py
index 2fd325c9c3..ce47e4e403 100644
--- a/src/lightning/fabric/strategies/ddp.py
+++ b/src/lightning/fabric/strategies/ddp.py
@@ -11,9 +11,9 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from contextlib import nullcontext
+from contextlib import AbstractContextManager, nullcontext
 from datetime import timedelta
-from typing import Any, ContextManager, Literal, Optional, Union
+from typing import Any, Literal, Optional, Union

 import torch
 import torch.distributed
@@ -231,7 +231,7 @@ class DDPStrategy(ParallelStrategy):

 class _DDPBackwardSyncControl(_BackwardSyncControl):
     @override
-    def no_backward_sync(self, module: Module, enabled: bool) -> ContextManager:
+    def no_backward_sync(self, module: Module, enabled: bool) -> AbstractContextManager:
         """Blocks gradient synchronization inside the
         :class:`~torch.nn.parallel.distributed.DistributedDataParallel` wrapper."""
         if not enabled:
diff --git a/src/lightning/fabric/strategies/deepspeed.py b/src/lightning/fabric/strategies/deepspeed.py
index 1cd5690966..03d90cd5df 100644
--- a/src/lightning/fabric/strategies/deepspeed.py
+++ b/src/lightning/fabric/strategies/deepspeed.py
@@ -17,10 +17,10 @@ import logging
 import os
 import platform
 from collections.abc import Mapping
-from contextlib import ExitStack
+from contextlib import AbstractContextManager, ExitStack
 from itertools import chain
 from pathlib import Path
-from typing import TYPE_CHECKING, Any, Callable, ContextManager, Optional, Union
+from typing import TYPE_CHECKING, Any, Callable, Optional, Union

 import torch
 from lightning_utilities.core.imports import RequirementCache
@@ -353,7 +353,7 @@ class DeepSpeedStrategy(DDPStrategy, _Sharded):
         raise NotImplementedError(self._err_msg_joint_setup_required())

     @override
-    def module_init_context(self, empty_init: Optional[bool] = None) -> ContextManager:
+    def module_init_context(self, empty_init: Optional[bool] = None) -> AbstractContextManager:
         if self.zero_stage_3 and empty_init is False:
             raise NotImplementedError(
                 f"`{empty_init=}` is not a valid choice with `DeepSpeedStrategy` when ZeRO stage 3 is enabled."
@@ -366,7 +366,7 @@ class DeepSpeedStrategy(DDPStrategy, _Sharded):
         return stack

     @override
-    def module_sharded_context(self) -> ContextManager:
+    def module_sharded_context(self) -> AbstractContextManager:
         # Current limitation in Fabric: The config needs to be fully determined at the time of calling the context
         # manager. Later modifications through e.g. `Fabric.setup()` won't have an effect here.
diff --git a/src/lightning/fabric/strategies/fsdp.py b/src/lightning/fabric/strategies/fsdp.py
index 7e598191ce..9dd5b2c62d 100644
--- a/src/lightning/fabric/strategies/fsdp.py
+++ b/src/lightning/fabric/strategies/fsdp.py
@@ -14,7 +14,7 @@
 import shutil
 import warnings
 from collections.abc import Generator
-from contextlib import ExitStack, nullcontext
+from contextlib import AbstractContextManager, ExitStack, nullcontext
 from datetime import timedelta
 from functools import partial
 from pathlib import Path
@@ -22,7 +22,6 @@ from typing import (
     TYPE_CHECKING,
     Any,
     Callable,
-    ContextManager,
     Literal,
     Optional,
     Union,
@@ -335,7 +334,7 @@ class FSDPStrategy(ParallelStrategy, _Sharded):
         pass

     @override
-    def module_init_context(self, empty_init: Optional[bool] = None) -> ContextManager:
+    def module_init_context(self, empty_init: Optional[bool] = None) -> AbstractContextManager:
         precision_init_ctx = self.precision.module_init_context()
         module_sharded_ctx = self.module_sharded_context()
         stack = ExitStack()
@@ -349,7 +348,7 @@ class FSDPStrategy(ParallelStrategy, _Sharded):
         return stack

     @override
-    def module_sharded_context(self) -> ContextManager:
+    def module_sharded_context(self) -> AbstractContextManager:
         from torch.distributed.fsdp.fully_sharded_data_parallel import FullyShardedDataParallel
         from torch.distributed.fsdp.wrap import enable_wrap

@@ -740,7 +739,7 @@ def _setup_activation_checkpointing(module: Module, activation_checkpointing_kwa

 class _FSDPBackwardSyncControl(_BackwardSyncControl):
     @override
-    def no_backward_sync(self, module: Module, enabled: bool) -> ContextManager:
+    def no_backward_sync(self, module: Module, enabled: bool) -> AbstractContextManager:
         """Blocks gradient synchronization inside the :class:`~torch.distributed.fsdp.FullyShardedDataParallel`
         wrapper."""
         if not enabled:
diff --git a/src/lightning/fabric/strategies/model_parallel.py b/src/lightning/fabric/strategies/model_parallel.py
index 435dc3dad1..ad1fc19074 100644
--- a/src/lightning/fabric/strategies/model_parallel.py
+++ b/src/lightning/fabric/strategies/model_parallel.py
@@ -14,10 +14,10 @@
 import itertools
 import shutil
 from collections.abc import Generator
-from contextlib import ExitStack
+from contextlib import AbstractContextManager, ExitStack
 from datetime import timedelta
 from pathlib import Path
-from typing import TYPE_CHECKING, Any, Callable, ContextManager, Literal, Optional, TypeVar, Union
+from typing import TYPE_CHECKING, Any, Callable, Literal, Optional, TypeVar, Union

 import torch
 from lightning_utilities.core.rank_zero import rank_zero_only as utils_rank_zero_only
@@ -195,7 +195,7 @@ class ModelParallelStrategy(ParallelStrategy):
         pass

     @override
-    def module_init_context(self, empty_init: Optional[bool] = None) -> ContextManager:
+    def module_init_context(self, empty_init: Optional[bool] = None) -> AbstractContextManager:
         precision_init_ctx = self.precision.module_init_context()
         stack = ExitStack()
         if empty_init:
@@ -319,12 +319,12 @@ class ModelParallelStrategy(ParallelStrategy):

 class _ParallelBackwardSyncControl(_BackwardSyncControl):
     @override
-    def no_backward_sync(self, module: Module, enabled: bool) -> ContextManager:
+    def no_backward_sync(self, module: Module, enabled: bool) -> AbstractContextManager:
         """Blocks gradient synchronization inside the FSDP2 modules."""
         return _FSDPNoSync(module=module, enabled=enabled)


-class _FSDPNoSync(ContextManager):
+class _FSDPNoSync(AbstractContextManager):
     def __init__(self, module: Module, enabled: bool) -> None:
         self._module = module
         self._enabled = enabled
diff --git a/src/lightning/fabric/strategies/strategy.py b/src/lightning/fabric/strategies/strategy.py
index c83160a3a2..4daad9b954 100644
--- a/src/lightning/fabric/strategies/strategy.py
+++ b/src/lightning/fabric/strategies/strategy.py
@@ -14,8 +14,8 @@
 import logging
 from abc import ABC, abstractmethod
 from collections.abc import Iterable
-from contextlib import ExitStack
-from typing import Any, Callable, ContextManager, Optional, TypeVar, Union
+from contextlib import AbstractContextManager, ExitStack
+from typing import Any, Callable, Optional, TypeVar, Union

 import torch
 from torch import Tensor
@@ -118,7 +118,7 @@ class Strategy(ABC):
         """
         return dataloader

-    def tensor_init_context(self) -> ContextManager:
+    def tensor_init_context(self) -> AbstractContextManager:
         """Controls how tensors get created (device, dtype)."""
         precision_init_ctx = self.precision.tensor_init_context()
         stack = ExitStack()
@@ -126,7 +126,7 @@ class Strategy(ABC):
         stack.enter_context(precision_init_ctx)
         return stack

-    def module_init_context(self, empty_init: Optional[bool] = None) -> ContextManager:
+    def module_init_context(self, empty_init: Optional[bool] = None) -> AbstractContextManager:
         """A context manager wrapping the model instantiation.

         Here, the strategy can control how the parameters of the model get created (device, dtype) and or apply other
@@ -422,7 +422,7 @@ class _BackwardSyncControl(ABC):
     """

     @abstractmethod
-    def no_backward_sync(self, module: Module, enabled: bool) -> ContextManager:
+    def no_backward_sync(self, module: Module, enabled: bool) -> AbstractContextManager:
         """Blocks the synchronization of gradients during the backward pass.

         This is a context manager. It is only effective if it wraps a call to `.backward()`.
@@ -434,7 +434,7 @@ class _Sharded(ABC):
     """Mixin-interface for any :class:`Strategy` that wants to expose functionality for sharding model parameters."""

     @abstractmethod
-    def module_sharded_context(self) -> ContextManager:
+    def module_sharded_context(self) -> AbstractContextManager:
         """A context manager that goes over the instantiation of an :class:`torch.nn.Module` and handles sharding of
         parameters on creation.
diff --git a/src/lightning/fabric/strategies/xla_fsdp.py b/src/lightning/fabric/strategies/xla_fsdp.py
index 9b2ffe3505..935ef72713 100644
--- a/src/lightning/fabric/strategies/xla_fsdp.py
+++ b/src/lightning/fabric/strategies/xla_fsdp.py
@@ -12,10 +12,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import io
-from contextlib import ExitStack, nullcontext
+from contextlib import AbstractContextManager, ExitStack, nullcontext
 from functools import partial
 from pathlib import Path
-from typing import TYPE_CHECKING, Any, Callable, ContextManager, Literal, Optional, Union
+from typing import TYPE_CHECKING, Any, Callable, Literal, Optional, Union

 import torch
 from torch import Tensor
@@ -225,7 +225,7 @@ class XLAFSDPStrategy(ParallelStrategy, _Sharded):
     def module_to_device(self, module: Module) -> None:
         pass

-    def module_init_context(self, empty_init: Optional[bool] = None) -> ContextManager:
+    def module_init_context(self, empty_init: Optional[bool] = None) -> AbstractContextManager:
         precision_init_ctx = self.precision.module_init_context()
         module_sharded_ctx = self.module_sharded_context()
         stack = ExitStack()
@@ -235,7 +235,7 @@ class XLAFSDPStrategy(ParallelStrategy, _Sharded):
         return stack

     @override
-    def module_sharded_context(self) -> ContextManager:
+    def module_sharded_context(self) -> AbstractContextManager:
         return nullcontext()

     @override
@@ -668,7 +668,7 @@ def _activation_checkpointing_kwargs(policy: Optional[_POLICY_SET], kwargs: dict

 class _XLAFSDPBackwardSyncControl(_BackwardSyncControl):
     @override
-    def no_backward_sync(self, module: Module, enabled: bool) -> ContextManager:
+    def no_backward_sync(self, module: Module, enabled: bool) -> AbstractContextManager:
         """Blocks gradient synchronization inside the
         :class:`~torch_xla.distributed.fsdp.XlaFullyShardedDataParallel` wrapper."""
         if not enabled:
diff --git a/src/lightning/pytorch/loops/utilities.py b/src/lightning/pytorch/loops/utilities.py
index 43d7e333ef..2aaf877c89 100644
--- a/src/lightning/pytorch/loops/utilities.py
+++ b/src/lightning/pytorch/loops/utilities.py
@@ -13,8 +13,8 @@
 # limitations under the License.
 import inspect
 from collections.abc import Generator
-from contextlib import contextmanager
-from typing import Any, Callable, ContextManager, Optional
+from contextlib import AbstractContextManager, contextmanager
+from typing import Any, Callable, Optional

 import torch
 import torch.distributed as dist
@@ -160,7 +160,7 @@ def _no_grad_context(loop_run: Callable) -> Callable:
             raise TypeError(f"`{type(self).__name__}` needs to be a Loop.")
         if not hasattr(self, "inference_mode"):
             raise TypeError(f"`{type(self).__name__}.inference_mode` needs to be defined")
-        context_manager: type[ContextManager]
+        context_manager: type[AbstractContextManager]
         if _distributed_is_initialized() and dist.get_backend() == "gloo":
             # gloo backend does not work properly.
             # https://github.com/Lightning-AI/lightning/pull/12715/files#r854569110
diff --git a/src/lightning/pytorch/plugins/precision/deepspeed.py b/src/lightning/pytorch/plugins/precision/deepspeed.py
index e1e90281cf..9225e3bb9e 100644
--- a/src/lightning/pytorch/plugins/precision/deepspeed.py
+++ b/src/lightning/pytorch/plugins/precision/deepspeed.py
@@ -11,8 +11,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from contextlib import nullcontext
-from typing import TYPE_CHECKING, Any, Callable, ContextManager, Optional, Union
+from contextlib import AbstractContextManager, nullcontext
+from typing import TYPE_CHECKING, Any, Callable, Optional, Union

 import torch
 from lightning_utilities import apply_to_collection
@@ -80,13 +80,13 @@ class DeepSpeedPrecision(Precision):
         return apply_to_collection(data, function=_convert_fp_tensor, dtype=Tensor, dst_type=self._desired_dtype)

     @override
-    def tensor_init_context(self) -> ContextManager:
+    def tensor_init_context(self) -> AbstractContextManager:
         if "true" not in self.precision:
             return nullcontext()
         return _DtypeContextManager(self._desired_dtype)

     @override
-    def module_init_context(self) -> ContextManager:
+    def module_init_context(self) -> AbstractContextManager:
         return self.tensor_init_context()

     @override
diff --git a/src/lightning/pytorch/plugins/precision/double.py b/src/lightning/pytorch/plugins/precision/double.py
index 5d0af8b992..efa1aa008a 100644
--- a/src/lightning/pytorch/plugins/precision/double.py
+++ b/src/lightning/pytorch/plugins/precision/double.py
@@ -12,8 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from collections.abc import Generator
-from contextlib import contextmanager
-from typing import Any, ContextManager, Literal
+from contextlib import AbstractContextManager, contextmanager
+from typing import Any, Literal

 import torch
 import torch.nn as nn
@@ -38,11 +38,11 @@ class DoublePrecision(Precision):
         return module.double()

     @override
-    def tensor_init_context(self) -> ContextManager:
+    def tensor_init_context(self) -> AbstractContextManager:
         return _DtypeContextManager(torch.float64)

     @override
-    def module_init_context(self) -> ContextManager:
+    def module_init_context(self) -> AbstractContextManager:
         return self.tensor_init_context()

     @override
diff --git a/src/lightning/pytorch/plugins/precision/fsdp.py b/src/lightning/pytorch/plugins/precision/fsdp.py
index 64cee32359..7029497c17 100644
--- a/src/lightning/pytorch/plugins/precision/fsdp.py
+++ b/src/lightning/pytorch/plugins/precision/fsdp.py
@@ -11,7 +11,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from typing import TYPE_CHECKING, Any, Callable, ContextManager, Optional
+from contextlib import AbstractContextManager
+from typing import TYPE_CHECKING, Any, Callable, Optional

 import torch
 from lightning_utilities import apply_to_collection
@@ -109,15 +110,15 @@ class FSDPPrecision(Precision):
         )

     @override
-    def tensor_init_context(self) -> ContextManager:
+    def tensor_init_context(self) -> AbstractContextManager:
         return _DtypeContextManager(self._desired_input_dtype)

     @override
-    def module_init_context(self) -> ContextManager:
+    def module_init_context(self) -> AbstractContextManager:
         return _DtypeContextManager(self.mixed_precision_config.param_dtype or torch.float32)

     @override
-    def forward_context(self) -> ContextManager:
+    def forward_context(self) -> AbstractContextManager:
         if "mixed" in self.precision:
             return torch.autocast("cuda", dtype=(torch.bfloat16 if self.precision == "bf16-mixed" else torch.float16))
         return _DtypeContextManager(self._desired_input_dtype)
diff --git a/src/lightning/pytorch/plugins/precision/half.py b/src/lightning/pytorch/plugins/precision/half.py
index 2ad30de2b8..fe9deb44c3 100644
--- a/src/lightning/pytorch/plugins/precision/half.py
+++ b/src/lightning/pytorch/plugins/precision/half.py
@@ -12,8 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from collections.abc import Generator
-from contextlib import contextmanager
-from typing import Any, ContextManager, Literal
+from contextlib import AbstractContextManager, contextmanager
+from typing import Any, Literal

 import torch
 from lightning_utilities import apply_to_collection
@@ -44,11 +44,11 @@ class HalfPrecision(Precision):
         return module.to(dtype=self._desired_input_dtype)

     @override
-    def tensor_init_context(self) -> ContextManager:
+    def tensor_init_context(self) -> AbstractContextManager:
         return _DtypeContextManager(self._desired_input_dtype)

     @override
-    def module_init_context(self) -> ContextManager:
+    def module_init_context(self) -> AbstractContextManager:
         return self.tensor_init_context()

     @override
diff --git a/src/lightning/pytorch/profilers/pytorch.py b/src/lightning/pytorch/profilers/pytorch.py
index 6ba7466ee0..e264d5154f 100644
--- a/src/lightning/pytorch/profilers/pytorch.py
+++ b/src/lightning/pytorch/profilers/pytorch.py
@@ -16,9 +16,10 @@
 import inspect
 import logging
 import os
+from contextlib import AbstractContextManager
 from functools import lru_cache, partial
 from pathlib import Path
-from typing import TYPE_CHECKING, Any, Callable, ContextManager, Optional, Union
+from typing import TYPE_CHECKING, Any, Callable, Optional, Union

 import torch
 from torch import Tensor, nn
@@ -305,7 +306,7 @@ class PyTorchProfiler(Profiler):
         self.function_events: Optional[EventList] = None
         self._lightning_module: Optional[LightningModule] = None  # set by ProfilerConnector
         self._register: Optional[RegisterRecordFunction] = None
-        self._parent_profiler: Optional[ContextManager] = None
+        self._parent_profiler: Optional[AbstractContextManager] = None
         self._recording_map: dict[str, record_function] = {}
         self._start_action_name: Optional[str] = None
         self._schedule: Optional[ScheduleWrapper] = None
diff --git a/tests/tests_pytorch/callbacks/test_stochastic_weight_avg.py b/tests/tests_pytorch/callbacks/test_stochastic_weight_avg.py
index d57ac76f04..8d3a1800e1 100644
--- a/tests/tests_pytorch/callbacks/test_stochastic_weight_avg.py
+++ b/tests/tests_pytorch/callbacks/test_stochastic_weight_avg.py
@@ -13,8 +13,9 @@
 # limitations under the License.
 import logging
 import os
+from contextlib import AbstractContextManager
 from pathlib import Path
-from typing import ContextManager, Optional
+from typing import Optional
 from unittest import mock

 import pytest
@@ -382,5 +383,5 @@ def test_misconfiguration_error_with_sharded_model(tmp_path, strategy: str):
     trainer.fit(model)


-def _backward_patch(trainer: Trainer) -> ContextManager:
+def _backward_patch(trainer: Trainer) -> AbstractContextManager:
     return mock.patch.object(Strategy, "backward", wraps=trainer.strategy.backward)
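Background on the rename: `typing.ContextManager` has been a deprecated alias of `contextlib.AbstractContextManager` since Python 3.9, so pointing the return annotations (and the `_FSDPNoSync` base class) at the `contextlib` name is intended to be behavior-neutral while dropping the deprecated `typing` import. The snippet below is a minimal sketch of the annotation pattern, not code from this patch; the names `_DtypeContext` and `tensor_init_context` are illustrative stand-ins, not the Lightning helpers touched above.

from contextlib import AbstractContextManager, nullcontext

import torch


class _DtypeContext(AbstractContextManager):
    """Illustrative stand-in: switch the default dtype on enter, restore it on exit."""

    def __init__(self, dtype: torch.dtype) -> None:
        self._new_dtype = dtype
        self._previous_dtype = torch.get_default_dtype()

    def __enter__(self) -> None:
        torch.set_default_dtype(self._new_dtype)

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        torch.set_default_dtype(self._previous_dtype)


def tensor_init_context(use_double: bool) -> AbstractContextManager:
    # One annotation covers both branches: a custom manager and nullcontext().
    return _DtypeContext(torch.float64) if use_double else nullcontext()


with tensor_init_context(use_double=True):
    weight = torch.empty(2, 2)  # created as float64 inside the context
assert weight.dtype == torch.float64
assert torch.get_default_dtype() == torch.float32  # restored on exit

The annotation promises callers only the `with`-statement protocol, which is what the patched `*_context()` methods actually return in the diff above (`nullcontext()`, an `ExitStack`, `torch.autocast(...)`, or a dtype context manager).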