Drop torch 1.6 testing (#10390)
* Drop torch 1.6 support
* Drop 1.6 support
* Update CHANGELOG
* Fixes
* Split change
* Undo change
* 1.7 -> 1.7.1
https://github.com/pytorch/pytorch/issues/47354
* Force trigger nightly
* Update .github/workflows/events-nightly.yml
Co-authored-by: Aki Nitta <nitta@akihironitta.com>
* Revert 1.7.1 change - try wildcard
* Update adjust versions and test it
* Undo test changes
* Revert "Undo test changes"
This reverts commit 3a6acadd11.
* Update CHANGELOG.md
Co-authored-by: Aki Nitta <nitta@akihironitta.com>
This commit is contained in:
parent
a8c2725ff8
commit
7a9a08c5d3
|
@ -28,7 +28,7 @@ from pytorch_lightning.utilities.imports import _TORCH_GREATER_EQUAL_1_8
|
|||
if _TORCH_GREATER_EQUAL_1_8:
|
||||
from torch.quantization import FakeQuantizeBase
|
||||
else:
|
||||
# For torch 1.6 and 1.7.
|
||||
# For torch 1.7.
|
||||
from torch.quantization import FakeQuantize as FakeQuantizeBase
|
||||
|
||||
import pytorch_lightning as pl
|
||||
|
|
|
@ -13,7 +13,8 @@
|
|||
# limitations under the License.
|
||||
from typing import Any
|
||||
|
||||
from pytorch_lightning.overrides.torch_distributed import broadcast_object_list
|
||||
import torch.distributed
|
||||
|
||||
from pytorch_lightning.utilities import rank_zero_deprecation
|
||||
from pytorch_lightning.utilities.distributed import group as _group
|
||||
|
||||
|
@ -40,6 +41,6 @@ class LightningDistributed:
|
|||
if self.rank != 0:
|
||||
obj = [None] * len(obj)
|
||||
|
||||
broadcast_object_list(obj, 0, group=group or _group.WORLD)
|
||||
torch.distributed.broadcast_object_list(obj, 0, group=group or _group.WORLD)
|
||||
|
||||
return obj[0]
|
||||
|
|
|
@ -1,99 +0,0 @@
|
|||
import logging
|
||||
import pickle
|
||||
|
||||
import torch
|
||||
|
||||
from pytorch_lightning.utilities.imports import _TORCH_GREATER_EQUAL_1_8
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
if torch.distributed.is_available():
|
||||
from torch.distributed import Backend, broadcast, get_backend, get_rank, GroupMember
|
||||
|
||||
# The code underneath is taken from PyTorch `torch/distributed/distributed_c10d.py`
|
||||
# and enable broadcasting for PyTorch 1.6 and lower.
|
||||
|
||||
|
||||
# https://github.com/pytorch/pytorch/blob/1.7/torch/distributed/distributed_c10d.py#L160
|
||||
def _rank_not_in_group(group):
|
||||
"""Helper that checks if the current process's rank is not in a given group."""
|
||||
if group is None:
|
||||
return False
|
||||
return group == GroupMember.NON_GROUP_MEMBER
|
||||
|
||||
|
||||
# Taken from https://github.com/pytorch/pytorch/blob/1.7/torch/distributed/distributed_c10d.py#L1164
|
||||
def _object_to_tensor(obj):
|
||||
buffer = pickle.dumps(obj)
|
||||
byte_storage = torch.ByteStorage.from_buffer(buffer) # type: ignore[attr-defined]
|
||||
byte_tensor = torch.ByteTensor(byte_storage)
|
||||
local_size = torch.LongTensor([byte_tensor.numel()])
|
||||
return byte_tensor, local_size
|
||||
|
||||
|
||||
# Taken from https://github.com/pytorch/pytorch/blob/1.7/torch/distributed/distributed_c10d.py
|
||||
def _tensor_to_object(tensor, tensor_size):
|
||||
buf = tensor.numpy().tobytes()[:tensor_size]
|
||||
out = pickle.loads(buf)
|
||||
return out
|
||||
|
||||
|
||||
# Taken from https://github.com/pytorch/pytorch/blob/1.7/torch/distributed/distributed_c10d.py#L1327
|
||||
def _broadcast_object_list(object_list, src=0, group=None):
    """Broadcast the picklable objects in ``object_list`` from rank ``src``.

    The objects are pickled on the source rank, their sizes are broadcast
    first, then the concatenated bytes. Every other rank unpickles the payload
    and overwrites the entries of its own ``object_list`` in place.

    Args:
        object_list: List of objects. On ``src`` these are the values to send;
            on other ranks the entries are replaced with the received values.
            Non-source ranks size the receive buffer from ``len(object_list)``,
            so the length presumably has to match across ranks.
        src: Rank whose objects are broadcast.
        group: Process group to use, or ``None``.

    Returns:
        ``None``. Results are written into ``object_list``. The call returns
        immediately when ``_rank_not_in_group(group)`` is true.
    """
    if _rank_not_in_group(group):
        return

    my_rank = get_rank()
    # Serialize object_list elements to tensors on src rank.
    if my_rank == src:
        tensor_list, size_list = zip(*(_object_to_tensor(obj) for obj in object_list))
        object_sizes_tensor = torch.cat(size_list)
    else:
        object_sizes_tensor = torch.LongTensor(len(object_list))

    group_backend = get_backend(group)
    is_nccl_backend = group_backend == Backend.NCCL
    current_device = torch.device("cpu")
    if is_nccl_backend:
        # We cannot simply use my_rank as the device index since
        # rank == device is not necessarily true.
        current_device = torch.device("cuda", torch.cuda.current_device())
        # The original performed this move twice in a row; once is sufficient.
        object_sizes_tensor = object_sizes_tensor.to(current_device)

    # Broadcast object sizes
    broadcast(object_sizes_tensor, src=src, group=group)

    # Concatenate and broadcast serialized object tensors
    if my_rank == src:
        object_tensor = torch.cat(tensor_list)
    else:
        # Allocate a receive buffer large enough for all pickled payloads.
        object_tensor = torch.ByteTensor(torch.sum(object_sizes_tensor).item())

    if is_nccl_backend:
        object_tensor = object_tensor.to(current_device)

    broadcast(object_tensor, src=src, group=group)

    # Deserialize objects using their stored sizes.
    offset = 0
    if my_rank != src:
        for i, obj_size in enumerate(object_sizes_tensor):
            obj_view = object_tensor[offset : offset + obj_size]
            # Convert to a ``torch.ByteTensor`` before handing the slice to
            # ``_tensor_to_object``, which calls ``.numpy()`` on it.
            obj_view = obj_view.type(torch.ByteTensor)  # type: ignore[call-overload]
            offset += obj_size
            object_list[i] = _tensor_to_object(obj_view, obj_size)
|
||||
|
||||
|
||||
# Pick the ``broadcast_object_list`` implementation exported by this module.
if not torch.distributed.is_available():
    # avoid failures on early PyTorch versions for Windows where
    # not all functions used in `broadcast_object_list` are available.
    def _broadcast_noop(obj, *_, **__):
        """Stand-in used when ``torch.distributed`` is unavailable; returns ``obj`` unchanged."""
        return obj

    broadcast_object_list = _broadcast_noop
elif _TORCH_GREATER_EQUAL_1_8:
    # Use the implementation that ships with PyTorch.
    from torch.distributed.distributed_c10d import broadcast_object_list
else:
    # Fall back to the copy of the PyTorch implementation defined above.
    broadcast_object_list = _broadcast_object_list
|
|
@ -34,7 +34,6 @@ import pytorch_lightning as pl
|
|||
from pytorch_lightning.core.optimizer import LightningOptimizer
|
||||
from pytorch_lightning.overrides import LightningDistributedModule
|
||||
from pytorch_lightning.overrides.distributed import prepare_for_backward
|
||||
from pytorch_lightning.overrides.torch_distributed import broadcast_object_list
|
||||
from pytorch_lightning.plugins.environments.cluster_environment import ClusterEnvironment
|
||||
from pytorch_lightning.plugins.io.checkpoint_plugin import CheckpointIO
|
||||
from pytorch_lightning.plugins.training_type.parallel import ParallelPlugin
|
||||
|
@ -43,7 +42,6 @@ from pytorch_lightning.utilities import (
|
|||
_FAIRSCALE_AVAILABLE,
|
||||
_HYDRA_AVAILABLE,
|
||||
_IS_WINDOWS,
|
||||
_TORCH_GREATER_EQUAL_1_7,
|
||||
_TORCH_GREATER_EQUAL_1_8,
|
||||
_TORCH_GREATER_EQUAL_1_9,
|
||||
_TORCH_GREATER_EQUAL_1_10,
|
||||
|
@ -255,15 +253,13 @@ class DDPPlugin(ParallelPlugin):
|
|||
# when not all parameter backward hooks are fired by the autograd engine even if require_grad is set to True.
|
||||
# This flag does come with a performance hit, so it is suggested to disable in cases where it is possible.
|
||||
self._ddp_kwargs["find_unused_parameters"] = self._ddp_kwargs.get("find_unused_parameters", True)
|
||||
# todo: PyTorch 1.7.0 DDP introduces `self.reducer._rebuild_buckets()` breaking manual_optimization
|
||||
if (
|
||||
_TORCH_GREATER_EQUAL_1_7
|
||||
and not self.lightning_module.automatic_optimization
|
||||
and not self._ddp_kwargs.get("find_unused_parameters", False)
|
||||
if not self.lightning_module.automatic_optimization and not self._ddp_kwargs.get(
|
||||
"find_unused_parameters", False
|
||||
):
|
||||
# TODO: PyTorch 1.7.0 DDP introduces `self.reducer._rebuild_buckets()` breaking manual_optimization
|
||||
rank_zero_warn(
|
||||
"From PyTorch 1.7.0, Lightning ``manual_optimization`` needs to set ``find_unused_parameters=True`` "
|
||||
"to properly work with DDP."
|
||||
"From PyTorch 1.7.0, Lightning `manual_optimization` needs to set `find_unused_parameters=True` to"
|
||||
" properly work with DDP. Using `find_unused_parameters=True`."
|
||||
)
|
||||
self._ddp_kwargs["find_unused_parameters"] = True
|
||||
|
||||
|
@ -371,7 +367,7 @@ class DDPPlugin(ParallelPlugin):
|
|||
obj = [obj]
|
||||
if self.global_rank != src:
|
||||
obj = [None]
|
||||
broadcast_object_list(obj, src, group=_group.WORLD)
|
||||
torch.distributed.broadcast_object_list(obj, src, group=_group.WORLD)
|
||||
return obj[0]
|
||||
|
||||
def pre_backward(self, closure_loss: torch.Tensor) -> None:
|
||||
|
|
|
@ -27,12 +27,11 @@ from torch.nn.parallel.distributed import DistributedDataParallel
|
|||
import pytorch_lightning as pl
|
||||
from pytorch_lightning.overrides import LightningDistributedModule
|
||||
from pytorch_lightning.overrides.distributed import prepare_for_backward
|
||||
from pytorch_lightning.overrides.torch_distributed import broadcast_object_list
|
||||
from pytorch_lightning.plugins.environments.cluster_environment import ClusterEnvironment
|
||||
from pytorch_lightning.plugins.io.checkpoint_plugin import CheckpointIO
|
||||
from pytorch_lightning.plugins.training_type.parallel import ParallelPlugin
|
||||
from pytorch_lightning.trainer.states import TrainerFn
|
||||
from pytorch_lightning.utilities import _TORCH_GREATER_EQUAL_1_7, _TORCH_GREATER_EQUAL_1_8, rank_zero_warn
|
||||
from pytorch_lightning.utilities import _TORCH_GREATER_EQUAL_1_8, rank_zero_warn
|
||||
from pytorch_lightning.utilities.apply_func import apply_to_collection, move_data_to_device
|
||||
from pytorch_lightning.utilities.cloud_io import atomic_save
|
||||
from pytorch_lightning.utilities.cloud_io import load as pl_load
|
||||
|
@ -238,15 +237,13 @@ class DDPSpawnPlugin(ParallelPlugin):
|
|||
# when not all parameter backward hooks are fired by the autograd engine even if require_grad is set to True.
|
||||
# This flag does come with a performance hit, so it is suggested to disable in cases where it is possible.
|
||||
self._ddp_kwargs["find_unused_parameters"] = self._ddp_kwargs.get("find_unused_parameters", True)
|
||||
# todo: PyTorch 1.7.0 DDP introduces `self.reducer._rebuild_buckets()` breaking manual_optimization
|
||||
if (
|
||||
_TORCH_GREATER_EQUAL_1_7
|
||||
and not self.lightning_module.automatic_optimization
|
||||
and not self._ddp_kwargs.get("find_unused_parameters", False)
|
||||
if not self.lightning_module.automatic_optimization and not self._ddp_kwargs.get(
|
||||
"find_unused_parameters", False
|
||||
):
|
||||
# TODO: PyTorch 1.7.0 DDP introduces `self.reducer._rebuild_buckets()` breaking manual_optimization
|
||||
rank_zero_warn(
|
||||
"From PyTorch 1.7.0, Lightning ``manual_optimization`` needs to set ``find_unused_parameters=True`` "
|
||||
"to properly work with DDP."
|
||||
"From PyTorch 1.7.0, Lightning `manual_optimization` needs to set `find_unused_parameters=True` to"
|
||||
" properly work with DDP. Using `find_unused_parameters=True`."
|
||||
)
|
||||
self._ddp_kwargs["find_unused_parameters"] = True
|
||||
|
||||
|
@ -323,7 +320,7 @@ class DDPSpawnPlugin(ParallelPlugin):
|
|||
obj = [obj]
|
||||
if self.global_rank != src:
|
||||
obj = [None]
|
||||
broadcast_object_list(obj, src, group=_group.WORLD)
|
||||
torch.distributed.broadcast_object_list(obj, src, group=_group.WORLD)
|
||||
return obj[0]
|
||||
|
||||
def model_to_device(self):
|
||||
|
|
|
@ -74,7 +74,6 @@ from pytorch_lightning.utilities.exceptions import MisconfigurationException
|
|||
from pytorch_lightning.utilities.imports import (
|
||||
_HOROVOD_AVAILABLE,
|
||||
_IPU_AVAILABLE,
|
||||
_TORCH_GREATER_EQUAL_1_7,
|
||||
_TORCH_GREATER_EQUAL_1_8,
|
||||
_TPU_AVAILABLE,
|
||||
)
|
||||
|
@ -190,10 +189,8 @@ class AcceleratorConnector:
|
|||
self.deterministic = deterministic
|
||||
if _TORCH_GREATER_EQUAL_1_8:
|
||||
torch.use_deterministic_algorithms(deterministic)
|
||||
elif _TORCH_GREATER_EQUAL_1_7:
|
||||
else:
|
||||
torch.set_deterministic(deterministic)
|
||||
else: # the minimum version Lightning supports is PyTorch 1.6
|
||||
torch._set_deterministic(deterministic)
|
||||
if deterministic:
|
||||
# fixing non-deterministic part of horovod
|
||||
# https://github.com/PyTorchLightning/pytorch-lightning/pull/1572/files#r420279383
|
||||
|
|
|
@ -44,7 +44,6 @@ from pytorch_lightning.utilities.imports import ( # noqa: F401
|
|||
_OMEGACONF_AVAILABLE,
|
||||
_POPTORCH_AVAILABLE,
|
||||
_RICH_AVAILABLE,
|
||||
_TORCH_GREATER_EQUAL_1_7,
|
||||
_TORCH_GREATER_EQUAL_1_8,
|
||||
_TORCH_GREATER_EQUAL_1_9,
|
||||
_TORCH_GREATER_EQUAL_1_10,
|
||||
|
|
|
@ -305,9 +305,6 @@ class CaptureIterableDataset(IterableDataset):
|
|||
# access wrapped dataset attributes
|
||||
dataset_dict = self.dataset.__dict__
|
||||
|
||||
# create a tuple of sampler names
|
||||
samplers_names = tuple(v.__class__.__name__ for k, v in dataset_dict.items() if isinstance(v, Sampler))
|
||||
|
||||
# create a dictionary of generator present within the dataset attributes
|
||||
dataset_sampler_generators = {k: v for k, v in dataset_dict.items() if isinstance(v, (Generator, Iterator))}
|
||||
|
||||
|
@ -318,31 +315,17 @@ class CaptureIterableDataset(IterableDataset):
|
|||
if isinstance(generator, Sampler):
|
||||
continue
|
||||
|
||||
# used to handle a weird behaviour from PyTorch 1.6
|
||||
# where the sampler is converted to a list_iterator
|
||||
is_legacy = False
|
||||
# wrap the generator into a `FastForwardSampler`
|
||||
sampler = FastForwardSampler(generator, attr_name=generator_attr_name)
|
||||
|
||||
if isinstance(generator, Generator):
|
||||
# Generator names have the form `SamplerName.__iter__`
|
||||
generator_name = generator.__qualname__.split(".")[0]
|
||||
else:
|
||||
# assume the retrieved iterator is coming from sampler.
|
||||
is_legacy = True
|
||||
# if `CaptureIterableDataset` was available, the sampler should reload its own state.
|
||||
if self._state_dict is not None:
|
||||
sampler.load_state_dict(self._state_dict[generator_attr_name])
|
||||
# store the samplers
|
||||
self.samplers[generator_attr_name] = sampler
|
||||
|
||||
# validate the base generator name matches a sampler name.
|
||||
if is_legacy or any(sampler_name == generator_name for sampler_name in samplers_names):
|
||||
|
||||
# wrap the generator into a `FastForwardSampler`
|
||||
sampler = FastForwardSampler(generator, attr_name=generator_attr_name)
|
||||
|
||||
# if `CaptureIterableDataset` was available, the sampler should reload its own state.
|
||||
if self._state_dict is not None:
|
||||
sampler.load_state_dict(self._state_dict[generator_attr_name])
|
||||
# store the samplers
|
||||
self.samplers[generator_attr_name] = sampler
|
||||
|
||||
# replace generator with the generator from the `FastForwardSampler`.
|
||||
dataset_dict[generator_attr_name] = iter(sampler)
|
||||
# replace generator with the generator from the `FastForwardSampler`.
|
||||
dataset_dict[generator_attr_name] = iter(sampler)
|
||||
|
||||
self.reset_on_epoch()
|
||||
|
||||
|
|
|
@ -19,7 +19,6 @@ from typing import Any, Callable, Dict, IO, Optional, Union
|
|||
import fsspec
|
||||
import torch
|
||||
from fsspec.implementations.local import AbstractFileSystem, LocalFileSystem
|
||||
from packaging.version import Version
|
||||
|
||||
|
||||
def load(
|
||||
|
@ -59,12 +58,6 @@ def atomic_save(checkpoint: Dict[str, Any], filepath: Union[str, Path]) -> None:
|
|||
"""
|
||||
|
||||
bytesbuffer = io.BytesIO()
|
||||
# Can't use the new zipfile serialization for 1.6.0 because there's a bug in
|
||||
# torch.hub.load_state_dict_from_url() that prevents it from loading the new files.
|
||||
# More details can be found here: https://github.com/pytorch/pytorch/issues/42239
|
||||
if Version(torch.__version__).release[:3] == (1, 6, 0):
|
||||
torch.save(checkpoint, bytesbuffer, _use_new_zipfile_serialization=False)
|
||||
else:
|
||||
torch.save(checkpoint, bytesbuffer)
|
||||
torch.save(checkpoint, bytesbuffer)
|
||||
with fsspec.open(filepath, "wb") as f:
|
||||
f.write(bytesbuffer.getvalue())
|
||||
|
|
|
@ -70,7 +70,6 @@ def _compare_version(package: str, op: Callable, version: str, use_base_version:
|
|||
|
||||
_IS_WINDOWS = platform.system() == "Windows"
|
||||
_IS_INTERACTIVE = hasattr(sys, "ps1") # https://stackoverflow.com/a/64523765
|
||||
_TORCH_GREATER_EQUAL_1_7 = _compare_version("torch", operator.ge, "1.7.0")
|
||||
_TORCH_GREATER_EQUAL_1_8 = _compare_version("torch", operator.ge, "1.8.0")
|
||||
_TORCH_GREATER_EQUAL_1_8_1 = _compare_version("torch", operator.ge, "1.8.1")
|
||||
_TORCH_GREATER_EQUAL_1_9 = _compare_version("torch", operator.ge, "1.9.0")
|
||||
|
@ -112,4 +111,4 @@ else:
|
|||
|
||||
# experimental feature within PyTorch Lightning.
|
||||
def _fault_tolerant_training() -> bool:
|
||||
return _TORCH_GREATER_EQUAL_1_7 and int(os.getenv("PL_FAULT_TOLERANT_TRAINING", 0))
|
||||
return bool(int(os.getenv("PL_FAULT_TOLERANT_TRAINING", 0)))
|
||||
|
|
|
@ -21,7 +21,7 @@ from typing import Optional
|
|||
import numpy as np
|
||||
import torch
|
||||
|
||||
from pytorch_lightning.utilities import _TORCH_GREATER_EQUAL_1_7, rank_zero_warn
|
||||
from pytorch_lightning.utilities import rank_zero_warn
|
||||
from pytorch_lightning.utilities.distributed import rank_zero_only
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
@ -113,9 +113,7 @@ def pl_worker_init_function(worker_id: int, rank: Optional[int] = None) -> None:
|
|||
np.random.seed(ss.generate_state(4))
|
||||
# Spawn distinct SeedSequences for the PyTorch PRNG and the stdlib random module
|
||||
torch_ss, stdlib_ss = ss.spawn(2)
|
||||
# PyTorch 1.7 and above takes a 64-bit seed
|
||||
dtype = np.uint64 if _TORCH_GREATER_EQUAL_1_7 else np.uint32
|
||||
torch.manual_seed(torch_ss.generate_state(1, dtype=dtype)[0])
|
||||
torch.manual_seed(torch_ss.generate_state(1, dtype=np.uint64)[0])
|
||||
# use 128 bits expressed as an integer
|
||||
stdlib_seed = (stdlib_ss.generate_state(2, dtype=np.uint64).astype(object) * [1 << 64, 1]).sum()
|
||||
random.seed(stdlib_seed)
|
||||
|
|
|
@ -31,7 +31,7 @@ from tests.helpers.simple_models import RegressionModel
|
|||
if _TORCH_GREATER_EQUAL_1_8:
|
||||
from torch.quantization import FakeQuantizeBase
|
||||
else:
|
||||
# For torch 1.6 and 1.7.
|
||||
# For torch 1.7.
|
||||
from torch.quantization import FakeQuantize as FakeQuantizeBase
|
||||
|
||||
|
||||
|
|
|
@ -22,7 +22,7 @@ import pytest
|
|||
import torch.distributed
|
||||
|
||||
from pytorch_lightning.plugins.environments.lightning_environment import find_free_network_port
|
||||
from pytorch_lightning.utilities.imports import _TORCH_GREATER_EQUAL_1_7, _TORCH_GREATER_EQUAL_1_8
|
||||
from pytorch_lightning.utilities.imports import _TORCH_GREATER_EQUAL_1_8
|
||||
from tests import _PATH_DATASETS
|
||||
|
||||
|
||||
|
@ -95,10 +95,8 @@ def reset_deterministic_algorithm():
|
|||
yield
|
||||
if _TORCH_GREATER_EQUAL_1_8:
|
||||
torch.use_deterministic_algorithms(False)
|
||||
elif _TORCH_GREATER_EQUAL_1_7:
|
||||
else:
|
||||
torch.set_deterministic(False)
|
||||
else: # the minimum version Lightning supports is PyTorch 1.6
|
||||
torch._set_deterministic(False)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
|
|
|
@ -33,7 +33,7 @@ from pytorch_lightning.trainer.connectors.logger_connector.result import (
|
|||
ResultCollection,
|
||||
ResultMetric,
|
||||
)
|
||||
from pytorch_lightning.utilities.imports import _fault_tolerant_training, _TORCH_GREATER_EQUAL_1_7
|
||||
from pytorch_lightning.utilities.imports import _fault_tolerant_training
|
||||
from tests.helpers import BoringModel
|
||||
from tests.helpers.runif import RunIf
|
||||
|
||||
|
@ -470,21 +470,18 @@ def result_collection_reload(**kwargs):
|
|||
|
||||
|
||||
@mock.patch.dict(os.environ, {"PL_FAULT_TOLERANT_TRAINING": "1"})
|
||||
@pytest.mark.skipif(not _TORCH_GREATER_EQUAL_1_7, reason="Requires at least PyTorch 1.7")
|
||||
def test_result_collection_reload(tmpdir):
|
||||
result_collection_reload(default_root_dir=tmpdir)
|
||||
|
||||
|
||||
@RunIf(min_gpus=1)
|
||||
@mock.patch.dict(os.environ, {"PL_FAULT_TOLERANT_TRAINING": "1"})
|
||||
@pytest.mark.skipif(not _TORCH_GREATER_EQUAL_1_7, reason="Requires at least PyTorch 1.7")
|
||||
def test_result_collection_reload_1_gpu_ddp(tmpdir):
|
||||
result_collection_reload(default_root_dir=tmpdir, strategy="ddp", gpus=1)
|
||||
|
||||
|
||||
@RunIf(min_gpus=2, special=True)
|
||||
@mock.patch.dict(os.environ, {"PL_FAULT_TOLERANT_TRAINING": "1"})
|
||||
@pytest.mark.skipif(not _TORCH_GREATER_EQUAL_1_7, reason="Requires at least PyTorch 1.7")
|
||||
def test_result_collection_reload_2_gpus(tmpdir):
|
||||
result_collection_reload(default_root_dir=tmpdir, strategy="ddp", gpus=2)
|
||||
|
||||
|
|
|
@ -46,7 +46,6 @@ class MNISTDataModule(LightningDataModule):
|
|||
self.dataset_cls(self.data_dir, train=False, download=True)
|
||||
|
||||
def setup(self, stage: Optional[str] = None):
|
||||
# TODO: need to split using random_split once updated to torch >= 1.6
|
||||
if stage == "fit" or stage is None:
|
||||
self.mnist_train = self.dataset_cls(self.data_dir, train=True)
|
||||
if stage == "test" or stage is None:
|
||||
|
|
|
@ -24,7 +24,6 @@ from pytorch_lightning.core.optimizer import LightningOptimizer
|
|||
from pytorch_lightning.loops.optimization.optimizer_loop import ClosureResult
|
||||
from pytorch_lightning.utilities.exceptions import MisconfigurationException
|
||||
from tests.helpers import BoringModel
|
||||
from tests.helpers.runif import RunIf
|
||||
|
||||
|
||||
def test_closure_result_deepcopy():
|
||||
|
@ -140,7 +139,6 @@ class CustomException(Exception):
|
|||
pass
|
||||
|
||||
|
||||
@RunIf(min_torch="1.7.0")
|
||||
@mock.patch.dict(os.environ, {"PL_FAULT_TOLERANT_TRAINING": "1"})
|
||||
@pytest.mark.parametrize("stop_epoch", (0, 1))
|
||||
@pytest.mark.parametrize("stop_batch", (0, 1, 2))
|
||||
|
|
|
@ -253,7 +253,6 @@ def test_loop_hierarchy():
|
|||
assert state_dict == {"state_dict": {"a": 1}, "progress": {"increment": 1}}
|
||||
|
||||
|
||||
@RunIf(min_torch="1.7.0")
|
||||
@mock.patch.dict(os.environ, {"PL_FAULT_TOLERANT_TRAINING": "1"})
|
||||
@pytest.mark.parametrize("stop_epoch", (1, 2))
|
||||
@pytest.mark.parametrize("stop_batch", (1, 2))
|
||||
|
@ -323,7 +322,6 @@ def test_loop_restart_progress_multiple_dataloaders(tmpdir, n_dataloaders, stop_
|
|||
assert trainer.fit_loop.epoch_loop.val_loop.epoch_loop.batch_progress.state_dict() == expected
|
||||
|
||||
|
||||
@RunIf(min_torch="1.7.0")
|
||||
@mock.patch.dict(os.environ, {"PL_FAULT_TOLERANT_TRAINING": "1"})
|
||||
@pytest.mark.parametrize("accumulate_grad_batches", (1, 2, 3))
|
||||
@pytest.mark.parametrize("n_optimizers", (1, 3, 5))
|
||||
|
@ -526,7 +524,6 @@ def test_loop_state_on_exception(accumulate_grad_batches, stop_epoch, stop_batch
|
|||
assert state_dict["epoch_progress"]["current"]["started"] == stop_epoch
|
||||
|
||||
|
||||
@RunIf(min_torch="1.7.0")
|
||||
@mock.patch.dict(os.environ, {"PL_FAULT_TOLERANT_TRAINING": "1"})
|
||||
@pytest.mark.parametrize("n_optimizers", (1, 3, 5))
|
||||
def test_loop_state_on_complete_run(n_optimizers, tmpdir):
|
||||
|
@ -662,7 +659,6 @@ def test_loop_state_on_complete_run(n_optimizers, tmpdir):
|
|||
assert checkpoint["loops"]["fit_loop"] == expected
|
||||
|
||||
|
||||
@RunIf(min_torch="1.7.0")
|
||||
@mock.patch.dict(os.environ, {"PL_FAULT_TOLERANT_TRAINING": "1"})
|
||||
def test_fit_loop_reset(tmpdir):
|
||||
"""Test that the reset logic in fit- and epoch loop is aware of whether the loop is restarting from a completed
|
||||
|
@ -752,7 +748,6 @@ def test_fit_loop_reset(tmpdir):
|
|||
|
||||
|
||||
@mock.patch.dict(os.environ, {"PL_FAULT_TOLERANT_TRAINING": "1"})
|
||||
@RunIf(min_torch="1.7.0")
|
||||
@pytest.mark.parametrize(
|
||||
["train_datasets", "val_datasets"],
|
||||
[([RandomDataset], [RandomDataset]), ([RandomDataset], [RandomDataset, RandomDataset])],
|
||||
|
|
|
@ -20,7 +20,6 @@ from torch.utils.data import DataLoader, Dataset
|
|||
|
||||
from pytorch_lightning import Trainer
|
||||
from pytorch_lightning.plugins import DoublePrecisionPlugin
|
||||
from pytorch_lightning.utilities import _TORCH_GREATER_EQUAL_1_7
|
||||
from tests.helpers.boring_model import BoringModel, RandomDataset
|
||||
from tests.helpers.runif import RunIf
|
||||
|
||||
|
@ -137,10 +136,7 @@ class DoublePrecisionBoringModelComplexBuffer(BoringModel):
|
|||
[
|
||||
DoublePrecisionBoringModel,
|
||||
DoublePrecisionBoringModelNoForward,
|
||||
pytest.param(
|
||||
DoublePrecisionBoringModelComplexBuffer,
|
||||
marks=pytest.mark.skipif(not _TORCH_GREATER_EQUAL_1_7, reason="torch.complex not available"),
|
||||
),
|
||||
DoublePrecisionBoringModelComplexBuffer,
|
||||
],
|
||||
)
|
||||
def test_double_precision(tmpdir, boring_model):
|
||||
|
|
|
@ -26,7 +26,6 @@ from pytorch_lightning.loggers.base import LoggerCollection
|
|||
from pytorch_lightning.loggers.tensorboard import TensorBoardLogger
|
||||
from pytorch_lightning.profiler import AdvancedProfiler, PassThroughProfiler, PyTorchProfiler, SimpleProfiler
|
||||
from pytorch_lightning.profiler.pytorch import RegisterRecordFunction
|
||||
from pytorch_lightning.utilities import _TORCH_GREATER_EQUAL_1_7
|
||||
from pytorch_lightning.utilities.exceptions import MisconfigurationException
|
||||
from pytorch_lightning.utilities.imports import _KINETO_AVAILABLE
|
||||
from tests.helpers import BoringModel, ManualOptimBoringModel
|
||||
|
@ -394,8 +393,7 @@ def test_pytorch_profiler_nested(tmpdir):
|
|||
|
||||
names = {"a", "b", "c"}
|
||||
ops = {"add", "empty", "fill_", "ones", "zero_", "zeros"}
|
||||
if _TORCH_GREATER_EQUAL_1_7:
|
||||
ops = {"aten::" + op for op in ops}
|
||||
ops = {"aten::" + op for op in ops}
|
||||
|
||||
expected = names.union(ops)
|
||||
assert events_name == expected, (events_name, torch.__version__, platform.system())
|
||||
|
|
|
@ -21,7 +21,6 @@ from pytorch_lightning import Trainer
|
|||
from pytorch_lightning.callbacks import ModelCheckpoint
|
||||
from pytorch_lightning.trainer.states import TrainerFn
|
||||
from tests.helpers import BoringModel
|
||||
from tests.helpers.runif import RunIf
|
||||
|
||||
|
||||
class HPCHookdedModel(BoringModel):
|
||||
|
@ -133,7 +132,6 @@ def test_hpc_max_ckpt_version(tmpdir):
|
|||
|
||||
|
||||
@mock.patch.dict(os.environ, {"PL_FAULT_TOLERANT_TRAINING": "1"})
|
||||
@RunIf(min_torch="1.7.0")
|
||||
def test_loops_restore(tmpdir):
|
||||
"""Test that required loop state_dict is loaded correctly by checkpoint connector."""
|
||||
model = BoringModel()
|
||||
|
|
|
@ -26,7 +26,7 @@ from tests.helpers.runif import RunIf
|
|||
|
||||
@pytest.mark.parametrize("register_handler", [False, True])
|
||||
@pytest.mark.parametrize("terminate_gracefully", [False, True])
|
||||
@RunIf(min_torch="1.7.0", skip_windows=True)
|
||||
@RunIf(skip_windows=True)
|
||||
def test_fault_tolerant_sig_handler(register_handler, terminate_gracefully, tmpdir):
|
||||
|
||||
# hack to reset the signal
|
||||
|
|
|
@ -26,7 +26,7 @@ from tests.helpers import BoringModel, RandomDataset
|
|||
from tests.helpers.runif import RunIf
|
||||
|
||||
|
||||
@RunIf(skip_windows=True, min_torch="1.7.0")
|
||||
@RunIf(skip_windows=True)
|
||||
@pytest.mark.parametrize("mode", (1, 2, 3))
|
||||
def test_replace_distributed_sampler(tmpdir, mode):
|
||||
class IndexedRandomDataset(RandomDataset):
|
||||
|
|
|
@ -35,7 +35,6 @@ from pytorch_lightning.utilities.apply_func import apply_to_collection
|
|||
from pytorch_lightning.utilities.auto_restart import CaptureMapDataset, FastForwardSampler
|
||||
from pytorch_lightning.utilities.data import get_len
|
||||
from pytorch_lightning.utilities.exceptions import MisconfigurationException
|
||||
from pytorch_lightning.utilities.imports import _TORCH_GREATER_EQUAL_1_7
|
||||
from tests.helpers.boring_model import RandomDataset
|
||||
|
||||
|
||||
|
@ -312,7 +311,6 @@ def test_nested_calc_num_data(input_data, compute_func, expected_length):
|
|||
assert calculated_length == expected_length
|
||||
|
||||
|
||||
@pytest.mark.skipif(not _TORCH_GREATER_EQUAL_1_7, reason="Requires at least PyTorch 1.7")
|
||||
@mock.patch.dict(os.environ, {"CUDA_VISIBLE_DEVICES": "0,1", "PL_TRAINER_GPUS": "2"})
|
||||
@mock.patch("torch.cuda.device_count", return_value=2)
|
||||
@mock.patch("torch.cuda.is_available", return_value=True)
|
||||
|
|
|
@ -690,7 +690,6 @@ def test_dataloader_to_state_dict_and_reload():
|
|||
}
|
||||
|
||||
|
||||
@RunIf(min_torch="1.7.0")
|
||||
@pytest.mark.parametrize("use_fault_tolerant", ["0", "1"])
|
||||
def test_data_loading_wraps_dataset_and_samplers(use_fault_tolerant, tmpdir):
|
||||
"""This test ensures the dataset and sampler are properly wrapped when fault tolerant is enabled."""
|
||||
|
@ -785,7 +784,6 @@ class RandomGetItemDataset(Dataset):
|
|||
|
||||
# TODO: test with `RandomGeneratorGetItemDataset`
|
||||
@mock.patch.dict(os.environ, {"PL_FAULT_TOLERANT_TRAINING": "1"})
|
||||
@RunIf(min_torch="1.7.0")
|
||||
@pytest.mark.parametrize(
|
||||
"dataset_class",
|
||||
[
|
||||
|
@ -921,7 +919,6 @@ def _run_training(trainer_kwargs, dataset_classes, fail_on_step: int = -1, ckpt_
|
|||
|
||||
|
||||
@mock.patch.dict(os.environ, {"PL_FAULT_TOLERANT_TRAINING": "1"})
|
||||
@RunIf(min_torch="1.7.0")
|
||||
@pytest.mark.parametrize(
|
||||
"dataset_classes",
|
||||
[
|
||||
|
@ -975,7 +972,6 @@ def test_dataset_rng_states_restart_with_lightning(tmpdir, dataset_classes, mult
|
|||
|
||||
|
||||
@mock.patch.dict(os.environ, {"PL_FAULT_TOLERANT_TRAINING": "1"})
|
||||
@RunIf(min_torch="1.7.0")
|
||||
@pytest.mark.parametrize(
|
||||
["train_datasets", "val_datasets"],
|
||||
[
|
||||
|
@ -1139,7 +1135,7 @@ def _fit_model(
|
|||
@pytest.mark.parametrize("failure_on_training", [False, True])
|
||||
@pytest.mark.parametrize("failure_on_step", [False, True])
|
||||
@mock.patch.dict(os.environ, {"PL_FAULT_TOLERANT_TRAINING": "1"})
|
||||
@RunIf(min_torch="1.7.0", skip_windows=True)
|
||||
@RunIf(skip_windows=True)
|
||||
def test_auto_restart_under_signal(on_last_batch, val_check_interval, failure_on_training, failure_on_step, tmpdir):
|
||||
"""This test asserts that if a signal is being sent during the training / validation phase, the model should
|
||||
restart in a reproducible way."""
|
||||
|
|
Loading…
Reference in New Issue