From 0cf9d73d28d9dd0fd419e2a5ee47b7488e5c1883 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mochol=C3=AD?= Date: Wed, 15 Jun 2022 02:46:44 +0200 Subject: [PATCH] Drop PyTorch 1.8 support (#13155) * Drop PyTorch 1.8 support * Missed update * Skip profiler test until supported * Upgrade ipu dockerfile pytorch version * Update XLA version --- .azure-pipelines/gpu-tests.yml | 2 - .github/workflows/ci_dockers.yml | 5 +- .github/workflows/ci_test-conda.yml | 2 +- .github/workflows/ci_test-full.yml | 3 - .github/workflows/ci_test-slow.yml | 2 +- .github/workflows/events-nightly.yml | 3 - .github/workflows/release-docker.yml | 2 +- README.md | 28 ++++----- dockers/base-conda/Dockerfile | 2 +- dockers/base-cuda/Dockerfile | 2 +- dockers/base-xla/Dockerfile | 2 +- dockers/ci-runner-ipu/Dockerfile | 2 +- dockers/release/Dockerfile | 2 +- dockers/tpu-tests/Dockerfile | 2 +- environment.yml | 6 +- pytorch_lightning/profiler/pytorch.py | 5 +- pytorch_lightning/strategies/ddp.py | 5 +- pytorch_lightning/trainer/trainer.py | 5 +- pytorch_lightning/utilities/__init__.py | 1 - pytorch_lightning/utilities/distributed.py | 23 ++------ pytorch_lightning/utilities/imports.py | 4 +- requirements/adjust-versions.py | 3 - requirements/base.txt | 2 +- requirements/examples.txt | 2 +- requirements/extra.txt | 3 +- tests/helpers/runif.py | 11 ---- tests/loggers/test_all.py | 2 +- tests/profiler/test_profiler.py | 2 +- tests/standalone_tests.sh | 11 ++-- tests/strategies/test_ddp_spawn_strategy.py | 2 +- .../test_ddp_strategy_with_comm_hook.py | 8 +-- tests/trainer/test_dataloaders.py | 25 -------- tests/utilities/test_all_gather_grad.py | 2 +- tests/utilities/test_auto_restart.py | 58 +------------------ tests/utilities/test_model_summary.py | 11 +--- 35 files changed, 57 insertions(+), 193 deletions(-) diff --git a/.azure-pipelines/gpu-tests.yml b/.azure-pipelines/gpu-tests.yml index 5ec9db1297..4e4ffcaaca 100644 --- a/.azure-pipelines/gpu-tests.yml +++ b/.azure-pipelines/gpu-tests.yml 
@@ -20,8 +20,6 @@ jobs: - job: pytest strategy: matrix: - 'PyTorch - LTS': - image: "pytorchlightning/pytorch_lightning:base-cuda-py3.7-torch1.8" 'PyTorch - stable': image: "pytorchlightning/pytorch_lightning:base-cuda-py3.9-torch1.11" # how long to run the job before automatically cancelling diff --git a/.github/workflows/ci_dockers.yml b/.github/workflows/ci_dockers.yml index ccd3fd27aa..f806463a71 100644 --- a/.github/workflows/ci_dockers.yml +++ b/.github/workflows/ci_dockers.yml @@ -52,7 +52,7 @@ jobs: matrix: # the config used in '.circleci/config.yml`' python_version: ["3.7"] - xla_version: ["1.8"] + xla_version: ["1.11"] steps: - name: Checkout uses: actions/checkout@v2 @@ -75,11 +75,9 @@ jobs: matrix: include: # the config used in '.azure-pipelines/gpu-tests.yml' - - {python_version: "3.7", pytorch_version: "1.8", cuda_version: "10.2", ubuntu_version: "18.04"} - {python_version: "3.7", pytorch_version: "1.10", cuda_version: "11.1", ubuntu_version: "20.04"} - {python_version: "3.7", pytorch_version: "1.11", cuda_version: "11.3.1", ubuntu_version: "20.04"} # latest (used in Tutorials) - - {python_version: "3.8", pytorch_version: "1.8", cuda_version: "11.1", ubuntu_version: "20.04"} - {python_version: "3.8", pytorch_version: "1.9", cuda_version: "11.1", ubuntu_version: "20.04"} - {python_version: "3.9", pytorch_version: "1.10", cuda_version: "11.1", ubuntu_version: "20.04"} - {python_version: "3.9", pytorch_version: "1.11", cuda_version: "11.3.1", ubuntu_version: "20.04"} @@ -106,7 +104,6 @@ jobs: matrix: include: # see: https://pytorch.org/get-started/previous-versions/ - - {python_version: "3.8", pytorch_version: "1.8", cuda_version: "11.1"} - {python_version: "3.8", pytorch_version: "1.9", cuda_version: "11.1"} - {python_version: "3.8", pytorch_version: "1.10", cuda_version: "11.1"} - {python_version: "3.9", pytorch_version: "1.11", cuda_version: "11.3.1"} diff --git a/.github/workflows/ci_test-conda.yml b/.github/workflows/ci_test-conda.yml index 
0466a83760..69d7845931 100644 --- a/.github/workflows/ci_test-conda.yml +++ b/.github/workflows/ci_test-conda.yml @@ -23,7 +23,7 @@ jobs: fail-fast: false matrix: python-version: ["3.8"] # previous to last Python version as that one is already used in test-full - pytorch-version: ["1.8", "1.9", "1.10"] + pytorch-version: ["1.9", "1.10"] # nightly: add when there's a release candidate include: - {python-version: "3.9", pytorch-version: "1.11"} diff --git a/.github/workflows/ci_test-full.yml b/.github/workflows/ci_test-full.yml index bca5699d43..eaef6de6ed 100644 --- a/.github/workflows/ci_test-full.yml +++ b/.github/workflows/ci_test-full.yml @@ -24,9 +24,6 @@ jobs: python-version: ["3.7", "3.9"] # minimum, maximum requires: ["oldest", "latest"] release: ["stable"] - exclude: - # Skip if torch<1.8 and py3.9 on Linux: https://github.com/pytorch/pytorch/issues/50014 - - {os: ubuntu-20.04, python-version: "3.9", requires: "oldest"} # TODO: re-enable RC testing # include: # - {os: ubuntu-20.04, python-version: "3.10", requires: "latest", release: "pre"} diff --git a/.github/workflows/ci_test-slow.yml b/.github/workflows/ci_test-slow.yml index e2b5ca20b2..68cd58acff 100644 --- a/.github/workflows/ci_test-slow.yml +++ b/.github/workflows/ci_test-slow.yml @@ -20,7 +20,7 @@ jobs: os: [ubuntu-20.04, windows-2019, macOS-10.15] # same config as '.azure-pipelines/gpu-tests.yml' python-version: ["3.7"] - pytorch-version: ["1.8"] + pytorch-version: ["1.11"] timeout-minutes: 20 steps: diff --git a/.github/workflows/events-nightly.yml b/.github/workflows/events-nightly.yml index b4f137e898..b4a9e370b6 100644 --- a/.github/workflows/events-nightly.yml +++ b/.github/workflows/events-nightly.yml @@ -115,11 +115,9 @@ jobs: matrix: include: # the config used in '.azure-pipelines/gpu-tests.yml' - - {python_version: "3.7", pytorch_version: "1.8", cuda_version: "10.2", ubuntu_version: "18.04"} - {python_version: "3.7", pytorch_version: "1.10", cuda_version: "11.1", ubuntu_version: "20.04"} 
- {python_version: "3.7", pytorch_version: "1.11", cuda_version: "11.3.1", ubuntu_version: "20.04"} # latest (used in Tutorials) - - {python_version: "3.8", pytorch_version: "1.8", cuda_version: "11.1", ubuntu_version: "20.04"} - {python_version: "3.8", pytorch_version: "1.9", cuda_version: "11.1", ubuntu_version: "20.04"} - {python_version: "3.9", pytorch_version: "1.10", cuda_version: "11.1", ubuntu_version: "20.04"} - {python_version: "3.9", pytorch_version: "1.11", cuda_version: "11.3.1", ubuntu_version: "20.04"} @@ -168,7 +166,6 @@ jobs: matrix: include: # see: https://pytorch.org/get-started/previous-versions/ - - {python_version: "3.8", pytorch_version: "1.8", cuda_version: "11.1"} - {python_version: "3.8", pytorch_version: "1.9", cuda_version: "11.1"} - {python_version: "3.8", pytorch_version: "1.10", cuda_version: "11.1"} - {python_version: "3.9", pytorch_version: "1.11", cuda_version: "11.3.1"} diff --git a/.github/workflows/release-docker.yml b/.github/workflows/release-docker.yml index 936373e838..9d87f1a582 100644 --- a/.github/workflows/release-docker.yml +++ b/.github/workflows/release-docker.yml @@ -16,7 +16,7 @@ jobs: fail-fast: false matrix: python_version: ["3.7", "3.8", "3.9"] - pytorch_version: ["1.8", "1.9", "1.10"] + pytorch_version: ["1.9", "1.10"] steps: - name: Checkout uses: actions/checkout@v2 diff --git a/README.md b/README.md index ca79aa53c4..7c75fbb242 100644 --- a/README.md +++ b/README.md @@ -78,17 +78,17 @@ Lightning is rigorously tested across multiple CPUs, GPUs, TPUs, IPUs, and HPUs
-| System / PyTorch ver. | 1.8 (LTS, min. req.) | 1.9 | 1.10 | 1.11 (latest) | -| :------------------------: | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | -| Linux py3.7 \[GPUs\*\*\] | [![Build Status]()](https://dev.azure.com/PytorchLightning/pytorch-lightning/_build/latest?definitionId=6&branchName=master) | - | - | - | -| Linux py3.7 \[TPUs\*\*\*\] | - | [![CircleCI](https://circleci.com/gh/PyTorchLightning/pytorch-lightning/tree/master.svg?style=svg)](https://circleci.com/gh/PyTorchLightning/pytorch-lightning/tree/master) | - | - | -| Linux py3.8 \[IPUs\] | - | [![Build Status]()](https://dev.azure.com/PytorchLightning/pytorch-lightning/_build/latest?definitionId=6&branchName=master) | - | - | -| Linux py3.8 \[HPUs\] | - | - | [![Build Status]()](https://dev.azure.com/PytorchLightning/pytorch-lightning/_build/latest?definitionId=6&branchName=master) | - | -| Linux py3.8 (with Conda) | 
[![Test](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-conda.yml/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-conda.yml) | [![Test](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-conda.yml/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-conda.yml) | [![Test](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-conda.yml/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-conda.yml) | - | -| Linux py3.9 (with Conda) | - | - | - | [![Test](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-conda.yml/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-conda.yml) | -| Linux py3.{7,9} | [![Test](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-full.yml/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-full.yml) | - | - | [![Test](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-full.yml/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-full.yml) | -| OSX py3.{7,9} | [![Test](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-full.yml/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-full.yml) | - | - | [![Test](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-full.yml/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-full.yml) | -| Windows py3.{7,9} | 
[![Test](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-full.yml/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-full.yml) | - | - | [![Test](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-full.yml/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-full.yml) | +| System / PyTorch ver. | 1.9 | 1.10 | 1.11 (latest) | +| :------------------------: | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| Linux py3.7 \[GPUs\*\*\] | - | - | - | +| Linux py3.7 \[TPUs\*\*\*\] | [![CircleCI](https://circleci.com/gh/PyTorchLightning/pytorch-lightning/tree/master.svg?style=svg)](https://circleci.com/gh/PyTorchLightning/pytorch-lightning/tree/master) | - | - | +| Linux py3.8 \[IPUs\] | [![Build Status]()](https://dev.azure.com/PytorchLightning/pytorch-lightning/_build/latest?definitionId=6&branchName=master) | - | - | +| Linux py3.8 \[HPUs\] | - | [![Build Status]()](https://dev.azure.com/PytorchLightning/pytorch-lightning/_build/latest?definitionId=6&branchName=master) | - | +| Linux py3.8 (with Conda) | 
[![Test](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-conda.yml/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-conda.yml) | [![Test](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-conda.yml/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-conda.yml) | - | +| Linux py3.9 (with Conda) | - | - | [![Test](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-conda.yml/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-conda.yml) | +| Linux py3.{7,9} | - | - | [![Test](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-full.yml/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-full.yml) | +| OSX py3.{7,9} | - | - | [![Test](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-full.yml/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-full.yml) | +| Windows py3.{7,9} | - | - | [![Test](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-full.yml/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-full.yml) | - _\*\* tests run on two NVIDIA P100_ - _\*\*\* tests run on Google GKE TPUv2/3. 
TPU py3.7 means we support Colab and Kaggle env._ @@ -126,9 +126,9 @@ pip install pytorch-lightning['extra'] conda install pytorch-lightning -c conda-forge ``` -#### Install stable 1.5.x +#### Install stable 1.6.x -the actual status of 1.5 \[stable\] is following: +the current status of 1.6 \[stable\] is as follows: ![CI basic testing](https://github.com/PyTorchLightning/pytorch-lightning/workflows/CI%20basic%20testing/badge.svg?branch=release%2F1.5.x&event=push) ![CI complete testing](https://github.com/PyTorchLightning/pytorch-lightning/workflows/CI%20complete%20testing/badge.svg?branch=release%2F1.5.x&event=push) @@ -142,7 +142,7 @@ Install future release from the source pip install git+https://github.com/PytorchLightning/pytorch-lightning.git@release/1.5.x --upgrade ``` -#### Install bleeding-edge - future 1.6 +#### Install bleeding-edge - future 1.7 Install nightly from the source (no guarantees) diff --git a/dockers/base-conda/Dockerfile b/dockers/base-conda/Dockerfile index 72603b04ff..4b91748775 100644 --- a/dockers/base-conda/Dockerfile +++ b/dockers/base-conda/Dockerfile @@ -17,7 +17,7 @@ ARG CUDA_VERSION=11.3.1 FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu20.04 ARG PYTHON_VERSION=3.9 -ARG PYTORCH_VERSION=1.8 +ARG PYTORCH_VERSION=1.9 ARG CONDA_VERSION=4.11.0 SHELL ["/bin/bash", "-c"] diff --git a/dockers/base-cuda/Dockerfile b/dockers/base-cuda/Dockerfile index ded351f163..729d73a75a 100644 --- a/dockers/base-cuda/Dockerfile +++ b/dockers/base-cuda/Dockerfile @@ -18,7 +18,7 @@ ARG CUDA_VERSION=11.3.1 FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION} ARG PYTHON_VERSION=3.9 -ARG PYTORCH_VERSION=1.8 +ARG PYTORCH_VERSION=1.9 SHELL ["/bin/bash", "-c"] # https://techoverflow.net/2019/05/18/how-to-fix-configuring-tzdata-interactive-input-when-building-docker-images/ diff --git a/dockers/base-xla/Dockerfile b/dockers/base-xla/Dockerfile index d933aac67d..f2f769329a 100644 --- a/dockers/base-xla/Dockerfile +++ b/dockers/base-xla/Dockerfile @@ -19,7
+19,7 @@ LABEL maintainer="PyTorchLightning " # CALL: docker image build -t pytorch-lightning:XLA-image -f dockers/base-xla/Dockerfile . --build-arg PYTHON_VERSION=3.8 ARG PYTHON_VERSION=3.9 ARG CONDA_VERSION=4.9.2 -ARG XLA_VERSION=1.8 +ARG XLA_VERSION=1.11 SHELL ["/bin/bash", "-c"] # for skipping configurations diff --git a/dockers/ci-runner-ipu/Dockerfile b/dockers/ci-runner-ipu/Dockerfile index 0628142e91..1d1a41ab20 100644 --- a/dockers/ci-runner-ipu/Dockerfile +++ b/dockers/ci-runner-ipu/Dockerfile @@ -13,7 +13,7 @@ # limitations under the License. ARG PYTHON_VERSION=3.9 -ARG PYTORCH_VERSION=1.7 +ARG PYTORCH_VERSION=1.9 FROM pytorchlightning/pytorch_lightning:base-ipu-py${PYTHON_VERSION}-torch${PYTORCH_VERSION} diff --git a/dockers/release/Dockerfile b/dockers/release/Dockerfile index a0ba3a4a41..39da851e92 100644 --- a/dockers/release/Dockerfile +++ b/dockers/release/Dockerfile @@ -13,7 +13,7 @@ # limitations under the License. ARG PYTHON_VERSION=3.9 -ARG PYTORCH_VERSION=1.8 +ARG PYTORCH_VERSION=1.9 FROM pytorchlightning/pytorch_lightning:base-cuda-py${PYTHON_VERSION}-torch${PYTORCH_VERSION} diff --git a/dockers/tpu-tests/Dockerfile b/dockers/tpu-tests/Dockerfile index d4c58c665e..7b13eef747 100644 --- a/dockers/tpu-tests/Dockerfile +++ b/dockers/tpu-tests/Dockerfile @@ -13,7 +13,7 @@ # limitations under the License. 
ARG PYTHON_VERSION=3.9 -ARG PYTORCH_VERSION=1.8 +ARG PYTORCH_VERSION=1.9 FROM pytorchlightning/pytorch_lightning:base-xla-py${PYTHON_VERSION}-torch${PYTORCH_VERSION} diff --git a/environment.yml b/environment.yml index 28feb0b083..d6f885f00b 100644 --- a/environment.yml +++ b/environment.yml @@ -29,7 +29,7 @@ dependencies: - python>=3.7 - pip>20.1 - numpy>=1.17.2 - - pytorch>=1.8.* + - pytorch>=1.9.* - future>=0.17.1 - PyYAML>=5.1 - tqdm>=4.41.0 @@ -41,10 +41,10 @@ dependencies: - scikit-learn>=0.20.0 - matplotlib>=3.1.1 - omegaconf>=2.0.5 - - torchtext>=0.9.* + - torchtext>=0.10.* # Examples - - torchvision>=0.9.* + - torchvision>=0.10.* - pip: - test-tube>=0.7.5 diff --git a/pytorch_lightning/profiler/pytorch.py b/pytorch_lightning/profiler/pytorch.py index 65651aef1d..062031bafa 100644 --- a/pytorch_lightning/profiler/pytorch.py +++ b/pytorch_lightning/profiler/pytorch.py @@ -25,7 +25,7 @@ from torch.autograd.profiler import record_function from pytorch_lightning.profiler.profiler import Profiler from pytorch_lightning.utilities.exceptions import MisconfigurationException -from pytorch_lightning.utilities.imports import _KINETO_AVAILABLE, _TORCH_GREATER_EQUAL_1_9 +from pytorch_lightning.utilities.imports import _KINETO_AVAILABLE from pytorch_lightning.utilities.rank_zero import rank_zero_warn from pytorch_lightning.utilities.warnings import WarningCache @@ -445,8 +445,7 @@ class PyTorchProfiler(Profiler): if self._schedule is not None: self.profiler.step_num = self._schedule.num_step self.profiler.step() - if _TORCH_GREATER_EQUAL_1_9: - self.profiler.add_metadata("Framework", "pytorch-lightning") + self.profiler.add_metadata("Framework", "pytorch-lightning") def summary(self) -> str: if not self._profiler_kwargs.get("enabled", True) or self._emit_nvtx: diff --git a/pytorch_lightning/strategies/ddp.py b/pytorch_lightning/strategies/ddp.py index 100f0c38e1..94cf2637e0 100644 --- a/pytorch_lightning/strategies/ddp.py +++ b/pytorch_lightning/strategies/ddp.py @@ 
-53,7 +53,6 @@ from pytorch_lightning.utilities.exceptions import DeadlockDetectedException from pytorch_lightning.utilities.imports import ( _FAIRSCALE_AVAILABLE, _IS_WINDOWS, - _TORCH_GREATER_EQUAL_1_9, _TORCH_GREATER_EQUAL_1_10, _TORCH_GREATER_EQUAL_1_11, ) @@ -228,9 +227,7 @@ class DDPStrategy(ParallelStrategy): def _register_ddp_hooks(self) -> None: log.detail(f"{self.__class__.__name__}: registering ddp hooks") - # In 1.8, DDP communication hooks only work with NCCL backend and SPSD (single process single device) mode - # Since 1.9, DDP communication hooks can work on all backends. - if _TORCH_GREATER_EQUAL_1_9 or (self.root_device.type == "cuda" and self._is_single_process_single_device): + if self.root_device.type == "cuda" and self._is_single_process_single_device: register_ddp_comm_hook( model=self.model, ddp_comm_state=self._ddp_comm_state, diff --git a/pytorch_lightning/trainer/trainer.py b/pytorch_lightning/trainer/trainer.py index 4f7b1feac2..85de7acbe3 100644 --- a/pytorch_lightning/trainer/trainer.py +++ b/pytorch_lightning/trainer/trainer.py @@ -101,7 +101,7 @@ from pytorch_lightning.utilities.cloud_io import get_filesystem from pytorch_lightning.utilities.data import _auto_add_worker_init_fn, has_len_all_ranks from pytorch_lightning.utilities.distributed import distributed_available from pytorch_lightning.utilities.exceptions import ExitGracefullyException, MisconfigurationException -from pytorch_lightning.utilities.imports import _fault_tolerant_training, _TORCH_GREATER_EQUAL_1_9 +from pytorch_lightning.utilities.imports import _fault_tolerant_training from pytorch_lightning.utilities.meta import is_on_meta_device, materialize_module from pytorch_lightning.utilities.model_helpers import is_overridden from pytorch_lightning.utilities.rank_zero import rank_zero_deprecation, rank_zero_info, rank_zero_warn @@ -2768,8 +2768,7 @@ def _evaluation_context(accelerator: Accelerator) -> Generator: # and HPU & TPU accelerators. 
context_manager_class = ( torch.inference_mode - if _TORCH_GREATER_EQUAL_1_9 - and not (dist.is_initialized() and dist.get_backend() == "gloo") + if not (dist.is_initialized() and dist.get_backend() == "gloo") and not isinstance(accelerator, HPUAccelerator) and not isinstance(accelerator, TPUAccelerator) else torch.no_grad diff --git a/pytorch_lightning/utilities/__init__.py b/pytorch_lightning/utilities/__init__.py index c1d64b8ae7..139caed97e 100644 --- a/pytorch_lightning/utilities/__init__.py +++ b/pytorch_lightning/utilities/__init__.py @@ -46,7 +46,6 @@ from pytorch_lightning.utilities.imports import ( # noqa: F401 _OMEGACONF_AVAILABLE, _POPTORCH_AVAILABLE, _RICH_AVAILABLE, - _TORCH_GREATER_EQUAL_1_9, _TORCH_GREATER_EQUAL_1_10, _TORCH_GREATER_EQUAL_1_11, _TORCH_QUANTIZE_AVAILABLE, diff --git a/pytorch_lightning/utilities/distributed.py b/pytorch_lightning/utilities/distributed.py index c60e4cef69..20740f952d 100644 --- a/pytorch_lightning/utilities/distributed.py +++ b/pytorch_lightning/utilities/distributed.py @@ -21,12 +21,11 @@ from torch import Tensor from torch.nn.parallel.distributed import DistributedDataParallel import pytorch_lightning as pl -from pytorch_lightning.utilities.imports import _HPU_AVAILABLE, _TORCH_GREATER_EQUAL_1_9, _TPU_AVAILABLE +from pytorch_lightning.utilities.imports import _HPU_AVAILABLE, _TPU_AVAILABLE from pytorch_lightning.utilities.rank_zero import rank_zero_debug as new_rank_zero_debug from pytorch_lightning.utilities.rank_zero import rank_zero_only # noqa: F401 from pytorch_lightning.utilities.rank_zero import rank_zero_deprecation from pytorch_lightning.utilities.rank_zero import rank_zero_info as new_rank_zero_info -from pytorch_lightning.utilities.rank_zero import rank_zero_warn as new_rank_zero_warn if _TPU_AVAILABLE: import torch_xla.core.xla_model as xm @@ -221,13 +220,6 @@ def register_ddp_comm_hook( as FP16 compression as wrapper, which could be combined with ddp_comm_hook - .. 
warning :: - DDP communication hook needs pytorch version at least 1.8.0 - - .. warning :: - DDP communication wrapper needs pytorch version at least 1.9.0 - Post-localSGD hook needs pytorch version at least 1.9.0 - Examples: >>> from torch.distributed.algorithms.ddp_comm_hooks import ( # doctest: +SKIP @@ -287,15 +279,10 @@ def register_ddp_comm_hook( ddp_comm_hook: Callable = ddp_comm_hook if ddp_comm_wrapper is not None: - if not _TORCH_GREATER_EQUAL_1_9: - new_rank_zero_warn( - "Not applying DDP comm wrapper. To use communication wrapper, please use pytorch>=1.9.0." - ) - else: - new_rank_zero_info( - f"DDP comm wrapper is provided, apply {ddp_comm_wrapper.__qualname__}({ddp_comm_hook.__qualname__})." - ) - ddp_comm_hook = ddp_comm_wrapper(ddp_comm_hook) + new_rank_zero_info( + f"DDP comm wrapper is provided, apply {ddp_comm_wrapper.__qualname__}({ddp_comm_hook.__qualname__})." + ) + ddp_comm_hook = ddp_comm_wrapper(ddp_comm_hook) new_rank_zero_debug(f"Registering DDP comm hook: {ddp_comm_hook.__qualname__}.") model.register_comm_hook(state=ddp_comm_state, hook=ddp_comm_hook) diff --git a/pytorch_lightning/utilities/imports.py b/pytorch_lightning/utilities/imports.py index 83850f8e74..afeebed85a 100644 --- a/pytorch_lightning/utilities/imports.py +++ b/pytorch_lightning/utilities/imports.py @@ -124,8 +124,6 @@ class _RequirementAvailable: _IS_WINDOWS = platform.system() == "Windows" _IS_INTERACTIVE = hasattr(sys, "ps1") # https://stackoverflow.com/a/64523765 _PYTHON_GREATER_EQUAL_3_8_0 = Version(platform.python_version()) >= Version("3.8.0") -_TORCH_GREATER_EQUAL_1_8_1 = _compare_version("torch", operator.ge, "1.8.1") -_TORCH_GREATER_EQUAL_1_9 = _compare_version("torch", operator.ge, "1.9.0") _TORCH_GREATER_EQUAL_1_9_1 = _compare_version("torch", operator.ge, "1.9.1") _TORCH_GREATER_EQUAL_1_10 = _compare_version("torch", operator.ge, "1.10.0") _TORCH_LESSER_EQUAL_1_10_2 = _compare_version("torch", operator.le, "1.10.2") @@ -146,7 +144,7 @@ _HIVEMIND_AVAILABLE = 
_package_available("hivemind") _HOROVOD_AVAILABLE = _module_available("horovod.torch") _HYDRA_AVAILABLE = _package_available("hydra") _HYDRA_EXPERIMENTAL_AVAILABLE = _module_available("hydra.experimental") -_KINETO_AVAILABLE = _TORCH_GREATER_EQUAL_1_8_1 and torch.profiler.kineto_available() +_KINETO_AVAILABLE = torch.profiler.kineto_available() _NEPTUNE_AVAILABLE = _package_available("neptune") _NEPTUNE_GREATER_EQUAL_0_9 = _NEPTUNE_AVAILABLE and _compare_version("neptune", operator.ge, "0.9.0") _OMEGACONF_AVAILABLE = _package_available("omegaconf") diff --git a/requirements/adjust-versions.py b/requirements/adjust-versions.py index 4b28897982..c01e071abf 100644 --- a/requirements/adjust-versions.py +++ b/requirements/adjust-versions.py @@ -12,9 +12,6 @@ VERSIONS = [ dict(torch="1.10.0", torchvision="0.11.1", torchtext="0.11.0"), dict(torch="1.9.1", torchvision="0.10.1", torchtext="0.10.1"), dict(torch="1.9.0", torchvision="0.10.0", torchtext="0.10.0"), - # dict(torch="1.8.2", torchvision="0.9.1", torchtext="0.9.1"), # LTS # Not on PyPI, commented so 1.8.1 is used - dict(torch="1.8.1", torchvision="0.9.1", torchtext="0.9.1"), - dict(torch="1.8.0", torchvision="0.9.0", torchtext="0.9.0"), ] diff --git a/requirements/base.txt b/requirements/base.txt index 555997c657..a334417c68 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -1,5 +1,5 @@ numpy>=1.17.2, <=1.22.3 -torch>=1.8.*, <=1.11.0 +torch>=1.9.*, <=1.11.0 tqdm>=4.57.0, <=4.63.0 PyYAML>=5.4, <=6.0 fsspec[http]>=2021.05.0, !=2021.06.0, <=2022.2.0 diff --git a/requirements/examples.txt b/requirements/examples.txt index 993087f2e1..3f23086655 100644 --- a/requirements/examples.txt +++ b/requirements/examples.txt @@ -1,3 +1,3 @@ -torchvision>=0.9.*, <=0.12.0 +torchvision>=0.10.*, <=0.12.0 gym[classic_control]>=0.17.0, <=0.23.1 ipython[all] <=8.1.1 diff --git a/requirements/extra.txt b/requirements/extra.txt index 8330ef40ac..e7d54903c2 100644 --- a/requirements/extra.txt +++ b/requirements/extra.txt @@ 
-1,7 +1,6 @@ # extended list of package dependencies to reach full functionality - matplotlib>3.1, <3.5.3 -torchtext>=0.9.*, <=0.12.0 +torchtext>=0.10.*, <=0.12.0 omegaconf>=2.0.5, <=2.1.* hydra-core>=1.0.5, <=1.1.* jsonargparse[signatures]>=4.9.0, <=4.9.0 diff --git a/tests/helpers/runif.py b/tests/helpers/runif.py index 5b7cd30b9c..6ad86653fb 100644 --- a/tests/helpers/runif.py +++ b/tests/helpers/runif.py @@ -82,7 +82,6 @@ class RunIf: fairscale_fully_sharded: bool = False, deepspeed: bool = False, rich: bool = False, - skip_hanging_spawn: bool = False, omegaconf: bool = False, slow: bool = False, bagua: bool = False, @@ -111,7 +110,6 @@ class RunIf: fairscale_fully_sharded: Require that `fairscale` fully sharded support is available. deepspeed: Require that microsoft/DeepSpeed is installed. rich: Require that willmcgugan/rich is installed. - skip_hanging_spawn: Skip the test as it's impacted by hanging loggers on spawn. omegaconf: Require that omry/omegaconf is installed. slow: Mark the test as slow, our CI will run it in a separate job. bagua: Require that BaguaSys/bagua is installed. 
@@ -213,15 +211,6 @@ class RunIf: conditions.append(not _RICH_AVAILABLE) reasons.append("Rich") - if skip_hanging_spawn: - # strategy=ddp_spawn, accelerator=cpu, python>=3.8, torch<1.9 does not work - py_version = f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}" - ge_3_8 = Version(py_version) >= Version("3.8") - torch_version = get_distribution("torch").version - old_torch = Version(torch_version) < Version("1.9") - conditions.append(ge_3_8 and old_torch) - reasons.append("Impacted by hanging DDP spawn") - if omegaconf: conditions.append(not _OMEGACONF_AVAILABLE) reasons.append("omegaconf") diff --git a/tests/loggers/test_all.py b/tests/loggers/test_all.py index 1deec6f25b..e2688afa79 100644 --- a/tests/loggers/test_all.py +++ b/tests/loggers/test_all.py @@ -299,7 +299,7 @@ class RankZeroLoggerCheck(Callback): @pytest.mark.parametrize("logger_class", ALL_LOGGER_CLASSES_WO_NEPTUNE_WANDB) -@RunIf(skip_windows=True, skip_hanging_spawn=True) +@RunIf(skip_windows=True) def test_logger_created_on_rank_zero_only(tmpdir, monkeypatch, logger_class): """Test that loggers get replaced by dummy loggers on global rank > 0.""" _patch_comet_atexit(monkeypatch) diff --git a/tests/profiler/test_profiler.py b/tests/profiler/test_profiler.py index 6bf1989c6a..090dea820c 100644 --- a/tests/profiler/test_profiler.py +++ b/tests/profiler/test_profiler.py @@ -321,7 +321,7 @@ def pytorch_profiler(tmpdir): return PyTorchProfiler(dirpath=tmpdir, filename="profiler") -@RunIf(max_torch="1.8.1") +@pytest.mark.xfail(raises=AssertionError, reason="TODO: Support after 1.11 profiler added") def test_pytorch_profiler_describe(pytorch_profiler): """Ensure the profiler won't fail when reporting the summary.""" with pytorch_profiler.profile("on_test_start"): diff --git a/tests/standalone_tests.sh b/tests/standalone_tests.sh index 10892e4ab4..db9b950f08 100755 --- a/tests/standalone_tests.sh +++ b/tests/standalone_tests.sh @@ -64,13 +64,12 @@ if [ $? 
-eq 0 ]; then report+="Ran\ttests/utilities/test_warnings.py\n" fi -# TODO: enable when CI uses torch>=1.9 # test deadlock is properly handled with TorchElastic. -# LOGS=$(PL_RUN_STANDALONE_TESTS=1 PL_RECONCILE_PROCESS=1 python -m torch.distributed.run --nproc_per_node=2 --max_restarts 0 -m coverage run --source pytorch_lightning -a tests/plugins/environments/torch_elastic_deadlock.py | grep "SUCCEEDED") -# if [ -z "$LOGS" ]; then -# exit 1 -# fi -# report+="Ran\ttests/plugins/environments/torch_elastic_deadlock.py\n" +LOGS=$(PL_RUN_STANDALONE_TESTS=1 PL_RECONCILE_PROCESS=1 python -m torch.distributed.run --nproc_per_node=2 --max_restarts 0 -m coverage run --source pytorch_lightning -a tests/plugins/environments/torch_elastic_deadlock.py | grep "SUCCEEDED") +if [ -z "$LOGS" ]; then + exit 1 +fi +report+="Ran\ttests/plugins/environments/torch_elastic_deadlock.py\n" # test that a user can manually launch individual processes export PYTHONPATH="${PYTHONPATH}:$(pwd)" diff --git a/tests/strategies/test_ddp_spawn_strategy.py b/tests/strategies/test_ddp_spawn_strategy.py index 281b6bb098..3f471366bc 100644 --- a/tests/strategies/test_ddp_spawn_strategy.py +++ b/tests/strategies/test_ddp_spawn_strategy.py @@ -137,7 +137,7 @@ class BoringModelDDP(BoringModel): assert isinstance(self.trainer.model, LightningModule) -@RunIf(skip_windows=True, skip_hanging_spawn=True) +@RunIf(skip_windows=True) def test_ddp_spawn_configure_ddp(tmpdir): """Tests with ddp spawn strategy.""" trainer = Trainer(default_root_dir=tmpdir, accelerator="cpu", devices=2, strategy="ddp_spawn", fast_dev_run=True) diff --git a/tests/strategies/test_ddp_strategy_with_comm_hook.py b/tests/strategies/test_ddp_strategy_with_comm_hook.py index 032237e785..39fae16614 100644 --- a/tests/strategies/test_ddp_strategy_with_comm_hook.py +++ b/tests/strategies/test_ddp_strategy_with_comm_hook.py @@ -42,7 +42,7 @@ class TestDDPStrategy(DDPStrategy): return super().teardown() -@RunIf(min_cuda_gpus=2, min_torch="1.9.0", 
skip_windows=True, standalone=True) +@RunIf(min_cuda_gpus=2, skip_windows=True, standalone=True) def test_ddp_fp16_compress_comm_hook(tmpdir): """Test for DDP FP16 compress hook.""" model = BoringModel() @@ -65,7 +65,7 @@ def test_ddp_fp16_compress_comm_hook(tmpdir): assert trainer.state.finished, f"Training failed with {trainer.state}" -@RunIf(min_cuda_gpus=2, min_torch="1.9.0", skip_windows=True, standalone=True) +@RunIf(min_cuda_gpus=2, skip_windows=True, standalone=True) def test_ddp_sgd_comm_hook(tmpdir): """Test for DDP FP16 compress hook.""" model = BoringModel() @@ -89,7 +89,7 @@ def test_ddp_sgd_comm_hook(tmpdir): assert trainer.state.finished, f"Training failed with {trainer.state}" -@RunIf(min_cuda_gpus=2, min_torch="1.9.0", skip_windows=True, standalone=True) +@RunIf(min_cuda_gpus=2, skip_windows=True, standalone=True) def test_ddp_fp16_compress_wrap_sgd_comm_hook(tmpdir): """Test for DDP FP16 compress wrapper for SGD hook.""" model = BoringModel() @@ -114,7 +114,7 @@ def test_ddp_fp16_compress_wrap_sgd_comm_hook(tmpdir): assert trainer.state.finished, f"Training failed with {trainer.state}" -@RunIf(min_cuda_gpus=2, min_torch="1.9.0", skip_windows=True, standalone=True) +@RunIf(min_cuda_gpus=2, skip_windows=True, standalone=True) def test_ddp_spawn_fp16_compress_comm_hook(tmpdir): """Test for DDP Spawn FP16 compress hook.""" model = BoringModel() diff --git a/tests/trainer/test_dataloaders.py b/tests/trainer/test_dataloaders.py index c6c1239674..b99cb316cd 100644 --- a/tests/trainer/test_dataloaders.py +++ b/tests/trainer/test_dataloaders.py @@ -616,31 +616,6 @@ def _user_worker_init_fn(_): pass -@RunIf(max_torch="1.8.9") -def test_missing_worker_init_fn(): - """Test that naive worker seed initialization leads to undesired random state in subprocesses. - - PyTorch 1.9+ does not have this issue. 
- """ - dataset = NumpyRandomDataset() - - seed_everything(0) - dataloader = DataLoader(dataset, batch_size=2, num_workers=2, shuffle=False) - batches0 = torch.cat(list(dataloader)) - - seed_everything(0) - dataloader = DataLoader(dataset, batch_size=2, num_workers=2, shuffle=False) - batches1 = torch.cat(list(dataloader)) - - is_duplicated = len(torch.unique(batches1, dim=0)) < len(dataset) - is_deterministic = torch.eq(batches0, batches1).all() - - # depending on the OS, we either have - # 1) the same seed in all worker processes, producing duplicate samples / augmentations, or - # 2) different seeds in each worker process, but they are not derived from the seed of the main process - assert not is_deterministic or is_duplicated - - def test_auto_add_worker_init_fn(): """Test Trainer adds a default worker_init_fn to the dataloader when seed_everything() is used.""" dataset = Mock() diff --git a/tests/utilities/test_all_gather_grad.py b/tests/utilities/test_all_gather_grad.py index cd2afc5028..4535555c9f 100644 --- a/tests/utilities/test_all_gather_grad.py +++ b/tests/utilities/test_all_gather_grad.py @@ -54,7 +54,7 @@ def _test_all_gather_ddp(rank, world_size): assert torch.allclose(grad2, tensor2.grad) -@RunIf(skip_windows=True, skip_hanging_spawn=True) +@RunIf(skip_windows=True) def test_all_gather_ddp_spawn(): world_size = 3 torch.multiprocessing.spawn(_test_all_gather_ddp, args=(world_size,), nprocs=world_size) diff --git a/tests/utilities/test_auto_restart.py b/tests/utilities/test_auto_restart.py index 794b3da731..2d5419a435 100644 --- a/tests/utilities/test_auto_restart.py +++ b/tests/utilities/test_auto_restart.py @@ -31,7 +31,7 @@ import torch import torch.distributed as dist import torch.multiprocessing as mp from torch.utils.data import BatchSampler, DistributedSampler, RandomSampler, SequentialSampler -from torch.utils.data._utils.worker import get_worker_info +from torch.utils.data._utils.worker import _generate_state, get_worker_info from 
torch.utils.data.dataloader import DataLoader, default_collate from torch.utils.data.dataset import Dataset, IterableDataset from torch.utils.data.sampler import Sampler @@ -63,62 +63,6 @@ from pytorch_lightning.utilities.imports import _fault_tolerant_training from tests.helpers.runif import RunIf -# Credit to PyTorch Team. -# Taken from: -# https://github.com/pytorch/pytorch/blob/3b977a0d2834d300c0301a0c6af98c8e939019ce/torch/utils/data/_utils/worker.py#L151 -# Not available until torch 1.9.0 -def _generate_state(base_seed, worker_id): - INIT_A = 0x43B0D7E5 - MULT_A = 0x931E8875 - INIT_B = 0x8B51F9DD - MULT_B = 0x58F38DED - MIX_MULT_L = 0xCA01F9DD - MIX_MULT_R = 0x4973F715 - XSHIFT = 4 * 8 // 2 - MASK32 = 0xFFFFFFFF - - entropy = [worker_id, base_seed & MASK32, base_seed >> 32, 0] - pool = [0] * 4 - - hash_const_A = INIT_A - - def hash(value): - nonlocal hash_const_A - value = (value ^ hash_const_A) & MASK32 - hash_const_A = (hash_const_A * MULT_A) & MASK32 - value = (value * hash_const_A) & MASK32 - value = (value ^ (value >> XSHIFT)) & MASK32 - return value - - def mix(x, y): - result_x = (MIX_MULT_L * x) & MASK32 - result_y = (MIX_MULT_R * y) & MASK32 - result = (result_x - result_y) & MASK32 - result = (result ^ (result >> XSHIFT)) & MASK32 - return result - - # Add in the entropy to the pool. - for i in range(len(pool)): - pool[i] = hash(entropy[i]) - - # Mix all bits together so late bits can affect earlier bits. 
- for i_src in range(len(pool)): - for i_dst in range(len(pool)): - if i_src != i_dst: - pool[i_dst] = mix(pool[i_dst], hash(pool[i_src])) - - hash_const_B = INIT_B - state = [] - for i_dst in range(4): - data_val = pool[i_dst] - data_val = (data_val ^ hash_const_B) & MASK32 - hash_const_B = (hash_const_B * MULT_B) & MASK32 - data_val = (data_val * hash_const_B) & MASK32 - data_val = (data_val ^ (data_val >> XSHIFT)) & MASK32 - state.append(data_val) - return state - - def test_fast_forward_getattr(): dataset = range(15) sampler = SequentialSampler(dataset) diff --git a/tests/utilities/test_model_summary.py b/tests/utilities/test_model_summary.py index efc7371d03..d7a76c0fb5 100644 --- a/tests/utilities/test_model_summary.py +++ b/tests/utilities/test_model_summary.py @@ -19,7 +19,6 @@ import torch.nn as nn from pytorch_lightning import LightningModule, Trainer from pytorch_lightning.demos.boring_classes import BoringModel -from pytorch_lightning.utilities import _TORCH_GREATER_EQUAL_1_9 from pytorch_lightning.utilities.model_summary import ModelSummary, summarize, UNKNOWN_SIZE from tests.helpers.advanced_models import ParityModuleRNN from tests.helpers.runif import RunIf @@ -311,14 +310,8 @@ def test_lazy_model_summary(): match=r"A layer with UninitializedParameter was found. " r"Thus, the total number of parameters detected may be inaccurate.", ): - if _TORCH_GREATER_EQUAL_1_9: - assert summary.total_parameters == 0 - assert summary.trainable_parameters == 0 - else: - # bug in 1.8: the bias of a LazyLinear layer is initialized! - # https://github.com/pytorch/pytorch/issues/58350 - assert summary.total_parameters == 7 - assert summary.trainable_parameters == 7 + assert summary.total_parameters == 0 + assert summary.trainable_parameters == 0 @pytest.mark.parametrize("max_depth", [-1, 0, 1, 3, 999])