Drop PyTorch 1.8 support (#13155)
* Drop PyTorch 1.8 support
* Missed update
* Skip profiler test until supported
* Upgrade ipu dockerfile pytorch version
* Update XLA version
parent 981a6da121
commit 0cf9d73d28
@@ -20,8 +20,6 @@ jobs:
   - job: pytest
     strategy:
       matrix:
-        'PyTorch - LTS':
-          image: "pytorchlightning/pytorch_lightning:base-cuda-py3.7-torch1.8"
         'PyTorch - stable':
           image: "pytorchlightning/pytorch_lightning:base-cuda-py3.9-torch1.11"
     # how long to run the job before automatically cancelling
@@ -52,7 +52,7 @@ jobs:
     matrix:
       # the config used in '.circleci/config.yml`'
       python_version: ["3.7"]
-      xla_version: ["1.8"]
+      xla_version: ["1.11"]
   steps:
   - name: Checkout
     uses: actions/checkout@v2
@@ -75,11 +75,9 @@ jobs:
     matrix:
       include:
         # the config used in '.azure-pipelines/gpu-tests.yml'
-        - {python_version: "3.7", pytorch_version: "1.8", cuda_version: "10.2", ubuntu_version: "18.04"}
         - {python_version: "3.7", pytorch_version: "1.10", cuda_version: "11.1", ubuntu_version: "20.04"}
         - {python_version: "3.7", pytorch_version: "1.11", cuda_version: "11.3.1", ubuntu_version: "20.04"}
         # latest (used in Tutorials)
-        - {python_version: "3.8", pytorch_version: "1.8", cuda_version: "11.1", ubuntu_version: "20.04"}
         - {python_version: "3.8", pytorch_version: "1.9", cuda_version: "11.1", ubuntu_version: "20.04"}
         - {python_version: "3.9", pytorch_version: "1.10", cuda_version: "11.1", ubuntu_version: "20.04"}
         - {python_version: "3.9", pytorch_version: "1.11", cuda_version: "11.3.1", ubuntu_version: "20.04"}
@@ -106,7 +104,6 @@ jobs:
     matrix:
       include:
         # see: https://pytorch.org/get-started/previous-versions/
-        - {python_version: "3.8", pytorch_version: "1.8", cuda_version: "11.1"}
         - {python_version: "3.8", pytorch_version: "1.9", cuda_version: "11.1"}
         - {python_version: "3.8", pytorch_version: "1.10", cuda_version: "11.1"}
         - {python_version: "3.9", pytorch_version: "1.11", cuda_version: "11.3.1"}
@@ -23,7 +23,7 @@ jobs:
     fail-fast: false
     matrix:
       python-version: ["3.8"]  # previous to last Python version as that one is already used in test-full
-      pytorch-version: ["1.8", "1.9", "1.10"]
+      pytorch-version: ["1.9", "1.10"]
       # nightly: add when there's a release candidate
       include:
         - {python-version: "3.9", pytorch-version: "1.11"}
@@ -24,9 +24,6 @@ jobs:
       python-version: ["3.7", "3.9"]  # minimum, maximum
       requires: ["oldest", "latest"]
       release: ["stable"]
-      exclude:
-        # Skip if torch<1.8 and py3.9 on Linux: https://github.com/pytorch/pytorch/issues/50014
-        - {os: ubuntu-20.04, python-version: "3.9", requires: "oldest"}
       # TODO: re-enable RC testing
       # include:
       #   - {os: ubuntu-20.04, python-version: "3.10", requires: "latest", release: "pre"}
@@ -20,7 +20,7 @@ jobs:
       os: [ubuntu-20.04, windows-2019, macOS-10.15]
       # same config as '.azure-pipelines/gpu-tests.yml'
       python-version: ["3.7"]
-      pytorch-version: ["1.8"]
+      pytorch-version: ["1.11"]

   timeout-minutes: 20
   steps:
@@ -115,11 +115,9 @@ jobs:
     matrix:
       include:
         # the config used in '.azure-pipelines/gpu-tests.yml'
-        - {python_version: "3.7", pytorch_version: "1.8", cuda_version: "10.2", ubuntu_version: "18.04"}
         - {python_version: "3.7", pytorch_version: "1.10", cuda_version: "11.1", ubuntu_version: "20.04"}
         - {python_version: "3.7", pytorch_version: "1.11", cuda_version: "11.3.1", ubuntu_version: "20.04"}
         # latest (used in Tutorials)
-        - {python_version: "3.8", pytorch_version: "1.8", cuda_version: "11.1", ubuntu_version: "20.04"}
         - {python_version: "3.8", pytorch_version: "1.9", cuda_version: "11.1", ubuntu_version: "20.04"}
         - {python_version: "3.9", pytorch_version: "1.10", cuda_version: "11.1", ubuntu_version: "20.04"}
         - {python_version: "3.9", pytorch_version: "1.11", cuda_version: "11.3.1", ubuntu_version: "20.04"}
@@ -168,7 +166,6 @@ jobs:
     matrix:
       include:
         # see: https://pytorch.org/get-started/previous-versions/
-        - {python_version: "3.8", pytorch_version: "1.8", cuda_version: "11.1"}
         - {python_version: "3.8", pytorch_version: "1.9", cuda_version: "11.1"}
         - {python_version: "3.8", pytorch_version: "1.10", cuda_version: "11.1"}
         - {python_version: "3.9", pytorch_version: "1.11", cuda_version: "11.3.1"}
@@ -16,7 +16,7 @@ jobs:
     fail-fast: false
     matrix:
       python_version: ["3.7", "3.8", "3.9"]
-      pytorch_version: ["1.8", "1.9", "1.10"]
+      pytorch_version: ["1.9", "1.10"]
   steps:
   - name: Checkout
     uses: actions/checkout@v2
README.md
@@ -78,17 +78,17 @@ Lightning is rigorously tested across multiple CPUs, GPUs, TPUs, IPUs, and HPUs

 <center>

-| System / PyTorch ver. | 1.8 (LTS, min. req.) | 1.9 | 1.10 | 1.11 (latest) |
-| :---: | :---: | :---: | :---: | :---: |
-| Linux py3.7 \[GPUs\*\*\] | [![Build Status](<https://dev.azure.com/PytorchLightning/pytorch-lightning/_apis/build/status/PL.pytorch-lightning%20(GPUs)?branchName=master>)](https://dev.azure.com/PytorchLightning/pytorch-lightning/_build/latest?definitionId=6&branchName=master) | - | - | - |
-| Linux py3.7 \[TPUs\*\*\*\] | - | [![CircleCI](https://circleci.com/gh/PyTorchLightning/pytorch-lightning/tree/master.svg?style=svg)](https://circleci.com/gh/PyTorchLightning/pytorch-lightning/tree/master) | - | - |
-| Linux py3.8 \[IPUs\] | - | [![Build Status](<https://dev.azure.com/PytorchLightning/pytorch-lightning/_apis/build/status/PL.pytorch-lightning%20(IPUs)?branchName=master>)](https://dev.azure.com/PytorchLightning/pytorch-lightning/_build/latest?definitionId=6&branchName=master) | - | - |
-| Linux py3.8 \[HPUs\] | - | - | [![Build Status](<https://dev.azure.com/PytorchLightning/pytorch-lightning/_apis/build/status/PL.pytorch-lightning%20(HPUs)?branchName=master>)](https://dev.azure.com/PytorchLightning/pytorch-lightning/_build/latest?definitionId=6&branchName=master) | - |
-| Linux py3.8 (with Conda) | [![Test](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-conda.yml/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-conda.yml) | [![Test](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-conda.yml/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-conda.yml) | [![Test](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-conda.yml/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-conda.yml) | - |
-| Linux py3.9 (with Conda) | - | - | - | [![Test](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-conda.yml/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-conda.yml) |
-| Linux py3.{7,9} | [![Test](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-full.yml/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-full.yml) | - | - | [![Test](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-full.yml/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-full.yml) |
-| OSX py3.{7,9} | [![Test](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-full.yml/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-full.yml) | - | - | [![Test](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-full.yml/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-full.yml) |
-| Windows py3.{7,9} | [![Test](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-full.yml/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-full.yml) | - | - | [![Test](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-full.yml/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-full.yml) |
+| System / PyTorch ver. | 1.9 | 1.10 | 1.11 (latest) |
+| :---: | :---: | :---: | :---: |
+| Linux py3.7 \[GPUs\*\*\] | - | - | - |
+| Linux py3.7 \[TPUs\*\*\*\] | [![CircleCI](https://circleci.com/gh/PyTorchLightning/pytorch-lightning/tree/master.svg?style=svg)](https://circleci.com/gh/PyTorchLightning/pytorch-lightning/tree/master) | - | - |
+| Linux py3.8 \[IPUs\] | [![Build Status](<https://dev.azure.com/PytorchLightning/pytorch-lightning/_apis/build/status/PL.pytorch-lightning%20(IPUs)?branchName=master>)](https://dev.azure.com/PytorchLightning/pytorch-lightning/_build/latest?definitionId=6&branchName=master) | - | - |
+| Linux py3.8 \[HPUs\] | - | [![Build Status](<https://dev.azure.com/PytorchLightning/pytorch-lightning/_apis/build/status/PL.pytorch-lightning%20(HPUs)?branchName=master>)](https://dev.azure.com/PytorchLightning/pytorch-lightning/_build/latest?definitionId=6&branchName=master) | - |
+| Linux py3.8 (with Conda) | [![Test](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-conda.yml/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-conda.yml) | [![Test](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-conda.yml/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-conda.yml) | - |
+| Linux py3.9 (with Conda) | - | - | [![Test](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-conda.yml/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-conda.yml) |
+| Linux py3.{7,9} | - | - | [![Test](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-full.yml/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-full.yml) |
+| OSX py3.{7,9} | - | - | [![Test](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-full.yml/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-full.yml) |
+| Windows py3.{7,9} | - | - | [![Test](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-full.yml/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions/workflows/ci_test-full.yml) |

 - _\*\* tests run on two NVIDIA P100_
 - _\*\*\* tests run on Google GKE TPUv2/3. TPU py3.7 means we support Colab and Kaggle env._
|
@ -126,9 +126,9 @@ pip install pytorch-lightning['extra']
|
|||
conda install pytorch-lightning -c conda-forge
|
||||
```
|
||||
|
||||
#### Install stable 1.5.x
|
||||
#### Install stable 1.6.x
|
||||
|
||||
the actual status of 1.5 \[stable\] is following:
|
||||
the actual status of 1.6 \[stable\] is following:
|
||||
|
||||
![CI basic testing](https://github.com/PyTorchLightning/pytorch-lightning/workflows/CI%20basic%20testing/badge.svg?branch=release%2F1.5.x&event=push)
|
||||
![CI complete testing](https://github.com/PyTorchLightning/pytorch-lightning/workflows/CI%20complete%20testing/badge.svg?branch=release%2F1.5.x&event=push)
|
||||
|
@@ -142,7 +142,7 @@ Install future release from the source
 pip install git+https://github.com/PytorchLightning/pytorch-lightning.git@release/1.5.x --upgrade
 ```

-#### Install bleeding-edge - future 1.6
+#### Install bleeding-edge - future 1.7

 Install nightly from the source (no guarantees)

@@ -17,7 +17,7 @@ ARG CUDA_VERSION=11.3.1
 FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu20.04

 ARG PYTHON_VERSION=3.9
-ARG PYTORCH_VERSION=1.8
+ARG PYTORCH_VERSION=1.9
 ARG CONDA_VERSION=4.11.0

 SHELL ["/bin/bash", "-c"]
@@ -18,7 +18,7 @@ ARG CUDA_VERSION=11.3.1
 FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}

 ARG PYTHON_VERSION=3.9
-ARG PYTORCH_VERSION=1.8
+ARG PYTORCH_VERSION=1.9

 SHELL ["/bin/bash", "-c"]
 # https://techoverflow.net/2019/05/18/how-to-fix-configuring-tzdata-interactive-input-when-building-docker-images/
@@ -19,7 +19,7 @@ LABEL maintainer="PyTorchLightning <https://github.com/PyTorchLightning>"
 # CALL: docker image build -t pytorch-lightning:XLA-image -f dockers/base-xla/Dockerfile . --build-arg PYTHON_VERSION=3.8
 ARG PYTHON_VERSION=3.9
 ARG CONDA_VERSION=4.9.2
-ARG XLA_VERSION=1.8
+ARG XLA_VERSION=1.11

 SHELL ["/bin/bash", "-c"]
 # for skipping configurations
@@ -13,7 +13,7 @@
 # limitations under the License.

 ARG PYTHON_VERSION=3.9
-ARG PYTORCH_VERSION=1.7
+ARG PYTORCH_VERSION=1.9

 FROM pytorchlightning/pytorch_lightning:base-ipu-py${PYTHON_VERSION}-torch${PYTORCH_VERSION}

@@ -13,7 +13,7 @@
 # limitations under the License.

 ARG PYTHON_VERSION=3.9
-ARG PYTORCH_VERSION=1.8
+ARG PYTORCH_VERSION=1.9

 FROM pytorchlightning/pytorch_lightning:base-cuda-py${PYTHON_VERSION}-torch${PYTORCH_VERSION}

@@ -13,7 +13,7 @@
 # limitations under the License.

 ARG PYTHON_VERSION=3.9
-ARG PYTORCH_VERSION=1.8
+ARG PYTORCH_VERSION=1.9

 FROM pytorchlightning/pytorch_lightning:base-xla-py${PYTHON_VERSION}-torch${PYTORCH_VERSION}

@@ -29,7 +29,7 @@ dependencies:
   - python>=3.7
   - pip>20.1
   - numpy>=1.17.2
-  - pytorch>=1.8.*
+  - pytorch>=1.9.*
   - future>=0.17.1
   - PyYAML>=5.1
   - tqdm>=4.41.0
@@ -41,10 +41,10 @@ dependencies:
   - scikit-learn>=0.20.0
   - matplotlib>=3.1.1
   - omegaconf>=2.0.5
-  - torchtext>=0.9.*
+  - torchtext>=0.10.*

   # Examples
-  - torchvision>=0.9.*
+  - torchvision>=0.10.*

   - pip:
     - test-tube>=0.7.5
@@ -25,7 +25,7 @@ from torch.autograd.profiler import record_function

 from pytorch_lightning.profiler.profiler import Profiler
 from pytorch_lightning.utilities.exceptions import MisconfigurationException
-from pytorch_lightning.utilities.imports import _KINETO_AVAILABLE, _TORCH_GREATER_EQUAL_1_9
+from pytorch_lightning.utilities.imports import _KINETO_AVAILABLE
 from pytorch_lightning.utilities.rank_zero import rank_zero_warn
 from pytorch_lightning.utilities.warnings import WarningCache

@@ -445,8 +445,7 @@ class PyTorchProfiler(Profiler):
         if self._schedule is not None:
             self.profiler.step_num = self._schedule.num_step
         self.profiler.step()
-        if _TORCH_GREATER_EQUAL_1_9:
-            self.profiler.add_metadata("Framework", "pytorch-lightning")
+        self.profiler.add_metadata("Framework", "pytorch-lightning")

     def summary(self) -> str:
         if not self._profiler_kwargs.get("enabled", True) or self._emit_nvtx:
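For context, a minimal sketch (not part of the commit) of why the guard above can go away: with torch>=1.9 assumed as the new floor, `torch.profiler.profile` always exposes `add_metadata`, so the trace can be tagged unconditionally.

```python
import torch
from torch.profiler import ProfilerActivity, profile

# Minimal sketch, assuming torch>=1.9: `add_metadata` exists unconditionally,
# so no `_TORCH_GREATER_EQUAL_1_9` check is needed around the call.
with profile(activities=[ProfilerActivity.CPU]) as prof:
    torch.randn(8, 8) @ torch.randn(8, 8)
    prof.add_metadata("Framework", "pytorch-lightning")  # same key/value as above
```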
@@ -53,7 +53,6 @@ from pytorch_lightning.utilities.exceptions import DeadlockDetectedException
 from pytorch_lightning.utilities.imports import (
     _FAIRSCALE_AVAILABLE,
     _IS_WINDOWS,
-    _TORCH_GREATER_EQUAL_1_9,
     _TORCH_GREATER_EQUAL_1_10,
     _TORCH_GREATER_EQUAL_1_11,
 )
@@ -228,9 +227,7 @@ class DDPStrategy(ParallelStrategy):

     def _register_ddp_hooks(self) -> None:
         log.detail(f"{self.__class__.__name__}: registering ddp hooks")
-        # In 1.8, DDP communication hooks only work with NCCL backend and SPSD (single process single device) mode
-        # Since 1.9, DDP communication hooks can work on all backends.
-        if _TORCH_GREATER_EQUAL_1_9 or (self.root_device.type == "cuda" and self._is_single_process_single_device):
+        if self.root_device.type == "cuda" and self._is_single_process_single_device:
             register_ddp_comm_hook(
                 model=self.model,
                 ddp_comm_state=self._ddp_comm_state,
@@ -101,7 +101,7 @@ from pytorch_lightning.utilities.cloud_io import get_filesystem
 from pytorch_lightning.utilities.data import _auto_add_worker_init_fn, has_len_all_ranks
 from pytorch_lightning.utilities.distributed import distributed_available
 from pytorch_lightning.utilities.exceptions import ExitGracefullyException, MisconfigurationException
-from pytorch_lightning.utilities.imports import _fault_tolerant_training, _TORCH_GREATER_EQUAL_1_9
+from pytorch_lightning.utilities.imports import _fault_tolerant_training
 from pytorch_lightning.utilities.meta import is_on_meta_device, materialize_module
 from pytorch_lightning.utilities.model_helpers import is_overridden
 from pytorch_lightning.utilities.rank_zero import rank_zero_deprecation, rank_zero_info, rank_zero_warn
@@ -2768,8 +2768,7 @@ def _evaluation_context(accelerator: Accelerator) -> Generator:
     # and HPU & TPU accelerators.
     context_manager_class = (
         torch.inference_mode
-        if _TORCH_GREATER_EQUAL_1_9
-        and not (dist.is_initialized() and dist.get_backend() == "gloo")
+        if not (dist.is_initialized() and dist.get_backend() == "gloo")
         and not isinstance(accelerator, HPUAccelerator)
         and not isinstance(accelerator, TPUAccelerator)
         else torch.no_grad
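A hedged sketch of the selection logic that remains after the version check is dropped: `torch.inference_mode` is preferred for evaluation, with `torch.no_grad` as the fallback for the cases the code above excludes (gloo-backed distributed runs and HPU/TPU accelerators).

```python
import torch
import torch.distributed as dist

# Sketch only: mirrors the simplified condition above, minus the accelerator
# checks, which need a Lightning Accelerator instance.
gloo_active = dist.is_available() and dist.is_initialized() and dist.get_backend() == "gloo"
ctx = torch.no_grad if gloo_active else torch.inference_mode

with ctx():
    out = torch.nn.Linear(4, 2)(torch.randn(1, 4))
```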
@@ -46,7 +46,6 @@ from pytorch_lightning.utilities.imports import (  # noqa: F401
     _OMEGACONF_AVAILABLE,
     _POPTORCH_AVAILABLE,
     _RICH_AVAILABLE,
-    _TORCH_GREATER_EQUAL_1_9,
     _TORCH_GREATER_EQUAL_1_10,
     _TORCH_GREATER_EQUAL_1_11,
     _TORCH_QUANTIZE_AVAILABLE,
@@ -21,12 +21,11 @@ from torch import Tensor
 from torch.nn.parallel.distributed import DistributedDataParallel

 import pytorch_lightning as pl
-from pytorch_lightning.utilities.imports import _HPU_AVAILABLE, _TORCH_GREATER_EQUAL_1_9, _TPU_AVAILABLE
+from pytorch_lightning.utilities.imports import _HPU_AVAILABLE, _TPU_AVAILABLE
 from pytorch_lightning.utilities.rank_zero import rank_zero_debug as new_rank_zero_debug
 from pytorch_lightning.utilities.rank_zero import rank_zero_only  # noqa: F401
 from pytorch_lightning.utilities.rank_zero import rank_zero_deprecation
 from pytorch_lightning.utilities.rank_zero import rank_zero_info as new_rank_zero_info
 from pytorch_lightning.utilities.rank_zero import rank_zero_warn as new_rank_zero_warn

 if _TPU_AVAILABLE:
     import torch_xla.core.xla_model as xm
@@ -221,13 +220,6 @@ def register_ddp_comm_hook(
         as FP16 compression as wrapper, which could be combined with
         ddp_comm_hook

-    .. warning ::
-        DDP communication hook needs pytorch version at least 1.8.0
-
-    .. warning ::
-        DDP communication wrapper needs pytorch version at least 1.9.0
-        Post-localSGD hook needs pytorch version at least 1.9.0
-
     Examples:

         >>> from torch.distributed.algorithms.ddp_comm_hooks import (  # doctest: +SKIP
@@ -287,15 +279,10 @@ def register_ddp_comm_hook(
     ddp_comm_hook: Callable = ddp_comm_hook

     if ddp_comm_wrapper is not None:
-        if not _TORCH_GREATER_EQUAL_1_9:
-            new_rank_zero_warn(
-                "Not applying DDP comm wrapper. To use communication wrapper, please use pytorch>=1.9.0."
-            )
-        else:
-            new_rank_zero_info(
-                f"DDP comm wrapper is provided, apply {ddp_comm_wrapper.__qualname__}({ddp_comm_hook.__qualname__})."
-            )
-            ddp_comm_hook = ddp_comm_wrapper(ddp_comm_hook)
+        new_rank_zero_info(
+            f"DDP comm wrapper is provided, apply {ddp_comm_wrapper.__qualname__}({ddp_comm_hook.__qualname__})."
+        )
+        ddp_comm_hook = ddp_comm_wrapper(ddp_comm_hook)

     new_rank_zero_debug(f"Registering DDP comm hook: {ddp_comm_hook.__qualname__}.")
     model.register_comm_hook(state=ddp_comm_state, hook=ddp_comm_hook)
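As a usage illustration, adapted from the docstring's own example rather than new behavior: composing a comm hook with the FP16 compression wrapper, which now applies unconditionally whenever a wrapper is passed.

```python
from torch.distributed.algorithms.ddp_comm_hooks import default_hooks

# Sketch only: registering the composed hook requires an initialized process
# group and a DistributedDataParallel-wrapped model, so it is left commented.
hook = default_hooks.fp16_compress_wrapper(default_hooks.allreduce_hook)
# ddp_model.register_comm_hook(state=None, hook=hook)
```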
@@ -124,8 +124,6 @@ class _RequirementAvailable:
 _IS_WINDOWS = platform.system() == "Windows"
 _IS_INTERACTIVE = hasattr(sys, "ps1")  # https://stackoverflow.com/a/64523765
 _PYTHON_GREATER_EQUAL_3_8_0 = Version(platform.python_version()) >= Version("3.8.0")
-_TORCH_GREATER_EQUAL_1_8_1 = _compare_version("torch", operator.ge, "1.8.1")
-_TORCH_GREATER_EQUAL_1_9 = _compare_version("torch", operator.ge, "1.9.0")
 _TORCH_GREATER_EQUAL_1_9_1 = _compare_version("torch", operator.ge, "1.9.1")
 _TORCH_GREATER_EQUAL_1_10 = _compare_version("torch", operator.ge, "1.10.0")
 _TORCH_LESSER_EQUAL_1_10_2 = _compare_version("torch", operator.le, "1.10.2")
@@ -146,7 +144,7 @@ _HIVEMIND_AVAILABLE = _package_available("hivemind")
 _HOROVOD_AVAILABLE = _module_available("horovod.torch")
 _HYDRA_AVAILABLE = _package_available("hydra")
 _HYDRA_EXPERIMENTAL_AVAILABLE = _module_available("hydra.experimental")
-_KINETO_AVAILABLE = _TORCH_GREATER_EQUAL_1_8_1 and torch.profiler.kineto_available()
+_KINETO_AVAILABLE = torch.profiler.kineto_available()
 _NEPTUNE_AVAILABLE = _package_available("neptune")
 _NEPTUNE_GREATER_EQUAL_0_9 = _NEPTUNE_AVAILABLE and _compare_version("neptune", operator.ge, "0.9.0")
 _OMEGACONF_AVAILABLE = _package_available("omegaconf")
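For reference, a rough sketch of what flags like `_TORCH_GREATER_EQUAL_1_9_1` boil down to, assuming `_compare_version` reduces to a `packaging` comparison on `torch.__version__` (the real helper also handles import failures and pre-release builds):

```python
import operator

import torch
from packaging.version import Version

def compare_version(package_version: str, version: str, op=operator.ge) -> bool:
    # strip local/dev suffixes such as "+cu113" before comparing
    return op(Version(Version(package_version).base_version), Version(version))

_TORCH_GREATER_EQUAL_1_9_1 = compare_version(torch.__version__, "1.9.1")
_TORCH_GREATER_EQUAL_1_10 = compare_version(torch.__version__, "1.10.0")
```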
@@ -12,9 +12,6 @@ VERSIONS = [
     dict(torch="1.10.0", torchvision="0.11.1", torchtext="0.11.0"),
     dict(torch="1.9.1", torchvision="0.10.1", torchtext="0.10.1"),
     dict(torch="1.9.0", torchvision="0.10.0", torchtext="0.10.0"),
-    # dict(torch="1.8.2", torchvision="0.9.1", torchtext="0.9.1"),  # LTS  # Not on PyPI, commented so 1.8.1 is used
-    dict(torch="1.8.1", torchvision="0.9.1", torchtext="0.9.1"),
-    dict(torch="1.8.0", torchvision="0.9.0", torchtext="0.9.0"),
 ]

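A hypothetical helper showing how a pin table like `VERSIONS` can be consumed; `find_pins` is illustrative only, not part of the script:

```python
# Hypothetical lookup over the trimmed VERSIONS table above.
VERSIONS = [
    dict(torch="1.10.0", torchvision="0.11.1", torchtext="0.11.0"),
    dict(torch="1.9.1", torchvision="0.10.1", torchtext="0.10.1"),
    dict(torch="1.9.0", torchvision="0.10.0", torchtext="0.10.0"),
]

def find_pins(torch_version: str) -> dict:
    for row in VERSIONS:
        if torch_version.startswith(row["torch"]):
            return row
    raise ValueError(f"no known pins for torch=={torch_version}")

print(find_pins("1.9.1"))  # {'torch': '1.9.1', 'torchvision': '0.10.1', 'torchtext': '0.10.1'}
```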
@@ -1,5 +1,5 @@
 numpy>=1.17.2, <=1.22.3
-torch>=1.8.*, <=1.11.0
+torch>=1.9.*, <=1.11.0
 tqdm>=4.57.0, <=4.63.0
 PyYAML>=5.4, <=6.0
 fsspec[http]>=2021.05.0, !=2021.06.0, <=2022.2.0
@@ -1,3 +1,3 @@
-torchvision>=0.9.*, <=0.12.0
+torchvision>=0.10.*, <=0.12.0
 gym[classic_control]>=0.17.0, <=0.23.1
 ipython[all] <=8.1.1
@@ -1,7 +1,6 @@
 # extended list of package dependencies to reach full functionality

 matplotlib>3.1, <3.5.3
-torchtext>=0.9.*, <=0.12.0
+torchtext>=0.10.*, <=0.12.0
 omegaconf>=2.0.5, <=2.1.*
 hydra-core>=1.0.5, <=1.1.*
 jsonargparse[signatures]>=4.9.0, <=4.9.0
@@ -82,7 +82,6 @@ class RunIf:
         fairscale_fully_sharded: bool = False,
         deepspeed: bool = False,
         rich: bool = False,
-        skip_hanging_spawn: bool = False,
         omegaconf: bool = False,
         slow: bool = False,
         bagua: bool = False,
@@ -111,7 +110,6 @@ class RunIf:
             fairscale_fully_sharded: Require that `fairscale` fully sharded support is available.
             deepspeed: Require that microsoft/DeepSpeed is installed.
             rich: Require that willmcgugan/rich is installed.
-            skip_hanging_spawn: Skip the test as it's impacted by hanging loggers on spawn.
             omegaconf: Require that omry/omegaconf is installed.
             slow: Mark the test as slow, our CI will run it in a separate job.
             bagua: Require that BaguaSys/bagua is installed.
@@ -213,15 +211,6 @@ class RunIf:
             conditions.append(not _RICH_AVAILABLE)
             reasons.append("Rich")

-        if skip_hanging_spawn:
-            # strategy=ddp_spawn, accelerator=cpu, python>=3.8, torch<1.9 does not work
-            py_version = f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}"
-            ge_3_8 = Version(py_version) >= Version("3.8")
-            torch_version = get_distribution("torch").version
-            old_torch = Version(torch_version) < Version("1.9")
-            conditions.append(ge_3_8 and old_torch)
-            reasons.append("Impacted by hanging DDP spawn")
-
         if omegaconf:
             conditions.append(not _OMEGACONF_AVAILABLE)
             reasons.append("omegaconf")
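For readers unfamiliar with `RunIf`, a simplified sketch of the pattern the removed block participated in (names here are assumptions, not the real class): boolean skip conditions and human-readable reasons are collected, then folded into a single `pytest.mark.skipif`.

```python
import importlib.util

import pytest

_RICH_AVAILABLE = importlib.util.find_spec("rich") is not None

def run_if(rich: bool = False):
    # Collect (condition, reason) pairs, mirroring how RunIf builds its mark.
    conditions, reasons = [], []
    if rich:
        conditions.append(not _RICH_AVAILABLE)
        reasons.append("Rich")
    return pytest.mark.skipif(any(conditions), reason=f"Requires: [{' + '.join(reasons)}]")
```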
@@ -299,7 +299,7 @@ class RankZeroLoggerCheck(Callback):


 @pytest.mark.parametrize("logger_class", ALL_LOGGER_CLASSES_WO_NEPTUNE_WANDB)
-@RunIf(skip_windows=True, skip_hanging_spawn=True)
+@RunIf(skip_windows=True)
 def test_logger_created_on_rank_zero_only(tmpdir, monkeypatch, logger_class):
     """Test that loggers get replaced by dummy loggers on global rank > 0."""
     _patch_comet_atexit(monkeypatch)
@@ -321,7 +321,7 @@ def pytorch_profiler(tmpdir):
     return PyTorchProfiler(dirpath=tmpdir, filename="profiler")


-@RunIf(max_torch="1.8.1")
+@pytest.mark.xfail(raises=AssertionError, reason="TODO: Support after 1.11 profiler added")
 def test_pytorch_profiler_describe(pytorch_profiler):
     """Ensure the profiler won't fail when reporting the summary."""
     with pytorch_profiler.profile("on_test_start"):
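Note that the replacement marker behaves differently from the old version gate; a brief illustration of `xfail` semantics:

```python
import pytest

# Unlike RunIf(max_torch=...), which skipped the test outright, xfail still
# runs it and reports an expected failure when AssertionError is raised.
@pytest.mark.xfail(raises=AssertionError, reason="TODO: Support after 1.11 profiler added")
def test_expected_to_fail():
    assert False
```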
@@ -64,13 +64,12 @@ if [ $? -eq 0 ]; then
     report+="Ran\ttests/utilities/test_warnings.py\n"
 fi

-# TODO: enable when CI uses torch>=1.9
 # test deadlock is properly handled with TorchElastic.
-# LOGS=$(PL_RUN_STANDALONE_TESTS=1 PL_RECONCILE_PROCESS=1 python -m torch.distributed.run --nproc_per_node=2 --max_restarts 0 -m coverage run --source pytorch_lightning -a tests/plugins/environments/torch_elastic_deadlock.py | grep "SUCCEEDED")
-# if [ -z "$LOGS" ]; then
-#     exit 1
-# fi
-# report+="Ran\ttests/plugins/environments/torch_elastic_deadlock.py\n"
+LOGS=$(PL_RUN_STANDALONE_TESTS=1 PL_RECONCILE_PROCESS=1 python -m torch.distributed.run --nproc_per_node=2 --max_restarts 0 -m coverage run --source pytorch_lightning -a tests/plugins/environments/torch_elastic_deadlock.py | grep "SUCCEEDED")
+if [ -z "$LOGS" ]; then
+    exit 1
+fi
+report+="Ran\ttests/plugins/environments/torch_elastic_deadlock.py\n"

 # test that a user can manually launch individual processes
 export PYTHONPATH="${PYTHONPATH}:$(pwd)"
@@ -137,7 +137,7 @@ class BoringModelDDP(BoringModel):
         assert isinstance(self.trainer.model, LightningModule)


-@RunIf(skip_windows=True, skip_hanging_spawn=True)
+@RunIf(skip_windows=True)
 def test_ddp_spawn_configure_ddp(tmpdir):
     """Tests with ddp spawn strategy."""
     trainer = Trainer(default_root_dir=tmpdir, accelerator="cpu", devices=2, strategy="ddp_spawn", fast_dev_run=True)
@@ -42,7 +42,7 @@ class TestDDPStrategy(DDPStrategy):
         return super().teardown()


-@RunIf(min_cuda_gpus=2, min_torch="1.9.0", skip_windows=True, standalone=True)
+@RunIf(min_cuda_gpus=2, skip_windows=True, standalone=True)
 def test_ddp_fp16_compress_comm_hook(tmpdir):
     """Test for DDP FP16 compress hook."""
     model = BoringModel()
@@ -65,7 +65,7 @@ def test_ddp_fp16_compress_comm_hook(tmpdir):
     assert trainer.state.finished, f"Training failed with {trainer.state}"


-@RunIf(min_cuda_gpus=2, min_torch="1.9.0", skip_windows=True, standalone=True)
+@RunIf(min_cuda_gpus=2, skip_windows=True, standalone=True)
 def test_ddp_sgd_comm_hook(tmpdir):
     """Test for DDP FP16 compress hook."""
     model = BoringModel()
@@ -89,7 +89,7 @@ def test_ddp_sgd_comm_hook(tmpdir):
     assert trainer.state.finished, f"Training failed with {trainer.state}"


-@RunIf(min_cuda_gpus=2, min_torch="1.9.0", skip_windows=True, standalone=True)
+@RunIf(min_cuda_gpus=2, skip_windows=True, standalone=True)
 def test_ddp_fp16_compress_wrap_sgd_comm_hook(tmpdir):
     """Test for DDP FP16 compress wrapper for SGD hook."""
     model = BoringModel()
@@ -114,7 +114,7 @@ def test_ddp_fp16_compress_wrap_sgd_comm_hook(tmpdir):
     assert trainer.state.finished, f"Training failed with {trainer.state}"


-@RunIf(min_cuda_gpus=2, min_torch="1.9.0", skip_windows=True, standalone=True)
+@RunIf(min_cuda_gpus=2, skip_windows=True, standalone=True)
 def test_ddp_spawn_fp16_compress_comm_hook(tmpdir):
     """Test for DDP Spawn FP16 compress hook."""
     model = BoringModel()
@@ -616,31 +616,6 @@ def _user_worker_init_fn(_):
     pass


-@RunIf(max_torch="1.8.9")
-def test_missing_worker_init_fn():
-    """Test that naive worker seed initialization leads to undesired random state in subprocesses.
-
-    PyTorch 1.9+ does not have this issue.
-    """
-    dataset = NumpyRandomDataset()
-
-    seed_everything(0)
-    dataloader = DataLoader(dataset, batch_size=2, num_workers=2, shuffle=False)
-    batches0 = torch.cat(list(dataloader))
-
-    seed_everything(0)
-    dataloader = DataLoader(dataset, batch_size=2, num_workers=2, shuffle=False)
-    batches1 = torch.cat(list(dataloader))
-
-    is_duplicated = len(torch.unique(batches1, dim=0)) < len(dataset)
-    is_deterministic = torch.eq(batches0, batches1).all()
-
-    # depending on the OS, we either have
-    # 1) the same seed in all worker processes, producing duplicate samples / augmentations, or
-    # 2) different seeds in each worker process, but they are not derived from the seed of the main process
-    assert not is_deterministic or is_duplicated
-
-
 def test_auto_add_worker_init_fn():
     """Test Trainer adds a default worker_init_fn to the dataloader when seed_everything() is used."""
     dataset = Mock()
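The deleted test's own docstring explains why it could go. A hedged sketch of the behavior it guarded against, assuming torch>=1.9's default worker seeding (which derives distinct NumPy states per worker):

```python
import numpy as np
import torch
from torch.utils.data import DataLoader, Dataset

class NumpyRandomDataset(Dataset):
    # mirrors the helper the deleted test used: samples from NumPy's global RNG
    def __len__(self):
        return 8

    def __getitem__(self, idx):
        return np.random.rand(1)

if __name__ == "__main__":
    loader = DataLoader(NumpyRandomDataset(), batch_size=2, num_workers=2)
    batches = torch.cat(list(loader))
    # with torch>=1.9, workers get distinct NumPy states, so the 1.8 failure
    # mode (duplicate samples across workers) should not appear
    print(len(torch.unique(batches, dim=0)) == len(batches))
```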
@@ -54,7 +54,7 @@ def _test_all_gather_ddp(rank, world_size):
     assert torch.allclose(grad2, tensor2.grad)


-@RunIf(skip_windows=True, skip_hanging_spawn=True)
+@RunIf(skip_windows=True)
 def test_all_gather_ddp_spawn():
     world_size = 3
     torch.multiprocessing.spawn(_test_all_gather_ddp, args=(world_size,), nprocs=world_size)
@@ -31,7 +31,7 @@ import torch
 import torch.distributed as dist
 import torch.multiprocessing as mp
 from torch.utils.data import BatchSampler, DistributedSampler, RandomSampler, SequentialSampler
-from torch.utils.data._utils.worker import get_worker_info
+from torch.utils.data._utils.worker import _generate_state, get_worker_info
 from torch.utils.data.dataloader import DataLoader, default_collate
 from torch.utils.data.dataset import Dataset, IterableDataset
 from torch.utils.data.sampler import Sampler
@@ -63,62 +63,6 @@ from pytorch_lightning.utilities.imports import _fault_tolerant_training
 from tests.helpers.runif import RunIf


-# Credit to PyTorch Team.
-# Taken from:
-# https://github.com/pytorch/pytorch/blob/3b977a0d2834d300c0301a0c6af98c8e939019ce/torch/utils/data/_utils/worker.py#L151
-# Not available until torch 1.9.0
-def _generate_state(base_seed, worker_id):
-    INIT_A = 0x43B0D7E5
-    MULT_A = 0x931E8875
-    INIT_B = 0x8B51F9DD
-    MULT_B = 0x58F38DED
-    MIX_MULT_L = 0xCA01F9DD
-    MIX_MULT_R = 0x4973F715
-    XSHIFT = 4 * 8 // 2
-    MASK32 = 0xFFFFFFFF
-
-    entropy = [worker_id, base_seed & MASK32, base_seed >> 32, 0]
-    pool = [0] * 4
-
-    hash_const_A = INIT_A
-
-    def hash(value):
-        nonlocal hash_const_A
-        value = (value ^ hash_const_A) & MASK32
-        hash_const_A = (hash_const_A * MULT_A) & MASK32
-        value = (value * hash_const_A) & MASK32
-        value = (value ^ (value >> XSHIFT)) & MASK32
-        return value
-
-    def mix(x, y):
-        result_x = (MIX_MULT_L * x) & MASK32
-        result_y = (MIX_MULT_R * y) & MASK32
-        result = (result_x - result_y) & MASK32
-        result = (result ^ (result >> XSHIFT)) & MASK32
-        return result
-
-    # Add in the entropy to the pool.
-    for i in range(len(pool)):
-        pool[i] = hash(entropy[i])
-
-    # Mix all bits together so late bits can affect earlier bits.
-    for i_src in range(len(pool)):
-        for i_dst in range(len(pool)):
-            if i_src != i_dst:
-                pool[i_dst] = mix(pool[i_dst], hash(pool[i_src]))
-
-    hash_const_B = INIT_B
-    state = []
-    for i_dst in range(4):
-        data_val = pool[i_dst]
-        data_val = (data_val ^ hash_const_B) & MASK32
-        hash_const_B = (hash_const_B * MULT_B) & MASK32
-        data_val = (data_val * hash_const_B) & MASK32
-        data_val = (data_val ^ (data_val >> XSHIFT)) & MASK32
-        state.append(data_val)
-    return state
-
-
 def test_fast_forward_getattr():
     dataset = range(15)
     sampler = SequentialSampler(dataset)
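The vendored copy above could be deleted because torch>=1.9 ships the same helper; a short hedged demo (note it is private torch API, hence the underscore):

```python
# Assuming torch>=1.9: the seed-mixing helper is available directly from
# PyTorch, which is what the updated import at the top of this file uses.
from torch.utils.data._utils.worker import _generate_state

state = _generate_state(base_seed=0, worker_id=1)
print(state)  # four 32-bit words used to seed NumPy inside a worker process
```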
@@ -19,7 +19,6 @@ import torch.nn as nn

 from pytorch_lightning import LightningModule, Trainer
 from pytorch_lightning.demos.boring_classes import BoringModel
-from pytorch_lightning.utilities import _TORCH_GREATER_EQUAL_1_9
 from pytorch_lightning.utilities.model_summary import ModelSummary, summarize, UNKNOWN_SIZE
 from tests.helpers.advanced_models import ParityModuleRNN
 from tests.helpers.runif import RunIf
@@ -311,14 +310,8 @@ def test_lazy_model_summary():
         match=r"A layer with UninitializedParameter was found. "
         r"Thus, the total number of parameters detected may be inaccurate.",
     ):
-        if _TORCH_GREATER_EQUAL_1_9:
-            assert summary.total_parameters == 0
-            assert summary.trainable_parameters == 0
-        else:
-            # bug in 1.8: the bias of a LazyLinear layer is initialized!
-            # https://github.com/pytorch/pytorch/issues/58350
-            assert summary.total_parameters == 7
-            assert summary.trainable_parameters == 7
+        assert summary.total_parameters == 0
+        assert summary.trainable_parameters == 0


 @pytest.mark.parametrize("max_depth", [-1, 0, 1, 3, 999])
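Background for the simplified assertion: with torch>=1.9 assumed, lazy layers keep truly uninitialized parameters until the first forward pass, so a parameter count of zero is correct (the 1.8 bias-initialization bug referenced above is gone). A minimal sketch:

```python
import torch.nn as nn

layer = nn.LazyLinear(5)  # parameters are UninitializedParameter before any forward
total = sum(
    0 if isinstance(p, nn.parameter.UninitializedParameter) else p.numel()
    for p in layer.parameters()
)
print(total)  # 0, matching the assertion in the updated test
```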