Drop support for PyTorch 1.10 (#16492)
* Drop support for PyTorch 1.10
* CHANGELOG
* READMEs
* mypy
* ls
* New poplar version
* Fixed tests
* links
* [pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
* skip azure badges
* Table
* Matching dockerfiles
* Drop unnecessary channels and packages
* Push nightly
* Undo unrelated changes
* Revert "Push nightly"
This reverts commit 9618f737c4.
---------
Co-authored-by: Jirka <jirka.borovec@seznam.cz>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
This commit is contained in:
parent
c8367a22b8
commit
ef2a6088ff
|
@ -33,7 +33,7 @@ pr:
|
|||
variables:
|
||||
- name: poplar_sdk
|
||||
# https://docs.graphcore.ai/projects/poptorch-user-guide/en/latest/installation.html#version-compatibility
|
||||
value: "poplar_sdk-ubuntu_20_04-3.0.0+1145-1b114aac3a"
|
||||
value: "poplar_sdk-ubuntu_20_04-3.1.0+1205-58b501c780"
|
||||
|
||||
jobs:
|
||||
- job: testing
|
||||
|
@ -44,7 +44,10 @@ jobs:
|
|||
clean: all
|
||||
|
||||
steps:
|
||||
- script: tar -xvzf /opt/poplar/${{ variables.poplar_sdk }}.tar.gz
|
||||
- script: |
|
||||
ls -la /mnt/public/packages
|
||||
ls -la /opt/poplar
|
||||
tar -xvzf /opt/poplar/${{ variables.poplar_sdk }}.tar.gz
|
||||
displayName: "Extract Poplar SDK"
|
||||
|
||||
- script: |
|
||||
|
|
|
@ -79,7 +79,7 @@ body:
|
|||
#- Lightning Component (e.g. Trainer, LightningModule, LightningApp, LightningWork, LightningFlow):
|
||||
#- PyTorch Lightning Version (e.g., 1.5.0):
|
||||
#- Lightning App Version (e.g., 0.5.2):
|
||||
#- PyTorch Version (e.g., 1.10):
|
||||
#- PyTorch Version (e.g., 2.0):
|
||||
#- Python version (e.g., 3.9):
|
||||
#- OS (e.g., Linux):
|
||||
#- CUDA/cuDNN version:
|
||||
|
|
|
@ -23,16 +23,15 @@ subprojects:
|
|||
- "pl-cpu (macOS-11, pytorch, 3.8, 1.11)"
|
||||
- "pl-cpu (macOS-11, pytorch, 3.9, 1.12)"
|
||||
- "pl-cpu (macOS-11, pytorch, 3.10, 1.13)"
|
||||
- "pl-cpu (macOS-11, pytorch, 3.8, 1.10, oldest)"
|
||||
- "pl-cpu (ubuntu-20.04, pytorch, 3.8, 1.10)"
|
||||
- "pl-cpu (macOS-11, pytorch, 3.8, 1.11, oldest)"
|
||||
- "pl-cpu (ubuntu-20.04, pytorch, 3.9, 1.11)"
|
||||
- "pl-cpu (ubuntu-20.04, pytorch, 3.10, 1.12)"
|
||||
- "pl-cpu (ubuntu-20.04, pytorch, 3.10, 1.13)"
|
||||
- "pl-cpu (ubuntu-20.04, pytorch, 3.8, 1.10, oldest)"
|
||||
- "pl-cpu (ubuntu-20.04, pytorch, 3.8, 1.11, oldest)"
|
||||
- "pl-cpu (windows-2022, pytorch, 3.9, 1.11)"
|
||||
- "pl-cpu (windows-2022, pytorch, 3.10, 1.12)"
|
||||
- "pl-cpu (windows-2022, pytorch, 3.10, 1.13)"
|
||||
- "pl-cpu (windows-2022, pytorch, 3.8, 1.10, oldest)"
|
||||
- "pl-cpu (windows-2022, pytorch, 3.8, 1.11, oldest)"
|
||||
- "pl-cpu (slow, macOS-11, pytorch, 3.8, 1.11)"
|
||||
- "pl-cpu (slow, ubuntu-20.04, pytorch, 3.8, 1.11)"
|
||||
- "pl-cpu (slow, windows-2022, pytorch, 3.8, 1.11)"
|
||||
|
@ -162,14 +161,12 @@ subprojects:
|
|||
- "!*.md"
|
||||
- "!**/*.md"
|
||||
checks:
|
||||
- "build-cuda (3.9, 1.10, 11.3.1)"
|
||||
- "build-cuda (3.9, 1.11, 11.3.1)"
|
||||
- "build-cuda (3.9, 1.12, 11.6.1)"
|
||||
- "build-cuda (3.9, 1.13, 11.7.1)"
|
||||
- "build-hpu (1.5.0, 1.11.0)"
|
||||
- "build-ipu (3.9, 1.10)"
|
||||
- "build-ipu (3.9, 1.13)"
|
||||
- "build-NGC"
|
||||
- "build-pl (3.9, 1.10, 11.3.1)"
|
||||
- "build-pl (3.9, 1.11, 11.3.1)"
|
||||
- "build-pl (3.9, 1.12, 11.6.1)"
|
||||
- "build-pl (3.9, 1.13, 11.7.1)"
|
||||
|
@ -193,16 +190,15 @@ subprojects:
|
|||
- "fabric-cpu (macOS-11, fabric, 3.8, 1.11)"
|
||||
- "fabric-cpu (macOS-11, fabric, 3.9, 1.12)"
|
||||
- "fabric-cpu (macOS-11, fabric, 3.10, 1.13)"
|
||||
- "fabric-cpu (macOS-11, fabric, 3.8, 1.10, oldest)"
|
||||
- "fabric-cpu (ubuntu-20.04, fabric, 3.8, 1.10)"
|
||||
- "fabric-cpu (macOS-11, fabric, 3.8, 1.11, oldest)"
|
||||
- "fabric-cpu (ubuntu-20.04, fabric, 3.9, 1.11)"
|
||||
- "fabric-cpu (ubuntu-20.04, fabric, 3.10, 1.12)"
|
||||
- "fabric-cpu (ubuntu-20.04, fabric, 3.10, 1.13)"
|
||||
- "fabric-cpu (ubuntu-20.04, fabric, 3.8, 1.10, oldest)"
|
||||
- "fabric-cpu (ubuntu-20.04, fabric, 3.8, 1.11, oldest)"
|
||||
- "fabric-cpu (windows-2022, fabric, 3.9, 1.11)"
|
||||
- "fabric-cpu (windows-2022, fabric, 3.10, 1.12)"
|
||||
- "fabric-cpu (windows-2022, fabric, 3.10, 1.13)"
|
||||
- "fabric-cpu (windows-2022, fabric, 3.8, 1.10, oldest)"
|
||||
- "fabric-cpu (windows-2022, fabric, 3.8, 1.11, oldest)"
|
||||
- "fabric-cpu (macOS-11, lightning, 3.8, 1.13)"
|
||||
- "fabric-cpu (ubuntu-20.04, lightning, 3.8, 1.13)"
|
||||
- "fabric-cpu (windows-2022, lightning, 3.8, 1.13)"
|
||||
|
|
|
@ -41,7 +41,6 @@ jobs:
|
|||
include:
|
||||
# We only release one docker image per PyTorch version.
|
||||
# The matrix here is the same as the one in release-docker.yml.
|
||||
- {python_version: "3.9", pytorch_version: "1.10", cuda_version: "11.3.1"}
|
||||
- {python_version: "3.9", pytorch_version: "1.11", cuda_version: "11.3.1"}
|
||||
- {python_version: "3.9", pytorch_version: "1.12", cuda_version: "11.6.1"}
|
||||
- {python_version: "3.9", pytorch_version: "1.13", cuda_version: "11.7.1"}
|
||||
|
@ -103,7 +102,6 @@ jobs:
|
|||
include:
|
||||
# These are the base images for PL release docker images,
|
||||
# so include at least all of the combinations in release-dockers.yml.
|
||||
- {python_version: "3.9", pytorch_version: "1.10", cuda_version: "11.3.1"}
|
||||
- {python_version: "3.9", pytorch_version: "1.11", cuda_version: "11.3.1"}
|
||||
- {python_version: "3.9", pytorch_version: "1.12", cuda_version: "11.6.1"}
|
||||
- {python_version: "3.9", pytorch_version: "1.13", cuda_version: "11.7.1"}
|
||||
|
@ -143,7 +141,7 @@ jobs:
|
|||
matrix:
|
||||
include:
|
||||
# the config used in 'dockers/ci-runner-ipu/Dockerfile'
|
||||
- {python_version: "3.9", pytorch_version: "1.10"}
|
||||
- {python_version: "3.9", pytorch_version: "1.13"}
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: docker/setup-buildx-action@v2
|
||||
|
|
|
@ -38,11 +38,8 @@ jobs:
|
|||
fail-fast: false
|
||||
matrix:
|
||||
include:
|
||||
# assign python and pytorch version combinations to operating systems (arbitrarily)
|
||||
# note: there's no distribution of torch==1.10 for Python>=3.10
|
||||
- {os: "macOS-11", pkg-name: "fabric", python-version: "3.8", pytorch-version: "1.11"}
|
||||
- {os: "macOS-11", pkg-name: "fabric", python-version: "3.9", pytorch-version: "1.12"}
|
||||
- {os: "ubuntu-20.04", pkg-name: "fabric", python-version: "3.8", pytorch-version: "1.10"}
|
||||
- {os: "ubuntu-20.04", pkg-name: "fabric", python-version: "3.9", pytorch-version: "1.11"}
|
||||
- {os: "ubuntu-20.04", pkg-name: "fabric", python-version: "3.10", pytorch-version: "1.12"}
|
||||
- {os: "windows-2022", pkg-name: "fabric", python-version: "3.9", pytorch-version: "1.11"}
|
||||
|
@ -52,9 +49,9 @@ jobs:
|
|||
- {os: "ubuntu-20.04", pkg-name: "fabric", python-version: "3.10", pytorch-version: "1.13"}
|
||||
- {os: "windows-2022", pkg-name: "fabric", python-version: "3.10", pytorch-version: "1.13"}
|
||||
# "oldest" versions tests, only on minimum Python
|
||||
- {os: "macOS-11", pkg-name: "fabric", python-version: "3.8", pytorch-version: "1.10", requires: "oldest"}
|
||||
- {os: "ubuntu-20.04", pkg-name: "fabric", python-version: "3.8", pytorch-version: "1.10", requires: "oldest"}
|
||||
- {os: "windows-2022", pkg-name: "fabric", python-version: "3.8", pytorch-version: "1.10", requires: "oldest"}
|
||||
- {os: "macOS-11", pkg-name: "fabric", python-version: "3.8", pytorch-version: "1.11", requires: "oldest"}
|
||||
- {os: "ubuntu-20.04", pkg-name: "fabric", python-version: "3.8", pytorch-version: "1.11", requires: "oldest"}
|
||||
- {os: "windows-2022", pkg-name: "fabric", python-version: "3.8", pytorch-version: "1.11", requires: "oldest"}
|
||||
# "lightning" installs the monolithic package
|
||||
- {os: "macOS-11", pkg-name: "lightning", python-version: "3.8", pytorch-version: "1.13"}
|
||||
- {os: "ubuntu-20.04", pkg-name: "lightning", python-version: "3.8", pytorch-version: "1.13"}
|
||||
|
|
|
@ -43,11 +43,8 @@ jobs:
|
|||
fail-fast: false
|
||||
matrix:
|
||||
include:
|
||||
# assign python and pytorch version combinations to operating systems (arbitrarily)
|
||||
# note: there's no distribution of torch==1.10 for Python>=3.10
|
||||
- {os: "macOS-11", pkg-name: "pytorch", python-version: "3.8", pytorch-version: "1.11"}
|
||||
- {os: "macOS-11", pkg-name: "pytorch", python-version: "3.9", pytorch-version: "1.12"}
|
||||
- {os: "ubuntu-20.04", pkg-name: "pytorch", python-version: "3.8", pytorch-version: "1.10"}
|
||||
- {os: "ubuntu-20.04", pkg-name: "pytorch", python-version: "3.9", pytorch-version: "1.11"}
|
||||
- {os: "ubuntu-20.04", pkg-name: "pytorch", python-version: "3.10", pytorch-version: "1.12"}
|
||||
- {os: "windows-2022", pkg-name: "pytorch", python-version: "3.9", pytorch-version: "1.11"}
|
||||
|
@ -57,9 +54,9 @@ jobs:
|
|||
- {os: "ubuntu-20.04", pkg-name: "pytorch", python-version: "3.10", pytorch-version: "1.13"}
|
||||
- {os: "windows-2022", pkg-name: "pytorch", python-version: "3.10", pytorch-version: "1.13"}
|
||||
# "oldest" versions tests, only on minimum Python
|
||||
- {os: "macOS-11", pkg-name: "pytorch", python-version: "3.8", pytorch-version: "1.10", requires: "oldest"}
|
||||
- {os: "ubuntu-20.04", pkg-name: "pytorch", python-version: "3.8", pytorch-version: "1.10", requires: "oldest"}
|
||||
- {os: "windows-2022", pkg-name: "pytorch", python-version: "3.8", pytorch-version: "1.10", requires: "oldest"}
|
||||
- {os: "macOS-11", pkg-name: "pytorch", python-version: "3.8", pytorch-version: "1.11", requires: "oldest"}
|
||||
- {os: "ubuntu-20.04", pkg-name: "pytorch", python-version: "3.8", pytorch-version: "1.11", requires: "oldest"}
|
||||
- {os: "windows-2022", pkg-name: "pytorch", python-version: "3.8", pytorch-version: "1.11", requires: "oldest"}
|
||||
# run test under SLOW label
|
||||
- {type: "slow", os: "macOS-11", pkg-name: "pytorch", python-version: "3.8", pytorch-version: "1.11"}
|
||||
- {type: "slow", os: "ubuntu-20.04", pkg-name: "pytorch", python-version: "3.8", pytorch-version: "1.11"}
|
||||
|
|
|
@ -2,6 +2,9 @@
|
|||
"ignorePatterns": [
|
||||
{
|
||||
"pattern": "^https://github.com/Lightning-AI/lightning/pull/.*"
|
||||
},
|
||||
{
|
||||
"pattern": ".*https://dev.azure.com/Lightning-AI/lightning/_apis/build/status/.*"
|
||||
}
|
||||
],
|
||||
"httpHeaders": [
|
||||
|
|
|
@ -16,7 +16,6 @@ jobs:
|
|||
matrix:
|
||||
include:
|
||||
# We only release one docker image per PyTorch version.
|
||||
- {python_version: "3.9", pytorch_version: "1.10", cuda_version: "11.3.1"}
|
||||
- {python_version: "3.9", pytorch_version: "1.11", cuda_version: "11.3.1"}
|
||||
- {python_version: "3.9", pytorch_version: "1.12", cuda_version: "11.6.1"}
|
||||
- {python_version: "3.9", pytorch_version: "1.13", cuda_version: "11.7.1"}
|
||||
|
|
18
README.md
18
README.md
|
@ -91,15 +91,15 @@ Lightning is rigorously tested across multiple CPUs, GPUs, TPUs, IPUs, and HPUs
|
|||
|
||||
<center>
|
||||
|
||||
| System / PyTorch ver. | 1.10 | 1.12 |
|
||||
| :--------------------------------: | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
|
||||
| Linux py3.9 \[GPUs\] | - | - |
|
||||
| Linux py3.9 \[TPUs\] | - | - |
|
||||
| Linux py3.8 \[IPUs\] | - | - |
|
||||
| Linux py3.8 \[HPUs\] | [![Build Status](https://dev.azure.com/Lightning-AI/lightning/_apis/build/status/pytorch-lightning%20%28HPUs%29?branchName=master)](https://dev.azure.com/Lightning-AI/lightning/_build/latest?definitionId=26&branchName=master) | - |
|
||||
| Linux (multiple Python versions) | - | [![Test](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml/badge.svg?branch=master&event=push)](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml) |
|
||||
| OSX (multiple Python versions) | - | [![Test](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml/badge.svg?branch=master&event=push)](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml) |
|
||||
| Windows (multiple Python versions) | - | [![Test](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml/badge.svg?branch=master&event=push)](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml) |
|
||||
| System / PyTorch ver. | 1.11 | 1.12 | 1.13 | 2.0 |
|
||||
| :--------------------------------: | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---- |
|
||||
| Linux py3.9 \[GPUs\] | - | [![Build Status](<https://dev.azure.com/Lightning-AI/lightning/_apis/build/status/pytorch-lightning%20(GPUs)?branchName=master>)](https://dev.azure.com/Lightning-AI/lightning/_build/latest?definitionId=24&branchName=master) | [![Build Status](<https://dev.azure.com/Lightning-AI/lightning/_apis/build/status/pytorch-lightning%20(GPUs)?branchName=master>)](https://dev.azure.com/Lightning-AI/lightning/_build/latest?definitionId=24&branchName=master) | Soon |
|
||||
| Linux py3.9 \[TPUs\] | - | [![Test PyTorch - TPU](https://github.com/Lightning-AI/lightning/actions/workflows/tpu-tests.yml/badge.svg)](https://github.com/Lightning-AI/lightning/actions/workflows/tpu-tests.yml) | | Soon |
|
||||
| Linux py3.8 \[IPUs\] | - | - | [![Build Status](<https://dev.azure.com/Lightning-AI/lightning/_apis/build/status/pytorch-lightning%20(IPUs)?branchName=master>)](https://dev.azure.com/Lightning-AI/lightning/_build/latest?definitionId=25&branchName=master) | Soon |
|
||||
| Linux py3.8 \[HPUs\] | - | - | [![Build Status](<https://dev.azure.com/Lightning-AI/lightning/_apis/build/status/pytorch-lightning%20(HPUs)?branchName=master>)](https://dev.azure.com/Lightning-AI/lightning/_build/latest?definitionId=26&branchName=master) | Soon |
|
||||
| Linux (multiple Python versions) | [![Test PyTorch](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml/badge.svg)](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml) | [![Test PyTorch](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml/badge.svg)](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml) | [![Test PyTorch](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml/badge.svg)](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml) | Soon |
|
||||
| OSX (multiple Python versions) | [![Test PyTorch](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml/badge.svg)](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml) | [![Test PyTorch](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml/badge.svg)](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml) | [![Test PyTorch](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml/badge.svg)](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml) | Soon |
|
||||
| Windows (multiple Python versions) | [![Test PyTorch](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml/badge.svg)](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml) | [![Test PyTorch](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml/badge.svg)](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml) | [![Test PyTorch](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml/badge.svg)](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml) | Soon |
|
||||
|
||||
</center>
|
||||
</details>
|
||||
|
|
|
@ -18,7 +18,7 @@ LABEL maintainer="Lightning-AI <https://github.com/Lightning-AI>"
|
|||
|
||||
ARG PYTHON_VERSION=3.9
|
||||
ARG CONDA_VERSION=4.9.2
|
||||
ARG PYTORCH_VERSION=1.10
|
||||
ARG PYTORCH_VERSION=1.13
|
||||
|
||||
SHELL ["/bin/bash", "-c"]
|
||||
|
||||
|
@ -39,13 +39,13 @@ RUN apt-get update -qq --fix-missing && \
|
|||
unzip \
|
||||
wget \
|
||||
&& \
|
||||
# Install conda and python.
|
||||
# NOTE new Conda does not forward the exit status... https://github.com/conda/conda/issues/8385
|
||||
# Install conda and python.
|
||||
# NOTE new Conda does not forward the exit status... https://github.com/conda/conda/issues/8385
|
||||
curl -o ~/miniconda.sh https://repo.anaconda.com/miniconda/Miniconda3-py39_${CONDA_VERSION}-Linux-x86_64.sh && \
|
||||
chmod +x ~/miniconda.sh && \
|
||||
~/miniconda.sh -b && \
|
||||
rm ~/miniconda.sh && \
|
||||
# Cleaning
|
||||
# Cleaning
|
||||
apt-get autoremove -y && \
|
||||
apt-get clean && \
|
||||
rm -rf /root/.cache && \
|
||||
|
@ -57,12 +57,13 @@ ENV \
|
|||
|
||||
COPY environment.yml environment.yml
|
||||
|
||||
RUN conda init bash && \
|
||||
RUN conda create -y --name $CONDA_ENV && \
|
||||
conda init bash && \
|
||||
python -c "import re ; fname = 'environment.yml' ; req = re.sub(r'python>=[\d\.]+', 'python=${PYTHON_VERSION}', open(fname).read()) ; open(fname, 'w').write(req)" && \
|
||||
python -c "import re ; fname = 'environment.yml' ; req = re.sub(r'- pytorch[>=]+[\d\.]+', '- pytorch=${PYTORCH_VERSION}', open(fname).read()) ; open(fname, 'w').write(req)" && \
|
||||
python -c "fname = 'environment.yml' ; req = open(fname).readlines() ; open(fname, 'w').writelines([ln for ln in req if not any(n in ln for n in ['pytorch>'])])" && \
|
||||
# drop unnecessary packages and channels
|
||||
python -c "fname = 'environment.yml' ; req = open(fname).readlines() ; open(fname, 'w').writelines([ln for ln in req if not any(n in ln for n in ['pytorch>', 'nvidia', 'torchvision'])])" && \
|
||||
cat environment.yml && \
|
||||
conda env create -f environment.yml && \
|
||||
conda env update --file environment.yml && \
|
||||
conda clean -ya && \
|
||||
rm environment.yml
|
||||
|
||||
|
@ -73,22 +74,28 @@ ENV \
|
|||
CONDA_DEFAULT_ENV=${CONDA_ENV} \
|
||||
MKL_THREADING_LAYER=GNU
|
||||
|
||||
COPY ./requirements/pytorch/* requirements/
|
||||
COPY ./.actions/assistant.py assistant.py
|
||||
# Disable cache
|
||||
RUN pip --version && \
|
||||
pip config set global.cache-dir false
|
||||
|
||||
# Get package
|
||||
COPY ./ ./lightning/
|
||||
|
||||
RUN \
|
||||
pip list | grep torch && \
|
||||
python -c "import torch; print(torch.__version__)" && \
|
||||
pip install -q fire && \
|
||||
python requirements/adjust-versions.py requirements/extra.txt && \
|
||||
# Install remaining requirements
|
||||
pip install -r requirements/extra.txt --no-cache-dir && \
|
||||
pip install -r requirements/test.txt --no-cache-dir && \
|
||||
rm -rf requirements/
|
||||
python --version && \
|
||||
cd lightning && \
|
||||
# set particular PyTorch version \
|
||||
for fpath in `ls requirements/**/*.txt`; do \
|
||||
python requirements/pytorch/adjust-versions.py $fpath ${PYTORCH_VERSION}; \
|
||||
done && \
|
||||
# install PL dependencies
|
||||
pip install --requirement requirements/pytorch/devel.txt && \
|
||||
cd .. && \
|
||||
rm -rf lightning && \
|
||||
rm -rf /root/.cache \
|
||||
|
||||
RUN \
|
||||
# Show what we have
|
||||
pip --version && \
|
||||
conda info && \
|
||||
pip list && \
|
||||
python -c "import sys; assert sys.version[:3] == '$PYTHON_VERSION', sys.version" && \
|
||||
|
|
|
@ -55,13 +55,14 @@ RUN apt-get update -qq --fix-missing && \
|
|||
ENV \
|
||||
PATH="/root/miniconda3/bin:$PATH" \
|
||||
LD_LIBRARY_PATH="/root/miniconda3/lib:$LD_LIBRARY_PATH"
|
||||
|
||||
COPY environment.yml environment.yml
|
||||
|
||||
RUN conda create -y --name $CONDA_ENV && \
|
||||
conda init bash && \
|
||||
python -c "import re ; fname = 'environment.yml' ; req = re.sub(r'python>=[\d\.]+', 'python=${PYTHON_VERSION}', open(fname).read()) ; open(fname, 'w').write(req)" && \
|
||||
# drop unnecessary packages
|
||||
python -c "fname = 'environment.yml' ; req = open(fname).readlines() ; open(fname, 'w').writelines([ln for ln in req if not any(n in ln for n in ['pytorch>'])])" && \
|
||||
# drop unnecessary packages and channels
|
||||
python -c "fname = 'environment.yml' ; req = open(fname).readlines() ; open(fname, 'w').writelines([ln for ln in req if not any(n in ln for n in ['pytorch>', 'nvidia', 'torchvision'])])" && \
|
||||
cat environment.yml && \
|
||||
conda env update --file environment.yml && \
|
||||
conda clean -ya && \
|
||||
|
@ -89,10 +90,10 @@ RUN \
|
|||
pip install mkl==2021.4.0 && \
|
||||
# set particular PyTorch version \
|
||||
for fpath in `ls requirements/**/*.txt`; do \
|
||||
python ./requirements/pytorch/adjust-versions.py $fpath ${XLA_VERSION}; \
|
||||
python requirements/pytorch/adjust-versions.py $fpath ${XLA_VERSION}; \
|
||||
done && \
|
||||
# install PL dependencies
|
||||
pip install --requirement ./requirements/pytorch/devel.txt --no-cache-dir && \
|
||||
pip install --requirement requirements/pytorch/devel.txt && \
|
||||
# Install PyTorch XLA
|
||||
py_version=${PYTHON_VERSION/./} && \
|
||||
pip install "https://storage.googleapis.com/tpu-pytorch/wheels/tpuvm/torch_xla-${XLA_VERSION}-cp${py_version}-cp${py_version}-linux_x86_64.whl" && \
|
||||
|
@ -100,10 +101,8 @@ RUN \
|
|||
rm -rf lightning && \
|
||||
rm -rf /root/.cache \
|
||||
|
||||
|
||||
RUN \
|
||||
# Show what we have
|
||||
pip --version && \
|
||||
conda info && \
|
||||
pip list && \
|
||||
python -c "import sys; assert sys.version[:3] == '$PYTHON_VERSION', sys.version" && \
|
||||
|
|
|
@ -13,7 +13,7 @@
|
|||
# limitations under the License.
|
||||
|
||||
ARG PYTHON_VERSION=3.9
|
||||
ARG PYTORCH_VERSION=1.10
|
||||
ARG PYTORCH_VERSION=1.13
|
||||
|
||||
FROM pytorchlightning/pytorch_lightning:base-ipu-py${PYTHON_VERSION}-torch${PYTORCH_VERSION}
|
||||
|
||||
|
|
|
@ -64,7 +64,7 @@ Conda users
|
|||
|
||||
Or read the `advanced install guide <starter/installation.html>`_
|
||||
|
||||
We are fully compatible with any stable PyTorch version v1.10 and above.
|
||||
We are fully compatible with any stable PyTorch version v1.11 and above.
|
||||
|
||||
.. raw:: html
|
||||
|
||||
|
|
|
@ -29,12 +29,11 @@ dependencies:
|
|||
- python>=3.8
|
||||
- pip>20.1
|
||||
- numpy>=1.17.2
|
||||
- pytorch>=1.10.*
|
||||
- pytorch>=1.11.0
|
||||
- future>=0.17.1
|
||||
- PyYAML>=5.1
|
||||
- tqdm>=4.57.0
|
||||
- fsspec[http]>=2021.06.1
|
||||
#- tensorboard>=2.2.0 # not needed, already included in pytorch
|
||||
|
||||
# Optional
|
||||
- scikit-learn >0.22.1
|
||||
|
@ -42,4 +41,4 @@ dependencies:
|
|||
- omegaconf>=2.0.5
|
||||
|
||||
# Examples
|
||||
- torchvision>=0.11.*
|
||||
- torchvision>=0.12.0
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
# in case you want to preserve/enforce restrictions on the latest compatible version, add "strict" as an in-line comment
|
||||
|
||||
numpy>=1.17.2, <1.24.2
|
||||
torch>=1.10.0, <=1.13.1
|
||||
torch>=1.11.0, <=2.0.0
|
||||
fsspec[http]>2021.06.0, <2022.6.0
|
||||
packaging>=17.1, <=23.0
|
||||
typing-extensions>=4.0.0, <=4.4.0
|
||||
|
|
|
@ -5,15 +5,12 @@ from typing import Dict, Optional
|
|||
|
||||
# IMPORTANT: this list needs to be sorted in reverse
|
||||
VERSIONS = [
|
||||
dict(torch="1.14.0", torchvision="0.15.0"), # nightly
|
||||
dict(torch="2.0.0", torchvision="0.15.0"), # nightly
|
||||
dict(torch="1.13.1", torchvision="0.14.1"), # stable
|
||||
dict(torch="1.13.0", torchvision="0.14.0"),
|
||||
dict(torch="1.12.1", torchvision="0.13.1"),
|
||||
dict(torch="1.12.0", torchvision="0.13.0"),
|
||||
dict(torch="1.11.0", torchvision="0.12.0"),
|
||||
dict(torch="1.10.2", torchvision="0.11.3"),
|
||||
dict(torch="1.10.1", torchvision="0.11.2"),
|
||||
dict(torch="1.10.0", torchvision="0.11.1"),
|
||||
]
|
||||
|
||||
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
# in case you want to preserve/enforce restrictions on the latest compatible version, add "strict" as an in-line comment
|
||||
|
||||
numpy>=1.17.2, <1.24.2
|
||||
torch>=1.10.0, <=1.13.1
|
||||
torch>=1.11.0, <=2.0.0
|
||||
tqdm>=4.57.0, <4.65.0
|
||||
PyYAML>=5.4, <=6.0
|
||||
fsspec[http]>2021.06.0, <2022.8.0
|
||||
|
|
|
@ -40,6 +40,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
|
|||
|
||||
### Removed
|
||||
|
||||
- Removed support for PyTorch 1.10 ([#16492](https://github.com/Lightning-AI/lightning/pull/16492))
|
||||
|
||||
- Removed support for Python 3.7 ([#16579](https://github.com/Lightning-AI/lightning/pull/16579))
|
||||
|
||||
|
||||
|
|
|
@ -22,7 +22,7 @@ import torch.multiprocessing as mp
|
|||
|
||||
from lightning.fabric.strategies.launchers.launcher import _Launcher
|
||||
from lightning.fabric.utilities.apply_func import move_data_to_device
|
||||
from lightning.fabric.utilities.imports import _IS_INTERACTIVE, _TORCH_GREATER_EQUAL_1_11
|
||||
from lightning.fabric.utilities.imports import _IS_INTERACTIVE
|
||||
from lightning.fabric.utilities.seed import _collect_rng_states, _set_rng_states
|
||||
|
||||
if TYPE_CHECKING:
|
||||
|
@ -154,22 +154,18 @@ class _GlobalStateSnapshot:
|
|||
def capture(cls) -> "_GlobalStateSnapshot":
|
||||
"""Capture a few global states from torch, numpy, etc., that we want to restore in a spawned worker
|
||||
process."""
|
||||
warn_only = torch.is_deterministic_algorithms_warn_only_enabled() if _TORCH_GREATER_EQUAL_1_11 else False
|
||||
return cls(
|
||||
use_deterministic_algorithms=torch.are_deterministic_algorithms_enabled(),
|
||||
use_deterministic_algorithms_warn_only=warn_only,
|
||||
use_deterministic_algorithms_warn_only=torch.is_deterministic_algorithms_warn_only_enabled(),
|
||||
cudnn_benchmark=torch.backends.cudnn.benchmark,
|
||||
rng_states=_collect_rng_states(),
|
||||
)
|
||||
|
||||
def restore(self) -> None:
|
||||
"""Restores all globals to the values captured in the :meth:`capture` method."""
|
||||
if _TORCH_GREATER_EQUAL_1_11:
|
||||
torch.use_deterministic_algorithms(
|
||||
self.use_deterministic_algorithms, warn_only=self.use_deterministic_algorithms_warn_only
|
||||
)
|
||||
else:
|
||||
torch.use_deterministic_algorithms(self.use_deterministic_algorithms)
|
||||
torch.use_deterministic_algorithms(
|
||||
self.use_deterministic_algorithms, warn_only=self.use_deterministic_algorithms_warn_only
|
||||
)
|
||||
torch.backends.cudnn.benchmark = self.cudnn_benchmark
|
||||
_set_rng_states(self.rng_states)
|
||||
|
||||
|
|
|
@ -25,7 +25,6 @@ _IS_WINDOWS = platform.system() == "Windows"
|
|||
# 2. The inspection mode via `python -i`: https://stackoverflow.com/a/6879085/1162383
|
||||
_IS_INTERACTIVE = hasattr(sys, "ps1") or bool(sys.flags.interactive)
|
||||
|
||||
_TORCH_GREATER_EQUAL_1_11 = compare_version("torch", operator.ge, "1.11.0")
|
||||
_TORCH_GREATER_EQUAL_1_12 = compare_version("torch", operator.ge, "1.12.0")
|
||||
_TORCH_GREATER_EQUAL_1_13 = compare_version("torch", operator.ge, "1.13.0")
|
||||
_TORCH_GREATER_EQUAL_2_0 = compare_version("torch", operator.ge, "2.0.0", use_base_version=True)
|
||||
|
|
|
@ -55,6 +55,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
|
|||
|
||||
### Removed
|
||||
|
||||
- Removed support for PyTorch 1.10 ([#16492](https://github.com/Lightning-AI/lightning/pull/16492))
|
||||
|
||||
- Removed support for Python 3.7 ([#16579](https://github.com/Lightning-AI/lightning/pull/16579))
|
||||
|
||||
- Removed the `pytorch_lightning.lite` module in favor of `lightning_fabric` ([#15953](https://github.com/Lightning-AI/lightning/pull/15953))
|
||||
|
|
|
@ -22,19 +22,15 @@ from typing import Any, Callable, Dict, Optional, Sequence, Union
|
|||
|
||||
import torch
|
||||
from torch import Tensor
|
||||
from torch.ao.quantization import fuse_modules_qat as fuse_modules
|
||||
from torch.ao.quantization.qconfig import QConfig
|
||||
from torch.quantization import FakeQuantizeBase
|
||||
|
||||
import lightning.pytorch as pl
|
||||
from lightning.fabric.utilities.imports import _TORCH_GREATER_EQUAL_1_11, _TORCH_GREATER_EQUAL_1_12
|
||||
from lightning.fabric.utilities.imports import _TORCH_GREATER_EQUAL_1_12
|
||||
from lightning.pytorch.callbacks.callback import Callback
|
||||
from lightning.pytorch.utilities.exceptions import MisconfigurationException
|
||||
|
||||
if _TORCH_GREATER_EQUAL_1_11:
|
||||
from torch.ao.quantization import fuse_modules_qat as fuse_modules
|
||||
else:
|
||||
from torch.quantization import fuse_modules
|
||||
|
||||
|
||||
def wrap_qat_forward_context(
|
||||
quant_cb: "QuantizationAwareTraining",
|
||||
|
|
|
@ -34,7 +34,7 @@ from lightning.fabric.utilities.apply_func import convert_to_tensors
|
|||
from lightning.fabric.utilities.cloud_io import get_filesystem
|
||||
from lightning.fabric.utilities.device_dtype_mixin import _DeviceDtypeModuleMixin
|
||||
from lightning.fabric.utilities.distributed import _distributed_available, _sync_ddp
|
||||
from lightning.fabric.utilities.imports import _IS_WINDOWS, _TORCH_GREATER_EQUAL_1_11, _TORCH_GREATER_EQUAL_2_0
|
||||
from lightning.fabric.utilities.imports import _IS_WINDOWS, _TORCH_GREATER_EQUAL_2_0
|
||||
from lightning.fabric.wrappers import _FabricOptimizer
|
||||
from lightning.pytorch.callbacks.callback import Callback
|
||||
from lightning.pytorch.core.hooks import CheckpointHooks, DataHooks, ModelHooks
|
||||
|
@ -1780,10 +1780,7 @@ class LightningModule(
|
|||
rank_zero_debug("Could not register sharded tensor state dict hooks")
|
||||
return
|
||||
|
||||
if _TORCH_GREATER_EQUAL_1_11:
|
||||
from torch.distributed._shard.sharded_tensor import pre_load_state_dict_hook, state_dict_hook
|
||||
else:
|
||||
from torch.distributed._sharded_tensor import pre_load_state_dict_hook, state_dict_hook
|
||||
from torch.distributed._shard.sharded_tensor import pre_load_state_dict_hook, state_dict_hook
|
||||
|
||||
self._register_state_dict_hook(state_dict_hook)
|
||||
|
||||
|
|
|
@ -32,7 +32,7 @@ from lightning.fabric.utilities.distributed import (
|
|||
_sync_ddp_if_available,
|
||||
)
|
||||
from lightning.fabric.utilities.distributed import group as _group
|
||||
from lightning.fabric.utilities.imports import _IS_WINDOWS, _TORCH_GREATER_EQUAL_1_11
|
||||
from lightning.fabric.utilities.imports import _IS_WINDOWS
|
||||
from lightning.fabric.utilities.optimizer import _optimizers_to_device
|
||||
from lightning.fabric.utilities.seed import reset_seed
|
||||
from lightning.fabric.utilities.types import ReduceOp
|
||||
|
@ -209,7 +209,6 @@ class DDPStrategy(ParallelStrategy):
|
|||
)
|
||||
|
||||
def _enable_model_averaging(self) -> None:
|
||||
# Only called when PyTorch version >= 1.10
|
||||
log.detail(f"{self.__class__.__name__}: reinitializing optimizers with post localSGD")
|
||||
if self._model_averaging_period is None:
|
||||
raise ValueError(
|
||||
|
@ -372,10 +371,8 @@ class DDPStrategy(ParallelStrategy):
|
|||
|
||||
pl_module = self.lightning_module
|
||||
if isinstance(self.model, DistributedDataParallel):
|
||||
if (
|
||||
_TORCH_GREATER_EQUAL_1_11
|
||||
and not self.model.static_graph
|
||||
and self.model._get_ddp_logging_data().get("can_set_static_graph") # type: ignore[operator]
|
||||
if not self.model.static_graph and self.model._get_ddp_logging_data().get( # type: ignore[operator]
|
||||
"can_set_static_graph"
|
||||
):
|
||||
rank_zero_info(
|
||||
"Your model can run with static graph optimizations. For future training runs, we suggest you"
|
||||
|
|
|
@ -32,7 +32,6 @@ from lightning.fabric.utilities.distributed import (
|
|||
_sync_ddp_if_available,
|
||||
)
|
||||
from lightning.fabric.utilities.distributed import group as _group
|
||||
from lightning.fabric.utilities.imports import _TORCH_GREATER_EQUAL_1_11
|
||||
from lightning.fabric.utilities.optimizer import _optimizers_to_device
|
||||
from lightning.fabric.utilities.types import ReduceOp
|
||||
from lightning.pytorch.overrides.base import _LightningModuleWrapperBase, _LightningPrecisionModuleWrapperBase
|
||||
|
@ -339,10 +338,8 @@ class DDPSpawnStrategy(ParallelStrategy):
|
|||
|
||||
pl_module = self.lightning_module
|
||||
if isinstance(self.model, DistributedDataParallel):
|
||||
if (
|
||||
_TORCH_GREATER_EQUAL_1_11
|
||||
and not self.model.static_graph
|
||||
and self.model._get_ddp_logging_data().get("can_set_static_graph") # type: ignore[operator]
|
||||
if not self.model.static_graph and self.model._get_ddp_logging_data().get( # type: ignore[operator]
|
||||
"can_set_static_graph"
|
||||
):
|
||||
rank_zero_info(
|
||||
"Your model can run with static graph optimizations. For future training runs, we suggest you"
|
||||
|
|
|
@ -22,14 +22,13 @@ from torch.optim.optimizer import Optimizer
|
|||
import lightning.pytorch as pl
|
||||
from lightning.fabric.plugins import CheckpointIO, ClusterEnvironment
|
||||
from lightning.fabric.utilities.distributed import group as _group
|
||||
from lightning.pytorch.overrides.base import _LightningModuleWrapperBase
|
||||
from lightning.pytorch.overrides.torch_distributed import broadcast_object_list
|
||||
from lightning.pytorch.plugins.io.hpu_plugin import HPUCheckpointIO
|
||||
from lightning.pytorch.plugins.io.wrapper import _WrappingCheckpointIO
|
||||
from lightning.pytorch.plugins.precision import PrecisionPlugin
|
||||
from lightning.pytorch.strategies.ddp import DDPStrategy
|
||||
from lightning.pytorch.utilities.exceptions import MisconfigurationException
|
||||
from lightning.pytorch.utilities.imports import _HPU_AVAILABLE, _TORCH_LESSER_EQUAL_1_10_2
|
||||
from lightning.pytorch.utilities.imports import _HPU_AVAILABLE
|
||||
from lightning.pytorch.utilities.types import STEP_OUTPUT
|
||||
|
||||
if _HPU_AVAILABLE:
|
||||
|
@ -118,18 +117,6 @@ class HPUParallelStrategy(DDPStrategy):
|
|||
# DDP does not accept static_graph as a parameter, hence removing it from the list
|
||||
del self._ddp_kwargs["static_graph"]
|
||||
|
||||
def configure_ddp(self) -> None:
|
||||
# DDP does not accept static graph as param with torch < 1.11
|
||||
if _TORCH_LESSER_EQUAL_1_10_2:
|
||||
log.detail(f"{self.__class__.__name__}: configuring DistributedDataParallel")
|
||||
self._pre_configure_ddp()
|
||||
self.model = self._setup_model(_LightningModuleWrapperBase(self.model)) # type: ignore
|
||||
if self.root_device.type == "hpu" and self._static_graph:
|
||||
self._model._set_static_graph() # type: ignore
|
||||
self._register_ddp_hooks()
|
||||
else:
|
||||
super().configure_ddp()
|
||||
|
||||
def broadcast(self, obj: object, src: int = 0) -> object: # type: ignore
|
||||
obj = [obj]
|
||||
if self.global_rank != src:
|
||||
|
|
|
@ -30,7 +30,6 @@ from torch import Tensor
|
|||
import lightning.pytorch as pl
|
||||
from lightning.fabric.strategies.launchers.multiprocessing import _check_bad_cuda_fork
|
||||
from lightning.fabric.utilities import move_data_to_device
|
||||
from lightning.fabric.utilities.imports import _TORCH_GREATER_EQUAL_1_11
|
||||
from lightning.fabric.utilities.seed import _collect_rng_states, _set_rng_states
|
||||
from lightning.fabric.utilities.types import _PATH
|
||||
from lightning.pytorch.strategies.launchers.launcher import _Launcher
|
||||
|
@ -297,21 +296,17 @@ class _GlobalStateSnapshot:
|
|||
def capture(cls) -> "_GlobalStateSnapshot":
|
||||
"""Capture a few global states from torch, numpy, etc., that we want to restore in a spawned worker
|
||||
process."""
|
||||
warn_only = torch.is_deterministic_algorithms_warn_only_enabled() if _TORCH_GREATER_EQUAL_1_11 else False
|
||||
return cls(
|
||||
use_deterministic_algorithms=torch.are_deterministic_algorithms_enabled(),
|
||||
use_deterministic_algorithms_warn_only=warn_only,
|
||||
use_deterministic_algorithms_warn_only=torch.is_deterministic_algorithms_warn_only_enabled(),
|
||||
cudnn_benchmark=torch.backends.cudnn.benchmark,
|
||||
rng_states=_collect_rng_states(),
|
||||
)
|
||||
|
||||
def restore(self) -> None:
|
||||
"""Restores all globals to the values captured in the :meth:`capture` method."""
|
||||
if _TORCH_GREATER_EQUAL_1_11:
|
||||
torch.use_deterministic_algorithms(
|
||||
self.use_deterministic_algorithms, warn_only=self.use_deterministic_algorithms_warn_only
|
||||
)
|
||||
else:
|
||||
torch.use_deterministic_algorithms(self.use_deterministic_algorithms)
|
||||
torch.use_deterministic_algorithms(
|
||||
self.use_deterministic_algorithms, warn_only=self.use_deterministic_algorithms_warn_only
|
||||
)
|
||||
torch.backends.cudnn.benchmark = self.cudnn_benchmark
|
||||
_set_rng_states(self.rng_states)
|
||||
|
|
|
@ -29,7 +29,7 @@ from lightning.fabric.plugins.environments import (
|
|||
TorchElasticEnvironment,
|
||||
)
|
||||
from lightning.fabric.utilities.device_parser import _determine_root_gpu_device
|
||||
from lightning.fabric.utilities.imports import _IS_INTERACTIVE, _TORCH_GREATER_EQUAL_1_11
|
||||
from lightning.fabric.utilities.imports import _IS_INTERACTIVE
|
||||
from lightning.pytorch.accelerators import AcceleratorRegistry
|
||||
from lightning.pytorch.accelerators.accelerator import Accelerator
|
||||
from lightning.pytorch.accelerators.cuda import CUDAAccelerator
|
||||
|
@ -194,10 +194,8 @@ class AcceleratorConnector:
|
|||
|
||||
def _init_deterministic(self, deterministic: Optional[Union[bool, _LITERAL_WARN]]) -> None:
|
||||
self.deterministic = deterministic or False # default to False if not set
|
||||
if _TORCH_GREATER_EQUAL_1_11 and deterministic == "warn":
|
||||
if deterministic == "warn":
|
||||
torch.use_deterministic_algorithms(True, warn_only=True)
|
||||
else:
|
||||
torch.use_deterministic_algorithms(self.deterministic)
|
||||
if self.deterministic:
|
||||
# https://docs.nvidia.com/cuda/cublas/index.html#cublasApi_reproducibility
|
||||
os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"
|
||||
|
|
|
@ -20,7 +20,6 @@ from lightning_utilities.core.imports import compare_version, package_available,
|
|||
|
||||
_PYTHON_GREATER_EQUAL_3_8_0 = (sys.version_info.major, sys.version_info.minor) >= (3, 8)
|
||||
_PYTHON_GREATER_EQUAL_3_10_0 = (sys.version_info.major, sys.version_info.minor) >= (3, 10)
|
||||
_TORCH_LESSER_EQUAL_1_10_2 = compare_version("torch", operator.le, "1.10.2")
|
||||
# duplicated from fabric because HPU is patching it below
|
||||
_TORCH_GREATER_EQUAL_1_13 = compare_version("torch", operator.ge, "1.13.0")
|
||||
_TORCHMETRICS_GREATER_EQUAL_0_9_1 = RequirementCache("torchmetrics>=0.9.1")
|
||||
|
|
|
@ -78,15 +78,15 @@ Lightning is rigorously tested across multiple CPUs, GPUs, TPUs, IPUs, and HPUs
|
|||
|
||||
<center>
|
||||
|
||||
| System / PyTorch ver. | 1.10 | 1.12 |
|
||||
| :--------------------------------: | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
|
||||
| Linux py3.9 \[GPUs\] | - | - |
|
||||
| Linux py3.9 \[TPUs\] | - | - |
|
||||
| Linux py3.8 \[IPUs\] | - | - |
|
||||
| Linux py3.8 \[HPUs\] | [![Build Status](https://dev.azure.com/Lightning-AI/lightning/_apis/build/status/pytorch-lightning%20%28HPUs%29?branchName=master)](https://dev.azure.com/Lightning-AI/lightning/_build/latest?definitionId=26&branchName=master) | - |
|
||||
| Linux (multiple Python versions) | - | [![Test](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml/badge.svg?branch=master&event=push)](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml) |
|
||||
| OSX (multiple Python versions) | - | [![Test](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml/badge.svg?branch=master&event=push)](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml) |
|
||||
| Windows (multiple Python versions) | - | [![Test](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml/badge.svg?branch=master&event=push)](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml) |
|
||||
| System / PyTorch ver. | 1.11 | 1.12 | 1.13 | 2.0 |
|
||||
| :--------------------------------: | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---- |
|
||||
| Linux py3.9 \[GPUs\] | - | [![Build Status](<https://dev.azure.com/Lightning-AI/lightning/_apis/build/status/pytorch-lightning%20(GPUs)?branchName=master>)](https://dev.azure.com/Lightning-AI/lightning/_build/latest?definitionId=24&branchName=master) | [![Build Status](<https://dev.azure.com/Lightning-AI/lightning/_apis/build/status/pytorch-lightning%20(GPUs)?branchName=master>)](https://dev.azure.com/Lightning-AI/lightning/_build/latest?definitionId=24&branchName=master) | Soon |
|
||||
| Linux py3.9 \[TPUs\] | - | [![Test PyTorch - TPU](https://github.com/Lightning-AI/lightning/actions/workflows/tpu-tests.yml/badge.svg)](https://github.com/Lightning-AI/lightning/actions/workflows/tpu-tests.yml) | | Soon |
|
||||
| Linux py3.8 \[IPUs\] | - | - | [![Build Status](<https://dev.azure.com/Lightning-AI/lightning/_apis/build/status/pytorch-lightning%20(IPUs)?branchName=master>)](https://dev.azure.com/Lightning-AI/lightning/_build/latest?definitionId=25&branchName=master) | Soon |
|
||||
| Linux py3.8 \[HPUs\] | - | - | [![Build Status](<https://dev.azure.com/Lightning-AI/lightning/_apis/build/status/pytorch-lightning%20(HPUs)?branchName=master>)](https://dev.azure.com/Lightning-AI/lightning/_build/latest?definitionId=26&branchName=master) | Soon |
|
||||
| Linux (multiple Python versions) | [![Test PyTorch](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml/badge.svg)](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml) | [![Test PyTorch](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml/badge.svg)](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml) | [![Test PyTorch](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml/badge.svg)](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml) | Soon |
|
||||
| OSX (multiple Python versions) | [![Test PyTorch](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml/badge.svg)](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml) | [![Test PyTorch](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml/badge.svg)](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml) | [![Test PyTorch](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml/badge.svg)](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml) | Soon |
|
||||
| Windows (multiple Python versions) | [![Test PyTorch](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml/badge.svg)](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml) | [![Test PyTorch](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml/badge.svg)](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml) | [![Test PyTorch](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml/badge.svg)](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml) | Soon |
|
||||
|
||||
</center>
|
||||
</details>
|
||||
|
|
|
@ -13,7 +13,7 @@ from lightning_fabric.plugins.collectives import TorchCollective
|
|||
from lightning_fabric.plugins.environments import LightningEnvironment
|
||||
from lightning_fabric.strategies.ddp import DDPStrategy
|
||||
from lightning_fabric.strategies.launchers.multiprocessing import _MultiProcessingLauncher
|
||||
from lightning_fabric.utilities.imports import _TORCH_GREATER_EQUAL_1_11, _TORCH_GREATER_EQUAL_1_13
|
||||
from lightning_fabric.utilities.imports import _TORCH_GREATER_EQUAL_1_13
|
||||
|
||||
if TorchCollective.is_available():
|
||||
from torch.distributed import ReduceOp
|
||||
|
@ -123,10 +123,7 @@ def test_convert_ops():
|
|||
# Test we are handling different casing properly
|
||||
assert TorchCollective._convert_to_native_op("BOR") == ReduceOp.BOR
|
||||
assert TorchCollective._convert_to_native_op("BoR") == ReduceOp.BOR
|
||||
|
||||
# AVG is very recent!
|
||||
if _TORCH_GREATER_EQUAL_1_11:
|
||||
assert TorchCollective._convert_to_native_op("avg") == ReduceOp.AVG
|
||||
assert TorchCollective._convert_to_native_op("avg") == ReduceOp.AVG
|
||||
|
||||
# Test invalid type
|
||||
with pytest.raises(ValueError, match="Unsupported op 1 of type int"):
|
||||
|
|
|
@ -793,7 +793,7 @@ def test_strategy_str_passed_being_case_insensitive(_, strategy, strategy_cls):
|
|||
assert isinstance(connector.strategy, strategy_cls)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("precision", ["64", "32", "16", pytest.param("bf16", marks=RunIf(min_torch="1.10"))])
|
||||
@pytest.mark.parametrize("precision", ["64", "32", "16", "bf16"])
|
||||
@mock.patch("lightning_fabric.accelerators.cuda.num_cuda_devices", return_value=1)
|
||||
def test_precision_from_environment(_, precision):
|
||||
"""Test that the precision input can be set through the environment variable."""
|
||||
|
|
|
@ -122,7 +122,6 @@ def test_no_warning_strategy(tmpdir):
|
|||
|
||||
|
||||
@RunIf(ipu=True)
|
||||
@pytest.mark.xfail(raises=NotImplementedError, reason="TODO: issues with latest poptorch")
|
||||
@pytest.mark.parametrize("devices", [1, 4])
|
||||
def test_all_stages(tmpdir, devices):
|
||||
model = IPUModel()
|
||||
|
@ -134,7 +133,6 @@ def test_all_stages(tmpdir, devices):
|
|||
|
||||
|
||||
@RunIf(ipu=True)
|
||||
@pytest.mark.xfail(raises=NotImplementedError, reason="TODO: issues with latest poptorch")
|
||||
@pytest.mark.parametrize("devices", [1, 4])
|
||||
def test_inference_only(tmpdir, devices):
|
||||
model = IPUModel()
|
||||
|
@ -285,7 +283,6 @@ def test_accumulated_batches(tmpdir):
|
|||
|
||||
|
||||
@RunIf(ipu=True)
|
||||
@pytest.mark.xfail(raises=NotImplementedError, reason="TODO: issues with latest poptorch")
|
||||
def test_stages_correct(tmpdir):
|
||||
"""Ensure all stages are traced correctly by asserting the output for each stage."""
|
||||
|
||||
|
|
|
@ -19,7 +19,7 @@ from torch import nn
|
|||
from torch.optim import Optimizer, SGD
|
||||
from torch.utils.data import DataLoader
|
||||
|
||||
from lightning_fabric.utilities.imports import _TORCH_GREATER_EQUAL_1_11, _TORCH_GREATER_EQUAL_1_12
|
||||
from lightning_fabric.utilities.imports import _TORCH_GREATER_EQUAL_1_12
|
||||
from pytorch_lightning import LightningModule, seed_everything, Trainer
|
||||
from pytorch_lightning.callbacks import BackboneFinetuning, BaseFinetuning, ModelCheckpoint
|
||||
from pytorch_lightning.demos.boring_classes import BoringModel, RandomDataset
|
||||
|
@ -361,9 +361,8 @@ def test_callbacks_restore(tmpdir):
|
|||
"weight_decay": 0,
|
||||
"nesterov": False,
|
||||
"params": ["layer.3.weight", "layer.3.bias"],
|
||||
"maximize": False,
|
||||
}
|
||||
if _TORCH_GREATER_EQUAL_1_11:
|
||||
expected["maximize"] = False
|
||||
if _TORCH_GREATER_EQUAL_1_12:
|
||||
expected["foreach"] = None
|
||||
if _TORCH_GREATER_EQUAL_1_13:
|
||||
|
@ -379,9 +378,8 @@ def test_callbacks_restore(tmpdir):
|
|||
"weight_decay": 0,
|
||||
"nesterov": False,
|
||||
"params": ["layer.0.weight", "layer.0.bias"],
|
||||
"maximize": False,
|
||||
}
|
||||
if _TORCH_GREATER_EQUAL_1_11:
|
||||
expected["maximize"] = False
|
||||
if _TORCH_GREATER_EQUAL_1_12:
|
||||
expected["foreach"] = None
|
||||
if _TORCH_GREATER_EQUAL_1_13:
|
||||
|
|
|
@ -17,89 +17,16 @@ from typing import Callable, Union
|
|||
import pytest
|
||||
import torch
|
||||
from torch.quantization import FakeQuantizeBase
|
||||
from torchmetrics.functional import mean_absolute_percentage_error as mape
|
||||
|
||||
from pytorch_lightning import seed_everything, Trainer
|
||||
from pytorch_lightning.accelerators import CUDAAccelerator
|
||||
from pytorch_lightning.callbacks import QuantizationAwareTraining
|
||||
from pytorch_lightning.demos.boring_classes import RandomDataset
|
||||
from pytorch_lightning.utilities.exceptions import MisconfigurationException
|
||||
from pytorch_lightning.utilities.memory import get_model_size_mb
|
||||
from tests_pytorch.helpers.datamodules import RegressDataModule
|
||||
from tests_pytorch.helpers.runif import RunIf
|
||||
from tests_pytorch.helpers.simple_models import RegressionModel
|
||||
|
||||
|
||||
# todo: [True-False-average] and [False-False-average] fail with 1.12
|
||||
# error: assert False (tensor(0.3262), tensor(0.8754), atol=0.45)
|
||||
@pytest.mark.parametrize("observe", ["average", "histogram"])
|
||||
@pytest.mark.parametrize("fuse", [True, False])
|
||||
@pytest.mark.parametrize("convert", [True, False])
|
||||
@RunIf(quantization=True, sklearn=True, max_torch="1.11")
|
||||
def test_quantization(tmpdir, observe: str, fuse: bool, convert: bool):
|
||||
"""Parity test for quant model."""
|
||||
cuda_available = CUDAAccelerator.is_available()
|
||||
|
||||
if observe == "average" and not fuse and CUDAAccelerator.is_available():
|
||||
pytest.xfail("TODO: flakiness in GPU CI")
|
||||
|
||||
seed_everything(42)
|
||||
dm = RegressDataModule()
|
||||
accelerator = "gpu" if cuda_available else "cpu"
|
||||
trainer_args = dict(default_root_dir=tmpdir, max_epochs=7, accelerator=accelerator, devices=1)
|
||||
model = RegressionModel()
|
||||
qmodel = copy.deepcopy(model)
|
||||
|
||||
trainer = Trainer(**trainer_args)
|
||||
trainer.fit(model, datamodule=dm)
|
||||
org_size = get_model_size_mb(model)
|
||||
org_score = torch.mean(torch.tensor([mape(model(x), y) for x, y in dm.test_dataloader()]))
|
||||
|
||||
fusing_layers = [(f"layer_{i}", f"layer_{i}a") for i in range(3)] if fuse else None
|
||||
qcb = QuantizationAwareTraining(
|
||||
observer_type=observe,
|
||||
modules_to_fuse=fusing_layers,
|
||||
quantize_on_fit_end=convert,
|
||||
observer_enabled_stages=("train", "validate"),
|
||||
)
|
||||
trainer = Trainer(callbacks=[qcb], **trainer_args)
|
||||
trainer.fit(qmodel, datamodule=dm)
|
||||
|
||||
quant_calls = qcb._forward_calls
|
||||
assert quant_calls == qcb._forward_calls
|
||||
quant_score = torch.mean(torch.tensor([mape(qmodel(x), y) for x, y in dm.test_dataloader()]))
|
||||
# test that the test score is almost the same as with pure training
|
||||
assert torch.allclose(org_score, quant_score, atol=0.45)
|
||||
model_path = trainer.checkpoint_callback.best_model_path
|
||||
curr_epoch = trainer.current_epoch
|
||||
|
||||
trainer_args.update(dict(max_epochs=1, enable_checkpointing=False))
|
||||
if not convert:
|
||||
trainer = Trainer(callbacks=[QuantizationAwareTraining()], **trainer_args)
|
||||
trainer.fit(qmodel, datamodule=dm)
|
||||
qmodel.eval()
|
||||
torch.quantization.convert(qmodel, inplace=True)
|
||||
|
||||
quant_size = get_model_size_mb(qmodel)
|
||||
# test that the trained model is smaller than the initial one
|
||||
size_ratio = quant_size / org_size
|
||||
assert size_ratio < 0.65
|
||||
|
||||
# todo: make it work also with strict loading
|
||||
qmodel2 = RegressionModel.load_from_checkpoint(model_path, strict=False)
|
||||
quant2_score = torch.mean(torch.tensor([mape(qmodel2(x), y) for x, y in dm.test_dataloader()]))
|
||||
assert torch.allclose(org_score, quant2_score, atol=0.47)
|
||||
|
||||
# test without and with QAT callback
|
||||
trainer_args.update(max_epochs=curr_epoch + 1)
|
||||
qmodel2 = RegressionModel()
|
||||
trainer = Trainer(callbacks=[QuantizationAwareTraining()], **trainer_args)
|
||||
trainer.fit(qmodel2, datamodule=dm, ckpt_path=model_path)
|
||||
quant2_score = torch.mean(torch.tensor([mape(qmodel2(x), y) for x, y in dm.test_dataloader()]))
|
||||
# test that the test score is almost the same as with pure training
|
||||
assert torch.allclose(org_score, quant2_score, atol=0.45)
|
||||
|
||||
|
||||
@RunIf(quantization=True, sklearn=True)
|
||||
def test_quantize_torchscript(tmpdir):
|
||||
"""Test converting to torchscript."""
|
||||
|
|
|
@ -21,7 +21,6 @@ from torch import nn
|
|||
from torch.optim import Adam, SGD
|
||||
|
||||
from lightning_fabric import Fabric
|
||||
from lightning_fabric.utilities.imports import _TORCH_GREATER_EQUAL_1_11
|
||||
from pytorch_lightning import LightningModule, Trainer
|
||||
from pytorch_lightning.core.module import _TrainerFabricShim
|
||||
from pytorch_lightning.demos.boring_classes import BoringModel
|
||||
|
@ -315,10 +314,7 @@ def test_device_placement(tmpdir, accelerator, device):
|
|||
|
||||
@RunIf(skip_windows=True)
|
||||
def test_sharded_tensor_state_dict(single_process_pg):
|
||||
if _TORCH_GREATER_EQUAL_1_11:
|
||||
from torch.distributed._shard.sharded_tensor import empty as sharded_tensor_empty
|
||||
else:
|
||||
from torch.distributed._sharded_tensor import empty as sharded_tensor_empty
|
||||
from torch.distributed._shard.sharded_tensor import empty as sharded_tensor_empty
|
||||
from torch.distributed._sharding_spec import ChunkShardingSpec
|
||||
|
||||
class BoringModelWithShardedTensor(BoringModel):
|
||||
|
@ -336,7 +332,7 @@ def test_sharded_tensor_state_dict(single_process_pg):
|
|||
|
||||
m_0 = BoringModelWithShardedTensor(spec)
|
||||
m_0.sharded_tensor.local_shards()[0].tensor.fill_(1)
|
||||
name_st = ".sharded_tensor" if _TORCH_GREATER_EQUAL_1_11 and not _TORCH_GREATER_EQUAL_1_13 else "sharded_tensor"
|
||||
name_st = ".sharded_tensor" if not _TORCH_GREATER_EQUAL_1_13 else "sharded_tensor"
|
||||
assert name_st in m_0.state_dict(), 'Expect "sharded_tensor" to appear in the state dict'
|
||||
|
||||
m_1 = BoringModelWithShardedTensor(spec)
|
||||
|
|
|
@ -95,7 +95,7 @@ def test_ddp_torch_dist_is_available_in_setup(_, __, cuda_count_1, tmpdir):
|
|||
trainer.fit(model)
|
||||
|
||||
|
||||
@RunIf(min_cuda_gpus=2, min_torch="1.8.1", standalone=True)
|
||||
@RunIf(min_cuda_gpus=2, standalone=True)
|
||||
@pytest.mark.parametrize("precision", (16, 32))
|
||||
def test_ddp_wrapper(tmpdir, precision):
|
||||
"""Test parameters to ignore are carried over for DDP."""
|
||||
|
|
|
@ -667,7 +667,7 @@ def test_parallel_devices_in_strategy_confilict_with_accelerator(parallel_device
|
|||
Trainer(strategy=DDPStrategy(parallel_devices=parallel_devices), accelerator=accelerator)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("deterministic", [True, False, pytest.param("warn", marks=RunIf(min_torch="1.11.0"))])
|
||||
@pytest.mark.parametrize("deterministic", [True, False, "warn"])
|
||||
def test_deterministic_init(deterministic):
|
||||
trainer = Trainer(accelerator="auto", deterministic=deterministic)
|
||||
assert trainer._accelerator_connector.deterministic == deterministic
|
||||
|
|
Loading…
Reference in New Issue