diff --git a/.azure/ipu-tests.yml b/.azure/ipu-tests.yml index afcab05dd1..539428b588 100644 --- a/.azure/ipu-tests.yml +++ b/.azure/ipu-tests.yml @@ -33,7 +33,7 @@ pr: variables: - name: poplar_sdk # https://docs.graphcore.ai/projects/poptorch-user-guide/en/latest/installation.html#version-compatibility - value: "poplar_sdk-ubuntu_20_04-3.0.0+1145-1b114aac3a" + value: "poplar_sdk-ubuntu_20_04-3.1.0+1205-58b501c780" jobs: - job: testing @@ -44,7 +44,10 @@ jobs: clean: all steps: - - script: tar -xvzf /opt/poplar/${{ variables.poplar_sdk }}.tar.gz + - script: | + ls -la /mnt/public/packages + ls -la /opt/poplar + tar -xvzf /opt/poplar/${{ variables.poplar_sdk }}.tar.gz displayName: "Extract Poplar SDK" - script: | diff --git a/.github/ISSUE_TEMPLATE/1_bug_report.yaml b/.github/ISSUE_TEMPLATE/1_bug_report.yaml index 7f84118c68..d86415b539 100644 --- a/.github/ISSUE_TEMPLATE/1_bug_report.yaml +++ b/.github/ISSUE_TEMPLATE/1_bug_report.yaml @@ -79,7 +79,7 @@ body: #- Lightning Component (e.g. 
Trainer, LightningModule, LightningApp, LightningWork, LightningFlow): #- PyTorch Lightning Version (e.g., 1.5.0): #- Lightning App Version (e.g., 0.5.2): - #- PyTorch Version (e.g., 1.10): + #- PyTorch Version (e.g., 2.0): #- Python version (e.g., 3.9): #- OS (e.g., Linux): #- CUDA/cuDNN version: diff --git a/.github/checkgroup.yml b/.github/checkgroup.yml index e03512c845..1f2fdee8bb 100644 --- a/.github/checkgroup.yml +++ b/.github/checkgroup.yml @@ -23,16 +23,15 @@ subprojects: - "pl-cpu (macOS-11, pytorch, 3.8, 1.11)" - "pl-cpu (macOS-11, pytorch, 3.9, 1.12)" - "pl-cpu (macOS-11, pytorch, 3.10, 1.13)" - - "pl-cpu (macOS-11, pytorch, 3.8, 1.10, oldest)" - - "pl-cpu (ubuntu-20.04, pytorch, 3.8, 1.10)" + - "pl-cpu (macOS-11, pytorch, 3.8, 1.11, oldest)" - "pl-cpu (ubuntu-20.04, pytorch, 3.9, 1.11)" - "pl-cpu (ubuntu-20.04, pytorch, 3.10, 1.12)" - "pl-cpu (ubuntu-20.04, pytorch, 3.10, 1.13)" - - "pl-cpu (ubuntu-20.04, pytorch, 3.8, 1.10, oldest)" + - "pl-cpu (ubuntu-20.04, pytorch, 3.8, 1.11, oldest)" - "pl-cpu (windows-2022, pytorch, 3.9, 1.11)" - "pl-cpu (windows-2022, pytorch, 3.10, 1.12)" - "pl-cpu (windows-2022, pytorch, 3.10, 1.13)" - - "pl-cpu (windows-2022, pytorch, 3.8, 1.10, oldest)" + - "pl-cpu (windows-2022, pytorch, 3.8, 1.11, oldest)" - "pl-cpu (slow, macOS-11, pytorch, 3.8, 1.11)" - "pl-cpu (slow, ubuntu-20.04, pytorch, 3.8, 1.11)" - "pl-cpu (slow, windows-2022, pytorch, 3.8, 1.11)" @@ -162,14 +161,12 @@ subprojects: - "!*.md" - "!**/*.md" checks: - - "build-cuda (3.9, 1.10, 11.3.1)" - "build-cuda (3.9, 1.11, 11.3.1)" - "build-cuda (3.9, 1.12, 11.6.1)" - "build-cuda (3.9, 1.13, 11.7.1)" - "build-hpu (1.5.0, 1.11.0)" - - "build-ipu (3.9, 1.10)" + - "build-ipu (3.9, 1.13)" - "build-NGC" - - "build-pl (3.9, 1.10, 11.3.1)" - "build-pl (3.9, 1.11, 11.3.1)" - "build-pl (3.9, 1.12, 11.6.1)" - "build-pl (3.9, 1.13, 11.7.1)" @@ -193,16 +190,15 @@ subprojects: - "fabric-cpu (macOS-11, fabric, 3.8, 1.11)" - "fabric-cpu (macOS-11, fabric, 3.9, 1.12)" - 
"fabric-cpu (macOS-11, fabric, 3.10, 1.13)" - - "fabric-cpu (macOS-11, fabric, 3.8, 1.10, oldest)" - - "fabric-cpu (ubuntu-20.04, fabric, 3.8, 1.10)" + - "fabric-cpu (macOS-11, fabric, 3.8, 1.11, oldest)" - "fabric-cpu (ubuntu-20.04, fabric, 3.9, 1.11)" - "fabric-cpu (ubuntu-20.04, fabric, 3.10, 1.12)" - "fabric-cpu (ubuntu-20.04, fabric, 3.10, 1.13)" - - "fabric-cpu (ubuntu-20.04, fabric, 3.8, 1.10, oldest)" + - "fabric-cpu (ubuntu-20.04, fabric, 3.8, 1.11, oldest)" - "fabric-cpu (windows-2022, fabric, 3.9, 1.11)" - "fabric-cpu (windows-2022, fabric, 3.10, 1.12)" - "fabric-cpu (windows-2022, fabric, 3.10, 1.13)" - - "fabric-cpu (windows-2022, fabric, 3.8, 1.10, oldest)" + - "fabric-cpu (windows-2022, fabric, 3.8, 1.11, oldest)" - "fabric-cpu (macOS-11, lightning, 3.8, 1.13)" - "fabric-cpu (ubuntu-20.04, lightning, 3.8, 1.13)" - "fabric-cpu (windows-2022, lightning, 3.8, 1.13)" diff --git a/.github/workflows/ci-dockers-pytorch.yml b/.github/workflows/ci-dockers-pytorch.yml index 5975cfecd6..1cd350d1d0 100644 --- a/.github/workflows/ci-dockers-pytorch.yml +++ b/.github/workflows/ci-dockers-pytorch.yml @@ -41,7 +41,6 @@ jobs: include: # We only release one docker image per PyTorch version. # The matrix here is the same as the one in release-docker.yml. - - {python_version: "3.9", pytorch_version: "1.10", cuda_version: "11.3.1"} - {python_version: "3.9", pytorch_version: "1.11", cuda_version: "11.3.1"} - {python_version: "3.9", pytorch_version: "1.12", cuda_version: "11.6.1"} - {python_version: "3.9", pytorch_version: "1.13", cuda_version: "11.7.1"} @@ -103,7 +102,6 @@ jobs: include: # These are the base images for PL release docker images, # so include at least all of the combinations in release-dockers.yml. 
- - {python_version: "3.9", pytorch_version: "1.10", cuda_version: "11.3.1"} - {python_version: "3.9", pytorch_version: "1.11", cuda_version: "11.3.1"} - {python_version: "3.9", pytorch_version: "1.12", cuda_version: "11.6.1"} - {python_version: "3.9", pytorch_version: "1.13", cuda_version: "11.7.1"} @@ -143,7 +141,7 @@ jobs: matrix: include: # the config used in 'dockers/ci-runner-ipu/Dockerfile' - - {python_version: "3.9", pytorch_version: "1.10"} + - {python_version: "3.9", pytorch_version: "1.13"} steps: - uses: actions/checkout@v3 - uses: docker/setup-buildx-action@v2 diff --git a/.github/workflows/ci-tests-fabric.yml b/.github/workflows/ci-tests-fabric.yml index 927ea3c34e..793560f539 100644 --- a/.github/workflows/ci-tests-fabric.yml +++ b/.github/workflows/ci-tests-fabric.yml @@ -38,11 +38,8 @@ jobs: fail-fast: false matrix: include: - # assign python and pytorch version combinations to operating systems (arbitrarily) - # note: there's no distribution of torch==1.10 for Python>=3.10 - {os: "macOS-11", pkg-name: "fabric", python-version: "3.8", pytorch-version: "1.11"} - {os: "macOS-11", pkg-name: "fabric", python-version: "3.9", pytorch-version: "1.12"} - - {os: "ubuntu-20.04", pkg-name: "fabric", python-version: "3.8", pytorch-version: "1.10"} - {os: "ubuntu-20.04", pkg-name: "fabric", python-version: "3.9", pytorch-version: "1.11"} - {os: "ubuntu-20.04", pkg-name: "fabric", python-version: "3.10", pytorch-version: "1.12"} - {os: "windows-2022", pkg-name: "fabric", python-version: "3.9", pytorch-version: "1.11"} @@ -52,9 +49,9 @@ jobs: - {os: "ubuntu-20.04", pkg-name: "fabric", python-version: "3.10", pytorch-version: "1.13"} - {os: "windows-2022", pkg-name: "fabric", python-version: "3.10", pytorch-version: "1.13"} # "oldest" versions tests, only on minimum Python - - {os: "macOS-11", pkg-name: "fabric", python-version: "3.8", pytorch-version: "1.10", requires: "oldest"} - - {os: "ubuntu-20.04", pkg-name: "fabric", python-version: "3.8", pytorch-version: 
"1.10", requires: "oldest"} - - {os: "windows-2022", pkg-name: "fabric", python-version: "3.8", pytorch-version: "1.10", requires: "oldest"} + - {os: "macOS-11", pkg-name: "fabric", python-version: "3.8", pytorch-version: "1.11", requires: "oldest"} + - {os: "ubuntu-20.04", pkg-name: "fabric", python-version: "3.8", pytorch-version: "1.11", requires: "oldest"} + - {os: "windows-2022", pkg-name: "fabric", python-version: "3.8", pytorch-version: "1.11", requires: "oldest"} # "lightning" installs the monolithic package - {os: "macOS-11", pkg-name: "lightning", python-version: "3.8", pytorch-version: "1.13"} - {os: "ubuntu-20.04", pkg-name: "lightning", python-version: "3.8", pytorch-version: "1.13"} diff --git a/.github/workflows/ci-tests-pytorch.yml b/.github/workflows/ci-tests-pytorch.yml index acbb4142f7..bf2f7f6612 100644 --- a/.github/workflows/ci-tests-pytorch.yml +++ b/.github/workflows/ci-tests-pytorch.yml @@ -43,11 +43,8 @@ jobs: fail-fast: false matrix: include: - # assign python and pytorch version combinations to operating systems (arbitrarily) - # note: there's no distribution of torch==1.10 for Python>=3.10 - {os: "macOS-11", pkg-name: "pytorch", python-version: "3.8", pytorch-version: "1.11"} - {os: "macOS-11", pkg-name: "pytorch", python-version: "3.9", pytorch-version: "1.12"} - - {os: "ubuntu-20.04", pkg-name: "pytorch", python-version: "3.8", pytorch-version: "1.10"} - {os: "ubuntu-20.04", pkg-name: "pytorch", python-version: "3.9", pytorch-version: "1.11"} - {os: "ubuntu-20.04", pkg-name: "pytorch", python-version: "3.10", pytorch-version: "1.12"} - {os: "windows-2022", pkg-name: "pytorch", python-version: "3.9", pytorch-version: "1.11"} @@ -57,9 +54,9 @@ jobs: - {os: "ubuntu-20.04", pkg-name: "pytorch", python-version: "3.10", pytorch-version: "1.13"} - {os: "windows-2022", pkg-name: "pytorch", python-version: "3.10", pytorch-version: "1.13"} # "oldest" versions tests, only on minimum Python - - {os: "macOS-11", pkg-name: "pytorch", 
python-version: "3.8", pytorch-version: "1.10", requires: "oldest"} - - {os: "ubuntu-20.04", pkg-name: "pytorch", python-version: "3.8", pytorch-version: "1.10", requires: "oldest"} - - {os: "windows-2022", pkg-name: "pytorch", python-version: "3.8", pytorch-version: "1.10", requires: "oldest"} + - {os: "macOS-11", pkg-name: "pytorch", python-version: "3.8", pytorch-version: "1.11", requires: "oldest"} + - {os: "ubuntu-20.04", pkg-name: "pytorch", python-version: "3.8", pytorch-version: "1.11", requires: "oldest"} + - {os: "windows-2022", pkg-name: "pytorch", python-version: "3.8", pytorch-version: "1.11", requires: "oldest"} # run test under SLOW label - {type: "slow", os: "macOS-11", pkg-name: "pytorch", python-version: "3.8", pytorch-version: "1.11"} - {type: "slow", os: "ubuntu-20.04", pkg-name: "pytorch", python-version: "3.8", pytorch-version: "1.11"} diff --git a/.github/workflows/markdown.links.config.json b/.github/workflows/markdown.links.config.json index 3058433264..1739bc83bb 100644 --- a/.github/workflows/markdown.links.config.json +++ b/.github/workflows/markdown.links.config.json @@ -2,6 +2,9 @@ "ignorePatterns": [ { "pattern": "^https://github.com/Lightning-AI/lightning/pull/.*" + }, + { + "pattern": ".*https://dev.azure.com/Lightning-AI/lightning/_apis/build/status/.*" } ], "httpHeaders": [ diff --git a/.github/workflows/release-docker.yml b/.github/workflows/release-docker.yml index 471079d74c..09941987c7 100644 --- a/.github/workflows/release-docker.yml +++ b/.github/workflows/release-docker.yml @@ -16,7 +16,6 @@ jobs: matrix: include: # We only release one docker image per PyTorch version. 
- - {python_version: "3.9", pytorch_version: "1.10", cuda_version: "11.3.1"} - {python_version: "3.9", pytorch_version: "1.11", cuda_version: "11.3.1"} - {python_version: "3.9", pytorch_version: "1.12", cuda_version: "11.6.1"} - {python_version: "3.9", pytorch_version: "1.13", cuda_version: "11.7.1"} diff --git a/README.md b/README.md index 4943ce1f68..fb8caba670 100644 --- a/README.md +++ b/README.md @@ -91,15 +91,15 @@ Lightning is rigorously tested across multiple CPUs, GPUs, TPUs, IPUs, and HPUs
-| System / PyTorch ver. | 1.10 | 1.12 | -| :--------------------------------: | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | -| Linux py3.9 \[GPUs\] | - | - | -| Linux py3.9 \[TPUs\] | - | - | -| Linux py3.8 \[IPUs\] | - | - | -| Linux py3.8 \[HPUs\] | [![Build Status](https://dev.azure.com/Lightning-AI/lightning/_apis/build/status/pytorch-lightning%20%28HPUs%29?branchName=master)](https://dev.azure.com/Lightning-AI/lightning/_build/latest?definitionId=26&branchName=master) | - | -| Linux (multiple Python versions) | - | [![Test](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml/badge.svg?branch=master&event=push)](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml) | -| OSX (multiple Python versions) | - | [![Test](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml/badge.svg?branch=master&event=push)](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml) | -| Windows (multiple Python versions) | - | [![Test](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml/badge.svg?branch=master&event=push)](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml) | +| System / PyTorch ver. 
| 1.11 | 1.12 | 1.13 | 2.0 | +| :--------------------------------: | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---- | +| Linux py3.9 \[GPUs\] | - | [![Build Status](https://dev.azure.com/Lightning-AI/lightning/_apis/build/status/pytorch-lightning%20%28GPUs%29?branchName=master)](https://dev.azure.com/Lightning-AI/lightning/_build/latest?definitionId=24&branchName=master) | [![Build Status](https://dev.azure.com/Lightning-AI/lightning/_apis/build/status/pytorch-lightning%20%28GPUs%29?branchName=master)](https://dev.azure.com/Lightning-AI/lightning/_build/latest?definitionId=24&branchName=master) | Soon | +| Linux py3.9 \[TPUs\] | - | [![Test PyTorch - TPU](https://github.com/Lightning-AI/lightning/actions/workflows/tpu-tests.yml/badge.svg)](https://github.com/Lightning-AI/lightning/actions/workflows/tpu-tests.yml) | | Soon | +| Linux py3.8 \[IPUs\] | - | - | [![Build Status](https://dev.azure.com/Lightning-AI/lightning/_apis/build/status/pytorch-lightning%20%28IPUs%29?branchName=master)](https://dev.azure.com/Lightning-AI/lightning/_build/latest?definitionId=25&branchName=master) | Soon | +| Linux py3.8 \[HPUs\] | - | - | [![Build Status](https://dev.azure.com/Lightning-AI/lightning/_apis/build/status/pytorch-lightning%20%28HPUs%29?branchName=master)](https://dev.azure.com/Lightning-AI/lightning/_build/latest?definitionId=26&branchName=master) | Soon | +| Linux (multiple Python versions) | [![Test PyTorch](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml/badge.svg)](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml) | [![Test PyTorch](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml/badge.svg)](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml) | [![Test
PyTorch](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml/badge.svg)](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml) | Soon | +| OSX (multiple Python versions) | [![Test PyTorch](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml/badge.svg)](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml) | [![Test PyTorch](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml/badge.svg)](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml) | [![Test PyTorch](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml/badge.svg)](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml) | Soon | +| Windows (multiple Python versions) | [![Test PyTorch](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml/badge.svg)](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml) | [![Test PyTorch](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml/badge.svg)](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml) | [![Test PyTorch](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml/badge.svg)](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml) | Soon |
diff --git a/dockers/base-ipu/Dockerfile b/dockers/base-ipu/Dockerfile index 47d0339e74..58fde458f8 100644 --- a/dockers/base-ipu/Dockerfile +++ b/dockers/base-ipu/Dockerfile @@ -18,7 +18,7 @@ LABEL maintainer="Lightning-AI " ARG PYTHON_VERSION=3.9 ARG CONDA_VERSION=4.9.2 -ARG PYTORCH_VERSION=1.10 +ARG PYTORCH_VERSION=1.13 SHELL ["/bin/bash", "-c"] @@ -39,13 +39,13 @@ RUN apt-get update -qq --fix-missing && \ unzip \ wget \ && \ -# Install conda and python. -# NOTE new Conda does not forward the exit status... https://github.com/conda/conda/issues/8385 + # Install conda and python. + # NOTE new Conda does not forward the exit status... https://github.com/conda/conda/issues/8385 curl -o ~/miniconda.sh https://repo.anaconda.com/miniconda/Miniconda3-py39_${CONDA_VERSION}-Linux-x86_64.sh && \ chmod +x ~/miniconda.sh && \ ~/miniconda.sh -b && \ rm ~/miniconda.sh && \ -# Cleaning + # Cleaning apt-get autoremove -y && \ apt-get clean && \ rm -rf /root/.cache && \ @@ -57,12 +57,13 @@ ENV \ COPY environment.yml environment.yml -RUN conda init bash && \ +RUN conda create -y --name $CONDA_ENV && \ + conda init bash && \ python -c "import re ; fname = 'environment.yml' ; req = re.sub(r'python>=[\d\.]+', 'python=${PYTHON_VERSION}', open(fname).read()) ; open(fname, 'w').write(req)" && \ - python -c "import re ; fname = 'environment.yml' ; req = re.sub(r'- pytorch[>=]+[\d\.]+', '- pytorch=${PYTORCH_VERSION}', open(fname).read()) ; open(fname, 'w').write(req)" && \ - python -c "fname = 'environment.yml' ; req = open(fname).readlines() ; open(fname, 'w').writelines([ln for ln in req if not any(n in ln for n in ['pytorch>'])])" && \ + # drop unnecessary packages and channels + python -c "fname = 'environment.yml' ; req = open(fname).readlines() ; open(fname, 'w').writelines([ln for ln in req if not any(n in ln for n in ['pytorch>', 'nvidia', 'torchvision'])])" && \ cat environment.yml && \ - conda env create -f environment.yml && \ + conda env update --file environment.yml && \ 
conda clean -ya && \ rm environment.yml @@ -73,22 +74,28 @@ ENV \ CONDA_DEFAULT_ENV=${CONDA_ENV} \ MKL_THREADING_LAYER=GNU -COPY ./requirements/pytorch/* requirements/ -COPY ./.actions/assistant.py assistant.py +# Disable cache +RUN pip --version && \ + pip config set global.cache-dir false + +# Get package +COPY ./ ./lightning/ RUN \ - pip list | grep torch && \ - python -c "import torch; print(torch.__version__)" && \ - pip install -q fire && \ - python requirements/adjust-versions.py requirements/extra.txt && \ - # Install remaining requirements - pip install -r requirements/extra.txt --no-cache-dir && \ - pip install -r requirements/test.txt --no-cache-dir && \ - rm -rf requirements/ + python --version && \ + cd lightning && \ + # set particular PyTorch version \ + for fpath in `ls requirements/**/*.txt`; do \ + python requirements/pytorch/adjust-versions.py $fpath ${PYTORCH_VERSION}; \ + done && \ + # install PL dependencies + pip install --requirement requirements/pytorch/devel.txt && \ + cd .. 
&& \ + rm -rf lightning && \ + rm -rf /root/.cache \ RUN \ # Show what we have - pip --version && \ conda info && \ pip list && \ python -c "import sys; assert sys.version[:3] == '$PYTHON_VERSION', sys.version" && \ diff --git a/dockers/base-xla/Dockerfile b/dockers/base-xla/Dockerfile index 290f822992..3c0fc7f22a 100644 --- a/dockers/base-xla/Dockerfile +++ b/dockers/base-xla/Dockerfile @@ -55,13 +55,14 @@ RUN apt-get update -qq --fix-missing && \ ENV \ PATH="/root/miniconda3/bin:$PATH" \ LD_LIBRARY_PATH="/root/miniconda3/lib:$LD_LIBRARY_PATH" + COPY environment.yml environment.yml RUN conda create -y --name $CONDA_ENV && \ conda init bash && \ python -c "import re ; fname = 'environment.yml' ; req = re.sub(r'python>=[\d\.]+', 'python=${PYTHON_VERSION}', open(fname).read()) ; open(fname, 'w').write(req)" && \ - # drop unnecessary packages - python -c "fname = 'environment.yml' ; req = open(fname).readlines() ; open(fname, 'w').writelines([ln for ln in req if not any(n in ln for n in ['pytorch>'])])" && \ + # drop unnecessary packages and channels + python -c "fname = 'environment.yml' ; req = open(fname).readlines() ; open(fname, 'w').writelines([ln for ln in req if not any(n in ln for n in ['pytorch>', 'nvidia', 'torchvision'])])" && \ cat environment.yml && \ conda env update --file environment.yml && \ conda clean -ya && \ @@ -89,10 +90,10 @@ RUN \ pip install mkl==2021.4.0 && \ # set particular PyTorch version \ for fpath in `ls requirements/**/*.txt`; do \ - python ./requirements/pytorch/adjust-versions.py $fpath ${XLA_VERSION}; \ + python requirements/pytorch/adjust-versions.py $fpath ${XLA_VERSION}; \ done && \ # install PL dependencies - pip install --requirement ./requirements/pytorch/devel.txt --no-cache-dir && \ + pip install --requirement requirements/pytorch/devel.txt && \ # Install PyTorch XLA py_version=${PYTHON_VERSION/./} && \ pip install 
"https://storage.googleapis.com/tpu-pytorch/wheels/tpuvm/torch_xla-${XLA_VERSION}-cp${py_version}-cp${py_version}-linux_x86_64.whl" && \ @@ -100,10 +101,8 @@ RUN \ rm -rf lightning && \ rm -rf /root/.cache \ - RUN \ # Show what we have - pip --version && \ conda info && \ pip list && \ python -c "import sys; assert sys.version[:3] == '$PYTHON_VERSION', sys.version" && \ diff --git a/dockers/ci-runner-ipu/Dockerfile b/dockers/ci-runner-ipu/Dockerfile index 1e773425ae..de960927da 100644 --- a/dockers/ci-runner-ipu/Dockerfile +++ b/dockers/ci-runner-ipu/Dockerfile @@ -13,7 +13,7 @@ # limitations under the License. ARG PYTHON_VERSION=3.9 -ARG PYTORCH_VERSION=1.10 +ARG PYTORCH_VERSION=1.13 FROM pytorchlightning/pytorch_lightning:base-ipu-py${PYTHON_VERSION}-torch${PYTORCH_VERSION} diff --git a/docs/source-pytorch/index.rst b/docs/source-pytorch/index.rst index 72f6508bc7..3130ee5ca4 100644 --- a/docs/source-pytorch/index.rst +++ b/docs/source-pytorch/index.rst @@ -64,7 +64,7 @@ Conda users Or read the `advanced install guide `_ -We are fully compatible with any stable PyTorch version v1.10 and above. +We are fully compatible with any stable PyTorch version v1.11 and above. .. 
raw:: html diff --git a/environment.yml b/environment.yml index ef939cf11b..7675c874e9 100644 --- a/environment.yml +++ b/environment.yml @@ -29,12 +29,11 @@ dependencies: - python>=3.8 - pip>20.1 - numpy>=1.17.2 - - pytorch>=1.10.* + - pytorch>=1.11.0 - future>=0.17.1 - PyYAML>=5.1 - tqdm>=4.57.0 - fsspec[http]>=2021.06.1 - #- tensorboard>=2.2.0 # not needed, already included in pytorch # Optional - scikit-learn >0.22.1 @@ -42,4 +41,4 @@ dependencies: - omegaconf>=2.0.5 # Examples - - torchvision>=0.11.* + - torchvision>=0.12.0 diff --git a/requirements/fabric/base.txt b/requirements/fabric/base.txt index 6722f66e74..49f4a5cb29 100644 --- a/requirements/fabric/base.txt +++ b/requirements/fabric/base.txt @@ -2,7 +2,7 @@ # in case you want to preserve/enforce restrictions on the latest compatible version, add "strict" as an in-line comment numpy>=1.17.2, <1.24.2 -torch>=1.10.0, <=1.13.1 +torch>=1.11.0, <=2.0.0 fsspec[http]>2021.06.0, <2022.6.0 packaging>=17.1, <=23.0 typing-extensions>=4.0.0, <=4.4.0 diff --git a/requirements/pytorch/adjust-versions.py b/requirements/pytorch/adjust-versions.py index 937356a192..3bc48df1b2 100644 --- a/requirements/pytorch/adjust-versions.py +++ b/requirements/pytorch/adjust-versions.py @@ -5,15 +5,12 @@ from typing import Dict, Optional # IMPORTANT: this list needs to be sorted in reverse VERSIONS = [ - dict(torch="1.14.0", torchvision="0.15.0"), # nightly + dict(torch="2.0.0", torchvision="0.15.0"), # nightly dict(torch="1.13.1", torchvision="0.14.1"), # stable dict(torch="1.13.0", torchvision="0.14.0"), dict(torch="1.12.1", torchvision="0.13.1"), dict(torch="1.12.0", torchvision="0.13.0"), dict(torch="1.11.0", torchvision="0.12.0"), - dict(torch="1.10.2", torchvision="0.11.3"), - dict(torch="1.10.1", torchvision="0.11.2"), - dict(torch="1.10.0", torchvision="0.11.1"), ] diff --git a/requirements/pytorch/base.txt b/requirements/pytorch/base.txt index 00822b02e6..42887a62f1 100644 --- a/requirements/pytorch/base.txt +++ 
b/requirements/pytorch/base.txt @@ -2,7 +2,7 @@ # in case you want to preserve/enforce restrictions on the latest compatible version, add "strict" as an in-line comment numpy>=1.17.2, <1.24.2 -torch>=1.10.0, <=1.13.1 +torch>=1.11.0, <=2.0.0 tqdm>=4.57.0, <4.65.0 PyYAML>=5.4, <=6.0 fsspec[http]>2021.06.0, <2022.8.0 diff --git a/src/lightning/fabric/CHANGELOG.md b/src/lightning/fabric/CHANGELOG.md index de8e3fd15c..3894d6fc19 100644 --- a/src/lightning/fabric/CHANGELOG.md +++ b/src/lightning/fabric/CHANGELOG.md @@ -40,6 +40,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). ### Removed +- Removed support for PyTorch 1.10 ([#16492](https://github.com/Lightning-AI/lightning/pull/16492)) + - Removed support for Python 3.7 ([#16579](https://github.com/Lightning-AI/lightning/pull/16579)) diff --git a/src/lightning/fabric/strategies/launchers/multiprocessing.py b/src/lightning/fabric/strategies/launchers/multiprocessing.py index b2bebb75f9..a839f3f3eb 100644 --- a/src/lightning/fabric/strategies/launchers/multiprocessing.py +++ b/src/lightning/fabric/strategies/launchers/multiprocessing.py @@ -22,7 +22,7 @@ import torch.multiprocessing as mp from lightning.fabric.strategies.launchers.launcher import _Launcher from lightning.fabric.utilities.apply_func import move_data_to_device -from lightning.fabric.utilities.imports import _IS_INTERACTIVE, _TORCH_GREATER_EQUAL_1_11 +from lightning.fabric.utilities.imports import _IS_INTERACTIVE from lightning.fabric.utilities.seed import _collect_rng_states, _set_rng_states if TYPE_CHECKING: @@ -154,22 +154,18 @@ class _GlobalStateSnapshot: def capture(cls) -> "_GlobalStateSnapshot": """Capture a few global states from torch, numpy, etc., that we want to restore in a spawned worker process.""" - warn_only = torch.is_deterministic_algorithms_warn_only_enabled() if _TORCH_GREATER_EQUAL_1_11 else False return cls( use_deterministic_algorithms=torch.are_deterministic_algorithms_enabled(), - 
use_deterministic_algorithms_warn_only=warn_only, + use_deterministic_algorithms_warn_only=torch.is_deterministic_algorithms_warn_only_enabled(), cudnn_benchmark=torch.backends.cudnn.benchmark, rng_states=_collect_rng_states(), ) def restore(self) -> None: """Restores all globals to the values captured in the :meth:`capture` method.""" - if _TORCH_GREATER_EQUAL_1_11: - torch.use_deterministic_algorithms( - self.use_deterministic_algorithms, warn_only=self.use_deterministic_algorithms_warn_only - ) - else: - torch.use_deterministic_algorithms(self.use_deterministic_algorithms) + torch.use_deterministic_algorithms( + self.use_deterministic_algorithms, warn_only=self.use_deterministic_algorithms_warn_only + ) torch.backends.cudnn.benchmark = self.cudnn_benchmark _set_rng_states(self.rng_states) diff --git a/src/lightning/fabric/utilities/imports.py b/src/lightning/fabric/utilities/imports.py index d815c93782..738940ac20 100644 --- a/src/lightning/fabric/utilities/imports.py +++ b/src/lightning/fabric/utilities/imports.py @@ -25,7 +25,6 @@ _IS_WINDOWS = platform.system() == "Windows" # 2. The inspection mode via `python -i`: https://stackoverflow.com/a/6879085/1162383 _IS_INTERACTIVE = hasattr(sys, "ps1") or bool(sys.flags.interactive) -_TORCH_GREATER_EQUAL_1_11 = compare_version("torch", operator.ge, "1.11.0") _TORCH_GREATER_EQUAL_1_12 = compare_version("torch", operator.ge, "1.12.0") _TORCH_GREATER_EQUAL_1_13 = compare_version("torch", operator.ge, "1.13.0") _TORCH_GREATER_EQUAL_2_0 = compare_version("torch", operator.ge, "2.0.0", use_base_version=True) diff --git a/src/lightning/pytorch/CHANGELOG.md b/src/lightning/pytorch/CHANGELOG.md index c4ea1c96da..7b1dd2f280 100644 --- a/src/lightning/pytorch/CHANGELOG.md +++ b/src/lightning/pytorch/CHANGELOG.md @@ -55,6 +55,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). 
### Removed +- Removed support for PyTorch 1.10 ([#16492](https://github.com/Lightning-AI/lightning/pull/16492)) + - Removed support for Python 3.7 ([#16579](https://github.com/Lightning-AI/lightning/pull/16579)) - Removed the `pytorch_lightning.lite` module in favor of `lightning_fabric` ([#15953](https://github.com/Lightning-AI/lightning/pull/15953)) diff --git a/src/lightning/pytorch/callbacks/quantization.py b/src/lightning/pytorch/callbacks/quantization.py index 5be2ca5690..bf8460c0fb 100644 --- a/src/lightning/pytorch/callbacks/quantization.py +++ b/src/lightning/pytorch/callbacks/quantization.py @@ -22,19 +22,15 @@ from typing import Any, Callable, Dict, Optional, Sequence, Union import torch from torch import Tensor +from torch.ao.quantization import fuse_modules_qat as fuse_modules from torch.ao.quantization.qconfig import QConfig from torch.quantization import FakeQuantizeBase import lightning.pytorch as pl -from lightning.fabric.utilities.imports import _TORCH_GREATER_EQUAL_1_11, _TORCH_GREATER_EQUAL_1_12 +from lightning.fabric.utilities.imports import _TORCH_GREATER_EQUAL_1_12 from lightning.pytorch.callbacks.callback import Callback from lightning.pytorch.utilities.exceptions import MisconfigurationException -if _TORCH_GREATER_EQUAL_1_11: - from torch.ao.quantization import fuse_modules_qat as fuse_modules -else: - from torch.quantization import fuse_modules - def wrap_qat_forward_context( quant_cb: "QuantizationAwareTraining", diff --git a/src/lightning/pytorch/core/module.py b/src/lightning/pytorch/core/module.py index a0b8871a3b..7af55cd577 100644 --- a/src/lightning/pytorch/core/module.py +++ b/src/lightning/pytorch/core/module.py @@ -34,7 +34,7 @@ from lightning.fabric.utilities.apply_func import convert_to_tensors from lightning.fabric.utilities.cloud_io import get_filesystem from lightning.fabric.utilities.device_dtype_mixin import _DeviceDtypeModuleMixin from lightning.fabric.utilities.distributed import _distributed_available, _sync_ddp -from 
lightning.fabric.utilities.imports import _IS_WINDOWS, _TORCH_GREATER_EQUAL_1_11, _TORCH_GREATER_EQUAL_2_0 +from lightning.fabric.utilities.imports import _IS_WINDOWS, _TORCH_GREATER_EQUAL_2_0 from lightning.fabric.wrappers import _FabricOptimizer from lightning.pytorch.callbacks.callback import Callback from lightning.pytorch.core.hooks import CheckpointHooks, DataHooks, ModelHooks @@ -1780,10 +1780,7 @@ class LightningModule( rank_zero_debug("Could not register sharded tensor state dict hooks") return - if _TORCH_GREATER_EQUAL_1_11: - from torch.distributed._shard.sharded_tensor import pre_load_state_dict_hook, state_dict_hook - else: - from torch.distributed._sharded_tensor import pre_load_state_dict_hook, state_dict_hook + from torch.distributed._shard.sharded_tensor import pre_load_state_dict_hook, state_dict_hook self._register_state_dict_hook(state_dict_hook) diff --git a/src/lightning/pytorch/strategies/ddp.py b/src/lightning/pytorch/strategies/ddp.py index 2636678477..e4258cc949 100644 --- a/src/lightning/pytorch/strategies/ddp.py +++ b/src/lightning/pytorch/strategies/ddp.py @@ -32,7 +32,7 @@ from lightning.fabric.utilities.distributed import ( _sync_ddp_if_available, ) from lightning.fabric.utilities.distributed import group as _group -from lightning.fabric.utilities.imports import _IS_WINDOWS, _TORCH_GREATER_EQUAL_1_11 +from lightning.fabric.utilities.imports import _IS_WINDOWS from lightning.fabric.utilities.optimizer import _optimizers_to_device from lightning.fabric.utilities.seed import reset_seed from lightning.fabric.utilities.types import ReduceOp @@ -209,7 +209,6 @@ class DDPStrategy(ParallelStrategy): ) def _enable_model_averaging(self) -> None: - # Only called when PyTorch version >= 1.10 log.detail(f"{self.__class__.__name__}: reinitializing optimizers with post localSGD") if self._model_averaging_period is None: raise ValueError( @@ -372,10 +371,8 @@ class DDPStrategy(ParallelStrategy): pl_module = self.lightning_module if 
isinstance(self.model, DistributedDataParallel): - if ( - _TORCH_GREATER_EQUAL_1_11 - and not self.model.static_graph - and self.model._get_ddp_logging_data().get("can_set_static_graph") # type: ignore[operator] + if not self.model.static_graph and self.model._get_ddp_logging_data().get( # type: ignore[operator] + "can_set_static_graph" ): rank_zero_info( "Your model can run with static graph optimizations. For future training runs, we suggest you" diff --git a/src/lightning/pytorch/strategies/ddp_spawn.py b/src/lightning/pytorch/strategies/ddp_spawn.py index c5336b73d4..47b76abc67 100644 --- a/src/lightning/pytorch/strategies/ddp_spawn.py +++ b/src/lightning/pytorch/strategies/ddp_spawn.py @@ -32,7 +32,6 @@ from lightning.fabric.utilities.distributed import ( _sync_ddp_if_available, ) from lightning.fabric.utilities.distributed import group as _group -from lightning.fabric.utilities.imports import _TORCH_GREATER_EQUAL_1_11 from lightning.fabric.utilities.optimizer import _optimizers_to_device from lightning.fabric.utilities.types import ReduceOp from lightning.pytorch.overrides.base import _LightningModuleWrapperBase, _LightningPrecisionModuleWrapperBase @@ -339,10 +338,8 @@ class DDPSpawnStrategy(ParallelStrategy): pl_module = self.lightning_module if isinstance(self.model, DistributedDataParallel): - if ( - _TORCH_GREATER_EQUAL_1_11 - and not self.model.static_graph - and self.model._get_ddp_logging_data().get("can_set_static_graph") # type: ignore[operator] + if not self.model.static_graph and self.model._get_ddp_logging_data().get( # type: ignore[operator] + "can_set_static_graph" ): rank_zero_info( "Your model can run with static graph optimizations. 
For future training runs, we suggest you" diff --git a/src/lightning/pytorch/strategies/hpu_parallel.py b/src/lightning/pytorch/strategies/hpu_parallel.py index c22e9b5863..66cd1b0131 100644 --- a/src/lightning/pytorch/strategies/hpu_parallel.py +++ b/src/lightning/pytorch/strategies/hpu_parallel.py @@ -22,14 +22,13 @@ from torch.optim.optimizer import Optimizer import lightning.pytorch as pl from lightning.fabric.plugins import CheckpointIO, ClusterEnvironment from lightning.fabric.utilities.distributed import group as _group -from lightning.pytorch.overrides.base import _LightningModuleWrapperBase from lightning.pytorch.overrides.torch_distributed import broadcast_object_list from lightning.pytorch.plugins.io.hpu_plugin import HPUCheckpointIO from lightning.pytorch.plugins.io.wrapper import _WrappingCheckpointIO from lightning.pytorch.plugins.precision import PrecisionPlugin from lightning.pytorch.strategies.ddp import DDPStrategy from lightning.pytorch.utilities.exceptions import MisconfigurationException -from lightning.pytorch.utilities.imports import _HPU_AVAILABLE, _TORCH_LESSER_EQUAL_1_10_2 +from lightning.pytorch.utilities.imports import _HPU_AVAILABLE from lightning.pytorch.utilities.types import STEP_OUTPUT if _HPU_AVAILABLE: @@ -118,18 +117,6 @@ class HPUParallelStrategy(DDPStrategy): # DDP does not accept static_graph as a parameter, hence removing it from the list del self._ddp_kwargs["static_graph"] - def configure_ddp(self) -> None: - # DDP does not accept static graph as param with torch < 1.11 - if _TORCH_LESSER_EQUAL_1_10_2: - log.detail(f"{self.__class__.__name__}: configuring DistributedDataParallel") - self._pre_configure_ddp() - self.model = self._setup_model(_LightningModuleWrapperBase(self.model)) # type: ignore - if self.root_device.type == "hpu" and self._static_graph: - self._model._set_static_graph() # type: ignore - self._register_ddp_hooks() - else: - super().configure_ddp() - def broadcast(self, obj: object, src: int = 0) -> object: 
# type: ignore obj = [obj] if self.global_rank != src: diff --git a/src/lightning/pytorch/strategies/launchers/multiprocessing.py b/src/lightning/pytorch/strategies/launchers/multiprocessing.py index 2def93b772..81916de21e 100644 --- a/src/lightning/pytorch/strategies/launchers/multiprocessing.py +++ b/src/lightning/pytorch/strategies/launchers/multiprocessing.py @@ -30,7 +30,6 @@ from torch import Tensor import lightning.pytorch as pl from lightning.fabric.strategies.launchers.multiprocessing import _check_bad_cuda_fork from lightning.fabric.utilities import move_data_to_device -from lightning.fabric.utilities.imports import _TORCH_GREATER_EQUAL_1_11 from lightning.fabric.utilities.seed import _collect_rng_states, _set_rng_states from lightning.fabric.utilities.types import _PATH from lightning.pytorch.strategies.launchers.launcher import _Launcher @@ -297,21 +296,17 @@ class _GlobalStateSnapshot: def capture(cls) -> "_GlobalStateSnapshot": """Capture a few global states from torch, numpy, etc., that we want to restore in a spawned worker process.""" - warn_only = torch.is_deterministic_algorithms_warn_only_enabled() if _TORCH_GREATER_EQUAL_1_11 else False return cls( use_deterministic_algorithms=torch.are_deterministic_algorithms_enabled(), - use_deterministic_algorithms_warn_only=warn_only, + use_deterministic_algorithms_warn_only=torch.is_deterministic_algorithms_warn_only_enabled(), cudnn_benchmark=torch.backends.cudnn.benchmark, rng_states=_collect_rng_states(), ) def restore(self) -> None: """Restores all globals to the values captured in the :meth:`capture` method.""" - if _TORCH_GREATER_EQUAL_1_11: - torch.use_deterministic_algorithms( - self.use_deterministic_algorithms, warn_only=self.use_deterministic_algorithms_warn_only - ) - else: - torch.use_deterministic_algorithms(self.use_deterministic_algorithms) + torch.use_deterministic_algorithms( + self.use_deterministic_algorithms, warn_only=self.use_deterministic_algorithms_warn_only + ) 
torch.backends.cudnn.benchmark = self.cudnn_benchmark _set_rng_states(self.rng_states) diff --git a/src/lightning/pytorch/trainer/connectors/accelerator_connector.py b/src/lightning/pytorch/trainer/connectors/accelerator_connector.py index 5a1cf95cfa..a9910294d5 100644 --- a/src/lightning/pytorch/trainer/connectors/accelerator_connector.py +++ b/src/lightning/pytorch/trainer/connectors/accelerator_connector.py @@ -29,7 +29,7 @@ from lightning.fabric.plugins.environments import ( TorchElasticEnvironment, ) from lightning.fabric.utilities.device_parser import _determine_root_gpu_device -from lightning.fabric.utilities.imports import _IS_INTERACTIVE, _TORCH_GREATER_EQUAL_1_11 +from lightning.fabric.utilities.imports import _IS_INTERACTIVE from lightning.pytorch.accelerators import AcceleratorRegistry from lightning.pytorch.accelerators.accelerator import Accelerator from lightning.pytorch.accelerators.cuda import CUDAAccelerator @@ -194,10 +194,8 @@ class AcceleratorConnector: def _init_deterministic(self, deterministic: Optional[Union[bool, _LITERAL_WARN]]) -> None: self.deterministic = deterministic or False # default to False if not set - if _TORCH_GREATER_EQUAL_1_11 and deterministic == "warn": + if deterministic == "warn": torch.use_deterministic_algorithms(True, warn_only=True) - else: - torch.use_deterministic_algorithms(self.deterministic) if self.deterministic: # https://docs.nvidia.com/cuda/cublas/index.html#cublasApi_reproducibility os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8" diff --git a/src/lightning/pytorch/utilities/imports.py b/src/lightning/pytorch/utilities/imports.py index 25f689a2a3..0f5c5b900c 100644 --- a/src/lightning/pytorch/utilities/imports.py +++ b/src/lightning/pytorch/utilities/imports.py @@ -20,7 +20,6 @@ from lightning_utilities.core.imports import compare_version, package_available, _PYTHON_GREATER_EQUAL_3_8_0 = (sys.version_info.major, sys.version_info.minor) >= (3, 8) _PYTHON_GREATER_EQUAL_3_10_0 = (sys.version_info.major, 
sys.version_info.minor) >= (3, 10) -_TORCH_LESSER_EQUAL_1_10_2 = compare_version("torch", operator.le, "1.10.2") # duplicated from fabric because HPU is patching it below _TORCH_GREATER_EQUAL_1_13 = compare_version("torch", operator.ge, "1.13.0") _TORCHMETRICS_GREATER_EQUAL_0_9_1 = RequirementCache("torchmetrics>=0.9.1") diff --git a/src/pytorch_lightning/README.md b/src/pytorch_lightning/README.md index f16154fe69..8211ec2ded 100644 --- a/src/pytorch_lightning/README.md +++ b/src/pytorch_lightning/README.md @@ -78,15 +78,15 @@ Lightning is rigorously tested across multiple CPUs, GPUs, TPUs, IPUs, and HPUs
-| System / PyTorch ver. | 1.10 | 1.12 | -| :--------------------------------: | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | -| Linux py3.9 \[GPUs\] | - | - | -| Linux py3.9 \[TPUs\] | - | - | -| Linux py3.8 \[IPUs\] | - | - | -| Linux py3.8 \[HPUs\] | [![Build Status](https://dev.azure.com/Lightning-AI/lightning/_apis/build/status/pytorch-lightning%20%28HPUs%29?branchName=master)](https://dev.azure.com/Lightning-AI/lightning/_build/latest?definitionId=26&branchName=master) | - | -| Linux (multiple Python versions) | - | [![Test](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml/badge.svg?branch=master&event=push)](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml) | -| OSX (multiple Python versions) | - | [![Test](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml/badge.svg?branch=master&event=push)](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml) | -| Windows (multiple Python versions) | - | [![Test](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml/badge.svg?branch=master&event=push)](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml) | +| System / PyTorch ver. 
| 1.11 | 1.12 | 1.13 | 2.0 | +| :--------------------------------: | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---- | +| Linux py3.9 \[GPUs\] | - | [![Build Status]()](https://dev.azure.com/Lightning-AI/lightning/_build/latest?definitionId=24&branchName=master) | [![Build Status]()](https://dev.azure.com/Lightning-AI/lightning/_build/latest?definitionId=24&branchName=master) | Soon | +| Linux py3.9 \[TPUs\] | - | [![Test PyTorch - TPU](https://github.com/Lightning-AI/lightning/actions/workflows/tpu-tests.yml/badge.svg)](https://github.com/Lightning-AI/lightning/actions/workflows/tpu-tests.yml) | | Soon | +| Linux py3.8 \[IPUs\] | - | - | [![Build Status]()](https://dev.azure.com/Lightning-AI/lightning/_build/latest?definitionId=25&branchName=master) | Soon | +| Linux py3.8 \[HPUs\] | - | - | [![Build Status]()](https://dev.azure.com/Lightning-AI/lightning/_build/latest?definitionId=26&branchName=master) | Soon | +| Linux (multiple Python versions) | [![Test PyTorch](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml/badge.svg)](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml) | [![Test PyTorch](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml/badge.svg)](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml) | [![Test 
PyTorch](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml/badge.svg)](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml) | Soon | +| OSX (multiple Python versions) | [![Test PyTorch](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml/badge.svg)](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml) | [![Test PyTorch](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml/badge.svg)](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml) | [![Test PyTorch](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml/badge.svg)](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml) | Soon | +| Windows (multiple Python versions) | [![Test PyTorch](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml/badge.svg)](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml) | [![Test PyTorch](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml/badge.svg)](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml) | [![Test PyTorch](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml/badge.svg)](https://github.com/Lightning-AI/lightning/actions/workflows/ci-tests-pytorch.yml) | Soon |
diff --git a/tests/tests_fabric/plugins/collectives/test_torch_collective.py b/tests/tests_fabric/plugins/collectives/test_torch_collective.py index cdc7b20a75..06d1a50951 100644 --- a/tests/tests_fabric/plugins/collectives/test_torch_collective.py +++ b/tests/tests_fabric/plugins/collectives/test_torch_collective.py @@ -13,7 +13,7 @@ from lightning_fabric.plugins.collectives import TorchCollective from lightning_fabric.plugins.environments import LightningEnvironment from lightning_fabric.strategies.ddp import DDPStrategy from lightning_fabric.strategies.launchers.multiprocessing import _MultiProcessingLauncher -from lightning_fabric.utilities.imports import _TORCH_GREATER_EQUAL_1_11, _TORCH_GREATER_EQUAL_1_13 +from lightning_fabric.utilities.imports import _TORCH_GREATER_EQUAL_1_13 if TorchCollective.is_available(): from torch.distributed import ReduceOp @@ -123,10 +123,7 @@ def test_convert_ops(): # Test we are handling different casing properly assert TorchCollective._convert_to_native_op("BOR") == ReduceOp.BOR assert TorchCollective._convert_to_native_op("BoR") == ReduceOp.BOR - - # AVG is very recent! 
- if _TORCH_GREATER_EQUAL_1_11: - assert TorchCollective._convert_to_native_op("avg") == ReduceOp.AVG + assert TorchCollective._convert_to_native_op("avg") == ReduceOp.AVG # Test invalid type with pytest.raises(ValueError, match="Unsupported op 1 of type int"): diff --git a/tests/tests_fabric/test_connector.py b/tests/tests_fabric/test_connector.py index 4ba1fc4a8d..5ca8031879 100644 --- a/tests/tests_fabric/test_connector.py +++ b/tests/tests_fabric/test_connector.py @@ -793,7 +793,7 @@ def test_strategy_str_passed_being_case_insensitive(_, strategy, strategy_cls): assert isinstance(connector.strategy, strategy_cls) -@pytest.mark.parametrize("precision", ["64", "32", "16", pytest.param("bf16", marks=RunIf(min_torch="1.10"))]) +@pytest.mark.parametrize("precision", ["64", "32", "16", "bf16"]) @mock.patch("lightning_fabric.accelerators.cuda.num_cuda_devices", return_value=1) def test_precision_from_environment(_, precision): """Test that the precision input can be set through the environment variable.""" diff --git a/tests/tests_pytorch/accelerators/test_ipu.py b/tests/tests_pytorch/accelerators/test_ipu.py index bebbb43874..82e25eccb1 100644 --- a/tests/tests_pytorch/accelerators/test_ipu.py +++ b/tests/tests_pytorch/accelerators/test_ipu.py @@ -122,7 +122,6 @@ def test_no_warning_strategy(tmpdir): @RunIf(ipu=True) -@pytest.mark.xfail(raises=NotImplementedError, reason="TODO: issues with latest poptorch") @pytest.mark.parametrize("devices", [1, 4]) def test_all_stages(tmpdir, devices): model = IPUModel() @@ -134,7 +133,6 @@ def test_all_stages(tmpdir, devices): @RunIf(ipu=True) -@pytest.mark.xfail(raises=NotImplementedError, reason="TODO: issues with latest poptorch") @pytest.mark.parametrize("devices", [1, 4]) def test_inference_only(tmpdir, devices): model = IPUModel() @@ -285,7 +283,6 @@ def test_accumulated_batches(tmpdir): @RunIf(ipu=True) -@pytest.mark.xfail(raises=NotImplementedError, reason="TODO: issues with latest poptorch") def 
test_stages_correct(tmpdir): """Ensure all stages correctly are traced correctly by asserting the output for each stage.""" diff --git a/tests/tests_pytorch/callbacks/test_finetuning_callback.py b/tests/tests_pytorch/callbacks/test_finetuning_callback.py index 42b721e509..c0ed55f2a1 100644 --- a/tests/tests_pytorch/callbacks/test_finetuning_callback.py +++ b/tests/tests_pytorch/callbacks/test_finetuning_callback.py @@ -19,7 +19,7 @@ from torch import nn from torch.optim import Optimizer, SGD from torch.utils.data import DataLoader -from lightning_fabric.utilities.imports import _TORCH_GREATER_EQUAL_1_11, _TORCH_GREATER_EQUAL_1_12 +from lightning_fabric.utilities.imports import _TORCH_GREATER_EQUAL_1_12 from pytorch_lightning import LightningModule, seed_everything, Trainer from pytorch_lightning.callbacks import BackboneFinetuning, BaseFinetuning, ModelCheckpoint from pytorch_lightning.demos.boring_classes import BoringModel, RandomDataset @@ -361,9 +361,8 @@ def test_callbacks_restore(tmpdir): "weight_decay": 0, "nesterov": False, "params": ["layer.3.weight", "layer.3.bias"], + "maximize": False, } - if _TORCH_GREATER_EQUAL_1_11: - expected["maximize"] = False if _TORCH_GREATER_EQUAL_1_12: expected["foreach"] = None if _TORCH_GREATER_EQUAL_1_13: @@ -379,9 +378,8 @@ def test_callbacks_restore(tmpdir): "weight_decay": 0, "nesterov": False, "params": ["layer.0.weight", "layer.0.bias"], + "maximize": False, } - if _TORCH_GREATER_EQUAL_1_11: - expected["maximize"] = False if _TORCH_GREATER_EQUAL_1_12: expected["foreach"] = None if _TORCH_GREATER_EQUAL_1_13: diff --git a/tests/tests_pytorch/callbacks/test_quantization.py b/tests/tests_pytorch/callbacks/test_quantization.py index ee5b3e750f..a6a6f5bd0b 100644 --- a/tests/tests_pytorch/callbacks/test_quantization.py +++ b/tests/tests_pytorch/callbacks/test_quantization.py @@ -17,89 +17,16 @@ from typing import Callable, Union import pytest import torch from torch.quantization import FakeQuantizeBase -from 
torchmetrics.functional import mean_absolute_percentage_error as mape from pytorch_lightning import seed_everything, Trainer -from pytorch_lightning.accelerators import CUDAAccelerator from pytorch_lightning.callbacks import QuantizationAwareTraining from pytorch_lightning.demos.boring_classes import RandomDataset from pytorch_lightning.utilities.exceptions import MisconfigurationException -from pytorch_lightning.utilities.memory import get_model_size_mb from tests_pytorch.helpers.datamodules import RegressDataModule from tests_pytorch.helpers.runif import RunIf from tests_pytorch.helpers.simple_models import RegressionModel -# todo: [True-False-average] and [False-False-average] fail with 1.12 -# error: assert False (tensor(0.3262), tensor(0.8754), atol=0.45) -@pytest.mark.parametrize("observe", ["average", "histogram"]) -@pytest.mark.parametrize("fuse", [True, False]) -@pytest.mark.parametrize("convert", [True, False]) -@RunIf(quantization=True, sklearn=True, max_torch="1.11") -def test_quantization(tmpdir, observe: str, fuse: bool, convert: bool): - """Parity test for quant model.""" - cuda_available = CUDAAccelerator.is_available() - - if observe == "average" and not fuse and CUDAAccelerator.is_available(): - pytest.xfail("TODO: flakiness in GPU CI") - - seed_everything(42) - dm = RegressDataModule() - accelerator = "gpu" if cuda_available else "cpu" - trainer_args = dict(default_root_dir=tmpdir, max_epochs=7, accelerator=accelerator, devices=1) - model = RegressionModel() - qmodel = copy.deepcopy(model) - - trainer = Trainer(**trainer_args) - trainer.fit(model, datamodule=dm) - org_size = get_model_size_mb(model) - org_score = torch.mean(torch.tensor([mape(model(x), y) for x, y in dm.test_dataloader()])) - - fusing_layers = [(f"layer_{i}", f"layer_{i}a") for i in range(3)] if fuse else None - qcb = QuantizationAwareTraining( - observer_type=observe, - modules_to_fuse=fusing_layers, - quantize_on_fit_end=convert, - observer_enabled_stages=("train", "validate"), 
- ) - trainer = Trainer(callbacks=[qcb], **trainer_args) - trainer.fit(qmodel, datamodule=dm) - - quant_calls = qcb._forward_calls - assert quant_calls == qcb._forward_calls - quant_score = torch.mean(torch.tensor([mape(qmodel(x), y) for x, y in dm.test_dataloader()])) - # test that the test score is almost the same as with pure training - assert torch.allclose(org_score, quant_score, atol=0.45) - model_path = trainer.checkpoint_callback.best_model_path - curr_epoch = trainer.current_epoch - - trainer_args.update(dict(max_epochs=1, enable_checkpointing=False)) - if not convert: - trainer = Trainer(callbacks=[QuantizationAwareTraining()], **trainer_args) - trainer.fit(qmodel, datamodule=dm) - qmodel.eval() - torch.quantization.convert(qmodel, inplace=True) - - quant_size = get_model_size_mb(qmodel) - # test that the trained model is smaller then initial - size_ratio = quant_size / org_size - assert size_ratio < 0.65 - - # todo: make it work also with strict loading - qmodel2 = RegressionModel.load_from_checkpoint(model_path, strict=False) - quant2_score = torch.mean(torch.tensor([mape(qmodel2(x), y) for x, y in dm.test_dataloader()])) - assert torch.allclose(org_score, quant2_score, atol=0.47) - - # test without and with QAT callback - trainer_args.update(max_epochs=curr_epoch + 1) - qmodel2 = RegressionModel() - trainer = Trainer(callbacks=[QuantizationAwareTraining()], **trainer_args) - trainer.fit(qmodel2, datamodule=dm, ckpt_path=model_path) - quant2_score = torch.mean(torch.tensor([mape(qmodel2(x), y) for x, y in dm.test_dataloader()])) - # test that the test score is almost the same as with pure training - assert torch.allclose(org_score, quant2_score, atol=0.45) - - @RunIf(quantization=True, sklearn=True) def test_quantize_torchscript(tmpdir): """Test converting to torchscipt.""" diff --git a/tests/tests_pytorch/core/test_lightning_module.py b/tests/tests_pytorch/core/test_lightning_module.py index d96578b597..64fa483d33 100644 --- 
a/tests/tests_pytorch/core/test_lightning_module.py +++ b/tests/tests_pytorch/core/test_lightning_module.py @@ -21,7 +21,6 @@ from torch import nn from torch.optim import Adam, SGD from lightning_fabric import Fabric -from lightning_fabric.utilities.imports import _TORCH_GREATER_EQUAL_1_11 from pytorch_lightning import LightningModule, Trainer from pytorch_lightning.core.module import _TrainerFabricShim from pytorch_lightning.demos.boring_classes import BoringModel @@ -315,10 +314,7 @@ def test_device_placement(tmpdir, accelerator, device): @RunIf(skip_windows=True) def test_sharded_tensor_state_dict(single_process_pg): - if _TORCH_GREATER_EQUAL_1_11: - from torch.distributed._shard.sharded_tensor import empty as sharded_tensor_empty - else: - from torch.distributed._sharded_tensor import empty as sharded_tensor_empty + from torch.distributed._shard.sharded_tensor import empty as sharded_tensor_empty from torch.distributed._sharding_spec import ChunkShardingSpec class BoringModelWithShardedTensor(BoringModel): @@ -336,7 +332,7 @@ def test_sharded_tensor_state_dict(single_process_pg): m_0 = BoringModelWithShardedTensor(spec) m_0.sharded_tensor.local_shards()[0].tensor.fill_(1) - name_st = ".sharded_tensor" if _TORCH_GREATER_EQUAL_1_11 and not _TORCH_GREATER_EQUAL_1_13 else "sharded_tensor" + name_st = ".sharded_tensor" if not _TORCH_GREATER_EQUAL_1_13 else "sharded_tensor" assert name_st in m_0.state_dict(), 'Expect "sharded_tensor" to appear in the state dict' m_1 = BoringModelWithShardedTensor(spec) diff --git a/tests/tests_pytorch/strategies/test_ddp.py b/tests/tests_pytorch/strategies/test_ddp.py index 529b9ff21c..62cdaffeed 100644 --- a/tests/tests_pytorch/strategies/test_ddp.py +++ b/tests/tests_pytorch/strategies/test_ddp.py @@ -95,7 +95,7 @@ def test_ddp_torch_dist_is_available_in_setup(_, __, cuda_count_1, tmpdir): trainer.fit(model) -@RunIf(min_cuda_gpus=2, min_torch="1.8.1", standalone=True) +@RunIf(min_cuda_gpus=2, standalone=True) 
@pytest.mark.parametrize("precision", (16, 32)) def test_ddp_wrapper(tmpdir, precision): """Test parameters to ignore are carried over for DDP.""" diff --git a/tests/tests_pytorch/trainer/connectors/test_accelerator_connector.py b/tests/tests_pytorch/trainer/connectors/test_accelerator_connector.py index 8b55356ac6..e24cb90da6 100644 --- a/tests/tests_pytorch/trainer/connectors/test_accelerator_connector.py +++ b/tests/tests_pytorch/trainer/connectors/test_accelerator_connector.py @@ -667,7 +667,7 @@ def test_parallel_devices_in_strategy_confilict_with_accelerator(parallel_device Trainer(strategy=DDPStrategy(parallel_devices=parallel_devices), accelerator=accelerator) -@pytest.mark.parametrize("deterministic", [True, False, pytest.param("warn", marks=RunIf(min_torch="1.11.0"))]) +@pytest.mark.parametrize("deterministic", [True, False, "warn"]) def test_deterministic_init(deterministic): trainer = Trainer(accelerator="auto", deterministic=deterministic) assert trainer._accelerator_connector.deterministic == deterministic