Update GPU CI and docker images for PyTorch 2.1 (#18719)

Co-authored-by: Carlos Mocholí <carlossmocholi@gmail.com>
Co-authored-by: Jirka <jirka.borovec@seznam.cz>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
This commit is contained in:
Adrian Wälchli 2023-10-06 05:12:37 -07:00 committed by GitHub
parent 5d819c91fb
commit 77eef8aff5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 10 additions and 44 deletions

View File

@@ -59,16 +59,10 @@ jobs:
strategy:
matrix:
"Fabric | latest":
image: "pytorchlightning/pytorch_lightning:base-cuda-py3.10-torch2.0-cuda11.8.0"
IS_NIGHTLY: "false"
image: "pytorchlightning/pytorch_lightning:base-cuda-py3.10-torch2.1-cuda12.1.0"
PACKAGE_NAME: "fabric"
"Lightning | latest":
image: "pytorchlightning/pytorch_lightning:base-cuda-py3.10-torch2.0-cuda11.8.0"
IS_NIGHTLY: "false"
PACKAGE_NAME: "lightning"
"Lightning | RC":
image: "pytorchlightning/pytorch_lightning:base-cuda-py3.10-torch2.0-cuda11.8.0"
IS_NIGHTLY: "true"
image: "pytorchlightning/pytorch_lightning:base-cuda-py3.10-torch2.1-cuda12.1.0"
PACKAGE_NAME: "lightning"
workspace:
clean: all
@@ -87,7 +81,6 @@ jobs:
echo $CUDA_VISIBLE_DEVICES
echo $CUDA_VERSION_MM
echo $TORCH_URL
echo $(IS_NIGHTLY)
echo $COVERAGE_SOURCE
whereis nvidia
nvidia-smi
@@ -104,8 +97,6 @@ jobs:
for fpath in `ls requirements/**/*.txt`; do \
python ./adjust-torch-versions.py $fpath ${PYTORCH_VERSION}; \
done
# without succeeded this could run even if the job has already failed
condition: and(succeeded(), eq(variables.IS_NIGHTLY, 'false'))
displayName: "Adjust dependencies"
- bash: |
@@ -113,14 +104,6 @@ jobs:
pip install -e ".[${extra}dev]" pytest-timeout -U --find-links ${TORCH_URL}
displayName: "Install package & dependencies"
- bash: |
pip uninstall -y torch torchvision
pip install torch torchvision -U --pre --no-cache --index-url https://download.pytorch.org/whl/test/cu${CUDA_VERSION_MM%}
python -c "from torch import __version__ as ver; assert ver.startswith('2.1.0'), ver"
# without succeeded this could run even if the job has already failed
condition: and(succeeded(), eq(variables.IS_NIGHTLY, 'true'))
displayName: "Bump to RC"
- bash: |
set -e
python requirements/collect_env_details.py

View File

@@ -51,16 +51,10 @@ jobs:
strategy:
matrix:
"PyTorch | latest":
image: "pytorchlightning/pytorch_lightning:base-cuda-py3.10-torch2.0-cuda11.8.0"
IS_NIGHTLY: "false"
image: "pytorchlightning/pytorch_lightning:base-cuda-py3.10-torch2.1-cuda12.1.0"
PACKAGE_NAME: "pytorch"
"Lightning | latest":
image: "pytorchlightning/pytorch_lightning:base-cuda-py3.10-torch2.0-cuda11.8.0"
IS_NIGHTLY: "false"
PACKAGE_NAME: "lightning"
"Lightning | RC":
image: "pytorchlightning/pytorch_lightning:base-cuda-py3.10-torch2.0-cuda11.8.0"
IS_NIGHTLY: "true"
image: "pytorchlightning/pytorch_lightning:base-cuda-py3.10-torch2.1-cuda12.1.0"
PACKAGE_NAME: "lightning"
pool: lit-rtx-3090
variables:
@@ -89,7 +83,6 @@ jobs:
echo $CUDA_VISIBLE_DEVICES
echo $CUDA_VERSION_MM
echo $TORCH_URL
echo $(IS_NIGHTLY)
echo $COVERAGE_SOURCE
whereis nvidia
nvidia-smi
@@ -106,8 +99,6 @@ jobs:
for fpath in `ls requirements/**/*.txt`; do \
python ./adjust-torch-versions.py $fpath ${PYTORCH_VERSION}; \
done
# without succeeded this could run even if the job has already failed
condition: and(succeeded(), eq(variables.IS_NIGHTLY, 'false'))
displayName: "Adjust dependencies"
- bash: |
@@ -122,14 +113,6 @@ jobs:
pip install -e ".[${extra}dev]" -r requirements/_integrations/strategies.txt pytest-timeout -U --find-links ${TORCH_URL}
displayName: "Install package & dependencies"
- bash: |
pip uninstall -y torch torchvision
pip install torch torchvision -U --pre --no-cache --index-url https://download.pytorch.org/whl/test/cu${CUDA_VERSION_MM%}
python -c "from torch import __version__ as ver; assert ver.startswith('2.1.0'), ver"
# without succeeded this could run even if the job has already failed
condition: and(succeeded(), eq(variables.IS_NIGHTLY, 'true'))
displayName: "Bump to RC"
- bash: pip uninstall -y lightning
# without succeeded this could run even if the job has already failed
condition: and(succeeded(), eq(variables['PACKAGE_NAME'], 'pytorch'))

View File

@@ -149,13 +149,13 @@ subprojects:
- "build-cuda (3.9, 1.13, 11.8.0)"
- "build-cuda (3.9, 1.13, 12.0.1)"
- "build-cuda (3.10, 2.0, 11.8.0)"
- "build-cuda (3.10, 2.0, 12.0.1)"
- "build-cuda (3.10, 2.1, 12.1.0)"
#- "build-NGC"
- "build-pl (3.9, 1.12, 11.7.1)"
- "build-pl (3.9, 1.13, 11.8.0)"
- "build-pl (3.9, 1.13, 12.0.1)"
- "build-pl (3.10, 2.0, 11.8.0)"
- "build-pl (3.10, 2.0, 12.0.1)"
- "build-pl (3.10, 2.1, 12.1.0)"
# SECTIONS: lightning_data

View File

@@ -47,7 +47,7 @@ jobs:
- { python_version: "3.9", pytorch_version: "1.13", cuda_version: "11.8.0" }
- { python_version: "3.9", pytorch_version: "1.13", cuda_version: "12.0.1" }
- { python_version: "3.10", pytorch_version: "2.0", cuda_version: "11.8.0" }
- { python_version: "3.10", pytorch_version: "2.0", cuda_version: "12.0.1" }
- { python_version: "3.10", pytorch_version: "2.1", cuda_version: "12.1.0" }
steps:
- uses: actions/checkout@v4
with:
@@ -108,7 +108,7 @@ jobs:
- { python_version: "3.9", pytorch_version: "1.13", cuda_version: "11.8.0" }
- { python_version: "3.9", pytorch_version: "1.13", cuda_version: "12.0.1" }
- { python_version: "3.10", pytorch_version: "2.0", cuda_version: "11.8.0" }
- { python_version: "3.10", pytorch_version: "2.0", cuda_version: "12.0.1" }
- { python_version: "3.10", pytorch_version: "2.1", cuda_version: "12.1.0" }
steps:
- uses: actions/checkout@v4
- uses: docker/setup-buildx-action@v3

View File

@@ -19,8 +19,8 @@ ARG CUDA_VERSION=11.7.1
FROM nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}
ARG PYTHON_VERSION=3.10
ARG PYTORCH_VERSION=2.0
ARG MAX_ALLOWED_NCCL=2.16.2
ARG PYTORCH_VERSION=2.1
ARG MAX_ALLOWED_NCCL=2.17.1
SHELL ["/bin/bash", "-c"]
# https://techoverflow.net/2019/05/18/how-to-fix-configuring-tzdata-interactive-input-when-building-docker-images/