diff --git a/.azure/gpu-tests-fabric.yml b/.azure/gpu-tests-fabric.yml index c185060b75..2554ab3d67 100644 --- a/.azure/gpu-tests-fabric.yml +++ b/.azure/gpu-tests-fabric.yml @@ -59,16 +59,10 @@ jobs: strategy: matrix: "Fabric | latest": - image: "pytorchlightning/pytorch_lightning:base-cuda-py3.10-torch2.0-cuda11.8.0" - IS_NIGHTLY: "false" + image: "pytorchlightning/pytorch_lightning:base-cuda-py3.10-torch2.1-cuda12.1.0" PACKAGE_NAME: "fabric" "Lightning | latest": - image: "pytorchlightning/pytorch_lightning:base-cuda-py3.10-torch2.0-cuda11.8.0" - IS_NIGHTLY: "false" - PACKAGE_NAME: "lightning" - "Lightning | RC": - image: "pytorchlightning/pytorch_lightning:base-cuda-py3.10-torch2.0-cuda11.8.0" - IS_NIGHTLY: "true" + image: "pytorchlightning/pytorch_lightning:base-cuda-py3.10-torch2.1-cuda12.1.0" PACKAGE_NAME: "lightning" workspace: clean: all @@ -87,7 +81,6 @@ jobs: echo $CUDA_VISIBLE_DEVICES echo $CUDA_VERSION_MM echo $TORCH_URL - echo $(IS_NIGHTLY) echo $COVERAGE_SOURCE whereis nvidia nvidia-smi @@ -104,8 +97,6 @@ jobs: for fpath in `ls requirements/**/*.txt`; do \ python ./adjust-torch-versions.py $fpath ${PYTORCH_VERSION}; \ done - # without succeeded this could run even if the job has already failed - condition: and(succeeded(), eq(variables.IS_NIGHTLY, 'false')) displayName: "Adjust dependencies" - bash: | @@ -113,14 +104,6 @@ jobs: pip install -e ".[${extra}dev]" pytest-timeout -U --find-links ${TORCH_URL} displayName: "Install package & dependencies" - - bash: | - pip uninstall -y torch torchvision - pip install torch torchvision -U --pre --no-cache --index-url https://download.pytorch.org/whl/test/cu${CUDA_VERSION_MM%} - python -c "from torch import __version__ as ver; assert ver.startswith('2.1.0'), ver" - # without succeeded this could run even if the job has already failed - condition: and(succeeded(), eq(variables.IS_NIGHTLY, 'true')) - displayName: "Bump to RC" - - bash: | set -e python requirements/collect_env_details.py diff --git a/.azure/gpu-tests-pytorch.yml b/.azure/gpu-tests-pytorch.yml index 1a17252924..95c760328e 100644 --- a/.azure/gpu-tests-pytorch.yml +++ b/.azure/gpu-tests-pytorch.yml @@ -51,16 +51,10 @@ jobs: strategy: matrix: "PyTorch | latest": - image: "pytorchlightning/pytorch_lightning:base-cuda-py3.10-torch2.0-cuda11.8.0" - IS_NIGHTLY: "false" + image: "pytorchlightning/pytorch_lightning:base-cuda-py3.10-torch2.1-cuda12.1.0" PACKAGE_NAME: "pytorch" "Lightning | latest": - image: "pytorchlightning/pytorch_lightning:base-cuda-py3.10-torch2.0-cuda11.8.0" - IS_NIGHTLY: "false" - PACKAGE_NAME: "lightning" - "Lightning | RC": - image: "pytorchlightning/pytorch_lightning:base-cuda-py3.10-torch2.0-cuda11.8.0" - IS_NIGHTLY: "true" + image: "pytorchlightning/pytorch_lightning:base-cuda-py3.10-torch2.1-cuda12.1.0" PACKAGE_NAME: "lightning" pool: lit-rtx-3090 variables: @@ -89,7 +83,6 @@ jobs: echo $CUDA_VISIBLE_DEVICES echo $CUDA_VERSION_MM echo $TORCH_URL - echo $(IS_NIGHTLY) echo $COVERAGE_SOURCE whereis nvidia nvidia-smi @@ -106,8 +99,6 @@ jobs: for fpath in `ls requirements/**/*.txt`; do \ python ./adjust-torch-versions.py $fpath ${PYTORCH_VERSION}; \ done - # without succeeded this could run even if the job has already failed - condition: and(succeeded(), eq(variables.IS_NIGHTLY, 'false')) displayName: "Adjust dependencies" - bash: | @@ -122,14 +113,6 @@ jobs: pip install -e ".[${extra}dev]" -r requirements/_integrations/strategies.txt pytest-timeout -U --find-links ${TORCH_URL} displayName: "Install package & dependencies" - - bash: | - pip uninstall -y torch torchvision - pip install torch torchvision -U --pre --no-cache --index-url https://download.pytorch.org/whl/test/cu${CUDA_VERSION_MM%} - python -c "from torch import __version__ as ver; assert ver.startswith('2.1.0'), ver" - # without succeeded this could run even if the job has already failed - condition: and(succeeded(), eq(variables.IS_NIGHTLY, 'true')) - displayName: "Bump to RC" - - bash: pip uninstall -y lightning # without succeeded this could run even if the job has already failed condition: and(succeeded(), eq(variables['PACKAGE_NAME'], 'pytorch')) diff --git a/.github/checkgroup.yml b/.github/checkgroup.yml index 9b660639f9..a8bbe1d341 100644 --- a/.github/checkgroup.yml +++ b/.github/checkgroup.yml @@ -149,13 +149,13 @@ subprojects: - "build-cuda (3.9, 1.13, 11.8.0)" - "build-cuda (3.9, 1.13, 12.0.1)" - "build-cuda (3.10, 2.0, 11.8.0)" - - "build-cuda (3.10, 2.0, 12.0.1)" + - "build-cuda (3.10, 2.1, 12.1.0)" #- "build-NGC" - "build-pl (3.9, 1.12, 11.7.1)" - "build-pl (3.9, 1.13, 11.8.0)" - "build-pl (3.9, 1.13, 12.0.1)" - "build-pl (3.10, 2.0, 11.8.0)" - - "build-pl (3.10, 2.0, 12.0.1)" + - "build-pl (3.10, 2.1, 12.1.0)" # SECTIONS: lightning_data diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml index 1c1b8e99d5..4aba32fbdf 100644 --- a/.github/workflows/docker-build.yml +++ b/.github/workflows/docker-build.yml @@ -47,7 +47,7 @@ jobs: - { python_version: "3.9", pytorch_version: "1.13", cuda_version: "11.8.0" } - { python_version: "3.9", pytorch_version: "1.13", cuda_version: "12.0.1" } - { python_version: "3.10", pytorch_version: "2.0", cuda_version: "11.8.0" } - - { python_version: "3.10", pytorch_version: "2.0", cuda_version: "12.0.1" } + - { python_version: "3.10", pytorch_version: "2.1", cuda_version: "12.1.0" } steps: - uses: actions/checkout@v4 with: @@ -108,7 +108,7 @@ jobs: - { python_version: "3.9", pytorch_version: "1.13", cuda_version: "11.8.0" } - { python_version: "3.9", pytorch_version: "1.13", cuda_version: "12.0.1" } - { python_version: "3.10", pytorch_version: "2.0", cuda_version: "11.8.0" } - - { python_version: "3.10", pytorch_version: "2.0", cuda_version: "12.0.1" } + - { python_version: "3.10", pytorch_version: "2.1", cuda_version: "12.1.0" } steps: - uses: actions/checkout@v4 - uses: docker/setup-buildx-action@v3 diff --git a/dockers/base-cuda/Dockerfile b/dockers/base-cuda/Dockerfile index 5fcaec5ffb..dd89692dd7 100644 --- a/dockers/base-cuda/Dockerfile +++ b/dockers/base-cuda/Dockerfile @@ -19,8 +19,8 @@ ARG CUDA_VERSION=11.7.1 FROM nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION} ARG PYTHON_VERSION=3.10 -ARG PYTORCH_VERSION=2.0 -ARG MAX_ALLOWED_NCCL=2.16.2 +ARG PYTORCH_VERSION=2.1 +ARG MAX_ALLOWED_NCCL=2.17.1 SHELL ["/bin/bash", "-c"] # https://techoverflow.net/2019/05/18/how-to-fix-configuring-tzdata-interactive-input-when-building-docker-images/