upgrade gpu workflow

This commit is contained in:
awaelchli 2024-07-20 01:47:56 +02:00
parent 1cd774197d
commit 6165c7719a
5 changed files with 10 additions and 20 deletions

View File

@@ -63,7 +63,7 @@ jobs:
image: "pytorchlightning/pytorch_lightning:base-cuda-py3.11-torch2.3-cuda12.1.0"
PACKAGE_NAME: "lightning"
"Lightning | future":
image: "pytorchlightning/pytorch_lightning:base-cuda-py3.11-torch2.4-cuda12.1.0"
image: "pytorchlightning/pytorch_lightning:base-cuda-py3.12-torch2.4-cuda12.4.0"
PACKAGE_NAME: "lightning"
workspace:
clean: all

View File

@@ -56,7 +56,7 @@ jobs:
image: "pytorchlightning/pytorch_lightning:base-cuda-py3.11-torch2.3-cuda12.1.0"
PACKAGE_NAME: "lightning"
"Lightning | future":
image: "pytorchlightning/pytorch_lightning:base-cuda-py3.11-torch2.4-cuda12.1.0"
image: "pytorchlightning/pytorch_lightning:base-cuda-py3.12-torch2.4-cuda12.4.0"
PACKAGE_NAME: "lightning"
pool: lit-rtx-3090
variables:

View File

@@ -138,19 +138,14 @@ subprojects:
- "!*.md"
- "!**/*.md"
checks:
- "build-cuda (3.10, 2.1, 12.1.0)"
- "build-cuda (3.10, 2.2, 12.1.0)"
- "build-cuda (3.11, 2.1, 12.1.0)"
- "build-cuda (3.11, 2.2, 12.1.0)"
- "build-cuda (3.11, 2.3, 12.1.0)"
- "build-cuda (3.11, 2.4, 12.1.0)"
#- "build-NGC"
- "build-pl (3.10, 2.1, 12.1.0)"
- "build-pl (3.10, 2.2, 12.1.0)"
- "build-cuda (3.11, 2.4, 12.4.0)"
- "build-pl (3.11, 2.1, 12.1.0)"
- "build-pl (3.11, 2.2, 12.1.0)"
- "build-pl (3.11, 2.3, 12.1.0)"
- "build-pl (3.11, 2.4, 12.1.0)"
- "build-pl (3.11, 2.4, 12.4.0)"
# SECTION: lightning_fabric

View File

@@ -28,14 +28,14 @@ concurrency:
cancel-in-progress: ${{ github.event_name == 'pull_request' }}
env:
PUSH_NIGHTLY: ${{ github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' }}
PUSH_NIGHTLY: true # ${{ github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' }}
PUSH_RELEASE: ${{ startsWith(github.ref, 'refs/tags/') || github.event_name == 'release' }}
jobs:
build-pl:
# the images generated by this job are not used anywhere in this repository. they are just meant to be available
# for users
if: github.event.pull_request.draft == false
# if: github.event.pull_request.draft == false
runs-on: ubuntu-latest
strategy:
fail-fast: false
@@ -43,12 +43,10 @@ jobs:
include:
# We only release one docker image per PyTorch version.
# Make sure the matrix here matches the one below.
- { python_version: "3.10", pytorch_version: "2.1", cuda_version: "12.1.0" }
- { python_version: "3.10", pytorch_version: "2.2", cuda_version: "12.1.0" }
- { python_version: "3.11", pytorch_version: "2.1", cuda_version: "12.1.0" }
- { python_version: "3.11", pytorch_version: "2.2", cuda_version: "12.1.0" }
- { python_version: "3.11", pytorch_version: "2.3", cuda_version: "12.1.0" }
- { python_version: "3.11", pytorch_version: "2.4", cuda_version: "12.1.0" }
- { python_version: "3.12", pytorch_version: "2.4", cuda_version: "12.4.0" }
steps:
- uses: actions/checkout@v4
with:
@@ -97,7 +95,7 @@ jobs:
timeout-minutes: 35
build-cuda:
if: github.event.pull_request.draft == false
# if: github.event.pull_request.draft == false
runs-on: ubuntu-latest
strategy:
fail-fast: false
@@ -105,13 +103,10 @@ jobs:
include:
# These are the base images for PL release docker images.
# Make sure the matrix here matches the one above.
- { python_version: "3.10", pytorch_version: "2.1", cuda_version: "12.1.0" }
- { python_version: "3.10", pytorch_version: "2.2", cuda_version: "12.1.0" }
- { python_version: "3.11", pytorch_version: "2.1", cuda_version: "12.1.0" }
- { python_version: "3.11", pytorch_version: "2.2", cuda_version: "12.1.0" }
- { python_version: "3.11", pytorch_version: "2.3", cuda_version: "12.1.0" }
- { python_version: "3.11", pytorch_version: "2.4", cuda_version: "12.1.0" }
# - { python_version: "3.12", pytorch_version: "2.2", cuda_version: "12.1.0" } # todo: pending on `onnxruntime`
- { python_version: "3.11", pytorch_version: "2.4", cuda_version: "12.4.0" }
steps:
- uses: actions/checkout@v4
- uses: docker/setup-buildx-action@v3

View File

@@ -208,7 +208,7 @@ class TorchCollective(Collective):
@override
def _convert_to_native_op(cls, op: Union[str, ReduceOp, RedOpType]) -> Union[ReduceOp, RedOpType]:
# `ReduceOp` is an empty shell for `RedOpType`, the latter being the actually returned class.
# For example, `ReduceOp.SUM` returns a `RedOpType.SUM`. the only exception is `RedOpType.PREMUL_SUM` where
# For example, `ReduceOp.SUM` returns a `RedOpType.SUM`. The only exception is `RedOpType.PREMUL_SUM` where
# `ReduceOp` is still the desired class, but it's created via a special `_make_nccl_premul_sum` function
if isinstance(op, (ReduceOp, RedOpType)):
return op