163 lines
6.4 KiB
YAML
163 lines
6.4 KiB
YAML
name: Docker builds

# Events that start this workflow.
on:
  # Commits landing on the default branch or on any release branch.
  push:
    branches:
      - master
      - "release/*"

  # Pull requests targeting the same branches, limited to changes that can
  # affect the Docker images.
  pull_request:
    branches:
      - master
      - "release/*"
    # added `ready_for_review` since draft is skipped
    types:
      - opened
      - reopened
      - ready_for_review
      - synchronize
    paths:
      - ".actions/*"
      - ".github/workflows/docker-build.yml"
      - "dockers/**"
      - "requirements/*.txt"
      - "requirements/pytorch/**"
      - "requirements/fabric/**"
      - "setup.py"
      # Negated patterns: docs requirements and Markdown files never trigger a build.
      - "!requirements/*/docs.txt"
      - "!*.md"
      - "!**/*.md"

  # Nightly run at 00:00 (at the end of every day).
  schedule:
    - cron: "0 0 * * *"

  # Runs when a release is published.
  release:
    types:
      - published

  # Manual trigger from the Actions tab; takes no inputs.
  workflow_dispatch: {}
|
|
|
|
# One concurrency group per workflow / ref / PR head branch / event type.
concurrency:
  # Only pull-request runs are superseded by newer runs in the same group;
  # runs for every other event are left to finish.
  cancel-in-progress: "${{ github.event_name == 'pull_request' }}"
  group: "${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref }}-${{ github.event_name }}"
|
|
|
|
# Workflow-wide switches. Both evaluate to the string 'true' or 'false' and are
# compared against 'true' in the step conditions of the jobs.
env:
  # 'true' for scheduled and manually dispatched runs.
  PUSH_NIGHTLY: "${{ github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' }}"
  # 'true' when the ref is a tag or the run was triggered by a release event.
  PUSH_RELEASE: "${{ startsWith(github.ref, 'refs/tags/') || github.event_name == 'release' }}"
|
|
|
|
jobs:
  # Builds the user-facing PyTorch Lightning release images.
  build-pl:
    # the images generated by this job are not used anywhere in this repository. they are just meant to be available
    # for users
    # Skip draft pull requests; events without a pull_request payload still run.
    if: github.event.pull_request.draft == false
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        include:
          # We only release one docker image per PyTorch version.
          # Make sure the matrix here matches the one in `build-cuda` below.
          - { python_version: "3.10", pytorch_version: "2.1", cuda_version: "12.1.0" }
          - { python_version: "3.11", pytorch_version: "2.2", cuda_version: "12.1.0" }
          - { python_version: "3.11", pytorch_version: "2.3", cuda_version: "12.1.0" }
          - { python_version: "3.11", pytorch_version: "2.4", cuda_version: "12.1.0" }
          - { python_version: "3.12", pytorch_version: "2.5", cuda_version: "12.1.0" }
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: true
      - uses: docker/setup-buildx-action@v3
      # Registry credentials are only needed (and only available) for release
      # runs in the upstream organisation.
      - uses: docker/login-action@v3
        if: env.PUSH_RELEASE == 'true' && github.repository_owner == 'Lightning-AI'
        with:
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_PASSWORD }}

      - name: Get release version
        if: github.event_name == 'release'
        # For workflows triggered by release, `GITHUB_REF` is the release tag created.
        # Keep only the part after the last `/` and export it for later steps.
        run: echo "RELEASE_VERSION=${GITHUB_REF##*/}" >> "$GITHUB_ENV"
      - name: Set tags
        shell: python
        run: |
          import os

          repo = "pytorchlightning/pytorch_lightning"
          # Only set by the "Get release version" step, i.e. on release events.
          ver = os.getenv('RELEASE_VERSION')
          py_ver = "${{ matrix.python_version }}"
          pt_ver = "${{ matrix.pytorch_version }}"
          cuda_ver = "${{ matrix.cuda_version }}"
          # Every build gets a rolling "latest-<combo>" tag ...
          tags = [f"latest-py{py_ver}-torch{pt_ver}-cuda{cuda_ver}"]
          # ... release builds additionally get a version-pinned tag ...
          if ver:
              tags += [f"{ver}-py{py_ver}-torch{pt_ver}-cuda{cuda_ver}"]
          # ... and exactly one matrix entry also carries the bare "latest" tag.
          # NOTE(review): this is pinned to torch 2.3 although the matrix goes up
          # to 2.5 - confirm that is intentional.
          if py_ver == '3.11' and pt_ver == '2.3' and cuda_ver == '12.1.0':
              tags += ["latest"]

          tags = [f"{repo}:{tag}" for tag in tags]
          with open(os.getenv('GITHUB_ENV'), "a") as gh_env:
              # Terminate the entry with a newline so that anything appended to
              # the env file afterwards starts on its own line.
              gh_env.write("DOCKER_TAGS=" + ",".join(tags) + "\n")

      - uses: docker/build-push-action@v6
        with:
          build-args: |
            PYTHON_VERSION=${{ matrix.python_version }}
            PYTORCH_VERSION=${{ matrix.pytorch_version }}
            CUDA_VERSION=${{ matrix.cuda_version }}
            LIGHTNING_VERSION=${{ env.RELEASE_VERSION }}
          file: dockers/release/Dockerfile
          # Push only under the same condition as the login step above; without
          # the owner check a fork would attempt an unauthenticated push and fail.
          push: ${{ env.PUSH_RELEASE == 'true' && github.repository_owner == 'Lightning-AI' }}
          tags: ${{ env.DOCKER_TAGS }}
        timeout-minutes: 35

  # Builds the CUDA base images that the release images are built on.
  build-cuda:
    # Skip draft pull requests; events without a pull_request payload still run.
    if: github.event.pull_request.draft == false
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        include:
          # These are the base images for PL release docker images.
          # Make sure the matrix here matches the one in `build-pl` above.
          - { python_version: "3.10", pytorch_version: "2.1.2", cuda_version: "12.1.0" }
          - { python_version: "3.11", pytorch_version: "2.2.2", cuda_version: "12.1.0" }
          - { python_version: "3.11", pytorch_version: "2.3.1", cuda_version: "12.1.0" }
          - { python_version: "3.11", pytorch_version: "2.4.1", cuda_version: "12.1.0" }
          - { python_version: "3.12", pytorch_version: "2.5.1", cuda_version: "12.1.0" }
    steps:
      - uses: actions/checkout@v4
      - uses: docker/setup-buildx-action@v3
      # Registry credentials are only needed for nightly/manual runs in the
      # upstream organisation.
      - uses: docker/login-action@v3
        if: env.PUSH_NIGHTLY == 'true' && github.repository_owner == 'Lightning-AI'
        with:
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_PASSWORD }}

      - name: shorten Torch version
        run: |
          # Keep only major.minor, e.g. convert 1.10.2 to 1.10; used in the image tag.
          pt_version=$(echo "${{ matrix.pytorch_version }}" | cut -d. -f1,2)
          echo "PT_VERSION=$pt_version" >> "$GITHUB_ENV"
      - uses: docker/build-push-action@v6
        with:
          build-args: |
            PYTHON_VERSION=${{ matrix.python_version }}
            PYTORCH_VERSION=${{ matrix.pytorch_version }}
            CUDA_VERSION=${{ matrix.cuda_version }}
          file: dockers/base-cuda/Dockerfile
          # Push only under the same condition as the login step above; without
          # the owner check a fork would attempt an unauthenticated push and fail.
          push: ${{ env.PUSH_NIGHTLY == 'true' && github.repository_owner == 'Lightning-AI' }}
          tags: "pytorchlightning/pytorch_lightning:base-cuda-py${{ matrix.python_version }}-torch${{ env.PT_VERSION }}-cuda${{ matrix.cuda_version }}"
        timeout-minutes: 95
      # Report failures of nightly/manual runs to Slack.
      - uses: ravsamhq/notify-slack-action@v2
        if: failure() && env.PUSH_NIGHTLY == 'true'
        with:
          status: ${{ job.status }}
          token: ${{ secrets.GITHUB_TOKEN }}
          notification_title: ${{ format('CUDA; {0} py{1} for *{2}*', runner.os, matrix.python_version, matrix.pytorch_version) }}
          message_format: "{emoji} *{workflow}* {status_message}, see <{run_url}|detail>, cc: <@U01A5T7EY9M>" # akihironitta
        env:
          SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}

  # Build-only check of the NVIDIA (NGC) image; nothing is pushed.
  build-NGC:
    # Skip draft pull requests; events without a pull_request payload still run.
    if: github.event.pull_request.draft == false
    # fixme: use larger machine or optimize image size
    #  runs-on: ubuntu-latest-4-cores
    #  then drop continue-on-error
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - name: Build NGC Docker
        # NOTE(review): previously annotated "publish master/release", but `push`
        # is false below, so this step never publishes anything.
        # A failure of this step does not fail the job (see the fixme above).
        continue-on-error: true
        uses: docker/build-push-action@v6
        with:
          file: dockers/nvidia/Dockerfile
          push: false
        timeout-minutes: 55
|