# Builds the project's Docker images:
#   * build-pl   - release images from `dockers/release/Dockerfile`
#   * build-cuda - CUDA base images from `dockers/base-cuda/Dockerfile`
#   * build-NGC  - image from `dockers/nvidia/Dockerfile` (build only, never pushed)
# Pull requests only build the images. Pushing happens for scheduled/manual runs
# (base images) and for published releases (release images).
name: Docker builds

on:
  push:
    branches: [master, "release/*"]
  pull_request:
    branches: [master, "release/*"]
    # `ready_for_review` is included because draft PRs are skipped by the jobs below
    types: [opened, reopened, ready_for_review, synchronize]
    paths:
      - ".actions/*"
      - ".github/workflows/docker-build.yml"
      - "dockers/**"
      - "requirements/*.txt"
      - "requirements/pytorch/**"
      - "requirements/fabric/**"
      - "setup.py"
      # negated patterns: changes to docs requirements and markdown files do not trigger a build
      - "!requirements/*/docs.txt"
      - "!*.md"
      - "!**/*.md"
  schedule:
    - cron: "0 0 * * *" # at the end of every day
  release:
    types: [published]
  workflow_dispatch: {}

concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref }}-${{ github.event_name }}
  # only superseded pull-request runs are cancelled; other events always run to completion
  cancel-in-progress: ${{ github.event_name == 'pull_request' }}

env:
  # both flags are compared against the string 'true' in the steps below
  PUSH_NIGHTLY: ${{ github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' }}
  PUSH_RELEASE: ${{ startsWith(github.ref, 'refs/tags/') || github.event_name == 'release' }}

jobs:
  build-pl:
    # The images generated by this job are not used anywhere in this repository.
    # They are just meant to be available for users.
    if: github.event.pull_request.draft == false
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        include:
          # We only release one docker image per PyTorch version.
          # Make sure the matrix here matches the one in `build-cuda` below.
          # NOTE(review): `build-cuda` additionally lists py3.11 / torch2.3 / cuda12.1.0;
          # confirm whether a release image for that combination should be added here.
          - { python_version: "3.10", pytorch_version: "2.0", cuda_version: "11.8.0" }
          - { python_version: "3.10", pytorch_version: "2.1", cuda_version: "12.1.0" }
          - { python_version: "3.10", pytorch_version: "2.2", cuda_version: "12.1.0" }
          - { python_version: "3.11", pytorch_version: "2.1", cuda_version: "12.1.0" }
          - { python_version: "3.11", pytorch_version: "2.2", cuda_version: "12.1.0" }
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: true
      - uses: docker/setup-buildx-action@v3
      - uses: docker/login-action@v3
        if: env.PUSH_RELEASE == 'true' && github.repository_owner == 'Lightning-AI'
        with:
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_PASSWORD }}

      - name: Get release version
        if: github.event_name == 'release'
        # For workflows triggered by release, `GITHUB_REF` is the release tag created;
        # keep only its last path component (e.g. `refs/tags/X` -> `X`).
        run: |
          echo "RELEASE_VERSION=${GITHUB_REF##*/}" >> "$GITHUB_ENV"

      - name: Set tags
        # Exports DOCKER_TAGS: a comma-separated list of fully qualified image tags
        # that is consumed by the build-push step below.
        run: |
          import os

          repo = "pytorchlightning/pytorch_lightning"
          ver = os.getenv('RELEASE_VERSION')
          py_ver = "${{ matrix.python_version }}"
          pt_ver = "${{ matrix.pytorch_version }}"
          cuda_ver = "${{ matrix.cuda_version }}"

          # every build gets a rolling `latest-*` tag for its configuration
          tags = [f"latest-py{py_ver}-torch{pt_ver}-cuda{cuda_ver}"]
          # RELEASE_VERSION is only set on release events; add a versioned tag then
          if ver:
              tags += [f"{ver}-py{py_ver}-torch{pt_ver}-cuda{cuda_ver}"]
          # exactly one configuration also carries the bare `latest` tag
          if py_ver == '3.10' and pt_ver == '2.1' and cuda_ver == '12.1.0':
              tags += ["latest"]

          tags = [f"{repo}:{tag}" for tag in tags]
          with open(os.getenv('GITHUB_ENV'), "a") as gh_env:
              # the env file is line-oriented, so terminate the entry with a newline
              gh_env.write("DOCKER_TAGS=" + ",".join(tags) + "\n")
        shell: python

      - uses: docker/build-push-action@v6
        with:
          build-args: |
            PYTHON_VERSION=${{ matrix.python_version }}
            PYTORCH_VERSION=${{ matrix.pytorch_version }}
            CUDA_VERSION=${{ matrix.cuda_version }}
            LIGHTNING_VERSION=${{ env.RELEASE_VERSION }}
          file: dockers/release/Dockerfile
          # Pushed only when PL is released. The owner check mirrors the login step above,
          # so forks (which skip the login) build without attempting an unauthenticated push.
          push: ${{ env.PUSH_RELEASE == 'true' && github.repository_owner == 'Lightning-AI' }}
          tags: ${{ env.DOCKER_TAGS }}
        timeout-minutes: 35

  build-cuda:
    if: github.event.pull_request.draft == false
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        include:
          # These are the base images for PL release docker images.
          # Make sure the matrix here matches the one in `build-pl` above.
          - { python_version: "3.10", pytorch_version: "2.0", cuda_version: "11.8.0" }
          - { python_version: "3.10", pytorch_version: "2.1", cuda_version: "12.1.0" }
          - { python_version: "3.10", pytorch_version: "2.2", cuda_version: "12.1.0" }
          - { python_version: "3.11", pytorch_version: "2.1", cuda_version: "12.1.0" }
          - { python_version: "3.11", pytorch_version: "2.2", cuda_version: "12.1.0" }
          - { python_version: "3.11", pytorch_version: "2.3", cuda_version: "12.1.0" }
          # - { python_version: "3.12", pytorch_version: "2.2", cuda_version: "12.1.0" } # todo: pending on `onnxruntime`
    steps:
      - uses: actions/checkout@v4
      - uses: docker/setup-buildx-action@v3
      - uses: docker/login-action@v3
        if: env.PUSH_NIGHTLY == 'true' && github.repository_owner == 'Lightning-AI'
        with:
          username: ${{ secrets.DOCKER_USERNAME }}
          password: ${{ secrets.DOCKER_PASSWORD }}

      - uses: docker/build-push-action@v6
        with:
          build-args: |
            PYTHON_VERSION=${{ matrix.python_version }}
            PYTORCH_VERSION=${{ matrix.pytorch_version }}
            CUDA_VERSION=${{ matrix.cuda_version }}
          file: dockers/base-cuda/Dockerfile
          # Pushed only on scheduled/manual runs. The owner check mirrors the login step above,
          # so forks (which skip the login) build without attempting an unauthenticated push.
          push: ${{ env.PUSH_NIGHTLY == 'true' && github.repository_owner == 'Lightning-AI' }}
          tags: "pytorchlightning/pytorch_lightning:base-cuda-py${{ matrix.python_version }}-torch${{ matrix.pytorch_version }}-cuda${{ matrix.cuda_version }}"
        timeout-minutes: 95

      # report failures of scheduled/manual (nightly) builds to Slack
      - uses: ravsamhq/notify-slack-action@v2
        if: failure() && env.PUSH_NIGHTLY == 'true'
        with:
          status: ${{ job.status }}
          token: ${{ secrets.GITHUB_TOKEN }}
          notification_title: ${{ format('CUDA; {0} py{1} for *{2}*', runner.os, matrix.python_version, matrix.pytorch_version) }}
          message_format: "{emoji} *{workflow}* {status_message}, see <{run_url}|detail>, cc: <@U01A5T7EY9M>" # akihironitta
        env:
          SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}

  build-NGC:
    if: github.event.pull_request.draft == false
    # fixme: use larger machine or optimize image size
    # runs-on: ubuntu-latest-4-cores
    # then drop continue-on-error
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - name: Build Conda Docker
        # publish master/release
        # a failure of this step does not fail the job (see the fixme above)
        continue-on-error: true
        uses: docker/build-push-action@v6
        with:
          file: dockers/nvidia/Dockerfile
          push: false
        timeout-minutes: 55