CI: validate JSON & fix benchmark (#8567)
* CI: validate JSON * as GHA * PT1.8 * 32g Co-authored-by: Carlos Mocholí <carlossmocholi@gmail.com>
This commit is contained in:
parent
0a71fe2859
commit
470842f5c8
|
@ -1,21 +1,19 @@
|
||||||
name: GPU Parity testing
|
schedules:
|
||||||
|
- cron: "0 0 * * *" # At the end of every day
|
||||||
on:
|
displayName: Daily midnight benchmark
|
||||||
schedule:
|
branches:
|
||||||
- cron: "0 0 * * *" # At the end of every day
|
include:
|
||||||
|
- "master"
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
parity-test:
|
- job: benchmarks
|
||||||
timeoutInMinutes: 120
|
timeoutInMinutes: "90"
|
||||||
|
cancelTimeoutInMinutes: "2"
|
||||||
cancelTimeoutInMinutes: 2
|
|
||||||
|
|
||||||
pool: gridai-spot-pool
|
pool: gridai-spot-pool
|
||||||
|
|
||||||
container:
|
container:
|
||||||
# base ML image: mcr.microsoft.com/azureml/openmpi3.1.2-cuda10.2-cudnn8-ubuntu18.04
|
# base ML image: mcr.microsoft.com/azureml/openmpi3.1.2-cuda10.2-cudnn8-ubuntu18.04
|
||||||
image: "pytorchlightning/pytorch_lightning:base-cuda-py3.8-torch1.6"
|
image: "pytorchlightning/pytorch_lightning:base-cuda-py3.8-torch1.8"
|
||||||
|
options: "--runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all --shm-size=32g"
|
||||||
workspace:
|
workspace:
|
||||||
clean: all
|
clean: all
|
||||||
|
|
|
@ -9,19 +9,19 @@ trigger:
|
||||||
- '*'
|
- '*'
|
||||||
branches:
|
branches:
|
||||||
include:
|
include:
|
||||||
- master
|
- "master"
|
||||||
- release/*
|
- "release/*"
|
||||||
- refs/tags/*
|
- "refs/tags/*"
|
||||||
pr:
|
pr:
|
||||||
- master
|
- "master"
|
||||||
- release/*
|
- "release/*"
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
- job: pytest
|
- job: pytest
|
||||||
# how long to run the job before automatically cancelling
|
# how long to run the job before automatically cancelling
|
||||||
timeoutInMinutes: 45
|
timeoutInMinutes: "45"
|
||||||
# how much time to give 'run always even if cancelled tasks' before stopping them
|
# how much time to give 'run always even if cancelled tasks' before stopping them
|
||||||
cancelTimeoutInMinutes: 2
|
cancelTimeoutInMinutes: "2"
|
||||||
|
|
||||||
pool: gridai-spot-pool
|
pool: gridai-spot-pool
|
||||||
|
|
||||||
|
@ -92,14 +92,15 @@ jobs:
|
||||||
testRunTitle: '$(Agent.OS) - $(Build.DefinitionName) - Python $(python.version)'
|
testRunTitle: '$(Agent.OS) - $(Build.DefinitionName) - Python $(python.version)'
|
||||||
condition: succeededOrFailed()
|
condition: succeededOrFailed()
|
||||||
|
|
||||||
- task: PublishCodeCoverageResults@1
|
# todo: re-enable after the schema check passes; also, at the moment it does not seem to have any effect
|
||||||
displayName: 'Publish coverage report'
|
#- task: PublishCodeCoverageResults@2
|
||||||
inputs:
|
# displayName: 'Publish coverage report'
|
||||||
codeCoverageTool: 'cobertura'
|
# inputs:
|
||||||
summaryFileLocation: 'coverage.xml'
|
# codeCoverageTool: 'Cobertura'
|
||||||
reportDirectory: '$(Build.SourcesDirectory)/htmlcov'
|
# summaryFileLocation: 'coverage.xml'
|
||||||
testRunTitle: '$(Agent.OS) - $(Build.BuildNumber)[$(Agent.JobName)] - Python $(python.version)'
|
# reportDirectory: '$(Build.SourcesDirectory)/htmlcov'
|
||||||
condition: succeededOrFailed()
|
# testRunTitle: '$(Agent.OS) - $(Build.BuildNumber)[$(Agent.JobName)] - Python $(python.version)'
|
||||||
|
# condition: succeededOrFailed()
|
||||||
|
|
||||||
- script: |
|
- script: |
|
||||||
set -e
|
set -e
|
||||||
|
|
|
@ -123,21 +123,6 @@ jobs:
|
||||||
push: false
|
push: false
|
||||||
timeout-minutes: 50
|
timeout-minutes: 50
|
||||||
|
|
||||||
build-nvidia:
|
|
||||||
runs-on: ubuntu-20.04
|
|
||||||
# todo: temporarily skip as the base container does not fit to agent
|
|
||||||
if: false
|
|
||||||
steps:
|
|
||||||
- name: Checkout
|
|
||||||
uses: actions/checkout@v2
|
|
||||||
|
|
||||||
- name: Build NVIDIA Docker
|
|
||||||
uses: docker/build-push-action@v2
|
|
||||||
with:
|
|
||||||
file: dockers/nvidia/Dockerfile
|
|
||||||
push: false
|
|
||||||
timeout-minutes: 50
|
|
||||||
|
|
||||||
build-ipu:
|
build-ipu:
|
||||||
runs-on: ubuntu-20.04
|
runs-on: ubuntu-20.04
|
||||||
strategy:
|
strategy:
|
||||||
|
|
|
@ -0,0 +1,24 @@
|
||||||
|
name: CI action schema
|
||||||
|
on: # Trigger the workflow on every push, and on pull requests targeting master or release/* branches
|
||||||
|
push: {}
|
||||||
|
pull_request:
|
||||||
|
branches: [master, "release/*"]
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
validate-schema:
|
||||||
|
runs-on: ubuntu-20.04
|
||||||
|
steps:
|
||||||
|
- name: Checkout
|
||||||
|
uses: actions/checkout@v2
|
||||||
|
|
||||||
|
- name: Install pkg
|
||||||
|
run: |
|
||||||
|
pip install check-jsonschema
|
||||||
|
|
||||||
|
- name: GH Workflows
|
||||||
|
run: |
|
||||||
|
check-jsonschema .github/workflows/*.yml --schemafile "https://json.schemastore.org/github-workflow"
|
||||||
|
|
||||||
|
- name: Azure Pipelines
|
||||||
|
run: |
|
||||||
|
check-jsonschema .azure-pipelines/*.yml --schemafile "https://raw.githubusercontent.com/microsoft/azure-pipelines-vscode/v1.188.1/service-schema.json"
|
|
@ -153,30 +153,6 @@ jobs:
|
||||||
tags: pytorchlightning/pytorch_lightning:base-conda-py${{ matrix.python_version }}-torch${{ matrix.pytorch_version }}
|
tags: pytorchlightning/pytorch_lightning:base-conda-py${{ matrix.python_version }}-torch${{ matrix.pytorch_version }}
|
||||||
timeout-minutes: 55
|
timeout-minutes: 55
|
||||||
|
|
||||||
docker-NVIDIA:
|
|
||||||
runs-on: ubuntu-20.04
|
|
||||||
# todo: temporarily skip as the base container does not fit to agent
|
|
||||||
if: false
|
|
||||||
steps:
|
|
||||||
- name: Checkout
|
|
||||||
uses: actions/checkout@v2
|
|
||||||
|
|
||||||
# https://github.com/docker/setup-buildx-action
|
|
||||||
# Set up Docker Buildx - to use cache-from and cache-to argument of buildx command
|
|
||||||
- uses: docker/setup-buildx-action@v1
|
|
||||||
- name: Login to DockerHub
|
|
||||||
uses: docker/login-action@v1
|
|
||||||
with:
|
|
||||||
username: ${{ secrets.DOCKER_USERNAME }}
|
|
||||||
password: ${{ secrets.DOCKER_PASSWORD }}
|
|
||||||
|
|
||||||
- name: Publish NVIDIA to Docker Hub
|
|
||||||
uses: docker/build-push-action@v2
|
|
||||||
with:
|
|
||||||
file: dockers/nvidia/Dockerfile
|
|
||||||
tags: nvcr.io/pytorchlightning/pytorch_lightning:latest
|
|
||||||
timeout-minutes: 55
|
|
||||||
|
|
||||||
docker-IPU:
|
docker-IPU:
|
||||||
runs-on: ubuntu-20.04
|
runs-on: ubuntu-20.04
|
||||||
strategy:
|
strategy:
|
||||||
|
|
Loading…
Reference in New Issue