CI: validate JSON & fix benchmark (#8567)
* CI: validate JSON
* as GHA
* PT1.8
* 32g

Co-authored-by: Carlos Mocholí <carlossmocholi@gmail.com>
This commit is contained in:
parent
0a71fe2859
commit
470842f5c8
|
@ -1,21 +1,19 @@
|
|||
name: GPU Parity testing
|
||||
|
||||
on:
|
||||
schedule:
|
||||
- cron: "0 0 * * *" # At the end of every day
|
||||
schedules:
|
||||
- cron: "0 0 * * *" # At the end of every day
|
||||
displayName: Daily midnight benchmark
|
||||
branches:
|
||||
include:
|
||||
- "master"
|
||||
|
||||
jobs:
|
||||
parity-test:
|
||||
timeoutInMinutes: 120
|
||||
|
||||
cancelTimeoutInMinutes: 2
|
||||
|
||||
- job: benchmarks
|
||||
timeoutInMinutes: "90"
|
||||
cancelTimeoutInMinutes: "2"
|
||||
pool: gridai-spot-pool
|
||||
|
||||
container:
|
||||
# base ML image: mcr.microsoft.com/azureml/openmpi3.1.2-cuda10.2-cudnn8-ubuntu18.04
|
||||
image: "pytorchlightning/pytorch_lightning:base-cuda-py3.8-torch1.6"
|
||||
|
||||
image: "pytorchlightning/pytorch_lightning:base-cuda-py3.8-torch1.8"
|
||||
options: "--runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all --shm-size=32g"
|
||||
workspace:
|
||||
clean: all
|
||||
|
|
@ -9,19 +9,19 @@ trigger:
|
|||
- '*'
|
||||
branches:
|
||||
include:
|
||||
- master
|
||||
- release/*
|
||||
- refs/tags/*
|
||||
- "master"
|
||||
- "release/*"
|
||||
- "refs/tags/*"
|
||||
pr:
|
||||
- master
|
||||
- release/*
|
||||
- "master"
|
||||
- "release/*"
|
||||
|
||||
jobs:
|
||||
- job: pytest
|
||||
# how long to run the job before automatically cancelling
|
||||
timeoutInMinutes: 45
|
||||
timeoutInMinutes: "45"
|
||||
# how much time to give 'run always even if cancelled tasks' before stopping them
|
||||
cancelTimeoutInMinutes: 2
|
||||
cancelTimeoutInMinutes: "2"
|
||||
|
||||
pool: gridai-spot-pool
|
||||
|
||||
|
@ -92,14 +92,15 @@ jobs:
|
|||
testRunTitle: '$(Agent.OS) - $(Build.DefinitionName) - Python $(python.version)'
|
||||
condition: succeededOrFailed()
|
||||
|
||||
- task: PublishCodeCoverageResults@1
|
||||
displayName: 'Publish coverage report'
|
||||
inputs:
|
||||
codeCoverageTool: 'cobertura'
|
||||
summaryFileLocation: 'coverage.xml'
|
||||
reportDirectory: '$(Build.SourcesDirectory)/htmlcov'
|
||||
testRunTitle: '$(Agent.OS) - $(Build.BuildNumber)[$(Agent.JobName)] - Python $(python.version)'
|
||||
condition: succeededOrFailed()
|
||||
# todo: re-enable after the schema check passes; also, at the moment it does not seem to have any effect
|
||||
#- task: PublishCodeCoverageResults@2
|
||||
# displayName: 'Publish coverage report'
|
||||
# inputs:
|
||||
# codeCoverageTool: 'Cobertura'
|
||||
# summaryFileLocation: 'coverage.xml'
|
||||
# reportDirectory: '$(Build.SourcesDirectory)/htmlcov'
|
||||
# testRunTitle: '$(Agent.OS) - $(Build.BuildNumber)[$(Agent.JobName)] - Python $(python.version)'
|
||||
# condition: succeededOrFailed()
|
||||
|
||||
- script: |
|
||||
set -e
|
||||
|
|
|
@ -123,21 +123,6 @@ jobs:
|
|||
push: false
|
||||
timeout-minutes: 50
|
||||
|
||||
build-nvidia:
|
||||
runs-on: ubuntu-20.04
|
||||
# todo: temporarily skipped because the base container does not fit on the agent
|
||||
if: false
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v2
|
||||
|
||||
- name: Build NVIDIA Docker
|
||||
uses: docker/build-push-action@v2
|
||||
with:
|
||||
file: dockers/nvidia/Dockerfile
|
||||
push: false
|
||||
timeout-minutes: 50
|
||||
|
||||
build-ipu:
|
||||
runs-on: ubuntu-20.04
|
||||
strategy:
|
||||
|
|
|
@ -0,0 +1,24 @@
|
|||
name: CI action schema
|
||||
on: # Trigger the workflow on every push, and on pull requests targeting master or release/* branches
|
||||
push: {}
|
||||
pull_request:
|
||||
branches: [master, "release/*"]
|
||||
|
||||
jobs:
|
||||
validate-schema:
|
||||
runs-on: ubuntu-20.04
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v2
|
||||
|
||||
- name: Install pkg
|
||||
run: |
|
||||
pip install check-jsonschema
|
||||
|
||||
- name: GH Workflows
|
||||
run: |
|
||||
check-jsonschema .github/workflows/*.yml --schemafile "https://json.schemastore.org/github-workflow"
|
||||
|
||||
- name: Azure Pipelines
|
||||
run: |
|
||||
check-jsonschema .azure-pipelines/*.yml --schemafile "https://raw.githubusercontent.com/microsoft/azure-pipelines-vscode/v1.188.1/service-schema.json"
|
|
@ -153,30 +153,6 @@ jobs:
|
|||
tags: pytorchlightning/pytorch_lightning:base-conda-py${{ matrix.python_version }}-torch${{ matrix.pytorch_version }}
|
||||
timeout-minutes: 55
|
||||
|
||||
docker-NVIDIA:
|
||||
runs-on: ubuntu-20.04
|
||||
# todo: temporarily skipped because the base container does not fit on the agent
|
||||
if: false
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v2
|
||||
|
||||
# https://github.com/docker/setup-buildx-action
|
||||
# Set up Docker Buildx - to use the cache-from and cache-to arguments of the buildx command
|
||||
- uses: docker/setup-buildx-action@v1
|
||||
- name: Login to DockerHub
|
||||
uses: docker/login-action@v1
|
||||
with:
|
||||
username: ${{ secrets.DOCKER_USERNAME }}
|
||||
password: ${{ secrets.DOCKER_PASSWORD }}
|
||||
|
||||
- name: Publish NVIDIA to Docker Hub
|
||||
uses: docker/build-push-action@v2
|
||||
with:
|
||||
file: dockers/nvidia/Dockerfile
|
||||
tags: nvcr.io/pytorchlightning/pytorch_lightning:latest
|
||||
timeout-minutes: 55
|
||||
|
||||
docker-IPU:
|
||||
runs-on: ubuntu-20.04
|
||||
strategy:
|
||||
|
|
Loading…
Reference in New Issue