CI: validate JSON & fix benchmark (#8567)

* CI: validate JSON

* as GHA

* PT1.8

* 32g

Co-authored-by: Carlos Mocholí <carlossmocholi@gmail.com>
This commit is contained in:
Jirka Borovec 2021-07-28 18:09:15 +02:00 committed by GitHub
parent 0a71fe2859
commit 470842f5c8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 51 additions and 67 deletions

View File

@ -1,21 +1,19 @@
name: GPU Parity testing
on:
schedule:
- cron: "0 0 * * *" # At the end of every day
schedules:
- cron: "0 0 * * *" # At the end of every day
displayName: Daily midnight benchmark
branches:
include:
- "master"
jobs:
parity-test:
timeoutInMinutes: 120
cancelTimeoutInMinutes: 2
- job: benchmarks
timeoutInMinutes: "90"
cancelTimeoutInMinutes: "2"
pool: gridai-spot-pool
container:
# base ML image: mcr.microsoft.com/azureml/openmpi3.1.2-cuda10.2-cudnn8-ubuntu18.04
image: "pytorchlightning/pytorch_lightning:base-cuda-py3.8-torch1.6"
image: "pytorchlightning/pytorch_lightning:base-cuda-py3.8-torch1.8"
options: "--runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all --shm-size=32g"
workspace:
clean: all

View File

@ -9,19 +9,19 @@ trigger:
- '*'
branches:
include:
- master
- release/*
- refs/tags/*
- "master"
- "release/*"
- "refs/tags/*"
pr:
- master
- release/*
- "master"
- "release/*"
jobs:
- job: pytest
# how long to run the job before automatically cancelling
timeoutInMinutes: 45
timeoutInMinutes: "45"
# how much time to give 'run always even if cancelled tasks' before stopping them
cancelTimeoutInMinutes: 2
cancelTimeoutInMinutes: "2"
pool: gridai-spot-pool
@ -92,14 +92,15 @@ jobs:
testRunTitle: '$(Agent.OS) - $(Build.DefinitionName) - Python $(python.version)'
condition: succeededOrFailed()
- task: PublishCodeCoverageResults@1
displayName: 'Publish coverage report'
inputs:
codeCoverageTool: 'cobertura'
summaryFileLocation: 'coverage.xml'
reportDirectory: '$(Build.SourcesDirectory)/htmlcov'
testRunTitle: '$(Agent.OS) - $(Build.BuildNumber)[$(Agent.JobName)] - Python $(python.version)'
condition: succeededOrFailed()
# todo: re-enable once the schema check passes; at the moment this task also does not seem to have any effect
#- task: PublishCodeCoverageResults@2
# displayName: 'Publish coverage report'
# inputs:
# codeCoverageTool: 'Cobertura'
# summaryFileLocation: 'coverage.xml'
# reportDirectory: '$(Build.SourcesDirectory)/htmlcov'
# testRunTitle: '$(Agent.OS) - $(Build.BuildNumber)[$(Agent.JobName)] - Python $(python.version)'
# condition: succeededOrFailed()
- script: |
set -e

View File

@ -123,21 +123,6 @@ jobs:
push: false
timeout-minutes: 50
build-nvidia:
runs-on: ubuntu-20.04
# todo: temporarily skipped because the base container does not fit on the agent
if: false
steps:
- name: Checkout
uses: actions/checkout@v2
- name: Build NVIDIA Docker
uses: docker/build-push-action@v2
with:
file: dockers/nvidia/Dockerfile
push: false
timeout-minutes: 50
build-ipu:
runs-on: ubuntu-20.04
strategy:

24
.github/workflows/ci_schema.yml vendored Normal file
View File

@ -0,0 +1,24 @@
# Validates the repository's CI configuration files (GitHub workflows and
# Azure pipelines) against their published JSON schemas with check-jsonschema.
name: CI action schema
on: # Run on every push (no filters), and on pull requests targeting master or release/* branches
push: {}
pull_request:
branches: [master, "release/*"]
jobs:
validate-schema:
runs-on: ubuntu-20.04
steps:
- name: Checkout
uses: actions/checkout@v2
# check-jsonschema provides the CLI used by both validation steps below
- name: Install pkg
run: |
pip install check-jsonschema
# validate every workflow file against the SchemaStore GitHub workflow schema
- name: GH Workflows
run: |
check-jsonschema .github/workflows/*.yml --schemafile "https://json.schemastore.org/github-workflow"
# validate the Azure pipeline files; the schema URL is pinned to the v1.188.1 tag
- name: Azure Pipelines
run: |
check-jsonschema .azure-pipelines/*.yml --schemafile "https://raw.githubusercontent.com/microsoft/azure-pipelines-vscode/v1.188.1/service-schema.json"

View File

@ -153,30 +153,6 @@ jobs:
tags: pytorchlightning/pytorch_lightning:base-conda-py${{ matrix.python_version }}-torch${{ matrix.pytorch_version }}
timeout-minutes: 55
docker-NVIDIA:
runs-on: ubuntu-20.04
# todo: temporarily skipped because the base container does not fit on the agent
if: false
steps:
- name: Checkout
uses: actions/checkout@v2
# https://github.com/docker/setup-buildx-action
# Set up Docker Buildx - to use cache-from and cache-to argument of buildx command
- uses: docker/setup-buildx-action@v1
- name: Login to DockerHub
uses: docker/login-action@v1
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_PASSWORD }}
- name: Publish NVIDIA to Docker Hub
uses: docker/build-push-action@v2
with:
file: dockers/nvidia/Dockerfile
tags: nvcr.io/pytorchlightning/pytorch_lightning:latest
timeout-minutes: 55
docker-IPU:
runs-on: ubuntu-20.04
strategy: