CI: validate JSON & fix benchmark (#8567)

* CI: validate JSON

* as GHA

* PT1.8

* 32g

Co-authored-by: Carlos Mocholí <carlossmocholi@gmail.com>
This commit is contained in:
Jirka Borovec 2021-07-28 18:09:15 +02:00 committed by GitHub
parent 0a71fe2859
commit 470842f5c8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 51 additions and 67 deletions

View File

@ -1,21 +1,19 @@
name: GPU Parity testing schedules:
- cron: "0 0 * * *" # At the end of every day
on: displayName: Daily midnight benchmark
schedule: branches:
- cron: "0 0 * * *" # At the end of every day include:
- "master"
jobs: jobs:
parity-test: - job: benchmarks
timeoutInMinutes: 120 timeoutInMinutes: "90"
cancelTimeoutInMinutes: "2"
cancelTimeoutInMinutes: 2
pool: gridai-spot-pool pool: gridai-spot-pool
container: container:
# base ML image: mcr.microsoft.com/azureml/openmpi3.1.2-cuda10.2-cudnn8-ubuntu18.04 # base ML image: mcr.microsoft.com/azureml/openmpi3.1.2-cuda10.2-cudnn8-ubuntu18.04
image: "pytorchlightning/pytorch_lightning:base-cuda-py3.8-torch1.6" image: "pytorchlightning/pytorch_lightning:base-cuda-py3.8-torch1.8"
options: "--runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all --shm-size=32g"
workspace: workspace:
clean: all clean: all

View File

@ -9,19 +9,19 @@ trigger:
- '*' - '*'
branches: branches:
include: include:
- master - "master"
- release/* - "release/*"
- refs/tags/* - "refs/tags/*"
pr: pr:
- master - "master"
- release/* - "release/*"
jobs: jobs:
- job: pytest - job: pytest
# how long to run the job before automatically cancelling # how long to run the job before automatically cancelling
timeoutInMinutes: 45 timeoutInMinutes: "45"
# how much time to give 'run always even if cancelled tasks' before stopping them # how much time to give 'run always even if cancelled tasks' before stopping them
cancelTimeoutInMinutes: 2 cancelTimeoutInMinutes: "2"
pool: gridai-spot-pool pool: gridai-spot-pool
@ -92,14 +92,15 @@ jobs:
testRunTitle: '$(Agent.OS) - $(Build.DefinitionName) - Python $(python.version)' testRunTitle: '$(Agent.OS) - $(Build.DefinitionName) - Python $(python.version)'
condition: succeededOrFailed() condition: succeededOrFailed()
- task: PublishCodeCoverageResults@1 # todo: re-enable after schema check pass, also atm it seems does not have any effect
displayName: 'Publish coverage report' #- task: PublishCodeCoverageResults@2
inputs: # displayName: 'Publish coverage report'
codeCoverageTool: 'cobertura' # inputs:
summaryFileLocation: 'coverage.xml' # codeCoverageTool: 'Cobertura'
reportDirectory: '$(Build.SourcesDirectory)/htmlcov' # summaryFileLocation: 'coverage.xml'
testRunTitle: '$(Agent.OS) - $(Build.BuildNumber)[$(Agent.JobName)] - Python $(python.version)' # reportDirectory: '$(Build.SourcesDirectory)/htmlcov'
condition: succeededOrFailed() # testRunTitle: '$(Agent.OS) - $(Build.BuildNumber)[$(Agent.JobName)] - Python $(python.version)'
# condition: succeededOrFailed()
- script: | - script: |
set -e set -e

View File

@ -123,21 +123,6 @@ jobs:
push: false push: false
timeout-minutes: 50 timeout-minutes: 50
build-nvidia:
runs-on: ubuntu-20.04
# todo: temporarily skip as the base container does not fit to agent
if: false
steps:
- name: Checkout
uses: actions/checkout@v2
- name: Build NVIDIA Docker
uses: docker/build-push-action@v2
with:
file: dockers/nvidia/Dockerfile
push: false
timeout-minutes: 50
build-ipu: build-ipu:
runs-on: ubuntu-20.04 runs-on: ubuntu-20.04
strategy: strategy:

24
.github/workflows/ci_schema.yml vendored Normal file
View File

@ -0,0 +1,24 @@
# Validates the repository's CI definitions against their published JSON schemas.
name: CI action schema

# Runs on every push (no branch filter) and on pull requests
# targeting `master` or any `release/*` branch.
on:
  push: {}
  pull_request:
    branches:
      - "master"
      - "release/*"

jobs:
  validate-schema:
    runs-on: ubuntu-20.04
    steps:
      - name: Checkout
        uses: actions/checkout@v2

      # `check-jsonschema` is the CLI used by both validation steps below.
      - name: Install pkg
        run: pip install check-jsonschema

      # GitHub workflow files are checked against the SchemaStore workflow schema.
      - name: GH Workflows
        run: >-
          check-jsonschema .github/workflows/*.yml
          --schemafile "https://json.schemastore.org/github-workflow"

      # Azure pipeline files are checked against the service schema taken from
      # the v1.188.1 tag of microsoft/azure-pipelines-vscode.
      - name: Azure Pipelines
        run: >-
          check-jsonschema .azure-pipelines/*.yml
          --schemafile "https://raw.githubusercontent.com/microsoft/azure-pipelines-vscode/v1.188.1/service-schema.json"

View File

@ -153,30 +153,6 @@ jobs:
tags: pytorchlightning/pytorch_lightning:base-conda-py${{ matrix.python_version }}-torch${{ matrix.pytorch_version }} tags: pytorchlightning/pytorch_lightning:base-conda-py${{ matrix.python_version }}-torch${{ matrix.pytorch_version }}
timeout-minutes: 55 timeout-minutes: 55
docker-NVIDIA:
runs-on: ubuntu-20.04
# todo: temporarily skip as the base container does not fit to agent
if: false
steps:
- name: Checkout
uses: actions/checkout@v2
# https://github.com/docker/setup-buildx-action
# Set up Docker Buildx - to use cache-from and cache-to argument of buildx command
- uses: docker/setup-buildx-action@v1
- name: Login to DockerHub
uses: docker/login-action@v1
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_PASSWORD }}
- name: Publish NVIDIA to Docker Hub
uses: docker/build-push-action@v2
with:
file: dockers/nvidia/Dockerfile
tags: nvcr.io/pytorchlightning/pytorch_lightning:latest
timeout-minutes: 55
docker-IPU: docker-IPU:
runs-on: ubuntu-20.04 runs-on: ubuntu-20.04
strategy: strategy: