diff --git a/.azure-pipelines/gpu_benchmark.yml b/.azure-pipelines/gpu-benchmark.yml similarity index 56% rename from .azure-pipelines/gpu_benchmark.yml rename to .azure-pipelines/gpu-benchmark.yml index 4ca1b531f8..d8c644f458 100644 --- a/.azure-pipelines/gpu_benchmark.yml +++ b/.azure-pipelines/gpu-benchmark.yml @@ -1,21 +1,19 @@ -name: GPU Parity testing - -on: - schedule: - - cron: "0 0 * * *" # At the end of every day +schedules: + - cron: "0 0 * * *" # At the end of every day + displayName: Daily midnight benchmark + branches: + include: + - "master" jobs: - parity-test: - timeoutInMinutes: 120 - - cancelTimeoutInMinutes: 2 - + - job: benchmarks + timeoutInMinutes: "90" + cancelTimeoutInMinutes: "2" pool: gridai-spot-pool - container: # base ML image: mcr.microsoft.com/azureml/openmpi3.1.2-cuda10.2-cudnn8-ubuntu18.04 - image: "pytorchlightning/pytorch_lightning:base-cuda-py3.8-torch1.6" - + image: "pytorchlightning/pytorch_lightning:base-cuda-py3.8-torch1.8" + options: "--runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all --shm-size=32g" workspace: clean: all diff --git a/.azure-pipelines/gpu-tests.yml b/.azure-pipelines/gpu-tests.yml index edeba51a2e..994bddcda4 100644 --- a/.azure-pipelines/gpu-tests.yml +++ b/.azure-pipelines/gpu-tests.yml @@ -9,19 +9,19 @@ trigger: - '*' branches: include: - - master - - release/* - - refs/tags/* + - "master" + - "release/*" + - "refs/tags/*" pr: - - master - - release/* + - "master" + - "release/*" jobs: - job: pytest # how long to run the job before automatically cancelling - timeoutInMinutes: 45 + timeoutInMinutes: "45" # how much time to give 'run always even if cancelled tasks' before stopping them - cancelTimeoutInMinutes: 2 + cancelTimeoutInMinutes: "2" pool: gridai-spot-pool @@ -92,14 +92,15 @@ jobs: testRunTitle: '$(Agent.OS) - $(Build.DefinitionName) - Python $(python.version)' condition: succeededOrFailed() - - task: PublishCodeCoverageResults@1 - displayName: 'Publish coverage report' - inputs: - codeCoverageTool: 'cobertura' - summaryFileLocation: 'coverage.xml' - reportDirectory: '$(Build.SourcesDirectory)/htmlcov' - testRunTitle: '$(Agent.OS) - $(Build.BuildNumber)[$(Agent.JobName)] - Python $(python.version)' - condition: succeededOrFailed() + # todo: re-enable after schema check pass, also atm it seems does not have any effect + #- task: PublishCodeCoverageResults@2 + # displayName: 'Publish coverage report' + # inputs: + # codeCoverageTool: 'Cobertura' + # summaryFileLocation: 'coverage.xml' + # reportDirectory: '$(Build.SourcesDirectory)/htmlcov' + # testRunTitle: '$(Agent.OS) - $(Build.BuildNumber)[$(Agent.JobName)] - Python $(python.version)' + # condition: succeededOrFailed() - script: | set -e diff --git a/.github/workflows/ci_dockers.yml b/.github/workflows/ci_dockers.yml index 6efb4175bb..440460219b 100644 --- a/.github/workflows/ci_dockers.yml +++ b/.github/workflows/ci_dockers.yml @@ -123,21 +123,6 @@ jobs: push: false timeout-minutes: 50 - build-nvidia: - runs-on: ubuntu-20.04 - # todo: temporarily skip as the base container does not fit to agent - if: false - steps: - - name: Checkout - uses: actions/checkout@v2 - - - name: Build NVIDIA Docker - uses: docker/build-push-action@v2 - with: - file: dockers/nvidia/Dockerfile - push: false - timeout-minutes: 50 - build-ipu: runs-on: ubuntu-20.04 strategy: diff --git a/.github/workflows/ci_schema.yml b/.github/workflows/ci_schema.yml new file mode 100644 index 0000000000..51c4400666 --- /dev/null +++ b/.github/workflows/ci_schema.yml @@ -0,0 +1,24 @@ +name: CI action schema +on: # Trigger the workflow on push or pull request, but only for the master branch + push: {} + pull_request: + branches: [master, "release/*"] + +jobs: + validate-schema: + runs-on: ubuntu-20.04 + steps: + - name: Checkout + uses: actions/checkout@v2 + + - name: Install pkg + run: | + pip install check-jsonschema + + - name: GH Workflows + run: | + check-jsonschema .github/workflows/*.yml --schemafile "https://json.schemastore.org/github-workflow" + + - name: Azure Pipelines + run: | + check-jsonschema .azure-pipelines/*.yml --schemafile "https://raw.githubusercontent.com/microsoft/azure-pipelines-vscode/v1.188.1/service-schema.json" diff --git a/.github/workflows/events-nightly.yml b/.github/workflows/events-nightly.yml index 16489bb1f3..8fc763e79a 100644 --- a/.github/workflows/events-nightly.yml +++ b/.github/workflows/events-nightly.yml @@ -153,30 +153,6 @@ jobs: tags: pytorchlightning/pytorch_lightning:base-conda-py${{ matrix.python_version }}-torch${{ matrix.pytorch_version }} timeout-minutes: 55 - docker-NVIDIA: - runs-on: ubuntu-20.04 - # todo: temporarily skip as the base container does not fit to agent - if: false - steps: - - name: Checkout - uses: actions/checkout@v2 - - # https://github.com/docker/setup-buildx-action - # Set up Docker Buildx - to use cache-from and cache-to argument of buildx command - - uses: docker/setup-buildx-action@v1 - - name: Login to DockerHub - uses: docker/login-action@v1 - with: - username: ${{ secrets.DOCKER_USERNAME }} - password: ${{ secrets.DOCKER_PASSWORD }} - - - name: Publish NVIDIA to Docker Hub - uses: docker/build-push-action@v2 - with: - file: dockers/nvidia/Dockerfile - tags: nvcr.io/pytorchlightning/pytorch_lightning:latest - timeout-minutes: 55 - docker-IPU: runs-on: ubuntu-20.04 strategy: