diff --git a/.github/workflows/ci_test-tpu.yml b/.github/workflows/ci_test-tpu.yml
deleted file mode 100644
index 22bb7bd7cd..0000000000
--- a/.github/workflows/ci_test-tpu.yml
+++ /dev/null
@@ -1,144 +0,0 @@
-name: TPU tests
-
-on:
- push:
- branches: [master, "release/*"]
-# TODO: temporal disable TPU testing until we find way how to pass credentials to forked PRs
-# pull_request:
-# branches:
-# - master
-
-env:
- GKE_CLUSTER: lightning-cluster
- GKE_ZONE: us-central1-a
- IMAGE: gcr.io/${{ secrets.GKE_PROJECT }}/tpu-testing-image
- MAX_CHECKS: 360
- CHECK_SPEEP: 5
-
-jobs:
- setup-build-publish-deploy:
- name: tpu-testing-job
- runs-on: ubuntu-20.04
- strategy:
- fail-fast: false
- matrix:
- python-version: [3.7]
- xla-version: [1.6, 1.8]
- # Timeout: https://stackoverflow.com/a/59076067/4521646
- timeout-minutes: 50
-
- steps:
- - name: Set IMAGETAG
- run: echo "IMAGETAG=$(date +%s)_${{ matrix.python-version }}" >> $GITHUB_ENV
- - name: Install Go
- uses: actions/setup-go@v2
- with:
- go-version: 1.14.x
- - name: Set up Python 3.7
- uses: actions/setup-python@v2
- with:
- python-version: 3.7
-
- - name: Checkout Pytorch Lightning
- uses: actions/checkout@v2
- with:
- repository: PyTorchLightning/pytorch-lightning
- ref: ${{ github.event.pull_request.head.sha }}
-
- - name: Checkout ml-testing-accelerators
- uses: actions/checkout@v2
- with:
- repository: GoogleCloudPlatform/ml-testing-accelerators
- path: ml-testing-accelerators
- ref: 5e88ac24f631c27045e62f0e8d5dfcf34e425e25
-
- - name: Setup gcloud CLI
- uses: GoogleCloudPlatform/github-actions/setup-gcloud@master
- with:
- version: '290.0.1'
- service_account_key: ${{ secrets.GKE_SA_KEY_BASE64 }}
- project_id: ${{ secrets.GKE_PROJECT }}
- export_default_credentials: true
-
- # Configure Docker to use the gcloud command-line tool as a credential helper for authentication.
- - name: Configure Docker
- run: |-
- gcloud --quiet auth configure-docker
- shell: bash
- - name: Build and Push Docker Image
- env:
- PYTHON_VER: ${{ matrix.python-version }}
- XLA_VER: ${{ matrix.xla-version }}
- run: |
- #cd dockers/tpu-tests
- docker build --tag "$IMAGE:$IMAGETAG" -f ./dockers/tpu-tests/Dockerfile --build-arg "PYTHON_VERSION=$PYTHON_VER" --build-arg "PYTORCH_VERSION=$XLA_VER" .
- docker push "$IMAGE:$IMAGETAG"
- shell: bash
-
- - name: Install jsonnet
- run: |-
- go get github.com/google/go-jsonnet/cmd/jsonnet
- shell: bash
- # Get the GKE credentials so we can deploy to the cluster
- # Use either zone or region depending on cluster setup.
- - run: |-
- gcloud container clusters get-credentials "$GKE_CLUSTER" --zone "$GKE_ZONE"
- shell: bash
-
- - name: Deploy the job on the kubernetes cluster
- env:
- XLA_VER: ${{ matrix.xla-version }}
- run: |-
- python -c "fname = 'dockers/tpu-tests/tpu_test_cases.jsonnet' ; ttt = open(fname).read().replace('pytorch-VERSION', 'pytorch-$XLA_VER') ; open(fname, 'w').write(ttt)"
- job_name=$(jsonnet -J ml-testing-accelerators/ dockers/tpu-tests/tpu_test_cases.jsonnet --ext-str image=$IMAGE --ext-str image-tag=$IMAGETAG | kubectl create -f -) && \
- job_name=${job_name#job.batch/} && \
- job_name=${job_name% created} && \
- echo "Waiting on kubernetes job: $job_name in cluster: $GKE_CLUSTER" && \
- i=0 && \
- # 60 checks spaced 30s apart = 900s total.
- status_code=2 && \
- # Check on the job periodically. Set the status code depending on what
- # happened to the job in Kubernetes. If we try MAX_CHECKS times and
- # still the job hasn't finished, give up and return the starting
- # non-zero status code.
- printf "Waiting for job to finish: " && \
- while [ $i -lt $MAX_CHECKS ]; do ((i++)); if kubectl get jobs $job_name -o jsonpath='Failed:{.status.failed}' | grep "Failed:1"; then status_code=1 && break; elif kubectl get jobs $job_name -o jsonpath='Succeeded:{.status.succeeded}' | grep "Succeeded:1" ; then status_code=0 && break; else printf "." ; fi; sleep $CHECK_SPEEP; done && \
- echo "Done waiting. Job status code: $status_code" && \
- pod_name=$(kubectl get po -l controller-uid=`kubectl get job $job_name -o "jsonpath={.metadata.labels.controller-uid}"` | awk 'match($0,!/NAME/) {print $1}') && \
- echo "GKE pod name: $pod_name" && \
- kubectl logs -f $pod_name --container=train > /tmp/full_output.txt
- if grep -q '' /tmp/full_output.txt ; then csplit /tmp/full_output.txt '//'; else mv /tmp/full_output.txt xx00; fi && \
- # First portion is the test logs. Print these to Github Action stdout.
- cat xx00 && \
- echo "Done with log retrieval attempt." && \
- gcloud container images delete "$IMAGE:$IMAGETAG" --force-delete-tags && \
- echo "Status code: $status_code"
- exit $status_code
- shell: bash
-
- - name: Statistics
- if: success()
- run: |
- mv ./xx01 coverage
- # TODO: add human readable report
- cat coverage
- # sudo pip install pycobertura
- # pycobertura show coverage.xml
-
- - name: Upload coverage results
- uses: actions/upload-artifact@v2
- with:
- name: coverage-TPU
- path: coverage
-
- - name: Upload coverage to Codecov
- uses: codecov/codecov-action@v1
- # see: https://github.com/actions/toolkit/issues/399
- continue-on-error: true
- if: always()
- with:
- token: ${{ secrets.CODECOV_TOKEN }}
- file: coverage
- flags: tpu,pytest
- name: TPU-coverage
- fail_ci_if_error: true
diff --git a/README.md b/README.md
index d31d652850..c80995293a 100644
--- a/README.md
+++ b/README.md
@@ -78,14 +78,14 @@ Lightning is rigorously tested across multiple GPUs, TPUs CPUs and against major
-| System / PyTorch ver. | 1.6 (min. req.) | 1.7 | 1.8 (LTS) | 1.9 (latest) | 1.10 (nightly) |
-| :----------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
-| Conda py3.7 \[linux\] | [![PyTorch & Conda](https://github.com/PyTorchLightning/pytorch-lightning/workflows/PyTorch%20&%20Conda/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22PyTorch+%26+Conda%22+branch%3Amaster) | [![PyTorch & Conda](https://github.com/PyTorchLightning/pytorch-lightning/workflows/PyTorch%20&%20Conda/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22PyTorch+%26+Conda%22+branch%3Amaster) | [![PyTorch & Conda](https://github.com/PyTorchLightning/pytorch-lightning/workflows/PyTorch%20&%20Conda/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22PyTorch+%26+Conda%22+branch%3Amaster) | [![PyTorch & Conda](https://github.com/PyTorchLightning/pytorch-lightning/workflows/PyTorch%20&%20Conda/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22PyTorch+%26+Conda%22+branch%3Amaster) | [![PyTorch & Conda](https://github.com/PyTorchLightning/pytorch-lightning/workflows/PyTorch%20&%20Conda/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22PyTorch+%26+Conda%22+branch%3Amaster) |
-| Linux py3.7 \[GPUs\*\*\] | - | - | [![Build Status]()](https://dev.azure.com/PytorchLightning/pytorch-lightning/_build/latest?definitionId=6&branchName=master) | - | - |
-| Linux py3.{6,7} \[TPUs\*\*\*\] | [![TPU tests](https://github.com/PyTorchLightning/pytorch-lightning/workflows/TPU%20tests/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22TPU+tests%22+branch%3Amaster) | - | [![TPU tests](https://github.com/PyTorchLightning/pytorch-lightning/workflows/TPU%20tests/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22TPU+tests%22+branch%3Amaster) | - | - |
-| Linux py3.{6,7,8,9} | [![CI complete testing](https://github.com/PyTorchLightning/pytorch-lightning/workflows/CI%20complete%20testing/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22CI+testing%22) | - | - | [![CI complete testing](https://github.com/PyTorchLightning/pytorch-lightning/workflows/CI%20complete%20testing/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22CI+testing%22) | - |
-| OSX py3.{6,7,8,9} | [![CI complete testing](https://github.com/PyTorchLightning/pytorch-lightning/workflows/CI%20complete%20testing/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22CI+testing%22) | - | - | [![CI complete testing](https://github.com/PyTorchLightning/pytorch-lightning/workflows/CI%20complete%20testing/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22CI+testing%22) | - |
-| Windows py3.{6,7,8,9} | [![CI complete testing](https://github.com/PyTorchLightning/pytorch-lightning/workflows/CI%20complete%20testing/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22CI+testing%22) | - | - | [![CI complete testing](https://github.com/PyTorchLightning/pytorch-lightning/workflows/CI%20complete%20testing/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22CI+testing%22) | - |
+| System / PyTorch ver. | 1.6 (min. req.) | 1.7 | 1.8 (LTS) | 1.9 (latest) | 1.10 (nightly) |
+| :------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
+| Conda py3.7 \[linux\] | [![PyTorch & Conda](https://github.com/PyTorchLightning/pytorch-lightning/workflows/PyTorch%20&%20Conda/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22PyTorch+%26+Conda%22+branch%3Amaster) | [![PyTorch & Conda](https://github.com/PyTorchLightning/pytorch-lightning/workflows/PyTorch%20&%20Conda/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22PyTorch+%26+Conda%22+branch%3Amaster) | [![PyTorch & Conda](https://github.com/PyTorchLightning/pytorch-lightning/workflows/PyTorch%20&%20Conda/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22PyTorch+%26+Conda%22+branch%3Amaster) | [![PyTorch & Conda](https://github.com/PyTorchLightning/pytorch-lightning/workflows/PyTorch%20&%20Conda/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22PyTorch+%26+Conda%22+branch%3Amaster) | [![PyTorch & Conda](https://github.com/PyTorchLightning/pytorch-lightning/workflows/PyTorch%20&%20Conda/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22PyTorch+%26+Conda%22+branch%3Amaster) |
+| Linux py3.7 \[GPUs\*\*\] | - | - | [![Build Status](https://dev.azure.com/PytorchLightning/pytorch-lightning/_apis/build/status/6?branchName=master)](https://dev.azure.com/PytorchLightning/pytorch-lightning/_build/latest?definitionId=6&branchName=master) | - | - |
+| Linux py3.7 \[TPUs\*\*\*\] | - | - | [![CircleCI](https://circleci.com/gh/PyTorchLightning/pytorch-lightning/tree/master.svg?style=svg)](https://circleci.com/gh/PyTorchLightning/pytorch-lightning/tree/master) | - | - |
+| Linux py3.{6,7,8,9} | [![CI complete testing](https://github.com/PyTorchLightning/pytorch-lightning/workflows/CI%20complete%20testing/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22CI+testing%22) | - | - | [![CI complete testing](https://github.com/PyTorchLightning/pytorch-lightning/workflows/CI%20complete%20testing/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22CI+testing%22) | - |
+| OSX py3.{6,7,8,9} | [![CI complete testing](https://github.com/PyTorchLightning/pytorch-lightning/workflows/CI%20complete%20testing/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22CI+testing%22) | - | - | [![CI complete testing](https://github.com/PyTorchLightning/pytorch-lightning/workflows/CI%20complete%20testing/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22CI+testing%22) | - |
+| Windows py3.{6,7,8,9} | [![CI complete testing](https://github.com/PyTorchLightning/pytorch-lightning/workflows/CI%20complete%20testing/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22CI+testing%22) | - | - | [![CI complete testing](https://github.com/PyTorchLightning/pytorch-lightning/workflows/CI%20complete%20testing/badge.svg?branch=master&event=push)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22CI+testing%22) | - |
- _\*\* tests run on two NVIDIA P100_
- _\*\*\* tests run on Google GKE TPUv2/3_