From fcfa7fabbf75ce6ffb5f23f6a40a184751e87e86 Mon Sep 17 00:00:00 2001 From: Jirka Borovec Date: Tue, 16 Feb 2021 19:01:22 +0100 Subject: [PATCH] move TPU cleaning to GH actions (#5991) * move TPU cleaning to GH actions * test * . --- .circleci/config.yml | 37 ----------------------- .github/workflows/ci_test-tpu.yml | 1 - .github/workflows/events-recurent.yml | 42 +++++++++++++++++++++++++++ 3 files changed, 42 insertions(+), 38 deletions(-) create mode 100644 .github/workflows/events-recurent.yml diff --git a/.circleci/config.yml b/.circleci/config.yml index 74d8695cec..c95f89ec36 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -85,19 +85,6 @@ references: sudo pip install pycobertura pycobertura show coverage.xml - delete_gke_jobs: &delete_gke_jobs - run: - name: Delete GKE Jobs - command: | - # Match jobs whose age matches patterns like '1h' or '1d', i.e. any job - # that has been around longer than 1hr. First print all columns for - # matches, then execute the delete. - jobs_to_delete=$(kubectl get job | awk 'match($4,/[0-9]+[dh]/) {print $0}') - echo $jobs_to_delete - if [ ${#jobs_to_delete} -gt 1 ]; - then kubectl delete job $(kubectl get job | awk 'match($4,/[0-9]+[dh]/) {print $1}'); - fi - jobs: TPU-tests: @@ -138,33 +125,9 @@ jobs: path: docs/build/html/ destination: html - cleanup-gke-jobs: - docker: - - image: circleci/python:3.7 - steps: - - gcp-gke/install - - gcp-gke/update-kubeconfig-with-credentials: - cluster: $GKE_CLUSTER - perform-login: true - - *delete_gke_jobs - - workflows: version: 2 tpu-tests: jobs: - build-Docs - TPU-tests - tpu-cleanup: - triggers: - - schedule: - # The cron format is: - # min (0-59) hour (0-23) monthday (1-31) month (1-12) weekday (0-6, 0=Sun) - # Set to run at the first minute of every hour. - cron: "0 * * * *" - filters: - branches: - only: - - master - jobs: - - cleanup-gke-jobs diff --git a/.github/workflows/ci_test-tpu.yml b/.github/workflows/ci_test-tpu.yml index 6490ef173e..62814b2f26 100644 --- a/.github/workflows/ci_test-tpu.yml +++ b/.github/workflows/ci_test-tpu.yml @@ -9,7 +9,6 @@ on: # - master env: - PROJECT_ID: ${{ secrets.GKE_PROJECT }} GKE_CLUSTER: lightning-cluster GKE_ZONE: us-central1-a IMAGE: gcr.io/${{ secrets.GKE_PROJECT }}/tpu-testing-image diff --git a/.github/workflows/events-recurent.yml b/.github/workflows/events-recurent.yml new file mode 100644 index 0000000000..cb49b482f0 --- /dev/null +++ b/.github/workflows/events-recurent.yml @@ -0,0 +1,42 @@ +name: Recurent events + +# https://jasonet.co/posts/scheduled-actions/ +# https://github.community/t/distinct-job-for-each-schedule/17811/2 +on: + push: + branches: [ master ] + schedule: + - cron: "*/20 * * * *" # At every 20 minutes + +env: + GKE_CLUSTER: lightning-cluster + GKE_ZONE: us-central1-a + +jobs: + tpu-cleanup: + name: TPU cleaning + runs-on: ubuntu-20.04 + + steps: + - name: Setup gcloud CLI + uses: GoogleCloudPlatform/github-actions/setup-gcloud@master + with: + version: '290.0.1' + service_account_key: ${{ secrets.GKE_SA_KEY_BASE64 }} + project_id: ${{ secrets.GKE_PROJECT }} + export_default_credentials: true + # Get the GKE credentials so we can deploy to the cluster; Use either zone or region depending on cluster setup. + - run: |- + gcloud container clusters get-credentials "$GKE_CLUSTER" --zone "$GKE_ZONE" + shell: bash + + - name: Clean all mong hanging jobs + run: | + # Match jobs whose age matches patterns like '1h' or '1d', i.e. any job + # that has been around longer than 1hr. First print all columns for + # matches, then execute the delete. + jobs_to_delete=$(kubectl get job | awk 'match($4,/[0-9]+[dh]/) {print $0}') + echo $jobs_to_delete + if [ ${#jobs_to_delete} -gt 1 ]; + then kubectl delete job $(kubectl get job | awk 'match($4,/[0-9]+[dh]/) {print $1}'); + fi \ No newline at end of file