move TPU cleaning to GH actions (#5991)
* move TPU cleaning to GH actions * test * .
This commit is contained in:
parent
27ab76923a
commit
fcfa7fabbf
|
@ -85,19 +85,6 @@ references:
|
|||
sudo pip install pycobertura
|
||||
pycobertura show coverage.xml
|
||||
|
||||
# Reusable step (anchored as &delete_gke_jobs) that removes Kubernetes jobs
# which have been around for more than one hour.
delete_gke_jobs: &delete_gke_jobs
  run:
    name: Delete GKE Jobs
    command: |
      # Select jobs whose age (4th column of `kubectl get job`) contains an
      # hour or day component such as '1h' or '1d', i.e. any job that has
      # been around longer than 1hr.
      stale_jobs=$(kubectl get job | awk 'match($4,/[0-9]+[dh]/) {print $0}')
      # Quoted so the listing is printed one job per line.
      echo "$stale_jobs"
      if [ -n "$stale_jobs" ]; then
        # Take the names from the listing printed above rather than querying
        # the cluster a second time, so exactly the reported jobs are deleted.
        echo "$stale_jobs" | awk '{print $1}' | xargs kubectl delete job
      fi
|
||||
|
||||
jobs:
|
||||
|
||||
TPU-tests:
|
||||
|
@ -138,33 +125,9 @@ jobs:
|
|||
path: docs/build/html/
|
||||
destination: html
|
||||
|
||||
# Job that installs the GKE tooling, points kubectl at the cluster named by
# $GKE_CLUSTER, and then runs the shared job-deletion step.
cleanup-gke-jobs:
  docker:
    - image: "circleci/python:3.7"
  steps:
    - gcp-gke/install
    - gcp-gke/update-kubeconfig-with-credentials:
        perform-login: true
        cluster: "$GKE_CLUSTER"
    # Alias of the step anchored as &delete_gke_jobs elsewhere in this file.
    - *delete_gke_jobs
|
||||
|
||||
|
||||
workflows:
  version: 2

  # Workflow listing the docs build and the TPU test job.
  tpu-tests:
    jobs: [build-Docs, TPU-tests]

  # Scheduled workflow that runs the GKE job cleanup on master.
  tpu-cleanup:
    triggers:
      - schedule:
          # Cron fields, in order:
          #   minute (0-59), hour (0-23), day of month (1-31),
          #   month (1-12), day of week (0-6, 0 = Sunday)
          # Fires at minute 0 of every hour.
          cron: "0 * * * *"
          filters:
            branches:
              only: [master]
    jobs: [cleanup-gke-jobs]
|
||||
|
|
|
@ -9,7 +9,6 @@ on:
|
|||
# - master
|
||||
|
||||
env:
|
||||
PROJECT_ID: ${{ secrets.GKE_PROJECT }}
|
||||
GKE_CLUSTER: lightning-cluster
|
||||
GKE_ZONE: us-central1-a
|
||||
IMAGE: gcr.io/${{ secrets.GKE_PROJECT }}/tpu-testing-image
|
||||
|
|
|
@ -0,0 +1,42 @@
|
|||
name: Recurent events

# Background reading on scheduled workflows:
#   https://jasonet.co/posts/scheduled-actions/
#   https://github.community/t/distinct-job-for-each-schedule/17811/2
on:
  push:
    branches:
      - master
  schedule:
    - cron: "*/20 * * * *"  # every 20 minutes

# Workflow-level environment: the cluster name and zone used when fetching
# GKE credentials.
env:
  GKE_CLUSTER: lightning-cluster
  GKE_ZONE: us-central1-a
|
||||
|
||||
jobs:
  # Authenticates against GKE and deletes Kubernetes jobs that have been
  # around for more than one hour.
  tpu-cleanup:
    name: TPU cleaning
    runs-on: ubuntu-20.04

    steps:
      # NOTE(review): the action is referenced at the moving `master` ref;
      # consider pinning a release tag or commit - confirm with maintainers.
      - name: Setup gcloud CLI
        uses: GoogleCloudPlatform/github-actions/setup-gcloud@master
        with:
          version: '290.0.1'
          service_account_key: ${{ secrets.GKE_SA_KEY_BASE64 }}
          project_id: ${{ secrets.GKE_PROJECT }}
          export_default_credentials: true

      # Get the GKE credentials so kubectl can talk to the cluster; use either
      # zone or region depending on cluster setup.
      - run: |-
          gcloud container clusters get-credentials "$GKE_CLUSTER" --zone "$GKE_ZONE"
        shell: bash

      - name: Clean all mong hanging jobs
        run: |
          # Select jobs whose age (4th column of `kubectl get job`) contains an
          # hour or day component such as '1h' or '1d', i.e. any job that has
          # been around longer than 1hr.
          stale_jobs=$(kubectl get job | awk 'match($4,/[0-9]+[dh]/) {print $0}')
          # Quoted so the listing is printed one job per line.
          echo "$stale_jobs"
          if [ -n "$stale_jobs" ]; then
            # Take the names from the listing printed above rather than
            # querying the cluster a second time, so exactly the reported
            # jobs are deleted.
            echo "$stale_jobs" | awk '{print $1}' | xargs kubectl delete job
          fi
|
Loading…
Reference in New Issue