lightning/.github/workflows/events-recurrent.yml

44 lines
1.5 KiB
YAML

# Recurrent maintenance workflow: runs on every push to master and on a
# 20-minute schedule.
name: Recurrent
# https://jasonet.co/posts/scheduled-actions/
# https://github.community/t/distinct-job-for-each-schedule/17811/2
on:
  push:
    branches: [master]
  schedule:
    - cron: "*/20 * * * *"  # At every 20 minutes
# Workflow-level environment, read by the `gcloud container clusters
# get-credentials` step to select the target GKE cluster and zone.
env:
  GKE_CLUSTER: lightning-cluster
  GKE_ZONE: us-central1-a
jobs:
  # Connects to the GKE cluster and deletes Kubernetes jobs whose AGE column
  # is reported in hours or days (i.e. jobs that are at least one hour old).
  tpu-cleanup:
    name: TPU cleaning
    # Skip the job unless the repository is owned by the Lightning-AI org.
    if: ${{ github.repository_owner == 'Lightning-AI' }}
    # NOTE(review): the ubuntu-20.04 hosted image may no longer be offered by
    # GitHub - confirm and bump together with the pinned gcloud version below.
    runs-on: ubuntu-20.04
    steps:
      # NOTE(review): `setup-gcloud@v0` with `service_account_key` is the
      # legacy authentication path; newer major versions appear to delegate
      # authentication to `google-github-actions/auth` - confirm before upgrading.
      - name: Setup gcloud CLI
        uses: google-github-actions/setup-gcloud@v0
        with:
          version: '290.0.1'
          service_account_key: ${{ secrets.GKE_SA_KEY_BASE64 }}
          project_id: ${{ secrets.GKE_PROJECT }}
          export_default_credentials: true
      # Get the GKE credentials so we can deploy to the cluster; Use either zone or region depending on cluster setup.
      - run: |-
          gcloud container clusters get-credentials "$GKE_CLUSTER" --zone "$GKE_ZONE"
        shell: bash
      - name: Clean all mong hanging jobs
        run: |
          # Match jobs whose age (4th column of `kubectl get job`) contains a
          # pattern like '1h' or '1d', i.e. any job that has been around for
          # one hour or longer.
          # The listing is captured exactly once so that the jobs printed to
          # the log are precisely the jobs that get deleted.
          stale_jobs=$(kubectl get job --no-headers | awk 'match($4,/[0-9]+[dh]/) {print $0}')
          # Quoted so the log keeps one job per line.
          echo "$stale_jobs"
          # Only call `kubectl delete` when there is something to delete;
          # invoking it without any job name is an error.
          if [ -n "$stale_jobs" ]; then
            echo "$stale_jobs" | awk '{print $1}' | xargs kubectl delete job
          fi