name: Recurrent

# Periodic housekeeping workflow: deletes Kubernetes jobs that have been
# hanging around on the GKE cluster for too long.
#
# Background on scheduled workflows:
#   https://jasonet.co/posts/scheduled-actions/
#   https://github.community/t/distinct-job-for-each-schedule/17811/2
on:
  push:
    branches: [master]
  schedule:
    # At every 20 minutes
    - cron: "*/20 * * * *"

# Cluster coordinates consumed by the `gcloud ... get-credentials` step below.
env:
  GKE_CLUSTER: lightning-cluster
  GKE_ZONE: us-central1-a

jobs:
  tpu-cleanup:
    name: TPU cleaning
    # Only run in the upstream organisation; forks do not have the secrets.
    if: ${{ github.repository_owner == 'Lightning-AI' }}
    runs-on: ubuntu-20.04

    steps:
      - name: Setup gcloud CLI
        uses: google-github-actions/setup-gcloud@v0
        with:
          version: '290.0.1'
          service_account_key: ${{ secrets.GKE_SA_KEY_BASE64 }}
          project_id: ${{ secrets.GKE_PROJECT }}
          export_default_credentials: true

      # Get the GKE credentials so that kubectl can talk to the cluster.
      # Use either zone or region depending on cluster setup.
      - run: |-
          gcloud container clusters get-credentials "$GKE_CLUSTER" --zone "$GKE_ZONE"
        shell: bash

      - name: Clean all mong hanging jobs
        # Explicit `shell: bash` makes GitHub run the script with
        # `-eo pipefail`, so a failing `kubectl get job` fails the step
        # instead of being hidden behind awk's successful exit status.
        shell: bash
        run: |
          # Match jobs whose AGE column (4th field) contains an hour or day
          # component such as '1h' or '1d'.
          # NOTE(review): kubectl appears to print ages below a few hours in
          # minutes only (e.g. '90m'), so the effective threshold may be
          # higher than one hour - confirm against the kubectl version in use.
          #
          # The cluster is queried exactly once: the rows that are logged are
          # the very same rows whose jobs get deleted, so a job that appears
          # or ages between two queries cannot be deleted without being logged.
          stale_jobs=$(kubectl get job | awk 'match($4,/[0-9]+[dh]/) {print $0}')

          # First print all columns of the matches (quoted, to keep one job
          # per line and to avoid glob expansion) ...
          printf '%s\n' "$stale_jobs"

          # ... then delete them, if there is anything to delete. The job
          # name is the first column of each captured row.
          if [ -n "$stale_jobs" ]; then
            printf '%s\n' "$stale_jobs" | awk '{print $1}' | xargs kubectl delete job
          fi