lightning/dockers/tpu-tests/tpu_test_cases.jsonnet

local base = import 'templates/base.libsonnet';
local tpus = import 'templates/tpus.libsonnet';
local utils = import "templates/utils.libsonnet";

local tputests = base.BaseTest {
  frameworkPrefix: 'pl',
  modelName: 'tpu-tests',
  mode: 'postsubmit',
  configMaps: [],

  timeout: 6000, # 100 minutes, in seconds.

  image: 'pytorchlightning/pytorch_lightning',
  imageTag: 'base-xla-py{PYTHON_VERSION}-torch{PYTORCH_VERSION}',

  tpuSettings+: {
    softwareVersion: 'pytorch-{PYTORCH_VERSION}',
  },
  accelerator: tpus.v3_8,

  command: utils.scriptCommand(
    |||
      source ~/.bashrc
      set -e
      conda activate lightning
      mkdir -p /home/runner/work/lightning && cd /home/runner/work/lightning
      git clone https://github.com/Lightning-AI/lightning.git
      cd lightning
      echo $PWD
      git ls-remote --refs origin
      git fetch origin "refs/pull/{PR_NUMBER}/head"
      git checkout {SHA}
      export PACKAGE_NAME=pytorch
      export FREEZE_REQUIREMENTS=1
      pip install -e .[test]
      echo $KUBE_GOOGLE_CLOUD_TPU_ENDPOINTS
      export XRT_TPU_CONFIG="tpu_worker;0;${KUBE_GOOGLE_CLOUD_TPU_ENDPOINTS:7}"
      export PL_RUN_TPU_TESTS=1
      cd tests/tests_pytorch
      coverage run --source=pytorch_lightning -m pytest -vv --durations=0 ./
      echo "\n||| Running standalone tests |||\n"
      export PL_STANDALONE_TESTS_SOURCE=pytorch_lightning
      export PL_STANDALONE_TESTS_BATCH_SIZE=1
      bash run_standalone_tests.sh
      echo "\n||| END PYTEST LOGS |||\n"
      coverage xml
      cat coverage.xml | tr -d '\t'
    |||
  ),
};

tputests.oneshotJob