lightning/tests/tests_pytorch/run_standalone_tests.sh

#!/bin/bash
# Copyright The Lightning AI team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# abort as soon as a command exits with a non-zero status
set -e
# THIS FILE ASSUMES IT IS RUN INSIDE THE tests/tests_<package> DIRECTORY

# Number of standalone test invocations that are launched in parallel per batch.
# Read from PL_STANDALONE_TESTS_BATCH_SIZE; 6 is used when that variable is unset or empty.
if [[ -n "${PL_STANDALONE_TESTS_BATCH_SIZE}" ]]; then
  test_batch_size=${PL_STANDALONE_TESTS_BATCH_SIZE}
else
  test_batch_size=6
fi

# Value given to `coverage run --source`.
# Read from PL_STANDALONE_TESTS_SOURCE; "lightning" is used when that variable is unset or empty.
if [[ -n "${PL_STANDALONE_TESTS_SOURCE}" ]]; then
  source=${PL_STANDALONE_TESTS_SOURCE}
else
  source=lightning
fi

# this environment variable allows special tests to run; it is exported so child processes inherit it
PL_RUN_STANDALONE_TESTS=1
export PL_RUN_STANDALONE_TESTS

# python arguments: coverage wraps pytest, and results are appended to the existing coverage data
defaults="-m coverage run --source ${source} --append"
defaults+=" -m pytest --no-header -v -s --timeout 120"
printf 'Using defaults: %s\n' "${defaults}"

# find tests marked as `@RunIf(standalone=True)`. done manually instead of with pytest because it is faster
grep_output=$(grep --recursive --word-regexp . --regexp 'standalone=True' --include '*.py')

# file paths, remove duplicates. kept in an array so that a path containing whitespace stays a single argument
mapfile -t files < <(printf '%s\n' "$grep_output" | cut -f1 -d: | sort -u)

# get the list of parametrizations. we need to call them separately. the trailing lines of the output are removed.
# note: if there's a syntax error, this will fail with some garbled output
if [[ "$OSTYPE" == "darwin"* ]]; then
  # `head -n -2` is not used here; the output is reversed, its first lines are deleted, and it is reversed back
  # NOTE(review): this branch drops the last three lines while the branch below drops the last two - confirm
  parametrizations=$(python3 -m pytest "${files[@]}" --collect-only --quiet "$@" | tail -r | sed -e '1,3d' | tail -r)
else
  parametrizations=$(python3 -m pytest "${files[@]}" --collect-only --quiet "$@" | head -n -2)
fi
# remove the "tests/tests_pytorch/" path prefixes (<parent directory>/<current directory>/)
path_suffix=$(basename "$(dirname "$(pwd)")")/$(basename "$(pwd)")"/"  # https://stackoverflow.com/a/8223345
# the pattern is quoted so that the directory names are matched literally and not as a glob
parametrizations=${parametrizations//"$path_suffix"/}

# One array element per output line. An unquoted `($parametrizations)` would also split on spaces inside a test id
# and glob-expand the `[...]` part of parametrized ids against the file system.
parametrizations_arr=()
while IFS= read -r node_id; do
  # skip empty and whitespace-only lines so that no empty test id is scheduled
  if [[ -n "${node_id//[[:space:]]/}" ]]; then
    parametrizations_arr+=("$node_id")
  fi
done <<< "$parametrizations"

# rows of the final report are appended to this string
report=''

# make sure no log from a previous run is left behind
rm -f standalone_test_output.txt

# Print the buffered test log, if there is one, and delete it afterwards.
# Exits the script with status 1 when the log contains a line that mentions an
# error and is not explained by `on_exception` or `xfailed`.
# Outputs: the log content, plus "Potential error! Stopping." when it exits
function show_batched_output {
  local log_file=standalone_test_output.txt
  local suspicious=0

  # nothing has been buffered since the last call: nothing to show
  if [ ! -f "$log_file" ]; then
    return 0
  fi

  cat "$log_file"
  # heuristic: stop if there's mentions of errors. this can prevent false negatives when only some of the ranks fail
  if grep -iE 'error|exception|traceback|failed' "$log_file" | grep -qvE 'on_exception|xfailed'; then
    echo "Potential error! Stopping."
    suspicious=1
  fi
  # the log is removed in both cases, before a possible exit
  rm "$log_file"

  if [ "$suspicious" -eq 1 ]; then
    exit 1
  fi
}
# also run on exit so that output which is still buffered gets shown
trap show_batched_output EXIT

# Wait for every PID passed as an argument, continuing past failures so that no
# job of the batch is left running.
# Arguments: PIDs of background jobs started by this shell (may be none)
# Returns:   the first non-zero exit status encountered, 0 otherwise
function wait_for_pids {
  local pid rc first_failure=0
  for pid in "$@"; do
    rc=0
    # `|| rc=$?` keeps `set -e` from aborting before the remaining jobs are waited for
    wait "$pid" || rc=$?
    if ((rc != 0 && first_failure == 0)); then
      first_failure=$rc
    fi
  done
  return "$first_failure"
}

# Wait for the jobs recorded in `pids`, empty the array and print the buffered
# output. Exits with the status of the first failed job, if there is one.
# Globals: pids (read, then reset)
function finish_batch {
  local batch_status=0
  wait_for_pids "${pids[@]}" || batch_status=$?
  pids=()  # empty the array
  show_batched_output
  if ((batch_status != 0)); then
    exit "$batch_status"
  fi
}

# Run each parametrization in its own background python process, in batches of `test_batch_size`.
pids=()
for i in "${!parametrizations_arr[@]}"; do
  parametrization=${parametrizations_arr[$i]}

  # check blocklist
  if [[ "${parametrization}" == *"test_pytorch_profiler_nested_emit_nvtx"* ]]; then
    echo "Skipping $parametrization"
    report+="Skipped\t$parametrization\n"
    # do not continue the loop because we might need to wait for batched jobs
  else
    echo "Running $parametrization"
    # execute the test in the background
    # redirect to a log file that buffers test output. since the tests will run in the background, we cannot let them
    # output to std{out,err} because the outputs would be garbled together
    # shellcheck disable=SC2086 # `defaults` is a space-separated argument string and must be word-split
    python3 ${defaults} "$parametrization" &>> standalone_test_output.txt &
    # save the PID in an array
    pids+=("$!")
    # add row to the final report
    report+="Ran\t$parametrization\n"
  fi

  # a batch is complete every `test_batch_size` indices; skipped entries count as well
  if (( (i + 1) % test_batch_size == 0 )); then
    # wait for running tests
    finish_batch
  fi
done
# wait for leftover tests
finish_batch

# Print the test report framed by two lines of 80 `=` characters.
# Arguments: $1 - report rows; backslash escapes such as \t and \n are expanded
# Outputs:   the framed report on stdout
function print_report {
  printf '=%.s' {1..80}
  # `%b` expands the escapes in the rows while keeping them out of the format string,
  # so a `%` inside a test id is printed literally instead of being read as a conversion
  printf '\n%b' "$1"
  printf '=%.s' {1..80}
  printf '\n'
}

# echo test report
print_report "$report"
Automatically find and run special tests (#6669) 2021-03-26 17:04:59 +00:00			`#!/bin/bash`
Simplify optimization Logic (#4984) * Rely on ddp plugin for blocking sync behaviour, and skip if we're using manual optimization * debug * Revert "debug" This reverts commit ccca6b6b * Expose manual reduce for automatic optimization * Add input arguments * Enable parity test * clean imports * Expose hook after to ensure we reset * Fix naming * add * fix test * uniformize optimizer logic * resolve test * resovle flake8 * resolve amp bug * update tests * remove bug * remove optimizer_step in accelerators * typo * update lightning optimizer * set doesn't work with ddp_spawn * resolve flake8 * update threshold * ignore pyright * correct codeFactor * remove useless if * remove zer_grad function * simplify step * remove typo * resolve bug * Apply suggestions from code review * update on comments * resolve bugs * remove tests * Update pytorch_lightning/trainer/configuration_validator.py Co-authored-by: Rohit Gupta <rohitgr1998@gmail.com> * simplify testing * add more tests Co-authored-by: SeanNaren <sean@grid.ai> Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> Co-authored-by: Rohit Gupta <rohitgr1998@gmail.com> 2020-12-07 12:55:49 +00:00			`# Copyright The Lightning AI team.`
			`#`
			`# Licensed under the Apache License, Version 2.0 (the "License");`
			`# you may not use this file except in compliance with the License.`
			`# You may obtain a copy of the License at`
			`#`
			`# http://www.apache.org/licenses/LICENSE-2.0`
			`#`
			`# Unless required by applicable law or agreed to in writing, software`
			`# distributed under the License is distributed on an "AS IS" BASIS,`
			`# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`# See the License for the specific language governing permissions and`
			`# limitations under the License.`
[bug-fix] Trainer.test points to latest best_model_path (#5161) * resolve bug * update code * add set -e * Update pytorch_lightning/callbacks/model_checkpoint.py Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com> * update test * Update tests/checkpointing/test_trainer_checkpoint.py Co-authored-by: Sean Naren <sean.narenthiran@gmail.com> * Update tests/checkpointing/test_trainer_checkpoint.py Co-authored-by: Carlos Mocholí <carlossmocholi@gmail.com> * update on comments * resolve test * convert to set * update * add error triggering * update * update on comments * update * resolve import * update * update * Update pytorch_lightning/plugins/rpc_plugin.py Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> * update Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com> Co-authored-by: Sean Naren <sean.narenthiran@gmail.com> Co-authored-by: Carlos Mocholí <carlossmocholi@gmail.com> Co-authored-by: Ubuntu <ubuntu@ip-172-31-62-109.ec2.internal> Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> (cherry picked from commit d5b367871fa3924090ec74bf903bd172bd3e2343) 2021-01-05 10:01:59 +00:00			`set -e`
Standalone Lite CI setup (#14451) Co-authored-by: Jirka <jirka.borovec@seznam.cz> Co-authored-by: Carlos Mocholí <carlossmocholi@gmail.com> Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> 2022-09-01 22:13:12 +00:00			`# THIS FILE ASSUMES IT IS RUN INSIDE THE tests/tests_<package> DIRECTORY`
Automatically find and run special tests (#6669) 2021-03-26 17:04:59 +00:00
Add batch size script argument for standalone tests (#13841) Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> 2022-07-27 12:36:22 +00:00			`# Batch size for testing: Determines how many standalone test invocations run in parallel`
Improvements to standalone scripts (#13840) Co-authored-by: Carlos Mocholí <carlossmocholi@gmail.com> 2022-07-28 23:33:22 +00:00			`# It can be set through the env variable PL_STANDALONE_TESTS_BATCH_SIZE and defaults to 6 if not set`
			`test_batch_size="${PL_STANDALONE_TESTS_BATCH_SIZE:-6}"`
Always run standalone tests (#16705) 2023-02-20 14:58:44 +00:00			`source="${PL_STANDALONE_TESTS_SOURCE:-"lightning"}"`
Add batch size script argument for standalone tests (#13841) Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> 2022-07-27 12:36:22 +00:00
Automatically find and run special tests (#6669) 2021-03-26 17:04:59 +00:00			`# this environment variable allows special tests to run`
Rename special to standalone (#10779) 2021-11-26 17:13:14 +00:00			`export PL_RUN_STANDALONE_TESTS=1`
Automatically find and run special tests (#6669) 2021-03-26 17:04:59 +00:00			`# python arguments`
[TPU] Replace GKE in CI with manual gcloud usage (#17362) 2023-04-14 12:47:31 +00:00			`defaults="-m coverage run --source ${source} --append -m pytest --no-header -v -s --timeout 120"`
Always run standalone tests (#16705) 2023-02-20 14:58:44 +00:00			`echo "Using defaults: ${defaults}"`
Automatically find and run special tests (#6669) 2021-03-26 17:04:59 +00:00
Rename special to standalone (#10779) 2021-11-26 17:13:14 +00:00			# find tests marked as `@RunIf(standalone=True)`. done manually instead of with pytest because it is faster
Run only CUDA tests on Azure GPU CI (#13651) 2022-07-15 11:51:23 +00:00			`grep_output=$(grep --recursive --word-regexp . --regexp 'standalone=True' --include '*.py')`
Support special test parametrizations (#10569) 2021-11-17 15:46:14 +00:00
			`# file paths, remove duplicates`
			`files=$(echo "$grep_output" \| cut -f1 -d: \| sort \| uniq)`

			`# get the list of parametrizations. we need to call them separately. the last two lines are removed.`
			`# note: if there's a syntax error, this will fail with some garbled output`
			`if [[ "$OSTYPE" == "darwin"* ]]; then`
[TPU] Replace GKE in CI with manual gcloud usage (#17362) 2023-04-14 12:47:31 +00:00			`parametrizations=$(python3 -m pytest $files --collect-only --quiet "$@" \| tail -r \| sed -e '1,3d' \| tail -r)`
Support special test parametrizations (#10569) 2021-11-17 15:46:14 +00:00			`else`
[TPU] Replace GKE in CI with manual gcloud usage (#17362) 2023-04-14 12:47:31 +00:00			`parametrizations=$(python3 -m pytest $files --collect-only --quiet "$@" \| head -n -2)`
Support special test parametrizations (#10569) 2021-11-17 15:46:14 +00:00			`fi`
Use a standalone test symlink for Lite (#14502) 2022-09-04 18:57:28 +00:00			`# remove the "tests/tests_pytorch/" path suffixes`
			`path_suffix=$(basename "$(dirname "$(pwd)")")/$(basename "$(pwd)")"/" # https://stackoverflow.com/a/8223345`
			`parametrizations=${parametrizations//$path_suffix/}`
Support special test parametrizations (#10569) 2021-11-17 15:46:14 +00:00			`parametrizations_arr=($parametrizations)`
Automatically find and run special tests (#6669) 2021-03-26 17:04:59 +00:00
			`report=''`
Run standalone tests in batches (#13673) 2022-07-18 12:10:35 +00:00
			`rm -f standalone_test_output.txt # in case it exists, remove it`
			`function show_batched_output {`
			`if [ -f standalone_test_output.txt ]; then # if exists`
			`cat standalone_test_output.txt`
Grep for potential errors in standalone tests (#15341) Co-authored-by: awaelchli <aedu.waelchli@gmail.com> 2022-11-05 03:29:38 +00:00			`# heuristic: stop if there's mentions of errors. this can prevent false negatives when only some of the ranks fail`
CI: Update colossalai version (#16747) Co-authored-by: Carlos Mocholí <carlossmocholi@gmail.com> 2023-02-14 19:07:29 +00:00			`if grep -iE 'error\|exception\|traceback\|failed' standalone_test_output.txt \| grep -qvE 'on_exception\|xfailed'; then`
Grep for potential errors in standalone tests (#15341) Co-authored-by: awaelchli <aedu.waelchli@gmail.com> 2022-11-05 03:29:38 +00:00			`echo "Potential error! Stopping."`
			`rm standalone_test_output.txt`
			`exit 1`
			`fi`
Run standalone tests in batches (#13673) 2022-07-18 12:10:35 +00:00			`rm standalone_test_output.txt`
			`fi`
			`}`
			`trap show_batched_output EXIT # show the output on exit`
Automatically find and run special tests (#6669) 2021-03-26 17:04:59 +00:00
Support special test parametrizations (#10569) 2021-11-17 15:46:14 +00:00			`for i in "${!parametrizations_arr[@]}"; do`
			`parametrization=${parametrizations_arr[$i]}`
Automatically find and run special tests (#6669) 2021-03-26 17:04:59 +00:00
Support special test parametrizations (#10569) 2021-11-17 15:46:14 +00:00			`# check blocklist`
Prepare CI to run on 3090s (#14910) 2022-09-29 14:01:59 +00:00			`if [[ "${parametrization}" == "test_pytorch_profiler_nested_emit_nvtx" ]]; then`
			`echo "Skipping $parametrization"`
Support special test parametrizations (#10569) 2021-11-17 15:46:14 +00:00			`report+="Skipped\t$parametrization\n"`
Run standalone tests in batches (#13673) 2022-07-18 12:10:35 +00:00			`# do not continue the loop because we might need to wait for batched jobs`
			`else`
			`echo "Running $parametrization"`
			`# execute the test in the background`
			`# redirect to a log file that buffers test output. since the tests will run in the background, we cannot let them`
			`# output to std{out,err} because the outputs would be garbled together`
[TPU] Replace GKE in CI with manual gcloud usage (#17362) 2023-04-14 12:47:31 +00:00			`python3 ${defaults} "$parametrization" &>> standalone_test_output.txt &`
Run standalone tests in batches (#13673) 2022-07-18 12:10:35 +00:00			`# save the PID in an array`
			`pids[${i}]=$!`
			`# add row to the final report`
			`report+="Ran\t$parametrization\n"`
Support special test parametrizations (#10569) 2021-11-17 15:46:14 +00:00			`fi`
Automatically find and run special tests (#6669) 2021-03-26 17:04:59 +00:00
Run standalone tests in batches (#13673) 2022-07-18 12:10:35 +00:00			`if ((($i + 1) % $test_batch_size == 0)); then`
			`# wait for running tests`
			`for pid in ${pids[*]}; do wait $pid; done`
			`unset pids # empty the array`
			`show_batched_output`
			`fi`
Automatically find and run special tests (#6669) 2021-03-26 17:04:59 +00:00			`done`
Run standalone tests in batches (#13673) 2022-07-18 12:10:35 +00:00			`# wait for leftover tests`
			`for pid in ${pids[*]}; do wait $pid; done`
			`show_batched_output`
support launching Lightning ddp with traditional command (#7480) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Carlos Mocholí <carlossmocholi@gmail.com> Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> 2021-07-14 11:25:36 +00:00
Automatically find and run special tests (#6669) 2021-03-26 17:04:59 +00:00			`# echo test report`
			`printf '=%.s' {1..80}`
			`printf "\n$report"`
			`printf '=%.s' {1..80}`
			`printf '\n'`