Merge branch 'master' into bump/python_3.9+
commit 8f39967036
@@ -134,13 +134,13 @@ jobs:
     condition: and(succeeded(), eq(variables['PACKAGE_NAME'], 'fabric'))
     displayName: "Adjust tests & examples"
 
-  - bash: python -m coverage run --source ${COVERAGE_SOURCE} -m pytest . -v --durations=50
-    workingDirectory: tests/tests_fabric/
+  - bash: python -m coverage run --source ${COVERAGE_SOURCE} -m pytest tests_fabric/ -v --durations=50
+    workingDirectory: tests/
     displayName: "Testing: fabric standard"
     timeoutInMinutes: "10"
 
-  - bash: bash ../run_standalone_tests.sh "."
-    workingDirectory: tests/tests_fabric/
+  - bash: bash ./run_standalone_tests.sh "tests_fabric"
+    workingDirectory: tests/
     env:
       PL_STANDALONE_TESTS_SOURCE: $(COVERAGE_SOURCE)
     displayName: "Testing: fabric standalone"
@@ -157,7 +157,7 @@ jobs:
       ./codecov --token=$(CODECOV_TOKEN) --commit=$(Build.SourceVersion) \
        --flags=gpu,pytest,${COVERAGE_SOURCE} --name="GPU-coverage" --env=linux,azure
       ls -l
-    workingDirectory: tests/tests_fabric/
+    workingDirectory: tests/
     displayName: "Statistics"
 
   - script: |
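The two `run_standalone_tests.sh` steps now run from `tests/` and pass the test directory as an argument. Below is a minimal local sketch of the updated fabric step, assuming the repository root as the starting directory and the script's built-in default coverage source (`lightning`); the actual CI step injects `$(COVERAGE_SOURCE)` through `PL_STANDALONE_TESTS_SOURCE`, whose value is not shown in this diff.

# sketch only: mirrors the updated CI invocation from the repo's tests/ directory
# ("lightning" is the script's default source, not necessarily what CI passes)
cd tests/
PL_STANDALONE_TESTS_SOURCE="lightning" bash ./run_standalone_tests.sh "tests_fabric"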
@@ -155,13 +155,13 @@ jobs:
       ls -l checkpoints/
     displayName: "Get legacy checkpoints"
 
-  - bash: python -m coverage run --source ${COVERAGE_SOURCE} -m pytest -v --durations=50
-    workingDirectory: tests/tests_pytorch
+  - bash: python -m coverage run --source ${COVERAGE_SOURCE} -m pytest tests_pytorch/ -v --durations=50
+    workingDirectory: tests/
     displayName: "Testing: PyTorch standard"
     timeoutInMinutes: "35"
 
-  - bash: bash ../run_standalone_tests.sh "."
-    workingDirectory: tests/tests_pytorch
+  - bash: bash ./run_standalone_tests.sh "tests_pytorch"
+    workingDirectory: tests/
     env:
       PL_USE_MOCKED_MNIST: "1"
       PL_STANDALONE_TESTS_SOURCE: $(COVERAGE_SOURCE)
@@ -177,7 +177,7 @@ jobs:
           coverage xml
 
       - name: Upload coverage to Codecov
-        uses: codecov/codecov-action@v4
+        uses: codecov/codecov-action@v5
         # see: https://github.com/actions/toolkit/issues/399
         continue-on-error: true
         with:
@@ -214,7 +214,7 @@ jobs:
           coverage xml
 
       - name: Upload coverage to Codecov
-        uses: codecov/codecov-action@v4
+        uses: codecov/codecov-action@v5
         # see: https://github.com/actions/toolkit/issues/399
         continue-on-error: true
         with:
@@ -19,6 +19,11 @@ set -e
 # It can be set through the env variable PL_STANDALONE_TESTS_BATCH_SIZE and defaults to 6 if not set
 test_batch_size="${PL_STANDALONE_TESTS_BATCH_SIZE:-6}"
 source="${PL_STANDALONE_TESTS_SOURCE:-"lightning"}"
+# this is the directory where the tests are located
+test_dir=$1 # parse the first argument
+COLLECTED_TESTS_FILE="collected_tests.txt"
+
+ls -lh . # show the contents of the directory
 
 # this environment variable allows special tests to run
 export PL_RUN_STANDALONE_TESTS=1
@@ -26,72 +31,87 @@ export PL_RUN_STANDALONE_TESTS=1
 defaults=" -m coverage run --source ${source} --append -m pytest --no-header -v -s --timeout 120 "
 echo "Using defaults: ${defaults}"
 
-# get the testing location as the first argument
-test_path=$1
-printf "source path: $test_path\n"
+# get the list of parametrizations. we need to call them separately. the last two lines are removed.
+# note: if there's a syntax error, this will fail with some garbled output
+python3 -um pytest $test_dir -q --collect-only --pythonwarnings ignore 2>&1 > $COLLECTED_TESTS_FILE
+# early terminate if collection failed (e.g. syntax error)
+if [[ $? != 0 ]]; then
+  cat $COLLECTED_TESTS_FILE
+  exit 1
+fi
 
-# collect all tests with parametrization based filtering with PL_RUN_STANDALONE_TESTS
-standalone_tests=$(python3 -m pytest $test_path -q --collect-only --pythonwarnings ignore)
-printf "Collected tests: \n $standalone_tests\n"
-# match only lines with tests
-parametrizations=$(perl -nle 'print $& while m{\S+::test_\S+}g' <<< "$standalone_tests")
-# convert the list to be array
-parametrizations_arr=($parametrizations)
-report=''
+# removes the last line of the file
+sed -i '$d' $COLLECTED_TESTS_FILE
 
-rm -f standalone_test_output.txt # in case it exists, remove it
-rm -f testnames.txt
+# Get test list and run each test individually
+tests=($(grep -oP '\S+::test_\S+' "$COLLECTED_TESTS_FILE"))
+test_count=${#tests[@]}
+# present the collected tests
+printf "collected $test_count tests:\n-------------------\n"
+# replace space with new line
+echo "${tests[@]}" | tr ' ' '\n'
+printf "\n===================\n"
 
-function show_batched_output {
-  if [ -f standalone_test_output.txt ]; then  # if exists
-    cat standalone_test_output.txt
-    # heuristic: stop if there's mentions of errors. this can prevent false negatives when only some of the ranks fail
-    if perl -nle 'print if /error|(?<!(?-i)on_)exception|traceback|(?<!(?-i)x)failed/i' standalone_test_output.txt | grep -qv -f testnames.txt; then
-      echo "Potential error! Stopping."
-      perl -nle 'print if /error|(?<!(?-i)on_)exception|traceback|(?<!(?-i)x)failed/i' standalone_test_output.txt
-      rm standalone_test_output.txt
-      exit 1
-    fi
-    rm standalone_test_output.txt
-  fi
-}
-trap show_batched_output EXIT  # show the output on exit
+# if test count is one print warning
+if [[ $test_count -eq 1 ]]; then
+  printf "WARNING: only one test found!\n"
+elif [ $test_count -eq 0 ]; then
+  printf "ERROR: no tests found!\n"
+  exit 1
+fi
 
-# remove the "tests/tests_pytorch/" path suffixes
-path_prefix=$(basename "$(dirname "$(pwd)")")/$(basename "$(pwd)")"/"  # https://stackoverflow.com/a/8223345
+# clear all the collected reports
+rm -f parallel_test_output-*.txt  # in case it exists, remove it
 
-for i in "${!parametrizations_arr[@]}"; do
-  parametrization=${parametrizations_arr[$i]//$path_prefix/}
-  prefix="$((i+1))/${#parametrizations_arr[@]}"
-
-  echo "$prefix: Running $parametrization"
-  echo $parametrization | sed 's/\[[^][]*\]//g' >> testnames.txt
-
-  # fix the port to avoid race condition when batched distributed tests select the port randomly
-  export MASTER_PORT=$((29500 + $i % $test_batch_size))
-
+status=0  # reset the script status
+report=""  # final report
+pids=()  # array of PID for running tests
+test_ids=()  # array of indexes of running tests
+printf "Running $test_count tests in batches of $test_batch_size\n"
+for i in "${!tests[@]}"; do
+  # remove initial "tests/" from the test name
+  test=${tests[$i]/tests\//}
+  printf "Running test $((i+1))/$test_count: $test\n"
+
   # execute the test in the background
-  # redirect to a log file that buffers test output. since the tests will run in the background, we cannot let them
-  # output to std{out,err} because the outputs would be garbled together
-  python3 ${defaults} "$parametrization" &>> standalone_test_output.txt &
-  # save the PID in an array
-  pids[${i}]=$!
-  # add row to the final report
-  report+="Ran\t$parametrization\n"
+  # redirect to a log file that buffers test output. since the tests will run in the background,
+  # we cannot let them output to std{out,err} because the outputs would be garbled together
+  python3 ${defaults} "$test" 2>&1 > "standalone_test_output-$i.txt" &
+  test_ids+=($i)  # save the test's id in an array with running tests
+  pids+=($!)  # save the PID in an array with running tests
 
-  if ((($i + 1) % $test_batch_size == 0)); then
+  # if we reached the batch size, wait for all tests to finish
+  if (( (($i + 1) % $test_batch_size == 0) || $i == $test_count-1 )); then
+    printf "Waiting for batch to finish: $(IFS=' '; echo "${pids[@]}")\n"
     # wait for running tests
-    for pid in ${pids[*]}; do wait $pid; done
-    unset pids # empty the array
-    show_batched_output
+    for j in "${!test_ids[@]}"; do
+      i=${test_ids[$j]}  # restore the global test's id
+      pid=${pids[$j]}  # restore the particular PID
+      test=${tests[$i]}  # restore the test name
+      printf "Waiting for $tests >> standalone_test_output-$i.txt (PID: $pid)\n"
+      wait -n $pid
+      # get the exit status of the test
+      test_status=$?
+      # add row to the final report
+      report+="Ran\t$test\t>> exit:$test_status\n"
+      if [[ $test_status != 0 ]]; then
+        # show the output of the failed test
+        cat "standalone_test_output-$i.txt"
+        # Process exited with a non-zero exit status
+        status=$test_status
+      fi
+    done
+    test_ids=()  # reset the test's id array
+    pids=()  # reset the PID array
   fi
 done
-# wait for leftover tests
-for pid in ${pids[*]}; do wait $pid; done
-show_batched_output
 
 # echo test report
 printf '=%.s' {1..80}
 printf "\n$report"
 printf '=%.s' {1..80}
 printf '\n'
+
+# exit with the worst test result
+exit $status
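The rewritten loop replaces the old log-scanning `show_batched_output` trap with per-test exit codes: each test runs in the background with its own output file, the script waits on every PID in the batch, and the worst exit status is propagated at the end. A stripped-down sketch of that batch-and-wait pattern, using hypothetical placeholder commands rather than the real pytest invocations:

#!/bin/bash
# Simplified illustration of the batching pattern above: run jobs in the
# background, wait for each job in the batch, and keep the worst exit status.
batch_size=2
cmds=("sleep 1" "true" "false" "sleep 1")  # hypothetical commands standing in for tests
status=0
pids=()
for i in "${!cmds[@]}"; do
  ${cmds[$i]} &> "out-$i.txt" &  # run in the background, buffer output per job
  pids+=($!)
  if (( ((i + 1) % batch_size == 0) || i == ${#cmds[@]} - 1 )); then
    for pid in "${pids[@]}"; do
      wait "$pid"  # reap one job and capture its exit code
      rc=$?
      if [[ $rc != 0 ]]; then status=$rc; fi
    done
    pids=()  # start the next batch
  fi
done
exit $status  # non-zero if any job failed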
@@ -21,7 +21,13 @@ export PL_RUN_STANDALONE_TESTS=1
 # test that a user can manually launch individual processes
 echo "Running manual ddp launch test"
 export PYTHONPATH="${PYTHONPATH}:$(pwd)"
-args="fit --trainer.accelerator gpu --trainer.devices 2 --trainer.strategy ddp --trainer.max_epochs=1 --trainer.limit_train_batches=1 --trainer.limit_val_batches=1 --trainer.limit_test_batches=1"
+args="fit --trainer.accelerator gpu \
+  --trainer.devices 2 \
+  --trainer.strategy ddp \
+  --trainer.max_epochs=1 \
+  --trainer.limit_train_batches=1 \
+  --trainer.limit_val_batches=1 \
+  --trainer.limit_test_batches=1"
 MASTER_ADDR="localhost" MASTER_PORT=1234 LOCAL_RANK=1 python strategies/scripts/cli_script.py ${args} &
 MASTER_ADDR="localhost" MASTER_PORT=1234 LOCAL_RANK=0 python strategies/scripts/cli_script.py ${args}
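One note on the reformatted `args` assignment: a backslash-newline inside double quotes is a line continuation, so `args` remains a single string, and the indentation of the continued lines simply becomes extra spaces inside it; those are harmless because the unquoted `${args}` expansion is word-split back into individual CLI flags. A tiny sketch with hypothetical flag names, only to show the quoting behaviour:

args="fit --accelerator gpu \
  --devices 2"
echo "$args"             # one string; the backslash-newline pair was removed
printf '[%s]\n' ${args}  # unquoted expansion word-splits it into separate arguments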