lightning/.github/workflows/ci-pytorch-test-full.yml

209 lines
8.3 KiB
YAML

name: Test PyTorch full
# see: https://help.github.com/en/actions/reference/events-that-trigger-workflows
on: # Trigger the workflow on push or pull request, but only for the master branch
push:
branches: [master, "release/*"]
pull_request:
branches: [master, "release/*"]
types: [opened, reopened, ready_for_review, synchronize]
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref }}
cancel-in-progress: ${{ ! (github.ref == 'refs/heads/master' || startsWith(github.ref, 'refs/heads/release/')) }}
jobs:
cpu:
runs-on: ${{ matrix.os }}
if: github.event.pull_request.draft == false
strategy:
fail-fast: false
matrix:
os: [ubuntu-20.04, windows-2022, macOS-11]
python-version: ["3.7", "3.10"] # minimum, maximum
requires: ["oldest", "latest"]
release: ["stable"]
exclude:
# There's no distribution of the oldest PyTorch 1.9 for Python 3.10.
# TODO: Remove the exclusion when dropping PyTorch 1.9 support.
- {python-version: "3.10", requires: "oldest"}
# TODO: re-enable RC testing
# include:
# - {os: ubuntu-20.04, python-version: "3.10", requires: "latest", release: "pre"}
timeout-minutes: 40
steps:
- uses: actions/checkout@v2
- name: Get changed files
id: changed-files
uses: tj-actions/changed-files@v28
- name: Decide if the test should be skipped
id: skip
shell: bash -l {0}
run: |
FILTER='src/pytorch_lightning|requirements/pytorch|tests/tests_pytorch|examples/pl_*'
echo "${{ steps.changed-files.outputs.all_changed_files }}" | tr " " "\n" > changed_files.txt
MATCHES=$(cat changed_files.txt | grep -E $FILTER)
echo $MATCHES
if [ -z "$MATCHES" ]; then
echo "Skip"
echo "::set-output name=continue::0"
else
echo "Continue"
echo "::set-output name=continue::1"
fi
- name: Set up Python ${{ matrix.python-version }}
if: ${{ (steps.skip.outputs.continue == '1') }}
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
- name: Reset caching
if: ${{ (steps.skip.outputs.continue == '1') }}
run: python -c "import time; days = time.time() / 60 / 60 / 24; print(f'TIME_PERIOD=d{int(days / 2) * 2}')" >> $GITHUB_ENV
- name: basic setup
if: ${{ (steps.skip.outputs.continue == '1') }}
run: |
pip --version
pip install -q fire
# Github Actions: Run step on specific OS: https://stackoverflow.com/a/57948488/4521646
- name: Setup macOS
if: ${{ (runner.os == 'macOS') && (steps.skip.outputs.continue == '1') }}
run: |
brew install openmpi libuv # Horovod on macOS requires OpenMPI, Gloo not currently supported
- name: Setup Windows
if: ${{ (runner.os == 'windows') && (steps.skip.outputs.continue == '1') }}
run: |
python .actions/assistant.py requirements_prune_pkgs horovod
- name: Set min. dependencies
if: ${{ (matrix.requires == 'oldest') && (steps.skip.outputs.continue == '1') }}
run: |
python .actions/assistant.py replace_oldest_ver
# Note: This uses an internal pip API and may not always work
# https://github.com/actions/cache/blob/master/examples.md#multiple-oss-in-a-workflow
- name: Get pip cache dir
if: ${{ (steps.skip.outputs.continue == '1') }}
id: pip-cache
run: echo "::set-output name=dir::$(pip cache dir)"
- name: pip cache
if: ${{ (steps.skip.outputs.continue == '1') }}
uses: actions/cache@v3
with:
path: ${{ steps.pip-cache.outputs.dir }}
key: ${{ runner.os }}-pip-td${{ env.TIME_PERIOD }}-py${{ matrix.python-version }}-${{ matrix.release }}-${{ matrix.requires }}-${{ hashFiles('requirements/pytorch/*.txt') }}
restore-keys: |
${{ runner.os }}-pip-td${{ env.TIME_PERIOD }}-py${{ matrix.python-version }}-${{ matrix.release }}-${{ matrix.requires }}-
- name: Pull legacy checkpoints
if: ${{ (steps.skip.outputs.continue == '1') }}
run: bash .actions/pull_legacy_checkpoints.sh
- name: Install dependencies
if: ${{ (steps.skip.outputs.continue == '1') }}
env:
PACKAGE_NAME: pytorch
FREEZE_REQUIREMENTS: 1
run: |
flag=$(python -c "print('--pre' if '${{matrix.release}}' == 'pre' else '')" 2>&1)
url=$(python -c "print('test/cpu/torch_test.html' if '${{matrix.release}}' == 'pre' else 'cpu/torch_stable.html')" 2>&1)
pip install -e .[test] --upgrade $flag --find-links "https://download.pytorch.org/whl/${url}"
pip list
shell: bash
- name: DocTests
if: ${{ (steps.skip.outputs.continue == '1') }}
working-directory: ./src
run: pytest pytorch_lightning --cov=pytorch_lightning
- name: Install extra dependencies
if: ${{ (steps.skip.outputs.continue == '1') }}
run: |
# adjust versions according installed Torch version
python ./requirements/pytorch/adjust-versions.py requirements/pytorch/extra.txt
pip install -r ./requirements/pytorch/extra.txt --find-links https://download.pytorch.org/whl/cpu/torch_stable.html --upgrade
pip list
shell: bash
- name: Reinstall Horovod if necessary
if: ${{ (runner.os != 'windows') && (steps.skip.outputs.continue == '1') }}
env:
HOROVOD_BUILD_ARCH_FLAGS: "-mfma"
HOROVOD_WITHOUT_MXNET: 1
HOROVOD_WITHOUT_TENSORFLOW: 1
run: |
HOROVOD_BUILT=$(python -c "import horovod.torch; horovod.torch.nccl_built(); print('SUCCESS')" || true)
if [[ $HOROVOD_BUILT != "SUCCESS" ]]; then
pip uninstall -y horovod
grep "horovod" requirements/pytorch/strategies.txt > requirements/pytorch/horovod.txt
pip install --no-cache-dir -r requirements/pytorch/horovod.txt
fi
horovodrun --check-build
python -c "import horovod.torch"
shell: bash
- name: Cache datasets
if: ${{ (steps.skip.outputs.continue == '1') }}
uses: actions/cache@v3
with:
path: Datasets
key: pl-dataset
- name: Sanity check
if: ${{ (steps.skip.outputs.continue == '1') }}
run: python requirements/pytorch/check-avail-extras.py
- name: Testing PyTorch
if: ${{ (steps.skip.outputs.continue == '1') }}
working-directory: tests/tests_pytorch
# NOTE: do not include coverage report here, see: https://github.com/nedbat/coveragepy/issues/1003
run: coverage run --source pytorch_lightning -m pytest -v --durations=50 --junitxml=results-${{ runner.os }}-py${{ matrix.python-version }}-${{ matrix.requires }}-${{ matrix.release }}.xml
- name: Upload pytest results
if: ${{ (failure()) && (steps.skip.outputs.continue == '1') }}
uses: actions/upload-artifact@v3
with:
name: unittest-results-${{ runner.os }}-py${{ matrix.python-version }}-${{ matrix.requires }}-${{ matrix.release }}
path: tests/tests_pytorch/results-${{ runner.os }}-py${{ matrix.python-version }}-${{ matrix.requires }}-${{ matrix.release }}.xml
- name: Prepare Examples
if: ${{ (steps.skip.outputs.continue == '1') }}
run: |
# adjust versions according installed Torch version
python ./requirements/pytorch/adjust-versions.py requirements/pytorch/examples.txt
pip install -r requirements/pytorch/examples.txt --find-links https://download.pytorch.org/whl/cpu/torch_stable.html --upgrade
- name: Run Examples
if: ${{ (steps.skip.outputs.continue == '1') }}
working-directory: ./examples
run: python -m pytest test_pl_examples.py -v --durations=10
- name: Statistics
if: ${{ (success()) && (steps.skip.outputs.continue == '1') }}
working-directory: tests/tests_pytorch
run: |
coverage report
coverage xml
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v3
if: ${{ (always()) && (steps.skip.outputs.continue == '1') }}
# see: https://github.com/actions/toolkit/issues/399
continue-on-error: true
with:
token: ${{ secrets.CODECOV_TOKEN }}
file: tests/tests_pytorch/coverage.xml
flags: cpu,pytest,python${{ matrix.python-version }}
name: CPU-coverage
fail_ci_if_error: false