run TPU tests with multiple versions (#3024)
* rename * multi build * multi build * copy * copy * copy * copy * copy * copy * clean * note * docker * formatting Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com> Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com> Co-authored-by: William Falcon <waf2107@columbia.edu>
This commit is contained in:
parent
faa357648f
commit
ab508dae0c
|
@ -35,12 +35,8 @@ references:
|
|||
name: Build and push Docker image
|
||||
command: |
|
||||
gcloud --quiet auth configure-docker
|
||||
cd dockers/tpu-tests
|
||||
# TODO: How to find the GITHUB_REF in CircleCI?
|
||||
# $CI_PULL_REQUEST seems to be of form: https://github.com/org/repo-name/pull/11.
|
||||
# Grab the last bit, e.g. pull/11, convert to pull/11/head, and use it
|
||||
# for the GITHUB_REF so Docker can pull the latest pending code in PR.
|
||||
if [ -z "$CI_PULL_REQUEST" ]; then docker build --tag "$GCR_IMAGE_PATH:$CIRCLE_WORKFLOW_JOB_ID" -f Dockerfile --build-arg "TEST_IMAGE=1" .; else git_ref=$(echo "$CI_PULL_REQUEST" | sed "s/.*pytorch-lightning\///")/head && docker build --tag "$GCR_IMAGE_PATH:$CIRCLE_WORKFLOW_JOB_ID" -f Dockerfile --build-arg "TEST_IMAGE=1" --build-arg "GITHUB_REF=$git_ref" .; fi
|
||||
#cd dockers/tpu-tests
|
||||
docker build --tag "$GCR_IMAGE_PATH:$CIRCLE_WORKFLOW_JOB_ID" -f ./dockers/tpu-tests/Dockerfile --build-arg "PYTHON_VERSION=$PYTHON_VER" .
|
||||
docker push "$GCR_IMAGE_PATH:$CIRCLE_WORKFLOW_JOB_ID"
|
||||
|
||||
deploy_cluster: &deploy_cluster
|
||||
|
@ -99,9 +95,13 @@ references:
|
|||
jobs:
|
||||
|
||||
TPU-tests:
|
||||
parameters:
|
||||
python:
|
||||
type: string
|
||||
docker:
|
||||
- image: circleci/python:3.7
|
||||
environment:
|
||||
- PYTHON_VER: << parameters.python >>
|
||||
- MAX_CHECKS: 240
|
||||
- CHECK_SPEEP: 5
|
||||
steps:
|
||||
|
@ -148,19 +148,14 @@ jobs:
|
|||
|
||||
workflows:
|
||||
version: 2
|
||||
build:
|
||||
tpu-tests:
|
||||
jobs:
|
||||
- build-Docs
|
||||
- TPU-tests:
|
||||
filters:
|
||||
branches:
|
||||
# https://discuss.circleci.com/t/create-separate-steps-jobs-for-pr-forks-versus-branches/13419/4
|
||||
#only:
|
||||
# # only from forks
|
||||
# - /^pull\/.\d+$/
|
||||
ignore:
|
||||
- master
|
||||
cleanup:
|
||||
matrix:
|
||||
parameters:
|
||||
python: ["3.6", "3.7"]
|
||||
tpu-cleanup:
|
||||
triggers:
|
||||
- schedule:
|
||||
# The cron format is:
|
||||
|
|
|
@ -20,6 +20,10 @@ jobs:
|
|||
setup-build-publish-deploy:
|
||||
name: tpu-testing-job
|
||||
runs-on: ubuntu-20.04
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
python-version: [3.6, 3.7]
|
||||
# Timeout: https://stackoverflow.com/a/59076067/4521646
|
||||
timeout-minutes: 50
|
||||
|
||||
|
@ -61,8 +65,8 @@ jobs:
|
|||
shell: bash
|
||||
- name: Build and Push Docker Image
|
||||
run: |
|
||||
cd dockers/tpu-tests
|
||||
docker build --tag "$IMAGE:$GITHUB_RUN_ID" -f Dockerfile --build-arg "GITHUB_REF=$GITHUB_REF" --build-arg "TEST_IMAGE=1" .
|
||||
#cd dockers/tpu-tests
|
||||
docker build --tag "$IMAGE:$GITHUB_RUN_ID" -f ./dockers/tpu-tests/Dockerfile --build-arg "PYTHON_VERSION=${{ matrix.python-version }}" .
|
||||
docker push "$IMAGE:$GITHUB_RUN_ID"
|
||||
shell: bash
|
||||
|
||||
|
|
|
@ -24,7 +24,7 @@ pull_request_rules:
|
|||
# no requested changes from any reviewer
|
||||
- "#changes-requested-reviews-by=0"
|
||||
# this serves as an "ALL checks have to pass" condition, as we actually have around 40 tests in total
|
||||
- "#status-success>=53"
|
||||
- "#status-success>=54"
|
||||
# this is just in case since we rely on GPU tests (note: redundant with the above)
|
||||
- status-success=continuous-integration/drone/pr
|
||||
- "status-success=ci/circleci: TPU-tests"
|
||||
|
|
|
@ -82,6 +82,8 @@ Get started with our [3 steps guide](https://pytorch-lightning.readthedocs.io/en
|
|||
| System / PyTorch ver. | 1.3 (min. req.)* | 1.4 | 1.5 | 1.6 (latest) | 1.7 (nightly) |
|
||||
| :---: | :---: | :---: | :---: | :---: | :---: |
|
||||
| Conda py3.7 [linux] | [![PyTorch & Conda](https://github.com/PyTorchLightning/pytorch-lightning/workflows/PyTorch%20&%20Conda/badge.svg)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22PyTorch+%26+Conda%22+branch%3Amaster) | [![PyTorch & Conda](https://github.com/PyTorchLightning/pytorch-lightning/workflows/PyTorch%20&%20Conda/badge.svg)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22PyTorch+%26+Conda%22+branch%3Amaster) | [![PyTorch & Conda](https://github.com/PyTorchLightning/pytorch-lightning/workflows/PyTorch%20&%20Conda/badge.svg)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22PyTorch+%26+Conda%22+branch%3Amaster) | [![PyTorch & Conda](https://github.com/PyTorchLightning/pytorch-lightning/workflows/PyTorch%20&%20Conda/badge.svg)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22PyTorch+%26+Conda%22+branch%3Amaster) | [![PyTorch & Conda](https://github.com/PyTorchLightning/pytorch-lightning/workflows/PyTorch%20&%20Conda/badge.svg)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22PyTorch+%26+Conda%22+branch%3Amaster) |
|
||||
| Linux py3.7 [GPUs**] | - | - | - | [![Build Status](http://35.192.60.23/api/badges/PyTorchLightning/pytorch-lightning/status.svg)](http://35.192.60.23/PyTorchLightning/pytorch-lightning) | - |
|
||||
| Linux py3.6 / py3.7 [TPUs***] | - | - | - | [![TPU tests](https://github.com/PyTorchLightning/pytorch-lightning/workflows/TPU%20tests/badge.svg)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22TPU+tests%22+branch%3Amaster) | - |
|
||||
| Linux py3.7 [GPUs**] | - | - |[![Build Status](http://104.154.220.231/api/badges/PyTorchLightning/pytorch-lightning/status.svg)](http://104.154.220.231/PyTorchLightning/pytorch-lightning) | - | - |
|
||||
| Linux py3.7 [TPUs***] | - | - | - | [![TPU tests](https://github.com/PyTorchLightning/pytorch-lightning/workflows/TPU%20tests/badge.svg)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22TPU+tests%22+branch%3Amaster) | - |
|
||||
| Linux py3.6 / py3.7 / py3.8 | [![CI complete testing](https://github.com/PyTorchLightning/pytorch-lightning/workflows/CI%20complete%20testing/badge.svg)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22CI+testing%22) | - | - | [![CI complete testing](https://github.com/PyTorchLightning/pytorch-lightning/workflows/CI%20complete%20testing/badge.svg)](https://github.com/PyTorchLightning/pytorch-lightning/actions?query=workflow%3A%22CI+testing%22) | - |
|
||||
|
@ -91,6 +93,7 @@ Get started with our [3 steps guide](https://pytorch-lightning.readthedocs.io/en
|
|||
- _\* `torch>=1.4` is the minimal pytorch version for Python 3.8_
|
||||
- _\** tests run on two NVIDIA K80_
|
||||
- _\*** tests run on Google GKE TPUv2/3_
|
||||
- _TPU w/ py3.6/py3.7 means we support Colab and Kaggle env._
|
||||
|
||||
</center>
|
||||
|
||||
|
|
|
@ -69,15 +69,13 @@ COPY ./ ./pytorch-lightning/
|
|||
RUN \
|
||||
# Install PL dependencies
|
||||
cd pytorch-lightning && \
|
||||
# drop Torch
|
||||
# drop Torch as it was installed with XLA
|
||||
python -c "fname = \"./requirements/base.txt\" ; lines = [line for line in open(fname).readlines() if not line.startswith(\"torch\")] ; open(fname, \"w\").writelines(lines)" && \
|
||||
pip install --requirement ./requirements/base.txt --upgrade-strategy only-if-needed && \
|
||||
# drop Horovod
|
||||
# drop Horovod as it is not needed
|
||||
python -c "fname = \"./requirements/extra.txt\" ; lines = [line for line in open(fname).readlines() if not line.startswith(\"horovod\")] ; open(fname, \"w\").writelines(lines)" && \
|
||||
pip install --requirement ./requirements/extra.txt --upgrade-strategy only-if-needed && \
|
||||
# drop TorchVision
|
||||
# drop TorchVision as it was installed with XLA
|
||||
python -c "fname = \"./requirements/examples.txt\" ; lines = [line for line in open(fname).readlines() if not line.startswith(\"torchvision\")] ; open(fname, \"w\").writelines(lines)" && \
|
||||
pip install --requirement ./requirements/examples.txt --upgrade-strategy only-if-needed && \
|
||||
pip install --requirement ./requirements/devel.txt --upgrade-strategy only-if-needed && \
|
||||
cd .. && \
|
||||
rm -rf pytorch-lightning && \
|
||||
rm -rf /root/.cache
|
||||
|
|
|
@ -1,38 +1,23 @@
|
|||
ARG PYTHON_VERSION=3.7
|
||||
ARG PYTORCH_VERSION=1.6
|
||||
|
||||
FROM pytorchlightning/pytorch_lightning:XLA-extras-py${PYTHON_VERSION}
|
||||
|
||||
# Build args.
|
||||
ARG GITHUB_REF=refs/heads/master
|
||||
ARG TEST_IMAGE=0
|
||||
|
||||
# This Dockerfile installs pytorch/xla 3.7 wheels. There are also 3.6 wheels available; see below.
|
||||
FROM pytorchlightning/pytorch_lightning:base-xla-py${PYTHON_VERSION}-torch${PYTORCH_VERSION}
|
||||
|
||||
#SHELL ["/bin/bash", "-c"]
|
||||
|
||||
# Install pytorch-lightning at the current PR, plus dependencies.
|
||||
RUN git clone https://github.com/PyTorchLightning/pytorch-lightning.git && \
|
||||
cd pytorch-lightning && \
|
||||
echo $GITHUB_REF && \
|
||||
git fetch origin $GITHUB_REF:CI && \
|
||||
git checkout CI && \
|
||||
pip install --requirement ./requirements/base.txt --no-cache-dir
|
||||
COPY ./ ./pytorch-lightning/
|
||||
|
||||
# If using this image for tests, install more dependencies and don't delete
|
||||
# the source code where the tests live.
|
||||
# If using this image for tests, install more dependencies and don't delete the source code where the tests live.
|
||||
RUN \
|
||||
# Install pytorch-lightning at the current PR, plus dependencies.
|
||||
#pip install -r pytorch-lightning/requirements/base.txt --no-cache-dir && \
|
||||
# drop Horovod
|
||||
#python -c "fname = 'pytorch-lightning/requirements/extra.txt' ; lines = [line for line in open(fname).readlines() if not line.startswith('horovod')] ; open(fname, 'w').writelines(lines)" && \
|
||||
#pip install --requirement pytorch-lightning/requirements/extra.txt --no-cache-dir && \
|
||||
if [ $TEST_IMAGE -eq 1 ] ; then \
|
||||
pip install --requirement pytorch-lightning/requirements/test.txt --no-cache-dir ; \
|
||||
else \
|
||||
rm -rf pytorch-lightning ; \
|
||||
fi
|
||||
pip install -r pytorch-lightning/requirements/devel.txt --no-cache-dir --upgrade-strategy only-if-needed
|
||||
|
||||
#RUN python -c "import pytorch_lightning as pl; print(pl.__version__)"
|
||||
|
||||
COPY docker-entrypoint.sh /usr/local/bin/
|
||||
COPY ./dockers/tpu-tests/docker-entrypoint.sh /usr/local/bin/
|
||||
RUN chmod +x /usr/local/bin/docker-entrypoint.sh
|
||||
|
||||
ENTRYPOINT ["/usr/local/bin/docker-entrypoint.sh"]
|
||||
|
|
Loading…
Reference in New Issue