test dockers & add AMP in pt-1.6 (#1584)

* exist images

* names

* images

* args

* pt 1.6 dev

* circleci

* update

* refactor

* build

* fix

* MKL
This commit is contained in:
Jirka Borovec 2020-07-31 14:23:13 +02:00 committed by GitHub
parent a6719f09f0
commit bc7a08fbe0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 73 additions and 33 deletions

View File

@ -6,12 +6,13 @@ name: torch-GPU
steps:
- name: testing
image: pytorchlightning/pytorch_lightning:devel-pt1.4
image: pytorchlightning/pytorch_lightning:cuda-extras-py3.7-torch1.5
environment:
SLURM_LOCALID: 0
CODECOV_TOKEN:
from_secret: codecov_token
MKL_THREADING_LAYER: GNU
HOROVOD_GPU_ALLREDUCE: NCCL
HOROVOD_GPU_BROADCAST: NCCL
HOROVOD_WITH_PYTORCH: 1
@ -33,10 +34,10 @@ steps:
- nvidia-smi
#- bash ./tests/install_AMP.sh
- apt-get update && apt-get install -y cmake
- pip install -r ./requirements/base.txt --user -q
- pip install -r ./requirements/devel.txt --user -q
- pip install -r ./requirements/base.txt --user -q --upgrade-strategy only-if-needed
- pip install -r ./requirements/devel.txt --user -q --upgrade-strategy only-if-needed
#- pip install -r ./requirements/docs.txt --user -q
- pip install -r ./requirements/examples.txt --user -q
- pip install -r ./requirements/examples.txt --user -q --upgrade-strategy only-if-needed
- pip list
- python -c "import torch ; print(' & '.join([torch.cuda.get_device_name(i) for i in range(torch.cuda.device_count())]) if torch.cuda.is_available() else 'only CPU')"
- coverage run --source pytorch_lightning -m py.test pytorch_lightning tests -v --durations=25 # --flake8

View File

@ -82,3 +82,29 @@ jobs:
build_args: PYTHON_VERSION=${{ matrix.python_version }}
tags: "XLA-extras-py${{ matrix.python_version }}"
timeout-minutes: 25
build-cuda:
  # Builds the CUDA "extras" image once per (python, torch) matrix entry and,
  # on push events, publishes it to Docker Hub as
  # pytorchlightning/pytorch_lightning:cuda-extras-py<python>-torch<torch>.
  runs-on: ubuntu-20.04
  strategy:
    matrix:
      # Quoted so version numbers are always treated as strings, never floats.
      python_version: ["3.7"]
      pytorch_version: ["1.3", "1.4", "1.5", "1.6.0"]
  steps:
    - name: Checkout
      uses: actions/checkout@v2
    - uses: actions/setup-python@v2
      with:
        python-version: "3.7"
    - name: Publish Master to Docker
      # publish master
      uses: docker/build-push-action@v1.1.0
      if: github.event_name == 'push'
      with:
        repository: pytorchlightning/pytorch_lightning
        username: ${{ secrets.DOCKER_USERNAME }}
        password: ${{ secrets.DOCKER_PASSWORD }}
        # Must be the CUDA Dockerfile: the tags below are "cuda-extras-*".
        dockerfile: dockers/cuda-extras/Dockerfile
        # The CUDA Dockerfile declares `ARG TORCH_VERSION`; any other name is
        # silently ignored and every matrix entry would build the default torch.
        build_args: PYTHON_VERSION=${{ matrix.python_version }},TORCH_VERSION=${{ matrix.pytorch_version }}
        tags: "cuda-extras-py${{ matrix.python_version }}-torch${{ matrix.pytorch_version }}"
      timeout-minutes: 40

View File

@ -12,7 +12,7 @@ or with specific arguments
```bash
git clone <git-repository>
docker image build \
-t pytorch-lightning:py38 \
-t pytorch-lightning:py3.8 \
-f dockers/conda/Dockerfile \
--build-arg PYTHON_VERSION=3.8 \
--build-arg PYTORCH_VERSION=1.4 \

View File

@ -0,0 +1,40 @@
# CUDA development image with the project's base, extra, example and test
# requirements pre-installed on top of the official pytorch/pytorch base.
#
# Existing images:
#  --build-arg TORCH_VERSION=1.6.0 --build-arg CUDA_VERSION=10.1
#  --build-arg TORCH_VERSION=1.5 --build-arg CUDA_VERSION=10.1
#  --build-arg TORCH_VERSION=1.4 --build-arg CUDA_VERSION=10.1
#  --build-arg TORCH_VERSION=1.3 --build-arg CUDA_VERSION=10.1
#  --build-arg TORCH_VERSION=1.2 --build-arg CUDA_VERSION=10.0
#  --build-arg TORCH_VERSION=1.1.0 --build-arg CUDA_VERSION=10.0 --build-arg CUDNN_VERSION=7.5

# Defaults must form one of the base-image tags listed above
# (1.6.0 is listed, plain 1.6 is not).
ARG TORCH_VERSION=1.6.0
ARG CUDA_VERSION=10.1
ARG CUDNN_VERSION=7

FROM pytorch/pytorch:${TORCH_VERSION}-cuda${CUDA_VERSION}-cudnn${CUDNN_VERSION}-devel

# Horovod build switches: NCCL for GPU allreduce/broadcast, PyTorch and Gloo
# enabled, TensorFlow, MXNet and MPI disabled.
# NOTE(review): presumably consumed when Horovod is built from
# requirements-extra.txt - confirm against that file.
ENV HOROVOD_GPU_ALLREDUCE=NCCL
ENV HOROVOD_GPU_BROADCAST=NCCL
ENV HOROVOD_WITH_PYTORCH=1
ENV HOROVOD_WITHOUT_TENSORFLOW=1
ENV HOROVOD_WITHOUT_MXNET=1
ENV HOROVOD_WITH_GLOO=1
ENV HOROVOD_WITHOUT_MPI=1

# Make `pip install --user` entry points reachable.
ENV PATH="$PATH:/root/.local/bin"

COPY ./tests/install_AMP.sh install_AMP.sh
COPY ./requirements/base.txt requirements.txt
COPY ./requirements/extra.txt requirements-extra.txt
COPY ./requirements/test.txt requirements-tests.txt
COPY ./requirements/examples.txt requirements-examples.txt

# MAKEFLAGS is exported inside the RUN shell on purpose: Dockerfile `ENV` does
# not perform command substitution, so `ENV MAKEFLAGS="-j$(nproc)"` would store
# the literal text `$(nproc)` instead of the core count.
# Steps: install cmake, install AMP, install base/extra/examples/test
# requirements, then drop the copied requirement files and list what was
# installed.
# Optional, kept disabled as before: HOROVOD_BUILD_ARCH_FLAGS="-mfma"
RUN apt-get update && apt-get install -y cmake && \
    export MAKEFLAGS="-j$(nproc)" && \
    bash install_AMP.sh && \
    pip install -r requirements.txt && \
    pip install -r requirements-extra.txt && \
    pip install -r requirements-examples.txt && \
    pip install -r requirements-tests.txt && \
    rm requirements* && \
    pip list

View File

@ -1,27 +0,0 @@
ARG TORCH_VERSION=1.4
ARG CUDA_VERSION=10.1
FROM pytorch/pytorch:${TORCH_VERSION}-cuda${CUDA_VERSION}-cudnn7-devel
ENV HOROVOD_GPU_ALLREDUCE: NCCL
ENV HOROVOD_GPU_BROADCAST: NCCL
ENV HOROVOD_WITH_PYTORCH: 1
ENV HOROVOD_WITHOUT_TENSORFLOW: 1
ENV HOROVOD_WITHOUT_MXNET: 1
ENV HOROVOD_WITH_GLOO: 1
ENV HOROVOD_WITHOUT_MPI: 1
ENV PATH: "$PATH:/root/.local/bin"
ENV MAKEFLAGS: "-j$(nproc)"
COPY ./tests/install_AMP.sh install_AMP.sh
COPY ./requirements/base.txt requirements.txt
COPY ./requirements/extra.txt requirements-extra.txt
COPY ./requirements/test.txt requirements-tests.txt
# Install AMP
RUN apt-get update && apt-get install -y cmake && \
bash install_AMP.sh && \
pip install -r requirements.txt --user && \
pip install -r requirements-extra.txt --user && \
pip install -r requirements-tests.txt --user && \
pip list

View File

@ -54,7 +54,7 @@ coverage xml
You can build the image yourself; note that this takes a long time, so be prepared.
```bash
git clone <git-repository>
docker image build -t pytorch_lightning:devel-torch1.4 -f tests/Dockerfile --build-arg TORCH_VERSION=1.4 .
docker image build -t pytorch_lightning:devel-torch1.4 -f dockers/cuda-extras/Dockerfile --build-arg TORCH_VERSION=1.4 .
```
To build other versions, select a different Dockerfile.
```bash