test dockers & add AMP in pt-1.6 (#1584)
* exist images * names * images * args * pt 1.6 dev * circleci * update * refactor * build * fix * MKL
This commit is contained in:
parent
a6719f09f0
commit
bc7a08fbe0
|
@ -6,12 +6,13 @@ name: torch-GPU
|
|||
|
||||
steps:
|
||||
- name: testing
|
||||
image: pytorchlightning/pytorch_lightning:devel-pt1.4
|
||||
image: pytorchlightning/pytorch_lightning:cuda-extras-py3.7-torch1.5
|
||||
|
||||
environment:
|
||||
SLURM_LOCALID: 0
|
||||
CODECOV_TOKEN:
|
||||
from_secret: codecov_token
|
||||
MKL_THREADING_LAYER: GNU
|
||||
HOROVOD_GPU_ALLREDUCE: NCCL
|
||||
HOROVOD_GPU_BROADCAST: NCCL
|
||||
HOROVOD_WITH_PYTORCH: 1
|
||||
|
@ -33,10 +34,10 @@ steps:
|
|||
- nvidia-smi
|
||||
#- bash ./tests/install_AMP.sh
|
||||
- apt-get update && apt-get install -y cmake
|
||||
- pip install -r ./requirements/base.txt --user -q
|
||||
- pip install -r ./requirements/devel.txt --user -q
|
||||
- pip install -r ./requirements/base.txt --user -q --upgrade-strategy only-if-needed
|
||||
- pip install -r ./requirements/devel.txt --user -q --upgrade-strategy only-if-needed
|
||||
#- pip install -r ./requirements/docs.txt --user -q
|
||||
- pip install -r ./requirements/examples.txt --user -q
|
||||
- pip install -r ./requirements/examples.txt --user -q --upgrade-strategy only-if-needed
|
||||
- pip list
|
||||
- python -c "import torch ; print(' & '.join([torch.cuda.get_device_name(i) for i in range(torch.cuda.device_count())]) if torch.cuda.is_available() else 'only CPU')"
|
||||
- coverage run --source pytorch_lightning -m py.test pytorch_lightning tests -v --durations=25 # --flake8
|
||||
|
|
|
@ -82,3 +82,29 @@ jobs:
|
|||
build_args: PYTHON_VERSION=${{ matrix.python_version }}
|
||||
tags: "XLA-extras-py${{ matrix.python_version }}"
|
||||
timeout-minutes: 25
|
||||
|
||||
build-cuda:
  # Builds one CUDA "extras" image per (python_version, pytorch_version) matrix
  # entry and pushes it to Docker Hub on push events.
  runs-on: ubuntu-20.04
  strategy:
    matrix:
      python_version: [3.7]
      pytorch_version: [1.3, 1.4, 1.5, 1.6.0]
  steps:
    - name: Checkout
      uses: actions/checkout@v2
    - uses: actions/setup-python@v2
      with:
        python-version: 3.7

    - name: Publish Master to Docker
      # publish master
      uses: docker/build-push-action@v1.1.0
      if: github.event_name == 'push'
      with:
        repository: pytorchlightning/pytorch_lightning
        username: ${{ secrets.DOCKER_USERNAME }}
        password: ${{ secrets.DOCKER_PASSWORD }}
        # Build the CUDA image definition; the tags below are "cuda-extras-*",
        # so pointing at the TPU Dockerfile would publish the wrong image.
        dockerfile: dockers/cuda-extras/Dockerfile
        # The CUDA Dockerfile selects its base image through the TORCH_VERSION
        # build argument; passing any other name leaves the default in effect
        # for every matrix entry.
        build_args: PYTHON_VERSION=${{ matrix.python_version }},TORCH_VERSION=${{ matrix.pytorch_version }}
        tags: "cuda-extras-py${{ matrix.python_version }}-torch${{ matrix.pytorch_version }}"
      timeout-minutes: 40
|
||||
|
|
|
@ -12,7 +12,7 @@ or with specific arguments
|
|||
```bash
|
||||
git clone <git-repository>
|
||||
docker image build \
|
||||
-t pytorch-lightning:py38 \
|
||||
-t pytorch-lightning:py3.8 \
|
||||
-f dockers/conda/Dockerfile \
|
||||
--build-arg PYTHON_VERSION=3.8 \
|
||||
--build-arg PYTORCH_VERSION=1.4 \
|
||||
|
|
|
@ -0,0 +1,40 @@
|
|||
# Existing images:
# --build-arg TORCH_VERSION=1.6.0 --build-arg CUDA_VERSION=10.1
# --build-arg TORCH_VERSION=1.5 --build-arg CUDA_VERSION=10.1
# --build-arg TORCH_VERSION=1.4 --build-arg CUDA_VERSION=10.1
# --build-arg TORCH_VERSION=1.3 --build-arg CUDA_VERSION=10.1
# --build-arg TORCH_VERSION=1.2 --build-arg CUDA_VERSION=10.0
# --build-arg TORCH_VERSION=1.1.0 --build-arg CUDA_VERSION=10.0 --build-arg CUDNN_VERSION=7.5

# Base-image selectors. They are declared before FROM so they can be used in
# the image tag. The TORCH_VERSION default matches the first combination
# listed above (1.6.0).
ARG TORCH_VERSION=1.6.0
ARG CUDA_VERSION=10.1
ARG CUDNN_VERSION=7

FROM pytorch/pytorch:${TORCH_VERSION}-cuda${CUDA_VERSION}-cudnn${CUDNN_VERSION}-devel

# Horovod build switches, plus the per-user pip bin directory on PATH.
ENV HOROVOD_GPU_ALLREDUCE=NCCL \
    HOROVOD_GPU_BROADCAST=NCCL \
    HOROVOD_WITH_PYTORCH=1 \
    HOROVOD_WITHOUT_TENSORFLOW=1 \
    HOROVOD_WITHOUT_MXNET=1 \
    HOROVOD_WITH_GLOO=1 \
    HOROVOD_WITHOUT_MPI=1 \
    PATH="$PATH:/root/.local/bin"

# Only the install script and the requirement manifests are needed for the build.
COPY ./tests/install_AMP.sh install_AMP.sh
COPY ./requirements/base.txt requirements.txt
COPY ./requirements/extra.txt requirements-extra.txt
COPY ./requirements/test.txt requirements-tests.txt
COPY ./requirements/examples.txt requirements-examples.txt

# MAKEFLAGS is exported inside the RUN shell because ENV performs no command
# substitution: `ENV MAKEFLAGS="-j$(nproc)"` would store the literal text
# "$(nproc)". As a consequence MAKEFLAGS is a build-time setting only and is
# not part of the runtime environment of the image.
# Installation order: cmake, AMP (install_AMP.sh), base requirements, extras,
# examples, tests.
# Disabled tweak kept for reference: HOROVOD_BUILD_ARCH_FLAGS="-mfma"
# The apt lists, the pip download cache and the copied build inputs are removed
# in this same layer so they do not persist in the image.
RUN export MAKEFLAGS="-j$(nproc)" && \
    apt-get update && apt-get install -y cmake && \
    rm -rf /var/lib/apt/lists/* && \
    bash install_AMP.sh && \
    pip install --no-cache-dir -r requirements.txt && \
    pip install --no-cache-dir -r requirements-extra.txt && \
    pip install --no-cache-dir -r requirements-examples.txt && \
    pip install --no-cache-dir -r requirements-tests.txt && \
    rm -f install_AMP.sh requirements* && \
    pip list
|
|
@ -1,27 +0,0 @@
|
|||
# Base-image selectors, declared before FROM so they can be used in the tag.
ARG TORCH_VERSION=1.4
ARG CUDA_VERSION=10.1

FROM pytorch/pytorch:${TORCH_VERSION}-cuda${CUDA_VERSION}-cudnn7-devel

# ENV requires `KEY=value`. The YAML-like `KEY: value` spelling is parsed as
# the legacy `ENV <key> <value>` form and defines a variable literally named
# "KEY:" (with the colon), so none of the intended variables would be set.
ENV HOROVOD_GPU_ALLREDUCE=NCCL
ENV HOROVOD_GPU_BROADCAST=NCCL
ENV HOROVOD_WITH_PYTORCH=1
ENV HOROVOD_WITHOUT_TENSORFLOW=1
ENV HOROVOD_WITHOUT_MXNET=1
ENV HOROVOD_WITH_GLOO=1
ENV HOROVOD_WITHOUT_MPI=1
# Packages below are installed with `pip --user`; expose their entry points.
ENV PATH="$PATH:/root/.local/bin"

COPY ./tests/install_AMP.sh install_AMP.sh
COPY ./requirements/base.txt requirements.txt
COPY ./requirements/extra.txt requirements-extra.txt
COPY ./requirements/test.txt requirements-tests.txt

# MAKEFLAGS is exported inside the RUN shell because ENV performs no command
# substitution; "$(nproc)" in an ENV instruction would be stored literally.
# Install AMP, then the base, extra and test requirements.
RUN export MAKEFLAGS="-j$(nproc)" && \
    apt-get update && apt-get install -y cmake && \
    bash install_AMP.sh && \
    pip install -r requirements.txt --user && \
    pip install -r requirements-extra.txt --user && \
    pip install -r requirements-tests.txt --user && \
    pip list
|
|
@ -54,7 +54,7 @@ coverage xml
|
|||
You can build it on your own; note that it takes a lot of time, so be prepared.
|
||||
```bash
|
||||
git clone <git-repository>
|
||||
docker image build -t pytorch_lightning:devel-torch1.4 -f tests/Dockerfile --build-arg TORCH_VERSION=1.4 .
|
||||
docker image build -t pytorch_lightning:devel-torch1.4 -f dockers/cuda-extras/Dockerfile --build-arg TORCH_VERSION=1.4 .
|
||||
```
|
||||
To build other versions, select a different Dockerfile.
|
||||
```bash
|
||||
|
|
Loading…
Reference in New Issue