update GPU to PT 1.5 (#2779)

* update gpu PT 1.6

* fix docker

* use PT 1.5

* Update tests/install_AMP.sh

Co-authored-by: Nathan Raw <nxr9266@g.rit.edu>

Co-authored-by: Nathan Raw <nxr9266@g.rit.edu>
This commit is contained in:
Jirka Borovec 2020-08-02 14:14:53 +02:00 committed by GitHub
parent a0c4365278
commit 448be60701
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 25 additions and 5 deletions

View File

@ -6,7 +6,7 @@ name: torch-GPU
steps:
- name: testing
image: pytorchlightning/pytorch_lightning:devel-pt1.4
image: pytorchlightning/pytorch_lightning:cuda-extras-py3.7-torch1.5
environment:
SLURM_LOCALID: 0

View File

@ -6,12 +6,16 @@
# --build-arg TORCH_VERSION=1.2 --build-arg CUDA_VERSION=10.0
# --build-arg TORCH_VERSION=1.1.0 --build-arg CUDA_VERSION=10.0 --build-arg CUDNN_VERSION=7.5
ARG TORCH_VERSION=1.6
ARG TORCH_VERSION=1.6.0
ARG CUDA_VERSION=10.1
ARG CUDNN_VERSION=7
# TODO: make this image from pure Ubuntu + install all NVIDIA drivers
# FROM nvidia/cuda:${CUDA_VERSION}-base
FROM pytorch/pytorch:${TORCH_VERSION}-cuda${CUDA_VERSION}-cudnn${CUDNN_VERSION}-devel
SHELL ["/bin/bash", "-c"]
ENV HOROVOD_GPU_ALLREDUCE=NCCL
ENV HOROVOD_GPU_BROADCAST=NCCL
ENV HOROVOD_WITH_PYTORCH=1
@ -28,13 +32,28 @@ COPY ./requirements/extra.txt requirements-extra.txt
COPY ./requirements/test.txt requirements-tests.txt
COPY ./requirements/examples.txt requirements-examples.txt
RUN apt-get update && apt-get install -y cmake && \
# Install AMP
RUN apt-get update && \
apt-get install -y \
git \
cmake \
&& \
# Install AMP
# TODO: skip this install for PT >= 1.6
bash install_AMP.sh && \
# Install all requirements
pip install -r requirements.txt && \
# HOROVOD_BUILD_ARCH_FLAGS="-mfma" && \
pip install -r requirements-extra.txt && \
pip install -r requirements-examples.txt && \
pip install -r requirements-tests.txt && \
rm install_AMP.sh && \
rm requirements* && \
# Cleaning
apt-get autoremove -y && \
apt-get clean && \
rm -rf /root/.cache && \
# Show what we have
pip list

View File

@ -4,6 +4,7 @@ ROOT=$PWD
git clone https://github.com/NVIDIA/apex
cd apex
pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" ./
pip install -v --no-cache-dir ./
# If the build with extensions fails, you can run this line to build without extensions
# pip install -v --no-cache-dir ./
cd $ROOT
rm -rf apex