update GPU to PT 1.5 (#2779)
* update gpu PT 1.6 * fix docker * use PT 1.5 * Update tests/install_AMP.sh Co-authored-by: Nathan Raw <nxr9266@g.rit.edu> Co-authored-by: Nathan Raw <nxr9266@g.rit.edu>
This commit is contained in:
parent
a0c4365278
commit
448be60701
|
@ -6,7 +6,7 @@ name: torch-GPU
|
|||
|
||||
steps:
|
||||
- name: testing
|
||||
image: pytorchlightning/pytorch_lightning:devel-pt1.4
|
||||
image: pytorchlightning/pytorch_lightning:cuda-extras-py3.7-torch1.5
|
||||
|
||||
environment:
|
||||
SLURM_LOCALID: 0
|
||||
|
|
|
@ -6,12 +6,16 @@
|
|||
# --build-arg TORCH_VERSION=1.2 --build-arg CUDA_VERSION=10.0
|
||||
# --build-arg TORCH_VERSION=1.1.0 --build-arg CUDA_VERSION=10.0 --build-arg CUDNN_VERSION=7.5
|
||||
|
||||
ARG TORCH_VERSION=1.6
|
||||
ARG TORCH_VERSION=1.6.0
|
||||
ARG CUDA_VERSION=10.1
|
||||
ARG CUDNN_VERSION=7
|
||||
|
||||
# TODO: make this image from pure Ubuntu + install all NVIDIA drivers
|
||||
# FROM nvidia/cuda:${CUDA_VERSION}-base
|
||||
FROM pytorch/pytorch:${TORCH_VERSION}-cuda${CUDA_VERSION}-cudnn${CUDNN_VERSION}-devel
|
||||
|
||||
SHELL ["/bin/bash", "-c"]
|
||||
|
||||
ENV HOROVOD_GPU_ALLREDUCE=NCCL
|
||||
ENV HOROVOD_GPU_BROADCAST=NCCL
|
||||
ENV HOROVOD_WITH_PYTORCH=1
|
||||
|
@ -28,13 +32,28 @@ COPY ./requirements/extra.txt requirements-extra.txt
|
|||
COPY ./requirements/test.txt requirements-tests.txt
|
||||
COPY ./requirements/examples.txt requirements-examples.txt
|
||||
|
||||
RUN apt-get update && apt-get install -y cmake && \
|
||||
# Install AMP
|
||||
RUN apt-get update && \
|
||||
apt-get install -y \
|
||||
git \
|
||||
cmake \
|
||||
&& \
|
||||
|
||||
# Install AMP
|
||||
# TODO: skip this install for PT >= 1.6
|
||||
bash install_AMP.sh && \
|
||||
# Install all requirements
|
||||
pip install -r requirements.txt && \
|
||||
# HOROVOD_BUILD_ARCH_FLAGS="-mfma" && \
|
||||
pip install -r requirements-extra.txt && \
|
||||
pip install -r requirements-examples.txt && \
|
||||
pip install -r requirements-tests.txt && \
|
||||
rm install_AMP.sh && \
|
||||
rm requirements* && \
|
||||
|
||||
# Cleaning
|
||||
apt-get autoremove -y && \
|
||||
apt-get clean && \
|
||||
rm -rf /root/.cache && \
|
||||
|
||||
# Show what we have
|
||||
pip list
|
||||
|
|
|
@ -4,6 +4,7 @@ ROOT=$PWD
|
|||
git clone https://github.com/NVIDIA/apex
|
||||
cd apex
|
||||
pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" ./
|
||||
pip install -v --no-cache-dir ./
|
||||
# If build with extensions fails, you can run this line to build without extensions
|
||||
# pip install -v --no-cache-dir ./
|
||||
cd $ROOT
|
||||
rm -rf apex
|
||||
|
|
Loading…
Reference in New Issue