2020-10-26 10:47:09 +00:00
|
|
|
# Copyright The PyTorch Lightning team.
|
|
|
|
#
|
|
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
# you may not use this file except in compliance with the License.
|
|
|
|
# You may obtain a copy of the License at
|
|
|
|
#
|
|
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
#
|
|
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
# See the License for the specific language governing permissions and
|
|
|
|
# limitations under the License.
|
|
|
|
|
2022-03-10 18:38:47 +00:00
|
|
|
# CUDA release that selects the base image tag below.
ARG CUDA_VERSION=11.3.1

FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu18.04

# Stage-scoped build arguments: declared after FROM so the RUN steps of this
# stage can read them. Override any of them with `--build-arg NAME=value`.
ARG CONDA_VERSION=4.11.0
ARG PYTHON_VERSION=3.9
ARG PYTORCH_VERSION=1.8
|
2020-10-26 10:47:09 +00:00
|
|
|
|
|
|
|
# Run every RUN step under bash: later steps use the bash-only ${var//a/b}
# substitution. `-o pipefail` makes a pipeline fail when any command in it fails
# (e.g. the left-hand side of `pip list | grep torch`), instead of reporting only
# the exit status of the last command.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]
|
2021-07-14 15:04:33 +00:00
|
|
|
# Non-interactive apt plus a preset time zone, so installing tzdata never waits for input:
# https://techoverflow.net/2019/05/18/how-to-fix-configuring-tzdata-interactive-input-when-building-docker-images/
# (disabled) CUDA_TOOLKIT_ROOT_DIR="/usr/local/cuda"
ENV \
    DEBIAN_FRONTEND=noninteractive \
    TZ=Europe/Prague \
    MKL_THREADING_LAYER=GNU \
    PATH="$PATH:/root/.local/bin"
|
2020-10-26 10:47:09 +00:00
|
|
|
|
2021-07-14 15:04:33 +00:00
|
|
|
# Install the OS build toolchain and Miniconda in a single layer, and purge the
# apt/user caches in that same layer so they are never stored in the image.
RUN apt-get update -qq --fix-missing && \
    apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        cmake \
        curl \
        git \
        libopenmpi-dev \
        unzip \
        wget \
    && \
    # Install conda and python.
    # NOTE new Conda does not forward the exit status... https://github.com/conda/conda/issues/8385
    # curl flags: -f fails on HTTP errors instead of saving the error page as the
    # installer, -L follows redirects, -sS hides the progress meter but keeps errors.
    curl -fsSL -o ~/miniconda.sh "https://repo.anaconda.com/miniconda/Miniconda3-py38_${CONDA_VERSION}-Linux-x86_64.sh" && \
    chmod +x ~/miniconda.sh && \
    # -b: batch mode, installs without prompting (default prefix is ~/miniconda3)
    ~/miniconda.sh -b && \
    rm ~/miniconda.sh && \
    # Cleaning
    apt-get autoremove -y && \
    apt-get clean && \
    rm -rf /root/.cache /var/lib/apt/lists/*
|
|
|
|
|
2021-01-29 12:27:18 +00:00
|
|
|
# Number of parallel make jobs exported through MAKEFLAGS.
# Override with `--build-arg MAKE_JOBS=<n>`.
ARG MAKE_JOBS=2

# Put the base conda installation on the executable/library search paths and
# export the settings read by the native builds in later steps.
ENV \
    PATH="/root/miniconda3/bin:$PATH" \
    LD_LIBRARY_PATH="/root/miniconda3/lib:$LD_LIBRARY_PATH" \
    CUDA_TOOLKIT_ROOT_DIR="/usr/local/cuda" \
    MKL_THREADING_LAYER=GNU \
    # ENV performs no command substitution, so the former "-j$(nproc)" was stored
    # literally and never became a CPU count; use an explicit job count instead.
    MAKEFLAGS="-j${MAKE_JOBS}" \
    TORCH_CUDA_ARCH_LIST="3.7;5.0;6.0;7.0;7.5;8.0" \
    CONDA_ENV=lightning
|
|
|
|
|
2020-10-26 10:47:09 +00:00
|
|
|
COPY environment.yml environment.yml

# Create the $CONDA_ENV environment with the requested Python / PyTorch builds,
# then apply the remaining packages from environment.yml on top of it.
# `-y` is passed explicitly to the conda update/install steps so they do not
# depend on how a confirmation prompt behaves when no terminal is attached.
RUN conda update -y -n base -c defaults conda && \
    conda install -y mamba -n base -c conda-forge && \
    mamba create -y --name $CONDA_ENV python=${PYTHON_VERSION} pytorch=${PYTORCH_VERSION} torchvision torchtext cudatoolkit=${CUDA_VERSION} -c nvidia -c pytorch -c pytorch-test -c pytorch-nightly && \
    conda init bash && \
    # Rewrite the `- python`, `- pytorch`, `- torchtext` and `- torchvision` entries of
    # environment.yml into comments, so the update below leaves the versions installed above alone.
    # NOTE: this requires that the channel is presented in the yaml before packages
    printf "import re;\nfname = 'environment.yml';\nreq = open(fname).read();\nfor n in ['python', 'pytorch', 'torchtext', 'torchvision']:\n req = re.sub(rf'- {n}[>=]+', f'# - {n}=', req);\nopen(fname, 'w').write(req)" > prune.py && \
    python prune.py && \
    rm prune.py && \
    # Print the pruned file into the build log
    cat environment.yml && \
    mamba env update --name $CONDA_ENV --file environment.yml && \
    mamba clean -ya && \
    rm environment.yml
|
|
|
|
|
2021-01-29 12:27:18 +00:00
|
|
|
# Prepend the $CONDA_ENV environment's lib/ and bin/ directories to the search paths.
ENV \
    LD_LIBRARY_PATH="/root/miniconda3/envs/${CONDA_ENV}/lib:$LD_LIBRARY_PATH" \
    PATH="/root/miniconda3/envs/${CONDA_ENV}/bin:$PATH"
|
2020-10-26 10:47:09 +00:00
|
|
|
|
|
|
|
# Requirement lists, copied into the working directory under flat names.
COPY requirements/extra.txt requirements-extra.txt
COPY requirements/examples.txt requirements-examples.txt
COPY requirements/test.txt requirements-test.txt
# Helper scripts that the next RUN step applies to those lists.
COPY requirements/adjust-versions.py requirements_adjust_versions.py
COPY .github/prune-packages.py requirements_prune_packages.py
|
2020-10-26 10:47:09 +00:00
|
|
|
|
|
|
|
# Report the installed torch build, pre-process the requirement lists and install them.
RUN pip list | grep torch \
    && python -c "import torch; print(torch.__version__)" \
    # NOTE(review): presumably aligns the pinned versions with the installed torch - confirm in adjust-versions.py
    && python requirements_adjust_versions.py requirements-extra.txt \
    # Save the requirement line(s) mentioning horovod to their own file, then prune horovod from the extras
    && python -c "print(' '.join([ln for ln in open('requirements-extra.txt').readlines() if 'horovod' in ln]))" > requirements_horovod.txt \
    && python requirements_prune_packages.py requirements-extra.txt "horovod" \
    && python requirements_adjust_versions.py requirements-examples.txt \
    # Install remaining requirements
    && pip install -r requirements-extra.txt --no-cache-dir --find-links https://download.pytorch.org/whl/test/torch_test.html \
    && pip install -r requirements-examples.txt --no-cache-dir --find-links https://download.pytorch.org/whl/test/torch_test.html \
    && pip install -r requirements-test.txt --no-cache-dir
|
|
|
|
|
|
|
|
# CONDA_DEFAULT_ENV makes $CONDA_ENV the default environment; remove that line to
# keep conda's own default. The HOROVOD_* switches enable the PyTorch and Gloo
# integrations with NCCL GPU operations and disable TensorFlow, MXNet and MPI.
ENV \
    CONDA_DEFAULT_ENV="${CONDA_ENV}" \
    HOROVOD_CUDA_HOME="${CUDA_TOOLKIT_ROOT_DIR}" \
    HOROVOD_GPU_OPERATIONS=NCCL \
    HOROVOD_WITH_PYTORCH=1 \
    HOROVOD_WITH_GLOO=1 \
    HOROVOD_WITHOUT_TENSORFLOW=1 \
    HOROVOD_WITHOUT_MXNET=1 \
    HOROVOD_WITHOUT_MPI=1
|
|
|
|
|
|
|
|
# Install Horovod from the requirement line(s) saved in requirements_horovod.txt.
# HOROVOD_BUILD_CUDA_CC_LIST is derived from TORCH_CUDA_ARCH_LIST by replacing
# every ';' with ',' and dropping the dots (e.g. "3.7;5.0" -> "37,50").
RUN HOROVOD_BUILD_CUDA_CC_LIST=${TORCH_CUDA_ARCH_LIST//";"/","} \
    && export HOROVOD_BUILD_CUDA_CC_LIST=${HOROVOD_BUILD_CUDA_CC_LIST//"."/""} \
    && cat requirements_horovod.txt \
    && pip install --no-cache-dir -r requirements_horovod.txt \
    # All requirement lists and helper scripts are no longer needed past this point
    && rm requirements*
|
2020-10-26 10:47:09 +00:00
|
|
|
|
2020-11-12 14:03:43 +00:00
|
|
|
# Install NVIDIA DALI (needed for examples) for Python versions below 3.9 only.
RUN CUDA_VERSION_MAJOR=$(python -c "import torch ; print(torch.version.cuda.split('.')[0])") && \
    # py_ver is 1 when PYTHON_VERSION >= 3.9, otherwise 0. The versions are compared as
    # integer tuples: comparing the split strings orders '10' before '9', which
    # would classify Python 3.10+ as older than 3.9.
    py_ver=$(python -c "print(int(tuple(int(p) for p in '$PYTHON_VERSION'.split('.')[:2]) >= (3, 9)))") && \
    # install DALI, needed for examples
    # todo: waiting for 1.4 - https://github.com/NVIDIA/DALI/issues/3144#issuecomment-877386691
    if [ "$py_ver" -eq 0 ]; then \
        # The wheel name encodes the CUDA major version, e.g. nvidia-dali-cuda110 for CUDA 11
        pip install --no-cache-dir --extra-index-url https://developer.download.nvidia.com/compute/redist "nvidia-dali-cuda${CUDA_VERSION_MAJOR}0>1.0" && \
        python -c 'from nvidia.dali.pipeline import Pipeline' ; \
    fi
|
2020-11-12 14:03:43 +00:00
|
|
|
|
2020-10-26 10:47:09 +00:00
|
|
|
# install NVIDIA apex from the upstream master archive with the `--cuda_ext`
# build option, then check that `apex.amp` can be imported
RUN pip install --no-cache-dir --global-option="--cuda_ext" https://github.com/NVIDIA/apex/archive/refs/heads/master.zip \
    && python -c "from apex import amp"
|
2020-10-26 10:47:09 +00:00
|
|
|
|
|
|
|
# Show what we have, then fail the build if the interpreter or torch version does
# not match the build arguments, or if Horovod's torch bindings cannot be imported.
RUN pip --version \
    && conda info \
    && pip list \
    && python -c "import sys; ver = sys.version_info ; assert f'{ver.major}.{ver.minor}' == '$PYTHON_VERSION', ver" \
    && python -c "import torch; assert torch.__version__.startswith('$PYTORCH_VERSION'), torch.__version__" \
    && python -c "import horovod.torch"
|