# Copyright The PyTorch Lightning team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Base image selectors. Both are declared ahead of FROM, which makes them
# usable in the FROM line; they are not redeclared inside the stage.
# NOTE(review): later RUN steps read $CUDA_VERSION - presumably the value
# exported by the nvidia/cuda base image; confirm.
ARG UBUNTU_VERSION="20.04"
ARG CUDA_VERSION="11.6.1"

# CUDA "devel" image for the requested CUDA / Ubuntu combination.
FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
# Interpreter and framework versions used by the build steps below;
# both can be overridden with --build-arg.
ARG PYTHON_VERSION="3.9"
ARG PYTORCH_VERSION="1.13"
# Run all following RUN steps under bash (they use [[ ]] and ${var//a/b}).
# pipefail makes a pipeline fail when any stage fails, not only the last one,
# so a broken `dpkg -s ... | grep ...` lookup cannot be silently masked.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]
# Image-wide environment.
# DEBIAN_FRONTEND / TZ: avoid the interactive tzdata prompt, see
# https://techoverflow.net/2019/05/18/how-to-fix-configuring-tzdata-interactive-input-when-building-docker-images/
# MAKEFLAGS: fixed at two jobs; the alternative MAKEFLAGS="-j$(nproc)" is kept disabled.
ENV DEBIAN_FRONTEND="noninteractive" \
    TZ="Europe/Prague" \
    PATH="${PATH}:/root/.local/bin" \
    CUDA_TOOLKIT_ROOT_DIR="/usr/local/cuda" \
    TORCH_CUDA_ARCH_LIST="3.7;5.0;6.0;7.0;7.5;8.0" \
    MKL_THREADING_LAYER="GNU" \
    MAKEFLAGS="-j2"
# System layer: register the NVIDIA CUDA apt key and repository, install the
# build toolchain, MPI and a capped NCCL, install the requested Python from the
# deadsnakes PPA and make it the default `python`, then clean apt state.
RUN \
    # TODO: Remove the manual key installation once the base image is updated.
    # https://github.com/NVIDIA/nvidia-docker/issues/1631
    # https://github.com/NVIDIA/nvidia-docker/issues/1631#issuecomment-1264715214
    apt-get update && apt-get install -y wget && \
    wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/3bf863cc.pub && \
    mkdir -p /etc/apt/keyrings/ && mv 3bf863cc.pub /etc/apt/keyrings/ && \
    # The redirect is required: without it the line is only printed and the
    # repository is never registered.
    # NOTE(review): the key comes from the ubuntu2004 path while the repository
    # points at ubuntu2204 - kept as found; confirm this is intentional.
    echo "deb [signed-by=/etc/apt/keyrings/3bf863cc.pub] https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/ /" > /etc/apt/sources.list.d/cuda.list && \
    apt-get update -qq --fix-missing && \
    # Version of the NCCL already installed, without the "-<revision>" suffix.
    NCCL_VER=$(dpkg -s libnccl2 | grep '^Version:' | awk -F ' ' '{print $2}' | awk -F '-' '{print $1}' | grep -ve '^\s*$') && \
    # CUDA version with its last component dropped, e.g. 11.6.1 -> 11.6.
    CUDA_VERSION_MM="${CUDA_VERSION%.*}" && \
    MAX_ALLOWED_NCCL=2.11.4 && \
    # Install the lower of (installed NCCL, MAX_ALLOWED_NCCL) for this CUDA release.
    TO_INSTALL_NCCL=$(echo -e "$MAX_ALLOWED_NCCL\n$NCCL_VER" | sort -V | head -n1)-1+cuda${CUDA_VERSION_MM} && \
    apt-get install -y --no-install-recommends --allow-downgrades --allow-change-held-packages \
        build-essential \
        pkg-config \
        cmake \
        git \
        wget \
        curl \
        unzip \
        ca-certificates \
        software-properties-common \
        libopenmpi-dev \
        openmpi-bin \
        ssh \
        ninja-build \
        libnccl2=$TO_INSTALL_NCCL \
        libnccl-dev=$TO_INSTALL_NCCL && \
    # Install python
    add-apt-repository ppa:deadsnakes/ppa && \
    apt-get install -y \
        python${PYTHON_VERSION} \
        python${PYTHON_VERSION}-distutils \
        python${PYTHON_VERSION}-dev \
    && \
    # Point both `python<major>` and `python` at the requested interpreter.
    update-alternatives --install /usr/bin/python${PYTHON_VERSION%%.*} python${PYTHON_VERSION%%.*} /usr/bin/python${PYTHON_VERSION} 1 && \
    update-alternatives --install /usr/bin/python python /usr/bin/python${PYTHON_VERSION} 1 && \
    # Cleaning
    apt-get autoremove -y && \
    apt-get clean && \
    rm -rf /root/.cache && \
    rm -rf /var/lib/apt/lists/*
# Bring the PyTorch requirement files and the repository's assistant script
# into the build; paths are relative to the current working directory.
COPY requirements/pytorch/ requirements/pytorch/
COPY .actions/assistant.py assistant.py
# Add the selected interpreter's site-packages directory to the module search path.
ENV PYTHONPATH="/usr/lib/python${PYTHON_VERSION}/site-packages"
# Python packaging layer: bootstrap pip for the selected interpreter, pin the
# requirement files to PYTORCH_VERSION and install the base requirements.
RUN \
    # TLS verification is left enabled (ca-certificates is installed by the
    # system layer): the downloaded script is executed as root right after.
    wget https://bootstrap.pypa.io/get-pip.py --progress=bar:force:noscroll && \
    python${PYTHON_VERSION} get-pip.py && \
    rm get-pip.py && \
    pip install -q fire && \
    # First two CUDA version components without the dot, e.g. 11.6.1 -> 116.
    # Only visible inside this RUN step.
    export CUDA_VERSION_MM=$(python -c "print(''.join('$CUDA_VERSION'.split('.')[:2]))") && \
    # Disable cache
    pip config set global.cache-dir false && \
    # set particular PyTorch version
    python ./requirements/pytorch/adjust-versions.py requirements/pytorch/base.txt ${PYTORCH_VERSION} && \
    python ./requirements/pytorch/adjust-versions.py requirements/pytorch/extra.txt ${PYTORCH_VERSION} && \
    python ./requirements/pytorch/adjust-versions.py requirements/pytorch/examples.txt ${PYTORCH_VERSION} && \
    # Install base requirements
    pip install -r requirements/pytorch/base.txt --no-cache-dir --find-links https://download.pytorch.org/whl/cu${CUDA_VERSION_MM}/torch_stable.html && \
    rm assistant.py
# Horovod build switches: CUDA location, NCCL for GPU operations,
# PyTorch + Gloo + MPI enabled, TensorFlow and MXNet disabled.
ENV HOROVOD_CUDA_HOME="${CUDA_TOOLKIT_ROOT_DIR}" \
    HOROVOD_GPU_OPERATIONS="NCCL" \
    HOROVOD_WITH_PYTORCH="1" \
    HOROVOD_WITHOUT_TENSORFLOW="1" \
    HOROVOD_WITHOUT_MXNET="1" \
    HOROVOD_WITH_GLOO="1" \
    HOROVOD_WITH_MPI="1"
# Build and install Horovod for the compute capabilities in TORCH_CUDA_ARCH_LIST,
# then verify the build.
RUN \
    # CUDA 10.2 doesn't support ampere architecture (8.0).
    # dpkg --compare-versions compares the versions numerically; a plain string
    # comparison would rank e.g. "9.2" above "11.0".
    if dpkg --compare-versions "$CUDA_VERSION" lt "11.0"; then export TORCH_CUDA_ARCH_LIST=${TORCH_CUDA_ARCH_LIST//;8.0/} ; echo $TORCH_CUDA_ARCH_LIST ; fi && \
    # "3.7;5.0;..." -> "3.7,5.0,..." -> "37,50,..."
    HOROVOD_BUILD_CUDA_CC_LIST=${TORCH_CUDA_ARCH_LIST//;/,} && \
    export HOROVOD_BUILD_CUDA_CC_LIST=${HOROVOD_BUILD_CUDA_CC_LIST//./} && \
    echo $HOROVOD_BUILD_CUDA_CC_LIST && \
    cmake --version && \
    pip install --no-cache-dir horovod && \
    horovodrun --check-build
# Install NVIDIA DALI, but only for Python versions below 3.9.
RUN \
    # Major CUDA version that the installed torch was built against.
    CUDA_VERSION_MAJOR=$(python -c "import torch; print(torch.version.cuda.split('.')[0])") && \
    # 1 when PYTHON_VERSION >= 3.9, else 0. The components are compared as
    # integers; comparing them as strings ranks "10" below "9", which would
    # treat Python 3.10+ as older than 3.9.
    py_ver=$(python -c "print(int(tuple(map(int, '$PYTHON_VERSION'.split('.')[:2])) >= (3, 9)))") && \
    # install DALI, needed for examples
    # todo: waiting for 1.4 - https://github.com/NVIDIA/DALI/issues/3144#issuecomment-877386691
    if [ "$py_ver" -eq 0 ]; then \
        pip install --extra-index-url https://developer.download.nvidia.com/compute/redist "nvidia-dali-cuda${CUDA_VERSION_MAJOR}0>1.0" ; \
        python -c 'from nvidia.dali.pipeline import Pipeline' ; \
    fi
# Build NVIDIA apex with its C++ and CUDA extensions and check that it imports.
RUN \
    # CUDA 10.2 doesn't support ampere architecture (8.0).
    # dpkg --compare-versions compares the versions numerically; a plain string
    # comparison would rank e.g. "9.2" above "11.0".
    if dpkg --compare-versions "$CUDA_VERSION" lt "11.0"; then export TORCH_CUDA_ARCH_LIST=${TORCH_CUDA_ARCH_LIST//;8.0/} ; echo $TORCH_CUDA_ARCH_LIST ; fi && \
    # install NVIDIA apex
    pip install -v --disable-pip-version-check --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" https://github.com/NVIDIA/apex/archive/refs/heads/master.zip && \
    python -c "from apex import amp"
# Bagua: install the build matching the CUDA release; the whole step is
# skipped when PYTORCH_VERSION is 1.13.
RUN \
    # install Bagua
    if [[ "$PYTORCH_VERSION" != "1.13" ]]; then \
        # CUDA major+minor without the dot, e.g. 11.6.1 -> 116
        cuda_mm=$(python -c "print(''.join('$CUDA_VERSION'.split('.')[:2]))") ; \
        # first listed Bagua CUDA build that is not newer than cuda_mm
        bagua_cuda=$(python -c "print([ver for ver in [116,113,111,102] if $cuda_mm >= ver][0])") ; \
        pip install "bagua-cuda$bagua_cuda" ; \
        # install_deps() is only run when the build matches the CUDA release exactly
        if [[ "$cuda_mm" = "$bagua_cuda" ]]; then \
            python -c "import bagua_core; bagua_core.install_deps()" ; \
        fi ; \
        python -c "import bagua; print(bagua.__version__)" ; \
    fi
# ColossalAI: install the wheel built for the installed torch / CUDA pair.
RUN \
    # install ColossalAI
    # TODO: 1.13 wheels are not released, remove skip once they are
    if [[ $PYTORCH_VERSION != "1.13" ]]; then \
        # "<major>.<minor>" of the installed torch, taken from the dot-separated
        # fields (a fixed-width slice would yield "1.9." for torch 1.9.0).
        PYTORCH_VERSION_COLOSSALAI=$(python -c "import torch; print('.'.join(torch.__version__.split('+')[0].split('.')[:2]))") ; \
        # CUDA version torch was built with, as reported by torch.
        CUDA_VERSION_MM_COLOSSALAI=$(python -c "import torch; print(torch.version.cuda)") ; \
        # first listed wheel CUDA version that is not newer than torch's CUDA
        CUDA_VERSION_COLOSSALAI=$(python -c "print([ver for ver in [11.3, 11.1] if $CUDA_VERSION_MM_COLOSSALAI >= ver][0])") ; \
        pip install "colossalai==0.1.10+torch${PYTORCH_VERSION_COLOSSALAI}cu${CUDA_VERSION_COLOSSALAI}" --find-links https://release.colossalai.org ; \
        python -c "import colossalai; print(colossalai.__version__)" ; \
    fi
# Install the development requirements and the remaining strategy packages.
RUN \
    # Shell variables do not carry over between RUN steps, so the CUDA tag for
    # the wheel index (e.g. 11.6.1 -> 116) has to be computed again here.
    CUDA_VERSION_MM=$(python -c "print(''.join('$CUDA_VERSION'.split('.')[:2]))") && \
    # install rest of strategies
    # remove colossalai and horovod from requirements since they are installed separately
    python -c "fname = 'requirements/pytorch/strategies.txt' ; lines = [line for line in open(fname).readlines() if 'colossalai' not in line and 'horovod' not in line] ; open(fname, 'w').writelines(lines)" && \
    cat requirements/pytorch/strategies.txt && \
    pip install -r requirements/pytorch/devel.txt -r requirements/pytorch/strategies.txt --no-cache-dir --find-links https://download.pytorch.org/whl/cu${CUDA_VERSION_MM}/torch_stable.html
# Place the two availability-check scripts in the current working directory.
COPY ./requirements/pytorch/check-avail-extras.py ./check-avail-extras.py
COPY ./requirements/pytorch/check-avail-strategies.py ./check-avail-strategies.py
# Final verification: report the installed packages, assert that the Python and
# torch versions match the build arguments, run the extras availability check
# and drop the requirements directory.
RUN \
    # Show what we have
    pip --version \
    && pip list \
    # interpreter must be exactly PYTHON_VERSION (major.minor)
    && python -c "import sys; ver = sys.version_info ; assert f'{ver.major}.{ver.minor}' == '$PYTHON_VERSION', ver" \
    # torch version must start with PYTORCH_VERSION
    && python -c "import torch; assert torch.__version__.startswith('$PYTORCH_VERSION'), torch.__version__" \
    && python requirements/pytorch/check-avail-extras.py \
    && rm -rf requirements/