Fix horovod installation in `base-cuda` Dockerfile (#11811)

* pip install --user

* add checks

* rm unrelated comment

* consistent format

* Fail if horovod not found

Co-authored-by: Jirka <jirka.borovec@seznam.cz>
Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com>
Co-authored-by: Carlos Mocholí <carlossmocholi@gmail.com>
This commit is contained in:
Aki Nitta 2022-02-10 16:48:33 +09:00 committed by GitHub
parent 8d23f6287a
commit 0a1b8b880d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 10 additions and 7 deletions

View File

@ -129,4 +129,5 @@ RUN \
conda info && \
pip list && \
python -c "import sys; ver = sys.version_info ; assert f'{ver.major}.{ver.minor}' == '$PYTHON_VERSION', ver" && \
python -c "import torch; assert torch.__version__.startswith('$PYTORCH_VERSION'), torch.__version__"
python -c "import torch; assert torch.__version__.startswith('$PYTORCH_VERSION'), torch.__version__" && \
python -c "import horovod.torch"

View File

@ -75,7 +75,6 @@ ENV \
COPY ./requirements.txt requirements.txt
COPY ./requirements/ ./requirements/
# conda init
RUN \
wget https://bootstrap.pypa.io/get-pip.py --progress=bar:force:noscroll --no-check-certificate && \
python${PYTHON_VERSION} get-pip.py && \
@ -88,11 +87,11 @@ RUN \
python ./requirements/adjust_versions.py requirements/extra.txt ${PYTORCH_VERSION} && \
python ./requirements/adjust_versions.py requirements/examples.txt ${PYTORCH_VERSION} && \
# Install all requirements
pip install -r requirements/devel.txt --no-cache-dir && \
pip install --user -r requirements/devel.txt --no-cache-dir && \
rm -rf requirements.* requirements/
RUN \
CUDA_VERSION_MAJOR=$(python -c "import torch ; print(torch.version.cuda.split('.')[0])") && \
CUDA_VERSION_MAJOR=$(python -c "import torch; print(torch.version.cuda.split('.')[0])") && \
py_ver=$(python -c "print(int('$PYTHON_VERSION'.split('.') >= '3.9'.split('.')))") && \
# install DALI, needed for examples
# todo: waiting for 1.4 - https://github.com/NVIDIA/DALI/issues/3144#issuecomment-877386691
@ -108,15 +107,18 @@ RUN \
RUN \
# install FairScale
pip install fairscale==0.4.0
pip install fairscale==0.4.0 && \
python -c "import fairscale; print(fairscale.__version__)"
RUN \
# install DeepSpeed
pip install deepspeed==0.5.7
pip install deepspeed==0.5.7 && \
python -c "import deepspeed; print(deepspeed.__version__)"
RUN \
# Show what we have
pip --version && \
pip list && \
python -c "import sys; ver = sys.version_info ; assert f'{ver.major}.{ver.minor}' == '$PYTHON_VERSION', ver" && \
python -c "import torch; assert torch.__version__.startswith('$PYTORCH_VERSION'), torch.__version__"
python -c "import torch; assert torch.__version__.startswith('$PYTORCH_VERSION'), torch.__version__" && \
python -c "import horovod.torch"