CI: fix running PT 1.11 (#12304)
* fix fire * horovod * assistant * cmake * u20 * cuda * -j2 * fix mypy Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com>
This commit is contained in:
parent
90a9da5abb
commit
7ee690758c
|
@ -95,16 +95,15 @@ jobs:
|
|||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
# the config used in '.github/workflows/ci_test-conda.yml'
|
||||
python_version: ["3.8"]
|
||||
pytorch_version: ["1.8", "1.9", "1.10"]
|
||||
include:
|
||||
# see: https://pytorch.org/get-started/previous-versions/
|
||||
- {python_version: "3.8", pytorch_version: "1.8", cuda_version: "11.1"}
|
||||
- {python_version: "3.8", pytorch_version: "1.9", cuda_version: "11.1"}
|
||||
- {python_version: "3.8", pytorch_version: "1.10", cuda_version: "11.1"}
|
||||
- {python_version: "3.9", pytorch_version: "1.11", cuda_version: "11.3.1"}
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v2
|
||||
- run: |
|
||||
cuda=$(python -c "from distutils.version import LooseVersion as LVer ; print(11.1 if LVer('${{matrix.pytorch_version}}') > LVer('1.7') else 10.2)" 2>&1)
|
||||
echo "::set-output name=CUDA::$cuda"
|
||||
id: extend
|
||||
- name: Build Conda Docker
|
||||
# publish master/release
|
||||
uses: docker/build-push-action@v2
|
||||
|
@ -112,7 +111,7 @@ jobs:
|
|||
build-args: |
|
||||
PYTHON_VERSION=${{ matrix.python_version }}
|
||||
PYTORCH_VERSION=${{ matrix.pytorch_version }}
|
||||
CUDA_VERSION=${{ steps.extend.outputs.CUDA }}
|
||||
CUDA_VERSION=${{ matrix.cuda_version }}
|
||||
file: dockers/base-conda/Dockerfile
|
||||
push: false
|
||||
timeout-minutes: 75
|
||||
|
|
|
@ -33,14 +33,19 @@ jobs:
|
|||
- uses: actions/checkout@v2
|
||||
|
||||
- name: Update dependencies
|
||||
env:
|
||||
HOROVOD_BUILD_ARCH_FLAGS: "-mfma"
|
||||
HOROVOD_WITHOUT_MXNET: 1
|
||||
HOROVOD_WITHOUT_TENSORFLOW: 1
|
||||
run: |
|
||||
set -e
|
||||
conda info
|
||||
conda list
|
||||
# adjust versions according to the installed Torch version
|
||||
python ./requirements/adjust-versions.py requirements/extra.txt
|
||||
python ./requirements/adjust-versions.py requirements/examples.txt
|
||||
pip install --requirement requirements/devel.txt --find-links https://download.pytorch.org/whl/cpu/torch_stable.html
|
||||
# set a per-test timeout of 2.5 minutes to fail sooner. this aids with hanging tests
|
||||
pip install -r requirements/devel.txt --find-links https://download.pytorch.org/whl/cpu/torch_stable.html
|
||||
# set a per-test timeout of 2.5 minutes to fail sooner; this aids with hanging tests
|
||||
pip install pytest-timeout
|
||||
pip list
|
||||
# sanity check
|
||||
|
|
|
@ -93,7 +93,7 @@ jobs:
|
|||
# the config used in '.azure-pipelines/gpu-tests.yml'
|
||||
- {python_version: "3.7", pytorch_version: "1.8"}
|
||||
# latest (not used)
|
||||
- {python_version: "3.9", pytorch_version: "1.10"}
|
||||
- {python_version: "3.9", pytorch_version: "1.11"}
|
||||
|
||||
steps:
|
||||
- name: Checkout
|
||||
|
|
|
@ -14,9 +14,9 @@ or with specific arguments
|
|||
```bash
|
||||
git clone <git-repository>
|
||||
docker image build \
|
||||
-t pytorch-lightning:base-cuda-py3.9-pt1.8 \
|
||||
-t pytorch-lightning:base-cuda-py3.7-pt1.8 \
|
||||
-f dockers/base-cuda/Dockerfile \
|
||||
--build-arg PYTHON_VERSION=3.9 \
|
||||
--build-arg PYTHON_VERSION=3.7 \
|
||||
--build-arg PYTORCH_VERSION=1.8 \
|
||||
.
|
||||
```
|
||||
|
@ -26,10 +26,10 @@ or nightly version from Conda
|
|||
```bash
|
||||
git clone <git-repository>
|
||||
docker image build \
|
||||
-t pytorch-lightning:base-conda-py3.8-pt1.9 \
|
||||
-t pytorch-lightning:base-conda-py3.9-pt1.11 \
|
||||
-f dockers/base-conda/Dockerfile \
|
||||
--build-arg PYTHON_VERSION=3.8 \
|
||||
--build-arg PYTORCH_VERSION=1.9 \
|
||||
--build-arg PYTHON_VERSION=3.9 \
|
||||
--build-arg PYTORCH_VERSION=1.11 \
|
||||
.
|
||||
```
|
||||
|
||||
|
|
|
@ -14,7 +14,7 @@
|
|||
|
||||
ARG CUDA_VERSION=11.3.1
|
||||
|
||||
FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu18.04
|
||||
FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu20.04
|
||||
|
||||
ARG PYTHON_VERSION=3.9
|
||||
ARG PYTORCH_VERSION=1.8
|
||||
|
@ -59,8 +59,8 @@ ENV \
|
|||
LD_LIBRARY_PATH="/root/miniconda3/lib:$LD_LIBRARY_PATH" \
|
||||
CUDA_TOOLKIT_ROOT_DIR="/usr/local/cuda" \
|
||||
MKL_THREADING_LAYER=GNU \
|
||||
MAKEFLAGS="-j$(nproc)" \
|
||||
# MAKEFLAGS="-j1" \
|
||||
# MAKEFLAGS="-j$(nproc)" \
|
||||
MAKEFLAGS="-j2" \
|
||||
TORCH_CUDA_ARCH_LIST="3.7;5.0;6.0;7.0;7.5;8.0" \
|
||||
CONDA_ENV=lightning
|
||||
|
||||
|
@ -84,9 +84,9 @@ ENV \
|
|||
PATH=/root/miniconda3/envs/${CONDA_ENV}/bin:$PATH \
|
||||
LD_LIBRARY_PATH="/root/miniconda3/envs/${CONDA_ENV}/lib:$LD_LIBRARY_PATH"
|
||||
|
||||
COPY ./requirements.txt requirements.txt
|
||||
COPY ./requirements/extra.txt requirements-extra.txt
|
||||
COPY ./requirements/examples.txt requirements-examples.txt
|
||||
COPY ./requirements/test.txt requirements-test.txt
|
||||
COPY ./requirements/adjust-versions.py requirements_adjust_versions.py
|
||||
COPY ./.actions/assistant.py assistant.py
|
||||
|
||||
|
@ -95,14 +95,25 @@ RUN \
|
|||
python -c "import torch; print(torch.__version__)" && \
|
||||
python requirements_adjust_versions.py requirements-extra.txt && \
|
||||
python -c "print(' '.join([ln for ln in open('requirements-extra.txt').readlines() if 'horovod' in ln]))" > requirements_horovod.txt && \
|
||||
pip install -q fire && \
|
||||
python assistant.py requirements_prune_pkgs requirements-extra.txt "horovod" && \
|
||||
python requirements_adjust_versions.py requirements-examples.txt && \
|
||||
# Install remaining requirements
|
||||
pip install -r requirements.txt --no-cache-dir --find-links https://download.pytorch.org/whl/test/torch_test.html && \
|
||||
pip install -r requirements-extra.txt --no-cache-dir --find-links https://download.pytorch.org/whl/test/torch_test.html && \
|
||||
pip install -r requirements-examples.txt --no-cache-dir --find-links https://download.pytorch.org/whl/test/torch_test.html && \
|
||||
pip install -r requirements-test.txt --no-cache-dir && \
|
||||
rm assistant.py
|
||||
|
||||
RUN \
|
||||
apt-get purge -y cmake && \
|
||||
wget -q https://github.com/Kitware/CMake/releases/download/v3.20.2/cmake-3.20.2.tar.gz && \
|
||||
tar -zxvf cmake-3.20.2.tar.gz && \
|
||||
cd cmake-3.20.2 && \
|
||||
./bootstrap -- -DCMAKE_USE_OPENSSL=OFF && \
|
||||
make && \
|
||||
make install && \
|
||||
cmake --version
|
||||
|
||||
ENV \
|
||||
# if you want this environment to be the default one, uncomment the following line:
|
||||
CONDA_DEFAULT_ENV=${CONDA_ENV} \
|
||||
|
|
|
@ -12,9 +12,9 @@
|
|||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
ARG CUDA_VERSION=10.2
|
||||
ARG CUDA_VERSION=11.1
|
||||
|
||||
FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu18.04
|
||||
FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu20.04
|
||||
|
||||
ARG PYTHON_VERSION=3.9
|
||||
ARG PYTORCH_VERSION=1.8
|
||||
|
@ -28,8 +28,8 @@ ENV \
|
|||
CUDA_TOOLKIT_ROOT_DIR="/usr/local/cuda" \
|
||||
TORCH_CUDA_ARCH_LIST="3.7;5.0;6.0;7.0;7.5;8.0" \
|
||||
MKL_THREADING_LAYER=GNU \
|
||||
MAKEFLAGS="-j$(nproc)"
|
||||
# MAKEFLAGS="-j1"
|
||||
# MAKEFLAGS="-j$(nproc)"
|
||||
MAKEFLAGS="-j2"
|
||||
|
||||
RUN apt-get update -qq --fix-missing && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
|
@ -64,7 +64,7 @@ RUN apt-get update -qq --fix-missing && \
|
|||
|
||||
COPY ./requirements.txt requirements.txt
|
||||
COPY ./requirements/ ./requirements/
|
||||
COPY ./.github/prune-packages.py requirements/prune_packages.py
|
||||
COPY ./.actions/assistant.py assistant.py
|
||||
|
||||
ENV PYTHONPATH=/usr/lib/python${PYTHON_VERSION}/site-packages
|
||||
|
||||
|
@ -73,27 +73,21 @@ RUN \
|
|||
python${PYTHON_VERSION} get-pip.py && \
|
||||
rm get-pip.py && \
|
||||
|
||||
pip install -q fire && \
|
||||
# Disable cache \
|
||||
export BAGUA_CUDA_VERSION=${CUDA_VERSION//"."/""} && \
|
||||
CUDA_VERSION_MM=$(python -c "print(''.join('$CUDA_VERSION'.split('.')[:2]))") && \
|
||||
export BAGUA_CUDA_VERSION=$CUDA_VERSION_MM && \
|
||||
pip config set global.cache-dir false && \
|
||||
# set particular PyTorch version
|
||||
python ./requirements/adjust-versions.py requirements.txt ${PYTORCH_VERSION} && \
|
||||
python ./requirements/adjust-versions.py requirements/extra.txt ${PYTORCH_VERSION} && \
|
||||
python ./requirements/adjust-versions.py requirements/examples.txt ${PYTORCH_VERSION} && \
|
||||
python -c "print(' '.join([ln for ln in open('requirements/extra.txt').readlines() if 'horovod' in ln]))" > ./requirements/horovod.txt && \
|
||||
python requirements/prune_packages.py requirements/extra.txt "horovod" && \
|
||||
# Install all requirements
|
||||
pip install -r requirements/devel.txt --no-cache-dir && \
|
||||
rm -rf requirements.*
|
||||
|
||||
ENV \
|
||||
HOROVOD_CUDA_HOME=$CUDA_TOOLKIT_ROOT_DIR \
|
||||
HOROVOD_GPU_OPERATIONS=NCCL \
|
||||
HOROVOD_WITH_PYTORCH=1 \
|
||||
HOROVOD_WITHOUT_TENSORFLOW=1 \
|
||||
HOROVOD_WITHOUT_MXNET=1 \
|
||||
HOROVOD_WITH_GLOO=1 \
|
||||
HOROVOD_WITHOUT_MPI=1
|
||||
python assistant.py requirements_prune_pkgs requirements/examples.txt "horovod" && \
|
||||
# Install all requirements \
|
||||
pip install -r requirements/devel.txt --no-cache-dir --find-links https://download.pytorch.org/whl/cu${CUDA_VERSION_MM}/torch_stable.html && \
|
||||
rm -rf requirements.* && \
|
||||
rm assistant.py
|
||||
|
||||
RUN \
|
||||
apt-get purge -y cmake && \
|
||||
|
@ -105,6 +99,15 @@ RUN \
|
|||
make install && \
|
||||
cmake --version
|
||||
|
||||
ENV \
|
||||
HOROVOD_CUDA_HOME=$CUDA_TOOLKIT_ROOT_DIR \
|
||||
HOROVOD_GPU_OPERATIONS=NCCL \
|
||||
HOROVOD_WITH_PYTORCH=1 \
|
||||
HOROVOD_WITHOUT_TENSORFLOW=1 \
|
||||
HOROVOD_WITHOUT_MXNET=1 \
|
||||
HOROVOD_WITH_GLOO=1 \
|
||||
HOROVOD_WITHOUT_MPI=1
|
||||
|
||||
RUN \
|
||||
HOROVOD_BUILD_CUDA_CC_LIST=${TORCH_CUDA_ARCH_LIST//";"/","} && \
|
||||
export HOROVOD_BUILD_CUDA_CC_LIST=${HOROVOD_BUILD_CUDA_CC_LIST//"."/""} && \
|
||||
|
|
Loading…
Reference in New Issue