diff --git a/.github/workflows/docker-builds.yml b/.github/workflows/docker-builds.yml index 76dcd1ba31..32edda1d9d 100644 --- a/.github/workflows/docker-builds.yml +++ b/.github/workflows/docker-builds.yml @@ -64,7 +64,8 @@ jobs: strategy: fail-fast: false matrix: - python_version: [3.7] # TODO: add 3.6, for now the Google docker image has missing 3.6 + python_version: [3.6, 3.7] + xla_version: [1.6, "nightly"] steps: - name: Checkout uses: actions/checkout@v2 @@ -80,10 +81,10 @@ jobs: repository: pytorchlightning/pytorch_lightning username: ${{ secrets.DOCKER_USERNAME }} password: ${{ secrets.DOCKER_PASSWORD }} - dockerfile: dockers/tpu-extras/Dockerfile - build_args: PYTHON_VERSION=${{ matrix.python_version }} - tags: "XLA-extras-py${{ matrix.python_version }}" - timeout-minutes: 25 + dockerfile: dockers/base-xla/Dockerfile + build_args: PYTHON_VERSION=${{ matrix.python_version }},XLA_VERSION=${{ matrix.xla_version }} + tags: "base-xla-py${{ matrix.python_version }}-torch${{ matrix.xla_version }}" + timeout-minutes: 35 build-cuda: runs-on: ubuntu-20.04 @@ -107,7 +108,7 @@ jobs: repository: pytorchlightning/pytorch_lightning username: ${{ secrets.DOCKER_USERNAME }} password: ${{ secrets.DOCKER_PASSWORD }} - dockerfile: dockers/tpu-extras/Dockerfile + dockerfile: dockers/base-cuda/Dockerfile build_args: PYTHON_VERSION=${{ matrix.python_version }},PYTORCH_VERSION=${{ matrix.pytorch_version }} - tags: "cuda-extras-py${{ matrix.python_version }}-torch${{ matrix.pytorch_version }}" + tags: "base-cuda-py${{ matrix.python_version }}-torch${{ matrix.pytorch_version }}" timeout-minutes: 40 diff --git a/dockers/cuda-extras/Dockerfile b/dockers/base-cuda/Dockerfile similarity index 96% rename from dockers/cuda-extras/Dockerfile rename to dockers/base-cuda/Dockerfile index f3c435ff53..fb0a00f6e3 100644 --- a/dockers/cuda-extras/Dockerfile +++ b/dockers/base-cuda/Dockerfile @@ -45,7 +45,7 @@ RUN apt-get update && \ # HOROVOD_BUILD_ARCH_FLAGS="-mfma" && \ pip install -r 
requirements-extra.txt && \ pip install -r requirements-examples.txt && \ - pip install -r requirements-tests.txt && \ + #pip install -r requirements-tests.txt && \ rm install_AMP.sh && \ rm requirements* && \ @@ -55,4 +55,5 @@ RUN apt-get update && \ rm -rf /root/.cache && \ # Show what we have + pip --version && \ pip list diff --git a/dockers/base-xla/Dockerfile b/dockers/base-xla/Dockerfile new file mode 100644 index 0000000000..1c9f052f2d --- /dev/null +++ b/dockers/base-xla/Dockerfile @@ -0,0 +1,87 @@ +FROM google/cloud-sdk:slim + +# CALL: docker image build -t pytorch-lightning:base-xla-py3.7-torch1.6 -f dockers/base-xla/Dockerfile --build-arg PYTHON_VERSION=3.7 --build-arg XLA_VERSION=1.6 . +# This Dockerfile installs the pytorch/xla wheels selected by PYTHON_VERSION and XLA_VERSION (defaults below). +ARG PYTHON_VERSION=3.7 +ARG XLA_VERSION="1.6" + +SHELL ["/bin/bash", "-c"] + +# for skipping configurations +ENV DEBIAN_FRONTEND=noninteractive +ENV CONDA_ENV=pytorch-xla + +# show system information +RUN lsb_release -a && cat /etc/*-release + +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + build-essential \ + cmake \ + wget \ + curl \ + unzip \ + ca-certificates \ + libomp5 \ + && \ + +# Install conda and python. +# NOTE new Conda does not forward the exit status... https://github.com/conda/conda/issues/8385 + curl -o ~/miniconda.sh https://repo.anaconda.com/miniconda/Miniconda3-4.7.12-Linux-x86_64.sh && \ + chmod +x ~/miniconda.sh && \ + ~/miniconda.sh -b && \ + rm ~/miniconda.sh && \ + +# Cleaning + apt-get autoremove -y && \ + apt-get clean && \ + rm -rf /root/.cache /var/lib/apt/lists/* + +ENV PATH="/root/miniconda3/bin:$PATH" +ENV LD_LIBRARY_PATH="/root/miniconda3/lib:$LD_LIBRARY_PATH" + +RUN conda create -y --name $CONDA_ENV python=$PYTHON_VERSION && \ + conda init bash && \ + conda install -y python=$PYTHON_VERSION mkl && \ + +# Disable cache + pip config set global.cache-dir false && \ + pip install "pip>20.1" -U && \ + +# Install Pytorch XLA + py_version=${PYTHON_VERSION/./} && \ + # The wheel ABI tag comes from PYTHON_VERSION with the dot removed, 
e.g. 3.7 -> cp37-cp37m + gsutil cp "gs://tpu-pytorch/wheels/torch-${XLA_VERSION}-cp${py_version}-cp${py_version}m-linux_x86_64.whl" . && \ + gsutil cp "gs://tpu-pytorch/wheels/torch_xla-${XLA_VERSION}-cp${py_version}-cp${py_version}m-linux_x86_64.whl" . && \ + gsutil cp "gs://tpu-pytorch/wheels/torchvision-${XLA_VERSION}-cp${py_version}-cp${py_version}m-linux_x86_64.whl" . && \ + pip install *.whl && \ + rm *.whl + +ENV LD_LIBRARY_PATH="/root/miniconda3/envs/$CONDA_ENV/lib:$LD_LIBRARY_PATH" +# NOTE(review): the ENV below is already active (nothing to uncomment); exporting CONDA_DEFAULT_ENV probably does not activate the env by itself - confirm +ENV CONDA_DEFAULT_ENV=${CONDA_ENV} + +# Install pytorch-lightning dependencies. +RUN \ +# Get package + wget https://github.com/PyTorchLightning/pytorch-lightning/archive/master.zip --progress=bar:force:noscroll && \ + unzip -q master.zip && \ + rm master.zip && \ + +# Install PL dependencies + cd pytorch-lightning-master && \ + # drop Torch + python -c "fname = \"./requirements/base.txt\" ; lines = [line for line in open(fname).readlines() if not line.startswith(\"torch\")] ; open(fname, \"w\").writelines(lines)" && \ + pip install --requirement ./requirements/base.txt && \ + # drop Horovod + python -c "fname = \"./requirements/extra.txt\" ; lines = [line for line in open(fname).readlines() if not line.startswith(\"horovod\")] ; open(fname, \"w\").writelines(lines)" && \ + pip install --requirement ./requirements/extra.txt && \ + # drop TorchVision + python -c "fname = \"./requirements/examples.txt\" ; lines = [line for line in open(fname).readlines() if not line.startswith(\"torchvision\")] ; open(fname, \"w\").writelines(lines)" && \ + pip install --requirement ./requirements/examples.txt && \ + cd .. 
&& \ + rm -rf pytorch-lightning-master && \ + rm -rf /root/.cache + +RUN pip --version && \ + python -c "import torch; print(torch.__version__)" diff --git a/dockers/conda/Dockerfile b/dockers/conda/Dockerfile index 1cd5d821a7..ae4cc6f341 100644 --- a/dockers/conda/Dockerfile +++ b/dockers/conda/Dockerfile @@ -5,30 +5,52 @@ FROM nvidia/cuda:${CUDA_VERSION}-base ARG PYTHON_VERSION=3.7 ARG PYTORCH_VERSION=1.4 ARG LIGHTNING_VERSION=master +# NOTE new Conda does not forward the exit status... https://github.com/conda/conda/issues/8385 +ARG CONDA_VERSION=4.7.12 -RUN apt-get update && apt-get install -y --no-install-recommends \ +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ build-essential \ cmake \ git \ curl \ - ca-certificates + ca-certificates \ + && \ + +# Cleaning + apt-get autoremove -y && \ + apt-get clean && \ + rm -rf /root/.cache /var/lib/apt/lists/* # add non-root user RUN useradd --create-home --shell /bin/bash containeruser + USER containeruser WORKDIR /home/containeruser - - +ENV CONDA_ENV=lightning + # install conda and python -RUN curl -o ~/miniconda.sh https://repo.anaconda.com/miniconda/Miniconda3-4.7.12-Linux-x86_64.sh && \ +RUN curl -o ~/miniconda.sh https://repo.anaconda.com/miniconda/Miniconda3-${CONDA_VERSION}-Linux-x86_64.sh && \ chmod +x ~/miniconda.sh && \ - ~/miniconda.sh -b -p /home/containeruser/conda && \ - rm ~/miniconda.sh && \ - /home/containeruser/conda/bin/conda clean -ya && \ - /home/containeruser/conda/bin/conda install -y python=$PYTHON_VERSION - + ~/miniconda.sh -b -p /home/containeruser/miniconda && \ + rm ~/miniconda.sh + # add conda to path -ENV PATH /home/containeruser/conda/bin:$PATH +ENV PATH="/home/containeruser/miniconda/bin:$PATH" +ENV LD_LIBRARY_PATH="/home/containeruser/miniconda/lib:$LD_LIBRARY_PATH" + +# conda init +RUN conda create -y --name $CONDA_ENV python=$PYTHON_VERSION && \ + conda init bash && \ + conda install -y python=$PYTHON_VERSION && \ + +# Disable cache + pip config set global.cache-dir false && \ + 
pip install "pip>20.1" -U + +ENV LD_LIBRARY_PATH="/home/containeruser/miniconda/envs/$CONDA_ENV/lib:$LD_LIBRARY_PATH" +# NOTE(review): the ENV below is already active (nothing to uncomment); exporting CONDA_DEFAULT_ENV probably does not activate the env by itself - confirm +ENV CONDA_DEFAULT_ENV=${CONDA_ENV} # install dependencies RUN pip install torch==$PYTORCH_VERSION -f https://download.pytorch.org/whl/torch_stable.html && \ @@ -37,6 +59,8 @@ RUN pip install torch==$PYTORCH_VERSION -f https://download.pytorch.org/whl/tor pip install -r pytorch-lightning/requirements/extra.txt && \ rm -rf pytorch-lightning -RUN python -c "import pytorch_lightning as pl; print(pl.__version__)" +RUN python --version && \ + pip --version && \ + python -c "import pytorch_lightning as pl; print(pl.__version__)" CMD ["/bin/bash"] diff --git a/dockers/tpu-extras/Dockerfile b/dockers/tpu-extras/Dockerfile deleted file mode 100644 index 03f27c0c9b..0000000000 --- a/dockers/tpu-extras/Dockerfile +++ /dev/null @@ -1,71 +0,0 @@ -FROM google/cloud-sdk:slim - -# This Dockerfile installs pytorch/xla 3.7 wheels. There are also 3.6 wheels available; see below. 
-ARG PYTHON_VERSION=3.7 - -SHELL ["/bin/bash", "-c"] - -RUN apt-get update && apt-get install -y --no-install-recommends \ - python${PYTHON_VERSION} \ - python${PYTHON_VERSION}-dev \ - $( [ ${PYTHON_VERSION%%.*} -ge 3 ] && echo "python${PYTHON_VERSION%%.*}-distutils" ) \ - build-essential \ - cmake \ - wget \ - unzip \ - ca-certificates \ - libomp5 \ - && \ - -# Install python dependencies - wget https://bootstrap.pypa.io/get-pip.py --progress=bar:force:noscroll --no-check-certificate && \ - python${PYTHON_VERSION} get-pip.py && \ - rm get-pip.py && \ - -# Set the default python and install PIP packages - update-alternatives --install /usr/bin/python${PYTHON_VERSION%%.*} python${PYTHON_VERSION%%.*} /usr/bin/python${PYTHON_VERSION} 1 && \ - update-alternatives --install /usr/bin/python python /usr/bin/python${PYTHON_VERSION} 1 && \ - -# Disable cache - pip config set global.cache-dir false && \ - pip install "pip>20.1" -U && \ - -# Install Pytorch XLA - py_version=${PYTHON_VERSION/./} && \ - # Python 3.7 wheels are available. Replace cp36-cp36m with cp37-cp37m - gsutil cp "gs://tpu-pytorch/wheels/torch-nightly-cp${py_version}-cp${py_version}m-linux_x86_64.whl" . && \ - gsutil cp "gs://tpu-pytorch/wheels/torch_xla-nightly-cp${py_version}-cp${py_version}m-linux_x86_64.whl" . && \ - gsutil cp "gs://tpu-pytorch/wheels/torchvision-nightly-cp${py_version}-cp${py_version}m-linux_x86_64.whl" . 
&& \ - pip install "torch-nightly-cp${py_version}-cp${py_version}m-linux_x86_64.whl" && \ - pip install "torch_xla-nightly-cp${py_version}-cp${py_version}m-linux_x86_64.whl" && \ - pip install "torchvision-nightly-cp${py_version}-cp${py_version}m-linux_x86_64.whl" && \ - rm "torch-nightly-cp${py_version}-cp${py_version}m-linux_x86_64.whl" && \ - rm "torch_xla-nightly-cp${py_version}-cp${py_version}m-linux_x86_64.whl" && \ - rm "torchvision-nightly-cp${py_version}-cp${py_version}m-linux_x86_64.whl" && \ - pip install mkl && \ - -# Cleaning - apt-get autoremove -y && \ - apt-get clean && \ - rm -rf /root/.cache - -# Install pytorch-lightning dependencies. -RUN \ -# Get package - wget https://github.com/PyTorchLightning/pytorch-lightning/archive/master.zip --progress=bar:force:noscroll && \ - unzip master.zip && \ - rm master.zip && \ - -# Install PL dependencies - cd pytorch-lightning-master && \ - pip install --requirement ./requirements/base.txt && \ - # Drop Horovod - python -c "fname = 'requirements/extra.txt' ; lines = [line for line in open(fname).readlines() if not line.startswith('horovod')] ; open(fname, 'w').writelines(lines)" && \ - pip install --requirement ./requirements/extra.txt && \ - cd .. && \ - rm -rf pytorch-lightning-master && \ - rm -rf /root/.cache - -ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/local/lib/" - -RUN python -c "import torch; print(torch.__version__)"