Add base IPU dockerfiles (#7252)

This commit is contained in:
Louis Taylor 2021-05-07 13:07:29 +01:00 committed by GitHub
parent 1a27c12b26
commit 2b7e65b747
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 309 additions and 0 deletions

View File

@ -149,3 +149,36 @@ jobs:
file: dockers/nvidia/Dockerfile
push: false
timeout-minutes: 50
build-ipu:
runs-on: ubuntu-20.04
strategy:
fail-fast: false
matrix:
include:
- python_version: 3.8
pytorch_version: 1.7
steps:
- name: Checkout
uses: actions/checkout@v2
- name: Build IPU Docker
uses: docker/build-push-action@v2
with:
build-args: |
PYTHON_VERSION=${{ matrix.python_version }}
PYTORCH_VERSION=${{ matrix.pytorch_version }}
file: dockers/base-ipu/Dockerfile
push: false
tags: pytorchlightning/pytorch_lightning:base-ipu-py${{ matrix.python_version }}-torch${{ matrix.pytorch_version }}
timeout-minutes: 50
- name: Build IPU CI runner Docker
uses: docker/build-push-action@v2
with:
build-args: |
PYTHON_VERSION=${{ matrix.python_version }}
PYTORCH_VERSION=${{ matrix.pytorch_version }}
file: dockers/ipu-ci-runner/Dockerfile
push: false
timeout-minutes: 50

View File

@ -152,3 +152,49 @@ jobs:
push: true
tags: nvcr.io/pytorchlightning/pytorch_lightning:nvidia
timeout-minutes: 55
docker-ipu:
runs-on: ubuntu-20.04
strategy:
fail-fast: false
matrix:
include:
- python_version: 3.8
pytorch_version: 1.7
steps:
- name: Checkout
uses: actions/checkout@v2
# https://github.com/docker/setup-buildx-action
# Set up Docker Buildx - to use cache-from and cache-to argument of buildx command
- uses: docker/setup-buildx-action@v1
- name: Login to DockerHub
uses: docker/login-action@v1
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_PASSWORD }}
- name: Publish IPU base to Docker Hub
# publish master/release
uses: docker/build-push-action@v2
with:
build-args: |
PYTHON_VERSION=${{ matrix.python_version }}
PYTORCH_VERSION=${{ matrix.pytorch_version }}
file: dockers/base-ipu/Dockerfile
push: true
tags: pytorchlightning/pytorch_lightning:base-ipu-py${{ matrix.python_version }}-torch${{ matrix.pytorch_version }}
timeout-minutes: 55
- name: Publish IPU CI runner to Docker Hub
# publish master/release
uses: docker/build-push-action@v2
with:
build-args: |
PYTHON_VERSION=${{ matrix.python_version }}
PYTORCH_VERSION=${{ matrix.pytorch_version }}
file: dockers/ipu-ci-runner/Dockerfile
push: true
tags: pytorchlightning/pytorch_lightning:ipu-ci-runner-py${{ matrix.python_version }}
timeout-minutes: 55

View File

@ -0,0 +1,99 @@
# Copyright The PyTorch Lightning team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
FROM ubuntu:20.04
MAINTAINER PyTorchLightning <https://github.com/PyTorchLightning>
ARG PYTHON_VERSION=3.8
ARG PYTORCH_VERSION=1.7
ARG CONDA_VERSION=4.9.2
SHELL ["/bin/bash", "-c"]
# for skipping configurations
ENV \
DEBIAN_FRONTEND=noninteractive \
CONDA_ENV=lightning
RUN apt-get update -qq && \
apt-get install -y --no-install-recommends \
build-essential \
ca-certificates \
cmake \
curl \
git \
jq \
libomp5 \
libopenmpi-dev \
unzip \
wget \
&& \
# Install conda and python.
# NOTE new Conda does not forward the exit status... https://github.com/conda/conda/issues/8385
curl -o ~/miniconda.sh https://repo.anaconda.com/miniconda/Miniconda3-py38_${CONDA_VERSION}-Linux-x86_64.sh && \
chmod +x ~/miniconda.sh && \
~/miniconda.sh -b && \
rm ~/miniconda.sh && \
# Cleaning
apt-get autoremove -y && \
apt-get clean && \
rm -rf /root/.cache && \
rm -rf /var/lib/apt/lists/*
ENV \
PATH="/root/miniconda3/bin:$PATH" \
LD_LIBRARY_PATH="/root/miniconda3/lib:$LD_LIBRARY_PATH"
COPY environment.yml environment.yml
RUN conda create -y --name $CONDA_ENV python=${PYTHON_VERSION} pytorch=${PYTORCH_VERSION} cudatoolkit=${CUDA_VERSION} -c pytorch -c pytorch-test -c pytorch-nightly && \
conda init bash && \
python -c "import re ; fname = 'environment.yml' ; req = re.sub(r'python>=[\d\.]+', 'python=${PYTHON_VERSION}', open(fname).read()) ; open(fname, 'w').write(req)" && \
python -c "import re ; fname = 'environment.yml' ; req = re.sub(r'- pytorch[>=]+[\d\.]+', '# - pytorch=${PYTORCH_VERSION}', open(fname).read()) ; open(fname, 'w').write(req)" && \
python -c "fname = 'environment.yml' ; req = open(fname).readlines() ; open(fname, 'w').writelines([ln for ln in req if not any(n in ln for n in ['pytorch>', 'horovod'])])" && \
cat environment.yml && \
conda env update --file environment.yml && \
conda clean -ya && \
rm environment.yml
ENV \
PATH=/root/miniconda3/envs/${CONDA_ENV}/bin:$PATH \
LD_LIBRARY_PATH="/root/miniconda3/envs/${CONDA_ENV}/lib:$LD_LIBRARY_PATH" \
# if you want this environment to be the default one, uncomment the following line:
CONDA_DEFAULT_ENV=${CONDA_ENV} \
MKL_THREADING_LAYER=GNU
COPY ./requirements/extra.txt requirements-extra.txt
COPY ./requirements/test.txt requirements-test.txt
COPY ./requirements/adjust_versions.py requirements_adjust_versions.py
RUN \
pip list | grep torch && \
python -c "import torch; print(torch.__version__)" && \
python requirements_adjust_versions.py requirements-extra.txt && \
python -c "fname = 'requirements-extra.txt' ; lines = [line for line in open(fname).readlines() if 'fairscale' not in line] ; open(fname, 'w').writelines(lines)" && \
python -c "fname = 'requirements-extra.txt' ; lines = [line for line in open(fname).readlines() if 'horovod' not in line] ; open(fname, 'w').writelines(lines)" && \
# Install remaining requirements
pip install -r requirements-extra.txt --no-cache-dir && \
pip install -r requirements-test.txt --no-cache-dir && \
rm requirements*
RUN \
# Show what we have
pip --version && \
conda info && \
pip list && \
python -c "import sys; assert sys.version[:3] == '$PYTHON_VERSION', sys.version" && \
python -c "import torch; assert torch.__version__[:3] == '$PYTORCH_VERSION', torch.__version__"

View File

@ -0,0 +1,35 @@
# Copyright The PyTorch Lightning team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
ARG PYTHON_VERSION=3.8
ARG PYTORCH_VERSION=1.7
FROM pytorchlightning/pytorch_lightning:base-ipu-py${PYTHON_VERSION}-torch${PYTORCH_VERSION}
MAINTAINER PyTorchLightning <https://github.com/PyTorchLightning>
RUN echo "ALL ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers
WORKDIR /azp
COPY ./dockers/ipu-ci-runner/start.sh /usr/local/bin/
RUN curl -o /usr/local/bin/installdependencies.sh \
"https://raw.githubusercontent.com/microsoft/azure-pipelines-agent/d2acd5f77c6b3914cdb6ed0e5fbea672929c7da9/src/Misc/layoutbin/installdependencies.sh" && \
chmod +x /usr/local/bin/installdependencies.sh && \
chmod +x /usr/local/bin/start.sh && \
/usr/local/bin/installdependencies.sh
ENTRYPOINT ["/usr/local/bin/start.sh"]
CMD ["bash"]

View File

@ -0,0 +1,96 @@
#!/bin/bash
# This is a slightly modified version of the script from
# https://docs.microsoft.com/en-us/azure/devops/pipelines/agents/docker
set -e
if [ -z "$AZP_URL" ]; then
echo 1>&2 "error: missing AZP_URL environment variable"
exit 1
fi
if [ -z "$AZP_TOKEN_FILE" ]; then
if [ -z "$AZP_TOKEN" ]; then
echo 1>&2 "error: missing AZP_TOKEN environment variable"
exit 1
fi
AZP_TOKEN_FILE=/azp/.token
echo -n $AZP_TOKEN > "$AZP_TOKEN_FILE"
fi
unset AZP_TOKEN
if [ -n "$AZP_WORK" ]; then
mkdir -p "$AZP_WORK"
fi
rm -rf /azp/agent
mkdir /azp/agent
cd /azp/agent
export AGENT_ALLOW_RUNASROOT="1"
cleanup() {
if [ -e config.sh ]; then
print_header "Cleanup. Removing Azure Pipelines agent..."
./config.sh remove --unattended \
--auth PAT \
--token $(cat "$AZP_TOKEN_FILE")
fi
}
print_header() {
lightcyan='\033[1;36m'
nocolor='\033[0m'
echo -e "${lightcyan}$1${nocolor}"
}
# Let the agent ignore the token env variables
export VSO_AGENT_IGNORE=AZP_TOKEN,AZP_TOKEN_FILE
print_header "1. Determining matching Azure Pipelines agent..."
AZP_AGENT_RESPONSE=$(curl -LsS \
-u user:$(cat "$AZP_TOKEN_FILE") \
-H 'Accept:application/json;api-version=3.0-preview' \
"$AZP_URL/_apis/distributedtask/packages/agent?platform=linux-x64")
if echo "$AZP_AGENT_RESPONSE" | jq . >/dev/null 2>&1; then
AZP_AGENTPACKAGE_URL=$(echo "$AZP_AGENT_RESPONSE" \
| jq -r '.value | map([.version.major,.version.minor,.version.patch,.downloadUrl]) | sort | .[length-1] | .[3]')
fi
if [ -z "$AZP_AGENTPACKAGE_URL" -o "$AZP_AGENTPACKAGE_URL" == "null" ]; then
echo 1>&2 "error: could not determine a matching Azure Pipelines agent - check that account '$AZP_URL' is correct and the token is valid for that account"
exit 1
fi
print_header "2. Downloading and installing Azure Pipelines agent..."
curl -LsS $AZP_AGENTPACKAGE_URL | tar -xz & wait $!
source ./env.sh
print_header "3. Configuring Azure Pipelines agent..."
./config.sh --unattended \
--agent "${AZP_AGENT_NAME:-$(hostname)}" \
--url "$AZP_URL" \
--auth PAT \
--token $(cat "$AZP_TOKEN_FILE") \
--pool "${AZP_POOL:-Default}" \
--work "${AZP_WORK:-_work}" \
--replace \
--acceptTeeEula & wait $!
print_header "4. Running Azure Pipelines agent..."
trap 'cleanup; exit 130' INT
trap 'cleanup; exit 143' TERM
# To be aware of TERM and INT signals call run.sh
# Running it with the --once flag at the end will shut down the agent after the build is executed
./run.sh --once & wait $!