Add base IPU dockerfiles (#7252)
This commit is contained in:
parent
1a27c12b26
commit
2b7e65b747
|
@ -149,3 +149,36 @@ jobs:
|
|||
file: dockers/nvidia/Dockerfile
|
||||
push: false
|
||||
timeout-minutes: 50
|
||||
|
||||
build-ipu:
  # Builds (without pushing) the IPU base image and the IPU CI-runner image.
  runs-on: ubuntu-20.04
  strategy:
    fail-fast: false
    matrix:
      include:
        # Versions are quoted so YAML keeps them as strings; an unquoted 3.10 would be read as the float 3.1.
        - python_version: "3.8"
          pytorch_version: "1.7"
  steps:
    - name: Checkout
      uses: actions/checkout@v2

    - name: Build IPU Docker
      uses: docker/build-push-action@v2
      with:
        build-args: |
          PYTHON_VERSION=${{ matrix.python_version }}
          PYTORCH_VERSION=${{ matrix.pytorch_version }}
        file: dockers/base-ipu/Dockerfile
        push: false
        tags: pytorchlightning/pytorch_lightning:base-ipu-py${{ matrix.python_version }}-torch${{ matrix.pytorch_version }}
      timeout-minutes: 50

    # NOTE(review): the runner Dockerfile's FROM uses the base-ipu tag built in the previous step - confirm it resolves locally when nothing is pushed.
    - name: Build IPU CI runner Docker
      uses: docker/build-push-action@v2
      with:
        build-args: |
          PYTHON_VERSION=${{ matrix.python_version }}
          PYTORCH_VERSION=${{ matrix.pytorch_version }}
        file: dockers/ipu-ci-runner/Dockerfile
        push: false
      timeout-minutes: 50
|
||||
|
|
|
@ -152,3 +152,49 @@ jobs:
|
|||
push: true
|
||||
tags: nvcr.io/pytorchlightning/pytorch_lightning:nvidia
|
||||
timeout-minutes: 55
|
||||
|
||||
docker-ipu:
  # Builds the IPU base image and the IPU CI-runner image and pushes both to Docker Hub.
  runs-on: ubuntu-20.04
  strategy:
    fail-fast: false
    matrix:
      include:
        # Versions are quoted so YAML keeps them as strings; an unquoted 3.10 would be read as the float 3.1.
        - python_version: "3.8"
          pytorch_version: "1.7"

  steps:
    - name: Checkout
      uses: actions/checkout@v2

    # https://github.com/docker/setup-buildx-action
    # Set up Docker Buildx - to use cache-from and cache-to argument of buildx command
    - uses: docker/setup-buildx-action@v1
    - name: Login to DockerHub
      uses: docker/login-action@v1
      with:
        username: ${{ secrets.DOCKER_USERNAME }}
        password: ${{ secrets.DOCKER_PASSWORD }}

    - name: Publish IPU base to Docker Hub
      # publish master/release
      uses: docker/build-push-action@v2
      with:
        build-args: |
          PYTHON_VERSION=${{ matrix.python_version }}
          PYTORCH_VERSION=${{ matrix.pytorch_version }}
        file: dockers/base-ipu/Dockerfile
        push: true
        tags: pytorchlightning/pytorch_lightning:base-ipu-py${{ matrix.python_version }}-torch${{ matrix.pytorch_version }}
      timeout-minutes: 55

    - name: Publish IPU CI runner to Docker Hub
      # publish master/release
      uses: docker/build-push-action@v2
      with:
        build-args: |
          PYTHON_VERSION=${{ matrix.python_version }}
          PYTORCH_VERSION=${{ matrix.pytorch_version }}
        file: dockers/ipu-ci-runner/Dockerfile
        push: true
        tags: pytorchlightning/pytorch_lightning:ipu-ci-runner-py${{ matrix.python_version }}
      timeout-minutes: 55
|
||||
|
|
|
@ -0,0 +1,99 @@
|
|||
# Copyright The PyTorch Lightning team.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
FROM ubuntu:20.04

# MAINTAINER is deprecated; the same information is carried as a label.
LABEL maintainer="PyTorchLightning <https://github.com/PyTorchLightning>"

# Build-time knobs, overridable with --build-arg.
ARG PYTHON_VERSION=3.8
ARG PYTORCH_VERSION=1.7
ARG CONDA_VERSION=4.9.2

# Run every RUN step under bash; pipefail makes a failure on the left side of a pipe fail the step.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# for skipping configurations
# DEBIAN_FRONTEND stays an ENV (not an ARG) so that images built FROM this one keep inheriting it.
ENV \
    DEBIAN_FRONTEND=noninteractive \
    CONDA_ENV=lightning
|
||||
|
||||
# Install the OS packages and Miniconda in one layer, cleaning up in that same layer.
RUN apt-get update -qq && \
    apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        cmake \
        curl \
        git \
        jq \
        libomp5 \
        libopenmpi-dev \
        unzip \
        wget \
    && \
    # Install conda and python.
    # NOTE new Conda does not forward the exit status... https://github.com/conda/conda/issues/8385
    # --fail (-f) makes curl exit non-zero on an HTTP error instead of saving the error page as the installer.
    curl -fsSL -o ~/miniconda.sh https://repo.anaconda.com/miniconda/Miniconda3-py38_${CONDA_VERSION}-Linux-x86_64.sh && \
    chmod +x ~/miniconda.sh && \
    ~/miniconda.sh -b && \
    rm ~/miniconda.sh && \
    # Cleaning
    apt-get autoremove -y && \
    apt-get clean && \
    rm -rf /root/.cache && \
    rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Put the base Miniconda install on the executable and shared-library search paths.
# LD_LIBRARY_PATH is assigned outright: no earlier instruction in this file defines it, so
# appending "$LD_LIBRARY_PATH" only produced a trailing ':' (an empty search-path entry).
# NOTE(review): assumes the ubuntu:20.04 base image does not set LD_LIBRARY_PATH - confirm.
ENV \
    PATH="/root/miniconda3/bin:$PATH" \
    LD_LIBRARY_PATH="/root/miniconda3/lib"
|
||||
|
||||
COPY environment.yml environment.yml

# Create the conda env, rewrite environment.yml to match the requested versions, then apply it.
# `cpuonly` replaces `cudatoolkit=${CUDA_VERSION}`: CUDA_VERSION is never declared as an ARG in
# this file, so that spec expanded to a bare `cudatoolkit=`; `cpuonly` asks for a CPU build of PyTorch.
RUN conda create -y --name $CONDA_ENV python=${PYTHON_VERSION} pytorch=${PYTORCH_VERSION} cpuonly -c pytorch -c pytorch-test -c pytorch-nightly && \
    conda init bash && \
    # pin the python requirement to the requested version
    python -c "import re ; fname = 'environment.yml' ; req = re.sub(r'python>=[\d\.]+', 'python=${PYTHON_VERSION}', open(fname).read()) ; open(fname, 'w').write(req)" && \
    # comment out the pytorch requirement (it was installed by `conda create` above)
    python -c "import re ; fname = 'environment.yml' ; req = re.sub(r'- pytorch[>=]+[\d\.]+', '# - pytorch=${PYTORCH_VERSION}', open(fname).read()) ; open(fname, 'w').write(req)" && \
    # drop every line that mentions 'pytorch>' or 'horovod'
    python -c "fname = 'environment.yml' ; req = open(fname).readlines() ; open(fname, 'w').writelines([ln for ln in req if not any(n in ln for n in ['pytorch>', 'horovod'])])" && \
    cat environment.yml && \
    conda env update --file environment.yml && \
    conda clean -ya && \
    rm environment.yml
|
||||
|
||||
# Put the conda env's bin/ and lib/ directories ahead of the existing search paths.
ENV \
    PATH="/root/miniconda3/envs/${CONDA_ENV}/bin:${PATH}" \
    LD_LIBRARY_PATH="/root/miniconda3/envs/${CONDA_ENV}/lib:${LD_LIBRARY_PATH}" \
    # makes this environment the default one
    CONDA_DEFAULT_ENV="${CONDA_ENV}" \
    MKL_THREADING_LAYER=GNU
|
||||
|
||||
# Requirement lists and the version-adjusting helper, copied under flat names into the working directory.
COPY ./requirements/extra.txt requirements-extra.txt
COPY ./requirements/test.txt requirements-test.txt
COPY ./requirements/adjust_versions.py requirements_adjust_versions.py

RUN \
    # Show which torch packages are already installed
    pip list | grep torch && \
    python -c "import torch; print(torch.__version__)" && \
    # Run the adjust-versions helper on the extra requirements
    python requirements_adjust_versions.py requirements-extra.txt && \
    # Remove the fairscale and horovod lines from the extra requirements
    python -c "fname = 'requirements-extra.txt' ; lines = [line for line in open(fname).readlines() if 'fairscale' not in line] ; open(fname, 'w').writelines(lines)" && \
    python -c "fname = 'requirements-extra.txt' ; lines = [line for line in open(fname).readlines() if 'horovod' not in line] ; open(fname, 'w').writelines(lines)" && \
    # Install remaining requirements
    pip install --no-cache-dir -r requirements-extra.txt && \
    pip install --no-cache-dir -r requirements-test.txt && \
    rm requirements*
|
||||
|
||||
# Print the toolchain and fail the build if the installed Python / PyTorch do not match the build args.
RUN \
    # Show what we have
    pip --version && \
    conda info && \
    pip list && \
    # Compare on "major.minor" built from the version components; slicing the first three
    # characters of the version string would turn 3.10 into "3.1" and 1.10 into "1.1".
    python -c "import sys; found = '%d.%d' % sys.version_info[:2]; assert found == '$PYTHON_VERSION', sys.version" && \
    python -c "import torch; found = '.'.join(torch.__version__.split('.')[:2]); assert found == '$PYTORCH_VERSION', torch.__version__"
|
|
@ -0,0 +1,35 @@
|
|||
# Copyright The PyTorch Lightning team.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# Declared before FROM so they can select the base image tag.
ARG PYTHON_VERSION=3.8
ARG PYTORCH_VERSION=1.7

FROM pytorchlightning/pytorch_lightning:base-ipu-py${PYTHON_VERSION}-torch${PYTORCH_VERSION}

# MAINTAINER is deprecated; the same information is carried as a label.
LABEL maintainer="PyTorchLightning <https://github.com/PyTorchLightning>"
|
||||
|
||||
# Grant every user passwordless sudo for all commands.
RUN echo "ALL ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers

WORKDIR /azp

COPY ./dockers/ipu-ci-runner/start.sh /usr/local/bin/

# Download the Azure Pipelines agent dependency installer (pinned to a commit), make it and
# start.sh executable, and run the installer.
# --fail (-f) makes curl exit non-zero on an HTTP error, so an error page is never saved and executed.
RUN curl -fsSL -o /usr/local/bin/installdependencies.sh \
        "https://raw.githubusercontent.com/microsoft/azure-pipelines-agent/d2acd5f77c6b3914cdb6ed0e5fbea672929c7da9/src/Misc/layoutbin/installdependencies.sh" && \
    chmod +x /usr/local/bin/installdependencies.sh && \
    chmod +x /usr/local/bin/start.sh && \
    /usr/local/bin/installdependencies.sh

ENTRYPOINT ["/usr/local/bin/start.sh"]
CMD ["bash"]
|
|
@ -0,0 +1,96 @@
|
|||
#!/bin/bash
|
||||
|
||||
# This is a slightly modified version of the script from
|
||||
# https://docs.microsoft.com/en-us/azure/devops/pipelines/agents/docker
|
||||
|
||||
set -e

# AZP_URL is mandatory.
if [ -z "$AZP_URL" ]; then
  echo 1>&2 "error: missing AZP_URL environment variable"
  exit 1
fi

# The token comes from AZP_TOKEN_FILE if set; otherwise AZP_TOKEN is written to a file.
if [ -z "$AZP_TOKEN_FILE" ]; then
  if [ -z "$AZP_TOKEN" ]; then
    echo 1>&2 "error: missing AZP_TOKEN environment variable"
    exit 1
  fi

  AZP_TOKEN_FILE=/azp/.token
  # Quoted printf writes the token verbatim, with no trailing newline and without the
  # word splitting / globbing an unquoted `echo -n $AZP_TOKEN` is subject to.
  printf '%s' "$AZP_TOKEN" > "$AZP_TOKEN_FILE"
fi

# From here on the token is only read back from the file.
unset AZP_TOKEN

if [ -n "$AZP_WORK" ]; then
  mkdir -p "$AZP_WORK"
fi

# Start from an empty agent directory.
rm -rf /azp/agent
mkdir /azp/agent
cd /azp/agent

export AGENT_ALLOW_RUNASROOT="1"
|
||||
|
||||
# Remove the agent's registration, if config.sh is present in the current directory.
# Reads the PAT from the file named by AZP_TOKEN_FILE.
cleanup() {
  if [ -e config.sh ]; then
    print_header "Cleanup. Removing Azure Pipelines agent..."

    # The substitution is quoted so the token is always passed as exactly one argument.
    ./config.sh remove --unattended \
      --auth PAT \
      --token "$(cat "$AZP_TOKEN_FILE")"
  fi
}
|
||||
|
||||
# Print $1 in bold light cyan, followed by a colour reset.
# The colour codes are function-local so they do not leak into the script's global variables.
print_header() {
  local lightcyan='\033[1;36m'
  local nocolor='\033[0m'
  echo -e "${lightcyan}$1${nocolor}"
}
|
||||
|
||||
# Let the agent ignore the token env variables
export VSO_AGENT_IGNORE=AZP_TOKEN,AZP_TOKEN_FILE

print_header "1. Determining matching Azure Pipelines agent..."

# Query the available linux-x64 agent packages, authenticating with the PAT read from the token file.
# The credential is quoted so it always reaches curl as a single argument.
AZP_AGENT_RESPONSE=$(curl -LsS \
  -u "user:$(cat "$AZP_TOKEN_FILE")" \
  -H 'Accept:application/json;api-version=3.0-preview' \
  "$AZP_URL/_apis/distributedtask/packages/agent?platform=linux-x64")

# Only parse the response if it is valid JSON; take the downloadUrl of the entry that sorts
# last by [major, minor, patch].
if echo "$AZP_AGENT_RESPONSE" | jq . >/dev/null 2>&1; then
  AZP_AGENTPACKAGE_URL=$(echo "$AZP_AGENT_RESPONSE" \
    | jq -r '.value | map([.version.major,.version.minor,.version.patch,.downloadUrl]) | sort | .[length-1] | .[3]')
fi

# Two separate tests joined with || replace the obsolescent `[ ... -o ... ]` form.
if [ -z "$AZP_AGENTPACKAGE_URL" ] || [ "$AZP_AGENTPACKAGE_URL" = "null" ]; then
  echo 1>&2 "error: could not determine a matching Azure Pipelines agent - check that account '$AZP_URL' is correct and the token is valid for that account"
  exit 1
fi
|
||||
|
||||
print_header "2. Downloading and installing Azure Pipelines agent..."

# Stream the agent archive into tar in the background and wait for it.
# The URL is quoted so it is passed to curl as one argument, unsplit and unglobbed.
curl -LsS "$AZP_AGENTPACKAGE_URL" | tar -xz & wait $!

source ./env.sh

print_header "3. Configuring Azure Pipelines agent..."

# Register the agent unattended; name, pool and work directory fall back to
# the hostname, "Default" and "_work" when the matching AZP_* variable is unset or empty.
./config.sh --unattended \
  --agent "${AZP_AGENT_NAME:-$(hostname)}" \
  --url "$AZP_URL" \
  --auth PAT \
  --token "$(cat "$AZP_TOKEN_FILE")" \
  --pool "${AZP_POOL:-Default}" \
  --work "${AZP_WORK:-_work}" \
  --replace \
  --acceptTeeEula & wait $!

print_header "4. Running Azure Pipelines agent..."

# On INT/TERM, run cleanup and exit with the conventional 128+signal status.
trap 'cleanup; exit 130' INT
trap 'cleanup; exit 143' TERM

# To be aware of TERM and INT signals call run.sh
# Running it with the --once flag at the end will shut down the agent after the build is executed
./run.sh --once & wait $!
|
Loading…
Reference in New Issue