Docker: building XLA base image (#2494)

* refactor

* add TPU base

* wip

* builds

* typo

* extras

* simple

* unzip

* rename
This commit is contained in:
Jirka Borovec 2020-07-06 20:21:36 +02:00 committed by GitHub
parent a91b06ed1e
commit 977df6ed31
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 101 additions and 11 deletions

View File

@ -6,7 +6,7 @@ name: torch-GPU
steps:
- name: testing
image: pytorchlightning/pytorch_lightning:devel-pt_1_4
image: pytorchlightning/pytorch_lightning:devel-pt1.4
environment:
SLURM_LOCALID: 0

View File

@ -10,7 +10,7 @@ on:
- created
jobs:
build:
build-Conda:
runs-on: ubuntu-20.04
strategy:
matrix:
@ -35,7 +35,7 @@ jobs:
repository: pytorchlightning/pytorch_lightning
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_PASSWORD }}
dockerfile: docker/Dockerfile
dockerfile: dockers/conda/Dockerfile
build_args: PYTHON_VERSION=${{ matrix.python_version }},PYTORCH_VERSION=${{ matrix.pytorch_version }}
tags: "nightly-py${{ matrix.python_version }}-torch${{ matrix.pytorch_version }}"
timeout-minutes: 40
@ -53,7 +53,32 @@ jobs:
repository: pytorchlightning/pytorch_lightning
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_PASSWORD }}
dockerfile: docker/Dockerfile
dockerfile: dockers/conda/Dockerfile
build_args: PYTHON_VERSION=${{ matrix.python_version }},PYTORCH_VERSION=${{ matrix.pytorch_version }},LIGHTNING_VERSION=${{ env.RELEASE_VERSION }}
tags: "${{ env.RELEASE_VERSION }}-py${{ matrix.python_version }}-torch${{ matrix.pytorch_version }},latest-py${{ matrix.python_version }}-torch${{ matrix.pytorch_version }}"
timeout-minutes: 40
build-XLA:
  # Builds the XLA (TPU) "extras" base image once per Python version in the
  # matrix and pushes it to Docker Hub.
  runs-on: ubuntu-20.04
  strategy:
    matrix:
      # Quoted so YAML keeps the versions as strings; an unquoted entry such
      # as 3.10 would be parsed as the float 3.1.
      python_version: ['3.6', '3.7']
  steps:
    - name: Checkout
      uses: actions/checkout@v2
    - uses: actions/setup-python@v2
      with:
        # Interpreter for the runner itself; the image's Python version is
        # passed separately through build_args below.
        python-version: '3.7'
    - name: Publish Master to Docker
      # publish master
      uses: docker/build-push-action@v1.1.0
      # Only push events publish an image.
      if: github.event_name == 'push'
      with:
        repository: pytorchlightning/pytorch_lightning
        username: ${{ secrets.DOCKER_USERNAME }}
        password: ${{ secrets.DOCKER_PASSWORD }}
        dockerfile: dockers/tpu-extras/Dockerfile
        build_args: PYTHON_VERSION=${{ matrix.python_version }}
        tags: "XLA-extras-py${{ matrix.python_version }}"
      timeout-minutes: 25

View File

@ -60,7 +60,7 @@ jobs:
shell: bash
- name: Build and Push Docker Image
run: |
cd docker/tpu
cd dockers/tpu-tests
docker build --tag "$IMAGE:$GITHUB_RUN_ID" -f Dockerfile --build-arg "GITHUB_REF=$GITHUB_REF" --build-arg "TEST_IMAGE=1" .
docker push "$IMAGE:$GITHUB_RUN_ID"
shell: bash
@ -77,7 +77,7 @@ jobs:
- name: Deploy the job on the kubernetes cluster
run: |-
job_name=$(jsonnet -J ml-testing-accelerators/ docker/tpu/tpu_test_cases.jsonnet --ext-str image=$IMAGE --ext-str image-tag=$GITHUB_RUN_ID | kubectl create -f -) && \
job_name=$(jsonnet -J ml-testing-accelerators/ dockers/tpu-tests/tpu_test_cases.jsonnet --ext-str image=$IMAGE --ext-str image-tag=$GITHUB_RUN_ID | kubectl create -f -) && \
job_name=${job_name#job.batch/} && \
job_name=${job_name% created} && \
echo "Waiting on kubernetes job: $job_name in cluster: $GKE_CLUSTER" && \

View File

@ -4,7 +4,7 @@ You can build it on your own, note it takes lots of time, be prepared.
```bash
git clone <git-repository>
docker image build -t pytorch-lightning:latest -f docker/Dockerfile .
docker image build -t pytorch-lightning:latest -f dockers/conda/Dockerfile .
```
or with specific arguments
@ -13,7 +13,7 @@ or with specific arguments
git clone <git-repository>
docker image build \
-t pytorch-lightning:py38 \
-f docker/Dockerfile \
-f dockers/conda/Dockerfile \
--build-arg PYTHON_VERSION=3.8 \
--build-arg PYTORCH_VERSION=1.4 \
.

View File

@ -0,0 +1,65 @@
FROM google/cloud-sdk:slim

# Base image with the nightly torch / torch_xla / torchvision wheels from
# gs://tpu-pytorch/wheels plus the pytorch-lightning base and extra
# requirements (Horovod excluded).
# PYTHON_VERSION selects both the interpreter installed through apt and the
# cpXY tag of the wheels fetched below; 3.7 is the default, 3.6 wheels are
# also available.
ARG PYTHON_VERSION=3.7

# bash is required for the ${VAR%%.*} and ${VAR/./} expansions used below.
SHELL ["/bin/bash", "-c"]

RUN apt-get update && apt-get install -y --no-install-recommends \
        python${PYTHON_VERSION} \
        python${PYTHON_VERSION}-dev \
        $( [ ${PYTHON_VERSION%%.*} -ge 3 ] && echo "python${PYTHON_VERSION%%.*}-distutils" ) \
        build-essential \
        cmake \
        wget \
        unzip \
        ca-certificates \
        libomp5 \
    && \
    # Install python dependencies
    # ca-certificates is installed above, so the download is TLS-verified.
    wget https://bootstrap.pypa.io/get-pip.py --progress=bar:force:noscroll && \
    python${PYTHON_VERSION} get-pip.py && \
    rm get-pip.py && \
    # Set the default python and install PIP packages
    update-alternatives --install /usr/bin/python${PYTHON_VERSION%%.*} python${PYTHON_VERSION%%.*} /usr/bin/python${PYTHON_VERSION} 1 && \
    update-alternatives --install /usr/bin/python python /usr/bin/python${PYTHON_VERSION} 1 && \
    # Install Pytorch XLA
    # Strip the dot from the version (3.7 -> 37) to build the cpXY wheel tag.
    py_version=${PYTHON_VERSION/./} && \
    gsutil cp "gs://tpu-pytorch/wheels/torch-nightly-cp${py_version}-cp${py_version}m-linux_x86_64.whl" . && \
    gsutil cp "gs://tpu-pytorch/wheels/torch_xla-nightly-cp${py_version}-cp${py_version}m-linux_x86_64.whl" . && \
    gsutil cp "gs://tpu-pytorch/wheels/torchvision-nightly-cp${py_version}-cp${py_version}m-linux_x86_64.whl" . && \
    pip install "torch-nightly-cp${py_version}-cp${py_version}m-linux_x86_64.whl" && \
    pip install "torch_xla-nightly-cp${py_version}-cp${py_version}m-linux_x86_64.whl" && \
    pip install "torchvision-nightly-cp${py_version}-cp${py_version}m-linux_x86_64.whl" && \
    rm "torch-nightly-cp${py_version}-cp${py_version}m-linux_x86_64.whl" && \
    rm "torch_xla-nightly-cp${py_version}-cp${py_version}m-linux_x86_64.whl" && \
    rm "torchvision-nightly-cp${py_version}-cp${py_version}m-linux_x86_64.whl" && \
    pip install mkl && \
    # Cleaning
    apt-get autoremove -y && \
    apt-get clean && \
    # Drop the package lists so they are not baked into this layer.
    rm -rf /var/lib/apt/lists/*

# Install pytorch-lightning dependencies.
RUN \
    # Get package
    wget https://github.com/PyTorchLightning/pytorch-lightning/archive/master.zip --progress=bar:force:noscroll && \
    unzip master.zip && \
    rm master.zip && \
    # Install PL dependencies
    cd pytorch-lightning-master && \
    pip install --requirement ./requirements/base.txt && \
    # Drop Horovod
    python -c "fname = 'requirements/extra.txt' ; lines = [line for line in open(fname).readlines() if not line.startswith('horovod')] ; open(fname, 'w').writelines(lines)" && \
    pip install --requirement ./requirements/extra.txt && \
    cd .. && \
    rm -rf pytorch-lightning-master

# Append /usr/local/lib/ to the loader path. The ${VAR:+...} form avoids a
# leading empty entry (which the loader treats as the current directory)
# when LD_LIBRARY_PATH is not already set in the base image.
ENV LD_LIBRARY_PATH="${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}/usr/local/lib/"

# Smoke test: fail the build if torch cannot be imported.
RUN python -c "import torch; print(torch.__version__)"

View File

@ -54,11 +54,11 @@ coverage xml
You can build it on your own; note that it takes a long time, so be prepared.
```bash
git clone <git-repository>
docker image build -t pytorch_lightning:devel-pt_1_4 -f tests/Dockerfile --build-arg TORCH_VERSION=1.4 .
docker image build -t pytorch_lightning:devel-torch1.4 -f tests/Dockerfile --build-arg TORCH_VERSION=1.4 .
```
To build other versions, select a different Dockerfile.
```bash
docker image list
docker run --rm -it pytorch_lightning:devel-pt_1_4 bash
docker image rm pytorch_lightning:devel-pt_1_4
docker run --rm -it pytorch_lightning:devel-torch1.4 bash
docker image rm pytorch_lightning:devel-torch1.4
```