Docker: building XLA base image (#2494)
* refactor * add TPU base * wip * builds * typo * extras * simple * unzip * rename
This commit is contained in:
parent
a91b06ed1e
commit
977df6ed31
|
@ -6,7 +6,7 @@ name: torch-GPU
|
|||
|
||||
steps:
|
||||
- name: testing
|
||||
image: pytorchlightning/pytorch_lightning:devel-pt_1_4
|
||||
image: pytorchlightning/pytorch_lightning:devel-pt1.4
|
||||
|
||||
environment:
|
||||
SLURM_LOCALID: 0
|
||||
|
|
|
@ -10,7 +10,7 @@ on:
|
|||
- created
|
||||
|
||||
jobs:
|
||||
build:
|
||||
build-Conda:
|
||||
runs-on: ubuntu-20.04
|
||||
strategy:
|
||||
matrix:
|
||||
|
@ -35,7 +35,7 @@ jobs:
|
|||
repository: pytorchlightning/pytorch_lightning
|
||||
username: ${{ secrets.DOCKER_USERNAME }}
|
||||
password: ${{ secrets.DOCKER_PASSWORD }}
|
||||
dockerfile: docker/Dockerfile
|
||||
dockerfile: dockers/conda/Dockerfile
|
||||
build_args: PYTHON_VERSION=${{ matrix.python_version }},PYTORCH_VERSION=${{ matrix.pytorch_version }}
|
||||
tags: "nightly-py${{ matrix.python_version }}-torch${{ matrix.pytorch_version }}"
|
||||
timeout-minutes: 40
|
||||
|
@ -53,7 +53,32 @@ jobs:
|
|||
repository: pytorchlightning/pytorch_lightning
|
||||
username: ${{ secrets.DOCKER_USERNAME }}
|
||||
password: ${{ secrets.DOCKER_PASSWORD }}
|
||||
dockerfile: docker/Dockerfile
|
||||
dockerfile: dockers/conda/Dockerfile
|
||||
build_args: PYTHON_VERSION=${{ matrix.python_version }},PYTORCH_VERSION=${{ matrix.pytorch_version }},LIGHTNING_VERSION=${{ env.RELEASE_VERSION }}
|
||||
tags: "${{ env.RELEASE_VERSION }}-py${{ matrix.python_version }}-torch${{ matrix.pytorch_version }},latest-py${{ matrix.python_version }}-torch${{ matrix.pytorch_version }}"
|
||||
timeout-minutes: 40
|
||||
|
||||
build-XLA:
|
||||
runs-on: ubuntu-20.04
|
||||
strategy:
|
||||
matrix:
|
||||
python_version: [3.6, 3.7]
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v2
|
||||
- uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: 3.7
|
||||
|
||||
- name: Publish Master to Docker
|
||||
# publish master
|
||||
uses: docker/build-push-action@v1.1.0
|
||||
if: github.event_name == 'push'
|
||||
with:
|
||||
repository: pytorchlightning/pytorch_lightning
|
||||
username: ${{ secrets.DOCKER_USERNAME }}
|
||||
password: ${{ secrets.DOCKER_PASSWORD }}
|
||||
dockerfile: dockers/tpu-extras/Dockerfile
|
||||
build_args: PYTHON_VERSION=${{ matrix.python_version }}
|
||||
tags: "XLA-extras-py${{ matrix.python_version }}"
|
||||
timeout-minutes: 25
|
||||
|
|
|
@ -60,7 +60,7 @@ jobs:
|
|||
shell: bash
|
||||
- name: Build and Push Docker Image
|
||||
run: |
|
||||
cd docker/tpu
|
||||
cd dockers/tpu-tests
|
||||
docker build --tag "$IMAGE:$GITHUB_RUN_ID" -f Dockerfile --build-arg "GITHUB_REF=$GITHUB_REF" --build-arg "TEST_IMAGE=1" .
|
||||
docker push "$IMAGE:$GITHUB_RUN_ID"
|
||||
shell: bash
|
||||
|
@ -77,7 +77,7 @@ jobs:
|
|||
|
||||
- name: Deploy the job on the kubernetes cluster
|
||||
run: |-
|
||||
job_name=$(jsonnet -J ml-testing-accelerators/ docker/tpu/tpu_test_cases.jsonnet --ext-str image=$IMAGE --ext-str image-tag=$GITHUB_RUN_ID | kubectl create -f -) && \
|
||||
job_name=$(jsonnet -J ml-testing-accelerators/ dockers/tpu-tests/tpu_test_cases.jsonnet --ext-str image=$IMAGE --ext-str image-tag=$GITHUB_RUN_ID | kubectl create -f -) && \
|
||||
job_name=${job_name#job.batch/} && \
|
||||
job_name=${job_name% created} && \
|
||||
echo "Waiting on kubernetes job: $job_name in cluster: $GKE_CLUSTER" && \
|
||||
|
|
|
@ -4,7 +4,7 @@ You can build it on your own, note it takes lots of time, be prepared.
|
|||
|
||||
```bash
|
||||
git clone <git-repository>
|
||||
docker image build -t pytorch-lightning:latest -f docker/Dockerfile .
|
||||
docker image build -t pytorch-lightning:latest -f dockers/conda/Dockerfile .
|
||||
```
|
||||
|
||||
or with specific arguments
|
||||
|
@ -13,7 +13,7 @@ or with specific arguments
|
|||
git clone <git-repository>
|
||||
docker image build \
|
||||
-t pytorch-lightning:py38 \
|
||||
-f docker/Dockerfile \
|
||||
-f dockers/conda/Dockerfile \
|
||||
--build-arg PYTHON_VERSION=3.8 \
|
||||
--build-arg PYTORCH_VERSION=1.4 \
|
||||
.
|
|
@ -0,0 +1,65 @@
|
|||
FROM google/cloud-sdk:slim
|
||||
|
||||
# PYTHON_VERSION selects both the Python interpreter and the matching pytorch/xla nightly wheels installed below (3.7 by default; 3.6 wheels are also available).
|
||||
ARG PYTHON_VERSION=3.7
|
||||
|
||||
SHELL ["/bin/bash", "-c"]
|
||||
|
||||
# Install the requested Python interpreter, the build toolchain and the nightly
# PyTorch/XLA wheels. Everything happens in a single layer so that temporary
# files (get-pip.py, the downloaded wheels, apt package lists) are removed in
# the same layer that created them and never end up in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        python${PYTHON_VERSION} \
        python${PYTHON_VERSION}-dev \
        $( [ ${PYTHON_VERSION%%.*} -ge 3 ] && echo "python${PYTHON_VERSION%%.*}-distutils" ) \
        build-essential \
        cmake \
        wget \
        unzip \
        ca-certificates \
        libomp5 \
    && \
    # Install pip for the selected interpreter. TLS verification stays enabled
    # (ca-certificates is installed above) because the script is run as root.
    wget https://bootstrap.pypa.io/get-pip.py --progress=bar:force:noscroll && \
    python${PYTHON_VERSION} get-pip.py && \
    rm get-pip.py && \
    # Make the selected interpreter the default `pythonX` and `python`.
    update-alternatives --install /usr/bin/python${PYTHON_VERSION%%.*} python${PYTHON_VERSION%%.*} /usr/bin/python${PYTHON_VERSION} 1 && \
    update-alternatives --install /usr/bin/python python /usr/bin/python${PYTHON_VERSION} 1 && \
    # Install PyTorch XLA. The wheel tag is derived from PYTHON_VERSION
    # (e.g. 3.7 -> cp37-cp37m), so the same steps serve every supported version.
    py_version=${PYTHON_VERSION/./} && \
    wheel_suffix="nightly-cp${py_version}-cp${py_version}m-linux_x86_64.whl" && \
    gsutil cp "gs://tpu-pytorch/wheels/torch-${wheel_suffix}" . && \
    gsutil cp "gs://tpu-pytorch/wheels/torch_xla-${wheel_suffix}" . && \
    gsutil cp "gs://tpu-pytorch/wheels/torchvision-${wheel_suffix}" . && \
    pip install --no-cache-dir "torch-${wheel_suffix}" && \
    pip install --no-cache-dir "torch_xla-${wheel_suffix}" && \
    pip install --no-cache-dir "torchvision-${wheel_suffix}" && \
    rm "torch-${wheel_suffix}" && \
    rm "torch_xla-${wheel_suffix}" && \
    rm "torchvision-${wheel_suffix}" && \
    pip install --no-cache-dir mkl && \
    # Cleaning: drop unused packages, the apt cache and the package lists.
    apt-get autoremove -y && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Install pytorch-lightning dependencies.
# Only the requirements of the master branch are installed; the downloaded
# source tree is deleted again in the same layer.
RUN \
    # Get package
    wget https://github.com/PyTorchLightning/pytorch-lightning/archive/master.zip --progress=bar:force:noscroll && \
    unzip master.zip && \
    rm master.zip && \
    # Install PL dependencies
    cd pytorch-lightning-master && \
    pip install --no-cache-dir --requirement ./requirements/base.txt && \
    # Drop Horovod: delete every line that starts with "horovod" before
    # installing the extra requirements.
    sed -i '/^horovod/d' ./requirements/extra.txt && \
    pip install --no-cache-dir --requirement ./requirements/extra.txt && \
    cd .. && \
    rm -rf pytorch-lightning-master
|
||||
|
||||
# Append /usr/local/lib/ to the dynamic loader search path.
# The ${VAR:+...} form adds the ":" separator only when LD_LIBRARY_PATH is
# already set; otherwise the value would start with an empty entry, which the
# loader interprets as the current working directory.
ENV LD_LIBRARY_PATH="${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}/usr/local/lib/"

# Smoke test: the build fails here if torch cannot be imported.
RUN python -c "import torch; print(torch.__version__)"
|
|
@ -54,11 +54,11 @@ coverage xml
|
|||
You can build it on your own; note that it takes a long time, so be prepared.
|
||||
```bash
|
||||
git clone <git-repository>
|
||||
docker image build -t pytorch_lightning:devel-pt_1_4 -f tests/Dockerfile --build-arg TORCH_VERSION=1.4 .
|
||||
docker image build -t pytorch_lightning:devel-torch1.4 -f tests/Dockerfile --build-arg TORCH_VERSION=1.4 .
|
||||
```
|
||||
To build other versions, select a different Dockerfile.
|
||||
```bash
|
||||
docker image list
|
||||
docker run --rm -it pytorch_lightning:devel-pt_1_4 bash
|
||||
docker image rm pytorch_lightning:devel-pt_1_4
|
||||
docker run --rm -it pytorch_lightning:devel-torch1.4 bash
|
||||
docker image rm pytorch_lightning:devel-torch1.4
|
||||
```
|
||||
|
|
Loading…
Reference in New Issue