ci/gpu: fix install future & use local cache (#16929)

This commit is contained in:
Jirka Borovec 2023-03-02 23:17:29 +01:00 committed by GitHub
parent 2f17d1b999
commit 70b257c17f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 11 additions and 5 deletions

View File

@ -47,11 +47,12 @@ jobs:
variables:
DEVICES: $( python -c 'print("$(Agent.Name)".split("_")[-1])' )
FREEZE_REQUIREMENTS: "1"
PIP_CACHE_DIR: "/var/tmp/pip"
container:
image: "pytorchlightning/pytorch_lightning:base-cuda-py3.9-torch1.13-cuda11.7.1"
# The container's default shared-memory (/dev/shm) size is 64 MB, which is too small for NCCL. Increase it to avoid:
# 'Error while creating shared memory: unhandled system error, NCCL version 2.7.8'
options: "--gpus=all --shm-size=2gb"
options: "--gpus=all --shm-size=2gb -v /var/tmp:/var/tmp"
strategy:
matrix:
'pkg: Fabric':

View File

@ -56,7 +56,7 @@ jobs:
scope: ""
PACKAGE_NAME: "pytorch"
'PyTorch | future':
image: "pytorchlightning/pytorch_lightning:base-cuda-py3.9-torch1.13-cuda11.7.1"
image: "pytorchlightning/pytorch_lightning:base-cuda-py3.10-torch1.13-cuda11.7.1"
scope: "future"
PACKAGE_NAME: "pytorch"
'Lightning | latest':
@ -67,11 +67,12 @@ jobs:
variables:
DEVICES: $( python -c 'print("$(Agent.Name)".split("_")[-1])' )
FREEZE_REQUIREMENTS: "1"
PIP_CACHE_DIR: "/var/tmp/pip"
container:
image: $(image)
# The container's default shared-memory (/dev/shm) size is 64 MB, which is too small for NCCL. Increase it to avoid:
# 'Error while creating shared memory: unhandled system error, NCCL version 2.7.8'
options: "--gpus=all --shm-size=2gb"
options: "--gpus=all --shm-size=2gb -v /var/tmp:/var/tmp"
workspace:
clean: all
steps:
@ -114,8 +115,11 @@ jobs:
displayName: 'Install package & extras'
# Step 'bump to future': runs only when the `scope` variable equals 'future' (see `condition`).
# It upgrades pip, reinstalls torch/torchvision from the PyTorch pre-release wheel indexes
# (selected by ${CUDA_VERSION_MM}), and finally asserts that the imported torch version
# starts with '2.0.0', so the step fails if a different build ended up installed.
# NOTE(review): this view appears to show both the pre- and post-change install commands
# back to back; confirm which lines are present in the actual pipeline file.
- bash: |
pip install -U -q pip
pip uninstall -y torch torchvision
pip install torch torchvision -U --pre --no-cache -f https://download.pytorch.org/whl/test/cu${CUDA_VERSION_MM}/torch_test.html
pip install "torch==2.0.0" "torchvision==0.15.0" -U --pre \
-f "https://download.pytorch.org/whl/test/cu${CUDA_VERSION_MM}/torch_test.html" \
-f "https://download.pytorch.org/whl/nightly/cu${CUDA_VERSION_MM}/torch_nightly.html"
python -c "from torch import __version__ as ver; assert ver.startswith('2.0.0'), ver"
condition: eq(variables['scope'], 'future')
displayName: 'bump to future'

View File

@ -8,7 +8,7 @@ on:
types: [opened, reopened, ready_for_review, synchronize] # added `ready_for_review` since draft is skipped
paths:
- ".actions/**"
- ".github/workflows/ci-pytorch-dockers.yml"
- ".github/workflows/ci-dockers-pytorch.yml"
- "dockers/**"
- "requirements/*.txt"
- "requirements/pytorch/**"
@ -105,6 +105,7 @@ jobs:
- {python_version: "3.9", pytorch_version: "1.11", cuda_version: "11.3.1"}
- {python_version: "3.9", pytorch_version: "1.12", cuda_version: "11.6.1"}
- {python_version: "3.9", pytorch_version: "1.13", cuda_version: "11.7.1"}
- {python_version: "3.10", pytorch_version: "1.13", cuda_version: "11.7.1"}
steps:
- uses: actions/checkout@v3
- uses: docker/setup-buildx-action@v2