ci/gpu: fix install future & use local cache (#16929)
Parent: 2f17d1b999
Commit: 70b257c17f

@@ -47,11 +47,12 @@ jobs:
   variables:
     DEVICES: $( python -c 'print("$(Agent.Name)".split("_")[-1])' )
     FREEZE_REQUIREMENTS: "1"
+    PIP_CACHE_DIR: "/var/tmp/pip"
   container:
     image: "pytorchlightning/pytorch_lightning:base-cuda-py3.9-torch1.13-cuda11.7.1"
     # default shm size is 64m. Increase it to avoid:
     # 'Error while creating shared memory: unhandled system error, NCCL version 2.7.8'
-    options: "--gpus=all --shm-size=2gb"
+    options: "--gpus=all --shm-size=2gb -v /var/tmp:/var/tmp"
   strategy:
     matrix:
       'pkg: Fabric':
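
The two added lines point pip's cache at a host directory and bind-mount that directory into the container, so wheels downloaded in one run can be reused by later runs on the same agent. A rough sketch of what the container setup amounts to (Azure assembles the real `docker run` itself; the `-e` flag here stands in for the pipeline variable):

```bash
# Illustrative only: reproduce the cache-sharing setup by hand.
docker run --rm --gpus=all --shm-size=2gb \
  -v /var/tmp:/var/tmp \
  -e PIP_CACHE_DIR=/var/tmp/pip \
  pytorchlightning/pytorch_lightning:base-cuda-py3.9-torch1.13-cuda11.7.1 \
  pip cache dir   # should report the host-backed /var/tmp/pip
```
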
@@ -56,7 +56,7 @@ jobs:
         scope: ""
         PACKAGE_NAME: "pytorch"
       'PyTorch | future':
-        image: "pytorchlightning/pytorch_lightning:base-cuda-py3.9-torch1.13-cuda11.7.1"
+        image: "pytorchlightning/pytorch_lightning:base-cuda-py3.10-torch1.13-cuda11.7.1"
         scope: "future"
         PACKAGE_NAME: "pytorch"
       'Lightning | latest':
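
Only the 'PyTorch | future' matrix entry moves to the py3.10 base image; the tag encodes the interpreter the job will run on, which can be confirmed directly (illustrative, not part of the pipeline):

```bash
# Pull the bumped image and check which Python it ships.
docker run --rm pytorchlightning/pytorch_lightning:base-cuda-py3.10-torch1.13-cuda11.7.1 \
  python --version   # expected: Python 3.10.x
```
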
@@ -67,11 +67,12 @@ jobs:
   variables:
     DEVICES: $( python -c 'print("$(Agent.Name)".split("_")[-1])' )
     FREEZE_REQUIREMENTS: "1"
+    PIP_CACHE_DIR: "/var/tmp/pip"
   container:
     image: $(image)
     # default shm size is 64m. Increase it to avoid:
     # 'Error while creating shared memory: unhandled system error, NCCL version 2.7.8'
-    options: "--gpus=all --shm-size=2gb"
+    options: "--gpus=all --shm-size=2gb -v /var/tmp:/var/tmp"
   workspace:
     clean: all
   steps:
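
The same cache variable and volume mount are applied to the second GPU job. A quick way to verify from inside the container that pip actually resolves to the shared location and that the cache survives between runs (illustrative commands, not part of the pipeline):

```bash
# Confirm pip honours the mounted cache directory.
export PIP_CACHE_DIR=/var/tmp/pip
pip cache dir                                   # expected to print /var/tmp/pip
du -sh /var/tmp/pip 2>/dev/null || echo "cache not populated yet"
```
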
@@ -114,8 +115,11 @@ jobs:
     displayName: 'Install package & extras'

   - bash: |
       pip install -U -q pip
       pip uninstall -y torch torchvision
-      pip install torch torchvision -U --pre --no-cache -f https://download.pytorch.org/whl/test/cu${CUDA_VERSION_MM}/torch_test.html
+      pip install "torch==2.0.0" "torchvision==0.15.0" -U --pre \
+        -f "https://download.pytorch.org/whl/test/cu${CUDA_VERSION_MM}/torch_test.html" \
+        -f "https://download.pytorch.org/whl/nightly/cu${CUDA_VERSION_MM}/torch_nightly.html"
+      python -c "from torch import __version__ as ver; assert ver.startswith('2.0.0'), ver"
     condition: eq(variables['scope'], 'future')
     displayName: 'bump to future'
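
The 'bump to future' step now pins the pre-release versions explicitly, pulls from both the test and nightly wheel indexes, and asserts that the resolver did not quietly keep the preinstalled 1.13 build. A rough local equivalent, assuming CUDA_VERSION_MM holds the major+minor CUDA tag (e.g. 117 for the 11.7.1 image):

```bash
# Sketch of running the "future" install outside CI.
export CUDA_VERSION_MM=117
pip install -U -q pip
pip uninstall -y torch torchvision
pip install "torch==2.0.0" "torchvision==0.15.0" -U --pre \
  -f "https://download.pytorch.org/whl/test/cu${CUDA_VERSION_MM}/torch_test.html" \
  -f "https://download.pytorch.org/whl/nightly/cu${CUDA_VERSION_MM}/torch_nightly.html"
# Fail fast if pip fell back to a stable wheel instead of the 2.0.0 pre-release.
python -c "from torch import __version__ as ver; assert ver.startswith('2.0.0'), ver"
```
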
@@ -8,7 +8,7 @@ on:
     types: [opened, reopened, ready_for_review, synchronize]  # added `ready_for_review` since draft is skipped
     paths:
       - ".actions/**"
-      - ".github/workflows/ci-pytorch-dockers.yml"
+      - ".github/workflows/ci-dockers-pytorch.yml"
       - "dockers/**"
       - "requirements/*.txt"
      - "requirements/pytorch/**"
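
The docker workflow only runs when a PR touches one of the filtered paths. A rough local check of whether a branch would trigger it, assuming the default branch is master (the grep pattern below just mirrors the filter and is not part of the workflow):

```bash
# Illustrative: list changed files against master and match them to the path filter.
git diff --name-only origin/master... | grep -E \
  '^(\.actions/|\.github/workflows/ci-dockers-pytorch\.yml$|dockers/|requirements/[^/]+\.txt$|requirements/pytorch/)' \
  && echo "docker CI would run" || echo "docker CI would be skipped"
```
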
@@ -105,6 +105,7 @@ jobs:
           - {python_version: "3.9", pytorch_version: "1.11", cuda_version: "11.3.1"}
           - {python_version: "3.9", pytorch_version: "1.12", cuda_version: "11.6.1"}
           - {python_version: "3.9", pytorch_version: "1.13", cuda_version: "11.7.1"}
+          - {python_version: "3.10", pytorch_version: "1.13", cuda_version: "11.7.1"}
     steps:
       - uses: actions/checkout@v3
       - uses: docker/setup-buildx-action@v2
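
The new matrix entry builds a py3.10 variant of the CUDA 11.7.1 base image, matching the tag the Azure 'future' job now pulls. A sketch of the equivalent local build, assuming the base-cuda Dockerfile exposes build args named after the matrix keys (not verified here):

```bash
# Illustrative local build of the added matrix combination.
docker build dockers/base-cuda \
  --build-arg PYTHON_VERSION=3.10 \
  --build-arg PYTORCH_VERSION=1.13 \
  --build-arg CUDA_VERSION=11.7.1 \
  -t pytorchlightning/pytorch_lightning:base-cuda-py3.10-torch1.13-cuda11.7.1
```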