ci: update runner for IPU (#17183)
This commit is contained in:
parent
3458258905
commit
5b08d5ee70
|
@ -43,23 +43,31 @@ variables:
|
|||
jobs:
|
||||
- job: testing
|
||||
# how long to run the job before automatically cancelling
|
||||
timeoutInMinutes: "15"
|
||||
timeoutInMinutes: "20"
|
||||
pool: graphcore-ipus
|
||||
workspace:
|
||||
clean: all
|
||||
|
||||
steps:
|
||||
- script: |
|
||||
ls -la /mnt/public/packages
|
||||
ls -la /opt/poplar
|
||||
tar -xvzf /opt/poplar/${{ variables.poplar_sdk }}.tar.gz
|
||||
displayName: "Extract Poplar SDK"
|
||||
|
||||
- script: |
|
||||
set -eux
|
||||
# ls -la /mnt/public/packages
|
||||
ls -la /opt/poplar
|
||||
tar -xzf /opt/poplar/${{ variables.poplar_sdk }}.tar.gz
|
||||
pip debug --verbose
|
||||
pip install ${{ variables.poplar_sdk }}/poptorch-*ubuntu*.whl
|
||||
displayName: "Install poptorch"
|
||||
displayName: "Poplar SDK: Extract & Install"
|
||||
|
||||
- bash: |
|
||||
# enable scripts basically just set/modify some environment variables
|
||||
source ${{ variables.poplar_sdk }}/poplar-ubuntu*/enable.sh
|
||||
gc-info --list-devices
|
||||
printenv
|
||||
python --version
|
||||
pip --version
|
||||
pip list
|
||||
displayName: 'Image info & GraphCore'
|
||||
|
||||
- script: |
|
||||
set -eux
|
||||
|
@ -78,17 +86,18 @@ jobs:
|
|||
displayName: "Reset IPU devices"
|
||||
|
||||
- bash: |
|
||||
for fpath in `ls requirements/**/*.txt`; do \
|
||||
for fpath in `ls requirements/pytorch/*.txt`; do \
|
||||
python ./requirements/pytorch/adjust-versions.py $fpath; \
|
||||
done
|
||||
pip install -e .[extra,examples,test]
|
||||
|
||||
pip install .[test] -f /mnt/public/packages
|
||||
pip uninstall -y neptune-client # it is not clear what version is compatible
|
||||
pip list
|
||||
env:
|
||||
PACKAGE_NAME: "pytorch"
|
||||
FREEZE_REQUIREMENTS: "1"
|
||||
GIT_TERMINAL_PROMPT: "1"
|
||||
displayName: 'Install dependencies'
|
||||
displayName: 'Install package & dependencies'
|
||||
|
||||
- bash: |
|
||||
python requirements/collect_env_details.py
|
||||
|
|
|
@ -173,7 +173,7 @@ subprojects:
|
|||
- "build-cuda (3.9, 1.12, 11.6.1)"
|
||||
- "build-cuda (3.9, 1.13, 11.7.1)"
|
||||
- "build-cuda (3.10, 2.0, 11.7.1)"
|
||||
- "build-ipu (3.9, 1.13)"
|
||||
- "build-ipu (3.8, 1.13)"
|
||||
- "build-NGC"
|
||||
- "build-pl (3.9, 1.11, 11.3.1)"
|
||||
- "build-pl (3.9, 1.12, 11.6.1)"
|
||||
|
|
|
@ -145,7 +145,7 @@ jobs:
|
|||
matrix:
|
||||
include:
|
||||
# the config used in 'dockers/ci-runner-ipu/Dockerfile'
|
||||
- {python_version: "3.9", pytorch_version: "1.13"}
|
||||
- {python_version: "3.8", pytorch_version: "1.13"}
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: docker/setup-buildx-action@v2
|
||||
|
|
|
@ -116,7 +116,9 @@ jobs:
|
|||
- name: Install package & dependencies
|
||||
run: |
|
||||
pip install -q pip -U
|
||||
pip install .[extra,test] -U "pytest-timeout" -f ${TORCH_URL} ${TORCH_PREINSTALL} -f ${PYPI_CACHE} --prefer-binary
|
||||
pip install .[extra,test] -U \
|
||||
"pytest-timeout" -r requirements/_integrations/accelerators.txt \
|
||||
-f ${TORCH_URL} ${TORCH_PREINSTALL} -f ${PYPI_CACHE} --prefer-binary
|
||||
pip list
|
||||
- name: Dump handy wheels
|
||||
if: github.event_name == 'push' && github.ref == 'refs/heads/master'
|
||||
|
|
|
@ -0,0 +1,2 @@
|
|||
# validation HPU connectors
|
||||
lightning-habana>=0.1.0rc0
|
|
@ -20,7 +20,7 @@ def find_latest(ver: str) -> Dict[str, str]:
|
|||
ver = re.search(r"([\.\d]+)", ver).groups()[0]
|
||||
# in case there is a remaining dot at the end - e.g. "1.9.0.dev20210504"
|
||||
ver = ver[:-1] if ver[-1] == "." else ver
|
||||
print(f"finding ecosystem versions for: {ver}")
|
||||
print(f"\n\n\nfinding ecosystem versions for: {ver}")
|
||||
|
||||
# find first match
|
||||
for option in VERSIONS:
|
||||
|
|
|
@ -18,6 +18,3 @@ uvicorn<0.19.1 # for `ServableModuleValidator`
|
|||
|
||||
tensorboard>=2.9.1, <2.12.0 # for `TensorBoardLogger`
|
||||
protobuf<=3.20.1 # strict # an extra is updating protobuf, this pin prevents TensorBoard failure
|
||||
|
||||
# validation HPU connectors
|
||||
lightning-habana>=0.1.0rc0
|
||||
|
|
|
@ -35,8 +35,9 @@ from lightning.pytorch.utilities.model_summary import ModelSummary
|
|||
from lightning.pytorch.utilities.rank_zero import rank_zero_only
|
||||
|
||||
# neptune is available under two names on PyPI: `neptune` and `neptune-client`
|
||||
_NEPTUNE_AVAILABLE = RequirementCache("neptune")
|
||||
_NEPTUNE_AVAILABLE = RequirementCache("neptune>=1.0")
|
||||
_NEPTUNE_CLIENT_AVAILABLE = RequirementCache("neptune-client")
|
||||
|
||||
if _NEPTUNE_AVAILABLE:
|
||||
# >=1.0 package structure
|
||||
import neptune
|
||||
|
|
Loading…
Reference in New Issue