From 373a31e63ed03e1bf56b1017dba1e714a65bce2a Mon Sep 17 00:00:00 2001 From: Jirka Borovec Date: Wed, 10 Feb 2021 21:21:20 +0100 Subject: [PATCH] add azure timeout (#5907) * add azure timeout * rework --- azure-pipelines.yml | 121 ++++++++++++++++++++++++-------------------- 1 file changed, 66 insertions(+), 55 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 1b91c63b4b..17029d2817 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -12,68 +12,79 @@ pr: - master - release/* -pool: dsvm-spot-pool +jobs: + - job: pytest + # how long to run the job before automatically cancelling + timeoutInMinutes: 45 + # how much time to give 'run always even if cancelled tasks' before stopping them + cancelTimeoutInMinutes: 2 -strategy: - matrix: - PT16: - torch.version: '1.6' - python.version: '3.7' + pool: dsvm-spot-pool -# ToDo: this need to have installed docker in the base image... -#container: pytorchlightning/pytorch_lightning:base-cuda-py3.7-torch1.6 -#container: "pytorchlightning/pytorch_lightning:base-cuda-py$[ variables['python.version'] ]-torch1.6" -container: - # base ML image: mcr.microsoft.com/azureml/openmpi3.1.2-cuda10.2-cudnn8-ubuntu18.04 - image: "pytorchlightning/pytorch_lightning:base-cuda-py3.7-torch1.6" - #endpoint: azureContainerRegistryConnection - options: "--runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all" + #strategy: + # matrix: + # PT16: + # torch.version: '1.6' + # python.version: '3.7' -steps: -- bash: | - lspci | egrep 'VGA|3D' - whereis nvidia - nvidia-smi - python --version - pip --version - pip list - displayName: 'Image info & NVIDIA' + # ToDo: this need to have installed docker in the base image... + #container: pytorchlightning/pytorch_lightning:base-cuda-py3.7-torch1.6 + #container: "pytorchlightning/pytorch_lightning:base-cuda-py$[ variables['python.version'] ]-torch1.6" + container: + # base ML image: mcr.microsoft.com/azureml/openmpi3.1.2-cuda10.2-cudnn8-ubuntu18.04 + image: "pytorchlightning/pytorch_lightning:base-cuda-py3.7-torch1.6" + #endpoint: azureContainerRegistryConnection + options: "--runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all" -- bash: | - #sudo apt-get install -y cmake - # python -m pip install "pip==20.1" - pip install --requirement requirements.txt --find-links https://download.pytorch.org/whl/cpu/torch_stable.html - python -c "fname = 'requirements/extra.txt' ; lines = [line for line in open(fname).readlines() if 'fairscale' not in line] ; open(fname, 'w').writelines(lines)" - python -c "fname = 'requirements/extra.txt' ; lines = [line for line in open(fname).readlines() if 'horovod' not in line] ; open(fname, 'w').writelines(lines)" - pip install --requirement ./requirements/devel.txt --upgrade-strategy only-if-needed - pip install git+https://$(AUTH_TOKEN)@github.com/PyTorchLightning/lightning-dtrun.git@v0.0.2 --no-cache-dir - pip list - displayName: 'Install dependencies' + workspace: + clean: all -- script: | - python tests/collect_env_details.py - displayName: 'Env details' + steps: -- bash: | - wget https://pl-public-data.s3.amazonaws.com/legacy/checkpoints.zip -P legacy/ - unzip -o legacy/checkpoints.zip -d legacy/ - ls -l legacy/checkpoints/ - displayName: 'Get legacy checkpoints' + - bash: | + lspci | egrep 'VGA|3D' + whereis nvidia + nvidia-smi + python --version + pip --version + pip list + displayName: 'Image info & NVIDIA' -- script: | - python -m coverage run --source pytorch_lightning -m pytest pytorch_lightning tests -v --durations=50 - displayName: 'Testing: standard' + - bash: | + #sudo apt-get install -y cmake + # python -m pip install "pip==20.1" + pip install --requirement requirements.txt --find-links https://download.pytorch.org/whl/cpu/torch_stable.html + python -c "fname = 'requirements/extra.txt' ; lines = [line for line in open(fname).readlines() if 'fairscale' not in line] ; open(fname, 'w').writelines(lines)" + python -c "fname = 'requirements/extra.txt' ; lines = [line for line in open(fname).readlines() if 'horovod' not in line] ; open(fname, 'w').writelines(lines)" + pip install --requirement ./requirements/devel.txt --upgrade-strategy only-if-needed + pip install git+https://$(AUTH_TOKEN)@github.com/PyTorchLightning/lightning-dtrun.git@v0.0.2 --no-cache-dir + pip list + displayName: 'Install dependencies' -- script: | - sh tests/special_tests.sh - displayName: 'Testing: special' + - script: | + python tests/collect_env_details.py + displayName: 'Env details' -- bash: | - python -m coverage report - python -m coverage xml - codecov --token $(CODECOV_TOKEN) --flags=gpu,pytest --name="GPU-coverage" --env=linux,azure - displayName: 'Statistics' + - bash: | + wget https://pl-public-data.s3.amazonaws.com/legacy/checkpoints.zip -P legacy/ + unzip -o legacy/checkpoints.zip -d legacy/ + ls -l legacy/checkpoints/ + displayName: 'Get legacy checkpoints' -- script: | - python -m pytest benchmarks pl_examples -v --maxfail=2 --durations=0 - displayName: 'Testing: extended' + - script: | + python -m coverage run --source pytorch_lightning -m pytest pytorch_lightning tests -v --durations=50 + displayName: 'Testing: standard' + + - script: | + sh tests/special_tests.sh + displayName: 'Testing: special' + + - bash: | + python -m coverage report + python -m coverage xml + codecov --token $(CODECOV_TOKEN) --flags=gpu,pytest --name="GPU-coverage" --env=linux,azure + displayName: 'Statistics' + + - script: | + python -m pytest benchmarks pl_examples -v --maxfail=2 --durations=0 + displayName: 'Testing: extended'