Summary of this patch (text before the first "diff --git" line is ignored by patch tools):
  * azure-pipelines.yml: switch the job container image tag from base-cuda-py3.7-torch1.6
    to base-cuda-py3.8-torch1.6.
  * dockers/base-cuda/Dockerfile: exclude deepspeed 0.3.15 (marked broken in the added TODO)
    and quote the requirement specifier passed to pip.
  * requirements.txt: exclude tensorboard 2.5.0 (the added comment cites a GPU CI proto error).
NOTE(review): line breaks were restored from a whitespace-collapsed copy; the leading
indentation and inner spacing of context lines is reconstructed - confirm against the
target files before applying.

diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index 52475bb4b3..a1e35ac35a 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -36,7 +36,7 @@ jobs:
     #container: "pytorchlightning/pytorch_lightning:base-cuda-py$[ variables['python.version'] ]-torch1.6"
     container:
       # base ML image: mcr.microsoft.com/azureml/openmpi3.1.2-cuda10.2-cudnn8-ubuntu18.04
-      image: "pytorchlightning/pytorch_lightning:base-cuda-py3.7-torch1.6"
+      image: "pytorchlightning/pytorch_lightning:base-cuda-py3.8-torch1.6"
       #endpoint: azureContainerRegistryConnection
       options: "--runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all"
 
diff --git a/dockers/base-cuda/Dockerfile b/dockers/base-cuda/Dockerfile
index 12df4a74db..10446282c0 100644
--- a/dockers/base-cuda/Dockerfile
+++ b/dockers/base-cuda/Dockerfile
@@ -114,7 +114,8 @@ RUN \
 
 RUN \
     # install DeepSpeed
-    pip install deepspeed>=0.3.14
+    # TODO(@SeanNaren): 0.3.15 is broken - skipping to unblock
+    pip install 'deepspeed>=0.3.14,!=0.3.15'
 
 RUN \
     # Show what we have
diff --git a/requirements.txt b/requirements.txt
index cb56c98a60..6fa400cb3f 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -7,6 +7,6 @@ future>=0.17.1 # required for builtins in setup.py
 PyYAML>=5.1, !=5.4.* # OmegaConf requirement >=5.1
 tqdm>=4.41.0
 fsspec[http]>=0.8.1
-tensorboard>=2.2.0
+tensorboard>=2.2.0, !=2.5.0 # 2.5.0 GPU CI error: 'Couldn't build proto file into descriptor pool!'
 torchmetrics==0.2.0
 pyDeprecate==0.2.0