diff --git a/.azure/gpu-benchmark.yml b/.azure/gpu-benchmark.yml index 34942c8928..f9580fb595 100644 --- a/.azure/gpu-benchmark.yml +++ b/.azure/gpu-benchmark.yml @@ -59,7 +59,6 @@ jobs: - bash: | echo $CUDA_VISIBLE_DEVICES echo $TORCH_URL - lspci | egrep 'VGA|3D' whereis nvidia nvidia-smi which python && which pip @@ -76,7 +75,6 @@ jobs: - bash: | set -e - pip list python requirements/collect_env_details.py python -c "import torch ; mgpu = torch.cuda.device_count() ; assert mgpu == 2, f'GPU: {mgpu}'" displayName: 'Env details' diff --git a/.azure/gpu-tests-fabric.yml b/.azure/gpu-tests-fabric.yml index 2c6699c3a9..c4efcf3383 100644 --- a/.azure/gpu-tests-fabric.yml +++ b/.azure/gpu-tests-fabric.yml @@ -81,7 +81,6 @@ jobs: echo $CUDA_VERSION_MM echo $TORCH_URL echo $COVERAGE_SOURCE - lspci | egrep 'VGA|3D' whereis nvidia nvidia-smi which python && which pip @@ -104,7 +103,6 @@ jobs: - bash: | set -e - pip list python requirements/collect_env_details.py python -c "import torch ; mgpu = torch.cuda.device_count() ; assert mgpu == 2, f'GPU: {mgpu}'" displayName: 'Env details' diff --git a/.azure/gpu-tests-pytorch.yml b/.azure/gpu-tests-pytorch.yml index 66ef3db407..5df5dad4b4 100644 --- a/.azure/gpu-tests-pytorch.yml +++ b/.azure/gpu-tests-pytorch.yml @@ -51,10 +51,6 @@ jobs: cancelTimeoutInMinutes: "2" strategy: matrix: - 'PyTorch & strategies': # this uses torch 1.12 as not all strategies support 1.13 yet - image: "pytorchlightning/pytorch_lightning:base-cuda-py3.9-torch1.12-cuda11.6.1" - scope: "strategies" - PACKAGE_NAME: "pytorch" 'PyTorch | latest': image: "pytorchlightning/pytorch_lightning:base-cuda-py3.10-torch2.0-cuda11.7.1" scope: "" @@ -92,7 +88,6 @@ jobs: echo $CUDA_VERSION_MM echo $TORCH_URL echo $COVERAGE_SOURCE - lspci | egrep 'VGA|3D' whereis nvidia nvidia-smi which python && which pip @@ -106,31 +101,20 @@ jobs: for fpath in `ls requirements/**/*.txt`; do \ python ./requirements/pytorch/adjust-versions.py $fpath ${PYTORCH_VERSION}; \ done + # prune packages with installation issues + pip install -q -r .actions/requirements.txt + python .actions/assistant.py requirements_prune_pkgs \ + --packages="[lightning-colossalai,lightning-bagua]" \ + --req_files="[requirements/_integrations/strategies.txt]" displayName: 'Adjust dependencies' - bash: | extra=$(python -c "print({'lightning': 'pytorch-'}.get('$(PACKAGE_NAME)', ''))") - pip install -e ".[${extra}extra,${extra}test,${extra}examples]" pytest-timeout -U --find-links ${TORCH_URL} + pip install -e ".[${extra}dev]" -r requirements/_integrations/strategies.txt pytest-timeout -U --find-links ${TORCH_URL} displayName: 'Install package & dependencies' - - bash: | - pip uninstall -y -r requirements/pytorch/strategies.txt \ - -r requirements/_integrations/strategies.txt - condition: ne(variables['scope'], 'strategies') - displayName: 'Uninstall strategies' - - bash: | set -e - pip install -r requirements/pytorch/strategies.txt \ - -r requirements/_integrations/strategies.txt \ - --find-links ${TORCH_URL} - python requirements/pytorch/check-avail-strategies.py - condition: eq(variables['scope'], 'strategies') - displayName: 'Install strategies' - - - bash: | - set -e - pip list python requirements/collect_env_details.py python -c "import torch ; mgpu = torch.cuda.device_count() ; assert mgpu == 2, f'GPU: {mgpu}'" python requirements/pytorch/check-avail-extras.py @@ -145,7 +129,6 @@ jobs: displayName: 'Testing: PyTorch doctests' - bash: | - pip install -q -r .actions/requirements.txt python .actions/assistant.py copy_replace_imports --source_dir="./tests/tests_pytorch" \ --source_import="lightning.fabric,lightning.pytorch" \ --target_import="lightning_fabric,pytorch_lightning" diff --git a/dockers/base-cuda/Dockerfile b/dockers/base-cuda/Dockerfile index 9a12bb8c55..6c83762fad 100644 --- a/dockers/base-cuda/Dockerfile +++ b/dockers/base-cuda/Dockerfile @@ -76,8 +76,8 @@ RUN \ rm -rf /root/.cache && \ rm -rf /var/lib/apt/lists/* -COPY ./requirements/pytorch/ ./requirements/pytorch/ -COPY ./.actions/assistant.py assistant.py +COPY requirements/pytorch/ requirements/pytorch/ +COPY requirements/_integrations/ requirements/_integrations/ ENV PYTHONPATH="/usr/lib/python${PYTHON_VERSION}/site-packages" @@ -85,31 +85,17 @@ RUN \ wget https://bootstrap.pypa.io/get-pip.py --progress=bar:force:noscroll --no-check-certificate && \ python${PYTHON_VERSION} get-pip.py && \ rm get-pip.py && \ - pip install -q fire && \ # Disable cache \ - export CUDA_VERSION_MM=$(python -c "print(''.join('$CUDA_VERSION'.split('.')[:2]))") && \ pip config set global.cache-dir false && \ # set particular PyTorch version \ for fpath in `ls requirements/**/*.txt`; do \ python ./requirements/pytorch/adjust-versions.py $fpath ${PYTORCH_VERSION}; \ done && \ - - rm assistant.py && \ - # Install base requirements \ CUDA_VERSION_MM=${CUDA_VERSION%.*} && \ - pip install -r requirements/pytorch/base.txt \ - --no-cache-dir --find-links "https://download.pytorch.org/whl/cu${CUDA_VERSION_MM//'.'/''}/torch_stable.html" - - -RUN \ - # install rest of strategies - CUDA_VERSION_MM=${CUDA_VERSION%.*} && \ - cat requirements/pytorch/strategies.txt && \ - pip install -r requirements/pytorch/devel.txt -r requirements/pytorch/strategies.txt \ - --no-cache-dir --find-links "https://download.pytorch.org/whl/cu${CUDA_VERSION_MM//'.'/''}/torch_stable.html" - -COPY requirements/pytorch/check-avail-extras.py check-avail-extras.py -COPY requirements/pytorch/check-avail-strategies.py check-avail-strategies.py + pip install \ + -r requirements/pytorch/devel.txt \ + -r requirements/pytorch/strategies.txt \ + --find-links "https://download.pytorch.org/whl/cu${CUDA_VERSION_MM//'.'/''}/torch_stable.html" RUN \ # Show what we have diff --git a/requirements/pytorch/check-avail-strategies.py b/requirements/pytorch/check-avail-strategies.py deleted file mode 100644 index af7fee95cc..0000000000 --- a/requirements/pytorch/check-avail-strategies.py +++ /dev/null @@ -1,2 +0,0 @@ -if __name__ == "__main__": - import deepspeed # noqa: F401