From 4656d39ede869f1bea0d977f21cb101fd7afc114 Mon Sep 17 00:00:00 2001 From: Jirka Borovec Date: Wed, 4 May 2022 23:06:02 +0900 Subject: [PATCH] Merge pull request #12723 from PyTorchLightning/req/strategies Separate strategies' requirements --- .actions/assistant.py | 2 +- .azure-pipelines/gpu-tests.yml | 6 ++---- .azure-pipelines/hpu-tests.yml | 5 +++-- .azure-pipelines/ipu-tests.yml | 8 ++++---- .github/workflows/ci_test-full.yml | 6 +----- .github/workflows/docs-checks.yml | 5 +---- dockers/base-conda/Dockerfile | 13 ++----------- dockers/base-cuda/Dockerfile | 17 ++--------------- dockers/base-ipu/Dockerfile | 1 - dockers/base-xla/Dockerfile | 3 +-- dockers/tpu-tests/Dockerfile | 3 +-- requirements/devel-base.txt | 14 ++++++++++++++ requirements/devel.txt | 6 ++++++ requirements/extra.txt | 1 - requirements/strategies.txt | 3 +++ requirements/test.txt | 2 -- setup.cfg | 1 + setup.py | 3 ++- 18 files changed, 44 insertions(+), 55 deletions(-) create mode 100644 requirements/devel-base.txt create mode 100644 requirements/strategies.txt diff --git a/.actions/assistant.py b/.actions/assistant.py index 4e68b548e7..e8088599ae 100644 --- a/.actions/assistant.py +++ b/.actions/assistant.py @@ -10,7 +10,7 @@ REQUIREMENT_FILES = ( "requirements.txt", "requirements/extra.txt", "requirements/loggers.txt", - # "requirements/test.txt", + "requirements/strategies.txt", "requirements/examples.txt", ) diff --git a/.azure-pipelines/gpu-tests.yml b/.azure-pipelines/gpu-tests.yml index 68206ad8e9..d9b59c5b2c 100644 --- a/.azure-pipelines/gpu-tests.yml +++ b/.azure-pipelines/gpu-tests.yml @@ -19,7 +19,7 @@ pr: jobs: - job: pytest # how long to run the job before automatically cancelling - timeoutInMinutes: "45" + timeoutInMinutes: "55" # how much time to give 'run always even if cancelled tasks' before stopping them cancelTimeoutInMinutes: "2" @@ -51,9 +51,7 @@ jobs: displayName: 'Image info & NVIDIA' - bash: | - python -c "fname = 'requirements/extra.txt' ; lines = [line for line in open(fname).readlines() if 'horovod' not in line] ; open(fname, 'w').writelines(lines)" - pip install "fairscale>=0.4.5" - pip install "deepspeed<0.6.0" # https://github.com/microsoft/DeepSpeed/issues/1878 + python -c "fname = 'requirements/strategies.txt' ; lines = [line for line in open(fname).readlines() if 'horovod' not in line] ; open(fname, 'w').writelines(lines)" CUDA_VERSION_MM=$(python -c "import torch ; print(''.join(map(str, torch.version.cuda.split('.')[:2])))") pip install "bagua-cuda$CUDA_VERSION_MM>=0.9.0" pip install . --requirement requirements/devel.txt diff --git a/.azure-pipelines/hpu-tests.yml b/.azure-pipelines/hpu-tests.yml index 13d1c1ecb1..c4abbe3c94 100644 --- a/.azure-pipelines/hpu-tests.yml +++ b/.azure-pipelines/hpu-tests.yml @@ -14,10 +14,10 @@ pr: - "release/*" jobs: - - job: hpu + - job: tests # how long to run the job before automatically cancelling - timeoutInMinutes: "5" + timeoutInMinutes: "10" # how much time to give 'run always even if cancelled tasks' before stopping them cancelTimeoutInMinutes: "2" @@ -33,6 +33,7 @@ jobs: displayName: 'Instance HW info' - bash: | + pip install . --requirement requirements/extra.txt pip install . --requirement requirements/test.txt displayName: 'Install dependencies' diff --git a/.azure-pipelines/ipu-tests.yml b/.azure-pipelines/ipu-tests.yml index caa3df86a7..0a60cf7867 100644 --- a/.azure-pipelines/ipu-tests.yml +++ b/.azure-pipelines/ipu-tests.yml @@ -16,8 +16,10 @@ variables: value: "poplar_sdk-ubuntu_20_04-2.3.1+793-89796d462d" jobs: - - job: ipu + - job: tests + # how long to run the job before automatically cancelling + timeoutInMinutes: "15" pool: graphcore-ipus workspace: @@ -51,11 +53,9 @@ jobs: - bash: | export GIT_TERMINAL_PROMPT=1 - python -c "fname = 'requirements/extra.txt' ; lines = [line for line in open(fname).readlines() if 'fairscale' not in line] ; open(fname, 'w').writelines(lines)" - python -c "fname = 'requirements/extra.txt' ; lines = [line for line in open(fname).readlines() if 'horovod' not in line] ; open(fname, 'w').writelines(lines)" python ./requirements/adjust-versions.py requirements/extra.txt python ./requirements/adjust-versions.py requirements/examples.txt - pip install . --requirement requirements/devel.txt + pip install . --requirement ./requirements/devel-base.txt pip list displayName: 'Install dependencies' diff --git a/.github/workflows/ci_test-full.yml b/.github/workflows/ci_test-full.yml index 9e2e1554ca..84ca60fd30 100644 --- a/.github/workflows/ci_test-full.yml +++ b/.github/workflows/ci_test-full.yml @@ -98,10 +98,6 @@ jobs: shell: bash - name: Install extra dependencies - env: - HOROVOD_BUILD_ARCH_FLAGS: "-mfma" - HOROVOD_WITHOUT_MXNET: 1 - HOROVOD_WITHOUT_TENSORFLOW: 1 run: | # adjust versions according installed Torch version python ./requirements/adjust-versions.py requirements/extra.txt @@ -119,7 +115,7 @@ jobs: HOROVOD_BUILT=$(python -c "import horovod.torch; horovod.torch.nccl_built(); print('SUCCESS')" || true) if [[ $HOROVOD_BUILT != "SUCCESS" ]]; then pip uninstall -y horovod - echo $(grep "horovod" requirements/extra.txt) > requirements/horovod.txt + grep "horovod" requirements/strategies.txt > requirements/horovod.txt pip install --no-cache-dir -r requirements/horovod.txt fi horovodrun --check-build diff --git a/.github/workflows/docs-checks.yml b/.github/workflows/docs-checks.yml index c2219479f6..7c9069e53d 100644 --- a/.github/workflows/docs-checks.yml +++ b/.github/workflows/docs-checks.yml @@ -41,12 +41,9 @@ jobs: sudo apt-get install -y cmake pandoc pip --version pip install -q fire - # remove Horovod from requirements - python .actions/assistant.py requirements_prune_pkgs horovod # python -m pip install --upgrade --user pip pip install --requirement requirements.txt --upgrade-strategy only-if-needed --find-links https://download.pytorch.org/whl/cpu/torch_stable.html --quiet - pip install --requirement requirements/extra.txt - pip install --requirement requirements/loggers.txt + pip install --requirement requirements/devel-base.txt pip install --requirement requirements/docs.txt pip list shell: bash diff --git a/dockers/base-conda/Dockerfile b/dockers/base-conda/Dockerfile index 5600f5f3cb..d778cc3f9e 100644 --- a/dockers/base-conda/Dockerfile +++ b/dockers/base-conda/Dockerfile @@ -91,6 +91,7 @@ ENV \ COPY ./requirements.txt requirements.txt COPY ./requirements/extra.txt requirements-extra.txt COPY ./requirements/examples.txt requirements-examples.txt +COPY ./requirements/strategies.txt requirements-strategies.txt COPY ./requirements/adjust-versions.py requirements_adjust_versions.py COPY ./.actions/assistant.py assistant.py @@ -123,7 +124,7 @@ RUN \ HOROVOD_BUILD_CUDA_CC_LIST=${TORCH_CUDA_ARCH_LIST//";"/","} && \ export HOROVOD_BUILD_CUDA_CC_LIST=${HOROVOD_BUILD_CUDA_CC_LIST//"."/""} && \ cat requirements_horovod.txt && \ - pip install --no-cache-dir -r requirements_horovod.txt && \ + pip install --no-cache-dir -r requirements-strategies.txt && \ rm requirements* RUN \ @@ -141,16 +142,6 @@ RUN \ pip install --no-cache-dir --global-option="--cuda_ext" https://github.com/NVIDIA/apex/archive/refs/heads/master.zip && \ python -c "from apex import amp" -RUN \ - # install FairScale - pip install fairscale==0.4.5 && \ - python -c "import fairscale; print(fairscale.__version__)" - -RUN \ - # install DeepSpeed - pip install deepspeed==0.6.0 && \ - python -c "import deepspeed; print(deepspeed.__version__)" - RUN \ # install Bagua CUDA_VERSION_MM=$(python -c "print(''.join('$CUDA_VERSION'.split('.')[:2]))") && \ diff --git a/dockers/base-cuda/Dockerfile b/dockers/base-cuda/Dockerfile index 15f490f2da..41dea516ae 100644 --- a/dockers/base-cuda/Dockerfile +++ b/dockers/base-cuda/Dockerfile @@ -85,10 +85,8 @@ RUN \ python ./requirements/adjust-versions.py requirements.txt ${PYTORCH_VERSION} && \ python ./requirements/adjust-versions.py requirements/extra.txt ${PYTORCH_VERSION} && \ python ./requirements/adjust-versions.py requirements/examples.txt ${PYTORCH_VERSION} && \ - python -c "print(' '.join([ln for ln in open('requirements/extra.txt').readlines() if 'horovod' in ln]))" > ./requirements/horovod.txt && \ - python assistant.py requirements_prune_pkgs "horovod" && \ # Install all requirements \ - pip install -r requirements/devel.txt --no-cache-dir --find-links https://download.pytorch.org/whl/cu${CUDA_VERSION_MM}/torch_stable.html && \ + pip install -r requirements/devel-base.txt --no-cache-dir --find-links https://download.pytorch.org/whl/cu${CUDA_VERSION_MM}/torch_stable.html && \ rm -rf requirements.* && \ rm assistant.py @@ -114,9 +112,8 @@ ENV \ RUN \ HOROVOD_BUILD_CUDA_CC_LIST=${TORCH_CUDA_ARCH_LIST//";"/","} && \ export HOROVOD_BUILD_CUDA_CC_LIST=${HOROVOD_BUILD_CUDA_CC_LIST//"."/""} && \ - cat ./requirements/horovod.txt && \ cmake --version && \ - pip install --no-cache-dir -r ./requirements/horovod.txt && \ + pip install --no-cache-dir -r ./requirements/strategies.txt && \ rm -rf requirements/ RUN \ @@ -134,16 +131,6 @@ RUN \ pip install -v --disable-pip-version-check --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" https://github.com/NVIDIA/apex/archive/refs/heads/master.zip && \ python -c "from apex import amp" -RUN \ - # install FairScale - pip install fairscale==0.4.5 && \ - python -c "import fairscale; print(fairscale.__version__)" - -RUN \ - # install DeepSpeed - pip install deepspeed==0.6.0 && \ - python -c "import deepspeed; print(deepspeed.__version__)" - RUN \ # install Bagua CUDA_VERSION_MM=$(python -c "print(''.join('$CUDA_VERSION'.split('.')[:2]))") && \ diff --git a/dockers/base-ipu/Dockerfile b/dockers/base-ipu/Dockerfile index 727bb29203..48efdd5e7a 100644 --- a/dockers/base-ipu/Dockerfile +++ b/dockers/base-ipu/Dockerfile @@ -83,7 +83,6 @@ RUN \ python -c "import torch; print(torch.__version__)" && \ python adjust_versions.py requirements-extra.txt && \ pip install -q fire && \ - python assistant.py requirements_prune_pkgs fairscale,horovod --req_files requirements-extra.txt && \ # Install remaining requirements pip install -r requirements-extra.txt --no-cache-dir && \ pip install -r requirements-test.txt --no-cache-dir && \ diff --git a/dockers/base-xla/Dockerfile b/dockers/base-xla/Dockerfile index 57a2bd3e95..2fb5b99905 100644 --- a/dockers/base-xla/Dockerfile +++ b/dockers/base-xla/Dockerfile @@ -95,10 +95,9 @@ RUN \ # drop packages installed with XLA python .actions/assistant.py requirements_prune_pkgs torch,torchvision && \ # drop unnecessary packages - python .actions/assistant.py requirements_prune_pkgs fairscale,horovod && \ python ./requirements/adjust-versions.py ./requirements/extra.txt && \ # install PL dependencies - pip install --requirement ./requirements/devel.txt --no-cache-dir && \ + pip install --requirement ./requirements/devel-base.txt --no-cache-dir && \ cd .. && \ rm -rf pytorch-lightning && \ rm -rf /root/.cache diff --git a/dockers/tpu-tests/Dockerfile b/dockers/tpu-tests/Dockerfile index cd0c3c779d..9a7528cc22 100644 --- a/dockers/tpu-tests/Dockerfile +++ b/dockers/tpu-tests/Dockerfile @@ -30,8 +30,7 @@ RUN cd pytorch-lightning && \ RUN \ pip install -q fire && \ # drop unnecessary packages - python .actions/assistant.py requirements_prune_pkgs fairscale,horovod --req_files ./pytorch-lightning/requirements/extra.txt && \ - pip install -r pytorch-lightning/requirements/devel.txt --no-cache-dir + pip install -r pytorch-lightning/requirements/devel-base.txt --no-cache-dir COPY ./dockers/tpu-tests/docker-entrypoint.sh /usr/local/bin/ RUN chmod +x /usr/local/bin/docker-entrypoint.sh diff --git a/requirements/devel-base.txt b/requirements/devel-base.txt new file mode 100644 index 0000000000..3252ebf39f --- /dev/null +++ b/requirements/devel-base.txt @@ -0,0 +1,14 @@ +# install all mandatory dependencies +-r ../requirements.txt + +# install all extra dependencies for full package testing +-r ./extra.txt + +# install all loggers for full package testing +-r ./loggers.txt + +# extended list of dependencies for development and run lint and tests +-r ./test.txt + +# install all extra dependencies for running examples +-r ./examples.txt diff --git a/requirements/devel.txt b/requirements/devel.txt index dcf66495ee..09c2699ccd 100644 --- a/requirements/devel.txt +++ b/requirements/devel.txt @@ -4,6 +4,12 @@ # install all extra dependencies for full package testing -r ./extra.txt +# install all loggers for full package testing +-r ./loggers.txt + +# install all strategies for full package testing +-r ./strategies.txt + # extended list of dependencies for development and run lint and tests -r ./test.txt diff --git a/requirements/extra.txt b/requirements/extra.txt index aa5d15c3af..880fb47de9 100644 --- a/requirements/extra.txt +++ b/requirements/extra.txt @@ -1,7 +1,6 @@ # extended list of package dependencies to reach full functionality matplotlib>3.1 -horovod>=0.21.2,!=0.24.0 # no need to install with [pytorch] as pytorch is already installed torchtext>=0.9.* omegaconf>=2.0.5 hydra-core>=1.0.5 diff --git a/requirements/strategies.txt b/requirements/strategies.txt new file mode 100644 index 0000000000..ae6648a6eb --- /dev/null +++ b/requirements/strategies.txt @@ -0,0 +1,3 @@ +fairscale>=0.4.5 +deepspeed<0.6.0 +horovod>=0.21.2,!=0.24.0 # no need to install with [pytorch] as pytorch is already installed diff --git a/requirements/test.txt b/requirements/test.txt index 51d9ecf71d..a744f57382 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -7,8 +7,6 @@ mypy>=0.920 flake8>=3.9.2 pre-commit>=1.0 pytest-forked -sklearn -jsonargparse # needed in tests cloudpickle>=1.3 diff --git a/setup.cfg b/setup.cfg index 9f908742c0..91d5ba3678 100644 --- a/setup.cfg +++ b/setup.cfg @@ -24,6 +24,7 @@ addopts = --doctest-modules --color=yes --disable-pytest-warnings + --ignore=legacy/checkpoints filterwarnings = # error out on our deprecation warnings - ensures the code and tests are kept up-to-date error::pytorch_lightning.utilities.rank_zero.LightningDeprecationWarning diff --git a/setup.py b/setup.py index 956b49a8b9..e5be7cd37d 100755 --- a/setup.py +++ b/setup.py @@ -43,9 +43,10 @@ extras = { "examples": setup_tools._load_requirements(path_dir=_PATH_REQUIRE, file_name="examples.txt"), "loggers": setup_tools._load_requirements(path_dir=_PATH_REQUIRE, file_name="loggers.txt"), "extra": setup_tools._load_requirements(path_dir=_PATH_REQUIRE, file_name="extra.txt"), + "strategies": setup_tools._load_requirements(path_dir=_PATH_REQUIRE, file_name="strategies.txt"), "test": setup_tools._load_requirements(path_dir=_PATH_REQUIRE, file_name="test.txt"), } -extras["dev"] = extras["extra"] + extras["loggers"] + extras["test"] +extras["dev"] = extras["extra"] + extras["loggers"] + extras["strategies"] + extras["test"] extras["all"] = extras["dev"] + extras["examples"] # + extras['docs'] # These packages shall be installed only on GPU machines