lightning/tests/helpers/runif.py

# Copyright The PyTorch Lightning team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import sys
from typing import Optional

import pytest
import torch
from packaging.version import Version
from pkg_resources import get_distribution
from pytorch_lightning.utilities.imports import (
    _APEX_AVAILABLE,
    _BAGUA_AVAILABLE,
    _DEEPSPEED_AVAILABLE,
    _FAIRSCALE_AVAILABLE,
    _FAIRSCALE_FULLY_SHARDED_AVAILABLE,
    _HIVEMIND_AVAILABLE,
    _HOROVOD_AVAILABLE,
    _HPU_AVAILABLE,
    _IPU_AVAILABLE,
    _OMEGACONF_AVAILABLE,
    _PSUTIL_AVAILABLE,
    _RICH_AVAILABLE,
    _TORCH_GREATER_EQUAL_1_10,
    _TORCH_QUANTIZE_AVAILABLE,
    _TPU_AVAILABLE,
)

_HOROVOD_NCCL_AVAILABLE = False
if _HOROVOD_AVAILABLE:
    import horovod

    try:
        # `nccl_built` returns an integer
        _HOROVOD_NCCL_AVAILABLE = bool(horovod.torch.nccl_built())
    except AttributeError:
        # AttributeError can be raised if MPI is not available:
        # https://github.com/horovod/horovod/blob/v0.23.0/horovod/torch/__init__.py#L33-L34
        pass


class RunIf:
    """RunIf wrapper for simple marking of specific test cases, fully compatible with ``pytest.mark``::

        @RunIf(min_torch="0.0")
        @pytest.mark.parametrize("arg1", [1, 2.0])
        def test_wrapper(arg1):
            assert arg1 > 0.0
    """

    def __new__(
        cls,
        *args,
        min_gpus: int = 0,
        min_torch: Optional[str] = None,
        max_torch: Optional[str] = None,
        min_python: Optional[str] = None,
        quantization: bool = False,
        amp_apex: bool = False,
        bf16_cuda: bool = False,
        tpu: bool = False,
        ipu: bool = False,
        hpu: bool = False,
        horovod: bool = False,
        horovod_nccl: bool = False,
        skip_windows: bool = False,
        standalone: bool = False,
        fairscale: bool = False,
        fairscale_fully_sharded: bool = False,
        deepspeed: bool = False,
        rich: bool = False,
        skip_hanging_spawn: bool = False,
        omegaconf: bool = False,
        slow: bool = False,
        bagua: bool = False,
        psutil: bool = False,
        hivemind: bool = False,
        **kwargs,
    ):
        """
        Args:
            *args: Any :class:`pytest.mark.skipif` arguments.
            min_gpus: Require this number of GPUs.
            min_torch: Require that PyTorch is greater than or equal to this version.
            max_torch: Require that PyTorch is less than this version.
            min_python: Require that Python is greater than or equal to this version.
            quantization: Require that `torch.quantization` is available.
            amp_apex: Require that NVIDIA/apex is installed.
            bf16_cuda: Require that the CUDA device supports bf16.
            tpu: Require that a TPU is available.
            ipu: Require that an IPU is available.
            hpu: Require that an HPU is available.
            horovod: Require that Horovod is installed.
            horovod_nccl: Require that Horovod is installed with NCCL support.
            skip_windows: Skip the test on the Windows platform.
            standalone: Mark the test as standalone; our CI will run it in a separate process.
            fairscale: Require that facebookresearch/fairscale is installed.
            fairscale_fully_sharded: Require that `fairscale` fully sharded support is available.
            deepspeed: Require that microsoft/DeepSpeed is installed.
            rich: Require that willmcgugan/rich is installed.
            skip_hanging_spawn: Skip the test as it is impacted by hanging loggers on spawn.
            omegaconf: Require that omry/omegaconf is installed.
            slow: Mark the test as slow; our CI will run it in a separate job.
            bagua: Require that BaguaSys/bagua is installed.
            psutil: Require that psutil is installed.
            hivemind: Require that Hivemind is installed.
            **kwargs: Any :class:`pytest.mark.skipif` keyword arguments.
        """
        conditions = []
        reasons = []

        if min_gpus:
            conditions.append(torch.cuda.device_count() < min_gpus)
            reasons.append(f"GPUs>={min_gpus}")

        if min_torch:
            torch_version = get_distribution("torch").version
            conditions.append(Version(torch_version) < Version(min_torch))
            reasons.append(f"torch>={min_torch}")

        if max_torch:
            torch_version = get_distribution("torch").version
            conditions.append(Version(torch_version) >= Version(max_torch))
            reasons.append(f"torch<{max_torch}")

        if min_python:
            py_version = f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}"
            conditions.append(Version(py_version) < Version(min_python))
            reasons.append(f"python>={min_python}")

        if quantization:
            _miss_default = "fbgemm" not in torch.backends.quantized.supported_engines
            conditions.append(not _TORCH_QUANTIZE_AVAILABLE or _miss_default)
            reasons.append("PyTorch quantization")

        if amp_apex:
            conditions.append(not _APEX_AVAILABLE)
            reasons.append("NVIDIA Apex")

        if bf16_cuda:
            try:
                cond = not (torch.cuda.is_available() and _TORCH_GREATER_EQUAL_1_10 and torch.cuda.is_bf16_supported())
            except (AssertionError, RuntimeError) as e:
                # AssertionError: Torch not compiled with CUDA enabled
                # RuntimeError: Found no NVIDIA driver on your system.
                # Re-raise only if the error is neither of the two known CUDA-unavailable messages.
                is_unrelated = "Found no NVIDIA driver" not in str(e) and "Torch not compiled with CUDA" not in str(e)
                if is_unrelated:
                    raise e
                cond = True
            conditions.append(cond)
            reasons.append("CUDA device bf16")

        if skip_windows:
            conditions.append(sys.platform == "win32")
            reasons.append("unimplemented on Windows")

        if tpu:
            conditions.append(not _TPU_AVAILABLE)
            reasons.append("TPU")

        if ipu:
            env_flag = os.getenv("PL_RUN_IPU_TESTS", "0")
            conditions.append(env_flag != "1" or not _IPU_AVAILABLE)
            reasons.append("IPU")
            # used in tests/conftest.py::pytest_collection_modifyitems
            kwargs["ipu"] = True

        if hpu:
            conditions.append(not _HPU_AVAILABLE)
            reasons.append("HPU")

        if horovod:
            conditions.append(not _HOROVOD_AVAILABLE)
            reasons.append("Horovod")

        if horovod_nccl:
            conditions.append(not _HOROVOD_NCCL_AVAILABLE)
            reasons.append("Horovod with NCCL")

        if standalone:
            env_flag = os.getenv("PL_RUN_STANDALONE_TESTS", "0")
            conditions.append(env_flag != "1")
            reasons.append("Standalone execution")
            # used in tests/conftest.py::pytest_collection_modifyitems
            kwargs["standalone"] = True

        if fairscale:
            conditions.append(not _FAIRSCALE_AVAILABLE)
            reasons.append("Fairscale")

        if fairscale_fully_sharded:
            conditions.append(not _FAIRSCALE_FULLY_SHARDED_AVAILABLE)
            reasons.append("Fairscale Fully Sharded")

        if deepspeed:
            conditions.append(not _DEEPSPEED_AVAILABLE)
            reasons.append("Deepspeed")

        if rich:
            conditions.append(not _RICH_AVAILABLE)
            reasons.append("Rich")

        if skip_hanging_spawn:
            # strategy=ddp_spawn, accelerator=cpu, python>=3.8, torch<1.9 does not work
            py_version = f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}"
            ge_3_8 = Version(py_version) >= Version("3.8")
            torch_version = get_distribution("torch").version
            old_torch = Version(torch_version) < Version("1.9")
            conditions.append(ge_3_8 and old_torch)
            reasons.append("Impacted by hanging DDP spawn")

        if omegaconf:
            conditions.append(not _OMEGACONF_AVAILABLE)
            reasons.append("omegaconf")

        if slow:
            env_flag = os.getenv("PL_RUN_SLOW_TESTS", "0")
            conditions.append(env_flag != "1")
            reasons.append("Slow test")
            # used in tests/conftest.py::pytest_collection_modifyitems
            kwargs["slow"] = True

        if bagua:
            conditions.append(not _BAGUA_AVAILABLE or sys.platform in ("win32", "darwin"))
            reasons.append("Bagua")

        if psutil:
            conditions.append(not _PSUTIL_AVAILABLE)
            reasons.append("psutil")

        if hivemind:
            conditions.append(not _HIVEMIND_AVAILABLE or sys.platform in ("win32", "darwin"))
            reasons.append("Hivemind")

        reasons = [rs for cond, rs in zip(conditions, reasons) if cond]
        return pytest.mark.skipif(
            *args, condition=any(conditions), reason=f"Requires: [{' + '.join(reasons)}]", **kwargs
        )
@RunIf(min_torch="99")
def test_always_skip():
exit(1)
2021-03-01 15:26:09 +00:00
@pytest.mark.parametrize("arg1", [0.5, 1.0, 2.0])
@RunIf(min_torch="0.0")
def test_wrapper(arg1: float):
2021-03-01 15:26:09 +00:00
assert arg1 > 0.0
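

# Illustrative sketch, not part of the original test suite: conditions can be
# combined, and environment-flag-gated marks compose with hardware checks.
# With `min_gpus=2, standalone=True`, the test is skipped unless at least two
# CUDA devices are visible *and* `PL_RUN_STANDALONE_TESTS=1` is exported; the
# `standalone` kwarg is forwarded on the generated mark so the CI collection
# hook can schedule it in a separate process.
@RunIf(min_gpus=2, standalone=True)
def test_multi_gpu_standalone_example():
    # Trivially true when the RunIf conditions above are satisfied.
    assert torch.cuda.device_count() >= 2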