# lightning/tests/tests_fabric/accelerators/test_cuda.py

# Copyright The Lightning AI team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import importlib
import logging
import os
from re import escape
from unittest import mock
from unittest.mock import Mock

import lightning.fabric
import pytest
import torch
from lightning.fabric.accelerators.cuda import (
    CUDAAccelerator,
    _check_cuda_matmul_precision,
    find_usable_cuda_devices,
    is_cuda_available,
    num_cuda_devices,
)

from tests_fabric.helpers.runif import RunIf


@mock.patch("lightning.fabric.accelerators.cuda.num_cuda_devices", return_value=2)
def test_auto_device_count(_):
    """With ``num_cuda_devices`` patched to return 2, ``auto_device_count`` must report 2."""
    detected = CUDAAccelerator.auto_device_count()
    assert detected == 2


@RunIf(min_cuda_gpus=1)
def test_gpu_availability():
    """The CUDA accelerator must report itself as available (test is gated by ``RunIf(min_cuda_gpus=1)``)."""
    available = CUDAAccelerator.is_available()
    assert available


def test_init_device_with_wrong_device_type():
    """``setup_device`` must reject a CPU device with a ``ValueError``."""
    accelerator = CUDAAccelerator()
    cpu_device = torch.device("cpu")
    # Only the call under test lives inside the ``raises`` block
    with pytest.raises(ValueError, match="Device should be CUDA"):
        accelerator.setup_device(cpu_device)


@pytest.mark.parametrize(
    ("devices", "expected"),
    [
        pytest.param([], []),
        pytest.param([1], [torch.device("cuda", 1)]),
        pytest.param([3, 1], [torch.device("cuda", 3), torch.device("cuda", 1)]),
    ],
)
def test_get_parallel_devices(devices, expected):
    """``get_parallel_devices`` must map each index to the matching ``cuda`` device, keeping the input order."""
    resolved = CUDAAccelerator.get_parallel_devices(devices)
    assert resolved == expected


@mock.patch("torch.cuda.set_device")
@mock.patch("torch.cuda.get_device_capability", return_value=(7, 0))
def test_set_cuda_device(_, set_device_mock):
    """``setup_device`` must call ``torch.cuda.set_device`` exactly once, with the device it was handed."""
    target = torch.device("cuda", 1)
    accelerator = CUDAAccelerator()
    accelerator.setup_device(target)
    set_device_mock.assert_called_once_with(target)


@mock.patch("lightning.fabric.accelerators.cuda._device_count_nvml", return_value=-1)
@mock.patch("torch.cuda.is_available", return_value=True)
@mock.patch("torch.cuda.device_count", return_value=100)
def test_num_cuda_devices_without_nvml(*_):
    """Test that if NVML can't be loaded, our helper functions fall back to the default implementation for determining
    CUDA availability.

    ``_device_count_nvml`` is patched to return ``-1`` while ``torch.cuda.is_available`` and
    ``torch.cuda.device_count`` are patched to return ``True`` and ``100``; the helpers must then report those
    patched torch values.

    """
    # Drop any count cached before this test so the patched functions are actually consulted
    num_cuda_devices.cache_clear()
    try:
        assert is_cuda_available()
        assert num_cuda_devices() == 100
    finally:
        # Always evict the mocked count, even when an assertion fails, so it cannot leak into later tests
        num_cuda_devices.cache_clear()


@mock.patch.dict(os.environ, {}, clear=True)
def test_force_nvml_based_cuda_check():
    """Reloading ``lightning.fabric`` with an emptied environment must set ``PYTORCH_NVML_BASED_CUDA_CHECK`` to "1"."""
    # Re-run the package's top-level code in place; the module object itself stays the same
    importlib.reload(lightning.fabric)

    nvml_flag = os.environ["PYTORCH_NVML_BASED_CUDA_CHECK"]
    assert nvml_flag == "1"


@mock.patch("torch.cuda.get_device_capability", return_value=(10, 1))
@mock.patch("torch.cuda.get_device_name", return_value="Z100")
@mock.patch("torch.cuda.is_available", return_value=True)
def test_tf32_message(_, __, ___, caplog, monkeypatch):
    """Test when ``_check_cuda_matmul_precision`` logs its Tensor Cores info message.

    The message is expected while the float32 matmul precision is ``"highest"``, must be absent for ``"high"`` and
    ``"medium"``, and must not be repeated on a second call without clearing the function's cache.

    The test mutates process-global torch precision settings and the helper's cache, so both are reset in a
    ``finally`` block to keep a mid-test failure from leaking state into other tests.

    """
    # for some reason, caplog doesn't work with our rank_zero_info utilities
    monkeypatch.setattr(lightning.fabric.accelerators.cuda, "rank_zero_info", logging.info)

    device = Mock()
    expected = "Z100') that has Tensor Cores"
    initial_precision = torch.get_float32_matmul_precision()
    assert initial_precision == "highest"  # default in torch
    try:
        with caplog.at_level(logging.INFO):
            _check_cuda_matmul_precision(device)
        assert expected in caplog.text
        _check_cuda_matmul_precision.cache_clear()

        caplog.clear()
        torch.backends.cuda.matmul.allow_tf32 = True  # changing this changes the string
        assert torch.get_float32_matmul_precision() == "high"
        with caplog.at_level(logging.INFO):
            _check_cuda_matmul_precision(device)
        assert not caplog.text
        _check_cuda_matmul_precision.cache_clear()

        caplog.clear()
        torch.backends.cuda.matmul.allow_tf32 = False
        torch.set_float32_matmul_precision("medium")  # also the other way around
        assert torch.backends.cuda.matmul.allow_tf32
        with caplog.at_level(logging.INFO):
            _check_cuda_matmul_precision(device)
        assert not caplog.text
        _check_cuda_matmul_precision.cache_clear()

        torch.set_float32_matmul_precision("highest")  # can be reverted
        with caplog.at_level(logging.INFO):
            _check_cuda_matmul_precision(device)
        assert expected in caplog.text

        # subsequent calls don't produce more messages
        caplog.clear()
        with caplog.at_level(logging.INFO):
            _check_cuda_matmul_precision(device)
        assert expected not in caplog.text
    finally:
        # Undo the global precision change and drop cached results even if an assertion above failed
        torch.set_float32_matmul_precision(initial_precision)
        _check_cuda_matmul_precision.cache_clear()


def test_find_usable_cuda_devices_error_handling():
    """Exercise the failure modes and boundary inputs of `find_usable_cuda_devices`."""
    count_target = "lightning.fabric.accelerators.cuda.num_cuda_devices"
    tensor_target = "lightning.fabric.accelerators.cuda.torch.tensor"

    # Two devices requested while none is visible
    with mock.patch(count_target, return_value=0):
        with pytest.raises(ValueError, match="You requested to find 2 devices but there are no visible CUDA"):
            find_usable_cuda_devices(2)

    # Two devices requested while only one is visible
    with mock.patch(count_target, return_value=1):
        with pytest.raises(ValueError, match="this machine only has 1 GPUs"):
            find_usable_cuda_devices(2)

    # Every visible device is unusable: tensor creation is made to fail
    failing_tensor = Mock(side_effect=RuntimeError)
    with mock.patch(count_target, return_value=2), mock.patch(tensor_target, failing_tensor):
        with pytest.raises(RuntimeError, match=escape("The devices [0, 1] are occupied by other processes")):
            find_usable_cuda_devices(2)

    # -1 asks for as many devices as there are; this must succeed and list all of them
    with mock.patch(count_target, return_value=5), mock.patch(tensor_target):
        all_devices = find_usable_cuda_devices(-1)
        assert all_devices == [0, 1, 2, 3, 4]

    # Requesting zero devices yields an empty list
    no_devices = find_usable_cuda_devices(0)
    assert no_devices == []
Standalone Lite: Accelerators (#14578) Co-authored-by: Carlos Mocholí <carlossmocholi@gmail.com> Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> 2022-09-12 16:00:14 +00:00			`# Copyright The Lightning AI team.`
			`#`
			`# Licensed under the Apache License, Version 2.0 (the "License");`
			`# you may not use this file except in compliance with the License.`
			`# You may obtain a copy of the License at`
			`#`
			`# http://www.apache.org/licenses/LICENSE-2.0`
			`#`
			`# Unless required by applicable law or agreed to in writing, software`
			`# distributed under the License is distributed on an "AS IS" BASIS,`
			`# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`# See the License for the specific language governing permissions and`
			`# limitations under the License.`
[LAI] Make lite tests safe for combined package (#15204) Make lite tests safe for combined package Co-authored-by: Justus Schock <12886177+justusschock@users.noreply.github.com> 2022-10-20 09:10:39 +00:00			`import importlib`
Add info message for Ampere GPUs to enable tf32 matmuls (#16037) 2022-12-13 18:24:31 +00:00			`import logging`
Force NVML-based CUDA check in PyTorch 1.14+ (#15110) 2022-10-13 17:10:29 +00:00			`import os`
Deprecate `auto_select_gpus` (#16147) Co-authored-by: Carlos Mocholí <carlossmocholi@gmail.com> 2022-12-22 16:44:07 +00:00			`from re import escape`
Standalone Lite: Accelerators (#14578) Co-authored-by: Carlos Mocholí <carlossmocholi@gmail.com> Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> 2022-09-12 16:00:14 +00:00			`from unittest import mock`
Add info message for Ampere GPUs to enable tf32 matmuls (#16037) 2022-12-13 18:24:31 +00:00			`from unittest.mock import Mock`
Standalone Lite: Accelerators (#14578) Co-authored-by: Carlos Mocholí <carlossmocholi@gmail.com> Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> 2022-09-12 16:00:14 +00:00
ruff: replace isort with ruff +TPU (#17684) * ruff: replace isort with ruff * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fixing & imports * lines in warning test * docs * fix enum import * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fixing * import * fix lines * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * type ClusterEnvironment * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> 2023-09-26 15:54:55 +00:00			`import lightning.fabric`
Standalone Lite: Accelerators (#14578) Co-authored-by: Carlos Mocholí <carlossmocholi@gmail.com> Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> 2022-09-12 16:00:14 +00:00			`import pytest`
			`import torch`
tests: switch imports for fabric (#16592) 2023-02-01 20:34:38 +00:00			`from lightning.fabric.accelerators.cuda import (`
Add info message for Ampere GPUs to enable tf32 matmuls (#16037) 2022-12-13 18:24:31 +00:00			`CUDAAccelerator,`
ruff: replace isort with ruff +TPU (#17684) * ruff: replace isort with ruff * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fixing & imports * lines in warning test * docs * fix enum import * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fixing * import * fix lines * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * type ClusterEnvironment * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> 2023-09-26 15:54:55 +00:00			`_check_cuda_matmul_precision,`
Deprecate `auto_select_gpus` (#16147) Co-authored-by: Carlos Mocholí <carlossmocholi@gmail.com> 2022-12-22 16:44:07 +00:00			`find_usable_cuda_devices,`
Add info message for Ampere GPUs to enable tf32 matmuls (#16037) 2022-12-13 18:24:31 +00:00			`is_cuda_available,`
			`num_cuda_devices,`
			`)`
ruff: replace isort with ruff +TPU (#17684) * ruff: replace isort with ruff * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fixing & imports * lines in warning test * docs * fix enum import * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fixing * import * fix lines * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * type ClusterEnvironment * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> 2023-09-26 15:54:55 +00:00
update list of fist party packages (#16859) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> 2023-03-03 16:55:48 +00:00			`from tests_fabric.helpers.runif import RunIf`
Standalone Lite: Accelerators (#14578) Co-authored-by: Carlos Mocholí <carlossmocholi@gmail.com> Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> 2022-09-12 16:00:14 +00:00

tests: switch imports for fabric (#16592) 2023-02-01 20:34:38 +00:00			`@mock.patch("lightning.fabric.accelerators.cuda.num_cuda_devices", return_value=2)`
Standalone Lite: Accelerators (#14578) Co-authored-by: Carlos Mocholí <carlossmocholi@gmail.com> Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> 2022-09-12 16:00:14 +00:00			`def test_auto_device_count(_):`
			`assert CUDAAccelerator.auto_device_count() == 2`


			`@RunIf(min_cuda_gpus=1)`
			`def test_gpu_availability():`
			`assert CUDAAccelerator.is_available()`


			`def test_init_device_with_wrong_device_type():`
			`with pytest.raises(ValueError, match="Device should be CUDA"):`
			`CUDAAccelerator().setup_device(torch.device("cpu"))`


			`@pytest.mark.parametrize(`
ruff: autofix PT (#17541) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> 2023-05-04 15:50:39 +00:00			`("devices", "expected"),`
Standalone Lite: Accelerators (#14578) Co-authored-by: Carlos Mocholí <carlossmocholi@gmail.com> Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com> 2022-09-12 16:00:14 +00:00			`[`
			`([], []),`
			`([1], [torch.device("cuda", 1)]),`
			`([3, 1], [torch.device("cuda", 3), torch.device("cuda", 1)]),`
			`],`
			`)`
			`def test_get_parallel_devices(devices, expected):`
			`assert CUDAAccelerator.get_parallel_devices(devices) == expected`


			`@mock.patch("torch.cuda.set_device")`
Add info message for Ampere GPUs to enable tf32 matmuls (#16037) 2022-12-13 18:24:31 +00:00			`@mock.patch("torch.cuda.get_device_capability", return_value=(7, 0))`
			`def test_set_cuda_device(_, set_device_mock):`
			`device = torch.device("cuda", 1)`
			`CUDAAccelerator().setup_device(device)`
			`set_device_mock.assert_called_once_with(device)`
Attempt to query device count via NVML (#14631) Co-authored-by: Carlos Mocholí <carlossmocholi@gmail.com> 2022-09-22 09:57:13 +00:00

tests: switch imports for fabric (#16592) 2023-02-01 20:34:38 +00:00			`@mock.patch("lightning.fabric.accelerators.cuda._device_count_nvml", return_value=-1)`
Fabric: Test PyTorch 2.0 pre-release on CPU and CUDA (#16905) Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Jirka Borovec <6035284+Borda@users.noreply.github.com> 2023-03-03 17:48:49 +00:00			`@mock.patch("torch.cuda.is_available", return_value=True)`
Attempt to query device count via NVML (#14631) Co-authored-by: Carlos Mocholí <carlossmocholi@gmail.com> 2022-09-22 09:57:13 +00:00			`@mock.patch("torch.cuda.device_count", return_value=100)`
			`def test_num_cuda_devices_without_nvml(*_):`
docformatter: config with black (#18064) * docformatter: config with black * additional_dependencies: [tomli] * 119 * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> 2023-08-09 14:44:20 +00:00			`"""Test that if NVML can't be loaded, our helper functions fall back to the default implementation for determining`
			`CUDA availability."""`
Refactor launching tests to use our launchers (#14954) 2022-09-30 07:57:18 +00:00			`num_cuda_devices.cache_clear()`
Attempt to query device count via NVML (#14631) Co-authored-by: Carlos Mocholí <carlossmocholi@gmail.com> 2022-09-22 09:57:13 +00:00			`assert is_cuda_available()`
			`assert num_cuda_devices() == 100`
Resolve interactions between CUDA tests (#15042) 2022-10-09 10:20:40 +00:00			`num_cuda_devices.cache_clear()`
Force NVML-based CUDA check in PyTorch 1.14+ (#15110) 2022-10-13 17:10:29 +00:00

			`@mock.patch.dict(os.environ, {}, clear=True)`
[LAI] Make lite tests safe for combined package (#15204) Make lite tests safe for combined package Co-authored-by: Justus Schock <12886177+justusschock@users.noreply.github.com> 2022-10-20 09:10:39 +00:00			`def test_force_nvml_based_cuda_check():`
Force NVML-based CUDA check in PyTorch 1.14+ (#15110) 2022-10-13 17:10:29 +00:00			`"""Test that we force PyTorch to use the NVML-based CUDA checks."""`
tests: switch imports for fabric (#16592) 2023-02-01 20:34:38 +00:00			`importlib.reload(lightning.fabric) # reevaluate top-level code, without becoming a different object`
Force NVML-based CUDA check in PyTorch 1.14+ (#15110) 2022-10-13 17:10:29 +00:00
			`assert os.environ["PYTORCH_NVML_BASED_CUDA_CHECK"] == "1"`
Add info message for Ampere GPUs to enable tf32 matmuls (#16037) 2022-12-13 18:24:31 +00:00

			`@mock.patch("torch.cuda.get_device_capability", return_value=(10, 1))`
			`@mock.patch("torch.cuda.get_device_name", return_value="Z100")`
`set_device` before `init_process_group` (#19184) 2023-12-21 15:28:16 +00:00			`@mock.patch("torch.cuda.is_available", return_value=True)`
			`def test_tf32_message(_, __, ___, caplog, monkeypatch):`
Show tf32 info only on rank 0 (#16152) Co-authored-by: Carlos Mocholí <carlossmocholi@gmail.com> Co-authored-by: Jirka Borovec <6035284+Borda@users.noreply.github.com> 2023-02-02 23:56:12 +00:00			`# for some reason, caplog doesn't work with our rank_zero_info utilities`
			`monkeypatch.setattr(lightning.fabric.accelerators.cuda, "rank_zero_info", logging.info)`

Add info message for Ampere GPUs to enable tf32 matmuls (#16037) 2022-12-13 18:24:31 +00:00			`device = Mock()`
			`expected = "Z100') that has Tensor Cores"`
			`assert torch.get_float32_matmul_precision() == "highest" # default in torch`
			`with caplog.at_level(logging.INFO):`
			`_check_cuda_matmul_precision(device)`
			`assert expected in caplog.text`
Show CUDA matmul precision info only ever once (#17960) 2023-07-04 07:47:27 +00:00			`_check_cuda_matmul_precision.cache_clear()`
Add info message for Ampere GPUs to enable tf32 matmuls (#16037) 2022-12-13 18:24:31 +00:00
			`caplog.clear()`
			`torch.backends.cuda.matmul.allow_tf32 = True # changing this changes the string`
			`assert torch.get_float32_matmul_precision() == "high"`
			`with caplog.at_level(logging.INFO):`
			`_check_cuda_matmul_precision(device)`
			`assert not caplog.text`
Show CUDA matmul precision info only ever once (#17960) 2023-07-04 07:47:27 +00:00			`_check_cuda_matmul_precision.cache_clear()`
Add info message for Ampere GPUs to enable tf32 matmuls (#16037) 2022-12-13 18:24:31 +00:00
			`caplog.clear()`
			`torch.backends.cuda.matmul.allow_tf32 = False`
			`torch.set_float32_matmul_precision("medium") # also the other way around`
			`assert torch.backends.cuda.matmul.allow_tf32`
			`with caplog.at_level(logging.INFO):`
			`_check_cuda_matmul_precision(device)`
			`assert not caplog.text`
Show CUDA matmul precision info only ever once (#17960) 2023-07-04 07:47:27 +00:00			`_check_cuda_matmul_precision.cache_clear()`
Add info message for Ampere GPUs to enable tf32 matmuls (#16037) 2022-12-13 18:24:31 +00:00
			`torch.set_float32_matmul_precision("highest") # can be reverted`
			`with caplog.at_level(logging.INFO):`
			`_check_cuda_matmul_precision(device)`
			`assert expected in caplog.text`
Deprecate `auto_select_gpus` (#16147) Co-authored-by: Carlos Mocholí <carlossmocholi@gmail.com> 2022-12-22 16:44:07 +00:00
Show CUDA matmul precision info only ever once (#17960) 2023-07-04 07:47:27 +00:00			`# subsequent calls don't produce more messages`
			`caplog.clear()`
			`with caplog.at_level(logging.INFO):`
			`_check_cuda_matmul_precision(device)`
			`assert expected not in caplog.text`
			`_check_cuda_matmul_precision.cache_clear()`

Deprecate `auto_select_gpus` (#16147) Co-authored-by: Carlos Mocholí <carlossmocholi@gmail.com> 2022-12-22 16:44:07 +00:00
			`def test_find_usable_cuda_devices_error_handling():`
			"""Test error handling for edge cases when using `find_usable_cuda_devices`."""
			`# Asking for GPUs if no GPUs visible`
tests: switch imports for fabric (#16592) 2023-02-01 20:34:38 +00:00			`with mock.patch("lightning.fabric.accelerators.cuda.num_cuda_devices", return_value=0), pytest.raises(`
Deprecate `auto_select_gpus` (#16147) Co-authored-by: Carlos Mocholí <carlossmocholi@gmail.com> 2022-12-22 16:44:07 +00:00			`ValueError, match="You requested to find 2 devices but there are no visible CUDA"`
			`):`
			`find_usable_cuda_devices(2)`

			`# Asking for more GPUs than are visible`
tests: switch imports for fabric (#16592) 2023-02-01 20:34:38 +00:00			`with mock.patch("lightning.fabric.accelerators.cuda.num_cuda_devices", return_value=1), pytest.raises(`
Deprecate `auto_select_gpus` (#16147) Co-authored-by: Carlos Mocholí <carlossmocholi@gmail.com> 2022-12-22 16:44:07 +00:00			`ValueError, match="this machine only has 1 GPUs"`
			`):`
			`find_usable_cuda_devices(2)`

			`# All GPUs are unusable`
			`tensor_mock = Mock(side_effect=RuntimeError) # simulate device placement fails`
tests: switch imports for fabric (#16592) 2023-02-01 20:34:38 +00:00			`with mock.patch("lightning.fabric.accelerators.cuda.num_cuda_devices", return_value=2), mock.patch(`
			`"lightning.fabric.accelerators.cuda.torch.tensor", tensor_mock`
Deprecate `auto_select_gpus` (#16147) Co-authored-by: Carlos Mocholí <carlossmocholi@gmail.com> 2022-12-22 16:44:07 +00:00			`), pytest.raises(RuntimeError, match=escape("The devices [0, 1] are occupied by other processes")):`
			`find_usable_cuda_devices(2)`
Fix support for passing -1 to `find_usable_cuda_devices` function (#16866) Co-authored-by: Adrian Wälchli <aedu.waelchli@gmail.com> 2023-02-27 20:08:42 +00:00
			`# Request for as many GPUs as there are, no error should be raised`
			`with mock.patch("lightning.fabric.accelerators.cuda.num_cuda_devices", return_value=5), mock.patch(`
			`"lightning.fabric.accelerators.cuda.torch.tensor"`
			`):`
			`assert find_usable_cuda_devices(-1) == [0, 1, 2, 3, 4]`
Handle edge case for `find_usable_cuda_devices(0)` (#18722) 2023-10-07 03:44:33 +00:00
			`# Edge case`
			`assert find_usable_cuda_devices(0) == []`