From 48f171006dbc308e9f35ac3f9109443db156f73c Mon Sep 17 00:00:00 2001 From: Rohit Gupta Date: Fri, 25 Mar 2022 21:29:06 +0530 Subject: [PATCH] Avoid fallback on CPU if no devices are provided (#12410) --- CHANGELOG.md | 5 +++++ docs/source/accelerators/gpu.rst | 7 ------- .../trainer/connectors/accelerator_connector.py | 4 ---- .../accelerators/test_accelerator_connector.py | 17 +++++++++-------- tests/deprecated_api/test_remove_1-8.py | 1 - 5 files changed, 14 insertions(+), 20 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6873e86eb2..4092e04d57 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -393,8 +393,13 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - The strategies that support `sync_batchnorm` now only apply it when fitting ([#11919](https://github.com/PyTorchLightning/pytorch-lightning/pull/11919)) + +- Avoided fallback on CPU if no devices are provided for other accelerators ([#12410](https://github.com/PyTorchLightning/pytorch-lightning/pull/12410)) + + - Modified `supporters.py` so that in the accumulator element (for loss) is created directly on the device ([#12430](https://github.com/PyTorchLightning/pytorch-lightning/pull/12430)) + - Removed `EarlyStopping.on_save_checkpoint` and `EarlyStopping.on_load_checkpoint` in favor of `EarlyStopping.state_dict` and `EarlyStopping.load_state_dict` ([#11887](https://github.com/PyTorchLightning/pytorch-lightning/pull/11887)) diff --git a/docs/source/accelerators/gpu.rst b/docs/source/accelerators/gpu.rst index 53b79470bf..a5f7328718 100644 --- a/docs/source/accelerators/gpu.rst +++ b/docs/source/accelerators/gpu.rst @@ -223,15 +223,10 @@ a comma separated list of GPU ids: Trainer(accelerator="gpu", devices=-1) The table below lists examples of possible input formats and how they are interpreted by Lightning. -Note in particular the difference between ``devices=0``, ``devices=[0]`` and ``devices="0"``. +------------------+-----------+---------------------+---------------------------------+ | `devices` | Type | Parsed | Meaning | +==================+===========+=====================+=================================+ -| None | NoneType | None | CPU | -+------------------+-----------+---------------------+---------------------------------+ -| 0 | int | None | CPU | -+------------------+-----------+---------------------+---------------------------------+ | 3 | int | [0, 1, 2] | first 3 GPUs | +------------------+-----------+---------------------+---------------------------------+ | -1 | int | [0, 1, 2, ...] | all available GPUs | @@ -240,8 +235,6 @@ Note in particular the difference between ``devices=0``, ``devices=[0]`` and ``d +------------------+-----------+---------------------+---------------------------------+ | [1, 3] | list | [1, 3] | GPUs 1 and 3 | +------------------+-----------+---------------------+---------------------------------+ -| "0" | str | None | CPU | -+------------------+-----------+---------------------+---------------------------------+ | "3" | str | [0, 1, 2] | first 3 GPUs | +------------------+-----------+---------------------+---------------------------------+ | "1, 3" | str | [1, 3] | GPUs 1 and 3 | diff --git a/pytorch_lightning/trainer/connectors/accelerator_connector.py b/pytorch_lightning/trainer/connectors/accelerator_connector.py index 28c6da86c6..673964b13e 100644 --- a/pytorch_lightning/trainer/connectors/accelerator_connector.py +++ b/pytorch_lightning/trainer/connectors/accelerator_connector.py @@ -424,10 +424,6 @@ class AcceleratorConnector: devices, num_processes, gpus, ipus, tpu_cores ) - if self._devices_flag in ([], 0, "0"): - rank_zero_warn(f"You passed `devices={devices}`, switching to `cpu` accelerator") - self._accelerator_flag = "cpu" - if self._devices_flag == "auto" and self._accelerator_flag is None: raise MisconfigurationException( f"You passed `devices={devices}` but haven't specified" diff --git a/tests/accelerators/test_accelerator_connector.py b/tests/accelerators/test_accelerator_connector.py index 8c29dfb09a..6edbb0eeff 100644 --- a/tests/accelerators/test_accelerator_connector.py +++ b/tests/accelerators/test_accelerator_connector.py @@ -506,6 +506,15 @@ def test_accelerator_cpu(_): trainer = Trainer(accelerator="cpu", gpus=1) +@mock.patch("torch.cuda.is_available", return_value=False) +@pytest.mark.parametrize("devices", ["0", 0, []]) +def test_passing_zero_and_empty_list_to_devices_flag(_, devices): + with pytest.raises( + MisconfigurationException, match="can not run on your system since the accelerator is not available." + ): + Trainer(accelerator="gpu", devices=devices) + + @RunIf(min_gpus=1) def test_accelerator_gpu(): trainer = Trainer(accelerator="gpu", gpus=1) @@ -950,14 +959,6 @@ def test_parallel_devices_in_strategy_confilict_with_accelerator(parallel_device Trainer(strategy=DDPStrategy(parallel_devices=parallel_devices), accelerator=accelerator) -def test_passing_zero_and_empty_list_to_devices_flag(): - with pytest.warns(UserWarning, match=r"switching to `cpu` accelerator"): - Trainer(accelerator="gpu", devices=0) - - with pytest.warns(UserWarning, match=r"switching to `cpu` accelerator"): - Trainer(accelerator="gpu", devices=[]) - - @pytest.mark.parametrize("deterministic", [True, False]) def test_deterministic_init(deterministic): trainer = Trainer(accelerator="auto", deterministic=deterministic) diff --git a/tests/deprecated_api/test_remove_1-8.py b/tests/deprecated_api/test_remove_1-8.py index a8cf01e434..2157dd2ad3 100644 --- a/tests/deprecated_api/test_remove_1-8.py +++ b/tests/deprecated_api/test_remove_1-8.py @@ -1038,7 +1038,6 @@ def test_trainer_num_processes(monkeypatch, trainer_kwargs, expected_num_process ({"accelerator": "gpu", "devices": 1}, [0]), ({"accelerator": "gpu", "devices": 2}, [0, 1]), ({"accelerator": "gpu", "devices": [1]}, [1]), - ({"accelerator": "gpu", "devices": "0"}, None), ({"accelerator": "gpu", "devices": "0,"}, [0]), ], )