From 195a3bf5b5bc090e8421b66f41892fa909341f09 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adrian=20W=C3=A4lchli?=
Date: Mon, 6 Nov 2023 21:50:11 +0100
Subject: [PATCH] Fix parsing v100s in `get_available_flops` (#18952)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Carlos Mocholí
---
 src/lightning/fabric/CHANGELOG.md               |  3 +++
 src/lightning/fabric/utilities/throughput.py    |  2 ++
 .../tests_fabric/utilities/test_throughput.py   | 26 ++++++++++++++++++-
 3 files changed, 30 insertions(+), 1 deletion(-)

diff --git a/src/lightning/fabric/CHANGELOG.md b/src/lightning/fabric/CHANGELOG.md
index 30d70c613b..7c3bbf96fc 100644
--- a/src/lightning/fabric/CHANGELOG.md
+++ b/src/lightning/fabric/CHANGELOG.md
@@ -41,6 +41,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 
 - Fixed layer conversion under `Fabric.init_module()` context manager when using the `BitsandbytesPrecision` plugin ([#18914](https://github.com/Lightning-AI/lightning/pull/18914))
 
+- Fixed parsing of v100s GPUs in `get_available_flops` ([#18952](https://github.com/Lightning-AI/lightning/pull/18952))
+
+
 ## [2.1.0] - 2023-10-11
 
 ### Added
diff --git a/src/lightning/fabric/utilities/throughput.py b/src/lightning/fabric/utilities/throughput.py
index 1afe5217e2..44841c0039 100644
--- a/src/lightning/fabric/utilities/throughput.py
+++ b/src/lightning/fabric/utilities/throughput.py
@@ -382,6 +382,8 @@ def get_available_flops(device: torch.device, dtype: Union[torch.dtype, str]) ->
             chip = "v100-sxm"
         elif "v100-pcie" in chip:
             chip = "v100-pcie"
+        elif "v100s-pcie" in chip:
+            chip = "v100s-pcie"
         elif "t4" in chip:
             chip = "t4"
         elif "quadro rtx 5000" in chip:
diff --git a/tests/tests_fabric/utilities/test_throughput.py b/tests/tests_fabric/utilities/test_throughput.py
index 6bf7dbb5f7..06e63706d5 100644
--- a/tests/tests_fabric/utilities/test_throughput.py
+++ b/tests/tests_fabric/utilities/test_throughput.py
@@ -33,7 +33,7 @@ def test_measure_flops():
     assert fwd_flops < fwd_and_bwd_flops
 
 
-def test_available_flops(xla_available):
+def test_get_available_flops(xla_available):
     with mock.patch("torch.cuda.get_device_name", return_value="NVIDIA H100 PCIe"):
         flops = get_available_flops(torch.device("cuda"), torch.bfloat16)
         assert flops == 1.513e15 / 2
@@ -61,6 +61,30 @@
         tpu.reset_mock()
 
 
+@pytest.mark.parametrize(
+    "device_name",
+    [
+        # TODO: We need to represent the real names here
+        "h100-hbm3",
+        "NVIDIA H100 PCIe",
+        "h100-hbm2e",
+        "NVIDIA A100 80GB PCIe",
+        "NVIDIA A100-SXM4-40GB",
+        "NVIDIA A10G",
+        "Tesla V100-SXM2-32GB",
+        "Tesla V100-PCIE-32GB",
+        "Tesla V100S-PCIE-32GB",
+        "Tesla T4",
+        "Quadro RTX 5000 with Max-Q Design",
+    ],
+)
+@mock.patch("lightning.fabric.accelerators.cuda._is_ampere_or_later", return_value=False)
+def test_get_available_flops_cuda_mapping_exists(_, device_name):
+    """Tests `get_available_flops` against known device names."""
+    with mock.patch("lightning.fabric.utilities.throughput.torch.cuda.get_device_name", return_value=device_name):
+        assert get_available_flops(device=torch.device("cuda"), dtype=torch.float32) is not None
+
+
 def test_throughput():
     # required args only
     throughput = Throughput()
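
Note (appended for illustration, not part of the patch): a minimal sketch of the substring-matching idea the fix relies on. The `normalize_chip` helper below is a hypothetical simplification for this note, not the Lightning implementation; it only models why a dedicated "v100s-pcie" branch is needed alongside "v100-pcie".

from typing import Optional


def normalize_chip(device_name: str) -> Optional[str]:
    # Simplified model of the name matching done in `get_available_flops`:
    # lowercase the reported device name and look for known substrings.
    chip = device_name.lower()
    if "v100-sxm" in chip:
        return "v100-sxm"
    if "v100-pcie" in chip:
        return "v100-pcie"
    if "v100s-pcie" in chip:  # the branch this patch adds
        return "v100s-pcie"
    if "t4" in chip:
        return "t4"
    return None


# "v100-pcie" is not a substring of "tesla v100s-pcie-32gb" (the extra "s"
# breaks the match), so without the new branch a V100S fell through every
# check and no peak-FLOPs entry was resolved.
assert normalize_chip("Tesla V100-PCIE-32GB") == "v100-pcie"
assert normalize_chip("Tesla V100S-PCIE-32GB") == "v100s-pcie"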