Fix parsing v100s in `get_available_flops` (#18952)

Co-authored-by: Carlos Mocholí <carlossmocholi@gmail.com>
Adrian Wälchli 2023-11-06 21:50:11 +01:00 committed by GitHub
parent c4af18b2c5
commit 195a3bf5b5
3 changed files with 30 additions and 1 deletion

CHANGELOG.md

@@ -41,6 +41,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 - Fixed layer conversion under `Fabric.init_module()` context manager when using the `BitsandbytesPrecision` plugin ([#18914](https://github.com/Lightning-AI/lightning/pull/18914))
+- Fixed parsing of v100s GPUs in `get_available_flops` ([#18952](https://github.com/Lightning-AI/lightning/pull/18952))
 
 ## [2.1.0] - 2023-10-11
 ### Added

src/lightning/fabric/utilities/throughput.py

@@ -382,6 +382,8 @@ def get_available_flops(device: torch.device, dtype: Union[torch.dtype, str]) ->
             chip = "v100-sxm"
         elif "v100-pcie" in chip:
             chip = "v100-pcie"
+        elif "v100s-pcie" in chip:
+            chip = "v100s-pcie"
         elif "t4" in chip:
             chip = "t4"
         elif "quadro rtx 5000" in chip:

tests/tests_fabric/utilities/test_throughput.py

@@ -33,7 +33,7 @@ def test_measure_flops():
     assert fwd_flops < fwd_and_bwd_flops
 
 
-def test_available_flops(xla_available):
+def test_get_available_flops(xla_available):
     with mock.patch("torch.cuda.get_device_name", return_value="NVIDIA H100 PCIe"):
         flops = get_available_flops(torch.device("cuda"), torch.bfloat16)
         assert flops == 1.513e15 / 2
@@ -61,6 +61,30 @@ def test_available_flops(xla_available):
     tpu.reset_mock()
 
 
+@pytest.mark.parametrize(
+    "device_name",
+    [
+        # TODO: We need to represent the real names here
+        "h100-hbm3",
+        "NVIDIA H100 PCIe",
+        "h100-hbm2e",
+        "NVIDIA A100 80GB PCIe",
+        "NVIDIA A100-SXM4-40GB",
+        "NVIDIA A10G",
+        "Tesla V100-SXM2-32GB",
+        "Tesla V100-PCIE-32GB",
+        "Tesla V100S-PCIE-32GB",
+        "Tesla T4",
+        "Quadro RTX 5000 with Max-Q Design",
+    ],
+)
+@mock.patch("lightning.fabric.accelerators.cuda._is_ampere_or_later", return_value=False)
+def test_get_available_flops_cuda_mapping_exists(_, device_name):
+    """Tests `get_available_flops` against known device names."""
+    with mock.patch("lightning.fabric.utilities.throughput.torch.cuda.get_device_name", return_value=device_name):
+        assert get_available_flops(device=torch.device("cuda"), dtype=torch.float32) is not None
+
+
 def test_throughput():
     # required args only
     throughput = Throughput()
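
For reference, a minimal usage sketch of the fixed code path (assumes a CUDA-enabled PyTorch build; the import target is the same module the test above mocks):

    import torch
    from lightning.fabric.utilities.throughput import get_available_flops

    if torch.cuda.is_available():
        # Peak theoretical FLOP/s for the current device and dtype, or None
        # if the device name is not in the known-chip table. With this fix,
        # a Tesla V100S-PCIE card now resolves instead of returning None.
        flops = get_available_flops(torch.device("cuda"), torch.float32)
        print(flops)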