Fix parsing v100s in `get_available_flops` (#18952)

Co-authored-by: Carlos Mocholí <carlossmocholi@gmail.com>
Adrian Wälchli 2023-11-06 21:50:11 +01:00 committed by GitHub
parent c4af18b2c5
commit 195a3bf5b5
3 changed files with 30 additions and 1 deletion

CHANGELOG.md

@@ -41,6 +41,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 - Fixed layer conversion under `Fabric.init_module()` context manager when using the `BitsandbytesPrecision` plugin ([#18914](https://github.com/Lightning-AI/lightning/pull/18914))
+- Fixed parsing of v100s GPUs in `get_available_flops` ([#18952](https://github.com/Lightning-AI/lightning/pull/18952))
 
 ## [2.1.0] - 2023-10-11
 ### Added

src/lightning/fabric/utilities/throughput.py

@@ -382,6 +382,8 @@ def get_available_flops(device: torch.device, dtype: Union[torch.dtype, str]) ->
             chip = "v100-sxm"
         elif "v100-pcie" in chip:
             chip = "v100-pcie"
+        elif "v100s-pcie" in chip:
+            chip = "v100s-pcie"
         elif "t4" in chip:
             chip = "t4"
         elif "quadro rtx 5000" in chip:

tests/tests_fabric/utilities/test_throughput.py

@@ -33,7 +33,7 @@ def test_measure_flops():
     assert fwd_flops < fwd_and_bwd_flops
 
 
-def test_available_flops(xla_available):
+def test_get_available_flops(xla_available):
     with mock.patch("torch.cuda.get_device_name", return_value="NVIDIA H100 PCIe"):
         flops = get_available_flops(torch.device("cuda"), torch.bfloat16)
         assert flops == 1.513e15 / 2
@@ -61,6 +61,30 @@ def test_available_flops(xla_available):
     tpu.reset_mock()
 
 
+@pytest.mark.parametrize(
+    "device_name",
+    [
+        # TODO: We need to represent the real names here
+        "h100-hbm3",
+        "NVIDIA H100 PCIe",
+        "h100-hbm2e",
+        "NVIDIA A100 80GB PCIe",
+        "NVIDIA A100-SXM4-40GB",
+        "NVIDIA A10G",
+        "Tesla V100-SXM2-32GB",
+        "Tesla V100-PCIE-32GB",
+        "Tesla V100S-PCIE-32GB",
+        "Tesla T4",
+        "Quadro RTX 5000 with Max-Q Design",
+    ],
+)
+@mock.patch("lightning.fabric.accelerators.cuda._is_ampere_or_later", return_value=False)
+def test_get_available_flops_cuda_mapping_exists(_, device_name):
+    """Tests `get_available_flops` against known device names."""
+    with mock.patch("lightning.fabric.utilities.throughput.torch.cuda.get_device_name", return_value=device_name):
+        assert get_available_flops(device=torch.device("cuda"), dtype=torch.float32) is not None
+
+
 def test_throughput():
     # required args only
     throughput = Throughput()
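
For reference, a minimal usage sketch of the fixed code path (assumes a CUDA-enabled PyTorch build; the import target is the same module the test above mocks):

    import torch
    from lightning.fabric.utilities.throughput import get_available_flops

    if torch.cuda.is_available():
        # Peak theoretical FLOP/s for the current device and dtype, or None
        # if the device name is not in the known-chip table. With this fix,
        # a Tesla V100S-PCIE card now resolves instead of returning None.
        flops = get_available_flops(torch.device("cuda"), torch.float32)
        print(flops)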