lightning/tests/tests_fabric/strategies/test_dp.py

94 lines
3.4 KiB
Python

# Copyright The Lightning AI team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from unittest import mock
from unittest.mock import MagicMock, Mock
import pytest
import torch
from lightning.fabric import Fabric
from lightning.fabric.strategies import DataParallelStrategy
from tests_fabric.helpers.runif import RunIf
from tests_fabric.strategies.test_single_device import _run_test_clip_gradients
def test_data_parallel_root_device():
    """Check that the root device equals the first listed parallel device, even when indices are unordered."""
    devices = [torch.device("cuda", index) for index in (2, 0, 1)]
    dp_strategy = DataParallelStrategy()
    dp_strategy.parallel_devices = devices
    # The first entry (cuda:2) is expected, not the lowest device index.
    assert dp_strategy.root_device == devices[0]
def test_data_parallel_ranks():
    """Check that a default DP strategy reports world size 1, local/global rank 0, and is global zero."""
    dp_strategy = DataParallelStrategy()
    observed = (dp_strategy.world_size, dp_strategy.local_rank, dp_strategy.global_rank)
    assert observed == (1, 0, 0)
    assert dp_strategy.is_global_zero
@mock.patch("lightning.fabric.strategies.dp.DataParallel")
def test_data_parallel_setup_module(data_parallel_mock):
    """Check that ``setup_module`` wraps the module in ``DataParallel`` with the strategy's parallel devices.

    Args:
        data_parallel_mock: The mock injected by ``mock.patch`` in place of ``DataParallel``.
    """
    strategy = DataParallelStrategy()
    strategy.parallel_devices = [0, 2, 1]
    module = torch.nn.Linear(2, 2)
    wrapped_module = strategy.setup_module(module)
    # A mock hands back the same ``return_value`` for any arguments, so comparing the result against a
    # fresh mock call would not verify what was passed. Inspect the recorded call instead.
    # NOTE(review): assumes the strategy passes ``module`` and ``device_ids`` as keywords, mirroring the
    # call spelled out by the previous assertion - confirm against the strategy implementation.
    data_parallel_mock.assert_called_once_with(module=module, device_ids=[0, 2, 1])
    assert wrapped_module is data_parallel_mock.return_value
def test_data_parallel_module_to_device():
    """Check that ``module_to_device`` calls ``module.to`` with the strategy's single parallel device."""
    target_device = torch.device("cuda", 2)
    dp_strategy = DataParallelStrategy()
    dp_strategy.parallel_devices = [target_device]
    fake_module = Mock()
    dp_strategy.module_to_device(fake_module)
    fake_module.to.assert_called_with(target_device)
def test_dp_module_state_dict():
    """Check that the strategy yields the plain module's state-dict keys with or without the DP wrapper."""

    class DataParallelMock(MagicMock):
        def __instancecheck__(self, instance):
            # Intended to let the strategy's `isinstance(model, DataParallel)` check succeed while
            # `DataParallel` is replaced by this mock class.
            return True

    cpu_devices = [torch.device("cpu"), torch.device("cpu")]
    dp_strategy = DataParallelStrategy(parallel_devices=cpu_devices)
    original_module = torch.nn.Linear(2, 3)

    # Unwrapped module: no setup call has happened yet.
    unwrapped_keys = dp_strategy.get_module_state_dict(original_module).keys()
    assert unwrapped_keys == original_module.state_dict().keys()

    # Wrapped module: setup runs while `DataParallel` is swapped for the mock class.
    with mock.patch("lightning.fabric.strategies.dp.DataParallel", DataParallelMock):
        wrapped_module = dp_strategy.setup_module(original_module)
        wrapped_keys = dp_strategy.get_module_state_dict(wrapped_module).keys()
        assert wrapped_keys == original_module.state_dict().keys()
@pytest.mark.parametrize(
    "precision",
    ["32-true", "16-mixed", pytest.param("bf16-mixed", marks=RunIf(bf16_cuda=True))],
)
@pytest.mark.parametrize("clip_type", ["norm", "val"])
@RunIf(min_cuda_gpus=2)
def test_clip_gradients(clip_type, precision):
    """Run the shared gradient-clipping check through Fabric with the ``dp`` strategy on two CUDA devices."""
    norm_with_fp16 = (clip_type, precision) == ("norm", "16-mixed")
    if norm_with_fp16:
        pytest.skip(reason="Clipping by norm with 16-mixed is numerically unstable.")

    fabric_kwargs = {"accelerator": "cuda", "devices": 2, "precision": precision, "strategy": "dp"}
    fabric = Fabric(**fabric_kwargs)
    fabric.launch()
    _run_test_clip_gradients(fabric=fabric, clip_type=clip_type)