95 lines
3.5 KiB
Python
95 lines
3.5 KiB
Python
# Copyright The Lightning AI team.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
from unittest import mock
|
|
from unittest.mock import MagicMock, Mock
|
|
|
|
import pytest
|
|
import torch
|
|
from lightning.fabric import Fabric
|
|
from lightning.fabric.strategies import DataParallelStrategy
|
|
|
|
from tests_fabric.helpers.runif import RunIf
|
|
from tests_fabric.strategies.test_single_device import _run_test_clip_gradients
|
|
|
|
|
|
def test_data_parallel_root_device():
|
|
strategy = DataParallelStrategy()
|
|
strategy.parallel_devices = [torch.device("cuda", 2), torch.device("cuda", 0), torch.device("cuda", 1)]
|
|
assert strategy.root_device == torch.device("cuda", 2)
|
|
|
|
|
|
def test_data_parallel_ranks():
|
|
strategy = DataParallelStrategy()
|
|
assert strategy.world_size == 1
|
|
assert strategy.local_rank == 0
|
|
assert strategy.global_rank == 0
|
|
assert strategy.is_global_zero
|
|
|
|
|
|
@mock.patch("lightning.fabric.strategies.dp.DataParallel")
|
|
def test_data_parallel_setup_module(data_parallel_mock):
|
|
strategy = DataParallelStrategy()
|
|
strategy.parallel_devices = [0, 2, 1]
|
|
module = torch.nn.Linear(2, 2)
|
|
wrapped_module = strategy.setup_module(module)
|
|
assert wrapped_module == data_parallel_mock(module=module, device_ids=[0, 2, 1])
|
|
|
|
|
|
def test_data_parallel_module_to_device():
|
|
strategy = DataParallelStrategy()
|
|
strategy.parallel_devices = [torch.device("cuda", 2)]
|
|
module = Mock()
|
|
strategy.module_to_device(module)
|
|
module.to.assert_called_with(torch.device("cuda", 2))
|
|
|
|
|
|
def test_dp_module_state_dict():
|
|
"""Test that the module state dict gets retrieved without the prefixed wrapper keys from DP."""
|
|
|
|
class DataParallelMock(MagicMock):
|
|
def __instancecheck__(self, instance):
|
|
# to make the strategy's `isinstance(model, DataParallel)` pass with a mock as class
|
|
return True
|
|
|
|
strategy = DataParallelStrategy(parallel_devices=[torch.device("cpu"), torch.device("cpu")])
|
|
|
|
# Without DP applied (no setup call)
|
|
original_module = torch.nn.Linear(2, 3)
|
|
assert strategy.get_module_state_dict(original_module).keys() == original_module.state_dict().keys()
|
|
|
|
# With DP applied (setup called)
|
|
with mock.patch("lightning.fabric.strategies.dp.DataParallel", DataParallelMock):
|
|
wrapped_module = strategy.setup_module(original_module)
|
|
assert strategy.get_module_state_dict(wrapped_module).keys() == original_module.state_dict().keys()
|
|
|
|
|
|
@pytest.mark.filterwarnings("ignore::FutureWarning")
|
|
@pytest.mark.parametrize(
|
|
"precision",
|
|
[
|
|
"32-true",
|
|
"16-mixed",
|
|
pytest.param("bf16-mixed", marks=RunIf(bf16_cuda=True)),
|
|
],
|
|
)
|
|
@pytest.mark.parametrize("clip_type", ["norm", "val"])
|
|
@RunIf(min_cuda_gpus=2)
|
|
def test_clip_gradients(clip_type, precision):
|
|
if clip_type == "norm" and precision == "16-mixed":
|
|
pytest.skip(reason="Clipping by norm with 16-mixed is numerically unstable.")
|
|
|
|
fabric = Fabric(accelerator="cuda", devices=2, precision=precision, strategy="dp")
|
|
fabric.launch()
|
|
_run_test_clip_gradients(fabric=fabric, clip_type=clip_type)
|