from unittest.mock import MagicMock, Mock

import pytest
import torch
from torch.nn import DataParallel

from pytorch_lightning.overrides import LightningDistributedModule
from pytorch_lightning.overrides.data_parallel import (
    LightningParallelModule,
    python_scalar_to_tensor,
    unsqueeze_scalar_tensor,
)
from pytorch_lightning.trainer.states import RunningStage
from tests.helpers import BoringModel
from tests.helpers.runif import RunIf


@pytest.mark.parametrize("wrapper_class", [
    LightningParallelModule,
    LightningDistributedModule,
])
@pytest.mark.parametrize(
    "stage", [
        ("training", "training_step"),
        ("testing", "test_step"),
        ("validating", "validation_step"),
        ("predicting", "predict"),
    ]
)
def test_lightning_wrapper_module_methods(wrapper_class, stage):
    """ Test that the LightningWrapper redirects .forward() to the LightningModule methods. """
    pl_module = MagicMock()
    wrapped_module = wrapper_class(pl_module)

    batch = torch.rand(5)
    batch_idx = 3

    prop, step = stage
    pl_module.trainer.sanity_checking = False
    # enable only the trainer stage flag under test; all others stay False
    for p in ("training", "testing", "validating", "predicting"):
        setattr(pl_module.trainer, p, p == prop)

    wrapped_module(batch, batch_idx)
    getattr(pl_module, step).assert_called_with(batch, batch_idx)


@pytest.mark.parametrize(
    "inp,expected", [
        [torch.tensor(1.0), torch.tensor([1.0])],
        [torch.tensor([2.0]), torch.tensor([2.0])],
        [torch.ones(3, 4, 5), torch.ones(3, 4, 5)],
    ]
)
def test_unsqueeze_scalar_tensor(inp, expected):
    """ Test that the utility function unsqueezes only scalar tensors. """
    assert torch.all(unsqueeze_scalar_tensor(inp).eq(expected))


@RunIf(min_gpus=2)
def test_lightning_parallel_module_unsqueeze_scalar():
    """ Test that LightningParallelModule takes care of un-squeezing 0-dim tensors. """

    class TestModel(BoringModel):

        def training_step(self, batch, batch_idx):
            output = super().training_step(batch, batch_idx)
            loss = output["loss"]
            loss = loss.squeeze()
            assert loss.dim() == 0
            # PyTorch usually warns about 0-dim tensors returned in DP
            return {"loss": loss}

    model = TestModel()
    model.trainer = Mock()
    model.trainer._running_stage = RunningStage.TRAINING
    batch = torch.rand(2, 32).cuda()
    batch_idx = 0

    wrapped_model = LightningParallelModule(model).cuda()
    dp_module = DataParallel(wrapped_model, device_ids=[0, 1])

    output = wrapped_model(batch, batch_idx)
    assert output["loss"].dim() == 1

    with pytest.warns(None) as record:
        output = dp_module(batch, batch_idx)

    assert output["loss"].dim() == 1
    # the wrapper un-squeezed the loss, so DP must not have warned
    assert not record


@pytest.mark.parametrize(
    "inp,expected", [
        [1.0, torch.tensor([1.0])],
        [2, torch.tensor([2.0])],
        [True, torch.tensor([True])],
    ]
)
def test_python_scalar_to_tensor(inp, expected):
    """ Test conversion of Python scalars (float, int, bool) to 1-dim tensors. """
    assert torch.all(python_scalar_to_tensor(inp).eq(expected))


@RunIf(min_gpus=1)
@pytest.mark.parametrize("device", [torch.device("cpu"), torch.device("cuda", 0)])
def test_lightning_parallel_module_python_scalar_conversion(device):
    """ Test that LightningParallelModule can convert Python scalars to tensors. """

    class TestModel(BoringModel):

        def training_step(self, batch, batch_idx):
            output = super().training_step(batch, batch_idx)
            # PyTorch DP does not support Python scalars, Lightning converts them to tensors
            output.update({"python scalar": 12.3})
            return output

    model = TestModel().to(device)
    model.trainer = Mock()
    model.trainer._running_stage = RunningStage.TRAINING
    batch = torch.rand(2, 32).to(device)
    batch_idx = 0

    wrapped_model = LightningParallelModule(model)
    output = wrapped_model(batch, batch_idx)

    assert output["python scalar"] == torch.tensor([12.3], device=device)
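

# ---------------------------------------------------------------------------
# Reference sketch: the behavior these tests assume from the two helpers under
# test. Illustrative only, not the library's actual implementation (which
# lives in pytorch_lightning.overrides.data_parallel); the names below are
# hypothetical and underscore-prefixed so they do not shadow the real imports.
# ---------------------------------------------------------------------------


def _python_scalar_to_tensor_sketch(data, device=torch.device("cpu")):
    """Hypothetical sketch: wrap a Python number (int, float, bool) in a 1-dim tensor."""
    import numbers

    if isinstance(data, numbers.Number):
        # bool is a numbers.Number subclass, so True becomes tensor([True])
        data = torch.tensor([data], device=device)
    return data


def _unsqueeze_scalar_tensor_sketch(data):
    """Hypothetical sketch: give a 0-dim tensor a dimension so DP can gather it without warning."""
    if isinstance(data, torch.Tensor) and data.dim() == 0:
        data = data.unsqueeze(0)
    return data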