fix dp reduction test (#6404)

* fix * update * fix * move the class outside
2021-03-08 19:11:20 +01:00 · 2021-03-08 19:11:20 +01:00 · e1f5eacab9
parent efd272a3ca
commit e1f5eacab9
2 changed files with 51 additions and 11 deletions
--- a/pytorch_lightning/plugins/training_type/dp.py
+++ b/pytorch_lightning/plugins/training_type/dp.py
@ -49,9 +49,9 @@ class DataParallelPlugin(ParallelPlugin):

        else:

-            def _reduce(tensor: torch.Tensor):
-                dtype_tensor = tensor.dtype
-                return tensor.float().mean().type(dtype_tensor)
+            def _reduce(t: torch.Tensor):
+                dtype_tensor = t.dtype
+                return t.float().mean().type(dtype_tensor)

            tensor = apply_to_collection(tensor, torch.Tensor, _reduce)

--- a/tests/accelerators/test_dp.py
+++ b/tests/accelerators/test_dp.py
@ -13,13 +13,14 @@
 # limitations under the License.
 import torch
 import torch.nn.functional as F
+from torch.utils.data import DataLoader

 import pytorch_lightning as pl
 import tests.helpers.pipelines as tpipes
 import tests.helpers.utils as tutils
 from pytorch_lightning.callbacks import EarlyStopping
 from pytorch_lightning.core import memory
-from tests.helpers import BoringModel
+from tests.helpers import BoringModel, RandomDataset
 from tests.helpers.datamodules import ClassifDataModule
 from tests.helpers.runif import RunIf
 from tests.helpers.simple_models import ClassificationModel
@ -125,19 +126,58 @@ def test_dp_test(tmpdir):
    assert torch.all(torch.eq(old_weights, new_weights))


+class ReductionTestModel(BoringModel):
+
+    def train_dataloader(self):
+        return DataLoader(RandomDataset(32, 64), batch_size=2)
+
+    def val_dataloader(self):
+        return DataLoader(RandomDataset(32, 64), batch_size=2)
+
+    def test_dataloader(self):
+        return DataLoader(RandomDataset(32, 64), batch_size=2)
+
+    def add_outputs(self, output, device):
+        output.update({
+            "reduce_int": torch.tensor(device.index, dtype=torch.int, device=device),
+            "reduce_float": torch.tensor(device.index, dtype=torch.float, device=device),
+        })
+
+    def training_step(self, batch, batch_idx):
+        output = super().training_step(batch, batch_idx)
+        self.add_outputs(output, batch.device)
+        return output
+
+    def validation_step(self, batch, batch_idx):
+        output = super().validation_step(batch, batch_idx)
+        self.add_outputs(output, batch.device)
+        return output
+
+    def test_step(self, batch, batch_idx):
+        output = super().test_step(batch, batch_idx)
+        self.add_outputs(output, batch.device)
+        return output
+
+    def training_epoch_end(self, outputs):
+        assert outputs[0]["loss"].shape == torch.Size([])
+        assert outputs[0]["reduce_int"].item() == 0  # mean([0, 1]) = 0
+        assert outputs[0]["reduce_float"].item() == 0.5  # mean([0., 1.]) = 0.5
+
+
@RunIf(min_gpus=2)
 def test_dp_training_step_dict(tmpdir):
-    """
-    This test verify dp properly reduce dictionaries
-    """
-
-    model = BoringModel()
+    """ This test verifies that dp properly reduces dictionaries """
+    model = ReductionTestModel()
    model.training_step_end = None
+    model.validation_step_end = None
+    model.test_step_end = None
+
    trainer = pl.Trainer(
        default_root_dir=tmpdir,
        max_epochs=1,
-        limit_train_batches=2,
-        limit_val_batches=0,
+        limit_train_batches=1,
+        limit_val_batches=1,
+        limit_test_batches=1,
        gpus=2,
        accelerator='dp',
    )