diff --git a/pytorch_lightning/models/trainer.py b/pytorch_lightning/models/trainer.py
index e8e841e3f5..50b6d305c8 100644
--- a/pytorch_lightning/models/trainer.py
+++ b/pytorch_lightning/models/trainer.py
@@ -79,9 +79,6 @@ class Trainer(TrainerIO):
         self.data_parallel_device_ids = gpus
         self.data_parallel = gpus is not None and len(gpus) > 0
 
-        # TODO: remove
-        self.on_gpu = True
-
         # training state
         self.optimizers = None
         self.prog_bar = None
@@ -204,7 +201,7 @@ class Trainer(TrainerIO):
             # when DP, we need to aggregate the scalars we received as outputs
             # use mean as the reduce function
             if self.data_parallel:
-                output = reduce_distributed_output(output, len(self.gpus))
+                output = reduce_distributed_output(output, len(self.data_parallel_device_ids))
 
             outputs.append(output)