diff --git a/pytorch_lightning/trainer/evaluation_loop.py b/pytorch_lightning/trainer/evaluation_loop.py
index a6451d3610..3b07b81dae 100644
--- a/pytorch_lightning/trainer/evaluation_loop.py
+++ b/pytorch_lightning/trainer/evaluation_loop.py
@@ -217,7 +217,7 @@ class TrainerEvaluationLoopMixin(ABC):
         """Warning: this is just empty shell for code implemented in other class."""
 
     @abstractmethod
-    def log_metrics(self, *args):
+    def log_metrics(self, *args, **kwargs):
         """Warning: this is just empty shell for code implemented in other class."""
 
     @abstractmethod
@@ -379,7 +379,7 @@ class TrainerEvaluationLoopMixin(ABC):
 
                     dl_outputs.append(output)
 
-                self.__eval_add_step_metrics(output)
+                self.__eval_add_step_metrics(output, batch_idx)
 
                 # track debug metrics
                 self.dev_debugger.track_eval_loss_history(test_mode, batch_idx, dataloader_idx, output)
@@ -505,14 +505,19 @@ class TrainerEvaluationLoopMixin(ABC):
             eval_results = eval_results[0]
         return eval_results
 
-    def __eval_add_step_metrics(self, output):
+    def __eval_add_step_metrics(self, output, batch_idx):
         # track step level metrics
         if isinstance(output, EvalResult) and not self.running_sanity_check:
             step_log_metrics = output.batch_log_metrics
             step_pbar_metrics = output.batch_pbar_metrics
 
             if len(step_log_metrics) > 0:
-                self.log_metrics(step_log_metrics, {})
+                # suffix each key with the epoch so every epoch is drawn as its own line in the same graph
+                metrics_by_epoch = {}
+                for k, v in step_log_metrics.items():
+                    metrics_by_epoch[f'{k}/epoch_{self.current_epoch}'] = v
+
+                self.log_metrics(metrics_by_epoch, {}, step=batch_idx)
 
             if len(step_pbar_metrics) > 0:
                 self.add_progress_bar_metrics(step_pbar_metrics)
diff --git a/pytorch_lightning/trainer/logging.py b/pytorch_lightning/trainer/logging.py
index c90ba59abf..f84b071114 100644
--- a/pytorch_lightning/trainer/logging.py
+++ b/pytorch_lightning/trainer/logging.py
@@ -64,10 +64,12 @@ class TrainerLoggingMixin(ABC):
 
         if "step" in scalar_metrics and step is None:
             step = scalar_metrics.pop("step")
-        else:
+
+        elif step is None:
             # added metrics by Lightning for convenience
             scalar_metrics['epoch'] = self.current_epoch
             step = step if step is not None else self.global_step
+
         # log actual metrics
         if self.is_global_zero and self.logger is not None:
             self.logger.agg_and_log_metrics(scalar_metrics, step=step)
diff --git a/tests/trainer/test_validation_steps_result_return.py b/tests/trainer/test_validation_steps_result_return.py
index 8162f57287..28f012535d 100644
--- a/tests/trainer/test_validation_steps_result_return.py
+++ b/tests/trainer/test_validation_steps_result_return.py
@@ -214,12 +214,15 @@ def test_val_step_only_step_metrics(tmpdir):
 
     # make sure we logged the correct epoch metrics
     total_empty_epoch_metrics = 0
+    epoch = 0
     for metric in trainer.dev_debugger.logged_metrics:
+        if 'epoch' in metric:
+            epoch += 1
         if len(metric) > 2:
             assert 'no_val_no_pbar' not in metric
             assert 'val_step_pbar_acc' not in metric
-            assert metric['val_step_log_acc']
-            assert metric['val_step_log_pbar_acc']
+            assert metric[f'val_step_log_acc/epoch_{epoch}']
+            assert metric[f'val_step_log_pbar_acc/epoch_{epoch}']
         else:
             total_empty_epoch_metrics += 1
 
@@ -228,6 +231,8 @@ def test_val_step_only_step_metrics(tmpdir):
     # make sure we logged the correct epoch pbar metrics
     total_empty_epoch_metrics = 0
     for metric in trainer.dev_debugger.pbar_added_metrics:
+        if 'epoch' in metric:
+            epoch += 1
         if len(metric) > 2:
             assert 'no_val_no_pbar' not in metric
             assert 'val_step_log_acc' not in metric
@@ -288,11 +293,12 @@ def test_val_step_epoch_step_metrics(tmpdir):
     for metric_idx in range(0, len(trainer.dev_debugger.logged_metrics), batches + 1):
         batch_metrics = trainer.dev_debugger.logged_metrics[metric_idx: metric_idx + batches]
         epoch_metric = trainer.dev_debugger.logged_metrics[metric_idx + batches]
+        epoch = epoch_metric['epoch']
 
         # make sure the metric was split
         for batch_metric in batch_metrics:
-            assert 'step_val_step_log_acc' in batch_metric
-            assert 'step_val_step_log_pbar_acc' in batch_metric
+            assert f'step_val_step_log_acc/epoch_{epoch}' in batch_metric
+            assert f'step_val_step_log_pbar_acc/epoch_{epoch}' in batch_metric
 
         # make sure the epoch split was correct
         assert 'epoch_val_step_log_acc' in epoch_metric
@@ -421,11 +427,11 @@ def test_val_step_full_loop_result_dp(tmpdir):
     assert 'train_step_metric' in seen_keys
     assert 'train_step_end_metric' in seen_keys
     assert 'epoch_train_epoch_end_metric' in seen_keys
-    assert 'step_validation_step_metric' in seen_keys
+    assert 'step_validation_step_metric/epoch_0' in seen_keys
     assert 'epoch_validation_step_metric' in seen_keys
     assert 'validation_step_end_metric' in seen_keys
     assert 'validation_epoch_end_metric' in seen_keys
-    assert 'step_test_step_metric' in seen_keys
+    assert 'step_test_step_metric/epoch_2' in seen_keys
     assert 'epoch_test_step_metric' in seen_keys
     assert 'test_step_end_metric' in seen_keys
     assert 'test_epoch_end_metric' in seen_keys