diff --git a/pytorch_lightning/metrics/classification/auroc.py b/pytorch_lightning/metrics/classification/auroc.py
index a755e2bbb8..ece2452938 100644
--- a/pytorch_lightning/metrics/classification/auroc.py
+++ b/pytorch_lightning/metrics/classification/auroc.py
@@ -29,10 +29,10 @@ class AUROC(Metric):
 
     Forward accepts
 
-    - ``preds`` (float tensor): ``(N, )`` (binary) or ``(N, C, ...)`` (multilabel/multiclass)
-      where C is the number of classes
+    - ``preds`` (float tensor): ``(N, ...)`` (binary) or ``(N, C, ...)`` (multilabel/multiclass) tensor
+      with probabilities, where C is the number of classes.
 
-    - ``target`` (long tensor): ``(N, ...)`` or ``(N, C, ...)``
+    - ``target`` (long tensor): ``(N, ...)`` or ``(N, C, ...)`` with integer labels
 
     For non-binary input, if the ``preds`` and ``target`` tensor have the same
     size the input will be interpretated as multilabel and if ``preds`` have one
diff --git a/pytorch_lightning/metrics/classification/average_precision.py b/pytorch_lightning/metrics/classification/average_precision.py
index f6678ddd4a..f9c7bde158 100644
--- a/pytorch_lightning/metrics/classification/average_precision.py
+++ b/pytorch_lightning/metrics/classification/average_precision.py
@@ -28,10 +28,10 @@ class AveragePrecision(Metric):
 
     Forward accepts
 
-    - ``preds`` (float tensor): ``(N, ...)`` (binary) or ``(N, C, ...)`` (multiclass)
-      where C is the number of classes
+    - ``preds`` (float tensor): ``(N, ...)`` (binary) or ``(N, C, ...)`` (multiclass) tensor
+      with probabilities, where C is the number of classes.
 
-    - ``target`` (long tensor): ``(N, ...)``
+    - ``target`` (long tensor): ``(N, ...)`` with integer labels
 
     Args:
         num_classes: integer with number of classes. Not nessesary to provide
diff --git a/pytorch_lightning/metrics/classification/confusion_matrix.py b/pytorch_lightning/metrics/classification/confusion_matrix.py
index 77933ab9ba..c3defc82bc 100644
--- a/pytorch_lightning/metrics/classification/confusion_matrix.py
+++ b/pytorch_lightning/metrics/classification/confusion_matrix.py
@@ -23,7 +23,7 @@ class ConfusionMatrix(Metric):
     """
     Computes the `confusion matrix
     <https://scikit-learn.org/stable/modules/model_evaluation.html#confusion-matrix>`_.  Works with binary,
-    multiclass, and multilabel data.  Accepts logits from a model output or
+    multiclass, and multilabel data.  Accepts probabilities from a model output or
     integer class values in prediction.  Works with multi-dimensional preds and
     target.
 
@@ -35,8 +35,8 @@ class ConfusionMatrix(Metric):
     - ``preds`` (float or long tensor): ``(N, ...)`` or ``(N, C, ...)`` where C is the number of classes
     - ``target`` (long tensor): ``(N, ...)``
 
-    If preds and target are the same shape and preds is a float tensor, we use the ``self.threshold`` argument.
-    This is the case for binary and multi-label logits.
+    If preds and target are the same shape and preds is a float tensor, we use the ``self.threshold`` argument
+    to convert into integer labels. This is the case for binary and multi-label probabilities.
 
     If preds has an extra dimension as in the case of multi-class scores we perform an argmax on ``dim=1``.
 
@@ -44,13 +44,13 @@ class ConfusionMatrix(Metric):
         num_classes: Number of classes in the dataset.
         normalize: Normalization mode for confusion matrix. Choose from
 
-            - ``None``: no normalization (default)
+            - ``None`` or ``'none'``: no normalization (default)
             - ``'true'``: normalization over the targets (most commonly used)
             - ``'pred'``: normalization over the predictions
             - ``'all'``: normalization over the whole matrix
 
         threshold:
-            Threshold value for binary or multi-label logits. default: 0.5
+            Threshold value for binary or multi-label probabilities. default: 0.5
         compute_on_step:
             Forward only calls ``update()`` and return None if this is set to False. default: True
         dist_sync_on_step:
@@ -90,7 +90,7 @@ class ConfusionMatrix(Metric):
         self.normalize = normalize
         self.threshold = threshold
 
-        allowed_normalize = ('true', 'pred', 'all', None)
+        allowed_normalize = ('true', 'pred', 'all', 'none', None)
         assert self.normalize in allowed_normalize, \
             f"Argument average needs to one of the following: {allowed_normalize}"
 
diff --git a/pytorch_lightning/metrics/classification/f_beta.py b/pytorch_lightning/metrics/classification/f_beta.py
index 6d5fd6e78e..9a580e02cf 100755
--- a/pytorch_lightning/metrics/classification/f_beta.py
+++ b/pytorch_lightning/metrics/classification/f_beta.py
@@ -29,7 +29,7 @@ class FBeta(Metric):
         {(\beta^2 * \text{precision}) + \text{recall}}
 
     Where :math:`\beta` is some positive real factor. Works with binary, multiclass, and multilabel data.
-    Accepts logits from a model output or integer class values in prediction.
+    Accepts probabilities from a model output or integer class values in prediction.
     Works with multi-dimensional preds and target.
 
     Forward accepts
@@ -37,8 +37,8 @@ class FBeta(Metric):
     - ``preds`` (float or long tensor): ``(N, ...)`` or ``(N, C, ...)`` where C is the number of classes
     - ``target`` (long tensor): ``(N, ...)``
 
-    If preds and target are the same shape and preds is a float tensor, we use the ``self.threshold`` argument.
-    This is the case for binary and multi-label logits.
+    If preds and target are the same shape and preds is a float tensor, we use the ``self.threshold`` argument
+    to convert into integer labels. This is the case for binary and multi-label probabilities.
 
     If preds has an extra dimension as in the case of multi-class scores we perform an argmax on ``dim=1``.
 
@@ -46,14 +46,14 @@ class FBeta(Metric):
         num_classes: Number of classes in the dataset.
         beta: Beta coefficient in the F measure.
         threshold:
-            Threshold value for binary or multi-label logits. default: 0.5
+            Threshold value for binary or multi-label probabilities. default: 0.5
 
         average:
             - ``'micro'`` computes metric globally
             - ``'macro'`` computes metric for each class and uniformly averages them
             - ``'weighted'`` computes metric for each class and does a weighted-average,
               where each class is weighted by their support (accounts for class imbalance)
-            - ``'none'`` computes and returns the metric per class
+            - ``'none'`` or ``None`` computes and returns the metric per class
 
         multilabel: If predictions are from multilabel classification.
         compute_on_step:
@@ -98,7 +98,7 @@ class FBeta(Metric):
         self.average = average
         self.multilabel = multilabel
 
-        allowed_average = ("micro", "macro", "weighted", None)
+        allowed_average = ("micro", "macro", "weighted", "none", None)
         if self.average not in allowed_average:
             raise ValueError(
                 'Argument `average` expected to be one of the following:'
@@ -163,7 +163,7 @@ class F1(FBeta):
             - ``'macro'`` computes metric for each class and uniformly averages them
             - ``'weighted'`` computes metric for each class and does a weighted-average,
               where each class is weighted by their support (accounts for class imbalance)
-            - ``'none'`` computes and returns the metric per class
+            - ``'none'`` or ``None`` computes and returns the metric per class
 
         multilabel: If predictions are from multilabel classification.
         compute_on_step:
diff --git a/pytorch_lightning/metrics/classification/iou.py b/pytorch_lightning/metrics/classification/iou.py
index 40567a40c8..a261b767a8 100644
--- a/pytorch_lightning/metrics/classification/iou.py
+++ b/pytorch_lightning/metrics/classification/iou.py
@@ -29,7 +29,7 @@ class IoU(ConfusionMatrix):
     They may be subject to conversion from input data (see description below). Note that it is different from box IoU.
 
     Works with binary, multiclass and multi-label data.
-    Accepts logits from a model output or integer class values in prediction.
+    Accepts probabilities from a model output or integer class values in prediction.
     Works with multi-dimensional preds and target.
 
     Forward accepts
@@ -37,8 +37,8 @@ class IoU(ConfusionMatrix):
     - ``preds`` (float or long tensor): ``(N, ...)`` or ``(N, C, ...)`` where C is the number of classes
     - ``target`` (long tensor): ``(N, ...)``
 
-    If preds and target are the same shape and preds is a float tensor, we use the ``self.threshold`` argument.
-    This is the case for binary and multi-label logits.
+    If preds and target are the same shape and preds is a float tensor, we use the ``self.threshold`` argument
+    to convert into integer labels. This is the case for binary and multi-label probabilities.
 
     If preds has an extra dimension as in the case of multi-class scores we perform an argmax on ``dim=1``.
 
@@ -51,7 +51,7 @@ class IoU(ConfusionMatrix):
             `pred` AND no instances of the class index were present in `target`. For example, if we have 3 classes,
             [0, 0] for `pred`, and [0, 2] for `target`, then class 1 would be assigned the `absent_score`.
         threshold:
-            Threshold value for binary or multi-label logits.
+            Threshold value for binary or multi-label probabilities.
         reduction: a method to reduce metric score over labels.
 
             - ``'elementwise_mean'``: takes the mean (default)
diff --git a/pytorch_lightning/metrics/classification/precision_recall_curve.py b/pytorch_lightning/metrics/classification/precision_recall_curve.py
index 4f81c7283e..9c6c4421cb 100644
--- a/pytorch_lightning/metrics/classification/precision_recall_curve.py
+++ b/pytorch_lightning/metrics/classification/precision_recall_curve.py
@@ -31,10 +31,10 @@ class PrecisionRecallCurve(Metric):
 
     Forward accepts
 
-    - ``preds`` (float tensor): ``(N, ...)`` (binary) or ``(N, C, ...)`` (multiclass)
-      where C is the number of classes
+    - ``preds`` (float tensor): ``(N, ...)`` (binary) or ``(N, C, ...)`` (multiclass) tensor
+      with probabilities, where C is the number of classes.
 
-    - ``target`` (long tensor): ``(N, ...)``
+    - ``target`` (long tensor): ``(N, ...)`` or ``(N, C, ...)`` with integer labels
 
     Args:
         num_classes: integer with number of classes. Not nessesary to provide
diff --git a/pytorch_lightning/metrics/classification/roc.py b/pytorch_lightning/metrics/classification/roc.py
index a5ff459f67..9452d59fb9 100644
--- a/pytorch_lightning/metrics/classification/roc.py
+++ b/pytorch_lightning/metrics/classification/roc.py
@@ -28,10 +28,10 @@ class ROC(Metric):
 
     Forward accepts
 
-    - ``preds`` (float tensor): ``(N, ...)`` (binary) or ``(N, C, ...)`` (multiclass)
-      where C is the number of classes
+    - ``preds`` (float tensor): ``(N, ...)`` (binary) or ``(N, C, ...)`` (multiclass) tensor
+      with probabilities, where C is the number of classes.
 
-    - ``target`` (long tensor): ``(N, ...)``
+    - ``target`` (long tensor): ``(N, ...)`` or ``(N, C, ...)`` with integer labels
 
     Args:
         num_classes: integer with number of classes. Not nessesary to provide
diff --git a/pytorch_lightning/metrics/functional/auroc.py b/pytorch_lightning/metrics/functional/auroc.py
index 29f5081295..816bbde7c6 100644
--- a/pytorch_lightning/metrics/functional/auroc.py
+++ b/pytorch_lightning/metrics/functional/auroc.py
@@ -147,7 +147,7 @@ def auroc(
     <https://en.wikipedia.org/wiki/Receiver_operating_characteristic#Further_interpretations>`_
 
     Args:
-        preds: Predictions from model (probabilities)
+        preds: predictions from model (logits or probabilities)
         target: Ground truth labels
         num_classes: integer with number of classes. Not nessesary to provide
             for binary problems.
diff --git a/pytorch_lightning/metrics/functional/average_precision.py b/pytorch_lightning/metrics/functional/average_precision.py
index 49dc6fed9c..19d496dd57 100644
--- a/pytorch_lightning/metrics/functional/average_precision.py
+++ b/pytorch_lightning/metrics/functional/average_precision.py
@@ -61,6 +61,8 @@ def average_precision(
     Computes the average precision score.
 
     Args:
+        preds: predictions from model (logits or probabilities)
+        target: ground truth values
         num_classes: integer with number of classes. Not nessesary to provide
             for binary problems.
         pos_label: integer determining the positive class. Default is ``None``
diff --git a/pytorch_lightning/metrics/functional/confusion_matrix.py b/pytorch_lightning/metrics/functional/confusion_matrix.py
index a55619dd04..58947f2cb1 100644
--- a/pytorch_lightning/metrics/functional/confusion_matrix.py
+++ b/pytorch_lightning/metrics/functional/confusion_matrix.py
@@ -33,11 +33,11 @@ def _confusion_matrix_update(
 
 
 def _confusion_matrix_compute(confmat: torch.Tensor, normalize: Optional[str] = None) -> torch.Tensor:
-    allowed_normalize = ('true', 'pred', 'all', None)
+    allowed_normalize = ('true', 'pred', 'all', 'none', None)
     assert normalize in allowed_normalize, \
         f"Argument average needs to one of the following: {allowed_normalize}"
     confmat = confmat.float()
-    if normalize is not None:
+    if normalize is not None and normalize != 'none':
         if normalize == 'true':
             cm = confmat / confmat.sum(axis=1, keepdim=True)
         elif normalize == 'pred':
@@ -61,28 +61,28 @@ def confusion_matrix(
 ) -> torch.Tensor:
     """
     Computes the confusion matrix. Works with binary, multiclass, and multilabel data.
-    Accepts logits from a model output or integer class values in prediction.
+    Accepts probabilities from a model output or integer class values in prediction.
     Works with multi-dimensional preds and target.
 
-    If preds and target are the same shape and preds is a float tensor, we use the ``self.threshold`` argument.
-    This is the case for binary and multi-label logits.
+    If preds and target are the same shape and preds is a float tensor, we use the ``threshold`` argument
+    to convert into integer labels. This is the case for binary and multi-label probabilities.
 
     If preds has an extra dimension as in the case of multi-class scores we perform an argmax on ``dim=1``.
 
     Args:
         preds: (float or long tensor), Either a ``(N, ...)`` tensor with labels or
-            ``(N, C, ...)`` where C is the number of classes, tensor with logits/probabilities
+            ``(N, C, ...)`` where C is the number of classes, tensor with labels/probabilities
         target: ``target`` (long tensor), tensor with shape ``(N, ...)`` with ground true labels
         num_classes: Number of classes in the dataset.
         normalize: Normalization mode for confusion matrix. Choose from
 
-            - ``None``: no normalization (default)
+            - ``None`` or ``'none'``: no normalization (default)
             - ``'true'``: normalization over the targets (most commonly used)
             - ``'pred'``: normalization over the predictions
             - ``'all'``: normalization over the whole matrix
 
         threshold:
-            Threshold value for binary or multi-label logits. default: 0.5
+            Threshold value for binary or multi-label probabilities. default: 0.5
 
     Example:
 
diff --git a/pytorch_lightning/metrics/functional/f_beta.py b/pytorch_lightning/metrics/functional/f_beta.py
index 07633e8174..debb6c8285 100755
--- a/pytorch_lightning/metrics/functional/f_beta.py
+++ b/pytorch_lightning/metrics/functional/f_beta.py
@@ -64,28 +64,28 @@ def fbeta(
     Computes f_beta metric.
 
     Works with binary, multiclass, and multilabel data.
-    Accepts logits from a model output or integer class values in prediction.
+    Accepts probabilities from a model output or integer class values in prediction.
     Works with multi-dimensional preds and target.
 
-    If preds and target are the same shape and preds is a float tensor, we use the ``self.threshold`` argument.
-    This is the case for binary and multi-label logits.
+    If preds and target are the same shape and preds is a float tensor, we use the ``threshold`` argument
+    to convert into integer labels. This is the case for binary and multi-label probabilities.
 
     If preds has an extra dimension as in the case of multi-class scores we perform an argmax on ``dim=1``.
 
     Args:
-        preds: estimated probabilities
-        target: ground-truth labels
+        preds: predictions from model (probabilities or labels)
+        target: ground truth labels
         num_classes: Number of classes in the dataset.
         beta: Beta coefficient in the F measure.
         threshold:
-            Threshold value for binary or multi-label logits. default: 0.5
+            Threshold value for binary or multi-label probabilities. default: 0.5
 
         average:
             - ``'micro'`` computes metric globally
             - ``'macro'`` computes metric for each class and uniformly averages them
             - ``'weighted'`` computes metric for each class and does a weighted-average,
               where each class is weighted by their support (accounts for class imbalance)
-            - ``'none'`` computes and returns the metric per class
+            - ``'none'`` or ``None`` computes and returns the metric per class
 
         multilabel: If predictions are from multilabel classification.
 
@@ -117,27 +117,27 @@ def f1(
     precision and recall scores.
 
     Works with binary, multiclass, and multilabel data.
-    Accepts logits from a model output or integer class values in prediction.
+    Accepts probabilities from a model output or integer class values in prediction.
     Works with multi-dimensional preds and target.
 
-    If preds and target are the same shape and preds is a float tensor, we use the ``self.threshold`` argument.
-    This is the case for binary and multi-label logits.
+    If preds and target are the same shape and preds is a float tensor, we use the ``threshold`` argument
+    to convert into integer labels. This is the case for binary and multi-label probabilities.
 
     If preds has an extra dimension as in the case of multi-class scores we perform an argmax on ``dim=1``.
 
     Args:
-        preds: estimated probabilities
-        target: ground-truth labels
+        preds: predictions from model (probabilities or labels)
+        target: ground truth labels
         num_classes: Number of classes in the dataset.
         threshold:
-            Threshold value for binary or multi-label logits. default: 0.5
+            Threshold value for binary or multi-label probabilities. default: 0.5
 
         average:
             - ``'micro'`` computes metric globally
             - ``'macro'`` computes metric for each class and uniformly averages them
             - ``'weighted'`` computes metric for each class and does a weighted-average,
               where each class is weighted by their support (accounts for class imbalance)
-            - ``'none'`` computes and returns the metric per class
+            - ``'none'`` or ``None`` computes and returns the metric per class
 
         multilabel: If predictions are from multilabel classification.
 
diff --git a/pytorch_lightning/metrics/functional/iou.py b/pytorch_lightning/metrics/functional/iou.py
index 1f539215cc..a716bcbdc4 100644
--- a/pytorch_lightning/metrics/functional/iou.py
+++ b/pytorch_lightning/metrics/functional/iou.py
@@ -63,15 +63,15 @@ def iou(
 
     Note that it is different from box IoU.
 
-    If pred and target are the same shape and pred is a float tensor,
-    we use the ``threshold`` argument. This is the case for binary and multi-label logits.
+    If pred and target are the same shape and pred is a float tensor, we use the ``threshold`` argument
+    to convert into integer labels. This is the case for binary and multi-label probabilities.
 
     If pred has an extra dimension as in the case of multi-class scores we
     perform an argmax on ``dim=1``.
 
     Args:
-        pred: Tensor containing integer predictions, with shape [N, d1, d2, ...]
-        target: Tensor containing integer targets, with shape [N, d1, d2, ...]
+        pred: tensor containing predictions from model (probabilities or labels) with shape ``[N, d1, d2, ...]``
+        target: tensor containing ground truth labels with shape ``[N, d1, d2, ...]``
         ignore_index: optional int specifying a target class to ignore. If given,
             this class index does not contribute to the returned score, regardless
             of reduction method. Has no effect if given an int that is not in the
@@ -83,7 +83,7 @@ def iou(
             [0, 0] for `pred`, and [0, 2] for `target`, then class 1 would be
             assigned the `absent_score`.
         threshold:
-            Threshold value for binary or multi-label logits. default: 0.5
+            Threshold value for binary or multi-label probabilities. default: 0.5
         num_classes:
             Optionally specify the number of classes
         reduction: a method to reduce metric score over labels.
diff --git a/pytorch_lightning/metrics/functional/precision_recall_curve.py b/pytorch_lightning/metrics/functional/precision_recall_curve.py
index 4eab13e6bb..57d8392dce 100644
--- a/pytorch_lightning/metrics/functional/precision_recall_curve.py
+++ b/pytorch_lightning/metrics/functional/precision_recall_curve.py
@@ -160,6 +160,8 @@ def precision_recall_curve(
     Computes precision-recall pairs for different thresholds.
 
     Args:
+        preds: predictions from model (probabilities)
+        target: ground truth labels
         num_classes: integer with number of classes. Not nessesary to provide
             for binary problems.
         pos_label: integer determining the positive class. Default is ``None``
diff --git a/pytorch_lightning/metrics/functional/roc.py b/pytorch_lightning/metrics/functional/roc.py
index 16ecf18b91..5c4cb7c9de 100644
--- a/pytorch_lightning/metrics/functional/roc.py
+++ b/pytorch_lightning/metrics/functional/roc.py
@@ -89,6 +89,8 @@ def roc(
     Computes the Receiver Operating Characteristic (ROC).
 
     Args:
+        preds: predictions from model (logits or probabilities)
+        target: ground truth values
         num_classes: integer with number of classes. Not nessesary to provide
             for binary problems.
         pos_label: integer determining the positive class. Default is ``None``