fix metric docs (#5880)

2021-02-11 15:32:12 +01:00 · 2021-02-11 15:32:12 +01:00 · 0c80b9f890
parent cf30b956a2
commit 0c80b9f890
14 changed files with 63 additions and 57 deletions
--- a/pytorch_lightning/metrics/classification/auroc.py
+++ b/pytorch_lightning/metrics/classification/auroc.py
@ -29,10 +29,10 @@ class AUROC(Metric):

    Forward accepts

-    - ``preds`` (float tensor): ``(N, )`` (binary) or ``(N, C, ...)`` (multilabel/multiclass)
-      where C is the number of classes
+    - ``preds`` (float tensor): ``(N, ...)`` (binary) or ``(N, C, ...)`` (multiclass) tensor
+      with probabilities, where C is the number of classes.

-    - ``target`` (long tensor): ``(N, ...)`` or ``(N, C, ...)``
+    - ``target`` (long tensor): ``(N, ...)`` or ``(N, C, ...)`` with integer labels

    For non-binary input, if the ``preds`` and ``target`` tensor have the same
    size the input will be interpreted as multilabel and if ``preds`` have one
--- a/pytorch_lightning/metrics/classification/average_precision.py
+++ b/pytorch_lightning/metrics/classification/average_precision.py
@ -28,10 +28,10 @@ class AveragePrecision(Metric):

    Forward accepts

-    - ``preds`` (float tensor): ``(N, ...)`` (binary) or ``(N, C, ...)`` (multiclass)
-      where C is the number of classes
+    - ``preds`` (float tensor): ``(N, ...)`` (binary) or ``(N, C, ...)`` (multiclass) tensor
+      with probabilities, where C is the number of classes.

-    - ``target`` (long tensor): ``(N, ...)``
+    - ``target`` (long tensor): ``(N, ...)`` with integer labels

    Args:
        num_classes: integer with number of classes. Not necessary to provide
--- a/pytorch_lightning/metrics/classification/confusion_matrix.py
+++ b/pytorch_lightning/metrics/classification/confusion_matrix.py
@ -23,7 +23,7 @@ class ConfusionMatrix(Metric):
    """
    Computes the `confusion matrix
    <https://scikit-learn.org/stable/modules/model_evaluation.html#confusion-matrix>`_.  Works with binary,
-    multiclass, and multilabel data.  Accepts logits from a model output or
+    multiclass, and multilabel data.  Accepts probabilities from a model output or
    integer class values in prediction.  Works with multi-dimensional preds and
    target.

@ -35,8 +35,8 @@ class ConfusionMatrix(Metric):
    - ``preds`` (float or long tensor): ``(N, ...)`` or ``(N, C, ...)`` where C is the number of classes
    - ``target`` (long tensor): ``(N, ...)``

-    If preds and target are the same shape and preds is a float tensor, we use the ``self.threshold`` argument.
-    This is the case for binary and multi-label logits.
+    If preds and target are the same shape and preds is a float tensor, we use the ``self.threshold`` argument
+    to convert into integer labels. This is the case for binary and multi-label probabilities.

    If preds has an extra dimension as in the case of multi-class scores we perform an argmax on ``dim=1``.

@ -44,13 +44,13 @@ class ConfusionMatrix(Metric):
        num_classes: Number of classes in the dataset.
        normalize: Normalization mode for confusion matrix. Choose from

-            - ``None``: no normalization (default)
+            - ``None`` or ``'none'``: no normalization (default)
            - ``'true'``: normalization over the targets (most commonly used)
            - ``'pred'``: normalization over the predictions
            - ``'all'``: normalization over the whole matrix

        threshold:
-            Threshold value for binary or multi-label logits. default: 0.5
+            Threshold value for binary or multi-label probabilities. default: 0.5
        compute_on_step:
            Forward only calls ``update()`` and return None if this is set to False. default: True
        dist_sync_on_step:
@ -90,7 +90,7 @@ class ConfusionMatrix(Metric):
        self.normalize = normalize
        self.threshold = threshold

-        allowed_normalize = ('true', 'pred', 'all', None)
+        allowed_normalize = ('true', 'pred', 'all', 'none', None)
        assert self.normalize in allowed_normalize, \
            f"Argument average needs to one of the following: {allowed_normalize}"

--- a/pytorch_lightning/metrics/classification/f_beta.py
+++ b/pytorch_lightning/metrics/classification/f_beta.py
@ -29,7 +29,7 @@ class FBeta(Metric):
        {(\beta^2 * \text{precision}) + \text{recall}}

    Where :math:`\beta` is some positive real factor. Works with binary, multiclass, and multilabel data.
-    Accepts logits from a model output or integer class values in prediction.
+    Accepts probabilities from a model output or integer class values in prediction.
    Works with multi-dimensional preds and target.

    Forward accepts
@ -37,8 +37,8 @@ class FBeta(Metric):
    - ``preds`` (float or long tensor): ``(N, ...)`` or ``(N, C, ...)`` where C is the number of classes
    - ``target`` (long tensor): ``(N, ...)``

-    If preds and target are the same shape and preds is a float tensor, we use the ``self.threshold`` argument.
-    This is the case for binary and multi-label logits.
+    If preds and target are the same shape and preds is a float tensor, we use the ``self.threshold`` argument
+    to convert into integer labels. This is the case for binary and multi-label probabilities.

    If preds has an extra dimension as in the case of multi-class scores we perform an argmax on ``dim=1``.

@ -46,14 +46,14 @@ class FBeta(Metric):
        num_classes: Number of classes in the dataset.
        beta: Beta coefficient in the F measure.
        threshold:
-            Threshold value for binary or multi-label logits. default: 0.5
+            Threshold value for binary or multi-label probabilities. default: 0.5

        average:
            - ``'micro'`` computes metric globally
            - ``'macro'`` computes metric for each class and uniformly averages them
            - ``'weighted'`` computes metric for each class and does a weighted-average,
              where each class is weighted by their support (accounts for class imbalance)
-            - ``'none'`` computes and returns the metric per class
+            - ``'none'`` or ``None`` computes and returns the metric per class

        multilabel: If predictions are from multilabel classification.
        compute_on_step:
@ -98,7 +98,7 @@ class FBeta(Metric):
        self.average = average
        self.multilabel = multilabel

-        allowed_average = ("micro", "macro", "weighted", None)
+        allowed_average = ("micro", "macro", "weighted", "none", None)
        if self.average not in allowed_average:
            raise ValueError(
                'Argument `average` expected to be one of the following:'
@ -163,7 +163,7 @@ class F1(FBeta):
            - ``'macro'`` computes metric for each class and uniformly averages them
            - ``'weighted'`` computes metric for each class and does a weighted-average,
              where each class is weighted by their support (accounts for class imbalance)
-            - ``'none'`` computes and returns the metric per class
+            - ``'none'`` or ``None`` computes and returns the metric per class

        multilabel: If predictions are from multilabel classification.
        compute_on_step:
--- a/pytorch_lightning/metrics/classification/iou.py
+++ b/pytorch_lightning/metrics/classification/iou.py
@ -29,7 +29,7 @@ class IoU(ConfusionMatrix):
    They may be subject to conversion from input data (see description below). Note that it is different from box IoU.

    Works with binary, multiclass and multi-label data.
-    Accepts logits from a model output or integer class values in prediction.
+    Accepts probabilities from a model output or integer class values in prediction.
    Works with multi-dimensional preds and target.

    Forward accepts
@ -37,8 +37,8 @@ class IoU(ConfusionMatrix):
    - ``preds`` (float or long tensor): ``(N, ...)`` or ``(N, C, ...)`` where C is the number of classes
    - ``target`` (long tensor): ``(N, ...)``

-    If preds and target are the same shape and preds is a float tensor, we use the ``self.threshold`` argument.
-    This is the case for binary and multi-label logits.
+    If preds and target are the same shape and preds is a float tensor, we use the ``self.threshold`` argument
+    to convert into integer labels. This is the case for binary and multi-label probabilities.

    If preds has an extra dimension as in the case of multi-class scores we perform an argmax on ``dim=1``.

@ -51,7 +51,7 @@ class IoU(ConfusionMatrix):
            `pred` AND no instances of the class index were present in `target`. For example, if we have 3 classes,
            [0, 0] for `pred`, and [0, 2] for `target`, then class 1 would be assigned the `absent_score`.
        threshold:
-            Threshold value for binary or multi-label logits.
+            Threshold value for binary or multi-label probabilities.
        reduction: a method to reduce metric score over labels.

            - ``'elementwise_mean'``: takes the mean (default)
--- a/pytorch_lightning/metrics/classification/precision_recall_curve.py
+++ b/pytorch_lightning/metrics/classification/precision_recall_curve.py
@ -31,10 +31,10 @@ class PrecisionRecallCurve(Metric):

    Forward accepts

-    - ``preds`` (float tensor): ``(N, ...)`` (binary) or ``(N, C, ...)`` (multiclass)
-      where C is the number of classes
+    - ``preds`` (float tensor): ``(N, ...)`` (binary) or ``(N, C, ...)`` (multiclass) tensor
+      with probabilities, where C is the number of classes.

-    - ``target`` (long tensor): ``(N, ...)``
+    - ``target`` (long tensor): ``(N, ...)`` or ``(N, C, ...)`` with integer labels

    Args:
        num_classes: integer with number of classes. Not necessary to provide
--- a/pytorch_lightning/metrics/classification/roc.py
+++ b/pytorch_lightning/metrics/classification/roc.py
@ -28,10 +28,10 @@ class ROC(Metric):

    Forward accepts

-    - ``preds`` (float tensor): ``(N, ...)`` (binary) or ``(N, C, ...)`` (multiclass)
-      where C is the number of classes
+    - ``preds`` (float tensor): ``(N, ...)`` (binary) or ``(N, C, ...)`` (multiclass) tensor
+      with probabilities, where C is the number of classes.

-    - ``target`` (long tensor): ``(N, ...)``
+    - ``target`` (long tensor): ``(N, ...)`` or ``(N, C, ...)`` with integer labels

    Args:
        num_classes: integer with number of classes. Not necessary to provide
--- a/pytorch_lightning/metrics/functional/auroc.py
+++ b/pytorch_lightning/metrics/functional/auroc.py
@ -147,7 +147,7 @@ def auroc(
    <https://en.wikipedia.org/wiki/Receiver_operating_characteristic#Further_interpretations>`_

    Args:
-        preds: Predictions from model (probabilities)
+        preds: predictions from model (logits or probabilities)
        target: Ground truth labels
        num_classes: integer with number of classes. Not necessary to provide
            for binary problems.
--- a/pytorch_lightning/metrics/functional/average_precision.py
+++ b/pytorch_lightning/metrics/functional/average_precision.py
@ -61,6 +61,8 @@ def average_precision(
    Computes the average precision score.

    Args:
+        preds: predictions from model (logits or probabilities)
+        target: ground truth values
        num_classes: integer with number of classes. Not necessary to provide
            for binary problems.
        pos_label: integer determining the positive class. Default is ``None``
--- a/pytorch_lightning/metrics/functional/confusion_matrix.py
+++ b/pytorch_lightning/metrics/functional/confusion_matrix.py
@ -33,11 +33,11 @@ def _confusion_matrix_update(


 def _confusion_matrix_compute(confmat: torch.Tensor, normalize: Optional[str] = None) -> torch.Tensor:
-    allowed_normalize = ('true', 'pred', 'all', None)
+    allowed_normalize = ('true', 'pred', 'all', 'none', None)
    assert normalize in allowed_normalize, \
        f"Argument average needs to one of the following: {allowed_normalize}"
    confmat = confmat.float()
-    if normalize is not None:
+    if normalize is not None and normalize != 'none':
        if normalize == 'true':
            cm = confmat / confmat.sum(axis=1, keepdim=True)
        elif normalize == 'pred':
@ -61,28 +61,28 @@ def confusion_matrix(
 ) -> torch.Tensor:
    """
    Computes the confusion matrix. Works with binary, multiclass, and multilabel data.
-    Accepts logits from a model output or integer class values in prediction.
+    Accepts probabilities from a model output or integer class values in prediction.
    Works with multi-dimensional preds and target.

-    If preds and target are the same shape and preds is a float tensor, we use the ``self.threshold`` argument.
-    This is the case for binary and multi-label logits.
+    If preds and target are the same shape and preds is a float tensor, we use the ``threshold`` argument
+    to convert into integer labels. This is the case for binary and multi-label probabilities.

    If preds has an extra dimension as in the case of multi-class scores we perform an argmax on ``dim=1``.

    Args:
        preds: (float or long tensor), Either a ``(N, ...)`` tensor with labels or
-            ``(N, C, ...)`` where C is the number of classes, tensor with logits/probabilities
+            ``(N, C, ...)`` where C is the number of classes, tensor with labels/probabilities
        target: ``target`` (long tensor), tensor with shape ``(N, ...)`` with ground truth labels
        num_classes: Number of classes in the dataset.
        normalize: Normalization mode for confusion matrix. Choose from

-            - ``None``: no normalization (default)
+            - ``None`` or ``'none'``: no normalization (default)
            - ``'true'``: normalization over the targets (most commonly used)
            - ``'pred'``: normalization over the predictions
            - ``'all'``: normalization over the whole matrix

        threshold:
-            Threshold value for binary or multi-label logits. default: 0.5
+            Threshold value for binary or multi-label probabilities. default: 0.5

    Example:

--- a/pytorch_lightning/metrics/functional/f_beta.py
+++ b/pytorch_lightning/metrics/functional/f_beta.py
@ -64,28 +64,28 @@ def fbeta(
    Computes f_beta metric.

    Works with binary, multiclass, and multilabel data.
-    Accepts logits from a model output or integer class values in prediction.
+    Accepts probabilities from a model output or integer class values in prediction.
    Works with multi-dimensional preds and target.

-    If preds and target are the same shape and preds is a float tensor, we use the ``self.threshold`` argument.
-    This is the case for binary and multi-label logits.
+    If preds and target are the same shape and preds is a float tensor, we use the ``threshold`` argument
+    to convert into integer labels. This is the case for binary and multi-label probabilities.

    If preds has an extra dimension as in the case of multi-class scores we perform an argmax on ``dim=1``.

    Args:
-        preds: estimated probabilities
-        target: ground-truth labels
+        preds: predictions from model (probabilities, or labels)
+        target: ground truth labels
        num_classes: Number of classes in the dataset.
        beta: Beta coefficient in the F measure.
        threshold:
-            Threshold value for binary or multi-label logits. default: 0.5
+            Threshold value for binary or multi-label probabilities. default: 0.5

        average:
            - ``'micro'`` computes metric globally
            - ``'macro'`` computes metric for each class and uniformly averages them
            - ``'weighted'`` computes metric for each class and does a weighted-average,
              where each class is weighted by their support (accounts for class imbalance)
-            - ``'none'`` computes and returns the metric per class
+            - ``'none'`` or ``None`` computes and returns the metric per class

        multilabel: If predictions are from multilabel classification.

@ -117,27 +117,27 @@ def f1(
    precision and recall scores.

    Works with binary, multiclass, and multilabel data.
-    Accepts logits from a model output or integer class values in prediction.
+    Accepts probabilities from a model output or integer class values in prediction.
    Works with multi-dimensional preds and target.

-    If preds and target are the same shape and preds is a float tensor, we use the ``self.threshold`` argument.
-    This is the case for binary and multi-label logits.
+    If preds and target are the same shape and preds is a float tensor, we use the ``threshold`` argument
+    to convert into integer labels. This is the case for binary and multi-label probabilities.

    If preds has an extra dimension as in the case of multi-class scores we perform an argmax on ``dim=1``.

    Args:
-        preds: estimated probabilities
-        target: ground-truth labels
+        preds: predictions from model (probabilities, or labels)
+        target: ground truth labels
        num_classes: Number of classes in the dataset.
        threshold:
-            Threshold value for binary or multi-label logits. default: 0.5
+            Threshold value for binary or multi-label probabilities. default: 0.5

        average:
            - ``'micro'`` computes metric globally
            - ``'macro'`` computes metric for each class and uniformly averages them
            - ``'weighted'`` computes metric for each class and does a weighted-average,
              where each class is weighted by their support (accounts for class imbalance)
-            - ``'none'`` computes and returns the metric per class
+            - ``'none'`` or ``None`` computes and returns the metric per class

        multilabel: If predictions are from multilabel classification.

--- a/pytorch_lightning/metrics/functional/iou.py
+++ b/pytorch_lightning/metrics/functional/iou.py
@ -63,15 +63,15 @@ def iou(

    Note that it is different from box IoU.

-    If pred and target are the same shape and pred is a float tensor,
-    we use the ``threshold`` argument. This is the case for binary and multi-label logits.
+    If preds and target are the same shape and preds is a float tensor, we use the ``threshold`` argument
+    to convert into integer labels. This is the case for binary and multi-label probabilities.

    If pred has an extra dimension as in the case of multi-class scores we
    perform an argmax on ``dim=1``.

    Args:
-        pred: Tensor containing integer predictions, with shape [N, d1, d2, ...]
-        target: Tensor containing integer targets, with shape [N, d1, d2, ...]
+        preds: tensor containing predictions from model (probabilities, or labels) with shape ``[N, d1, d2, ...]``
+        target: tensor containing ground truth labels with shape ``[N, d1, d2, ...]``
        ignore_index: optional int specifying a target class to ignore. If given,
            this class index does not contribute to the returned score, regardless
            of reduction method. Has no effect if given an int that is not in the
@ -83,7 +83,7 @@ def iou(
            [0, 0] for `pred`, and [0, 2] for `target`, then class 1 would be
            assigned the `absent_score`.
        threshold:
-            Threshold value for binary or multi-label logits. default: 0.5
+            Threshold value for binary or multi-label probabilities. default: 0.5
        num_classes:
            Optionally specify the number of classes
        reduction: a method to reduce metric score over labels.
--- a/pytorch_lightning/metrics/functional/precision_recall_curve.py
+++ b/pytorch_lightning/metrics/functional/precision_recall_curve.py
@ -160,6 +160,8 @@ def precision_recall_curve(
    Computes precision-recall pairs for different thresholds.

    Args:
+        preds: predictions from model (probabilities)
+        target: ground truth labels
        num_classes: integer with number of classes. Not necessary to provide
            for binary problems.
        pos_label: integer determining the positive class. Default is ``None``
--- a/pytorch_lightning/metrics/functional/roc.py
+++ b/pytorch_lightning/metrics/functional/roc.py
@ -89,6 +89,8 @@ def roc(
    Computes the Receiver Operating Characteristic (ROC).

    Args:
+        preds: predictions from model (logits or probabilities)
+        target: ground truth values
        num_classes: integer with number of classes. Not necessary to provide
            for binary problems.
        pos_label: integer determining the positive class. Default is ``None``