lightning/pytorch_lightning/metrics/classification/precision_recall_curve.py

151 lines
5.9 KiB
Python

# Copyright The PyTorch Lightning team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Optional, Any, Union, Tuple, List
import torch
from pytorch_lightning.metrics import Metric
from pytorch_lightning.metrics.functional.precision_recall_curve import (
_precision_recall_curve_update,
_precision_recall_curve_compute
)
from pytorch_lightning.utilities import rank_zero_warn
class PrecisionRecallCurve(Metric):
    """
    Computes precision-recall pairs for different thresholds. Both binary and
    multiclass problems are supported; for multiclass input the curve is
    calculated per class using a one-vs-the-rest approach.

    Forward accepts

    - ``preds`` (float tensor): ``(N, ...)`` (binary) or ``(N, C, ...)`` (multiclass)
      where C is the number of classes

    - ``target`` (long tensor): ``(N, ...)``

    Args:
        num_classes: integer with number of classes. Not necessary to provide
            for binary problems.
        pos_label: integer determining the positive class. Default is ``None``
            which for binary problems is translated to 1. For multiclass problems
            this argument should not be set as we iteratively change it in the
            range [0,num_classes-1]
        compute_on_step:
            Forward only calls ``update()`` and returns None if this is set to False. default: True
        dist_sync_on_step:
            Synchronize metric state across processes at each ``forward()``
            before returning the value at the step. default: False
        process_group:
            Specify the process group on which synchronization is called. default: None (which selects the entire world)

    Example (binary case):

        >>> pred = torch.tensor([0, 1, 2, 3])
        >>> target = torch.tensor([0, 1, 1, 0])
        >>> pr_curve = PrecisionRecallCurve(pos_label=1)
        >>> precision, recall, thresholds = pr_curve(pred, target)
        >>> precision
        tensor([0.6667, 0.5000, 0.0000, 1.0000])
        >>> recall
        tensor([1.0000, 0.5000, 0.0000, 0.0000])
        >>> thresholds
        tensor([1, 2, 3])

    Example (multiclass case):

        >>> pred = torch.tensor([[0.75, 0.05, 0.05, 0.05, 0.05],
        ...                      [0.05, 0.75, 0.05, 0.05, 0.05],
        ...                      [0.05, 0.05, 0.75, 0.05, 0.05],
        ...                      [0.05, 0.05, 0.05, 0.75, 0.05]])
        >>> target = torch.tensor([0, 1, 3, 2])
        >>> pr_curve = PrecisionRecallCurve(num_classes=5)
        >>> precision, recall, thresholds = pr_curve(pred, target)
        >>> precision   # doctest: +NORMALIZE_WHITESPACE
        [tensor([1., 1.]), tensor([1., 1.]), tensor([0.2500, 0.0000, 1.0000]),
         tensor([0.2500, 0.0000, 1.0000]), tensor([0., 1.])]
        >>> recall
        [tensor([1., 0.]), tensor([1., 0.]), tensor([1., 0., 0.]), tensor([1., 0., 0.]), tensor([nan, 0.])]
        >>> thresholds
        [tensor([0.7500]), tensor([0.7500]), tensor([0.0500, 0.7500]), tensor([0.0500, 0.7500]), tensor([0.0500])]

    """

    def __init__(
        self,
        num_classes: Optional[int] = None,
        pos_label: Optional[int] = None,
        compute_on_step: bool = True,
        dist_sync_on_step: bool = False,
        process_group: Optional[Any] = None,
    ):
        # The base ``Metric`` receives only the step/synchronisation options.
        base_options = {
            "compute_on_step": compute_on_step,
            "dist_sync_on_step": dist_sync_on_step,
            "process_group": process_group,
        }
        super().__init__(**base_options)

        self.num_classes = num_classes
        self.pos_label = pos_label

        # Both buffers start as empty lists and are registered without a
        # reduction function; ``update`` appends one entry per call.
        for state_name in ("preds", "target"):
            self.add_state(state_name, default=[], dist_reduce_fx=None)

        rank_zero_warn(
            'Metric `PrecisionRecallCurve` will save all targets and predictions in buffer.'
            ' For large datasets this may lead to large memory footprint.'
        )

    def update(self, preds: torch.Tensor, target: torch.Tensor):
        """
        Append a batch of predictions and targets to the stored state.

        The inputs are first passed through ``_precision_recall_curve_update``
        together with the current ``num_classes`` and ``pos_label``; the values
        it returns are what gets stored.

        Args:
            preds: Predictions from model
            target: Ground truth values
        """
        processed = _precision_recall_curve_update(
            preds, target, self.num_classes, self.pos_label
        )
        # The helper hands back ``num_classes`` and ``pos_label`` as well;
        # keep them so ``compute`` uses the returned values.
        batch_preds, batch_target, self.num_classes, self.pos_label = processed

        self.preds.append(batch_preds)
        self.target.append(batch_target)

    def compute(self) -> Union[Tuple[torch.Tensor, torch.Tensor, torch.Tensor],
                               Tuple[List[torch.Tensor], List[torch.Tensor], List[torch.Tensor]]]:
        """
        Compute the precision-recall curve over everything accumulated so far.

        Returns: 3-element tuple containing

            precision:
                tensor where element i is the precision of predictions with
                score >= thresholds[i] and the last element is 1.
                If multiclass, this is a list of such tensors, one for each class.
            recall:
                tensor where element i is the recall of predictions with
                score >= thresholds[i] and the last element is 0.
                If multiclass, this is a list of such tensors, one for each class.
            thresholds:
                Thresholds used for computing precision/recall scores

        """
        # Join the per-update entries along the first dimension before
        # delegating to the functional implementation.
        all_preds = torch.cat(self.preds, dim=0)
        all_target = torch.cat(self.target, dim=0)
        return _precision_recall_curve_compute(all_preds, all_target, self.num_classes, self.pos_label)