# lightning/tests/metrics/regression/test_r2score.py

from collections import namedtuple
from functools import partial
import pytest
import torch
from sklearn.metrics import r2_score as sk_r2score
from pytorch_lightning.metrics.functional import r2score
from pytorch_lightning.metrics.regression import R2Score
from tests.metrics.utils import BATCH_SIZE, MetricTester, NUM_BATCHES

torch.manual_seed(42)

num_targets = 5

Input = namedtuple('Input', ["preds", "target"])

_single_target_inputs = Input(
    preds=torch.rand(NUM_BATCHES, BATCH_SIZE),
    target=torch.rand(NUM_BATCHES, BATCH_SIZE),
)

_multi_target_inputs = Input(
    preds=torch.rand(NUM_BATCHES, BATCH_SIZE, num_targets),
    target=torch.rand(NUM_BATCHES, BATCH_SIZE, num_targets),
)


def _single_target_sk_metric(preds, target, adjusted, multioutput):
    sk_preds = preds.view(-1).numpy()
    sk_target = target.view(-1).numpy()
    r2_score = sk_r2score(sk_target, sk_preds, multioutput=multioutput)
    if adjusted != 0:
        # adjusted R^2 = 1 - (1 - R^2) * (n - 1) / (n - p - 1), with n samples and p regressors
        r2_score = 1 - (1 - r2_score) * (sk_preds.shape[0] - 1) / (sk_preds.shape[0] - adjusted - 1)
    return r2_score


def _multi_target_sk_metric(preds, target, adjusted, multioutput):
    sk_preds = preds.view(-1, num_targets).numpy()
    sk_target = target.view(-1, num_targets).numpy()
    r2_score = sk_r2score(sk_target, sk_preds, multioutput=multioutput)
    if adjusted != 0:
        r2_score = 1 - (1 - r2_score) * (sk_preds.shape[0] - 1) / (sk_preds.shape[0] - adjusted - 1)
    return r2_score
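

# A minimal direct sanity check (a sketch beyond the parametrized suite below):
# the functional metric should agree with sklearn on a single 1D batch. The
# sample count and tolerance here are illustrative assumptions, not values
# taken from the suite.
def test_r2score_functional_matches_sklearn_direct():
    preds = torch.rand(16)
    target = torch.rand(16)
    expected = sk_r2score(target.numpy(), preds.numpy())
    assert r2score(preds, target).item() == pytest.approx(expected, abs=1e-4)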
@pytest.mark.parametrize("adjusted", [0, 5, 10])
@pytest.mark.parametrize("multioutput", ['raw_values', 'uniform_average', 'variance_weighted'])
@pytest.mark.parametrize(
"preds, target, sk_metric, num_outputs",
[
(_single_target_inputs.preds, _single_target_inputs.target, _single_target_sk_metric, 1),
(_multi_target_inputs.preds, _multi_target_inputs.target, _multi_target_sk_metric, num_targets),
],
)
class TestR2Score(MetricTester):
    @pytest.mark.parametrize("ddp", [True, False])
    @pytest.mark.parametrize("dist_sync_on_step", [True, False])
    def test_r2(self, adjusted, multioutput, preds, target, sk_metric, num_outputs, ddp, dist_sync_on_step):
        self.run_class_metric_test(
            ddp,
            preds,
            target,
            R2Score,
            partial(sk_metric, adjusted=adjusted, multioutput=multioutput),
            dist_sync_on_step,
            metric_args=dict(adjusted=adjusted, multioutput=multioutput, num_outputs=num_outputs),
        )

    def test_r2_functional(self, adjusted, multioutput, preds, target, sk_metric, num_outputs):
        self.run_functional_metric_test(
            preds,
            target,
            r2score,
            partial(sk_metric, adjusted=adjusted, multioutput=multioutput),
            metric_args=dict(adjusted=adjusted, multioutput=multioutput),
        )
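

# Sketch of the stateful pattern the class-based tests above exercise: R2Score
# accumulates sufficient statistics across update() calls, so compute() over
# several batches should match a single functional pass over the same data.
# Batch shapes here are illustrative assumptions.
def test_r2_class_accumulation_matches_functional():
    metric = R2Score()
    preds = torch.rand(4, 8)
    target = torch.rand(4, 8)
    for p, t in zip(preds, target):
        metric.update(p, t)
    assert torch.allclose(metric.compute(), r2score(preds.view(-1), target.view(-1)))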


def test_error_on_different_shape(metric_class=R2Score):
    metric = metric_class()
    with pytest.raises(RuntimeError, match='Predictions and targets are expected to have the same shape'):
        metric(torch.randn(100,), torch.randn(50,))


def test_error_on_multidim_tensors(metric_class=R2Score):
    metric = metric_class()
    # The match pattern mirrors the library's error message verbatim,
    # including its misspelling of "received".
    with pytest.raises(ValueError, match=r'Expected both prediction and target to be 1D or 2D tensors,'
                                         r' but recevied tensors with dimension .'):
        metric(torch.randn(10, 20, 5), torch.randn(10, 20, 5))


def test_error_on_too_few_samples(metric_class=R2Score):
    metric = metric_class()
    # "atleast" matches the library's error text verbatim.
    with pytest.raises(ValueError, match='Needs atleast two samples to calculate r2 score.'):
        metric(torch.randn(1,), torch.randn(1,))


def test_warning_on_too_large_adjusted(metric_class=R2Score):
    metric = metric_class(adjusted=10)

    with pytest.warns(UserWarning,
                      match="More independent regressions than datapoints in"
                            " adjusted r2 score. Falls back to standard r2 score."):
        metric(torch.randn(10,), torch.randn(10,))

    with pytest.warns(UserWarning,
                      match="Division by zero in adjusted r2 score. Falls back to"
                            " standard r2 score."):
        metric(torch.randn(11,), torch.randn(11,))
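

# Sketch illustrating the 'raw_values' multioutput mode covered by the
# parametrized suite above: the class metric returns one R^2 value per target
# column. The sample count is an illustrative assumption.
def test_r2_raw_values_shape():
    metric = R2Score(num_outputs=num_targets, multioutput='raw_values')
    score = metric(torch.rand(20, num_targets), torch.rand(20, num_targets))
    assert score.shape == (num_targets,)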