180 lines
7.9 KiB
Python
180 lines
7.9 KiB
Python
import numbers
|
|
from functools import partial
|
|
|
|
import numpy as np
|
|
import pytest
|
|
import torch
|
|
from sklearn.metrics import (
|
|
accuracy_score as sk_accuracy,
|
|
precision_score as sk_precision,
|
|
recall_score as sk_recall,
|
|
f1_score as sk_f1_score,
|
|
fbeta_score as sk_fbeta_score,
|
|
confusion_matrix as sk_confusion_matrix,
|
|
average_precision_score as sk_average_precision,
|
|
auc as sk_auc,
|
|
precision_recall_curve as sk_precision_recall_curve,
|
|
roc_curve as sk_roc_curve,
|
|
roc_auc_score as sk_roc_auc_score,
|
|
balanced_accuracy_score as sk_balanced_accuracy_score,
|
|
dcg_score as sk_dcg_score,
|
|
mean_absolute_error as sk_mean_absolute_error,
|
|
mean_squared_error as sk_mean_squared_error,
|
|
mean_squared_log_error as sk_mean_squared_log_error,
|
|
median_absolute_error as sk_median_absolute_error,
|
|
r2_score as sk_r2_score,
|
|
mean_poisson_deviance as sk_mean_poisson_deviance,
|
|
mean_gamma_deviance as sk_mean_gamma_deviance,
|
|
mean_tweedie_deviance as sk_mean_tweedie_deviance,
|
|
explained_variance_score as sk_explained_variance_score,
|
|
cohen_kappa_score as sk_cohen_kappa_score,
|
|
hamming_loss as sk_hamming_loss,
|
|
hinge_loss as sk_hinge_loss,
|
|
jaccard_score as sk_jaccard_score
|
|
)
|
|
|
|
from pytorch_lightning.metrics.converters import _convert_to_numpy
|
|
from pytorch_lightning.metrics.sklearns import (
|
|
Accuracy,
|
|
AUC,
|
|
AveragePrecision,
|
|
BalancedAccuracy,
|
|
ConfusionMatrix,
|
|
CohenKappaScore,
|
|
DCG,
|
|
F1,
|
|
FBeta,
|
|
Hamming,
|
|
Hinge,
|
|
Jaccard,
|
|
Precision,
|
|
Recall,
|
|
PrecisionRecallCurve,
|
|
ROC,
|
|
AUROC,
|
|
MeanAbsoluteError,
|
|
MeanSquaredError,
|
|
MeanSquaredLogError,
|
|
MedianAbsoluteError,
|
|
R2Score,
|
|
MeanPoissonDeviance,
|
|
MeanGammaDeviance,
|
|
MeanTweedieDeviance,
|
|
ExplainedVariance,
|
|
)
|
|
from pytorch_lightning.utilities.apply_func import apply_to_collection
|
|
|
|
|
|
def _xy_only(func):
|
|
def new_func(*args, **kwargs):
|
|
return np.array(func(*args, **kwargs)[:2])
|
|
return new_func
|
|
|
|
|
|
@pytest.mark.parametrize(['metric_class', 'sklearn_func', 'inputs'], [
    pytest.param(Accuracy(), sk_accuracy,
                 {'y_pred': torch.randint(10, size=(128,)),
                  'y_true': torch.randint(10, size=(128,))},
                 id='Accuracy'),
    pytest.param(AUC(), sk_auc,
                 {'x': torch.arange(10, dtype=torch.float) / 10,
                  'y': torch.tensor([0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.3, 0.5, 0.6, 0.7])},
                 id='AUC'),
    pytest.param(AveragePrecision(), sk_average_precision,
                 {'y_score': torch.randint(2, size=(128,)),
                  'y_true': torch.randint(2, size=(128,))},
                 id='AveragePrecision'),
    pytest.param(ConfusionMatrix(), sk_confusion_matrix,
                 {'y_pred': torch.randint(10, size=(128,)),
                  'y_true': torch.randint(10, size=(128,))},
                 id='ConfusionMatrix'),
    pytest.param(F1(average='macro'), partial(sk_f1_score, average='macro'),
                 {'y_pred': torch.randint(10, size=(128,)),
                  'y_true': torch.randint(10, size=(128,))},
                 id='F1'),
    pytest.param(FBeta(beta=0.5, average='macro'),
                 partial(sk_fbeta_score, beta=0.5, average='macro'),
                 {'y_pred': torch.randint(10, size=(128,)),
                  'y_true': torch.randint(10, size=(128,))},
                 id='FBeta'),
    pytest.param(Precision(average='macro'), partial(sk_precision, average='macro'),
                 {'y_pred': torch.randint(10, size=(128,)),
                  'y_true': torch.randint(10, size=(128,))},
                 id='Precision'),
    pytest.param(Recall(average='macro'), partial(sk_recall, average='macro'),
                 {'y_pred': torch.randint(10, size=(128,)),
                  'y_true': torch.randint(10, size=(128,))},
                 id='Recall'),
    # The curve functions are wrapped so only their first two outputs are compared.
    pytest.param(PrecisionRecallCurve(), _xy_only(sk_precision_recall_curve),
                 {'probas_pred': torch.rand(size=(128,)),
                  'y_true': torch.randint(2, size=(128,))},
                 id='PrecisionRecallCurve'),
    pytest.param(ROC(), _xy_only(sk_roc_curve),
                 {'y_score': torch.rand(size=(128,)),
                  'y_true': torch.randint(2, size=(128,))},
                 id='ROC'),
    pytest.param(AUROC(), sk_roc_auc_score,
                 {'y_score': torch.rand(size=(128,)),
                  'y_true': torch.randint(2, size=(128,))},
                 id='AUROC'),
    pytest.param(BalancedAccuracy(), sk_balanced_accuracy_score,
                 {'y_pred': torch.randint(10, size=(128,)), 'y_true': torch.randint(10, size=(128,))},
                 id='BalancedAccuracy'),
    pytest.param(DCG(), sk_dcg_score,
                 {'y_score': torch.rand(size=(128, 3)), 'y_true': torch.randint(3, size=(128, 3))},
                 id='DCG'),
    pytest.param(ExplainedVariance(), sk_explained_variance_score,
                 {'y_pred': torch.rand(size=(128,)), 'y_true': torch.rand(size=(128,))},
                 id='ExplainedVariance'),
    pytest.param(MeanAbsoluteError(), sk_mean_absolute_error,
                 {'y_pred': torch.rand(size=(128,)), 'y_true': torch.rand(size=(128,))},
                 id='MeanAbsoluteError'),
    pytest.param(MeanSquaredError(), sk_mean_squared_error,
                 {'y_pred': torch.rand(size=(128,)), 'y_true': torch.rand(size=(128,))},
                 id='MeanSquaredError'),
    pytest.param(MeanSquaredLogError(), sk_mean_squared_log_error,
                 {'y_pred': torch.rand(size=(128,)), 'y_true': torch.rand(size=(128,))},
                 id='MeanSquaredLogError'),
    pytest.param(MedianAbsoluteError(), sk_median_absolute_error,
                 {'y_pred': torch.rand(size=(128,)), 'y_true': torch.rand(size=(128,))},
                 id='MedianAbsoluteError'),
    pytest.param(R2Score(), sk_r2_score,
                 {'y_pred': torch.rand(size=(128,)), 'y_true': torch.rand(size=(128,))},
                 id='R2Score'),
    # NOTE(review): torch.rand samples from [0, 1). The sklearn Poisson/Gamma
    # deviances document positivity requirements on their inputs, so an exact
    # 0.0 draw would presumably make these cases raise -- very unlikely, but
    # worth confirming if one of them ever flakes.
    pytest.param(MeanPoissonDeviance(), sk_mean_poisson_deviance,
                 {'y_pred': torch.rand(size=(128,)), 'y_true': torch.rand(size=(128,))},
                 id='MeanPoissonDeviance'),
    pytest.param(MeanGammaDeviance(), sk_mean_gamma_deviance,
                 {'y_pred': torch.rand(size=(128,)), 'y_true': torch.rand(size=(128,))},
                 id='MeanGammaDeviance'),
    pytest.param(MeanTweedieDeviance(), sk_mean_tweedie_deviance,
                 {'y_pred': torch.rand(size=(128,)), 'y_true': torch.rand(size=(128,))},
                 id='MeanTweedieDeviance'),
    pytest.param(CohenKappaScore(), sk_cohen_kappa_score,
                 {'y1': torch.randint(3, size=(128,)), 'y2': torch.randint(3, size=(128,))},
                 id='CohenKappaScore'),
    pytest.param(Hamming(), sk_hamming_loss,
                 {'y_pred': torch.randint(10, size=(128,)), 'y_true': torch.randint(10, size=(128,))},
                 id='Hamming'),
    pytest.param(Hinge(), sk_hinge_loss,
                 {'pred_decision': torch.randn(size=(128,)), 'y_true': torch.randint(2, size=(128,))},
                 id='Hinge'),
    pytest.param(Jaccard(average='macro'), partial(sk_jaccard_score, average='macro'),
                 {'y_pred': torch.randint(10, size=(128,)), 'y_true': torch.randint(10, size=(128,))},
                 id='Jaccard'),
])
def test_sklearn_metric(metric_class, sklearn_func, inputs):
    """Check that a Lightning sklearn-metric wrapper matches its sklearn reference.

    Args:
        metric_class: instantiated Lightning metric; it is called with the
            raw ``inputs`` as keyword arguments.
        sklearn_func: reference callable; it is called with the same keyword
            arguments after they have been passed through ``_convert_to_numpy``.
        inputs: mapping of keyword-argument name to torch tensor.

    The results are compared twice with ``np.allclose`` (``atol=1e-5``): once
    as returned, and once after both have been passed through
    ``_convert_to_numpy``. Finally the converted results must share a type.
    """
    # Element types that get handed to ``_convert_to_numpy``.
    convertible = (torch.Tensor, np.ndarray, numbers.Number)

    numpy_inputs = apply_to_collection(inputs, convertible, _convert_to_numpy)

    sklearn_result = sklearn_func(**numpy_inputs)
    lightning_result = metric_class(**inputs)
    # First comparison on the results exactly as each side returned them.
    assert np.allclose(sklearn_result, lightning_result, atol=1e-5)

    sklearn_result = apply_to_collection(sklearn_result, convertible, _convert_to_numpy)
    lightning_result = apply_to_collection(lightning_result, convertible, _convert_to_numpy)

    # Second comparison after conversion, plus a type-parity check.
    assert np.allclose(sklearn_result, lightning_result, atol=1e-5)
    assert isinstance(lightning_result, type(sklearn_result))
|