Fixing best_matching to choose the cluster maximizing F1

Fixes #19
This commit is contained in:
Yomguithereal 2021-05-17 20:24:08 +02:00
parent 891f57d1d4
commit 1e10b3b462
5 changed files with 50 additions and 71 deletions

View File

@@ -15,7 +15,7 @@ pip install fog
## Usage
* [Evaluation](#evaluation)
* [best_matching](#best_matching)
* [best_matching_macro_average](#best_matching_macro_average)
* [Graph](#graph)
* [floatsam_sparsification](#floatsam_sparsification)
* [monopartite_projection](#monopartite_projection)
@@ -35,10 +35,10 @@ pip install fog
### Evaluation
#### best_matching
#### best_matching_macro_average
Efficient implementation of the "best matching F1" evaluation metric for
clusters.
Efficient implementation of the "macro average best matching F1" evaluation
metric for clusters.
Note that this metric is not symmetric and will match truth -> predicted.
@@ -49,8 +49,6 @@ Note that this metric is not symmetric and will match truth -> predicted.
that don't exist in truth clusters to be found in predicted ones. Those
additional items will then be ignored when computing the metrics instead
of raising an error when found.
* **micro** *?bool* [`False`]: Whether to compute the micro average instead of the macro
average of the evaluation metric.
### Graph
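For context, a minimal usage sketch of the renamed function. The clusterings below are hypothetical (the repository's test fixtures are not reproduced here), and the expected values in the comment are hand-computed from the new F1-maximizing matching:

```python
from fog.evaluation import best_matching_macro_average

# Hypothetical ground-truth and predicted clusterings over the same items.
truth = [['A1', 'A2'], ['B1', 'B2', 'B3']]
predicted = [['A1', 'A2', 'B1'], ['B2', 'B3']]

# Returns macro-averaged (precision, recall, f1), matching truth -> predicted.
precision, recall, f1 = best_matching_macro_average(truth, predicted)
# roughly (0.83, 0.83, 0.80) for this toy data
```

With `allow_additional_items=True`, items found only in the predicted clusters are ignored instead of raising, as described above.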

View File

@@ -18,7 +18,7 @@ DOCS = [
{
'title': 'Evaluation',
'fns': [
evaluation.best_matching
evaluation.best_matching_macro_average
]
},
{

View File

@@ -1,2 +1,2 @@
from fog.evaluation.best_matching import best_matching
from fog.evaluation.best_matching import best_matching_macro_average
from fog.evaluation.utils import labels_to_clusters, clusters_to_labels

View File

@@ -18,15 +18,14 @@ from typing import Hashable, Iterable, Tuple
from fog.utils import OnlineMean
def best_matching(
def best_matching_macro_average(
truth: Iterable[Iterable[Hashable]],
predicted: Iterable[Iterable[Hashable]],
allow_additional_items: bool = False,
micro: bool = False
allow_additional_items: bool = False
) -> Tuple[float, float, float]:
"""
Efficient implementation of the "best matching F1" evaluation metric for
clusters.
Efficient implementation of the "macro average best matching F1" evaluation
metric for clusters.
Note that this metric is not symmetric and will match truth -> predicted.
@@ -37,8 +36,6 @@ def best_matching(
that don't exist in truth clusters to be found in predicted ones. Those
additional items will then be ignored when computing the metrics instead
of raising an error when found. Defaults to False.
micro (bool, optional): Whether to compute the micro average instead of the macro
average of the evaluation metric. Defaults to False.
Returns:
tuple of floats: precision, recall and f1 score.
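Written out as formulas (a restatement of the updated code below, not wording from the docstring): for each truth cluster $t$ and each predicted cluster $p$ sharing at least one item with it,

$$
\mathrm{precision}(t, p) = \frac{|t \cap p|}{|p|}, \qquad
\mathrm{recall}(t, p) = \frac{|t \cap p|}{|t|}, \qquad
F_1(t, p) = \frac{2\,\mathrm{precision}(t, p)\,\mathrm{recall}(t, p)}{\mathrm{precision}(t, p) + \mathrm{recall}(t, p)}
$$

The function now keeps, for each truth cluster $t$, the candidate $p$ maximizing $F_1(t, p)$, then returns the macro average (arithmetic mean over truth clusters) of the retained precision, recall and $F_1$.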
@@ -89,10 +86,6 @@ def best_matching(
R = OnlineMean()
F = OnlineMean()
micro_true_positives = 0
micro_false_positives = 0
micro_false_negatives = 0
# Matching truth
for cluster in truth:
if not cluster:
@@ -111,38 +104,32 @@ def best_matching(
candidates[candidate_cluster_index] += 1
cluster_size += 1
matching_cluster_index, true_positives = candidates.most_common(1)[0]
matching_cluster_size = predicted_cluster_sizes[matching_cluster_index]
best_f1 = -1.0
best = None
false_positives = matching_cluster_size - true_positives
false_negatives = cluster_size - true_positives
# Finding a matching cluster that maximizes F1 score
for matching_cluster_index, true_positives in candidates.items():
matching_cluster_size = predicted_cluster_sizes[matching_cluster_index]
false_positives = matching_cluster_size - true_positives
false_negatives = cluster_size - true_positives
if not micro:
precision = true_positives / (true_positives + false_positives)
recall = true_positives / (true_positives + false_negatives)
f1 = 2 * precision * recall / (precision + recall)
P.add(precision)
R.add(recall)
F.add(f1)
if f1 > best_f1:
best_f1 = f1
best = (precision, recall, f1)
else:
micro_true_positives += true_positives
micro_false_positives += false_positives
micro_false_negatives += false_negatives
assert best is not None
if not micro:
return (
float(P),
float(R),
float(F)
)
micro_precision = micro_true_positives / (micro_true_positives + micro_false_positives)
micro_recall = micro_true_positives / (micro_true_positives + micro_false_negatives)
P.add(best[0])
R.add(best[1])
F.add(best[2])
return (
micro_precision,
micro_recall,
2 * micro_precision * micro_recall / (micro_precision + micro_recall)
float(P),
float(R),
float(F)
)
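The behavioural difference is easiest to see on a toy case. Below is a minimal, self-contained sketch (hypothetical data, independent of the library's internals): the predicted cluster with the largest raw overlap, which `most_common(1)` used to pick, is not necessarily the one with the best F1.

```python
from collections import Counter

# Hypothetical truth cluster and predicted clustering (illustration only).
truth_cluster = {'a', 'b', 'c', 'd', 'e'}
predicted = [
    {'a', 'b', 'c', 'x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7'},  # big, noisy cluster
    {'d', 'e'},                                                 # small, precise cluster
]

# Overlap of the truth cluster with each predicted cluster.
candidates = Counter({i: len(truth_cluster & p) for i, p in enumerate(predicted)})

def f1(i):
    tp = candidates[i]
    precision = tp / len(predicted[i])
    recall = tp / len(truth_cluster)
    return 2 * precision * recall / (precision + recall)

by_overlap, _ = candidates.most_common(1)[0]  # old behaviour: cluster 0, F1 = 0.4
by_f1 = max(candidates, key=f1)               # new behaviour: cluster 1, F1 ≈ 0.571
```

Scanning all candidates also addresses the instability that motivated the new `test_deterministic` below: with `most_common(1)`, ties in raw overlap were broken by encounter order, so shuffling the input clusters could change the result.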

View File

@@ -2,8 +2,9 @@
# Fog Best Matching Cluster Evaluation Unit Tests
# =============================================================================
from pytest import approx, raises
from random import shuffle
from fog.evaluation import best_matching
from fog.evaluation import best_matching_macro_average
TRUTH = [
@@ -30,57 +31,50 @@ CLUSTERS_WITH_ADDITIONAL_ITEMS = [
class TestBestMatching(object):
def test_exceptions(self):
with raises(TypeError, match='cannot be found'):
best_matching([['A1']], [['A2']])
best_matching_macro_average([['A1']], [['A2']])
with raises(TypeError, match='fuzzy'):
best_matching([['A1', 'B1']], [['A1'], ['B1'], ['A1']])
best_matching_macro_average([['A1', 'B1']], [['A1'], ['B1'], ['A1']])
with raises(TypeError, match='empty'):
best_matching([['A1'], []], [['A1']])
best_matching_macro_average([['A1'], []], [['A1']])
with raises(TypeError, match='empty'):
best_matching([['A1']], [['A1'], []])
best_matching_macro_average([['A1']], [['A1'], []])
with raises(TypeError, match='truth is empty'):
best_matching([], [['A1']])
best_matching_macro_average([], [['A1']])
with raises(TypeError, match='predicted is empty'):
best_matching([['A1']], [])
best_matching_macro_average([['A1']], [])
with raises(TypeError, match='cannot be found'):
best_matching([['A1']], [['A1', 'B1']])
best_matching_macro_average([['A1']], [['A1', 'B1']])
def test_basics(self):
result = best_matching(TRUTH, CLUSTERS)
result = best_matching_macro_average(TRUTH, CLUSTERS)
assert result == approx((
0.625,
0.687,
0.875,
0.714
0.756
), rel=1e-2)
assert best_matching(TRUTH, CLUSTERS) == best_matching(TRUTH, CLUSTERS_WITH_ADDITIONAL_ITEMS, allow_additional_items=True)
assert best_matching_macro_average(TRUTH, CLUSTERS) == best_matching_macro_average(TRUTH, CLUSTERS_WITH_ADDITIONAL_ITEMS, allow_additional_items=True)
def test_micro(self):
result = best_matching(TRUTH, CLUSTERS, micro=True)
def test_deterministic(self):
shuffled_clusters = CLUSTERS.copy()
shuffled_truth = TRUTH.copy()
assert result == approx((
0.642,
0.9,
0.75
), rel=1e-2)
for _ in range(10):
shuffle(shuffled_clusters)
shuffle(shuffled_truth)
assert best_matching(TRUTH, CLUSTERS, micro=True) == best_matching(TRUTH, CLUSTERS_WITH_ADDITIONAL_ITEMS, micro=True, allow_additional_items=True)
assert best_matching_macro_average(shuffled_truth, shuffled_clusters) == best_matching_macro_average(TRUTH, CLUSTERS)
def test_identity(self):
result = best_matching(TRUTH, TRUTH)
result = best_matching_macro_average(TRUTH, TRUTH)
assert result == approx((1.0, 1.0, 1.0))
result = best_matching(CLUSTERS, CLUSTERS)
assert result == approx((1.0, 1.0, 1.0))
result = best_matching(TRUTH, TRUTH, micro=True)
assert result == approx((1.0, 1.0, 1.0))
result = best_matching(CLUSTERS, CLUSTERS, micro=True)
result = best_matching_macro_average(CLUSTERS, CLUSTERS)
assert result == approx((1.0, 1.0, 1.0))
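The `allow_additional_items` contract exercised above can be illustrated with a hypothetical analogue of the fixture-based assertions (data invented for illustration; only the error message pattern comes from the tests in this diff):

```python
from pytest import raises
from fog.evaluation import best_matching_macro_average

truth = [['A1', 'A2']]
predicted_with_noise = [['A1', 'A2', 'Z9']]  # 'Z9' does not exist in truth

# Unknown items raise by default...
with raises(TypeError, match='cannot be found'):
    best_matching_macro_average(truth, predicted_with_noise)

# ...and are ignored when the flag is set; expected to be (1.0, 1.0, 1.0)
# here since the clusters otherwise match exactly.
precision, recall, f1 = best_matching_macro_average(
    truth, predicted_with_noise, allow_additional_items=True
)
```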