Fixing best_matching to choose cluster maximizing F1

Fixes #19
Yomguithereal 2021-05-17 20:24:08 +02:00
parent 891f57d1d4
commit 1e10b3b462
5 changed files with 50 additions and 71 deletions
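
To illustrate the fix: picking the predicted cluster with the largest overlap (the previous behavior, via `candidates.most_common(1)`) is not the same as picking the one that maximizes F1. A minimal, made-up sketch (not taken from the repository) showing the two rules disagree:

from collections import Counter

# Made-up truth cluster and two made-up predicted clusters.
truth_cluster = {'a', 'b', 'c'}
predicted = [
    {'a', 'b', 'x', 'y', 'z', 'w', 'v', 'u'},  # overlap of 2, but very large
    {'c'},                                     # overlap of 1, perfectly precise
]

def f1_against(truth, candidate):
    # Precision/recall/F1 of a candidate cluster against one truth cluster.
    true_positives = len(truth & candidate)
    precision = true_positives / len(candidate)
    recall = true_positives / len(truth)
    return 2 * precision * recall / (precision + recall)

overlaps = Counter({i: len(truth_cluster & c) for i, c in enumerate(predicted)})

by_overlap = overlaps.most_common(1)[0][0]  # picks cluster 0 (largest overlap)
by_f1 = max(overlaps, key=lambda i: f1_against(truth_cluster, predicted[i]))  # picks cluster 1

print(f1_against(truth_cluster, predicted[by_overlap]))  # ~0.36
print(f1_against(truth_cluster, predicted[by_f1]))       # 0.50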

View File

@@ -15,7 +15,7 @@ pip install fog
 ## Usage
 
 * [Evaluation](#evaluation)
-  * [best_matching](#best_matching)
+  * [best_matching_macro_average](#best_matching_macro_average)
 * [Graph](#graph)
   * [floatsam_sparsification](#floatsam_sparsification)
   * [monopartite_projection](#monopartite_projection)
@@ -35,10 +35,10 @@ pip install fog
 ### Evaluation
 
-#### best_matching
+#### best_matching_macro_average
 
-Efficient implementation of the "best matching F1" evaluation metric for
-clusters.
+Efficient implementation of the "macro average best matching F1" evaluation
+metric for clusters.
 
 Note that this metric is not symmetric and will match truth -> predicted.
@@ -49,8 +49,6 @@ Note that this metric is not symmetric and will match truth -> predicted.
 that don't exist in truth clusters to be found in predicted ones. Those
 additional items will then be ignored when computing the metrics instead
 of raising an error when found.
-* **micro** *?bool* [`False`]: Whether to compute the micro average instead of the macro
-average of the evaluation metric.
 
 ### Graph
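
A quick usage sketch of the renamed function as documented above (the clusters here are made up; the import path follows the `__init__.py` change further down):

from fog.evaluation import best_matching_macro_average

# Every truth item must also appear in exactly one predicted cluster
# (pass allow_additional_items=True if predicted contains extra items
# that truth does not know about).
truth = [
    ['A1', 'A2', 'A3'],
    ['B1', 'B2'],
]
predicted = [
    ['A1', 'A2'],
    ['A3', 'B1', 'B2'],
]

precision, recall, f1 = best_matching_macro_average(truth, predicted)
print(precision, recall, f1)  # macro-averaged precision, recall and F1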

View File

@@ -18,7 +18,7 @@ DOCS = [
     {
         'title': 'Evaluation',
         'fns': [
-            evaluation.best_matching
+            evaluation.best_matching_macro_average
         ]
     },
     {

View File

@@ -1,2 +1,2 @@
-from fog.evaluation.best_matching import best_matching
+from fog.evaluation.best_matching import best_matching_macro_average
 from fog.evaluation.utils import labels_to_clusters, clusters_to_labels

View File

@@ -18,15 +18,14 @@ from typing import Hashable, Iterable, Tuple
 from fog.utils import OnlineMean
 
 
-def best_matching(
+def best_matching_macro_average(
     truth: Iterable[Iterable[Hashable]],
     predicted: Iterable[Iterable[Hashable]],
-    allow_additional_items: bool = False,
-    micro: bool = False
+    allow_additional_items: bool = False
 ) -> Tuple[float, float, float]:
     """
-    Efficient implementation of the "best matching F1" evaluation metric for
-    clusters.
+    Efficient implementation of the "macro average best matching F1" evaluation
+    metric for clusters.
 
     Note that this metric is not symmetric and will match truth -> predicted.
@@ -37,8 +36,6 @@ def best_matching(
         that don't exist in truth clusters to be found in predicted ones. Those
         additional items will then be ignored when computing the metrics instead
         of raising an error when found. Defaults to False.
-        micro (bool, optional): Whether to compute the micro average instead of the macro
-            average of the evaluation metric. Defaults to False.
 
     Returns:
         tuple of floats: precision, recall and f1 score.
@@ -89,10 +86,6 @@ def best_matching(
     R = OnlineMean()
     F = OnlineMean()
 
-    micro_true_positives = 0
-    micro_false_positives = 0
-    micro_false_negatives = 0
-
     # Matching truth
     for cluster in truth:
         if not cluster:
@@ -111,38 +104,32 @@ def best_matching(
             candidates[candidate_cluster_index] += 1
             cluster_size += 1
 
-        matching_cluster_index, true_positives = candidates.most_common(1)[0]
+        best_f1 = -1.0
+        best = None
+
+        # Finding a matching cluster that maximizes F1 score
+        for matching_cluster_index, true_positives in candidates.items():
             matching_cluster_size = predicted_cluster_sizes[matching_cluster_index]
 
             false_positives = matching_cluster_size - true_positives
             false_negatives = cluster_size - true_positives
 
-        if not micro:
             precision = true_positives / (true_positives + false_positives)
             recall = true_positives / (true_positives + false_negatives)
             f1 = 2 * precision * recall / (precision + recall)
 
-            P.add(precision)
-            R.add(recall)
-            F.add(f1)
-        else:
-            micro_true_positives += true_positives
-            micro_false_positives += false_positives
-            micro_false_negatives += false_negatives
+            if f1 > best_f1:
+                best_f1 = f1
+                best = (precision, recall, f1)
+
+        assert best is not None
+
+        P.add(best[0])
+        R.add(best[1])
+        F.add(best[2])
 
-    if not micro:
     return (
         float(P),
        float(R),
         float(F)
     )
-
-    micro_precision = micro_true_positives / (micro_true_positives + micro_false_positives)
-    micro_recall = micro_true_positives / (micro_true_positives + micro_false_negatives)
-
-    return (
-        micro_precision,
-        micro_recall,
-        2 * micro_precision * micro_recall / (micro_precision + micro_recall)
-    )
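
Read together, the new loop amounts to the following stripped-down sketch (no input validation, no OnlineMean, hypothetical helper name; a simplified illustration, not the library code itself):

from collections import Counter
from typing import Hashable, Iterable, Tuple

def best_matching_macro_average_sketch(
    truth: Iterable[Iterable[Hashable]],
    predicted: Iterable[Iterable[Hashable]],
) -> Tuple[float, float, float]:
    # Map each item to the index of the predicted cluster containing it
    # (assumes predicted partitions the items and clusters are non-empty).
    predicted = [set(c) for c in predicted]
    item_to_predicted = {item: i for i, c in enumerate(predicted) for item in c}

    precisions, recalls, f1s = [], [], []

    for cluster in truth:
        cluster = set(cluster)

        # Count overlaps with every predicted cluster touching this truth cluster.
        candidates = Counter(item_to_predicted[item] for item in cluster)

        # Keep the candidate maximizing F1, not the one with the largest overlap.
        best = None
        for i, true_positives in candidates.items():
            precision = true_positives / len(predicted[i])
            recall = true_positives / len(cluster)
            f1 = 2 * precision * recall / (precision + recall)

            if best is None or f1 > best[2]:
                best = (precision, recall, f1)

        precisions.append(best[0])
        recalls.append(best[1])
        f1s.append(best[2])

    # Macro average: mean of the per-truth-cluster scores.
    n = len(f1s)
    return (sum(precisions) / n, sum(recalls) / n, sum(f1s) / n)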

View File

@@ -2,8 +2,9 @@
 # Fog Best Matching Cluster Evaluation Unit Tests
 # =============================================================================
 from pytest import approx, raises
+from random import shuffle
 
-from fog.evaluation import best_matching
+from fog.evaluation import best_matching_macro_average
 
 TRUTH = [
@@ -30,57 +31,50 @@ CLUSTERS_WITH_ADDITIONAL_ITEMS = [
 class TestBestMatching(object):
     def test_exceptions(self):
         with raises(TypeError, match='cannot be found'):
-            best_matching([['A1']], [['A2']])
+            best_matching_macro_average([['A1']], [['A2']])
 
         with raises(TypeError, match='fuzzy'):
-            best_matching([['A1', 'B1']], [['A1'], ['B1'], ['A1']])
+            best_matching_macro_average([['A1', 'B1']], [['A1'], ['B1'], ['A1']])
 
         with raises(TypeError, match='empty'):
-            best_matching([['A1'], []], [['A1']])
+            best_matching_macro_average([['A1'], []], [['A1']])
 
         with raises(TypeError, match='empty'):
-            best_matching([['A1']], [['A1'], []])
+            best_matching_macro_average([['A1']], [['A1'], []])
 
         with raises(TypeError, match='truth is empty'):
-            best_matching([], [['A1']])
+            best_matching_macro_average([], [['A1']])
 
         with raises(TypeError, match='predicted is empty'):
-            best_matching([['A1']], [])
+            best_matching_macro_average([['A1']], [])
 
         with raises(TypeError, match='cannot be found'):
-            best_matching([['A1']], [['A1', 'B1']])
+            best_matching_macro_average([['A1']], [['A1', 'B1']])
 
     def test_basics(self):
-        result = best_matching(TRUTH, CLUSTERS)
+        result = best_matching_macro_average(TRUTH, CLUSTERS)
 
         assert result == approx((
-            0.625,
+            0.687,
             0.875,
-            0.714
+            0.756
         ), rel=1e-2)
 
-        assert best_matching(TRUTH, CLUSTERS) == best_matching(TRUTH, CLUSTERS_WITH_ADDITIONAL_ITEMS, allow_additional_items=True)
+        assert best_matching_macro_average(TRUTH, CLUSTERS) == best_matching_macro_average(TRUTH, CLUSTERS_WITH_ADDITIONAL_ITEMS, allow_additional_items=True)
 
-    def test_micro(self):
-        result = best_matching(TRUTH, CLUSTERS, micro=True)
+    def test_deterministic(self):
+        shuffled_clusters = CLUSTERS.copy()
+        shuffled_truth = TRUTH.copy()
 
-        assert result == approx((
-            0.642,
-            0.9,
-            0.75
-        ), rel=1e-2)
+        for _ in range(10):
+            shuffle(shuffled_clusters)
+            shuffle(shuffled_truth)
 
-        assert best_matching(TRUTH, CLUSTERS, micro=True) == best_matching(TRUTH, CLUSTERS_WITH_ADDITIONAL_ITEMS, micro=True, allow_additional_items=True)
+            assert best_matching_macro_average(shuffled_truth, shuffled_clusters) == best_matching_macro_average(TRUTH, CLUSTERS)
 
     def test_identity(self):
-        result = best_matching(TRUTH, TRUTH)
+        result = best_matching_macro_average(TRUTH, TRUTH)
         assert result == approx((1.0, 1.0, 1.0))
 
-        result = best_matching(CLUSTERS, CLUSTERS)
-        assert result == approx((1.0, 1.0, 1.0))
-
-        result = best_matching(TRUTH, TRUTH, micro=True)
-        assert result == approx((1.0, 1.0, 1.0))
-
-        result = best_matching(CLUSTERS, CLUSTERS, micro=True)
+        result = best_matching_macro_average(CLUSTERS, CLUSTERS)
         assert result == approx((1.0, 1.0, 1.0))