mirror of https://github.com/Yomguithereal/fog.git
parent 891f57d1d4
commit 1e10b3b462

README.md

```diff
@@ -15,7 +15,7 @@ pip install fog
 ## Usage
 
 * [Evaluation](#evaluation)
-  * [best_matching](#best_matching)
+  * [best_matching_macro_average](#best_matching_macro_average)
 * [Graph](#graph)
   * [floatsam_sparsification](#floatsam_sparsification)
   * [monopartite_projection](#monopartite_projection)
@@ -35,10 +35,10 @@ pip install fog
 ### Evaluation
 
-#### best_matching
+#### best_matching_macro_average
 
-Efficient implementation of the "best matching F1" evaluation metric for
-clusters.
+Efficient implementation of the "macro average best matching F1" evaluation
+metric for clusters.
 
 Note that this metric is not symmetric and will match truth -> predicted.
 
 
@@ -49,8 +49,6 @@ Note that this metric is not symmetric and will match truth -> predicted.
   that don't exist in truth clusters to be found in predicted ones. Those
   additional items will then be ignored when computing the metrics instead
   of raising an error when found.
-* **micro** *?bool* [`False`]: Whether to compute the micro average instead of the macro
-  average of the evaluation metric.
 
 ### Graph
 
```
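For orientation, a minimal usage sketch of the renamed function, assuming the re-export from `fog.evaluation` shown further down in this commit; the clusters here are illustrative stand-ins, not the repository's fixtures:

```python
# Illustrative sketch: `truth` and `predicted` are iterables of clusters,
# each cluster an iterable of hashable items. Matching goes truth -> predicted,
# so the metric is not symmetric.
from fog.evaluation import best_matching_macro_average

truth = [['A1', 'A2'], ['B1', 'B2', 'B3']]
predicted = [['A1', 'A2', 'B1'], ['B2', 'B3']]

precision, recall, f1 = best_matching_macro_average(truth, predicted)
print(precision, recall, f1)  # macro average over the two truth clusters
```

The next hunk updates the documentation build configuration (`DOCS`) so it points at the renamed function.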

```diff
@@ -18,7 +18,7 @@ DOCS = [
     {
         'title': 'Evaluation',
         'fns': [
-            evaluation.best_matching
+            evaluation.best_matching_macro_average
         ]
     },
     {
```

fog/evaluation/__init__.py

```diff
@@ -1,2 +1,2 @@
-from fog.evaluation.best_matching import best_matching
+from fog.evaluation.best_matching import best_matching_macro_average
 from fog.evaluation.utils import labels_to_clusters, clusters_to_labels
```

fog/evaluation/best_matching.py

```diff
@@ -18,15 +18,14 @@ from typing import Hashable, Iterable, Tuple
 from fog.utils import OnlineMean
 
 
-def best_matching(
+def best_matching_macro_average(
     truth: Iterable[Iterable[Hashable]],
     predicted: Iterable[Iterable[Hashable]],
-    allow_additional_items: bool = False,
-    micro: bool = False
+    allow_additional_items: bool = False
 ) -> Tuple[float, float, float]:
     """
-    Efficient implementation of the "best matching F1" evaluation metric for
-    clusters.
+    Efficient implementation of the "macro average best matching F1" evaluation
+    metric for clusters.
 
     Note that this metric is not symmetric and will match truth -> predicted.
 
```

```diff
@@ -37,8 +36,6 @@ def best_matching(
         that don't exist in truth clusters to be found in predicted ones. Those
         additional items will then be ignored when computing the metrics instead
         of raising an error when found. Defaults to False.
-        micro (bool, optional): Whether to compute the micro average instead of the macro
-            average of the evaluation metric. Defaults to False.
 
     Returns:
         tuple of floats: precision, recall and f1 score.
```
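A short sketch of the `allow_additional_items` behavior described in this docstring, using hypothetical data; per the tests below, an item present on only one side otherwise makes the function raise a `TypeError` ("cannot be found"):

```python
# Hypothetical data: 'EXTRA' exists only on the predicted side.
from fog.evaluation import best_matching_macro_average

truth = [['A1', 'A2']]
predicted = [['A1', 'A2', 'EXTRA']]

# Without the flag this raises a TypeError ('cannot be found').
# With it, 'EXTRA' is ignored when computing the metric:
print(best_matching_macro_average(truth, predicted, allow_additional_items=True))
# expected: (1.0, 1.0, 1.0) once the additional item is discarded
```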

```diff
@@ -89,10 +86,6 @@ def best_matching(
     R = OnlineMean()
     F = OnlineMean()
 
-    micro_true_positives = 0
-    micro_false_positives = 0
-    micro_false_negatives = 0
-
     # Matching truth
     for cluster in truth:
         if not cluster:
```

```diff
@@ -111,38 +104,32 @@ def best_matching(
             candidates[candidate_cluster_index] += 1
             cluster_size += 1
 
-        matching_cluster_index, true_positives = candidates.most_common(1)[0]
-        matching_cluster_size = predicted_cluster_sizes[matching_cluster_index]
+        best_f1 = -1.0
+        best = None
 
-        false_positives = matching_cluster_size - true_positives
-        false_negatives = cluster_size - true_positives
+        # Finding a matching cluster that maximizes F1 score
+        for matching_cluster_index, true_positives in candidates.items():
+            matching_cluster_size = predicted_cluster_sizes[matching_cluster_index]
 
-        if not micro:
-            precision = true_positives / (true_positives + false_positives)
-            recall = true_positives / (true_positives + false_negatives)
-            f1 = 2 * precision * recall / (precision + recall)
+            false_positives = matching_cluster_size - true_positives
+            false_negatives = cluster_size - true_positives
 
-            P.add(precision)
-            R.add(recall)
-            F.add(f1)
+            precision = true_positives / (true_positives + false_positives)
+            recall = true_positives / (true_positives + false_negatives)
+            f1 = 2 * precision * recall / (precision + recall)
 
-        else:
-            micro_true_positives += true_positives
-            micro_false_positives += false_positives
-            micro_false_negatives += false_negatives
+            if f1 > best_f1:
+                best_f1 = f1
+                best = (precision, recall, f1)
 
-    if not micro:
-        return (
-            float(P),
-            float(R),
-            float(F)
-        )
+        assert best is not None
 
-    micro_precision = micro_true_positives / (micro_true_positives + micro_false_positives)
-    micro_recall = micro_true_positives / (micro_true_positives + micro_false_negatives)
+        P.add(best[0])
+        R.add(best[1])
+        F.add(best[2])
 
     return (
-        micro_precision,
-        micro_recall,
-        2 * micro_precision * micro_recall / (micro_precision + micro_recall)
+        float(P),
+        float(R),
+        float(F)
     )
```
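The heart of the change: the old code matched each truth cluster to the single predicted cluster with the largest overlap (`candidates.most_common(1)[0]`), whose tie-breaking depends on iteration order, while the new code scans every overlapping predicted cluster and keeps the one maximizing F1. A standalone sketch of that selection step, with names assumed for illustration:

```python
# Sketch of the new per-truth-cluster selection, assuming `candidates`
# maps a predicted cluster index to its overlap (true positives) with the
# current truth cluster, and `sizes[i]` is the size of predicted cluster i.
from collections import Counter
from typing import List, Optional, Tuple

def best_f1_match(candidates: Counter, sizes: List[int], cluster_size: int) -> Optional[Tuple[float, float, float]]:
    best = None
    best_f1 = -1.0

    for index, tp in candidates.items():
        fp = sizes[index] - tp  # predicted items outside the truth cluster
        fn = cluster_size - tp  # truth items missed by this predicted cluster

        precision = tp / (tp + fp)
        recall = tp / (tp + fn)
        f1 = 2 * precision * recall / (precision + recall)

        if f1 > best_f1:
            best_f1 = f1
            best = (precision, recall, f1)

    return best

# A truth cluster of size 3 overlapping two predicted clusters: the second
# one wins on F1 no matter which order the candidates are visited in.
print(best_f1_match(Counter({0: 1, 1: 2}), [3, 2], 3))  # (1.0, 0.666..., 0.8)
```

This exhaustive scan is what lets the new `test_deterministic` below shuffle both clusterings and still expect identical results.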

```diff
@@ -2,8 +2,9 @@
 # Fog Best Matching Cluster Evaluation Unit Tests
 # =============================================================================
 from pytest import approx, raises
+from random import shuffle
 
-from fog.evaluation import best_matching
+from fog.evaluation import best_matching_macro_average
 
 
 TRUTH = [
```

```diff
@@ -30,57 +31,50 @@ CLUSTERS_WITH_ADDITIONAL_ITEMS = [
 class TestBestMatching(object):
     def test_exceptions(self):
         with raises(TypeError, match='cannot be found'):
-            best_matching([['A1']], [['A2']])
+            best_matching_macro_average([['A1']], [['A2']])
 
         with raises(TypeError, match='fuzzy'):
-            best_matching([['A1', 'B1']], [['A1'], ['B1'], ['A1']])
+            best_matching_macro_average([['A1', 'B1']], [['A1'], ['B1'], ['A1']])
 
         with raises(TypeError, match='empty'):
-            best_matching([['A1'], []], [['A1']])
+            best_matching_macro_average([['A1'], []], [['A1']])
 
         with raises(TypeError, match='empty'):
-            best_matching([['A1']], [['A1'], []])
+            best_matching_macro_average([['A1']], [['A1'], []])
 
         with raises(TypeError, match='truth is empty'):
-            best_matching([], [['A1']])
+            best_matching_macro_average([], [['A1']])
 
         with raises(TypeError, match='predicted is empty'):
-            best_matching([['A1']], [])
+            best_matching_macro_average([['A1']], [])
 
         with raises(TypeError, match='cannot be found'):
-            best_matching([['A1']], [['A1', 'B1']])
+            best_matching_macro_average([['A1']], [['A1', 'B1']])
 
     def test_basics(self):
-        result = best_matching(TRUTH, CLUSTERS)
+        result = best_matching_macro_average(TRUTH, CLUSTERS)
 
         assert result == approx((
-            0.625,
+            0.687,
             0.875,
-            0.714
+            0.756
         ), rel=1e-2)
 
-        assert best_matching(TRUTH, CLUSTERS) == best_matching(TRUTH, CLUSTERS_WITH_ADDITIONAL_ITEMS, allow_additional_items=True)
+        assert best_matching_macro_average(TRUTH, CLUSTERS) == best_matching_macro_average(TRUTH, CLUSTERS_WITH_ADDITIONAL_ITEMS, allow_additional_items=True)
 
-    def test_micro(self):
-        result = best_matching(TRUTH, CLUSTERS, micro=True)
+    def test_deterministic(self):
+        shuffled_clusters = CLUSTERS.copy()
+        shuffled_truth = TRUTH.copy()
 
-        assert result == approx((
-            0.642,
-            0.9,
-            0.75
-        ), rel=1e-2)
+        for _ in range(10):
+            shuffle(shuffled_clusters)
+            shuffle(shuffled_truth)
 
-        assert best_matching(TRUTH, CLUSTERS, micro=True) == best_matching(TRUTH, CLUSTERS_WITH_ADDITIONAL_ITEMS, micro=True, allow_additional_items=True)
+            assert best_matching_macro_average(shuffled_truth, shuffled_clusters) == best_matching_macro_average(TRUTH, CLUSTERS)
 
     def test_identity(self):
-        result = best_matching(TRUTH, TRUTH)
+        result = best_matching_macro_average(TRUTH, TRUTH)
         assert result == approx((1.0, 1.0, 1.0))
 
-        result = best_matching(CLUSTERS, CLUSTERS)
-        assert result == approx((1.0, 1.0, 1.0))
-
-        result = best_matching(TRUTH, TRUTH, micro=True)
-        assert result == approx((1.0, 1.0, 1.0))
-
-        result = best_matching(CLUSTERS, CLUSTERS, micro=True)
+        result = best_matching_macro_average(CLUSTERS, CLUSTERS)
         assert result == approx((1.0, 1.0, 1.0))
```