Adding fog.metrics.overlap_coefficient. cc @diegantobass

This commit is contained in:
Yomguithereal 2018-07-18 11:27:32 +02:00
parent 79566508eb
commit 45019bc2b2
3 changed files with 75 additions and 0 deletions

View File

@ -7,3 +7,4 @@ from fog.metrics.jaccard import (
jaccard_similarity,
weighted_jaccard_similarity
)
from fog.metrics.overlap import overlap_coefficient

52
fog/metrics/overlap.py Normal file
View File

@ -0,0 +1,52 @@
# =============================================================================
# Fog Overlap Coefficient
# =============================================================================
#
# Functions computing the overlap coefficient.
#
# [Urls]:
# https://en.wikipedia.org/wiki/Overlap_coefficient
#
# Containers that already provide fast `len` and membership tests and can
# therefore be used as-is, without being copied into a set first.
ACCEPTABLE_TYPES = (set, frozenset, dict)


def overlap_coefficient(A, B):
    """
    Function computing the overlap coefficient of the given sets, i.e. the size
    of their intersection divided by the size of the smallest set.

    Inputs that are not already a set, frozenset or dict are first converted
    to a set (linear in their size), so duplicated items are ignored. Dicts
    are compared through their keys. Once both inputs are set-like, counting
    the intersection iterates over the smallest one only.

    Args:
        A (iterable): First collection of hashable items.
        B (iterable): Second collection of hashable items.

    Returns:
        float: overlap coefficient between A & B, or 0.0 if either one
            is empty.

    Raises:
        TypeError: if an input needing conversion holds unhashable items.

    """
    identical = A is B

    if not isinstance(A, ACCEPTABLE_TYPES):
        A = set(A)

    # Shortcut when the very same object is given twice. The emptiness check
    # keeps the result consistent with the one returned for two distinct
    # empty inputs. Returning before touching B also avoids re-consuming a
    # one-shot iterator that was passed as both arguments.
    if identical:
        return 1.0 if len(A) > 0 else 0.0

    if not isinstance(B, ACCEPTABLE_TYPES):
        B = set(B)

    # The coefficient is undefined for empty sets: 0.0 by convention
    if len(A) == 0 or len(B) == 0:
        return 0.0

    # Swapping to iterate over smaller set and minimize lookups
    if len(A) > len(B):
        A, B = B, A

    # Counting intersection
    intersection = sum(1 for item in A if item in B)

    # A is the smallest set at this point
    return intersection / len(A)

View File

@ -0,0 +1,22 @@
# =============================================================================
# Fog Overlap Coefficient Unit Tests
# =============================================================================
from pytest import approx
from fog.metrics import overlap_coefficient
# Each case is a (A, B, expected overlap coefficient) triple.
TESTS = [

    # Identical & disjoint inputs
    ('abc', 'abc', 1.0),
    ('abc', 'def', 0.0),

    # Partial overlap
    ('abc', 'abd', 2 / 3),

    # One input fully contained in the other, in both argument orders
    ('abc', 'abcde', 1),
    ('abcdefij', 'abc', 1),

    # Non-string sequences
    (list('abcdefij'), list('abc'), 1),
    ((1, 2, 3), (1, 2), 1),

    # Repeated items
    ('aaaaaaabc', 'aaabbbbbbc', 1.0),
]
class TestOverlapCoefficient(object):
    """Unit tests for `overlap_coefficient`, driven by the TESTS table."""

    def test_basics(self):
        # Some expected values (e.g. 2 / 3) are floats, so results are
        # compared within a tolerance rather than with exact equality.
        for A, B, coefficient in TESTS:
            assert overlap_coefficient(A, B) == approx(coefficient)