mirror of https://github.com/Yomguithereal/fog.git
Adding fog.metrics.overlap_coefficient. cc @diegantobass
This commit is contained in:
parent
79566508eb
commit
45019bc2b2
|
@ -7,3 +7,4 @@ from fog.metrics.jaccard import (
|
|||
jaccard_similarity,
|
||||
weighted_jaccard_similarity
|
||||
)
|
||||
from fog.metrics.overlap import overlap_coefficient
|
||||
|
|
|
@ -0,0 +1,52 @@
|
|||
# =============================================================================
|
||||
# Fog Overlap Coefficient
|
||||
# =============================================================================
|
||||
#
|
||||
# Functions computing the overlap coefficient.
|
||||
#
|
||||
# [Urls]:
|
||||
# https://en.wikipedia.org/wiki/Overlap_coefficient
|
||||
#
|
||||
|
||||
# Containers used as-is by `overlap_coefficient`; any other iterable is first
# converted to a set.
ACCEPTABLE_TYPES = (set, frozenset, dict)


def overlap_coefficient(A, B):
    """
    Function computing the overlap coefficient of the given sets, i.e. the size
    of their intersection divided by the size of the smallest set.

    Inputs that are not sets, frozensets or dicts are first converted to sets,
    so duplicated items are only counted once. Dicts are compared through
    their keys. Once both inputs are such containers, counting the
    intersection iterates over the smallest one only.

    Args:
        A (iterable): First collection of hashable items.
        B (iterable): Second collection of hashable items.

    Returns:
        float: overlap coefficient between A & B, or 0.0 if either of them
            is empty.

    """
    if A is B:

        # Identity shortcut: an object fully overlaps with itself. This must
        # still agree with the empty case below, else the result for empty
        # inputs would depend on whether the caller passed the same object
        # twice (e.g. shared literals such as '' or ()).
        try:
            size = len(A)
        except TypeError:

            # Unsized iterable (e.g. a generator): it needs to be consumed to
            # know whether it holds anything.
            size = len(set(A))

        return 1.0 if size > 0 else 0.0

    if not isinstance(A, ACCEPTABLE_TYPES):
        A = set(A)

    if not isinstance(B, ACCEPTABLE_TYPES):
        B = set(B)

    if not A or not B:
        return 0.0

    # Swapping to iterate over smaller set and minimize lookups
    if len(A) > len(B):
        A, B = B, A

    # Counting intersection
    intersection_size = sum(1 for item in A if item in B)

    # A is the smallest set at this point, hence the denominator
    return intersection_size / len(A)
|
|
@ -0,0 +1,22 @@
|
|||
# =============================================================================
|
||||
# Fog Overlap Coefficient Unit Tests
|
||||
# =============================================================================
|
||||
from pytest import approx
|
||||
from fog.metrics import overlap_coefficient
|
||||
|
||||
# Each case is a (A, B, expected overlap coefficient) triple.
TESTS = [
    ('abc', 'abc', 1.0),
    ('abc', 'def', 0.0),
    ('abc', 'abd', 2 / 3),
    ('abc', 'abcde', 1),
    ('abcdefij', 'abc', 1),
    (list('abcdefij'), list('abc'), 1),
    ((1, 2, 3), (1, 2), 1),
    ('aaaaaaabc', 'aaabbbbbbc', 1.0),

    # Empty inputs
    ('', 'abc', 0.0),
    ([], [], 0.0),

    # Sets, frozensets & dicts (dicts being compared through their keys)
    ({1, 2, 3}, frozenset([2, 3, 4, 5]), 2 / 3),
    ({'a': 1, 'b': 2}, ['b', 'c', 'd'], 0.5),
]
|
||||
|
||||
|
||||
class TestOverlapCoefficient(object):
    """Unit tests for `overlap_coefficient`."""

    def test_basics(self):
        """Check every (A, B, expected coefficient) triple found in TESTS."""
        for A, B, coefficient in TESTS:

            # Coefficients are floats (e.g. 2 / 3): compare them with a
            # tolerance rather than relying on exact equality. The trailing
            # tuple identifies the failing pair in the assertion message.
            assert overlap_coefficient(A, B) == approx(coefficient), (A, B)
|
Loading…
Reference in New Issue