mirror of https://github.com/Yomguithereal/fog.git
Adding fog.metrics.overlap_coefficient. cc @diegantobass
This commit is contained in:
parent
79566508eb
commit
45019bc2b2
|
@ -7,3 +7,4 @@ from fog.metrics.jaccard import (
|
||||||
jaccard_similarity,
|
jaccard_similarity,
|
||||||
weighted_jaccard_similarity
|
weighted_jaccard_similarity
|
||||||
)
|
)
|
||||||
|
from fog.metrics.overlap import overlap_coefficient
|
||||||
|
|
|
@ -0,0 +1,52 @@
|
||||||
|
# =============================================================================
|
||||||
|
# Fog Overlap Coefficient
|
||||||
|
# =============================================================================
|
||||||
|
#
|
||||||
|
# Functions computing the overlap coefficient.
|
||||||
|
#
|
||||||
|
# [Urls]:
|
||||||
|
# https://en.wikipedia.org/wiki/Overlap_coefficient
|
||||||
|
#
|
||||||
|
|
||||||
|
# Containers supporting fast membership tests & `len`, hence used as is
ACCEPTABLE_TYPES = (set, frozenset, dict)


def overlap_coefficient(A, B):
    """
    Function computing the overlap coefficient of the given sets, i.e. the size
    of their intersection divided by the size of the smallest set.

    Inputs that are not a set, frozenset or dict are first converted using
    `set`, which iterates over them entirely. Counting the intersection then
    runs in O(n), n being the size of the smallest set.

    If either input is empty, the coefficient is 0.0, even when the same empty
    object is given twice.

    Args:
        A (iterable): First sequence.
        B (iterable): Second sequence.

    Returns:
        float: overlap coefficient between A & B.

    """

    # Remembering identity before A is potentially rebound to a new set
    identical = A is B

    if not isinstance(A, ACCEPTABLE_TYPES):
        A = set(A)

    # The identity shortcut must not bypass the emptiness rule, else the result
    # for empty inputs would depend on whether the same object was given twice
    if identical:
        return 1.0 if len(A) > 0 else 0.0

    if not isinstance(B, ACCEPTABLE_TYPES):
        B = set(B)

    if len(A) == 0 or len(B) == 0:
        return 0.0

    # Swapping to iterate over smaller set and minimize lookups
    if len(A) > len(B):
        A, B = B, A

    # Counting intersection
    I = sum(1 for v in A if v in B)

    # A is the smallest set at this point
    return I / len(A)
|
|
@ -0,0 +1,22 @@
|
||||||
|
# =============================================================================
|
||||||
|
# Fog Overlap Coefficient Unit Tests
|
||||||
|
# =============================================================================
|
||||||
|
from pytest import approx
|
||||||
|
from fog.metrics import overlap_coefficient
|
||||||
|
|
||||||
|
# Each fixture is an (A, B, expected coefficient) triple
TESTS = [
    ('abc', 'abc', 1.0),
    ('abc', 'def', 0.0),
    ('abc', 'abd', 2 / 3),
    ('abc', 'abcde', 1),
    ('abcdefij', 'abc', 1),
    (list('abcdefij'), list('abc'), 1),
    ((1, 2, 3), (1, 2), 1),
    ('aaaaaaabc', 'aaabbbbbbc', 1.0),

    # Empty inputs
    ('abc', '', 0.0),
    ('', 'abc', 0.0),
    ([], [], 0.0),

    # Set & dict inputs
    ({'a', 'b', 'c'}, {'a', 'd'}, 0.5),
    ({'a': 1, 'b': 2}, {'a'}, 1.0),
]
|
||||||
|
|
||||||
|
|
||||||
|
class TestOverlapCoefficient(object):
    """
    Unit tests of the `overlap_coefficient` function.
    """

    def test_basics(self):

        # Comparing through `approx` so that non-integral ratios such as 2 / 3
        # do not rely on exact floating point equality
        for A, B, coefficient in TESTS:
            assert overlap_coefficient(A, B) == approx(coefficient)
|
Loading…
Reference in New Issue