diff --git a/fog/metrics/jaccard.py b/fog/metrics/jaccard.py index ecbc6da..6c42e7c 100644 --- a/fog/metrics/jaccard.py +++ b/fog/metrics/jaccard.py @@ -40,6 +40,10 @@ def weighted_jaccard_similarity(A, B, key=lambda x: x): done = set() + # Swapping to iterate over smaller set and minimize lookups + if len(A) > len(B): + A, B = B, A + # Computing intersection for k, v in A.items(): weight_A = key(v)