From 7ca15193f4b4018af60df78fa6b1eba01e550aac Mon Sep 17 00:00:00 2001 From: Yomguithereal Date: Wed, 30 May 2018 18:27:37 +0200 Subject: [PATCH] Dropping key option for perf reasons --- README.md | 12 ------------ fog/metrics/cosine.py | 9 ++++----- fog/metrics/jaccard.py | 9 ++++----- test/metrics/cosine_test.py | 13 ------------- test/metrics/jaccard_test.py | 13 ------------- 5 files changed, 8 insertions(+), 48 deletions(-) diff --git a/README.md b/README.md index e7e0140..f04bbc5 100644 --- a/README.md +++ b/README.md @@ -30,18 +30,12 @@ from fog.metrics import sparse_cosine_similarity # Basic sparse_cosine_similarity({'apple': 34, 'pear': 3}, {'pear': 1, 'orange': 1}) >>> ~0.062 - -# Using custom key -A = {'apple': {'weight': 34}, 'pear': {'weight': 3}} -B = {'pear': {'weight': 1}, 'orange': {'weight': 1}} -sparse_cosine_similarity(A, B, key=lambda x: x['weight']) ``` *Arguments* * **A** *Counter*: first weighted set. Must be a dictionary mapping keys to weights. * **B** *Counter*: second weighted set. Muset be a dictionary mapping keys to weights. -* **key** *?callable*: Optional function retrieving the weight from values. #### weighted_jaccard_similarity @@ -53,15 +47,9 @@ from fog.metrics import weighted_jaccard_similarity # Basic weighted_jaccard_similarity({'apple': 34, 'pear': 3}, {'pear': 1, 'orange': 1}) >>> ~0.026 - -# Using custom key -A = {'apple': {'weight': 34}, 'pear': {'weight': 3}} -B = {'pear': {'weight': 1}, 'orange': {'weight': 1}} -weighted_jaccard_similarity(A, B, key=lambda x: x['weight']) ``` *Arguments* * **A** *Counter*: first weighted set. Must be a dictionary mapping keys to weights. * **B** *Counter*: second weighted set. Muset be a dictionary mapping keys to weights. -* **key** *?callable*: Optional function retrieving the weight from values. diff --git a/fog/metrics/cosine.py b/fog/metrics/cosine.py index d2f8e05..d81b08d 100644 --- a/fog/metrics/cosine.py +++ b/fog/metrics/cosine.py @@ -7,7 +7,7 @@ import math -def sparse_cosine_similarity(A, B, key=lambda x: x): +def sparse_cosine_similarity(A, B): """ Function computing cosine similarity on sparse weighted sets represented by python dicts. @@ -17,7 +17,6 @@ def sparse_cosine_similarity(A, B, key=lambda x: x): Args: A (Counter): First weighted set. B (Counter): Second weighted set. - key (callable, optional): Function retrieving the weight from item. Returns: float: Cosine similarity between A & B. @@ -40,16 +39,16 @@ def sparse_cosine_similarity(A, B, key=lambda x: x): A, B = B, A for k, v in A.items(): - weight = key(v) + weight = v xx += weight ** 2 v2 = B.get(k) if v2 is not None: - xy += weight * key(v2) + xy += weight * v2 for v in B.values(): - weight = key(v) + weight = v yy += weight ** 2 return xy / math.sqrt(xx * yy) diff --git a/fog/metrics/jaccard.py b/fog/metrics/jaccard.py index 7576889..1966796 100644 --- a/fog/metrics/jaccard.py +++ b/fog/metrics/jaccard.py @@ -6,7 +6,7 @@ # -def weighted_jaccard_similarity(A, B, key=lambda x: x): +def weighted_jaccard_similarity(A, B): """ Function computing the weighted Jaccard similarity. @@ -21,7 +21,6 @@ def weighted_jaccard_similarity(A, B, key=lambda x: x): Args: A (Counter): First weighted set. B (Counter): Second weighted set. - key (callable, optional): Function retrieving the weight from item. Returns: float: Weighted Jaccard similarity between A & B. @@ -46,13 +45,13 @@ def weighted_jaccard_similarity(A, B, key=lambda x: x): # Computing intersection for k, v in A.items(): - weight_A = key(v) + weight_A = v weight_B = 0.0 v2 = B.get(k) if v2 is not None: - weight_B = key(v2) + weight_B = v2 done.add(k) if weight_A < weight_B: @@ -67,6 +66,6 @@ def weighted_jaccard_similarity(A, B, key=lambda x: x): if k in done: continue - U += key(v) + U += v return I / U diff --git a/test/metrics/cosine_test.py b/test/metrics/cosine_test.py index b821321..68f3a9b 100644 --- a/test/metrics/cosine_test.py +++ b/test/metrics/cosine_test.py @@ -20,16 +20,3 @@ class TestSparseCosineSimilarity(object): def test_basics(self): for A, B, similarity in BASIC_TESTS: assert sparse_cosine_similarity(A, B) == approx(similarity, 1e-2) - - def test_key(self): - tests = [] - - for test in BASIC_TESTS: - tests.append(( - {k: {'weight': v} for k, v in test[0].items()}, - {k: {'weight': v} for k, v in test[1].items()}, - test[2] - )) - - for A, B, similarity in tests: - assert sparse_cosine_similarity(A, B, key=lambda x: x['weight']) == approx(similarity, 1e-2) diff --git a/test/metrics/jaccard_test.py b/test/metrics/jaccard_test.py index 7d06557..38bf657 100644 --- a/test/metrics/jaccard_test.py +++ b/test/metrics/jaccard_test.py @@ -19,16 +19,3 @@ class TestWeightedJaccardSimilarity(object): def test_basics(self): for A, B, similarity in BASIC_TESTS: assert weighted_jaccard_similarity(A, B) == approx(similarity, 1e-2) - - def test_key(self): - tests = [] - - for test in BASIC_TESTS: - tests.append(( - {k: {'weight': v} for k, v in test[0].items()}, - {k: {'weight': v} for k, v in test[1].items()}, - test[2] - )) - - for A, B, similarity in tests: - assert weighted_jaccard_similarity(A, B, key=lambda x: x['weight']) == approx(similarity, 1e-2)