Dropping key option for perf reasons

2018-05-30 18:27:37 +02:00 · 2018-05-30 18:27:37 +02:00 · 7ca15193f4
parent 61f67fed8c
commit 7ca15193f4
5 changed files with 8 additions and 48 deletions
--- a/README.md
+++ b/README.md
@ -30,18 +30,12 @@ from fog.metrics import sparse_cosine_similarity
 # Basic
 sparse_cosine_similarity({'apple': 34, 'pear': 3}, {'pear': 1, 'orange': 1})
 >>> ~0.062
-
-# Using custom key
-A = {'apple': {'weight': 34}, 'pear': {'weight': 3}}
-B = {'pear': {'weight': 1}, 'orange': {'weight': 1}}
-sparse_cosine_similarity(A, B, key=lambda x: x['weight'])
 ```

 *Arguments*

 * **A** *Counter*: first weighted set. Must be a dictionary mapping keys to weights.
 * **B** *Counter*: second weighted set. Must be a dictionary mapping keys to weights.
-* **key** *?callable*: Optional function retrieving the weight from values.

 #### weighted_jaccard_similarity

@ -53,15 +47,9 @@ from fog.metrics import weighted_jaccard_similarity
 # Basic
 weighted_jaccard_similarity({'apple': 34, 'pear': 3}, {'pear': 1, 'orange': 1})
 >>> ~0.026
-
-# Using custom key
-A = {'apple': {'weight': 34}, 'pear': {'weight': 3}}
-B = {'pear': {'weight': 1}, 'orange': {'weight': 1}}
-weighted_jaccard_similarity(A, B, key=lambda x: x['weight'])
 ```

 *Arguments*

 * **A** *Counter*: first weighted set. Must be a dictionary mapping keys to weights.
 * **B** *Counter*: second weighted set. Must be a dictionary mapping keys to weights.
-* **key** *?callable*: Optional function retrieving the weight from values.
--- a/fog/metrics/cosine.py
+++ b/fog/metrics/cosine.py
@ -7,7 +7,7 @@
 import math


-def sparse_cosine_similarity(A, B, key=lambda x: x):
+def sparse_cosine_similarity(A, B):
    """
    Function computing cosine similarity on sparse weighted sets represented
    by python dicts.
@ -17,7 +17,6 @@ def sparse_cosine_similarity(A, B, key=lambda x: x):
    Args:
        A (Counter): First weighted set.
        B (Counter): Second weighted set.
-        key (callable, optional): Function retrieving the weight from item.

    Returns:
        float: Cosine similarity between A & B.
@ -40,16 +39,16 @@ def sparse_cosine_similarity(A, B, key=lambda x: x):
        A, B = B, A

    for k, v in A.items():
-        weight = key(v)
+        weight = v
        xx += weight ** 2

        v2 = B.get(k)

        if v2 is not None:
-            xy += weight * key(v2)
+            xy += weight * v2

    for v in B.values():
-        weight = key(v)
+        weight = v
        yy += weight ** 2

    return xy / math.sqrt(xx * yy)
--- a/fog/metrics/jaccard.py
+++ b/fog/metrics/jaccard.py
@ -6,7 +6,7 @@
 #


-def weighted_jaccard_similarity(A, B, key=lambda x: x):
+def weighted_jaccard_similarity(A, B):
    """
    Function computing the weighted Jaccard similarity.

@ -21,7 +21,6 @@ def weighted_jaccard_similarity(A, B, key=lambda x: x):
    Args:
        A (Counter): First weighted set.
        B (Counter): Second weighted set.
-        key (callable, optional): Function retrieving the weight from item.

    Returns:
        float: Weighted Jaccard similarity between A & B.
@ -46,13 +45,13 @@ def weighted_jaccard_similarity(A, B, key=lambda x: x):

    # Computing intersection
    for k, v in A.items():
-        weight_A = key(v)
+        weight_A = v
        weight_B = 0.0

        v2 = B.get(k)

        if v2 is not None:
-            weight_B = key(v2)
+            weight_B = v2
            done.add(k)

        if weight_A < weight_B:
@ -67,6 +66,6 @@ def weighted_jaccard_similarity(A, B, key=lambda x: x):
        if k in done:
            continue

-        U += key(v)
+        U += v

    return I / U
--- a/test/metrics/cosine_test.py
+++ b/test/metrics/cosine_test.py
@ -20,16 +20,3 @@ class TestSparseCosineSimilarity(object):
    def test_basics(self):
        for A, B, similarity in BASIC_TESTS:
            assert sparse_cosine_similarity(A, B) == approx(similarity, 1e-2)
-
-    def test_key(self):
-        tests = []
-
-        for test in BASIC_TESTS:
-            tests.append((
-                {k: {'weight': v} for k, v in test[0].items()},
-                {k: {'weight': v} for k, v in test[1].items()},
-                test[2]
-            ))
-
-        for A, B, similarity in tests:
-            assert sparse_cosine_similarity(A, B, key=lambda x: x['weight']) == approx(similarity, 1e-2)
--- a/test/metrics/jaccard_test.py
+++ b/test/metrics/jaccard_test.py
@ -19,16 +19,3 @@ class TestWeightedJaccardSimilarity(object):
    def test_basics(self):
        for A, B, similarity in BASIC_TESTS:
            assert weighted_jaccard_similarity(A, B) == approx(similarity, 1e-2)
-
-    def test_key(self):
-        tests = []
-
-        for test in BASIC_TESTS:
-            tests.append((
-                {k: {'weight': v} for k, v in test[0].items()},
-                {k: {'weight': v} for k, v in test[1].items()},
-                test[2]
-            ))
-
-        for A, B, similarity in tests:
-            assert weighted_jaccard_similarity(A, B, key=lambda x: x['weight']) == approx(similarity, 1e-2)