Dropping key option for perf reasons

This commit is contained in:
Yomguithereal 2018-05-30 18:27:37 +02:00
parent 61f67fed8c
commit 7ca15193f4
5 changed files with 8 additions and 48 deletions

View File

@ -30,18 +30,12 @@ from fog.metrics import sparse_cosine_similarity
# Basic
sparse_cosine_similarity({'apple': 34, 'pear': 3}, {'pear': 1, 'orange': 1})
>>> ~0.062
# Using custom key
A = {'apple': {'weight': 34}, 'pear': {'weight': 3}}
B = {'pear': {'weight': 1}, 'orange': {'weight': 1}}
sparse_cosine_similarity(A, B, key=lambda x: x['weight'])
```
*Arguments*
* **A** *Counter*: first weighted set. Must be a dictionary mapping keys to weights.
* **B** *Counter*: second weighted set. Must be a dictionary mapping keys to weights.
* **key** *?callable*: Optional function retrieving the weight from values.
#### weighted_jaccard_similarity
@ -53,15 +47,9 @@ from fog.metrics import weighted_jaccard_similarity
# Basic
weighted_jaccard_similarity({'apple': 34, 'pear': 3}, {'pear': 1, 'orange': 1})
>>> ~0.026
# Using custom key
A = {'apple': {'weight': 34}, 'pear': {'weight': 3}}
B = {'pear': {'weight': 1}, 'orange': {'weight': 1}}
weighted_jaccard_similarity(A, B, key=lambda x: x['weight'])
```
*Arguments*
* **A** *Counter*: first weighted set. Must be a dictionary mapping keys to weights.
* **B** *Counter*: second weighted set. Must be a dictionary mapping keys to weights.
* **key** *?callable*: Optional function retrieving the weight from values.

View File

@ -7,7 +7,7 @@
import math
def sparse_cosine_similarity(A, B, key=lambda x: x):
def sparse_cosine_similarity(A, B):
"""
Function computing cosine similarity on sparse weighted sets represented
by python dicts.
@ -17,7 +17,6 @@ def sparse_cosine_similarity(A, B, key=lambda x: x):
Args:
A (Counter): First weighted set.
B (Counter): Second weighted set.
key (callable, optional): Function retrieving the weight from item.
Returns:
float: Cosine similarity between A & B.
@ -40,16 +39,16 @@ def sparse_cosine_similarity(A, B, key=lambda x: x):
A, B = B, A
for k, v in A.items():
weight = key(v)
weight = v
xx += weight ** 2
v2 = B.get(k)
if v2 is not None:
xy += weight * key(v2)
xy += weight * v2
for v in B.values():
weight = key(v)
weight = v
yy += weight ** 2
return xy / math.sqrt(xx * yy)

View File

@ -6,7 +6,7 @@
#
def weighted_jaccard_similarity(A, B, key=lambda x: x):
def weighted_jaccard_similarity(A, B):
"""
Function computing the weighted Jaccard similarity.
@ -21,7 +21,6 @@ def weighted_jaccard_similarity(A, B, key=lambda x: x):
Args:
A (Counter): First weighted set.
B (Counter): Second weighted set.
key (callable, optional): Function retrieving the weight from item.
Returns:
float: Weighted Jaccard similarity between A & B.
@ -46,13 +45,13 @@ def weighted_jaccard_similarity(A, B, key=lambda x: x):
# Computing intersection
for k, v in A.items():
weight_A = key(v)
weight_A = v
weight_B = 0.0
v2 = B.get(k)
if v2 is not None:
weight_B = key(v2)
weight_B = v2
done.add(k)
if weight_A < weight_B:
@ -67,6 +66,6 @@ def weighted_jaccard_similarity(A, B, key=lambda x: x):
if k in done:
continue
U += key(v)
U += v
return I / U

View File

@ -20,16 +20,3 @@ class TestSparseCosineSimilarity(object):
def test_basics(self):
for A, B, similarity in BASIC_TESTS:
assert sparse_cosine_similarity(A, B) == approx(similarity, 1e-2)
def test_key(self):
tests = []
for test in BASIC_TESTS:
tests.append((
{k: {'weight': v} for k, v in test[0].items()},
{k: {'weight': v} for k, v in test[1].items()},
test[2]
))
for A, B, similarity in tests:
assert sparse_cosine_similarity(A, B, key=lambda x: x['weight']) == approx(similarity, 1e-2)

View File

@ -19,16 +19,3 @@ class TestWeightedJaccardSimilarity(object):
def test_basics(self):
for A, B, similarity in BASIC_TESTS:
assert weighted_jaccard_similarity(A, B) == approx(similarity, 1e-2)
def test_key(self):
tests = []
for test in BASIC_TESTS:
tests.append((
{k: {'weight': v} for k, v in test[0].items()},
{k: {'weight': v} for k, v in test[1].items()},
test[2]
))
for A, B, similarity in tests:
assert weighted_jaccard_similarity(A, B, key=lambda x: x['weight']) == approx(similarity, 1e-2)