mirror of https://github.com/Yomguithereal/fog.git
Dropping key option for perf reasons
This commit is contained in:
parent
61f67fed8c
commit
7ca15193f4
12
README.md
12
README.md
|
@ -30,18 +30,12 @@ from fog.metrics import sparse_cosine_similarity
|
|||
# Basic
|
||||
sparse_cosine_similarity({'apple': 34, 'pear': 3}, {'pear': 1, 'orange': 1})
|
||||
>>> ~0.062
|
||||
|
||||
# Using custom key
|
||||
A = {'apple': {'weight': 34}, 'pear': {'weight': 3}}
|
||||
B = {'pear': {'weight': 1}, 'orange': {'weight': 1}}
|
||||
sparse_cosine_similarity(A, B, key=lambda x: x['weight'])
|
||||
```
|
||||
|
||||
*Arguments*
|
||||
|
||||
* **A** *Counter*: first weighted set. Must be a dictionary mapping keys to weights.
|
||||
* **B** *Counter*: second weighted set. Must be a dictionary mapping keys to weights.
|
||||
* **key** *?callable*: Optional function retrieving the weight from values.
|
||||
|
||||
#### weighted_jaccard_similarity
|
||||
|
||||
|
@ -53,15 +47,9 @@ from fog.metrics import weighted_jaccard_similarity
|
|||
# Basic
|
||||
weighted_jaccard_similarity({'apple': 34, 'pear': 3}, {'pear': 1, 'orange': 1})
|
||||
>>> ~0.026
|
||||
|
||||
# Using custom key
|
||||
A = {'apple': {'weight': 34}, 'pear': {'weight': 3}}
|
||||
B = {'pear': {'weight': 1}, 'orange': {'weight': 1}}
|
||||
weighted_jaccard_similarity(A, B, key=lambda x: x['weight'])
|
||||
```
|
||||
|
||||
*Arguments*
|
||||
|
||||
* **A** *Counter*: first weighted set. Must be a dictionary mapping keys to weights.
|
||||
* **B** *Counter*: second weighted set. Must be a dictionary mapping keys to weights.
|
||||
* **key** *?callable*: Optional function retrieving the weight from values.
|
||||
|
|
|
@ -7,7 +7,7 @@
|
|||
import math
|
||||
|
||||
|
||||
def sparse_cosine_similarity(A, B, key=lambda x: x):
|
||||
def sparse_cosine_similarity(A, B):
|
||||
"""
|
||||
Function computing cosine similarity on sparse weighted sets represented
|
||||
by python dicts.
|
||||
|
@ -17,7 +17,6 @@ def sparse_cosine_similarity(A, B, key=lambda x: x):
|
|||
Args:
|
||||
A (Counter): First weighted set.
|
||||
B (Counter): Second weighted set.
|
||||
key (callable, optional): Function retrieving the weight from item.
|
||||
|
||||
Returns:
|
||||
float: Cosine similarity between A & B.
|
||||
|
@ -40,16 +39,16 @@ def sparse_cosine_similarity(A, B, key=lambda x: x):
|
|||
A, B = B, A
|
||||
|
||||
for k, v in A.items():
|
||||
weight = key(v)
|
||||
weight = v
|
||||
xx += weight ** 2
|
||||
|
||||
v2 = B.get(k)
|
||||
|
||||
if v2 is not None:
|
||||
xy += weight * key(v2)
|
||||
xy += weight * v2
|
||||
|
||||
for v in B.values():
|
||||
weight = key(v)
|
||||
weight = v
|
||||
yy += weight ** 2
|
||||
|
||||
return xy / math.sqrt(xx * yy)
|
||||
|
|
|
@ -6,7 +6,7 @@
|
|||
#
|
||||
|
||||
|
||||
def weighted_jaccard_similarity(A, B, key=lambda x: x):
|
||||
def weighted_jaccard_similarity(A, B):
|
||||
"""
|
||||
Function computing the weighted Jaccard similarity.
|
||||
|
||||
|
@ -21,7 +21,6 @@ def weighted_jaccard_similarity(A, B, key=lambda x: x):
|
|||
Args:
|
||||
A (Counter): First weighted set.
|
||||
B (Counter): Second weighted set.
|
||||
key (callable, optional): Function retrieving the weight from item.
|
||||
|
||||
Returns:
|
||||
float: Weighted Jaccard similarity between A & B.
|
||||
|
@ -46,13 +45,13 @@ def weighted_jaccard_similarity(A, B, key=lambda x: x):
|
|||
|
||||
# Computing intersection
|
||||
for k, v in A.items():
|
||||
weight_A = key(v)
|
||||
weight_A = v
|
||||
weight_B = 0.0
|
||||
|
||||
v2 = B.get(k)
|
||||
|
||||
if v2 is not None:
|
||||
weight_B = key(v2)
|
||||
weight_B = v2
|
||||
done.add(k)
|
||||
|
||||
if weight_A < weight_B:
|
||||
|
@ -67,6 +66,6 @@ def weighted_jaccard_similarity(A, B, key=lambda x: x):
|
|||
if k in done:
|
||||
continue
|
||||
|
||||
U += key(v)
|
||||
U += v
|
||||
|
||||
return I / U
|
||||
|
|
|
@ -20,16 +20,3 @@ class TestSparseCosineSimilarity(object):
|
|||
def test_basics(self):
|
||||
for A, B, similarity in BASIC_TESTS:
|
||||
assert sparse_cosine_similarity(A, B) == approx(similarity, 1e-2)
|
||||
|
||||
def test_key(self):
|
||||
tests = []
|
||||
|
||||
for test in BASIC_TESTS:
|
||||
tests.append((
|
||||
{k: {'weight': v} for k, v in test[0].items()},
|
||||
{k: {'weight': v} for k, v in test[1].items()},
|
||||
test[2]
|
||||
))
|
||||
|
||||
for A, B, similarity in tests:
|
||||
assert sparse_cosine_similarity(A, B, key=lambda x: x['weight']) == approx(similarity, 1e-2)
|
||||
|
|
|
@ -19,16 +19,3 @@ class TestWeightedJaccardSimilarity(object):
|
|||
def test_basics(self):
|
||||
for A, B, similarity in BASIC_TESTS:
|
||||
assert weighted_jaccard_similarity(A, B) == approx(similarity, 1e-2)
|
||||
|
||||
def test_key(self):
|
||||
tests = []
|
||||
|
||||
for test in BASIC_TESTS:
|
||||
tests.append((
|
||||
{k: {'weight': v} for k, v in test[0].items()},
|
||||
{k: {'weight': v} for k, v in test[1].items()},
|
||||
test[2]
|
||||
))
|
||||
|
||||
for A, B, similarity in tests:
|
||||
assert weighted_jaccard_similarity(A, B, key=lambda x: x['weight']) == approx(similarity, 1e-2)
|
||||
|
|
Loading…
Reference in New Issue