From b03b1e402d07157a30eafb2859060529c5f88f63 Mon Sep 17 00:00:00 2001 From: Yomguithereal Date: Sat, 3 Oct 2020 18:10:09 +0200 Subject: [PATCH] Abstracting monopartite_projection basic case It can be considered as a basic unweighted intersection of vectors It probably makes the code run a bit slower but way more readable --- fog/graph/projection.py | 67 ++++++++++++++++------------------- test/graph/projection_test.py | 2 +- 2 files changed, 31 insertions(+), 38 deletions(-) diff --git a/fog/graph/projection.py b/fog/graph/projection.py index 3a26931..ab44686 100644 --- a/fog/graph/projection.py +++ b/fog/graph/projection.py @@ -12,7 +12,6 @@ from fog.metrics.cosine import sparse_dot_product from fog.metrics.utils import intersection_size MONOPARTITE_PROJECTION_METRICS = ('cosine', 'jaccard', 'overlap') -EMPTY_COUPLE = (None, None) def compute_metric(metric, vector1, vector2, norm1, norm2): @@ -29,6 +28,9 @@ def compute_metric(metric, vector1, vector2, norm1, norm2): if w == 0: return 0 + if metric is None: + return w + if metric == 'jaccard': return w / (norm1 + norm2 - w) @@ -112,31 +114,28 @@ def monopartite_projection(bipartite, project, part='bipartite', weight='weight' # the function to a sparse vector input vectors = {} - if metric is not None: - for node in monopartite.nodes: - s = 0 - neighbors = {} if metric == 'cosine' else set() + for node in monopartite.nodes: + s = 0 + neighbors = {} if metric == 'cosine' else set() - for _, neighbor, w in bipartite.edges(node, data=weight, default=1): - if metric == 'cosine': - s += w * w - neighbors[neighbor] = w - else: - s += 1 - neighbors.add(neighbor) + for _, neighbor, w in bipartite.edges(node, data=weight, default=1): + if metric == 'cosine': + s += w * w + neighbors[neighbor] = w + else: + s += 1 + neighbors.add(neighbor) - if s > 0: - if metric == 'cosine': - vectors[node] = (math.sqrt(s), neighbors) - else: - vectors[node] = (s, neighbors) + if s > 0: + if metric == 'cosine': + vectors[node] = (math.sqrt(s), neighbors) + else: + vectors[node] = (s, neighbors) # Basic projection - if metric is None or use_topology: - - for n1 in monopartite.nodes: - norm1, vector1 = vectors.get(n1, EMPTY_COUPLE) if metric is not None else EMPTY_COUPLE + if use_topology: + for n1, (norm1, vector1) in vectors.items(): for np in bipartite.neighbors(n1): for n2 in bipartite.neighbors(np): @@ -144,27 +143,21 @@ def monopartite_projection(bipartite, project, part='bipartite', weight='weight' if n1 >= n2: continue - if metric is not None: - if monopartite.has_edge(n1, n2): - continue + if monopartite.has_edge(n1, n2): + continue - norm2, vector2 = vectors[n2] + norm2, vector2 = vectors[n2] - # NOTE: at this point, both norms should be > 0 - w = compute_metric(metric, vector1, vector2, norm1, norm2) + # NOTE: at this point, both norms should be > 0 + w = compute_metric(metric, vector1, vector2, norm1, norm2) - if w == 0: - continue + if w == 0: + continue - if threshold is not None and w < threshold: - continue + if threshold is not None and w < threshold: + continue - monopartite.add_edge(n1, n2, **{weight: w}) - else: - if monopartite.has_edge(n1, n2): - monopartite[n1][n2][weight] += 1 - else: - monopartite.add_edge(n1, n2, **{weight: 1}) + monopartite.add_edge(n1, n2, **{weight: w}) return monopartite diff --git a/test/graph/projection_test.py b/test/graph/projection_test.py index d7741a0..7d00c91 100644 --- a/test/graph/projection_test.py +++ b/test/graph/projection_test.py @@ -127,7 +127,7 @@ class TestGraphProjection(object): def test_not_use_topology(self): - for metric in MONOPARTITE_PROJECTION_METRICS: + for metric in [None, *MONOPARTITE_PROJECTION_METRICS]: mono1 = monopartite_projection(BIPARTITE, 'people', part='part', metric=metric) mono2 = monopartite_projection(BIPARTITE, 'people', part='part', metric=metric, use_topology=False)