diff --git a/.gitignore b/.gitignore index 884c5c3..9190cc3 100644 --- a/.gitignore +++ b/.gitignore @@ -10,3 +10,4 @@ __pycache__ build dist +output diff --git a/experiments/monopartite_projection.py b/experiments/monopartite_projection.py index f53493d..d0cd949 100644 --- a/experiments/monopartite_projection.py +++ b/experiments/monopartite_projection.py @@ -1,8 +1,9 @@ import csv -from networkx import Graph +import networkx as nx from fog.graph import cosine_monopartite_projection +from experiments.utils import Timer -bipartite = Graph() +bipartite = nx.Graph() with open('./data/bipartite.csv') as f: reader = csv.DictReader(f) @@ -11,8 +12,13 @@ with open('./data/bipartite.csv') as f: account = 'a%s' %line['account'] url = 'u%s' % line['url'] - bipartite.add_node(account, type='account') - bipartite.add_node(url, type='url') + bipartite.add_node(account, node_type='account') + bipartite.add_node(url, node_type='url') bipartite.add_edge(account, url, weight=int(line['weight'])) -monopartite = cosine_monopartite_projection(bipartite, 'account', part='type') +with Timer('quadratic'): + monopartite = cosine_monopartite_projection(bipartite, 'account', part='node_type') + +print(monopartite.order(), monopartite.size()) + +nx.write_gexf(monopartite, './output/monopartite.gexf') diff --git a/fog/graph/projection.py b/fog/graph/projection.py index 8b8b759..aafe8d3 100644 --- a/fog/graph/projection.py +++ b/fog/graph/projection.py @@ -4,16 +4,20 @@ # # Miscellaneous functions related to bipartite to monopartite projections. # +import math +import networkx as nx from collections import defaultdict, Counter -from networkx import Graph + +from fog.metrics import sparse_dot_product def cosine_monopartite_projection(bipartite, keep, part='bipartite', weight='weight', threshold=None): - monopartite = Graph() + monopartite = nx.Graph() vectors = defaultdict(Counter) + # TODO: what to do with nodes having no edges? for n1, n2, w in bipartite.edges(data=weight, default=1): p1 = bipartite.nodes[n1][part] p2 = bipartite.nodes[n2][part] @@ -21,22 +25,46 @@ def cosine_monopartite_projection(bipartite, keep, part='bipartite', weight='wei assert p1 != p2, 'fog.graph.cosine_monopartite_projection: given graph is not truly bipartite.' # Swapping so n1 is from part to keep - if p2 == part: + if p2 == keep: n1, n2 = n2, n1 vectors[n1][n2] += w norms = {} - inverted_index = defaultdict(list) + nodes = list(vectors) + # inverted_index = defaultdict(list) for node, vector in vectors.items(): - monopartite.add_node(node, attr_dict=bipartite.nodes[node]) + monopartite.add_node(node, **bipartite.nodes[node]) s = 0 for neighbor, w in vector.items(): - s += w - inverted_index[neighbor].append(node) + s += w * w + # inverted_index[neighbor].append(node) - norms[node] = s + norms[node] = math.sqrt(s) - # print(inverted_index) + # Quadratic version + l = len(nodes) + + for i, n1 in enumerate(nodes): + norm1 = norms[n1] + vector1 = vectors[n1] + + for j in range(i + 1, l): + n2 = nodes[j] + norm2 = norms[n2] + vector2 = vectors[n2] + + w = sparse_dot_product(vector1, vector2) + + if w == 0: + continue + + # NOTE: at this point, both norms should be > 0, so no need to test + w = w / (norm1 * norm2) + + if threshold is None or w >= threshold: + monopartite.add_edge(n1, n2, weight=w) + + return monopartite