Working projection

This commit is contained in:
Yomguithereal 2020-10-01 12:18:41 +02:00
parent 6f4c7905d6
commit fea1b7b69b
3 changed files with 49 additions and 14 deletions

1
.gitignore vendored
View File

@ -10,3 +10,4 @@ __pycache__
build
dist
output

View File

@ -1,8 +1,9 @@
import csv
from networkx import Graph
import networkx as nx
from fog.graph import cosine_monopartite_projection
from experiments.utils import Timer
bipartite = Graph()
bipartite = nx.Graph()
with open('./data/bipartite.csv') as f:
reader = csv.DictReader(f)
@ -11,8 +12,13 @@ with open('./data/bipartite.csv') as f:
account = 'a%s' %line['account']
url = 'u%s' % line['url']
bipartite.add_node(account, type='account')
bipartite.add_node(url, type='url')
bipartite.add_node(account, node_type='account')
bipartite.add_node(url, node_type='url')
bipartite.add_edge(account, url, weight=int(line['weight']))
monopartite = cosine_monopartite_projection(bipartite, 'account', part='type')
with Timer('quadratic'):
monopartite = cosine_monopartite_projection(bipartite, 'account', part='node_type')
print(monopartite.order(), monopartite.size())
nx.write_gexf(monopartite, './output/monopartite.gexf')

View File

@ -4,16 +4,20 @@
#
# Miscellaneous functions related to bipartite to monopartite projections.
#
import math
import networkx as nx
from collections import defaultdict, Counter
from networkx import Graph
from fog.metrics import sparse_dot_product
def cosine_monopartite_projection(bipartite, keep, part='bipartite', weight='weight',
                                  threshold=None):
    """
    Project a bipartite graph onto one of its two parts, linking the kept
    nodes by the cosine similarity of their weighted neighborhoods.

    Each kept node is represented as a sparse vector mapping every neighbor
    from the other part to the accumulated weight of the edges joining them.
    Two kept nodes are connected in the projection when the dot product of
    their vectors is non-zero; the resulting edge stores the cosine
    similarity under the 'weight' attribute.

    Args:
        bipartite: Graph to project. It is accessed through
            `bipartite.edges(data=..., default=...)` and `bipartite.nodes[n]`
            (networkx-style API).
        keep: Value of the `part` node attribute identifying the nodes that
            must be kept in the projection.
        part: Name of the node attribute holding the partition of a node.
            Defaults to 'bipartite'.
        weight: Name of the edge attribute holding the edge weight. Edges
            lacking it count for 1. Defaults to 'weight'.
        threshold: If not None, only pairs whose cosine similarity is
            greater than or equal to this value produce an edge.

    Returns:
        A new `nx.Graph` containing the kept nodes that have at least one
        edge in `bipartite`, each carrying a copy of its original node
        attributes.

    Raises:
        AssertionError: If an edge joins two nodes having the same `part`
            value.
    """
    monopartite = nx.Graph()

    # Sparse vectors: kept node -> Counter(neighbor -> accumulated weight)
    vectors = defaultdict(Counter)

    # TODO: what to do with nodes having no edges?
    for n1, n2, w in bipartite.edges(data=weight, default=1):
        p1 = bipartite.nodes[n1][part]
        p2 = bipartite.nodes[n2][part]

        assert p1 != p2, 'fog.graph.cosine_monopartite_projection: given graph is not truly bipartite.'

        # Swapping so n1 is from part to keep
        # NOTE(review): if neither endpoint belongs to `keep`, n1 is used as
        # is. Confirm whether graphs with more than two part values can occur.
        if p2 == keep:
            n1, n2 = n2, n1

        vectors[n1][n2] += w

    norms = {}
    nodes = list(vectors)

    for node, vector in vectors.items():
        # Node attributes are forwarded as keyword arguments so they end up
        # as regular attributes of the projected node.
        monopartite.add_node(node, **bipartite.nodes[node])

        # Euclidean norm of the sparse vector
        norms[node] = math.sqrt(sum(w * w for w in vector.values()))

    # Quadratic version: every unordered pair of kept nodes is compared once
    count = len(nodes)

    for i, n1 in enumerate(nodes):
        norm1 = norms[n1]
        vector1 = vectors[n1]

        for j in range(i + 1, count):
            n2 = nodes[j]
            norm2 = norms[n2]
            vector2 = vectors[n2]

            # Presumably the dot product over the keys shared by both
            # vectors; see fog.metrics.sparse_dot_product.
            w = sparse_dot_product(vector1, vector2)

            if w == 0:
                continue

            # NOTE: at this point, both norms should be > 0, so no need to test
            w = w / (norm1 * norm2)

            if threshold is None or w >= threshold:
                monopartite.add_edge(n1, n2, weight=w)

    return monopartite