mirror of https://github.com/Yomguithereal/fog.git
Working projection
This commit is contained in:
parent
6f4c7905d6
commit
fea1b7b69b
|
@ -10,3 +10,4 @@ __pycache__
|
|||
|
||||
build
|
||||
dist
|
||||
output
|
||||
|
|
|
@ -1,8 +1,9 @@
|
|||
import csv
|
||||
from networkx import Graph
|
||||
import networkx as nx
|
||||
from fog.graph import cosine_monopartite_projection
|
||||
from experiments.utils import Timer
|
||||
|
||||
bipartite = Graph()
|
||||
bipartite = nx.Graph()
|
||||
|
||||
with open('./data/bipartite.csv') as f:
|
||||
reader = csv.DictReader(f)
|
||||
|
@ -11,8 +12,13 @@ with open('./data/bipartite.csv') as f:
|
|||
account = 'a%s' %line['account']
|
||||
url = 'u%s' % line['url']
|
||||
|
||||
bipartite.add_node(account, type='account')
|
||||
bipartite.add_node(url, type='url')
|
||||
bipartite.add_node(account, node_type='account')
|
||||
bipartite.add_node(url, node_type='url')
|
||||
bipartite.add_edge(account, url, weight=int(line['weight']))
|
||||
|
||||
monopartite = cosine_monopartite_projection(bipartite, 'account', part='type')
|
||||
with Timer('quadratic'):
|
||||
monopartite = cosine_monopartite_projection(bipartite, 'account', part='node_type')
|
||||
|
||||
print(monopartite.order(), monopartite.size())
|
||||
|
||||
nx.write_gexf(monopartite, './output/monopartite.gexf')
|
||||
|
|
|
@ -4,16 +4,20 @@
|
|||
#
|
||||
# Miscellaneous functions related to bipartite to monopartite projections.
|
||||
#
|
||||
import math
|
||||
import networkx as nx
|
||||
from collections import defaultdict, Counter
|
||||
from networkx import Graph
|
||||
|
||||
from fog.metrics import sparse_dot_product
|
||||
|
||||
|
||||
def cosine_monopartite_projection(bipartite, keep, part='bipartite', weight='weight',
|
||||
threshold=None):
|
||||
monopartite = Graph()
|
||||
monopartite = nx.Graph()
|
||||
|
||||
vectors = defaultdict(Counter)
|
||||
|
||||
# TODO: what to do with nodes having no edges?
|
||||
for n1, n2, w in bipartite.edges(data=weight, default=1):
|
||||
p1 = bipartite.nodes[n1][part]
|
||||
p2 = bipartite.nodes[n2][part]
|
||||
|
@ -21,22 +25,46 @@ def cosine_monopartite_projection(bipartite, keep, part='bipartite', weight='wei
|
|||
assert p1 != p2, 'fog.graph.cosine_monopartite_projection: given graph is not truly bipartite.'
|
||||
|
||||
# Swapping so n1 is from part to keep
|
||||
if p2 == part:
|
||||
if p2 == keep:
|
||||
n1, n2 = n2, n1
|
||||
|
||||
vectors[n1][n2] += w
|
||||
|
||||
norms = {}
|
||||
inverted_index = defaultdict(list)
|
||||
nodes = list(vectors)
|
||||
# inverted_index = defaultdict(list)
|
||||
|
||||
for node, vector in vectors.items():
|
||||
monopartite.add_node(node, attr_dict=bipartite.nodes[node])
|
||||
monopartite.add_node(node, **bipartite.nodes[node])
|
||||
s = 0
|
||||
|
||||
for neighbor, w in vector.items():
|
||||
s += w
|
||||
inverted_index[neighbor].append(node)
|
||||
s += w * w
|
||||
# inverted_index[neighbor].append(node)
|
||||
|
||||
norms[node] = s
|
||||
norms[node] = math.sqrt(s)
|
||||
|
||||
# print(inverted_index)
|
||||
# Quadratic version
|
||||
l = len(nodes)
|
||||
|
||||
for i, n1 in enumerate(nodes):
|
||||
norm1 = norms[n1]
|
||||
vector1 = vectors[n1]
|
||||
|
||||
for j in range(i + 1, l):
|
||||
n2 = nodes[j]
|
||||
norm2 = norms[n2]
|
||||
vector2 = vectors[n2]
|
||||
|
||||
w = sparse_dot_product(vector1, vector2)
|
||||
|
||||
if w == 0:
|
||||
continue
|
||||
|
||||
# NOTE: at this point, both norms should be > 0, so no need to test
|
||||
w = w / (norm1 * norm2)
|
||||
|
||||
if threshold is None or w >= threshold:
|
||||
monopartite.add_edge(n1, n2, weight=w)
|
||||
|
||||
return monopartite
|
||||
|
|
Loading…
Reference in New Issue