import torch
from torch_geometric.utils import to_networkx, degree, to_dense_adj, to_scipy_sparse_matrix
import torch.nn.functional as F
from sklearn.model_selection import train_test_split
from scipy import sparse as sp
import dgl
import numpy as np
import networkx as nx
def convert_to_nodeDegreeFeatures(graphs):
    """Return clones of ``graphs`` whose ``x`` is a one-hot encoding of node degree.

    The per-node degree is the number of times the node appears in
    ``edge_index[0]``. All returned graphs share the same feature width,
    ``maxdegree + 1``, where ``maxdegree`` is the largest degree seen over the
    whole collection.

    Args:
        graphs: Iterable of graph objects exposing ``edge_index``,
            ``num_nodes``, ``clone()`` and item assignment.

    Returns:
        A list of cloned graphs with ``x`` replaced by a float one-hot matrix.
        The input graphs are not modified.
    """
    maxdegree = 0
    graph_infos = []  # (graph, per-node degree tensor)
    for graph in graphs:
        g = to_networkx(graph, to_undirected=True)
        # default=0 keeps a graph without nodes from raising in max().
        nx_max = max(dict(g.degree).values(), default=0)
        deg = degree(graph.edge_index[0], graph.num_nodes, dtype=torch.long)
        # The networkx graph collapses duplicate edges while ``degree`` counts
        # every entry of edge_index, so the tensor degree can be the larger of
        # the two. Take both into account so one_hot never sees a class index
        # that is >= num_classes.
        tensor_max = int(deg.max()) if deg.numel() > 0 else 0
        maxdegree = max(maxdegree, nx_max, tensor_max)
        graph_infos.append((graph, deg))

    new_graphs = []
    for graph, deg in graph_infos:
        new_graph = graph.clone()
        new_graph['x'] = F.one_hot(deg, num_classes=maxdegree + 1).to(torch.float)
        new_graphs.append(new_graph)
    return new_graphs
def get_maxDegree(graphs):
    """Return the largest node degree found in any graph of ``graphs``.

    Each graph is converted to an undirected networkx graph and its node
    degrees are inspected.

    Args:
        graphs: Iterable of graph objects accepted by ``to_networkx``.

    Returns:
        The maximum degree as an int; ``0`` when ``graphs`` is empty or no
        graph has any node.
    """
    maxdegree = 0
    for graph in graphs:
        g = to_networkx(graph, to_undirected=True)
        # default=0: a graph without nodes has an empty degree view, and a
        # bare max() would raise ValueError on it.
        gdegree = max(dict(g.degree).values(), default=0)
        maxdegree = max(maxdegree, gdegree)
    return maxdegree
def use_node_attributes(graphs):
    """Return clones of the graphs whose ``x`` keeps only the leading columns.

    The number of columns kept is read from ``graphs.num_node_attributes``.

    Args:
        graphs: Iterable collection exposing ``num_node_attributes``; each
            element must provide ``x``, ``clone()`` and item assignment.

    Returns:
        A list with one cloned graph per input graph. The inputs are left
        untouched.
    """
    keep = graphs.num_node_attributes

    def _with_leading_columns(source):
        trimmed = source.clone()
        trimmed['x'] = source.x[:, :keep]
        return trimmed

    return [_with_leading_columns(graph) for graph in graphs]
def split_data(graphs, train=None, test=None, shuffle=True, seed=None):
    """Split ``graphs`` into two parts with ``train_test_split``.

    When shuffling, the split is stratified on the concatenated ``y`` of all
    graphs. Without shuffling, the split is a plain ordered cut.

    Args:
        graphs: Sequence of graph objects, each exposing a tensor ``y``.
        train: Forwarded as ``train_size``.
        test: Forwarded as ``test_size``.
        shuffle: Whether to shuffle before splitting.
        seed: Forwarded as ``random_state``.

    Returns:
        A ``(graphs_tv, graphs_test)`` tuple as produced by
        ``train_test_split``.
    """
    # scikit-learn requires stratify=None when shuffle=False; passing labels
    # there raises, so only stratify when shuffling.
    stratify = torch.cat([graph.y for graph in graphs]) if shuffle else None
    graphs_tv, graphs_test = train_test_split(
        graphs,
        train_size=train,
        test_size=test,
        stratify=stratify,
        shuffle=shuffle,
        random_state=seed,
    )
    return graphs_tv, graphs_test
def get_numGraphLabels(dataset):
    """Count the distinct graph labels in ``dataset``.

    Args:
        dataset: Iterable of graphs whose ``y`` holds a single value
            (``y.item()`` is called on each).

    Returns:
        The number of different label values encountered.
    """
    distinct_labels = {graph.y.item() for graph in dataset}
    return len(distinct_labels)
def _get_avg_nodes_edges(graphs):
    """Return the mean node count and mean edge count over ``graphs``.

    Each graph's ``num_edges`` is halved before averaging, i.e. every
    undirected edge is assumed to be stored twice.

    Args:
        graphs: Sized iterable of graphs exposing ``num_nodes`` and
            ``num_edges``.

    Returns:
        ``(average_nodes, average_edges)`` as floats.

    Raises:
        ZeroDivisionError: If ``graphs`` is empty.
    """
    graph_count = len(graphs)
    node_total = sum((graph.num_nodes for graph in graphs), 0.0)
    # Halve per graph: both directions of an undirected edge are counted.
    edge_total = sum((graph.num_edges / 2.0 for graph in graphs), 0.0)
    return node_total / graph_count, edge_total / graph_count
def get_stats(df, ds, graphs_train, graphs_val=None, graphs_test=None):
    """Write graph-count and average-size statistics for ``ds`` into ``df``.

    The training split is always recorded. The validation and test splits
    are recorded only when they are truthy.

    Args:
        df: Table supporting ``df.loc[row, column] = value`` assignment.
        ds: Row label under which the statistics are stored.
        graphs_train: Training graphs.
        graphs_val: Optional validation graphs.
        graphs_test: Optional test graphs.

    Returns:
        The same ``df`` object, updated in place.
    """

    def _record(split_graphs, count_col, nodes_col, edges_col):
        # Store the count first, then the two averages, for one split.
        df.loc[ds, count_col] = len(split_graphs)
        mean_nodes, mean_edges = _get_avg_nodes_edges(split_graphs)
        df.loc[ds, nodes_col] = mean_nodes
        df.loc[ds, edges_col] = mean_edges

    _record(graphs_train, "#graphs_train", 'avgNodes_train', 'avgEdges_train')

    if graphs_val:
        _record(graphs_val, '#graphs_val', 'avgNodes_val', 'avgEdges_val')

    if graphs_test:
        _record(graphs_test, '#graphs_test', 'avgNodes_test', 'avgEdges_test')

    return df
def _random_walk_encoding(g, n_steps):
    """Return the diagonals of the first powers of ``A @ D^-1`` for graph ``g``.

    Column ``k`` of the result holds ``diag((A @ D^-1) ** (k + 1))``, where
    ``A`` is the sparse adjacency built from ``edge_index`` and ``D`` is the
    degree computed from ``edge_index[0]``. At least one column is produced.
    """
    A = to_scipy_sparse_matrix(g.edge_index, num_nodes=g.num_nodes)
    deg = degree(g.edge_index[0], num_nodes=g.num_nodes)
    # Clamp before inverting: a zero degree would otherwise become inf and
    # leak into the encoding for nodes that only have incoming edges.
    inv_deg = (deg.clamp(min=1) ** -1.0).numpy()
    M = A @ sp.diags(inv_deg)
    M_power = M
    diagonals = [torch.from_numpy(M_power.diagonal()).float()]
    for _ in range(n_steps - 1):
        M_power = M_power @ M
        diagonals.append(torch.from_numpy(M_power.diagonal()).float())
    return torch.stack(diagonals, dim=-1)


def _degree_encoding(g, n_dg):
    """Return a one-hot ``[num_nodes, n_dg]`` encoding of clipped node degree.

    The degree is clipped into ``[1, n_dg]`` and stored at column
    ``degree - 1``, so degrees 0 and 1 share the first column and every
    degree >= ``n_dg`` shares the last one.
    """
    deg = degree(g.edge_index[0], num_nodes=g.num_nodes)
    column = deg.clamp(1, n_dg).long() - 1
    encoding = torch.zeros([g.num_nodes, n_dg])
    encoding[torch.arange(deg.numel()), column] = 1
    return encoding


def init_structure_encoding(args, gs, type_init):
    """Attach a structural encoding to every graph under the key ``'stc_enc'``.

    Args:
        args: Object providing ``n_rw`` (number of random-walk steps, read for
            ``'rw'`` and ``'rw_dg'``) and ``n_dg`` (width of the degree
            one-hot, read for ``'dg'`` and ``'rw_dg'``).
        gs: Iterable of graphs exposing ``edge_index``, ``num_nodes`` and item
            assignment. The graphs are modified in place.
        type_init: ``'rw'`` for the random-walk encoding, ``'dg'`` for the
            degree encoding, ``'rw_dg'`` for both concatenated along dim 1
            (random-walk columns first).

    Returns:
        ``gs`` itself. For any other ``type_init`` the graphs are returned
        untouched.
    """
    if type_init not in ('rw', 'dg', 'rw_dg'):
        return gs

    use_rw = type_init in ('rw', 'rw_dg')
    use_dg = type_init in ('dg', 'rw_dg')
    for g in gs:
        parts = []
        if use_rw:
            parts.append(_random_walk_encoding(g, args.n_rw))
        if use_dg:
            parts.append(_degree_encoding(g, args.n_dg))
        g['stc_enc'] = torch.cat(parts, dim=1)
    return gs