# MIA-GCL / CCA-SSG / dataset.py
import os
import pickle as pkl
import random

import dgl
import networkx as nx
import numpy as np
import torch
import torch as th
from dgl.data import CoraGraphDataset, CiteseerGraphDataset, PubmedGraphDataset
from dgl.data import AmazonCoBuyPhotoDataset, AmazonCoBuyComputerDataset
from dgl.data import CoauthorCSDataset, CoauthorPhysicsDataset

import preprocessing

def load(name):
    """Load a DGL benchmark graph together with train/val/test node indices.

    Args:
        name: Dataset key. 'cora', 'citeseer' and 'pubmed' use the split masks
            stored on the graph; 'photo', 'comp', 'cs' and 'physics' get a
            random 10% / 10% / 80% train / val / test node split drawn from
            NumPy's global random state.

    Returns:
        Tuple ``(graph, feat, labels, num_class, train_idx, val_idx, test_idx)``.
        ``feat`` and ``labels`` (and, for the citation graphs, the split masks)
        are popped off ``graph.ndata``, so the returned graph no longer
        carries them.

    Raises:
        ValueError: If ``name`` is not one of the supported dataset keys.
    """
    citegraph = ['cora', 'citeseer', 'pubmed']
    cograph = ['photo', 'comp', 'cs', 'physics']

    # Reject unknown keys before anything is downloaded or constructed.
    if name not in citegraph and name not in cograph:
        raise ValueError(
            'unknown dataset name %r; expected one of %s'
            % (name, citegraph + cograph))

    dataset_classes = {
        'cora': CoraGraphDataset,
        'citeseer': CiteseerGraphDataset,
        'pubmed': PubmedGraphDataset,
        'photo': AmazonCoBuyPhotoDataset,
        'comp': AmazonCoBuyComputerDataset,
        'cs': CoauthorCSDataset,
        'physics': CoauthorPhysicsDataset,
    }
    dataset = dataset_classes[name]()
    graph = dataset[0]

    if name in citegraph:
        # These datasets ship with predefined boolean split masks.
        train_mask = graph.ndata.pop('train_mask')
        val_mask = graph.ndata.pop('val_mask')
        test_mask = graph.ndata.pop('test_mask')

        train_idx = th.nonzero(train_mask, as_tuple=False).squeeze()
        val_idx = th.nonzero(val_mask, as_tuple=False).squeeze()
        test_idx = th.nonzero(test_mask, as_tuple=False).squeeze()
    else:
        # No masks are read for these datasets: shuffle the node ids and cut
        # them into 10% train, 10% val and the remaining nodes as test.
        train_ratio = 0.1
        val_ratio = 0.1

        N = graph.number_of_nodes()
        train_num = int(N * train_ratio)
        val_num = int(N * (train_ratio + val_ratio))

        idx = np.arange(N)
        np.random.shuffle(idx)

        train_idx = th.tensor(idx[:train_num])
        val_idx = th.tensor(idx[train_num:val_num])
        test_idx = th.tensor(idx[val_num:])

    num_class = dataset.num_classes
    feat = graph.ndata.pop('feat')
    labels = graph.ndata.pop('label')

    return graph, feat, labels, num_class, train_idx, val_idx, test_idx

    # edge_mask_rate=0.3
    #
    # edges=graph.edges()
    #
    # g = nx.Graph()
    # g.add_edges_from(edges)
    # adj_sparse = nx.to_scipy_sparse_matrix(g)
    # random.seed(42)
    # train_test_split = preprocessing.mask_test_edges(adj_sparse, test_frac=.3, val_frac=0)
    # adj_train, train_edges, train_edges_false, val_edges, val_edges_false, test_edges, test_edges_false = train_test_split  # Unpack train-test split
    # # print(adj_train)
    # g_train0 = nx.from_scipy_sparse_matrix(
    #     adj_train)  # new graph object with only non-hidden edges, keep all the original nodes
    #
    # edge_tuples0 = [(min(edge[0], edge[1]), max(edge[0], edge[1])) for edge in g_train0.edges()]
    # # print(edge_tuples0)
    #
    # train_edges0 = set(edge_tuples0)  # initialize train_edges to have all edges
    # train_edges0 = np.array([list(edge_tuple) for edge_tuple in train_edges0])
    # # print(train_edges1)
    #
    #
    # edge_tuples_test0 = [(min(edge[0], edge[1]), max(edge[0], edge[1])) for edge in test_edges]
    #
    # edges_test0 = set(edge_tuples_test0)  # initialize test_edges to have all edges
    # edges_test0 = np.array([list(edge_tuple) for edge_tuple in edges_test0])
    #
    # test_edges0 = edges_test0
    #
    # res_dir = '%s-ccassg-mia-mi' % (name)
    #
    # out = open('%s/%s-edges-train.txt' % (res_dir, name), 'w')
    # for item in train_edges0:
    #     for jtem in item:
    #         out.write(str(jtem) + '\t')
    #     out.write('\n')
    # out.close()
    #
    # out = open('%s/%s-edges-test.txt' % (res_dir, name), 'w')
    # for item in edges_test0:
    #     for jtem in item:
    #         out.write(str(jtem) + '\t')
    #     out.write('\n')
    # out.close()
    #
    # # adj = adj_train
    # #
    # # adj = process.normalize_adj(adj + sp.eye(adj.shape[0]))
    # train_edges_1 = np.concatenate((train_edges0[:, 1].reshape(-1, 1), train_edges0[:, 0].reshape(-1, 1)), axis=1)
    # train_edges_1 = (np.array(train_edges_1))
    # train_edges_2 = (np.array(train_edges0))
    # test_edges_1 = np.concatenate((test_edges0[:, 1].reshape(-1, 1), test_edges0[:, 0].reshape(-1, 1)), axis=1)
    # test_edges_1 = (np.array(test_edges_1))
    # test_edges_2 = (np.array(test_edges0))
    #
    # edges_train_index = np.concatenate((train_edges_1, train_edges_2), axis=0)
    #
    # edges_test_index = np.concatenate((test_edges_1, test_edges_2), axis=0)
    #
    # # graph.edges=edges_train_index
    #
    #
    #
    # if name in citegraph:
    #     train_mask = graph.ndata.pop('train_mask')
    #     val_mask = graph.ndata.pop('val_mask')
    #     test_mask = graph.ndata.pop('test_mask')
    #
    #     train_idx = th.nonzero(train_mask, as_tuple=False).squeeze()
    #     val_idx = th.nonzero(val_mask, as_tuple=False).squeeze()
    #     test_idx = th.nonzero(test_mask, as_tuple=False).squeeze()
    #
    # if name in cograph:
    #     train_ratio = 0.1
    #     val_ratio = 0.1
    #     test_ratio = 0.8
    #
    #     N = graph.number_of_nodes()
    #     train_num = int(N * train_ratio)
    #     val_num = int(N * (train_ratio + val_ratio))
    #
    #     idx = np.arange(N)
    #     np.random.shuffle(idx)
    #
    #     train_idx = idx[:train_num]
    #     val_idx = idx[train_num:val_num]
    #     test_idx = idx[val_num:]
    #
    #     train_idx = th.tensor(train_idx)
    #     val_idx = th.tensor(val_idx)
    #     test_idx = th.tensor(test_idx)
    #
    # num_class = dataset.num_classes
    # feat = graph.ndata.pop('feat')
    # labels = graph.ndata.pop('label')
    #
    # return graph, feat, labels, num_class, train_idx, val_idx, test_idx



def load_mia(name):
    """Load a DGL dataset, split its edges into train/test and rebuild the graph.

    The undirected edge set is handed to ``preprocessing.mask_test_edges``
    (called with ``test_frac=.3, val_frac=0``). The train and test edges it
    yields are written to ``<name>-ccassg-mia/<name>-edges-train.txt`` and
    ``...-edges-test.txt`` (one edge per line, node ids followed by a tab),
    and a new DGL graph holding every train edge in both directions is built.

    Args:
        name: Dataset key: 'cora', 'citeseer', 'pubmed' (node split taken from
            the masks stored on the graph) or 'photo', 'comp', 'cs', 'physics'
            (random 10% / 10% / 80% node split from NumPy's global state).

    Returns:
        Tuple ``(graph0, feat, labels, num_class, train_idx, val_idx,
        test_idx, train_edges0, edges_test0)``. ``graph0`` is the train-edge
        graph with ``feat`` / ``label`` attached; ``train_edges0`` and
        ``edges_test0`` are arrays with one ``(min_id, max_id)`` row per
        undirected train / test edge.

    Raises:
        ValueError: If ``name`` is not one of the supported dataset keys.
    """
    citegraph = ['cora', 'citeseer', 'pubmed']
    cograph = ['photo', 'comp', 'cs', 'physics']

    # Reject unknown keys before anything is downloaded or written.
    if name not in citegraph and name not in cograph:
        raise ValueError(
            'unknown dataset name %r; expected one of %s'
            % (name, citegraph + cograph))

    dataset_classes = {
        'cora': CoraGraphDataset,
        'citeseer': CiteseerGraphDataset,
        'pubmed': PubmedGraphDataset,
        'photo': AmazonCoBuyPhotoDataset,
        'comp': AmazonCoBuyComputerDataset,
        'cs': CoauthorCSDataset,
        'physics': CoauthorPhysicsDataset,
    }
    dataset = dataset_classes[name]()
    graph = dataset[0]
    num_nodes = graph.number_of_nodes()

    edges = graph.edges()
    print(edges)

    # (num_edges, 2) array of [src, dst] rows.
    edges_ = np.stack((edges[0].numpy(), edges[1].numpy()), axis=1)

    # Register every node id *before* adding edges. networkx orders nodes by
    # first insertion, so this keeps isolated nodes and guarantees that row i
    # of the adjacency matrix is node i, i.e. that the edge indices produced
    # below line up with the rows of feat / labels.
    g = nx.Graph()
    g.add_nodes_from(range(num_nodes))
    g.add_edges_from(edges_)
    adj_sparse = nx.to_scipy_sparse_matrix(g, nodelist=list(range(num_nodes)))

    random.seed(42)
    train_test_split = preprocessing.mask_test_edges(adj_sparse, test_frac=.3, val_frac=0)
    # Tuple layout: (adj_train, train_edges, train_edges_false, val_edges,
    # val_edges_false, test_edges, test_edges_false).
    adj_train = train_test_split[0]
    test_edges = train_test_split[5]

    # Graph with only the non-hidden edges; it keeps all the original nodes.
    g_train0 = nx.from_scipy_sparse_matrix(adj_train)

    # Canonicalise each undirected edge as (min, max) and de-duplicate.
    train_edges0 = {(min(u, v), max(u, v)) for u, v in g_train0.edges()}
    train_edges0 = np.array([list(edge) for edge in train_edges0])

    edges_test0 = {(min(u, v), max(u, v)) for u, v in test_edges}
    edges_test0 = np.array([list(edge) for edge in edges_test0])

    res_dir = '%s-ccassg-mia' % (name)
    os.makedirs(res_dir, exist_ok=True)

    with open('%s/%s-edges-train.txt' % (res_dir, name), 'w') as out:
        for edge in train_edges0:
            out.write(''.join(str(node) + '\t' for node in edge) + '\n')

    with open('%s/%s-edges-test.txt' % (res_dir, name), 'w') as out:
        for edge in edges_test0:
            out.write(''.join(str(node) + '\t' for node in edge) + '\n')

    if name in citegraph:
        train_mask = graph.ndata.pop('train_mask')
        val_mask = graph.ndata.pop('val_mask')
        test_mask = graph.ndata.pop('test_mask')

        train_idx = th.nonzero(train_mask, as_tuple=False).squeeze()
        val_idx = th.nonzero(val_mask, as_tuple=False).squeeze()
        test_idx = th.nonzero(test_mask, as_tuple=False).squeeze()
    else:
        train_ratio = 0.1
        val_ratio = 0.1

        train_num = int(num_nodes * train_ratio)
        val_num = int(num_nodes * (train_ratio + val_ratio))

        idx = np.arange(num_nodes)
        np.random.shuffle(idx)

        train_idx = th.tensor(idx[:train_num])
        val_idx = th.tensor(idx[train_num:val_num])
        test_idx = th.tensor(idx[val_num:])

    num_class = dataset.num_classes
    feat = graph.ndata.pop('feat')
    labels = graph.ndata.pop('label')

    # Each undirected train edge is inserted in both directions:
    # first (max -> min) for all edges, then (min -> max).
    edges_src = torch.from_numpy(np.concatenate((train_edges0[:, 1], train_edges0[:, 0])))
    edges_dst = torch.from_numpy(np.concatenate((train_edges0[:, 0], train_edges0[:, 1])))

    graph0 = dgl.graph((edges_src, edges_dst), num_nodes=num_nodes)
    graph0.ndata['feat'] = feat
    graph0.ndata['label'] = labels

    print(feat, labels)
    print(feat.type(), labels.type())

    return graph0, feat, labels, num_class, train_idx, val_idx, test_idx, train_edges0, edges_test0


def load_DE(dt):
    """Load a pickled graph from ``./data``, split its edges and build a DGL graph.

    Reads ``./data/<dt>-adj-feat.pkl``, which must unpickle to
    ``(adj, ft, labels)``. The undirected edge set is handed to
    ``preprocessing.mask_test_edges`` (called with ``test_frac=.3,
    val_frac=0``); the resulting train and test edges are written to
    ``<dt>-ccassg-mia-mi/<dt>-edges-train.txt`` and ``...-edges-test.txt``,
    and a DGL graph holding every train edge in both directions is built.
    Nodes are split 70% / 10% / 20% into train / val / test at random using
    NumPy's global random state.

    Args:
        dt: Dataset name used to build the input and output paths.

    Returns:
        Tuple ``(graph0, feat, labels, num_class, train_idx, val_idx,
        test_idx, train_edges0, edges_test0, res_dir)``. ``train_edges0`` and
        ``edges_test0`` hold one ``(min_id, max_id)`` row per undirected
        train / test edge; ``res_dir`` is the output directory name.
    """
    name = dt

    feat_dir = './data/' + dt + '-adj-feat.pkl'

    # NOTE(security): unpickling runs arbitrary code from the file; only load
    # data files that come from a trusted source.
    with open(feat_dir, 'rb') as f2:
        adj, ft, labels = pkl.load(f2, encoding='latin1')

    # NOTE(review): assumes adj is an adjacency matrix, so that networkx
    # labels the nodes 0..N-1 -- confirm against the data files.
    graph = nx.Graph(adj)
    num_nodes = graph.number_of_nodes()

    x = torch.from_numpy(np.array(ft)).float()

    edges = graph.edges()
    print(edges)

    # Canonicalise each undirected edge as (min, max) and de-duplicate.
    edges_all = {(min(u, v), max(u, v)) for u, v in graph.edges()}
    edges_ = np.array([list(edge) for edge in edges_all])

    # Register every node id *before* adding edges. networkx orders nodes by
    # first insertion, so this keeps isolated nodes and guarantees that row i
    # of the adjacency matrix is node i, i.e. that the edge indices produced
    # below line up with the rows of feat / labels.
    g = nx.Graph()
    g.add_nodes_from(range(num_nodes))
    g.add_edges_from(edges_)
    adj_sparse = nx.to_scipy_sparse_matrix(g, nodelist=list(range(num_nodes)))

    random.seed(42)
    train_test_split = preprocessing.mask_test_edges(adj_sparse, test_frac=.3, val_frac=0)
    # Tuple layout: (adj_train, train_edges, train_edges_false, val_edges,
    # val_edges_false, test_edges, test_edges_false).
    adj_train = train_test_split[0]
    test_edges = train_test_split[5]

    # Graph with only the non-hidden edges; it keeps all the original nodes.
    g_train0 = nx.from_scipy_sparse_matrix(adj_train)

    train_edges0 = {(min(u, v), max(u, v)) for u, v in g_train0.edges()}
    train_edges0 = np.array([list(edge) for edge in train_edges0])

    edges_test0 = {(min(u, v), max(u, v)) for u, v in test_edges}
    edges_test0 = np.array([list(edge) for edge in edges_test0])

    res_dir = '%s-ccassg-mia-mi' % (name)
    os.makedirs(res_dir, exist_ok=True)

    with open('%s/%s-edges-train.txt' % (res_dir, name), 'w') as out:
        for edge in train_edges0:
            out.write(''.join(str(node) + '\t' for node in edge) + '\n')

    with open('%s/%s-edges-test.txt' % (res_dir, name), 'w') as out:
        for edge in edges_test0:
            out.write(''.join(str(node) + '\t' for node in edge) + '\n')

    print(graph.edges)

    train_ratio = 0.7
    val_ratio = 0.1

    train_num = int(num_nodes * train_ratio)
    val_num = int(num_nodes * (train_ratio + val_ratio))

    idx = np.arange(num_nodes)
    np.random.shuffle(idx)

    train_idx = th.tensor(idx[:train_num])
    val_idx = th.tensor(idx[train_num:val_num])
    test_idx = th.tensor(idx[val_num:])

    # Computed while labels is still a NumPy array.
    num_class = max(labels) + 1
    feat = x

    # Each undirected train edge is inserted in both directions:
    # first (max -> min) for all edges, then (min -> max).
    edges_src = torch.from_numpy(np.concatenate((train_edges0[:, 1], train_edges0[:, 0])))
    edges_dst = torch.from_numpy(np.concatenate((train_edges0[:, 0], train_edges0[:, 1])))

    labels = torch.from_numpy(labels).long()

    graph0 = dgl.graph((edges_src, edges_dst), num_nodes=num_nodes)
    graph0.ndata['feat'] = feat
    graph0.ndata['label'] = labels

    return graph0, feat, labels, num_class, train_idx, val_idx, test_idx, train_edges0, edges_test0, res_dir


def load_chemistry(dt):
    """Load a pickled graph from ``../data``, split its edges and build a DGL graph.

    Reads ``../data/<dt>-adj-feat.pkl``, which must unpickle to
    ``(adj, ft, labels)``. Every feature row with a non-zero sum is divided
    by that sum (in place, on the loaded array). The undirected edge set is
    handed to ``preprocessing.mask_test_edges`` (called with ``test_frac=.3,
    val_frac=0``); the resulting train and test edges are written to
    ``<dt>-ccassg-mia-mi/<dt>-edges-train.txt`` and ``...-edges-test.txt``,
    and a DGL graph holding every train edge in both directions is built.
    Nodes are split 70% / 10% / 20% into train / val / test at random using
    NumPy's global random state.

    Args:
        dt: Dataset name used to build the input and output paths.

    Returns:
        Tuple ``(graph0, feat, labels, num_class, train_idx, val_idx,
        test_idx, train_edges0, edges_test0, res_dir)``. ``train_edges0`` and
        ``edges_test0`` hold one ``(min_id, max_id)`` row per undirected
        train / test edge; ``res_dir`` is the output directory name.
    """
    name = dt

    feat_dir = '../data/' + dt + '-adj-feat.pkl'

    # NOTE(security): unpickling runs arbitrary code from the file; only load
    # data files that come from a trusted source.
    with open(feat_dir, 'rb') as f2:
        adj, ft, labels = pkl.load(f2, encoding='latin1')

    x = ft

    # NOTE(review): assumes adj is an adjacency matrix, so that networkx
    # labels the nodes 0..N-1 -- confirm against the data files.
    graph = nx.Graph(adj)
    num_nodes = graph.number_of_nodes()
    print(num_nodes, graph.number_of_edges())

    # Scale each non-empty feature row so that it sums to one.
    for ii in range(np.shape(x)[0]):
        row_sum = np.sum(x[ii])
        if row_sum != 0:
            x[ii] = x[ii] / row_sum

    print(set(list(labels)))

    for cls in set(list(labels)):
        print('KKKKK', cls, len(np.where(labels == cls)[0]))

    # Derive the class count while labels is still a NumPy array, then
    # convert exactly once (torch.from_numpy rejects a Tensor argument).
    num_class = max(labels) + 1
    labels = torch.from_numpy(labels).long()

    x = torch.from_numpy(np.array(x)).float()

    # Canonicalise each undirected edge as (min, max) and de-duplicate.
    edges_all = {(min(u, v), max(u, v)) for u, v in graph.edges()}
    edges_ = np.array([list(edge) for edge in edges_all])

    # Register every node id *before* adding edges. networkx orders nodes by
    # first insertion, so this keeps isolated nodes and guarantees that row i
    # of the adjacency matrix is node i, i.e. that the edge indices produced
    # below line up with the rows of feat / labels.
    g = nx.Graph()
    g.add_nodes_from(range(num_nodes))
    g.add_edges_from(edges_)

    adj_sparse = nx.to_scipy_sparse_matrix(g, nodelist=list(range(num_nodes)))
    random.seed(42)
    train_test_split = preprocessing.mask_test_edges(adj_sparse, test_frac=.3, val_frac=0)
    # Tuple layout: (adj_train, train_edges, train_edges_false, val_edges,
    # val_edges_false, test_edges, test_edges_false).
    adj_train = train_test_split[0]
    test_edges = train_test_split[5]

    # Graph with only the non-hidden edges; it keeps all the original nodes.
    g_train0 = nx.from_scipy_sparse_matrix(adj_train)

    train_edges0 = {(min(u, v), max(u, v)) for u, v in g_train0.edges()}
    train_edges0 = np.array([list(edge) for edge in train_edges0])

    edges_test0 = {(min(u, v), max(u, v)) for u, v in test_edges}
    edges_test0 = np.array([list(edge) for edge in edges_test0])

    res_dir = '%s-ccassg-mia-mi' % (name)
    os.makedirs(res_dir, exist_ok=True)

    with open('%s/%s-edges-train.txt' % (res_dir, name), 'w') as out:
        for edge in train_edges0:
            out.write(''.join(str(node) + '\t' for node in edge) + '\n')

    with open('%s/%s-edges-test.txt' % (res_dir, name), 'w') as out:
        for edge in edges_test0:
            out.write(''.join(str(node) + '\t' for node in edge) + '\n')

    print(g.edges)

    train_ratio = 0.7
    val_ratio = 0.1

    train_num = int(num_nodes * train_ratio)
    val_num = int(num_nodes * (train_ratio + val_ratio))

    idx = np.arange(num_nodes)
    np.random.shuffle(idx)

    train_idx = th.tensor(idx[:train_num])
    val_idx = th.tensor(idx[train_num:val_num])
    test_idx = th.tensor(idx[val_num:])

    feat = x

    # Each undirected train edge is inserted in both directions:
    # first (max -> min) for all edges, then (min -> max).
    edges_src = torch.from_numpy(np.concatenate((train_edges0[:, 1], train_edges0[:, 0])))
    edges_dst = torch.from_numpy(np.concatenate((train_edges0[:, 0], train_edges0[:, 1])))

    graph0 = dgl.graph((edges_src, edges_dst), num_nodes=num_nodes)
    graph0.ndata['feat'] = feat
    graph0.ndata['label'] = labels

    return graph0, feat, labels, num_class, train_idx, val_idx, test_idx, train_edges0, edges_test0, res_dir



def load_fb(dt):
    """Load a pickled attributed graph, derive labels from a feature column and split its edges.

    Reads ``../data/<dt>-adj-feat.pkl``, which must unpickle to ``(adj, ft)``.
    One feature column (index ``gindex``) becomes the node label and is
    removed from the features, together with the following column for
    '3980' and 'combined':

    * '3980': ``gindex`` is the position of ``'gender'`` among the names
      read from ``../data/3980.featnames``.
    * 'combined': ``gindex`` is 77.
    * 'dblp-2', 'pokec': ``gindex`` is 0.

    Every remaining feature row with a non-zero sum is divided by that sum.
    The undirected edge set is handed to ``preprocessing.mask_test_edges``
    (called with ``test_frac=.3, val_frac=0``); the resulting train and test
    edges are written to ``<dt>-ccassg-mia-mi/<dt>-edges-train.txt`` and
    ``...-edges-test.txt``, and a DGL graph holding every train edge in both
    directions is built. Nodes are split 70% / 10% / 20% into train / val /
    test at random using NumPy's global random state.

    Args:
        dt: Dataset name; one of '3980', 'combined', 'dblp-2', 'pokec'.

    Returns:
        Tuple ``(graph0, feat, labels, num_class, train_idx, val_idx,
        test_idx, train_edges0, edges_test0, res_dir)``. ``train_edges0`` and
        ``edges_test0`` hold one ``(min_id, max_id)`` row per undirected
        train / test edge; ``res_dir`` is the output directory name.

    Raises:
        ValueError: If ``dt`` is not one of the supported dataset names.
    """
    supported = ('3980', 'combined', 'dblp-2', 'pokec')
    # Reject unknown names before any file is read or written.
    if dt not in supported:
        raise ValueError(
            'unknown dataset name %r; expected one of %s' % (dt, list(supported)))

    name = dt

    feat_dir = '../data/' + dt + '-adj-feat.pkl'

    # NOTE(security): unpickling runs arbitrary code from the file; only load
    # data files that come from a trusted source.
    with open(feat_dir, 'rb') as f2:
        adj, ft = pkl.load(f2, encoding='latin1')

    # NOTE(review): assumes adj is an adjacency matrix, so that networkx
    # labels the nodes 0..N-1 -- confirm against the data files.
    graph = nx.Graph(adj)
    num_nodes = graph.number_of_nodes()
    print(num_nodes, graph.number_of_edges())

    if dt == '3980':
        featname_dir = '../data/' + str(dt) + '.featnames'
        # Each line is "<index> <name>;<rest>"; keep the part before the
        # first ';' of the second space-separated field.
        featnames = []
        with open(featname_dir) as f:
            for line in f:
                fields = line.strip().split(' ')
                featnames.append(fields[1].split(';')[0])

        gindex = featnames.index('gender')
        print(gindex)
        drop_cols = [gindex, gindex + 1]
    elif dt == 'combined':
        gindex = 77
        drop_cols = [gindex, gindex + 1]
    else:  # 'dblp-2' or 'pokec'
        gindex = 0
        drop_cols = [gindex]

    x = np.delete(ft, drop_cols, axis=1)
    # Slice with gindex:gindex + 1 so the column stays 2-D and the axis=1
    # reduction is valid for every dataset.
    labels = np.sum(ft[:, gindex:gindex + 1], axis=1)

    # Scale each non-empty feature row so that it sums to one.
    for ii in range(np.shape(x)[0]):
        row_sum = np.sum(x[ii])
        if row_sum != 0:
            x[ii] = x[ii] / row_sum

    print(set(list(labels)))

    for cls in set(list(labels)):
        print('KKKKK', cls, len(np.where(labels == cls)[0]))

    # Convert exactly once (torch.from_numpy rejects a Tensor argument).
    labels = torch.from_numpy(labels).long()

    x = torch.from_numpy(np.array(x)).float()

    # Canonicalise each undirected edge as (min, max) and de-duplicate.
    edges_all = {(min(u, v), max(u, v)) for u, v in graph.edges()}
    edges_ = np.array([list(edge) for edge in edges_all])

    # Register every node id *before* adding edges. networkx orders nodes by
    # first insertion, so this keeps isolated nodes and guarantees that row i
    # of the adjacency matrix is node i, i.e. that the edge indices produced
    # below line up with the rows of feat / labels.
    g = nx.Graph()
    g.add_nodes_from(range(num_nodes))
    g.add_edges_from(edges_)

    adj_sparse = nx.to_scipy_sparse_matrix(g, nodelist=list(range(num_nodes)))
    random.seed(42)
    train_test_split = preprocessing.mask_test_edges(adj_sparse, test_frac=.3, val_frac=0)
    # Tuple layout: (adj_train, train_edges, train_edges_false, val_edges,
    # val_edges_false, test_edges, test_edges_false).
    adj_train = train_test_split[0]
    test_edges = train_test_split[5]

    # Graph with only the non-hidden edges; it keeps all the original nodes.
    g_train0 = nx.from_scipy_sparse_matrix(adj_train)

    train_edges0 = {(min(u, v), max(u, v)) for u, v in g_train0.edges()}
    train_edges0 = np.array([list(edge) for edge in train_edges0])

    edges_test0 = {(min(u, v), max(u, v)) for u, v in test_edges}
    edges_test0 = np.array([list(edge) for edge in edges_test0])

    res_dir = '%s-ccassg-mia-mi' % (name)
    os.makedirs(res_dir, exist_ok=True)

    with open('%s/%s-edges-train.txt' % (res_dir, name), 'w') as out:
        for edge in train_edges0:
            out.write(''.join(str(node) + '\t' for node in edge) + '\n')

    with open('%s/%s-edges-test.txt' % (res_dir, name), 'w') as out:
        for edge in edges_test0:
            out.write(''.join(str(node) + '\t' for node in edge) + '\n')

    print(g.edges)

    train_ratio = 0.7
    val_ratio = 0.1

    train_num = int(num_nodes * train_ratio)
    val_num = int(num_nodes * (train_ratio + val_ratio))

    idx = np.arange(num_nodes)
    np.random.shuffle(idx)

    train_idx = th.tensor(idx[:train_num])
    val_idx = th.tensor(idx[train_num:val_num])
    test_idx = th.tensor(idx[val_num:])

    num_class = max(labels) + 1
    feat = x

    # Each undirected train edge is inserted in both directions:
    # first (max -> min) for all edges, then (min -> max).
    edges_src = torch.from_numpy(np.concatenate((train_edges0[:, 1], train_edges0[:, 0])))
    edges_dst = torch.from_numpy(np.concatenate((train_edges0[:, 0], train_edges0[:, 1])))

    graph0 = dgl.graph((edges_src, edges_dst), num_nodes=num_nodes)
    graph0.ndata['feat'] = feat
    graph0.ndata['label'] = labels

    return graph0, feat, labels, num_class, train_idx, val_idx, test_idx, train_edges0, edges_test0, res_dir


def load_mia_white(name, res_dir):
    """Load a DGL dataset, split its edges, persist the split and rebuild the graph.

    The undirected edge set is handed to ``preprocessing.mask_test_edges``
    (called with ``test_frac=.3, val_frac=0``). The tuple it returns is
    pickled to ``./<res_dir>/<name>-train_test_split``; the train and test
    edges are also written to ``<res_dir>/<name>-edges-train.txt`` and
    ``...-edges-test.txt`` (one edge per line, node ids followed by a tab).
    A new DGL graph holding every train edge in both directions is built.

    Args:
        name: Dataset key: 'cora', 'citeseer', 'pubmed' (node split taken from
            the masks stored on the graph) or 'photo', 'comp', 'cs', 'physics'
            (random 10% / 10% / 80% node split from NumPy's global state).
        res_dir: Directory (relative to the working directory) that receives
            the output files; it is created if missing.

    Returns:
        Tuple ``(graph0, feat, labels, num_class, train_idx, val_idx,
        test_idx, train_edges0, edges_test0)``. ``graph0`` is the train-edge
        graph with ``feat`` / ``label`` attached; ``train_edges0`` and
        ``edges_test0`` are arrays with one ``(min_id, max_id)`` row per
        undirected train / test edge.

    Raises:
        ValueError: If ``name`` is not one of the supported dataset keys.
    """
    citegraph = ['cora', 'citeseer', 'pubmed']
    cograph = ['photo', 'comp', 'cs', 'physics']

    # Reject unknown keys before anything is downloaded or written.
    if name not in citegraph and name not in cograph:
        raise ValueError(
            'unknown dataset name %r; expected one of %s'
            % (name, citegraph + cograph))

    dataset_classes = {
        'cora': CoraGraphDataset,
        'citeseer': CiteseerGraphDataset,
        'pubmed': PubmedGraphDataset,
        'photo': AmazonCoBuyPhotoDataset,
        'comp': AmazonCoBuyComputerDataset,
        'cs': CoauthorCSDataset,
        'physics': CoauthorPhysicsDataset,
    }
    dataset = dataset_classes[name]()
    graph = dataset[0]
    num_nodes = graph.number_of_nodes()

    edges = graph.edges()
    print(edges)

    # (num_edges, 2) array of [src, dst] rows.
    edges_ = np.stack((edges[0].numpy(), edges[1].numpy()), axis=1)

    # Register every node id *before* adding edges. networkx orders nodes by
    # first insertion, so this keeps isolated nodes and guarantees that row i
    # of the adjacency matrix is node i, i.e. that the edge indices produced
    # below line up with the rows of feat / labels.
    g = nx.Graph()
    g.add_nodes_from(range(num_nodes))
    g.add_edges_from(edges_)
    adj_sparse = nx.to_scipy_sparse_matrix(g, nodelist=list(range(num_nodes)))

    random.seed(42)
    train_test_split = preprocessing.mask_test_edges(adj_sparse, test_frac=.3, val_frac=0)

    os.makedirs(res_dir, exist_ok=True)
    with open('./%s/%s-train_test_split' % (res_dir, name), 'wb') as f:
        pkl.dump(train_test_split, f)

    # Tuple layout: (adj_train, train_edges, train_edges_false, val_edges,
    # val_edges_false, test_edges, test_edges_false).
    adj_train = train_test_split[0]
    test_edges = train_test_split[5]

    # Graph with only the non-hidden edges; it keeps all the original nodes.
    g_train0 = nx.from_scipy_sparse_matrix(adj_train)

    # Canonicalise each undirected edge as (min, max) and de-duplicate.
    train_edges0 = {(min(u, v), max(u, v)) for u, v in g_train0.edges()}
    train_edges0 = np.array([list(edge) for edge in train_edges0])

    edges_test0 = {(min(u, v), max(u, v)) for u, v in test_edges}
    edges_test0 = np.array([list(edge) for edge in edges_test0])

    with open('%s/%s-edges-train.txt' % (res_dir, name), 'w') as out:
        for edge in train_edges0:
            out.write(''.join(str(node) + '\t' for node in edge) + '\n')

    with open('%s/%s-edges-test.txt' % (res_dir, name), 'w') as out:
        for edge in edges_test0:
            out.write(''.join(str(node) + '\t' for node in edge) + '\n')

    if name in citegraph:
        train_mask = graph.ndata.pop('train_mask')
        val_mask = graph.ndata.pop('val_mask')
        test_mask = graph.ndata.pop('test_mask')

        train_idx = th.nonzero(train_mask, as_tuple=False).squeeze()
        val_idx = th.nonzero(val_mask, as_tuple=False).squeeze()
        test_idx = th.nonzero(test_mask, as_tuple=False).squeeze()
    else:
        train_ratio = 0.1
        val_ratio = 0.1

        train_num = int(num_nodes * train_ratio)
        val_num = int(num_nodes * (train_ratio + val_ratio))

        idx = np.arange(num_nodes)
        np.random.shuffle(idx)

        train_idx = th.tensor(idx[:train_num])
        val_idx = th.tensor(idx[train_num:val_num])
        test_idx = th.tensor(idx[val_num:])

    num_class = dataset.num_classes
    feat = graph.ndata.pop('feat')
    labels = graph.ndata.pop('label')

    # Each undirected train edge is inserted in both directions:
    # first (max -> min) for all edges, then (min -> max).
    edges_src = torch.from_numpy(np.concatenate((train_edges0[:, 1], train_edges0[:, 0])))
    edges_dst = torch.from_numpy(np.concatenate((train_edges0[:, 0], train_edges0[:, 1])))

    graph0 = dgl.graph((edges_src, edges_dst), num_nodes=num_nodes)
    graph0.ndata['feat'] = feat
    graph0.ndata['label'] = labels

    print(feat, labels)
    print(feat.type(), labels.type())

    return graph0, feat, labels, num_class, train_idx, val_idx, test_idx, train_edges0, edges_test0

def load_mia_white2(name, res_dir):
    """Load a DGL dataset and split its edges into train/test sets.

    The dataset's edges are turned into an undirected NetworkX graph, 30% of
    the edges are held out via ``preprocessing.mask_test_edges`` and a new DGL
    graph containing only the remaining (train) edges, in both directions, is
    built.

    Side effects: writes ``<name>-train_test_split`` (pickle),
    ``<name>-edges-train.txt`` and ``<name>-edges-test.txt`` into ``res_dir``
    (which must already exist) and prints debugging output to stdout.

    Args:
        name: One of 'cora', 'citeseer', 'pubmed', 'photo', 'comp', 'cs',
            'physics'.
        res_dir: Directory (relative to the working directory) receiving the
            split files.

    Returns:
        Tuple ``(graph0, feat, labels, num_class, train_idx, val_idx,
        test_idx, train_edges0, edges_test0)`` where ``graph0`` is the
        train-edge DGL graph and ``train_edges0`` / ``edges_test0`` are arrays
        of ``[min, max]`` endpoint pairs.

    Raises:
        ValueError: If ``name`` is not a supported dataset.
    """
    citegraph = ['cora', 'citeseer', 'pubmed']
    cograph = ['photo', 'comp', 'cs', 'physics']
    # Fail early with a clear message instead of hitting an unbound `dataset`.
    if name not in citegraph and name not in cograph:
        raise ValueError(
            'Unknown dataset name %r; expected one of %s'
            % (name, citegraph + cograph))

    dataset_classes = {
        'cora': CoraGraphDataset,
        'citeseer': CiteseerGraphDataset,
        'pubmed': PubmedGraphDataset,
        'photo': AmazonCoBuyPhotoDataset,
        'comp': AmazonCoBuyComputerDataset,
        'cs': CoauthorCSDataset,
        'physics': CoauthorPhysicsDataset,
    }
    dataset = dataset_classes[name]()
    graph = dataset[0]

    print()

    edges = graph.edges()
    print(edges)

    # (E, 2) array of [src, dst] pairs.
    edge_pairs = np.stack((edges[0].numpy(), edges[1].numpy()), axis=1)

    feat = graph.ndata.pop('feat')

    g = nx.Graph()
    g.add_edges_from(edge_pairs)
    # Make sure isolated nodes are present as well.
    g.add_nodes_from(range(feat.shape[0]))
    # NOTE(review): no nodelist is passed, so matrix rows follow g's node
    # insertion order (edges first). If that is not 0..N-1, the indices
    # returned by mask_test_edges will not equal node IDs -- confirm.
    adj_sparse = nx.to_scipy_sparse_matrix(g)
    random.seed(42)
    train_test_split = preprocessing.mask_test_edges(adj_sparse, test_frac=.3, val_frac=0)
    with open('./%s/%s-train_test_split' % (res_dir, name), 'wb') as f:
        pkl.dump(train_test_split, f)

    # Only the train adjacency and the positive test edges are needed here.
    adj_train, _, _, _, _, test_edges, _ = train_test_split

    # Graph with only the non-hidden edges; keep all the original nodes.
    g_train0 = nx.from_scipy_sparse_matrix(adj_train)
    g_train0.add_nodes_from(range(g.number_of_nodes()))

    # Canonicalise every edge as (min, max) and drop duplicates.
    edge_tuples0 = [(min(u, v), max(u, v)) for u, v in g_train0.edges()]
    train_edges0 = np.array([list(pair) for pair in set(edge_tuples0)])

    edge_tuples_test0 = [(min(edge[0], edge[1]), max(edge[0], edge[1])) for edge in test_edges]
    edges_test0 = np.array([list(pair) for pair in set(edge_tuples_test0)])

    # One edge per line, each endpoint followed by a tab.
    with open('%s/%s-edges-train.txt' % (res_dir, name), 'w') as out:
        for item in train_edges0:
            out.write(''.join(str(jtem) + '\t' for jtem in item) + '\n')

    with open('%s/%s-edges-test.txt' % (res_dir, name), 'w') as out:
        for item in edges_test0:
            out.write(''.join(str(jtem) + '\t' for jtem in item) + '\n')

    # (2, 2E) index: reversed edges first, then the canonical direction, so the
    # DGL graph below is symmetric.
    edges_train_index = np.concatenate(
        (train_edges0[:, ::-1].T, train_edges0.T), axis=1)

    print(graph.edges)

    if name in citegraph:
        # Citation datasets ship with predefined split masks.
        train_mask = graph.ndata.pop('train_mask')
        val_mask = graph.ndata.pop('val_mask')
        test_mask = graph.ndata.pop('test_mask')

        train_idx = th.nonzero(train_mask, as_tuple=False).squeeze()
        val_idx = th.nonzero(val_mask, as_tuple=False).squeeze()
        test_idx = th.nonzero(test_mask, as_tuple=False).squeeze()
    else:
        # Co-purchase / co-author datasets: random 10% / 10% / 80% node split.
        train_ratio = 0.1
        val_ratio = 0.1

        N = graph.number_of_nodes()
        train_num = int(N * train_ratio)
        val_num = int(N * (train_ratio + val_ratio))

        idx = np.arange(N)
        np.random.shuffle(idx)

        train_idx = th.tensor(idx[:train_num])
        val_idx = th.tensor(idx[train_num:val_num])
        test_idx = th.tensor(idx[val_num:])

    num_class = dataset.num_classes
    labels = graph.ndata.pop('label')

    edges_src = torch.from_numpy(edges_train_index[0])
    edges_dst = torch.from_numpy(edges_train_index[1])

    graph0 = dgl.graph((edges_src, edges_dst), num_nodes=g.number_of_nodes())
    graph0.ndata['feat'] = feat
    graph0.ndata['label'] = labels

    print(feat, labels)
    print(feat.type(), labels.type())

    return graph0, feat, labels, num_class, train_idx, val_idx, test_idx, train_edges0, edges_test0


def load_mia_white_varying_density(name, res_dir, r):
    """Load a dataset for the varying-density experiment and split its edges.

    The dataset's edges are turned into an undirected NetworkX graph, 30% of
    the edges are held out via ``preprocessing.mask_test_edges`` and a new DGL
    graph containing only the remaining (train) edges, in both directions, is
    built. Node features and labels are read from the pickle
    ``../PyGCL-main/examples/<name>-adj-ft`` rather than from the DGL dataset.

    Side effects: writes ``<name>-train_test_split`` (pickle),
    ``<name>-edges-train.txt`` and ``<name>-edges-test.txt`` into ``res_dir``
    (which must already exist) and prints debugging output to stdout.

    Args:
        name: One of 'cora', 'citeseer', 'pubmed', 'photo', 'comp', 'cs',
            'physics'.
        res_dir: Directory (relative to the working directory) receiving the
            split files.
        r: Suffix selecting the pickle
            ``../PyGCL-main/examples/<name>-adj-<r>``.

    Returns:
        Tuple ``(graph0, feat, labels, num_class, train_idx, val_idx,
        test_idx, train_edges0, edges_test0)`` where ``graph0`` is the
        train-edge DGL graph and ``train_edges0`` / ``edges_test0`` are arrays
        of ``[min, max]`` endpoint pairs.

    Raises:
        ValueError: If ``name`` is not a supported dataset.
    """
    citegraph = ['cora', 'citeseer', 'pubmed']
    cograph = ['photo', 'comp', 'cs', 'physics']
    # Fail early with a clear message instead of hitting an unbound `dataset`.
    if name not in citegraph and name not in cograph:
        raise ValueError(
            'Unknown dataset name %r; expected one of %s'
            % (name, citegraph + cograph))

    dataset_classes = {
        'cora': CoraGraphDataset,
        'citeseer': CiteseerGraphDataset,
        'pubmed': PubmedGraphDataset,
        'photo': AmazonCoBuyPhotoDataset,
        'comp': AmazonCoBuyComputerDataset,
        'cs': CoauthorCSDataset,
        'physics': CoauthorPhysicsDataset,
    }
    dataset = dataset_classes[name]()
    graph = dataset[0]

    edges = graph.edges()
    print(edges)

    num_edges = graph.number_of_edges()

    print('111',graph.number_of_nodes())

    # Strip every edge from the graph; the originals are re-added below.
    graph = dgl.remove_edges(graph, np.arange(num_edges))

    # NOTE(review): this adjacency (selected by `r`) is loaded but never used
    # afterwards -- the split below is computed from the dataset's own edges.
    # The load is kept so a missing file still fails here; confirm whether the
    # edges were meant to come from this adjacency instead.
    adj_path = '../PyGCL-main/examples/%s-adj-%s' % (name, r)
    with open(adj_path, 'rb') as adj_file:
        adj = pkl.load(adj_file, encoding='latin1')

    # NOTE(review): this appends number_of_nodes() *new* nodes, doubling the
    # node count (and therefore N in the random split below) -- confirm intent.
    graph = dgl.add_nodes(graph, graph.number_of_nodes())

    # Re-insert the original edges in both directions.
    graph = dgl.add_edges(graph, edges[0].numpy(), edges[1].numpy())
    graph = dgl.add_edges(graph, edges[1].numpy(), edges[0].numpy())

    edges = graph.edges()

    # (E, 2) array of [src, dst] pairs.
    edge_pairs = np.stack((edges[0].numpy(), edges[1].numpy()), axis=1)

    g = nx.Graph()
    g.add_edges_from(edge_pairs)
    # NOTE(review): no nodelist is passed, so matrix rows follow g's node
    # insertion order, and nodes without edges are absent from g. The indices
    # returned by mask_test_edges may therefore not equal node IDs -- confirm.
    adj_sparse = nx.to_scipy_sparse_matrix(g)
    random.seed(42)

    train_test_split = preprocessing.mask_test_edges(adj_sparse, test_frac=.3, val_frac=0)

    with open('./%s/%s-train_test_split' % (res_dir, name), 'wb') as f:
        pkl.dump(train_test_split, f)

    # Only the train adjacency and the positive test edges are needed here.
    adj_train, _, _, _, _, test_edges, _ = train_test_split

    # Graph with only the non-hidden edges.
    g_train0 = nx.from_scipy_sparse_matrix(adj_train)

    # Canonicalise every edge as (min, max) and drop duplicates.
    edge_tuples0 = [(min(u, v), max(u, v)) for u, v in g_train0.edges()]
    train_edges0 = np.array([list(pair) for pair in set(edge_tuples0)])

    edge_tuples_test0 = [(min(edge[0], edge[1]), max(edge[0], edge[1])) for edge in test_edges]
    edges_test0 = np.array([list(pair) for pair in set(edge_tuples_test0)])

    # One edge per line, each endpoint followed by a tab.
    with open('%s/%s-edges-train.txt' % (res_dir, name), 'w') as out:
        for item in train_edges0:
            out.write(''.join(str(jtem) + '\t' for jtem in item) + '\n')

    with open('%s/%s-edges-test.txt' % (res_dir, name), 'w') as out:
        for item in edges_test0:
            out.write(''.join(str(jtem) + '\t' for jtem in item) + '\n')

    # (2, 2E) index: reversed edges first, then the canonical direction, so the
    # DGL graph below is symmetric.
    edges_train_index = np.concatenate(
        (train_edges0[:, ::-1].T, train_edges0.T), axis=1)

    print(graph.edges)

    if name in citegraph:
        # Citation datasets ship with predefined split masks.
        train_mask = graph.ndata.pop('train_mask')
        val_mask = graph.ndata.pop('val_mask')
        test_mask = graph.ndata.pop('test_mask')

        train_idx = th.nonzero(train_mask, as_tuple=False).squeeze()
        val_idx = th.nonzero(val_mask, as_tuple=False).squeeze()
        test_idx = th.nonzero(test_mask, as_tuple=False).squeeze()
    else:
        # Co-purchase / co-author datasets: random 10% / 10% / 80% node split.
        train_ratio = 0.1
        val_ratio = 0.1

        N = graph.number_of_nodes()
        train_num = int(N * train_ratio)
        val_num = int(N * (train_ratio + val_ratio))

        idx = np.arange(N)
        np.random.shuffle(idx)

        train_idx = th.tensor(idx[:train_num])
        val_idx = th.tensor(idx[train_num:val_num])
        test_idx = th.tensor(idx[val_num:])

    num_class = dataset.num_classes
    # The dataset's own features are only inspected here; they are replaced by
    # the pickled features loaded just below.
    feat = graph.ndata.pop('feat')
    print(type(feat))

    # Features and labels come from a pickle (first element is ignored).
    feat_path = '../PyGCL-main/examples/%s-adj-ft' % (name)
    with open(feat_path, 'rb') as feat_file:
        _, feat, labels = pkl.load(feat_file, encoding='latin1')
    feat = torch.from_numpy(feat)
    labels = torch.from_numpy(labels)

    edges_src = torch.from_numpy(edges_train_index[0])
    edges_dst = torch.from_numpy(edges_train_index[1])

    print(g.number_of_nodes(),np.shape(feat))

    graph0 = dgl.graph((edges_src, edges_dst), num_nodes=g.number_of_nodes())
    graph0.ndata['feat'] = feat
    graph0.ndata['label'] = labels

    print(feat, labels)
    print(feat.type(), labels.type())

    return graph0, feat, labels, num_class, train_idx, val_idx, test_idx, train_edges0, edges_test0