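"""main-cora-mia-white-lap-defense.py (MIA-GCL / CCA-SSG)

White-box link membership inference attack on CCA-SSG node embeddings with a
Laplace-noise defense: the per-epoch embeddings of the two augmented views are
perturbed with Laplace noise, per-edge cosine/dot-product similarity features
are built over selected epochs, and MLP / random-forest / SVM attack models are
trained and evaluated; per-edge predictions and accuracies are written to CSV.
"""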
import itertools
import pickle as pk
import random

import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score

def readedges(file_name):
    """Read a tab-separated integer edge list and return it as an int array."""
    data_mat = []
    with open(file_name) as f:
        for line in f:
            cur_line = line.strip().split('\t')
            data_mat.append(list(map(int, cur_line)))
    return np.array(data_mat, dtype='int')


def readedges2(file_name):
    """Read a tab-separated integer file as a ragged list of lists.

    Unlike readedges, rows may have different lengths (one row of hashed edge
    keys per augmentation epoch), so no array conversion is done.
    """
    data_mat = []
    with open(file_name) as f:
        for line in f:
            cur_line = line.strip().split('\t')
            if cur_line == ['']:
                # an epoch that dropped no tracked edges yields an empty row
                data_mat.append([])
                continue
            data_mat.append(list(map(int, cur_line)))
    return data_mat

def get_edge_embeddings2(edge_list, emb_matrixs):
    """Build per-edge similarity features (cosine and dot product) across all
    embedding matrices, without Laplace noise or per-edge epoch selection.

    Kept for reference; the main pipeline below uses get_edge_embeddings.
    """
    embs = []
    for edge in edge_list:
        node1 = int(edge[0])
        node2 = int(edge[1])
        emb = []
        for emb_matrix in emb_matrixs:
            emb1 = emb_matrix[node1]
            emb2 = emb_matrix[node2]
            # cosine similarity (small epsilon guards against zero norms)
            sim1 = np.dot(emb1, emb2) / (np.linalg.norm(emb1) * np.linalg.norm(emb2) + 1e-15)
            # raw dot product
            sim2 = np.dot(emb1, emb2)
            emb.append(sim1)
            emb.append(sim2)
        embs.append(emb)
    return np.array(embs)


def add_laplace_noise(data_list, u=0, b=2):
    """Perturb data_list with element-wise Laplace noise (location u, scale b)."""
    laplace_noise = np.random.laplace(u, b, np.shape(data_list))
    return laplace_noise + data_list
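
# Side note (not part of the original pipeline): under the standard Laplace
# mechanism, noise of scale b = sensitivity / epsilon provides epsilon-DP for a
# query with L1 sensitivity `sensitivity`. A hypothetical helper for choosing
# the scale used in get_edge_embeddings below (u=0, b=1):
def laplace_scale_for_epsilon(sensitivity, epsilon):
    """Return the Laplace scale b targeting a given epsilon (illustrative only)."""
    return sensitivity / epsilon
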

def get_edge_embeddings(edge_list, emb_matrixs, idx_epoches_all):
    """Build per-edge features from Laplace-perturbed embeddings.

    For edge i, only the epochs listed in idx_epoches_all[i] are used; each
    selected epoch contributes two features: the cosine similarity and the dot
    product of the two (noisy) node embeddings.
    """
    # Laplace-noise defense: perturb every epoch's embedding matrix before
    # computing any similarities.
    u = 0
    b = 1
    emb_matrixs = add_laplace_noise(np.array(emb_matrixs), u, b)

    embs = []
    i = 0
    print(',,,', np.shape(idx_epoches_all))

    for edge in edge_list:
        node1 = int(edge[0])
        node2 = int(edge[1])
        emb = []
        for emb_matrix in emb_matrixs[idx_epoches_all[i], :, :]:
            emb1 = emb_matrix[node1]
            emb2 = emb_matrix[node2]
            # cosine similarity (small epsilon guards against zero norms)
            sim1 = np.dot(emb1, emb2) / (np.linalg.norm(emb1) * np.linalg.norm(emb2) + 1e-15)
            # raw dot product
            sim2 = np.dot(emb1, emb2)
            emb.append(sim1)
            emb.append(sim2)
        i += 1
        embs.append(emb)
    embs = np.array(embs)
    return embs
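
# Example (hypothetical shapes, for illustration only): with 5 epochs of 8-dim
# embeddings for 10 nodes and 4 selected epochs per edge, each edge yields
# 2 * 4 = 8 similarity features:
#   embeds = np.random.rand(5, 10, 8)
#   idx = np.tile(np.arange(4), (2, 1))                     # one row per edge
#   feats = get_edge_embeddings(np.array([[0, 1], [2, 3]]), embeds, idx)
#   feats.shape == (2, 8)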

dt = 'citeseer'               # dataset prefix used in all result paths
results = []
rats = [0.2, 0.4, 0.6, 0.8]
rats = [0.4]                  # only this ratio is evaluated in this run
for rat in rats:
    res_dir = '%s-ccassg-mia-white-2-%s' % (dt,rat)

    file_name='%s/%s-edges-train.txt' % (res_dir, dt)
    train_edges=readedges(file_name)

    file_name='%s/%s-edges-test.txt' % (res_dir, dt)
    test_edges=readedges(file_name)

    file_name='%s/%s-edges-train_sampled.txt' % (res_dir, dt)
    train_edges_sampled = readedges(file_name)

    file_name ='%s/%s-edges-test_sampled.txt' % (res_dir, dt)
    test_edges_sampled = readedges(file_name)

    # Edge indices of the two augmented views (one 2 x E array per saved epoch)
    with open('./%s/%s-aug1.pkl' % (res_dir, dt), 'rb') as f2:
        aug1s = pk.load(f2, encoding='latin1')
    with open('./%s/%s-aug2.pkl' % (res_dir, dt), 'rb') as f2:
        aug2s = pk.load(f2, encoding='latin1')

    # Node embeddings of each augmented view (epochs x nodes x dim)
    with open('./%s/%s-aug1-embed.pkl' % (res_dir, dt), 'rb') as f2:
        aug1s_embed = pk.load(f2, encoding='latin1')
    with open('./%s/%s-aug2-embed.pkl' % (res_dir, dt), 'rb') as f2:
        aug2s_embed = pk.load(f2, encoding='latin1')

    # Per-edge output of a previous attack run; columns after read_csv are
    # [index, y_score, y_test_grd, node1, node2], so edge[2] is the ground-truth
    # membership label and edge[3], edge[4] are the edge endpoints.
    graph_path = "{}/embed-mlp_sim2.csv".format(res_dir)
    data = pd.read_csv(graph_path)
    edges = data.values.tolist()
    edges = np.array(edges, dtype='int')
    print('lll', np.shape(train_edges_sampled), np.shape(test_edges_sampled), np.shape(edges))
    edges_mia = [(min(edge[3], edge[4]), max(edge[3], edge[4]), edge[2]) for edge in edges]
    edges_mia = set(edges_mia)  # deduplicate undirected edges
    edges_mia = np.array([list(edge_tuple) for edge_tuple in edges_mia])
    print('###', np.shape(edges_mia))


    edges_mia0=np.array(edges_mia)[:,0:2]

    edges_mia=np.array(edges_mia)
    index_pos=np.where(edges_mia[:,2]==1)[0]
    index_neg=np.where(edges_mia[:,2]==0)[0]

    print(len(index_pos),len(index_neg))

    edges_mia_pos0=edges_mia[index_pos]
    edges_mia_neg0=edges_mia[index_neg]

    edges_mia_pos = [[min(edge[0], edge[1]), max(edge[0], edge[1])]for edge in edges_mia_pos0]
    print(np.shape(edges_mia_pos))
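    # Encode each undirected edge (u, v) with u < v as the integer key
    # u * 99999 + v so that edge sets can be compared with fast numpy set
    # operations; this assumes node ids stay below 99999.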
    edges_mia_pos_idx=np.array(edges_mia_pos)[:,0]*99999+np.array(edges_mia_pos)[:,1]#pos testing

    edges_mia_neg= [[min(edge[0], edge[1]), max(edge[0], edge[1])]for edge in edges_mia_neg0]#neg testing

    edges_mia_neg_idx=np.array(edges_mia_neg)[:,0]*99999+np.array(edges_mia_neg)[:,1]


    train_edges_sampled_=[[min(edge[0], edge[1]), max(edge[0], edge[1])]for edge in train_edges_sampled]
    test_edges_sampled_=[[min(edge[0], edge[1]), max(edge[0], edge[1])]for edge in test_edges_sampled]

    train_edges_sampled_idx=np.array(train_edges_sampled_)[:,0]*99999+np.array(train_edges_sampled_)[:,1]
    test_edges_sampled_idx=np.array(test_edges_sampled_)[:,0]*99999+np.array(test_edges_sampled_)[:,1]


    train_edges_pos_idx=np.setdiff1d(train_edges_sampled_idx, edges_mia_pos_idx)#pos training
    train_edges_neg_idx=np.setdiff1d(test_edges_sampled_idx, edges_mia_neg_idx)#neg training

    print(len(train_edges_sampled_idx),len(test_edges_sampled_idx),len(train_edges_pos_idx),len(train_edges_neg_idx))
    print(len(train_edges_pos_idx),len(train_edges_neg_idx))
    # # exit()
    #
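    # aug1s / aug2s each hold a sequence of 2 x E edge_index arrays (one per
    # saved epoch); transpose each to an E x 2 edge list and hash it with the
    # same u * 99999 + v key so dropped edges can be recovered by set difference.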
    aug1s_idx=[]
    for aug in aug1s:
        # print(aug,np.shape(aug))
        aug=aug.T
        aug_=[[min(edge[0], edge[1]), max(edge[0], edge[1])] for edge in aug]
        aug_idx=np.array(aug_)[:,0]*99999+np.array(aug_)[:,1]
        # print('$$$$$$$',np.shape(aug_idx))
        aug1s_idx.append(aug_idx)

    aug2s_idx = []
    for aug in aug2s:
        aug = aug.T
        aug_ = [[min(edge[0], edge[1]), max(edge[0], edge[1])] for edge in aug]
        aug_idx = np.array(aug_)[:, 0] * 99999 + np.array(aug_)[:, 1]
        # print('$$$$$$$', np.shape(aug_idx))
        aug2s_idx.append(aug_idx)

    # For every augmentation epoch, record which member edges were dropped by
    # that augmentation: drop1s/drop2s_pos_idx for the attack-training
    # positives, drop1s/drop2s_pos_idx_test for the attack-evaluation positives.
    drop1s_pos_idx=[]
    drop2s_pos_idx=[]

    drop1s_pos_idx_test=[]
    drop2s_pos_idx_test=[]

    for aug_idx in aug1s_idx:
        drop_idx=np.setdiff1d(train_edges_pos_idx,aug_idx)
        drop1s_pos_idx.append(drop_idx)

    for aug_idx in aug2s_idx:
        drop_idx=np.setdiff1d(train_edges_pos_idx,aug_idx)
        drop2s_pos_idx.append(drop_idx)

    # print(drop1s_pos_idx)
    # print(drop2s_pos_idx)

    for aug_idx in aug1s_idx:
        drop_idx=np.setdiff1d(edges_mia_pos_idx,aug_idx)
        drop1s_pos_idx_test.append(drop_idx)

    for aug_idx in aug2s_idx:
        drop_idx=np.setdiff1d(edges_mia_pos_idx,aug_idx)
        drop2s_pos_idx_test.append(drop_idx)


    # Persist the drop indices (hashed edge keys), one line per augmentation epoch.
    with open('./%s/%s-drop1s_pos_idx.txt' % (res_dir, dt), 'w') as f:
        for item in drop1s_pos_idx:
            for jtem in item:
                f.write(str(jtem) + '\t')
            f.write('\n')

    with open('./%s/%s-drop2s_pos_idx.txt' % (res_dir, dt), 'w') as f:
        for item in drop2s_pos_idx:
            for jtem in item:
                f.write(str(jtem) + '\t')
            f.write('\n')

    with open('./%s/%s-drop1s_pos_idx_test.txt' % (res_dir, dt), 'w') as f:
        for item in drop1s_pos_idx_test:
            for jtem in item:
                f.write(str(jtem) + '\t')
            f.write('\n')

    with open('./%s/%s-drop2s_pos_idx_test.txt' % (res_dir, dt), 'w') as f:
        for item in drop2s_pos_idx_test:
            for jtem in item:
                f.write(str(jtem) + '\t')
            f.write('\n')
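    # Read the drop files back in as ragged lists: one list of hashed edge keys
    # per augmentation epoch.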

    file_name='./%s/%s-drop1s_pos_idx.txt' % (res_dir,dt)
    drop1s_pos_idx0=readedges2(file_name)
    # print(drop1s_pos_idx)

    file_name='./%s/%s-drop2s_pos_idx.txt' % (res_dir,dt)
    drop2s_pos_idx0=readedges2(file_name)

    print('####',drop1s_pos_idx0[0])

    # print(drop2s_pos_idx0[0])

    # print(drop2s_pos_idx0[0])
    file_name = './%s/%s-drop1s_pos_idx_test.txt' % (res_dir, dt)
    drop1s_pos_idx0_test = readedges2(file_name)
    # print(drop1s_pos_idx)

    file_name = './%s/%s-drop2s_pos_idx_test.txt' % (res_dir, dt)
    drop2s_pos_idx0_test = readedges2(file_name)

    iterations = len(drop2s_pos_idx0)  # one entry per augmentation epoch

    iter_ratios=[0.2,0.4,0.6,0.8,1]
    # iter_ratios=[1]

    # results=[]
    for iters in iter_ratios:
        iter_=int(iterations*iters)-1

        drop1s_pos_idx=drop1s_pos_idx0[0:iter_]
        drop2s_pos_idx=drop2s_pos_idx0[0:iter_]

        drop1s_pos_idx_test = drop1s_pos_idx0_test[0:iter_]
        drop2s_pos_idx_test = drop2s_pos_idx0_test[0:iter_]

        drop1s_pos_idx_=list(itertools.chain.from_iterable(drop1s_pos_idx))
        drop2s_pos_idx_=list(itertools.chain.from_iterable(drop2s_pos_idx))

        drop1s_pos_idx_test_ = list(itertools.chain.from_iterable(drop1s_pos_idx_test))
        drop2s_pos_idx_test_ = list(itertools.chain.from_iterable(drop2s_pos_idx_test))

        print(len(drop1s_pos_idx_),len(drop2s_pos_idx_))
        set1=list(set(drop1s_pos_idx_))
        set2=list(set(drop2s_pos_idx_))
        print(len(set1),len(set2))
        set0=list(set(set1+set2))
        # print(set0)
        print(len(set0))
        print(np.shape(test_edges_sampled)[0])
        # exit()
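        # idx_dic1[k] / idx_dic2[k]: how many aug1 / aug2 epochs dropped the
        # edge with hashed key k; idx_dic1_[k] / idx_dic2_[k]: the indices of
        # those epochs.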
        idx_dic1=dict()
        idx_dic2=dict()
        idx_dic1_=dict()
        idx_dic2_=dict()
        for idx in set0:
            idx_dic1[idx]=0
            idx_dic2[idx] = 0
            idx_dic1_[idx]=[]
            idx_dic2_[idx] = []

        i=0
        for idx in drop1s_pos_idx:
            for j in idx:
                idx_dic1[j]+=1
                idx_dic1_[j].append(i)
            i+=1

        i=0
        for idx in drop2s_pos_idx:
            for j in idx:
                idx_dic2[j]+=1
                idx_dic2_[j].append(i)
            i += 1

        print(min(idx_dic1.values()),max(idx_dic1.values()))
        print(min(idx_dic2.values()),max(idx_dic2.values()))

        # print(idx_dic1,idx_dic2)
        idx_dic0=[]
        for idx in set0:
            idx_dic0.append(idx_dic1[idx]+idx_dic2[idx])
        # print(idx_dic0)
        print(min(idx_dic0),max(idx_dic0))

        # Decode the hashed keys back into (node1, node2) pairs
        # (inverse of the node1 * 99999 + node2 encoding).
        train_edges_pos = []
        train_edges_neg = []
        for i in train_edges_pos_idx:
            node1, node2 = divmod(int(i), 99999)
            train_edges_pos.append([node1, node2])

        for i in train_edges_neg_idx:
            node1, node2 = divmod(int(i), 99999)
            train_edges_neg.append([node1, node2])

        test_edges_pos=np.array(edges_mia_pos)
        test_edges_neg=np.array(edges_mia_neg)

        epoches=np.shape(aug1s_embed)[0]
        idx_epoches=list(range(epoches))

        idx_epoches_all=[]
        drop_idx_all = []
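        # For every member (positive) training edge, build two matched epoch
        # index sets of equal size (epoches - max drop count): idx_epoches_all
        # samples epochs in which aug1 kept the edge, drop_idx_all the epochs
        # in which it was dropped (padded with randomly sampled epochs when
        # fewer than the maximum were dropped).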
        for i in train_edges_pos_idx:

            if i in idx_dic1_.keys():

                drop_idx=idx_dic1_[i]
                idx_epoches_ = list(set(idx_epoches).difference(set(drop_idx)))
                if len(drop_idx)<max(idx_dic1.values()):
                    drop_idx_sample2 = random.sample(idx_epoches_, (epoches-max(idx_dic1.values()) - len(drop_idx)))

                    drop_idx_sample=random.sample(idx_epoches_, (max(idx_dic1.values())-len(drop_idx)))
                    idx_epoches_ = list(set(idx_epoches_).difference(set(drop_idx_sample)))

                    drop_idx_ = list(drop_idx) + drop_idx_sample2
                else:
                    idx_epoches_ =list(set(idx_epoches_))
                    drop_idx_ = idx_epoches_

            else:
                idx_epoches_ = idx_epoches

                drop_idx_sample = random.sample(idx_epoches_, (max(idx_dic1.values())))

                idx_epoches_ = list(set(idx_epoches).difference(set(drop_idx_sample)))

                drop_idx_ = idx_epoches_

            idx_epoches_all.append(idx_epoches_)

            drop_idx_all.append(drop_idx_)



        set1=list(set(drop1s_pos_idx_test_))
        set2=list(set(drop2s_pos_idx_test_))
        print(len(set1),len(set2))
        set0=list(set(set1+set2))
        # print(set0)
        print(len(set0))
        print(np.shape(test_edges_sampled)[0])
        # exit()
        idx_dic1_test=dict()
        idx_dic2_test=dict()
        idx_dic1_test_=dict()
        idx_dic2_test_=dict()
        for idx in set0:
            idx_dic1_test[idx]=0
            idx_dic2_test[idx] = 0
            idx_dic1_test_[idx]=[]
            idx_dic2_test_[idx] = []

        i=0
        for idx in drop1s_pos_idx_test:
            for j in idx:
                idx_dic1_test[j]+=1
                idx_dic1_test_[j].append(i)
            i+=1

        i=0
        for idx in drop2s_pos_idx_test:
            for j in idx:
                idx_dic2_test[j]+=1
                idx_dic2_test_[j].append(i)
            i += 1

        # print(min(idx_dic1.values()),max(idx_dic1.values()))
        # print(min(idx_dic2.values()),max(idx_dic2.values()))

        # print(idx_dic1,idx_dic2)
        idx_dic0_test=[]
        for idx in set0:
            idx_dic0_test.append(idx_dic1_test[idx]+idx_dic2_test[idx])
        # print(idx_dic0)
        # print(min(idx_dic0),max(idx_dic0))

        # Decode the evaluation-edge keys the same way (these lists are not
        # used further below; test_edges_pos / test_edges_neg are used instead).
        train_edges_pos_test = []
        train_edges_neg_test = []
        for i in edges_mia_pos_idx:
            node1, node2 = divmod(int(i), 99999)
            train_edges_pos_test.append([node1, node2])

        for i in edges_mia_neg_idx:
            node1, node2 = divmod(int(i), 99999)
            train_edges_neg_test.append([node1, node2])

        test_edges_pos=np.array(edges_mia_pos)
        test_edges_neg=np.array(edges_mia_neg)

        # epoches=np.shape(aug1s_embed)[0]
        # idx_epoches=list(range(epoches))

        idx_epoches_all_test=[]
        # drop_idx_all = []
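        # Analogous epoch selection for the attack-evaluation positives
        # (edges_mia_pos). Note that the feature extraction below currently
        # reuses idx_epoches_all / drop_idx_all from the training edges rather
        # than idx_epoches_all_test (the *_test calls are commented out).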
        for i in edges_mia_pos_idx:

            if i in idx_dic1_test_.keys():

                drop_idx=idx_dic1_test_[i]
                idx_epoches_ = list(set(idx_epoches).difference(set(drop_idx)))
                if len(drop_idx)<max(idx_dic1_test.values()):
                    drop_idx_sample2 = random.sample(idx_epoches_, (epoches - max(idx_dic1_test.values()) - len(drop_idx)))
                    drop_idx_sample=random.sample(idx_epoches_, (max(idx_dic1_test.values())-len(drop_idx)))
                    idx_epoches_test_ = list(set(idx_epoches_).difference(set(drop_idx_sample)))
                    drop_idx_ = list(drop_idx) + drop_idx_sample2
                else:
                    idx_epoches_test_ =list(set(idx_epoches_))
                    drop_idx_ = idx_epoches_

            else:
                idx_epoches_ = idx_epoches

                drop_idx_sample = random.sample(idx_epoches_, (max(idx_dic1_test.values())))

                idx_epoches_test_ = list(set(idx_epoches).difference(set(drop_idx_sample)))

                drop_idx_ = idx_epoches_

            idx_epoches_all_test.append(idx_epoches_test_)
            # drop_idx_all.append(drop_idx_)





        idx_epoches_all=np.array(idx_epoches_all)
        drop_idx_all = np.array(drop_idx_all)
        train_edges_pos=np.array(train_edges_pos)
        train_edges_neg=np.array(train_edges_neg)

        idx_epoches_all_test = np.array(idx_epoches_all_test)

        print()

        print('iii',np.shape(train_edges_pos),np.shape(train_edges_neg))

        # idx_epoches_all_neg_train=[]
        # idx_epoches_all_pos_test=[]
        # idx_epoches_all_neg_test=[]
        #
        # for j in range(np.shape(train_edges_neg)[0]):
        #     tmp=random.sample(range(np.shape(aug1s_embed)[0]), (np.shape(idx_epoches_all)[1]))
        #     idx_epoches_all_neg_train.append(tmp)
        #
        #
        # # print('%%%',np.shape(train_edges_neg),np.shape(test_edges_neg),np.shape(test_edges_pos))
        #
        # for j in range(np.shape(test_edges_pos)[0]):
        #     tmp=random.sample(range(np.shape(aug1s_embed)[0]), (np.shape(idx_epoches_all)[1]))
        #     idx_epoches_all_pos_test.append(tmp)
        #
        # for j in range(np.shape(test_edges_neg)[0]):
        #     tmp=random.sample(range(np.shape(aug1s_embed)[0]), (np.shape(idx_epoches_all)[1]))
        #     idx_epoches_all_neg_test.append(tmp)

        # idx_epoches_all_neg_train = np.array(idx_epoches_all_neg_train)
        # idx_epoches_all_pos_test = np.array(idx_epoches_all_pos_test)
        # idx_epoches_all_neg_test =  np.array(idx_epoches_all_neg_test)
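        # Attack labels: each row is [node1, node2, member?]; member edges of
        # the target training graph are labelled 1, non-members 0. y_train_*
        # trains the attack model, y_test_* is the held-out MIA evaluation set.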

        y_train_train=np.concatenate((train_edges_pos,np.ones(np.shape(train_edges_pos)[0]).reshape(-1,1)),axis=1)
        y_train_test=np.concatenate((train_edges_neg,np.zeros(np.shape(train_edges_neg)[0]).reshape(-1,1)),axis=1)
        y_test_train=np.concatenate((test_edges_pos,np.ones(np.shape(test_edges_pos)[0]).reshape(-1,1)),axis=1)
        y_test_test=np.concatenate((test_edges_neg,np.zeros(np.shape(test_edges_neg)[0]).reshape(-1,1)),axis=1)

        print(np.shape(train_edges_pos),np.shape(idx_epoches_all),np.shape(aug1s_embed))
        pos_train_edge_embs0 = get_edge_embeddings(train_edges_pos, aug1s_embed,idx_epoches_all)
        neg_train_edge_embs0 = get_edge_embeddings(train_edges_neg, aug1s_embed,drop_idx_all)

        # pos_test_edge_embs0 = get_edge_embeddings(test_edges_pos, aug1s_embed,idx_epoches_all_test)
        # neg_test_edge_embs0 = get_edge_embeddings(test_edges_neg, aug1s_embed,idx_epoches_all_test)

        pos_test_edge_embs0 = get_edge_embeddings(test_edges_pos, aug1s_embed, idx_epoches_all)
        neg_test_edge_embs0 = get_edge_embeddings(test_edges_neg, aug1s_embed,drop_idx_all)


        pos_train_edge_embs1 = get_edge_embeddings(train_edges_pos, aug2s_embed,idx_epoches_all)
        neg_train_edge_embs1 = get_edge_embeddings(train_edges_neg, aug2s_embed,drop_idx_all)

        # pos_test_edge_embs1 = get_edge_embeddings(test_edges_pos, aug2s_embed,idx_epoches_all_test)
        # neg_test_edge_embs1 = get_edge_embeddings(test_edges_neg, aug2s_embed,idx_epoches_all_test)

        pos_test_edge_embs1 = get_edge_embeddings(test_edges_pos, aug2s_embed, idx_epoches_all)
        neg_test_edge_embs1 = get_edge_embeddings(test_edges_neg, aug2s_embed,drop_idx_all)

        X_train = np.concatenate((pos_train_edge_embs0 ,neg_train_edge_embs0), axis=0)
        X_test = np.concatenate((pos_test_edge_embs0 , neg_test_edge_embs0), axis=0)
        y_train = np.concatenate((y_train_train, y_train_test), axis=0)
        y_test = np.concatenate((y_test_train, y_test_test), axis=0)
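
        # Three feature sets are evaluated: sim0 = features from the aug1-view
        # embeddings, sim1 = from the aug2-view embeddings, sim2 = their
        # concatenation. Each is fed to an MLP, a random forest and a
        # one-vs-rest SVM; per-edge predictions go to embed-<clf>_sim<k>.csv.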


        # # ######################################################################

        from sklearn import metrics
        from sklearn.neural_network import MLPClassifier

        mlp = MLPClassifier(solver='adam', alpha=1e-5, hidden_layer_sizes=(64, 32, 16), random_state=1,
                            max_iter=1000)

        mlp.fit(X_train, y_train[:, 2])

        print("Training set score: %f" % mlp.score(X_train, y_train[:, 2]))
        print("Test set score: %f" % mlp.score(X_test, y_test[:, 2]))

        y_score = mlp.predict(X_test)
        print(metrics.f1_score(y_test[:, 2], y_score, average='micro'))
        print(metrics.classification_report(y_test[:, 2], y_score, labels=range(3)))

        acc_mlp_sim_embed0 = accuracy_score(y_score, y_test[:, 2])

        tsts = []
        for i in range(len(y_score)):
            node1 = y_test[i][0]
            node2 = y_test[i][1]

            tst = [y_score[i], y_test[i][2], y_test[i][0], y_test[i][1]]
            tsts.append(tst)
        name = ['y_score', 'y_test_grd', 'node1', 'node2']
        result = pd.DataFrame(columns=name, data=tsts)
        result.to_csv("{}/{}-embed-mlp_sim0.csv".format(res_dir, dt))

        # # ######################################################################

        from sklearn.ensemble import RandomForestClassifier

        rf = RandomForestClassifier(max_depth=150, random_state=0)
        rf.fit(X_train, y_train[:, 2])

        print("Training set score: %f" % rf.score(X_train, y_train[:, 2]))
        print("Test set score: %f" % rf.score(X_test, y_test[:, 2]))

        y_score = rf.predict(X_test)
        print(metrics.f1_score(y_test[:, 2], y_score, average='micro'))
        print(metrics.classification_report(y_test[:, 2], y_score, labels=range(3)))

        acc_rf_sim_embed0 = accuracy_score(y_score, y_test[:, 2])

        tsts = []
        for i in range(len(y_score)):
            node1 = y_test[i][0]
            node2 = y_test[i][1]

            tst = [y_score[i], y_test[i][2], y_test[i][0], y_test[i][1]]
            tsts.append(tst)
        name = ['y_score', 'y_test_grd', 'node1', 'node2']

        result = pd.DataFrame(columns=name, data=tsts)
        result.to_csv("{}/{}-embed-rf_sim0.csv".format(res_dir, dt))

        # # ######################################################################

        from sklearn.multiclass import OneVsRestClassifier
        from sklearn.svm import SVC

        svm = OneVsRestClassifier(SVC())
        svm.fit(X_train, y_train[:, 2])

        print("Training set score: %f" % svm.score(X_train, y_train[:, 2]))
        print("Test set score: %f" % svm.score(X_test, y_test[:, 2]))

        y_score = svm.predict(X_test)
        print(metrics.f1_score(y_test[:, 2], y_score, average='micro'))
        print(metrics.classification_report(y_test[:, 2], y_score, labels=range(3)))

        acc_svm_sim_embed0 = accuracy_score(y_score, y_test[:, 2])

        tsts = []
        for i in range(len(y_score)):
            node1 = y_test[i][0]
            node2 = y_test[i][1]

            tst = [y_score[i], y_test[i][2], y_test[i][0], y_test[i][1]]
            tsts.append(tst)
        name = ['y_score', 'y_test_grd', 'node1', 'node2']
        result = pd.DataFrame(columns=name, data=tsts)
        result.to_csv("{}/{}-embed-svm_sim0.csv".format(res_dir, dt))


        #
        X_train = np.concatenate((pos_train_edge_embs1 ,neg_train_edge_embs1), axis=0)
        X_test = np.concatenate((pos_test_edge_embs1 , neg_test_edge_embs1), axis=0)
        y_train = np.concatenate((y_train_train, y_train_test), axis=0)
        y_test = np.concatenate((y_test_train, y_test_test), axis=0)


        # # ######################################################################

        from sklearn import metrics
        from sklearn.neural_network import MLPClassifier

        mlp = MLPClassifier(solver='adam', alpha=1e-5, hidden_layer_sizes=(64, 32, 16), random_state=1,
                            max_iter=1000)

        mlp.fit(X_train, y_train[:, 2])

        print("Training set score: %f" % mlp.score(X_train, y_train[:, 2]))
        print("Test set score: %f" % mlp.score(X_test, y_test[:, 2]))

        y_score = mlp.predict(X_test)
        print(metrics.f1_score(y_test[:, 2], y_score, average='micro'))
        print(metrics.classification_report(y_test[:, 2], y_score, labels=range(3)))

        acc_mlp_sim_embed1 = accuracy_score(y_score, y_test[:, 2])

        tsts = []
        for i in range(len(y_score)):
            node1 = y_test[i][0]
            node2 = y_test[i][1]

            tst = [y_score[i], y_test[i][2], y_test[i][0], y_test[i][1]]
            tsts.append(tst)
        name = ['y_score', 'y_test_grd', 'node1', 'node2']
        result = pd.DataFrame(columns=name, data=tsts)
        result.to_csv("{}/{}-embed-mlp_sim1.csv".format(res_dir, dt))

        # # ######################################################################

        from sklearn.ensemble import RandomForestClassifier

        rf = RandomForestClassifier(max_depth=150, random_state=0)
        rf.fit(X_train, y_train[:, 2])

        print("Training set score: %f" % rf.score(X_train, y_train[:, 2]))
        print("Test set score: %f" % rf.score(X_test, y_test[:, 2]))

        y_score = rf.predict(X_test)
        print(metrics.f1_score(y_test[:, 2], y_score, average='micro'))
        print(metrics.classification_report(y_test[:, 2], y_score, labels=range(3)))

        acc_rf_sim_embed1 = accuracy_score(y_score, y_test[:, 2])

        tsts = []
        for i in range(len(y_score)):
            node1 = y_test[i][0]
            node2 = y_test[i][1]

            tst = [y_score[i], y_test[i][2], y_test[i][0], y_test[i][1]]
            tsts.append(tst)
        name = ['y_score', 'y_test_grd', 'node1', 'node2']

        result = pd.DataFrame(columns=name, data=tsts)
        result.to_csv("{}/{}-embed-rf_sim1.csv".format(res_dir, dt))

        # # ######################################################################

        from sklearn.multiclass import OneVsRestClassifier
        from sklearn.svm import SVC

        svm = OneVsRestClassifier(SVC())
        svm.fit(X_train, y_train[:, 2])

        print("Training set score: %f" % svm.score(X_train, y_train[:, 2]))
        print("Test set score: %f" % svm.score(X_test, y_test[:, 2]))

        y_score = svm.predict(X_test)
        print(metrics.f1_score(y_test[:, 2], y_score, average='micro'))
        print(metrics.classification_report(y_test[:, 2], y_score, labels=range(3)))

        acc_svm_sim_embed1 = accuracy_score(y_score, y_test[:, 2])

        tsts = []
        for i in range(len(y_score)):
            node1 = y_test[i][0]
            node2 = y_test[i][1]

            tst = [y_score[i], y_test[i][2], y_test[i][0], y_test[i][1]]
            tsts.append(tst)
        name = ['y_score', 'y_test_grd', 'node1', 'node2']
        result = pd.DataFrame(columns=name, data=tsts)
        result.to_csv("{}/{}-embed-svm_sim1.csv".format(res_dir, dt))

        pos_train_edge_embs1 = np.concatenate((pos_train_edge_embs0 ,pos_train_edge_embs1), axis=1)
        neg_train_edge_embs1 = np.concatenate((neg_train_edge_embs0 ,neg_train_edge_embs1), axis=1)

        pos_test_edge_embs1 = np.concatenate((pos_test_edge_embs0 ,pos_test_edge_embs1), axis=1)
        neg_test_edge_embs1 = np.concatenate((neg_test_edge_embs0 ,neg_test_edge_embs1), axis=1)

        X_train = np.concatenate((pos_train_edge_embs1 ,neg_train_edge_embs1), axis=0)
        X_test = np.concatenate((pos_test_edge_embs1 , neg_test_edge_embs1), axis=0)
        y_train = np.concatenate((y_train_train, y_train_test), axis=0)
        y_test = np.concatenate((y_test_train, y_test_test), axis=0)


        # # ######################################################################

        from sklearn import metrics
        from sklearn.neural_network import MLPClassifier

        mlp = MLPClassifier(solver='adam', alpha=1e-5, hidden_layer_sizes=(64, 32, 16), random_state=1,
                            max_iter=1000)

        mlp.fit(X_train, y_train[:, 2])

        print("Training set score: %f" % mlp.score(X_train, y_train[:, 2]))
        print("Test set score: %f" % mlp.score(X_test, y_test[:, 2]))

        y_score = mlp.predict(X_test)
        print(metrics.f1_score(y_test[:, 2], y_score, average='micro'))
        print(metrics.classification_report(y_test[:, 2], y_score, labels=range(3)))

        acc_mlp_sim_embed2 = accuracy_score(y_score, y_test[:, 2])

        tsts = []
        for i in range(len(y_score)):
            node1 = y_test[i][0]
            node2 = y_test[i][1]

            tst = [y_score[i], y_test[i][2], y_test[i][0], y_test[i][1]]
            tsts.append(tst)
        name = ['y_score', 'y_test_grd', 'node1', 'node2']
        result = pd.DataFrame(columns=name, data=tsts)
        result.to_csv("{}/{}-embed-mlp_sim2.csv".format(res_dir, dt))

        # # ######################################################################

        from sklearn.ensemble import RandomForestClassifier

        rf = RandomForestClassifier(max_depth=150, random_state=0)
        rf.fit(X_train, y_train[:, 2])

        print("Training set score: %f" % rf.score(X_train, y_train[:, 2]))
        print("Test set score: %f" % rf.score(X_test, y_test[:, 2]))

        y_score = rf.predict(X_test)
        print(metrics.f1_score(y_test[:, 2], y_score, average='micro'))
        print(metrics.classification_report(y_test[:, 2], y_score, labels=range(3)))

        acc_rf_sim_embed2 = accuracy_score(y_score, y_test[:, 2])

        tsts = []
        for i in range(len(y_score)):
            node1 = y_test[i][0]
            node2 = y_test[i][1]

            tst = [y_score[i], y_test[i][2], y_test[i][0], y_test[i][1]]
            tsts.append(tst)
        name = ['y_score', 'y_test_grd', 'node1', 'node2']

        result = pd.DataFrame(columns=name, data=tsts)
        result.to_csv("{}/{}-embed-rf_sim2.csv".format(res_dir, dt))

        # # ######################################################################

        from sklearn.multiclass import OneVsRestClassifier
        from sklearn.svm import SVC

        svm = OneVsRestClassifier(SVC())
        svm.fit(X_train, y_train[:, 2])

        print("Training set score: %f" % svm.score(X_train, y_train[:, 2]))
        print("Test set score: %f" % svm.score(X_test, y_test[:, 2]))

        y_score = svm.predict(X_test)
        print(metrics.f1_score(y_test[:, 2], y_score, average='micro'))
        print(metrics.classification_report(y_test[:, 2], y_score, labels=range(3)))

        acc_svm_sim_embed2 = accuracy_score(y_score, y_test[:, 2])

        tsts = []
        for i in range(len(y_score)):
            node1 = y_test[i][0]
            node2 = y_test[i][1]

            tst = [y_score[i], y_test[i][2], y_test[i][0], y_test[i][1]]
            tsts.append(tst)
        name = ['y_score', 'y_test_grd', 'node1', 'node2']
        result = pd.DataFrame(columns=name, data=tsts)
        result.to_csv("{}/{}-embed-svm_sim2.csv".format(res_dir, dt))

        print(acc_mlp_sim_embed0, acc_rf_sim_embed0, acc_svm_sim_embed0)

        print(acc_mlp_sim_embed1, acc_rf_sim_embed1, acc_svm_sim_embed1)

        print(acc_mlp_sim_embed2, acc_rf_sim_embed2, acc_svm_sim_embed2)

        results.append([acc_mlp_sim_embed0, acc_rf_sim_embed0, acc_svm_sim_embed0,acc_mlp_sim_embed1, acc_rf_sim_embed1, acc_svm_sim_embed1,acc_mlp_sim_embed2, acc_rf_sim_embed2, acc_svm_sim_embed2])
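        # Row just appended: MLP/RF/SVM accuracy for the sim0 (aug1), sim1
        # (aug2) and sim2 (concatenated) feature sets.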

result_all = pd.DataFrame(data=results)
result_all.to_csv("{}/results_all.csv".format(res_dir))