LMIA / deepwalk / deepwalk / deepwalk.py
deepwalk.py
Raw
#! /usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import division
import os
import sys
import random
from io import open
from argparse import ArgumentParser, FileType, ArgumentDefaultsHelpFormatter
from collections import Counter
from concurrent.futures import ProcessPoolExecutor
import logging

from . import graph
from . import walks as serialized_walks
from gensim.models import Word2Vec
from .skipgram import Skipgram

from six import text_type as unicode
from six import iteritems
from six.moves import range
#import tensorflow as tf
import numpy as np
import argparse

import pickle
import time
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score, average_precision_score, roc_curve

import psutil
import networkx as nx
import torch
import torch.nn as nn

from word2vec import data_reader, model, trainer

from deepwalk_pytorch import word2vec

from multiprocessing import cpu_count

import pandas as pd

p = psutil.Process(os.getpid())
try:
    p.set_cpu_affinity(list(range(cpu_count())))
except AttributeError:
    try:
        p.cpu_affinity(list(range(cpu_count())))
    except AttributeError:
        pass

logger = logging.getLogger(__name__)
LOGFORMAT = "%(asctime).19s %(levelname)s %(filename)s: %(lineno)s %(message)s"


def debug(type_, value, tb):
  if hasattr(sys, 'ps1') or not sys.stderr.isatty():
    sys.__excepthook__(type_, value, tb)
  else:
    import traceback
    import pdb
    traceback.print_exception(type_, value, tb)
    print(u"\n")
    pdb.pm()


def write_walks(args, walks,DATASET,METHOD,F,ego_user):
    if ego_user=='3980' or ego_user=='698':
        file_ = open('./' + METHOD + '-walks-' + F + '-' + str(ego_user), 'w')

    else:
        file_ = open('E:/python/banlance/code/' + DATASET + '/' + METHOD + '-walks-' + F + '-' + str(ego_user), 'w')
    for walk in walks:
        line = str()
        for node in walk:
            line += str(node) + ' '
        line += '\n'
        file_.write(line)
    file_.close()

def write_walks2(args, walks,DATASET,METHOD,F,ego_user,res_dir):
    #file_ = open('E:/python/banlance/code/' + DATASET + '/' + METHOD + '-walks-' + F + '-' + str(ego_user), 'w')
    if ego_user=='3980' or ego_user=='698':
        file_ = open('./' + METHOD + '-walks-' + F + '-' + str(ego_user), 'w')
    else:
        file_ = open(res_dir+ METHOD + '-walks-' + F + '-' + str(ego_user), 'w')
    for walk in walks:
        line = str()
        for node in walk:
            line += str(node) + ' '
        line += '\n'
        file_.write(line)
    file_.close()


def process(args,adj_train,DATASET,METHOD,F,ego_user):

  if args.format == "adjlist":
    G = graph.load_adjacencylist(args.input, undirected=args.undirected)
  elif args.format == "edgelist":
    G = graph.load_edgelist(args.input, undirected=args.undirected)
  elif args.format == "mat":
    G = graph.load_matfile(args.input, variable_name=args.matfile_variable_name, undirected=args.undirected)
  else:
    raise Exception("Unknown file format: '%s'.  Valid formats: 'adjlist', 'edgelist', 'mat'" % args.format)


  print("Number of nodes: {}".format(len(G.nodes())))

  num_walks = len(G.nodes()) * args.number_walks

  print("Number of walks: {}".format(num_walks))

  data_size = num_walks * args.walk_length

  print("Data size (walks*length): {}".format(data_size))

  if data_size < args.max_memory_data_size:
    print("Walking...")
    walks = graph.build_deepwalk_corpus(G, num_paths=args.number_walks,
                                        path_length=args.walk_length, alpha=0, rand=random.Random(args.seed))

    write_walks(args, walks, DATASET, METHOD, F, ego_user)
    print("Training...")
    model = Word2Vec(walks, size=args.representation_size, window=args.window_size, min_count=0, sg=1, hs=1, workers=args.workers)

    training_loss = model.get_latest_training_loss()

  else:
    print("Data size {} is larger than limit (max-memory-data-size: {}).  Dumping walks to disk.".format(data_size, args.max_memory_data_size))
    print("Walking...")

    walks_filebase = args.output + ".walks"
    walk_files = serialized_walks.write_walks_to_disk(G, walks_filebase, num_paths=args.number_walks,
                                         path_length=args.walk_length, alpha=0, rand=random.Random(args.seed),
                                         num_workers=args.workers)

    print("Counting vertex frequency...")
    if not args.vertex_freq_degree:
      vertex_counts = serialized_walks.count_textfiles(walk_files, args.workers)
    else:
      # use degree distribution for frequency in tree
      vertex_counts = G.degree(nodes=G.iterkeys())

    print("Training...")
    walks_corpus = serialized_walks.WalksCorpus(walk_files)
    model = Skipgram(sentences=walks_corpus, vocabulary_counts=vertex_counts,
                     size=args.representation_size,
                     window=args.window_size, min_count=0, trim_rule=None, workers=args.workers)

  model.wv.save_word2vec_format(args.output)

  # exit()



  # Store embeddings mapping
  emb_mappings = model.wv

  # Create node embeddings matrix (rows = nodes, columns = embedding features)
  emb_list = []
  for node_index in range(0, adj_train.shape[0]):
      node_str = str(node_index)
      node_emb = emb_mappings[node_str]
      emb_list.append(node_emb)
  emb_matrix = np.vstack(emb_list)

  with open('E:\\python\\banlance\\code\\' + DATASET + '\\' + METHOD + '-embeds-' + F + '-' + str(ego_user), 'w') as f:
      f.write('%d %d\n' % (adj_train.shape[0], args.representation_size))
      for i in range(adj_train.shape[0]):
          e = ' '.join(map(lambda x: str(x), emb_list[i]))
          f.write('%s %s\n' % (str(i), e))

  return emb_matrix



def process_inf_debias(args,adj_train,DATASET,METHOD,F,ego_user):

  if args.format == "adjlist":
    G = graph.load_adjacencylist(args.input, undirected=args.undirected)
  elif args.format == "edgelist":
    G = graph.load_edgelist(args.input, undirected=args.undirected)
  elif args.format == "mat":
    G = graph.load_matfile(args.input, variable_name=args.matfile_variable_name, undirected=args.undirected)
  else:
    raise Exception("Unknown file format: '%s'.  Valid formats: 'adjlist', 'edgelist', 'mat'" % args.format)


  print("Number of nodes: {}".format(len(G.nodes())))

  num_walks = len(G.nodes()) * args.number_walks

  print("Number of walks: {}".format(num_walks))

  data_size = num_walks * args.walk_length

  print("Data size (walks*length): {}".format(data_size))

  if data_size < args.max_memory_data_size:
    print("Walking...")
    walks = graph.build_deepwalk_corpus(G, num_paths=args.number_walks,
                                        path_length=args.walk_length, alpha=0, rand=random.Random(args.seed))

    write_walks(args, walks, DATASET, METHOD, F, ego_user)
    print("Training...")
    model = Word2Vec(walks, size=args.representation_size, window=args.window_size, min_count=0, sg=1, hs=1, workers=args.workers)

    training_loss = model.get_latest_training_loss()

  else:
    print("Data size {} is larger than limit (max-memory-data-size: {}).  Dumping walks to disk.".format(data_size, args.max_memory_data_size))
    print("Walking...")

    walks_filebase = args.output + ".walks"
    walk_files = serialized_walks.write_walks_to_disk(G, walks_filebase, num_paths=args.number_walks,
                                         path_length=args.walk_length, alpha=0, rand=random.Random(args.seed),
                                         num_workers=args.workers)

    print("Counting vertex frequency...")
    if not args.vertex_freq_degree:
      vertex_counts = serialized_walks.count_textfiles(walk_files, args.workers)
    else:
      # use degree distribution for frequency in tree
      vertex_counts = G.degree(nodes=G.iterkeys())

    print("Training...")
    walks_corpus = serialized_walks.WalksCorpus(walk_files)
    model = Skipgram(sentences=walks_corpus, vocabulary_counts=vertex_counts,
                     size=args.representation_size,
                     window=args.window_size, min_count=0, trim_rule=None, workers=args.workers)

  model.wv.save_word2vec_format(args.output)

  # exit()



  # Store embeddings mapping
  emb_mappings = model.wv

  # Create node embeddings matrix (rows = nodes, columns = embedding features)
  emb_list = []
  for node_index in range(0, adj_train.shape[0]):
      node_str = str(node_index)
      node_emb = emb_mappings[node_str]
      emb_list.append(node_emb)
  emb_matrix = np.vstack(emb_list)

  with open('E:\\python\\banlance\\code\\' + DATASET + '\\' + METHOD + '-embeds-' + F + '-' + str(ego_user), 'w') as f:
      f.write('%d %d\n' % (adj_train.shape[0], args.representation_size))
      for i in range(adj_train.shape[0]):
          e = ' '.join(map(lambda x: str(x), emb_list[i]))
          f.write('%s %s\n' % (str(i), e))

  return emb_matrix







def deepwalk_inf_debias(g_train, train_test_split,DATASET, METHOD,res_dir, ego_user, F):
  parser = ArgumentParser("deepwalk",
                          formatter_class=ArgumentDefaultsHelpFormatter,
                          conflict_handler='resolve')

  parser.add_argument("--debug", dest="debug", action='store_true', default=False,
                      help="drop a debugger if an exception is raised.")

  parser.add_argument('--format', default='edgelist',
                      help='File format of input file')

  parser.add_argument('--input', nargs='?',
                      help='Input graph file')

  parser.add_argument("-l", "--log", dest="log", default="INFO",
                      help="log verbosity level")

  parser.add_argument('--matfile-variable-name', default='network',
                      help='variable name of adjacency matrix inside a .mat file.')

  parser.add_argument('--max-memory-data-size', default=1000000000, type=int,
                      help='Size to start dumping walks to disk, instead of keeping them in memory.')

  parser.add_argument('--number-walks', default=10, type=int,
                      help='Number of random walks to start at each node')

  parser.add_argument('--output',
                      help='Output representation file')

  parser.add_argument('--representation-size', default=128, type=int,
                      help='Number of latent dimensions to learn for each node.')

  parser.add_argument('--seed', default=0, type=int,
                      help='Seed for random walk generator.')

  parser.add_argument('--undirected', default=True, type=bool,
                      help='Treat graph as undirected.')

  parser.add_argument('--vertex-freq-degree', default=False, action='store_true',
                      help='Use vertex degree to estimate the frequency of nodes '
                           'in the random walks. This option is faster than '
                           'calculating the vocabulary.')

  parser.add_argument('--walk-length', default=80, type=int,
                      help='Length of the random walk started at each node')

  parser.add_argument('--window-size', default=10, type=int,
                      help='Window size of skipgram model.')

  parser.add_argument('--workers', default=10, type=int,
                      help='Number of parallel processes.')
  parser.add_argument('--edge_score_mode', default='edge-emb')


  args = parser.parse_args()
  args.input='%s/%s-fair-%s-train.txt' % (res_dir, str(ego_user), F)
  args.output = 'E:\\python\\banlance\\code\\'+DATASET+'\\'+METHOD+'-embeds-'+F+'-'+str(ego_user)
  numeric_level = getattr(logging, args.log.upper(), None)
  logging.basicConfig(format=LOGFORMAT)
  logger.setLevel(numeric_level)

  if args.debug:
   sys.excepthook = debug

  adj_train, train_edges, train_edges_false, test_edges, test_edges_false = train_test_split
  g_train=nx.adjacency_matrix(g_train)
  emb_matrix=process_inf_debias(args,g_train,DATASET,METHOD,F,ego_user)

  test_edge_labels, test_preds=linkpre_scores_inf_debias(args, emb_matrix, train_test_split, ego_user,DATASET,METHOD, F)

  return test_edge_labels,test_preds





def deepwalk(g_train, train_test_split,DATASET,METHOD,res_dir, ego_user, F):
  parser = ArgumentParser("deepwalk",
                          formatter_class=ArgumentDefaultsHelpFormatter,
                          conflict_handler='resolve')

  parser.add_argument("--debug", dest="debug", action='store_true', default=False,
                      help="drop a debugger if an exception is raised.")

  parser.add_argument('--format', default='edgelist',
                      help='File format of input file')

  parser.add_argument('--input', nargs='?',
                      help='Input graph file')

  parser.add_argument("-l", "--log", dest="log", default="INFO",
                      help="log verbosity level")

  parser.add_argument('--matfile-variable-name', default='network',
                      help='variable name of adjacency matrix inside a .mat file.')

  parser.add_argument('--max-memory-data-size', default=1000000000, type=int,
                      help='Size to start dumping walks to disk, instead of keeping them in memory.')

  parser.add_argument('--number-walks', default=10, type=int,
                      help='Number of random walks to start at each node')

  parser.add_argument('--output',
                      help='Output representation file')

  parser.add_argument('--representation-size', default=128, type=int,
                      help='Number of latent dimensions to learn for each node.')

  parser.add_argument('--seed', default=0, type=int,
                      help='Seed for random walk generator.')

  parser.add_argument('--undirected', default=True, type=bool,
                      help='Treat graph as undirected.')

  parser.add_argument('--vertex-freq-degree', default=False, action='store_true',
                      help='Use vertex degree to estimate the frequency of nodes '
                           'in the random walks. This option is faster than '
                           'calculating the vocabulary.')

  parser.add_argument('--walk-length', default=80, type=int,
                      help='Length of the random walk started at each node')

  parser.add_argument('--window-size', default=10, type=int,
                      help='Window size of skipgram model.')

  parser.add_argument('--workers', default=10, type=int,
                      help='Number of parallel processes.')
  parser.add_argument('--edge_score_mode', default='edge-emb')


  args = parser.parse_args()
  args.input='%s/%s-fair-%s-train.txt' % (res_dir, str(ego_user), F)
  args.output = 'E:\\python\\banlance\\code\\'+DATASET+'\\'+METHOD+'-embeds-'+F+'-'+str(ego_user)
  numeric_level = getattr(logging, args.log.upper(), None)
  logging.basicConfig(format=LOGFORMAT)
  logger.setLevel(numeric_level)

  if args.debug:
   sys.excepthook = debug

  adj_train_orig, train_edges, train_edges_false, val_edges, val_edges_false, \
  test_edges, test_edges_false = train_test_split
  g_train=nx.adjacency_matrix(g_train)
  emb_matrix=process(args,g_train,DATASET,METHOD,F,ego_user)

  n2v_scores, val_edge_labels, val_preds, test_edge_labels, test_preds=linkpre_scores(args, emb_matrix, train_test_split, ego_user,DATASET,METHOD, F)

  return n2v_scores, val_edge_labels, val_preds, test_edge_labels, test_preds



def deepwalk1(g_train, train_test_split,DATASET,METHOD,res_dir, ego_user, F):
  parser = ArgumentParser("deepwalk",
                          formatter_class=ArgumentDefaultsHelpFormatter,
                          conflict_handler='resolve')

  parser.add_argument("--debug", dest="debug", action='store_true', default=False,
                      help="drop a debugger if an exception is raised.")

  parser.add_argument('--format', default='edgelist',
                      help='File format of input file')

  parser.add_argument('--input', nargs='?',
                      help='Input graph file')

  parser.add_argument("-l", "--log", dest="log", default="INFO",
                      help="log verbosity level")

  parser.add_argument('--matfile-variable-name', default='network',
                      help='variable name of adjacency matrix inside a .mat file.')

  parser.add_argument('--max-memory-data-size', default=1000000000, type=int,
                      help='Size to start dumping walks to disk, instead of keeping them in memory.')

  parser.add_argument('--number-walks', default=10, type=int,
                      help='Number of random walks to start at each node')

  parser.add_argument('--output',
                      help='Output representation file')

  parser.add_argument('--representation-size', default=128, type=int,
                      help='Number of latent dimensions to learn for each node.')

  parser.add_argument('--seed', default=0, type=int,
                      help='Seed for random walk generator.')

  parser.add_argument('--undirected', default=True, type=bool,
                      help='Treat graph as undirected.')

  parser.add_argument('--vertex-freq-degree', default=False, action='store_true',
                      help='Use vertex degree to estimate the frequency of nodes '
                           'in the random walks. This option is faster than '
                           'calculating the vocabulary.')

  parser.add_argument('--walk-length', default=80, type=int,
                      help='Length of the random walk started at each node')

  parser.add_argument('--window-size', default=10, type=int,
                      help='Window size of skipgram model.')

  parser.add_argument('--workers', default=10, type=int,
                      help='Number of parallel processes.')
  parser.add_argument('--edge_score_mode', default='edge-emb')


  args = parser.parse_args()
  args.input='%s%s-fair-%s-train.txt' % (res_dir, str(ego_user), F)
  args.output = res_dir+METHOD+'-embeds-'+F+'-'+str(ego_user)
  numeric_level = getattr(logging, args.log.upper(), None)
  logging.basicConfig(format=LOGFORMAT)
  logger.setLevel(numeric_level)

  if args.debug:
   sys.excepthook = debug

  adj_train_orig, train_edges, train_edges_false, val_edges, val_edges_false, \
  test_edges, test_edges_false = train_test_split
  g_train=nx.adjacency_matrix(g_train)
  emb_matrix=process(args,g_train,DATASET,METHOD,F,ego_user)

  n2v_scores,train_edge_labels, test_edge_labels, test_preds,train_sim_matrix,test_sim_matrix,train_edge_embs,test_edge_embs,train_embs_1,train_embs_2,test_embs_1,test_embs_2=linkpre_scores1(args, emb_matrix, train_test_split, ego_user,DATASET,METHOD, F)

  return n2v_scores, train_edge_labels,test_edge_labels, test_preds,emb_matrix,train_sim_matrix,test_sim_matrix,train_edge_embs,test_edge_embs,train_embs_1,train_embs_2,test_embs_1,test_embs_2



def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def linkpre_scores(args, emb_matrix, train_test_split, ego_user,DATASET,METHOD, Flag):
    adj_train, train_edges, train_edges_false, val_edges, val_edges_false, \
    test_edges, test_edges_false = train_test_split
    start_time = time.time()
    # Generate bootstrapped edge embeddings (as is done in node2vec paper)
    # Edge embedding for (v1, v2) = hadamard product of node embeddings for v1, v2
    if args.edge_score_mode == "edge-emb":

        def get_edge_embeddings(edge_list,ego_user,DATASET, Flag, flag):
            embs = []
            for edge in edge_list:
                node1 = edge[0]
                node2 = edge[1]
                emb1 = emb_matrix[node1]
                #print(np.shape(emb1))
                emb2 = emb_matrix[node2]
                edge_emb = np.multiply(emb1, emb2)
                #edge_emb = np.array(emb1) + np.array(emb2)
                print(np.shape(edge_emb))
                embs.append(edge_emb)
            embs = np.array(embs)

            # #with open('/Users/xiulingwang/Downloads/line-master/data/embds/' + str(ego_user) + flag + '-' + Flag, 'w') as f:
            # with open('E:/python/banlance/code/' + DATASET + '/' + METHOD + '/embeds/' + Flag + '-' + str(ego_user) + flag,'w') as f:
            # #with open('/Users/xiulingwang/Downloads/'+DATASET+'/line/3-split/' + str(ego_user)+ '-' + flag + '-' + Flag+'-' +'embds','w') as f:
            #     f.write('%d %d\n' % (edge_list.shape[0], args.representation_size))
            #     for i in range(edge_list.shape[0]):
            #         e = ' '.join(map(lambda x: str(x), embs[i]))
            #         f.write('%s %s %s\n' % (str(edge_list[i][0]), str(edge_list[i][1]), e))

            return embs

        # Train-set edge embeddings
        pos_train_edge_embs = get_edge_embeddings(train_edges,ego_user,DATASET, Flag, flag='pos-train')
        neg_train_edge_embs = get_edge_embeddings(train_edges_false, ego_user,DATASET,Flag, flag='neg-train')
        train_edge_embs = np.concatenate([pos_train_edge_embs, neg_train_edge_embs])

        # Create train-set edge labels: 1 = real edge, 0 = false edge
        train_edge_labels = np.concatenate([np.ones(len(train_edges)), np.zeros(len(train_edges_false))])

        # Val-set edge embeddings, labels
        if len(val_edges) > 0 and len(val_edges_false) > 0:
            pos_val_edge_embs = get_edge_embeddings(val_edges,ego_user,DATASET,Flag, flag='pos-val')
            neg_val_edge_embs = get_edge_embeddings(val_edges_false,ego_user,DATASET,Flag, flag='neg-val')
            val_edge_embs = np.concatenate([pos_val_edge_embs, neg_val_edge_embs])
            val_edge_labels = np.concatenate([np.ones(len(val_edges)), np.zeros(len(val_edges_false))])

        # Test-set edge embeddings, labels
        pos_test_edge_embs = get_edge_embeddings(test_edges,ego_user,DATASET,Flag, flag='pos-test')
        neg_test_edge_embs = get_edge_embeddings(test_edges_false,ego_user,DATASET,Flag, flag='neg-test')
        test_edge_embs = np.concatenate([pos_test_edge_embs, neg_test_edge_embs])

        # Create val-set edge labels: 1 = real edge, 0 = false edge
        test_edge_labels = np.concatenate([np.ones(len(test_edges)), np.zeros(len(test_edges_false))])

        # Train logistic regression classifier on train-set edge embeddings
        edge_classifier = LogisticRegression(random_state=0)
        edge_classifier.fit(train_edge_embs, train_edge_labels)

        # Predicted edge scores: probability of being of class "1" (real edge)
        if len(val_edges) > 0 and len(val_edges_false) > 0:
            val_preds = edge_classifier.predict_proba(val_edge_embs)[:, 1]
        test_preds = edge_classifier.predict_proba(test_edge_embs)[:, 1]
        print(test_preds)
        print(np.shape(test_preds))

        runtime = time.time() - start_time

        # Calculate scores
        if len(val_edges) > 0 and len(val_edges_false) > 0:
            n2v_val_roc = roc_auc_score(val_edge_labels, val_preds)
            # n2v_val_roc_curve = roc_curve(val_edge_labels, val_preds)
            n2v_val_ap = average_precision_score(val_edge_labels, val_preds)
        else:
            n2v_val_roc = None
            n2v_val_roc_curve = None
            n2v_val_ap = None

        n2v_test_roc = roc_auc_score(test_edge_labels, test_preds)
        # n2v_test_roc_curve = roc_curve(test_edge_labels, test_preds)
        n2v_test_ap = average_precision_score(test_edge_labels, test_preds)


    # Generate edge scores using simple dot product of node embeddings (like in GAE paper)
    elif args.edge_score_mode == "dot-product":
        score_matrix = np.dot(emb_matrix, emb_matrix.T)
        runtime = time.time() - start_time

        # Val set scores
        if len(val_edges) > 0:
            n2v_val_roc, n2v_val_ap = get_roc_score(val_edges, val_edges_false, score_matrix, apply_sigmoid=True)
        else:
            n2v_val_roc = None
            n2v_val_roc_curve = None
            n2v_val_ap = None

        # Test set scores
        n2v_test_roc, n2v_test_ap = get_roc_score(test_edges, test_edges_false, score_matrix, apply_sigmoid=True)

    else:
        print
        "Invalid edge_score_mode! Either use edge-emb or dot-product."

    # Record scores
    n2v_scores = {}

    n2v_scores['test_roc'] = n2v_test_roc
    # n2v_scores['test_roc_curve'] = n2v_test_roc_curve
    n2v_scores['test_ap'] = n2v_test_ap

    n2v_scores['val_roc'] = n2v_val_roc
    # n2v_scores['val_roc_curve'] = n2v_val_roc_curve
    n2v_scores['val_ap'] = n2v_val_ap

    n2v_scores['runtime'] = runtime

    return n2v_scores, val_edge_labels, val_preds, test_edge_labels, test_preds



def linkpre_scores_inf_debias(args, emb_matrix, train_test_split, ego_user,DATASET,METHOD, Flag):
    adj_train, train_edges, train_edges_false, test_edges, test_edges_false = train_test_split
    start_time = time.time()
    # Generate bootstrapped edge embeddings (as is done in node2vec paper)
    # Edge embedding for (v1, v2) = hadamard product of node embeddings for v1, v2
    if args.edge_score_mode == "edge-emb":

        def get_edge_embeddings(edge_list,ego_user,DATASET, Flag, flag):
            embs = []
            for edge in edge_list:
                node1 = edge[0]
                node2 = edge[1]
                emb1 = emb_matrix[node1]
                #print(np.shape(emb1))
                emb2 = emb_matrix[node2]
                edge_emb = np.multiply(emb1, emb2)
                #edge_emb = np.array(emb1) + np.array(emb2)
                print(np.shape(edge_emb))
                embs.append(edge_emb)
            embs = np.array(embs)

            # #with open('/Users/xiulingwang/Downloads/line-master/data/embds/' + str(ego_user) + flag + '-' + Flag, 'w') as f:
            # with open('E:/python/banlance/code/' + DATASET + '/' + METHOD + '/embeds/' + Flag + '-' + str(ego_user) + flag,'w') as f:
            # #with open('/Users/xiulingwang/Downloads/'+DATASET+'/line/3-split/' + str(ego_user)+ '-' + flag + '-' + Flag+'-' +'embds','w') as f:
            #     f.write('%d %d\n' % (edge_list.shape[0], args.representation_size))
            #     for i in range(edge_list.shape[0]):
            #         e = ' '.join(map(lambda x: str(x), embs[i]))
            #         f.write('%s %s %s\n' % (str(edge_list[i][0]), str(edge_list[i][1]), e))

            return embs

        # Train-set edge embeddings
        pos_train_edge_embs = get_edge_embeddings(train_edges,ego_user,DATASET, Flag, flag='pos-train')
        neg_train_edge_embs = get_edge_embeddings(train_edges_false, ego_user,DATASET,Flag, flag='neg-train')
        train_edge_embs = np.concatenate([pos_train_edge_embs, neg_train_edge_embs])

        # Create train-set edge labels: 1 = real edge, 0 = false edge
        train_edge_labels = np.concatenate([np.ones(len(train_edges)), np.zeros(len(train_edges_false))])

        # # Val-set edge embeddings, labels
        # if len(val_edges) > 0 and len(val_edges_false) > 0:
        #     pos_val_edge_embs = get_edge_embeddings(val_edges,ego_user,DATASET,Flag, flag='pos-val')
        #     neg_val_edge_embs = get_edge_embeddings(val_edges_false,ego_user,DATASET,Flag, flag='neg-val')
        #     val_edge_embs = np.concatenate([pos_val_edge_embs, neg_val_edge_embs])
        #     val_edge_labels = np.concatenate([np.ones(len(val_edges)), np.zeros(len(val_edges_false))])

        # Test-set edge embeddings, labels
        pos_test_edge_embs = get_edge_embeddings(test_edges,ego_user,DATASET,Flag, flag='pos-test')
        neg_test_edge_embs = get_edge_embeddings(test_edges_false,ego_user,DATASET,Flag, flag='neg-test')
        test_edge_embs = np.concatenate([pos_test_edge_embs, neg_test_edge_embs])

        # Create val-set edge labels: 1 = real edge, 0 = false edge
        test_edge_labels = np.concatenate([np.ones(len(test_edges)), np.zeros(len(test_edges_false))])

        # Train logistic regression classifier on train-set edge embeddings
        edge_classifier = LogisticRegression(random_state=0)
        edge_classifier.fit(train_edge_embs, train_edge_labels)

        # Predicted edge scores: probability of being of class "1" (real edge)
        # if len(val_edges) > 0 and len(val_edges_false) > 0:
        #     val_preds = edge_classifier.predict_proba(val_edge_embs)[:, 1]
        test_preds = edge_classifier.predict_proba(test_edge_embs)
        print(test_preds)
        print(np.shape(test_preds))

        runtime = time.time() - start_time

        # # Calculate scores
        # if len(val_edges) > 0 and len(val_edges_false) > 0:
        #     n2v_val_roc = roc_auc_score(val_edge_labels, val_preds)
        #     # n2v_val_roc_curve = roc_curve(val_edge_labels, val_preds)
        #     n2v_val_ap = average_precision_score(val_edge_labels, val_preds)
        # else:
        #     n2v_val_roc = None
        #     n2v_val_roc_curve = None
        #     n2v_val_ap = None

        # n2v_test_roc = roc_auc_score(test_edge_labels, test_preds)
        # # n2v_test_roc_curve = roc_curve(test_edge_labels, test_preds)
        # n2v_test_ap = average_precision_score(test_edge_labels, test_preds)


    # Generate edge scores using simple dot product of node embeddings (like in GAE paper)
    elif args.edge_score_mode == "dot-product":
        score_matrix = np.dot(emb_matrix, emb_matrix.T)
        runtime = time.time() - start_time

        # Val set scores
        # if len(val_edges) > 0:
        #     n2v_val_roc, n2v_val_ap = get_roc_score(val_edges, val_edges_false, score_matrix, apply_sigmoid=True)
        # else:
        #     n2v_val_roc = None
        #     n2v_val_roc_curve = None
        #     n2v_val_ap = None

        # Test set scores
        n2v_test_roc, n2v_test_ap = get_roc_score(test_edges, test_edges_false, score_matrix, apply_sigmoid=True)

    else:
        print
        "Invalid edge_score_mode! Either use edge-emb or dot-product."


    return  test_edge_labels,test_preds

    # Record scores
    # n2v_scores = {}
    #
    # n2v_scores['test_roc'] = n2v_test_roc
    # # n2v_scores['test_roc_curve'] = n2v_test_roc_curve
    # n2v_scores['test_ap'] = n2v_test_ap
    #
    # n2v_scores['val_roc'] = n2v_val_roc
    # # n2v_scores['val_roc_curve'] = n2v_val_roc_curve
    # n2v_scores['val_ap'] = n2v_val_ap
    #
    # n2v_scores['runtime'] = runtime
    #
    # return n2v_scores, val_edge_labels, val_preds, test_edge_labels, test_preds







def linkpre_scores_517(emb_matrix,train_edges,train_edges_false,test_edges,test_edges_false):
    # adj_train, train_edges, train_edges_false, val_edges, val_edges_false, \
    # test_edges, test_edges_false = train_test_split
    # start_time = time.time()
    # Generate bootstrapped edge embeddings (as is done in node2vec paper)
    # Edge embedding for (v1, v2) = hadamard product of node embeddings for v1, v2
    # if args.edge_score_mode == "edge-emb":

    def get_edge_embeddings(edge_list):
        embs = []
        for edge in edge_list:
            node1 = edge[0]
            node2 = edge[1]
            emb1 = emb_matrix[node1]
            #print(np.shape(emb1))
            emb2 = emb_matrix[node2]
            edge_emb = np.multiply(emb1, emb2)
            #edge_emb = np.array(emb1) + np.array(emb2)
            print(np.shape(edge_emb))
            embs.append(edge_emb)
        embs = np.array(embs)

        return embs

    # Train-set edge embeddings
    pos_train_edge_embs = get_edge_embeddings(train_edges)
    neg_train_edge_embs = get_edge_embeddings(train_edges_false)
    train_edge_embs = np.concatenate([pos_train_edge_embs, neg_train_edge_embs])

    # Create train-set edge labels: 1 = real edge, 0 = false edge
    train_edge_labels = np.concatenate([np.ones(len(train_edges)), np.zeros(len(train_edges_false))])

    # # Val-set edge embeddings, labels
    # if len(val_edges) > 0 and len(val_edges_false) > 0:
    #     pos_val_edge_embs = get_edge_embeddings(val_edges,ego_user,DATASET,Flag, flag='pos-val')
    #     neg_val_edge_embs = get_edge_embeddings(val_edges_false,ego_user,DATASET,Flag, flag='neg-val')
    #     val_edge_embs = np.concatenate([pos_val_edge_embs, neg_val_edge_embs])
    #     val_edge_labels = np.concatenate([np.ones(len(val_edges)), np.zeros(len(val_edges_false))])

    # Test-set edge embeddings, labels
    pos_test_edge_embs = get_edge_embeddings(test_edges)
    neg_test_edge_embs = get_edge_embeddings(test_edges_false)
    test_edge_embs = np.concatenate([pos_test_edge_embs, neg_test_edge_embs])

    # Create val-set edge labels: 1 = real edge, 0 = false edge
    test_edge_labels = np.concatenate([np.ones(len(test_edges)), np.zeros(len(test_edges_false))])

    # Train logistic regression classifier on train-set edge embeddings
    edge_classifier = LogisticRegression(random_state=0)
    edge_classifier.fit(train_edge_embs, train_edge_labels)

    # Predicted edge scores: probability of being of class "1" (real edge)
    # if len(val_edges) > 0 and len(val_edges_false) > 0:
    #     val_preds = edge_classifier.predict_proba(val_edge_embs)[:, 1]
    # test_preds = edge_classifier.predict_proba(test_edge_embs)[:, 1]

    test_preds = edge_classifier.predict_proba(test_edge_embs)
    # print(test_preds)
    # print(np.shape(test_preds))

    # runtime = time.time() - start_time
    #
    # Calculate scores
    # if len(val_edges) > 0 and len(val_edges_false) > 0:
    #     n2v_val_roc = roc_auc_score(val_edge_labels, val_preds)
    #     # n2v_val_roc_curve = roc_curve(val_edge_labels, val_preds)
    #     n2v_val_ap = average_precision_score(val_edge_labels, val_preds)
    # else:
    #     n2v_val_roc = None
    #     n2v_val_roc_curve = None
    #     n2v_val_ap = None

    # n2v_test_roc = roc_auc_score(test_edge_labels, test_preds)
    # # n2v_test_roc_curve = roc_curve(test_edge_labels, test_preds)
    # n2v_test_ap = average_precision_score(test_edge_labels, test_preds)


    # Generate edge scores using simple dot product of node embeddings (like in GAE paper)
    # elif args.edge_score_mode == "dot-product":
    #     score_matrix = np.dot(emb_matrix, emb_matrix.T)
    #     runtime = time.time() - start_time
    #
    #     # Val set scores
    #     if len(val_edges) > 0:
    #         n2v_val_roc, n2v_val_ap = get_roc_score(val_edges, val_edges_false, score_matrix, apply_sigmoid=True)
    #     else:
    #         n2v_val_roc = None
    #         n2v_val_roc_curve = None
    #         n2v_val_ap = None
    #
    #     # Test set scores
    #     n2v_test_roc, n2v_test_ap = get_roc_score(test_edges, test_edges_false, score_matrix, apply_sigmoid=True)
    #
    # else:
    #     print
    #     "Invalid edge_score_mode! Either use edge-emb or dot-product."

    # Record scores
    # n2v_scores = {}

    # n2v_scores['test_roc'] = n2v_test_roc
    # # n2v_scores['test_roc_curve'] = n2v_test_roc_curve
    # n2v_scores['test_ap'] = n2v_test_ap
    #
    # n2v_scores['val_roc'] = n2v_val_roc
    # # n2v_scores['val_roc_curve'] = n2v_val_roc_curve
    # n2v_scores['val_ap'] = n2v_val_ap
    #
    # n2v_scores['runtime'] = runtime

    return test_edge_labels, test_preds









# Input: positive test/val edges, negative test/val edges, edge score matrix
# Output: ROC AUC score, ROC Curve (FPR, TPR, Thresholds), AP score
def get_roc_score(edges_pos, edges_neg, score_matrix, apply_sigmoid=False):
    # Edge case
    if len(edges_pos) == 0 or len(edges_neg) == 0:
        return (None, None, None)

    # Store positive edge predictions, actual values
    preds_pos = []
    pos = []
    for edge in edges_pos:
        if apply_sigmoid == True:
            preds_pos.append(sigmoid(score_matrix[edge[0], edge[1]]))
        else:
            preds_pos.append(score_matrix[edge[0], edge[1]])
        pos.append(1)  # actual value (1 for positive)

    # Store negative edge predictions, actual values
    preds_neg = []
    neg = []
    for edge in edges_neg:
        if apply_sigmoid == True:
            preds_neg.append(sigmoid(score_matrix[edge[0], edge[1]]))
        else:
            preds_neg.append(score_matrix[edge[0], edge[1]])
        neg.append(0)  # actual value (0 for negative)

    # Calculate scores
    preds_all = np.hstack([preds_pos, preds_neg])
    labels_all = np.hstack([np.ones(len(preds_pos)), np.zeros(len(preds_neg))])
    roc_score = roc_auc_score(labels_all, preds_all)
    # roc_curve_tuple = roc_curve(labels_all, preds_all)
    ap_score = average_precision_score(labels_all, preds_all)

    # return roc_score, roc_curve_tuple, ap_score
    return roc_score, ap_score


# Return a list of tuples (node1, node2) for networkx link prediction evaluation
def get_ebunch(train_test_split):
    adj_train, train_edges, train_edges_false, val_edges, val_edges_false, \
    test_edges, test_edges_false = train_test_split

    test_edges_list = test_edges.tolist()  # convert to nested list
    test_edges_list = [tuple(node_pair) for node_pair in test_edges_list]  # convert node-pairs to tuples
    test_edges_false_list = test_edges_false.tolist()
    test_edges_false_list = [tuple(node_pair) for node_pair in test_edges_false_list]
    return (test_edges_list + test_edges_false_list)


def distCosine(x, y):
    """
    :param x: m x k array
    :param y: n x k array
    :return: m x n array
    """
    xx = np.sum(x ** 2, axis=1) ** 0.5
    x = x / xx[:, np.newaxis]
    yy = np.sum(y ** 2, axis=1) ** 0.5
    y = y / yy[:, np.newaxis]
    sim_matrix=np.dot(x, y.transpose())  # 余弦相似度
    return sim_matrix




def linkpre_scores1(args, emb_matrix, train_test_split, ego_user,DATASET,METHOD, Flag):
    adj_train, train_edges, train_edges_false, val_edges, val_edges_false, \
    test_edges, test_edges_false = train_test_split
    start_time = time.time()
    # Generate bootstrapped edge embeddings (as is done in node2vec paper)
    # Edge embedding for (v1, v2) = hadamard product of node embeddings for v1, v2
    if args.edge_score_mode == "edge-emb":

        def get_edge_embeddings(edge_list,ego_user,DATASET, Flag, flag):
            embs = []
            sim_matrix=[]
            embs_1=[]
            embs_2 = []
            for edge in edge_list:
                node1 = edge[0]
                node2 = edge[1]
                emb1 = emb_matrix[node1]
                #print(np.shape(emb1))
                emb2 = emb_matrix[node2]
                edge_emb = np.multiply(emb1, emb2)
                sim = np.dot(emb1, emb2)/(np.linalg.norm(emb1)*np.linalg.norm(emb2))
                #edge_emb = np.array(emb1) + np.array(emb2)
                print(np.shape(edge_emb))
                embs.append(edge_emb)
                embs_1.append(emb1)
                embs_2.append(emb2)
                sim_matrix.append(sim)
            embs = np.array(embs)
            sim_matrix = np.array(sim_matrix)
            embs_1=np.array(embs_1)
            embs_2 =np.array(embs_2)

            # #with open('/Users/xiulingwang/Downloads/line-master/data/embds/' + str(ego_user) + flag + '-' + Flag, 'w') as f:
            # with open('E:/python/banlance/code/' + DATASET + '/' + METHOD + '/embeds/' + Flag + '-' + str(ego_user) + flag,'w') as f:
            # #with open('/Users/xiulingwang/Downloads/'+DATASET+'/line/3-split/' + str(ego_user)+ '-' + flag + '-' + Flag+'-' +'embds','w') as f:
            #     f.write('%d %d\n' % (edge_list.shape[0], args.representation_size))
            #     for i in range(edge_list.shape[0]):
            #         e = ' '.join(map(lambda x: str(x), embs[i]))
            #         f.write('%s %s %s\n' % (str(edge_list[i][0]), str(edge_list[i][1]), e))

            return embs,sim_matrix,embs_1,embs_2

        # Train-set edge embeddings
        pos_train_edge_embs ,pos_train_sim_matrix,pos_embs_1_train,pos_embs_2_train= get_edge_embeddings(train_edges,ego_user,DATASET, Flag, flag='pos-train')
        neg_train_edge_embs,neg_train_sim_matrix,neg_embs_1_train,neg_embs_2_train = get_edge_embeddings(train_edges_false, ego_user,DATASET,Flag, flag='neg-train')
        train_edge_embs = np.concatenate((pos_train_edge_embs, neg_train_edge_embs), axis=0)
        train_sim_matrix= np.concatenate((pos_train_sim_matrix, neg_train_sim_matrix), axis=0)
        train_embs_1 = np.concatenate((pos_embs_1_train, neg_embs_1_train), axis=0)
        train_embs_2 = np.concatenate((pos_embs_2_train, neg_embs_2_train), axis=0)

        # Create train-set edge labels: 1 = real edge, 0 = false edge
        train_edge_labels = np.concatenate((np.ones(len(train_edges)), np.zeros(len(train_edges_false))), axis=0)

        # Val-set edge embeddings, labels
        if len(val_edges) > 0 and len(val_edges_false) > 0:
            pos_val_edge_embs,pos_val_sim_matrix,pos_embs_1_val,pos_embs_2_val = get_edge_embeddings(val_edges,ego_user,DATASET,Flag, flag='pos-val')
            neg_val_edge_embs,neg_val_sim_matrix,neg_embs_1_val,neg_embs_2_val = get_edge_embeddings(val_edges_false,ego_user,DATASET,Flag, flag='neg-val')
            val_edge_embs = np.concatenate([pos_val_edge_embs, neg_val_edge_embs])
            val_edge_labels = np.concatenate((np.ones(len(val_edges)), np.zeros(len(val_edges_false))), axis=0)
            val_sim_matrix = np.concatenate((pos_val_sim_matrix, neg_val_sim_matrix), axis=0)
            val_embs_1 = np.concatenate((pos_embs_1_val, neg_embs_1_val), axis=0)
            val_embs_2 = np.concatenate((pos_embs_2_val, neg_embs_2_val), axis=0)

        # Test-set edge embeddings, labels
        pos_test_edge_embs,pos_test_sim_matrix,pos_embs_1_test,pos_embs_2_test = get_edge_embeddings(test_edges,ego_user,DATASET,Flag, flag='pos-test')
        neg_test_edge_embs ,neg_test_sim_matrix,neg_embs_1_test,neg_embs_2_test= get_edge_embeddings(test_edges_false,ego_user,DATASET,Flag, flag='neg-test')
        test_edge_embs = np.concatenate((pos_test_edge_embs, neg_test_edge_embs), axis=0)
        test_sim_matrix = np.concatenate((pos_test_sim_matrix, neg_test_sim_matrix), axis=0)
        test_embs_1 = np.concatenate((pos_embs_1_test, neg_embs_1_test), axis=0)
        test_embs_2 = np.concatenate((pos_embs_2_test, neg_embs_2_test), axis=0)

        # Create val-set edge labels: 1 = real edge, 0 = false edge
        test_edge_labels = np.concatenate((np.ones(len(test_edges)), np.zeros(len(test_edges_false))), axis=0)

        # Train logistic regression classifier on train-set edge embeddings
        edge_classifier = LogisticRegression(random_state=0)
        edge_classifier.fit(train_edge_embs, train_edge_labels)

        # Predicted edge scores: probability of being of class "1" (real edge)
        if len(val_edges) > 0 and len(val_edges_false) > 0:
            val_preds = edge_classifier.predict_proba(val_edge_embs)[:, 1]
        test_preds = edge_classifier.predict_proba(test_edge_embs)[:, 1]
        print(test_preds)
        print(np.shape(test_preds))

        runtime = time.time() - start_time

        # Calculate scores
        if len(val_edges) > 0 and len(val_edges_false) > 0:
            n2v_val_roc = roc_auc_score(val_edge_labels, val_preds)
            # n2v_val_roc_curve = roc_curve(val_edge_labels, val_preds)
            n2v_val_ap = average_precision_score(val_edge_labels, val_preds)
        else:
            n2v_val_roc = None
            n2v_val_roc_curve = None
            n2v_val_ap = None

        n2v_test_roc = roc_auc_score(test_edge_labels, test_preds)
        # n2v_test_roc_curve = roc_curve(test_edge_labels, test_preds)
        n2v_test_ap = average_precision_score(test_edge_labels, test_preds)


    # Generate edge scores using simple dot product of node embeddings (like in GAE paper)
    elif args.edge_score_mode == "dot-product":
        score_matrix = np.dot(emb_matrix, emb_matrix.T)
        runtime = time.time() - start_time

        # Val set scores
        if len(val_edges) > 0:
            n2v_val_roc, n2v_val_ap = get_roc_score(val_edges, val_edges_false, score_matrix, apply_sigmoid=True)
        else:
            n2v_val_roc = None
            n2v_val_roc_curve = None
            n2v_val_ap = None

        # Test set scores
        n2v_test_roc, n2v_test_ap = get_roc_score(test_edges, test_edges_false, score_matrix, apply_sigmoid=True)

    else:
        print
        "Invalid edge_score_mode! Either use edge-emb or dot-product."

    # Record scores
    n2v_scores = {}

    n2v_scores['test_roc'] = n2v_test_roc
    # n2v_scores['test_roc_curve'] = n2v_test_roc_curve
    n2v_scores['test_ap'] = n2v_test_ap

    n2v_scores['val_roc'] = n2v_val_roc
    # n2v_scores['val_roc_curve'] = n2v_val_roc_curve
    n2v_scores['val_ap'] = n2v_val_ap

    n2v_scores['runtime'] = runtime

    return n2v_scores, train_edge_labels,test_edge_labels, test_preds,train_sim_matrix,test_sim_matrix,train_edge_embs,test_edge_embs,train_embs_1,train_embs_2,test_embs_1,test_embs_2



def deepwalk2(g_train, train_test_split,DATASET,METHOD,res_dir, ego_user, F,dp):
  parser = ArgumentParser("deepwalk",
                          formatter_class=ArgumentDefaultsHelpFormatter,
                          conflict_handler='resolve')

  parser.add_argument("--debug", dest="debug", action='store_true', default=False,
                      help="drop a debugger if an exception is raised.")

  parser.add_argument('--format', default='edgelist',
                      help='File format of input file')

  parser.add_argument('--input', nargs='?',
                      help='Input graph file')

  parser.add_argument("-l", "--log", dest="log", default="INFO",
                      help="log verbosity level")

  parser.add_argument('--matfile-variable-name', default='network',
                      help='variable name of adjacency matrix inside a .mat file.')

  parser.add_argument('--max-memory-data-size', default=1000000000, type=int,
                      help='Size to start dumping walks to disk, instead of keeping them in memory.')

  parser.add_argument('--number-walks', default=10, type=int,
                      help='Number of random walks to start at each node')

  parser.add_argument('--output',
                      help='Output representation file')

  parser.add_argument('--representation-size', default=128, type=int,
                      help='Number of latent dimensions to learn for each node.')

  parser.add_argument('--seed', default=0, type=int,
                      help='Seed for random walk generator.')

  parser.add_argument('--undirected', default=True, type=bool,
                      help='Treat graph as undirected.')

  parser.add_argument('--vertex-freq-degree', default=False, action='store_true',
                      help='Use vertex degree to estimate the frequency of nodes '
                           'in the random walks. This option is faster than '
                           'calculating the vocabulary.')

  parser.add_argument('--walk-length', default=80, type=int,
                      help='Length of the random walk started at each node')

  parser.add_argument('--window-size', default=10, type=int,
                      help='Window size of skipgram model.')

  parser.add_argument('--workers', default=10, type=int,
                      help='Number of parallel processes.')
  parser.add_argument('--edge_score_mode', default='edge-emb')


  args = parser.parse_args()
  args.input='%s%s-fair-%s-train.txt' % (res_dir, str(ego_user), F)
  args.output = res_dir+METHOD+'-embeds-'+F+'-'+str(ego_user)
  numeric_level = getattr(logging, args.log.upper(), None)
  logging.basicConfig(format=LOGFORMAT)
  logger.setLevel(numeric_level)

  if args.debug:
   sys.excepthook = debug

  adj_train_orig, train_edges, train_edges_false, val_edges, val_edges_false, \
  test_edges, test_edges_false = train_test_split
  g_train=nx.adjacency_matrix(g_train)
  emb_matrix=process3(args,g_train,DATASET,METHOD,F,ego_user,res_dir,dp)

  n2v_scores,train_edge_labels, test_edge_labels, test_preds,train_sim_matrix,test_sim_matrix,train_edge_embs,test_edge_embs,train_embs_1,train_embs_2,test_embs_1,test_embs_2=linkpre_scores2(args, emb_matrix, train_test_split, ego_user,DATASET,METHOD, F)

  return n2v_scores, train_edge_labels,test_edge_labels, test_preds,emb_matrix,train_sim_matrix,test_sim_matrix,train_edge_embs,test_edge_embs,train_embs_1,train_embs_2,test_embs_1,test_embs_2



def linkpre_scores2(args, emb_matrix, train_test_split, ego_user,DATASET,METHOD, Flag):
    adj_train, train_edges, train_edges_false, val_edges, val_edges_false, \
    test_edges, test_edges_false = train_test_split
    start_time = time.time()
    # Generate bootstrapped edge embeddings (as is done in node2vec paper)
    # Edge embedding for (v1, v2) = hadamard product of node embeddings for v1, v2
    if args.edge_score_mode == "edge-emb":

        def get_edge_embeddings(edge_list,ego_user,DATASET, Flag, flag):
            embs = []
            sim_matrix=[]
            embs_1=[]
            embs_2 = []
            for edge in edge_list:
                node1 = edge[0]
                node2 = edge[1]
                emb1 = emb_matrix[node1]
                #print(np.shape(emb1))
                emb2 = emb_matrix[node2]
                edge_emb = np.multiply(emb1, emb2)
                sim = np.dot(emb1, emb2)/(np.linalg.norm(emb1)*np.linalg.norm(emb2))
                #edge_emb = np.array(emb1) + np.array(emb2)
                #print(np.shape(edge_emb))
                embs.append(edge_emb)
                embs_1.append(emb1)
                embs_2.append(emb2)
                sim_matrix.append(sim)
            embs = np.array(embs)
            sim_matrix = np.array(sim_matrix)
            embs_1=np.array(embs_1)
            embs_2 =np.array(embs_2)

            # #with open('/Users/xiulingwang/Downloads/line-master/data/embds/' + str(ego_user) + flag + '-' + Flag, 'w') as f:
            # with open('E:/python/banlance/code/' + DATASET + '/' + METHOD + '/embeds/' + Flag + '-' + str(ego_user) + flag,'w') as f:
            # #with open('/Users/xiulingwang/Downloads/'+DATASET+'/line/3-split/' + str(ego_user)+ '-' + flag + '-' + Flag+'-' +'embds','w') as f:
            #     f.write('%d %d\n' % (edge_list.shape[0], args.representation_size))
            #     for i in range(edge_list.shape[0]):
            #         e = ' '.join(map(lambda x: str(x), embs[i]))
            #         f.write('%s %s %s\n' % (str(edge_list[i][0]), str(edge_list[i][1]), e))

            return embs,sim_matrix,embs_1,embs_2

        # Train-set edge embeddings
        pos_train_edge_embs ,pos_train_sim_matrix,pos_embs_1_train,pos_embs_2_train= get_edge_embeddings(train_edges,ego_user,DATASET, Flag, flag='pos-train')
        neg_train_edge_embs,neg_train_sim_matrix,neg_embs_1_train,neg_embs_2_train = get_edge_embeddings(train_edges_false, ego_user,DATASET,Flag, flag='neg-train')
        train_edge_embs = np.concatenate((pos_train_edge_embs, neg_train_edge_embs), axis=0)
        train_sim_matrix= np.concatenate((pos_train_sim_matrix, neg_train_sim_matrix), axis=0)
        train_embs_1 = np.concatenate((pos_embs_1_train, neg_embs_1_train), axis=0)
        train_embs_2 = np.concatenate((pos_embs_2_train, neg_embs_2_train), axis=0)

        # Create train-set edge labels: 1 = real edge, 0 = false edge
        train_edge_labels = np.concatenate((np.ones(len(train_edges)), np.zeros(len(train_edges_false))), axis=0)

        # Val-set edge embeddings, labels
        if len(val_edges) > 0 and len(val_edges_false) > 0:
            pos_val_edge_embs,pos_val_sim_matrix,pos_embs_1_val,pos_embs_2_val = get_edge_embeddings(val_edges,ego_user,DATASET,Flag, flag='pos-val')
            neg_val_edge_embs,neg_val_sim_matrix,neg_embs_1_val,neg_embs_2_val = get_edge_embeddings(val_edges_false,ego_user,DATASET,Flag, flag='neg-val')
            val_edge_embs = np.concatenate([pos_val_edge_embs, neg_val_edge_embs])
            val_edge_labels = np.concatenate((np.ones(len(val_edges)), np.zeros(len(val_edges_false))), axis=0)
            val_sim_matrix = np.concatenate((pos_val_sim_matrix, neg_val_sim_matrix), axis=0)
            val_embs_1 = np.concatenate((pos_embs_1_val, neg_embs_1_val), axis=0)
            val_embs_2 = np.concatenate((pos_embs_2_val, neg_embs_2_val), axis=0)

        # Test-set edge embeddings, labels
        pos_test_edge_embs,pos_test_sim_matrix,pos_embs_1_test,pos_embs_2_test = get_edge_embeddings(test_edges,ego_user,DATASET,Flag, flag='pos-test')
        neg_test_edge_embs ,neg_test_sim_matrix,neg_embs_1_test,neg_embs_2_test= get_edge_embeddings(test_edges_false,ego_user,DATASET,Flag, flag='neg-test')
        test_edge_embs = np.concatenate((pos_test_edge_embs, neg_test_edge_embs), axis=0)
        test_sim_matrix = np.concatenate((pos_test_sim_matrix, neg_test_sim_matrix), axis=0)
        test_embs_1 = np.concatenate((pos_embs_1_test, neg_embs_1_test), axis=0)
        test_embs_2 = np.concatenate((pos_embs_2_test, neg_embs_2_test), axis=0)

        # Create val-set edge labels: 1 = real edge, 0 = false edge
        test_edge_labels = np.concatenate((np.ones(len(test_edges)), np.zeros(len(test_edges_false))), axis=0)

        # Train logistic regression classifier on train-set edge embeddings
        edge_classifier = LogisticRegression(random_state=0)
        edge_classifier.fit(train_edge_embs, train_edge_labels)

        # Predicted edge scores: probability of being of class "1" (real edge)
        if len(val_edges) > 0 and len(val_edges_false) > 0:
            val_preds = edge_classifier.predict_proba(val_edge_embs)[:, 1]
        test_preds = edge_classifier.predict_proba(test_edge_embs)[:, 1]
        # print(test_preds)
        #print(np.shape(test_preds))

        runtime = time.time() - start_time

        # Calculate scores
        if len(val_edges) > 0 and len(val_edges_false) > 0:
            n2v_val_roc = roc_auc_score(val_edge_labels, val_preds)
            # n2v_val_roc_curve = roc_curve(val_edge_labels, val_preds)
            n2v_val_ap = average_precision_score(val_edge_labels, val_preds)
        else:
            n2v_val_roc = None
            n2v_val_roc_curve = None
            n2v_val_ap = None

        n2v_test_roc = roc_auc_score(test_edge_labels, test_preds)
        # n2v_test_roc_curve = roc_curve(test_edge_labels, test_preds)
        n2v_test_ap = average_precision_score(test_edge_labels, test_preds)


    # Generate edge scores using simple dot product of node embeddings (like in GAE paper)
    elif args.edge_score_mode == "dot-product":
        score_matrix = np.dot(emb_matrix, emb_matrix.T)
        runtime = time.time() - start_time

        # Val set scores
        if len(val_edges) > 0:
            n2v_val_roc, n2v_val_ap = get_roc_score(val_edges, val_edges_false, score_matrix, apply_sigmoid=True)
        else:
            n2v_val_roc = None
            n2v_val_roc_curve = None
            n2v_val_ap = None

        # Test set scores
        n2v_test_roc, n2v_test_ap = get_roc_score(test_edges, test_edges_false, score_matrix, apply_sigmoid=True)

    else:
        print
        "Invalid edge_score_mode! Either use edge-emb or dot-product."

    # Record scores
    n2v_scores = {}

    n2v_scores['test_roc'] = n2v_test_roc
    # n2v_scores['test_roc_curve'] = n2v_test_roc_curve
    n2v_scores['test_ap'] = n2v_test_ap

    n2v_scores['val_roc'] = n2v_val_roc
    # n2v_scores['val_roc_curve'] = n2v_val_roc_curve
    n2v_scores['val_ap'] = n2v_val_ap

    n2v_scores['runtime'] = runtime

    return n2v_scores, train_edge_labels,test_edge_labels, test_preds,train_sim_matrix,test_sim_matrix,train_edge_embs,test_edge_embs,train_embs_1,train_embs_2,test_embs_1,test_embs_2


def process2(args,adj_train,DATASET,METHOD,F,ego_user,res_dir,dp):

  if args.format == "adjlist":
    G = graph.load_adjacencylist(args.input, undirected=args.undirected)
  elif args.format == "edgelist":
    G = graph.load_edgelist(args.input, undirected=args.undirected)
  elif args.format == "mat":
    G = graph.load_matfile(args.input, variable_name=args.matfile_variable_name, undirected=args.undirected)
  else:
    raise Exception("Unknown file format: '%s'.  Valid formats: 'adjlist', 'edgelist', 'mat'" % args.format)


  print("Number of nodes: {}".format(len(G.nodes())))

  num_walks = len(G.nodes()) * args.number_walks

  print("Number of walks: {}".format(num_walks))

  data_size = num_walks * args.walk_length

  print("Data size (walks*length): {}".format(data_size))

  if data_size < args.max_memory_data_size:
    print("Walking...")
    walks = graph.build_deepwalk_corpus1(G, num_paths=args.number_walks,
                                        path_length=args.walk_length, alpha=0, rand=random.Random(args.seed))

    # print(walks)
    # print(type(walks))

    write_walks2(args, walks, DATASET, METHOD, F, ego_user,res_dir)
    print("Training...")

  class Model(torch.nn.Module):
      def __init__(self):
          super(Model, self).__init__()
          self.phi = nn.Parameter(torch.rand((len(G.nodes()), args.representation_size), requires_grad=True))
          self.phi2 = nn.Parameter(torch.rand((args.representation_size, len(G.nodes())), requires_grad=True))

      def forward(self, one_hot):
          hidden = torch.matmul(one_hot, self.phi)
          out = torch.matmul(hidden, self.phi2)
          return out

  model = Model()

  def skip_gram(wvi, w):
      for j in range(len(wvi)):
          for k in range(max(0, j - w), min(j + w, len(wvi))):

              # generate one hot vector
              one_hot = torch.zeros(len(G.nodes()))
              one_hot[wvi[j]] = 1

              out = model(one_hot)
              loss = torch.log(torch.sum(torch.exp(out))) - out[wvi[k]]
              loss.backward()
              lr=0.025
              for param in model.parameters():
                  param.data.sub_(lr * param.grad)
                  param.grad.data.zero_()



  def skip_gram_dp(wvi, w):
      C=1
      sigma=4
      for j in range(len(wvi)):
          for k in range(max(0, j - w), min(j + w, len(wvi))):



              # generate one hot vector
              one_hot = torch.zeros(len(G.nodes()))
              one_hot[wvi[j]] = 1

              out = model(one_hot)
              loss = torch.log(torch.sum(torch.exp(out))) - out[wvi[k]]

              loss.backward()

              l2_norm = torch.tensor(0.0)
              for param in model.parameters():
                  temp_grad=param.grad
                  l2_norm += temp_grad.norm(2) ** 2

              l2_norm = l2_norm.sqrt()
              divisor = max(torch.tensor(1.0), l2_norm / C)

              for param in model.parameters():
                  param.grad += param.grad / divisor

                  param.grad += sigma * C * (torch.randn_like(param.grad))

                  print(param.grad)



              lr=0.025
              for param in model.parameters():
                  param.data.sub_(lr * param.grad)
                  param.grad.data.zero_()


  if (dp==1):
      for walk in walks:
          # print(walk)
          skip_gram_dp(walk, args.window_size)

  if (dp==0):
      for walk in walks:
          # print(walk)
          skip_gram(walk, args.window_size)

  emb_mappings=model.phi
  emb_mappings=emb_mappings.detach().numpy()
  print(np.shape(emb_mappings))

  #   model = Word2Vec(walks, size=args.representation_size, window=args.window_size, min_count=0, sg=1, hs=1, workers=args.workers)
  #
  #   training_loss = model.get_latest_training_loss()
  #
  # else:
  #   print("Data size {} is larger than limit (max-memory-data-size: {}).  Dumping walks to disk.".format(data_size, args.max_memory_data_size))
  #   print("Walking...")
  #
  #   walks_filebase = args.output + ".walks"
  #   walk_files = serialized_walks.write_walks_to_disk(G, walks_filebase, num_paths=args.number_walks,
  #                                        path_length=args.walk_length, alpha=0, rand=random.Random(args.seed),
  #                                        num_workers=args.workers)
  #
  #   print("Counting vertex frequency...")
  #   if not args.vertex_freq_degree:
  #     vertex_counts = serialized_walks.count_textfiles(walk_files, args.workers)
  #   else:
  #     # use degree distribution for frequency in tree
  #     vertex_counts = G.degree(nodes=G.iterkeys())
  #
  #   print("Training...")
  #   walks_corpus = serialized_walks.WalksCorpus(walk_files)
  #   model = Skipgram(sentences=walks_corpus, vocabulary_counts=vertex_counts,
  #                    size=args.representation_size,
  #                    window=args.window_size, min_count=0, trim_rule=None, workers=args.workers)
  #
  # model.wv.save_word2vec_format(args.output)
  #
  # exit()



  # Store embeddings mapping
  #emb_mappings = model.wv

  # Create node embeddings matrix (rows = nodes, columns = embedding features)
  emb_list = []
  for node_index in range(0, adj_train.shape[0]):
      node_str = int(node_index)
      node_emb = emb_mappings[node_str]
      emb_list.append(node_emb)
  emb_matrix = np.vstack(emb_list)

  with open(res_dir + DATASET + '-' + METHOD + '-embeds-' + F + '-' + str(ego_user), 'w') as f:
      f.write('%d %d\n' % (adj_train.shape[0], args.representation_size))
      for i in range(adj_train.shape[0]):
          e = ' '.join(map(lambda x: str(x), emb_list[i]))
          f.write('%s %s\n' % (str(i), e))

  return emb_matrix



def process3(args,adj_train,DATASET,METHOD,F,ego_user,res_dir,dp):

  if args.format == "adjlist":
    G = graph.load_adjacencylist(args.input, undirected=args.undirected)
  elif args.format == "edgelist":
    print('&&&&&')
    G = graph.load_edgelist(args.input, undirected=args.undirected)
  elif args.format == "mat":
    G = graph.load_matfile(args.input, variable_name=args.matfile_variable_name, undirected=args.undirected)
  else:
    raise Exception("Unknown file format: '%s'.  Valid formats: 'adjlist', 'edgelist', 'mat'" % args.format)


  print("Number of nodes: {}".format(len(G.nodes())))

  num_walks = len(G.nodes()) * args.number_walks

  print("Number of walks: {}".format(num_walks))

  data_size = num_walks * args.walk_length

  print("Data size (walks*length): {}".format(data_size))

  if data_size < args.max_memory_data_size:
    print("Walking...")
    walks = graph.build_deepwalk_corpus1(G, num_paths=args.number_walks,
                                        path_length=args.walk_length, alpha=0, rand=random.Random(args.seed))

    # print(walks)
    # print(type(walks))ee4

    write_walks2(args, walks, DATASET, METHOD, F, ego_user,res_dir)
    print("Training...")

    # model = Word2Vec(walks, size=args.representation_size, window=args.window_size, min_count=0, sg=1, hs=1,
    #                  workers=args.workers)

    input_file=res_dir + DATASET + '-' + METHOD + '-walks-' + F + '-' + str(ego_user)

    w2v = trainer.Word2VecTrainer(input_file, output_file="out.vec")
    if dp==0:
        emb_mappings=w2v.train(res_dir,DATASET,METHOD, F,ego_user)
    if dp==1:
        emb_mappings=w2v.train_dp(res_dir,DATASET,METHOD, F,ego_user)


  #emb_mappings=w2v.u_embeddings.weight.cpu().data.numpy()
  emb_mappings=emb_mappings.cpu().detach().numpy()
  # print(np.shape(emb_mappings))

  #   model = Word2Vec(walks, size=args.representation_size, window=args.window_size, min_count=0, sg=1, hs=1, workers=args.workers)
  #
  #   training_loss = model.get_latest_training_loss()
  #
  # else:
  #   print("Data size {} is larger than limit (max-memory-data-size: {}).  Dumping walks to disk.".format(data_size, args.max_memory_data_size))
  #   print("Walking...")
  #
  #   walks_filebase = args.output + ".walks"
  #   walk_files = serialized_walks.write_walks_to_disk(G, walks_filebase, num_paths=args.number_walks,
  #                                        path_length=args.walk_length, alpha=0, rand=random.Random(args.seed),
  #                                        num_workers=args.workers)
  #
  #   print("Counting vertex frequency...")
  #   if not args.vertex_freq_degree:
  #     vertex_counts = serialized_walks.count_textfiles(walk_files, args.workers)
  #   else:
  #     # use degree distribution for frequency in tree
  #     vertex_counts = G.degree(nodes=G.iterkeys())
  #
  #   print("Training...")
  #   walks_corpus = serialized_walks.WalksCorpus(walk_files)
  #   model = Skipgram(sentences=walks_corpus, vocabulary_counts=vertex_counts,
  #                    size=args.representation_size,
  #                    window=args.window_size, min_count=0, trim_rule=None, workers=args.workers)
  #
  # model.wv.save_word2vec_format(args.output)
  #
  # exit()



  # Store embeddings mapping
  #emb_mappings = model.wv

  # Create node embeddings matrix (rows = nodes, columns = embedding features)
  emb_list = []
  for node_index in range(0, adj_train.shape[0]):
      node_str = int(node_index)
      node_emb = emb_mappings[node_str]
      emb_list.append(node_emb)
  emb_matrix = np.vstack(emb_list)

  with open(res_dir + DATASET + '-' + METHOD + '-embeds-' + F + '-' + str(ego_user), 'w') as f:
      f.write('%d %d\n' % (adj_train.shape[0], args.representation_size))
      for i in range(adj_train.shape[0]):
          e = ' '.join(map(lambda x: str(x), emb_list[i]))
          f.write('%s %s\n' % (str(i), e))

  return emb_matrix


def deepwalk3(g_train, train_test_split,other_edge,DATASET,METHOD,res_dir, ego_user, F,dp):
  parser = ArgumentParser("deepwalk",
                          formatter_class=ArgumentDefaultsHelpFormatter,
                          conflict_handler='resolve')

  parser.add_argument("--debug", dest="debug", action='store_true', default=False,
                      help="drop a debugger if an exception is raised.")

  parser.add_argument('--format', default='edgelist',
                      help='File format of input file')

  parser.add_argument('--input', nargs='?',
                      help='Input graph file')

  parser.add_argument("-l", "--log", dest="log", default="INFO",
                      help="log verbosity level")

  parser.add_argument('--matfile-variable-name', default='network',
                      help='variable name of adjacency matrix inside a .mat file.')

  parser.add_argument('--max-memory-data-size', default=1000000000, type=int,
                      help='Size to start dumping walks to disk, instead of keeping them in memory.')

  parser.add_argument('--number-walks', default=10, type=int,
                      help='Number of random walks to start at each node')

  parser.add_argument('--output',
                      help='Output representation file')

  parser.add_argument('--representation-size', default=128, type=int,
                      help='Number of latent dimensions to learn for each node.')

  parser.add_argument('--seed', default=0, type=int,
                      help='Seed for random walk generator.')

  parser.add_argument('--undirected', default=True, type=bool,
                      help='Treat graph as undirected.')

  parser.add_argument('--vertex-freq-degree', default=False, action='store_true',
                      help='Use vertex degree to estimate the frequency of nodes '
                           'in the random walks. This option is faster than '
                           'calculating the vocabulary.')

  parser.add_argument('--walk-length', default=80, type=int,
                      help='Length of the random walk started at each node')

  parser.add_argument('--window-size', default=10, type=int,
                      help='Window size of skipgram model.')

  parser.add_argument('--workers', default=10, type=int,
                      help='Number of parallel processes.')
  parser.add_argument('--edge_score_mode', default='edge-emb')


  args = parser.parse_args()
  args.input='%s%s-fair-%s-train.txt' % (res_dir, str(ego_user), F)
  args.output = res_dir+METHOD+'-embeds-'+F+'-'+str(ego_user)
  numeric_level = getattr(logging, args.log.upper(), None)
  logging.basicConfig(format=LOGFORMAT)
  logger.setLevel(numeric_level)

  if args.debug:
   sys.excepthook = debug

  adj_train_orig, train_edges, train_edges_false, val_edges, val_edges_false, \
  test_edges, test_edges_false = train_test_split
  g_train=nx.adjacency_matrix(g_train)
  emb_matrix=process3(args,g_train,DATASET,METHOD,F,ego_user,res_dir,dp)

  n2v_scores,train_edge_labels, test_edge_labels, test_preds,train_sim_matrix,test_sim_matrix,train_edge_embs,test_edge_embs,train_embs_1,train_embs_2,test_embs_1,test_embs_2, other_edge_embs, other_sim_matrix, other_embs_1_test, other_embs_2_test, other_edge_labels=linkpre_scores3(args, emb_matrix, train_test_split, other_edge,ego_user,DATASET,METHOD, F)

  return n2v_scores, train_edge_labels,test_edge_labels, test_preds,emb_matrix,train_sim_matrix,test_sim_matrix,train_edge_embs,test_edge_embs,train_embs_1,train_embs_2,test_embs_1,test_embs_2, other_edge_embs, other_sim_matrix, other_embs_1_test, other_embs_2_test, other_edge_labels

def linkpre_scores3(args, emb_matrix, train_test_split, other_edge,ego_user,DATASET,METHOD, Flag):
    adj_train, train_edges, train_edges_false, val_edges, val_edges_false, \
    test_edges, test_edges_false = train_test_split
    start_time = time.time()
    # Generate bootstrapped edge embeddings (as is done in node2vec paper)
    # Edge embedding for (v1, v2) = hadamard product of node embeddings for v1, v2
    if args.edge_score_mode == "edge-emb":

        def get_edge_embeddings(edge_list,ego_user,DATASET, Flag, flag):
            embs = []
            sim_matrix=[]
            embs_1=[]
            embs_2 = []
            for edge in edge_list:
                node1 = edge[0]
                node2 = edge[1]
                emb1 = emb_matrix[node1]
                #print(np.shape(emb1))
                emb2 = emb_matrix[node2]
                edge_emb = np.multiply(emb1, emb2)
                sim = np.dot(emb1, emb2)/(np.linalg.norm(emb1)*np.linalg.norm(emb2))
                #edge_emb = np.array(emb1) + np.array(emb2)
                #print(np.shape(edge_emb))
                embs.append(edge_emb)
                embs_1.append(emb1)
                embs_2.append(emb2)
                sim_matrix.append(sim)
            embs = np.array(embs)
            sim_matrix = np.array(sim_matrix)
            embs_1=np.array(embs_1)
            embs_2 =np.array(embs_2)

            # #with open('/Users/xiulingwang/Downloads/line-master/data/embds/' + str(ego_user) + flag + '-' + Flag, 'w') as f:
            # with open('E:/python/banlance/code/' + DATASET + '/' + METHOD + '/embeds/' + Flag + '-' + str(ego_user) + flag,'w') as f:
            # #with open('/Users/xiulingwang/Downloads/'+DATASET+'/line/3-split/' + str(ego_user)+ '-' + flag + '-' + Flag+'-' +'embds','w') as f:
            #     f.write('%d %d\n' % (edge_list.shape[0], args.representation_size))
            #     for i in range(edge_list.shape[0]):
            #         e = ' '.join(map(lambda x: str(x), embs[i]))
            #         f.write('%s %s %s\n' % (str(edge_list[i][0]), str(edge_list[i][1]), e))

            return embs,sim_matrix,embs_1,embs_2

        # Train-set edge embeddings
        pos_train_edge_embs ,pos_train_sim_matrix,pos_embs_1_train,pos_embs_2_train= get_edge_embeddings(train_edges,ego_user,DATASET, Flag, flag='pos-train')
        neg_train_edge_embs,neg_train_sim_matrix,neg_embs_1_train,neg_embs_2_train = get_edge_embeddings(train_edges_false, ego_user,DATASET,Flag, flag='neg-train')
        train_edge_embs = np.concatenate((pos_train_edge_embs, neg_train_edge_embs), axis=0)
        train_sim_matrix= np.concatenate((pos_train_sim_matrix, neg_train_sim_matrix), axis=0)
        train_embs_1 = np.concatenate((pos_embs_1_train, neg_embs_1_train), axis=0)
        train_embs_2 = np.concatenate((pos_embs_2_train, neg_embs_2_train), axis=0)

        # Create train-set edge labels: 1 = real edge, 0 = false edge
        train_edge_labels = np.concatenate((np.ones(len(train_edges)), np.zeros(len(train_edges_false))), axis=0)

        # Val-set edge embeddings, labels
        if len(val_edges) > 0 and len(val_edges_false) > 0:
            pos_val_edge_embs,pos_val_sim_matrix,pos_embs_1_val,pos_embs_2_val = get_edge_embeddings(val_edges,ego_user,DATASET,Flag, flag='pos-val')
            neg_val_edge_embs,neg_val_sim_matrix,neg_embs_1_val,neg_embs_2_val = get_edge_embeddings(val_edges_false,ego_user,DATASET,Flag, flag='neg-val')
            val_edge_embs = np.concatenate([pos_val_edge_embs, neg_val_edge_embs])
            val_edge_labels = np.concatenate((np.ones(len(val_edges)), np.zeros(len(val_edges_false))), axis=0)
            val_sim_matrix = np.concatenate((pos_val_sim_matrix, neg_val_sim_matrix), axis=0)
            val_embs_1 = np.concatenate((pos_embs_1_val, neg_embs_1_val), axis=0)
            val_embs_2 = np.concatenate((pos_embs_2_val, neg_embs_2_val), axis=0)

        # Test-set edge embeddings, labels
        pos_test_edge_embs,pos_test_sim_matrix,pos_embs_1_test,pos_embs_2_test = get_edge_embeddings(test_edges,ego_user,DATASET,Flag, flag='pos-test')
        neg_test_edge_embs ,neg_test_sim_matrix,neg_embs_1_test,neg_embs_2_test= get_edge_embeddings(test_edges_false,ego_user,DATASET,Flag, flag='neg-test')

        test_edge_embs = np.concatenate((pos_test_edge_embs, neg_test_edge_embs), axis=0)
        test_sim_matrix = np.concatenate((pos_test_sim_matrix, neg_test_sim_matrix), axis=0)
        test_embs_1 = np.concatenate((pos_embs_1_test, neg_embs_1_test), axis=0)
        test_embs_2 = np.concatenate((pos_embs_2_test, neg_embs_2_test), axis=0)

        # Create val-set edge labels: 1 = real edge, 0 = false edge
        test_edge_labels = np.concatenate((np.ones(len(test_edges)), np.zeros(len(test_edges_false))), axis=0)

        other_edge_embs, other_sim_matrix, other_embs_1_test, other_embs_2_test = get_edge_embeddings(
            other_edge, ego_user, DATASET, Flag, flag='other')
        other_edge_labels = np.zeros(len(other_edge))

        # Train logistic regression classifier on train-set edge embeddings
        edge_classifier = LogisticRegression(random_state=0)
        edge_classifier.fit(train_edge_embs, train_edge_labels)

        # Predicted edge scores: probability of being of class "1" (real edge)
        if len(val_edges) > 0 and len(val_edges_false) > 0:
            val_preds = edge_classifier.predict_proba(val_edge_embs)[:, 1]
        test_preds = edge_classifier.predict_proba(test_edge_embs)[:, 1]
        # print(test_preds)
        #print(np.shape(test_preds))

        runtime = time.time() - start_time

        # Calculate scores
        if len(val_edges) > 0 and len(val_edges_false) > 0:
            n2v_val_roc = roc_auc_score(val_edge_labels, val_preds)
            # n2v_val_roc_curve = roc_curve(val_edge_labels, val_preds)
            n2v_val_ap = average_precision_score(val_edge_labels, val_preds)
        else:
            n2v_val_roc = None
            n2v_val_roc_curve = None
            n2v_val_ap = None

        n2v_test_roc = roc_auc_score(test_edge_labels, test_preds)
        # n2v_test_roc_curve = roc_curve(test_edge_labels, test_preds)
        n2v_test_ap = average_precision_score(test_edge_labels, test_preds)


    # Generate edge scores using simple dot product of node embeddings (like in GAE paper)
    elif args.edge_score_mode == "dot-product":
        score_matrix = np.dot(emb_matrix, emb_matrix.T)
        runtime = time.time() - start_time

        # Val set scores
        if len(val_edges) > 0:
            n2v_val_roc, n2v_val_ap = get_roc_score(val_edges, val_edges_false, score_matrix, apply_sigmoid=True)
        else:
            n2v_val_roc = None
            n2v_val_roc_curve = None
            n2v_val_ap = None

        # Test set scores
        n2v_test_roc, n2v_test_ap = get_roc_score(test_edges, test_edges_false, score_matrix, apply_sigmoid=True)

    else:
        print
        "Invalid edge_score_mode! Either use edge-emb or dot-product."

    # Record scores
    n2v_scores = {}

    n2v_scores['test_roc'] = n2v_test_roc
    # n2v_scores['test_roc_curve'] = n2v_test_roc_curve
    n2v_scores['test_ap'] = n2v_test_ap

    n2v_scores['val_roc'] = n2v_val_roc
    # n2v_scores['val_roc_curve'] = n2v_val_roc_curve
    n2v_scores['val_ap'] = n2v_val_ap

    n2v_scores['runtime'] = runtime

    return n2v_scores, train_edge_labels,test_edge_labels, test_preds,train_sim_matrix,test_sim_matrix,train_edge_embs,test_edge_embs,train_embs_1,train_embs_2,test_embs_1,test_embs_2, other_edge_embs, other_sim_matrix, other_embs_1_test, other_embs_2_test, other_edge_labels

def deepwalk4(g_train, train_test_split,DATASET,METHOD,res_dir, ego_user, F,dp):
  parser = ArgumentParser("deepwalk",
                          formatter_class=ArgumentDefaultsHelpFormatter,
                          conflict_handler='resolve')

  parser.add_argument("--debug", dest="debug", action='store_true', default=False,
                      help="drop a debugger if an exception is raised.")

  parser.add_argument('--format', default='edgelist',
                      help='File format of input file')

  parser.add_argument('--input', nargs='?',
                      help='Input graph file')

  parser.add_argument("-l", "--log", dest="log", default="INFO",
                      help="log verbosity level")

  parser.add_argument('--matfile-variable-name', default='network',
                      help='variable name of adjacency matrix inside a .mat file.')

  parser.add_argument('--max-memory-data-size', default=1000000000, type=int,
                      help='Size to start dumping walks to disk, instead of keeping them in memory.')

  parser.add_argument('--number-walks', default=10, type=int,
                      help='Number of random walks to start at each node')

  parser.add_argument('--output',
                      help='Output representation file')

  parser.add_argument('--representation-size', default=128, type=int,
                      help='Number of latent dimensions to learn for each node.')

  parser.add_argument('--seed', default=0, type=int,
                      help='Seed for random walk generator.')

  parser.add_argument('--undirected', default=True, type=bool,
                      help='Treat graph as undirected.')

  parser.add_argument('--vertex-freq-degree', default=False, action='store_true',
                      help='Use vertex degree to estimate the frequency of nodes '
                           'in the random walks. This option is faster than '
                           'calculating the vocabulary.')

  parser.add_argument('--walk-length', default=80, type=int,
                      help='Length of the random walk started at each node')

  parser.add_argument('--window-size', default=10, type=int,
                      help='Window size of skipgram model.')

  parser.add_argument('--workers', default=10, type=int,
                      help='Number of parallel processes.')
  parser.add_argument('--edge_score_mode', default='edge-emb')


  args = parser.parse_args()
  args.input='%s%s-fair-%s-train.txt' % (res_dir, str(ego_user), F)
  args.output = res_dir+METHOD+'-embeds-'+F+'-'+str(ego_user)
  numeric_level = getattr(logging, args.log.upper(), None)
  logging.basicConfig(format=LOGFORMAT)
  logger.setLevel(numeric_level)

  if args.debug:
   sys.excepthook = debug

  adj_train_orig, train_edges, train_edges_false, val_edges, val_edges_false, \
  test_edges, test_edges_false = train_test_split
  g_train=nx.adjacency_matrix(g_train)
  emb_matrix=process3(args,g_train,DATASET,METHOD,F,ego_user,res_dir,dp)

  # n2v_scores,train_edge_labels, test_edge_labels, test_preds,train_sim_matrix,test_sim_matrix,train_edge_embs,test_edge_embs,train_embs_1,train_embs_2,test_embs_1,test_embs_2, other_edge_embs, other_sim_matrix, other_embs_1_test, other_embs_2_test, other_edge_labels=linkpre_scores4(args, emb_matrix, train_test_split, ego_user,DATASET,METHOD, F)

  return emb_matrix

def linkpre_scores4(emb_matrix, train_edges_pos,train_edges_neg,test_edges, other_edge):

    start_time = time.time()
    # Generate bootstrapped edge embeddings (as is done in node2vec paper)
    # Edge embedding for (v1, v2) = hadamard product of node embeddings for v1, v2

    def get_edge_embeddings(edge_list):
        embs = []
        sim_matrix=[]
        embs_1=[]
        embs_2 = []
        for edge in edge_list:
            node1 = edge[0]
            node2 = edge[1]
            emb1 = emb_matrix[node1]
            #print(np.shape(emb1))
            emb2 = emb_matrix[node2]
            edge_emb = np.multiply(emb1, emb2)
            sim = np.dot(emb1, emb2)/(np.linalg.norm(emb1)*np.linalg.norm(emb2))
            #edge_emb = np.array(emb1) + np.array(emb2)
            #print(np.shape(edge_emb))
            embs.append(edge_emb)
            embs_1.append(emb1)
            embs_2.append(emb2)
            sim_matrix.append(sim)
        embs = np.array(embs)
        sim_matrix = np.array(sim_matrix)
        embs_1=np.array(embs_1)
        embs_2 =np.array(embs_2)

        # #with open('/Users/xiulingwang/Downloads/line-master/data/embds/' + str(ego_user) + flag + '-' + Flag, 'w') as f:
        # with open('E:/python/banlance/code/' + DATASET + '/' + METHOD + '/embeds/' + Flag + '-' + str(ego_user) + flag,'w') as f:
        # #with open('/Users/xiulingwang/Downloads/'+DATASET+'/line/3-split/' + str(ego_user)+ '-' + flag + '-' + Flag+'-' +'embds','w') as f:
        #     f.write('%d %d\n' % (edge_list.shape[0], args.representation_size))
        #     for i in range(edge_list.shape[0]):
        #         e = ' '.join(map(lambda x: str(x), embs[i]))
        #         f.write('%s %s %s\n' % (str(edge_list[i][0]), str(edge_list[i][1]), e))

        return embs,sim_matrix,embs_1,embs_2

    # Train-set edge embeddings
    pos_train_edge_embs ,pos_train_sim_matrix,pos_embs_1_train,pos_embs_2_train= get_edge_embeddings(train_edges_pos)
    neg_train_edge_embs,neg_train_sim_matrix,neg_embs_1_train,neg_embs_2_train = get_edge_embeddings(train_edges_neg)
    train_edge_embs = np.concatenate((pos_train_edge_embs, neg_train_edge_embs), axis=0)
    train_sim_matrix= np.concatenate((pos_train_sim_matrix, neg_train_sim_matrix), axis=0)
    train_embs_1 = np.concatenate((pos_embs_1_train, neg_embs_1_train), axis=0)
    train_embs_2 = np.concatenate((pos_embs_2_train, neg_embs_2_train), axis=0)

    # Create train-set edge labels: 1 = real edge, 0 = false edge
    train_edge_labels = np.concatenate((np.ones(len(train_edges_pos)), np.zeros(len(train_edges_neg))), axis=0)


    # Test-set edge embeddings, labels
    pos_test_edge_embs,pos_test_sim_matrix,pos_embs_1_test,pos_embs_2_test = get_edge_embeddings(test_edges)
    neg_test_edge_embs ,neg_test_sim_matrix,neg_embs_1_test,neg_embs_2_test= get_edge_embeddings(other_edge)

    test_edge_embs = np.concatenate((pos_test_edge_embs, neg_test_edge_embs), axis=0)
    test_sim_matrix = np.concatenate((pos_test_sim_matrix, neg_test_sim_matrix), axis=0)
    test_embs_1 = np.concatenate((pos_embs_1_test, neg_embs_1_test), axis=0)
    test_embs_2 = np.concatenate((pos_embs_2_test, neg_embs_2_test), axis=0)

    # Create val-set edge labels: 1 = real edge, 0 = false edge
    test_edge_labels = np.concatenate((np.ones(len(test_edges)), np.zeros(len(other_edge))), axis=0)

    # Train logistic regression classifier on train-set edge embeddings
    edge_classifier = LogisticRegression(random_state=0)
    edge_classifier.fit(train_edge_embs, train_edge_labels)

    # Predicted edge scores: probability of being of class "1" (real edge)

    test_preds = edge_classifier.predict_proba(test_edge_embs)[:, 1]
    # print(test_preds)
    #print(np.shape(test_preds))

    runtime = time.time() - start_time

    # Calculate scores

    n2v_test_roc = roc_auc_score(test_edge_labels, test_preds)
    # n2v_test_roc_curve = roc_curve(test_edge_labels, test_preds)
    n2v_test_ap = average_precision_score(test_edge_labels, test_preds)


    # Record scores
    n2v_scores = {}

    n2v_scores['test_roc'] = n2v_test_roc
    # n2v_scores['test_roc_curve'] = n2v_test_roc_curve
    n2v_scores['test_ap'] = n2v_test_ap

    n2v_scores['runtime'] = runtime

    return n2v_scores, train_edge_labels,test_edge_labels, test_preds,train_sim_matrix,test_sim_matrix,train_edge_embs,test_edge_embs,train_embs_1,train_embs_2,test_embs_1,test_embs_2


def linkpre_scores5(emb_matrix, train_edges,test_edges):

    start_time = time.time()
    # Generate bootstrapped edge embeddings (as is done in node2vec paper)
    # Edge embedding for (v1, v2) = hadamard product of node embeddings for v1, v2

    def get_edge_embeddings(edge_list):
        embs = []
        sim_matrix=[]
        embs_1=[]
        embs_2 = []
        for edge in edge_list:
            node1 = edge[0]
            node2 = edge[1]
            emb1 = emb_matrix[node1]
            #print(np.shape(emb1))
            emb2 = emb_matrix[node2]
            edge_emb = np.multiply(emb1, emb2)
            sim = np.dot(emb1, emb2)/(np.linalg.norm(emb1)*np.linalg.norm(emb2))
            #edge_emb = np.array(emb1) + np.array(emb2)
            #print(np.shape(edge_emb))
            embs.append(edge_emb)
            embs_1.append(emb1)
            embs_2.append(emb2)
            sim_matrix.append(sim)
        embs = np.array(embs)
        sim_matrix = np.array(sim_matrix)
        embs_1=np.array(embs_1)
        embs_2 =np.array(embs_2)

        # #with open('/Users/xiulingwang/Downloads/line-master/data/embds/' + str(ego_user) + flag + '-' + Flag, 'w') as f:
        # with open('E:/python/banlance/code/' + DATASET + '/' + METHOD + '/embeds/' + Flag + '-' + str(ego_user) + flag,'w') as f:
        # #with open('/Users/xiulingwang/Downloads/'+DATASET+'/line/3-split/' + str(ego_user)+ '-' + flag + '-' + Flag+'-' +'embds','w') as f:
        #     f.write('%d %d\n' % (edge_list.shape[0], args.representation_size))
        #     for i in range(edge_list.shape[0]):
        #         e = ' '.join(map(lambda x: str(x), embs[i]))
        #         f.write('%s %s %s\n' % (str(edge_list[i][0]), str(edge_list[i][1]), e))

        return embs,sim_matrix,embs_1,embs_2

    # Train-set edge embeddings
    # pos_train_edge_embs ,pos_train_sim_matrix,pos_embs_1_train,pos_embs_2_train= get_edge_embeddings(train_edges_pos)
    # neg_train_edge_embs,neg_train_sim_matrix,neg_embs_1_train,neg_embs_2_train = get_edge_embeddings(train_edges_neg)
    # train_edge_embs = np.concatenate((pos_train_edge_embs, neg_train_edge_embs), axis=0)
    # train_sim_matrix= np.concatenate((pos_train_sim_matrix, neg_train_sim_matrix), axis=0)
    # train_embs_1 = np.concatenate((pos_embs_1_train, neg_embs_1_train), axis=0)
    # train_embs_2 = np.concatenate((pos_embs_2_train, neg_embs_2_train), axis=0)

    train_edge_embs, train_sim_matrix, embs_1_train, embs_2_train = get_edge_embeddings(train_edges)

    # Create train-set edge labels: 1 = real edge, 0 = false edge
    train_edge_labels = np.ones(len(train_edges))


    # Test-set edge embeddings, labels
    test_edge_embs,test_sim_matrix,embs_1_test,embs_2_test = get_edge_embeddings(test_edges)


    # Create val-set edge labels: 1 = real edge, 0 = false edge
    test_edge_labels = np.ones(len(test_edges))

    # Train logistic regression classifier on train-set edge embeddings
    edge_classifier = LogisticRegression(random_state=0)
    edge_classifier.fit(train_edge_embs, train_edge_labels)

    # Predicted edge scores: probability of being of class "1" (real edge)

    test_preds = edge_classifier.predict_proba(test_edge_embs)[:, 1]
    # print(test_preds)
    #print(np.shape(test_preds))

    runtime = time.time() - start_time

    # Calculate scores

    n2v_test_roc = roc_auc_score(test_edge_labels, test_preds)
    # n2v_test_roc_curve = roc_curve(test_edge_labels, test_preds)
    n2v_test_ap = average_precision_score(test_edge_labels, test_preds)


    # Record scores
    n2v_scores = {}

    n2v_scores['test_roc'] = n2v_test_roc
    # n2v_scores['test_roc_curve'] = n2v_test_roc_curve
    n2v_scores['test_ap'] = n2v_test_ap

    n2v_scores['runtime'] = runtime

    print(n2v_scores)

    return n2v_scores, train_edge_labels,test_edge_labels, test_preds,train_sim_matrix,test_sim_matrix,train_edge_embs,test_edge_embs


def deepwalk6(g_train, train_test_split,DATASET,METHOD,res_dir, ego_user, F,dp):
  parser = ArgumentParser("deepwalk",
                          formatter_class=ArgumentDefaultsHelpFormatter,
                          conflict_handler='resolve')

  parser.add_argument("--debug", dest="debug", action='store_true', default=False,
                      help="drop a debugger if an exception is raised.")

  parser.add_argument('--format', default='edgelist',
                      help='File format of input file')

  parser.add_argument('--input', nargs='?',
                      help='Input graph file')

  parser.add_argument("-l", "--log", dest="log", default="INFO",
                      help="log verbosity level")

  parser.add_argument('--matfile-variable-name', default='network',
                      help='variable name of adjacency matrix inside a .mat file.')

  parser.add_argument('--max-memory-data-size', default=1000000000, type=int,
                      help='Size to start dumping walks to disk, instead of keeping them in memory.')

  parser.add_argument('--number-walks', default=10, type=int,
                      help='Number of random walks to start at each node')

  parser.add_argument('--output',
                      help='Output representation file')

  parser.add_argument('--representation-size', default=128, type=int,
                      help='Number of latent dimensions to learn for each node.')

  parser.add_argument('--seed', default=0, type=int,
                      help='Seed for random walk generator.')

  parser.add_argument('--undirected', default=True, type=bool,
                      help='Treat graph as undirected.')

  parser.add_argument('--vertex-freq-degree', default=False, action='store_true',
                      help='Use vertex degree to estimate the frequency of nodes '
                           'in the random walks. This option is faster than '
                           'calculating the vocabulary.')

  parser.add_argument('--walk-length', default=80, type=int,
                      help='Length of the random walk started at each node')

  parser.add_argument('--window-size', default=10, type=int,
                      help='Window size of skipgram model.')

  parser.add_argument('--workers', default=10, type=int,
                      help='Number of parallel processes.')
  parser.add_argument('--edge_score_mode', default='edge-emb')


  args = parser.parse_args()
  args.input='%s%s-fair-%s-train.txt' % (res_dir, str(ego_user), F)
  args.output = res_dir+METHOD+'-embeds-'+F+'-'+str(ego_user)
  numeric_level = getattr(logging, args.log.upper(), None)
  logging.basicConfig(format=LOGFORMAT)
  logger.setLevel(numeric_level)

  if args.debug:
   sys.excepthook = debug

  adj_train_orig, train_edges, train_edges_false, val_edges, val_edges_false, \
  test_edges, test_edges_false = train_test_split
  g_train=nx.adjacency_matrix(g_train)
  emb_matrix=process3(args,g_train,DATASET,METHOD,F,ego_user,res_dir,dp)

  train_edge_labels, test_edge_labels, train_sim_matrix,test_sim_matrix,train_edge_embs,test_edge_embs,train_embs_1,train_embs_2,test_embs_1,test_embs_2,train_edges_sampled=linkpre_scores6(args, emb_matrix, train_test_split, ego_user,DATASET,METHOD, F)

  return  train_edge_labels,test_edge_labels, emb_matrix,train_sim_matrix,test_sim_matrix,train_edge_embs,test_edge_embs,train_embs_1,train_embs_2,test_embs_1,test_embs_2,train_edges_sampled



def linkpre_scores6(args, emb_matrix, train_test_split, ego_user,DATASET,METHOD, Flag):
    adj_train, train_edges, train_edges_false, val_edges, val_edges_false, \
    test_edges, test_edges_false = train_test_split
    start_time = time.time()
    # Generate bootstrapped edge embeddings (as is done in node2vec paper)
    # Edge embedding for (v1, v2) = hadamard product of node embeddings for v1, v2
    if args.edge_score_mode == "edge-emb":

        def get_edge_embeddings(edge_list,ego_user,DATASET, Flag, flag):
            embs = []
            sim_matrix=[]
            embs_1=[]
            embs_2 = []
            for edge in edge_list:
                node1 = edge[0]
                node2 = edge[1]
                emb1 = emb_matrix[node1]
                #print(np.shape(emb1))
                emb2 = emb_matrix[node2]
                edge_emb = np.multiply(emb1, emb2)
                # sim = np.dot(emb1, emb2)/(np.linalg.norm(emb1)*np.linalg.norm(emb2))
                sim = np.dot(emb1, emb2)
                #edge_emb = np.array(emb1) + np.array(emb2)
                #print(np.shape(edge_emb))
                embs.append(edge_emb)
                embs_1.append(emb1)
                embs_2.append(emb2)
                sim_matrix.append(sim)
            embs = np.array(embs)
            sim_matrix = np.array(sim_matrix)
            embs_1=np.array(embs_1)
            embs_2 =np.array(embs_2)

            # #with open('/Users/xiulingwang/Downloads/line-master/data/embds/' + str(ego_user) + flag + '-' + Flag, 'w') as f:
            # with open('E:/python/banlance/code/' + DATASET + '/' + METHOD + '/embeds/' + Flag + '-' + str(ego_user) + flag,'w') as f:
            # #with open('/Users/xiulingwang/Downloads/'+DATASET+'/line/3-split/' + str(ego_user)+ '-' + flag + '-' + Flag+'-' +'embds','w') as f:
            #     f.write('%d %d\n' % (edge_list.shape[0], args.representation_size))
            #     for i in range(edge_list.shape[0]):
            #         e = ' '.join(map(lambda x: str(x), embs[i]))
            #         f.write('%s %s %s\n' % (str(edge_list[i][0]), str(edge_list[i][1]), e))

            return embs,sim_matrix,embs_1,embs_2

        edgeall = list([list(edge_tuple) for edge_tuple in train_edges])

        # train_edges_sampled = random.sample(edgeall, np.shape(test_edges)[0])
        train_edges_sampled = random.sample(edgeall, np.shape(train_edges)[0])


        # Train-set edge embeddings
        pos_train_edge_embs ,pos_train_sim_matrix,pos_embs_1_train,pos_embs_2_train= get_edge_embeddings(train_edges_sampled,ego_user,DATASET, Flag, flag='pos-train')
        neg_train_edge_embs,neg_train_sim_matrix,neg_embs_1_train,neg_embs_2_train = get_edge_embeddings(train_edges_false, ego_user,DATASET,Flag, flag='neg-train')
        train_edge_embs = pos_train_edge_embs
        train_sim_matrix= pos_train_sim_matrix
        train_embs_1 = pos_embs_1_train
        train_embs_2 = pos_embs_2_train

        # Create train-set edge labels: 1 = real edge, 0 = false edge
        train_edge_labels = np.ones(len(train_edges_sampled))

        # Val-set edge embeddings, labels
        if len(val_edges) > 0 and len(val_edges_false) > 0:
            pos_val_edge_embs,pos_val_sim_matrix,pos_embs_1_val,pos_embs_2_val = get_edge_embeddings(val_edges,ego_user,DATASET,Flag, flag='pos-val')
            neg_val_edge_embs,neg_val_sim_matrix,neg_embs_1_val,neg_embs_2_val = get_edge_embeddings(val_edges_false,ego_user,DATASET,Flag, flag='neg-val')
            val_edge_embs = np.concatenate([pos_val_edge_embs, neg_val_edge_embs])
            val_edge_labels = np.concatenate((np.ones(len(val_edges)), np.zeros(len(val_edges_false))), axis=0)
            val_sim_matrix = np.concatenate((pos_val_sim_matrix, neg_val_sim_matrix), axis=0)
            val_embs_1 = np.concatenate((pos_embs_1_val, neg_embs_1_val), axis=0)
            val_embs_2 = np.concatenate((pos_embs_2_val, neg_embs_2_val), axis=0)

        # Test-set edge embeddings, labels
        pos_test_edge_embs,pos_test_sim_matrix,pos_embs_1_test,pos_embs_2_test = get_edge_embeddings(test_edges,ego_user,DATASET,Flag, flag='pos-test')
        neg_test_edge_embs ,neg_test_sim_matrix,neg_embs_1_test,neg_embs_2_test= get_edge_embeddings(test_edges_false,ego_user,DATASET,Flag, flag='neg-test')
        test_edge_embs = pos_test_edge_embs
        test_sim_matrix = pos_test_sim_matrix
        test_embs_1 = pos_embs_1_test
        test_embs_2 = pos_embs_2_test

        # Create val-set edge labels: 1 = real edge, 0 = false edge
        test_edge_labels = np.ones(len(test_edges))


    #     # Train logistic regression classifier on train-set edge embeddings
    #     edge_classifier = LogisticRegression(random_state=0)
    #     edge_classifier.fit(train_edge_embs, train_edge_labels)
    #
    #     # Predicted edge scores: probability of being of class "1" (real edge)
    #     if len(val_edges) > 0 and len(val_edges_false) > 0:
    #         val_preds = edge_classifier.predict_proba(val_edge_embs)[:, 1]
    #     test_preds = edge_classifier.predict_proba(test_edge_embs)[:, 1]
    #     # print(test_preds)
    #     #print(np.shape(test_preds))
    #
    #     runtime = time.time() - start_time
    #
    #     # Calculate scores
    #     if len(val_edges) > 0 and len(val_edges_false) > 0:
    #         n2v_val_roc = roc_auc_score(val_edge_labels, val_preds)
    #         # n2v_val_roc_curve = roc_curve(val_edge_labels, val_preds)
    #         n2v_val_ap = average_precision_score(val_edge_labels, val_preds)
    #     else:
    #         n2v_val_roc = None
    #         n2v_val_roc_curve = None
    #         n2v_val_ap = None
    #
    #     n2v_test_roc = roc_auc_score(test_edge_labels, test_preds)
    #     # n2v_test_roc_curve = roc_curve(test_edge_labels, test_preds)
    #     n2v_test_ap = average_precision_score(test_edge_labels, test_preds)
    #
    #
    # # Generate edge scores using simple dot product of node embeddings (like in GAE paper)
    # elif args.edge_score_mode == "dot-product":
    #     score_matrix = np.dot(emb_matrix, emb_matrix.T)
    #     runtime = time.time() - start_time
    #
    #     # Val set scores
    #     if len(val_edges) > 0:
    #         n2v_val_roc, n2v_val_ap = get_roc_score(val_edges, val_edges_false, score_matrix, apply_sigmoid=True)
    #     else:
    #         n2v_val_roc = None
    #         n2v_val_roc_curve = None
    #         n2v_val_ap = None
    #
    #     # Test set scores
    #     n2v_test_roc, n2v_test_ap = get_roc_score(test_edges, test_edges_false, score_matrix, apply_sigmoid=True)
    #
    # else:
    #     print
    #     "Invalid edge_score_mode! Either use edge-emb or dot-product."
    #
    # # Record scores
    # n2v_scores = {}
    #
    # n2v_scores['test_roc'] = n2v_test_roc
    # # n2v_scores['test_roc_curve'] = n2v_test_roc_curve
    # n2v_scores['test_ap'] = n2v_test_ap
    #
    # n2v_scores['val_roc'] = n2v_val_roc
    # # n2v_scores['val_roc_curve'] = n2v_val_roc_curve
    # n2v_scores['val_ap'] = n2v_val_ap
    #
    # n2v_scores['runtime'] = runtime

    return  train_edge_labels,test_edge_labels, train_sim_matrix,test_sim_matrix,train_edge_embs,test_edge_embs,train_embs_1,train_embs_2,test_embs_1,test_embs_2,train_edges_sampled

def deepwalk7(g_train, train_test_split,DATASET,METHOD,res_dir, ego_user, F):
  parser = ArgumentParser("deepwalk",
                          formatter_class=ArgumentDefaultsHelpFormatter,
                          conflict_handler='resolve')

  parser.add_argument("--debug", dest="debug", action='store_true', default=False,
                      help="drop a debugger if an exception is raised.")

  parser.add_argument('--format', default='edgelist',
                      help='File format of input file')

  parser.add_argument('--input', nargs='?',
                      help='Input graph file')

  parser.add_argument("-l", "--log", dest="log", default="INFO",
                      help="log verbosity level")

  parser.add_argument('--matfile-variable-name', default='network',
                      help='variable name of adjacency matrix inside a .mat file.')

  parser.add_argument('--max-memory-data-size', default=1000000000, type=int,
                      help='Size to start dumping walks to disk, instead of keeping them in memory.')

  parser.add_argument('--number-walks', default=10, type=int,
                      help='Number of random walks to start at each node')

  parser.add_argument('--output',
                      help='Output representation file')

  parser.add_argument('--representation-size', default=128, type=int,
                      help='Number of latent dimensions to learn for each node.')

  parser.add_argument('--seed', default=0, type=int,
                      help='Seed for random walk generator.')

  parser.add_argument('--undirected', default=True, type=bool,
                      help='Treat graph as undirected.')

  parser.add_argument('--vertex-freq-degree', default=False, action='store_true',
                      help='Use vertex degree to estimate the frequency of nodes '
                           'in the random walks. This option is faster than '
                           'calculating the vocabulary.')

  parser.add_argument('--walk-length', default=80, type=int,
                      help='Length of the random walk started at each node')

  parser.add_argument('--window-size', default=10, type=int,
                      help='Window size of skipgram model.')

  parser.add_argument('--workers', default=10, type=int,
                      help='Number of parallel processes.')
  parser.add_argument('--edge_score_mode', default='edge-emb')


  args = parser.parse_args()
  args.input='%s%s-fair-%s-train.txt' % (res_dir, str(ego_user), F)
  args.output = res_dir+METHOD+'-embeds-'+F+'-'+str(ego_user)
  numeric_level = getattr(logging, args.log.upper(), None)
  logging.basicConfig(format=LOGFORMAT)
  logger.setLevel(numeric_level)

  if args.debug:
   sys.excepthook = debug

  adj_train_orig, train_edges, train_edges_false, val_edges, val_edges_false, \
  test_edges, test_edges_false = train_test_split
  g_train=nx.adjacency_matrix(g_train)
  emb_matrix=process(args,g_train,DATASET,METHOD,F,ego_user)

  n2v_scores,train_edge_labels, test_edge_labels, test_preds,train_sim_matrix,test_sim_matrix,train_edge_embs,test_edge_embs,train_embs_1,train_embs_2,test_embs_1,test_embs_2,train_edges_sampled=linkpre_scores7(args, emb_matrix, train_test_split, ego_user,DATASET,METHOD, F)

  return n2v_scores, train_edge_labels,test_edge_labels, test_preds,emb_matrix,train_sim_matrix,test_sim_matrix,train_edge_embs,test_edge_embs,train_embs_1,train_embs_2,test_embs_1,test_embs_2,train_edges_sampled


def linkpre_scores7(args, emb_matrix, train_test_split, ego_user,DATASET,METHOD, Flag):
    adj_train, train_edges, train_edges_false, val_edges, val_edges_false, \
    test_edges, test_edges_false = train_test_split
    start_time = time.time()
    # Generate bootstrapped edge embeddings (as is done in node2vec paper)
    # Edge embedding for (v1, v2) = hadamard product of node embeddings for v1, v2
    if args.edge_score_mode == "edge-emb":

        def get_edge_embeddings(edge_list,ego_user,DATASET, Flag, flag):
            embs = []
            sim_matrix=[]
            embs_1=[]
            embs_2 = []
            for edge in edge_list:
                node1 = edge[0]
                node2 = edge[1]
                emb1 = emb_matrix[node1]
                #print(np.shape(emb1))
                emb2 = emb_matrix[node2]
                edge_emb = np.multiply(emb1, emb2)
                sim = np.dot(emb1, emb2)/(np.linalg.norm(emb1)*np.linalg.norm(emb2))
                #edge_emb = np.array(emb1) + np.array(emb2)
                print(np.shape(edge_emb))
                embs.append(edge_emb)
                embs_1.append(emb1)
                embs_2.append(emb2)
                sim_matrix.append(sim)
            embs = np.array(embs)
            sim_matrix = np.array(sim_matrix)
            embs_1=np.array(embs_1)
            embs_2 =np.array(embs_2)

            # #with open('/Users/xiulingwang/Downloads/line-master/data/embds/' + str(ego_user) + flag + '-' + Flag, 'w') as f:
            # with open('E:/python/banlance/code/' + DATASET + '/' + METHOD + '/embeds/' + Flag + '-' + str(ego_user) + flag,'w') as f:
            # #with open('/Users/xiulingwang/Downloads/'+DATASET+'/line/3-split/' + str(ego_user)+ '-' + flag + '-' + Flag+'-' +'embds','w') as f:
            #     f.write('%d %d\n' % (edge_list.shape[0], args.representation_size))
            #     for i in range(edge_list.shape[0]):
            #         e = ' '.join(map(lambda x: str(x), embs[i]))
            #         f.write('%s %s %s\n' % (str(edge_list[i][0]), str(edge_list[i][1]), e))

            return embs,sim_matrix,embs_1,embs_2

        edgeall = list([list(edge_tuple) for edge_tuple in train_edges])

        train_edges = random.sample(edgeall, np.shape(test_edges)[0])
        print(np.shape(train_edges))
        print(np.shape(test_edges))
        # exit()


        # Train-set edge embeddings
        pos_train_edge_embs ,pos_train_sim_matrix,pos_embs_1_train,pos_embs_2_train= get_edge_embeddings(train_edges,ego_user,DATASET, Flag, flag='pos-train')
        neg_train_edge_embs,neg_train_sim_matrix,neg_embs_1_train,neg_embs_2_train = get_edge_embeddings(train_edges_false, ego_user,DATASET,Flag, flag='neg-train')
        train_edge_embs = np.concatenate((pos_train_edge_embs, neg_train_edge_embs), axis=0)
        train_sim_matrix= np.concatenate((pos_train_sim_matrix, neg_train_sim_matrix), axis=0)
        train_embs_1 = np.concatenate((pos_embs_1_train, neg_embs_1_train), axis=0)
        train_embs_2 = np.concatenate((pos_embs_2_train, neg_embs_2_train), axis=0)

        # Create train-set edge labels: 1 = real edge, 0 = false edge
        train_edge_labels = np.concatenate((np.ones(len(train_edges)), np.zeros(len(train_edges_false))), axis=0)

        # Val-set edge embeddings, labels
        if len(val_edges) > 0 and len(val_edges_false) > 0:
            pos_val_edge_embs,pos_val_sim_matrix,pos_embs_1_val,pos_embs_2_val = get_edge_embeddings(val_edges,ego_user,DATASET,Flag, flag='pos-val')
            neg_val_edge_embs,neg_val_sim_matrix,neg_embs_1_val,neg_embs_2_val = get_edge_embeddings(val_edges_false,ego_user,DATASET,Flag, flag='neg-val')
            val_edge_embs = np.concatenate([pos_val_edge_embs, neg_val_edge_embs])
            val_edge_labels = np.concatenate((np.ones(len(val_edges)), np.zeros(len(val_edges_false))), axis=0)
            val_sim_matrix = np.concatenate((pos_val_sim_matrix, neg_val_sim_matrix), axis=0)
            val_embs_1 = np.concatenate((pos_embs_1_val, neg_embs_1_val), axis=0)
            val_embs_2 = np.concatenate((pos_embs_2_val, neg_embs_2_val), axis=0)

        # Test-set edge embeddings, labels
        pos_test_edge_embs,pos_test_sim_matrix,pos_embs_1_test,pos_embs_2_test = get_edge_embeddings(test_edges,ego_user,DATASET,Flag, flag='pos-test')
        neg_test_edge_embs ,neg_test_sim_matrix,neg_embs_1_test,neg_embs_2_test= get_edge_embeddings(test_edges_false,ego_user,DATASET,Flag, flag='neg-test')
        test_edge_embs = np.concatenate((pos_test_edge_embs, neg_test_edge_embs), axis=0)
        test_sim_matrix = np.concatenate((pos_test_sim_matrix, neg_test_sim_matrix), axis=0)
        test_embs_1 = np.concatenate((pos_embs_1_test, neg_embs_1_test), axis=0)
        test_embs_2 = np.concatenate((pos_embs_2_test, neg_embs_2_test), axis=0)

        # Create val-set edge labels: 1 = real edge, 0 = false edge
        test_edge_labels = np.concatenate((np.ones(len(test_edges)), np.zeros(len(test_edges_false))), axis=0)

        # Train logistic regression classifier on train-set edge embeddings
        edge_classifier = LogisticRegression(random_state=0)
        edge_classifier.fit(train_edge_embs, train_edge_labels)

        # Predicted edge scores: probability of being of class "1" (real edge)
        if len(val_edges) > 0 and len(val_edges_false) > 0:
            val_preds = edge_classifier.predict_proba(val_edge_embs)[:, 1]
        test_preds = edge_classifier.predict_proba(test_edge_embs)[:, 1]
        print(test_preds)
        print(np.shape(test_preds))

        runtime = time.time() - start_time

        # Calculate scores
        if len(val_edges) > 0 and len(val_edges_false) > 0:
            n2v_val_roc = roc_auc_score(val_edge_labels, val_preds)
            # n2v_val_roc_curve = roc_curve(val_edge_labels, val_preds)
            n2v_val_ap = average_precision_score(val_edge_labels, val_preds)
        else:
            n2v_val_roc = None
            n2v_val_roc_curve = None
            n2v_val_ap = None

        n2v_test_roc = roc_auc_score(test_edge_labels, test_preds)
        # n2v_test_roc_curve = roc_curve(test_edge_labels, test_preds)
        n2v_test_ap = average_precision_score(test_edge_labels, test_preds)


    # Generate edge scores using simple dot product of node embeddings (like in GAE paper)
    elif args.edge_score_mode == "dot-product":
        score_matrix = np.dot(emb_matrix, emb_matrix.T)
        runtime = time.time() - start_time

        # Val set scores
        if len(val_edges) > 0:
            n2v_val_roc, n2v_val_ap = get_roc_score(val_edges, val_edges_false, score_matrix, apply_sigmoid=True)
        else:
            n2v_val_roc = None
            n2v_val_roc_curve = None
            n2v_val_ap = None

        # Test set scores
        n2v_test_roc, n2v_test_ap = get_roc_score(test_edges, test_edges_false, score_matrix, apply_sigmoid=True)

    else:
        print
        "Invalid edge_score_mode! Either use edge-emb or dot-product."

    # Record scores
    n2v_scores = {}

    n2v_scores['test_roc'] = n2v_test_roc
    # n2v_scores['test_roc_curve'] = n2v_test_roc_curve
    n2v_scores['test_ap'] = n2v_test_ap

    n2v_scores['val_roc'] = n2v_val_roc
    # n2v_scores['val_roc_curve'] = n2v_val_roc_curve
    n2v_scores['val_ap'] = n2v_val_ap

    n2v_scores['runtime'] = runtime

    return n2v_scores, train_edge_labels,test_edge_labels, test_preds,train_sim_matrix,test_sim_matrix,train_edge_embs,test_edge_embs,train_embs_1,train_embs_2,test_embs_1,test_embs_2,train_edges


def process_pytorch(args,adj_train,DATASET,METHOD,F,ego_user,res_dir,dp,sigma):

  global begin

  if args.format == "adjlist":
    G = graph.load_adjacencylist(args.input, undirected=args.undirected)
  elif args.format == "edgelist":
    G = graph.load_edgelist(args.input, undirected=args.undirected)
  elif args.format == "mat":
    G = graph.load_matfile(args.input, variable_name=args.matfile_variable_name, undirected=args.undirected)
  else:
    raise Exception("Unknown file format: '%s'.  Valid formats: 'adjlist', 'edgelist', 'mat'" % args.format)

  print("Number of nodes: {}".format(len(G.nodes())))

  num_walks = len(G.nodes()) * args.number_walks

  print("Number of walks: {}".format(num_walks))

  data_size = num_walks * args.walk_length

  print("Data size (walks*length): {}".format(data_size))

  print("Walking...")

  if dp == 6:
    walks = graph.build_defense_walks(sigma,adj_train,G, num_paths=args.number_walks,
                                          path_length=args.walk_length, alpha=0, rand=random.Random(args.seed))
  else:
    walks = graph.build_deepwalk_corpus(G, num_paths=args.number_walks,
                                      path_length=args.walk_length, alpha=0, rand=random.Random(args.seed))

  write_walks2(args, walks, DATASET, METHOD, F, ego_user,res_dir)

  print("Training...")

  # print(walks)

  if dp==1:
    model = word2vec.ModWord2Vec_dp(walks, size=args.representation_size, window=args.window_size, min_count=0, sg=1, hs=1, workers=args.workers,compute_loss=True,budget=sigma)
  elif dp==0 or dp==6:
    model = word2vec.ModWordVec(walks, size=args.representation_size, window=args.window_size, min_count=0, sg=1,
                                      hs=1, workers=args.workers,compute_loss=True)



  print('%%%%%%%%%%%%%')
  # args.output='E:\\python\\banlance\\code\\' + DATASET + '\\' + METHOD + '-embeds-' + F + '-' + str(ego_user)
  emb_matrix=model.save_emb(args.output, len(G.nodes()))

  #
  # # Store embeddings mapping
  # emb_mappings = model.wv
  #
  # # Create node embeddings matrix (rows = nodes, columns = embedding features)
  # emb_list = []
  # for node_index in range(0, adj_train.shape[0]):
  #     node_str = str(node_index)
  #     node_emb = emb_mappings[node_str]
  #     emb_list.append(node_emb)
  # emb_matrix = np.vstack(emb_list)

  with open('E:\\python\\banlance\\code\\' + DATASET + '\\' + METHOD + '-embeds-' + F + '-' + str(ego_user), 'w') as f:
      f.write('%d %d\n' % (adj_train.shape[0], args.representation_size))
      for i in range(adj_train.shape[0]):
          e = ' '.join(map(lambda x: str(x), emb_matrix[i]))
          f.write('%s %s\n' % (str(i), e))

  # exit()
  return emb_matrix

def process_pytorch1(args,adj_train,DATASET,METHOD,F,ego_user,res_dir,dp,sigma,train_edges,test_edges):

  global begin

  if args.format == "adjlist":
    G = graph.load_adjacencylist(args.input, undirected=args.undirected)
  elif args.format == "edgelist":
    G = graph.load_edgelist(args.input, undirected=args.undirected)
  elif args.format == "mat":
    G = graph.load_matfile(args.input, variable_name=args.matfile_variable_name, undirected=args.undirected)
  else:
    raise Exception("Unknown file format: '%s'.  Valid formats: 'adjlist', 'edgelist', 'mat'" % args.format)

  print("Number of nodes: {}".format(len(G.nodes())))

  num_walks = len(G.nodes()) * args.number_walks

  print("Number of walks: {}".format(num_walks))

  data_size = num_walks * args.walk_length

  print("Data size (walks*length): {}".format(data_size))

  print("Walking...")
  walks = graph.build_deepwalk_corpus(G, num_paths=args.number_walks,
                                      path_length=args.walk_length, alpha=0, rand=random.Random(args.seed))

  write_walks2(args, walks, DATASET, METHOD, F, ego_user,res_dir)

  print("Training...")

  print(walks)

  model = word2vec.ModWord2Vec_defense(train_edges,test_edges,len(G.nodes()),args.output, walks,size=args.representation_size, window=args.window_size, min_count=0, sg=1,
                                   hs=1, workers=args.workers,compute_loss=True)
  print('%%%%%%%%%%%%%')
  # args.output='E:\\python\\banlance\\code\\' + DATASET + '\\' + METHOD + '-embeds-' + F + '-' + str(ego_user)
  emb_matrix=model.save_emb(args.output, len(G.nodes()))

  #
  # # Store embeddings mapping
  # emb_mappings = model.wv
  #
  # # Create node embeddings matrix (rows = nodes, columns = embedding features)
  # emb_list = []
  # for node_index in range(0, adj_train.shape[0]):
  #     node_str = str(node_index)
  #     node_emb = emb_mappings[node_str]
  #     emb_list.append(node_emb)
  # emb_matrix = np.vstack(emb_list)

  with open('E:\\python\\banlance\\code\\' + DATASET + '\\' + METHOD + '-embeds-' + F + '-' + str(ego_user), 'w') as f:
      f.write('%d %d\n' % (adj_train.shape[0], args.representation_size))
      for i in range(adj_train.shape[0]):
          e = ' '.join(map(lambda x: str(x), emb_matrix[i]))
          f.write('%s %s\n' % (str(i), e))

  # exit()
  return emb_matrix


def process_pytorch2(args,adj_train,DATASET,METHOD,F,ego_user,res_dir,dp,sigma,train_edges,test_edges):

  global begin

  if args.format == "adjlist":
    G = graph.load_adjacencylist(args.input, undirected=args.undirected)
  elif args.format == "edgelist":
    G = graph.load_edgelist(args.input, undirected=args.undirected)
  elif args.format == "mat":
    G = graph.load_matfile(args.input, variable_name=args.matfile_variable_name, undirected=args.undirected)
  else:
    raise Exception("Unknown file format: '%s'.  Valid formats: 'adjlist', 'edgelist', 'mat'" % args.format)

  print("Number of nodes: {}".format(len(G.nodes())))

  num_walks = len(G.nodes()) * args.number_walks

  print("Number of walks: {}".format(num_walks))

  data_size = num_walks * args.walk_length

  print("Data size (walks*length): {}".format(data_size))

  print("Walking...")
  walks = graph.build_deepwalk_corpus(G, num_paths=args.number_walks,
                                      path_length=args.walk_length, alpha=0, rand=random.Random(args.seed))

  write_walks2(args, walks, DATASET, METHOD, F, ego_user,res_dir)

  print("Training...")

  print(walks)

  model = word2vec.ModWord2Vec_defense2(train_edges,test_edges,len(G.nodes()),args.output, walks,size=args.representation_size, window=args.window_size, min_count=0, sg=1,
                                   hs=1, workers=args.workers,compute_loss=True)
  print('%%%%%%%%%%%%%')
  # args.output='E:\\python\\banlance\\code\\' + DATASET + '\\' + METHOD + '-embeds-' + F + '-' + str(ego_user)
  emb_matrix=model.save_emb(args.output, len(G.nodes()))

  #
  # # Store embeddings mapping
  # emb_mappings = model.wv
  #
  # # Create node embeddings matrix (rows = nodes, columns = embedding features)
  # emb_list = []
  # for node_index in range(0, adj_train.shape[0]):
  #     node_str = str(node_index)
  #     node_emb = emb_mappings[node_str]
  #     emb_list.append(node_emb)
  # emb_matrix = np.vstack(emb_list)

  with open('E:\\python\\banlance\\code\\' + DATASET + '\\' + METHOD + '-embeds-' + F + '-' + str(ego_user), 'w') as f:
      f.write('%d %d\n' % (adj_train.shape[0], args.representation_size))
      for i in range(adj_train.shape[0]):
          e = ' '.join(map(lambda x: str(x), emb_matrix[i]))
          f.write('%s %s\n' % (str(i), e))

  # exit()
  return emb_matrix


def process_pytorch3(args,adj_train,DATASET,METHOD,F,ego_user,res_dir,dp,sigma,train_edges,test_edges):

  global begin

  if args.format == "adjlist":
    G = graph.load_adjacencylist(args.input, undirected=args.undirected)
  elif args.format == "edgelist":
    G = graph.load_edgelist(args.input, undirected=args.undirected)
  elif args.format == "mat":
    G = graph.load_matfile(args.input, variable_name=args.matfile_variable_name, undirected=args.undirected)
  else:
    raise Exception("Unknown file format: '%s'.  Valid formats: 'adjlist', 'edgelist', 'mat'" % args.format)

  print("Number of nodes: {}".format(len(G.nodes())))

  num_walks = len(G.nodes()) * args.number_walks

  print("Number of walks: {}".format(num_walks))

  data_size = num_walks * args.walk_length

  print("Data size (walks*length): {}".format(data_size))

  print("Walking...")
  walks = graph.build_deepwalk_corpus(G, num_paths=args.number_walks,
                                      path_length=args.walk_length, alpha=0, rand=random.Random(args.seed))

  write_walks2(args, walks, DATASET, METHOD, F, ego_user,res_dir)

  print("Training...")

  print(walks)

  model = word2vec.ModWord2Vec_defense3(F, res_dir,train_edges,test_edges,len(G.nodes()),args.output, walks,size=args.representation_size, window=args.window_size, min_count=0, sg=1,
                                   hs=1, workers=args.workers,compute_loss=True)
  print('%%%%%%%%%%%%%')
  # args.output='E:\\python\\banlance\\code\\' + DATASET + '\\' + METHOD + '-embeds-' + F + '-' + str(ego_user)
  emb_matrix=model.save_emb(args.output, len(G.nodes()))

  #
  # # Store embeddings mapping
  # emb_mappings = model.wv
  #
  # # Create node embeddings matrix (rows = nodes, columns = embedding features)
  # emb_list = []
  # for node_index in range(0, adj_train.shape[0]):
  #     node_str = str(node_index)
  #     node_emb = emb_mappings[node_str]
  #     emb_list.append(node_emb)
  # emb_matrix = np.vstack(emb_list)

  # exit()
  return emb_matrix

def process_pytorch5(args,adj_train,DATASET,METHOD,F,ego_user,res_dir,dp,sigma):

  global begin

  if args.format == "adjlist":
    G = graph.load_adjacencylist(args.input, undirected=args.undirected)
  elif args.format == "edgelist":
    G = graph.load_edgelist(args.input, undirected=args.undirected)
  elif args.format == "mat":
    G = graph.load_matfile(args.input, variable_name=args.matfile_variable_name, undirected=args.undirected)
  else:
    raise Exception("Unknown file format: '%s'.  Valid formats: 'adjlist', 'edgelist', 'mat'" % args.format)

  # print(np.shape(adj_train)[0])

  # for i in range(np.shape(adj_train)[0]):
  #     if i not in G.keys():
  #         G[i].append(i)



  print("Number of nodes: {}".format(len(G.nodes())))

  num_walks = len(G.nodes()) * args.number_walks

  print("Number of walks: {}".format(num_walks))

  data_size = num_walks * args.walk_length

  print("Data size (walks*length): {}".format(data_size))

  print("Walking...")
  walks = graph.build_deepwalk_corpus(G, num_paths=args.number_walks,
                                      path_length=args.walk_length, alpha=0, rand=random.Random(args.seed))

  write_walks2(args, walks, DATASET, METHOD, F, ego_user,res_dir)

  print("Training...")

  print(walks)

  if dp==1:
    model = word2vec.ModWord2Vec_dp(walks, size=args.representation_size, window=args.window_size, min_count=0, sg=1, hs=1, workers=args.workers,compute_loss=True,budget=sigma)
  elif dp==0 or dp==5:
    model = word2vec.ModWord2Vec(walks, size=args.representation_size, window=args.window_size, min_count=0, sg=1,
                                      hs=1, workers=args.workers,compute_loss=True)

  print('%%%%%%%%%%%%%')
  # args.output='E:\\python\\banlance\\code\\' + DATASET + '\\' + METHOD + '-embeds-' + F + '-' + str(ego_user)
  emb_matrix=model.save_emb(args.output, len(G.nodes()))

  #
  # # Store embeddings mapping
  # emb_mappings = model.wv
  #
  # # Create node embeddings matrix (rows = nodes, columns = embedding features)
  # emb_list = []
  # for node_index in range(0, adj_train.shape[0]):
  #     node_str = str(node_index)
  #     node_emb = emb_mappings[node_str]
  #     emb_list.append(node_emb)
  # emb_matrix = np.vstack(emb_list)

  with open('E:\\python\\banlance\\code\\' + DATASET + '\\' + METHOD + '-embeds-' + F + '-' + str(ego_user), 'w') as f:
      f.write('%d %d\n' % (adj_train.shape[0], args.representation_size))
      for i in range(adj_train.shape[0]):
          e = ' '.join(map(lambda x: str(x), emb_matrix[i]))
          f.write('%s %s\n' % (str(i), e))

  # exit()
  return emb_matrix

def process_pytorch9(args,adj_train,DATASET,METHOD,F,ego_user,res_dir,dp,sigma):

  global begin

  if args.format == "adjlist":
    G = graph.load_adjacencylist(args.input, undirected=args.undirected)
  elif args.format == "edgelist":
    G = graph.load_edgelist(args.input, undirected=args.undirected)
  elif args.format == "mat":
    G = graph.load_matfile(args.input, variable_name=args.matfile_variable_name, undirected=args.undirected)
  else:
    raise Exception("Unknown file format: '%s'.  Valid formats: 'adjlist', 'edgelist', 'mat'" % args.format)

  # print(np.shape(adj_train)[0])

  # for i in range(np.shape(adj_train)[0]):
  #     if i not in G.keys():
  #         G[i].append(i)



  print("Number of nodes: {}".format(len(G.nodes())))

  num_walks = len(G.nodes()) * args.number_walks

  print("Number of walks: {}".format(num_walks))

  data_size = num_walks * args.walk_length

  print("Data size (walks*length): {}".format(data_size))

  print("Walking...")
  walks = graph.build_deepwalk_corpus(G, num_paths=args.number_walks,
                                      path_length=args.walk_length, alpha=0, rand=random.Random(args.seed))

  write_walks2(args, walks, DATASET, METHOD, F, ego_user,res_dir)

  print("Training...")

  print(walks)

  if dp==1:
    model = word2vec.ModWord2Vec_dp(walks, size=args.representation_size, window=args.window_size, min_count=0, sg=1, hs=1, workers=args.workers,compute_loss=True,budget=sigma)
  elif dp==0 or dp==5:
    model = word2vec.ModWord2Vec(walks, size=args.representation_size, window=args.window_size, min_count=0, sg=1,
                                      hs=1, workers=args.workers,compute_loss=True)

  elif dp==9:
    model = word2vec.ModWord2Vec_9(walks, size=args.representation_size, window=args.window_size, min_count=0, sg=1,
                                   hs=1, workers=args.workers, compute_loss=True)

  print('%%%%%%%%%%%%%')
  # args.output='E:\\python\\banlance\\code\\' + DATASET + '\\' + METHOD + '-embeds-' + F + '-' + str(ego_user)
  emb_matrix=model.save_emb(args.output, len(G.nodes()))

  #
  # # Store embeddings mapping
  # emb_mappings = model.wv
  #
  # # Create node embeddings matrix (rows = nodes, columns = embedding features)
  # emb_list = []
  # for node_index in range(0, adj_train.shape[0]):
  #     node_str = str(node_index)
  #     node_emb = emb_mappings[node_str]
  #     emb_list.append(node_emb)
  # emb_matrix = np.vstack(emb_list)

  with open('E:\\python\\banlance\\code\\' + DATASET + '\\' + METHOD + '-embeds-' + F + '-' + str(ego_user), 'w') as f:
      f.write('%d %d\n' % (adj_train.shape[0], args.representation_size))
      for i in range(adj_train.shape[0]):
          e = ' '.join(map(lambda x: str(x), emb_matrix[i]))
          f.write('%s %s\n' % (str(i), e))

  # exit()
  return emb_matrix



def deepwalk8(g_train, train_test_split,DATASET,METHOD,res_dir, ego_user, F,dp,sigma):
  parser = ArgumentParser("deepwalk",
                          formatter_class=ArgumentDefaultsHelpFormatter,
                          conflict_handler='resolve')

  parser.add_argument("--debug", dest="debug", action='store_true', default=False,
                      help="drop a debugger if an exception is raised.")

  parser.add_argument('--format', default='edgelist',
                      help='File format of input file')

  parser.add_argument('--input', nargs='?',
                      help='Input graph file')

  parser.add_argument("-l", "--log", dest="log", default="INFO",
                      help="log verbosity level")

  parser.add_argument('--matfile-variable-name', default='network',
                      help='variable name of adjacency matrix inside a .mat file.')

  parser.add_argument('--max-memory-data-size', default=1000000000, type=int,
                      help='Size to start dumping walks to disk, instead of keeping them in memory.')

  parser.add_argument('--number-walks', default=10, type=int,
                      help='Number of random walks to start at each node')

  parser.add_argument('--output',
                      help='Output representation file')

  parser.add_argument('--representation-size', default=128, type=int,
                      help='Number of latent dimensions to learn for each node.')

  parser.add_argument('--seed', default=0, type=int,
                      help='Seed for random walk generator.')

  parser.add_argument('--undirected', default=True, type=bool,
                      help='Treat graph as undirected.')

  parser.add_argument('--vertex-freq-degree', default=False, action='store_true',
                      help='Use vertex degree to estimate the frequency of nodes '
                           'in the random walks. This option is faster than '
                           'calculating the vocabulary.')

  parser.add_argument('--walk-length', default=80, type=int,
                      help='Length of the random walk started at each node')

  parser.add_argument('--window-size', default=10, type=int,
                      help='Window size of skipgram model.')

  parser.add_argument('--workers', default=10, type=int,
                      help='Number of parallel processes.')
  parser.add_argument('--edge_score_mode', default='edge-emb')


  args = parser.parse_args()
  args.input='%s%s-fair-%s-train.txt' % (res_dir, str(ego_user), F)
  args.output = res_dir+METHOD+'-embeds-'+F+'-'+str(ego_user)
  numeric_level = getattr(logging, args.log.upper(), None)
  logging.basicConfig(format=LOGFORMAT)
  logger.setLevel(numeric_level)

  if args.debug:
   sys.excepthook = debug

  adj_train_orig, train_edges, train_edges_false, val_edges, val_edges_false, \
  test_edges, test_edges_false = train_test_split
  g_train=nx.adjacency_matrix(g_train)
  if dp==0 or dp==1 or dp==6:
    emb_matrix=process_pytorch(args,g_train,DATASET,METHOD,F,ego_user,res_dir,dp,sigma)

  if dp == 2:
    emb_matrix = process_pytorch1(args, g_train, DATASET, METHOD, F, ego_user, res_dir, dp, sigma,train_edges,test_edges)

  if dp == 3:
    emb_matrix = process_pytorch2(args, g_train, DATASET, METHOD, F, ego_user, res_dir, dp, sigma,train_edges,test_edges)

  if dp == 4:
    emb_matrix = process_pytorch3(args, g_train, DATASET, METHOD, F, ego_user, res_dir, dp, sigma,train_edges,test_edges)

  if dp==5:
    emb_matrix=process_pytorch5(args,g_train,DATASET,METHOD,F,ego_user,res_dir,dp,sigma)

  # if dp==9:
  #   emb_matrix=process_pytorch9(args,g_train,DATASET,METHOD,F,ego_user,res_dir,dp,sigma)

  train_edge_labels, test_edge_labels, train_sim_matrix,test_sim_matrix,train_edge_embs,test_edge_embs,train_embs_1,train_embs_2,test_embs_1,test_embs_2,train_edges_sampled=linkpre_scores8(args, emb_matrix, train_test_split, ego_user,DATASET,METHOD, F)

  return train_edge_labels,test_edge_labels, emb_matrix,train_sim_matrix,test_sim_matrix,train_edge_embs,test_edge_embs,train_embs_1,train_embs_2,test_embs_1,test_embs_2,train_edges_sampled


def linkpre_scores8(args, emb_matrix, train_test_split, ego_user,DATASET,METHOD, Flag):
    adj_train, train_edges, train_edges_false, val_edges, val_edges_false, \
    test_edges, test_edges_false = train_test_split
    start_time = time.time()
    # Generate bootstrapped edge embeddings (as is done in node2vec paper)
    # Edge embedding for (v1, v2) = hadamard product of node embeddings for v1, v2
    if args.edge_score_mode == "edge-emb":

        def get_edge_embeddings(edge_list,ego_user,DATASET, Flag, flag):
            embs = []
            sim_matrix=[]
            embs_1=[]
            embs_2 = []
            tsts=[]
            for edge in edge_list:
                node1 = edge[0]
                node2 = edge[1]
                emb1 = emb_matrix[node1]
                # print(np.shape(emb1))
                # print(type(emb1))
                emb2 = emb_matrix[node2]
                edge_emb = np.multiply(emb1, emb2)
                sim = np.dot(emb1, emb2)/(np.linalg.norm(emb1)*np.linalg.norm(emb2))
                #edge_emb = np.array(emb1) + np.array(emb2)
                # print(np.shape(edge_emb))
                sim2 = np.dot(emb1, emb2)
                # sim3 = np.sqrt(np.sum(np.sqrt(np.array(emb1)-np.array(emb2))))
                # print(sim3)
                sim3=np.linalg.norm(np.array(emb1)-np.array(emb2))
                # print(sim3)
                sim4=1/(1+sim3)
                embs.append(edge_emb)
                embs_1.append(emb1)
                embs_2.append(emb2)
                sim_matrix.append([sim,sim2,sim3,sim4])

                tst = [node1, node2, sim,sim2,sim3,sim4]
                tsts.append(tst)

            embs = np.array(embs)
            sim_matrix = np.array(sim_matrix)
            embs_1=np.array(embs_1)
            embs_2 =np.array(embs_2)

            name = ['node1', 'node2', 'sim1', 'sim2', 'sim3','sim4']
            result = pd.DataFrame(columns=name, data=tsts)
            result.to_csv("{}{}-similarity.csv".format(Flag, flag))

            # #with open('/Users/xiulingwang/Downloads/line-master/data/embds/' + str(ego_user) + flag + '-' + Flag, 'w') as f:
            # with open('E:/python/banlance/code/' + DATASET + '/' + METHOD + '/embeds/' + Flag + '-' + str(ego_user) + flag,'w') as f:
            # #with open('/Users/xiulingwang/Downloads/'+DATASET+'/line/3-split/' + str(ego_user)+ '-' + flag + '-' + Flag+'-' +'embds','w') as f:
            #     f.write('%d %d\n' % (edge_list.shape[0], args.representation_size))
            #     for i in range(edge_list.shape[0]):
            #         e = ' '.join(map(lambda x: str(x), embs[i]))
            #         f.write('%s %s %s\n' % (str(edge_list[i][0]), str(edge_list[i][1]), e))

            return embs,sim_matrix,embs_1,embs_2

        edgeall = list([list(edge_tuple) for edge_tuple in train_edges])

        # train_edges_sampled = random.sample(edgeall, np.shape(test_edges)[0])
        train_edges_sampled = random.sample(edgeall, np.shape(test_edges)[0])


        # Train-set edge embeddings
        pos_train_edge_embs0 ,pos_train_sim_matrix0,pos_embs_1_train0,pos_embs_2_train0= get_edge_embeddings(edgeall ,ego_user,DATASET, Flag, flag='pos-train-all')

        pos_train_edge_embs ,pos_train_sim_matrix,pos_embs_1_train,pos_embs_2_train= get_edge_embeddings(train_edges_sampled,ego_user,DATASET, Flag, flag='pos-train')
        neg_train_edge_embs,neg_train_sim_matrix,neg_embs_1_train,neg_embs_2_train = get_edge_embeddings(train_edges_false, ego_user,DATASET,Flag, flag='neg-train')
        train_edge_embs = pos_train_edge_embs
        train_sim_matrix= pos_train_sim_matrix
        train_embs_1 = pos_embs_1_train
        train_embs_2 = pos_embs_2_train

        # Create train-set edge labels: 1 = real edge, 0 = false edge
        train_edge_labels = np.ones(len(train_edges_sampled))

        # Val-set edge embeddings, labels
        if len(val_edges) > 0 and len(val_edges_false) > 0:
            pos_val_edge_embs,pos_val_sim_matrix,pos_embs_1_val,pos_embs_2_val = get_edge_embeddings(val_edges,ego_user,DATASET,Flag, flag='pos-val')
            neg_val_edge_embs,neg_val_sim_matrix,neg_embs_1_val,neg_embs_2_val = get_edge_embeddings(val_edges_false,ego_user,DATASET,Flag, flag='neg-val')
            val_edge_embs = np.concatenate([pos_val_edge_embs, neg_val_edge_embs])
            val_edge_labels = np.concatenate((np.ones(len(val_edges)), np.zeros(len(val_edges_false))), axis=0)
            val_sim_matrix = np.concatenate((pos_val_sim_matrix, neg_val_sim_matrix), axis=0)
            val_embs_1 = np.concatenate((pos_embs_1_val, neg_embs_1_val), axis=0)
            val_embs_2 = np.concatenate((pos_embs_2_val, neg_embs_2_val), axis=0)

        # Test-set edge embeddings, labels
        pos_test_edge_embs,pos_test_sim_matrix,pos_embs_1_test,pos_embs_2_test = get_edge_embeddings(test_edges,ego_user,DATASET,Flag, flag='pos-test')
        neg_test_edge_embs ,neg_test_sim_matrix,neg_embs_1_test,neg_embs_2_test= get_edge_embeddings(test_edges_false,ego_user,DATASET,Flag, flag='neg-test')
        test_edge_embs = pos_test_edge_embs
        test_sim_matrix = pos_test_sim_matrix
        test_embs_1 = pos_embs_1_test
        test_embs_2 = pos_embs_2_test

        # Create val-set edge labels: 1 = real edge, 0 = false edge
        test_edge_labels = np.ones(len(test_edges))


    #     # Train logistic regression classifier on train-set edge embeddings
    #     edge_classifier = LogisticRegression(random_state=0)
    #     edge_classifier.fit(train_edge_embs, train_edge_labels)
    #
    #     # Predicted edge scores: probability of being of class "1" (real edge)
    #     if len(val_edges) > 0 and len(val_edges_false) > 0:
    #         val_preds = edge_classifier.predict_proba(val_edge_embs)[:, 1]
    #     test_preds = edge_classifier.predict_proba(test_edge_embs)[:, 1]
    #     print(test_preds)
    #     print(np.shape(test_preds))
    #
    #     runtime = time.time() - start_time
    #
    #     # Calculate scores
    #     if len(val_edges) > 0 and len(val_edges_false) > 0:
    #         n2v_val_roc = roc_auc_score(val_edge_labels, val_preds)
    #         # n2v_val_roc_curve = roc_curve(val_edge_labels, val_preds)
    #         n2v_val_ap = average_precision_score(val_edge_labels, val_preds)
    #     else:
    #         n2v_val_roc = None
    #         n2v_val_roc_curve = None
    #         n2v_val_ap = None
    #
    #     n2v_test_roc = roc_auc_score(test_edge_labels, test_preds)
    #     # n2v_test_roc_curve = roc_curve(test_edge_labels, test_preds)
    #     n2v_test_ap = average_precision_score(test_edge_labels, test_preds)
    #
    #
    # # Generate edge scores using simple dot product of node embeddings (like in GAE paper)
    # elif args.edge_score_mode == "dot-product":
    #     score_matrix = np.dot(emb_matrix, emb_matrix.T)
    #     runtime = time.time() - start_time
    #
    #     # Val set scores
    #     if len(val_edges) > 0:
    #         n2v_val_roc, n2v_val_ap = get_roc_score(val_edges, val_edges_false, score_matrix, apply_sigmoid=True)
    #     else:
    #         n2v_val_roc = None
    #         n2v_val_roc_curve = None
    #         n2v_val_ap = None
    #
    #     # Test set scores
    #     n2v_test_roc, n2v_test_ap = get_roc_score(test_edges, test_edges_false, score_matrix, apply_sigmoid=True)
    #
    # else:
    #     print
    #     "Invalid edge_score_mode! Either use edge-emb or dot-product."
    #
    # # Record scores
    n2v_scores = {}
    #
    # n2v_scores['test_roc'] = n2v_test_roc
    # # n2v_scores['test_roc_curve'] = n2v_test_roc_curve
    # n2v_scores['test_ap'] = n2v_test_ap
    #
    # n2v_scores['val_roc'] = n2v_val_roc
    # # n2v_scores['val_roc_curve'] = n2v_val_roc_curve
    # n2v_scores['val_ap'] = n2v_val_ap
    #
    # n2v_scores['runtime'] = runtime

    return train_edge_labels,test_edge_labels,train_sim_matrix,test_sim_matrix,train_edge_embs,test_edge_embs,train_embs_1,train_embs_2,test_embs_1,test_embs_2,train_edges_sampled

def discriminator(train_edges,test_edges,emb_matrix):

    start_time = time.time()
    # Generate bootstrapped edge embeddings (as is done in node2vec paper)
    # Edge embedding for (v1, v2) = hadamard product of node embeddings for v1, v2

    def get_edge_embeddings(edge_list):
        embs = []
        sim_matrix=[]
        embs_1=[]
        embs_2 = []
        for edge in edge_list:
            node1 = edge[0]
            node2 = edge[1]
            emb1 = emb_matrix[node1]
            #print(np.shape(emb1))
            emb2 = emb_matrix[node2]
            edge_emb = np.multiply(emb1, emb2)
            sim = np.dot(emb1, emb2)/(np.linalg.norm(emb1)*np.linalg.norm(emb2))
            #edge_emb = np.array(emb1) + np.array(emb2)
            # print(np.shape(edge_emb))
            embs.append(edge_emb)
            embs_1.append(emb1)
            embs_2.append(emb2)
            sim_matrix.append(sim)
        embs = np.array(embs)
        sim_matrix = np.array(sim_matrix)
        embs_1=np.array(embs_1)
        embs_2 =np.array(embs_2)

        # #with open('/Users/xiulingwang/Downloads/line-master/data/embds/' + str(ego_user) + flag + '-' + Flag, 'w') as f:
        # with open('E:/python/banlance/code/' + DATASET + '/' + METHOD + '/embeds/' + Flag + '-' + str(ego_user) + flag,'w') as f:
        # #with open('/Users/xiulingwang/Downloads/'+DATASET+'/line/3-split/' + str(ego_user)+ '-' + flag + '-' + Flag+'-' +'embds','w') as f:
        #     f.write('%d %d\n' % (edge_list.shape[0], args.representation_size))
        #     for i in range(edge_list.shape[0]):
        #         e = ' '.join(map(lambda x: str(x), embs[i]))
        #         f.write('%s %s %s\n' % (str(edge_list[i][0]), str(edge_list[i][1]), e))

        return embs,sim_matrix,embs_1,embs_2

    edgeall = list([list(edge_tuple) for edge_tuple in train_edges])

    # train_edges_sampled = random.sample(edgeall, np.shape(test_edges)[0])
    train_edges_sampled = random.sample(edgeall, np.shape(test_edges)[0])


    # Train-set edge embeddings
    pos_train_edge_embs ,pos_train_sim_matrix,pos_embs_1_train,pos_embs_2_train= get_edge_embeddings(train_edges_sampled)
    # neg_train_edge_embs,neg_train_sim_matrix,neg_embs_1_train,neg_embs_2_train = get_edge_embeddings(train_edges_false, ego_user,DATASET,Flag, flag='neg-train')
    train_edge_embs = pos_train_edge_embs
    train_sim_matrix= pos_train_sim_matrix
    train_embs_1 = pos_embs_1_train
    train_embs_2 = pos_embs_2_train

    # Create train-set edge labels: 1 = real edge, 0 = false edge
    train_edge_labels = np.ones(len(train_edges_sampled))


    # Test-set edge embeddings, labels
    pos_test_edge_embs,pos_test_sim_matrix,pos_embs_1_test,pos_embs_2_test = get_edge_embeddings(test_edges)
    # neg_test_edge_embs ,neg_test_sim_matrix,neg_embs_1_test,neg_embs_2_test= get_edge_embeddings(test_edges_false,ego_user,DATASET,Flag, flag='neg-test')
    test_edge_embs = pos_test_edge_embs
    test_sim_matrix = pos_test_sim_matrix
    test_embs_1 = pos_embs_1_test
    test_embs_2 = pos_embs_2_test

    # Create val-set edge labels: 1 = real edge, 0 = false edge
    test_edge_labels = np.ones(len(test_edges))


    ###########sim_svm

    train_edges_list = np.array(train_edges_sampled)
    print(train_edges_list)
    test_edges_list = test_edges
    print(test_edges_list)

    edges_list = np.concatenate((train_edges_list, test_edges_list), axis=0)

    print(type(train_edges_list))
    print(type(test_edges_list))
    print(type(edges_list))

    print(np.shape(train_edges_list))
    print(np.shape(test_edges_list))
    print(np.shape(edges_list))

    ylabel = [1] * train_sim_matrix.shape[0] + [0] * test_sim_matrix.shape[0]

    # print(train_sim_matrix)
    # print(test_sim_matrix)

    sim_matrix = np.concatenate((train_sim_matrix, test_sim_matrix), axis=0)
    # print(sim_matrix)
    print(np.shape(train_sim_matrix))
    print(np.shape(test_sim_matrix))
    sim_matrix = sim_matrix.reshape(-1, 1)
    # print(sim_matrix)
    print(np.shape(sim_matrix))
    # exit()

    sim_matrix_train = train_sim_matrix
    sim_matrix_test = test_sim_matrix

    sim_matrix_train = sim_matrix_train.reshape(-1, 1)
    sim_matrix_test = sim_matrix_test.reshape(-1, 1)

    print(np.shape(sim_matrix_train))
    print(np.shape(sim_matrix_test))

    from sklearn.model_selection import train_test_split

    ylabel1 = ylabel
    ylable1 = np.reshape(len(ylabel1), 1)

    # print((edges_list))
    # print((ylabel1))
    # print(np.shape(ylabel1))
    # print(np.shape(edges_list))
    y_label = np.zeros((np.shape(edges_list)[0], 3))
    for i in range(np.shape(edges_list)[0]):
        y_label[i][0] = edges_list[i][0]
        y_label[i][1] = edges_list[i][1]
        y_label[i][2] = ylabel[i]
    print(np.shape(y_label))

    y_label_train = np.zeros((np.shape(train_edges_list)[0], 3))
    for i in range(np.shape(train_edges_list)[0]):
        y_label_train[i][0] = train_edges_list[i][0]
        y_label_train[i][1] = train_edges_list[i][1]
        y_label_train[i][2] = 1
    print(np.shape(y_label_train))

    y_label_test = np.zeros((np.shape(test_edges_list)[0], 3))
    for i in range(np.shape(test_edges_list)[0]):
        y_label_test[i][0] = test_edges_list[i][0]
        y_label_test[i][1] = test_edges_list[i][1]
        y_label_test[i][2] = 0
    print(np.shape(y_label_test))

    X_train_train, X_train_test, y_train_train, y_train_test = train_test_split(sim_matrix_train, y_label_train,
                                                                                test_size=0.1, random_state=42)

    X_test_train, X_test_test, y_test_train, y_test_test = train_test_split(sim_matrix_test, y_label_test,
                                                                            test_size=0.1, random_state=42)

    X_train = np.concatenate((X_train_train, X_test_train), axis=0)
    X_test = np.concatenate((X_train_test, X_test_test), axis=0)
    y_train = np.concatenate((y_train_train, y_test_train), axis=0)
    y_test = np.concatenate((y_train_test, y_test_test), axis=0)

    from sklearn import metrics
    from sklearn.neural_network import MLPClassifier

    mlp = MLPClassifier(solver='adam', alpha=1e-5, hidden_layer_sizes=(64, 32, 16, 18), random_state=1,
                        max_iter=500)

    mlp.fit(X_train, y_train[:, 2])

    loss=mlp.loss_

    # print("Training set score: %f" % mlp.score(X_train, y_train[:, 2]))
    # print("Test set score: %f" % mlp.score(X_test, y_test[:, 2]))
    #
    # y_score = mlp.predict(X_test)
    # print(metrics.f1_score(y_test[:, 2], y_score, average='micro'))
    # print(metrics.classification_report(y_test[:, 2], y_score, labels=range(3)))

    return loss