import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
from dgl.nn.pytorch.conv import GINConv,GATConv,GraphConv
# from dgl.nn.pytorch.conv import GraphConv
from dgl.nn.pytorch.glob import SumPooling
from utils import *
import ipdb
#因为要做全局表示，就没有训练，可以加上COMPLEX,HYBRID之类的label，做分类任务
#numberofglycos不用特别大
#GNN_ablation:GAT GCN GIN
#从单向图改成双向图 引入对于P的影响
class MLP(nn.Module):
    """Construct two-layer MLP-type aggreator for GIN model"""

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        self.linears = nn.ModuleList()
        # two-layer MLP
        self.linears.append(nn.Linear(input_dim, hidden_dim, bias=False))
        self.linears.append(nn.Linear(hidden_dim, output_dim, bias=False))
        self.batch_norm = nn.BatchNorm1d((hidden_dim))

    def forward(self, x):
        h = x
        h=self.linears[0](h)
        h = F.relu(self.batch_norm(h))
        # ipdb.set_trace()
        return self.linears[1](h)


class GIN(nn.Module):
    def __init__(self, numberofglycos, hidden_dim, output_dim,init_eps):
        super().__init__()
        self.ginlayers = nn.ModuleList()
        self.batch_norms = nn.ModuleList()
        self.glyco_embedding=nn.Embedding(numberofglycos, hidden_dim, padding_idx=None)
        #如果输入一直MLP的话，不同的糖会呈现线性关系，这里我们采用embedding
        # num_layers = 5 #层数可以调整
        print("GNN_edge_num_layers ",GNN_edge_num_layers)
        # five-layer GCN with two-layer MLP aggregator and sum-neighbor-pooling scheme
        for layer in range(GNN_edge_num_layers - 1):  # excluding the input layer
            mlp = MLP(hidden_dim, hidden_dim, hidden_dim)
            self.ginlayers.append(
                GINConv(mlp, init_eps=init_eps,learn_eps=False)
            )  # set to True if learning epsilon #学一下这里#aggregator_type
            self.batch_norms.append(nn.BatchNorm1d(hidden_dim))
        # linear functions for graph sum poolings of output of each layer
        self.W=nn.Linear(2*hidden_dim,16)
        self.W2=nn.Linear(hidden_dim,16)
        self.W1=nn.Linear(2*hidden_dim,hidden_dim)
        self.W3=nn.Linear(hidden_dim,hidden_dim)
        self.predictH=nn.Parameter(torch.randn(hidden_dim,hidden_dim))
        self.pool = (
            SumPooling()
        )  # change to mean readout (AvgPooling) on social network datasets
    def apply_edges(self, g,targetedges):
        h_u = g.ndata['h'][targetedges[0]]
        h_v = g.ndata['h'][targetedges[1]]
        if GNN_edge_decoder_type=="linear":
            score = self.W(torch.cat([h_u, h_v], 1))
        elif GNN_edge_decoder_type=="mlp":
            score = self.W2(F.relu(self.W1(torch.cat([h_u, h_v], 1))))
        elif GNN_edge_decoder_type=="hadamardlinear":
            score = self.W2(h_u* h_v)
        elif GNN_edge_decoder_type=="hadamardmlp":
            score = self.W2(F.relu(self.W3(h_u* h_v)))
        return {'score': score}
    def forward(self, g, h,peptide_rep=None,peptide_ind=None):
        #g是batched_graph, h是feat
        u , v = g.edges()
        targetedges=[u,v]
        g.add_edges(v , u) # bidirect
        g = g.add_self_loop() #add self-loops
        h=self.glyco_embedding(h)
        if peptide_rep is not None:
            #之前有问题，hidden dim和output dim都是16，替换的不对。
            # import ipdb
            # ipdb.set_trace()
            h[peptide_ind]=peptide_rep
        # ipdb.set_trace()
        # list of hidden representation at each layer (including the input layer)
        hidden_rep = [h]
        for i, layer in enumerate(self.ginlayers):
            # ipdb.set_trace()
            h = layer(g, h) #为什么有两个输入
            # ipdb.set_trace()
            h = self.batch_norms[i](h)
            # ipdb.set_trace()
            h = F.relu(h)
            # ipdb.set_trace()
            hidden_rep.append(h)
        h=sum(hidden_rep)
        g.ndata["h"]=h
        edgescore=self.apply_edges(g,targetedges)["score"]
        return edgescore
    
class GCN(nn.Module):
    def __init__(self, numberofglycos, hidden_dim, output_dim):
        super().__init__()
        self.ginlayers = nn.ModuleList()
        self.batch_norms = nn.ModuleList()
        self.glyco_embedding=nn.Embedding(numberofglycos, hidden_dim, padding_idx=None)
        #如果输入一直MLP的话，不同的糖会呈现线性关系，这里我们采用embedding
        # num_layers = 5 #层数可以调整
        # five-layer GCN with two-layer MLP aggregator and sum-neighbor-pooling scheme
        print("GNN_edge_num_layers ",GNN_edge_num_layers)
        for layer in range(GNN_edge_num_layers - 1):  # excluding the input layer
            self.ginlayers.append(
                GraphConv(in_feats=hidden_dim,out_feats=hidden_dim,allow_zero_in_degree=True)
            )  # set to True if learning epsilon #学一下这里#aggregator_type
            self.batch_norms.append(nn.BatchNorm1d(hidden_dim))
        # linear functions for graph sum poolings of output of each layer
        self.W=nn.Linear(2*hidden_dim,16)
        self.W2=nn.Linear(hidden_dim,16)
        self.W1=nn.Linear(2*hidden_dim,hidden_dim)
        self.W3=nn.Linear(hidden_dim,hidden_dim)
        self.predictH=nn.Parameter(torch.randn(hidden_dim,hidden_dim))
        self.pool = (
            SumPooling()
        )  # change to mean readout (AvgPooling) on social network datasets
    def apply_edges(self, g,targetedges):
        h_u = g.ndata['h'][targetedges[0]]
        h_v = g.ndata['h'][targetedges[1]]
        # print("GNN_edge_decoder_type ",GNN_edge_decoder_type)
        if GNN_edge_decoder_type=="linear":
            score = self.W(torch.cat([h_u, h_v], 1))
        elif GNN_edge_decoder_type=="mlp":
            score = self.W2(F.relu(self.W1(torch.cat([h_u, h_v], 1))))
        elif GNN_edge_decoder_type=="hadamardlinear":
            score = self.W2(h_u* h_v)
        elif GNN_edge_decoder_type=="hadamardmlp":
            score = self.W2(F.relu(self.W3(h_u* h_v)))
        return {'score': score}
    def forward(self, g, h,peptide_rep=None,peptide_ind=None):
        #g是batched_graph, h是feat
        u , v = g.edges()
        targetedges=[u,v]
        g.add_edges(v , u) # bidirect
        g = g.add_self_loop() #add self-loops
        h=self.glyco_embedding(h)
        # import ipdb
        # ipdb.set_trace()
        if peptide_rep is not None:
            #之前有问题，hidden dim和output dim都是16，替换的不对。
            # ipdb.set_trace()
            h[peptide_ind]=peptide_rep
        # ipdb.set_trace()
        # list of hidden representation at each layer (including the input layer)
        hidden_rep = [h]
        for i, layer in enumerate(self.ginlayers):
            # ipdb.set_trace()
            h = layer(g, h) #为什么有两个输入
            # ipdb.set_trace()
            h = self.batch_norms[i](h)
            # ipdb.set_trace()
            h = F.relu(h)
            # ipdb.set_trace()
            hidden_rep.append(h)
        h=sum(hidden_rep)
        g.ndata["h"]=h
        edgescore=self.apply_edges(g,targetedges)["score"]
        return edgescore
    
  
class GAT(nn.Module):
    def __init__(self, numberofglycos, hidden_dim, output_dim,num_heads):
        super().__init__()
        self.ginlayers = nn.ModuleList()
        self.batch_norms = nn.ModuleList()
        self.glyco_embedding=nn.Embedding(numberofglycos, hidden_dim, padding_idx=None)
        #如果输入一直MLP的话，不同的糖会呈现线性关系，这里我们采用embedding
        # num_layers = 5 #层数可以调整
        # five-layer GCN with two-layer MLP aggregator and sum-neighbor-pooling scheme
        for layer in range(GNN_edge_num_layers - 1):  # excluding the input layer
            self.ginlayers.append(
                GATConv(in_feats=hidden_dim,out_feats=hidden_dim//num_heads,num_heads=num_heads,allow_zero_in_degree=True)
            )  # set to True if learning epsilon #学一下这里#aggregator_type
            self.batch_norms.append(nn.BatchNorm1d(hidden_dim))
        # linear functions for graph sum poolings of output of each layer
        self.W=nn.Linear(2*hidden_dim,16)
        self.W2=nn.Linear(hidden_dim,16)
        self.W1=nn.Linear(2*hidden_dim,hidden_dim)
        self.W3=nn.Linear(hidden_dim,hidden_dim)
        self.predictH=nn.Parameter(torch.randn(hidden_dim,hidden_dim))
        self.pool = (
            SumPooling()
        )  # change to mean readout (AvgPooling) on social network datasets
    def apply_edges(self, g,targetedges):
        h_u = g.ndata['h'][targetedges[0]]
        h_v = g.ndata['h'][targetedges[1]]
        if GNN_edge_decoder_type=="linear":
            score = self.W(torch.cat([h_u, h_v], 1))
        elif GNN_edge_decoder_type=="mlp":
            score = self.W2(F.relu(self.W1(torch.cat([h_u, h_v], 1))))
        elif GNN_edge_decoder_type=="hadamardlinear":
            score = self.W2(h_u* h_v)
        elif GNN_edge_decoder_type=="hadamardmlp":
            score = self.W2(F.relu(self.W3(h_u* h_v)))
        return {'score': score}
    def forward(self, g, h,peptide_rep=None,peptide_ind=None):
        #g是batched_graph, h是feat
        u , v = g.edges()
        targetedges=[u,v]
        g.add_edges(v , u) # bidirect
        g = g.add_self_loop() #add self-loops
        h=self.glyco_embedding(h)
        if peptide_rep is not None:
            #之前有问题，hidden dim和output dim都是16，替换的不对。
            h[peptide_ind]=peptide_rep
        # list of hidden representation at each layer (including the input layer)
        hidden_rep = [h]
        for i, layer in enumerate(self.ginlayers):
            h = layer(g, h) #为什么有两个输入 【node_dim,head_num,hidden_dim]
            h=h.reshape(-1,GNN_edge_hidden_dim)
            # ipdb.set_trace()
            h = self.batch_norms[i](h)
            # ipdb.set_trace()
            h = F.relu(h)
            # ipdb.set_trace()
            hidden_rep.append(h)
        h=sum(hidden_rep)
        g.ndata["h"]=h
        # ipdb.set_trace()
        edgescore=self.apply_edges(g,targetedges)["score"]
        # ipdb.set_trace()
        return edgescore


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--dataset",
        type=str,
        help="name of dataset",
    )
    args = parser.parse_args()
    print(f"Training with DGL built-in GINConv module with a fixed epsilon")
    #有epsilon以就可以改变自身节点的权重
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # load and split dataset
    dataset_train=torch.load("/remote-home/yxwang/test/zzb/DeepGlyco/model/20230127_test_model_validata")
    dataset=dataset_train['strct_graph'].values.tolist()
    import dgl
    import random #后面随机选择，包括batch内数目改一下
    batchsize=2
    sample=[i[0] for i in random.sample(dataset, batchsize)]
    train_loader = [dgl.batch(sample).to(device)]
    import ipdb
    # ipdb.set_trace()
    # create GIN model
    batched_graph = train_loader[0].to(device)
    # ipdb.set_trace()
    feat = batched_graph.ndata["attr"]
    #我是对边进行embed，进行运算，还是先对节点运算，再进行操作到边
    print("feat",feat)
    number_of_glycos=20
    print(number_of_glycos)
    out_size = 768
    hidden_size=16
    # ipdb.set_trace()
    model = GIN(number_of_glycos, hidden_size, out_size,init_eps=0).to(device)
    print("batchgraph",batched_graph)
    # ipdb.set_trace()
    logits = model(batched_graph, feat)
    print(logits.size())
    ipdb.set_trace()
    print("logits",logits)