# Source: KGTOSA / GNN-Methods / LinkPrediction / LHGNN / dataloader.py
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from posixpath import split

import numpy as np
from numpy import dtype
from numpy.lib.function_base import copy
import torch
from torch.utils import data
from torch.utils.data import Dataset
from ordered_set import OrderedSet
import random
import time


    
def create_batches(num_ent, batch_size=64, shuffle=True):
    """Split the indices ``0 .. num_ent - 1`` into mini-batches.

    Args:
        num_ent: Number of indices to partition.
        batch_size: Number of indices in every full batch.
        shuffle: When true, the indices are permuted in place with
            ``np.random.shuffle`` before they are cut into batches.

    Returns:
        A list of ``int32`` numpy arrays. Every array holds ``batch_size``
        indices except possibly the last one, which holds whatever is left
        over when ``num_ent`` is not a multiple of ``batch_size``.
    """
    order = np.arange(num_ent, dtype='int32')
    if shuffle:
        np.random.shuffle(order)

    # Cut out all the batches that can be filled completely.
    full_count = order.shape[0] // batch_size
    batches = [
        order[k * batch_size:(k + 1) * batch_size]
        for k in range(full_count)
    ]

    # Anything not consumed by the full batches becomes one short batch.
    consumed = len(batches) * batch_size
    if consumed != num_ent:
        batches.append(order[consumed:])

    return batches



def prepare_batch(data, idx, split='train'):
    """Collect and re-index the two node layers needed for a batch of edges.

    Args:
        data: Object providing ``triples``, ``graph``, ``path_idx``,
            ``mask_idx`` and ``ends``, all of which are indexed by node id
            (``triples`` by split name).
        idx: When ``split == 'post'`` this is the edge array itself and it is
            re-indexed IN PLACE; otherwise it is an iterable of row positions
            selected from ``data.triples[split]``.
        split: Key into ``data.triples``, or the special value ``'post'``.

    Returns:
        A 3-tuple ``(edges, batch, l1_to_l2)``:

        * ``edges`` - the batch edges with every entry replaced by the
          position of that node inside the layer-2 node list.
        * ``batch`` - dict with keys ``'l2'`` (layer-2 nodes expressed as
          positions in the layer-1 list), ``'l1'`` (ordered set of layer-1
          node ids), ``'paths_l2'``/``'paths_l1'``, ``'end_l2'``/``'end_l1'``,
          ``'mask_l2'``/``'mask_l1'`` and ``'t0'`` (always 0).
        * ``l1_to_l2`` - dict mapping an original node id to its position in
          the layer-1 list.
    """
    # In 'post' mode the caller hands over the edges directly.
    if split == 'post':
        edges = idx
    else:
        edges = data.triples[split][list(idx)]

    # Layer 2: every distinct value found in the batch edges, first-seen order.
    nodes_l2 = OrderedSet()
    for edge in edges:
        for endpoint in edge:
            nodes_l2.add(endpoint)

    # Layer 1: each layer-2 node followed by its entries in data.graph.
    nodes_l1 = OrderedSet()
    for center in nodes_l2:
        nodes_l1.add(center)
        for neighbour in data.graph[center]:
            nodes_l1.add(neighbour)

    # Per-node lookups for both layers.
    # NOTE(review): the code below writes into paths_l2 / end_l2; this is
    # presumably safe because fancy indexing yields copies - confirm for the
    # actual container types held by `data`.
    paths_l1 = data.path_idx[nodes_l1]
    mask_l1 = data.mask_idx[nodes_l1]
    end_l1 = data.ends[nodes_l1]

    paths_l2 = data.path_idx[nodes_l2]
    mask_l2 = data.mask_idx[nodes_l2]
    end_l2 = data.ends[nodes_l2]

    # Original node id -> position inside the layer-1 list.
    l1_to_l2 = {node: pos for pos, node in enumerate(nodes_l1)}

    # Layer-2 nodes expressed as layer-1 positions.
    new_nodes_l2 = np.copy(nodes_l2)
    for pos, node in enumerate(nodes_l2):
        new_nodes_l2[pos] = l1_to_l2[node]

    # Translate every unmasked path entry to its layer-1 position. Slot 0 of
    # the matching `end_l2` row is overwritten on each unmasked step, so it
    # ends up holding the last unmasked entry of that path.
    for n in range(paths_l2.shape[0]):
        for p in range(paths_l2.shape[1]):
            for s in range(paths_l2.shape[2]):
                if mask_l2[n, p, s] != 0:
                    paths_l2[n, p, s] = l1_to_l2[paths_l2[n, p, s]]
                    end_l2[n, p, 0] = paths_l2[n, p, s]

    # Layer-1 position of a layer-2 node -> its position inside layer 2.
    l2_to_l0 = {l1_pos: pos for pos, l1_pos in enumerate(new_nodes_l2)}

    # Rewrite the batch edges in layer-2 positions.
    for row in range(edges.shape[0]):
        for col in range(edges.shape[1]):
            edges[row, col] = l2_to_l0[l1_to_l2[edges[row, col]]]

    batch = {
        'l2': new_nodes_l2,
        'l1': nodes_l1,
        'paths_l2': paths_l2,
        'paths_l1': paths_l1,
        'end_l2': end_l2,
        'end_l1': end_l1,
        'mask_l2': mask_l2,
        'mask_l1': mask_l1,
        't0': 0,
    }
    return edges, batch, l1_to_l2

 


def prepare_batch_gcn(data, split, idx):
    """Collect and re-index the two node layers for a batch (neighbour-matrix variant).

    Args:
        data: Object providing ``triples`` (indexed by split name) plus
            ``nei_mat`` and ``mask_mat`` (indexed by node id).
        split: Key into ``data.triples``.
        idx: Iterable of row positions selected from ``data.triples[split]``.

    Returns:
        A 3-tuple ``(edges, batch, l1_to_l2)``:

        * ``edges`` - the batch edges with every entry replaced by the
          position of that node inside the layer-2 node list.
        * ``batch`` - dict with keys ``'l2'`` (layer-2 nodes expressed as
          positions in the layer-1 list), ``'l1'`` (ordered set of layer-1
          node ids), ``'end_l2'``/``'end_l1'``, ``'mask_l2'``/``'mask_l1'``,
          and the placeholders ``'paths_l2'``, ``'paths_l1'`` and ``'t0'``
          (all 0).
        * ``l1_to_l2`` - dict mapping an original node id to its position in
          the layer-1 list.
    """
    edges = data.triples[split][list(idx)]

    # Layer 2: every distinct node id found in the batch edges.
    nodes_l2 = OrderedSet()
    for edge in edges:
        for endpoint in edge:
            nodes_l2.add(int(endpoint))

    # NOTE(review): end_l2 is written to below; presumably the fancy index
    # returns a copy so data.nei_mat stays untouched - confirm.
    end_l2 = data.nei_mat[nodes_l2]
    mask_l2 = data.mask_mat[nodes_l2]

    # Layer 1: each layer-2 node followed by every entry of its nei_mat row.
    nodes_l1 = OrderedSet()
    for center in nodes_l2:
        nodes_l1.add(center)
        row_len = data.nei_mat[center].shape[0]
        for col in range(row_len):
            nodes_l1.add(data.nei_mat[center, col])

    end_l1 = data.nei_mat[nodes_l1]
    mask_l1 = data.mask_mat[nodes_l1]

    # Original node id -> position inside the layer-1 list.
    l1_to_l2 = {node: pos for pos, node in enumerate(nodes_l1)}

    # Layer-2 nodes expressed as layer-1 positions.
    new_nodes_l2 = np.copy(nodes_l2)
    for pos, node in enumerate(nodes_l2):
        new_nodes_l2[pos] = l1_to_l2[node]

    # Translate every layer-2 neighbour entry to its layer-1 position
    # (mask_l2 is not consulted here; all entries are translated).
    for r in range(end_l2.shape[0]):
        for c in range(end_l2.shape[1]):
            end_l2[r, c] = l1_to_l2[end_l2[r, c]]

    # Layer-1 position of a layer-2 node -> its position inside layer 2.
    l2_to_l0 = {l1_pos: pos for pos, l1_pos in enumerate(new_nodes_l2)}

    # Rewrite the batch edges in layer-2 positions.
    for r in range(edges.shape[0]):
        for c in range(edges.shape[1]):
            edges[r, c] = l2_to_l0[l1_to_l2[edges[r, c]]]

    batch = {
        'l2': new_nodes_l2,
        'l1': nodes_l1,
        'end_l2': end_l2,
        'end_l1': end_l1,
        'mask_l2': mask_l2,
        'mask_l1': mask_l1,
        'paths_l2': 0,
        'paths_l1': 0,
        't0': 0,
    }
    return edges, batch, l1_to_l2