// Lancelot / src / gpudb / gen_synthetic_bench.cu
#include <stdio.h>
#include <stdlib.h>
#include <cstdlib>
#include <iostream>
#include <string>
#include <cuda.h>
#include <cuda_runtime.h>
#include <curand.h>
#include <assert.h>

#include "ssb_utils.h"

// How to change the segment size:
//   - update ssb_utils
//   - update commons
//   - recompile minmax, minmax_newbench, minmaxsort
//   - recompile main

// Evaluates a CUDA runtime call; if the result is not cudaSuccess, prints the
// source file and line and makes the *enclosing function* return EXIT_FAILURE.
// Because the expansion contains `return EXIT_FAILURE;`, the macro can only be
// used inside functions whose return type accepts an int (e.g. main), not in
// the void helpers of this file.
#define CUDA_CALL(x) do { if((x)!=cudaSuccess) { \
    printf("Error at %s:%d\n",__FILE__,__LINE__);\
    return EXIT_FAILURE;}} while(0)
// Same contract as CUDA_CALL, but for cuRAND calls: the result is compared
// against CURAND_STATUS_SUCCESS.
#define CURAND_CALL(x) do { if((x)!=CURAND_STATUS_SUCCESS) { \
    printf("Error at %s:%d\n",__FILE__,__LINE__);\
    return EXIT_FAILURE;}} while(0)

// Folds raw 32-bit random values into the closed interval [min, max].
//
// One thread handles one element (1D launch); threads whose global index is
// >= SIZE do nothing, so the grid may be rounded up past SIZE.
//
//   data   - SIZE raw unsigned values (read only)
//   result - SIZE outputs; result[i] = min + data[i] % (max + 1 - min)
//   min    - inclusive lower bound
//   max    - inclusive upper bound (callers are expected to pass max >= min)
__global__ void trim_random_number(unsigned int* data, int* result, int SIZE, int min, int max) {
    const int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx >= SIZE) {
        return;
    }

    // The width of the range takes part in an unsigned modulo, exactly as an
    // int operand would after the usual arithmetic conversions.
    const unsigned int span = max + 1 - min;
    const unsigned int offset = data[idx] % span;
    result[idx] = (int) (min + offset);
}

// Element-wise integer division: result[i] = data[i] / div for i < SIZE.
//
// One thread per element (1D launch); threads with a global index >= SIZE are
// idle. `div` is used as-is, so callers must not pass 0.
__global__ void div_kernel(int* data, int* result, int SIZE, int div) {
    const int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx >= SIZE) {
        return;
    }

    const int value = data[idx];
    result[idx] = value / div;
}

// Writes the sequence 0, 1, ..., SIZE-1 into `result` (result[i] = i).
//
// One thread per element (1D launch); threads with a global index >= SIZE
// write nothing.
__global__ void generate_sequence_number(int* result, int SIZE) {
    const int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx >= SIZE) {
        return;
    }

    result[idx] = idx;
}

// Builds yyyymmdd integers from two arrays of raw random values.
//
// For every i < SIZE:
//   year  = 1992 + data[i]  % 7    (1992..1998)
//   month = 1    + data[i]  % 12   (1..12, derived from the same draw as year)
//   day   = 1    + data1[i] % 30   (1..30)
//   result[i] = year * 10000 + month * 100 + day
//
// One thread per element (1D launch); threads with a global index >= SIZE do
// nothing.
__global__ void combine_date(unsigned int* data, unsigned int* data1, 
        int* result, int SIZE) {
    const int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx >= SIZE) {
        return;
    }

    const unsigned int yearMonthDraw = data[idx];
    const unsigned int dayDraw = data1[idx];

    const int year = 1992 + yearMonthDraw % 7;
    const int month = 1 + yearMonthDraw % 12;
    const int days = 1 + dayDraw % 30;

    result[idx] = year * 10000 + month * 100 + days;
}

// In-place variant of combine_date: the yyyymmdd value overwrites data[i].
//
// For every i < SIZE:
//   year  = 1992 + data[i]  % 7    (1992..1998)
//   month = 1    + data[i]  % 12   (1..12, derived from the same draw as year)
//   day   = 1    + data1[i] % 30   (1..30)
//   data[i] = year * 10000 + month * 100 + day
//
// One thread per element (1D launch); threads with a global index >= SIZE do
// nothing. data1 is only read.
__global__ void combine_date2(unsigned int* data, unsigned int* data1, int SIZE) {
    const int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx >= SIZE) {
        return;
    }

    // Read the raw draw once before it is overwritten below.
    const unsigned int yearMonthDraw = data[idx];
    const unsigned int dayDraw = data1[idx];

    const int year = 1992 + yearMonthDraw % 7;
    const int month = 1 + yearMonthDraw % 12;
    const int days = 1 + dayDraw % 30;

    data[idx] = year * 10000 + month * 100 + days;
}

// template<typename T>
// int storeColumnNewBench(string col_name, int num_entries, int* h_col) {
//   string filename = NEW_BENCH_DIR + lookup(col_name);
//   ofstream colData (filename.c_str(), ios::out | ios::binary);
//   if (!colData) {
//     return -1;
//   }

//   colData.write((char*)h_col, num_entries * sizeof(T));
//   return 0;
// }

// template<typename T>
// T* loadColumnNewBench(string col_name, int num_entries) {
//   T* h_col = new T[((num_entries + SEGMENT_SIZE - 1)/SEGMENT_SIZE) * SEGMENT_SIZE];
//   string filename = NEW_BENCH_DIR + lookup(col_name);
//   ifstream colData (filename.c_str(), ios::in | ios::binary);
//   if (!colData) {
//     return NULL;
//   }

//   colData.read((char*)h_col, num_entries * sizeof(T));
//   return h_col;
// }

/**
 * Fills column `column_name` with N pseudo-random integers in [min, max].
 *
 * Draws N raw 32-bit values from `gen` on the device, folds them into the
 * closed range with the trim_random_number kernel, copies the result to the
 * host and writes it with storeColumn<int>().
 *
 * @param gen          cuRAND generator the raw values are drawn from.
 * @param column_name  column identifier handed to storeColumn<int>().
 * @param N            number of rows to generate; must be positive.
 * @param min          smallest value to emit (inclusive).
 * @param max          largest value to emit (inclusive); must be >= min.
 *
 * Invalid arguments, a failed allocation, any CUDA/cuRAND error or a failed
 * storeColumn (-1) are reported on stderr and end the process with
 * EXIT_FAILURE.
 */
void generate_random_column_with_range(curandGenerator_t gen, string column_name, int N, int min, int max) {
    // max < min would hand the kernel a zero or wrapped-around modulus.
    if (N <= 0 || max < min) {
        std::cerr << "generate_random_column_with_range(" << column_name
                  << "): invalid arguments N=" << N << " min=" << min
                  << " max=" << max << std::endl;
        exit(EXIT_FAILURE);
    }

    const size_t bytes = (size_t)N * sizeof(int);
    unsigned int *devData = NULL;
    int *resData = NULL;
    int *hostData = (int*)malloc(bytes);
    if (hostData == NULL) {
        std::cerr << "generate_random_column_with_range(" << column_name
                  << "): host allocation of " << bytes << " bytes failed" << std::endl;
        exit(EXIT_FAILURE);
    }

    // Each step runs only if everything before it succeeded, so the first
    // failure is the one that gets reported after the common cleanup below.
    cudaError_t cudaStatus = cudaMalloc((void **)&devData, (size_t)N * sizeof(unsigned int));
    if (cudaStatus == cudaSuccess) {
        cudaStatus = cudaMalloc((void **)&resData, bytes);
    }

    curandStatus_t randStatus = CURAND_STATUS_SUCCESS;
    if (cudaStatus == cudaSuccess) {
        randStatus = curandGenerate(gen, devData, N);
    }

    if (cudaStatus == cudaSuccess && randStatus == CURAND_STATUS_SUCCESS) {
        // (N - 1) / 256 + 1 == ceil(N / 256) for N > 0 and cannot overflow int.
        trim_random_number<<<(N - 1) / 256 + 1, 256>>>(devData, resData, N, min, max);
        cudaStatus = cudaGetLastError();  // launch-configuration errors
        if (cudaStatus == cudaSuccess) {
            // Blocking copy: also surfaces errors raised while the kernel ran.
            cudaStatus = cudaMemcpy(hostData, resData, bytes, cudaMemcpyDeviceToHost);
        }
    }

    const bool generated = (cudaStatus == cudaSuccess && randStatus == CURAND_STATUS_SUCCESS);
    // Writing the column is the purpose of this function, so the call must not
    // live inside assert(): with NDEBUG it would be compiled out entirely.
    const bool stored = generated && storeColumn<int>(column_name, N, hostData) != -1;

    cudaFree(devData);
    cudaFree(resData);
    free(hostData);

    if (!stored) {
        std::cerr << "generate_random_column_with_range(" << column_name << "): ";
        if (randStatus != CURAND_STATUS_SUCCESS) {
            std::cerr << "curandGenerate failed with status " << (int)randStatus;
        } else if (cudaStatus != cudaSuccess) {
            std::cerr << "CUDA error: " << cudaGetErrorString(cudaStatus);
        } else {
            std::cerr << "storeColumn failed";
        }
        std::cerr << std::endl;
        exit(EXIT_FAILURE);
    }
}

/**
 * Fills column `column_name` with N pseudo-random dates encoded as yyyymmdd.
 *
 * Draws N raw values from `gen` and N from `gen1`, combines them with the
 * combine_date kernel (year 1992..1998 and month 1..12 from the `gen` draw,
 * day 1..30 from the `gen1` draw) into a separate result buffer, copies that
 * buffer to the host and writes it with storeColumn<int>().
 *
 * @param gen          cuRAND generator for the year/month draw.
 * @param gen1         cuRAND generator for the day draw.
 * @param gen2         accepted for signature compatibility; not used.
 * @param column_name  column identifier handed to storeColumn<int>().
 * @param N            number of rows to generate; must be positive.
 *
 * Invalid N, a failed allocation, any CUDA/cuRAND error or a failed
 * storeColumn (-1) are reported on stderr and end the process with
 * EXIT_FAILURE.
 */
void generate_random_date(curandGenerator_t gen, curandGenerator_t gen1, curandGenerator_t gen2, string column_name, int N) {
    (void)gen2;  // a third random stream is not needed by combine_date

    if (N <= 0) {
        std::cerr << "generate_random_date(" << column_name
                  << "): invalid row count N=" << N << std::endl;
        exit(EXIT_FAILURE);
    }

    const size_t bytes = (size_t)N * sizeof(int);
    const size_t rawBytes = (size_t)N * sizeof(unsigned int);
    unsigned int *devData = NULL, *devData1 = NULL;
    int *resData = NULL;
    int *hostData = (int*)malloc(bytes);
    if (hostData == NULL) {
        std::cerr << "generate_random_date(" << column_name
                  << "): host allocation of " << bytes << " bytes failed" << std::endl;
        exit(EXIT_FAILURE);
    }

    // Each step runs only if everything before it succeeded, so the first
    // failure is the one that gets reported after the common cleanup below.
    cudaError_t cudaStatus = cudaMalloc((void **)&devData, rawBytes);
    if (cudaStatus == cudaSuccess) {
        cudaStatus = cudaMalloc((void **)&devData1, rawBytes);
    }
    if (cudaStatus == cudaSuccess) {
        cudaStatus = cudaMalloc((void **)&resData, bytes);
    }

    curandStatus_t randStatus = CURAND_STATUS_SUCCESS;
    if (cudaStatus == cudaSuccess) {
        randStatus = curandGenerate(gen, devData, N);
        if (randStatus == CURAND_STATUS_SUCCESS) {
            randStatus = curandGenerate(gen1, devData1, N);
        }
    }

    if (cudaStatus == cudaSuccess && randStatus == CURAND_STATUS_SUCCESS) {
        // (N - 1) / 256 + 1 == ceil(N / 256) for N > 0 and cannot overflow int.
        combine_date<<<(N - 1) / 256 + 1, 256>>>(devData, devData1, resData, N);
        cudaStatus = cudaGetLastError();  // launch-configuration errors
        if (cudaStatus == cudaSuccess) {
            // Blocking copy: also surfaces errors raised while the kernel ran.
            cudaStatus = cudaMemcpy(hostData, resData, bytes, cudaMemcpyDeviceToHost);
        }
    }

    const bool generated = (cudaStatus == cudaSuccess && randStatus == CURAND_STATUS_SUCCESS);
    // Writing the column is the purpose of this function, so the call must not
    // live inside assert(): with NDEBUG it would be compiled out entirely.
    const bool stored = generated && storeColumn<int>(column_name, N, hostData) != -1;

    cudaFree(devData);
    cudaFree(devData1);
    cudaFree(resData);
    free(hostData);

    if (!stored) {
        std::cerr << "generate_random_date(" << column_name << "): ";
        if (randStatus != CURAND_STATUS_SUCCESS) {
            std::cerr << "curandGenerate failed with status " << (int)randStatus;
        } else if (cudaStatus != cudaSuccess) {
            std::cerr << "CUDA error: " << cudaGetErrorString(cudaStatus);
        } else {
            std::cerr << "storeColumn failed";
        }
        std::cerr << std::endl;
        exit(EXIT_FAILURE);
    }
}

/**
 * Fills column `column_name` with N pseudo-random dates encoded as yyyymmdd,
 * building the dates in place.
 *
 * Draws N raw values from `gen` and N from `gen1`, then runs combine_date2,
 * which overwrites the `gen` buffer with the encoded dates (year 1992..1998
 * and month 1..12 from the `gen` draw, day 1..30 from the `gen1` draw). That
 * buffer is copied to the host and written with storeColumn<int>().
 *
 * @param gen          cuRAND generator for the year/month draw.
 * @param gen1         cuRAND generator for the day draw.
 * @param gen2         accepted for signature compatibility; not used.
 * @param column_name  column identifier handed to storeColumn<int>().
 * @param N            number of rows to generate; must be positive.
 *
 * Invalid N, a failed allocation, any CUDA/cuRAND error or a failed
 * storeColumn (-1) are reported on stderr and end the process with
 * EXIT_FAILURE.
 */
void generate_random_date2(curandGenerator_t gen, curandGenerator_t gen1, curandGenerator_t gen2, string column_name, int N) {
    (void)gen2;  // a third random stream is not needed by combine_date2

    if (N <= 0) {
        std::cerr << "generate_random_date2(" << column_name
                  << "): invalid row count N=" << N << std::endl;
        exit(EXIT_FAILURE);
    }

    const size_t bytes = (size_t)N * sizeof(int);
    const size_t rawBytes = (size_t)N * sizeof(unsigned int);
    unsigned int *devData = NULL, *devData1 = NULL;
    int *hostData = (int*)malloc(bytes);
    if (hostData == NULL) {
        std::cerr << "generate_random_date2(" << column_name
                  << "): host allocation of " << bytes << " bytes failed" << std::endl;
        exit(EXIT_FAILURE);
    }

    // Each step runs only if everything before it succeeded, so the first
    // failure is the one that gets reported after the common cleanup below.
    cudaError_t cudaStatus = cudaMalloc((void **)&devData, rawBytes);
    if (cudaStatus == cudaSuccess) {
        cudaStatus = cudaMalloc((void **)&devData1, rawBytes);
    }

    curandStatus_t randStatus = CURAND_STATUS_SUCCESS;
    if (cudaStatus == cudaSuccess) {
        randStatus = curandGenerate(gen, devData, N);
        if (randStatus == CURAND_STATUS_SUCCESS) {
            randStatus = curandGenerate(gen1, devData1, N);
        }
    }

    if (cudaStatus == cudaSuccess && randStatus == CURAND_STATUS_SUCCESS) {
        // (N - 1) / 256 + 1 == ceil(N / 256) for N > 0 and cannot overflow int.
        combine_date2<<<(N - 1) / 256 + 1, 256>>>(devData, devData1, N);
        cudaStatus = cudaGetLastError();  // launch-configuration errors
        if (cudaStatus == cudaSuccess) {
            // The kernel stored the dates into devData itself; the blocking
            // copy also surfaces errors raised while the kernel ran.
            cudaStatus = cudaMemcpy(hostData, devData, bytes, cudaMemcpyDeviceToHost);
        }
    }

    const bool generated = (cudaStatus == cudaSuccess && randStatus == CURAND_STATUS_SUCCESS);
    // Writing the column is the purpose of this function, so the call must not
    // live inside assert(): with NDEBUG it would be compiled out entirely.
    const bool stored = generated && storeColumn<int>(column_name, N, hostData) != -1;

    cudaFree(devData);
    cudaFree(devData1);
    free(hostData);

    if (!stored) {
        std::cerr << "generate_random_date2(" << column_name << "): ";
        if (randStatus != CURAND_STATUS_SUCCESS) {
            std::cerr << "curandGenerate failed with status " << (int)randStatus;
        } else if (cudaStatus != cudaSuccess) {
            std::cerr << "CUDA error: " << cudaGetErrorString(cudaStatus);
        } else {
            std::cerr << "storeColumn failed";
        }
        std::cerr << std::endl;
        exit(EXIT_FAILURE);
    }
}

/**
 * Fills column `column_name` with the sequence 0, 1, ..., N-1.
 *
 * The sequence is produced on the device by generate_sequence_number, copied
 * to the host, the column name is echoed on stdout, and the values are
 * written with storeColumn<int>().
 *
 * @param column_name  column identifier handed to storeColumn<int>().
 * @param N            number of rows to generate; must be positive.
 *
 * Invalid N, a failed allocation, any CUDA error or a failed storeColumn (-1)
 * are reported on stderr and end the process with EXIT_FAILURE.
 */
void generate_sequence_column(string column_name, int N) {
    if (N <= 0) {
        std::cerr << "generate_sequence_column(" << column_name
                  << "): invalid row count N=" << N << std::endl;
        exit(EXIT_FAILURE);
    }

    const size_t bytes = (size_t)N * sizeof(int);
    int *resData = NULL;
    int *hostData = (int*)malloc(bytes);
    if (hostData == NULL) {
        std::cerr << "generate_sequence_column(" << column_name
                  << "): host allocation of " << bytes << " bytes failed" << std::endl;
        exit(EXIT_FAILURE);
    }

    cudaError_t cudaStatus = cudaMalloc((void **)&resData, bytes);
    if (cudaStatus == cudaSuccess) {
        // (N - 1) / 256 + 1 == ceil(N / 256) for N > 0 and cannot overflow int.
        generate_sequence_number<<<(N - 1) / 256 + 1, 256>>>(resData, N);
        cudaStatus = cudaGetLastError();  // launch-configuration errors
    }
    if (cudaStatus == cudaSuccess) {
        // Blocking copy: also surfaces errors raised while the kernel ran.
        cudaStatus = cudaMemcpy(hostData, resData, bytes, cudaMemcpyDeviceToHost);
    }

    bool stored = false;
    if (cudaStatus == cudaSuccess) {
        cout << column_name << endl;
        // Writing the column is the purpose of this function, so the call must
        // not live inside assert(): with NDEBUG it would be compiled out.
        stored = storeColumn<int>(column_name, N, hostData) != -1;
    }

    cudaFree(resData);
    free(hostData);

    if (!stored) {
        std::cerr << "generate_sequence_column(" << column_name << "): ";
        if (cudaStatus != cudaSuccess) {
            std::cerr << "CUDA error: " << cudaGetErrorString(cudaStatus);
        } else {
            std::cerr << "storeColumn failed";
        }
        std::cerr << std::endl;
        exit(EXIT_FAILURE);
    }
}

/**
 * Debug helper: loads the first N values of column `column_name` with
 * loadColumn<int>() and prints them space-separated on one stdout line.
 *
 * If the column cannot be loaded, a message is written to stderr and nothing
 * is printed.
 *
 * @param column_name  column identifier handed to loadColumn<int>().
 * @param N            number of values to load and print.
 */
void printColumn(string column_name, int N) {
    int* newCol = loadColumn<int>(column_name, N);
    if (newCol == NULL) {
        std::cerr << "printColumn(" << column_name << "): loadColumn failed" << std::endl;
        return;
    }

    /* Show result */
    for(int i = 0; i < N; i++) {
        printf("%d ", newCol[i]);
    }
    printf("\n");

    // Release the loaded column instead of leaking it.
    // NOTE(review): assumes loadColumn allocates with new[] (as the
    // commented-out loadColumnNewBench in this file does) - confirm against
    // ssb_utils.h.
    delete[] newCol;
}

/**
 * Derives column `column_name` from an existing column by integer division:
 * column_name[i] = base_name[i] / div.
 *
 * Despite the name no random numbers are drawn here. The base column is
 * loaded with loadColumn<int>(), divided on the device by div_kernel, copied
 * back and written with storeColumn<int>().
 *
 * @param column_name  column identifier of the derived column.
 * @param base_name    column identifier of the already stored source column.
 * @param N            number of rows; must be positive.
 * @param div          divisor; must not be 0.
 *
 * Invalid arguments, a failed load or allocation, any CUDA error or a failed
 * storeColumn (-1) are reported on stderr and end the process with
 * EXIT_FAILURE.
 */
void generate_random_column_with_divide(string column_name, string base_name, int N, int div) {
    if (N <= 0 || div == 0) {
        std::cerr << "generate_random_column_with_divide(" << column_name
                  << "): invalid arguments N=" << N << " div=" << div << std::endl;
        exit(EXIT_FAILURE);
    }

    int *baseCol = loadColumn<int>(base_name, N);
    if (baseCol == NULL) {
        std::cerr << "generate_random_column_with_divide(" << column_name
                  << "): cannot load base column " << base_name << std::endl;
        exit(EXIT_FAILURE);
    }

    const size_t bytes = (size_t)N * sizeof(int);
    int *devData = NULL;
    int *resData = NULL;
    int *hostData = (int*)malloc(bytes);
    if (hostData == NULL) {
        std::cerr << "generate_random_column_with_divide(" << column_name
                  << "): host allocation of " << bytes << " bytes failed" << std::endl;
        exit(EXIT_FAILURE);
    }

    // Each step runs only if everything before it succeeded, so the first
    // failure is the one that gets reported after the common cleanup below.
    cudaError_t cudaStatus = cudaMalloc((void **)&devData, bytes);
    if (cudaStatus == cudaSuccess) {
        cudaStatus = cudaMalloc((void **)&resData, bytes);
    }
    if (cudaStatus == cudaSuccess) {
        cudaStatus = cudaMemcpy(devData, baseCol, bytes, cudaMemcpyHostToDevice);
    }
    if (cudaStatus == cudaSuccess) {
        // (N - 1) / 256 + 1 == ceil(N / 256) for N > 0 and cannot overflow int.
        div_kernel<<<(N - 1) / 256 + 1, 256>>>(devData, resData, N, div);
        cudaStatus = cudaGetLastError();  // launch-configuration errors
    }
    if (cudaStatus == cudaSuccess) {
        // Blocking copy: also surfaces errors raised while the kernel ran.
        cudaStatus = cudaMemcpy(hostData, resData, bytes, cudaMemcpyDeviceToHost);
    }

    // Writing the column is the purpose of this function, so the call must not
    // live inside assert(): with NDEBUG it would be compiled out entirely.
    const bool stored = (cudaStatus == cudaSuccess)
        && storeColumn<int>(column_name, N, hostData) != -1;

    cudaFree(devData);
    cudaFree(resData);
    free(hostData);
    // NOTE(review): assumes loadColumn allocates with new[] (as the
    // commented-out loadColumnNewBench in this file does), so the buffer is
    // released with delete[] rather than free() - confirm against ssb_utils.h.
    delete[] baseCol;

    if (!stored) {
        std::cerr << "generate_random_column_with_divide(" << column_name << "): ";
        if (cudaStatus != cudaSuccess) {
            std::cerr << "CUDA error: " << cudaGetErrorString(cudaStatus);
        } else {
            std::cerr << "storeColumn failed";
        }
        std::cerr << std::endl;
        exit(EXIT_FAILURE);
    }
}

/**
 * Generates the synthetic SSB-style data set and post-processes it.
 *
 * Steps:
 *   1. create and seed three cuRAND generators (fixed seeds, so the random
 *      draws start from the same state on every run);
 *   2. write the lineorder (lo_*), supplier (s_*), customer (c_*) and part
 *      (p_*) columns;
 *   3. copy the date table (DDATE*) from the SSB data directory into DATA_DIR;
 *   4. run ./bin/gpudb/sort_column_fast.
 *
 * Both shell commands are always attempted, as before; a failing one is now
 * reported on stderr and reflected in the exit status.
 *
 * @return EXIT_SUCCESS when every step succeeded, EXIT_FAILURE if cuRAND
 *         setup or one of the shell commands failed.
 */
int main() {
    // size_t fact_len = SEGMENT_SIZE * 10;
    // size_t dim1_len = SEGMENT_SIZE * 8;
    // size_t dim2_len = SEGMENT_SIZE * 2;

    // generate_sequence_column("FACT_PKEY", fact_len);
    // generate_random_column_with_range("FACT_FKEY1", fact_len, dim1_len);
    // generate_random_column_with_range("FACT_FKEY2", fact_len, dim2_len);
    // generate_random_column_with_range("FACT_VAL", fact_len, 100);

    // generate_sequence_column("DIM1_PKEY", dim1_len);
    // generate_random_column_with_range("DIM1_VAL", dim1_len, 100);

    // generate_sequence_column("DIM2_PKEY", dim2_len);
    // generate_random_column_with_range("DIM2_VAL", dim2_len, 100);

    // printColumn("FACT_FKEY2", 10);

    // size_t lo_len = SEGMENT_SIZE * 10;
    // size_t s_len = SEGMENT_SIZE * 8;
    // size_t c_len = SEGMENT_SIZE * 2;
    // size_t p_len = SEGMENT_SIZE;
    // size_t d_len = SEGMENT_SIZE;

    curandGenerator_t gen, gen1, gen2;

    /* Create pseudo-random number generators; stop at the first failure. */
    curandStatus_t randStatus = curandCreateGenerator(&gen, CURAND_RNG_PSEUDO_DEFAULT);
    if (randStatus == CURAND_STATUS_SUCCESS) {
        randStatus = curandCreateGenerator(&gen1, CURAND_RNG_PSEUDO_DEFAULT);
    }
    if (randStatus == CURAND_STATUS_SUCCESS) {
        randStatus = curandCreateGenerator(&gen2, CURAND_RNG_PSEUDO_DEFAULT);
    }

    /* Set seeds */
    if (randStatus == CURAND_STATUS_SUCCESS) {
        randStatus = curandSetPseudoRandomGeneratorSeed(gen, 1234ULL);
    }
    if (randStatus == CURAND_STATUS_SUCCESS) {
        randStatus = curandSetPseudoRandomGeneratorSeed(gen1, 1231ULL);
    }
    if (randStatus == CURAND_STATUS_SUCCESS) {
        randStatus = curandSetPseudoRandomGeneratorSeed(gen2, 1232ULL);
    }
    if (randStatus != CURAND_STATUS_SUCCESS) {
        fprintf(stderr, "cuRAND generator setup failed (status %d)\n", (int)randStatus);
        return EXIT_FAILURE;
    }

    /* lineorder: foreign keys are drawn from the key range of each dimension */
    generate_sequence_column("lo_orderkey", LO_LEN);
    generate_random_column_with_range(gen, "lo_suppkey", LO_LEN, 0, S_LEN-1);
    generate_random_column_with_range(gen, "lo_custkey", LO_LEN, 0, C_LEN-1);
    generate_random_column_with_range(gen, "lo_partkey", LO_LEN, 0, P_LEN-1);
    // generate_random_column_with_range(gen, "lo_orderdate", LO_LEN, 0, D_LEN-1);
    generate_random_date2(gen, gen1, gen2, "lo_orderdate", LO_LEN);
    generate_random_column_with_range(gen, "lo_revenue", LO_LEN, 1, 25);
    generate_random_column_with_range(gen, "lo_discount", LO_LEN, 0, 10);
    generate_random_column_with_range(gen, "lo_quantity", LO_LEN, 1, 50);
    generate_random_column_with_range(gen, "lo_extendedprice", LO_LEN, 1, 25);
    generate_random_column_with_range(gen, "lo_supplycost", LO_LEN, 1, 25);
    generate_random_column_with_range(gen, "lo_linenumber", LO_LEN, 1, 25);
    generate_random_column_with_range(gen, "lo_tax", LO_LEN, 1, 25);
    generate_random_column_with_range(gen, "lo_ordtotalprice", LO_LEN, 1, 25);
    generate_random_column_with_range(gen, "lo_commitdate", LO_LEN, 1, 25);

    /* supplier: nation = city / 10 (0..24), region = nation / 5 (0..4) */
    generate_sequence_column("s_suppkey", S_LEN);
    generate_random_column_with_range(gen, "s_city", S_LEN, 0, 249);
    // generate_random_column_with_range(gen, "s_nation", S_LEN, 0, 24);
    // generate_random_column_with_range(gen, "s_region", S_LEN, 0, 4);
    generate_random_column_with_divide("s_nation", "s_city", S_LEN, 10);
    generate_random_column_with_divide("s_region", "s_nation", S_LEN, 5);

    /* customer: same city -> nation -> region derivation as supplier */
    generate_sequence_column("c_custkey", C_LEN);
    generate_random_column_with_range(gen, "c_city", C_LEN, 0, 249);
    // generate_random_column_with_range(gen, "c_nation", C_LEN, 0, 24);
    // generate_random_column_with_range(gen, "c_region", C_LEN, 0, 4);
    generate_random_column_with_divide("c_nation", "c_city", C_LEN, 10);
    generate_random_column_with_divide("c_region", "c_nation", C_LEN, 5);

    /* part: category = brand1 / 40 (0..24), mfgr = category / 5 (0..4) */
    generate_sequence_column("p_partkey", P_LEN);
    generate_random_column_with_range(gen, "p_brand1", P_LEN, 0, 999); //
    // generate_random_column_with_range(gen, "p_category", P_LEN, 0, 24); //
    // generate_random_column_with_range(gen, "p_mfgr", P_LEN, 0, 4); //
    generate_random_column_with_divide("p_category", "p_brand1", P_LEN, 40);
    generate_random_column_with_divide("p_mfgr", "p_category", P_LEN, 5);

    // generate_sequence_column("d_datekey", D_LEN);
    // generate_random_column_with_range(gen, "d_year", D_LEN, 0, 7);
    // generate_random_column_with_range(gen, "d_yearmonthnum", D_LEN, 25);

    /* Cleanup: the generators are not needed by the shell steps below. */
    curandDestroyGenerator(gen);
    curandDestroyGenerator(gen1);
    curandDestroyGenerator(gen2);

    int exitCode = EXIT_SUCCESS;

    //we will copy date table from SSB
    // Built as a std::string so a long DATA_DIR cannot overflow a fixed buffer.
    const std::string copyCommand =
        std::string("cp test/ssb/data/s1_columnar/DDATE* ") + DATA_DIR;
    if (system(copyCommand.c_str()) != 0) {
        fprintf(stderr, "command failed: %s\n", copyCommand.c_str());
        exitCode = EXIT_FAILURE;
    }

    const char* sortCommand = "./bin/gpudb/sort_column_fast";
    if (system(sortCommand) != 0) {
        fprintf(stderr, "command failed: %s\n", sortCommand);
        exitCode = EXIT_FAILURE;
    }

    // char t[500] = "./minmax.sh";
    // system(t);

    return exitCode;

}