// Lancelot / src / gpudb / ssb_utils.h
#pragma once

#include <iostream>
#include <fstream>
#include <string>

/*#include <cuda.h>*/
/*#include <cub/util_allocator.cuh>*/
#include <cuda.h>
#include <curand.h>
#include <cub/util_allocator.cuh>

using namespace std;

// Dataset selector. Its value picks exactly one branch of the #if chain
// below, which in turn defines DATA_DIR and the five *_LEN constants.
#define SF 402

// Root directories of the on-disk column files (absolute, machine-specific).
#define BASE_PATH "/home/ubuntu/Implementation-GPUDB/test/ssb/data/"
#define NEW_BENCH_DIR "/home/ubuntu/Implementation-GPUDB/test/new_bench/data/1_columnar/"
#define SM_BENCH_DIR "/home/ubuntu/Implementation-GPUDB/test/new_bench/data/sm_columnar/"
#define PRELIM_BENCH_DIR "/home/ubuntu/Implementation-GPUDB/test/new_bench/data/prelim_columnar/"
#define EXPERIMENT_BENCH_DIR "/home/ubuntu/Implementation-GPUDB/test/new_bench/data/"

// Number of elements per segment; the column loaders in this header round
// their buffer sizes up to a multiple of this value.
#define SEGMENT_SIZE 1048576
// #define SEGMENT_SIZE 1024

// Per-dataset configuration.
//   DATA_DIR - directory the column files are read from / written to.
//   LO_LEN, P_LEN, S_LEN, C_LEN, D_LEN - presumably the row counts of the
//   lineorder, part, supplier, customer and date tables (TODO confirm against
//   the callers).
// Every expression-valued macro is parenthesized so that it expands safely
// inside larger expressions such as `x / LO_LEN` or `x % P_LEN`.
#if SF == 1
#define DATA_DIR BASE_PATH "s1_columnar/"
#define LO_LEN 6001171
//#define LO_LEN 1000
#define P_LEN 200000
#define S_LEN 2000
#define C_LEN 30000
#define D_LEN 2556
#elif SF == 10
#define DATA_DIR BASE_PATH "s10_columnar/"
#define LO_LEN 59986214
#define P_LEN 800000
#define S_LEN 20000
#define C_LEN 300000
#define D_LEN 2556
#elif SF == 20
#define DATA_DIR BASE_PATH "s20_columnar/"
#define LO_LEN 119994746
#define P_LEN 1000000
#define S_LEN 40000
#define C_LEN 600000
#define D_LEN 2556
#elif SF == 40
#define DATA_DIR BASE_PATH "s40_columnar/"
#define LO_LEN 240012412
#define P_LEN 1200000
#define S_LEN 80000
#define C_LEN 1200000
#define D_LEN 2556
#elif SF == 80
#define DATA_DIR BASE_PATH "s80_columnar/"
#define LO_LEN 480025073
#define P_LEN 1400000
#define S_LEN 160000
#define C_LEN 2400000
#define D_LEN 2556
#elif SF == 160
#define DATA_DIR BASE_PATH "s160_columnar/"
#define LO_LEN 960017453
#define P_LEN 1600000
#define S_LEN 320000
#define C_LEN 4800000
#define D_LEN 2556
#elif SF == 240
#define DATA_DIR BASE_PATH "s240_columnar/"
#define LO_LEN 1440017558
#define P_LEN 1600000
#define S_LEN 480000
#define C_LEN 7200000
#define D_LEN 2556
#elif SF == 320
#define DATA_DIR BASE_PATH "s320_columnar/"
#define LO_LEN 1919988957
#define P_LEN 1800000
#define S_LEN 640000
#define C_LEN 9600000
#define D_LEN 2556
#elif SF == 2
#define DATA_DIR NEW_BENCH_DIR
#define LO_LEN (SEGMENT_SIZE * 12)
#define P_LEN (SEGMENT_SIZE * 2)
#define S_LEN (SEGMENT_SIZE * 8)
#define C_LEN (SEGMENT_SIZE * 4)
#define D_LEN SEGMENT_SIZE
#elif SF == 3
#define DATA_DIR SM_BENCH_DIR
#define LO_LEN (SEGMENT_SIZE * 12)
#define P_LEN (SEGMENT_SIZE * 2)
#define S_LEN (SEGMENT_SIZE * 8)
#define C_LEN (SEGMENT_SIZE * 4)
#define D_LEN SEGMENT_SIZE
#elif SF == 4
#define DATA_DIR PRELIM_BENCH_DIR
#define LO_LEN (SEGMENT_SIZE * 24)
#define P_LEN (SEGMENT_SIZE * 8)
#define S_LEN (SEGMENT_SIZE * 20)
#define C_LEN (SEGMENT_SIZE * 12)
#define D_LEN (SEGMENT_SIZE * 4)
#elif SF == 5
#define DATA_DIR PRELIM_BENCH_DIR
#define LO_LEN (SEGMENT_SIZE * 24)
#define P_LEN (SEGMENT_SIZE * 20)
#define S_LEN (SEGMENT_SIZE * 4)
#define C_LEN (SEGMENT_SIZE * 12)
#define D_LEN (SEGMENT_SIZE * 24)
#elif SF == 41
#define DATA_DIR EXPERIMENT_BENCH_DIR "s41_columnar/"
#define LO_LEN (SEGMENT_SIZE * 112)
#define S_LEN (SEGMENT_SIZE * 8)
#define C_LEN (SEGMENT_SIZE * 32)
#define P_LEN (SEGMENT_SIZE * 80)
#define D_LEN 2556
#elif SF == 42
#define DATA_DIR EXPERIMENT_BENCH_DIR "s42_columnar/"
#define LO_LEN (SEGMENT_SIZE * 160)
#define S_LEN (SEGMENT_SIZE * 8)
#define C_LEN (SEGMENT_SIZE * 24)
#define P_LEN (SEGMENT_SIZE * 40)
#define D_LEN 2556
#elif SF == 43
#define DATA_DIR EXPERIMENT_BENCH_DIR "s43_columnar/"
#define LO_LEN (SEGMENT_SIZE * 128)
#define S_LEN (SEGMENT_SIZE * 64)
#define C_LEN (SEGMENT_SIZE * 8)
#define P_LEN (SEGMENT_SIZE * 32)
#define D_LEN 2556
#elif SF == 81
#define DATA_DIR EXPERIMENT_BENCH_DIR "s81_columnar/"
#define LO_LEN (SEGMENT_SIZE * 224)
#define S_LEN (SEGMENT_SIZE * 16)
#define C_LEN (SEGMENT_SIZE * 64)
#define P_LEN (SEGMENT_SIZE * 160)
#define D_LEN 2556
#elif SF == 82
#define DATA_DIR EXPERIMENT_BENCH_DIR "s82_columnar/"
#define LO_LEN (SEGMENT_SIZE * 320)
#define S_LEN (SEGMENT_SIZE * 16)
#define C_LEN (SEGMENT_SIZE * 48)
#define P_LEN (SEGMENT_SIZE * 80)
#define D_LEN 2556
#elif SF == 83
#define DATA_DIR EXPERIMENT_BENCH_DIR "s83_columnar/"
#define LO_LEN (SEGMENT_SIZE * 256)
#define S_LEN (SEGMENT_SIZE * 128)
#define C_LEN (SEGMENT_SIZE * 16)
#define P_LEN (SEGMENT_SIZE * 64)
#define D_LEN 2556
#elif SF == 84
#define DATA_DIR EXPERIMENT_BENCH_DIR "s84_columnar/"
#define LO_LEN (SEGMENT_SIZE * 320)
#define S_LEN (SEGMENT_SIZE * 80)
#define C_LEN (SEGMENT_SIZE * 16)
#define P_LEN (SEGMENT_SIZE * 48)
#define D_LEN 2556
#elif SF == 161
#define DATA_DIR EXPERIMENT_BENCH_DIR "s161_columnar/"
#define LO_LEN (SEGMENT_SIZE * 448)
#define S_LEN (SEGMENT_SIZE * 32)
#define C_LEN (SEGMENT_SIZE * 128)
#define P_LEN (SEGMENT_SIZE * 320)
#define D_LEN 2556
#elif SF == 162
#define DATA_DIR EXPERIMENT_BENCH_DIR "s162_columnar/"
#define LO_LEN (SEGMENT_SIZE * 640)
#define S_LEN (SEGMENT_SIZE * 32)
#define C_LEN (SEGMENT_SIZE * 96)
#define P_LEN (SEGMENT_SIZE * 160)
#define D_LEN 2556
#elif SF == 163
#define DATA_DIR EXPERIMENT_BENCH_DIR "s163_columnar/"
#define LO_LEN (SEGMENT_SIZE * 512)
#define S_LEN (SEGMENT_SIZE * 256)
#define C_LEN (SEGMENT_SIZE * 32)
#define P_LEN (SEGMENT_SIZE * 128)
#define D_LEN 2556
#elif SF == 164
#define DATA_DIR EXPERIMENT_BENCH_DIR "s164_columnar/"
#define LO_LEN (SEGMENT_SIZE * 640)
#define S_LEN (SEGMENT_SIZE * 160)
#define C_LEN (SEGMENT_SIZE * 32)
#define P_LEN (SEGMENT_SIZE * 96)
#define D_LEN 2556
#elif SF == 242
#define DATA_DIR EXPERIMENT_BENCH_DIR "s242_columnar/"
#define LO_LEN (SEGMENT_SIZE * 960)
#define S_LEN (SEGMENT_SIZE * 48)
#define C_LEN (SEGMENT_SIZE * 144)
#define P_LEN (SEGMENT_SIZE * 240)
#define D_LEN 2556
#elif SF == 321
#define DATA_DIR EXPERIMENT_BENCH_DIR "s321_columnar/"
#define LO_LEN (SEGMENT_SIZE * 896)
#define S_LEN (SEGMENT_SIZE * 64)
#define C_LEN (SEGMENT_SIZE * 256)
#define P_LEN (SEGMENT_SIZE * 640)
#define D_LEN 2556
#elif SF == 322
#define DATA_DIR EXPERIMENT_BENCH_DIR "s322_columnar/"
#define LO_LEN (SEGMENT_SIZE * 1280)
#define S_LEN (SEGMENT_SIZE * 64)
#define C_LEN (SEGMENT_SIZE * 192)
#define P_LEN (SEGMENT_SIZE * 320)
#define D_LEN 2556
#elif SF == 323
#define DATA_DIR EXPERIMENT_BENCH_DIR "s323_columnar/"
#define LO_LEN (SEGMENT_SIZE * 1024)
#define S_LEN (SEGMENT_SIZE * 512)
#define C_LEN (SEGMENT_SIZE * 64)
#define P_LEN (SEGMENT_SIZE * 256)
#define D_LEN 2556
#elif SF == 402
#define DATA_DIR EXPERIMENT_BENCH_DIR "s402_columnar/"
#define LO_LEN (SEGMENT_SIZE * 1600)
#define S_LEN (SEGMENT_SIZE * 80)
#define C_LEN (SEGMENT_SIZE * 240)
#define P_LEN (SEGMENT_SIZE * 400)
#define D_LEN 2556
#else
// Fail here with a clear message instead of later with "DATA_DIR undeclared".
#error "ssb_utils.h: unsupported SF value; add a matching #elif branch"
#endif

// Linear search over the first `len` entries of `arr`.
// Returns the position of the first entry equal to `val`, or -1 when no entry
// matches (including when `len` is zero or negative).
inline int index_of(string* arr, int len, string val) {
  int pos = 0;
  while (pos < len) {
    if (val == arr[pos]) {
      return pos;
    }
    ++pos;
  }
  return -1;
}

namespace ssb_detail {

// Describes one table: the first character shared by its column names, the
// prefix used for its column file names, and its columns in file-index order.
struct TableSchema {
  char first_char;
  const char* file_prefix;
  const char* const* columns;
  int num_columns;
};

// Shared implementation of lookup() and lookupSort().
// Selects a table by the first character of `col_name` and returns
// file_prefix + suffix + <index of col_name within that table>.
// The schema tables are static, so they are built once rather than on every
// call.
inline string column_file_name(const string& col_name, const char* suffix) {
  static const char* const lineorder[] = { "lo_orderkey", "lo_linenumber", "lo_custkey", "lo_partkey", "lo_suppkey", "lo_orderdate", "lo_orderpriority", "lo_shippriority", "lo_quantity", "lo_extendedprice", "lo_ordtotalprice", "lo_discount", "lo_revenue", "lo_supplycost", "lo_tax", "lo_commitdate", "lo_shipmode"};
  static const char* const part[] = {"p_partkey", "p_name", "p_mfgr", "p_category", "p_brand1", "p_color", "p_type", "p_size", "p_container"};
  static const char* const supplier[] = {"s_suppkey", "s_name", "s_address", "s_city", "s_nation", "s_region", "s_phone"};
  static const char* const customer[] = {"c_custkey", "c_name", "c_address", "c_city", "c_nation", "c_region", "c_phone", "c_mktsegment"};
  static const char* const date[] = {"d_datekey", "d_date", "d_dayofweek", "d_month", "d_year", "d_yearmonthnum", "d_yearmonth", "d_daynuminweek", "d_daynuminmonth", "d_daynuminyear", "d_sellingseason", "d_lastdayinweekfl", "d_lastdayinmonthfl", "d_holidayfl", "d_weekdayfl"};

  static const TableSchema tables[] = {
    {'l', "LINEORDER", lineorder, static_cast<int>(sizeof(lineorder) / sizeof(lineorder[0]))},
    {'s', "SUPPLIER",  supplier,  static_cast<int>(sizeof(supplier) / sizeof(supplier[0]))},
    {'c', "CUSTOMER",  customer,  static_cast<int>(sizeof(customer) / sizeof(customer[0]))},
    {'p', "PART",      part,      static_cast<int>(sizeof(part) / sizeof(part[0]))},
    {'d', "DDATE",     date,      static_cast<int>(sizeof(date) / sizeof(date[0]))},
  };
  const int num_tables = static_cast<int>(sizeof(tables) / sizeof(tables[0]));

  // An empty name matches no table and is returned unchanged below.
  const char first = col_name.empty() ? '\0' : col_name[0];

  for (int t = 0; t < num_tables; t++) {
    const TableSchema& table = tables[t];
    if (table.first_char != first)
      continue;

    // A name with a known first character that is not a column of that table
    // keeps index -1, yielding e.g. "LINEORDER-1" (historical behavior).
    int index = -1;
    for (int i = 0; i < table.num_columns; i++) {
      if (col_name == table.columns[i]) {
        index = i;
        break;
      }
    }
    return string(table.file_prefix) + suffix + to_string(index);
  }

  return col_name;
}

}  // namespace ssb_detail

// Maps a column name to the base name of its data file, e.g.
// "lo_orderdate" -> "LINEORDER5", "d_year" -> "DDATE4".
// Names whose first character is not one of l/s/c/p/d are returned unchanged;
// an unknown name with a recognized first character maps to "<TABLE>-1".
inline string lookup(string col_name) {
  return ssb_detail::column_file_name(col_name, "");
}

// Same mapping as lookup(), but with "SORT" inserted between the table prefix
// and the index, e.g. "lo_orderdate" -> "LINEORDERSORT5".
inline string lookupSort(string col_name) {
  return ssb_detail::column_file_name(col_name, "SORT");
}

// Reads the first `num_entries` values of column `col_name` from the binary
// file DATA_DIR + lookup(col_name) into a newly allocated host array.
//
// The array holds num_entries rounded up to a whole multiple of SEGMENT_SIZE
// elements. Elements beyond num_entries, and any elements the file was too
// short to supply, are left uninitialized.
//
// Returns the array (allocated with new[]; release with delete[]), or NULL
// when num_entries is negative or the file cannot be opened.
template<typename T>
T* loadColumn(string col_name, int num_entries) {
  if (num_entries < 0) {
    return NULL;
  }

  // Open the file before allocating: the buffer used to be leaked whenever
  // the file was missing.
  string filename = DATA_DIR + lookup(col_name);
  ifstream colData (filename.c_str(), ios::in | ios::binary);
  if (!colData) {
    return NULL;
  }

  // Size arithmetic in size_t so the round-up cannot overflow int.
  const size_t segment = SEGMENT_SIZE;
  const size_t padded_entries = ((static_cast<size_t>(num_entries) + segment - 1) / segment) * segment;
  T* h_col = new T[padded_entries];

  const streamsize bytes = static_cast<streamsize>(static_cast<size_t>(num_entries) * sizeof(T));
  colData.read(reinterpret_cast<char*>(h_col), bytes);
  if (colData.gcount() != bytes) {
    // Keep returning the buffer as before, but make the truncation visible.
    cerr << "loadColumn: short read from " << filename << " (" << colData.gcount()
         << " of " << bytes << " bytes)" << endl;
  }
  return h_col;
}

// Reads the first `num_entries` values of column `col_name` from the binary
// file DATA_DIR + lookup(col_name) into host memory obtained from
// cudaHostAlloc (cudaHostAllocDefault).
//
// The buffer holds num_entries rounded up to a whole multiple of SEGMENT_SIZE
// elements. Elements beyond num_entries, and any elements the file was too
// short to supply, are left uninitialized.
//
// Returns the buffer (release with cudaFreeHost), or NULL when num_entries is
// negative or the file cannot be opened. An allocation failure is handled by
// CubDebugExit.
template<typename T>
T* loadColumnPinned(string col_name, int num_entries) {
  if (num_entries < 0) {
    return NULL;
  }

  // Open the file before allocating: the pinned buffer used to be leaked
  // whenever the file was missing.
  string filename = DATA_DIR + lookup(col_name);
  ifstream colData (filename.c_str(), ios::in | ios::binary);
  if (!colData) {
    return NULL;
  }

  // Size arithmetic in size_t so the round-up cannot overflow int.
  const size_t segment = SEGMENT_SIZE;
  const size_t padded_entries = ((static_cast<size_t>(num_entries) + segment - 1) / segment) * segment;
  T* h_col = NULL;
  CubDebugExit(cudaHostAlloc((void**) &h_col, padded_entries * sizeof(T), cudaHostAllocDefault));

  const streamsize bytes = static_cast<streamsize>(static_cast<size_t>(num_entries) * sizeof(T));
  colData.read(reinterpret_cast<char*>(h_col), bytes);
  if (colData.gcount() != bytes) {
    // Keep returning the buffer as before, but make the truncation visible.
    cerr << "loadColumnPinned: short read from " << filename << " (" << colData.gcount()
         << " of " << bytes << " bytes)" << endl;
  }
  return h_col;
}

// Reads the first `num_entries` values of column `col_name` from the binary
// file DATA_DIR + lookupSort(col_name) into a newly allocated host array.
//
// The array holds num_entries rounded up to a whole multiple of SEGMENT_SIZE
// elements. Elements beyond num_entries, and any elements the file was too
// short to supply, are left uninitialized.
//
// Returns the array (allocated with new[]; release with delete[]), or NULL
// when num_entries is negative or the file cannot be opened.
template<typename T>
T* loadColumnSort(string col_name, int num_entries) {
  if (num_entries < 0) {
    return NULL;
  }

  // Open the file before allocating: the buffer used to be leaked whenever
  // the file was missing.
  string filename = DATA_DIR + lookupSort(col_name);
  ifstream colData (filename.c_str(), ios::in | ios::binary);
  if (!colData) {
    return NULL;
  }

  // Size arithmetic in size_t so the round-up cannot overflow int.
  const size_t segment = SEGMENT_SIZE;
  const size_t padded_entries = ((static_cast<size_t>(num_entries) + segment - 1) / segment) * segment;
  T* h_col = new T[padded_entries];

  const streamsize bytes = static_cast<streamsize>(static_cast<size_t>(num_entries) * sizeof(T));
  colData.read(reinterpret_cast<char*>(h_col), bytes);
  if (colData.gcount() != bytes) {
    // Keep returning the buffer as before, but make the truncation visible.
    cerr << "loadColumnSort: short read from " << filename << " (" << colData.gcount()
         << " of " << bytes << " bytes)" << endl;
  }
  return h_col;
}

// Reads the first `num_entries` values of column `col_name` from the binary
// file DATA_DIR + lookupSort(col_name) into host memory obtained from
// cudaHostAlloc (cudaHostAllocDefault).
//
// The buffer holds num_entries rounded up to a whole multiple of SEGMENT_SIZE
// elements. Elements beyond num_entries, and any elements the file was too
// short to supply, are left uninitialized.
//
// Returns the buffer (release with cudaFreeHost), or NULL when num_entries is
// negative or the file cannot be opened. An allocation failure is handled by
// CubDebugExit.
template<typename T>
T* loadColumnPinnedSort(string col_name, int num_entries) {
  if (num_entries < 0) {
    return NULL;
  }

  // Open the file before allocating: the pinned buffer used to be leaked
  // whenever the file was missing.
  string filename = DATA_DIR + lookupSort(col_name);
  ifstream colData (filename.c_str(), ios::in | ios::binary);
  if (!colData) {
    return NULL;
  }

  // Size arithmetic in size_t so the round-up cannot overflow int.
  const size_t segment = SEGMENT_SIZE;
  const size_t padded_entries = ((static_cast<size_t>(num_entries) + segment - 1) / segment) * segment;
  T* h_col = NULL;
  CubDebugExit(cudaHostAlloc((void**) &h_col, padded_entries * sizeof(T), cudaHostAllocDefault));

  const streamsize bytes = static_cast<streamsize>(static_cast<size_t>(num_entries) * sizeof(T));
  colData.read(reinterpret_cast<char*>(h_col), bytes);
  if (colData.gcount() != bytes) {
    // Keep returning the buffer as before, but make the truncation visible.
    cerr << "loadColumnPinnedSort: short read from " << filename << " (" << colData.gcount()
         << " of " << bytes << " bytes)" << endl;
  }
  return h_col;
}

// Writes num_entries * sizeof(T) bytes starting at `h_col` to the binary file
// DATA_DIR + lookup(col_name), replacing any existing file, and prints the
// file name to stdout.
//
// NOTE(review): `h_col` is declared int* regardless of T, so the byte count
// only matches the buffer when sizeof(T) == sizeof(int) or the caller has
// sized the buffer accordingly. The signature is kept for compatibility.
//
// Returns 0 on success, -1 when num_entries is negative, the file cannot be
// opened, or the write/flush fails (previously a failed write still
// returned 0).
template<typename T>
int storeColumn(string col_name, int num_entries, int* h_col) {
  if (num_entries < 0) {
    return -1;
  }

  string filename = DATA_DIR + lookup(col_name);
  cout << filename << endl;
  ofstream colData (filename.c_str(), ios::out | ios::binary);
  if (!colData) {
    return -1;
  }

  const streamsize bytes = static_cast<streamsize>(static_cast<size_t>(num_entries) * sizeof(T));
  colData.write(reinterpret_cast<const char*>(h_col), bytes);
  // Close explicitly so buffered data is flushed and any I/O error shows up
  // in the stream state before the result is reported.
  colData.close();
  return colData ? 0 : -1;
}