#pragma once #include <iostream> #include <fstream> #include <string> /*#include <cuda.h>*/ /*#include <cub/util_allocator.cuh>*/ #include <cuda.h> #include <curand.h> #include <cub/util_allocator.cuh> using namespace std; #define SF 402 #define BASE_PATH "/home/ubuntu/Implementation-GPUDB/test/ssb/data/" #define NEW_BENCH_DIR "/home/ubuntu/Implementation-GPUDB/test/new_bench/data/1_columnar/" #define SM_BENCH_DIR "/home/ubuntu/Implementation-GPUDB/test/new_bench/data/sm_columnar/" #define PRELIM_BENCH_DIR "/home/ubuntu/Implementation-GPUDB/test/new_bench/data/prelim_columnar/" #define EXPERIMENT_BENCH_DIR "/home/ubuntu/Implementation-GPUDB/test/new_bench/data/" #define SEGMENT_SIZE 1048576 // #define SEGMENT_SIZE 1024 #if SF == 1 #define DATA_DIR BASE_PATH "s1_columnar/" #define LO_LEN 6001171 //#define LO_LEN 1000 #define P_LEN 200000 #define S_LEN 2000 #define C_LEN 30000 #define D_LEN 2556 #elif SF == 10 #define DATA_DIR BASE_PATH "s10_columnar/" #define LO_LEN 59986214 #define P_LEN 800000 #define S_LEN 20000 #define C_LEN 300000 #define D_LEN 2556 #elif SF == 20 #define DATA_DIR BASE_PATH "s20_columnar/" #define LO_LEN 119994746 #define P_LEN 1000000 #define S_LEN 40000 #define C_LEN 600000 #define D_LEN 2556 #elif SF == 40 #define DATA_DIR BASE_PATH "s40_columnar/" #define LO_LEN 240012412 #define P_LEN 1200000 #define S_LEN 80000 #define C_LEN 1200000 #define D_LEN 2556 #elif SF == 80 #define DATA_DIR BASE_PATH "s80_columnar/" #define LO_LEN 480025073 #define P_LEN 1400000 #define S_LEN 160000 #define C_LEN 2400000 #define D_LEN 2556 #elif SF == 160 #define DATA_DIR BASE_PATH "s160_columnar/" #define LO_LEN 960017453 #define P_LEN 1600000 #define S_LEN 320000 #define C_LEN 4800000 #define D_LEN 2556 #elif SF == 240 #define DATA_DIR BASE_PATH "s240_columnar/" #define LO_LEN 1440017558 #define P_LEN 1600000 #define S_LEN 480000 #define C_LEN 7200000 #define D_LEN 2556 #elif SF == 320 #define DATA_DIR BASE_PATH "s320_columnar/" #define LO_LEN 1919988957 #define P_LEN 1800000 #define S_LEN 640000 #define C_LEN 9600000 #define D_LEN 2556 #elif SF == 2 #define DATA_DIR NEW_BENCH_DIR #define LO_LEN SEGMENT_SIZE * 12 #define P_LEN SEGMENT_SIZE * 2 #define S_LEN SEGMENT_SIZE * 8 #define C_LEN SEGMENT_SIZE * 4 #define D_LEN SEGMENT_SIZE #elif SF == 3 #define DATA_DIR SM_BENCH_DIR #define LO_LEN SEGMENT_SIZE * 12 #define P_LEN SEGMENT_SIZE * 2 #define S_LEN SEGMENT_SIZE * 8 #define C_LEN SEGMENT_SIZE * 4 #define D_LEN SEGMENT_SIZE #elif SF == 4 #define DATA_DIR PRELIM_BENCH_DIR #define LO_LEN SEGMENT_SIZE * 24 #define P_LEN SEGMENT_SIZE * 8 #define S_LEN SEGMENT_SIZE * 20 #define C_LEN SEGMENT_SIZE * 12 #define D_LEN SEGMENT_SIZE * 4 #elif SF == 5 #define DATA_DIR PRELIM_BENCH_DIR #define LO_LEN SEGMENT_SIZE * 24 #define P_LEN SEGMENT_SIZE * 20 #define S_LEN SEGMENT_SIZE * 4 #define C_LEN SEGMENT_SIZE * 12 #define D_LEN SEGMENT_SIZE * 24 #elif SF == 41 #define DATA_DIR EXPERIMENT_BENCH_DIR "s41_columnar/" #define LO_LEN SEGMENT_SIZE * 112 #define S_LEN SEGMENT_SIZE * 8 #define C_LEN SEGMENT_SIZE * 32 #define P_LEN SEGMENT_SIZE * 80 #define D_LEN 2556 #elif SF == 42 #define DATA_DIR EXPERIMENT_BENCH_DIR "s42_columnar/" #define LO_LEN SEGMENT_SIZE * 160 #define S_LEN SEGMENT_SIZE * 8 #define C_LEN SEGMENT_SIZE * 24 #define P_LEN SEGMENT_SIZE * 40 #define D_LEN 2556 #elif SF == 43 #define DATA_DIR EXPERIMENT_BENCH_DIR "s43_columnar/" #define LO_LEN SEGMENT_SIZE * 128 #define S_LEN SEGMENT_SIZE * 64 #define C_LEN SEGMENT_SIZE * 8 #define P_LEN SEGMENT_SIZE * 32 #define D_LEN 2556 #elif SF == 81 #define DATA_DIR EXPERIMENT_BENCH_DIR "s81_columnar/" #define LO_LEN SEGMENT_SIZE * 224 #define S_LEN SEGMENT_SIZE * 16 #define C_LEN SEGMENT_SIZE * 64 #define P_LEN SEGMENT_SIZE * 160 #define D_LEN 2556 #elif SF == 82 #define DATA_DIR EXPERIMENT_BENCH_DIR "s82_columnar/" #define LO_LEN SEGMENT_SIZE * 320 #define S_LEN SEGMENT_SIZE * 16 #define C_LEN SEGMENT_SIZE * 48 #define P_LEN SEGMENT_SIZE * 80 #define D_LEN 2556 #elif SF == 83 #define DATA_DIR EXPERIMENT_BENCH_DIR "s83_columnar/" #define LO_LEN SEGMENT_SIZE * 256 #define S_LEN SEGMENT_SIZE * 128 #define C_LEN SEGMENT_SIZE * 16 #define P_LEN SEGMENT_SIZE * 64 #define D_LEN 2556 #elif SF == 84 #define DATA_DIR EXPERIMENT_BENCH_DIR "s84_columnar/" #define LO_LEN SEGMENT_SIZE * 320 #define S_LEN SEGMENT_SIZE * 80 #define C_LEN SEGMENT_SIZE * 16 #define P_LEN SEGMENT_SIZE * 48 #define D_LEN 2556 #elif SF == 161 #define DATA_DIR EXPERIMENT_BENCH_DIR "s161_columnar/" #define LO_LEN SEGMENT_SIZE * 448 #define S_LEN SEGMENT_SIZE * 32 #define C_LEN SEGMENT_SIZE * 128 #define P_LEN SEGMENT_SIZE * 320 #define D_LEN 2556 #elif SF == 162 #define DATA_DIR EXPERIMENT_BENCH_DIR "s162_columnar/" #define LO_LEN SEGMENT_SIZE * 640 #define S_LEN SEGMENT_SIZE * 32 #define C_LEN SEGMENT_SIZE * 96 #define P_LEN SEGMENT_SIZE * 160 #define D_LEN 2556 #elif SF == 163 #define DATA_DIR EXPERIMENT_BENCH_DIR "s163_columnar/" #define LO_LEN SEGMENT_SIZE * 512 #define S_LEN SEGMENT_SIZE * 256 #define C_LEN SEGMENT_SIZE * 32 #define P_LEN SEGMENT_SIZE * 128 #define D_LEN 2556 #elif SF == 164 #define DATA_DIR EXPERIMENT_BENCH_DIR "s164_columnar/" #define LO_LEN SEGMENT_SIZE * 640 #define S_LEN SEGMENT_SIZE * 160 #define C_LEN SEGMENT_SIZE * 32 #define P_LEN SEGMENT_SIZE * 96 #define D_LEN 2556 #elif SF == 242 #define DATA_DIR EXPERIMENT_BENCH_DIR "s242_columnar/" #define LO_LEN SEGMENT_SIZE * 960 #define S_LEN SEGMENT_SIZE * 48 #define C_LEN SEGMENT_SIZE * 144 #define P_LEN SEGMENT_SIZE * 240 #define D_LEN 2556 #elif SF == 321 #define DATA_DIR EXPERIMENT_BENCH_DIR "s321_columnar/" #define LO_LEN SEGMENT_SIZE * 896 #define S_LEN SEGMENT_SIZE * 64 #define C_LEN SEGMENT_SIZE * 256 #define P_LEN SEGMENT_SIZE * 640 #define D_LEN 2556 #elif SF == 322 #define DATA_DIR EXPERIMENT_BENCH_DIR "s322_columnar/" #define LO_LEN SEGMENT_SIZE * 1280 #define S_LEN SEGMENT_SIZE * 64 #define C_LEN SEGMENT_SIZE * 192 #define P_LEN SEGMENT_SIZE * 320 #define D_LEN 2556 #elif SF == 323 #define DATA_DIR EXPERIMENT_BENCH_DIR "s323_columnar/" #define LO_LEN SEGMENT_SIZE * 1024 #define S_LEN SEGMENT_SIZE * 512 #define C_LEN SEGMENT_SIZE * 64 #define P_LEN SEGMENT_SIZE * 256 #define D_LEN 2556 #elif SF == 402 #define DATA_DIR EXPERIMENT_BENCH_DIR "s402_columnar/" #define LO_LEN SEGMENT_SIZE * 1600 #define S_LEN SEGMENT_SIZE * 80 #define C_LEN SEGMENT_SIZE * 240 #define P_LEN SEGMENT_SIZE * 400 #define D_LEN 2556 #endif inline int index_of(string* arr, int len, string val) { for (int i=0; i<len; i++) if (arr[i] == val) return i; return -1; } inline string lookup(string col_name) { string lineorder[] = { "lo_orderkey", "lo_linenumber", "lo_custkey", "lo_partkey", "lo_suppkey", "lo_orderdate", "lo_orderpriority", "lo_shippriority", "lo_quantity", "lo_extendedprice", "lo_ordtotalprice", "lo_discount", "lo_revenue", "lo_supplycost", "lo_tax", "lo_commitdate", "lo_shipmode"}; string part[] = {"p_partkey", "p_name", "p_mfgr", "p_category", "p_brand1", "p_color", "p_type", "p_size", "p_container"}; string supplier[] = {"s_suppkey", "s_name", "s_address", "s_city", "s_nation", "s_region", "s_phone"}; string customer[] = {"c_custkey", "c_name", "c_address", "c_city", "c_nation", "c_region", "c_phone", "c_mktsegment"}; string date[] = {"d_datekey", "d_date", "d_dayofweek", "d_month", "d_year", "d_yearmonthnum", "d_yearmonth", "d_daynuminweek", "d_daynuminmonth", "d_daynuminyear", "d_sellingseason", "d_lastdayinweekfl", "d_lastdayinmonthfl", "d_holidayfl", "d_weekdayfl"}; if (col_name[0] == 'l') { int index = index_of(lineorder, 17, col_name); return "LINEORDER" + to_string(index); } else if (col_name[0] == 's') { int index = index_of(supplier, 7, col_name); return "SUPPLIER" + to_string(index); } else if (col_name[0] == 'c') { int index = index_of(customer, 8, col_name); return "CUSTOMER" + to_string(index); } else if (col_name[0] == 'p') { int index = index_of(part, 9, col_name); return "PART" + to_string(index); } else if (col_name[0] == 'd') { int index = index_of(date, 15, col_name); return "DDATE" + to_string(index); } else { return col_name; } return ""; } inline string lookupSort(string col_name) { string lineorder[] = { "lo_orderkey", "lo_linenumber", "lo_custkey", "lo_partkey", "lo_suppkey", "lo_orderdate", "lo_orderpriority", "lo_shippriority", "lo_quantity", "lo_extendedprice", "lo_ordtotalprice", "lo_discount", "lo_revenue", "lo_supplycost", "lo_tax", "lo_commitdate", "lo_shipmode"}; string part[] = {"p_partkey", "p_name", "p_mfgr", "p_category", "p_brand1", "p_color", "p_type", "p_size", "p_container"}; string supplier[] = {"s_suppkey", "s_name", "s_address", "s_city", "s_nation", "s_region", "s_phone"}; string customer[] = {"c_custkey", "c_name", "c_address", "c_city", "c_nation", "c_region", "c_phone", "c_mktsegment"}; string date[] = {"d_datekey", "d_date", "d_dayofweek", "d_month", "d_year", "d_yearmonthnum", "d_yearmonth", "d_daynuminweek", "d_daynuminmonth", "d_daynuminyear", "d_sellingseason", "d_lastdayinweekfl", "d_lastdayinmonthfl", "d_holidayfl", "d_weekdayfl"}; if (col_name[0] == 'l') { int index = index_of(lineorder, 17, col_name); return "LINEORDERSORT" + to_string(index); } else if (col_name[0] == 's') { int index = index_of(supplier, 7, col_name); return "SUPPLIERSORT" + to_string(index); } else if (col_name[0] == 'c') { int index = index_of(customer, 8, col_name); return "CUSTOMERSORT" + to_string(index); } else if (col_name[0] == 'p') { int index = index_of(part, 9, col_name); return "PARTSORT" + to_string(index); } else if (col_name[0] == 'd') { int index = index_of(date, 15, col_name); return "DDATESORT" + to_string(index); } else { return col_name; } return ""; } template<typename T> T* loadColumn(string col_name, int num_entries) { T* h_col = new T[((num_entries + SEGMENT_SIZE - 1)/SEGMENT_SIZE) * SEGMENT_SIZE]; string filename = DATA_DIR + lookup(col_name); ifstream colData (filename.c_str(), ios::in | ios::binary); if (!colData) { return NULL; } colData.read((char*)h_col, num_entries * sizeof(T)); return h_col; } template<typename T> T* loadColumnPinned(string col_name, int num_entries) { T* h_col; CubDebugExit(cudaHostAlloc((void**) &h_col, ((num_entries + SEGMENT_SIZE - 1)/SEGMENT_SIZE) * SEGMENT_SIZE * sizeof(T), cudaHostAllocDefault)); string filename = DATA_DIR + lookup(col_name); ifstream colData (filename.c_str(), ios::in | ios::binary); if (!colData) { return NULL; } colData.read((char*)h_col, num_entries * sizeof(T)); return h_col; } template<typename T> T* loadColumnSort(string col_name, int num_entries) { T* h_col = new T[((num_entries + SEGMENT_SIZE - 1)/SEGMENT_SIZE) * SEGMENT_SIZE]; string filename = DATA_DIR + lookupSort(col_name); ifstream colData (filename.c_str(), ios::in | ios::binary); if (!colData) { return NULL; } colData.read((char*)h_col, num_entries * sizeof(T)); return h_col; } template<typename T> T* loadColumnPinnedSort(string col_name, int num_entries) { T* h_col; CubDebugExit(cudaHostAlloc((void**) &h_col, ((num_entries + SEGMENT_SIZE - 1)/SEGMENT_SIZE) * SEGMENT_SIZE * sizeof(T), cudaHostAllocDefault)); string filename = DATA_DIR + lookupSort(col_name); ifstream colData (filename.c_str(), ios::in | ios::binary); if (!colData) { return NULL; } colData.read((char*)h_col, num_entries * sizeof(T)); return h_col; } template<typename T> int storeColumn(string col_name, int num_entries, int* h_col) { string filename = DATA_DIR + lookup(col_name); cout << filename << endl; ofstream colData (filename.c_str(), ios::out | ios::binary); if (!colData) { return -1; } colData.write((char*)h_col, num_entries * sizeof(T)); return 0; }