#include "CacheManager.h" #include <bitset> Segment::Segment(ColumnInfo* _column, int* _seg_ptr, int _priority) : column(_column), seg_ptr(_seg_ptr), priority(_priority), seg_size(SEGMENT_SIZE) { stats = new Statistics(); col_ptr = column->col_ptr; segment_id = (seg_ptr - col_ptr)/seg_size; weight = 0; repl_weight = 0; } Segment::Segment(ColumnInfo* _column, int* _seg_ptr) : column(_column), seg_ptr(_seg_ptr), priority(0), seg_size(SEGMENT_SIZE) { stats = new Statistics(); col_ptr = column->col_ptr; segment_id = (seg_ptr - col_ptr)/seg_size; weight = 0; repl_weight = 0; } ColumnInfo::ColumnInfo(TableInfo* _table, string _column_name, int _column_id, int* _col_ptr) : table(_table), column_name(_column_name), column_id(_column_id), col_ptr(_col_ptr) { stats = new Statistics(); tot_seg_in_GPU = 0; weight = 0; seg_ptr = col_ptr; table_id = table->table_id; table_name = table->table_name; LEN = table->LEN; total_segment = (LEN+SEGMENT_SIZE-1)/SEGMENT_SIZE; } TableInfo::TableInfo(string _table_name, int _LEN, int _table_id) : table_name(_table_name), LEN(_LEN), table_id(_table_id) { total_segment = (LEN+SEGMENT_SIZE-1)/SEGMENT_SIZE; } // WILL NOT WORK FOR MULTI GPU Segment* ColumnInfo::getSegment(int index) { Segment* seg = new Segment(this, col_ptr+SEGMENT_SIZE*index); return seg; } CacheManager::CacheManager(size_t _cache_size, size_t _broadcast_size, size_t _processing_size, size_t _pinned_memsize) { tot_cache_size = _cache_size; each_cache_size = _cache_size/NUM_GPU; each_broadcast_size = _broadcast_size/NUM_GPU; cache_total_seg = tot_cache_size/SEGMENT_SIZE; cache_each_tot_seg = each_cache_size/SEGMENT_SIZE; broadcast_each_tot_seg = each_broadcast_size/SEGMENT_SIZE; tot_processing_size = _processing_size; each_processing_size = _processing_size/NUM_GPU; pinned_memsize = _pinned_memsize; TOT_COLUMN = NUM_COLUMN; TOT_TABLE = NUM_TABLE; seg_idx_min = 0; gpuCache = new int*[NUM_GPU]; gpuProcessing = new uint64_t*[NUM_GPU]; gpuPointer = new unsigned int[NUM_GPU]; broadcastPointer = new unsigned int[NUM_GPU]; gpuBroadcast = new int*[NUM_GPU]; for (int gpu = 0; gpu < NUM_GPU; gpu++) { cout << gpu << " " << each_cache_size << endl; cudaSetDevice(gpu); CubDebugExit(cudaMalloc((void**) &gpuCache[gpu], (each_cache_size + each_broadcast_size) * sizeof(int))); // CubDebugExit(cudaMemset(gpuCache[gpu], 0, each_cache_size * sizeof(int))); CubDebugExit(cudaMalloc((void**) &gpuProcessing[gpu], each_processing_size * sizeof(uint64_t))); gpuPointer[gpu] = 0; broadcastPointer[gpu] = 0; gpuBroadcast[gpu] = gpuCache[gpu] + each_cache_size; } cudaSetDevice(0); cpuProcessing = (uint64_t*) malloc(tot_processing_size * sizeof(uint64_t)); CubDebugExit(cudaHostAlloc((void**) &pinnedMemory, pinned_memsize * sizeof(uint64_t), cudaHostAllocDefault)); cpuPointer = 0; pinnedPointer = 0; allColumn.resize(TOT_COLUMN); allTable.resize(TOT_TABLE); index_to_segment.resize(TOT_COLUMN); segment_row_to_gpu.resize(TOT_TABLE); cache_mapper.resize(NUM_GPU); empty_gpu_segment.resize(NUM_GPU); for (int gpu = 0; gpu < NUM_GPU; gpu++) { for(int i = 0; i < cache_each_tot_seg; i++) { empty_gpu_segment[gpu].push(i); } } loadColumnToCPU(); seg_row_to_single_gpu = (int**) malloc (TOT_TABLE * sizeof(int*)); seg_is_replicated = (int**) malloc (TOT_TABLE * sizeof(int*)); for (int table = 0; table < TOT_TABLE; table++) { int total_segment = allTable[table]->total_segment; segment_row_to_gpu[table].resize(total_segment); seg_row_to_single_gpu[table] = (int*) malloc (total_segment * sizeof(int)); seg_is_replicated[table] = (int*) malloc 
(total_segment * sizeof(int)); memset(seg_row_to_single_gpu[table], -1, total_segment * sizeof(int)); memset(seg_is_replicated[table], 0, total_segment * sizeof(int)); } gpu_segment_row_bitmap = (unsigned int***) malloc (NUM_GPU * sizeof(unsigned int**)); for (int gpu = 0; gpu < NUM_GPU; gpu++) { gpu_segment_row_bitmap[gpu] = (unsigned int**) malloc (TOT_TABLE * sizeof(unsigned int*)); for (int table = 0; table < TOT_TABLE; table++) { gpu_segment_row_bitmap[gpu][table] = (unsigned int*) malloc (allTable[table]->total_segment * sizeof(unsigned int)); memset(gpu_segment_row_bitmap[gpu][table], 0, allTable[table]->total_segment * sizeof(unsigned int)); } } segment_list = (int***) malloc (NUM_GPU * sizeof(int**)); for (int gpu = 0; gpu < NUM_GPU; gpu++) { segment_list[gpu] = (int**) malloc (TOT_COLUMN * sizeof(int*)); for (int i = 0; i < TOT_COLUMN; i++) { int n = allColumn[i]->total_segment; CubDebugExit(cudaHostAlloc((void**) &(segment_list[gpu][i]), n * sizeof(int), cudaHostAllocDefault)); memset(segment_list[gpu][i], -1, n * sizeof(int)); } } segment_min = (int**) malloc (TOT_COLUMN * sizeof(int*)); segment_max = (int**) malloc (TOT_COLUMN * sizeof(int*)); segment_bitmap = (unsigned char**) malloc (TOT_COLUMN * sizeof(unsigned char*)); for (int i = 0; i < TOT_COLUMN; i++) { int n = allColumn[i]->total_segment; segment_bitmap[i] = (unsigned char*) malloc(n * sizeof(unsigned char)); segment_min[i] = (int*) malloc(n * sizeof(int)); segment_max[i] = (int*) malloc(n * sizeof(int)); memset(segment_bitmap[i], 0, n * sizeof(unsigned char)); } readSegmentMinMax(); for (int i = 0; i < TOT_COLUMN; i++) { index_to_segment[i].resize(allColumn[i]->total_segment); for (int j = 0; j < allColumn[i]->total_segment; j++) { index_to_segment[i][j] = allColumn[i]->getSegment(j); } } } // void // CacheManager::resetCache(size_t _cache_size, size_t _processing_size, size_t _pinned_memsize) { // for (int gpu = 0; gpu < NUM_GPU; gpu++) { // cudaSetDevice(gpu); // CubDebugExit(cudaFree(gpuCache[gpu])); // CubDebugExit(cudaFree(gpuProcessing[gpu])); // } // cudaSetDevice(0); // delete[] gpuCache; // delete[] gpuProcessing; // delete[] cpuProcessing; // CubDebugExit(cudaFreeHost(pinnedMemory)); // for (int table = 0; table < TOT_TABLE; table++) { // int total_segment = allTable[table]->total_segment; // for (int seg_id = 0; seg_id < total_segment; seg_id++) { // segment_row_to_gpu[table][seg_id].clear(); // } // free(seg_row_to_single_gpu[table]); // } // free(seg_row_to_single_gpu); // for (int gpu = 0; gpu < NUM_GPU; gpu++) { // cache_mapper[gpu].clear(); // } // for (int i = 0; i < TOT_COLUMN; i++) { // free(segment_bitmap[i]); // } // free(segment_bitmap); // for (int gpu = 0; gpu < NUM_GPU; gpu++) { // for (int i = 0; i < TOT_COLUMN; i++) { // CubDebugExit(cudaFreeHost(segment_list[gpu][i])); // } // free(segment_list[gpu]); // } // free(segment_list); // for (int gpu = 0; gpu < NUM_GPU; gpu++) { // for (int i = 0; i < TOT_TABLE; i++) { // free(gpu_segment_row_bitmap[gpu][i]); // } // free(gpu_segment_row_bitmap[gpu]); // } // free(gpu_segment_row_bitmap); // tot_cache_size = _cache_size; // each_cache_size = _cache_size/NUM_GPU; // cache_total_seg = tot_cache_size/SEGMENT_SIZE; // cache_each_tot_seg = each_cache_size/SEGMENT_SIZE; // tot_processing_size = _processing_size; // each_processing_size = _processing_size/NUM_GPU; // pinned_memsize = _pinned_memsize; // cout << each_cache_size << endl; // gpuCache = new int*[NUM_GPU]; // gpuProcessing = new uint64_t*[NUM_GPU]; // gpuPointer = new unsigned int[NUM_GPU]; 
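// Layout note (illustrative, not part of the build): each GPU owns a single
// device allocation of (each_cache_size + each_broadcast_size) ints. With
// hypothetical numbers, say SEGMENT_SIZE = 1 << 20 and each_cache_size =
// 512 << 20 ints, cache_each_tot_seg = each_cache_size / SEGMENT_SIZE = 512
// cache slots per GPU, and gpuBroadcast[gpu] points at the first int past
// those slots. The empty_gpu_segment queues hand out slot indices
// 0..cache_each_tot_seg-1 in FIFO order.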
void CacheManager::readSegmentMinMax() {
  for (int i = 0; i < TOT_COLUMN; i++) {
    string line;
    ifstream myfile (DATA_DIR + allColumn[i]->column_name + "minmax");
    if (myfile.is_open()) {
      int segment_idx = 0;
      string del = " ";
      while ( getline (myfile,line) ) {
        // Each line holds "<min> <max>" for one segment.
        size_t start = 0;
        size_t end = line.find(del);
        if (end != string::npos) {
          string minstring = line.substr(start, end - start);
          segment_min[i][segment_idx] = stoi(minstring);
          start = end + del.size();
        }
        string maxstring = line.substr(start); // the remainder of the line is the max
        segment_max[i][segment_idx] = stoi(maxstring);
        segment_idx++;
      }
      assert(segment_idx == allColumn[i]->total_segment);
      myfile.close();
    } else {
      cout << "Unable to open file" << endl;
      assert(0);
    }
  }
}

template <typename T>
T* CacheManager::customMalloc(int size) {
  // Sizes are rounded up to whole uint64_t words; the arena pointer is a
  // simple atomic bump allocator, so allocations are only reclaimed in bulk
  // by resetPointer().
  int alloc = ((size * sizeof(T)) + sizeof(uint64_t) - 1) / sizeof(uint64_t);
  int start = __atomic_fetch_add(&cpuPointer, alloc, __ATOMIC_RELAXED);
  assert((start + alloc) < tot_processing_size);
  return reinterpret_cast<T*>(cpuProcessing + start);
};
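// Usage sketch (hypothetical call site): the three custom*Alloc helpers below
// all follow the same bump-allocation pattern over a preallocated arena, e.g.
//   int* out_buf = cm->customMalloc<int>(SEGMENT_SIZE);          // host arena
//   int* dev_buf = cm->customCudaMalloc<int>(SEGMENT_SIZE, 0);   // GPU 0 arena
// Nothing is freed individually; call resetPointer() between queries to
// recycle all arenas at once.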
template <typename T>
T* CacheManager::customCudaMalloc(int size, int gpu) {
  int alloc = ((size * sizeof(T)) + sizeof(uint64_t) - 1) / sizeof(uint64_t);
  int start = __atomic_fetch_add(&gpuPointer[gpu], alloc, __ATOMIC_RELAXED);
  assert((start + alloc) < each_processing_size);
  return reinterpret_cast<T*>(gpuProcessing[gpu] + start);
};

template <typename T>
T* CacheManager::customCudaHostAlloc(int size) {
  int alloc = ((size * sizeof(T)) + sizeof(uint64_t) - 1) / sizeof(uint64_t);
  int start = __atomic_fetch_add(&pinnedPointer, alloc, __ATOMIC_RELAXED);
  assert((start + alloc) < pinned_memsize); // bound by the pinned arena, not the CPU arena
  return reinterpret_cast<T*>(pinnedMemory + start);
};

//perform index transfer to a gpu only if it doesn't have the indexes yet
//the stream synchronize will cause the memcpy to be serialized across GPUs; we have to fix this in the future
void CacheManager::indexTransfer(int** col_idx, ColumnInfo* column, cudaStream_t stream, int gpu, bool custom) {
  if (col_idx[column->column_id] == NULL) {
    int* desired;
    // if (custom) desired = (int*) customCudaMalloc<int>(column->total_segment, gpu);
    // else CubDebugExit(cudaMalloc((void**) &desired, column->total_segment * sizeof(int)));
    desired = (int*) customCudaMalloc<int>(column->total_segment, gpu);
    int* expected = NULL;
    CubDebugExit(cudaSetDevice(gpu));
    CubDebugExit(cudaMemcpyAsync(desired, segment_list[gpu][column->column_id], column->total_segment * sizeof(int), cudaMemcpyHostToDevice, stream));
    CubDebugExit(cudaStreamSynchronize(stream));
    // Publish the device-side index atomically: if another thread won the
    // race, its pointer stays and ours is simply abandoned to the arena.
    __atomic_compare_exchange_n(&(col_idx[column->column_id]), &expected, desired, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
    CubDebugExit(cudaSetDevice(0));
  }
};

void CacheManager::resetPointer() {
  for (int gpu = 0; gpu < NUM_GPU; gpu++) {
    gpuPointer[gpu] = 0;
    broadcastPointer[gpu] = 0;
  }
  cpuPointer = 0;
  pinnedPointer = 0;
};
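// Call-site sketch (hypothetical names): before launching a kernel that reads
// a column's segment index on GPU g, a worker thread would do
//   cm->indexTransfer(all_col_idx[g], cm->lo_orderdate, stream, g, true);
//   int* d_idx = all_col_idx[g][cm->lo_orderdate->column_id];
// Only the first caller pays the H2D copy; later callers see a non-NULL
// pointer and return immediately.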
// modified for multi gpu
void CacheManager::cacheSegmentInGPU(Segment* seg, int gpu) {
  int64_t local_idx = empty_gpu_segment[gpu].front(); //local segment index within this GPU's cache
  empty_gpu_segment[gpu].pop();
  assert(cache_mapper[gpu].find(seg) == cache_mapper[gpu].end());
  cache_mapper[gpu][seg] = local_idx;

  // Per-column bitmap: bit (7 - gpu) of segment_bitmap[col][seg] records
  // whether this GPU holds the segment; it must not be set yet.
  unsigned char old_segment_bitmap = segment_bitmap[seg->column->column_id][seg->segment_id];
  unsigned char check = (segment_bitmap[seg->column->column_id][seg->segment_id] << gpu);
  check = check >> 7;
  assert(check == 0x00);
  unsigned char code = 0x80 >> gpu;
  segment_bitmap[seg->column->column_id][seg->segment_id] = segment_bitmap[seg->column->column_id][seg->segment_id] | code;

  //segment row management
  if (gpu_segment_row_bitmap[gpu][seg->column->table_id][seg->segment_id] == 0x00000000) { //this segment row has not been cached in this gpu yet
    vector<int> vec = segment_row_to_gpu[seg->column->table_id][seg->segment_id];
    vector<int>::iterator it = find(vec.begin(), vec.end(), gpu);
    assert(it == vec.end());
    segment_row_to_gpu[seg->column->table_id][seg->segment_id].push_back(gpu);
    seg_row_to_single_gpu[seg->column->table_id][seg->segment_id] = gpu;
    if (segment_row_to_gpu[seg->column->table_id][seg->segment_id].size() == NUM_GPU) {
      seg_is_replicated[seg->column->table_id][seg->segment_id] = 1;
      seg_row_to_single_gpu[seg->column->table_id][seg->segment_id] = seg->segment_id % NUM_GPU;
    }
  }

  unsigned int check_int = (gpu_segment_row_bitmap[gpu][seg->column->table_id][seg->segment_id] << seg->column->column_table_id);
  check_int = check_int >> 31;
  assert(check_int == 0);
  unsigned int code_int = 0x80000000 >> seg->column->column_table_id;
  gpu_segment_row_bitmap[gpu][seg->column->table_id][seg->segment_id] = gpu_segment_row_bitmap[gpu][seg->column->table_id][seg->segment_id] | code_int;

  assert(segment_list[gpu][seg->column->column_id][seg->segment_id] == -1);
  assert(local_idx >= 0);
  segment_list[gpu][seg->column->column_id][seg->segment_id] = local_idx;

  CubDebugExit(cudaSetDevice(gpu));
  CubDebugExit(cudaMemcpy(&gpuCache[gpu][local_idx * SEGMENT_SIZE], seg->seg_ptr, SEGMENT_SIZE * sizeof(int), cudaMemcpyHostToDevice));
  CubDebugExit(cudaSetDevice(0));

  // tot_seg_in_GPU counts segments cached on at least one GPU, so it only
  // advances the first time the segment lands anywhere.
  if (old_segment_bitmap == 0x00) {
    assert(segment_bitmap[seg->column->column_id][seg->segment_id] != 0x00);
    allColumn[seg->column->column_id]->tot_seg_in_GPU++;
    assert(allColumn[seg->column->column_id]->tot_seg_in_GPU <= allColumn[seg->column->column_id]->total_segment);
  }
}

void CacheManager::cacheSegmentMultipleGPU(Segment* seg, vector<int> &gpu_list) {
  for (int i = 0; i < gpu_list.size(); i++) {
    int gpu = gpu_list[i];
    cacheSegmentInGPU(seg, gpu);
  }
}
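// Worked example of the bitmap encoding (NUM_GPU = 4 for illustration): a
// segment cached on GPUs 0 and 2 has segment_bitmap[col][seg] =
// 0x80 | 0x20 = 0b10100000. The membership test shifts the byte left by
// `gpu`, truncates back to 8 bits on assignment, then shifts right by 7,
// leaving bit (7 - gpu); the scheme therefore supports at most 8 GPUs.
// gpu_segment_row_bitmap plays the same trick with 32 bits, one bit per
// column position (column_table_id) within the table's segment row.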
void CacheManager::deleteSegmentInGPU(Segment* seg, int gpu) {
  assert(cache_mapper[gpu].find(seg) != cache_mapper[gpu].end());
  int local_idx = cache_mapper[gpu][seg];
  int ret = cache_mapper[gpu].erase(seg);
  assert(ret == 1);

  unsigned char old_segment_bitmap = segment_bitmap[seg->column->column_id][seg->segment_id];
  unsigned char check = (segment_bitmap[seg->column->column_id][seg->segment_id] << gpu);
  check = check >> 7;
  assert(check == 0x01);
  unsigned char code = ~(0x80 >> gpu); //all 1s except the gpu bit location, which is 0
  segment_bitmap[seg->column->column_id][seg->segment_id] = segment_bitmap[seg->column->column_id][seg->segment_id] & code;

  //segment row management
  unsigned int check_int = (gpu_segment_row_bitmap[gpu][seg->column->table_id][seg->segment_id] << seg->column->column_table_id);
  check_int = check_int >> 31;
  assert(check_int == 1);
  unsigned int code_int = ~(0x80000000 >> seg->column->column_table_id);
  gpu_segment_row_bitmap[gpu][seg->column->table_id][seg->segment_id] = gpu_segment_row_bitmap[gpu][seg->column->table_id][seg->segment_id] & code_int;

  if (gpu_segment_row_bitmap[gpu][seg->column->table_id][seg->segment_id] == 0x00000000) {
    //the segment we deleted was the last segment of this segment row present in this gpu
    int loc = 0;
    for (int i = 0; i < segment_row_to_gpu[seg->column->table_id][seg->segment_id].size(); i++) {
      if (segment_row_to_gpu[seg->column->table_id][seg->segment_id][i] == gpu) loc = i;
    }
    assert(segment_row_to_gpu[seg->column->table_id][seg->segment_id][loc] == gpu);
    segment_row_to_gpu[seg->column->table_id][seg->segment_id].erase(segment_row_to_gpu[seg->column->table_id][seg->segment_id].begin() + loc);
    if (segment_row_to_gpu[seg->column->table_id][seg->segment_id].size() < NUM_GPU) {
      seg_is_replicated[seg->column->table_id][seg->segment_id] = 0;
      if (segment_row_to_gpu[seg->column->table_id][seg->segment_id].size() == 0) {
        seg_row_to_single_gpu[seg->column->table_id][seg->segment_id] = -1;
      } else {
        int tot_gpu = segment_row_to_gpu[seg->column->table_id][seg->segment_id].size();
        int idx = seg->segment_id % tot_gpu;
        seg_row_to_single_gpu[seg->column->table_id][seg->segment_id] = segment_row_to_gpu[seg->column->table_id][seg->segment_id][idx];
      }
    }
  }

  assert(segment_list[gpu][seg->column->column_id][seg->segment_id] != -1);
  segment_list[gpu][seg->column->column_id][seg->segment_id] = -1;
  empty_gpu_segment[gpu].push(local_idx);

  // Mirror of the bookkeeping in cacheSegmentInGPU: only decrement once the
  // segment is no longer cached on any GPU.
  if (segment_bitmap[seg->column->column_id][seg->segment_id] == 0x00) {
    assert(old_segment_bitmap != 0x00);
    seg->column->tot_seg_in_GPU--;
    assert(seg->column->tot_seg_in_GPU >= 0);
  }
}

void CacheManager::updateColumnFrequency(ColumnInfo* column) {
  column->stats->col_freq += (1.0 / column->total_segment);
}

void CacheManager::updateColumnWeightDirect(ColumnInfo* column, double speedup) {
  // Dimension-table columns (table_id != 0) get a 3x boost over fact-table columns.
  if (column->table_id == 0) {
    column->stats->speedup += speedup/column->total_segment;
    column->weight += speedup/column->total_segment;
  } else {
    column->stats->speedup += speedup*3/column->total_segment;
    column->weight += speedup*3/column->total_segment;
  }
}

void CacheManager::updateSegmentWeightDirect(ColumnInfo* column, Segment* segment, double speedup) {
  if (speedup > 0) {
    if (column->table_id == 0) {
      segment->stats->speedup += speedup/column->total_segment;
      segment->weight += speedup/column->total_segment;
    } else {
      segment->stats->speedup += speedup*3/column->total_segment;
      segment->weight += speedup*3/column->total_segment;
    }
  }
}

void CacheManager::updateSegmentWeightCostDirect(ColumnInfo* column, Segment* segment, double speedup) {
  // Unlike updateSegmentWeightDirect, fact and dimension segments are
  // weighted uniformly here.
  if (speedup > 0) {
    segment->stats->speedup += (speedup/column->total_segment);
    segment->weight += (speedup/column->total_segment);
  }
}

void CacheManager::updateSegmentFreqDirect(ColumnInfo* column, Segment* segment) {
  segment->stats->col_freq += (1.0 / column->total_segment);
}

void CacheManager::updateSegmentTimeDirect(ColumnInfo* column, Segment* segment, double timestamp) {
  segment->stats->backward_t = timestamp - (segment->stats->timestamp * column->total_segment);
  segment->stats->timestamp = (timestamp / column->total_segment);
}
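// Weighting example (numbers purely illustrative): if a query's measured
// speedup for a column is 600 and the column spans 200 segments, each touched
// fact-table segment gains 600/200 = 3 weight, while a dimension segment
// gains 3x that (9) under updateSegmentWeightDirect. Normalizing by
// total_segment keeps short dimension columns competitive with the long
// lineorder columns.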
timestamp << " " << (column->stats->timestamp * column->total_segment) << endl; column->stats->timestamp = (timestamp/ column->total_segment); // cout << column->column_name << " " << column->stats->timestamp << endl; } void CacheManager::deleteAll() { for (int i = 0; i < TOT_COLUMN; i++) { ColumnInfo* column = allColumn[i]; for (int j = 0; j < column->total_segment; j++) { if (segment_bitmap[column->column_id][j] != 0x00) { Segment* seg = index_to_segment[column->column_id][j]; for (int gpu = 0; gpu < NUM_GPU; gpu++) { if (cache_mapper[gpu].find(seg) != cache_mapper[gpu].end()) { deleteSegmentInGPU(seg, gpu); } } } } } } void CacheManager::dumpStats(string filename, Distribution dist) { int data_size = 0; int cached_data = 0; int replicated_data = 0; int replicated_seg_per_column[NUM_COLUMN] = {0}; FILE *fptr = fopen(filename.c_str(), "w"); if (fptr == NULL) { printf("Could not open file\n"); assert(0); } for (int col = 0; col < NUM_COLUMN; col++) { data_size += allColumn[col]->total_segment; cached_data += allColumn[col]->tot_seg_in_GPU; for (int seg_id = 0; seg_id < allColumn[col]->total_segment; seg_id++) { bool replicated = true; Segment* seg = index_to_segment[col][seg_id]; for (int gpu = 0; gpu < NUM_GPU; gpu++) { if (cache_mapper[gpu].find(seg) == cache_mapper[gpu].end()) { replicated = false; } } if (replicated) { replicated_data++; replicated_seg_per_column[col]++; } } } fprintf(fptr, "===========================\n"); fprintf(fptr, "======= CACHE INFO ======\n"); fprintf(fptr, "===========================\n"); fprintf(fptr, "\n"); fprintf(fptr, "Segment size: %d\n", SEGMENT_SIZE); fprintf(fptr, "Data size: %d segments\n", data_size); fprintf(fptr, "Cached data: %d segments\n", cached_data); fprintf(fptr, "Replicated data: %d segments\n", replicated_data); fprintf(fptr, "\n"); for (int i = 1; i < NUM_COLUMN; i++) { fprintf(fptr,"%s: %d/%d = %.2f segments cached, %d/%d = %.2f segments replicated\n", allColumn[i]->column_name.c_str(), allColumn[i]->tot_seg_in_GPU, allColumn[i]->total_segment, allColumn[i]->tot_seg_in_GPU * 1.0/allColumn[i]->total_segment, replicated_seg_per_column[i], allColumn[i]->total_segment, replicated_seg_per_column[i] * 1.0/allColumn[i]->total_segment); } fprintf(fptr, "\n"); fprintf(fptr, "\n"); fprintf(fptr, "============================\n"); fprintf(fptr, "======= COLUMN INFO ======\n"); fprintf(fptr, "============================\n"); fprintf(fptr, "\n"); fprintf(fptr, "\n"); int threshold = 19950101; seg_idx_min = 0; for (int col = 1; col < NUM_COLUMN; col++) { fprintf(fptr, "Inspecting column %s\n", allColumn[col]->column_name.c_str()); if (allColumn[col]->tot_seg_in_GPU == 0) { fprintf(fptr, "This column is not cached"); } else { for (int seg_id = 0; seg_id < allColumn[col]->total_segment; seg_id++) { Segment* seg = index_to_segment[col][seg_id]; if (segment_bitmap[col][seg_id] != 0x00) { fprintf(fptr, "Segment %d is cached in GPUs ", seg_id); for (int gpu = 0; gpu < NUM_GPU; gpu++) { if (cache_mapper[gpu].find(seg) != cache_mapper[gpu].end()) { fprintf(fptr, "%d ", gpu); } } fprintf(fptr, "\n"); } } } fprintf(fptr, "\n"); fprintf(fptr, "\n"); } //WARNING: THIS IS A HACK TO ENSURE JOINCPUCHECK = FALSE WHEN COLUMN IS PARTIALLY CACHED IN ZIPF DISTRIBUTION if (dist == Zipf) { for (int seg_id = 0; seg_id < lo_orderdate->total_segment; seg_id++) { if (segment_min[lo_orderdate->column_id][seg_id] <= threshold && segment_max[lo_orderdate->column_id][seg_id] >= threshold) { cout << segment_min[lo_orderdate->column_id][seg_id] << " " << 
void CacheManager::dumpStats(string filename, Distribution dist) {
  int data_size = 0;
  int cached_data = 0;
  int replicated_data = 0;
  int replicated_seg_per_column[NUM_COLUMN] = {0};

  FILE *fptr = fopen(filename.c_str(), "w");
  if (fptr == NULL) {
    printf("Could not open file\n");
    assert(0);
  }

  for (int col = 0; col < NUM_COLUMN; col++) {
    data_size += allColumn[col]->total_segment;
    cached_data += allColumn[col]->tot_seg_in_GPU;
    for (int seg_id = 0; seg_id < allColumn[col]->total_segment; seg_id++) {
      // A segment counts as replicated only if every GPU holds it.
      bool replicated = true;
      Segment* seg = index_to_segment[col][seg_id];
      for (int gpu = 0; gpu < NUM_GPU; gpu++) {
        if (cache_mapper[gpu].find(seg) == cache_mapper[gpu].end()) {
          replicated = false;
        }
      }
      if (replicated) {
        replicated_data++;
        replicated_seg_per_column[col]++;
      }
    }
  }

  fprintf(fptr, "===========================\n");
  fprintf(fptr, "======= CACHE INFO ======\n");
  fprintf(fptr, "===========================\n");
  fprintf(fptr, "\n");
  fprintf(fptr, "Segment size: %d\n", SEGMENT_SIZE);
  fprintf(fptr, "Data size: %d segments\n", data_size);
  fprintf(fptr, "Cached data: %d segments\n", cached_data);
  fprintf(fptr, "Replicated data: %d segments\n", replicated_data);
  fprintf(fptr, "\n");

  for (int i = 1; i < NUM_COLUMN; i++) {
    fprintf(fptr, "%s: %d/%d = %.2f segments cached, %d/%d = %.2f segments replicated\n",
        allColumn[i]->column_name.c_str(),
        allColumn[i]->tot_seg_in_GPU, allColumn[i]->total_segment,
        allColumn[i]->tot_seg_in_GPU * 1.0/allColumn[i]->total_segment,
        replicated_seg_per_column[i], allColumn[i]->total_segment,
        replicated_seg_per_column[i] * 1.0/allColumn[i]->total_segment);
  }
  fprintf(fptr, "\n");
  fprintf(fptr, "\n");

  fprintf(fptr, "============================\n");
  fprintf(fptr, "======= COLUMN INFO ======\n");
  fprintf(fptr, "============================\n");
  fprintf(fptr, "\n");
  fprintf(fptr, "\n");

  int threshold = 19950101;
  seg_idx_min = 0;

  for (int col = 1; col < NUM_COLUMN; col++) {
    fprintf(fptr, "Inspecting column %s\n", allColumn[col]->column_name.c_str());
    if (allColumn[col]->tot_seg_in_GPU == 0) {
      fprintf(fptr, "This column is not cached\n");
    } else {
      for (int seg_id = 0; seg_id < allColumn[col]->total_segment; seg_id++) {
        Segment* seg = index_to_segment[col][seg_id];
        if (segment_bitmap[col][seg_id] != 0x00) {
          fprintf(fptr, "Segment %d is cached in GPUs ", seg_id);
          for (int gpu = 0; gpu < NUM_GPU; gpu++) {
            if (cache_mapper[gpu].find(seg) != cache_mapper[gpu].end()) {
              fprintf(fptr, "%d ", gpu);
            }
          }
          fprintf(fptr, "\n");
        }
      }
    }
    fprintf(fptr, "\n");
    fprintf(fptr, "\n");
  }

  //WARNING: THIS IS A HACK TO ENSURE JOINCPUCHECK = FALSE WHEN COLUMN IS PARTIALLY CACHED IN ZIPF DISTRIBUTION
  if (dist == Zipf) {
    for (int seg_id = 0; seg_id < lo_orderdate->total_segment; seg_id++) {
      if (segment_min[lo_orderdate->column_id][seg_id] <= threshold && segment_max[lo_orderdate->column_id][seg_id] >= threshold) {
        cout << segment_min[lo_orderdate->column_id][seg_id] << " " << segment_max[lo_orderdate->column_id][seg_id] << endl;
        seg_idx_min = seg_id;
      }
    }
  }

  fclose(fptr);
}

void CacheManager::assignWeight(ColumnInfo* column, int start_seg, int end_seg, double weight, double repl_weight) {
  // Segments in [start_seg, end_seg) receive the given weights; the rest of
  // the column is zeroed.
  for (int seg_id = 0; seg_id < column->total_segment; seg_id++) {
    Segment* seg = index_to_segment[column->column_id][seg_id];
    if (seg_id >= start_seg && seg_id < end_seg) {
      seg->weight = weight;
      seg->repl_weight = repl_weight;
    } else {
      seg->weight = 0;
      seg->repl_weight = 0;
    }
  }
}

bool CacheManager::couldReplicateCheck(Segment* seg, double weight, int cache_each_tot_seg, int* temp_buffer_size) {
  if (weight == 0) return false;
  bool could_replicate = true;
  for (int gpu = 0; gpu < NUM_GPU; gpu++) {
    if (temp_buffer_size[gpu] + 1 >= cache_each_tot_seg) {
      could_replicate = false;
      break;
    }
  }
  return could_replicate;
}

bool CacheManager::couldReplicateCachedSegmentRow(Segment* seg, double weight, int cache_each_tot_seg, int* temp_buffer_size, map<Segment*, int> map_segment) {
  int count[NUM_GPU] = {0};
  if (weight == 0) return false;
  bool could_replicate = true;
  // First check the capacity needed to replicate this segment itself...
  if (map_segment[seg] != -1) {
    assert(map_segment[seg] != 99);
    int cur_gpu = map_segment[seg];
    for (int gpu = 0; gpu < NUM_GPU; gpu++) {
      if (gpu != cur_gpu) {
        if (temp_buffer_size[gpu] + count[gpu] + 1 >= cache_each_tot_seg) {
          could_replicate = false;
          break;
        } else count[gpu]++;
      }
    }
  }
  if (!could_replicate) return could_replicate;
  // ...then for every other already-cached segment in the same segment row.
  for (int i = 0; i < columns_in_table[seg->column->table_id].size(); i++) {
    int column_id = columns_in_table[seg->column->table_id][i];
    if (column_id != seg->column->column_id) {
      Segment* next_seg = index_to_segment[column_id][seg->segment_id];
      if (map_segment[next_seg] != -1 && map_segment[next_seg] != 99) {
        int cur_gpu = map_segment[next_seg];
        for (int gpu = 0; gpu < NUM_GPU; gpu++) {
          if (gpu != cur_gpu) {
            if (temp_buffer_size[gpu] + count[gpu] + 1 >= cache_each_tot_seg) {
              could_replicate = false;
              break;
            } else count[gpu]++;
          }
        }
      }
    }
  }
  return could_replicate;
}
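// assignWeight example (illustrative): with a 100-segment column,
//   assignWeight(lo_suppkey, 40, 100, 850, 0);
// gives segments 40..99 a placement weight of 850 and no replication weight,
// and clears segments 0..39, so a skewed (e.g. Zipf) workload can exclude the
// cold prefix of the fact table from the cache entirely.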
unsigned long long CacheManager::PartitionDataPlacement() {
  multimap<double, Segment*> access_weight_map;
  unsigned long long traffic = 0;

  for (int i = TOT_COLUMN-1; i >= 0; i--) {
    for (int j = 0; j < allColumn[i]->total_segment; j++) {
      Segment* segment = index_to_segment[i][j];
      if (segment->weight > 0) access_weight_map.insert({segment->weight, segment});
    }
  }

  int* temp_buffer_size = new int[NUM_GPU]();
  for (int gpu = 0; gpu < NUM_GPU; gpu++) {
    temp_buffer_size[gpu] = 0;
  }

  vector<set<Segment*>> segments_to_place;
  segments_to_place.resize(NUM_GPU);

  // Walk the weight map from heaviest to lightest and assign each segment to
  // exactly one GPU, round-robin on its segment id.
  multimap<double, Segment*>::reverse_iterator cit;
  cit = access_weight_map.rbegin();
  while (cit != access_weight_map.rend()) {
    Segment* seg = cit->second;
    //THIS ONLY WORKS IF THE TOTAL DIM SEGMENT IS DIVISIBLE BY NUM GPU
    int gpu = seg->segment_id % NUM_GPU;
    if (temp_buffer_size[gpu] + 1 < cache_each_tot_seg && cit->first > 0) {
      temp_buffer_size[gpu] += 1;
      segments_to_place[gpu].insert(seg);
    }
    ++cit;
  }

  for (int gpu = 0; gpu < NUM_GPU; gpu++) {
    assert(temp_buffer_size[gpu] <= cache_each_tot_seg);
    cout << "Cached segment in GPU " << gpu << " : " << temp_buffer_size[gpu] << " Cache total: " << cache_each_tot_seg << endl;
  }

  // Evict everything that is cached but no longer selected...
  for (int i = 0; i < TOT_COLUMN; i++) {
    for (int j = 0; j < allColumn[i]->total_segment; j++) {
      Segment* segment = index_to_segment[i][j];
      for (int gpu = 0; gpu < NUM_GPU; gpu++) {
        if (segments_to_place[gpu].find(segment) == segments_to_place[gpu].end()) {
          unsigned char check = (segment_bitmap[i][j] << gpu);
          check = check >> 7;
          if (check == 0x01) {
            assert(cache_mapper[gpu].find(segment) != cache_mapper[gpu].end());
            deleteSegmentInGPU(segment, gpu);
          }
        }
      }
    }
  }

  // ...then load everything that is selected but not yet cached.
  for (int gpu = 0; gpu < NUM_GPU; gpu++) {
    set<Segment*>::const_iterator cit2;
    for (cit2 = segments_to_place[gpu].cbegin(); cit2 != segments_to_place[gpu].cend(); ++cit2) {
      unsigned char check = (segment_bitmap[(*cit2)->column->column_id][(*cit2)->segment_id] << gpu);
      check = check >> 7;
      if (check == 0x00) {
        assert(cache_mapper[gpu].find(*cit2) == cache_mapper[gpu].end());
        cacheSegmentInGPU(*cit2, gpu);
        traffic += SEGMENT_SIZE * sizeof(int);
      }
    }
  }
  cout << "Successfully cached" << endl;

  delete[] temp_buffer_size;
  segments_to_place.clear();
  return traffic;
}
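// Round-robin sketch (NUM_GPU = 2 for illustration): segment ids 0,2,4,...
// land on GPU 0 and 1,3,5,... on GPU 1, so each GPU ends up with a disjoint
// horizontal partition of every weighted column. The returned traffic is the
// number of bytes actually copied, i.e. SEGMENT_SIZE * sizeof(int) per newly
// cached segment; segments that were already resident cost nothing.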
unsigned long long CacheManager::ReplicationDataPlacement() {
  multimap<double, Segment*> access_weight_map;
  unsigned long long traffic = 0;

  for (int i = TOT_COLUMN-1; i >= 0; i--) {
    for (int j = 0; j < allColumn[i]->total_segment; j++) {
      Segment* segment = index_to_segment[i][j];
      if (segment->weight > 0) access_weight_map.insert({segment->weight, segment});
    }
  }

  int* temp_buffer_size = new int[NUM_GPU]();
  for (int gpu = 0; gpu < NUM_GPU; gpu++) {
    temp_buffer_size[gpu] = 0;
  }

  vector<set<Segment*>> segments_to_place;
  segments_to_place.resize(NUM_GPU);

  // Walk the weight map from heaviest to lightest and replicate each segment
  // to every GPU that still has room.
  multimap<double, Segment*>::reverse_iterator cit;
  cit = access_weight_map.rbegin();
  while (cit != access_weight_map.rend()) {
    Segment* seg = cit->second;
    for (int gpu = 0; gpu < NUM_GPU; gpu++) {
      if (temp_buffer_size[gpu] + 1 < cache_each_tot_seg && cit->first > 0) {
        temp_buffer_size[gpu] += 1;
        segments_to_place[gpu].insert(seg);
      }
    }
    ++cit;
  }

  for (int gpu = 0; gpu < NUM_GPU; gpu++) {
    assert(temp_buffer_size[gpu] <= cache_each_tot_seg);
    cout << "Cached segment in GPU " << gpu << " : " << temp_buffer_size[gpu] << " Cache total: " << cache_each_tot_seg << endl;
  }

  // Evict everything that is cached but no longer selected...
  for (int i = 0; i < TOT_COLUMN; i++) {
    for (int j = 0; j < allColumn[i]->total_segment; j++) {
      Segment* segment = index_to_segment[i][j];
      for (int gpu = 0; gpu < NUM_GPU; gpu++) {
        if (segments_to_place[gpu].find(segment) == segments_to_place[gpu].end()) {
          unsigned char check = (segment_bitmap[i][j] << gpu);
          check = check >> 7;
          if (check == 0x01) {
            assert(cache_mapper[gpu].find(segment) != cache_mapper[gpu].end());
            deleteSegmentInGPU(segment, gpu);
          }
        }
      }
    }
  }

  // ...then load everything that is selected but not yet cached.
  for (int gpu = 0; gpu < NUM_GPU; gpu++) {
    set<Segment*>::const_iterator cit2;
    for (cit2 = segments_to_place[gpu].cbegin(); cit2 != segments_to_place[gpu].cend(); ++cit2) {
      unsigned char check = (segment_bitmap[(*cit2)->column->column_id][(*cit2)->segment_id] << gpu);
      check = check >> 7;
      if (check == 0x00) {
        assert(cache_mapper[gpu].find(*cit2) == cache_mapper[gpu].end());
        cacheSegmentInGPU(*cit2, gpu);
        traffic += SEGMENT_SIZE * sizeof(int);
      }
    }
  }
  cout << "Successfully cached" << endl;

  delete[] temp_buffer_size;
  segments_to_place.clear();
  return traffic;
}
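// Contrast with PartitionDataPlacement: here every weighted segment goes to
// all NUM_GPU caches, so (illustratively) caching 100 segments on 4 GPUs
// moves up to 4 * 100 * SEGMENT_SIZE * sizeof(int) bytes and consumes a slot
// on every device, trading capacity for the ability to answer any query
// without inter-GPU shuffles.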
unsigned long long CacheManager::ShuffleAwareDataPlacement() {
  multimap<double, Segment*> access_weight_map;
  multimap<double, Segment*> replication_weight_map;
  vector<vector<bool>> segrow_is_replicated;
  unsigned long long traffic = 0;

  for (int i = TOT_COLUMN-1; i >= 0; i--) {
    for (int j = 0; j < allColumn[i]->total_segment; j++) {
      Segment* segment = index_to_segment[i][j];
      if (segment->weight > 0) access_weight_map.insert({segment->weight, segment});
      if (segment->repl_weight > 0) replication_weight_map.insert({segment->repl_weight, segment});
    }
  }

  int* temp_buffer_size = new int[NUM_GPU]();
  for (int gpu = 0; gpu < NUM_GPU; gpu++) {
    temp_buffer_size[gpu] = 0;
  }

  vector<set<Segment*>> segments_to_place;
  map<Segment*, int> map_segment;
  multimap<double, Segment*>::reverse_iterator cit;
  multimap<double, Segment*>::reverse_iterator cit2;

  segrow_is_replicated.resize(NUM_TABLE);
  for (int table = 0; table < NUM_TABLE; table++) {
    segrow_is_replicated[table].resize(allTable[table]->total_segment);
  }
  segments_to_place.resize(NUM_GPU);

  // Merge the two weight maps in descending order. map_segment records where
  // each segment was placed: a GPU id once cached, 99 once replicated
  // (note: std::map::operator[] default-inserts 0 for keys never placed).
  cit2 = replication_weight_map.rbegin();
  cit = access_weight_map.rbegin();
  while (cit != access_weight_map.rend() || cit2 != replication_weight_map.rend()) {
    if ((cit != access_weight_map.rend() && cit2 == replication_weight_map.rend()) ||
        (cit != access_weight_map.rend() && cit2 != replication_weight_map.rend() && cit->first >= cit2->first)) {
      //cache segment to gpu
      //if the segment's row is already marked replicated, replicate it to all gpus right away
      //otherwise only cache it in one gpu
      Segment* seg = cit->second;
      assert(map_segment[seg] != -1);
      //if this segrow is already replicated then we have to replicate everything
      if (segrow_is_replicated[seg->column->table_id][seg->segment_id]) {
        //just checking that all cached segments of this row are replicated
        for (int i = 0; i < columns_in_table[seg->column->table_id].size(); i++) {
          int column_id = columns_in_table[seg->column->table_id][i];
          Segment* next_seg = index_to_segment[column_id][seg->segment_id];
          assert(map_segment[next_seg] == -1 || map_segment[next_seg] == 99);
        }
        //check if we have the capacity
        bool could_replicate = couldReplicateCheck(seg, cit->first, cache_each_tot_seg, temp_buffer_size);
        if (could_replicate) {
          for (int gpu = 0; gpu < NUM_GPU; gpu++) {
            if (temp_buffer_size[gpu] + 1 < cache_each_tot_seg && cit->first > 0) {
              temp_buffer_size[gpu] += 1;
              segments_to_place[gpu].insert(seg);
            }
          }
          map_segment[seg] = 99;
        }
      } else {
        //THIS ONLY WORKS IF THE TOTAL DIM SEGMENT IS DIVISIBLE BY NUM GPU
        int gpu;
        if (seg->column->table_id == 0) gpu = seg->segment_id % NUM_GPU;
        else gpu = seg->segment_id * NUM_GPU / seg->column->total_segment;
        if (temp_buffer_size[gpu] + 1 < cache_each_tot_seg && cit->first > 0) {
          temp_buffer_size[gpu] += 1;
          segments_to_place[gpu].insert(seg);
          map_segment[seg] = gpu;
        }
      }
      ++cit;
    } else if ((cit == access_weight_map.rend() && cit2 != replication_weight_map.rend()) ||
               (cit != access_weight_map.rend() && cit2 != replication_weight_map.rend() && cit->first < cit2->first)) {
      //possible interleavings of the two maps:
      //cache A, cache B, replicate A, replicate B
      //cache A, replicate A, cache B, replicate B
      //cache A, replicate A, replicate B, cache B
      //replicate A, cache A, replicate B, cache B
      //replicate A, replicate B, cache A, cache B
      //replicate A, cache A, cache B, replicate B
      Segment* seg = cit2->second;
      //if the segrow is replicated, the segment was already replicated when it was cached
      if (segrow_is_replicated[seg->column->table_id][seg->segment_id] == 1) {
        for (int i = 0; i < columns_in_table[seg->column->table_id].size(); i++) {
          int column_id = columns_in_table[seg->column->table_id][i];
          Segment* next_seg = index_to_segment[column_id][seg->segment_id];
          assert(map_segment[next_seg] == -1 || map_segment[next_seg] == 99);
        }
      } else {
        if (map_segment[seg] == 99) {
          assert(0); //this segment row has not been replicated but seg is already replicated: inconsistent
        } else if (map_segment[seg] != -1) {
          //seg is already cached on a single gpu: replicate it and its whole row
          assert(segrow_is_replicated[seg->column->table_id][seg->segment_id] == 0);
          bool could_replicate = couldReplicateCachedSegmentRow(seg, cit2->first, cache_each_tot_seg, temp_buffer_size, map_segment);
          if (could_replicate) {
            int cur_gpu = map_segment[seg];
            for (int gpu = 0; gpu < NUM_GPU; gpu++) {
              if (gpu != cur_gpu) {
                if (temp_buffer_size[gpu] + 1 < cache_each_tot_seg && cit2->first > 0) {
                  temp_buffer_size[gpu] += 1;
                  segments_to_place[gpu].insert(seg);
                }
              }
            }
            map_segment[seg] = 99; // a sign that it is replicated
            //replicate all other segments in the same segment row
            for (int i = 0; i < columns_in_table[seg->column->table_id].size(); i++) {
              int column_id = columns_in_table[seg->column->table_id][i];
              if (column_id != seg->column->column_id) {
                Segment* next_seg = index_to_segment[column_id][seg->segment_id];
                //if the segment is already cached then replicate it;
                //if it is already replicated then do nothing
                if (map_segment[next_seg] != -1 && map_segment[next_seg] != 99) {
                  int cur_gpu = map_segment[next_seg];
                  for (int gpu = 0; gpu < NUM_GPU; gpu++) {
                    if (gpu != cur_gpu) {
                      if (temp_buffer_size[gpu] + 1 < cache_each_tot_seg && cit2->first > 0) {
                        temp_buffer_size[gpu] += 1;
                        segments_to_place[gpu].insert(next_seg);
                      }
                    }
                  }
                  map_segment[next_seg] = 99; // a sign that this segment is replicated
                }
              }
            }
            segrow_is_replicated[seg->column->table_id][seg->segment_id] = 1;
          }
        } else if (map_segment[seg] == -1) {
          //this segment has not been cached at all
          bool could_replicate = couldReplicateCachedSegmentRow(seg, cit2->first, cache_each_tot_seg, temp_buffer_size, map_segment);
          if (could_replicate) {
            //replicate all other cached segments in the same segment row
            for (int i = 0; i < columns_in_table[seg->column->table_id].size(); i++) {
              int column_id = columns_in_table[seg->column->table_id][i];
              if (column_id != seg->column->column_id) {
                Segment* next_seg = index_to_segment[column_id][seg->segment_id];
                //if the segment is already cached then replicate it;
                //if it is already replicated then do nothing
                if (map_segment[next_seg] != -1 && map_segment[next_seg] != 99) {
                  cout << "Warning" << endl;
                  int cur_gpu = map_segment[next_seg];
                  for (int gpu = 0; gpu < NUM_GPU; gpu++) {
                    if (gpu != cur_gpu) {
                      if (temp_buffer_size[gpu] + 1 < cache_each_tot_seg && cit2->first > 0) {
                        temp_buffer_size[gpu] += 1;
                        segments_to_place[gpu].insert(next_seg);
                      }
                    }
                  }
                  map_segment[next_seg] = 99; // a sign that this segment is replicated
                }
              }
            }
            segrow_is_replicated[seg->column->table_id][seg->segment_id] = 1;
          }
        }
      }
      ++cit2;
    } else {
      assert(0);
    }
  }

  for (int gpu = 0; gpu < NUM_GPU; gpu++) {
    assert(temp_buffer_size[gpu] <= cache_each_tot_seg);
    cout << "Cached segment in GPU " << gpu << " : " << temp_buffer_size[gpu] << " Cache total: " << cache_each_tot_seg << endl;
  }

  // Evict everything that is cached but no longer selected...
  for (int i = 0; i < TOT_COLUMN; i++) {
    for (int j = 0; j < allColumn[i]->total_segment; j++) {
      Segment* segment = index_to_segment[i][j];
      for (int gpu = 0; gpu < NUM_GPU; gpu++) {
        if (segments_to_place[gpu].find(segment) == segments_to_place[gpu].end()) {
          unsigned char check = (segment_bitmap[i][j] << gpu);
          check = check >> 7;
          if (check == 0x01) {
            assert(cache_mapper[gpu].find(segment) != cache_mapper[gpu].end());
            deleteSegmentInGPU(segment, gpu);
          }
        }
      }
    }
  }

  // ...then load everything that is selected but not yet cached.
  for (int gpu = 0; gpu < NUM_GPU; gpu++) {
    set<Segment*>::const_iterator cit3;
    for (cit3 = segments_to_place[gpu].cbegin(); cit3 != segments_to_place[gpu].cend(); ++cit3) {
      unsigned char check = (segment_bitmap[(*cit3)->column->column_id][(*cit3)->segment_id] << gpu);
      check = check >> 7;
      if (check == 0x00) {
        assert(cache_mapper[gpu].find(*cit3) == cache_mapper[gpu].end());
        cacheSegmentInGPU(*cit3, gpu);
        traffic += SEGMENT_SIZE * sizeof(int);
      }
    }
  }
  cout << "Successfully cached" << endl;

  delete[] temp_buffer_size;
  map_segment.clear();
  segments_to_place.clear();
  return traffic;
}
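// Placement trace (illustrative, NUM_GPU = 2 and a 10-segment supplier
// column): s_suppkey segment 5 with weight 900 is cached on GPU
// 5*2/10 = 1 (dimension columns are range-partitioned; fact columns use
// segment_id % NUM_GPU). When its repl_weight entry (say 400) surfaces later
// in the descending merge, the whole segment row 5 (s_suppkey, s_region,
// s_nation, s_city) is promoted to every GPU, provided
// couldReplicateCachedSegmentRow finds a free slot on each device.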
void CacheManager::ShuffleAware(Distribution dist, bool opt) {
  int start = 0;
  if (dist == Zipf) start = (LO_LEN/SEGMENT_SIZE)*2.8/7;
  // if (dist == Zipf) start = (LO_LEN/SEGMENT_SIZE)/2;

  if (SF == 322 || SF == 402) {
    assert(opt == 0);
    assignWeight(lo_suppkey, start, lo_suppkey->total_segment, 850, 0);
    assignWeight(lo_custkey, start, lo_custkey->total_segment, 750, 0);
    assignWeight(lo_partkey, start, lo_partkey->total_segment, 650, 0);
    assignWeight(lo_orderdate, start, lo_orderdate->total_segment, 550, 0);
    assignWeight(lo_revenue, start, lo_revenue->total_segment, 540, 0);
    assignWeight(lo_supplycost, start, lo_supplycost->total_segment, 520, 0);
    assignWeight(lo_quantity, start, lo_quantity->total_segment, 500, 0);
    assignWeight(lo_discount, start, lo_discount->total_segment, 100, 0);
    assignWeight(lo_extendedprice, start, lo_extendedprice->total_segment, 50, 0);
    assignWeight(s_suppkey, 0, s_suppkey->total_segment, 1000, 400);
    assignWeight(s_region, 0, s_region->total_segment, 900, 400);
    assignWeight(s_nation, 0, s_nation->total_segment, 900, 400);
    assignWeight(s_city, 0, s_city->total_segment, 900, 400);
    if (dist != Zipf) {
      assignWeight(c_custkey, 0, c_custkey->total_segment, 1000, 0);
      assignWeight(c_region, 0, c_region->total_segment, 800, 0);
      assignWeight(c_nation, 0, c_nation->total_segment, 800, 0);
      assignWeight(c_city, 0, c_city->total_segment, 800, 0);
    } else {
      assignWeight(c_custkey, 0, c_custkey->total_segment, 1000, 25);
      assignWeight(c_region, 0, c_region->total_segment, 800, 25);
      assignWeight(c_nation, 0, c_nation->total_segment, 800, 25);
      assignWeight(c_city, 0, c_city->total_segment, 800, 25);
    }
    assignWeight(p_partkey, 0, p_partkey->total_segment, 1000, 0);
    assignWeight(p_mfgr, 0, p_mfgr->total_segment, 700, 0);
    assignWeight(p_category, 0, p_category->total_segment, 700, 0);
    assignWeight(p_brand1, 0, p_brand1->total_segment, 700, 0);
    assignWeight(d_datekey, 0, d_datekey->total_segment, 1000, 1000);
    assignWeight(d_year, 0, d_year->total_segment, 1000, 1000);
    assignWeight(d_yearmonthnum, 0, d_yearmonthnum->total_segment, 1000, 1000);
  } else if (SF % 10 == 0) {
    assert(opt == 0);
    assert(dist == None);
    assignWeight(lo_suppkey, start, lo_suppkey->total_segment, 850, 0);
    assignWeight(lo_custkey, start, lo_custkey->total_segment, 750, 0);
    assignWeight(lo_partkey, start, lo_partkey->total_segment, 650, 0);
    assignWeight(lo_orderdate, start, lo_orderdate->total_segment, 550, 0);
    assignWeight(lo_revenue, start, lo_revenue->total_segment, 540, 0);
    assignWeight(lo_supplycost, start, lo_supplycost->total_segment, 520, 0);
    assignWeight(lo_quantity, start, lo_quantity->total_segment, 500, 0);
    assignWeight(lo_discount, start, lo_discount->total_segment, 100, 0);
    assignWeight(lo_extendedprice, start, lo_extendedprice->total_segment, 50, 0);
    assignWeight(s_suppkey, 0, s_suppkey->total_segment, 1000, 900);
    assignWeight(s_region, 0, s_region->total_segment, 900, 900);
    assignWeight(s_nation, 0, s_nation->total_segment, 900, 900);
    assignWeight(s_city, 0, s_city->total_segment, 900, 900);
    assignWeight(c_custkey, 0, c_custkey->total_segment, 1000, 1000);
    assignWeight(c_region, 0, c_region->total_segment, 900, 900);
    assignWeight(c_nation, 0, c_nation->total_segment, 900, 900);
    assignWeight(c_city, 0, c_city->total_segment, 900, 900);
    assignWeight(p_partkey, 0, p_partkey->total_segment, 1000, 1000);
    assignWeight(p_mfgr, 0, p_mfgr->total_segment, 900, 900);
    assignWeight(p_category, 0, p_category->total_segment, 900, 900);
    assignWeight(p_brand1, 0, p_brand1->total_segment, 900, 900);
    assignWeight(d_datekey, 0, d_datekey->total_segment, 1000, 1000);
    assignWeight(d_year, 0, d_year->total_segment, 1000, 1000);
    assignWeight(d_yearmonthnum, 0, d_yearmonthnum->total_segment, 1000, 1000);
  } else {
    assignWeight(lo_suppkey, start, lo_suppkey->total_segment, 850, 0);
    assignWeight(lo_custkey, start, lo_custkey->total_segment, 750, 0);
    assignWeight(lo_partkey, start, lo_partkey->total_segment, 650, 0);
    assignWeight(lo_orderdate, start, lo_orderdate->total_segment, 550, 0);
    assignWeight(lo_revenue, start, lo_revenue->total_segment, 540, 0);
    assignWeight(lo_supplycost, start, lo_supplycost->total_segment, 520, 0);
    assignWeight(lo_quantity, start, lo_quantity->total_segment, 500, 0);
    assignWeight(lo_discount, start, lo_discount->total_segment, 100, 0);
    assignWeight(lo_extendedprice, start, lo_extendedprice->total_segment, 50, 0);
    if (opt) {
      assignWeight(s_suppkey, 0, s_suppkey->total_segment, 1000, 0);
      assignWeight(s_region, 0, s_region->total_segment, 900, 0);
      assignWeight(s_nation, 0, s_nation->total_segment, 900, 0);
      assignWeight(s_city, 0, s_city->total_segment, 900, 0);
      assignWeight(c_custkey, 0, c_custkey->total_segment, 1000, 0);
      assignWeight(c_region, 0, c_region->total_segment, 800, 0);
      assignWeight(c_nation, 0, c_nation->total_segment, 800, 0);
      assignWeight(c_city, 0, c_city->total_segment, 800, 0);
      assignWeight(p_partkey, 0, p_partkey->total_segment, 1000, 0);
      assignWeight(p_mfgr, 0, p_mfgr->total_segment, 700, 0);
      assignWeight(p_category, 0, p_category->total_segment, 700, 0);
      assignWeight(p_brand1, 0, p_brand1->total_segment, 700, 0);
    } else {
      assignWeight(s_suppkey, 0, s_suppkey->total_segment, 1000, 400);
      assignWeight(s_region, 0, s_region->total_segment, 900, 400);
      assignWeight(s_nation, 0, s_nation->total_segment, 900, 400);
      assignWeight(s_city, 0, s_city->total_segment, 900, 400);
      if (SF == 162 && dist == Zipf && NUM_GPU == 2) {
        assignWeight(c_custkey, 0, c_custkey->total_segment, 1000, 0);
        assignWeight(c_region, 0, c_region->total_segment, 800, 0);
        assignWeight(c_nation, 0, c_nation->total_segment, 800, 0);
        assignWeight(c_city, 0, c_city->total_segment, 800, 0);
      } else {
        assignWeight(c_custkey, 0, c_custkey->total_segment, 1000, 300);
        assignWeight(c_region, 0, c_region->total_segment, 800, 300);
        assignWeight(c_nation, 0, c_nation->total_segment, 800, 300);
        assignWeight(c_city, 0, c_city->total_segment, 800, 300);
      }
      if ((SF == 162 && dist != Zipf && NUM_GPU == 4) || (SF == 162 && dist == Zipf && NUM_GPU == 2)) {
        assignWeight(p_partkey, 0, p_partkey->total_segment, 1000, 0);
        assignWeight(p_mfgr, 0, p_mfgr->total_segment, 700, 0);
        assignWeight(p_category, 0, p_category->total_segment, 700, 0);
        assignWeight(p_brand1, 0, p_brand1->total_segment, 700, 0);
      } else {
        assignWeight(p_partkey, 0, p_partkey->total_segment, 1000, 200);
        assignWeight(p_mfgr, 0, p_mfgr->total_segment, 700, 200);
        assignWeight(p_category, 0, p_category->total_segment, 700, 200);
        assignWeight(p_brand1, 0, p_brand1->total_segment, 700, 200);
      }
    }
    assignWeight(d_datekey, 0, d_datekey->total_segment, 1000, 1000);
    assignWeight(d_year, 0, d_year->total_segment, 1000, 1000);
    assignWeight(d_yearmonthnum, 0, d_yearmonthnum->total_segment, 1000, 1000);
  }

  ShuffleAwareDataPlacement();

  string path;
  if (dist == None) {
    path = string("logs/stats/shuffleaware") + to_string(NUM_GPU) + "GPUsSF" + to_string(SF);
    if (!opt) dumpStats(path, dist);
  } else if (dist == Zipf) {
    path = string("logs/stats/shuffleaware") + to_string(NUM_GPU) + "GPUsSF" + to_string(SF) + "Zipf";
    if (!opt) dumpStats(path, dist);
  }
}
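// Invocation sketch (hypothetical): cm->ShuffleAware(None, false) assigns the
// hand-tuned SSB weights above, runs ShuffleAwareDataPlacement(), and writes
// a report to logs/stats/shuffleaware<NUM_GPU>GPUsSF<SF>. Passing opt = true
// zeroes the supplier/customer/part replication weights (date columns stay
// fully replicated) and skips the stats dump.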
void CacheManager::PartitioningOnly(Distribution dist) {
  if (SF % 10 == 0) assert(0);
  int start = 0;
  // if (dist == Zipf) start = (LO_LEN/SEGMENT_SIZE)*2.8/7;
  assignWeight(lo_suppkey, start, lo_suppkey->total_segment, 850, 0);
  assignWeight(lo_custkey, start, lo_custkey->total_segment, 750, 0);
  assignWeight(lo_partkey, start, lo_partkey->total_segment, 650, 0);
  assignWeight(lo_orderdate, start, lo_orderdate->total_segment, 550, 0);
  assignWeight(lo_revenue, start, lo_revenue->total_segment, 540, 0);
  assignWeight(lo_supplycost, start, lo_supplycost->total_segment, 520, 0);
  assignWeight(lo_quantity, start, lo_quantity->total_segment, 500, 0);
  assignWeight(lo_discount, start, lo_discount->total_segment, 100, 0);
  assignWeight(lo_extendedprice, start, lo_extendedprice->total_segment, 50, 0);
  assignWeight(s_suppkey, 0, s_suppkey->total_segment, 1000, 0);
  assignWeight(s_region, 0, s_region->total_segment, 900, 0);
  assignWeight(s_nation, 0, s_nation->total_segment, 900, 0);
  assignWeight(s_city, 0, s_city->total_segment, 900, 0);
  assignWeight(c_custkey, 0, c_custkey->total_segment, 1000, 0);
  assignWeight(c_region, 0, c_region->total_segment, 800, 0);
  assignWeight(c_nation, 0, c_nation->total_segment, 800, 0);
  assignWeight(c_city, 0, c_city->total_segment, 800, 0);
  assignWeight(p_partkey, 0, p_partkey->total_segment, 1000, 0);
  assignWeight(p_mfgr, 0, p_mfgr->total_segment, 700, 0);
  assignWeight(p_category, 0, p_category->total_segment, 700, 0);
  assignWeight(p_brand1, 0, p_brand1->total_segment, 700, 0);
  assignWeight(d_datekey, 0, d_datekey->total_segment, 1000, 0);
  assignWeight(d_year, 0, d_year->total_segment, 1000, 0);
  assignWeight(d_yearmonthnum, 0, d_yearmonthnum->total_segment, 1000, 0);

  PartitionDataPlacement();

  string path;
  if (dist == None) {
    path = string("logs/stats/partitiononly") + to_string(NUM_GPU) + "GPUsSF" + to_string(SF);
    dumpStats(path, dist);
  } else if (dist == Zipf) {
    path = string("logs/stats/partitiononly") + to_string(NUM_GPU) + "GPUsSF" + to_string(SF) + "Zipf";
    dumpStats(path, dist);
  }
}

void CacheManager::ReplicationOnly(Distribution dist) {
  if (SF % 10 == 0) assert(0);
  int start = 0;
  if (dist == Zipf) start = (LO_LEN/SEGMENT_SIZE)*2.8/7;
  assignWeight(lo_suppkey, start, lo_suppkey->total_segment, 850, 0);
  assignWeight(lo_custkey, start, lo_custkey->total_segment, 750, 0);
  assignWeight(lo_partkey, start, lo_partkey->total_segment, 650, 0);
  assignWeight(lo_orderdate, start, lo_orderdate->total_segment, 550, 0);
  assignWeight(lo_revenue, start, lo_revenue->total_segment, 540, 0);
  assignWeight(lo_supplycost, start, lo_supplycost->total_segment, 520, 0);
  assignWeight(lo_quantity, start, lo_quantity->total_segment, 500, 0);
  assignWeight(lo_discount, start, lo_discount->total_segment, 100, 0);
  assignWeight(lo_extendedprice, start, lo_extendedprice->total_segment, 50, 0);
  assignWeight(s_suppkey, 0, s_suppkey->total_segment, 1000, 0);
  assignWeight(s_region, 0, s_region->total_segment, 900, 0);
  assignWeight(s_nation, 0, s_nation->total_segment, 900, 0);
  assignWeight(s_city, 0, s_city->total_segment, 900, 0);
  assignWeight(c_custkey, 0, c_custkey->total_segment, 1000, 0);
  assignWeight(c_region, 0, c_region->total_segment, 800, 0);
  assignWeight(c_nation, 0, c_nation->total_segment, 800, 0);
  assignWeight(c_city, 0, c_city->total_segment, 800, 0);
  assignWeight(p_partkey, 0, p_partkey->total_segment, 1000, 0);
  assignWeight(p_mfgr, 0, p_mfgr->total_segment, 700, 0);
  assignWeight(p_category, 0, p_category->total_segment, 700, 0);
  assignWeight(p_brand1, 0, p_brand1->total_segment, 700, 0);
  assignWeight(d_datekey, 0, d_datekey->total_segment, 1000, 0);
  assignWeight(d_year, 0, d_year->total_segment, 1000, 0);
  assignWeight(d_yearmonthnum, 0, d_yearmonthnum->total_segment, 1000, 0);

  ReplicationDataPlacement();

  string path;
  if (dist == None) {
    path = string("logs/stats/replicationonly") + to_string(NUM_GPU) + "GPUsSF" + to_string(SF);
    dumpStats(path, dist);
  } else if (dist == Zipf) {
    path = string("logs/stats/replicationonly") + to_string(NUM_GPU) + "GPUsSF" + to_string(SF) + "Zipf";
    cout << path << endl;
    dumpStats(path, dist);
  }
}
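// The three entry points differ mainly in the weights they hand the placer:
// PartitioningOnly zeroes every repl_weight and round-robins segments across
// GPUs; ReplicationOnly also zeroes repl_weight but routes through
// ReplicationDataPlacement, which copies every weighted segment to all GPUs;
// ShuffleAware mixes both via per-column repl_weight values.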
loadColumnPinned<int>("lo_orderkey", LO_LEN); h_lo_suppkey = loadColumnPinned<int>("lo_suppkey", LO_LEN); h_lo_custkey = loadColumnPinned<int>("lo_custkey", LO_LEN); h_lo_partkey = loadColumnPinned<int>("lo_partkey", LO_LEN); h_lo_orderdate = loadColumnPinned<int>("lo_orderdate", LO_LEN); h_lo_revenue = loadColumnPinned<int>("lo_revenue", LO_LEN); h_lo_discount = loadColumnPinned<int>("lo_discount", LO_LEN); h_lo_quantity = loadColumnPinned<int>("lo_quantity", LO_LEN); h_lo_extendedprice = loadColumnPinned<int>("lo_extendedprice", LO_LEN); h_lo_supplycost = loadColumnPinned<int>("lo_supplycost", LO_LEN); // h_lo_orderkey = loadColumnPinnedSort<int>("lo_orderkey", LO_LEN); // h_lo_suppkey = loadColumnPinnedSort<int>("lo_suppkey", LO_LEN); // h_lo_custkey = loadColumnPinnedSort<int>("lo_custkey", LO_LEN); // h_lo_partkey = loadColumnPinnedSort<int>("lo_partkey", LO_LEN); // h_lo_orderdate = loadColumnPinnedSort<int>("lo_orderdate", LO_LEN); // h_lo_revenue = loadColumnPinnedSort<int>("lo_revenue", LO_LEN); // h_lo_discount = loadColumnPinnedSort<int>("lo_discount", LO_LEN); // h_lo_quantity = loadColumnPinnedSort<int>("lo_quantity", LO_LEN); // h_lo_extendedprice = loadColumnPinnedSort<int>("lo_extendedprice", LO_LEN); // h_lo_supplycost = loadColumnPinnedSort<int>("lo_supplycost", LO_LEN); h_c_custkey = loadColumnPinned<int>("c_custkey", C_LEN); h_c_nation = loadColumnPinned<int>("c_nation", C_LEN); h_c_region = loadColumnPinned<int>("c_region", C_LEN); h_c_city = loadColumnPinned<int>("c_city", C_LEN); h_s_suppkey = loadColumnPinned<int>("s_suppkey", S_LEN); h_s_nation = loadColumnPinned<int>("s_nation", S_LEN); h_s_region = loadColumnPinned<int>("s_region", S_LEN); h_s_city = loadColumnPinned<int>("s_city", S_LEN); h_p_partkey = loadColumnPinned<int>("p_partkey", P_LEN); h_p_brand1 = loadColumnPinned<int>("p_brand1", P_LEN); h_p_category = loadColumnPinned<int>("p_category", P_LEN); h_p_mfgr = loadColumnPinned<int>("p_mfgr", P_LEN); h_d_datekey = loadColumnPinned<int>("d_datekey", D_LEN); h_d_year = loadColumnPinned<int>("d_year", D_LEN); h_d_yearmonthnum = loadColumnPinned<int>("d_yearmonthnum", D_LEN); lo_orderkey = new ColumnInfo(lo, "lo_orderkey", 0, h_lo_orderkey); lo_suppkey = new ColumnInfo(lo, "lo_suppkey", 1, h_lo_suppkey); lo_custkey = new ColumnInfo(lo, "lo_custkey", 2, h_lo_custkey); lo_partkey = new ColumnInfo(lo, "lo_partkey", 3, h_lo_partkey); lo_orderdate = new ColumnInfo(lo, "lo_orderdate", 4, h_lo_orderdate); lo_revenue = new ColumnInfo(lo, "lo_revenue", 5, h_lo_revenue); lo_discount = new ColumnInfo(lo, "lo_discount", 6, h_lo_discount); lo_quantity = new ColumnInfo(lo, "lo_quantity", 7, h_lo_quantity); lo_extendedprice = new ColumnInfo(lo, "lo_extendedprice", 8, h_lo_extendedprice); lo_supplycost = new ColumnInfo(lo, "lo_supplycost", 9, h_lo_supplycost); c_custkey = new ColumnInfo(c, "c_custkey", 10, h_c_custkey); c_nation = new ColumnInfo(c, "c_nation", 11, h_c_nation); c_region = new ColumnInfo(c, "c_region", 12, h_c_region); c_city = new ColumnInfo(c, "c_city", 13, h_c_city); s_suppkey = new ColumnInfo(s, "s_suppkey", 14, h_s_suppkey); s_nation = new ColumnInfo(s, "s_nation", 15, h_s_nation); s_region = new ColumnInfo(s, "s_region", 16, h_s_region); s_city = new ColumnInfo(s, "s_city", 17, h_s_city); p_partkey = new ColumnInfo(p, "p_partkey", 18, h_p_partkey); p_brand1 = new ColumnInfo(p, "p_brand1", 19, h_p_brand1); p_category = new ColumnInfo(p, "p_category", 20, h_p_category); p_mfgr = new ColumnInfo(p, "p_mfgr", 21, h_p_mfgr); d_datekey = new ColumnInfo(d, 
"d_datekey", 22, h_d_datekey); d_year = new ColumnInfo(d, "d_year", 23, h_d_year); d_yearmonthnum = new ColumnInfo(d, "d_yearmonthnum", 24, h_d_yearmonthnum); allColumn[0] = lo_orderkey; allColumn[1] = lo_suppkey; allColumn[2] = lo_custkey; allColumn[3] = lo_partkey; allColumn[4] = lo_orderdate; allColumn[5] = lo_revenue; allColumn[6] = lo_discount; allColumn[7] = lo_quantity; allColumn[8] = lo_extendedprice; allColumn[9] = lo_supplycost; allColumn[10] = c_custkey; allColumn[11] = c_nation; allColumn[12] = c_region; allColumn[13] = c_city; allColumn[14] = s_suppkey; allColumn[15] = s_nation; allColumn[16] = s_region; allColumn[17] = s_city; allColumn[18] = p_partkey; allColumn[19] = p_brand1; allColumn[20] = p_category; allColumn[21] = p_mfgr; allColumn[22] = d_datekey; allColumn[23] = d_year; allColumn[24] = d_yearmonthnum; for (int i = 0; i <= 9; i++) { allTable[0]->columns.push_back(allColumn[i]); allColumn[i]->column_table_id = allTable[0]->columns.size()-1; } for (int i = 10; i <= 13; i++) { allTable[2]->columns.push_back(allColumn[i]); allColumn[i]->column_table_id = allTable[2]->columns.size()-1; } for (int i = 14; i <= 17; i++) { allTable[1]->columns.push_back(allColumn[i]); allColumn[i]->column_table_id = allTable[1]->columns.size()-1; } for (int i = 18; i <= 21; i++) { allTable[3]->columns.push_back(allColumn[i]); allColumn[i]->column_table_id = allTable[3]->columns.size()-1; } for (int i = 22; i <= 24; i++) { allTable[4]->columns.push_back(allColumn[i]); allColumn[i]->column_table_id = allTable[4]->columns.size()-1; } columns_in_table.resize(TOT_TABLE); for (int i = 0; i < TOT_COLUMN; i++) { columns_in_table[allColumn[i]->table_id].push_back(allColumn[i]->column_id); } // unsigned long long sum = 0; // for (int i = 0; i < LO_LEN; i++) { // sum += h_lo_revenue[i]; // } // cout << "Printing partkey" << endl; // int count = 0; // for (int i = 0; i < S_LEN; i++) { // if (h_s_city[i] ) // printf("%d\n", h_s_city[i]); // } // cout << endl; // cout << "Printing custkey" << endl; // for (int i = 0; i < C_LEN; i++) { // printf("%d %d %d %d\n", h_c_nation[2343346], h_c_nation[8054136], h_c_nation[8244933], h_c_nation[8247076]); // } // cout << "total sum " << sum << endl; } CacheManager::~CacheManager() { for (int gpu = 0; gpu < NUM_GPU; gpu++) { cudaSetDevice(gpu); CubDebugExit(cudaFree(gpuCache[gpu])); CubDebugExit(cudaFree(gpuProcessing[gpu])); } cudaSetDevice(0); delete[] gpuCache; delete[] gpuProcessing; delete[] cpuProcessing; CubDebugExit(cudaFreeHost(pinnedMemory)); CubDebugExit(cudaFreeHost(h_lo_orderkey)); CubDebugExit(cudaFreeHost(h_lo_suppkey)); CubDebugExit(cudaFreeHost(h_lo_custkey)); CubDebugExit(cudaFreeHost(h_lo_partkey)); CubDebugExit(cudaFreeHost(h_lo_orderdate)); CubDebugExit(cudaFreeHost(h_lo_revenue)); CubDebugExit(cudaFreeHost(h_lo_discount)); CubDebugExit(cudaFreeHost(h_lo_quantity)); CubDebugExit(cudaFreeHost(h_lo_extendedprice)); CubDebugExit(cudaFreeHost(h_lo_supplycost)); CubDebugExit(cudaFreeHost(h_c_custkey)); CubDebugExit(cudaFreeHost(h_c_nation)); CubDebugExit(cudaFreeHost(h_c_region)); CubDebugExit(cudaFreeHost(h_c_city)); CubDebugExit(cudaFreeHost(h_s_suppkey)); CubDebugExit(cudaFreeHost(h_s_nation)); CubDebugExit(cudaFreeHost(h_s_region)); CubDebugExit(cudaFreeHost(h_s_city)); CubDebugExit(cudaFreeHost(h_p_partkey)); CubDebugExit(cudaFreeHost(h_p_brand1)); CubDebugExit(cudaFreeHost(h_p_category)); CubDebugExit(cudaFreeHost(h_p_mfgr)); CubDebugExit(cudaFreeHost(h_d_datekey)); CubDebugExit(cudaFreeHost(h_d_year)); 
CacheManager::~CacheManager() {
  for (int gpu = 0; gpu < NUM_GPU; gpu++) {
    cudaSetDevice(gpu);
    CubDebugExit(cudaFree(gpuCache[gpu]));
    CubDebugExit(cudaFree(gpuProcessing[gpu]));
  }
  cudaSetDevice(0);
  delete[] gpuCache;
  delete[] gpuProcessing;
  delete[] cpuProcessing;
  CubDebugExit(cudaFreeHost(pinnedMemory));

  CubDebugExit(cudaFreeHost(h_lo_orderkey));
  CubDebugExit(cudaFreeHost(h_lo_suppkey));
  CubDebugExit(cudaFreeHost(h_lo_custkey));
  CubDebugExit(cudaFreeHost(h_lo_partkey));
  CubDebugExit(cudaFreeHost(h_lo_orderdate));
  CubDebugExit(cudaFreeHost(h_lo_revenue));
  CubDebugExit(cudaFreeHost(h_lo_discount));
  CubDebugExit(cudaFreeHost(h_lo_quantity));
  CubDebugExit(cudaFreeHost(h_lo_extendedprice));
  CubDebugExit(cudaFreeHost(h_lo_supplycost));
  CubDebugExit(cudaFreeHost(h_c_custkey));
  CubDebugExit(cudaFreeHost(h_c_nation));
  CubDebugExit(cudaFreeHost(h_c_region));
  CubDebugExit(cudaFreeHost(h_c_city));
  CubDebugExit(cudaFreeHost(h_s_suppkey));
  CubDebugExit(cudaFreeHost(h_s_nation));
  CubDebugExit(cudaFreeHost(h_s_region));
  CubDebugExit(cudaFreeHost(h_s_city));
  CubDebugExit(cudaFreeHost(h_p_partkey));
  CubDebugExit(cudaFreeHost(h_p_brand1));
  CubDebugExit(cudaFreeHost(h_p_category));
  CubDebugExit(cudaFreeHost(h_p_mfgr));
  CubDebugExit(cudaFreeHost(h_d_datekey));
  CubDebugExit(cudaFreeHost(h_d_year));
  CubDebugExit(cudaFreeHost(h_d_yearmonthnum));

  delete lo_orderkey;
  delete lo_orderdate;
  delete lo_custkey;
  delete lo_suppkey;
  delete lo_partkey;
  delete lo_revenue;
  delete lo_discount;
  delete lo_quantity;
  delete lo_extendedprice;
  delete lo_supplycost;
  delete c_custkey;
  delete c_nation;
  delete c_region;
  delete c_city;
  delete s_suppkey;
  delete s_nation;
  delete s_region;
  delete s_city;
  delete p_partkey;
  delete p_brand1;
  delete p_category;
  delete p_mfgr;
  delete d_datekey;
  delete d_year;
  delete d_yearmonthnum;

  for (int i = 0; i < TOT_COLUMN; i++) {
    free(segment_bitmap[i]);
  }
  free(segment_bitmap);
  for (int gpu = 0; gpu < NUM_GPU; gpu++) {
    for (int i = 0; i < TOT_COLUMN; i++) {
      CubDebugExit(cudaFreeHost(segment_list[gpu][i]));
    }
    free(segment_list[gpu]);
  }
  free(segment_list);
  for (int gpu = 0; gpu < NUM_GPU; gpu++) {
    for (int i = 0; i < TOT_TABLE; i++) {
      free(gpu_segment_row_bitmap[gpu][i]);
    }
    free(gpu_segment_row_bitmap[gpu]);
  }
  free(gpu_segment_row_bitmap);
}

template int* CacheManager::customMalloc<int>(int size);
template int* CacheManager::customCudaMalloc<int>(int size, int gpu);
template int** CacheManager::customCudaMalloc<int*>(int size, int gpu);
template int*** CacheManager::customCudaMalloc<int**>(int size, int gpu);
template int**** CacheManager::customCudaMalloc<int***>(int size, int gpu);
template int* CacheManager::customCudaHostAlloc<int>(int size);
template short* CacheManager::customMalloc<short>(int size);
template short* CacheManager::customCudaMalloc<short>(int size, int gpu);
template short* CacheManager::customCudaHostAlloc<short>(int size);
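// Explicit instantiations: the template bodies live in this .cu file rather
// than the header, so every <T> used elsewhere in the codebase must be
// instantiated here or the linker will report undefined symbols.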