#include "CacheManager.h"
#include <bitset>
Segment::Segment(ColumnInfo* _column, int* _seg_ptr, int _priority)
: column(_column), seg_ptr(_seg_ptr), priority(_priority), seg_size(SEGMENT_SIZE) {
stats = new Statistics();
col_ptr = column->col_ptr;
segment_id = (seg_ptr - col_ptr)/seg_size;
weight = 0;
repl_weight = 0;
}
Segment::Segment(ColumnInfo* _column, int* _seg_ptr)
: column(_column), seg_ptr(_seg_ptr), priority(0), seg_size(SEGMENT_SIZE) {
stats = new Statistics();
col_ptr = column->col_ptr;
segment_id = (seg_ptr - col_ptr)/seg_size;
weight = 0;
repl_weight = 0;
}
ColumnInfo::ColumnInfo(TableInfo* _table, string _column_name, int _column_id, int* _col_ptr)
: table(_table), column_name(_column_name), column_id(_column_id), col_ptr(_col_ptr) {
stats = new Statistics();
tot_seg_in_GPU = 0;
weight = 0;
seg_ptr = col_ptr;
table_id = table->table_id;
table_name = table->table_name;
LEN = table->LEN;
total_segment = (LEN+SEGMENT_SIZE-1)/SEGMENT_SIZE;
}
TableInfo::TableInfo(string _table_name, int _LEN, int _table_id)
: table_name(_table_name), LEN(_LEN), table_id(_table_id) {
total_segment = (LEN+SEGMENT_SIZE-1)/SEGMENT_SIZE;
}
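//Example (hypothetical sizes): with LEN = 6000000 and SEGMENT_SIZE = 1000000,
//total_segment = (6000000 + 1000000 - 1) / 1000000 = 6, and a Segment whose
//seg_ptr sits 2000000 ints past col_ptr gets segment_id = 2000000 / 1000000 = 2.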
// WILL NOT WORK FOR MULTI GPU
Segment*
ColumnInfo::getSegment(int index) {
Segment* seg = new Segment(this, col_ptr+SEGMENT_SIZE*index);
return seg;
}
CacheManager::CacheManager(size_t _cache_size, size_t _broadcast_size, size_t _processing_size, size_t _pinned_memsize) {
tot_cache_size = _cache_size;
each_cache_size = _cache_size/NUM_GPU;
each_broadcast_size = _broadcast_size/NUM_GPU;
cache_total_seg = tot_cache_size/SEGMENT_SIZE;
cache_each_tot_seg = each_cache_size/SEGMENT_SIZE;
broadcast_each_tot_seg = each_broadcast_size/SEGMENT_SIZE;
tot_processing_size = _processing_size;
each_processing_size = _processing_size/NUM_GPU;
pinned_memsize = _pinned_memsize;
TOT_COLUMN = NUM_COLUMN;
TOT_TABLE = NUM_TABLE;
seg_idx_min = 0;
gpuCache = new int*[NUM_GPU];
gpuProcessing = new uint64_t*[NUM_GPU];
gpuPointer = new unsigned int[NUM_GPU];
broadcastPointer = new unsigned int[NUM_GPU];
gpuBroadcast = new int*[NUM_GPU];
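//each gpu gets a single device allocation that packs the cache region followed
//by the broadcast staging region; gpuBroadcast[gpu] simply points
//each_cache_size ints past the start of gpuCache[gpu]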
for (int gpu = 0; gpu < NUM_GPU; gpu++) {
cout << gpu << " " << each_cache_size << endl;
cudaSetDevice(gpu);
CubDebugExit(cudaMalloc((void**) &gpuCache[gpu], (each_cache_size + each_broadcast_size) * sizeof(int)));
// CubDebugExit(cudaMemset(gpuCache[gpu], 0, each_cache_size * sizeof(int)));
CubDebugExit(cudaMalloc((void**) &gpuProcessing[gpu], each_processing_size * sizeof(uint64_t)));
gpuPointer[gpu] = 0;
broadcastPointer[gpu] = 0;
gpuBroadcast[gpu] = gpuCache[gpu] + each_cache_size;
}
cudaSetDevice(0);
cpuProcessing = (uint64_t*) malloc(tot_processing_size * sizeof(uint64_t));
CubDebugExit(cudaHostAlloc((void**) &pinnedMemory, pinned_memsize * sizeof(uint64_t), cudaHostAllocDefault));
cpuPointer = 0;
pinnedPointer = 0;
allColumn.resize(TOT_COLUMN);
allTable.resize(TOT_TABLE);
index_to_segment.resize(TOT_COLUMN);
segment_row_to_gpu.resize(TOT_TABLE);
cache_mapper.resize(NUM_GPU);
empty_gpu_segment.resize(NUM_GPU);
for (int gpu = 0; gpu < NUM_GPU; gpu++) {
for(int i = 0; i < cache_each_tot_seg; i++) {
empty_gpu_segment[gpu].push(i);
}
}
loadColumnToCPU();
seg_row_to_single_gpu = (int**) malloc (TOT_TABLE * sizeof(int*));
seg_is_replicated = (int**) malloc (TOT_TABLE * sizeof(int*));
for (int table = 0; table < TOT_TABLE; table++) {
int total_segment = allTable[table]->total_segment;
segment_row_to_gpu[table].resize(total_segment);
seg_row_to_single_gpu[table] = (int*) malloc (total_segment * sizeof(int));
seg_is_replicated[table] = (int*) malloc (total_segment * sizeof(int));
memset(seg_row_to_single_gpu[table], -1, total_segment * sizeof(int));
memset(seg_is_replicated[table], 0, total_segment * sizeof(int));
}
gpu_segment_row_bitmap = (unsigned int***) malloc (NUM_GPU * sizeof(unsigned int**));
for (int gpu = 0; gpu < NUM_GPU; gpu++) {
gpu_segment_row_bitmap[gpu] = (unsigned int**) malloc (TOT_TABLE * sizeof(unsigned int*));
for (int table = 0; table < TOT_TABLE; table++) {
gpu_segment_row_bitmap[gpu][table] = (unsigned int*) malloc (allTable[table]->total_segment * sizeof(unsigned int));
memset(gpu_segment_row_bitmap[gpu][table], 0, allTable[table]->total_segment * sizeof(unsigned int));
}
}
segment_list = (int***) malloc (NUM_GPU * sizeof(int**));
for (int gpu = 0; gpu < NUM_GPU; gpu++) {
segment_list[gpu] = (int**) malloc (TOT_COLUMN * sizeof(int*));
for (int i = 0; i < TOT_COLUMN; i++) {
int n = allColumn[i]->total_segment;
CubDebugExit(cudaHostAlloc((void**) &(segment_list[gpu][i]), n * sizeof(int), cudaHostAllocDefault));
memset(segment_list[gpu][i], -1, n * sizeof(int));
}
}
segment_min = (int**) malloc (TOT_COLUMN * sizeof(int*));
segment_max = (int**) malloc (TOT_COLUMN * sizeof(int*));
segment_bitmap = (unsigned char**) malloc (TOT_COLUMN * sizeof(unsigned char*));
for (int i = 0; i < TOT_COLUMN; i++) {
int n = allColumn[i]->total_segment;
segment_bitmap[i] = (unsigned char*) malloc(n * sizeof(unsigned char));
segment_min[i] = (int*) malloc(n * sizeof(int));
segment_max[i] = (int*) malloc(n * sizeof(int));
memset(segment_bitmap[i], 0, n * sizeof(unsigned char));
}
readSegmentMinMax();
for (int i = 0; i < TOT_COLUMN; i++) {
index_to_segment[i].resize(allColumn[i]->total_segment);
for (int j = 0; j < allColumn[i]->total_segment; j++) {
index_to_segment[i][j] = allColumn[i]->getSegment(j);
}
}
}
// void
// CacheManager::resetCache(size_t _cache_size, size_t _processing_size, size_t _pinned_memsize) {
// for (int gpu = 0; gpu < NUM_GPU; gpu++) {
// cudaSetDevice(gpu);
// CubDebugExit(cudaFree(gpuCache[gpu]));
// CubDebugExit(cudaFree(gpuProcessing[gpu]));
// }
// cudaSetDevice(0);
// delete[] gpuCache;
// delete[] gpuProcessing;
// delete[] cpuProcessing;
// CubDebugExit(cudaFreeHost(pinnedMemory));
// for (int table = 0; table < TOT_TABLE; table++) {
// int total_segment = allTable[table]->total_segment;
// for (int seg_id = 0; seg_id < total_segment; seg_id++) {
// segment_row_to_gpu[table][seg_id].clear();
// }
// free(seg_row_to_single_gpu[table]);
// }
// free(seg_row_to_single_gpu);
// for (int gpu = 0; gpu < NUM_GPU; gpu++) {
// cache_mapper[gpu].clear();
// }
// for (int i = 0; i < TOT_COLUMN; i++) {
// free(segment_bitmap[i]);
// }
// free(segment_bitmap);
// for (int gpu = 0; gpu < NUM_GPU; gpu++) {
// for (int i = 0; i < TOT_COLUMN; i++) {
// CubDebugExit(cudaFreeHost(segment_list[gpu][i]));
// }
// free(segment_list[gpu]);
// }
// free(segment_list);
// for (int gpu = 0; gpu < NUM_GPU; gpu++) {
// for (int i = 0; i < TOT_TABLE; i++) {
// free(gpu_segment_row_bitmap[gpu][i]);
// }
// free(gpu_segment_row_bitmap[gpu]);
// }
// free(gpu_segment_row_bitmap);
// tot_cache_size = _cache_size;
// each_cache_size = _cache_size/NUM_GPU;
// cache_total_seg = tot_cache_size/SEGMENT_SIZE;
// cache_each_tot_seg = each_cache_size/SEGMENT_SIZE;
// tot_processing_size = _processing_size;
// each_processing_size = _processing_size/NUM_GPU;
// pinned_memsize = _pinned_memsize;
// cout << each_cache_size << endl;
// gpuCache = new int*[NUM_GPU];
// gpuProcessing = new uint64_t*[NUM_GPU];
// gpuPointer = new unsigned int[NUM_GPU];
// for (int gpu = 0; gpu < NUM_GPU; gpu++) {
// cudaSetDevice(gpu);
// CubDebugExit(cudaMalloc((void**) &gpuCache[gpu], each_cache_size * sizeof(int)));
// CubDebugExit(cudaMemset(gpuCache[gpu], 0, each_cache_size * sizeof(int)));
// CubDebugExit(cudaMalloc((void**) &gpuProcessing[gpu], each_processing_size * sizeof(uint64_t)));
// gpuPointer[gpu] = 0;
// }
// cudaSetDevice(0);
// cpuProcessing = (uint64_t*) malloc(tot_processing_size * sizeof(uint64_t));
// CubDebugExit(cudaHostAlloc((void**) &pinnedMemory, pinned_memsize * sizeof(uint64_t), cudaHostAllocDefault));
// cpuPointer = 0;
// pinnedPointer = 0;
// for (int gpu = 0; gpu < NUM_GPU; gpu++) {
// while (!empty_gpu_segment[gpu].empty()) {
// empty_gpu_segment[gpu].pop();
// }
// for(int i = 0; i < cache_each_tot_seg; i++) {
// empty_gpu_segment[gpu].push(i);
// }
// }
// seg_row_to_single_gpu = (int**) malloc (TOT_TABLE * sizeof(int*));
// for (int table = 0; table < TOT_TABLE; table++) {
// int total_segment = allTable[table]->total_segment;
// seg_row_to_single_gpu[table] = (int*) malloc (total_segment * sizeof(int));
// memset(seg_row_to_single_gpu[table], -1, total_segment * sizeof(int));
// }
// gpu_segment_row_bitmap = (unsigned int***) malloc (NUM_GPU * sizeof(unsigned int**));
// for (int gpu = 0; gpu < NUM_GPU; gpu++) {
// gpu_segment_row_bitmap[gpu] = (unsigned int**) malloc (TOT_TABLE * sizeof(unsigned int*));
// for (int table = 0; table < TOT_TABLE; table++) {
// gpu_segment_row_bitmap[gpu][table] = (unsigned int*) malloc (allTable[table]->total_segment * sizeof(unsigned int));
// memset(gpu_segment_row_bitmap[gpu][table], 0, allTable[table]->total_segment * sizeof(unsigned int));
// }
// }
// segment_list = (int***) malloc (NUM_GPU * sizeof(int**));
// for (int gpu = 0; gpu < NUM_GPU; gpu++) {
// segment_list[gpu] = (int**) malloc (TOT_COLUMN * sizeof(int*));
// for (int i = 0; i < TOT_COLUMN; i++) {
// int n = allColumn[i]->total_segment;
// CubDebugExit(cudaHostAlloc((void**) &(segment_list[gpu][i]), n * sizeof(int), cudaHostAllocDefault));
// memset(segment_list[gpu][i], -1, n * sizeof(int));
// }
// }
// segment_bitmap = (char**) malloc (TOT_COLUMN * sizeof(char*));
// for (int i = 0; i < TOT_COLUMN; i++) {
// int n = allColumn[i]->total_segment;
// segment_bitmap[i] = (char*) malloc(n * sizeof(char));
// memset(segment_bitmap[i], 0, n * sizeof(char));
// }
// }
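//each "<column_name>minmax" file under DATA_DIR holds one line per segment in
//the form "<min> <max>", read into segment_min and segment_max for that column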
void
CacheManager::readSegmentMinMax() {
for (int i = 0; i < TOT_COLUMN; i++) {
string line;
ifstream myfile (DATA_DIR + allColumn[i]->column_name + "minmax");
if (myfile.is_open()) {
int segment_idx = 0;
string del = " ";
while ( getline (myfile,line) )
{
size_t start = 0;
size_t end = line.find(del);
if (end != string::npos) {
string minstring = line.substr(start, end - start);
segment_min[i][segment_idx] = stoi(minstring);
start = end + del.size();
}
string maxstring = line.substr(start); //take the rest of the line; end no longer marks a valid delimiter here
segment_max[i][segment_idx] = stoi(maxstring);
segment_idx++;
}
assert(segment_idx == allColumn[i]->total_segment);
myfile.close();
} else {
cout << "Unable to open file" << endl;
assert(0);
}
}
}
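//customMalloc, customCudaMalloc, and customCudaHostAlloc below are simple bump
//allocators over the arenas reserved in the constructor: each request is
//rounded up to whole uint64_t words and the arena pointer is advanced with an
//atomic fetch-add, so they are thread-safe and only reclaimed in bulk via
//resetPointer().
//Example (hypothetical request): customMalloc<int>(100) needs 400 bytes, i.e.
//alloc = (400 + 8 - 1) / 8 = 50 words, and returns cpuProcessing + start
//reinterpreted as int*.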
template <typename T>
T*
CacheManager::customMalloc(int size) {
int alloc = ((size * sizeof(T)) + sizeof(uint64_t) - 1)/ sizeof(uint64_t);
int start = __atomic_fetch_add(&cpuPointer, alloc, __ATOMIC_RELAXED);
assert((start + alloc) < tot_processing_size);
return reinterpret_cast<T*>(cpuProcessing + start);
};
template <typename T>
T*
CacheManager::customCudaMalloc(int size, int gpu) {
int alloc = ((size * sizeof(T)) + sizeof(uint64_t) - 1)/ sizeof(uint64_t);
int start = __atomic_fetch_add(&gpuPointer[gpu], alloc, __ATOMIC_RELAXED);
// cout << gpu << " " << gpuPointer[gpu] << " " << each_processing_size << endl;
assert((start + alloc) < each_processing_size);
return reinterpret_cast<T*>(gpuProcessing[gpu] + start);
};
template <typename T>
T*
CacheManager::customCudaHostAlloc(int size) {
int alloc = ((size * sizeof(T)) + sizeof(uint64_t) - 1)/ sizeof(uint64_t);
int start = __atomic_fetch_add(&pinnedPointer, alloc, __ATOMIC_RELAXED);
assert((start + alloc) < pinned_memsize); //bound by the pinned arena, not the processing arena
return reinterpret_cast<T*>(pinnedMemory + start);
};
//transfer the segment index of a column to a gpu only if that gpu does not have it yet
//the stream synchronize causes the memcpys to be serialized across gpus; this should be fixed in the future
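//the compare-exchange below lets concurrent callers race to publish the
//device-side copy of the segment list: exactly one winner installs its pointer
//into col_idx, while a loser's transfer is simply abandoned inside the
//bump-allocated arena until the next resetPointer()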
void
CacheManager::indexTransfer(int** col_idx, ColumnInfo* column, cudaStream_t stream, int gpu, bool custom) {
if (col_idx[column->column_id] == NULL) {
int* desired;
// if (custom) desired = (int*) customCudaMalloc<int>(column->total_segment, gpu);
// else CubDebugExit(cudaMalloc((void**) &desired, column->total_segment * sizeof(int)));
desired = (int*) customCudaMalloc<int>(column->total_segment, gpu);
int* expected = NULL;
CubDebugExit(cudaSetDevice(gpu));
CubDebugExit(cudaMemcpyAsync(desired, segment_list[gpu][column->column_id], column->total_segment * sizeof(int), cudaMemcpyHostToDevice, stream));
CubDebugExit(cudaStreamSynchronize(stream));
__atomic_compare_exchange_n(&(col_idx[column->column_id]), &expected, desired, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
CubDebugExit(cudaSetDevice(0));
}
};
void
CacheManager::resetPointer() {
for (int gpu = 0; gpu < NUM_GPU; gpu++) {
gpuPointer[gpu] = 0;
broadcastPointer[gpu] = 0;
}
cpuPointer = 0;
pinnedPointer = 0;
};
// modified for multi gpu
void
CacheManager::cacheSegmentInGPU(Segment* seg, int gpu) {
int64_t local_idx = empty_gpu_segment[gpu].front(); //local segment index to each GPU
empty_gpu_segment[gpu].pop();
assert(cache_mapper[gpu].find(seg) == cache_mapper[gpu].end());
cache_mapper[gpu][seg] = local_idx;
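//segment_bitmap keeps one presence bit per gpu in an unsigned char, with bit 7
//(mask 0x80 >> gpu) for gpu 0; e.g. a value of 0b10100000 means the segment is
//cached on gpus 0 and 2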
unsigned char old_segment_bitmap = segment_bitmap[seg->column->column_id][seg->segment_id];
unsigned char check = (segment_bitmap[seg->column->column_id][seg->segment_id] << gpu);
check = check >> 7;
// cout << gpu << " " << std::bitset<8>(segment_bitmap[seg->column->column_id][seg->segment_id]) << " " << std::bitset<8>(check) << endl;
assert(check == 0x00);
unsigned char code = 0x80 >> gpu;
segment_bitmap[seg->column->column_id][seg->segment_id] = segment_bitmap[seg->column->column_id][seg->segment_id] | code;
// cout << std::bitset<8>(segment_bitmap[seg->column->column_id][seg->segment_id]) << endl;
//segment row management
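//gpu_segment_row_bitmap applies the same idea at segment-row granularity: one
//bit per column of the table (mask 0x80000000 >> column_table_id), so a value
//of zero means no column of this segment row is cached on this gpu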
if (gpu_segment_row_bitmap[gpu][seg->column->table_id][seg->segment_id] == 0x00000000) { //if this segment row has not been cached in this gpu
vector<int> vec = segment_row_to_gpu[seg->column->table_id][seg->segment_id];
vector<int>::iterator it = find(vec.begin(), vec.end(), gpu);
assert(it == vec.end());
segment_row_to_gpu[seg->column->table_id][seg->segment_id].push_back(gpu);
seg_row_to_single_gpu[seg->column->table_id][seg->segment_id] = gpu;
if (segment_row_to_gpu[seg->column->table_id][seg->segment_id].size() == NUM_GPU) {
// cout << "im here " << seg->column->table_id << endl;
seg_is_replicated[seg->column->table_id][seg->segment_id] = 1;
seg_row_to_single_gpu[seg->column->table_id][seg->segment_id] = seg->segment_id % NUM_GPU;
}
}
unsigned int check_int = (gpu_segment_row_bitmap[gpu][seg->column->table_id][seg->segment_id] << seg->column->column_table_id);
check_int = check_int >> 31;
assert(check_int == 0);
unsigned int code_int = 0x80000000 >> seg->column->column_table_id;
gpu_segment_row_bitmap[gpu][seg->column->table_id][seg->segment_id] = gpu_segment_row_bitmap[gpu][seg->column->table_id][seg->segment_id] | code_int;
assert(segment_list[gpu][seg->column->column_id][seg->segment_id] == -1);
assert(local_idx >= 0);
segment_list[gpu][seg->column->column_id][seg->segment_id] = local_idx;
CubDebugExit(cudaSetDevice(gpu));
CubDebugExit(cudaMemcpy(&gpuCache[gpu][local_idx * SEGMENT_SIZE], seg->seg_ptr, SEGMENT_SIZE * sizeof(int), cudaMemcpyHostToDevice));
CubDebugExit(cudaSetDevice(0));
//THE SINGLE-GPU CHECK (COMMENTED OUT BELOW) NO LONGER APPLIES FOR MULTI GPU; tot_seg_in_GPU is now incremented only when a segment becomes cached on its first gpu
// allColumn[seg->column->column_id]->tot_seg_in_GPU++;
// assert(allColumn[seg->column->column_id]->tot_seg_in_GPU <= allColumn[seg->column->column_id]->total_segment);
if (old_segment_bitmap == 0x00) {
assert(segment_bitmap[seg->column->column_id][seg->segment_id] != 0x00);
allColumn[seg->column->column_id]->tot_seg_in_GPU++;
assert(allColumn[seg->column->column_id]->tot_seg_in_GPU <= allColumn[seg->column->column_id]->total_segment);
}
}
void
CacheManager::cacheSegmentMultipleGPU(Segment* seg, vector<int> &gpu_list) {
for (int i = 0; i < gpu_list.size(); i++) {
int gpu = gpu_list[i];
cacheSegmentInGPU(seg, gpu);
}
}
void
CacheManager::deleteSegmentInGPU(Segment* seg, int gpu) {
assert(cache_mapper[gpu].find(seg) != cache_mapper[gpu].end());
int local_idx = cache_mapper[gpu][seg];
int ret = cache_mapper[gpu].erase(seg);
assert(ret == 1);
unsigned char old_segment_bitmap = segment_bitmap[seg->column->column_id][seg->segment_id];
// cout << "1 " << std::bitset<8>(old_segment_bitmap) << endl;
unsigned char check = (segment_bitmap[seg->column->column_id][seg->segment_id] << gpu);
check = check >> 7;
assert(check == 0x01);
unsigned char code = ~(0x80 >> gpu); //produce all 1 except the gpu bit location which is 0
segment_bitmap[seg->column->column_id][seg->segment_id] = segment_bitmap[seg->column->column_id][seg->segment_id] & code;
// cout << "2 " << std::bitset<32>(gpu_segment_row_bitmap[gpu][seg->column->table_id][seg->segment_id]) << endl;
//segment row management
unsigned int check_int = (gpu_segment_row_bitmap[gpu][seg->column->table_id][seg->segment_id] << seg->column->column_table_id);
check_int = check_int >> 31;
assert(check_int == 1);
unsigned int code_int = ~(0x80000000 >> seg->column->column_table_id);
gpu_segment_row_bitmap[gpu][seg->column->table_id][seg->segment_id] = gpu_segment_row_bitmap[gpu][seg->column->table_id][seg->segment_id] & code_int;
// cout << "3 " << std::bitset<32>(gpu_segment_row_bitmap[gpu][seg->column->table_id][seg->segment_id]) << endl;
if (gpu_segment_row_bitmap[gpu][seg->column->table_id][seg->segment_id] == 0x00000000) { //the deleted segment was the last segment of this segment row still present on this gpu
// cout << "delete segment row " << segment_row_to_gpu[seg->column->table_id][seg->segment_id].size() << endl;
int loc = 0;
for (int i = 0; i < segment_row_to_gpu[seg->column->table_id][seg->segment_id].size(); i++) {
if (segment_row_to_gpu[seg->column->table_id][seg->segment_id][i] == gpu) loc = i;
}
assert(segment_row_to_gpu[seg->column->table_id][seg->segment_id][loc] == gpu);
segment_row_to_gpu[seg->column->table_id][seg->segment_id].erase(segment_row_to_gpu[seg->column->table_id][seg->segment_id].begin() + loc);
// cout << "delete segment row " << segment_row_to_gpu[seg->column->table_id][seg->segment_id].size() << endl;
if (segment_row_to_gpu[seg->column->table_id][seg->segment_id].size() < NUM_GPU) {
seg_is_replicated[seg->column->table_id][seg->segment_id] = 0;
if (segment_row_to_gpu[seg->column->table_id][seg->segment_id].size() == 0) {
seg_row_to_single_gpu[seg->column->table_id][seg->segment_id] = -1;
} else {
int tot_gpu = segment_row_to_gpu[seg->column->table_id][seg->segment_id].size();
int idx = seg->segment_id % tot_gpu;
seg_row_to_single_gpu[seg->column->table_id][seg->segment_id] = segment_row_to_gpu[seg->column->table_id][seg->segment_id][idx];
}
}
}
assert(segment_list[gpu][seg->column->column_id][seg->segment_id] != -1);
segment_list[gpu][seg->column->column_id][seg->segment_id] = -1;
empty_gpu_segment[gpu].push(local_idx);
//FOR MULTI GPU, tot_seg_in_GPU is decremented only once the segment is no longer cached on any gpu
if (segment_bitmap[seg->column->column_id][seg->segment_id] == 0x00) {
assert(old_segment_bitmap != 0x00);
seg->column->tot_seg_in_GPU--;
assert(seg->column->tot_seg_in_GPU >= 0);
}
}
void
CacheManager::updateColumnFrequency(ColumnInfo* column) {
column->stats->col_freq+=(1.0 / column->total_segment);
// cout << column->column_name << " " << column->stats->col_freq << " " << (1 / column->total_segment) << endl;
}
void
CacheManager::updateColumnWeightDirect(ColumnInfo* column, double speedup) {
if (column->table_id == 0) {
column->stats->speedup += speedup/column->total_segment;
column->weight += speedup/column->total_segment;
} else {
column->stats->speedup += speedup*3/column->total_segment;
column->weight += speedup*3/column->total_segment;
}
}
void
CacheManager::updateSegmentWeightDirect(ColumnInfo* column, Segment* segment, double speedup) {
// cout << segment->segment_id << endl;
if (speedup > 0) {
// cout << column->column_name << endl;
if (column->table_id == 0) {
segment->stats->speedup += speedup/column->total_segment;
segment->weight += speedup/column->total_segment;
} else {
segment->stats->speedup += speedup*3/column->total_segment;
segment->weight += speedup*3/column->total_segment;
}
}
// cout << column->column_name << " " << segment->weight << endl;
}
void
CacheManager::updateSegmentWeightCostDirect(ColumnInfo* column, Segment* segment, double speedup) {
// cout << segment->segment_id << endl;
if (speedup > 0) {
// cout << column->column_name << endl;
if (column->table_id == 0) {
// if (column->column_name.compare("lo_quantity") == 0 || column->column_name.compare("lo_discount") == 0) {
// cout << "hello" << endl;
// speedup = speedup * 0.2;
// }
segment->stats->speedup += (speedup/column->total_segment);
segment->weight += (speedup/column->total_segment);
} else {
segment->stats->speedup += (speedup/column->total_segment);
segment->weight += (speedup/column->total_segment);
}
}
// cout << column->column_name << " " << segment->weight << endl;
}
void
CacheManager::updateSegmentFreqDirect(ColumnInfo* column, Segment* segment) {
segment->stats->col_freq += (1.0 / column->total_segment);
}
void
CacheManager::updateSegmentTimeDirect(ColumnInfo* column, Segment* segment, double timestamp) {
segment->stats->backward_t = timestamp - (segment->stats->timestamp * column->total_segment);
// cout << timestamp << " " << (segment->stats->timestamp * column->total_segment) << endl;
segment->stats->timestamp = (timestamp/ column->total_segment);
}
void
CacheManager::updateColumnTimestamp(ColumnInfo* column, double timestamp) {
// cout << column->column_name << " " << timestamp << endl;
column->stats->backward_t = timestamp - (column->stats->timestamp * column->total_segment);
// cout << column->column_name << " " << timestamp << " " << (column->stats->timestamp * column->total_segment) << endl;
column->stats->timestamp = (timestamp/ column->total_segment);
// cout << column->column_name << " " << column->stats->timestamp << endl;
}
void
CacheManager::deleteAll() {
for (int i = 0; i < TOT_COLUMN; i++) {
ColumnInfo* column = allColumn[i];
for (int j = 0; j < column->total_segment; j++) {
if (segment_bitmap[column->column_id][j] != 0x00) {
Segment* seg = index_to_segment[column->column_id][j];
for (int gpu = 0; gpu < NUM_GPU; gpu++) {
if (cache_mapper[gpu].find(seg) != cache_mapper[gpu].end()) {
deleteSegmentInGPU(seg, gpu);
}
}
}
}
}
}
void
CacheManager::dumpStats(string filename, Distribution dist) {
int data_size = 0;
int cached_data = 0;
int replicated_data = 0;
int replicated_seg_per_column[NUM_COLUMN] = {0};
FILE *fptr = fopen(filename.c_str(), "w");
if (fptr == NULL)
{
printf("Could not open file\n");
assert(0);
}
for (int col = 0; col < NUM_COLUMN; col++) {
data_size += allColumn[col]->total_segment;
cached_data += allColumn[col]->tot_seg_in_GPU;
for (int seg_id = 0; seg_id < allColumn[col]->total_segment; seg_id++) {
bool replicated = true;
Segment* seg = index_to_segment[col][seg_id];
for (int gpu = 0; gpu < NUM_GPU; gpu++) {
if (cache_mapper[gpu].find(seg) == cache_mapper[gpu].end()) {
replicated = false;
}
}
if (replicated) {
replicated_data++;
replicated_seg_per_column[col]++;
}
}
}
fprintf(fptr, "===========================\n");
fprintf(fptr, "======= CACHE INFO ======\n");
fprintf(fptr, "===========================\n");
fprintf(fptr, "\n");
fprintf(fptr, "Segment size: %d\n", SEGMENT_SIZE);
fprintf(fptr, "Data size: %d segments\n", data_size);
fprintf(fptr, "Cached data: %d segments\n", cached_data);
fprintf(fptr, "Replicated data: %d segments\n", replicated_data);
fprintf(fptr, "\n");
for (int i = 1; i < NUM_COLUMN; i++) {
fprintf(fptr,"%s: %d/%d = %.2f segments cached, %d/%d = %.2f segments replicated\n", allColumn[i]->column_name.c_str(), allColumn[i]->tot_seg_in_GPU, allColumn[i]->total_segment,
allColumn[i]->tot_seg_in_GPU * 1.0/allColumn[i]->total_segment, replicated_seg_per_column[i], allColumn[i]->total_segment, replicated_seg_per_column[i] * 1.0/allColumn[i]->total_segment);
}
fprintf(fptr, "\n");
fprintf(fptr, "\n");
fprintf(fptr, "============================\n");
fprintf(fptr, "======= COLUMN INFO ======\n");
fprintf(fptr, "============================\n");
fprintf(fptr, "\n");
fprintf(fptr, "\n");
int threshold = 19950101;
seg_idx_min = 0;
for (int col = 1; col < NUM_COLUMN; col++) {
fprintf(fptr, "Inspecting column %s\n", allColumn[col]->column_name.c_str());
if (allColumn[col]->tot_seg_in_GPU == 0) {
fprintf(fptr, "This column is not cached");
} else {
for (int seg_id = 0; seg_id < allColumn[col]->total_segment; seg_id++) {
Segment* seg = index_to_segment[col][seg_id];
if (segment_bitmap[col][seg_id] != 0x00) {
fprintf(fptr, "Segment %d is cached in GPUs ", seg_id);
for (int gpu = 0; gpu < NUM_GPU; gpu++) {
if (cache_mapper[gpu].find(seg) != cache_mapper[gpu].end()) {
fprintf(fptr, "%d ", gpu);
}
}
fprintf(fptr, "\n");
}
}
}
fprintf(fptr, "\n");
fprintf(fptr, "\n");
}
//WARNING: THIS IS A HACK TO ENSURE JOINCPUCHECK = FALSE WHEN COLUMN IS PARTIALLY CACHED IN ZIPF DISTRIBUTION
if (dist == Zipf) {
for (int seg_id = 0; seg_id < lo_orderdate->total_segment; seg_id++) {
if (segment_min[lo_orderdate->column_id][seg_id] <= threshold && segment_max[lo_orderdate->column_id][seg_id] >= threshold) {
cout << segment_min[lo_orderdate->column_id][seg_id] << " " << segment_max[lo_orderdate->column_id][seg_id] << endl;
seg_idx_min = seg_id;
}
}
}
fclose(fptr);
}
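//assignWeight gives segments [start_seg, end_seg) of a column a caching weight
//and a replication weight and zeroes the rest; weight drives placement
//priority in the data placement routines below, while repl_weight > 0 marks a
//segment as a candidate for replication across all gpus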
void
CacheManager::assignWeight(ColumnInfo* column, int start_seg, int end_seg, double weight, double repl_weight) {
for (int seg_id = 0; seg_id < column->total_segment; seg_id++) {
Segment* seg = index_to_segment[column->column_id][seg_id];
if (seg_id >= start_seg && seg_id < end_seg) {
seg->weight = weight;
seg->repl_weight = repl_weight;
} else {
seg->weight = 0;
seg->repl_weight = 0;
}
}
}
bool
CacheManager::couldReplicateCheck(Segment* seg, double weight, int cache_each_tot_seg, int* temp_buffer_size) {
if (weight == 0) return false;
bool could_replicate = true;
for (int gpu = 0; gpu < NUM_GPU; gpu++) {
if (temp_buffer_size[gpu] + 1 >= cache_each_tot_seg) {
could_replicate = false; break;
}
}
return could_replicate;
}
bool
CacheManager::couldReplicateCachedSegmentRow(Segment* seg, double weight, int cache_each_tot_seg, int* temp_buffer_size, map<Segment*, int> map_segment) {
int count[NUM_GPU] = {0};
if (weight == 0) return false;
bool could_replicate = true;
if (map_segment[seg] != -1) {
assert(map_segment[seg] != 99);
int cur_gpu = map_segment[seg];
for (int gpu = 0; gpu < NUM_GPU; gpu++) {
if (gpu != cur_gpu) {
if (temp_buffer_size[gpu] + count[gpu] + 1 >= cache_each_tot_seg) {
could_replicate = false; break;
} else count[gpu]++;
}
}
}
if (!could_replicate) return could_replicate;
for (int i = 0; i < columns_in_table[seg->column->table_id].size(); i++) {
int column_id = columns_in_table[seg->column->table_id][i];
if (column_id != seg->column->column_id) {
Segment* next_seg = index_to_segment[column_id][seg->segment_id];
if (map_segment[next_seg] != -1 && map_segment[next_seg] != 99) {
int cur_gpu = map_segment[next_seg];
for (int gpu = 0; gpu < NUM_GPU; gpu++) {
if (gpu != cur_gpu) {
if (temp_buffer_size[gpu] + count[gpu] + 1 >= cache_each_tot_seg) {
could_replicate = false; break;
} else count[gpu]++;
}
}
}
}
}
return could_replicate;
}
unsigned long long
CacheManager::PartitionDataPlacement() {
multimap<double, Segment*> access_weight_map;
unsigned long long traffic = 0;
for (int i = TOT_COLUMN-1; i >= 0; i--) {
for (int j = 0; j < allColumn[i]->total_segment; j++) {
Segment* segment = index_to_segment[i][j];
if (segment->weight > 0) access_weight_map.insert({segment->weight, segment});
}
}
int* temp_buffer_size = new int[NUM_GPU]();
for (int gpu = 0; gpu < NUM_GPU; gpu++) {
temp_buffer_size[gpu] = 0;
}
vector<set<Segment*>> segments_to_place;
multimap<double, Segment*>::reverse_iterator cit;
segments_to_place.resize(NUM_GPU);
cit = access_weight_map.rbegin();
while (cit != access_weight_map.rend()) {
//cache each selected segment on exactly one gpu, chosen round-robin by segment id (this routine never replicates)
Segment* seg = cit->second;
//round-robin partitioning by segment id; this balances perfectly only when the segment count is divisible by NUM_GPU
int gpu = seg->segment_id % NUM_GPU;
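//e.g. with NUM_GPU = 4, segment ids 0,4,8,... land on gpu 0, ids 1,5,9,... on gpu 1, and so on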
if(temp_buffer_size[gpu] + 1 < cache_each_tot_seg && cit->first > 0) {
temp_buffer_size[gpu]+=1;
segments_to_place[gpu].insert(seg);
// cout << "Placing ";
// cout << seg->column->column_name << " segment " << seg->segment_id;
// cout << " in " << gpu << endl;
}
++cit;
}
for (int gpu = 0; gpu < NUM_GPU; gpu++) {
assert(temp_buffer_size[gpu] <= cache_each_tot_seg);
cout << "Cached segment in GPU " << gpu << " : " << temp_buffer_size[gpu] << " Cache total: " << cache_each_tot_seg << endl;
}
for (int i = 0; i < TOT_COLUMN; i++) {
for (int j = 0; j < allColumn[i]->total_segment; j++) {
Segment* segment = index_to_segment[i][j];
for (int gpu = 0; gpu < NUM_GPU; gpu++) {
if (segments_to_place[gpu].find(segment) == segments_to_place[gpu].end()) {
unsigned char check = (segment_bitmap[i][j] << gpu);
check = check >> 7;
if (check == 0x01) {
assert(cache_mapper[gpu].find(segment) != cache_mapper[gpu].end());
// cout << "Deleting segment ";
// cout << segment->column->column_name << " segment " << segment->segment_id << endl;
deleteSegmentInGPU(segment, gpu);
}
}
}
}
}
for (int gpu = 0; gpu < NUM_GPU; gpu++) {
set<Segment*>::const_iterator cit2;
for(cit2 = segments_to_place[gpu].cbegin();cit2 != segments_to_place[gpu].cend(); ++cit2){
unsigned char check = (segment_bitmap[(*cit2)->column->column_id][(*cit2)->segment_id] << gpu);
check = check >> 7;
if (check == 0x00) {
assert(cache_mapper[gpu].find(*cit2) == cache_mapper[gpu].end());
// cout << "Caching segment ";
// cout << (*cit2)->column->column_name << " " << (*cit2)->segment_id << endl;
cacheSegmentInGPU(*cit2, gpu);
traffic += SEGMENT_SIZE * sizeof(int);
}
}
}
cout << "Successfully cached" << endl;
delete[] temp_buffer_size;
segments_to_place.clear();
return traffic;
}
unsigned long long
CacheManager::ReplicationDataPlacement() {
multimap<double, Segment*> access_weight_map;
unsigned long long traffic = 0;
for (int i = TOT_COLUMN-1; i >= 0; i--) {
for (int j = 0; j < allColumn[i]->total_segment; j++) {
Segment* segment = index_to_segment[i][j];
if (segment->weight > 0) access_weight_map.insert({segment->weight, segment});
}
}
int* temp_buffer_size = new int[NUM_GPU]();
for (int gpu = 0; gpu < NUM_GPU; gpu++) {
temp_buffer_size[gpu] = 0;
}
vector<set<Segment*>> segments_to_place;
multimap<double, Segment*>::reverse_iterator cit;
segments_to_place.resize(NUM_GPU);
cit = access_weight_map.rbegin();
while (cit != access_weight_map.rend()) {
//replicate every selected segment to all gpus, subject to each gpu's capacity
Segment* seg = cit->second;
for (int gpu = 0; gpu < NUM_GPU; gpu++) {
if(temp_buffer_size[gpu] + 1 < cache_each_tot_seg && cit->first > 0) {
temp_buffer_size[gpu]+=1;
segments_to_place[gpu].insert(seg);
// cout << "Placing ";
// cout << seg->column->column_name << " segment " << seg->segment_id;
// cout << " in " << gpu << endl;
}
}
++cit;
}
for (int gpu = 0; gpu < NUM_GPU; gpu++) {
assert(temp_buffer_size[gpu] <= cache_each_tot_seg);
cout << "Cached segment in GPU " << gpu << " : " << temp_buffer_size[gpu] << " Cache total: " << cache_each_tot_seg << endl;
}
for (int i = 0; i < TOT_COLUMN; i++) {
for (int j = 0; j < allColumn[i]->total_segment; j++) {
Segment* segment = index_to_segment[i][j];
for (int gpu = 0; gpu < NUM_GPU; gpu++) {
if (segments_to_place[gpu].find(segment) == segments_to_place[gpu].end()) {
unsigned char check = (segment_bitmap[i][j] << gpu);
check = check >> 7;
if (check == 0x01) {
assert(cache_mapper[gpu].find(segment) != cache_mapper[gpu].end());
// cout << "Deleting segment ";
// cout << segment->column->column_name << " segment " << segment->segment_id << endl;
deleteSegmentInGPU(segment, gpu);
}
}
}
}
}
for (int gpu = 0; gpu < NUM_GPU; gpu++) {
set<Segment*>::const_iterator cit2;
for(cit2 = segments_to_place[gpu].cbegin();cit2 != segments_to_place[gpu].cend(); ++cit2){
unsigned char check = (segment_bitmap[(*cit2)->column->column_id][(*cit2)->segment_id] << gpu);
check = check >> 7;
if (check == 0x00) {
assert(cache_mapper[gpu].find(*cit2) == cache_mapper[gpu].end());
// cout << "Caching segment ";
// cout << (*cit2)->column->column_name << " " << (*cit2)->segment_id << endl;
cacheSegmentInGPU(*cit2, gpu);
traffic += SEGMENT_SIZE * sizeof(int);
}
}
}
cout << "Successfully cached" << endl;
delete[] temp_buffer_size;
segments_to_place.clear();
return traffic;
}
unsigned long long
CacheManager::ShuffleAwareDataPlacement() {
multimap<double, Segment*> access_weight_map;
multimap<double, Segment*> replication_weight_map;
vector<vector<bool>> segrow_is_replicated;
unsigned long long traffic = 0;
for (int i = TOT_COLUMN-1; i >= 0; i--) {
for (int j = 0; j < allColumn[i]->total_segment; j++) {
Segment* segment = index_to_segment[i][j];
if (segment->weight > 0) access_weight_map.insert({segment->weight, segment});
if (segment->repl_weight > 0) replication_weight_map.insert({segment->repl_weight, segment});
// cout << allColumn[i]->column_name << " " << j << " " << segment->weight << endl;
}
}
int* temp_buffer_size = new int[NUM_GPU]();
for (int gpu = 0; gpu < NUM_GPU; gpu++) {
temp_buffer_size[gpu] = 0;
}
vector<set<Segment*>> segments_to_place;
map<Segment*, int> map_segment;
multimap<double, Segment*>::reverse_iterator cit;
multimap<double, Segment*>::reverse_iterator cit2;
segrow_is_replicated.resize(NUM_TABLE);
for (int table = 0; table < NUM_TABLE; table++) {
segrow_is_replicated[table].resize(allTable[table]->total_segment);
}
segments_to_place.resize(NUM_GPU);
cit2 = replication_weight_map.rbegin();
cit = access_weight_map.rbegin();
while (cit != access_weight_map.rend() || cit2 != replication_weight_map.rend()) {
//cache segment to gpu
if ((cit != access_weight_map.rend() && cit2 == replication_weight_map.rend()) || (cit != access_weight_map.rend() && cit2 != replication_weight_map.rend() && cit->first >= cit2->first)) {
//check if the segment is marked as replicated
//if it is, replicate it to all gpus right away
//if it is not, cache it on a single gpu
Segment* seg = cit->second;
assert(map_segment[seg] != -1);
//if this segrow is already replicated then you have to replicate everything
if (segrow_is_replicated[seg->column->table_id][seg->segment_id]) {
//sanity check: every segment of this segrow is either unplaced (-1) or replicated (99)
for (int i = 0; i < columns_in_table[seg->column->table_id].size(); i++) {
int column_id = columns_in_table[seg->column->table_id][i];
Segment* next_seg = index_to_segment[column_id][seg->segment_id];
assert(map_segment[next_seg] == -1 || map_segment[next_seg] == 99);
}
//check if we have the capacity
bool could_replicate = couldReplicateCheck(seg, cit->first, cache_each_tot_seg, temp_buffer_size);
if (could_replicate) {
for (int gpu = 0; gpu < NUM_GPU; gpu++) {
if(temp_buffer_size[gpu] + 1 < cache_each_tot_seg && cit->first > 0) {
temp_buffer_size[gpu]+=1;
segments_to_place[gpu].insert(seg);
// cout << "Placing ";
// cout << seg->column->column_name << " segment " << seg->segment_id;
// cout << " in " << gpu << endl;
}
}
map_segment[seg] = 99;
}
} else {
//fact table segments are partitioned round-robin; dimension table segments are range-partitioned into contiguous slices
//THIS ONLY WORKS IF THE TOTAL DIM SEGMENT IS DIVISIBLE BY NUM GPU
int gpu;
if (seg->column->table_id == 0) gpu = seg->segment_id % NUM_GPU;
else gpu = seg->segment_id * NUM_GPU/ seg->column->total_segment;
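//e.g. (hypothetical counts) a dimension column with total_segment = 8 on
//NUM_GPU = 4 maps segment ids 0-1 to gpu 0, 2-3 to gpu 1, 4-5 to gpu 2, and
//6-7 to gpu 3, i.e. contiguous ranges rather than round-robin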
if(temp_buffer_size[gpu] + 1 < cache_each_tot_seg && cit->first > 0) {
temp_buffer_size[gpu]+=1;
segments_to_place[gpu].insert(seg);
// cout << "Placing ";
// cout << seg->column->column_name << " segment " << seg->segment_id;
// cout << " in " << gpu << endl;
map_segment[seg] = gpu;
}
}
++cit;
} else if ((cit == access_weight_map.rend() && cit2 != replication_weight_map.rend()) || (cit != access_weight_map.rend() && cit2 != replication_weight_map.rend() && cit->first < cit2->first)){
//cache A, cache B, replicate A, replicate B
//cache A, replicate A, cache B, replicate B
//cache A, replicate A, replicate B, cache B
//replicate A, cache A, replicate B, cache B
//replicate A, replicate B, cache A, cache B
//replicate A, cache A, cache B, replicate B
Segment* seg = cit2->second;
//if the segrow is replicated, the segment should already have been replicated when it was cached
if (segrow_is_replicated[seg->column->table_id][seg->segment_id] == 1) {
for (int i = 0; i < columns_in_table[seg->column->table_id].size(); i++) {
int column_id = columns_in_table[seg->column->table_id][i];
Segment* next_seg = index_to_segment[column_id][seg->segment_id];
assert(map_segment[next_seg] == -1 || map_segment[next_seg] == 99);
}
} else {
//map_segment == 99 here would mean the segment was replicated without the segrow flag being set, which is an inconsistent state
if (map_segment[seg] == 99) {
assert(0);
//this segment row has not been replicated but seg is already cached on one gpu
} else if (map_segment[seg] != -1) {
assert(segrow_is_replicated[seg->column->table_id][seg->segment_id] == 0);
bool could_replicate = couldReplicateCachedSegmentRow(seg, cit2->first, cache_each_tot_seg, temp_buffer_size, map_segment);
if (could_replicate) {
int cur_gpu = map_segment[seg];
for (int gpu = 0; gpu < NUM_GPU; gpu++) {
if (gpu != cur_gpu) {
if(temp_buffer_size[gpu] + 1 < cache_each_tot_seg && cit2->first > 0) {
temp_buffer_size[gpu]+=1;
segments_to_place[gpu].insert(seg);
// cout << "Placing ";
// cout << seg->column->column_name << " segment " << seg->segment_id;
// cout << " in " << gpu << endl;
}
}
}
map_segment[seg] = 99; // a sign that it is replicated
//replicate all other segments in the same segment row
for (int i = 0; i < columns_in_table[seg->column->table_id].size(); i++) {
int column_id = columns_in_table[seg->column->table_id][i];
if (column_id != seg->column->column_id) {
Segment* next_seg = index_to_segment[column_id][seg->segment_id];
//if the segment is already cached then you want to replicate it
//if the segment is already replicated then do nothing
if (map_segment[next_seg] != -1 && map_segment[next_seg] != 99) {
int cur_gpu = map_segment[next_seg];
for (int gpu = 0; gpu < NUM_GPU; gpu++) {
if (gpu != cur_gpu) {
if(temp_buffer_size[gpu] + 1 < cache_each_tot_seg && cit2->first > 0) {
temp_buffer_size[gpu]+=1;
segments_to_place[gpu].insert(next_seg);
// cout << "Placing ";
// cout << next_seg->column->column_name << " segment " << next_seg->segment_id;
// cout << " in " << gpu << endl;
}
}
}
map_segment[next_seg] = 99; // a sign that this segment is replicated
}
}
}
segrow_is_replicated[seg->column->table_id][seg->segment_id] = 1;
}
//this segment has not been cached
} else if (map_segment[seg] == -1) {
bool could_replicate = couldReplicateCachedSegmentRow(seg, cit2->first, cache_each_tot_seg, temp_buffer_size, map_segment);
if (could_replicate) {
//replicate all other segments in the same segment row
for (int i = 0; i < columns_in_table[seg->column->table_id].size(); i++) {
int column_id = columns_in_table[seg->column->table_id][i];
if (column_id != seg->column->column_id) {
Segment* next_seg = index_to_segment[column_id][seg->segment_id];
//if the segment is already cached then you want to replicate it
//if the segment is already replicated then do nothing
if (map_segment[next_seg] != -1 && map_segment[next_seg] != 99) {
cout << "Warning" << endl;
int cur_gpu = map_segment[next_seg];
for (int gpu = 0; gpu < NUM_GPU; gpu++) {
if (gpu != cur_gpu) {
if(temp_buffer_size[gpu] + 1 < cache_each_tot_seg && cit2->first > 0) {
temp_buffer_size[gpu]+=1;
segments_to_place[gpu].insert(next_seg);
// cout << "Placing ";
// cout << next_seg->column->column_name << " segment " << next_seg->segment_id;
// cout << " in " << gpu << endl;
}
}
}
map_segment[next_seg] = 99; // a sign that this segment is replicated
}
}
}
segrow_is_replicated[seg->column->table_id][seg->segment_id] = 1;
}
}
}
++cit2;
} else {
assert(0);
}
}
for (int gpu = 0; gpu < NUM_GPU; gpu++) {
assert(temp_buffer_size[gpu] <= cache_each_tot_seg);
cout << "Cached segment in GPU " << gpu << " : " << temp_buffer_size[gpu] << " Cache total: " << cache_each_tot_seg << endl;
}
for (int i = 0; i < TOT_COLUMN; i++) {
for (int j = 0; j < allColumn[i]->total_segment; j++) {
Segment* segment = index_to_segment[i][j];
for (int gpu = 0; gpu < NUM_GPU; gpu++) {
if (segments_to_place[gpu].find(segment) == segments_to_place[gpu].end()) {
unsigned char check = (segment_bitmap[i][j] << gpu);
check = check >> 7;
if (check == 0x01) {
assert(cache_mapper[gpu].find(segment) != cache_mapper[gpu].end());
// cout << "Deleting segment ";
// cout << segment->column->column_name << " segment " << segment->segment_id << endl;
deleteSegmentInGPU(segment, gpu);
}
}
}
}
}
for (int gpu = 0; gpu < NUM_GPU; gpu++) {
set<Segment*>::const_iterator cit2;
for(cit2 = segments_to_place[gpu].cbegin();cit2 != segments_to_place[gpu].cend(); ++cit2){
unsigned char check = (segment_bitmap[(*cit2)->column->column_id][(*cit2)->segment_id] << gpu);
check = check >> 7;
if (check == 0x00) {
assert(cache_mapper[gpu].find(*cit2) == cache_mapper[gpu].end());
// cout << "Caching segment ";
// cout << (*cit2)->column->column_name << " " << (*cit2)->segment_id << endl;
cacheSegmentInGPU(*cit2, gpu);
traffic += SEGMENT_SIZE * sizeof(int);
}
}
}
cout << "Successfully cached" << endl;
delete[] temp_buffer_size;
map_segment.clear();
segments_to_place.clear();
return traffic;
}
void
CacheManager::ShuffleAware(Distribution dist, bool opt) {
int start = 0;
if (dist == Zipf) start = (LO_LEN/SEGMENT_SIZE)*2.8/7;
// if (dist == Zipf) start = (LO_LEN/SEGMENT_SIZE)/2;
if (SF == 322 || SF == 402) {
assert(opt == 0);
assignWeight(lo_suppkey, start, lo_suppkey->total_segment, 850, 0);
assignWeight(lo_custkey, start, lo_custkey->total_segment, 750, 0);
assignWeight(lo_partkey, start, lo_partkey->total_segment, 650, 0);
assignWeight(lo_orderdate, start, lo_orderdate->total_segment, 550, 0);
assignWeight(lo_revenue, start, lo_revenue->total_segment, 540, 0);
assignWeight(lo_supplycost, start, lo_supplycost->total_segment, 520, 0);
assignWeight(lo_quantity, start, lo_quantity->total_segment, 500, 0);
assignWeight(lo_discount, start, lo_discount->total_segment, 100, 0);
assignWeight(lo_extendedprice, start, lo_extendedprice->total_segment, 50, 0);
assignWeight(s_suppkey, 0, s_suppkey->total_segment, 1000, 400);
assignWeight(s_region, 0, s_region->total_segment, 900, 400);
assignWeight(s_nation, 0, s_nation->total_segment, 900, 400);
assignWeight(s_city, 0, s_city->total_segment, 900, 400);
if (dist != Zipf) {
assignWeight(c_custkey, 0, c_custkey->total_segment, 1000, 0);
assignWeight(c_region, 0, c_region->total_segment, 800, 0);
assignWeight(c_nation, 0, c_nation->total_segment, 800, 0);
assignWeight(c_city, 0, c_city->total_segment, 800, 0);
} else {
assignWeight(c_custkey, 0, c_custkey->total_segment, 1000, 25);
assignWeight(c_region, 0, c_region->total_segment, 800, 25);
assignWeight(c_nation, 0, c_nation->total_segment, 800, 25);
assignWeight(c_city, 0, c_city->total_segment, 800, 25);
}
assignWeight(p_partkey, 0, p_partkey->total_segment, 1000, 0);
assignWeight(p_mfgr, 0, p_mfgr->total_segment, 700, 0);
assignWeight(p_category, 0, p_category->total_segment, 700, 0);
assignWeight(p_brand1, 0, p_brand1->total_segment, 700, 0);
assignWeight(d_datekey, 0, d_datekey->total_segment, 1000, 1000);
assignWeight(d_year, 0, d_year->total_segment, 1000, 1000);
assignWeight(d_yearmonthnum, 0, d_yearmonthnum->total_segment, 1000, 1000);
} else if (SF % 10 == 0) {
assert(opt == 0);
assert(dist == None);
assignWeight(lo_suppkey, start, lo_suppkey->total_segment, 850, 0);
assignWeight(lo_custkey, start, lo_custkey->total_segment, 750, 0);
assignWeight(lo_partkey, start, lo_partkey->total_segment, 650, 0);
assignWeight(lo_orderdate, start, lo_orderdate->total_segment, 550, 0);
assignWeight(lo_revenue, start, lo_revenue->total_segment, 540, 0);
assignWeight(lo_supplycost, start, lo_supplycost->total_segment, 520, 0);
assignWeight(lo_quantity, start, lo_quantity->total_segment, 500, 0);
assignWeight(lo_discount, start, lo_discount->total_segment, 100, 0);
assignWeight(lo_extendedprice, start, lo_extendedprice->total_segment, 50, 0);
assignWeight(s_suppkey, 0, s_suppkey->total_segment, 1000, 900);
assignWeight(s_region, 0, s_region->total_segment, 900, 900);
assignWeight(s_nation, 0, s_nation->total_segment, 900, 900);
assignWeight(s_city, 0, s_city->total_segment, 900, 900);
assignWeight(c_custkey, 0, c_custkey->total_segment, 1000, 1000);
assignWeight(c_region, 0, c_region->total_segment, 900, 900);
assignWeight(c_nation, 0, c_nation->total_segment, 900, 900);
assignWeight(c_city, 0, c_city->total_segment, 900, 900);
assignWeight(p_partkey, 0, p_partkey->total_segment, 1000, 1000);
assignWeight(p_mfgr, 0, p_mfgr->total_segment, 900, 900);
assignWeight(p_category, 0, p_category->total_segment, 900, 900);
assignWeight(p_brand1, 0, p_brand1->total_segment, 900, 900);
assignWeight(d_datekey, 0, d_datekey->total_segment, 1000, 1000);
assignWeight(d_year, 0, d_year->total_segment, 1000, 1000);
assignWeight(d_yearmonthnum, 0, d_yearmonthnum->total_segment, 1000, 1000);
} else {
assignWeight(lo_suppkey, start, lo_suppkey->total_segment, 850, 0);
assignWeight(lo_custkey, start, lo_custkey->total_segment, 750, 0);
assignWeight(lo_partkey, start, lo_partkey->total_segment, 650, 0);
assignWeight(lo_orderdate, start, lo_orderdate->total_segment, 550, 0);
assignWeight(lo_revenue, start, lo_revenue->total_segment, 540, 0);
assignWeight(lo_supplycost, start, lo_supplycost->total_segment, 520, 0);
assignWeight(lo_quantity, start, lo_quantity->total_segment, 500, 0);
assignWeight(lo_discount, start, lo_discount->total_segment, 100, 0);
assignWeight(lo_extendedprice, start, lo_extendedprice->total_segment, 50, 0);
if (opt) {
assignWeight(s_suppkey, 0, s_suppkey->total_segment, 1000, 0);
assignWeight(s_region, 0, s_region->total_segment, 900, 0);
assignWeight(s_nation, 0, s_nation->total_segment, 900, 0);
assignWeight(s_city, 0, s_city->total_segment, 900, 0);
assignWeight(c_custkey, 0, c_custkey->total_segment, 1000, 0);
assignWeight(c_region, 0, c_region->total_segment, 800, 0);
assignWeight(c_nation, 0, c_nation->total_segment, 800, 0);
assignWeight(c_city, 0, c_city->total_segment, 800, 0);
assignWeight(p_partkey, 0, p_partkey->total_segment, 1000, 0);
assignWeight(p_mfgr, 0, p_mfgr->total_segment, 700, 0);
assignWeight(p_category, 0, p_category->total_segment, 700, 0);
assignWeight(p_brand1, 0, p_brand1->total_segment, 700, 0);
} else {
assignWeight(s_suppkey, 0, s_suppkey->total_segment, 1000, 400);
assignWeight(s_region, 0, s_region->total_segment, 900, 400);
assignWeight(s_nation, 0, s_nation->total_segment, 900, 400);
assignWeight(s_city, 0, s_city->total_segment, 900, 400);
if (SF == 162 && dist == Zipf && NUM_GPU == 2) {
assignWeight(c_custkey, 0, c_custkey->total_segment, 1000, 0);
assignWeight(c_region, 0, c_region->total_segment, 800, 0);
assignWeight(c_nation, 0, c_nation->total_segment, 800, 0);
assignWeight(c_city, 0, c_city->total_segment, 800, 0);
} else {
assignWeight(c_custkey, 0, c_custkey->total_segment, 1000, 300);
assignWeight(c_region, 0, c_region->total_segment, 800, 300);
assignWeight(c_nation, 0, c_nation->total_segment, 800, 300);
assignWeight(c_city, 0, c_city->total_segment, 800, 300);
}
if ((SF == 162 && dist != Zipf && NUM_GPU == 4) || (SF == 162 && dist == Zipf && NUM_GPU == 2)) {
assignWeight(p_partkey, 0, p_partkey->total_segment, 1000, 0);
assignWeight(p_mfgr, 0, p_mfgr->total_segment, 700, 0);
assignWeight(p_category, 0, p_category->total_segment, 700, 0);
assignWeight(p_brand1, 0, p_brand1->total_segment, 700, 0);
} else {
assignWeight(p_partkey, 0, p_partkey->total_segment, 1000, 200);
assignWeight(p_mfgr, 0, p_mfgr->total_segment, 700, 200);
assignWeight(p_category, 0, p_category->total_segment, 700, 200);
assignWeight(p_brand1, 0, p_brand1->total_segment, 700, 200);
}
}
assignWeight(d_datekey, 0, d_datekey->total_segment, 1000, 1000);
assignWeight(d_year, 0, d_year->total_segment, 1000, 1000);
assignWeight(d_yearmonthnum, 0, d_yearmonthnum->total_segment, 1000, 1000);
}
ShuffleAwareDataPlacement();
string path;
if (dist == None) {
path = string("logs/stats/shuffleaware") + to_string(NUM_GPU) + "GPUsSF" + to_string(SF);
if (!opt) dumpStats(path, dist);
} else if (dist == Zipf) {
path = string("logs/stats/shuffleaware") + to_string(NUM_GPU) + "GPUsSF" + to_string(SF) + "Zipf";
if (!opt) dumpStats(path, dist);
}
}
void
CacheManager::PartitioningOnly(Distribution dist) {
if (SF % 10 == 0) assert(0);
int start = 0;
// if (dist == Zipf) start = (LO_LEN/SEGMENT_SIZE)*2.8/7;
assignWeight(lo_suppkey, start, lo_suppkey->total_segment, 850, 0);
assignWeight(lo_custkey, start, lo_custkey->total_segment, 750, 0);
assignWeight(lo_partkey, start, lo_partkey->total_segment, 650, 0);
assignWeight(lo_orderdate, start, lo_orderdate->total_segment, 550, 0);
assignWeight(lo_revenue, start, lo_revenue->total_segment, 540, 0);
assignWeight(lo_supplycost, start, lo_supplycost->total_segment, 520, 0);
assignWeight(lo_quantity, start, lo_quantity->total_segment, 500, 0);
assignWeight(lo_discount, start, lo_discount->total_segment, 100, 0);
assignWeight(lo_extendedprice, start, lo_extendedprice->total_segment, 50, 0);
assignWeight(s_suppkey, 0, s_suppkey->total_segment, 1000, 0);
assignWeight(s_region, 0, s_region->total_segment, 900, 0);
assignWeight(s_nation, 0, s_nation->total_segment, 900, 0);
assignWeight(s_city, 0, s_city->total_segment, 900, 0);
assignWeight(c_custkey, 0, c_custkey->total_segment, 1000, 0);
assignWeight(c_region, 0, c_region->total_segment, 800, 0);
assignWeight(c_nation, 0, c_nation->total_segment, 800, 0);
assignWeight(c_city, 0, c_city->total_segment, 800, 0);
assignWeight(p_partkey, 0, p_partkey->total_segment, 1000, 0);
assignWeight(p_mfgr, 0, p_mfgr->total_segment, 700, 0);
assignWeight(p_category, 0, p_category->total_segment, 700, 0);
assignWeight(p_brand1, 0, p_brand1->total_segment, 700, 0);
assignWeight(d_datekey, 0, d_datekey->total_segment, 1000, 0);
assignWeight(d_year, 0, d_year->total_segment, 1000, 0);
assignWeight(d_yearmonthnum, 0, d_yearmonthnum->total_segment, 1000, 0);
PartitionDataPlacement();
string path;
if (dist == None) {
path = string("logs/stats/partitiononly") + to_string(NUM_GPU) + "GPUsSF" + to_string(SF);
dumpStats(path, dist);
} else if (dist == Zipf) {
path = string("logs/stats/partitiononly") + to_string(NUM_GPU) + "GPUsSF" + to_string(SF) + "Zipf";
dumpStats(path, dist);
}
}
void
CacheManager::ReplicationOnly(Distribution dist) {
if (SF % 10 == 0) assert(0);
int start = 0;
if (dist == Zipf) start = (LO_LEN/SEGMENT_SIZE)*2.8/7;
assignWeight(lo_suppkey, start, lo_suppkey->total_segment, 850, 0);
assignWeight(lo_custkey, start, lo_custkey->total_segment, 750, 0);
assignWeight(lo_partkey, start, lo_partkey->total_segment, 650, 0);
assignWeight(lo_orderdate, start, lo_orderdate->total_segment, 550, 0);
assignWeight(lo_revenue, start, lo_revenue->total_segment, 540, 0);
assignWeight(lo_supplycost, start, lo_supplycost->total_segment, 520, 0);
assignWeight(lo_quantity, start, lo_quantity->total_segment, 500, 0);
assignWeight(lo_discount, start, lo_discount->total_segment, 100, 0);
assignWeight(lo_extendedprice, start, lo_extendedprice->total_segment, 50, 0);
assignWeight(s_suppkey, 0, s_suppkey->total_segment, 1000, 0);
assignWeight(s_region, 0, s_region->total_segment, 900, 0);
assignWeight(s_nation, 0, s_nation->total_segment, 900, 0);
assignWeight(s_city, 0, s_city->total_segment, 900, 0);
assignWeight(c_custkey, 0, c_custkey->total_segment, 1000, 0);
assignWeight(c_region, 0, c_region->total_segment, 800, 0);
assignWeight(c_nation, 0, c_nation->total_segment, 800, 0);
assignWeight(c_city, 0, c_city->total_segment, 800, 0);
assignWeight(p_partkey, 0, p_partkey->total_segment, 1000, 0);
assignWeight(p_mfgr, 0, p_mfgr->total_segment, 700, 0);
assignWeight(p_category, 0, p_category->total_segment, 700, 0);
assignWeight(p_brand1, 0, p_brand1->total_segment, 700, 0);
assignWeight(d_datekey, 0, d_datekey->total_segment, 1000, 0);
assignWeight(d_year, 0, d_year->total_segment, 1000, 0);
assignWeight(d_yearmonthnum, 0, d_yearmonthnum->total_segment, 1000, 0);
ReplicationDataPlacement();
string path;
if (dist == None) {
path = string("logs/stats/replicationonly") + to_string(NUM_GPU) + "GPUsSF" + to_string(SF);
dumpStats(path, dist);
} else if (dist == Zipf) {
path = string("logs/stats/replicationonly") + to_string(NUM_GPU) + "GPUsSF" + to_string(SF) + "Zipf";
cout << path << endl;
dumpStats(path, dist);
}
}
void
CacheManager::loadColumnToCPU() {
lo = new TableInfo("lo", LO_LEN, 0);
s = new TableInfo("s", S_LEN, 1);
c = new TableInfo("c", C_LEN, 2);
p = new TableInfo("p", P_LEN, 3);
d = new TableInfo("d", D_LEN, 4);
allTable[0] = lo;
allTable[1] = s;
allTable[2] = c;
allTable[3] = p;
allTable[4] = d;
h_lo_orderkey = loadColumnPinned<int>("lo_orderkey", LO_LEN);
h_lo_suppkey = loadColumnPinned<int>("lo_suppkey", LO_LEN);
h_lo_custkey = loadColumnPinned<int>("lo_custkey", LO_LEN);
h_lo_partkey = loadColumnPinned<int>("lo_partkey", LO_LEN);
h_lo_orderdate = loadColumnPinned<int>("lo_orderdate", LO_LEN);
h_lo_revenue = loadColumnPinned<int>("lo_revenue", LO_LEN);
h_lo_discount = loadColumnPinned<int>("lo_discount", LO_LEN);
h_lo_quantity = loadColumnPinned<int>("lo_quantity", LO_LEN);
h_lo_extendedprice = loadColumnPinned<int>("lo_extendedprice", LO_LEN);
h_lo_supplycost = loadColumnPinned<int>("lo_supplycost", LO_LEN);
// h_lo_orderkey = loadColumnPinnedSort<int>("lo_orderkey", LO_LEN);
// h_lo_suppkey = loadColumnPinnedSort<int>("lo_suppkey", LO_LEN);
// h_lo_custkey = loadColumnPinnedSort<int>("lo_custkey", LO_LEN);
// h_lo_partkey = loadColumnPinnedSort<int>("lo_partkey", LO_LEN);
// h_lo_orderdate = loadColumnPinnedSort<int>("lo_orderdate", LO_LEN);
// h_lo_revenue = loadColumnPinnedSort<int>("lo_revenue", LO_LEN);
// h_lo_discount = loadColumnPinnedSort<int>("lo_discount", LO_LEN);
// h_lo_quantity = loadColumnPinnedSort<int>("lo_quantity", LO_LEN);
// h_lo_extendedprice = loadColumnPinnedSort<int>("lo_extendedprice", LO_LEN);
// h_lo_supplycost = loadColumnPinnedSort<int>("lo_supplycost", LO_LEN);
h_c_custkey = loadColumnPinned<int>("c_custkey", C_LEN);
h_c_nation = loadColumnPinned<int>("c_nation", C_LEN);
h_c_region = loadColumnPinned<int>("c_region", C_LEN);
h_c_city = loadColumnPinned<int>("c_city", C_LEN);
h_s_suppkey = loadColumnPinned<int>("s_suppkey", S_LEN);
h_s_nation = loadColumnPinned<int>("s_nation", S_LEN);
h_s_region = loadColumnPinned<int>("s_region", S_LEN);
h_s_city = loadColumnPinned<int>("s_city", S_LEN);
h_p_partkey = loadColumnPinned<int>("p_partkey", P_LEN);
h_p_brand1 = loadColumnPinned<int>("p_brand1", P_LEN);
h_p_category = loadColumnPinned<int>("p_category", P_LEN);
h_p_mfgr = loadColumnPinned<int>("p_mfgr", P_LEN);
h_d_datekey = loadColumnPinned<int>("d_datekey", D_LEN);
h_d_year = loadColumnPinned<int>("d_year", D_LEN);
h_d_yearmonthnum = loadColumnPinned<int>("d_yearmonthnum", D_LEN);
lo_orderkey = new ColumnInfo(lo, "lo_orderkey", 0, h_lo_orderkey);
lo_suppkey = new ColumnInfo(lo, "lo_suppkey", 1, h_lo_suppkey);
lo_custkey = new ColumnInfo(lo, "lo_custkey", 2, h_lo_custkey);
lo_partkey = new ColumnInfo(lo, "lo_partkey", 3, h_lo_partkey);
lo_orderdate = new ColumnInfo(lo, "lo_orderdate", 4, h_lo_orderdate);
lo_revenue = new ColumnInfo(lo, "lo_revenue", 5, h_lo_revenue);
lo_discount = new ColumnInfo(lo, "lo_discount", 6, h_lo_discount);
lo_quantity = new ColumnInfo(lo, "lo_quantity", 7, h_lo_quantity);
lo_extendedprice = new ColumnInfo(lo, "lo_extendedprice", 8, h_lo_extendedprice);
lo_supplycost = new ColumnInfo(lo, "lo_supplycost", 9, h_lo_supplycost);
c_custkey = new ColumnInfo(c, "c_custkey", 10, h_c_custkey);
c_nation = new ColumnInfo(c, "c_nation", 11, h_c_nation);
c_region = new ColumnInfo(c, "c_region", 12, h_c_region);
c_city = new ColumnInfo(c, "c_city", 13, h_c_city);
s_suppkey = new ColumnInfo(s, "s_suppkey", 14, h_s_suppkey);
s_nation = new ColumnInfo(s, "s_nation", 15, h_s_nation);
s_region = new ColumnInfo(s, "s_region", 16, h_s_region);
s_city = new ColumnInfo(s, "s_city", 17, h_s_city);
p_partkey = new ColumnInfo(p, "p_partkey", 18, h_p_partkey);
p_brand1 = new ColumnInfo(p, "p_brand1", 19, h_p_brand1);
p_category = new ColumnInfo(p, "p_category", 20, h_p_category);
p_mfgr = new ColumnInfo(p, "p_mfgr", 21, h_p_mfgr);
d_datekey = new ColumnInfo(d, "d_datekey", 22, h_d_datekey);
d_year = new ColumnInfo(d, "d_year", 23, h_d_year);
d_yearmonthnum = new ColumnInfo(d, "d_yearmonthnum", 24, h_d_yearmonthnum);
allColumn[0] = lo_orderkey;
allColumn[1] = lo_suppkey;
allColumn[2] = lo_custkey;
allColumn[3] = lo_partkey;
allColumn[4] = lo_orderdate;
allColumn[5] = lo_revenue;
allColumn[6] = lo_discount;
allColumn[7] = lo_quantity;
allColumn[8] = lo_extendedprice;
allColumn[9] = lo_supplycost;
allColumn[10] = c_custkey;
allColumn[11] = c_nation;
allColumn[12] = c_region;
allColumn[13] = c_city;
allColumn[14] = s_suppkey;
allColumn[15] = s_nation;
allColumn[16] = s_region;
allColumn[17] = s_city;
allColumn[18] = p_partkey;
allColumn[19] = p_brand1;
allColumn[20] = p_category;
allColumn[21] = p_mfgr;
allColumn[22] = d_datekey;
allColumn[23] = d_year;
allColumn[24] = d_yearmonthnum;
for (int i = 0; i <= 9; i++) {
allTable[0]->columns.push_back(allColumn[i]);
allColumn[i]->column_table_id = allTable[0]->columns.size()-1;
}
for (int i = 10; i <= 13; i++) {
allTable[2]->columns.push_back(allColumn[i]);
allColumn[i]->column_table_id = allTable[2]->columns.size()-1;
}
for (int i = 14; i <= 17; i++) {
allTable[1]->columns.push_back(allColumn[i]);
allColumn[i]->column_table_id = allTable[1]->columns.size()-1;
}
for (int i = 18; i <= 21; i++) {
allTable[3]->columns.push_back(allColumn[i]);
allColumn[i]->column_table_id = allTable[3]->columns.size()-1;
}
for (int i = 22; i <= 24; i++) {
allTable[4]->columns.push_back(allColumn[i]);
allColumn[i]->column_table_id = allTable[4]->columns.size()-1;
}
columns_in_table.resize(TOT_TABLE);
for (int i = 0; i < TOT_COLUMN; i++) {
columns_in_table[allColumn[i]->table_id].push_back(allColumn[i]->column_id);
}
// unsigned long long sum = 0;
// for (int i = 0; i < LO_LEN; i++) {
// sum += h_lo_revenue[i];
// }
// cout << "Printing partkey" << endl;
// int count = 0;
// for (int i = 0; i < S_LEN; i++) {
// if (h_s_city[i] )
// printf("%d\n", h_s_city[i]);
// }
// cout << endl;
// cout << "Printing custkey" << endl;
// for (int i = 0; i < C_LEN; i++) {
// printf("%d %d %d %d\n", h_c_nation[2343346], h_c_nation[8054136], h_c_nation[8244933], h_c_nation[8247076]);
// }
// cout << "total sum " << sum << endl;
}
CacheManager::~CacheManager() {
for (int gpu = 0; gpu < NUM_GPU; gpu++) {
cudaSetDevice(gpu);
CubDebugExit(cudaFree(gpuCache[gpu]));
CubDebugExit(cudaFree(gpuProcessing[gpu]));
}
cudaSetDevice(0);
delete[] gpuCache;
delete[] gpuProcessing;
free(cpuProcessing); //allocated with malloc, so free() rather than delete[]
CubDebugExit(cudaFreeHost(pinnedMemory));
CubDebugExit(cudaFreeHost(h_lo_orderkey));
CubDebugExit(cudaFreeHost(h_lo_suppkey));
CubDebugExit(cudaFreeHost(h_lo_custkey));
CubDebugExit(cudaFreeHost(h_lo_partkey));
CubDebugExit(cudaFreeHost(h_lo_orderdate));
CubDebugExit(cudaFreeHost(h_lo_revenue));
CubDebugExit(cudaFreeHost(h_lo_discount));
CubDebugExit(cudaFreeHost(h_lo_quantity));
CubDebugExit(cudaFreeHost(h_lo_extendedprice));
CubDebugExit(cudaFreeHost(h_lo_supplycost));
CubDebugExit(cudaFreeHost(h_c_custkey));
CubDebugExit(cudaFreeHost(h_c_nation));
CubDebugExit(cudaFreeHost(h_c_region));
CubDebugExit(cudaFreeHost(h_c_city));
CubDebugExit(cudaFreeHost(h_s_suppkey));
CubDebugExit(cudaFreeHost(h_s_nation));
CubDebugExit(cudaFreeHost(h_s_region));
CubDebugExit(cudaFreeHost(h_s_city));
CubDebugExit(cudaFreeHost(h_p_partkey));
CubDebugExit(cudaFreeHost(h_p_brand1));
CubDebugExit(cudaFreeHost(h_p_category));
CubDebugExit(cudaFreeHost(h_p_mfgr));
CubDebugExit(cudaFreeHost(h_d_datekey));
CubDebugExit(cudaFreeHost(h_d_year));
CubDebugExit(cudaFreeHost(h_d_yearmonthnum));
delete lo_orderkey;
delete lo_orderdate;
delete lo_custkey;
delete lo_suppkey;
delete lo_partkey;
delete lo_revenue;
delete lo_discount;
delete lo_quantity;
delete lo_extendedprice;
delete lo_supplycost;
delete c_custkey;
delete c_nation;
delete c_region;
delete c_city;
delete s_suppkey;
delete s_nation;
delete s_region;
delete s_city;
delete p_partkey;
delete p_brand1;
delete p_category;
delete p_mfgr;
delete d_datekey;
delete d_year;
delete d_yearmonthnum;
for (int i = 0; i < TOT_COLUMN; i++) {
free(segment_bitmap[i]);
free(segment_min[i]);
free(segment_max[i]);
}
free(segment_bitmap);
free(segment_min);
free(segment_max);
for (int gpu = 0; gpu < NUM_GPU; gpu++) {
for (int i = 0; i < TOT_COLUMN; i++) {
CubDebugExit(cudaFreeHost(segment_list[gpu][i]));
}
free(segment_list[gpu]);
}
free(segment_list);
for (int gpu = 0; gpu < NUM_GPU; gpu++) {
for (int i = 0; i < TOT_TABLE; i++) {
free(gpu_segment_row_bitmap[gpu][i]);
}
free(gpu_segment_row_bitmap[gpu]);
}
free(gpu_segment_row_bitmap);
}
template int*
CacheManager::customMalloc<int>(int size);
template int*
CacheManager::customCudaMalloc<int>(int size, int gpu);
template int**
CacheManager::customCudaMalloc<int*>(int size, int gpu);
template int***
CacheManager::customCudaMalloc<int**>(int size, int gpu);
template int****
CacheManager::customCudaMalloc<int***>(int size, int gpu);
template int*
CacheManager::customCudaHostAlloc<int>(int size);
template short*
CacheManager::customMalloc<short>(int size);
template short*
CacheManager::customCudaMalloc<short>(int size, int gpu);
template short*
CacheManager::customCudaHostAlloc<short>(int size);