#include "QueryOptimizer.h"
#include "CostModel.h"
#include "MultiGPUProcessing.h"
CostModel::CostModel(int _L, int _total_segment, int _n_group_key, int _n_aggr_key, int _sg, int _table_id, QueryOptimizer* _qo) {
L = (double) _L;
ori_L = (double) _L;
n_group_key = _n_group_key;
n_aggr_key = _n_aggr_key;
sg = _sg;
table_id = _table_id;
qo = _qo;
total_segment = _total_segment;
Operator* op = qo->opRoots[table_id][sg];
// cout << op->type << endl;
opPipeline.push_back(op);
op = op->children;
while (op != NULL) {
// cout << op->type << endl;
if (op->type != Materialize && op->type != GPUtoCPU && op->type != CPUtoGPU && op->type != Merge) {
opPipeline.push_back(op);
}
op = op->children;
}
};
void
CostModel::clear() {
selectCPU.clear(); joinCPU.clear(); groupCPU.clear(); buildCPU.clear();
selectGPU.clear(); joinGPU.clear(); groupGPU.clear(); buildGPU.clear();
}
void
CostModel::permute_cost() {
for (int i = 0; i < opPipeline.size(); i++) {
Operator* op = opPipeline[i];
if (op->device == GPU) {
if (op->type == Probe) joinGPU.push_back(op->columns[0]);
else if (op->type == Filter) selectGPU.push_back(op->columns[0]);
else if (op->type == GroupBy || op->type == Aggr) {
for (int k = 0; k < op->columns.size(); k++)
groupGPU.push_back(op->columns[k]);
} else if (op->type == Build) buildGPU.push_back(op->columns[0]);
} else {
if (op->type == Probe) joinCPU.push_back(op->columns[0]);
else if (op->type == Filter) selectCPU.push_back(op->columns[0]);
else if (op->type == GroupBy || op->type == Aggr) {
for (int k = 0; k < op->columns.size(); k++)
groupCPU.push_back(op->columns[k]);
} else if (op->type == Build) buildCPU.push_back(op->columns[0]);
}
}
double default_cost = calculate_cost();
// cout << "sg: " << sg << " default cost: ";;
// printf("%.4f\n", default_cost);
clear();
// for (int seg = 0; seg < qo->segment_group_count[table_id][sg]; seg++) {
// int seg_id = qo->segment_group[table_id][sg * total_segment + seg];
// cout << seg_id << endl;
// }
for (int i = 0; i < opPipeline.size(); i++) {
double cost = 0;
Operator* cur_op = opPipeline[i];
if (cur_op->device == CPU) cur_op->device = GPU;
else if (cur_op->device == GPU) cur_op->device = CPU;
for (int j = 0; j < opPipeline.size(); j++) {
Operator* op = opPipeline[j];
if (op->device == GPU) {
if (op->type == Probe) joinGPU.push_back(op->columns[0]);
else if (op->type == Filter) selectGPU.push_back(op->columns[0]);
else if (op->type == GroupBy || op->type == Aggr) {
for (int k = 0; k < op->columns.size(); k++)
groupGPU.push_back(op->columns[k]);
} else if (op->type == Build) buildGPU.push_back(op->columns[0]);
} else {
if (op->type == Probe) joinCPU.push_back(op->columns[0]);
else if (op->type == Filter) selectCPU.push_back(op->columns[0]);
else if (op->type == GroupBy || op->type == Aggr) {
for (int k = 0; k < op->columns.size(); k++)
groupCPU.push_back(op->columns[k]);
} else if (op->type == Build) buildCPU.push_back(op->columns[0]);
}
}
cost = calculate_cost();
// cout << "select GPU" << endl;
// for (int i = 0; i < selectGPU.size(); i++) {
// cout << selectGPU[i]->column_name << endl;
// }
// cout << "select CPU" << endl;
// for (int i = 0; i < selectCPU.size(); i++) {
// cout << selectCPU[i]->column_name << endl;
// }
// cout << "join GPU" << endl;
// for (int i = 0; i < joinGPU.size(); i++) {
// cout << joinGPU[i]->column_name << endl;
// }
// cout << "join CPU" << endl;
// for (int i = 0; i < joinCPU.size(); i++) {
// cout << joinCPU[i]->column_name << endl;
// }
// cout << "group GPU" << endl;
// for (int i = 0; i < groupGPU.size(); i++) {
// cout << groupGPU[i]->column_name << endl;
// }
// cout << "group CPU" << endl;
// for (int i = 0; i < groupCPU.size(); i++) {
// cout << groupCPU[i]->column_name << endl;
// }
// cout << "sg: " << sg << " playing with " << cur_op->columns[0]->column_name << ": ";
// printf("%.4f\n", cost - default_cost);
// cout << cost << endl;
// chrono::high_resolution_clock::time_point cur_time = chrono::high_resolution_clock::now();
// chrono::duration<double> timestamp = cur_time - qo->cgp->begin_time;
// double time_count = timestamp.count();
// cout << time_count << endl;
// if (table_id == 0 && (selectCPU.size() + selectGPU.size()) > 0) cost = default_cost;
if (cur_op->device == CPU) {
cur_op->device = GPU;
for (int col = 0; col < cur_op->columns.size(); col++) {
ColumnInfo* column = cur_op->columns[col];
for (int seg = 0; seg < qo->segment_group_count[table_id][sg]; seg++) {
int seg_id = qo->segment_group[table_id][sg * total_segment + seg];
Segment* segment = qo->cm->index_to_segment[column->column_id][seg_id];
qo->cm->updateSegmentWeightCostDirect(column, segment, (cost - default_cost) / qo->segment_group_count[table_id][sg] / cur_op->columns.size());
}
}
for (int col = 0; col < cur_op->supporting_columns.size(); col++) {
ColumnInfo* column = cur_op->supporting_columns[col];
for (int seg_id = 0; seg_id < column->total_segment; seg_id++) {
Segment* segment = qo->cm->index_to_segment[column->column_id][seg_id];
qo->cm->updateSegmentWeightCostDirect(column, segment, (cost - default_cost) / column->total_segment);
}
}
} else if (cur_op->device == GPU) {
cur_op->device = CPU;
for (int col = 0; col < cur_op->columns.size(); col++) {
ColumnInfo* column = cur_op->columns[col];
for (int seg = 0; seg < qo->segment_group_count[table_id][sg]; seg++) {
int seg_id = qo->segment_group[table_id][sg * total_segment + seg];
Segment* segment = qo->cm->index_to_segment[column->column_id][seg_id];
qo->cm->updateSegmentWeightCostDirect(column, segment, (default_cost - cost) / qo->segment_group_count[table_id][sg]/ cur_op->columns.size());
}
}
for (int col = 0; col < cur_op->supporting_columns.size(); col++) {
ColumnInfo* column = cur_op->supporting_columns[col];
for (int seg_id = 0; seg_id < column->total_segment; seg_id++) {
Segment* segment = qo->cm->index_to_segment[column->column_id][seg_id];
qo->cm->updateSegmentWeightCostDirect(column, segment, (default_cost - cost) / column->total_segment);
}
}
}
clear();
}
}
void
CostModel::permute_costHE() {
for (int i = 0; i < opPipeline.size(); i++) {
Operator* op = opPipeline[i];
if (op->device == GPU) {
if (op->type == Probe) joinGPU.push_back(op->columns[0]);
else if (op->type == Filter) selectGPU.push_back(op->columns[0]);
else if (op->type == GroupBy || op->type == Aggr) {
for (int k = 0; k < op->columns.size(); k++)
groupGPU.push_back(op->columns[k]);
} else if (op->type == Build) buildGPU.push_back(op->columns[0]);
} else {
if (op->type == Probe) joinCPU.push_back(op->columns[0]);
else if (op->type == Filter) selectCPU.push_back(op->columns[0]);
else if (op->type == GroupBy || op->type == Aggr) {
for (int k = 0; k < op->columns.size(); k++)
groupCPU.push_back(op->columns[k]);
} else if (op->type == Build) buildCPU.push_back(op->columns[0]);
}
}
double default_cost = calculate_cost();
clear();
for (int i = 0; i < opPipeline.size(); i++) {
double cost = 0;
Operator* cur_op = opPipeline[i];
if (cur_op->device == CPU) cur_op->device = GPU;
else if (cur_op->device == GPU) cur_op->device = CPU;
for (int j = 0; j < opPipeline.size(); j++) {
Operator* op = opPipeline[j];
if (op->device == GPU) {
if (op->type == Probe) joinGPU.push_back(op->columns[0]);
else if (op->type == Filter) selectGPU.push_back(op->columns[0]);
else if (op->type == GroupBy || op->type == Aggr) {
for (int k = 0; k < op->columns.size(); k++)
groupGPU.push_back(op->columns[k]);
} else if (op->type == Build) buildGPU.push_back(op->columns[0]);
} else {
if (op->type == Probe) joinCPU.push_back(op->columns[0]);
else if (op->type == Filter) selectCPU.push_back(op->columns[0]);
else if (op->type == GroupBy || op->type == Aggr) {
for (int k = 0; k < op->columns.size(); k++)
groupCPU.push_back(op->columns[k]);
} else if (op->type == Build) buildCPU.push_back(op->columns[0]);
}
}
cost = calculate_cost();
if (cur_op->device == CPU) {
cur_op->device = GPU;
for (int col = 0; col < cur_op->columns.size(); col++) {
ColumnInfo* column = cur_op->columns[col];
for (int seg = 0; seg < qo->segment_group_count[table_id][sg]; seg++) {
int seg_id = sg;
Segment* segment = qo->cm->index_to_segment[column->column_id][seg_id];
qo->cm->updateSegmentWeightCostDirect(column, segment, (cost - default_cost) / qo->segment_group_count[table_id][sg] / cur_op->columns.size());
}
}
for (int col = 0; col < cur_op->supporting_columns.size(); col++) {
ColumnInfo* column = cur_op->supporting_columns[col];
for (int seg_id = 0; seg_id < column->total_segment; seg_id++) {
Segment* segment = qo->cm->index_to_segment[column->column_id][seg_id];
qo->cm->updateSegmentWeightCostDirect(column, segment, (cost - default_cost) / column->total_segment);
}
}
} else if (cur_op->device == GPU) {
cur_op->device = CPU;
for (int col = 0; col < cur_op->columns.size(); col++) {
ColumnInfo* column = cur_op->columns[col];
for (int seg = 0; seg < qo->segment_group_count[table_id][sg]; seg++) {
int seg_id = sg;
Segment* segment = qo->cm->index_to_segment[column->column_id][seg_id];
qo->cm->updateSegmentWeightCostDirect(column, segment, (default_cost - cost) / qo->segment_group_count[table_id][sg]/ cur_op->columns.size());
}
}
for (int col = 0; col < cur_op->supporting_columns.size(); col++) {
ColumnInfo* column = cur_op->supporting_columns[col];
for (int seg_id = 0; seg_id < column->total_segment; seg_id++) {
Segment* segment = qo->cm->index_to_segment[column->column_id][seg_id];
qo->cm->updateSegmentWeightCostDirect(column, segment, (default_cost - cost) / column->total_segment);
}
}
}
clear();
}
}
double
CostModel::calculate_cost() {
double cost = 0;
L = (double) ori_L;
bool fromGPU = false;
if (selectGPU.size() > 0 || joinGPU.size() > 0) {
for (int i = 0; i < selectGPU.size(); ++i) {
ColumnInfo* col = selectGPU[i];
L *= qo->params->real_selectivity[col];
}
for (int i = 0; i < joinGPU.size(); ++i) {
ColumnInfo* col = joinGPU[i];
L *= qo->params->real_selectivity[col];
}
if (selectCPU.size() > 0 || joinCPU.size() > 0 || groupCPU.size() > 0 || buildCPU.size() > 0) {
// if (selectGPU.size() == 0) cost += transfer_cost(joinGPU.size() + 1);
// else cost += transfer_cost(1);
cost += transfer_cost(joinGPU.size() + 1);
fromGPU = true;
}
}
// cout << "1 " << cost << endl;
for (int i = 0; i < selectCPU.size(); i++) {
ColumnInfo* col = selectCPU[i];
if (fromGPU) {
cost += filter_cost(qo->params->real_selectivity[col], 1, 0);
fromGPU = false;
} else cost += filter_cost(qo->params->real_selectivity[col], 0, 0);
}
// cout << "2 " << cost << endl;
for (int i = 0; i < joinCPU.size(); i++) {
ColumnInfo* col = joinCPU[i];
if (fromGPU) {
cost += probe_cost(qo->params->real_selectivity[col], 1, 0);
fromGPU = false;
} else cost += probe_cost(qo->params->real_selectivity[col], 0, 0);
}
// cout << "3 " << cost << endl;
if (groupCPU.size() > 0) {
if (fromGPU){
cost += group_cost(1);
fromGPU = false;
} else cost += group_cost(0);
}
// cout << "4 " << cost << endl;
//TODO: only works for SSB
if (groupGPU.size() > 0 && (selectCPU.size() > 0 || joinCPU.size() > 0)) {
// cost += transfer_cost(joinCPU.size() + joinGPU.size() + 1);
cost += group_cost(0);
} else if (groupGPU.size() > 0 && (selectGPU.size() > 0 || joinGPU.size() > 0)) {
cost = 0;
}
// cout << "5 " << cost << endl;
//TODO: only works for SSB
if (buildGPU.size() > 0 && (selectCPU.size() > 0 || joinCPU.size() > 0)) {
cost += transfer_cost(joinCPU.size() + joinGPU.size() + 1);
} else if (buildGPU.size() > 0 && (selectGPU.size() > 0 || joinGPU.size() > 0)) {
cost = 0;
}
if (buildCPU.size() > 0) {
if (fromGPU) {
cost += build_cost(1);
fromGPU = false;
} else cost += build_cost(0);
}
return cost;
}
double
CostModel::probe_cost(double selectivity, bool mat_start, bool mat_end) {
double cost = 0;
double scan_time = 0, probe_time = 0, write_time = 0;
if (mat_start) scan_time = L * 4/BW_CPU + L * CACHE_LINE/BW_CPU;
else scan_time = L * 4/BW_CPU;
probe_time = L * CACHE_LINE/BW_CPU;
if (mat_end) write_time = L * 4 * selectivity * 2/BW_CPU;
else write_time = 0;
L *= selectivity;
cost = scan_time + probe_time + write_time;
return cost;
}
double
CostModel::transfer_cost(int M) {
double transfer_time = L * 4 * M/BW_PCI;
return transfer_time;
}
double
CostModel::filter_cost(double selectivity, bool mat_start, bool mat_end) {
double cost = 0;
double scan_time = 0, write_time = 0;
if (mat_start) scan_time = L * 4/BW_CPU + L * CACHE_LINE/BW_CPU;
else scan_time = L * 4/BW_CPU;
if (mat_end) write_time = L * 4 * selectivity/BW_CPU;
else write_time = 0;
L *= selectivity;
cost = scan_time + write_time;
return cost;
}
double
CostModel::group_cost(bool mat_start) {
double cost = 0;
double scan_time = 0, group_time = 0;
if (mat_start) scan_time = L * 4 /BW_CPU + L * CACHE_LINE * (n_aggr_key)/BW_CPU; //the cost to random read group key has not been included
else scan_time = L * CACHE_LINE * n_aggr_key/BW_CPU;
group_time = L * CACHE_LINE/BW_CPU;
cost = scan_time + group_time;
return cost;
}
double
CostModel::build_cost(bool mat_start) {
double cost = 0;
double scan_time = 0, build_time = 0;
if (mat_start) scan_time = L * 4/BW_CPU + L * CACHE_LINE/BW_CPU;
else scan_time = L * 4/BW_CPU;
build_time = L * CACHE_LINE/BW_CPU;
cost = scan_time + build_time;
return cost;
}