#include "QueryOptimizer.h" #include "CostModel.h" #include "MultiGPUProcessing.h" CostModel::CostModel(int _L, int _total_segment, int _n_group_key, int _n_aggr_key, int _sg, int _table_id, QueryOptimizer* _qo) { L = (double) _L; ori_L = (double) _L; n_group_key = _n_group_key; n_aggr_key = _n_aggr_key; sg = _sg; table_id = _table_id; qo = _qo; total_segment = _total_segment; Operator* op = qo->opRoots[table_id][sg]; // cout << op->type << endl; opPipeline.push_back(op); op = op->children; while (op != NULL) { // cout << op->type << endl; if (op->type != Materialize && op->type != GPUtoCPU && op->type != CPUtoGPU && op->type != Merge) { opPipeline.push_back(op); } op = op->children; } }; void CostModel::clear() { selectCPU.clear(); joinCPU.clear(); groupCPU.clear(); buildCPU.clear(); selectGPU.clear(); joinGPU.clear(); groupGPU.clear(); buildGPU.clear(); } void CostModel::permute_cost() { for (int i = 0; i < opPipeline.size(); i++) { Operator* op = opPipeline[i]; if (op->device == GPU) { if (op->type == Probe) joinGPU.push_back(op->columns[0]); else if (op->type == Filter) selectGPU.push_back(op->columns[0]); else if (op->type == GroupBy || op->type == Aggr) { for (int k = 0; k < op->columns.size(); k++) groupGPU.push_back(op->columns[k]); } else if (op->type == Build) buildGPU.push_back(op->columns[0]); } else { if (op->type == Probe) joinCPU.push_back(op->columns[0]); else if (op->type == Filter) selectCPU.push_back(op->columns[0]); else if (op->type == GroupBy || op->type == Aggr) { for (int k = 0; k < op->columns.size(); k++) groupCPU.push_back(op->columns[k]); } else if (op->type == Build) buildCPU.push_back(op->columns[0]); } } double default_cost = calculate_cost(); // cout << "sg: " << sg << " default cost: ";; // printf("%.4f\n", default_cost); clear(); // for (int seg = 0; seg < qo->segment_group_count[table_id][sg]; seg++) { // int seg_id = qo->segment_group[table_id][sg * total_segment + seg]; // cout << seg_id << endl; // } for (int i = 0; i < opPipeline.size(); i++) { double cost = 0; Operator* cur_op = opPipeline[i]; if (cur_op->device == CPU) cur_op->device = GPU; else if (cur_op->device == GPU) cur_op->device = CPU; for (int j = 0; j < opPipeline.size(); j++) { Operator* op = opPipeline[j]; if (op->device == GPU) { if (op->type == Probe) joinGPU.push_back(op->columns[0]); else if (op->type == Filter) selectGPU.push_back(op->columns[0]); else if (op->type == GroupBy || op->type == Aggr) { for (int k = 0; k < op->columns.size(); k++) groupGPU.push_back(op->columns[k]); } else if (op->type == Build) buildGPU.push_back(op->columns[0]); } else { if (op->type == Probe) joinCPU.push_back(op->columns[0]); else if (op->type == Filter) selectCPU.push_back(op->columns[0]); else if (op->type == GroupBy || op->type == Aggr) { for (int k = 0; k < op->columns.size(); k++) groupCPU.push_back(op->columns[k]); } else if (op->type == Build) buildCPU.push_back(op->columns[0]); } } cost = calculate_cost(); // cout << "select GPU" << endl; // for (int i = 0; i < selectGPU.size(); i++) { // cout << selectGPU[i]->column_name << endl; // } // cout << "select CPU" << endl; // for (int i = 0; i < selectCPU.size(); i++) { // cout << selectCPU[i]->column_name << endl; // } // cout << "join GPU" << endl; // for (int i = 0; i < joinGPU.size(); i++) { // cout << joinGPU[i]->column_name << endl; // } // cout << "join CPU" << endl; // for (int i = 0; i < joinCPU.size(); i++) { // cout << joinCPU[i]->column_name << endl; // } // cout << "group GPU" << endl; // for (int i = 0; i < groupGPU.size(); i++) { // cout << groupGPU[i]->column_name << endl; // } // cout << "group CPU" << endl; // for (int i = 0; i < groupCPU.size(); i++) { // cout << groupCPU[i]->column_name << endl; // } // cout << "sg: " << sg << " playing with " << cur_op->columns[0]->column_name << ": "; // printf("%.4f\n", cost - default_cost); // cout << cost << endl; // chrono::high_resolution_clock::time_point cur_time = chrono::high_resolution_clock::now(); // chrono::duration<double> timestamp = cur_time - qo->cgp->begin_time; // double time_count = timestamp.count(); // cout << time_count << endl; // if (table_id == 0 && (selectCPU.size() + selectGPU.size()) > 0) cost = default_cost; if (cur_op->device == CPU) { cur_op->device = GPU; for (int col = 0; col < cur_op->columns.size(); col++) { ColumnInfo* column = cur_op->columns[col]; for (int seg = 0; seg < qo->segment_group_count[table_id][sg]; seg++) { int seg_id = qo->segment_group[table_id][sg * total_segment + seg]; Segment* segment = qo->cm->index_to_segment[column->column_id][seg_id]; qo->cm->updateSegmentWeightCostDirect(column, segment, (cost - default_cost) / qo->segment_group_count[table_id][sg] / cur_op->columns.size()); } } for (int col = 0; col < cur_op->supporting_columns.size(); col++) { ColumnInfo* column = cur_op->supporting_columns[col]; for (int seg_id = 0; seg_id < column->total_segment; seg_id++) { Segment* segment = qo->cm->index_to_segment[column->column_id][seg_id]; qo->cm->updateSegmentWeightCostDirect(column, segment, (cost - default_cost) / column->total_segment); } } } else if (cur_op->device == GPU) { cur_op->device = CPU; for (int col = 0; col < cur_op->columns.size(); col++) { ColumnInfo* column = cur_op->columns[col]; for (int seg = 0; seg < qo->segment_group_count[table_id][sg]; seg++) { int seg_id = qo->segment_group[table_id][sg * total_segment + seg]; Segment* segment = qo->cm->index_to_segment[column->column_id][seg_id]; qo->cm->updateSegmentWeightCostDirect(column, segment, (default_cost - cost) / qo->segment_group_count[table_id][sg]/ cur_op->columns.size()); } } for (int col = 0; col < cur_op->supporting_columns.size(); col++) { ColumnInfo* column = cur_op->supporting_columns[col]; for (int seg_id = 0; seg_id < column->total_segment; seg_id++) { Segment* segment = qo->cm->index_to_segment[column->column_id][seg_id]; qo->cm->updateSegmentWeightCostDirect(column, segment, (default_cost - cost) / column->total_segment); } } } clear(); } } void CostModel::permute_costHE() { for (int i = 0; i < opPipeline.size(); i++) { Operator* op = opPipeline[i]; if (op->device == GPU) { if (op->type == Probe) joinGPU.push_back(op->columns[0]); else if (op->type == Filter) selectGPU.push_back(op->columns[0]); else if (op->type == GroupBy || op->type == Aggr) { for (int k = 0; k < op->columns.size(); k++) groupGPU.push_back(op->columns[k]); } else if (op->type == Build) buildGPU.push_back(op->columns[0]); } else { if (op->type == Probe) joinCPU.push_back(op->columns[0]); else if (op->type == Filter) selectCPU.push_back(op->columns[0]); else if (op->type == GroupBy || op->type == Aggr) { for (int k = 0; k < op->columns.size(); k++) groupCPU.push_back(op->columns[k]); } else if (op->type == Build) buildCPU.push_back(op->columns[0]); } } double default_cost = calculate_cost(); clear(); for (int i = 0; i < opPipeline.size(); i++) { double cost = 0; Operator* cur_op = opPipeline[i]; if (cur_op->device == CPU) cur_op->device = GPU; else if (cur_op->device == GPU) cur_op->device = CPU; for (int j = 0; j < opPipeline.size(); j++) { Operator* op = opPipeline[j]; if (op->device == GPU) { if (op->type == Probe) joinGPU.push_back(op->columns[0]); else if (op->type == Filter) selectGPU.push_back(op->columns[0]); else if (op->type == GroupBy || op->type == Aggr) { for (int k = 0; k < op->columns.size(); k++) groupGPU.push_back(op->columns[k]); } else if (op->type == Build) buildGPU.push_back(op->columns[0]); } else { if (op->type == Probe) joinCPU.push_back(op->columns[0]); else if (op->type == Filter) selectCPU.push_back(op->columns[0]); else if (op->type == GroupBy || op->type == Aggr) { for (int k = 0; k < op->columns.size(); k++) groupCPU.push_back(op->columns[k]); } else if (op->type == Build) buildCPU.push_back(op->columns[0]); } } cost = calculate_cost(); if (cur_op->device == CPU) { cur_op->device = GPU; for (int col = 0; col < cur_op->columns.size(); col++) { ColumnInfo* column = cur_op->columns[col]; for (int seg = 0; seg < qo->segment_group_count[table_id][sg]; seg++) { int seg_id = sg; Segment* segment = qo->cm->index_to_segment[column->column_id][seg_id]; qo->cm->updateSegmentWeightCostDirect(column, segment, (cost - default_cost) / qo->segment_group_count[table_id][sg] / cur_op->columns.size()); } } for (int col = 0; col < cur_op->supporting_columns.size(); col++) { ColumnInfo* column = cur_op->supporting_columns[col]; for (int seg_id = 0; seg_id < column->total_segment; seg_id++) { Segment* segment = qo->cm->index_to_segment[column->column_id][seg_id]; qo->cm->updateSegmentWeightCostDirect(column, segment, (cost - default_cost) / column->total_segment); } } } else if (cur_op->device == GPU) { cur_op->device = CPU; for (int col = 0; col < cur_op->columns.size(); col++) { ColumnInfo* column = cur_op->columns[col]; for (int seg = 0; seg < qo->segment_group_count[table_id][sg]; seg++) { int seg_id = sg; Segment* segment = qo->cm->index_to_segment[column->column_id][seg_id]; qo->cm->updateSegmentWeightCostDirect(column, segment, (default_cost - cost) / qo->segment_group_count[table_id][sg]/ cur_op->columns.size()); } } for (int col = 0; col < cur_op->supporting_columns.size(); col++) { ColumnInfo* column = cur_op->supporting_columns[col]; for (int seg_id = 0; seg_id < column->total_segment; seg_id++) { Segment* segment = qo->cm->index_to_segment[column->column_id][seg_id]; qo->cm->updateSegmentWeightCostDirect(column, segment, (default_cost - cost) / column->total_segment); } } } clear(); } } double CostModel::calculate_cost() { double cost = 0; L = (double) ori_L; bool fromGPU = false; if (selectGPU.size() > 0 || joinGPU.size() > 0) { for (int i = 0; i < selectGPU.size(); ++i) { ColumnInfo* col = selectGPU[i]; L *= qo->params->real_selectivity[col]; } for (int i = 0; i < joinGPU.size(); ++i) { ColumnInfo* col = joinGPU[i]; L *= qo->params->real_selectivity[col]; } if (selectCPU.size() > 0 || joinCPU.size() > 0 || groupCPU.size() > 0 || buildCPU.size() > 0) { // if (selectGPU.size() == 0) cost += transfer_cost(joinGPU.size() + 1); // else cost += transfer_cost(1); cost += transfer_cost(joinGPU.size() + 1); fromGPU = true; } } // cout << "1 " << cost << endl; for (int i = 0; i < selectCPU.size(); i++) { ColumnInfo* col = selectCPU[i]; if (fromGPU) { cost += filter_cost(qo->params->real_selectivity[col], 1, 0); fromGPU = false; } else cost += filter_cost(qo->params->real_selectivity[col], 0, 0); } // cout << "2 " << cost << endl; for (int i = 0; i < joinCPU.size(); i++) { ColumnInfo* col = joinCPU[i]; if (fromGPU) { cost += probe_cost(qo->params->real_selectivity[col], 1, 0); fromGPU = false; } else cost += probe_cost(qo->params->real_selectivity[col], 0, 0); } // cout << "3 " << cost << endl; if (groupCPU.size() > 0) { if (fromGPU){ cost += group_cost(1); fromGPU = false; } else cost += group_cost(0); } // cout << "4 " << cost << endl; //TODO: only works for SSB if (groupGPU.size() > 0 && (selectCPU.size() > 0 || joinCPU.size() > 0)) { // cost += transfer_cost(joinCPU.size() + joinGPU.size() + 1); cost += group_cost(0); } else if (groupGPU.size() > 0 && (selectGPU.size() > 0 || joinGPU.size() > 0)) { cost = 0; } // cout << "5 " << cost << endl; //TODO: only works for SSB if (buildGPU.size() > 0 && (selectCPU.size() > 0 || joinCPU.size() > 0)) { cost += transfer_cost(joinCPU.size() + joinGPU.size() + 1); } else if (buildGPU.size() > 0 && (selectGPU.size() > 0 || joinGPU.size() > 0)) { cost = 0; } if (buildCPU.size() > 0) { if (fromGPU) { cost += build_cost(1); fromGPU = false; } else cost += build_cost(0); } return cost; } double CostModel::probe_cost(double selectivity, bool mat_start, bool mat_end) { double cost = 0; double scan_time = 0, probe_time = 0, write_time = 0; if (mat_start) scan_time = L * 4/BW_CPU + L * CACHE_LINE/BW_CPU; else scan_time = L * 4/BW_CPU; probe_time = L * CACHE_LINE/BW_CPU; if (mat_end) write_time = L * 4 * selectivity * 2/BW_CPU; else write_time = 0; L *= selectivity; cost = scan_time + probe_time + write_time; return cost; } double CostModel::transfer_cost(int M) { double transfer_time = L * 4 * M/BW_PCI; return transfer_time; } double CostModel::filter_cost(double selectivity, bool mat_start, bool mat_end) { double cost = 0; double scan_time = 0, write_time = 0; if (mat_start) scan_time = L * 4/BW_CPU + L * CACHE_LINE/BW_CPU; else scan_time = L * 4/BW_CPU; if (mat_end) write_time = L * 4 * selectivity/BW_CPU; else write_time = 0; L *= selectivity; cost = scan_time + write_time; return cost; } double CostModel::group_cost(bool mat_start) { double cost = 0; double scan_time = 0, group_time = 0; if (mat_start) scan_time = L * 4 /BW_CPU + L * CACHE_LINE * (n_aggr_key)/BW_CPU; //the cost to random read group key has not been included else scan_time = L * CACHE_LINE * n_aggr_key/BW_CPU; group_time = L * CACHE_LINE/BW_CPU; cost = scan_time + group_time; return cost; } double CostModel::build_cost(bool mat_start) { double cost = 0; double scan_time = 0, build_time = 0; if (mat_start) scan_time = L * 4/BW_CPU + L * CACHE_LINE/BW_CPU; else scan_time = L * 4/BW_CPU; build_time = L * CACHE_LINE/BW_CPU; cost = scan_time + build_time; return cost; }