#ifndef _QUERY_OPTIMIZER_H_ #define _QUERY_OPTIMIZER_H_ #pragma once #include "CacheManager.h" #include "KernelArgs.h" #include "common.h" #define NUM_QUERIES 13 // #define MAX_GROUPS 128 #define MAX_GROUPS 229 class CPUGPUProcessing; enum OperatorType { Filter, Probe, Build, GroupBy, Aggr, CPUtoGPU, GPUtoCPU, Materialize, Merge }; enum DeviceType { CPU, GPU }; class Normal { default_random_engine generator; normal_distribution<double> distribution; int min; int max; int n; double mean; double stddev; public: pair<int, int> year; pair<int, int> yearmonth; pair<int, int> date; Normal(double mean, double stddev, int min, int max): mean(mean), stddev(stddev), min(min), max(max), distribution(mean, stddev) { n = max-min+1; } int norm() { while (true) { double number = this->distribution(generator); if (number >= this->min && number <= this->max) { number = round(number); return (int) number; } } } void reset(double mean, double stddev) { distribution.reset(); normal_distribution<double> d(mean, stddev); distribution.param(d.param()); } void generateNorm() { int norm_rv_start, norm_rv_end; // Zipf random variable norm_rv_start = norm(); norm_rv_end = norm(); // cout << norm_rv_start << " " << norm_rv_end << endl; if (n <= 7) { //possibility #1 (year predicate) if (norm_rv_start > norm_rv_end) { swap(norm_rv_start, norm_rv_end); } // int x = rand() % 2; // if (x == 0) norm_rv_end = norm_rv_start; // else if (x == 1) { // if (norm_rv_start == min) { // norm_rv_end = norm_rv_start + 1; // } else if (norm_rv_start == max) { // norm_rv_end = norm_rv_start; // norm_rv_start = norm_rv_start - 1; // } else { // int y = rand() % 2; // if (y == 0) { // norm_rv_end = norm_rv_start; // norm_rv_start = norm_rv_start - 1; // } else { // norm_rv_end = norm_rv_start + 1; // } // } // } else if (x == 2) { // if (norm_rv_start == min) { // // norm_rv_start = norm_rv_start; // norm_rv_end = norm_rv_start; // } else if (norm_rv_start == max) { // norm_rv_end = norm_rv_start; // // norm_rv_start = norm_rv_start - 1; // } else { // norm_rv_end = norm_rv_start + 1; // norm_rv_start = norm_rv_start - 1; // } // } year = make_pair(1992 + norm_rv_start, 1992 + norm_rv_end); yearmonth = make_pair(year.first * 100 + 1, year.second * 100 + 12); date = make_pair(yearmonth.first * 100 + 1, yearmonth.second * 100 + 30); } else if (n <= 79) { //possibility #2 (yearmonthnum predicate) if (norm_rv_start > norm_rv_end) { swap(norm_rv_start, norm_rv_end); } int temp_start = norm_rv_start / 12; // int temp_end = norm_rv_end / 12; // year = make_pair(1992 + temp_start, 1992 + temp_end); year = make_pair(1992 + temp_start, 1992 + temp_start); temp_start = norm_rv_start % 12; // temp_end = norm_rv_end % 12; // yearmonth = make_pair(year.first * 100 + temp_start + 1, year.second * 100 + temp_end + 1); yearmonth = make_pair(year.first * 100 + temp_start + 1, year.second * 100 + temp_start + 1); // date = make_pair(yearmonth.first * 100 + 1, yearmonth.second * 100 + 30); date = make_pair(yearmonth.first * 100 + 1, yearmonth.second * 100 + 30); } else if (n <= 316) { //possibility #3 (week predicate) if (norm_rv_start > norm_rv_end) { swap(norm_rv_start, norm_rv_end); } int temp_start = norm_rv_start / 48; // int temp_end = norm_rv_end / 48; // year = make_pair(1992 + temp_start, 1992 + temp_end); year = make_pair(1992 + temp_start, 1992 + temp_start); temp_start = (norm_rv_start % 48)/ 4; // temp_end = (norm_rv_end % 48) / 4; // yearmonth = make_pair(year.first * 100 + temp_start + 1, year.second * 100 + temp_end + 1); yearmonth = make_pair(year.first * 100 + temp_start + 1, year.second * 100 + temp_start + 1); temp_start = (norm_rv_start % 48) % 4; // temp_end = (norm_rv_end % 48) % 4; // date = make_pair(yearmonth.first * 100 + temp_start * 7 + 1, yearmonth.second * 100 + temp_end * 7 + 7); date = make_pair(yearmonth.first * 100 + temp_start * 7 + 1, yearmonth.second * 100 + temp_start * 7 + 7); } } }; class Zipfian { public: int seed; double c; double alpha; long x; int n; int range; const long a = 16807; // Multiplier const long m = 2147483647; // Modulus const long q = 127773; // m div a const long r = 2836; // m mod a pair<int, int> year; pair<int, int> yearmonth; pair<int, int> date; Zipfian (int N, int Range, double alpha) { seed = 123; x = seed; n = N; range = Range; c = 0; for (int i=1; i<=N; i++) c = c + (1.0 / pow((double) i, alpha)); c = 1.0 / c; }; void reset(double alpha) { c = 0; for (int i=1; i<=n; i++) c = c + (1.0 / pow((double) i, alpha)); c = 1.0 / c; } int zipf() { // static int first = TRUE; // Static first time flag // static double c = 0; // Normalization constant double z; // Uniform random number (0 < z < 1) double sum_prob; // Sum of probabilities double zipf_value; // Computed exponential value to be returned // Pull a uniform random number (0 < z < 1) do { z = rand_val(); } while ((z == 0) || (z == 1)); // Map z to the value sum_prob = 0; for (int i=1; i<=n; i++) { sum_prob = sum_prob + c / pow((double) i, alpha); if (sum_prob >= z) { zipf_value = i; break; } }; // Assert that zipf_value is between 1 and N assert((zipf_value >=1) && (zipf_value <= n)); return(zipf_value-1); } double rand_val() { // static long x; // Random int value long x_div_q; // x divided by q long x_mod_q; // x modulo q long x_new; // New x value // RNG using integer arithmetic x_div_q = x / q; x_mod_q = x % q; x_new = (a * x_mod_q) - (r * x_div_q); if (x_new > 0) x = x_new; else x = x_new + m; // Return a random value between 0.0 and 1.0 return((double) x / m); }; void generateZipf() { int zipf_rv; // Zipf random variable zipf_rv = zipf(); // zipf_rv = n - zipf_rv; //n-1 to 0 (n-1 with highest probability) if (n <= 7) { //possibility #1 (year predicate) assert(range < 7); year = make_pair(1998 - zipf_rv - range, 1998 - zipf_rv); yearmonth = make_pair(year.first * 100 + 1, year.second * 100 + 12); date = make_pair(yearmonth.first * 100 + 1, yearmonth.second * 100 + 30); } else if (n <= 79) { //possibility #2 (yearmonthnum predicate) assert(range == 0); if (zipf_rv < 7) { // 0 to 6 int temp = zipf_rv / 12; //zipf_rv = 0 to 6; //min = 0; 1998 //max = 6; 1992 year = make_pair(1998 - temp, 1998 - temp); temp = zipf_rv % 12; //min = 0; 12 //max = 11; 1 yearmonth = make_pair(year.first * 100 + 7 - temp - range, year.second * 100 + 7 - temp); date = make_pair(yearmonth.first * 100 + 1, yearmonth.second * 100 + 30); } else { zipf_rv -= 7; int temp = zipf_rv / 12; //zipf_rv = 0 to 71 or 7 to 78 //min = 0; 1998 //max = 6; 1992 year = make_pair(1997 - temp, 1997 - temp); temp = zipf_rv % 12; //min = 0; 12 //max = 11; 1 yearmonth = make_pair(year.first * 100 + 12 - temp - range, year.second * 100 + 12 - temp); date = make_pair(yearmonth.first * 100 + 1, yearmonth.second * 100 + 30); } } else if (n <= 316) { //possibility #3 (week predicate) assert(range == 0); if (zipf_rv < 28) { // 0 to 27 int temp = zipf_rv / 48; //zipf_rv = 0 to 27; //min = 0; 1998 //max = 6; 1992 year = make_pair(1998 - temp, 1998 - temp); temp = (zipf_rv % 48) / 4; //min = 0; 12 //max = 11; 1 yearmonth = make_pair(year.first * 100 + 7 - temp, year.second * 100 + 7 - temp); temp = (zipf_rv % 48) % 4; //min = 0; 22 - 28 //max = 3; 1 - 7 date = make_pair(yearmonth.first * 100 + 22 - temp * 7 - range * 7, yearmonth.second * 100 + 28 - temp * 7); } else { int temp = zipf_rv / 48; //zipf_rv = 0 to 27; //min = 0; 1998 //max = 6; 1992 year = make_pair(1997 - temp, 1997 - temp); temp = (zipf_rv % 48) / 4; //min = 0; 12 //max = 11; 1 yearmonth = make_pair(year.first * 100 + 12 - temp, year.second * 100 + 12 - temp); temp = (zipf_rv % 48) % 4; //min = 0; 22 - 28 //max = 3; 1 - 7 date = make_pair(yearmonth.first * 100 + 22 - temp * 7 - range * 7, yearmonth.second * 100 + 28 - temp * 7); } } }; }; class Operator { public: DeviceType device; int table_id; OperatorType type; unsigned short sg; short* segment_group; Operator* children; Operator* parents; vector<ColumnInfo*> columns; vector<ColumnInfo*> supporting_columns; Operator(DeviceType _device, unsigned short _sg, int _table_id, OperatorType _type) { type = _type; sg = _sg; table_id = _table_id; device = _device; }; void addChild(Operator* child) { children = child; if (child != NULL) child->parents = this; }; void setDevice(DeviceType _device) { device = _device; }; }; class QueryOptimizer { public: CacheManager* cm; CPUGPUProcessing* cgp; vector<ColumnInfo*> querySelectColumn; vector<ColumnInfo*> queryBuildColumn; vector<ColumnInfo*> queryProbeColumn; vector<ColumnInfo*> queryGroupByColumn; vector<ColumnInfo*> queryAggrColumn; vector<vector<int>> index_to_sg; vector<pair<ColumnInfo*, ColumnInfo*>> join; unordered_map<ColumnInfo*, vector<ColumnInfo*>> aggregation; unordered_map<ColumnInfo*, vector<ColumnInfo*>> groupby_build; unordered_map<ColumnInfo*, vector<ColumnInfo*>> select_probe; unordered_map<ColumnInfo*, vector<ColumnInfo*>> select_build; unordered_map<ColumnInfo*, ColumnInfo*> fkey_pkey; unordered_map<ColumnInfo*, ColumnInfo*> pkey_fkey; vector<vector<vector<vector<Operator*>>>> opGPUPipeline; // for each table, for each segment group, for each pipeline, there is vector of operator vector<vector<vector<vector<Operator*>>>> opCPUPipeline; // for each table, for each segment group, for each pipeline, there is vector of operator vector<vector<Operator*>> opRoots; // for each table, for each segment group there is operator vector<vector<Operator*>> opParsed; // for each table, there is vector of operator vector<vector<ColumnInfo*>> joinGPUPipelineCol; vector<vector<ColumnInfo*>> joinCPUPipelineCol; vector<vector<ColumnInfo*>> selectGPUPipelineCol; vector<vector<ColumnInfo*>> selectCPUPipelineCol; vector<vector<ColumnInfo*>> groupbyGPUPipelineCol; vector<vector<ColumnInfo*>> groupbyCPUPipelineCol; vector<vector<ColumnInfo*>> queryColumn; bool groupGPUcheck; bool joinGPUall; bool* joinGPUcheck, *joinCPUcheck, **joinGPU, **joinCPU; short** segment_group, ***segment_group_each_gpu; short** segment_group_count, ***segment_group_each_gpu_count; short** par_segment; short* par_segment_count; int** last_segment_gpu; int* last_segment; map<int, map<ColumnInfo*, double>> speedup; double** speedup_segment; map<int, Zipfian*> zipfian; map<int, Normal*> normal; QueryParams* params; bool custom; bool skipping; int processed_segment; int skipped_segment; QueryOptimizer(size_t _cache_size, size_t _broadcast_size, size_t _processing_size, size_t _pinned_memsize, CPUGPUProcessing* _cgp); ~QueryOptimizer(); void setDistributionZipfian(double alpha, double alpha1); void setDistributionNormal(double mean, double stddev); void parseQuery(int query); void parseQuery11(); void parseQuery12(); void parseQuery13(); void parseQuery21(); void parseQuery22(); void parseQuery23(); void parseQuery31(); void parseQuery32(); void parseQuery33(); void parseQuery34(); void parseQuery41(); void parseQuery42(); void parseQuery43(); void parseQuery51(); void parseQuery52(); void parseQuery53(); void parseQuery54(); void prepareQuery(int query, Distribution dist = None); void prepareQuery2(int query, Distribution dist = None); void clearParsing(); void clearPlacement(); void clearPrepare(); void prepareOperatorPlacement(Distribution dist = None); void groupBitmapSegmentTable(int table_id, int query, bool broadcast, bool isprofile = 0); bool checkPredicate(int table_id, int segment_idx); void updateSegmentStats(int table_id, int segment_idx, int query); }; #endif