// Experiment driver for the Leco_int / Leco_int_poly integer codecs.
//
// Visible flow of the driver: load the input (binary or text loader, selected by `binary`),
// split it into `blocks` blocks of `block_size` values, and for each block build the sequence
// of consecutive differences (tracking its max, min and sum). The block is then encoded with
// `codec` (poly_degree == 1) or `codec_degree2` (poly_degree == 2) into a malloc'ed buffer of
// block_length * sizeof(leco_type) * 4 bytes. poly_degree starts at 1 and is set to 2 under a
// comparison against `threshold` whose left-hand side is not visible in this text. The
// decompress-all and random-access checks that follow are commented out.
//
// Helpers defined here:
//   random(m)            - returns rand() % m.
//   load_data_binary     - opens `filename` in binary mode; on failure writes
//                          "unable to open <filename>" to std::cerr and exits with EXIT_FAILURE.
//                          Reads a uint64_t element count, resizes the vector to it, then reads
//                          count * sizeof(T) bytes into the vector. The `print` parameter is
//                          not used in the visible body, and the read() results are not checked.
//   load_data            - opens `filename` as text; on failure prints
//                          "error opening source file." to std::cout and returns an empty
//                          vector. Otherwise extracts values with operator>> and appends each
//                          one only if the stream is still good() after the extraction.
//                          NOTE(review): a last value terminated by end-of-file instead of
//                          whitespace would set eofbit and be dropped - confirm inputs end
//                          with a newline.
//   linearity            - fits `mylr` with caltheta(data.data(), data.size()) and returns
//                          1 - (sum_i |data[i] - (theta0 + theta1 * i)| / (max_val - min_val))
//                              / data.size();
//                          returns 0 when max_val == min_val.
//                          NOTE(review): uses unqualified abs() on a double - confirm the
//                          floating-point overload is the one selected. data.size() == 0 is
//                          not guarded.
//   var                  - computes the mean as sum_val / data.size() and returns 1 when
//                          max_val == min_val; the rest of the body is not present in this text.
//
// NOTE(review): this text appears to have lost its line breaks and every `<...>` span
// (template parameter lists, cast target types, the tail of var() and the head of main()),
// and it stops mid-statement at the end. It will not compile as-is; restore the file from
// version control before making code changes.
#include "common.h" #include "codecfactory.h" #include "caltime.h" #include "lr.h" #include "poly_fix_integer_template.h" #include "piecewise_fix_integer_template.h" #include "piecewise_cost_merge_integer_template_double.h" #include "FOR_integer_template.h" #include "delta_integer_template.h" #include "delta_cost_integer_template.h" #include "delta_cost_merge_integer_template.h" #include "piecewise_cost_merge_integer_template_test.h" typedef uint64_t leco_type; int random(int m) { return rand() % m; } template static std::vector load_data_binary(const std::string& filename, bool print = true) { std::vector data; std::ifstream in(filename, std::ios::binary); if (!in.is_open()) { std::cerr << "unable to open " << filename << std::endl; exit(EXIT_FAILURE); } // Read size. uint64_t size; in.read(reinterpret_cast(&size), sizeof(uint64_t)); data.resize(size); // Read values. in.read(reinterpret_cast(data.data()), size * sizeof(T)); in.close(); return data; } template static std::vector load_data(const std::string& filename) { std::vector data; std::ifstream srcFile(filename, std::ios::in); if (!srcFile) { std::cout << "error opening source file." 
<< std::endl; return data; } while (srcFile.good()) { T next; srcFile >> next; if (!srcFile.good()) { break; } data.emplace_back(next); } srcFile.close(); return data; } double linearity(std::vector data, int64_t max_val, int64_t min_val){ lr_int_T mylr; mylr.caltheta(data.data(), data.size()); int64_t data_range = max_val - min_val; if (data_range == 0){ return 0; } double metric = 0; for(int i = 0; i < data.size(); i++){ double tmp_val =data[i] - (mylr.theta0 + mylr.theta1 * (double)i); metric += (double)abs(tmp_val) / (double)data_range; } return 1 - metric / (double)data.size(); } double var(std::vector data, int64_t max_val, int64_t min_val, int64_t sum_val){ double mid = (double)sum_val / (double)data.size(); double metric = 0; int64_t data_range = max_val - min_val; if (data_range == 0){ return 1; } for(int i = 0; i < data.size(); i++){ double tmp_val = ((double)data[i] - mid); // std::cout< data; if(binary){ data = load_data_binary(source_file); // data = load_data_binary("../data/" + source_file); } else{ data = load_data(source_file); // data = load_data("../data/" + source_file); } int N = data.size(); int block_size = data.size() / blocks; blocks = data.size() / block_size; if (blocks * block_size < N) { blocks++; } // handle with the last block, maybe < block_size // convert block_size to const // const int contsblock_size = const_cast(block_size); Leco_int_poly codec_degree2; codec_degree2.init(blocks, block_size); Leco_int codec; codec.init(blocks, block_size); std::vector block_start_vec; int poly_degree_count[3] = {0, 0, 0}; uint64_t totalsize = 0; for (int i = 0; i < blocks; i++) { // std::cout<<"block "< delta_seq; int64_t max_val = INT64_MIN; int64_t min_val = INT64_MAX; int64_t sum_val = 0; for(int j=0; j< block_length-1;j++){ int64_t tmp_delta = data[i * block_size + j+1] - data[i * block_size + j]; delta_seq.push_back(tmp_delta); if(tmp_delta > max_val){ max_val = tmp_delta; } if(tmp_delta < min_val){ min_val = tmp_delta; } sum_val += 
tmp_delta; } int poly_degree = 1; double linearity_metric = linearity(delta_seq, max_val, min_val); double var_metric = var(delta_seq, max_val, min_val, sum_val); // std::cout< threshold){ poly_degree = 2; } uint8_t* descriptor = (uint8_t*)malloc(block_length * sizeof(leco_type) * 4); uint8_t* res = descriptor; uint32_t segment_size = 0; if(poly_degree == 1){ res = codec.encodeArray8_int(data.data() + (i * block_size), block_length, descriptor, i); } else{ res = codec_degree2.encodeArray8_int(data.data() + (i * block_size), block_length, descriptor, i); } // std::cout< recover(data.size()); // double totaltime = 0.0; // // std::cout << "decompress all!" << std::endl; // int repeat = 10; // double start = getNow(); // for (int iter = 0; iter < repeat; iter++) // { // for (int i = 0; i < blocks; i++) // { // int block_length = block_size; // if (i == blocks - 1) // { // block_length = N - (blocks - 1) * block_size; // } // codec.decodeArray8(block_start_vec[i], block_length, recover.data() + i * block_size, i); // } // for (auto index : codec.mul_add_diff_set) // { // recover[index.first] += index.second; // } // #ifndef NDEBUG // for (int j = 0; j < N; j++) // { // if (data[j ] != recover[j ]) // { // std::cout<< " num: " << j << " true is: " << data[j] << " predict is: " << recover[j] << std::endl; // std::cout << "something wrong! decompress all failed" << std::endl; // flag = false; // break; // } // } // if (!flag) // { // break; // } // #endif // } // double end = getNow(); // totaltime += (end - start); // double da_ns = totaltime / (N*repeat) * 1000000000; // // std::cout << "random access decompress!" 
<< std::endl; // std::vector ra_pos; // repeat = 1; // for (int i = 0;i < N * repeat;i++) { // ra_pos.push_back(random(N)); // } // flag = true; // double randomaccesstime = 0.0; // start = getNow(); // leco_type mark = 0; // for (auto index : ra_pos) // { // leco_type tmpvalue = codec.randomdecodeArray8(block_start_vec[(int)index / block_size], index % block_size, NULL, N); // mark += tmpvalue; // #ifndef NDEBUG // if (data[index] != tmpvalue) // { // std::cout << "num: " << index << "true is: " << data[index] << " predict is: " << tmpvalue << std::endl; // flag = false; // std::cout << "something wrong! random access failed" << std::endl; // } // if (!flag) // { // break; // } // #endif // } // end = getNow(); // randomaccesstime += (end - start); // std::ofstream outfile("fix_log", std::ios::app); // outfile<