// selecting codec by predict #include "../headers/common.h" #include "../headers/codecfactory.h" #include "../headers/caltime.h" #include "../headers/lr.h" #include "../headers/create_feature.h" #include "../headers/microunit.h" #include "../headers/easylogging++.h" #include "../headers/MLP.h" #include "../headers/regress_tree.h" #include "../headers/file_manage.h" #include "../headers/model_selection.h" using namespace Eigen; INITIALIZE_EASYLOGGINGPP const int input_size = 7; std::vector weights = {"../reg_model/reg_model_piecewise.txt","../reg_model/reg_model_FOR.txt","../reg_model/reg_model_rle.txt"}; int main() { using namespace Codecset; // We pick a CODEC std::vector data; std::ifstream srcFile("../data/standard/normal_200M_uint32.txt",std::ios::in); //std::ofstream outfile("out.txt", std::ios::app); if(!srcFile) { std::cout << "error opening source file." << std::endl; return 0; } while(1){ uint32_t next ; srcFile >> next; if(srcFile.eof()){break;} data.push_back(next); } srcFile.close(); int N = data.size(); if (data.size() == 0) { std::cout << "Empty vector" << std::endl; return 0; } std::cout << "vector size = " << data.size() << std::endl; std::cout << "vector size = " << data.size() * sizeof(uint32_t) / 1024.0 << "KB" << std::endl; // prepare classifier std::vector models; for (int i=0;i<(int)weights.size();i++){ std::ifstream infile(weights[i], std::ios::in); RegressionTree model; model.rebuild(infile,0); models.push_back(model); infile.close(); } int blocks =1000; int block_size = data.size()/blocks; int delta =0; std::vector codec_fac; std::vector codec_name={"piecewise_fix","FOR","rle"}; //std::vector codec_name={"piecewise_fix"}; for(int i=0;i<(int)codec_name.size();i++){ IntegerCODEC &codec = *CODECFactory::getFromName(codec_name[i]); codec.init(blocks,block_size,delta); codec_fac.push_back(&codec); } std::vector block_start_vec; std::vector method_vec; int totalsize = 0; //outfile<< "len" <<" "<<"avg"<<" "<<"min"<<" "<<"max"<<" "<<"num_distinct"<<" "<<"rl"<<" label"<encodeArray8(data.data()+(i*block_size),block_length ,descriptor,i); int tmp_size = (res-descriptor); double end2 = getNow(); totaltime_realcom +=(end2-start2); //seg.write_feature(outfile,method); method_vec.push_back(pick_method); block_start_vec.push_back(descriptor); totalsize +=tmp_size; } //outfile.close(); double end = getNow(); double totaltime = end -start; std::cout << "compress speed: " << std::setprecision(10) << data.size()/(totaltime*1000) << std::endl; std::cout << "real compress speed: " << std::setprecision(10) << data.size()/(totaltime_realcom*1000) << std::endl; /* for(int i=0;i recover(data.size()); totaltime =0.0; std::cout<<"decompress all!"<decodeArray8(block_start_vec[i], block_length, recover.data()+i*block_size, i); /* for(int j=0;j buffer(data.size()); double randomaccesstime =0.0; start = getNow(); uint32_t mark=0; for(int i=0;irandomdecodeArray8(block_start_vec[(int)i/block_size], i%block_size, buffer.data(), i/block_size); mark+=tmpvalue; /* if(data[i]!=tmpvalue){ std::cout<<"num: "<