Learn-to-Compress / experiments / binary_search.cpp
binary_search.cpp
Raw
// test binary search

#include "../headers/common.h"
#include "../headers/codecfactory.h"
#include "../headers/caltime.h"
#include "../headers/lr.h"

using namespace Codecset;

IntegerCODEC &codec = *CODECFactory::getFromName("piecewise_fix");
std::string file_name = "../data/standard/books_200M_uint32.txt";
std::vector<uint32_t> data;
std::vector<uint8_t*> block_start_vec;
int blocks =1000000;
int block_size = 0;
uint32_t* buffer = (uint32_t*)malloc(200000000 * sizeof(uint32_t));
bool Bsearch( int low, int high, uint32_t key)
{
    int mid;
    if (low > high)
    {
        return false;
    }
    mid = (low + high) / 2;
    if (data[mid] == key)
    {
         return true;
     }
     else if (data[mid] > key)
     {
        return  Bsearch(low, mid - 1, key);
     }
     else if (data[mid] < key)
     {
        return  Bsearch(mid + 1, high, key);
     }
}

bool ourBsearch( int low, int high, uint32_t key)
{
    int mid;
    if (low > high)
    {
        return false;
    }
    mid = (low + high) / 2;
    uint32_t data_mid = codec.randomdecodeArray8(block_start_vec[(int)mid/block_size], mid%block_size, buffer, mid/block_size);
    if (data_mid == key)
    {
         return true;
     }
     else if (data_mid > key)
     {
        return  ourBsearch(low, mid - 1, key);
     }
     else if (data_mid < key)
     {
        return  ourBsearch(mid + 1, high, key);
     }
}


int main() {
  

  // We pick a CODEC
  std::ifstream srcFile(file_name,std::ios::in); 
  if(!srcFile) { 
      std::cout << "error opening source file." << std::endl;
      return 0;
  }
  while(1){
      
      uint32_t next ;
      srcFile >> next;
      if(srcFile.eof()){break;}
      data.push_back(next);

  }
  srcFile.close();
  int N = data.size();
  if (data.size() == 0) {
    std::cout << "Empty vector" << std::endl;
    return 0;
  }
  std::cout << "vector size = " << data.size() << std::endl;
  std::cout << "vector size = " << data.size() * sizeof(uint32_t) / 1024.0 << "KB"
       << std::endl;
 
    
  
  int delta =32;
  block_size = data.size()/blocks;
  codec.init(blocks,block_size,delta);
  std::vector<int> start_index;
  int totalsize = 0;
  for(int i=0;i<blocks;i++){
    uint8_t * descriptor = (uint8_t*)malloc(block_size * sizeof(uint64_t));
    uint8_t * res = descriptor;
    res = codec.encodeArray8(data.data()+(i*block_size),block_size ,descriptor,i);
    descriptor = (uint8_t*)realloc(descriptor, (res-descriptor));
    block_start_vec.push_back(descriptor);
    totalsize += (res-descriptor);
    
 
  }
  
  double compressrate = (totalsize)*100.0  / (4*N*1.0);
  std::cout << "total compression rate:" << std::setprecision(4)<< compressrate << std::endl;
  int sample_size = 1000;
  double start = getNow();
  int data_range = block_size;
  for(int i=0;i<sample_size;i++){
      uint32_t tmpkey = rand();
      //std::cout<<tmpkey<<std::endl;
      ourBsearch(0,data_range,tmpkey);
      
  }
  double end = getNow();
  double ourbinarytime = end - start;
  std::cout << "binary time per time: " << std::setprecision(8)
     << ourbinarytime / sample_size * 1000000000 << "ns" << std::endl;

   for(int i=0;i<(int)block_start_vec.size();i++){
       free(block_start_vec[i]);
   }



  
}