// Learn-to-Compress / src / unit.cpp
#include <memory>
#include <iomanip>
#include <time.h>
#include "codecfactory.h"
#include "util.h"
#include "ztimer.h"
#include "bitpacking.h"
#include "synthetic.h"
#include "cpubenchmark.h"

using namespace std;
using namespace FastPForLib;

int main() {
  vector<shared_ptr<IntegerCODEC>> myalgos = CODECFactory::allSchemes();
  for (uint32_t b = 0; b <= 28; ++b) {

    cout << "testing... b = " << b << endl;

    for (size_t length = 128; length < (1U << 12); length += 128) {

      vector<uint32_t, cacheallocator> data(length);
      data.reserve(length + 1024);
      for (size_t i = 0; i < data.size(); ++i) {
        data[i] = (static_cast<uint32_t>(i) +
                   (24 - static_cast<uint32_t>(i)) *
                       (12 - static_cast<uint32_t>(i))) %
                  (1U << b);
      }
      for (vector<shared_ptr<IntegerCODEC>>::iterator i = myalgos.begin();
           i != myalgos.end(); ++i) {
        IntegerCODEC &c = **i;
        vector<uint32_t, cacheallocator> out(2 * data.size() + 1024);
        if (c.name() !=
            "VSEncoding") // VSEncoding is fragile, output must be zero
          for (uint32_t k = 0; k < out.size(); ++k)
            out[k] = rand();
        size_t nvalue = out.size();
        // next magic is to satisfy cachepacking aligment requirements; should
        // not hurt others
        uint32_t *aligned_out = &out[0];
        assert(!needPaddingTo64bytes(aligned_out));
        c.encodeArray(&data[0], data.size(), aligned_out, nvalue);
        out.resize(nvalue);
        vector<uint32_t, cacheallocator> recover(data.size() + 1024);

        for (uint32_t k = 0; k < recover.size(); ++k)
          recover[k] = rand();
        size_t recoveredsize = recover.size();
        c.decodeArray(aligned_out, out.size(), &recover[0], recoveredsize);
        recover.resize(recoveredsize);
        if (recover != data) {
          if (recoveredsize != data.size()) {
            cerr << "lengths don't match!" << endl;
            cerr << recoveredsize << endl;
            cerr << data.size() << endl;
          }
          uint32_t howmany = 0;
          for (size_t k = 0; k < data.size(); ++k)
            if (recover[k] != data[k]) {
              cout << "recovered[" << k << "] = " << recover[k] << endl;
              cout << "data[" << k << "] = " << data[k] << endl;
              howmany++;
              if (howmany > 15)
                break;
            }
          cerr << c.name() << endl;
          cerr << length << endl;
          throw logic_error("we have a bug");
        }
      }
    }

    for (size_t length = 0; length < (1U << 8); ++length) {
      vector<uint32_t, cacheallocator> data(length);
      data.reserve(length + 1024);
      for (size_t i = 0; i < data.size(); ++i) {
        data[i] =
            (33231 - static_cast<uint32_t>(i) + static_cast<uint32_t>(i * i)) %
            (1U << b);
      }
      for (vector<shared_ptr<IntegerCODEC>>::iterator i = myalgos.begin();
           i != myalgos.end(); ++i) {
        IntegerCODEC &c = **i;
        vector<uint32_t, cacheallocator> out(2 * data.size() + 1024);
        if (c.name() !=
            "VSEncoding") // VSEncoding is fragile, output must be zero
          for (uint32_t k = 0; k < out.size(); ++k)
            out[k] = rand();
        size_t nvalue = out.size();
        uint32_t *aligned_out = &out[0];
        assert(!needPaddingTo64bytes(aligned_out));
        c.encodeArray(data.data(), data.size(), aligned_out, nvalue);
        out.resize(nvalue);
        vector<uint32_t, cacheallocator> recover(data.size() + 1024);
        for (uint32_t k = 0; k < recover.size(); ++k)
          recover[k] = rand();
        size_t recoveredsize = recover.size();
        c.decodeArray(aligned_out, out.size(), &recover[0], recoveredsize);
        recover.resize(recoveredsize);
        if (recoveredsize != data.size()) {
          cerr << "lengths don't match!" << endl;
          cerr << recoveredsize << endl;
          cerr << data.size() << endl;
          throw logic_error("we have a bug");
        }
        if (!equal(recover.begin(), recover.end(), data.begin())) {

          uint32_t howmany = 0;
          for (size_t k = 0; k < data.size(); ++k)
            if (recover[k] != data[k]) {
              cout << "recovered[" << k << "] = " << recover[k] << endl;
              cout << "data[" << k << "] = " << data[k] << endl;
              howmany++;
              if (howmany > 15)
                break;
            }
          cerr << c.name() << endl;
          cerr << length << endl;
          throw logic_error("we have a bug");
        }
      }
    }
  }
  cout << " testing large zipfian arrays..." << endl;
  for (size_t lengthb = 5; lengthb <= 25; lengthb += 5) {
    uint32_t length = 1U << lengthb;
    cout << endl << "length = " << length << endl;
    vector<uint32_t, cacheallocator> data =
        generateZipfianArray32(length, 2, (1U << 9) - 1);
    data.reserve(data.size() + 1024);
    for (vector<shared_ptr<IntegerCODEC>>::iterator i = myalgos.begin();
         i != myalgos.end(); ++i) {
      IntegerCODEC &c = **i;
      cout << c.name();
      cout.flush();
      vector<uint32_t, cacheallocator> out(2 * data.size() + 1024);
      if (c.name() !=
          "VSEncoding") // VSEncoding is fragile, output must be zero
        for (uint32_t k = 0; k < out.size(); ++k)
          out[k] = rand();
      size_t nvalue = out.size();
      cout << " encoding ...";
      cout.flush();
      uint32_t *aligned_out = &out[0];
      assert(!needPaddingTo64bytes(aligned_out));
      c.encodeArray(&data[0], data.size(), aligned_out, nvalue);
      out.resize(nvalue);
      assert(out.size() == nvalue); // paranoid
      vector<uint32_t, cacheallocator> recover(data.size() + 1024);
      assert(recover.size() == data.size() + 1024); // being paranoid

      for (uint32_t k = 0; k < recover.size(); ++k)
        recover[k] = rand();
      size_t recoveredsize = recover.size();
      cout << " decoding ...";
      cout.flush();
      c.decodeArray(aligned_out, out.size(), &recover[0], recoveredsize);
      recover.resize(recoveredsize);
      if (recoveredsize != data.size()) {
        cerr << "lengths don't match!" << endl;
        cerr << recoveredsize << endl;
        cerr << data.size() << endl;
        throw logic_error("we have a bug");
      }
      if (!equal(recover.begin(), recover.end(), data.begin())) {
        uint32_t howmany = 0;
        for (size_t k = 0; k < data.size(); ++k)
          if (recover[k] != data[k]) {
            cout << "recovered[" << k << "] = " << recover[k] << endl;
            cout << "data[" << k << "] = " << data[k] << endl;
            howmany++;
            if (howmany > 15)
              break;
          }
        cerr << c.name() << endl;
        cerr << length << endl;
        throw logic_error("we have a bug");
      }
      cout << " ok!" << endl;
    }
  }

  cout << endl << endl << "testing...ok. Your code is good." << endl;
}