// MiniDatabase / exp3_ungrouped.cpp
// Alternate version of exp3 that allows you to run individual tests by input size.
// Useful if you time out on teach.cs and need to re-run only an individual experiment.

#include "main.h"
#include <assert.h>
#include <iostream> 
#include <filesystem>
#include <limits.h>
#include <chrono>

// Search-algorithm selectors forwarded as the fifth argument of Open().
// main() maps the "binary" menu entries to BIN_SEARCH and the "BTree"
// entries to BTREE_SEARCH.
#define BIN_SEARCH 0
#define BTREE_SEARCH 1

// Memtable capacity in MB; each test converts it to a number of
// (int key, int value) entries before passing it to Open().
#define MEMTABLE_SIZE 1
// Not referenced anywhere in the visible part of this file.
// NOTE(review): presumably the largest data volume (in MB) used by the
// grouped version of exp3 -- confirm before relying on it.
#define MAX_EXPERIMENT_SIZE 1024
// Buffer size in MB; each test multiplies it by 256 to obtain a page count,
// assuming 4KB pages.
#define BUFFER_SIZE 10
// Forwarded as the last argument of Open().
// NOTE(review): presumably the number of Bloom filter bits per entry -- confirm
// against the Open() declaration in main.h.
#define BLOOMFILTER_BITS 5
// BUFFER_SIZE and MEMTABLE_SIZE are in MB

/**
 * Measures how long it takes to insert `data_volume_mb` MB of key-value
 * pairs into a freshly created database stored in the "db_t" directory.
 *
 * Every entry is an int key `i` with the int value `-i`, so one MB holds
 * 1024 * 1024 / (2 * sizeof(int)) entries. The database is opened with
 * BTREE_SEARCH, and the "db_t" directory is removed both before and after
 * the run.
 *
 * Precondition: `data_volume_mb` is non-negative and the resulting entry
 * count fits in an int (keys are ints); this is checked with assert().
 *
 * @param data_volume_mb amount of data to insert, in MB.
 * @return wall-clock duration of the insert loop, in seconds.
 */
double put_test(int data_volume_mb) {
    std::cout << "Running exp3 - put test with data volume = " << data_volume_mb << " MB \n";

    // Remove leftovers from an earlier run so the test starts on an empty database.
    std::filesystem::remove_all("db_t");

    // One entry is an int key plus an int value.
    const long long entry_bytes = static_cast<long long>(2 * sizeof(int));
    const long long bytes_per_mb = 1024LL * 1024LL;

    const int mem_size = static_cast<int>((MEMTABLE_SIZE * bytes_per_mb) / entry_bytes);
    const int buffer_size = BUFFER_SIZE * 256; // assuming 4KB pages
    DB* db = Open("db_t", mem_size, buffer_size, buffer_size, BTREE_SEARCH, BLOOMFILTER_BITS);

    // Do the byte-count multiplication in 64 bits: in plain int arithmetic
    // `data_volume_mb * 1024 * 1024` overflows as soon as the volume reaches
    // 2048 MB, even though the entry count itself would still fit in an int.
    const long long total_entries = (data_volume_mb * bytes_per_mb) / entry_bytes;
    assert(total_entries >= 0 && total_entries <= INT_MAX);
    const int num_kv = static_cast<int>(total_entries);

    std::cout << "Populating the database...\n";
    // Only the insert loop is timed; opening and closing the database are excluded.
    const auto real_start = std::chrono::high_resolution_clock::now();
    for (int i = 0; i < num_kv; i++){
        db->put(i,-i);
    }
    const auto real_end = std::chrono::high_resolution_clock::now();

    const double real_time_taken = (std::chrono::duration_cast<std::chrono::microseconds>(real_end - real_start).count())/1000000.0;
    std::cout << "Put test finished. Time taken: " << real_time_taken << " seconds \n";
    std::cout << "Throughput: " << (data_volume_mb / real_time_taken) << " MB/second \n";

    db->Close();

    std::filesystem::remove_all("db_t");
    return real_time_taken;
}

/**
 * Measures how long 4096 evenly spaced point lookups take on a database
 * populated with `data_volume_mb` MB of key-value pairs.
 *
 * The database lives in the "db_t" directory, which is removed before and
 * after the run. It is filled with int keys 0..num_kv-1 (value = -key),
 * where one MB holds 1024 * 1024 / (2 * sizeof(int)) entries. Population is
 * not part of the measured time.
 *
 * Precondition: `data_volume_mb` is non-negative and the resulting entry
 * count fits in an int (keys are ints); this is checked with assert().
 *
 * @param data_volume_mb amount of data to load before the lookups, in MB.
 * @param searchALG      search algorithm selector forwarded to Open()
 *                       (BIN_SEARCH or BTREE_SEARCH).
 * @return wall-clock duration of the lookup loop, in seconds.
 */
double get_test(int data_volume_mb, int searchALG) {
    std::cout << "Running exp3 - get test with data volume = " << data_volume_mb << " MB \n";

    // Remove leftovers from an earlier run so the test starts on an empty database.
    std::filesystem::remove_all("db_t");

    // One entry is an int key plus an int value.
    const long long entry_bytes = static_cast<long long>(2 * sizeof(int));
    const long long bytes_per_mb = 1024LL * 1024LL;

    const int mem_size = static_cast<int>((MEMTABLE_SIZE * bytes_per_mb) / entry_bytes);
    const int buffer_size = BUFFER_SIZE * 256; // assuming 4KB pages
    DB* db = Open("db_t", mem_size, buffer_size, buffer_size, searchALG, BLOOMFILTER_BITS);

    // Do the byte-count multiplication in 64 bits: in plain int arithmetic
    // `data_volume_mb * 1024 * 1024` overflows as soon as the volume reaches
    // 2048 MB, even though the entry count itself would still fit in an int.
    const long long total_entries = (data_volume_mb * bytes_per_mb) / entry_bytes;
    assert(total_entries >= 0 && total_entries <= INT_MAX);
    const int num_kv = static_cast<int>(total_entries);

    std::cout << "Populating the database...\n";
    for (int i = 0; i < num_kv; i++){
        db->put(i,-i);
    }
    std::cout << "Database populated, starting test...\n";

    const int test_size = 4096;

    // Stride between queried keys. Integer division: when fewer than
    // test_size entries were inserted the stride is 0 and every lookup
    // asks for key 0.
    const int diff = num_kv / test_size;

    const auto real_start = std::chrono::high_resolution_clock::now();
    for (int j = 0; j < test_size; j++){
        db->get(j*diff);  // evenly spaced out get
    }
    const auto real_end = std::chrono::high_resolution_clock::now();

    // Volume of data looked up: one key-value pair per get.
    const double test_size_mb = (test_size*sizeof(int)*2) / (1024.0 * 1024.0);
    const double real_time_taken = (std::chrono::duration_cast<std::chrono::microseconds>(real_end - real_start).count())/1000000.0;
    std::cout << "Get test finished. Time taken: " << real_time_taken << " seconds \n";
    std::cout << "Throughput: " << (test_size_mb / real_time_taken) << " MB/second \n";

    db->Close();

    std::filesystem::remove_all("db_t");
    return real_time_taken;
}

/**
 * Measures how long 128 evenly spaced range scans take on a database
 * populated with `data_volume_mb` MB of key-value pairs.
 *
 * The database lives in the "db_t" directory, which is removed before and
 * after the run. It is filled with int keys 0..num_kv-1 (value = -key),
 * where one MB holds 1024 * 1024 / (2 * sizeof(int)) entries. Each scan
 * covers the key range [start, start + 15]. Population is not part of the
 * measured time.
 *
 * Precondition: `data_volume_mb` is non-negative and the resulting entry
 * count fits in an int (keys are ints); this is checked with assert().
 *
 * @param data_volume_mb amount of data to load before the scans, in MB.
 * @param searchALG      search algorithm selector forwarded to Open()
 *                       (BIN_SEARCH or BTREE_SEARCH).
 * @return wall-clock duration of the scan loop, in seconds.
 */
double scan_test(int data_volume_mb, int searchALG) {
    std::cout << "Running exp3 - scan test with data volume = " << data_volume_mb << " MB \n";

    // Remove leftovers from an earlier run so the test starts on an empty database.
    std::filesystem::remove_all("db_t");

    // One entry is an int key plus an int value.
    const long long entry_bytes = static_cast<long long>(2 * sizeof(int));
    const long long bytes_per_mb = 1024LL * 1024LL;

    const int mem_size = static_cast<int>((MEMTABLE_SIZE * bytes_per_mb) / entry_bytes);
    const int buffer_size = BUFFER_SIZE * 256; // assuming 4KB pages
    DB* db = Open("db_t", mem_size, buffer_size, buffer_size, searchALG, BLOOMFILTER_BITS);

    // Do the byte-count multiplication in 64 bits: in plain int arithmetic
    // `data_volume_mb * 1024 * 1024` overflows as soon as the volume reaches
    // 2048 MB, even though the entry count itself would still fit in an int.
    const long long total_entries = (data_volume_mb * bytes_per_mb) / entry_bytes;
    assert(total_entries >= 0 && total_entries <= INT_MAX);
    const int num_kv = static_cast<int>(total_entries);

    std::cout << "Populating the database...\n";
    for (int i = 0; i < num_kv; i++){
        db->put(i,-i);
    }
    std::cout << "Database populated, starting test...\n";

    const int test_size = 128;

    // Stride between scan start keys. Integer division: when fewer than
    // test_size entries were inserted the stride is 0 and every scan
    // starts at key 0.
    const int diff = num_kv / test_size;

    const auto real_start = std::chrono::high_resolution_clock::now();
    for (int j = 0; j < test_size; j++){
        db->scan(j*diff, j*diff + 15);  // evenly spaced out scan, 16 items per scan
    }
    const auto real_end = std::chrono::high_resolution_clock::now();

    // Nominal volume scanned: the figure assumes every scan yields 16 pairs.
    const double test_size_mb = (test_size*sizeof(int)*2*16) / (1024.0 * 1024.0);
    const double real_time_taken = (std::chrono::duration_cast<std::chrono::microseconds>(real_end - real_start).count())/1000000.0;
    std::cout << "Scan test finished. Time taken: " << real_time_taken << " seconds \n";
    std::cout << "Throughput: " << (test_size_mb / real_time_taken) << " MB/second \n";

    db->Close();

    std::filesystem::remove_all("db_t");
    return real_time_taken;
}


/**
 * Interactive entry point: asks which experiment to run (1-5) and the data
 * size in MB, then runs exactly that experiment once.
 *
 * @return 0 after running the selected test, 1 when the input cannot be
 *         parsed, the test number is not in 1..5, or the size is negative.
 */
int main(){
    std::cout << "Please enter a number indicating which test you want to perform (1.put, 2.get_binary, 3.get_BTree 4.scan_binary, 5.scan_BTREE): \n";
    int test = 0;
    // Check the stream state: a failed read must not fall through silently.
    if (!(std::cin >> test) || test < 1 || test > 5){
        std::cerr << "Invalid test selection: expected a number between 1 and 5.\n";
        return 1;
    }

    std::cout << "Please enter a number indicating your desired data size, in MB \n";
    int size = 0;
    // Without this check an EOF here would leave `size` unset when it is
    // handed to the test.
    if (!(std::cin >> size) || size < 0){
        std::cerr << "Invalid data size: expected a non-negative number of MB.\n";
        return 1;
    }

    switch (test){
        case 1:
            put_test(size);
            break;
        case 2:
            get_test(size, BIN_SEARCH);
            break;
        case 3:
            get_test(size, BTREE_SEARCH);
            break;
        case 4:
            scan_test(size, BIN_SEARCH);
            break;
        case 5:
            scan_test(size, BTREE_SEARCH);
            break;
    }
    return 0;
}