MiniDatabase / test.cpp
test.cpp
Raw
// The basic set of correctness tests for our database.

#include "main.h"
#include <assert.h>
#include <iostream> 
#include <filesystem>
#include <limits.h>
#include <unistd.h> 
#include <fcntl.h> 
#include <errno.h>
#include <cstring>

#define PAGE_SIZE 4096
#define BIN_SEARCH 0
#define BTREE_SEARCH 1
#define NO_KEY INT_MIN
#define TOMBSTONE INT_MAX
#define BITS_BLOOM 5

// testing helper to bulk insert. Inserts consecutive db entries from start_val to end_val.
void DB_bulkInsert(DB* db, int start_val, int end_val){
    for(int i = start_val; i <= end_val; i++){
        db->put(i, i*i);
    }
}

// testing helper to print kv_pairs for ease in testing / debugging.
void print_KV_pairs(kv_pairs* scanned_kv){
    std::cout << "KV pairs of size " << scanned_kv->kv_size << "\n";
    for(int i = 0; i < scanned_kv->kv_size; i++){
        std::cout << "(" << scanned_kv->kv[i][0] << ", " << scanned_kv->kv[i][1] << ")\n";
    }
}

void test_simple_put_and_get() {
    // Tests a simple example for put() and get()
    // Tests if put() and get() works with number of inputs less than max size
    std::cout << "Running test_simple_put_and_get()\n";

    std::filesystem::remove_all("db_t");

    DB* t = Open("db_t", 7, 0, 0, BIN_SEARCH, BITS_BLOOM); // AVL Tree with max size 7
    t->put(1,11);
    t->put(5,15);
    t->put(3,13);
    t->put(4,14);

    assert(t->mem_table->max_size == 7);
    assert(t->mem_table->cur_size == 4);
    assert(t->get(1) == 11); // Value for key 1 is 11
    assert(t->get(5) == 15); // Value for key 5 is 15
    assert(t->get(3) == 13);
    assert(t->get(4) == 14);
    
    std::cout << "All tests passed\n\n";
    t->Close();
    std::filesystem::remove_all("db_t");
}

void test_scan_in_range() {
    // Test that all the pairs properly scan if range includes them all and in order

    std::cout << "Running test_scan_in_range()\n";

    std::filesystem::remove_all("db_t");

    DB* t = Open("db_t", 7, 0, 0, BIN_SEARCH, BITS_BLOOM); // AVL Tree with max size 7
    t->put(1,11);
    t->put(5,15);
    t->put(3,13);
    t->put(4,14);


    struct kv_pairs* scanned_kv = t->scan(-100, 100);
    int expected[4][2] = {
        {1, 11},
        {3, 13},
        {4, 14},
        {5, 15}
    };
    
    // Test that the KV pairs that were scanned match what we expect
    assert(scanned_kv->kv_size == 4);
    
    for (int i = 0; i < scanned_kv->kv_size; i++) {
        assert(scanned_kv->kv[i][0] == expected[i][0]);
        assert(scanned_kv->kv[i][1] == expected[i][1]);
    }

    std::cout << "All tests passed\n\n";
    t->freeKVpairs(scanned_kv);
    t->Close();
    std::filesystem::remove_all("db_t");
}

void test_scan_partial_range() {
    std::cout << "Running test_scan_partial_range()\n";

    std::filesystem::remove_all("db_t");

    DB* t = Open("db_t", 7, 0, 0, BIN_SEARCH, BITS_BLOOM); // AVL Tree with max size 7
    t->put(1,11);
    t->put(5,15);
    t->put(3,13);
    t->put(4,14);

    int expected[2][2] = {
        {3, 13},
        {4, 14}
    };

    struct kv_pairs* scanned_kv = t->scan(3, 4);

    // Test that the KV pairs that were scanned match what we expect
    for (int i = 0; i < scanned_kv->kv_size; i++) {
        assert(scanned_kv->kv[i][0] == expected[i][0]);
        assert(scanned_kv->kv[i][1] == expected[i][1]);
    }

    std::cout << "All tests passed\n\n";
    t->freeKVpairs(scanned_kv);
    t->Close();
    std::filesystem::remove_all("db_t");
}
 
void test_transform_sst() {
    // Tests that when the tree is full, we transform it to an SST and create a new tree.

    std::cout << "Running test_transform_sst()\n";

    std::filesystem::remove_all("db_t");

    DB* t = Open("db_t", 3, 4, 4, BIN_SEARCH, BITS_BLOOM); // AVL Tree with max size 3
    t->put(1,11);
    t->put(5,15);
    t->put(3,13);

    t->put(2,12);    
    t->put(4,16);
    t->put(7,19);

    struct kv_pairs* scanned_kv = t->scan(0, 9);

    // Test that the sizes are correct
    assert(scanned_kv->kv_size == 6);
    assert(t->mem_table->cur_size == 3);

    // Test that the KV pairs that were scanned match what we expect

    int expected[6][2] = {
        {1, 11},
        {2, 12},
        {3, 13},
        {4, 16},
        {5, 15},
        {7, 19}
    };

    for (int i = 0; i < scanned_kv->kv_size; i++) {
        assert(scanned_kv->kv[i][0] == expected[i][0]);
        assert(scanned_kv->kv[i][1] == expected[i][1]);
    }


    std::cout << "All tests passed\n\n";
    t->freeKVpairs(scanned_kv);
    t->Close();
    std::filesystem::remove_all("db_t");
}

void test_transform_sst_btree() {
    // Tests that when the tree is full, we transform it to an SST and create a new tree.

    std::cout << "Running test_transform_sst_btree()\n";

    std::filesystem::remove_all("db_t");

    DB* t = Open("db_t", 3, 4, 4, BTREE_SEARCH, BITS_BLOOM); // AVL Tree with max size 3
    t->put(1,11);
    t->put(5,15);
    t->put(3,13);

    t->put(2,12);    
    t->put(4,16);
    t->put(7,19);

    struct kv_pairs* scanned_kv = t->scan(0, 9);

    // Test that the sizes are correct
    assert(scanned_kv->kv_size == 6);
    assert(t->mem_table->cur_size == 3);

    // Test that the KV pairs that were scanned match what we expect

    int expected[6][2] = {
        {1, 11},
        {2, 12},
        {3, 13},
        {4, 16},
        {5, 15},
        {7, 19}
    };

    for (int i = 0; i < scanned_kv->kv_size; i++) {
        assert(scanned_kv->kv[i][0] == expected[i][0]);
        assert(scanned_kv->kv[i][1] == expected[i][1]);
    }


    std::cout << "All tests passed\n\n";
    t->freeKVpairs(scanned_kv);
    t->Close();
    std::filesystem::remove_all("db_t");
}

void test_invalid_get() {
    // Test that when we call get on an invalid key, we get NO_KEY

    std::cout << "Running test_invalid_get()\n";

    DB* t = Open("db_t", 3, 0, 0, BIN_SEARCH, BITS_BLOOM); // AVL Tree with max size 3
    t->put(1,11);
    t->put(5,15);
    t->put(3,13);
    
    assert(t->get(8) == NO_KEY);

    std::cout << "All tests passed\n\n";
    t->Close();
    std::filesystem::remove_all("db_t");
}

void test_large_scan_sst() {
    // The below tests test that the tree is correctly moving to SST and that a new tree is reallocated each time.

    std::cout << "Running test_large_scan_sst()\n";

    DB* t = Open("db_t", 12, 4, 4, BIN_SEARCH, BITS_BLOOM); // AVL Tree with max size 12
    
    int expected[134][2];
    int ex_idx = 0;

    for(int i = 0; i < 100; i++){
        if(i % 3 == 0) {
            t->put(100 + i, 100 + i);
        }
        
        t->put(i, i*i);
        expected[ex_idx][0] = i;
        expected[ex_idx][1] = i * i;
        ex_idx += 1;
    }

    for(int i = 0; i < 100; i++) {  // Another loop so that expected is in sorted order
        if (i % 3 == 0) {
            expected[ex_idx][0] = 100 + i;
            expected[ex_idx][1] = 100 + i;
            ex_idx += 1;
        }
    }
    
    std::cout << "Waiting to scan...\n";


    struct kv_pairs* scanned_kv = t->scan(0, 300);


    //std::cout << "Scan size: " << scanned_kv->kv_size << "\n";
    //print_KV_pairs(scanned_kv);

    assert(scanned_kv->kv_size == 134);
    
    for (int i = 0; i < scanned_kv->kv_size; i++) {
        assert(scanned_kv->kv[i][0] == expected[i][0]);
        assert(scanned_kv->kv[i][1] == expected[i][1]);
    }

    std::cout << "All tests passed\n\n";
    t->freeKVpairs(scanned_kv);
    t->Close();
    std::filesystem::remove_all("db_t");
}

void test_large_scan_sst_btree() {
    // The below tests test that the tree is correctly moving to SST and that a new tree is reallocated each time.

    std::cout << "Running test_large_scan_sst_btree()\n";

    DB* t = Open("db_t", 12, 4, 4, BTREE_SEARCH, BITS_BLOOM); // AVL Tree with max size 12
    
    int expected[134][2];
    int ex_idx = 0;

    for(int i = 0; i < 100; i++){
        if(i % 3 == 0) {
            t->put(100 + i, 100 + i);
        }
        
        t->put(i, i*i);
        expected[ex_idx][0] = i;
        expected[ex_idx][1] = i * i;
        ex_idx += 1;
    }

    for(int i = 0; i < 100; i++) {  // Another loop so that expected is in sorted order
        if (i % 3 == 0) {
            expected[ex_idx][0] = 100 + i;
            expected[ex_idx][1] = 100 + i;
            ex_idx += 1;
        }
    }
    
    std::cout << "Waiting to scan...\n";


    struct kv_pairs* scanned_kv = t->scan(0, 300);

    assert(scanned_kv->kv_size == 134);
    
    for (int i = 0; i < scanned_kv->kv_size; i++) {
        assert(scanned_kv->kv[i][0] == expected[i][0]);
        assert(scanned_kv->kv[i][1] == expected[i][1]);
    }

    std::cout << "All tests passed\n\n";
    t->freeKVpairs(scanned_kv);
    t->Close();
    std::filesystem::remove_all("db_t");
}

void test_open_close_same_db() {
    // Tests that open and close work, and that after closing a DB, opening
    // it at a later time will retain the data stored early in the SSTs
    std::cout << "Running test_open_close_same_db()\n";

    DB* db = Open("db_t", 4, 0, 0, BIN_SEARCH, BITS_BLOOM);
    db->put(1, 11);
    db->put(2, 12);
    db->put(3, 13);
    db->put(4, 14);

    assert(db->mem_table->max_size == 4);
    assert(db->mem_table->cur_size == 4);
    assert(db->get(1) == 11);
    assert(db->get(5) == NO_KEY); // doesn't exist
    assert(db->get(3) == 13);

    db->put(7,17);
    db->put(8,18);
    db->Close();

    DB* db2 = Open("db_t", 4, 0, 0, BIN_SEARCH, BITS_BLOOM);  // the same one as above
    assert(db2->get(1) == 11);
    assert(db2->get(8) == 18);

    db2->put(5, 15);

    struct kv_pairs* scanned_kv = db2->scan(3,8);
    std::cout << scanned_kv->kv_size << " size\n";
    print_KV_pairs(scanned_kv);

    int expected[5][2] =
    {
        {3, 13},
        {4, 14},
        {5, 15},
        {7, 17},
        {8, 18}
    };

    assert(scanned_kv->kv_size == 5);

    for (int i = 0; i < scanned_kv->kv_size; i++) {
        assert(scanned_kv->kv[i][0] == expected[i][0]);
        assert(scanned_kv->kv[i][1] == expected[i][1]);
    }
    
    db2->freeKVpairs(scanned_kv);
    db2->Close();

    std::cout << "All tests passed\n\n";
    std::filesystem::remove_all("db_t");
}

void test_open_close_same_db_btree() {
    // Tests that open and close work, and that after closing a DB, opening
    // it at a later time will retain the data stored early in the SSTs
    std::cout << "Running test_open_close_same_db_btree()\n";

    DB* db = Open("db_t", 4, 0, 0, BTREE_SEARCH, BITS_BLOOM);
    db->put(1, 11);
    db->put(2, 12);
    db->put(3, 13);
    db->put(4, 14);

    assert(db->mem_table->max_size == 4);
    assert(db->mem_table->cur_size == 4);
    assert(db->get(1) == 11);
    assert(db->get(5) == NO_KEY); // doesn't exist
    assert(db->get(3) == 13);

    db->put(7,17);
    db->put(8,18);
    db->Close();

    DB* db2 = Open("db_t", 4, 0, 0, BIN_SEARCH, BITS_BLOOM);  // the same one as above
    assert(db2->get(1) == 11);
    assert(db2->get(8) == 18);

    db2->put(5, 15);

    struct kv_pairs* scanned_kv = db2->scan(3,8);

    int expected[5][2] =
    {
        {3, 13},
        {4, 14},
        {5, 15},
        {7, 17},
        {8, 18}
    };

    assert(scanned_kv->kv_size == 5);

    for (int i = 0; i < scanned_kv->kv_size; i++) {
        assert(scanned_kv->kv[i][0] == expected[i][0]);
        assert(scanned_kv->kv[i][1] == expected[i][1]);
    }
    
    db2->freeKVpairs(scanned_kv);
    db2->Close();

    std::cout << "All tests passed\n\n";
    std::filesystem::remove_all("db_t");
}

void test_update(){
    // test that updates are properly handled by get() and scan().
    std::cout << "Running test_update()\n";
    
    DB* db = Open("db_t", 8, 0, 0, BTREE_SEARCH, BITS_BLOOM);
    DB_bulkInsert(db, 1, 30); // insert 30 entries
    db->update(1, 0);
    db->update(3, 0);
    db->update(22, 0);
    DB_bulkInsert(db, 31, 60); // insert 30 entries
    db->update(45, 0);
    db->update(37, 0);
    db->update(15, 0);
    DB_bulkInsert(db, 61, 90); // insert 30 entries
    db->update(77, 0);
    db->update(55, 0);
    db->update(17, 0);
    db->update(16, 0);

    struct kv_pairs* scanned_kv = db->scan(0,100);
    assert(scanned_kv->kv_size == 90);
    // print_KV_pairs(scanned_kv); // Optional to confirm that the right elements are printed.
    db->freeKVpairs(scanned_kv);

    db->Close();
    std::cout << "All tests passed\n\n";
    std::filesystem::remove_all("db_t");
}

void test_remove_get(){
    // Tests that deletes work in memtable and in SSTs with get().
    std::cout << "Running test_remove_get()\n";

    DB* db = Open("db_t", 4, 0, 0, BIN_SEARCH, BITS_BLOOM);
    db->put(1, 11);
    db->put(2, 12);
    db->put(3, 13);
    assert(db->get(2) == 12); // 2 in in table
    db->remove(2);
    assert(db->get(2) == NO_KEY); // 2 doesn't exist (tombstone works)

    DB_bulkInsert(db, 10, 19); // insert 10 entries, moves 3 out of memtable
    assert(db->get(3) == 13); // 3 in SST
    db->remove(3);
    assert(db->get(3) == NO_KEY); // 3 doesn't exist (tombstone works on memtable)
   
    db->put(2, 22); // put 2 in the DB again (on a different SST than tombstone)
    DB_bulkInsert(db, 20, 29); // insert 10 entries again (2 is flushed into the tombstone)
    assert(db->get(2) == 22); // most recent entry is read over the tombstone
       
    db->Close();
    std::cout << "All tests passed\n\n";
    std::filesystem::remove_all("db_t");
}

void test_remove_scan(){
    // test that tombstones are properly excluded from scans, but also keep removed elements out.
    std::cout << "Running test_remove_scan()\n";
    
    DB* db = Open("db_t", 8, 0, 0, BTREE_SEARCH, BITS_BLOOM);
    DB_bulkInsert(db, 1, 30); // insert 30 entries
    db->remove(2);
    db->remove(7);
    db->remove(13);
    db->remove(21);
    db->remove(27);
    DB_bulkInsert(db, 31, 60); // insert 30 entries
    db->remove(5);
    db->remove(59);

    struct kv_pairs* scanned_kv = db->scan(0,100);
    // print_KV_pairs(scanned_kv);
    assert(scanned_kv->kv_size == 60 - 7);
    //print_KV_pairs(scanned_kv); Optional to confirm that the right elements are printed.
    db->freeKVpairs(scanned_kv);
    db->Close();
    std::cout << "All tests passed\n\n";
    std::filesystem::remove_all("db_t");
}

void test_bloom_filter(){
    std::cout << "Running test_bloom_filter()\n";
    DB* db = Open("db_t", 512, 8, 32, BTREE_SEARCH, BITS_BLOOM);
    for (int i = 0; i < 1024; i++){
        db->put(i, i+1);
    }
    assert(db->get(1) == 2);
    assert(db->get(100) == 101);
    assert(db->get(1000) == 1001);
    // only sst0 with first 512 keys
    bool exist;
    for (int j = 0; j < 512; j++) {
        exist = db->check_bloom_filters(0, j);
        assert(exist);  // all inserted numbers must exist
    }
    int fp_count = 0;
    for (int k = 800; k < 1800; k++){
        bool exist = db->check_bloom_filters(0, k);
        if (exist){
            fp_count++;
        }
    }
    std::cout << "False Positive Rate: " << (fp_count/10.0) << "% \n";
    db->Close();
    DB* db2 = Open("db_t", 512, 8, 32, BTREE_SEARCH, BITS_BLOOM);
    // all 1024 entries are in, merged into sst1, level is 1
    for (int n = 0; n < 1024; n++) {
        exist = db2->check_bloom_filters(1, n);
        if (!exist) {
            std::cout << "Not found: "<< n << "\n";
        }
        assert(exist);  // all inserted numbers must exist
    }
    int fp_count2 = 0;
    for (int m = 2000; m < 12000; m++){
        bool exist = db2->check_bloom_filters(1, m);
        if (exist){
            fp_count2++;
        }
    }
    db2->set_max_dir_size(16);
    assert(db2->get(1) == 2);
    assert(db2->get(100) == 101);
    assert(db2->get(1000) == 1001);
    std::cout << "False Positive Rate: " << (fp_count2/100.0) << "% \n";
    db2->Close();
    std::cout << "All tests passed\n\n";
    std::filesystem::remove_all("db_t");
    
}

void test_compaction() {
   std::filesystem::remove_all("db_t");
   std::cout << "Running test_compaction()\n";
    DB* db = Open("db_t", 512, 0, 0, BTREE_SEARCH, BITS_BLOOM);
    // Insert enough elements to force two memtable flushes, leading to compaction of levle 0 SSTs making a level 1 SST
    for (int i = 0; i < 1536; i++){
        db->put(i, i+1);
    }
    // assert we can scan every element that is outside of the memtable, i.e. in level 1 SST
    struct kv_pairs* scanned_kv = db->scan(0, 1025);
    assert (scanned_kv->kv_size = 1026);

    // Ensure we can get all 1536 elements, i.e. level 1 sst is correctly formed
    for (int i = 0; i < 1536; i++){
        db->get(i);
    }

    assert(db->get(1) == 2);
    assert(db->get(100) == 101);
    assert(db->get(1000) == 1001);
    db->Close();
    std::filesystem::remove_all("db_t");
}

int main()
{
    std::cout << "RUNNING TESTS\n\n";

    std::filesystem::remove_all("db_t");

    test_simple_put_and_get();
    test_scan_in_range();
    test_scan_partial_range();
    test_transform_sst();
    test_transform_sst_btree();
    test_invalid_get();
    test_large_scan_sst();
    test_large_scan_sst_btree();
    test_open_close_same_db();
    test_open_close_same_db_btree();
    test_remove_get();
    test_remove_scan();
    test_update();
    test_bloom_filter();
    test_compaction();

    std::filesystem::remove_all("db_t");
}