// Performance experiments; run this program to generate the graphs in our report.
// If the tests take too long, you are free to decrease MAX_EXPERIMENT_SIZE (defined below) to 512 or 256.
#include "main.h"
#include <assert.h>
#include <iostream>
#include <filesystem>
#include <limits.h>
#include <chrono>
// Search-algorithm selectors handed to Open() as its fifth argument.
// NOTE(review): presumably binary search vs. B-tree search over stored data — confirm in main.h.
#define BIN_SEARCH 0
#define BTREE_SEARCH 1
// Memtable size in MB; the tests convert it to a number of (int, int) pairs before calling Open().
#define MEMTABLE_SIZE 1
// Largest data volume, in MB, that main() passes to the tests (it sweeps 1, 2, 4, ... up to this value).
#define MAX_EXPERIMENT_SIZE 1024
// Buffer size in MB; the tests multiply it by 256 to obtain a page count, assuming 4KB pages.
#define BUFFER_SIZE 10
// Passed to Open() as its last argument.
// NOTE(review): presumably Bloom filter bits per entry — confirm in main.h.
#define BLOOMFILTER_BITS 5
// BUFFER_SIZE and MEMTABLE_SIZE are in MB
// Measures the wall-clock time needed to insert `data_volume_mb` MB of key-value
// pairs into a fresh database stored in the "db_t" directory.
//
// A pair is two ints, so data_volume_mb * 2^20 / (2 * sizeof(int)) pairs are
// inserted; pair i is stored as (i, -i). Only the put loop is timed: directory
// cleanup, Open and Close all happen outside the measured interval.
//
// @param data_volume_mb  amount of data to insert, in MB.
// @return wall-clock duration of the put loop, in seconds.
double put_test(int data_volume_mb) {
    std::cout << "Running exp3 - put test with data volume = " << data_volume_mb << " MB \n";
    std::filesystem::remove_all("db_t");  // discard leftovers from a previous run

    // Sizes are computed in 64 bits: data_volume_mb * 1024 * 1024 overflows a
    // 32-bit int as soon as the volume reaches 2048 MB.
    const long long bytes_per_mb = 1024LL * 1024LL;
    const long long bytes_per_pair = static_cast<long long>(2 * sizeof(int));
    const int mem_size = static_cast<int>((MEMTABLE_SIZE * bytes_per_mb) / bytes_per_pair);
    const int buffer_size = BUFFER_SIZE * 256;  // assuming 4KB pages

    // NOTE(review): buffer_size is passed twice; presumably a min/max pair — confirm against Open().
    DB* db = Open("db_t", mem_size, buffer_size, buffer_size, BTREE_SEARCH, BLOOMFILTER_BITS);

    const long long total_pairs = (static_cast<long long>(data_volume_mb) * bytes_per_mb) / bytes_per_pair;
    assert(total_pairs <= INT_MAX);  // keys are generated from an int loop counter
    const int num_kv = static_cast<int>(total_pairs);

    std::cout << "Populating the database...\n";
    // steady_clock is monotonic, so the interval cannot be distorted by system clock adjustments.
    const auto real_start = std::chrono::steady_clock::now();
    for (int i = 0; i < num_kv; i++) {
        db->put(i, -i);
    }
    const auto real_end = std::chrono::steady_clock::now();
    const double real_time_taken = std::chrono::duration<double>(real_end - real_start).count();

    std::cout << "Put test finished. Time taken: " << real_time_taken << " seconds \n";
    std::cout << "Throughput: " << (data_volume_mb / real_time_taken) << " MB/second \n";
    db->Close();
    std::filesystem::remove_all("db_t");
    return real_time_taken;
}
// Measures the wall-clock time of 4096 point lookups against a database that
// has first been populated with `data_volume_mb` MB of (i, -i) pairs.
//
// The database lives in the "db_t" directory, which is removed before and
// after the run. Population is not timed; only the loop of get() calls is.
// The looked-up keys are evenly spaced over the inserted key range
// (0, diff, 2*diff, ... with diff = num_kv / 4096).
//
// @param data_volume_mb  amount of data to load before measuring, in MB.
// @param searchALG       search-algorithm selector forwarded to Open()
//                        (callers in this file pass BIN_SEARCH or BTREE_SEARCH).
// @return wall-clock duration of the get loop, in seconds.
double get_test(int data_volume_mb, int searchALG) {
    std::cout << "Running exp3 - get test with data volume = " << data_volume_mb << " MB \n";
    std::filesystem::remove_all("db_t");  // discard leftovers from a previous run

    // Sizes are computed in 64 bits: data_volume_mb * 1024 * 1024 overflows a
    // 32-bit int as soon as the volume reaches 2048 MB.
    const long long bytes_per_mb = 1024LL * 1024LL;
    const long long bytes_per_pair = static_cast<long long>(2 * sizeof(int));
    const int mem_size = static_cast<int>((MEMTABLE_SIZE * bytes_per_mb) / bytes_per_pair);
    const int buffer_size = BUFFER_SIZE * 256;  // assuming 4KB pages

    // NOTE(review): buffer_size is passed twice; presumably a min/max pair — confirm against Open().
    DB* db = Open("db_t", mem_size, buffer_size, buffer_size, searchALG, BLOOMFILTER_BITS);

    const long long total_pairs = (static_cast<long long>(data_volume_mb) * bytes_per_mb) / bytes_per_pair;
    assert(total_pairs <= INT_MAX);  // keys are generated from an int loop counter
    const int num_kv = static_cast<int>(total_pairs);

    std::cout << "Populating the database...\n";
    for (int i = 0; i < num_kv; i++) {
        db->put(i, -i);
    }
    std::cout << "Database populated, starting test...\n";

    const int test_size = 4096;
    const int diff = num_kv / test_size;  // spacing between consecutive probed keys

    // steady_clock is monotonic, so the interval cannot be distorted by system clock adjustments.
    const auto real_start = std::chrono::steady_clock::now();
    for (int j = 0; j < test_size; j++) {
        db->get(j * diff);  // evenly spaced out get
    }
    const auto real_end = std::chrono::steady_clock::now();

    // Volume of data requested by the lookups, counting one (int, int) pair per get.
    const double test_size_mb = (test_size * sizeof(int) * 2) / (1024.0 * 1024.0);
    const double real_time_taken = std::chrono::duration<double>(real_end - real_start).count();

    std::cout << "Get test finished. Time taken: " << real_time_taken << " seconds \n";
    std::cout << "Throughput: " << (test_size_mb / real_time_taken) << " MB/second \n";
    db->Close();
    std::filesystem::remove_all("db_t");
    return real_time_taken;
}
// Measures the wall-clock time of 128 range scans against a database that has
// first been populated with `data_volume_mb` MB of (i, -i) pairs.
//
// The database lives in the "db_t" directory, which is removed before and
// after the run. Population is not timed; only the loop of scan() calls is.
// Each scan covers 16 consecutive keys, and the scan start keys are evenly
// spaced over the inserted key range (0, diff, 2*diff, ... with
// diff = num_kv / 128).
//
// @param data_volume_mb  amount of data to load before measuring, in MB.
// @param searchALG       search-algorithm selector forwarded to Open()
//                        (callers in this file pass BIN_SEARCH or BTREE_SEARCH).
// @return wall-clock duration of the scan loop, in seconds.
double scan_test(int data_volume_mb, int searchALG) {
    std::cout << "Running exp3 - scan test with data volume = " << data_volume_mb << " MB \n";
    std::filesystem::remove_all("db_t");  // discard leftovers from a previous run

    // Sizes are computed in 64 bits: data_volume_mb * 1024 * 1024 overflows a
    // 32-bit int as soon as the volume reaches 2048 MB.
    const long long bytes_per_mb = 1024LL * 1024LL;
    const long long bytes_per_pair = static_cast<long long>(2 * sizeof(int));
    const int mem_size = static_cast<int>((MEMTABLE_SIZE * bytes_per_mb) / bytes_per_pair);
    const int buffer_size = BUFFER_SIZE * 256;  // assuming 4KB pages

    // NOTE(review): buffer_size is passed twice; presumably a min/max pair — confirm against Open().
    DB* db = Open("db_t", mem_size, buffer_size, buffer_size, searchALG, BLOOMFILTER_BITS);

    const long long total_pairs = (static_cast<long long>(data_volume_mb) * bytes_per_mb) / bytes_per_pair;
    assert(total_pairs <= INT_MAX);  // keys are generated from an int loop counter
    const int num_kv = static_cast<int>(total_pairs);

    std::cout << "Populating the database...\n";
    for (int i = 0; i < num_kv; i++) {
        db->put(i, -i);
    }
    std::cout << "Database populated, starting test...\n";

    const int test_size = 128;
    const int keys_per_scan = 16;
    const int diff = num_kv / test_size;  // spacing between consecutive scan start keys

    // steady_clock is monotonic, so the interval cannot be distorted by system clock adjustments.
    const auto real_start = std::chrono::steady_clock::now();
    for (int j = 0; j < test_size; j++) {
        const int first_key = j * diff;
        db->scan(first_key, first_key + keys_per_scan - 1);  // evenly spaced out scan, 16 items per scan
    }
    const auto real_end = std::chrono::steady_clock::now();

    // Volume of data requested by the scans: 16 (int, int) pairs per scan.
    const double test_size_mb = (test_size * sizeof(int) * 2 * keys_per_scan) / (1024.0 * 1024.0);
    const double real_time_taken = std::chrono::duration<double>(real_end - real_start).count();

    std::cout << "Scan test finished. Time taken: " << real_time_taken << " seconds \n";
    std::cout << "Throughput: " << (test_size_mb / real_time_taken) << " MB/second \n";
    db->Close();
    std::filesystem::remove_all("db_t");
    return real_time_taken;
}
// Runs one experiment for data volumes 1, 2, 4, ... MB up to MAX_EXPERIMENT_SIZE
// and writes the results to `csv_path` as two CSV rows: the input sizes, then
// the time `run_one` returned for each size.
// Returns false, after reporting on stderr, when the output file cannot be opened.
static bool run_experiment_sweep(const char* csv_path, double (*run_one)(int)) {
    FILE* out = fopen(csv_path, "w");
    if (out == nullptr) {
        // Writing through a null FILE* would crash; skip this experiment instead.
        std::cerr << "Could not open " << csv_path << " for writing\n";
        return false;
    }

    fprintf(out, "input size (MB):,");
    for (int mb = 1; mb <= MAX_EXPERIMENT_SIZE; mb *= 2) {
        if (mb != 1) { fprintf(out, ","); }
        fprintf(out, "%d", mb);
    }
    // The row is terminated after the loop so it is well-formed even when
    // MAX_EXPERIMENT_SIZE is not itself one of the swept powers of two.
    fprintf(out, "\n");

    fprintf(out, "time taken (sec):,");
    for (int mb = 1; mb <= MAX_EXPERIMENT_SIZE; mb *= 2) {
        const double time_taken = run_one(mb);
        if (mb != 1) { fprintf(out, ","); }
        fprintf(out, "%f", time_taken);
        fflush(out);  // keep finished measurements on disk if a long run is interrupted
    }
    fprintf(out, "\n");

    fclose(out);
    return true;
}

// Asks on stdin which experiment to run (0 = all, 1..5 = a single one), runs the
// selected experiment(s) in a fixed order and writes one CSV file per experiment
// into the current working directory.
// Returns 0 on success, 1 when the selection is invalid or an output file could
// not be opened.
int main() {
    std::cout << "Please enter a number indicating which test you want to perform (0.all tests, 1.put, 2.get_binary, 3.get_BTree 4.scan_binary, 5.scan_BTREE): \n";

    int test = 0;
    // A failed extraction leaves 0 in `test`, which would silently start every
    // experiment, so unreadable or out-of-range input is rejected explicitly.
    if (!(std::cin >> test) || test < 0 || test > 5) {
        std::cerr << "Invalid selection: expected a number between 0 and 5\n";
        return 1;
    }

    struct Experiment {
        int id;                   // menu number that selects this experiment
        const char* csv_path;     // output file for the results
        double (*run_one)(int);   // runs the experiment for a data volume in MB, returns seconds
    };
    const Experiment experiments[] = {
        {1, "exp3_put.csv",         put_test},
        {2, "exp3_get_binary.csv",  [](int mb) { return get_test(mb, BIN_SEARCH); }},
        {3, "exp3_get_BTree.csv",   [](int mb) { return get_test(mb, BTREE_SEARCH); }},
        {4, "exp3_scan_binary.csv", [](int mb) { return scan_test(mb, BIN_SEARCH); }},
        {5, "exp3_scan_BTree.csv",  [](int mb) { return scan_test(mb, BTREE_SEARCH); }},
    };

    bool all_ok = true;
    for (const Experiment& experiment : experiments) {
        if (test == 0 || test == experiment.id) {
            // Keep going after a failure so the remaining experiments still produce output.
            if (!run_experiment_sweep(experiment.csv_path, experiment.run_one)) {
                all_ok = false;
            }
        }
    }
    return all_ok ? 0 : 1;
}