// Experiments for performance, run to generate the graphs in our report. // If the tests are taking too long, you are free to decrease max experiment size to 512 or 256. #include "main.h" #include #include #include #include #include #define BIN_SEARCH 0 #define BTREE_SEARCH 1 #define MEMTABLE_SIZE 1 #define MAX_EXPERIMENT_SIZE 1024 #define BUFFER_SIZE 10 #define BLOOMFILTER_BITS 5 // BUFFER_SIZE and MEMTABLE_SIZE are in MB double put_test(int data_volume_mb) { std::cout << "Running exp3 - put test with data volume = " << data_volume_mb << " MB \n"; std::filesystem::remove_all("db_t"); int mem_size_mb = MEMTABLE_SIZE; int mem_size = (mem_size_mb * 1024 * 1024) / (2 * sizeof(int)); int buffer_size = BUFFER_SIZE * 256; // assuming 4KB pages DB* db = Open("db_t", mem_size, buffer_size, buffer_size, BTREE_SEARCH, BLOOMFILTER_BITS); int num_kv = (data_volume_mb * 1024 * 1024) / (2 * sizeof(int)); clock_t start, end; std::cout << "Populating the database...\n"; start = clock(); // cpu clock auto real_start = std::chrono::high_resolution_clock::now(); // real time clock for (int i = 0; i < num_kv; i++){ db->put(i,-i); } auto real_end = std::chrono::high_resolution_clock::now(); end = clock(); double time_taken = double(end - start) / double(CLOCKS_PER_SEC); double real_time_taken = (std::chrono::duration_cast(real_end - real_start).count())/1000000.0; std::cout << "Put test finished. Time taken: " << real_time_taken << " seconds \n"; std::cout << "Throughput: " << (data_volume_mb / real_time_taken) << " MB/second \n"; db->Close(); std::filesystem::remove_all("db_t"); return real_time_taken; } double get_test(int data_volume_mb, int searchALG) { std::cout << "Running exp3 - get test with data volume = " << data_volume_mb << " MB \n"; std::filesystem::remove_all("db_t"); int mem_size_mb = MEMTABLE_SIZE; int mem_size = (mem_size_mb * 1024 * 1024) / (2 * sizeof(int)); int buffer_size = BUFFER_SIZE * 256; // assuming 4KB pages DB* db = Open("db_t", mem_size, buffer_size, buffer_size, searchALG, BLOOMFILTER_BITS); int num_kv = (data_volume_mb * 1024 * 1024) / (2 * sizeof(int)); clock_t start, end; std::cout << "Populating the database...\n"; //progressbar bar(num_kv); for (int i = 0; i < num_kv; i++){ db->put(i,-i); //bar.update(); } std::cout << "Database populated, starting test...\n"; int test_size = 4096; int diff = num_kv / test_size; start = clock(); auto real_start = std::chrono::high_resolution_clock::now(); for (int j = 0; j < test_size; j++){ db->get(j*diff); // evenly spaced out get } auto real_end = std::chrono::high_resolution_clock::now(); end = clock(); double test_size_mb = (test_size*sizeof(int)*2) / (1024.0 * 1024.0); double time_taken = double(end - start) / double(CLOCKS_PER_SEC); double real_time_taken = (std::chrono::duration_cast(real_end - real_start).count())/1000000.0; std::cout << "Get test finished. Time taken: " << real_time_taken << " seconds \n"; std::cout << "Throughput: " << (test_size_mb / real_time_taken) << " MB/second \n"; db->Close(); // std::cout << "All tests ended\n\n"; std::filesystem::remove_all("db_t"); return real_time_taken; } double scan_test(int data_volume_mb, int searchALG) { std::cout << "Running exp3 - scan test with data volume = " << data_volume_mb << " MB \n"; std::filesystem::remove_all("db_t"); int mem_size_mb = MEMTABLE_SIZE; int mem_size = (mem_size_mb * 1024 * 1024) / (2 * sizeof(int)); int buffer_size = BUFFER_SIZE * 256; // assuming 4KB pages DB* db = Open("db_t", mem_size, buffer_size, buffer_size, searchALG, BLOOMFILTER_BITS); int num_kv = (data_volume_mb * 1024 * 1024) / (2 * sizeof(int)); clock_t start, end; std::cout << "Populating the database...\n"; //progressbar bar(num_kv); for (int i = 0; i < num_kv; i++){ db->put(i,-i); //bar.update(); } std::cout << "Database populated, starting test...\n"; int test_size = 128; int diff = num_kv / test_size; start = clock(); auto real_start = std::chrono::high_resolution_clock::now(); for (int j = 0; j < test_size; j++){ db->scan(j*diff, j*diff + 15); // evenly spaced out scan, 16 items per scan } auto real_end = std::chrono::high_resolution_clock::now(); end = clock(); double test_size_mb = (test_size*sizeof(int)*2*16) / (1024.0 * 1024.0); double time_taken = double(end - start) / double(CLOCKS_PER_SEC); double real_time_taken = (std::chrono::duration_cast(real_end - real_start).count())/1000000.0; std::cout << "Scan test finished. Time taken: " << real_time_taken << " seconds \n"; std::cout << "Throughput: " << (test_size_mb / real_time_taken) << " MB/second \n"; db->Close(); std::filesystem::remove_all("db_t"); return real_time_taken; } int main(){ std::cout << "Please enter a number indicating which test you want to perform (0.all tests, 1.put, 2.get_binary, 3.get_BTree 4.scan_binary, 5.scan_BTREE): \n"; int test; std::cin >> test; double time_taken; FILE* outputFile; if (test == 1 || test == 0){ outputFile = fopen("exp3_put.csv", "w"); fprintf(outputFile, "input size (MB):,"); for(int i = 1; i <= MAX_EXPERIMENT_SIZE; i*= 2){ fprintf(outputFile, "%d", i); if (i != MAX_EXPERIMENT_SIZE){fprintf(outputFile, ",");} else {fprintf(outputFile, "\n");} } fprintf(outputFile, "time taken (sec):,"); for(int i = 1; i <= MAX_EXPERIMENT_SIZE; i*= 2){ time_taken = put_test(i); fprintf(outputFile, "%f", time_taken); if (i != MAX_EXPERIMENT_SIZE){fprintf(outputFile, ",");} else {fprintf(outputFile, "\n");} } fclose(outputFile); } if (test == 2 || test == 0){ outputFile = fopen("exp3_get_binary.csv", "w"); fprintf(outputFile, "input size (MB):,"); for(int i = 1; i <= MAX_EXPERIMENT_SIZE; i*= 2){ fprintf(outputFile, "%d", i); if (i != MAX_EXPERIMENT_SIZE){fprintf(outputFile, ",");} else {fprintf(outputFile, "\n");} } fprintf(outputFile, "time taken (sec):,"); for(int i = 1; i <= MAX_EXPERIMENT_SIZE; i*= 2){ time_taken = get_test(i, BIN_SEARCH); fprintf(outputFile, "%f", time_taken); if (i != MAX_EXPERIMENT_SIZE){fprintf(outputFile, ",");} else {fprintf(outputFile, "\n");} } fclose(outputFile); } if (test == 3 || test == 0){ outputFile = fopen("exp3_get_BTree.csv", "w"); fprintf(outputFile, "input size (MB):,"); for(int i = 1; i <= MAX_EXPERIMENT_SIZE; i*= 2){ fprintf(outputFile, "%d", i); if (i != MAX_EXPERIMENT_SIZE){fprintf(outputFile, ",");} else {fprintf(outputFile, "\n");} } fprintf(outputFile, "time taken (sec):,"); for(int i = 1; i <= MAX_EXPERIMENT_SIZE; i*= 2){ time_taken = get_test(i, BTREE_SEARCH); fprintf(outputFile, "%f", time_taken); if (i != MAX_EXPERIMENT_SIZE){fprintf(outputFile, ",");} else {fprintf(outputFile, "\n");} } fclose(outputFile); } if (test == 4 || test == 0){ outputFile = fopen("exp3_scan_binary.csv", "w"); fprintf(outputFile, "input size (MB):,"); for(int i = 1; i <= MAX_EXPERIMENT_SIZE; i*= 2){ fprintf(outputFile, "%d", i); if (i != MAX_EXPERIMENT_SIZE){fprintf(outputFile, ",");} else {fprintf(outputFile, "\n");} } fprintf(outputFile, "time taken (sec):,"); for(int i = 1; i <= MAX_EXPERIMENT_SIZE; i*= 2){ time_taken = scan_test(i, BIN_SEARCH); fprintf(outputFile, "%f", time_taken); if (i != MAX_EXPERIMENT_SIZE){fprintf(outputFile, ",");} else {fprintf(outputFile, "\n");} } fclose(outputFile); } if (test == 5 || test == 0){ outputFile = fopen("exp3_scan_BTree.csv", "w"); fprintf(outputFile, "input size (MB):,"); for(int i = 1; i <= MAX_EXPERIMENT_SIZE; i*= 2){ fprintf(outputFile, "%d", i); if (i != MAX_EXPERIMENT_SIZE){fprintf(outputFile, ",");} else {fprintf(outputFile, "\n");} } fprintf(outputFile, "time taken (sec):,"); for(int i = 1; i <= MAX_EXPERIMENT_SIZE; i*= 2){ time_taken = scan_test(i, BTREE_SEARCH); fprintf(outputFile, "%f", time_taken); if (i != MAX_EXPERIMENT_SIZE){fprintf(outputFile, ",");} else {fprintf(outputFile, "\n");} } fclose(outputFile); } return 0; }