MiniDatabase / buffer.cpp
buffer.cpp
Raw
#include <vector>
#include <string>
#include <list>
#include <cstdlib>
#include "xxhash32.h"
#include "buffer.h"
#include <iostream>

ExtendibleHashDirectory::ExtendibleHashDirectory(int initialDepth, int maxDepth)
    : globalDepth(initialDepth), maxDepth(maxDepth), max_chaining(5) {
    directoryVec.resize(1 << globalDepth);  // 2^globalDepth entries
}

int ExtendibleHashDirectory::addPage(int pageId, PageFrame* pageFrame) {
    uint32_t hash = XXHash32::hash(&pageId, sizeof(int), 0);
    int index = 0;
    if (globalDepth > 0) {
        index = hash >> (32 - globalDepth); // Use the first globalDepth bits as the index
    }
    if (globalDepth < maxDepth && directoryVec[index].size() >= max_chaining) {
        // Overflow when the chain is length 5, can be tuned.
        // Increase the size will add overhead for remove (and all functions in general)
        directoryVec[index].push_back(pageFrame);
        globalDepth++;
        std::vector<std::list<PageFrame*>> newDirectory(1 << globalDepth);
        for (int i = 0; i < (1 << (globalDepth - 1)); i++) {
            for (PageFrame* frame : directoryVec[i]) {
                int framePageId = frame->pageId;
                uint32_t frameHash = XXHash32::hash(&framePageId, sizeof(int), 0);  // Rehash the exisiting pages
                int newIndex = 0;
                if (globalDepth > 0){
                    newIndex = frameHash >> (32 - globalDepth);
                }
                newDirectory[newIndex].push_back(frame);
            }
        }
        directoryVec = std::move(newDirectory);
        return 1;
    }
    else if (directoryVec[index].size() >= max_chaining) {  // max size reached but overflowed again, need evict
        return 0;
    }
    else {
        directoryVec[index].push_back(pageFrame);
        return 1;
    }
}

int ExtendibleHashDirectory::getGlobalDepth() {
    return globalDepth;
}

std::list<PageFrame*>& ExtendibleHashDirectory::getPageFrames(int index) {
    return directoryVec[index];
}

BufferPool::BufferPool(int initialDepth, int maxDepth)
    : directory(initialDepth, maxDepth), curSize(0), head(-1), tail(-1) {}

void BufferPool::insertPage(int pageId, char* data) {
    // std::cout << "Page " << pageId <<" is being inserted.\n";
    // Check if the page is already in the buffer pool
    if (getPage(pageId) != nullptr) {
        return; // Page is already in the buffer pool
    }
    PageFrame *pg = new PageFrame(pageId, data);

    // Set up info for LRU algorithm. Inserted page is at head of queue, pointing to old head.
    
    if(head == -1){ // Special case: buffer pool is empty
        head = pageId;
        tail = pageId;
        pg->next = -1;
        pg->previous = -1;
    }
    else{ // buffer pool is non-empty
        getPageFrame(head)->previous = pageId; // old head points to new
        pg->next = head; // new head points to old
        pg->previous = -1; // new head has no prev element
        head = pageId; // head global variable now points to new head
    }

    // Add the page to the buffer pool and directory
    int result = directory.addPage(pageId, pg);
    
    // If the page is unable to be successfully inserted, keep removing pages until it can be.
    while (result == 0) {
        evictPage(evictPolicy());
        result = directory.addPage(pageId, pg);
    }
    // std::cout << "Page " << pageId <<" has been inserted.\n";
    curSize++;

}
// Could have getPageFrame integrated. Do this later?
char* BufferPool::getPage(int pageId) {
    uint32_t hash = XXHash32::hash(&pageId, sizeof(int), 0);
    uint32_t hashPrefix = 0;
    if (directory.getGlobalDepth() > 0){
        hashPrefix = hash >> (32 - directory.getGlobalDepth());
    }
    
    if (hashPrefix < 0 || hashPrefix >= directory.directoryVec.size()){
        return nullptr;
    }
    std::list<PageFrame*> pageList = directory.getPageFrames(hashPrefix);
    for (PageFrame* frame : pageList) {
        if (frame->pageId == pageId) { // we have found the desired pageframe.

            // Update LRU queue info (if this page is the head, no updates are needed).
            if(pageId != head){
                if(pageId == tail){ // There is a new tail for the queue. Update it.
                    tail = frame->previous;
                    getPageFrame(frame->previous)->next = -1;
                    
                }
                else{ // current page is not head or tail, is a middle element.
                    PageFrame* previous_page = getPageFrame(frame->previous);
                    PageFrame* next_page = getPageFrame(frame->next);
                    previous_page->next = next_page->pageId;
                    next_page->previous = previous_page->pageId;
                }
                // Now, move page to front of queue.
                frame->next = head; // current frame points to old head.
                frame->previous = -1; // current frame has no prior element.
                getPageFrame(head)->previous = pageId; // old head points to current frame as previous element.
                head = pageId; // now we can make the new frame the head safely.
            }
            return frame->getData();
        }
    }
    return nullptr; // Page not found in the buffer pool
}

// Helper function, only for use by the page replacement algorithm.
// This function assumes the page corresponding to pageId exists. Nullptr is not meant to be returned, if it is there is a flaw in the code.
PageFrame* BufferPool::getPageFrame(int pageId) {
    uint32_t hash = XXHash32::hash(&pageId, sizeof(int), 0);
    uint32_t hashPrefix = 0;
    if (directory.getGlobalDepth() > 0){
        hashPrefix = hash >> (32 - directory.getGlobalDepth());
    }
    if (hashPrefix < 0 || hashPrefix >= directory.directoryVec.size()){
        std::cerr << "Page replacement error: Buffer page with pageId " << pageId << " queried for, but does not exist.\n";
        return nullptr;
    }
    std::list<PageFrame*> pageList = directory.getPageFrames(hashPrefix);
    for (PageFrame* frame : pageList) {
        if (frame->pageId == pageId) {
            return frame;
        }
    }
    std::cerr << "Page replacement error: Buffer page with pageId " << pageId << " queried for, but does not exist.\n";
    return nullptr; // Page not found in the buffer pool
}

void BufferPool::setMaxDirectoryDepth(int depth) {
    if(depth == 0){no_buffer = true;}
    else{no_buffer = false;}
    
    if (depth > directory.globalDepth) {
        directory.maxDepth = depth;
        return;
    }
    else{
        std::vector<std::list<PageFrame*>> newDirectory(1 << depth);
        for (int i = 0; i < (1 << directory.globalDepth); i++) {
            for (PageFrame* frame : directory.directoryVec[i]) {
                int framePageId = frame->pageId;
                uint32_t frameHash = XXHash32::hash(&framePageId, sizeof(int), 0);  // Rehash the exisiting pages
                int newIndex = 0;
                if (depth > 0) {
                    newIndex = frameHash >> (32 - depth);
                }
                
                newDirectory[newIndex].push_back(frame);
            }
        }
        directory.directoryVec = std::move(newDirectory);
        directory.maxDepth = depth;
        directory.globalDepth = depth;
        for (int i = 0; i < (1 << directory.globalDepth); i++) {
            while (directory.directoryVec[i].size() >= directory.max_chaining){
                evictPage(evictPolicy());
            }
        }
    }
}

void BufferPool::evictPage(PageFrame* pageFrame) {
    // std::cout << "Page " << pageFrame->pageId <<" is being evicted.\n";
    uint32_t hash = XXHash32::hash(&pageFrame->pageId, sizeof(int), 0);
    uint32_t hashPrefix = 0;
    if (directory.getGlobalDepth() > 0){
        hashPrefix = hash >> (32 - directory.getGlobalDepth());
    }
    // Remove is O(N) in nature, but pageList length <= 5 here. Constant time.
    // A handmade Linked list can be created for this but in sacrific of the clean,
    // safe code of std::list
    free(pageFrame->data);
    directory.directoryVec[hashPrefix].remove(pageFrame);
    delete(pageFrame);
    curSize -= 1;
}

PageFrame* BufferPool::evictPolicy(){
    // LRU eviction policy: evict the least recently used page. We then need to update our linked list.

    // Edge case: empty linked list, buffer pool has been exhausted
    if(tail == -1) return nullptr;
        // std::cerr << "Page replacement error: eviction called on empty buffer pool.\n";

    // Edge case: only one element in linked list.
    if(head == tail){
        int frameNum = head;
        head = -1;
        tail = -1;
        return getPageFrame(frameNum);
    } 

    // Can safely assume linked list size >= 2 (oldTail->previous exists)
    PageFrame* oldTail = getPageFrame(tail); // Page to be evicted
    PageFrame* newTail = getPageFrame(oldTail->previous); // new page to be set as tail.
    newTail->next = -1; //new tail has no next element.
    tail = newTail->pageId; // set global tail variable to new tail.

    return oldTail;
}

// Policy that is used after compaction. Empties the buffer pool to remove invalid pages.
void BufferPool::emptyBuffer(){
    PageFrame* currPage = evictPolicy();
    while (currPage != nullptr){
        evictPage(currPage);
        currPage = evictPolicy();
    }
}

// Potential alternate policy that can be used after compaction. Removes ALL pages that correspond to a given SST.
// for baseline_size, pass in LSM_level0_pagecount_upperbound().
void BufferPool::evictFromSST(int baseline_size, int SST_num){
    int start_index = (((1 << SST_num) - 1) * baseline_size); // start of pages for current SST
    int end_index = (((1 << (SST_num + 1)) - 1) * baseline_size); // start of pages for next SST
    evict_in_range(start_index, end_index);
}

// Evicts all pages with start_index <= PageID < end_index.
void BufferPool::evict_in_range(int start_index, int end_index){
    int curr = head;
    PageFrame* currPage;
    int nextPage;

    // Iterate through linked list of buffer pages
    while(curr != 1){ // curr will be -1 if the buffer is empty, or if we have tried to iterate past the tail.
        currPage = getPageFrame(curr); // get page frame for current page. Should always exist.
        if(currPage == nullptr){ // error checking- should never be nullputr
            std::cerr << "Error in evict_in_range: Buffer page with pageId " << currPage << " queried for, but does not exist.\n";
            return;
        }

        nextPage = currPage->next; // set next page.
        if((currPage->pageId >= start_index) && (currPage->pageId < end_index)){ // if curr belongs to SST we want to evict from
            if(head == tail){ // one element in list, indicate buffer is now empty
                head = -1;
                tail = -1;
            }
            else if(currPage->pageId == head){ // front of list, update head
                head = currPage->next;
            }
            else if(currPage->pageId == tail){ // end of list, update tail
                tail = currPage->previous;
            }
            else{ // middle of list, update next and previous pointers
                PageFrame* previous_page = getPageFrame(currPage->previous);
                PageFrame* next_page = getPageFrame(currPage->next);
                previous_page->next = next_page->pageId;
                next_page->previous = previous_page->pageId;
            }
            evictPage(currPage);
        }
        curr = nextPage; // move to next page
    }
    
}

void BufferPool::destroyBuffer(){

    for (int i = 0; i < (1 << directory.globalDepth); i++) {
            for (PageFrame* frame : directory.directoryVec[i]) {
                free(frame->data);
                delete(frame);
            }
    }
    delete(this);
}