#include "encode.h" // Bit Unpacking Variables static unsigned char byte = 0; static int bitIdx = 0; static int bitLen = 9; unsigned int hash(int prefix, unsigned char c) { return (((unsigned long)(prefix) << CHAR_BIT) | (unsigned)(c)) % HASH_SIZE; } void hashInsert(EncodeEntry **hashTable, EncodeEntry **codeToEntry, EncodeEntry *t) { int index = hash(t -> prefix, t -> character); if (hashTable[index] == NULL) { hashTable[index] = t; } else { EncodeEntry *item = hashTable[index]; while (item -> next != NULL) { item = item -> next; } item -> next = t; } if (codeToEntry) { codeToEntry[t -> code] = t; } } EncodeEntry *hashFind(EncodeEntry **hashTable, int prefix, unsigned char c) { int index = hash(prefix, c); EncodeEntry *item = hashTable[index]; while (item != NULL) { if (item -> prefix == prefix && item -> character == c) { return item; } item = item -> next; } return NULL; } void initEncodeTable(EncodeEntry **hashTable, EncodeEntry **codeToEntry, int table_size) { for (int i = 0; i < 256; i++) { EncodeEntry *item = (EncodeEntry *)malloc(sizeof(EncodeEntry)); if (item == NULL) { fprintf(stderr, "Memory allocation failed\n"); exit(1); } item -> code = i; item -> prefix = EMPTY; item -> character = (unsigned char)i; item -> next = NULL; hashInsert(hashTable, codeToEntry, item); } } void freeTable(EncodeEntry **hashTable, EncodeEntry **codeToEntry) { for (int i = 0; i < HASH_SIZE; i++) { EncodeEntry* current = hashTable[i]; while (current != NULL) { EncodeEntry* next = current -> next; free(current); current = next; } hashTable[i] = NULL; } free(codeToEntry); free(hashTable); } int pruneEncodeTable(EncodeEntry ***hashTable, EncodeEntry ***codeToEntry, int table_size) { bool *usedAsPrefix = calloc(table_size, sizeof(bool)); if (usedAsPrefix == NULL) { fprintf(stderr, "Memory allocation for prefix tracker failed\n"); return -1; } for (int code = 256; code < table_size; code++) { EncodeEntry *item = (*codeToEntry)[code]; usedAsPrefix[item -> prefix] = true; } int *codeMap = calloc(table_size, sizeof(int)); if (codeMap == NULL) { fprintf(stderr, "Memory allocation for code mapping table failed\n"); free(usedAsPrefix); return -1; } for (int i = 0; i < table_size; i++) { codeMap[i] = EMPTY; } EncodeEntry **newTable = calloc(HASH_SIZE, sizeof(EncodeEntry *)); EncodeEntry **newToEntry; if (newTable == NULL) { fprintf(stderr, "Memory allocation for new hash table failed\n"); free(usedAsPrefix); free(codeMap); return -1; } newToEntry = calloc(table_size, sizeof(EncodeEntry*)); if (newToEntry == NULL) { fprintf(stderr, "Memory allocation for new codeToEntry table failed\n"); free(usedAsPrefix); free(codeMap); freeTable(newTable, NULL); return -1; } initEncodeTable(newTable, newToEntry, table_size); int nextCode = 256; for (int code = 0; code < 256; code++) { free((*codeToEntry)[code]); } for (int code = 256; code < table_size; code++) { EncodeEntry *item = (*codeToEntry)[code]; if (usedAsPrefix[code]) { int findPrefix = item -> prefix; if (findPrefix > 255 && codeMap[findPrefix] != EMPTY) { findPrefix = codeMap[findPrefix]; } codeMap[code] = nextCode; if (item -> prefix > 255 && codeMap[item -> prefix] != EMPTY) { item -> prefix = codeMap[item -> prefix]; } item -> code = nextCode++; item -> next = NULL; hashInsert(newTable, newToEntry, item); } else { free(item); } } free(usedAsPrefix); free(codeMap); free(*hashTable); free(*codeToEntry); *hashTable = newTable; *codeToEntry = newToEntry; return nextCode; } void encode(int pruning, int maxBits) { char* stage = getenv("STAGE"); if (!stage) { stage = "3"; } if (!strcmp(stage, "3")) { unsigned char coderConfig = maxBits | (pruning << 7); fwrite(&coderConfig, sizeof(unsigned char), 1, stdout); } else if (!strcmp(stage, "2")) { fprintf(stdout, "%d\n", maxBits); fprintf(stdout, "%d\n", pruning); } int table_size = (1 << maxBits); // 2^MAXBITS EncodeEntry **hashTable = calloc(HASH_SIZE, sizeof(EncodeEntry*)); EncodeEntry **codeToEntry = NULL; if (hashTable == NULL) { fprintf(stderr, "Memory allocation for hash table failed\n"); exit(1); } if (getenv("DBG") != NULL || pruning) { codeToEntry = calloc(table_size, sizeof(EncodeEntry*)); if (codeToEntry == NULL) { free(hashTable); fprintf(stderr, "Memory allocation for codeToEntry table failed\n"); exit(1); } } initEncodeTable(hashTable, codeToEntry, table_size); int nextCode = 256; int C = EMPTY; int K; while ((K = fgetc(stdin)) != EOF) { unsigned char character = (unsigned char)K; EncodeEntry* found = hashFind(hashTable, C, character); if (found != NULL) { C = found -> code; } else { if (nextCode >= table_size && pruning) { nextCode = pruneEncodeTable(&hashTable, &codeToEntry, table_size); if (nextCode < 256) { if (nextCode != -1) { fprintf(stderr, "Table pruning failed: invalid nextCode value %d\n", nextCode); } freeTable(hashTable, codeToEntry); exit(1); } } if (strcmp(stage, "3")) { fprintf(stdout, "%d\n", C); } else { bitPacker(C); } if (nextCode < table_size) { EncodeEntry* newEntry = (EncodeEntry *)malloc(sizeof(EncodeEntry)); if (newEntry == NULL) { fprintf(stderr, "Memory allocation for new string table entry failed\n"); freeTable(hashTable, codeToEntry); exit(1); } newEntry -> code = nextCode++; newEntry -> prefix = C; newEntry -> character = character; newEntry -> next = NULL; hashInsert(hashTable, codeToEntry, newEntry); if (!strcmp(stage, "3")) { if (nextCode >> bitLen) { flushRemBits(); bitLen++; } } } EncodeEntry* emptyPair = hashFind(hashTable, EMPTY, character); if (emptyPair == NULL) { fprintf(stderr, "Failed to find (EMPTY,K) pair in table\n"); freeTable(hashTable, codeToEntry); exit(1); } C = emptyPair -> code; } } if (C != EMPTY) { if (strcmp(stage, "3")) { fprintf(stdout, "%d\n", C); } else { bitPacker(C); flushRemBits(); } } if (getenv("DBG") != NULL) { dumpEncodeTable(hashTable, codeToEntry, table_size); } freeTable(hashTable, codeToEntry); } void bitPacker(int code) { int bitsAvailable = BYTE_SIZE - bitIdx; bitIdx = 0; byte |= code >> (bitLen - bitsAvailable); fwrite(&byte, sizeof(unsigned char), 1, stdout); byte = 0; int remBits = bitLen - bitsAvailable; code &= (1 << remBits) - 1; while (bitIdx == 0) { if (remBits >= BYTE_SIZE) { byte = code >> (remBits - BYTE_SIZE); fwrite(&byte, sizeof(unsigned char), 1, stdout); if (remBits == BYTE_SIZE) { byte = 0; break; } remBits -= BYTE_SIZE; code &= (1 << remBits) - 1; } else { byte = 0; byte |= code << (BYTE_SIZE - remBits); bitIdx = remBits; } } } void flushRemBits() { if (bitIdx > 0) { fwrite(&byte, sizeof(unsigned char), 1, stdout); byte = 0; bitIdx = 0; } } void printEncodeEntry(EncodeEntry* entry) { if (entry == NULL) { printf("NULL EncodeEntry\n"); return; } printf("EncodeEntry:\n"); printf(" code: %d\n", entry -> code); printf(" prefix: %d\n", entry -> prefix); printf(" character: %d\n", entry -> character); printf(" next: %p\n", entry -> next); } void dumpEncodeTable(EncodeEntry **hashTable, EncodeEntry **codeToEntry, int table_size) { char filename[256]; snprintf(filename, sizeof(filename), "./DBG.%s", "encode"); FILE* fp = fopen(filename, "w"); if (!fp) { fprintf(stderr, "Error opening file %s: %s (errno: %d)\n", filename, strerror(errno), errno); return; } fprintf(fp, "CODE\tPREF\tCHAR\tString\n"); if (!codeToEntry) { fprintf(stderr, "ERROR: codeToEntry is NULL\n"); fclose(fp); return; } for (int code = 0; code < table_size; code++) { EncodeEntry* curr = codeToEntry[code]; if (!curr) continue; char* string = calloc(table_size + 1, sizeof(char)); if (!string) { fprintf(stderr, "Memory allocation failed for string buffer\n"); fclose(fp); return; } int len = 0; if (len < table_size) { string[len++] = curr -> character; } int prefix = curr -> prefix; int safety = 0; while (prefix != EMPTY && len < table_size && safety++ < table_size) { if (prefix < 0 || prefix >= table_size) { fprintf(stderr, "Invalid prefix value: %d\n", prefix); free(string); continue; } EncodeEntry* prefix_entry = codeToEntry[prefix]; if (!prefix_entry) { fprintf(stderr, "NULL prefix entry for code %d\n", prefix); free(string); continue; } string[len++] = prefix_entry -> character; prefix = prefix_entry -> prefix; } if (fprintf(fp, "%d\t%d\t%d\t", code, curr -> prefix, curr -> character) < 0) { fprintf(stderr, "Error writing entry to file\n"); free(string); fclose(fp); return; } for (int j = len - 1; j >= 0; j--) { fputc(string[j], fp); } fprintf(fp, "\n"); free(string); } fclose(fp); }