p2lzw / encode.c
encode.c
Raw
#include "encode.h"

// Bit Unpacking Variables
static unsigned char byte = 0;
static int bitIdx = 0;
static int bitLen = 9;



unsigned int hash(int prefix, unsigned char c)
{
  return (((unsigned long)(prefix) << CHAR_BIT) | (unsigned)(c)) % HASH_SIZE;
}

void hashInsert(EncodeEntry **hashTable, 
                EncodeEntry **codeToEntry, 
                EncodeEntry *t)
{
  int index = hash(t -> prefix, t -> character);
  if (hashTable[index] == NULL) 
  {
    hashTable[index] = t;
  } else {
    EncodeEntry *item = hashTable[index];
    while (item -> next != NULL) 
    {
      item = item -> next;
    }
    item -> next = t;
  }

  if (codeToEntry)
  {
    codeToEntry[t -> code] = t;
  }
}

EncodeEntry *hashFind(EncodeEntry **hashTable, int prefix, unsigned char c) 
{
  int index = hash(prefix, c);
  EncodeEntry *item = hashTable[index];
    
  while (item != NULL)
  {
    if (item -> prefix == prefix && item -> character == c)
    {
        return item;
    }
    item = item -> next;
  }
  return NULL;
}



void initEncodeTable(EncodeEntry **hashTable, 
                    EncodeEntry **codeToEntry, 
                    int table_size)
{
  for (int i = 0; i < 256; i++) 
  {
    EncodeEntry *item = (EncodeEntry *)malloc(sizeof(EncodeEntry));
    if (item == NULL)
    {
      fprintf(stderr, "Memory allocation failed\n");
      exit(1);
    }
    item -> code = i;
    item -> prefix = EMPTY;
    item -> character = (unsigned char)i;
    item -> next = NULL;
    hashInsert(hashTable, codeToEntry, item);
  }
}

void freeTable(EncodeEntry **hashTable, EncodeEntry **codeToEntry)
{
  for (int i = 0; i < HASH_SIZE; i++) 
  {
      EncodeEntry* current = hashTable[i];
      while (current != NULL) {
          EncodeEntry* next = current -> next;
          free(current);
          current = next;
      }
      hashTable[i] = NULL;
  }

  free(codeToEntry);
  free(hashTable);
}

int pruneEncodeTable(EncodeEntry ***hashTable, 
                    EncodeEntry ***codeToEntry, 
                    int table_size)
{
  bool *usedAsPrefix = calloc(table_size, sizeof(bool));
  if (usedAsPrefix == NULL)
  {
    fprintf(stderr, "Memory allocation for prefix tracker failed\n");
    return -1;
  }
  for (int code = 256; code < table_size; code++)
  {
    EncodeEntry *item = (*codeToEntry)[code];
    usedAsPrefix[item -> prefix] = true;
  }

  int *codeMap = calloc(table_size, sizeof(int));
  if (codeMap == NULL)
  {
    fprintf(stderr, "Memory allocation for code mapping table failed\n");
    free(usedAsPrefix);
    return -1;
  }
  for (int i = 0; i < table_size; i++) 
  {
    codeMap[i] = EMPTY;
  }

  EncodeEntry **newTable = calloc(HASH_SIZE, sizeof(EncodeEntry *));
  EncodeEntry **newToEntry;
  if (newTable == NULL)
  {
    fprintf(stderr, "Memory allocation for new hash table failed\n");
    free(usedAsPrefix);
    free(codeMap);
    return -1;
  }
  
  newToEntry = calloc(table_size, sizeof(EncodeEntry*));
  if (newToEntry == NULL)
  {
    fprintf(stderr, "Memory allocation for new codeToEntry table failed\n");
    free(usedAsPrefix);
    free(codeMap);
    freeTable(newTable, NULL);
    return -1;
  }

  initEncodeTable(newTable, newToEntry, table_size);
  int nextCode = 256;

  for (int code = 0; code < 256; code++) 
  {
    free((*codeToEntry)[code]);
  }

  for (int code = 256; code < table_size; code++)
  {
    EncodeEntry *item = (*codeToEntry)[code];
    if (usedAsPrefix[code])
    {
      int findPrefix = item -> prefix;
      if (findPrefix > 255 && codeMap[findPrefix] != EMPTY)
      {
        findPrefix = codeMap[findPrefix];
      }

      codeMap[code] = nextCode;
      if (item -> prefix > 255 && codeMap[item -> prefix] != EMPTY) 
      {
        item -> prefix = codeMap[item -> prefix];
      }

      item -> code = nextCode++;
      item -> next = NULL;
      hashInsert(newTable, newToEntry, item);
    } else {
      free(item);
    }
  }
  
  free(usedAsPrefix);
  free(codeMap);
  free(*hashTable);
  free(*codeToEntry);

  *hashTable = newTable;
  *codeToEntry = newToEntry;

  return nextCode;
}



void encode(int pruning, int maxBits) 
{
  char* stage = getenv("STAGE");
  if (!stage)
  {
    stage = "3";
  }

  if (!strcmp(stage, "3"))
  {
    unsigned char coderConfig = maxBits | (pruning << 7);
    fwrite(&coderConfig, sizeof(unsigned char), 1, stdout);
  } else if (!strcmp(stage, "2"))
  {
    fprintf(stdout, "%d\n", maxBits);
    fprintf(stdout, "%d\n", pruning);
  }

  int table_size = (1 << maxBits);  // 2^MAXBITS

  EncodeEntry **hashTable = calloc(HASH_SIZE, sizeof(EncodeEntry*));
  
  EncodeEntry **codeToEntry = NULL;
  
  if (hashTable == NULL) 
  {
    fprintf(stderr, "Memory allocation for hash table failed\n");
    exit(1);
  }

  if (getenv("DBG") != NULL || pruning)
  {
    codeToEntry = calloc(table_size, sizeof(EncodeEntry*));
    if (codeToEntry == NULL)
    {
      free(hashTable);
      fprintf(stderr, "Memory allocation for codeToEntry table failed\n");
      exit(1);
    }
  }
  
  initEncodeTable(hashTable, codeToEntry, table_size);

  int nextCode = 256;
  int C = EMPTY;
  int K;
  
  while ((K = fgetc(stdin)) != EOF) 
  {
    unsigned char character = (unsigned char)K;
    EncodeEntry* found = hashFind(hashTable, C, character);

    if (found != NULL) 
    {
      C = found -> code;
    } else {
      if (nextCode >= table_size && pruning)
      {
        nextCode = pruneEncodeTable(&hashTable, &codeToEntry, table_size);
        if (nextCode < 256)
        {
          if (nextCode != -1) 
          {
            fprintf(stderr, "Table pruning failed: invalid nextCode value %d\n", 
                    nextCode);
          }
          freeTable(hashTable, codeToEntry);
          exit(1);
        }
      }

      if (strcmp(stage, "3"))
      {
        fprintf(stdout, "%d\n", C);
      } else {
        bitPacker(C);
      }

      if (nextCode < table_size) 
      {
        EncodeEntry* newEntry = (EncodeEntry *)malloc(sizeof(EncodeEntry));
        if (newEntry == NULL)
        {
          fprintf(stderr, "Memory allocation for new string table entry failed\n");
          freeTable(hashTable, codeToEntry);
          exit(1);
        }
        newEntry -> code = nextCode++;
        newEntry -> prefix = C;
        newEntry -> character = character;
        newEntry -> next = NULL;

        hashInsert(hashTable, codeToEntry, newEntry);

        if (!strcmp(stage, "3"))
        {
          if (nextCode >> bitLen)
          {
            flushRemBits();
            bitLen++;
          }
        }
      }

      EncodeEntry* emptyPair = hashFind(hashTable, EMPTY, character);
      if (emptyPair == NULL) 
      {
        fprintf(stderr, "Failed to find (EMPTY,K) pair in table\n");
        freeTable(hashTable, codeToEntry);
        exit(1);
      }
      C = emptyPair -> code;
    }
  }
  
  if (C != EMPTY) 
  {
    if (strcmp(stage, "3"))
    {
      fprintf(stdout, "%d\n", C);
    } else {
      bitPacker(C);
      flushRemBits();
    }
  }
  
  if (getenv("DBG") != NULL)
  {
    dumpEncodeTable(hashTable, codeToEntry, table_size);
  }
  
  freeTable(hashTable, codeToEntry);
}

void bitPacker(int code)
{
  int bitsAvailable = BYTE_SIZE - bitIdx;
  bitIdx = 0;

  byte |= code >> (bitLen - bitsAvailable);

  fwrite(&byte, sizeof(unsigned char), 1, stdout);
  byte = 0;

  int remBits = bitLen - bitsAvailable;
  code &= (1 << remBits) - 1;

  while (bitIdx == 0) 
  {
    if (remBits >= BYTE_SIZE)
    {
      byte = code >> (remBits - BYTE_SIZE);
      fwrite(&byte, sizeof(unsigned char), 1, stdout);
      if (remBits == BYTE_SIZE)
      {
        byte = 0;
        break;
      }
      remBits -= BYTE_SIZE;
      code &= (1 << remBits) - 1;
    } else {
      byte = 0;
      byte |= code << (BYTE_SIZE - remBits);
      bitIdx = remBits;
    }
  }
}

void flushRemBits()
{
  if (bitIdx > 0)
  {
    fwrite(&byte, sizeof(unsigned char), 1, stdout);
    byte = 0;
    bitIdx = 0;
  }
}


void printEncodeEntry(EncodeEntry* entry) 
{
  if (entry == NULL) 
  {
    printf("NULL EncodeEntry\n");
    return;
  }

  printf("EncodeEntry:\n");
  printf("  code: %d\n", entry -> code);
  printf("  prefix: %d\n", entry -> prefix);
  printf("  character: %d\n", entry -> character);
  printf("  next: %p\n", entry -> next);
}

void dumpEncodeTable(EncodeEntry **hashTable, 
                    EncodeEntry **codeToEntry, 
                    int table_size) 
{
  char filename[256];
  snprintf(filename, sizeof(filename), "./DBG.%s", "encode");

  FILE* fp = fopen(filename, "w");
  if (!fp) 
  {
    fprintf(stderr, "Error opening file %s: %s (errno: %d)\n", 
            filename, strerror(errno), errno);
    return;
  }

  fprintf(fp, "CODE\tPREF\tCHAR\tString\n");

  if (!codeToEntry) 
  {
    fprintf(stderr, "ERROR: codeToEntry is NULL\n");
    fclose(fp);
    return;
  }

  for (int code = 0; code < table_size; code++) 
  {
    EncodeEntry* curr = codeToEntry[code];
    
    if (!curr) continue;

    char* string = calloc(table_size + 1, sizeof(char));
    if (!string) 
    {
      fprintf(stderr, "Memory allocation failed for string buffer\n");
      fclose(fp);
      return;
    }

    int len = 0;

    if (len < table_size) 
    {
      string[len++] = curr -> character;
    }

    int prefix = curr -> prefix;
    int safety = 0;
    while (prefix != EMPTY && len < table_size && safety++ < table_size) 
    {
      if (prefix < 0 || prefix >= table_size) 
      {
        fprintf(stderr, "Invalid prefix value: %d\n", prefix);
        free(string);
        continue;
      }

      EncodeEntry* prefix_entry = codeToEntry[prefix];
      if (!prefix_entry) 
      {
        fprintf(stderr, "NULL prefix entry for code %d\n", prefix);
        free(string);
        continue;
      }

      string[len++] = prefix_entry -> character;
      prefix = prefix_entry -> prefix;
    }

    if (fprintf(fp, "%d\t%d\t%d\t", 
                code, curr -> prefix, curr -> character) < 0)
    {
      fprintf(stderr, "Error writing entry to file\n");
      free(string);
      fclose(fp);
      return; 
    }
    
    for (int j = len - 1; j >= 0; j--) 
    {
      fputc(string[j], fp);
    }
    fprintf(fp, "\n");

    free(string);
  }

  fclose(fp);
}