lc4-disassembler / lc4_disassembler.c
lc4_disassembler.c
Raw
#include "lc4_disassembler.h"
#include "lc4_memory.h"

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define GET_OPCODE(X) ((X >> 12) & 0xF)

// Opcodes for all instrs
#define OPCODE_BR 0x0
#define OPCODE_ARI 0x1 // arithmetic
#define OPCODE_CMP 0x2
#define OPCODE_JSR 0x4
#define OPCODE_BIT 0x5 // bitwise
#define OPCODE_LDR 0x6
#define OPCODE_STR 0x7
#define OPCODE_RTI 0x8
#define OPCODE_CON 0x9 // const
#define OPCODE_SHF 0xA // shift
#define OPCODE_JMP 0xC
#define OPCODE_HIC 0xD // hiconst
#define OPCODE_TRA 0xF
#define NUM_OPCODES 13

#define USER_CODE_START 0x0
#define USER_CODE_END 0x1FFF
#define OS_CODE_START 0x8000
#define OS_CODE_END 0x9FFF

int reverse_assemble(row_of_memory *memory) {
  if (!memory) {
    return 0;
  }

  row_of_memory *head = memory;

  unsigned short opcodes[] = {OPCODE_BR,  OPCODE_ARI, OPCODE_CMP, OPCODE_JSR,
                              OPCODE_BIT, OPCODE_LDR, OPCODE_STR, OPCODE_RTI,
                              OPCODE_CON, OPCODE_SHF, OPCODE_JMP, OPCODE_HIC,
                              OPCODE_TRA};

  for (int i = 0; i < NUM_OPCODES; i++) {
    unsigned short op = opcodes[i];
    head = memory; // reset head

    while (head) {
      row_of_memory *node = search_opcode(head, op);

      if (!node) { // end of list
        break;
      }

      // only convert if it's in a valid CODE region
      if (is_valid_region(USER_CODE_START, USER_CODE_END, node->address) == 0 ||
          is_valid_region(OS_CODE_START, OS_CODE_END, node->address) == 0) {

        // only valid inputs will be tested, =NULL if get_assembly returns NULL
        node->assembly = get_assembly(node, memory);
      }

      head = node->next; // repeat this process for the entire LL
    }
  }

  return 0;
}

char *get_assembly(row_of_memory *memory, row_of_memory *head) {
  if (!memory || !memory->contents) {
    return NULL;
  }

  unsigned short instr = memory->contents;
  unsigned short opcode = GET_OPCODE(instr);

  switch (opcode) {
  case OPCODE_BR:
    return parse_branch(memory, head);
  case OPCODE_ARI:
    return parse_arithmetic(instr);
  case OPCODE_CMP:
    return parse_compare(instr);
  case OPCODE_JSR:
    return parse_jump_sub(memory, head);
  case OPCODE_BIT:
    return parse_bitwise(instr);
  case OPCODE_LDR:
  case OPCODE_STR:
    return parse_mem_op(instr); // memory operations
  case OPCODE_RTI:
    return parse_return();
  case OPCODE_CON:
    return parse_const(instr);
  case OPCODE_SHF:
    return parse_shift(instr);
  case OPCODE_JMP:
    return parse_jump(memory, head);
  case OPCODE_HIC:
    return parse_hiconst(instr);
  case OPCODE_TRA:
    return parse_trap(instr);
  default:
    return NULL;
  }
}

char *parse_branch(row_of_memory *curr, row_of_memory *head) {
  char *assembly = malloc(77); // max len of a label = 70
  if (!assembly)
    return NULL;

  unsigned short instr = curr->contents;
  unsigned short n = get_bits(instr, 11, 11);
  unsigned short z = get_bits(instr, 10, 10);
  unsigned short p = get_bits(instr, 9, 9);
  short imm9 = sign_extend(instr, 9);

  row_of_memory *next = search_address(head, curr->address + 1 + imm9);

  if (n || z || p) {
    sprintf(assembly, "BR%s%s%s %s", n ? "n" : "", z ? "z" : "", p ? "p" : "",
            next->label ? next->label : "");
  } else {
    sprintf(assembly, "NOP");
  }

  return assembly;
}

char *parse_arithmetic(unsigned short instr) {
  char *assembly = malloc(20);
  if (!assembly) {
    return NULL;
  }

  unsigned short int subop;
  unsigned short int dest;
  unsigned short int src1;
  short int src2;

  dest = get_bits(instr, 11, 9);
  src1 = get_bits(instr, 8, 6);

  // two cases: 1. bit 5 = 1, parse IMM5; 2. bit 5 = 0, parse Rt
  if (get_bits(instr, 5, 5)) {
    src2 = sign_extend(instr, 5);
    sprintf(assembly, "ADD R%d, R%d, #%d", dest, src1, src2);
  } else {
    subop = get_bits(instr, 5, 3);
    src2 = get_bits(instr, 2, 0);

    switch (subop) {
    case 0:
      sprintf(assembly, "ADD R%d, R%d, R%d", dest, src1, src2);
      break;
    case 1:
      sprintf(assembly, "MUL R%d, R%d, R%d", dest, src1, src2);
      break;
    case 2:
      sprintf(assembly, "SUB R%d, R%d, R%d", dest, src1, src2);
      break;
    case 3:
      sprintf(assembly, "DIV R%d, R%d, R%d", dest, src1, src2);
      break;
    default:
      free(assembly); // unsupported instructions
      return NULL;
    }
  }

  return assembly;
}

char *parse_compare(unsigned short instr) {
  char *assembly = malloc(20);
  if (!assembly)
    return NULL;

  unsigned short subop;
  unsigned short src1;
  short src2;

  subop = get_bits(instr, 8, 7);
  src1 = get_bits(instr, 11, 9);

  // two cases:
  // 1. Rs and Rt
  // 2. Rs and IMM/UIMM7
  switch (subop) {
  case 0:
    src2 = get_bits(instr, 2, 0);
    sprintf(assembly, "CMP R%d, R%d", src1, src2);
    break;
  case 1:
    src2 = get_bits(instr, 2, 0);
    sprintf(assembly, "CMPU R%d, R%d", src1, src2);
    break;
  case 2:
    src2 = sign_extend(instr, 7);
    sprintf(assembly, "CMPI R%d, #%d", src1, src2);
    break;
  case 3:
    src2 = (instr & 0x7F);
    sprintf(assembly, "CMPIU R%d, #%hu", src1, src2);
    break;
  default:
    free(assembly); // unsupported
    return NULL;
  }

  return assembly;
}

char *parse_jump_sub(row_of_memory *curr, row_of_memory *head) {
  char *assembly;
  unsigned short subop;
  unsigned short src;

  unsigned short instr = curr->contents;
  subop = get_bits(instr, 11, 11);

  switch (subop) {
  case 0:
    assembly = malloc(10);
    if (!assembly)
      return NULL;
    src = get_bits(instr, 8, 6);
    sprintf(assembly, "JSRR R%d", src);
    break;
  case 1:
    assembly = malloc(80);
    if (!assembly)
      return NULL;

    src = sign_extend(instr, 11);
    row_of_memory *next =
        search_address(head, ((curr->address) & 0x8000) | (src << 4));
    sprintf(assembly, "JSR %s", next->label ? next->label : "");
    break;
  default:
    return NULL;
  }

  return assembly;
}

char *parse_bitwise(unsigned short instr) {
  char *assembly = malloc(20);
  if (!assembly)
    return NULL;

  unsigned short int subop;
  unsigned short int dest;
  unsigned short int src1;
  short int src2;

  dest = get_bits(instr, 11, 9);
  src1 = get_bits(instr, 8, 6);

  if ((instr >> 5) & 0x1) {
    src2 = sign_extend(instr, 5);
    sprintf(assembly, "AND R%d, R%d, #%d", dest, src1, src2);
  } else {
    subop = get_bits(instr, 5, 3);
    src2 = get_bits(instr, 2, 0);

    switch (subop) {
    case 0:
      sprintf(assembly, "AND R%d, R%d, R%d", dest, src1, src2);
      break;
    case 1:
      sprintf(assembly, "NOT R%d, R%d", dest, src1);
      break;
    case 2:
      sprintf(assembly, "OR R%d, R%d, R%d", dest, src1, src2);
      break;
    case 3:
      sprintf(assembly, "XOR R%d, R%d, R%d", dest, src1, src2);
      break;
    default:
      free(assembly); // unsupported
      return NULL;
    }
  }

  return assembly;
}

char *parse_mem_op(unsigned short instr) {
  char *assembly = malloc(20);
  if (!assembly)
    return NULL;

  unsigned short opcode = GET_OPCODE(instr);
  char operation[4];

  unsigned short int dest = get_bits(instr, 11, 9);
  unsigned short int src1 = get_bits(instr, 8, 6);
  short int src2 = sign_extend(instr, 6);

  switch (opcode) {
  case OPCODE_LDR:
    strcpy(operation, "LDR");
    break;
  case OPCODE_STR:
    strcpy(operation, "STR");
    break;
  default:
    free(assembly);
    return NULL;
  }

  sprintf(assembly, "%s R%d, R%d, #%d", operation, dest, src1, src2);
  return assembly;
}

char *parse_return() {
  char *assembly = malloc(4);
  if (!assembly)
    return NULL;

  sprintf(assembly, "RTI");
  return assembly;
}

char *parse_const(unsigned short instr) {
  char *assembly = malloc(20);
  if (!assembly)
    return NULL;
  unsigned short int rd = get_bits(instr, 11, 9);
  short int imm9 = get_bits(instr, 8, 0);
  sprintf(assembly, "CONST R%d, #%d", rd, imm9);

  return assembly;
}

char *parse_shift(unsigned short instr) {
  char *assembly = malloc(20);
  if (!assembly)
    return NULL;

  // Extract the subopcode for shift instructions (bits 5-4)
  short int opcode = get_bits(instr, 5, 4);

  switch (opcode) {
  case 0:
    sprintf(assembly, "SLL R%d, R%d, #%d", get_bits(instr, 11, 9),
            get_bits(instr, 8, 6), get_bits(instr, 3, 0));
    break;
  case 1:
    sprintf(assembly, "SRA R%d, R%d, #%d", get_bits(instr, 11, 9),
            get_bits(instr, 8, 6), get_bits(instr, 3, 0));
    break;
  case 2:
    sprintf(assembly, "SRL R%d, R%d, #%d", get_bits(instr, 11, 9),
            get_bits(instr, 8, 6), get_bits(instr, 3, 0));
    break;
  case 3:
    sprintf(assembly, "MOD R%d, R%d, R%d", get_bits(instr, 11, 9),
            get_bits(instr, 8, 6), get_bits(instr, 2, 0));
    break;
  default:
    free(assembly);
    return NULL;
  }

  return assembly;
}

char *parse_jump(row_of_memory *curr, row_of_memory *head) {
  char *assembly = malloc(80);
  if (!assembly)
    return NULL;

  unsigned short instr = curr->contents;
  unsigned short subop = get_bits(instr, 11, 11);

  switch (subop) {
  case 0: {
    unsigned short src = get_bits(instr, 8, 6);
    sprintf(assembly, "JMPR R%d", src);
    break;
  }
  case 1: {
    short imm11 = sign_extend(instr & 0x7FF, 11); // Extract imm11
    row_of_memory *next = search_address(head, curr->address + 1 + imm11);
    sprintf(assembly, "JMP %s", next && next->label ? next->label : "");
    break;
  }
  default:
    free(assembly);
    return NULL;
  }

  return assembly;
}

char *parse_hiconst(unsigned short instr) {
  char *assembly = malloc(20);
  if (!assembly)
    return NULL;
  unsigned short int dest = get_bits(instr, 11, 9);
  unsigned short int imm8 = get_bits(instr, 7, 0);
  sprintf(assembly, "HICONST R%d, #%hu", dest, imm8);
  return assembly;
}

char *parse_trap(unsigned short instr) {
  char *assembly = malloc(10);
  if (!assembly)
    return NULL;

  unsigned short inp;
  inp = get_bits(instr, 7, 0);
  sprintf(assembly, "TRAP #%hu", inp);

  return assembly;
}

unsigned short get_bits(unsigned short value, int high_bit, int low_bit) {
  // Calculate number of bits to extract
  int num_bits = high_bit - low_bit + 1;
  // Create a mask of the appropriate width
  unsigned short mask = (1 << num_bits) - 1;
  // Shift down so the desired bits are at the bottom
  return ((value >> low_bit) & mask);
}

short sign_extend(unsigned short instr, int bits) {
  unsigned short mask = (0xFFFF) >> (16 - bits);
  instr &= mask;

  if ((instr >> (bits - 1)) == 1) { // check if the sign bit is set
    unsigned short mask = 0xFFFF & (~((1 << bits) - 1));
    instr |= mask; // set all bits above bit_count to 1
  }

  return (short)instr;
}

int is_valid_region(unsigned short start, unsigned short end,
                    unsigned short address) {
  return !(start <= address && address <= end);
}