// CS-PROJECTS / c02_assembler / ASMParser.c
#include "ASMParser.h"
#include "SymbolTable.h"

#include <inttypes.h>
#include <stdbool.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <assert.h>

// On my honor:
//
// - I have not discussed the C language code in my program with
// anyone other than my instructor or the teaching assistants
// assigned to this course.
//
// - I have not used C language code obtained from another student,
// the Internet, or any other unauthorized source, either modified
// or unmodified.
//
// - If any C language code or documentation used in my program
// was obtained from an authorized source, such as a text book or
// course notes, that has been clearly noted with a proper citation
// in the comments of my program.
//
// - I have not designed this program in such a way as to defeat or
// interfere with the normal operation of the grading code.
//
// <Jordan Harrington>
// <jordanha23>

/***  Add include directives here as needed.  ***/
// Register lookup table used by findReg(): pairs each supported register
// name with its register number spelled as five binary digits.  The table
// is scanned linearly and ends at the entry whose mnem is NULL.
static struct
{
   char *mnem;    // register name as it appears in the assembly text
   char *address; // register number as a string of five '0'/'1' characters
} registers[] = {
    {.mnem = "$at", .address = "00001"},
    {.mnem = "$a0", .address = "00100"},
    {.mnem = "$a1", .address = "00101"},
    {.mnem = "$a2", .address = "00110"},
    {.mnem = "$a3", .address = "00111"},
    {.mnem = "$t0", .address = "01000"},
    {.mnem = "$t1", .address = "01001"},
    {.mnem = "$t2", .address = "01010"},
    {.mnem = "$t3", .address = "01011"},
    {.mnem = "$t4", .address = "01100"},
    {.mnem = "$t5", .address = "01101"},
    {.mnem = "$t6", .address = "01110"},
    {.mnem = "$t7", .address = "01111"},
    {.mnem = "$t8", .address = "11000"},
    {.mnem = "$t9", .address = "11001"},
    {.mnem = "$s0", .address = "10000"},
    {.mnem = "$s1", .address = "10001"},
    {.mnem = "$s2", .address = "10010"},
    {.mnem = "$s3", .address = "10011"},
    {.mnem = "$s4", .address = "10100"},
    {.mnem = "$s5", .address = "10101"},
    {.mnem = "$s6", .address = "10110"},
    {.mnem = "$s7", .address = "10111"},
    {.mnem = "$v0", .address = "00010"},
    {.mnem = "$v1", .address = "00011"},
    {.mnem = "$zero", .address = "00000"},
    {.mnem = "$k0", .address = "11010"},
    {.mnem = "$k1", .address = "11011"},
    {.mnem = NULL, .address = NULL}}; // end-of-table sentinel

// Opcode lookup table used by findOp(): pairs each supported mnemonic with
// its opcode field spelled as six binary digits.  In this table "la" carries
// the same opcode as "addi" and "li" the same opcode as "addiu".  The table
// ends at the entry whose mnem is NULL.
static struct
{
   char *mnem;   // instruction mnemonic
   char *opCode; // opcode field as a string of six '0'/'1' characters
} op[] = {
    {.mnem = "lw", .opCode = "100011"},
    {.mnem = "sub", .opCode = "000000"},
    {.mnem = "add", .opCode = "000000"},
    {.mnem = "nor", .opCode = "000000"},
    {.mnem = "mult", .opCode = "000000"},
    {.mnem = "syscall", .opCode = "000000"},
    {.mnem = "mul", .opCode = "011100"},
    {.mnem = "slti", .opCode = "001010"},
    {.mnem = "lui", .opCode = "001111"},
    {.mnem = "addi", .opCode = "001000"},
    {.mnem = "la", .opCode = "001000"},
    {.mnem = "beq", .opCode = "000100"},
    {.mnem = "bne", .opCode = "000101"},
    {.mnem = "sw", .opCode = "101011"},
    {.mnem = "nop", .opCode = "000000"},
    {.mnem = "sll", .opCode = "000000"},
    {.mnem = "sra", .opCode = "000000"},
    {.mnem = "srav", .opCode = "000000"},
    {.mnem = "slt", .opCode = "000000"},
    {.mnem = "blt", .opCode = "101010"},
    {.mnem = "j", .opCode = "000010"},
    {.mnem = "addu", .opCode = "000000"},
    {.mnem = "move", .opCode = "000000"},
    {.mnem = "addiu", .opCode = "001001"},
    {.mnem = "li", .opCode = "001001"},
    {.mnem = "blez", .opCode = "000110"},
    {.mnem = "bgtz", .opCode = "000111"},
    {.mnem = NULL, .opCode = NULL}}; // end-of-table sentinel

// Function-code lookup table used by findFunc(): pairs each mnemonic that
// has a funct field with that field spelled as six binary digits.  The table
// ends at the entry whose mnem is NULL.
static struct
{
   char *mnem;     // instruction mnemonic
   char *funcCode; // funct field as a string of six '0'/'1' characters
} func[] = {
    {.mnem = "sub", .funcCode = "100010"},
    {.mnem = "slt", .funcCode = "101010"},
    {.mnem = "addu", .funcCode = "100001"},
    {.mnem = "move", .funcCode = "100001"},
    {.mnem = "sra", .funcCode = "000011"},
    {.mnem = "srav", .funcCode = "000111"},
    {.mnem = "add", .funcCode = "100000"},
    {.mnem = "mult", .funcCode = "011000"},
    {.mnem = "nor", .funcCode = "100111"},
    {.mnem = "mul", .funcCode = "000010"},
    {.mnem = "syscall", .funcCode = "001100"},
    {.mnem = "sll", .funcCode = "000000"},
    {.mnem = "nop", .funcCode = "000000"},
    {.mnem = NULL, .funcCode = NULL}}; // end-of-table sentinel

// Instruction-format lookup table used by findType().  parseASM() uses the
// tag to decide which ParseResult string fields to allocate:
//    "r"  - three register fields plus shamt, funct and opcode
//    "s"  - rs/rt names, all three register fields, funct and opcode
//    "i"  - rs/rt fields, opcode and a 16-bit immediate
//    "ju" - opcode only
// The table ends at the entry whose mnem is NULL.
static struct
{
   char *mnem;  // instruction mnemonic
   char *mType; // format tag: "r", "s", "i" or "ju"
} type[] = {
    {.mnem = "sub", .mType = "r"},
    {.mnem = "nor", .mType = "r"},
    {.mnem = "add", .mType = "r"},
    {.mnem = "syscall", .mType = "r"},
    {.mnem = "mul", .mType = "r"},
    {.mnem = "mult", .mType = "s"},
    {.mnem = "lw", .mType = "i"},
    {.mnem = "lui", .mType = "i"},
    {.mnem = "addi", .mType = "i"},
    {.mnem = "slti", .mType = "i"},
    {.mnem = "la", .mType = "i"},
    {.mnem = "beq", .mType = "i"},
    {.mnem = "bne", .mType = "i"},
    {.mnem = "sw", .mType = "i"},
    {.mnem = "nop", .mType = "r"},
    {.mnem = "sll", .mType = "r"},
    {.mnem = "sra", .mType = "r"},
    {.mnem = "srav", .mType = "r"},
    {.mnem = "slt", .mType = "r"},
    {.mnem = "blt", .mType = "r"},
    {.mnem = "j", .mType = "ju"},
    {.mnem = "addu", .mType = "r"},
    {.mnem = "move", .mType = "r"},
    {.mnem = "addiu", .mType = "i"},
    {.mnem = "li", .mType = "i"},
    {.mnem = "blez", .mType = "i"},
    {.mnem = "bgtz", .mType = "i"},
    {.mnem = NULL, .mType = NULL}}; // end-of-table sentinel

// Buffer sizes, in bytes and including the terminating NUL, for the string
// buffers that parseASM() works with.
//
// REG_LENGTH was 4, which only fits three-character names such as "$t0".
// "$zero" is in the register table and needs 5 characters + NUL, so copying
// it into a name buffer overran the allocation; 6 covers every table entry.
#define REG_LENGTH 6          // register name, e.g. "$zero"
#define OP_FUNC_CODE_LENGTH 7 // opcode / funct: six binary digits + NUL
#define REG_ADD_LENGTH 6      // register number / shamt: five binary digits + NUL
#define TARGET_LENGTH 27      // scratch buffer for a value printed as decimal text

// Cross-call state for the "blt" pseudo-instruction.  parseASM() sets this
// after handling a "blt" line; while it is set, the next parseASM() call
// treats its line as "bne" (branching on $at) and then clears the flag.
static bool blt = false;

// Looks up a register name in registers[]; returns its binary-digit string,
// or NULL when the name is not in the table.
char *findReg(char *inp);
// Looks up a mnemonic in func[]; returns its funct string, or NULL.
char *findFunc(char *inp);
// Looks up a mnemonic in op[]; returns its opcode string, or NULL.
char *findOp(char *inp);
// Looks up a mnemonic in type[]; returns its format tag, or NULL.
char *findType(char *inp);
// Converts a string of '0'/'1' characters to its numeric value.
uint8_t binaryToDec(char *addy);
// Returns the instruction number stored for a label in textSymbols, or 0
// when the label is not found.
uint16_t findInstructionNum(char *word);
// Not defined in this part of the file.  parseASM() passes it a label
// operand and uses the result as the immediate; for "lw" a result of 0 is
// taken to mean "operand is not a label".
uint16_t findImm(char *word);
// Not defined in this part of the file.  parseASM() passes it decimal text,
// copies the returned string into IMM and frees it -- presumably the
// unsigned counterpart of decToBinary(); confirm against its definition.
char *unDecToBinary(char *word);

// Limits local to the parser.
enum
{
   // A line contributes a mnemonic and at most three operands; the slack
   // keeps stray trailing tokens from running past the token array.
   PASM_MAX_TOKENS = 8,
   // Room for the register name pulled out of an "offset(reg)" operand.
   // Keep in sync with the %15 width in the sscanf() format below.
   PASM_REG_TEXT = 16
};

// calloc() wrapper: returns `count` zeroed bytes and clears *ok on failure.
static char *pasmAlloc(size_t count, bool *ok)
{
   char *mem = calloc(count, sizeof(char));
   if (!mem)
      *ok = false;
   return mem;
}

// Releases a partially built ParseResult together with every string field
// parseASM() may have attached to it.
static void pasmDiscard(ParseResult *res)
{
   if (!res)
      return;

   free(res->ASMInstruction);
   free(res->Mnemonic);
   free(res->rdName);
   free(res->rsName);
   free(res->rtName);
   free(res->Opcode);
   free(res->Funct);
   free(res->RD);
   free(res->RS);
   free(res->RT);
   free(res->IMM);
   free(res->shamt);
   free(res->Target);
   free(res);
}

// Copies `name` into the buffer *slot, growing the buffer first when the
// name does not fit in REG_LENGTH bytes (or the slot is still empty).
static bool pasmStoreName(char **slot, const char *name)
{
   size_t need = strlen(name) + 1;

   if (!*slot || need > (size_t)REG_LENGTH)
   {
      size_t cap = need > (size_t)REG_LENGTH ? need : (size_t)REG_LENGTH;
      char *grown = realloc(*slot, cap);
      if (!grown)
         return false;
      *slot = grown;
   }
   memcpy(*slot, name, need);
   return true;
}

// Copies a fixed-width code string into an already allocated field; clears
// *ok when either side is missing (failed lookup or unallocated field).
static void pasmCopyCode(char *dst, const char *code, bool *ok)
{
   if (dst && code)
      strcpy(dst, code);
   else
      *ok = false;
}

// Fills one register operand: looks `regName` up with findReg(), writes its
// binary string to `bits`, optionally stores the name in *nameSlot (pass
// NULL to leave the name field untouched) and returns the register number.
// Clears *ok and returns 0 when the operand is missing or unknown.
static uint8_t pasmSetReg(char **nameSlot, char *bits, char *regName, bool *ok)
{
   char *code = (regName && bits) ? findReg(regName) : NULL;

   if (!code || (nameSlot && !pasmStoreName(nameSlot, regName)))
   {
      *ok = false;
      return 0;
   }
   strcpy(bits, code);
   return binaryToDec(bits);
}

// Copies a heap string produced by one of the decimal-to-binary converters
// into `dst`, then frees it.  Clears *ok if either pointer is NULL.
static void pasmTakeBits(char *dst, char *bin, bool *ok)
{
   if (dst && bin)
      strcpy(dst, bin);
   else
      *ok = false;
   free(bin);
}

// Stores a computed immediate: renders `value` as decimal text, converts it
// with decToBinary() into IMM and records the number in Imm.
static void pasmSetImmInt(ParseResult *res, int value, bool *ok)
{
   char text[TARGET_LENGTH];

   // Size the write by the buffer, not by sizeof(char *).
   snprintf(text, sizeof text, "%d", value);
   pasmTakeBits(res->IMM, decToBinary(text), ok);
   res->Imm = value;
}

// Stores an immediate given as decimal text.  `isUnsigned` selects
// unDecToBinary()/strtoul() instead of decToBinary()/strtol().
static void pasmSetImmText(ParseResult *res, char *text, bool isUnsigned, bool *ok)
{
   if (!text)
   {
      *ok = false;
      return;
   }

   pasmTakeBits(res->IMM, isUnsigned ? unDecToBinary(text) : decToBinary(text), ok);

   if (isUnsigned)
      res->Imm = strtoul(text, NULL, 10);
   else
      res->Imm = strtol(text, NULL, 10);
}

// Returns the instruction number recorded for `label` (0 when
// findInstructionNum() does not find it).  Clears *ok when the operand is
// missing altogether.
static int16_t pasmLabelIndex(char *label, bool *ok)
{
   if (!label)
   {
      *ok = false;
      return 0;
   }
   return findInstructionNum(label);
}

// Allocates the ParseResult string fields that the format tag `kind`
// ("ju", "i", "r" or "s") calls for.  Returns false if an allocation fails.
static bool pasmAllocFields(ParseResult *res, const char *kind)
{
   bool ok = true;

   if (!strcmp(kind, "ju"))
   {
      res->Opcode = pasmAlloc(OP_FUNC_CODE_LENGTH, &ok);
   }
   else if (!strcmp(kind, "i"))
   {
      res->RT = pasmAlloc(REG_ADD_LENGTH, &ok);
      res->RS = pasmAlloc(REG_ADD_LENGTH, &ok);
      res->Opcode = pasmAlloc(OP_FUNC_CODE_LENGTH, &ok);
      res->rtName = pasmAlloc(REG_LENGTH, &ok);
      res->rsName = pasmAlloc(REG_LENGTH, &ok);
      res->IMM = pasmAlloc(IMM_LENGTH, &ok);
   }
   else if (!strcmp(kind, "r"))
   {
      res->rtName = pasmAlloc(REG_LENGTH, &ok);
      res->rsName = pasmAlloc(REG_LENGTH, &ok);
      res->rdName = pasmAlloc(REG_LENGTH, &ok);
      res->Funct = pasmAlloc(OP_FUNC_CODE_LENGTH, &ok);
      res->Opcode = pasmAlloc(OP_FUNC_CODE_LENGTH, &ok);
      res->RT = pasmAlloc(REG_ADD_LENGTH, &ok);
      res->RS = pasmAlloc(REG_ADD_LENGTH, &ok);
      res->RD = pasmAlloc(REG_ADD_LENGTH, &ok);
      res->shamt = pasmAlloc(REG_ADD_LENGTH, &ok);
   }
   else if (!strcmp(kind, "s"))
   {
      res->rtName = pasmAlloc(REG_LENGTH, &ok);
      res->rsName = pasmAlloc(REG_LENGTH, &ok);
      res->RT = pasmAlloc(REG_ADD_LENGTH, &ok);
      res->RS = pasmAlloc(REG_ADD_LENGTH, &ok);
      res->RD = pasmAlloc(REG_ADD_LENGTH, &ok);
      res->Funct = pasmAlloc(OP_FUNC_CODE_LENGTH, &ok);
      res->Opcode = pasmAlloc(OP_FUNC_CODE_LENGTH, &ok);
   }

   return ok;
}

// Fills the fields of `res` for the instruction named by res->Mnemonic.
// `str` holds the tokens of the line (str[0] is the mnemonic as written,
// missing operands are NULL).  `bltSecondPass` is true when this call
// produces the "bne" half of a "blt" expansion.  Returns false when an
// operand is missing, a register is unknown or a conversion fails.
static bool pasmEncode(ParseResult *res, char **str, uint16_t instructionNumber,
                       bool bltSecondPass)
{
   bool ok = true;
   char *m = res->Mnemonic;

   // R-type

   if (!strcmp(m, "mul") || !strcmp(m, "sub") || !strcmp(m, "add") ||
       !strcmp(m, "nor") || !strcmp(m, "slt") || !strcmp(m, "addu"))
   {
      res->rd = pasmSetReg(&res->rdName, res->RD, str[1], &ok);
      res->rs = pasmSetReg(&res->rsName, res->RS, str[2], &ok);
      res->rt = pasmSetReg(&res->rtName, res->RT, str[3], &ok);
      pasmCopyCode(res->shamt, "00000", &ok);
      pasmCopyCode(res->Funct, findFunc(m), &ok);
      pasmCopyCode(res->Opcode, findOp(m), &ok);
   }

   else if (!strcmp(m, "sll") || !strcmp(m, "sra"))
   {
      res->rd = pasmSetReg(&res->rdName, res->RD, str[1], &ok);
      res->rt = pasmSetReg(&res->rtName, res->RT, str[2], &ok);
      pasmCopyCode(res->RS, "00000", &ok);

      // The shift amount is the tail of the converted string starting at
      // index 11.  Keep the pointer so the conversion result can be freed.
      char *bin = str[3] ? decToBinary(str[3]) : NULL;
      if (bin && res->shamt && strlen(bin) > 11)
         snprintf(res->shamt, REG_ADD_LENGTH, "%s", bin + 11);
      else
         ok = false;
      free(bin);

      pasmCopyCode(res->Funct, findFunc(m), &ok);
      pasmCopyCode(res->Opcode, findOp(m), &ok);
   }

   else if (!strcmp(m, "move"))
   {
      res->rd = pasmSetReg(&res->rdName, res->RD, str[1], &ok);
      res->rt = pasmSetReg(&res->rtName, res->RT, str[2], &ok);
      res->rs = pasmSetReg(NULL, res->RS, "$zero", &ok);
      pasmCopyCode(res->shamt, "00000", &ok);
      pasmCopyCode(res->Funct, findFunc(m), &ok);
      pasmCopyCode(res->Opcode, findOp(m), &ok);
   }

   else if (!strcmp(m, "srav"))
   {
      // Operand order is rd, rt, rs.
      res->rd = pasmSetReg(&res->rdName, res->RD, str[1], &ok);
      res->rs = pasmSetReg(&res->rsName, res->RS, str[3], &ok);
      res->rt = pasmSetReg(&res->rtName, res->RT, str[2], &ok);
      pasmCopyCode(res->shamt, "00000", &ok);
      pasmCopyCode(res->Funct, findFunc(m), &ok);
      pasmCopyCode(res->Opcode, findOp(m), &ok);
   }

   else if (!strcmp(m, "syscall"))
   {
      pasmCopyCode(res->RD, "00000", &ok);
      pasmCopyCode(res->RS, "00000", &ok);
      pasmCopyCode(res->RT, "00000", &ok);
      pasmCopyCode(res->shamt, "00000", &ok);
      pasmCopyCode(res->Funct, findFunc(m), &ok);
      pasmCopyCode(res->Opcode, findOp(m), &ok);
   }

   else if (!strcmp(m, "nop"))
   {
      pasmCopyCode(res->shamt, "00000", &ok);
      res->rd = pasmSetReg(NULL, res->RD, "$zero", &ok);
      res->rt = pasmSetReg(NULL, res->RT, "$zero", &ok);
      pasmCopyCode(res->RS, "00000", &ok);
      pasmCopyCode(res->Funct, findFunc(m), &ok);
      pasmCopyCode(res->Opcode, findOp(m), &ok);
   }

   else if (!strcmp(m, "blt"))
   {
      // First half of the pseudo-instruction: slt $at, rs, rt.
      res->rd = pasmSetReg(&res->rdName, res->RD, "$at", &ok);
      res->rs = pasmSetReg(&res->rsName, res->RS, str[1], &ok);
      res->rt = pasmSetReg(&res->rtName, res->RT, str[2], &ok);
      pasmCopyCode(res->shamt, "00000", &ok);
      pasmCopyCode(res->Funct, findFunc("slt"), &ok);
      pasmCopyCode(res->Opcode, findOp("slt"), &ok);

      // Ask for the "bne" half on the next call, but only if this half
      // was produced.
      if (ok)
         blt = true;
   }

   // I-type

   else if (!strcmp(m, "blez") || !strcmp(m, "bgtz"))
   {
      res->rs = pasmSetReg(&res->rsName, res->RS, str[1], &ok);
      res->rt = pasmSetReg(NULL, res->RT, "$zero", &ok);
      pasmCopyCode(res->Opcode, findOp(m), &ok);

      int16_t target = pasmLabelIndex(str[2], &ok);
      int16_t delta = (target - instructionNumber) - 1;
      pasmSetImmInt(res, delta, &ok);
   }

   else if (!strcmp(m, "bne") || !strcmp(m, "beq"))
   {
      if (bltSecondPass)
      {
         // Second half of "blt rs, rt, label": bne $at, $zero, label.
         res->rs = pasmSetReg(&res->rsName, res->RS, "$at", &ok);
         res->rt = pasmSetReg(NULL, res->RT, "$zero", &ok);
         pasmCopyCode(res->Opcode, findOp("bne"), &ok);

         int16_t target = pasmLabelIndex(str[3], &ok);
         int16_t delta = (target - instructionNumber);
         if (delta < 0)
            delta--;
         pasmSetImmInt(res, delta, &ok);
      }
      else
      {
         res->rs = pasmSetReg(&res->rsName, res->RS, str[1], &ok);
         res->rt = pasmSetReg(&res->rtName, res->RT, str[2], &ok);
         pasmCopyCode(res->Opcode, findOp(m), &ok);

         int16_t target = pasmLabelIndex(str[3], &ok);
         int16_t delta = (target - instructionNumber) - 1;
         pasmSetImmInt(res, delta, &ok);
      }
   }

   else if (!strcmp(m, "li"))
   {
      res->rt = pasmSetReg(&res->rtName, res->RT, str[1], &ok);
      res->rs = pasmSetReg(NULL, res->RS, "$zero", &ok);
      pasmCopyCode(res->Opcode, findOp(m), &ok);
      pasmSetImmText(res, str[2], true, &ok);
   }

   else if (!strcmp(m, "la"))
   {
      res->rt = pasmSetReg(&res->rtName, res->RT, str[1], &ok);
      res->rs = pasmSetReg(NULL, res->RS, "$zero", &ok);
      pasmCopyCode(res->Opcode, findOp(m), &ok);

      if (str[2])
         pasmSetImmInt(res, findImm(str[2]), &ok);
      else
         ok = false;
   }

   else if (!strcmp(m, "addi") || !strcmp(m, "slti") || !strcmp(m, "addiu"))
   {
      res->rt = pasmSetReg(&res->rtName, res->RT, str[1], &ok);
      res->rs = pasmSetReg(&res->rsName, res->RS, str[2], &ok);
      pasmCopyCode(res->Opcode, findOp(m), &ok);
      pasmSetImmText(res, str[3], !strcmp(m, "addiu"), &ok);
   }

   else if (!strcmp(m, "lw") || !strcmp(m, "sw"))
   {
      // Only "lw" accepts a bare label; a non-zero findImm() result
      // selects that form.
      uint16_t labelValue = 0;
      if (!str[2])
         ok = false;
      else if (!strcmp(m, "lw"))
         labelValue = findImm(str[2]);

      if (ok && labelValue != 0)
      {
         // rt is now recorded on this path as well, matching every other
         // branch that fills RT from an operand.
         res->rt = pasmSetReg(&res->rtName, res->RT, str[1], &ok);
         res->rs = pasmSetReg(NULL, res->RS, "$zero", &ok);
         pasmCopyCode(res->Opcode, findOp(m), &ok);
         pasmSetImmInt(res, labelValue, &ok);
      }
      else if (ok)
      {
         // "offset(base)": widths bound both copies, and the match count
         // is checked so half-parsed operands are rejected.
         char offset[50] = "";
         char paren = '\0';
         char base[PASM_REG_TEXT] = "";

         if (sscanf(str[2], " %49[^(]%c %15[^)]", offset, &paren, base) != 3)
         {
            ok = false;
         }
         else
         {
            res->rt = pasmSetReg(&res->rtName, res->RT, str[1], &ok);
            res->rs = pasmSetReg(&res->rsName, res->RS, base, &ok);
            pasmCopyCode(res->Opcode, findOp(m), &ok);
            pasmSetImmText(res, offset, false, &ok);
         }
      }
   }

   else if (!strcmp(m, "lui"))
   {
      res->rt = pasmSetReg(&res->rtName, res->RT, str[1], &ok);
      pasmCopyCode(res->Opcode, findOp(m), &ok);
      pasmCopyCode(res->RS, "00000", &ok);

      // lui has no source register, so the name field is dropped.
      free(res->rsName);
      res->rsName = NULL;

      pasmSetImmText(res, str[2], false, &ok);
   }

   // S-type

   else if (!strcmp(m, "mult"))
   {
      res->rs = pasmSetReg(&res->rsName, res->RS, str[1], &ok);
      res->rt = pasmSetReg(&res->rtName, res->RT, str[2], &ok);
      pasmCopyCode(res->Funct, findFunc(m), &ok);
      pasmCopyCode(res->Opcode, findOp(m), &ok);
      pasmCopyCode(res->RD, "00000", &ok);
   }

   // J-type

   else if (!strcmp(m, "j"))
   {
      pasmCopyCode(res->Opcode, findOp(m), &ok);

      int16_t target = pasmLabelIndex(str[1], &ok);
      if (target)
         target--;

      // The decimal text used to go into a 1-byte allocation.
      char text[TARGET_LENGTH];
      snprintf(text, sizeof text, "%d", target);

      res->Target = decToBinary26(text);
      if (!res->Target)
         ok = false;
   }

   return ok;
}

// Parses one line of assembly text into a freshly allocated ParseResult.
//
//   pASM              - the instruction text; tokens are split on spaces,
//                       commas and tabs.
//   instructionNumber - index of this instruction, used to turn branch
//                       labels into relative offsets.
//
// Returns the populated ParseResult, or NULL when pASM is NULL, the line has
// no tokens, the mnemonic is unknown, an operand is missing, a register name
// is not in the register table, a conversion helper returns NULL, or an
// allocation fails.  The struct and the strings attached to it are heap
// allocated and are not freed here.
//
// "blt" takes two calls: the first produces the "slt $at, rs, rt" half and
// sets the file-level flag `blt`; the next call (expected to receive the
// same line) produces "bne $at, $zero, label".  The flag is consumed on
// entry, so a failed call cannot leave it stuck.
ParseResult *parseASM(const char *const pASM, uint16_t instructionNumber)
{
   const bool bltSecondPass = blt;
   blt = false;

   if (!pASM)
      return NULL;

   ParseResult *res = calloc(1, sizeof(ParseResult));
   if (!res)
      return NULL;

   res->rd = 0;
   res->rs = 0;
   res->rt = 0;

   bool ok = true;
   char *line = pasmAlloc(strlen(pASM) + 1, &ok);
   res->ASMInstruction = pasmAlloc(strlen(pASM) + 1, &ok);

   if (ok)
   {
      strcpy(res->ASMInstruction, pASM);
      strcpy(line, pASM);

      // Grabs arguments.  The array is NULL-filled and one slot larger
      // than the token limit, so str[k] is NULL for every missing operand.
      char *str[PASM_MAX_TOKENS + 1] = {NULL};
      int count = 0;
      for (char *tok = strtok(line, " ,\t");
           tok && count < PASM_MAX_TOKENS;
           tok = strtok(NULL, " ,\t"))
      {
         str[count++] = tok;
      }

      // Sets mnemonic for type checking.
      char *mnemonic = bltSecondPass ? "bne" : str[0];
      char *kind = mnemonic ? findType(mnemonic) : NULL;
      ok = (kind != NULL);

      if (ok)
         res->Mnemonic = pasmAlloc(strlen(mnemonic) + 1, &ok);

      if (ok)
      {
         strcpy(res->Mnemonic, mnemonic);
         ok = pasmAllocFields(res, kind) &&
              pasmEncode(res, str, instructionNumber, bltSecondPass);
      }
   }

   free(line);

   if (!ok)
   {
      pasmDiscard(res);
      return NULL;
   }

   return res;
}

// Converts a string of binary digits to its numeric value.
//
//   addy - NUL-terminated string; every '1' contributes a set bit and any
//          other character counts as 0.  NULL is treated as an empty string.
//
// Returns the value reduced modulo 256 (the return type is 8 bits wide).
// The running value is kept in a uint8_t so that inputs of any length are
// well defined; a signed int accumulator overflows once the input reaches
// 32 digits.
uint8_t binaryToDec(char *addy)
{
   uint8_t value = 0;

   if (addy == NULL)
      return 0;

   for (const char *digit = addy; *digit != '\0'; digit++)
      value = (uint8_t)((value << 1) | (*digit == '1'));

   return value;
}

// Looks up a register name in the registers[] table.
//
//   inp - register name such as "$t0"; may be NULL (parseASM() passes the
//         strtok() result directly, which is NULL for a missing operand).
//
// Returns the table's binary-digit string for the register, or NULL when
// inp is NULL or the name is not in the table.  The returned string belongs
// to the table and must not be freed.
char *findReg(char *inp)
{
   if (inp == NULL)
      return NULL;

   for (size_t k = 0; registers[k].mnem != NULL; k++)
   {
      if (strcmp(inp, registers[k].mnem) == 0)
         return registers[k].address;
   }
   return NULL;
}

// Looks up a mnemonic in the func[] table.  Returns the table's funct
// string for that mnemonic, or NULL when the mnemonic has no entry.  The
// returned string belongs to the table.
char *findFunc(char *inp)
{
   for (int k = 0; func[k].mnem != NULL; k++)
   {
      if (strcmp(inp, func[k].mnem) == 0)
         return func[k].funcCode;
   }
   return NULL;
}

// Looks up a mnemonic in the type[] table.  Returns the table's format tag
// ("r", "s", "i" or "ju") for that mnemonic, or NULL when the mnemonic has
// no entry.  The returned string belongs to the table.
char *findType(char *inp)
{
   for (int k = 0; type[k].mnem != NULL; k++)
   {
      if (strcmp(inp, type[k].mnem) == 0)
         return type[k].mType;
   }
   return NULL;
}

// Looks up a mnemonic in the op[] table.  Returns the table's opcode string
// for that mnemonic, or NULL when the mnemonic has no entry.  The returned
// string belongs to the table.
char *findOp(char *inp)
{
   for (int k = 0; op[k].mnem != NULL; k++)
   {
      if (strcmp(inp, op[k].mnem) == 0)
         return op[k].opCode;
   }
   return NULL;
}

// Looks up a label in textSymbols and returns the instruction number stored
// with it.
//
//   word - label text; may be NULL (parseASM() passes the strtok() result
//          directly, which is NULL when the label operand is missing).
//
// Returns 0 when word is NULL or no entry has that key.  The scan stops at
// the first entry whose key is NULL.
uint16_t findInstructionNum(char *word)
{
   if (word == NULL)
      return 0;

   for (int k = 0; textSymbols[k].key != NULL; k++)
   {
      if (strcmp(word, textSymbols[k].key) == 0)
         return textSymbols[k].instructionNum;
   }
   return 0;
}