DLX-Microprocessor / scripts / dlxasm.pl
dlxasm.pl
Raw
#!/usr/bin/perl
#
# Assemble dlx code into machine code.  This program only works for a single
# file, and generates binary code that can be loaded into a simulator.
#
# Copyright (C) 1999 by Ethan L. Miller
#
# The main data structure is the symbol table that tracks the symbols defined
# in the code.  They're defined in the first pass and used in the second
# pass.
#
# $Id: dlxasm,v 1.3 2003/10/03 08:09:20 elm Exp $
#

use Getopt::Long;

# Defaults for everything the command line can override, plus the program
# name (basename of $0) that appears in the usage message.
($pn = $0) =~ s{.*/}{}g;
($asmfile, $symfile, $listfile, $exefile) = ("") x 4;
$debug      = 0;
$startlabel = "_main";
$executable = "";            # in-memory image written to the output file
$exemagic   = 0x444c5821;    # header magic number; the bytes spell "DLX!"

# Print the command-line synopsis and terminate by calling die(), so the
# text goes to STDERR and the process ends with a failure status.
# Takes no arguments and does not return.  Reads the global $pn (program
# name) for the synopsis line.
# NOTE: if this sub is invoked as a Getopt::Long option callback, the die()
# is trapped by GetOptions and reported as a warning instead of ending the
# program.
sub printusageandexit {
  die ("Unrecognized option.  Correct usage is $pn <srcfile> with options:\n" .
       "\t-output <dstfile> : specify output file\n" .
       "\t-init <startsymbol> : initial location to start execution\n" .
       "\t-sym <symfile> : file to print a symbol table in\n" .
       "\t-list <listfile> : file to print a listing in\n" .
       "\t-debug : turn on debugging\n" .
       "NOTE: options may be specified with just the first letter if desired.\n");
}

# Parse the command line.  -output, -o and -executable all name the output
# file; -X names an optional text dump of the generated code.
#
# -help only records a flag here: a die() raised inside a Getopt::Long
# callback is trapped by GetOptions, so it cannot be used to stop the
# program.  The result of GetOptions is checked for the same reason, so that
# an unknown option ends the run instead of being ignored.
my $wanthelp = 0;
my $optsok = GetOptions ("debug" => \$debug,
			 "output|o|executable=s" => \$exefile,
			 "sym=s" => \$symfile,
			 "list=s" => \$listfile,
			 "init=s" => \$startlabel,
			 "X=s" => \$asmfile,
			 "help" => \$wanthelp,
			);

# Exactly one source file must remain after the options.
printusageandexit () if (!$optsok || $wanthelp || $#ARGV != 0);

# Default output name: the source name with a trailing ".dlx" replaced by
# ".exe", or with ".exe" appended when there is no such suffix.
$srcfile = pop @ARGV;
if ($srcfile =~ /^(.*)\.dlx$/) {
  $exefile = $1 . ".exe" if $exefile eq "";
} else {
  $exefile = $srcfile . ".exe" if $exefile eq "";
}

# Instruction table: maps each mnemonic to the string "<format>,<code>".
# forminstr splits the value on the comma.  <code> is a hex number; for the
# r* and f* formats it is OR'ed into the low bits of the instruction word,
# for every other format it is shifted left by 26 bits.  <format> selects
# how the operands are read:
#   r   dst, src1, src2 registers        r2  dst, src1 registers
#   f   dst, src1, src2 registers        f2  src1, src2 registers (no dst)
#   fd  dst, src1 registers
#   i   dst, src1, immediate             i1  dst, immediate
#   l   dst, offset(base)                s   offset(base), src
#   b   register, target                 b0  target only
#   j   target                           jr  register
#   t   immediate                        n   no operands
%instTbl = (
# Register-register instructions (code is OR'ed into the low bits)
  "sll"  => "r,0x04",
  "srl"  => "r,0x06",
  "sra"  => "r,0x07",
  "add"  => "r,0x20",
  "addu" => "r,0x21",
  "sub"  => "r,0x22",
  "subu" => "r,0x23",
  "and"  => "r,0x24",
  "or"   => "r,0x25",
  "xor"  => "r,0x26",
  "seq"  => "r,0x28",
  "sne"  => "r,0x29",
  "slt"  => "r,0x2a",
  "sgt"  => "r,0x2b",
  "sle"  => "r,0x2c",
  "sge"  => "r,0x2d",
  "movi2s" => "r2,0x30",
  "movs2i" => "r2,0x31",
  "movf" => "r2,0x32",
  "movd" => "r2,0x33",
  "movfp2i" => "r2,0x34",
  "movi2fp" => "r2,0x35",
  "movi2t" => "r,0x36",
  "movt2i" => "r,0x37",
  "sltu"  => "r,0x3a",
  "sgtu"  => "r,0x3b",
  "sleu"  => "r,0x3c",
  "sgeu"  => "r,0x3d",
# Floating-point instructions (forminstr encodes every f* format, including
# mult/div/multu/divu, with the 0x04000000 prefix)
  "addf"  => "f,0x00",
  "subf"  => "f,0x01",
  "multf" => "f,0x02",
  "divf"  => "f,0x03",
  "addd"  => "f,0x04",
  "subd"  => "f,0x05",
  "multd" => "f,0x06",
  "divd"  => "f,0x07",
  "cvtf2d" => "fd,0x08",
  "cvtf2i" => "fd,0x09",
  "cvtd2f" => "fd,0x0a",
  "cvtd2i" => "fd,0x0b",
  "cvti2f" => "fd,0x0c",
  "cvti2d" => "fd,0x0d",
  "mult"  => "f,0x0e",
  "div"   => "f,0x0f",
  "eqf"   => "f2,0x10",
  "nef"   => "f2,0x11",
  "ltf"   => "f2,0x12",
  "gtf"   => "f2,0x13",
  "lef"   => "f2,0x14",
  "gef"   => "f2,0x15",
  "multu" => "f,0x16",
  "divu"  => "f,0x17",
  "eqd"   => "f2,0x18",
  "ned"   => "f2,0x19",
  "ltd"   => "f2,0x1a",
  "gtd"   => "f2,0x1b",
  "led"   => "f2,0x1c",
  "ged"   => "f2,0x1d",
# General instructions (code is shifted left by 26 bits)
  "j"    => "j,0x02",
  "jal"  => "j,0x03",
  "beqz" => "b,0x04",
  "bnez" => "b,0x05",
  "bfpt" => "b0,0x06",
  "bfpf" => "b0,0x07",
  "addi" => "i,0x08",
  "addui" => "i,0x09",
  "subi" => "i,0x0a",
  "subui" => "i,0x0b",
  "andi" => "i,0x0c",
  "ori"  => "i,0x0d",
  "xori" => "i,0x0e",
  "lhi"  => "i1,0x0f",
  "rfe"  => "n,0x10",
  "trap" => "t,0x11",
  "jr"   => "jr,0x12",
  "jalr" => "jr,0x13",
  "slli" => "i,0x14",
  "nop"  => "n,0x15",
  "srli" => "i,0x16",
  "srai" => "i,0x17",
  "seqi" => "i,0x18",
  "snei" => "i,0x19",
  "slti" => "i,0x1a",
  "sgti" => "i,0x1b",
  "slei" => "i,0x1c",
  "sgei" => "i,0x1d",
  "lb"   => "l,0x20",
  "lh"   => "l,0x21",
  "lw"   => "l,0x23",
  "lbu"  => "l,0x24",
  "lhu"  => "l,0x25",
  "lf"   => "l,0x26",
  "ld"   => "l,0x27",
  "sb"   => "s,0x28",
  "sh"   => "s,0x29",
  "sw"   => "s,0x2b",
  "sf"   => "s,0x2e",
  "sd"   => "s,0x2f",
  "itlb" => "n,0x38",
  "sltui" => "i,0x3a",
  "sgtui" => "i,0x3b",
  "sleui" => "i,0x3c",
  "sgeui" => "i,0x3d",
);
# Special register names.  getreg consults this table when an operand is not
# of the form rN / fN and returns the mapped number, which is then placed in
# the instruction's register field like any other register number.  Keys are
# lowercase because getreg lowercases the operand before the lookup.
%specialreg = ("pc" => 0,
	       "ir31" => 2,
	       "isr" => 3,
	       "iar" => 4,
	       "status" => 5,
	       "cause" => 6,
	       "intrvec" => 8,
	       "fault" => 9,
	       "ptbase" => 16,
	       "ptsize" => 17,
	       "ptbits" => 18,
	       "tlbentry" => 20,
	       "tlbvaddr" => 21,
	       "tlbpaddr" => 22,
	      );

# Set up for pass one.  In that pass we just figure out label values.
# Assembly starts in the text section; -1 marks a section whose start
# address has not been given by a .text/.data directive yet.
$section = "t";
$start{"t"} = $start{"d"} = -1;

# Open the source and every requested output.  The three-argument form of
# open keeps characters such as ">" or "|" in a file name from being read as
# an open mode, and $! reports why an open failed.
open (SRC, "<", $srcfile)
  or die "Couldn't open $srcfile for assembly: $!";
if ($asmfile ne "") {
  open (ASM, ">", $asmfile)
    or die "Couldn't open $asmfile for output: $!";
}
if ($exefile ne "") {
  open (EXE, ">", $exefile)
    or die "Couldn't open $exefile for output: $!";
  open (HDR, ">", "$exefile.hdr")
    or die "Couldn't open $exefile.hdr for output: $!";
  # Both files receive packed binary data; binmode prevents newline
  # translation on platforms that distinguish text from binary files.
  binmode (EXE);
  binmode (HDR);
}

# Two-pass assembly loop.  Pass 1 walks the source only to record the address
# of every label in %val and to find where each section ends.  Pass 2 reads
# the source again from the top and emits the encoded bytes to whichever of
# the ASM dump, the listing and the executable image were requested.
for ($pass = 1; $pass <= 2; $pass++) {
  if ($pass != 1) {
    # Pass 1 is complete: its final section addresses give the section ends.
    $maxdaddr = $addr{"d"};
    $maxtaddr = $addr{"t"};
    $endaddr = (($maxtaddr > $maxdaddr) ? $maxtaddr : $maxdaddr);
    # A section that was never given an explicit address starts at 0.
    if ($start{"t"} == -1) {
      $start{"t"} = 0;
    }
    if ($start{"d"} == -1) {
      $start{"d"} = 0;
    }
    # Execution begins at the start label when the program defines it,
    # otherwise at the beginning of the text section.
    if (defined $val{$startlabel}) {
      $startloc = $val{$startlabel};
    } else {
      $startloc = $start{"t"};
    }
    if ($asmfile ne "") {
      printf ASM "start:%08x %08x ", $startloc, $endaddr;
      printf (ASM "%08x %08x %08x %08x\n", $start{"t"},
              $maxtaddr-$start{"t"}, $start{"d"}, $maxdaddr-$start{"d"});
    }
    if ($listfile ne "") {
      open (LISTING, ">", $listfile) or
        die "Couldn't open $listfile for output.";
      printf LISTING "%5s  %8s\t%8s\n", "line", "address", "contents";
    }
    if ($exefile ne "") {
      # The image starts as all zero bytes and is patched as code and data
      # are produced below.
      $executable = "\x00" x $endaddr;
      # NOTE(review): $start{"b"} and $bsslen are not assigned anywhere in
      # the visible code, so the last two header words are packed as 0.
      $hdr = pack ("L*", $exemagic, $endaddr, $startloc,
                   $start{"t"}, $maxtaddr-$start{"t"},
                   $start{"d"}, $maxdaddr-$start{"d"},
                   $start{"b"}, $bsslen);
      # substr ($executable, 0, length ($hdr), $hdr);
    }
  }
  # Every pass must begin in the same state.  Without resetting the section
  # here, pass 2 would start in whatever section pass 1 ended in, and the
  # addresses it uses would no longer match the label values from pass 1.
  $section = "t";
  $addr{"d"} = 0;
  $addr{"t"} = 0;
  seek (SRC, 0, 0);
  print "Starting pass $pass.\n";
  $lineno = 0;
line:
  while (<SRC>) {
    $lineno++;
    # Address at which anything produced by this line will be placed.
    $curaddr = $addr{$section};
    $out = "";
    # remove leading whitespace
    s/^\s+//;
    $curline = $_;
    chomp $curline;
    # skip comments
    if (/^\;/) {
      if (($pass == 2) and ($listfile ne "")) {
        printf LISTING "%5d  %20s%s\n", $lineno, "", $curline;
      }
      next line;
    }
    # Do an operation based on the first word on the line
    # NOTE(review): when this match fails (e.g. on a blank line) $1 is not
    # cleared, so the test below sees whatever an earlier match left behind.
    /^([a-zA-Z0-9:_.]+)/;
    if ($1 eq "") {
      next line;
    }
    $op = $1;
    print STDERR "Op is '$op'\n" if ($debug);
    if ($op =~ /^([a-zA-Z0-9_]+)\:$/) {
      # A label.  Nothing else on the line is processed.
      if ($pass == 1) {
        # set label value
        $val{$1} = $addr{$section};
      }
    } elsif (/^[a-zA-Z]+/) {
      # Anything else that starts with a letter is an instruction.
      if ($pass == 1) {
        if ($section eq "d") {
          warn "Instructions not allowed in data segment " .
            "(at line $lineno)\n";
          $error = 1;
        }
      } else {
        # Handle instructions for second pass.  This means outputting
        # the correct code.
        $out = pack ("N", &forminstr ($_));
      }
      # Every instruction occupies one 4-byte word.
      $addr{$section} += 4;
    } elsif (/^\.(text|data)/) {
      # Section switch, optionally followed by a new address.
      s/\;.*$//;
      $tmp = "";
      ($section, $tmp) = split (/\s+/, $_, 3);
      # ".text" -> "t", ".data" -> "d"
      $section = substr($section, 1, 1);
      if ($tmp ne "") {
        if ($tmp =~ /^0/) {
          # oct() understands both 0x... hex and leading-zero octal.
          $addr{$section} = oct ($tmp);
        } else {
          $addr{$section} = $tmp;
        }
        # The first explicit address becomes the section's start.
        if ($start{$section} == -1) {
          $start{$section} = $addr{$section};
        }
      }
      print "Section $section now at $addr{$section}.\n" if ($debug);
    } elsif (/^\.(proc|endproc|global)/) {
      # Ignore directives - we don't need them yet
    } elsif (/^\.space/) {
      # .space simply adds to the address pointer
      ($op, $n, $rest) = split (/\s+/, $_, 3);
      print "SPACE: line is '$_' op = $op, n = $n, rest = $rest\n" if ($debug);
      if ($section eq "t") {
        warn ".space can't be used in the text segment " .
          "(at line $lineno)!\n";
        $error = 1;
      }
      $addr{$section} += $n;
    } elsif (/^\.ascii(z?)/) {
      $out = &getascii ($_, ($1 eq "z"));
      $addr{$section} += length ($out);
    } elsif (/^\.(byte|word|float|double)/) {
      # Comma-separated list of values.  The first "entry" still carries
      # the directive word itself, so strip it off.
      my $tp = $1;
      s/\;.*$//;
      my @args = split (/\s*,\s*/);
      $args[0] =~ s/\.[a-z]+\s+//;
      if ($tp =~ /byte|word/) {
        # Integer entries may be expressions that use labels.
        for ($i = 0; $i <= $#args; $i++) {
          $args[$i] = &getimm ($args[$i]);
        }
      }
      # NOTE(review): words are packed big-endian ("N") but floats and
      # doubles use the host's native byte order ("f"/"d") - confirm that
      # this is what the loader expects on little-endian hosts.
      $out = pack ("C*", @args) if ($tp eq "byte");
      $out = pack ("N*", @args) if ($tp eq "word");
      $out = pack ("f*", @args) if ($tp eq "float");
      $out = pack ("d*", @args) if ($tp eq "double");
      $addr{$section} += length ($out);
    } elsif (/^\.align/) {
      # Align so that the lowest n bits are all 0's.
      ($op, $n, $rest) = split (/\s+/, $_, 3);
      my $mask = (1 << $n) - 1;
      # This will leave things alone if the address is already
      # correctly aligned, and align to the next possible point if
      # it's not aligned.
      if (($addr{$section} & $mask) != 0) {
        $addr{$section} += $mask;
        $addr{$section} &= ~$mask;
        # Force the next line to include an address at the start
        $prevaddr = $addr{$section};
      }
    }
    if ($pass == 2) {
      if ($out ne "") {
        # Output the current value
        if ($asmfile ne "") {
          # An address prefix is written only when this item does not
          # directly follow the previous one.
          if ($curaddr != ($prevaddr + 4)) {
            printf (ASM "%08x", $curaddr);
          }
          # One ":xxxxxxxx" line per 8 hex digits (4 bytes) of output.
          my $data = unpack ("H*", $out);
          my $j;
          for ($j = 0; $j < length ($data); $j += 8) {
            print ASM ":" . substr ($data,$j,8) . "\n";
          }
          $prevaddr = $curaddr;
        }
        if ($exefile ne "") {
          # Patch the bytes into the in-memory image at their address.
          substr ($executable, $curaddr, length ($out), $out);
        }
      }
      if ($listfile ne "") {
        # Generate the list of numbers to output
        my $data = unpack ("H*", $out);
        my $i = $curaddr;
        my $j;
        # A line that produced no bytes still gets one listing row.
        $data = " " if ($data eq "");
        for ($j = 0; $j < length ($data); $j += 8) {
          printf (LISTING "%5d  %08x  %-8s\t%s\n", $lineno,
                  $i, substr ($data, $j, 8), $curline);
          $i += 4;
          # The source text is shown on the first row only.
          $curline = "";
        }
      }
    }
  }
  if ($error == 1) {
    die "Errors occurred during assembly.  Exiting....\n";
  }
  # The symbol table is complete once pass 1 has finished.
  if (($pass == 1) && ($symfile ne "")) {
    open (SYM, ">", $symfile) or die "Couldn't open symbol file $symfile.";
    foreach $sym (sort keys %val) {
      printf SYM "%-20s %08x\n", $sym, $val{$sym};
    }
    close SYM;
  }
}

# Report where each section ended in the final pass.
printf ("Last text address: 0x%x\n", $addr{'t'});
printf ("Last data address: 0x%x\n", $addr{'d'});

# Write the header and the image, then close every output that was opened.
# Buffered write errors (for example a full disk) only show up when the
# handle is printed to or closed, so each of those results is checked.
if ($asmfile ne "") {
  close (ASM) or die "Error writing $asmfile: $!\n";
}
if ($listfile ne "") {
  close (LISTING) or die "Error writing $listfile: $!\n";
}
if ($exefile ne "") {
  (print HDR $hdr) or die "Error writing $exefile.hdr: $!\n";
  close (HDR) or die "Error writing $exefile.hdr: $!\n";
  (print EXE $executable) or die "Error writing $exefile: $!\n";
  close (EXE) or die "Error writing $exefile: $!\n";
}
close (SRC);

exit;

# Translate a register operand into the number stored in an instruction.
#   $_[0] - register name: "rN" or "fN" (any letter case), or one of the
#           names listed in %specialreg.
# Returns the register number.  A name that is neither form produces a
# warning that cites the global $lineno, and register 0 is returned.
sub getreg {
  my $r = lc ($_[0]);
  my $rnum = -1;
  # The class must be the literal letters f and r.  Writing "$r" inside the
  # brackets would interpolate the operand itself into the class, which lets
  # a bare number such as "31" or an unknown prefix such as "x7" match.
  if ($r =~ /^[fr]([0-9]+)/) {
    $rnum = $1;
  } elsif (defined $specialreg{$r}) {
    $rnum = $specialreg{$r};
  }
  if ($rnum == -1) {
    warn "Illegal register number ($r) at line $lineno.\n";
    $rnum = 0;
  }
  return ($rnum);
}

# Evaluate an immediate operand.
#   $_[0] - expression text; "#" characters are dropped and every word that
#           starts with a letter or underscore is replaced by its value from
#           the symbol table %val.
# Returns the numeric value of the expression, or undef when the expression
# is empty or cannot be evaluated.  A symbol with no value counts as 0; it is
# reported only when $pass is not 1, because forward references are normal
# while labels are still being collected.
sub getimm {
  my $imm = $_[0];
  $imm =~ s/#//g;
  # Splitting at word boundaries isolates symbol names from operators.
  my @p = split (/\b/, $imm);
  for my $tok (@p) {
    next unless $tok =~ /^[_a-zA-Z]/;
    # Look up value in symbol table, and replace it
    if (defined ($val{$tok})) {
      $tok = $val{$tok};
    } else {
      warn "Undefined symbol: $tok at line $lineno\n" if ($pass != 1);
      $tok = 0;
    }
  }
  # NOTE(security): the operand text is executed as Perl code, so only
  # assemble source files that you trust.
  my $ival = eval (join ("", @p));
  if (($@ ne "") && ($pass != 1)) {
    # A malformed expression used to be ignored silently.
    warn "Bad expression ($imm) at line $lineno\n";
  }
  return ($ival);
}

# Encode one instruction line as a 32-bit machine word.
#   $_[0] - source line that begins with the mnemonic.  A trailing newline
#           and a ";" comment are ignored.  The caller's string is left
#           untouched.
# Returns the instruction word as an integer.  An unknown mnemonic produces
# a warning and the value 0.
# Reads the globals %instTbl (mnemonic -> "format,code"), $addr{t} (address
# of the current instruction, used for relative targets) and $lineno, and
# resolves operands through getreg and getimm.
sub forminstr {
  my ($itype, $op);
  my ($src1, $src2, $dst, $out);
  # Work on a copy: the elements of @_ are aliases of the caller's variables.
  my $text = $_[0];
  chomp $text;
  $text =~ s/\;.*$//;
  # $a[0] is the mnemonic, $a[1..] are the operands.
  my @a = split (/[\s,]+/, $text);
  ($itype,$op) = split (/,/, $instTbl{$a[0]});
  $itype = lc ($itype);
  if ($itype eq "") {
    warn "Illegal instruction ($a[0]) at line $lineno\n";
    return (0);
  }
  $op = hex ($op);
  if ($itype =~ /^r/) {
    # Register-register: the code goes in the low bits.
    $src1 = &getreg ($a[2]);
    if ($itype eq "r") {
      $src2 = &getreg ($a[3]);
    } else {
      # r2 - only one source register
      $src2 = 0;
    }
    $dst = &getreg ($a[1]);
    $out = 0x00000000 | ($src1 << 21) | ($src2 << 16) | ($dst << 11) |
      $op;
  } elsif ($itype eq "i") {
    $src1 = &getreg ($a[2]);
    $dst = &getreg ($a[1]);
    $src2 = &getimm ($a[3]);
    $out = ($op << 26) | ($src1 << 21) | ($dst << 16) | ($src2 & 0xffff);
  } elsif ($itype eq "i1") {
    # Immediates with a single operand
    $dst = &getreg ($a[1]);
    $src2 = &getimm ($a[2]);
    $out = ($op << 26) | ($dst << 16) | ($src2 & 0xffff);
  } elsif ($itype eq "n") {
    # Instructions with no operands
    $out = ($op << 26);
  } elsif (($itype eq "s") || ($itype eq "l")) {
    # load and store operations: a store names the address first, a load
    # names the register first.
    my $memop;
    if ($itype eq "s") {
      $memop = $a[1];
      $dst = &getreg ($a[2]);
    } else {
      $memop = $a[2];
      $dst = &getreg($a[1]);
    }
    # The address has the form offset(rN) with an optional offset.  The
    # match result must be checked: after a failed match $1 and $2 still
    # hold values from an earlier, unrelated match.  Upper-case register
    # names are accepted because getreg lowercases its argument.
    if ($memop =~ /(.*)\((r[0-9]+)\)$/i) {
      my ($offset, $base) = ($1, $2);
      $src2 = ($offset ne "") ? &getimm ($offset) : 0;
      $src1 = &getreg ($base);
    } else {
      warn "Malformed address operand ($memop) at line $lineno\n";
      $src1 = 0;
      $src2 = 0;
    }
    $out = ($op << 26) | ($src1 << 21) | ($dst << 16) | ($src2 & 0xffff);
  } elsif ($itype =~ /^f/) {
    # floating point operations
    if ($itype eq "f") {
      $dst = &getreg ($a[1]);
      $src1 = &getreg ($a[2]);
      $src2 = &getreg ($a[3]);
    } elsif ($itype eq "f2") {
      # comparisons: two sources, no destination
      $src1 = &getreg ($a[1]);
      $src2 = &getreg ($a[2]);
      $dst = 0;
    } else {
      # type fd
      $dst = &getreg ($a[1]);
      $src1 = &getreg ($a[2]);
      $src2 = 0;
    }
    $out = 0x04000000 | ($src1 << 21) | ($src2 << 16) | ($dst << 11) | $op;
  } elsif ($itype =~ /^b/) {
    if ($itype eq "b") {
      $src1 = &getreg ($a[1]);
      $dst = &getimm ($a[2]);
    } else {	# b0 - branches w/o operands
      $src1 = 0;
      $dst = &getimm ($a[1]);
    }
    # The stored offset is relative to the following instruction.
    $dst -= $addr{t} + 4;
    $out = ($op << 26) | ($src1 << 21) | ($dst & 0xffff);
  } elsif ($itype eq "j") {
    $dst = &getimm ($a[1]);
    # Relative to the following instruction, kept to 26 bits.
    $dst -= $addr{t} + 4;
    $out = ($op << 26) | ($dst & 0x3ffffff);
  } elsif ($itype eq "jr") {
    $dst = &getreg ($a[1]);
    $out = ($op << 26) | ($dst << 21);
  } elsif ($itype eq "t") {
    $dst = &getimm ($a[1]);
    $out = ($op << 26) | ($dst & 0x3ffffff);
  }
  return ($out);
}

# Convert the operand of an .ascii / .asciiz directive into raw bytes.
#   $_[0] - the directive line, starting with ".ascii" or ".asciiz"
#   $_[1] - true to append a terminating NUL byte
# Returns the packed string.  When the operand cannot be evaluated the
# string part is empty; that case is reported with a warning citing $lineno
# when $pass is not 1.
sub getascii {
  my $str = $_[0];
  my $zpad = $_[1];
  my $val;
  $str =~ s/^\.ascii(z?)\s+//;
  # NOTE(security): the operand is evaluated as a Perl expression so that
  # quoting and backslash escapes follow Perl's rules.  Any Perl code on the
  # line is therefore executed; only assemble source files that you trust.
  eval ("\$val = " . $str);
  if (!defined ($val) && ($pass != 1)) {
    # Previously a malformed string silently became an empty one.
    warn "Bad string in .ascii directive at line $lineno: $@\n";
  }
  # "x" in the template adds one NUL byte after the string.
  my $pstr = $zpad ? "a*x" : "a*";
  return (pack ($pstr, $val));
}