#!/usr/bin/perl
#
# Assemble dlx code into machine code.  This program only works for a single
# file, and generates binary code that can be loaded into a simulator.
#
# Copyright (C) 1999 by Ethan L. Miller
#
# The main data structure is the symbol table that tracks the symbols defined
# in the code.  They're defined in the first pass and used in the second
# pass.
#
# $Id: dlxasm,v 1.3 2003/10/03 08:09:20 elm Exp $
#

use strict;
use Getopt::Long;

# ---------------------------------------------------------------------------
# Option parsing.  Options select the output (executable) file, the start
# label, and optional symbol-table, listing and text-dump files.  Exactly one
# non-option argument (the source file) is required.
# ---------------------------------------------------------------------------

# Program name without any leading directories; used in the usage message.
my $pn = $0;
$pn =~ s{.*/}{};

my $debug      = 0;
my $help       = 0;
my $asmfile    = "";            # textual "address:contents" dump (-X)
my $symfile    = "";            # symbol table output (-sym)
my $listfile   = "";            # listing output (-list)
my $startlabel = "_main";       # label whose value becomes the start address
my $exefile    = "";            # binary image; "<exefile>.hdr" gets the header
my $executable = "";            # in-memory image, written to EXE at the end
my $exemagic   = 0x444c5821;    # first header word (ASCII bytes "DLX!")

# Print the usage message and terminate the program via die.
sub printusageandexit {
    die ("Unrecognized option. Correct usage is $pn with options:\n" .
         "\t-output : specify output file\n" .
         "\t-init : initial location to start exeuction\n" .
         "\t-sym : file to print a symbol table in\n" .
         "\t-list : file to print a listing in\n" .
         "\t-debug : turn on debugging\n" .
         "NOTE: options may be specified with just the first letter if desired.\n");
}

# "output" is registered alongside "o" because the usage text advertises
# -output.  -help only sets a flag so the usage message is produced here,
# not from inside a GetOptions callback.  A false return from GetOptions
# (unknown or malformed option) also ends in the usage message.
GetOptions ("debug"        => \$debug,
            "output|o=s"   => \$exefile,
            "executable=s" => \$exefile,
            "sym=s"        => \$symfile,
            "list=s"       => \$listfile,
            "init=s"       => \$startlabel,
            "X=s"          => \$asmfile,
            "help"         => \$help,
           ) or printusageandexit ();
printusageandexit () if ($help);
printusageandexit () if (@ARGV != 1);

# Default executable name: replace a trailing ".dlx" with ".exe", or append
# ".exe" when the source name has no ".dlx" suffix.
my $srcfile = pop @ARGV;
if ($exefile eq "") {
    if ($srcfile =~ /^(.*)\.dlx$/) {
        $exefile = $1 . ".exe";
    } else {
        $exefile = $srcfile . ".exe";
    }
}

# ---------------------------------------------------------------------------
# Instruction table.  Each value is "<format>,<opcode>"; forminstr() selects
# the operand layout from <format> and encodes <opcode> (hex).
# ---------------------------------------------------------------------------
my %instTbl = (
    # Register-register instructions
    "sll"     => "r,0x04",
    "srl"     => "r,0x06",
    "sra"     => "r,0x07",
    "add"     => "r,0x20",
    "addu"    => "r,0x21",
    "sub"     => "r,0x22",
    "subu"    => "r,0x23",
    "and"     => "r,0x24",
    "or"      => "r,0x25",
    "xor"     => "r,0x26",
    "seq"     => "r,0x28",
    "sne"     => "r,0x29",
    "slt"     => "r,0x2a",
    "sgt"     => "r,0x2b",
    "sle"     => "r,0x2c",
    "sge"     => "r,0x2d",
    "movi2s"  => "r2,0x30",
    "movs2i"  => "r2,0x31",
    "movf"    => "r2,0x32",
    "movd"    => "r2,0x33",
    "movfp2i" => "r2,0x34",
    "movi2fp" => "r2,0x35",
    "movi2t"  => "r,0x36",
    "movt2i"  => "r,0x37",
    "sltu"    => "r,0x3a",
    "sgtu"    => "r,0x3b",
    "sleu"    => "r,0x3c",
    "sgeu"    => "r,0x3d",
    # Floating-point instructions
    "addf"    => "f,0x00",
    "subf"    => "f,0x01",
    "multf"   => "f,0x02",
    "divf"    => "f,0x03",
    "addd"    => "f,0x04",
    "subd"    => "f,0x05",
    "multd"   => "f,0x06",
    "divd"    => "f,0x07",
    "cvtf2d"  => "fd,0x08",
    "cvtf2i"  => "fd,0x09",
    "cvtd2f"  => "fd,0x0a",
    "cvtd2i"  => "fd,0x0b",
    "cvti2f"  => "fd,0x0c",
    "cvti2d"  => "fd,0x0d",
    "mult"    => "f,0x0e",
    "div"     => "f,0x0f",
    "eqf"     => "f2,0x10",
    "nef"     => "f2,0x11",
    "ltf"     => "f2,0x12",
    "gtf"     => "f2,0x13",
    "lef"     => "f2,0x14",
    "gef"     => "f2,0x15",
    "multu"   => "f,0x16",
    "divu"    => "f,0x17",
    "eqd"     => "f2,0x18",
    "ned"     => "f2,0x19",
    "ltd"     => "f2,0x1a",
    "gtd"     => "f2,0x1b",
    "led"     => "f2,0x1c",
    "ged"     => "f2,0x1d",
    # General instructions
    "j"       => "j,0x02",
    "jal"     => "j,0x03",
    "beqz"    => "b,0x04",
    "bnez"    => "b,0x05",
    "bfpt"    => "b0,0x06",
    "bfpf"    => "b0,0x07",
    "addi"    => "i,0x08",
    "addui"   => "i,0x09",
    "subi"    => "i,0x0a",
    "subui"   => "i,0x0b",
    "andi"    => "i,0x0c",
    "ori"     => "i,0x0d",
    "xori"    => "i,0x0e",
    "lhi"     => "i1,0x0f",
    "rfe"     => "n,0x10",
    "trap"    => "t,0x11",
    "jr"      => "jr,0x12",
    "jalr"    => "jr,0x13",
    "slli"    => "i,0x14",
    "nop"     => "n,0x15",
    "srli"    => "i,0x16",
    "srai"    => "i,0x17",
    "seqi"    => "i,0x18",
    "snei"    => "i,0x19",
    "slti"    => "i,0x1a",
    "sgti"    => "i,0x1b",
    "slei"    => "i,0x1c",
    "sgei"    => "i,0x1d",
    "lb"      => "l,0x20",
    "lh"      => "l,0x21",
    "lw"      => "l,0x23",
    "lbu"     => "l,0x24",
    "lhu"     => "l,0x25",
    "lf"      => "l,0x26",
    "ld"      => "l,0x27",
    "sb"      => "s,0x28",
    "sh"      => "s,0x29",
    "sw"      => "s,0x2b",
    "sf"      => "s,0x2e",
    "sd"      => "s,0x2f",
    "itlb"    => "n,0x38",
    "sltui"   => "i,0x3a",
    "sgtui"   => "i,0x3b",
    "sleui"   => "i,0x3c",
    "sgeui"   => "i,0x3d",
);

# Named special registers and the register numbers getreg() returns for them.
my %specialreg = (
    "pc"       => 0,
    "ir31"     => 2,
    "isr"      => 3,
    "iar"      => 4,
    "status"   => 5,
    "cause"    => 6,
    "intrvec"  => 8,
    "fault"    => 9,
    "ptbase"   => 16,
    "ptsize"   => 17,
    "ptbits"   => 18,
    "tlbentry" => 20,
    "tlbvaddr" => 21,
    "tlbpaddr" => 22,
);

# ---------------------------------------------------------------------------
# Assembler state.  These are file-scoped because getreg/getimm/forminstr
# read %val, %addr, $pass and $lineno directly.
# ---------------------------------------------------------------------------
my %val;                                        # symbol table: label => address
my %addr  = ("t" => 0,  "d" => 0);              # current address per section
my %start = ("t" => -1, "d" => -1, "b" => 0);   # -1 means "no origin given yet"
my $bsslen   = 0;       # header field; nothing in this file assigns a length
my $section  = "t";     # "t" (text) or "d" (data)
my $pass     = 0;
my $lineno   = 0;
my $error    = 0;
my $hdr      = "";
my $prevaddr = -8;      # cannot equal (address - 4), so the first word written
                        # to the ASM file always carries its address
my ($maxdaddr, $maxtaddr, $startloc, $endaddr);

open (SRC, "<", $srcfile) or die "Couldn't open $srcfile for assembly.";
if ($asmfile ne "") {
    open (ASM, ">", $asmfile) or die "Couldn't open $asmfile for output.";
}
if ($exefile ne "") {
    open (EXE, ">", $exefile) or die "Couldn't open $exefile for output.";
    binmode (EXE);
    open (HDR, ">", "$exefile.hdr")
        or die "Couldn't open $exefile.hdr for output.";
    binmode (HDR);
}

# Pass one only computes label values (and the final section addresses);
# pass two re-reads the source and emits code, using the symbol table built
# in pass one.
#
# NOTE: this must stay a C-style loop.  A foreach over $pass would give the
# loop its own copy of the variable, and getimm() would no longer see the
# current pass number.
for ($pass = 1; $pass <= 2; $pass++) {
    if ($pass != 1) {
        # Section end addresses as left by pass one.
        $maxdaddr = $addr{"d"};
        $maxtaddr = $addr{"t"};
        # Sections that never received an explicit origin start at 0.  This
        # is done before $startloc is chosen so the fallback below never
        # sees the -1 sentinel.
        $start{"t"} = 0 if ($start{"t"} == -1);
        $start{"d"} = 0 if ($start{"d"} == -1);
        if (defined $val{$startlabel}) {
            $startloc = $val{$startlabel};
        } else {
            # No start label defined: begin at the start of the text section.
            $startloc = $start{"t"};
        }
        $endaddr = (($maxtaddr > $maxdaddr) ? $maxtaddr : $maxdaddr);
        if ($asmfile ne "") {
            printf ASM "start:%08x %08x ", $startloc, $endaddr;
            printf (ASM "%08x %08x %08x %08x\n",
                    $start{"t"}, $maxtaddr - $start{"t"},
                    $start{"d"}, $maxdaddr - $start{"d"});
        }
        if ($listfile ne "") {
            open (LISTING, ">", $listfile)
                or die "Couldn't open $listfile for output.";
            printf LISTING "%5s %8s\t%8s\n", "line", "address", "contents";
        }
        if ($exefile ne "") {
            # The image is a zero-filled buffer of $endaddr bytes that the
            # second pass patches in place.
            $executable = "\x00" x $endaddr;
            # NOTE(review): "L*" writes the header in the host's byte order,
            # while instruction words use big-endian "N" -- confirm this is
            # what the loader expects.
            $hdr = pack ("L*", $exemagic, $endaddr, $startloc,
                         $start{"t"}, $maxtaddr - $start{"t"},
                         $start{"d"}, $maxdaddr - $start{"d"},
                         $start{"b"}, $bsslen);
            # substr ($executable, 0, length ($hdr), $hdr);
        }
    }

    # Both passes must begin from the same state, otherwise pass two would
    # start in whichever section pass one ended in.
    $addr{"d"} = 0;
    $addr{"t"} = 0;
    $section = "t";
    seek (SRC, 0, 0);
    print "Starting pass $pass.\n";
    $lineno = 0;

  LINE:
    while (<SRC>) {
        $lineno++;
        my $curaddr = $addr{$section};
        my $out = "";

        # remove leading whitespace
        s/^\s+//;
        my $curline = $_;
        chomp $curline;

        # skip comments
        if (/^\;/) {
            if (($pass == 2) and ($listfile ne "")) {
                printf LISTING "%5d %20s%s\n", $lineno, "", $curline;
            }
            next LINE;
        }

        # Do an operation based on the first word on the line.  Lines that
        # do not start with a word (blank lines, stray punctuation) are
        # skipped; testing the match itself avoids reading a stale $1.
        next LINE unless (/^([a-zA-Z0-9:_.]+)/);
        my $op = $1;
        print STDERR "Op is '$op'\n" if ($debug);

        if ($op =~ /^([a-zA-Z0-9_]+)\:$/) {
            # A label.  Anything following it on the same line is ignored.
            if ($pass == 1) {
                # set label value
                $val{$1} = $addr{$section};
            }
        } elsif (/^[a-zA-Z]+/) {
            # An instruction: always 4 bytes.
            if ($pass == 1) {
                if ($section eq "d") {
                    warn "Instructions not allowed in data segment " .
                        "(at line $lineno)\n";
                    $error = 1;
                }
            } else {
                # Handle instructions for second pass.  This means
                # outputting the correct code.
                $out = pack ("N", forminstr ($_));
            }
            $addr{$section} += 4;
        } elsif (/^\.(text|data)/) {
            # Switch section; an optional operand sets the section address.
            s/\;.*$//;
            my ($directive, $origin) = split (/\s+/, $_, 3);
            $section = substr ($directive, 1, 1);
            if (defined ($origin) && ($origin ne "")) {
                if ($origin =~ /^0/) {
                    # Leading 0: let oct() handle octal and 0x... hex.
                    $addr{$section} = oct ($origin);
                } else {
                    $addr{$section} = $origin;
                }
                # The first explicit address becomes the section's start.
                if ($start{$section} == -1) {
                    $start{$section} = $addr{$section};
                }
            }
            print "Section $section now at $addr{$section}.\n" if ($debug);
        } elsif (/^\.(proc|endproc|global)/) {
            # Ignore directives - we don't need them yet
        } elsif (/^\.space/) {
            # .space simply adds to the address pointer
            my ($directive, $n, $rest) = split (/\s+/, $_, 3);
            print "SPACE: line is '$_' op = $directive, n = $n, rest = $rest\n"
                if ($debug);
            if ($section eq "t") {
                warn ".space can't be used in the text segment " .
                    "(at line $lineno)!\n";
                $error = 1;
            }
            $addr{$section} += $n;
        } elsif (/^\.ascii(z?)/) {
            # .asciiz appends a terminating NUL byte.
            $out = getascii ($_, ($1 eq "z"));
            $addr{$section} += length ($out);
        } elsif (/^\.(byte|word|float|double)/) {
            # One value per comma-separated entry.  The first "entry" still
            # has the directive word in front, so strip it off.
            my $tp = $1;
            s/\;.*$//;
            my @args = split (/\s*,\s*/);
            $args[0] =~ s/\.[a-z]+\s+//;
            if ($tp =~ /byte|word/) {
                # Integer entries may be expressions involving symbols.
                foreach my $arg (@args) {
                    $arg = getimm ($arg);
                }
            }
            # NOTE(review): "f*" and "d*" use the host's native float layout
            # and byte order, unlike the big-endian "N*" used for words --
            # confirm against the target simulator.
            $out = pack ("C*", @args) if ($tp eq "byte");
            $out = pack ("N*", @args) if ($tp eq "word");
            $out = pack ("f*", @args) if ($tp eq "float");
            $out = pack ("d*", @args) if ($tp eq "double");
            $addr{$section} += length ($out);
        } elsif (/^\.align/) {
            # Align so that the lowest n bits are all 0's.
            my ($directive, $n, $rest) = split (/\s+/, $_, 3);
            my $mask = (1 << $n) - 1;
            # This will leave things alone if the address is already
            # correctly aligned, and align to the next possible point if
            # it's not aligned.
            if (($addr{$section} & $mask) != 0) {
                $addr{$section} += $mask;
                $addr{$section} &= ~$mask;
                # Force the next line to include an address at the start
                $prevaddr = $addr{$section};
            }
        }

        if ($pass == 2) {
            if ($out ne "") {
                # Output the current value
                if ($asmfile ne "") {
                    # The address is only written when this output does not
                    # directly follow the previous word.
                    if ($curaddr != ($prevaddr + 4)) {
                        printf (ASM "%08x", $curaddr);
                    }
                    my $data = unpack ("H*", $out);
                    for (my $j = 0; $j < length ($data); $j += 8) {
                        print ASM ":" . substr ($data, $j, 8) . "\n";
                    }
                    $prevaddr = $curaddr;
                }
                if ($exefile ne "") {
                    # Patch the bytes into the zero-filled image.
                    substr ($executable, $curaddr, length ($out), $out);
                }
            }
            if ($listfile ne "") {
                # Generate the list of numbers to output: one listing line
                # per 4 bytes, with the source text on the first line only.
                my $data = unpack ("H*", $out);
                my $i = $curaddr;
                $data = " " if ($data eq "");
                for (my $j = 0; $j < length ($data); $j += 8) {
                    printf (LISTING "%5d %08x %-8s\t%s\n", $lineno, $i,
                            substr ($data, $j, 8), $curline);
                    $i += 4;
                    $curline = "";
                }
            }
        }
    }

    if ($error == 1) {
        die "Errors occurred during assembly. Exiting....\n";
    }
    # The symbol table is complete once pass one has finished.
    if (($pass == 1) && ($symfile ne "")) {
        open (SYM, ">", $symfile) or die "Couldn't open symbol file $symfile.";
        foreach my $sym (sort keys %val) {
            printf SYM "%-20s %08x\n", $sym, $val{$sym};
        }
        close SYM;
    }
}

printf ("Last text address: 0x%x\n", $addr{'t'});
printf ("Last data address: 0x%x\n", $addr{'d'});
close SRC;
close ASM if $asmfile ne "";
close LISTING if $listfile ne "";
print HDR $hdr if $exefile ne "";
close HDR if $exefile ne "";
print EXE $executable if $exefile ne "";
close EXE if $exefile ne "";
exit;

# getreg (NAME)
#
# Translate a register operand into its number.  Accepted forms are "r<n>",
# "f<n>" and "$<n>" (case-insensitive), plus the names in %specialreg.
# Anything else, or a number that does not fit the 5-bit register fields
# used by forminstr(), produces a warning and yields register 0.
sub getreg {
    my $r = lc (defined ($_[0]) ? $_[0] : "");
    my $rnum = -1;

    # The prefix class is spelled out literally; writing "$r" inside the
    # brackets would interpolate the operand itself into the pattern.
    if ($r =~ /^[fr\$]([0-9]+)/) {
        $rnum = $1;
    } elsif (defined $specialreg{$r}) {
        $rnum = $specialreg{$r};
    }
    if (($rnum == -1) || ($rnum > 31)) {
        warn "Illegal register number ($r) at line $lineno.\n";
        $rnum = 0;
    }
    return ($rnum);
}

# getimm (EXPR)
#
# Evaluate an immediate operand.  "#" characters are dropped, every token
# that starts with a letter or underscore is replaced by its value from the
# symbol table %val, and the resulting text is evaluated as a Perl
# expression.  Undefined symbols evaluate as 0; they are only reported
# after pass one, because forward references are not yet known during it.
# Returns the value, or undef if the expression is empty or does not
# evaluate.
sub getimm {
    my $imm = defined ($_[0]) ? $_[0] : "";
    $imm =~ s/#//g;
    my @p = split (/\b/, $imm);

    foreach my $tok (@p) {
        next unless ($tok =~ /^[_a-zA-Z]/);
        # Look up value in symbol table, and replace it
        if (! defined ($val{$tok})) {
            if ($pass != 1) {
                warn "Undefined symbol: $tok\n";
            }
            $tok = 0;
        } else {
            $tok = $val{$tok};
        }
    }
    # SECURITY: this string eval runs text taken from the assembly source.
    # Only assemble files you trust.
    my $ival = eval (join ("", @p));
    if (($@ ne "") && ($pass != 1)) {
        warn "Bad expression ($imm) at line $lineno.\n";
    }
    return ($ival);
}

# forminstr (LINE)
#
# Encode one instruction line and return the 32-bit instruction word as a
# number.  The mnemonic is looked up in %instTbl; the format found there
# decides how the remaining operands are interpreted.  An unknown mnemonic
# produces a warning and encodes as 0.  The caller's line is not modified.
sub forminstr {
    my $line = $_[0];
    my ($src1, $src2, $dst);
    my $out = 0;

    chomp $line;
    $line =~ s/\;.*$//;
    my @a = split (/[\s,]+/, $line);

    my $entry = $instTbl{$a[0]};
    if (! defined ($entry)) {
        warn "Illegal instruction ($a[0]) at line $lineno\n";
        return (0);
    }
    my ($itype, $op) = split (/,/, $entry);
    $itype = lc ($itype);
    $op = hex ($op);

    if ($itype =~ /^r/) {
        # Register-register: "op dst, src1[, src2]" ("r2" has no src2).
        $src1 = getreg ($a[2]);
        if ($itype eq "r") {
            $src2 = getreg ($a[3]);
        } else {
            $src2 = 0;
        }
        $dst = getreg ($a[1]);
        $out = 0x00000000 | ($src1 << 21) | ($src2 << 16) | ($dst << 11) | $op;
    } elsif ($itype eq "i") {
        # Immediate: "op dst, src1, imm16".
        $src1 = getreg ($a[2]);
        $dst = getreg ($a[1]);
        $src2 = getimm ($a[3]);
        $out = ($op << 26) | ($src1 << 21) | ($dst << 16) | ($src2 & 0xffff);
    } elsif ($itype eq "i1") {
        # Immediates with a single operand
        $dst = getreg ($a[1]);
        $src2 = getimm ($a[2]);
        $out = ($op << 26) | ($dst << 16) | ($src2 & 0xffff);
    } elsif ($itype eq "n") {
        # Instructions with no operands
        $out = ($op << 26);
    } elsif (($itype eq "s") || ($itype eq "l")) {
        # load and store operations: stores put the address operand first,
        # loads put it second.
        my $address;
        if ($itype eq "s") {
            $address = $a[1];
            $dst = getreg ($a[2]);
        } else {
            $address = $a[2];
            $dst = getreg ($a[1]);
        }
        # The address operand has the form "offset(rN)"; the offset may be
        # empty.  If it does not match, the offset is 0 and getreg() reports
        # the missing base register.
        my ($offset, $base) = ("", "");
        if (defined ($address) && ($address =~ /(.*)\((r[0-9]+)\)$/)) {
            ($offset, $base) = ($1, $2);
        }
        if ($offset ne "") {
            $src2 = getimm ($offset);
        } else {
            $src2 = 0;
        }
        $src1 = getreg ($base);
        $out = ($op << 26) | ($src1 << 21) | ($dst << 16) | ($src2 & 0xffff);
    } elsif ($itype =~ /^f/) {
        # floating point operations
        if ($itype eq "f") {
            $dst = getreg ($a[1]);
            $src1 = getreg ($a[2]);
            $src2 = getreg ($a[3]);
        } elsif ($itype eq "f2") {
            # Two sources and no destination register.
            $src1 = getreg ($a[1]);
            $src2 = getreg ($a[2]);
            $dst = 0;
        } else {
            # type fd
            $dst = getreg ($a[1]);
            $src1 = getreg ($a[2]);
            $src2 = 0;
        }
        $out = 0x04000000 | ($src1 << 21) | ($src2 << 16) | ($dst << 11) | $op;
    } elsif ($itype =~ /^b/) {
        if ($itype eq "b") {
            $src1 = getreg ($a[1]);
            $dst = getimm ($a[2]);
        } else {
            # b0 - branches w/o operands
            $src1 = 0;
            $dst = getimm ($a[1]);
        }
        # Offsets are relative to the address following this instruction.
        $dst -= $addr{"t"} + 4;
        $out = ($op << 26) | ($src1 << 21) | ($dst & 0xffff);
    } elsif ($itype eq "j") {
        $dst = getimm ($a[1]);
        # Offsets are relative to the address following this instruction.
        $dst -= $addr{"t"} + 4;
        $out = ($op << 26) | ($dst & 0x3ffffff);
    } elsif ($itype eq "jr") {
        $dst = getreg ($a[1]);
        $out = ($op << 26) | ($dst << 21);
    } elsif ($itype eq "t") {
        $dst = getimm ($a[1]);
        $out = ($op << 26) | ($dst & 0x3ffffff);
    }
    return ($out);
}

# getascii (LINE, ZPAD)
#
# Return the bytes for an .ascii/.asciiz directive.  The operand is
# evaluated as a Perl string expression, so escapes such as \n are
# expanded.  When ZPAD is true a NUL byte is appended.  An operand that
# cannot be evaluated produces a warning and yields no string bytes.
sub getascii {
    my ($str, $zpad) = @_;
    my $val;

    $str =~ s/^\.ascii(z?)\s+//;
    # When the operand is a single double-quoted literal, optionally
    # followed by a ";" comment, evaluate only the literal so that the
    # comment text is never handed to eval.
    if ($str =~ /^("(?:[^"\\]|\\.)*")\s*(?:;.*)?$/) {
        $str = $1;
    }
    {
        # SECURITY: this string eval runs text taken from the assembly
        # source.  Only assemble files you trust.
        no strict;
        eval ("\$val = " . $str);
    }
    if (! defined ($val)) {
        warn "Bad string in .ascii directive at line $lineno.\n";
        $val = "";
    }
    my $pstr = $zpad ? "a*x" : "a*";
    return (pack ($pstr, $val));
}