project-sls-6502-emulator / src / make_test_files / 6502_test_opcodes_scraper.ipynb
6502_test_opcodes_scraper.ipynb
Raw
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import math\n",
    "# source: http://www.qmtpro.com/~nes/misc/nestest.log"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "with open('6502_test_opcodes.txt') as f:\n",
    "    lines = f.readlines()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "opcode_array = [\"00\" for _ in range(int(math.pow(2,16)))]\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'JMP'"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "lines[0][16:19]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "instructions = [lines[x][16:19] for x in range(len(lines))]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'JSR'"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "instructions[872]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'CFDB  A1 80     LDA ($80,X) @ 80 = 0200 = 5A    A:5D X:00 Y:69 P:27 SP:FB PPU: 22,139 CYC:2547\\n'"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "lines[1086]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "address_w_opcodes = [lines[x][:16] for x in range(len(lines))]\n",
    "addresses = [address_w_opcodes[x][:4] for x in range(len(address_w_opcodes)) ]\n",
    "addresses = [int(address, 16) for address in addresses]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "opcodes = [address_w_opcodes[x][4:] for x in range(len(address_w_opcodes))]\n",
    "opcodes = [opcode_seq.lstrip().rstrip() for opcode_seq in opcodes]\n",
    "opcodes = [opcode_seq.split() for opcode_seq in opcodes ]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "working_opcodes = opcodes[:1100]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [],
   "source": [
    "import itertools\n",
    "flat_opcode_list = list(itertools.chain(*working_opcodes))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [],
   "source": [
    "total_opcode_list = list(itertools.chain(*opcodes))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Total amount of opcodes(w/ addressing modes) =  254\n",
      "Total amount of consecutive working opcodes(w/ addressing modes) =  171\n"
     ]
    }
   ],
   "source": [
    "print(\"Total amount of opcodes(w/ addressing modes) = \", len(set(total_opcode_list)))\n",
    "print(\"Total amount of consecutive working opcodes(w/ addressing modes) = \", len(set(set(flat_opcode_list))))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.6732283464566929"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "171/254"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'00',\n",
       " '01',\n",
       " '02',\n",
       " '03',\n",
       " '04',\n",
       " '05',\n",
       " '07',\n",
       " '08',\n",
       " '09',\n",
       " '0A',\n",
       " '0B',\n",
       " '0D',\n",
       " '0F',\n",
       " '10',\n",
       " '11',\n",
       " '12',\n",
       " '13',\n",
       " '14',\n",
       " '15',\n",
       " '16',\n",
       " '17',\n",
       " '18',\n",
       " '19',\n",
       " '1A',\n",
       " '1B',\n",
       " '1C',\n",
       " '1D',\n",
       " '1E',\n",
       " '1F',\n",
       " '20',\n",
       " '21',\n",
       " '22',\n",
       " '23',\n",
       " '24',\n",
       " '25',\n",
       " '27',\n",
       " '28',\n",
       " '29',\n",
       " '2A',\n",
       " '2B',\n",
       " '2C',\n",
       " '2D',\n",
       " '2E',\n",
       " '2F',\n",
       " '30',\n",
       " '31',\n",
       " '32',\n",
       " '33',\n",
       " '34',\n",
       " '35',\n",
       " '37',\n",
       " '38',\n",
       " '39',\n",
       " '3B',\n",
       " '3D',\n",
       " '3F',\n",
       " '40',\n",
       " '41',\n",
       " '44',\n",
       " '47',\n",
       " '48',\n",
       " '49',\n",
       " '4A',\n",
       " '4B',\n",
       " '4C',\n",
       " '4D',\n",
       " '4E',\n",
       " '50',\n",
       " '54',\n",
       " '55',\n",
       " '56',\n",
       " '57',\n",
       " '58',\n",
       " '5A',\n",
       " '5B',\n",
       " '5C',\n",
       " '5D',\n",
       " '5E',\n",
       " '5F',\n",
       " '60',\n",
       " '62',\n",
       " '63',\n",
       " '64',\n",
       " '65',\n",
       " '66',\n",
       " '67',\n",
       " '68',\n",
       " '69',\n",
       " '6A',\n",
       " '6B',\n",
       " '6D',\n",
       " '6E',\n",
       " '6F',\n",
       " '70',\n",
       " '71',\n",
       " '72',\n",
       " '76',\n",
       " '77',\n",
       " '78',\n",
       " '7B',\n",
       " '7D',\n",
       " '7E',\n",
       " '7F',\n",
       " '80',\n",
       " '81',\n",
       " '82',\n",
       " '83',\n",
       " '84',\n",
       " '85',\n",
       " '86',\n",
       " '87',\n",
       " '88',\n",
       " '8A',\n",
       " '8D',\n",
       " '8E',\n",
       " '90',\n",
       " '91',\n",
       " '96',\n",
       " '98',\n",
       " '99',\n",
       " '9A',\n",
       " '9D',\n",
       " '9F',\n",
       " 'A0',\n",
       " 'A1',\n",
       " 'A2',\n",
       " 'A5',\n",
       " 'A8',\n",
       " 'A9',\n",
       " 'AA',\n",
       " 'AB',\n",
       " 'AC',\n",
       " 'AD',\n",
       " 'AE',\n",
       " 'AF',\n",
       " 'B0',\n",
       " 'B8',\n",
       " 'BA',\n",
       " 'C0',\n",
       " 'C5',\n",
       " 'C6',\n",
       " 'C7',\n",
       " 'C8',\n",
       " 'C9',\n",
       " 'CA',\n",
       " 'CB',\n",
       " 'CC',\n",
       " 'CD',\n",
       " 'CE',\n",
       " 'CF',\n",
       " 'D0',\n",
       " 'D8',\n",
       " 'D9',\n",
       " 'DB',\n",
       " 'DE',\n",
       " 'E0',\n",
       " 'E3',\n",
       " 'E7',\n",
       " 'E8',\n",
       " 'E9',\n",
       " 'EA',\n",
       " 'EE',\n",
       " 'EF',\n",
       " 'F0',\n",
       " 'F3',\n",
       " 'F5',\n",
       " 'F8',\n",
       " 'F9',\n",
       " 'FB',\n",
       " 'FE',\n",
       " 'FF'}"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "set(flat_opcode_list)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "for step, index in enumerate(addresses): \n",
    "    for i in range(len(opcodes[step])):\n",
    "        opcode_array[index + i] = opcodes[step][i]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "hexdump = [opcode_array[x:x+16] for x in range(0, len(opcode_array), 16)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "with open('../test_opcodes.txt', 'w') as f:\n",
    "    for line in hexdump:\n",
    "        f.write(''.join(line))\n",
    "        f.write('\\n')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "register_vals = [lines[x][48:73] for x in range(len(lines))]\n",
    "register_vals = [sample_reg_val.split() for sample_reg_val in register_vals]\n",
    "register_vals = np.array(register_vals, dtype = 'object')\n",
    "A, X, Y, P, SP = np.split(register_vals, 5, axis = 1)\n",
    "A, X, Y, P, SP = A.flatten().tolist(), X.flatten().tolist(), Y.flatten().tolist(), P.flatten().tolist(), SP.flatten().tolist()\n",
    "A = [A[step][2:] for step in range(len(A))]\n",
    "X = [X[step][2:] for step in range(len(X))]\n",
    "Y = [Y[step][2:] for step in range(len(Y))]\n",
    "P = [P[step][2:] for step in range(len(P))]\n",
    "SP = [SP[step][3:] for step in range(len(SP))]\n",
    "\n",
    "A = [int(a, 16) for a in A]\n",
    "X = [int(x, 16) for x in X]\n",
    "Y = [int(y, 16) for y in Y]\n",
    "P = [int(p, 16) for p in P]\n",
    "SP = [int(sp, 16) for sp in SP]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "8991"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(A)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "116"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "int(\"74\", 16)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CDFA  85 01     STA $01 = FF                    A:FF X:FB Y:01 P:A5 SP:FB PPU: 17, 20 CYC:1939\n",
      "\n",
      "CDFC  BA        TSX                             A:FF X:FB Y:01 P:A5 SP:FB PPU: 17, 29 CYC:1942\n",
      "\n",
      "CDFD  8E FF 07  STX $07FF = FB                  A:FF X:FB Y:01 P:A5 SP:FB PPU: 17, 35 CYC:1944\n",
      "\n",
      "CE00  EA        NOP                             A:FF X:FB Y:01 P:A5 SP:FB PPU: 17, 47 CYC:1948\n",
      "\n",
      "CE01  A2 80     LDX #$80                        A:FF X:FB Y:01 P:A5 SP:FB PPU: 17, 53 CYC:1950\n",
      "\n",
      "CE03  9A        TXS                             A:FF X:80 Y:01 P:A5 SP:FB PPU: 17, 59 CYC:1952\n",
      "\n",
      "CE04  A9 33     LDA #$33                        A:FF X:80 Y:01 P:A5 SP:80 PPU: 17, 65 CYC:1954\n",
      "\n",
      "CE06  48        PHA                             A:33 X:80 Y:01 P:25 SP:80 PPU: 17, 71 CYC:1956\n",
      "\n",
      "CE07  A9 69     LDA #$69                        A:33 X:80 Y:01 P:25 SP:7F PPU: 17, 80 CYC:1959\n",
      "\n",
      "CE09  48        PHA                             A:69 X:80 Y:01 P:25 SP:7F PPU: 17, 86 CYC:1961\n",
      "\n",
      "CE0A  BA        TSX                             A:69 X:80 Y:01 P:25 SP:7E PPU: 17, 95 CYC:1964\n",
      "\n",
      "CE0B  E0 7E     CPX #$7E                        A:69 X:7E Y:01 P:25 SP:7E PPU: 17,101 CYC:1966\n",
      "\n",
      "CE0D  D0 20     BNE $CE2F                       A:69 X:7E Y:01 P:27 SP:7E PPU: 17,107 CYC:1968\n",
      "\n",
      "CE0F  68        PLA                             A:69 X:7E Y:01 P:27 SP:7E PPU: 17,113 CYC:1970\n",
      "\n",
      "CE10  C9 69     CMP #$69                        A:69 X:7E Y:01 P:25 SP:7F PPU: 17,125 CYC:1974\n",
      "\n",
      "CE12  D0 1B     BNE $CE2F                       A:69 X:7E Y:01 P:27 SP:7F PPU: 17,131 CYC:1976\n",
      "\n",
      "CE14  68        PLA                             A:69 X:7E Y:01 P:27 SP:7F PPU: 17,137 CYC:1978\n",
      "\n",
      "CE15  C9 33     CMP #$33                        A:33 X:7E Y:01 P:25 SP:80 PPU: 17,149 CYC:1982\n",
      "\n",
      "CE17  D0 16     BNE $CE2F                       A:33 X:7E Y:01 P:27 SP:80 PPU: 17,155 CYC:1984\n",
      "\n",
      "CE19  BA        TSX                             A:33 X:7E Y:01 P:27 SP:80 PPU: 17,161 CYC:1986\n",
      "\n",
      "CE1A  E0 80     CPX #$80                        A:33 X:80 Y:01 P:A5 SP:80 PPU: 17,167 CYC:1988\n",
      "\n",
      "CE1C  D0 11     BNE $CE2F                       A:33 X:80 Y:01 P:27 SP:80 PPU: 17,173 CYC:1990\n",
      "\n",
      "CE1E  AD 80 01  LDA $0180 = 33                  A:33 X:80 Y:01 P:27 SP:80 PPU: 17,179 CYC:1992\n",
      "\n",
      "CE21  C9 33     CMP #$33                        A:33 X:80 Y:01 P:25 SP:80 PPU: 17,191 CYC:1996\n",
      "\n",
      "CE23  D0 0A     BNE $CE2F                       A:33 X:80 Y:01 P:27 SP:80 PPU: 17,197 CYC:1998\n",
      "\n",
      "CE25  AD 7F 01  LDA $017F = 69                  A:33 X:80 Y:01 P:27 SP:80 PPU: 17,203 CYC:2000\n",
      "\n",
      "CE28  C9 69     CMP #$69                        A:69 X:80 Y:01 P:25 SP:80 PPU: 17,215 CYC:2004\n",
      "\n",
      "CE2A  D0 03     BNE $CE2F                       A:69 X:80 Y:01 P:27 SP:80 PPU: 17,221 CYC:2006\n",
      "\n",
      "CE2C  4C 33 CE  JMP $CE33                       A:69 X:80 Y:01 P:27 SP:80 PPU: 17,227 CYC:2008\n",
      "\n",
      "CE33  EA        NOP                             A:69 X:80 Y:01 P:27 SP:80 PPU: 17,236 CYC:2011\n",
      "\n",
      "CE34  A2 80     LDX #$80                        A:69 X:80 Y:01 P:27 SP:80 PPU: 17,242 CYC:2013\n",
      "\n",
      "CE36  9A        TXS                             A:69 X:80 Y:01 P:A5 SP:80 PPU: 17,248 CYC:2015\n",
      "\n",
      "CE37  20 3D CE  JSR $CE3D                       A:69 X:80 Y:01 P:A5 SP:80 PPU: 17,254 CYC:2017\n",
      "\n",
      "CE3D  BA        TSX                             A:69 X:80 Y:01 P:A5 SP:7E PPU: 17,272 CYC:2023\n",
      "\n",
      "CE3E  E0 7E     CPX #$7E                        A:69 X:7E Y:01 P:25 SP:7E PPU: 17,278 CYC:2025\n",
      "\n",
      "CE40  D0 19     BNE $CE5B                       A:69 X:7E Y:01 P:27 SP:7E PPU: 17,284 CYC:2027\n",
      "\n",
      "CE42  68        PLA                             A:69 X:7E Y:01 P:27 SP:7E PPU: 17,290 CYC:2029\n",
      "\n"
     ]
    }
   ],
   "source": [
    "for x in range(840, 877):\n",
    "    print(lines[x])\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "with open('../test_vals.h', 'w') as f:\n",
    "\n",
    "    f.write(\" #ifndef TEST_VALS_H \\n #define TEST_VALS_H \\n #include \\\"6502.h\\\" \\n \")\n",
    "\n",
    "    f.write(\"byte A[] = { \")\n",
    "    for a in A[:-1]:\n",
    "        f.write(\"%s ,\" % a)\n",
    "\n",
    "    f.write(\"%s }; \\n\" % A[-1])\n",
    "\n",
    "    f.write(\"byte X[] = { \")\n",
    "    for x in X[:-1]:\n",
    "        f.write(\"%s ,\" % x)\n",
    "\n",
    "    f.write(\"%s }; \\n\" % X[-1])\n",
    "\n",
    "    f.write(\"byte Y[] = { \")\n",
    "    for y in Y[:-1]:\n",
    "        f.write(\"%s ,\" % y)\n",
    "\n",
    "    f.write(\"%s }; \\n\" % Y[-1])\n",
    "\n",
    "    \"\"\"\n",
    "    f.write(\"byte P[] = { \")\n",
    "    for p in P[:-1]:\n",
    "        f.write(\"%s ,\" % p)\n",
    "\n",
    "    f.write(\"%s }; \\n\" % P[-1]) \"\"\"\n",
    "\n",
    "    f.write(\"byte SP[] = { \")\n",
    "    for sp in SP[:-1]:\n",
    "        f.write(\"%s ,\" % sp)\n",
    "\n",
    "    f.write(\"%s }; \\n\" % SP[-1])\n",
    "\n",
    "    f.write(\"address PCs[] = { \")\n",
    "    for address in addresses[:-1]:\n",
    "        f.write(\"%s ,\" % address)\n",
    "\n",
    "    f.write(\"%s }; \\n\" % addresses[-1])\n",
    "\n",
    "    f.write(\"#endif // TEST_VALS_H \")\n"
   ]
  }
 ],
 "metadata": {
  "interpreter": {
   "hash": "949777d72b0d2535278d3dc13498b2535136f6dfe0678499012e853ee9abcab1"
  },
  "kernelspec": {
   "display_name": "Python 3.9.9 64-bit",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.9"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}