-- DLX-Microprocessor / components / windowed_RF.vhd
library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;
use ieee.math_real.all;

use work.constants.all;

-- See README.txt or README.pdf for the full details of the implementation.
-- Interface of the windowed register file.
-- The CPU writes through an address relative to the active window (addr_cpu);
-- a memory-side interface (mem_*) takes over the write port while a spill or
-- fill is in progress.
entity windowed_RF is
  generic (
    NBit    : integer := numBit;   -- width in bits of each register
    NReg    : integer := NumReg;   -- registers in each IN, LOCAL, I/O and GLOBAL group
    NWindow : integer := numWindow -- number of windows
  );
  port (
    -- Clock and control
    clk          : in std_logic;
    reset        : in std_logic; -- sampled on the rising clock edge by the window state machine
    enable       : in std_logic; -- not referenced by the rtl architecture in this file
    -- Write requests
    write_rq     : in std_logic; -- write request from the CPU (used in the idle state)
    mem_write_rq : in std_logic; -- write request from memory (used outside the idle state)
    -- Window movement
    call         : in std_logic; -- subroutine call: advance to the next window
    ret          : in std_logic; -- subroutine return: go back to the previous window
    -- Read requests
    rd_rq_1      : in std_logic; -- ALU read request 1
    rd_rq_2      : in std_logic; -- ALU read request 2
    -- Addresses
    addr_cpu     : in std_logic_vector(integer(ceil(log2(real((3 * NReg) + (NWindow * NReg))))) - 1 downto 0); -- CPU write address, relative to the active window
    addr_read_1  : in std_logic_vector(integer(ceil(log2(real(3 * NReg * NWindow)))) - 1 downto 0); -- read address for operand 1
    addr_read_2  : in std_logic_vector(integer(ceil(log2(real(3 * NReg * NWindow)))) - 1 downto 0); -- read address for operand 2
    mem_addr_in  : in std_logic_vector(integer(ceil(log2(real(3 * NReg * NWindow)))) - 1 downto 0); -- write address used outside the idle state
    -- Data inputs
    data_in      : in std_logic_vector(NBit - 1 downto 0); -- write data from the CPU
    data_mem_in  : in std_logic_vector(NBit - 1 downto 0); -- write data from memory (fill)
    -- Data outputs
    data_out_1   : out std_logic_vector(NBit - 1 downto 0); -- operand 1 for the ALU
    data_out_2   : out std_logic_vector(NBit - 1 downto 0); -- operand 2 for the ALU
    data_mem_out : out std_logic_vector(NBit - 1 downto 0)  -- data towards memory (spill)
  );
end entity;

architecture rtl of windowed_RF is

  -- Bit widths derived from the generics; each is ceil(log2(<number of items>)).
  -- The address concatenations in this architecture only fit the register-file
  -- address when RF_addr_bits = 1 + active_window_bits = 1 + window_bits + reg_page_size.
  constant active_window_bits : integer := integer(ceil(log2(real((3 * NReg) + (NWindow * NReg))))); -- width of addr_cpu
  constant RF_addr_bits       : integer := integer(ceil(log2(real(3 * NReg * NWindow)))); -- width of a physical register-file address
  constant window_bits        : integer := integer(ceil(log2(real(NWindow)))); -- width of the window pointers
  constant reg_page_size      : integer := integer(ceil(log2(real(2 * NReg)))); -- offset bits inside one page of 2 * NReg registers

  -- Component declaration of the plain register file that stores all the windows;
  -- it is instantiated in this architecture as REG_FILE.
  -- NOTE(review): the matching register_file entity is not visible in this file -
  -- confirm that generics and ports agree with it.
  -- NOTE(review): the generic defaults below use the same names as the generics
  -- being declared (NBit, NReg); strict tools may reject this as a reference to a
  -- name inside its own declaration. REG_FILE sets both generics explicitly, so
  -- the defaults are never used.
  component register_file is
    generic (
      NBit : integer := NBit; -- number of bit per register
      NReg : integer := (3 * NReg * NWindow) -- number of registers
    );
    port (
      CLK     : in std_logic; -- Clock Signal
      RESET   : in std_logic; -- Reset Signal
      ENABLE  : in std_logic; -- Enable Signal
      RD1     : in std_logic; -- Read 1 Signal
      RD2     : in std_logic; -- Read 2 Signal
      WR      : in std_logic; -- Write Signal
      ADD_WR  : in std_logic_vector(integer(ceil(log2(real(NReg)))) - 1 downto 0); -- Write Address
      ADD_RD1 : in std_logic_vector(integer(ceil(log2(real(NReg)))) - 1 downto 0); -- Read 1 Address
      ADD_RD2 : in std_logic_vector(integer(ceil(log2(real(NReg)))) - 1 downto 0); -- Read 2 Address
      DATAIN  : in std_logic_vector(NBit - 1 downto 0); -- Data in Signal
      OUT1    : out std_logic_vector(NBit - 1 downto 0); -- Data Out 1 Signal
      OUT2    : out std_logic_vector(NBit - 1 downto 0) -- Data Out 2 Signal
    );
  end component;
  -- State of the window controller.
  type STAR_TYPE is (S0, S1, S2);
  -- S0:  Idle
  -- S1:  Spill
  -- S2:  Fill
  signal STAR : STAR_TYPE;

  -- '1' when the window can move forward (CAN_SAVE) or backward (CAN_RESTORE)
  -- without going through a spill/fill state.
  signal CAN_SAVE, CAN_RESTORE  : std_logic;
  signal CWP, O_CWP, N_CWP, SWP : std_logic_vector(window_bits - 1 downto 0); -- window pointers: current (CWP), previous (O_CWP), next (N_CWP) and swap (SWP); they form the high part of the window address
  signal RF_ADDR                : std_logic_vector(RF_addr_bits - 1 downto 0); -- write address presented to the register file
  signal RF_WR_RQ               : std_logic; -- write request presented to the register file
  signal RF_DATA_IN             : std_logic_vector(NBit - 1 downto 0); -- write data presented to the register file
  signal RF_ADDR_VALUE          : std_logic_vector(RF_addr_bits - 1 downto 0); -- physical write address computed from addr_cpu

  -- Addresses are managed as in paging: we keep a counter of the current page/window, each page holding 16 (or 2*NReg)
  -- memory locations. This lets us simply increase/decrease the counters as needed, and only in the spill/fill situations.
  -- This organization therefore needs no adder for the actual address computation, only a concatenation of the correct bits.
  -- IN/LOCAL registers
  -- | 0 |  CWP  | addr(log_2(2*NReg) - 1 downto 0) |
  -- I/O registers
  -- | 0 |  N_CWP  | addr(log_2(2*NReg) - 1 downto 0) |
  -- Globals
  -- | 1 | addr |

  -- SWP keeps track of the window that is currently not swapped, so it is NOT the actual address; to obtain the actual
  -- address, log_2(2*NReg) trailing zeros must be appended:
  -- | 0 |  SWP  | 000000 |

begin
  -- Physical register file holding 3 * NReg * NWindow registers of NBit bits.
  -- The write port is driven by the RF_* signals; the read ports are connected
  -- directly to the entity's read requests and read addresses.
  -- NOTE(review): ENABLE is tied to '1' and the entity's `enable` input is not
  -- used anywhere in this architecture - confirm this is intentional.
  REG_FILE : register_file
  generic map(
    NBit => NBit, -- number of bit per register
    NReg => 3 * NReg * NWindow -- number of registers
  )
  port map
  (
    CLK     => clk, -- Clock Signal
    RESET   => reset, -- Reset Signal
    ENABLE  => '1', -- Enable Signal
    RD1     => rd_rq_1, -- Read 1 Signal
    RD2     => rd_rq_2, -- Read 2 Signal
    WR      => RF_WR_RQ, -- Write Signal
    ADD_WR  => RF_ADDR, -- Write Address
    ADD_RD1 => addr_read_1, -- Read 1 Address
    ADD_RD2 => addr_read_2, -- Read 2 Address
    DATAIN  => RF_DATA_IN, -- Data in Signal
    OUT1    => data_out_1, -- Data Out 1 Signal
    OUT2    => data_out_2 -- Data Out 2 Signal
  );

  -- Source selection for the register-file write port.
  -- In the idle state (S0) the CPU side drives address, write request and data;
  -- in every other state (spill/fill) the memory side drives them instead.
  with STAR select RF_ADDR <=
    RF_ADDR_VALUE when S0,
    mem_addr_in   when others;

  with STAR select RF_WR_RQ <=
    write_rq     when S0,
    mem_write_rq when others;

  with STAR select RF_DATA_IN <=
    data_in     when S0,
    data_mem_in when others;

  -- Translates the CPU address, which is relative to the ACTIVE WINDOW, into the
  -- physical register-file write address RF_ADDR_VALUE.
  --
  -- Purely combinational: the sensitivity list holds exactly the signals that are
  -- read (addr_cpu, CWP, N_CWP), so the address is re-evaluated when the window
  -- pointers move and not only when addr_cpu changes.
  --
  -- The concatenations below only fit RF_ADDR_VALUE when
  -- RF_addr_bits = 1 + window_bits + reg_page_size = 1 + active_window_bits.
  -- NOTE(review): depending on the generics, the two selector bits can overlap the
  -- page-offset bits - confirm the address map against the README.
  process (addr_cpu, CWP, N_CWP)
  begin
    if addr_cpu(active_window_bits - 1) = '1' then
      -- MSB set: the request targets the IN/LOCAL/IO sections of the window
      if addr_cpu(active_window_bits - 2 downto active_window_bits - 3) = "01" then
        -- IO portion: it is the INPUT portion of the next window, hence the
        -- page is selected with N_CWP (CWP + 1)
        RF_ADDR_VALUE <= '0' & N_CWP & addr_cpu(reg_page_size - 1 downto 0);
      elsif addr_cpu(active_window_bits - 2 downto active_window_bits - 3) = "00" then
        -- INPUT or LOCAL portion: the page is selected with CWP
        RF_ADDR_VALUE <= '0' & CWP & addr_cpu(reg_page_size - 1 downto 0);
      else
        RF_ADDR_VALUE <= (others => 'Z'); -- address not valid
      end if;
    else
      -- MSB clear: the CPU is accessing the globals section
      RF_ADDR_VALUE <= '1' & addr_cpu;
    end if;
  end process;

  -- Space checks evaluated continuously as the window moves:
  --   CAN_SAVE    is '1' unless the pointer after N_CWP equals SWP
  --   CAN_RESTORE is '1' unless CWP equals SWP
  -- When a flag is '0', moving the window requires a spill (call) or a fill (ret).
  CAN_SAVE    <= '1' when (unsigned(N_CWP) + 1) /= unsigned(SWP) else '0';
  CAN_RESTORE <= '1' when unsigned(CWP) /= unsigned(SWP) else '0';

  -- Window-pointer state machine, clocked on the rising edge of clk with a
  -- synchronous, active-high reset.
  --
  -- The three window pointers are kept consecutive (modulo 2**window_bits):
  --   O_CWP = CWP - 1,  N_CWP = CWP + 1
  -- A call shifts all three forward by one, a return shifts all three back by one.
  --
  -- S0 (idle):  on `call`, advance the window if CAN_SAVE = '1', otherwise go to S1;
  --             on `ret`, go back one window if CAN_RESTORE = '1', otherwise go to S2.
  --             `call` has priority over `ret`.
  -- S1 (spill): advance SWP, then advance the window and return to S0.
  -- S2 (fill):  decrease SWP, then move the window back and return to S0.
  process (clk)
  begin
    if rising_edge(clk) then
      if reset = '1' then
        STAR  <= S0;
        O_CWP <= (others => '1'); -- CWP - 1, wrapped around
        CWP   <= (others => '0');
        N_CWP <= (0 => '1', others => '0'); -- CWP + 1
        SWP   <= (others => '0');
        -- give the spill output a defined value before the first spill/fill
        data_mem_out <= (others => '0');
      else
        case STAR is
          when S0 =>
            -- "S0": Base State
            if call = '1' then
              if CAN_SAVE = '1' then
                -- New window without spill
                O_CWP <= CWP;
                CWP   <= N_CWP;
                N_CWP <= std_logic_vector(unsigned(N_CWP) + 1);
              else
                -- Spill needed to move the window
                STAR <= S1;
              end if;
            elsif ret = '1' then
              if CAN_RESTORE = '1' then
                -- Old window without fill: every pointer steps back by one.
                -- O_CWP must become O_CWP - 1; using N_CWP - 1 would yield the
                -- old CWP and make the next return move the window forward.
                O_CWP <= std_logic_vector(unsigned(O_CWP) - 1);
                CWP   <= O_CWP;
                N_CWP <= CWP;
              else
                -- Fill needed to restore the previous context
                STAR <= S2;
              end if;
            end if;

          when S1 =>
            -- "S1": Spill
            -- Spill logic for the memory: like pushing 16 memory locations to the
            -- stack (one per clock cycle), or more locations at a time at the
            -- cost of heavily modifying the bus.
            --
            -- In this implementation only the SWP pointer is increased, without
            -- caring for the memory content.
            SWP          <= std_logic_vector(unsigned(SWP) + 1);
            data_mem_out <= (others => '1');
            -- after the spill the actual window is increased
            O_CWP <= CWP;
            CWP   <= N_CWP;
            N_CWP <= std_logic_vector(unsigned(N_CWP) + 1);
            STAR  <= S0;

          when S2 =>
            -- "S2": Fill
            -- Fill logic for the memory: like popping 16 memory locations from
            -- the stack (one per clock cycle), or more locations at a time at the
            -- cost of heavily modifying the bus.
            --
            -- In this implementation only the SWP pointer is decreased, without
            -- caring for the memory content.
            SWP          <= std_logic_vector(unsigned(SWP) - 1);
            data_mem_out <= (others => '0');
            -- after the fill the actual window is decreased (same pointer
            -- update as a return without fill)
            O_CWP <= std_logic_vector(unsigned(O_CWP) - 1);
            CWP   <= O_CWP;
            N_CWP <= CWP;
            STAR  <= S0;

          when others =>
            STAR <= S0;
        end case;
      end if;
    end if;
  end process;
end architecture;