My favorites | Sign in
Logo
             
Search
for
Updated May 21, 2009 by jackokring
WhatIsVHDL  
VHDL by Example

Introduction

The following VHDL is for the minimal nibzF version. Clock around 30 MHz.

-- Nibz Moore State Machine
-- (C) K Ring Technologies Semiconductor 2008-2009
-- http://nibz.googlecode.com

-- BSD or 1 core per chip licence
-- 1 core per chip (two conditions)
-- a. Print K Ring Technologies logo on
-- or near chip at any resolution.
-- b. Any documentation must acknowledge copyright
-- and have http://nibz.googlecode.com URL.
 
-- Maintained by Simon Jackson, BEng.
-- E-mail: jackokring@gmail.com

-- A Moore machine's outputs are dependent only on the current state.
-- The output is written only when the state changes.  (State
-- transitions are synchronous.)


-- A note on sensitivity lists...
-- ==============================
-- Please note the sensitivity lists are not all encompassing.
-- This may lead to excess power density in high density VLSI
-- chips. The simulation of the design does not display the
-- transitions of signals which are not critical in evaluating
-- some state outputs. If your synthesis tool supports this low
-- power technique, then good, you're OK. If it does not, then
-- all is not lost. You have two options: either add all signals
-- needed to suppress warnings to the sensitivity list, or
-- just synthesize and treat the simulation Fmax with respect.

-- Some of the fan-outs are high, exceeding 30. This
-- is a big number for fan-out. Synthesis for speed may insert
-- fan-out split drivers for speed, resulting in better
-- performance for little extra area.

-- The design was developed on Quartus II 6.1 - 8.0
-- in MAX IIZ CPLD technology. It is not limited to this.
-- With low area optimization an fmax of 30 MHz is
-- not out of reach. I have not tried the fast optimization;
-- have a go if you need the speed, but make sure your
-- memory is up to the challenge. Try 10 MHz as an easy
-- target.
 
library ieee;

use ieee.std_logic_1164.all;
use ieee.std_logic_arith.all;

-- nibzF processor core: external interface.
--
-- Memory is reached over a data bus half as wide as the processor word, so
-- the address bus carries one extra low-order bit that selects which half
-- of the word is being transferred.
entity nibzF is
	generic (
		-- the generic processor width (bits in one processor word)
		wide : natural := 16
	);
	port (
		-- SoC WISHBONE Modified Interface

		-- Reset request, active high; it is clocked through two
		-- flip-flops before it resets the core.
		RST_I : in std_logic;
		-- Clock; all state changes on the rising edge.
		CLK_I : in std_logic;
		-- Address: the word address in the upper bits, with bit 0
		-- selecting the half ('1' = low half, '0' = high half).
		A_O   : buffer std_logic_vector(wide downto 0);
		-- Half-word read data.
		D_I   : in std_logic_vector((wide-1)/2 downto 0);
		-- Half-word write data.
		D_O   : out std_logic_vector((wide-1)/2 downto 0);
		-- '0' on write, '1' on read
		RW_O  : buffer std_logic;
		-- The core only advances on clock edges where this is '1'.
		CYC_I : in std_logic;
		-- Interrupt request; a '1' seen on a rising clock edge is
		-- latched inside the core.
		IRQ_I : in std_logic
	);
end entity nibzF;


architecture rtl of nibzF is
	
	-- One full processor word.
	subtype word_t is std_logic_vector(wide-1 downto 0);

	-- Phase of the state machine: instruction fetch or instruction execute.
	type cycle_type is (fetch, execute);

	-- Direct selector: chooses what is updated when the execute phase
	-- completes (p, q, r, s, a, the ALU result, or the jump address).
	type reg_seld is (dirp, dirq, dirr, dirs, dira, dirx, dirad);

	-- Indirection selector: chooses which pointer register (p, q, r, s)
	-- is written back after the memory access.
	type reg_seli is (indp, indq, indr, inds);

	-- Direction of a memory access.
	type mem_op is (rd, wr);

	-- Half of the word involved in the current bus transfer.
	type mem_side is (lo, hi);

	-- Register holding the current phase
	signal cycle : cycle_type;

	-- The register set
	signal p  : word_t;
	signal q  : word_t;
	signal r  : word_t;
	signal s  : word_t;
	signal a  : word_t;
	signal ir : word_t;

	-- The control signals
	signal ind    : reg_seli;
	signal wrt    : mem_op;
	signal pre    : word_t; -- word address presented on A_O
	signal dir    : reg_seld;
	signal xout   : word_t; -- ALU result
	signal adrtmp : word_t; -- pre + 1
	signal din    : word_t; -- assembled input word
	signal dout   : word_t; -- word being written

	-- The Half Width Data Bus
	signal din_lo  : std_logic_vector((wide-1)/2 downto 0);
	signal swaprw  : std_logic; -- swap write high for read low
	signal swaprw2 : std_logic;
	signal hilo    : mem_side;
	signal addrw   : word_t;
	signal addrr   : word_t;

	-- Useful constants
	constant z   : word_t := (others => '0');
	constant z4  : std_logic_vector(wide-5 downto 0) := (others => '0');
	constant nul : word_t := (others => 'Z');

	-- alu: two rows of half adders plus the carry vectors
	signal x0   : word_t;
	signal a0   : word_t;
	signal x1   : word_t;
	signal a1   : word_t;
	signal car  : word_t;
	signal ctmp : word_t;

	-- alu control: carry in
	signal cin : std_logic;

	-- reset delay stages
	signal rs1 : std_logic;
	signal rs2 : std_logic;

	-- irq control: latched interrupt request
	signal irq : std_logic;
		
begin
						
	-- Reset delay: RST_I is clocked into rs1 and rs1 into rs2, so rs2
	-- follows RST_I two rising edges of CLK_I later. Both assignments
	-- read the values held before the edge, so their order is immaterial.
	reset_delay : process (CLK_I)
	begin
		if rising_edge(CLK_I) then
			rs1 <= RST_I;
			rs2 <= rs1;
		end if;
	end process reset_delay;

	-- Logic to advance to the next state
	--
	-- Clocked control process. It owns the register set (p, q, r, s, a,
	-- ir), the carry flag (cin), the fetch/execute phase and the
	-- sequencing of the two half-word bus transfers that make up each
	-- memory access. rs2 is an asynchronous, active-high reset; otherwise
	-- the process acts on rising edges of CLK_I and only advances while
	-- CYC_I = '1'.
	--
	-- All signal reads see the values held BEFORE the clock edge; when a
	-- signal is assigned more than once in the same edge, the textually
	-- last assignment wins. Several paths below rely on both rules.
	--
	-- Access sequencing, as implemented here:
	--   read : lo half is captured into din_lo, then the access completes
	--          on the hi half (din = D_I & din_lo).
	--   write: the instruction completes while the lo half is written; the
	--          hi half is written one edge later, after the lo half of the
	--          next instruction has already been captured (swaprw/swaprw2,
	--          addrw and addrr hold the bookkeeping for that overlap).
	process (CLK_I, rs2)
	begin
		if(rs2 = '1') then
			cycle <= fetch; -- so that clock causes fetch
			ind <= indp; -- program fetch
			wrt <= rd;
			pre <= z; -- register first address
			p <= z;
			q <= z;
			r <= z;
			s <= z;
			a <= z;
			ir <= z;
			hilo <= lo;
			swaprw <= '0';
			swaprw2 <= '0';
			irq <= '0';
			-- Every other register written in the clocked branch gets a
			-- reset value too: the carry flag then starts defined, and
			-- the reset is not turned into an implied hold enable on
			-- these flip-flops.
			cin <= '0';
			dir <= dirp;
			dout <= z;
			din_lo <= (others => '0');
			addrw <= z;
			addrr <= z;
		elsif (rising_edge(CLK_I)) then
			-- when not waiting cycle processor
			if(CYC_I = '1' and swaprw = '0') then
				-- pointer write-back for the indirect access
				if(wrt = wr) then
					if(hilo = lo) then	
						case ind is
							--pre decrement??
							when indp =>
								-- not used so make blank
							when indq =>
								q <= pre;
							when indr =>
								r <= pre;
							when inds =>
								s <= pre;		
						end case;
					end if;
				else
					if(hilo = hi) then
						case ind is
							--post increment
							when indp =>
								p <= adrtmp;
							when indq =>
								q <= adrtmp;
							when indr =>
								r <= adrtmp;
							when inds =>
								s <= adrtmp;
						end case;
					end if;
				end if;
				-- right time to complete execution?
				if((wrt = wr and hilo = lo) or (wrt = rd and hilo = hi)) then
					case cycle is
						when execute =>
							-- Default: go back to fetching at p. These
							-- come before the case so that a control
							-- transfer below can override pre.
							cycle <= fetch;
							ind <= indp; -- program fetch setup
							wrt <= rd;
							pre <= p;
							case dir is
								when dirp =>
									if(irq = '1') then
										irq <= '0';
									end if;
									p <= din;
									-- p still holds its old value on this
									-- edge, so the fetch address must be
									-- loaded with the target directly;
									-- otherwise the next fetch runs from
									-- the old p and its post increment
									-- overwrites the new p.
									pre <= din;
								when dirq =>
									if(wrt=rd) then
										q <= din;
									end if;
								when dirr =>
									if(wrt=rd) then
										r <= din;
									end if;
								when dirs =>
									if(wrt=rd) then
										s <= din;
									end if;
								when dira =>
									if(wrt=rd) then
										a <= din;
									end if;
								when dirx =>
									a <= xout;
									if(not(ir(3 downto 2) = "01")) then
										-- not on xor
										cin <= ctmp(wide-1);
									end if;	
								when dirad =>
									p <= ir;
									-- jump target goes straight to the
									-- fetch address (same reason as dirp)
									pre <= ir;
							end case;
						when fetch =>
							if(not(din(wide-1 downto 4)=z4)) then
								-- jump (there are no branch delay slots!!)
								-- the old p+1 is written through r-1
								ind <= indr;
								wrt <= wr;
								dir <= dirad;
								pre <= unsigned(r) - 1;
								-- botch to do post increment of fetch
								dout <= unsigned(p) + 1;
							else
								case din(3 downto 0) is
									when "0000" =>
										-- BAck (no delay)
										if(irq = '1') then
											ind <= indp;
											pre <= p;
										else
											ind <= indr;
											pre <= r;
										end if;
										wrt <= rd;
										dir <= dirp;
									when "0001" =>
										-- Fetch In
										ind <= indq;
										wrt <= rd;
										dir <= dira;
										pre <= q;
									when "0010" =>
										-- Return In
										ind <= indr;
										wrt <= rd;
										dir <= dirq;
										pre <= r;
									when "0011" =>
										-- Stack In
										ind <= inds;
										wrt <= rd;
										dir <= dira;
										pre <= s;
									when "0100" =>
										-- DIfference
										ind <= inds;
										wrt <= rd;
										dir <= dirx;
										pre <= s;
									when "0101" =>
										-- Fetch Address
										ind <= inds;
										wrt <= rd;
										dir <= dirq;
										pre <= s;
									when "0110" =>
										-- Return Address
										ind <= inds;
										wrt <= rd;
										dir <= dirr;
										pre <= s;
									when "0111" =>
										-- Stack Address
										ind <= inds;
										wrt <= rd;
										dir <= dirs;
										pre <= s;
									when "1000" =>
										-- BOth (AND 2*)
										ind <= inds;
										wrt <= rd;
										dir <= dirx;
										pre <= s;
									when "1001" =>
										-- Fetch Out
										ind <= indq;
										wrt <= wr;
										dir <= dira;
										pre <= unsigned(q) - 1;
										dout <= a;
									when "1010" =>
										-- Return Out
										ind <= indr;
										wrt <= wr;
										dir <= dirq;
										pre <= unsigned(r) - 1;
										dout <= q;
									when "1011" =>
										-- Stack Out
										ind <= inds;
										wrt <= wr;
										dir <= dira;
										pre <= unsigned(s) - 1;
										dout <= a;
									when "1100" =>
										-- SUm
										ind <= inds;
										wrt <= rd;
										dir <= dirx;
										pre <= s;
									when "1101" =>
										-- Fetch Ends
										ind <= inds;
										wrt <= wr;
										dir <= dirq;
										pre <= unsigned(s) - 1;
										dout <= q;
									when "1110" =>
										-- Return Ends
										ind <= inds;
										wrt <= wr;
										dir <= dirr;
										pre <= unsigned(s) - 1;
										dout <= r;
									when "1111" =>
										-- Stack Ends
										ind <= inds;
										wrt <= wr;
										dir <= dirs;
										pre <= unsigned(s) - 1;
										dout <= s;
								end case;
							end if;
							ir <= din;
							cycle <= execute;
					end case;
				end if;
				-- IO control
				-- (assignments here override any made above on this edge)
				swaprw <= swaprw2;
				if(hilo = lo) then
					if(wrt = rd) then
						-- read lo
						-- pre is correct
						hilo <= hi;
						addrr <= pre;
						din_lo <= D_I;
						if(swaprw2 = '1') then
							-- perform hi write delayed setup
							-- revert pre for completion
							pre <= addrw;
							wrt <= wr;
							swaprw2 <= '0';
						else
							-- perform hi read setup
						end if;
					else
						-- write lo
						swaprw2 <= '1';
						addrw <= pre;
						-- move the hi half down so D_O presents it next
						dout((wide-1)/2 downto 0) <= dout(wide-1 downto wide/2); 
						-- pre is correct
						-- instruction completes
					end if;
				else
					-- read hi
					-- instruction completes
					hilo <= lo;
				end if;
			elsif(CYC_I = '1' and swaprw = '1') then
				-- clear delayed write
				-- (this is the edge on which the hi half is written;
				-- afterwards the interrupted read resumes at addrr)
				swaprw <= swaprw2;
				pre <= addrr;
				wrt <= rd;
			end if;
			-- set interrupt
			-- (sampled on every rising edge, regardless of CYC_I; being
			-- last, it overrides the irq clear in the dirp branch)
			if(IRQ_I = '1') then
				irq <= '1';
			end if;
		end if;
	end process;
	
	-- ALU (combinational). ir(3 downto 2) selects the operation applied
	-- to a and din:
	--   "00" : neither xor nor and terms (result is just the carry-in bit)
	--   "01" : xor terms only
	--   "10" : and terms only, which reach the result shifted up one bit
	--          through the carry vector
	--   "11" : xor and and terms, i.e. a ripple-carry sum
	-- xout is the result word; ctmp is the per-bit carry-out vector.
	--
	-- The sensitivity list names every signal the process reads,
	-- including the intermediates x0, a0, x1, a1 and ctmp that it also
	-- drives. Without them the simulator does not re-evaluate the process
	-- after an intermediate changes, so xout and ctmp could hold stale
	-- values in simulation while synthesis builds plain combinational
	-- logic.
	process(ir, a, din, car, cin, x0, a0, x1, a1, ctmp)
		-- alu process
		-- for greater energy efficiency
		-- re-evaluation of the ALU on each cycle
		-- may be avoided if some latches are generated
		-- not done in this version.
	begin
		case ir(3 downto 2) is
			when "00" =>
				-- first row of half adders
				x0 <= z;
				a0 <= z;
				-- second row of half adders
				x1 <= x0 xor car;
				a1 <= z;
			when "01" =>
				-- first row of half adders
				x0 <= a xor din;
				a0 <= z;
				-- second row of half adders
				x1 <= x0 xor car;
				a1 <= z;
			when "10" =>
				-- first row of half adders
				x0 <= z;
				a0 <= a and din;
				-- second row of half adders
				x1 <= x0 xor car;
				a1 <= x0 and car;
			when "11" =>
				-- first row of half adders
				x0 <= a xor din;
				a0 <= a and din;
				-- second row of half adders
				x1 <= x0 xor car;
				a1 <= x0 and car;
		end case;
		-- STANDARD CARRY
		-- carry out of each bit, then shifted up one place with cin
		-- entering at bit 0
		ctmp <= a1 or a0;
		car <= ctmp(wide-2 downto 0)&cin;
		--sum output
		xout <= x1(wide-1 downto 0);
	end process;
	
	-- read/write process (combinational)
	-- Derives the bus outputs and the helper values used by the clocked
	-- logic from the current address, direction and half selection.
	process(pre, wrt, hilo, D_I, din_lo, dout)
	begin
		-- full input word: the half on the bus now is the upper part,
		-- the half captured earlier is the lower part
		din <= D_I&din_lo;
		-- the lower half of dout is what gets driven out
		D_O <= dout(wide/2-1 downto 0);
		-- possible post increment calculation
		adrtmp <= unsigned(pre) + 1;
		-- A_O from pre, with the half selection as bit 0
		case hilo is
			when hi =>
				A_O <= pre&'0';
			when lo =>
				A_O <= pre&'1';
		end case;
		-- '0' on write, '1' on read
		case wrt is
			when wr =>
				RW_O <= '0';
			when rd =>
				RW_O <= '1';
		end case;
	end process;
end rtl;

Sign in to add a comment
Hosted by Google Code