I found this working asynchronous FIFO code online. Just like any other FIFO, it outputs one data at a time. I want to modify the code such that it outputs 4 data at a time.
For example, if the FIFO contents are 1,2,3,,4,5,6,7,8,9,10,11,12.........256. Then the output at every read_clock cycle should be 1,2,3,4........5,6,7,8........9,10,11,12...and so on.
*Write_clock is faster than read_clock.
module fifo(i_wclk, i_wrst_n, i_wr, i_wdata, o_wfull, i_rclk, i_rrst_n, i_rd, o_rdata, o_rempty);
parameter r = 4; // number of output data at every read_clock cycle
parameter DSIZE = 16;
parameter ASIZE = 6;
localparam DW = DSIZE;
localparam AW = ASIZE;
input i_wclk, i_wrst_n, i_wr;
input [DW-1:0] i_wdata;
output reg o_wfull;
input i_rclk, i_rrst_n, i_rd;
output [DW-1:0] o_rdata [r-1:0];
output reg o_rempty;
wire [AW-1:0] waddr, raddr;
wire wfull_next, rempty_next;
reg [AW:0] wgray, wbin, wq2_rgray, wq1_rgray, rgray, rbin, rq2_wgray, rq1_wgray;
//
wire [AW:0] wgraynext, wbinnext;
wire [AW:0] rgraynext, rbinnext;
reg [DW-1:0] mem [0:((1<<AW)-1)];
//
// Cross clock domains
//
// Cross the read Gray pointer into the write clock domain
initial { wq2_rgray, wq1_rgray } = 0;
always @(posedge i_wclk or negedge i_wrst_n)
if (~i_wrst_n)
{ wq2_rgray, wq1_rgray } <= 0;
else
{ wq2_rgray, wq1_rgray } <= { wq1_rgray, rgray };
// Calculate the next write address, and the next graycode pointer.
assign wbinnext = wbin + { {(AW){1'b0}}, ((i_wr) && (!o_wfull)) };
assign wgraynext = (wbinnext >> 1) ^ wbinnext;
assign waddr = wbin[AW-1:0];
// Register these two values--the address and its Gray code
// representation
initial { wbin, wgray } = 0;
always @(posedge i_wclk or negedge i_wrst_n)
if (~i_wrst_n)
{ wbin, wgray } <= 0;
else
{ wbin, wgray } <= { wbinnext, wgraynext };
assign wfull_next = (wgraynext == { ~wq2_rgray[AW:AW-1],
wq2_rgray[AW-2:0] });
//
// Calculate whether or not the register will be full on the next
// clock.
initial o_wfull = 0;
always @(posedge i_wclk or negedge i_wrst_n)
if (~i_wrst_n)
o_wfull <= 1'b0;
else
o_wfull <= wfull_next;
//
// Write to the FIFO on a clock
always @(posedge i_wclk)
if ((i_wr)&&(!o_wfull))
mem[waddr] <= i_wdata;
//
// Cross clock domains
//
// Cross the write Gray pointer into the read clock domain
initial { rq2_wgray, rq1_wgray } = 0;
always @(posedge i_rclk or negedge i_rrst_n)
if (~i_rrst_n)
{ rq2_wgray, rq1_wgray } <= 0;
else
{ rq2_wgray, rq1_wgray } <= { rq1_wgray, wgray };
// Calculate the next read address,
assign rbinnext = rbin + { {(AW){1'b0}}, ((i_rd)&&(!o_rempty)) };
// and the next Gray code version associated with it
assign rgraynext = (rbinnext >> 1) ^ rbinnext;
// Register these two values, the read address and the Gray code version
// of it, on the next read clock
//
initial { rbin, rgray } = 0;
always @(posedge i_rclk or negedge i_rrst_n)
if (~i_rrst_n)
{ rbin, rgray } <= 0;
else
{ rbin, rgray } <= { rbinnext, rgraynext };
// Memory read address Gray code and pointer calculation
assign raddr = rbin[AW-1:0];
// Determine if we'll be empty on the next clock
assign rempty_next = (rgraynext == rq2_wgray);
initial o_rempty = 1;
always @(posedge i_rclk or negedge i_rrst_n)
if (~i_rrst_n)
o_rempty <= 1'b1;
else
o_rempty <= rempty_next;
//
// Read from the memory--a clockless read here, clocked by the next
// read FLOP in the next processing stage (somewhere else)
//
// I modified this part to use generate block, earlier was a single assign statement
// assign o_rdata = mem[raddr];
genvar i;
generate
for (i = 0; i < r; i = i + 1)
begin
assign o_rdata[i] = mem[raddr+i];
end
endgenerate
endmodule
I used generate block like above. The output is 1,0,0,0.......2,1,0,0......3,2,1,0.........4,3,2,1...........5,4,3,2.....6,5,4,3....
As we can see it still is outputting only 1 data at every clock cycle. I mean 1 then 2 then 3, 4 so on....not like 1,2,3,4.
Any inputs will be appreciated.