// Registered-output version of the easy FIFO (to avoid timing violations);
//	as a consequence, data passes through with added latency
// Supports full/empty flag outputs
// Synchronous-RAM version with 2 clocks of read latency (the RAM output needs to be registered)

`timescale 1 ps / 1 ps
// fifo_regout: synchronous FIFO with a registered data output and status flags.
//
// Storage is split into an output stage and a RAM array:
//   dout_reg            : the word currently presented on dout
//   byp0_reg / byp1_reg : the next two words behind dout_reg
//   din_reg             : one-clock staging register for a written word that is
//                         headed for dout_reg / byp0_reg / byp1_reg
//   fifobuf             : RAM array (DEPTH words) holding everything beyond that
// mcount counts every stored word regardless of where it currently sits.
//
// Parameters:
//   DATAWIDTH : width of din / dout in bits
//   ADDRWIDTH : address width of the RAM array (DEPTH = 2**ADDRWIDTH words)
//
// Ports:
//   clk     : clock, all registers update on the rising edge
//   reset_n : asynchronous reset, active low
//   we      : write strobe, din is taken on the clock edge where we is high
//   din     : write data
//   re      : read strobe, dout_reg is replaced by the next stored word
//   dout    : registered read data (dout_reg)
//   flags   : { full, empty, overrun, underrun }, 4'b0100 after reset.
//             full is set by the write that happens while mcount == DEPTH+2.
//             overrun / underrun are sticky: only reset clears them.
//
// NOTE: we and re are not gated by full / empty. A write while full or a read
// while empty still moves mcount; the overrun / underrun flag is raised, and the
// flag logic below is written assuming that this situation does not occur.
module fifo_regout #(
	parameter DATAWIDTH = 29,
	parameter ADDRWIDTH = 13 // RAM address width: 2**13 = 8192 words
)
(
		input	clk,
		input	reset_n,
		input	we,
		input  [DATAWIDTH-1:0] din,
		input	re,
		// input sclr, // restart as empty
		output [DATAWIDTH-1:0] dout,
		output	[3:0] flags // { full, empty, overrun, underrun }
);

localparam	DEPTH = 2 ** ADDRWIDTH ; // number of words in the RAM array

// registered outputs
reg		[DATAWIDTH-1:0] dout_reg; // drives dout
reg		full, empty, overrun, underrun; // drive flags

// double buffer and selection state flags
reg		[DATAWIDTH-1:0] byp1_reg, byp0_reg; // bypass data, or copy of the fifo RAM output
reg		byp1_valid, byp0_valid; // indicate that the bypass data register holds a valid word

// Plain reg vectors are unsigned by default. The "unsigned" keyword is left out
// on purpose: IEEE 1364 Verilog does not accept it in a reg declaration.
reg		[ADDRWIDTH:0] mcount; // total number of stored words (RAM array plus registers)

// fifo ram array and control counters
reg		[DATAWIDTH-1:0] fifobuf [DEPTH-1:0]; // 2-D array (meant to be mapped to sync. RAM)
reg		[ADDRWIDTH-1:0]	ffwecnt, ffrecnt; // RAM write / read address, wrap naturally
reg		[DATAWIDTH-1:0] dout_ff; // fifo read data (FROM RAM ARRAY)
reg		[DATAWIDTH-1:0] ffout_reg; // fifo synchronous output (dout_ff delayed by one clock)
reg		ff_pre_valid, ff_valid ; // ff_re delayed by one / two clocks; ff_valid qualifies ffout_reg

reg		[DATAWIDTH-1:0] din_reg; // temporary data input register
reg		din_valid; // din_reg holds a word that has not yet been moved to its destination

// CONDITION FLAG LOGIC
wire	cond_inc, cond_dec, cond_sft; // cond. for increase (wr. only), dec. (read only) and shift (wr. and rd.)
wire	cond_nop; // condition no operation (neither wr. nor rd.); not referenced by the logic below
wire	cond_amf, cond_ge4, cond_ge3, cond_ge2, cond_thr, cond_two, cond_one;
				// condition of mcount: almost full (DEPTH+2), >=4..2, ==3...1
wire	ff_we, ff_re; // write to / read from the RAM array in this clock

// Every comparison / AND below already yields a single bit, so no "? 1'b1 : 1'b0" is needed.
assign	cond_inc = ( {we,re} == 2'b10 ) ;
assign	cond_dec = ( {we,re} == 2'b01 ) ;
assign	cond_sft = ( {we,re} == 2'b11 ) ;
assign	cond_nop = ( {we,re} == 2'b00 ) ;
assign	cond_amf = ( mcount == DEPTH+2 ) ;
assign	cond_ge4 = ( mcount >= 4 ) ;
assign	cond_ge3 = ( mcount >= 3 ) ;
assign	cond_ge2 = ( mcount >= 2 ) ;
assign	cond_thr = ( mcount == 3 ) ;
assign	cond_two = ( mcount == 2 ) ;
assign	cond_one = ( mcount == 1 ) ;

// The RAM array is read on any read while at least four words are stored.
assign	ff_re = cond_ge4 & re ;
// The RAM array is written on any write while at least four words are stored,
// and on a write-only clock while exactly three words are stored.
assign	ff_we = ( cond_ge4 & we ) | ( cond_thr & cond_inc ) ;

// Basic count state and flag registers
 always @(posedge clk or negedge reset_n)
	if( ~reset_n ) begin
			mcount <= { {(ADDRWIDTH+1){1'b0}} };
			{ full, empty, overrun, underrun } <= 4'b0100; // comes out of reset empty
		// end else if ( sclr ) begin
		// 	mcount <= { {(ADDRWIDTH+1){1'b0}} };
		//	{ full, empty, overrun, underrun } <= 4'b0100;
		end else begin
			// a simultaneous write and read (cond_sft) leaves the count unchanged
			mcount <= ( cond_inc ) ? ( mcount + 1 ) :
						( cond_dec ) ? ( mcount - 1 ) :
												mcount ;
			full <= ( cond_amf & cond_inc ) ? 1'b1 : // set condition
						( full & cond_dec ) ? 1'b0 : // clear condition (assuming no overflow occurred)
											full ; // keep condition
			empty <= ( cond_one & cond_dec ) ? 1'b1 :  // set condition
						( empty & cond_inc ) ? 1'b0 : // clear condition (assuming no underflow occurred)
											empty ;
			overrun <= ( full & cond_inc ) ? 1'b1 : overrun ; // detect overflow (sticky)
			underrun <= ( empty & re ) ? 1'b1 : underrun ; // detect underflow (strictly: any read while empty; sticky)
		end

// dataflow control (0): din_reg and valid
// din_reg samples din on every clock; din_valid marks the clocks where that sample matters.
always @(posedge clk or negedge reset_n)
	if( ~reset_n ) din_reg <= { {(DATAWIDTH){1'b0}} };
	else din_reg <= din;

always @(posedge clk or negedge reset_n)
	if( ~reset_n ) din_valid <= 1'b0;
	// else if ( sclr ) din_valid <= 1'b0;
	else if( cond_one & cond_inc ) din_valid <= 1'b1;
	else if( cond_two & we ) din_valid <= 1'b1;
	else if( cond_thr & cond_sft ) din_valid <= 1'b1;
	else din_valid <= 1'b0; // in every other case the written word goes to dout_reg or the RAM array

// dataflow control (1): dout_reg
// The branch order is a priority: the first matching source wins.
always @(posedge clk or negedge reset_n)
	if( ~reset_n ) dout_reg <= { {(DATAWIDTH){1'b0}} };
	else if( empty & we ) dout_reg <= din; // assuming no read (increase)
	else if( cond_one & cond_sft ) dout_reg <= din;
	else if( cond_two & din_valid & re ) dout_reg <= din_reg;
	else if( cond_ge2 & byp0_valid & re ) dout_reg <= byp0_reg;
	else if( cond_ge2 & ff_valid & re ) dout_reg <= ffout_reg;
	// else keep

// dataflow control (2): bypass0
// The branch order is a priority; the trailing comments name the we/re cases each branch can see.
always @(posedge clk or negedge reset_n)
	if( ~reset_n ) { byp0_valid, byp0_reg } <= { {(DATAWIDTH+1){1'b0}} };
	// else if( sclr ) { byp0_valid, byp0_reg } <= { {(DATAWIDTH+1){1'b0}} };
	else if( cond_two & din_valid & (~re) ) { byp0_valid, byp0_reg } <= { 1'b1, din_reg }; // nop, inc
	else if( cond_two & ff_valid & (~re) ) { byp0_valid, byp0_reg } <= { 1'b1, ffout_reg }; // nop, inc
	else if( cond_two & re ) byp0_valid <= 1'b0; // dec, sft
	else if( cond_thr & din_valid & re ) { byp0_valid, byp0_reg } <= { 1'b1, din_reg };  // dec, sft
	else if( cond_thr & din_valid & ff_valid & (~re) )
									{ byp0_valid, byp0_reg } <= { 1'b1, ffout_reg };  // nop, inc
	else if( cond_ge3 & ff_valid & byp0_valid & re )
									{ byp0_valid, byp0_reg } <= { 1'b1, ffout_reg };  // dec, sft
	else if( cond_ge3 & ff_valid & (~byp0_valid) & (~re) )
									{ byp0_valid, byp0_reg } <= { 1'b1, ffout_reg }; // nop, inc
	else if( cond_ge3 & byp1_valid & re ) { byp0_valid, byp0_reg } <= { 1'b1, byp1_reg };  // dec, sft
	else if( cond_ge3 & ff_pre_valid & re ) byp0_valid <= 1'b0; // dec, sft
	// else keep

// dataflow control (3): bypass1
always @(posedge clk or negedge reset_n)
	if( ~reset_n ) { byp1_valid, byp1_reg } <= { {(DATAWIDTH+1){1'b0}} };
	// else if( sclr ) { byp1_valid, byp1_reg } <= { {(DATAWIDTH+1){1'b0}} };
	else if( cond_thr & din_valid & (~re) ) { byp1_valid, byp1_reg } <= { 1'b1, din_reg };  // nop, inc
	else if( cond_ge3 & ff_valid & byp0_valid & (~re) )
									{ byp1_valid, byp1_reg } <= { 1'b1, ffout_reg };  // nop, inc
	else if( cond_ge3 & byp1_valid & re ) byp1_valid <= 1'b0; // dec, sft
	// else keep

// fifo addressing control
 always @(posedge clk or negedge reset_n)
	if( ~reset_n ) { ffwecnt, ffrecnt } <= { {(ADDRWIDTH*2){1'b0}} };
		// else if( sclr ) { ffwecnt, ffrecnt } <= { {(ADDRWIDTH*2){1'b0}} };
		else begin
			ffwecnt <= ( ff_we ) ? ( ffwecnt + 1 ) : ffwecnt ;
			ffrecnt <= ( ff_re ) ? ( ffrecnt + 1 ) : ffrecnt ;
		end

// fifo output register valid flags (shift values of "ff_re")
// ff_valid is high two clocks after ff_re, when the word that was read sits in ffout_reg.
always @(posedge clk or negedge reset_n)
	if( ~reset_n ) { ff_pre_valid, ff_valid } <= 2'b00;
	// else if ( sclr ) { ff_pre_valid, ff_valid } <= 2'b00;
	else { ff_pre_valid, ff_valid } <= { ff_re, ff_pre_valid };

// fifo buffer 2 clock latency sync. RAM version
// write port (no reset, so that the array can be treated as a memory)
always @(posedge clk)
	if( ff_we ) fifobuf[ ffwecnt ] <= din;
	// else if( ~full ) fifobuf[ ffwecnt ] <= { {(DATAWIDTH){1'b0}} };

// read port, first register stage: reads at the current read address on every clock
always @(posedge clk)
	dout_ff <= fifobuf[ ffrecnt ];
// assign dout_ff = fifobuf[ ffrecnt ];

// read port, second register stage
always @(posedge clk or negedge reset_n)
	if( ~reset_n ) ffout_reg <= { {(DATAWIDTH){1'b0}} };
	else ffout_reg <= dout_ff;
//

// output ports assignment
assign dout = dout_reg;
assign flags = { full, empty, overrun, underrun };

endmodule
