OK, #2, let's start from scratch:
//---------------------------------------------------------------------------
//MCU interface for passing data in and out the FPGA based on a command set
//To avoid the loss of tri state control on the mcu databus, hierarchy is discarded
(*keep_hierarchy = "no"*)
module mcu_interface
(
    //Main clock input. All sampling registers below are clocked on its rising edge.
    input i_main_clk,

    //Control inputs from the MCU
    input i_clk, //Clock signal: Active low going pulse from the mcu to clock data
    input i_rw,  //Direction select signal: Read 0 / Write 1
    input i_dc,  //Type select signal: Data 0 / Command 1

    //Outputs to other modules.
    //NOTE(review): none of these outputs is driven anywhere in this module as shown.
    output [31:0] o_channel1_negative_signal_step,
    output [31:0] o_channel1_positive_signal_step,
    output [31:0] o_channel2_negative_signal_step,
    output [31:0] o_channel2_positive_signal_step,

    //Bidirectional 8 bit data bus shared with the MCU
    inout [7:0] io_data
);

    //---------------------------------------------------------------------------
    //Main registers.
    //NOTE(review): declared here but not yet read or written in this module as shown.
    reg [7:0] command;    //Stores the latest command
    reg [7:0] data_out;   //Stores data byte to be read by mcu
    reg [1:0] data_index; //Index counter for multiple data byte handling

    //---------------------------------------------------------------------------
    //Command related data: four bytes per setting.
    //NOTE(review): declared here but not yet read or written in this module as shown.
    reg [7:0] channel1_negative_signal_step[3:0]; //Command 0x00
    reg [7:0] channel1_positive_signal_step[3:0]; //Command 0x01
    reg [7:0] channel2_negative_signal_step[3:0]; //Command 0x02
    reg [7:0] channel2_positive_signal_step[3:0]; //Command 0x03

    //---------------------------------------------------------------------------
    //Sample the MCU control inputs and the data bus to our system clock.
    //ireg_* hold the value seen at the latest rising edge of i_main_clk.
    //dly_*  hold the previous ireg_* value, i.e. one i_main_clk cycle older.
    reg       ireg_i_clk;
    reg       ireg_i_rw;
    reg       ireg_i_dc;
    reg [7:0] ireg_i_data; //Full bus width, so no bits of io_data are dropped when sampling
    reg       dly_i_clk;
    reg       dly_i_rw;
    reg       dly_i_dc;

    always @(posedge i_main_clk) begin
        //First stage: capture the raw pins
        ireg_i_clk  <= i_clk;
        ireg_i_rw   <= i_rw;
        ireg_i_dc   <= i_dc;
        ireg_i_data <= io_data;

        //Second stage: one cycle delayed copies of the control signals
        dly_i_clk <= ireg_i_clk;
        dly_i_rw  <= ireg_i_rw;
        dly_i_dc  <= ireg_i_dc;
    end

    //---------------------------------------------------------------------------
    //Bus direction follows the registered copy of i_rw, so it changes on the
    //rising edge of i_main_clk after i_rw changes:
    //  ireg_i_rw == 1 (write) : release the bus (high Z)
    //  ireg_i_rw == 0 (read)  : drive data_out onto the bus
    assign io_data = ireg_i_rw ? 8'bZ : data_out;

endmodule
//---------------------------------------------------------------------------
Why is this first step a good idea?
What did I just do?
Does it remind you of something else?
Why will the next step fix all your timing problems?
Now, everything you code next should minimize transitions on the MCU control port.
Everything looks to be 32-bit controls. You need to decide whether to latch 4 bytes in a command,
or the entire 16 bytes in one shot.