To answer Bassman59's question about why the module's ports aren't "signed": shrug, I don't have an opinion either way. My view is that outside the MAC block they are just bits, with no real meaning.
Here is the working DFT code.
library IEEE;
use IEEE.STD_LOGIC_1164.ALL;
use IEEE.NUMERIC_STD.ALL;
-- Streaming 256-point DFT of a real input. One sample is accepted per
-- din_valid pulse; after every 256 accepted samples the 128 result bins of
-- the completed block are streamed out, one per clock.
entity dft_256 is
Port ( clk : in STD_LOGIC;
-- Input sample. Captured on a rising clk edge while din_valid = '1'; the
-- MAC blocks interpret it as a signed value.
din : in STD_LOGIC_VECTOR (15 downto 0);
-- High for a clock to present a new sample on din. Each pulse restarts the
-- internal bin sequencer, so pulses must be spaced far enough apart for the
-- sequencer to visit all 128 bins (about 131 clocks).
din_valid : in STD_LOGIC;
-- Real part of the current output bin: the top 16 bits of the 41-bit
-- accumulator. Driven to zero while dout_valid = '0'.
dout_r : out STD_LOGIC_VECTOR (15 downto 0) := (others => '0');
-- Imaginary part of the current output bin, same scaling as dout_r.
dout_i : out STD_LOGIC_VECTOR (15 downto 0) := (others => '0');
-- High while dout_r/dout_i carry a bin of the last completed block.
dout_valid : out STD_LOGIC := '0');
end dft_256;
-- Direct (non-FFT) implementation of a 256-point DFT of a real input.
--
-- Every accepted input sample is multiplied by the matching sin/cos twiddle
-- for each of the 128 bins and added to that bin's running total, which is
-- held in working_r / working_i. Two MAC blocks (real and imaginary) are
-- time-shared over all bins, one bin per clock:
--
--   clock n   : sequence_counter = k, read working(k) and the sin/cos entries
--   clock n+1 : MAC registers its inputs
--   clock n+2 : MAC registers a*b+c
--   clock n+3 : result written back to working(k)
--
-- While sample 0 of a block is being processed the accumulator input is
-- forced to zero (starting a fresh sum) and the value read from the working
-- memory - the finished total of the previous block - is streamed out.
--
-- NOTE(review): the first sample after power-up also has sample_count = 0, so
-- one frame of all-zero bins is presented with dout_valid = '1' before any
-- real result exists.
-- NOTE(review): a din_valid pulse arriving before the sequencer has finished
-- its write-backs (about 131 clocks) abandons the remaining bins of the
-- current sample; confirm the sample rate against the clock rate.
architecture Behavioral of dft_256 is
    -- Number of frequency bins computed (half of the 256-point transform).
    constant c_num_bins    : natural := 128;
    -- Clocks between reading a bin's running total and writing back its update.
    constant c_mac_latency : natural := 3;

    -- Bin sequencer. It parks at 255 (all ones) when idle, which is outside
    -- every active window tested below.
    signal sequence_counter         : unsigned(7 downto 0) := (others => '1');
    signal sequence_counter_delay_1 : unsigned(7 downto 0) := (others => '1');
    signal sequence_counter_delay_2 : unsigned(7 downto 0) := (others => '1');
    signal sequence_counter_delay_3 : unsigned(7 downto 0) := (others => '1');

    -- Index of the current sample within the 256-sample block. Starts at 255
    -- so that the first accepted sample is numbered 0.
    signal sample_count : unsigned(7 downto 0) := (others => '1');

    -- Current sample, padded with two low zero bits to the MAC's 18-bit input.
    -- Initialised so that no 'U' reaches the multipliers at time zero.
    signal data_reg : std_logic_vector(17 downto 0) := (others => '0');

    -- Per-bin running totals.
    type a_working is array(0 to c_num_bins-1) of std_logic_vector(40 downto 0);
    signal working_r : a_working := (others => (others => '0'));
    signal working_i : a_working := (others => (others => '0'));

    -- Running totals read from the working memory for the current bin.
    signal temp_r : std_logic_vector(40 downto 0) := (others => '0');
    signal temp_i : std_logic_vector(40 downto 0) := (others => '0');
    -- Value fed to the MAC 'c' input: temp_*, or zero for sample 0.
    signal running_total_r : std_logic_vector(40 downto 0) := (others => '0');
    signal running_total_i : std_logic_vector(40 downto 0) := (others => '0');
    -- MAC outputs, written back to the working memory.
    signal result_r : std_logic_vector(40 downto 0) := (others => '0');
    signal result_i : std_logic_vector(40 downto 0) := (others => '0');

    type a_sine_table is array(0 to 255) of std_logic_vector(15 downto 0);
    -- One full cycle of sine, 256 entries, scaled to +/-32767.
    -- Declared as a constant because it is a read-only lookup table.
    --
    -- Table body generated with this C code
    --
    -- #include <math.h>
    -- #include <stdio.h>
    --
    -- int main(void) {
    --   for(int i = 0; i < 256; i++) {
    --     if(i%8 == 0) {
    --       printf(" ");
    --     }
    --     printf(" x\"%04X\",", (int)(sin(i*2*M_PI/256)*32767)&0xFFFF);
    --     if(i%8 == 7) {
    --       printf("\n");
    --     }
    --   }
    -- }
    constant sine_table : a_sine_table := (
        x"0000", x"0324", x"0647", x"096A", x"0C8B", x"0FAB", x"12C7", x"15E1",
        x"18F8", x"1C0B", x"1F19", x"2223", x"2527", x"2826", x"2B1E", x"2E10",
        x"30FB", x"33DE", x"36B9", x"398C", x"3C56", x"3F16", x"41CD", x"447A",
        x"471C", x"49B3", x"4C3F", x"4EBF", x"5133", x"539A", x"55F4", x"5842",
        x"5A81", x"5CB3", x"5ED6", x"60EB", x"62F1", x"64E7", x"66CE", x"68A5",
        x"6A6C", x"6C23", x"6DC9", x"6F5E", x"70E1", x"7254", x"73B5", x"7503",
        x"7640", x"776B", x"7883", x"7989", x"7A7C", x"7B5C", x"7C29", x"7CE2",
        x"7D89", x"7E1C", x"7E9C", x"7F08", x"7F61", x"7FA6", x"7FD7", x"7FF5",
        x"7FFF", x"7FF5", x"7FD7", x"7FA6", x"7F61", x"7F08", x"7E9C", x"7E1C",
        x"7D89", x"7CE2", x"7C29", x"7B5C", x"7A7C", x"7989", x"7883", x"776B",
        x"7640", x"7503", x"73B5", x"7254", x"70E1", x"6F5E", x"6DC9", x"6C23",
        x"6A6C", x"68A5", x"66CE", x"64E7", x"62F1", x"60EB", x"5ED6", x"5CB3",
        x"5A81", x"5842", x"55F4", x"539A", x"5133", x"4EBF", x"4C3F", x"49B3",
        x"471C", x"447A", x"41CD", x"3F16", x"3C56", x"398C", x"36B9", x"33DE",
        x"30FB", x"2E10", x"2B1E", x"2826", x"2527", x"2223", x"1F19", x"1C0B",
        x"18F8", x"15E1", x"12C7", x"0FAB", x"0C8B", x"096A", x"0647", x"0324",
        x"0000", x"FCDC", x"F9B9", x"F696", x"F375", x"F055", x"ED39", x"EA1F",
        x"E708", x"E3F5", x"E0E7", x"DDDD", x"DAD9", x"D7DA", x"D4E2", x"D1F0",
        x"CF05", x"CC22", x"C947", x"C674", x"C3AA", x"C0EA", x"BE33", x"BB86",
        x"B8E4", x"B64D", x"B3C1", x"B141", x"AECD", x"AC66", x"AA0C", x"A7BE",
        x"A57F", x"A34D", x"A12A", x"9F15", x"9D0F", x"9B19", x"9932", x"975B",
        x"9594", x"93DD", x"9237", x"90A2", x"8F1F", x"8DAC", x"8C4B", x"8AFD",
        x"89C0", x"8895", x"877D", x"8677", x"8584", x"84A4", x"83D7", x"831E",
        x"8277", x"81E4", x"8164", x"80F8", x"809F", x"805A", x"8029", x"800B",
        x"8001", x"800B", x"8029", x"805A", x"809F", x"80F8", x"8164", x"81E4",
        x"8277", x"831E", x"83D7", x"84A4", x"8584", x"8677", x"877D", x"8895",
        x"89C0", x"8AFD", x"8C4B", x"8DAC", x"8F1F", x"90A2", x"9237", x"93DD",
        x"9594", x"975B", x"9932", x"9B19", x"9D0F", x"9F15", x"A12A", x"A34D",
        x"A57F", x"A7BE", x"AA0C", x"AC66", x"AECD", x"B141", x"B3C1", x"B64D",
        x"B8E4", x"BB86", x"BE33", x"C0EA", x"C3AA", x"C674", x"C947", x"CC22",
        x"CF05", x"D1F0", x"D4E2", x"D7DA", x"DAD9", x"DDDD", x"E0E7", x"E3F5",
        x"E708", x"EA1F", x"ED39", x"F055", x"F375", x"F696", x"F9B9", x"FCDC");

    -- Table index for the current bin: (bin * sample_count) mod 256, built up
    -- by adding sample_count once per bin. Initialised so that the concurrent
    -- "+ 64" below never operates on 'U'.
    signal trig_entry   : unsigned(7 downto 0) := (others => '0');
    -- Same index advanced by a quarter cycle, which turns the sine table into
    -- a cosine lookup.
    signal trig_entry_c : unsigned(7 downto 0) := (others => '0');
    signal sin_value    : std_logic_vector(15 downto 0) := (others => '0');
    signal cos_value    : std_logic_vector(15 downto 0) := (others => '0');

    component mac_block is
        port (
            clk : in  std_logic;
            a   : in  std_logic_vector(17 downto 0);
            b   : in  std_logic_vector(15 downto 0);
            c   : in  std_logic_vector(40 downto 0);
            r   : out std_logic_vector(40 downto 0)
        );
    end component;
begin
    -- Real part: running total + sample * cos
    mac_block_r: mac_block port map (
        clk => clk,
        a   => data_reg,
        b   => cos_value,
        c   => running_total_r,
        r   => result_r);

    -- Imaginary part: running total + sample * sin
    mac_block_i: mac_block port map (
        clk => clk,
        a   => data_reg,
        b   => sin_value,
        c   => running_total_i,
        r   => result_i);

    -- For the first sample in a block set the running total to zero
    running_total_r <= (others => '0') when sample_count = 0 else temp_r;
    running_total_i <= (others => '0') when sample_count = 0 else temp_i;

    trig_entry_c <= trig_entry + 64;

    process(clk)
    begin
        if rising_edge(clk) then
            -- Output the last completed DFT while inputting sample 0 of the next.
            -- temp_* lags sequence_counter by one clock, hence the window 1..128.
            if sample_count = 0 and sequence_counter >= 1 and sequence_counter < c_num_bins+1 then
                dout_r     <= temp_r(temp_r'high downto temp_r'high-15);
                dout_i     <= temp_i(temp_i'high downto temp_i'high-15);
                dout_valid <= '1';
            else
                dout_r     <= (others => '0');
                dout_i     <= (others => '0');
                dout_valid <= '0';
            end if;

            -- Write back any updated value. The MAC result for bin k is valid
            -- c_mac_latency clocks after bin k was read, when
            -- sequence_counter_delay_3 = k.
            if sequence_counter >= c_mac_latency and sequence_counter < c_num_bins+c_mac_latency then
                working_r(to_integer(sequence_counter_delay_3)) <= result_r;
                working_i(to_integer(sequence_counter_delay_3)) <= result_i;
            end if;

            -- Look up the working values and the sin/cos values
            if sequence_counter <= working_r'high then
                temp_r    <= working_r(to_integer(sequence_counter));
                temp_i    <= working_i(to_integer(sequence_counter));
                sin_value <= sine_table(to_integer(trig_entry));
                cos_value <= sine_table(to_integer(trig_entry_c));
            end if;

            -- Restart the sequencer when a new sample arrives
            if din_valid = '1' then
                data_reg         <= din & "00";
                sequence_counter <= (others => '0');
                trig_entry       <= (others => '0');
                sample_count     <= sample_count + 1;
            elsif sequence_counter /= 255 then
                -- Advance to the next bin; the counter parks at 255 when done.
                sequence_counter <= sequence_counter + 1;
                trig_entry       <= trig_entry + sample_count;
            end if;

            -- Delayed sequence count for the write-back to working_r and working_i
            sequence_counter_delay_3 <= sequence_counter_delay_2;
            sequence_counter_delay_2 <= sequence_counter_delay_1;
            sequence_counter_delay_1 <= sequence_counter;
        end if;
    end process;
end Behavioral;
You could play around with the scaling factors of the 'sine' table, to tweak where the full scale output is on the DFT.
And here is the MAC block:
library IEEE;
use IEEE.STD_LOGIC_1164.ALL;
use IEEE.NUMERIC_STD.ALL;
-- Registered signed multiply-accumulate: r = a * b + c.
-- All inputs are registered on one rising clk edge and the result on the
-- next, so r reflects inputs sampled two rising edges earlier.
entity mac_block is
Port ( clk : in STD_LOGIC;
-- Multiplier operands, interpreted as signed inside the block.
a : in STD_LOGIC_VECTOR (17 downto 0);
b : in STD_LOGIC_VECTOR (15 downto 0);
-- Addend, interpreted as signed.
c : in STD_LOGIC_VECTOR (40 downto 0);
-- Registered result a*b+c, 41 bits wide.
r : out STD_LOGIC_VECTOR (40 downto 0));
end mac_block;
-- Two-stage pipelined signed multiply-accumulate.
--   stage 1: capture a, b and c as signed values
--   stage 2: register (a * b) + c computed from the stage-1 registers
architecture Behavioral of mac_block is
    -- Stage 1: registered copies of the input ports
    signal multiplicand_q : signed(17 downto 0);
    signal coefficient_q  : signed(15 downto 0);
    signal addend_q       : signed(40 downto 0);
    -- Stage 2: registered multiply-accumulate result
    signal mac_q          : signed(40 downto 0);
begin
    mac_pipeline : process(clk)
    begin
        if rising_edge(clk) then
            -- Stage 1: register the inputs
            multiplicand_q <= signed(a);
            coefficient_q  <= signed(b);
            addend_q       <= signed(c);

            -- Stage 2: uses the values captured on the previous edge. The
            -- 34-bit product is sign-extended to the 41-bit addend width by
            -- the numeric_std "+" operator.
            mac_q <= multiplicand_q * coefficient_q + addend_q;
        end if;
    end process mac_pipeline;

    r <= std_logic_vector(mac_q);
end Behavioral;
Here's a test sample source:
library IEEE;
use IEEE.STD_LOGIC_1164.ALL;
use IEEE.NUMERIC_STD.ALL;
-- Test sample source: replays a fixed 32-entry waveform table, emitting one
-- entry every counter_max + 1 clocks.
entity source is
Port ( clk : in STD_LOGIC;
-- Number of idle clocks between consecutive samples.
counter_max : in std_logic_vector(15 downto 0);
-- Current sample from the waveform table; updated together with data_valid.
data : out STD_LOGIC_VECTOR (15 downto 0) := (others => '0');
-- High for the clock in which data carries a new sample.
data_valid : out std_logic := '0'
);
end source;
-- Steps through a 32-entry waveform table, wrapping at the end. A down-counter
-- loaded from counter_max sets the spacing between samples: a sample is
-- emitted whenever the counter reaches zero.
architecture Behavioral of source is
    type a_data is array(0 to 31) of std_logic_vector(15 downto 0);

    -- Waveform to replay. Declared as a constant because it is a read-only
    -- lookup table; swap the commented block in to change the test signal.
    constant values : a_data := (
        -- Slightly offset source sine
        -- x"30FB", x"471C", x"5A81", x"6A6C", x"7640", x"7D89", x"7FFF", x"7D89",
        -- x"7640", x"6A6C", x"5A81", x"471C", x"30FB", x"18F8", x"0000", x"E708",
        -- x"CF05", x"B8E4", x"A57F", x"9594", x"89C0", x"8277", x"8001", x"8277",
        -- x"89C0", x"9594", x"A57F", x"B8E4", x"CF05", x"E708", x"0000", x"18F8"
        -- Perfectly aligned square
        x"0000", x"7FFF", x"7FFF", x"7FFF", x"7FFF", x"7FFF", x"7FFF", x"7FFF",
        x"7FFF", x"7FFF", x"7FFF", x"7FFF", x"7FFF", x"7FFF", x"7FFF", x"7FFF",
        x"0000", x"8001", x"8001", x"8001", x"8001", x"8001", x"8001", x"8001",
        x"8001", x"8001", x"8001", x"8001", x"8001", x"8001", x"8001", x"8001"
    );

    -- Clocks remaining until the next sample is emitted.
    signal counter : unsigned(15 downto 0) := (others => '0');
    -- Index of the next table entry to emit.
    signal sample  : unsigned(9 downto 0) := (others => '0');
begin
    process(clk)
    begin
        if rising_edge(clk) then
            if counter = 0 then
                -- Emit the next sample and reload the spacing counter
                data       <= values(to_integer(sample));
                data_valid <= '1';
                counter    <= unsigned(counter_max);
                -- Wrap the table index after the last entry
                if sample = values'high then
                    sample <= (others => '0');
                else
                    sample <= sample+1;
                end if;
            else
                counter    <= counter-1;
                data_valid <= '0';
            end if;
        end if;
    end process;
end Behavioral;
Top level test bench:
library IEEE;
use IEEE.STD_LOGIC_1164.ALL;
-- Top-level test bench; it has no ports.
entity tb_dft_top is
end tb_dft_top;
-- Drives dft_top with a free-running 20 ns clock and exposes its outputs as
-- signals for inspection in the simulator. The simulation has no stop
-- condition of its own.
architecture Behavioral of tb_dft_top is
    -- Half of the clock period: 10 ns low, then 10 ns high.
    constant c_clk_half_period : time := 10 ns;

    component dft_top is
        Port ( clk       : in  STD_LOGIC;
               fft_r     : out STD_LOGIC_VECTOR (15 downto 0);
               fft_i     : out STD_LOGIC_VECTOR (15 downto 0);
               fft_valid : out STD_LOGIC);
    end component;

    signal clk       : STD_LOGIC;
    signal fft_r     : STD_LOGIC_VECTOR (15 downto 0);
    signal fft_i     : STD_LOGIC_VECTOR (15 downto 0);
    signal fft_valid : STD_LOGIC;
begin
    -- Unit under test
    uut: dft_top port map (
        clk       => clk,
        fft_r     => fft_r,
        fft_i     => fft_i,
        fft_valid => fft_valid
    );

    -- Clock generator: schedule one full period (low now, high after half a
    -- period), then sleep until the period has elapsed.
    clk_proc: process
    begin
        clk <= '0', '1' after c_clk_half_period;
        wait for 2 * c_clk_half_period;
    end process;
end Behavioral;
Images attached are simulations of the sine and square test waves, showing the spectrum as it is streamed out.
This still doesn't address the issue of windowing the data on the way in, or calculating the magnitude "sqrt(r*r+i*i)" on the way out of the DFT.
For the former, this line in the DFT could be adapted:
data_reg <= din & "00";
For the latter, a CORDIC magnitude calculation would be sweet...