-- block_dist.vhd : Distribute 8 input lanes into 20 output lanes
--                     
-- Copyright (C) 2012 CESNET
-- Author(s): Stepan Friedl <friedl@cesnet.cz>
--
-- Redistribution and use in source and binary forms, with or without
-- modification, are permitted provided that the following conditions
-- are met:
-- 1. Redistributions of source code must retain the above copyright
--    notice, this list of conditions and the following disclaimer.
-- 2. Redistributions in binary form must reproduce the above copyright
--    notice, this list of conditions and the following disclaimer in
--    the documentation and/or other materials provided with the
--    distribution.
-- 3. Neither the name of the Company nor the names of its contributors
--    may be used to endorse or promote products derived from this
--    software without specific prior written permission.
--
-- This software is provided ``as is'', and any express or implied
-- warranties, including, but not limited to, the implied warranties of
-- merchantability and fitness for a particular purpose are disclaimed.
-- In no event shall the company or contributors be liable for any
-- direct, indirect, incidental, special, exemplary, or consequential
-- damages (including, but not limited to, procurement of substitute
-- goods or services; loss of use, data, or profits; or business
-- interruption) however caused and on any theory of liability, whether
-- in contract, strict liability, or tort (including negligence or
-- otherwise) arising in any way out of the use of this software, even
-- if advised of the possibility of such damage.
--
-- $Id: $
--
-- NOTES:

library ieee;
use ieee.std_logic_1164.all;
use IEEE.STD_LOGIC_ARITH.ALL;
use IEEE.STD_LOGIC_UNSIGNED.ALL;

-- Rate/width converter: accepts 8 words (66 bits each) per CLK_D cycle on
-- port D and delivers 20 words (66 bits each) per CLK_Q cycle on port Q.
entity block_dist_8x20 is
-- Lane-count generic is currently disabled; the widths below are hard-coded.
--     generic (
--        NUM_LANES : natural := 8
--    );
   port (
      -- Input port D - 8 word data, high freq
      -- Write-side reset; passed to the internal FIFO write reset
      RESET_D : in std_logic;       
      CLK_D   : in std_logic;   -- D clock, 2.5 * CLK_Q, phase aligned
      RE_D    : out std_logic;  -- read request towards the data source; '1' while the internal FIFO is not full
      D       : in std_logic_vector(8*66-1 downto 0);  -- Input data, 8 x 66 bits
      -- Output port Q - 20 word data, low freq
      -- Read-side reset; passed to the internal FIFO read reset
      RESET_Q : in std_logic;             
      CLK_Q   : in std_logic; -- Q clock
      RE_Q    : in std_logic; -- port Q read enable; Q is updated only on CLK_Q edges where RE_Q = '1'
      Q       : out std_logic_vector(20*66-1 downto 0);  -- Output data, 20 x 66 bits, registered in CLK_Q domain
      --
      DBG     : out std_logic_vector(31 downto 0)   -- Debug vector (internal control/status signals)
   );
end block_dist_8x20;

architecture behavioral of block_dist_8x20 is

-- Component declaration of a 66-bit wide FIFO with independent write and read
-- clocks. NOTE: the only instantiation of this component in this architecture
-- (the GEN_FIFOS generate) is commented out, so the declaration is currently
-- unused.
COMPONENT blockfifo
  PORT (
    wr_clk : IN STD_LOGIC;                       -- write-side clock
    rd_clk : IN STD_LOGIC;                       -- read-side clock
    din : IN STD_LOGIC_VECTOR(65 DOWNTO 0);      -- write data (one 66-bit word)
    wr_en : IN STD_LOGIC;                        -- write enable
    rd_en : IN STD_LOGIC;                        -- read enable
    dout : OUT STD_LOGIC_VECTOR(65 DOWNTO 0);    -- read data (one 66-bit word)
    full : OUT STD_LOGIC;                        -- full flag
    empty : OUT STD_LOGIC                        -- empty flag
  );
END COMPONENT;

-- Black-box attributes for the blockfifo component.
-- NOTE(review): presumably these tell the synthesis tool that blockfifo is a
-- pre-generated core to be resolved at a later stage - confirm against the
-- tool flow in use.
ATTRIBUTE SYN_BLACK_BOX : BOOLEAN;
ATTRIBUTE SYN_BLACK_BOX OF blockfifo : COMPONENT IS TRUE;
ATTRIBUTE BLACK_BOX_PAD_PIN : STRING;
ATTRIBUTE BLACK_BOX_PAD_PIN OF blockfifo : COMPONENT IS "wr_clk,rd_clk,din[65:0],wr_en,rd_en,dout[65:0],full,empty";

-- Delay used in the `after` clauses of the r0/r1 register assignments
constant DLY : time := 1 ns;

-- Holding registers for input beats (CLK_D domain)
signal r0  : std_logic_vector(8*66-1 downto 0); -- captures D when we0 = '1'
signal r1  : std_logic_vector(8*66-1 downto 0); -- captures D when we1 = '1'
signal r2h : std_logic_vector(4*66-1 downto 0); -- referenced only by the commented-out legacy code
signal we0 : std_logic; -- write enable of r0
signal we1 : std_logic; -- write enable of r1
signal we2 : std_logic; -- referenced only by the commented-out legacy code

-- Write-side sequencer: five CLK_D phases per two 20-word FIFO writes
signal out_sel : std_logic; -- selects which of the two fifo_di arrangements is written
type t_cycle is (zero,one,two,three,four);
signal cycle, next_cycle : t_cycle; -- no explicit initial value: starts at t_cycle'left (zero)

-- The signals below are referenced only by the commented-out legacy
-- (FIFO-less) implementation at the end of the architecture.
signal sync_reg   : std_logic := '0';
signal neg_reg_r  : std_logic := '0';
signal neg_reg_f  : std_logic := '0';
signal edge_det_r : std_logic := '0';
signal edge_det_f : std_logic := '0';
signal read_r     : std_logic := '0';
signal read_set   : std_logic;
signal read_clr   : std_logic;

-- Asynchronous FIFO interface
signal fifo_di    : std_logic_vector(20*66-1 downto 0); -- 20-word write data (CLK_D domain)
signal fifo_do    : std_logic_vector(20*66-1 downto 0); -- 20-word read data (CLK_Q domain)
signal fifo_we    : std_logic; -- FIFO write request
signal fifo_rd    : std_logic; -- FIFO read request
signal fifo_full  : std_logic; -- FIFO full flag; stalls the sequencer and deasserts RE_D
signal fifo_full_v : std_logic_vector(19 downto 0); -- per-FIFO full flags of the disabled 20-FIFO variant; not driven by the active FIFO instance
signal fifo_status : std_logic_vector(1 downto 0); -- FIFO STATUS output; not read anywhere in this architecture
signal fifo_empty_v : std_logic_vector(19 downto 0); -- only element 0 is driven (by the active FIFO's EMPTY output)
signal fifo_empty_reg  : std_logic; -- fifo_empty_v(0) delayed by one CLK_Q cycle
signal fifo_empty_reg2 : std_logic;  -- fifo_empty_v(0) delayed by two CLK_Q cycles
signal fifo_read_enable: std_logic; -- registered "FIFO has been non-empty for three samples" qualifier for reads

begin

-- Holding registers for the 8-word input beats (CLK_D domain).
--   r0 captures D on every rising CLK_D edge where we0 = '1'
--   r1 captures D on every rising CLK_D edge where we1 = '1'
-- Both registers keep their value otherwise; there is no reset.
-- The DLY `after` clause only models a clock-to-output delay in simulation.
WRITE_REGS: process(CLK_D)
begin
   -- rising_edge() is used instead of "CLK_D'event and CLK_D = '1'" so that
   -- 'U'/'X' -> '1' transitions (e.g. at simulation start) are not treated
   -- as clock edges.
   if rising_edge(CLK_D) then
      if we0 = '1' then
         r0 <= D after DLY;
      end if;
      if we1 = '1' then
         r1 <= D after DLY;
      end if;
   end if;
end process;

-- Combinational decode of the write-side sequencer phase.
--
--   cycle | we0 | we1 | fifo_we | out_sel | next_cycle
--   ------+-----+-----+---------+---------+-----------
--   zero  |  1  |  0  |    0    |    0    | one
--   one   |  0  |  1  |    0    |    0    | two
--   two   |  1  |  0  |    1    |    0    | three
--   three |  0  |  1  |    0    |    0    | four
--   four  |  0  |  0  |    1    |    1    | zero
WRITE_CONTROL: process(cycle)
begin
   -- Inactive defaults; the decodes below override them where needed
   we0        <= '0';
   we1        <= '0';
   fifo_we    <= '0';
   out_sel    <= '0';
   next_cycle <= zero;

   -- r0 is loaded in phases zero and two
   if cycle = zero or cycle = two then
      we0 <= '1';
   end if;

   -- r1 is loaded in phases one and three
   if cycle = one or cycle = three then
      we1 <= '1';
   end if;

   -- A 20-word FIFO write is issued in phases two and four
   if cycle = two or cycle = four then
      fifo_we <= '1';
   end if;

   -- Phase four uses the second fifo_di arrangement
   if cycle = four then
      out_sel <= '1';
   end if;

   -- Phase succession; anything not listed falls back to the default (zero)
   case cycle is
      when zero   => next_cycle <= one;
      when one    => next_cycle <= two;
      when two    => next_cycle <= three;
      when three  => next_cycle <= four;
      when others => next_cycle <= zero;
   end case;
end process;

-- State register of the write-side sequencer (CLK_D domain).
-- The phase advances to next_cycle on every rising CLK_D edge while the FIFO
-- is not full and is held while fifo_full is asserted.
-- Note: there is no reset; RESET_D does not affect the phase. The register
-- relies on its default initial value (t_cycle'left = zero).
CYCLE_SEQ: process(CLK_D)
begin
   -- rising_edge() is used instead of "CLK_D'event and CLK_D = '1'" so that
   -- 'U'/'X' -> '1' transitions are not treated as clock edges in simulation.
   if rising_edge(CLK_D) then
      if fifo_full = '0' then
         cycle <= next_cycle;
      end if;
   end if;
end process;

-- Assemble the 20-word FIFO write word (least-significant words listed first):
--   out_sel = '0'   : r0 (8 words), r1 (8 words), lower 4 words of D
--   any other value : upper 4 words of r0, r1 (8 words), D (8 words)
with out_sel select
   fifo_di <= D(4*66-1 downto 0) & r1 & r0     when '0',
              D & r1 & r0(8*66-1 downto 4*66)  when others;

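-- Disabled alternative: 20 parallel 66-bit blockfifo instances (Xilinx FIFO core).
-- The generic ASFIFO instantiated further below is used instead; it can be used
-- when the Xilinx FIFO core is not available.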
--
--GEN_FIFOS: for i in 0 to 19 generate
--   FIFO : blockfifo
--     PORT MAP (
--       wr_clk => CLK_D,
--       din    => fifo_di((i+1)*66-1 downto i*66),
--       wr_en  => fifo_we,
--       full   => fifo_full_v(i),
--       --
--       rd_clk => CLK_Q,       
--       rd_en  => fifo_rd,
--       dout   => fifo_do((i+1)*66-1 downto i*66),
--       empty  => fifo_empty_v(i)
--     );
--end generate;
--fifo_full <= fifo_full_v(0);

-- Clock-domain crossing FIFO: written with 20-word items in the CLK_D domain
-- and read in the CLK_Q domain.
-- NOTE(review): the exact semantics of the asfifo generics and ports (e.g.
-- whether WR is ignored while FULL, meaning of STATUS) are defined in
-- work.asfifo, which is not visible here - confirm against that entity.
FIFO: entity work.asfifo 
generic map (
  -- Data Width
   DATA_WIDTH   => 66*20,   -- one item = 20 words x 66 bits
   ITEMS        => 8,
   STATUS_WIDTH => 2
)
port map (
   -- Write interface
   RST_WR   => RESET_D,
   CLK_WR   => CLK_D,
   DI       => fifo_di,
   WR       => fifo_we,
   FULL     => fifo_full,      -- stalls the write sequencer and deasserts RE_D
   STATUS   => fifo_status,    -- not used elsewhere in this architecture
   -- Read interface
   RST_RD   => RESET_Q,
   CLK_RD   => CLK_Q,
   DO       => fifo_do, 
   RD       => fifo_rd,
   EMPTY    => fifo_empty_v(0) -- only element 0 of fifo_empty_v is driven
);


-- Stop reading when the FIFO goes empty (should occur a few cycles after reset only).
-- The empty flag is sampled through a two-stage delay line; reads are enabled
-- (one CLK_Q cycle later) only when the current flag and both delayed copies
-- are all '0'.
FIFO_EMPTY_FFS: process(CLK_Q)
begin
   -- rising_edge() is used instead of "CLK_Q'event and CLK_Q = '1'" so that
   -- 'U'/'X' -> '1' transitions are not treated as clock edges in simulation.
   if rising_edge(CLK_Q) then
      fifo_empty_reg   <= fifo_empty_v(0);
      fifo_empty_reg2  <= fifo_empty_reg;
      fifo_read_enable <= (not fifo_empty_v(0)) and (not fifo_empty_reg) and (not fifo_empty_reg2);
   end if;
end process;

-- A FIFO read is issued only when the consumer requests data (RE_Q) and the
-- FIFO has been qualified as non-empty.
fifo_rd <= fifo_read_enable and RE_Q;
 
---------------------------------------------------------------------------------------------------

-- Debug vector assignment. Bit map:
--    0      FIFO empty flag
--    1      RE_Q
--    2      RESET_Q
--    3      unused, tied to '0'
--    7:4    write sequencer phase (0..4, F = invalid)
--    8      we0
--    9      we1
--   10      fifo_we
--   11      out_sel
--   12      FIFO full flag
--   13      fifo_we (duplicate of bit 10, kept for compatibility)
--   14      unused, tied to '0'
--   15      RESET_D
--   31:16   unused, tied to '0'

-- Only fifo_empty_v(0) is driven (by the single asfifo instance); OR-ing in
-- the undriven elements 1..19 would make this bit undefined.
DBG(0) <= fifo_empty_v(0);

DBG(1) <= RE_Q;

DBG(2) <= RESET_Q;

DBG(3) <= '0';

DBG(7 downto 4) <= X"0" when cycle = zero else
                   X"1" when cycle = one else
                   X"2" when cycle = two else
                   X"3" when cycle = three else
                   X"4" when cycle = four else
                   X"F";

DBG(8) <= we0;

DBG(9) <= we1;

DBG(10) <= fifo_we;

DBG(11) <= out_sel;

-- fifo_full_v is never driven in this architecture; the real full flag of
-- the FIFO is fifo_full.
DBG(12) <= fifo_full;

DBG(13) <= fifo_we;

DBG(14) <= '0';

DBG(15) <= RESET_D;

-- Drive the remaining bits so that the output port has no undriven elements
DBG(31 downto 16) <= (others => '0');

-- Placeholder process in the CLK_D domain: the clocked branch contains no
-- statements, so this process currently drives nothing.
-- NOTE(review): presumably reserved for a registered (pipelined) generation
-- of RE_D - confirm, or remove if not needed.
RE_GEN: process(CLK_D)
begin
   if CLK_D'event and CLK_D = '1' then
      
   end if;
end process;

-- Output register (CLK_Q domain): Q takes the FIFO output word on every
-- rising CLK_Q edge where RE_Q = '1' and holds its value otherwise.
-- Note: the load is qualified by RE_Q only, not by the FIFO empty state.
OUT_REG: process(CLK_Q)
begin
   -- rising_edge() is used instead of "CLK_Q'event and CLK_Q = '1'" so that
   -- 'U'/'X' -> '1' transitions are not treated as clock edges in simulation.
   if rising_edge(CLK_Q) then
      if RE_Q = '1' then
         Q <= fifo_do;
      end if;
   end if;
end process;

-- Request input data whenever the FIFO is not full (purely combinational
-- from fifo_full).
-- The FIFO status signal can be used here to pipeline the RE_D and improve timing
RE_D <= not fifo_full;

-- Old version of the design - without the FIFO

-- !! 1320 FFs will be generated here !!
-- OUT_REGS: process(CLK_Q)
-- begin
--    if CLK_Q'event and CLK_Q = '1' then
--       if out_sel = '0' then
--          Q <= r1 & r0 & r2h after DLY;
--       else
--          Q <= D(4*66-1 downto 0) & r1 & r0 after DLY; 
--       end if;
--    end if;
-- end process;
   
-- WRITE_CONTROL: process(cycle, RE_Q)
-- begin
--    we0 <= '0';
--    we1 <= '0';
--    we2 <= '0';
--    out_sel  <= '0';
--    RE_D     <= read_r;
--    read_set <= '0';
--    read_clr <= '0';
--    
--    case cycle is
--       when 0 => 
--          read_set <= RE_Q;
--          RE_D     <= RE_Q;
--          we0 <= '1';
--          out_sel <= '0';
--       when 1 => 
--          we1 <= '1';
--          out_sel <= '0';
--       when 2 => -- Output data will be sampled here (in the middle of this clock cycle)
--          we0 <= '1';
--          out_sel  <= '0';
--          --read_set <= RE_Q;
--          read_clr <= not RE_Q;
--          RE_D     <= RE_Q;
--       when 3 => 
--          read_set <= RE_Q;
--          --read_clr <= not RE_Q;
--          RE_D     <= RE_Q;
--          we1 <= '1';
--          out_sel <= '1';
--       when 4 => -- Output data will be sampled here (with the next clock edge)
--          we2 <= '1';
--          out_sel <= '1';
--          read_clr <= not RE_Q;
--          RE_D <= RE_Q;
--       when others => 
--          null;
--    end case;
-- end process;
--    
-- CYCLE_SEQ: process(CLK_D)
-- begin
--    if CLK_D'event and CLK_D = '1' then
--       if (edge_det_r = '1') and (edge_det_f = '1') then
--          cycle <= 3 after DLY;
--       else
--          --if not (((cycle = 2) or (cycle = 4)) and (RE_Q = '0')) then
--          cycle <= ((cycle + 1) mod 5) after DLY;
--          --end if;
--       end if;
--       if read_set = '1' then
--          read_r <= '1';
--       elsif read_clr = '1' then
--          read_r <= '0';
--       end if;
--    end if;
-- end process;
-- 
-- -- Clock synchronization - detect aligning rising clock edges
-- 
-- -- Detect rising edge of the output clock to synchronize processes
-- SYNC_REG_Q: process(CLK_Q)
-- begin
--    if CLK_Q'event and CLK_Q = '1' then
--       sync_reg <= (not sync_reg) after DLY;
--    end if;
-- end process;
-- 
-- SYNC_DLY_R: process(CLK_D)
-- begin
--    if CLK_D'event and CLK_D = '1' then -- Rising edge
--       neg_reg_r <= sync_reg after DLY;
--    end if;
-- end process;
-- 
-- SYNC_DLY_F: process(CLK_D)
-- begin
--    if CLK_Q'event and CLK_Q = '0' then -- Falling edge
--       neg_reg_f <= sync_reg after DLY;
--    end if;
-- end process;
-- 
-- edge_det_r <= sync_reg xor neg_reg_r;
-- edge_det_f <= sync_reg xor neg_reg_f;

end behavioral;
