-- hw_rxbuf.vhd - Hardware RX DMA Buffer
-- Copyright (C) 2013 CESNET
-- Author(s): Martin Spinler <spinler@cesnet.cz>
--
-- Redistribution and use in source and binary forms, with or without
-- modification, are permitted provided that the following conditions
-- are met:
-- 1. Redistributions of source code must retain the above copyright
--    notice, this list of conditions and the following disclaimer.
-- 2. Redistributions in binary form must reproduce the above copyright
--    notice, this list of conditions and the following disclaimer in
--    the documentation and/or other materials provided with the
--    distribution.
-- 3. Neither the name of the Company nor the names of its contributors
--    may be used to endorse or promote products derived from this
--    software without specific prior written permission.
--
-- This software is provided ``as is'', and any express or implied
-- warranties, including, but not limited to, the implied warranties of
-- merchantability and fitness for a particular purpose are disclaimed.
-- In no event shall the company or contributors be liable for any
-- direct, indirect, incidental, special, exemplary, or consequential
-- damages (including, but not limited to, procurement of substitute
-- goods or services; loss of use, data, or profits; or business
-- interruption) however caused and on any theory of liability, whether
-- in contract, strict liability, or tort (including negligence or
-- otherwise) arising in any way out of the use of this software, even
-- if advised of the possibility of such damage.
--

library IEEE;
use IEEE.std_logic_1164.all;
use IEEE.std_logic_unsigned.all;
use IEEE.std_logic_arith.all;
use IEEE.numeric_std.all;
use work.math_pack.all;

-- ----------------------------------------------------------------------
--                            Entity declaration
-- ----------------------------------------------------------------------
entity DMA_BUFFER_RX is
   generic(
      --! Number of channels; sets the width of ENABLE/DISCARD and, through
      --! log2(CHANNELS), the width of every channel index port
      CHANNELS          : integer := 8;
      --! Size of each buffer in bytes
      BUFFER_SIZE       : integer := 16384;
      --! Data align in bytes
      DATA_ALIGN        : integer := 8;
      --! Data width of module
      FLU_WIDTH         : integer := 512;
      --! Input FLU Specifics
      --! Width of RX_SOP_POS; the architecture asserts that it does not
      --! exceed log2((FLU_WIDTH/8) / DATA_ALIGN)
      SOP_WIDTH         : integer := 1
   );
   port(
      --! Common interface
      CLK               : in  std_logic;
      RESET             : in  std_logic;

      --! Enable specific channel
      ENABLE            : in  std_logic_vector(CHANNELS-1 downto 0);
      --! Discard enable on specific channel
      DISCARD           : in  std_logic_vector(CHANNELS-1 downto 0);

      --! FrameLink Unaligned interface
      RX_CHANNEL        : in  std_logic_vector(log2(CHANNELS)-1 downto 0);
      RX_DATA           : in  std_logic_vector(FLU_WIDTH-1 downto 0);
      RX_SOP_POS        : in  std_logic_vector(SOP_WIDTH-1 downto 0);
      RX_EOP_POS        : in  std_logic_vector(log2(FLU_WIDTH/8)-1 downto 0);
      RX_SOP            : in  std_logic;
      RX_EOP            : in  std_logic;
      RX_SRC_RDY        : in  std_logic;
      RX_DST_RDY        : out std_logic;

      --! Interface to DMA Controller
      --! NOTE(review): these four ports are declared inout although the
      --! architecture only drives them - confirm whether "out" was intended.
      --! PACKET_NEW / PACKET_DISC are asserted together with the internal
      --! packet-end strobe, selected by the discard flag of the packet record.
      PACKET_NEW        : inout std_logic;
      PACKET_DISC       : inout std_logic;
      --! The low log2(DATA_ALIGN) bits are always driven to zero
      PACKET_LENGTH     : inout std_logic_vector(log2(BUFFER_SIZE) downto 0);
      PACKET_CHANNEL    : inout std_logic_vector(log2(CHANNELS)-1 downto 0);

      --! FIFO-like interface to DMA Bus, without VALID signal
      --! Read n WORDs
      DMA_READ          : in  std_logic_vector(log2(FLU_WIDTH/(DATA_ALIGN*8)) downto 0);
      --! Read channel
      DMA_CHANNEL       : in  std_logic_vector(log2(CHANNELS)-1 downto 0);
      --! Outgoing data. Valid 1CLK after READ request (1CLK latency)
      --! NOTE(review): in this architecture DMA_DV is "DMA_READ /= 0" passed
      --! through three registers, which does not match the 1CLK statement
      --! above - confirm the real read latency.
      DMA_DATA          : out std_logic_vector(FLU_WIDTH-1 downto 0);

      DMA_DV            : out std_logic
   );
end entity;

-- ----------------------------------------------------------------------
--                      Architecture declaration
-- ----------------------------------------------------------------------
architecture behavioral of DMA_BUFFER_RX is

   -- Number of DATA_ALIGN-sized words (lanes) in one FLU data word.
   -- One FIFO is instantiated per lane and per channel.
   constant BUFFERS        : integer := (FLU_WIDTH / 8) / DATA_ALIGN;
   -- Width of one lane in bits
   constant BUFFER_WIDTH   : integer := (FLU_WIDTH / BUFFERS);
   -- Number of items passed to every lane FIFO (ITEMS generic)
   constant BLOCK_COUNT    : integer := (BUFFER_SIZE / DATA_ALIGN) / BUFFERS;

   -- Width of one packet record: discard flag (1 bit), channel
   -- (log2(CHANNELS) bits) and length in DATA_ALIGN units
   constant FIFO_PACKET_WIDTH   : integer := 2+log2(CHANNELS)+log2(BUFFER_SIZE)-log2(DATA_ALIGN);

   -- Lane index per lane
   type t_ptr                 is array(BUFFERS-1 downto 0) of integer range 0 to BUFFERS-1;
   -- One FLU word cut into lanes
   type t_data                is array(BUFFERS-1 downto 0) of std_logic_vector(FLU_WIDTH / BUFFERS-1 downto 0);
   -- One lane-split word per channel
   type t_datach              is array(CHANNELS-1 downto 0) of t_data;

   -- Lane pointer per channel
   type t_bufferptr           is array(CHANNELS-1 downto 0) of integer range 0 to BUFFERS-1;
   -- Per-lane strobes per channel
   type t_buffer              is array(CHANNELS-1 downto 0) of std_logic_vector(BUFFERS-1 downto 0);

   -- RX Signals
   -- Internal copy of RX_DST_RDY (driven as "not check_hold")
   signal rx_dst_rdy_o        : std_logic;

   -- RX_SOP_POS zero-padded on the right to a lane index
   signal rx_sop_pos_a        : std_logic_vector(log2(BUFFERS)-1 downto 0);
   -- rx_length_na rounded up to whole DATA_ALIGN units
   signal rx_length           : std_logic_vector(15-log2(DATA_ALIGN) downto 0);
   -- 16 bits taken from RX_DATA at the lane selected by rx_sop_pos_a
   signal rx_length_na        : std_logic_vector(15 downto 0);

   -- Input signals
   -- (registered copies of the RX interface, captured while rx_dst_rdy_o = '1')
   signal out_data            : std_logic_vector(FLU_WIDTH-1 downto 0);
   signal iin_channel         : integer range 0 to CHANNELS-1;
   signal in_data_valid       : std_logic;
   signal in_channel          : std_logic_vector(log2(CHANNELS)-1 downto 0);
   signal in_data             : std_logic_vector(FLU_WIDTH-1 downto 0);
   signal in_sop_pos          : std_logic_vector(log2(BUFFERS)-1 downto 0);
   signal in_eop_pos          : std_logic_vector(log2(BUFFERS)-1 downto 0);
   signal in_sop              : std_logic;
   signal in_eop              : std_logic;
   signal in_ocp              : std_logic;
   signal in_src_rdy          : std_logic;
   signal in_length           : std_logic_vector(15-log2(DATA_ALIGN) downto 0);

   -- Check stage
   signal icheck_channel      : integer range 0 to CHANNELS-1;
   signal check_packet_end    : std_logic;
   signal check_packet_start  : std_logic;
   signal check_data          : std_logic_vector(FLU_WIDTH-1 downto 0);
   signal check_data_valid    : std_logic;
   signal check_channel       : std_logic_vector(log2(CHANNELS)-1 downto 0);
   signal check_sop_pos       : std_logic_vector(log2(BUFFERS)-1 downto 0);
   signal check_eop_pos       : std_logic_vector(log2(BUFFERS)-1 downto 0);
   signal check_sop           : std_logic;
   signal check_eop           : std_logic;
   signal check_ocp           : std_logic;
   signal check_length        : std_logic_vector(15-log2(DATA_ALIGN) downto 0);
   -- Set for one cycle when the starting packet is dropped
   signal check_discard       : std_logic := '0';
   -- Stays set while the remaining words of a dropped packet pass by
   signal check_discarding    : std_logic := '0';
   -- While set, RX_DST_RDY is low and the check stage outputs are idle
   signal check_hold          : std_logic := '0';

   -- Per-lane write enables derived from in_sop_pos / in_eop_pos
   signal check_write_sop     : std_logic_vector(BUFFERS-1 downto 0);
   signal check_write_eop     : std_logic_vector(BUFFERS-1 downto 0);
   signal check_write_sopaeop : std_logic_vector(BUFFERS-1 downto 0);
   signal check_write_sop_count:integer range 0 to BUFFERS;
   signal check_write_eop_count:integer range 0 to BUFFERS;

   -- Snapshot of the held packet (length, buffer usage, channel)
   signal check_hold_length   : std_logic_vector(15-log2(DATA_ALIGN) downto 0);
   signal check_hold_usage    : integer range 0 to BUFFER_SIZE / DATA_ALIGN;
   signal check_hold_channel  : std_logic_vector(log2(CHANNELS)-1 downto 0);

   -- Split stage
   signal split_packet_end    : std_logic;
   -- Channel of the packet currently in progress
   signal split_reg_ichannel  : integer range 0 to CHANNELS-1;
   signal split_channel1      : integer range 0 to CHANNELS-1;
   signal split_channel2      : integer range 0 to CHANNELS-1;
   signal split_data          : t_data;
   signal split_start1        : integer range 0 to BUFFERS-1;
   signal split_start2        : integer range 0 to BUFFERS-1;
   signal split_write1        : std_logic_vector(BUFFERS-1 downto 0);
   signal split_write2        : std_logic_vector(BUFFERS-1 downto 0);
   signal split_merge         : std_logic_vector(BUFFERS-1 downto 0);
   signal split_index         : integer range 0 to BUFFERS-1;

   -- Number of lanes written by write group 1 / 2 in this word
   signal split_count_write1  : integer range 0 to BUFFERS;
   signal split_count_write2  : integer range 0 to BUFFERS;

   -- Rotation offsets: last_write of the target channel plus split_start
   signal next_write1         : integer range 0 to BUFFERS-1;
   signal next_write2         : integer range 0 to BUFFERS-1;

   -- Rotate stage
   signal rotate_packet_end   : std_logic;
   signal rotate_write1       : std_logic_vector(BUFFERS-1 downto 0);
   signal rotate_write2       : std_logic_vector(BUFFERS-1 downto 0);
   signal rotate_data1        : t_data;
   signal rotate_data2        : t_data;

   -- One-hot decoded target channels of write group 1 / 2
   signal split_chdec1        : std_logic_vector(CHANNELS-1 DOWNTO 0);
   signal split_chdec2        : std_logic_vector(CHANNELS-1 DOWNTO 0);
   signal rotate_chdec1       : std_logic_vector(CHANNELS-1 DOWNTO 0);
   signal rotate_chdec2       : std_logic_vector(CHANNELS-1 DOWNTO 0);

   -- Buffer write stage
   signal buffer_packet_end   : std_logic;
   signal buffer_write        : t_buffer;
   signal buffer_write_data   : t_datach;

   -- Buffer usage
   -- buffer_usage is "written - readen", both read at address in_channel
   signal buffer_usage        : std_logic_Vector(log2(BUFFER_SIZE / DATA_ALIGN)-1 downto 0);
   signal written_wr          : std_logic_Vector(log2(BUFFER_SIZE / DATA_ALIGN)-1 downto 0);
   signal written_rd          : std_logic_Vector(log2(BUFFER_SIZE / DATA_ALIGN)-1 downto 0);
   signal written             : std_logic_Vector(log2(BUFFER_SIZE / DATA_ALIGN)-1 downto 0);
   signal written_we          : std_logic;

   signal readen_wr           : std_logic_Vector(log2(BUFFER_SIZE / DATA_ALIGN)-1 downto 0);
   signal readen_rd           : std_logic_Vector(log2(BUFFER_SIZE / DATA_ALIGN)-1 downto 0);
   signal readen              : std_logic_Vector(log2(BUFFER_SIZE / DATA_ALIGN)-1 downto 0);
   signal readen_we           : std_logic;

   -- Per-channel rotation pointer of the write side; it is decremented
   -- (mod BUFFERS) by the number of lanes written
   signal last_write          : t_bufferptr;
   -- Per-channel rotation pointer of the read side; it is incremented
   -- (mod BUFFERS) by DMA_READ
   signal last_read           : t_bufferptr;

   -- Signals for READ stage
   -- (last_read of the addressed channel delayed by 1, 2 and 3 clock cycles)
   signal reg_last_read       : integer range 0 to BUFFERS-1;
   signal reg_reg_last_read   : integer range 0 to BUFFERS-1;
   signal reg_reg_reg_last_read: integer range 0 to BUFFERS-1;

   -- Buffer read
   signal read                : std_logic_vector(BUFFERS-1 downto 0);
   signal read_rotated        : std_logic_vector(BUFFERS-1 downto 0);
   signal buffer_read         : t_buffer;
   signal buffer_read_data    : t_datach;
   signal buffer_data         : t_data;

   -- Delay chain that produces DMA_DV from "DMA_READ /= 0"
   signal buffer_dv           : std_logic;
   signal buffer_dv2          : std_logic;

   signal iDMA_CHANNEL        : integer range 0 to CHANNELS-1;
   signal reg_dma_channel     : std_logic_vector(log2(CHANNELS)-1 downto 0);
   signal reg_reg_dma_channel : std_logic_vector(log2(CHANNELS)-1 downto 0);

   signal reg_dma_read        : std_logic_vector(log2(FLU_WIDTH/(DATA_ALIGN*8)) downto 0);

   -- Signals for Packet interface
   signal fifo_packet_in      : std_logic_vector(FIFO_PACKET_WIDTH-1 downto 0);
   signal fifo_packet_out     : std_logic_vector(FIFO_PACKET_WIDTH-1 downto 0);

   -- Driven as "not ENABLE"; it has no reader in this file
   signal init                : std_logic_vector(CHANNELS-1 downto 0);

   -- Source of zero bits used for padding
   constant zeros             : std_logic_vector(511 downto 0) := (others => '0');

   -- Returns the number of elements of slv that are exactly '1'.
   -- Any other value ('0', 'H', 'X', ...) is not counted.
   function count_ones(slv : std_logic_vector) return natural is
      variable total : natural := 0;
   begin
      for idx in slv'range loop
         case slv(idx) is
            when '1'    => total := total + 1;
            when others => null;
         end case;
      end loop;
      return total;
   end function count_ones;

   -- Binary to one-hot decoder.
   -- The result has 2**ARG'length bits; only the bit whose index equals the
   -- unsigned value of ARG is '1'.
   function decode( ARG : std_logic_vector ) return std_logic_vector is
      variable onehot : std_logic_vector(2**(ARG'length)-1 downto 0);
   begin
      onehot := (others => '0');
      for pos in onehot'reverse_range loop
         if pos = conv_integer(ARG) then
            onehot(pos) := '1';
         end if;
      end loop;
      return onehot;
   end;

   -- Thermometer style decoder with 2**ARG'length result bits.
   --   reverse = false : bits 0 .. ARG are '1'
   --   reverse = true  : bits ARG .. top are '1'
   -- All remaining bits are '0'.
   function decodefill( ARG : std_logic_vector; reverse : boolean ) return std_logic_vector is
      variable mask : std_logic_vector(2**(ARG'length)-1 downto 0);
   begin
      mask := (others => '0');
      for pos in mask'reverse_range loop
         if reverse then
            if conv_integer(ARG) <= pos then
               mask(pos) := '1';
            end if;
         else
            if conv_integer(ARG) >= pos then
               mask(pos) := '1';
            end if;
         end if;
      end loop;
      return mask;
   end;

   -- Returns a vector of "count" bits whose lowest ARG bits are '1' and whose
   -- remaining bits are '0' (all ones when ARG >= count).
   function fill( ARG:natural; count : natural ) return std_logic_vector is
      variable mask : std_logic_vector(count-1 downto 0) := (others => '0');
   begin
      for pos in mask'range loop
         if pos < ARG then
            mask(pos) := '1';
         end if;
      end loop;
      return mask;
   end;

begin

   -- Per-channel "not enabled" flags
   init <= not ENABLE;

   -- TODO: Distribute RESET, add write / read channel chain for timing improve

   -- RX_SOP_POS is zero-padded up to a lane index, so it must not be wider
   -- than the lane index itself.
   assert (SOP_WIDTH <= log2((FLU_WIDTH/8) / DATA_ALIGN))
      report "RX Buffer cannot handle this SOP_WIDTH"
      severity error;

   -- ----------------------------------------------
   -- The buffer write path is split into 5 stages.
   -- 1) Get length of packet
   -- 2) Check whether the packet fits into the buffer, otherwise discard it
   --    (or HOLD it, but the first word of the packet is written immediately)
   -- 3) Parse packet - Split / merge
   -- 4) Rotate words to the correct buffer depending on last_write
   -- 5) Distribute signals to the corresponding channel

   -- ----------------------------------------------
   -- In process - only get length from packet

   RX_DST_RDY              <= rx_dst_rdy_o;

   -- Lane index of the first packet word: RX_SOP_POS padded with zeros on the
   -- right. The padding is sized from BUFFERS (the number of DATA_ALIGN lanes)
   -- so that the result always matches the width of rx_sop_pos_a; the
   -- previous hard-coded FLU_WIDTH/64 was only correct for DATA_ALIGN = 8.
   rx_sop_pos_a            <= RX_SOP_POS & zeros(log2(BUFFERS)-1 downto SOP_WIDTH);

   -- The lowest 16 bits of the first packet word carry the packet length
   rx_length_na            <= RX_DATA(conv_integer(rx_sop_pos_a)*BUFFER_WIDTH+15 downto conv_integer(rx_sop_pos_a)*BUFFER_WIDTH);

   -- Length in DATA_ALIGN units, rounded up when it is not a whole multiple
   rx_length               <= rx_length_na(15 downto log2(DATA_ALIGN)) when conv_integer(rx_length_na(log2(DATA_ALIGN)-1 downto 0)) = 0 else
                              rx_length_na(15 downto log2(DATA_ALIGN)) + 1;

   iin_channel             <= conv_integer(in_channel);

   -- Input register stage.
   -- Captures the RX interface on every rising clock edge on which the buffer
   -- accepts data (rx_dst_rdy_o = '1'); otherwise the registers keep their
   -- values.
   reg_inp : process(CLK)
   begin
      if(CLK'event and CLK = '1') then
         -- Clock enable kept as a nested condition instead of being mixed
         -- into the edge expression
         if(rx_dst_rdy_o = '1') then
            -- The channel is sampled only at the start of a packet.
            -- NOTE(review): RX_SOP is not qualified by RX_SRC_RDY here -
            -- confirm that the source never raises SOP without SRC_RDY.
            if(RX_SOP = '1') then
               in_channel        <= RX_CHANNEL;
               -- Temporary for verification only:
--             in_channel        <= RX_DATA(conv_integer(rx_sop_pos_a)*BUFFER_WIDTH+16+log2(CHANNELS)-1 downto conv_integer(rx_sop_pos_a)*BUFFER_WIDTH+16);
            end if;

            in_length         <= rx_length;

            -- Align RX_SOP_POS and RX_EOP_POS to DATA_ALIGN:
            -- the byte position of EOP loses its low log2(DATA_ALIGN) bits,
            -- the SOP position is zero-padded up to log2(BUFFERS) bits.
            -- Both widths are derived from the generics (previously they were
            -- hard-coded for DATA_ALIGN = 8).
            in_eop_pos        <= RX_EOP_POS(log2(FLU_WIDTH/8)-1 downto log2(DATA_ALIGN));
            in_sop_pos        <= RX_SOP_POS & zeros(log2(BUFFERS)-1 downto SOP_WIDTH);

            -- SOP / EOP are only meaningful on valid words
            in_sop            <= RX_SRC_RDY and RX_SOP;
            in_eop            <= RX_SRC_RDY and RX_EOP;
            in_data_valid     <= RX_SRC_RDY;
            in_src_rdy        <= RX_SRC_RDY;
            in_data           <= RX_DATA;
         end if;
      end if;
   end process;

   -- ----------------------------------------------
   -- Check processes - check, if packet fits in buffer or not

   -- One-cycle-packet flag. Is set, when packet starts and ends in same clock cycle.
   -- (SOP and EOP in the same word with the SOP lane not after the EOP lane)
   in_ocp               <= '1' when in_sop = '1' and in_eop = '1' and in_sop_pos <= in_eop_pos else '0';
   icheck_channel       <= conv_integer(check_channel);

   -- Check stage register process.
   -- Three mutually exclusive cases are handled on every clock edge:
   --   1) a packet is being held (check_hold = '1'),
   --   2) a packet starts on a disabled channel or on a channel whose buffer
   --      usage has reached the threshold -> discard it or hold it,
   --   3) otherwise the input word is passed on.
   checkp : process (CLK)
   begin
      if(CLK'event and CLK = '1') then    -- TODO: count with READ latency!
         -- Defaults: forward the input registers; the branches below may
         -- override some of them (the last assignment wins)
         check_data           <= in_data;
         check_data_valid     <= in_data_valid;
         check_length         <= in_length;

         check_eop_pos        <= in_eop_pos;
         check_sop_pos        <= in_sop_pos;

         check_packet_start   <= in_sop;
         check_packet_end     <= in_eop;

         -- Generate write enable for starting packet
         check_write_sop      <= decodefill(in_sop_pos, true);
         -- Generate write enable for ending packet
         check_write_eop      <= decodefill(in_eop_pos, false);
         -- Generate write enable for one cycle packet
         check_write_sopaeop  <= decodefill(in_sop_pos, true) and decodefill(in_eop_pos, false);
         -- Count ones in write enable on sop_pos and eop_pos
         check_write_sop_count<= count_ones(decodefill(in_sop_pos, true));
         check_write_eop_count<= count_ones(decodefill(in_eop_pos, false));

         check_channel        <= in_channel;
         check_discard        <= '0';

         if(check_hold = '1') then
            -- Case 1: holding. Nothing is forwarded to the next stage.
            check_packet_start         <= '0';
            check_packet_end           <= '0';
            check_sop                  <= '0';
            check_eop                  <= '0';
            check_ocp                  <= '0';
            check_data_valid           <= '0';
            check_discard              <= '0';
            check_discarding           <= '0';

            -- Leave the hold state once the tracked usage is below the
            -- threshold (buffer size minus BUFFERS*8 words minus the length
            -- of the held packet)
            if(not(check_hold_usage >= ((BUFFER_SIZE / DATA_ALIGN) - BUFFERS *8 - check_hold_length))) then
               check_hold              <= '0';
            end if;

            -- Reads from the held channel reduce the tracked usage
            if(reg_dma_channel = check_hold_channel) then
               check_hold_usage        <= (check_hold_usage - conv_integer(reg_dma_read)) mod (BUFFER_SIZE/DATA_ALIGN);
            end if;
         elsif(in_sop = '1' and (ENABLE(iin_channel) = '0' or buffer_usage >= ((BUFFER_SIZE / DATA_ALIGN) - BUFFERS * 8 - in_length))) then
            -- Case 2: a packet starts, but the channel is disabled or its
            -- buffer usage has reached the threshold
            if(DISCARD(iin_channel) = '1' or ENABLE(iin_channel) = '0') then
               -- Discard the starting packet
               if(in_ocp = '1') then
                  -- The whole packet is in this word: nothing is written
                  check_ocp            <= '0';
                  check_sop            <= '0';
                  check_eop            <= '0';
                  check_data_valid     <= '0';
                  check_discard        <= '1';
               else
                  -- The packet continues in later words; an EOP seen in this
                  -- word is still forwarded unless a discard was already in
                  -- progress
                  check_ocp            <= '0';
                  check_sop            <= '0';
                  check_eop            <= in_eop and not check_discarding;
                  check_data_valid     <= in_eop and not check_discarding;
                  check_discard        <= '1';
                  check_discarding     <= '1';
               end if;
            else
               -- Hold the packet: remember its length, channel and the
               -- current buffer usage. RX_DST_RDY follows "not check_hold".
               check_hold              <= '1';
               check_hold_length       <= in_length;
               check_hold_channel      <= in_channel;
               check_hold_usage        <= conv_integer(buffer_usage);

               -- INFO: First word pass thru, even if packet holded.
               if(in_ocp = '1') then
                  check_ocp            <= '1';
                  check_sop            <= '1';
                  check_eop            <= '1';
                  check_data_valid     <= '1';
                  check_discard        <= '0';
               else
                  check_ocp            <= '0';
                  check_sop            <= '1';
                  check_eop            <= in_eop and not check_discarding;
                  check_data_valid     <= in_src_rdy;
                  check_discard        <= '0';
                  check_discarding     <= '0';
               end if;
            end if;
         else
            -- Case 3: normal operation; words of a packet that is being
            -- discarded are masked
            check_sop         <= in_sop;
            check_eop         <= in_eop and not check_discarding;
            check_ocp         <= in_ocp;
            check_data_valid  <= in_src_rdy and not check_discarding;

            -- The discard ends with the end of the packet
            if(in_eop = '1') then
               check_discarding  <= '0';
            end if;
         end if;
      end if;
   end process;

   -- Buffer usage bookkeeping.
   -- Two per-channel counters are kept in DP_DISTMEM instances: "written"
   -- accumulates the lengths of accepted packets and "readen" accumulates the
   -- words requested over the DMA interface. Their difference at the address
   -- of the incoming channel is the buffer usage seen by the check stage.
   -- NOTE(review): presumably port A of DP_DISTMEM is the read/write port
   -- and port B is read only - confirm against the DP_DISTMEM entity.

   -- Usage of the channel that is currently at the input
   buffer_usage      <= written - readen;

   -- If discard not enabled and buffer is full, RX_DST_RDY is set to 0
   rx_dst_rdy_o      <= not check_hold;

   -- "written" is increased by the packet length when an accepted packet starts
   written_we        <= check_sop and not check_discard;
   written_wr        <= written_rd + check_length(log2(BUFFER_SIZE/DATA_ALIGN)-1 downto 0);

   -- "readen" is updated in every cycle outside of reset
   readen_we         <= not RESET;
   readen_wr         <= readen_rd + reg_dma_read;

   reg_written_i: entity work.DP_DISTMEM
   generic map(
      ITEMS        => CHANNELS,
      DATA_WIDTH   => log2(BUFFER_SIZE/DATA_ALIGN),
      DISTMEM_TYPE => 16
   )
   port map(
      RESET   => RESET,
      WCLK    => CLK,

      -- Port A: addressed by the channel in the check stage
      ADDRA   => check_channel,
      WE      => written_we,
      DI      => written_wr,
      DOA     => written_rd,

      -- Port B: addressed by the channel at the input
      ADDRB   => in_channel,
      DOB     => written
   );

   reg_readen_i: entity work.DP_DISTMEM
   generic map(
      ITEMS        => CHANNELS,
      DATA_WIDTH   => log2(BUFFER_SIZE/DATA_ALIGN),
      DISTMEM_TYPE => 16
   )
   port map(
      RESET   => RESET,
      WCLK    => CLK,

      -- Port A: addressed by the registered DMA channel
      ADDRA   => reg_dma_channel,
      WE      => readen_we,
      DI      => readen_wr,
      DOA     => readen_rd,

      -- Port B: addressed by the channel at the input
      ADDRB   => in_channel,
      DOB     => readen
   );

   -- ----------------------------------------------
   -- Parse packet - Split / Merge process

   -- Cut the registered data word into BUFFERS lanes
   gen_split_writes : for i in 0 to BUFFERS-1 generate
      split_data(i)                 <= out_data(BUFFER_WIDTH*(i+1)-1 downto BUFFER_WIDTH*i);
   end generate;

   -- Split stage register process.
   -- For every word it produces up to two write groups:
   --   group 1 (split_write1, split_start1, split_channel1, split_chdec1)
   --   group 2 (split_write2, split_start2, split_channel2, split_chdec2)
   -- Group 2 is only used when one packet ends and another one starts in the
   -- same word on a different channel.
   splitp : process(CLK)
   begin
      if(CLK = '1' and CLK'event) then
         -- Defaults: no write, group 1 targets the packet in progress
         out_data                   <= check_data;
         split_start1               <= 0;
         split_start2               <= 0;
         split_write1               <= (others => '0');
         split_write2               <= (others => '0');
         split_count_write1         <= 0;
         split_count_write2         <= 0;

         split_merge                <= (others => '0');
         split_chdec1               <= decode(conv_std_logic_vector(split_reg_ichannel, log2(CHANNELS)));
         split_channel1             <= split_reg_ichannel;
         split_packet_end           <= check_packet_end;

         if(check_ocp = '1') then
            -- The packet starts and ends in this word
            split_write1            <= check_write_sopaeop;
            split_count_write1      <= count_ones(check_write_sopaeop);
            split_start1            <= conv_integer(check_sop_pos);
            split_channel1          <= icheck_channel;
            split_chdec1            <= decode(conv_std_logic_vector(icheck_channel, log2(CHANNELS)));
         elsif(check_sop = '1' and check_eop = '1') then
            -- The previous packet ends (group 1) and a new packet starts
            -- (group 2) in this word
            split_write1            <= check_write_eop;
            split_write2            <= check_write_sop;
            split_count_write1      <= count_ones(check_write_eop);
            split_count_write2      <= count_ones(check_write_sop);
            split_start1            <= 0;
            split_start2            <= conv_integer(check_sop_pos);
            split_chdec1            <= decode(conv_std_logic_vector(split_reg_ichannel, log2(CHANNELS)));
            split_chdec2            <= decode(conv_std_logic_vector(icheck_channel, log2(CHANNELS)));
            split_channel1          <= split_reg_ichannel;
            split_channel2          <= icheck_channel;
            split_reg_ichannel      <= icheck_channel;

            -- Lane distance between the end of the first part and the SOP lane
            split_index             <= (conv_integer(check_sop_pos) - check_write_eop_count) mod BUFFERS;

            if(split_reg_ichannel = icheck_channel) then
               -- Both packets belong to the same channel: merge them into a
               -- single contiguous write of group 1
               split_merge          <= not check_write_eop;
               split_write1         <= fill(check_write_sop_count + check_write_eop_count, BUFFERS);
               split_write2         <= (others => '0');
               split_count_write1   <= check_write_sop_count + check_write_eop_count;
               split_count_write2   <= 0;
            end if;
         elsif(check_sop = '1' and check_eop = '0') then
            -- A packet starts and continues in the next word
            split_write1            <= check_write_sop;
            split_count_write1      <= count_ones(check_write_sop);
            split_start1            <= conv_integer(check_sop_pos);
            split_channel1          <= icheck_channel;
            split_chdec1            <= decode(conv_std_logic_vector(icheck_channel, log2(CHANNELS)));
            split_reg_ichannel      <= icheck_channel;
         elsif(check_sop = '0' and check_eop = '1') then
            -- The packet in progress ends in this word
            split_write1            <= check_write_eop;
            split_count_write1      <= count_ones(check_write_eop);
         elsif(check_data_valid = '1') then
            -- Middle word of a packet: every lane is written. The count is
            -- the number of lanes (BUFFERS) rather than a literal 8, so it
            -- stays correct and in range for any FLU_WIDTH / DATA_ALIGN.
            split_write1            <= (others => '1');
            split_count_write1      <= BUFFERS;
         end if;
      end if;
   end process;

   -- ----------------------------------------------
   -- Input rotator: rotation offsets and pipeline registers

   -- Rotation offset of each write group: write pointer of the target channel
   -- plus the lane at which the group starts within the word
   next_write2                      <= (split_start2 + last_write(split_channel2)) mod BUFFERS;
   next_write1                      <= (split_start1 + last_write(split_channel1)) mod BUFFERS;

   -- Delay the packet-end flag and the decoded channels by one clock cycle so
   -- that they stay aligned with the rotated data
   rotator_regs : process(CLK)
   begin
      if(CLK = '1' and CLK'event) then
         rotate_chdec2              <= split_chdec2;
         rotate_chdec1              <= split_chdec1;
         rotate_packet_end          <= split_packet_end;
      end if;
   end process;

   -- Per-channel write pointer update.
   -- The pointer of a channel moves back (mod BUFFERS) by the number of lanes
   -- written to that channel; write group 1 has priority over write group 2.
   gen_last_write: for i in 0 to CHANNELS-1 generate
      last_writep : process(CLK)
         variable consumed : integer range 0 to BUFFERS;
      begin
         if(CLK'event and CLK = '1') then
            if(split_chdec1(i) = '1' or split_chdec2(i) = '1') then
               if(split_chdec1(i) = '1') then
                  consumed          := split_count_write1;
               else
                  consumed          := split_count_write2;
               end if;

               last_write(i)        <= (last_write(i) - consumed) mod BUFFERS;
            end if;
         end if;
      end process;
   end generate;

   -- Lane rotator.
   -- Output lane i of each write group takes the data and write enable of
   -- input lane (next_write + i) mod BUFFERS. Lanes of group 1 that are
   -- marked in split_merge take their data from a lane shifted further by
   -- split_index.
   gen_rotator_buffers: for i in 0 to BUFFERS-1 generate
      rotator_regs : process(CLK)
         variable lane1 : integer range 0 to BUFFERS-1;
         variable lane2 : integer range 0 to BUFFERS-1;
      begin
         if(CLK = '1' and CLK'event) then
            lane1 := (next_write1 + i) mod BUFFERS;
            lane2 := (next_write2 + i) mod BUFFERS;

            -- Write group 2 is a plain rotation
            rotate_write2(i)        <= split_write2(lane2);
            rotate_data2(i)         <= split_data(lane2);

            -- Write group 1 may carry a merged packet
            rotate_write1(i)        <= split_write1(lane1);

            if(split_merge(lane1) = '1') then
               rotate_data1(i)      <= split_data((next_write1 + i + split_index) mod BUFFERS);
            else
               rotate_data1(i)      <= split_data(lane1);
            end if;
         end if;
      end process;
   end generate;

   -- ----------------------------------------------
   -- Read side: read pointer, delay registers and lane rotation

   iDMA_CHANNEL                  <= conv_integer(DMA_CHANNEL);

   read_signals : process(CLK)
      variable requested : integer;
   begin
      if(CLK'event and CLK = '1') then
         requested := conv_integer(DMA_READ);

         -- Data valid chain: request -> buffer_dv -> buffer_dv2 -> DMA_DV
         if(requested = 0) then
            buffer_dv            <= '0';
         else
            buffer_dv            <= '1';
         end if;
         buffer_dv2              <= buffer_dv;
         DMA_DV                  <= buffer_dv2;

         -- Registered copies of the read request
         reg_dma_read            <= DMA_READ;
         reg_dma_channel         <= DMA_CHANNEL;
         reg_reg_dma_channel     <= reg_dma_channel;

         -- Read pointer of the addressed channel advances by the requested
         -- number of words; the old value travels along with the request
         last_read(iDMA_CHANNEL) <= (last_read(iDMA_CHANNEL) + requested) mod BUFFERS;
         reg_last_read           <= last_read(iDMA_CHANNEL);
         reg_reg_last_read       <= reg_last_read;
         reg_reg_reg_last_read   <= reg_reg_last_read;

         -- Select the FIFO outputs of the channel that was requested two
         -- cycles ago
         buffer_data             <= buffer_read_data(conv_integer(reg_reg_dma_channel));

         -- Last pipeline register of the packet-end flag on the write side
         buffer_packet_end       <= rotate_packet_end;
      end if;
   end process;

   gen_read_signals : for i in 0 to BUFFERS-1 generate
      -- Output lane i takes FIFO lane (i + delayed read pointer) mod BUFFERS
      DMA_DATA(BUFFER_WIDTH*(i+1)-1 downto BUFFER_WIDTH*i)  <= buffer_data((i + reg_reg_reg_last_read) mod BUFFERS);
      -- Request strobes moved to the FIFO lanes selected by the read pointer
      read_rotated(i)            <= read((i - last_read(iDMA_CHANNEL)) mod BUFFERS);
      -- One request strobe for each of the first DMA_READ lanes
      read(i)                    <= '1' when DMA_READ > i else '0';
   end generate;

   -- One group of BUFFERS lane FIFOs per channel, together with the
   -- per-channel read strobes and write multiplexers.
   gen_fifo_channels : for i in 0 to CHANNELS-1 generate

      -- Registered read strobes: only the addressed channel is read
      read_signals : process(CLK)
      begin
         if(CLK'event and CLK = '1') then
            if(iDMA_CHANNEL /= i) then
               buffer_read(i) <= (others => '0');
            else
               buffer_read(i) <= read_rotated;
            end if;
         end if;
      end process;

      -- Combinational write multiplexer: write group 1 has priority over
      -- write group 2; a channel selected by neither group is not written
      -- and sees the data of group 1.
      gen_write_signals : process(rotate_chdec1, rotate_chdec2, rotate_write1, rotate_write2, rotate_data1, rotate_data2)
      begin
         if(rotate_chdec1(i) = '1') then
            buffer_write_data(i) <= rotate_data1;
            buffer_write(i)      <= rotate_write1;
         elsif(rotate_chdec2(i) = '1') then
            buffer_write_data(i) <= rotate_data2;
            buffer_write(i)      <= rotate_write2;
         else
            buffer_write_data(i) <= rotate_data1;
            buffer_write(i)      <= (others => '0');
         end if;
      end process;

      -- Generate 2D array of buffers (channel i, lane j)
      gen_fifo_buffers: for j in 0 to BUFFERS-1 generate
         fifo: entity work.DMA_BUFFER_RX_FIFO
         generic map(
            ITEMS                => BLOCK_COUNT,
            DATA_WIDTH           => BUFFER_WIDTH
         )
         port map(
            RESET                => RESET,
            CLK                  => CLK,

            -- Write side
            IN_WRITE             => buffer_write(i)(j),
            IN_DATA              => buffer_write_data(i)(j),

            -- Read side
            OUT_READ             => buffer_read(i)(j),
            OUT_DATA             => buffer_read_data(i)(j)
         );
      end generate;
   end generate;

   -- ------------------------------------
   -- Packet record FIFO for the PACKET interface
   --
   -- Record layout (LSB first): discard flag, channel, length in DATA_ALIGN
   -- units. A record is written when a packet starts in the check stage and
   -- is read when the packet end reaches the buffer write stage.
   -- NOTE(review): FULL and EMPTY of the FIFO are left open, so overflow or
   -- underflow of the 8 records is not observed here - confirm it cannot occur.

   fifo_packet_in(FIFO_PACKET_WIDTH-1 downto log2(CHANNELS)+1) <= check_length(log2(BUFFER_SIZE)-log2(DATA_ALIGN) downto 0);
   fifo_packet_in(log2(CHANNELS) downto 1)                     <= check_channel;
   fifo_packet_in(0)                                           <= check_discard;

   packet_fifo: entity work.FIFO
   generic map(
      ITEMS             => 8,
      DATA_WIDTH        => FIFO_PACKET_WIDTH
   )
   port map(
      CLK               => CLK,
      RESET             => RESET,

      -- Write side
      WRITE_REQ         => check_packet_start,
      DATA_IN           => fifo_packet_in,
      FULL              => open,
      LSTBLK            => open,

      -- Read side
      READ_REQ          => buffer_packet_end,
      DATA_OUT          => fifo_packet_out,
      EMPTY             => open
   );

   -- The length is extended by log2(DATA_ALIGN) zero bits; the channel is
   -- taken straight from the record
   PACKET_LENGTH     <= fifo_packet_out(FIFO_PACKET_WIDTH-1 downto log2(CHANNELS)+1) & conv_std_logic_vector(0, log2(DATA_ALIGN));
   PACKET_CHANNEL    <= fifo_packet_out(log2(CHANNELS) downto 1);

   -- Exactly one of the two strobes accompanies every packet end, selected by
   -- the discard flag of the record
   PACKET_DISC       <= buffer_packet_end and     fifo_packet_out(0);
   PACKET_NEW        <= buffer_packet_end and not fifo_packet_out(0);

end architecture;
-- ----------------------------------------------------------------------------
