-- tx_path_100G.vhd : 100G PCS for Virtex7 with GTZ transceivers - TX module 
--                    top level
--
-- Copyright (C) 2012 CESNET
-- Author(s): Stepan Friedl <friedl@cesnet.cz>
--
-- Redistribution and use in source and binary forms, with or without
-- modification, are permitted provided that the following conditions
-- are met:
-- 1. Redistributions of source code must retain the above copyright
--    notice, this list of conditions and the following disclaimer.
-- 2. Redistributions in binary form must reproduce the above copyright
--    notice, this list of conditions and the following disclaimer in
--    the documentation and/or other materials provided with the
--    distribution.
-- 3. Neither the name of the Company nor the names of its contributors
--    may be used to endorse or promote products derived from this
--    software without specific prior written permission.
--
-- This software is provided ``as is'', and any express or implied
-- warranties, including, but not limited to, the implied warranties of
-- merchantability and fitness for a particular purpose are disclaimed.
-- In no event shall the company or contributors be liable for any
-- direct, indirect, incidental, special, exemplary, or consequential
-- damages (including, but not limited to, procurement of substitute
-- goods or services; loss of use, data, or profits; or business
-- interruption) however caused and on any theory of liability, whether
-- in contract, strict liability, or tort (including negligence or
-- otherwise) arising in any way out of the use of this software, even
-- if advised of the possibility of such damage.
--
-- $Id: comboi10g4_arch.vhd 13948 2010-06-04 15:49:43Z xfried00 $

library ieee;
use ieee.std_logic_1164.all;
use IEEE.STD_LOGIC_ARITH.ALL;
use IEEE.STD_LOGIC_UNSIGNED.ALL;

entity tx_path_100g is
   generic (
      -- Number of 64-bit CGMII words per clock; sets the width of
      -- CGMII_TXD (64 bits per lane) and CGMII_TXC (8 bits per lane)
      MAC_LANES : natural := 8
   );
   port (
      -- Resets, one per clock domain
      RESET_MII  : in  std_logic; -- reset for logic clocked by CGMII_CLK
      RESET_INT  : in  std_logic; -- reset for logic clocked by CLKINT
      RESET_PCS  : in  std_logic; -- reset for logic clocked by PCS_CLK

      -- CGMII (MAC side) interface
      CGMII_CLK  : in  std_logic;                                 -- CGMII (MAC) clock, 195.3125 MHz (5.12 ns)
      CGMII_TXD  : in  std_logic_vector(64*MAC_LANES-1 downto 0); -- TX data
      CGMII_TXC  : in  std_logic_vector( 8*MAC_LANES-1 downto 0); -- TX command

      -- Control
      CLKINT     : in  std_logic; -- internal clock, 2.5 * PCS_CLK = 201.416015625 MHz (4.96485 ns)
      SCR_BYPASS : in  std_logic; -- scrambler bypass
      ENC_BYPASS : in  std_logic; -- 64/66 encoder bypass

      -- PMA interface
      PCS_CLK    : in  std_logic;                          -- PCS data clock, ((CGMII_CLK*8/20)*(66/64)) = 80.56640625 MHz (12.412 ns)
      PCS_TXD    : out std_logic_vector(20*66-1 downto 0); -- TX data, PCS lanes 0..19, 66 bits each
      PCS_TXD_EN : in  std_logic                           -- enables the PCS_TXD output register and the alignment marker inserter
   );
end tx_path_100g;

-- ----------------------------------------------------------------------------
--       Architecture declaration  --  Transmit path of the 100GBASE-R PCS   --
-- ----------------------------------------------------------------------------
architecture structural of tx_path_100g is

   -- Data flow (clock domain in brackets):
   --
   --   CGMII -> ENCODE [CGMII_CLK] -> TXFIFO [CGMII_CLK -> CLKINT]
   --         -> SCRAMBLE (payload only, sync headers bypass it) [CLKINT]
   --         -> BLOCK_DISTR [CLKINT -> PCS_CLK] -> AM_INS [PCS_CLK]
   --         -> output register [PCS_CLK] -> PCS_TXD
   --
   -- Every 66-bit block is laid out as: bits 1:0 = sync header,
   -- bits 65:2 = 64-bit payload.
   --
   -- NOTE(review): BLOCK_DISTR is instantiated without a lane-count generic and
   -- the entity is named block_dist_8x20, so this path presumably elaborates
   -- only with MAC_LANES = 8 - confirm against block_dist_8x20.

   -- Number of 66-bit PCS lanes on the PMA side (PCS_TXD is 20*66 bits wide)
   constant PCS_LANES : natural := 20;

   -- Scrambler seed: all zeros
   constant SCR_SEED  : std_logic_vector(57 downto 0) := (others => '0');

   -- Encoder output, 66 bits per MAC lane
   signal encoded_data     : std_logic_vector(66*MAC_LANES-1 downto 0);

   -- TX FIFO read side
   signal data_from_fifo   : std_logic_vector(66*MAC_LANES-1 downto 0);
   signal fifo_rd_en       : std_logic; -- shared read/enable strobe of TXFIFO, SCRAMBLE and BLOCK_DISTR
   signal txfifo_empty     : std_logic; -- FIFO status flag, not used inside this module
   signal txfifo_full      : std_logic; -- FIFO status flag, not used inside this module

   -- Scrambler input/output (payload only) and the bypassed sync headers
   signal data_to_scr      : std_logic_vector(64*MAC_LANES-1 downto 0);
   signal data_from_scr    : std_logic_vector(64*MAC_LANES-1 downto 0);
   signal sync_hdr_bypass  : std_logic_vector(2*MAC_LANES-1 downto 0) := (others => '0');
   signal txdata_scr       : std_logic_vector(66*MAC_LANES-1 downto 0);

   -- PCS lane side
   signal txdata_scr_bd    : std_logic_vector(66*PCS_LANES-1 downto 0); -- after block distribution
   signal txdata_scr_bd_am : std_logic_vector(66*PCS_LANES-1 downto 0); -- after alignment marker insertion
   signal am_rd_en         : std_logic; -- connected to the RD port of AM_INS
   signal rd_en            : std_logic; -- read enable of the block distributor output

begin

   -- 64/66 encoder
   ENCODE: entity work.block_encode
   generic map (
      NUM_LANES => MAC_LANES
   )
   port map (
      RESET  => RESET_MII,
      BYPASS => ENC_BYPASS,
      CLK    => CGMII_CLK,
      TXD    => CGMII_TXD,
      TXC    => CGMII_TXC,
      Q      => encoded_data
   );

   -- FIFO for removing IPG for alignment marker insertion;
   -- written in the CGMII_CLK domain, read in the CLKINT domain
   TXFIFO: entity work.pcs_tx_fifo
   generic map (
      NUM_LANES => MAC_LANES
   )
   port map (
      RESET_D => RESET_MII,
      CLK     => CGMII_CLK,
      D       => encoded_data,
      --
      RESET_Q => RESET_INT,
      TXCLK   => CLKINT,
      RE      => fifo_rd_en,
      Q       => data_from_fifo,
      --
      FIFO_EMPTY_O => txfifo_empty,
      FIFO_FULL_O  => txfifo_full
   );

   -- Strip the 2-bit sync header from every block; only the 64-bit payload
   -- is scrambled.
   -- TODO: improve timing (this path is purely combinational; a register
   --       stage enabled by fifo_rd_en could be added here)
   GEN_SCR_DATA: for i in 0 to MAC_LANES-1 generate
      data_to_scr((i+1)*64-1 downto i*64) <= data_from_fifo(66*(i+1)-1 downto i*66+2);
   end generate;

   -- Store sync headers to bypass the scrambler. The register is enabled by
   -- fifo_rd_en, the same enable as the scrambler, whose output register
   -- (OREG => true) adds one clock cycle of delay.
   SYNC_HDR_STORE: process(CLKINT)
   begin
      if rising_edge(CLKINT) then
         if fifo_rd_en = '1' then
            for i in 0 to MAC_LANES-1 loop
               sync_hdr_bypass((i+1)*2-1 downto i*2) <= data_from_fifo(66*i+1 downto 66*i);
            end loop;
         end if;
      end if;
   end process;

   SCRAMBLE: entity work.scrambler_gen
   generic map (
      WIDTH => MAC_LANES*64,
      OREG  => true -- Q has one clock cycle delay when TRUE
   )
   port map (
      RESET  => RESET_INT,
      CLK    => CLKINT,
      EN     => fifo_rd_en,
      BYPASS => SCR_BYPASS,
      SEED   => SCR_SEED,
      D      => data_to_scr,
      Q      => data_from_scr
   );

   -- Reassemble the 66-bit blocks from scrambled payload and stored sync header
   GEN_TX_DATA: for i in 0 to MAC_LANES-1 generate
      txdata_scr(66*(i+1)-1 downto 66*i+2) <= data_from_scr(64*(i+1)-1 downto 64*i); -- 64-bit block scrambled payload
      txdata_scr(66*i+1 downto 66*i)       <= sync_hdr_bypass((i+1)*2-1 downto i*2); -- Sync bits
   end generate;

   -- Distribute the data among the 20 PCS lanes - use a width conversion FIFO
   BLOCK_DISTR: entity work.block_dist_8x20
   port map (
      -- Input port D - 8 word data, high freq
      RESET_D => RESET_INT,
      CLK_D   => CLKINT,
      RE_D    => fifo_rd_en,
      D       => txdata_scr,
      -- Output port Q - 20 word data, low freq
      RESET_Q => RESET_PCS,
      CLK_Q   => PCS_CLK,
      RE_Q    => rd_en,
      Q       => txdata_scr_bd
   );

   -- Read the distributor only when the output is enabled and the signal on
   -- AM_INS's RD port is asserted
   rd_en <= PCS_TXD_EN and am_rd_en;

   -- Insert alignment markers
   AM_INS: entity work.am_ins
   generic map (
      NUM_LANES => PCS_LANES
   )
   port map (
      RESET => RESET_PCS,
      CLK   => PCS_CLK,
      EN    => PCS_TXD_EN,
      RD    => am_rd_en,
      D     => txdata_scr_bd,
      Q     => txdata_scr_bd_am
   );

   -- Output data register; holds its value while PCS_TXD_EN is low
   TXD_OUT_REGS: process(PCS_CLK)
   begin
      if rising_edge(PCS_CLK) then
         if PCS_TXD_EN = '1' then
            PCS_TXD <= txdata_scr_bd_am;
         end if;
      end if;
   end process;

end structural;
