#ifndef PBBCELIB_H
#define PBBCELIB_H

/**
 * DFU operation codes - codes of operations supported by accelerator DFU_FP01
 *
 * These codes are used with functions %pb2dfu_start_op and %pb2dfu_restart_op
 * to select the operation performed in the accelerator.
 *
 * NOTE(review): in the formulas below mi(N) presumably denotes the memory
 * operand delivered by address generator DFUAG_N - confirm.
 */
#define DFU_VCOPY  1    /* copy vector          mi(0)[i] <= mi(1)[j] */
#define DFU_VADD   2    /* add vectors          mi(0)[i] <= mi(1)[j] + mi(2)[k] */
#define DFU_VMUL   3    /* mul vectors          mi(0)[i] <= mi(1)[j] * mi(2)[k] */
#define DFU_VMAC   4    /* multiply-accumulate  mi(0)[i] <= (mi(1)[j] * mi(2)[k]) + mi(3)[l]  */
#define DFU_DPROD  5    /* Dot Product          mi(0)[0] <= Sum(mi(1)[j] * mi(2)[k]) */
#define DFU_VSUB     6  /* Vector subtraction                     mi(0)[i] <= mi(1)[j] - mi(2)[k] */
#define DFU_VSUM     7  /* Summation                              mi(0)[0] <= Sum(mi(1)[i] - mi(2)[i]) */
#define DFU_VCMPLT   8  /* Vector less-than comparison            mi(0)[i] <= (mi(1)[i] < mi(2)[i]) ? 0xffff_ffff : 0x0000_0000 */
#define DFU_VSAD     9  /* Vector square-add                      mi(0)[i] <= mi(2)[j] + ( mi(1)[k] * mi(1)[k] ) */
#define DFU_VMAX     10 /* Maximum value in a vector              mi(0)[0] <= MAX(mi(1)[i]) */
#define DFU_VMIN     11 /* Minimum value in a vector              mi(0)[0] <= MIN(mi(1)[i]) */
#define DFU_INDEXMAX 12 /* Index of the maximal value in a vector mi(0)[0] <= INDEXMAX(mi(1)[i]) */
#define DFU_INDEXMIN 13 /* Index of the minimal value in a vector mi(0)[0] <= INDEXMIN(mi(1)[i]) */
#define DFU_VSQR     14 /* Vector square                          mi(0)[i] <= mi(1)[j] * mi(1)[j] */
#define DFU_VSELECT  15 /* Selection                              mi(0)[i] <= (mi(1)[j] != 0x0000_0000) ? mi(2)[k] : mi(3)[l] */
#define DFU_VGTE     16 /* Vector element-wise greater or equal   mi(0)[i] <= (mi(1)[j] < mi(2)[k]) ? mi(2)[k] : mi(1)[j] */
#define DFU_VLTE     17 /* Vector element-wise lesser or equal    mi(0)[i] <= (mi(1)[j] < mi(2)[k]) ? mi(1)[j] : mi(2)[k] */
#define DFU_VBAND    18 /* Vector element-wise binary AND         mi(0)[i] <= BITWISE_AND(mi(1)[j], mi(2)[k]) */
#define DFU_VBOR     19 /* Vector element-wise binary OR          mi(0)[i] <= BITWISE_OR(mi(1)[j], mi(2)[k]) */
#define DFU_VBNOT    20 /* Vector element-wise binary NOT         mi(0)[i] <= BITWISE_NOT(mi(1)[j]) */
#define DFU_VCONVR   21 /* Convert RED color to float             mi(0)[i] <= color2float(mi(1)[j], 3) */
#define DFU_VCONVG   22 /* Convert GREEN color to float           mi(0)[i] <= color2float(mi(1)[j], 2) */
#define DFU_VCONVB   23 /* Convert BLUE color to float            mi(0)[i] <= color2float(mi(1)[j], 1) */
#define DFU_VMSUBAC  24 /* multiply-subtract    mi(0)[i] <= mi(3)[l] - (mi(1)[j] * mi(2)[k]) */
#define DFU_VAND3E   25 /* Logical AND between three consecutive elements */
#define DFU_VAND3V   26 /* Logical AND between three elements with the same index */
#define DFU_VOR3E    27 /* Logical OR between three consecutive elements */
#define DFU_VOR3V    28 /* Logical OR between three elements with the same index */


/**
 * DFU Arguments / Address Generators.
 *
 * These constants are passed as the 'dfuag' parameter of the pb2dfu_set_*
 * functions.
 *
 * NOTE 1: The values of DFUAG_* are in fact the base addresses of the address
 *         generators in the picoblaze i/o register space.
 * NOTE 2: This is for the 4-master-ag setup, with the indexing ags following.
 */
/* Main (master) address generators. */
#define DFUAG_0         0x30
#define DFUAG_1         0x40
#define DFUAG_2         0x50
#define DFUAG_3         0x60

/* Indexing address generators. */
#define DFUAG_IDX_0     0x70
#define DFUAG_IDX_1     0x80
#define DFUAG_IDX_2     0x90
#define DFUAG_IDX_3     0xA0

/**
 * Physical Local memory banks.
 * These constants are used as the 'mbank' parameter of pb2dfu_set_bank()
 * and pb2dfu_set_fulladdr().
 */
#define MBANK_A         0x00
#define MBANK_B         0x01
#define MBANK_C         0x02
#define MBANK_D         0x03

/**
 * Address generator flags/modes.
 * These are used in pb2dfu_set_agflags(); each flag occupies its own bit,
 * so the flags can be OR-ed together into a bitmap.
 * NOTE: The AGFL_USE_IDX and AGFL_STEP_IDXBND flags can be used independently.
 *       That is, it is possible to step the main AG by the boundary condition of the slave AG,
 *       while not using the indices received from it. However, the slave AG will still read data from
 *       the BRAM even if they are not used in the main AG.
 */
#define AGFL_USE_IDX            0x01    ///< Offset each address produced in the main AG by an index delivered from the slave AG.
#define AGFL_STEP_IDXBND        0x02    ///< Increment the address of the main AG only when the slave AG is reaching the boundary.
#define AGFL_NUMBGEN            0x04    ///< Number-generator mode (bypass BRAM)


/**
 * External opcodes incoming from the host CPU (MB),
 * as given in WAL.
 */
/* General ops. */
#define WAL_OP_GETID    0
#define WAL_OP_GETCAP   1
#define WAL_OP_GETLIC   2
/* JSY ops. */
#define WAL_BCE_JSY_OP_VCPY   0x03       /* copy vector    M0[i] <= M1[j]            */
#define WAL_BCE_JSY_OP_VADD   0x04       /* add vectors    M0[i] <= M1[j] + M2[k]    */
#define WAL_BCE_JSY_OP_RUNX   0x05
/* special */
#define WAL_OP_NONE     0xFF


/* BCE IDs for the WAL_OP_GETID operation */
#define WAL_BCE_JSY_ID_GENERIC_3D  1
#define WAL_BCE_JSY_ID_GENERIC_4D  2

/* Capabilities constants; each capability occupies its own bit of the 32b word. */
#define WAL_BCE_JSY_CAP_NONE    0x00000000
#define WAL_BCE_JSY_CAP_VCPY    0x00000001
#define WAL_BCE_JSY_CAP_VADD    0x00000002
#define WAL_BCE_JSY_CAP_RUNX    0x00000004


/***********************************/
/* functions for interfacing MB-PB */

/**
 * mbpb_exchange_data - exchange one byte with the host CPU (microblaze)
 * @data: the byte handed over to the microblaze
 *
 * The exchange uses the barrier synchronization.
 * This function blocks.
 * Return Value: NOTE(review): presumably the byte received from the
 * microblaze during the exchange - confirm against the implementation.
 */
unsigned char mbpb_exchange_data(unsigned char data);

/**
 * mb2pb_read_data - read (blocking) data from a host CPU (MB)
 *
 * The function communicates with a host CPU (MB) and waits for
 * one data byte which is acknowledged.
 * Return Value: The function returns value of the received data byte.
 */
// unsigned char mb2pb_read_data();
// #define mb2pb_read_data()               mbpb_exchange_data(0)

/**
 * pb2mb_write_data - send (blocking) data to a host CPU (MB)
 * @data: one byte which will be sent to MB
 *
 * The function sends one data byte and waits for acknowledgement.
 * Return Value: The function doesn't return any value.
 */
// void pb2mb_write_data(unsigned char data);
// #define pb2mb_write_data(data)          mbpb_exchange_data(data)


/**
 * pb2mb_report_running - report to the host that the firmware is up and busy
 *
 * Sets the R and B bits in the CFG Status register
 * to report that the firmware has successfully started and is busy.
 * This function does NOT block.
 * It should be called immediately when picoblaze starts up.
 * NOTE: This could be integrated into the picoblaze C library
 * to be called automatically upon startup.
 * Return Value: The function doesn't return any value.
 */
static inline void pb2mb_report_running(void);



/*
 * pb2mb_eoc - send (blocking) a message 'End Of Computation' to a host CPU (MB)
 * @data: one data byte which will be sent to MB with the EOC message
 *
 * The function sends the message End of Computation (EOC) with an optional
 * data byte to a host CPU (MB). Then it waits for acknowledgement from
 * the host CPU. The message should be sent anytime the accelerator wants
 * to synchronize computation with a host CPU. Usually, the message is
 * sent after the entire computation.
 * Return Value: The function doesn't return any value.
 */
// JSY: useless things not supported
// void pb2mb_eoc(unsigned char data);

/*
 * pb2mb_req_reset - send (blocking) a message 'Request for Reset' to a host CPU (MB)
 * @data: one data byte which will be sent to MB with the RR message
 *
 * The function sends the message 'Request for Reset' (RR) with an optional
 * data byte to a host CPU (MB). Then it waits for acknowledgement from
 * the host CPU. The message should be sent before resetting the controller
 * to inform the host CPU that the accelerator's controller will be
 * reset to the initial state.
 * Return Value: The function doesn't return any value.
 */
// JSY: silly things not supported
// void pb2mb_req_reset(unsigned char data);

/*
 * pb2mb_reset - reset accelerator itself
 *
 * The function sets the accelerator's controller to the initial state and
 * sets the accelerator's status bit.
 * Return Value: The function doesn't return any value.
 */
// JSY: silly things not supported
// void pb2mb_reset();


/**
 * write_bce_id_to_cmem - write the BCE ID into the cfg output memory
 * @fam1: the ID value to write
 *        (NOTE(review): presumably one of WAL_BCE_JSY_ID_* - confirm)
 */
void write_bce_id_to_cmem(unsigned char fam1);

/**
 * write_bce_cap_to_cmem - write the BCE CAP word (32b) into the cfg output memory
 * @cap3: byte of the capability word
 * @cap2: byte of the capability word
 * @cap1: byte of the capability word
 * @cap0: byte of the capability word
 *
 * NOTE(review): presumably cap3 is the most significant and cap0 the least
 * significant byte of the 32b word - confirm against the implementation.
 *
 * DEPRECATED, WILL BE REMOVED. USE write_dfu_caps_to_cmem();
 */
#if 1   /* TODO: some old b* tests still use it */
void write_bce_cap_to_cmem(unsigned char cap3, unsigned char cap2, unsigned char cap1, unsigned char cap0);
#endif

/**
 * write_dfu_caps_to_cmem - publish the DFU capabilities bitmap
 *
 * Write the whole DFU capabilities bitmap (256 bits = 32 B = 8 words) into
 * the cfg output memory, starting at address 0x81 of the ctrl/status memory.
 * Return Value: The function doesn't return any value.
 */
void write_dfu_caps_to_cmem(void);


/**
 * read_bce_cmem_u8 - read a u8 value from the BCE input cfg region
 * @cfgaddr: word address in the config mem.
 * @byteidx: [0; 3] Byte index within the word, 0 = LL, 3 = HH
 *
 * Return Value: The function returns the byte read.
 */
unsigned char read_bce_cmem_u8(unsigned char cfgaddr, unsigned char byteidx);

/**
 * read_bce_cmem_u16 - read a u16 value from the BCE input cfg region
 * @cfgaddr: word address in the config mem.
 * @widx: {0; 2} Word index, 0 = Lo, 2 = Hi
 *
 * Return Value: The function returns the 16b value read.
 * NOTE(review): the u16 is carried in an 'unsigned int', which is presumably
 * 16 bits wide on the target - confirm.
 */
unsigned int read_bce_cmem_u16(unsigned char cfgaddr, unsigned char widx);

/**
 * write_bce_cmem_u16 - write a u16 value into the BCE cfg memory
 * @cfgaddr: word address; shall be in the output status region, ie. 0x80 - 0xff.
 * @widx: {0; 2} Word index, 0 = Lo, 2 = Hi
 * @dt: the 16b value to write
 */
void write_bce_cmem_u16(unsigned char cfgaddr, unsigned char widx, unsigned int dt);

/**
 * pcnt_get_dfutime - get the running time of the last DFU operation
 *
 * Return Value: The running time in clock-cycles of the last DFU operation
 * executed.
 */
unsigned int pcnt_get_dfutime(void);

/**
 * pcnt_get_prgtime_lo, pcnt_get_prgtime_hi - get the program running time
 *
 * Get the program running time in clock-cycles.
 * The counter is 32b in total, thus two functions are provided to access
 * its lo/hi part.
 * Return Value: The lo (resp. hi) part of the counter.
 */
unsigned int pcnt_get_prgtime_lo(void);
unsigned int pcnt_get_prgtime_hi(void);

/**
 * pcnt_reset_prgtime - reset the program running-time counter
 *
 * Return Value: The function doesn't return any value.
 */
void pcnt_reset_prgtime(void);

/**
 * get_dfulic - read the licence state from the DFU
 *
 * Read the licence counter (or only the licence flag, which indicates that
 * the licence has run out) from the DFU.
 * Return Value: The licence counter or flag as delivered by the DFU.
 */
unsigned char get_dfulic(void);

/************************************/
/* functions for interfacing PB-DFU */

/**
 * pb2dfu_wait4hw - PB will wait for end of computation
 *
 * The function waits until the computation in the accelerator has finished.
 * The function should be called before a subsequent run of the next
 * operation. The next operation can be prepared before the waiting
 * to speed up the entire computation.
 * Return Value: Zero if ok, Non-zero on DFU error
 */
unsigned char pb2dfu_wait4hw(void);

/**
 * pb2dfu_start_op - start operation in DFU with specified length of data vectors
 * @op: DFU operation (constants %DFU_xxx)
 * @cnt: length of input data vectors
 *
 * The function combines two functions (pb2dfu_set_cnt and pb2dfu_restart_op).
 * Return Value: The function doesn't return any value.
 *
 * NOTE: When MCU_KIND=PB3A the function will stall the CPU until DFU is ready
 *       to accept the operation.
 */
static inline void pb2dfu_start_op(unsigned char op, unsigned int cnt);


/**
 * pb2dfu_start_insn - fetch an instruction from the VP-Memory,
 * load AGs 0, 1, 2 using the vectors in the instruction, and issue the DFU operation.
 *
 * @insn: instruction position (index) in the VP-Memory
 *
 * Return Value: The function doesn't return any value.
 *
 * NOTE: When MCU_KIND=PB3A the function will stall the CPU until DFU is ready
 *       to accept the operation.
 */
static inline void pb2dfu_start_insn(unsigned int insn);

/**
 * pb2dfu_restart_op - start operation in DFU
 * @op: DFU operation (constants %DFU_xxx)
 *
 * All parameters of the operation must be set before this function is called.
 * All parameters are registered, so only the parameters that changed since
 * the previous operation must be set again. On the other hand, the operation
 * code must always be given because the function starts the required
 * operation in the DFU.
 * Return Value: The function doesn't return any value.
 *
 * NOTE: When MCU_KIND=PB3A the function will stall the CPU until DFU is ready
 *       to accept the operation.
 */
static inline void pb2dfu_restart_op(unsigned char op);

/**
 * pb2dfu_set_cnt - set length of input data vectors for the next operation
 * @cnt: length of input data vectors
 *
 * The function sets the length of the input data vectors.
 * The simple operations (such as VADD, VMUL) will be performed @cnt-times
 * as one pipelined operation.
 * Return Value: The function doesn't return any value.
 */
static inline void pb2dfu_set_cnt(unsigned int cnt);

/**
 * pb2dfu_set_addr - set the base address of the vector for the given DFU argument
 * @dfuag: select the DFU argument number (constant %DFUAG_x)
 * @addr: the initial address of the vector
 *
 * Return Value: The function doesn't return any value.
 */
static inline void pb2dfu_set_addr(unsigned char dfuag, unsigned int addr);

/**
 * pb2dfu_set_bank - select the memory bank for the given DFU argument
 * @dfuag: select the DFU argument number (constant %DFUAG_x)
 * @mbank: the memory bank which will be used for the next operation (constant %MBANK_x)
 *
 * Return Value: The function doesn't return any value.
 */
static inline void pb2dfu_set_bank(unsigned char dfuag, unsigned char mbank);

/**
 * pb2dfu_set_fulladdr - set the full address (bank and offset) of the first element in the vector
 * @dfuag: select the DFU argument number (constant %DFUAG_x)
 * @mbank: the memory bank which will be used for the next operation (constant %MBANK_x)
 * @addr: the initial address of the vector
 *
 * Return Value: The function doesn't return any value.
 */
static inline void pb2dfu_set_fulladdr(unsigned char dfuag, unsigned char mbank, unsigned int addr);

/**
 * pb2dfu_set_vector - load the AG parameters from the vector partition
 * @dfuag: select the DFU argument number (constant %DFUAG_x)
 * @mvp: position in the vector partition memory (constants MVP_x)
 *
 * Return Value: The function doesn't return any value.
 */
static inline void pb2dfu_set_vector(unsigned char dfuag, unsigned char mvp);

/**
 * pb2dfu_set_inc - set the stride of the vector for the DFU argument
 * @dfuag: select the DFU argument number (constant %DFUAG_x)
 * @inc: increment between two elements of the vector (a signed value)
 *
 * Return Value: The function doesn't return any value.
 */
static inline void pb2dfu_set_inc(unsigned char dfuag, int inc);

/**
 * pb2dfu_set_bound_addr - set the boundary addresses for vector accesses
 * @dfuag: select the DFU argument number (constant %DFUAG_x)
 * @lo_bound: lower address boundary
 * @hi_bound: higher address boundary
 *
 * Return Value: The function doesn't return any value.
 */
void pb2dfu_set_bound_addr(unsigned char dfuag, unsigned int lo_bound, unsigned int hi_bound);

/**
 * pb2dfu_set_agflags - set the operation flags/mode of the specified address generator (DFU argument)
 * @dfuag: select the DFU argument number (constant %DFUAG_x)
 * @agflags: bitmap of flags to set (constants %AGFL_x)
 *
 * Return Value: The function doesn't return any value.
 */
static inline void pb2dfu_set_agflags(unsigned char dfuag, unsigned char agflags);

/**
 * pb2dfu_set_repetitions - set the number of repetitions of a DFU operation
 * @nrep: the number of times the following DFU operation will be restarted.
 *
 * Return Value: The function doesn't return any value.
 */
static inline void pb2dfu_set_repetitions(unsigned char nrep);


/** ******************************************************************************************** **/
/** functions for scalar network access */

/**
 * rh_get_node_id - get the address of the local node
 *
 * Return Value: The address of the local node.
 */
unsigned char rh_get_node_id(void);

/**
 * rh_set_target, rh_set_endp - set the destination address of outgoing messages
 * @target: the target part of the destination address
 * @endp: the endpoint part of the destination address
 */
void rh_set_target(unsigned char target);
void rh_set_endp(unsigned char endp);

/**
 * rh_set_flkind - set the flit kind
 * @flkind: the flit kind to set
 */
void rh_set_flkind(unsigned char flkind);

/**
 * rh_set_target_endp_flkind - set all the target, endpoint, and flkind fields
 * @target: the target part of the destination address
 * @endp: the endpoint part of the destination address
 * @flkind: the flit kind
 */
void rh_set_target_endp_flkind(unsigned char target, unsigned char endp, unsigned char flkind);

/**
 * rh_send_data_u8, rh_send_data_u16, rh_send_data_u32 - send data
 * @dt: the 8b / 16b payload to send
 * @dth: the high half of a 32b payload
 * @dtl: the low half of a 32b payload
 *
 * rh_send_data_u32() is usable only if 32b payloads are configured in hw.
 */
void rh_send_data_u8(unsigned char dt);
void rh_send_data_u16(unsigned int dt);
void rh_send_data_u32(unsigned int dth, unsigned int dtl);         /* if 32b payloads are configured in hw! */

/**
 * rh_is_rcv - is a message available in the receive buffer?
 *
 * Return Value: Non-zero (true) when a message is available in the receive
 * buffer.
 */
unsigned char rh_is_rcv(void);

/**
 * rh_wait_for_recv - wait until a message is available in the input buffer
 *
 * Return Value: The u8 data received (ie. the same as rh_get_rcv_data_u8()).
 */
unsigned char rh_wait_for_recv(void);

/**
 * rh_wait_for_recv_u16 - wait until a message is available in the input buffer
 *
 * Return Value: The u16 data received (ie. the same as rh_get_rcv_data_u16()).
 */
unsigned int rh_wait_for_recv_u16(void);

/**
 * rh_get_rcv_flkind - read the flit kind of the message in the receive buffer
 *
 * Precondition: rh_is_rcv() is true
 * Return Value: The flit kind of the message in the receive buffer.
 */
unsigned char rh_get_rcv_flkind(void);

/**
 * rh_get_rcv_data_u8, rh_get_rcv_data_u16, rh_get_rcv_data_u16hi - read the
 * message data in the receive buffer
 *
 * Precondition: rh_is_rcv() is true
 * Return Value: The message data, as a u8 or u16 value.
 * NOTE(review): rh_get_rcv_data_u16hi() presumably returns the upper 16 bits
 * of a 32b payload - confirm against the implementation.
 */
unsigned char rh_get_rcv_data_u8(void);
unsigned int rh_get_rcv_data_u16(void);
unsigned int rh_get_rcv_data_u16hi(void);

/**
 * rh_remove_rcv - remove (pop) a message from the receive buffer
 *
 * Precondition: rh_is_rcv() is true
 * Return Value: The function doesn't return any value.
 */
void rh_remove_rcv(void);

/** ******************************************************************************************** **/
/** Functions for remote control of workers over the network */

/**
 * rh_remote_reset - reset the remote worker
 * @target: the remote worker to reset
 *
 * The reset is done by writing 0x00 into the Control Word of the remote BCE,
 * so all its fields (ITAG, PM, G) are set to zero.
 * The function returns immediately.
 */
void rh_remote_reset(unsigned char target);

/**
 * rh_remote_start - start the remote worker
 * @target: the remote worker to start
 * @pm: the program memory the worker shall use
 *
 * In the remote Control Word the fields are set as follows:
 *   ITAG = 0, PM = pm, G = 1.
 * The function returns immediately.
 */
void rh_remote_start(unsigned char target, unsigned char pm);


/** ******************************************************************************************** **/
/** Functions for accessing the DMA configuration registers */

/**
 * DMA Flags, for use in the 'cmdfl' parameter of dma_start_channel().
 * DMAFL_READMEM and DMAFL_WRITEMEM are the two possible values of the
 * DMAFL_RNW bit.
 */
#define DMAFL_RNW       0x01    ///< read not write
#define DMAFL_READMEM   0x01    ///< read
#define DMAFL_WRITEMEM  0x00    ///< write
#define DMAFL_IRQ       0x02    ///< raise interrupt when transfer has finished. Interrupt is a single pulse [NOT IMPLEMENTED!!]
#define DMAFL_SUP       0x04    ///< sram_addr += length when transfer has finished
#define DMAFL_BUP       0x08    ///< bram_addr += length when transfer has finished

/**
 * DMA Channels, for use in the 'dmachan' parameter.
 * NOTE(review): the values step by 0x08; presumably they are the base
 * offsets of the channels in the DMA configuration registers - confirm.
 */
#define DMACHAN_0       0x00
#define DMACHAN_1       0x08
#define DMACHAN_2       0x10
#define DMACHAN_3       0x18
#define DMACHAN_4       0x20
#define DMACHAN_5       0x28
#define DMACHAN_6       0x30
#define DMACHAN_7       0x38

/**
 * DMA Channel masks, for use with the value returned by dma_get_status().
 * There is one bit per DMA channel.
 */
#define DMACHMASK_0     0x01
#define DMACHMASK_1     0x02
#define DMACHMASK_2     0x04
#define DMACHMASK_3     0x08
#define DMACHMASK_4     0x10
#define DMACHMASK_5     0x20
#define DMACHMASK_6     0x40
#define DMACHMASK_7     0x80

/**
 * dma_rawcfg_write_u8 - write 8b data to a DMA configuration register
 * @addr: address identifying the DMA configuration register
 * @dt: the 8b data to write
 */
void dma_rawcfg_write_u8(unsigned char addr, unsigned char dt);

/**
 * dma_rawcfg_read_u8 - read 8b data from a DMA configuration register
 * @addr: address identifying the DMA configuration register
 *
 * Return Value: The 8b data read.
 */
unsigned char dma_rawcfg_read_u8(unsigned char addr);

/**
 * dma_set_extaddr - set up the external DDR memory address for the given DMA channel
 * @dmachan: the DMA channel (constant %DMACHAN_x)
 * @addr_hi: high part of the 32b external address
 * @addr_lo: low part of the 32b external address
 *
 * The 32b external address [hi:lo] has to be aligned on an 8-byte boundary.
 */
void dma_set_extaddr(unsigned char dmachan, unsigned int addr_hi, unsigned int addr_lo);

/**
 * dma_set_locaddr - set up the local BRAM address for the given DMA channel
 * @dmachan: the DMA channel (constant %DMACHAN_x)
 * @addr: the 16b local address
 *
 * The 16b local address has to be aligned on a 4-byte boundary.
 */
void dma_set_locaddr(unsigned char dmachan, unsigned int addr);

/**
 * dma_set_length - set up the transfer length for the given DMA channel
 * @dmachan: the DMA channel (constant %DMACHAN_x)
 * @length: the transfer length
 *
 * The length has to be aligned on an 8-byte boundary.
 * The minimal transfer length is 8 Bytes (2 words).
 */
void dma_set_length(unsigned char dmachan, unsigned int length);

/**
 * dma_set_all - set up all the parameters of the DMA channel
 * @dmachan: the DMA channel (constant %DMACHAN_x)
 * @extaddr_hi: high part of the 32b external address
 * @extaddr_lo: low part of the 32b external address
 * @locaddr: the 16b local address
 * @length: the transfer length
 *
 * The 32b external address [hi:lo] has to be aligned on an 8-byte boundary.
 * The 16b local address has to be aligned on a 4-byte boundary.
 * The length has to be aligned on an 8-byte boundary.
 */
void dma_set_all(unsigned char dmachan,
                 unsigned int extaddr_hi, unsigned int extaddr_lo,
                 unsigned int locaddr, unsigned int length);

/**
 * dma_start_channel - start the DMA transfer on the channel given
 * @dmachan: the DMA channel (constant %DMACHAN_x)
 * @cmdfl: transfer flags (constants %DMAFL_x)
 *
 * The flags specify the transfer direction (DMAFL_RNW),
 * interrupt generation (DMAFL_IRQ -- NOT IMPLEMENTED!!),
 * and address incrementation (DMAFL_SUP, DMAFL_BUP).
 */
void dma_start_channel(unsigned char dmachan, unsigned char cmdfl);

/**
 * dma_get_status - read the DMA status register
 *
 * The status bits correspond to the eight DMA channels (see DMACHMASK_x).
 * A value of 1 in a bit indicates that the corresponding channel is busy,
 * a value of 0 indicates that the operation has completed.
 * Return Value: The content of the DMA status register.
 */
static inline unsigned char dma_get_status(void);


#include <pbbcelib-priv.h>


#endif /* PBBCELIB_H */
