root/usr/src/lib/udapl/udapl_tavor/tavor/dapl_tavor_hw.h
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#ifndef _DAPL_TAVOR_HW_H
#define _DAPL_TAVOR_HW_H

/*
 * dapl_tavor_hw.h
 *    Contains all the structure definitions and #defines for all Tavor
 *    hardware resources and registers.
 *    Most of these definitions have been replicated from the tavor_hw.h
 *    header file used by the tavor device driver.
 */

#ifdef __cplusplus
extern "C" {
#endif

#include "dapl.h"
#include "dapl_tavor_ibtf.h"


/*
 * Ownership flags used to define hardware or software ownership for
 * various Tavor resources
 */
#define TAVOR_HW_OWNER                  0x1U
#define TAVOR_SW_OWNER                  0x0

/*
 * Tavor Completion Queue Entries (CQE)
 *    Each CQE contains enough information for the software to associate the
 *    completion with the Work Queue Element (WQE) to which it corresponds.
 *
 *    Note: The following structure is not #define'd with both little-endian
 *    and big-endian definitions.  This is because each CQE's individual
 *    fields are not directly accessed except through the macros defined below.
 */

/*
 * The following defines are used for Tavor CQ error handling.  Note: For
 * CQEs which correspond to error events, the Tavor device requires some
 * special handling by software.  These defines are used to identify and
 * extract the necessary information from each error CQE, including status
 * code (above), doorbell count, and whether a error completion is for a
 * send or receive work request.
 */
#define TAVOR_CQE_ERR_STATUS_SHIFT      24
#define TAVOR_CQE_ERR_STATUS_MASK       0xFF
#define TAVOR_CQE_ERR_DBDCNT_MASK       0xFFFF
#define TAVOR_CQE_SEND_ERR_OPCODE       0xFF
#define TAVOR_CQE_RECV_ERR_OPCODE       0xFE
#define TAVOR_CQ_SYNC_AND_DB            0
#define TAVOR_CQ_RECYCLE_ENTRY          1

/*
 * These are the defines for the Tavor CQ entry types.  They are also
 * specified by the Tavor register specification.  They indicate what type
 * of work request is completing (for successful completions).  Note: The
 * "SND" or "RCV" in each define is used to indicate whether the completion
 * work request was from the Send work queue or the Receive work queue on
 * the associated QP.
 */
#define TAVOR_CQE_SND_RDMAWR            0x8
#define TAVOR_CQE_SND_RDMAWR_IMM        0x9
#define TAVOR_CQE_SND_SEND              0xA
#define TAVOR_CQE_SND_SEND_IMM          0xB
#define TAVOR_CQE_SND_RDMARD            0x10
#define TAVOR_CQE_SND_ATOMIC_CS         0x11
#define TAVOR_CQE_SND_ATOMIC_FA         0x12
#define TAVOR_CQE_SND_BIND_MW           0x18
#define TAVOR_CQE_RCV_RECV_IMM          0x3
#define TAVOR_CQE_RCV_RECV_IMM2         0x5
#define TAVOR_CQE_RCV_RECV              0x2
#define TAVOR_CQE_RCV_RECV2             0x4
#define TAVOR_CQE_RCV_RDMAWR_IMM        0x9
#define TAVOR_CQE_RCV_RDMAWR_IMM2       0xB

/*
 * These are the defines for the Tavor CQ completion statuses.  They are
 * specified by the Tavor register specification.
 */
#define TAVOR_CQE_SUCCESS               0x0
#define TAVOR_CQE_LOC_LEN_ERR           0x1
#define TAVOR_CQE_LOC_OP_ERR            0x2
#define TAVOR_CQE_LOC_EEC_ERR           0x3     /* unsupported: RD */
#define TAVOR_CQE_LOC_PROT_ERR          0x4
#define TAVOR_CQE_WR_FLUSHED_ERR        0x5
#define TAVOR_CQE_MW_BIND_ERR           0x6
#define TAVOR_CQE_BAD_RESPONSE_ERR      0x10
#define TAVOR_CQE_LOCAL_ACCESS_ERR      0x11
#define TAVOR_CQE_REM_INV_REQ_ERR       0x12
#define TAVOR_CQE_REM_ACC_ERR           0x13
#define TAVOR_CQE_REM_OP_ERR            0x14
#define TAVOR_CQE_TRANS_TO_ERR          0x15
#define TAVOR_CQE_RNRNAK_TO_ERR         0x16
#define TAVOR_CQE_LOCAL_RDD_VIO_ERR     0x20    /* unsupported: RD */
#define TAVOR_CQE_REM_INV_RD_REQ_ERR    0x21    /* unsupported: RD */
#define TAVOR_CQE_EEC_REM_ABORTED_ERR   0x22    /* unsupported: RD */
#define TAVOR_CQE_INV_EEC_NUM_ERR       0x23    /* unsupported: RD */
#define TAVOR_CQE_INV_EEC_STATE_ERR     0x24    /* unsupported: RD */

typedef struct tavor_hw_cqe_s {
        uint32_t        ver             :4;
        uint32_t                        :4;
        uint32_t        my_qpn          :24;
        uint32_t                        :8;
        uint32_t        my_ee           :24;
        uint32_t                        :8;
        uint32_t        rqpn            :24;
        uint32_t        sl              :4;
        uint32_t                        :4;
        uint32_t        grh             :1;
        uint32_t        ml_path         :7;
        uint32_t        rlid            :16;
        uint32_t        imm_eth_pkey_cred;
        uint32_t        byte_cnt;
        uint32_t        wqe_addr        :26;
        uint32_t        wqe_sz          :6;
        uint32_t        opcode          :8;
        uint32_t        send_or_recv    :1;
        uint32_t                        :15;
        uint32_t        owner           :1;
        uint32_t        status          :7;
} tavor_hw_cqe_t;
#define TAVOR_COMPLETION_RECV           0x0
#define TAVOR_COMPLETION_SEND           0x1

#define TAVOR_CQE_DEFAULT_VERSION       0x0

/*
 * The following macros are used for extracting (and in some cases filling in)
 * information from CQEs
 */
#define TAVOR_CQE_QPNUM_MASK            0x00FFFFFF
#define TAVOR_CQE_QPNUM_SHIFT           0
#define TAVOR_CQE_DQPN_MASK             0x00FFFFFF
#define TAVOR_CQE_DQPN_SHIFT            0
#define TAVOR_CQE_SL_MASK               0xF0000000
#define TAVOR_CQE_SL_SHIFT              28
#define TAVOR_CQE_GRH_MASK              0x00800000
#define TAVOR_CQE_GRH_SHIFT             23
#define TAVOR_CQE_PATHBITS_MASK         0x007F0000
#define TAVOR_CQE_PATHBITS_SHIFT        16
#define TAVOR_CQE_DLID_MASK             0x0000FFFF
#define TAVOR_CQE_DLID_SHIFT            0
#define TAVOR_CQE_OPCODE_MASK           0xFF000000
#define TAVOR_CQE_OPCODE_SHIFT          24
#define TAVOR_CQE_SENDRECV_MASK         0x00800000
#define TAVOR_CQE_SENDRECV_SHIFT        23
#define TAVOR_CQE_OWNER_MASK            0x00000080
#define TAVOR_CQE_OWNER_SHIFT           7

#define TAVOR_CQE_QPNUM_GET(cqe)                                        \
        ((BETOH_32(((uint32_t *)(cqe))[0]) & TAVOR_CQE_QPNUM_MASK) >>   \
            TAVOR_CQE_QPNUM_SHIFT)
#define TAVOR_CQE_DQPN_GET(cqe)                                         \
        ((BETOH_32(((uint32_t *)(cqe))[2]) & TAVOR_CQE_DQPN_MASK) >>    \
            TAVOR_CQE_DQPN_SHIFT)
#define TAVOR_CQE_SL_GET(cqe)                                           \
        ((BETOH_32(((uint32_t *)(cqe))[3]) & TAVOR_CQE_SL_MASK) >>      \
            TAVOR_CQE_SL_SHIFT)
#define TAVOR_CQE_GRH_GET(cqe)                                          \
        ((BETOH_32(((uint32_t *)(cqe))[3]) & TAVOR_CQE_GRH_MASK) >>     \
            TAVOR_CQE_GRH_SHIFT)
#define TAVOR_CQE_PATHBITS_GET(cqe)                                     \
        ((BETOH_32(((uint32_t *)(cqe))[3]) & TAVOR_CQE_PATHBITS_MASK) >>\
            TAVOR_CQE_PATHBITS_SHIFT)
#define TAVOR_CQE_DLID_GET(cqe)                                         \
        ((BETOH_32(((uint32_t *)(cqe))[3]) & TAVOR_CQE_DLID_MASK) >>    \
            TAVOR_CQE_DLID_SHIFT)
#define TAVOR_CQE_IMM_ETH_PKEY_CRED_GET(cqe)                            \
        (BETOH_32(((uint32_t *)(cqe))[4]))
#define TAVOR_CQE_IMM_ETH_PKEY_CRED_SET(cqe, arg)                       \
        (((uint32_t *)(cqe))[4] = HTOBE_32((arg)))
#define TAVOR_CQE_BYTECNT_GET(cqe)                                      \
        (BETOH_32(((uint32_t *)(cqe))[5]))
#define TAVOR_CQE_WQEADDRSZ_GET(cqe)                                    \
        (BETOH_32(((uint32_t *)(cqe))[6]))
#define TAVOR_CQE_WQEADDRSZ_SET(cqe, arg)                               \
        (((uint32_t *)(cqe))[6] = HTOBE_32((arg)))
#define TAVOR_CQE_OPCODE_GET(cqe)                                       \
        ((BETOH_32(((uint32_t *)(cqe))[7]) & TAVOR_CQE_OPCODE_MASK) >>  \
            TAVOR_CQE_OPCODE_SHIFT)
#define TAVOR_CQE_SENDRECV_GET(cqe)                                     \
        ((BETOH_32(((uint32_t *)(cqe))[7]) & TAVOR_CQE_SENDRECV_MASK) >>\
            TAVOR_CQE_SENDRECV_SHIFT)
#define TAVOR_CQE_OWNER_IS_SW(cqe)                                      \
        (((BETOH_32(((uint32_t *)(cqe))[7]) & TAVOR_CQE_OWNER_MASK) >>  \
            TAVOR_CQE_OWNER_SHIFT) == TAVOR_SW_OWNER)
#define TAVOR_CQE_OWNER_SET_HW(cqe)                                     \
        (((uint32_t *)(cqe))[7] =                                       \
            BETOH_32((TAVOR_HW_OWNER << TAVOR_CQE_OWNER_SHIFT) &        \
            TAVOR_CQE_OWNER_MASK))

/*
 * Tavor User Access Region (UAR)
 *    Tavor doorbells are each rung by writing to the doorbell registers that
 *    form a User Access Region (UAR).  A doorbell is a write-only hardware
 *    register which enables passing information from software to hardware
 *    with minimum software latency. A write operation from the host software
 *    to these doorbell registers passes information about the HCA resources
 *    and initiates processing of the doorbell data.  There are 6 types of
 *    doorbells in Tavor.
 *
 *    "Send Doorbell" for synchronizing the attachment of a WQE (or a chain
 *        of WQEs) to the send queue.
 *    "RD Send Doorbell" (Same as above, except for RD QPs) is not supported.
 *    "Receive Doorbell" for synchronizing the attachment of a WQE (or a chain
 *        of WQEs) to the receive queue.
 *    "CQ Doorbell" for updating the CQ consumer index and requesting
 *        completion notifications.
 *    "EQ Doorbell" for updating the EQ consumer index, arming interrupt
 *        triggering, and disarming CQ notification requests.
 *    "InfiniBlast" (which would have enabled access to the "InfiniBlast
 *        buffer") is not supported.
 *
 *    Note: The tavor_hw_uar_t below is the container for all of the various
 *    doorbell types.  Below we first define several structures which make up
 *    the contents of those doorbell types.
 *
 *    Note also: The following structures are not #define'd with both little-
 *    endian and big-endian definitions.  This is because each doorbell type
 *    is not directly accessed except through a single ddi_put64() operation
 *    (see tavor_qp_send_doorbell, tavor_qp_recv_doorbell, tavor_cq_doorbell,
 *    or tavor_eq_doorbell)
 */
typedef struct tavor_hw_uar_send_s {
        uint32_t        nda             :26;
        uint32_t        fence           :1;
        uint32_t        nopcode         :5;
        uint32_t        qpn             :24;
        uint32_t                        :2;
        uint32_t        nds             :6;
} tavor_hw_uar_send_t;
#define TAVOR_QPSNDDB_NDA_MASK          0xFFFFFFC0
#define TAVOR_QPSNDDB_NDA_SHIFT         0x20
#define TAVOR_QPSNDDB_F_SHIFT           0x25
#define TAVOR_QPSNDDB_NOPCODE_SHIFT     0x20
#define TAVOR_QPSNDDB_QPN_SHIFT         0x8

typedef struct tavor_hw_uar_recv_s {
        uint32_t        nda             :26;
        uint32_t        nds             :6;
        uint32_t        qpn             :24;
        uint32_t        credits         :8;
} tavor_hw_uar_recv_t;
#define TAVOR_QPRCVDB_NDA_MASK          0xFFFFFFC0
#define TAVOR_QPRCVDB_NDA_SHIFT         0x20
#define TAVOR_QPRCVDB_NDS_SHIFT         0x20
#define TAVOR_QPRCVDB_QPN_SHIFT         0x8
/* Max descriptors per Tavor doorbell */
#define TAVOR_QP_MAXDESC_PER_DB         256

typedef struct tavor_hw_uar_cq_s {
        uint32_t        cmd             :8;
        uint32_t        cqn             :24;
        uint32_t        param;
} tavor_hw_uar_cq_t;
#define TAVOR_CQDB_CMD_SHIFT            0x38
#define TAVOR_CQDB_CQN_SHIFT            0x20

#define TAVOR_CQDB_INCR_CONSINDX        0x01
#define TAVOR_CQDB_NOTIFY_CQ            0x02
#define TAVOR_CQDB_NOTIFY_CQ_SOLICIT    0x03
#define TAVOR_CQDB_SET_CONSINDX         0x04
#define TAVOR_CQDB_NOTIFY_NCQ           0x05
/* Default value for use in NOTIFY_CQ doorbell */
#define TAVOR_CQDB_DEFAULT_PARAM        0xFFFFFFFF

typedef struct tavor_hw_uar_eq_s {
        uint32_t        cmd             :8;
        uint32_t                        :18;
        uint32_t        eqn             :6;
        uint32_t        param;
} tavor_hw_uar_eq_t;

typedef struct tavor_hw_uar_s {
        uint32_t                rsrv0[4];       /* "RD Send" unsupported */
        uint64_t                send;           /* tavor_hw_uar_send_t */
        uint64_t                recv;           /* tavor_hw_uar_recv_t */
        uint64_t                cq;             /* tavor_hw_uar_cq_t   */
        uint64_t                eq;             /* tavor_hw_uar_eq_t   */
        uint32_t                rsrv1[244];
        uint32_t                iblast[256];    /* "InfiniBlast" unsupported */
} tavor_hw_uar_t;

typedef struct tavor_hw_uar32_s {
        uint32_t                rsrv0[4];       /* "RD Send" unsupported */
        uint32_t                send[2];        /* tavor_hw_uar_send_t */
        uint32_t                recv[2];        /* tavor_hw_uar_recv_t */
        uint32_t                cq[2];          /* tavor_hw_uar_cq_t   */
        uint32_t                eq[2];          /* tavor_hw_uar_eq_t   */
        uint32_t                rsrv1[244];
        uint32_t                iblast[256];    /* "InfiniBlast" unsupported */
} tavor_hw_uar32_t;


/*
 * Tavor Send Work Queue Element (WQE)
 *    A Tavor Send WQE is built of the following segments, each of which is a
 *    multiple of 16 bytes.  Note: Each individual WQE may contain only a
 *    subset of these segments described below (according to the operation type
 *    and transport type of the QP).
 *
 *    The first 16 bytes of ever WQE are formed from the "Next/Ctrl" segment.
 *    This segment contains the address of the next WQE to be executed and the
 *    information required in order to allocate the resources to execute the
 *    next WQE.  The "Ctrl" part of this segment contains the control
 *    information required to execute the WQE, including the opcode and other
 *    control information.
 *    The "Datagram" segment contains address information required in order to
 *    form a UD message.
 *    The "Bind" segment contains the parameters required for a Bind Memory
 *    Window operation.
 *    The "Remote Address" segment is present only in RDMA or Atomic WQEs and
 *    specifies remote virtual addresses and RKey, respectively.  Length of
 *    the remote access is calculated from the scatter/gather list (for
 *    RDMA-write/RDMA-read) or set to eight (for Atomic).
 *    The "Atomic" segment is present only in Atomic WQEs and specifies
 *    Swap/Add and Compare data.
 *
 *    Note: The following structures are not #define'd with both little-endian
 *    and big-endian definitions.  This is because their individual fields are
 *    not directly accessed except through macros defined below.
 */
typedef struct tavor_hw_snd_wqe_nextctrl_s {
        uint32_t        next_wqe_addr   :26;
        uint32_t                        :1;
        uint32_t        nopcode         :5;
        uint32_t        next_eec        :24;
        uint32_t        dbd             :1;
        uint32_t        fence           :1;
        uint32_t        nds             :6;

        uint32_t                        :28;
        uint32_t        c               :1;
        uint32_t        e               :1;
        uint32_t        s               :1;
        uint32_t        i               :1;
        uint32_t        immediate       :32;
} tavor_hw_snd_wqe_nextctrl_t;

#define TAVOR_WQE_NDA_MASK              0x00000000FFFFFFC0
#define TAVOR_WQE_NDS_MASK              0x3F
#define TAVOR_WQE_DBD_MASK              0x80

#define TAVOR_WQE_SEND_FENCE_MASK       0x40
#define TAVOR_WQE_SEND_NOPCODE_RDMAW    0x8
#define TAVOR_WQE_SEND_NOPCODE_RDMAWI   0x9
#define TAVOR_WQE_SEND_NOPCODE_SEND     0xA
#define TAVOR_WQE_SEND_NOPCODE_SENDI    0xB
#define TAVOR_WQE_SEND_NOPCODE_RDMAR    0x10
#define TAVOR_WQE_SEND_NOPCODE_ATMCS    0x11
#define TAVOR_WQE_SEND_NOPCODE_ATMFA    0x12
#define TAVOR_WQE_SEND_NOPCODE_BIND     0x18

#define TAVOR_WQE_SEND_SIGNALED_MASK    0x800000000ULL
#define TAVOR_WQE_SEND_EVENT_MASK       0x400000000ULL
#define TAVOR_WQE_SEND_SOLICIT_MASK     0x200000000ULL
#define TAVOR_WQE_SEND_IMMEDIATE_MASK   0x100000000ULL

#define TAVOR_WQE_SENDHDR_UD_AV_MASK    0xFFFFFFFFFFFFFFE0
#define TAVOR_WQE_SENDHDR_UD_DQPN_MASK  0xFFFFFF

typedef struct tavor_hw_snd_wqe_bind_s {
        uint32_t        ae              :1;
        uint32_t        rw              :1;
        uint32_t        rr              :1;
        uint32_t                        :29;
        uint32_t                        :32;
        uint32_t        new_rkey;
        uint32_t        reg_lkey;
        uint64_t        addr;
        uint64_t        len;
} tavor_hw_snd_wqe_bind_t;
#define TAVOR_WQE_SENDHDR_BIND_ATOM     0x8000000000000000ULL
#define TAVOR_WQE_SENDHDR_BIND_WR       0x4000000000000000ULL
#define TAVOR_WQE_SENDHDR_BIND_RD       0x2000000000000000ULL

typedef struct tavor_hw_snd_wqe_remaddr_s {
        uint64_t        vaddr;
        uint32_t        rkey;
        uint32_t                        :32;
} tavor_hw_snd_wqe_remaddr_t;

/*
 * Tavor Receive Work Queue Element (WQE)
 *    Like the Send WQE, the Receive WQE is built of 16-byte segments. The
 *    segment is the "Next/Ctrl" segment (defined below).  It is followed by
 *    some number of scatter list entries for the incoming message.
 *
 *    The format of the scatter-gather list entries is also shown below.  For
 *    Receive WQEs the "inline_data" field must be cleared (i.e. data segments
 *    cannot contain inline data).
 */
typedef struct tavor_hw_rcv_wqe_nextctrl_s {
        uint32_t        next_wqe_addr   :26;
        uint32_t                        :5;
        uint32_t        one             :1;
        uint32_t                        :24;
        uint32_t        dbd             :1;
        uint32_t                        :1;
        uint32_t        nds             :6;

        uint32_t                        :28;
        uint32_t        c               :1;
        uint32_t        e               :1;
        uint32_t                        :2;
        uint32_t                        :32;
} tavor_hw_rcv_wqe_nextctrl_t;

/*
 * This bit must be set in the next/ctrl field of all Receive WQEs
 * as a workaround to a Tavor hardware erratum related to having
 * the first 32-bits in the WQE set to zero.
 */
#define TAVOR_RCV_WQE_NDA0_WA_MASK      0x0000000100000000ULL
#define TAVOR_WQE_RCV_SIGNALED_MASK     0x800000000ULL
#define TAVOR_WQE_RCV_EVENT_MASK        0x400000000ULL

typedef struct tavor_hw_wqe_sgl_s {
        uint32_t        inline_data     :1;
        uint32_t        byte_cnt        :31;
        uint32_t        lkey;
        uint64_t        addr;
} tavor_hw_wqe_sgl_t;
#define TAVOR_WQE_SGL_BYTE_CNT_MASK     0x7FFFFFFF
#define TAVOR_WQE_SGL_INLINE_MASK       0x80000000
/*
 * The tavor_sw_wqe_dbinfo_t structure is used internally by the Tavor
 * driver to return information (from the tavor_wqe_mlx_build_nextctl() and
 * tavor_wqe_send_build_nextctl() routines) regarding the type of Tavor
 * doorbell necessary.
 */
typedef struct tavor_sw_wqe_dbinfo_s {
        uint_t  db_nopcode;
        uint_t  db_fence;
} tavor_sw_wqe_dbinfo_t;


/*
 * The following macros are used for building each of the individual
 * segments that can make up a Tavor WQE.  Note: We try not to use the
 * structures (with their associated bitfields) here, instead opting to
 * build and put 64-bit or 32-bit chunks to the WQEs as appropriate,
 * primarily because using the bitfields appears to force more read-modify-
 * write operations.
 *
 *    TAVOR_WQE_BUILD_REMADDR           - Builds Remote Address Segment using
 *                                          RDMA info from the work request
 *    TAVOR_WQE_BUILD_BIND              - Builds the Bind Memory Window
 *                                          Segment using bind info from the
 *                                          work request
 *    TAVOR_WQE_LINKNEXT                - Links the current WQE to the
 *                                          previous one
 *    TAVOR_WQE_LINKFIRST               - Links the first WQE on the current
 *                                          chain to the previous WQE
 */

#define TAVOR_WQE_BUILD_REMADDR(ra,  wr_rdma)                           \
{                                                                       \
        uint64_t                *tmp;                                   \
                                                                        \
        tmp     = (uint64_t *)(ra);                                     \
        tmp[0] = HTOBE_64((wr_rdma)->rdma_raddr);                       \
        tmp[1] = HTOBE_64((uint64_t)(wr_rdma)->rdma_rkey << 32);        \
}
#define TAVOR_WQE_BUILD_BIND(bn, wr_bind)                               \
{                                                                       \
        uint64_t                *tmp;                                   \
        uint64_t                bn0_tmp;                                \
        ibt_bind_flags_t        bind_flags;                             \
                                                                        \
        tmp        = (uint64_t *)(bn);                                  \
        bind_flags = (wr_bind)->bind_flags;                             \
        bn0_tmp    = (bind_flags & IBT_WR_BIND_ATOMIC) ?                \
            TAVOR_WQE_SENDHDR_BIND_ATOM : 0;                            \
        bn0_tmp   |= (bind_flags & IBT_WR_BIND_WRITE) ?                 \
            TAVOR_WQE_SENDHDR_BIND_WR : 0;                              \
        bn0_tmp   |= (bind_flags & IBT_WR_BIND_READ) ?                  \
            TAVOR_WQE_SENDHDR_BIND_RD : 0;                              \
        tmp[0] = HTOBE_64(bn0_tmp);                                     \
        tmp[1] = HTOBE_64(((uint64_t)(wr_bind)->bind_rkey_out << 32) |  \
                        (wr_bind)->bind_lkey);                          \
        tmp[2] = HTOBE_64((wr_bind)->bind_va);                          \
        tmp[3] = HTOBE_64((wr_bind)->bind_len);                         \
}

#define TAVOR_WQE_BUILD_DATA_SEG(ds, sgl)                               \
{                                                                       \
        uint64_t                *tmp;                                   \
                                                                        \
        tmp     = (uint64_t *)(ds);                                     \
        tmp[0]  = HTOBE_64(((uint64_t)((sgl)->ds_len &                  \
                TAVOR_WQE_SGL_BYTE_CNT_MASK) << 32) | (sgl)->ds_key);   \
        tmp[1]  = HTOBE_64((sgl)->ds_va);                               \
}

#define TAVOR_WQE_LINKNEXT(prev, ctrl, next)                            \
{                                                                       \
        ((uint64_t *)(prev))[1] = HTOBE_64((ctrl));                     \
        ((uint64_t *)(prev))[0] = HTOBE_64((next));                     \
}

#define TAVOR_WQE_LINKFIRST(prev, next)                                 \
{                                                                       \
        ((uint64_t *)(prev))[0] = HTOBE_64((next));                     \
}

/*
 * The following macro is used to convert WQE address and size into the
 * "wqeaddrsz" value needed in the tavor_wrid_entry_t (see below).
 */
#define TAVOR_QP_WQEADDRSZ(addr, size)                                  \
        ((((uintptr_t)(addr)) & ~TAVOR_WQE_NDS_MASK) |                   \
        ((size) & TAVOR_WQE_NDS_MASK))

/*
 * The following macros are used to calculate pointers to the Send or Receive
 * WQEs on a given QP, respectively
 */
#define TAVOR_QP_SQ_ENTRY(qp, tail)                                     \
        ((uint64_t *)((uintptr_t)((qp)->qp_sq_buf) +                    \
        ((tail) * (qp)->qp_sq_wqesz)))
#define TAVOR_QP_SQ_DESC(qp, tail)                                      \
        ((uint32_t)((qp)->qp_sq_desc_addr +                             \
        ((tail) * (qp)->qp_sq_wqesz)))
#define TAVOR_QP_RQ_ENTRY(qp, tail)                                     \
        ((uint64_t *)((uintptr_t)((qp)->qp_rq_buf) +                    \
        ((tail) * (qp)->qp_rq_wqesz)))
#define TAVOR_QP_RQ_DESC(qp, tail)                                      \
        ((uint32_t)((qp)->qp_rq_desc_addr +                             \
        ((tail) * (qp)->qp_rq_wqesz)))
#define TAVOR_SRQ_RQ_ENTRY(srq, tail)                                   \
        ((uint64_t *)((uintptr_t)((srq)->srq_wq_buf) +                  \
        ((tail) * (srq)->srq_wq_wqesz)))
#define TAVOR_SRQ_RQ_DESC(srq, tail)                                    \
        ((uint32_t)((srq)->srq_wq_desc_addr +                           \
        ((tail) * (srq)->srq_wq_wqesz)))
#define TAVOR_SRQ_WQ_INDEX(srq_wq_desc_addr, desc_addr, wqesz)          \
        ((uint32_t)(((desc_addr) - (srq_wq_desc_addr)) / (wqesz)))
#define TAVOR_SRQ_WQ_ENTRY(srq, index)                                  \
        ((uint64_t *)(((uintptr_t)(srq)->srq_addr) +                    \
        ((index) * (srq)->srq_wq_wqesz)))

/*
 * Maximum header before the data bytes when inlining data.
 * "Header" includes the link (nextctrl) struct, a remote address struct
 * (only for RDMA Write, not for Send) and the 32-bit byte count field.
 */
#define TAVOR_INLINE_HEADER_SIZE_MAX    0x40    /* from tavor driver */
#define TAVOR_INLINE_HEADER_SIZE_RDMAW  \
        (sizeof (tavor_hw_snd_wqe_nextctrl_t) + \
        sizeof (tavor_hw_snd_wqe_remaddr_t) + \
        sizeof (uint32_t))
#define TAVOR_INLINE_HEADER_SIZE_SEND \
        (sizeof (tavor_hw_snd_wqe_nextctrl_t) + \
        sizeof (uint32_t))

/*
 * Function signatures
 */
extern int dapls_tavor_max_inline(void);

#ifdef __cplusplus
}
#endif

#endif  /* _DAPL_TAVOR_HW_H */