root/usr/src/lib/udapl/udapl_tavor/tavor/dapl_tavor_ibtf_util.c
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include "dapl.h"
#include "dapl_adapter_util.h"
#include "dapl_evd_util.h"
#include "dapl_cr_util.h"
#include "dapl_lmr_util.h"
#include "dapl_rmr_util.h"
#include "dapl_cookie.h"
#include "dapl_ring_buffer_util.h"
#include "dapl_vendor.h"
#include "dapl_tavor_ibtf_impl.h"

/* Function prototypes */
/* Worker used by dapls_ib_cq_resize(); defined later in this file. */
static DAT_RETURN dapli_ib_cq_resize_internal(DAPL_EVD *, DAT_COUNT);

/*
 * The following declarations/functions are to be used by the base
 * library; they are placeholders for now.
 */

int     g_loopback_connection = 0;

/*
 * dapls_ib_cq_alloc
 *
 * Allocate the provider CQ handle for an EVD and create the EVD in the
 * driver through the DAPL_EVD_CREATE ioctl. For EVDs that carry DTO or
 * RMR bind events a hash table for wrid management is created and the
 * CQ (plus its doorbell records, when the driver reports them) is
 * mapped into the process.
 *
 * Input:
 *      ia_ptr                  IA handle
 *      evd_ptr                 pointer to EVD struct
 *      cno_ptr                 pointer to CNO struct, may be NULL
 *      cqlen                   minimum QLen
 *
 * Output:
 *      cqlen                   real CQ size reported by the driver
 *                              (DTO/RMR bind EVDs only)
 *      evd_ptr->ib_cq_handle   the new CQ handle, set on success
 *
 * Returns:
 *      DAT_SUCCESS
 *      DAT_INSUFFICIENT_RESOURCES
 *      result of dapls_convert_error() when DAPL_EVD_CREATE fails
 *
 */
DAT_RETURN
dapls_ib_cq_alloc(
        IN  DAPL_IA             *ia_ptr,
        IN  DAPL_EVD            *evd_ptr,
        IN  DAPL_CNO            *cno_ptr,
        IN  DAT_COUNT           *cqlen)
{
        dapl_evd_create_t       create_msg;
        dapl_evd_free_t         free_msg;
        ib_cq_handle_t          cq_handle = IB_INVALID_HANDLE;
        DAT_RETURN              dat_status;
        int                     ia_fd;
        int                     hca_fd;
        int                     retval;
        int                     saved_errno;
        mlnx_umap_cq_data_out_t *mcq;

        /* cq handle is created even for non-cq type events */
        /* since cq handle is where the evd fd gets stored. */
        cq_handle = (ib_cq_handle_t)
            dapl_os_alloc(sizeof (struct dapls_ib_cq_handle));
        if (cq_handle == NULL) {
                dapl_dbg_log(DAPL_DBG_TYPE_ERR,
                    "cq_alloc: evd_ptr 0x%p, cq_handle == NULL\n",
                    evd_ptr);
                return (DAT_INSUFFICIENT_RESOURCES);
        }

        /* nothing below touches the handle until after the ioctl */
        (void) dapl_os_memzero(cq_handle, sizeof (*cq_handle));

        /* get the hca information from ia_ptr */
        (void) dapl_os_memzero(&create_msg, sizeof (create_msg));
        create_msg.evd_flags = evd_ptr->evd_flags;
        create_msg.evd_cookie = (uintptr_t)evd_ptr;
        if (cno_ptr != NULL) {
                create_msg.evd_cno_hkey =
                    (uint64_t)cno_ptr->ib_cno_handle;
        }
        if (evd_ptr->evd_flags & (DAT_EVD_DTO_FLAG | DAT_EVD_RMR_BIND_FLAG)) {
                create_msg.evd_cq_size = (uint32_t)*cqlen;
        }

        dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
            "cq_alloc: evd 0x%p, flags 0x%x, cookie 0x%llx, hkey 0x%llx,\n"
            "          cno_hkey 0x%llx, cq_size %d\n", evd_ptr,
            create_msg.evd_flags, create_msg.evd_cookie, create_msg.evd_hkey,
            create_msg.evd_cno_hkey, create_msg.evd_cq_size);

        ia_fd = ia_ptr->hca_ptr->ib_hca_handle->ia_fd;
        hca_fd = ia_ptr->hca_ptr->ib_hca_handle->hca_fd;
        mcq = (mlnx_umap_cq_data_out_t *)create_msg.evd_cq_data_out;

        /* The next line is only needed for backward compatibility */
        mcq->mcq_rev = MLNX_UMAP_IF_VERSION;

        /* call into driver to allocate cq */
        retval = ioctl(ia_fd, DAPL_EVD_CREATE, &create_msg);
        if (retval != 0 || mcq->mcq_rev != MLNX_UMAP_IF_VERSION) {
                /* capture errno before logging/free can disturb it */
                saved_errno = errno;
                dapl_dbg_log(DAPL_DBG_TYPE_ERR,
                    "cq_alloc: evd_create failed, %s\n",
                    strerror(saved_errno));
                dapl_os_free(cq_handle, sizeof (struct dapls_ib_cq_handle));
                return (dapls_convert_error(saved_errno, retval));
        }
        dapl_os_lock_init(&cq_handle->cq_wrid_wqhdr_lock);

        dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
            "cq_alloc: created, evd 0x%p, hkey 0x%016llx\n\n", evd_ptr,
            create_msg.evd_hkey);

        cq_handle->evd_hkey = create_msg.evd_hkey;

        if (evd_ptr->evd_flags & (DAT_EVD_DTO_FLAG | DAT_EVD_RMR_BIND_FLAG)) {

                /*
                 * allocate a hash table for wrid management, the key is
                 * a combination of QPnumber and SEND/RECV type. This is
                 * required only for evd which have a CQ mapped to
                 * it.
                 */
                if (DAT_SUCCESS != dapls_hash_create(DAPL_MED_HASHSIZE,
                    DAT_FALSE, &cq_handle->cq_wrid_wqhdr_list)) {
                        dapl_dbg_log(DAPL_DBG_TYPE_ERR,
                            "cq_alloc: hash_create failed\n");
                        dat_status = DAT_INSUFFICIENT_RESOURCES |
                            DAT_RESOURCE_MEMORY;
                        goto bail;
                }

                dapl_os_assert(create_msg.evd_cq_real_size > 0);

                /* In the case of Arbel or Hermon */
                if (mcq->mcq_polldbr_mapoffset != 0 ||
                    mcq->mcq_polldbr_maplen != 0)
                        cq_handle->cq_poll_dbp = dapls_ib_get_dbp(
                            mcq->mcq_polldbr_maplen, hca_fd,
                            mcq->mcq_polldbr_mapoffset,
                            mcq->mcq_polldbr_offset);
                if (mcq->mcq_armdbr_mapoffset != 0 ||
                    mcq->mcq_armdbr_maplen != 0)
                        cq_handle->cq_arm_dbp = dapls_ib_get_dbp(
                            mcq->mcq_armdbr_maplen, hca_fd,
                            mcq->mcq_armdbr_mapoffset,
                            mcq->mcq_armdbr_offset);

                cq_handle->cq_addr = (tavor_hw_cqe_t *)(void *) mmap64(
                    (void *)0, mcq->mcq_maplen,
                    (PROT_READ | PROT_WRITE), MAP_SHARED, hca_fd,
                    mcq->mcq_mapoffset);

                if (cq_handle->cq_addr == MAP_FAILED ||
                    cq_handle->cq_poll_dbp == MAP_FAILED ||
                    cq_handle->cq_arm_dbp == MAP_FAILED) {
                        dapl_dbg_log(DAPL_DBG_TYPE_ERR,
                            "cq_alloc: DAPL_CQ_ALLOC failed\n");
                        /*
                         * The CQ itself may have mapped fine while a
                         * doorbell mapping failed; drop that mapping
                         * since the handle is about to be freed.
                         */
                        if (cq_handle->cq_addr != MAP_FAILED) {
                                (void) munmap((char *)cq_handle->cq_addr,
                                    mcq->mcq_maplen);
                        }
                        /* free the hash table we created */
                        (void) dapls_hash_free(cq_handle->cq_wrid_wqhdr_list);
                        dat_status = DAT_INSUFFICIENT_RESOURCES;
                        goto bail;
                }

                cq_handle->cq_map_offset = mcq->mcq_mapoffset;
                cq_handle->cq_map_len = mcq->mcq_maplen;
                cq_handle->cq_num = mcq->mcq_cqnum;
                /*
                 * cq_size is the actual depth of the CQ which is 1 more
                 * than what ibt_alloc_cq reports. However the application
                 * can only use (cq_size - 1) entries.
                 */
                cq_handle->cq_size = create_msg.evd_cq_real_size + 1;
                cq_handle->cq_cqesz = mcq->mcq_cqesz;
                cq_handle->cq_iauar = ia_ptr->hca_ptr->ib_hca_handle->ia_uar;
                *cqlen = create_msg.evd_cq_real_size;

                DAPL_INIT_CQ(ia_ptr)(cq_handle);
        }

        evd_ptr->ib_cq_handle = cq_handle;
        return (DAT_SUCCESS);

bail:
        /*
         * DAPL_EVD_CREATE has already succeeded when we get here, so the
         * driver side EVD must be released on every failure path.
         */
        (void) dapl_os_memzero(&free_msg, sizeof (free_msg));
        free_msg.evf_hkey = cq_handle->evd_hkey;
        retval = ioctl(ia_fd, DAPL_EVD_FREE, &free_msg);
        if (retval != 0) {
                dapl_dbg_log(DAPL_DBG_TYPE_ERR,
                    "cq_alloc: EVD_FREE err:%s\n",
                    strerror(errno));
        }
        dapl_os_free(cq_handle, sizeof (struct dapls_ib_cq_handle));
        return (dat_status);
}


/*
 * dapls_ib_cq_resize
 *
 * Resize a CQ. If the resize fails for lack of resources an attempt is
 * made to resize the CQ back to its current size, and the original
 * failure is still reported to the caller.
 *
 * Input:
 *      evd_ptr                 pointer to EVD struct
 *      cqlen                   new length of the cq
 * Output:
 *      none
 *
 * Returns:
 *      DAT_SUCCESS
 *      DAT_INVALID_HANDLE
 *      DAT_INTERNAL_ERROR              restoring the old size failed too
 *      DAT_INSUFFICIENT_RESOURCES      resize failed, old size restored
 *
 */
DAT_RETURN
dapls_ib_cq_resize(
        IN  DAPL_EVD            *evd_ptr,
        IN  DAT_COUNT           cqlen)
{
        ib_cq_handle_t  cq_handle;
        DAT_RETURN      dat_status;

        dat_status = dapli_ib_cq_resize_internal(evd_ptr, cqlen);
        if (DAT_INSUFFICIENT_RESOURCES != dat_status) {
                return (dat_status);
        }

        cq_handle = evd_ptr->ib_cq_handle;
        /* attempt to resize back to the current size */
        if (DAT_SUCCESS != dapli_ib_cq_resize_internal(evd_ptr,
            cq_handle->cq_size - 1)) {
                /*
                 * XXX this is catastrophic need to post an event
                 * to the async evd
                 */
                return (DAT_INTERNAL_ERROR);
        }

        /*
         * The CQ is usable again at its previous size, but the size the
         * caller asked for was not obtained; do not report success.
         */
        return (DAT_INSUFFICIENT_RESOURCES);
}

/*
 * dapli_ib_cq_resize_internal
 *
 * An internal routine to resize a CQ. Returns immediately when the
 * current CQ is mapped and already larger than cqlen. Otherwise the
 * driver is asked to resize through the DAPL_CQ_RESIZE ioctl and the
 * resized CQ is mapped. When hca_ptr->hermon_resize_cq is zero the old
 * mapping is dropped first and the handle is switched to the new one;
 * otherwise the old mapping is kept and the new one is recorded in the
 * cq_resize_* fields of the handle.
 *
 * Input:
 *      evd_ptr                 pointer to EVD struct
 *      cqlen                   new length of the cq
 * Output:
 *      none
 *
 * Returns:
 *      DAT_SUCCESS
 *      DAT_INVALID_HANDLE
 *      DAT_INSUFFICIENT_RESOURCES
 *
 */
static DAT_RETURN
dapli_ib_cq_resize_internal(
        IN  DAPL_EVD            *evd_ptr,
        IN  DAT_COUNT           cqlen)
{
        ib_cq_handle_t          cq_handle;
        dapl_cq_resize_t        resize_msg;
        int                     ia_fd;
        int                     hca_fd;
        int                     retval;
        int                     saved_errno;
        mlnx_umap_cq_data_out_t *mcq;
        DAPL_HCA                *hca_ptr;
        dapls_hw_cqe_t          cq_addr;

        dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
            "dapls_ib_cq_resize: evd 0x%p cq 0x%p "
            "evd_hkey 0x%016llx cqlen %d\n",
            evd_ptr, (void *)evd_ptr->ib_cq_handle,
            evd_ptr->ib_cq_handle->evd_hkey, cqlen);

        cq_handle = evd_ptr->ib_cq_handle;
        /*
         * Since CQs are created in powers of 2 with one non-usable slot,
         * its possible that the previously allocated CQ has sufficient
         * entries. If the current cq is big enough and it is mapped in
         * we are done.
         */
        if ((cqlen < cq_handle->cq_size) && (cq_handle->cq_addr)) {
                return (DAT_SUCCESS);
        }

        hca_ptr = evd_ptr->header.owner_ia->hca_ptr;

        /* unmap the CQ before resizing it */
        if (hca_ptr->hermon_resize_cq == 0) {
                if ((cq_handle->cq_addr) &&
                    (munmap((char *)cq_handle->cq_addr,
                    cq_handle->cq_map_len) < 0)) {
                        dapl_dbg_log(DAPL_DBG_TYPE_ERR,
                            "cq_resize: munmap(%p:0x%llx) failed(%d)\n",
                            cq_handle->cq_addr, cq_handle->cq_map_len, errno);
                        return (DAT_INVALID_HANDLE);
                }
                /* cq_addr is unmapped and no longer valid */
                cq_handle->cq_addr = NULL;
        }

        ia_fd = hca_ptr->ib_hca_handle->ia_fd;
        hca_fd = hca_ptr->ib_hca_handle->hca_fd;

        (void) dapl_os_memzero(&resize_msg, sizeof (resize_msg));
        mcq = (mlnx_umap_cq_data_out_t *)resize_msg.cqr_cq_data_out;
        resize_msg.cqr_evd_hkey = cq_handle->evd_hkey;
        resize_msg.cqr_cq_new_size = cqlen;

        /* The next line is only needed for backward compatibility */
        mcq->mcq_rev = MLNX_UMAP_IF_VERSION;
        retval = ioctl(ia_fd, DAPL_CQ_RESIZE, &resize_msg);
        if (retval != 0 || mcq->mcq_rev != MLNX_UMAP_IF_VERSION) {
                /* capture errno before the log call can disturb it */
                saved_errno = errno;
                dapl_dbg_log(DAPL_DBG_TYPE_ERR,
                    "dapls_ib_cq_resize: evd 0x%p, err: %s\n",
                    evd_ptr, strerror(saved_errno));
                if (saved_errno == EINVAL) {
                        /* Couldn't find evd for this cq */
                        return (DAT_INVALID_HANDLE);
                } else { /* Need to retry resize with a smaller qlen */
                        return (DAT_INSUFFICIENT_RESOURCES);
                }
        }

        dapl_os_assert(cq_handle->cq_num == mcq->mcq_cqnum);

        /* In the case of Arbel or Hermon */
        if (mcq->mcq_polldbr_mapoffset != 0 ||
            mcq->mcq_polldbr_maplen != 0)
                cq_handle->cq_poll_dbp = dapls_ib_get_dbp(
                    mcq->mcq_polldbr_maplen, hca_fd,
                    mcq->mcq_polldbr_mapoffset,
                    mcq->mcq_polldbr_offset);
        if (mcq->mcq_armdbr_mapoffset != 0 ||
            mcq->mcq_armdbr_maplen != 0)
                cq_handle->cq_arm_dbp = dapls_ib_get_dbp(
                    mcq->mcq_armdbr_maplen, hca_fd,
                    mcq->mcq_armdbr_mapoffset,
                    mcq->mcq_armdbr_offset);

        cq_addr = (tavor_hw_cqe_t *)(void *)mmap64((void *)0,
            mcq->mcq_maplen, (PROT_READ | PROT_WRITE),
            MAP_SHARED, hca_fd, mcq->mcq_mapoffset);

        if (cq_addr == MAP_FAILED ||
            cq_handle->cq_poll_dbp == MAP_FAILED ||
            cq_handle->cq_arm_dbp == MAP_FAILED) {
                saved_errno = errno;
                /*
                 * The new CQ may have mapped fine while a doorbell
                 * mapping failed. Nothing records cq_addr on this path,
                 * so release it here rather than leak the mapping.
                 */
                if (cq_addr != MAP_FAILED) {
                        (void) munmap((char *)cq_addr, mcq->mcq_maplen);
                }
                if (hca_ptr->hermon_resize_cq == 0)
                        cq_handle->cq_addr = NULL;
                dapl_dbg_log(DAPL_DBG_TYPE_ERR,
                    "cq_resize: mmap failed(%d)\n", saved_errno);
                /* Need to retry resize with a smaller qlen */
                return (DAT_INSUFFICIENT_RESOURCES);
        }

        if (hca_ptr->hermon_resize_cq == 0) {
                cq_handle->cq_addr = cq_addr;
                cq_handle->cq_map_offset = mcq->mcq_mapoffset;
                cq_handle->cq_map_len = mcq->mcq_maplen;
                cq_handle->cq_size = resize_msg.cqr_cq_real_size + 1;
                cq_handle->cq_cqesz = mcq->mcq_cqesz;
                /*
                 * upon resize the old events are moved to the start of the CQ
                 * hence we need to reset the consumer index too
                 */
                cq_handle->cq_consindx = 0;
        } else {        /* Hermon */
                /* the current mapping stays; stash the new one alongside */
                cq_handle->cq_resize_addr = cq_addr;
                cq_handle->cq_resize_map_offset = mcq->mcq_mapoffset;
                cq_handle->cq_resize_map_len = mcq->mcq_maplen;
                cq_handle->cq_resize_size = resize_msg.cqr_cq_real_size + 1;
                cq_handle->cq_resize_cqesz = mcq->mcq_cqesz;
        }

        return (DAT_SUCCESS);
}

/*
 * dapls_ib_cq_free
 *
 * Free a CQ: unmap it if it was mapped, release the driver EVD through
 * the DAPL_EVD_FREE ioctl and free the CQ handle. If the ioctl fails
 * the handle is kept and remains attached to the EVD.
 *
 * Input:
 *      ia_ptr                  IA handle
 *      evd_ptr                 pointer to EVD struct
 * Output:
 *      none
 *
 * Returns:
 *      DAT_SUCCESS
 *      result of dapls_convert_error() when DAPL_EVD_FREE fails
 *
 */
DAT_RETURN
dapls_ib_cq_free(
        IN  DAPL_IA             *ia_ptr,
        IN  DAPL_EVD            *evd_ptr)
{
        dapl_evd_free_t         args;
        int                     retval;
        int                     saved_errno;
        ib_cq_handle_t          cq_handle = evd_ptr->ib_cq_handle;

        dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
            "dapls_ib_cq_free: evd 0x%p cq 0x%p hkey %016llx\n", evd_ptr,
            (void *)evd_ptr->ib_cq_handle, evd_ptr->ib_cq_handle->evd_hkey);

        /* If the cq was mmap'd unmap it before freeing it */
        if (cq_handle->cq_addr != NULL) {
                if (munmap((char *)cq_handle->cq_addr,
                    cq_handle->cq_map_len) < 0) {
                        dapl_dbg_log(DAPL_DBG_TYPE_ERR,
                            "cq_free: (%p:0x%llx)\n", cq_handle->cq_addr,
                            cq_handle->cq_map_len);
                } else {
                        /*
                         * The handle survives if the ioctl below fails;
                         * forget the stale address so a later retry does
                         * not unmap it a second time.
                         */
                        cq_handle->cq_addr = NULL;
                }
        }

        args.evf_hkey = cq_handle->evd_hkey;

        retval = ioctl(ia_ptr->hca_ptr->ib_hca_handle->ia_fd,
            DAPL_EVD_FREE, &args);
        if (retval != 0) {
                /* capture errno before the log call can disturb it */
                saved_errno = errno;
                dapl_dbg_log(DAPL_DBG_TYPE_ERR,
                    "dapls_ib_cq_free: evd 0x%p, err: %s\n",
                    evd_ptr, strerror(saved_errno));
                return (dapls_convert_error(saved_errno, retval));
        }

        dapl_os_free(cq_handle, sizeof (struct dapls_ib_cq_handle));
        evd_ptr->ib_cq_handle = NULL;

        return (DAT_SUCCESS);
}

/*
 * dapls_set_cq_notify
 *
 * Request a completion notification on the EVD's CQ for the next
 * completion (IB_NOTIFY_ON_NEXT_COMP).
 *
 * Input:
 *      ia_ptr                  IA handle (unused)
 *      evd_ptr                 pointer to EVD struct
 *
 * Output:
 *      none
 *
 * Returns:
 *      whatever the routine selected by DAPL_NOTIFY(evd_ptr) returns
 *
 */
/* ARGSUSED */
DAT_RETURN
dapls_set_cq_notify(
        IN  DAPL_IA             *ia_ptr,
        IN  DAPL_EVD            *evd_ptr)
{
        ib_cq_handle_t          cq = evd_ptr->ib_cq_handle;
        int                     status;

        dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
            "dapls_ib_cq_notify: evd 0x%p cq 0x%p\n", evd_ptr,
            (void *)cq);

        /* third argument is unused for the next-completion request */
        status = DAPL_NOTIFY(evd_ptr)(cq, IB_NOTIFY_ON_NEXT_COMP, 0);
        return (status);
}

/*
 * dapls_set_cqN_notify
 *
 * Request a notification on the EVD's CQ using
 * IB_NOTIFY_ON_NEXT_NCOMP with the given event count.
 *
 * Input:
 *      ia_ptr                  IA handle (unused)
 *      evd_ptr                 pointer to EVD struct
 *      num_events              count handed to the notify routine
 *
 * Output:
 *      none
 *
 * Returns:
 *      whatever the routine selected by DAPL_NOTIFY(evd_ptr) returns
 *
 */
/* ARGSUSED */
DAT_RETURN
dapls_set_cqN_notify(
        IN  DAPL_IA             *ia_ptr,
        IN  DAPL_EVD            *evd_ptr,
        IN  uint32_t            num_events)
{
        ib_cq_handle_t          cq = evd_ptr->ib_cq_handle;
        int                     status;

        dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
            "dapls_set_cqN_notify:evd %p cq %p num_events %d\n", evd_ptr,
            (void *)cq, num_events);

        status = DAPL_NOTIFY(evd_ptr)(cq, IB_NOTIFY_ON_NEXT_NCOMP,
            num_events);
        return (status);
}

/*
 * dapls_ib_cqd_create
 *
 * CQ domain creation hook. This provider has nothing to set up here,
 * so the routine does no work and always succeeds.
 *
 * Input:
 *      hca_ptr                 pointer to HCA struct (unused)
 *
 * Output:
 *      none
 *
 * Returns:
 *      DAT_SUCCESS
 *
 */
/* ARGSUSED */
DAT_RETURN
dapls_ib_cqd_create(
        IN  DAPL_HCA            *hca_ptr)
{
        /* intentionally empty */
        return (DAT_SUCCESS);
}


/*
 * dapls_ib_cqd_destroy
 *
 * Release the HCA's dummy ("null") CQ handle, if one exists: free the
 * corresponding driver EVD through DAPL_EVD_FREE and free the handle.
 * A failing ioctl is only logged.
 *
 * Input:
 *      hca_ptr                 pointer to HCA struct
 *
 * Output:
 *      none
 *
 * Returns:
 *      DAT_SUCCESS
 *
 */
DAT_RETURN
dapls_ib_cqd_destroy(
        IN  DAPL_HCA            *hca_ptr)
{
        dapl_evd_free_t         free_args;
        ib_cq_handle_t          null_cq;
        int                     rc;

        /* nothing to do when no dummy cq was ever set up */
        if (hca_ptr->null_ib_cq_handle == IB_INVALID_HANDLE) {
                return (DAT_SUCCESS);
        }

        /* free up the dummy cq */
        null_cq = hca_ptr->null_ib_cq_handle;
        dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
            "dapls_ib_cqd_destroy: cq %p\n", (void *)null_cq);

        free_args.evf_hkey = null_cq->evd_hkey;

        rc = ioctl(hca_ptr->ib_hca_handle->ia_fd, DAPL_EVD_FREE, &free_args);
        if (rc != 0) {
                dapl_dbg_log(DAPL_DBG_TYPE_ERR,
                    "dapls_ib_cqd_destroy: EVD_FREE err:%d errno:%d\n",
                    rc, errno);
        }

        /* the handle is released whether or not the ioctl succeeded */
        dapl_os_free(null_cq, sizeof (struct dapls_ib_cq_handle));
        hca_ptr->null_ib_cq_handle = IB_INVALID_HANDLE;

        return (DAT_SUCCESS);
}


/*
 * dapls_ib_pd_alloc
 *
 * Alloc a PD: allocate the provider PD handle, ask the driver for a PD
 * through the DAPL_PD_ALLOC ioctl and attach the handle to the PZ.
 *
 * Input:
 *      ia                      IA handle
 *      pz                      pointer to PZ struct
 *
 * Output:
 *      pz->pd_handle           the new PD handle, set on success
 *
 * Returns:
 *      DAT_SUCCESS
 *      DAT_INSUFFICIENT_RESOURCES
 *      result of dapls_convert_error() when DAPL_PD_ALLOC fails
 *
 */
DAT_RETURN
dapls_ib_pd_alloc(
        IN  DAPL_IA             *ia,
        IN  DAPL_PZ             *pz)
{
        struct dapls_ib_pd_handle *pd_p;
        dapl_pd_alloc_t args;
        int retval;
        int saved_errno;

        pd_p = (struct dapls_ib_pd_handle *)dapl_os_alloc(sizeof (*pd_p));
        if (pd_p == NULL) {
                dapl_dbg_log(DAPL_DBG_TYPE_ERR,
                    "pd_alloc: ia 0x%p, pz 0x%p, cannot allocate pd\n",
                    ia, pz);
                return (DAT_INSUFFICIENT_RESOURCES);
        }
        retval = ioctl(ia->hca_ptr->ib_hca_handle->ia_fd,
            DAPL_PD_ALLOC, &args);
        if (retval != 0) {
                /* capture errno before logging/free can disturb it */
                saved_errno = errno;
                dapl_dbg_log(DAPL_DBG_TYPE_ERR,
                    "pd_alloc: ia 0x%p, pz 0x%p, cannot create pd, "
                    "err: %s\n", ia, pz, strerror(saved_errno));
                dapl_os_free(pd_p, sizeof (*pd_p));
                return (dapls_convert_error(saved_errno, retval));
        }

        /* remember the driver's key for this PD */
        pd_p->pd_hkey = args.pda_hkey;
        pz->pd_handle = pd_p;
        dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
            "pd_alloc: successful, ia 0x%p, pz 0x%p, hkey %016llx\n",
            ia, pz, args.pda_hkey);

        return (DAT_SUCCESS);
}


/*
 * dapls_ib_pd_free
 *
 * Free a PD: release the driver PD through the DAPL_PD_FREE ioctl and
 * free the provider PD handle. If the ioctl fails the handle is kept
 * and remains attached to the PZ.
 *
 * Input:
 *      pz                      pointer to PZ struct
 *
 * Output:
 *      none
 *
 * Returns:
 *      DAT_SUCCESS
 *      result of dapls_convert_error() when DAPL_PD_FREE fails
 *
 */
DAT_RETURN
dapls_ib_pd_free(
        IN  DAPL_PZ             *pz)
{
        struct dapls_ib_pd_handle *pd_p;
        dapl_pd_free_t args;
        int retval;
        int saved_errno;

        pd_p = (struct dapls_ib_pd_handle *)pz->pd_handle;
        args.pdf_hkey = pd_p->pd_hkey;

        retval = ioctl(pz->header.owner_ia->hca_ptr->ib_hca_handle->ia_fd,
            DAPL_PD_FREE, &args);
        if (retval != 0) {
                /* capture errno before the log call can disturb it */
                saved_errno = errno;
                dapl_dbg_log(DAPL_DBG_TYPE_ERR,
                    "pd_free: pz 0x%p, cannot free pd\n", pz);
                return (dapls_convert_error(saved_errno, retval));
        }
        dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
            "pd_free: pz 0x%p, hkey %016llx, freed\n", pz, pd_p->pd_hkey);
        dapl_os_free((void *)pd_p, sizeof (*pd_p));
        pz->pd_handle = NULL;
        return (DAT_SUCCESS);
}


/*
 * dapls_ib_mr_register
 *
 * Register a virtual memory region with the driver through the
 * DAPL_MR_REGISTER ioctl. Window binding is always enabled on the
 * region in addition to the requested privileges.
 *
 * Input:
 *      ia                      IA handle
 *      lmr                     pointer to dapl_lmr struct
 *      virt_addr               virtual address of beginning of mem region
 *      length                  length of memory region
 *      privileges              memory privilege flags for the region
 *
 * Output:
 *      lmr->mr_handle          the new MR handle
 *      lmr->param              lmr_context, rmr_context,
 *                              registered_address and registered_size
 *                              as reported by the driver
 *
 * Returns:
 *      DAT_SUCCESS
 *      DAT_INSUFFICIENT_RESOURCES
 *      DAT_INVALID_PARAMETER   the LMR has no PZ
 *      result of dapls_convert_error() when DAPL_MR_REGISTER fails
 *
 */
DAT_RETURN
dapls_ib_mr_register(
        IN  DAPL_IA             *ia,
        IN  DAPL_LMR            *lmr,
        IN  DAT_PVOID           virt_addr,
        IN  DAT_VLEN            length,
        IN  DAT_MEM_PRIV_FLAGS  privileges)
{
        dapl_mr_register_t      reg_msg;
        ib_mr_handle_t          mr_handle;
        DAPL_PZ *               pz_handle;
        int                     ia_fd;
        int                     retval;
        int                     saved_errno;

        ia_fd = ia->hca_ptr->ib_hca_handle->ia_fd;
        mr_handle = dapl_os_alloc(sizeof (struct dapls_ib_mr_handle));
        if (mr_handle == NULL) {
                dapl_dbg_log(DAPL_DBG_TYPE_ERR,
                    "mr_register: lmr 0x%p, ia 0x%p, "
                    "cannot alloc mr_handle\n", lmr, ia);
                return (DAT_INSUFFICIENT_RESOURCES);
        }
        pz_handle = ((DAPL_PZ *)lmr->param.pz_handle);
        if (pz_handle == NULL) {
                dapl_dbg_log(DAPL_DBG_TYPE_ERR,
                    "mr_register: lmr 0x%p, ia 0x%p, "
                    "pz_handle == NULL!\n", lmr, ia);
                dapl_os_free(mr_handle, sizeof (struct dapls_ib_mr_handle));
                return (DAT_INVALID_PARAMETER);
        }
        reg_msg.mr_pd_hkey = pz_handle->pd_handle->pd_hkey;
        reg_msg.mr_vaddr = (ib_vaddr_t)(uintptr_t)virt_addr;
        reg_msg.mr_len = (ib_memlen_t)length;
        reg_msg.mr_flags = (ibt_mr_flags_t)
            dapl_lmr_convert_privileges(privileges);
        reg_msg.mr_flags |= IBT_MR_ENABLE_WINDOW_BIND;

        dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
            "mr_register: lmr 0x%p, pd_hkey 0x%016llx, vaddr 0x%016llx, "
            "len %llu, flags 0x%x\n", lmr, reg_msg.mr_pd_hkey,
            reg_msg.mr_vaddr, reg_msg.mr_len, reg_msg.mr_flags);

        /* call into driver to allocate MR resource */
        retval = ioctl(ia_fd, DAPL_MR_REGISTER, &reg_msg);
        if (retval != 0) {
                /* capture errno before logging/free can disturb it */
                saved_errno = errno;
                dapl_dbg_log(DAPL_DBG_TYPE_ERR,
                    "mr_register: lmr 0x%p, failed (%s)\n",
                    lmr, strerror(saved_errno));
                dapl_os_free(mr_handle, sizeof (struct dapls_ib_mr_handle));
                return (dapls_convert_error(saved_errno, retval));
        }

        /* publish what the driver handed back */
        mr_handle->mr_hkey = reg_msg.mr_hkey;
        lmr->param.lmr_context = (DAT_LMR_CONTEXT)reg_msg.mr_lkey;
        lmr->param.rmr_context = (DAT_RMR_CONTEXT)reg_msg.mr_rkey;
        lmr->param.registered_address = reg_msg.mr_vaddr;
        lmr->param.registered_size = reg_msg.mr_len;
        lmr->mr_handle = mr_handle;

        dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
            "mr_register: successful, lmr 0x%p, mr_hkey 0x%016llx, "
            "lmr_ctx 0x%08x\n\n", lmr, reg_msg.mr_hkey,
            reg_msg.mr_lkey);
        return (DAT_SUCCESS);
}

/*
 * dapls_ib_mr_register_shared
 *
 * Register a shared virtual memory region with the driver through the
 * DAPL_MR_REGISTER_SHARED ioctl. Window binding is always enabled on
 * the region in addition to the requested privileges.
 *
 * Input:
 *      ia                      IA handle
 *      lmr                     pointer to dapl_lmr struct
 *      virt_addr               virtual address of beginning of mem region
 *      length                  length of memory region
 *      cookie                  shared memory identifier,
 *                              DAT_LMR_COOKIE_SIZE bytes are copied
 *      privileges              memory privilege flags for the region
 *
 * Output:
 *      lmr->mr_handle          the new MR handle
 *      lmr->param              lmr_context, rmr_context,
 *                              registered_address and registered_size
 *                              as reported by the driver
 *
 * Returns:
 *      DAT_SUCCESS
 *      DAT_INSUFFICIENT_RESOURCES
 *      DAT_INVALID_PARAMETER   the LMR has no PZ
 *      result of dapls_convert_error() when the ioctl fails
 *
 */
DAT_RETURN
dapls_ib_mr_register_shared(
        IN  DAPL_IA             *ia,
        IN  DAPL_LMR            *lmr,
        IN  DAT_PVOID           virt_addr,
        IN  DAT_VLEN            length,
        IN  DAT_LMR_COOKIE      cookie,
        IN  DAT_MEM_PRIV_FLAGS  privileges)
{
        dapl_mr_register_shared_t       reg_msg;
        ib_mr_handle_t                  mr_handle;
        DAPL_PZ                         *pz_handle;
        int                             ia_fd, i;
        int                             retval;
        int                             saved_errno;

        ia_fd = ia->hca_ptr->ib_hca_handle->ia_fd;
        mr_handle = dapl_os_alloc(sizeof (struct dapls_ib_mr_handle));
        if (mr_handle == NULL) {
                dapl_dbg_log(DAPL_DBG_TYPE_ERR,
                    "mr_register_shared: lmr 0x%p, ia 0x%p, "
                    "cannot alloc mr_handle\n", lmr, ia);
                return (DAT_INSUFFICIENT_RESOURCES);
        }
        pz_handle = ((DAPL_PZ *)lmr->param.pz_handle);
        if (pz_handle == NULL) {
                dapl_dbg_log(DAPL_DBG_TYPE_ERR,
                    "mr_register_shared: lmr 0x%p, ia 0x%p, "
                    "pz_handle == NULL!\n", lmr, ia);
                dapl_os_free(mr_handle, sizeof (struct dapls_ib_mr_handle));
                return (DAT_INVALID_PARAMETER);
        }
        reg_msg.mrs_pd_hkey = pz_handle->pd_handle->pd_hkey;
        reg_msg.mrs_vaddr = (ib_vaddr_t)(uintptr_t)virt_addr;
        reg_msg.mrs_len = (ib_memlen_t)length;
        reg_msg.mrs_flags = (ibt_mr_flags_t)
            dapl_lmr_convert_privileges(privileges);
        reg_msg.mrs_flags |= IBT_MR_ENABLE_WINDOW_BIND;
        /* the copy below relies on both cookie layouts being equal in size */
        /*CONSTCOND*/
        dapl_os_assert(DAT_LMR_COOKIE_SIZE == sizeof (reg_msg.mrs_shm_cookie));
        (void) dapl_os_memcpy((void *)&reg_msg.mrs_shm_cookie, (void *)cookie,
            DAT_LMR_COOKIE_SIZE);

        dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
            "mr_register_shared: lmr 0x%p, pd_hkey 0x%016llx, "
            "vaddr 0x%016llx, len %llu, flags 0x%x\n",
            lmr, reg_msg.mrs_pd_hkey, reg_msg.mrs_vaddr, reg_msg.mrs_len,
            reg_msg.mrs_flags);

        /* dump the cookie words, highest index first */
        dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
            "mr_register_shared: cookie \n0x");
        for (i = 4; i >= 0; i--) {
                dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
                    "%016llx", reg_msg.mrs_shm_cookie.mc_uint_arr[i]);
        }
        dapl_dbg_log(DAPL_DBG_TYPE_UTIL, "\n");

        /* call into driver to allocate MR resource */
        retval = ioctl(ia_fd, DAPL_MR_REGISTER_SHARED, &reg_msg);
        if (retval != 0) {
                /* capture errno before logging/free can disturb it */
                saved_errno = errno;
                dapl_dbg_log(DAPL_DBG_TYPE_ERR,
                    "mr_register_shared: lmr 0x%p, failed (%s)\n",
                    lmr, strerror(saved_errno));
                dapl_os_free(mr_handle, sizeof (struct dapls_ib_mr_handle));
                return (dapls_convert_error(saved_errno, retval));
        }

        /* publish what the driver handed back */
        mr_handle->mr_hkey = reg_msg.mrs_hkey;
        lmr->param.lmr_context = (DAT_LMR_CONTEXT)reg_msg.mrs_lkey;
        lmr->param.rmr_context = (DAT_RMR_CONTEXT)reg_msg.mrs_rkey;
        lmr->param.registered_address = reg_msg.mrs_vaddr;
        lmr->param.registered_size = reg_msg.mrs_len;
        lmr->mr_handle = mr_handle;

        dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
            "mr_register_shared: successful, lmr 0x%p, mr_hkey 0x%016llx, "
            "lmr_ctx 0x%08x\n\n", lmr, reg_msg.mrs_hkey,
            reg_msg.mrs_lkey);
        return (DAT_SUCCESS);
}

/*
 * dapls_ib_mr_deregister
 *
 * Free a memory region: deregister it with the driver through the
 * DAPL_MR_DEREGISTER ioctl and free the provider MR handle. If the
 * ioctl fails the handle is kept and remains attached to the LMR.
 *
 * Input:
 *      lmr                     pointer to dapl_lmr struct
 *
 * Output:
 *      none
 *
 * Returns:
 *      DAT_SUCCESS
 *      result of dapls_convert_error() when DAPL_MR_DEREGISTER fails
 *
 */
DAT_RETURN
dapls_ib_mr_deregister(
        IN  DAPL_LMR            *lmr)
{
        dapl_mr_deregister_t args;
        int retval;
        int saved_errno;

        args.mrd_hkey = lmr->mr_handle->mr_hkey;
        dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
            "mr_deregister: lmr 0x%p, hkey 0x%016llx, lmr_ctx 0x%08x\n"
            "               vaddr 0x%016llx, len %llu, flags 0x%x\n",
            lmr, args.mrd_hkey, lmr->param.lmr_context,
            lmr->param.registered_address, lmr->param.registered_size,
            dapl_lmr_convert_privileges(lmr->param.mem_priv) |
            IBT_MR_ENABLE_WINDOW_BIND);

        /* call into driver to do MR deregister */
        retval = ioctl(lmr->header.owner_ia->hca_ptr->ib_hca_handle->ia_fd,
            DAPL_MR_DEREGISTER, &args);

        if (retval != 0) {
                /* capture errno before the log call can disturb it */
                saved_errno = errno;
                dapl_dbg_log(DAPL_DBG_TYPE_ERR,
                    "mr_deregister: lmr 0x%p, failed (%s)\n",
                    lmr, strerror(saved_errno));
                return (dapls_convert_error(saved_errno, retval));
        }

        dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
            "mr_deregister: successful\n\n");
        dapl_os_free(lmr->mr_handle, sizeof (struct dapls_ib_mr_handle));
        lmr->mr_handle = NULL;
        return (DAT_SUCCESS);
}


/*
 * dapls_ib_mr_register_lmr
 *
 * Register a memory region based on attributes of an existing one
 *
 * Input:
 *      ia                      IA handle
 *      lmr                     pointer to dapl_lmr struct
 *      privileges              memory privilege flags for the new region
 *
 * Output:
 *      none
 *
 * Returns:
 *      DAT_SUCCESS
 *      DAT_INSUFFICIENT_RESOURCES
 *
 */
DAT_RETURN
dapls_ib_mr_register_lmr(
        IN  DAPL_IA                     *ia,
        IN  DAPL_LMR                    *lmr,
        IN  DAT_MEM_PRIV_FLAGS          privileges)
{
        dapl_mr_register_lmr_t          regl_msg;
        DAPL_LMR                        *orig_lmr;
        struct dapls_ib_mr_handle       *orig_mr_handle;
        ib_mr_handle_t                  mr_handle;
        int                             ia_fd;
        int                             retval;
        int                             saved_errno;

        ia_fd = ia->hca_ptr->ib_hca_handle->ia_fd;
        mr_handle = dapl_os_alloc(sizeof (struct dapls_ib_mr_handle));
        if (mr_handle == NULL) {
                dapl_dbg_log(DAPL_DBG_TYPE_ERR,
                    "mr_register_lmr: lmr 0x%p, ia 0x%p, "
                    "cannot alloc mr_handle\n", lmr, ia);
                return (DAT_INSUFFICIENT_RESOURCES);
        }

        /*
         * The region to share is identified by the hkey of the LMR named
         * in this lmr's region descriptor.
         */
        orig_lmr = (DAPL_LMR *)lmr->param.region_desc.for_lmr_handle;
        orig_mr_handle = (struct dapls_ib_mr_handle *)orig_lmr->mr_handle;
        regl_msg.mrl_orig_hkey = orig_mr_handle->mr_hkey;
        regl_msg.mrl_flags = (ibt_mr_flags_t)
            dapl_lmr_convert_privileges(privileges);
        regl_msg.mrl_flags |= IBT_MR_ENABLE_WINDOW_BIND;
        regl_msg.mrl_lkey = regl_msg.mrl_rkey = 0;

        /*
         * Trace the hkey of the original region: the new handle has only
         * just been allocated and its mr_hkey is not yet initialized.
         */
        dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
            "mr_register_lmr: lmr 0x%p, hkey 0x%016llx, lmr_ctx 0x%08x\n"
            "                 vaddr 0x%016llx, len %llu, flags 0x%x\n",
            lmr, orig_mr_handle->mr_hkey, lmr->param.lmr_context,
            orig_lmr->param.registered_address,
            orig_lmr->param.registered_size,
            dapl_lmr_convert_privileges(orig_lmr->param.mem_priv) |
            IBT_MR_ENABLE_WINDOW_BIND);


        /* call into driver to allocate MR resource */
        retval = ioctl(ia_fd, DAPL_MR_REGISTER_LMR, &regl_msg);
        if (retval != 0) {
                /* capture errno before logging/freeing can disturb it */
                saved_errno = errno;
                dapl_dbg_log(DAPL_DBG_TYPE_ERR,
                    "mr_register_lmr: failed (%s), orig_hkey (%016llx)\n",
                    strerror(saved_errno), orig_mr_handle->mr_hkey);
                dapl_os_free(mr_handle, sizeof (struct dapls_ib_mr_handle));
                return (dapls_convert_error(saved_errno, retval));
        }

        dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
            "mr_registered_lmr: successful, lmr 0x%p, hkey 0x%016llx\n",
            lmr, regl_msg.mrl_hkey);

        /*
         * Record the keys returned by the driver; address and size are
         * inherited from the original region.
         */
        mr_handle->mr_hkey = regl_msg.mrl_hkey;
        lmr->param.lmr_context = (DAT_LMR_CONTEXT)regl_msg.mrl_lkey;
        lmr->param.rmr_context = (DAT_RMR_CONTEXT)regl_msg.mrl_rkey;
        lmr->param.registered_address = orig_lmr->param.registered_address;
        lmr->param.registered_size = orig_lmr->param.registered_size;
        lmr->mr_handle = mr_handle;

        return (DAT_SUCCESS);
}


/*
 * dapls_ib_mw_alloc
 *
 * Allocate a memory window in the protection zone of the rmr
 *
 * Input:
 *      rmr                     Initialized rmr to hold binding handles
 *
 * Output:
 *      none
 *
 * Returns:
 *      DAT_SUCCESS
 *      DAT_INSUFFICIENT_RESOURCES
 *
 */
DAT_RETURN
dapls_ib_mw_alloc(
        IN  DAPL_RMR    *rmr)
{
        DAPL_IA         *ia_hdl = (DAPL_IA *)rmr->param.ia_handle;
        DAPL_PZ         *pz_hdl = rmr->param.pz_handle;
        dapl_mw_alloc_t args;
        ib_mw_handle_t  mw_handle;
        int             ia_fd;
        int             retval;
        int             saved_errno;

        ia_fd = ((struct dapls_ib_hca_handle *)(ia_hdl->hca_ptr->
            ib_hca_handle))->ia_fd;

        mw_handle = dapl_os_alloc(sizeof (struct dapls_ib_mw_handle));
        if (mw_handle == NULL) {
                dapl_dbg_log(DAPL_DBG_TYPE_ERR,
                    "mw_alloc: rmr 0x%p, cannot alloc mw_handle\n", rmr);
                return (DAT_INSUFFICIENT_RESOURCES);
        }

        /* the window is allocated against the rmr's protection zone */
        args.mw_pd_hkey = ((struct dapls_ib_pd_handle *)
            (pz_hdl->pd_handle))->pd_hkey;

        dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
            "mw_alloc: rmr 0x%p, pd_hkey 0x%016llx\n",
            rmr, args.mw_pd_hkey);

        retval = ioctl(ia_fd, DAPL_MW_ALLOC, &args);
        if (retval != 0) {
                /* capture errno before logging/freeing can disturb it */
                saved_errno = errno;
                dapl_dbg_log(DAPL_DBG_TYPE_ERR,
                    "mw_alloc: rmr 0x%p, failed (%s)\n", rmr,
                    strerror(saved_errno));
                /* release with the size it was allocated with */
                dapl_os_free(mw_handle, sizeof (struct dapls_ib_mw_handle));
                return (dapls_convert_error(saved_errno, retval));
        }

        /* publish the driver's handle key and rkey through the rmr */
        mw_handle->mw_hkey = args.mw_hkey;
        rmr->mw_handle = mw_handle;
        rmr->param.rmr_context = (DAT_RMR_CONTEXT) args.mw_rkey;

        dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
            "mw_alloc: successful, rmr 0x%p, mw_hkey 0x%llx, "
            "rmr_ctx 0x%x\n\n", rmr, (uint64_t)args.mw_hkey,
            rmr->param.rmr_context);

        return (DAT_SUCCESS);
}


/*
 * dapls_ib_mw_free
 *
 * Release bindings of a protection domain to a memory window
 *
 * Input:
 *      rmr                     Initialized rmr to hold binding handles
 *
 * Output:
 *      none
 *
 * Returns:
 *      DAT_SUCCESS
 *      DAT_INSUFFICIENT_RESOURCES
 *
 */
DAT_RETURN
dapls_ib_mw_free(
        IN  DAPL_RMR    *rmr)
{
        DAPL_IA         *ia_hdl = rmr->param.ia_handle;
        dapl_mw_free_t  args;
        int             ia_fd;
        int             retval;
        int             saved_errno;

        ia_fd = ((struct dapls_ib_hca_handle *)(ia_hdl->hca_ptr->
            ib_hca_handle))->ia_fd;

        args.mw_hkey = rmr->mw_handle->mw_hkey;

        dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
            "mw_free: rmr 0x%p, mw_hkey 0x%016llx\n", rmr, args.mw_hkey);

        retval = ioctl(ia_fd, DAPL_MW_FREE, &args);
        if (retval != 0) {
                /*
                 * Capture errno before logging can disturb it.  The
                 * handle is left attached to the rmr on failure.
                 */
                saved_errno = errno;
                dapl_dbg_log(DAPL_DBG_TYPE_ERR,
                    "mw_free: rmr 0x%p, failed (%s)\n", rmr,
                    strerror(saved_errno));
                return (dapls_convert_error(saved_errno, retval));
        }

        dapl_dbg_log(DAPL_DBG_TYPE_UTIL, "mw_free: successful\n\n");
        /* release with the size used when the handle was allocated */
        dapl_os_free(rmr->mw_handle, sizeof (struct dapls_ib_mw_handle));
        rmr->mw_handle = NULL;

        return (DAT_SUCCESS);
}

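/*
 * dapls_ib_mw_bind
 *
 * Bind a memory window to a range of a registered memory region by
 * posting a bind work request through the endpoint
 *
 * Input:
 *      rmr                     Initialized rmr to hold binding handles
 *      lmr_context             lkey of the memory region to bind to
 *      ep                      endpoint the bind request is posted on
 *      cookie                  cookie used as the work request id
 *      virtual_address         start address of the window
 *      length                  length of the window; 0 requests no access
 *      mem_priv                access privileges for the window
 *      completion_flags        DAT completion flags for the request
 *
 * Output:
 *      none
 *
 * Returns:
 *      DAT_SUCCESS
 *      DAT_INSUFFICIENT_RESOURCES
 *
 */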
DAT_RETURN
dapls_ib_mw_bind(
        IN  DAPL_RMR            *rmr,
        IN  DAT_LMR_CONTEXT     lmr_context,
        IN  DAPL_EP             *ep,
        IN  DAPL_COOKIE         *cookie,
        IN  DAT_VADDR           virtual_address,
        IN  DAT_VLEN            length,
        IN  DAT_MEM_PRIV_FLAGS  mem_priv,
        IN  DAT_COMPLETION_FLAGS completion_flags)
{
        ibt_send_wr_t   wre;
        ibt_wr_bind_t   wrbind;
        boolean_t       suppress_notification;
        int             retval;
        int             saved_errno;

        /*
         * A zero-length bind carries no access flags at all; this is how
         * dapls_ib_mw_unbind() requests an unbind.
         */
        if (length > 0) {
                wrbind.bind_flags = (ibt_bind_flags_t)
                    (dapl_rmr_convert_privileges(mem_priv) |
                    IBT_WR_BIND_ATOMIC);
        } else {
                wrbind.bind_flags = (ibt_bind_flags_t)0;
        }
        wrbind.bind_rkey = rmr->param.rmr_context;
        wrbind.bind_va = virtual_address;
        wrbind.bind_len = length;
        wrbind.bind_lkey = lmr_context;

        /* the cookie comes back to us as the work request id */
        wre.wr_id = (ibt_wrid_t)(uintptr_t)cookie;
        /*
         * wre.wr_flags = (is_signaled) ? IBT_WR_SEND_SIGNAL :
         *   IBT_WR_NO_FLAGS;
         * Till we fix the chan alloc flags do the following -
         */
        /* Translate dapl flags */
        wre.wr_flags = (DAT_COMPLETION_BARRIER_FENCE_FLAG &
            completion_flags) ? IBT_WR_SEND_FENCE : 0;
        /* suppress completions */
        wre.wr_flags |= (DAT_COMPLETION_SUPPRESS_FLAG &
            completion_flags) ? 0 : IBT_WR_SEND_SIGNAL;

        /* a bind request carries no scatter/gather list */
        wre.wr_trans = IBT_RC_SRV;
        wre.wr_opcode = IBT_WRC_BIND;
        wre.wr_nds = 0;
        wre.wr_sgl = NULL;
        wre.wr.rc.rcwr.bind = &wrbind;

        dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
            "mw_bind: rmr 0x%p, wr_flags 0x%x, rkey 0x%x, bind_flags 0x%x\n"
            "         bind_va 0x%llx, bind_len 0x%llx, mem_priv 0x%x\n",
            rmr, wre.wr_flags, wrbind.bind_rkey, wrbind.bind_flags,
            wrbind.bind_va, wrbind.bind_len, mem_priv);

        if (ep->param.ep_attr.recv_completion_flags &
            DAT_COMPLETION_UNSIGNALLED_FLAG) {
                /* This flag is used to control notification of completions */
                suppress_notification = (completion_flags &
                    DAT_COMPLETION_UNSIGNALLED_FLAG) ? B_TRUE : B_FALSE;
        } else {
                /*
                 * The evd waiter will use threshold to control wakeups
                 * Hence the event notification will be done via arming the
                 * CQ so we do not need special notification generation
                 * hence set suppression to true
                 */
                suppress_notification = B_TRUE;
        }

        retval = DAPL_SEND(ep)(ep, &wre, suppress_notification);

        if (retval != 0) {
                /* capture errno before logging can disturb it */
                saved_errno = errno;
                dapl_dbg_log(DAPL_DBG_TYPE_ERR,
                    "mw_bind: rmr 0x%p, failed (%s)\n", rmr,
                    strerror(saved_errno));
                return (dapls_convert_error(saved_errno, retval));
        }

        /* the rkey reported back in the bind request replaces the old one */
        dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
            "mw_bind: new_rkey = 0x%x\n", wrbind.bind_rkey_out);
        rmr->param.rmr_context = (DAT_RMR_CONTEXT) wrbind.bind_rkey_out;

        return (DAT_SUCCESS);
}

/*
 * dapls_ib_mw_unbind
 *
 * Unbind a memory window by issuing a zero-length bind with no privileges
 *
 * Input:
 *      rmr                     Initialized rmr to hold binding handles
 *
 * Output:
 *      none
 *
 * Returns:
 *      DAT_SUCCESS
 *      DAT_INSUFFICIENT_RESOURCES
 *
 */
DAT_RETURN
dapls_ib_mw_unbind(
        IN  DAPL_RMR            *rmr,
        IN  DAT_LMR_CONTEXT     lmr_context,
        IN  DAPL_EP             *ep,
        IN  DAPL_COOKIE         *cookie,
        IN  DAT_COMPLETION_FLAGS completion_flags)
{
        DAT_RETURN retval;

        dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
            "mw_unbind: rmr 0x%p, enter\n", rmr);

        /*
         * An unbind is a bind of address 0, length 0 and no privileges;
         * the status of that bind is returned unchanged.
         */
        retval = dapls_ib_mw_bind(rmr, lmr_context, ep, cookie,
            (DAT_VADDR)0, (DAT_VLEN)0, (DAT_MEM_PRIV_FLAGS)0,
            completion_flags);

        dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
            "mw_unbind: rmr 0x%p, exit\n\n", rmr);

        return (retval);
}

/*
 * Processes async events and calls appropriate callbacks so that events
 * can be posted to the async evd.
 */
void
dapls_ib_async_callback(
        IN    DAPL_EVD            *async_evd,
        IN    ib_hca_handle_t     hca_handle,
        IN    ib_error_record_t   *event_ptr,
        IN    void                *context)
{
        DAPL_IA                 *owner_ia = (DAPL_IA *)context;
        dapl_ib_async_event_t   *evp;
        DAPL_EP                 *ep;
        DAPL_EVD                *evd;

        dapl_os_assert(event_ptr != NULL);
        evp = (dapl_ib_async_event_t *)event_ptr;

        switch (evp->ibae_type) {
        case IBT_ERROR_INVALID_REQUEST_CHAN:
        case IBT_ERROR_CATASTROPHIC_CHAN:
                /*
                 * Channel errors: the event cookie is compared against
                 * each EP on the IA's list; the matching EP is handed to
                 * the QP error callback.
                 */
                dapl_os_assert(!dapl_llist_is_empty(&owner_ia->ep_list_head));
                dapl_os_lock(&owner_ia->header.lock);

                for (ep = (DAPL_EP *)dapl_llist_next_entry(
                    &owner_ia->ep_list_head, NULL);
                    ep != NULL;
                    ep = (DAPL_EP *)dapl_llist_next_entry(
                    &owner_ia->ep_list_head, &ep->header.ia_list_entry)) {
                        if (ep == (DAPL_EP *)(uintptr_t)evp->ibae_cookie) {
                                break;
                        }
                }

                dapl_os_unlock(&owner_ia->header.lock);
                dapl_os_assert(ep != NULL);
                dapl_evd_qp_async_error_callback(hca_handle, NULL, event_ptr,
                    (void *)ep);
                break;

        case IBT_ERROR_CQ:
                /*
                 * CQ errors: the event cookie is compared against each
                 * EVD on the IA's list; the matching EVD is handed to the
                 * CQ error callback.
                 */
                dapl_os_assert(!dapl_llist_is_empty(&owner_ia->evd_list_head));
                dapl_os_lock(&owner_ia->header.lock);

                for (evd = (DAPL_EVD *)dapl_llist_next_entry(
                    &owner_ia->evd_list_head, NULL);
                    evd != NULL;
                    evd = (DAPL_EVD *)dapl_llist_next_entry(
                    &owner_ia->evd_list_head, &evd->header.ia_list_entry)) {
                        if (evd == (DAPL_EVD *)(uintptr_t)evp->ibae_cookie) {
                                break;
                        }
                }

                dapl_os_unlock(&owner_ia->header.lock);
                dapl_os_assert(evd != NULL);
                dapl_evd_cq_async_error_callback(hca_handle, NULL, event_ptr,
                    (void *)evd);
                break;

        case IBT_ERROR_PORT_DOWN:
        case IBT_ERROR_LOCAL_CATASTROPHIC:
                /* unaffiliated errors go straight to the async EVD */
                dapl_evd_un_async_error_callback(hca_handle, event_ptr,
                    (void *)async_evd);
                break;

        default:
                /*
                 * Everything else is only traced, e.g.
                 *   IBT_EVENT_PATH_MIGRATED
                 *   IBT_EVENT_COM_EST
                 *   IBT_EVENT_SQD
                 *   IBT_ERROR_PATH_MIGRATE_REQ
                 *   IBT_EVENT_PORT_UP
                 */
                dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
                    "dapls_ib_async_callback: unhandled async code:%x\n",
                    evp->ibae_type);
                break;
        }
}

/*
 * dapls_ib_setup_async_callback
 * The reference implementation calls this to register callbacks,
 * but since our model of polling for events is based on retrieving
 * events by the waiting thread itself this is a NOOP for us.
 */
/* ARGSUSED */
DAT_RETURN
dapls_ib_setup_async_callback(
        IN  DAPL_IA                     *ia_ptr,
        IN  DAPL_ASYNC_HANDLER_TYPE     handler_type,
        IN  unsigned int                *callback_handle,
        IN  ib_async_handler_t          callback,
        IN  void                        *context)
{
        /* No argument is examined; success is reported unconditionally. */
        return (DAT_SUCCESS);
}

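/*
 * dapls_ib_query_hca
 *
 * Query the driver for the HCA attributes and translate them into the
 * IA, EP and SRQ attribute structures requested by the caller
 *
 * Input:
 *      hca_ptr                 hca to query
 *
 * Output:
 *      ia_attr                 attributes of the ia (skipped if NULL)
 *      ep_attr                 attributes of the ep (skipped if NULL)
 *      srq_attr                attributes of the srq (skipped if NULL)
 *
 * Returns:
 *      DAT_SUCCESS
 *      DAT_INVALID_PARAMETER
 */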

/* these are just arbitrary values for now */

static DAT_RETURN
dapls_ib_query_ia(
        IN  dapl_hca_attr_t             *hca_attr,
        IN  DAPL_HCA                    *hca_ptr,
        OUT DAT_IA_ATTR                 *ia_attr)
{
        DAT_COUNT       chan_count;
        DAT_COUNT       sgl_count;

        /* every field not assigned below is reported as zero */
        (void) dapl_os_memzero(ia_attr, sizeof (DAT_IA_ATTR));

        /* values that feed more than one attribute */
        chan_count = (DAT_COUNT)hca_attr->dhca_max_chans;
        sgl_count = (DAT_COUNT)hca_attr->dhca_max_sgl;

        /* adapter identity */
        (void) dapl_os_strcpy(ia_attr->adapter_name, VN_ADAPTER_NAME);
        (void) sprintf(ia_attr->vendor_name, "0x%08x:0x%08x",
            hca_attr->dhca_vendor_id, hca_attr->dhca_device_id);
        ia_attr->hardware_version_major = (DAT_UINT32)hca_attr->dhca_version_id;
        ia_attr->ia_address_ptr = (DAT_IA_ADDRESS_PTR)&hca_ptr->hca_address;

        /* endpoint and event dispatcher limits */
        ia_attr->max_eps = chan_count;
        ia_attr->max_dto_per_ep = (DAT_COUNT)hca_attr->dhca_max_chan_sz;
        ia_attr->max_evds = (DAT_COUNT)hca_attr->dhca_max_cq;
        ia_attr->max_evd_qlen = (DAT_COUNT)hca_attr->dhca_max_cq_sz;

        /* scatter/gather limits; max_iov_segments_per_dto is for non-RDMA */
        ia_attr->max_iov_segments_per_dto = sgl_count;
        ia_attr->max_iov_segments_per_rdma_read = sgl_count;
        ia_attr->max_iov_segments_per_rdma_write = sgl_count;

        /* memory region, memory window and protection zone limits */
        ia_attr->max_lmrs = (DAT_COUNT)hca_attr->dhca_max_memr;
        ia_attr->max_lmr_block_size = (DAT_VLEN)hca_attr->dhca_max_memr_len;
        ia_attr->max_lmr_virtual_address = (DAT_VADDR)DAPL_MAX_ADDRESS;
        ia_attr->max_rmrs = (DAT_COUNT)hca_attr->dhca_max_mem_win;
        ia_attr->max_rmr_target_address = (DAT_VADDR)DAPL_MAX_ADDRESS;
        ia_attr->max_pzs = (DAT_COUNT)hca_attr->dhca_max_pd;

        /* message sizes */
        ia_attr->max_mtu_size = (DAT_VLEN)DAPL_IB_MAX_MESSAGE_SIZE;
        ia_attr->max_rdma_size = (DAT_VLEN)DAPL_IB_MAX_MESSAGE_SIZE;

        /* RDMA read depth, per EP and across all instances of IA */
        ia_attr->max_rdma_read_per_ep_in = hca_attr->dhca_max_rdma_in_chan;
        ia_attr->max_rdma_read_per_ep_out = hca_attr->dhca_max_rdma_out_chan;
        ia_attr->max_rdma_read_in = hca_attr->dhca_max_rdma_in_chan *
            hca_attr->dhca_max_chans;
        ia_attr->max_rdma_read_out = hca_attr->dhca_max_rdma_out_chan *
            hca_attr->dhca_max_chans;
        ia_attr->max_rdma_read_per_ep_in_guaranteed = DAT_TRUE;
        ia_attr->max_rdma_read_per_ep_out_guaranteed = DAT_TRUE;

        /* shared receive queue limits */
        ia_attr->max_srqs = (DAT_COUNT)hca_attr->dhca_max_srqs;
        ia_attr->max_ep_per_srq = chan_count;
        ia_attr->max_recv_per_srq = (DAT_COUNT)hca_attr->dhca_max_srqs_sz;

        dapl_dbg_log(DAPL_DBG_TYPE_UTIL, "IA Attributes:\n"
            "\tadapter_name %s\n "
            "\tvendor_name %s\n "
            "\thardware_version_major 0x%08x\n"
            "\tmax_eps %d\n"
            "\tmax_dto_per_ep %d\n"
            "\tmax_rdma_read_per_ep_in %d\n"
            "\tmax_rdma_read_per_ep_out %d\n"
            "\tmax_evds %d\n"
            "\tmax_evd_qlen %d\n"
            "\tmax_iov_segments_per_dto %d\n"
            "\tmax_lmrs %d\n"
            "\tmax_lmr_block_size 0x%016llx\n"
            "\tmax_lmr_virtual_address 0x%016llx\n"
            "\tmax_pzs %d\n"
            "\tmax_mtu_size 0x%016llx\n"
            "\tmax_rdma_size 0x%016llx\n"
            "\tmax_rmrs %d\n"
            "\tmax_rmr_target_address 0x%016llx\n"
            "\tmax_iov_segments_per_rdma_read %d\n"
            "\tmax_iov_segments_per_rdma_write %d\n"
            "\tmax_rdma_read_in %d\n"
            "\tmax_rdma_read_out %d\n"
            "\tmax_srqs %d\n"
            "\tmax_ep_per_srq %d\n"
            "\tmax_recv_per_srq %d\n"
            "\n",
            ia_attr->adapter_name,
            ia_attr->vendor_name,
            ia_attr->hardware_version_major,
            ia_attr->max_eps,
            ia_attr->max_dto_per_ep,
            ia_attr->max_rdma_read_per_ep_in,
            ia_attr->max_rdma_read_per_ep_out,
            ia_attr->max_evds,
            ia_attr->max_evd_qlen,
            ia_attr->max_iov_segments_per_dto,
            ia_attr->max_lmrs,
            ia_attr->max_lmr_block_size,
            ia_attr->max_lmr_virtual_address,
            ia_attr->max_pzs,
            ia_attr->max_mtu_size,
            ia_attr->max_rdma_size,
            ia_attr->max_rmrs,
            ia_attr->max_rmr_target_address,
            ia_attr->max_iov_segments_per_rdma_read,
            ia_attr->max_iov_segments_per_rdma_write,
            ia_attr->max_rdma_read_in,
            ia_attr->max_rdma_read_out,
            ia_attr->max_srqs,
            ia_attr->max_ep_per_srq,
            ia_attr->max_recv_per_srq);

        return (DAT_SUCCESS);
}

/* ARGSUSED */
static DAT_RETURN
dapls_ib_query_ep(
        IN  dapl_hca_attr_t             *hca_attr,
        IN  DAPL_HCA                    *hca_ptr,
        OUT DAT_EP_ATTR                 *ep_attr)
{
        /* every field not assigned below is reported as zero */
        (void) dapl_os_memzero(ep_attr, sizeof (DAT_EP_ATTR));

        /* fixed characteristics, independent of the HCA query */
        ep_attr->service_type = DAT_SERVICE_TYPE_RC;
        ep_attr->qos = DAT_QOS_BEST_EFFORT;
        ep_attr->srq_soft_hw = DAT_HW_DEFAULT;
        ep_attr->max_mtu_size = DAPL_IB_MAX_MESSAGE_SIZE;
        ep_attr->max_rdma_size = DAPL_IB_MAX_MESSAGE_SIZE;
        ep_attr->recv_completion_flags = DAT_COMPLETION_DEFAULT_FLAG;
        ep_attr->request_completion_flags = DAT_COMPLETION_DEFAULT_FLAG;

        /* receive and request sides share the same HCA limits */
        ep_attr->max_recv_dtos = hca_attr->dhca_max_chan_sz;
        ep_attr->max_recv_iov = hca_attr->dhca_max_sgl;
        ep_attr->max_request_dtos = hca_attr->dhca_max_chan_sz;
        ep_attr->max_request_iov = hca_attr->dhca_max_sgl;

        return (DAT_SUCCESS);
}

static void
dapls_ib_query_srq(
        IN  dapl_hca_attr_t             *hca_attr,
        OUT DAT_SRQ_ATTR                *srq_attr)
{
        /* every field not assigned below is reported as zero */
        (void) dapl_os_memzero(srq_attr, sizeof (DAT_SRQ_ATTR));

        srq_attr->low_watermark = DAT_SRQ_LW_DEFAULT;
        srq_attr->max_recv_iov = hca_attr->dhca_max_srq_sgl;
        srq_attr->max_recv_dtos = hca_attr->dhca_max_srqs_sz;
}

/* ARGSUSED */
DAT_RETURN
dapls_ib_query_hca(
        IN  DAPL_HCA                    *hca_ptr,
        OUT DAT_IA_ATTR                 *ia_attr,
        OUT DAT_EP_ATTR                 *ep_attr,
        OUT DAT_SOCK_ADDR6              *ip_addr,
        OUT DAT_SRQ_ATTR                *srq_attr)
{
        dapl_ia_query_t args;
        int ia_fd, retval;

        /*
         * Reject unusable arguments before calling into the driver: there
         * must be an HCA to query and at least one attribute structure to
         * fill in.  ip_addr is not used by this routine.
         */
        if (hca_ptr == NULL) {
                return (DAT_INVALID_PARAMETER);
        }
        if (ia_attr == NULL && ep_attr == NULL && srq_attr == NULL) {
                return (DAT_INVALID_PARAMETER);
        }

        ia_fd = hca_ptr->ib_hca_handle->ia_fd;
        retval = ioctl(ia_fd, DAPL_IA_QUERY, &args);
        if (retval != 0) {
                return (dapls_convert_error(errno, retval));
        }

        /* translate the driver's answer into each requested structure */
        if (ia_attr != NULL) {
                (void) dapls_ib_query_ia(&args.hca_attr, hca_ptr, ia_attr);
        }
        if (ep_attr != NULL) {
                (void) dapls_ib_query_ep(&args.hca_attr, hca_ptr, ep_attr);
        }
        if (srq_attr != NULL) {
                dapls_ib_query_srq(&args.hca_attr, srq_attr);
        }
        return (DAT_SUCCESS);
}

void
dapls_ib_store_premature_events(
        IN ib_qp_handle_t       qp_ptr,
        IN ib_work_completion_t *cqe_ptr)
{
        ib_srq_handle_t srq;
        int             slot;

        if (!qp_ptr->qp_srq_enabled) {
                /*
                 * No SRQ: append the cqe to the QP's own premature event
                 * array at the current event count.
                 */
                (void) dapl_os_memcpy(
                    &qp_ptr->qp_premature_events[
                    qp_ptr->qp_num_premature_events],
                    cqe_ptr, sizeof (ib_work_completion_t));
        } else {
                /*
                 * SRQ attached: the cqe goes into the SRQ's premature
                 * event array, in the slot named by the head of the
                 * SRQ's free-slot ring.
                 */
                srq = qp_ptr->qp_srq;
                dapl_os_assert(srq->srq_freepr_num_events > 0);
                slot = srq->srq_freepr_events[srq->srq_freepr_head];

                /* the stored copy must carry the valid mark */
                DAPL_SET_CQE_VALID(cqe_ptr);
                (void) dapl_os_memcpy(&srq->srq_premature_events[slot],
                    cqe_ptr, sizeof (ib_work_completion_t));

                /* consume the free slot */
                srq->srq_freepr_head =
                    (srq->srq_freepr_head + 1) % srq->srq_wq_numwqe;
                srq->srq_freepr_num_events -= 1;
        }

        /* the per-QP count is maintained in both cases */
        qp_ptr->qp_num_premature_events += 1;
}

void
dapls_ib_poll_premature_events(
        IN  DAPL_EP                     *ep_ptr,
        OUT ib_work_completion_t        **cqe_ptr,
        OUT int                         *nevents)
{
        ib_qp_handle_t  qp_hdl = ep_ptr->qp_handle;

        /* hand back the count and reset it on the QP */
        *nevents = qp_hdl->qp_num_premature_events;
        qp_hdl->qp_num_premature_events = 0;

        /* the events live on the SRQ when one is attached, else on the QP */
        *cqe_ptr = qp_hdl->qp_srq_enabled ?
            qp_hdl->qp_srq->srq_premature_events :
            qp_hdl->qp_premature_events;
}

/*
 * Return the premature events to the free list after processing it
 * This function is called only for premature events on the SRQ
 */
void
dapls_ib_free_premature_events(
        IN  DAPL_EP     *ep_ptr,
        IN  int         free_index)
{
        ib_qp_handle_t  qp = ep_ptr->qp_handle;
        ib_srq_handle_t srq = qp->qp_srq;
        int             slot;

        dapl_os_assert(qp->qp_srq_enabled);

        /* put free_index back at the tail of the SRQ's free-slot ring */
        slot = srq->srq_freepr_tail;
        srq->srq_freepr_events[slot] = free_index;
        srq->srq_freepr_tail = (slot + 1) % srq->srq_wq_numwqe;
        srq->srq_freepr_num_events += 1;

        /* the stored cqe is no longer valid */
        DAPL_SET_CQE_INVALID(&srq->srq_premature_events[free_index]);
}

/*
 * dapls_ib_get_async_event
 *
 * Translate an asynchronous event type to the DAT event.
 * Note that different providers have different sets of errors.
 *
 * Input:
 *      cause_ptr               provider event cause
 *
 * Output:
 *      async_event             DAT mapping of error
 *
 * Returns:
 *      DAT_SUCCESS
 *      DAT_NOT_IMPLEMENTED     Caller is not interested in this event
 */

DAT_RETURN dapls_ib_get_async_event(
        IN  ib_error_record_t           *cause_ptr,
        OUT DAT_EVENT_NUMBER            *async_event)
{
        ibt_async_code_t        code;
        DAT_RETURN              dat_status;

        dat_status = DAT_SUCCESS;
        /*
         * View the record as a dapl_ib_async_event_t and read its type.
         * The pointer cast must be parenthesized: "->" binds tighter than
         * a cast, so without the parentheses the type value itself would
         * be converted to a pointer.
         */
        code = (ibt_async_code_t)
            ((dapl_ib_async_event_t *)cause_ptr)->ibae_type;

        switch (code) {
        case IBT_ERROR_CQ:
        case IBT_ERROR_ACCESS_VIOLATION_CHAN:
        case IBT_ERROR_INVALID_REQUEST_CHAN:
                *async_event = DAT_ASYNC_ERROR_PROVIDER_INTERNAL_ERROR;
                break;
        /* CATASTROPHIC errors */
        case IBT_ERROR_CATASTROPHIC_CHAN:
        case IBT_ERROR_LOCAL_CATASTROPHIC:
        case IBT_ERROR_PORT_DOWN:
                *async_event = DAT_ASYNC_ERROR_IA_CATASTROPHIC;
                break;
        default:
                /*
                 * Errors we are not interested in reporting:
                 * IBT_EVENT_PATH_MIGRATED
                 * IBT_ERROR_PATH_MIGRATE_REQ
                 * IBT_EVENT_COM_EST
                 * IBT_EVENT_SQD
                 * IBT_EVENT_PORT_UP
                 *
                 * *async_event is left untouched in this case.
                 */
                dat_status = DAT_NOT_IMPLEMENTED;
                break;
        }
        return (dat_status);
}

DAT_RETURN
dapls_ib_event_poll(
        IN DAPL_EVD             *evd_ptr,
        IN uint64_t             timeout,
        IN uint_t               threshold,
        OUT dapl_ib_event_t     *evp_ptr,
        OUT int                 *num_events)
{
        dapl_event_poll_t       evp_msg;
        int                     ia_fd;
        int                     retval;
        int                     saved_errno;

        *num_events = 0;
        ia_fd = evd_ptr->header.owner_ia->hca_ptr->ib_hca_handle->ia_fd;

        evp_msg.evp_evd_hkey = evd_ptr->ib_cq_handle->evd_hkey;
        evp_msg.evp_threshold = threshold;
        evp_msg.evp_timeout = timeout;
        evp_msg.evp_ep = evp_ptr;
        /*
         * With an event buffer, the event count passed to the driver is
         * derived from the EVD's own threshold (not the threshold
         * argument); without one it is 0.
         */
        if (evp_ptr) {
                evp_msg.evp_num_ev =
                    DAPL_MAX(evd_ptr->threshold, NUM_EVENTS_PER_POLL);
        } else {
                evp_msg.evp_num_ev = 0;
        }
        evp_msg.evp_num_polled = 0;

        dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
            "event_poll: evd 0x%p, hkey 0x%llx, threshold %d,\n"
            "            timeout 0x%llx, evp_ptr 0x%p, num_ev %d\n",
            evd_ptr, evp_msg.evp_evd_hkey, evp_msg.evp_threshold,
            timeout, evp_ptr, evp_msg.evp_num_ev);

        /*
         * Poll the EVD and if there are no events then we wait in
         * the kernel.
         */
        retval = ioctl(ia_fd, DAPL_EVENT_POLL, &evp_msg);
        if (retval != 0) {
                /*
                 * The DAT status is derived from errno, so capture it
                 * before the debug log can disturb it.  The polled count
                 * is still reported to the caller on failure.
                 */
                saved_errno = errno;
                dapl_dbg_log(DAPL_DBG_TYPE_EVD,
                    "event_poll: evd 0x%p, retval %d err: %s\n",
                    evd_ptr, retval, strerror(saved_errno));
                *num_events = evp_msg.evp_num_polled;
                return (dapls_convert_error(saved_errno, retval));
        }

        dapl_dbg_log(DAPL_DBG_TYPE_EVD,
            "dapls_ib_event_poll: evd %p nevents %d\n", evd_ptr,
            evp_msg.evp_num_polled);

        *num_events = evp_msg.evp_num_polled;

        return (DAT_SUCCESS);
}

DAT_RETURN
dapls_ib_event_wakeup(
        IN DAPL_EVD             *evd_ptr)
{
        dapl_event_wakeup_t     evw_msg;
        int                     ia_fd;
        int                     retval;
        int                     saved_errno;

        ia_fd = evd_ptr->header.owner_ia->hca_ptr->ib_hca_handle->ia_fd;

        evw_msg.evw_hkey = evd_ptr->ib_cq_handle->evd_hkey;

        dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
            "event_wakeup: evd 0x%p, hkey 0x%llx\n",
            evd_ptr, evw_msg.evw_hkey);

        /*
         * Wakeup any thread waiting in the kernel on this EVD
         */
        retval = ioctl(ia_fd, DAPL_EVENT_WAKEUP, &evw_msg);
        if (retval != 0) {
                /* capture errno before the debug log can disturb it */
                saved_errno = errno;
                dapl_dbg_log(DAPL_DBG_TYPE_EVD,
                    "event_wakeup: evd 0x%p, retval %d err: %s\n",
                    evd_ptr, retval, strerror(saved_errno));
                return (dapls_convert_error(saved_errno, retval));
        }

        return (DAT_SUCCESS);
}

/*
 * dapls_ib_cq_peek
 *
 * Used by dapl_cno_wait() to find out how many completions are pending
 * on an EVD's CQ.  Only EVDs carrying DTO or RMR bind events are peeked;
 * for any other EVD *num_cqe is simply reported as 0.  If the peek finds
 * the CQ empty, the CQ is armed for notification.
 *
 * Input:
 *      evd_ptr                 pointer to EVD struct
 *
 * Output:
 *      num_cqe                 number of CQEs reported by the peek
 *
 * Returns:
 *      none
 *
 */
void dapls_ib_cq_peek(
        IN DAPL_EVD     *evd_ptr,
        OUT int         *num_cqe)
{
        DAPL_IA         *owner;

        *num_cqe = 0;

        /* Nothing to peek at unless this EVD is backed by DTO/RMR events */
        if ((evd_ptr->evd_flags &
            (DAT_EVD_DTO_FLAG | DAT_EVD_RMR_BIND_FLAG)) == 0) {
                return;
        }

        DAPL_PEEK(evd_ptr)(evd_ptr->ib_cq_handle, num_cqe);
        if (*num_cqe != 0) {
                return;
        }

        /*
         * The CQ is empty, so arm it now.  The status of the arm request
         * is deliberately discarded.
         */
        owner = evd_ptr->header.owner_ia;
        (void) dapls_set_cq_notify(owner, evd_ptr);
        dapl_dbg_log(DAPL_DBG_TYPE_EVD,
            "dapls_ib_cq_peek: set_cq_notify\n");
}

/*
 * dapls_ib_modify_cno
 *
 * Modifies the CNO associated to an EVD by issuing the
 * DAPL_EVD_MODIFY_CNO ioctl on the owning IA's file descriptor.
 *
 * Input:
 *      evd_ptr                 pointer to EVD struct
 *      cno_ptr                 pointer to the new CNO struct, or NULL;
 *                              a NULL cno_ptr sends a CNO hkey of 0
 *
 * Output:
 *      none
 *
 * Returns:
 *      DAT_SUCCESS
 *      result of dapls_convert_error() if the ioctl fails
 *
 */
DAT_RETURN dapls_ib_modify_cno(
        IN DAPL_EVD     *evd_ptr,
        IN DAPL_CNO     *cno_ptr)
{
        dapl_evd_modify_cno_t   evmc_msg;
        int                     ia_fd;
        int                     retval;

        ia_fd = evd_ptr->header.owner_ia->hca_ptr->ib_hca_handle->ia_fd;

        evmc_msg.evmc_hkey = evd_ptr->ib_cq_handle->evd_hkey;

        if (cno_ptr) {
                evmc_msg.evmc_cno_hkey = (uint64_t)cno_ptr->ib_cno_handle;
        } else {
                evmc_msg.evmc_cno_hkey = 0;
        }

        dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
            "modify_cno: evd 0x%p, hkey 0x%llx, cno 0x%p, cno_hkey 0x%llx\n",
            evd_ptr, evmc_msg.evmc_hkey, cno_ptr, evmc_msg.evmc_cno_hkey);

        /*
         * modify CNO associated with the EVD
         */
        retval = ioctl(ia_fd, DAPL_EVD_MODIFY_CNO, &evmc_msg);
        if (retval != 0) {
                /*
                 * Capture errno right away: the logging call below may
                 * invoke library routines that overwrite it before it
                 * is handed to dapls_convert_error().
                 */
                int     saved_errno = errno;

                dapl_dbg_log(DAPL_DBG_TYPE_EVD,
                    "modify_cno: evd 0x%p, cno %p retval %d err: %s\n",
                    evd_ptr, cno_ptr, retval, strerror(saved_errno));
                return (dapls_convert_error(saved_errno, retval));
        }

        return (DAT_SUCCESS);
}

/*
 * dapls_ib_cno_wait
 *
 * Issue the DAPL_CNO_WAIT ioctl for the given CNO and report the EVD
 * cookie that the ioctl hands back.
 *
 * Input:
 *      cno_ptr                 pointer to CNO struct
 *      timeout                 wait timeout; DAT_TIMEOUT_INFINITE is
 *                              passed down as UINT64_MAX, any other
 *                              value is passed as its low 32 bits
 *
 * Output:
 *      evd_ptr_p               EVD taken from the cnw_evd_cookie field
 *                              filled in by the ioctl; set to NULL if
 *                              the ioctl fails
 *
 * Returns:
 *      DAT_SUCCESS
 *      result of dapls_convert_error() if the ioctl fails
 *
 */
DAT_RETURN
dapls_ib_cno_wait(
        IN DAPL_CNO     *cno_ptr,
        IN DAT_TIMEOUT  timeout,
        IN DAPL_EVD     **evd_ptr_p)
{
        dapl_cno_wait_t         args;
        int                     retval;

        args.cnw_hkey = (uint64_t)cno_ptr->ib_cno_handle;
        if (timeout == DAT_TIMEOUT_INFINITE) {
                args.cnw_timeout = UINT64_MAX;
        } else {
                args.cnw_timeout = (uint64_t)timeout & 0x00000000ffffffff;
        }

        dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
            "cno_wait: cno 0x%p, hkey 0x%016llx, timeout 0x%016llx\n",
            cno_ptr, args.cnw_hkey, args.cnw_timeout);

        retval = ioctl(cno_ptr->header.owner_ia->hca_ptr->
            ib_hca_handle->ia_fd, DAPL_CNO_WAIT, &args);

        if (retval != 0) {
                /*
                 * Capture errno right away: the logging call below may
                 * invoke library routines that overwrite it before it
                 * is handed to dapls_convert_error().
                 */
                int     saved_errno = errno;

                *evd_ptr_p = (DAPL_EVD *)NULL;
                dapl_dbg_log(DAPL_DBG_TYPE_ERR,
                    "cno_wait: cno 0x%p ioctl err: %s\n",
                    cno_ptr, strerror(saved_errno));
                return (dapls_convert_error(saved_errno, retval));
        }

        *evd_ptr_p = (DAPL_EVD *)(uintptr_t)args.cnw_evd_cookie;
        dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
            "cno_wait: woken up, cno 0x%p, evd 0x%p\n\n",
            cno_ptr, *evd_ptr_p);

        return (DAT_SUCCESS);
}

/*
 * dapls_ib_cno_alloc
 *
 * Allocate a CNO through the DAPL_CNO_ALLOC ioctl and record the hkey
 * returned by the ioctl in cno_ptr->ib_cno_handle.  CNOs configured
 * with a wait agent (instance data or proxy function) are rejected.
 *
 * Input:
 *      ia_ptr                  pointer to IA struct
 *      cno_ptr                 pointer to CNO struct
 *
 * Output:
 *      none
 *
 * Returns:
 *      DAT_SUCCESS
 *      DAT_NOT_IMPLEMENTED     the CNO has a wait agent set
 *      result of dapls_convert_error() if the ioctl fails
 *
 */
DAT_RETURN
dapls_ib_cno_alloc(
        IN DAPL_IA      *ia_ptr,
        IN DAPL_CNO     *cno_ptr)
{
        dapl_cno_alloc_t        args;
        int                     retval;

        if (cno_ptr->cno_wait_agent.instance_data != NULL ||
            cno_ptr->cno_wait_agent.proxy_agent_func != NULL) {
                dapl_dbg_log(DAPL_DBG_TYPE_ERR,
                    "cno_alloc: cno 0x%p, wait_agent != NULL\n", cno_ptr);
                return (DAT_NOT_IMPLEMENTED);
        }

        retval = ioctl(ia_ptr->hca_ptr->ib_hca_handle->ia_fd,
            DAPL_CNO_ALLOC, &args);
        if (retval != 0) {
                /*
                 * Capture errno right away: the logging call below may
                 * invoke library routines that overwrite it before it
                 * is handed to dapls_convert_error().
                 */
                int     saved_errno = errno;

                dapl_dbg_log(DAPL_DBG_TYPE_ERR,
                    "cno_alloc: cno 0x%p ioctl err: %s\n",
                    cno_ptr, strerror(saved_errno));
                return (dapls_convert_error(saved_errno, retval));
        }

        cno_ptr->ib_cno_handle = (ib_cno_handle_t)args.cno_hkey;
        dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
            "cno_alloc: cno 0x%p allocated, ia_ptr 0x%p, hkey 0x%016llx\n",
            cno_ptr, ia_ptr, args.cno_hkey);

        return (DAT_SUCCESS);
}

/*
 * dapls_ib_cno_free
 *
 * Free a CNO by issuing the DAPL_CNO_FREE ioctl with the hkey stored
 * in cno_ptr->ib_cno_handle.
 *
 * Input:
 *      cno_ptr                 pointer to CNO struct
 *
 * Output:
 *      none
 *
 * Returns:
 *      DAT_SUCCESS
 *      result of dapls_convert_error() if the ioctl fails
 *
 */
DAT_RETURN
dapls_ib_cno_free(
        IN DAPL_CNO     *cno_ptr)
{
        dapl_cno_free_t         args;
        int                     retval;

        args.cnf_hkey = (uint64_t)cno_ptr->ib_cno_handle;
        dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
            "cno_free: cno 0x%p, hkey 0x%016llx\n",
            cno_ptr, args.cnf_hkey);

        retval = ioctl(cno_ptr->header.owner_ia->hca_ptr->
            ib_hca_handle->ia_fd, DAPL_CNO_FREE, &args);

        if (retval != 0) {
                /*
                 * Capture errno right away: the logging call below may
                 * invoke library routines that overwrite it before it
                 * is handed to dapls_convert_error().
                 */
                int     saved_errno = errno;

                dapl_dbg_log(DAPL_DBG_TYPE_ERR,
                    "cno_free: cno 0x%p ioctl err: %s\n",
                    cno_ptr, strerror(saved_errno));
                return (dapls_convert_error(saved_errno, retval));
        }

        dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
            "cno_free: cno 0x%p freed\n", cno_ptr);

        return (DAT_SUCCESS);
}

/*
 * dapls_convert_error
 *
 * Translate the outcome of a failed ioctl into a DAT_RETURN value.
 *
 * A negative retval means errnum is mapped as an errno value.  Any
 * other retval is logged and mapped as an IBTF error code.  Codes with
 * no specific mapping yield DAT_INTERNAL_ERROR.
 *
 * Input:
 *      errnum                  errno value recorded for the failed call
 *      retval                  value returned by the ioctl
 *
 * Returns:
 *      the DAT_RETURN value selected by the mapping above
 *
 */
DAT_RETURN
dapls_convert_error(int errnum, int retval)
{
        DAT_RETURN      dat_status;

        if (retval < 0) {
                /* Map the errno value */
                switch (errnum) {
                case EINVAL:
                        dat_status = DAT_INVALID_PARAMETER;
                        break;
                case ENOMEM:
                        dat_status = DAT_INSUFFICIENT_RESOURCES;
                        break;
                case ETIME:
                        dat_status = DAT_TIMEOUT_EXPIRED;
                        break;
                case EINTR:
                        dat_status = DAT_INTERRUPTED_CALL;
                        break;
                case EFAULT:
                default:
                        dat_status = DAT_INTERNAL_ERROR;
                        break;
                }
                return (dat_status);
        }

        /* Map the IBTF error code carried in retval */
        dapl_dbg_log(DAPL_DBG_TYPE_ERR,
            "ERROR: got IBTF error %d\n", retval);

        switch (retval) {
        case IBT_SERVICE_RECORDS_NOT_FOUND:
                /*
                 * Connecting to a non-existent conn qual gets
                 * us here
                 */
                dat_status = DAT_ERROR(DAT_INVALID_PARAMETER,
                    DAT_INVALID_ADDRESS_UNREACHABLE);
                break;
        case IBT_INSUFF_RESOURCE:
        case IBT_INSUFF_KERNEL_RESOURCE:
                dat_status = DAT_ERROR(DAT_INSUFFICIENT_RESOURCES, 0);
                break;
        case IBT_AR_NOT_REGISTERED:
                /*
                 * forward ipaddr lookup failed
                 */
                dat_status = DAT_ERROR(DAT_INVALID_ADDRESS, 0);
                break;
        default:
                dat_status = DAT_INTERNAL_ERROR;
                break;
        }

        return (dat_status);
}

/*
 * One entry in the list of doorbell pages that dapls_ib_get_dbp() has
 * already mapped.  An entry is matched on the (fd, dbp_mapoffset) pair.
 */
typedef struct dapls_ib_dbp_page_s {
        /* address returned by mmap64() for this page */
        uint32_t                        *dbp_page_addr;
        /* file offset that was passed to mmap64() */
        uint64_t                        dbp_mapoffset;
        /* next entry in the list, NULL at the tail */
        struct dapls_ib_dbp_page_s      *next;
        /* file descriptor that was passed to mmap64() */
        int                             fd;
} dapls_ib_dbp_page_t;

/*
 * Head of the mapped doorbell page list.  dapls_ib_get_dbp() searches
 * and prepends to it while holding dapls_ib_dbp_lock.
 */
dapls_ib_dbp_page_t     *dapls_ib_pagelist = NULL;

/*
 * dapls_ib_get_dbp
 *
 * Return a pointer to the specified doorbell entry.  The page holding
 * the entry is looked up in dapls_ib_pagelist by (fd, mapoffset); if it
 * has not been mapped yet it is mapped with mmap64() and prepended to
 * the list.  The list is searched and updated under dapls_ib_dbp_lock.
 *
 * Input:
 *      maplen                  length handed to mmap64() for a new mapping
 *      fd                      file descriptor to map from
 *      mapoffset               file offset of the doorbell page
 *      offset                  byte offset of the entry within the page
 *
 * Returns:
 *      address of the doorbell entry
 *      MAP_FAILED              if the list entry could not be allocated
 *                              or the mmap64() call failed
 *
 */
uint32_t *dapls_ib_get_dbp(uint64_t maplen, int fd, uint64_t mapoffset,
    uint32_t offset)
{
        dapls_ib_dbp_page_t     *page;
        uint32_t                *entry;

        dapl_os_lock(&dapls_ib_dbp_lock);

        /* Walk the list until a page with the same offset and fd turns up */
        page = dapls_ib_pagelist;
        while (page != NULL &&
            (page->dbp_mapoffset != mapoffset || page->fd != fd)) {
                page = page->next;
        }

        if (page == NULL) {
                /* Not mapped yet: map the page and put it at the list head */
                page = malloc(sizeof (dapls_ib_dbp_page_t));
                if (page == NULL) {
                        dapl_os_unlock(&dapls_ib_dbp_lock);
                        return (MAP_FAILED);
                }

                page->dbp_page_addr = (uint32_t *)(void *)mmap64(NULL,
                    maplen, PROT_READ | PROT_WRITE, MAP_SHARED, fd,
                    mapoffset);
                if (page->dbp_page_addr == MAP_FAILED) {
                        free(page);
                        dapl_os_unlock(&dapls_ib_dbp_lock);
                        return (MAP_FAILED);
                }

                page->dbp_mapoffset = mapoffset;
                page->fd = fd;
                page->next = dapls_ib_pagelist;
                dapls_ib_pagelist = page;
        }

        /* The entry lives 'offset' bytes into the mapped page */
        entry = (uint32_t *)(offset + (uintptr_t)page->dbp_page_addr);
        dapl_os_unlock(&dapls_ib_dbp_lock);

        return (entry);
}