/* root/usr/src/uts/common/io/ib/adapters/hermon/hermon_mr.c */
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*
 * hermon_mr.c
 *    Hermon Memory Region/Window Routines
 *
 *    Implements all the routines necessary to provide the requisite memory
 *    registration verbs.  These include operations like RegisterMemRegion(),
 *    DeregisterMemRegion(), ReregisterMemRegion, RegisterSharedMemRegion,
 *    etc., that affect Memory Regions.  It also includes the verbs that
 *    affect Memory Windows, including AllocMemWindow(), FreeMemWindow(),
 *    and QueryMemWindow().
 */

#include <sys/types.h>
#include <sys/conf.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/modctl.h>
#include <sys/esunddi.h>

#include <sys/ib/adapters/hermon/hermon.h>

extern uint32_t hermon_kernel_data_ro;
extern uint32_t hermon_user_data_ro;
extern int hermon_rdma_debug;

/*
 * Used by hermon_mr_keycalc() below to fill in the "unconstrained" portion
 * of Hermon memory keys (LKeys and RKeys)
 */
static  uint_t hermon_memkey_cnt = 0x00;
#define HERMON_MEMKEY_SHIFT     24

/* initial state of an MPT */
#define HERMON_MPT_SW_OWNERSHIP 0xF     /* memory regions */
#define HERMON_MPT_FREE         0x3     /* allocate lkey */

static int hermon_mr_common_reg(hermon_state_t *state, hermon_pdhdl_t pd,
    hermon_bind_info_t *bind, hermon_mrhdl_t *mrhdl, hermon_mr_options_t *op,
    hermon_mpt_rsrc_type_t mpt_type);
static int hermon_mr_common_rereg(hermon_state_t *state, hermon_mrhdl_t mr,
    hermon_pdhdl_t pd, hermon_bind_info_t *bind, hermon_mrhdl_t *mrhdl_new,
    hermon_mr_options_t *op);
static int hermon_mr_rereg_xlat_helper(hermon_state_t *state, hermon_mrhdl_t mr,
    hermon_bind_info_t *bind, hermon_mr_options_t *op, uint64_t *mtt_addr,
    uint_t sleep, uint_t *dereg_level);
static uint64_t hermon_mr_nummtt_needed(hermon_state_t *state,
    hermon_bind_info_t *bind, uint_t *mtt_pgsize);
static int hermon_mr_mem_bind(hermon_state_t *state, hermon_bind_info_t *bind,
    ddi_dma_handle_t dmahdl, uint_t sleep, uint_t is_buffer);
static void hermon_mr_mem_unbind(hermon_state_t *state,
    hermon_bind_info_t *bind);
static int hermon_mr_fast_mtt_write(hermon_state_t *state, hermon_rsrc_t *mtt,
    hermon_bind_info_t *bind, uint32_t mtt_pgsize_bits);
static int hermon_mr_fast_mtt_write_fmr(hermon_state_t *state,
    hermon_rsrc_t *mtt, ibt_pmr_attr_t *mem_pattr, uint32_t mtt_pgsize_bits);
static uint_t hermon_mtt_refcnt_inc(hermon_rsrc_t *rsrc);
static uint_t hermon_mtt_refcnt_dec(hermon_rsrc_t *rsrc);


/*
 * The Hermon umem_lockmemory() callback ops.  When userland memory is
 * registered, these callback ops are specified.  The hermon_umap_umemlock_cb()
 * callback will be called whenever the memory for the corresponding
 * ddi_umem_cookie_t is being freed.
 */
static struct umem_callback_ops hermon_umem_cbops = {
        UMEM_CALLBACK_VERSION,
        hermon_umap_umemlock_cb,        /* invoked when the locked umem is freed */
};



/*
 * hermon_mr_register()
 *    Context: Can be called from interrupt or base context.
 */
int
hermon_mr_register(hermon_state_t *state, hermon_pdhdl_t pd,
    ibt_mr_attr_t *mr_attr, hermon_mrhdl_t *mrhdl, hermon_mr_options_t *op,
    hermon_mpt_rsrc_type_t mpt_type)
{
        hermon_bind_info_t      bi;

        /*
         * Describe this request as a virtual-address ("addr") binding.
         * The "bind" struct is how the common registration path tells an
         * "addr" binding apart from a "buf" binding (see
         * hermon_mr_register_buf()); it is eventually handed to
         * hermon_mr_mem_bind(), which does the bulk of the registration
         * work.
         */
        bi.bi_type  = HERMON_BINDHDL_VADDR;
        bi.bi_flags = mr_attr->mr_flags;
        bi.bi_addr  = mr_attr->mr_vaddr;
        bi.bi_len   = mr_attr->mr_len;
        bi.bi_as    = mr_attr->mr_as;

        return (hermon_mr_common_reg(state, pd, &bi, mrhdl, op, mpt_type));
}


/*
 * hermon_mr_register_buf()
 *    Context: Can be called from interrupt or base context.
 */
int
hermon_mr_register_buf(hermon_state_t *state, hermon_pdhdl_t pd,
    ibt_smr_attr_t *mr_attr, struct buf *buf, hermon_mrhdl_t *mrhdl,
    hermon_mr_options_t *op, hermon_mpt_rsrc_type_t mpt_type)
{
        hermon_bind_info_t      bi;

        /*
         * Describe this request as a "buf" binding (contrast with the
         * "addr" binding built in hermon_mr_register()).  The struct is
         * eventually handed to hermon_mr_mem_bind(), which does the bulk
         * of the registration work.
         *
         * For the IB address we use "b_un.b_addr" unless the caller set
         * IBT_MR_PHYS_IOVA (in which case mr_vaddr is used).  The exact
         * value is not critical -- it need only be unique for the given
         * RKey, which holds by default -- so the choice is somewhat
         * arbitrary.
         */
        bi.bi_type  = HERMON_BINDHDL_BUF;
        bi.bi_buf   = buf;
        bi.bi_as    = NULL;
        bi.bi_len   = (uint64_t)buf->b_bcount;
        bi.bi_flags = mr_attr->mr_flags;
        bi.bi_addr  = (mr_attr->mr_flags & IBT_MR_PHYS_IOVA) ?
            mr_attr->mr_vaddr : (uint64_t)(uintptr_t)buf->b_un.b_addr;

        return (hermon_mr_common_reg(state, pd, &bi, mrhdl, op, mpt_type));
}


/*
 * hermon_mr_register_shared()
 *    Context: Can be called from interrupt or base context.
 */
int
hermon_mr_register_shared(hermon_state_t *state, hermon_mrhdl_t mrhdl,
    hermon_pdhdl_t pd, ibt_smr_attr_t *mr_attr, hermon_mrhdl_t *mrhdl_new)
{
        hermon_rsrc_t           *mpt, *mtt, *rsrc;
        hermon_umap_db_entry_t  *umapdb;
        hermon_hw_dmpt_t        mpt_entry;
        hermon_mrhdl_t          mr;
        hermon_bind_info_t      *bind;
        ddi_umem_cookie_t       umem_cookie;
        size_t                  umem_len;
        caddr_t                 umem_addr;
        uint64_t                mtt_addr, pgsize_msk;
        uint_t                  sleep, mr_is_umem;
        int                     status, umem_flags;

        /*
         * Create a new memory region handle (*mrhdl_new) that shares the
         * already-bound MTT translations of the existing region "mrhdl",
         * under protection domain "pd" and with access flags taken from
         * "mr_attr".  Returns DDI_SUCCESS, an IBT_* error, or a CI
         * failure code.  On failure the goto labels below unwind, in
         * reverse order, exactly the state set up before the failure.
         */

        /*
         * Check the sleep flag.  Ensure that it is consistent with the
         * current thread context (i.e. if we are currently in the interrupt
         * context, then we shouldn't be attempting to sleep).
         */
        sleep = (mr_attr->mr_flags & IBT_MR_NOSLEEP) ? HERMON_NOSLEEP :
            HERMON_SLEEP;
        if ((sleep == HERMON_SLEEP) &&
            (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
                status = IBT_INVALID_PARAM;
                goto mrshared_fail;
        }

        /* Increment the reference count on the protection domain (PD) */
        hermon_pd_refcnt_inc(pd);

        /*
         * Allocate an MPT entry.  This will be filled in with all the
         * necessary parameters to define the shared memory region.
         * Specifically, it will be made to reference the currently existing
         * MTT entries and ownership of the MPT will be passed to the hardware
         * in the last step below.  If we fail here, we must undo the
         * protection domain reference count.
         */
        status = hermon_rsrc_alloc(state, HERMON_DMPT, 1, sleep, &mpt);
        if (status != DDI_SUCCESS) {
                status = IBT_INSUFF_RESOURCE;
                goto mrshared_fail1;
        }

        /*
         * Allocate the software structure for tracking the shared memory
         * region (i.e. the Hermon Memory Region handle).  If we fail here, we
         * must undo the protection domain reference count and the previous
         * resource allocation.
         */
        status = hermon_rsrc_alloc(state, HERMON_MRHDL, 1, sleep, &rsrc);
        if (status != DDI_SUCCESS) {
                status = IBT_INSUFF_RESOURCE;
                goto mrshared_fail2;
        }
        mr = (hermon_mrhdl_t)rsrc->hr_addr;
        _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr))

        /*
         * Setup and validate the memory region access flags.  This means
         * translating the IBTF's enable flags into the access flags that
         * will be used in later operations.
         */
        mr->mr_accflag = 0;
        if (mr_attr->mr_flags & IBT_MR_ENABLE_WINDOW_BIND)
                mr->mr_accflag |= IBT_MR_WINDOW_BIND;
        if (mr_attr->mr_flags & IBT_MR_ENABLE_LOCAL_WRITE)
                mr->mr_accflag |= IBT_MR_LOCAL_WRITE;
        if (mr_attr->mr_flags & IBT_MR_ENABLE_REMOTE_READ)
                mr->mr_accflag |= IBT_MR_REMOTE_READ;
        if (mr_attr->mr_flags & IBT_MR_ENABLE_REMOTE_WRITE)
                mr->mr_accflag |= IBT_MR_REMOTE_WRITE;
        if (mr_attr->mr_flags & IBT_MR_ENABLE_REMOTE_ATOMIC)
                mr->mr_accflag |= IBT_MR_REMOTE_ATOMIC;

        /*
         * Calculate keys (Lkey, Rkey) from MPT index.  Each key is formed
         * from a certain number of "constrained" bits (the least significant
         * bits) and some number of "unconstrained" bits.  The constrained
         * bits must be set to the index of the entry in the MPT table, but
         * the unconstrained bits can be set to any value we wish.  Note:
         * if no remote access is required, then the RKey value is not filled
         * in.  Otherwise both Rkey and LKey are given the same value.
         */
        mr->mr_rkey = mr->mr_lkey = hermon_mr_keycalc(mpt->hr_indx);

        /* Grab the MR lock for the current memory region */
        mutex_enter(&mrhdl->mr_lock);

        /*
         * Check here to see if the memory region has already been partially
         * deregistered as a result of a hermon_umap_umemlock_cb() callback.
         * If so, this is an error, return failure.
         */
        if ((mrhdl->mr_is_umem) && (mrhdl->mr_umemcookie == NULL)) {
                mutex_exit(&mrhdl->mr_lock);
                status = IBT_MR_HDL_INVALID;
                goto mrshared_fail3;
        }

        /*
         * Determine if the original memory was from userland and, if so, pin
         * the pages (again) with umem_lockmemory().  This will guarantee a
         * separate callback for each of this shared region's MR handles.
         * If this is userland memory, then allocate an entry in the
         * "userland resources database".  This will later be added to
         * the database (after all further memory registration operations are
         * successful).  If we fail here, we must undo all the above setup.
         */
        mr_is_umem = mrhdl->mr_is_umem;
        if (mr_is_umem) {
                /* Round the length up to whole pages, page-align the addr */
                umem_len   = ptob(btopr(mrhdl->mr_bindinfo.bi_len));
                umem_addr  = (caddr_t)((uintptr_t)mrhdl->mr_bindinfo.bi_addr &
                    ~PAGEOFFSET);
                umem_flags = (DDI_UMEMLOCK_WRITE | DDI_UMEMLOCK_READ |
                    DDI_UMEMLOCK_LONGTERM);
                status = umem_lockmemory(umem_addr, umem_len, umem_flags,
                    &umem_cookie, &hermon_umem_cbops, NULL);
                if (status != 0) {
                        mutex_exit(&mrhdl->mr_lock);
                        status = IBT_INSUFF_RESOURCE;
                        goto mrshared_fail3;
                }

                umapdb = hermon_umap_db_alloc(state->hs_instance,
                    (uint64_t)(uintptr_t)umem_cookie, MLNX_UMAP_MRMEM_RSRC,
                    (uint64_t)(uintptr_t)rsrc);
                if (umapdb == NULL) {
                        mutex_exit(&mrhdl->mr_lock);
                        status = IBT_INSUFF_RESOURCE;
                        goto mrshared_fail4;
                }
        }

        /*
         * Copy the MTT resource pointer (and additional parameters) from
         * the original Hermon Memory Region handle.  Note: this is normally
         * where the hermon_mr_mem_bind() routine would be called, but because
         * we already have bound and filled-in MTT entries it is simply a
         * matter here of managing the MTT reference count and grabbing the
         * address of the MTT table entries (for filling in the shared region's
         * MPT entry).
         */
        mr->mr_mttrsrcp   = mrhdl->mr_mttrsrcp;
        mr->mr_logmttpgsz = mrhdl->mr_logmttpgsz;
        mr->mr_bindinfo   = mrhdl->mr_bindinfo;
        mr->mr_mttrefcntp = mrhdl->mr_mttrefcntp;
        mutex_exit(&mrhdl->mr_lock);
        bind = &mr->mr_bindinfo;
        _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
        mtt = mr->mr_mttrsrcp;

        /*
         * Increment the MTT reference count (to reflect the fact that
         * the MTT is now shared)
         */
        (void) hermon_mtt_refcnt_inc(mr->mr_mttrefcntp);

        /*
         * Update the new "bind" virtual address.  Do some extra work here
         * to ensure proper alignment.  That is, make sure that the page
         * offset for the beginning of the old range is the same as the
         * offset for this new mapping
         */
        pgsize_msk = (((uint64_t)1 << mr->mr_logmttpgsz) - 1);
        bind->bi_addr = ((mr_attr->mr_vaddr & ~pgsize_msk) |
            (mr->mr_bindinfo.bi_addr & pgsize_msk));

        /*
         * Fill in the MPT entry.  This is the final step before passing
         * ownership of the MPT entry to the Hermon hardware.  We use all of
         * the information collected/calculated above to fill in the
         * requisite portions of the MPT.
         */
        bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t));
        mpt_entry.en_bind = (mr->mr_accflag & IBT_MR_WINDOW_BIND)   ? 1 : 0;
        mpt_entry.atomic  = (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC) ? 1 : 0;
        mpt_entry.rw      = (mr->mr_accflag & IBT_MR_REMOTE_WRITE)  ? 1 : 0;
        mpt_entry.rr      = (mr->mr_accflag & IBT_MR_REMOTE_READ)   ? 1 : 0;
        mpt_entry.lw      = (mr->mr_accflag & IBT_MR_LOCAL_WRITE)   ? 1 : 0;
        mpt_entry.lr      = 1;
        mpt_entry.reg_win = HERMON_MPT_IS_REGION;
        mpt_entry.entity_sz     = mr->mr_logmttpgsz;
        mpt_entry.mem_key       = mr->mr_lkey;
        mpt_entry.pd            = pd->pd_pdnum;
        mpt_entry.start_addr    = bind->bi_addr;
        mpt_entry.reg_win_len   = bind->bi_len;
        /*
         * NOTE(review): the MTT base is split into high/low words; the low
         * word is stored shifted right by 3, presumably because MTT entries
         * are 8-byte aligned and the hardware stores the low portion in
         * 8-byte units -- confirm against the PRM.
         */
        mtt_addr = (mtt->hr_indx << HERMON_MTT_SIZE_SHIFT);
        mpt_entry.mtt_addr_h = mtt_addr >> 32;
        mpt_entry.mtt_addr_l = mtt_addr >> 3;

        /*
         * Write the MPT entry to hardware.  Lastly, we pass ownership of
         * the entry to the hardware.  Note: in general, this operation
         * shouldn't fail.  But if it does, we have to undo everything we've
         * done above before returning error.
         */
        status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
            sizeof (hermon_hw_dmpt_t), mpt->hr_indx, sleep);
        if (status != HERMON_CMD_SUCCESS) {
                cmn_err(CE_CONT, "Hermon: SW2HW_MPT command failed: %08x\n",
                    status);
                if (status == HERMON_CMD_INVALID_STATUS) {
                        hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
                }
                status = ibc_get_ci_failure(0);
                goto mrshared_fail5;
        }

        /*
         * Fill in the rest of the Hermon Memory Region handle.  Having
         * successfully transferred ownership of the MPT, we can update the
         * following fields for use in further operations on the MR.
         */
        mr->mr_mptrsrcp   = mpt;
        mr->mr_mttrsrcp   = mtt;
        mr->mr_mpt_type   = HERMON_MPT_DMPT;
        mr->mr_pdhdl      = pd;
        mr->mr_rsrcp      = rsrc;
        mr->mr_is_umem    = mr_is_umem;
        mr->mr_is_fmr     = 0;
        mr->mr_umemcookie = (mr_is_umem != 0) ? umem_cookie : NULL;
        mr->mr_umem_cbfunc = NULL;
        mr->mr_umem_cbarg1 = NULL;
        mr->mr_umem_cbarg2 = NULL;
        mr->mr_lkey        = hermon_mr_key_swap(mr->mr_lkey);
        mr->mr_rkey        = hermon_mr_key_swap(mr->mr_rkey);

        /*
         * If this is userland memory, then we need to insert the previously
         * allocated entry into the "userland resources database".  This will
         * allow for later coordination between the hermon_umap_umemlock_cb()
         * callback and hermon_mr_deregister().
         */
        if (mr_is_umem) {
                hermon_umap_db_add(umapdb);
        }

        *mrhdl_new = mr;

        return (DDI_SUCCESS);

/*
 * The following is cleanup for all possible failure cases in this routine
 */
mrshared_fail5:
        (void) hermon_mtt_refcnt_dec(mr->mr_mttrefcntp);
        if (mr_is_umem) {
                hermon_umap_db_free(umapdb);
        }
mrshared_fail4:
        if (mr_is_umem) {
                ddi_umem_unlock(umem_cookie);
        }
mrshared_fail3:
        hermon_rsrc_free(state, &rsrc);
mrshared_fail2:
        hermon_rsrc_free(state, &mpt);
mrshared_fail1:
        hermon_pd_refcnt_dec(pd);
mrshared_fail:
        return (status);
}

/*
 * hermon_mr_alloc_fmr()
 *    Context: Can be called from interrupt or base context.
 */
int
hermon_mr_alloc_fmr(hermon_state_t *state, hermon_pdhdl_t pd,
    hermon_fmrhdl_t fmr_pool, hermon_mrhdl_t *mrhdl)
{
        hermon_rsrc_t           *mpt, *mtt, *rsrc;
        hermon_hw_dmpt_t        mpt_entry;
        hermon_mrhdl_t          mr;
        hermon_bind_info_t      bind;
        uint64_t                mtt_addr;
        uint64_t                nummtt;
        uint_t                  sleep, mtt_pgsize_bits;
        int                     status;
        offset_t                i;
        hermon_icm_table_t      *icm_table;
        hermon_dma_info_t       *dma_info;
        uint32_t                index1, index2, rindx;

        /*
         * Allocate an FMR (fast memory region) handle in protection domain
         * "pd" sized for the pool's fmr_max_pages.  The MPT is written to
         * hardware here; the start address/length and MTT contents are
         * filled in later at actual FMR registration time.  Returns
         * DDI_SUCCESS, an IBT_* error, or a CI failure code.
         */

        /*
         * Check the sleep flag.  Ensure that it is consistent with the
         * current thread context (i.e. if we are currently in the interrupt
         * context, then we shouldn't be attempting to sleep).
         */
        sleep = (fmr_pool->fmr_flags & IBT_MR_SLEEP) ? HERMON_SLEEP :
            HERMON_NOSLEEP;
        if ((sleep == HERMON_SLEEP) &&
            (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
                return (IBT_INVALID_PARAM);
        }

        /* Increment the reference count on the protection domain (PD) */
        hermon_pd_refcnt_inc(pd);

        /*
         * Allocate an MPT entry.  This will be filled in with all the
         * necessary parameters to define the FMR.  Specifically, it will be
         * made to reference the currently existing MTT entries and ownership
         * of the MPT will be passed to the hardware in the last step below.
         * If we fail here, we must undo the protection domain reference count.
         */

        status = hermon_rsrc_alloc(state, HERMON_DMPT, 1, sleep, &mpt);
        if (status != DDI_SUCCESS) {
                status = IBT_INSUFF_RESOURCE;
                goto fmralloc_fail1;
        }

        /*
         * Allocate the software structure for tracking the fmr memory
         * region (i.e. the Hermon Memory Region handle).  If we fail here, we
         * must undo the protection domain reference count and the previous
         * resource allocation.
         */
        status = hermon_rsrc_alloc(state, HERMON_MRHDL, 1, sleep, &rsrc);
        if (status != DDI_SUCCESS) {
                status = IBT_INSUFF_RESOURCE;
                goto fmralloc_fail2;
        }
        mr = (hermon_mrhdl_t)rsrc->hr_addr;
        _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr))

        /*
         * Setup and validate the memory region access flags.  This means
         * translating the IBTF's enable flags into the access flags that
         * will be used in later operations.
         */
        mr->mr_accflag = 0;
        if (fmr_pool->fmr_flags & IBT_MR_ENABLE_LOCAL_WRITE)
                mr->mr_accflag |= IBT_MR_LOCAL_WRITE;
        if (fmr_pool->fmr_flags & IBT_MR_ENABLE_REMOTE_READ)
                mr->mr_accflag |= IBT_MR_REMOTE_READ;
        if (fmr_pool->fmr_flags & IBT_MR_ENABLE_REMOTE_WRITE)
                mr->mr_accflag |= IBT_MR_REMOTE_WRITE;
        if (fmr_pool->fmr_flags & IBT_MR_ENABLE_REMOTE_ATOMIC)
                mr->mr_accflag |= IBT_MR_REMOTE_ATOMIC;

        /*
         * Calculate keys (Lkey, Rkey) from MPT index.  Each key is formed
         * from a certain number of "constrained" bits (the least significant
         * bits) and some number of "unconstrained" bits.  The constrained
         * bits must be set to the index of the entry in the MPT table, but
         * the unconstrained bits can be set to any value we wish.  Note:
         * if no remote access is required, then the RKey value is not filled
         * in.  Otherwise both Rkey and LKey are given the same value.
         */
        mr->mr_fmr_key = 1;     /* ready for the next reload */
        mr->mr_rkey = mr->mr_lkey = mpt->hr_indx;

        /*
         * Determine number of pages spanned.  This routine uses the
         * information in the "bind" struct to determine the required
         * number of MTT entries needed (and returns the suggested page size -
         * as a "power-of-2" - for each MTT entry).
         *
         * Zero the whole struct first: it is copied wholesale into
         * mr->mr_bindinfo below, so no field may be left indeterminate.
         */
        bzero(&bind, sizeof (hermon_bind_info_t));
        /* Assume address will be page aligned later */
        bind.bi_addr = 0;
        /*
         * Calculate size based on given max pages.  Widen before shifting
         * so the byte count cannot overflow the narrower fmr_max_pages
         * type.
         */
        bind.bi_len = (uint64_t)fmr_pool->fmr_max_pages << PAGESHIFT;
        nummtt = hermon_mr_nummtt_needed(state, &bind, &mtt_pgsize_bits);

        /*
         * Allocate the MTT entries.  Use the calculations performed above to
         * allocate the required number of MTT entries.  If we fail here, we
         * must not only undo all the previous resource allocation (and PD
         * reference count), but we must also unbind the memory.
         */
        status = hermon_rsrc_alloc(state, HERMON_MTT, nummtt, sleep, &mtt);
        if (status != DDI_SUCCESS) {
                IBTF_DPRINTF_L2("FMR", "FATAL: too few MTTs");
                status = IBT_INSUFF_RESOURCE;
                goto fmralloc_fail3;
        }
        mr->mr_logmttpgsz = mtt_pgsize_bits;

        /*
         * Fill in the MPT entry.  This is the final step before passing
         * ownership of the MPT entry to the Hermon hardware.  We use all of
         * the information collected/calculated above to fill in the
         * requisite portions of the MPT.
         */
        bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t));
        mpt_entry.en_bind = 0;
        mpt_entry.atomic  = (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC) ? 1 : 0;
        mpt_entry.rw      = (mr->mr_accflag & IBT_MR_REMOTE_WRITE)  ? 1 : 0;
        mpt_entry.rr      = (mr->mr_accflag & IBT_MR_REMOTE_READ)   ? 1 : 0;
        mpt_entry.lw      = (mr->mr_accflag & IBT_MR_LOCAL_WRITE)   ? 1 : 0;
        mpt_entry.lr      = 1;
        mpt_entry.reg_win = HERMON_MPT_IS_REGION;
        mpt_entry.pd            = pd->pd_pdnum;

        mpt_entry.entity_sz     = mr->mr_logmttpgsz;
        mtt_addr = (mtt->hr_indx << HERMON_MTT_SIZE_SHIFT);
        mpt_entry.fast_reg_en = 1;
        mpt_entry.mtt_size = (uint_t)nummtt;
        mpt_entry.mtt_addr_h = mtt_addr >> 32;
        mpt_entry.mtt_addr_l = mtt_addr >> 3;
        mpt_entry.mem_key = mr->mr_lkey;

        /*
         * FMR sets these to 0 for now.  Later during actual fmr registration
         * these values are filled in.
         */
        mpt_entry.start_addr    = 0;
        mpt_entry.reg_win_len   = 0;

        /*
         * Write the MPT entry to hardware.  Lastly, we pass ownership of
         * the entry to the hardware.  Note: in general, this operation
         * shouldn't fail.  But if it does, we have to undo everything we've
         * done above before returning error.
         */
        status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
            sizeof (hermon_hw_dmpt_t), mpt->hr_indx, sleep);
        if (status != HERMON_CMD_SUCCESS) {
                cmn_err(CE_CONT, "Hermon: SW2HW_MPT command failed: %08x\n",
                    status);
                if (status == HERMON_CMD_INVALID_STATUS) {
                        hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
                }
                status = ibc_get_ci_failure(0);
                goto fmralloc_fail4;
        }

        /*
         * Fill in the rest of the Hermon Memory Region handle.  Having
         * successfully transferred ownership of the MPT, we can update the
         * following fields for use in further operations on the MR.  Also, set
         * that this is an FMR region.
         */
        mr->mr_mptrsrcp   = mpt;
        mr->mr_mttrsrcp   = mtt;

        mr->mr_mpt_type   = HERMON_MPT_DMPT;
        mr->mr_pdhdl      = pd;
        mr->mr_rsrcp      = rsrc;
        mr->mr_is_fmr     = 1;
        mr->mr_lkey        = hermon_mr_key_swap(mr->mr_lkey);
        mr->mr_rkey        = hermon_mr_key_swap(mr->mr_rkey);
        mr->mr_mttaddr     = mtt_addr;
        (void) memcpy(&mr->mr_bindinfo, &bind, sizeof (hermon_bind_info_t));

        /* initialize hr_addr for use during register/deregister/invalidate */
        icm_table = &state->hs_icm[HERMON_DMPT];
        rindx = mpt->hr_indx;
        hermon_index(index1, index2, rindx, icm_table, i);
        dma_info = icm_table->icm_dma[index1] + index2;
        _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mpt))
        mpt->hr_addr = (void *)((uintptr_t)(dma_info->vaddr + i * mpt->hr_len));

        *mrhdl = mr;

        return (DDI_SUCCESS);

/*
 * The following is cleanup for all possible failure cases in this routine
 */
fmralloc_fail4:
        /*
         * The MTT entries came from hermon_rsrc_alloc(), so they must be
         * released with hermon_rsrc_free() (the previous kmem_free() of
         * the handle with a sizeof (hermon_rsrc_t) * nummtt length was a
         * mismatched allocator/deallocator pair).
         */
        hermon_rsrc_free(state, &mtt);
fmralloc_fail3:
        hermon_rsrc_free(state, &rsrc);
fmralloc_fail2:
        hermon_rsrc_free(state, &mpt);
fmralloc_fail1:
        hermon_pd_refcnt_dec(pd);
        return (status);
}


/*
 * hermon_mr_register_physical_fmr()
 *    Context: Can be called from interrupt or base context.
 */
/*ARGSUSED*/
int
hermon_mr_register_physical_fmr(hermon_state_t *state,
    ibt_pmr_attr_t *mem_pattr_p, hermon_mrhdl_t mr, ibt_pmr_desc_t *mem_desc_p)
{
        hermon_rsrc_t           *mpt;
        uint64_t                *mpt_table;
        int                     status;
        uint32_t                key;

        /*
         * Map physical memory (mem_pattr_p) into a previously allocated FMR
         * "mr" (see hermon_mr_alloc_fmr()).  The dMPT is updated directly
         * in ICM memory through mpt->hr_addr rather than via a firmware
         * command, with membar_producer() fences ordering the ownership
         * byte writes around the MPT field updates.  The new keys and
         * region description are returned through mem_desc_p.
         */
        mutex_enter(&mr->mr_lock);
        mpt = mr->mr_mptrsrcp;
        mpt_table = (uint64_t *)mpt->hr_addr;

        /*
         * Write MPT status to SW bit.
         * NOTE(review): 0xF0 appears to place HERMON_MPT_SW_OWNERSHIP (0xF)
         * in the high nibble of the status byte, returning the entry to
         * software ownership while it is modified -- confirm against the PRM.
         */
        *(uint8_t *)mpt_table = 0xF0;

        /* Ensure the ownership byte is visible before the updates below */
        membar_producer();

        /*
         * Write the mapped addresses into the MTT entries.  FMR needs to do
         * this a little differently, so we call the fmr specific fast mtt
         * write here.
         */
        status = hermon_mr_fast_mtt_write_fmr(state, mr->mr_mttrsrcp,
            mem_pattr_p, mr->mr_logmttpgsz);
        if (status != DDI_SUCCESS) {
                mutex_exit(&mr->mr_lock);
                status = ibc_get_ci_failure(0);
                goto fmr_reg_fail1;
        }

        /*
         * Calculate keys (Lkey, Rkey) from MPT index.  Each key is formed
         * from a certain number of "constrained" bits (the least significant
         * bits) and some number of "unconstrained" bits.  The constrained
         * bits must be set to the index of the entry in the MPT table, but
         * the unconstrained bits can be set to any value we wish.  Note:
         * if no remote access is required, then the RKey value is not filled
         * in.  Otherwise both Rkey and LKey are given the same value.
         */
        key = mpt->hr_indx | (mr->mr_fmr_key++ << HERMON_MEMKEY_SHIFT);
        mr->mr_lkey = mr->mr_rkey = hermon_mr_key_swap(key);

        /*
         * The indices below are 64-bit word offsets into the dMPT entry.
         * NOTE(review): offsets 1/2/3/4 presumably correspond to the
         * mem_key, start_addr, reg_win_len, and lkey fields of
         * hermon_hw_dmpt_t -- verify against the structure layout.
         */
        /* write mem key value */
        *(uint32_t *)&mpt_table[1] = htonl(key);

        /* write length value */
        mpt_table[3] = htonll(mem_pattr_p->pmr_len);

        /* write start addr value */
        mpt_table[2] = htonll(mem_pattr_p->pmr_iova);

        /* write lkey value */
        *(uint32_t *)&mpt_table[4] = htonl(key);

        /* Ensure all field updates land before ownership is given back */
        membar_producer();

        /* Write MPT status to HW bit (hardware ownership) */
        *(uint8_t *)mpt_table = 0x00;

        /* Fill in return parameters */
        mem_desc_p->pmd_lkey = mr->mr_lkey;
        mem_desc_p->pmd_rkey = mr->mr_rkey;
        mem_desc_p->pmd_iova = mem_pattr_p->pmr_iova;
        mem_desc_p->pmd_phys_buf_list_sz = mem_pattr_p->pmr_len;

        /* Fill in MR bindinfo struct for later sync or query operations */
        mr->mr_bindinfo.bi_addr = mem_pattr_p->pmr_iova;
        mr->mr_bindinfo.bi_flags = mem_pattr_p->pmr_flags & IBT_MR_NONCOHERENT;

        mutex_exit(&mr->mr_lock);

        return (DDI_SUCCESS);

fmr_reg_fail1:
        /*
         * Note, we fail here, and purposely leave the memory ownership in
         * software.  The memory tables may be corrupt, so we leave the region
         * unregistered.
         */
        return (status);
}


/*
 * hermon_mr_deregister()
 *    Context: Can be called from interrupt or base context.
 */
/* ARGSUSED */
int
hermon_mr_deregister(hermon_state_t *state, hermon_mrhdl_t *mrhdl, uint_t level,
    uint_t sleep)
{
        hermon_rsrc_t           *mpt, *mtt, *rsrc, *mtt_refcnt;
        hermon_umap_db_entry_t  *umapdb;
        hermon_pdhdl_t          pd;
        hermon_mrhdl_t          mr;
        hermon_bind_info_t      *bind;
        uint64_t                value;
        int                     status;
        uint_t                  shared_mtt;

        /*
         * Check the sleep flag.  Ensure that it is consistent with the
         * current thread context (i.e. if we are currently in the interrupt
         * context, then we shouldn't be attempting to sleep).
         */
        if ((sleep == HERMON_SLEEP) &&
            (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
                status = IBT_INVALID_PARAM;
                return (status);
        }

        /*
         * Pull all the necessary information from the Hermon Memory Region
         * handle.  This is necessary here because the resource for the
         * MR handle is going to be freed up as part of this
         * deregistration
         */
        mr      = *mrhdl;
        mutex_enter(&mr->mr_lock);
        mpt     = mr->mr_mptrsrcp;
        mtt     = mr->mr_mttrsrcp;
        mtt_refcnt = mr->mr_mttrefcntp;
        rsrc    = mr->mr_rsrcp;
        pd      = mr->mr_pdhdl;
        bind    = &mr->mr_bindinfo;

        /*
         * Check here if the memory region is really an FMR.  If so, this is a
         * bad thing and we shouldn't be here (FMRs are torn down through
         * hermon_mr_dealloc_fmr() instead).  Return failure.
         */
        if (mr->mr_is_fmr) {
                mutex_exit(&mr->mr_lock);
                return (IBT_INVALID_PARAM);
        }

        /*
         * Check here to see if the memory region has already been partially
         * deregistered as a result of the hermon_umap_umemlock_cb() callback.
         * If so, then jump to the end and free the remaining resources.
         */
        if ((mr->mr_is_umem) && (mr->mr_umemcookie == NULL)) {
                goto mrdereg_finish_cleanup;
        }
        if (hermon_rdma_debug & 0x4)
                IBTF_DPRINTF_L2("mr", "dereg: mr %p  key %x",
                    mr, mr->mr_rkey);

        /*
         * We must drop the "mr_lock" here to ensure that both SLEEP and
         * NOSLEEP calls into the firmware work as expected.  Also, if two
         * threads are attemping to access this MR (via de-register,
         * re-register, or otherwise), then we allow the firmware to enforce
         * the checking, that only one deregister is valid.
         */
        mutex_exit(&mr->mr_lock);

        /*
         * Reclaim MPT entry from hardware (if necessary).  Since the
         * hermon_mr_deregister() routine is used in the memory region
         * reregistration process as well, it is possible that we will
         * not always wish to reclaim ownership of the MPT.  Check the
         * "level" arg and, if necessary, attempt to reclaim it.  If
         * the ownership transfer fails for any reason, we check to see
         * what command status was returned from the hardware.  The only
         * "expected" error status is the one that indicates an attempt to
         * deregister a memory region that has memory windows bound to it
         */
        if (level >= HERMON_MR_DEREG_ALL) {
                if (mr->mr_mpt_type >= HERMON_MPT_DMPT) {
                        status = hermon_cmn_ownership_cmd_post(state, HW2SW_MPT,
                            NULL, 0, mpt->hr_indx, sleep);
                        if (status != HERMON_CMD_SUCCESS) {
                                if (status == HERMON_CMD_REG_BOUND) {
                                        return (IBT_MR_IN_USE);
                                } else {
                                        cmn_err(CE_CONT, "Hermon: HW2SW_MPT "
                                            "command failed: %08x\n", status);
                                        if (status ==
                                            HERMON_CMD_INVALID_STATUS) {
                                                /*
                                                 * Post the same FM detail as
                                                 * the other ereports in this
                                                 * file (cf. hermon_mw_free())
                                                 */
                                                hermon_fm_ereport(state,
                                                    HCA_SYS_ERR,
                                                    HCA_ERR_SRV_LOST);
                                        }
                                        /*
                                         * Any other failure here is an
                                         * unexpected CI failure, not a bad
                                         * caller parameter (consistent with
                                         * hermon_mw_free())
                                         */
                                        return (ibc_get_ci_failure(0));
                                }
                        }
                }
        }

        /*
         * Re-grab the mr_lock here.  Since further access to the protected
         * 'mr' structure is needed, and we would have returned previously for
         * the multiple deregistration case, we can safely grab the lock here.
         */
        mutex_enter(&mr->mr_lock);

        /*
         * If the memory had come from userland, then we do a lookup in the
         * "userland resources database".  On success, we free the entry, call
         * ddi_umem_unlock(), and continue the cleanup.  On failure (which is
         * an indication that the umem_lockmemory() callback has called
         * hermon_mr_deregister()), we call ddi_umem_unlock() and invalidate
         * the "mr_umemcookie" field in the MR handle (this will be used
         * later to detect that only partial cleanup still remains to be done
         * on the MR handle).
         */
        if (mr->mr_is_umem) {
                status = hermon_umap_db_find(state->hs_instance,
                    (uint64_t)(uintptr_t)mr->mr_umemcookie,
                    MLNX_UMAP_MRMEM_RSRC, &value, HERMON_UMAP_DB_REMOVE,
                    &umapdb);
                if (status == DDI_SUCCESS) {
                        hermon_umap_db_free(umapdb);
                        ddi_umem_unlock(mr->mr_umemcookie);
                } else {
                        ddi_umem_unlock(mr->mr_umemcookie);
                        mr->mr_umemcookie = NULL;
                }
        }

        /* mtt_refcnt is NULL in the case of hermon_dma_mr_register() */
        if (mtt_refcnt != NULL) {
                /*
                 * Decrement the MTT reference count.  Since the MTT resource
                 * may be shared between multiple memory regions (as a result
                 * of a "RegisterSharedMR" verb) it is important that we not
                 * free up or unbind resources prematurely.  If it's not shared
                 * (as indicated by the return status), then free the resource.
                 */
                shared_mtt = hermon_mtt_refcnt_dec(mtt_refcnt);
                if (!shared_mtt) {
                        hermon_rsrc_free(state, &mtt_refcnt);
                }

                /*
                 * Free up the MTT entries and unbind the memory.  Here,
                 * as above, we attempt to free these resources only if
                 * it is appropriate to do so.
                 * Note, 'bind' is NULL in the alloc_lkey case.
                 */
                if (!shared_mtt) {
                        if (level >= HERMON_MR_DEREG_NO_HW2SW_MPT) {
                                hermon_mr_mem_unbind(state, bind);
                        }
                        hermon_rsrc_free(state, &mtt);
                }
        }

        /*
         * If the MR handle has been invalidated, then drop the
         * lock and return success.  Note: This only happens because
         * the umem_lockmemory() callback has been triggered.  The
         * cleanup here is partial, and further cleanup (in a
         * subsequent hermon_mr_deregister() call) will be necessary.
         */
        if ((mr->mr_is_umem) && (mr->mr_umemcookie == NULL)) {
                mutex_exit(&mr->mr_lock);
                return (DDI_SUCCESS);
        }

mrdereg_finish_cleanup:
        mutex_exit(&mr->mr_lock);

        /* Free the Hermon Memory Region handle */
        hermon_rsrc_free(state, &rsrc);

        /* Free up the MPT entry resource */
        if (mpt != NULL)
                hermon_rsrc_free(state, &mpt);

        /* Decrement the reference count on the protection domain (PD) */
        hermon_pd_refcnt_dec(pd);

        /* Set the mrhdl pointer to NULL and return success */
        *mrhdl = NULL;

        return (DDI_SUCCESS);
}

/*
 * hermon_mr_dealloc_fmr()
 *    Tears down an FMR: releases the MTT entries, the software MR handle,
 *    and the MPT entry, and drops the PD reference.
 *    Context: Can be called from interrupt or base context.
 */
/* ARGSUSED */
int
hermon_mr_dealloc_fmr(hermon_state_t *state, hermon_mrhdl_t *mrhdl)
{
        hermon_mrhdl_t          mr;
        hermon_rsrc_t           *mpt_rsrc, *mtt_rsrc, *hdl_rsrc;
        hermon_pdhdl_t          pd_hdl;

        /*
         * Snapshot the resource pointers under the MR lock.  The handle
         * structure itself is among the resources released below, so
         * nothing may touch it once the frees begin.
         */
        mr = *mrhdl;
        mutex_enter(&mr->mr_lock);
        mpt_rsrc = mr->mr_mptrsrcp;
        mtt_rsrc = mr->mr_mttrsrcp;
        hdl_rsrc = mr->mr_rsrcp;
        pd_hdl   = mr->mr_pdhdl;
        mutex_exit(&mr->mr_lock);

        /* Release the MTT entries backing this FMR */
        hermon_rsrc_free(state, &mtt_rsrc);

        /* Release the software MR handle structure */
        hermon_rsrc_free(state, &hdl_rsrc);

        /* Release the MPT entry */
        hermon_rsrc_free(state, &mpt_rsrc);

        /* Drop the PD reference taken when the FMR was allocated */
        hermon_pd_refcnt_dec(pd_hdl);

        /* Invalidate the caller's handle and report success */
        *mrhdl = NULL;

        return (DDI_SUCCESS);
}


/*
 * hermon_mr_query()
 *    Queries the hardware MPT entry backing a memory region, refreshes the
 *    cached software state in 'mr', and fills in the caller's
 *    ibt_mr_query_attr_t.
 *    Context: Can be called from interrupt or base context.
 */
/* ARGSUSED */
int
hermon_mr_query(hermon_state_t *state, hermon_mrhdl_t mr,
    ibt_mr_query_attr_t *attr)
{
        int                     status;
        hermon_hw_dmpt_t        mpt_entry;
        uint32_t                lkey;

        _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*attr))

        mutex_enter(&mr->mr_lock);

        /*
         * Check here to see if the memory region has already been partially
         * deregistered as a result of a hermon_umap_umemlock_cb() callback.
         * If so, this is an error, return failure.
         */
        if ((mr->mr_is_umem) && (mr->mr_umemcookie == NULL)) {
                mutex_exit(&mr->mr_lock);
                return (IBT_MR_HDL_INVALID);
        }

        /*
         * Query the MPT entry from hardware.  "mr_lkey >> 8" extracts the
         * MPT index from the driver-form (index[23:0],key[7:0]) key (see
         * hermon_mr_key_swap()).  NOSLEEP is used since this routine may
         * run in interrupt context.
         */
        status = hermon_cmn_query_cmd_post(state, QUERY_MPT, 0,
            mr->mr_lkey >> 8, &mpt_entry, sizeof (hermon_hw_dmpt_t),
            HERMON_NOSLEEP);
        if (status != HERMON_CMD_SUCCESS) {
                cmn_err(CE_CONT, "Hermon: QUERY_MPT failed: status %x", status);
                mutex_exit(&mr->mr_lock);
                return (ibc_get_ci_failure(0));
        }

        /* Update the mr sw struct from the hw struct. */
        lkey = mpt_entry.mem_key;
        /* Byte-rotate between hw and driver key layouts (cf. key_swap()) */
        mr->mr_lkey = mr->mr_rkey = (lkey >> 8) | (lkey << 24);
        mr->mr_bindinfo.bi_addr = mpt_entry.start_addr;
        mr->mr_bindinfo.bi_len = mpt_entry.reg_win_len;
        /* Preserve the RO_DISABLED bit; rebuild the rest from MPT flags */
        mr->mr_accflag = (mr->mr_accflag & IBT_MR_RO_DISABLED) |
            (mpt_entry.lw ? IBT_MR_LOCAL_WRITE : 0) |
            (mpt_entry.rr ? IBT_MR_REMOTE_READ : 0) |
            (mpt_entry.rw ? IBT_MR_REMOTE_WRITE : 0) |
            (mpt_entry.atomic ? IBT_MR_REMOTE_ATOMIC : 0) |
            (mpt_entry.en_bind ? IBT_MR_WINDOW_BIND : 0);
        /*
         * Reassemble the 64-bit MTT address from its split high/low fields;
         * the low field is stored in 8-byte units (hence the "<< 3").
         */
        mr->mr_mttaddr = ((uint64_t)mpt_entry.mtt_addr_h << 32) |
            (mpt_entry.mtt_addr_l << 3);
        mr->mr_logmttpgsz = mpt_entry.entity_sz;

        /* Fill in the queried attributes (key state from MPT ownership) */
        attr->mr_lkey_state =
            (mpt_entry.status == HERMON_MPT_FREE) ? IBT_KEY_FREE :
            (mpt_entry.status == HERMON_MPT_SW_OWNERSHIP) ? IBT_KEY_INVALID :
            IBT_KEY_VALID;
        attr->mr_phys_buf_list_sz = mpt_entry.mtt_size;
        attr->mr_attr_flags = mr->mr_accflag;
        attr->mr_pd = (ibt_pd_hdl_t)mr->mr_pdhdl;

        /* Fill in the "local" attributes */
        attr->mr_lkey = (ibt_lkey_t)mr->mr_lkey;
        attr->mr_lbounds.pb_addr = (ib_vaddr_t)mr->mr_bindinfo.bi_addr;
        attr->mr_lbounds.pb_len  = (size_t)mr->mr_bindinfo.bi_len;

        /*
         * Fill in the "remote" attributes (if necessary).  Note: the
         * remote attributes are only valid if the memory region has one
         * or more of the remote access flags set.
         */
        if ((mr->mr_accflag & IBT_MR_REMOTE_READ) ||
            (mr->mr_accflag & IBT_MR_REMOTE_WRITE) ||
            (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC)) {
                attr->mr_rkey = (ibt_rkey_t)mr->mr_rkey;
                attr->mr_rbounds.pb_addr = (ib_vaddr_t)mr->mr_bindinfo.bi_addr;
                attr->mr_rbounds.pb_len  = (size_t)mr->mr_bindinfo.bi_len;
        }

        /*
         * If region is mapped for streaming (i.e. noncoherent), then set sync
         * is required
         */
        attr->mr_sync_required = (mr->mr_bindinfo.bi_flags &
            IBT_MR_NONCOHERENT) ? B_TRUE : B_FALSE;

        mutex_exit(&mr->mr_lock);
        return (DDI_SUCCESS);
}


/*
 * hermon_mr_reregister()
 *    Reregisters a memory region described by a virtual address range.
 *    Context: Can be called from interrupt or base context.
 */
int
hermon_mr_reregister(hermon_state_t *state, hermon_mrhdl_t mr,
    hermon_pdhdl_t pd, ibt_mr_attr_t *mr_attr, hermon_mrhdl_t *mrhdl_new,
    hermon_mr_options_t *op)
{
        hermon_bind_info_t      bind;

        /*
         * Describe the registration as an "addr" (virtual address) binding,
         * as opposed to the "buf" binding built by
         * hermon_mr_reregister_buf().  This struct is eventually handed to
         * hermon_mr_mem_bind(), which does the heavy lifting for the Hermon
         * memory (re)registration routines.
         */
        bind.bi_type  = HERMON_BINDHDL_VADDR;
        bind.bi_addr  = mr_attr->mr_vaddr;
        bind.bi_len   = mr_attr->mr_len;
        bind.bi_as    = mr_attr->mr_as;
        bind.bi_flags = mr_attr->mr_flags;

        /* Hand off to the common reregistration path */
        return (hermon_mr_common_rereg(state, mr, pd, &bind, mrhdl_new, op));
}


/*
 * hermon_mr_reregister_buf()
 *    Reregisters a memory region described by a "struct buf".
 *    Context: Can be called from interrupt or base context.
 */
int
hermon_mr_reregister_buf(hermon_state_t *state, hermon_mrhdl_t mr,
    hermon_pdhdl_t pd, ibt_smr_attr_t *mr_attr, struct buf *buf,
    hermon_mrhdl_t *mrhdl_new, hermon_mr_options_t *op)
{
        hermon_bind_info_t      bind;

        /*
         * Describe the registration as a "buf" binding (as opposed to the
         * "addr" binding built by hermon_mr_reregister()).  The struct is
         * eventually handed to hermon_mr_mem_bind(), which does the heavy
         * lifting for the Hermon memory registration routines.  Note: when
         * IBT_MR_PHYS_IOVA is not set, "b_un.b_addr" is used as the IB
         * address; the exact value is not critical, as it need only be
         * unique for the given RKey (which happens by default), so the
         * choice is somewhat arbitrary.
         */
        bind.bi_type  = HERMON_BINDHDL_BUF;
        bind.bi_buf   = buf;
        bind.bi_addr  = (mr_attr->mr_flags & IBT_MR_PHYS_IOVA) ?
            mr_attr->mr_vaddr : (uint64_t)(uintptr_t)buf->b_un.b_addr;
        bind.bi_len   = (uint64_t)buf->b_bcount;
        bind.bi_flags = mr_attr->mr_flags;
        bind.bi_as    = NULL;

        /* Hand off to the common reregistration path */
        return (hermon_mr_common_rereg(state, mr, pd, &bind, mrhdl_new, op));
}


/*
 * hermon_mr_sync()
 *    Validates and performs a DMA sync for each entry in an array of
 *    ibt_mr_sync_t descriptors.
 *    Context: Can be called from interrupt or base context.
 */
/* ARGSUSED */
int
hermon_mr_sync(hermon_state_t *state, ibt_mr_sync_t *mr_segs, size_t num_segs)
{
        hermon_mrhdl_t          mr;
        uint64_t                sync_start, sync_len, sync_last;
        uint64_t                reg_first, reg_last;
        uint_t                  direction;
        int                     i;

        /* Validate and process each sync descriptor in turn */
        for (i = 0; i < num_segs; i++) {
                mr = (hermon_mrhdl_t)mr_segs[i].ms_handle;

                /* A NULL handle is invalid */
                if (mr == NULL) {
                        return (IBT_MR_HDL_INVALID);
                }

                mutex_enter(&mr->mr_lock);

                /*
                 * A umem region whose cookie has been cleared was already
                 * partially torn down by the hermon_umap_umemlock_cb()
                 * callback; treat its handle as invalid.
                 */
                if ((mr->mr_is_umem) && (mr->mr_umemcookie == NULL)) {
                        mutex_exit(&mr->mr_lock);
                        return (IBT_MR_HDL_INVALID);
                }

                /* The requested range must lie entirely within the region */
                sync_start = mr_segs[i].ms_vaddr;
                sync_len   = mr_segs[i].ms_len;
                sync_last  = sync_start + sync_len - 1;
                reg_first  = mr->mr_bindinfo.bi_addr;
                reg_last   = reg_first + mr->mr_bindinfo.bi_len - 1;
                if ((sync_start < reg_first) || (sync_start > reg_last)) {
                        mutex_exit(&mr->mr_lock);
                        return (IBT_MR_VA_INVALID);
                }
                if ((sync_last < reg_first) || (sync_last > reg_last)) {
                        mutex_exit(&mr->mr_lock);
                        return (IBT_MR_LEN_INVALID);
                }

                /* Map the IBT sync flag onto a DDI DMA sync direction */
                if (mr_segs[i].ms_flags & IBT_SYNC_READ) {
                        direction = DDI_DMA_SYNC_FORDEV;
                } else if (mr_segs[i].ms_flags & IBT_SYNC_WRITE) {
                        direction = DDI_DMA_SYNC_FORCPU;
                } else {
                        mutex_exit(&mr->mr_lock);
                        return (IBT_INVALID_PARAM);
                }

                (void) ddi_dma_sync(mr->mr_bindinfo.bi_dmahdl,
                    (off_t)(sync_start - reg_first), (size_t)sync_len,
                    direction);

                mutex_exit(&mr->mr_lock);
        }

        return (DDI_SUCCESS);
}


/*
 * hermon_mw_alloc()
 *    Allocates a memory window (an MPT entry configured as a window plus a
 *    software handle) within the given protection domain.
 *    NOTE: the guard at the top currently disables this operation entirely
 *    (see comment below).
 *    Context: Can be called from interrupt or base context.
 */
int
hermon_mw_alloc(hermon_state_t *state, hermon_pdhdl_t pd, ibt_mw_flags_t flags,
    hermon_mwhdl_t *mwhdl)
{
        hermon_rsrc_t           *mpt, *rsrc;
        hermon_hw_dmpt_t                mpt_entry;
        hermon_mwhdl_t          mw;
        uint_t                  sleep;
        int                     status;

        /*
         * This test is always TRUE (per the XXX note), so memory window
         * allocation unconditionally fails with IBT_INSUFF_RESOURCE and
         * everything below is currently unreachable.
         */
        if (state != NULL)      /* XXX - bogus test that is always TRUE */
                return (IBT_INSUFF_RESOURCE);

        /*
         * Check the sleep flag.  Ensure that it is consistent with the
         * current thread context (i.e. if we are currently in the interrupt
         * context, then we shouldn't be attempting to sleep).
         */
        sleep = (flags & IBT_MW_NOSLEEP) ? HERMON_NOSLEEP : HERMON_SLEEP;
        if ((sleep == HERMON_SLEEP) &&
            (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
                status = IBT_INVALID_PARAM;
                goto mwalloc_fail;
        }

        /* Increment the reference count on the protection domain (PD) */
        hermon_pd_refcnt_inc(pd);

        /*
         * Allocate an MPT entry (for use as a memory window).  Since the
         * Hermon hardware uses the MPT entry for memory regions and for
         * memory windows, we will fill in this MPT with all the necessary
         * parameters for the memory window.  And then (just as we do for
         * memory regions) ownership will be passed to the hardware in the
         * final step below.  If we fail here, we must undo the protection
         * domain reference count.
         */
        status = hermon_rsrc_alloc(state, HERMON_DMPT, 1, sleep, &mpt);
        if (status != DDI_SUCCESS) {
                status = IBT_INSUFF_RESOURCE;
                goto mwalloc_fail1;
        }

        /*
         * Allocate the software structure for tracking the memory window (i.e.
         * the Hermon Memory Window handle).  Note: This is actually the same
         * software structure used for tracking memory regions, but since many
         * of the same properties are needed, only a single structure is
         * necessary.  If we fail here, we must undo the protection domain
         * reference count and the previous resource allocation.
         */
        status = hermon_rsrc_alloc(state, HERMON_MRHDL, 1, sleep, &rsrc);
        if (status != DDI_SUCCESS) {
                status = IBT_INSUFF_RESOURCE;
                goto mwalloc_fail2;
        }
        mw = (hermon_mwhdl_t)rsrc->hr_addr;
        _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mw))

        /*
         * Calculate an "unbound" RKey from MPT index.  In much the same way
         * as we do for memory regions (above), this key is constructed from
         * a "constrained" (which depends on the MPT index) and an
         * "unconstrained" portion (which may be arbitrarily chosen).
         */
        mw->mr_rkey = hermon_mr_keycalc(mpt->hr_indx);

        /*
         * Fill in the MPT entry.  This is the final step before passing
         * ownership of the MPT entry to the Hermon hardware.  We use all of
         * the information collected/calculated above to fill in the
         * requisite portions of the MPT.  Note: fewer entries in the MPT
         * entry are necessary to allocate a memory window.
         */
        bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t));
        mpt_entry.reg_win       = HERMON_MPT_IS_WINDOW;
        mpt_entry.mem_key       = mw->mr_rkey;
        mpt_entry.pd            = pd->pd_pdnum;
        mpt_entry.lr            = 1;

        /*
         * Write the MPT entry to hardware.  Lastly, we pass ownership of
         * the entry to the hardware.  Note: in general, this operation
         * shouldn't fail.  But if it does, we have to undo everything we've
         * done above before returning error.
         */
        status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
            sizeof (hermon_hw_dmpt_t), mpt->hr_indx, sleep);
        if (status != HERMON_CMD_SUCCESS) {
                cmn_err(CE_CONT, "Hermon: SW2HW_MPT command failed: %08x\n",
                    status);
                if (status == HERMON_CMD_INVALID_STATUS) {
                        hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
                }
                status = ibc_get_ci_failure(0);
                goto mwalloc_fail3;
        }

        /*
         * Fill in the rest of the Hermon Memory Window handle.  Having
         * successfully transferred ownership of the MPT, we can update the
         * following fields for use in further operations on the MW.
         */
        mw->mr_mptrsrcp = mpt;
        mw->mr_pdhdl    = pd;
        mw->mr_rsrcp    = rsrc;
        mw->mr_rkey     = hermon_mr_key_swap(mw->mr_rkey);
        *mwhdl = mw;

        return (DDI_SUCCESS);

/* Error unwind: free in reverse order of acquisition */
mwalloc_fail3:
        hermon_rsrc_free(state, &rsrc);
mwalloc_fail2:
        hermon_rsrc_free(state, &mpt);
mwalloc_fail1:
        hermon_pd_refcnt_dec(pd);
mwalloc_fail:
        return (status);
}


/*
 * hermon_mw_free()
 *    Frees a memory window: reclaims the MPT entry from hardware, releases
 *    the software handle and MPT resources, and drops the PD reference.
 *    Context: Can be called from interrupt or base context.
 */
int
hermon_mw_free(hermon_state_t *state, hermon_mwhdl_t *mwhdl, uint_t sleep)
{
        hermon_mwhdl_t          mw;
        hermon_rsrc_t           *mpt_rsrc, *hdl_rsrc;
        hermon_pdhdl_t          pd_hdl;
        int                     status;

        /*
         * Sleeping is only legal when the current thread context allows
         * it; reject a HERMON_SLEEP request made from interrupt context.
         */
        if ((sleep == HERMON_SLEEP) &&
            (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
                return (IBT_INVALID_PARAM);
        }

        /*
         * Capture the resources referenced by the MW handle before any of
         * them are released; the handle structure itself is freed below.
         */
        mw = *mwhdl;
        mutex_enter(&mw->mr_lock);
        mpt_rsrc = mw->mr_mptrsrcp;
        hdl_rsrc = mw->mr_rsrcp;
        pd_hdl   = mw->mr_pdhdl;
        mutex_exit(&mw->mr_lock);
        _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mw))

        /*
         * Hand ownership of the MPT entry back to software.  A failure
         * here is generally unexpected; log it, post an FM ereport if the
         * command status indicates the HCA is unusable, and report a CI
         * failure to the caller.
         */
        status = hermon_cmn_ownership_cmd_post(state, HW2SW_MPT, NULL,
            0, mpt_rsrc->hr_indx, sleep);
        if (status != HERMON_CMD_SUCCESS) {
                cmn_err(CE_CONT, "Hermon: HW2SW_MPT command failed: %08x\n",
                    status);
                if (status == HERMON_CMD_INVALID_STATUS) {
                        hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
                }
                return (ibc_get_ci_failure(0));
        }

        /* Release the MW handle structure, then the MPT entry */
        hermon_rsrc_free(state, &hdl_rsrc);
        hermon_rsrc_free(state, &mpt_rsrc);

        /* Drop the PD reference taken when the window was allocated */
        hermon_pd_refcnt_dec(pd_hdl);

        /* Invalidate the caller's handle and report success */
        *mwhdl = NULL;

        return (DDI_SUCCESS);
}


/*
 * hermon_mr_keycalc()
 *    Context: Can be called from interrupt or base context.
 *    NOTE:  Produces a key in the form of
 *              KKKKKKKK IIIIIIII IIIIIIII IIIIIIIII
 *    where K == the arbitrary bits and I == the index
 */
uint32_t
hermon_mr_keycalc(uint32_t indx)
{
        uint32_t tmp_key, tmp_indx;

        /*
         * Generate a simple key from counter.  Note:  We increment this
         * static variable _intentionally_ without any kind of mutex around
         * it.  First, single-threading all operations through a single lock
         * would be a bad idea (from a performance point-of-view).  Second,
         * the upper "unconstrained" bits don't really have to be unique
         * because the lower bits are guaranteed to be (although we do make a
         * best effort to ensure that they are).  Third, the window for the
         * race (where both threads read and update the counter at the same
         * time) is incredibly small.
         * And, lastly, we'd like to make this into a "random" key
         */
        _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(hermon_memkey_cnt))
        tmp_key = (hermon_memkey_cnt++) << HERMON_MEMKEY_SHIFT;
        tmp_indx = indx & 0xffffff;
        return (tmp_key | tmp_indx);
}


/*
 * hermon_mr_key_swap()
 *    Context: Can be called from interrupt or base context.
 *    NOTE:  Produces a key in the form of
 *              IIIIIIII IIIIIIII IIIIIIIII KKKKKKKK
 *    where K == the arbitrary bits and I == the index
 */
uint32_t
hermon_mr_key_swap(uint32_t indx)
{
        /*
         * Rotate the 32-bit key left by one byte.  The format passed down
         * to the hardware is (key[7:0],index[23:0]), which defines the
         * index to the hardware resource; the format the driver hands out
         * as a memory key (i.e. to retrieve a resource) is
         * (index[23:0],key[7:0]).  The masks make the byte boundaries
         * explicit.
         */
        return (((indx << 8) & 0xffffff00) | ((indx >> 24) & 0x000000ff));
}

/*
 * hermon_mr_common_reg()
 *    Context: Can be called from interrupt or base context.
 */
static int
hermon_mr_common_reg(hermon_state_t *state, hermon_pdhdl_t pd,
    hermon_bind_info_t *bind, hermon_mrhdl_t *mrhdl, hermon_mr_options_t *op,
    hermon_mpt_rsrc_type_t mpt_type)
{
        hermon_rsrc_t           *mpt, *mtt, *rsrc, *mtt_refcnt;
        hermon_umap_db_entry_t  *umapdb;
        hermon_sw_refcnt_t      *swrc_tmp;
        hermon_hw_dmpt_t        mpt_entry;
        hermon_mrhdl_t          mr;
        ibt_mr_flags_t          flags;
        hermon_bind_info_t      *bh;
        ddi_dma_handle_t        bind_dmahdl;
        ddi_umem_cookie_t       umem_cookie;
        size_t                  umem_len;
        caddr_t                 umem_addr;
        uint64_t                mtt_addr, max_sz;
        uint_t                  sleep, mtt_pgsize_bits, bind_type, mr_is_umem;
        int                     status, umem_flags, bind_override_addr;

        /*
         * Check the "options" flag.  Currently this flag tells the driver
         * whether or not the region should be bound normally (i.e. with
         * entries written into the PCI IOMMU), whether it should be
         * registered to bypass the IOMMU, and whether or not the resulting
         * address should be "zero-based" (to aid the alignment restrictions
         * for QPs).
         */
        if (op == NULL) {
                bind_type   = HERMON_BINDMEM_NORMAL;
                bind_dmahdl = NULL;
                bind_override_addr = 0;
        } else {
                bind_type          = op->mro_bind_type;
                bind_dmahdl        = op->mro_bind_dmahdl;
                bind_override_addr = op->mro_bind_override_addr;
        }

        /* check what kind of mpt to use */

        /* Extract the flags field from the hermon_bind_info_t */
        flags = bind->bi_flags;

        /*
         * Check for invalid length.  Check if the length is zero or if the
         * length is larger than the maximum configured value.  Return error
         * if it is.
         */
        max_sz = ((uint64_t)1 << state->hs_cfg_profile->cp_log_max_mrw_sz);
        if ((bind->bi_len == 0) || (bind->bi_len > max_sz)) {
                status = IBT_MR_LEN_INVALID;
                goto mrcommon_fail;
        }

        /*
         * Check the sleep flag.  Ensure that it is consistent with the
         * current thread context (i.e. if we are currently in the interrupt
         * context, then we shouldn't be attempting to sleep).
         */
        sleep = (flags & IBT_MR_NOSLEEP) ? HERMON_NOSLEEP: HERMON_SLEEP;
        if ((sleep == HERMON_SLEEP) &&
            (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
                status = IBT_INVALID_PARAM;
                goto mrcommon_fail;
        }

        /* Increment the reference count on the protection domain (PD) */
        hermon_pd_refcnt_inc(pd);

        /*
         * Allocate an MPT entry.  This will be filled in with all the
         * necessary parameters to define the memory region.  And then
         * ownership will be passed to the hardware in the final step
         * below.  If we fail here, we must undo the protection domain
         * reference count.
         */
        if (mpt_type == HERMON_MPT_DMPT) {
                status = hermon_rsrc_alloc(state, HERMON_DMPT, 1, sleep, &mpt);
                if (status != DDI_SUCCESS) {
                        status = IBT_INSUFF_RESOURCE;
                        goto mrcommon_fail1;
                }
        } else {
                mpt = NULL;
        }

        /*
         * Allocate the software structure for tracking the memory region (i.e.
         * the Hermon Memory Region handle).  If we fail here, we must undo
         * the protection domain reference count and the previous resource
         * allocation.
         */
        status = hermon_rsrc_alloc(state, HERMON_MRHDL, 1, sleep, &rsrc);
        if (status != DDI_SUCCESS) {
                status = IBT_INSUFF_RESOURCE;
                goto mrcommon_fail2;
        }
        mr = (hermon_mrhdl_t)rsrc->hr_addr;
        _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr))

        /*
         * Setup and validate the memory region access flags.  This means
         * translating the IBTF's enable flags into the access flags that
         * will be used in later operations.
         */
        mr->mr_accflag = 0;
        if (flags & IBT_MR_ENABLE_WINDOW_BIND)
                mr->mr_accflag |= IBT_MR_WINDOW_BIND;
        if (flags & IBT_MR_ENABLE_LOCAL_WRITE)
                mr->mr_accflag |= IBT_MR_LOCAL_WRITE;
        if (flags & IBT_MR_ENABLE_REMOTE_READ)
                mr->mr_accflag |= IBT_MR_REMOTE_READ;
        if (flags & IBT_MR_ENABLE_REMOTE_WRITE)
                mr->mr_accflag |= IBT_MR_REMOTE_WRITE;
        if (flags & IBT_MR_ENABLE_REMOTE_ATOMIC)
                mr->mr_accflag |= IBT_MR_REMOTE_ATOMIC;

        /*
         * Calculate keys (Lkey, Rkey) from MPT index.  Each key is formed
         * from a certain number of "constrained" bits (the least significant
         * bits) and some number of "unconstrained" bits.  The constrained
         * bits must be set to the index of the entry in the MPT table, but
         * the unconstrained bits can be set to any value we wish.  Note:
         * if no remote access is required, then the RKey value is not filled
         * in.  Otherwise both Rkey and LKey are given the same value.
         */
        if (mpt)
                mr->mr_rkey = mr->mr_lkey = hermon_mr_keycalc(mpt->hr_indx);

        /*
         * Determine if the memory is from userland and pin the pages
         * with umem_lockmemory() if necessary.
         * Then, if this is userland memory, allocate an entry in the
         * "userland resources database".  This will later be added to
         * the database (after all further memory registration operations are
         * successful).  If we fail here, we must undo the reference counts
         * and the previous resource allocations.
         */
        mr_is_umem = (((bind->bi_as != NULL) && (bind->bi_as != &kas)) ? 1 : 0);
        if (mr_is_umem) {
                umem_len   = ptob(btopr(bind->bi_len +
                    ((uintptr_t)bind->bi_addr & PAGEOFFSET)));
                umem_addr  = (caddr_t)((uintptr_t)bind->bi_addr & ~PAGEOFFSET);
                umem_flags = (DDI_UMEMLOCK_WRITE | DDI_UMEMLOCK_READ |
                    DDI_UMEMLOCK_LONGTERM);
                status = umem_lockmemory(umem_addr, umem_len, umem_flags,
                    &umem_cookie, &hermon_umem_cbops, NULL);
                if (status != 0) {
                        status = IBT_INSUFF_RESOURCE;
                        goto mrcommon_fail3;
                }

                _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
                _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind->bi_buf))

                bind->bi_buf = ddi_umem_iosetup(umem_cookie, 0, umem_len,
                    B_WRITE, 0, 0, NULL, DDI_UMEM_SLEEP);
                if (bind->bi_buf == NULL) {
                        /*
                         * The pages were successfully locked above, but the
                         * "mrcommon_fail3" path below does not reach the
                         * ddi_umem_unlock() call (that is only done under
                         * "mrcommon_fail4").  Unlock them here before
                         * jumping, so the locked pages are not leaked.
                         */
                        ddi_umem_unlock(umem_cookie);
                        status = IBT_INSUFF_RESOURCE;
                        goto mrcommon_fail3;
                }
                bind->bi_type = HERMON_BINDHDL_UBUF;
                bind->bi_buf->b_flags |= B_READ;

                _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*bind->bi_buf))
                _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*bind))

                umapdb = hermon_umap_db_alloc(state->hs_instance,
                    (uint64_t)(uintptr_t)umem_cookie, MLNX_UMAP_MRMEM_RSRC,
                    (uint64_t)(uintptr_t)rsrc);
                if (umapdb == NULL) {
                        status = IBT_INSUFF_RESOURCE;
                        goto mrcommon_fail4;
                }
        }

        /*
         * Setup the bindinfo for the mtt bind call
         */
        bh = &mr->mr_bindinfo;
        _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bh))
        bcopy(bind, bh, sizeof (hermon_bind_info_t));
        bh->bi_bypass = bind_type;
        status = hermon_mr_mtt_bind(state, bh, bind_dmahdl, &mtt,
            &mtt_pgsize_bits, mpt != NULL);
        if (status != DDI_SUCCESS) {
                /*
                 * When mtt_bind fails, freerbuf has already been done,
                 * so make sure not to call it again.
                 */
                bind->bi_type = bh->bi_type;
                goto mrcommon_fail5;
        }
        mr->mr_logmttpgsz = mtt_pgsize_bits;

        /*
         * Allocate MTT reference count (to track shared memory regions).
         * This reference count resource may never be used on the given
         * memory region, but if it is ever later registered as "shared"
         * memory region then this resource will be necessary.  If we fail
         * here, we do pretty much the same as above to clean up.
         */
        status = hermon_rsrc_alloc(state, HERMON_REFCNT, 1, sleep,
            &mtt_refcnt);
        if (status != DDI_SUCCESS) {
                status = IBT_INSUFF_RESOURCE;
                goto mrcommon_fail6;
        }
        mr->mr_mttrefcntp = mtt_refcnt;
        swrc_tmp = (hermon_sw_refcnt_t *)mtt_refcnt->hr_addr;
        _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*swrc_tmp))
        HERMON_MTT_REFCNT_INIT(swrc_tmp);

        mtt_addr = (mtt->hr_indx << HERMON_MTT_SIZE_SHIFT);

        /*
         * Fill in the MPT entry.  This is the final step before passing
         * ownership of the MPT entry to the Hermon hardware.  We use all of
         * the information collected/calculated above to fill in the
         * requisite portions of the MPT.  Do this ONLY for DMPTs.
         */
        if (mpt == NULL)
                goto no_passown;

        bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t));

        mpt_entry.status  = HERMON_MPT_SW_OWNERSHIP;
        mpt_entry.en_bind = (mr->mr_accflag & IBT_MR_WINDOW_BIND)   ? 1 : 0;
        mpt_entry.atomic  = (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC) ? 1 : 0;
        mpt_entry.rw      = (mr->mr_accflag & IBT_MR_REMOTE_WRITE)  ? 1 : 0;
        mpt_entry.rr      = (mr->mr_accflag & IBT_MR_REMOTE_READ)   ? 1 : 0;
        mpt_entry.lw      = (mr->mr_accflag & IBT_MR_LOCAL_WRITE)   ? 1 : 0;
        mpt_entry.lr      = 1;
        mpt_entry.phys_addr = 0;
        mpt_entry.reg_win = HERMON_MPT_IS_REGION;

        mpt_entry.entity_sz     = mr->mr_logmttpgsz;
        mpt_entry.mem_key       = mr->mr_lkey;
        mpt_entry.pd            = pd->pd_pdnum;
        mpt_entry.rem_acc_en = 0;
        mpt_entry.fast_reg_en = 0;
        mpt_entry.en_inval = 0;
        mpt_entry.lkey = 0;
        mpt_entry.win_cnt = 0;

        /*
         * For a "zero-based" registration, keep only the offset within the
         * MTT page; otherwise program the caller's full virtual address.
         */
        if (bind_override_addr == 0) {
                mpt_entry.start_addr = bh->bi_addr;
        } else {
                bh->bi_addr = bh->bi_addr & ((1 << mr->mr_logmttpgsz) - 1);
                mpt_entry.start_addr = bh->bi_addr;
        }
        mpt_entry.reg_win_len   = bh->bi_len;

        mpt_entry.mtt_addr_h = mtt_addr >> 32;  /* only 8 more bits */
        mpt_entry.mtt_addr_l = mtt_addr >> 3;   /* only 29 bits */

        /*
         * Write the MPT entry to hardware.  Lastly, we pass ownership of
         * the entry to the hardware if needed.  Note: in general, this
         * operation shouldn't fail.  But if it does, we have to undo
         * everything we've done above before returning error.
         *
         * For Hermon, this routine (which is common to the contexts) will only
         * set the ownership if needed - the process of passing the context
         * itself to HW will take care of setting up the MPT (based on type
         * and index).
         */

        mpt_entry.bnd_qp = 0;   /* dMPT for a qp, check for window */
        status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
            sizeof (hermon_hw_dmpt_t), mpt->hr_indx, sleep);
        if (status != HERMON_CMD_SUCCESS) {
                cmn_err(CE_CONT, "Hermon: SW2HW_MPT command failed: %08x\n",
                    status);
                if (status == HERMON_CMD_INVALID_STATUS) {
                        hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
                }
                status = ibc_get_ci_failure(0);
                goto mrcommon_fail7;
        }
        if (hermon_rdma_debug & 0x4)
                IBTF_DPRINTF_L2("mr", "  reg: mr %p  key %x",
                    mr, hermon_mr_key_swap(mr->mr_rkey));

        /* Non-DMPT registrations skip the SW2HW_MPT ownership transfer */
no_passown:

        /*
         * Fill in the rest of the Hermon Memory Region handle.  Having
         * successfully transferred ownership of the MPT, we can update the
         * following fields for use in further operations on the MR.
         */
        mr->mr_mttaddr     = mtt_addr;

        mr->mr_log2_pgsz   = (mr->mr_logmttpgsz - HERMON_PAGESHIFT);
        mr->mr_mptrsrcp    = mpt;
        mr->mr_mttrsrcp    = mtt;
        mr->mr_pdhdl       = pd;
        mr->mr_rsrcp       = rsrc;
        mr->mr_is_umem     = mr_is_umem;
        mr->mr_is_fmr      = 0;
        mr->mr_umemcookie  = (mr_is_umem != 0) ? umem_cookie : NULL;
        mr->mr_umem_cbfunc = NULL;
        mr->mr_umem_cbarg1 = NULL;
        mr->mr_umem_cbarg2 = NULL;
        mr->mr_lkey        = hermon_mr_key_swap(mr->mr_lkey);
        mr->mr_rkey        = hermon_mr_key_swap(mr->mr_rkey);
        mr->mr_mpt_type    = mpt_type;

        /*
         * If this is userland memory, then we need to insert the previously
         * allocated entry into the "userland resources database".  This will
         * allow for later coordination between the hermon_umap_umemlock_cb()
         * callback and hermon_mr_deregister().
         */
        if (mr_is_umem) {
                hermon_umap_db_add(umapdb);
        }

        *mrhdl = mr;

        return (DDI_SUCCESS);

/*
 * The following is cleanup for all possible failure cases in this routine.
 * The labels undo the allocations in reverse order; control falls through
 * from each label to the next.
 */
mrcommon_fail7:
        hermon_rsrc_free(state, &mtt_refcnt);
mrcommon_fail6:
        hermon_mr_mem_unbind(state, bh);
        bind->bi_type = bh->bi_type;
mrcommon_fail5:
        if (mr_is_umem) {
                hermon_umap_db_free(umapdb);
        }
mrcommon_fail4:
        if (mr_is_umem) {
                /*
                 * Free up the memory ddi_umem_iosetup() allocates
                 * internally.
                 */
                if (bind->bi_type == HERMON_BINDHDL_UBUF) {
                        freerbuf(bind->bi_buf);
                        _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
                        bind->bi_type = HERMON_BINDHDL_NONE;
                        _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*bind))
                }
                ddi_umem_unlock(umem_cookie);
        }
mrcommon_fail3:
        hermon_rsrc_free(state, &rsrc);
mrcommon_fail2:
        if (mpt != NULL)
                hermon_rsrc_free(state, &mpt);
mrcommon_fail1:
        hermon_pd_refcnt_dec(pd);
mrcommon_fail:
        return (status);
}

/*
 * hermon_dma_mr_register()
 *    Context: Can be called from base context.
 */
int
hermon_dma_mr_register(hermon_state_t *state, hermon_pdhdl_t pd,
    ibt_dmr_attr_t *mr_attr, hermon_mrhdl_t *mrhdl)
{
        hermon_rsrc_t           *mpt, *rsrc;
        hermon_hw_dmpt_t        mpt_entry;
        hermon_mrhdl_t          mr;
        ibt_mr_flags_t          flags;
        uint_t                  sleep;
        int                     status;

        /* Extract the flags field */
        flags = mr_attr->dmr_flags;

        /*
         * Check the sleep flag.  Ensure that it is consistent with the
         * current thread context (i.e. if we are currently in the interrupt
         * context, then we shouldn't be attempting to sleep).
         */
        sleep = (flags & IBT_MR_NOSLEEP) ? HERMON_NOSLEEP: HERMON_SLEEP;
        if ((sleep == HERMON_SLEEP) &&
            (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
                status = IBT_INVALID_PARAM;
                goto mrcommon_fail;
        }

        /* Increment the reference count on the protection domain (PD) */
        hermon_pd_refcnt_inc(pd);

        /*
         * Allocate an MPT entry.  This will be filled in with all the
         * necessary parameters to define the memory region.  And then
         * ownership will be passed to the hardware in the final step
         * below.  If we fail here, we must undo the protection domain
         * reference count.
         */
        status = hermon_rsrc_alloc(state, HERMON_DMPT, 1, sleep, &mpt);
        if (status != DDI_SUCCESS) {
                status = IBT_INSUFF_RESOURCE;
                goto mrcommon_fail1;
        }

        /*
         * Allocate the software structure for tracking the memory region (i.e.
         * the Hermon Memory Region handle).  If we fail here, we must undo
         * the protection domain reference count and the previous resource
         * allocation.
         */
        status = hermon_rsrc_alloc(state, HERMON_MRHDL, 1, sleep, &rsrc);
        if (status != DDI_SUCCESS) {
                status = IBT_INSUFF_RESOURCE;
                goto mrcommon_fail2;
        }
        mr = (hermon_mrhdl_t)rsrc->hr_addr;
        _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr))
        /* Start from a clean handle; all unset fields are left zero */
        bzero(mr, sizeof (*mr));

        /*
         * Setup and validate the memory region access flags.  This means
         * translating the IBTF's enable flags into the access flags that
         * will be used in later operations.
         */
        mr->mr_accflag = 0;
        if (flags & IBT_MR_ENABLE_WINDOW_BIND)
                mr->mr_accflag |= IBT_MR_WINDOW_BIND;
        if (flags & IBT_MR_ENABLE_LOCAL_WRITE)
                mr->mr_accflag |= IBT_MR_LOCAL_WRITE;
        if (flags & IBT_MR_ENABLE_REMOTE_READ)
                mr->mr_accflag |= IBT_MR_REMOTE_READ;
        if (flags & IBT_MR_ENABLE_REMOTE_WRITE)
                mr->mr_accflag |= IBT_MR_REMOTE_WRITE;
        if (flags & IBT_MR_ENABLE_REMOTE_ATOMIC)
                mr->mr_accflag |= IBT_MR_REMOTE_ATOMIC;

        /*
         * Calculate keys (Lkey, Rkey) from MPT index.  Each key is formed
         * from a certain number of "constrained" bits (the least significant
         * bits) and some number of "unconstrained" bits.  The constrained
         * bits must be set to the index of the entry in the MPT table, but
         * the unconstrained bits can be set to any value we wish.  Note:
         * if no remote access is required, then the RKey value is not filled
         * in.  Otherwise both Rkey and LKey are given the same value.
         */
        /* mpt is always non-NULL here (hermon_rsrc_alloc succeeded above) */
        if (mpt)
                mr->mr_rkey = mr->mr_lkey = hermon_mr_keycalc(mpt->hr_indx);

        /*
         * Fill in the MPT entry.  This is the final step before passing
         * ownership of the MPT entry to the Hermon hardware.  We use all of
         * the information collected/calculated above to fill in the
         * requisite portions of the MPT.  Do this ONLY for DMPTs.
         */
        bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t));

        mpt_entry.status  = HERMON_MPT_SW_OWNERSHIP;
        mpt_entry.en_bind = (mr->mr_accflag & IBT_MR_WINDOW_BIND)   ? 1 : 0;
        mpt_entry.atomic  = (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC) ? 1 : 0;
        mpt_entry.rw      = (mr->mr_accflag & IBT_MR_REMOTE_WRITE)  ? 1 : 0;
        mpt_entry.rr      = (mr->mr_accflag & IBT_MR_REMOTE_READ)   ? 1 : 0;
        mpt_entry.lw      = (mr->mr_accflag & IBT_MR_LOCAL_WRITE)   ? 1 : 0;
        mpt_entry.lr      = 1;
        mpt_entry.phys_addr = 1;        /* critical bit for this */
        mpt_entry.reg_win = HERMON_MPT_IS_REGION;

        /*
         * NOTE(review): mr_logmttpgsz is still zero here (the handle was
         * bzero'd above and no MTTs are used for a physical-address MR);
         * confirm that entity_sz == 0 is the intended encoding.
         */
        mpt_entry.entity_sz     = mr->mr_logmttpgsz;
        mpt_entry.mem_key       = mr->mr_lkey;
        mpt_entry.pd            = pd->pd_pdnum;
        mpt_entry.rem_acc_en = 0;
        mpt_entry.fast_reg_en = 0;
        mpt_entry.en_inval = 0;
        mpt_entry.lkey = 0;
        mpt_entry.win_cnt = 0;

        mpt_entry.start_addr = mr_attr->dmr_paddr;
        mpt_entry.reg_win_len = mr_attr->dmr_len;
        if (mr_attr->dmr_len == 0)
                mpt_entry.len_b64 = 1;  /* needed for 2^^64 length */

        /* Physical-address MR: no MTT translation is used */
        mpt_entry.mtt_addr_h = 0;
        mpt_entry.mtt_addr_l = 0;

        /*
         * Write the MPT entry to hardware.  Lastly, we pass ownership of
         * the entry to the hardware if needed.  Note: in general, this
         * operation shouldn't fail.  But if it does, we have to undo
         * everything we've done above before returning error.
         *
         * For Hermon, this routine (which is common to the contexts) will only
         * set the ownership if needed - the process of passing the context
         * itself to HW will take care of setting up the MPT (based on type
         * and index).
         */

        mpt_entry.bnd_qp = 0;   /* dMPT for a qp, check for window */
        status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
            sizeof (hermon_hw_dmpt_t), mpt->hr_indx, sleep);
        if (status != HERMON_CMD_SUCCESS) {
                cmn_err(CE_CONT, "Hermon: SW2HW_MPT command failed: %08x\n",
                    status);
                if (status == HERMON_CMD_INVALID_STATUS) {
                        hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
                }
                status = ibc_get_ci_failure(0);
                goto mrcommon_fail7;
        }

        /*
         * Fill in the rest of the Hermon Memory Region handle.  Having
         * successfully transferred ownership of the MPT, we can update the
         * following fields for use in further operations on the MR.
         */
        mr->mr_mttaddr     = 0;

        mr->mr_log2_pgsz   = 0;
        mr->mr_mptrsrcp    = mpt;
        mr->mr_mttrsrcp    = NULL;
        mr->mr_pdhdl       = pd;
        mr->mr_rsrcp       = rsrc;
        mr->mr_is_umem     = 0;
        mr->mr_is_fmr      = 0;
        mr->mr_umemcookie  = NULL;
        mr->mr_umem_cbfunc = NULL;
        mr->mr_umem_cbarg1 = NULL;
        mr->mr_umem_cbarg2 = NULL;
        mr->mr_lkey        = hermon_mr_key_swap(mr->mr_lkey);
        mr->mr_rkey        = hermon_mr_key_swap(mr->mr_rkey);
        mr->mr_mpt_type    = HERMON_MPT_DMPT;

        *mrhdl = mr;

        return (DDI_SUCCESS);

/*
 * The following is cleanup for all possible failure cases in this routine.
 * The label names mirror those in hermon_mr_common_reg(); only a subset
 * is needed here since fewer resources are allocated.
 */
mrcommon_fail7:
        hermon_rsrc_free(state, &rsrc);
mrcommon_fail2:
        hermon_rsrc_free(state, &mpt);
mrcommon_fail1:
        hermon_pd_refcnt_dec(pd);
mrcommon_fail:
        return (status);
}

/*
 * hermon_mr_alloc_lkey()
 *    Context: Can be called from base context.
 */
int
hermon_mr_alloc_lkey(hermon_state_t *state, hermon_pdhdl_t pd,
    ibt_lkey_flags_t flags, uint_t nummtt, hermon_mrhdl_t *mrhdl)
{
        hermon_rsrc_t           *mpt, *mtt, *rsrc, *mtt_refcnt;
        hermon_sw_refcnt_t      *swrc_tmp;
        hermon_hw_dmpt_t        mpt_entry;
        hermon_mrhdl_t          mr;
        uint64_t                mtt_addr;
        uint_t                  sleep;
        int                     status;

        /* Increment the reference count on the protection domain (PD) */
        hermon_pd_refcnt_inc(pd);

        /*
         * NOTE(review): unlike the other registration paths in this file,
         * no HERMON_SLEEPFLAG_FOR_CONTEXT() consistency check is done on
         * the sleep flag here -- confirm callers never invoke this from
         * interrupt context with a sleeping flag.
         */
        sleep = (flags & IBT_KEY_NOSLEEP) ? HERMON_NOSLEEP: HERMON_SLEEP;

        /*
         * Allocate an MPT entry.  This will be filled in with "some" of the
         * necessary parameters to define the memory region.  And then
         * ownership will be passed to the hardware in the final step
         * below.  If we fail here, we must undo the protection domain
         * reference count.
         *
         * The MTTs will get filled in when the FRWR is processed.
         */
        status = hermon_rsrc_alloc(state, HERMON_DMPT, 1, sleep, &mpt);
        if (status != DDI_SUCCESS) {
                status = IBT_INSUFF_RESOURCE;
                goto alloclkey_fail1;
        }

        /*
         * Allocate the software structure for tracking the memory region (i.e.
         * the Hermon Memory Region handle).  If we fail here, we must undo
         * the protection domain reference count and the previous resource
         * allocation.
         */
        status = hermon_rsrc_alloc(state, HERMON_MRHDL, 1, sleep, &rsrc);
        if (status != DDI_SUCCESS) {
                status = IBT_INSUFF_RESOURCE;
                goto alloclkey_fail2;
        }
        mr = (hermon_mrhdl_t)rsrc->hr_addr;
        _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr))
        bzero(mr, sizeof (*mr));
        /* Mark the bind handle so deregistration knows there is no binding */
        mr->mr_bindinfo.bi_type = HERMON_BINDHDL_LKEY;

        mr->mr_lkey = hermon_mr_keycalc(mpt->hr_indx);

        /* Reserve the MTT entries that the fast-register WR will fill in */
        status = hermon_rsrc_alloc(state, HERMON_MTT, nummtt, sleep, &mtt);
        if (status != DDI_SUCCESS) {
                status = IBT_INSUFF_RESOURCE;
                goto alloclkey_fail3;
        }
        mr->mr_logmttpgsz = PAGESHIFT;

        /*
         * Allocate MTT reference count (to track shared memory regions).
         * This reference count resource may never be used on the given
         * memory region, but if it is ever later registered as "shared"
         * memory region then this resource will be necessary.  If we fail
         * here, we do pretty much the same as above to clean up.
         */
        status = hermon_rsrc_alloc(state, HERMON_REFCNT, 1, sleep,
            &mtt_refcnt);
        if (status != DDI_SUCCESS) {
                status = IBT_INSUFF_RESOURCE;
                goto alloclkey_fail4;
        }
        mr->mr_mttrefcntp = mtt_refcnt;
        swrc_tmp = (hermon_sw_refcnt_t *)mtt_refcnt->hr_addr;
        _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*swrc_tmp))
        HERMON_MTT_REFCNT_INIT(swrc_tmp);

        mtt_addr = (mtt->hr_indx << HERMON_MTT_SIZE_SHIFT);

        /*
         * Fill in the dMPT entry.  Note the HERMON_MPT_FREE status (rather
         * than SW_OWNERSHIP as in the full registration path): presumably
         * the entry is claimed later by the fast-register work request --
         * confirm against the fast_reg_en/en_inval usage in hermon_wr.
         */
        bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t));
        mpt_entry.status = HERMON_MPT_FREE;
        mpt_entry.lw = 1;
        mpt_entry.lr = 1;
        mpt_entry.reg_win = HERMON_MPT_IS_REGION;
        mpt_entry.entity_sz = mr->mr_logmttpgsz;
        mpt_entry.mem_key = mr->mr_lkey;
        mpt_entry.pd = pd->pd_pdnum;
        mpt_entry.fast_reg_en = 1;
        mpt_entry.rem_acc_en = 1;
        mpt_entry.en_inval = 1;
        if (flags & IBT_KEY_REMOTE) {
                mpt_entry.ren_inval = 1;
        }
        mpt_entry.mtt_size = nummtt;
        mpt_entry.mtt_addr_h = mtt_addr >> 32;  /* only 8 more bits */
        mpt_entry.mtt_addr_l = mtt_addr >> 3;   /* only 29 bits */

        /*
         * Write the MPT entry to hardware.  Lastly, we pass ownership of
         * the entry to the hardware if needed.  Note: in general, this
         * operation shouldn't fail.  But if it does, we have to undo
         * everything we've done above before returning error.
         *
         * For Hermon, this routine (which is common to the contexts) will only
         * set the ownership if needed - the process of passing the context
         * itself to HW will take care of setting up the MPT (based on type
         * and index).
         */
        status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
            sizeof (hermon_hw_dmpt_t), mpt->hr_indx, sleep);
        if (status != HERMON_CMD_SUCCESS) {
                cmn_err(CE_CONT, "Hermon: alloc_lkey: SW2HW_MPT command "
                    "failed: %08x\n", status);
                if (status == HERMON_CMD_INVALID_STATUS) {
                        hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
                }
                status = ibc_get_ci_failure(0);
                goto alloclkey_fail5;
        }

        /*
         * Fill in the rest of the Hermon Memory Region handle.  Having
         * successfully transferred ownership of the MPT, we can update the
         * following fields for use in further operations on the MR.
         */
        mr->mr_accflag = IBT_MR_LOCAL_WRITE;
        mr->mr_mttaddr = mtt_addr;
        mr->mr_log2_pgsz = (mr->mr_logmttpgsz - HERMON_PAGESHIFT);
        mr->mr_mptrsrcp = mpt;
        mr->mr_mttrsrcp = mtt;
        mr->mr_pdhdl = pd;
        mr->mr_rsrcp = rsrc;
        mr->mr_lkey = hermon_mr_key_swap(mr->mr_lkey);
        /* The RKey is identical to the (swapped) LKey for an alloc'd lkey */
        mr->mr_rkey = mr->mr_lkey;
        mr->mr_mpt_type = HERMON_MPT_DMPT;

        *mrhdl = mr;
        return (DDI_SUCCESS);

/* Cleanup cascade: undo the allocations above in reverse order */
alloclkey_fail5:
        hermon_rsrc_free(state, &mtt_refcnt);
alloclkey_fail4:
        hermon_rsrc_free(state, &mtt);
alloclkey_fail3:
        hermon_rsrc_free(state, &rsrc);
alloclkey_fail2:
        hermon_rsrc_free(state, &mpt);
alloclkey_fail1:
        hermon_pd_refcnt_dec(pd);
        return (status);
}

/*
 * hermon_mr_fexch_mpt_init()
 *    Context: Can be called from base context.
 *
 * This is the same as alloc_lkey, but not returning an mrhdl.
 */
int
hermon_mr_fexch_mpt_init(hermon_state_t *state, hermon_pdhdl_t pd,
    uint32_t mpt_indx, uint_t nummtt, uint64_t mtt_addr, uint_t sleep)
{
        hermon_hw_dmpt_t        dmpt;
        int                     status;

        /*
         * Build the dMPT entry for the given MPT index.  The entry is left
         * in the FREE state and flagged for fast registration; the MTT
         * contents themselves get filled in later, when the FRWR is
         * processed.
         */
        bzero(&dmpt, sizeof (hermon_hw_dmpt_t));
        dmpt.status      = HERMON_MPT_FREE;
        dmpt.reg_win     = HERMON_MPT_IS_REGION;
        dmpt.lr          = 1;
        dmpt.lw          = 1;
        dmpt.rr          = 1;
        dmpt.rw          = 1;
        dmpt.fast_reg_en = 1;
        dmpt.rem_acc_en  = 1;
        dmpt.en_inval    = 1;
        dmpt.ren_inval   = 1;
        dmpt.entity_sz   = PAGESHIFT;
        dmpt.mem_key     = mpt_indx;
        dmpt.pd          = pd->pd_pdnum;
        dmpt.mtt_size    = nummtt;
        dmpt.mtt_addr_h  = mtt_addr >> 32;      /* upper 8 bits */
        dmpt.mtt_addr_l  = mtt_addr >> 3;       /* 29-bit field */

        /*
         * Pass the entry to the hardware.  In general this command should
         * not fail; if it does, log the failure (posting an FMA ereport
         * for an invalid status) and return a CI failure code.  Ownership
         * itself is set only if needed - passing the context to HW handles
         * the MPT setup based on type and index.
         */
        status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &dmpt,
            sizeof (hermon_hw_dmpt_t), mpt_indx, sleep);
        if (status != HERMON_CMD_SUCCESS) {
                cmn_err(CE_CONT, "Hermon: fexch_mpt_init: SW2HW_MPT command "
                    "failed: %08x\n", status);
                if (status == HERMON_CMD_INVALID_STATUS) {
                        hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
                }
                return (ibc_get_ci_failure(0));
        }

        /* Success: take a reference on the protection domain (PD) */
        hermon_pd_refcnt_inc(pd);

        return (DDI_SUCCESS);
}

/*
 * hermon_mr_fexch_mpt_fini()
 *    Context: Can be called from base context.
 *
 * This is the same as deregister_mr, without an mrhdl.
 */
int
hermon_mr_fexch_mpt_fini(hermon_state_t *state, hermon_pdhdl_t pd,
    uint32_t mpt_indx, uint_t sleep)
{
        int                     status;

        /* Reclaim ownership of the MPT entry from the hardware */
        status = hermon_cmn_ownership_cmd_post(state, HW2SW_MPT,
            NULL, 0, mpt_indx, sleep);
        /*
         * Compare against HERMON_CMD_SUCCESS (not DDI_SUCCESS) for
         * consistency with every other hermon_cmn_ownership_cmd_post()
         * caller in this file; the routine returns a command status.
         */
        if (status != HERMON_CMD_SUCCESS) {
                cmn_err(CE_CONT, "Hermon: fexch_mpt_fini: HW2SW_MPT command "
                    "failed: %08x\n", status);
                if (status == HERMON_CMD_INVALID_STATUS) {
                        hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
                }
                status = ibc_get_ci_failure(0);
                return (status);
        }

        /* Decrement the reference count on the protection domain (PD) */
        hermon_pd_refcnt_dec(pd);

        return (DDI_SUCCESS);
}

/*
 * hermon_mr_mtt_bind()
 *    Context: Can be called from interrupt or base context.
 */
int
hermon_mr_mtt_bind(hermon_state_t *state, hermon_bind_info_t *bind,
    ddi_dma_handle_t bind_dmahdl, hermon_rsrc_t **mtt, uint_t *mtt_pgsize_bits,
    uint_t is_buffer)
{
        /*
         * Binds the memory described by "bind", allocates enough MTT
         * entries to cover it, and writes the mapped DMA addresses into
         * those MTTs.  On success, *mtt holds the allocated MTT resource
         * and *mtt_pgsize_bits the log2 page size chosen per entry.
         * "bind_dmahdl" may supply a pre-allocated DMA handle.
         * NOTE(review): "is_buffer" is passed straight through to
         * hermon_mr_mem_bind(); hermon_mr_common_reg() passes
         * "mpt != NULL" here -- confirm its exact semantics against
         * hermon_mr_mem_bind().
         */
        uint64_t                nummtt;
        uint_t                  sleep;
        int                     status;

        /*
         * Check the sleep flag.  Ensure that it is consistent with the
         * current thread context (i.e. if we are currently in the interrupt
         * context, then we shouldn't be attempting to sleep).
         */
        sleep = (bind->bi_flags & IBT_MR_NOSLEEP) ?
            HERMON_NOSLEEP : HERMON_SLEEP;
        if ((sleep == HERMON_SLEEP) &&
            (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
                status = IBT_INVALID_PARAM;
                goto mrmttbind_fail;
        }

        /*
         * Bind the memory and determine the mapped addresses.  This is
         * the first of two routines that do all the "heavy lifting" for
         * the Hermon memory registration routines.  The hermon_mr_mem_bind()
         * routine takes the "bind" struct with all its fields filled
         * in and returns a list of DMA cookies (for the PCI mapped addresses
         * corresponding to the specified address region) which are used by
         * the hermon_mr_fast_mtt_write() routine below.  If we fail here, we
         * must undo all the previous resource allocation (and PD reference
         * count).
         */
        status = hermon_mr_mem_bind(state, bind, bind_dmahdl, sleep, is_buffer);
        if (status != DDI_SUCCESS) {
                status = IBT_INSUFF_RESOURCE;
                goto mrmttbind_fail;
        }

        /*
         * Determine number of pages spanned.  This routine uses the
         * information in the "bind" struct to determine the required
         * number of MTT entries needed (and returns the suggested page size -
         * as a "power-of-2" - for each MTT entry).
         */
        nummtt = hermon_mr_nummtt_needed(state, bind, mtt_pgsize_bits);

        /*
         * Allocate the MTT entries.  Use the calculations performed above to
         * allocate the required number of MTT entries. If we fail here, we
         * must not only undo all the previous resource allocation (and PD
         * reference count), but we must also unbind the memory.
         */
        status = hermon_rsrc_alloc(state, HERMON_MTT, nummtt, sleep, mtt);
        if (status != DDI_SUCCESS) {
                status = IBT_INSUFF_RESOURCE;
                goto mrmttbind_fail2;
        }

        /*
         * Write the mapped addresses into the MTT entries.  This is part two
         * of the "heavy lifting" routines that we talked about above.  Note:
         * we pass the suggested page size from the earlier operation here.
         * And if we fail here, we again do pretty much the same huge clean up.
         */
        status = hermon_mr_fast_mtt_write(state, *mtt, bind, *mtt_pgsize_bits);
        if (status != DDI_SUCCESS) {
                /*
                 * hermon_mr_fast_mtt_write() returns DDI_FAILURE
                 * only if it detects a HW error during DMA.
                 */
                hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
                status = ibc_get_ci_failure(0);
                goto mrmttbind_fail3;
        }
        return (DDI_SUCCESS);

/*
 * The following is cleanup for all possible failure cases in this routine
 */
mrmttbind_fail3:
        hermon_rsrc_free(state, mtt);
mrmttbind_fail2:
        hermon_mr_mem_unbind(state, bind);
mrmttbind_fail:
        return (status);
}


/*
 * hermon_mr_mtt_unbind()
 *    Context: Can be called from interrupt or base context.
 */
int
hermon_mr_mtt_unbind(hermon_state_t *state, hermon_bind_info_t *bind,
    hermon_rsrc_t *mtt)
{
        /*
         * Free up the MTT entries and unbind the memory.  Here, as above, we
         * attempt to free these resources only if it is appropriate to do so.
         * The memory is unbound first, then the MTT resource is released --
         * the reverse of the order in which hermon_mr_mtt_bind() set them up.
         * This routine always succeeds.
         */
        hermon_mr_mem_unbind(state, bind);
        hermon_rsrc_free(state, &mtt);

        return (DDI_SUCCESS);
}


/*
 * hermon_mr_common_rereg()
 *    Context: Can be called from interrupt or base context.
 *
 *    Common routine underlying the reregister verbs.  It temporarily
 *    regains software ownership of the region's MPT entry (HW2SW_MPT),
 *    applies the requested changes -- protection domain, access flags,
 *    and/or translation (via hermon_mr_rereg_xlat_helper()) -- and then
 *    returns the updated entry to the hardware (SW2HW_MPT).
 *
 *    NOTE(review): on nearly every failure path after the HW2SW_MPT
 *    succeeds, the region is fully deregistered via hermon_mr_deregister();
 *    the caller's "mr" handle then looks to be invalid on failure return --
 *    confirm against the callers.  Userland (umem) regions are rejected
 *    up front.  On success, *mrhdl_new is the same handle as "mr".
 */
static int
hermon_mr_common_rereg(hermon_state_t *state, hermon_mrhdl_t mr,
    hermon_pdhdl_t pd, hermon_bind_info_t *bind, hermon_mrhdl_t *mrhdl_new,
    hermon_mr_options_t *op)
{
        hermon_rsrc_t           *mpt;
        ibt_mr_attr_flags_t     acc_flags_to_use;
        ibt_mr_flags_t          flags;
        hermon_pdhdl_t          pd_to_use;
        hermon_hw_dmpt_t        mpt_entry;
        uint64_t                mtt_addr_to_use, vaddr_to_use, len_to_use;
        uint_t                  sleep, dereg_level;
        int                     status;

        _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))

        /*
         * Check here to see if the memory region corresponds to a userland
         * mapping.  Reregistration of userland memory regions is not
         * currently supported.  Return failure.
         */
        if (mr->mr_is_umem) {
                status = IBT_MR_HDL_INVALID;
                goto mrrereg_fail;
        }

        /* Serialize with any other operations on this memory region */
        mutex_enter(&mr->mr_lock);

        /* Pull MPT resource pointer from the Hermon Memory Region handle */
        mpt = mr->mr_mptrsrcp;

        /* Extract the flags field from the hermon_bind_info_t */
        flags = bind->bi_flags;

        /*
         * Check the sleep flag.  Ensure that it is consistent with the
         * current thread context (i.e. if we are currently in the interrupt
         * context, then we shouldn't be attempting to sleep).
         */
        sleep = (flags & IBT_MR_NOSLEEP) ? HERMON_NOSLEEP: HERMON_SLEEP;
        if ((sleep == HERMON_SLEEP) &&
            (sleep != HERMON_SLEEPFLAG_FOR_CONTEXT())) {
                mutex_exit(&mr->mr_lock);
                status = IBT_INVALID_PARAM;
                goto mrrereg_fail;
        }

        /*
         * First step is to temporarily invalidate the MPT entry.  This
         * regains ownership from the hardware, and gives us the opportunity
         * to modify the entry.  Note: The HW2SW_MPT command returns the
         * current MPT entry contents.  These are saved away here because
         * they will be reused in a later step below.  If the region has
         * bound memory windows that we fail returning an "in use" error code.
         * Otherwise, this is an unexpected error and we deregister the
         * memory region and return error.
         *
         * We use HERMON_CMD_NOSLEEP_SPIN here always because we must protect
         * against holding the lock around this rereg call in all contexts.
         */
        status = hermon_cmn_ownership_cmd_post(state, HW2SW_MPT, &mpt_entry,
            sizeof (hermon_hw_dmpt_t), mpt->hr_indx, HERMON_CMD_NOSLEEP_SPIN);
        if (status != HERMON_CMD_SUCCESS) {
                mutex_exit(&mr->mr_lock);
                if (status == HERMON_CMD_REG_BOUND) {
                        return (IBT_MR_IN_USE);
                } else {
                        cmn_err(CE_CONT, "Hermon: HW2SW_MPT command failed: "
                            "%08x\n", status);
                        if (status == HERMON_CMD_INVALID_STATUS) {
                                hermon_fm_ereport(state, HCA_SYS_ERR,
                                    HCA_ERR_SRV_LOST);
                        }
                        /*
                         * Call deregister and ensure that all current
                         * resources get freed up
                         */
                        if (hermon_mr_deregister(state, &mr,
                            HERMON_MR_DEREG_ALL, sleep) != DDI_SUCCESS) {
                                HERMON_WARNING(state, "failed to deregister "
                                    "memory region");
                        }
                        return (ibc_get_ci_failure(0));
                }
        }

        /*
         * If we're changing the protection domain, then validate the new one
         */
        if (flags & IBT_MR_CHANGE_PD) {

                /* Check for valid PD handle pointer */
                if (pd == NULL) {
                        mutex_exit(&mr->mr_lock);
                        /*
                         * Call deregister and ensure that all current
                         * resources get properly freed up. Unnecessary
                         * here to attempt to regain software ownership
                         * of the MPT entry as that has already been
                         * done above.
                         */
                        if (hermon_mr_deregister(state, &mr,
                            HERMON_MR_DEREG_NO_HW2SW_MPT, sleep) !=
                            DDI_SUCCESS) {
                                HERMON_WARNING(state, "failed to deregister "
                                    "memory region");
                        }
                        status = IBT_PD_HDL_INVALID;
                        goto mrrereg_fail;
                }

                /* Use the new PD handle in all operations below */
                pd_to_use = pd;

        } else {
                /* Use the current PD handle in all operations below */
                pd_to_use = mr->mr_pdhdl;
        }

        /*
         * If we're changing access permissions, then validate the new ones
         */
        if (flags & IBT_MR_CHANGE_ACCESS) {
                /*
                 * Validate the access flags.  Both remote write and remote
                 * atomic require the local write flag to be set
                 */
                if (((flags & IBT_MR_ENABLE_REMOTE_WRITE) ||
                    (flags & IBT_MR_ENABLE_REMOTE_ATOMIC)) &&
                    !(flags & IBT_MR_ENABLE_LOCAL_WRITE)) {
                        mutex_exit(&mr->mr_lock);
                        /*
                         * Call deregister and ensure that all current
                         * resources get properly freed up. Unnecessary
                         * here to attempt to regain software ownership
                         * of the MPT entry as that has already been
                         * done above.
                         */
                        if (hermon_mr_deregister(state, &mr,
                            HERMON_MR_DEREG_NO_HW2SW_MPT, sleep) !=
                            DDI_SUCCESS) {
                                HERMON_WARNING(state, "failed to deregister "
                                    "memory region");
                        }
                        status = IBT_MR_ACCESS_REQ_INVALID;
                        goto mrrereg_fail;
                }

                /*
                 * Setup and validate the memory region access flags.  This
                 * means translating the IBTF's enable flags into the access
                 * flags that will be used in later operations.
                 */
                acc_flags_to_use = 0;
                if (flags & IBT_MR_ENABLE_WINDOW_BIND)
                        acc_flags_to_use |= IBT_MR_WINDOW_BIND;
                if (flags & IBT_MR_ENABLE_LOCAL_WRITE)
                        acc_flags_to_use |= IBT_MR_LOCAL_WRITE;
                if (flags & IBT_MR_ENABLE_REMOTE_READ)
                        acc_flags_to_use |= IBT_MR_REMOTE_READ;
                if (flags & IBT_MR_ENABLE_REMOTE_WRITE)
                        acc_flags_to_use |= IBT_MR_REMOTE_WRITE;
                if (flags & IBT_MR_ENABLE_REMOTE_ATOMIC)
                        acc_flags_to_use |= IBT_MR_REMOTE_ATOMIC;

        } else {
                /* Access flags unchanged; keep the region's current ones */
                acc_flags_to_use = mr->mr_accflag;
        }

        /*
         * If we're modifying the translation, then figure out whether
         * we can reuse the current MTT resources.  This means calling
         * hermon_mr_rereg_xlat_helper() which does most of the heavy lifting
         * for the reregistration.  If the current memory region contains
         * sufficient MTT entries for the new regions, then it will be
         * reused and filled in.  Otherwise, new entries will be allocated,
         * the old ones will be freed, and the new entries will be filled
         * in.  Note:  If we're not modifying the translation, then we
         * should already have all the information we need to update the MPT.
         * Also note: If hermon_mr_rereg_xlat_helper() fails, it will return
         * a "dereg_level" which is the level of cleanup that needs to be
         * passed to hermon_mr_deregister() to finish the cleanup.
         */
        if (flags & IBT_MR_CHANGE_TRANSLATION) {
                status = hermon_mr_rereg_xlat_helper(state, mr, bind, op,
                    &mtt_addr_to_use, sleep, &dereg_level);
                if (status != DDI_SUCCESS) {
                        mutex_exit(&mr->mr_lock);
                        /*
                         * Call deregister and ensure that all resources get
                         * properly freed up.
                         */
                        if (hermon_mr_deregister(state, &mr, dereg_level,
                            sleep) != DDI_SUCCESS) {
                                HERMON_WARNING(state, "failed to deregister "
                                    "memory region");
                        }
                        goto mrrereg_fail;
                }
                /* Helper updated mr_bindinfo with the new mapping */
                vaddr_to_use = mr->mr_bindinfo.bi_addr;
                len_to_use   = mr->mr_bindinfo.bi_len;
        } else {
                mtt_addr_to_use = mr->mr_mttaddr;
                vaddr_to_use = mr->mr_bindinfo.bi_addr;
                len_to_use   = mr->mr_bindinfo.bi_len;
        }

        /*
         * Calculate new keys (Lkey, Rkey) from MPT index.  Just like they were
         * when the region was first registered, each key is formed from
         * "constrained" bits and "unconstrained" bits.  Note:  If no remote
         * access is required, then the RKey value is not filled in.  Otherwise
         * both Rkey and LKey are given the same value.
         */
        mr->mr_lkey = hermon_mr_keycalc(mpt->hr_indx);
        if ((acc_flags_to_use & IBT_MR_REMOTE_READ) ||
            (acc_flags_to_use & IBT_MR_REMOTE_WRITE) ||
            (acc_flags_to_use & IBT_MR_REMOTE_ATOMIC)) {
                mr->mr_rkey = mr->mr_lkey;
        } else
                mr->mr_rkey = 0;

        /*
         * Fill in the MPT entry.  This is the final step before passing
         * ownership of the MPT entry to the Hermon hardware.  We use all of
         * the information collected/calculated above to fill in the
         * requisite portions of the MPT.
         */
        bzero(&mpt_entry, sizeof (hermon_hw_dmpt_t));

        mpt_entry.status  = HERMON_MPT_SW_OWNERSHIP;
        mpt_entry.en_bind = (acc_flags_to_use & IBT_MR_WINDOW_BIND)   ? 1 : 0;
        mpt_entry.atomic  = (acc_flags_to_use & IBT_MR_REMOTE_ATOMIC) ? 1 : 0;
        mpt_entry.rw      = (acc_flags_to_use & IBT_MR_REMOTE_WRITE)  ? 1 : 0;
        mpt_entry.rr      = (acc_flags_to_use & IBT_MR_REMOTE_READ)   ? 1 : 0;
        mpt_entry.lw      = (acc_flags_to_use & IBT_MR_LOCAL_WRITE)   ? 1 : 0;
        mpt_entry.lr      = 1;
        mpt_entry.phys_addr = 0;
        mpt_entry.reg_win = HERMON_MPT_IS_REGION;

        mpt_entry.entity_sz     = mr->mr_logmttpgsz;
        mpt_entry.mem_key       = mr->mr_lkey;
        mpt_entry.pd            = pd_to_use->pd_pdnum;

        mpt_entry.start_addr    = vaddr_to_use;
        mpt_entry.reg_win_len   = len_to_use;
        /*
         * The MTT address is split across two MPT fields; the low word
         * drops the three least-significant bits (implied zero, since MTT
         * entries are 8-byte aligned).
         */
        mpt_entry.mtt_addr_h = mtt_addr_to_use >> 32;
        mpt_entry.mtt_addr_l = mtt_addr_to_use >> 3;

        /*
         * Write the updated MPT entry to hardware
         *
         * We use HERMON_CMD_NOSLEEP_SPIN here always because we must protect
         * against holding the lock around this rereg call in all contexts.
         */
        status = hermon_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
            sizeof (hermon_hw_dmpt_t), mpt->hr_indx, HERMON_CMD_NOSLEEP_SPIN);
        if (status != HERMON_CMD_SUCCESS) {
                mutex_exit(&mr->mr_lock);
                cmn_err(CE_CONT, "Hermon: SW2HW_MPT command failed: %08x\n",
                    status);
                if (status == HERMON_CMD_INVALID_STATUS) {
                        hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
                }
                /*
                 * Call deregister and ensure that all current resources get
                 * properly freed up. Unnecessary here to attempt to regain
                 * software ownership of the MPT entry as that has already
                 * been done above.
                 */
                if (hermon_mr_deregister(state, &mr,
                    HERMON_MR_DEREG_NO_HW2SW_MPT, sleep) != DDI_SUCCESS) {
                        HERMON_WARNING(state, "failed to deregister memory "
                            "region");
                }
                return (ibc_get_ci_failure(0));
        }

        /*
         * If we're changing PD, then update their reference counts now.
         * This means decrementing the reference count on the old PD and
         * incrementing the reference count on the new PD.
         */
        if (flags & IBT_MR_CHANGE_PD) {
                hermon_pd_refcnt_dec(mr->mr_pdhdl);
                hermon_pd_refcnt_inc(pd);
        }

        /*
         * Update the contents of the Hermon Memory Region handle to reflect
         * what has been changed.
         */
        mr->mr_pdhdl      = pd_to_use;
        mr->mr_accflag    = acc_flags_to_use;
        mr->mr_is_umem    = 0;
        mr->mr_is_fmr     = 0;
        mr->mr_umemcookie = NULL;
        /* Swap the key bytes into the form handed out to consumers */
        mr->mr_lkey       = hermon_mr_key_swap(mr->mr_lkey);
        mr->mr_rkey       = hermon_mr_key_swap(mr->mr_rkey);

        /* New MR handle is same as the old */
        *mrhdl_new = mr;
        mutex_exit(&mr->mr_lock);

        return (DDI_SUCCESS);

mrrereg_fail:
        return (status);
}


/*
 * hermon_mr_rereg_xlat_helper
 *    Context: Can be called from interrupt or base context.
 *    Note: This routine expects the "mr_lock" to be held when it
 *    is called.  Upon returning failure, this routine passes information
 *    about what "dereg_level" should be passed to hermon_mr_deregister().
 *
 *    Handles the IBT_MR_CHANGE_TRANSLATION portion of a reregistration:
 *    rebinds the memory described by "bind" and either reuses the region's
 *    current MTT entries (when they are unshared and large enough) or
 *    allocates a replacement set, writing the new translation in either
 *    case.  On success, "mr" is updated in place and the MTT address for
 *    the caller's new MPT entry is returned through "mtt_addr".
 */
static int
hermon_mr_rereg_xlat_helper(hermon_state_t *state, hermon_mrhdl_t mr,
    hermon_bind_info_t *bind, hermon_mr_options_t *op, uint64_t *mtt_addr,
    uint_t sleep, uint_t *dereg_level)
{
        hermon_rsrc_t           *mtt, *mtt_refcnt;
        hermon_sw_refcnt_t      *swrc_old, *swrc_new;
        ddi_dma_handle_t        dmahdl;
        uint64_t                nummtt_needed, nummtt_in_currrsrc, max_sz;
        uint_t                  mtt_pgsize_bits, bind_type, reuse_dmahdl;
        int                     status;

        ASSERT(MUTEX_HELD(&mr->mr_lock));

        /*
         * Check the "options" flag.  Currently this flag tells the driver
         * whether or not the region should be bound normally (i.e. with
         * entries written into the PCI IOMMU) or whether it should be
         * registered to bypass the IOMMU.
         */
        if (op == NULL) {
                bind_type = HERMON_BINDMEM_NORMAL;
        } else {
                bind_type = op->mro_bind_type;
        }

        /*
         * Check for invalid length.  Check is the length is zero or if the
         * length is larger than the maximum configured value.  Return error
         * if it is.
         */
        max_sz = ((uint64_t)1 << state->hs_cfg_profile->cp_log_max_mrw_sz);
        if ((bind->bi_len == 0) || (bind->bi_len > max_sz)) {
                /*
                 * Deregister will be called upon returning failure from this
                 * routine. This will ensure that all current resources get
                 * properly freed up. Unnecessary to attempt to regain
                 * software ownership of the MPT entry as that has already
                 * been done above (in hermon_mr_reregister())
                 */
                *dereg_level = HERMON_MR_DEREG_NO_HW2SW_MPT;

                status = IBT_MR_LEN_INVALID;
                goto mrrereghelp_fail;
        }

        /*
         * Determine the number of pages necessary for new region and the
         * number of pages supported by the current MTT resources
         */
        nummtt_needed = hermon_mr_nummtt_needed(state, bind, &mtt_pgsize_bits);
        nummtt_in_currrsrc = mr->mr_mttrsrcp->hr_len >> HERMON_MTT_SIZE_SHIFT;

        /*
         * Depending on whether we have enough pages or not, the next step is
         * to fill in a set of MTT entries that reflect the new mapping.  In
         * the first case below, we already have enough entries.  This means
         * we need to unbind the memory from the previous mapping, bind the
         * memory for the new mapping, write the new MTT entries, and update
         * the mr to reflect the changes.
         * In the second case below, we do not have enough entries in the
         * current mapping.  So, in this case, we need not only to unbind the
         * current mapping, but we need to free up the MTT resources associated
         * with that mapping.  After we've successfully done that, we continue
         * by binding the new memory, allocating new MTT entries, writing the
         * new MTT entries, and updating the mr to reflect the changes.
         */

        /*
         * If this region is being shared (i.e. MTT refcount != 1), then we
         * can't reuse the current MTT resources regardless of their size.
         * Instead we'll need to alloc new ones (below) just as if there
         * hadn't been enough room in the current entries.
         */
        swrc_old = (hermon_sw_refcnt_t *)mr->mr_mttrefcntp->hr_addr;
        if (HERMON_MTT_IS_NOT_SHARED(swrc_old) &&
            (nummtt_needed <= nummtt_in_currrsrc)) {

                /*
                 * Unbind the old mapping for this memory region, but retain
                 * the ddi_dma_handle_t (if possible) for reuse in the bind
                 * operation below.  Note:  If original memory region was
                 * bound for IOMMU bypass and the new region can not use
                 * bypass, then a new DMA handle will be necessary.
                 */
                if (HERMON_MR_REUSE_DMAHDL(mr, bind->bi_flags)) {
                        /* Clearing bi_free_dmahdl keeps the handle alive */
                        mr->mr_bindinfo.bi_free_dmahdl = 0;
                        hermon_mr_mem_unbind(state, &mr->mr_bindinfo);
                        dmahdl = mr->mr_bindinfo.bi_dmahdl;
                        reuse_dmahdl = 1;
                } else {
                        hermon_mr_mem_unbind(state, &mr->mr_bindinfo);
                        dmahdl = NULL;
                        reuse_dmahdl = 0;
                }

                /*
                 * Bind the new memory and determine the mapped addresses.
                 * As described, this routine and hermon_mr_fast_mtt_write()
                 * do the majority of the work for the memory registration
                 * operations.  Note:  When we successfully finish the binding,
                 * we will set the "bi_free_dmahdl" flag to indicate that
                 * even though we may have reused the ddi_dma_handle_t we do
                 * wish it to be freed up at some later time.  Note also that
                 * if we fail, we may need to cleanup the ddi_dma_handle_t.
                 */
                bind->bi_bypass = bind_type;
                status = hermon_mr_mem_bind(state, bind, dmahdl, sleep, 1);
                if (status != DDI_SUCCESS) {
                        if (reuse_dmahdl) {
                                ddi_dma_free_handle(&dmahdl);
                        }

                        /*
                         * Deregister will be called upon returning failure
                         * from this routine. This will ensure that all
                         * current resources get properly freed up.
                         * Unnecessary to attempt to regain software ownership
                         * of the MPT entry as that has already been done
                         * above (in hermon_mr_reregister()).  Also unnecessary
                         * to attempt to unbind the memory.
                         */
                        *dereg_level = HERMON_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;

                        status = IBT_INSUFF_RESOURCE;
                        goto mrrereghelp_fail;
                }
                if (reuse_dmahdl) {
                        bind->bi_free_dmahdl = 1;
                }

                /*
                 * Using the new mapping, but reusing the current MTT
                 * resources, write the updated entries to MTT
                 */
                mtt    = mr->mr_mttrsrcp;
                status = hermon_mr_fast_mtt_write(state, mtt, bind,
                    mtt_pgsize_bits);
                if (status != DDI_SUCCESS) {
                        /*
                         * Deregister will be called upon returning failure
                         * from this routine. This will ensure that all
                         * current resources get properly freed up.
                         * Unnecessary to attempt to regain software ownership
                         * of the MPT entry as that has already been done
                         * above (in hermon_mr_reregister()).  Also unnecessary
                         * to attempt to unbind the memory.
                         *
                         * But we do need to unbind the newly bound memory
                         * before returning.
                         */
                        hermon_mr_mem_unbind(state, bind);
                        *dereg_level = HERMON_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;

                        /*
                         * hermon_mr_fast_mtt_write() returns DDI_FAILURE
                         * only if it detects a HW error during DMA.
                         */
                        hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
                        status = ibc_get_ci_failure(0);
                        goto mrrereghelp_fail;
                }

                /* Put the updated information into the Mem Region handle */
                mr->mr_bindinfo   = *bind;
                mr->mr_logmttpgsz = mtt_pgsize_bits;

        } else {
                /*
                 * Check if the memory region MTT is shared by any other MRs.
                 * Since the resource may be shared between multiple memory
                 * regions (as a result of a "RegisterSharedMR()" verb) it is
                 * important that we not unbind any resources prematurely.
                 */
                if (!HERMON_MTT_IS_SHARED(swrc_old)) {
                        /*
                         * Unbind the old mapping for this memory region, but
                         * retain the ddi_dma_handle_t for reuse in the bind
                         * operation below. Note: This can only be done here
                         * because the region being reregistered is not
                         * currently shared.  Also if original memory region
                         * was bound for IOMMU bypass and the new region can
                         * not use bypass, then a new DMA handle will be
                         * necessary.
                         */
                        if (HERMON_MR_REUSE_DMAHDL(mr, bind->bi_flags)) {
                                mr->mr_bindinfo.bi_free_dmahdl = 0;
                                hermon_mr_mem_unbind(state, &mr->mr_bindinfo);
                                dmahdl = mr->mr_bindinfo.bi_dmahdl;
                                reuse_dmahdl = 1;
                        } else {
                                hermon_mr_mem_unbind(state, &mr->mr_bindinfo);
                                dmahdl = NULL;
                                reuse_dmahdl = 0;
                        }
                } else {
                        /* Shared MTT: old mapping stays bound; fresh handle */
                        dmahdl = NULL;
                        reuse_dmahdl = 0;
                }

                /*
                 * Bind the new memory and determine the mapped addresses.
                 * As described, this routine and hermon_mr_fast_mtt_write()
                 * do the majority of the work for the memory registration
                 * operations.  Note:  When we successfully finish the binding,
                 * we will set the "bi_free_dmahdl" flag to indicate that
                 * even though we may have reused the ddi_dma_handle_t we do
                 * wish it to be freed up at some later time.  Note also that
                 * if we fail, we may need to cleanup the ddi_dma_handle_t.
                 */
                bind->bi_bypass = bind_type;
                status = hermon_mr_mem_bind(state, bind, dmahdl, sleep, 1);
                if (status != DDI_SUCCESS) {
                        if (reuse_dmahdl) {
                                ddi_dma_free_handle(&dmahdl);
                        }

                        /*
                         * Deregister will be called upon returning failure
                         * from this routine. This will ensure that all
                         * current resources get properly freed up.
                         * Unnecessary to attempt to regain software ownership
                         * of the MPT entry as that has already been done
                         * above (in hermon_mr_reregister()).  Also unnecessary
                         * to attempt to unbind the memory.
                         */
                        *dereg_level = HERMON_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;

                        status = IBT_INSUFF_RESOURCE;
                        goto mrrereghelp_fail;
                }
                if (reuse_dmahdl) {
                        bind->bi_free_dmahdl = 1;
                }

                /*
                 * Allocate the new MTT entries resource
                 */
                status = hermon_rsrc_alloc(state, HERMON_MTT, nummtt_needed,
                    sleep, &mtt);
                if (status != DDI_SUCCESS) {
                        /*
                         * Deregister will be called upon returning failure
                         * from this routine. This will ensure that all
                         * current resources get properly freed up.
                         * Unnecessary to attempt to regain software ownership
                         * of the MPT entry as that has already been done
                         * above (in hermon_mr_reregister()).  Also unnecessary
                         * to attempt to unbind the memory.
                         *
                         * But we do need to unbind the newly bound memory
                         * before returning.
                         */
                        hermon_mr_mem_unbind(state, bind);
                        *dereg_level = HERMON_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;

                        status = IBT_INSUFF_RESOURCE;
                        goto mrrereghelp_fail;
                }

                /*
                 * Allocate MTT reference count (to track shared memory
                 * regions).  As mentioned elsewhere above, this reference
                 * count resource may never be used on the given memory region,
                 * but if it is ever later registered as a "shared" memory
                 * region then this resource will be necessary.  Note:  This
                 * is only necessary here if the existing memory region is
                 * already being shared (because otherwise we already have
                 * a useable reference count resource).
                 */
                if (HERMON_MTT_IS_SHARED(swrc_old)) {
                        status = hermon_rsrc_alloc(state, HERMON_REFCNT, 1,
                            sleep, &mtt_refcnt);
                        if (status != DDI_SUCCESS) {
                                /*
                                 * Deregister will be called upon returning
                                 * failure from this routine. This will ensure
                                 * that all current resources get properly
                                 * freed up.  Unnecessary to attempt to regain
                                 * software ownership of the MPT entry as that
                                 * has already been done above (in
                                 * hermon_mr_reregister()).  Also unnecessary
                                 * to attempt to unbind the memory.
                                 *
                                 * But we need to unbind the newly bound
                                 * memory and free up the newly allocated MTT
                                 * entries before returning.
                                 */
                                hermon_mr_mem_unbind(state, bind);
                                hermon_rsrc_free(state, &mtt);
                                *dereg_level =
                                    HERMON_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;

                                status = IBT_INSUFF_RESOURCE;
                                goto mrrereghelp_fail;
                        }
                        swrc_new = (hermon_sw_refcnt_t *)mtt_refcnt->hr_addr;
                        _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*swrc_new))
                        HERMON_MTT_REFCNT_INIT(swrc_new);
                } else {
                        mtt_refcnt = mr->mr_mttrefcntp;
                }

                /*
                 * Using the new mapping and the new MTT resources, write the
                 * updated entries to MTT
                 */
                status = hermon_mr_fast_mtt_write(state, mtt, bind,
                    mtt_pgsize_bits);
                if (status != DDI_SUCCESS) {
                        /*
                         * Deregister will be called upon returning failure
                         * from this routine. This will ensure that all
                         * current resources get properly freed up.
                         * Unnecessary to attempt to regain software ownership
                         * of the MPT entry as that has already been done
                         * above (in hermon_mr_reregister()).  Also unnecessary
                         * to attempt to unbind the memory.
                         *
                         * But we need to unbind the newly bound memory,
                         * free up the newly allocated MTT entries, and
                         * (possibly) free the new MTT reference count
                         * resource before returning.
                         */
                        if (HERMON_MTT_IS_SHARED(swrc_old)) {
                                hermon_rsrc_free(state, &mtt_refcnt);
                        }
                        hermon_mr_mem_unbind(state, bind);
                        hermon_rsrc_free(state, &mtt);
                        *dereg_level = HERMON_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;

                        status = IBT_INSUFF_RESOURCE;
                        goto mrrereghelp_fail;
                }

                /*
                 * Check if the memory region MTT is shared by any other MRs.
                 * Since the resource may be shared between multiple memory
                 * regions (as a result of a "RegisterSharedMR()" verb) it is
                 * important that we not free up any resources prematurely.
                 */
                if (HERMON_MTT_IS_SHARED(swrc_old)) {
                        /* Decrement MTT reference count for "old" region */
                        (void) hermon_mtt_refcnt_dec(mr->mr_mttrefcntp);
                } else {
                        /* Free up the old MTT entries resource */
                        hermon_rsrc_free(state, &mr->mr_mttrsrcp);
                }

                /* Put the updated information into the mrhdl */
                mr->mr_bindinfo   = *bind;
                mr->mr_logmttpgsz = mtt_pgsize_bits;
                mr->mr_mttrsrcp   = mtt;
                mr->mr_mttrefcntp = mtt_refcnt;
        }

        /*
         * Calculate and return the updated MTT address (in the DDR address
         * space).  This will be used by the caller (hermon_mr_reregister) in
         * the updated MPT entry
         */
        *mtt_addr = mtt->hr_indx << HERMON_MTT_SIZE_SHIFT;

        return (DDI_SUCCESS);

mrrereghelp_fail:
        return (status);
}


/*
 * hermon_mr_nummtt_needed()
 *    Context: Can be called from interrupt or base context.
 *
 *    Returns the number of MTT entries (i.e. native pages) required to
 *    cover the memory described by "bind", and reports the page shift
 *    used for the calculation through "mtt_pgsize_bits".
 */
/* ARGSUSED */
static uint64_t
hermon_mr_nummtt_needed(hermon_state_t *state, hermon_bind_info_t *bind,
    uint_t *mtt_pgsize_bits)
{
        uint64_t        start_off, last_off;

        /*
         * For now the page size is fixed at the native page size (8Kb on
         * the sun4u architecture, 4Kb on x86).  A future improvement would
         * be to derive the optimal page size from the dmacookies instead.
         */
        *mtt_pgsize_bits = PAGESHIFT;

        /* Offset of the region's first byte within its page */
        start_off = bind->bi_addr & (((uint64_t)1 << PAGESHIFT) - 1);

        /* Offset of the region's last byte, measured from that page start */
        last_off = start_off + (bind->bi_len - 1);

        /* Count of pages spanned == count of MTT entries needed */
        return ((last_off >> PAGESHIFT) + 1);
}


/*
 * hermon_mr_mem_bind()
 *    Context: Can be called from interrupt or base context.
 *
 *    DMA-binds the memory described by "bind" (virtual address or buf(9S),
 *    kernel or user).  If no DMA handle is supplied, one is allocated here
 *    and marked for later release via bi_free_dmahdl.
 */
static int
hermon_mr_mem_bind(hermon_state_t *state, hermon_bind_info_t *bind,
    ddi_dma_handle_t dmahdl, uint_t sleep, uint_t is_buffer)
{
	ddi_dma_attr_t	dma_attr;
	int		(*callback)(caddr_t);
	uint32_t	ro_tunable;
	int		status;

	/* bi_type must be set to a meaningful value to get a bind handle */
	ASSERT(bind->bi_type == HERMON_BINDHDL_VADDR ||
	    bind->bi_type == HERMON_BINDHDL_BUF ||
	    bind->bi_type == HERMON_BINDHDL_UBUF);

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))

	/* Block for DMA resources only when the caller allows sleeping */
	callback = (sleep == HERMON_SLEEP) ? DDI_DMA_SLEEP : DDI_DMA_DONTWAIT;

	if (dmahdl != NULL) {
		/* Caller supplied the handle; it is not ours to free later */
		bind->bi_dmahdl = dmahdl;
		bind->bi_free_dmahdl = 0;
	} else {
		/*
		 * No handle supplied: build up the default DMA attributes
		 * and allocate one here.  If we're bypassing the IOMMU
		 * (sparc only), set the DDI_DMA_FORCE_PHYSICAL flag.
		 */
		hermon_dma_attr_init(state, &dma_attr);
#ifdef	__sparc
		if (bind->bi_bypass == HERMON_BINDMEM_BYPASS) {
			dma_attr.dma_attr_flags = DDI_DMA_FORCE_PHYSICAL;
		}
#endif

		/*
		 * Relaxed ordering applies only to buffer bindings, only
		 * when the caller hasn't disabled it, and only when the
		 * matching tunable (user vs. kernel data) is enabled.
		 */
		if (is_buffer && !(bind->bi_flags & IBT_MR_DISABLE_RO)) {
			ro_tunable = (bind->bi_type == HERMON_BINDHDL_UBUF) ?
			    hermon_user_data_ro : hermon_kernel_data_ro;
			if (ro_tunable == HERMON_RO_ENABLED) {
				dma_attr.dma_attr_flags |=
				    DDI_DMA_RELAXED_ORDERING;
			}
		}

		/* Allocate a DMA handle for the binding */
		status = ddi_dma_alloc_handle(state->hs_dip, &dma_attr,
		    callback, NULL, &bind->bi_dmahdl);
		if (status != DDI_SUCCESS) {
			return (status);
		}
		bind->bi_free_dmahdl = 1;
	}

	/*
	 * Bind the memory to get the PCI mapped addresses.  Virtual-address
	 * bindings go through ddi_dma_addr_bind_handle(); buffer-based
	 * bindings (HERMON_BINDHDL_BUF or HERMON_BINDHDL_UBUF) go through
	 * ddi_dma_buf_bind_handle().
	 */
	if (bind->bi_type == HERMON_BINDHDL_VADDR) {
		status = ddi_dma_addr_bind_handle(bind->bi_dmahdl, NULL,
		    (caddr_t)(uintptr_t)bind->bi_addr, bind->bi_len,
		    (DDI_DMA_RDWR | DDI_DMA_CONSISTENT), callback, NULL,
		    &bind->bi_dmacookie, &bind->bi_cookiecnt);
	} else {
		status = ddi_dma_buf_bind_handle(bind->bi_dmahdl,
		    bind->bi_buf, (DDI_DMA_RDWR | DDI_DMA_CONSISTENT), callback,
		    NULL, &bind->bi_dmacookie, &bind->bi_cookiecnt);
	}

	/* If the bind failed, release any handle we allocated above */
	if (status != DDI_DMA_MAPPED) {
		if (bind->bi_free_dmahdl != 0) {
			ddi_dma_free_handle(&bind->bi_dmahdl);
		}
		return (status);
	}

	return (DDI_SUCCESS);
}


/*
 * hermon_mr_mem_unbind()
 *    Context: Can be called from interrupt or base context.
 *
 *    Undoes a hermon_mr_mem_bind(): releases an internally-allocated user
 *    buf(9S) if present, unbinds the DMA mapping, and frees the DMA handle
 *    if this driver allocated it.
 */
static void
hermon_mr_mem_unbind(hermon_state_t *state, hermon_bind_info_t *bind)
{
	int	status;

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))

	/* An alloc_lkey binding never bound anything; nothing to undo */
	if (bind->bi_type == HERMON_BINDHDL_LKEY)
		return;

	/*
	 * For HERMON_BINDHDL_UBUF the buf(9S) that bi_buf points to was
	 * allocated internally by ddi_umem_iosetup(), so it must be freed
	 * here.  Resetting bi_type to HERMON_BINDHDL_NONE keeps it from
	 * being freed a second time later.
	 */
	if (bind->bi_type == HERMON_BINDHDL_UBUF) {
		freerbuf(bind->bi_buf);
		bind->bi_type = HERMON_BINDHDL_NONE;
	}
	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*bind))

	/*
	 * Unbind the DMA memory for the region.
	 *
	 * Note: The only way ddi_dma_unbind_handle() currently can fail is
	 * if the handle passed in is invalid.  Since this should never
	 * happen, this function returns void; if an error does occur we
	 * print a warning to the console (and deliberately skip freeing
	 * the handle).
	 */
	status = ddi_dma_unbind_handle(bind->bi_dmahdl);
	if (status != DDI_SUCCESS) {
		HERMON_WARNING(state, "failed to unbind DMA mapping");
		return;
	}

	/* Free the DMA handle only if this driver allocated it */
	if (bind->bi_free_dmahdl != 0) {
		ddi_dma_free_handle(&bind->bi_dmahdl);
	}
}


/*
 * hermon_mr_fast_mtt_write()
 *    Context: Can be called from interrupt or base context.
 *
 *    Walks the DMA cookie list in "bind" and writes one MTT entry (the
 *    page-aligned address, tagged with HERMON_MTT_ENTRY_PRESENT and
 *    converted with htonll()) for every "pagesize" chunk the cookies
 *    cover.  Entries are written directly into the ICM-backed MTT table
 *    and flushed to the device with ddi_dma_sync(), one ICM span at a
 *    time.
 *
 *    Fix: "start" and "i" are MTT *entry indices* within the current ICM
 *    span -- both ddi_dma_sync() calls scale them by
 *    sizeof (hermon_hw_mtt_t).  The span-rollover path previously set
 *    "start = i * sizeof (hermon_hw_mtt_t)", pre-multiplying the index
 *    and causing a double scaling; it now sets "start = i" to match the
 *    initial assignment above the loop.
 */
static int
hermon_mr_fast_mtt_write(hermon_state_t *state, hermon_rsrc_t *mtt,
    hermon_bind_info_t *bind, uint32_t mtt_pgsize_bits)
{
	hermon_icm_table_t	*icm_table;
	hermon_dma_info_t	*dma_info;
	uint32_t		index1, index2, rindx;
	ddi_dma_cookie_t	dmacookie;
	uint_t			cookie_cnt;
	uint64_t		*mtt_table;
	uint64_t		mtt_entry;
	uint64_t		addr, endaddr;
	uint64_t		pagesize;
	offset_t		i, start;
	uint_t			per_span;
	int			sync_needed;

	/*
	 * XXX According to the PRM, we are to use the WRITE_MTT
	 * command to write out MTTs. Tavor does not do this,
	 * instead taking advantage of direct access to the MTTs,
	 * and knowledge that Mellanox FMR relies on our ability
	 * to write directly to the MTTs without any further
	 * notification to the firmware. Likewise, we will choose
	 * to not use the WRITE_MTT command, but to simply write
	 * out the MTTs.
	 */

	/* Calculate page size from the suggested value passed in */
	pagesize = ((uint64_t)1 << mtt_pgsize_bits);

	/* Walk the "cookie list" and fill in the MTT table entries */
	dmacookie  = bind->bi_dmacookie;
	cookie_cnt = bind->bi_cookiecnt;

	/* Locate the ICM span holding the first MTT entry */
	icm_table = &state->hs_icm[HERMON_MTT];
	rindx = mtt->hr_indx;
	hermon_index(index1, index2, rindx, icm_table, i);
	start = i;

	per_span   = icm_table->span;
	dma_info   = icm_table->icm_dma[index1] + index2;
	mtt_table  = (uint64_t *)(uintptr_t)dma_info->vaddr;

	sync_needed = 0;
	while (cookie_cnt-- > 0) {
		addr    = dmacookie.dmac_laddress;
		endaddr = addr + (dmacookie.dmac_size - 1);
		addr    = addr & ~((uint64_t)pagesize - 1);

		while (addr <= endaddr) {

			/*
			 * Fill in the mapped addresses (calculated above) and
			 * set HERMON_MTT_ENTRY_PRESENT flag for each MTT entry.
			 */
			mtt_entry = addr | HERMON_MTT_ENTRY_PRESENT;
			mtt_table[i] = htonll(mtt_entry);
			i++;
			rindx++;

			if (i == per_span) {

				/* Flush the completed span to the device */
				(void) ddi_dma_sync(dma_info->dma_hdl,
				    start * sizeof (hermon_hw_mtt_t),
				    (i - start) * sizeof (hermon_hw_mtt_t),
				    DDI_DMA_SYNC_FORDEV);

				/* Done if this was the last entry to write */
				if ((addr + pagesize > endaddr) &&
				    (cookie_cnt == 0))
					return (DDI_SUCCESS);

				/* Move to the next ICM span */
				hermon_index(index1, index2, rindx, icm_table,
				    i);
				/*
				 * "start" is an entry index (scaled by
				 * sizeof (hermon_hw_mtt_t) at sync time),
				 * so it must not be pre-multiplied here.
				 */
				start = i;
				dma_info = icm_table->icm_dma[index1] + index2;
				mtt_table =
				    (uint64_t *)(uintptr_t)dma_info->vaddr;

				sync_needed = 0;
			} else {
				sync_needed = 1;
			}

			addr += pagesize;
			if (addr == 0) {
				/* addr wrapped past 2^64: bogus cookie */
				static int do_once = 1;
				_NOTE(SCHEME_PROTECTS_DATA("safe sharing",
				    do_once))
				if (do_once) {
					do_once = 0;
					cmn_err(CE_NOTE, "probable error in "
					    "dma_cookie address from caller\n");
				}
				break;
			}
		}

		/*
		 * When we've reached the end of the current DMA cookie,
		 * jump to the next cookie (if there are more)
		 */
		if (cookie_cnt != 0) {
			ddi_dma_nextcookie(bind->bi_dmahdl, &dmacookie);
		}
	}

	/* done all the cookies, now sync the memory for the device */
	if (sync_needed)
		(void) ddi_dma_sync(dma_info->dma_hdl,
		    start * sizeof (hermon_hw_mtt_t),
		    (i - start) * sizeof (hermon_hw_mtt_t),
		    DDI_DMA_SYNC_FORDEV);

	return (DDI_SUCCESS);
}

/*
 * hermon_mr_fast_mtt_write_fmr()
 *    Context: Can be called from interrupt or base context.
 *
 *    Writes one MTT entry per buffer in the caller-supplied physical
 *    address list (mem_pattr->pmr_addr_list), directly into the
 *    ICM-backed MTT table, crossing ICM spans as needed.
 *
 *    NOTE(review): unlike hermon_mr_fast_mtt_write() above, the entries
 *    written here carry no HERMON_MTT_ENTRY_PRESENT flag, are not
 *    htonll()-converted, and are not ddi_dma_sync()ed for the device --
 *    presumably the FMR path takes care of these steps elsewhere;
 *    confirm against the FMR callers before relying on this routine.
 */
/* ARGSUSED */
static int
hermon_mr_fast_mtt_write_fmr(hermon_state_t *state, hermon_rsrc_t *mtt,
    ibt_pmr_attr_t *mem_pattr, uint32_t mtt_pgsize_bits)
{
	hermon_icm_table_t	*icm_table;
	hermon_dma_info_t	*dma_info;
	uint32_t		index1, index2, rindx;
	uint64_t		*mtt_table;
	offset_t		i, j;
	uint_t			per_span;

	/* Locate the ICM span holding the first MTT entry for "mtt" */
	icm_table = &state->hs_icm[HERMON_MTT];
	rindx = mtt->hr_indx;
	hermon_index(index1, index2, rindx, icm_table, i);
	per_span   = icm_table->span;
	dma_info   = icm_table->icm_dma[index1] + index2;
	mtt_table  = (uint64_t *)(uintptr_t)dma_info->vaddr;

	/*
	 * Fill in the MTT table entries
	 */
	for (j = 0; j < mem_pattr->pmr_num_buf; j++) {
		mtt_table[i] = mem_pattr->pmr_addr_list[j].p_laddr;
		i++;
		rindx++;
		/* Crossed an ICM span boundary; move to the next span */
		if (i == per_span) {
			hermon_index(index1, index2, rindx, icm_table, i);
			dma_info = icm_table->icm_dma[index1] + index2;
			mtt_table = (uint64_t *)(uintptr_t)dma_info->vaddr;
		}
	}

	return (DDI_SUCCESS);
}


/*
 * hermon_mtt_refcnt_inc()
 *    Context: Can be called from interrupt or base context.
 *
 *    Atomically increments the shared-MTT reference count stored at the
 *    resource's hr_addr and returns the new count.
 */
static uint_t
hermon_mtt_refcnt_inc(hermon_rsrc_t *rsrc)
{
	return (atomic_inc_uint_nv(
	    &((hermon_sw_refcnt_t *)rsrc->hr_addr)->swrc_refcnt));
}


/*
 * hermon_mtt_refcnt_dec()
 *    Context: Can be called from interrupt or base context.
 *
 *    Atomically decrements the shared-MTT reference count stored at the
 *    resource's hr_addr and returns the new count.
 */
static uint_t
hermon_mtt_refcnt_dec(hermon_rsrc_t *rsrc)
{
	return (atomic_dec_uint_nv(
	    &((hermon_sw_refcnt_t *)rsrc->hr_addr)->swrc_refcnt));
}