/* usr/src/uts/common/io/ib/adapters/hermon/hermon.c */
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*
 * hermon.c
 *    Hermon (InfiniBand) HCA Driver attach/detach Routines
 *
 *    Implements all the routines necessary for the attach, setup,
 *    initialization (and subsequent possible teardown and detach) of the
 *    Hermon InfiniBand HCA driver.
 */

#include <sys/types.h>
#include <sys/file.h>
#include <sys/open.h>
#include <sys/conf.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/modctl.h>
#include <sys/stat.h>
#include <sys/pci.h>
#include <sys/pci_cap.h>
#include <sys/bitmap.h>
#include <sys/policy.h>

#include <sys/ib/adapters/hermon/hermon.h>

/* /etc/system can tune this down, if that is desirable. */
int hermon_msix_max = HERMON_MSIX_MAX;

/* The following works around a problem in pre-2_7_000 firmware. */
#define HERMON_FW_WORKAROUND

/* Nonzero enables verbose driver output; settable via /etc/system. */
int hermon_verbose = 0;

/* Hermon HCA State Pointer (soft state list for all instances) */
void *hermon_statep;

/* Nonzero enables debug output during PCI VPD capability reads. */
int debug_vpd = 0;

/* Disable the internal error-check polling thread */
int hermon_no_inter_err_chk = 0;

/*
 * The Hermon "userland resource database" is common to instances of the
 * Hermon HCA driver.  This structure "hermon_userland_rsrc_db" contains all
 * the necessary information to maintain it.
 */
hermon_umap_db_t hermon_userland_rsrc_db;

/* DDI autoconfiguration and character-device entry points */
static int hermon_attach(dev_info_t *, ddi_attach_cmd_t);
static int hermon_detach(dev_info_t *, ddi_detach_cmd_t);
static int hermon_open(dev_t *, int, int, cred_t *);
static int hermon_close(dev_t, int, int, cred_t *);
static int hermon_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);

/* Driver-level and interrupt-service setup/teardown */
static int hermon_drv_init(hermon_state_t *state, dev_info_t *dip,
    int instance);
static void hermon_drv_fini(hermon_state_t *state);
static void hermon_drv_fini2(hermon_state_t *state);
static int hermon_isr_init(hermon_state_t *state);
static void hermon_isr_fini(hermon_state_t *state);

/* Hardware bring-up/teardown and software state management */
static int hermon_hw_init(hermon_state_t *state);

static void hermon_hw_fini(hermon_state_t *state,
    hermon_drv_cleanup_level_t cleanup);
static int hermon_soft_state_init(hermon_state_t *state);
static void hermon_soft_state_fini(hermon_state_t *state);

/* ICM (Infinihost Context Memory) configuration and DMA setup */
static int hermon_icm_config_setup(hermon_state_t *state,
    hermon_hw_initqueryhca_t *inithca);
static void hermon_icm_tables_init(hermon_state_t *state);
static void hermon_icm_tables_fini(hermon_state_t *state);
static int hermon_icm_dma_init(hermon_state_t *state);
static void hermon_icm_dma_fini(hermon_state_t *state);
static void hermon_inithca_set(hermon_state_t *state,
    hermon_hw_initqueryhca_t *inithca);

/* HCA port, UAR page, QP context, and multicast group helpers */
static int hermon_hca_port_init(hermon_state_t *state);
static int hermon_hca_ports_shutdown(hermon_state_t *state, uint_t num_init);
static int hermon_internal_uarpg_init(hermon_state_t *state);
static void hermon_internal_uarpg_fini(hermon_state_t *state);
static int hermon_special_qp_contexts_reserve(hermon_state_t *state);
static void hermon_special_qp_contexts_unreserve(hermon_state_t *state);
static int hermon_sw_reset(hermon_state_t *state);
static int hermon_mcg_init(hermon_state_t *state);
static void hermon_mcg_fini(hermon_state_t *state);
static int hermon_fw_version_check(hermon_state_t *state);
static void hermon_device_info_report(hermon_state_t *state);

/* PCI capability walking (VPD, MSI-X) and interrupt setup */
static int hermon_pci_capability_list(hermon_state_t *state,
    ddi_acc_handle_t hdl);
static void hermon_pci_capability_vpd(hermon_state_t *state,
    ddi_acc_handle_t hdl, uint_t offset);
static int hermon_pci_read_vpd(ddi_acc_handle_t hdl, uint_t offset,
    uint32_t addr, uint32_t *data);
static int hermon_intr_or_msi_init(hermon_state_t *state);
static int hermon_add_intrs(hermon_state_t *state, int intr_type);
static int hermon_intr_or_msi_fini(hermon_state_t *state);
void hermon_pci_capability_msix(hermon_state_t *state, ddi_acc_handle_t hdl,
    uint_t offset);

static uint64_t hermon_size_icm(hermon_state_t *state);

/* X86 fastreboot support */
static ushort_t get_msix_ctrl(dev_info_t *);
static size_t get_msix_tbl_size(dev_info_t *);
static size_t get_msix_pba_size(dev_info_t *);
static void hermon_set_msix_info(hermon_state_t *);
static int hermon_intr_disable(hermon_state_t *);
static int hermon_quiesce(dev_info_t *);


/*
 * Character/Block Operations
 *    Only open/close/ioctl/devmap are supported; read/write and the
 *    block-device entry points are all nodev.  devmap is used (with the
 *    D_DEVMAP flag below) for mapping UAR pages out to userland.
 */
static struct cb_ops hermon_cb_ops = {
        hermon_open,            /* open */
        hermon_close,           /* close */
        nodev,                  /* strategy (block) */
        nodev,                  /* print (block) */
        nodev,                  /* dump (block) */
        nodev,                  /* read */
        nodev,                  /* write */
        hermon_ioctl,           /* ioctl */
        hermon_devmap,          /* devmap */
        NULL,                   /* mmap */
        nodev,                  /* segmap */
        nochpoll,               /* chpoll */
        ddi_prop_op,            /* prop_op */
        NULL,                   /* streams */
        D_NEW | D_MP |
        D_64BIT | D_HOTPLUG |
        D_DEVMAP,               /* flags */
        CB_REV                  /* rev */
};

/*
 * Driver Operations
 *    Standard dev_ops vector.  hermon_quiesce is supplied as devo_quiesce
 *    to support x86 fast reboot (see the fastreboot helpers above).
 */
static struct dev_ops hermon_ops = {
        DEVO_REV,               /* struct rev */
        0,                      /* refcnt */
        hermon_getinfo,         /* getinfo */
        nulldev,                /* identify */
        nulldev,                /* probe */
        hermon_attach,          /* attach */
        hermon_detach,          /* detach */
        nodev,                  /* reset */
        &hermon_cb_ops,         /* cb_ops */
        NULL,                   /* bus_ops */
        nodev,                  /* power */
        hermon_quiesce,         /* devo_quiesce */
};

/* Module Driver Info (ties the dev_ops vector to the loadable module) */
static struct modldrv hermon_modldrv = {
        &mod_driverops,
        "ConnectX IB Driver",
        &hermon_ops
};

/* Module Linkage (single modldrv, passed to mod_install/ibc_init) */
static struct modlinkage hermon_modlinkage = {
        MODREV_1,
        &hermon_modldrv,
        NULL
};

/*
 * This extern refers to the ibc_operations_t function vector that is defined
 * in the hermon_ci.c file.
 */
extern ibc_operations_t hermon_ibc_ops;

/*
 * _init()
 *    Module load entry point.  Sets up the soft state list, registers
 *    with the IBTF (ibc_init), installs the module, and finally brings
 *    up the shared userland resources database.  Each failure unwinds
 *    exactly the steps that preceded it.
 */
int
_init()
{
        int     rc;

        rc = ddi_soft_state_init(&hermon_statep, sizeof (hermon_state_t),
            (size_t)HERMON_INITIAL_STATES);
        if (rc != 0) {
                return (rc);
        }

        if ((rc = ibc_init(&hermon_modlinkage)) != 0) {
                ddi_soft_state_fini(&hermon_statep);
                return (rc);
        }

        if ((rc = mod_install(&hermon_modlinkage)) != 0) {
                ibc_fini(&hermon_modlinkage);
                ddi_soft_state_fini(&hermon_statep);
                return (rc);
        }

        /* Initialize the Hermon "userland resources database" */
        hermon_umap_db_init();

        return (0);
}


/*
 * _info()
 *    Module information entry point; simply reports on the hermon
 *    module linkage.
 */
int
_info(struct modinfo *modinfop)
{
        return (mod_info(&hermon_modlinkage, modinfop));
}


/*
 * _fini()
 *    Module unload entry point.  If mod_remove() refuses (module busy),
 *    nothing is torn down; otherwise the userland resources database,
 *    the IBTF registration, and the soft state list are all released.
 */
int
_fini()
{
        int     rc;

        if ((rc = mod_remove(&hermon_modlinkage)) != 0) {
                return (rc);
        }

        /* Destroy the Hermon "userland resources database" */
        hermon_umap_db_fini();

        ibc_fini(&hermon_modlinkage);
        ddi_soft_state_fini(&hermon_statep);

        return (0);
}


/*
 * hermon_getinfo()
 *    Standard getinfo(9E) entry point: translates a dev_t into either
 *    the instance's dev_info pointer or its instance number.  The minor
 *    number encodes the instance (see HERMON_DEV_INSTANCE).
 */
/* ARGSUSED */
static int
hermon_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result)
{
        hermon_state_t  *state;
        minor_t         inst;

        inst = HERMON_DEV_INSTANCE((dev_t)arg);

        if (cmd == DDI_INFO_DEVT2DEVINFO) {
                state = ddi_get_soft_state(hermon_statep, inst);
                if (state == NULL) {
                        return (DDI_FAILURE);
                }
                *result = (void *)state->hs_dip;
                return (DDI_SUCCESS);
        }

        if (cmd == DDI_INFO_DEVT2INSTANCE) {
                *result = (void *)(uintptr_t)inst;
                return (DDI_SUCCESS);
        }

        return (DDI_FAILURE);
}


/*
 * hermon_open()
 *    Character device open entry point (user context).
 *
 *    Returns (via *devp) a minor number that encodes both the device
 *    instance and a per-process UAR page index.  In operational mode a
 *    UAR page is allocated for the opening process; in maintenance mode
 *    no UAR page exists and a rolling index is used instead.  Returns 0,
 *    ENXIO, EINVAL, or EAGAIN.
 */
/* ARGSUSED */
static int
hermon_open(dev_t *devp, int flag, int otyp, cred_t *credp)
{
        hermon_state_t          *state;
        hermon_rsrc_t           *rsrcp;
        hermon_umap_db_entry_t  *umapdb, *umapdb2;
        minor_t                 instance;
        uint64_t                key, value;
        uint_t                  hr_indx;
        dev_t                   dev;
        int                     status;

        instance = HERMON_DEV_INSTANCE(*devp);
        state = ddi_get_soft_state(hermon_statep, instance);
        if (state == NULL) {
                return (ENXIO);
        }

        /*
         * Only allow driver to be opened for character access, and verify
         * whether exclusive access is allowed.
         */
        if ((otyp != OTYP_CHR) || ((flag & FEXCL) &&
            secpolicy_excl_open(credp) != 0)) {
                return (EINVAL);
        }

        /*
         * Search for the current process PID in the "userland resources
         * database".  If it is not found, then attempt to allocate a UAR
         * page and add the ("key", "value") pair to the database.
         * Note:  As a last step we always return a devp appropriate for
         * the open.  Either we return a new minor number (based on the
         * instance and the UAR page index) or we return the current minor
         * number for the given client process.
         *
         * We also add an entry to the database to allow for lookup from
         * "dev_t" to the current process PID.  This is necessary because,
         * under certain circumstance, the process PID that calls the Hermon
         * close() entry point may not be the same as the one who called
         * open().  Specifically, this can happen if a child process calls
         * the Hermon's open() entry point, gets a UAR page, maps it out (using
         * mmap()), and then exits without calling munmap().  Because mmap()
         * adds a reference to the file descriptor, at the exit of the child
         * process the file descriptor is "inherited" by the parent (and will
         * be close()'d by the parent's PID only when it exits).
         *
         * Note: We use the hermon_umap_db_find_nolock() and
         * hermon_umap_db_add_nolock() database access routines below (with
         * an explicit mutex_enter of the database lock - "hdl_umapdb_lock")
         * to ensure that the multiple accesses (in this case searching for,
         * and then adding _two_ database entries) can be done atomically.
         */
        key = ddi_get_pid();
        mutex_enter(&hermon_userland_rsrc_db.hdl_umapdb_lock);
        status = hermon_umap_db_find_nolock(instance, key,
            MLNX_UMAP_UARPG_RSRC, &value, 0, NULL);
        if (status != DDI_SUCCESS) {
                /*
                 * If we are in 'maintenance mode', we cannot alloc a UAR page.
                 * But we still need some rsrcp value, and a mostly unique
                 * hr_indx value.  So we set rsrcp to NULL for maintenance
                 * mode, and use a rolling count for hr_indx.  The field
                 * 'hs_open_ar_indx' is used only in this maintenance mode
                 * condition.
                 *
                 * Otherwise, if we are in operational mode then we allocate
                 * the UAR page as normal, and use the rsrcp value and hr_indx
                 * value from that allocation.
                 */
                if (!HERMON_IS_OPERATIONAL(state->hs_operational_mode)) {
                        rsrcp = NULL;
                        hr_indx = state->hs_open_ar_indx++;
                } else {
                        /* Allocate a new UAR page for this process */
                        status = hermon_rsrc_alloc(state, HERMON_UARPG, 1,
                            HERMON_NOSLEEP, &rsrcp);
                        if (status != DDI_SUCCESS) {
                                mutex_exit(
                                    &hermon_userland_rsrc_db.hdl_umapdb_lock);
                                return (EAGAIN);
                        }

                        hr_indx = rsrcp->hr_indx;
                }

                /*
                 * Allocate an entry to track the UAR page resource in the
                 * "userland resources database".
                 */
                umapdb = hermon_umap_db_alloc(instance, key,
                    MLNX_UMAP_UARPG_RSRC, (uint64_t)(uintptr_t)rsrcp);
                if (umapdb == NULL) {
                        mutex_exit(&hermon_userland_rsrc_db.hdl_umapdb_lock);
                        /* If in "maintenance mode", don't free the rsrc */
                        if (HERMON_IS_OPERATIONAL(state->hs_operational_mode)) {
                                hermon_rsrc_free(state, &rsrcp);
                        }
                        return (EAGAIN);
                }

                /*
                 * Create a new device number.  Minor number is a function of
                 * the UAR page index (15 bits) and the device instance number
                 * (3 bits).
                 * NOTE(review): the comment in the "else" branch below says
                 * "top 16 bits" -- confirm the split against
                 * HERMON_MINORNUM_SHIFT and make the two comments agree.
                 */
                dev = makedevice(getmajor(*devp), (hr_indx <<
                    HERMON_MINORNUM_SHIFT) | instance);

                /*
                 * Allocate another entry in the "userland resources database"
                 * to track the association of the device number (above) to
                 * the current process ID (in "key").
                 */
                umapdb2 = hermon_umap_db_alloc(instance, dev,
                    MLNX_UMAP_PID_RSRC, (uint64_t)key);
                if (umapdb2 == NULL) {
                        mutex_exit(&hermon_userland_rsrc_db.hdl_umapdb_lock);
                        hermon_umap_db_free(umapdb);
                        /* If in "maintenance mode", don't free the rsrc */
                        if (HERMON_IS_OPERATIONAL(state->hs_operational_mode)) {
                                hermon_rsrc_free(state, &rsrcp);
                        }
                        return (EAGAIN);
                }

                /* Add the entries to the database */
                hermon_umap_db_add_nolock(umapdb);
                hermon_umap_db_add_nolock(umapdb2);

        } else {
                /*
                 * Return the same device number as on the original open()
                 * call.  This was calculated as a function of the UAR page
                 * index (top 16 bits) and the device instance number
                 */
                rsrcp = (hermon_rsrc_t *)(uintptr_t)value;
                dev = makedevice(getmajor(*devp), (rsrcp->hr_indx <<
                    HERMON_MINORNUM_SHIFT) | instance);
        }
        mutex_exit(&hermon_userland_rsrc_db.hdl_umapdb_lock);

        *devp = dev;

        return (0);
}


/*
 * hermon_close()
 *    Character device close entry point (user context).
 *
 *    Looks up the closing "dev_t" in the userland resources database,
 *    runs any registered "on close" callbacks, removes both database
 *    entries created by hermon_open(), and (in operational mode) frees
 *    the UAR page resource.  Returns ENXIO or the callback status.
 */
/* ARGSUSED */
static int
hermon_close(dev_t dev, int flag, int otyp, cred_t *credp)
{
        hermon_state_t          *state;
        hermon_rsrc_t           *rsrcp;
        hermon_umap_db_entry_t  *umapdb;
        hermon_umap_db_priv_t   *priv;
        minor_t                 instance;
        uint64_t                key, value;
        int                     status, reset_status = 0;

        instance = HERMON_DEV_INSTANCE(dev);
        state = ddi_get_soft_state(hermon_statep, instance);
        if (state == NULL) {
                return (ENXIO);
        }

        /*
         * Search for "dev_t" in the "userland resources database".  As
         * explained above in hermon_open(), we can't depend on using the
         * current process ID here to do the lookup because the process
         * that ultimately closes may not be the same one who opened
         * (because of inheritance).
         * So we lookup the "dev_t" (which points to the PID of the process
         * that opened), and we remove the entry from the database (and free
         * it up).  Then we do another query based on the PID value.  And when
         * we find that database entry, we free it up too and then free the
         * Hermon UAR page resource.
         *
         * Note: We use the hermon_umap_db_find_nolock() database access
         * routine below (with an explicit mutex_enter of the database lock)
         * to ensure that the multiple accesses (which attempt to remove the
         * two database entries) can be done atomically.
         *
         * This works the same in both maintenance mode and HCA mode, except
         * for the call to hermon_rsrc_free().  In the case of maintenance mode,
         * this call is not needed, as it was not allocated in hermon_open()
         * above.
         */
        key = dev;
        mutex_enter(&hermon_userland_rsrc_db.hdl_umapdb_lock);
        status = hermon_umap_db_find_nolock(instance, key, MLNX_UMAP_PID_RSRC,
            &value, HERMON_UMAP_DB_REMOVE, &umapdb);
        if (status == DDI_SUCCESS) {
                /*
                 * If the "hdb_priv" field is non-NULL, it indicates that
                 * some "on close" handling is still necessary.  Call
                 * hermon_umap_db_handle_onclose_cb() to do the handling (i.e.
                 * to invoke all the registered callbacks).  Then free up
                 * the resources associated with "hdb_priv" and continue
                 * closing.
                 */
                priv = (hermon_umap_db_priv_t *)umapdb->hdbe_common.hdb_priv;
                if (priv != NULL) {
                        reset_status = hermon_umap_db_handle_onclose_cb(priv);
                        kmem_free(priv, sizeof (hermon_umap_db_priv_t));
                        umapdb->hdbe_common.hdb_priv = (void *)NULL;
                }

                hermon_umap_db_free(umapdb);

                /*
                 * Now do another lookup using PID as the key (copy it from
                 * "value").  When this lookup is complete, the "value" field
                 * will contain the hermon_rsrc_t pointer for the UAR page
                 * resource.
                 */
                key = value;
                status = hermon_umap_db_find_nolock(instance, key,
                    MLNX_UMAP_UARPG_RSRC, &value, HERMON_UMAP_DB_REMOVE,
                    &umapdb);
                if (status == DDI_SUCCESS) {
                        hermon_umap_db_free(umapdb);
                        /* If in "maintenance mode", don't free the rsrc */
                        if (HERMON_IS_OPERATIONAL(state->hs_operational_mode)) {
                                rsrcp = (hermon_rsrc_t *)(uintptr_t)value;
                                hermon_rsrc_free(state, &rsrcp);
                        }
                }
        }
        mutex_exit(&hermon_userland_rsrc_db.hdl_umapdb_lock);
        return (reset_status);
}


/*
 * hermon_attach()
 *    Context: Only called from attach() path context
 *
 *    Handles DDI_ATTACH: allocates the per-instance soft state,
 *    initializes FMA support, brings up the driver/hardware
 *    (hermon_drv_init), creates the "devctl" minor node, and -- when the
 *    HCA is fully operational -- attaches to the IBTF and registers
 *    agents with the IBMF.  If hardware init fails in a recoverable way
 *    (hs_operational_mode indicates maintenance mode), the instance still
 *    attaches so firmware can be updated/flashed.  DDI_RESUME is not
 *    supported.
 *
 *    Fix: the "default" case used to fall through into the fail_attach
 *    label, where "state" and "instance" (only initialized on the
 *    DDI_ATTACH path) were dereferenced/used while uninitialized.  It now
 *    returns DDI_FAILURE directly.
 */
static int
hermon_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
        hermon_state_t  *state;
        ibc_clnt_hdl_t  tmp_ibtfpriv;
        ibc_status_t    ibc_status;
        int             instance;
        int             status;

#ifdef __lock_lint
        (void) hermon_quiesce(dip);
#endif

        switch (cmd) {
        case DDI_ATTACH:
                instance = ddi_get_instance(dip);
                status = ddi_soft_state_zalloc(hermon_statep, instance);
                if (status != DDI_SUCCESS) {
                        cmn_err(CE_NOTE, "hermon%d: driver failed to attach: "
                            "attach_ssz_fail", instance);
                        goto fail_attach_nomsg;
                }
                state = ddi_get_soft_state(hermon_statep, instance);
                if (state == NULL) {
                        ddi_soft_state_free(hermon_statep, instance);
                        cmn_err(CE_NOTE, "hermon%d: driver failed to attach: "
                            "attach_gss_fail", instance);
                        goto fail_attach_nomsg;
                }

                /* clear the attach error buffer */
                HERMON_ATTACH_MSG_INIT(state->hs_attach_buf);

                /* Save away devinfo and instance before hermon_fm_init() */
                state->hs_dip = dip;
                state->hs_instance = instance;

                hermon_fm_init(state);

                /*
                 * Initialize Hermon driver and hardware.
                 *
                 * Note: If this initialization fails we may still wish to
                 * create a device node and remain operational so that Hermon
                 * firmware can be updated/flashed (i.e. "maintenance mode").
                 * If this is the case, then "hs_operational_mode" will be
                 * equal to HERMON_MAINTENANCE_MODE.  We will not attempt to
                 * attach to the IBTF or register with the IBMF (i.e. no
                 * InfiniBand interfaces will be enabled).
                 */
                status = hermon_drv_init(state, dip, instance);
                if ((status != DDI_SUCCESS) &&
                    (HERMON_IS_OPERATIONAL(state->hs_operational_mode))) {
                        goto fail_attach;
                }

                /*
                 * Change the Hermon FM mode
                 */
                if ((hermon_get_state(state) & HCA_PIO_FM) &&
                    HERMON_IS_OPERATIONAL(state->hs_operational_mode)) {
                        /*
                         * Now we wait for 50ms to give an opportunity
                         * to Solaris FMA so that HW errors can be notified.
                         * Then check if there are HW errors or not. If
                         * a HW error is detected, the Hermon attachment
                         * must be failed.
                         */
                        delay(drv_usectohz(50000));
                        if (hermon_init_failure(state)) {
                                hermon_drv_fini(state);
                                HERMON_WARNING(state, "unable to "
                                    "attach Hermon due to a HW error");
                                HERMON_ATTACH_MSG(state->hs_attach_buf,
                                    "hermon_attach_failure");
                                goto fail_attach;
                        }

                        /*
                         * There seems no HW errors during the attachment,
                         * so let's change the Hermon FM state to the
                         * ereport only mode.
                         */
                        if (hermon_fm_ereport_init(state) != DDI_SUCCESS) {
                                /* unwind the resources */
                                hermon_drv_fini(state);
                                HERMON_ATTACH_MSG(state->hs_attach_buf,
                                    "hermon_attach_failure");
                                goto fail_attach;
                        }
                }

                /* Create the minor node for device */
                status = ddi_create_minor_node(dip, "devctl", S_IFCHR, instance,
                    DDI_PSEUDO, 0);
                if (status != DDI_SUCCESS) {
                        hermon_drv_fini(state);
                        HERMON_ATTACH_MSG(state->hs_attach_buf,
                            "attach_create_mn_fail");
                        goto fail_attach;
                }

                /*
                 * If we are in "maintenance mode", then we don't want to
                 * register with the IBTF.  All InfiniBand interfaces are
                 * uninitialized, and the device is only capable of handling
                 * requests to update/flash firmware (or test/debug requests).
                 */
                if (HERMON_IS_OPERATIONAL(state->hs_operational_mode)) {
                        cmn_err(CE_NOTE, "!Hermon is operational\n");

                        /* Attach to InfiniBand Transport Framework (IBTF) */
                        ibc_status = ibc_attach(&tmp_ibtfpriv,
                            &state->hs_ibtfinfo);
                        if (ibc_status != IBC_SUCCESS) {
                                cmn_err(CE_CONT, "hermon_attach: ibc_attach "
                                    "failed\n");
                                ddi_remove_minor_node(dip, "devctl");
                                hermon_drv_fini(state);
                                HERMON_ATTACH_MSG(state->hs_attach_buf,
                                    "attach_ibcattach_fail");
                                goto fail_attach;
                        }

                        /*
                         * Now that we've successfully attached to the IBTF,
                         * we enable all appropriate asynch and CQ events to
                         * be forwarded to the IBTF.
                         */
                        HERMON_ENABLE_IBTF_CALLB(state, tmp_ibtfpriv);

                        ibc_post_attach(state->hs_ibtfpriv);

                        /* Register agents with IB Mgmt Framework (IBMF) */
                        status = hermon_agent_handlers_init(state);
                        if (status != DDI_SUCCESS) {
                                (void) ibc_pre_detach(tmp_ibtfpriv, DDI_DETACH);
                                HERMON_QUIESCE_IBTF_CALLB(state);
                                if (state->hs_in_evcallb != 0) {
                                        HERMON_WARNING(state, "unable to "
                                            "quiesce Hermon IBTF callbacks");
                                }
                                ibc_detach(tmp_ibtfpriv);
                                ddi_remove_minor_node(dip, "devctl");
                                hermon_drv_fini(state);
                                HERMON_ATTACH_MSG(state->hs_attach_buf,
                                    "attach_agentinit_fail");
                                goto fail_attach;
                        }
                }

                /* Report attach in maintenance mode, if appropriate */
                if (!(HERMON_IS_OPERATIONAL(state->hs_operational_mode))) {
                        cmn_err(CE_NOTE, "hermon%d: driver attached "
                            "(for maintenance mode only)", state->hs_instance);
                        hermon_fm_ereport(state, HCA_IBA_ERR, HCA_ERR_DEGRADED);
                }

                /* Report that driver was loaded */
                ddi_report_dev(dip);

                /* Send device information to log file */
                hermon_device_info_report(state);

                /* DEBUG PRINT */
                cmn_err(CE_CONT, "!Hermon attach complete\n");
                return (DDI_SUCCESS);

        case DDI_RESUME:
                /* Add code here for DDI_RESUME XXX */
                return (DDI_FAILURE);

        default:
                cmn_err(CE_WARN, "hermon_attach: unknown cmd (0x%x)\n", cmd);
                /*
                 * Return directly rather than falling through to
                 * fail_attach: "state" and "instance" are uninitialized
                 * on this path.
                 */
                return (DDI_FAILURE);
        }

fail_attach:
        /* Only reachable from DDI_ATTACH, where state/instance are valid */
        cmn_err(CE_NOTE, "hermon%d: driver failed to attach: %s", instance,
            state->hs_attach_buf);
        if (hermon_get_state(state) & HCA_EREPORT_FM) {
                hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_SRV_LOST);
        }
        hermon_drv_fini2(state);
        hermon_fm_fini(state);
        ddi_soft_state_free(hermon_statep, instance);

fail_attach_nomsg:
        return (DDI_FAILURE);
}


/*
 * hermon_detach()
 *    Context: Only called from detach() path context
 *
 * Tears down, in reverse order of attach: IBMF agents, IBTF
 * registration (after quiescing outstanding event callbacks), the
 * "devctl" minor node, driver/hardware state, and finally the FM
 * state and softstate for the instance.  Returns DDI_FAILURE to veto
 * the detach if the IB frameworks cannot be cleanly disconnected.
 */
static int
hermon_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
        hermon_state_t  *state;
        ibc_clnt_hdl_t  tmp_ibtfpriv;
        ibc_status_t    ibc_status;
        int             instance, status;

        instance = ddi_get_instance(dip);
        state = ddi_get_soft_state(hermon_statep, instance);
        if (state == NULL) {
                /* No softstate: this instance never (fully) attached */
                return (DDI_FAILURE);
        }

        switch (cmd) {
        case DDI_DETACH:
                /*
                 * If we are in "maintenance mode", then we do not want to
                 * do teardown for any of the InfiniBand interfaces.
                 * Specifically, this means not detaching from IBTF (we never
                 * attached to begin with) and not deregistering from IBMF.
                 */
                if (HERMON_IS_OPERATIONAL(state->hs_operational_mode)) {
                        /* Unregister agents from IB Mgmt Framework (IBMF) */
                        status = hermon_agent_handlers_fini(state);
                        if (status != DDI_SUCCESS) {
                                /* Agents still live; veto the detach */
                                return (DDI_FAILURE);
                        }

                        /*
                         * Attempt the "pre-detach" from InfiniBand Transport
                         * Framework (IBTF).  At this point the IBTF is still
                         * capable of handling incoming asynch and completion
                         * events.  This "pre-detach" is primarily a mechanism
                         * to notify the appropriate IBTF clients that the
                         * HCA is being removed/offlined.
                         */
                        ibc_status = ibc_pre_detach(state->hs_ibtfpriv, cmd);
                        if (ibc_status != IBC_SUCCESS) {
                                /*
                                 * IBTF clients refused the offline; best
                                 * effort to re-register the agents so the
                                 * instance keeps working, then veto.
                                 */
                                status = hermon_agent_handlers_init(state);
                                if (status != DDI_SUCCESS) {
                                        HERMON_WARNING(state, "failed to "
                                            "restart Hermon agents");
                                }
                                return (DDI_FAILURE);
                        }

                        /*
                         * Before we can fully detach from the IBTF we need to
                         * ensure that we have handled all outstanding event
                         * callbacks.  This is accomplished by quiescing the
                         * event callback mechanism.  Note: if we are unable
                         * to successfully quiesce the callbacks, then this is
                         * an indication that something has probably gone
                         * seriously wrong.  We print out a warning, but
                         * continue.
                         *
                         * NOTE(review): the handle is saved aside first --
                         * HERMON_QUIESCE_IBTF_CALLB appears to invalidate
                         * hs_ibtfpriv; confirm against the macro definition.
                         */
                        tmp_ibtfpriv = state->hs_ibtfpriv;
                        HERMON_QUIESCE_IBTF_CALLB(state);
                        if (state->hs_in_evcallb != 0) {
                                HERMON_WARNING(state, "unable to quiesce "
                                    "Hermon IBTF callbacks");
                        }

                        /* Complete the detach from the IBTF */
                        ibc_detach(tmp_ibtfpriv);
                }

                /* Remove the minor node for device */
                ddi_remove_minor_node(dip, "devctl");

                /*
                 * Only call hermon_drv_fini() if we are in Hermon HCA mode.
                 * (Because if we are in "maintenance mode", then we never
                 * successfully finished init.)  Only report successful
                 * detach for normal HCA mode.
                 */
                if (HERMON_IS_OPERATIONAL(state->hs_operational_mode)) {
                        /* Cleanup driver resources and shutdown hardware */
                        hermon_drv_fini(state);
                        cmn_err(CE_CONT, "!Hermon driver successfully "
                            "detached\n");
                }

                /* Release register mappings, FM state, and the softstate */
                hermon_drv_fini2(state);
                hermon_fm_fini(state);
                ddi_soft_state_free(hermon_statep, instance);

                return (DDI_SUCCESS);

        case DDI_SUSPEND:
                /* Add code here for DDI_SUSPEND XXX */
                return (DDI_FAILURE);

        default:
                cmn_err(CE_WARN, "hermon_detach: unknown cmd (0x%x)\n", cmd);
                break;
        }

        return (DDI_FAILURE);
}

/*
 * hermon_dma_attr_init()
 *    Context: Can be called from interrupt or base context.
 *
 * Fill in the default DMA attributes for HCA DMA allocations: the full
 * 64-bit address range, page alignment, and effectively unconstrained
 * transfer, segment, and scatter/gather limits.
 */

/* ARGSUSED */
void
hermon_dma_attr_init(hermon_state_t *state, ddi_dma_attr_t *dma_attr)
{
        _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*dma_attr))

        dma_attr->dma_attr_version      = DMA_ATTR_V0;
        dma_attr->dma_attr_flags        = 0;

        /* Any 64-bit DMA address is acceptable */
        dma_attr->dma_attr_addr_lo      = 0;
        dma_attr->dma_attr_addr_hi      = 0xFFFFFFFFFFFFFFFFull;

        /* Alignment and burst characteristics */
        dma_attr->dma_attr_align        = HERMON_PAGESIZE;  /* default 4K */
        dma_attr->dma_attr_burstsizes   = 0x3FF;

        /* Transfer/segment limits are effectively unbounded */
        dma_attr->dma_attr_minxfer      = 1;
        dma_attr->dma_attr_maxxfer      = 0xFFFFFFFFFFFFFFFFull;
        dma_attr->dma_attr_count_max    = 0xFFFFFFFFFFFFFFFFull;
        dma_attr->dma_attr_seg          = 0xFFFFFFFFFFFFFFFFull;
        dma_attr->dma_attr_sgllen       = 0x7FFFFFFF;
        dma_attr->dma_attr_granular     = 1;
}

/*
 * hermon_dma_alloc()
 *    Context: Can be called from base context.
 *
 * Allocate and bind "dma_info->length" bytes of DMA-able memory, then
 * hand the mapping to the firmware via the MAP_* command in "opcode".
 * On success, fills in dma_info's vaddr and handles; on any failure,
 * everything allocated here is released before returning.
 */
int
hermon_dma_alloc(hermon_state_t *state, hermon_dma_info_t *dma_info,
    uint16_t opcode)
{
        ddi_dma_handle_t        dma_hdl;
        ddi_dma_attr_t          dma_attr;
        ddi_acc_handle_t        acc_hdl;
        ddi_dma_cookie_t        cookie;
        uint64_t                vaddr;
        uint64_t                real_len;
        uint_t                  ccount;
        int                     status;

        hermon_dma_attr_init(state, &dma_attr);
#ifdef  __sparc
        /* Bypass the IOMMU if the configuration profile asks for it */
        if (state->hs_cfg_profile->cp_iommu_bypass == HERMON_BINDMEM_BYPASS)
                dma_attr.dma_attr_flags = DDI_DMA_FORCE_PHYSICAL;
#endif

        /* Allocate a DMA handle */
        status = ddi_dma_alloc_handle(state->hs_dip, &dma_attr, DDI_DMA_SLEEP,
            NULL, &dma_hdl);
        if (status != DDI_SUCCESS) {
                IBTF_DPRINTF_L2("DMA", "alloc handle failed: %d", status);
                cmn_err(CE_CONT, "DMA alloc handle failed(status %d)", status);
                return (DDI_FAILURE);
        }

        /* Allocate the backing DMA memory (may round up to real_len) */
        status = ddi_dma_mem_alloc(dma_hdl, dma_info->length,
            &state->hs_reg_accattr, DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, NULL,
            (caddr_t *)&vaddr, (size_t *)&real_len, &acc_hdl);
        if (status != DDI_SUCCESS) {
                IBTF_DPRINTF_L2("DMA", "memory alloc failed: %d", status);
                cmn_err(CE_CONT, "DMA memory alloc failed(status %d)", status);
                goto fail_mem;
        }
        bzero((caddr_t)(uintptr_t)vaddr, real_len);

        /* Bind the memory to the handle */
        status = ddi_dma_addr_bind_handle(dma_hdl, NULL,
            (caddr_t)(uintptr_t)vaddr, (size_t)real_len, DDI_DMA_RDWR |
            DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, NULL, &cookie, &ccount);
        if (status != DDI_SUCCESS) {
                IBTF_DPRINTF_L2("DMA", "bind handle failed: %d", status);
                cmn_err(CE_CONT, "DMA bind handle failed(status %d)", status);
                goto fail_bind;
        }

        /* Package the hermon_dma_info contents and return */
        dma_info->vaddr   = vaddr;
        dma_info->dma_hdl = dma_hdl;
        dma_info->acc_hdl = acc_hdl;

        /* Pass the mapping information to the firmware */
        status = hermon_map_cmd_post(state, dma_info, opcode, cookie, ccount);
        if (status != DDI_SUCCESS) {
                const char *cmd_name;

                /* Releases the bind, the memory, and the handle */
                hermon_dma_free(dma_info);
                switch (opcode) {
                case MAP_ICM:
                        cmd_name = "MAP_ICM";
                        break;
                case MAP_FA:
                        cmd_name = "MAP_FA";
                        break;
                case MAP_ICM_AUX:
                        cmd_name = "MAP_ICM_AUX";
                        break;
                default:
                        cmd_name = "UNKNOWN";
                }
                cmn_err(CE_NOTE, "Map cmd '%s' failed, status %08x\n",
                    cmd_name, status);
                return (DDI_FAILURE);
        }

        return (DDI_SUCCESS);

fail_bind:
        ddi_dma_mem_free(&acc_hdl);
fail_mem:
        ddi_dma_free_handle(&dma_hdl);
        return (DDI_FAILURE);
}

/*
 * hermon_dma_free()
 *    Context: Can be called from base context.
 *
 * Releases a DMA allocation previously set up by hermon_dma_alloc().
 * All three handles in "info" must be valid; teardown is performed in
 * the reverse order of allocation (unbind, free memory, free handle).
 */
void
hermon_dma_free(hermon_dma_info_t *info)
{
        /* Unbind the handles and free the memory */
        (void) ddi_dma_unbind_handle(info->dma_hdl);
        ddi_dma_mem_free(&info->acc_hdl);
        ddi_dma_free_handle(&info->dma_hdl);
}

/* These macros are valid for use only in hermon_icm_alloc/hermon_icm_free. */
#define HERMON_ICM_ALLOC(rsrc) \
        hermon_icm_alloc(state, rsrc, index1, index2)
#define HERMON_ICM_FREE(rsrc) \
        hermon_icm_free(state, rsrc, index1, index2)

/*
 * hermon_icm_alloc()
 *    Context: Can be called from base context.
 *
 * Only one thread can be here for a given hermon_rsrc_type_t "type".
 *
 * "num_to_hdl" is set if there is a need for lookups from resource
 * number/index to resource handle.  This is needed for QPs/CQs/SRQs
 * for the various affiliated events/errors.
 *
 * Allocates any dependent ICM tables first (cMPT for the resource,
 * plus RDB/ALTC/AUXC for QPs), then allocates and maps the DMA span
 * for this (index1, index2) slot.  All partial allocations are undone
 * on failure.
 */
int
hermon_icm_alloc(hermon_state_t *state, hermon_rsrc_type_t type,
    uint32_t index1, uint32_t index2)
{
        hermon_icm_table_t      *icm;
        hermon_dma_info_t       *dma_info;
        uint8_t                 *bitmap;
        int                     status;
        int                     num_to_hdl = 0;

        if (hermon_verbose) {
                IBTF_DPRINTF_L2("hermon", "hermon_icm_alloc: rsrc_type (0x%x) "
                    "index1/2 (0x%x/0x%x)", type, index1, index2);
        }

        icm = &state->hs_icm[type];

        /*
         * Allocate the dependent ICM tables for this resource type,
         * unwinding the earlier ones in reverse order on any failure.
         */
        switch (type) {
        case HERMON_QPC:
                status = HERMON_ICM_ALLOC(HERMON_CMPT_QPC);
                if (status != DDI_SUCCESS) {
                        return (status);
                }
                status = HERMON_ICM_ALLOC(HERMON_RDB);
                if (status != DDI_SUCCESS) {    /* undo icm_alloc's */
                        HERMON_ICM_FREE(HERMON_CMPT_QPC);
                        return (status);
                }
                status = HERMON_ICM_ALLOC(HERMON_ALTC);
                if (status != DDI_SUCCESS) {    /* undo icm_alloc's */
                        HERMON_ICM_FREE(HERMON_RDB);
                        HERMON_ICM_FREE(HERMON_CMPT_QPC);
                        return (status);
                }
                status = HERMON_ICM_ALLOC(HERMON_AUXC);
                if (status != DDI_SUCCESS) {    /* undo icm_alloc's */
                        HERMON_ICM_FREE(HERMON_ALTC);
                        HERMON_ICM_FREE(HERMON_RDB);
                        HERMON_ICM_FREE(HERMON_CMPT_QPC);
                        return (status);
                }
                num_to_hdl = 1;
                break;
        case HERMON_SRQC:
                status = HERMON_ICM_ALLOC(HERMON_CMPT_SRQC);
                if (status != DDI_SUCCESS) {
                        return (status);
                }
                num_to_hdl = 1;
                break;
        case HERMON_CQC:
                status = HERMON_ICM_ALLOC(HERMON_CMPT_CQC);
                if (status != DDI_SUCCESS) {
                        return (status);
                }
                num_to_hdl = 1;
                break;
        case HERMON_EQC:
                status = HERMON_ICM_ALLOC(HERMON_CMPT_EQC);
                if (status != DDI_SUCCESS) {    /* no dependent tables */
                        return (status);
                }
                break;
        }

        /* ensure existence of bitmap and dmainfo, sets "dma_info" */
        hermon_bitmap(bitmap, dma_info, icm, index1, num_to_hdl);

        /* Set up the DMA handle for allocation and mapping */
        dma_info += index2;
        _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*dma_info))
        dma_info->length  = icm->span << icm->log_object_size;
        dma_info->icmaddr = icm->icm_baseaddr +
            (((index1 << icm->split_shift) +
            (index2 << icm->span_shift)) << icm->log_object_size);

        /* Allocate memory for the num_to_qp/cq/srq pointers */
        if (num_to_hdl)
                icm->num_to_hdl[index1][index2] =
                    kmem_zalloc(HERMON_ICM_SPAN * sizeof (void *), KM_SLEEP);

        if (hermon_verbose) {
                IBTF_DPRINTF_L2("hermon", "alloc DMA: "
                    "rsrc (0x%x) index (%x, %x) "
                    "icm_addr/len (%llx/%x) bitmap %p", type, index1, index2,
                    (longlong_t)dma_info->icmaddr, dma_info->length, bitmap);
        }

        /* Allocate and map memory for this span */
        status = hermon_dma_alloc(state, dma_info, MAP_ICM);
        if (status != DDI_SUCCESS) {
                IBTF_DPRINTF_L2("hermon", "hermon_icm_alloc: DMA "
                    "allocation failed, status 0x%x", status);

                /*
                 * Undo the num_to_hdl span allocation made above.
                 * (Previously this array was leaked on the DMA
                 * failure path.)
                 */
                if (num_to_hdl) {
                        kmem_free(icm->num_to_hdl[index1][index2],
                            HERMON_ICM_SPAN * sizeof (void *));
                        icm->num_to_hdl[index1][index2] = NULL;
                }

                /* Undo the dependent ICM table allocations as well */
                switch (type) {
                case HERMON_QPC:
                        HERMON_ICM_FREE(HERMON_AUXC);
                        HERMON_ICM_FREE(HERMON_ALTC);
                        HERMON_ICM_FREE(HERMON_RDB);
                        HERMON_ICM_FREE(HERMON_CMPT_QPC);
                        break;
                case HERMON_SRQC:
                        HERMON_ICM_FREE(HERMON_CMPT_SRQC);
                        break;
                case HERMON_CQC:
                        HERMON_ICM_FREE(HERMON_CMPT_CQC);
                        break;
                case HERMON_EQC:
                        HERMON_ICM_FREE(HERMON_CMPT_EQC);
                        break;
                }

                return (DDI_FAILURE);
        }
        if (hermon_verbose) {
                IBTF_DPRINTF_L2("hermon", "hermon_icm_alloc: mapping ICM: "
                    "rsrc_type (0x%x) index (0x%x, 0x%x) alloc length (0x%x) "
                    "icm_addr (0x%lx)", type, index1, index2, dma_info->length,
                    dma_info->icmaddr);
        }

        /* Set the bit for this slot in the table bitmap */
        HERMON_BMAP_BIT_SET(icm->icm_bitmap[index1], index2);

        return (DDI_SUCCESS);
}

/*
 * hermon_icm_free()
 *    Context: Can be called from base context.
 *
 * ICM resources have been successfully returned from hermon_icm_alloc().
 * Associated dma_info is no longer in use.  Free the ICM backing memory,
 * clear the slot's bitmap bit, and release any dependent ICM tables
 * that hermon_icm_alloc() created for this resource type.
 */
void
hermon_icm_free(hermon_state_t *state, hermon_rsrc_type_t type,
    uint32_t index1, uint32_t index2)
{
        hermon_icm_table_t      *icm;
        hermon_dma_info_t       *dma_info;
        int                     status;

        icm = &state->hs_icm[type];

        if (hermon_verbose) {
                IBTF_DPRINTF_L2("hermon", "hermon_icm_free: rsrc_type (0x%x) "
                    "index (0x%x, 0x%x)", type, index1, index2);
        }

        /*
         * The DMA table for this index1 is absent only if attach() is
         * failing.  Check the base pointer itself before indexing into
         * it: the previous code computed "icm->icm_dma[index1] + index2"
         * first, which is undefined behavior on a NULL base and defeats
         * the NULL test for any nonzero index2.
         */
        if (icm->icm_dma[index1] == NULL)
                return;

        ASSERT(icm->icm_dma[index1][index2].icm_refcnt == 0);
        dma_info = icm->icm_dma[index1] + index2;

        /* Unmap the ICM allocation, then free the backing DMA memory */
        status = hermon_unmap_icm_cmd_post(state, dma_info);
        if (status != DDI_SUCCESS) {
                HERMON_WARNING(state, "UNMAP_ICM failure");
        }
        hermon_dma_free(dma_info);

        /* Clear the bit in the ICM table bitmap */
        HERMON_BMAP_BIT_CLR(icm->icm_bitmap[index1], index2);

        /* Release the dependent ICM tables (mirror of hermon_icm_alloc) */
        switch (type) {
        case HERMON_QPC:
                HERMON_ICM_FREE(HERMON_AUXC);
                HERMON_ICM_FREE(HERMON_ALTC);
                HERMON_ICM_FREE(HERMON_RDB);
                HERMON_ICM_FREE(HERMON_CMPT_QPC);
                break;
        case HERMON_SRQC:
                HERMON_ICM_FREE(HERMON_CMPT_SRQC);
                break;
        case HERMON_CQC:
                HERMON_ICM_FREE(HERMON_CMPT_CQC);
                break;
        case HERMON_EQC:
                HERMON_ICM_FREE(HERMON_CMPT_EQC);
                break;

        }
}


/*
 * hermon_icm_num_to_hdl()
 *    Context: Can be called from base or interrupt context.
 *
 * Walk the two sparse indirection levels for the given resource type
 * and return the software handle stored for resource number "idx".
 * Returns NULL if either level of the table has been freed (which
 * should never happen).
 */
void *
hermon_icm_num_to_hdl(hermon_state_t *state, hermon_rsrc_type_t type,
    uint32_t idx)
{
        hermon_icm_table_t      *icm;
        uint32_t                span_offset;
        uint32_t                index1, index2;
        void                    ***level1;
        void                    **level2;

        icm = &state->hs_icm[type];
        hermon_index(index1, index2, idx, icm, span_offset);

        level1 = icm->num_to_hdl[index1];
        if (level1 == NULL) {
                IBTF_DPRINTF_L2("hermon", "icm_num_to_hdl failed at level 1"
                    ": rsrc_type %d, index 0x%x", type, idx);
                return (NULL);
        }

        level2 = level1[index2];
        if (level2 == NULL) {
                IBTF_DPRINTF_L2("hermon", "icm_num_to_hdl failed at level 2"
                    ": rsrc_type %d, index 0x%x", type, idx);
                return (NULL);
        }

        return (level2[span_offset]);
}

/*
 * hermon_icm_set_num_to_hdl()
 *    Context: Can be called from base or interrupt context.
 *
 * Store (or, when "hdl" is NULL, clear) the software handle for the
 * resource numbered "idx", used later by hermon_icm_num_to_hdl().
 * The span array was allocated along with the resource's ICM memory,
 * so its existence is a given, and no two stores ever target the same
 * slot concurrently (hence no locking is needed).
 */
void
hermon_icm_set_num_to_hdl(hermon_state_t *state, hermon_rsrc_type_t type,
    uint32_t idx, void *hdl)
{
        hermon_icm_table_t      *icm;
        uint32_t                span_offset;
        uint32_t                index1, index2;
        void                    **span;

        icm = &state->hs_icm[type];
        hermon_index(index1, index2, idx, icm, span_offset);
        span = icm->num_to_hdl[index1][index2];

        /* Exactly one of "hdl" and the current slot contents is NULL */
        ASSERT((hdl == NULL) ^ (span[span_offset] == NULL));
        span[span_offset] = hdl;
}

/*
 * hermon_device_mode()
 *    Context: Can be called from base or interrupt context.
 *
 * Return HERMON_HCA_MODE for operational mode
 * Return HERMON_MAINTENANCE_MODE for maintenance mode
 * Return 0 otherwise
 *
 * A non-zero return for either operational or maintenance mode simplifies
 * one of the 2 uses of this function.
 */
int
hermon_device_mode(hermon_state_t *state)
{
        int     mode = 0;

        /* Only Mellanox devices can be in either Hermon mode */
        if (state->hs_vendor_id == PCI_VENID_MLX) {
                switch (state->hs_device_id) {
                case PCI_DEVID_HERMON_SDR:
                case PCI_DEVID_HERMON_DDR:
                case PCI_DEVID_HERMON_DDRG2:
                case PCI_DEVID_HERMON_QDRG2:
                case PCI_DEVID_HERMON_QDRG2V:
                        mode = HERMON_HCA_MODE;
                        break;
                case PCI_DEVID_HERMON_MAINT:
                        mode = HERMON_MAINTENANCE_MODE;
                        break;
                default:
                        break;
                }
        }
        return (mode);
}

/*
 * hermon_drv_init()
 *    Context: Only called from attach() path context
 *
 * Core bring-up for one HCA instance: determine the device's
 * operational mode from its PCI IDs, initialize the hardware, arm the
 * event queues, verify the interrupt/EQ path with a NOP command, and
 * initialize the driver softstate.  Any failure after hardware init
 * tears the hardware back down before returning DDI_FAILURE.
 */
/* ARGSUSED */
static int
hermon_drv_init(hermon_state_t *state, dev_info_t *dip, int instance)
{
        int     status;

        /* Retrieve PCI device, vendor and rev IDs */
        state->hs_vendor_id      = HERMON_GET_VENDOR_ID(state->hs_dip);
        state->hs_device_id      = HERMON_GET_DEVICE_ID(state->hs_dip);
        state->hs_revision_id    = HERMON_GET_REVISION_ID(state->hs_dip);

        /*
         * Check and set the operational mode of the device. If the driver is
         * bound to the Hermon device in "maintenance mode", then this generally
         * means that either the device has been specifically jumpered to
         * start in this mode or the firmware boot process has failed to
         * successfully load either the primary or the secondary firmware
         * image.
         */
        state->hs_operational_mode = hermon_device_mode(state);
        switch (state->hs_operational_mode) {
        case HERMON_HCA_MODE:
                state->hs_cfg_profile_setting = HERMON_CFG_MEMFREE;
                break;
        case HERMON_MAINTENANCE_MODE:
                /* Flash-update mode; attach continues only far enough */
                /* for firmware access, so init "fails" here on purpose */
                HERMON_FMANOTE(state, HERMON_FMA_MAINT);
                state->hs_fm_degraded_reason = HCA_FW_MISC; /* not fw reason */
                return (DDI_FAILURE);
        default:
                /* Unrecognized PCI device ID for this driver */
                HERMON_FMANOTE(state, HERMON_FMA_PCIID);
                HERMON_WARNING(state, "unexpected device type detected");
                return (DDI_FAILURE);
        }

        /*
         * Initialize the Hermon hardware.
         *
         * Note:  If this routine returns an error, it is often a reasonably
         * good indication that something Hermon firmware-related has caused
         * the failure or some HW related errors have caused the failure.
         * (also there are few possibilities that SW (e.g. SW resource
         * shortage) can cause the failure, but the majority case is due to
         * either a firmware related error or a HW related one) In order to
         * give the user an opportunity (if desired) to update or reflash
         * the Hermon firmware image, we set "hs_operational_mode" flag
         * (described above) to indicate that we wish to enter maintenance
         * mode in case of the firmware-related issue.
         */
        status = hermon_hw_init(state);
        if (status != DDI_SUCCESS) {
                cmn_err(CE_NOTE, "hermon%d: error during attach: %s", instance,
                    state->hs_attach_buf);
                return (DDI_FAILURE);
        }

        /*
         * Now that the ISR has been setup, arm all the EQs for event
         * generation.
         */

        status = hermon_eq_arm_all(state);
        if (status != DDI_SUCCESS) {
                cmn_err(CE_NOTE, "EQ Arm All failed\n");
                hermon_hw_fini(state, HERMON_DRV_CLEANUP_ALL);
                return (DDI_FAILURE);
        }

        /* test interrupts and event queues */
        status = hermon_nop_post(state, 0x0, 0x0);
        if (status != DDI_SUCCESS) {
                cmn_err(CE_NOTE, "Interrupts/EQs failed\n");
                hermon_hw_fini(state, HERMON_DRV_CLEANUP_ALL);
                return (DDI_FAILURE);
        }

        /* Initialize Hermon softstate */
        status = hermon_soft_state_init(state);
        if (status != DDI_SUCCESS) {
                cmn_err(CE_NOTE, "Failed to init soft state\n");
                hermon_hw_fini(state, HERMON_DRV_CLEANUP_ALL);
                return (DDI_FAILURE);
        }

        return (DDI_SUCCESS);
}


/*
 * hermon_drv_fini()
 *    Context: Only called from attach() and/or detach() path contexts
 *
 * Inverse of a successful hermon_drv_init(): tear down the driver
 * softstate first, then release resources and shut down the hardware.
 */
static void
hermon_drv_fini(hermon_state_t *state)
{
        /* Cleanup Hermon softstate */
        hermon_soft_state_fini(state);

        /* Cleanup Hermon resources and shutdown hardware */
        hermon_hw_fini(state, HERMON_DRV_CLEANUP_ALL);
}


/*
 * hermon_drv_fini2()
 *    Context: Only called from attach() and/or detach() path contexts
 *
 * Final-stage teardown: stop the FM polling thread, then release the
 * register mappings, MSI-X table copies, and PCI config access set up
 * during the earliest part of hardware init (cleanup levels 1 and 0).
 * Every handle is NULLed after release and guarded before use, so the
 * routine is safe to call from both the attach-failure and the detach
 * paths regardless of how far setup progressed.
 */
static void
hermon_drv_fini2(hermon_state_t *state)
{
        /* Stop the periodic FM error-poll, if it was started */
        if (state->hs_fm_poll_thread) {
                ddi_periodic_delete(state->hs_fm_poll_thread);
                state->hs_fm_poll_thread = NULL;
        }

        /* HERMON_DRV_CLEANUP_LEVEL1 */
        if (state->hs_fm_cmdhdl) {
                hermon_regs_map_free(state, &state->hs_fm_cmdhdl);
                state->hs_fm_cmdhdl = NULL;
        }

        if (state->hs_reg_cmdhdl) {
                ddi_regs_map_free(&state->hs_reg_cmdhdl);
                state->hs_reg_cmdhdl = NULL;
        }

        /* HERMON_DRV_CLEANUP_LEVEL0 */
        if (state->hs_msix_tbl_entries) {
                kmem_free(state->hs_msix_tbl_entries,
                    state->hs_msix_tbl_size);
                state->hs_msix_tbl_entries = NULL;
        }

        if (state->hs_msix_pba_entries) {
                kmem_free(state->hs_msix_pba_entries,
                    state->hs_msix_pba_size);
                state->hs_msix_pba_entries = NULL;
        }

        if (state->hs_fm_msix_tblhdl) {
                hermon_regs_map_free(state, &state->hs_fm_msix_tblhdl);
                state->hs_fm_msix_tblhdl = NULL;
        }

        if (state->hs_reg_msix_tblhdl) {
                ddi_regs_map_free(&state->hs_reg_msix_tblhdl);
                state->hs_reg_msix_tblhdl = NULL;
        }

        if (state->hs_fm_msix_pbahdl) {
                hermon_regs_map_free(state, &state->hs_fm_msix_pbahdl);
                state->hs_fm_msix_pbahdl = NULL;
        }

        if (state->hs_reg_msix_pbahdl) {
                ddi_regs_map_free(&state->hs_reg_msix_pbahdl);
                state->hs_reg_msix_pbahdl = NULL;
        }

        if (state->hs_fm_pcihdl) {
                hermon_pci_config_teardown(state, &state->hs_fm_pcihdl);
                state->hs_fm_pcihdl = NULL;
        }

        if (state->hs_reg_pcihdl) {
                pci_config_teardown(&state->hs_reg_pcihdl);
                state->hs_reg_pcihdl = NULL;
        }
}


/*
 * hermon_isr_init()
 *    Context: Only called from attach() path context
 *
 * Attach hermon_isr() to every allocated interrupt/MSI vector and
 * enable each one, using block-enable when the capability flags say
 * the vectors must be enabled as a group.
 */
static int
hermon_isr_init(hermon_state_t *state)
{
        int     status;
        int     i;

        for (i = 0; i < state->hs_intrmsi_allocd; i++) {
                /* Attach the ISR to this interrupt/MSI vector */
                status = ddi_intr_add_handler(state->hs_intrmsi_hdl[i],
                    hermon_isr, (caddr_t)state, (void *)(uintptr_t)i);
                if (status != DDI_SUCCESS) {
                        return (DDI_FAILURE);
                }

                /*
                 * Enable the interrupt.  The DDI_INTR_FLAG_BLOCK
                 * capability dictates whether ddi_intr_block_enable()
                 * or ddi_intr_enable() must be used.
                 */
                if (state->hs_intrmsi_cap & DDI_INTR_FLAG_BLOCK) {
                        status = ddi_intr_block_enable(
                            &state->hs_intrmsi_hdl[i], 1);
                } else {
                        status = ddi_intr_enable(state->hs_intrmsi_hdl[i]);
                }
                if (status != DDI_SUCCESS) {
                        return (DDI_FAILURE);
                }
        }

        /*
         * Now that the ISR has been enabled, defer arm_all  EQs for event
         * generation until later, in case MSIX is enabled
         */
        return (DDI_SUCCESS);
}


/*
 * hermon_isr_fini()
 *    Context: Only called from attach() and/or detach() path contexts
 *
 * Inverse of hermon_isr_init(): disable each interrupt/MSI vector
 * (block-disable when so capable) and detach its handler.
 */
static void
hermon_isr_fini(hermon_state_t *state)
{
        int     i;

        for (i = 0; i < state->hs_intrmsi_allocd; i++) {
                /* Disable the interrupt (block variant if so capable) */
                if (state->hs_intrmsi_cap & DDI_INTR_FLAG_BLOCK) {
                        (void) ddi_intr_block_disable(
                            &state->hs_intrmsi_hdl[i], 1);
                } else {
                        (void) ddi_intr_disable(state->hs_intrmsi_hdl[i]);
                }

                /* Detach the ISR from this interrupt/MSI vector */
                (void) ddi_intr_remove_handler(state->hs_intrmsi_hdl[i]);
        }
}


/*
 * hermon_size_icm()
 *
 * Sum of ICM configured values:
 *     cMPT, dMPT, MTT, QPC, SRQC, RDB, CQC, ALTC, AUXC, EQC, MCG
 *
 * Returns the total number of bytes of ICM backing memory required
 * for the entry counts established in the configuration profile,
 * using the per-entry sizes reported by QUERY_DEV_LIM.
 */
static uint64_t
hermon_size_icm(hermon_state_t *state)
{
        hermon_hw_querydevlim_t *devlim;
        hermon_cfg_profile_t    *cfg;
        uint64_t                num_cmpts, num_dmpts, num_mtts;
        uint64_t                num_qpcs, num_srqc, num_rdbs;
#ifndef HERMON_FW_WORKAROUND
        uint64_t                num_auxc;
#endif
        uint64_t                num_cqcs, num_altc;
        uint64_t                num_eqcs, num_mcgs;
        uint64_t                size;

        devlim = &state->hs_devlim;
        cfg = state->hs_cfg_profile;
        /* number of respective entries (profile stores log2 counts) */
        num_cmpts = (uint64_t)0x1 << cfg->cp_log_num_cmpt;
        num_mtts = (uint64_t)0x1 << cfg->cp_log_num_mtt;
        num_dmpts = (uint64_t)0x1 << cfg->cp_log_num_dmpt;
        num_qpcs = (uint64_t)0x1 << cfg->cp_log_num_qp;
        num_srqc = (uint64_t)0x1 << cfg->cp_log_num_srq;
        num_rdbs = (uint64_t)0x1 << cfg->cp_log_num_rdb;
        num_cqcs = (uint64_t)0x1 << cfg->cp_log_num_cq;
        /* ALTC (and AUXC) are sized per-QP */
        num_altc = (uint64_t)0x1 << cfg->cp_log_num_qp;
#ifndef HERMON_FW_WORKAROUND
        num_auxc = (uint64_t)0x1 << cfg->cp_log_num_qp;
#endif
        num_eqcs = (uint64_t)0x1 << cfg->cp_log_num_eq;
        num_mcgs = (uint64_t)0x1 << cfg->cp_log_num_mcg;

        /* each table contributes entry-count times its per-entry size */
        size =
            num_cmpts   * devlim->cmpt_entry_sz +
            num_dmpts   * devlim->dmpt_entry_sz +
            num_mtts    * devlim->mtt_entry_sz +
            num_qpcs    * devlim->qpc_entry_sz +
            num_srqc    * devlim->srq_entry_sz +
            num_rdbs    * devlim->rdmardc_entry_sz +
            num_cqcs    * devlim->cqc_entry_sz +
            num_altc    * devlim->altc_entry_sz +
#ifdef HERMON_FW_WORKAROUND
            /*
             * Fixed 2GB reservation in place of the computed AUXC size;
             * works around a problem in pre-2_7_000 firmware (see the
             * HERMON_FW_WORKAROUND definition at the top of this file).
             */
            0x80000000ull +
#else
            num_auxc    * devlim->aux_entry_sz  +
#endif
            num_eqcs    * devlim->eqc_entry_sz +
            num_mcgs    * HERMON_MCGMEM_SZ(state);
        return (size);
}


/*
 * hermon_hw_init()
 *    Context: Only called from attach() path context
 */
static int
hermon_hw_init(hermon_state_t *state)
{
        hermon_drv_cleanup_level_t      cleanup;
        sm_nodeinfo_t                   nodeinfo;
        uint64_t                        clr_intr_offset;
        int                             status;
        uint32_t                        fw_size;        /* in page */
        uint64_t                        offset;

        /* This is where driver initialization begins */
        cleanup = HERMON_DRV_CLEANUP_LEVEL0;

        /* Setup device access attributes */
        state->hs_reg_accattr.devacc_attr_version = DDI_DEVICE_ATTR_V1;
        state->hs_reg_accattr.devacc_attr_endian_flags = DDI_STRUCTURE_BE_ACC;
        state->hs_reg_accattr.devacc_attr_dataorder = DDI_STRICTORDER_ACC;
        state->hs_reg_accattr.devacc_attr_access = DDI_DEFAULT_ACC;

        /* Setup fma-protected access attributes */
        state->hs_fm_accattr.devacc_attr_version =
            hermon_devacc_attr_version(state);
        state->hs_fm_accattr.devacc_attr_endian_flags = DDI_STRUCTURE_BE_ACC;
        state->hs_fm_accattr.devacc_attr_dataorder = DDI_STRICTORDER_ACC;
        /* set acc err protection type */
        state->hs_fm_accattr.devacc_attr_access =
            hermon_devacc_attr_access(state);

        /* Setup for PCI config read/write of HCA device */
        status = hermon_pci_config_setup(state, &state->hs_fm_pcihdl);
        if (status != DDI_SUCCESS) {
                hermon_hw_fini(state, cleanup);
                HERMON_ATTACH_MSG(state->hs_attach_buf,
                    "hw_init_PCI_config_space_regmap_fail");
                /* This case is not the degraded one */
                return (DDI_FAILURE);
        }

        /* Map PCI config space and MSI-X tables/pba */
        hermon_set_msix_info(state);

        /* Map in Hermon registers (CMD, UAR, MSIX) and setup offsets */
        status = hermon_regs_map_setup(state, HERMON_CMD_BAR,
            &state->hs_reg_cmd_baseaddr, 0, 0, &state->hs_fm_accattr,
            &state->hs_fm_cmdhdl);
        if (status != DDI_SUCCESS) {
                hermon_hw_fini(state, cleanup);
                HERMON_ATTACH_MSG(state->hs_attach_buf,
                    "hw_init_CMD_BAR_regmap_fail");
                /* This case is not the degraded one */
                return (DDI_FAILURE);
        }

        cleanup = HERMON_DRV_CLEANUP_LEVEL1;
        /*
         * We defer UAR-BAR mapping until later.  Need to know if
         * blueflame mapping is to be done, and don't know that until after
         * we get the dev_caps, so do it right after that
         */

        /*
         * There is a third BAR defined for Hermon - it is for MSIX
         *
         * Will need to explore it's possible need/use w/ Mellanox
         * [es] Temporary mapping maybe
         */

#ifdef HERMON_SUPPORTS_MSIX_BAR
        status = ddi_regs_map_setup(state->hs_dip, HERMON_MSIX_BAR,
            &state->hs_reg_msi_baseaddr, 0, 0, &state->hs_reg_accattr,
            &state->hs_reg_msihdl);
        if (status != DDI_SUCCESS) {
                hermon_hw_fini(state, cleanup);
                HERMON_ATTACH_MSG(state->hs_attach_buf,
                    "hw_init_MSIX_BAR_regmap_fail");
                /* This case is not the degraded one */
                return (DDI_FAILURE);
        }
#endif

        cleanup = HERMON_DRV_CLEANUP_LEVEL2;

        /*
         * Save interesting registers away. The offsets of the first two
         * here (HCR and sw_reset) are detailed in the PRM, the others are
         * derived from values in the QUERY_FW output, so we'll save them
         * off later.
         */
        /* Host Command Register (HCR) */
        state->hs_cmd_regs.hcr = (hermon_hw_hcr_t *)
            ((uintptr_t)state->hs_reg_cmd_baseaddr + HERMON_CMD_HCR_OFFSET);
        state->hs_cmd_toggle = 0;       /* initialize it for use */

        /* Software Reset register (sw_reset) and semaphore */
        state->hs_cmd_regs.sw_reset = (uint32_t *)
            ((uintptr_t)state->hs_reg_cmd_baseaddr +
            HERMON_CMD_SW_RESET_OFFSET);
        state->hs_cmd_regs.sw_semaphore = (uint32_t *)
            ((uintptr_t)state->hs_reg_cmd_baseaddr +
            HERMON_CMD_SW_SEMAPHORE_OFFSET);

        /* make sure init'd before we start filling things in */
        bzero(&state->hs_hcaparams, sizeof (struct hermon_hw_initqueryhca_s));

        /* Initialize the Phase1 configuration profile */
        status = hermon_cfg_profile_init_phase1(state);
        if (status != DDI_SUCCESS) {
                hermon_hw_fini(state, cleanup);
                HERMON_ATTACH_MSG(state->hs_attach_buf,
                    "hw_init_cfginit1_fail");
                /* This case is not the degraded one */
                return (DDI_FAILURE);
        }
        cleanup = HERMON_DRV_CLEANUP_LEVEL3;

        /* Do a software reset of the adapter to ensure proper state */
        status = hermon_sw_reset(state);
        if (status != HERMON_CMD_SUCCESS) {
                hermon_hw_fini(state, cleanup);
                HERMON_ATTACH_MSG(state->hs_attach_buf,
                    "hw_init_sw_reset_fail");
                /* This case is not the degraded one */
                return (DDI_FAILURE);
        }

        /* Initialize mailboxes */
        status = hermon_rsrc_init_phase1(state);
        if (status != DDI_SUCCESS) {
                hermon_hw_fini(state, cleanup);
                HERMON_ATTACH_MSG(state->hs_attach_buf,
                    "hw_init_rsrcinit1_fail");
                /* This case is not the degraded one */
                return (DDI_FAILURE);
        }
        cleanup = HERMON_DRV_CLEANUP_LEVEL4;

        /* Post QUERY_FW */
        status = hermon_cmn_query_cmd_post(state, QUERY_FW, 0, 0, &state->hs_fw,
            sizeof (hermon_hw_queryfw_t), HERMON_CMD_NOSLEEP_SPIN);
        if (status != HERMON_CMD_SUCCESS) {
                cmn_err(CE_NOTE, "QUERY_FW command failed: %08x\n", status);
                hermon_hw_fini(state, cleanup);
                HERMON_ATTACH_MSG(state->hs_attach_buf,
                    "hw_init_query_fw_cmd_fail");
                /* This case is not the degraded one */
                return (DDI_FAILURE);
        }

        /* Validate what/that HERMON FW version is appropriate */

        status = hermon_fw_version_check(state);
        if (status != DDI_SUCCESS) {
                HERMON_FMANOTE(state, HERMON_FMA_FWVER);
                if (state->hs_operational_mode == HERMON_HCA_MODE) {
                        cmn_err(CE_CONT, "Unsupported Hermon FW version: "
                            "expected: %04d.%04d.%04d, "
                            "actual: %04d.%04d.%04d\n",
                            HERMON_FW_VER_MAJOR,
                            HERMON_FW_VER_MINOR,
                            HERMON_FW_VER_SUBMINOR,
                            state->hs_fw.fw_rev_major,
                            state->hs_fw.fw_rev_minor,
                            state->hs_fw.fw_rev_subminor);
                } else {
                        cmn_err(CE_CONT, "Unsupported FW version: "
                            "%04d.%04d.%04d\n",
                            state->hs_fw.fw_rev_major,
                            state->hs_fw.fw_rev_minor,
                            state->hs_fw.fw_rev_subminor);
                }
                state->hs_operational_mode = HERMON_MAINTENANCE_MODE;
                state->hs_fm_degraded_reason = HCA_FW_MISMATCH;
                hermon_hw_fini(state, cleanup);
                HERMON_ATTACH_MSG(state->hs_attach_buf,
                    "hw_init_checkfwver_fail");
                /* This case is the degraded one */
                return (HERMON_CMD_BAD_NVMEM);
        }

        /*
         * Save off the rest of the interesting registers that we'll be using.
         * Setup the offsets for the other registers.
         */

        /*
         * Hermon does the intr_offset from the BAR - technically should get the
         * BAR info from the response, but PRM says it's from BAR0-1, which is
         * for us the CMD BAR
         */

        clr_intr_offset  = state->hs_fw.clr_intr_offs & HERMON_CMD_OFFSET_MASK;

        /* Save Clear Interrupt address */
        state->hs_cmd_regs.clr_intr = (uint64_t *)
            (uintptr_t)(state->hs_reg_cmd_baseaddr + clr_intr_offset);

        /*
         * Set the error buffer also into the structure - used in hermon_event.c
         * to check for internal error on the HCA, not reported in eqe or
         * (necessarily) by interrupt
         */
        state->hs_cmd_regs.fw_err_buf = (uint32_t *)(uintptr_t)
            (state->hs_reg_cmd_baseaddr + state->hs_fw.error_buf_addr);

        /*
         * Invoke a polling thread to check the error buffer periodically.
         */
        if (!hermon_no_inter_err_chk) {
                state->hs_fm_poll_thread = ddi_periodic_add(
                    hermon_inter_err_chk, (void *)state, FM_POLL_INTERVAL,
                    DDI_IPL_0);
        }

        cleanup = HERMON_DRV_CLEANUP_LEVEL5;

        /*
         * Allocate, map, and run the HCA Firmware.
         */

        /* Allocate memory for the firmware to load into and map it */

        /* get next higher power of 2 */
        fw_size = 1 << highbit(state->hs_fw.fw_pages);
        state->hs_fw_dma.length = fw_size << HERMON_PAGESHIFT;
        status = hermon_dma_alloc(state, &state->hs_fw_dma, MAP_FA);
        if (status != DDI_SUCCESS) {
                cmn_err(CE_NOTE, "FW alloc failed\n");
                hermon_hw_fini(state, cleanup);
                HERMON_ATTACH_MSG(state->hs_attach_buf,
                    "hw_init_dma_alloc_fw_fail");
                /* This case is not the degraded one */
                return (DDI_FAILURE);
        }

        cleanup = HERMON_DRV_CLEANUP_LEVEL6;

        /* Invoke the RUN_FW cmd to run the firmware */
        status = hermon_run_fw_cmd_post(state);
        if (status != DDI_SUCCESS) {
                cmn_err(CE_NOTE, "RUN_FW command failed: 0x%08x\n", status);
                if (status == HERMON_CMD_BAD_NVMEM) {
                        state->hs_operational_mode = HERMON_MAINTENANCE_MODE;
                        state->hs_fm_degraded_reason = HCA_FW_CORRUPT;
                }
                hermon_hw_fini(state, cleanup);
                HERMON_ATTACH_MSG(state->hs_attach_buf, "hw_init_run_fw_fail");
                /*
                 * If the status is HERMON_CMD_BAD_NVMEM, it's likely the
                 * firmware is corrupted, so the mode falls into the
                 * maintenance mode.
                 */
                return (status == HERMON_CMD_BAD_NVMEM ? HERMON_CMD_BAD_NVMEM :
                    DDI_FAILURE);
        }


        /*
         * QUERY DEVICE LIMITS/CAPABILITIES
         * NOTE - in Hermon, the command is changed to QUERY_DEV_CAP,
         * but for familiarity we have kept the structure name the
         * same as Tavor/Arbel
         */

        status = hermon_cmn_query_cmd_post(state, QUERY_DEV_CAP, 0, 0,
            &state->hs_devlim, sizeof (hermon_hw_querydevlim_t),
            HERMON_CMD_NOSLEEP_SPIN);
        if (status != HERMON_CMD_SUCCESS) {
                cmn_err(CE_NOTE, "QUERY_DEV_CAP command failed: 0x%08x\n",
                    status);
                hermon_hw_fini(state, cleanup);
                HERMON_ATTACH_MSG(state->hs_attach_buf, "hw_init_devcap_fail");
                /* This case is not the degraded one */
                return (DDI_FAILURE);
        }

        state->hs_rsvd_eqs = max(state->hs_devlim.num_rsvd_eq,
            (4 * state->hs_devlim.num_rsvd_uar));

        /* now we have enough info to map in the UAR BAR */
        /*
         * First, we figure out how to map the BAR for UAR - use only half if
         * BlueFlame is enabled.  In that case the mapped length is half of
         * the total UAR BAR, i.e. (1 << (log_max_uar_sz + 20)) bytes, since
         * log_max_uar_sz is expressed in units of 1MB (hence the +20).
         */

        if (state->hs_devlim.blu_flm) {         /* Blue Flame Enabled */
                offset = (uint64_t)1 << (state->hs_devlim.log_max_uar_sz + 20);
        } else {
                offset = 0;     /* a zero length means map the whole thing */
        }
        status = hermon_regs_map_setup(state, HERMON_UAR_BAR,
            &state->hs_reg_uar_baseaddr, 0, offset, &state->hs_fm_accattr,
            &state->hs_fm_uarhdl);
        if (status != DDI_SUCCESS) {
                HERMON_ATTACH_MSG(state->hs_attach_buf, "UAR BAR mapping");
                /* This case is not the degraded one */
                return (DDI_FAILURE);
        }

        /* and if BlueFlame is enabled, map the other half there */
        if (state->hs_devlim.blu_flm) {         /* Blue Flame Enabled */
                offset = (uint64_t)1 << (state->hs_devlim.log_max_uar_sz + 20);
                status = ddi_regs_map_setup(state->hs_dip, HERMON_UAR_BAR,
                    &state->hs_reg_bf_baseaddr, offset, offset,
                    &state->hs_reg_accattr, &state->hs_reg_bfhdl);
                if (status != DDI_SUCCESS) {
                        HERMON_ATTACH_MSG(state->hs_attach_buf,
                            "BlueFlame BAR mapping");
                        /* This case is not the degraded one */
                        return (DDI_FAILURE);
                }
                /* This will be used in hw_fini if we fail to init. */
                state->hs_bf_offset = offset;
        }
        cleanup = HERMON_DRV_CLEANUP_LEVEL7;

        /* Hermon has a couple of things needed for phase 2 in query port */

        status = hermon_cmn_query_cmd_post(state, QUERY_PORT, 0, 0x01,
            &state->hs_queryport, sizeof (hermon_hw_query_port_t),
            HERMON_CMD_NOSLEEP_SPIN);
        if (status != HERMON_CMD_SUCCESS) {
                cmn_err(CE_NOTE, "QUERY_PORT command failed: 0x%08x\n",
                    status);
                hermon_hw_fini(state, cleanup);
                HERMON_ATTACH_MSG(state->hs_attach_buf,
                    "hw_init_queryport_fail");
                /* This case is not the degraded one */
                return (DDI_FAILURE);
        }

        /* Initialize the Phase2 Hermon configuration profile */
        status = hermon_cfg_profile_init_phase2(state);
        if (status != DDI_SUCCESS) {
                cmn_err(CE_NOTE, "CFG phase 2 failed: 0x%08x\n", status);
                hermon_hw_fini(state, cleanup);
                HERMON_ATTACH_MSG(state->hs_attach_buf,
                    "hw_init_cfginit2_fail");
                /* This case is not the degraded one */
                return (DDI_FAILURE);
        }

        /* Determine and set the ICM size */
        state->hs_icm_sz = hermon_size_icm(state);
        status           = hermon_set_icm_size_cmd_post(state);
        if (status != DDI_SUCCESS) {
                cmn_err(CE_NOTE, "Hermon: SET_ICM_SIZE cmd failed: 0x%08x\n",
                    status);
                hermon_hw_fini(state, cleanup);
                HERMON_ATTACH_MSG(state->hs_attach_buf,
                    "hw_init_seticmsz_fail");
                /* This case is not the degraded one */
                return (DDI_FAILURE);
        }
        /* alloc icm aux physical memory and map it */

        state->hs_icma_dma.length = 1 << highbit(state->hs_icma_sz);

        status = hermon_dma_alloc(state, &state->hs_icma_dma, MAP_ICM_AUX);
        if (status != DDI_SUCCESS) {
                cmn_err(CE_NOTE, "failed to alloc (0x%llx) bytes for ICMA\n",
                    (longlong_t)state->hs_icma_dma.length);
                hermon_hw_fini(state, cleanup);
                HERMON_ATTACH_MSG(state->hs_attach_buf,
                    "hw_init_dma_alloc_icm_aux_fail");
                /* This case is not the degraded one */
                return (DDI_FAILURE);
        }
        cleanup = HERMON_DRV_CLEANUP_LEVEL8;

        cleanup = HERMON_DRV_CLEANUP_LEVEL9;

        /* Allocate an array of structures to house the ICM tables */
        state->hs_icm = kmem_zalloc(HERMON_NUM_ICM_RESOURCES *
            sizeof (hermon_icm_table_t), KM_SLEEP);

        /* Set up the ICM address space and the INIT_HCA command input */
        status = hermon_icm_config_setup(state, &state->hs_hcaparams);
        if (status != HERMON_CMD_SUCCESS) {
                cmn_err(CE_NOTE, "ICM configuration failed\n");
                hermon_hw_fini(state, cleanup);
                HERMON_ATTACH_MSG(state->hs_attach_buf,
                    "hw_init_icm_config_setup_fail");
                /* This case is not the degraded one */
                return (DDI_FAILURE);
        }
        cleanup = HERMON_DRV_CLEANUP_LEVEL10;

        /* Initialize the adapter with the INIT_HCA cmd */
        status = hermon_init_hca_cmd_post(state, &state->hs_hcaparams,
            HERMON_CMD_NOSLEEP_SPIN);
        if (status != HERMON_CMD_SUCCESS) {
                cmn_err(CE_NOTE, "INIT_HCA command failed: %08x\n", status);
                hermon_hw_fini(state, cleanup);
                HERMON_ATTACH_MSG(state->hs_attach_buf, "hw_init_hca_fail");
                /* This case is not the degraded one */
                return (DDI_FAILURE);
        }
        cleanup = HERMON_DRV_CLEANUP_LEVEL11;

        /* Enter the second phase of init for Hermon configuration/resources */
        status = hermon_rsrc_init_phase2(state);
        if (status != DDI_SUCCESS) {
                hermon_hw_fini(state, cleanup);
                HERMON_ATTACH_MSG(state->hs_attach_buf,
                    "hw_init_rsrcinit2_fail");
                /* This case is not the degraded one */
                return (DDI_FAILURE);
        }
        cleanup = HERMON_DRV_CLEANUP_LEVEL12;

        /* Query the adapter via QUERY_ADAPTER */
        status = hermon_cmn_query_cmd_post(state, QUERY_ADAPTER, 0, 0,
            &state->hs_adapter, sizeof (hermon_hw_queryadapter_t),
            HERMON_CMD_NOSLEEP_SPIN);
        if (status != HERMON_CMD_SUCCESS) {
                cmn_err(CE_NOTE, "Hermon: QUERY_ADAPTER command failed: %08x\n",
                    status);
                hermon_hw_fini(state, cleanup);
                HERMON_ATTACH_MSG(state->hs_attach_buf,
                    "hw_init_query_adapter_fail");
                /* This case is not the degraded one */
                return (DDI_FAILURE);
        }

        /* Allocate protection domain (PD) for Hermon internal use */
        status = hermon_pd_alloc(state, &state->hs_pdhdl_internal,
            HERMON_SLEEP);
        if (status != DDI_SUCCESS) {
                cmn_err(CE_NOTE, "failed to alloc internal PD\n");
                hermon_hw_fini(state, cleanup);
                HERMON_ATTACH_MSG(state->hs_attach_buf,
                    "hw_init_internal_pd_alloc_fail");
                /* This case is not the degraded one */
                return (DDI_FAILURE);
        }
        cleanup = HERMON_DRV_CLEANUP_LEVEL13;

        /* Setup UAR page for kernel use */
        status = hermon_internal_uarpg_init(state);
        if (status != DDI_SUCCESS) {
                cmn_err(CE_NOTE, "failed to setup internal UAR\n");
                hermon_hw_fini(state, cleanup);
                HERMON_ATTACH_MSG(state->hs_attach_buf,
                    "hw_init_internal_uarpg_alloc_fail");
                /* This case is not the degraded one */
                return (DDI_FAILURE);
        }
        cleanup = HERMON_DRV_CLEANUP_LEVEL14;

        /* Query and initialize the Hermon interrupt/MSI information */
        status = hermon_intr_or_msi_init(state);
        if (status != DDI_SUCCESS) {
                cmn_err(CE_NOTE, "failed to setup INTR/MSI\n");
                hermon_hw_fini(state, cleanup);
                HERMON_ATTACH_MSG(state->hs_attach_buf,
                    "hw_init_intr_or_msi_init_fail");
                /* This case is not the degraded one */
                return (DDI_FAILURE);
        }
        cleanup = HERMON_DRV_CLEANUP_LEVEL15;

        status = hermon_isr_init(state);        /* set up the isr */
        if (status != DDI_SUCCESS) {
                cmn_err(CE_NOTE, "failed to init isr\n");
                hermon_hw_fini(state, cleanup);
                HERMON_ATTACH_MSG(state->hs_attach_buf,
                    "hw_init_isrinit_fail");
                /* This case is not the degraded one */
                return (DDI_FAILURE);
        }
        cleanup = HERMON_DRV_CLEANUP_LEVEL16;

        /* Setup the event queues */
        status = hermon_eq_init_all(state);
        if (status != DDI_SUCCESS) {
                cmn_err(CE_NOTE, "failed to init EQs\n");
                hermon_hw_fini(state, cleanup);
                HERMON_ATTACH_MSG(state->hs_attach_buf,
                    "hw_init_eqinitall_fail");
                /* This case is not the degraded one */
                return (DDI_FAILURE);
        }
        cleanup = HERMON_DRV_CLEANUP_LEVEL17;



        /* Reserve contexts for QP0 and QP1 */
        status = hermon_special_qp_contexts_reserve(state);
        if (status != DDI_SUCCESS) {
                cmn_err(CE_NOTE, "failed to init special QPs\n");
                hermon_hw_fini(state, cleanup);
                HERMON_ATTACH_MSG(state->hs_attach_buf,
                    "hw_init_rsrv_sqp_fail");
                /* This case is not the degraded one */
                return (DDI_FAILURE);
        }
        cleanup = HERMON_DRV_CLEANUP_LEVEL18;

        /* Initialize for multicast group handling */
        status = hermon_mcg_init(state);
        if (status != DDI_SUCCESS) {
                cmn_err(CE_NOTE, "failed to init multicast\n");
                hermon_hw_fini(state, cleanup);
                HERMON_ATTACH_MSG(state->hs_attach_buf,
                    "hw_init_mcg_init_fail");
                /* This case is not the degraded one */
                return (DDI_FAILURE);
        }
        cleanup = HERMON_DRV_CLEANUP_LEVEL19;

        /* Initialize the Hermon IB port(s) */
        status = hermon_hca_port_init(state);
        if (status != DDI_SUCCESS) {
                cmn_err(CE_NOTE, "failed to init HCA Port\n");
                hermon_hw_fini(state, cleanup);
                HERMON_ATTACH_MSG(state->hs_attach_buf,
                    "hw_init_hca_port_init_fail");
                /* This case is not the degraded one */
                return (DDI_FAILURE);
        }

        cleanup = HERMON_DRV_CLEANUP_ALL;

        /* Determine NodeGUID and SystemImageGUID */
        status = hermon_getnodeinfo_cmd_post(state, HERMON_CMD_NOSLEEP_SPIN,
            &nodeinfo);
        if (status != HERMON_CMD_SUCCESS) {
                cmn_err(CE_NOTE, "GetNodeInfo command failed: %08x\n", status);
                hermon_hw_fini(state, cleanup);
                HERMON_ATTACH_MSG(state->hs_attach_buf,
                    "hw_init_getnodeinfo_cmd_fail");
                /* This case is not the degraded one */
                return (DDI_FAILURE);
        }

        /*
         * If the NodeGUID value was set in OBP properties, then we use that
         * value.  But we still print a message if the value we queried from
         * firmware does not match this value.
         *
         * Otherwise if OBP value is not set then we use the value from
         * firmware unconditionally.
         */
        if (state->hs_cfg_profile->cp_nodeguid) {
                state->hs_nodeguid   = state->hs_cfg_profile->cp_nodeguid;
        } else {
                state->hs_nodeguid = nodeinfo.NodeGUID;
        }

        if (state->hs_nodeguid != nodeinfo.NodeGUID) {
                cmn_err(CE_NOTE, "!NodeGUID value queried from firmware "
                    "does not match value set by device property");
        }

        /*
         * If the SystemImageGUID value was set in OBP properties, then we use
         * that value.  But we still print a message if the value we queried
         * from firmware does not match this value.
         *
         * Otherwise if OBP value is not set then we use the value from
         * firmware unconditionally.
         */
        if (state->hs_cfg_profile->cp_sysimgguid) {
                state->hs_sysimgguid = state->hs_cfg_profile->cp_sysimgguid;
        } else {
                state->hs_sysimgguid = nodeinfo.SystemImageGUID;
        }

        if (state->hs_sysimgguid != nodeinfo.SystemImageGUID) {
                cmn_err(CE_NOTE, "!SystemImageGUID value queried from firmware "
                    "does not match value set by device property");
        }

        /* Get NodeDescription */
        status = hermon_getnodedesc_cmd_post(state, HERMON_CMD_NOSLEEP_SPIN,
            (sm_nodedesc_t *)&state->hs_nodedesc);
        if (status != HERMON_CMD_SUCCESS) {
                cmn_err(CE_CONT, "GetNodeDesc command failed: %08x\n", status);
                hermon_hw_fini(state, cleanup);
                HERMON_ATTACH_MSG(state->hs_attach_buf,
                    "hw_init_getnodedesc_cmd_fail");
                /* This case is not the degraded one */
                return (DDI_FAILURE);
        }

        return (DDI_SUCCESS);
}


/*
 * hermon_hw_fini()
 *    Context: Only called from attach() and/or detach() path contexts
 *
 *    Undoes the hardware/firmware initialization performed by
 *    hermon_hw_init().  The "cleanup" argument names the highest init
 *    level that was successfully completed (or HERMON_DRV_CLEANUP_ALL on
 *    a full detach); each case below tears down one init step and then
 *    falls through to the next-lower level, so teardown runs in exactly
 *    the reverse order of initialization.  Some teardown steps that fail
 *    cause an early return, deliberately abandoning the remaining
 *    cleanup (see the JBDB note below).
 */
static void
hermon_hw_fini(hermon_state_t *state, hermon_drv_cleanup_level_t cleanup)
{
        uint_t          num_ports;
        int             i, status;


        /*
         * JBDB - We might not want to run these returns in all cases of
         * Bad News. We should still attempt to free all of the DMA memory
         * resources...  This needs to be worked last, after all allocations
         * are implemented. For now, and possibly for later, this works.
         */

        switch (cleanup) {
        /*
         * If we add more driver initialization steps that should be cleaned
         * up here, we need to ensure that HERMON_DRV_CLEANUP_ALL is still the
         * first entry (i.e. corresponds to the last init step).
         */
        case HERMON_DRV_CLEANUP_ALL:
                /* Shutdown the Hermon IB port(s) */
                num_ports = state->hs_cfg_profile->cp_num_ports;
                (void) hermon_hca_ports_shutdown(state, num_ports);
                /* FALLTHROUGH */

        case HERMON_DRV_CLEANUP_LEVEL19:
                /* Teardown resources used for multicast group handling */
                hermon_mcg_fini(state);
                /* FALLTHROUGH */

        case HERMON_DRV_CLEANUP_LEVEL18:
                /* Unreserve the special QP (QP0/QP1) contexts */
                hermon_special_qp_contexts_unreserve(state);
                /* FALLTHROUGH */

        case HERMON_DRV_CLEANUP_LEVEL17:
                /*
                 * Attempt to teardown all event queues (EQ).  If we fail
                 * here then print a warning message and return.  Something
                 * (either in HW or SW) has gone seriously wrong.
                 */
                status = hermon_eq_fini_all(state);
                if (status != DDI_SUCCESS) {
                        HERMON_WARNING(state, "failed to teardown EQs");
                        return;
                }
                /* FALLTHROUGH */
        case HERMON_DRV_CLEANUP_LEVEL16:
                /* Teardown Hermon interrupts */
                hermon_isr_fini(state);
                /* FALLTHROUGH */

        case HERMON_DRV_CLEANUP_LEVEL15:
                /* Release the interrupt/MSI resources; abort on failure */
                status = hermon_intr_or_msi_fini(state);
                if (status != DDI_SUCCESS) {
                        HERMON_WARNING(state, "failed to free intr/MSI");
                        return;
                }
                /* FALLTHROUGH */

        case HERMON_DRV_CLEANUP_LEVEL14:
                /* Free the resources for the Hermon internal UAR pages */
                hermon_internal_uarpg_fini(state);
                /* FALLTHROUGH */

        case HERMON_DRV_CLEANUP_LEVEL13:
                /*
                 * Free the PD that was used internally by Hermon software.  If
                 * we fail here then print a warning and return.  Something
                 * (probably software-related, but perhaps HW) has gone wrong.
                 */
                status = hermon_pd_free(state, &state->hs_pdhdl_internal);
                if (status != DDI_SUCCESS) {
                        HERMON_WARNING(state, "failed to free internal PD");
                        return;
                }
                /* FALLTHROUGH */

        case HERMON_DRV_CLEANUP_LEVEL12:
                /* Cleanup all the phase2 resources first */
                hermon_rsrc_fini(state, HERMON_RSRC_CLEANUP_ALL);
                /* FALLTHROUGH */

        case HERMON_DRV_CLEANUP_LEVEL11:
                /* LEVEL11 is after INIT_HCA; no separate teardown step */
                /* FALLTHROUGH */


        case HERMON_DRV_CLEANUP_LEVEL10:
                /*
                 * Unmap the ICM memory area with UNMAP_ICM command.
                 * A failure here is reported but does not stop teardown.
                 */
                status = hermon_unmap_icm_cmd_post(state, NULL);
                if (status != DDI_SUCCESS) {
                        cmn_err(CE_WARN,
                            "hermon_hw_fini: failed to unmap ICM\n");
                }

                /* Free the initial ICM DMA handles */
                hermon_icm_dma_fini(state);

                /* Free the ICM table structures */
                hermon_icm_tables_fini(state);

                /* Free the ICM table handles */
                kmem_free(state->hs_icm, HERMON_NUM_ICM_RESOURCES *
                    sizeof (hermon_icm_table_t));

                /* FALLTHROUGH */

        case HERMON_DRV_CLEANUP_LEVEL9:
                /*
                 * Unmap the ICM Aux memory area with UNMAP_ICM_AUX command.
                 * A failure here is reported but does not stop teardown.
                 */
                status = hermon_unmap_icm_aux_cmd_post(state);
                if (status != HERMON_CMD_SUCCESS) {
                        cmn_err(CE_NOTE,
                            "hermon_hw_fini: failed to unmap ICMA\n");
                }
                /* FALLTHROUGH */

        case HERMON_DRV_CLEANUP_LEVEL8:
                /*
                 * Deallocate ICM Aux DMA memory.
                 */
                hermon_dma_free(&state->hs_icma_dma);
                /* FALLTHROUGH */

        case HERMON_DRV_CLEANUP_LEVEL7:
                /* Release the FMA-protected UAR BAR mapping, if present */
                if (state->hs_fm_uarhdl) {
                        hermon_regs_map_free(state, &state->hs_fm_uarhdl);
                        state->hs_fm_uarhdl = NULL;
                }

                /* Release the plain (non-FMA) UAR BAR mapping, if present */
                if (state->hs_reg_uarhdl) {
                        ddi_regs_map_free(&state->hs_reg_uarhdl);
                        state->hs_reg_uarhdl = NULL;
                }

                /*
                 * Release the BlueFlame half of the UAR BAR, if it was
                 * mapped (hs_bf_offset is only non-zero when BlueFlame
                 * mapping succeeded during hw_init).
                 */
                if (state->hs_bf_offset != 0 && state->hs_reg_bfhdl) {
                        ddi_regs_map_free(&state->hs_reg_bfhdl);
                        state->hs_reg_bfhdl = NULL;
                }

                /* Free any per-port P_Key and GUID tables that were set up */
                for (i = 0; i < HERMON_MAX_PORTS; i++) {
                        if (state->hs_pkey[i]) {
                                kmem_free(state->hs_pkey[i], (1 <<
                                    state->hs_cfg_profile->cp_log_max_pkeytbl) *
                                    sizeof (ib_pkey_t));
                                state->hs_pkey[i] = NULL;
                        }
                        if (state->hs_guid[i]) {
                                kmem_free(state->hs_guid[i], (1 <<
                                    state->hs_cfg_profile->cp_log_max_gidtbl) *
                                    sizeof (ib_guid_t));
                                state->hs_guid[i] = NULL;
                        }
                }
                /* FALLTHROUGH */

        case HERMON_DRV_CLEANUP_LEVEL6:
                /*
                 * Unmap the firmware memory area with UNMAP_FA command.
                 * A failure here is reported but does not stop teardown.
                 */
                status = hermon_unmap_fa_cmd_post(state);

                if (status != HERMON_CMD_SUCCESS) {
                        cmn_err(CE_NOTE,
                            "hermon_hw_fini: failed to unmap FW\n");
                }

                /*
                 * Deallocate firmware DMA memory.
                 */
                hermon_dma_free(&state->hs_fw_dma);
                /* FALLTHROUGH */

        case HERMON_DRV_CLEANUP_LEVEL5:
                /* Stop the FM error-buffer polling thread, if it was started */
                if (state->hs_fm_poll_thread) {
                        ddi_periodic_delete(state->hs_fm_poll_thread);
                        state->hs_fm_poll_thread = NULL;
                }
                /* FALLTHROUGH */

        case HERMON_DRV_CLEANUP_LEVEL4:
                /* Then cleanup the phase1 resources */
                hermon_rsrc_fini(state, HERMON_RSRC_CLEANUP_PHASE1_COMPLETE);
                /* FALLTHROUGH */

        case HERMON_DRV_CLEANUP_LEVEL3:
                /* Teardown any resources allocated for the config profile */
                hermon_cfg_profile_fini(state);
                /* FALLTHROUGH */

        case HERMON_DRV_CLEANUP_LEVEL2:
#ifdef HERMON_SUPPORTS_MSIX_BAR
                /*
                 * unmap 3rd BAR, MSIX BAR
                 */
                if (state->hs_reg_msihdl) {
                        ddi_regs_map_free(&state->hs_reg_msihdl);
                        state->hs_reg_msihdl = NULL;
                }
                /* FALLTHROUGH */
#endif
        case HERMON_DRV_CLEANUP_LEVEL1:
        case HERMON_DRV_CLEANUP_LEVEL0:
                /*
                 * LEVEL1 and LEVEL0 resources are freed in
                 * hermon_drv_fini2().
                 */
                break;

        default:
                HERMON_WARNING(state, "unexpected driver cleanup level");
                return;
        }
}


/*
 * hermon_soft_state_init()
 *    Context: Only called from attach() path context
 */
static int
hermon_soft_state_init(hermon_state_t *state)
{
        ibt_hca_attr_t          *hca_attr;
        uint64_t                maxval, val;
        ibt_hca_flags_t         caps = IBT_HCA_NO_FLAGS;
        ibt_hca_flags2_t        caps2 = IBT_HCA2_NO_FLAGS;
        int                     status;
        int                     max_send_wqe_bytes;
        int                     max_recv_wqe_bytes;

        /*
         * The ibc_hca_info_t struct is passed to the IBTF.  This is the
         * routine where we initialize it.  Many of the init values come from
         * either configuration variables or successful queries of the Hermon
         * hardware abilities
         */
        state->hs_ibtfinfo.hca_ci_vers  = IBCI_V4;
        state->hs_ibtfinfo.hca_handle   = (ibc_hca_hdl_t)state;
        state->hs_ibtfinfo.hca_ops      = &hermon_ibc_ops;

        hca_attr = kmem_zalloc(sizeof (ibt_hca_attr_t), KM_SLEEP);
        state->hs_ibtfinfo.hca_attr = hca_attr;

        hca_attr->hca_dip = state->hs_dip;
        hca_attr->hca_fw_major_version = state->hs_fw.fw_rev_major;
        hca_attr->hca_fw_minor_version = state->hs_fw.fw_rev_minor;
        hca_attr->hca_fw_micro_version = state->hs_fw.fw_rev_subminor;

        /* CQ interrupt moderation maximums - each limited to 16 bits */
        hca_attr->hca_max_cq_mod_count = 0xFFFF;
        hca_attr->hca_max_cq_mod_usec = 0xFFFF;
        hca_attr->hca_max_cq_handlers = state->hs_intrmsi_allocd;


        /*
         * Determine HCA capabilities:
         * No default support for IBT_HCA_RD, IBT_HCA_RAW_MULTICAST,
         *    IBT_HCA_ATOMICS_GLOBAL, IBT_HCA_RESIZE_CHAN, IBT_HCA_INIT_TYPE,
         *    or IBT_HCA_SHUTDOWN_PORT
         * But IBT_HCA_AH_PORT_CHECK, IBT_HCA_SQD_SQD_PORT, IBT_HCA_SI_GUID,
         *    IBT_HCA_RNR_NAK, IBT_HCA_CURRENT_QP_STATE, IBT_HCA_PORT_UP,
         *    IBT_HCA_SRQ, IBT_HCA_RESIZE_SRQ and IBT_HCA_FMR are always
         *    supported
         * All other features are conditionally supported, depending on the
         *    status return by the Hermon HCA in QUERY_DEV_LIM.
         */
        if (state->hs_devlim.ud_multi) {
                caps |= IBT_HCA_UD_MULTICAST;
        }
        if (state->hs_devlim.atomic) {
                caps |= IBT_HCA_ATOMICS_HCA;
        }
        if (state->hs_devlim.apm) {
                caps |= IBT_HCA_AUTO_PATH_MIG;
        }
        if (state->hs_devlim.pkey_v) {
                caps |= IBT_HCA_PKEY_CNTR;
        }
        if (state->hs_devlim.qkey_v) {
                caps |= IBT_HCA_QKEY_CNTR;
        }
        if (state->hs_devlim.ipoib_cksm) {
                caps |= IBT_HCA_CKSUM_FULL;
                caps2 |= IBT_HCA2_IP_CLASS;
        }
        if (state->hs_devlim.mod_wr_srq) {
                caps |= IBT_HCA_RESIZE_SRQ;
        }
        if (state->hs_devlim.lif) {
                caps |= IBT_HCA_LOCAL_INVAL_FENCE;
        }
        if (state->hs_devlim.reserved_lkey) {
                caps2 |= IBT_HCA2_RES_LKEY;
                hca_attr->hca_reserved_lkey = state->hs_devlim.rsv_lkey;
        }
        if (state->hs_devlim.local_inv && state->hs_devlim.remote_inv &&
            state->hs_devlim.fast_reg_wr) {     /* fw needs to be >= 2.7.000 */
                if ((state->hs_fw.fw_rev_major > 2) ||
                    ((state->hs_fw.fw_rev_major == 2) &&
                    (state->hs_fw.fw_rev_minor >= 7)))
                        caps2 |= IBT_HCA2_MEM_MGT_EXT;
        }
        if (state->hs_devlim.log_max_rss_tbl_sz) {
                hca_attr->hca_rss_max_log2_table =
                    state->hs_devlim.log_max_rss_tbl_sz;
                if (state->hs_devlim.rss_xor)
                        caps2 |= IBT_HCA2_RSS_XOR_ALG;
                if (state->hs_devlim.rss_toep)
                        caps2 |= IBT_HCA2_RSS_TPL_ALG;
        }
        /*
         * Note: "zb" is the device's zero-based-virtual-address capability
         * and "mps" is its multiple-page-size-per-MR capability.  These two
         * were previously cross-wired (mps->ZERO_BASED_VA, zb->MULT_PAGE_SZ),
         * so the IBTF was told about the wrong capabilities.
         */
        if (state->hs_devlim.zb) {
                caps |= IBT_HCA_ZERO_BASED_VA;
        }
        if (state->hs_devlim.mps) {
                caps |= IBT_HCA_MULT_PAGE_SZ_MR;
        }
        caps |= (IBT_HCA_AH_PORT_CHECK | IBT_HCA_SQD_SQD_PORT |
            IBT_HCA_SI_GUID | IBT_HCA_RNR_NAK | IBT_HCA_CURRENT_QP_STATE |
            IBT_HCA_PORT_UP | IBT_HCA_RC_SRQ | IBT_HCA_UD_SRQ | IBT_HCA_FMR);
        caps2 |= IBT_HCA2_DMA_MR;

        if (state->hs_devlim.log_max_gso_sz) {
                hca_attr->hca_max_lso_size =
                    (1 << state->hs_devlim.log_max_gso_sz);
                /* 64 = ctrl & datagram seg, 4 = LSO seg, 16 = 1 SGL */
                hca_attr->hca_max_lso_hdr_size =
                    state->hs_devlim.max_desc_sz_sq - (64 + 4 + 16);
        }

        caps |= IBT_HCA_WQE_SIZE_INFO;
        max_send_wqe_bytes = state->hs_devlim.max_desc_sz_sq;
        max_recv_wqe_bytes = state->hs_devlim.max_desc_sz_rq;
        /* SGL counts below are in 16-byte WQE segment units */
        hca_attr->hca_ud_send_sgl_sz = (max_send_wqe_bytes / 16) - 4;
        hca_attr->hca_conn_send_sgl_sz = (max_send_wqe_bytes / 16) - 1;
        hca_attr->hca_conn_rdma_sgl_overhead = 1;
        hca_attr->hca_conn_rdma_write_sgl_sz = (max_send_wqe_bytes / 16) - 2;
        hca_attr->hca_conn_rdma_read_sgl_sz = (512 / 16) - 2; /* see PRM */
        hca_attr->hca_recv_sgl_sz = max_recv_wqe_bytes / 16;

        /* We choose not to support "inline" unless it improves performance */
        hca_attr->hca_max_inline_size = 0;
        hca_attr->hca_ud_send_inline_sz = 0;
        hca_attr->hca_conn_send_inline_sz = 0;
        hca_attr->hca_conn_rdmaw_inline_overhead = 4;

#if defined(_ELF64)
        /* 32-bit kernels are too small for Fibre Channel over IB */
        if (state->hs_devlim.fcoib && (caps2 & IBT_HCA2_MEM_MGT_EXT)) {
                caps2 |= IBT_HCA2_FC;
                hca_attr->hca_rfci_max_log2_qp = 7;     /* 128 per port */
                hca_attr->hca_fexch_max_log2_qp = 16;   /* 64K per port */
                hca_attr->hca_fexch_max_log2_mem = 20;  /* 1MB per MPT */
        }
#endif

        hca_attr->hca_flags = caps;
        hca_attr->hca_flags2 = caps2;

        /*
         * Set hca_attr's IDs
         */
        hca_attr->hca_vendor_id  = state->hs_vendor_id;
        hca_attr->hca_device_id  = state->hs_device_id;
        hca_attr->hca_version_id = state->hs_revision_id;

        /*
         * Determine number of available QPs and max QP size.  Number of
         * available QPs is determined by subtracting the number of
         * "reserved QPs" (i.e. reserved for firmware use) from the
         * total number configured.
         */
        val = ((uint64_t)1 << state->hs_cfg_profile->cp_log_num_qp);
        hca_attr->hca_max_qp = val - ((uint64_t)1 <<
            state->hs_devlim.log_rsvd_qp);
        maxval  = ((uint64_t)1 << state->hs_devlim.log_max_qp_sz);
        val     = ((uint64_t)1 << state->hs_cfg_profile->cp_log_max_qp_sz);
        if (val > maxval) {
                kmem_free(hca_attr, sizeof (ibt_hca_attr_t));
                HERMON_ATTACH_MSG(state->hs_attach_buf,
                    "soft_state_init_maxqpsz_toobig_fail");
                return (DDI_FAILURE);
        }
        /* we need to reduce this by the max space needed for headroom */
        hca_attr->hca_max_qp_sz = (uint_t)val - (HERMON_QP_OH_SIZE >>
            HERMON_QP_WQE_LOG_MINIMUM) - 1;

        /*
         * Determine max scatter-gather size in WQEs. The HCA has split
         * the max sgl into rec'v Q and send Q values. Use the least.
         *
         * This is mainly useful for legacy clients.  Smart clients
         * such as IPoIB will use the IBT_HCA_WQE_SIZE_INFO sgl info.
         */
        if (state->hs_devlim.max_sg_rq <= state->hs_devlim.max_sg_sq) {
                maxval = state->hs_devlim.max_sg_rq;
        } else {
                maxval = state->hs_devlim.max_sg_sq;
        }
        val     = state->hs_cfg_profile->cp_wqe_max_sgl;
        if (val > maxval) {
                kmem_free(hca_attr, sizeof (ibt_hca_attr_t));
                HERMON_ATTACH_MSG(state->hs_attach_buf,
                    "soft_state_init_toomanysgl_fail");
                return (DDI_FAILURE);
        }
        /* If the rounded value for max SGL is too large, cap it */
        if (state->hs_cfg_profile->cp_wqe_real_max_sgl > maxval) {
                state->hs_cfg_profile->cp_wqe_real_max_sgl = (uint32_t)maxval;
                val = maxval;
        } else {
                val = state->hs_cfg_profile->cp_wqe_real_max_sgl;
        }

        hca_attr->hca_max_sgl    = (uint_t)val;
        hca_attr->hca_max_rd_sgl = 0;   /* zero because RD is unsupported */

        /*
         * Determine number of available CQs and max CQ size. Number of
         * available CQs is determined by subtracting the number of
         * "reserved CQs" (i.e. reserved for firmware use) from the
         * total number configured.
         */
        val = ((uint64_t)1 << state->hs_cfg_profile->cp_log_num_cq);
        hca_attr->hca_max_cq = val - ((uint64_t)1 <<
            state->hs_devlim.log_rsvd_cq);
        maxval  = ((uint64_t)1 << state->hs_devlim.log_max_cq_sz);
        val     = ((uint64_t)1 << state->hs_cfg_profile->cp_log_max_cq_sz) - 1;
        if (val > maxval) {
                kmem_free(hca_attr, sizeof (ibt_hca_attr_t));
                HERMON_ATTACH_MSG(state->hs_attach_buf,
                    "soft_state_init_maxcqsz_toobig_fail");
                return (DDI_FAILURE);
        }
        hca_attr->hca_max_cq_sz = (uint_t)val;

        /*
         * Determine number of available SRQs and max SRQ size. Number of
         * available SRQs is determined by subtracting the number of
         * "reserved SRQs" (i.e. reserved for firmware use) from the
         * total number configured.
         */
        val = ((uint64_t)1 << state->hs_cfg_profile->cp_log_num_srq);
        hca_attr->hca_max_srqs = val - ((uint64_t)1 <<
            state->hs_devlim.log_rsvd_srq);
        maxval  = ((uint64_t)1 << state->hs_devlim.log_max_srq_sz);
        val     = ((uint64_t)1 << state->hs_cfg_profile->cp_log_max_srq_sz);

        if (val > maxval) {
                kmem_free(hca_attr, sizeof (ibt_hca_attr_t));
                HERMON_ATTACH_MSG(state->hs_attach_buf,
                    "soft_state_init_maxsrqsz_toobig_fail");
                return (DDI_FAILURE);
        }
        hca_attr->hca_max_srqs_sz = (uint_t)val;

        val     = hca_attr->hca_recv_sgl_sz - 1; /* SRQ has a list link */
        maxval  = state->hs_devlim.max_sg_rq - 1;
        if (val > maxval) {
                kmem_free(hca_attr, sizeof (ibt_hca_attr_t));
                HERMON_ATTACH_MSG(state->hs_attach_buf,
                    "soft_state_init_toomanysrqsgl_fail");
                return (DDI_FAILURE);
        }
        hca_attr->hca_max_srq_sgl = (uint_t)val;

        /*
         * Determine supported HCA page sizes
         * XXX
         * For now we simply return the system pagesize as the only supported
         * pagesize
         */
        hca_attr->hca_page_sz = ((PAGESIZE == (1 << 13)) ? IBT_PAGE_8K :
            IBT_PAGE_4K);

        /*
         * Determine number of available MemReg, MemWin, and their max size.
         * Number of available MRs and MWs is determined by subtracting
         * the number of "reserved MPTs" (i.e. reserved for firmware use)
         * from the total number configured for each.
         */
        val = ((uint64_t)1 << state->hs_cfg_profile->cp_log_num_dmpt);
        hca_attr->hca_max_memr    = val - ((uint64_t)1 <<
            state->hs_devlim.log_rsvd_dmpt);
        hca_attr->hca_max_mem_win = state->hs_devlim.mem_win ? (val -
            ((uint64_t)1 << state->hs_devlim.log_rsvd_dmpt)) : 0;
        maxval  = state->hs_devlim.log_max_mrw_sz;
        val     = state->hs_cfg_profile->cp_log_max_mrw_sz;
        if (val > maxval) {
                kmem_free(hca_attr, sizeof (ibt_hca_attr_t));
                HERMON_ATTACH_MSG(state->hs_attach_buf,
                    "soft_state_init_maxmrwsz_toobig_fail");
                return (DDI_FAILURE);
        }
        hca_attr->hca_max_memr_len = ((uint64_t)1 << val);

        /* Determine RDMA/Atomic properties */
        val = ((uint64_t)1 << state->hs_cfg_profile->cp_log_num_rdb);
        hca_attr->hca_max_rsc = (uint_t)val;
        val = state->hs_cfg_profile->cp_hca_max_rdma_in_qp;
        hca_attr->hca_max_rdma_in_qp  = (uint8_t)val;
        val = state->hs_cfg_profile->cp_hca_max_rdma_out_qp;
        hca_attr->hca_max_rdma_out_qp = (uint8_t)val;
        hca_attr->hca_max_rdma_in_ee  = 0;
        hca_attr->hca_max_rdma_out_ee = 0;

        /*
         * Determine maximum number of raw IPv6 and Ether QPs.  Set to 0
         * because neither type of raw QP is supported
         */
        hca_attr->hca_max_ipv6_qp  = 0;
        hca_attr->hca_max_ether_qp = 0;

        /* Determine max number of MCGs and max QP-per-MCG */
        val = ((uint64_t)1 << state->hs_cfg_profile->cp_log_num_qp);
        hca_attr->hca_max_mcg_qps   = (uint_t)val;
        val = ((uint64_t)1 << state->hs_cfg_profile->cp_log_num_mcg);
        hca_attr->hca_max_mcg       = (uint_t)val;
        val = state->hs_cfg_profile->cp_num_qp_per_mcg;
        hca_attr->hca_max_qp_per_mcg = (uint_t)val;

        /* Determine max number partitions (i.e. PKeys) */
        maxval  = ((uint64_t)state->hs_cfg_profile->cp_num_ports <<
            state->hs_queryport.log_max_pkey);
        val     = ((uint64_t)state->hs_cfg_profile->cp_num_ports <<
            state->hs_cfg_profile->cp_log_max_pkeytbl);

        if (val > maxval) {
                kmem_free(hca_attr, sizeof (ibt_hca_attr_t));
                HERMON_ATTACH_MSG(state->hs_attach_buf,
                    "soft_state_init_toomanypkey_fail");
                return (DDI_FAILURE);
        }
        hca_attr->hca_max_partitions = (uint16_t)val;

        /* Determine number of ports */
        maxval = state->hs_devlim.num_ports;
        val = state->hs_cfg_profile->cp_num_ports;
        if ((val > maxval) || (val == 0)) {
                kmem_free(hca_attr, sizeof (ibt_hca_attr_t));
                HERMON_ATTACH_MSG(state->hs_attach_buf,
                    "soft_state_init_toomanyports_fail");
                return (DDI_FAILURE);
        }
        hca_attr->hca_nports = (uint8_t)val;

        /* Copy NodeGUID and SystemImageGUID from softstate */
        hca_attr->hca_node_guid = state->hs_nodeguid;
        hca_attr->hca_si_guid   = state->hs_sysimgguid;

        /*
         * Determine local ACK delay.  Use the value suggested by the Hermon
         * hardware (from the QUERY_DEV_CAP command)
         */
        hca_attr->hca_local_ack_delay = state->hs_devlim.ca_ack_delay;

        /* Determine max SGID table and PKey table sizes */
        val     = ((uint64_t)1 << state->hs_cfg_profile->cp_log_max_gidtbl);
        hca_attr->hca_max_port_sgid_tbl_sz = (uint_t)val;
        val     = ((uint64_t)1 << state->hs_cfg_profile->cp_log_max_pkeytbl);
        hca_attr->hca_max_port_pkey_tbl_sz = (uint16_t)val;

        /* Determine max number of PDs */
        maxval  = ((uint64_t)1 << state->hs_devlim.log_max_pd);
        val     = ((uint64_t)1 << state->hs_cfg_profile->cp_log_num_pd);
        if (val > maxval) {
                kmem_free(hca_attr, sizeof (ibt_hca_attr_t));
                HERMON_ATTACH_MSG(state->hs_attach_buf,
                    "soft_state_init_toomanypd_fail");
                return (DDI_FAILURE);
        }
        hca_attr->hca_max_pd = (uint_t)val;

        /* Determine max number of Address Handles (NOT IN ARBEL or HERMON) */
        hca_attr->hca_max_ah = 0;

        /* No RDDs or EECs (since Reliable Datagram is not supported) */
        hca_attr->hca_max_rdd = 0;
        hca_attr->hca_max_eec = 0;

        /* Initialize lock for reserved UAR page access */
        mutex_init(&state->hs_uar_lock, NULL, MUTEX_DRIVER,
            DDI_INTR_PRI(state->hs_intrmsi_pri));

        /* Initialize the flash fields */
        state->hs_fw_flashstarted = 0;
        mutex_init(&state->hs_fw_flashlock, NULL, MUTEX_DRIVER,
            DDI_INTR_PRI(state->hs_intrmsi_pri));

        /* Initialize the lock for the info ioctl */
        mutex_init(&state->hs_info_lock, NULL, MUTEX_DRIVER,
            DDI_INTR_PRI(state->hs_intrmsi_pri));

        /* Initialize the AVL tree for QP number support */
        hermon_qpn_avl_init(state);

        /* Initialize the cq_sched info structure */
        status = hermon_cq_sched_init(state);
        if (status != DDI_SUCCESS) {
                hermon_qpn_avl_fini(state);
                mutex_destroy(&state->hs_info_lock);
                mutex_destroy(&state->hs_fw_flashlock);
                mutex_destroy(&state->hs_uar_lock);
                kmem_free(hca_attr, sizeof (ibt_hca_attr_t));
                HERMON_ATTACH_MSG(state->hs_attach_buf,
                    "soft_state_init_cqsched_init_fail");
                return (DDI_FAILURE);
        }

        /* Initialize the fcoib info structure */
        status = hermon_fcoib_init(state);
        if (status != DDI_SUCCESS) {
                hermon_cq_sched_fini(state);
                hermon_qpn_avl_fini(state);
                mutex_destroy(&state->hs_info_lock);
                mutex_destroy(&state->hs_fw_flashlock);
                mutex_destroy(&state->hs_uar_lock);
                kmem_free(hca_attr, sizeof (ibt_hca_attr_t));
                HERMON_ATTACH_MSG(state->hs_attach_buf,
                    "soft_state_init_fcoibinit_fail");
                return (DDI_FAILURE);
        }

        /* Initialize the kstat info structure */
        status = hermon_kstat_init(state);
        if (status != DDI_SUCCESS) {
                hermon_fcoib_fini(state);
                hermon_cq_sched_fini(state);
                hermon_qpn_avl_fini(state);
                mutex_destroy(&state->hs_info_lock);
                mutex_destroy(&state->hs_fw_flashlock);
                mutex_destroy(&state->hs_uar_lock);
                kmem_free(hca_attr, sizeof (ibt_hca_attr_t));
                HERMON_ATTACH_MSG(state->hs_attach_buf,
                    "soft_state_init_kstatinit_fail");
                return (DDI_FAILURE);
        }

        return (DDI_SUCCESS);
}


/*
 * hermon_soft_state_fini()
 *    Context: Called only from detach() path context
 */
static void
hermon_soft_state_fini(hermon_state_t *state)
{
        /*
         * Release everything hermon_soft_state_init() created, in the
         * reverse order of initialization.
         */

        /* kstats, FCoIB state, CQ scheduling state, QPN AVL tree */
        hermon_kstat_fini(state);
        hermon_fcoib_fini(state);
        hermon_cq_sched_fini(state);
        hermon_qpn_avl_fini(state);

        /* The info-ioctl, flash, and reserved-UAR-page mutexes */
        mutex_destroy(&state->hs_info_lock);
        mutex_destroy(&state->hs_fw_flashlock);
        mutex_destroy(&state->hs_uar_lock);

        /* Finally, return the HCA attribute struct given to the IBTF */
        kmem_free(state->hs_ibtfinfo.hca_attr, sizeof (ibt_hca_attr_t));
}

/*
 * hermon_icm_config_setup()
 *    Context: Only called from attach() path context
 */
static int
hermon_icm_config_setup(hermon_state_t *state,
    hermon_hw_initqueryhca_t *inithca)
{
        hermon_hw_querydevlim_t *devlim;
        hermon_cfg_profile_t    *cfg;
        hermon_icm_table_t      *icm_p[HERMON_NUM_ICM_RESOURCES];
        hermon_icm_table_t      *icm;
        hermon_icm_table_t      *tmp;
        uint64_t                icm_addr;
        uint64_t                icm_size;
        int                     status, i, j;


        /* Bring in local devlims, cfg_profile and hs_icm table list */
        devlim = &state->hs_devlim;
        cfg = state->hs_cfg_profile;
        icm = state->hs_icm;

        /*
         * Assign each ICM table's entry size from data in the devlims,
         * except for RDB and MCG sizes, which are not returned in devlims
         * but do have a fixed size, and the UAR context entry size, which
         * we determine. For this, we use the "cp_num_pgs_per_uce" value
         * from our hs_cfg_profile.
         */
        icm[HERMON_CMPT].object_size    = devlim->cmpt_entry_sz;
        icm[HERMON_CMPT_QPC].object_size        = devlim->cmpt_entry_sz;
        icm[HERMON_CMPT_SRQC].object_size       = devlim->cmpt_entry_sz;
        icm[HERMON_CMPT_CQC].object_size        = devlim->cmpt_entry_sz;
        icm[HERMON_CMPT_EQC].object_size        = devlim->cmpt_entry_sz;
        icm[HERMON_MTT].object_size     = devlim->mtt_entry_sz;
        icm[HERMON_DMPT].object_size    = devlim->dmpt_entry_sz;
        icm[HERMON_QPC].object_size     = devlim->qpc_entry_sz;
        icm[HERMON_CQC].object_size     = devlim->cqc_entry_sz;
        icm[HERMON_SRQC].object_size    = devlim->srq_entry_sz;
        icm[HERMON_EQC].object_size     = devlim->eqc_entry_sz;
        /* RDB: one rdmardc entry per outstanding RDMA-read, per QP */
        icm[HERMON_RDB].object_size     = devlim->rdmardc_entry_sz *
            cfg->cp_hca_max_rdma_in_qp;
        icm[HERMON_MCG].object_size     = HERMON_MCGMEM_SZ(state);
        icm[HERMON_ALTC].object_size    = devlim->altc_entry_sz;
        icm[HERMON_AUXC].object_size    = devlim->aux_entry_sz;

        /* Assign each ICM table's log2 number of entries */
        icm[HERMON_CMPT].log_num_entries = cfg->cp_log_num_cmpt;
        icm[HERMON_CMPT_QPC].log_num_entries = cfg->cp_log_num_qp;
        icm[HERMON_CMPT_SRQC].log_num_entries = cfg->cp_log_num_srq;
        icm[HERMON_CMPT_CQC].log_num_entries = cfg->cp_log_num_cq;
        icm[HERMON_CMPT_EQC].log_num_entries = HERMON_NUM_EQ_SHIFT;
        icm[HERMON_MTT].log_num_entries = cfg->cp_log_num_mtt;
        icm[HERMON_DMPT].log_num_entries = cfg->cp_log_num_dmpt;
        icm[HERMON_QPC].log_num_entries = cfg->cp_log_num_qp;
        icm[HERMON_SRQC].log_num_entries = cfg->cp_log_num_srq;
        icm[HERMON_CQC].log_num_entries = cfg->cp_log_num_cq;
        icm[HERMON_EQC].log_num_entries = HERMON_NUM_EQ_SHIFT;
        /* RDB/ALTC/AUXC are sized in lockstep with the QPC table */
        icm[HERMON_RDB].log_num_entries = cfg->cp_log_num_qp;
        icm[HERMON_MCG].log_num_entries = cfg->cp_log_num_mcg;
        icm[HERMON_ALTC].log_num_entries = cfg->cp_log_num_qp;
        icm[HERMON_AUXC].log_num_entries = cfg->cp_log_num_qp;

        /* Initialize the ICM tables (computes table_size, spans, etc.) */
        hermon_icm_tables_init(state);

        /*
         * ICM tables must be aligned on their size in the ICM address
         * space. So, here we order the tables from largest total table
         * size to the smallest. All tables are a power of 2 in size, so
         * this will ensure that all tables are aligned on their own size
         * without wasting space in the ICM.
         *
         * In order to easily set the ICM addresses without needing to
         * worry about the ordering of our table indices as relates to
         * the hermon_rsrc_type_t enum, we will use a list of pointers
         * representing the tables for the sort, then assign ICM addresses
         * below using it.
         */
        for (i = 0; i < HERMON_NUM_ICM_RESOURCES; i++) {
                icm_p[i] = &icm[i];
        }
        /*
         * Descending-size sort (repeated adjacent-swap passes).  The four
         * CMPT_* sub-types are skipped: they live inside the single CMPT
         * table's space and get their addresses carved out of it below.
         */
        for (i = HERMON_NUM_ICM_RESOURCES; i > 0; i--) {
                switch (i) {
                case HERMON_CMPT_QPC:
                case HERMON_CMPT_SRQC:
                case HERMON_CMPT_CQC:
                case HERMON_CMPT_EQC:
                        continue;
                }
                for (j = 1; j < i; j++) {
                        if (icm_p[j]->table_size > icm_p[j - 1]->table_size) {
                                tmp             = icm_p[j];
                                icm_p[j]        = icm_p[j - 1];
                                icm_p[j - 1]    = tmp;
                        }
                }
        }

        /* Initialize the ICM address and ICM size */
        icm_addr = icm_size = 0;

        /*
         * Set the ICM base address of each table, using our sorted
         * list of pointers from above.
         */
        for (i = 0; i < HERMON_NUM_ICM_RESOURCES; i++) {
                j = icm_p[i]->icm_type;
                /* CMPT_* sub-types are addressed via HERMON_CMPT below */
                switch (j) {
                case HERMON_CMPT_QPC:
                case HERMON_CMPT_SRQC:
                case HERMON_CMPT_CQC:
                case HERMON_CMPT_EQC:
                        continue;
                }
                if (icm[j].table_size) {
                        /*
                         * Set the ICM base address in the table, save the
                         * ICM offset in the rsrc pool and increment the
                         * total ICM allocation.
                         */
                        /*
                         * NOTE(review): icm_baseaddr/table_size are 64-bit
                         * values printed with %p/%llx here -- debug-only,
                         * but the format/arg pairing looks suspect; verify
                         * against IBTF_DPRINTF_L2 semantics.
                         */
                        icm[j].icm_baseaddr = icm_addr;
                        if (hermon_verbose) {
                                IBTF_DPRINTF_L2("ICMADDR", "rsrc %x @ %p"
                                    " size %llx", j, icm[j].icm_baseaddr,
                                    icm[j].table_size);
                        }
                        icm_size += icm[j].table_size;
                }

                /* Verify that we don't exceed maximum ICM size */
                if (icm_size > devlim->max_icm_size) {
                        /* free the ICM table memory resources */
                        hermon_icm_tables_fini(state);
                        cmn_err(CE_WARN, "ICM configuration exceeds maximum "
                            "configuration: max (0x%lx) requested (0x%lx)\n",
                            (ulong_t)devlim->max_icm_size, (ulong_t)icm_size);
                        HERMON_ATTACH_MSG(state->hs_attach_buf,
                            "icm_config_toobig_fail");
                        return (DDI_FAILURE);
                }

                /* assign address to the 4 pieces of the CMPT */
                if (j == HERMON_CMPT) {
                        /* split the CMPT table into four equal quadrants */
                        uint64_t cmpt_size = icm[j].table_size >> 2;
#define init_cmpt_icm_baseaddr(rsrc, indx)                              \
        icm[rsrc].icm_baseaddr  = icm_addr + (indx * cmpt_size);
                        init_cmpt_icm_baseaddr(HERMON_CMPT_QPC, 0);
                        init_cmpt_icm_baseaddr(HERMON_CMPT_SRQC, 1);
                        init_cmpt_icm_baseaddr(HERMON_CMPT_CQC, 2);
                        init_cmpt_icm_baseaddr(HERMON_CMPT_EQC, 3);
                }

                /* Increment the ICM address for the next table */
                icm_addr += icm[j].table_size;
        }

        /* Populate the structure for the INIT_HCA command */
        hermon_inithca_set(state, inithca);

        /*
         * Prior to invoking INIT_HCA, we must have ICM memory in place
         * for the reserved objects in each table. We will allocate and map
         * this initial ICM memory here. Note that given the assignment
         * of span_size above, tables that are smaller or equal in total
         * size to the default span_size will be mapped in full.
         */
        status = hermon_icm_dma_init(state);
        if (status != DDI_SUCCESS) {
                /* free the ICM table memory resources */
                hermon_icm_tables_fini(state);
                HERMON_WARNING(state, "Failed to allocate initial ICM");
                HERMON_ATTACH_MSG(state->hs_attach_buf,
                    "icm_config_dma_init_fail");
                return (DDI_FAILURE);
        }

        return (DDI_SUCCESS);
}

/*
 * hermon_inithca_set()
 *    Context: Only called from attach() path context
 */
static void
hermon_inithca_set(hermon_state_t *state, hermon_hw_initqueryhca_t *inithca)
{
        hermon_cfg_profile_t    *profile;
        hermon_icm_table_t      *tbl;
        int                     ndx;

        /*
         * Fill in the INIT_HCA command structure from the configuration
         * profile and the ICM layout computed by hermon_icm_config_setup().
         */
        profile = state->hs_cfg_profile;

        /* INIT_HCA interface version (per PRM 0.36) */
        inithca->version = 0x02;

        /* log2 cacheline size, in 16-byte units; tuned for 64-byte lines */
        inithca->log2_cacheline = 0x2;

        /* UAR configuration: UAR count and UAR page size */
        inithca->uar.log_max_uars = highbit(profile->cp_log_num_uar);
        inithca->uar.uar_pg_sz = PAGESHIFT - HERMON_PAGESHIFT;

        /* Tell the HCA the host's byte order */
#ifdef  _LITTLE_ENDIAN
        inithca->big_endian     = 0;
#else
        inithca->big_endian     = 1;
#endif

        /* UDAV port checking defaults to enabled */
        inithca->udav_port_chk  = HERMON_UDAV_PORTCHK_ENABLED;

        /* Turn on IPoIB checksum offload when the device supports it */
        if (state->hs_devlim.ipoib_cksm)
                inithca->chsum_en = 1;

        /*
         * Record each ICM table's base address (and, where applicable,
         * its size) into the appropriate INIT_HCA fields.  The 64-bit
         * context base addresses are split into a high 32-bit half and
         * a low half shifted down by 5 (32-byte-aligned), per the PRM.
         */
        for (ndx = 0; ndx < HERMON_NUM_ICM_RESOURCES; ndx++) {
                tbl = &state->hs_icm[ndx];

                switch (tbl->icm_type) {
                case HERMON_QPC:
                        inithca->context.log_num_qp = tbl->log_num_entries;
                        inithca->context.qpc_baseaddr_h =
                            tbl->icm_baseaddr >> 32;
                        inithca->context.qpc_baseaddr_l =
                            (tbl->icm_baseaddr & 0xFFFFFFFF) >> 5;
                        break;

                case HERMON_CQC:
                        inithca->context.log_num_cq = tbl->log_num_entries;
                        inithca->context.cqc_baseaddr_h =
                            tbl->icm_baseaddr >> 32;
                        inithca->context.cqc_baseaddr_l =
                            (tbl->icm_baseaddr & 0xFFFFFFFF) >> 5;
                        break;

                case HERMON_SRQC:
                        inithca->context.log_num_srq = tbl->log_num_entries;
                        inithca->context.srqc_baseaddr_h =
                            tbl->icm_baseaddr >> 32;
                        inithca->context.srqc_baseaddr_l =
                            (tbl->icm_baseaddr & 0xFFFFFFFF) >> 5;
                        break;

                case HERMON_EQC:
                        inithca->context.log_num_eq = tbl->log_num_entries;
                        inithca->context.eqc_baseaddr_h =
                            tbl->icm_baseaddr >> 32;
                        inithca->context.eqc_baseaddr_l =
                            (tbl->icm_baseaddr & 0xFFFFFFFF) >> 5;
                        break;

                case HERMON_RDB:
                        /* log of rdmardc entries is given per-QP */
                        inithca->context.rdmardc_baseaddr_h =
                            tbl->icm_baseaddr >> 32;
                        inithca->context.rdmardc_baseaddr_l =
                            (tbl->icm_baseaddr & 0xFFFFFFFF) >> 5;
                        inithca->context.log_num_rdmardc =
                            profile->cp_log_num_rdb - profile->cp_log_num_qp;
                        break;

                case HERMON_CMPT:
                        inithca->tpt.cmpt_baseaddr = tbl->icm_baseaddr;
                        break;

                case HERMON_MTT:
                        inithca->tpt.mtt_baseaddr = tbl->icm_baseaddr;
                        break;

                case HERMON_DMPT:
                        inithca->tpt.dmpt_baseaddr = tbl->icm_baseaddr;
                        inithca->tpt.log_dmpt_sz   = tbl->log_num_entries;
                        inithca->tpt.pgfault_rnr_to = 0; /* just in case */
                        break;

                case HERMON_MCG:
                        inithca->multi.mc_baseaddr    = tbl->icm_baseaddr;
                        inithca->multi.log_mc_tbl_sz  = tbl->log_num_entries;
                        inithca->multi.log_mc_tbl_ent =
                            highbit(HERMON_MCGMEM_SZ(state)) - 1;
                        inithca->multi.log_mc_tbl_hash_sz =
                            profile->cp_log_num_mcg_hash;
                        inithca->multi.mc_hash_fn = HERMON_MCG_DEFAULT_HASH_FN;
                        break;

                case HERMON_ALTC:
                        inithca->context.altc_baseaddr = tbl->icm_baseaddr;
                        break;

                case HERMON_AUXC:
                        inithca->context.auxc_baseaddr = tbl->icm_baseaddr;
                        break;

                default:
                        /* remaining ICM types need no INIT_HCA fields */
                        break;
                }
        }
}

/*
 * hermon_icm_tables_init()
 *    Context: Only called from attach() path context
 *
 * Dynamic ICM breaks the various ICM tables into "span_size" chunks
 * to enable allocation of backing memory on demand.  Arbel used a
 * fixed size ARBEL_ICM_SPAN_SIZE (initially was 512KB) as the
 * span_size for all ICM chunks.  Hermon has other considerations,
 * so the span_size used differs from Arbel.
 *
 * The basic considerations for why Hermon differs are:
 *
 *      1) ICM memory is in units of HERMON pages.
 *
 *      2) The AUXC table is approximately 1 byte per QP.
 *
 *      3) ICM memory for AUXC, ALTC, and RDB is allocated when
 *      the ICM memory for the corresponding QPC is allocated.
 *
 *      4) ICM memory for the CMPT corresponding to the various primary
 *      resources (QPC, SRQC, CQC, and EQC) is allocated when the ICM
 *      memory for the primary resource is allocated.
 *
 * One HERMON page (4KB) would typically map 4K QPs worth of AUXC.
 * So, the minimum chunk for the various QPC related ICM memory should
 * all be allocated to support the 4K QPs.  Currently, this means the
 * amount of memory for the various QP chunks is:
 *
 *      QPC     256*4K bytes
 *      RDB     128*4K bytes
 *      CMPT     64*4K bytes
 *      ALTC     64*4K bytes
 *      AUXC      1*4K bytes
 *
 * The span_size chosen for the QP resource is 4KB of AUXC entries,
 * or 1 HERMON_PAGESIZE worth, which is the minimum ICM mapping size.
 *
 * Other ICM resources can have their span_size be more arbitrary.
 * This is 4K (HERMON_ICM_SPAN), except for MTTs because they are tiny.
 */

/*
 * init_dependent(rsrc, dep) -- copy the span geometry (span, num_spans,
 * split_shift, span_mask, span_shift, rsrc_mask) computed for a primary
 * ICM resource "rsrc" to a dependent resource "dep" whose ICM memory is
 * allocated in lockstep with it (see the block comment above: AUXC, ALTC
 * and RDB follow QPC; the CMPT pieces follow their primary resources).
 * A macro rather than a function purely to keep the code below cleaner.
 */
#define init_dependent(rsrc, dep)                               \
        icm[dep].span           = icm[rsrc].span;               \
        icm[dep].num_spans      = icm[rsrc].num_spans;          \
        icm[dep].split_shift    = icm[rsrc].split_shift;        \
        icm[dep].span_mask      = icm[rsrc].span_mask;          \
        icm[dep].span_shift     = icm[rsrc].span_shift;         \
        icm[dep].rsrc_mask      = icm[rsrc].rsrc_mask;          \
        if (hermon_verbose) {                                   \
                IBTF_DPRINTF_L2("hermon", "tables_init: "       \
                    "rsrc (0x%x) size (0x%lx) span (0x%x) "     \
                    "num_spans (0x%x)", dep, icm[dep].table_size, \
                    icm[dep].span, icm[dep].num_spans);         \
                IBTF_DPRINTF_L2("hermon", "tables_init: "       \
                    "span_shift (0x%x) split_shift (0x%x)",     \
                    icm[dep].span_shift, icm[dep].split_shift); \
                IBTF_DPRINTF_L2("hermon", "tables_init: "       \
                    "span_mask (0x%x)  rsrc_mask   (0x%x)",     \
                    icm[dep].span_mask, icm[dep].rsrc_mask);    \
        }

static void
hermon_icm_tables_init(hermon_state_t *state)
{
        hermon_icm_table_t      *icm;
        int                     i, k;
        uint32_t                per_split;


        icm = state->hs_icm;

        for (i = 0; i < HERMON_NUM_ICM_RESOURCES; i++) {
                /* Derive table geometry from the configured log sizes */
                icm[i].icm_type         = i;
                icm[i].num_entries      = 1 << icm[i].log_num_entries;
                /* assumes object_size is a power of two — TODO confirm */
                icm[i].log_object_size  = highbit(icm[i].object_size) - 1;
                icm[i].table_size       = icm[i].num_entries <<
                    icm[i].log_object_size;

                /* deal with "dependent" resource types */
                switch (i) {
                case HERMON_AUXC:
#ifdef HERMON_FW_WORKAROUND
                        /* oversize AUXC table for pre-2_7_000 firmware */
                        icm[i].table_size = 0x80000000ull;
#endif
                        /* FALLTHROUGH */
                case HERMON_CMPT_QPC:
                case HERMON_RDB:
                case HERMON_ALTC:
                        /* QPC-dependent tables share QPC's span geometry */
                        init_dependent(HERMON_QPC, i);
                        continue;
                case HERMON_CMPT_SRQC:
                        init_dependent(HERMON_SRQC, i);
                        continue;
                case HERMON_CMPT_CQC:
                        init_dependent(HERMON_CQC, i);
                        continue;
                case HERMON_CMPT_EQC:
                        init_dependent(HERMON_EQC, i);
                        continue;
                }

                icm[i].span = HERMON_ICM_SPAN;  /* default #rsrc's in 1 span */
                if (i == HERMON_MTT) /* Alloc enough MTTs to map 256MB */
                        icm[i].span = HERMON_ICM_SPAN * 16;
                icm[i].num_spans = icm[i].num_entries / icm[i].span;
                if (icm[i].num_spans == 0) {
                        /* Table smaller than one span: one span covers all */
                        icm[i].span = icm[i].num_entries;
                        per_split = 1;
                        icm[i].num_spans = icm[i].num_entries / icm[i].span;
                } else {
                        /* Distribute the spans across HERMON_ICM_SPLIT */
                        per_split = icm[i].num_spans / HERMON_ICM_SPLIT;
                        if (per_split == 0) {
                                per_split = 1;
                        }
                }
                if (hermon_verbose)
                        IBTF_DPRINTF_L2("ICM", "rsrc %x  span %x  num_spans %x",
                            i, icm[i].span, icm[i].num_spans);

                /*
                 * Ensure a minimum table size of an ICM page, and a
                 * maximum span size of the ICM table size.  This ensures
                 * that we don't have less than an ICM page to map, which is
                 * impossible, and that we will map an entire table at
                 * once if its total size is less than the span size.
                 */
                icm[i].table_size = max(icm[i].table_size, HERMON_PAGESIZE);

                /* span_shift = log2(span); assumes span is a power of two */
                icm[i].span_shift = 0;
                for (k = icm[i].span; k != 1; k >>= 1)
                        icm[i].span_shift++;
                /* split_shift = log2(span * per_split) */
                icm[i].split_shift = icm[i].span_shift;
                for (k = per_split; k != 1; k >>= 1)
                        icm[i].split_shift++;
                /* span_mask selects span bits; rsrc_mask, rsrc-within-span */
                icm[i].span_mask = (1 << icm[i].split_shift) -
                    (1 << icm[i].span_shift);
                icm[i].rsrc_mask = (1 << icm[i].span_shift) - 1;


                /* Initialize the table lock */
                mutex_init(&icm[i].icm_table_lock, NULL, MUTEX_DRIVER,
                    DDI_INTR_PRI(state->hs_intrmsi_pri));
                cv_init(&icm[i].icm_table_cv, NULL, CV_DRIVER, NULL);

                if (hermon_verbose) {
                        IBTF_DPRINTF_L2("hermon", "tables_init: "
                            "rsrc (0x%x) size (0x%lx)", i, icm[i].table_size);
                        IBTF_DPRINTF_L2("hermon", "tables_init: "
                            "span (0x%x) num_spans (0x%x)",
                            icm[i].span, icm[i].num_spans);
                        IBTF_DPRINTF_L2("hermon", "tables_init: "
                            "span_shift (0x%x) split_shift (0x%x)",
                            icm[i].span_shift, icm[i].split_shift);
                        IBTF_DPRINTF_L2("hermon", "tables_init: "
                            "span_mask (0x%x)  rsrc_mask   (0x%x)",
                            icm[i].span_mask, icm[i].rsrc_mask);
                }
        }

}

/*
 * hermon_icm_tables_fini()
 *    Context: Only called from attach() path context
 *
 * Tear down every ICM table: release each split's dma_info array and
 * allocation bitmap, then destroy the per-table CV and mutex.
 */
static void
hermon_icm_tables_fini(hermon_state_t *state)
{
        hermon_icm_table_t      *tbl;
        int                     spans;
        int                     rsrc, split;

        for (rsrc = 0; rsrc < HERMON_NUM_ICM_RESOURCES; rsrc++) {
                tbl = &state->hs_icm[rsrc];

                mutex_enter(&tbl->icm_table_lock);
                spans = tbl->num_spans;

                for (split = 0; split < HERMON_ICM_SPLIT; split++) {
                        /* Free the ICM DMA slots, if any were allocated */
                        if (tbl->icm_dma[split] != NULL) {
                                kmem_free(tbl->icm_dma[split],
                                    spans * sizeof (hermon_dma_info_t));
                        }

                        /* Free the table bitmap (1 bit per span) */
                        if (tbl->icm_bitmap[split] != NULL) {
                                kmem_free(tbl->icm_bitmap[split],
                                    (spans + 7) / 8);
                        }
                }

                /* Destroy the table CV and lock */
                cv_destroy(&tbl->icm_table_cv);
                mutex_exit(&tbl->icm_table_lock);
                mutex_destroy(&tbl->icm_table_lock);
        }
}

/*
 * hermon_icm_dma_init()
 *    Context: Only called from attach() path context
 *
 * Allocate the initial ICM backing memory (span index 0) for each
 * primary ICM table, so the reserved ICM objects are backed before the
 * resource pools are initialized.  Returns DDI_SUCCESS/DDI_FAILURE.
 */
static int
hermon_icm_dma_init(hermon_state_t *state)
{
        hermon_icm_table_t      *icm;
        hermon_rsrc_type_t      type;
        int                     status;


        /*
         * This routine will allocate initial ICM DMA resources for ICM
         * tables that have reserved ICM objects. This is the only routine
         * where we should have to allocate ICM outside of hermon_rsrc_alloc().
         * We need to allocate ICM here explicitly, rather than in
         * hermon_rsrc_alloc(), because we've not yet completed the resource
         * pool initialization. When the resource pools are initialized
         * (in hermon_rsrc_init_phase2(), see hermon_rsrc.c for more
         * information), resource preallocations will be invoked to match
         * the ICM allocations seen here. We will then be able to use the
         * normal allocation path.  Note we don't need to set a refcnt on
         * these initial allocations because that will be done in the calls
         * to hermon_rsrc_alloc() from hermon_hw_entries_init() for the
         * "prealloc" objects (see hermon_rsrc.c for more information).
         */
        for (type = 0; type < HERMON_NUM_ICM_RESOURCES; type++) {

                /* ICM for these is allocated within hermon_icm_alloc() */
                switch (type) {
                case HERMON_CMPT:
                case HERMON_CMPT_QPC:
                case HERMON_CMPT_SRQC:
                case HERMON_CMPT_CQC:
                case HERMON_CMPT_EQC:
                case HERMON_AUXC:
                case HERMON_ALTC:
                case HERMON_RDB:
                        continue;
                }

                icm = &state->hs_icm[type];

                /* Back span 0 of this table (holds the reserved objects) */
                mutex_enter(&icm->icm_table_lock);
                status = hermon_icm_alloc(state, type, 0, 0);
                mutex_exit(&icm->icm_table_lock);
                if (status != DDI_SUCCESS) {
                        /*
                         * Unwind every earlier iteration.
                         * NOTE(review): this also calls hermon_icm_free()
                         * for the dependent types skipped by the switch
                         * above, which were never allocated here directly —
                         * presumably safe because their ICM is tied to the
                         * primary type; confirm against hermon_icm_free().
                         */
                        while (type--) {
                                icm = &state->hs_icm[type];
                                mutex_enter(&icm->icm_table_lock);
                                hermon_icm_free(state, type, 0, 0);
                                mutex_exit(&icm->icm_table_lock);
                        }
                        return (DDI_FAILURE);
                }

                if (hermon_verbose) {
                        IBTF_DPRINTF_L2("hermon", "hermon_icm_dma_init: "
                            "table (0x%x) index (0x%x) allocated", type, 0);
                }
        }

        return (DDI_SUCCESS);
}

/*
 * hermon_icm_dma_fini()
 *    Context: Only called from attach() path context
 *
 * ICM has been completely unmapped, so simply walk every table's
 * dma_info arrays and release any DMA memory still held, clearing the
 * handle pointer as we go.
 */
static void
hermon_icm_dma_fini(hermon_state_t *state)
{
        hermon_icm_table_t      *tbl;
        hermon_dma_info_t       *dma;
        hermon_rsrc_type_t      rsrc;
        int                     split, span;

        for (rsrc = 0; rsrc < HERMON_NUM_ICM_RESOURCES; rsrc++) {
                tbl = &state->hs_icm[rsrc];
                for (split = 0; split < HERMON_ICM_SPLIT; split++) {
                        dma = tbl->icm_dma[split];
                        if (dma == NULL)
                                continue;
                        for (span = 0; span < tbl->num_spans; span++) {
                                /* Free any span still holding DMA memory */
                                if (dma[span].dma_hdl != NULL)
                                        hermon_dma_free(&dma[span]);
                                dma[span].dma_hdl = NULL;
                        }
                }
        }
}

/*
 * hermon_hca_port_init()
 *    Context: Only called from attach() path context
 *
 * For each HCA port: QUERY_PORT to read hardware limits, validate the
 * configuration profile against those limits, SET_PORT to program the
 * desired settings, then INIT_PORT to bring the port up.  Returns
 * DDI_SUCCESS, or DDI_FAILURE after shutting down any ports already
 * brought up.
 */
static int
hermon_hca_port_init(hermon_state_t *state)
{
        hermon_hw_set_port_t    *portinits, *initport;
        hermon_cfg_profile_t    *cfgprof;
        uint_t                  num_ports;
        int                     i = 0, status;
        uint64_t                maxval, val;
        uint64_t                sysimgguid, nodeguid, portguid;


        cfgprof = state->hs_cfg_profile;

        /* Get number of HCA ports */
        num_ports = cfgprof->cp_num_ports;

        /* Allocate space for Hermon set port struct(s) */
        portinits = (hermon_hw_set_port_t *)kmem_zalloc(num_ports *
            sizeof (hermon_hw_set_port_t), KM_SLEEP);

        /* Post commands to initialize each Hermon HCA port */
        /*
         * In Hermon, the process is different than in previous HCAs.
         * Here, you have to:
         *      QUERY_PORT - to get basic information from the HCA
         *      set the fields accordingly
         *      SET_PORT - to change/set everything as desired
         *      INIT_PORT - to bring the port up
         *
         * Needs to be done for each port in turn
         */

        for (i = 0; i < num_ports; i++) {
                bzero(&state->hs_queryport, sizeof (hermon_hw_query_port_t));
                status = hermon_cmn_query_cmd_post(state, QUERY_PORT, 0,
                    (i + 1), &state->hs_queryport,
                    sizeof (hermon_hw_query_port_t), HERMON_CMD_NOSLEEP_SPIN);
                if (status != HERMON_CMD_SUCCESS) {
                        cmn_err(CE_CONT, "Hermon: QUERY_PORT (port %02d) "
                            "command failed: %08x\n", i + 1, status);
                        goto init_ports_fail;
                }
                initport = &portinits[i];
                /*
                 * NOTE(review): hs_initport points into portinits, which is
                 * freed before this function returns — confirm it is never
                 * dereferenced afterward.
                 */
                state->hs_initport = &portinits[i];

                /*
                 * Clear the full SET_PORT structure.  Use the set_port
                 * size here: "initport" is a hermon_hw_set_port_t (the
                 * query_port size previously used was the wrong type).
                 */
                bzero(initport, sizeof (hermon_hw_set_port_t));

                /*
                 * Determine whether we need to override the firmware's
                 * default SystemImageGUID setting.
                 */
                sysimgguid = cfgprof->cp_sysimgguid;
                if (sysimgguid != 0) {
                        initport->sig           = 1;
                        initport->sys_img_guid  = sysimgguid;
                }

                /*
                 * Determine whether we need to override the firmware's
                 * default NodeGUID setting.
                 */
                nodeguid = cfgprof->cp_nodeguid;
                if (nodeguid != 0) {
                        initport->ng            = 1;
                        initport->node_guid     = nodeguid;
                }

                /*
                 * Determine whether we need to override the firmware's
                 * default PortGUID setting.
                 */
                portguid = cfgprof->cp_portguid[i];
                if (portguid != 0) {
                        initport->g0            = 1;
                        initport->guid0         = portguid;
                }

                /* Validate max MTU size against the queried capability */
                maxval  = state->hs_queryport.ib_mtu;
                val     = cfgprof->cp_max_mtu;
                if (val > maxval) {
                        goto init_ports_fail;
                }

                /* Set mtu_cap to 4096 bytes */
                initport->mmc = 1;      /* set the change bit */
                initport->mtu_cap = 5;  /* for 4096 bytes */

                /* Validate the max port width */
                maxval  = state->hs_queryport.ib_port_wid;
                val     = cfgprof->cp_max_port_width;
                if (val > maxval) {
                        goto init_ports_fail;
                }

                /* Validate max VL cap size */
                maxval  = state->hs_queryport.max_vl;
                val     = cfgprof->cp_max_vlcap;
                if (val > maxval) {
                        goto init_ports_fail;
                }

                /* Since we're doing mtu_cap, cut vl_cap down */
                initport->mvc = 1;      /* set this change bit */
                initport->vl_cap = 3;   /* 3 means vl0-vl3, 4 total */

                /* Validate max GID table size */
                maxval  = ((uint64_t)1 << state->hs_queryport.log_max_gid);
                val     = ((uint64_t)1 << cfgprof->cp_log_max_gidtbl);
                if (val > maxval) {
                        goto init_ports_fail;
                }
                initport->max_gid = (uint16_t)val;
                initport->mg = 1;

                /* Validate max PKey table size */
                maxval  = ((uint64_t)1 << state->hs_queryport.log_max_pkey);
                val     = ((uint64_t)1 << cfgprof->cp_log_max_pkeytbl);
                if (val > maxval) {
                        goto init_ports_fail;
                }
                initport->max_pkey = (uint16_t)val;
                initport->mp = 1;
                /*
                 * Post the SET_PORT cmd to Hermon firmware. This sets
                 * the parameters of the port.
                 */
                status = hermon_set_port_cmd_post(state, initport, i + 1,
                    HERMON_CMD_NOSLEEP_SPIN);
                if (status != HERMON_CMD_SUCCESS) {
                        cmn_err(CE_CONT, "Hermon: SET_PORT (port %02d) command "
                            "failed: %08x\n", i + 1, status);
                        goto init_ports_fail;
                }
                /* issue another SET_PORT cmd - performance fix/workaround */
                /* XXX - need to discuss with Mellanox */
                bzero(initport, sizeof (hermon_hw_set_port_t));
                initport->cap_mask = 0x02500868;
                status = hermon_set_port_cmd_post(state, initport, i + 1,
                    HERMON_CMD_NOSLEEP_SPIN);
                if (status != HERMON_CMD_SUCCESS) {
                        cmn_err(CE_CONT, "Hermon: SET_PORT (port %02d) command "
                            "failed: %08x\n", i + 1, status);
                        goto init_ports_fail;
                }
        }

        /*
         * Finally, do the INIT_PORT for each port in turn
         * When this command completes, the corresponding Hermon port
         * will be physically "Up" and initialized.
         */
        for (i = 0; i < num_ports; i++) {
                status = hermon_init_port_cmd_post(state, i + 1,
                    HERMON_CMD_NOSLEEP_SPIN);
                if (status != HERMON_CMD_SUCCESS) {
                        cmn_err(CE_CONT, "Hermon: INIT_PORT (port %02d) "
                            "command failed: %08x\n", i + 1, status);
                        goto init_ports_fail;
                }
        }

        /* Free up the memory for Hermon port init struct(s), return success */
        kmem_free(portinits, num_ports * sizeof (hermon_hw_set_port_t));
        return (DDI_SUCCESS);

init_ports_fail:
        /*
         * Free up the memory for Hermon port init struct(s), shutdown any
         * successfully initialized ports, and return failure
         */
        kmem_free(portinits, num_ports * sizeof (hermon_hw_set_port_t));
        (void) hermon_hca_ports_shutdown(state, i);

        return (DDI_FAILURE);
}


/*
 * hermon_hca_ports_shutdown()
 *    Context: Only called from attach() and/or detach() path contexts
 *
 * Close the first "num_init" HCA ports.  A failure here would be
 * entirely unexpected and probably indicative of a serious problem
 * (HW or SW), so it is reported via HERMON_WARNING and the failing
 * command status is returned; otherwise HERMON_CMD_SUCCESS.
 */
static int
hermon_hca_ports_shutdown(hermon_state_t *state, uint_t num_init)
{
        uint_t  port;
        int     status;

        for (port = 0; port < num_init; port++) {
                status = hermon_close_port_cmd_post(state, port + 1,
                    HERMON_CMD_NOSLEEP_SPIN);
                if (status == HERMON_CMD_SUCCESS)
                        continue;
                HERMON_WARNING(state, "failed to shutdown HCA port");
                return (status);
        }

        return (HERMON_CMD_SUCCESS);
}


/*
 * hermon_internal_uarpg_init
 *    Context: Only called from attach() path context
 *
 * Allocate the kernel's privileged UAR page and its doorbell-record
 * tracking page.  Returns DDI_SUCCESS/DDI_FAILURE; on failure nothing
 * remains allocated.
 */
static int
hermon_internal_uarpg_init(hermon_state_t *state)
{
        int     status;
        hermon_dbr_info_t       *info;

        /*
         * Allocate the UAR page for kernel use. This UAR page is
         * the privileged UAR page through which all kernel generated
         * doorbells will be rung. There are a number of UAR pages
         * reserved by hardware at the front of the UAR BAR, indicated
         * by DEVCAP.num_rsvd_uar, which we have already allocated. So,
         * the kernel page, or UAR page index num_rsvd_uar, will be
         * allocated here for kernel use.
         */

        status = hermon_rsrc_alloc(state, HERMON_UARPG, 1, HERMON_SLEEP,
            &state->hs_uarkpg_rsrc);
        if (status != DDI_SUCCESS) {
                return (DDI_FAILURE);
        }

        /* Setup pointer to kernel UAR page */
        state->hs_uar = (hermon_hw_uar_t *)state->hs_uarkpg_rsrc->hr_addr;

        /* need to set up DBr tracking as well */
        status = hermon_dbr_page_alloc(state, &info);
        if (status != DDI_SUCCESS) {
                /* Don't leak the UAR page resource on failure */
                hermon_rsrc_free(state, &state->hs_uarkpg_rsrc);
                return (DDI_FAILURE);
        }
        state->hs_kern_dbr = info;
        return (DDI_SUCCESS);
}


/*
 * hermon_internal_uarpg_fini
 *    Context: Only called from attach() and/or detach() path contexts
 *
 * Releases the kernel (privileged) UAR page allocated in
 * hermon_internal_uarpg_init().  NOTE(review): the doorbell-record
 * page tracked by hs_kern_dbr is not freed here — presumably torn
 * down by the DBr cleanup path elsewhere; confirm.
 */
static void
hermon_internal_uarpg_fini(hermon_state_t *state)
{
        /* Free up Hermon UAR page #1 (kernel driver doorbells) */
        hermon_rsrc_free(state, &state->hs_uarkpg_rsrc);
}


/*
 * hermon_special_qp_contexts_reserve()
 *    Context: Only called from attach() path context
 *
 * Reserve the QP contexts used as aliases for the special QPs.  Two
 * contexts each for QP0 and QP1 (one per port, even in single-port
 * mode), plus four more held unused.  Uses goto-style unwinding so a
 * failure releases everything reserved so far.
 */
static int
hermon_special_qp_contexts_reserve(hermon_state_t *state)
{
        hermon_rsrc_t   *qp0, *qp1, *resvd;
        int             status;

        /* Initialize the lock used for special QP rsrc management */
        mutex_init(&state->hs_spec_qplock, NULL, MUTEX_DRIVER,
            DDI_INTR_PRI(state->hs_intrmsi_pri));

        /* Reserve the two QP0 alias contexts (one per port) */
        status = hermon_rsrc_alloc(state, HERMON_QPC, 2,
            HERMON_SLEEP, &qp0);
        if (status != DDI_SUCCESS)
                goto fail_qp0;
        state->hs_spec_qp0 = qp0;

        /* Reserve the two QP1 alias contexts (one per port) */
        status = hermon_rsrc_alloc(state, HERMON_QPC, 2,
            HERMON_SLEEP, &qp1);
        if (status != DDI_SUCCESS)
                goto fail_qp1;
        state->hs_spec_qp1 = qp1;

        /* Reserve four additional contexts, held unused */
        status = hermon_rsrc_alloc(state, HERMON_QPC, 4,
            HERMON_SLEEP, &resvd);
        if (status != DDI_SUCCESS)
                goto fail_resvd;
        state->hs_spec_qp_unused = resvd;

        return (DDI_SUCCESS);

fail_resvd:
        hermon_rsrc_free(state, &qp1);
fail_qp1:
        hermon_rsrc_free(state, &qp0);
fail_qp0:
        mutex_destroy(&state->hs_spec_qplock);
        return (DDI_FAILURE);
}


/*
 * hermon_special_qp_contexts_unreserve()
 *    Context: Only called from attach() and/or detach() path contexts
 *
 * Release the special QP contexts in the reverse order of their
 * reservation, then destroy the special QP lock.
 */
static void
hermon_special_qp_contexts_unreserve(hermon_state_t *state)
{
        /* Unreserve the held-unused contexts, then QP1, then QP0 */
        hermon_rsrc_free(state, &state->hs_spec_qp_unused);
        hermon_rsrc_free(state, &state->hs_spec_qp1);
        hermon_rsrc_free(state, &state->hs_spec_qp0);

        /* Destroy the lock used for special QP rsrc management */
        mutex_destroy(&state->hs_spec_qplock);
}


/*
 * hermon_sw_reset()
 *    Context: Currently called only from attach() path context
 *
 * Perform a software reset of the HCA: save PCI config space, trigger
 * the device reset register, wait for the device to reappear on the
 * bus, then restore config space (command register last, per the
 * Hermon software reset application note).  Returns DDI_SUCCESS or
 * DDI_FAILURE.
 */
static int
hermon_sw_reset(hermon_state_t *state)
{
        ddi_acc_handle_t        hdl = hermon_get_pcihdl(state);
        ddi_acc_handle_t        cmdhdl = hermon_get_cmdhdl(state);
        uint32_t                reset_delay;
        int                     status, i;
        uint32_t                sem;
        uint_t                  offset;
        uint32_t                data32;         /* for devctl & linkctl */
        int                     loopcnt;

        /* initialize the FMA retry loop */
        hermon_pio_init(fm_loop_cnt, fm_status, fm_test);
        hermon_pio_init(fm_loop_cnt2, fm_status2, fm_test2);

        /*
         * If the configured software reset delay is set to zero, then we
         * will not attempt a software reset of the Hermon device.
         */
        reset_delay = state->hs_cfg_profile->cp_sw_reset_delay;
        if (reset_delay == 0) {
                return (DDI_SUCCESS);
        }

        /* the FMA retry loop starts. */
        hermon_pio_start(state, cmdhdl, pio_error, fm_loop_cnt, fm_status,
            fm_test);
        hermon_pio_start(state, hdl, pio_error2, fm_loop_cnt2, fm_status2,
            fm_test2);

        /* Query the PCI capabilities of the HCA device */
        /* but don't process the VPD until after reset */
        status = hermon_pci_capability_list(state, hdl);
        if (status != DDI_SUCCESS) {
                cmn_err(CE_NOTE, "failed to get pci capabilities list(0x%x)\n",
                    status);
                return (DDI_FAILURE);
        }

        /*
         * Read all PCI config info (reg0...reg63).  Note: According to the
         * Hermon software reset application note, we should not read or
         * restore the values in reg22 and reg23.
         * NOTE:  For Hermon (and Arbel too) it says to restore the command
         * register LAST, and technically, you need to restore the
         * PCIE Capability "device control" and "link control" (word-sized,
         * at offsets 0x08 and 0x10 from the capbility ID respectively).
         * We hold off restoring the command register - offset 0x4 - till last
         */

        /* 1st, wait for the semaphore assure accessibility - per PRM */
        status = -1;
        for (i = 0; i < NANOSEC/MICROSEC /* 1sec timeout */; i++) {
                sem = ddi_get32(cmdhdl, state->hs_cmd_regs.sw_semaphore);
                if (sem == 0) {
                        /* semaphore is clear: device is accessible */
                        status = 0;
                        break;
                }
                drv_usecwait(1);
        }

        /* Check if timeout happens */
        if (status == -1) {
                /*
                 * Remove this acc handle from Hermon, then log
                 * the error.
                 */
                hermon_pci_config_teardown(state, &hdl);

                cmn_err(CE_WARN, "hermon_sw_reset timeout: "
                    "failed to get the semaphore(0x%p)\n",
                    (void *)state->hs_cmd_regs.sw_semaphore);

                hermon_fm_ereport(state, HCA_IBA_ERR, HCA_ERR_NON_FATAL);
                return (DDI_FAILURE);
        }

        /* Save config space (skipping the two reserved registers) */
        for (i = 0; i < HERMON_SW_RESET_NUMREGS; i++) {
                if ((i != HERMON_SW_RESET_REG22_RSVD) &&
                    (i != HERMON_SW_RESET_REG23_RSVD)) {
                        state->hs_cfg_data[i]  = pci_config_get32(hdl, i << 2);
                }
        }

        /*
         * Perform the software reset (by writing 1 at offset 0xF0010)
         */
        ddi_put32(cmdhdl, state->hs_cmd_regs.sw_reset, HERMON_SW_RESET_START);

        /*
         * This delay is required so as not to cause a panic here. If the
         * device is accessed too soon after reset it will not respond to
         * config cycles, causing a Master Abort and panic.
         */
        drv_usecwait(reset_delay);

        /*
         * Poll waiting for the device to finish resetting.  The reset is
         * complete when the vendor ID reads back as Mellanox's.
         */
        loopcnt = 100;  /* 100 times @ 100 usec - total delay 10 msec */
        while ((pci_config_get32(hdl, 0) & 0x0000FFFF) != PCI_VENID_MLX) {
                drv_usecwait(HERMON_SW_RESET_POLL_DELAY);
                if (--loopcnt == 0)
                        break;  /* just in case, break and go on */
        }
        if (loopcnt == 0)
                cmn_err(CE_CONT, "!Never see VEND_ID - read == %X",
                    pci_config_get32(hdl, 0));

        /*
         * Restore the config info, skipping the reserved registers and
         * reg1 (status/ctrl), which is restored last (see below).
         */
        for (i = 0; i < HERMON_SW_RESET_NUMREGS; i++) {
                if (i == 1) continue;   /* skip the status/ctrl reg */
                if ((i != HERMON_SW_RESET_REG22_RSVD) &&
                    (i != HERMON_SW_RESET_REG23_RSVD)) {
                        pci_config_put32(hdl, i << 2, state->hs_cfg_data[i]);
                }
        }

        /*
         * PCI Express Capability - we saved during capability list, and
         * we'll restore them here.
         */
        offset = state->hs_pci_cap_offset;
        data32 = state->hs_pci_cap_devctl;
        pci_config_put32(hdl, offset + HERMON_PCI_CAP_DEV_OFFS, data32);
        data32 = state->hs_pci_cap_lnkctl;
        pci_config_put32(hdl, offset + HERMON_PCI_CAP_LNK_OFFS, data32);

        /* Restore the command register last, with IO/memory space enabled */
        pci_config_put32(hdl, 0x04, (state->hs_cfg_data[1] | 0x0006));

        /* the FMA retry loop ends. */
        hermon_pio_end(state, hdl, pio_error2, fm_loop_cnt2, fm_status2,
            fm_test2);
        hermon_pio_end(state, cmdhdl, pio_error, fm_loop_cnt, fm_status,
            fm_test);

        return (DDI_SUCCESS);

pio_error2:
        /* fall through */
pio_error:
        hermon_fm_ereport(state, HCA_SYS_ERR, HCA_ERR_NON_FATAL);
        return (DDI_FAILURE);
}


/*
 * hermon_mcg_init()
 *    Context: Only called from attach() path context
 *
 * Set up the multicast group (MCG) support state: a temporary copy
 * buffer used by the Attach/Detach Multicast Group code, and the
 * mutex that serializes add/modify/remove on the MCG hash lists.
 */
static int
hermon_mcg_init(hermon_state_t *state)
{
        uint_t          tmpbuf_sz;

        /* Allocate the MCG temporary copy buffer */
        tmpbuf_sz = HERMON_MCGMEM_SZ(state);
        state->hs_mcgtmp = kmem_zalloc(tmpbuf_sz, KM_SLEEP);

        /* Initialize the multicast group mutex */
        mutex_init(&state->hs_mcglock, NULL, MUTEX_DRIVER,
            DDI_INTR_PRI(state->hs_intrmsi_pri));

        return (DDI_SUCCESS);
}


/*
 * hermon_mcg_fini()
 *    Context: Only called from attach() and/or detach() path contexts
 *
 * Undo hermon_mcg_init(): release the MCG temporary copy buffer and
 * destroy the multicast group mutex.
 */
static void
hermon_mcg_fini(hermon_state_t *state)
{
        /* Free up the space used for the MCG temporary copy buffer */
        kmem_free(state->hs_mcgtmp, HERMON_MCGMEM_SZ(state));

        /* Destroy the multicast group mutex */
        mutex_destroy(&state->hs_mcglock);
}


/*
 * hermon_fw_version_check()
 *    Context: Only called from attach() path context
 *
 * Compare the firmware revision reported by the HCA against the
 * minimum revision this driver requires.  The three fields are
 * compared lexicographically: major first, then minor, then subminor.
 * Returns DDI_SUCCESS when the firmware is at least the required
 * revision, DDI_FAILURE otherwise (or for an unknown operational mode).
 */
static int
hermon_fw_version_check(hermon_state_t *state)
{
        uint_t  req_major;
        uint_t  req_minor;
        uint_t  req_subminor;

#ifdef FMA_TEST
        if (hermon_test_num == -1) {
                return (DDI_FAILURE);
        }
#endif

        /* Select the required revision based on the operational mode */
        switch (state->hs_operational_mode) {
        case HERMON_HCA_MODE:
                req_major = HERMON_FW_VER_MAJOR;
                req_minor = HERMON_FW_VER_MINOR;
                req_subminor = HERMON_FW_VER_SUBMINOR;
                break;

        default:
                /* Unknown mode: refuse to attach */
                return (DDI_FAILURE);
        }

        /* Major fields differ: the higher one decides */
        if (state->hs_fw.fw_rev_major != req_major) {
                return ((state->hs_fw.fw_rev_major > req_major) ?
                    DDI_SUCCESS : DDI_FAILURE);
        }

        /* Majors equal; minor fields decide */
        if (state->hs_fw.fw_rev_minor != req_minor) {
                return ((state->hs_fw.fw_rev_minor > req_minor) ?
                    DDI_SUCCESS : DDI_FAILURE);
        }

        /* Majors and minors equal; subminor fields decide */
        if (state->hs_fw.fw_rev_subminor != req_subminor) {
                return ((state->hs_fw.fw_rev_subminor > req_subminor) ?
                    DDI_SUCCESS : DDI_FAILURE);
        }

        /* Exact match: this is the required firmware version */
        return (DDI_SUCCESS);
}


/*
 * hermon_device_info_report()
 *    Context: Only called from attach() path context
 */
static void
hermon_device_info_report(hermon_state_t *state)
{
	int	inst = state->hs_instance;

	/*
	 * Log the firmware and hardware revisions, then the node
	 * description string and node GUID, via cmn_err().  The leading
	 * '?' keeps these messages out of the console unless booted
	 * verbose.
	 */
	cmn_err(CE_CONT, "?hermon%d: FW ver: %04d.%04d.%04d, "
	    "HW rev: %02d\n", inst, state->hs_fw.fw_rev_major,
	    state->hs_fw.fw_rev_minor, state->hs_fw.fw_rev_subminor,
	    state->hs_revision_id);
	cmn_err(CE_CONT, "?hermon%d: %64s (0x%016" PRIx64 ")\n",
	    inst, state->hs_nodedesc, state->hs_nodeguid);
}


/*
 * hermon_pci_capability_list()
 *    Context: Only called from attach() path context
 */
static int
hermon_pci_capability_list(hermon_state_t *state, ddi_acc_handle_t hdl)
{
	uint_t		offset, data;
	uint32_t	data32;

	state->hs_pci_cap_offset = 0;		/* make sure it's cleared */

	/*
	 * Check for the "PCI Capabilities" bit in the "Status Register".
	 * Bit 4 in this register indicates the presence of a "PCI
	 * Capabilities" list.
	 *
	 * PCI-Express requires this bit to be set to 1.
	 */
	data = pci_config_get16(hdl, 0x06);
	if ((data & 0x10) == 0) {
		return (DDI_FAILURE);
	}

	/*
	 * Starting from offset 0x34 in PCI config space, find the
	 * head of "PCI capabilities" list, and walk the list.  If
	 * capabilities of a known type are encountered (e.g.
	 * "PCI-X Capability"), then call the appropriate handler
	 * function.
	 */
	offset = pci_config_get8(hdl, 0x34);
	while (offset != 0x0) {
		data = pci_config_get8(hdl, offset);
		/*
		 * Check for known capability types.  Hermon has the
		 * following (capability IDs per the PCI spec):
		 *    o Power Mgmt       (0x01)
		 *    o VPD Capability   (0x03)
		 *    o PCI-E Capability (0x10)
		 *    o MSIX Capability  (0x11)
		 */
		switch (data) {
		case 0x01:
			/* power mgmt handling */
			break;
		case 0x03:

/*
 * Reading the PCIe VPD is inconsistent - that is, sometimes causes
 * problems on (mostly) X64, though we've also seen problems w/ Sparc
 * and Tavor --- so, for now until it's root caused, don't try and
 * read it
 *
 * NOTE(review): despite the comment above, BOTH branches below still
 * read the VPD; the only difference is the 100-tick delay() inserted
 * before the read when HERMON_VPD_WORKS is undefined -- confirm that
 * this is the intended workaround.
 */
#ifdef HERMON_VPD_WORKS
			hermon_pci_capability_vpd(state, hdl, offset);
#else
			delay(100);
			hermon_pci_capability_vpd(state, hdl, offset);
#endif
			break;
		case 0x10:
			/*
			 * PCI Express Capability - save offset & contents
			 * for later in reset
			 */
			state->hs_pci_cap_offset = offset;
			data32 = pci_config_get32(hdl,
			    offset + HERMON_PCI_CAP_DEV_OFFS);
			state->hs_pci_cap_devctl = data32;
			data32 = pci_config_get32(hdl,
			    offset + HERMON_PCI_CAP_LNK_OFFS);
			state->hs_pci_cap_lnkctl = data32;
			break;
		case 0x11:
			/*
			 * MSIX support - nothing to do, taken care of in the
			 * MSI/MSIX interrupt framework
			 */
			break;
		default:
			/* just go on to the next */
			break;
		}

		/* Get offset of next entry in list */
		offset = pci_config_get8(hdl, offset + 1);
	}

	return (DDI_SUCCESS);
}

/*
 * hermon_pci_read_vpd()
 *    Context: Only called from attach() path context
 *    utility routine for hermon_pci_capability_vpd()
 */
static int
hermon_pci_read_vpd(ddi_acc_handle_t hdl, uint_t offset, uint32_t addr,
    uint32_t *data)
{
	int		tries;
	uint32_t	regval;
	int		vpd_addr = offset + 2;
	int		vpd_data = offset + 4;

	/*
	 * Writing the VPD offset (shifted into the upper half-word, with
	 * bit 31 clear) into the address register initiates a read.  The
	 * hardware sets the flag bit (bit 31 of the dword, i.e. bit 15 of
	 * the upper half-word) once the data register holds the requested
	 * 4 bytes.  Poll for the flag, 1ms per attempt, up to 40 attempts.
	 */
	pci_config_put32(hdl, offset, addr << 16);
	for (tries = 40; tries > 0; tries--) {
		drv_usecwait(1000);
		regval = pci_config_get16(hdl, vpd_addr);
		if (regval & 0x8000) {		/* flag bit set */
			*data = pci_config_get32(hdl, vpd_data);
			return (DDI_SUCCESS);
		}
	}

	/* Poll timed out: log one message only, but keep counting failures */
	if (debug_vpd == 0)
		cmn_err(CE_NOTE,
		    "!Failed to see flag bit after VPD addr write\n");
	debug_vpd++;

	return (DDI_FAILURE);
}



/*
 *   hermon_pci_capability_vpd()
 *    Context: Only called from attach() path context
 */
static void
hermon_pci_capability_vpd(hermon_state_t *state, ddi_acc_handle_t hdl,
    uint_t offset)
{
	uint8_t			name_length;
	uint8_t			pn_length;
	int			i, err = 0;
	int			vpd_str_id = 0;
	int			vpd_ro_desc;
	int			vpd_ro_pn_desc;
#ifdef _BIG_ENDIAN
	uint32_t		data32;
#endif /* _BIG_ENDIAN */
	union {
		uint32_t	vpd_int[HERMON_VPD_HDR_DWSIZE];
		uchar_t		vpd_char[HERMON_VPD_HDR_BSIZE];
	} vpd;


	/*
	 * Read in the Vital Product Data (VPD) to the extent needed
	 * by the fwflash utility
	 */
	for (i = 0; i < HERMON_VPD_HDR_DWSIZE; i++) {
		err = hermon_pci_read_vpd(hdl, offset, i << 2, &vpd.vpd_int[i]);
		if (err != DDI_SUCCESS) {
			cmn_err(CE_NOTE, "!VPD read failed\n");
			goto out;
		}
	}

#ifdef _BIG_ENDIAN
	/* Need to swap bytes for big endian. */
	for (i = 0; i < HERMON_VPD_HDR_DWSIZE; i++) {
		data32 = vpd.vpd_int[i];
		vpd.vpd_char[(i << 2) + 3] =
		    (uchar_t)((data32 & 0xFF000000) >> 24);
		vpd.vpd_char[(i << 2) + 2] =
		    (uchar_t)((data32 & 0x00FF0000) >> 16);
		vpd.vpd_char[(i << 2) + 1] =
		    (uchar_t)((data32 & 0x0000FF00) >> 8);
		vpd.vpd_char[i << 2] = (uchar_t)(data32 & 0x000000FF);
	}
#endif	/* _BIG_ENDIAN */

	/* Check for VPD String ID Tag */
	if (vpd.vpd_char[vpd_str_id] == 0x82) {
		/* get the product name */
		name_length = (uint8_t)vpd.vpd_char[vpd_str_id + 1];
		/*
		 * ">=" (not ">"): one extra byte is needed for the NUL
		 * terminator stored at hs_hca_name[name_length] below,
		 * so a length equal to the buffer size must be rejected.
		 */
		if (name_length >= sizeof (state->hs_hca_name)) {
			cmn_err(CE_NOTE, "!VPD name too large (0x%x)\n",
			    name_length);
			goto out;
		}
		(void) memcpy(state->hs_hca_name, &vpd.vpd_char[vpd_str_id + 3],
		    name_length);
		state->hs_hca_name[name_length] = 0;

		/* get the part number */
		vpd_ro_desc = name_length + 3; /* read-only tag location */
		vpd_ro_pn_desc = vpd_ro_desc + 3; /* P/N keyword location */

		/*
		 * Verify read-only tag and Part Number keyword.  Both
		 * keyword bytes must be "PN"; the check uses "||" so a
		 * mismatch in either byte rejects the keyword (the old
		 * "&&" only rejected it when both bytes were wrong).
		 */
		if (vpd.vpd_char[vpd_ro_desc] != 0x90 ||
		    (vpd.vpd_char[vpd_ro_pn_desc] != 'P' ||
		    vpd.vpd_char[vpd_ro_pn_desc + 1] != 'N')) {
			cmn_err(CE_NOTE, "!VPD Part Number not found\n");
			goto out;
		}

		pn_length = (uint8_t)vpd.vpd_char[vpd_ro_pn_desc + 2];
		/* ">=" leaves room for the NUL terminator, as above */
		if (pn_length >= sizeof (state->hs_hca_pn)) {
			/* report the part-number length, not the name's */
			cmn_err(CE_NOTE, "!VPD part number too large (0x%x)\n",
			    pn_length);
			goto out;
		}
		(void) memcpy(state->hs_hca_pn,
		    &vpd.vpd_char[vpd_ro_pn_desc + 3],
		    pn_length);
		state->hs_hca_pn[pn_length] = 0;
		state->hs_hca_pn_len = pn_length;
		cmn_err(CE_CONT, "!vpd %s\n", state->hs_hca_pn);
	} else {
		/* Wrong VPD String ID Tag */
		cmn_err(CE_NOTE, "!VPD String ID Tag not found, tag: %02x\n",
		    vpd.vpd_char[0]);
		goto out;
	}
	return;
out:
	/* On any failure, record that no part number is available */
	state->hs_hca_pn_len = 0;
}



/*
 * hermon_intr_or_msi_init()
 *    Context: Only called from attach() path context
 */
static int
hermon_intr_or_msi_init(hermon_state_t *state)
{
	int	status;

	/* Query for the list of supported interrupt event types */
	status = ddi_intr_get_supported_types(state->hs_dip,
	    &state->hs_intr_types_avail);
	if (status != DDI_SUCCESS) {
		return (DDI_FAILURE);
	}

	/*
	 * Preference order is MSI-X, then MSI, then legacy (fixed)
	 * interrupts.  MSI-X and MSI are attempted only when the hardware
	 * advertises them and the configuration profile has not disabled
	 * MSI use; legacy interrupts are the final fallback whenever they
	 * are available.  The first type that is successfully added is
	 * recorded in hs_intr_type_chosen.
	 */
	if (state->hs_cfg_profile->cp_use_msi_if_avail != 0) {
		if (state->hs_intr_types_avail & DDI_INTR_TYPE_MSIX) {
			if (hermon_add_intrs(state, DDI_INTR_TYPE_MSIX) ==
			    DDI_SUCCESS) {
				state->hs_intr_type_chosen =
				    DDI_INTR_TYPE_MSIX;
				return (DDI_SUCCESS);
			}
		}
		if (state->hs_intr_types_avail & DDI_INTR_TYPE_MSI) {
			if (hermon_add_intrs(state, DDI_INTR_TYPE_MSI) ==
			    DDI_SUCCESS) {
				state->hs_intr_type_chosen =
				    DDI_INTR_TYPE_MSI;
				return (DDI_SUCCESS);
			}
		}
	}

	/*
	 * MSI/MSI-X allocation failed or was not available; fall back to
	 * legacy (fixed) interrupt support.
	 */
	if (state->hs_intr_types_avail & DDI_INTR_TYPE_FIXED) {
		if (hermon_add_intrs(state, DDI_INTR_TYPE_FIXED) ==
		    DDI_SUCCESS) {
			state->hs_intr_type_chosen = DDI_INTR_TYPE_FIXED;
			return (DDI_SUCCESS);
		}
	}

	/* None of MSI-X, MSI, nor legacy interrupts could be enabled */
	return (DDI_FAILURE);
}

/* ARGSUSED */
static int
hermon_intr_cb_handler(dev_info_t *dip, ddi_cb_action_t action, void *cbarg,
    void *arg1, void *arg2)
{
	hermon_state_t	*state = arg1;

	/*
	 * Interrupt-framework callback: only debug-logs the event; no
	 * interrupt reconfiguration is performed here.
	 */
	IBTF_DPRINTF_L2("hermon", "interrupt callback: instance %d, "
	    "action %d, cbarg %d\n", state->hs_instance, action,
	    (uint32_t)(uintptr_t)cbarg);
	return (DDI_SUCCESS);
}

/*
 * hermon_add_intrs()
 *    Context: Only called from attach() path context
 */
static int
hermon_add_intrs(hermon_state_t *state, int intr_type)
{
	int	status;
	int	i;

	/*
	 * Register the interrupt callback handler once; it stays
	 * registered across retries with different interrupt types
	 * (the caller may invoke us again with another intr_type).
	 */
	if (state->hs_intr_cb_hdl == NULL) {
		status = ddi_cb_register(state->hs_dip, DDI_CB_FLAG_INTR,
		    hermon_intr_cb_handler, state, NULL,
		    &state->hs_intr_cb_hdl);
		if (status != DDI_SUCCESS) {
			cmn_err(CE_CONT, "ddi_cb_register failed: 0x%x\n",
			    status);
			state->hs_intr_cb_hdl = NULL;
			return (DDI_FAILURE);
		}
	}

	/* Get number of interrupts/MSI supported */
	status = ddi_intr_get_nintrs(state->hs_dip, intr_type,
	    &state->hs_intrmsi_count);
	if (status != DDI_SUCCESS)
		goto fail_unregister;

	/* Get number of available interrupts/MSI */
	status = ddi_intr_get_navail(state->hs_dip, intr_type,
	    &state->hs_intrmsi_avail);
	if (status != DDI_SUCCESS)
		goto fail_unregister;

	/* Ensure that we have at least one (1) usable MSI or interrupt */
	if ((state->hs_intrmsi_avail < 1) || (state->hs_intrmsi_count < 1))
		goto fail_unregister;

	/*
	 * Allocate the #interrupt/MSI handles.
	 * The number we request is the minimum of these three values:
	 *	HERMON_MSIX_MAX			driver maximum (array size)
	 *	hermon_msix_max			/etc/system override to...
	 *						HERMON_MSIX_MAX
	 *	state->hs_intrmsi_avail		Maximum the ddi provides.
	 */
	status = ddi_intr_alloc(state->hs_dip, &state->hs_intrmsi_hdl[0],
	    intr_type, 0, min(min(HERMON_MSIX_MAX, state->hs_intrmsi_avail),
	    hermon_msix_max), &state->hs_intrmsi_allocd, DDI_INTR_ALLOC_NORMAL);
	if (status != DDI_SUCCESS)
		goto fail_unregister;

	/* Ensure that we have allocated at least one (1) MSI or interrupt */
	if (state->hs_intrmsi_allocd < 1)
		goto fail_unregister;

	/*
	 * Extract the priority for the allocated interrupt/MSI.  This
	 * will be used later when initializing certain mutexes.
	 */
	status = ddi_intr_get_pri(state->hs_intrmsi_hdl[0],
	    &state->hs_intrmsi_pri);
	if (status != DDI_SUCCESS)
		goto fail_free;

	/* Make sure the interrupt/MSI priority is below 'high level' */
	if (state->hs_intrmsi_pri >= ddi_intr_get_hilevel_pri())
		goto fail_free;

	/* Get add'l capability information regarding interrupt/MSI */
	status = ddi_intr_get_cap(state->hs_intrmsi_hdl[0],
	    &state->hs_intrmsi_cap);
	if (status != DDI_SUCCESS)
		goto fail_free;

	return (DDI_SUCCESS);

fail_free:
	/*
	 * Free *all* allocated interrupt/MSI handles: ddi_intr_alloc()
	 * may have allocated more than one (hs_intrmsi_allocd), and
	 * freeing only handle [0] would leak the rest.  Reset the count
	 * so hermon_intr_or_msi_fini() doesn't double-free later.
	 */
	for (i = 0; i < state->hs_intrmsi_allocd; i++)
		(void) ddi_intr_free(state->hs_intrmsi_hdl[i]);
	state->hs_intrmsi_allocd = 0;
fail_unregister:
	/* Unregister the callback on every failure path, consistently */
	(void) ddi_cb_unregister(state->hs_intr_cb_hdl);
	state->hs_intr_cb_hdl = NULL;
	return (DDI_FAILURE);
}


/*
 * hermon_intr_or_msi_fini()
 *    Context: Only called from attach() and/or detach() path contexts
 */
static int
hermon_intr_or_msi_fini(hermon_state_t *state)
{
	int	i;

	/* Release each interrupt/MSI handle that was allocated */
	for (i = 0; i < state->hs_intrmsi_allocd; i++) {
		if (ddi_intr_free(state->hs_intrmsi_hdl[i]) != DDI_SUCCESS) {
			return (DDI_FAILURE);
		}
	}

	/* Drop the interrupt callback registration, if one exists */
	if (state->hs_intr_cb_hdl != NULL) {
		(void) ddi_cb_unregister(state->hs_intr_cb_hdl);
		state->hs_intr_cb_hdl = NULL;
	}
	return (DDI_SUCCESS);
}


/*ARGSUSED*/
void
hermon_pci_capability_msix(hermon_state_t *state, ddi_acc_handle_t hdl,
    uint_t offset)
{
	uint32_t	msix_data;
	uint16_t	msg_cntr;
	uint32_t	t_offset;	/* table offset */
	uint32_t	t_bir;
	uint32_t	p_offset;	/* pba */
	uint32_t	p_bir;
	int		t_size;		/* size in entries - each is 4 dwords */

	/*
	 * Debug dumper for the MSI-X capability structure: reads the
	 * message control word, table offset/BIR, PBA offset/BIR and
	 * table size, and prints each via cmn_err().  No driver state
	 * is modified.
	 */

	/* come in with offset pointing at the capability structure */

	msix_data = pci_config_get32(hdl, offset);
	cmn_err(CE_CONT, "Full cap structure dword = %X\n", msix_data);
	msg_cntr =  pci_config_get16(hdl, offset+2);
	cmn_err(CE_CONT, "MSIX msg_control = %X\n", msg_cntr);
	offset += 4;
	msix_data = pci_config_get32(hdl, offset);	/* table info */
	/* low 3 bits are the BIR; the rest is the table offset (dwords) */
	t_offset = (msix_data & 0xFFF8) >> 3;
	t_bir = msix_data & 0x07;
	offset += 4;
	cmn_err(CE_CONT, "  table %X --offset = %X, bir(bar) = %X\n",
	    msix_data, t_offset, t_bir);
	msix_data = pci_config_get32(hdl, offset);	/* PBA info */
	p_offset = (msix_data & 0xFFF8) >> 3;
	p_bir = msix_data & 0x07;

	cmn_err(CE_CONT, "  PBA   %X --offset = %X, bir(bar) = %X\n",
	    msix_data, p_offset, p_bir);
	t_size = msg_cntr & 0x7FF;		/* low eleven bits */
	cmn_err(CE_CONT, "    table size = %X entries\n", t_size);

	offset = t_offset;		/* reuse this for offset from BAR */
#ifdef HERMON_SUPPORTS_MSIX_BAR
	/*
	 * NOTE(review): this block references loop variables 'i' and 'j'
	 * that are not declared anywhere in this function, so it will not
	 * compile if HERMON_SUPPORTS_MSIX_BAR is ever defined -- confirm
	 * and add declarations before enabling.
	 */
	cmn_err(CE_CONT, "First 2 table entries behind BAR2 \n");
	for (i = 0; i < 2; i++) {
		for (j = 0; j < 4; j++, offset += 4) {
			msix_data = ddi_get32(state->hs_reg_msihdl,
			    (uint32_t *)((uintptr_t)state->hs_reg_msi_baseaddr
			    + offset));
			cmn_err(CE_CONT, "MSI table entry %d, dword %d == %X\n",
			    i, j, msix_data);
		}
	}
#endif

}

/*
 * X86 fastreboot support functions.
 * These functions are used to save/restore MSI-X table/PBA and also
 * to disable MSI-X interrupts in hermon_quiesce().
 */

/* Return the message control for MSI-X */
static ushort_t
get_msix_ctrl(dev_info_t *dip)
{
	ushort_t	caps_ctrl = 0;
	ushort_t	msix_ctrl = 0;
	hermon_state_t *state = ddi_get_soft_state(hermon_statep,
	    DEVI(dip)->devi_instance);
	ddi_acc_handle_t pci_cfg_hdl = hermon_get_pcihdl(state);
	ASSERT(pci_cfg_hdl != NULL);

	/* Locate the MSI-X capability and read its message control word */
	if (PCI_CAP_LOCATE(pci_cfg_hdl, PCI_CAP_ID_MSI_X,
	    &caps_ctrl) == DDI_SUCCESS) {
		msix_ctrl = PCI_CAP_GET16(pci_cfg_hdl, 0, caps_ctrl,
		    PCI_MSIX_CTRL);
		if (msix_ctrl == PCI_CAP_EINVAL16)
			return (0);
	}
	ASSERT(msix_ctrl != 0);

	return (msix_ctrl);
}

/* Return the MSI-X table size */
static size_t
get_msix_tbl_size(dev_info_t *dip)
{
	ushort_t	msix_ctrl = get_msix_ctrl(dip);
	size_t		nvectors;

	ASSERT(msix_ctrl != 0);

	/* Table-size field in message control encodes N-1 entries */
	nvectors = (msix_ctrl & PCI_MSIX_TBL_SIZE_MASK) + 1;
	return (nvectors * PCI_MSIX_VECTOR_SIZE);
}

/* Return the MSI-X PBA size */
static size_t
get_msix_pba_size(dev_info_t *dip)
{
	ushort_t	msix_ctrl = get_msix_ctrl(dip);
	size_t		nvectors;

	ASSERT(msix_ctrl != 0);

	/*
	 * One pending bit per vector (table-size field encodes N-1),
	 * rounded up to whole 64-bit words, expressed in bytes.
	 */
	nvectors = (msix_ctrl & PCI_MSIX_TBL_SIZE_MASK) + 1;
	return ((nvectors + 63) / 64 * 8);
}

/* Set up the MSI-X table/PBA save area */
static void
hermon_set_msix_info(hermon_state_t *state)
{
	uint_t			rnumber, breg, nregs;
	ushort_t		caps_ctrl = 0, msix_ctrl = 0;
	pci_regspec_t		*rp;
	int			reg_size, addr_space, offset, *regs_list, i;

	/*
	 * MSI-X BIR Index Table:
	 * BAR indicator register (BIR) to Base Address register.
	 */
	uchar_t pci_msix_bir_index[8] = {0x10, 0x14, 0x18, 0x1c,
	    0x20, 0x24, 0xff, 0xff};

	/* Fastreboot data access attribute */
	ddi_device_acc_attr_t	dev_attr = {
		0,				/* version */
		DDI_STRUCTURE_LE_ACC,
		DDI_STRICTORDER_ACC,		/* attr access */
		0
	};

	ddi_acc_handle_t pci_cfg_hdl = hermon_get_pcihdl(state);
	ASSERT(pci_cfg_hdl != NULL);

	/*
	 * Locate the MSI-X capability and read its message control word.
	 * msix_ctrl is initialized to zero so that a failed
	 * PCI_CAP_LOCATE() is caught by the check below instead of
	 * leaving msix_ctrl uninitialized (and caps_ctrl being used
	 * with a garbage value in the PCI_CAP_GET32() calls that follow).
	 */
	if ((PCI_CAP_LOCATE(pci_cfg_hdl,
	    PCI_CAP_ID_MSI_X, &caps_ctrl) == DDI_SUCCESS)) {
		if ((msix_ctrl = PCI_CAP_GET16(pci_cfg_hdl, 0, caps_ctrl,
		    PCI_MSIX_CTRL)) == PCI_CAP_EINVAL16)
			return;
	}
	ASSERT(msix_ctrl != 0);
	if (msix_ctrl == 0)
		return;

	state->hs_msix_tbl_offset = PCI_CAP_GET32(pci_cfg_hdl, 0, caps_ctrl,
	    PCI_MSIX_TBL_OFFSET);

	/* Get the BIR for MSI-X table */
	breg = pci_msix_bir_index[state->hs_msix_tbl_offset &
	    PCI_MSIX_TBL_BIR_MASK];
	ASSERT(breg != 0xFF);

	/* Set the MSI-X table offset */
	state->hs_msix_tbl_offset = state->hs_msix_tbl_offset &
	    ~PCI_MSIX_TBL_BIR_MASK;

	/* Set the MSI-X table size (table-size field encodes N-1) */
	state->hs_msix_tbl_size = ((msix_ctrl & PCI_MSIX_TBL_SIZE_MASK) + 1) *
	    PCI_MSIX_VECTOR_SIZE;

	if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, state->hs_dip,
	    DDI_PROP_DONTPASS, "reg", (int **)&regs_list, &nregs) !=
	    DDI_PROP_SUCCESS) {
		return;
	}
	reg_size = sizeof (pci_regspec_t) / sizeof (int);

	/* Check the register number for MSI-X table */
	for (i = 1, rnumber = 0; i < nregs/reg_size; i++) {
		rp = (pci_regspec_t *)&regs_list[i * reg_size];
		addr_space = rp->pci_phys_hi & PCI_ADDR_MASK;
		offset = PCI_REG_REG_G(rp->pci_phys_hi);

		if ((offset == breg) && ((addr_space == PCI_ADDR_MEM32) ||
		    (addr_space == PCI_ADDR_MEM64))) {
			rnumber = i;
			break;
		}
	}
	ASSERT(rnumber != 0);
	state->hs_msix_tbl_rnumber = rnumber;

	/* Set device attribute version and access according to Hermon FM */
	dev_attr.devacc_attr_version = hermon_devacc_attr_version(state);
	dev_attr.devacc_attr_access = hermon_devacc_attr_access(state);

	/* Map the entire MSI-X vector table */
	if (hermon_regs_map_setup(state, state->hs_msix_tbl_rnumber,
	    (caddr_t *)&state->hs_msix_tbl_addr, state->hs_msix_tbl_offset,
	    state->hs_msix_tbl_size, &dev_attr,
	    &state->hs_fm_msix_tblhdl) != DDI_SUCCESS) {
		/* free the "reg" property before bailing out (was leaked) */
		ddi_prop_free(regs_list);
		return;
	}

	state->hs_msix_pba_offset = PCI_CAP_GET32(pci_cfg_hdl, 0, caps_ctrl,
	    PCI_MSIX_PBA_OFFSET);

	/* Get the BIR for MSI-X PBA */
	breg = pci_msix_bir_index[state->hs_msix_pba_offset &
	    PCI_MSIX_PBA_BIR_MASK];
	ASSERT(breg != 0xFF);

	/* Set the MSI-X PBA offset */
	state->hs_msix_pba_offset = state->hs_msix_pba_offset &
	    ~PCI_MSIX_PBA_BIR_MASK;

	/* Set the MSI-X PBA size (one bit/vector, in 64-bit words) */
	state->hs_msix_pba_size =
	    ((msix_ctrl & PCI_MSIX_TBL_SIZE_MASK) + 64) / 64 * 8;

	/* Check the register number for MSI-X PBA */
	for (i = 1, rnumber = 0; i < nregs/reg_size; i++) {
		rp = (pci_regspec_t *)&regs_list[i * reg_size];
		addr_space = rp->pci_phys_hi & PCI_ADDR_MASK;
		offset = PCI_REG_REG_G(rp->pci_phys_hi);

		if ((offset == breg) && ((addr_space == PCI_ADDR_MEM32) ||
		    (addr_space == PCI_ADDR_MEM64))) {
			rnumber = i;
			break;
		}
	}
	ASSERT(rnumber != 0);
	state->hs_msix_pba_rnumber = rnumber;
	ddi_prop_free(regs_list);

	/* Map in the MSI-X Pending Bit Array */
	if (hermon_regs_map_setup(state, state->hs_msix_pba_rnumber,
	    (caddr_t *)&state->hs_msix_pba_addr, state->hs_msix_pba_offset,
	    state->hs_msix_pba_size, &dev_attr,
	    &state->hs_fm_msix_pbahdl) != DDI_SUCCESS) {
		hermon_regs_map_free(state, &state->hs_fm_msix_tblhdl);
		state->hs_fm_msix_tblhdl = NULL;
		return;
	}

	/* Set the MSI-X table save area */
	state->hs_msix_tbl_entries = kmem_alloc(state->hs_msix_tbl_size,
	    KM_SLEEP);

	/* Set the MSI-X PBA save area */
	state->hs_msix_pba_entries = kmem_alloc(state->hs_msix_pba_size,
	    KM_SLEEP);
}

/* Disable Hermon interrupts */
static int
hermon_intr_disable(hermon_state_t *state)
{
	ushort_t msix_ctrl = 0, caps_ctrl = 0;
	ddi_acc_handle_t pci_cfg_hdl = hermon_get_pcihdl(state);
	ddi_acc_handle_t msix_tblhdl = hermon_get_msix_tblhdl(state);
	size_t i, j, msix_tbl_size;
	ASSERT(pci_cfg_hdl != NULL && msix_tblhdl != NULL);
	ASSERT(state->hs_intr_types_avail &
	    (DDI_INTR_TYPE_FIXED | DDI_INTR_TYPE_MSI | DDI_INTR_TYPE_MSIX));

	/*
	 * Check if MSI-X interrupts are used. If so, disable MSI-X
	 * interrupts.  If not, since Hermon doesn't support MSI interrupts,
	 * assuming the legacy interrupt is used instead, disable the legacy
	 * interrupt.
	 */
	if ((state->hs_cfg_profile->cp_use_msi_if_avail != 0) &&
	    (state->hs_intr_types_avail & DDI_INTR_TYPE_MSIX)) {

		if ((PCI_CAP_LOCATE(pci_cfg_hdl,
		    PCI_CAP_ID_MSI_X, &caps_ctrl) == DDI_SUCCESS)) {
			if ((msix_ctrl = PCI_CAP_GET16(pci_cfg_hdl, 0,
			    caps_ctrl, PCI_MSIX_CTRL)) == PCI_CAP_EINVAL16)
				return (DDI_FAILURE);
		}
		ASSERT(msix_ctrl != 0);

		if (!(msix_ctrl & PCI_MSIX_ENABLE_BIT))
			return (DDI_SUCCESS);

		/*
		 * Clear all inums in the MSI-X table.  The table size is
		 * hoisted out of the loop condition: get_msix_tbl_size()
		 * re-walks the PCI capability list with config-space reads
		 * on every call, which the old code did once per iteration.
		 */
		msix_tbl_size = get_msix_tbl_size(state->hs_dip);
		for (i = 0; i < msix_tbl_size; i += PCI_MSIX_VECTOR_SIZE) {
			for (j = 0; j < PCI_MSIX_VECTOR_SIZE; j += 4) {
				char *addr = state->hs_msix_tbl_addr + i + j;
				ddi_put32(msix_tblhdl,
				    (uint32_t *)(uintptr_t)addr, 0x0);
			}
		}

		/* Disable MSI-X interrupts */
		msix_ctrl &= ~PCI_MSIX_ENABLE_BIT;
		PCI_CAP_PUT16(pci_cfg_hdl, 0, caps_ctrl, PCI_MSIX_CTRL,
		    msix_ctrl);

	} else {
		uint16_t cmdreg = pci_config_get16(pci_cfg_hdl, PCI_CONF_COMM);
		ASSERT(state->hs_intr_types_avail & DDI_INTR_TYPE_FIXED);

		/* Disable the legacy interrupts */
		cmdreg |= PCI_COMM_INTX_DISABLE;
		pci_config_put16(pci_cfg_hdl, PCI_CONF_COMM, cmdreg);
	}

	return (DDI_SUCCESS);
}

/*
 * hermon_quiesce()
 *    Hermon quiesce(9E) entry point (fast reboot support).
 *
 *    Quiets the HCA so that the next kernel finds the device in a sane
 *    state: shuts down the IB ports, closes the HCA, disables interrupts,
 *    saves the PCI config space (and the MSI-X table/PBA, if mapped),
 *    performs a software reset of the device, and then restores the saved
 *    state.  Per the Hermon software reset application note, config regs
 *    22 and 23 are neither saved nor restored, and the command register
 *    (reg 1, offset 0x4) is restored last.
 *
 *    Returns DDI_SUCCESS on success (or immediately when the instance is
 *    in maintenance mode, where there is nothing to quiesce); DDI_FAILURE
 *    on any error, with hs_quiescing cleared so normal operation can
 *    continue.
 */
static int
hermon_quiesce(dev_info_t *dip)
{
        hermon_state_t *state = ddi_get_soft_state(hermon_statep,
            DEVI(dip)->devi_instance);
        ddi_acc_handle_t pcihdl, cmdhdl, msix_tbl_hdl, msix_pba_hdl;
        uint32_t sem, reset_delay;
        uint64_t data64;
        uint32_t data32;
        int status, i, j, loopcnt;
        uint_t offset;

        /*
         * Fix: previously "state" was dereferenced in the variable
         * initializers above, before the NULL ASSERT could run (and
         * ASSERT() is a no-op on non-DEBUG kernels).  Check first, then
         * fetch the access handles.
         */
        ASSERT(state != NULL);
        if (state == NULL)
                return (DDI_FAILURE);

        pcihdl = hermon_get_pcihdl(state);
        cmdhdl = hermon_get_cmdhdl(state);
        msix_tbl_hdl = hermon_get_msix_tblhdl(state);
        msix_pba_hdl = hermon_get_msix_pbahdl(state);

        /* start fastreboot */
        state->hs_quiescing = B_TRUE;

        /* If it's in maintenance mode, do nothing but return with SUCCESS */
        if (!HERMON_IS_OPERATIONAL(state->hs_operational_mode)) {
                return (DDI_SUCCESS);
        }

        /*
         * Only read the configuration profile once we know the instance
         * is operational (it may not be fully set up in maintenance mode).
         */
        reset_delay = state->hs_cfg_profile->cp_sw_reset_delay;

        /* suppress Hermon FM ereports */
        if (hermon_get_state(state) & HCA_EREPORT_FM) {
                hermon_clr_state_nolock(state, HCA_EREPORT_FM);
        }

        /* Shutdown HCA ports */
        if (hermon_hca_ports_shutdown(state,
            state->hs_cfg_profile->cp_num_ports) != HERMON_CMD_SUCCESS) {
                state->hs_quiescing = B_FALSE;
                return (DDI_FAILURE);
        }

        /* Close HCA */
        if (hermon_close_hca_cmd_post(state, HERMON_CMD_NOSLEEP_SPIN) !=
            HERMON_CMD_SUCCESS) {
                state->hs_quiescing = B_FALSE;
                return (DDI_FAILURE);
        }

        /* Disable interrupts */
        if (hermon_intr_disable(state) != DDI_SUCCESS) {
                state->hs_quiescing = B_FALSE;
                return (DDI_FAILURE);
        }

        /*
         * Query the PCI capabilities of the HCA device, but don't process
         * the VPD until after reset.
         */
        if (hermon_pci_capability_list(state, pcihdl) != DDI_SUCCESS) {
                state->hs_quiescing = B_FALSE;
                return (DDI_FAILURE);
        }

        /*
         * Read all PCI config info (reg0...reg63).  Note: According to the
         * Hermon software reset application note, we should not read or
         * restore the values in reg22 and reg23.
         * NOTE:  For Hermon (and Arbel too) it says to restore the command
         * register LAST, and technically, you need to restore the
         * PCIE Capability "device control" and "link control" (word-sized,
         * at offsets 0x08 and 0x10 from the capability ID respectively).
         * We hold off restoring the command register - offset 0x4 - till last
         */

        /* 1st, wait for the semaphore assure accessibility - per PRM */
        status = -1;
        for (i = 0; i < NANOSEC/MICROSEC /* 1sec timeout */; i++) {
                sem = ddi_get32(cmdhdl, state->hs_cmd_regs.sw_semaphore);
                if (sem == 0) {
                        status = 0;
                        break;
                }
                drv_usecwait(1);
        }

        /* Check if timeout happens */
        if (status == -1) {
                state->hs_quiescing = B_FALSE;
                return (DDI_FAILURE);
        }

        /* MSI-X interrupts are used, save the MSI-X table */
        if (msix_tbl_hdl && msix_pba_hdl) {
                /* save MSI-X table, 32 bits at a time */
                for (i = 0; i < get_msix_tbl_size(state->hs_dip);
                    i += PCI_MSIX_VECTOR_SIZE) {
                        for (j = 0; j < PCI_MSIX_VECTOR_SIZE; j += 4) {
                                char *addr = state->hs_msix_tbl_addr + i + j;
                                data32 = ddi_get32(msix_tbl_hdl,
                                    (uint32_t *)(uintptr_t)addr);
                                *(uint32_t *)(uintptr_t)(state->
                                    hs_msix_tbl_entries + i + j) = data32;
                        }
                }
                /* save MSI-X PBA, one 64-bit word per read */
                for (i = 0; i < get_msix_pba_size(state->hs_dip); i += 8) {
                        char *addr = state->hs_msix_pba_addr + i;
                        data64 = ddi_get64(msix_pba_hdl,
                            (uint64_t *)(uintptr_t)addr);
                        *(uint64_t *)(uintptr_t)(state->
                            hs_msix_pba_entries + i) = data64;
                }
        }

        /* save PCI config space (skipping reserved regs 22 and 23) */
        for (i = 0; i < HERMON_SW_RESET_NUMREGS; i++) {
                if ((i != HERMON_SW_RESET_REG22_RSVD) &&
                    (i != HERMON_SW_RESET_REG23_RSVD)) {
                        state->hs_cfg_data[i]  =
                            pci_config_get32(pcihdl, i << 2);
                }
        }

        /* SW-reset HCA */
        ddi_put32(cmdhdl, state->hs_cmd_regs.sw_reset, HERMON_SW_RESET_START);

        /*
         * This delay is required so as not to cause a panic here. If the
         * device is accessed too soon after reset it will not respond to
         * config cycles, causing a Master Abort and panic.
         */
        drv_usecwait(reset_delay);

        /* Poll waiting for the device to finish resetting */
        loopcnt = 100;  /* 100 times @ 100 usec - total delay 10 msec */
        while ((pci_config_get32(pcihdl, 0) & 0x0000FFFF) != PCI_VENID_MLX) {
                drv_usecwait(HERMON_SW_RESET_POLL_DELAY);
                if (--loopcnt == 0)
                        break;  /* just in case, break and go on */
        }
        if (loopcnt == 0) {
                state->hs_quiescing = B_FALSE;
                return (DDI_FAILURE);
        }

        /*
         * Restore the config info, skipping the reserved regs and the
         * command/status reg (reg 1), which is restored last below.
         */
        for (i = 0; i < HERMON_SW_RESET_NUMREGS; i++) {
                if (i == 1) continue;   /* skip the status/ctrl reg */
                if ((i != HERMON_SW_RESET_REG22_RSVD) &&
                    (i != HERMON_SW_RESET_REG23_RSVD)) {
                        pci_config_put32(pcihdl, i << 2, state->hs_cfg_data[i]);
                }
        }

        /* If MSI-X interrupts are used, restore the MSI-X table */
        if (msix_tbl_hdl && msix_pba_hdl) {
                /* restore MSI-X PBA */
                for (i = 0; i < get_msix_pba_size(state->hs_dip); i += 8) {
                        char *addr = state->hs_msix_pba_addr + i;
                        data64 = *(uint64_t *)(uintptr_t)
                            (state->hs_msix_pba_entries + i);
                        ddi_put64(msix_pba_hdl,
                            (uint64_t *)(uintptr_t)addr, data64);
                }
                /* restore MSI-X table */
                for (i = 0; i < get_msix_tbl_size(state->hs_dip);
                    i += PCI_MSIX_VECTOR_SIZE) {
                        for (j = 0; j < PCI_MSIX_VECTOR_SIZE; j += 4) {
                                char *addr = state->hs_msix_tbl_addr + i + j;
                                data32 = *(uint32_t *)(uintptr_t)
                                    (state->hs_msix_tbl_entries + i + j);
                                ddi_put32(msix_tbl_hdl,
                                    (uint32_t *)(uintptr_t)addr, data32);
                        }
                }
        }

        /*
         * PCI Express Capability - we saved during capability list, and
         * we'll restore them here.
         */
        offset = state->hs_pci_cap_offset;
        data32 = state->hs_pci_cap_devctl;
        pci_config_put32(pcihdl, offset + HERMON_PCI_CAP_DEV_OFFS, data32);
        data32 = state->hs_pci_cap_lnkctl;
        pci_config_put32(pcihdl, offset + HERMON_PCI_CAP_LNK_OFFS, data32);

        /*
         * Restore the command register last, forcing on Memory Space and
         * Bus Master enable (bits 0x0006) so the device is reachable.
         */
        pci_config_put32(pcihdl, 0x04, (state->hs_cfg_data[1] | 0x0006));

        return (DDI_SUCCESS);
}