root/usr/src/uts/common/io/ib/clients/iser/iser_ib.c
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
 */

#include <sys/types.h>
#include <sys/ddi.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <sys/sunddi.h>
#include <sys/sysmacros.h>
#include <sys/iscsi_protocol.h>

#include <sys/ib/clients/iser/iser.h>
#include <sys/ib/clients/iser/iser_idm.h>

/*
 * iser_ib.c
 * Routines for InfiniBand transport for iSER
 *
 * This file contains the routines to interface with the IBT API to attach and
 * allocate IB resources, handle async events, and post recv work requests.
 *
 */

/* HCA lookup, keyed by a port GID or by the HCA GUID */
static iser_hca_t *iser_ib_gid2hca(ib_gid_t gid);
static iser_hca_t *iser_ib_guid2hca(ib_guid_t guid);

/* Per-HCA allocation/refresh, and setup/teardown of the global HCA list */
static iser_hca_t *iser_ib_alloc_hca(ib_guid_t guid);
static int iser_ib_free_hca(iser_hca_t *hca);
static int iser_ib_update_hcaports(iser_hca_t *hca);
static int iser_ib_init_hcas(void);
static int iser_ib_fini_hcas(void);

/* Service binding helpers, used per HCA port (see iser_ib_bind_service) */
static iser_sbind_t *iser_ib_get_bind(
    iser_svc_t *iser_svc, ib_guid_t hca_guid, ib_gid_t gid);
static int iser_ib_activate_port(
    idm_svc_t *idm_svc, ib_guid_t guid, ib_gid_t gid);
static void iser_ib_deactivate_port(ib_guid_t hca_guid, ib_gid_t gid);

/* Per-channel QP bookkeeping, used by channel alloc/free */
static void iser_ib_init_qp(iser_chan_t *chan, uint_t sq_size, uint_t rq_size);
static void iser_ib_fini_qp(iser_qp_t *qp);

/* Completion queue allocation, used by iser_ib_alloc_rc_channel */
static int iser_ib_setup_cq(ibt_hca_hdl_t hca_hdl, uint_t cq_size,
    ibt_cq_hdl_t *cq_hdl);

/* Fills in the argument structure passed to ibt_alloc_rc_channel() */
static void iser_ib_setup_chanargs(uint8_t hca_port, ibt_cq_hdl_t scq_hdl,
    ibt_cq_hdl_t rcq_hdl, uint_t sq_size, uint_t rq_size,
    ibt_pd_hdl_t hca_pdhdl, ibt_rc_chan_alloc_args_t *cargs);

/* Handlers for individual asynchronous events */
static void iser_ib_handle_portup_event(ibt_hca_hdl_t hdl,
    ibt_async_event_t *event);
static void iser_ib_handle_portdown_event(ibt_hca_hdl_t hdl,
    ibt_async_event_t *event);
static void iser_ib_handle_hca_detach_event(ibt_hca_hdl_t hdl,
    ibt_async_event_t *event);

/* Taskq entry point dispatched by iser_ib_post_recv_async() */
static void iser_ib_post_recv_task(void *arg);

/*
 * Client registration information handed to ibt_attach() by iser_ib_init().
 * NOTE(review): the per-field notes below assume the usual
 * ibt_clnt_modinfo_s member order (version, class, async handler,
 * reserved, name) -- confirm against the IBTF header.
 */
static struct ibt_clnt_modinfo_s iser_ib_modinfo = {
        IBTI_V_CURR,
        IBT_STORAGE_DEV,
        iser_ib_async_handler,  /* asynchronous event callback */
        NULL,
        "iSER"                  /* client name */
};

/*
 * iser_ib_init
 *
 * Attaches iSER to IBTF as a client, creates the global work request
 * kmem cache, and populates the list of HCAs. No CM service is registered
 * or bound here: the target registers the iSER service when required and
 * binds it when it comes online.
 *
 * Returns DDI_SUCCESS, or DDI_FAILURE if either the IBTF attach or the HCA
 * initialization fails. On HCA initialization failure the cache and the
 * IBTF attachment are torn down again before returning.
 */
int
iser_ib_init(void)
{
        int             rval;

        /* Become an IBTF client */
        rval = ibt_attach(&iser_ib_modinfo, iser_state->is_dip, iser_state,
            &iser_state->is_ibhdl);
        if (rval != DDI_SUCCESS) {
                ISER_LOG(CE_NOTE, "iser_ib_init: ibt_attach failed (0x%x)",
                    rval);
                return (DDI_FAILURE);
        }

        /* Global kmem cache backing the work request structures */
        iser_state->iser_wr_cache = kmem_cache_create("iser_wr_cache",
            sizeof (iser_wr_t), 0, NULL, NULL, NULL,
            iser_state, NULL, KM_SLEEP);

        /* Build the HCA list; when that works there is nothing left to do */
        rval = iser_ib_init_hcas();
        if (rval == DDI_SUCCESS) {
                return (DDI_SUCCESS);
        }

        /* No usable HCA list: unwind the cache and the IBTF attachment */
        kmem_cache_destroy(iser_state->iser_wr_cache);
        (void) ibt_detach(iser_state->is_ibhdl);
        iser_state->is_ibhdl = NULL;
        ISER_LOG(CE_NOTE, "iser_ib_init: failed to initialize HCAs");

        return (DDI_FAILURE);
}

/*
 * iser_ib_fini
 *
 * Releases what iser_ib_init() set up: the HCA list and its resources,
 * the global work request kmem cache, and the IBTF client attachment.
 * IDM is expected to have disabled all services before this is called.
 *
 * Returns DDI_FAILURE, leaving the cache and the attachment in place, if
 * the HCA list cannot be torn down; DDI_SUCCESS otherwise.
 */
int
iser_ib_fini(void)
{
        int             rval;

        /* The HCA list goes first; give up if it cannot be released */
        rval = iser_ib_fini_hcas();
        if (rval != DDI_SUCCESS) {
                return (DDI_FAILURE);
        }

        /* Drop the global work request kmem_cache */
        kmem_cache_destroy(iser_state->iser_wr_cache);

        /* Nothing to detach if we never attached (or already detached) */
        if (iser_state->is_ibhdl == NULL) {
                return (DDI_SUCCESS);
        }

        /* Detach from IBTF and forget the client handle */
        (void) ibt_detach(iser_state->is_ibhdl);
        iser_state->is_ibhdl = NULL;

        return (DDI_SUCCESS);
}

/*
 * iser_ib_register_service
 *
 * Registers the iSER service with IBTF under the service ID stored in the
 * iSER service structure (is_svcid), with iser_ib_cm_handler as the CM
 * callback. The resulting service handle is stored in is_srvhdl.
 *
 * Returns the status of ibt_register_service() unchanged.
 */
int
iser_ib_register_service(idm_svc_t *idm_svc)
{
        ibt_srv_desc_t  desc;
        iser_svc_t      *svc = (iser_svc_t *)idm_svc->is_iser_svc;

        /* Service description: CM callback only, no special flags */
        bzero(&desc, sizeof (ibt_srv_desc_t));
        desc.sd_handler = iser_ib_cm_handler;
        desc.sd_flags = IBT_SRV_NO_FLAGS;

        /* Hand the description to IBTF and pass its verdict to the caller */
        return (ibt_register_service(iser_state->is_ibhdl, &desc,
            svc->is_svcid, 1, &svc->is_srvhdl, NULL));
}

/*
 * iser_ib_bind_service
 *
 * Binds the given iSER service on every active port of every known HCA.
 * The current specification gives the user no way to choose transport
 * bindings per iSCSI target, so the ULP calls this to bind the target on
 * all available iSER ports once it knows an IB HCA is present.
 *
 * Ports that are not active are skipped; the async handler is expected to
 * bind them if they come up later. A port that is already bound counts as
 * a successful bind without calling iser_ib_activate_port() again.
 *
 * Returns ISER_STATUS_SUCCESS when at least one port is bound, or when
 * nothing was bound but at least one port was merely inactive. Returns
 * ISER_STATUS_FAIL otherwise.
 */
int
iser_ib_bind_service(idm_svc_t *idm_svc)
{
        iser_hca_t      *hca;
        ib_gid_t        gid;
        int             num_ports = 0;
        int             num_binds = 0;
        int             num_inactive_binds = 0; /* ports seen but not active */
        int             status;
        int             port;

        ASSERT(idm_svc != NULL);
        ASSERT(idm_svc->is_iser_svc != NULL);

        /* Walk every port of every HCA while holding the HCA list lock */
        mutex_enter(&iser_state->is_hcalist_lock);

        hca = list_head(&iser_state->is_hcalist);
        while (hca != NULL) {

                for (port = 0; port < hca->hca_num_ports; port++) {
                        num_ports++;

                        /*
                         * Inactive port: just count it. The async handler
                         * will try the bind if the port comes up later.
                         */
                        if (hca->hca_port_info[port].p_linkstate !=
                            IBT_PORT_ACTIVE) {
                                num_inactive_binds++;
                                continue;
                        }

                        gid = hca->hca_port_info[port].p_sgid_tbl[0];

                        /* An existing binding counts as bound */
                        if (iser_ib_get_bind(
                            idm_svc->is_iser_svc, hca->hca_guid, gid) != NULL) {
                                num_binds++;
                                continue;
                        }

                        status = iser_ib_activate_port(
                            idm_svc, hca->hca_guid, gid);
                        if (status == IBT_SUCCESS) {
                                num_binds++;
                                continue;
                        }

                        ISER_LOG(CE_NOTE,
                            "iser_ib_bind_service: "
                            "iser_ib_activate_port failure "
                            "(0x%x)", status);
                }

                hca = list_next(&iser_state->is_hcalist, hca);
        }
        mutex_exit(&iser_state->is_hcalist_lock);

        if (num_binds != 0) {
                ISER_LOG(CE_NOTE, "iser_ib_bind_service: Service available on "
                    "(%d) of (%d) ports", num_binds, num_ports);
                return (ISER_STATUS_SUCCESS);
        }

        if (num_inactive_binds != 0) {
                ISER_LOG(CE_NOTE, "iser_ib_bind_service: Could not bind "
                    "service, HCA ports are not active.");
                /*
                 * Not an error: the bind is deferred to the async handler,
                 * which runs when one of those ports becomes active.
                 */
                return (ISER_STATUS_SUCCESS);
        }

        ISER_LOG(CE_NOTE, "iser_ib_bind_service: Did not bind service");
        return (ISER_STATUS_FAIL);
}

/*
 * iser_ib_unbind_service
 *
 * Removes every binding recorded on the service's bind list: each entry
 * is unbound from IBTF, unlinked from the list and freed. Does nothing if
 * idm_svc or its iSER service pointer is NULL.
 */
void
iser_ib_unbind_service(idm_svc_t *idm_svc)
{
        iser_svc_t      *svc;
        iser_sbind_t    *sbind, *following;

        if (idm_svc == NULL || idm_svc->is_iser_svc == NULL) {
                return;
        }

        svc = idm_svc->is_iser_svc;

        sbind = list_head(&svc->is_sbindlist);
        while (sbind != NULL) {
                /* Fetch the successor first; this entry is about to be freed */
                following = list_next(&svc->is_sbindlist, sbind);

                (void) ibt_unbind_service(svc->is_srvhdl, sbind->is_sbindhdl);
                list_remove(&svc->is_sbindlist, sbind);
                kmem_free(sbind, sizeof (iser_sbind_t));

                sbind = following;
        }
}

/*
 * iser_ib_deregister_service
 *
 * Deregisters the iSER service handle from IBTF and releases the service
 * ID it was using. Does nothing if idm_svc or its iSER service pointer is
 * NULL. Failures of the two IBTF calls are ignored.
 */
/* ARGSUSED */
void
iser_ib_deregister_service(idm_svc_t *idm_svc)
{
        iser_svc_t      *svc;

        if (idm_svc == NULL || idm_svc->is_iser_svc == NULL) {
                return;
        }

        svc = (iser_svc_t *)idm_svc->is_iser_svc;

        (void) ibt_deregister_service(iser_state->is_ibhdl, svc->is_srvhdl);
        (void) ibt_release_ip_sid(svc->is_svcid);
}

/*
 * iser_ib_get_paths
 *
 * Looks up a single IB path from the local to the remote IP address using
 * ibt_get_ip_paths().
 *
 *   local_ip     source address, or NULL to leave the source unspecified
 *   remote_ip    destination address (required)
 *   path         zeroed here, then filled in with the path found
 *   path_src_ip  passed through to ibt_get_ip_paths() for the source
 *                address that was actually selected
 *
 * Returns ISER_STATUS_SUCCESS, or the ibt_get_ip_paths() status on failure.
 */
int
iser_ib_get_paths(ibt_ip_addr_t *local_ip, ibt_ip_addr_t *remote_ip,
    ibt_path_info_t *path, ibt_path_ip_src_t *path_src_ip)
{
        ibt_ip_path_attr_t      ipattr;
        int                     status;

        bzero(&ipattr, sizeof (ibt_ip_path_attr_t));
        ipattr.ipa_dst_ip       = remote_ip;
        ipattr.ipa_max_paths    = 1;
        ipattr.ipa_ndst         = 1;

        /*
         * A NULL local_ip is accepted (see the success logging below), so
         * it must not be dereferenced unconditionally. In that case the
         * source address stays all-zero from the bzero() above.
         * NOTE(review): this assumes ibt_get_ip_paths() treats a zeroed
         * ipa_src_ip as "any source" -- confirm against the IBTF docs.
         */
        if (local_ip != NULL) {
                ipattr.ipa_src_ip = *local_ip;
        }

        bzero(path, sizeof (ibt_path_info_t));
        status = ibt_get_ip_paths(iser_state->is_ibhdl, IBT_PATH_NO_FLAGS,
            &ipattr, path, NULL, path_src_ip);
        if (status != IBT_SUCCESS) {
                ISER_LOG(CE_NOTE, "iser_ib_get_paths: ibt_get_ip_paths "
                    "failure: status (%d)", status);
                return (status);
        }

        if (local_ip != NULL) {
                ISER_LOG(CE_NOTE, "iser_ib_get_paths success: IP[%x to %x]",
                    local_ip->un.ip4addr, remote_ip->un.ip4addr);
        } else {
                ISER_LOG(CE_NOTE, "iser_ib_get_paths success: "
                    "IP[INADDR_ANY to %x]", remote_ip->un.ip4addr);
        }

        return (ISER_STATUS_SUCCESS);
}

/*
 * iser_ib_alloc_channel_nopathlookup
 *
 * Allocates a reliable connected channel on the HCA identified by
 * hca_guid, using port hca_port. Unlike iser_ib_alloc_channel_pathlookup(),
 * this does not call ibt_get_ip_paths(); the caller supplies the HCA GUID
 * and port directly.
 *
 * Returns the new channel, or NULL if the HCA is unknown or the channel
 * cannot be allocated.
 */
iser_chan_t *
iser_ib_alloc_channel_nopathlookup(ib_guid_t hca_guid, uint8_t hca_port)
{
        iser_hca_t      *hca;
        iser_chan_t     *chan;

        /* Look up the HCA by the GUID the caller gave us */
        hca = iser_ib_guid2hca(hca_guid);
        if (hca == NULL) {
                ISER_LOG(CE_NOTE, "iser_ib_alloc_channel_nopathlookup: failed "
                    "to lookup HCA(%llx) handle", (longlong_t)hca_guid);
                return (NULL);
        }

        chan = iser_ib_alloc_rc_channel(hca, hca_port);
        if (chan == NULL) {
                ISER_LOG(CE_NOTE, "iser_ib_alloc_channel_nopathlookup: failed "
                    "to alloc channel on HCA(%llx) %d",
                    (longlong_t)hca_guid, hca_port);
                return (NULL);
        }

        /* Log under this function's own name, not the pathlookup variant */
        ISER_LOG(CE_NOTE, "iser_ib_alloc_channel_nopathlookup success: "
            "chanhdl (0x%p), HCA(%llx) %d",
            (void *)chan->ic_chanhdl, (longlong_t)hca_guid, hca_port);

        return (chan);
}

/*
 * iser_ib_alloc_channel_pathlookup
 *
 * Allocates a reliable connected channel, first calling
 * iser_ib_get_paths() with the given local and remote addresses to learn
 * the local GID and the HCA port number to use.
 *
 *   local_ip   source address; may be NULL, matching iser_ib_get_paths()
 *   remote_ip  destination address (required)
 *
 * On success the path, the selected local address and the remote address
 * are recorded in the returned channel. Returns NULL if the path lookup
 * fails, no HCA owns the local GID, or the channel cannot be allocated.
 */
iser_chan_t *
iser_ib_alloc_channel_pathlookup(
    ibt_ip_addr_t *local_ip, ibt_ip_addr_t *remote_ip)
{
        ibt_path_info_t         ibt_path;
        ibt_path_ip_src_t       path_src_ip;
        ib_gid_t                lgid;
        uint8_t                 hca_port; /* from path */
        iser_hca_t              *hca;
        iser_chan_t             *chan;
        longlong_t              lip;    /* local address, for logging only */
        int                     status;

        /* local_ip is optional, so never dereference it blindly for logs */
        lip = (local_ip != NULL) ? (longlong_t)local_ip->un.ip4addr : 0;

        /* Lookup a path to the given destination */
        status = iser_ib_get_paths(
            local_ip, remote_ip, &ibt_path, &path_src_ip);

        if (status != ISER_STATUS_SUCCESS) {
                ISER_LOG(CE_NOTE, "iser_ib_alloc_channel_pathlookup: faild "
                    "Path lookup IP:[%llx to %llx] failed: status (%d)",
                    lip,
                    (longlong_t)remote_ip->un.ip4addr,
                    status);
                return (NULL);
        }

        /* get the local gid from the path info */
        lgid = ibt_path.pi_prim_cep_path.cep_adds_vect.av_sgid;

        /* get the hca port from the path info */
        hca_port = ibt_path.pi_prim_cep_path.cep_hca_port_num;

        /* Lookup the hca using the gid in the path info */
        hca = iser_ib_gid2hca(lgid);
        if (hca == NULL) {
                /*
                 * There is no HCA here, so its GUID cannot be printed;
                 * report the GID that failed to resolve instead.
                 */
                ISER_LOG(CE_NOTE, "iser_ib_alloc_channel_pathlookup: failed "
                    "to lookup HCA handle for lgid (%llx:%llx)",
                    (longlong_t)lgid.gid_prefix, (longlong_t)lgid.gid_guid);
                return (NULL);
        }

        chan = iser_ib_alloc_rc_channel(hca, hca_port);
        if (chan == NULL) {
                ISER_LOG(CE_NOTE, "iser_ib_alloc_channel_pathlookup: failed "
                    "to alloc channel from IP:[%llx to %llx] on HCA (%llx) %d",
                    lip,
                    (longlong_t)remote_ip->un.ip4addr,
                    (longlong_t)hca->hca_guid, hca_port);
                return (NULL);
        }

        ISER_LOG(CE_NOTE, "iser_ib_alloc_channel_pathlookup success: "
            "chanhdl (0x%p), IP:[%llx to %llx], lgid (%llx:%llx), HCA(%llx) %d",
            (void *)chan->ic_chanhdl,
            lip,
            (longlong_t)remote_ip->un.ip4addr,
            (longlong_t)lgid.gid_prefix, (longlong_t)lgid.gid_guid,
            (longlong_t)hca->hca_guid, hca_port);

        /* Remember the path and both endpoints for the later channel open */
        chan->ic_ibt_path       = ibt_path;
        chan->ic_localip        = path_src_ip.ip_primary;
        chan->ic_remoteip       = *remote_ip;

        return (chan);
}

/*
 * iser_ib_alloc_rc_channel
 *
 * Creates an iSER channel on the given HCA and port: allocates the channel
 * structure, sizes the work and completion queues from the HCA attributes,
 * sets up the send and receive CQs with their handlers, and allocates the
 * IBT RC channel. The iSER channel is stored as the IBT channel's private
 * data.
 *
 * Returns the new channel, or NULL if a CQ or the RC channel could not be
 * set up. Everything acquired before the failure is released.
 */
iser_chan_t *
iser_ib_alloc_rc_channel(iser_hca_t *hca, uint8_t hca_port)
{
        iser_chan_t                     *chan;
        ibt_rc_chan_alloc_args_t        cargs;
        uint_t                          sendq_sz, recvq_sz;
        boolean_t                       have_scq = B_FALSE;
        boolean_t                       have_rcq = B_FALSE;
        int                             rval;

        chan = kmem_zalloc(sizeof (iser_chan_t), KM_SLEEP);

        mutex_init(&chan->ic_chan_lock, NULL, MUTEX_DRIVER, NULL);
        mutex_init(&chan->ic_sq_post_lock, NULL, MUTEX_DRIVER, NULL);

        /* Tie the channel to its HCA */
        chan->ic_hca = hca;

        /*
         * Work queue sizes: our default, capped by what the HCA reports
         * as its maximum channel size.
         */
        sendq_sz = min(hca->hca_attr.hca_max_chan_sz, ISER_IB_SENDQ_SIZE);
        recvq_sz = min(hca->hca_attr.hca_max_chan_sz, ISER_IB_RECVQ_SIZE);

        /*
         * Completion queue sizes: one entry more than the work queue being
         * serviced, so the CQ has some headroom. If the HCA cannot provide
         * that, take the HCA's maximum CQ size and shrink the work queue
         * to one less than it.
         */
        if (hca->hca_attr.hca_max_cq_sz < (sendq_sz + 1)) {
                chan->ic_sendcq_sz = hca->hca_attr.hca_max_cq_sz;
                sendq_sz = chan->ic_sendcq_sz - 1;
        } else {
                chan->ic_sendcq_sz = sendq_sz + 1;
        }

        if (hca->hca_attr.hca_max_cq_sz < (recvq_sz + 1)) {
                chan->ic_recvcq_sz = hca->hca_attr.hca_max_cq_sz;
                recvq_sz = chan->ic_recvcq_sz - 1;
        } else {
                chan->ic_recvcq_sz = recvq_sz + 1;
        }

        /* Per-channel QP bookkeeping */
        iser_ib_init_qp(chan, sendq_sz, recvq_sz);

        /* Send CQ, with its handler armed for the next completion */
        rval = iser_ib_setup_cq(hca->hca_hdl, chan->ic_sendcq_sz,
            &chan->ic_sendcq);
        if (rval != ISER_STATUS_SUCCESS) {
                goto fail;
        }
        have_scq = B_TRUE;
        ibt_set_cq_handler(chan->ic_sendcq, iser_ib_sendcq_handler, chan);
        (void) ibt_enable_cq_notify(chan->ic_sendcq, IBT_NEXT_COMPLETION);

        /* Receive CQ, likewise */
        rval = iser_ib_setup_cq(hca->hca_hdl, chan->ic_recvcq_sz,
            &chan->ic_recvcq);
        if (rval != ISER_STATUS_SUCCESS) {
                goto fail;
        }
        have_rcq = B_TRUE;
        ibt_set_cq_handler(chan->ic_recvcq, iser_ib_recvcq_handler, chan);
        (void) ibt_enable_cq_notify(chan->ic_recvcq, IBT_NEXT_COMPLETION);

        /* Describe the channel we want and ask IBTF for it */
        iser_ib_setup_chanargs(hca_port, chan->ic_sendcq, chan->ic_recvcq,
            sendq_sz, recvq_sz, hca->hca_pdhdl, &cargs);

        rval = ibt_alloc_rc_channel(hca->hca_hdl,
            IBT_ACHAN_NO_FLAGS, &cargs, &chan->ic_chanhdl, NULL);
        if (rval != IBT_SUCCESS) {
                ISER_LOG(CE_NOTE, "iser_ib_alloc_rc_channel: failed "
                    "ibt_alloc_rc_channel: status (%d)", rval);
                goto fail;
        }

        /* Let IBT callbacks find the iSER channel from the IBT handle */
        (void) ibt_set_chan_private(chan->ic_chanhdl, chan);

        return (chan);

fail:
        /* Release only the CQs that were actually set up */
        if (have_scq) {
                (void) ibt_free_cq(chan->ic_sendcq);
        }
        if (have_rcq) {
                (void) ibt_free_cq(chan->ic_recvcq);
        }
        iser_ib_fini_qp(&chan->ic_qp);
        mutex_destroy(&chan->ic_chan_lock);
        mutex_destroy(&chan->ic_sq_post_lock);
        kmem_free(chan, sizeof (iser_chan_t));

        return (NULL);
}

/*
 * iser_ib_open_rc_channel
 *
 * Opens an RC connection on a previously allocated channel. The whole
 * operation, including the blocking ibt_open_rc_channel() call, runs with
 * the channel lock held.
 *
 * Returns IDM_STATUS_SUCCESS, or the failing IBT status if formatting the
 * private data or opening the channel fails.
 */
int
iser_ib_open_rc_channel(iser_chan_t *chan)
{
        ibt_ip_cm_info_t        cm_info;
        iser_private_data_t     priv;
        ibt_chan_open_args_t    open_args;
        ibt_rc_returns_t        open_rets;
        int                     rval;

        mutex_enter(&chan->ic_chan_lock);

        /*
         * The initiator's CM REQ goes to the iSER RDMA-Aware Service ID and
         * carries the source and destination IP addresses plus the source
         * port.
         */
        bzero(&cm_info, sizeof (ibt_ip_cm_info_t));
        cm_info.src_addr = chan->ic_localip;
        cm_info.dst_addr = chan->ic_remoteip;
        cm_info.src_port = chan->ic_lport;

        /*
         * The CM private data carries the iSER connection parameters: the
         * zero based virtual address exception (ZBVAE) and the Send with
         * Invalidate exception (SIE). Solaris IBT does not currently
         * support ZBVAE or SIE, so both exception bits are set.
         */
        priv.rsvd1 = 0;
        priv.sie = 1;
        priv.zbvae = 1;

        rval = ibt_format_ip_private_data(&cm_info,
            sizeof (iser_private_data_t), &priv);
        if (rval != IBT_SUCCESS) {
                goto fail;
        }

        /* The service ID to connect to is derived from the remote port */
        chan->ic_ibt_path.pi_sid = ibt_get_ip_sid(IPPROTO_TCP, chan->ic_rport);

        /* Arguments for the channel open */
        bzero(&open_args, sizeof (ibt_chan_open_args_t));
        open_args.oc_path               = &chan->ic_ibt_path;
        open_args.oc_cm_handler         = iser_ib_cm_handler;
        open_args.oc_cm_clnt_private    = iser_state;
        open_args.oc_rdma_ra_out        = 4;
        open_args.oc_rdma_ra_in         = 4;
        open_args.oc_path_retry_cnt     = 2;
        open_args.oc_path_rnr_retry_cnt = 2;
        open_args.oc_priv_data_len      = sizeof (iser_private_data_t);
        open_args.oc_priv_data          = &priv;

        bzero(&open_rets, sizeof (ibt_rc_returns_t));

        rval = ibt_open_rc_channel(chan->ic_chanhdl,
            IBT_OCHAN_NO_FLAGS, IBT_BLOCKING, &open_args, &open_rets);
        if (rval != IBT_SUCCESS) {
                goto fail;
        }

        mutex_exit(&chan->ic_chan_lock);
        return (IDM_STATUS_SUCCESS);

fail:
        /* Shared exit for both failure points: log, unlock, report */
        ISER_LOG(CE_NOTE, "iser_ib_open_rc_channel failed: %d", rval);
        mutex_exit(&chan->ic_chan_lock);
        return (rval);
}

/*
 * iser_ib_close_rc_channel
 *
 * Closes the RC channel belonging to this iser_chan handle. The close is
 * issued with IBT_BLOCKING, with no private data and no return arguments,
 * while holding the channel lock. A failure is logged but not reported to
 * the caller.
 */
void
iser_ib_close_rc_channel(iser_chan_t *chan)
{
        int                     rval;

        mutex_enter(&chan->ic_chan_lock);

        rval = ibt_close_rc_channel(chan->ic_chanhdl, IBT_BLOCKING, NULL,
            0, NULL, NULL, 0);
        if (rval == IBT_SUCCESS) {
                mutex_exit(&chan->ic_chan_lock);
                return;
        }

        ISER_LOG(CE_NOTE, "iser_ib_close_rc_channel: "
            "ibt_close_rc_channel failed: status (%d)", rval);
        mutex_exit(&chan->ic_chan_lock);
}

/*
 * iser_ib_free_rc_channel
 *
 * This function tears down an RC channel's QP initialization and frees it.
 * Note that we do not need synchronization here; the channel has been
 * closed already, so we should only have completion polling occurring.  Once
 * complete, we are free to free the IBTF channel, WQ and CQ resources, and
 * our own related resources.
 *
 * Both wait loops below release chan->ic_conn->ic_lock before sleeping and
 * take it again afterwards, so the function returns with that lock in the
 * same state it was entered with.
 * NOTE(review): this implies the caller must hold chan->ic_conn->ic_lock on
 * entry -- confirm against the callers.
 *
 * The chan structure itself is freed here; the caller must not use it after
 * this returns.
 */
void
iser_ib_free_rc_channel(iser_chan_t *chan)
{
        iser_qp_t       *iser_qp;

        iser_qp = &chan->ic_qp;

        /*
         * Ensure the SQ is empty: poll ic_sq_post_count every half second,
         * dropping the connection lock while asleep.
         * NOTE(review): the counter is read without ic_sq_post_lock held.
         */
        while (chan->ic_sq_post_count != 0) {
                mutex_exit(&chan->ic_conn->ic_lock);
                delay(drv_usectohz(ISER_DELAY_HALF_SECOND));
                mutex_enter(&chan->ic_conn->ic_lock);
        }
        mutex_destroy(&chan->ic_sq_post_lock);

        /*
         * Ensure the RQ is empty: flush the channel, then poll rq_level
         * under qp_lock. Both qp_lock and the connection lock are dropped
         * around each half-second sleep and re-taken in the reverse order
         * (connection lock first, then qp_lock).
         */
        (void) ibt_flush_channel(chan->ic_chanhdl);
        mutex_enter(&iser_qp->qp_lock);
        while (iser_qp->rq_level != 0) {
                mutex_exit(&iser_qp->qp_lock);
                mutex_exit(&chan->ic_conn->ic_lock);
                delay(drv_usectohz(ISER_DELAY_HALF_SECOND));
                mutex_enter(&chan->ic_conn->ic_lock);
                mutex_enter(&iser_qp->qp_lock);
        }

        /* Free our QP handle (qp_lock is released first) */
        mutex_exit(&iser_qp->qp_lock);
        (void) iser_ib_fini_qp(iser_qp);

        /* Free the IBT channel resources */
        (void) ibt_free_channel(chan->ic_chanhdl);
        chan->ic_chanhdl = NULL;

        /* Free the CQs */
        (void) ibt_free_cq(chan->ic_sendcq);
        (void) ibt_free_cq(chan->ic_recvcq);

        /* Free the chan handle */
        mutex_destroy(&chan->ic_chan_lock);
        kmem_free(chan, sizeof (iser_chan_t));
}

/*
 * iser_ib_post_recv_async
 *
 * Schedules iser_ib_post_recv() to run on the iSER taskq for the given
 * channel. iser_ib_post_recv() is the routine that keeps the RQ full: it
 * checks the current fill level of the RQ and posts as many WRs as are
 * needed to fill it again.
 *
 * A hold is taken on the IDM connection before dispatching; the task
 * releases it when it has run. If the dispatch fails the hold is dropped
 * here instead.
 *
 * The caller must hold chan->ic_conn->ic_lock, and must have checked under
 * that lock that the connection is active (not closing, not closed).
 *
 * Returns the ddi_taskq_dispatch() status.
 */

int
iser_ib_post_recv_async(ibt_channel_hdl_t chanhdl)
{
        iser_chan_t     *chan;
        int             rval;

        /* The iSER channel is stored as the IBT channel's private data */
        chan = (iser_chan_t *)ibt_get_chan_private(chanhdl);

        /* Enforce the locking and connection-stage contract stated above */
        ASSERT(mutex_owned(&chan->ic_conn->ic_lock));
        ASSERT((chan->ic_conn->ic_stage >= ISER_CONN_STAGE_ALLOCATED) &&
            (chan->ic_conn->ic_stage <= ISER_CONN_STAGE_LOGGED_IN));

        /* Keep the connection around until the task has run */
        idm_conn_hold(chan->ic_conn->ic_idmc);

        rval = ddi_taskq_dispatch(iser_taskq, iser_ib_post_recv_task,
            (void *)chanhdl, DDI_NOSLEEP);
        if (rval == DDI_SUCCESS) {
                return (rval);
        }

        /* No task will run, so nobody else will drop the hold */
        idm_conn_rele(chan->ic_conn->ic_idmc);

        return (rval);
}

/*
 * iser_ib_post_recv_task
 *
 * Taskq entry point dispatched by iser_ib_post_recv_async(). The argument
 * is the IBT channel handle. Runs iser_ib_post_recv() on it and then drops
 * the IDM connection hold taken at dispatch time.
 */
static void
iser_ib_post_recv_task(void *arg)
{
        ibt_channel_hdl_t       hdl = arg;
        iser_chan_t             *ichan;

        /* Resolve the iSER channel before doing the work */
        ichan = (iser_chan_t *)ibt_get_chan_private(hdl);

        iser_ib_post_recv(hdl);

        idm_conn_rele(ichan->ic_conn->ic_idmc);
}

/*
 * iser_ib_post_recv
 *
 * Refills the receive queue of the given channel. The free space is
 * computed as rq_depth - rq_level, that many messages are requested from
 * iser_msg_get(), and they are posted with ibt_post_recv() in batches of
 * at most ISER_IB_RQ_POST_MAX. rq_level is then raised by the number of
 * WRs actually posted.
 *
 * Returns early, posting nothing, when the connection stage is CLOSING or
 * CLOSED, when the channel has no HCA, or when the RQ is already full.
 * When no messages are available it sleeps half a second and redispatches
 * itself through iser_ib_post_recv_async().
 *
 * Takes chan->ic_conn->ic_lock for the whole call and iser_qp->qp_lock
 * around the RQ accounting. rq_taskqpending is cleared on every exit path
 * except the connection-closing and NULL-HCA early returns and the paths
 * where a follow-up task was dispatched successfully.
 */
void
iser_ib_post_recv(ibt_channel_hdl_t chanhdl)
{
        iser_chan_t     *chan;
        iser_hca_t      *hca;
        iser_msg_t      *msg;
        ibt_recv_wr_t   *wrlist, wr[ISER_IB_RQ_POST_MAX];
        int             rq_space, msg_ret;
        int             total_num, npost;
        uint_t          nposted;
        int             status, i;
        iser_qp_t       *iser_qp;

        /* Pull our iSER channel handle from the private data */
        chan = (iser_chan_t *)ibt_get_chan_private(chanhdl);

        ASSERT(chan != NULL);

        mutex_enter(&chan->ic_conn->ic_lock);

        /* Bail out if the connection is closed; no need for more recv WRs */
        if ((chan->ic_conn->ic_stage == ISER_CONN_STAGE_CLOSING) ||
            (chan->ic_conn->ic_stage == ISER_CONN_STAGE_CLOSED)) {
                mutex_exit(&chan->ic_conn->ic_lock);
                return;
        }

        /* get the QP handle from the iser_chan */
        iser_qp = &chan->ic_qp;

        hca = chan->ic_hca;

        if (hca == NULL) {
                ISER_LOG(CE_NOTE, "iser_ib_post_recv: unable to retrieve "
                    "HCA handle");
                mutex_exit(&chan->ic_conn->ic_lock);
                return;
        }

        /* check for space to post on the RQ */
        mutex_enter(&iser_qp->qp_lock);
        rq_space = iser_qp->rq_depth - iser_qp->rq_level;
        if (rq_space == 0) {
                /* The RQ is full, clear the pending flag and return */
                iser_qp->rq_taskqpending = B_FALSE;
                mutex_exit(&iser_qp->qp_lock);
                mutex_exit(&chan->ic_conn->ic_lock);
                return;
        }

        /* Keep track of the lowest value for rq_min_post_level */
        if (iser_qp->rq_level < iser_qp->rq_min_post_level)
                iser_qp->rq_min_post_level = iser_qp->rq_level;

        /*
         * qp_lock is dropped for the allocation and posting below; rq_space
         * is the snapshot taken above, and rq_level is only adjusted once
         * the lock is re-taken after posting.
         */
        mutex_exit(&iser_qp->qp_lock);

        /* we've room to post, so pull from the msg cache */
        msg = iser_msg_get(hca, rq_space, &msg_ret);
        if (msg == NULL) {
                ISER_LOG(CE_NOTE, "iser_ib_post_recv: no message handles "
                    "available in msg cache currently");
                /*
                 * There are no messages on the cache. Wait a half-
                 * second, then try again.
                 *
                 * NOTE(review): ic_conn->ic_lock is still held across this
                 * delay() -- confirm that sleeping with it held is
                 * acceptable for the other users of that lock.
                 */
                delay(drv_usectohz(ISER_DELAY_HALF_SECOND));
                status = iser_ib_post_recv_async(chanhdl);
                if (status != DDI_SUCCESS) {
                        ISER_LOG(CE_NOTE, "iser_ib_post_recv: failed to "
                            "redispatch routine");
                        /* Failed to dispatch, clear pending flag */
                        mutex_enter(&iser_qp->qp_lock);
                        iser_qp->rq_taskqpending = B_FALSE;
                        mutex_exit(&iser_qp->qp_lock);
                }
                mutex_exit(&chan->ic_conn->ic_lock);
                return;
        }

        if (msg_ret != rq_space) {
                ISER_LOG(CE_NOTE, "iser_ib_post_recv: requested number of "
                    "messages not allocated: requested (%d) allocated (%d)",
                    rq_space, msg_ret);
                /* We got some, but not all, of our requested depth */
                rq_space = msg_ret;
        }

        /*
         * Now, walk through the allocated WRs and post them,
         * ISER_IB_RQ_POST_MAX (or less) at a time.
         */
        wrlist = &wr[0];
        total_num = rq_space;

        while (total_num) {
                /* determine the number to post on this iteration */
                npost = (total_num > ISER_IB_RQ_POST_MAX) ?
                    ISER_IB_RQ_POST_MAX : total_num;

                /*
                 * build a list of WRs from the msg list; the msg pointer
                 * itself is the work request ID, and msg advances along
                 * the nextp chain as each WR is filled in
                 */
                for (i = 0; i < npost; i++) {
                        wrlist[i].wr_id         = (ibt_wrid_t)(uintptr_t)msg;
                        wrlist[i].wr_nds        = ISER_IB_SGLIST_SIZE;
                        wrlist[i].wr_sgl        = &msg->msg_ds;
                        msg = msg->nextp;
                }

                /* post the list to the RQ */
                nposted = 0;
                status = ibt_post_recv(chanhdl, wrlist, npost, &nposted);
                if ((status != IBT_SUCCESS) || (nposted != npost)) {
                        ISER_LOG(CE_NOTE, "iser_ib_post_recv: ibt_post_recv "
                            "failed: requested (%d) posted (%d) status (%d)",
                            npost, nposted, status);
                        /*
                         * NOTE(review): the messages that were not posted
                         * (the rest of this batch and of the msg chain) are
                         * not handed back anywhere in this function --
                         * confirm whether they are reclaimed elsewhere.
                         */
                        total_num -= nposted;
                        break;
                }

                /* decrement total number to post by the number posted */
                total_num -= nposted;
        }

        /* total_num is now the count of WRs that could not be posted */
        mutex_enter(&iser_qp->qp_lock);
        if (total_num != 0) {
                ISER_LOG(CE_NOTE, "iser_ib_post_recv: unable to fill RQ, "
                    "failed to post (%d) WRs", total_num);
                iser_qp->rq_level += rq_space - total_num;
        } else {
                iser_qp->rq_level += rq_space;
        }

        /*
         * Now that we've filled the RQ, check that all of the recv WRs
         * haven't just been immediately consumed. If so, taskqpending is
         * still B_TRUE, so we need to fire off a taskq thread to post
         * more WRs.
         */
        if (iser_qp->rq_level == 0) {
                mutex_exit(&iser_qp->qp_lock);
                status = iser_ib_post_recv_async(chanhdl);
                if (status != DDI_SUCCESS) {
                        ISER_LOG(CE_NOTE, "iser_ib_post_recv: failed to "
                            "dispatch followup routine");
                        /* Failed to dispatch, clear pending flag */
                        mutex_enter(&iser_qp->qp_lock);
                        iser_qp->rq_taskqpending = B_FALSE;
                        mutex_exit(&iser_qp->qp_lock);
                }
        } else {
                /*
                 * We're done, we've filled the RQ. Clear the taskq
                 * flag so that we can run again.
                 */
                iser_qp->rq_taskqpending = B_FALSE;
                mutex_exit(&iser_qp->qp_lock);
        }

        mutex_exit(&chan->ic_conn->ic_lock);
}

/*
 * iser_ib_handle_portup_event()
 * Handler for the IBT_EVENT_PORT_UP unaffiliated asynchronous event.
 *
 * The HCA owning the port is looked up by GUID. An unknown HCA is allocated
 * and appended to the global HCA list; a known HCA has its cached port
 * information refreshed. The global list of IDM target services is then
 * walked under idm_global_mutex: a service that has no iSER service yet gets
 * one created from its saved service request, and the service is then bound
 * to the first source GID of the newly active port. A failure for one
 * service is logged and does not stop the remaining services from being
 * processed.
 */
/* ARGSUSED */
static void
iser_ib_handle_portup_event(ibt_hca_hdl_t hdl, ibt_async_event_t *event)
{
        idm_svc_t       *svc;
        iser_hca_t      *port_hca;
        ib_gid_t        port_gid;
        int             rc;

        ISER_LOG(CE_NOTE, "iser_ib_handle_portup_event: HCA(0x%llx) port(%d)",
            (longlong_t)event->ev_hca_guid, event->ev_port);

        port_hca = iser_ib_guid2hca(event->ev_hca_guid);
        if (port_hca != NULL) {
                /* Known HCA: refresh the port information we keep for it */
                rc = iser_ib_update_hcaports(port_hca);
                if (rc != IBT_SUCCESS) {
                        ISER_LOG(CE_NOTE, "iser_ib_handle_portup_event "
                            "status(0x%x): iser_ib_update_hcaports failed: "
                            "HCA(0x%llx) port(%d)", rc,
                            (longlong_t)event->ev_hca_guid, event->ev_port);
                        return;
                }
        } else {
                /* First port reported on this HCA: open and track it */
                port_hca = iser_ib_alloc_hca(event->ev_hca_guid);
                if (port_hca == NULL) {
                        ISER_LOG(CE_NOTE, "iser_ib_handle_portup_event "
                            "iser_ib_alloc_hca failed: HCA(0x%llx) port(%d)",
                            (longlong_t)event->ev_hca_guid, event->ev_port);
                        return;
                }

                mutex_enter(&iser_state->is_hcalist_lock);
                list_insert_tail(&iser_state->is_hcalist, port_hca);
                iser_state->is_num_hcas++;
                mutex_exit(&iser_state->is_hcalist_lock);
        }

        /* Port numbers are 1-based; the port info array is 0-based */
        port_gid = port_hca->hca_port_info[event->ev_port - 1].p_sgid_tbl[0];

        /*
         * Visit every IDM target service, creating the iSER CM service
         * where there is none, and bind it to the newly active port.
         */
        mutex_enter(&idm.idm_global_mutex);
        for (svc = list_head(&idm.idm_tgt_svc_list);
            svc != NULL;
            svc = list_next(&idm.idm_tgt_svc_list, svc)) {

                if (svc->is_iser_svc == NULL) {
                        /* No CM service yet, register one first */
                        rc = iser_tgt_svc_create(&svc->is_svc_req, svc);
                        if (rc != IBT_SUCCESS) {
                                ISER_LOG(CE_NOTE, "iser_ib_handle_portup_event "
                                    "status(0x%x): iser_tgt_svc_create failed: "
                                    "HCA(0x%llx) port(%d)", rc,
                                    (longlong_t)event->ev_hca_guid,
                                    event->ev_port);
                                continue;
                        }
                }

                rc = iser_ib_activate_port(svc, event->ev_hca_guid, port_gid);
                if (rc == IBT_SUCCESS) {
                        ISER_LOG(CE_NOTE, "iser_ib_handle_portup_event: "
                            "service bound HCA(0x%llx) port(%d)",
                            (longlong_t)event->ev_hca_guid, event->ev_port);
                } else {
                        ISER_LOG(CE_NOTE, "iser_ib_handle_portup_event "
                            "status(0x%x): Bind service on port "
                            "(%llx:%llx) failed",
                            rc, (longlong_t)port_gid.gid_prefix,
                            (longlong_t)port_gid.gid_guid);
                }
        }
        mutex_exit(&idm.idm_global_mutex);

        ISER_LOG(CE_NOTE, "iser_ib_handle_portup_event success: "
            "HCA(0x%llx) port(%d)", (longlong_t)event->ev_hca_guid,
            event->ev_port);
}

/*
 * iser_ib_handle_portdown_event()
 * This handles the IBT_ERROR_PORT_DOWN unaffiliated asynchronous error.
 *
 * Refresh the port information cached for the HCA, then unconfigure the CM
 * service on the deactivated port and tear down the connections that are
 * using it (see iser_ib_deactivate_port()).
 *
 * An event for an HCA that is not on the global HCA list (for example
 * because iser_ib_alloc_hca() failed when its port came up) is logged and
 * ignored.
 */
/* ARGSUSED */
static void
iser_ib_handle_portdown_event(ibt_hca_hdl_t hdl, ibt_async_event_t *event)
{
        iser_hca_t              *hca;
        ib_gid_t                gid;
        int                     status;

        hca = iser_ib_guid2hca(event->ev_hca_guid);
        if (hca == NULL) {
                /*
                 * Nothing was ever set up for this HCA, so there is
                 * nothing to tear down. An ASSERT alone would leave
                 * non-DEBUG kernels dereferencing NULL below.
                 */
                ISER_LOG(CE_NOTE, "iser_ib_handle_portdown_event: "
                    "unknown HCA(0x%llx) port(%d)",
                    (longlong_t)event->ev_hca_guid, event->ev_port);
                return;
        }

        /*
         * Query all ports on the HCA and update the port information
         * maintained in the iser_hca_t structure
         */
        status = iser_ib_update_hcaports(hca);
        if (status != IBT_SUCCESS) {
                ISER_LOG(CE_NOTE, "iser_ib_handle_portdown_event status(0x%x): "
                    "iser_ib_update_hcaports failed: HCA(0x%llx) port(%d)",
                    status, (longlong_t)event->ev_hca_guid, event->ev_port);
                return;
        }

        /*
         * Get the gid of the port that went down. Port numbers are
         * 1-based; the port info array is 0-based.
         */
        gid = hca->hca_port_info[event->ev_port - 1].p_sgid_tbl[0];
        iser_ib_deactivate_port(event->ev_hca_guid, gid);

        ISER_LOG(CE_NOTE, "iser_ib_handle_portdown_event success: "
            "HCA(0x%llx) port(%d)", (longlong_t)event->ev_hca_guid,
            event->ev_port);
}

/*
 * iser_ib_handle_hca_detach_event()
 * Quiesce all activity bound for the ports of the detaching HCA, teardown
 * the connections, unbind iSER services on all ports and release the HCA
 * handle.
 *
 * A detach event for an HCA that is not on the global HCA list is logged
 * and ignored. There is no way to report a failure back to IBT; if the HCA
 * resources cannot be released the HCA is put back on the list.
 */
/* ARGSUSED */
static void
iser_ib_handle_hca_detach_event(ibt_hca_hdl_t hdl, ibt_async_event_t *event)
{
        iser_hca_t      *nexthca, *hca;
        int             i, status;

        ISER_LOG(CE_NOTE, "iser_ib_handle_hca_detach_event: HCA(0x%llx)",
            (longlong_t)event->ev_hca_guid);

        hca = iser_ib_guid2hca(event->ev_hca_guid);
        if (hca == NULL) {
                /* Never opened (or already released): nothing to undo */
                ISER_LOG(CE_NOTE, "iser_ib_handle_hca_detach_event: "
                    "unknown HCA(0x%llx)", (longlong_t)event->ev_hca_guid);
                return;
        }

        /* Fail the connections and unbind the services on every port */
        for (i = 0; i < hca->hca_num_ports; i++) {
                iser_ib_deactivate_port(hca->hca_guid,
                    hca->hca_port_info[i].p_sgid_tbl[0]);
        }

        /*
         * Update the HCA list maintained in the iser_state. Free the
         * resources allocated to the HCA, i.e. caches, protection domain
         */
        mutex_enter(&iser_state->is_hcalist_lock);

        for (hca = list_head(&iser_state->is_hcalist);
            hca != NULL;
            hca = nexthca) {

                /* Fetch the successor first; hca may be removed and freed */
                nexthca = list_next(&iser_state->is_hcalist, hca);

                if (hca->hca_guid == event->ev_hca_guid) {

                        list_remove(&iser_state->is_hcalist, hca);
                        iser_state->is_num_hcas--;

                        status = iser_ib_free_hca(hca);
                        if (status != DDI_SUCCESS) {
                                ISER_LOG(CE_WARN, "iser_ib_handle_hca_detach: "
                                    "Failed to free hca(%p)", (void *)hca);
                                /* Still allocated, keep tracking it */
                                list_insert_tail(&iser_state->is_hcalist, hca);
                                iser_state->is_num_hcas++;
                        }
                        /* No way to return status to IBT if this fails */
                }
        }
        mutex_exit(&iser_state->is_hcalist_lock);

}

/*
 * iser_ib_async_handler
 * The IBT asynchronous event handler, registered with the framework via the
 * ibt_attach() routine. The following asynchronous events are recognized:
 * IBT_EVENT_PORT_UP		handled by iser_ib_handle_portup_event()
 * IBT_ERROR_PORT_DOWN		handled by iser_ib_handle_portdown_event()
 * IBT_HCA_ATTACH_EVENT		ignored, see below
 * IBT_HCA_DETACH_EVENT		handled by iser_ib_handle_hca_detach_event()
 * Any other event code is silently ignored.
 */
/* ARGSUSED */
void
iser_ib_async_handler(void *clntp, ibt_hca_hdl_t hdl, ibt_async_code_t code,
    ibt_async_event_t *event)
{
        if (code == IBT_EVENT_PORT_UP) {
                iser_ib_handle_portup_event(hdl, event);
        } else if (code == IBT_ERROR_PORT_DOWN) {
                iser_ib_handle_portdown_event(hdl, event);
        } else if (code == IBT_HCA_DETACH_EVENT) {
                iser_ib_handle_hca_detach_event(hdl, event);
        }

        /*
         * IBT_HCA_ATTACH_EVENT deliberately falls through to here: a new
         * HCA device is available for use, but the corresponding
         * IBT_EVENT_PORT_UP events will get triggered and handled
         * accordingly, so there is nothing to do for the attach itself.
         */
}

/*
 * iser_ib_init_hcas
 *
 * This function opens all the HCA devices, gathers the HCA state information
 * and adds the HCA handle for each HCA found in the iser_soft_state.
 */
static int
iser_ib_init_hcas(void)
{
        ib_guid_t       *guid;
        int             num_hcas;
        int             i;
        iser_hca_t      *hca;

        /* Retrieve the HCA list */
        num_hcas = ibt_get_hca_list(&guid);
        if (num_hcas == 0) {
                /*
                 * This shouldn't happen, but might if we have all HCAs
                 * detach prior to initialization.
                 */
                return (DDI_FAILURE);
        }

        /* Initialize the hcalist lock */
        mutex_init(&iser_state->is_hcalist_lock, NULL, MUTEX_DRIVER, NULL);

        /* Create the HCA list */
        list_create(&iser_state->is_hcalist, sizeof (iser_hca_t),
            offsetof(iser_hca_t, hca_node));

        for (i = 0; i < num_hcas; i++) {

                ISER_LOG(CE_NOTE, "iser_ib_init_hcas: initializing HCA "
                    "(0x%llx)", (longlong_t)guid[i]);

                hca = iser_ib_alloc_hca(guid[i]);
                if (hca == NULL) {
                        /* This shouldn't happen, teardown and fail */
                        (void) iser_ib_fini_hcas();
                        (void) ibt_free_hca_list(guid, num_hcas);
                        return (DDI_FAILURE);
                }

                mutex_enter(&iser_state->is_hcalist_lock);
                list_insert_tail(&iser_state->is_hcalist, hca);
                iser_state->is_num_hcas++;
                mutex_exit(&iser_state->is_hcalist_lock);

        }

        /* Free the IBT HCA list */
        (void) ibt_free_hca_list(guid, num_hcas);

        /* Check that we've initialized at least one HCA */
        mutex_enter(&iser_state->is_hcalist_lock);
        if (list_is_empty(&iser_state->is_hcalist)) {
                ISER_LOG(CE_NOTE, "iser_ib_init_hcas: failed to initialize "
                    "any HCAs");

                mutex_exit(&iser_state->is_hcalist_lock);
                (void) iser_ib_fini_hcas();
                return (DDI_FAILURE);
        }
        mutex_exit(&iser_state->is_hcalist_lock);

        return (DDI_SUCCESS);
}

/*
 * iser_ib_fini_hcas
 *
 * Teardown the iSER HCA list initialized by iser_ib_init_hcas(). Each HCA
 * is removed from the list and released with iser_ib_free_hca(); once the
 * list is empty the list and its lock are destroyed.
 *
 * Returns DDI_SUCCESS when every HCA was released. If an HCA cannot be
 * released it is put back on the list, the list and lock are left intact,
 * and DDI_FAILURE is returned.
 */
static int
iser_ib_fini_hcas(void)
{
        iser_hca_t      *nexthca, *hca;
        int             status;

        mutex_enter(&iser_state->is_hcalist_lock);
        for (hca = list_head(&iser_state->is_hcalist);
            hca != NULL;
            hca = nexthca) {

                /* Fetch the successor first; hca is removed and freed */
                nexthca = list_next(&iser_state->is_hcalist, hca);

                list_remove(&iser_state->is_hcalist, hca);

                /* iser_ib_free_hca() reports DDI_SUCCESS/DDI_FAILURE */
                status = iser_ib_free_hca(hca);
                if (status != DDI_SUCCESS) {
                        ISER_LOG(CE_NOTE, "iser_ib_fini_hcas: failed to free "
                            "HCA during fini");
                        list_insert_tail(&iser_state->is_hcalist, hca);
                        /* Don't leave the list lock held on the way out */
                        mutex_exit(&iser_state->is_hcalist_lock);
                        return (DDI_FAILURE);
                }

                iser_state->is_num_hcas--;

        }
        mutex_exit(&iser_state->is_hcalist_lock);
        list_destroy(&iser_state->is_hcalist);
        mutex_destroy(&iser_state->is_hcalist_lock);

        return (DDI_SUCCESS);
}

/*
 * iser_ib_alloc_hca
 *
 * Allocate an iser_hca_t for the HCA with the given GUID: open the HCA,
 * query its attributes and ports, allocate a single protection domain on
 * it and initialize its message and data MR caches.
 *
 * Returns the new iser_hca_t, or NULL if any IBT call fails, in which case
 * everything acquired up to that point is released again. The caller is
 * responsible for adding the returned HCA to the global HCA list.
 */
static iser_hca_t *
iser_ib_alloc_hca(ib_guid_t guid)
{
        iser_hca_t      *new_hca;
        boolean_t       have_portinfo = B_FALSE;
        int             rc;

        /* KM_SLEEP: the allocation waits for memory instead of failing */
        new_hca = kmem_zalloc(sizeof (iser_hca_t), KM_SLEEP);

        /* Open this HCA */
        rc = ibt_open_hca(iser_state->is_ibhdl, guid, &new_hca->hca_hdl);
        if (rc != IBT_SUCCESS) {
                ISER_LOG(CE_NOTE, "iser_ib_alloc_hca: ibt_open_hca failed:"
                    " guid (0x%llx) status (0x%x)", (longlong_t)guid, rc);
                goto fail_free;
        }

        new_hca->hca_clnt_hdl = iser_state->is_ibhdl;
        new_hca->hca_guid = guid;

        /* Query the HCA */
        rc = ibt_query_hca(new_hca->hca_hdl, &new_hca->hca_attr);
        if (rc != IBT_SUCCESS) {
                ISER_LOG(CE_NOTE, "iser_ib_alloc_hca: ibt_query_hca "
                    "failure: guid (0x%llx) status (0x%x)",
                    (longlong_t)guid, rc);
                goto fail_close;
        }

        /* Query all ports on the HCA (port 0 selects every port) */
        rc = ibt_query_hca_ports(new_hca->hca_hdl, 0,
            &new_hca->hca_port_info, &new_hca->hca_num_ports,
            &new_hca->hca_port_info_sz);
        if (rc != IBT_SUCCESS) {
                ISER_LOG(CE_NOTE, "iser_ib_alloc_hca: "
                    "ibt_query_hca_ports failure: guid (0x%llx) "
                    "status (0x%x)", (longlong_t)guid, rc);
                goto fail_close;
        }
        have_portinfo = B_TRUE;

        /* Allocate a single PD on this HCA */
        rc = ibt_alloc_pd(new_hca->hca_hdl, IBT_PD_NO_FLAGS,
            &new_hca->hca_pdhdl);
        if (rc != IBT_SUCCESS) {
                ISER_LOG(CE_NOTE, "iser_ib_alloc_hca: ibt_alloc_pd "
                    "failure: guid (0x%llx) status (0x%x)",
                    (longlong_t)guid, rc);
                goto fail_close;
        }

        /* Initialize the message and data MR caches for this HCA */
        iser_init_hca_caches(new_hca);

        return (new_hca);

fail_close:
        (void) ibt_close_hca(new_hca->hca_hdl);
        if (have_portinfo) {
                ibt_free_portinfo(new_hca->hca_port_info,
                    new_hca->hca_port_info_sz);
        }
fail_free:
        kmem_free(new_hca, sizeof (iser_hca_t));
        return (NULL);
}

/*
 * iser_ib_free_hca
 *
 * Release everything iser_ib_alloc_hca() set up for the given HCA: the MR
 * caches, the protection domain, the open HCA handle, the port information
 * and finally the iser_hca_t itself.
 *
 * Returns DDI_SUCCESS when the HCA was completely released; the hca pointer
 * is invalid afterwards. Returns DDI_FAILURE when the teardown could not be
 * completed; in that case the hca is still allocated and an attempt is made
 * to restore it to its previous state. If the protection domain cannot be
 * re-allocated during that recovery the HCA is marked hca_failed, and every
 * later call for it fails immediately.
 */
static int
iser_ib_free_hca(iser_hca_t *hca)
{
        int                     status;
        ibt_hca_portinfo_t      *hca_port_info;
        uint_t                  hca_port_info_sz;

        ASSERT(hca != NULL);
        if (hca->hca_failed)
                return (DDI_FAILURE);

        /* Remember the port info so it can be released after the close */
        hca_port_info = hca->hca_port_info;
        hca_port_info_sz = hca->hca_port_info_sz;

        /*
         * Free the memory regions before freeing
         * the associated protection domain
         */
        iser_fini_hca_caches(hca);

        status = ibt_free_pd(hca->hca_hdl, hca->hca_pdhdl);
        if (status != IBT_SUCCESS) {
                ISER_LOG(CE_NOTE, "iser_ib_free_hca: failed to free PD "
                    "status=0x%x", status);
                goto out_caches;
        }

        status = ibt_close_hca(hca->hca_hdl);
        if (status != IBT_SUCCESS) {
                ISER_LOG(CE_NOTE, "iser_ib_free_hca: failed to close HCA "
                    "status=0x%x", status);
                goto out_pd;
        }

        ibt_free_portinfo(hca_port_info, hca_port_info_sz);

        kmem_free(hca, sizeof (iser_hca_t));
        return (DDI_SUCCESS);

        /*
         * We only managed to partially tear down the HCA, try to put it back
         * like it was before returning.
         */
out_pd:
        /* The PD is already gone at this point, get a new one */
        status = ibt_alloc_pd(hca->hca_hdl, IBT_PD_NO_FLAGS, &hca->hca_pdhdl);
        if (status != IBT_SUCCESS) {
                hca->hca_failed = B_TRUE;
                /* Report error and exit */
                ISER_LOG(CE_NOTE, "iser_ib_free_hca: could not re-alloc PD "
                    "status=0x%x", status);
                return (DDI_FAILURE);
        }

out_caches:
        /* The caches were torn down first, so they are rebuilt last */
        iser_init_hca_caches(hca);

        return (DDI_FAILURE);
}

/*
 * iser_ib_update_hcaports
 *
 * Re-query all ports of the given HCA and replace the port information
 * cached in the iser_hca_t (array, array size and port count) with the
 * fresh result. The previous port information is released.
 *
 * Returns IBT_SUCCESS, or the ibt_query_hca_ports() status on failure, in
 * which case the cached port information is left untouched.
 */
static int
iser_ib_update_hcaports(iser_hca_t *hca)
{
        ibt_hca_portinfo_t      *pinfop, *oldpinfop;
        uint_t                  size, oldsize, nport;
        int                     status;

        ASSERT(hca != NULL);

        /* Port 0 selects every port on the HCA */
        status = ibt_query_hca_ports(hca->hca_hdl, 0, &pinfop, &nport, &size);
        if (status != IBT_SUCCESS) {
                ISER_LOG(CE_NOTE, "ibt_query_hca_ports failed: %d", status);
                return (status);
        }

        oldpinfop = hca->hca_port_info;
        oldsize = hca->hca_port_info_sz;

        /*
         * Keep the port count in step with the array it describes; the
         * count is what bounds every walk over hca_port_info.
         */
        hca->hca_port_info = pinfop;
        hca->hca_port_info_sz = size;
        hca->hca_num_ports = nport;

        ibt_free_portinfo(oldpinfop, oldsize);

        return (IBT_SUCCESS);
}

/*
 * iser_ib_gid2hca
 * Given a gid, find the HCA that has a port whose first source GID matches
 * it (both prefix and guid). The global HCA list is searched under
 * is_hcalist_lock. Returns NULL if no port of any HCA matches.
 */
iser_hca_t *
iser_ib_gid2hca(ib_gid_t gid)
{
        iser_hca_t      *cur;
        iser_hca_t      *found = NULL;
        ib_gid_t        *sgid;
        int             port;

        mutex_enter(&iser_state->is_hcalist_lock);

        for (cur = list_head(&iser_state->is_hcalist);
            cur != NULL;
            cur = list_next(&iser_state->is_hcalist, cur)) {

                for (port = 0; port < cur->hca_num_ports; port++) {
                        sgid = &cur->hca_port_info[port].p_sgid_tbl[0];

                        if (sgid->gid_prefix != gid.gid_prefix)
                                continue;
                        if (sgid->gid_guid != gid.gid_guid)
                                continue;

                        found = cur;
                        break;
                }

                /* Stop at the first HCA that owns a matching port */
                if (found != NULL)
                        break;
        }

        mutex_exit(&iser_state->is_hcalist_lock);

        return (found);
}

/*
 * iser_ib_guid2hca
 * Given a HCA guid, find the corresponding HCA on the global HCA list. The
 * list is searched under is_hcalist_lock. Returns NULL if no HCA with that
 * guid is on the list.
 */
iser_hca_t *
iser_ib_guid2hca(ib_guid_t guid)
{
        iser_hca_t      *cur;

        mutex_enter(&iser_state->is_hcalist_lock);

        /* Advance until the guid matches or the list is exhausted */
        cur = list_head(&iser_state->is_hcalist);
        while (cur != NULL && cur->hca_guid != guid) {
                cur = list_next(&iser_state->is_hcalist, cur);
        }

        mutex_exit(&iser_state->is_hcalist_lock);

        return (cur);
}

/*
 * iser_ib_conv_sockaddr2ibtaddr
 * Convert a socket address into the IBT IP address format.
 *
 * An AF_INET or AF_INET6 address has its family and address copied over.
 * A NULL saddr yields AF_UNSPEC with a zeroed IPv4 address; any other
 * address family yields AF_UNSPEC with the address left untouched.
 */
void
iser_ib_conv_sockaddr2ibtaddr(
    idm_sockaddr_t *saddr, ibt_ip_addr_t *ibt_addr)
{
        if (saddr == NULL) {
                ibt_addr->family = AF_UNSPEC;
                ibt_addr->un.ip4addr = 0;
                return;
        }

        if (saddr->sin.sa_family == AF_INET) {
                ibt_addr->family = saddr->sin4.sin_family;
                ibt_addr->un.ip4addr = saddr->sin4.sin_addr.s_addr;
        } else if (saddr->sin.sa_family == AF_INET6) {
                ibt_addr->family = saddr->sin6.sin6_family;
                ibt_addr->un.ip6addr = saddr->sin6.sin6_addr;
        } else {
                /* Not an IP address family */
                ibt_addr->family = AF_UNSPEC;
        }
}

/*
 * iser_ib_conv_ibtaddr2sockaddr
 * Convert an IBT IP address plus a port into a socket address.
 *
 * ss		destination; a sockaddr_in is written for AF_INET and
 *		AF_UNSPEC, a sockaddr_in6 for AF_INET6
 * ibt_addr	source IBT address; it is only read
 * port		port number, stored after a 16-bit byte-order conversion
 *
 * Any other address family is logged and ss is left untouched.
 *
 * The socket address is built field by field. An ibt_ip_addr_t holds the
 * family followed directly by the address union, which is not the layout
 * of a sockaddr_in or sockaddr_in6 (those have the port, and for IPv6 the
 * flow info, ahead of the address), so it must not be cast to one and
 * copied wholesale.
 */
void
iser_ib_conv_ibtaddr2sockaddr(struct sockaddr_storage *ss,
    ibt_ip_addr_t *ibt_addr, in_port_t port)
{
        struct sockaddr_in *sin;
        struct sockaddr_in6 *sin6;

        switch (ibt_addr->family) {
        case AF_INET:
        case AF_UNSPEC:

                sin = (struct sockaddr_in *)ss;
                bzero(sin, sizeof (struct sockaddr_in));
                sin->sin_family = ibt_addr->family;
                /*
                 * NOTE(review): ntohs() and htons() are the same swap;
                 * htons() would state the intent if port is in host
                 * order - confirm against the callers.
                 */
                sin->sin_port = ntohs(port);
                sin->sin_addr.s_addr = ibt_addr->un.ip4addr;
                break;

        case AF_INET6:

                sin6 = (struct sockaddr_in6 *)ss;
                bzero(sin6, sizeof (struct sockaddr_in6));
                sin6->sin6_family = ibt_addr->family;
                sin6->sin6_port = ntohs(port);
                sin6->sin6_addr = ibt_addr->un.ip6addr;
                break;

        default:
                ISER_LOG(CE_NOTE, "iser_ib_conv_ibtaddr2sockaddr: "
                    "unknown family type: 0x%x", ibt_addr->family);
                break;
        }
}

/*
 * iser_ib_setup_cq
 * Allocate a Completion Queue of the requested size on the given HCA and
 * hand its handle back through cq_hdl.
 *
 * Returns ISER_STATUS_SUCCESS, or the ibt_alloc_cq() status on failure.
 */
static int
iser_ib_setup_cq(ibt_hca_hdl_t hca_hdl, uint_t cq_size, ibt_cq_hdl_t *cq_hdl)
{
        ibt_cq_attr_t           attr;
        int                     rc;

        /* Plain CQ of the requested size: no scheduling hint, no flags */
        attr.cq_flags   = IBT_CQ_NO_FLAGS;
        attr.cq_sched   = 0;
        attr.cq_size    = cq_size;

        rc = ibt_alloc_cq(hca_hdl, &attr, cq_hdl, NULL);
        if (rc == IBT_SUCCESS) {
                return (ISER_STATUS_SUCCESS);
        }

        ISER_LOG(CE_NOTE, "iser_ib_setup_cq: ibt_alloc_cq failure (%d)",
            rc);

        return (rc);
}

/*
 * iser_ib_setup_chanargs
 * Fill in the RC channel allocation arguments for an iSER channel. The
 * structure is zeroed first, so any field not set below is left as zero.
 */
static void
iser_ib_setup_chanargs(uint8_t hca_port, ibt_cq_hdl_t scq_hdl,
    ibt_cq_hdl_t rcq_hdl, uint_t sq_size, uint_t rq_size,
    ibt_pd_hdl_t hca_pdhdl, ibt_rc_chan_alloc_args_t *cargs)
{
        bzero(cargs, sizeof (ibt_rc_chan_alloc_args_t));

        /* Where the channel lives: local HCA port and protection domain */
        cargs->rc_hca_port_num          = hca_port;
        cargs->rc_pd                    = hca_pdhdl;

        /* Completion Queues for the send and the receive side */
        cargs->rc_scq                   = scq_hdl;
        cargs->rc_rcq                   = rcq_hdl;

        /* The channel has its own receive queue, no SRQ usage */
        cargs->rc_srq                   = NULL;

        /*
         * Depth of the send and receive queues, and the maximum number of
         * scatter gather list elements of a work request posted to them.
         */
        cargs->rc_sizes.cs_sq           = sq_size;
        cargs->rc_sizes.cs_sq_sgl       = ISER_IB_SGLIST_SIZE;
        cargs->rc_sizes.cs_rq           = rq_size;
        cargs->rc_sizes.cs_rq_sgl       = ISER_IB_SGLIST_SIZE;

        /*
         * All Work requests signaled on a WR basis will receive a send
         * request completion.
         */
        cargs->rc_flags                 = IBT_ALL_SIGNALED;

        /* Enable RDMA read and RDMA write on the channel end points */
        cargs->rc_control               = IBT_CEP_RDMA_RD | IBT_CEP_RDMA_WR;
}

/*
 * iser_ib_init_qp
 * Initialize the QP handle embedded in the channel: its lock, the recorded
 * queue sizes, the RQ monitoring data and the taskq-pending flag.
 */
void
iser_ib_init_qp(iser_chan_t *chan, uint_t sq_size, uint_t rq_size)
{
        iser_qp_t       *qp = &chan->ic_qp;

        mutex_init(&qp->qp_lock, NULL, MUTEX_DRIVER, NULL);

        /* Record queue sizes */
        qp->rq_size = rq_size;
        qp->sq_size = sq_size;

        /*
         * RQ monitoring data: the RQ starts out empty, and rq_lwm is
         * ISER_IB_RQ_LWM_PCT percent of the channel's receive CQ size.
         */
        qp->rq_level = 0;
        qp->rq_depth = rq_size;
        qp->rq_lwm = (chan->ic_recvcq_sz * ISER_IB_RQ_LWM_PCT) / 100;

        /* No taskq dispatch is outstanding yet */
        qp->rq_taskqpending = B_FALSE;
}

/*
 * iser_ib_fini_qp
 * Teardown the QP handle. The lock set up by iser_ib_init_qp() is the only
 * resource released here.
 */
void
iser_ib_fini_qp(iser_qp_t *qp)
{
        kmutex_t        *lock = &qp->qp_lock;

        mutex_destroy(lock);
}

/*
 * iser_ib_activate_port
 * Bind the iSER service of the given IDM service to the port identified by
 * gid on the HCA identified by guid. On success the binding is recorded on
 * the service's bind list so that it can be undone at a later time.
 *
 * Returns IBT_SUCCESS, or the ibt_bind_service() status on failure, in
 * which case nothing is recorded.
 */
static int
iser_ib_activate_port(idm_svc_t *idm_svc, ib_guid_t guid, ib_gid_t gid)
{
        iser_svc_t      *svc = idm_svc->is_iser_svc;
        iser_sbind_t    *bind;
        int             rc;

        /* The bind record holds the handle needed to unbind later on */
        bind = kmem_zalloc(sizeof (iser_sbind_t), KM_SLEEP);
        bind->is_guid = guid;
        bind->is_gid = gid;

        rc = ibt_bind_service(svc->is_srvhdl, gid, NULL, idm_svc,
            &bind->is_sbindhdl);
        if (rc == IBT_SUCCESS) {
                list_insert_tail(&svc->is_sbindlist, bind);
                return (IBT_SUCCESS);
        }

        ISER_LOG(CE_NOTE, "iser_ib_activate_port: status(0x%x): "
            "Bind service(%llx) on port(%llx:%llx) failed",
            rc, (longlong_t)svc->is_svcid,
            (longlong_t)gid.gid_prefix, (longlong_t)gid.gid_guid);

        /* Nothing was bound, so there is nothing to remember */
        kmem_free(bind, sizeof (iser_sbind_t));

        return (rc);
}

/*
 * iser_ib_deactivate_port
 * Walk the global list of IDM target connections under idm_global_mutex.
 * Every iSER connection whose path uses the given HCA gets a
 * CE_TRANSPORT_FAIL event. For such a connection that is not an initiator
 * connection, a service binding matching (hca_guid, gid) is unbound,
 * removed from the service's bind list and freed.
 */
static void
iser_ib_deactivate_port(ib_guid_t hca_guid, ib_gid_t gid)
{
        idm_conn_t      *conn;
        iser_conn_t     *ic;
        iser_svc_t      *svc;
        iser_sbind_t    *bind;

        mutex_enter(&idm.idm_global_mutex);
        for (conn = list_head(&idm.idm_tgt_conn_list);
            conn != NULL;
            conn = list_next(&idm.idm_tgt_conn_list, conn)) {

                /* Only iSER connections carry an iser_conn_t */
                if (conn->ic_transport_type == IDM_TRANSPORT_TYPE_ISER) {
                        ic = conn->ic_transport_private;
                } else {
                        continue;
                }

                /* Leave connections on other HCAs alone */
                if (ic->ic_chan->ic_ibt_path.pi_hca_guid != hca_guid) {
                        continue;
                }

                /* Fail the transport for this connection */
                idm_conn_event(conn, CE_TRANSPORT_FAIL, IDM_STATUS_FAIL);

                /* Initiator connections need nothing beyond the event */
                if (conn->ic_conn_type != CONN_TYPE_INI) {
                        svc = conn->ic_svc_binding->is_iser_svc;
                        bind = iser_ib_get_bind(svc, hca_guid, gid);
                        if (bind == NULL) {
                                /* Not bound (any more) on this port */
                                continue;
                        }

                        /* This service is still bound, tear it down */
                        (void) ibt_unbind_service(svc->is_srvhdl,
                            bind->is_sbindhdl);
                        list_remove(&svc->is_sbindlist, bind);
                        kmem_free(bind, sizeof (iser_sbind_t));
                }
        }
        mutex_exit(&idm.idm_global_mutex);
}

/*
 * iser_ib_get_bind
 * Search the bind list of the given iSER service for the binding recorded
 * for (hca_guid, gid); the HCA guid and both halves of the gid must match.
 * Returns the binding, or NULL if the service has no such binding.
 */
static iser_sbind_t *
iser_ib_get_bind(iser_svc_t *iser_svc, ib_guid_t hca_guid, ib_gid_t gid)
{
        iser_sbind_t    *cur;

        cur = list_head(&iser_svc->is_sbindlist);
        while (cur != NULL) {
                if (cur->is_guid == hca_guid &&
                    cur->is_gid.gid_prefix == gid.gid_prefix &&
                    cur->is_gid.gid_guid == gid.gid_guid) {
                        break;
                }
                cur = list_next(&iser_svc->is_sbindlist, cur);
        }

        /* Either the matching binding, or NULL once the list ran out */
        return (cur);
}