#include <sys/types.h>
#include <sys/conf.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/modctl.h>
#include <sys/stropts.h>
#include <sys/stream.h>
#include <sys/strsun.h>
#include <sys/strsubr.h>
#include <sys/dlpi.h>
#include <sys/mac_provider.h>
#include <sys/pattr.h>
#include <sys/sysmacros.h>
#include <sys/disp.h>
#include <sys/atomic.h>
#include <sys/ethernet.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/udp.h>
#include <inet/common.h>
#include <inet/ip.h>
#include <inet/ip6.h>
#include <inet/tcp.h>
#include <netinet/icmp6.h>
#include <sys/callb.h>
#include <sys/modhash.h>
#include <sys/ib/clients/ibd/ibd.h>
#include <sys/ib/mgt/sm_attr.h>
#include <sys/note.h>
#include <sys/ib/mgt/ibmf/ibmf.h>
#include <sys/priv_names.h>
#include <sys/dls.h>
#include <sys/dld_ioc.h>
#include <sys/policy.h>
#include <sys/ibpart.h>
#include <sys/file.h>
/*
 * Global tunables; both default to 1.
 * NOTE(review): presumably these select soft-interrupt based Rx/Tx
 * completion processing -- confirm against the code that reads them.
 */
uint_t ibd_rx_softintr = 1;
uint_t ibd_tx_softintr = 1;
#ifdef IBD_LOGGING
/* Backing value for IBD_LOG_SZ; only present in IBD_LOGGING builds. */
uint_t ibd_log_sz = 0x20000;
#endif
#ifdef IBD_LOGGING
#define IBD_LOG_SZ ibd_log_sz
#endif
/*
 * Receive posting constants. IBD_RWQE_MIN evaluates to
 * (8 << 4) * 4 == 512.
 */
#define IBD_RX_POST_CNT 8
#define IBD_LOG_RX_POST 4
#define IBD_RWQE_MIN ((IBD_RX_POST_CNT << IBD_LOG_RX_POST) * 4)
/* LSO sizing constants (values in bytes, presumably -- TODO confirm). */
#define IBD_LSO_MAXLEN 65536
#define IBD_LSO_BUFSZ 8192
/*
 * States of the single outstanding address-handle lookup; stored in
 * ibd_state_t::id_ah_op (see ibd_acache_lookup() and ibd_async_acache()).
 */
#define IBD_OP_NOTSTARTED 0
#define IBD_OP_ONGOING 1
#define IBD_OP_COMPLETED 2
#define IBD_OP_ERRORED 3
#define IBD_OP_ROUTERED 4
/*
 * Progress flags. RESTART is numerically START | STOP, and DELETE is an
 * alias for RESTART.
 * NOTE(review): presumably these are the flag values passed to
 * ibd_set_mac_progress()/ibd_clr_mac_progress() -- confirm.
 */
#define IBD_DRV_START_IN_PROGRESS 0x10000000
#define IBD_DRV_STOP_IN_PROGRESS 0x20000000
#define IBD_DRV_RESTART_IN_PROGRESS 0x30000000
#define IBD_DRV_DELETE_IN_PROGRESS IBD_DRV_RESTART_IN_PROGRESS
#define IB_MGID_IPV4_LOWGRP_MASK 0xFFFFFFFF
/*
 * Default maximum SDU sizes; the corresponding MTU adds IPOIB_HDRSIZE.
 * The "RC" pair is the larger variant (65520 vs. 2044).
 */
#define IBD_DEF_MAX_SDU 2044
#define IBD_DEF_MAX_MTU (IBD_DEF_MAX_SDU + IPOIB_HDRSIZE)
#define IBD_DEF_RC_MAX_SDU 65520
#define IBD_DEF_RC_MAX_MTU (IBD_DEF_RC_MAX_SDU + IPOIB_HDRSIZE)
#define IBD_DEFAULT_QKEY 0xB1B
#ifdef IBD_LOGGING
#define IBD_DMAX_LINE 100
#endif
/*
 * Link operation codes.
 * NOTE(review): the consumers of these values are outside this view;
 * the distinction between IBD_LINK_UP and IBD_LINK_UP_ABSENT should be
 * confirmed against them.
 */
typedef enum {
IBD_LINK_DOWN,
IBD_LINK_UP,
IBD_LINK_UP_ABSENT
} ibd_link_op_t;
/* Soft-state anchor; set up by ddi_soft_state_init() in _init(). */
void *ibd_list;
/* Driver-wide state; its ig_* fields are reset under ig_mutex in _init(). */
ibd_global_state_t ibd_gstate;
/*
 * Head of a list of ibd_state_t objects, and the mutex initialised for it
 * in _init().
 * NOTE(review): presumably ibd_objlist_lock protects ibd_objlist_head --
 * confirm against the list's users.
 */
ibd_state_t *ibd_objlist_head = NULL;
kmutex_t ibd_objlist_lock;
/* Evaluates to 600. NOTE(review): units are presumably seconds -- confirm. */
int ibd_rc_conn_timeout = 60 * 10;
#ifdef IBD_LOGGING
/* Log buffer state; only present in IBD_LOGGING builds. */
kmutex_t ibd_lbuf_lock;
uint8_t *ibd_lbuf;
uint32_t ibd_lbuf_ndx;
#endif
/*
 * Forward declarations for functions defined in this file.
 */

/* Attach/detach entry points; handed to DDI_DEFINE_STREAM_OPS below. */
static int ibd_attach(dev_info_t *dip, ddi_attach_cmd_t cmd);
static int ibd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd);
/* ibd_m_* routines; most are installed in the ibd_m_callbacks table below. */
static int ibd_m_stat(void *, uint_t, uint64_t *);
static int ibd_m_start(void *);
static void ibd_m_stop(void *);
static int ibd_m_promisc(void *, boolean_t);
static int ibd_m_multicst(void *, boolean_t, const uint8_t *);
static int ibd_m_unicst(void *, const uint8_t *);
static mblk_t *ibd_m_tx(void *, mblk_t *);
static boolean_t ibd_m_getcapab(void *, mac_capab_t, void *);
static int ibd_m_setprop(void *, const char *, mac_prop_id_t, uint_t,
const void *);
static int ibd_m_getprop(void *, const char *, mac_prop_id_t, uint_t, void *);
static void ibd_m_propinfo(void *, const char *, mac_prop_id_t,
mac_prop_info_handle_t);
static int ibd_set_priv_prop(ibd_state_t *, const char *, uint_t,
const void *);
static int ibd_get_priv_prop(ibd_state_t *, const char *, uint_t, void *);
/* *_init routines. */
static int ibd_state_init(ibd_state_t *, dev_info_t *);
static int ibd_init_txlist(ibd_state_t *);
static int ibd_init_rxlist(ibd_state_t *);
static int ibd_acache_init(ibd_state_t *);
#ifdef IBD_LOGGING
static void ibd_log_init(void);
#endif
/* *_fini and cleanup routines. */
static void ibd_state_fini(ibd_state_t *);
static void ibd_fini_txlist(ibd_state_t *);
static void ibd_fini_rxlist(ibd_state_t *);
static void ibd_tx_cleanup(ibd_state_t *, ibd_swqe_t *);
static void ibd_tx_cleanup_list(ibd_state_t *, ibd_swqe_t *, ibd_swqe_t *);
static void ibd_acache_fini(ibd_state_t *);
#ifdef IBD_LOGGING
static void ibd_log_fini(void);
#endif
/* Buffer / work-queue-entry allocation and acquisition routines. */
static int ibd_alloc_tx_copybufs(ibd_state_t *);
static int ibd_alloc_rx_copybufs(ibd_state_t *);
static int ibd_alloc_tx_lsobufs(ibd_state_t *);
static ibd_swqe_t *ibd_acquire_swqe(ibd_state_t *);
static int ibd_acquire_lsobufs(ibd_state_t *, uint_t, ibt_wr_ds_t *,
uint32_t *);
/* Buffer / work-queue-entry free and release routines. */
static void ibd_free_rwqe(ibd_state_t *, ibd_rwqe_t *);
static void ibd_free_tx_copybufs(ibd_state_t *);
static void ibd_free_rx_copybufs(ibd_state_t *);
static void ibd_free_rx_rsrcs(ibd_state_t *);
static void ibd_free_tx_lsobufs(ibd_state_t *);
static void ibd_release_swqe(ibd_state_t *, ibd_swqe_t *, ibd_swqe_t *, int);
static void ibd_release_lsobufs(ibd_state_t *, ibt_wr_ds_t *, uint32_t);
static void ibd_free_lsohdr(ibd_swqe_t *, mblk_t *);
/* Interrupt, completion-queue and asynchronous-event handlers. */
static uint_t ibd_intr(caddr_t);
static uint_t ibd_tx_recycle(caddr_t);
static void ibd_rcq_handler(ibt_cq_hdl_t, void *);
static void ibd_scq_handler(ibt_cq_hdl_t, void *);
static void ibd_poll_rcq(ibd_state_t *, ibt_cq_hdl_t);
static void ibd_poll_scq(ibd_state_t *, ibt_cq_hdl_t);
static void ibd_drain_rcq(ibd_state_t *, ibt_cq_hdl_t);
static void ibd_drain_scq(ibd_state_t *, ibt_cq_hdl_t);
static void ibd_freemsg_cb(char *);
/* ibd_async_handler / ibdpd_async_handler are named in the modinfo below. */
static void ibd_async_handler(void *, ibt_hca_hdl_t, ibt_async_code_t,
ibt_async_event_t *);
static void ibdpd_async_handler(void *, ibt_hca_hdl_t, ibt_async_code_t,
ibt_async_event_t *);
static void ibd_snet_notices_handler(void *, ib_gid_t,
ibt_subnet_event_code_t, ibt_subnet_event_t *);
/* Send and receive routines. */
static boolean_t ibd_send(ibd_state_t *, mblk_t *);
static void ibd_post_send(ibd_state_t *, ibd_swqe_t *);
static void ibd_post_recv(ibd_state_t *, ibd_rwqe_t *);
static mblk_t *ibd_process_rx(ibd_state_t *, ibd_rwqe_t *, ibt_wc_t *);
/*
 * ibd_async_work() is the request-processing loop; the other ibd_async_*
 * routines below are the per-request handlers it dispatches to.
 */
static void ibd_async_work(ibd_state_t *);
static void ibd_async_acache(ibd_state_t *, ipoib_mac_t *);
static void ibd_async_multicast(ibd_state_t *, ib_gid_t, int);
static void ibd_async_setprom(ibd_state_t *);
static void ibd_async_unsetprom(ibd_state_t *);
static void ibd_async_reap_group(ibd_state_t *, ibd_mce_t *, ib_gid_t, uint8_t);
static void ibd_async_trap(ibd_state_t *, ibd_req_t *);
static void ibd_async_txsched(ibd_state_t *);
static void ibd_async_link(ibd_state_t *, ibd_req_t *);
/* Multicast group and multicast cache routines. */
static ibd_mce_t *ibd_async_mcache(ibd_state_t *, ipoib_mac_t *, boolean_t *);
static ibd_mce_t *ibd_join_group(ibd_state_t *, ib_gid_t, uint8_t);
static ibd_mce_t *ibd_mcache_find(ib_gid_t, struct list *);
static boolean_t ibd_get_allroutergroup(ibd_state_t *,
ipoib_mac_t *, ipoib_mac_t *);
static void ibd_leave_group(ibd_state_t *, ib_gid_t, uint8_t);
static void ibd_reacquire_group(ibd_state_t *, ibd_mce_t *);
static ibt_status_t ibd_iba_join(ibd_state_t *, ib_gid_t, ibd_mce_t *);
static ibt_status_t ibd_find_bgroup(ibd_state_t *);
/* Address conversion between IPoIB MAC format and host-order GID. */
static void ibd_n2h_gid(ipoib_mac_t *, ib_gid_t *);
static void ibd_h2n_mac(ipoib_mac_t *, ib_qpn_t, ib_sn_prefix_t, ib_guid_t);
static uint64_t ibd_get_portspeed(ibd_state_t *);
/* Gate in/out of async event processing (id_trap_inprog accounting). */
static boolean_t ibd_async_safe(ibd_state_t *);
static void ibd_async_done(ibd_state_t *);
/* Address-handle cache lookup helpers. */
static ibd_ace_t *ibd_acache_lookup(ibd_state_t *, ipoib_mac_t *, int *, int);
static ibd_ace_t *ibd_acache_get_unref(ibd_state_t *);
static void ibd_link_mod(ibd_state_t *, ibt_async_code_t);
static int ibd_locate_pkey(ib_pkey_t *, uint16_t, ib_pkey_t, uint16_t *);
/* Start/stop, attach and miscellaneous helpers. */
static int ibd_register_mac(ibd_state_t *, dev_info_t *);
static int ibd_record_capab(ibd_state_t *);
static int ibd_get_port_details(ibd_state_t *);
static int ibd_alloc_cqs(ibd_state_t *);
static int ibd_setup_ud_channel(ibd_state_t *);
static int ibd_start(ibd_state_t *);
static int ibd_undo_start(ibd_state_t *, link_state_t);
static void ibd_set_mac_progress(ibd_state_t *, uint_t);
static void ibd_clr_mac_progress(ibd_state_t *, uint_t);
static int ibd_part_attach(ibd_state_t *state, dev_info_t *dip);
static void ibd_part_unattach(ibd_state_t *state);
static int ibd_port_attach(dev_info_t *);
static int ibd_port_unattach(ibd_state_t *state, dev_info_t *dip);
static int ibd_get_port_state(ibd_state_t *, link_state_t *);
static int ibd_part_busy(ibd_state_t *);
static int ibd_sched_poll(ibd_state_t *, int, int);
static void ibd_resume_transmission(ibd_state_t *);
static int ibd_setup_lso(ibd_swqe_t *, mblk_t *, uint32_t, ibt_ud_dest_hdl_t);
static int ibd_prepare_sgl(ibd_state_t *, mblk_t *, ibd_swqe_t *, uint_t);
/* List and mod_hash helpers used by the address-handle cache. */
static void *list_get_head(list_t *);
static int ibd_hash_key_cmp(mod_hash_key_t, mod_hash_key_t);
static uint_t ibd_hash_by_id(void *, mod_hash_key_t);
/* Non-static: registered via ibt_register_part_attr_cb() in _init(). */
ibt_status_t ibd_get_part_attr(datalink_id_t, ibt_part_attr_t *);
ibt_status_t ibd_get_all_part_attr(ibt_part_attr_t **, int *);
#ifdef IBD_LOGGING
static void ibd_log(const char *, ...);
#endif
/*
 * Device operations for the driver: ibd_attach/ibd_detach are the only
 * driver-supplied entry points here; the rest are nulldev/nodev/NULL and
 * quiesce is declared as not needed.
 */
DDI_DEFINE_STREAM_OPS(ibd_dev_ops, nulldev, nulldev, ibd_attach, ibd_detach,
nodev, NULL, D_MP, NULL, ddi_quiesce_not_needed);
/* Loadable-driver linkage: module type, description string, dev_ops. */
static struct modldrv ibd_modldrv = {
&mod_driverops,
"InfiniBand GLDv3 Driver",
&ibd_dev_ops
};
/* Module linkage passed to mod_install()/mod_remove()/mod_info(). */
static struct modlinkage ibd_modlinkage = {
MODREV_1, (void *)&ibd_modldrv, NULL
};
/*
 * IBTF client registration info. Two variants exist, differing only in
 * the async handler and the client name string:
 *   ibd_clnt_modinfo   - ibd_async_handler,   "IBPART"
 *   ibdpd_clnt_modinfo - ibdpd_async_handler, "IPIB"
 * NOTE(review): presumably the first is used for partition objects and the
 * second for the port driver instance -- confirm at the ibt_attach() calls.
 */
static struct ibt_clnt_modinfo_s ibd_clnt_modinfo = {
IBTI_V_CURR,
IBT_NETWORK,
ibd_async_handler,
NULL,
"IBPART"
};
static struct ibt_clnt_modinfo_s ibdpd_clnt_modinfo = {
IBTI_V_CURR,
IBT_NETWORK,
ibdpd_async_handler,
NULL,
"IPIB"
};
/*
 * Flags advertising which optional MAC callbacks are supplied:
 * getcapab, setprop, getprop and propinfo.
 */
#define IBD_M_CALLBACK_FLAGS \
(MC_GETCAPAB | MC_SETPROP | MC_GETPROP | MC_PROPINFO)
/*
 * GLDv3 callback vector for this driver. Entries are positional; the NULL
 * slots are callbacks this driver does not provide (see the definition of
 * mac_callbacks_t in <sys/mac_provider.h> for the slot order).
 */
static mac_callbacks_t ibd_m_callbacks = {
IBD_M_CALLBACK_FLAGS,
ibd_m_stat,
ibd_m_start,
ibd_m_stop,
ibd_m_promisc,
ibd_m_multicst,
ibd_m_unicst,
ibd_m_tx,
NULL,
NULL,
ibd_m_getcapab,
NULL,
NULL,
ibd_m_setprop,
ibd_m_getprop,
ibd_m_propinfo
};
/*
 * NULL-terminated table of driver-private property names (all prefixed
 * with "_ibd_").
 * NOTE(review): presumably these are the names handled by
 * ibd_set_priv_prop()/ibd_get_priv_prop() -- confirm.
 */
char *ibd_priv_props[] = {
"_ibd_broadcast_group",
"_ibd_coalesce_completions",
"_ibd_create_broadcast_group",
"_ibd_hash_size",
"_ibd_lso_enable",
"_ibd_num_ah",
"_ibd_num_lso_bufs",
"_ibd_rc_enable_srq",
"_ibd_rc_num_rwqe",
"_ibd_rc_num_srq",
"_ibd_rc_num_swqe",
"_ibd_rc_rx_comp_count",
"_ibd_rc_rx_comp_usec",
"_ibd_rc_rx_copy_thresh",
"_ibd_rc_rx_rwqe_thresh",
"_ibd_rc_tx_comp_count",
"_ibd_rc_tx_comp_usec",
"_ibd_rc_tx_copy_thresh",
"_ibd_ud_num_rwqe",
"_ibd_ud_num_swqe",
"_ibd_ud_rx_comp_count",
"_ibd_ud_rx_comp_usec",
"_ibd_ud_tx_comp_count",
"_ibd_ud_tx_comp_usec",
"_ibd_ud_tx_copy_thresh",
NULL
};
/* Handlers for the partition ioctls listed in ibd_dld_ioctl_list. */
static int ibd_create_partition(void *, intptr_t, int, cred_t *, int *);
static int ibd_delete_partition(void *, intptr_t, int, cred_t *, int *);
static int ibd_get_partition_info(void *, intptr_t, int, cred_t *, int *);
/*
 * ioctl table registered with dld_ioc_register(IBPART_IOC, ...) in _init().
 * Create and delete carry secpolicy_dl_config as their last field; the
 * info ioctl carries NULL there and uses a different argument structure
 * (ibd_ioctl_t rather than ibpart_ioctl_t).
 */
static dld_ioc_info_t ibd_dld_ioctl_list[] = {
{IBD_CREATE_IBPART, DLDCOPYINOUT, sizeof (ibpart_ioctl_t),
ibd_create_partition, secpolicy_dl_config},
{IBD_DELETE_IBPART, DLDCOPYIN, sizeof (ibpart_ioctl_t),
ibd_delete_partition, secpolicy_dl_config},
{IBD_INFO_IBPART, DLDCOPYIN, sizeof (ibd_ioctl_t),
ibd_get_partition_info, NULL}
};
/*
 * OR the scope and pkey into an address buffer, in network byte order:
 * (scope << 16) goes into the 32-bit word at byte offset 4 of maddr and
 * (pkey << 16) into the 32-bit word at byte offset 8.
 *
 * The accesses are through uint32_t pointers, so maddr + 4 must be
 * suitably aligned. The expansion is a bare brace block, not
 * do { } while (0), so take care when using it as the sole body of an
 * if/else.
 */
#define IBD_FILL_SCOPE_PKEY(maddr, scope, pkey) \
{ \
*(uint32_t *)((char *)(maddr) + 4) |= \
htonl((uint32_t)(scope) << 16); \
*(uint32_t *)((char *)(maddr) + 8) |= \
htonl((uint32_t)(pkey) << 16); \
}
/*
 * Inverse of IBD_FILL_SCOPE_PKEY: clear (in network byte order) the 4-bit
 * field at bits 16-19 of the word at offset 4 and the 16-bit field at
 * bits 16-31 of the word at offset 8. Same alignment and bare-brace
 * caveats apply.
 */
#define IBD_CLEAR_SCOPE_PKEY(maddr) \
{ \
*(uint32_t *)((char *)(maddr) + 4) &= \
htonl(~((uint32_t)0xF << 16)); \
*(uint32_t *)((char *)(maddr) + 8) &= \
htonl(~((uint32_t)0xFFFF << 16)); \
}
#ifdef DEBUG
/*
 * Threshold for debug_print(): a message is emitted only when its level
 * is greater than or equal to this value.
 */
int ibd_debuglevel = 100;

/*
 * printf-style debug output (DEBUG builds only).
 *
 * l	- level of this message; suppressed when below ibd_debuglevel
 * fmt	- format string, followed by its arguments
 *
 * Output goes through vcmn_err() at CE_CONT.
 */
void
debug_print(int l, char *fmt, ...)
{
	if (l >= ibd_debuglevel) {
		va_list args;

		va_start(args, fmt);
		vcmn_err(CE_CONT, fmt, args);
		va_end(args);
	}
}
#endif
/*
 * Log a driver message prefixed with identifying information for the
 * instance: driver name and instance number, HCA GUID, port, PKEY and
 * the datalink name looked up from state->id_plinkid.
 *
 * state	- instance the message is about
 * fmt		- printf-style format, followed by its arguments
 *
 * The message is issued with cmn_err(CE_NOTE, "!...").
 */
void
ibd_print_warn(ibd_state_t *state, char *fmt, ...)
{
	ib_guid_t hca_guid;
	char ibd_print_buf[MAXNAMELEN + 256];
	char part_name[MAXNAMELEN];
	datalink_id_t linkid = state->id_plinkid;
	int len;
	va_list ap;

	hca_guid = ddi_prop_get_int64(DDI_DEV_T_ANY, state->id_dip,
	    0, "hca-guid", 0);

	/*
	 * The link name is only meaningful if the lookup succeeds. Start
	 * from an empty string, and reset it on failure, so that "%s" below
	 * never walks an uninitialised stack buffer.
	 */
	part_name[0] = '\0';
	if (dls_mgmt_get_linkinfo(linkid, part_name, NULL, NULL, NULL) != 0)
		part_name[0] = '\0';

	len = snprintf(ibd_print_buf, sizeof (ibd_print_buf),
	    "%s%d: HCA GUID %016llx port %d PKEY %02x link %s ",
	    ddi_driver_name(state->id_dip), ddi_get_instance(state->id_dip),
	    (u_longlong_t)hca_guid, state->id_port, state->id_pkey,
	    part_name);

	/*
	 * snprintf() reports the length the output would have had; clamp it
	 * so that a truncated prefix cannot push the append position (and
	 * the remaining-size computation) past the end of the buffer.
	 */
	if (len < 0 || (size_t)len >= sizeof (ibd_print_buf))
		len = sizeof (ibd_print_buf) - 1;

	va_start(ap, fmt);
	(void) vsnprintf(ibd_print_buf + len, sizeof (ibd_print_buf) - len,
	    fmt, ap);
	cmn_err(CE_NOTE, "!%s", ibd_print_buf);
	va_end(ap);
}
/*
 * Locking annotations (via the _NOTE() macro from <sys/note.h>) for
 * ibd_state_t members: which mutex or scheme protects each field, and
 * which fields may be read without the lock.
 * NOTE(review): presumably consumed by static lock analysis (this file
 * also has __lock_lint conditionals) and without runtime effect --
 * confirm.
 */
_NOTE(MUTEX_PROTECTS_DATA(ibd_state_t::id_lso_lock,
ibd_state_t::id_lso))
_NOTE(DATA_READABLE_WITHOUT_LOCK(ibd_state_t::id_lso))
_NOTE(SCHEME_PROTECTS_DATA("init", ibd_state_t::id_lso_policy))
_NOTE(DATA_READABLE_WITHOUT_LOCK(ibd_lsobkt_t::bkt_nfree))
/* Send CQ polling and Tx posting state. */
_NOTE(MUTEX_PROTECTS_DATA(ibd_state_t::id_scq_poll_lock,
ibd_state_t::id_scq_poll_busy))
_NOTE(MUTEX_PROTECTS_DATA(ibd_state_t::id_txpost_lock,
ibd_state_t::id_tx_head))
_NOTE(MUTEX_PROTECTS_DATA(ibd_state_t::id_txpost_lock,
ibd_state_t::id_tx_busy))
/* Async request queue (see ibd_queue_work_slot() / ibd_async_work()). */
_NOTE(MUTEX_PROTECTS_DATA(ibd_state_t::id_acache_req_lock,
ibd_state_t::id_acache_req_cv))
_NOTE(MUTEX_PROTECTS_DATA(ibd_state_t::id_acache_req_lock,
ibd_state_t::id_req_list))
_NOTE(SCHEME_PROTECTS_DATA("atomic",
ibd_acache_s::ac_ref))
/* Address-handle cache, protected by id_ac_mutex. */
_NOTE(MUTEX_PROTECTS_DATA(ibd_state_t::id_ac_mutex,
ibd_state_t::id_ah_active))
_NOTE(MUTEX_PROTECTS_DATA(ibd_state_t::id_ac_mutex,
ibd_state_t::id_ah_free))
_NOTE(MUTEX_PROTECTS_DATA(ibd_state_t::id_ac_mutex,
ibd_state_t::id_ah_addr))
_NOTE(SCHEME_PROTECTS_DATA("ac mutex should protect this",
ibd_state_t::id_ah_op))
_NOTE(MUTEX_PROTECTS_DATA(ibd_state_t::id_ac_mutex,
ibd_state_t::id_ah_error))
_NOTE(MUTEX_PROTECTS_DATA(ibd_state_t::id_ac_mutex,
ibd_state_t::id_ac_hot_ace))
_NOTE(DATA_READABLE_WITHOUT_LOCK(ibd_state_t::id_ah_error))
/* Multicast lists, protected by id_mc_mutex. */
_NOTE(MUTEX_PROTECTS_DATA(ibd_state_t::id_mc_mutex,
ibd_state_t::id_mc_full))
_NOTE(MUTEX_PROTECTS_DATA(ibd_state_t::id_mc_mutex,
ibd_state_t::id_mc_non))
/* Trap gating state (see ibd_async_safe() / ibd_async_done()). */
_NOTE(MUTEX_PROTECTS_DATA(ibd_state_t::id_trap_lock,
ibd_state_t::id_trap_cv))
_NOTE(MUTEX_PROTECTS_DATA(ibd_state_t::id_trap_lock,
ibd_state_t::id_trap_stop))
_NOTE(MUTEX_PROTECTS_DATA(ibd_state_t::id_trap_lock,
ibd_state_t::id_trap_inprog))
_NOTE(SCHEME_PROTECTS_DATA("only by async thread",
ibd_state_t::id_prom_op))
_NOTE(MUTEX_PROTECTS_DATA(ibd_state_t::id_sched_lock,
ibd_state_t::id_sched_needed))
/* Link state. */
_NOTE(MUTEX_PROTECTS_DATA(ibd_state_t::id_link_mutex,
ibd_state_t::id_link_state))
_NOTE(DATA_READABLE_WITHOUT_LOCK(ibd_state_t::id_link_state))
_NOTE(SCHEME_PROTECTS_DATA("only async thr and ibd_m_start",
ibd_state_t::id_link_speed))
_NOTE(DATA_READABLE_WITHOUT_LOCK(ibd_state_t::id_sgid))
/* Tx and Rx work-queue-entry lists. */
_NOTE(MUTEX_PROTECTS_DATA(ibd_state_t::id_tx_list.dl_mutex,
ibd_state_t::id_tx_list.dl_head))
_NOTE(MUTEX_PROTECTS_DATA(ibd_state_t::id_tx_list.dl_mutex,
ibd_state_t::id_tx_list.dl_pending_sends))
_NOTE(MUTEX_PROTECTS_DATA(ibd_state_t::id_tx_list.dl_mutex,
ibd_state_t::id_tx_list.dl_cnt))
_NOTE(SCHEME_PROTECTS_DATA("atomic or dl mutex or single thr",
ibd_state_t::id_rx_list.dl_bufs_outstanding))
_NOTE(SCHEME_PROTECTS_DATA("atomic or dl mutex or single thr",
ibd_state_t::id_rx_list.dl_cnt))
/* RC timeout state. */
_NOTE(MUTEX_PROTECTS_DATA(ibd_state_t::rc_timeout_lock,
ibd_state_t::rc_timeout_start))
_NOTE(MUTEX_PROTECTS_DATA(ibd_state_t::rc_timeout_lock,
ibd_state_t::rc_timeout))
/*
 * Fields annotated as protected by the "atomic update only" scheme:
 * packet/byte counters, the Rx post queue index and rcq_invoking.
 */
_NOTE(SCHEME_PROTECTS_DATA("atomic update only",
ibd_state_s::id_brd_rcv
ibd_state_s::id_brd_xmt
ibd_state_s::id_multi_rcv
ibd_state_s::id_multi_xmt
ibd_state_s::id_num_intrs
ibd_state_s::id_rcv_bytes
ibd_state_s::id_rcv_pkt
ibd_state_s::id_rx_post_queue_index
ibd_state_s::id_tx_short
ibd_state_s::id_xmt_bytes
ibd_state_s::id_xmt_pkt
ibd_state_s::rc_rcv_trans_byte
ibd_state_s::rc_rcv_trans_pkt
ibd_state_s::rc_rcv_copy_byte
ibd_state_s::rc_rcv_copy_pkt
ibd_state_s::rc_xmt_bytes
ibd_state_s::rc_xmt_small_pkt
ibd_state_s::rc_xmt_fragmented_pkt
ibd_state_s::rc_xmt_map_fail_pkt
ibd_state_s::rc_xmt_map_succ_pkt
ibd_rc_chan_s::rcq_invoking))
/*
 * Types and fields annotated as "unshared or single-threaded": no mutex
 * is named for them. Covers whole types (e.g. ibd_rwqe_s, ibd_swqe_s),
 * message-block pointers, and ibd_state_s / ibd_rc_chan_s members.
 */
_NOTE(SCHEME_PROTECTS_DATA("unshared or single-threaded",
callb_cpr
ib_gid_s
ib_header_info
ibd_acache_rq
ibd_acache_s::ac_mce
ibd_acache_s::ac_chan
ibd_mcache::mc_fullreap
ibd_mcache::mc_jstate
ibd_mcache::mc_req
ibd_rwqe_s
ibd_swqe_s
ibd_wqe_s
ibt_wr_ds_s::ds_va
ibt_wr_lso_s
ipoib_mac::ipoib_qpn
mac_capab_lso_s
msgb::b_next
msgb::b_cont
msgb::b_rptr
msgb::b_wptr
ibd_state_s::id_bgroup_created
ibd_state_s::id_mac_state
ibd_state_s::id_mtu
ibd_state_s::id_ud_num_rwqe
ibd_state_s::id_ud_num_swqe
ibd_state_s::id_qpnum
ibd_state_s::id_rcq_hdl
ibd_state_s::id_rx_buf_sz
ibd_state_s::id_rx_bufs
ibd_state_s::id_rx_mr_hdl
ibd_state_s::id_rx_wqes
ibd_state_s::id_rxwcs
ibd_state_s::id_rxwcs_size
ibd_state_s::id_rx_nqueues
ibd_state_s::id_rx_queues
ibd_state_s::id_scope
ibd_state_s::id_scq_hdl
ibd_state_s::id_tx_buf_sz
ibd_state_s::id_tx_bufs
ibd_state_s::id_tx_mr_hdl
ibd_state_s::id_tx_rel_list.dl_cnt
ibd_state_s::id_tx_wqes
ibd_state_s::id_txwcs
ibd_state_s::id_txwcs_size
ibd_state_s::rc_listen_hdl
ibd_state_s::rc_listen_hdl_OFED_interop
ibd_state_s::rc_srq_size
ibd_state_s::rc_srq_rwqes
ibd_state_s::rc_srq_rx_bufs
ibd_state_s::rc_srq_rx_mr_hdl
ibd_state_s::rc_tx_largebuf_desc_base
ibd_state_s::rc_tx_mr_bufs
ibd_state_s::rc_tx_mr_hdl
ipha_s
icmph_s
ibt_path_info_s::pi_sid
ibd_rc_chan_s::ace
ibd_rc_chan_s::chan_hdl
ibd_rc_chan_s::state
ibd_rc_chan_s::chan_state
ibd_rc_chan_s::is_tx_chan
ibd_rc_chan_s::rcq_hdl
ibd_rc_chan_s::rcq_size
ibd_rc_chan_s::scq_hdl
ibd_rc_chan_s::scq_size
ibd_rc_chan_s::rx_bufs
ibd_rc_chan_s::rx_mr_hdl
ibd_rc_chan_s::rx_rwqes
ibd_rc_chan_s::tx_wqes
ibd_rc_chan_s::tx_mr_bufs
ibd_rc_chan_s::tx_mr_hdl
ibd_rc_chan_s::tx_rel_list.dl_cnt
ibd_rc_chan_s::is_used
ibd_rc_tx_largebuf_s::lb_buf
ibd_rc_msg_hello_s
ibt_cm_return_args_s))
/*
 * Locking annotations for the rc_* (RC channel) data: channel list
 * linkage, the large Tx buffer free list, per-ace "too big" state, the
 * per-channel Tx wqe list, ace recycling and the SRQ rwqe list.
 */
_NOTE(SCHEME_PROTECTS_DATA("protected by two mutexes",
ibd_rc_chan_s::next))
_NOTE(MUTEX_PROTECTS_DATA(ibd_state_s::rc_tx_large_bufs_lock,
ibd_state_s::rc_tx_largebuf_free_head))
_NOTE(MUTEX_PROTECTS_DATA(ibd_state_s::rc_tx_large_bufs_lock,
ibd_state_s::rc_tx_largebuf_nfree))
_NOTE(MUTEX_PROTECTS_DATA(ibd_state_s::rc_tx_large_bufs_lock,
ibd_rc_tx_largebuf_s::lb_next))
_NOTE(MUTEX_PROTECTS_DATA(ibd_acache_s::tx_too_big_mutex,
ibd_acache_s::tx_too_big_ongoing))
_NOTE(MUTEX_PROTECTS_DATA(ibd_rc_chan_s::tx_wqe_list.dl_mutex,
ibd_rc_chan_s::tx_wqe_list.dl_head))
_NOTE(MUTEX_PROTECTS_DATA(ibd_rc_chan_s::tx_wqe_list.dl_mutex,
ibd_rc_chan_s::tx_wqe_list.dl_pending_sends))
_NOTE(MUTEX_PROTECTS_DATA(ibd_rc_chan_s::tx_wqe_list.dl_mutex,
ibd_rc_chan_s::tx_wqe_list.dl_cnt))
_NOTE(MUTEX_PROTECTS_DATA(ibd_state_s::rc_ace_recycle_lock,
ibd_state_s::rc_ace_recycle))
_NOTE(SCHEME_PROTECTS_DATA("atomic or dl mutex or single thr",
ibd_state_t::rc_srq_rwqe_list.dl_bufs_outstanding))
_NOTE(SCHEME_PROTECTS_DATA("atomic or dl mutex or single thr",
ibd_state_t::rc_srq_rwqe_list.dl_cnt))
/*
 * ibd_state_s rc_* fields annotated as "counters for problem diagnosis";
 * no mutex is named for them.
 */
_NOTE(SCHEME_PROTECTS_DATA("counters for problem diagnosis",
ibd_state_s::rc_rcv_alloc_fail
ibd_state_s::rc_rcq_err
ibd_state_s::rc_ace_not_found
ibd_state_s::rc_xmt_drop_too_long_pkt
ibd_state_s::rc_xmt_icmp_too_long_pkt
ibd_state_s::rc_xmt_reenter_too_long_pkt
ibd_state_s::rc_swqe_short
ibd_state_s::rc_swqe_mac_update
ibd_state_s::rc_xmt_buf_short
ibd_state_s::rc_xmt_buf_mac_update
ibd_state_s::rc_scq_no_swqe
ibd_state_s::rc_scq_no_largebuf
ibd_state_s::rc_conn_succ
ibd_state_s::rc_conn_fail
ibd_state_s::rc_null_conn
ibd_state_s::rc_no_estab_conn
ibd_state_s::rc_act_close
ibd_state_s::rc_pas_close
ibd_state_s::rc_delay_ace_recycle
ibd_state_s::rc_act_close_simultaneous
ibd_state_s::rc_act_close_not_clean
ibd_state_s::rc_pas_close_rcq_invoking
ibd_state_s::rc_reset_cnt
ibd_state_s::rc_timeout_act
ibd_state_s::rc_timeout_pas
ibd_state_s::rc_stop_connect))
#ifdef DEBUG
/*
 * DEBUG-only additions to the "counters for problem diagnosis" scheme:
 * ibd_state_s::rc_rwqe_short plus the ibd_rc_stat_s counter fields.
 */
_NOTE(SCHEME_PROTECTS_DATA("counters for problem diagnosis",
ibd_state_s::rc_rwqe_short
ibd_rc_stat_s::rc_rcv_trans_byte
ibd_rc_stat_s::rc_rcv_trans_pkt
ibd_rc_stat_s::rc_rcv_copy_byte
ibd_rc_stat_s::rc_rcv_copy_pkt
ibd_rc_stat_s::rc_rcv_alloc_fail
ibd_rc_stat_s::rc_rcq_err
ibd_rc_stat_s::rc_rwqe_short
ibd_rc_stat_s::rc_xmt_bytes
ibd_rc_stat_s::rc_xmt_small_pkt
ibd_rc_stat_s::rc_xmt_fragmented_pkt
ibd_rc_stat_s::rc_xmt_map_fail_pkt
ibd_rc_stat_s::rc_xmt_map_succ_pkt
ibd_rc_stat_s::rc_ace_not_found
ibd_rc_stat_s::rc_scq_no_swqe
ibd_rc_stat_s::rc_scq_no_largebuf
ibd_rc_stat_s::rc_swqe_short
ibd_rc_stat_s::rc_swqe_mac_update
ibd_rc_stat_s::rc_xmt_buf_short
ibd_rc_stat_s::rc_xmt_buf_mac_update
ibd_rc_stat_s::rc_conn_succ
ibd_rc_stat_s::rc_conn_fail
ibd_rc_stat_s::rc_null_conn
ibd_rc_stat_s::rc_no_estab_conn
ibd_rc_stat_s::rc_act_close
ibd_rc_stat_s::rc_pas_close
ibd_rc_stat_s::rc_delay_ace_recycle
ibd_rc_stat_s::rc_act_close_simultaneous
ibd_rc_stat_s::rc_reset_cnt
ibd_rc_stat_s::rc_timeout_act
ibd_rc_stat_s::rc_timeout_pas))
#endif
/*
 * Module load entry point.
 *
 * Sets up the soft-state anchor (each slot is at least PAGESIZE bytes),
 * the object-list mutex and the MAC dev_ops, installs the module, then
 * initialises the global state, registers the IBPART ioctls with dld and
 * registers the partition-attribute callbacks.
 *
 * Returns 0 on success, the failing call's status for soft-state or
 * mod_install() failures, or EIO if the ioctl registration fails.
 */
int
_init()
{
	int status;

	status = ddi_soft_state_init(&ibd_list, max(sizeof (ibd_state_t),
	    PAGESIZE), 0);
	if (status != 0) {
		DPRINT(10, "_init:failed in ddi_soft_state_init()");
		return (status);
	}

	mutex_init(&ibd_objlist_lock, NULL, MUTEX_DRIVER, NULL);

	mac_init_ops(&ibd_dev_ops, "ibp");
	status = mod_install(&ibd_modlinkage);
	if (status != 0) {
		DPRINT(10, "_init:failed in mod_install()");
		/*
		 * Undo everything set up above, including the object-list
		 * mutex, which was initialised before mod_install().
		 */
		mutex_destroy(&ibd_objlist_lock);
		ddi_soft_state_fini(&ibd_list);
		mac_fini_ops(&ibd_dev_ops);
		return (status);
	}

	mutex_init(&ibd_gstate.ig_mutex, NULL, MUTEX_DRIVER, NULL);
	mutex_enter(&ibd_gstate.ig_mutex);
	ibd_gstate.ig_ibt_hdl = NULL;
	ibd_gstate.ig_ibt_hdl_ref_cnt = 0;
	ibd_gstate.ig_service_list = NULL;
	mutex_exit(&ibd_gstate.ig_mutex);

	/*
	 * NOTE(review): this failure path returns with the module still
	 * installed and none of the state above torn down. Unwinding it
	 * safely needs mod_remove() (which can itself fail) or moving the
	 * registration ahead of mod_install(); left as-is pending review.
	 */
	if (dld_ioc_register(IBPART_IOC, ibd_dld_ioctl_list,
	    DLDIOCCNT(ibd_dld_ioctl_list)) != 0) {
		return (EIO);
	}

	ibt_register_part_attr_cb(ibd_get_part_attr, ibd_get_all_part_attr);

#ifdef IBD_LOGGING
	ibd_log_init();
#endif
	return (0);
}
/*
 * Module information entry point: reports on ibd_modlinkage through
 * mod_info() and returns its result unchanged.
 */
int
_info(struct modinfo *modinfop)
{
	int rv;

	rv = mod_info(&ibd_modlinkage, modinfop);
	return (rv);
}
/*
 * Module unload entry point.
 *
 * If mod_remove() refuses, its status is returned and nothing is torn
 * down. Otherwise the partition-attribute callbacks are unregistered and
 * the MAC ops, object-list mutex, soft state and global-state mutex set
 * up by _init() are released. Returns 0 on success.
 *
 * NOTE(review): _init() calls dld_ioc_register(IBPART_IOC, ...) but no
 * matching unregistration is visible here -- confirm it happens elsewhere.
 */
int
_fini()
{
	int rv;

	if ((rv = mod_remove(&ibd_modlinkage)) != 0) {
		return (rv);
	}

	ibt_unregister_part_attr_cb();

	mac_fini_ops(&ibd_dev_ops);
	mutex_destroy(&ibd_objlist_lock);
	ddi_soft_state_fini(&ibd_list);
	mutex_destroy(&ibd_gstate.ig_mutex);

#ifdef IBD_LOGGING
	ibd_log_fini();
#endif

	return (0);
}
/*
 * Extract the GID carried in an IPoIB MAC address.
 *
 * The prefix and suffix are copied out of the MAC with bcopy(), so no
 * alignment of `mac' is assumed, and converted with b2h64() before being
 * stored in dgid.
 */
static void
ibd_n2h_gid(ipoib_mac_t *mac, ib_gid_t *dgid)
{
	ib_sn_prefix_t prefix_be;
	ib_guid_t guid_be;

	bcopy(mac->ipoib_gidpref, &prefix_be, sizeof (prefix_be));
	bcopy(mac->ipoib_gidsuff, &guid_be, sizeof (guid_be));

	dgid->gid_prefix = b2h64(prefix_be);
	dgid->gid_guid = b2h64(guid_be);
}
/*
 * Build an IPoIB MAC address from a QPN and the two halves of a GID.
 *
 * The QPN is stored via htonl(); prefix and guid are converted with
 * h2b64() and copied into the MAC's gidpref/gidsuff fields with bcopy().
 */
static void
ibd_h2n_mac(ipoib_mac_t *mac, ib_qpn_t qpn, ib_sn_prefix_t prefix,
    ib_guid_t guid)
{
	ib_sn_prefix_t prefix_be = h2b64(prefix);
	ib_guid_t guid_be = h2b64(guid);

	mac->ipoib_qpn = htonl(qpn);
	bcopy(&prefix_be, mac->ipoib_gidpref, sizeof (prefix_be));
	bcopy(&guid_be, mac->ipoib_gidsuff, sizeof (guid_be));
}
/*
 * Derive the all-routers multicast MAC that corresponds to a multicast
 * MAC address.
 *
 * state	- supplies the scope (id_scope) and pkey (id_pkey)
 * mcmac	- multicast MAC to examine; only its first 4 bytes are read,
 *		  via bcopy(), so no alignment is assumed
 * rmac		- filled in on success
 *
 * If the top 32 bits of mcmac's GID prefix equal the IPv4 or IPv6 MCGID
 * prefix combined with this instance's scope, rmac is built with QPN
 * IB_MC_QPN, a GID prefix of that top word followed by (pkey << 16), and
 * a GUID of (INADDR_ALLRTRS_GROUP - INADDR_UNSPEC_GROUP); B_TRUE is
 * returned. Otherwise rmac is left untouched and B_FALSE is returned.
 */
static boolean_t
ibd_get_allroutergroup(ibd_state_t *state, ipoib_mac_t *mcmac,
    ipoib_mac_t *rmac)
{
	uint32_t adjscope = (uint32_t)state->id_scope << 16;
	uint32_t topword;

	bcopy(mcmac->ipoib_gidpref, &topword, sizeof (uint32_t));
	topword = ntohl(topword);

	if ((topword != (IB_MCGID_IPV4_PREFIX | adjscope)) &&
	    (topword != (IB_MCGID_IPV6_PREFIX | adjscope)))
		return (B_FALSE);

	/*
	 * Widen the pkey to uint32_t before shifting: shifting the promoted
	 * (signed int) value left by 16 overflows whenever the pkey's top
	 * bit is set.
	 */
	ibd_h2n_mac(rmac, IB_MC_QPN, (((uint64_t)topword << 32) |
	    ((uint32_t)state->id_pkey << 16)),
	    (INADDR_ALLRTRS_GROUP - INADDR_UNSPEC_GROUP));

	return (B_TRUE);
}
/*
 * Multicast cache helpers. Each operates on one of two lists in the state
 * structure: id_mc_full or id_mc_non.
 *   INSERT_*  - list_insert_head() of an entry
 *   FIND_*    - ibd_mcache_find() of an mgid on the list
 *   PULLOUT_* - list_remove() of an entry
 * The macros do no locking themselves.
 */
#define IBD_MCACHE_INSERT_FULL(state, mce) \
list_insert_head(&state->id_mc_full, mce)
#define IBD_MCACHE_INSERT_NON(state, mce) \
list_insert_head(&state->id_mc_non, mce)
#define IBD_MCACHE_FIND_FULL(state, mgid) \
ibd_mcache_find(mgid, &state->id_mc_full)
#define IBD_MCACHE_FIND_NON(state, mgid) \
ibd_mcache_find(mgid, &state->id_mc_non)
#define IBD_MCACHE_PULLOUT_FULL(state, mce) \
list_remove(&state->id_mc_full, mce)
#define IBD_MCACHE_PULLOUT_NON(state, mce) \
list_remove(&state->id_mc_non, mce)
/*
 * Detach and return the first element of `list', or return NULL if the
 * list is empty. No locking is done here.
 */
static void *
list_get_head(list_t *list)
{
	list_node_t *first;

	first = list_head(list);
	if (first == NULL)
		return (NULL);

	list_remove(list, first);
	return (first);
}
/*
 * Queue a request for the async worker (ibd_async_work()).
 *
 * state	- instance whose request list receives the entry
 * ptr		- request to queue; its rq_op is set to `op'
 * op		- operation code for the request
 *
 * The request is appended to id_req_list and id_acache_req_cv is
 * signalled, both under id_acache_req_lock.
 */
void
ibd_queue_work_slot(ibd_state_t *state, ibd_req_t *ptr, int op)
{
	kmutex_t *req_lock = &state->id_acache_req_lock;

	DPRINT(1, "ibd_queue_work_slot : op: %d \n", op);

	ptr->rq_op = op;

	mutex_enter(req_lock);
	list_insert_tail(&state->id_req_list, ptr);
	cv_signal(&state->id_acache_req_cv);
	mutex_exit(req_lock);
}
/*
 * Async request processing loop.
 *
 * Repeatedly removes the request at the head of state->id_req_list and
 * dispatches on its rq_op; when the list is empty it waits on
 * id_acache_req_cv. The loop only ends when an IBD_ASYNC_EXIT request is
 * dequeued. id_acache_req_lock is held while the list is examined and
 * while waiting, and is dropped while a request is being processed.
 */
static void
ibd_async_work(ibd_state_t *state)
{
ibd_req_t *ptr;
callb_cpr_t cprinfo;
mutex_enter(&state->id_acache_req_lock);
CALLB_CPR_INIT(&cprinfo, &state->id_acache_req_lock,
callb_generic_cpr, "ibd_async_work");
for (;;) {
ptr = list_get_head(&state->id_req_list);
if (ptr != NULL) {
mutex_exit(&state->id_acache_req_lock);
/*
 * While IBD_DRV_IN_LATE_HCA_INIT is set in id_mac_state, every
 * request other than IBD_ASYNC_TRAP is discarded unprocessed.
 */
if ((state->id_mac_state & IBD_DRV_IN_LATE_HCA_INIT) &&
(ptr->rq_op != IBD_ASYNC_TRAP)) {
goto free_req_and_continue;
}
switch (ptr->rq_op) {
case IBD_ASYNC_GETAH:
ibd_async_acache(state, &ptr->rq_mac);
break;
case IBD_ASYNC_JOIN:
case IBD_ASYNC_LEAVE:
ibd_async_multicast(state,
ptr->rq_gid, ptr->rq_op);
break;
case IBD_ASYNC_PROMON:
ibd_async_setprom(state);
break;
case IBD_ASYNC_PROMOFF:
ibd_async_unsetprom(state);
break;
case IBD_ASYNC_REAP:
ibd_async_reap_group(state,
ptr->rq_ptr, ptr->rq_gid,
IB_MC_JSTATE_FULL);
/*
 * Clearing ptr keeps the common path below from freeing this
 * request. NOTE(review): presumably the request storage is owned by
 * the reaped entry -- confirm in ibd_async_reap_group().
 */
ptr = NULL;
break;
case IBD_ASYNC_TRAP:
ibd_async_trap(state, ptr);
break;
case IBD_ASYNC_SCHED:
ibd_async_txsched(state);
break;
case IBD_ASYNC_LINK:
ibd_async_link(state, ptr);
break;
case IBD_ASYNC_EXIT:
/*
 * Terminate the loop. The lock is re-taken for CALLB_CPR_EXIT();
 * the __lock_lint variant drops it explicitly instead. The EXIT
 * request itself is not freed here.
 */
mutex_enter(&state->id_acache_req_lock);
#ifndef __lock_lint
CALLB_CPR_EXIT(&cprinfo);
#else
mutex_exit(&state->id_acache_req_lock);
#endif
return;
case IBD_ASYNC_RC_TOO_BIG:
ibd_async_rc_process_too_big(state,
ptr);
break;
case IBD_ASYNC_RC_CLOSE_ACT_CHAN:
ibd_async_rc_close_act_chan(state, ptr);
break;
case IBD_ASYNC_RC_RECYCLE_ACE:
ibd_async_rc_recycle_ace(state, ptr);
break;
case IBD_ASYNC_RC_CLOSE_PAS_CHAN:
(void) ibd_rc_pas_close(ptr->rq_ptr,
B_TRUE, B_TRUE);
break;
}
/*
 * Common tail: return the request to id_req_kmc (unless a handler
 * cleared ptr) and re-take the lock before looking at the list again.
 */
free_req_and_continue:
if (ptr != NULL)
kmem_cache_free(state->id_req_kmc, ptr);
mutex_enter(&state->id_acache_req_lock);
} else {
#ifndef __lock_lint
/* Nothing queued: wait for ibd_queue_work_slot() to signal. */
CALLB_CPR_SAFE_BEGIN(&cprinfo);
cv_wait(&state->id_acache_req_cv,
&state->id_acache_req_lock);
CALLB_CPR_SAFE_END(&cprinfo,
&state->id_acache_req_lock);
#endif
}
}
_NOTE(NOT_REACHED)
}
/*
 * Ask permission to start processing an asynchronous event.
 *
 * Under id_trap_lock: if id_trap_stop is set, returns B_FALSE and changes
 * nothing; otherwise increments id_trap_inprog and returns B_TRUE. A
 * B_TRUE return is balanced by a later call to ibd_async_done().
 */
static boolean_t
ibd_async_safe(ibd_state_t *state)
{
	boolean_t allowed = B_TRUE;

	mutex_enter(&state->id_trap_lock);
	if (state->id_trap_stop) {
		allowed = B_FALSE;
	} else {
		state->id_trap_inprog++;
	}
	mutex_exit(&state->id_trap_lock);

	return (allowed);
}
/*
 * Counterpart of ibd_async_safe(): decrements id_trap_inprog under
 * id_trap_lock and signals id_trap_cv when the count reaches zero.
 */
static void
ibd_async_done(ibd_state_t *state)
{
	mutex_enter(&state->id_trap_lock);
	state->id_trap_inprog--;
	if (state->id_trap_inprog == 0) {
		cv_signal(&state->id_trap_cv);
	}
	mutex_exit(&state->id_trap_lock);
}
/*
 * Hash function for the address-handle hash: the hash value is the
 * ipoib_qpn field of the ipoib_mac_t that `key' points to.
 *
 * The field is read directly when the key is 4-byte aligned and copied
 * out with bcopy() otherwise. hash_data is unused.
 */
static uint_t
ibd_hash_by_id(void *hash_data, mod_hash_key_t key)
{
	ipoib_mac_t *mac = (ipoib_mac_t *)key;
	uint_t qpn;

	if (((ulong_t)key & 3) != 0) {
		/* Unaligned key: avoid a direct 32-bit load. */
		bcopy(&(mac->ipoib_qpn), &qpn, sizeof (uint_t));
	} else {
		qpn = (uint_t)mac->ipoib_qpn;
	}

	return (qpn);
}
/*
 * Key comparator for the address-handle hash: compares IPOIB_ADDRL bytes
 * of the two keys. Returns 0 when they are identical and 1 otherwise.
 */
static int
ibd_hash_key_cmp(mod_hash_key_t key1, mod_hash_key_t key2)
{
	int differ;

	differ = (bcmp((char *)key1, (char *)key2, IPOIB_ADDRL) != 0);
	return (differ ? 1 : 0);
}
/*
 * Set up the address-handle cache and the multicast lists.
 *
 * Initialises id_ac_mutex and id_mc_mutex, creates the free/active AH
 * lists, the active-AH hash (id_hash_size buckets, keyed by MAC address)
 * and the two multicast lists, then allocates id_num_ah cache entries.
 * Each entry gets a UD destination handle and its own tx_too_big_mutex
 * and is placed on the free list.
 *
 * Returns DDI_SUCCESS, or DDI_FAILURE if a UD destination cannot be
 * allocated; in that case ibd_acache_fini() is called to undo the work
 * done so far.
 */
static int
ibd_acache_init(ibd_state_t *state)
{
	ibd_ace_t *aces;
	ibd_ace_t *ce;
	int i;

	mutex_init(&state->id_ac_mutex, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&state->id_mc_mutex, NULL, MUTEX_DRIVER, NULL);

	mutex_enter(&state->id_ac_mutex);

	list_create(&state->id_ah_free, sizeof (ibd_ace_t),
	    offsetof(ibd_ace_t, ac_list));
	list_create(&state->id_ah_active, sizeof (ibd_ace_t),
	    offsetof(ibd_ace_t, ac_list));
	state->id_ah_active_hash = mod_hash_create_extended("IBD AH hash",
	    state->id_hash_size, mod_hash_null_keydtor, mod_hash_null_valdtor,
	    ibd_hash_by_id, NULL, ibd_hash_key_cmp, KM_SLEEP);
	list_create(&state->id_mc_full, sizeof (ibd_mce_t),
	    offsetof(ibd_mce_t, mc_list));
	list_create(&state->id_mc_non, sizeof (ibd_mce_t),
	    offsetof(ibd_mce_t, mc_list));
	state->id_ac_hot_ace = NULL;

	aces = (ibd_ace_t *)kmem_zalloc(sizeof (ibd_ace_t) *
	    state->id_num_ah, KM_SLEEP);
	state->id_ac_list = aces;

	for (i = 0; i < state->id_num_ah; i++) {
		ce = &aces[i];

		if (ibt_alloc_ud_dest(state->id_hca_hdl, IBT_UD_DEST_NO_FLAGS,
		    state->id_pd_hdl, &ce->ac_dest) != IBT_SUCCESS) {
			/* fini takes id_ac_mutex itself, so drop it first */
			mutex_exit(&state->id_ac_mutex);
			ibd_acache_fini(state);
			return (DDI_FAILURE);
		}

		CLEAR_REFCYCLE(ce);
		ce->ac_mce = NULL;
		mutex_init(&ce->tx_too_big_mutex, NULL, MUTEX_DRIVER, NULL);
		IBD_ACACHE_INSERT_FREE(state, ce);
	}

	mutex_exit(&state->id_ac_mutex);
	return (DDI_SUCCESS);
}
static void
ibd_acache_fini(ibd_state_t *state)
{
ibd_ace_t *ptr;
mutex_enter(&state->id_ac_mutex);
while ((ptr = IBD_ACACHE_GET_ACTIVE(state)) != NULL) {
ASSERT(GET_REF(ptr) == 0);
mutex_destroy(&ptr->tx_too_big_mutex);
(void) ibt_free_ud_dest(ptr->ac_dest);
}
while ((ptr = IBD_ACACHE_GET_FREE(state)) != NULL) {
ASSERT(GET_REF(ptr) == 0);
mutex_destroy(&ptr->tx_too_big_mutex);
(void) ibt_free_ud_dest(ptr->ac_dest);
}
list_destroy(&state->id_ah_free);
list_destroy(&state->id_ah_active);
list_destroy(&state->id_mc_full);
list_destroy(&state->id_mc_non);
kmem_free(state->id_ac_list, sizeof (ibd_ace_t) * state->id_num_ah);
mutex_exit(&state->id_ac_mutex);
mutex_destroy(&state->id_ac_mutex);
mutex_destroy(&state->id_mc_mutex);
}
/*
 * Look up `mac' in the active address-handle hash.
 *
 * state	- instance to search; caller must hold id_ac_mutex
 * mac		- MAC address used as the hash key
 * hold		- when true, add `num' references to the entry found
 * num		- reference count increment used when `hold' is set
 *
 * Returns the matching entry, or NULL if there is none.
 */
ibd_ace_t *
ibd_acache_find(ibd_state_t *state, ipoib_mac_t *mac, boolean_t hold, int num)
{
	ibd_ace_t *ace;

	ASSERT(mutex_owned(&state->id_ac_mutex));

	if (mod_hash_find(state->id_ah_active_hash,
	    (mod_hash_key_t)mac, (mod_hash_val_t)&ace) != 0)
		return (NULL);

	if (hold) {
		INC_REF(ace, num);
	}
	return (ace);
}
/*
 * Find the address-handle cache entry for `mac', taking `numwqe'
 * references on it.
 *
 * state	- instance to search
 * mac		- destination MAC address
 * err		- set only when no entry is returned from the fast paths:
 *		  EAGAIN in general, EFAULT if the previously queued lookup
 *		  for this same address ended in IBD_OP_ERRORED
 * numwqe	- number of references to take on the entry returned
 *
 * Returns the entry, or NULL. On a miss, at most one lookup request is
 * outstanding at a time, tracked by id_ah_op / id_ah_addr under
 * id_ac_mutex.
 */
static ibd_ace_t *
ibd_acache_lookup(ibd_state_t *state, ipoib_mac_t *mac, int *err, int numwqe)
{
ibd_ace_t *ptr;
ibd_req_t *req;
/* Only print when mac is 4-byte aligned; the fields are read directly. */
if (((ulong_t)mac & 3) == 0) {
DPRINT(4,
"ibd_acache_lookup : lookup for %08X:%08X:%08X:%08X:%08X",
htonl(mac->ipoib_qpn), htonl(mac->ipoib_gidpref[0]),
htonl(mac->ipoib_gidpref[1]), htonl(mac->ipoib_gidsuff[0]),
htonl(mac->ipoib_gidsuff[1]));
}
mutex_enter(&state->id_ac_mutex);
/* Fast path 1: the most recently used ("hot") entry matches. */
if (((ptr = state->id_ac_hot_ace) != NULL) &&
(memcmp(&ptr->ac_mac, mac, sizeof (*mac)) == 0)) {
INC_REF(ptr, numwqe);
mutex_exit(&state->id_ac_mutex);
return (ptr);
}
/* Fast path 2: found in the active hash; remember it as the hot entry. */
if (((ptr = ibd_acache_find(state, mac, B_TRUE, numwqe)) != NULL)) {
state->id_ac_hot_ace = ptr;
mutex_exit(&state->id_ac_mutex);
return (ptr);
}
/* Miss: ptr is NULL from here on unless the router lookup below hits. */
*err = EAGAIN;
if (state->id_ah_op == IBD_OP_NOTSTARTED) {
/*
 * No lookup outstanding: queue an IBD_ASYNC_GETAH request for this
 * address and record it in id_ah_addr. If the request cannot be
 * allocated (KM_NOSLEEP), nothing is queued and the state is left
 * unchanged.
 */
req = kmem_cache_alloc(state->id_req_kmc, KM_NOSLEEP);
if (req != NULL) {
bcopy(mac, &(req->rq_mac), IPOIB_ADDRL);
state->id_ah_op = IBD_OP_ONGOING;
ibd_queue_work_slot(state, req, IBD_ASYNC_GETAH);
bcopy(mac, &state->id_ah_addr, IPOIB_ADDRL);
}
} else if ((state->id_ah_op != IBD_OP_ONGOING) &&
(bcmp(&state->id_ah_addr, mac, IPOIB_ADDRL) == 0)) {
/*
 * A finished lookup for this same address: report an error, or
 * otherwise try the all-routers group entry instead. Either way the
 * state machine is reset.
 */
if (state->id_ah_op == IBD_OP_ERRORED) {
*err = EFAULT;
state->id_ah_error++;
} else {
ipoib_mac_t routermac;
(void) ibd_get_allroutergroup(state, mac, &routermac);
ptr = ibd_acache_find(state, &routermac, B_TRUE,
numwqe);
}
state->id_ah_op = IBD_OP_NOTSTARTED;
} else if ((state->id_ah_op != IBD_OP_ONGOING) &&
(bcmp(&state->id_ah_addr, mac, IPOIB_ADDRL) != 0)) {
/* A finished lookup for a different address: just reset the state. */
state->id_ah_op = IBD_OP_NOTSTARTED;
}
mutex_exit(&state->id_ac_mutex);
return (ptr);
}
/*
 * Find an unreferenced entry on the active AH list that can be reused.
 *
 * The list is walked from tail to head. The first entry with a zero
 * reference count and no RC channel (ac_chan == NULL) is pulled off the
 * active list and returned. Unreferenced entries that do have a channel
 * are skipped; for the first such entry only,
 * ibd_rc_signal_ace_recycle() is called.
 *
 * Caller must hold id_ac_mutex. Returns NULL if no entry qualifies.
 */
static ibd_ace_t *
ibd_acache_get_unref(ibd_state_t *state)
{
	ibd_ace_t *ace;
	boolean_t recycle_signalled = B_FALSE;

	ASSERT(mutex_owned(&state->id_ac_mutex));

	for (ace = list_tail(&state->id_ah_active); ace != NULL;
	    ace = list_prev(&state->id_ah_active, ace)) {
		if (GET_REF(ace) != 0)
			continue;

		if (ace->ac_chan == NULL) {
			IBD_ACACHE_PULLOUT_ACTIVE(state, ace);
			break;
		}

		ASSERT(state->id_enable_rc == B_TRUE);
		if (!recycle_signalled) {
			recycle_signalled = B_TRUE;
			ibd_rc_signal_ace_recycle(state, ace);
		}
	}

	return (ace);
}
/*
 * Try to take the active cache entry for `acmac' out of service.
 *
 * state	- instance; caller must hold id_ac_mutex
 * acmac	- MAC address of the entry to recycle
 * force	- pull the entry off the active list even if it is still
 *		  referenced
 *
 * Outcomes:
 *  - no entry for acmac: nothing to do, returns B_TRUE;
 *  - entry unreferenced: removed from the active list and put on the
 *    free list, returns B_TRUE;
 *  - entry referenced and force set: removed from the active list but
 *    not put on the free list, returns B_TRUE;
 *  - entry referenced and force clear: left on the active list, returns
 *    B_FALSE.
 * In the two "referenced" cases SET_CYCLE_IF_REF() has marked the entry.
 */
boolean_t
ibd_acache_recycle(ibd_state_t *state, ipoib_mac_t *acmac, boolean_t force)
{
	ibd_ace_t *ace;
	boolean_t reclaim_now = B_TRUE;

	ASSERT(mutex_owned(&state->id_ac_mutex));

	ace = ibd_acache_find(state, acmac, B_FALSE, 0);
	if (ace == NULL)
		return (B_TRUE);

	if (SET_CYCLE_IF_REF(ace)) {
		if (!force)
			return (B_FALSE);
		/* Still referenced: do not hand it to the free list here. */
		reclaim_now = B_FALSE;
	}

	ASSERT(ace->ac_chan == NULL);
	IBD_ACACHE_PULLOUT_ACTIVE(state, ace);
	ace->ac_mce = NULL;
	if (reclaim_now) {
		IBD_ACACHE_INSERT_FREE(state, ace);
	}

	return (B_TRUE);
}
/*
 * Obtain a multicast cache entry usable for sending to `mac'.
 *
 * state	- instance
 * mac		- multicast MAC address; overwritten with the all-routers
 *		  group address if the request is redirected
 * redirect	- set to B_TRUE if the all-routers group was substituted,
 *		  B_FALSE otherwise
 *
 * Order of attempts:
 *  1. an existing entry on the id_mc_full list for the group;
 *  2. a IB_MC_JSTATE_SEND_ONLY_NON join of the group;
 *  3. the all-routers group derived by ibd_get_allroutergroup(): first
 *     an existing entry, then a IB_MC_JSTATE_SEND_ONLY_NON join.
 *
 * Returns the entry, or NULL if every attempt fails.
 */
static ibd_mce_t *
ibd_async_mcache(ibd_state_t *state, ipoib_mac_t *mac, boolean_t *redirect)
{
	ipoib_mac_t router_mac;
	ib_gid_t gid;
	ibd_mce_t *entry;

	*redirect = B_FALSE;
	ibd_n2h_gid(mac, &gid);

	entry = IBD_MCACHE_FIND_FULL(state, gid);
	if (entry != NULL) {
		DPRINT(4, "ibd_async_mcache : already joined to group");
		return (entry);
	}
	DPRINT(4, "ibd_async_mcache : not joined to group");

	entry = ibd_join_group(state, gid, IB_MC_JSTATE_SEND_ONLY_NON);
	if (entry != NULL) {
		DPRINT(4, "ibd_async_mcache : nonmem joined to group");
		return (entry);
	}
	DPRINT(4, "ibd_async_mcache : nonmem join failed");

	/* Fall back to the all-routers group, if one can be derived. */
	if (!ibd_get_allroutergroup(state, mac, &router_mac))
		return (NULL);

	*redirect = B_TRUE;
	ibd_n2h_gid(&router_mac, &gid);
	bcopy(&router_mac, mac, IPOIB_ADDRL);
	DPRINT(4, "ibd_async_mcache : router mgid : %016llx:%016llx\n",
	    gid.gid_prefix, gid.gid_guid);

	entry = IBD_MCACHE_FIND_FULL(state, gid);
	if (entry != NULL) {
		DPRINT(4, "ibd_async_mcache : using already joined router"
		    "group\n");
		return (entry);
	}

	DPRINT(4, "ibd_async_mcache : attempting join to router grp");
	entry = ibd_join_group(state, gid, IB_MC_JSTATE_SEND_ONLY_NON);
	if (entry != NULL) {
		DPRINT(4, "ibd_async_mcache : joined to router grp");
	}
	return (entry);
}
/*
 * Build an address-cache entry (ace) for the destination in *mac and put it
 * on the active list, reporting the outcome through state->id_ah_op.
 *
 * For a multicast destination (QPN equal to IB_MC_QPN) the multicast cache
 * entry is obtained first via ibd_async_mcache(), which may rewrite *mac to
 * a router group address.  An ace is then taken from the free list (or, if
 * that is empty, from ibd_acache_get_unref()), its path is resolved with
 * ibt_get_paths() and its UD destination updated with ibt_modify_ud_dest().
 * When RC is enabled and the unicast address carries IBD_MAC_ADDR_RC, an RC
 * connection is attempted as well.
 *
 * Values written to id_ah_op:
 *   IBD_OP_ERRORED    - mcg lookup/join, path lookup, dest modify or RC
 *                       connect failed
 *   IBD_OP_NOTSTARTED - no ace slot was available, or the entry was set up
 *                       without redirection (initial value of ret)
 *   IBD_OP_ROUTERED   - the destination was redirected to the router group
 */
static void
ibd_async_acache(ibd_state_t *state, ipoib_mac_t *mac)
{
ibd_ace_t *ce;
ibd_mce_t *mce = NULL;
ibt_path_attr_t path_attr;
ibt_path_info_t path_info;
ib_gid_t destgid;
char ret = IBD_OP_NOTSTARTED;
DPRINT(4, "ibd_async_acache : %08X:%08X:%08X:%08X:%08X",
htonl(mac->ipoib_qpn), htonl(mac->ipoib_gidpref[0]),
htonl(mac->ipoib_gidpref[1]), htonl(mac->ipoib_gidsuff[0]),
htonl(mac->ipoib_gidsuff[1]));
/* Multicast destination: resolve the multicast cache entry first. */
if (mac->ipoib_qpn == htonl(IB_MC_QPN)) {
boolean_t redirected;
if ((mce = ibd_async_mcache(state, mac, &redirected)) ==
NULL) {
state->id_ah_op = IBD_OP_ERRORED;
return;
}
if (redirected) {
ret = IBD_OP_ROUTERED;
DPRINT(4, "ibd_async_acache : redirected to "
"%08X:%08X:%08X:%08X:%08X",
htonl(mac->ipoib_qpn), htonl(mac->ipoib_gidpref[0]),
htonl(mac->ipoib_gidpref[1]),
htonl(mac->ipoib_gidsuff[0]),
htonl(mac->ipoib_gidsuff[1]));
/*
 * *mac now holds the router group address; if an active ace already
 * exists for it there is nothing more to create.
 */
mutex_enter(&state->id_ac_mutex);
if (ibd_acache_find(state, mac, B_FALSE, 0) != NULL) {
state->id_ah_op = IBD_OP_ROUTERED;
mutex_exit(&state->id_ac_mutex);
DPRINT(4, "ibd_async_acache : router AH found");
return;
}
mutex_exit(&state->id_ac_mutex);
}
}
/* Obtain an ace: free list first, then an unreferenced active entry. */
mutex_enter(&state->id_ac_mutex);
if ((ce = IBD_ACACHE_GET_FREE(state)) == NULL) {
if ((ce = ibd_acache_get_unref(state)) == NULL) {
state->id_ah_op = IBD_OP_NOTSTARTED;
mutex_exit(&state->id_ac_mutex);
DPRINT(10, "ibd_async_acache : failed to find AH "
"slot\n");
return;
}
/* A reclaimed entry may still point at its old multicast entry. */
ce->ac_mce = NULL;
}
/* The lock is dropped while the path lookup below is performed. */
mutex_exit(&state->id_ac_mutex);
ASSERT(ce->ac_mce == NULL);
bcopy((char *)mac, &ce->ac_mac, IPOIB_ADDRL);
bzero(&path_info, sizeof (path_info));
bzero(&path_attr, sizeof (ibt_path_attr_t));
path_attr.pa_sgid = state->id_sgid;
path_attr.pa_num_dgids = 1;
ibd_n2h_gid(&ce->ac_mac, &destgid);
path_attr.pa_dgids = &destgid;
path_attr.pa_sl = state->id_mcinfo->mc_adds_vect.av_srvl;
path_attr.pa_pkey = state->id_pkey;
if (ibt_get_paths(state->id_ibt_hdl, IBT_PATH_PKEY, &path_attr, 1,
&path_info, NULL) != IBT_SUCCESS) {
DPRINT(10, "ibd_async_acache : failed in ibt_get_paths");
goto error;
}
if (ibt_modify_ud_dest(ce->ac_dest, state->id_mcinfo->mc_qkey,
ntohl(ce->ac_mac.ipoib_qpn),
&path_info.pi_prim_cep_path.cep_adds_vect) != IBT_SUCCESS) {
DPRINT(10, "ibd_async_acache : failed in ibt_modify_ud_dest");
goto error;
}
if (mce != NULL)
ce->ac_mce = mce;
/* Unicast address flagged IBD_MAC_ADDR_RC with RC enabled: connect. */
if (state->id_enable_rc && (mac->ipoib_qpn != htonl(IB_MC_QPN)) &&
(htonl(mac->ipoib_qpn) & IBD_MAC_ADDR_RC)) {
ASSERT(ce->ac_chan == NULL);
DPRINT(10, "ibd_async_acache: call "
"ibd_rc_try_connect(ace=%p)", ce);
ibd_rc_try_connect(state, ce, &path_info);
/* A NULL ac_chan after the attempt is treated as failure. */
if (ce->ac_chan == NULL) {
DPRINT(10, "ibd_async_acache: fail to setup RC"
" channel");
state->rc_conn_fail++;
goto error;
}
}
/* Success: publish the entry and the result under the cache lock. */
mutex_enter(&state->id_ac_mutex);
IBD_ACACHE_INSERT_ACTIVE(state, ce);
state->id_ah_op = ret;
mutex_exit(&state->id_ac_mutex);
return;
error:
/* Failure: return the ace to the free list and flag the error. */
mutex_enter(&state->id_ac_mutex);
state->id_ah_op = IBD_OP_ERRORED;
IBD_ACACHE_INSERT_FREE(state, ce);
mutex_exit(&state->id_ac_mutex);
}
/*
 * Process a queued link-state request.  req->rq_ptr carries the
 * ibd_link_op_t opcode.
 *
 * For IBD_LINK_UP and IBD_LINK_UP_ABSENT the port speed is refreshed.
 * For IBD_LINK_UP_ABSENT the function additionally:
 *   - re-applies promiscuous mode if it had completed before,
 *   - walks the full-member multicast list, leaving send-only groups and
 *     re-acquiring the others,
 *   - walks the active address cache, starting an active close on
 *     established RC channels and force-recycling all other entries.
 * Finally the new link state is recorded and reported through
 * mac_link_update(), and ibd_async_done() is called.
 */
static void
ibd_async_link(ibd_state_t *state, ibd_req_t *req)
{
	ibd_link_op_t opcode = (ibd_link_op_t)req->rq_ptr;
	link_state_t lstate = (opcode == IBD_LINK_DOWN) ? LINK_STATE_DOWN :
	    LINK_STATE_UP;
	ibd_mce_t *mce, *pmce;
	ibd_ace_t *ace, *pace;

	DPRINT(10, "ibd_async_link(): %d", opcode);

	/* On either kind of link-up, refresh the recorded port speed. */
	if ((opcode == IBD_LINK_UP_ABSENT) || (opcode == IBD_LINK_UP)) {
		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*state))
		state->id_link_speed = ibd_get_portspeed(state);
		_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*state))
	}

	if (opcode == IBD_LINK_UP_ABSENT) {
		/* Re-establish promiscuous mode if it was fully set up. */
		if (state->id_prom_op == IBD_OP_COMPLETED) {
			ibd_async_unsetprom(state);
			ibd_async_setprom(state);
		}

		/*
		 * Walk the full-member list.  The next pointer is fetched
		 * before the current entry is processed because
		 * ibd_leave_group() may remove and free it.
		 */
		mce = list_head(&state->id_mc_full);
		while ((pmce = mce) != NULL) {
			mce = list_next(&state->id_mc_full, mce);
			if (pmce->mc_jstate == IB_MC_JSTATE_SEND_ONLY_NON)
				ibd_leave_group(state,
				    pmce->mc_info.mc_adds_vect.av_dgid,
				    IB_MC_JSTATE_SEND_ONLY_NON);
			else
				ibd_reacquire_group(state, pmce);
		}

		/*
		 * Walk the active address cache; again the next pointer is
		 * taken first since the current entry may be pulled out.
		 */
		mutex_enter(&state->id_ac_mutex);
		ace = list_head(&state->id_ah_active);
		while ((pace = ace) != NULL) {
			/*
			 * Initialised to B_TRUE: the RC-channel branch below
			 * never calls ibd_acache_recycle(), and the ASSERT
			 * after it must not read an indeterminate value.
			 */
			boolean_t cycled = B_TRUE;

			ace = list_next(&state->id_ah_active, ace);
			mce = pace->ac_mce;
			if (pace->ac_chan != NULL) {
				ASSERT(mce == NULL);
				ASSERT(state->id_enable_rc == B_TRUE);
				if (pace->ac_chan->chan_state ==
				    IBD_RC_STATE_ACT_ESTAB) {
					/*
					 * Established channel: take a
					 * reference, remove the entry from
					 * the active list and signal an
					 * active close.
					 */
					INC_REF(pace, 1);
					IBD_ACACHE_PULLOUT_ACTIVE(state, pace);
					pace->ac_chan->chan_state =
					    IBD_RC_STATE_ACT_CLOSING;
					ibd_rc_signal_act_close(state, pace);
				} else {
					state->rc_act_close_simultaneous++;
					DPRINT(40, "ibd_async_link: other "
					    "thread is closing it, ace=%p, "
					    "ac_chan=%p, chan_state=%d",
					    pace, pace->ac_chan,
					    pace->ac_chan->chan_state);
				}
			} else {
				cycled = ibd_acache_recycle(state,
				    &pace->ac_mac, B_TRUE);
			}

			/*
			 * Send-only groups were handled in the list walk
			 * above, so any multicast entry still attached here
			 * is expected to be a full member.
			 */
			ASSERT(cycled && ((mce == NULL) || (mce->mc_jstate ==
			    IB_MC_JSTATE_FULL)));

			/* Reap a full-member group already marked for it. */
			if ((mce != NULL) && (mce->mc_fullreap))
				ibd_async_reap_group(state, mce,
				    mce->mc_info.mc_adds_vect.av_dgid,
				    mce->mc_jstate);
		}
		mutex_exit(&state->id_ac_mutex);
	}

	/* Record and report the new link state. */
	mutex_enter(&state->id_link_mutex);
	state->id_link_state = lstate;
	mac_link_update(state->id_mh, lstate);
	mutex_exit(&state->id_link_mutex);

	ibd_async_done(state);
}
/*
 * Linear search of a P_Key table.
 *
 * pkey_tbl    - table to search
 * pkey_tbl_sz - number of entries in pkey_tbl
 * pkey        - value to look for
 * pkix        - out: index of the first matching entry (must not be NULL);
 *               left untouched when no entry matches
 *
 * Returns 0 when found, -1 otherwise.
 */
static int
ibd_locate_pkey(ib_pkey_t *pkey_tbl, uint16_t pkey_tbl_sz, ib_pkey_t pkey,
    uint16_t *pkix)
{
	uint16_t slot = 0;

	ASSERT(pkix != NULL);

	while (slot < pkey_tbl_sz) {
		if (pkey_tbl[slot] == pkey) {
			*pkix = slot;
			return (0);
		}
		slot++;
	}

	return (-1);
}
/*
 * React to a port-related async event (code) by working out the new link
 * state and, when it matters, queueing an IBD_ASYNC_LINK request.
 *
 * IBD_DRV_RESTART_IN_PROGRESS is set for the duration of the call and
 * cleared at link_mod_return.  state->id_link_mutex is taken near the top;
 * every path drops it before reaching link_mod_return, and it is still held
 * on arrival at update_link_state.
 */
static void
ibd_link_mod(ibd_state_t *state, ibt_async_code_t code)
{
ibt_hca_portinfo_t *port_infop = NULL;
ibt_status_t ibt_status;
uint_t psize, port_infosz;
ibd_link_op_t opcode;
ibd_req_t *req;
link_state_t new_link_state = LINK_STATE_UP;
uint8_t itreply;
uint16_t pkix;
int ret;
ibd_set_mac_progress(state, IBD_DRV_RESTART_IN_PROGRESS);
mutex_enter(&state->id_link_mutex);
/* Link state still unknown: nothing to update. */
if (state->id_link_state == LINK_STATE_UNKNOWN) {
mutex_exit(&state->id_link_mutex);
goto link_mod_return;
}
/*
 * Link is down, and the instance is neither in late HCA init nor
 * started: nothing to update.
 */
if ((state->id_link_state == LINK_STATE_DOWN) &&
((state->id_mac_state & IBD_DRV_IN_LATE_HCA_INIT) == 0) &&
((state->id_mac_state & IBD_DRV_STARTED) == 0)) {
mutex_exit(&state->id_link_mutex);
goto link_mod_return;
}
/* A port-down event needs no port query; itreply stays unset. */
if (code == IBT_ERROR_PORT_DOWN) {
new_link_state = LINK_STATE_DOWN;
goto update_link_state;
}
/*
 * Query the port.  A failed query, a result count other than one, or a
 * port that is not active is treated as link down.
 */
ibt_status = ibt_query_hca_ports(state->id_hca_hdl, state->id_port,
&port_infop, &psize, &port_infosz);
if ((ibt_status != IBT_SUCCESS) || (psize != 1) ||
(port_infop->p_linkstate != IBT_PORT_ACTIVE)) {
new_link_state = LINK_STATE_DOWN;
goto update_link_state;
}
/*
 * In late HCA init with an active port: try to start the interface now
 * (with the link mutex dropped) and return.
 */
if (state->id_mac_state & IBD_DRV_IN_LATE_HCA_INIT) {
mutex_exit(&state->id_link_mutex);
if ((ret = ibd_start(state)) != 0) {
DPRINT(10, "ibd_linkmod: cannot start from late HCA "
"init, ret=%d", ret);
}
ibt_free_portinfo(port_infop, port_infosz);
goto link_mod_return;
}
/*
 * If neither the NO_LOAD nor the PRESERVE_CONTENT bit is set in the
 * InitTypeReply, re-validate the source GID and the P_Key.
 */
itreply = port_infop->p_init_type_reply;
if (((itreply & SM_INIT_TYPE_REPLY_NO_LOAD_REPLY) == 0) &&
((itreply & SM_INIT_TYPE_PRESERVE_CONTENT_REPLY) == 0)) {
_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(state->id_sgid))
/* First GID table entry differs from ours: treat as link down. */
if (bcmp(port_infop->p_sgid_tbl,
&state->id_sgid, sizeof (ib_gid_t)) != 0) {
new_link_state = LINK_STATE_DOWN;
/* P_Key still at the recorded index: link up. */
} else if (port_infop->p_pkey_tbl[state->id_pkix] ==
state->id_pkey) {
new_link_state = LINK_STATE_UP;
/*
 * P_Key present at a different index: undo the start and start again
 * with the link mutex dropped, then return.
 */
} else if (ibd_locate_pkey(port_infop->p_pkey_tbl,
port_infop->p_pkey_tbl_sz, state->id_pkey, &pkix) == 0) {
ibt_free_portinfo(port_infop, port_infosz);
mutex_exit(&state->id_link_mutex);
(void) ibd_undo_start(state, LINK_STATE_DOWN);
if ((ret = ibd_start(state)) != 0) {
DPRINT(10, "ibd_restart: cannot restart, "
"ret=%d", ret);
}
goto link_mod_return;
/* P_Key no longer in the table: link down. */
} else {
new_link_state = LINK_STATE_DOWN;
}
_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(state->id_sgid))
}
update_link_state:
if (port_infop) {
ibt_free_portinfo(port_infop, port_infosz);
}
/*
 * Choose the opcode.  itreply is only read on the link-up path, where it
 * has been assigned from the port query above.
 */
if (new_link_state == LINK_STATE_DOWN) {
opcode = IBD_LINK_DOWN;
} else {
if ((itreply & SM_INIT_TYPE_PRESERVE_PRESENCE_REPLY) ==
SM_INIT_TYPE_PRESERVE_PRESENCE_REPLY) {
opcode = IBD_LINK_UP;
} else {
opcode = IBD_LINK_UP_ABSENT;
}
}
/* Unchanged state needs no work unless the opcode is UP_ABSENT. */
if ((state->id_link_state == new_link_state) && (opcode !=
IBD_LINK_UP_ABSENT)) {
mutex_exit(&state->id_link_mutex);
goto link_mod_return;
}
/*
 * If ibd_async_safe() refuses, just record the new state without
 * queueing a request.
 */
if (!ibd_async_safe(state)) {
state->id_link_state = new_link_state;
mutex_exit(&state->id_link_mutex);
goto link_mod_return;
}
mutex_exit(&state->id_link_mutex);
/* Hand the opcode to the async thread via the request's rq_ptr. */
req = kmem_cache_alloc(state->id_req_kmc, KM_SLEEP);
req->rq_ptr = (void *)opcode;
ibd_queue_work_slot(state, req, IBD_ASYNC_LINK);
link_mod_return:
ibd_clr_mac_progress(state, IBD_DRV_RESTART_IN_PROGRESS);
}
/*
 * IBTF async event callback.  clnt_private is the ibd_state_t of the
 * instance.
 *
 * Channel and CQ errors are only logged.  Port events for this instance's
 * port are forwarded to ibd_link_mod(); an IBT_PORT_CHANGE_EVENT is
 * forwarded only when the P_Key change flag is set.  Every other event is
 * ignored.
 */
static void
ibd_async_handler(void *clnt_private, ibt_hca_hdl_t hca_hdl,
    ibt_async_code_t code, ibt_async_event_t *event)
{
	ibd_state_t *state = (ibd_state_t *)clnt_private;

	switch (code) {
	case IBT_ERROR_CATASTROPHIC_CHAN:
		ibd_print_warn(state, "catastrophic channel error");
		break;

	case IBT_ERROR_CQ:
		ibd_print_warn(state, "completion queue error");
		break;

	case IBT_PORT_CHANGE_EVENT:
	case IBT_ERROR_PORT_DOWN:
	case IBT_CLNT_REREG_EVENT:
	case IBT_EVENT_PORT_UP:
		ASSERT(state->id_hca_hdl == hca_hdl);

		/* Events for other ports of the HCA are not ours. */
		if (state->id_port != event->ev_port) {
			break;
		}

		/* Of the port-change events, only P_Key changes matter. */
		if ((code == IBT_PORT_CHANGE_EVENT) &&
		    ((event->ev_port_flags & IBT_PORT_CHANGE_PKEY) !=
		    IBT_PORT_CHANGE_PKEY)) {
			break;
		}

		ibd_link_mod(state, code);
		break;

	case IBT_HCA_ATTACH_EVENT:
	case IBT_HCA_DETACH_EVENT:
	default:
		/* Nothing to do for these events. */
		break;
	}
}
/*
 * Register this instance with the MAC layer.
 *
 * A mac_register_t is allocated, filled in from state and dip, passed to
 * mac_register() (which stores the handle in state->id_mh) and then freed
 * regardless of the outcome.
 *
 * Returns DDI_SUCCESS, or DDI_FAILURE if mac_alloc() or mac_register()
 * fails.
 */
static int
ibd_register_mac(ibd_state_t *state, dev_info_t *dip)
{
	mac_register_t *macp;
	int ret;

	macp = mac_alloc(MAC_VERSION);
	if (macp == NULL) {
		DPRINT(10, "ibd_register_mac: mac_alloc() failed");
		return (DDI_FAILURE);
	}

	macp->m_type_ident = MAC_PLUGIN_IDENT_IB;
	macp->m_driver = state;
	macp->m_dip = dip;
	macp->m_src_addr = (uint8_t *)&state->id_macaddr;
	macp->m_callbacks = &ibd_m_callbacks;
	macp->m_priv_props = ibd_priv_props;
	macp->m_min_sdu = 0;
	macp->m_multicast_sdu = IBD_DEF_MAX_SDU;

	/*
	 * Maximum SDU: the port driver advertises the RC default, an RC
	 * enabled instance derives it from rc_mtu, anything else uses the
	 * UD default.
	 */
	if (state->id_type == IBD_PORT_DRIVER) {
		macp->m_max_sdu = IBD_DEF_RC_MAX_SDU;
	} else if (state->id_enable_rc) {
		macp->m_max_sdu = state->rc_mtu - IPOIB_HDRSIZE;
	} else {
		macp->m_max_sdu = IBD_DEF_MAX_SDU;
	}

	/* The registration structure is not needed once this returns. */
	ret = mac_register(macp, &state->id_mh);
	mac_free(macp);

	if (ret != 0) {
		DPRINT(10,
		    "ibd_register_mac: mac_register() failed, ret=%d", ret);
		return (DDI_FAILURE);
	}

	return (DDI_SUCCESS);
}
/*
 * Query the HCA attributes and record the derived capabilities in state:
 * hardware checksum, LSO, reserved lkey, scatter/gather limits (with their
 * high-water marks) and the work-queue size limits.
 *
 * Always returns DDI_SUCCESS; the ibt_query_hca() status is only checked by
 * an ASSERT.
 */
static int
ibd_record_capab(ibd_state_t *state)
{
	ibt_hca_attr_t hca_attrs;
	ibt_status_t ibt_status;

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*state))

	ibt_status = ibt_query_hca(state->id_hca_hdl, &hca_attrs);
	ASSERT(ibt_status == IBT_SUCCESS);

	/*
	 * Checksum offload and LSO are switched off when RC is enabled;
	 * otherwise they follow what the HCA reports.
	 */
	if (state->id_enable_rc) {
		state->id_hwcksum_capab = 0;
		state->id_lso_capable = B_FALSE;
		state->id_lso_maxlen = 0;
	} else {
		if ((hca_attrs.hca_flags & IBT_HCA_CKSUM_FULL) ==
		    IBT_HCA_CKSUM_FULL) {
			state->id_hwcksum_capab = IBT_HCA_CKSUM_FULL;
		}

		if (hca_attrs.hca_max_lso_size > 0) {
			state->id_lso_capable = B_TRUE;
			/* Cap the LSO length at IBD_LSO_MAXLEN. */
			state->id_lso_maxlen =
			    (hca_attrs.hca_max_lso_size > IBD_LSO_MAXLEN) ?
			    IBD_LSO_MAXLEN : hca_attrs.hca_max_lso_size;
		} else {
			state->id_lso_capable = B_FALSE;
			state->id_lso_maxlen = 0;
		}
	}

	/* Reserved lkey support also decides whether RC iov mapping is on. */
	if ((hca_attrs.hca_flags2 & IBT_HCA2_RES_LKEY) == IBT_HCA2_RES_LKEY) {
		state->id_hca_res_lkey_capab = 1;
		state->id_res_lkey = hca_attrs.hca_reserved_lkey;
		state->rc_enable_iov_map = B_TRUE;
	} else {
		state->rc_enable_iov_map = B_FALSE;
	}

	/*
	 * Send SGL sizes: use the per-transport values when the HCA
	 * provides WQE size information, else the generic maximum.
	 */
	if (hca_attrs.hca_flags & IBT_HCA_WQE_SIZE_INFO) {
		state->id_max_sqseg = hca_attrs.hca_ud_send_sgl_sz;
		state->rc_tx_max_sqseg = hca_attrs.hca_conn_send_sgl_sz;
	} else {
		state->id_max_sqseg = hca_attrs.hca_max_sgl;
		state->rc_tx_max_sqseg = hca_attrs.hca_max_sgl;
	}

	/* Clamp to IBD_MAX_SQSEG; warn when the HCA offers fewer. */
	if (state->id_max_sqseg > IBD_MAX_SQSEG) {
		state->id_max_sqseg = IBD_MAX_SQSEG;
	} else if (state->id_max_sqseg < IBD_MAX_SQSEG) {
		ibd_print_warn(state, "Set #sgl = %d instead of default %d",
		    state->id_max_sqseg, IBD_MAX_SQSEG);
	}
	if (state->rc_tx_max_sqseg > IBD_MAX_SQSEG) {
		state->rc_tx_max_sqseg = IBD_MAX_SQSEG;
	} else if (state->rc_tx_max_sqseg < IBD_MAX_SQSEG) {
		ibd_print_warn(state, "RC mode: Set #sgl = %d instead of "
		    "default %d", state->rc_tx_max_sqseg, IBD_MAX_SQSEG);
	}

	/* High-water marks are 65% of the respective SGL limits. */
	state->id_max_sqseg_hiwm = (state->id_max_sqseg * 65) / 100;
	state->rc_max_sqseg_hiwm = (state->rc_tx_max_sqseg * 65) / 100;

	/* Work-queue sizes may not exceed the HCA's channel size. */
	state->id_hca_max_chan_sz = hca_attrs.hca_max_chan_sz;
	if (hca_attrs.hca_max_chan_sz < state->id_ud_num_rwqe) {
		state->id_ud_num_rwqe = hca_attrs.hca_max_chan_sz;
	}
	state->id_rx_bufs_outstanding_limit = state->id_ud_num_rwqe -
	    IBD_RWQE_MIN;
	if (hca_attrs.hca_max_chan_sz < state->id_ud_num_swqe) {
		state->id_ud_num_swqe = hca_attrs.hca_max_chan_sz;
	}

	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*state))

	return (DDI_SUCCESS);
}
/*
 * Report whether the partition instance still has work in flight.
 *
 * Returns DDI_FAILURE (busy) if receive buffers are outstanding on the UD
 * receive list or the SRQ list, or if an address-handle operation is
 * ongoing; DDI_SUCCESS otherwise.  Checks are made in that order and stop
 * at the first one that fails.
 */
static int
ibd_part_busy(ibd_state_t *state)
{
	int rv = DDI_SUCCESS;

	/* atomic_add_32_nv(..., 0) is used as an atomic read here. */
	if (atomic_add_32_nv(&state->id_rx_list.dl_bufs_outstanding, 0) != 0) {
		DPRINT(10, "ibd_part_busy: failed: rx bufs outstanding\n");
		rv = DDI_FAILURE;
	} else if (state->rc_srq_rwqe_list.dl_bufs_outstanding != 0) {
		DPRINT(10, "ibd_part_busy: failed: srq bufs outstanding\n");
		rv = DDI_FAILURE;
	} else if (state->id_ah_op == IBD_OP_ONGOING) {
		DPRINT(10, "ibd_part_busy: failed: connecting\n");
		rv = DDI_FAILURE;
	}

	return (rv);
}
/*
 * Undo whatever ibd_part_attach() (and later setup) managed to complete.
 *
 * The set of steps to undo is taken from a snapshot of state->id_mac_state
 * made on entry; each IBD_DRV_* bit is cleared from id_mac_state as its
 * resource is released.  Failures of the IBTF release calls are logged
 * with ibd_print_warn() and otherwise ignored.
 */
static void
ibd_part_unattach(ibd_state_t *state)
{
uint32_t progress = state->id_mac_state;
ibt_status_t ret;
/* Receive resources are released unconditionally. */
ibd_free_rx_rsrcs(state);
if (progress & IBD_DRV_RC_SRQ_ALLOCD) {
ASSERT(state->id_enable_rc);
ibd_rc_fini_srq_list(state);
state->id_mac_state &= (~IBD_DRV_RC_SRQ_ALLOCD);
}
if (progress & IBD_DRV_MAC_REGISTERED) {
(void) mac_unregister(state->id_mh);
state->id_mac_state &= (~IBD_DRV_MAC_REGISTERED);
}
/*
 * Ask the async thread to exit and wait for it before the request list
 * and its lock/cv are destroyed below.
 */
if (progress & IBD_DRV_ASYNC_THR_CREATED) {
ibd_queue_work_slot(state, &state->id_ah_req, IBD_ASYNC_EXIT);
thread_join(state->id_async_thrid);
state->id_mac_state &= (~IBD_DRV_ASYNC_THR_CREATED);
}
if (progress & IBD_DRV_REQ_LIST_INITED) {
list_destroy(&state->id_req_list);
mutex_destroy(&state->id_acache_req_lock);
cv_destroy(&state->id_acache_req_cv);
state->id_mac_state &= ~IBD_DRV_REQ_LIST_INITED;
}
if (progress & IBD_DRV_PD_ALLOCD) {
if ((ret = ibt_free_pd(state->id_hca_hdl,
state->id_pd_hdl)) != IBT_SUCCESS) {
ibd_print_warn(state, "failed to free "
"protection domain, ret=%d", ret);
}
state->id_pd_hdl = NULL;
state->id_mac_state &= (~IBD_DRV_PD_ALLOCD);
}
if (progress & IBD_DRV_HCA_OPENED) {
if ((ret = ibt_close_hca(state->id_hca_hdl)) !=
IBT_SUCCESS) {
ibd_print_warn(state, "failed to close "
"HCA device, ret=%d", ret);
}
state->id_hca_hdl = NULL;
state->id_mac_state &= (~IBD_DRV_HCA_OPENED);
}
/*
 * The per-instance IBTF handle and the driver-global handle are both
 * managed under ibd_gstate.ig_mutex.  The global handle is detached once
 * the reference count has dropped to zero.
 */
mutex_enter(&ibd_gstate.ig_mutex);
if (progress & IBD_DRV_IBTL_ATTACH_DONE) {
if ((ret = ibt_detach(state->id_ibt_hdl)) !=
IBT_SUCCESS) {
ibd_print_warn(state,
"ibt_detach() failed, ret=%d", ret);
}
state->id_ibt_hdl = NULL;
state->id_mac_state &= (~IBD_DRV_IBTL_ATTACH_DONE);
ibd_gstate.ig_ibt_hdl_ref_cnt--;
}
if ((ibd_gstate.ig_ibt_hdl_ref_cnt == 0) &&
(ibd_gstate.ig_ibt_hdl != NULL)) {
if ((ret = ibt_detach(ibd_gstate.ig_ibt_hdl)) !=
IBT_SUCCESS) {
ibd_print_warn(state, "ibt_detach(): global "
"failed, ret=%d", ret);
}
ibd_gstate.ig_ibt_hdl = NULL;
}
mutex_exit(&ibd_gstate.ig_mutex);
if (progress & IBD_DRV_TXINTR_ADDED) {
ddi_remove_softintr(state->id_tx);
state->id_tx = NULL;
state->id_mac_state &= (~IBD_DRV_TXINTR_ADDED);
}
if (progress & IBD_DRV_RXINTR_ADDED) {
ddi_remove_softintr(state->id_rx);
state->id_rx = NULL;
state->id_mac_state &= (~IBD_DRV_RXINTR_ADDED);
}
/* The RC kstat is only handled on DEBUG builds. */
#ifdef DEBUG
if (progress & IBD_DRV_RC_PRIVATE_STATE) {
kstat_delete(state->rc_ksp);
state->id_mac_state &= (~IBD_DRV_RC_PRIVATE_STATE);
}
#endif
/* Last: destroy the locks and cache set up by ibd_state_init(). */
if (progress & IBD_DRV_STATE_INITIALIZED) {
ibd_state_fini(state);
state->id_mac_state &= (~IBD_DRV_STATE_INITIALIZED);
}
}
/*
 * Bring up the per-instance resources of a partition object: soft state,
 * optional Rx/Tx soft interrupts, IBTF attach (driver-global and
 * per-instance), HCA open, capability recording, protection domain, the
 * async request list and the async worker thread.
 *
 * Each completed step sets the matching IBD_DRV_* bit in
 * state->id_mac_state.  On failure DDI_FAILURE is returned immediately and
 * nothing is undone here; the recorded bits are presumably what lets the
 * caller clean up through ibd_part_unattach() (confirm against callers).
 *
 * Returns DDI_SUCCESS or DDI_FAILURE.
 */
int
ibd_part_attach(ibd_state_t *state, dev_info_t *dip)
{
ibt_status_t ret;
int rv;
kthread_t *kht;
if (ibd_state_init(state, dip) != DDI_SUCCESS) {
DPRINT(10, "ibd_part_attach: failed in ibd_state_init()");
return (DDI_FAILURE);
}
state->id_mac_state |= IBD_DRV_STATE_INITIALIZED;
/* Soft interrupts are only added when the tunables are set to 1. */
if (ibd_rx_softintr == 1) {
if ((rv = ddi_add_softintr(dip, DDI_SOFTINT_LOW, &state->id_rx,
NULL, NULL, ibd_intr, (caddr_t)state)) != DDI_SUCCESS) {
DPRINT(10, "ibd_part_attach: failed in "
"ddi_add_softintr(id_rx), ret=%d", rv);
return (DDI_FAILURE);
}
state->id_mac_state |= IBD_DRV_RXINTR_ADDED;
}
if (ibd_tx_softintr == 1) {
if ((rv = ddi_add_softintr(dip, DDI_SOFTINT_LOW, &state->id_tx,
NULL, NULL, ibd_tx_recycle,
(caddr_t)state)) != DDI_SUCCESS) {
DPRINT(10, "ibd_part_attach: failed in "
"ddi_add_softintr(id_tx), ret=%d", rv);
return (DDI_FAILURE);
}
state->id_mac_state |= IBD_DRV_TXINTR_ADDED;
}
/*
 * Under ibd_gstate.ig_mutex: create the driver-global IBTF handle if it
 * does not exist yet, then the per-instance handle, and count the
 * instance in ig_ibt_hdl_ref_cnt.
 */
mutex_enter(&ibd_gstate.ig_mutex);
if (ibd_gstate.ig_ibt_hdl == NULL) {
if ((ret = ibt_attach(&ibd_clnt_modinfo, dip, state,
&ibd_gstate.ig_ibt_hdl)) != IBT_SUCCESS) {
DPRINT(10, "ibd_part_attach: global: failed in "
"ibt_attach(), ret=%d", ret);
mutex_exit(&ibd_gstate.ig_mutex);
return (DDI_FAILURE);
}
}
if ((ret = ibt_attach(&ibd_clnt_modinfo, dip, state,
&state->id_ibt_hdl)) != IBT_SUCCESS) {
DPRINT(10, "ibd_part_attach: failed in ibt_attach(), ret=%d",
ret);
mutex_exit(&ibd_gstate.ig_mutex);
return (DDI_FAILURE);
}
ibd_gstate.ig_ibt_hdl_ref_cnt++;
mutex_exit(&ibd_gstate.ig_mutex);
state->id_mac_state |= IBD_DRV_IBTL_ATTACH_DONE;
if ((ret = ibt_open_hca(state->id_ibt_hdl, state->id_hca_guid,
&state->id_hca_hdl)) != IBT_SUCCESS) {
DPRINT(10, "ibd_part_attach: ibt_open_hca() failed, ret=%d",
ret);
return (DDI_FAILURE);
}
state->id_mac_state |= IBD_DRV_HCA_OPENED;
/* RC statistics are only set up on DEBUG builds with RC enabled. */
#ifdef DEBUG
if (state->id_enable_rc) {
if (ibd_rc_init_stats(state) != DDI_SUCCESS) {
DPRINT(10, "ibd_part_attach: failed in "
"ibd_rc_init_stats");
return (DDI_FAILURE);
}
state->id_mac_state |= IBD_DRV_RC_PRIVATE_STATE;
}
#endif
/* The return value is ignored; ibd_record_capab() only returns DDI_SUCCESS. */
(void) ibd_record_capab(state);
if ((ret = ibt_alloc_pd(state->id_hca_hdl, IBT_PD_NO_FLAGS,
&state->id_pd_hdl)) != IBT_SUCCESS) {
DPRINT(10, "ibd_part_attach: ibt_alloc_pd() failed, ret=%d",
ret);
return (DDI_FAILURE);
}
state->id_mac_state |= IBD_DRV_PD_ALLOCD;
/* Request list plus its lock and cv, used by the async thread. */
mutex_init(&state->id_acache_req_lock, NULL, MUTEX_DRIVER, NULL);
cv_init(&state->id_acache_req_cv, NULL, CV_DEFAULT, NULL);
list_create(&state->id_req_list, sizeof (ibd_req_t),
offsetof(ibd_req_t, rq_list));
state->id_mac_state |= IBD_DRV_REQ_LIST_INITED;
/* Start the async worker and keep its id for thread_join() at teardown. */
kht = thread_create(NULL, 0, ibd_async_work, state, 0, &p0,
TS_RUN, minclsyspri);
state->id_async_thrid = kht->t_did;
state->id_mac_state |= IBD_DRV_ASYNC_THR_CREATED;
return (DDI_SUCCESS);
}
/*
 * attach(9E) entry point.  Only DDI_ATTACH is supported, which is handed
 * to ibd_port_attach(); every other command fails with DDI_FAILURE.
 */
static int
ibd_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	if (cmd == DDI_ATTACH) {
		return (ibd_port_attach(dip));
	}

	return (DDI_FAILURE);
}
/*
 * detach(9E) entry point.  Only DDI_DETACH is supported; the instance's
 * soft state is looked up in ibd_list and passed to ibd_port_unattach(),
 * whose result is returned.  Any other command fails with DDI_FAILURE.
 */
static int
ibd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	ibd_state_t *state;

	if (cmd != DDI_DETACH) {
		return (DDI_FAILURE);
	}

	state = ddi_get_soft_state(ibd_list, ddi_get_instance(dip));

	return (ibd_port_unattach(state, dip));
}
/*
 * Initialise the software state of an instance: create its mutexes and
 * condition variable, create the kmem cache used for ibd_req_t requests,
 * and load every tunable with its IBD_DEF_* default.
 *
 * Always returns DDI_SUCCESS.  The resources created here are released by
 * ibd_state_fini().
 */
static int
ibd_state_init(ibd_state_t *state, dev_info_t *dip)
{
char buf[64];
/* Link state starts out unknown. */
mutex_init(&state->id_link_mutex, NULL, MUTEX_DRIVER, NULL);
state->id_link_state = LINK_STATE_UNKNOWN;
/* Trap handling starts in the stopped state with nothing in progress. */
mutex_init(&state->id_trap_lock, NULL, MUTEX_DRIVER, NULL);
cv_init(&state->id_trap_cv, NULL, CV_DEFAULT, NULL);
state->id_trap_stop = B_TRUE;
state->id_trap_inprog = 0;
mutex_init(&state->id_scq_poll_lock, NULL, MUTEX_DRIVER, NULL);
mutex_init(&state->id_rcq_poll_lock, NULL, MUTEX_DRIVER, NULL);
state->id_dip = dip;
mutex_init(&state->id_sched_lock, NULL, MUTEX_DRIVER, NULL);
/* Transmit-side locks. */
mutex_init(&state->id_tx_list.dl_mutex, NULL, MUTEX_DRIVER, NULL);
mutex_init(&state->id_tx_rel_list.dl_mutex, NULL, MUTEX_DRIVER, NULL);
mutex_init(&state->id_txpost_lock, NULL, MUTEX_DRIVER, NULL);
state->id_tx_busy = 0;
mutex_init(&state->id_lso_lock, NULL, MUTEX_DRIVER, NULL);
/* Receive-side counters and locks. */
state->id_rx_list.dl_bufs_outstanding = 0;
state->id_rx_list.dl_cnt = 0;
mutex_init(&state->id_rx_list.dl_mutex, NULL, MUTEX_DRIVER, NULL);
mutex_init(&state->id_rx_free_list.dl_mutex, NULL, MUTEX_DRIVER, NULL);
/*
 * Request cache; its name is built from the instance number, the pkey
 * and the partition link id.
 */
(void) sprintf(buf, "ibd_req%d_%x_%u", ddi_get_instance(dip),
state->id_pkey, state->id_plinkid);
state->id_req_kmc = kmem_cache_create(buf, sizeof (ibd_req_t),
0, NULL, NULL, NULL, NULL, NULL, 0);
/* RC-mode locks. */
mutex_init(&state->rc_rx_lock, NULL, MUTEX_DRIVER, NULL);
mutex_init(&state->rc_tx_large_bufs_lock, NULL, MUTEX_DRIVER, NULL);
mutex_init(&state->rc_srq_rwqe_list.dl_mutex, NULL, MUTEX_DRIVER, NULL);
mutex_init(&state->rc_srq_free_list.dl_mutex, NULL, MUTEX_DRIVER, NULL);
mutex_init(&state->rc_pass_chan_list.chan_list_mutex, NULL,
MUTEX_DRIVER, NULL);
mutex_init(&state->rc_timeout_lock, NULL, MUTEX_DRIVER, NULL);
/* Tunables: every one starts at its compile-time default. */
state->id_enable_rc = IBD_DEF_LINK_MODE;
state->rc_mtu = IBD_DEF_RC_MAX_MTU;
state->id_mtu = IBD_DEF_MAX_MTU;
state->id_lso_policy = IBD_DEF_LSO_POLICY;
state->id_num_lso_bufs = IBD_DEF_NUM_LSO_BUFS;
state->id_num_ah = IBD_DEF_NUM_AH;
state->id_hash_size = IBD_DEF_HASH_SIZE;
state->id_create_broadcast_group = IBD_DEF_CREATE_BCAST_GROUP;
state->id_allow_coalesce_comp_tuning = IBD_DEF_COALESCE_COMPLETIONS;
state->id_ud_rx_comp_count = IBD_DEF_UD_RX_COMP_COUNT;
state->id_ud_rx_comp_usec = IBD_DEF_UD_RX_COMP_USEC;
state->id_ud_tx_comp_count = IBD_DEF_UD_TX_COMP_COUNT;
state->id_ud_tx_comp_usec = IBD_DEF_UD_TX_COMP_USEC;
state->id_rc_rx_comp_count = IBD_DEF_RC_RX_COMP_COUNT;
state->id_rc_rx_comp_usec = IBD_DEF_RC_RX_COMP_USEC;
state->id_rc_tx_comp_count = IBD_DEF_RC_TX_COMP_COUNT;
state->id_rc_tx_comp_usec = IBD_DEF_RC_TX_COMP_USEC;
state->id_ud_tx_copy_thresh = IBD_DEF_UD_TX_COPY_THRESH;
state->id_rc_rx_copy_thresh = IBD_DEF_RC_RX_COPY_THRESH;
state->id_rc_tx_copy_thresh = IBD_DEF_RC_TX_COPY_THRESH;
state->id_ud_num_rwqe = IBD_DEF_UD_NUM_RWQE;
state->id_ud_num_swqe = IBD_DEF_UD_NUM_SWQE;
state->id_rc_num_rwqe = IBD_DEF_RC_NUM_RWQE;
state->id_rc_num_swqe = IBD_DEF_RC_NUM_SWQE;
state->rc_enable_srq = IBD_DEF_RC_ENABLE_SRQ;
state->id_rc_num_srq = IBD_DEF_RC_NUM_SRQ;
state->id_rc_rx_rwqe_thresh = IBD_DEF_RC_RX_RWQE_THRESH;
return (DDI_SUCCESS);
}
/*
 * Release what ibd_state_init() created: the request kmem cache, the
 * trap condition variable and all of the instance's mutexes.
 */
static void
ibd_state_fini(ibd_state_t *state)
{
kmem_cache_destroy(state->id_req_kmc);
/* UD receive/transmit and polling locks. */
mutex_destroy(&state->id_rx_list.dl_mutex);
mutex_destroy(&state->id_rx_free_list.dl_mutex);
mutex_destroy(&state->id_txpost_lock);
mutex_destroy(&state->id_tx_list.dl_mutex);
mutex_destroy(&state->id_tx_rel_list.dl_mutex);
mutex_destroy(&state->id_lso_lock);
mutex_destroy(&state->id_sched_lock);
mutex_destroy(&state->id_scq_poll_lock);
mutex_destroy(&state->id_rcq_poll_lock);
/* Trap cv and lock, then the link lock. */
cv_destroy(&state->id_trap_cv);
mutex_destroy(&state->id_trap_lock);
mutex_destroy(&state->id_link_mutex);
/* RC-mode locks. */
mutex_destroy(&state->rc_timeout_lock);
mutex_destroy(&state->rc_srq_free_list.dl_mutex);
mutex_destroy(&state->rc_srq_rwqe_list.dl_mutex);
mutex_destroy(&state->rc_pass_chan_list.chan_list_mutex);
mutex_destroy(&state->rc_tx_large_bufs_lock);
mutex_destroy(&state->rc_rx_lock);
}
/*
 * Work out the port's speed by asking for a loopback path (source GID to
 * itself) and scaling a base of 2000000000 by a factor chosen from the
 * path's static rate.
 *
 * Returns the unscaled base value when the path lookup fails, yields no
 * path, or reports a rate that is not listed below.
 */
static uint64_t
ibd_get_portspeed(ibd_state_t *state)
{
	ibt_path_attr_t path_attr;
	ibt_path_info_t path;
	uint8_t num_paths;
	uint64_t ifspeed = 2000000000;
	int factor;

	/* Loopback query: destination and source are both our own GID. */
	bzero(&path_attr, sizeof (path_attr));
	path_attr.pa_sgid = state->id_sgid;
	path_attr.pa_dgids = &state->id_sgid;
	path_attr.pa_num_dgids = 1;

	if (ibt_get_paths(state->id_ibt_hdl, IBT_PATH_NO_FLAGS,
	    &path_attr, 1, &path, &num_paths) != IBT_SUCCESS) {
		return (ifspeed);
	}
	if (num_paths < 1) {
		return (ifspeed);
	}

	/* Map the static rate onto a multiple of the base speed. */
	switch (path.pi_prim_cep_path.cep_adds_vect.av_srate) {
	case IBT_SRATE_5:
		factor = 2;
		break;
	case IBT_SRATE_10:
		factor = 4;
		break;
	case IBT_SRATE_20:
		factor = 8;
		break;
	case IBT_SRATE_30:
		factor = 12;
		break;
	case IBT_SRATE_40:
		factor = 16;
		break;
	case IBT_SRATE_60:
		factor = 24;
		break;
	case IBT_SRATE_80:
		factor = 32;
		break;
	case IBT_SRATE_120:
		factor = 48;
		break;
	case IBT_SRATE_2:
	default:
		/* Unlisted rates are treated like the base rate. */
		factor = 1;
		break;
	}

	ifspeed *= factor;
	return (ifspeed);
}
/*
 * Search mlist for the multicast cache entry whose destination GID equals
 * mgid.  Returns the first matching entry, or NULL if there is none.
 */
static ibd_mce_t *
ibd_mcache_find(ib_gid_t mgid, struct list *mlist)
{
	ibd_mce_t *mce;

	for (mce = list_head(mlist); mce != NULL;
	    mce = list_next(mlist, mce)) {
		if (bcmp(&mgid, &mce->mc_info.mc_adds_vect.av_dgid,
		    sizeof (ib_gid_t)) == 0) {
			return (mce);
		}
	}

	return (NULL);
}
/*
 * Issue the IBTF join for multicast group mgid with the join state already
 * stored in mce->mc_jstate.  The qkey, flow label, service level and
 * traffic class are taken from state->id_mcinfo; scope and pkey from
 * state.  The resulting group information is written to mce->mc_info.
 *
 * Returns the status of ibt_join_mcg().
 */
static ibt_status_t
ibd_iba_join(ibd_state_t *state, ib_gid_t mgid, ibd_mce_t *mce)
{
	ibt_mcg_attr_t mcg_attr;
	ibt_status_t status;

	bzero(&mcg_attr, sizeof (ibt_mcg_attr_t));

	/* Identity of the group and how we want to join it. */
	mcg_attr.mc_mgid = mgid;
	mcg_attr.mc_join_state = mce->mc_jstate;
	mcg_attr.mc_scope = state->id_scope;
	mcg_attr.mc_pkey = state->id_pkey;

	/* Parameters copied from state->id_mcinfo. */
	mcg_attr.mc_qkey = state->id_mcinfo->mc_qkey;
	mcg_attr.mc_flow = state->id_mcinfo->mc_adds_vect.av_flow;
	mcg_attr.mc_sl = state->id_mcinfo->mc_adds_vect.av_srvl;
	mcg_attr.mc_tclass = state->id_mcinfo->mc_adds_vect.av_tclass;

	status = ibt_join_mcg(state->id_sgid, &mcg_attr, &mce->mc_info,
	    NULL, NULL);
	return (status);
}
/*
 * Join multicast group mgid with join state jstate and record the
 * membership in the driver's multicast caches.
 *
 * Returns the cache entry describing the membership, or NULL if the IBTF
 * join or the channel attach failed.  The returned entry is either newly
 * allocated or, for a full join of a group that already has an entry on
 * the full list, that existing entry.
 */
static ibd_mce_t *
ibd_join_group(ibd_state_t *state, ib_gid_t mgid, uint8_t jstate)
{
ibt_status_t ibt_status;
ibd_mce_t *mce, *tmce, *omce = NULL;
boolean_t do_attach = B_TRUE;
DPRINT(2, "ibd_join_group : join_group state %d : %016llx:%016llx\n",
jstate, mgid.gid_prefix, mgid.gid_guid);
/*
 * Full join of a group that already has an entry on the full list:
 * if that entry is itself a full membership, just cancel its pending
 * reap and reuse it; otherwise it is a send-only entry that gets
 * promoted further below (omce stays set).
 */
if ((jstate == IB_MC_JSTATE_FULL) && ((omce =
IBD_MCACHE_FIND_FULL(state, mgid)) != NULL)) {
if (omce->mc_jstate == IB_MC_JSTATE_FULL) {
ASSERT(omce->mc_fullreap);
omce->mc_fullreap = B_FALSE;
return (omce);
} else {
ASSERT(omce->mc_jstate == IB_MC_JSTATE_SEND_ONLY_NON);
}
}
mce = kmem_zalloc(sizeof (ibd_mce_t), KM_SLEEP);
mce->mc_fullreap = B_FALSE;
mce->mc_jstate = jstate;
if ((ibt_status = ibd_iba_join(state, mgid, mce)) != IBT_SUCCESS) {
DPRINT(10, "ibd_join_group : failed ibt_join_mcg() %d",
ibt_status);
kmem_free(mce, sizeof (ibd_mce_t));
return (NULL);
}
/*
 * Decide whether the channel must be attached to the group.  No attach
 * is done when the other kind of receiving membership (full vs. non
 * member) already exists for the group, nor for any other join state
 * (e.g. send-only).
 */
if (jstate == IB_MC_JSTATE_NON) {
tmce = IBD_MCACHE_FIND_FULL(state, mgid);
if ((tmce != NULL) && (tmce->mc_jstate == IB_MC_JSTATE_FULL))
do_attach = B_FALSE;
} else if (jstate == IB_MC_JSTATE_FULL) {
if (IBD_MCACHE_FIND_NON(state, mgid) != NULL)
do_attach = B_FALSE;
} else {
do_attach = B_FALSE;
}
if (do_attach) {
DPRINT(10, "ibd_join_group: ibt_attach_mcg \n");
if ((ibt_status = ibt_attach_mcg(state->id_chnl_hdl,
&mce->mc_info)) != IBT_SUCCESS) {
DPRINT(10, "ibd_join_group : failed qp attachment "
"%d\n", ibt_status);
/* Undo the join before giving up. */
(void) ibt_leave_mcg(state->id_sgid, mgid,
state->id_sgid, jstate);
kmem_free(mce, sizeof (ibd_mce_t));
return (NULL);
}
}
if (jstate == IB_MC_JSTATE_NON) {
IBD_MCACHE_INSERT_NON(state, mce);
} else {
mce->mc_req.rq_gid = mgid;
mce->mc_req.rq_ptr = mce;
/*
 * Promotion of an existing send-only entry to full membership: drop the
 * send-only membership (result ignored), copy the new group information
 * into the old entry, free the temporary one and return the old entry.
 */
if (omce != NULL) {
(void) ibt_leave_mcg(state->id_sgid, mgid,
state->id_sgid, IB_MC_JSTATE_SEND_ONLY_NON);
omce->mc_jstate = IB_MC_JSTATE_FULL;
bcopy(&mce->mc_info, &omce->mc_info,
sizeof (ibt_mcg_info_t));
kmem_free(mce, sizeof (ibd_mce_t));
return (omce);
}
/* Insertion into the full list is done under id_mc_mutex. */
mutex_enter(&state->id_mc_mutex);
IBD_MCACHE_INSERT_FULL(state, mce);
mutex_exit(&state->id_mc_mutex);
}
return (mce);
}
/*
 * Re-establish membership of the group described by mce: leave it (the
 * result is ignored) and join it again with the same join state.  Entries
 * already marked mc_fullreap are skipped.  A failed re-join is only
 * reported with a warning.
 */
static void
ibd_reacquire_group(ibd_state_t *state, ibd_mce_t *mce)
{
	ib_gid_t mgid;

	if (!mce->mc_fullreap) {
		mgid = mce->mc_info.mc_adds_vect.av_dgid;

		DPRINT(2, "ibd_reacquire_group : %016llx:%016llx\n",
		    mgid.gid_prefix, mgid.gid_guid);

		(void) ibt_leave_mcg(state->id_sgid, mgid, state->id_sgid,
		    mce->mc_jstate);

		if (ibd_iba_join(state, mgid, mce) != IBT_SUCCESS) {
			ibd_print_warn(state, "Failure on port up to rejoin "
			    "multicast gid %016llx:%016llx",
			    (u_longlong_t)mgid.gid_prefix,
			    (u_longlong_t)mgid.gid_guid);
		}
	}
}
/*
 * Remove mce from its multicast cache list, detach the channel from the
 * group when no other receiving membership remains, leave the group with
 * join state jstate and free mce.
 *
 * For jstate == IB_MC_JSTATE_FULL nothing at all is done unless
 * mce->mc_fullreap is set.  For join states other than FULL and NON the
 * entry is removed from the full list and no detach is performed.
 */
static void
ibd_async_reap_group(ibd_state_t *state, ibd_mce_t *mce, ib_gid_t mgid,
    uint8_t jstate)
{
	ibd_mce_t *other;
	boolean_t do_detach;

	if (jstate == IB_MC_JSTATE_FULL) {
		if (!mce->mc_fullreap) {
			return;
		}

		mutex_enter(&state->id_mc_mutex);
		IBD_MCACHE_PULLOUT_FULL(state, mce);
		mutex_exit(&state->id_mc_mutex);

		/* Keep the attachment if a non-member entry still exists. */
		do_detach = (IBD_MCACHE_FIND_NON(state, mgid) != NULL) ?
		    B_FALSE : B_TRUE;
	} else if (jstate == IB_MC_JSTATE_NON) {
		IBD_MCACHE_PULLOUT_NON(state, mce);

		/* Keep the attachment if a full membership still exists. */
		other = IBD_MCACHE_FIND_FULL(state, mgid);
		do_detach = ((other != NULL) &&
		    (other->mc_jstate == IB_MC_JSTATE_FULL)) ?
		    B_FALSE : B_TRUE;
	} else {
		mutex_enter(&state->id_mc_mutex);
		IBD_MCACHE_PULLOUT_FULL(state, mce);
		mutex_exit(&state->id_mc_mutex);

		do_detach = B_FALSE;
	}

	if (do_detach) {
		DPRINT(2, "ibd_async_reap_group : ibt_detach_mcg : "
		    "%016llx:%016llx\n", mgid.gid_prefix, mgid.gid_guid);
		(void) ibt_detach_mcg(state->id_chnl_hdl, &mce->mc_info);
	}

	(void) ibt_leave_mcg(state->id_sgid, mgid, state->id_sgid, jstate);
	kmem_free(mce, sizeof (ibd_mce_t));
}
/*
 * Leave multicast group mgid for the given join state.
 *
 * The matching cache entry is looked up (non-member list for
 * IB_MC_JSTATE_NON, full list otherwise); if it is absent the function
 * returns without doing anything.  For full and send-only memberships the
 * address-cache entry for the group is recycled first, and the group is
 * only reaped through ibd_async_reap_group() if that recycle reports
 * B_TRUE.
 */
static void
ibd_leave_group(ibd_state_t *state, ib_gid_t mgid, uint8_t jstate)
{
ipoib_mac_t mcmac;
boolean_t recycled;
ibd_mce_t *mce;
DPRINT(2, "ibd_leave_group : leave_group state %d : %016llx:%016llx\n",
jstate, mgid.gid_prefix, mgid.gid_guid);
if (jstate == IB_MC_JSTATE_NON) {
/* Non-member entries are reaped directly, with no address-cache recycle. */
recycled = B_TRUE;
mce = IBD_MCACHE_FIND_NON(state, mgid);
if (mce == NULL) {
return;
}
} else {
mce = IBD_MCACHE_FIND_FULL(state, mgid);
if (jstate == IB_MC_JSTATE_SEND_ONLY_NON) {
/*
 * A send-only leave is ignored when the entry found on the full list is
 * actually a full membership.
 */
if ((mce == NULL) || (mce->mc_jstate ==
IB_MC_JSTATE_FULL)) {
return;
}
} else {
ASSERT(jstate == IB_MC_JSTATE_FULL);
if (mce == NULL) {
return;
}
/* Mark the entry so that it may be reaped (see ibd_async_reap_group()). */
mce->mc_fullreap = B_TRUE;
}
/*
 * Recycle the address-cache entry for the group's multicast address; the
 * recycle is forced only for the send-only case.
 */
mutex_enter(&state->id_ac_mutex);
ibd_h2n_mac(&mcmac, IB_MC_QPN, mgid.gid_prefix, mgid.gid_guid);
recycled = ibd_acache_recycle(state, &mcmac, (jstate ==
IB_MC_JSTATE_SEND_ONLY_NON));
mutex_exit(&state->id_ac_mutex);
}
if (recycled) {
DPRINT(2, "ibd_leave_group : leave_group reaping : "
"%016llx:%016llx\n", mgid.gid_prefix, mgid.gid_guid);
ibd_async_reap_group(state, mce, mgid, jstate);
}
}
/*
 * Locate the IPoIB broadcast group for this partition, optionally creating
 * it when it is absent.
 *
 * The broadcast MGID is built from IB_MCGID_IPV4_PREFIX, a scope and the
 * pkey, and queried for each scope in turn (subnet-local, site-local,
 * org-local, global).  If no group is found and
 * state->id_create_broadcast_group is set, a subnet-local group is created
 * with IBD_DEFAULT_QKEY by joining it as a full member, and the query is
 * repeated once.
 *
 * On success state->id_mcinfo describes the group, state->id_mtu is lowered
 * to the group MTU and state->id_bgroup_present is set.  A group MTU larger
 * than state->id_mtu is an error.
 *
 * Returns IBT_SUCCESS or IBT_FAILURE.
 */
static ibt_status_t
ibd_find_bgroup(ibd_state_t *state)
{
	ibt_mcg_attr_t mcg_attr;
	uint_t numg;
	uchar_t scopes[] = { IB_MC_SCOPE_SUBNET_LOCAL,
	    IB_MC_SCOPE_SITE_LOCAL, IB_MC_SCOPE_ORG_LOCAL,
	    IB_MC_SCOPE_GLOBAL };
	int i, mcgmtu;
	boolean_t found = B_FALSE;
	int ret;
	ibt_mcg_info_t mcg_info;

	state->id_bgroup_created = B_FALSE;
	state->id_bgroup_present = B_FALSE;

query_bcast_grp:
	bzero(&mcg_attr, sizeof (ibt_mcg_attr_t));
	mcg_attr.mc_pkey = state->id_pkey;
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(state->id_mgid))
	state->id_mgid.gid_guid = IB_MGID_IPV4_LOWGRP_MASK;
	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(state->id_mgid))

	/* Try each scope until a broadcast group is found. */
	for (i = 0; i < sizeof (scopes)/sizeof (scopes[0]); i++) {
		state->id_scope = mcg_attr.mc_scope = scopes[i];

		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(state->id_mgid))
		/*
		 * The pkey is widened to uint32_t BEFORE the shift.  If
		 * id_pkey is narrower than int (it appears to be a 16-bit
		 * P_Key), shifting first would promote it to signed int and
		 * overflow whenever the top bit of the pkey is set.
		 */
		state->id_mgid.gid_prefix =
		    (((uint64_t)IB_MCGID_IPV4_PREFIX << 32) |
		    ((uint64_t)state->id_scope << 48) |
		    ((uint32_t)state->id_pkey << 16));
		mcg_attr.mc_mgid = state->id_mgid;
		_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(state->id_mgid))
		if (ibt_query_mcg(state->id_sgid, &mcg_attr, 1,
		    &state->id_mcinfo, &numg) == IBT_SUCCESS) {
			found = B_TRUE;
			break;
		}
	}

	if (!found) {
		if (state->id_create_broadcast_group) {
			/*
			 * If we already created the group and still cannot
			 * find it, give up (and leave the group again).
			 */
			if (state->id_bgroup_created) {
				ibd_print_warn(state, "IPoIB broadcast group "
				    "absent. Unable to query after create.");
				goto find_bgroup_fail;
			}

			/* Create the group by joining it as a full member. */
			bzero(&mcg_attr, sizeof (ibt_mcg_attr_t));
			mcg_attr.mc_qkey = IBD_DEFAULT_QKEY;
			mcg_attr.mc_join_state = IB_MC_JSTATE_FULL;
			mcg_attr.mc_scope = IB_MC_SCOPE_SUBNET_LOCAL;
			mcg_attr.mc_pkey = state->id_pkey;
			mcg_attr.mc_flow = 0;
			mcg_attr.mc_sl = 0;
			mcg_attr.mc_tclass = 0;
			_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(state->id_mgid))
			/* Same widening-before-shift as in the loop above. */
			state->id_mgid.gid_prefix =
			    (((uint64_t)IB_MCGID_IPV4_PREFIX << 32) |
			    ((uint64_t)IB_MC_SCOPE_SUBNET_LOCAL << 48) |
			    ((uint32_t)state->id_pkey << 16));
			mcg_attr.mc_mgid = state->id_mgid;
			_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(state->id_mgid))

			if ((ret = ibt_join_mcg(state->id_sgid, &mcg_attr,
			    &mcg_info, NULL, NULL)) != IBT_SUCCESS) {
				ibd_print_warn(state, "IPoIB broadcast group "
				    "absent, create failed: ret = %d\n", ret);
				state->id_bgroup_created = B_FALSE;
				return (IBT_FAILURE);
			}
			state->id_bgroup_created = B_TRUE;
			goto query_bcast_grp;
		} else {
			ibd_print_warn(state, "IPoIB broadcast group absent");
			return (IBT_FAILURE);
		}
	}

	/*
	 * The group's mc_mtu is an encoded value; the byte size is
	 * 128 << mc_mtu.  The group MTU must not exceed ours.
	 */
	mcgmtu = (128 << state->id_mcinfo->mc_mtu);
	if (state->id_mtu < mcgmtu) {
		ibd_print_warn(state, "IPoIB broadcast group MTU %d "
		    "greater than port's maximum MTU %d", mcgmtu,
		    state->id_mtu);
		ibt_free_mcg_info(state->id_mcinfo, 1);
		goto find_bgroup_fail;
	}
	state->id_mtu = mcgmtu;
	state->id_bgroup_present = B_TRUE;

	return (IBT_SUCCESS);

find_bgroup_fail:
	/* mcg_info is only valid (and only used) if we created the group. */
	if (state->id_bgroup_created) {
		(void) ibt_leave_mcg(state->id_sgid,
		    mcg_info.mc_adds_vect.av_dgid, state->id_sgid,
		    IB_MC_JSTATE_FULL);
	}

	return (IBT_FAILURE);
}
/*
 * Allocate the UD send wqe array and its copy buffers, and register the
 * copy-buffer region with the HCA.
 *
 * Each of the id_ud_num_swqe send wqes gets id_tx_buf_sz bytes of copy
 * buffer.  That size is id_mtu, unless LSO is both enabled by policy and
 * supported and id_ud_tx_copy_thresh exceeds the MTU, in which case the
 * threshold is used instead.
 *
 * Returns DDI_SUCCESS, or DDI_FAILURE if ibt_register_mr() fails.  On
 * failure both allocations are released and both pointers are cleared so
 * that no stale pointer is left in the soft state.
 */
static int
ibd_alloc_tx_copybufs(ibd_state_t *state)
{
	ibt_mr_attr_t mem_attr;

	state->id_tx_buf_sz = state->id_mtu;
	if (state->id_lso_policy && state->id_lso_capable &&
	    (state->id_ud_tx_copy_thresh > state->id_mtu)) {
		state->id_tx_buf_sz = state->id_ud_tx_copy_thresh;
	}

	state->id_tx_bufs = kmem_zalloc(state->id_ud_num_swqe *
	    state->id_tx_buf_sz, KM_SLEEP);
	state->id_tx_wqes = kmem_zalloc(state->id_ud_num_swqe *
	    sizeof (ibd_swqe_t), KM_SLEEP);

	/* Register the whole copy-buffer region as one memory region. */
	mem_attr.mr_vaddr = (uint64_t)(uintptr_t)state->id_tx_bufs;
	mem_attr.mr_len = state->id_ud_num_swqe * state->id_tx_buf_sz;
	mem_attr.mr_as = NULL;
	mem_attr.mr_flags = IBT_MR_SLEEP;
	if (ibt_register_mr(state->id_hca_hdl, state->id_pd_hdl, &mem_attr,
	    &state->id_tx_mr_hdl, &state->id_tx_mr_desc) != IBT_SUCCESS) {
		DPRINT(10, "ibd_alloc_tx_copybufs: ibt_register_mr failed");
		kmem_free(state->id_tx_wqes,
		    state->id_ud_num_swqe * sizeof (ibd_swqe_t));
		kmem_free(state->id_tx_bufs,
		    state->id_ud_num_swqe * state->id_tx_buf_sz);
		/* Clear both pointers; the wqe array was freed as well. */
		state->id_tx_wqes = NULL;
		state->id_tx_bufs = NULL;
		return (DDI_FAILURE);
	}

	return (DDI_SUCCESS);
}
/*
 * Allocate the LSO buffer bucket: id_num_lso_bufs buffers of IBD_LSO_BUFSZ
 * bytes in one registered memory region, plus a descriptor per buffer
 * chained into a free list.
 *
 * All sleeping allocations and the list construction are done before
 * id_lso_lock is taken; the lock is held only while the bucket is filled in
 * and published through state->id_lso.
 *
 * Returns DDI_SUCCESS, or DDI_FAILURE if ibt_register_mr() fails (in which
 * case everything allocated here is freed again).
 */
static int
ibd_alloc_tx_lsobufs(ibd_state_t *state)
{
	ibt_mr_attr_t mem_attr;
	ibd_lsobuf_t *buflist;
	ibd_lsobuf_t *lbufp;
	ibd_lsobkt_t *bktp;
	uint8_t *membase;
	uint8_t *memp;
	uint_t memsz;
	int i;

	bktp = kmem_zalloc(sizeof (ibd_lsobkt_t), KM_SLEEP);
	memsz = state->id_num_lso_bufs * IBD_LSO_BUFSZ;
	membase = kmem_zalloc(memsz, KM_SLEEP);

	mem_attr.mr_vaddr = (uint64_t)(uintptr_t)membase;
	mem_attr.mr_len = memsz;
	mem_attr.mr_as = NULL;
	mem_attr.mr_flags = IBT_MR_SLEEP;
	if (ibt_register_mr(state->id_hca_hdl, state->id_pd_hdl,
	    &mem_attr, &bktp->bkt_mr_hdl, &bktp->bkt_mr_desc) != IBT_SUCCESS) {
		DPRINT(10, "ibd_alloc_tx_lsobufs: ibt_register_mr failed");
		kmem_free(membase, memsz);
		kmem_free(bktp, sizeof (ibd_lsobkt_t));
		return (DDI_FAILURE);
	}

	buflist = kmem_zalloc(state->id_num_lso_bufs * sizeof (ibd_lsobuf_t),
	    KM_SLEEP);

	/*
	 * Chain the descriptors in array order.  The last descriptor is
	 * terminated inside the loop, so no "tail" pointer is needed and
	 * nothing is dereferenced if the count happens to be zero.
	 */
	memp = membase;
	lbufp = buflist;
	for (i = 0; i < state->id_num_lso_bufs; i++) {
		lbufp->lb_isfree = 1;
		lbufp->lb_buf = memp;
		lbufp->lb_next = (i + 1 < state->id_num_lso_bufs) ?
		    lbufp + 1 : NULL;
		memp += IBD_LSO_BUFSZ;
		lbufp++;
	}

	mutex_enter(&state->id_lso_lock);
	bktp->bkt_bufl = buflist;
	bktp->bkt_free_head = buflist;
	bktp->bkt_mem = membase;
	bktp->bkt_nelem = state->id_num_lso_bufs;
	bktp->bkt_nfree = bktp->bkt_nelem;
	state->id_lso = bktp;
	mutex_exit(&state->id_lso_lock);

	return (DDI_SUCCESS);
}
/*
 * Prepare the UD transmit side: allocate the copy buffers (and the LSO
 * buffers when LSO is enabled by policy and supported), reset both tx
 * lists, then initialise every send wqe and push it onto id_tx_list.
 *
 * Failing to allocate the LSO buffers is not fatal; it only clears
 * id_lso_capable.  DDI_FAILURE is returned only when the copy buffers
 * cannot be set up.
 */
static int
ibd_init_txlist(ibd_state_t *state)
{
	ibd_swqe_t *wqe;
	ibt_lkey_t tx_lkey;
	uint8_t *buf;
	uint_t bufsz;
	int n;

	if (ibd_alloc_tx_copybufs(state) != DDI_SUCCESS)
		return (DDI_FAILURE);

	if (state->id_lso_policy && state->id_lso_capable &&
	    ibd_alloc_tx_lsobufs(state) != DDI_SUCCESS) {
		state->id_lso_capable = B_FALSE;
	}

	/* Start with an empty primary tx list ... */
	mutex_enter(&state->id_tx_list.dl_mutex);
	state->id_tx_list.dl_head = NULL;
	state->id_tx_list.dl_pending_sends = B_FALSE;
	state->id_tx_list.dl_cnt = 0;
	mutex_exit(&state->id_tx_list.dl_mutex);

	/* ... and an empty release list. */
	mutex_enter(&state->id_tx_rel_list.dl_mutex);
	state->id_tx_rel_list.dl_head = NULL;
	state->id_tx_rel_list.dl_pending_sends = B_FALSE;
	state->id_tx_rel_list.dl_cnt = 0;
	mutex_exit(&state->id_tx_rel_list.dl_mutex);

	tx_lkey = state->id_tx_mr_desc.md_lkey;
	buf = state->id_tx_bufs;
	bufsz = state->id_tx_buf_sz;
	wqe = state->id_tx_wqes;

	mutex_enter(&state->id_tx_list.dl_mutex);
	n = 0;
	while (n < state->id_ud_num_swqe) {
		wqe->swqe_im_mblk = NULL;

		/* Each wqe owns one bufsz-sized slice of the copy region. */
		wqe->swqe_copybuf.ic_sgl.ds_va = (ib_vaddr_t)(uintptr_t)buf;
		wqe->swqe_copybuf.ic_sgl.ds_key = tx_lkey;
		wqe->swqe_copybuf.ic_sgl.ds_len = 0;

		wqe->w_swr.wr_id = (ibt_wrid_t)(uintptr_t)wqe;
		wqe->w_swr.wr_flags = IBT_WR_NO_FLAGS;
		wqe->w_swr.wr_trans = IBT_UD_SRV;
		wqe->w_swr.wr_nds = 0;
		wqe->w_swr.wr_sgl = NULL;
		wqe->w_swr.wr_opcode = IBT_WRC_SEND;

		/* Push onto the head of the tx list. */
		wqe->swqe_next = state->id_tx_list.dl_head;
		state->id_tx_list.dl_head = SWQE_TO_WQE(wqe);
		state->id_tx_list.dl_cnt++;

		n++;
		wqe++;
		buf += bufsz;
	}
	mutex_exit(&state->id_tx_list.dl_mutex);

	return (DDI_SUCCESS);
}
/*
 * Take enough LSO buffers off the bucket's free list to hold req_sz bytes
 * and describe them in sgl_p[].
 *
 * Every sgl entry covers a full IBD_LSO_BUFSZ buffer except the last one,
 * which is trimmed to the remainder when req_sz is not a multiple of the
 * buffer size.  On success *nds_p is set to the number of entries filled
 * in and 0 is returned; -1 is returned (nothing taken) when the bucket does
 * not have enough free buffers.
 */
static int
ibd_acquire_lsobufs(ibd_state_t *state, uint_t req_sz, ibt_wr_ds_t *sgl_p,
    uint32_t *nds_p)
{
	ibd_lsobkt_t *bkt;
	ibd_lsobuf_t *cur;
	ibd_lsobuf_t *following;
	ibt_lkey_t lkey;
	uint_t tail_sz;
	uint_t nbufs;
	int ndx;

	ASSERT(sgl_p != NULL);
	ASSERT(nds_p != NULL);
	ASSERT(req_sz != 0);

	/* Whole buffers, plus one more for any partial remainder. */
	tail_sz = req_sz % IBD_LSO_BUFSZ;
	nbufs = req_sz / IBD_LSO_BUFSZ;
	if (tail_sz != 0)
		nbufs++;

	mutex_enter(&state->id_lso_lock);

	ASSERT(state->id_lso != NULL);
	bkt = state->id_lso;
	if (bkt->bkt_nfree < nbufs) {
		mutex_exit(&state->id_lso_lock);
		return (-1);
	}

	lkey = bkt->bkt_mr_desc.md_lkey;
	cur = bkt->bkt_free_head;
	ndx = 0;
	while (ndx < nbufs) {
		ASSERT(cur->lb_isfree != 0);
		ASSERT(cur->lb_buf != NULL);

		following = cur->lb_next;

		sgl_p[ndx].ds_va = (ib_vaddr_t)(uintptr_t)cur->lb_buf;
		sgl_p[ndx].ds_key = lkey;
		sgl_p[ndx].ds_len = IBD_LSO_BUFSZ;

		/* Detach the descriptor from the free list. */
		cur->lb_isfree = 0;
		cur->lb_next = NULL;

		cur = following;
		ndx++;
	}
	bkt->bkt_free_head = cur;

	/* The final entry only carries the leftover bytes, if any. */
	if (tail_sz) {
		sgl_p[ndx - 1].ds_len = tail_sz;
	}

	bkt->bkt_nfree -= nbufs;
	mutex_exit(&state->id_lso_lock);

	*nds_p = nbufs;
	return (0);
}
/*
 * Give the LSO buffers described by the first nds entries of sgl_p[] back
 * to the bucket.  Each buffer's descriptor is located from its address
 * offset within the bucket memory and pushed onto the head of the free
 * list.
 */
static void
ibd_release_lsobufs(ibd_state_t *state, ibt_wr_ds_t *sgl_p, uint32_t nds)
{
	ibd_lsobkt_t *bkt;
	ibd_lsobuf_t *desc;
	uint8_t *mem_limit;
	uint8_t *addr;
	uint_t slot;
	uint32_t k;

	mutex_enter(&state->id_lso_lock);

	bkt = state->id_lso;
	ASSERT(bkt != NULL);

	mem_limit = bkt->bkt_mem + bkt->bkt_nelem * IBD_LSO_BUFSZ;
	k = 0;
	while (k < nds) {
		addr = (uint8_t *)(uintptr_t)sgl_p[k].ds_va;
		ASSERT(addr >= bkt->bkt_mem && addr < mem_limit);

		/* Buffer index == descriptor index. */
		slot = (addr - bkt->bkt_mem) / IBD_LSO_BUFSZ;
		desc = bkt->bkt_bufl + slot;
		ASSERT(desc->lb_isfree == 0);
		ASSERT(desc->lb_buf == addr);

		desc->lb_isfree = 1;
		desc->lb_next = bkt->bkt_free_head;
		bkt->bkt_free_head = desc;

		k++;
	}
	bkt->bkt_nfree += nds;

	mutex_exit(&state->id_lso_lock);
}
/*
 * Undo ibd_alloc_tx_copybufs(): deregister the tx copy-buffer memory region
 * and free the send wqe array and the copy buffers.  A deregistration
 * failure is only logged; the memory is freed regardless.
 */
static void
ibd_free_tx_copybufs(ibd_state_t *state)
{
	ibt_status_t rc;

	rc = ibt_deregister_mr(state->id_hca_hdl, state->id_tx_mr_hdl);
	if (rc != IBT_SUCCESS) {
		DPRINT(10, "ibd_free_tx_copybufs: ibt_deregister_mr failed");
	}
	state->id_tx_mr_hdl = NULL;

	kmem_free(state->id_tx_wqes,
	    state->id_ud_num_swqe * sizeof (ibd_swqe_t));
	state->id_tx_wqes = NULL;

	kmem_free(state->id_tx_bufs,
	    state->id_ud_num_swqe * state->id_tx_buf_sz);
	state->id_tx_bufs = NULL;
}
/*
 * Tear down the LSO buffer bucket, if there is one: free the descriptor
 * array, deregister and free the buffer memory, free the bucket itself and
 * clear state->id_lso.  Everything happens under id_lso_lock.
 */
static void
ibd_free_tx_lsobufs(ibd_state_t *state)
{
	ibd_lsobkt_t *bkt;

	mutex_enter(&state->id_lso_lock);

	bkt = state->id_lso;
	if (bkt != NULL) {
		/* Descriptor array first. */
		ASSERT(bkt->bkt_bufl != NULL);
		kmem_free(bkt->bkt_bufl,
		    bkt->bkt_nelem * sizeof (ibd_lsobuf_t));

		/* Then the registered buffer memory. */
		ASSERT(bkt->bkt_mr_hdl != NULL);
		if (ibt_deregister_mr(state->id_hca_hdl,
		    bkt->bkt_mr_hdl) != IBT_SUCCESS) {
			DPRINT(10,
			    "ibd_free_lsobufs: ibt_deregister_mr failed");
		}
		ASSERT(bkt->bkt_mem);
		kmem_free(bkt->bkt_mem, bkt->bkt_nelem * IBD_LSO_BUFSZ);

		/* Finally the bucket. */
		kmem_free(bkt, sizeof (ibd_lsobkt_t));
		state->id_lso = NULL;
	}

	mutex_exit(&state->id_lso_lock);
}
/*
 * Tear down the UD transmit side: empty both tx lists (taking id_tx_list's
 * lock before id_tx_rel_list's), then free the LSO buffers and the copy
 * buffers.
 */
static void
ibd_fini_txlist(ibd_state_t *state)
{
	mutex_enter(&state->id_tx_list.dl_mutex);
	mutex_enter(&state->id_tx_rel_list.dl_mutex);

	state->id_tx_list.dl_cnt = 0;
	state->id_tx_list.dl_pending_sends = B_FALSE;
	state->id_tx_list.dl_head = NULL;

	state->id_tx_rel_list.dl_cnt = 0;
	state->id_tx_rel_list.dl_pending_sends = B_FALSE;
	state->id_tx_rel_list.dl_head = NULL;

	mutex_exit(&state->id_tx_rel_list.dl_mutex);
	mutex_exit(&state->id_tx_list.dl_mutex);

	ibd_free_tx_lsobufs(state);
	ibd_free_tx_copybufs(state);
}
/*
 * Post a chain of rwqes (linked through rwqe_next) to the receive queue of
 * state->id_chnl_hdl, in batches of at most IBD_RX_POST_CNT work requests.
 *
 * id_rx_list.dl_cnt is bumped by the batch size before each post.  If the
 * post fails, the count is corrected down by the number of requests that
 * were not accepted, and a warning is logged with the number actually
 * requested in that batch.
 */
static void
ibd_post_recv_list(ibd_state_t *state, ibd_rwqe_t *rwqe)
{
	uint_t i;
	uint_t num_posted;
	ibt_status_t ibt_status;
	ibt_recv_wr_t wrs[IBD_RX_POST_CNT];

	while (rwqe) {
		/*
		 * Copy up to IBD_RX_POST_CNT work requests into wrs[].  On
		 * exit i is the number of entries filled in, whether the
		 * loop ran to completion or the chain ended early.
		 */
		for (i = 0; i < IBD_RX_POST_CNT; i++) {
			wrs[i] = rwqe->w_rwr;
			rwqe = WQE_TO_RWQE(rwqe->rwqe_next);
			if (rwqe == NULL) {
				i++;
				break;
			}
		}

		/* Account for the batch before it becomes visible to the HCA. */
		atomic_add_32(&state->id_rx_list.dl_cnt, i);

		num_posted = 0;
		ibt_status = ibt_post_recv(state->id_chnl_hdl, wrs, i,
		    &num_posted);
		if (ibt_status != IBT_SUCCESS) {
			/*
			 * Report the size of this batch (i), which may be
			 * smaller than IBD_RX_POST_CNT for the final batch.
			 */
			ibd_print_warn(state, "ibd_post_recv: FATAL: "
			    "posting multiple wrs failed: "
			    "requested=%d, done=%d, ret=%d",
			    i, num_posted, ibt_status);
			/* Unsigned wrap yields the negative correction. */
			atomic_add_32(&state->id_rx_list.dl_cnt,
			    num_posted - i);
		}
	}
}
/*
 * Advance id_rx_post_queue_index to the next rx queue (wrapping with the
 * id_rx_nqueues - 1 mask), detach whatever rwqes are parked on that queue
 * and post them.
 */
static void
ibd_post_recv_intr(ibd_state_t *state)
{
	ibd_rx_queue_t *queue;
	ibd_rwqe_t *pending;

	state->id_rx_post_queue_index =
	    (state->id_rx_post_queue_index + 1) & (state->id_rx_nqueues - 1);
	queue = &state->id_rx_queues[state->id_rx_post_queue_index];

	/* Take the whole chain; post it after dropping the queue lock. */
	mutex_enter(&queue->rx_post_lock);
	pending = WQE_TO_RWQE(queue->rx_head);
	queue->rx_cnt = 0;
	queue->rx_head = NULL;
	mutex_exit(&queue->rx_post_lock);

	ibd_post_recv_list(state, pending);
}
/*
 * Map an rwqe address to an index into state->id_rx_queues by discarding
 * the low 8 address bits and masking with id_rx_nqueues - 1.  The mask form
 * relies on id_rx_nqueues being a power of two, and the macro expects a
 * variable named `state' to be in scope where it is used.
 */
#define RX_QUEUE_HASH(rwqe) \
(((uintptr_t)(rwqe) >> 8) & (state->id_rx_nqueues - 1))
/*
 * Queue one rwqe for (re)posting to the receive queue.
 *
 * The rwqe is normally just linked onto one of the per-state rx queues.
 * Once a queue has accumulated at least IBD_RX_POST_CNT - 2 entries,
 * id_rx_post_active is incremented, and only when the new value is a
 * multiple of id_rx_nqueues is the queue's whole chain (with this rwqe at
 * its head) detached and handed to ibd_post_recv_list().
 */
static void
ibd_post_recv(ibd_state_t *state, ibd_rwqe_t *rwqe)
{
ibd_rx_queue_t *rxp;
/*
 * Pick the queue from the rwqe's address.  If its lock is contended, use
 * the queue selected by hashing rwqe + 16 instead and block on that one.
 */
rxp = state->id_rx_queues + RX_QUEUE_HASH(rwqe);
if (!mutex_tryenter(&rxp->rx_post_lock)) {
rxp = state->id_rx_queues + RX_QUEUE_HASH(rwqe + 16);
mutex_enter(&rxp->rx_post_lock);
}
/* Chain the existing queue contents behind this rwqe. */
rwqe->rwqe_next = rxp->rx_head;
if (++rxp->rx_cnt >= IBD_RX_POST_CNT - 2) {
uint_t active = atomic_inc_32_nv(&state->id_rx_post_active);
if ((active & (state->id_rx_nqueues - 1)) == 0) {
/* Empty the queue and post the chain outside the lock. */
rxp->rx_head = NULL;
rxp->rx_cnt = 0;
mutex_exit(&rxp->rx_post_lock);
ibd_post_recv_list(state, rwqe);
return;
}
}
/* Not posting this time: the rwqe becomes the new queue head. */
rxp->rx_head = RWQE_TO_WQE(rwqe);
mutex_exit(&rxp->rx_post_lock);
}
/*
 * Allocate the UD receive resources: the receive buffers (id_mtu plus
 * IPOIB_GRH_SIZE bytes each), the rwqe array, and the array of
 * 1 << IBD_LOG_RX_POST rx post queues with their locks; then register the
 * receive buffer region with the HCA for local write.
 *
 * Returns DDI_SUCCESS, or DDI_FAILURE if ibt_register_mr() fails.  On
 * failure everything set up here is undone: the queue locks are destroyed,
 * the queue array, rwqe array and buffers are freed, their pointers are
 * cleared and id_rx_nqueues is reset to 0.
 */
static int
ibd_alloc_rx_copybufs(ibd_state_t *state)
{
	ibt_mr_attr_t mem_attr;
	int i;

	state->id_rx_buf_sz = state->id_mtu + IPOIB_GRH_SIZE;
	state->id_rx_bufs = kmem_zalloc(state->id_ud_num_rwqe *
	    state->id_rx_buf_sz, KM_SLEEP);
	state->id_rx_wqes = kmem_zalloc(state->id_ud_num_rwqe *
	    sizeof (ibd_rwqe_t), KM_SLEEP);

	state->id_rx_nqueues = 1 << IBD_LOG_RX_POST;
	state->id_rx_queues = kmem_zalloc(state->id_rx_nqueues *
	    sizeof (ibd_rx_queue_t), KM_SLEEP);
	for (i = 0; i < state->id_rx_nqueues; i++) {
		ibd_rx_queue_t *rxp = state->id_rx_queues + i;

		mutex_init(&rxp->rx_post_lock, NULL, MUTEX_DRIVER, NULL);
	}

	mem_attr.mr_vaddr = (uint64_t)(uintptr_t)state->id_rx_bufs;
	mem_attr.mr_len = state->id_ud_num_rwqe * state->id_rx_buf_sz;
	mem_attr.mr_as = NULL;
	mem_attr.mr_flags = IBT_MR_SLEEP | IBT_MR_ENABLE_LOCAL_WRITE;
	if (ibt_register_mr(state->id_hca_hdl, state->id_pd_hdl, &mem_attr,
	    &state->id_rx_mr_hdl, &state->id_rx_mr_desc) != IBT_SUCCESS) {
		DPRINT(10, "ibd_alloc_rx_copybufs: ibt_register_mr failed");

		/*
		 * Release the post queues too; otherwise the array and its
		 * initialised mutexes would be leaked, since a later
		 * allocation simply overwrites id_rx_queues.
		 */
		for (i = 0; i < state->id_rx_nqueues; i++) {
			ibd_rx_queue_t *rxp = state->id_rx_queues + i;

			mutex_destroy(&rxp->rx_post_lock);
		}
		kmem_free(state->id_rx_queues, state->id_rx_nqueues *
		    sizeof (ibd_rx_queue_t));
		state->id_rx_queues = NULL;
		state->id_rx_nqueues = 0;

		kmem_free(state->id_rx_wqes,
		    state->id_ud_num_rwqe * sizeof (ibd_rwqe_t));
		kmem_free(state->id_rx_bufs,
		    state->id_ud_num_rwqe * state->id_rx_buf_sz);
		state->id_rx_bufs = NULL;
		state->id_rx_wqes = NULL;
		return (DDI_FAILURE);
	}

	return (DDI_SUCCESS);
}
/*
 * Build the list of receive wqes and post it to the receive queue.
 *
 * Two cases are handled:
 *  - id_rx_free_list is non-empty: the rwqes parked there are taken off the
 *    list, each gets a fresh mblk via desballoc() over its existing buffer,
 *    and the chain is posted.
 *  - otherwise: the receive buffers and rwqes are allocated with
 *    ibd_alloc_rx_copybufs(), every rwqe is initialised, and the resulting
 *    chain is posted.
 *
 * Returns DDI_SUCCESS once the chain has been handed to
 * ibd_post_recv_list(), or DDI_FAILURE if allocation or any desballoc()
 * fails.
 */
static int
ibd_init_rxlist(ibd_state_t *state)
{
ibd_rwqe_t *rwqe, *next;
ibd_wqe_t *list;
ibt_lkey_t lkey;
int i;
uint_t len;
uint8_t *bufaddr;
mutex_enter(&state->id_rx_free_list.dl_mutex);
if (state->id_rx_free_list.dl_head != NULL) {
/* Reuse path: detach the entire free list under its lock. */
len = state->id_rx_buf_sz;
list = state->id_rx_free_list.dl_head;
state->id_rx_free_list.dl_head = NULL;
state->id_rx_free_list.dl_cnt = 0;
mutex_exit(&state->id_rx_free_list.dl_mutex);
/* Give every detached rwqe a new mblk wrapping its buffer. */
for (rwqe = WQE_TO_RWQE(list); rwqe != NULL;
rwqe = WQE_TO_RWQE(rwqe->rwqe_next)) {
if ((rwqe->rwqe_im_mblk = desballoc(
rwqe->rwqe_copybuf.ic_bufaddr, len, 0,
&rwqe->w_freemsg_cb)) == NULL) {
/*
 * NOTE(review): id_running is dropped for the duration of the cleanup
 * and restored afterwards -- presumably so the mblk free callback
 * releases rwqes instead of reposting them; confirm against
 * ibd_freemsg_cb.  The warning fires if the count was not 1 beforehand.
 */
if (atomic_dec_32_nv(&state->id_running) != 0) {
cmn_err(CE_WARN, "ibd_init_rxlist: "
"id_running was not 1\n");
}
DPRINT(10, "ibd_init_rxlist : "
"failed in desballoc()");
/*
 * Walk the whole detached chain.  The successor is read first because
 * ibd_free_rwqe() overwrites rwqe_next.  Rwqes that already got an mblk
 * are released through freemsg(), with dl_bufs_outstanding bumped first
 * (presumably to balance a decrement in the free callback -- TODO
 * confirm); the rest go straight back onto the free list.
 */
for (rwqe = WQE_TO_RWQE(list); rwqe != NULL;
rwqe = next) {
next = WQE_TO_RWQE(rwqe->rwqe_next);
if (rwqe->rwqe_im_mblk) {
atomic_inc_32(&state->
id_rx_list.
dl_bufs_outstanding);
freemsg(rwqe->rwqe_im_mblk);
} else
ibd_free_rwqe(state, rwqe);
}
atomic_inc_32(&state->id_running);
return (DDI_FAILURE);
}
}
ibd_post_recv_list(state, WQE_TO_RWQE(list));
return (DDI_SUCCESS);
}
mutex_exit(&state->id_rx_free_list.dl_mutex);
/* Fresh path: nothing parked on the free list, allocate everything. */
if (ibd_alloc_rx_copybufs(state) != DDI_SUCCESS)
return (DDI_FAILURE);
len = state->id_rx_buf_sz;
lkey = state->id_rx_mr_desc.md_lkey;
rwqe = state->id_rx_wqes;
bufaddr = state->id_rx_bufs;
list = NULL;
/* Initialise each rwqe over its len-sized slice of the buffer region. */
for (i = 0; i < state->id_ud_num_rwqe; i++, rwqe++, bufaddr += len) {
rwqe->w_state = state;
rwqe->w_freemsg_cb.free_func = ibd_freemsg_cb;
rwqe->w_freemsg_cb.free_arg = (char *)rwqe;
rwqe->rwqe_copybuf.ic_bufaddr = bufaddr;
if ((rwqe->rwqe_im_mblk = desballoc(bufaddr, len, 0,
&rwqe->w_freemsg_cb)) == NULL) {
DPRINT(10, "ibd_init_rxlist : failed in desballoc()");
/* Same id_running handshake as in the reuse path above. */
if (atomic_dec_32_nv(&state->id_running) != 0) {
cmn_err(CE_WARN, "ibd_init_rxlist: "
"id_running was not 1\n");
}
DPRINT(10, "ibd_init_rxlist : "
"failed in desballoc()");
/*
 * Free the mblks of the rwqes chained so far; the rwqe whose desballoc()
 * failed was never linked into `list'.
 */
for (rwqe = WQE_TO_RWQE(list); rwqe != NULL;
rwqe = next) {
next = WQE_TO_RWQE(rwqe->rwqe_next);
freemsg(rwqe->rwqe_im_mblk);
}
atomic_inc_32(&state->id_running);
/* Empty the free list again before tearing the rx side down. */
mutex_enter(&state->id_rx_free_list.dl_mutex);
state->id_rx_free_list.dl_head = NULL;
state->id_rx_free_list.dl_cnt = 0;
mutex_exit(&state->id_rx_free_list.dl_mutex);
ibd_fini_rxlist(state);
return (DDI_FAILURE);
}
/* Single-entry scatter/gather list covering the whole buffer. */
rwqe->rwqe_copybuf.ic_sgl.ds_key = lkey;
rwqe->rwqe_copybuf.ic_sgl.ds_va =
(ib_vaddr_t)(uintptr_t)bufaddr;
rwqe->rwqe_copybuf.ic_sgl.ds_len = len;
rwqe->w_rwr.wr_id = (ibt_wrid_t)(uintptr_t)rwqe;
rwqe->w_rwr.wr_nds = 1;
rwqe->w_rwr.wr_sgl = &rwqe->rwqe_copybuf.ic_sgl;
/* Push onto the head of the chain to be posted. */
rwqe->rwqe_next = list;
list = RWQE_TO_WQE(rwqe);
}
ibd_post_recv_list(state, WQE_TO_RWQE(list));
return (DDI_SUCCESS);
}
/*
 * Undo ibd_alloc_rx_copybufs(): deregister the receive memory region,
 * destroy the rx post queue locks and free the queue array, the rwqe array
 * and the receive buffers.  A deregistration failure is only logged.
 */
static void
ibd_free_rx_copybufs(ibd_state_t *state)
{
	ibt_status_t rc;
	int q;

	rc = ibt_deregister_mr(state->id_hca_hdl, state->id_rx_mr_hdl);
	if (rc != IBT_SUCCESS) {
		DPRINT(10, "ibd_free_rx_copybufs: ibt_deregister_mr failed");
	}
	state->id_rx_mr_hdl = NULL;

	q = 0;
	while (q < state->id_rx_nqueues) {
		mutex_destroy(&state->id_rx_queues[q].rx_post_lock);
		q++;
	}

	kmem_free(state->id_rx_queues,
	    state->id_rx_nqueues * sizeof (ibd_rx_queue_t));
	state->id_rx_queues = NULL;

	kmem_free(state->id_rx_wqes,
	    state->id_ud_num_rwqe * sizeof (ibd_rwqe_t));
	state->id_rx_wqes = NULL;

	kmem_free(state->id_rx_bufs,
	    state->id_ud_num_rwqe * state->id_rx_buf_sz);
	state->id_rx_bufs = NULL;
}
/*
 * Free the receive buffers and rwqes if any are parked on id_rx_free_list.
 * With an empty free list this is a no-op.  Otherwise the free list is
 * expected to hold all id_ud_num_rwqe rwqes; the resources are released and
 * the free list is emptied, all under the free list lock.
 */
static void
ibd_free_rx_rsrcs(ibd_state_t *state)
{
	mutex_enter(&state->id_rx_free_list.dl_mutex);

	if (state->id_rx_free_list.dl_head != NULL) {
		ASSERT(state->id_rx_free_list.dl_cnt == state->id_ud_num_rwqe);

		ibd_free_rx_copybufs(state);
		state->id_rx_free_list.dl_cnt = 0;
		state->id_rx_free_list.dl_head = NULL;
	}

	mutex_exit(&state->id_rx_free_list.dl_mutex);
}
/*
 * Release the rwqes still parked on the rx post queues by freeing their
 * mblks, then free the receive resources if no receive buffers are
 * outstanding (id_rx_list.dl_bufs_outstanding is zero).
 */
static void
ibd_fini_rxlist(ibd_state_t *state)
{
	ibd_rwqe_t *rwqe;
	ibd_rwqe_t *next;
	int i;

	for (i = 0; i < state->id_rx_nqueues; i++) {
		ibd_rx_queue_t *rxp = state->id_rx_queues + i;

		mutex_enter(&rxp->rx_post_lock);
		/*
		 * Read the successor before calling freemsg().  Freeing the
		 * mblk runs the rwqe's free callback (w_freemsg_cb), which
		 * may put the rwqe on the free list -- ibd_free_rwqe()
		 * overwrites rwqe_next -- so the link cannot be trusted
		 * afterwards.  The cleanup loops in ibd_init_rxlist() take
		 * the same precaution.
		 */
		for (rwqe = WQE_TO_RWQE(rxp->rx_head); rwqe != NULL;
		    rwqe = next) {
			next = WQE_TO_RWQE(rwqe->rwqe_next);
			freemsg(rwqe->rwqe_im_mblk);
			rxp->rx_cnt--;
		}
		rxp->rx_head = NULL;
		mutex_exit(&rxp->rx_post_lock);
	}

	/* Resources can only go once nothing is outstanding. */
	if (atomic_add_32_nv(&state->id_rx_list.dl_bufs_outstanding, 0) == 0)
		ibd_free_rx_rsrcs(state);
}
/*
 * Park an rwqe on the head of id_rx_free_list and count it, under the free
 * list lock.
 */
static void
ibd_free_rwqe(ibd_state_t *state, ibd_rwqe_t *rwqe)
{
	mutex_enter(&state->id_rx_free_list.dl_mutex);

	rwqe->rwqe_next = state->id_rx_free_list.dl_head;
	state->id_rx_free_list.dl_head = RWQE_TO_WQE(rwqe);
	state->id_rx_free_list.dl_cnt++;

	mutex_exit(&state->id_rx_free_list.dl_mutex);
}
/*
 * Receive completion queue handler.  Counts the interrupt, then either
 * processes completions directly through ibd_intr() (ibd_rx_softintr != 1)
 * or triggers the rx soft interrupt.  If a poll is already marked in
 * progress, only IBD_REDO_CQ_POLLING is set and no soft interrupt is
 * triggered.
 */
static void
ibd_rcq_handler(ibt_cq_hdl_t cq_hdl, void *arg)
{
	ibd_state_t *state = (ibd_state_t *)arg;

	atomic_inc_64(&state->id_num_intrs);

	if (ibd_rx_softintr != 1) {
		(void) ibd_intr((caddr_t)state);
		return;
	}

	mutex_enter(&state->id_rcq_poll_lock);
	if (state->id_rcq_poll_busy & IBD_CQ_POLLING) {
		/* Ask the active poller to make another pass. */
		state->id_rcq_poll_busy |= IBD_REDO_CQ_POLLING;
		mutex_exit(&state->id_rcq_poll_lock);
		return;
	}
	mutex_exit(&state->id_rcq_poll_lock);

	ddi_trigger_softintr(state->id_rx);
}
/*
 * Send completion queue handler.  Counts the interrupt, then either
 * recycles completions directly through ibd_tx_recycle()
 * (ibd_tx_softintr != 1) or triggers the tx soft interrupt.  If a poll is
 * already marked in progress, only IBD_REDO_CQ_POLLING is set and no soft
 * interrupt is triggered.
 */
static void
ibd_scq_handler(ibt_cq_hdl_t cq_hdl, void *arg)
{
	ibd_state_t *state = (ibd_state_t *)arg;

	atomic_inc_64(&state->id_num_intrs);

	if (ibd_tx_softintr != 1) {
		(void) ibd_tx_recycle((caddr_t)state);
		return;
	}

	mutex_enter(&state->id_scq_poll_lock);
	if (state->id_scq_poll_busy & IBD_CQ_POLLING) {
		/* Ask the active poller to make another pass. */
		state->id_scq_poll_busy |= IBD_REDO_CQ_POLLING;
		mutex_exit(&state->id_scq_poll_lock);
		return;
	}
	mutex_exit(&state->id_scq_poll_lock);

	ddi_trigger_softintr(state->id_tx);
}
/*
 * Subnet notice handler.  Notices whose gid is not this instance's id_sgid
 * are ignored.  Availability changes of multicast traps are logged;
 * multicast group create/delete events are queued as IBD_ASYNC_TRAP work,
 * except that a delete is dropped while the driver is in late HCA init, and
 * nothing is queued when ibd_async_safe() says no.
 */
static void
ibd_snet_notices_handler(void *arg, ib_gid_t gid, ibt_subnet_event_code_t code,
    ibt_subnet_event_t *event)
{
	ibd_state_t *state = (ibd_state_t *)arg;
	ibd_req_t *work;

	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(state->id_sgid))
	if (bcmp(&gid, &state->id_sgid, sizeof (ib_gid_t)) != 0)
		return;
	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(state->id_sgid))

	DPRINT(10, "ibd_notices_handler : %d\n", code);

	if (code == IBT_SM_EVENT_UNAVAILABLE) {
		ibd_print_warn(state, "IBA multicast support "
		    "degraded due to unavailability of multicast "
		    "traps");
		return;
	}

	if (code == IBT_SM_EVENT_AVAILABLE) {
		ibd_print_warn(state, "IBA multicast support "
		    "restored due to availability of multicast "
		    "traps");
		return;
	}

	if (code != IBT_SM_EVENT_MCG_CREATED &&
	    code != IBT_SM_EVENT_MCG_DELETED)
		return;

	/* Group deletions are of no interest during late HCA init. */
	if (((state->id_mac_state & IBD_DRV_IN_LATE_HCA_INIT) ==
	    IBD_DRV_IN_LATE_HCA_INIT) &&
	    (code == IBT_SM_EVENT_MCG_DELETED))
		return;

	if (!ibd_async_safe(state))
		return;

	/* Hand the group gid and the event code to the async thread. */
	work = kmem_cache_alloc(state->id_req_kmc, KM_SLEEP);
	work->rq_gid = event->sm_notice_gid;
	work->rq_ptr = (void *)code;
	ibd_queue_work_slot(state, work, IBD_ASYNC_TRAP);
}
/*
 * Async-thread side of a multicast group create/delete notice queued by
 * the subnet notice handler.
 *
 * While the driver is in late HCA init with no broadcast group present, a
 * create event for the group whose gid_guid is IB_MGID_IPV4_LOWGRP_MASK
 * and whose pkey matches id_pkey triggers ibd_start(); any other create
 * event in that state is ignored.  In every other case the group's
 * send-only-non membership is dropped and, if promiscuous mode is
 * enabled (id_prom_op == IBD_OP_COMPLETED), the non-member join is
 * redone.  ibd_async_done() is called on every path.
 */
static void
ibd_async_trap(ibd_state_t *state, ibd_req_t *req)
{
	ib_gid_t mgid = req->rq_gid;
	ibt_subnet_event_code_t code = (ibt_subnet_event_code_t)req->rq_ptr;
	int ret;
	ib_pkey_t pkey = (mgid.gid_prefix >> 16) & 0xffff;

	DPRINT(10, "ibd_async_trap : %d\n", code);

	if (((state->id_mac_state & IBD_DRV_IN_LATE_HCA_INIT) ==
	    IBD_DRV_IN_LATE_HCA_INIT) && (state->id_bgroup_present == 0) &&
	    (code == IBT_SM_EVENT_MCG_CREATED)) {
		/* Only the broadcast group for our pkey matters here. */
		if ((mgid.gid_guid != IB_MGID_IPV4_LOWGRP_MASK) ||
		    (pkey != state->id_pkey))
			goto done;

		ibd_set_mac_progress(state, IBD_DRV_RESTART_IN_PROGRESS);
		/* Re-check the state now that the progress flag is set. */
		if (((state->id_mac_state & IBD_DRV_IN_LATE_HCA_INIT) ==
		    IBD_DRV_IN_LATE_HCA_INIT) &&
		    ((ret = ibd_start(state)) != 0)) {
			DPRINT(10, "ibd_async_trap: cannot start from late HCA "
			    "init, ret=%d", ret);
		}
		ibd_clr_mac_progress(state, IBD_DRV_RESTART_IN_PROGRESS);
		goto done;
	}

	ibd_leave_group(state, mgid, IB_MC_JSTATE_SEND_ONLY_NON);

	if (state->id_prom_op == IBD_OP_COMPLETED) {
		ibd_leave_group(state, mgid, IB_MC_JSTATE_NON);

		if ((ibd_join_group(state, mgid, IB_MC_JSTATE_NON) ==
		    NULL) && (code == IBT_SM_EVENT_MCG_CREATED))
			ibd_print_warn(state, "IBA promiscuous mode missed "
			    "new multicast gid %016llx:%016llx",
			    (u_longlong_t)mgid.gid_prefix,
			    (u_longlong_t)mgid.gid_guid);
	}

done:
	ibd_async_done(state);
}
/*
 * MAC capability callback.  Port driver instances advertise nothing.
 * Partition instances advertise:
 *  - MAC_CAPAB_HCKSUM when the HCA offers full checksum offload;
 *  - MAC_CAPAB_LSO when LSO is enabled by policy and supported, full
 *    checksum offload is available and the HCA has the reserved-lkey
 *    capability; the maximum LSO size reported is id_lso_maxlen - 1.
 *
 * Returns B_TRUE with cap_data filled in when the capability is supported,
 * B_FALSE otherwise.
 */
static boolean_t
ibd_m_getcapab(void *arg, mac_capab_t cap, void *cap_data)
{
	ibd_state_t *state = arg;

	if (state->id_type == IBD_PORT_DRIVER)
		return (B_FALSE);

	if (cap == MAC_CAPAB_HCKSUM) {
		uint32_t *hck_flags = cap_data;

		if ((state->id_hwcksum_capab & IBT_HCA_CKSUM_FULL) == 0)
			return (B_FALSE);

		*hck_flags = HCK_FULLCKSUM | HCKSUM_INET_FULL_V4;
		return (B_TRUE);
	}

	if (cap == MAC_CAPAB_LSO) {
		mac_capab_lso_t *lso = cap_data;

		if (!state->id_lso_policy || !state->id_lso_capable)
			return (B_FALSE);

		if ((state->id_hwcksum_capab & IBT_HCA_CKSUM_FULL) == 0)
			return (B_FALSE);

		if (state->id_hca_res_lkey_capab == 0) {
			ibd_print_warn(state, "no reserved-lkey capability, "
			    "disabling LSO");
			return (B_FALSE);
		}

		lso->lso_flags = LSO_TX_BASIC_TCP_IPV4;
		lso->lso_basic_tcp_ipv4.lso_max = state->id_lso_maxlen - 1;
		return (B_TRUE);
	}

	return (B_FALSE);
}
/*
 * MAC set-property callback.
 *
 * Port driver instances support no settable properties (ENOTSUP).  For
 * partition instances:
 *  - MAC_PROP_IB_LINKMODE switches between UD and RC mode.  It is refused
 *    with EBUSY while the driver is started and with EINVAL for a NULL or
 *    unknown value.  Setting the mode that is already in effect returns 0
 *    without doing anything; otherwise the MAC layer is told the new SDU
 *    and the capabilities are re-recorded and re-advertised.
 *  - MAC_PROP_PRIVATE is delegated to ibd_set_priv_prop().
 *  - anything else returns ENOTSUP.
 */
static int
ibd_m_setprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
    uint_t pr_valsize, const void *pr_val)
{
	ibd_state_t *state = arg;
	uint32_t mode;
	int rv;

	if (state->id_type == IBD_PORT_DRIVER)
		return (ENOTSUP);

	if (pr_num == MAC_PROP_PRIVATE)
		return (ibd_set_priv_prop(state, pr_name, pr_valsize, pr_val));

	if (pr_num != MAC_PROP_IB_LINKMODE)
		return (ENOTSUP);

	/* The link mode can only change while the driver is stopped. */
	if (state->id_mac_state & IBD_DRV_STARTED)
		return (EBUSY);

	if (pr_val == NULL)
		return (EINVAL);

	bcopy(pr_val, &mode, sizeof (mode));
	if (mode != IBD_LINK_MODE_UD && mode != IBD_LINK_MODE_RC)
		return (EINVAL);

	if (mode == IBD_LINK_MODE_RC) {
		if (state->id_enable_rc)
			return (0);

		state->id_enable_rc = 1;
		rv = mac_maxsdu_update2(state->id_mh,
		    state->rc_mtu - IPOIB_HDRSIZE,
		    state->id_mtu - IPOIB_HDRSIZE);
	} else {
		if (!state->id_enable_rc)
			return (0);

		state->id_enable_rc = 0;
		rv = mac_maxsdu_update2(state->id_mh,
		    state->id_mtu - IPOIB_HDRSIZE,
		    state->id_mtu - IPOIB_HDRSIZE);
	}

	/* Capabilities depend on the link mode; refresh them. */
	(void) ibd_record_capab(state);
	mac_capab_update(state->id_mh);

	return (rv);
}
/*
 * MAC get-property callback.
 *
 * MAC_PROP_IB_LINKMODE returns id_enable_rc through pr_val and
 * MAC_PROP_PRIVATE is delegated to ibd_get_priv_prop(); both are available
 * on partition instances only.  Every other property, and every property
 * on a port driver instance, yields ENOTSUP.
 */
static int
ibd_m_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
    uint_t pr_valsize, void *pr_val)
{
	ibd_state_t *state = arg;

	/* MTU is exempt from the port-driver check but not handled below. */
	if (pr_num != MAC_PROP_MTU && state->id_type == IBD_PORT_DRIVER)
		return (ENOTSUP);

	if (pr_num == MAC_PROP_IB_LINKMODE) {
		*(uint_t *)pr_val = state->id_enable_rc;
		return (0);
	}

	if (pr_num == MAC_PROP_PRIVATE)
		return (ibd_get_priv_prop(state, pr_name, pr_valsize, pr_val));

	return (ENOTSUP);
}
/*
 * MAC property-info callback: reports defaults, permissions and ranges.
 *
 *  - MAC_PROP_IB_LINKMODE: default is IBD_DEF_LINK_MODE.
 *  - MAC_PROP_MTU: read-only, with a range that depends on the instance
 *    type and on whether RC mode is enabled.
 *  - MAC_PROP_PRIVATE: "_ibd_broadcast_group" is read-only; every other
 *    known private property reports its compile-time default as a decimal
 *    string.  Unknown names are left untouched.
 */
static void
ibd_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t pr_num,
    mac_prop_info_handle_t prh)
{
	ibd_state_t *state = arg;

	switch (pr_num) {
	case MAC_PROP_IB_LINKMODE:
		mac_prop_info_set_default_uint32(prh, IBD_DEF_LINK_MODE);
		break;

	case MAC_PROP_MTU: {
		uint32_t lo, hi;

		if (state->id_type == IBD_PORT_DRIVER) {
			lo = 1500;
			hi = IBD_DEF_RC_MAX_SDU;
		} else if (state->id_enable_rc) {
			lo = hi = IBD_DEF_RC_MAX_SDU;
		} else {
			lo = hi = state->id_mtu - IPOIB_HDRSIZE;
		}
		mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ);
		mac_prop_info_set_range_uint32(prh, lo, hi);
		break;
	}

	case MAC_PROP_PRIVATE: {
		/* Private property name -> default value. */
		const struct {
			const char	*pd_name;
			int		pd_value;
		} defaults[] = {
			{ "_ibd_coalesce_completions",
			    IBD_DEF_COALESCE_COMPLETIONS },
			{ "_ibd_create_broadcast_group",
			    IBD_DEF_CREATE_BCAST_GROUP },
			{ "_ibd_hash_size", IBD_DEF_HASH_SIZE },
			{ "_ibd_lso_enable", IBD_DEF_LSO_POLICY },
			{ "_ibd_num_ah", IBD_DEF_NUM_AH },
			{ "_ibd_num_lso_bufs", IBD_DEF_NUM_LSO_BUFS },
			{ "_ibd_rc_enable_srq", IBD_DEF_RC_ENABLE_SRQ },
			{ "_ibd_rc_num_rwqe", IBD_DEF_RC_NUM_RWQE },
			{ "_ibd_rc_num_srq", IBD_DEF_RC_NUM_SRQ },
			{ "_ibd_rc_num_swqe", IBD_DEF_RC_NUM_SWQE },
			{ "_ibd_rc_rx_comp_count", IBD_DEF_RC_RX_COMP_COUNT },
			{ "_ibd_rc_rx_comp_usec", IBD_DEF_RC_RX_COMP_USEC },
			{ "_ibd_rc_rx_copy_thresh", IBD_DEF_RC_RX_COPY_THRESH },
			{ "_ibd_rc_rx_rwqe_thresh", IBD_DEF_RC_RX_RWQE_THRESH },
			{ "_ibd_rc_tx_comp_count", IBD_DEF_RC_TX_COMP_COUNT },
			{ "_ibd_rc_tx_comp_usec", IBD_DEF_RC_TX_COMP_USEC },
			{ "_ibd_rc_tx_copy_thresh", IBD_DEF_RC_TX_COPY_THRESH },
			{ "_ibd_ud_num_rwqe", IBD_DEF_UD_NUM_RWQE },
			{ "_ibd_ud_num_swqe", IBD_DEF_UD_NUM_SWQE },
			{ "_ibd_ud_rx_comp_count", IBD_DEF_UD_RX_COMP_COUNT },
			{ "_ibd_ud_rx_comp_usec", IBD_DEF_UD_RX_COMP_USEC },
			{ "_ibd_ud_tx_comp_count", IBD_DEF_UD_TX_COMP_COUNT },
			{ "_ibd_ud_tx_comp_usec", IBD_DEF_UD_TX_COMP_USEC },
			{ "_ibd_ud_tx_copy_thresh", IBD_DEF_UD_TX_COPY_THRESH }
		};
		char valstr[64];
		uint_t k;

		if (strcmp(pr_name, "_ibd_broadcast_group") == 0) {
			mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ);
			return;
		}

		for (k = 0; k < sizeof (defaults) / sizeof (defaults[0]);
		    k++) {
			if (strcmp(pr_name, defaults[k].pd_name) != 0)
				continue;

			(void) snprintf(valstr, sizeof (valstr), "%d",
			    defaults[k].pd_value);
			mac_prop_info_set_default_str(prh, valstr);
			break;
		}
		break;
	}
	}
}
static int
ibd_set_priv_prop(ibd_state_t *state, const char *pr_name,
uint_t pr_valsize, const void *pr_val)
{
int err = 0;
long result;
if (strcmp(pr_name, "_ibd_coalesce_completions") == 0) {
if (pr_val == NULL) {
return (EINVAL);
}
(void) ddi_strtol(pr_val, (char **)NULL, 0, &result);
if (result < 0 || result > 1) {
err = EINVAL;
} else {
state->id_allow_coalesce_comp_tuning = (result == 1) ?
B_TRUE: B_FALSE;
}
return (err);
}
if (strcmp(pr_name, "_ibd_create_broadcast_group") == 0) {
if (state->id_mac_state & IBD_DRV_STARTED) {
return (EBUSY);
}
if (pr_val == NULL) {
return (EINVAL);
}
(void) ddi_strtol(pr_val, (char **)NULL, 0, &result);
if (result < 0 || result > 1) {
err = EINVAL;
} else {
state->id_create_broadcast_group = (result == 1) ?
B_TRUE: B_FALSE;
}
return (err);
}
if (strcmp(pr_name, "_ibd_hash_size") == 0) {
if (state->id_mac_state & IBD_DRV_STARTED) {
return (EBUSY);
}
if (pr_val == NULL) {
return (EINVAL);
}
(void) ddi_strtol(pr_val, (char **)NULL, 0, &result);
if (result < IBD_MIN_HASH_SIZE || result > IBD_MAX_HASH_SIZE) {
err = EINVAL;
} else {
state->id_hash_size = (uint32_t)result;
}
return (err);
}
if (strcmp(pr_name, "_ibd_lso_enable") == 0) {
if (state->id_mac_state & IBD_DRV_STARTED) {
return (EBUSY);
}
if (pr_val == NULL) {
return (EINVAL);
}
(void) ddi_strtol(pr_val, (char **)NULL, 0, &result);
if (result < 0 || result > 1) {
err = EINVAL;
} else {
state->id_lso_policy = (result == 1) ?
B_TRUE: B_FALSE;
}
mac_capab_update(state->id_mh);
return (err);
}
if (strcmp(pr_name, "_ibd_num_ah") == 0) {
if (state->id_mac_state & IBD_DRV_STARTED) {
return (EBUSY);
}
if (pr_val == NULL) {
return (EINVAL);
}
(void) ddi_strtol(pr_val, (char **)NULL, 0, &result);
if (result < IBD_MIN_NUM_AH || result > IBD_MAX_NUM_AH) {
err = EINVAL;
} else {
state->id_num_ah = (uint32_t)result;
}
return (err);
}
if (strcmp(pr_name, "_ibd_num_lso_bufs") == 0) {
if (state->id_mac_state & IBD_DRV_STARTED) {
return (EBUSY);
}
if (!state->id_lso_policy || !state->id_lso_capable) {
return (EINVAL);
}
if (pr_val == NULL) {
return (EINVAL);
}
(void) ddi_strtol(pr_val, (char **)NULL, 0, &result);
if (result < IBD_MIN_NUM_LSO_BUFS ||
result > IBD_MAX_NUM_LSO_BUFS) {
err = EINVAL;
} else {
state->id_num_lso_bufs = (uint32_t)result;
}
return (err);
}
if (strcmp(pr_name, "_ibd_rc_enable_srq") == 0) {
if (state->id_mac_state & IBD_DRV_STARTED) {
return (EBUSY);
}
if (pr_val == NULL) {
return (EINVAL);
}
(void) ddi_strtol(pr_val, (char **)NULL, 0, &result);
if (result < 0 || result > 1) {
err = EINVAL;
} else {
state->rc_enable_srq = (result == 1) ?
B_TRUE: B_FALSE;
}
if (!state->rc_enable_srq) {
state->id_rc_num_srq = 0;
}
return (err);
}
if (strcmp(pr_name, "_ibd_rc_num_rwqe") == 0) {
if (state->id_mac_state & IBD_DRV_STARTED) {
return (EBUSY);
}
if (pr_val == NULL) {
return (EINVAL);
}
(void) ddi_strtol(pr_val, (char **)NULL, 0, &result);
if (result < IBD_MIN_RC_NUM_RWQE ||
result > IBD_MAX_RC_NUM_RWQE) {
err = EINVAL;
} else {
state->id_rc_num_rwqe = (uint32_t)result;
if (state->id_allow_coalesce_comp_tuning &&
state->id_rc_rx_comp_count > state->id_rc_num_rwqe)
state->id_rc_rx_comp_count =
state->id_rc_num_rwqe;
if (state->id_rc_num_srq > state->id_rc_num_rwqe)
state->id_rc_num_srq =
state->id_rc_num_rwqe - 1;
if (state->id_rc_rx_rwqe_thresh > state->id_rc_num_rwqe)
state->id_rc_rx_rwqe_thresh =
(state->id_rc_num_rwqe >> 2);
}
return (err);
}
if (strcmp(pr_name, "_ibd_rc_num_srq") == 0) {
if (state->id_mac_state & IBD_DRV_STARTED) {
return (EBUSY);
}
if (pr_val == NULL) {
return (EINVAL);
}
if (!state->rc_enable_srq)
return (EINVAL);
(void) ddi_strtol(pr_val, (char **)NULL, 0, &result);
if (result < IBD_MIN_RC_NUM_SRQ ||
result >= state->id_rc_num_rwqe) {
err = EINVAL;
} else
state->id_rc_num_srq = (uint32_t)result;
return (err);
}
if (strcmp(pr_name, "_ibd_rc_num_swqe") == 0) {
if (state->id_mac_state & IBD_DRV_STARTED) {
return (EBUSY);
}
if (pr_val == NULL) {
return (EINVAL);
}
(void) ddi_strtol(pr_val, (char **)NULL, 0, &result);
if (result < IBD_MIN_RC_NUM_SWQE ||
result > IBD_MAX_RC_NUM_SWQE) {
err = EINVAL;
} else {
state->id_rc_num_swqe = (uint32_t)result;
if (state->id_allow_coalesce_comp_tuning &&
state->id_rc_tx_comp_count > state->id_rc_num_swqe)
state->id_rc_tx_comp_count =
state->id_rc_num_swqe;
}
return (err);
}
if (strcmp(pr_name, "_ibd_rc_rx_comp_count") == 0) {
if (!state->id_allow_coalesce_comp_tuning) {
return (ENOTSUP);
}
if (pr_val == NULL) {
return (EINVAL);
}
(void) ddi_strtol(pr_val, (char **)NULL, 0, &result);
if (result < 1 || result > state->id_rc_num_rwqe) {
err = EINVAL;
} else {
state->id_rc_rx_comp_count = (uint32_t)result;
}
return (err);
}
if (strcmp(pr_name, "_ibd_rc_rx_comp_usec") == 0) {
if (!state->id_allow_coalesce_comp_tuning) {
return (ENOTSUP);
}
if (pr_val == NULL) {
return (EINVAL);
}
(void) ddi_strtol(pr_val, (char **)NULL, 0, &result);
if (result < 1) {
err = EINVAL;
} else {
state->id_rc_rx_comp_usec = (uint32_t)result;
}
return (err);
}
if (strcmp(pr_name, "_ibd_rc_rx_copy_thresh") == 0) {
if (state->id_mac_state & IBD_DRV_STARTED) {
return (EBUSY);
}
if (pr_val == NULL) {
return (EINVAL);
}
(void) ddi_strtol(pr_val, (char **)NULL, 0, &result);
if (result < IBD_MIN_RC_RX_COPY_THRESH ||
result > state->rc_mtu) {
err = EINVAL;
} else {
state->id_rc_rx_copy_thresh = (uint32_t)result;
}
return (err);
}
if (strcmp(pr_name, "_ibd_rc_rx_rwqe_thresh") == 0) {
if (state->id_mac_state & IBD_DRV_STARTED) {
return (EBUSY);
}
if (pr_val == NULL) {
return (EINVAL);
}
(void) ddi_strtol(pr_val, (char **)NULL, 0, &result);
if (result < IBD_MIN_RC_RX_RWQE_THRESH ||
result >= state->id_rc_num_rwqe) {
err = EINVAL;
} else {
state->id_rc_rx_rwqe_thresh = (uint32_t)result;
}
return (err);
}
if (strcmp(pr_name, "_ibd_rc_tx_comp_count") == 0) {
if (!state->id_allow_coalesce_comp_tuning) {
return (ENOTSUP);
}
if (pr_val == NULL) {
return (EINVAL);
}
(void) ddi_strtol(pr_val, (char **)NULL, 0, &result);
if (result < 1 || result > state->id_rc_num_swqe) {
err = EINVAL;
} else {
state->id_rc_tx_comp_count = (uint32_t)result;
}
return (err);
}
if (strcmp(pr_name, "_ibd_rc_tx_comp_usec") == 0) {
if (!state->id_allow_coalesce_comp_tuning) {
return (ENOTSUP);
}
if (pr_val == NULL) {
return (EINVAL);
}
(void) ddi_strtol(pr_val, (char **)NULL, 0, &result);
if (result < 1)
err = EINVAL;
else {
state->id_rc_tx_comp_usec = (uint32_t)result;
}
return (err);
}
if (strcmp(pr_name, "_ibd_rc_tx_copy_thresh") == 0) {
if (state->id_mac_state & IBD_DRV_STARTED) {
return (EBUSY);
}
if (pr_val == NULL) {
return (EINVAL);
}
(void) ddi_strtol(pr_val, (char **)NULL, 0, &result);
if (result < IBD_MIN_RC_TX_COPY_THRESH ||
result > state->rc_mtu) {
err = EINVAL;
} else {
state->id_rc_tx_copy_thresh = (uint32_t)result;
}
return (err);
}
if (strcmp(pr_name, "_ibd_ud_num_rwqe") == 0) {
if (state->id_mac_state & IBD_DRV_STARTED) {
return (EBUSY);
}
if (pr_val == NULL) {
return (EINVAL);
}
(void) ddi_strtol(pr_val, (char **)NULL, 0, &result);
if (result < IBD_MIN_UD_NUM_RWQE ||
result > IBD_MAX_UD_NUM_RWQE) {
err = EINVAL;
} else {
if (result > state->id_hca_max_chan_sz) {
state->id_ud_num_rwqe =
state->id_hca_max_chan_sz;
} else {
state->id_ud_num_rwqe = (uint32_t)result;
}
if (state->id_allow_coalesce_comp_tuning &&
state->id_ud_rx_comp_count > state->id_ud_num_rwqe)
state->id_ud_rx_comp_count =
state->id_ud_num_rwqe;
}
return (err);
}
if (strcmp(pr_name, "_ibd_ud_num_swqe") == 0) {
if (state->id_mac_state & IBD_DRV_STARTED) {
return (EBUSY);
}
if (pr_val == NULL) {
return (EINVAL);
}
(void) ddi_strtol(pr_val, (char **)NULL, 0, &result);
if (result < IBD_MIN_UD_NUM_SWQE ||
result > IBD_MAX_UD_NUM_SWQE) {
err = EINVAL;
} else {
if (result > state->id_hca_max_chan_sz) {
state->id_ud_num_swqe =
state->id_hca_max_chan_sz;
} else {
state->id_ud_num_swqe = (uint32_t)result;
}
if (state->id_allow_coalesce_comp_tuning &&
state->id_ud_tx_comp_count > state->id_ud_num_swqe)
state->id_ud_tx_comp_count =
state->id_ud_num_swqe;
}
return (err);
}
if (strcmp(pr_name, "_ibd_ud_rx_comp_count") == 0) {
if (!state->id_allow_coalesce_comp_tuning) {
return (ENOTSUP);
}
if (pr_val == NULL) {
return (EINVAL);
}
(void) ddi_strtol(pr_val, (char **)NULL, 0, &result);
if (result < 1 || result > state->id_ud_num_rwqe) {
err = EINVAL;
} else {
state->id_ud_rx_comp_count = (uint32_t)result;
}
return (err);
}
if (strcmp(pr_name, "_ibd_ud_rx_comp_usec") == 0) {
if (!state->id_allow_coalesce_comp_tuning) {
return (ENOTSUP);
}
if (pr_val == NULL) {
return (EINVAL);
}
(void) ddi_strtol(pr_val, (char **)NULL, 0, &result);
if (result < 1) {
err = EINVAL;
} else {
state->id_ud_rx_comp_usec = (uint32_t)result;
}
return (err);
}
if (strcmp(pr_name, "_ibd_ud_tx_comp_count") == 0) {
if (!state->id_allow_coalesce_comp_tuning) {
return (ENOTSUP);
}
if (pr_val == NULL) {
return (EINVAL);
}
(void) ddi_strtol(pr_val, (char **)NULL, 0, &result);
if (result < 1 || result > state->id_ud_num_swqe) {
err = EINVAL;
} else {
state->id_ud_tx_comp_count = (uint32_t)result;
}
return (err);
}
if (strcmp(pr_name, "_ibd_ud_tx_comp_usec") == 0) {
if (!state->id_allow_coalesce_comp_tuning) {
return (ENOTSUP);
}
if (pr_val == NULL) {
return (EINVAL);
}
(void) ddi_strtol(pr_val, (char **)NULL, 0, &result);
if (result < 1) {
err = EINVAL;
} else {
state->id_ud_tx_comp_usec = (uint32_t)result;
}
return (err);
}
if (strcmp(pr_name, "_ibd_ud_tx_copy_thresh") == 0) {
if (state->id_mac_state & IBD_DRV_STARTED) {
return (EBUSY);
}
if (pr_val == NULL) {
return (EINVAL);
}
(void) ddi_strtol(pr_val, (char **)NULL, 0, &result);
if (result < IBD_MIN_UD_TX_COPY_THRESH ||
result > IBD_MAX_UD_TX_COPY_THRESH) {
err = EINVAL;
} else {
state->id_ud_tx_copy_thresh = (uint32_t)result;
}
return (err);
}
return (ENOTSUP);
}
/*
 * Fetch the current value of a driver-private property and format it
 * as a decimal string.
 *
 *	state		soft state holding the tunables
 *	pr_name		property name (e.g. "_ibd_num_ah")
 *	pr_valsize	size in bytes of the buffer at pr_val
 *	pr_val		buffer that receives the formatted value
 *
 * Returns 0 if pr_name is one of the names handled below; otherwise
 * returns ENOTSUP and leaves pr_val untouched.
 */
static int
ibd_get_priv_prop(ibd_state_t *state, const char *pr_name, uint_t pr_valsize,
    void *pr_val)
{
	int cur;

	if (strcmp(pr_name, "_ibd_broadcast_group") == 0) {
		cur = state->id_bgroup_present;
	} else if (strcmp(pr_name, "_ibd_coalesce_completions") == 0) {
		cur = state->id_allow_coalesce_comp_tuning;
	} else if (strcmp(pr_name, "_ibd_create_broadcast_group") == 0) {
		cur = state->id_create_broadcast_group;
	} else if (strcmp(pr_name, "_ibd_hash_size") == 0) {
		cur = state->id_hash_size;
	} else if (strcmp(pr_name, "_ibd_lso_enable") == 0) {
		cur = state->id_lso_policy;
	} else if (strcmp(pr_name, "_ibd_num_ah") == 0) {
		cur = state->id_num_ah;
	} else if (strcmp(pr_name, "_ibd_num_lso_bufs") == 0) {
		cur = state->id_num_lso_bufs;
	} else if (strcmp(pr_name, "_ibd_rc_enable_srq") == 0) {
		cur = state->rc_enable_srq;
	} else if (strcmp(pr_name, "_ibd_rc_num_rwqe") == 0) {
		cur = state->id_rc_num_rwqe;
	} else if (strcmp(pr_name, "_ibd_rc_num_srq") == 0) {
		cur = state->id_rc_num_srq;
	} else if (strcmp(pr_name, "_ibd_rc_num_swqe") == 0) {
		cur = state->id_rc_num_swqe;
	} else if (strcmp(pr_name, "_ibd_rc_rx_comp_count") == 0) {
		cur = state->id_rc_rx_comp_count;
	} else if (strcmp(pr_name, "_ibd_rc_rx_comp_usec") == 0) {
		cur = state->id_rc_rx_comp_usec;
	} else if (strcmp(pr_name, "_ibd_rc_rx_copy_thresh") == 0) {
		cur = state->id_rc_rx_copy_thresh;
	} else if (strcmp(pr_name, "_ibd_rc_rx_rwqe_thresh") == 0) {
		cur = state->id_rc_rx_rwqe_thresh;
	} else if (strcmp(pr_name, "_ibd_rc_tx_comp_count") == 0) {
		cur = state->id_rc_tx_comp_count;
	} else if (strcmp(pr_name, "_ibd_rc_tx_comp_usec") == 0) {
		cur = state->id_rc_tx_comp_usec;
	} else if (strcmp(pr_name, "_ibd_rc_tx_copy_thresh") == 0) {
		cur = state->id_rc_tx_copy_thresh;
	} else if (strcmp(pr_name, "_ibd_ud_num_rwqe") == 0) {
		cur = state->id_ud_num_rwqe;
	} else if (strcmp(pr_name, "_ibd_ud_num_swqe") == 0) {
		cur = state->id_ud_num_swqe;
	} else if (strcmp(pr_name, "_ibd_ud_rx_comp_count") == 0) {
		cur = state->id_ud_rx_comp_count;
	} else if (strcmp(pr_name, "_ibd_ud_rx_comp_usec") == 0) {
		cur = state->id_ud_rx_comp_usec;
	} else if (strcmp(pr_name, "_ibd_ud_tx_comp_count") == 0) {
		cur = state->id_ud_tx_comp_count;
	} else if (strcmp(pr_name, "_ibd_ud_tx_comp_usec") == 0) {
		cur = state->id_ud_tx_comp_usec;
	} else if (strcmp(pr_name, "_ibd_ud_tx_copy_thresh") == 0) {
		cur = state->id_ud_tx_copy_thresh;
	} else {
		/* Unknown property name: nothing is written to pr_val. */
		return (ENOTSUP);
	}

	(void) snprintf(pr_val, pr_valsize, "%d", cur);
	return (0);
}
/*
 * Query the HCA for the state of our port and cache the results in the
 * soft state, all under id_link_mutex.
 *
 * When the port is active this records the pkey index (the link is
 * reported down if the pkey cannot be resolved), the MTU in bytes
 * (128 << p_mtu), the first entry of the port's SGID table and the
 * link speed.  When the port is not active the MTU and link speed are
 * zeroed and the link is marked down.
 *
 * Returns 0 on success, or ENETDOWN if the port query fails or does
 * not return exactly one port.
 */
static int
ibd_get_port_details(ibd_state_t *state)
{
	ibt_hca_portinfo_t *port_infop;
	ibt_status_t ret;
	uint_t psize, port_infosz;

	mutex_enter(&state->id_link_mutex);

	ret = ibt_query_hca_ports(state->id_hca_hdl, state->id_port,
	    &port_infop, &psize, &port_infosz);
	if ((ret != IBT_SUCCESS) || (psize != 1)) {
		mutex_exit(&state->id_link_mutex);
		DPRINT(10, "ibd_get_port_details: ibt_query_hca_ports() "
		    "failed, ret=%d", ret);
		/*
		 * A successful query hands back a buffer even when the
		 * port count is not what we expected; release it so this
		 * error path does not leak.
		 */
		if (ret == IBT_SUCCESS)
			ibt_free_portinfo(port_infop, port_infosz);
		return (ENETDOWN);
	}

	if (port_infop->p_linkstate == IBT_PORT_ACTIVE) {
		/* An unresolvable pkey means we cannot use the link. */
		if ((ret = ibt_pkey2index(state->id_hca_hdl, state->id_port,
		    state->id_pkey, &state->id_pkix)) != IBT_SUCCESS) {
			state->id_link_state = LINK_STATE_DOWN;
		} else {
			state->id_link_state = LINK_STATE_UP;
		}
		state->id_mtu = (128 << port_infop->p_mtu);
		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(state->id_sgid))
		state->id_sgid = *port_infop->p_sgid_tbl;
		_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(state->id_sgid))
		state->id_link_speed = ibd_get_portspeed(state);
	} else {
		state->id_mtu = 0;
		state->id_link_state = LINK_STATE_DOWN;
		state->id_link_speed = 0;
	}

	mutex_exit(&state->id_link_mutex);
	ibt_free_portinfo(port_infop, port_infosz);

	return (0);
}
/*
 * Allocate the UD receive and send completion queues, apply the
 * configured interrupt moderation settings to each, and allocate the
 * work-completion arrays (id_rxwcs / id_txwcs) that go with them.
 *
 * Each CQ is sized at one more than the corresponding work-queue
 * count.  If the HCA cannot provide a CQ that large, the CQ is sized
 * to hca_max_cq_sz, the work-queue count in the soft state is reduced
 * to fit, and a warning is printed once both CQs exist.
 *
 * A failure to set interrupt moderation is only logged.
 *
 * Returns DDI_SUCCESS, or DDI_FAILURE if the HCA attributes cannot be
 * queried or either CQ cannot be allocated.  On failure nothing
 * allocated here is left behind.
 */
static int
ibd_alloc_cqs(ibd_state_t *state)
{
	ibt_hca_attr_t hca_attrs;
	ibt_cq_attr_t cq_attr;
	ibt_status_t ret;
	uint32_t real_size;
	uint_t num_rwqe_change = 0;
	uint_t num_swqe_change = 0;

	/*
	 * hca_max_cq_sz drives every sizing decision below, so a failed
	 * query has to be rejected at run time.  An ASSERT alone is
	 * compiled out of non-DEBUG kernels and would leave hca_attrs
	 * uninitialised.
	 */
	if ((ret = ibt_query_hca(state->id_hca_hdl, &hca_attrs)) !=
	    IBT_SUCCESS) {
		DPRINT(10, "ibd_alloc_cqs: ibt_query_hca() "
		    "failed, ret=%d\n", ret);
		return (DDI_FAILURE);
	}

	cq_attr.cq_sched = NULL;
	cq_attr.cq_flags = IBT_CQ_NO_FLAGS;

	/* Receive CQ: clamp to what the HCA supports. */
	if (hca_attrs.hca_max_cq_sz >= (state->id_ud_num_rwqe + 1)) {
		cq_attr.cq_size = state->id_ud_num_rwqe + 1;
	} else {
		cq_attr.cq_size = hca_attrs.hca_max_cq_sz;
		/* Remember the requested count for the warning below. */
		num_rwqe_change = state->id_ud_num_rwqe;
		state->id_ud_num_rwqe = cq_attr.cq_size - 1;
	}

	if ((ret = ibt_alloc_cq(state->id_hca_hdl, &cq_attr,
	    &state->id_rcq_hdl, &real_size)) != IBT_SUCCESS) {
		DPRINT(10, "ibd_alloc_cqs: ibt_alloc_cq(rcq) "
		    "failed, ret=%d\n", ret);
		return (DDI_FAILURE);
	}

	if ((ret = ibt_modify_cq(state->id_rcq_hdl, state->id_ud_rx_comp_count,
	    state->id_ud_rx_comp_usec, 0)) != IBT_SUCCESS) {
		DPRINT(10, "ibd_alloc_cqs: Receive CQ interrupt "
		    "moderation failed, ret=%d\n", ret);
	}

	state->id_rxwcs_size = IBD_MAX_RX_MP_LEN;
	state->id_rxwcs = kmem_alloc(sizeof (ibt_wc_t) *
	    state->id_rxwcs_size, KM_SLEEP);

	/* Send CQ: same clamping rule as the receive side. */
	if (hca_attrs.hca_max_cq_sz >= (state->id_ud_num_swqe + 1)) {
		cq_attr.cq_size = state->id_ud_num_swqe + 1;
	} else {
		cq_attr.cq_size = hca_attrs.hca_max_cq_sz;
		num_swqe_change = state->id_ud_num_swqe;
		state->id_ud_num_swqe = cq_attr.cq_size - 1;
	}

	if ((ret = ibt_alloc_cq(state->id_hca_hdl, &cq_attr,
	    &state->id_scq_hdl, &real_size)) != IBT_SUCCESS) {
		DPRINT(10, "ibd_alloc_cqs: ibt_alloc_cq(scq) "
		    "failed, ret=%d\n", ret);
		/* Undo the receive-side allocations made above. */
		kmem_free(state->id_rxwcs, sizeof (ibt_wc_t) *
		    state->id_rxwcs_size);
		(void) ibt_free_cq(state->id_rcq_hdl);
		return (DDI_FAILURE);
	}

	if ((ret = ibt_modify_cq(state->id_scq_hdl, state->id_ud_tx_comp_count,
	    state->id_ud_tx_comp_usec, 0)) != IBT_SUCCESS) {
		DPRINT(10, "ibd_alloc_cqs: Send CQ interrupt "
		    "moderation failed, ret=%d\n", ret);
	}

	state->id_txwcs_size = IBD_TX_POLL_THRESH;
	state->id_txwcs = kmem_alloc(sizeof (ibt_wc_t) *
	    state->id_txwcs_size, KM_SLEEP);

	/* Report any work-queue counts that had to be reduced. */
	if (num_rwqe_change) {
		ibd_print_warn(state, "Setting #rwqe = %d instead of default "
		    "%d", state->id_ud_num_rwqe, num_rwqe_change);
	}
	if (num_swqe_change) {
		ibd_print_warn(state, "Setting #swqe = %d instead of default "
		    "%d", state->id_ud_num_swqe, num_swqe_change);
	}

	return (DDI_SUCCESS);
}
/*
 * Allocate the UD channel for this instance and record its QP number
 * in state->id_qpnum.
 *
 * The channel is bound to the send/receive CQs, protection domain,
 * port and pkey index already held in the soft state, and uses the
 * broadcast group's qkey.  If the follow-up channel query fails, the
 * freshly allocated channel is freed again.
 *
 * Returns DDI_SUCCESS or DDI_FAILURE.
 */
static int
ibd_setup_ud_channel(ibd_state_t *state)
{
	ibt_ud_chan_alloc_args_t ud_alloc_attr;
	ibt_ud_chan_query_attr_t ud_chan_attr;
	ibt_status_t status;

	/* Resources the channel is tied to. */
	ud_alloc_attr.ud_hca_port_num = state->id_port;
	ud_alloc_attr.ud_pkey_ix = state->id_pkix;
	ud_alloc_attr.ud_pd = state->id_pd_hdl;
	ud_alloc_attr.ud_scq = state->id_scq_hdl;
	ud_alloc_attr.ud_rcq = state->id_rcq_hdl;
	ud_alloc_attr.ud_qkey = state->id_mcinfo->mc_qkey;
	ud_alloc_attr.ud_clone_chan = NULL;

	/* Queue depths and scatter/gather limits. */
	ud_alloc_attr.ud_sizes.cs_sq = state->id_ud_num_swqe;
	ud_alloc_attr.ud_sizes.cs_rq = state->id_ud_num_rwqe;
	ud_alloc_attr.ud_sizes.cs_sq_sgl = state->id_max_sqseg;
	ud_alloc_attr.ud_sizes.cs_rq_sgl = IBD_MAX_RQSEG;

	/* Optional capabilities are requested only when available. */
	ud_alloc_attr.ud_flags = IBT_ALL_SIGNALED;
	if (state->id_hca_res_lkey_capab) {
		ud_alloc_attr.ud_flags |= IBT_FAST_REG_RES_LKEY;
	}
	if (state->id_lso_policy && state->id_lso_capable) {
		ud_alloc_attr.ud_flags |= IBT_USES_LSO;
	}

	status = ibt_alloc_ud_channel(state->id_hca_hdl, IBT_ACHAN_NO_FLAGS,
	    &ud_alloc_attr, &state->id_chnl_hdl, NULL);
	if (status != IBT_SUCCESS) {
		DPRINT(10, "ibd_setup_ud_channel: ibt_alloc_ud_channel() "
		    "failed, ret=%d\n", status);
		return (DDI_FAILURE);
	}

	status = ibt_query_ud_channel(state->id_chnl_hdl, &ud_chan_attr);
	if (status != IBT_SUCCESS) {
		DPRINT(10, "ibd_setup_ud_channel: ibt_query_ud_channel() "
		    "failed, ret=%d\n", status);
		(void) ibt_free_channel(state->id_chnl_hdl);
		return (DDI_FAILURE);
	}

	state->id_qpnum = ud_chan_attr.ud_qpn;
	return (DDI_SUCCESS);
}
/*
 * Tear down whatever ibd_start() managed to set up.
 *
 * A snapshot of id_mac_state is taken on entry ("progress"); each
 * teardown step below runs only if the corresponding IBD_DRV_* bit is
 * set in that snapshot, and clears the bit in the live id_mac_state
 * once the step is done.
 *
 * cur_link_state is the link state to report: if it is
 * LINK_STATE_DOWN the link is recorded as down, otherwise as
 * LINK_STATE_UNKNOWN.
 *
 * Always returns DDI_SUCCESS; individual step failures are only
 * logged.
 */
static int
ibd_undo_start(ibd_state_t *state, link_state_t cur_link_state)
{
uint32_t progress = state->id_mac_state;
uint_t attempts;
ibt_status_t ret;
ib_gid_t mgid;
ibd_mce_t *mce;
uint8_t jstate;
timeout_id_t tid;
/* Drop the running count; anything other than 1 -> 0 is unexpected. */
if (atomic_dec_32_nv(&state->id_running) != 0)
cmn_err(CE_WARN, "ibd_undo_start: id_running was not 1\n");
/* Record the new link state under the link mutex. */
mutex_enter(&state->id_link_mutex);
if (cur_link_state == LINK_STATE_DOWN) {
state->id_link_state = cur_link_state;
} else {
state->id_link_state = LINK_STATE_UNKNOWN;
}
mutex_exit(&state->id_link_mutex);
/* Clear our MAC address and tell the MAC layer about the link state. */
bzero(&state->id_macaddr, sizeof (ipoib_mac_t));
mac_link_update(state->id_mh, state->id_link_state);
state->id_mac_state &= (~IBD_DRV_PORT_DETAILS_OBTAINED);
if (progress & IBD_DRV_STARTED) {
state->id_mac_state &= (~IBD_DRV_STARTED);
}
if (progress & IBD_DRV_IN_LATE_HCA_INIT) {
state->id_mac_state &= (~IBD_DRV_IN_LATE_HCA_INIT);
}
/* Stop the RC listener, if one is registered. */
if (progress & IBD_DRV_RC_LISTEN) {
ASSERT(state->id_enable_rc);
if (state->rc_listen_hdl != NULL) {
ibd_rc_stop_listen(state);
}
state->id_mac_state &= (~IBD_DRV_RC_LISTEN);
}
/*
 * Cancel the RC connection timeout.  The id is captured and cleared
 * under rc_timeout_lock; untimeout() is called after the lock is
 * dropped.
 */
if (progress & IBD_DRV_RC_TIMEOUT) {
ASSERT(state->id_enable_rc);
mutex_enter(&state->rc_timeout_lock);
state->rc_timeout_start = B_FALSE;
tid = state->rc_timeout;
state->rc_timeout = 0;
mutex_exit(&state->rc_timeout_lock);
if (tid != 0)
(void) untimeout(tid);
state->id_mac_state &= (~IBD_DRV_RC_TIMEOUT);
}
/*
 * In RC mode, wait (up to 100 polls of 100ms) for an ongoing id_ah_op
 * to finish, then clear id_sched_needed and close all RC channels.
 * If the wait runs out, rc_stop_connect is bumped and we proceed.
 */
if ((state->id_enable_rc) && (progress & IBD_DRV_ACACHE_INITIALIZED)) {
attempts = 100;
while (state->id_ah_op == IBD_OP_ONGOING) {
delay(drv_usectohz(100000));
if (--attempts == 0) {
state->rc_stop_connect++;
DPRINT(40, "ibd_undo_start: connecting");
break;
}
}
mutex_enter(&state->id_sched_lock);
state->id_sched_needed = 0;
mutex_exit(&state->id_sched_lock);
(void) ibd_rc_close_all_chan(state);
}
/*
 * Wait (up to 10 polls of 100ms) for dl_bufs_outstanding on the rx
 * list to reach zero.  atomic_add_32_nv(.., 0) is used to read the
 * counter.  Giving up is logged but not fatal.
 */
if (progress & IBD_DRV_RCQ_NOTIFY_ENABLED) {
attempts = 10;
while (atomic_add_32_nv(&state->id_rx_list.dl_bufs_outstanding,
0) > 0) {
delay(drv_usectohz(100000));
if (--attempts == 0) {
cmn_err(CE_CONT, "!ibd: bufs outstanding\n");
DPRINT(2, "ibd_undo_start: "
"reclaiming failed");
break;
}
}
state->id_mac_state &= (~IBD_DRV_RCQ_NOTIFY_ENABLED);
}
if (progress & IBD_DRV_RC_LARGEBUF_ALLOCD) {
ibd_rc_fini_tx_largebuf_list(state);
state->id_mac_state &= (~IBD_DRV_RC_LARGEBUF_ALLOCD);
}
/*
 * The SRQ list is only freed when it has no outstanding buffers and
 * id_ah_op is not (still) ongoing after one 10ms grace delay.
 * Otherwise IBD_DRV_RC_SRQ_ALLOCD is deliberately left set in
 * id_mac_state so the list stays allocated.
 */
if (progress & IBD_DRV_RC_SRQ_ALLOCD) {
ASSERT(state->id_enable_rc);
if (state->rc_srq_rwqe_list.dl_bufs_outstanding == 0) {
if (state->id_ah_op == IBD_OP_ONGOING) {
delay(drv_usectohz(10000));
if (state->id_ah_op == IBD_OP_ONGOING) {
state->rc_stop_connect++;
DPRINT(40, "ibd_undo_start: "
"connecting");
} else {
ibd_rc_fini_srq_list(state);
state->id_mac_state &=
(~IBD_DRV_RC_SRQ_ALLOCD);
}
} else {
ibd_rc_fini_srq_list(state);
state->id_mac_state &= (~IBD_DRV_RC_SRQ_ALLOCD);
}
} else {
DPRINT(40, "ibd_undo_start: srq bufs outstanding\n");
}
}
/*
 * Unregister the subnet notices handler, then set id_trap_stop and
 * wait until no trap handling is in progress.
 */
if (progress & IBD_DRV_SM_NOTICES_REGISTERED) {
ibt_register_subnet_notices(state->id_ibt_hdl, NULL, NULL);
mutex_enter(&state->id_trap_lock);
state->id_trap_stop = B_TRUE;
while (state->id_trap_inprog > 0)
cv_wait(&state->id_trap_cv, &state->id_trap_lock);
mutex_exit(&state->id_trap_lock);
state->id_mac_state &= (~IBD_DRV_SM_NOTICES_REGISTERED);
}
/*
 * Flush the UD channel and wait for the tx and rx work requests to
 * drain before removing the CQ handlers.
 */
if (progress & IBD_DRV_SCQ_NOTIFY_ENABLED) {
if ((ret = ibt_flush_channel(state->id_chnl_hdl)) !=
IBT_SUCCESS) {
DPRINT(10, "ibd_undo_start: flush_channel "
"failed, ret=%d", ret);
}
/*
 * Tx side: poll until the free list plus the release list account
 * for every swqe.  Both list mutexes are dropped around each delay
 * and re-taken in the same order (tx_list, then tx_rel_list).
 */
attempts = 10;
mutex_enter(&state->id_tx_list.dl_mutex);
mutex_enter(&state->id_tx_rel_list.dl_mutex);
while (state->id_tx_list.dl_cnt + state->id_tx_rel_list.dl_cnt
!= state->id_ud_num_swqe) {
if (--attempts == 0)
break;
mutex_exit(&state->id_tx_rel_list.dl_mutex);
mutex_exit(&state->id_tx_list.dl_mutex);
delay(drv_usectohz(100000));
mutex_enter(&state->id_tx_list.dl_mutex);
mutex_enter(&state->id_tx_rel_list.dl_mutex);
}
/* Remove the send CQ handler while both tx list mutexes are held. */
ibt_set_cq_handler(state->id_scq_hdl, 0, 0);
if (state->id_tx_list.dl_cnt + state->id_tx_rel_list.dl_cnt !=
state->id_ud_num_swqe) {
cmn_err(CE_WARN, "tx resources not freed\n");
}
mutex_exit(&state->id_tx_rel_list.dl_mutex);
mutex_exit(&state->id_tx_list.dl_mutex);
/* Rx side: poll until the rx list count drops to zero. */
attempts = 10;
while (atomic_add_32_nv(&state->id_rx_list.dl_cnt, 0) != 0) {
if (--attempts == 0)
break;
delay(drv_usectohz(100000));
}
ibt_set_cq_handler(state->id_rcq_hdl, 0, 0);
if (atomic_add_32_nv(&state->id_rx_list.dl_cnt, 0) != 0) {
cmn_err(CE_WARN, "rx resources not freed\n");
}
state->id_mac_state &= (~IBD_DRV_SCQ_NOTIFY_ENABLED);
}
/*
 * Leave every group on the id_mc_full list.  The next entry is
 * fetched before ibd_leave_group() is called on the current one.
 */
if (progress & IBD_DRV_BCAST_GROUP_JOINED) {
DPRINT(2, "ibd_undo_start: clear full cache entries");
mce = list_head(&state->id_mc_full);
while (mce != NULL) {
mgid = mce->mc_info.mc_adds_vect.av_dgid;
jstate = mce->mc_jstate;
mce = list_next(&state->id_mc_full, mce);
ibd_leave_group(state, mgid, jstate);
}
state->id_mac_state &= (~IBD_DRV_BCAST_GROUP_JOINED);
}
if (progress & IBD_DRV_RXLIST_ALLOCD) {
ibd_fini_rxlist(state);
state->id_mac_state &= (~IBD_DRV_RXLIST_ALLOCD);
}
if (progress & IBD_DRV_TXLIST_ALLOCD) {
ibd_fini_txlist(state);
state->id_mac_state &= (~IBD_DRV_TXLIST_ALLOCD);
}
if (progress & IBD_DRV_UD_CHANNEL_SETUP) {
if ((ret = ibt_free_channel(state->id_chnl_hdl)) !=
IBT_SUCCESS) {
DPRINT(10, "ibd_undo_start: free_channel "
"failed, ret=%d", ret);
}
state->id_mac_state &= (~IBD_DRV_UD_CHANNEL_SETUP);
}
/*
 * Free both CQs and their work-completion arrays, then NULL the
 * pointers and handles.
 */
if (progress & IBD_DRV_CQS_ALLOCD) {
kmem_free(state->id_txwcs,
sizeof (ibt_wc_t) * state->id_txwcs_size);
if ((ret = ibt_free_cq(state->id_scq_hdl)) !=
IBT_SUCCESS) {
DPRINT(10, "ibd_undo_start: free_cq(scq) "
"failed, ret=%d", ret);
}
kmem_free(state->id_rxwcs,
sizeof (ibt_wc_t) * state->id_rxwcs_size);
if ((ret = ibt_free_cq(state->id_rcq_hdl)) != IBT_SUCCESS) {
DPRINT(10, "ibd_undo_start: free_cq(rcq) failed, "
"ret=%d", ret);
}
state->id_txwcs = NULL;
state->id_rxwcs = NULL;
state->id_scq_hdl = NULL;
state->id_rcq_hdl = NULL;
state->id_mac_state &= (~IBD_DRV_CQS_ALLOCD);
}
/* Destroy the active AH hash under id_ac_mutex, then the cache. */
if (progress & IBD_DRV_ACACHE_INITIALIZED) {
mutex_enter(&state->id_ac_mutex);
mod_hash_destroy_hash(state->id_ah_active_hash);
mutex_exit(&state->id_ac_mutex);
ibd_acache_fini(state);
state->id_mac_state &= (~IBD_DRV_ACACHE_INITIALIZED);
}
/*
 * Release the broadcast group info.  The group itself is left via
 * ibt_leave_mcg() only when id_bgroup_created is set.
 */
if (progress & IBD_DRV_BCAST_GROUP_FOUND) {
if (state->id_bgroup_created) {
mgid = state->id_mcinfo->mc_adds_vect.av_dgid;
jstate = IB_MC_JSTATE_FULL;
(void) ibt_leave_mcg(state->id_sgid, mgid,
state->id_sgid, jstate);
}
ibt_free_mcg_info(state->id_mcinfo, 1);
state->id_mac_state &= (~IBD_DRV_BCAST_GROUP_FOUND);
}
return (DDI_SUCCESS);
}
/*
 * Mark a start/stop style operation as in progress.
 *
 * Blocks on id_macst_cv until none of the bits in
 * IBD_DRV_RESTART_IN_PROGRESS are set in id_mac_state, then ORs
 * "flag" into id_mac_state.  All of this happens under
 * id_macst_lock.
 */
static void
ibd_set_mac_progress(ibd_state_t *state, uint_t flag)
{
	mutex_enter(&state->id_macst_lock);

	for (;;) {
		if ((state->id_mac_state & IBD_DRV_RESTART_IN_PROGRESS) == 0)
			break;
		cv_wait(&state->id_macst_cv, &state->id_macst_lock);
	}
	state->id_mac_state |= flag;

	mutex_exit(&state->id_macst_lock);
}
/*
 * Clear the bits in "flag" from id_mac_state and signal one waiter on
 * id_macst_cv, both under id_macst_lock.
 */
static void
ibd_clr_mac_progress(ibd_state_t *state, uint_t flag)
{
	mutex_enter(&state->id_macst_lock);

	state->id_mac_state = state->id_mac_state & (~flag);
	cv_signal(&state->id_macst_cv);

	mutex_exit(&state->id_macst_lock);
}
/*
 * Start entry point taking the soft state as an opaque argument.
 *
 * Port-driver instances are rejected with EINVAL.  Otherwise the
 * start is bracketed by IBD_DRV_START_IN_PROGRESS; if the instance is
 * flagged IBD_DRV_IN_DELETION the start is refused with EIO, else the
 * result of ibd_start() is returned.
 */
static int
ibd_m_start(void *arg)
{
	ibd_state_t *state = arg;
	int rv;

	if (state->id_type == IBD_PORT_DRIVER)
		return (EINVAL);

	ibd_set_mac_progress(state, IBD_DRV_START_IN_PROGRESS);

	if ((state->id_mac_state & IBD_DRV_IN_DELETION) != 0) {
		rv = EIO;
	} else {
		rv = ibd_start(state);
	}

	ibd_clr_mac_progress(state, IBD_DRV_START_IN_PROGRESS);
	return (rv);
}
/*
 * Bring the interface up: obtain port details, locate and join the
 * broadcast group, allocate the address cache, CQs, UD channel and
 * tx/rx lists, enable CQ notification, and (in RC mode) set up the
 * SRQ, large tx buffers, listener and connection timeout.
 *
 * Each completed step sets an IBD_DRV_* bit in id_mac_state so that
 * ibd_undo_start() knows what to tear down.
 *
 * If the link is down or the broadcast group cannot be found, the
 * function returns DDI_SUCCESS with IBD_DRV_IN_LATE_HCA_INIT set and
 * the link reported down, leaving the remaining steps undone.
 *
 * Returns DDI_SUCCESS, or an errno value after calling
 * ibd_undo_start() on failure.
 */
static int
ibd_start(ibd_state_t *state)
{
int err;
ibt_status_t ret;
int late_hca_init = 0;
/* Already started: nothing to do. */
if (state->id_mac_state & IBD_DRV_STARTED)
return (DDI_SUCCESS);
/*
 * Unless we are in the late-HCA-init state, take the running count
 * from 0 to 1.  Any other result is rejected and the increment is
 * undone.
 */
if (!(state->id_mac_state & IBD_DRV_IN_LATE_HCA_INIT) &&
(atomic_inc_32_nv(&state->id_running) != 1)) {
DPRINT(10, "ibd_start: id_running is non-zero");
cmn_err(CE_WARN, "ibd_start: id_running was not 0\n");
atomic_dec_32(&state->id_running);
return (EINVAL);
}
if ((err = ibd_get_port_details(state)) != 0) {
DPRINT(10, "ibd_start: ibd_get_port_details() failed");
goto start_fail;
}
state->id_mac_state |= IBD_DRV_PORT_DETAILS_OBTAINED;
/* Link down: defer everything else to a later start attempt. */
if (state->id_link_state == LINK_STATE_DOWN) {
late_hca_init = 1;
goto late_hca_init_return;
}
/*
 * No broadcast group: still register for subnet notices, then
 * take the late-init return.
 */
if (ibd_find_bgroup(state) != IBT_SUCCESS) {
late_hca_init = 1;
goto reg_snet_notices;
}
state->id_mac_state |= IBD_DRV_BCAST_GROUP_FOUND;
if (ibd_acache_init(state) != DDI_SUCCESS) {
DPRINT(10, "ibd_start: ibd_acache_init() failed");
err = ENOMEM;
goto start_fail;
}
state->id_mac_state |= IBD_DRV_ACACHE_INITIALIZED;
if (ibd_alloc_cqs(state) != DDI_SUCCESS) {
DPRINT(10, "ibd_start: ibd_alloc_cqs() failed");
err = ENOMEM;
goto start_fail;
}
state->id_mac_state |= IBD_DRV_CQS_ALLOCD;
if (ibd_setup_ud_channel(state) != DDI_SUCCESS) {
err = ENOMEM;
DPRINT(10, "ibd_start: ibd_setup_ud_channel() failed");
goto start_fail;
}
state->id_mac_state |= IBD_DRV_UD_CHANNEL_SETUP;
if (ibd_init_txlist(state) != DDI_SUCCESS) {
DPRINT(10, "ibd_start: ibd_init_txlist() failed");
err = ENOMEM;
goto start_fail;
}
state->id_mac_state |= IBD_DRV_TXLIST_ALLOCD;
/* Install the send CQ handler and arm notification. */
ibt_set_cq_handler(state->id_scq_hdl, ibd_scq_handler, state);
if ((ret = ibt_enable_cq_notify(state->id_scq_hdl,
IBT_NEXT_COMPLETION)) != IBT_SUCCESS) {
DPRINT(10, "ibd_start: ibt_enable_cq_notify(scq) "
"failed, ret=%d", ret);
err = EINVAL;
goto start_fail;
}
state->id_mac_state |= IBD_DRV_SCQ_NOTIFY_ENABLED;
if (ibd_init_rxlist(state) != DDI_SUCCESS) {
DPRINT(10, "ibd_start: ibd_init_rxlist() failed");
err = ENOMEM;
goto start_fail;
}
state->id_mac_state |= IBD_DRV_RXLIST_ALLOCD;
/* Join the broadcast group as a full member. */
if (ibd_join_group(state, state->id_mgid, IB_MC_JSTATE_FULL) == NULL) {
DPRINT(10, "ibd_start: ibd_join_group() failed");
err = ENOTACTIVE;
goto start_fail;
}
state->id_mac_state |= IBD_DRV_BCAST_GROUP_JOINED;
/*
 * Build our MAC address from the QP number and SGID.  In RC mode
 * IBD_MAC_ADDR_RC is added to the QPN field, and a second address
 * without it is kept in rc_macaddr_loopback.
 */
if (state->id_enable_rc) {
ibd_h2n_mac(&state->id_macaddr,
IBD_MAC_ADDR_RC + state->id_qpnum,
state->id_sgid.gid_prefix, state->id_sgid.gid_guid);
ibd_h2n_mac(&state->rc_macaddr_loopback, state->id_qpnum,
state->id_sgid.gid_prefix, state->id_sgid.gid_guid);
} else {
ibd_h2n_mac(&state->id_macaddr, state->id_qpnum,
state->id_sgid.gid_prefix, state->id_sgid.gid_guid);
}
/* The broadcast address is built from the broadcast group's MGID. */
ibd_h2n_mac(&state->id_bcaddr, IB_QPN_MASK,
state->id_mgid.gid_prefix, state->id_mgid.gid_guid);
/* In UD mode the max SDU follows the port MTU less the IPoIB header. */
if (!state->id_enable_rc) {
(void) mac_maxsdu_update2(state->id_mh,
state->id_mtu - IPOIB_HDRSIZE,
state->id_mtu - IPOIB_HDRSIZE);
}
mac_unicst_update(state->id_mh, (uint8_t *)&state->id_macaddr);
/* Install the receive CQ handler and arm notification. */
ibt_set_cq_handler(state->id_rcq_hdl, ibd_rcq_handler, state);
if ((ret = ibt_enable_cq_notify(state->id_rcq_hdl,
IBT_NEXT_COMPLETION)) != IBT_SUCCESS) {
DPRINT(10, "ibd_start: ibt_enable_cq_notify(rcq) "
"failed, ret=%d", ret);
err = EINVAL;
goto start_fail;
}
state->id_mac_state |= IBD_DRV_RCQ_NOTIFY_ENABLED;
reg_snet_notices:
/* Register for subnet notices once; skipped if already registered. */
if ((state->id_mac_state & IBD_DRV_SM_NOTICES_REGISTERED) == 0) {
ibt_register_subnet_notices(state->id_ibt_hdl,
ibd_snet_notices_handler, state);
mutex_enter(&state->id_trap_lock);
state->id_trap_stop = B_FALSE;
mutex_exit(&state->id_trap_lock);
state->id_mac_state |= IBD_DRV_SM_NOTICES_REGISTERED;
}
late_hca_init_return:
/*
 * Late-init exit: report success to the caller with the link marked
 * down and IBD_DRV_IN_LATE_HCA_INIT set.
 */
if (late_hca_init == 1) {
state->id_mac_state |= IBD_DRV_IN_LATE_HCA_INIT;
state->id_link_state = LINK_STATE_DOWN;
mac_unicst_update(state->id_mh, (uint8_t *)&state->id_macaddr);
mac_link_update(state->id_mh, state->id_link_state);
return (DDI_SUCCESS);
}
if (state->id_enable_rc) {
if (state->rc_enable_srq) {
/*
 * If the SRQ list is still allocated (flag already set), repost its
 * free list rather than allocating a new one.
 */
if (state->id_mac_state & IBD_DRV_RC_SRQ_ALLOCD) {
if (ibd_rc_repost_srq_free_list(state) !=
IBT_SUCCESS) {
err = ENOMEM;
goto start_fail;
}
} else {
if (ibd_rc_init_srq_list(state) !=
IBT_SUCCESS) {
err = ENOMEM;
goto start_fail;
}
state->id_mac_state |= IBD_DRV_RC_SRQ_ALLOCD;
}
}
if (ibd_rc_init_tx_largebuf_list(state) != IBT_SUCCESS) {
DPRINT(10, "ibd_start: ibd_rc_init_tx_largebuf_list() "
"failed");
err = ENOMEM;
goto start_fail;
}
state->id_mac_state |= IBD_DRV_RC_LARGEBUF_ALLOCD;
if (ibd_rc_listen(state) != IBT_SUCCESS) {
DPRINT(10, "ibd_start: ibd_rc_listen() failed");
err = EINVAL;
goto start_fail;
}
state->id_mac_state |= IBD_DRV_RC_LISTEN;
}
/* Fully up: publish the link state and mark the driver started. */
mac_link_update(state->id_mh, state->id_link_state);
state->id_mac_state &= ~IBD_DRV_IN_LATE_HCA_INIT;
state->id_mac_state |= IBD_DRV_STARTED;
/* In RC mode, arm the connection timeout under rc_timeout_lock. */
if (state->id_enable_rc) {
mutex_enter(&state->rc_timeout_lock);
state->rc_timeout_start = B_TRUE;
state->rc_timeout = timeout(ibd_rc_conn_timeout_call, state,
SEC_TO_TICK(ibd_rc_conn_timeout));
mutex_exit(&state->rc_timeout_lock);
state->id_mac_state |= IBD_DRV_RC_TIMEOUT;
}
return (DDI_SUCCESS);
start_fail:
/* Unwind whatever was set up and report the link as down. */
(void) ibd_undo_start(state, LINK_STATE_DOWN);
return (err);
}
/*
 * Stop entry point taking the soft state as an opaque argument.
 *
 * Does nothing for port-driver instances.  Otherwise runs
 * ibd_undo_start() with the current link state, bracketed by the
 * IBD_DRV_STOP_IN_PROGRESS flag.
 */
static void
ibd_m_stop(void *arg)
{
	ibd_state_t *state = (ibd_state_t *)arg;

	if (state->id_type != IBD_PORT_DRIVER) {
		ibd_set_mac_progress(state, IBD_DRV_STOP_IN_PROGRESS);
		(void) ibd_undo_start(state, state->id_link_state);
		ibd_clr_mac_progress(state, IBD_DRV_STOP_IN_PROGRESS);
	}
}
/*
 * Unicast-address entry point.
 *
 * Port-driver instances get EINVAL.  If the driver is not started the
 * request is accepted without checking.  Otherwise the request
 * succeeds only when macaddr matches the address already held in
 * id_macaddr (first IPOIB_ADDRL bytes); any other address yields
 * EINVAL.
 */
static int
ibd_m_unicst(void *arg, const uint8_t *macaddr)
{
	ibd_state_t *state = arg;

	if (state->id_type == IBD_PORT_DRIVER)
		return (EINVAL);

	if (!(state->id_mac_state & IBD_DRV_STARTED))
		return (0);

	return ((bcmp(macaddr, &state->id_macaddr, IPOIB_ADDRL) == 0) ?
	    0 : EINVAL);
}
/*
 * Carry out a multicast join or leave for mgid.
 *
 * op == IBD_ASYNC_JOIN joins the group as a full member and prints a
 * warning if the join fails; any other op leaves the group.
 */
static void
ibd_async_multicast(ibd_state_t *state, ib_gid_t mgid, int op)
{
	DPRINT(3, "ibd_async_multicast : async_setmc op %d :"
	    "%016llx:%016llx\n", op, mgid.gid_prefix, mgid.gid_guid);

	if (op != IBD_ASYNC_JOIN) {
		ibd_leave_group(state, mgid, IB_MC_JSTATE_FULL);
		return;
	}

	if (ibd_join_group(state, mgid, IB_MC_JSTATE_FULL) != NULL)
		return;

	ibd_print_warn(state, "Join multicast group failed :"
	    "%016llx:%016llx", mgid.gid_prefix, mgid.gid_guid);
}
/*
 * Multicast add/remove entry point.
 *
 * The supplied address is copied, checked for the multicast QPN, has
 * our scope and pkey filled in, and is converted to an MGID.  The
 * join or leave itself is not done here; a request is queued via
 * ibd_queue_work_slot().
 *
 * Returns EINVAL for port-driver instances or a non-multicast
 * address, ENOMEM if no request structure is available, and 0
 * otherwise (including when the driver is not started or the address
 * is the broadcast address, in which case nothing is queued).
 */
static int
ibd_m_multicst(void *arg, boolean_t add, const uint8_t *mcmac)
{
	ibd_state_t *state = (ibd_state_t *)arg;
	ipoib_mac_t local_addr;
	ipoib_mac_t *gaddr = &local_addr;
	ib_gid_t mgid;
	ibd_req_t *req;
	int op;

	if (state->id_type == IBD_PORT_DRIVER)
		return (EINVAL);

	if (!(state->id_mac_state & IBD_DRV_STARTED))
		return (0);

	/* Work on a private copy; the caller's buffer is const. */
	bcopy(mcmac, gaddr, sizeof (ipoib_mac_t));

	if ((ntohl(gaddr->ipoib_qpn) & IB_QPN_MASK) != IB_MC_QPN)
		return (EINVAL);

	IBD_FILL_SCOPE_PKEY(gaddr, state->id_scope, state->id_pkey);

	/* The broadcast group needs no separate join or leave here. */
	if (bcmp(gaddr, &state->id_bcaddr, IPOIB_ADDRL) == 0)
		return (0);

	ibd_n2h_gid(gaddr, &mgid);

	req = kmem_cache_alloc(state->id_req_kmc, KM_NOSLEEP);
	if (req == NULL)
		return (ENOMEM);
	req->rq_gid = mgid;

	if (add) {
		DPRINT(1, "ibd_m_multicst : %016llx:%016llx\n",
		    mgid.gid_prefix, mgid.gid_guid);
		op = IBD_ASYNC_JOIN;
	} else {
		DPRINT(1, "ibd_m_multicst : unset_multicast : "
		    "%016llx:%016llx", mgid.gid_prefix, mgid.gid_guid);
		op = IBD_ASYNC_LEAVE;
	}
	ibd_queue_work_slot(state, req, op);

	return (0);
}
/*
 * Turn promiscuous mode off: leave every group on the id_mc_non list
 * (join state IB_MC_JSTATE_NON) and reset id_prom_op to
 * IBD_OP_NOTSTARTED.
 */
static void
ibd_async_unsetprom(ibd_state_t *state)
{
	ibd_mce_t *cur = list_head(&state->id_mc_non);
	ibd_mce_t *following;
	ib_gid_t gid;

	DPRINT(2, "ibd_async_unsetprom : async_unset_promisc");

	for (; cur != NULL; cur = following) {
		gid = cur->mc_info.mc_adds_vect.av_dgid;
		/* Pick up the successor before leaving the current group. */
		following = list_next(&state->id_mc_non, cur);
		ibd_leave_group(state, gid, IB_MC_JSTATE_NON);
	}

	state->id_prom_op = IBD_OP_NOTSTARTED;
}
/*
 * Turn promiscuous mode on: query the multicast groups matching our
 * pkey, scope, qkey and MTU, and join each one with join state
 * IB_MC_JSTATE_NON.
 *
 * A failed join only produces a warning.  id_prom_op ends up as
 * IBD_OP_COMPLETED, or IBD_OP_ERRORED if the group query itself
 * fails.
 */
static void
ibd_async_setprom(ibd_state_t *state)
{
	ibt_mcg_attr_t query;
	ibt_mcg_info_t *groups;
	ib_gid_t gid;
	uint_t ngroups;
	int idx;

	DPRINT(2, "ibd_async_setprom : async_set_promisc");

	/* Match on the same parameters as the broadcast group. */
	bzero(&query, sizeof (query));
	query.mc_pkey = state->id_pkey;
	query.mc_scope = state->id_scope;
	query.mc_qkey = state->id_mcinfo->mc_qkey;
	query.mc_mtu_req.r_mtu = state->id_mcinfo->mc_mtu;
	query.mc_mtu_req.r_selector = IBT_EQU;

	if (ibt_query_mcg(state->id_sgid, &query, 0, &groups, &ngroups) !=
	    IBT_SUCCESS) {
		ibd_print_warn(state, "Could not get list of IBA multicast "
		    "groups");
		state->id_prom_op = IBD_OP_ERRORED;
		return;
	}

	for (idx = 0; idx < ngroups; idx++) {
		gid = groups[idx].mc_adds_vect.av_dgid;
		if (ibd_join_group(state, gid, IB_MC_JSTATE_NON) != NULL)
			continue;
		ibd_print_warn(state, "IBA promiscuous mode missed "
		    "multicast gid %016llx:%016llx",
		    (u_longlong_t)gid.gid_prefix,
		    (u_longlong_t)gid.gid_guid);
	}

	ibt_free_mcg_info(groups, ngroups);
	DPRINT(4, "ibd_async_setprom : async_set_promisc completes");

	state->id_prom_op = IBD_OP_COMPLETED;
}
/*
 * Promiscuous-mode entry point.
 *
 * Queues an IBD_ASYNC_PROMON or IBD_ASYNC_PROMOFF request via
 * ibd_queue_work_slot(); the mode change itself is not done here.
 *
 * Returns EINVAL for port-driver instances, ENOMEM if no request
 * structure is available, and 0 otherwise (including when the driver
 * is not started, in which case nothing is queued).
 */
static int
ibd_m_promisc(void *arg, boolean_t on)
{
	ibd_state_t *state = (ibd_state_t *)arg;
	ibd_req_t *req;
	int op;

	if (state->id_type == IBD_PORT_DRIVER)
		return (EINVAL);

	if (!(state->id_mac_state & IBD_DRV_STARTED))
		return (0);

	if ((req = kmem_cache_alloc(state->id_req_kmc, KM_NOSLEEP)) == NULL)
		return (ENOMEM);

	if (on) {
		DPRINT(1, "ibd_m_promisc : set_promisc : %d", on);
		op = IBD_ASYNC_PROMON;
	} else {
		DPRINT(1, "ibd_m_promisc : unset_promisc");
		op = IBD_ASYNC_PROMOFF;
	}
	ibd_queue_work_slot(state, req, op);

	return (0);
}
/*
 * Statistics entry point: store the value of the requested MAC
 * statistic in *val.
 *
 * Byte, packet and no-buffer counters are the sum of the UD (id_*)
 * and RC (rc_*) counters.  MAC_STAT_IERRORS is always reported as 0.
 *
 * Returns 0 on success.  MAC_STAT_NORCVBUF and any statistic not
 * listed here return ENOTSUP and leave *val untouched.
 */
static int
ibd_m_stat(void *arg, uint_t stat, uint64_t *val)
{
	ibd_state_t *state = (ibd_state_t *)arg;

	switch (stat) {
	case MAC_STAT_IFSPEED:
		*val = state->id_link_speed;
		return (0);

	case MAC_STAT_MULTIRCV:
		*val = state->id_multi_rcv;
		return (0);

	case MAC_STAT_BRDCSTRCV:
		*val = state->id_brd_rcv;
		return (0);

	case MAC_STAT_MULTIXMT:
		*val = state->id_multi_xmt;
		return (0);

	case MAC_STAT_BRDCSTXMT:
		*val = state->id_brd_xmt;
		return (0);

	case MAC_STAT_RBYTES:
		*val = state->id_rcv_bytes + state->rc_rcv_trans_byte
		    + state->rc_rcv_copy_byte;
		return (0);

	case MAC_STAT_IPACKETS:
		*val = state->id_rcv_pkt + state->rc_rcv_trans_pkt
		    + state->rc_rcv_copy_pkt;
		return (0);

	case MAC_STAT_OBYTES:
		*val = state->id_xmt_bytes + state->rc_xmt_bytes;
		return (0);

	case MAC_STAT_OPACKETS:
		*val = state->id_xmt_pkt + state->rc_xmt_small_pkt +
		    state->rc_xmt_fragmented_pkt +
		    state->rc_xmt_map_fail_pkt + state->rc_xmt_map_succ_pkt;
		return (0);

	case MAC_STAT_OERRORS:
		*val = state->id_ah_error;
		return (0);

	case MAC_STAT_IERRORS:
		*val = 0;
		return (0);

	case MAC_STAT_NOXMTBUF:
		*val = state->id_tx_short + state->rc_swqe_short +
		    state->rc_xmt_buf_short;
		return (0);

	case MAC_STAT_NORCVBUF:
	default:
		break;
	}

	return (ENOTSUP);
}
/*
 * Tx-reschedule hook: simply forwards to ibd_resume_transmission()
 * for this instance.
 */
static void
ibd_async_txsched(ibd_state_t *state)
{
	ibd_resume_transmission(state);
}
/*
 * Decide whether a resource the transmit path was waiting for has
 * become available again and, if so, call mac_tx_update().
 *
 * Under id_sched_lock, id_sched_needed is inspected:
 *   - IBD_RSRC_SWQE: the free count is the sum of the tx list and the
 *     tx release list, compared against IBD_FREE_SWQES_THRESH.
 *   - otherwise IBD_RSRC_LSOBUF: the free count is the LSO bucket's
 *     bkt_nfree, compared against IBD_FREE_LSOS_THRESH.
 * If the free count exceeds the threshold, the resource bit is
 * cleared, id_sched_cnt is bumped, and mac_tx_update() is called
 * after the lock is dropped.
 */
static void
ibd_resume_transmission(ibd_state_t *state)
{
	int rsrc = 0;
	int nfree = 0;
	int floor = 0;
	boolean_t wake = B_FALSE;

	mutex_enter(&state->id_sched_lock);

	if ((state->id_sched_needed & IBD_RSRC_SWQE) != 0) {
		rsrc = IBD_RSRC_SWQE;
		floor = IBD_FREE_SWQES_THRESH;

		mutex_enter(&state->id_tx_list.dl_mutex);
		mutex_enter(&state->id_tx_rel_list.dl_mutex);
		nfree = state->id_tx_list.dl_cnt +
		    state->id_tx_rel_list.dl_cnt;
		mutex_exit(&state->id_tx_rel_list.dl_mutex);
		mutex_exit(&state->id_tx_list.dl_mutex);
	} else if ((state->id_sched_needed & IBD_RSRC_LSOBUF) != 0) {
		ASSERT(state->id_lso != NULL);
		rsrc = IBD_RSRC_LSOBUF;

		mutex_enter(&state->id_lso_lock);
		nfree = state->id_lso->bkt_nfree;
		floor = IBD_FREE_LSOS_THRESH;
		mutex_exit(&state->id_lso_lock);

		if (nfree > floor)
			state->id_sched_lso_cnt++;
	}

	/* With no resource pending, nfree == floor == 0 and nothing fires. */
	if (nfree > floor) {
		state->id_sched_needed &= ~rsrc;
		state->id_sched_cnt++;
		wake = B_TRUE;
	}

	mutex_exit(&state->id_sched_lock);

	if (wake)
		mac_tx_update(state->id_mh);
}
/*
 * Return a chain of n send wqes (head .. tail) to the front of the tx
 * release list.
 *
 * tail must be the end of the chain (its swqe_next is NULL on entry).
 * Under the release list's mutex the chain is prepended to the list,
 * the count is raised by n, and dl_pending_sends is cleared.
 */
static void
ibd_release_swqe(ibd_state_t *state, ibd_swqe_t *head, ibd_swqe_t *tail, int n)
{
	ASSERT(tail->swqe_next == NULL);

	mutex_enter(&state->id_tx_rel_list.dl_mutex);

	/* Splice the chain in ahead of the current list contents. */
	tail->swqe_next = state->id_tx_rel_list.dl_head;
	state->id_tx_rel_list.dl_head = SWQE_TO_WQE(head);
	state->id_tx_rel_list.dl_cnt += n;
	state->id_tx_rel_list.dl_pending_sends = B_FALSE;

	mutex_exit(&state->id_tx_rel_list.dl_mutex);
}
/*
 * Refill the tx free list from the tx release list and hand back one
 * send wqe.
 *
 * If the release list is non-empty, its whole chain and count are
 * moved onto id_tx_list (overwriting that list's head and count), the
 * release list is emptied, and the first wqe is removed and returned.
 * If the release list is empty, dl_pending_sends is set on id_tx_list,
 * id_tx_short is bumped and NULL is returned.
 *
 * Only the release list's mutex is taken here.
 * NOTE(review): id_tx_list is modified without taking its dl_mutex in
 * this function -- presumably the caller holds it; confirm.
 */
static ibd_swqe_t *
ibd_acquire_swqe(ibd_state_t *state)
{
	ibd_swqe_t *first;

	mutex_enter(&state->id_tx_rel_list.dl_mutex);

	if (state->id_tx_rel_list.dl_head == NULL) {
		mutex_exit(&state->id_tx_rel_list.dl_mutex);
		state->id_tx_list.dl_pending_sends = B_TRUE;
		DPRINT(5, "ibd_acquire_swqe: out of Tx wqe");
		state->id_tx_short++;
		return (NULL);
	}

	/* Move everything from the release list to the free list. */
	state->id_tx_list.dl_head = state->id_tx_rel_list.dl_head;
	state->id_tx_list.dl_cnt = state->id_tx_rel_list.dl_cnt;
	state->id_tx_list.dl_pending_sends = B_FALSE;
	state->id_tx_rel_list.dl_head = NULL;
	state->id_tx_rel_list.dl_cnt = 0;

	mutex_exit(&state->id_tx_rel_list.dl_mutex);

	/* Pop the first wqe off the refilled free list. */
	first = WQE_TO_SWQE(state->id_tx_list.dl_head);
	state->id_tx_list.dl_cnt -= 1;
	state->id_tx_list.dl_head = first->swqe_next;

	return (first);
}
/*
 * Fill in the LSO portion of a send work request for packet mp.
 *
 * The LSO header is the IPoIB header followed by the IP and TCP
 * headers; its total size is stored in lso_hdr_sz.  If the whole
 * header lies within the first mblk, lso_hdr points straight at
 * mp->b_rptr.  Otherwise a buffer of lso_hdr_sz bytes is allocated
 * (KM_NOSLEEP) and the header is gathered into it from the mblk
 * chain.
 *
 *	node		send wqe whose ud_lso fields are filled in
 *	mp		packet, starting with the IPoIB header
 *	mss		segment size to record in lso_mss
 *	ud_dest		UD destination handle to record
 *
 * Returns 0 on success.  Returns -1, with lso_hdr NULL and
 * lso_hdr_sz / lso_mss zeroed, if the header buffer cannot be
 * allocated or if the mblk chain ends before the IP or TCP header
 * starts.
 */
static int
ibd_setup_lso(ibd_swqe_t *node, mblk_t *mp, uint32_t mss,
    ibt_ud_dest_hdl_t ud_dest)
{
	mblk_t *nmp;
	int iph_len, tcph_len;
	ibt_wr_lso_t *lso;
	uintptr_t ip_start, tcp_start;
	uint8_t *dst;
	uint_t pending, mblen;

	lso = &(node->w_swr.wr.ud_lso);
	lso->lso_ud_dest = ud_dest;
	lso->lso_mss = mss;

	/*
	 * Locate the IP header.  If it starts at or beyond the end of
	 * the first mblk, carry the overshoot into the next one.  A
	 * chain with no next mblk cannot contain the header, so fail
	 * rather than dereference a NULL b_cont.
	 */
	nmp = mp;
	ip_start = (uintptr_t)(nmp->b_rptr) + IPOIB_HDRSIZE;
	if (ip_start >= (uintptr_t)(nmp->b_wptr)) {
		if (nmp->b_cont == NULL)
			goto bad_chain;
		ip_start = (uintptr_t)nmp->b_cont->b_rptr
		    + (ip_start - (uintptr_t)(nmp->b_wptr));
		nmp = nmp->b_cont;
	}
	iph_len = IPH_HDR_LENGTH((ipha_t *)ip_start);

	/* Locate the TCP header the same way. */
	tcp_start = ip_start + iph_len;
	if (tcp_start >= (uintptr_t)(nmp->b_wptr)) {
		if (nmp->b_cont == NULL)
			goto bad_chain;
		tcp_start = (uintptr_t)nmp->b_cont->b_rptr
		    + (tcp_start - (uintptr_t)(nmp->b_wptr));
		nmp = nmp->b_cont;
	}
	tcph_len = TCP_HDR_LENGTH((tcph_t *)tcp_start);

	lso->lso_hdr_sz = IPOIB_HDRSIZE + iph_len + tcph_len;

	/* Header fully inside the first mblk: use it in place. */
	if (lso->lso_hdr_sz <= MBLKL(mp)) {
		lso->lso_hdr = mp->b_rptr;
		return (0);
	}

	/* Header spans mblks: gather it into a private buffer. */
	lso->lso_hdr = kmem_zalloc(lso->lso_hdr_sz, KM_NOSLEEP);
	if (lso->lso_hdr == NULL) {
		DPRINT(10, "ibd_setup_lso: couldn't allocate lso hdr, "
		    "sz = %d", lso->lso_hdr_sz);
		lso->lso_hdr_sz = 0;
		lso->lso_mss = 0;
		return (-1);
	}

	dst = lso->lso_hdr;
	pending = lso->lso_hdr_sz;
	for (nmp = mp; nmp && pending; nmp = nmp->b_cont) {
		mblen = MBLKL(nmp);
		if (pending > mblen) {
			bcopy(nmp->b_rptr, dst, mblen);
			dst += mblen;
			pending -= mblen;
		} else {
			bcopy(nmp->b_rptr, dst, pending);
			break;
		}
	}

	return (0);

bad_chain:
	/* Leave the request in the same state as an allocation failure. */
	DPRINT(10, "ibd_setup_lso: mblk chain too short for lso hdr");
	lso->lso_hdr = NULL;
	lso->lso_hdr_sz = 0;
	lso->lso_mss = 0;
	return (-1);
}
/*
 * Free the LSO header buffer attached to a send wqe, if there is one.
 *
 * Nothing happens unless node and mp are both non-NULL, the work
 * request opcode is IBT_WRC_SEND_LSO, and lso_hdr is set to something
 * other than mp->b_rptr (i.e. it is not pointing into the packet
 * itself).  After freeing, lso_hdr and lso_hdr_sz are reset.
 */
static void
ibd_free_lsohdr(ibd_swqe_t *node, mblk_t *mp)
{
	ibt_wr_lso_t *lso;

	if (node == NULL || mp == NULL)
		return;

	if (node->w_swr.wr_opcode != IBT_WRC_SEND_LSO)
		return;

	lso = &(node->w_swr.wr.ud_lso);
	if (lso->lso_hdr == NULL || lso->lso_hdr == mp->b_rptr)
		return;

	kmem_free(lso->lso_hdr, lso->lso_hdr_sz);
	lso->lso_hdr = NULL;
	lso->lso_hdr_sz = 0;
}
/*
 * Post a send wqe to the UD channel, then keep draining the deferred
 * send queue (state->id_tx_head) until it is empty.
 *
 * "node" is posted on its own first.  After that, the whole deferred
 * chain is detached from id_tx_head under id_txpost_lock and posted
 * in batches of at most IBD_MAX_TX_POST_MULTIPLE work requests.  When
 * the chain is found empty, id_tx_busy is cleared under the same lock
 * and the function returns.
 *
 * Any work request that fails to post is handed to ibd_tx_cleanup();
 * for a batch, that is every request from index num_posted onward.
 */
static void
ibd_post_send(ibd_state_t *state, ibd_swqe_t *node)
{
	ibt_send_wr_t wrs[IBD_MAX_TX_POST_MULTIPLE];
	ibd_swqe_t *nodes[IBD_MAX_TX_POST_MULTIPLE];
	ibd_swqe_t *remaining;
	ibt_status_t status;
	uint_t batch;
	uint_t done;
	uint_t idx;

	/* The wqe we were called with goes out by itself. */
	status = ibt_post_send(state->id_chnl_hdl, &node->w_swr, 1, NULL);
	if (status != IBT_SUCCESS) {
		ibd_print_warn(state, "ibd_post_send: "
		    "posting one wr failed: ret=%d", status);
		ibd_tx_cleanup(state, node);
	}

	remaining = NULL;
	for (;;) {
		/* Out of local work: grab the deferred chain, or finish. */
		if (remaining == NULL) {
			mutex_enter(&state->id_txpost_lock);
			remaining = state->id_tx_head;
			if (remaining == NULL) {
				state->id_tx_busy = 0;
				mutex_exit(&state->id_txpost_lock);
				return;
			}
			state->id_tx_head = NULL;
			mutex_exit(&state->id_txpost_lock);
		}

		/*
		 * Copy up to one batch of work requests, remembering which
		 * wqe each came from for error cleanup.
		 */
		batch = 0;
		while ((remaining != NULL) &&
		    (batch < IBD_MAX_TX_POST_MULTIPLE)) {
			nodes[batch] = remaining;
			wrs[batch] = remaining->w_swr;
			remaining = WQE_TO_SWQE(remaining->swqe_next);
			batch++;
		}
		ASSERT(batch != 0);

		done = 0;
		status = ibt_post_send(state->id_chnl_hdl, wrs, batch, &done);
		if (status == IBT_SUCCESS)
			continue;

		ibd_print_warn(state, "ibd_post_send: "
		    "posting multiple wrs failed: "
		    "requested=%d, done=%d, ret=%d",
		    batch, done, status);
		for (idx = done; idx < batch; idx++)
			ibd_tx_cleanup(state, nodes[idx]);
	}
}
/*
 * Build the scatter/gather list of send wqe 'node' for message 'mp'.
 *
 * lsohdr_sz is the number of leading bytes of the message that form the
 * LSO header (0 for a non-LSO send); those bytes are skipped and only the
 * remaining data is described by the sgl.
 *
 * One of three strategies is used and recorded in node->w_buftype:
 *   IBD_WQE_MAPPED - the mblk data is mapped with ibt_map_mem_iov();
 *   IBD_WQE_TXBUF  - the data is copied into the wqe's own copy buffer;
 *   IBD_WQE_LSOBUF - the data is copied into buffers obtained from
 *                    ibd_acquire_lsobufs().
 *
 * Returns 0 on success and -1 if ibd_acquire_lsobufs() fails.
 */
static int
ibd_prepare_sgl(ibd_state_t *state, mblk_t *mp, ibd_swqe_t *node,
uint_t lsohdr_sz)
{
ibt_wr_ds_t *sgl;
ibt_status_t ibt_status;
mblk_t *nmp;
mblk_t *data_mp;
uchar_t *bufp;
size_t blksize;
size_t skip;
size_t avail;
uint_t pktsize;
uint_t frag_len;
uint_t pending_hdr;
int nmblks;
int i;
data_mp = mp;
pending_hdr = 0;
/*
 * For LSO, walk past the mblks fully occupied by the header.  On exit
 * data_mp is the first mblk holding payload and pending_hdr is the
 * number of header bytes still at the front of that mblk.
 */
if (lsohdr_sz) {
pending_hdr = lsohdr_sz;
for (nmp = mp; nmp; nmp = nmp->b_cont) {
frag_len = nmp->b_wptr - nmp->b_rptr;
if (frag_len > pending_hdr)
break;
pending_hdr -= frag_len;
}
data_mp = nmp;
ASSERT(data_mp != NULL);
}
/* Total payload size and number of mblks, header remainder excluded. */
pktsize = 0;
for (nmblks = 0, nmp = data_mp; nmp != NULL;
nmp = nmp->b_cont, nmblks++) {
pktsize += MBLKL(nmp);
}
pktsize -= pending_hdr;
/*
 * Map the mblks directly only when id_hca_res_lkey_capab is set, the
 * payload is above the copy threshold and the fragment count is below
 * id_max_sqseg_hiwm.
 */
if ((state->id_hca_res_lkey_capab) &&
(pktsize > state->id_ud_tx_copy_thresh) &&
(nmblks < state->id_max_sqseg_hiwm)) {
ibt_iov_t iov_arr[IBD_MAX_SQSEG];
ibt_iov_attr_t iov_attr;
iov_attr.iov_as = NULL;
iov_attr.iov = iov_arr;
iov_attr.iov_buf = NULL;
iov_attr.iov_list_len = nmblks;
iov_attr.iov_wr_nds = state->id_max_sqseg;
iov_attr.iov_lso_hdr_sz = lsohdr_sz;
iov_attr.iov_flags = IBT_IOV_SLEEP;
for (nmp = data_mp, i = 0; i < nmblks; i++, nmp = nmp->b_cont) {
iov_arr[i].iov_addr = (caddr_t)(void *)nmp->b_rptr;
iov_arr[i].iov_len = MBLKL(nmp);
/* The first fragment may still start with header bytes. */
if (i == 0) {
iov_arr[i].iov_addr += pending_hdr;
iov_arr[i].iov_len -= pending_hdr;
}
}
node->w_buftype = IBD_WQE_MAPPED;
node->w_swr.wr_sgl = node->w_sgl;
ibt_status = ibt_map_mem_iov(state->id_hca_hdl, &iov_attr,
(ibt_all_wr_t *)&node->w_swr, &node->w_mi_hdl);
if (ibt_status != IBT_SUCCESS) {
ibd_print_warn(state, "ibd_send: ibt_map_mem_iov "
"failed, nmblks=%d, ret=%d\n", nmblks, ibt_status);
/* Mapping failed: fall back to copying; w_buftype is reset below. */
goto ibd_copy_path;
}
return (0);
}
ibd_copy_path:
/* Payload fits in the wqe's own copy buffer: single-segment copy. */
if (pktsize <= state->id_tx_buf_sz) {
node->swqe_copybuf.ic_sgl.ds_len = pktsize;
node->w_swr.wr_nds = 1;
node->w_swr.wr_sgl = &node->swqe_copybuf.ic_sgl;
node->w_buftype = IBD_WQE_TXBUF;
bufp = (uchar_t *)(uintptr_t)node->w_swr.wr_sgl->ds_va;
for (nmp = data_mp; nmp != NULL; nmp = nmp->b_cont) {
/* pending_hdr is non-zero for the first mblk only. */
blksize = MBLKL(nmp) - pending_hdr;
bcopy(nmp->b_rptr + pending_hdr, bufp, blksize);
bufp += blksize;
pending_hdr = 0;
}
return (0);
}
/* Larger payload: copy into buffers from ibd_acquire_lsobufs(). */
node->w_swr.wr_sgl = node->w_sgl;
if (ibd_acquire_lsobufs(state, pktsize,
node->w_swr.wr_sgl, &(node->w_swr.wr_nds)) != 0) {
DPRINT(10, "ibd_prepare_sgl: lso bufs acquire failed");
return (-1);
}
node->w_buftype = IBD_WQE_LSOBUF;
nmp = data_mp;
skip = pending_hdr;
/*
 * Fill each sgl buffer with up to IBD_LSO_BUFSZ bytes.  'skip' is the
 * offset into the current mblk already consumed (header remainder or
 * bytes copied into the previous buffer).
 */
for (i = 0; i < node->w_swr.wr_nds; i++) {
sgl = node->w_swr.wr_sgl + i;
bufp = (uchar_t *)(uintptr_t)sgl->ds_va;
avail = IBD_LSO_BUFSZ;
while (nmp && avail) {
blksize = MBLKL(nmp) - skip;
if (blksize > avail) {
/* mblk spills over: fill this buffer, resume in the next one. */
bcopy(nmp->b_rptr + skip, bufp, avail);
skip += avail;
avail = 0;
} else {
/* Rest of this mblk fits: copy it and advance to the next mblk. */
bcopy(nmp->b_rptr + skip, bufp, blksize);
skip = 0;
avail -= blksize;
bufp += blksize;
nmp = nmp->b_cont;
}
}
}
return (0);
}
/*
 * Record that transmission is waiting on 'resource_type' by or-ing it
 * into state->id_sched_needed (under id_sched_lock).  When q_flag is
 * non-zero an IBD_ASYNC_SCHED work request is queued as well.
 *
 * Returns 0 on success, -1 if the work request could not be allocated
 * (the resource bit stays set in that case).
 */
static int
ibd_sched_poll(ibd_state_t *state, int resource_type, int q_flag)
{
	ibd_req_t *work;

	mutex_enter(&state->id_sched_lock);
	state->id_sched_needed |= resource_type;
	mutex_exit(&state->id_sched_lock);

	/* Caller did not ask for a work slot entry. */
	if (q_flag == 0)
		return (0);

	work = kmem_cache_alloc(state->id_req_kmc, KM_NOSLEEP);
	if (work == NULL)
		return (-1);

	ibd_queue_work_slot(state, work, IBD_ASYNC_SCHED);
	return (0);
}
/*
 * Transmit one message ('mp', possibly several mblks chained by b_cont).
 *
 * Returns B_TRUE when the message has been consumed: posted/queued for
 * transmission, handed to the async thread, or dropped.  Returns B_FALSE
 * when the message was not consumed and is still owned by the caller
 * (ibd_m_tx() stops and returns the remaining chain in that case).
 *
 * Outline:
 *   1. look up the address cache entry (ace) for the destination;
 *   2. grab a send wqe from the RC channel (established RC connection to
 *      a non-multicast destination) or from the UD tx list;
 *   3. pad ICMPv6 packets via IBD_PAD_NSNA();
 *   4. strip the ib_addrs_t pseudo header;
 *   5. RC: copy or map the data and post on the RC channel;
 *      UD: set up LSO/checksum, build the sgl and post on the UD channel.
 *
 * All error exits after a wqe has been obtained go through ibd_send_fail,
 * which frees 'mp' if 'dofree' is set and releases the wqe.
 */
static boolean_t
ibd_send(ibd_state_t *state, mblk_t *mp)
{
	ibd_ace_t *ace;
	ibd_swqe_t *node;
	ipoib_mac_t *dest;
	ib_header_info_t *ipibp;
	ip6_t *ip6h;
	uint_t pktsize;
	uint32_t mss;
	uint32_t hckflags;
	uint32_t lsoflags = 0;
	uint_t lsohdr_sz = 0;
	int ret, len;
	boolean_t dofree = B_FALSE;
	boolean_t rc;
	ibd_rc_chan_t *rc_chan;
	int nmblks;
	mblk_t *nmp;

	/* Nothing can be sent until the driver has been started. */
	if ((state->id_mac_state & IBD_DRV_STARTED) == 0)
		return (B_FALSE);

	/*
	 * Look up the address cache entry for the destination.  Multicast
	 * destinations first get scope and pkey filled in.
	 */
	ipibp = (ib_header_info_t *)mp->b_rptr;
	dest = (ipoib_mac_t *)&ipibp->ib_dst;
	if ((ntohl(dest->ipoib_qpn) & IB_QPN_MASK) == IB_MC_QPN)
		IBD_FILL_SCOPE_PKEY(dest, state->id_scope, state->id_pkey);

	rc_chan = NULL;
	ace = ibd_acache_lookup(state, dest, &ret, 1);
	if (state->id_enable_rc && (ace != NULL) &&
	    (ace->ac_mac.ipoib_qpn != htonl(IB_MC_QPN))) {
		if (ace->ac_chan == NULL) {
			state->rc_null_conn++;
		} else {
			if (ace->ac_chan->chan_state ==
			    IBD_RC_STATE_ACT_ESTAB) {
				/* Established RC channel: take a wqe from it. */
				rc_chan = ace->ac_chan;
				rc_chan->is_used = B_TRUE;
				mutex_enter(&rc_chan->tx_wqe_list.dl_mutex);
				node = WQE_TO_SWQE(
				    rc_chan->tx_wqe_list.dl_head);
				if (node != NULL) {
					rc_chan->tx_wqe_list.dl_cnt -= 1;
					rc_chan->tx_wqe_list.dl_head =
					    node->swqe_next;
				} else {
					node = ibd_rc_acquire_swqes(rc_chan);
				}
				mutex_exit(&rc_chan->tx_wqe_list.dl_mutex);

				if (node == NULL) {
					/* Out of RC wqes: ask for a resume. */
					state->rc_swqe_short++;
					mutex_enter(&state->id_sched_lock);
					state->id_sched_needed |=
					    IBD_RSRC_RC_SWQE;
					mutex_exit(&state->id_sched_lock);
					ibd_dec_ref_ace(state, ace);
					return (B_FALSE);
				}
			} else {
				state->rc_no_estab_conn++;
			}
		}
	}

	if (rc_chan == NULL) {
		/* UD path: take a wqe from the UD tx list. */
		mutex_enter(&state->id_tx_list.dl_mutex);
		node = WQE_TO_SWQE(state->id_tx_list.dl_head);
		if (node != NULL) {
			state->id_tx_list.dl_cnt -= 1;
			state->id_tx_list.dl_head = node->swqe_next;
		} else {
			node = ibd_acquire_swqe(state);
		}
		mutex_exit(&state->id_tx_list.dl_mutex);
		if (node == NULL) {
			/*
			 * No wqe available: flag the shortage and let the
			 * caller retry later.
			 */
			if (ibd_sched_poll(state, IBD_RSRC_SWQE, 0) == 0) {
				if (ace != NULL) {
					ibd_dec_ref_ace(state, ace);
				}
				return (B_FALSE);
			}

			/*
			 * The shortage could not be flagged, so the packet
			 * is dropped.  B_TRUE tells the caller the message
			 * was consumed, so it has to be freed here.
			 */
			ibd_print_warn(state, "ibd_send: no swqe, pkt drop");
			if (ace != NULL) {
				ibd_dec_ref_ace(state, ace);
			}
			freemsg(mp);
			return (B_TRUE);
		}
	}

	/*
	 * Reset the fields the cleanup routines look at, so that a failure
	 * below does not act on stale values from the wqe's previous use.
	 */
	node->swqe_im_mblk = NULL;
	node->w_swr.wr_nds = 0;
	node->w_swr.wr_sgl = NULL;
	node->w_swr.wr_opcode = IBT_WRC_SEND;

	/* Total message size and number of mblks. */
	pktsize = 0;
	for (nmblks = 0, nmp = mp; nmp != NULL;
	    nmp = nmp->b_cont, nmblks++) {
		pktsize += MBLKL(nmp);
	}

	if (bcmp(&ipibp->ib_dst, &state->id_bcaddr, IPOIB_ADDRL) == 0)
		atomic_inc_64(&state->id_brd_xmt);
	else if ((ntohl(ipibp->ib_dst.ipoib_qpn) & IB_QPN_MASK) == IB_MC_QPN)
		atomic_inc_64(&state->id_multi_xmt);

	if (ace != NULL) {
		node->w_ahandle = ace;
		node->w_swr.wr.ud.udwr_dest = ace->ac_dest;
	} else {
		DPRINT(5,
		    "ibd_send: acache lookup %s for %08X:%08X:%08X:%08X:%08X",
		    ((ret == EFAULT) ? "failed" : "queued"),
		    htonl(dest->ipoib_qpn), htonl(dest->ipoib_gidpref[0]),
		    htonl(dest->ipoib_gidpref[1]),
		    htonl(dest->ipoib_gidsuff[0]),
		    htonl(dest->ipoib_gidsuff[1]));
		state->rc_ace_not_found++;
		node->w_ahandle = NULL;

		/*
		 * No ace.  EFAULT from the lookup, or failure to queue the
		 * async poll, drops the packet; otherwise the packet is
		 * returned to the caller for a later retry.
		 */
		if (ret == EFAULT) {
			dofree = B_TRUE;
			rc = B_TRUE;
		} else if (ibd_sched_poll(state, IBD_RSRC_SWQE, 1) != 0) {
			dofree = B_TRUE;
			rc = B_TRUE;
		} else {
			dofree = B_FALSE;
			rc = B_FALSE;
		}
		goto ibd_send_fail;
	}

	/*
	 * IPv6: make sure the IPv6 header is contiguous with the IPoIB
	 * header, then pad ICMPv6 packets through IBD_PAD_NSNA().
	 */
	if (ntohs(ipibp->ipib_rhdr.ipoib_type) == ETHERTYPE_IPV6) {
		if (MBLKL(mp) < sizeof (ib_header_info_t) + IPV6_HDR_LEN) {
			if (!pullupmsg(mp, IPV6_HDR_LEN +
			    sizeof (ib_header_info_t))) {
				DPRINT(10, "ibd_send: pullupmsg failure ");
				dofree = B_TRUE;
				rc = B_TRUE;
				goto ibd_send_fail;
			}
			ipibp = (ib_header_info_t *)mp->b_rptr;
		}
		ip6h = (ip6_t *)((uchar_t *)ipibp +
		    sizeof (ib_header_info_t));
		len = ntohs(ip6h->ip6_plen);
		if (ip6h->ip6_nxt == IPPROTO_ICMPV6) {
			mblk_t *pad;

			/*
			 * Append 4 bytes of room for the padding.  allocb()
			 * can fail; drop the packet in that case, as is
			 * done for a pullupmsg() failure.
			 */
			pad = allocb(4, 0);
			if (pad == NULL) {
				DPRINT(10, "ibd_send: allocb failure ");
				dofree = B_TRUE;
				rc = B_TRUE;
				goto ibd_send_fail;
			}
			pad->b_wptr = (uchar_t *)pad->b_rptr + 4;
			linkb(mp, pad);
			if (MBLKL(mp) < sizeof (ib_header_info_t) +
			    IPV6_HDR_LEN + len + 4) {
				if (!pullupmsg(mp, sizeof (ib_header_info_t) +
				    IPV6_HDR_LEN + len + 4)) {
					DPRINT(10, "ibd_send: pullupmsg "
					    "failure ");
					dofree = B_TRUE;
					rc = B_TRUE;
					goto ibd_send_fail;
				}
				ip6h = (ip6_t *)((uchar_t *)mp->b_rptr +
				    sizeof (ib_header_info_t));
			}
			IBD_PAD_NSNA(ip6h, len, IBD_SEND);
		}
	}

	/*
	 * Strip the address pseudo header; it is not sent on the wire.
	 *
	 * NOTE(review): the B_FALSE ("retry later") exits below hand 'mp'
	 * back to the caller with b_rptr already advanced (and possibly
	 * padded) -- confirm the retry path copes with that.
	 */
	ASSERT(mp->b_wptr - mp->b_rptr >= sizeof (ib_addrs_t));
	mp->b_rptr += sizeof (ib_addrs_t);
	pktsize -= sizeof (ib_addrs_t);

	if (rc_chan) {
		/* Send over the Reliable Connected channel. */
		ibt_iov_t iov_arr[IBD_MAX_SQSEG];
		ibt_iov_attr_t iov_attr;
		uint_t i;
		size_t blksize;
		uchar_t *bufp;
		ibd_rc_tx_largebuf_t *lbufp;

		atomic_add_64(&state->rc_xmt_bytes, pktsize);

		ASSERT(node->w_swr.wr_trans == IBT_RC_SRV);

		if (pktsize <= state->id_rc_tx_copy_thresh) {
			/* Small packet: copy into the wqe's own buffer. */
			atomic_inc_64(&state->rc_xmt_small_pkt);
			node->swqe_copybuf.ic_sgl.ds_len = pktsize;
			node->w_swr.wr_nds = 1;
			node->w_swr.wr_sgl = &node->swqe_copybuf.ic_sgl;
			node->w_buftype = IBD_WQE_TXBUF;

			bufp = (uchar_t *)(uintptr_t)node->w_swr.wr_sgl->ds_va;
			for (nmp = mp; nmp != NULL; nmp = nmp->b_cont) {
				blksize = MBLKL(nmp);
				bcopy(nmp->b_rptr, bufp, blksize);
				bufp += blksize;
			}
			/* Data has been copied; the mblk is not needed. */
			freemsg(mp);
			ASSERT(node->swqe_im_mblk == NULL);
		} else {
			if ((state->rc_enable_iov_map) &&
			    (nmblks < state->rc_max_sqseg_hiwm)) {
				/* Map the non-empty mblks directly. */
				iov_attr.iov_as = NULL;
				iov_attr.iov = iov_arr;
				iov_attr.iov_buf = NULL;
				iov_attr.iov_wr_nds = state->rc_tx_max_sqseg;
				iov_attr.iov_lso_hdr_sz = 0;
				iov_attr.iov_flags = IBT_IOV_SLEEP;

				i = 0;
				for (nmp = mp; nmp != NULL; nmp = nmp->b_cont) {
					iov_arr[i].iov_len = MBLKL(nmp);
					if (iov_arr[i].iov_len != 0) {
						iov_arr[i].iov_addr = (caddr_t)
						    (void *)nmp->b_rptr;
						i++;
					}
				}
				iov_attr.iov_list_len = i;
				node->w_swr.wr_sgl = node->w_sgl;

				ret = ibt_map_mem_iov(state->id_hca_hdl,
				    &iov_attr, (ibt_all_wr_t *)&node->w_swr,
				    &node->w_mi_hdl);
				if (ret != IBT_SUCCESS) {
					atomic_inc_64(
					    &state->rc_xmt_map_fail_pkt);
					DPRINT(30, "ibd_send: ibt_map_mem_iov("
					    ") failed, nmblks=%d, real_nmblks"
					    "=%d, ret=0x%x", nmblks, i, ret);
					/* Fall back to the large copy. */
					goto ibd_rc_large_copy;
				}

				atomic_inc_64(&state->rc_xmt_map_succ_pkt);
				node->w_buftype = IBD_WQE_MAPPED;
				/* mblk must stay alive until completion. */
				node->swqe_im_mblk = mp;
			} else {
				atomic_inc_64(&state->rc_xmt_fragmented_pkt);
ibd_rc_large_copy:
				/* Copy into a pre-registered large buffer. */
				mutex_enter(&state->rc_tx_large_bufs_lock);
				if (state->rc_tx_largebuf_nfree == 0) {
					state->rc_xmt_buf_short++;
					mutex_exit
					    (&state->rc_tx_large_bufs_lock);
					mutex_enter(&state->id_sched_lock);
					state->id_sched_needed |=
					    IBD_RSRC_RC_TX_LARGEBUF;
					mutex_exit(&state->id_sched_lock);
					dofree = B_FALSE;
					rc = B_FALSE;
					/*
					 * No large buffer is attached, so
					 * the wqe must not be left marked
					 * IBD_WQE_RC_COPYBUF for cleanup.
					 */
					node->w_buftype = IBD_WQE_TXBUF;
					goto ibd_send_fail;
				}

				lbufp = state->rc_tx_largebuf_free_head;
				ASSERT(lbufp->lb_buf != NULL);
				state->rc_tx_largebuf_free_head =
				    lbufp->lb_next;
				lbufp->lb_next = NULL;
				state->rc_tx_largebuf_nfree --;
				mutex_exit(&state->rc_tx_large_bufs_lock);
				bufp = lbufp->lb_buf;
				node->w_sgl[0].ds_va =
				    (ib_vaddr_t)(uintptr_t)bufp;
				node->w_sgl[0].ds_key =
				    state->rc_tx_mr_desc.md_lkey;
				node->w_sgl[0].ds_len = pktsize;
				node->w_swr.wr_sgl = node->w_sgl;
				node->w_swr.wr_nds = 1;
				node->w_buftype = IBD_WQE_RC_COPYBUF;
				node->w_rc_tx_largebuf = lbufp;

				for (nmp = mp; nmp != NULL; nmp = nmp->b_cont) {
					blksize = MBLKL(nmp);
					if (blksize != 0) {
						bcopy(nmp->b_rptr, bufp,
						    blksize);
						bufp += blksize;
					}
				}
				freemsg(mp);
				ASSERT(node->swqe_im_mblk == NULL);
			}
		}

		/*
		 * Post directly if nobody else is posting on this channel,
		 * otherwise append to the channel's pending list for the
		 * current poster to pick up.
		 */
		node->swqe_next = NULL;
		mutex_enter(&rc_chan->tx_post_lock);
		if (rc_chan->tx_busy) {
			if (rc_chan->tx_head) {
				rc_chan->tx_tail->swqe_next =
				    SWQE_TO_WQE(node);
			} else {
				rc_chan->tx_head = node;
			}
			rc_chan->tx_tail = node;
			mutex_exit(&rc_chan->tx_post_lock);
		} else {
			rc_chan->tx_busy = 1;
			mutex_exit(&rc_chan->tx_post_lock);
			ibd_rc_post_send(rc_chan, node);
		}

		return (B_TRUE);
	}

	if ((state->id_enable_rc) && (pktsize > state->id_mtu)) {
		/*
		 * RC mode is enabled but this packet goes out over UD and
		 * exceeds the UD MTU.
		 */
		if (ace->ac_mac.ipoib_qpn != htonl(IB_MC_QPN)) {
			ibd_req_t *req;

			/*
			 * Unicast: hand the packet to the async thread
			 * (IBD_ASYNC_RC_TOO_BIG), one at a time per ace.
			 */
			mutex_enter(&ace->tx_too_big_mutex);
			if (ace->tx_too_big_ongoing) {
				mutex_exit(&ace->tx_too_big_mutex);
				state->rc_xmt_reenter_too_long_pkt++;
				dofree = B_TRUE;
			} else {
				ace->tx_too_big_ongoing = B_TRUE;
				mutex_exit(&ace->tx_too_big_mutex);
				state->rc_xmt_icmp_too_long_pkt++;

				req = kmem_cache_alloc(state->id_req_kmc,
				    KM_NOSLEEP);
				if (req == NULL) {
					ibd_print_warn(state, "ibd_send: alloc "
					    "ibd_req_t fail");
					dofree = B_TRUE;
				} else {
					/* The async thread now owns mp. */
					req->rq_ptr = mp;
					req->rq_ptr2 = ace;
					ibd_queue_work_slot(state, req,
					    IBD_ASYNC_RC_TOO_BIG);
					dofree = B_FALSE;
				}
			}
		} else {
			/* Multicast: nothing to do but drop it. */
			ibd_print_warn(state, "Reliable Connected mode is on. "
			    "Multicast packet length %d > %d is too long to "
			    "send packet, drop it",
			    pktsize, state->id_mtu);
			state->rc_xmt_drop_too_long_pkt++;
			dofree = B_TRUE;
		}
		rc = B_TRUE;
		goto ibd_send_fail;
	}

	atomic_add_64(&state->id_xmt_bytes, pktsize);
	atomic_inc_64(&state->id_xmt_pkt);

	/* LSO: set up the header and switch the opcode when requested. */
	mac_lso_get(mp, &mss, &lsoflags);
	if ((lsoflags & HW_LSO) != HW_LSO) {
		node->w_swr.wr_opcode = IBT_WRC_SEND;
		lsohdr_sz = 0;
	} else {
		if (ibd_setup_lso(node, mp, mss, ace->ac_dest) != 0) {
			ibd_print_warn(state,
			    "ibd_send: no memory, lso posting failed");
			dofree = B_TRUE;
			rc = B_TRUE;
			goto ibd_send_fail;
		}

		node->w_swr.wr_opcode = IBT_WRC_SEND_LSO;
		lsohdr_sz = (node->w_swr.wr.ud_lso).lso_hdr_sz;
	}

	/* Request checksum offload only when full checksum is flagged. */
	mac_hcksum_get(mp, NULL, NULL, NULL, NULL, &hckflags);
	if ((hckflags & HCK_FULLCKSUM) == HCK_FULLCKSUM)
		node->w_swr.wr_flags |= IBT_WR_SEND_CKSUM;
	else
		node->w_swr.wr_flags &= ~IBT_WR_SEND_CKSUM;

	/*
	 * Build the sgl.  On failure ask for a reschedule on LSO buffers;
	 * drop the packet only if that request cannot be queued.
	 */
	if (ibd_prepare_sgl(state, mp, node, lsohdr_sz) != 0) {
		if (ibd_sched_poll(state, IBD_RSRC_LSOBUF, 1) != 0) {
			dofree = B_TRUE;
			rc = B_TRUE;
		} else {
			dofree = B_FALSE;
			rc = B_FALSE;
		}
		goto ibd_send_fail;
	}
	/* From here on tx cleanup frees the mblk. */
	node->swqe_im_mblk = mp;

	/*
	 * Post directly if nobody else is posting, otherwise append to the
	 * pending list that the current poster drains.
	 */
	node->swqe_next = NULL;
	mutex_enter(&state->id_txpost_lock);
	if (state->id_tx_busy) {
		if (state->id_tx_head) {
			state->id_tx_tail->swqe_next =
			    SWQE_TO_WQE(node);
		} else {
			state->id_tx_head = node;
		}
		state->id_tx_tail = node;
		mutex_exit(&state->id_txpost_lock);
	} else {
		state->id_tx_busy = 1;
		mutex_exit(&state->id_txpost_lock);
		ibd_post_send(state, node);
	}

	return (B_TRUE);

ibd_send_fail:
	/* Release any LSO header, the mblk if dropping, and the wqe. */
	if (node && mp)
		ibd_free_lsohdr(node, mp);

	if (dofree)
		freemsg(mp);

	if (node != NULL) {
		if (rc_chan) {
			ibd_rc_tx_cleanup(node);
		} else {
			ibd_tx_cleanup(state, node);
		}
	}

	return (rc);
}
/*
 * Transmit entry point: send each message of the b_next-linked chain
 * 'mp' through ibd_send().
 *
 * The whole chain is freed and NULL returned when this instance is the
 * port driver, the link is not up, or the driver is not started.
 * Otherwise returns NULL when every message was accepted, or the first
 * message ibd_send() refused (B_FALSE), re-linked to the rest of the
 * chain.
 */
static mblk_t *
ibd_m_tx(void *arg, mblk_t *mp)
{
	ibd_state_t *state = (ibd_state_t *)arg;
	mblk_t *rest;

	if (state->id_type == IBD_PORT_DRIVER) {
		freemsgchain(mp);
		return (NULL);
	}

	if ((state->id_link_state != LINK_STATE_UP) ||
	    ((state->id_mac_state & IBD_DRV_STARTED) == 0)) {
		freemsgchain(mp);
		return (NULL);
	}

	for (; mp != NULL; mp = rest) {
		/* Detach the message from the chain before sending it. */
		rest = mp->b_next;
		mp->b_next = NULL;
		if (ibd_send(state, mp) == B_FALSE) {
			/* Not accepted: give it back with the remainder. */
			mp->b_next = rest;
			return (mp);
		}
	}

	return (NULL);
}
/*
 * Interrupt-style handler for the receive side: polls the receive CQ of
 * the instance passed in 'arg' and always reports the interrupt claimed.
 */
static uint_t
ibd_intr(caddr_t arg)
{
	ibd_state_t *ibd = (ibd_state_t *)arg;

	ibd_poll_rcq(ibd, ibd->id_rcq_hdl);

	return (DDI_INTR_CLAIMED);
}
/*
 * Drain the send completion queue.
 *
 * Completions are polled in batches of up to state->id_txwcs_size.  The
 * send wqes of each batch are chained together (regardless of completion
 * status) and released with ibd_tx_cleanup_list(), after which
 * ibd_resume_transmission() is called.
 */
static void
ibd_drain_scq(ibd_state_t *state, ibt_cq_hdl_t cq_hdl)
{
	ibt_wc_t *wc_arr = state->id_txwcs;
	uint_t wc_arr_sz = state->id_txwcs_size;
	ibd_swqe_t *first, *last, *swqe;
	ibd_wqe_t *wqe;
	ibt_wc_t *wc;
	uint_t npolled;
	int idx;

	while (ibt_poll_cq(cq_hdl, wc_arr, wc_arr_sz, &npolled) ==
	    IBT_SUCCESS) {
		first = NULL;
		last = NULL;

		for (idx = 0; idx < npolled; idx++) {
			wc = &wc_arr[idx];
			/* wc_id carries the wqe pointer. */
			wqe = (ibd_wqe_t *)(uintptr_t)wc->wc_id;

			if (wc->wc_status != IBT_WC_SUCCESS) {
				if (wc->wc_status == IBT_WC_WR_FLUSHED_ERR) {
					DPRINT(5, "ibd_drain_scq: flush error");
					DPRINT(10, "ibd_drain_scq: Bad "
					    "status %d", wc->wc_status);
				} else {
					DPRINT(10, "ibd_drain_scq: "
					    "unexpected wc_status %d",
					    wc->wc_status);
				}
				/* Failed sends are cleaned up as well. */
			}

			/* Append to the cleanup list for this batch. */
			swqe = WQE_TO_SWQE(wqe);
			if (first == NULL)
				first = swqe;
			else
				last->swqe_next = wqe;
			last = swqe;
		}

		last->swqe_next = NULL;
		ibd_tx_cleanup_list(state, first, last);

		ibd_resume_transmission(state);
	}
}
/*
 * Drain the receive completion queue.
 *
 * Completions are polled in batches of up to state->id_rxwcs_size.  Each
 * successful completion is turned into an mblk by ibd_process_rx(); the
 * mblks of a batch are chained through b_next and passed up with one
 * mac_rx() call.  After every batch the count of polled rwqes is taken
 * off id_rx_list.dl_cnt, and ibd_post_recv_intr() is called when the
 * result is below a quarter of id_ud_num_rwqe.
 */
static void
ibd_drain_rcq(ibd_state_t *state, ibt_cq_hdl_t cq_hdl)
{
	ibt_wc_t *wc_arr = state->id_rxwcs;
	uint_t wc_arr_sz = state->id_rxwcs_size;
	mblk_t *chain_head, *chain_tail, *mp;
	ibd_rwqe_t *rwqe;
	ibt_wc_t *wc;
	uint_t npolled;
	int idx;

	while (ibt_poll_cq(cq_hdl, wc_arr, wc_arr_sz, &npolled) ==
	    IBT_SUCCESS) {
		chain_head = NULL;
		chain_tail = NULL;

		for (idx = 0; idx < npolled; idx++) {
			wc = &wc_arr[idx];
			/* wc_id carries the rwqe pointer. */
			rwqe = (ibd_rwqe_t *)(uintptr_t)wc->wc_id;

			if (wc->wc_status != IBT_WC_SUCCESS) {
				if (wc->wc_status == IBT_WC_WR_FLUSHED_ERR) {
					DPRINT(5, "ibd_drain_rcq: "
					    "expected flushed rwqe");
				} else {
					DPRINT(5, "ibd_drain_rcq: "
					    "unexpected wc_status %d",
					    wc->wc_status);
				}
				/*
				 * The count is bumped before the mblk is
				 * freed; presumably this balances the
				 * decrement done in ibd_freemsg_cb().
				 */
				atomic_inc_32(
				    &state->id_rx_list.dl_bufs_outstanding);
				freemsg(rwqe->rwqe_im_mblk);
				continue;
			}

			mp = ibd_process_rx(state, rwqe, wc);
			if (mp == NULL)
				continue;

			/* Append to the chain handed to the MAC layer. */
			if (chain_head == NULL)
				chain_head = mp;
			else
				chain_tail->b_next = mp;
			chain_tail = mp;
		}

		if (chain_head != NULL)
			mac_rx(state->id_mh, state->id_rh, chain_head);

		/* Account for the rwqes consumed; repost when running low. */
		if (atomic_add_32_nv(&state->id_rx_list.dl_cnt, -npolled) <
		    (state->id_ud_num_rwqe / 4))
			ibd_post_recv_intr(state);
	}
}
/*
 * Poll the send CQ, allowing only one poller at a time.
 *
 * If another thread already holds IBD_CQ_POLLING in id_scq_poll_busy,
 * IBD_REDO_CQ_POLLING is set to make that thread poll again, and this
 * call returns.  The poller drains the CQ, then repeatedly re-arms
 * notification and drains again until no redo request is pending.
 */
static void
ibd_poll_scq(ibd_state_t *state, ibt_cq_hdl_t cq_hdl)
{
	const int polling_bit = IBD_CQ_POLLING;
	const int redo_bit = IBD_REDO_CQ_POLLING;
	boolean_t again;

	mutex_enter(&state->id_scq_poll_lock);
	if (state->id_scq_poll_busy & polling_bit) {
		ibd_print_warn(state, "ibd_poll_scq: multiple polling threads");
		state->id_scq_poll_busy |= redo_bit;
		mutex_exit(&state->id_scq_poll_lock);
		return;
	}
	state->id_scq_poll_busy |= polling_bit;
	mutex_exit(&state->id_scq_poll_lock);

	ibd_drain_scq(state, cq_hdl);

	do {
		/*
		 * Re-arm, then drain once more to pick up completions that
		 * arrived between the previous drain and the re-arm.
		 */
		if (ibt_enable_cq_notify(cq_hdl, IBT_NEXT_COMPLETION) !=
		    IBT_SUCCESS) {
			DPRINT(10, "ibd_intr: ibt_enable_cq_notify() failed");
		}

		ibd_drain_scq(state, cq_hdl);

		mutex_enter(&state->id_scq_poll_lock);
		if (state->id_scq_poll_busy & redo_bit) {
			/* Someone asked for another pass meanwhile. */
			state->id_scq_poll_busy &= ~redo_bit;
			again = B_TRUE;
		} else {
			state->id_scq_poll_busy &= ~polling_bit;
			again = B_FALSE;
		}
		mutex_exit(&state->id_scq_poll_lock);
	} while (again);
}
/*
 * Poll the receive CQ, allowing only one poller at a time.
 *
 * If another thread already holds IBD_CQ_POLLING in id_rcq_poll_busy,
 * IBD_REDO_CQ_POLLING is set to make that thread poll again, and this
 * call returns.  The poller drains the CQ, then repeatedly re-arms
 * notification and drains again until no redo request is pending.
 */
static void
ibd_poll_rcq(ibd_state_t *state, ibt_cq_hdl_t rcq)
{
	const int polling_bit = IBD_CQ_POLLING;
	const int redo_bit = IBD_REDO_CQ_POLLING;
	boolean_t again;

	mutex_enter(&state->id_rcq_poll_lock);
	if (state->id_rcq_poll_busy & polling_bit) {
		ibd_print_warn(state, "ibd_poll_rcq: multiple polling threads");
		state->id_rcq_poll_busy |= redo_bit;
		mutex_exit(&state->id_rcq_poll_lock);
		return;
	}
	state->id_rcq_poll_busy |= polling_bit;
	mutex_exit(&state->id_rcq_poll_lock);

	ibd_drain_rcq(state, rcq);

	do {
		/*
		 * Re-arm, then drain once more to pick up completions that
		 * arrived between the previous drain and the re-arm.
		 */
		if (ibt_enable_cq_notify(rcq, IBT_NEXT_COMPLETION) !=
		    IBT_SUCCESS) {
			DPRINT(10, "ibd_intr: ibt_enable_cq_notify() failed");
		}

		ibd_drain_rcq(state, rcq);

		mutex_enter(&state->id_rcq_poll_lock);
		if (state->id_rcq_poll_busy & redo_bit) {
			/* Someone asked for another pass meanwhile. */
			state->id_rcq_poll_busy &= ~redo_bit;
			again = B_TRUE;
		} else {
			state->id_rcq_poll_busy &= ~polling_bit;
			again = B_FALSE;
		}
		mutex_exit(&state->id_rcq_poll_lock);
	} while (again);
}
/*
 * Undo the ibt_map_mem_iov() mapping of a send wqe, if it has one, and
 * reset its segment count.  An unmap failure is only logged; the handle
 * is cleared either way.
 */
void
ibd_unmap_mem(ibd_state_t *state, ibd_swqe_t *swqe)
{
	ibt_status_t stat;

	DPRINT(20, "ibd_unmap_mem: wqe=%p, seg=%d\n", swqe, swqe->w_swr.wr_nds);

	if (swqe->w_mi_hdl != NULL) {
		stat = ibt_unmap_mem_iov(state->id_hca_hdl, swqe->w_mi_hdl);
		if (stat != IBT_SUCCESS) {
			DPRINT(10,
			    "failed in ibt_unmap_mem_iov, ret=%d\n", stat);
		}
		swqe->w_mi_hdl = NULL;
	}

	swqe->w_swr.wr_nds = 0;
}
/*
 * Drop one reference on address cache entry 'ace'.
 *
 * When DEC_REF_DO_CYCLE() reports that recycling is due, the entry is
 * re-examined under id_ac_mutex and, if GET_REF_CYCLE() is zero, placed on
 * the free list.  If the entry still has an mce attached, it is first
 * pulled off the active list and an IBD_ASYNC_REAP request is queued for
 * that mce.
 *
 * NOTE(review): the exact semantics of the *_REF_CYCLE macros are defined
 * outside this section -- confirm before relying on the summary above.
 */
void
ibd_dec_ref_ace(ibd_state_t *state, ibd_ace_t *ace)
{
if (DEC_REF_DO_CYCLE(ace)) {
ibd_mce_t *mce;
/*
 * The decrement above happened without the lock, so the count is
 * re-checked with id_ac_mutex held before the entry is moved.
 */
mutex_enter(&state->id_ac_mutex);
mce = ace->ac_mce;
if (GET_REF_CYCLE(ace) == 0) {
CLEAR_REFCYCLE(ace);
if (mce != NULL) {
/* Detach the mce and let the async thread reap it. */
ace->ac_mce = NULL;
IBD_ACACHE_PULLOUT_ACTIVE(state, ace);
ibd_queue_work_slot(state,
&mce->mc_req, IBD_ASYNC_REAP);
}
IBD_ACACHE_INSERT_FREE(state, ace);
}
mutex_exit(&state->id_ac_mutex);
}
}
/*
 * Release everything a single send wqe holds and return the wqe to the
 * free pool.
 *
 * If an mblk is attached, the buffer backing the send is released first
 * (unmapped for IBD_WQE_MAPPED, LSO buffers returned for IBD_WQE_LSOBUF),
 * then any LSO header and the mblk itself are freed.  The reference on
 * the wqe's address cache entry, if any, is dropped.
 */
static void
ibd_tx_cleanup(ibd_state_t *state, ibd_swqe_t *swqe)
{
	ibd_ace_t *ace = swqe->w_ahandle;

	DPRINT(20, "ibd_tx_cleanup %p\n", swqe);

	if (swqe->swqe_im_mblk != NULL) {
		switch (swqe->w_buftype) {
		case IBD_WQE_MAPPED:
			ibd_unmap_mem(state, swqe);
			break;
		case IBD_WQE_LSOBUF:
			ibd_release_lsobufs(state,
			    swqe->w_swr.wr_sgl, swqe->w_swr.wr_nds);
			break;
		default:
			/* Other buffer types need no release here. */
			break;
		}
		ibd_free_lsohdr(swqe, swqe->swqe_im_mblk);
		freemsg(swqe->swqe_im_mblk);
		swqe->swqe_im_mblk = NULL;
	}

	/* w_ahandle may be NULL, e.g. on the ibd_send() error path. */
	if (ace != NULL) {
		ibd_dec_ref_ace(state, ace);
	}

	swqe->swqe_next = NULL;
	ibd_release_swqe(state, swqe, swqe, 1);
}
/*
 * List variant of ibd_tx_cleanup(): release the resources of every send
 * wqe on the NULL-terminated chain starting at 'head', then return the
 * whole chain (head..tail) to the free pool with a single
 * ibd_release_swqe() call.
 */
static void
ibd_tx_cleanup_list(ibd_state_t *state, ibd_swqe_t *head, ibd_swqe_t *tail)
{
	ibd_swqe_t *cur;
	ibd_ace_t *ace;
	int released = 0;

	DPRINT(20, "ibd_tx_cleanup_list %p %p\n", head, tail);

	cur = head;
	while (cur != NULL) {
		if (cur->swqe_im_mblk != NULL) {
			switch (cur->w_buftype) {
			case IBD_WQE_MAPPED:
				ibd_unmap_mem(state, cur);
				break;
			case IBD_WQE_LSOBUF:
				ibd_release_lsobufs(state,
				    cur->w_swr.wr_sgl, cur->w_swr.wr_nds);
				break;
			default:
				/* Other buffer types need no release here. */
				break;
			}
			ibd_free_lsohdr(cur, cur->swqe_im_mblk);
			freemsg(cur->swqe_im_mblk);
			cur->swqe_im_mblk = NULL;
		}

		/* Drop the address cache reference, if one is held. */
		ace = cur->w_ahandle;
		if (ace != NULL) {
			ibd_dec_ref_ace(state, ace);
		}

		released++;
		cur = WQE_TO_SWQE(cur->swqe_next);
	}

	ibd_release_swqe(state, head, tail, released);
}
/*
 * Turn one successful receive completion into an mblk for the MAC layer.
 *
 * Returns the mblk to pass up, or NULL when the packet is not delivered:
 * either no memory was available for a copy, or the packet was recognised
 * as this instance's own loopback and freed.
 *
 * The header area at the front of the buffer is rewritten in place into
 * an ib_header_info_t (source and destination addresses) before the
 * packet is returned.
 */
static mblk_t *
ibd_process_rx(ibd_state_t *state, ibd_rwqe_t *rwqe, ibt_wc_t *wc)
{
ib_header_info_t *phdr;
mblk_t *mp;
ipoib_hdr_t *ipibp;
ipha_t *iphap;
ip6_t *ip6h;
int len;
ib_msglen_t pkt_len = wc->wc_bytes_xfer;
uint32_t bufs;
/* Count this buffer as handed out. */
bufs = atomic_inc_32_nv(&state->id_rx_list.dl_bufs_outstanding);
/*
 * Too many receive buffers are outstanding: undo the count, copy the
 * data into a freshly allocated mblk and repost the rwqe right away.
 */
if (bufs >= state->id_rx_bufs_outstanding_limit) {
atomic_dec_32(&state->id_rx_list.dl_bufs_outstanding);
atomic_inc_32(&state->id_rx_allocb);
mp = allocb(pkt_len, BPRI_HI);
if (mp) {
bcopy(rwqe->rwqe_im_mblk->b_rptr, mp->b_rptr, pkt_len);
ibd_post_recv(state, rwqe);
} else {
/* No memory: the packet is lost, the rwqe is still reposted. */
atomic_inc_32(&state->id_rx_allocb_failed);
ibd_post_recv(state, rwqe);
return (NULL);
}
} else {
/* Loan the rwqe's own mblk upstream. */
mp = rwqe->rwqe_im_mblk;
}
/* Set the data length to what was actually received. */
mp->b_wptr = mp->b_rptr + pkt_len;
if (mp->b_next != NULL) {
ibd_print_warn(state,
"ibd_process_rx: got duplicate mp from rcq?");
mp->b_next = NULL;
}
phdr = (ib_header_info_t *)mp->b_rptr;
if (wc->wc_flags & IBT_WC_GRH_PRESENT) {
/* Record the sender's QPN from the completion in the header. */
phdr->ib_grh.ipoib_sqpn = htonl(wc->wc_qpn);
/*
 * Drop packets whose source address matches this instance's own
 * address (rc_macaddr_loopback when RC is enabled, id_macaddr
 * otherwise).
 */
if (state->id_enable_rc) {
if (bcmp(&phdr->ib_grh.ipoib_sqpn,
&state->rc_macaddr_loopback,
IPOIB_ADDRL) == 0) {
freemsg(mp);
return (NULL);
}
} else {
if (bcmp(&phdr->ib_grh.ipoib_sqpn, &state->id_macaddr,
IPOIB_ADDRL) == 0) {
freemsg(mp);
return (NULL);
}
}
/* Regions may overlap, hence ovbcopy(). */
ovbcopy(&phdr->ib_grh.ipoib_sqpn, &phdr->ib_src,
sizeof (ipoib_mac_t));
/* A destination GID starting with 0xFF is treated as multicast. */
if (*(uint8_t *)(phdr->ib_grh.ipoib_dgid_pref) == 0xFF) {
phdr->ib_dst.ipoib_qpn = htonl(IB_MC_QPN);
IBD_CLEAR_SCOPE_PKEY(&phdr->ib_dst);
} else {
phdr->ib_dst.ipoib_qpn = state->id_macaddr.ipoib_qpn;
}
} else {
/*
 * No GRH: zero ipoib_vertcflow and use this interface's address as
 * the destination.
 */
phdr->ib_grh.ipoib_vertcflow = 0;
ovbcopy(&state->id_macaddr, &phdr->ib_dst,
sizeof (ipoib_mac_t));
}
/* The IPoIB header follows the pseudo GRH. */
ipibp = (ipoib_hdr_t *)((uchar_t *)mp->b_rptr + sizeof (ipoib_pgrh_t));
if (ntohs(ipibp->ipoib_type) == ETHERTYPE_IPV6) {
ip6h = (ip6_t *)((uchar_t *)ipibp + sizeof (ipoib_hdr_t));
len = ntohs(ip6h->ip6_plen);
/* ICMPv6 packets get the receive-side IBD_PAD_NSNA() treatment. */
if (ip6h->ip6_nxt == IPPROTO_ICMPV6) {
IBD_PAD_NSNA(ip6h, len, IBD_RECV);
}
}
/* Receive statistics. */
atomic_add_64(&state->id_rcv_bytes, pkt_len);
atomic_inc_64(&state->id_rcv_pkt);
if (bcmp(&phdr->ib_dst, &state->id_bcaddr, IPOIB_ADDRL) == 0)
atomic_inc_64(&state->id_brd_rcv);
else if ((ntohl(phdr->ib_dst.ipoib_qpn) & IB_QPN_MASK) == IB_MC_QPN)
atomic_inc_64(&state->id_multi_rcv);
iphap = (ipha_t *)((uchar_t *)ipibp + sizeof (ipoib_hdr_t));
/*
 * Mark the checksum as verified only when the completion flags it OK,
 * the reported checksum is 0xFFFF and the IP header has the simple
 * version/length value (IP_SIMPLE_HDR_VERSION).
 */
if (((wc->wc_flags & IBT_WC_CKSUM_OK) == IBT_WC_CKSUM_OK) &&
(wc->wc_cksum == 0xFFFF) &&
(iphap->ipha_version_and_hdr_length == IP_SIMPLE_HDR_VERSION)) {
mac_hcksum_set(mp, 0, 0, 0, 0, HCK_FULLCKSUM_OK);
}
return (mp);
}
static void
ibd_freemsg_cb(char *arg)
{
ibd_rwqe_t *rwqe = (ibd_rwqe_t *)arg;
ibd_state_t *state = rwqe->w_state;
atomic_dec_32(&state->id_rx_list.dl_bufs_outstanding);
if (atomic_add_32_nv(&state->id_running, 0) == 0) {
DPRINT(6, "ibd_freemsg: wqe being freed");
rwqe->rwqe_im_mblk = NULL;
ibd_free_rwqe(state, rwqe);
return;
}
rwqe->rwqe_im_mblk = desballoc(rwqe->rwqe_copybuf.ic_bufaddr,
state->id_mtu + IPOIB_GRH_SIZE, 0, &rwqe->w_freemsg_cb);
if (rwqe->rwqe_im_mblk == NULL) {
ibd_free_rwqe(state, rwqe);
DPRINT(6, "ibd_freemsg: desballoc failed");
return;
}
ibd_post_recv(state, rwqe);
}
/*
 * Interrupt-style handler for the transmit side: polls the send CQ of the
 * instance passed in 'arg' and always reports the interrupt claimed.
 */
static uint_t
ibd_tx_recycle(caddr_t arg)
{
	ibd_state_t *ibd = (ibd_state_t *)arg;

	ibd_poll_scq(ibd, ibd->id_scq_hdl);

	return (DDI_INTR_CLAIMED);
}
#ifdef IBD_LOGGING
/*
 * Set up the debug log: allocate a zeroed buffer of IBD_LOG_SZ bytes,
 * reset the write index and initialise the lock protecting the index.
 */
static void
ibd_log_init(void)
{
/* KM_SLEEP: the allocation waits for memory instead of failing. */
ibd_lbuf = kmem_zalloc(IBD_LOG_SZ, KM_SLEEP);
ibd_lbuf_ndx = 0;
mutex_init(&ibd_lbuf_lock, NULL, MUTEX_DRIVER, NULL);
}
/*
 * Tear down the debug log: free the buffer if one was allocated, reset
 * the write index and buffer pointer, and destroy the log lock.
 */
static void
ibd_log_fini(void)
{
	if (ibd_lbuf != NULL) {
		kmem_free(ibd_lbuf, IBD_LOG_SZ);
	}

	ibd_lbuf_ndx = 0;
	ibd_lbuf = NULL;

	mutex_destroy(&ibd_lbuf_lock);
}
/*
 * Append a printf-style message to the in-memory debug log.
 *
 * The message is formatted into a stack buffer and truncated to
 * IBD_DMAX_LINE - 1 bytes.  Its slot in ibd_lbuf is reserved under
 * ibd_lbuf_lock; the bytes are copied in after the lock is dropped.
 * Does nothing if the log buffer has not been allocated.
 */
static void
ibd_log(const char *fmt, ...)
{
va_list ap;
uint32_t off;
uint32_t msglen;
char tmpbuf[IBD_DMAX_LINE];
if (ibd_lbuf == NULL)
return;
va_start(ap, fmt);
msglen = vsnprintf(tmpbuf, IBD_DMAX_LINE, fmt, ap);
va_end(ap);
/* vsnprintf() reports the untruncated length; clamp to what was stored. */
if (msglen >= IBD_DMAX_LINE)
msglen = IBD_DMAX_LINE - 1;
mutex_enter(&ibd_lbuf_lock);
/* This message starts at the current index. */
off = ibd_lbuf_ndx;
/* Make sure the previous message ends with a newline. */
if ((ibd_lbuf_ndx) && (ibd_lbuf[ibd_lbuf_ndx-1] != '\n'))
ibd_lbuf[ibd_lbuf_ndx-1] = '\n';
/* Advance the index past this message and terminate the log there. */
ibd_lbuf_ndx += msglen;
ibd_lbuf[ibd_lbuf_ndx] = 0;
/* Wrap to the start once within two lines of the end of the buffer. */
if (ibd_lbuf_ndx >= (IBD_LOG_SZ - 2 * IBD_DMAX_LINE))
ibd_lbuf_ndx = 0;
mutex_exit(&ibd_lbuf_lock);
/* The slot [off, off + msglen) was reserved above; copy outside the lock. */
bcopy(tmpbuf, ibd_lbuf+off, msglen);
}
#endif
/*
 * ioctl handler: create an IB partition object on top of a port instance.
 *
 * karg points to an ibd_create_ioctl_t.  Detailed failure reasons are
 * reported through cmd->ibdioc.ioc_status in addition to the errno
 * return value.  arg, mode and rvalp are not used.
 *
 * Steps:
 *   1. validate the port instance and the pkey;
 *   2. unless creation is forced, check that the port is active and that
 *      its pkey table contains the requested pkey;
 *   3. reject a duplicate (same port instance, pkey and partition link id);
 *   4. allocate and attach the partition state, register it with the MAC
 *      layer and create the devnet link;
 *   5. insert the state at the head of the global partition list.
 *
 * Returns 0 on success or an errno value.
 */
static int
ibd_create_partition(void *karg, intptr_t arg, int mode, cred_t *credp,
    int *rvalp)
{
	ibd_create_ioctl_t *cmd = karg;
	ibd_state_t *state, *port_state, *p;
	int i, err, rval = 0;
	mac_register_t *macp;
	ibt_hca_portinfo_t *pinfop = NULL;
	ibt_status_t ibt_status;
	uint_t psize, pinfosz;
	boolean_t force_create = B_FALSE;

	cmd->ibdioc.ioc_status = 0;

	if (cmd->ibdioc.ioc_port_inst < 0) {
		cmd->ibdioc.ioc_status = IBD_INVALID_PORT_INST;
		return (EINVAL);
	}
	port_state = ddi_get_soft_state(ibd_list, cmd->ibdioc.ioc_port_inst);
	if (port_state == NULL) {
		DPRINT(10, "ibd_create_partition: failed to get state %d",
		    cmd->ibdioc.ioc_port_inst);
		cmd->ibdioc.ioc_status = IBD_INVALID_PORT_INST;
		return (EINVAL);
	}

	/* Pkeys at or below IB_PKEY_INVALID_FULL are rejected. */
	if (cmd->ioc_pkey <= IB_PKEY_INVALID_FULL) {
		rval = EINVAL;
		goto part_create_return;
	}

	if (cmd->ioc_force_create == 0) {
		/*
		 * Not forced: the port must be active and its pkey table
		 * must contain the requested pkey.
		 */
		ibt_status = ibt_query_hca_ports(port_state->id_hca_hdl,
		    port_state->id_port, &pinfop, &psize, &pinfosz);

		if ((ibt_status != IBT_SUCCESS) || (psize != 1)) {
			rval = EINVAL;
			goto part_create_return;
		}

		if (pinfop->p_linkstate != IBT_PORT_ACTIVE) {
			rval = ENETDOWN;
			cmd->ibdioc.ioc_status = IBD_PORT_IS_DOWN;
			goto part_create_return;
		}

		for (i = 0; i < pinfop->p_pkey_tbl_sz; i++) {
			if (pinfop->p_pkey_tbl[i] == cmd->ioc_pkey) {
				break;
			}
		}
		if (i == pinfop->p_pkey_tbl_sz) {
			rval = EINVAL;
			cmd->ibdioc.ioc_status = IBD_PKEY_NOT_PRESENT;
			goto part_create_return;
		}
	} else {
		force_create = B_TRUE;
	}

	/* Refuse to create the same partition twice. */
	mutex_enter(&ibd_objlist_lock);
	for (p = ibd_objlist_head; p; p = p->id_next) {
		if ((p->id_port_inst == cmd->ibdioc.ioc_port_inst) &&
		    (p->id_pkey == cmd->ioc_pkey) &&
		    (p->id_plinkid == cmd->ioc_partid)) {
			mutex_exit(&ibd_objlist_lock);
			rval = EEXIST;
			cmd->ibdioc.ioc_status = IBD_PARTITION_EXISTS;
			goto part_create_return;
		}
	}
	mutex_exit(&ibd_objlist_lock);

	state = kmem_zalloc(sizeof (ibd_state_t), KM_SLEEP);

	state->id_type = IBD_PARTITION_OBJ;

	state->id_plinkid = cmd->ioc_partid;
	state->id_dlinkid = cmd->ibdioc.ioc_linkid;
	state->id_port_inst = cmd->ibdioc.ioc_port_inst;

	/* The partition inherits the hardware identity of its port. */
	state->id_dip = port_state->id_dip;
	state->id_port = port_state->id_port;
	state->id_pkey = cmd->ioc_pkey;
	state->id_hca_guid = port_state->id_hca_guid;
	state->id_port_guid = port_state->id_port_guid;
	state->id_force_create = force_create;

	mutex_init(&state->id_macst_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&state->id_macst_cv, NULL, CV_DEFAULT, NULL);

	if (ibd_part_attach(state, state->id_dip) != DDI_SUCCESS) {
		rval = EIO;
		cmd->ibdioc.ioc_status = IBD_NO_HW_RESOURCE;
		goto fail;
	}

	if ((macp = mac_alloc(MAC_VERSION)) == NULL) {
		rval = EAGAIN;
		goto fail;
	}

	macp->m_type_ident = MAC_PLUGIN_IDENT_IB;
	macp->m_dip = port_state->id_dip;
	macp->m_instance = (uint_t)-1;
	macp->m_driver = state;
	macp->m_src_addr = (uint8_t *)&state->id_macaddr;
	macp->m_callbacks = &ibd_m_callbacks;
	macp->m_min_sdu = 0;
	macp->m_multicast_sdu = IBD_DEF_MAX_SDU;
	/* The larger SDU applies only when RC mode is enabled. */
	if (state->id_enable_rc) {
		macp->m_max_sdu = IBD_DEF_RC_MAX_SDU;
	} else {
		macp->m_max_sdu = IBD_DEF_MAX_SDU;
	}
	macp->m_priv_props = ibd_priv_props;

	err = mac_register(macp, &state->id_mh);
	mac_free(macp);

	if (err != 0) {
		DPRINT(10, "ibd_create_partition: mac_register() failed %d",
		    err);
		rval = err;
		goto fail;
	}

	err = dls_devnet_create(state->id_mh,
	    cmd->ioc_partid, crgetzoneid(credp));
	if (err != 0) {
		DPRINT(10, "ibd_create_partition: dls_devnet_create() failed "
		    "%d", err);
		rval = err;
		(void) mac_unregister(state->id_mh);
		goto fail;
	}

	/* Publish the new partition at the head of the global list. */
	mutex_enter(&ibd_objlist_lock);
	if (ibd_objlist_head)
		state->id_next = ibd_objlist_head;

	ibd_objlist_head = state;
	mutex_exit(&ibd_objlist_lock);

part_create_return:
	if (pinfop) {
		ibt_free_portinfo(pinfop, pinfosz);
	}
	return (rval);

fail:
	if (pinfop) {
		ibt_free_portinfo(pinfop, pinfosz);
	}
	ibd_part_unattach(state);
	/*
	 * Destroy the synchronisation objects initialised above before the
	 * state is freed, mirroring the teardown in ibd_delete_partition().
	 */
	cv_destroy(&state->id_macst_cv);
	mutex_destroy(&state->id_macst_lock);
	kmem_free(state, sizeof (ibd_state_t));
	return (rval);
}
/*
 * ioctl handler: delete the IB partition object whose partition link id
 * matches cmd->ioc_partid (karg points to an ibd_delete_ioctl_t).
 * arg, mode and rvalp are not used.
 *
 * Returns 0 on success, ENOENT if no such partition exists, the error from
 * dls_devnet_destroy() or mac_disable() if one of them fails, or EBUSY if
 * the partition is started, in late HCA init, or reported busy by
 * ibd_part_busy().
 */
static int
ibd_delete_partition(void *karg, intptr_t arg, int mode, cred_t *credp,
int *rvalp)
{
int err;
datalink_id_t tmpid;
ibd_state_t *node, *prev;
ibd_delete_ioctl_t *cmd = karg;
prev = NULL;
/* ibd_objlist_lock is held from lookup until the node is unlinked. */
mutex_enter(&ibd_objlist_lock);
node = ibd_objlist_head;
/* Find the partition, remembering its predecessor for the unlink. */
while (node != NULL) {
if (node->id_plinkid == cmd->ioc_partid)
break;
prev = node;
node = node->id_next;
}
if (node == NULL) {
mutex_exit(&ibd_objlist_lock);
return (ENOENT);
}
if ((err = dls_devnet_destroy(node->id_mh, &tmpid, B_TRUE)) != 0) {
DPRINT(10, "ibd_delete_partition: dls_devnet_destroy() failed "
"%d", err);
mutex_exit(&ibd_objlist_lock);
return (err);
}
ibd_set_mac_progress(node, IBD_DRV_DELETE_IN_PROGRESS);
err = 0;
/*
 * The partition can only be removed if it is not started, not in late
 * HCA init, not busy, and the MAC can be disabled.  Otherwise the devnet
 * link destroyed above is re-created and the deletion is abandoned.
 */
if ((node->id_mac_state & IBD_DRV_STARTED) ||
(node->id_mac_state & IBD_DRV_IN_LATE_HCA_INIT) ||
(ibd_part_busy(node) != DDI_SUCCESS) ||
((err = mac_disable(node->id_mh)) != 0)) {
(void) dls_devnet_create(node->id_mh, cmd->ioc_partid,
crgetzoneid(credp));
ibd_clr_mac_progress(node, IBD_DRV_DELETE_IN_PROGRESS);
mutex_exit(&ibd_objlist_lock);
/* err is non-zero only when mac_disable() itself failed. */
return (err != 0 ? err : EBUSY);
}
/* The MAC is disabled; mark the node as being deleted and unattach it. */
node->id_mac_state |= IBD_DRV_IN_DELETION;
ibd_part_unattach(node);
ibd_clr_mac_progress(node, IBD_DRV_DELETE_IN_PROGRESS);
/* Unlink the node from the global partition list. */
if (prev == NULL)
ibd_objlist_head = node->id_next;
else
prev->id_next = node->id_next;
mutex_exit(&ibd_objlist_lock);
/* A mac_unregister() failure is only logged; teardown continues. */
if ((err = mac_unregister(node->id_mh)) != 0) {
DPRINT(10, "ibd_delete_partition: mac_unregister() failed %d",
err);
}
cv_destroy(&node->id_macst_cv);
mutex_destroy(&node->id_macst_lock);
kmem_free(node, sizeof (ibd_state_t));
return (0);
}
static int
ibd_get_partition_info(void *karg, intptr_t arg, int mode, cred_t *cred,
int *rvalp)
{
ibd_ioctl_t cmd;
ibpart_ioctl_t partioc;
ibport_ioctl_t portioc;
#ifdef _MULTI_DATAMODEL
ibport_ioctl32_t portioc32;
#endif
ibd_state_t *state, *port_state;
int size;
ibt_hca_portinfo_t *pinfop = NULL;
ibt_status_t ibt_status;
uint_t psize, pinfosz;
int rval = 0;
size = sizeof (ibd_ioctl_t);
if (ddi_copyin((void *)arg, &cmd, size, mode)) {
return (EFAULT);
}
cmd.ioc_status = 0;
switch (cmd.ioc_info_cmd) {
case IBD_INFO_CMD_IBPART:
size = sizeof (ibpart_ioctl_t);
if (ddi_copyin((void *)arg, &partioc, size, mode)) {
return (EFAULT);
}
mutex_enter(&ibd_objlist_lock);
for (state = ibd_objlist_head; state; state = state->id_next) {
if (state->id_plinkid == cmd.ioc_linkid) {
break;
}
}
if (state == NULL) {
mutex_exit(&ibd_objlist_lock);
return (ENOENT);
}
partioc.ibdioc.ioc_linkid = state->id_dlinkid;
partioc.ibdioc.ioc_port_inst = state->id_port_inst;
partioc.ibdioc.ioc_portnum = state->id_port;
partioc.ibdioc.ioc_hcaguid = state->id_hca_guid;
partioc.ibdioc.ioc_portguid = state->id_port_guid;
partioc.ibdioc.ioc_status = 0;
partioc.ioc_partid = state->id_plinkid;
partioc.ioc_pkey = state->id_pkey;
partioc.ioc_force_create = state->id_force_create;
if (ddi_copyout((void *)&partioc, (void *)arg, size, mode)) {
mutex_exit(&ibd_objlist_lock);
return (EFAULT);
}
mutex_exit(&ibd_objlist_lock);
break;
case IBD_INFO_CMD_IBPORT:
if ((cmd.ioc_port_inst < 0) || ((port_state =
ddi_get_soft_state(ibd_list, cmd.ioc_port_inst)) == NULL)) {
DPRINT(10, "ibd_create_partition: failed to get"
" state %d", cmd.ioc_port_inst);
size = sizeof (ibd_ioctl_t);
cmd.ioc_status = IBD_INVALID_PORT_INST;
if (ddi_copyout((void *)&cmd, (void *)arg, size,
mode)) {
return (EFAULT);
}
return (EINVAL);
}
ibt_status = ibt_query_hca_ports(port_state->id_hca_hdl,
port_state->id_port, &pinfop, &psize, &pinfosz);
if ((ibt_status != IBT_SUCCESS) || (psize != 1)) {
return (EINVAL);
}
#ifdef _MULTI_DATAMODEL
switch (ddi_model_convert_from(mode & FMODELS)) {
case DDI_MODEL_ILP32: {
size = sizeof (ibport_ioctl32_t);
if (ddi_copyin((void *)arg, &portioc32, size, mode)) {
rval = EFAULT;
goto fail;
}
portioc32.ibdioc.ioc_status = 0;
portioc32.ibdioc.ioc_portnum = port_state->id_port;
portioc32.ibdioc.ioc_hcaguid =
port_state->id_hca_guid;
portioc32.ibdioc.ioc_portguid =
port_state->id_port_guid;
if (portioc32.ioc_pkey_tbl_sz !=
pinfop->p_pkey_tbl_sz) {
rval = EINVAL;
size = sizeof (ibd_ioctl_t);
portioc32.ibdioc.ioc_status =
IBD_INVALID_PKEY_TBL_SIZE;
if (ddi_copyout((void *)&portioc32.ibdioc,
(void *)arg, size, mode)) {
rval = EFAULT;
goto fail;
}
goto fail;
}
size = pinfop->p_pkey_tbl_sz * sizeof (ib_pkey_t);
if (ddi_copyout((void *)pinfop->p_pkey_tbl,
(void *)(uintptr_t)portioc32.ioc_pkeys, size,
mode)) {
rval = EFAULT;
goto fail;
}
size = sizeof (ibport_ioctl32_t);
if (ddi_copyout((void *)&portioc32, (void *)arg, size,
mode)) {
rval = EFAULT;
goto fail;
}
break;
}
case DDI_MODEL_NONE:
size = sizeof (ibport_ioctl_t);
if (ddi_copyin((void *)arg, &portioc, size, mode)) {
rval = EFAULT;
goto fail;
}
portioc.ibdioc.ioc_status = 0;
portioc.ibdioc.ioc_portnum = port_state->id_port;
portioc.ibdioc.ioc_hcaguid = port_state->id_hca_guid;
portioc.ibdioc.ioc_portguid = port_state->id_port_guid;
if (portioc.ioc_pkey_tbl_sz != pinfop->p_pkey_tbl_sz) {
rval = EINVAL;
size = sizeof (ibd_ioctl_t);
portioc.ibdioc.ioc_status =
IBD_INVALID_PKEY_TBL_SIZE;
if (ddi_copyout((void *)&portioc.ibdioc,
(void *)arg, size, mode)) {
rval = EFAULT;
goto fail;
}
goto fail;
}
size = pinfop->p_pkey_tbl_sz * sizeof (ib_pkey_t);
if (ddi_copyout((void *)pinfop->p_pkey_tbl,
(void *)(portioc.ioc_pkeys), size, mode)) {
rval = EFAULT;
goto fail;
}
size = sizeof (ibport_ioctl_t);
if (ddi_copyout((void *)&portioc, (void *)arg, size,
mode)) {
rval = EFAULT;
goto fail;
}
break;
}
#else
size = sizeof (ibport_ioctl_t);
if (ddi_copyin((void *)arg, &portioc, size, mode)) {
rval = EFAULT;
goto fail;
}
portioc.ibdioc.ioc_status = 0;
portioc.ibdioc.ioc_portnum = port_state->id_port;
portioc.ibdioc.ioc_hcaguid = port_state->id_hca_guid;
portioc.ibdioc.ioc_portguid = port_state->id_port_guid;
if (portioc.ioc_pkey_tbl_sz != pinfop->p_pkey_tbl_sz) {
rval = EINVAL;
size = sizeof (ibd_ioctl_t);
portioc.ibdioc.ioc_status = IBD_INVALID_PKEY_TBL_SIZE;
if (ddi_copyout((void *)&portioc.ibdioc, (void *)arg,
size, mode)) {
rval = EFAULT;
goto fail;
}
goto fail;
}
size = pinfop->p_pkey_tbl_sz * sizeof (ib_pkey_t);
if (ddi_copyout((void *)pinfop->p_pkey_tbl,
(void *)(portioc.ioc_pkeys), size, mode)) {
rval = EFAULT;
goto fail;
}
size = sizeof (ibport_ioctl_t);
if (ddi_copyout((void *)&portioc, (void *)arg, size,
mode)) {
rval = EFAULT;
goto fail;
}
#endif
break;
case IBD_INFO_CMD_PKEYTBLSZ:
if ((cmd.ioc_port_inst < 0) || ((port_state =
ddi_get_soft_state(ibd_list, cmd.ioc_port_inst)) == NULL)) {
DPRINT(10, "ibd_create_partition: failed to get"
" state %d", cmd.ioc_port_inst);
size = sizeof (ibd_ioctl_t);
cmd.ioc_status = IBD_INVALID_PORT_INST;
if (ddi_copyout((void *)&cmd, (void *)arg, size,
mode)) {
return (EFAULT);
}
return (EINVAL);
}
ibt_status = ibt_query_hca_ports(port_state->id_hca_hdl,
port_state->id_port, &pinfop, &psize, &pinfosz);
if ((ibt_status != IBT_SUCCESS) || (psize != 1)) {
return (EINVAL);
}
#ifdef _MULTI_DATAMODEL
switch (ddi_model_convert_from(mode & FMODELS)) {
case DDI_MODEL_ILP32: {
size = sizeof (ibport_ioctl32_t);
if (ddi_copyin((void *)arg, &portioc32, size, mode)) {
rval = EFAULT;
goto fail;
}
portioc32.ibdioc.ioc_status = 0;
portioc32.ibdioc.ioc_portnum = port_state->id_port;
portioc32.ibdioc.ioc_hcaguid =
port_state->id_hca_guid;
portioc32.ibdioc.ioc_portguid =
port_state->id_port_guid;
portioc32.ioc_pkey_tbl_sz = pinfop->p_pkey_tbl_sz;
if (ddi_copyout((void *)&portioc32, (void *)arg, size,
mode)) {
rval = EFAULT;
goto fail;
}
break;
}
case DDI_MODEL_NONE:
size = sizeof (ibport_ioctl_t);
if (ddi_copyin((void *)arg, &portioc, size, mode)) {
rval = EFAULT;
goto fail;
}
portioc.ibdioc.ioc_status = 0;
portioc.ibdioc.ioc_portnum = port_state->id_port;
portioc.ibdioc.ioc_hcaguid = port_state->id_hca_guid;
portioc.ibdioc.ioc_portguid = port_state->id_port_guid;
portioc.ioc_pkey_tbl_sz = pinfop->p_pkey_tbl_sz;
if (ddi_copyout((void *)&portioc, (void *)arg, size,
mode)) {
rval = EFAULT;
goto fail;
}
break;
}
#else
size = sizeof (ibport_ioctl_t);
if (ddi_copyin((void *)arg, &portioc, size, mode)) {
rval = EFAULT;
goto fail;
}
portioc.ibdioc.ioc_status = 0;
portioc.ibdioc.ioc_portnum = port_state->id_port;
portioc.ibdioc.ioc_hcaguid = port_state->id_hca_guid;
portioc.ibdioc.ioc_portguid = port_state->id_port_guid;
portioc.ioc_pkey_tbl_sz = pinfop->p_pkey_tbl_sz;
if (ddi_copyout((void *)&portioc, (void *)arg, size,
mode)) {
rval = EFAULT;
goto fail;
}
#endif
break;
default:
return (EINVAL);
}
fail:
if (pinfop) {
ibt_free_portinfo(pinfop, pinfosz);
}
return (rval);
}
/*
 * Async event callback for a port-driver instance.
 *
 * arg is interpreted as the ibd_state_t of the port.  Only port up/down
 * events are acted on: the current link state is re-queried through
 * ibd_get_port_state() and, if it differs from the cached
 * state->id_link_state, the cache is updated and the MAC layer is told via
 * mac_link_update().  Every other event code is ignored.  The hca_hdl and
 * event arguments are not used.
 */
static void
ibdpd_async_handler(void *arg, ibt_hca_hdl_t hca_hdl,
    ibt_async_code_t code, ibt_async_event_t *event)
{
	ibd_state_t *port = arg;
	link_state_t now;

	/* Anything other than a port up/down transition is not our concern. */
	if (code != IBT_EVENT_PORT_UP && code != IBT_ERROR_PORT_DOWN)
		return;

	/* If the port cannot be queried, leave the cached state untouched. */
	if (ibd_get_port_state(port, &now) != 0)
		return;

	/* Only notify the MAC layer on an actual change. */
	if (port->id_link_state == now)
		return;

	port->id_link_state = now;
	mac_link_update(port->id_mh, now);
}
/*
 * Query the HCA for the current state of state->id_port.
 *
 * On success the port's first SGID is cached in state->id_sgid, the link
 * speed is refreshed through ibd_get_portspeed(), and *lstate is set to
 * LINK_STATE_UP when the port is IBT_PORT_ACTIVE, LINK_STATE_DOWN otherwise.
 *
 * Returns 0 on success, or -1 if the query fails or does not describe
 * exactly one port (in which case neither state nor *lstate is modified).
 */
static int
ibd_get_port_state(ibd_state_t *state, link_state_t *lstate)
{
	ibt_hca_portinfo_t *port_infop;
	uint_t psize, port_infosz;
	ibt_status_t ret;

	ret = ibt_query_hca_ports(state->id_hca_hdl, state->id_port,
	    &port_infop, &psize, &port_infosz);
	if (ret != IBT_SUCCESS)
		return (-1);

	/*
	 * The query succeeded, so a port-info buffer was handed to us; it
	 * must be released even when the port count is not what we expect.
	 */
	if (psize != 1) {
		ibt_free_portinfo(port_infop, port_infosz);
		return (-1);
	}

	state->id_sgid = *port_infop->p_sgid_tbl;
	state->id_link_speed = ibd_get_portspeed(state);

	if (port_infop->p_linkstate == IBT_PORT_ACTIVE)
		*lstate = LINK_STATE_UP;
	else
		*lstate = LINK_STATE_DOWN;

	ibt_free_portinfo(port_infop, port_infosz);
	return (0);
}
/*
 * Attach a port-driver instance for dip.
 *
 * Allocates the per-instance soft state, reads the "port-number",
 * "hca-guid" and "port-guid" properties, attaches to IBTL, opens the HCA,
 * samples the initial link state, registers with the MAC layer and reports
 * that link state.  Progress is recorded in state->id_mac_state so that
 * ibd_port_unattach() can undo exactly the steps that completed.
 *
 * Returns DDI_SUCCESS, or DDI_FAILURE after undoing any partial setup.
 * Once the soft state has been allocated, every failure goes through the
 * "done" label so that the soft state is always released.
 */
static int
ibd_port_attach(dev_info_t *dip)
{
	ibd_state_t *state;
	link_state_t lstate;
	int instance;
	ibt_status_t ret;

	instance = ddi_get_instance(dip);
	if (ddi_soft_state_zalloc(ibd_list, instance) == DDI_FAILURE) {
		DPRINT(10, "ibd_port_attach: ddi_soft_state_zalloc() failed");
		return (DDI_FAILURE);
	}

	state = ddi_get_soft_state(ibd_list, instance);
	state->id_dip = dip;
	state->id_type = IBD_PORT_DRIVER;

	/*
	 * A value of 0 (the lookup default) means the property is missing
	 * or invalid.  id_mac_state is still zero here, so going through
	 * "done" only frees the soft state allocated above.
	 */
	if ((state->id_port = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
	    "port-number", 0)) == 0) {
		DPRINT(10, "ibd_port_attach: invalid port number (%d)",
		    state->id_port);
		goto done;
	}
	if ((state->id_hca_guid = ddi_prop_get_int64(DDI_DEV_T_ANY, dip, 0,
	    "hca-guid", 0)) == 0) {
		DPRINT(10, "ibd_port_attach: hca has invalid guid (0x%llx)",
		    state->id_hca_guid);
		goto done;
	}
	if ((state->id_port_guid = ddi_prop_get_int64(DDI_DEV_T_ANY, dip, 0,
	    "port-guid", 0)) == 0) {
		DPRINT(10, "ibd_port_attach: port has invalid guid (0x%llx)",
		    state->id_port_guid);
		goto done;
	}

	/* Attach to IBTL as a client. */
	if ((ret = ibt_attach(&ibdpd_clnt_modinfo, dip, state,
	    &state->id_ibt_hdl)) != IBT_SUCCESS) {
		DPRINT(10, "ibd_port_attach: failed in ibt_attach(), ret=%d",
		    ret);
		goto done;
	}
	state->id_mac_state |= IBD_DRV_IBTL_ATTACH_DONE;

	/* Open the HCA that owns this port. */
	if ((ret = ibt_open_hca(state->id_ibt_hdl, state->id_hca_guid,
	    &state->id_hca_hdl)) != IBT_SUCCESS) {
		DPRINT(10, "ibd_port_attach: ibt_open_hca() failed, ret=%d",
		    ret);
		goto done;
	}
	state->id_mac_state |= IBD_DRV_HCA_OPENED;

	/* Sample the initial link state before registering with MAC. */
	if (ibd_get_port_state(state, &lstate) != 0) {
		DPRINT(10, "ibd_port_attach: ibd_get_port_state() failed");
		goto done;
	}
	state->id_link_state = lstate;

	/*
	 * NOTE(review): the result is compared against IBT_SUCCESS as in the
	 * original code; confirm ibd_register_mac()'s return convention.
	 */
	if (ibd_register_mac(state, dip) != IBT_SUCCESS) {
		DPRINT(10, "ibd_port_attach: failed in ibd_register_mac()");
		goto done;
	}
	state->id_mac_state |= IBD_DRV_MAC_REGISTERED;

	mac_link_update(state->id_mh, lstate);

	return (DDI_SUCCESS);

done:
	/* Undo whatever completed and free the soft state. */
	(void) ibd_port_unattach(state, dip);
	return (DDI_FAILURE);
}
/*
 * Tear down a port-driver instance.
 *
 * Each setup stage recorded in state->id_mac_state is undone in reverse
 * order of how ibd_port_attach() performs them: MAC unregistration, HCA
 * close, IBTL detach.  Failures from ibt_close_hca() and ibt_detach() are
 * logged with ibd_print_warn() but do not stop the teardown; the result of
 * mac_unregister() is ignored.  Finally the soft state for dip's instance
 * is freed, so state must not be used after this returns.
 *
 * Always returns DDI_SUCCESS.
 */
static int
ibd_port_unattach(ibd_state_t *state, dev_info_t *dip)
{
	/* Snapshot of the completed stages, taken before any are cleared. */
	const uint32_t stages = state->id_mac_state;
	ibt_status_t status;

	if ((stages & IBD_DRV_MAC_REGISTERED) != 0) {
		(void) mac_unregister(state->id_mh);
		state->id_mac_state &= ~IBD_DRV_MAC_REGISTERED;
	}

	if ((stages & IBD_DRV_HCA_OPENED) != 0) {
		status = ibt_close_hca(state->id_hca_hdl);
		if (status != IBT_SUCCESS) {
			ibd_print_warn(state, "failed to close "
			    "HCA device, ret=%d", status);
		}
		state->id_hca_hdl = NULL;
		state->id_mac_state &= ~IBD_DRV_HCA_OPENED;
	}

	if ((stages & IBD_DRV_IBTL_ATTACH_DONE) != 0) {
		status = ibt_detach(state->id_ibt_hdl);
		if (status != IBT_SUCCESS) {
			ibd_print_warn(state,
			    "ibt_detach() failed, ret=%d", status);
		}
		state->id_ibt_hdl = NULL;
		state->id_mac_state &= ~IBD_DRV_IBTL_ATTACH_DONE;
	}

	ddi_soft_state_free(ibd_list, ddi_get_instance(dip));

	return (DDI_SUCCESS);
}
/*
 * Look up an entry on the ibd object list by link id and report its
 * attributes.
 *
 * The list headed by ibd_objlist_head is searched, under ibd_objlist_lock,
 * for the first entry whose id_plinkid equals linkid.  When one is found
 * its link ids, port number, HCA/port GUIDs and pkey are copied into *attr.
 *
 * Returns IBT_SUCCESS on a match, or IBT_NO_SUCH_OBJECT (leaving *attr
 * untouched) when no entry matches.
 */
ibt_status_t
ibd_get_part_attr(datalink_id_t linkid, ibt_part_attr_t *attr)
{
	ibd_state_t *part;
	ibt_status_t status = IBT_NO_SUCH_OBJECT;

	mutex_enter(&ibd_objlist_lock);

	for (part = ibd_objlist_head; part != NULL; part = part->id_next) {
		if (part->id_plinkid != linkid)
			continue;

		/* First match wins; copy it out while the lock is held. */
		attr->pa_dlinkid = part->id_dlinkid;
		attr->pa_plinkid = part->id_plinkid;
		attr->pa_port = part->id_port;
		attr->pa_hca_guid = part->id_hca_guid;
		attr->pa_port_guid = part->id_port_guid;
		attr->pa_pkey = part->id_pkey;
		status = IBT_SUCCESS;
		break;
	}

	mutex_exit(&ibd_objlist_lock);

	return (status);
}
/*
 * Report the attributes of every entry on the ibd object list.
 *
 * With ibd_objlist_lock held, the list is walked once to count its entries
 * and once more to fill an array allocated with kmem_alloc(KM_SLEEP).
 * *nparts receives the number of entries.  *attr_list receives the array
 * of *nparts ibt_part_attr_t elements, or NULL when the list is empty.
 * This function does not free the array.
 *
 * Always returns IBT_SUCCESS.
 */
ibt_status_t
ibd_get_all_part_attr(ibt_part_attr_t **attr_list, int *nparts)
{
	ibd_state_t *part;
	ibt_part_attr_t *out;
	int n = 0;

	mutex_enter(&ibd_objlist_lock);

	/* Pass 1: count the entries so the array can be sized. */
	part = ibd_objlist_head;
	while (part != NULL) {
		n++;
		part = part->id_next;
	}
	*nparts = n;

	/* An empty list yields a NULL array; pass 2 then has nothing to do. */
	if (n == 0)
		out = NULL;
	else
		out = kmem_alloc(sizeof (ibt_part_attr_t) * n, KM_SLEEP);
	*attr_list = out;

	/* Pass 2: fill one array slot per list entry, in list order. */
	for (part = ibd_objlist_head; part != NULL;
	    part = part->id_next, out++) {
#ifdef DEBUG
		/* The list cannot grow between the passes: lock is held. */
		ASSERT(n > 0);
		n--;
#endif
		out->pa_dlinkid = part->id_dlinkid;
		out->pa_plinkid = part->id_plinkid;
		out->pa_port = part->id_port;
		out->pa_hca_guid = part->id_hca_guid;
		out->pa_port_guid = part->id_port_guid;
		out->pa_pkey = part->id_pkey;
	}

	mutex_exit(&ibd_objlist_lock);

	return (IBT_SUCCESS);
}