#include <sys/types.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/ksynch.h>
#include <sys/byteorder.h>
#include <sys/ib/clients/eoib/enx_impl.h>
/*
 * Vendor id bytes placed in outgoing FIP descriptors.  The eight bytes are
 * the ASCII characters "Mellanox"; the array carries no NUL terminator, so
 * it must be copied by length, never treated as a C string.
 */
const char fip_vendor_mellanox[] = {
0x4d, 0x65, 0x6c, 0x6c, 0x61, 0x6e, 0x6f, 0x78
};
/*
 * Workaround switch consulted when parsing gateway advertisements: while it
 * is non-zero, the size check derived from the basic header's
 * hd_desc_list_len field is skipped.
 */
int enx_wa_no_desc_list_len = 1;
/*
 * Forward declarations for the file-local helpers: building and sending a
 * solicitation, parsing a gateway advertisement, and rolling back a
 * solicitation buffer that was built but not sent.
 */
static int eibnx_fip_make_solicit_pkt(eibnx_thr_info_t *, eibnx_wqe_t *);
static int eibnx_fip_send_solicit_pkt(eibnx_thr_info_t *, eibnx_wqe_t *,
eibnx_gw_addr_t *);
static int eibnx_fip_parse_advt_pkt(uint8_t *, eibnx_gw_msg_t *);
static void eibnx_rb_fip_make_solicit_pkt(eibnx_wqe_t *);
/*
 * Build a FIP solicitation in a freshly acquired send wqe and post it to
 * the solicitation multicast group (a NULL gateway address selects the
 * multicast destination in the send path).
 *
 * Returns ENX_E_SUCCESS once the packet has been posted; in that case the
 * wqe is not released here.  On any failure the wqe is handed back to the
 * pool (after wiping the packet if it had already been built) and
 * ENX_E_FAILURE is returned.
 */
int
eibnx_fip_solicit_mcast(eibnx_thr_info_t *info)
{
	eibnx_wqe_t *wqe;

	wqe = eibnx_acquire_swqe(info, KM_SLEEP);
	if (wqe == NULL)
		return (ENX_E_FAILURE);

	if (eibnx_fip_make_solicit_pkt(info, wqe) != ENX_E_SUCCESS)
		goto release;

	if (eibnx_fip_send_solicit_pkt(info, wqe, NULL) == ENX_E_SUCCESS)
		return (ENX_E_SUCCESS);

	/* The packet was built but never posted: undo the build first. */
	eibnx_rb_fip_make_solicit_pkt(wqe);

release:
	eibnx_release_swqe(wqe);
	return (ENX_E_FAILURE);
}
/*
 * Send a unicast FIP solicitation to every gateway on info->ti_gw that is
 * not dead and whose dedicated send wqe is currently usable.
 *
 * A gateway's wqe is usable only if it is typed as a send wqe, is marked
 * in use, and is not still posted.  Gateways failing any of these checks
 * are skipped with a debug message.
 *
 * solicit_period_ticks is in/out: on entry it holds the caller's period in
 * clock ticks; on return it is lowered to 4 * gw_adv_period of any
 * solicited gateway for which that is smaller (gw_adv_period is compared
 * against a millisecond value, so it is presumably in msec -- confirm).
 *
 * Always returns ENX_E_SUCCESS; per-gateway failures are not reported.
 */
int
eibnx_fip_solicit_ucast(eibnx_thr_info_t *info, clock_t *solicit_period_ticks)
{
	eibnx_gw_info_t *gw;
	clock_t period_msec;

	period_msec = drv_hztousec(*solicit_period_ticks) / 1000;

	for (gw = info->ti_gw; gw != NULL; gw = gw->gw_next) {
		eibnx_wqe_t *swqe;
		int skip_gw = 1;

		if (eibnx_is_gw_dead(gw))
			continue;

		swqe = gw->gw_swqe;
		ASSERT(swqe != NULL);

		/*
		 * Inspect the wqe state under its lock; the diagnostics are
		 * emitted while the lock is still held.
		 */
		mutex_enter(&swqe->qe_lock);
		if (swqe->qe_type != ENX_QETYP_SWQE) {
			ENX_DPRINTF_DEBUG("eibnx_fip_solicit_ucast: "
			    "gw wqe type (0x%lx) indicates this is not an "
			    "swqe!, cannot send solicitation to gw",
			    swqe->qe_type);
		} else if ((swqe->qe_flags & ENX_QEFL_INUSE) !=
		    ENX_QEFL_INUSE) {
			ENX_DPRINTF_DEBUG("eibnx_fip_solicit_ucast: "
			    "gw swqe flags (0x%lx) indicate swqe is free!, "
			    "cannot send solicitation to gw", swqe->qe_flags);
		} else if ((swqe->qe_flags & ENX_QEFL_POSTED) ==
		    ENX_QEFL_POSTED) {
			ENX_DPRINTF_DEBUG("eibnx_fip_solicit_ucast: gw swqe "
			    "flags (0x%lx) indicate swqe is still with HCA!, "
			    "cannot send solicitation to gw", swqe->qe_flags);
		} else {
			skip_gw = 0;
		}
		mutex_exit(&swqe->qe_lock);

		if (skip_gw)
			continue;

		/* Track the shortest 4 * advertisement period seen so far. */
		if ((gw->gw_adv_period * 4) < period_msec)
			period_msec = gw->gw_adv_period * 4;

		if (eibnx_fip_make_solicit_pkt(info, swqe) != ENX_E_SUCCESS)
			continue;

		/* If the send fails, wipe the packet that was just built. */
		if (eibnx_fip_send_solicit_pkt(info, swqe, &gw->gw_addr) !=
		    ENX_E_SUCCESS)
			eibnx_rb_fip_make_solicit_pkt(swqe);
	}

	*solicit_period_ticks = drv_usectohz(period_msec * 1000);

	return (ENX_E_SUCCESS);
}
/*
 * Fill the send wqe's buffer with a FIP host solicitation packet: protocol
 * version, basic header (EoIB opcode, host-solicit subcode, vendor id) and
 * the Infiniband address descriptor describing this port (qpn, base lid,
 * and the guid of the first entry in the port's sgid table).
 *
 * On success the sgl length is shrunk to the size of the solicitation and
 * ENX_E_SUCCESS is returned.  If the buffer is smaller than a solicitation,
 * nothing is written and ENX_E_FAILURE is returned.
 */
static int
eibnx_fip_make_solicit_pkt(eibnx_thr_info_t *info, eibnx_wqe_t *swqe)
{
	fip_solicit_t *sol;
	ib_guid_t guid_net;
	uint_t bufsz = swqe->qe_sgl.ds_len;
	uint_t solicit_sz = sizeof (fip_solicit_t);

	if (bufsz < solicit_sz) {
		ENX_DPRINTF_ERR("swqe bufsize too small for pkt, "
		    "pktsz=%x < expsz=%x", bufsz, solicit_sz);
		return (ENX_E_FAILURE);
	}

	sol = (fip_solicit_t *)(uint8_t *)(uintptr_t)(swqe->qe_sgl.ds_va);

	/* FIP protocol version */
	sol->sl_proto_version.pr_version = FIP_PROTO_VERSION;

	/*
	 * Basic header.  The descriptor list length is the packet size in
	 * 4-byte words less two words.
	 */
	sol->sl_fip_hdr.hd_opcode = htons(FIP_OPCODE_EOIB);
	sol->sl_fip_hdr.hd_subcode = FIP_SUBCODE_H_SOLICIT;
	sol->sl_fip_hdr.hd_desc_list_len = htons((solicit_sz >> 2) - 2);
	sol->sl_fip_hdr.hd_flags = 0;
	sol->sl_fip_hdr.hd_type = FIP_DESC_TYPE_VENDOR_ID;
	sol->sl_fip_hdr.hd_len = FIP_DESC_LEN_VENDOR_ID;
	bcopy(fip_vendor_mellanox, sol->sl_fip_hdr.hd_vendor_id,
	    FIP_VENDOR_LEN);

	/* Infiniband address descriptor; multi-byte fields in net order */
	sol->sl_iba.ia_type = FIP_DESC_TYPE_IBA;
	sol->sl_iba.ia_len = FIP_DESC_LEN_IBA;
	bcopy(fip_vendor_mellanox, sol->sl_iba.ia_vendor_id, FIP_VENDOR_LEN);
	sol->sl_iba.ia_qpn = htonl(info->ti_qpn);
	sol->sl_iba.ia_sl_portid = 0;
	sol->sl_iba.ia_lid = htons(info->ti_pi->p_base_lid);
	guid_net = htonll(info->ti_pi->p_sgid_tbl[0].gid_guid);
	bcopy(&guid_net, sol->sl_iba.ia_guid, FIP_GUID_LEN);

	/* Only the solicitation itself is to be transmitted. */
	swqe->qe_sgl.ds_len = solicit_sz;

	return (ENX_E_SUCCESS);
}
/*
 * Program the UD destination of the send wqe for a solicitation.
 *
 * gw_addr == NULL selects the multicast case: the destination is set from
 * the solicitation multicast group's qkey and address vector, with
 * IB_MC_QPN as the destination qpn.
 *
 * Otherwise this is a unicast to the given gateway.  If the gateway
 * already has a cached address vector (ga_vect), nothing is done and
 * success is returned.  If not, a path to the gateway's gid is looked up
 * with ibt_get_paths(), its primary address vector is cached in a newly
 * allocated ga_vect, and the UD destination is modified to use the
 * gateway's qkey, qpn and that vector.  Should the modify fail, the cached
 * vector is freed and ga_vect reset to NULL so that a later call starts
 * over.
 *
 * Returns ENX_E_SUCCESS or ENX_E_FAILURE.
 */
static int
eibnx_setup_ud_dest(eibnx_thr_info_t *info, eibnx_wqe_t *swqe,
    eibnx_gw_addr_t *gw_addr)
{
	eibnx_t *ss = enx_global_ss;
	ibt_path_attr_t attr;
	ibt_path_info_t path;
	ibt_status_t ret;

	/*
	 * Multicast solicitation
	 */
	if (gw_addr == NULL) {
		ret = ibt_modify_ud_dest(swqe->qe_wr.send.wr.ud.udwr_dest,
		    info->ti_solicit_mcg->mc_qkey, IB_MC_QPN,
		    &info->ti_solicit_mcg->mc_adds_vect);
		if (ret != IBT_SUCCESS) {
			ENX_DPRINTF_ERR("ibt_modify_ud_dest() failed with "
			    "ret=%d, qkey=%x, qpn=%x", ret,
			    info->ti_solicit_mcg->mc_qkey, IB_MC_QPN);
			return (ENX_E_FAILURE);
		}

		return (ENX_E_SUCCESS);
	}

	/*
	 * Unicast with an address vector already cached: nothing to do.
	 */
	if (gw_addr->ga_vect)
		return (ENX_E_SUCCESS);

	/*
	 * Size each bzero off the object being cleared.  The attribute
	 * structure was previously cleared using the size of the (different)
	 * path info type, which either overran 'attr' on the stack or left
	 * part of it uninitialized, depending on the two structure sizes.
	 */
	bzero(&attr, sizeof (attr));
	attr.pa_dgids = &gw_addr->ga_gid;
	attr.pa_num_dgids = 1;
	attr.pa_sgid = info->ti_pi->p_sgid_tbl[0];
	attr.pa_pkey = gw_addr->ga_pkey;

	bzero(&path, sizeof (path));
	ret = ibt_get_paths(ss->nx_ibt_hdl, IBT_PATH_PKEY,
	    &attr, 1, &path, NULL);
	if ((ret != IBT_SUCCESS) || (path.pi_hca_guid == 0)) {
		ENX_DPRINTF_ERR("ibt_get_paths() failed with "
		    "ret=%d, gid_prefix=%llx, gid_guid=%llx", ret,
		    gw_addr->ga_gid.gid_prefix, gw_addr->ga_gid.gid_guid);
		return (ENX_E_FAILURE);
	}

	/*
	 * Cache the primary path's address vector in the gateway address
	 * and point the UD destination at it.
	 */
	gw_addr->ga_vect = kmem_zalloc(sizeof (ibt_adds_vect_t), KM_SLEEP);
	bcopy(&path.pi_prim_cep_path.cep_adds_vect, gw_addr->ga_vect,
	    sizeof (ibt_adds_vect_t));

	ret = ibt_modify_ud_dest(swqe->qe_wr.send.wr.ud.udwr_dest,
	    gw_addr->ga_qkey, gw_addr->ga_qpn, gw_addr->ga_vect);
	if (ret != IBT_SUCCESS) {
		ENX_DPRINTF_ERR("ibt_modify_ud_dest() failed with "
		    "ret=%d, qkey=%x, qpn=%x", ret, gw_addr->ga_qkey,
		    gw_addr->ga_qpn);
		kmem_free(gw_addr->ga_vect, sizeof (ibt_adds_vect_t));
		gw_addr->ga_vect = NULL;
		return (ENX_E_FAILURE);
	}

	return (ENX_E_SUCCESS);
}
/*
 * Post an already-built solicitation.  The UD destination is set up first
 * (multicast group when gw_addr is NULL, the given gateway otherwise), then
 * the send work request is posted on the thread's channel while holding
 * the wqe lock.
 *
 * After a successful post the wqe is flagged ENX_QEFL_POSTED.  For the
 * multicast case it is additionally flagged ENX_QEFL_RELONCOMP and
 * info->ti_mcast_done is set.  The flags are updated only after the post
 * succeeds, still under the wqe lock.
 *
 * Returns ENX_E_SUCCESS or ENX_E_FAILURE.
 */
static int
eibnx_fip_send_solicit_pkt(eibnx_thr_info_t *info, eibnx_wqe_t *swqe,
    eibnx_gw_addr_t *gw_addr)
{
	ibt_status_t post_status;

	if (eibnx_setup_ud_dest(info, swqe, gw_addr) != ENX_E_SUCCESS)
		return (ENX_E_FAILURE);

	mutex_enter(&swqe->qe_lock);

	post_status = ibt_post_send(info->ti_chan, &(swqe->qe_wr.send), 1,
	    NULL);
	if (post_status == IBT_SUCCESS) {
		swqe->qe_flags |= ENX_QEFL_POSTED;
		if (gw_addr == NULL) {
			swqe->qe_flags |= ENX_QEFL_RELONCOMP;
			info->ti_mcast_done = 1;
		}
		mutex_exit(&swqe->qe_lock);

		return (ENX_E_SUCCESS);
	}

	/* Post failed: drop the lock before logging. */
	mutex_exit(&swqe->qe_lock);
	ENX_DPRINTF_ERR("ibt_post_send() failed for solicit, "
	    "ret=%d", post_status);

	return (ENX_E_FAILURE);
}
/*
 * Classify a received FIP packet and fill in msg accordingly.
 *
 * The basic header is located immediately after the protocol version
 * structure at the start of the packet.  Only the EoIB opcode is accepted.
 * A gateway advertisement is handed to the advertisement parser; a vnic
 * login ack only has its message type recorded.  Anything else is logged
 * and rejected.
 *
 * Returns ENX_E_SUCCESS if msg was filled in, ENX_E_FAILURE otherwise.
 */
int
eibnx_fip_parse_pkt(uint8_t *pkt, eibnx_gw_msg_t *msg)
{
	fip_basic_hdr_t *bhdr;
	uint16_t opcode;
	uint8_t subcode;

	bhdr = (fip_basic_hdr_t *)(pkt + sizeof (fip_proto_t));

	opcode = ntohs(bhdr->hd_opcode);
	if (opcode != FIP_OPCODE_EOIB) {
		ENX_DPRINTF_WARN("unsupported opcode (%x) found in "
		    "gw advertisement, ignoring", opcode);
		return (ENX_E_FAILURE);
	}

	subcode = bhdr->hd_subcode;

	if (subcode == FIP_SUBCODE_G_ADVERTISE)
		return (eibnx_fip_parse_advt_pkt(pkt, msg));

	if (subcode == FIP_SUBCODE_G_VNIC_LOGIN_ACK) {
		msg->gm_type = FIP_VNIC_LOGIN_ACK;
		return (ENX_E_SUCCESS);
	}

	ENX_DPRINTF_WARN("unsupported subcode (%x) found in "
	    "gw advertisement, ignoring", subcode);

	return (ENX_E_FAILURE);
}
/*
 * Validate a gateway advertisement and unpack it into msg.
 *
 * Validation, in order (the first failure is logged and ENX_E_FAILURE is
 * returned):
 *   - unless the enx_wa_no_desc_list_len workaround is on, the size
 *     implied by the basic header's descriptor list length must be at
 *     least that of a full advertisement;
 *   - the type/len pair of each of the vendor-id header, the IBA
 *     descriptor, the gateway info descriptor, the gateway id descriptor
 *     and the keepalive descriptor must match the expected constants.
 *
 * On success msg->gm_type is set to the unicast or multicast advertisement
 * type according to the header flags, msg->u.gm_info is populated with the
 * advertised values converted to host byte order, the copied vendor id,
 * system name and port name are NUL terminated, gw_next is cleared, and
 * ENX_E_SUCCESS is returned.
 */
static int
eibnx_fip_parse_advt_pkt(uint8_t *pkt, eibnx_gw_msg_t *msg)
{
	fip_advertise_t *advt = (fip_advertise_t *)pkt;
	fip_basic_hdr_t *bhdr = &(advt->ad_fip_header);
	fip_desc_iba_t *iba = &(advt->ad_iba);
	fip_desc_gwinfo_t *gwinfo = &(advt->ad_gwinfo);
	fip_desc_gwid_t *gwid = &(advt->ad_gwid);
	fip_desc_keepalive_t *ka = &(advt->ad_keep_alive);
	eibnx_gw_info_t *gwi = &(msg->u.gm_info);
	ib_guid_t guid;
	uint16_t vnic_word;
	uint16_t sl_portid;
	uint16_t flags;

	/*
	 * Size check, skipped while the workaround is enabled.  The size is
	 * the descriptor list length plus two words, in 4-byte words.
	 */
	if (!enx_wa_no_desc_list_len) {
		uint_t pkt_data_sz;

		pkt_data_sz = (ntohs(bhdr->hd_desc_list_len) + 2) << 2;
		if (pkt_data_sz < sizeof (fip_advertise_t)) {
			ENX_DPRINTF_WARN("advertisement from gw too small; "
			    "expected %x, got %x", sizeof (fip_advertise_t),
			    pkt_data_sz);
			return (ENX_E_FAILURE);
		}
	}

	/*
	 * Check the type and length of every descriptor before trusting
	 * any of the contents.
	 */
	if (bhdr->hd_type != FIP_DESC_TYPE_VENDOR_ID ||
	    bhdr->hd_len != FIP_DESC_LEN_VENDOR_ID) {
		ENX_DPRINTF_WARN("invalid type/len in fip basic header; "
		    "expected (%x,%x), got (%x,%x)", FIP_DESC_TYPE_VENDOR_ID,
		    FIP_DESC_LEN_VENDOR_ID, bhdr->hd_type, bhdr->hd_len);
		return (ENX_E_FAILURE);
	}

	if (iba->ia_type != FIP_DESC_TYPE_IBA ||
	    iba->ia_len != FIP_DESC_LEN_IBA) {
		ENX_DPRINTF_WARN("invalid type/len in fip iba desc; "
		    "expected (%x,%x), got (%x,%x)", FIP_DESC_TYPE_IBA,
		    FIP_DESC_LEN_IBA, iba->ia_type, iba->ia_len);
		return (ENX_E_FAILURE);
	}

	if (gwinfo->gi_type != FIP_DESC_TYPE_EOIB_GW_INFO ||
	    gwinfo->gi_len != FIP_DESC_LEN_EOIB_GW_INFO) {
		ENX_DPRINTF_WARN("invalid type/len in fip gwinfo desc; "
		    "expected (%x,%x), got (%x,%x)",
		    FIP_DESC_TYPE_EOIB_GW_INFO, FIP_DESC_LEN_EOIB_GW_INFO,
		    gwinfo->gi_type, gwinfo->gi_len);
		return (ENX_E_FAILURE);
	}

	if (gwid->id_type != FIP_DESC_TYPE_GW_ID ||
	    gwid->id_len != FIP_DESC_LEN_GW_ID) {
		ENX_DPRINTF_WARN("invalid type/len in fip gwid desc; "
		    "expected (%x,%x), got (%x,%x)",
		    FIP_DESC_TYPE_GW_ID, FIP_DESC_LEN_GW_ID,
		    gwid->id_type, gwid->id_len);
		return (ENX_E_FAILURE);
	}

	if (ka->ka_type != FIP_DESC_TYPE_KEEP_ALIVE ||
	    ka->ka_len != FIP_DESC_LEN_KEEP_ALIVE) {
		ENX_DPRINTF_WARN("invalid type/len in fip ka desc; "
		    "expected (%x,%x), got (%x,%x)",
		    FIP_DESC_TYPE_KEEP_ALIVE, FIP_DESC_LEN_KEEP_ALIVE,
		    ka->ka_type, ka->ka_len);
		return (ENX_E_FAILURE);
	}

	/*
	 * Basic header flags: gateway availability, and whether this
	 * advertisement was unicast or multicast.
	 */
	flags = ntohs(bhdr->hd_flags);
	gwi->gw_flag_available = (flags & FIP_BHFLAG_GWAVAIL) ? 1 : 0;
	gwi->gw_flag_ucast_advt = (flags & FIP_BHFLAG_SLCTMSG) ? 1 : 0;
	if (gwi->gw_flag_ucast_advt)
		msg->gm_type = FIP_GW_ADVERTISE_UCAST;
	else
		msg->gm_type = FIP_GW_ADVERTISE_MCAST;

	/*
	 * Infiniband address descriptor: control qpn, port id and sl
	 * (packed together in one 16-bit field), lid and guid.
	 */
	gwi->gw_ctrl_qpn = (ntohl(iba->ia_qpn) & FIP_IBA_QPN_MASK);
	sl_portid = ntohs(iba->ia_sl_portid);
	gwi->gw_portid = (sl_portid & FIP_IBA_PORTID_MASK);
	gwi->gw_sl = ((sl_portid & FIP_IBA_SL_MASK) >> FIP_IBA_SL_SHIFT);
	gwi->gw_lid = ntohs(iba->ia_lid);
	bcopy(iba->ia_guid, &guid, sizeof (ib_guid_t));
	gwi->gw_guid = ntohll(guid);

	/*
	 * Gateway info descriptor: host-administered vnics flag, the vnic
	 * count and rss qpn count (packed together in one 16-bit field),
	 * and the vendor id.
	 */
	gwi->gw_is_host_adm_vnics =
	    (gwinfo->gi_flags & FIP_GWI_HOST_ADMIND_VNICS_MASK) ? 1 : 0;
	vnic_word = ntohs(gwinfo->gi_rss_qpn_num_net_vnics);
	gwi->gw_num_net_vnics = (vnic_word & FIP_GWI_NUM_NET_VNICS_MASK);
	gwi->gw_n_rss_qpn = ((vnic_word & FIP_GWI_RSS_QPN_MASK) >>
	    FIP_GWI_RSS_QPN_SHIFT);
	bcopy(gwinfo->gi_vendor_id, gwi->gw_vendor_id, FIP_VENDOR_LEN);
	(gwi->gw_vendor_id)[FIP_VENDOR_LEN] = '\0';

	/*
	 * Gateway id descriptor: system guid, system name and port name.
	 * The names are terminated here after being copied by length.
	 */
	bcopy(gwid->id_guid, &guid, sizeof (ib_guid_t));
	gwi->gw_system_guid = ntohll(guid);
	bcopy(gwid->id_sysname, gwi->gw_system_name, FIP_SYSNAME_LEN);
	(gwi->gw_system_name)[FIP_SYSNAME_LEN] = '\0';
	bcopy(gwid->id_portname, gwi->gw_port_name, FIP_PORTNAME_LEN);
	(gwi->gw_port_name)[FIP_PORTNAME_LEN] = '\0';

	/*
	 * Keepalive descriptor: the three advertised periods.
	 */
	gwi->gw_adv_period = ntohl(ka->ka_gw_adv_period);
	gwi->gw_ka_period = ntohl(ka->ka_gw_ka_period);
	gwi->gw_vnic_ka_period = ntohl(ka->ka_vnic_ka_period);

	gwi->gw_next = NULL;

	return (ENX_E_SUCCESS);
}
/*
 * Roll back eibnx_fip_make_solicit_pkt(): zero the solicitation-sized
 * region at the start of the wqe's buffer and restore the sgl length to
 * the wqe's full buffer size.
 */
static void
eibnx_rb_fip_make_solicit_pkt(eibnx_wqe_t *swqe)
{
	uint8_t *buf;

	buf = (uint8_t *)(uintptr_t)(swqe->qe_sgl.ds_va);
	bzero(buf, sizeof (fip_solicit_t));

	swqe->qe_sgl.ds_len = swqe->qe_bufsz;
}