#include <sys/types.h>
#include <sys/kmem.h>
#include <sys/conf.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/ksynch.h>
#include <sys/ib/clients/eoib/eib_impl.h>
/*
 * Forward declarations of the file-local routines: per-type buffer setup,
 * wqe pool init/fini, and the common wqe grab/return helpers shared by
 * the tx and rx pools.
 */
static int eib_rsrc_setup_txbufs(eib_t *, int *);
static int eib_rsrc_setup_rxbufs(eib_t *, int *);
static int eib_rsrc_setup_lsobufs(eib_t *, int *);
static void eib_rsrc_init_wqe_pool(eib_t *, eib_wqe_pool_t **,
ib_memlen_t, int);
static void eib_rsrc_fini_wqe_pool(eib_t *, eib_wqe_pool_t **);
static boolean_t eib_rsrc_ok_to_free_pool(eib_t *, eib_wqe_pool_t *, boolean_t);
static int eib_rsrc_grab_wqes(eib_t *, eib_wqe_pool_t *, eib_wqe_t **, uint_t,
uint_t *, int);
static void eib_rsrc_return_wqes(eib_t *, eib_wqe_pool_t *, eib_wqe_t **,
uint_t);
/*
 * Rollback ("rb") counterparts of the setup routines above.  The boolean
 * argument is the "force" flag that is handed down to the checks deciding
 * whether the resources may be released.
 */
static void eib_rb_rsrc_setup_txbufs(eib_t *, boolean_t);
static void eib_rb_rsrc_setup_rxbufs(eib_t *, boolean_t);
static void eib_rb_rsrc_setup_lsobufs(eib_t *, boolean_t);
/*
 * Number of EIB_LSO_BUFSZ-sized buffers allocated for the LSO bucket
 * by eib_rsrc_setup_lsobufs().
 */
static uint_t eib_lso_num_bufs = EIB_LSO_NUM_BUFS;
/*
 * Set up all buffer resources for this instance: the tx pool first, then
 * the LSO buffers (only when the HCA capabilities report an LSO max
 * length, checksum flags and reserved lkey capability), and finally the
 * rx pool.  If a later step fails, the steps already completed are rolled
 * back (non-forced) in reverse order.
 *
 * Returns EIB_E_SUCCESS or EIB_E_FAILURE; on failure the error code is
 * left in *err by whichever setup routine failed.
 */
int
eib_rsrc_setup_bufs(eib_t *ss, int *err)
{
	boolean_t want_lso;

	if (eib_rsrc_setup_txbufs(ss, err) != EIB_E_SUCCESS)
		return (EIB_E_FAILURE);

	want_lso = (ss->ei_caps->cp_lso_maxlen &&
	    ss->ei_caps->cp_cksum_flags &&
	    ss->ei_caps->cp_resv_lkey_capab) ? B_TRUE : B_FALSE;

	if (want_lso && eib_rsrc_setup_lsobufs(ss, err) != EIB_E_SUCCESS) {
		eib_rb_rsrc_setup_txbufs(ss, B_FALSE);
		return (EIB_E_FAILURE);
	}

	if (eib_rsrc_setup_rxbufs(ss, err) == EIB_E_SUCCESS)
		return (EIB_E_SUCCESS);

	/* rx setup failed: undo lso (a no-op if never set up) and tx */
	eib_rb_rsrc_setup_lsobufs(ss, B_FALSE);
	eib_rb_rsrc_setup_txbufs(ss, B_FALSE);

	return (EIB_E_FAILURE);
}
/*
 * Grab up to n_req send wqes from the tx pool into wqes[] and reset the
 * send work request fields of each one (flags, UD destination, opcode,
 * single-entry sgl, chaining/iov handles and the encapsulation header)
 * so that it is ready for reuse.
 *
 *	ss	- EoIB instance; ss->ei_tx must have been set up
 *	wqes	- caller-supplied array with room for n_req pointers
 *	n_req	- number of wqes requested
 *	actual	- if non-NULL, receives the number of wqes actually grabbed
 *		  (written only on success)
 *	pri	- allocation priority, passed to eib_rsrc_grab_wqes()
 *
 * Returns EIB_E_SUCCESS if at least one wqe was grabbed, EIB_E_FAILURE
 * otherwise.
 */
int
eib_rsrc_grab_swqes(eib_t *ss, eib_wqe_t **wqes, uint_t n_req, uint_t *actual,
    int pri)
{
	eib_wqe_t *wqe;
	uint32_t *encap_hdr;
	uint_t n_grabbed = 0;
	uint_t i;

	ASSERT(ss->ei_tx != NULL);

	/*
	 * Collect the count in a local rather than through 'actual': the
	 * underlying helper tolerates a NULL 'actual', so this routine
	 * must not dereference the caller's pointer unconditionally.
	 */
	if (eib_rsrc_grab_wqes(ss, ss->ei_tx, wqes, n_req, &n_grabbed,
	    pri) != EIB_E_SUCCESS) {
		return (EIB_E_FAILURE);
	}

	for (i = 0; i < n_grabbed; i++) {
		wqe = wqes[i];

		wqe->qe_wr.send.wr_flags = IBT_WR_NO_FLAGS;
		wqe->qe_wr.send.wr.ud.udwr_dest = wqe->qe_dest;
		wqe->qe_wr.send.wr_opcode = IBT_WRC_SEND;
		wqe->qe_wr.send.wr_nds = 1;
		wqe->qe_wr.send.wr_sgl = &wqe->qe_sgl;
		wqe->qe_nxt_post = NULL;
		wqe->qe_iov_hdl = NULL;

		encap_hdr = (uint32_t *)(void *)wqe->qe_payload_hdr;
		*encap_hdr = htonl(EIB_TX_ENCAP_HDR);
	}

	if (actual != NULL)
		*actual = n_grabbed;

	return (EIB_E_SUCCESS);
}
/*
 * Grab up to n_req receive wqes from the rx pool.  This is a thin wrapper
 * over eib_rsrc_grab_wqes(); the arguments and return value are passed
 * through unchanged.
 */
int
eib_rsrc_grab_rwqes(eib_t *ss, eib_wqe_t **wqes, uint_t n_req, uint_t *actual,
    int pri)
{
	int rc;

	ASSERT(ss->ei_rx != NULL);

	rc = eib_rsrc_grab_wqes(ss, ss->ei_rx, wqes, n_req, actual, pri);

	return (rc);
}
/*
 * Take enough EIB_LSO_BUFSZ-sized buffers off the LSO bucket's free list
 * to hold req_sz bytes and describe them in sgl[].  Every entry gets the
 * bucket's lkey and a length of EIB_LSO_BUFSZ, except that the last entry
 * is trimmed to the remainder when req_sz is not a multiple of the buffer
 * size.  The number of entries filled in is returned in *nds.
 *
 * Fails (EIB_E_FAILURE) without taking anything if there is no LSO bucket
 * or if fewer than the required number of buffers are free.
 */
int
eib_rsrc_grab_lsobufs(eib_t *ss, uint_t req_sz, ibt_wr_ds_t *sgl, uint32_t *nds)
{
	eib_lsobkt_t *bkt = ss->ei_lso;
	eib_lsobuf_t *cur;
	eib_lsobuf_t *following;
	uint_t last_len;
	uint_t n_bufs;
	uint_t n;

	ASSERT(req_sz != 0);
	ASSERT(sgl != NULL);
	ASSERT(nds != NULL);

	if (bkt == NULL)
		return (EIB_E_FAILURE);

	/* Round the request up to a whole number of buffers */
	last_len = req_sz % EIB_LSO_BUFSZ;
	n_bufs = req_sz / EIB_LSO_BUFSZ;
	if (last_len != 0)
		n_bufs++;

	mutex_enter(&bkt->bk_lock);

	if (bkt->bk_nfree < n_bufs) {
		mutex_exit(&bkt->bk_lock);
		return (EIB_E_FAILURE);
	}

	/* Peel buffers off the head of the free list, one per sgl entry */
	cur = bkt->bk_free_head;
	for (n = 0; n < n_bufs; n++) {
		ASSERT(cur->lb_isfree != 0);
		ASSERT(cur->lb_buf != NULL);

		following = cur->lb_next;

		sgl[n].ds_va = (ib_vaddr_t)(uintptr_t)cur->lb_buf;
		sgl[n].ds_key = bkt->bk_lkey;
		sgl[n].ds_len = EIB_LSO_BUFSZ;

		cur->lb_isfree = 0;
		cur->lb_next = NULL;

		cur = following;
	}
	bkt->bk_free_head = cur;

	/* A non-zero remainder means the last buffer is only partly used */
	if (last_len != 0)
		sgl[n_bufs - 1].ds_len = last_len;

	bkt->bk_nfree -= n_bufs;

	mutex_exit(&bkt->bk_lock);

	*nds = n_bufs;

	return (EIB_E_SUCCESS);
}
/*
 * Grab a single send wqe from the tx pool at the given priority and reset
 * its send work request fields for reuse.  Returns NULL if no wqe could
 * be obtained.
 */
eib_wqe_t *
eib_rsrc_grab_swqe(eib_t *ss, int pri)
{
	eib_wqe_t *swqe = NULL;
	uint32_t *encap_hdr;

	ASSERT(ss->ei_tx != NULL);

	/* On failure swqe simply stays NULL, so the status can be ignored */
	(void) eib_rsrc_grab_wqes(ss, ss->ei_tx, &swqe, 1, NULL, pri);
	if (swqe == NULL)
		return (NULL);

	swqe->qe_wr.send.wr_flags = IBT_WR_NO_FLAGS;
	swqe->qe_wr.send.wr.ud.udwr_dest = swqe->qe_dest;
	swqe->qe_wr.send.wr_opcode = IBT_WRC_SEND;
	swqe->qe_wr.send.wr_nds = 1;
	swqe->qe_wr.send.wr_sgl = &swqe->qe_sgl;
	swqe->qe_nxt_post = NULL;
	swqe->qe_iov_hdl = NULL;

	encap_hdr = (uint32_t *)(void *)swqe->qe_payload_hdr;
	*encap_hdr = htonl(EIB_TX_ENCAP_HDR);

	return (swqe);
}
/*
 * Grab a single receive wqe from the rx pool at the given priority.
 * Returns NULL if no wqe could be obtained.
 */
eib_wqe_t *
eib_rsrc_grab_rwqe(eib_t *ss, int pri)
{
	eib_wqe_t *rwqe = NULL;

	ASSERT(ss->ei_rx != NULL);

	/* On failure rwqe simply stays NULL, so the status can be ignored */
	(void) eib_rsrc_grab_wqes(ss, ss->ei_rx, &rwqe, 1, NULL, pri);

	return (rwqe);
}
/*
 * Return one send wqe to the tx pool.  If a channel is supplied, its
 * count of posted send wqes is decremented as well.
 */
void
eib_rsrc_return_swqe(eib_t *ss, eib_wqe_t *wqe, eib_chan_t *chan)
{
	ASSERT(ss->ei_tx != NULL);

	eib_rsrc_return_wqes(ss, ss->ei_tx, &wqe, 1);

	if (chan == NULL)
		return;

	eib_rsrc_decr_posted_swqe(ss, chan);
}
/*
 * Return one receive wqe to the rx pool.  If a channel is supplied, its
 * count of posted receive wqes is decremented as well.
 */
void
eib_rsrc_return_rwqe(eib_t *ss, eib_wqe_t *wqe, eib_chan_t *chan)
{
	ASSERT(ss->ei_rx != NULL);

	eib_rsrc_return_wqes(ss, ss->ei_rx, &wqe, 1);

	if (chan == NULL)
		return;

	eib_rsrc_decr_posted_rwqe(ss, chan);
}
/*
 * Give back the nds LSO buffers described by sgl_p[] to the LSO bucket.
 * Each buffer's bookkeeping element is located from the offset of its
 * address within the bucket memory, marked free and pushed onto the head
 * of the free list.  Waiters on the bucket cv are woken when this return
 * takes the free count from below EIB_LSO_FREE_BUFS_THRESH to at or
 * above it.  Does nothing if there is no LSO bucket.
 */
void
eib_rsrc_return_lsobufs(eib_t *ss, ibt_wr_ds_t *sgl_p, uint32_t nds)
{
	eib_lsobkt_t *bkt = ss->ei_lso;
	eib_lsobuf_t *lbuf;
	uint8_t *bufva;
	ptrdiff_t slot;
	uint32_t n;

	if (bkt == NULL)
		return;

	mutex_enter(&bkt->bk_lock);

	for (n = 0; n < nds; n++) {
		bufva = (uint8_t *)(uintptr_t)sgl_p[n].ds_va;

		ASSERT(bufva >= bkt->bk_mem);
		ASSERT(bufva < (bkt->bk_mem + bkt->bk_nelem * EIB_LSO_BUFSZ));

		/* Buffer index within the bucket gives the element index */
		slot = ((uintptr_t)bufva - (uintptr_t)bkt->bk_mem) /
		    EIB_LSO_BUFSZ;
		lbuf = &bkt->bk_bufl[slot];

		ASSERT(lbuf->lb_isfree == 0);
		ASSERT(lbuf->lb_buf == bufva);

		lbuf->lb_isfree = 1;
		lbuf->lb_next = bkt->bk_free_head;
		bkt->bk_free_head = lbuf;
	}
	bkt->bk_nfree += nds;

	/* Wake waiters only on the upward crossing of the threshold */
	if ((bkt->bk_nfree >= EIB_LSO_FREE_BUFS_THRESH) &&
	    ((bkt->bk_nfree - nds) < EIB_LSO_FREE_BUFS_THRESH)) {
		cv_broadcast(&bkt->bk_cv);
	}

	mutex_exit(&bkt->bk_lock);
}
/*
 * Decrement the channel's count of posted send wqes under the tx lock.
 * If the channel is being torn down and the count has dropped to zero,
 * signal the tx condition variable.
 */
void
eib_rsrc_decr_posted_swqe(eib_t *ss, eib_chan_t *chan)
{
	ASSERT(chan != NULL);

	mutex_enter(&chan->ch_tx_lock);

	chan->ch_tx_posted--;
	if (chan->ch_tear_down) {
		if (chan->ch_tx_posted == 0)
			cv_signal(&chan->ch_tx_cv);
	}

	mutex_exit(&chan->ch_tx_lock);
}
/*
 * Decrement the channel's count of posted receive wqes under the rx lock.
 *
 * If the channel is being torn down, signal the rx condition variable
 * once the count reaches zero.  Otherwise, if the count has fallen below
 * the channel's low water mark and a refill is not already in progress,
 * mark the channel as refilling, append it to the tail of the instance's
 * rx-post list and signal the rx-post condition variable.
 */
void
eib_rsrc_decr_posted_rwqe(eib_t *ss, eib_chan_t *chan)
{
	eib_chan_t *last;
	boolean_t need_refill = B_FALSE;

	ASSERT(chan != NULL);

	mutex_enter(&chan->ch_rx_lock);

	chan->ch_rx_posted--;
	if (chan->ch_tear_down) {
		if (chan->ch_rx_posted == 0)
			cv_signal(&chan->ch_rx_cv);
	} else if ((chan->ch_rx_posted < chan->ch_lwm_rwqes) &&
	    (chan->ch_rx_refilling == B_FALSE)) {
		chan->ch_rx_refilling = B_TRUE;
		need_refill = B_TRUE;
	}

	mutex_exit(&chan->ch_rx_lock);

	if (!need_refill)
		return;

	/* Queue the channel at the end of the rx-post list */
	mutex_enter(&ss->ei_rxpost_lock);

	chan->ch_rxpost_next = NULL;
	if (ss->ei_rxpost == NULL) {
		ss->ei_rxpost = chan;
	} else {
		last = ss->ei_rxpost;
		while (last->ch_rxpost_next != NULL)
			last = last->ch_rxpost_next;
		last->ch_rxpost_next = chan;
	}
	cv_signal(&ss->ei_rxpost_cv);

	mutex_exit(&ss->ei_rxpost_lock);
}
/*
 * Record that the tx path ran out of send wqes: bump the "no transmit
 * buffer" counter and, unless the pool is already flagged as short, set
 * EIB_TXWQE_SHORT in the pool status and wake everything waiting on the
 * pool's condition variable.
 */
void
eib_rsrc_txwqes_needed(eib_t *ss)
{
	eib_wqe_pool_t *txpool = ss->ei_tx;

	EIB_INCR_COUNTER(&ss->ei_stats->st_noxmitbuf);

	mutex_enter(&txpool->wp_lock);
	if (!(txpool->wp_status & EIB_TXWQE_SHORT)) {
		txpool->wp_status |= EIB_TXWQE_SHORT;
		cv_broadcast(&txpool->wp_cv);
	}
	mutex_exit(&txpool->wp_lock);
}
/*
 * Record that the tx path ran out of LSO buffers: bump the "no transmit
 * buffer" counter and, unless the bucket is already flagged as short, set
 * EIB_LBUF_SHORT in the bucket status and wake everything waiting on the
 * bucket's condition variable.  If no LSO bucket exists, only a warning
 * is logged.
 */
void
eib_rsrc_lsobufs_needed(eib_t *ss)
{
	eib_lsobkt_t *lso = ss->ei_lso;

	EIB_INCR_COUNTER(&ss->ei_stats->st_noxmitbuf);

	if (lso != NULL) {
		mutex_enter(&lso->bk_lock);
		if (!(lso->bk_status & EIB_LBUF_SHORT)) {
			lso->bk_status |= EIB_LBUF_SHORT;
			cv_broadcast(&lso->bk_cv);
		}
		mutex_exit(&lso->bk_lock);
		return;
	}

	EIB_DPRINTF_WARN(ss->ei_instance,
	    "eib_rsrc_lsobufs_needed: "
	    "lso bufs seem to be needed even though "
	    "LSO support was not advertised");
}
/*
 * Update and report the "rx wqes short" state of the pool that owns the
 * given wqe.  The EIB_RXWQE_SHORT flag is set when the pool's free count
 * is at or below EIB_NFREE_RWQES_LOW, cleared when it is at or above
 * EIB_NFREE_RWQES_HWM, and left as it was in between.  Returns B_TRUE if
 * the flag is set after the update.
 */
boolean_t
eib_rsrc_rxpool_low(eib_wqe_t *wqe)
{
	eib_wqe_pool_t *pool = wqe->qe_pool;
	boolean_t is_short;

	mutex_enter(&pool->wp_lock);

	if (pool->wp_nfree <= EIB_NFREE_RWQES_LOW) {
		pool->wp_status |= (EIB_RXWQE_SHORT);
	} else if (pool->wp_nfree >= EIB_NFREE_RWQES_HWM) {
		pool->wp_status &= (~EIB_RXWQE_SHORT);
	}

	is_short = ((pool->wp_status & EIB_RXWQE_SHORT) == EIB_RXWQE_SHORT) ?
	    B_TRUE : B_FALSE;

	mutex_exit(&pool->wp_lock);

	return (is_short);
}
/*
 * Roll back eib_rsrc_setup_bufs(): release the rx, lso and tx buffer
 * resources, i.e. in the reverse of the order in which they were set up.
 * The force flag is handed to each of the individual rollback routines.
 */
void
eib_rb_rsrc_setup_bufs(eib_t *ss, boolean_t force)
{
	/* rx was set up last, so it goes first */
	eib_rb_rsrc_setup_rxbufs(ss, force);

	eib_rb_rsrc_setup_lsobufs(ss, force);

	/* tx was set up first, so it goes last */
	eib_rb_rsrc_setup_txbufs(ss, force);
}
/*
 * Set up the tx wqe pool: create the pool, register its buffer memory
 * with the HCA, initialize every send wqe (UD destination handle, payload
 * header with the tx encapsulation header, sgl and send work request) and
 * start the tx wqe monitor thread.
 *
 * The per-wqe buffer size is the MTU, or EIB_TX_COPY_THRESH if LSO is
 * enabled and that threshold is larger than the MTU.
 *
 * If the tx pool already exists, nothing is done and success is returned.
 * On failure *err is set (EINVAL for memory registration, ENOMEM for UD
 * destination allocation), the partial setup is rolled back and
 * EIB_E_FAILURE is returned.
 */
static int
eib_rsrc_setup_txbufs(eib_t *ss, int *err)
{
	eib_wqe_pool_t *pool;
	eib_wqe_t *wqe;
	ibt_ud_dest_hdl_t ud_dest;
	ibt_mr_attr_t mr_attr;
	ibt_mr_desc_t mr_desc;
	ibt_status_t status;
	kthread_t *monitor;
	uint32_t *encap_hdr;
	uint8_t *bufp;
	uint_t mtu = ss->ei_props->ep_mtu;
	uint_t tx_bufsz;
	uint_t blk;
	uint_t ndx;
	uint_t wqe_num;
	int lso_enabled;

	/* Nothing to do if the pool is still around from an earlier setup */
	if (ss->ei_tx != NULL)
		return (EIB_E_SUCCESS);

	lso_enabled = ss->ei_caps->cp_lso_maxlen &&
	    ss->ei_caps->cp_cksum_flags && ss->ei_caps->cp_resv_lkey_capab;

	tx_bufsz = mtu;
	if ((lso_enabled) && (EIB_TX_COPY_THRESH > mtu))
		tx_bufsz = EIB_TX_COPY_THRESH;

	eib_rsrc_init_wqe_pool(ss, &ss->ei_tx, tx_bufsz, EIB_WP_TYPE_TX);
	pool = ss->ei_tx;

	/* Register the pool's buffer memory with the HCA */
	mr_attr.mr_vaddr = pool->wp_vaddr;
	mr_attr.mr_len = pool->wp_memsz;
	mr_attr.mr_as = NULL;
	mr_attr.mr_flags = IBT_MR_SLEEP;

	status = ibt_register_mr(ss->ei_hca_hdl, ss->ei_pd_hdl, &mr_attr,
	    &pool->wp_mr, &mr_desc);
	if (status != IBT_SUCCESS) {
		EIB_DPRINTF_ERR(ss->ei_instance, "eib_rsrc_setup_txbufs: "
		    "ibt_register_mr() failed for tx "
		    "region (0x%llx, 0x%llx) with ret=%d",
		    mr_attr.mr_vaddr, mr_attr.mr_len, status);

		*err = EINVAL;
		goto setup_failed;
	}
	pool->wp_lkey = mr_desc.md_lkey;

	/* Carve the registered memory into one buffer per wqe */
	bufp = (uint8_t *)(uintptr_t)(pool->wp_vaddr);
	wqe_num = 0;
	for (blk = 0; blk < EIB_BLKS_PER_POOL; blk++) {
		for (ndx = 0; ndx < EIB_WQES_PER_BLK; ndx++, wqe_num++) {
			wqe = &pool->wp_wqe[wqe_num];

			status = ibt_alloc_ud_dest(ss->ei_hca_hdl,
			    IBT_UD_DEST_NO_FLAGS, ss->ei_pd_hdl, &ud_dest);
			if (status != IBT_SUCCESS) {
				EIB_DPRINTF_ERR(ss->ei_instance,
				    "eib_rsrc_setup_txbufs: "
				    "ibt_alloc_ud_dest(hca_hdl=0x%llx) "
				    "failed, ret=%d", ss->ei_hca_hdl, status);

				*err = ENOMEM;
				goto setup_failed;
			}

			/* Ownership, buffer and destination */
			wqe->qe_pool = pool;
			wqe->qe_cpbuf = bufp;
			wqe->qe_bufsz = tx_bufsz;
			wqe->qe_dest = ud_dest;
			wqe->qe_mp = NULL;

			/* Payload header starts with the tx encap header */
			wqe->qe_payload_hdr =
			    kmem_zalloc(EIB_MAX_PAYLOAD_HDR_SZ, KM_SLEEP);
			encap_hdr = (uint32_t *)(void *)wqe->qe_payload_hdr;
			*encap_hdr = htonl(EIB_TX_ENCAP_HDR);

			/* Single sgl entry covering the wqe's copy buffer */
			wqe->qe_sgl.ds_key = pool->wp_lkey;
			wqe->qe_sgl.ds_va = (ib_vaddr_t)(uintptr_t)bufp;
			wqe->qe_sgl.ds_len = wqe->qe_bufsz;

			/* Encode block, index and wqe type */
			wqe->qe_info =
			    ((blk & EIB_WQEBLK_MASK) << EIB_WQEBLK_SHIFT) |
			    ((ndx & EIB_WQENDX_MASK) << EIB_WQENDX_SHIFT) |
			    ((uint_t)EIB_WQE_TX << EIB_WQETYP_SHIFT);

			/* Send work request; wr_id leads back to the wqe */
			wqe->qe_wr.send.wr_id = (ibt_wrid_t)(uintptr_t)wqe;
			wqe->qe_wr.send.wr_trans = IBT_UD_SRV;
			wqe->qe_wr.send.wr_flags = IBT_WR_NO_FLAGS;
			wqe->qe_wr.send.wr.ud.udwr_dest = wqe->qe_dest;
			wqe->qe_wr.send.wr_opcode = IBT_WRC_SEND;
			wqe->qe_wr.send.wr_nds = 1;
			wqe->qe_wr.send.wr_sgl = &wqe->qe_sgl;
			wqe->qe_nxt_post = NULL;
			wqe->qe_iov_hdl = NULL;

			bufp += wqe->qe_bufsz;
		}
	}

	/* Start the tx wqe monitor and remember its thread id */
	monitor = thread_create(NULL, 0, eib_monitor_tx_wqes, ss, 0,
	    &p0, TS_RUN, minclsyspri);
	ss->ei_txwqe_monitor = monitor->t_did;

	return (EIB_E_SUCCESS);

setup_failed:
	eib_rb_rsrc_setup_txbufs(ss, B_FALSE);
	return (EIB_E_FAILURE);
}
/*
 * Set up the rx wqe pool: create the pool, register its buffer memory
 * with the HCA (with local write enabled) and initialize every receive
 * wqe (receive work request, recycle callback, sgl and wqe info).
 *
 * Each rx buffer is sized to hold the MTU plus EIB_GRH_SZ plus
 * EIB_IPHDR_ALIGN_ROOM bytes.
 *
 * If the rx pool already exists, nothing is done and success is returned.
 * On memory registration failure *err is set to EINVAL, the partial setup
 * is rolled back and EIB_E_FAILURE is returned.
 */
static int
eib_rsrc_setup_rxbufs(eib_t *ss, int *err)
{
	eib_wqe_pool_t *pool;
	eib_wqe_t *wqe;
	ibt_mr_attr_t mr_attr;
	ibt_mr_desc_t mr_desc;
	ibt_status_t status;
	uint8_t *bufp;
	uint_t mtu = ss->ei_props->ep_mtu;
	uint_t blk;
	uint_t ndx;
	uint_t wqe_num;

	/* Nothing to do if the pool is still around from an earlier setup */
	if (ss->ei_rx != NULL)
		return (EIB_E_SUCCESS);

	eib_rsrc_init_wqe_pool(ss, &ss->ei_rx,
	    mtu + EIB_GRH_SZ + EIB_IPHDR_ALIGN_ROOM, EIB_WP_TYPE_RX);
	pool = ss->ei_rx;

	/* Register the pool's buffer memory; the HCA writes into it */
	mr_attr.mr_vaddr = pool->wp_vaddr;
	mr_attr.mr_len = pool->wp_memsz;
	mr_attr.mr_as = NULL;
	mr_attr.mr_flags = IBT_MR_SLEEP | IBT_MR_ENABLE_LOCAL_WRITE;

	status = ibt_register_mr(ss->ei_hca_hdl, ss->ei_pd_hdl, &mr_attr,
	    &pool->wp_mr, &mr_desc);
	if (status != IBT_SUCCESS) {
		EIB_DPRINTF_ERR(ss->ei_instance, "eib_rsrc_setup_rxbufs: "
		    "ibt_register_mr() failed for rx "
		    "region (0x%llx, 0x%llx) with ret=%d",
		    mr_attr.mr_vaddr, mr_attr.mr_len, status);

		*err = EINVAL;
		eib_rb_rsrc_setup_rxbufs(ss, B_FALSE);
		return (EIB_E_FAILURE);
	}
	pool->wp_lkey = mr_desc.md_lkey;

	/* Carve the registered memory into one buffer per wqe */
	bufp = (uint8_t *)(uintptr_t)(pool->wp_vaddr);
	wqe_num = 0;
	for (blk = 0; blk < EIB_BLKS_PER_POOL; blk++) {
		for (ndx = 0; ndx < EIB_WQES_PER_BLK; ndx++, wqe_num++) {
			wqe = &pool->wp_wqe[wqe_num];

			/* Ownership and buffer */
			wqe->qe_pool = pool;
			wqe->qe_cpbuf = bufp;
			wqe->qe_bufsz = mtu + EIB_GRH_SZ + EIB_IPHDR_ALIGN_ROOM;
			wqe->qe_mp = NULL;
			wqe->qe_chan = NULL;
			wqe->qe_vnic_inst = -1;

			/* Receive work request; wr_id leads back to the wqe */
			wqe->qe_wr.recv.wr_id = (ibt_wrid_t)(uintptr_t)wqe;
			wqe->qe_wr.recv.wr_nds = 1;
			wqe->qe_wr.recv.wr_sgl = &wqe->qe_sgl;

			/* Free routine and argument for recycling */
			wqe->qe_frp.free_func = eib_data_rx_recycle;
			wqe->qe_frp.free_arg = (caddr_t)wqe;

			/* Single sgl entry covering the wqe's buffer */
			wqe->qe_sgl.ds_key = pool->wp_lkey;
			wqe->qe_sgl.ds_va = (ib_vaddr_t)(uintptr_t)bufp;
			wqe->qe_sgl.ds_len = wqe->qe_bufsz;

			/* Encode block, index and wqe type */
			wqe->qe_info =
			    ((blk & EIB_WQEBLK_MASK) << EIB_WQEBLK_SHIFT) |
			    ((ndx & EIB_WQENDX_MASK) << EIB_WQENDX_SHIFT) |
			    ((uint_t)EIB_WQE_RX << EIB_WQETYP_SHIFT);

			bufp += (mtu + EIB_GRH_SZ + EIB_IPHDR_ALIGN_ROOM);
		}
	}

	return (EIB_E_SUCCESS);
}
/*
 * Set up the LSO buffer bucket: allocate the bucket and its buffer
 * memory, register that memory with the HCA, build the free list of
 * buffer elements (one per EIB_LSO_BUFSZ chunk), publish the bucket in
 * ss->ei_lso and start the LSO buffer monitor thread.
 *
 * If the bucket already exists, nothing is done and success is returned.
 * On memory registration failure *err is set to EINVAL, everything
 * allocated here is freed and EIB_E_FAILURE is returned.
 */
static int
eib_rsrc_setup_lsobufs(eib_t *ss, int *err)
{
	eib_lsobkt_t *bkt;
	eib_lsobuf_t *elem;
	ibt_mr_attr_t attr;
	ibt_mr_desc_t desc;
	kthread_t *kt;
	uint8_t *lsomem;
	uint8_t *memp;
	ibt_status_t ret;
	size_t lsomem_sz;
	uint_t nbufs;
	uint_t i;

	/*
	 * As with the tx and rx pools, reuse a bucket left over from an
	 * earlier setup.  The lso rollback returns without freeing the
	 * bucket when eib_stop_monitor_lso_bufs() does not succeed, and
	 * setting up again on top of it would leak that bucket and start
	 * a second monitor thread.
	 */
	if (ss->ei_lso != NULL)
		return (EIB_E_SUCCESS);

	/*
	 * Sample the buffer count once so that every size computed below
	 * (allocation, registration, free on error, bk_nelem) agrees.
	 */
	nbufs = eib_lso_num_bufs;
	lsomem_sz = nbufs * EIB_LSO_BUFSZ;

	bkt = kmem_zalloc(sizeof (eib_lsobkt_t), KM_SLEEP);
	lsomem = kmem_zalloc(lsomem_sz, KM_SLEEP);

	/* Register the buffer memory with the HCA */
	attr.mr_vaddr = (uint64_t)(uintptr_t)lsomem;
	attr.mr_len = lsomem_sz;
	attr.mr_as = NULL;
	attr.mr_flags = IBT_MR_SLEEP;

	ret = ibt_register_mr(ss->ei_hca_hdl, ss->ei_pd_hdl, &attr,
	    &bkt->bk_mr_hdl, &desc);
	if (ret != IBT_SUCCESS) {
		*err = EINVAL;
		EIB_DPRINTF_ERR(ss->ei_instance, "eib_rsrc_setup_lsobufs: "
		    "ibt_register_mr() failed for LSO "
		    "region (0x%llx, 0x%llx) with ret=%d",
		    attr.mr_vaddr, attr.mr_len, ret);

		kmem_free(lsomem, lsomem_sz);
		kmem_free(bkt, sizeof (eib_lsobkt_t));

		return (EIB_E_FAILURE);
	}
	bkt->bk_lkey = desc.md_lkey;

	bkt->bk_bufl = kmem_zalloc(nbufs * sizeof (eib_lsobuf_t),
	    KM_SLEEP);

	/*
	 * Chain the elements into the free list in array order.  The last
	 * element is terminated inside the loop, so nothing is touched
	 * after the loop when the buffer count is zero.
	 */
	memp = lsomem;
	elem = bkt->bk_bufl;
	for (i = 0; i < nbufs; i++) {
		elem->lb_isfree = 1;
		elem->lb_buf = memp;
		elem->lb_next = (i + 1 < nbufs) ? (elem + 1) : NULL;

		memp += EIB_LSO_BUFSZ;
		elem++;
	}

	bkt->bk_free_head = bkt->bk_bufl;
	bkt->bk_mem = lsomem;
	bkt->bk_nelem = nbufs;
	bkt->bk_nfree = bkt->bk_nelem;

	mutex_init(&bkt->bk_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&bkt->bk_cv, NULL, CV_DEFAULT, NULL);

	/* Publish the bucket before starting the monitor, which gets ss */
	ss->ei_lso = bkt;

	kt = thread_create(NULL, 0, eib_monitor_lso_bufs, ss, 0,
	    &p0, TS_RUN, minclsyspri);
	ss->ei_lsobufs_monitor = kt->t_did;

	return (EIB_E_SUCCESS);
}
/*
 * Allocate and initialize a wqe pool and store it in *wpp (which must be
 * NULL on entry).
 *
 *	ss	- owning EoIB instance, recorded in the pool
 *	wpp	- where to store the new pool
 *	bufsz	- size of the data buffer backing each wqe
 *	wp_type	- EIB_WP_TYPE_TX selects the send low water mark; any
 *		  other value selects the receive low water mark
 *
 * The pool gets an array of EIB_WQES_PER_POOL wqes, a single memory
 * region of EIB_WQES_PER_POOL * bufsz bytes, its lock and cv, and free
 * bitmaps marking every block and every wqe as free.
 */
static void
eib_rsrc_init_wqe_pool(eib_t *ss, eib_wqe_pool_t **wpp, ib_memlen_t bufsz,
    int wp_type)
{
	eib_wqe_pool_t *pool;
	uint_t wqe_arr_sz;
	int blk;

	ASSERT(wpp != NULL);
	ASSERT(*wpp == NULL);

	pool = kmem_zalloc(sizeof (eib_wqe_pool_t), KM_SLEEP);

	/* Identity and policy */
	pool->wp_ss = ss;
	pool->wp_type = wp_type;
	if (wp_type == EIB_WP_TYPE_TX)
		pool->wp_nfree_lwm = EIB_NFREE_SWQES_LWM;
	else
		pool->wp_nfree_lwm = EIB_NFREE_RWQES_LWM;

	/* The wqe array and the buffer memory behind it */
	wqe_arr_sz = EIB_WQES_PER_POOL * sizeof (eib_wqe_t);
	pool->wp_wqe = (eib_wqe_t *)kmem_zalloc(wqe_arr_sz, KM_SLEEP);

	pool->wp_memsz = EIB_WQES_PER_POOL * bufsz;
	pool->wp_vaddr = (ib_vaddr_t)(uintptr_t)kmem_zalloc(pool->wp_memsz,
	    KM_SLEEP);

	mutex_init(&pool->wp_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&pool->wp_cv, NULL, CV_DEFAULT, NULL);

	/*
	 * Everything starts out free: one bit per block in wp_free_blks
	 * and all bits set in each block's wqe bitmap.
	 */
	pool->wp_nfree = EIB_WQES_PER_POOL;
	pool->wp_free_blks = (EIB_BLKS_PER_POOL >= 64) ? (~0) :
	    (((uint64_t)1 << EIB_BLKS_PER_POOL) - 1);
	for (blk = 0; blk < EIB_BLKS_PER_POOL; blk++)
		pool->wp_free_wqes[blk] = ~0;

	*wpp = pool;
}
/*
 * Tear down the wqe pool in *wpp: destroy its cv and lock, free the
 * buffer memory, the wqe array and the pool itself, and set *wpp to NULL.
 */
static void
eib_rsrc_fini_wqe_pool(eib_t *ss, eib_wqe_pool_t **wpp)
{
	eib_wqe_pool_t *pool;

	ASSERT(wpp != NULL);

	pool = *wpp;
	ASSERT(*wpp != NULL);

	cv_destroy(&pool->wp_cv);
	mutex_destroy(&pool->wp_lock);

	/* Sizes mirror the allocations made when the pool was created */
	kmem_free((void *)(uintptr_t)(pool->wp_vaddr), pool->wp_memsz);
	kmem_free(pool->wp_wqe, EIB_WQES_PER_POOL * sizeof (eib_wqe_t));
	kmem_free(pool, sizeof (eib_wqe_pool_t));

	*wpp = NULL;
}
/*
 * Decide whether a wqe pool may be freed.  With force set the answer is
 * always B_TRUE.  Otherwise the pool may be freed only if, under the pool
 * lock, every block is marked free in wp_free_blks and every block's wqe
 * bitmap has all bits set.
 */
static boolean_t
eib_rsrc_ok_to_free_pool(eib_t *ss, eib_wqe_pool_t *wp, boolean_t force)
{
	uint64_t all_blks_free;
	boolean_t ok = B_TRUE;
	int blk;

	ASSERT(wp != NULL);

	if (force == B_TRUE)
		return (B_TRUE);

	/* What wp_free_blks looks like when no block is in use */
	all_blks_free = (EIB_BLKS_PER_POOL >= 64) ? (~0) :
	    (((uint64_t)1 << EIB_BLKS_PER_POOL) - 1);

	mutex_enter(&wp->wp_lock);

	if (wp->wp_free_blks != all_blks_free)
		ok = B_FALSE;

	for (blk = 0; ok && blk < EIB_BLKS_PER_POOL; blk++) {
		if (wp->wp_free_wqes[blk] != (~0))
			ok = B_FALSE;
	}

	mutex_exit(&wp->wp_lock);

	return (ok);
}
/*
 * Common routine to grab up to n_req wqes from a pool.
 *
 *	ss	- EoIB instance (unused here)
 *	wp	- pool to allocate from
 *	wqes	- caller-supplied array with room for n_req pointers
 *	n_req	- number of wqes requested
 *	actual	- if non-NULL, receives the number of wqes grabbed
 *		  (written only on success)
 *	pri	- EIB_WPRI_LO requests are trimmed so that they never take
 *		  the pool's free count below its low water mark; other
 *		  priorities may take whatever is free
 *
 * Free wqes are found through two levels of bitmaps: wp_free_blks has a
 * bit per block that still has a free wqe, and wp_free_wqes[blk] has a
 * bit per free wqe within that block.
 *
 * Returns EIB_E_SUCCESS if at least one wqe was grabbed, EIB_E_FAILURE
 * otherwise.
 */
static int
eib_rsrc_grab_wqes(eib_t *ss, eib_wqe_pool_t *wp, eib_wqe_t **wqes,
    uint_t n_req, uint_t *actual, int pri)
{
	uint_t n_allocd = 0;
	int blk;
	int ndx;
	int wqe_ndx;

	ASSERT(wp != NULL);
	ASSERT(wqes != NULL);

	mutex_enter(&wp->wp_lock);

	/*
	 * Low priority requests may only use what lies above the low
	 * water mark.  The limit is compared as "n_req > headroom" rather
	 * than "(nfree - n_req) < lwm": n_req is unsigned, so the latter
	 * wraps around when n_req exceeds the free count and would let an
	 * oversized request through untrimmed.  The subtraction below
	 * cannot wrap because this branch is only reached when the free
	 * count is above the low water mark.
	 */
	if (pri == EIB_WPRI_LO) {
		if (wp->wp_nfree <= wp->wp_nfree_lwm)
			n_req = 0;
		else if (n_req > (wp->wp_nfree - wp->wp_nfree_lwm))
			n_req = wp->wp_nfree - wp->wp_nfree_lwm;
	}

	for (n_allocd = 0; n_allocd < n_req; n_allocd++) {
		/* No block has a free wqe left */
		if (wp->wp_free_blks == 0)
			break;

		/* Lowest block with a free wqe, then its lowest free wqe */
		blk = EIB_FIND_LSB_SET(wp->wp_free_blks);
		ASSERT(blk != -1);

		ndx = EIB_FIND_LSB_SET(wp->wp_free_wqes[blk]);
		ASSERT(ndx != -1);

		/* Mark the wqe in use; retire the block if now exhausted */
		wp->wp_free_wqes[blk] &= (~((uint64_t)1 << ndx));
		if (wp->wp_free_wqes[blk] == 0)
			wp->wp_free_blks &= (~((uint64_t)1 << blk));

		wqe_ndx = blk * EIB_WQES_PER_BLK + ndx;
		wqes[n_allocd] = &(wp->wp_wqe[wqe_ndx]);
	}
	wp->wp_nfree -= n_allocd;

	mutex_exit(&wp->wp_lock);

	if (n_allocd == 0)
		return (EIB_E_FAILURE);

	if (actual) {
		*actual = n_allocd;
	}

	return (EIB_E_SUCCESS);
}
/*
 * Common routine to return n_wqes wqes to a pool.  Each wqe has its sgl
 * pointed back at its own copy buffer, its mblk, channel and vnic
 * instance cleared and its flag bits reset, and is then marked free in
 * the pool's bitmaps.  For a tx pool, waiters on the pool cv are woken
 * when this return takes the free count from below EIB_NFREE_SWQES_HWM
 * to at or above it.
 */
static void
eib_rsrc_return_wqes(eib_t *ss, eib_wqe_pool_t *wp, eib_wqe_t **wqes,
    uint_t n_wqes)
{
	eib_wqe_t *cur;
	uint_t blk;
	uint_t ndx;
	uint_t n;

	ASSERT(wp != NULL);
	ASSERT(wqes != NULL);

	mutex_enter(&wp->wp_lock);

	for (n = 0; n < n_wqes; n++) {
		cur = wqes[n];

		/* Restore the wqe to its pristine, unowned state */
		cur->qe_sgl.ds_va = (ib_vaddr_t)(uintptr_t)cur->qe_cpbuf;
		cur->qe_sgl.ds_len = cur->qe_bufsz;
		cur->qe_mp = NULL;
		cur->qe_chan = NULL;
		cur->qe_vnic_inst = -1;
		cur->qe_info &= (~EIB_WQEFLGS_MASK);

		/* Mark both the wqe and its block as having free space */
		blk = EIB_WQE_BLK(cur->qe_info);
		ndx = EIB_WQE_NDX(cur->qe_info);

		wp->wp_free_wqes[blk] |= ((uint64_t)1 << ndx);
		wp->wp_free_blks |= ((uint64_t)1 << blk);
	}
	wp->wp_nfree += n_wqes;

	/* Wake tx waiters only on the upward crossing of the HWM */
	if ((wp->wp_type == EIB_WP_TYPE_TX) &&
	    (wp->wp_nfree >= EIB_NFREE_SWQES_HWM) &&
	    ((wp->wp_nfree - n_wqes) < EIB_NFREE_SWQES_HWM)) {
		cv_broadcast(&wp->wp_cv);
	}

	mutex_exit(&wp->wp_lock);
}
/*
 * Roll back eib_rsrc_setup_txbufs().  Does nothing if there is no tx
 * pool, or if eib_rsrc_ok_to_free_pool() says the pool may not be freed
 * yet.  Otherwise the tx wqe monitor is stopped, every wqe's payload
 * header and UD destination are released, the memory region is
 * deregistered and the pool is destroyed.
 */
static void
eib_rb_rsrc_setup_txbufs(eib_t *ss, boolean_t force)
{
	eib_wqe_pool_t *pool = ss->ei_tx;
	eib_wqe_t *wqe;
	ibt_status_t status;
	int n;

	if (pool == NULL)
		return;

	if (!eib_rsrc_ok_to_free_pool(ss, pool, force))
		return;

	eib_stop_monitor_tx_wqes(ss);

	/*
	 * Either field may still be NULL for a given wqe if setup did not
	 * get as far as initializing it.
	 */
	for (n = 0; n < EIB_WQES_PER_POOL; n++) {
		wqe = &pool->wp_wqe[n];

		if (wqe->qe_payload_hdr != NULL) {
			kmem_free(wqe->qe_payload_hdr,
			    EIB_MAX_PAYLOAD_HDR_SZ);
		}

		if (wqe->qe_dest == NULL)
			continue;

		status = ibt_free_ud_dest(wqe->qe_dest);
		if (status != IBT_SUCCESS) {
			EIB_DPRINTF_WARN(ss->ei_instance,
			    "eib_rb_rsrc_setup_txbufs: "
			    "ibt_free_ud_dest() failed, ret=%d",
			    status);
		}
	}

	if (pool->wp_mr) {
		status = ibt_deregister_mr(ss->ei_hca_hdl, pool->wp_mr);
		if (status != IBT_SUCCESS) {
			EIB_DPRINTF_WARN(ss->ei_instance,
			    "eib_rb_rsrc_setup_txbufs: "
			    "ibt_deregister_mr() failed, ret=%d", status);
		}
		pool->wp_mr = NULL;
	}

	eib_rsrc_fini_wqe_pool(ss, &ss->ei_tx);
}
/*
 * Roll back eib_rsrc_setup_rxbufs().  Does nothing if there is no rx
 * pool, or if eib_rsrc_ok_to_free_pool() says the pool may not be freed
 * yet.  Otherwise the memory region is deregistered (a failure is only
 * logged) and the pool is destroyed.
 *
 * Defined static to agree with the file-local prototype and with the
 * tx and lso rollback routines.
 */
static void
eib_rb_rsrc_setup_rxbufs(eib_t *ss, boolean_t force)
{
	eib_wqe_pool_t *rx = ss->ei_rx;
	ibt_status_t ret;

	if (rx == NULL)
		return;

	if (eib_rsrc_ok_to_free_pool(ss, rx, force)) {
		if (rx->wp_mr) {
			if ((ret = ibt_deregister_mr(ss->ei_hca_hdl,
			    rx->wp_mr)) != IBT_SUCCESS) {
				EIB_DPRINTF_WARN(ss->ei_instance,
				    "eib_rb_rsrc_setup_rxbufs: "
				    "ibt_deregister_mr() failed, ret=%d", ret);
			}
			rx->wp_mr = NULL;
		}

		eib_rsrc_fini_wqe_pool(ss, &ss->ei_rx);
	}
}
/*
 * Roll back eib_rsrc_setup_lsobufs().  Does nothing if there is no LSO
 * bucket, or if eib_stop_monitor_lso_bufs() does not report success.
 * Otherwise the buffer element list is freed, the memory region is
 * deregistered (a failure is only logged), the buffer memory is freed,
 * the bucket's cv and lock are destroyed and the bucket itself is freed
 * and detached from the instance.
 */
static void
eib_rb_rsrc_setup_lsobufs(eib_t *ss, boolean_t force)
{
	eib_lsobkt_t *lso = ss->ei_lso;
	ibt_status_t status;

	if (lso == NULL)
		return;

	if (eib_stop_monitor_lso_bufs(ss, force) != EIB_E_SUCCESS)
		return;

	if (lso->bk_bufl != NULL) {
		kmem_free(lso->bk_bufl,
		    lso->bk_nelem * sizeof (eib_lsobuf_t));
		lso->bk_bufl = NULL;
	}

	/* Deregister before the memory behind the region is freed */
	if (lso->bk_mr_hdl) {
		status = ibt_deregister_mr(ss->ei_hca_hdl, lso->bk_mr_hdl);
		if (status != IBT_SUCCESS) {
			EIB_DPRINTF_WARN(ss->ei_instance,
			    "eib_rb_rsrc_setup_lsobufs: "
			    "ibt_deregister_mr() failed, ret=%d", status);
		}
		lso->bk_mr_hdl = NULL;
	}

	if (lso->bk_mem != NULL) {
		kmem_free(lso->bk_mem, lso->bk_nelem * EIB_LSO_BUFSZ);
		lso->bk_mem = NULL;
	}

	cv_destroy(&lso->bk_cv);
	mutex_destroy(&lso->bk_lock);

	kmem_free(lso, sizeof (eib_lsobkt_t));
	ss->ei_lso = NULL;
}