#include <sys/rds.h>
#include <sys/ib/clients/rdsv3/rdsv3.h>
#include <sys/ib/clients/rdsv3/rdma.h>
#include <sys/ib/clients/rdsv3/ib.h>
#include <sys/ib/clients/rdsv3/rdsv3_debug.h>
/*
 * Convert the work-completion status of an RDMA-carrying send into an RDS
 * notification status and hand it to rdsv3_rdma_send_complete().
 *
 * A flushed work request (IBT_WC_WR_FLUSHED_ERR) generates no notification:
 * the function returns before calling rdsv3_rdma_send_complete().
 */
static void
rdsv3_ib_send_rdma_complete(struct rdsv3_message *rm,
    int wc_status)
{
	int rds_status;

	RDSV3_DPRINTF4("rdsv3_ib_send_rdma_complete", "rm: %p, wc_status: %d",
	    rm, wc_status);

	/* Flushed WRs are silently ignored. */
	if (wc_status == IBT_WC_WR_FLUSHED_ERR)
		return;

	if (wc_status == IBT_WC_SUCCESS)
		rds_status = RDS_RDMA_SUCCESS;
	else if (wc_status == IBT_WC_REMOTE_ACCESS_ERR)
		rds_status = RDS_RDMA_REMOTE_ERROR;
	else
		rds_status = RDS_RDMA_OTHER_ERROR;

	rdsv3_rdma_send_complete(rm, rds_status);

	RDSV3_DPRINTF4("rdsv3_ib_send_rdma_complete", "rm: %p, wc_status: %d",
	    rm, wc_status);
}
/*
 * Forward declaration: rdsv3_ib_send_unmap_rdma() below calls this helper
 * before its definition appears in the file.
 */
static void rdsv3_ib_dma_unmap_sg_rdma(struct ib_device *dev,
uint_t num, struct rdsv3_rdma_sg scat[]);
/*
 * Undo the DMA mapping of an RDMA op, if it is currently mapped.
 *
 * The op is marked unmapped (r_mapped = 0) before the mapping is released.
 * When the connection no longer has a CM id, a NULL device is passed down
 * so that rdsv3_ib_dma_unmap_sg_rdma() falls back to the HCA handle cached
 * in the scatter array.
 */
void
rdsv3_ib_send_unmap_rdma(struct rdsv3_ib_connection *ic,
    struct rdsv3_rdma_op *op)
{
	struct ib_device *dev;

	RDSV3_DPRINTF4("rdsv3_ib_send_unmap_rdma", "ic: %p, op: %p", ic, op);

	if (!op->r_mapped)
		return;

	op->r_mapped = 0;
	dev = ic->i_cm_id ? ic->i_cm_id->device : (struct ib_device *)NULL;
	rdsv3_ib_dma_unmap_sg_rdma(dev, op->r_nents, op->r_rdma_sg);
}
/*
 * Tear down everything attached to the message referenced by a send ring
 * entry:
 *   - unmap the message scatterlist (under m_rs_lock) if it is mapped,
 *   - if the message carries an RDMA op: unmap it, report its completion
 *     status, and account its byte count to the RDMA write or read
 *     statistic,
 *   - tell the RDS core the message is unmapped, drop one message
 *     reference, and detach it from the ring entry.
 */
static void
rdsv3_ib_send_unmap_rm(struct rdsv3_ib_connection *ic,
    struct rdsv3_ib_send_work *send,
    int wc_status)
{
	struct rdsv3_message *msg = send->s_rm;

	RDSV3_DPRINTF4("rdsv3_ib_send_unmap_rm", "ic %p send %p rm %p\n",
	    ic, send, msg);

	/* m_count is only examined and cleared while holding m_rs_lock. */
	mutex_enter(&msg->m_rs_lock);
	if (msg->m_count != 0) {
		rdsv3_ib_dma_unmap_sg(ic->i_cm_id->device,
		    msg->m_sg, msg->m_count);
		msg->m_count = 0;
	}
	mutex_exit(&msg->m_rs_lock);

	if (msg->m_rdma_op != NULL) {
		rdsv3_ib_send_unmap_rdma(ic, msg->m_rdma_op);
		rdsv3_ib_send_rdma_complete(msg, wc_status);

		if (msg->m_rdma_op->r_write) {
			rdsv3_stats_add(s_send_rdma_bytes,
			    msg->m_rdma_op->r_bytes);
		} else {
			rdsv3_stats_add(s_recv_rdma_bytes,
			    msg->m_rdma_op->r_bytes);
		}
	}

	rdsv3_message_unmapped(msg);
	rdsv3_message_put(msg);
	send->s_rm = NULL;
}
/*
 * Reset every entry of the connection's send ring so that it references
 * neither a message (s_rm) nor an RDMA op (s_op).
 */
void
rdsv3_ib_send_init_ring(struct rdsv3_ib_connection *ic)
{
	uint32_t slot;

	RDSV3_DPRINTF4("rdsv3_ib_send_init_ring", "ic: %p", ic);

	for (slot = 0; slot < ic->i_send_ring.w_nr; slot++) {
		struct rdsv3_ib_send_work *entry = &ic->i_sends[slot];

		entry->s_rm = NULL;
		entry->s_op = NULL;
	}
}
/*
 * Walk the whole send ring and release whatever is still attached to each
 * entry.  Entries whose opcode is 0xdd (the value the completion handler
 * stores after processing an entry) are skipped.  Messages are unmapped as
 * if their work request had been flushed, so no RDMA notification is
 * generated for them.
 */
void
rdsv3_ib_send_clear_ring(struct rdsv3_ib_connection *ic)
{
	uint32_t slot;

	RDSV3_DPRINTF4("rdsv3_ib_send_clear_ring", "ic: %p", ic);

	for (slot = 0; slot < ic->i_send_ring.w_nr; slot++) {
		struct rdsv3_ib_send_work *entry = &ic->i_sends[slot];

		if (entry->s_opcode != 0xdd) {
			if (entry->s_rm) {
				rdsv3_ib_send_unmap_rm(ic, entry,
				    IBT_WC_WR_FLUSHED_ERR);
			}
			if (entry->s_op)
				rdsv3_ib_send_unmap_rdma(ic, entry->s_op);
		}
	}

	RDSV3_DPRINTF4("rdsv3_ib_send_clear_ring", "Return: ic: %p", ic);
}
/*
 * Process one send-side work completion.
 *
 * ic: IB connection the completion belongs to.
 * wc: the work completion; wc_id identifies either the ACK work request
 *     (RDSV3_IB_ACK_WR_ID) or a send ring position tagged with
 *     RDSV3_IB_SEND_OP.
 *
 * For a ring completion, every entry from the oldest outstanding one up to
 * the position named by wc_id is retired: SEND entries have their message
 * unmapped, the entry is marked with opcode 0xdd, and an RDMA op that failed
 * with a remote access error is reported immediately.  The retired entries
 * are then returned to the ring, RDSV3_LL_SEND_FULL is cleared, and the
 * connection is dropped if the completion status is not success while the
 * connection is up.
 *
 * NOTE(review): the locals `polled` and `ret` are declared but never used.
 */
void
rdsv3_ib_send_cqe_handler(struct rdsv3_ib_connection *ic, ibt_wc_t *wc)
{
struct rdsv3_connection *conn = ic->conn;
struct rdsv3_ib_send_work *send;
uint32_t completed, polled;
uint32_t oldest;
uint32_t i = 0;
int ret;
RDSV3_DPRINTF4("rdsv3_ib_send_cqe_handler",
"wc wc_id 0x%llx status %u byte_len %u imm_data %u\n",
(unsigned long long)wc->wc_id, wc->wc_status,
wc->wc_bytes_xfer, ntohl(wc->wc_immed_data));
rdsv3_ib_stats_inc(s_ib_tx_cq_event);
/*
 * ACK work requests do not occupy a send ring slot: count a stall if the
 * ACK was queued more than HZ/2 ticks ago, finish the ACK and return.
 */
if (wc->wc_id == RDSV3_IB_ACK_WR_ID) {
if (ic->i_ack_queued + HZ/2 < jiffies)
rdsv3_ib_stats_inc(s_ib_tx_stalled);
rdsv3_ib_ack_send_complete(ic);
return;
}
/*
 * Number of ring entries covered by this completion, counted from the
 * oldest outstanding entry to the ring position encoded in wc_id.
 */
oldest = rdsv3_ib_ring_oldest(&ic->i_send_ring);
completed = rdsv3_ib_ring_completed(&ic->i_send_ring,
(wc->wc_id & ~RDSV3_IB_SEND_OP), oldest);
for (i = 0; i < completed; i++) {
send = &ic->i_sends[oldest];
switch (send->s_opcode) {
case IBT_WRC_SEND:
if (send->s_rm)
rdsv3_ib_send_unmap_rm(ic, send,
wc->wc_status);
break;
case IBT_WRC_RDMAW:
case IBT_WRC_RDMAR:
/* Nothing to unmap here for RDMA work requests. */
break;
default:
#ifndef __lock_lint
RDSV3_DPRINTF2("rdsv3_ib_send_cq_comp_handler",
"RDS/IB: %s: unexpected opcode "
"0x%x in WR!",
__func__, send->s_opcode);
#endif
break;
}
/* Mark the entry as processed; rdsv3_ib_send_clear_ring() skips 0xdd. */
send->s_opcode = 0xdd;
if (send->s_queued + HZ/2 < jiffies)
rdsv3_ib_stats_inc(s_ib_tx_stalled);
/*
 * An RDMA op that hit a remote access error is reported right away through
 * the message that owns it (looked up with rdsv3_send_get_message()); the
 * reference returned by the lookup is dropped afterwards.
 */
if (wc->wc_status ==
IBT_WC_REMOTE_ACCESS_ERR && send->s_op) {
struct rdsv3_message *rm;
rm = rdsv3_send_get_message(conn, send->s_op);
if (rm) {
if (rm->m_rdma_op != NULL)
rdsv3_ib_send_unmap_rdma(ic,
rm->m_rdma_op);
rdsv3_ib_send_rdma_complete(rm,
wc->wc_status);
rdsv3_message_put(rm);
}
}
/* Advance to the next ring slot, wrapping at the ring size. */
oldest = (oldest + 1) % ic->i_send_ring.w_nr;
}
rdsv3_ib_ring_free(&ic->i_send_ring, completed);
clear_bit(RDSV3_LL_SEND_FULL, &conn->c_flags);
/* Any non-success completion on a live connection forces a reconnect. */
if (wc->wc_status != IBT_WC_SUCCESS && rdsv3_conn_up(conn)) {
RDSV3_DPRINTF2("rdsv3_ib_send_cqe_handler",
"send completion on %u.%u.%u.%u "
"had status %u, disconnecting and reconnecting\n",
NIPQUAD(conn->c_faddr), wc->wc_status);
rdsv3_conn_drop(conn);
}
RDSV3_DPRINTF4("rdsv3_ib_send_cqe_handler", "Return: conn: %p", ic);
}
/*
 * Atomically take send credits from ic->i_credits and, when appropriate,
 * claim posted-receive credits that the caller should advertise to the peer.
 *
 * ic:           IB connection whose credit word is updated.
 * wanted:       number of send credits the caller would like.
 * adv_credits:  out; number of posted credits claimed for advertisement.
 *               Always written, including when flow control is off (0).
 * need_posted:  when non-zero, claim posted credits even if no send credit
 *               was obtained.
 *
 * Returns the number of send credits actually taken.  When flow control is
 * disabled on the connection, `wanted` is returned unchanged.  If fewer
 * credits than requested are available, RDSV3_LL_SEND_FULL is set on the
 * connection.
 *
 * The update is a compare-and-swap loop: the credit word is re-read and the
 * computation repeated whenever another thread changed it in between.
 */
int
rdsv3_ib_send_grab_credits(struct rdsv3_ib_connection *ic,
    uint32_t wanted, uint32_t *adv_credits, int need_posted)
{
	unsigned int avail, posted, got = 0, advertise;
	long oldval, newval;

	/*
	 * Clear the out-parameter before anything reads it.  Callers such as
	 * rdsv3_ib_xmit() pass the address of an uninitialized local, so the
	 * debug trace below must not look at the incoming value.
	 */
	*adv_credits = 0;

	RDSV3_DPRINTF4("rdsv3_ib_send_grab_credits", "ic: %p, %d %d %d",
	    ic, wanted, *adv_credits, need_posted);

	if (!ic->i_flowctl)
		return (wanted);

try_again:
	advertise = 0;
	oldval = newval = atomic_get(&ic->i_credits);
	posted = IB_GET_POST_CREDITS(oldval);
	avail = IB_GET_SEND_CREDITS(oldval);

	RDSV3_DPRINTF5("rdsv3_ib_send_grab_credits",
	    "wanted (%u): credits=%u posted=%u\n", wanted, avail, posted);

	/*
	 * With no posted credits to advertise, one send credit is held back.
	 * NOTE(review): presumably so a later credit update can still be
	 * sent -- confirm against the protocol description.
	 */
	if (avail && !posted)
		avail--;

	if (avail < wanted) {
		struct rdsv3_connection *conn = ic->i_cm_id->context;

		/* Not enough credits: flag the connection as send-full. */
		set_bit(RDSV3_LL_SEND_FULL, &conn->c_flags);
		got = avail;
	} else {
		got = wanted;
	}
	newval -= IB_SET_SEND_CREDITS(got);

	/*
	 * Posted credits are only claimed when something will be sent
	 * (got != 0) or the caller explicitly asked for them; the amount is
	 * capped at RDSV3_MAX_ADV_CREDIT.
	 */
	if (posted && (got || need_posted)) {
		advertise = min(posted, RDSV3_MAX_ADV_CREDIT);
		newval -= IB_SET_POST_CREDITS(advertise);
	}

	/* Commit; start over if the credit word changed underneath us. */
	if (atomic_cmpxchg(&ic->i_credits, oldval, newval) != oldval)
		goto try_again;

	*adv_credits = advertise;

	RDSV3_DPRINTF4("rdsv3_ib_send_grab_credits", "ic: %p, %d %d %d",
	    ic, got, *adv_credits, need_posted);

	return (got);
}
/*
 * Add `credits` send credits to the connection's credit word.  A request
 * for zero credits is a no-op.  If the connection was flagged
 * RDSV3_LL_SEND_FULL, the flag is cleared and the send worker is queued
 * with no delay.
 */
void
rdsv3_ib_send_add_credits(struct rdsv3_connection *conn, unsigned int credits)
{
	struct rdsv3_ib_connection *ic = conn->c_transport_data;

	if (credits != 0) {
		RDSV3_DPRINTF5("rdsv3_ib_send_add_credits",
		    "credits (%u): current=%u%s\n",
		    credits,
		    IB_GET_SEND_CREDITS(atomic_get(&ic->i_credits)),
		    test_bit(RDSV3_LL_SEND_FULL, &conn->c_flags) ?
		    ", ll_send_full" : "");

		atomic_add_32(&ic->i_credits, IB_SET_SEND_CREDITS(credits));

		/* Restart transmission if it had stalled waiting for credits. */
		if (test_and_clear_bit(RDSV3_LL_SEND_FULL, &conn->c_flags)) {
			rdsv3_queue_delayed_work(rdsv3_wq, &conn->c_send_w,
			    0);
		}

		/* Sanity check on the size of a single credit grant. */
		ASSERT(!(IB_GET_SEND_CREDITS(credits) >= 16384));

		rdsv3_ib_stats_inc(s_ib_rx_credit_updates);

		RDSV3_DPRINTF4("rdsv3_ib_send_add_credits",
		    "Return: conn: %p, credits: %d",
		    conn, credits);
	}
}
/*
 * Record `posted` newly posted credits in the connection's credit word.
 * Once the accumulated posted-credit count reaches 16, IB_ACK_REQUESTED is
 * set in the connection's ACK flags.  A zero `posted` changes nothing.
 */
void
rdsv3_ib_advertise_credits(struct rdsv3_connection *conn, unsigned int posted)
{
	struct rdsv3_ib_connection *ic = conn->c_transport_data;

	RDSV3_DPRINTF4("rdsv3_ib_advertise_credits", "conn: %p, posted: %d",
	    conn, posted);

	if (posted != 0) {
		atomic_add_32(&ic->i_credits, IB_SET_POST_CREDITS(posted));

		if (IB_GET_POST_CREDITS(atomic_get(&ic->i_credits)) >= 16)
			set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
	}
}
/*
 * Fill in one SEND work request for ring position `pos`.
 *
 * SGE 0 always describes the RDS header stored in the connection's header
 * area at slot `pos`.  When `length` is non-zero, SGEs 1..n describe
 * `length` bytes of payload taken from the DMA segment list of `scat`,
 * starting `off` bytes into it; wr_nds is set to the total SGE count
 * (header included).  With a zero `length` the WR carries the header only
 * and `scat` is not touched.
 */
static inline void
rdsv3_ib_xmit_populate_wr(struct rdsv3_ib_connection *ic,
    ibt_send_wr_t *wr, unsigned int pos,
    struct rdsv3_scatterlist *scat, unsigned int off, unsigned int length,
    int send_flags)
{
	ibt_wr_ds_t *hdr_sge;
	int nds = 1;

	RDSV3_DPRINTF4("rdsv3_ib_xmit_populate_wr",
	    "ic: %p, wr: %p scat: %p %d %d %d %d",
	    ic, wr, scat, pos, off, length, send_flags);

	wr->wr_id = pos | RDSV3_IB_SEND_OP;
	wr->wr_trans = IBT_RC_SRV;
	wr->wr_flags = send_flags;
	wr->wr_opcode = IBT_WRC_SEND;

	/* Header element: always the first SGE. */
	hdr_sge = &wr->wr_sgl[0];
	hdr_sge->ds_va = ic->i_send_hdrs_dma +
	    (pos * sizeof (struct rdsv3_header));
	hdr_sge->ds_len = sizeof (struct rdsv3_header);
	hdr_sge->ds_key = ic->i_mr->lkey;

	if (length != 0) {
		ibt_wr_ds_t *src;
		int remaining, chunk;

		ASSERT(length <= scat->length - off);

		/* Skip the segments that lie entirely before `off`. */
		src = scat->sgl;
		if (off != 0) {
			for (; src->ds_len <= off; src++)
				off -= src->ds_len;
		}

		/* Emit one payload SGE per source segment touched. */
		remaining = length;
		do {
			ibt_wr_ds_t *dst = &wr->wr_sgl[nds++];

			dst->ds_va = src->ds_va + off;
			chunk = min(remaining, src->ds_len - off);
			dst->ds_len = chunk;
			dst->ds_key = src->ds_key;

			remaining -= chunk;
			if (remaining != 0) {
				src++;
				off = 0;
			}
		} while (remaining > 0);
	}
	wr->wr_nds = nds;

	RDSV3_DPRINTF4("rdsv3_ib_xmit_populate_wr",
	    "Return: ic: %p, wr: %p scat: %p", ic, wr, scat);
}
/*
 * Queue as much of message `rm` as the send ring and the flow-control
 * credits allow, and post the resulting work requests with ibt_post_send().
 *
 * conn:    connection; its c_transport_data is the IB connection used here.
 * rm:      message to transmit.
 * hdr_off: must be 0 or sizeof (struct rdsv3_header) (asserted).  When it is
 *          0, the header size is added to the returned byte count.
 * sg:      index into rm->m_sg of the scatterlist entry to start from.
 * off:     byte offset inside that entry; must be a multiple of
 *          RDSV3_FRAG_SIZE (asserted).
 *
 * Returns the number of bytes accounted as sent, -ENOMEM when ring entries
 * or credits could not be obtained or the DMA mapping failed, or -EAGAIN
 * when ibt_post_send() fails (the connection is dropped in that case).
 */
int
rdsv3_ib_xmit(struct rdsv3_connection *conn, struct rdsv3_message *rm,
unsigned int hdr_off, unsigned int sg, unsigned int off)
{
struct rdsv3_ib_connection *ic = conn->c_transport_data;
struct ib_device *dev = ic->i_cm_id->device;
struct rdsv3_ib_send_work *send = NULL;
struct rdsv3_ib_send_work *first;
struct rdsv3_ib_send_work *prev;
ibt_send_wr_t *wr;
struct rdsv3_scatterlist *scat;
uint32_t pos;
uint32_t i;
uint32_t work_alloc;
uint32_t credit_alloc;
uint32_t posted;
uint32_t adv_credits = 0;
int send_flags = 0;
int sent;
int ret;
int flow_controlled = 0;
RDSV3_DPRINTF4("rdsv3_ib_xmit", "conn: %p, rm: %p", conn, rm);
ASSERT(!(off % RDSV3_FRAG_SIZE));
ASSERT(!(hdr_off != 0 && hdr_off != sizeof (struct rdsv3_header)));
/*
 * A congestion-bitmap message on a loopback connection is not posted to
 * the hardware: the far-side congestion map is marked as updated and the
 * full message size is reported as sent.
 */
if (conn->c_loopback &&
rm->m_inc.i_hdr.h_flags & RDSV3_FLAG_CONG_BITMAP) {
rdsv3_cong_map_updated(conn->c_fcong, ~(uint64_t)0);
return (sizeof (struct rdsv3_header) + RDSV3_CONG_MAP_BYTES);
}
/*
 * Number of ring entries wanted: one per RDSV3_FRAG_SIZE fragment of the
 * payload, or a single entry for a header-only message.  Note that `i` is
 * left unassigned when __lock_lint is defined.
 */
#ifndef __lock_lint
if (ntohl(rm->m_inc.i_hdr.h_len) == 0)
i = 1;
else
i = ceil(ntohl(rm->m_inc.i_hdr.h_len), RDSV3_FRAG_SIZE);
#endif
/* Ring allocation is all-or-nothing: a partial allocation is undone. */
work_alloc = rdsv3_ib_ring_alloc(&ic->i_send_ring, i, &pos);
if (work_alloc != i) {
rdsv3_ib_ring_unalloc(&ic->i_send_ring, work_alloc);
set_bit(RDSV3_LL_SEND_FULL, &conn->c_flags);
rdsv3_ib_stats_inc(s_ib_tx_ring_full);
ret = -ENOMEM;
goto out;
}
/*
 * With flow control enabled, the number of usable entries is further
 * limited by the send credits that can be grabbed; ring entries that
 * cannot be backed by a credit are handed back.
 */
credit_alloc = work_alloc;
if (ic->i_flowctl) {
credit_alloc = rdsv3_ib_send_grab_credits(ic, work_alloc,
&posted, 0);
adv_credits += posted;
if (credit_alloc < work_alloc) {
rdsv3_ib_ring_unalloc(&ic->i_send_ring,
work_alloc - credit_alloc);
work_alloc = credit_alloc;
flow_controlled++;
}
if (work_alloc == 0) {
rdsv3_ib_ring_unalloc(&ic->i_send_ring, work_alloc);
rdsv3_ib_stats_inc(s_ib_tx_throttle);
ret = -ENOMEM;
goto out;
}
}
/*
 * ic->i_rm is NULL when no message is recorded as in progress on this
 * connection.  In that case this message is set up for transmission: its
 * scatterlist is DMA-mapped, a reference is taken, and the header is
 * finalized (flags, extension headers, piggybacked ACK, checksum).
 */
if (ic->i_rm == NULL) {
if (rm->m_nents) {
rm->m_count = rdsv3_ib_dma_map_sg(dev,
rm->m_sg, rm->m_nents);
RDSV3_DPRINTF5("rdsv3_ib_xmit",
"ic %p mapping rm %p: %d\n", ic, rm, rm->m_count);
if (rm->m_count == 0) {
rdsv3_ib_stats_inc(s_ib_tx_sg_mapping_failure);
rdsv3_ib_ring_unalloc(&ic->i_send_ring,
work_alloc);
ret = -ENOMEM;
RDSV3_DPRINTF2("rdsv3_ib_xmit",
"fail: ic %p mapping rm %p: %d\n",
ic, rm, rm->m_count);
goto out;
}
} else {
rm->m_count = 0;
}
/* Restart the unsignaled-WR and unsignaled-byte budgets. */
ic->i_unsignaled_wrs = rdsv3_ib_sysctl_max_unsig_wrs;
ic->i_unsignaled_bytes = rdsv3_ib_sysctl_max_unsig_bytes;
rdsv3_message_addref(rm);
ic->i_rm = rm;
if (test_bit(RDSV3_MSG_ACK_REQUIRED, &rm->m_flags))
rm->m_inc.i_hdr.h_flags |= RDSV3_FLAG_ACK_REQUIRED;
if (test_bit(RDSV3_MSG_RETRANSMITTED, &rm->m_flags))
rm->m_inc.i_hdr.h_flags |= RDSV3_FLAG_RETRANSMITTED;
/* Carry the RDMA key of an attached RDMA op in an extension header. */
if (rm->m_rdma_op) {
struct rdsv3_ext_header_rdma ext_hdr;
ext_hdr.h_rdma_rkey = htonl(rm->m_rdma_op->r_key);
(void) rdsv3_message_add_extension(&rm->m_inc.i_hdr,
RDSV3_EXTHDR_RDMA, &ext_hdr,
sizeof (ext_hdr));
}
if (rm->m_rdma_cookie) {
(void) rdsv3_message_add_rdma_dest_extension(
&rm->m_inc.i_hdr,
rdsv3_rdma_cookie_key(rm->m_rdma_cookie),
rdsv3_rdma_cookie_offset(rm->m_rdma_cookie));
}
rm->m_inc.i_hdr.h_ack = htonll(rdsv3_ib_piggyb_ack(ic));
rdsv3_message_make_checksum(&rm->m_inc.i_hdr);
/*
 * Request zero send credits with need_posted = 1: this only collects
 * posted credits to advertise in the header.
 */
(void) rdsv3_ib_send_grab_credits(ic, 0, &posted, 1);
adv_credits += posted;
ASSERT(adv_credits <= 255);
}
send = &ic->i_sends[pos];
first = send;
prev = NULL;
scat = &rm->m_sg[sg];
sent = 0;
i = 0;
/* A fenced RDMA op makes every WR built below carry the fence flag. */
if (rm->m_rdma_op && rm->m_rdma_op->r_fence)
send_flags = IBT_WR_SEND_FENCE;
/*
 * Header-only message: build a single WR without payload and jump into
 * the loop body at add_header so the header copy and ring bookkeeping
 * below are shared with the fragment path.
 */
if (ntohl(rm->m_inc.i_hdr.h_len) == 0) {
wr = &ic->i_send_wrs[0];
rdsv3_ib_xmit_populate_wr(ic, wr, pos, NULL, 0, 0, send_flags);
send->s_queued = jiffies;
send->s_op = NULL;
send->s_opcode = wr->wr_opcode;
goto add_header;
}
/*
 * One iteration per fragment: each WR carries the header plus up to
 * RDSV3_FRAG_SIZE bytes of the current scatterlist entry.  The loop stops
 * when the allocated entries are used up or the mapped scatterlist ends.
 */
for (; i < work_alloc && scat != &rm->m_sg[rm->m_count]; i++) {
unsigned int len;
send = &ic->i_sends[pos];
wr = &ic->i_send_wrs[i];
len = min(RDSV3_FRAG_SIZE,
rdsv3_ib_sg_dma_len(dev, scat) - off);
rdsv3_ib_xmit_populate_wr(ic, wr, pos, scat, off, len,
send_flags);
send->s_queued = jiffies;
send->s_op = NULL;
send->s_opcode = wr->wr_opcode;
/* Request a signaled completion once the unsignaled-WR budget is spent. */
if (ic->i_unsignaled_wrs-- == 0) {
ic->i_unsignaled_wrs = rdsv3_ib_sysctl_max_unsig_wrs;
wr->wr_flags |=
IBT_WR_SEND_SIGNAL | IBT_WR_SEND_SOLICIT;
}
/* Likewise once the unsignaled-byte budget is spent. */
ic->i_unsignaled_bytes -= len;
if (ic->i_unsignaled_bytes <= 0) {
ic->i_unsignaled_bytes =
rdsv3_ib_sysctl_max_unsig_bytes;
wr->wr_flags |=
IBT_WR_SEND_SIGNAL | IBT_WR_SEND_SOLICIT;
}
/* When credits cut the batch short, always signal its last WR. */
if (flow_controlled && i == (work_alloc-1)) {
wr->wr_flags |=
IBT_WR_SEND_SIGNAL | IBT_WR_SEND_SOLICIT;
}
RDSV3_DPRINTF5("rdsv3_ib_xmit", "send %p wr %p num_sge %u \n",
send, wr, wr->wr_nds);
sent += len;
off += len;
/* Move on to the next scatterlist entry once this one is consumed. */
if (off == rdsv3_ib_sg_dma_len(dev, scat)) {
scat++;
off = 0;
}
add_header:
/* Every WR sends its own copy of the header from the slot at `pos`. */
(void) memcpy(&ic->i_send_hdrs[pos], &rm->m_inc.i_hdr,
sizeof (struct rdsv3_header));
/* Disabled debug trace of the outgoing header. */
if (0) {
struct rdsv3_header *hdr = &ic->i_send_hdrs[pos];
RDSV3_DPRINTF2("rdsv3_ib_xmit",
"send WR dport=%u flags=0x%x len=%d",
ntohs(hdr->h_dport),
hdr->h_flags,
ntohl(hdr->h_len));
}
/*
 * Advertised credits go out in the first header written only
 * (adv_credits is zeroed afterwards); the header copy is re-checksummed
 * because h_credit changed.
 */
if (adv_credits) {
struct rdsv3_header *hdr = &ic->i_send_hdrs[pos];
hdr->h_credit = adv_credits;
rdsv3_message_make_checksum(hdr);
adv_credits = 0;
rdsv3_ib_stats_inc(s_ib_tx_credit_updates);
}
prev = send;
pos = (pos + 1) % ic->i_send_ring.w_nr;
}
if (hdr_off == 0)
sent += sizeof (struct rdsv3_header);
/*
 * The whole mapped scatterlist has been queued: attach the message to the
 * last ring entry used, force a signaled completion on the last WR, and
 * clear the in-progress message.
 * NOTE(review): `prev` and `wr` are only assigned on the header-only path
 * or inside the loop; this relies on at least one WR having been built.
 */
if (scat == &rm->m_sg[rm->m_count]) {
prev->s_rm = ic->i_rm;
wr->wr_flags |= IBT_WR_SEND_SIGNAL | IBT_WR_SEND_SOLICIT;
ic->i_rm = NULL;
}
/* Give back ring entries and credits that ended up unused. */
if (i < work_alloc) {
rdsv3_ib_ring_unalloc(&ic->i_send_ring, work_alloc - i);
work_alloc = i;
}
if (ic->i_flowctl && i < credit_alloc)
rdsv3_ib_send_add_credits(conn, credit_alloc - i);
ret = ibt_post_send(ib_get_ibt_channel_hdl(ic->i_cm_id),
ic->i_send_wrs, i, &posted);
if (posted != i) {
RDSV3_DPRINTF2("rdsv3_ib_xmit",
"ic %p first %p nwr: %d ret %d:%d",
ic, first, i, ret, posted);
}
/*
 * Post failure: release the ring entries, move the message back to
 * ic->i_rm if it had been attached to the last entry, and drop the
 * connection.
 */
if (ret) {
RDSV3_DPRINTF2("rdsv3_ib_xmit",
"RDS/IB: ib_post_send to %u.%u.%u.%u "
"returned %d\n", NIPQUAD(conn->c_faddr), ret);
rdsv3_ib_ring_unalloc(&ic->i_send_ring, work_alloc);
if (prev->s_rm) {
ic->i_rm = prev->s_rm;
prev->s_rm = NULL;
}
RDSV3_DPRINTF2("rdsv3_ib_xmit", "ibt_post_send failed\n");
rdsv3_conn_drop(ic->conn);
ret = -EAGAIN;
goto out;
}
ret = sent;
RDSV3_DPRINTF4("rdsv3_ib_xmit", "Return: conn: %p, rm: %p", conn, rm);
out:
/* Credits collected for advertisement must have been written to a header. */
ASSERT(!adv_credits);
return (ret);
}
/*
 * Release the IBT memory mappings and SGL buffers of an RDMA scatter array.
 *
 * dev:  IB device whose HCA handle is used; when NULL, the handle cached in
 *       scat[0].hca_hdl is used instead.
 * num:  number of entries in `scat`.
 * scat: scatter array to unmap.
 *
 * Nothing is done when no HCA handle is available.  Otherwise the cached
 * handle is cleared and entries are released in order, stopping at the
 * first entry that has no mapping handle.
 */
static void
rdsv3_ib_dma_unmap_sg_rdma(struct ib_device *dev, uint_t num,
    struct rdsv3_rdma_sg scat[])
{
	ibt_hca_hdl_t hdl;
	uint_t idx;
	int num_sgl;

	RDSV3_DPRINTF4("rdsv3_ib_dma_unmap_sg", "rdma_sg: %p", scat);

	if (dev == NULL) {
		hdl = scat[0].hca_hdl;
		RDSV3_DPRINTF2("rdsv3_ib_dma_unmap_sg_rdma",
		    "NULL dev use cached hca_hdl %p", hdl);
	} else {
		hdl = ib_get_ibt_hca_hdl(dev);
	}

	if (hdl == NULL)
		return;

	scat[0].hca_hdl = NULL;

	for (idx = 0; idx < num && scat[idx].mihdl != NULL; idx++) {
		/* Same sizing as used when the SGL buffer was allocated. */
		num_sgl = (scat[idx].iovec.bytes / PAGESIZE) + 2;
		kmem_free(scat[idx].swr.wr_sgl,
		    (num_sgl * sizeof (ibt_wr_ds_t)));
		scat[idx].swr.wr_sgl = NULL;

		(void) ibt_unmap_mem_iov(hdl, scat[idx].mihdl);
		scat[idx].mihdl = NULL;
	}
}
/*
 * Map the user memory described by an RDMA scatter array for IB access.
 *
 * dev:   IB device whose HCA performs the mapping.
 * scat:  scatter array; for each entry a buf is set up over the umem
 *        cookie, an SGL buffer is allocated and ibt_map_mem_iov() fills in
 *        scat[i].swr and scat[i].mihdl.  The HCA handle is cached in
 *        scat[0].hca_hdl.
 * num:   number of entries in `scat`.
 * scatl: unused.
 *
 * Returns the number of work requests needed to transmit the mapping, or 0
 * on failure (everything mapped so far is released first).
 *
 * The count is computed per entry, because rdsv3_ib_xmit_rdma() never mixes
 * SGL elements of different scatter entries in one work request: entry i
 * consumes ceil(wr_nds / RDSV3_IB_MAX_SGE) work requests.  Summing the
 * element counts first and dividing afterwards would under-count whenever
 * more than one entry is present, leaving the send ring allocation smaller
 * than the number of work requests actually posted.
 */
uint_t
rdsv3_ib_dma_map_sg_rdma(struct ib_device *dev, struct rdsv3_rdma_sg scat[],
    uint_t num, struct rdsv3_scatterlist **scatl)
{
	ibt_hca_hdl_t hca_hdl;
	ibt_iov_attr_t iov_attr;
	struct buf *bp;
	uint_t i;
#ifdef DEBUG
	uint_t j;
#endif
	uint_t count;
	int ret;

	RDSV3_DPRINTF4("rdsv3_ib_dma_map_sg_rdma", "scat: %p, num: %d",
	    scat, num);

	hca_hdl = ib_get_ibt_hca_hdl(dev);
	scat[0].hca_hdl = hca_hdl;
	bzero(&iov_attr, sizeof (ibt_iov_attr_t));
	iov_attr.iov_flags = IBT_IOV_BUF;
	iov_attr.iov_lso_hdr_sz = 0;

	for (i = 0, count = 0; i < num; i++) {
		/* transpose umem_cookie to buf structure */
		bp = ddi_umem_iosetup(scat[i].umem_cookie,
		    scat[i].iovec.addr & PAGEOFFSET, scat[i].iovec.bytes,
		    B_WRITE, 0, 0, NULL, DDI_UMEM_SLEEP);
		if (bp == NULL) {
			/* free resources and return error */
			goto out;
		}

		/*
		 * Room for one element per page plus two extra; the unmap
		 * routine recomputes the same size when freeing the buffer.
		 */
		iov_attr.iov_buf = bp;
		iov_attr.iov_wr_nds = (scat[i].iovec.bytes / PAGESIZE) + 2;
		scat[i].swr.wr_sgl =
		    kmem_zalloc(iov_attr.iov_wr_nds * sizeof (ibt_wr_ds_t),
		    KM_SLEEP);

		ret = ibt_map_mem_iov(hca_hdl, &iov_attr,
		    (ibt_all_wr_t *)&scat[i].swr, &scat[i].mihdl);
		freerbuf(bp);
		if (ret != IBT_SUCCESS) {
			RDSV3_DPRINTF2("rdsv3_ib_dma_map_sg_rdma",
			    "ibt_map_mem_iov returned: %d", ret);
			kmem_free(scat[i].swr.wr_sgl,
			    iov_attr.iov_wr_nds * sizeof (ibt_wr_ds_t));
			/* Do not leave a dangling pointer in the entry. */
			scat[i].swr.wr_sgl = NULL;
			goto out;
		}

		/* Work requests needed for this entry alone (rounded up). */
		count += (scat[i].swr.wr_nds + RDSV3_IB_MAX_SGE - 1) /
		    RDSV3_IB_MAX_SGE;

#ifdef DEBUG
		for (j = 0; j < scat[i].swr.wr_nds; j++) {
			RDSV3_DPRINTF5("rdsv3_ib_dma_map_sg_rdma",
			    "sgl[%d] va %llx len %x", j,
			    scat[i].swr.wr_sgl[j].ds_va,
			    scat[i].swr.wr_sgl[j].ds_len);
		}
#endif
		RDSV3_DPRINTF4("rdsv3_ib_dma_map_sg_rdma",
		    "iovec.bytes: 0x%x scat[%d]swr.wr_nds: %d",
		    scat[i].iovec.bytes, i, scat[i].swr.wr_nds);
	}

	/*
	 * Nothing to transmit (no entries, or no SGL elements produced):
	 * treat it as a failure so the caller never sees a bogus count.
	 */
	if (count == 0)
		goto out;

	RDSV3_DPRINTF4("rdsv3_ib_dma_map_sg_rdma", "Ret: num: %d", count);
	return (count);

out:
	rdsv3_ib_dma_unmap_sg_rdma(dev, num, scat);
	return (0);
}
/*
 * Post the RDMA read or write described by `op` on the connection.
 *
 * conn: connection; its c_transport_data is the IB connection used here.
 * op:   RDMA operation.  If it is not yet mapped (r_mapped == 0) its
 *       scatter array is mapped first and r_count receives the value
 *       returned by rdsv3_ib_dma_map_sg_rdma().
 *
 * op->r_count send ring entries are allocated, then one work request is
 * built for every group of up to RDSV3_IB_MAX_SGE SGL elements of each
 * scatter entry, with the remote address advancing by the bytes covered.
 *
 * Returns -ENOMEM when the mapping fails or the ring cannot supply
 * op->r_count entries; otherwise the status returned by ibt_post_send().
 *
 * NOTE(review): the ring allocation uses op->r_count while the number of
 * work requests posted is `k`, counted by the loop below; the two are
 * expected to agree.
 */
int
rdsv3_ib_xmit_rdma(struct rdsv3_connection *conn, struct rdsv3_rdma_op *op)
{
struct rdsv3_ib_connection *ic = conn->c_transport_data;
struct rdsv3_ib_send_work *send = NULL;
struct rdsv3_rdma_sg *scat;
uint64_t remote_addr;
uint32_t pos;
uint32_t work_alloc;
uint32_t i, j, k, idx;
uint32_t left, count;
uint32_t posted;
int sent;
ibt_status_t status;
ibt_send_wr_t *wr;
ibt_wr_ds_t *sge;
RDSV3_DPRINTF4("rdsv3_ib_xmit_rdma", "rdsv3_ib_conn: %p", ic);
/* Map the op on first use; a mapped op keeps its mapping across calls. */
if (!op->r_mapped) {
op->r_count = rdsv3_ib_dma_map_sg_rdma(ic->i_cm_id->device,
op->r_rdma_sg, op->r_nents, &op->r_sg);
RDSV3_DPRINTF5("rdsv3_ib_xmit_rdma", "ic %p mapping op %p: %d",
ic, op, op->r_count);
if (op->r_count == 0) {
rdsv3_ib_stats_inc(s_ib_tx_sg_mapping_failure);
RDSV3_DPRINTF2("rdsv3_ib_xmit_rdma",
"fail: ic %p mapping op %p: %d",
ic, op, op->r_count);
return (-ENOMEM);
}
op->r_mapped = 1;
}
/* All-or-nothing ring allocation; the op stays mapped on failure. */
work_alloc = rdsv3_ib_ring_alloc(&ic->i_send_ring, op->r_count, &pos);
if (work_alloc != op->r_count) {
rdsv3_ib_ring_unalloc(&ic->i_send_ring, work_alloc);
rdsv3_ib_stats_inc(s_ib_tx_ring_full);
return (-ENOMEM);
}
RDSV3_DPRINTF4("rdsv3_ib_xmit_rdma", "pos %u cnt %u", pos, op->r_count);
scat = &op->r_rdma_sg[0];
sent = 0;
remote_addr = op->r_remote_addr;
/*
 * i walks the scatter entries, k counts work requests built so far, idx
 * indexes the SGL elements of the current entry and `left` is how many of
 * them still have to be placed in a work request.
 */
for (i = 0, k = 0; i < op->r_nents; i++) {
left = scat[i].swr.wr_nds;
for (idx = 0; left > 0; k++) {
send = &ic->i_sends[pos];
send->s_queued = jiffies;
send->s_opcode = op->r_write ? IBT_WRC_RDMAW :
IBT_WRC_RDMAR;
send->s_op = op;
wr = &ic->i_send_wrs[k];
wr->wr_flags = 0;
wr->wr_id = pos | RDSV3_IB_SEND_OP;
wr->wr_trans = IBT_RC_SRV;
wr->wr_opcode = op->r_write ? IBT_WRC_RDMAW :
IBT_WRC_RDMAR;
wr->wr.rc.rcwr.rdma.rdma_raddr = remote_addr;
wr->wr.rc.rcwr.rdma.rdma_rkey = op->r_key;
/* At most RDSV3_IB_MAX_SGE elements go into one work request. */
if (left > RDSV3_IB_MAX_SGE) {
count = RDSV3_IB_MAX_SGE;
left -= RDSV3_IB_MAX_SGE;
} else {
count = left;
left = 0;
}
wr->wr_nds = count;
/*
 * Copy the elements and advance the remote address by their length;
 * `sent` is only used in the debug trace below.
 */
for (j = 0; j < count; j++) {
sge = &wr->wr_sgl[j];
*sge = scat[i].swr.wr_sgl[idx];
remote_addr += scat[i].swr.wr_sgl[idx].ds_len;
sent += scat[i].swr.wr_sgl[idx].ds_len;
idx++;
RDSV3_DPRINTF5("xmit_rdma",
"send_wrs[%d]sgl[%d] va %llx len %x",
k, j, sge->ds_va, sge->ds_len);
}
RDSV3_DPRINTF5("rdsv3_ib_xmit_rdma",
"wr[%d] %p key: %x code: %d tlen: %d",
k, wr, wr->wr.rc.rcwr.rdma.rdma_rkey,
wr->wr_opcode, sent);
/* Request a signaled completion once the unsignaled-WR budget is spent. */
if (ic->i_unsignaled_wrs-- == 0) {
ic->i_unsignaled_wrs =
rdsv3_ib_sysctl_max_unsig_wrs;
wr->wr_flags = IBT_WR_SEND_SIGNAL;
}
pos = (pos + 1) % ic->i_send_ring.w_nr;
}
}
status = ibt_post_send(ib_get_ibt_channel_hdl(ic->i_cm_id),
ic->i_send_wrs, k, &posted);
/* On a post failure the ring entries are returned; the op stays mapped. */
if (status != IBT_SUCCESS) {
RDSV3_DPRINTF2("rdsv3_ib_xmit_rdma",
"RDS/IB: rdma ib_post_send to %u.%u.%u.%u "
"returned %d", NIPQUAD(conn->c_faddr), status);
rdsv3_ib_ring_unalloc(&ic->i_send_ring, work_alloc);
}
RDSV3_DPRINTF4("rdsv3_ib_xmit_rdma", "Ret: %p", ic);
return (status);
}
/*
 * Called when the RDS core has finished a transmit pass on `conn`; gives
 * the IB transport a chance to send a pending ACK.
 */
void
rdsv3_ib_xmit_complete(struct rdsv3_connection *conn)
{
	struct rdsv3_ib_connection *ib_conn;

	ib_conn = conn->c_transport_data;

	RDSV3_DPRINTF4("rdsv3_ib_xmit_complete", "conn: %p", conn);

	rdsv3_ib_attempt_ack(ib_conn);
}