#include <sys/types.h>
#include <sys/smt.h>
#include <sys/strsubr.h>
#include <sys/pattr.h>
#include <sys/dlpi.h>
#include <inet/ip.h>
#include <inet/ip_impl.h>
#include "viona_impl.h"
#define	BNXE_NIC_DRIVER	"bnxe"

/*
 * Tunable: when set (the default), packet data is always copied out of the
 * guest buffers for transmit rather than loaning guest pages to the mblk
 * chain (see viona_tx_copy_needed()).
 */
boolean_t viona_default_tx_copy = B_TRUE;

/*
 * Tunable: presumably an upper bound on the per-link vlp_tx_header_pad
 * value; it is not referenced in this portion of the file — confirm where
 * it is enforced.
 */
uint_t viona_max_header_pad = 256;

/* Protects lazy initialization of viona_force_copy_state. */
kmutex_t viona_force_copy_lock;
static enum viona_force_copy {
	/*
	 * NOTE(review): spellings UNINITALIZED/UNEEDED match the existing
	 * identifiers used throughout this file; renaming would require a
	 * coordinated change at every use site.
	 */
	VFC_UNINITALIZED	= 0,
	VFC_COPY_UNEEDED	= 1,
	VFC_COPY_REQUIRED	= 2,
} viona_force_copy_state = VFC_UNINITALIZED;

/*
 * Per-descriptor state used for zero-copy transmits: d_frtn is handed to
 * desballoc() so freeing the mblk(s) lands in viona_desb_release();
 * d_ref counts outstanding holds (one per loaned mblk plus the allocation
 * hold); d_headers is a pre-allocated buffer the packet headers are copied
 * into; d_pages is the chain of held guest pages released on completion.
 */
struct viona_desb {
	frtn_t		d_frtn;
	viona_vring_t	*d_ring;
	uint_t		d_ref;
	uint32_t	d_len;
	uint16_t	d_cookie;
	uchar_t		*d_headers;
	vmm_page_t	*d_pages;
};

static size_t viona_tx(viona_link_t *, viona_vring_t *);
static void viona_desb_release(viona_desb_t *);
/*
 * Block (on vr_cv) until every outstanding loaned-buffer transfer on this
 * ring has completed.  Caller must hold vr_lock.
 */
static void
viona_tx_wait_outstanding(viona_vring_t *ring)
{
	ASSERT(MUTEX_HELD(&ring->vr_lock));

	for (;;) {
		if (ring->vr_xfer_outstanding == 0)
			break;
		cv_wait(&ring->vr_cv, &ring->vr_lock);
	}
}
/*
 * Determine whether TX packet data must be copied (rather than loaned) for
 * this system.  Always true when viona_default_tx_copy is set; otherwise
 * the answer is computed once (under viona_force_copy_lock) based on
 * whether the bnxe driver is installed, and cached for subsequent calls.
 */
boolean_t
viona_tx_copy_needed(void)
{
	if (viona_default_tx_copy) {
		return (B_TRUE);
	}

	mutex_enter(&viona_force_copy_lock);
	if (viona_force_copy_state == VFC_UNINITALIZED) {
		const major_t bnxe_major = ddi_name_to_major(BNXE_NIC_DRIVER);

		viona_force_copy_state = VFC_COPY_UNEEDED;
		if (bnxe_major != DDI_MAJOR_T_NONE &&
		    ddi_hold_installed_driver(bnxe_major) != NULL) {
			/* Driver is present: force the copy behavior. */
			viona_force_copy_state = VFC_COPY_REQUIRED;
			ddi_rele_driver(bnxe_major);
		}
	}
	const boolean_t result =
	    (viona_force_copy_state == VFC_COPY_REQUIRED);
	mutex_exit(&viona_force_copy_lock);

	return (result);
}
/*
 * Allocate the TX-side resources for a ring of qsz entries: the iovec
 * scratch array, and (when zero-copy is enabled) one viona_desb_t plus a
 * header buffer per descriptor slot.
 */
void
viona_tx_ring_alloc(viona_vring_t *ring, const uint16_t qsz)
{
	const viona_link_params_t *vlp = &ring->vr_link->l_params;

	ring->vr_tx.vrt_header_pad = vlp->vlp_tx_header_pad;

	/* desb entries are only needed when packet data is not copied. */
	if (!vlp->vlp_tx_copy_data) {
		const size_t hdr_buf_sz =
		    VIONA_MAX_HDRS_LEN + ring->vr_tx.vrt_header_pad;
		viona_desb_t *dp;

		dp = kmem_zalloc(sizeof (viona_desb_t) * qsz, KM_SLEEP);
		ring->vr_tx.vrt_desb = dp;
		for (uint_t slot = 0; slot < qsz; slot++, dp++) {
			dp->d_frtn.free_func = viona_desb_release;
			dp->d_frtn.free_arg = (void *)dp;
			dp->d_ring = ring;
			dp->d_headers = kmem_zalloc(hdr_buf_sz, KM_SLEEP);
		}
	}

	ring->vr_tx.vrt_iov = kmem_alloc(sizeof (struct iovec) * qsz, KM_SLEEP);
	ring->vr_tx.vrt_iov_cnt = qsz;
}
/*
 * Release the TX-side resources allocated by viona_tx_ring_alloc() for a
 * ring of qsz entries.
 */
void
viona_tx_ring_free(viona_vring_t *ring, const uint16_t qsz)
{
	viona_desb_t *desb = ring->vr_tx.vrt_desb;

	if (desb != NULL) {
		const size_t hdr_buf_sz =
		    VIONA_MAX_HDRS_LEN + ring->vr_tx.vrt_header_pad;

		/* Free each slot's header buffer before the desb array. */
		for (uint_t slot = 0; slot < qsz; slot++) {
			kmem_free(desb[slot].d_headers, hdr_buf_sz);
		}
		kmem_free(desb, sizeof (viona_desb_t) * qsz);
		ring->vr_tx.vrt_desb = NULL;
	}

	if (ring->vr_tx.vrt_iov != NULL) {
		ASSERT3U(ring->vr_tx.vrt_iov_cnt, !=, 0);

		kmem_free(ring->vr_tx.vrt_iov,
		    sizeof (struct iovec) * ring->vr_tx.vrt_iov_cnt);
		ring->vr_tx.vrt_iov = NULL;
		ring->vr_tx.vrt_iov_cnt = 0;
	}
}
/*
 * Complete a transmitted descriptor chain: push it onto the used ring and
 * notify the guest (subject to its interrupt suppression).
 */
static void
viona_tx_done(viona_vring_t *ring, uint32_t len, uint16_t cookie)
{
	vq_pushchain(ring, len, cookie);

	/* Make the used-ring update visible before raising the interrupt. */
	membar_enter();
	viona_intr_ring(ring, B_FALSE);
}
/* Packets serviced between checks for a pending ring stop/reset request. */
#define	TX_BURST_THRESH	32

/*
 * Worker thread body for the TX side of a virtqueue.  Alternates between
 * draining available descriptors via viona_tx() and sleeping on vr_cv until
 * more work arrives.  Entered (and exited) with vr_lock held; the ring must
 * be in VRS_RUN state on entry.
 */
void
viona_worker_tx(viona_vring_t *ring, viona_link_t *link)
{
	(void) thread_vsetname(curthread, "viona_tx_%u_%p",
	    ring->vr_index, ring);

	ASSERT(MUTEX_HELD(&ring->vr_lock));
	ASSERT3U(ring->vr_state, ==, VRS_RUN);

	mutex_exit(&ring->vr_lock);

	for (;;) {
		size_t cnt_tx = 0, size_tx = 0;
		uint_t burst = 0;

		/* Suppress guest doorbells while we actively drain. */
		viona_ring_disable_notify(ring);

		while (viona_ring_num_avail(ring) != 0) {
			const size_t size_sent = viona_tx(link, ring);

			/* A zero return means nothing was transmitted. */
			if (size_sent != 0) {
				size_tx += size_sent;
				cnt_tx++;
			}

			/*
			 * Every TX_BURST_THRESH packets, peek (under
			 * vr_lock) for a stop/reset request so a constantly
			 * busy guest cannot keep this worker from bailing.
			 */
			burst++;
			if (burst >= TX_BURST_THRESH) {
				mutex_enter(&ring->vr_lock);
				const bool need_bail = vring_need_bail(ring);
				mutex_exit(&ring->vr_lock);

				if (need_bail) {
					break;
				}
				burst = 0;
			}
		}

		VIONA_PROBE2(tx, viona_link_t *, link, size_t, cnt_tx);
		if (cnt_tx != 0) {
			viona_ring_stat_accept(ring, cnt_tx, size_tx);
		}

		/*
		 * Re-enable notifications, then re-check for descriptors
		 * which may have been posted concurrently with the enable.
		 */
		viona_ring_enable_notify(ring);
		membar_enter();

		if (viona_ring_num_avail(ring) == 0 &&
		    (link->l_features & VIRTIO_F_RING_NOTIFY_ON_EMPTY) != 0) {
			/* Feature obligates an interrupt on an empty ring. */
			viona_intr_ring(ring, B_TRUE);
		}

		mutex_enter(&ring->vr_lock);
		for (;;) {
			if (vring_need_bail(ring)) {
				/*
				 * Stop requested: drain outstanding loaned
				 * buffers, then return with vr_lock held.
				 */
				ring->vr_state = VRS_STOP;
				viona_tx_wait_outstanding(ring);
				return;
			}

			if (vmm_drv_lease_expired(ring->vr_lease)) {
				/*
				 * All loaned buffers must be returned before
				 * the lease (and the guest memory mappings
				 * behind it) can be renewed.
				 */
				ring->vr_state_flags |= VRSF_RENEW;
				viona_tx_wait_outstanding(ring);

				const boolean_t renewed =
				    viona_ring_lease_renew(ring);
				ring->vr_state_flags &= ~VRSF_RENEW;

				if (!renewed) {
					/* Renewal failed: stop the ring. */
					ring->vr_state = VRS_STOP;
					return;
				}
			}

			if (viona_ring_num_avail(ring) != 0) {
				break;
			}

			/* Sleep until kicked (or interrupted by a signal). */
			(void) cv_wait_sig(&ring->vr_cv, &ring->vr_lock);
		}
		mutex_exit(&ring->vr_lock);
	}
}
/*
 * desballoc() free callback for loaned TX buffers.  Called once per mblk
 * referencing the desb; when only the allocation hold remains, the chain is
 * completed: guest pages are released, the descriptor is pushed to the used
 * ring, and the desb is marked reusable.
 */
static void
viona_desb_release(viona_desb_t *dp)
{
	viona_vring_t *ring = dp->d_ring;
	uint_t ref;
	uint32_t len;
	uint16_t cookie;

	ref = atomic_dec_uint_nv(&dp->d_ref);
	if (ref > 1) {
		/* Other loaned mblks still reference this desb. */
		return;
	}

	/*
	 * The remaining hold is the one taken at allocation; capture the
	 * completion details before tearing the desb down.
	 */
	len = dp->d_len;
	cookie = dp->d_cookie;

	dp->d_len = 0;
	dp->d_cookie = 0;
	vmm_drv_page_release_chain(dp->d_pages);
	dp->d_pages = NULL;

	/*
	 * Ensure all other changes to the desb are visible prior to zeroing
	 * d_ref, which marks the entry free for reuse by a new transmit.
	 */
	membar_exit();
	dp->d_ref = 0;

	viona_tx_done(ring, len, cookie);

	/* Wake anyone waiting for outstanding transfers to drain. */
	mutex_enter(&ring->vr_lock);
	if ((--ring->vr_xfer_outstanding) == 0) {
		cv_broadcast(&ring->vr_cv);
	}
	mutex_exit(&ring->vr_lock);
}
/*
 * Sanity-check a guest checksum-offload request: the checksum start must
 * fall within the packet but past the L2 header, and the 16-bit checksum
 * field itself must lie within the packet and within the portion of it
 * which was copied into host memory (copied_len).
 */
static boolean_t
viona_tx_csum_req_valid(const struct virtio_net_mrgrxhdr *hdr,
    const mac_ether_offload_info_t *meoi, uint_t copied_len)
{
	const uint_t start = hdr->vrh_csum_start;
	const uint_t csum_off = hdr->vrh_csum_offset + start;

	if (start < meoi->meoi_len &&
	    start >= meoi->meoi_l2hlen &&
	    csum_off < meoi->meoi_len &&
	    (csum_off + sizeof (uint16_t)) <= copied_len) {
		return (B_TRUE);
	}
	return (B_FALSE);
}
/*
 * Arrange for full hardware checksum of the packet: zero the target
 * checksum field (so hardware computes from scratch) and tag the mblk with
 * HCK_FULLCKSUM plus any caller-supplied flags.
 */
static void
viona_tx_hcksum_full(mblk_t *mp, const struct virtio_net_mrgrxhdr *hdr,
    const mac_ether_offload_info_t *meoi, uint32_t added_flags)
{
	const uint_t field_off = hdr->vrh_csum_start + hdr->vrh_csum_offset;
	uint16_t *csump = (uint16_t *)(mp->b_rptr + field_off);

	*csump = 0;
	mac_hcksum_set(mp, 0, 0, 0, 0, HCK_FULLCKSUM | added_flags);
}
/*
 * Arrange for partial (pseudo-header-seeded) hardware checksum.  MAC
 * expects the offsets relative to the start of the L3 header, so the
 * guest's frame-relative offsets are adjusted by the L2 header length.
 */
static void
viona_tx_hcksum_partial(mblk_t *mp, const struct virtio_net_mrgrxhdr *hdr,
    const mac_ether_offload_info_t *meoi, uint32_t added_flags)
{
	const uint32_t l2len = meoi->meoi_l2hlen;
	const uint32_t start = hdr->vrh_csum_start - l2len;
	const uint32_t stuff = start + hdr->vrh_csum_offset;
	const uint32_t end = meoi->meoi_len - l2len;

	mac_hcksum_set(mp, start, stuff, end, 0,
	    HCK_PARTIALCKSUM | added_flags);
}
/*
 * Translate a guest checksum/LSO offload request into MAC offload flags on
 * the packet.  Returns B_FALSE when the request cannot be honored (the
 * caller records an error stat; per the caller in viona_tx(), the packet is
 * still transmitted).
 */
static boolean_t
viona_tx_offloads(viona_vring_t *ring, const struct virtio_net_mrgrxhdr *hdr,
    const mac_ether_offload_info_t *meoi, mblk_t *mp, uint32_t len)
{
	viona_link_t *link = ring->vr_link;
	const uint32_t cap_csum = link->l_cap_csum;
	const uint16_t gso_size = LE_16(hdr->vrh_gso_size);

	/* Reject offsets which fall outside the packet or copied headers. */
	if (!viona_tx_csum_req_valid(hdr, meoi, MBLKL(mp))) {
		VIONA_PROBE2(fail_hcksum, viona_link_t *, link, mblk_t *, mp);
		VIONA_RING_STAT_INCR(ring, fail_hcksum);
		return (B_FALSE);
	}

	const uint16_t ftype = meoi->meoi_l3proto;
	const uint8_t ipproto = meoi->meoi_l4proto;
	if (ftype != ETHERTYPE_IP && ftype != ETHERTYPE_IPV6) {
		/* Only IPv4/IPv6 checksum requests are supported. */
		VIONA_PROBE2(fail_hcksum_proto, viona_link_t *, link,
		    mblk_t *, mp);
		VIONA_RING_STAT_INCR(ring, fail_hcksum_proto);
		return (B_FALSE);
	}

	/* TCPv4 LSO request from the guest. */
	if ((hdr->vrh_gso_type & VIRTIO_NET_HDR_GSO_TCPV4) != 0 &&
	    ftype == ETHERTYPE_IP) {
		if ((link->l_features & VIRTIO_NET_F_HOST_TSO4) == 0) {
			/* Guest requested TSO4 without negotiating it. */
			VIONA_PROBE2(tx_gso_fail, viona_link_t *, link,
			    mblk_t *, mp);
			VIONA_RING_STAT_INCR(ring, tx_gso_fail);
			return (B_FALSE);
		}

		lso_info_set(mp, gso_size, HW_LSO);

		/* LSO requires some form of IPv4 checksum capability. */
		ASSERT3U(cap_csum &
		    (HCKSUM_INET_PARTIAL | HCKSUM_INET_FULL_V4), !=, 0);
		if ((cap_csum & HCKSUM_INET_FULL_V4) != 0) {
			viona_tx_hcksum_full(mp, hdr, meoi, HW_LSO);
		} else if ((cap_csum & HCKSUM_INET_PARTIAL) != 0) {
			/*
			 * Partial checksum offload needs the TCP checksum
			 * field pre-seeded with the pseudo-header sum
			 * (addresses + IP_TCP_CSUM_COMP), folded to 16 bits.
			 */
			ipha_t *ipha =
			    (ipha_t *)(mp->b_rptr + meoi->meoi_l2hlen);
			uint16_t *cksump =
			    IPH_TCPH_CHECKSUMP(ipha, IPH_HDR_LENGTH(ipha));
			uint32_t cksum = IP_TCP_CSUM_COMP;
			const ipaddr_t src = ipha->ipha_src;
			const ipaddr_t dst = ipha->ipha_dst;

			cksum += (dst >> 16) + (dst & 0xffff) +
			    (src >> 16) + (src & 0xffff);
			cksum = (cksum & 0xffff) + (cksum >> 16);
			*cksump = (cksum & 0xffff) + (cksum >> 16);

			/*
			 * Also request hardware IPv4 header checksum when
			 * the link is capable, zeroing the field first.
			 */
			uint32_t v4csum = 0;
			if ((cap_csum & HCKSUM_IPHDRCKSUM) != 0) {
				v4csum = HCK_IPV4_HDRCKSUM;
				ipha->ipha_hdr_checksum = 0;
			}

			viona_tx_hcksum_partial(mp, hdr, meoi, HW_LSO | v4csum);
		} else {
			VIONA_PROBE2(tx_gso_fail, viona_link_t *, link,
			    mblk_t *, mp);
			VIONA_RING_STAT_INCR(ring, tx_gso_fail);
			return (B_FALSE);
		}

		return (B_TRUE);
	}

	/*
	 * Non-LSO: prefer partial checksum, which covers TCP/UDP over both
	 * IPv4 and IPv6.
	 */
	if ((cap_csum & HCKSUM_INET_PARTIAL) != 0 &&
	    (ipproto == IPPROTO_TCP || ipproto == IPPROTO_UDP)) {
		viona_tx_hcksum_partial(mp, hdr, meoi, 0);
		return (B_TRUE);
	}

	/* Fall back to full checksum per address family, when capable. */
	if (ftype == ETHERTYPE_IP) {
		if ((cap_csum & HCKSUM_INET_FULL_V4) != 0 &&
		    (ipproto == IPPROTO_TCP || ipproto == IPPROTO_UDP)) {
			viona_tx_hcksum_full(mp, hdr, meoi, 0);
			return (B_TRUE);
		}

		VIONA_PROBE2(fail_hcksum, viona_link_t *, link, mblk_t *, mp);
		VIONA_RING_STAT_INCR(ring, fail_hcksum);
		return (B_FALSE);
	} else if (ftype == ETHERTYPE_IPV6) {
		if ((cap_csum & HCKSUM_INET_FULL_V6) != 0 &&
		    (ipproto == IPPROTO_TCP || ipproto == IPPROTO_UDP)) {
			viona_tx_hcksum_full(mp, hdr, meoi, 0);
			return (B_TRUE);
		}

		VIONA_PROBE2(fail_hcksum6, viona_link_t *, link, mblk_t *, mp);
		VIONA_RING_STAT_INCR(ring, fail_hcksum6);
		return (B_FALSE);
	}

	/* No capability matched the request. */
	VIONA_PROBE2(fail_hcksum_proto, viona_link_t *, link, mblk_t *, mp);
	VIONA_RING_STAT_INCR(ring, fail_hcksum_proto);
	return (B_FALSE);
}
/*
 * Allocate the leading mblk for a transmit.  In zero-copy mode this claims
 * the desb entry for `cookie` and wraps its pre-allocated header buffer
 * with desballoc(), storing the claimed desb through dpp; in copy mode a
 * plain allocb() sized for the whole packet (plus pad) is used.  Returns
 * NULL on allocation failure or if the desb entry is still in use.
 */
static mblk_t *
viona_tx_alloc_headers(viona_vring_t *ring, uint16_t cookie, viona_desb_t **dpp,
    uint32_t len)
{
	ASSERT3P(*dpp, ==, NULL);
	mblk_t *mp = NULL;
	const size_t header_pad = ring->vr_tx.vrt_header_pad;

	if (ring->vr_tx.vrt_desb != NULL) {
		viona_desb_t *dp = &ring->vr_tx.vrt_desb[cookie];
		const size_t header_sz = VIONA_MAX_HDRS_LEN + header_pad;

		/*
		 * Claim the entry with a 0 -> 1 transition; a non-zero
		 * d_ref means a prior transmit for this slot is still
		 * outstanding, so the send cannot proceed.
		 */
		if (atomic_cas_uint(&dp->d_ref, 0, 1) != 0) {
			return (NULL);
		}

		dp->d_cookie = cookie;
		mp = desballoc(dp->d_headers, header_sz, 0, &dp->d_frtn);

		/* A successful desballoc() adds a second hold on the desb. */
		if (mp != NULL) {
			dp->d_ref++;
			*dpp = dp;
		} else {
			/* Roll back the claim on failure. */
			dp->d_ref = 0;
		}
	} else {
		/* Copy mode: single buffer big enough for the packet. */
		mp = allocb(len + header_pad, 0);
	}

	/* Reserve the configured pad ahead of where data will be written. */
	if (mp != NULL && header_pad != 0) {
		mp->b_rptr = mp->b_wptr = (DB_BASE(mp) + header_pad);
	}

	return (mp);
}
/*
 * Copy packet data out of the guest buffers (iob) into mp and parse the
 * headers into meoi.  In copy mode the entire packet is copied; in
 * zero-copy mode only as much as the header buffer holds, falling back to
 * copying the remainder if the full L2-L4 headers did not fit.  Returns
 * B_FALSE on parse or allocation failure.
 */
static boolean_t
viona_tx_copy_headers(viona_vring_t *ring, iov_bunch_t *iob, mblk_t *mp,
    mac_ether_offload_info_t *meoi)
{
	ASSERT(mp->b_cont == NULL);

	if (ring->vr_tx.vrt_desb == NULL) {
		/* Copy mode: mp was sized for the whole packet. */
		const uint32_t pkt_size = iob->ib_remain;

		VERIFY(MBLKTAIL(mp) >= pkt_size);
		VERIFY(iov_bunch_copy(iob, mp->b_wptr, pkt_size));
		mp->b_wptr += pkt_size;
		mac_ether_offload_info(mp, meoi);
		return (B_TRUE);
	}

	/* Zero-copy mode: copy in at most what the header buffer holds. */
	const uint32_t copy_sz = MIN(iob->ib_remain, MBLKTAIL(mp));
	VERIFY(iov_bunch_copy(iob, mp->b_wptr, copy_sz));
	mp->b_wptr += copy_sz;

	if (iob->ib_remain == 0) {
		/* The whole packet fit into the header buffer. */
		mac_ether_offload_info(mp, meoi);
		return (B_TRUE);
	}

	/* Parse what was copied to see if all the headers are present. */
	mac_ether_offload_info(mp, meoi);
	if ((meoi->meoi_flags & MEOI_L2INFO_SET) == 0) {
		/* Could not even parse the L2 header; give up. */
		return (B_FALSE);
	}
	if ((meoi->meoi_flags & MEOI_L4INFO_SET) != 0) {
		const uint32_t full_hdr_sz =
		    meoi->meoi_l2hlen + meoi->meoi_l3hlen + meoi->meoi_l4hlen;
		if (copy_sz >= full_hdr_sz) {
			/* All L2-L4 headers live in the copied region. */
			return (B_TRUE);
		}
	}

	/*
	 * The headers extend beyond what was copied: copy the rest of the
	 * packet into a continuation mblk and re-parse the complete chain.
	 */
	const uint32_t remain_sz = iob->ib_remain;
	mblk_t *remain_mp = allocb(remain_sz, 0);
	if (remain_mp == NULL) {
		return (B_FALSE);
	}
	VERIFY(iov_bunch_copy(iob, remain_mp->b_wptr, remain_sz));
	remain_mp->b_wptr += remain_sz;
	mp->b_cont = remain_mp;
	mac_ether_offload_info(mp, meoi);
	return (B_TRUE);
}
/*
 * Attempt to transmit a single packet from the ring.  Pops one descriptor
 * chain, copies (or loans) its data into an mblk chain, applies requested
 * offloads, and hands the result to mac_tx().  Returns the packet length
 * delivered to MAC, or 0 when nothing was sent (no descriptors, or drop).
 */
static size_t
viona_tx(viona_link_t *link, viona_vring_t *ring)
{
	struct iovec *iov = ring->vr_tx.vrt_iov;
	const uint_t max_segs = ring->vr_tx.vrt_iov_cnt;
	uint16_t cookie;
	vmm_page_t *pages = NULL;
	uint32_t total_len;
	mblk_t *mp_head = NULL;
	viona_desb_t *dp = NULL;

	ASSERT(iov != NULL);

	const int n = vq_popchain(ring, iov, max_segs, &cookie, &pages,
	    &total_len);
	if (n == 0) {
		/* Nothing available on the ring. */
		VIONA_PROBE1(tx_absent, viona_vring_t *, ring);
		VIONA_RING_STAT_INCR(ring, tx_absent);
		return (0);
	} else if (n < 0) {
		/* Error accounting is handled within vq_popchain(). */
		return (0);
	}

	iov_bunch_t iob = {
		.ib_iov = iov,
		.ib_remain = total_len,
	};

	/*
	 * Modern virtio (or legacy with MRG_RXBUF) uses the larger header
	 * carrying vrh_bufs; otherwise the short header is read and
	 * vrh_bufs is synthesized as 0.
	 */
	struct virtio_net_mrgrxhdr hdr;
	uint32_t vio_hdr_len = 0;
	if (ring->vr_link->l_modern ||
	    ((link->l_features & VIRTIO_NET_F_MRG_RXBUF) != 0)) {
		vio_hdr_len = sizeof (struct virtio_net_mrgrxhdr);
	} else {
		vio_hdr_len = sizeof (struct virtio_net_hdr);
		hdr.vrh_bufs = 0;
	}

	const uint32_t pkt_len = total_len - vio_hdr_len;
	if (!iov_bunch_copy(&iob, &hdr, vio_hdr_len)) {
		/* Chain too short to even hold the virtio header. */
		goto drop_fail;
	}

	/* Enforce sane packet bounds (underflowed pkt_len also trips this). */
	if (pkt_len > VIONA_MAX_PACKET_SIZE ||
	    pkt_len < sizeof (struct ether_header)) {
		goto drop_fail;
	}

	mp_head = viona_tx_alloc_headers(ring, cookie, &dp, pkt_len);
	if (mp_head == NULL) {
		goto drop_fail;
	}

	/*
	 * Copy in at least the headers (everything, in copy mode), parsing
	 * them into meoi as we go.
	 */
	mac_ether_offload_info_t meoi = { 0 };
	if (!viona_tx_copy_headers(ring, &iob, mp_head, &meoi)) {
		goto drop_fail;
	}

	if (dp != NULL && iob.ib_remain != 0) {
		/*
		 * Zero-copy: loan the remaining guest buffers to the mblk
		 * chain via desballoc(), taking a desb hold per chunk.
		 */
		uint32_t chunk_sz;
		caddr_t chunk;
		mblk_t *mp_tail = mp_head;

		/* Advance to the tail of the copied-header chain. */
		while (mp_tail->b_cont != NULL) {
			mp_tail = mp_tail->b_cont;
		}

		while (iov_bunch_next_chunk(&iob, &chunk, &chunk_sz)) {
			mblk_t *mp = desballoc((uchar_t *)chunk, chunk_sz, 0,
			    &dp->d_frtn);
			if (mp == NULL) {
				goto drop_fail;
			}

			mp->b_wptr += chunk_sz;
			dp->d_ref++;
			mp_tail->b_cont = mp;
			mp_tail = mp;
		}
	} else {
		/* The copy path must have consumed the entire packet. */
		VERIFY0(iob.ib_remain);
	}

	if (VNETHOOK_INTERESTED_OUT(link->l_neti)) {
		/*
		 * Hold the desb across the hook call so a consumer freeing
		 * the chain cannot trigger completion mid-hook.
		 */
		if (dp != NULL)
			dp->d_ref++;

		mblk_t *mp = mp_head;
		if (viona_hook(link, ring, &mp, B_TRUE) != 0) {
			if (mp != NULL)
				freemsgchain(mp);
			goto drop_hook;
		}

		if (dp != NULL) {
			dp->d_ref--;

			/*
			 * If only the allocation hold remains, the hook
			 * released every loaned-buffer reference; continue
			 * as a fully-copied (non-desb) send.
			 */
			if (dp->d_ref == 1) {
				dp->d_cookie = 0;
				dp->d_ref = 0;
				dp = NULL;
			}
		}
	}

	if ((link->l_features & VIRTIO_NET_F_CSUM) != 0 &&
	    (hdr.vrh_flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) != 0) {
		/* Offload setup failure is counted but the packet still goes. */
		if (!viona_tx_offloads(ring, &hdr, &meoi, mp_head, pkt_len)) {
			viona_ring_stat_error(ring);
		}
	}

	if (dp != NULL) {
		/*
		 * Zero-copy send: completion is deferred until the final
		 * desb reference is dropped in viona_desb_release().
		 */
		dp->d_len = total_len;
		dp->d_pages = pages;
		mutex_enter(&ring->vr_lock);
		ring->vr_xfer_outstanding++;
		mutex_exit(&ring->vr_lock);
	} else {
		/*
		 * Fully-copied send: guest pages are no longer needed, so
		 * the descriptor chain can be completed immediately.
		 */
		vmm_drv_page_release_chain(pages);
		viona_tx_done(ring, total_len, cookie);
	}

	VIONA_PROBE3(pkt__tx, viona_vring_t *, ring, mblk_t *, mp_head,
	    size_t, pkt_len);

	smt_begin_unsafe();
	(void) mac_tx(link->l_mch, mp_head, 0, MAC_DROP_ON_NO_DESC, NULL);
	smt_end_unsafe();

	return (pkt_len);

drop_fail:
	viona_ring_stat_error(ring);

	/*
	 * Take an extra desb hold (when one is in use) so freeing mp_head
	 * cannot drop d_ref low enough to fire the completion logic in
	 * viona_desb_release() before the cleanup below runs.
	 */
	if (dp != NULL) {
		dp->d_ref++;
	}
	freemsgchain(mp_head);

drop_hook:
	if (dp != NULL) {
		/* Only the allocation hold and our extra hold remain. */
		VERIFY(dp->d_ref == 2);

		dp->d_len = 0;
		dp->d_cookie = 0;
		dp->d_ref = 0;
	}

	viona_ring_stat_drop(ring, 1);
	VIONA_PROBE3(tx_drop, viona_vring_t *, ring, uint32_t, pkt_len,
	    uint16_t, cookie);
	vmm_drv_page_release_chain(pages);
	viona_tx_done(ring, total_len, cookie);
	return (0);
}