#include <sys/types.h>
#include <sys/strsubr.h>
#include <sys/dlpi.h>
#include <sys/pattr.h>
#include <sys/vlan.h>
#include "viona_impl.h"
/* Maximum number of vring descriptor segments handled per frame */
#define VTNET_MAXSEGS 32

/* Minimum acceptable ethernet frame size on the wire (sans FCS) */
#define MIN_BUF_SIZE 60

/*
 * A frame of exactly this size was most likely shortened by VLAN tag
 * stripping in MAC; it is padded back up with the shared pad mblk below.
 */
#define NEED_VLAN_PAD_SIZE (MIN_BUF_SIZE - VLAN_TAGSZ)

/*
 * Shared, zero-filled mblk of VLAN_TAGSZ bytes, temporarily chained onto
 * VLAN-stripped frames during copy-out.  Allocated in viona_rx_init() and
 * freed in viona_rx_fini(); never modified after initialization.
 */
static mblk_t *viona_vlan_pad_mp;
/*
 * Module-load initialization for the RX path: allocate the shared pad mblk
 * (VLAN_TAGSZ bytes of zeros) used to re-pad frames shortened by VLAN tag
 * stripping.
 */
void
viona_rx_init(void)
{
	mblk_t *pad;

	ASSERT(viona_vlan_pad_mp == NULL);

	/* STR_NOSIG: sleep until the allocation succeeds; never NULL */
	pad = allocb_wait(VLAN_TAGSZ, BPRI_HI, STR_NOSIG, NULL);
	bzero(pad->b_rptr, VLAN_TAGSZ);
	pad->b_wptr += VLAN_TAGSZ;

	viona_vlan_pad_mp = pad;
}
/*
 * Module-unload teardown for the RX path: release the shared VLAN pad mblk
 * allocated by viona_rx_init().
 */
void
viona_rx_fini(void)
{
	mblk_t *pad = viona_vlan_pad_mp;

	viona_vlan_pad_mp = NULL;

	/* Nothing should remain chained onto the shared pad mblk */
	VERIFY(pad != NULL && pad->b_cont == NULL);
	freemsg(pad);
}
/*
 * Worker thread body for an RX vring.
 *
 * Packet delivery itself is driven by MAC callbacks (viona_rx_classified /
 * viona_rx_mcast); this thread parks on vr_cv, waking only to renew the vmm
 * guest-memory lease when it expires, until the ring is asked to bail.  On
 * the way out it moves the ring to VRS_STOP and quiesces in-flight MAC
 * delivery with an rx barrier.
 *
 * Entered (and exited) with vr_lock held.
 */
void
viona_worker_rx(viona_vring_t *ring, viona_link_t *link)
{
	(void) thread_vsetname(curthread, "viona_rx_%u_%p",
	ring->vr_index, ring);

	ASSERT(MUTEX_HELD(&ring->vr_lock));
	ASSERT3U(ring->vr_state, ==, VRS_RUN);

	/* Guest notifications are not needed while the ring is running */
	viona_ring_disable_notify(ring);

	do {
		if (vmm_drv_lease_expired(ring->vr_lease)) {
			/*
			 * Set the renewal flag, causing incoming traffic to
			 * be dropped, and issue an RX barrier to ensure any
			 * threads in the RX callbacks will have finished.
			 * The vr_lock cannot be held across the barrier as it
			 * poses a deadlock risk.
			 */
			ring->vr_state_flags |= VRSF_RENEW;
			mutex_exit(&ring->vr_lock);
			mac_rx_barrier(link->l_mch);
			mutex_enter(&ring->vr_lock);

			if (!viona_ring_lease_renew(ring)) {
				break;
			}
			ring->vr_state_flags &= ~VRSF_RENEW;
		}

		/* Sleep until kicked (renewal request or shutdown) */
		(void) cv_wait_sig(&ring->vr_cv, &ring->vr_lock);
	} while (!vring_need_bail(ring));

	ring->vr_state = VRS_STOP;

	/*
	 * The rx barrier ensures no threads are delivering into the now
	 * stopped ring.  As above, vr_lock must be dropped around it.
	 */
	mutex_exit(&ring->vr_lock);
	mac_rx_barrier(link->l_mch);
	mutex_enter(&ring->vr_lock);

	/* Do not bother enabling notifications if the lease was lost */
	if (ring->vr_lease != NULL) {
		viona_ring_enable_notify(ring);
	}
}
/*
 * Copy packet data from an mblk chain into a virtio buffer.
 *
 * Copies up to `len` bytes from the chain at `mp` into `buf`, first seeking
 * past the initial `seek` bytes of packet data (already consumed by earlier
 * calls against the same chain).  On return, `*end` is B_TRUE if the entire
 * remainder of the packet was consumed.  Returns the byte count copied.
 */
static size_t
viona_copy_mblk(const mblk_t *mp, size_t seek, caddr_t buf, size_t len,
boolean_t *end)
{
	size_t copied = 0;
	size_t off = 0;

	/* Seek past already-consumed data */
	while (seek > 0 && mp != NULL) {
		const size_t chunk = MBLKL(mp);

		/* The seek position lands inside this mblk */
		if (chunk > seek) {
			off = seek;
			break;
		}
		mp = mp->b_cont;
		seek -= chunk;
	}

	while (mp != NULL) {
		const size_t chunk = MBLKL(mp) - off;
		const size_t to_copy = MIN(chunk, len);

		bcopy(mp->b_rptr + off, buf, to_copy);
		copied += to_copy;
		buf += to_copy;
		len -= to_copy;

		/*
		 * If all remaining data in this mblk was consumed, move on to
		 * the next one in the chain.  The intra-mblk seek offset only
		 * applies to the first mblk copied from.
		 */
		if (chunk == to_copy) {
			mp = mp->b_cont;
			off = 0;
		}
#ifdef DEBUG
		else {
			/*
			 * The only valid reason for the copy to consume less
			 * than the whole mblk is that the output buffer
			 * filled up.
			 */
			ASSERT0(len);
		}
#endif

		/* Go no further once the output buffer is full */
		if (len == 0) {
			break;
		}
	}

	*end = (mp == NULL);
	return (copied);
}
/*
 * Receive a single frame into the guest when mergeable RX buffers are not
 * negotiated: the whole frame must fit in one descriptor chain.
 *
 * Returns 0 on success, ENOSPC when no descriptor chain was available, or
 * EINVAL when the frame could not be represented to the guest (header space
 * too small, or frame larger than the chain).
 */
static int
viona_recv_plain(viona_vring_t *ring, const mblk_t *mp, size_t msz)
{
	struct iovec iov[VTNET_MAXSEGS];
	uint16_t cookie;
	int n;
	/*
	 * Header size depends on negotiation: modern (l_modern) devices use
	 * the larger header — presumably because VIRTIO 1.0 always carries
	 * num_buffers; confirm against the device feature handling.
	 */
	const size_t hdr_sz = ring->vr_link->l_modern ?
	sizeof (struct virtio_net_mrgrxhdr) :
	sizeof (struct virtio_net_hdr);
	struct virtio_net_mrgrxhdr *hdr;
	size_t len, copied = 0;
	caddr_t buf = NULL;
	boolean_t end = B_FALSE;
	const uint32_t features = ring->vr_link->l_features;
	vmm_page_t *pages = NULL;

	/* Callers pad runt frames up to MIN_BUF_SIZE before reaching here */
	ASSERT(msz >= MIN_BUF_SIZE);

	n = vq_popchain(ring, iov, VTNET_MAXSEGS, &cookie, &pages, NULL);
	if (n <= 0) {
		/* Without available buffers, the frame must be dropped */
		return (ENOSPC);
	}
	if (iov[0].iov_len < hdr_sz) {
		/*
		 * There is little to do if there is not even space for the
		 * header.  Zero the buffer and bail out.
		 */
		bzero(iov[0].iov_base, iov[0].iov_len);
		goto bad_frame;
	}

	/* Copy into the space remaining in the first buffer... */
	hdr = (struct virtio_net_mrgrxhdr *)iov[0].iov_base;
	if (iov[0].iov_len > hdr_sz) {
		buf = (caddr_t)iov[0].iov_base + hdr_sz;
		len = iov[0].iov_len - hdr_sz;
		copied += viona_copy_mblk(mp, copied, buf, len, &end);
	}

	/* ... and then across the rest of the descriptor chain */
	for (int i = 1; i < n && !end; i++) {
		buf = (caddr_t)iov[i].iov_base;
		len = iov[i].iov_len;
		copied += viona_copy_mblk(mp, copied, buf, len, &end);
	}
	if (copied != msz) {
		/* The descriptor chain was too small for the frame */
		VIONA_PROBE5(too_short, viona_vring_t *, ring,
		uint16_t, cookie, mblk_t *, mp, size_t, copied,
		size_t, msz);
		VIONA_RING_STAT_INCR(ring, too_short);
		goto bad_frame;
	}

	/* Zero the header, noting a single buffer when the field exists */
	bzero(hdr, hdr_sz);
	if (hdr_sz > offsetof(struct virtio_net_mrgrxhdr, vrh_bufs))
		hdr->vrh_bufs = 1;
	copied += hdr_sz;

	/* Communicate LSO and checksum state to the guest, if negotiated */
	if ((features & VIRTIO_NET_F_GUEST_CSUM) != 0) {
		uint32_t cksum_flags;

		if (((features & VIRTIO_NET_F_GUEST_TSO4) != 0) &&
		((DB_CKSUMFLAGS(mp) & HW_LSO) != 0)) {
			hdr->vrh_gso_type |= VIRTIO_NET_HDR_GSO_TCPV4;
			hdr->vrh_gso_size = DB_LSOMSS(mp);
		}
		mac_hcksum_get((mblk_t *)mp, NULL, NULL, NULL, NULL,
		&cksum_flags);
		if ((cksum_flags & HCK_FULLCKSUM_OK) != 0) {
			hdr->vrh_flags |= VIRTIO_NET_HDR_F_DATA_VALID;
		}
	}

	/* Release the guest page references before returning the chain */
	vmm_drv_page_release_chain(pages);
	vq_pushchain(ring, copied, cookie);
	return (0);

bad_frame:
	VIONA_PROBE3(bad_rx_frame, viona_vring_t *, ring, uint16_t, cookie,
	mblk_t *, mp);
	VIONA_RING_STAT_INCR(ring, bad_rx_frame);

	vmm_drv_page_release_chain(pages);
	/*
	 * Return the chain claiming at least a minimum-sized frame so the
	 * guest does not interpret the partial copy as a valid packet.
	 */
	vq_pushchain(ring, MAX(copied, MIN_BUF_SIZE + hdr_sz), cookie);
	return (EINVAL);
}
/*
 * Receive a single frame into the guest using mergeable RX buffers: the
 * frame may span multiple descriptor chains, with the number of chains used
 * recorded in the header's vrh_bufs field of the first buffer.
 *
 * Returns 0 on success; ENOSPC when no initial chain was available; EINVAL
 * for an unrepresentable frame; EMSGSIZE when the ring ran out of chains
 * mid-frame; EOVERFLOW when the frame needed more than VTNET_MAXSEGS chains.
 */
static int
viona_recv_merged(viona_vring_t *ring, const mblk_t *mp, size_t msz)
{
	struct iovec iov[VTNET_MAXSEGS];
	used_elem_t uelem[VTNET_MAXSEGS];
	vmm_page_t *pages = NULL, *hdr_pages = NULL;
	int n, i = 0, buf_idx = 0, err = 0;
	uint16_t cookie;
	caddr_t buf;
	size_t len, copied = 0, chunk = 0;
	struct virtio_net_mrgrxhdr *hdr = NULL;
	const size_t hdr_sz = sizeof (struct virtio_net_mrgrxhdr);
	boolean_t end = B_FALSE;
	const uint32_t features = ring->vr_link->l_features;

	/* Callers pad runt frames up to MIN_BUF_SIZE before reaching here */
	ASSERT(msz >= MIN_BUF_SIZE);

	n = vq_popchain(ring, iov, VTNET_MAXSEGS, &cookie, &hdr_pages, NULL);
	if (n <= 0) {
		/* Without available buffers, the frame must be dropped */
		VIONA_PROBE2(no_space, viona_vring_t *, ring, mblk_t *, mp);
		VIONA_RING_STAT_INCR(ring, no_space);
		return (ENOSPC);
	}
	if (iov[0].iov_len < hdr_sz) {
		/*
		 * There is little to do if there is not even space for the
		 * header.  Zero the buffer and return it unusable-as-frame.
		 */
		bzero(iov[0].iov_base, iov[0].iov_len);
		uelem[0].id = cookie;
		uelem[0].len = iov[0].iov_len;
		err = EINVAL;
		goto done;
	}

	/* The header lives in the first chain; keep hdr_pages held so the
	 * header fields can be updated until copy-out is finished. */
	hdr = (struct virtio_net_mrgrxhdr *)iov[0].iov_base;
	bzero(hdr, hdr_sz);
	hdr->vrh_bufs = 1;

	/* Copy into any space the first buffer has beyond the header */
	if (iov[0].iov_len > hdr_sz) {
		buf = iov[0].iov_base + hdr_sz;
		len = iov[0].iov_len - hdr_sz;
		size_t copy_len;
		copy_len = viona_copy_mblk(mp, copied, buf, len, &end);
		chunk += copy_len;
		copied += copy_len;
	}

	/*
	 * Walk the current chain's buffers; when a chain is exhausted before
	 * the frame ends, record its used length and pop another chain.
	 * `chunk` accumulates the bytes placed in the current chain.
	 */
	i = 1;
	do {
		while (i < n && !end) {
			buf = iov[i].iov_base;
			len = iov[i].iov_len;
			size_t copy_len;
			copy_len = viona_copy_mblk(mp, copied, buf, len, &end);
			chunk += copy_len;
			copied += copy_len;
			i++;
		}
		uelem[buf_idx].id = cookie;
		uelem[buf_idx].len = chunk;
		if (!end) {
			/* More frame data remains: need another chain */
			if (buf_idx == (VTNET_MAXSEGS - 1)) {
				err = EOVERFLOW;
				break;
			}
			/* Release pages held for the previous data chain */
			if (pages != NULL) {
				vmm_drv_page_release_chain(pages);
				pages = NULL;
			}
			n = vq_popchain(ring, iov, VTNET_MAXSEGS, &cookie,
			&pages, NULL);
			if (n <= 0) {
				/* Ring ran dry in the middle of the frame */
				err = EMSGSIZE;
				break;
			}
			chunk = 0;
			i = 0;
			buf_idx++;
			hdr->vrh_bufs++;
		}
	} while (!end && copied < msz);

	/* The header itself counts toward the first chain's used length */
	uelem[0].len += hdr_sz;

	if (err == 0 && copied != msz) {
		/* The frame could not be fully copied into guest buffers */
		VIONA_PROBE5(too_short, viona_vring_t *, ring,
		uint16_t, cookie, mblk_t *, mp, size_t, copied,
		size_t, msz);
		VIONA_RING_STAT_INCR(ring, too_short);
		err = EINVAL;
	}

	/* Communicate LSO and checksum state to the guest, if negotiated */
	if ((features & VIRTIO_NET_F_GUEST_CSUM) != 0) {
		uint32_t cksum_flags;

		if (((features & VIRTIO_NET_F_GUEST_TSO4) != 0) &&
		((DB_CKSUMFLAGS(mp) & HW_LSO) != 0)) {
			hdr->vrh_gso_type |= VIRTIO_NET_HDR_GSO_TCPV4;
			hdr->vrh_gso_size = DB_LSOMSS(mp);
		}
		mac_hcksum_get((mblk_t *)mp, NULL, NULL, NULL, NULL,
		&cksum_flags);
		if ((cksum_flags & HCK_FULLCKSUM_OK) != 0) {
			hdr->vrh_flags |= VIRTIO_NET_HDR_F_DATA_VALID;
		}
	}

done:
	switch (err) {
	case 0:
		/* Success can proceed without probe or stat */
		break;
	case EMSGSIZE:
		VIONA_PROBE3(rx_merge_underrun, viona_vring_t *, ring,
		uint16_t, cookie, mblk_t *, mp);
		VIONA_RING_STAT_INCR(ring, rx_merge_underrun);
		break;
	case EOVERFLOW:
		VIONA_PROBE3(rx_merge_overrun, viona_vring_t *, ring,
		uint16_t, cookie, mblk_t *, mp);
		VIONA_RING_STAT_INCR(ring, rx_merge_overrun);
		break;
	default:
		VIONA_PROBE3(bad_rx_frame, viona_vring_t *, ring,
		uint16_t, cookie, mblk_t *, mp);
		VIONA_RING_STAT_INCR(ring, bad_rx_frame);
	}

	/* Release all held guest pages before returning the chains */
	if (hdr_pages != NULL) {
		vmm_drv_page_release_chain(hdr_pages);
	}
	if (pages != NULL) {
		vmm_drv_page_release_chain(pages);
	}
	/* Return all used chains, good or bad, to the guest */
	vq_pushchain_many(ring, buf_idx + 1, uelem);
	return (err);
}
/*
 * Common receive path: copy each packet in the chain `mp` into `ring`,
 * applying vnet hooks, over-MTU/LSO handling, and runt-frame padding along
 * the way.
 *
 * Accepted frames are copied into guest buffers (and their mblks freed);
 * undeliverable frames are collected and freed as drops.  When the ring
 * runs out of descriptors (ENOSPC) the remainder of the chain is dropped
 * wholesale.
 */
static void
viona_rx_common(viona_vring_t *ring, mblk_t *mp, boolean_t is_loopback)
{
	viona_link_t *link = ring->vr_link;
	mblk_t *mprx = NULL, **mprx_prevp = &mprx;
	mblk_t *mpdrop = NULL, **mpdrop_prevp = &mpdrop;
	const boolean_t do_merge =
	    (link->l_features & VIRTIO_NET_F_MRG_RXBUF) != 0;
	const boolean_t allow_gro =
	    (link->l_features & VIRTIO_NET_F_GUEST_TSO4) != 0;

	size_t cnt_accept = 0, size_accept = 0, cnt_drop = 0;

	while (mp != NULL) {
		mblk_t *next = mp->b_next;
		mblk_t *pad = NULL;
		size_t size = msgsize(mp);
		int err = 0;

		mp->b_next = NULL;

		/* Let the vnet hooks veto (or consume) the packet */
		if (VNETHOOK_INTERESTED_IN(link->l_neti) &&
		    viona_hook(link, ring, &mp, B_FALSE) != 0) {
			if (mp != NULL) {
				*mpdrop_prevp = mp;
				mpdrop_prevp = &mp->b_next;
			} else {
				/* The hook freed the mblk itself */
				cnt_drop++;
			}
			mp = next;
			continue;
		}

		/*
		 * Oversized frames are only acceptable when they can be
		 * passed up as LSO, or segmented down to MTU here.
		 */
		if (size > sizeof (struct ether_header) + link->l_mtu) {
			const boolean_t can_emu_lso = DB_LSOMSS(mp) != 0;
			const boolean_t attempt_emu =
			    !allow_gro || size > VIONA_GRO_MAX_PACKET_SIZE;

			if ((DB_CKSUMFLAGS(mp) & HW_LSO) == 0 ||
			    (attempt_emu && !can_emu_lso)) {
				VIONA_PROBE3(rx_drop_over_mtu, viona_vring_t *,
				    ring, mblk_t *, mp, size_t, size);
				VIONA_RING_STAT_INCR(ring, rx_drop_over_mtu);
				err = E2BIG;
				goto pad_drop;
			}
			if (attempt_emu) {
				mblk_t *tail = NULL;
				uint_t n_pkts = 0;

				/*
				 * Emulation needs at least one checksum flag
				 * set on the frame.
				 */
				if ((DB_CKSUMFLAGS(mp) &
				    (HCK_FULLCKSUM | HCK_PARTIALCKSUM)) == 0) {
					DB_CKSUMFLAGS(mp) |= HCK_FULLCKSUM;
				}

				/* Request IPv4 header checksum emulation too */
				mac_ether_offload_info_t meoi;
				mac_ether_offload_info(mp, &meoi);
				if ((meoi.meoi_flags & MEOI_L2INFO_SET) != 0 &&
				    meoi.meoi_l3proto == ETHERTYPE_IP) {
					DB_CKSUMFLAGS(mp) |= HCK_IPV4_HDRCKSUM;
				}

				/* Segment the LSO frame in software */
				mac_hw_emul(&mp, &tail, &n_pkts, MAC_ALL_EMULS);
				if (mp == NULL) {
					VIONA_RING_STAT_INCR(ring,
					    rx_gro_fallback_fail);
					viona_ring_stat_error(ring);
					mp = next;
					continue;
				}
				VIONA_PROBE4(rx_gro_fallback, viona_vring_t *,
				    ring, mblk_t *, mp, size_t, size,
				    uint_t, n_pkts);
				VIONA_RING_STAT_INCR(ring, rx_gro_fallback);

				/*
				 * Splice the emulated sub-chain back into the
				 * main chain and continue with its head.
				 */
				ASSERT3P(tail, !=, NULL);
				if (tail != mp) {
					tail->b_next = next;
					next = mp->b_next;
					mp->b_next = NULL;
				}
				size = msgsize(mp);
			}
		}

		/*
		 * A frame of exactly MIN_BUF_SIZE - VLAN_TAGSZ likely had its
		 * VLAN tag stripped by MAC; pad it with the shared zero mblk.
		 * Any other runt gets a freshly allocated zero pad.
		 */
		if (size == NEED_VLAN_PAD_SIZE) {
			ASSERT(MBLKL(viona_vlan_pad_mp) == VLAN_TAGSZ);
			ASSERT(viona_vlan_pad_mp->b_cont == NULL);

			/* Chain the shared pad onto the tail mblk */
			for (pad = mp; pad->b_cont != NULL; pad = pad->b_cont)
				;

			pad->b_cont = viona_vlan_pad_mp;
			size += VLAN_TAGSZ;
		} else if (size < MIN_BUF_SIZE) {
			const size_t pad_size = MIN_BUF_SIZE - size;
			mblk_t *zero_mp;

			zero_mp = allocb(pad_size, BPRI_MED);
			if (zero_mp == NULL) {
				err = ENOMEM;
				goto pad_drop;
			}
			VIONA_PROBE3(rx_pad_short, viona_vring_t *, ring,
			    mblk_t *, mp, size_t, pad_size);
			VIONA_RING_STAT_INCR(ring, rx_pad_short);
			zero_mp->b_wptr += pad_size;
			bzero(zero_mp->b_rptr, pad_size);
			linkb(mp, zero_mp);
			size += pad_size;
		}

		if (do_merge) {
			err = viona_recv_merged(ring, mp, size);
		} else {
			err = viona_recv_plain(ring, mp, size);
		}

		/*
		 * Detach the shared VLAN pad before the frame is freed, since
		 * that mblk is reused for every padded frame.
		 */
		if (pad != NULL) {
			pad->b_cont = NULL;
		}

pad_drop:
		if (err != 0) {
			*mpdrop_prevp = mp;
			mpdrop_prevp = &mp->b_next;

			if (err == ENOSPC) {
				/*
				 * The ring is out of buffers; drop the rest
				 * of the chain along with this frame.
				 */
				mp->b_next = next;
				break;
			} else {
				viona_ring_stat_error(ring);
			}
		} else {
			/* Chain accepted mblks for freeing afterward */
			*mprx_prevp = mp;
			mprx_prevp = &mp->b_next;
			cnt_accept++;
			size_accept += size;
			/*
			 * Fixed: probe argument type was `mblk_t` (not a
			 * pointer) and the statement lacked its terminating
			 * semicolon, unlike every other probe in this file.
			 */
			VIONA_PROBE3(pkt__rx, viona_vring_t *, ring, mblk_t *,
			    mp, size_t, size);
		}
		mp = next;
	}

	/* Make guest-visible copies globally visible before interrupting */
	membar_enter();
	viona_intr_ring(ring, B_FALSE);

	/* Free successfully received frames (data now lives in the guest) */
	if (mprx != NULL) {
		freemsgchain(mprx);
	}

	/* Free and tally dropped frames */
	mp = mpdrop;
	while (mp != NULL) {
		mblk_t *next = mp->b_next;

		mp->b_next = NULL;
		freemsg(mp);
		mp = next;
		cnt_drop++;
	}

	if (cnt_accept != 0) {
		viona_ring_stat_accept(ring, cnt_accept, size_accept);
	}
	if (cnt_drop != 0) {
		viona_ring_stat_drop(ring, cnt_drop);
	}
	VIONA_PROBE3(rx, viona_link_t *, link, size_t, cnt_accept,
	    size_t, cnt_drop);
}
/*
 * Map an RX ring-pair index to its vring.  RX rings occupy the even slots
 * of the link's vring array.
 */
static inline viona_vring_t *
viona_rx_get_ring(viona_link_t *link, const uint8_t idx)
{
	viona_vring_t *rxr = link->l_vrings + (idx * 2);

	ASSERT(VIONA_RING_ISRX(rxr));
	return (rxr);
}
/*
 * Choose the RX ring for a packet by hashing its L3/L4 headers, so that
 * packets of the same flow consistently land on the same ring.
 */
static inline viona_vring_t *
viona_rx_pick_ring(viona_link_t *link, mblk_t *mp)
{
	const uint8_t hash8 = (uint8_t)mac_pkt_hash(DL_ETHER, mp,
	    MAC_PKT_HASH_L3 | MAC_PKT_HASH_L4, B_TRUE);

	return (viona_rx_get_ring(link, hash8 % link->l_usepairs));
}
/*
 * Deliver a packet chain to a single ring, dropping it outright when the
 * ring is not running or a lease renewal is in progress.
 */
static inline void
viona_rx_ring_deliver(viona_vring_t *ring, mblk_t *mp,
const boolean_t is_loopback)
{
	const boolean_t accepting = (ring->vr_state == VRS_RUN) &&
	    ((ring->vr_state_flags & VRSF_RENEW) == 0);

	if (accepting) {
		viona_rx_common(ring, mp, is_loopback);
	} else {
		/* Ring stopped or renewing its lease: drop the whole chain */
		freemsgchain(mp);
	}
}
/*
 * Deliver a chain of packets, fanning it out across the link's RX rings.
 *
 * The chain is walked in order and broken into maximal runs of consecutive
 * packets which hash to the same ring; each run is handed off intact, so
 * per-flow ordering is preserved.  With a single ring pair in use, the
 * whole chain goes to ring 0 without hashing.
 */
static inline void
viona_rx_split_deliver(viona_link_t *link, mblk_t *head,
const boolean_t is_loopback)
{
	if (link->l_usepairs == 1) {
		viona_rx_ring_deliver(viona_rx_get_ring(link, 0), head,
		is_loopback);
		return;
	}

	/* `head`..`sub_tail` is the sub-chain accumulated for `ring` */
	mblk_t *curr = head;
	mblk_t *sub_tail = head;
	viona_vring_t *ring = NULL;

	while (curr != NULL) {
		viona_vring_t *my_ring = viona_rx_pick_ring(link, curr);

		if (ring != NULL && ring != my_ring) {
			/* Ring changed: sever and flush the sub-chain */
			sub_tail->b_next = NULL;
			viona_rx_ring_deliver(ring, head, is_loopback);
			head = curr;
		}
		ring = my_ring;
		sub_tail = curr;
		curr = curr->b_next;
	}

	/* Deliver the final sub-chain (the input chain is never empty) */
	ASSERT3P(head, !=, NULL);
	ASSERT3P(sub_tail, !=, NULL);
	ASSERT3P(sub_tail->b_next, ==, NULL);
	ASSERT3P(ring, !=, NULL);
	viona_rx_ring_deliver(ring, head, is_loopback);
}
/*
 * MAC receive callback for classified (non-promiscuous) traffic: forward
 * the chain to the per-ring splitter.
 */
static void
viona_rx_classified(void *arg, mac_resource_handle_t mrh __unused, mblk_t *mp,
boolean_t is_loopback)
{
	viona_rx_split_deliver((viona_link_t *)arg, mp, is_loopback);
}
/*
 * MAC promiscuous callback used in VIONA_PROMISC_MULTI mode.
 *
 * Unicast traffic still arrives through the classified callback, so this
 * tap keeps only multicast-destined frames (avoiding duplicate delivery)
 * and frees everything else.
 */
static void
viona_rx_mcast(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
boolean_t is_loopback)
{
	viona_link_t *link = (viona_link_t *)arg;
	mac_handle_t mh = link->l_mh;
	mblk_t *mp_mcast_only = NULL;
	mblk_t **mpp = &mp_mcast_only;

	/* Walk the chain, retaining only multicast frames */
	while (mp != NULL) {
		mblk_t *mp_next;
		mac_header_info_t mhi;
		int err;

		mp_next = mp->b_next;
		mp->b_next = NULL;

		err = mac_vlan_header_info(mh, mp, &mhi);
		if (err != 0) {
			mblk_t *pull;

			/*
			 * The ethernet header may span multiple mblks; pull
			 * a contiguous copy of it and retry the parse.
			 */
			pull = msgpullup(mp, sizeof (struct ether_vlan_header));
			if (pull == NULL) {
				err = ENOMEM;
			} else {
				err = mac_vlan_header_info(mh, pull, &mhi);
				freemsg(pull);
			}

			if (err != 0) {
				/* Count the unparseable header */
				viona_vring_t *my_ring =
				viona_rx_pick_ring(link, mp);
				VIONA_RING_STAT_INCR(my_ring, rx_mcast_check);
			}
		}

		if (err == 0 && mhi.mhi_dsttype == MAC_ADDRTYPE_MULTICAST) {
			*mpp = mp;
			mpp = &mp->b_next;
		} else {
			/* Non-multicast (or unparseable) frames are dropped */
			freemsg(mp);
		}
		mp = mp_next;
	}

	if (mp_mcast_only != NULL) {
		viona_rx_split_deliver(link, mp_mcast_only, is_loopback);
	}
}
/*
 * Install MAC receive callbacks according to the requested promiscuity
 * mode.  Returns 0 on success or the error from mac_promisc_add().
 */
int
viona_rx_set(viona_link_t *link, viona_promisc_t mode)
{
	int err = 0;

	/* Remove any previously installed promiscuous callback first */
	if (link->l_mph != NULL) {
		mac_promisc_remove(link->l_mph);
		link->l_mph = NULL;
	}

	switch (mode) {
	case VIONA_PROMISC_MULTI:
		/*
		 * Classified rx continues to handle unicast traffic while
		 * the promiscuous tap supplies (filtered) multicast frames.
		 */
		mac_rx_set(link->l_mch, viona_rx_classified, link);
		err = mac_promisc_add(link->l_mch, MAC_CLIENT_PROMISC_MULTI,
		viona_rx_mcast, link, &link->l_mph,
		MAC_PROMISC_FLAGS_NO_TX_LOOP |
		MAC_PROMISC_FLAGS_VLAN_TAG_STRIP);
		break;
	case VIONA_PROMISC_ALL:
		/*
		 * Everything arrives via the promiscuous callback, so the
		 * classified one is cleared to avoid duplicate delivery.  If
		 * the promiscuous add fails, restore classified rx so that
		 * traffic keeps flowing.
		 */
		mac_rx_clear(link->l_mch);
		err = mac_promisc_add(link->l_mch, MAC_CLIENT_PROMISC_ALL,
		viona_rx_classified, link, &link->l_mph,
		MAC_PROMISC_FLAGS_NO_TX_LOOP |
		MAC_PROMISC_FLAGS_VLAN_TAG_STRIP);
		if (err != 0) {
			mac_rx_set(link->l_mch, viona_rx_classified, link);
		}
		break;
	case VIONA_PROMISC_NONE:
	default:
		/* Plain classified reception only */
		mac_rx_set(link->l_mch, viona_rx_classified, link);
		break;
	}

	return (err);
}
/*
 * Tear down all MAC reception for the link: remove any promiscuous
 * callback, then clear the classified rx callback.
 */
void
viona_rx_clear(viona_link_t *link)
{
	if (link->l_mph == NULL) {
		mac_rx_clear(link->l_mch);
		return;
	}

	mac_promisc_remove(link->l_mph);
	link->l_mph = NULL;
	mac_rx_clear(link->l_mch);
}