#include <sys/param.h>
#include <sys/linker_set.h>
#include <sys/select.h>
#include <sys/uio.h>
#include <sys/ioctl.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <sys/sysmacros.h>
#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <strings.h>
#include <unistd.h>
#include <assert.h>
#include <pthread.h>
#include <pthread_np.h>
#include "bhyverun.h"
#include "config.h"
#include "debug.h"
#include "pci_emul.h"
#include "mevent.h"
#include "virtio.h"
#include "net_utils.h"
#include "net_backends.h"
#include "iov.h"
#define VTNET_RINGSZ 1024
#define VTNET_MAXSEGS 256
#define VTNET_MAX_PKT_LEN (65536 + 64)
#define VTNET_MIN_MTU ETHERMIN
#define VTNET_MAX_MTU 65535
#define VTNET_S_HOSTCAPS_MODERN \
(VIRTIO_NET_F_MAC | VIRTIO_NET_F_STATUS | \
VIRTIO_RING_F_INDIRECT_DESC | VIRTIO_NET_F_MRG_RXBUF)
#define VTNET_S_HOSTCAPS_LEGACY \
(VTNET_S_HOSTCAPS_MODERN | VIRTIO_F_NOTIFY_ON_EMPTY)
#define VTNET_RXQ 0
#define VTNET_TXQ 1
#define VTNET_CTLQ 2
#define VTNET_MAXQ 3
/* Debug flag, set from the "virtio.net.debug" config value at init. */
static int pci_vtnet_debug;
/*
 * Debug/warning print wrappers.  Both are wrapped in do { } while (0)
 * so each expands to exactly one statement; the previous bare
 * "if (pci_vtnet_debug) PRINTLN params" form would silently capture a
 * following "else" (the dangling-else macro hazard).
 */
#define DPRINTF(params) do {			\
	if (pci_vtnet_debug)			\
		PRINTLN params;			\
} while (0)
#define WPRINTF(params) do {			\
	PRINTLN params;				\
} while (0)
/*
 * Per-device softc, one per emulated virtio-net PCI function.
 * Locking: vsc_mtx protects the generic virtio state, rx_mtx
 * serializes the receive path with resets and feature negotiation,
 * and tx_mtx/tx_cond coordinate the dedicated transmit thread.
 */
struct pci_vtnet_softc {
	struct virtio_softc vsc_vs;		/* generic virtio device state */
	struct vqueue_info vsc_queues[VTNET_MAXQ - 1];	/* RX and TX queues */
	pthread_mutex_t vsc_mtx;		/* protects vsc_vs */
	net_backend_t *vsc_be;			/* packet I/O backend */
	bool features_negotiated;		/* guest has acked features */
	int resetting;				/* set while reset drains TX */
	uint64_t vsc_features;			/* negotiated feature bits */
	pthread_mutex_t rx_mtx;			/* serializes RX path */
	int rx_merge;				/* mergeable rx bufs in use */
	pthread_t tx_tid;			/* TX worker thread */
	pthread_mutex_t tx_mtx;			/* protects TX state below */
	pthread_cond_t tx_cond;			/* wakes the TX worker */
	int tx_in_progress;			/* TX worker is sending */
	size_t vhdrlen;				/* header len seen by guest */
	size_t be_vhdrlen;			/* header len of the backend */
	struct virtio_net_config vsc_config;	/* guest-visible config space */
	struct virtio_consts vsc_consts;	/* per-instance virtio consts */
};
/* Forward declarations for the virtio_consts callbacks below. */
static void pci_vtnet_reset(void *);
static int pci_vtnet_cfgread(void *, int, int, uint32_t *);
static int pci_vtnet_cfgwrite(void *, int, int, uint32_t);
static void pci_vtnet_neg_features(void *, uint64_t *);
/*
 * Human-readable names for the virtio-net feature bits, used by the
 * virtio framework when logging capability negotiation.
 */
static virtio_capstr_t vtnet_caps[] = {
	{ VIRTIO_NET_F_CSUM, "VIRTIO_NET_F_CSUM" },
	{ VIRTIO_NET_F_GUEST_CSUM, "VIRTIO_NET_F_GUEST_CSUM" },
	{ VIRTIO_NET_F_MTU, "VIRTIO_NET_F_MTU" },
	{ VIRTIO_NET_F_MAC, "VIRTIO_NET_F_MAC" },
	{ VIRTIO_NET_F_GSO_DEPREC, "VIRTIO_NET_F_GSO_DEPREC" },
	{ VIRTIO_NET_F_GUEST_TSO4, "VIRTIO_NET_F_GUEST_TSO4" },
	{ VIRTIO_NET_F_GUEST_TSO6, "VIRTIO_NET_F_GUEST_TSO6" },
	{ VIRTIO_NET_F_GUEST_ECN, "VIRTIO_NET_F_GUEST_ECN" },
	{ VIRTIO_NET_F_GUEST_UFO, "VIRTIO_NET_F_GUEST_UFO" },
	{ VIRTIO_NET_F_HOST_TSO4, "VIRTIO_NET_F_HOST_TSO4" },
	{ VIRTIO_NET_F_HOST_TSO6, "VIRTIO_NET_F_HOST_TSO6" },
	{ VIRTIO_NET_F_HOST_ECN, "VIRTIO_NET_F_HOST_ECN" },
	{ VIRTIO_NET_F_HOST_UFO, "VIRTIO_NET_F_HOST_UFO" },
	{ VIRTIO_NET_F_MRG_RXBUF, "VIRTIO_NET_F_MRG_RXBUF" },
	{ VIRTIO_NET_F_STATUS, "VIRTIO_NET_F_STATUS" },
	{ VIRTIO_NET_F_CTRL_VQ, "VIRTIO_NET_F_CTRL_VQ" },
	{ VIRTIO_NET_F_CTRL_RX, "VIRTIO_NET_F_CTRL_RX" },
	{ VIRTIO_NET_F_CTRL_VLAN, "VIRTIO_NET_F_CTRL_VLAN" },
	{ VIRTIO_NET_F_GUEST_ANNOUNCE, "VIRTIO_NET_F_GUEST_ANNOUNCE" },
	{ VIRTIO_NET_F_MQ, "VIRTIO_NET_F_MQ" },
};
/*
 * Template virtio constants for the net device.  This is copied into
 * each softc at init time (see pci_vtnet_init) so that per-instance
 * capability bits (MTU, backend offloads) can be OR-ed in.
 */
static struct virtio_consts vtnet_vi_consts = {
	.vc_name = "vtnet",
	.vc_nvq = VTNET_MAXQ - 1,
	.vc_cfgsize = sizeof (struct virtio_net_config),
	.vc_reset = pci_vtnet_reset,
	.vc_cfgread = pci_vtnet_cfgread,
	.vc_cfgwrite = pci_vtnet_cfgwrite,
	.vc_apply_features = pci_vtnet_neg_features,
	.vc_hv_caps_legacy = VTNET_S_HOSTCAPS_LEGACY,
	.vc_hv_caps_modern = VTNET_S_HOSTCAPS_MODERN,
	.vc_capstr = vtnet_caps,
	.vc_ncapstr = ARRAY_SIZE(vtnet_caps),
};
/*
 * Device reset callback: quiesce both datapaths, then reset the
 * generic virtio state.  Lock order is rx_mtx before tx_mtx.
 */
static void
pci_vtnet_reset(void *vsc)
{
	struct pci_vtnet_softc *sc = vsc;

	DPRINTF(("vtnet: device reset requested !"));

	/* Acquire the RX lock to block RX processing. */
	pthread_mutex_lock(&sc->rx_mtx);

	/*
	 * Make sure receive operation is disabled at least until we
	 * re-negotiate the features, since receive operation depends
	 * on the value of sc->rx_merge and the header length, which
	 * are both set in pci_vtnet_neg_features().
	 */
	sc->features_negotiated = false;
	netbe_rx_disable(sc->vsc_be);

	/* Set sc->resetting and give a chance to the TX thread to stop. */
	pthread_mutex_lock(&sc->tx_mtx);
	sc->resetting = 1;
	while (sc->tx_in_progress) {
		/* Drop tx_mtx so the TX worker can observe and finish. */
		pthread_mutex_unlock(&sc->tx_mtx);
		usleep(10000);
		pthread_mutex_lock(&sc->tx_mtx);
	}

	/*
	 * Now reset rings, MSI-X vectors, and negotiated capabilities.
	 * Do that with the TX lock held, since we need to reset
	 * sc->resetting.
	 */
	vi_reset_dev(&sc->vsc_vs);

	sc->resetting = 0;
	pthread_mutex_unlock(&sc->tx_mtx);
	pthread_mutex_unlock(&sc->rx_mtx);
}
/*
 * Drop 'hlen' header bytes from the front of the scatter-gather list
 * 'iov' (with '*iovcnt' elements).  The header must fit entirely
 * within the first element.  Returns a pointer to the first element
 * still carrying payload and updates '*iovcnt' accordingly, or NULL
 * if the header does not fit or no payload remains.
 */
static __inline struct iovec *
iov_trim_hdr(struct iovec *iov, int *iovcnt, unsigned int hlen)
{
	/* The whole header must live in the first element. */
	if (iov[0].iov_len < hlen)
		return (NULL);

	iov[0].iov_len -= hlen;
	if (iov[0].iov_len != 0) {
		/* Payload remains here: advance the base past the header. */
		iov[0].iov_base =
		    (void *)((uintptr_t)iov[0].iov_base + hlen);
		return (&iov[0]);
	}

	/* First element fully consumed by the header: drop it. */
	*iovcnt -= 1;
	return (*iovcnt == 0 ? NULL : &iov[1]);
}
/*
 * Bookkeeping for one descriptor chain collected while assembling a
 * mergeable-rx-buffer packet (see pci_vtnet_rx).
 */
struct virtio_mrg_rxbuf_info {
	uint16_t idx;	/* head descriptor index (from vi_req) */
	uint16_t pad;	/* explicit padding */
	uint32_t len;	/* total bytes this chain can hold */
};
/*
 * Receive path: drain all pending packets from the backend into
 * guest-posted RX buffers.  Called with rx_mtx held (see
 * pci_vtnet_rx_callback).  With mergeable rx buffers, a packet may
 * span several descriptor chains; otherwise one chain per packet.
 */
static void
pci_vtnet_rx(struct pci_vtnet_softc *sc)
{
	/* Bytes of virtio-net header we must prepend because the
	 * backend does not supply one (0 when backend provides it). */
	int prepend_hdr_len = sc->vhdrlen - sc->be_vhdrlen;
	struct virtio_mrg_rxbuf_info info[VTNET_MAXSEGS];
	struct iovec iov[VTNET_MAXSEGS + 1];
	struct vqueue_info *vq;
	struct vi_req req;

	vq = &sc->vsc_queues[VTNET_RXQ];

	/* Features must be negotiated before we touch the ring. */
	if (!sc->features_negotiated) {
		return;
	}

	for (;;) {
		struct virtio_net_rxhdr *hdr;
		uint32_t riov_bytes;
		struct iovec *riov;
		uint32_t ulen;
		int riov_len;
		int n_chains;
		ssize_t rlen;
		ssize_t plen;

		plen = netbe_peek_recvlen(sc->vsc_be);
		if (plen <= 0) {
			/*
			 * No more packets (plen == 0), or the backend
			 * errored (plen < 0): interrupt if needed and stop.
			 */
			vq_endchains(vq, 0);
			return;
		}
		/* Account for the header we will prepend ourselves. */
		plen += prepend_hdr_len;

		/*
		 * Get a descriptor chain to store the next ingress packet.
		 * In case of mergeable rx buffers, get as many chains as
		 * necessary to make room for plen bytes.
		 */
		riov_bytes = 0;
		riov_len = 0;
		riov = iov;
		n_chains = 0;
		do {
			int n = vq_getchain(vq, riov, VTNET_MAXSEGS - riov_len,
			    &req);
			info[n_chains].idx = req.idx;

			if (n == 0) {
				/*
				 * No rx buffers: re-enable kicks and double
				 * check for a racing guest refill.
				 */
				vq_kick_enable(vq);
				if (!vq_has_descs(vq)) {
					/*
					 * Still no buffers.  Return the
					 * chains collected so far, interrupt
					 * if needed, and pause the backend
					 * until the next guest kick.
					 */
					vq_retchains(vq, n_chains);
					vq_endchains(vq, 1);
					netbe_rx_disable(sc->vsc_be);
					return;
				}
				/* Buffers appeared: keep collecting. */
				vq_kick_disable(vq);
				continue;
			}
#ifndef __FreeBSD__
			/* Chain mapping failed: drop everything collected. */
			if (n == -1) {
				vq_retchains(vq, n_chains);
				vq_endchains(vq, 0);
				return;
			}
#endif
			assert(n >= 1 && riov_len + n <= VTNET_MAXSEGS);
			riov_len += n;
			if (!sc->rx_merge) {
				/* No merging: a single chain per packet. */
				n_chains = 1;
				break;
			}
#ifndef __FreeBSD__
			/* Guard the 32-bit per-chain length field. */
			size_t c = count_iov(riov, n);
			if (c > UINT32_MAX) {
				vq_retchains(vq, n_chains);
				vq_endchains(vq, 0);
				return;
			}
			info[n_chains].len = (uint32_t)c;
#else
			info[n_chains].len = (uint32_t)count_iov(riov, n);
#endif
			riov_bytes += info[n_chains].len;
			riov += n;
			n_chains++;
		} while (riov_bytes < plen && riov_len < VTNET_MAXSEGS);

		riov = iov;
#ifdef __FreeBSD__
		hdr = riov[0].iov_base;
#else
		hdr = (struct virtio_net_rxhdr *)riov[0].iov_base;
#endif
		if (prepend_hdr_len > 0) {
			/*
			 * The frontend uses a virtio-net header, but the
			 * backend does not: skip the header space in the
			 * guest buffer and zero-fill it.
			 */
			riov = iov_trim_hdr(riov, &riov_len, prepend_hdr_len);
			if (riov == NULL) {
				/*
				 * The first buffer cannot even hold the
				 * header: release it with length 0, return
				 * the rest, and retry with the next chains.
				 */
				vq_relchain(vq, info[0].idx, 0);
				vq_retchains(vq, n_chains - 1);
				continue;
			}
			memset(hdr, 0, prepend_hdr_len);
		}

		rlen = netbe_recv(sc->vsc_be, riov, riov_len);
		if (rlen != plen - prepend_hdr_len) {
			/*
			 * If this happens it means something is wrong with
			 * the backend (e.g., some other process is stealing
			 * our packets): drop and retry.
			 */
			WPRINTF((
			    "netbe_recv: expected %zd bytes, got %zd",
			    plen - prepend_hdr_len, rlen));
			vq_retchains(vq, n_chains);
			continue;
		}

		ulen = (uint32_t)plen;

		/*
		 * Publish the used buffers to the guest.  With mergeable
		 * rx buffers we also patch the buffer count (vrh_bufs)
		 * into the virtio-net header before publishing.
		 */
		if (!sc->rx_merge) {
			vq_relchain(vq, info[0].idx, ulen);
		} else {
			uint32_t iolen;
			int i = 0;

			do {
				/* Each chain consumes up to its capacity. */
				iolen = info[i].len;
				if (iolen > ulen) {
					iolen = ulen;
				}
				vq_relchain_prepare(vq, info[i].idx, iolen);
				ulen -= iolen;
				i++;
			} while (ulen > 0);
			hdr->vrh_bufs = i;
			vq_relchain_publish(vq);
			assert(i == n_chains);
		}
	}
}
/*
 * Backend event callback: the backend has packets ready.  Run the
 * receive loop under rx_mtx to serialize with resets and queue
 * notifications.
 */
static void
pci_vtnet_rx_callback(int fd __unused, enum ev_type type __unused, void *param)
{
	struct pci_vtnet_softc *softc = param;

	pthread_mutex_lock(&softc->rx_mtx);
	pci_vtnet_rx(softc);
	pthread_mutex_unlock(&softc->rx_mtx);
}
/*
 * RX queue notification: the guest has posted new receive buffers.
 * The kick is ignored until feature negotiation completes; otherwise
 * further kicks are suppressed and the backend is told to start
 * delivering packets.
 */
static void
pci_vtnet_ping_rxq(void *vsc, struct vqueue_info *vq)
{
	struct pci_vtnet_softc *softc = vsc;

	pthread_mutex_lock(&softc->rx_mtx);
	if (softc->features_negotiated) {
		vq_kick_disable(vq);
		netbe_rx_enable(softc->vsc_be);
	}
	pthread_mutex_unlock(&softc->rx_mtx);
}
/*
 * Transmit one descriptor chain: pull it from the TX ring, strip the
 * virtio-net header if the backend does not expect one, hand the
 * payload to the backend, and release the chain back to the guest.
 */
static void
pci_vtnet_proctx(struct pci_vtnet_softc *sc, struct vqueue_info *vq)
{
	struct iovec iov[VTNET_MAXSEGS + 1];
	struct iovec *siov = iov;
	struct vi_req req;
	ssize_t len = 0;
	int n;

	/* The chain's first bytes carry the virtio-net header. */
	n = vq_getchain(vq, iov, VTNET_MAXSEGS, &req);
	assert(n >= 1 && n <= VTNET_MAXSEGS);

	if (sc->vhdrlen != sc->be_vhdrlen)
		siov = iov_trim_hdr(siov, &n, sc->vhdrlen);

	if (siov != NULL) {
		len = netbe_send(sc->vsc_be, siov, n);
		if (len < 0)
			len = 0;	/* backend error: drop the packet */
	}

	/* siov == NULL means the header could not be trimmed; report 0. */
	vq_relchain(vq, req.idx, len);
}
/*
 * TX queue notification: wake the dedicated TX thread instead of
 * sending inline.  The kick-disable and signal must happen under
 * tx_mtx, in this order, to pair with the worker's cond-wait loop.
 */
static void
pci_vtnet_ping_txq(void *vsc, struct vqueue_info *vq)
{
	struct pci_vtnet_softc *sc = vsc;

	/* Any ring entries to process? */
	if (!vq_has_descs(vq))
		return;

	/* Signal the tx thread for processing. */
	pthread_mutex_lock(&sc->tx_mtx);
	vq_kick_disable(vq);
	if (sc->tx_in_progress == 0)
		pthread_cond_signal(&sc->tx_cond);
	pthread_mutex_unlock(&sc->tx_mtx);
}
/*
 * Thread which handles processing of TX descriptors.  It sleeps on
 * tx_cond until kicked by pci_vtnet_ping_txq(), drains the TX ring
 * with tx_mtx dropped, and pauses while a reset is in progress.
 */
static void *
pci_vtnet_tx_thread(void *param)
{
	struct pci_vtnet_softc *sc = param;
	struct vqueue_info *vq;
	int error;

	vq = &sc->vsc_queues[VTNET_TXQ];

	/*
	 * Wait until the queue pointers are initialized and the first
	 * TX kick arrives before entering the main loop.
	 */
	pthread_mutex_lock(&sc->tx_mtx);
	error = pthread_cond_wait(&sc->tx_cond, &sc->tx_mtx);
	assert(error == 0);

	for (;;) {
		/* note: tx_mtx is held here */
		while (sc->resetting || !vq_has_descs(vq)) {
			vq_kick_enable(vq);
			/* Re-check after enabling kicks to avoid a race. */
			if (!sc->resetting && vq_has_descs(vq))
				break;

			/* Announce idleness so a reset can proceed. */
			sc->tx_in_progress = 0;
			error = pthread_cond_wait(&sc->tx_cond, &sc->tx_mtx);
			assert(error == 0);
		}
		vq_kick_disable(vq);
		sc->tx_in_progress = 1;
		pthread_mutex_unlock(&sc->tx_mtx);

		do {
			/* Send one chain at a time until the ring drains. */
			pci_vtnet_proctx(sc, vq);
		} while (vq_has_descs(vq));

		/* Generate an interrupt if needed. */
		vq_endchains(vq, 1);

		pthread_mutex_lock(&sc->tx_mtx);
	}
#ifndef __FreeBSD__
	/* Unreachable; keeps non-FreeBSD compilers satisfied. */
	return (NULL);
#endif
}
#ifdef notyet
/*
 * Control-queue notification stub: the control virtqueue is not yet
 * implemented, so this only logs the kick when compiled in.
 */
static void
pci_vtnet_ping_ctlq(void *vsc, struct vqueue_info *vq)
{
	DPRINTF(("vtnet: control qnotify!"));
}
#endif
/*
 * Tear down a (possibly partially constructed) softc and pass the
 * supplied error code through, so init failure paths can simply
 * "return (pci_vtnet_free_softstate(sc, err));".
 */
static int
pci_vtnet_free_softstate(struct pci_vtnet_softc *sc, int ret)
{
	pthread_mutex_destroy(&sc->vsc_mtx);
	if (sc->vsc_be != NULL)
		netbe_cleanup(sc->vsc_be);
	free(sc);

	return (ret);
}
/*
 * Construct and register one emulated virtio-net PCI function from
 * its configuration nvlist.  Returns 0 on success or an errno value
 * after tearing the partial instance down via
 * pci_vtnet_free_softstate().
 */
static int
pci_vtnet_init(struct pci_devinst *pi, nvlist_t *nvl)
{
	struct pci_vtnet_softc *sc;
	const char *value;
	char tname[MAXCOMLEN + 1];
	unsigned long mtu = ETHERMTU;
	int err;

	/*
	 * Allocate data structures for further virtio initializations.
	 * sc also contains a copy of vtnet_vi_consts, since the
	 * capabilities change depending on the backend.
	 */
	sc = calloc(1, sizeof (struct pci_vtnet_softc));
	if (sc == NULL)
		return (errno);

	if (get_config_bool_default("virtio.net.debug", false))
		pci_vtnet_debug = 1;
	vi_set_debug(&sc->vsc_vs, pci_vtnet_debug);

	sc->vsc_consts = vtnet_vi_consts;
	pthread_mutex_init(&sc->vsc_mtx, NULL);

	sc->vsc_queues[VTNET_RXQ].vq_qsize = VTNET_RINGSZ;
	sc->vsc_queues[VTNET_RXQ].vq_notify = pci_vtnet_ping_rxq;
	sc->vsc_queues[VTNET_TXQ].vq_qsize = VTNET_RINGSZ;
	sc->vsc_queues[VTNET_TXQ].vq_notify = pci_vtnet_ping_txq;
#ifdef notyet
	sc->vsc_queues[VTNET_CTLQ].vq_qsize = VTNET_RINGSZ;
	sc->vsc_queues[VTNET_CTLQ].vq_notify = pci_vtnet_ping_ctlq;
#endif

	/* Use the configured MAC address if available, else generate one. */
	value = get_config_value_node(nvl, "mac");
	if (value != NULL) {
		err = net_parsemac(value, sc->vsc_config.vnc_macaddr);
		if (err != 0)
			return (pci_vtnet_free_softstate(sc, err));
	} else
		net_genmac(pi, sc->vsc_config.vnc_macaddr);

	/* Advertise VIRTIO_NET_F_MTU only when an MTU was configured. */
	value = get_config_value_node(nvl, "mtu");
	if (value != NULL) {
		err = net_parsemtu(value, &mtu);
		if (err != 0)
			return (pci_vtnet_free_softstate(sc, err));

		if (mtu < VTNET_MIN_MTU || mtu > VTNET_MAX_MTU) {
			errno = EINVAL;
			return (pci_vtnet_free_softstate(sc, errno));
		}
		sc->vsc_consts.vc_hv_caps_legacy |= VIRTIO_NET_F_MTU;
		sc->vsc_consts.vc_hv_caps_modern |= VIRTIO_NET_F_MTU;
	}
	sc->vsc_config.vnc_mtu = mtu;

	/* Attach the packet I/O backend, if one was configured. */
	if (get_config_value_node(nvl, "backend") != NULL) {
		err = netbe_init(&sc->vsc_be, nvl, pci_vtnet_rx_callback, sc);
		if (err != 0)
			return (pci_vtnet_free_softstate(sc, err));
#ifndef __FreeBSD__
		/* The backend may supply/override the MAC address. */
		size_t buflen = sizeof (sc->vsc_config.vnc_macaddr);

		err = netbe_get_mac(sc->vsc_be, sc->vsc_config.vnc_macaddr,
		    &buflen);
		if (err != 0)
			return (pci_vtnet_free_softstate(sc, err));
#endif
	}

	/* Fold the backend's offload capabilities into ours. */
	sc->vsc_consts.vc_hv_caps_legacy |= netbe_get_cap(sc->vsc_be);
	sc->vsc_consts.vc_hv_caps_modern |= netbe_get_cap(sc->vsc_be);

	/* Single queue pair only; multiqueue is not implemented. */
	sc->vsc_config.vnc_max_qpair = 1;

	/* Link the virtio framework to this softc and its queues. */
	vi_softc_linkup(&sc->vsc_vs, &sc->vsc_consts, sc, pi, sc->vsc_queues);
	sc->vsc_vs.vs_mtx = &sc->vsc_mtx;

	vi_pci_init(pi, VIRTIO_MODE_TRANSITIONAL, VIRTIO_DEV_NET,
	    VIRTIO_ID_NETWORK, PCIC_NETWORK);

	/* Report link up in the device config. */
	sc->vsc_config.vnc_status = VIRTIO_NET_S_LINK_UP;

	if (!vi_intr_init(&sc->vsc_vs, fbsdrun_virtio_msix()))
		return (pci_vtnet_free_softstate(sc, EIO));

	if (!vi_pcibar_setup(&sc->vsc_vs))
		return (pci_vtnet_free_softstate(sc, EIO));

	sc->resetting = 0;

	sc->rx_merge = 0;
	/* Legacy default header length until features are negotiated. */
	sc->vhdrlen = sizeof (struct virtio_net_rxhdr) - 2;
	pthread_mutex_init(&sc->rx_mtx, NULL);

	/*
	 * Initialize TX synchronization and spawn the single TX
	 * processing thread.
	 * NOTE(review): the pthread_create() return value is not
	 * checked; a failure here would leave tx_tid invalid.
	 */
	sc->tx_in_progress = 0;
	pthread_mutex_init(&sc->tx_mtx, NULL);
	pthread_cond_init(&sc->tx_cond, NULL);
	pthread_create(&sc->tx_tid, NULL, pci_vtnet_tx_thread, (void *)sc);
	snprintf(tname, sizeof (tname), "vtnet-%d:%d tx", pi->pi_slot,
	    pi->pi_func);
	pthread_set_name_np(sc->tx_tid, tname);

	return (0);
}
/*
 * Guest write to device config space.  Only the MAC address bytes are
 * writable; writes anywhere else are logged and dropped.  A MAC change
 * raises a config-changed notification.
 */
static int
pci_vtnet_cfgwrite(void *vsc, int offset, int size, uint32_t value)
{
	struct pci_vtnet_softc *sc = vsc;

	if (offset >= (int)sizeof (sc->vsc_config.vnc_macaddr)) {
		/* Silently drop writes to read-only registers. */
		DPRINTF(("vtnet: write to readonly reg %d", offset));
		return (0);
	}

	assert(offset + size <= (int)sizeof (sc->vsc_config.vnc_macaddr));
	memcpy(&sc->vsc_config.vnc_macaddr[offset], &value, size);
	vq_devcfg_changed(&sc->vsc_vs);

	return (0);
}
/*
 * Guest read from device config space: copy the requested slice of
 * vsc_config into the caller's buffer.
 */
static int
pci_vtnet_cfgread(void *vsc, int offset, int size, uint32_t *retval)
{
	struct pci_vtnet_softc *sc = vsc;
	const uint8_t *src = (const uint8_t *)&sc->vsc_config + offset;

	memcpy(retval, src, size);
	return (0);
}
/*
 * Feature-negotiation callback: record the negotiated bits, derive
 * the virtio-net header length the guest will use, propagate the
 * result to the backend, and finally unblock the RX path.
 */
static void
pci_vtnet_neg_features(void *vsc, uint64_t *negotiated_features)
{
	struct pci_vtnet_softc *sc = vsc;

	sc->vsc_features = *negotiated_features;

	if ((*negotiated_features & VIRTIO_NET_F_MRG_RXBUF) != 0) {
		/* Mergeable rx buffers: the full header is used. */
		sc->vhdrlen = sizeof (struct virtio_net_rxhdr);
		sc->rx_merge = 1;
	} else {
		/*
		 * Without mergeable rx buffers, legacy devices use a
		 * header 2 bytes shorter (no num_buffers field); modern
		 * (virtio 1.0+) devices keep the full header.
		 */
		sc->vhdrlen = sizeof (struct virtio_net_rxhdr);
		if (!vi_is_modern(&sc->vsc_vs))
			sc->vhdrlen -= 2;
		sc->rx_merge = 0;
	}

	/* Tell the backend which offloads/header length to use. */
	netbe_set_cap(sc->vsc_be, *negotiated_features, sc->vhdrlen);
	sc->be_vhdrlen = netbe_get_vnet_hdr_len(sc->vsc_be);
	/* The backend uses either no header or the same layout we do. */
	assert(sc->be_vhdrlen == 0 || sc->be_vhdrlen == sc->vhdrlen);

	/* Header layout is now settled: allow RX processing. */
	pthread_mutex_lock(&sc->rx_mtx);
	sc->features_negotiated = true;
	pthread_mutex_unlock(&sc->rx_mtx);
}
/* Device emulation entry for "virtio-net", registered via linker set. */
static const struct pci_devemu pci_de_vnet = {
	.pe_emu = "virtio-net",
	.pe_init = pci_vtnet_init,
	.pe_legacy_config = netbe_legacy_config,
	.pe_cfgwrite = vi_pci_cfgwrite,
	.pe_cfgread = vi_pci_cfgread,
	.pe_barwrite = vi_pci_write,
	.pe_barread = vi_pci_read,
};
PCI_EMUL_SET(pci_de_vnet);