#if defined(__FreeBSD__)
#include <sys/cdefs.h>
#include <sys/types.h>
#include <sys/errno.h>
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/conf.h>
#include <sys/filio.h>
#include <sys/sockio.h>
#include <sys/socketvar.h>
#include <sys/malloc.h>
#include <sys/poll.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/socket.h>
#include <sys/selinfo.h>
#include <sys/sysctl.h>
#include <sys/jail.h>
#include <sys/epoch.h>
#include <net/vnet.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/bpf.h>
#include <machine/bus.h>
#include <sys/endian.h>
#include <sys/refcount.h>
#include <net/ethernet.h>
#elif defined(linux)
#include "bsd_glue.h"
#elif defined(__APPLE__)
#warning OSX support is only partial
#include "osx_glue.h"
#elif defined (_WIN32)
#include "win_glue.h"
#else
#error Unsupported platform
#endif
#include <net/netmap.h>
#include <dev/netmap/netmap_kern.h>
#include <dev/netmap/netmap_mem2.h>
/* Run-time tunables, exported as sysctls under dev.netmap (see SYSCTL_INT
 * declarations below). */
/* Verbosity flag for informational messages. */
int netmap_verbose;
#ifdef CONFIG_NETMAP_DEBUG
/* Debug message mask, only compiled in with CONFIG_NETMAP_DEBUG. */
int netmap_debug;
#endif
/* When non-zero, do not timestamp rings on rxsync. */
static int netmap_no_timestamp;
/* When non-zero, always look for new received packets instead of relying
 * on pending interrupts. */
int netmap_no_pendintr = 1;
/* Number of txsync loops performed in a bridge flush. */
int netmap_txsync_retry = 2;
/* When non-zero, force NR_FORWARD mode on all rings. */
static int netmap_fwd = 0;
/* Adapter attach policy: best available, native only, or emulated only.
 * NETMAP_ADMODE_LAST is a sentinel for range checking. */
enum { NETMAP_ADMODE_BEST = 0,
NETMAP_ADMODE_NATIVE,
NETMAP_ADMODE_GENERIC,
NETMAP_ADMODE_LAST };
static int netmap_admode = NETMAP_ADMODE_BEST;
/* RX notification interval for emulated adapters, in nanoseconds. */
int netmap_generic_mit = 100*1000;
#ifdef linux
/* Use the qdisc layer for TX on emulated (generic) adapters (Linux only). */
int netmap_generic_txqdisc = 1;
#endif
/* Per-ring slot count for emulated netmap mode. */
int netmap_generic_ringsize = 1024;
/* Number of TX/RX queues for emulated adapters. */
int netmap_generic_rings = 1;
/* Allow checksum generation by the NIC in emulated mode (off by default). */
int netmap_generic_hwcsum = 0;
/* Allow ptnet devices to use virtio-net headers. */
int ptnet_vnet_hdr = 1;
/* sysctl machinery: expose the knobs above under dev.netmap.* */
SYSBEGIN(main_init);
SYSCTL_DECL(_dev_netmap);
SYSCTL_NODE(_dev, OID_AUTO, netmap, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
"Netmap args");
SYSCTL_INT(_dev_netmap, OID_AUTO, verbose,
CTLFLAG_RW, &netmap_verbose, 0, "Verbose mode");
#ifdef CONFIG_NETMAP_DEBUG
SYSCTL_INT(_dev_netmap, OID_AUTO, debug,
CTLFLAG_RW, &netmap_debug, 0, "Debug messages");
#endif
SYSCTL_INT(_dev_netmap, OID_AUTO, no_timestamp,
CTLFLAG_RW, &netmap_no_timestamp, 0, "no_timestamp");
SYSCTL_INT(_dev_netmap, OID_AUTO, no_pendintr, CTLFLAG_RW, &netmap_no_pendintr,
0, "Always look for new received packets.");
SYSCTL_INT(_dev_netmap, OID_AUTO, txsync_retry, CTLFLAG_RW,
&netmap_txsync_retry, 0, "Number of txsync loops in bridge's flush.");
SYSCTL_INT(_dev_netmap, OID_AUTO, fwd, CTLFLAG_RW, &netmap_fwd, 0,
"Force NR_FORWARD mode");
SYSCTL_INT(_dev_netmap, OID_AUTO, admode, CTLFLAG_RW, &netmap_admode, 0,
"Adapter mode. 0 selects the best option available,"
"1 forces native adapter, 2 forces emulated adapter");
SYSCTL_INT(_dev_netmap, OID_AUTO, generic_hwcsum, CTLFLAG_RW, &netmap_generic_hwcsum,
0, "Hardware checksums. 0 to disable checksum generation by the NIC (default),"
"1 to enable checksum generation by the NIC");
SYSCTL_INT(_dev_netmap, OID_AUTO, generic_mit, CTLFLAG_RW, &netmap_generic_mit,
0, "RX notification interval in nanoseconds");
SYSCTL_INT(_dev_netmap, OID_AUTO, generic_ringsize, CTLFLAG_RW,
&netmap_generic_ringsize, 0,
"Number of per-ring slots for emulated netmap mode");
SYSCTL_INT(_dev_netmap, OID_AUTO, generic_rings, CTLFLAG_RW,
&netmap_generic_rings, 0,
"Number of TX/RX queues for emulated netmap adapters");
#ifdef linux
SYSCTL_INT(_dev_netmap, OID_AUTO, generic_txqdisc, CTLFLAG_RW,
&netmap_generic_txqdisc, 0, "Use qdisc for generic adapters");
#endif
SYSCTL_INT(_dev_netmap, OID_AUTO, ptnet_vnet_hdr, CTLFLAG_RW, &ptnet_vnet_hdr,
0, "Allow ptnet devices to use virtio-net headers");
SYSEND;
/* Global lock serializing netmap control operations. */
NMG_LOCK_T netmap_global_lock;
/*
 * Stop a single kring and wait for any user still inside the sync path
 * to drain.  nm_kr_stop() marks the ring stopped and grabs the busy
 * marker; the empty lock/unlock of q_lock acts as a barrier flushing
 * out threads that were holding the queue lock; nm_kr_put() releases
 * the busy marker.  The statement order is what makes this safe.
 */
static void
netmap_disable_ring(struct netmap_kring *kr, int stopped)
{
nm_kr_stop(kr, stopped);
/* lock/unlock pair: wait for q_lock holders to leave the ring */
mtx_lock(&kr->q_lock);
mtx_unlock(&kr->q_lock);
nm_kr_put(kr);
}
/*
 * Stop or restart one ring of the adapter.  Stopping waits for pending
 * users to drain (via netmap_disable_ring()); restarting only clears
 * the stop flag.
 */
void
netmap_set_ring(struct netmap_adapter *na, u_int ring_id, enum txrx t, int stopped)
{
	struct netmap_kring *kr = NMR(na, t)[ring_id];

	if (!stopped) {
		kr->nkr_stopped = 0;
		return;
	}
	netmap_disable_ring(kr, stopped);
}
/*
 * Stop or restart every ring (TX, RX and host rings) of the adapter.
 * No-op if the adapter is not in netmap mode.
 */
void
netmap_set_all_rings(struct netmap_adapter *na, int stopped)
{
	enum txrx t;
	int r;

	if (!nm_netmap_on(na))
		return;
	if (netmap_verbose) {
		nm_prinf("%s: %sable all rings", na->name,
		    (stopped ? "dis" : "en"));
	}
	for_rx_tx(t) {
		for (r = 0; r < netmap_real_rings(na, t); r++)
			netmap_set_ring(na, r, t, stopped);
	}
}
/* Stop all rings of ifp's adapter, if it has a valid netmap adapter. */
void
netmap_disable_all_rings(if_t ifp)
{
	if (!NM_NA_VALID(ifp))
		return;
	netmap_set_all_rings(NA(ifp), NM_KR_LOCKED);
}
/* Restart all rings of ifp's adapter, if it has a valid netmap adapter. */
void
netmap_enable_all_rings(if_t ifp)
{
	if (!NM_NA_VALID(ifp))
		return;
	netmap_set_all_rings(NA(ifp), 0);
}
/*
 * Mark the adapter as a zombie (interface going away while netmap is
 * active).  Rings are stopped around the flag update so that no sync
 * path observes a half-set NAF_ZOMBIE.
 */
void
netmap_make_zombie(if_t ifp)
{
	struct netmap_adapter *na;

	if (!NM_NA_VALID(ifp))
		return;
	na = NA(ifp);
	netmap_set_all_rings(na, NM_KR_LOCKED);
	na->na_flags |= NAF_ZOMBIE;
	netmap_set_all_rings(na, 0);
}
/*
 * Clear the zombie state set by netmap_make_zombie(), again bracketing
 * the flag update with a stop/restart of all rings.
 */
void
netmap_undo_zombie(if_t ifp)
{
	struct netmap_adapter *na;

	if (!NM_NA_VALID(ifp))
		return;
	na = NA(ifp);
	if ((na->na_flags & NAF_ZOMBIE) == 0)
		return;
	netmap_set_all_rings(na, NM_KR_LOCKED);
	na->na_flags &= ~NAF_ZOMBIE;
	netmap_set_all_rings(na, 0);
}
/*
 * Force *v into the range [lo, hi]: values below lo are bumped up to
 * the (clamped) default dflt, values above hi are clamped to hi.  If a
 * change was made and msg is non-NULL, log what happened.  Returns the
 * final value of *v.
 */
u_int
nm_bound_var(u_int *v, u_int dflt, u_int lo, u_int hi, const char *msg)
{
	u_int prev = *v;
	const char *action = NULL;

	/* two independent clamps: dflt below lo is first raised to lo,
	 * and the result may still be clamped down to hi */
	if (dflt < lo)
		dflt = lo;
	if (dflt > hi)
		dflt = hi;
	if (prev < lo) {
		*v = dflt;
		action = "Bump";
	} else if (prev > hi) {
		*v = hi;
		action = "Clamp";
	}
	if (action != NULL && msg != NULL)
		nm_prinf("%s %s to %d (was %d)", action, msg, *v, prev);
	return *v;
}
/*
 * Dump at most `lim` bytes of buffer `p` (of length `len`) into `dst`
 * as a printable hex + ASCII table, and return the destination buffer.
 * If `dst` is NULL a static 8 KiB buffer is used instead (not
 * reentrant).
 *
 * Fix: the original wrote with unbounded sprintf into the static
 * buffer; dumping more than ~1800 bytes overflowed it.  When the
 * static buffer is used, `lim` is now clamped so the output always
 * fits.  A caller-supplied dst must still be large enough for
 * roughly 72 bytes per 16-byte row plus a 64-byte header.
 */
const char *
nm_dump_buf(char *p, int len, int lim, char *dst)
{
	static char _dst[8192];
	int i, j, i0;
	static char hex[] = "0123456789abcdef";
	char *o;	/* output position */
#define P_HI(x)	hex[((x) & 0xf0)>>4]
#define P_LO(x)	hex[((x) & 0xf)]
#define P_C(x)	((x) >= 0x20 && (x) <= 0x7e ? (x) : '.')
	if (!dst)
		dst = _dst;
	if (lim <= 0 || lim > len)
		lim = len;
	if (dst == _dst) {
		/* each 16-byte row takes at most 7 ("%5d: ") + 48 (hex)
		 * + 16 (chars) + 1 ('\n') = 72 bytes; reserve 64 bytes
		 * for the header line and 1 for the final NUL */
		int max_rows = (int)((sizeof(_dst) - 64) / 72);
		if (lim > max_rows * 16)
			lim = max_rows * 16;
	}
	o = dst;
	sprintf(o, "buf 0x%p len %d lim %d\n", p, len, lim);
	o += strlen(o);
	/* 16 bytes per row: offset, hex columns, then printable chars */
	for (i = 0; i < lim; ) {
		sprintf(o, "%5d: ", i);
		o += strlen(o);
		memset(o, ' ', 48);
		i0 = i;
		for (j = 0; j < 16 && i < lim; i++, j++) {
			o[j*3] = P_HI(p[i]);
			o[j*3+1] = P_LO(p[i]);
		}
		i = i0;
		for (j = 0; j < 16 && i < lim; i++, j++)
			o[j + 48] = P_C(p[i]);
		o[j+48] = '\n';
		o += j+49;
	}
	*o = '\0';
#undef P_HI
#undef P_LO
#undef P_C
	return dst;
}
/*
 * Refresh the adapter configuration from the backend via na->nm_config()
 * (falling back to the current values if the callback is missing or
 * fails).  Returns 0 if the configuration is unchanged or could be
 * updated (no file descriptors bound, active_fds == 0); returns 1 when
 * the configuration changed while the adapter is in use, which the
 * caller must treat as an error.
 */
int
netmap_update_config(struct netmap_adapter *na)
{
struct nm_config_info info;
/* refresh the name from the ifnet, unless this is a bwrap */
if (na->ifp && !nm_is_bwrap(na)) {
strlcpy(na->name, if_name(na->ifp), sizeof(na->name));
}
bzero(&info, sizeof(info));
if (na->nm_config == NULL ||
na->nm_config(na, &info)) {
/* take whatever we had at init time */
info.num_tx_rings = na->num_tx_rings;
info.num_tx_descs = na->num_tx_desc;
info.num_rx_rings = na->num_rx_rings;
info.num_rx_descs = na->num_rx_desc;
info.rx_buf_maxsize = na->rx_buf_maxsize;
}
if (na->num_tx_rings == info.num_tx_rings &&
na->num_tx_desc == info.num_tx_descs &&
na->num_rx_rings == info.num_rx_rings &&
na->num_rx_desc == info.num_rx_descs &&
na->rx_buf_maxsize == info.rx_buf_maxsize)
return 0; /* nothing changed */
if (na->active_fds == 0) {
/* no users: safe to adopt the new configuration */
na->num_tx_rings = info.num_tx_rings;
na->num_tx_desc = info.num_tx_descs;
na->num_rx_rings = info.num_rx_rings;
na->num_rx_desc = info.num_rx_descs;
na->rx_buf_maxsize = info.rx_buf_maxsize;
if (netmap_verbose)
nm_prinf("configuration changed for %s: txring %d x %d, "
"rxring %d x %d, rxbufsz %d",
na->name, na->num_tx_rings, na->num_tx_desc,
na->num_rx_rings, na->num_rx_desc, na->rx_buf_maxsize);
return 0;
}
/* configuration changed under active users: report the problem */
nm_prerr("WARNING: configuration changed for %s while active: "
"txring %d x %d, rxring %d x %d, rxbufsz %d",
na->name, info.num_tx_rings, info.num_tx_descs,
info.num_rx_rings, info.num_rx_descs,
info.rx_buf_maxsize);
return 1;
}
static int netmap_txsync_to_host(struct netmap_kring *kring, int flags);
static int netmap_rxsync_from_host(struct netmap_kring *kring, int flags);
/*
 * Default per-kring buffer configuration callback, used when the
 * adapter does not provide its own nm_bufcfg: accept the requested
 * buffer length as-is and impose no alignment constraint.
 */
static int
netmap_default_bufcfg(struct netmap_kring *kring, uint64_t target)
{
	kring->buf_align = 0;		/* no alignment requirement */
	kring->hwbuf_len = target;	/* accept the requested length */
	return 0;
}
/*
 * Allocate and initialize the krings array for the adapter.  The layout
 * of the single allocation is:
 *
 *   tx_rings[ n[NR_TX] pointers ]
 *   rx_rings[ n[NR_RX] pointers ]
 *   tailroom (opaque, `tailroom` bytes, used by callers e.g. for leases)
 *   the actual struct netmap_kring objects, pointed to by the arrays
 *
 * Returns 0 on success or an errno; on partial failure all previously
 * initialized krings are torn down via netmap_krings_delete().
 */
int
netmap_krings_create(struct netmap_adapter *na, u_int tailroom)
{
u_int i, len, ndesc;
struct netmap_kring *kring;
u_int n[NR_TXRX];
enum txrx t;
int err = 0;
if (na->tx_rings != NULL) {
if (netmap_debug & NM_DEBUG_ON)
nm_prerr("warning: krings were already created");
return 0;
}
/* account for the (eventually fake) host rings */
n[NR_TX] = netmap_all_rings(na, NR_TX);
n[NR_RX] = netmap_all_rings(na, NR_RX);
/* one pointer plus one struct per ring, plus the caller's tailroom */
len = (n[NR_TX] + n[NR_RX]) *
(sizeof(struct netmap_kring) + sizeof(struct netmap_kring *))
+ tailroom;
na->tx_rings = nm_os_malloc((size_t)len);
if (na->tx_rings == NULL) {
nm_prerr("Cannot allocate krings");
return ENOMEM;
}
na->rx_rings = na->tx_rings + n[NR_TX];
na->tailroom = na->rx_rings + n[NR_RX];
/* the kring structs live right after the tailroom area */
kring = (struct netmap_kring *)((char *)na->tailroom + tailroom);
for (i = 0; i < n[NR_TX] + n[NR_RX]; i++) {
na->tx_rings[i] = kring;
kring++;
}
for_rx_tx(t) {
ndesc = nma_get_ndesc(na, t);
for (i = 0; i < n[t]; i++) {
kring = NMR(na, t)[i];
bzero(kring, sizeof(*kring));
kring->notify_na = na;
kring->ring_id = i;
kring->tx = t;
kring->nkr_num_slots = ndesc;
kring->nr_mode = NKR_NETMAP_OFF;
kring->nr_pending_mode = NKR_NETMAP_OFF;
if (i < nma_get_nrings(na, t)) {
/* hardware ring: use the adapter's sync callbacks */
kring->nm_sync = (t == NR_TX ? na->nm_txsync : na->nm_rxsync);
kring->nm_bufcfg = na->nm_bufcfg;
if (kring->nm_bufcfg == NULL)
kring->nm_bufcfg = netmap_default_bufcfg;
} else {
/* host ring (fake if the adapter has none) */
if (!(na->na_flags & NAF_HOST_RINGS))
kring->nr_kflags |= NKR_FAKERING;
kring->nm_sync = (t == NR_TX ?
netmap_txsync_to_host:
netmap_rxsync_from_host);
kring->nm_bufcfg = netmap_default_bufcfg;
}
kring->nm_notify = na->nm_notify;
kring->rhead = kring->rcur = kring->nr_hwcur = 0;
/* TX rings start full (tail one slot behind head), RX empty */
kring->rtail = kring->nr_hwtail = (t == NR_TX ? ndesc - 1 : 0);
snprintf(kring->name, sizeof(kring->name) - 1, "%s %s%d", na->name,
nm_txrx2str(t), i);
nm_prdis("ktx %s h %d c %d t %d",
kring->name, kring->rhead, kring->rcur, kring->rtail);
err = nm_os_selinfo_init(&kring->si, kring->name);
if (err) {
netmap_krings_delete(na);
return err;
}
mtx_init(&kring->q_lock, (t == NR_TX ? "nm_txq_lock" : "nm_rxq_lock"), NULL, MTX_DEF);
/* kring->na set last: it marks the kring fully initialized
 * (netmap_krings_delete only destroys q_lock when na != NULL) */
kring->na = na;
}
err = nm_os_selinfo_init(&na->si[t], na->name);
if (err) {
netmap_krings_delete(na);
return err;
}
}
return 0;
}
/*
 * Undo netmap_krings_create(): uninitialize the selinfo structures,
 * destroy the per-kring locks (only for krings that completed init,
 * detected by (*kring)->na != NULL) and free the single allocation
 * holding pointer arrays, tailroom and kring structs.
 */
void
netmap_krings_delete(struct netmap_adapter *na)
{
struct netmap_kring **kring = na->tx_rings;
enum txrx t;
if (na->tx_rings == NULL) {
if (netmap_debug & NM_DEBUG_ON)
nm_prerr("warning: krings were already deleted");
return;
}
for_rx_tx(t)
nm_os_selinfo_uninit(&na->si[t]);
/* walk all krings (tx, rx and host) up to the tailroom boundary */
for ( ; kring != na->tailroom; kring++) {
if ((*kring)->na != NULL)
mtx_destroy(&(*kring)->q_lock);
nm_os_selinfo_uninit(&(*kring)->si);
}
nm_os_free(na->tx_rings);
na->tx_rings = na->rx_rings = na->tailroom = NULL;
}
/*
 * Variant of netmap_krings_delete() for hardware adapters: before the
 * generic teardown, drain and finalize the host RX queues (the mbufs
 * queued by the host stack for the host rings).
 */
void
netmap_hw_krings_delete(struct netmap_adapter *na)
{
u_int lim = netmap_real_rings(na, NR_RX), i;
/* host RX rings are the ones past the hardware rings */
for (i = nma_get_nrings(na, NR_RX); i < lim; i++) {
struct mbq *q = &NMR(na, NR_RX)[i]->rx_queue;
nm_prdis("destroy sw mbq with len %d", mbq_len(q));
mbq_purge(q);
mbq_safe_fini(q);
}
netmap_krings_delete(na);
}
/*
 * If the adapter had been switched to a different memory allocator,
 * drop the reference on the current one and go back to the allocator
 * saved in nm_mem_prev.  No-op otherwise.
 */
void
netmap_mem_restore(struct netmap_adapter *na)
{
	if (na->nm_mem_prev == NULL)
		return;
	netmap_mem_put(na->nm_mem);
	na->nm_mem = na->nm_mem_prev;
	na->nm_mem_prev = NULL;
}
/*
 * Release the adapter's reference on its memory allocator and, when
 * the last file descriptor is gone, restore the original allocator.
 */
static void
netmap_mem_drop(struct netmap_adapter *na)
{
	netmap_mem_deref(na->nm_mem, na);
	if (na->active_fds <= 0)
		netmap_mem_restore(na);
}
/*
 * Commit the pending on/off mode of every host ring (the rings past
 * the hardware ones) into its effective mode.
 */
static void
netmap_update_hostrings_mode(struct netmap_adapter *na)
{
	enum txrx t;
	int r;

	for_rx_tx(t) {
		for (r = nma_get_nrings(na, t);
		     r < netmap_real_rings(na, t); r++) {
			struct netmap_kring *hk = NMR(na, t)[r];

			hk->nr_mode = hk->nr_pending_mode;
		}
	}
}
static void netmap_unset_ringid(struct netmap_priv_d *);
static void netmap_krings_put(struct netmap_priv_d *);
/*
 * Undo a registration: release the krings held by priv and, when this
 * is the last (or a mode-changing) user, switch the adapter out of
 * netmap mode, delete the rings and restore memory/ring bookkeeping.
 * Called with the global netmap lock held.
 */
void
netmap_do_unregif(struct netmap_priv_d *priv)
{
struct netmap_adapter *na = priv->np_na;
NMG_LOCK_ASSERT();
na->active_fds--;
/* release exclusive mode / users count on our rings */
netmap_krings_put(priv);
#ifdef WITH_MONITOR
/* stop monitors when the last user goes away */
if (na->active_fds <= 0) {
netmap_monitor_stop(na);
}
#endif
/* turn netmap mode off (rings are stopped around the call so the
 * datapath never observes a half-switched adapter) */
if (na->active_fds <= 0 || nm_kring_pending(priv)) {
netmap_set_all_rings(na, NM_KR_LOCKED);
na->nm_register(na, 0);
netmap_set_all_rings(na, 0);
}
netmap_mem_rings_delete(na);
if (na->active_fds <= 0) {
/* last instance: delete the krings and reset host ring counts */
if (netmap_debug & NM_DEBUG_ON)
nm_prinf("deleting last instance for %s", na->name);
if (nm_netmap_on(na)) {
nm_prerr("BUG: netmap on while going to delete the krings");
}
na->nm_krings_delete(na);
if (na->na_flags & NAF_HOST_RINGS) {
na->num_host_tx_rings = 1;
na->num_host_rx_rings = 1;
} else {
na->num_host_tx_rings = 0;
na->num_host_rx_rings = 0;
}
}
/* teardown order matters: ring selection, then the netmap_if,
 * then the memory allocator reference */
netmap_unset_ringid(priv);
netmap_mem_if_delete(na, priv->np_nifp);
netmap_mem_drop(na);
priv->np_na = NULL;
priv->np_nifp = NULL;
}
/*
 * Allocate and initialize a per-open private descriptor with a single
 * reference, taking a reference on the module as well.  Returns NULL
 * on allocation failure.
 */
struct netmap_priv_d*
netmap_priv_new(void)
{
	struct netmap_priv_d *p = nm_os_malloc(sizeof(*p));

	if (p != NULL) {
		p->np_refs = 1;
		nm_os_get_module();
	}
	return p;
}
/*
 * Drop one reference on the private descriptor; on the last reference,
 * unregister from the adapter (if bound), release the adapter and the
 * interface, and free the descriptor.  Must be called with the global
 * netmap lock held (see netmap_dtor()).
 */
void
netmap_priv_delete(struct netmap_priv_d *priv)
{
struct netmap_adapter *na = priv->np_na;
/* number of active references to this fd */
if (--priv->np_refs > 0) {
return;
}
nm_os_put_module();
if (na) {
netmap_do_unregif(priv);
}
/* note: netmap_do_unregif() cleared priv->np_na, so pass the saved na */
netmap_unget_na(na, priv->np_ifp);
bzero(priv, sizeof(*priv));
nm_os_free(priv);
}
/*
 * File-descriptor destructor callback: release the private descriptor
 * under the global netmap lock.
 */
void
netmap_dtor(void *data)
{
struct netmap_priv_d *priv = data;
NMG_LOCK();
netmap_priv_delete(priv);
NMG_UNLOCK();
}
/*
 * Pass a queue of mbufs up to the host network stack on interface dst.
 * nm_os_send_up() is called once per packet to build a chain (head is
 * the first packet, prev the last one seen), then once more with the
 * head to actually deliver the chain.  On FreeBSD delivery runs inside
 * a network epoch section.  The queue is finalized afterwards.
 */
static void
netmap_send_up(if_t dst, struct mbq *q)
{
struct mbuf *m;
struct mbuf *head = NULL, *prev = NULL;
#ifdef __FreeBSD__
struct epoch_tracker et;
NET_EPOCH_ENTER(et);
#endif
/* Send packets up, outside the lock; list head in "head" */
while ((m = mbq_dequeue(q)) != NULL) {
if (netmap_debug & NM_DEBUG_HOST)
nm_prinf("sending up pkt %p size %d", m, MBUF_LEN(m));
prev = nm_os_send_up(dst, m, prev);
if (head == NULL)
head = prev;
}
if (head)
nm_os_send_up(dst, NULL, head);
#ifdef __FreeBSD__
NET_EPOCH_EXIT(et);
#endif
mbq_fini(q);
}
/*
 * Scan the kring slots between nr_hwcur and rhead and copy into mbufs
 * (appended to q) those marked NS_FORWARD, or all of them when force
 * is set.  The NS_FORWARD flag is cleared on the slots that are taken.
 * Slots with bad lengths are skipped with a rate-limited warning.
 */
static void
netmap_grab_packets(struct netmap_kring *kring, struct mbq *q, int force)
{
u_int const lim = kring->nkr_num_slots - 1;
u_int const head = kring->rhead;
u_int n;
struct netmap_adapter *na = kring->na;
for (n = kring->nr_hwcur; n != head; n = nm_next(n, lim)) {
struct mbuf *m;
struct netmap_slot *slot = &kring->ring->slot[n];
if ((slot->flags & NS_FORWARD) == 0 && !force)
continue;
/* 14 is presumably the Ethernet header size, i.e. the minimum
 * sane frame length -- TODO confirm against the rest of the code */
if (slot->len < 14 || slot->len > NETMAP_BUF_SIZE(na)) {
nm_prlim(5, "bad pkt at %d len %d", n, slot->len);
continue;
}
slot->flags &= ~NS_FORWARD;
m = m_devget(NMB(na, slot), slot->len, 0, na->ifp, NULL);
if (m == NULL)
break;
mbq_enqueue(q, m);
}
}
/*
 * Common precondition for packet forwarding between host and NIC rings:
 * the kring must be an RX ring of an adapter with host rings, and
 * forwarding must be enabled either globally (netmap_fwd) or per-ring
 * (NR_FORWARD flag).
 */
static inline int
_nm_may_forward(struct netmap_kring *kring)
{
	if (kring->tx != NR_RX)
		return 0;
	if (!(kring->na->na_flags & NAF_HOST_RINGS))
		return 0;
	return netmap_fwd || (kring->ring->flags & NR_FORWARD);
}
/*
 * True when packets may be forwarded up to the host stack: forwarding
 * is allowed and this is a hardware RX ring (not the host RX ring,
 * which is the ring with id == num_rx_rings).
 */
static inline int
nm_may_forward_up(struct netmap_kring *kring)
{
	if (!_nm_may_forward(kring))
		return 0;
	return kring->ring_id != kring->na->num_rx_rings;
}
/*
 * True when packets may be forwarded down to the NIC: forwarding is
 * allowed, the sync caller granted NAF_CAN_FORWARD_DOWN, and this is
 * the host RX ring (id == num_rx_rings).
 */
static inline int
nm_may_forward_down(struct netmap_kring *kring, int sync_flags)
{
	if (!_nm_may_forward(kring))
		return 0;
	if ((sync_flags & NAF_CAN_FORWARD_DOWN) == 0)
		return 0;
	return kring->ring_id == kring->na->num_rx_rings;
}
/*
 * Move packets marked NS_FORWARD from the host RX ring down to the
 * hardware TX rings by swapping buffer indexes between the source and
 * destination slots (zero-copy; both sides get NS_BUF_CHANGED so the
 * drivers reload the buffer addresses).  Returns the number of slots
 * transferred.  TX rings are tried in order and each is filled until
 * it has no more room.
 */
static u_int
netmap_sw_to_nic(struct netmap_adapter *na)
{
/* the host RX ring is the one past the hardware RX rings */
struct netmap_kring *kring = na->rx_rings[na->num_rx_rings];
struct netmap_slot *rxslot = kring->ring->slot;
u_int i, rxcur = kring->nr_hwcur;
u_int const head = kring->rhead;
u_int const src_lim = kring->nkr_num_slots - 1;
u_int sent = 0;
for (i = 0; i < na->num_tx_rings; i++) {
struct netmap_kring *kdst = na->tx_rings[i];
struct netmap_ring *rdst = kdst->ring;
u_int const dst_lim = kdst->nkr_num_slots - 1;
for (; rxcur != head && !nm_ring_empty(rdst);
rxcur = nm_next(rxcur, src_lim) ) {
struct netmap_slot *src, *dst, tmp;
u_int dst_head = rdst->head;
src = &rxslot[rxcur];
if ((src->flags & NS_FORWARD) == 0 && !netmap_fwd)
continue;
sent++;
dst = &rdst->slot[dst_head];
/* swap the buffers: src keeps dst's old buffer */
tmp = *src;
src->buf_idx = dst->buf_idx;
src->flags = NS_BUF_CHANGED;
dst->buf_idx = tmp.buf_idx;
dst->len = tmp.len;
dst->flags = NS_BUF_CHANGED;
rdst->head = rdst->cur = nm_next(dst_head, dst_lim);
}
}
return sent;
}
/*
 * nm_sync callback for the host TX ring: grab all packets between
 * nr_hwcur and rhead (force == 1) into a queue, mark the ring as fully
 * consumed (hwtail one slot behind the new hwcur, modulo ring size)
 * and deliver the queue to the host stack.  Always returns 0.
 */
static int
netmap_txsync_to_host(struct netmap_kring *kring, int flags)
{
struct netmap_adapter *na = kring->na;
u_int const lim = kring->nkr_num_slots - 1;
u_int const head = kring->rhead;
struct mbq q;
mbq_init(&q);
netmap_grab_packets(kring, &q, 1 );
nm_prdis("have %d pkts in queue", mbq_len(&q));
kring->nr_hwcur = head;
/* hwtail = head + lim, wrapped: the ring is seen as empty */
kring->nr_hwtail = head + lim;
if (kring->nr_hwtail > lim)
kring->nr_hwtail -= lim + 1;
netmap_send_up(na->ifp, &q);
return 0;
}
/*
 * nm_sync callback for the host RX ring: move mbufs queued by the host
 * stack (kring->rx_queue) into ring slots starting at nr_hwtail, then
 * release the slots the application has consumed (advancing nr_hwcur
 * to rhead), optionally forwarding marked packets down to the NIC.
 * The consumed mbufs are freed outside the queue lock via a local
 * queue (fq).  Returns 0, or the (zeroed-on-success) result of the
 * forwarding path.
 */
static int
netmap_rxsync_from_host(struct netmap_kring *kring, int flags)
{
struct netmap_adapter *na = kring->na;
struct netmap_ring *ring = kring->ring;
u_int nm_i, n;
u_int const lim = kring->nkr_num_slots - 1;
u_int const head = kring->rhead;
int ret = 0;
struct mbq *q = &kring->rx_queue, fq;
mbq_init(&fq); /* fq holds mbufs to be freed outside the lock */
mbq_lock(q);
/* first part: import newly received packets */
n = mbq_len(q);
if (n) {
struct mbuf *m;
uint32_t stop_i;
nm_i = kring->nr_hwtail;
/* stop one slot before hwcur to keep the ring non-full */
stop_i = nm_prev(kring->nr_hwcur, lim);
while ( nm_i != stop_i && (m = mbq_dequeue(q)) != NULL ) {
int len = MBUF_LEN(m);
struct netmap_slot *slot = &ring->slot[nm_i];
m_copydata(m, 0, len, NMB(na, slot));
nm_prdis("nm %d len %d", nm_i, len);
if (netmap_debug & NM_DEBUG_HOST)
nm_prinf("%s", nm_dump_buf(NMB(na, slot),len, 128, NULL));
slot->len = len;
slot->flags = 0;
nm_i = nm_next(nm_i, lim);
mbq_enqueue(&fq, m);
}
kring->nr_hwtail = nm_i;
}
/* second part: skip past packets that userspace has released */
nm_i = kring->nr_hwcur;
if (nm_i != head) {
if (nm_may_forward_down(kring, flags)) {
ret = netmap_sw_to_nic(na);
if (ret > 0) {
kring->nr_kflags |= NR_FORWARD;
ret = 0;
}
}
kring->nr_hwcur = head;
}
mbq_unlock(q);
mbq_purge(&fq);
mbq_fini(&fq);
return ret;
}
static void netmap_hw_dtor(struct netmap_adapter *);
/*
 * Return (in *na) the netmap adapter for a hardware interface,
 * creating an emulated (generic) adapter if needed and allowed by the
 * dev.netmap.admode policy.  Optionally switches the adapter to the
 * memory allocator nmd if it has no active users and does not own its
 * allocator.  Returns 0 or an errno.
 */
int
netmap_get_hw_na(if_t ifp, struct netmap_mem_d *nmd, struct netmap_adapter **na)
{
int i = netmap_admode;
struct netmap_adapter *prev_na;
int error = 0;
*na = NULL;
/* sanitize the admode sysctl value */
if (i < NETMAP_ADMODE_BEST || i >= NETMAP_ADMODE_LAST)
i = netmap_admode = NETMAP_ADMODE_BEST;
if (NM_NA_VALID(ifp)) {
prev_na = NA(ifp);
/* reuse the existing adapter unless the caller explicitly
 * asked for a generic one and it is safe to replace it */
if (NETMAP_OWNED_BY_ANY(prev_na)
|| i != NETMAP_ADMODE_GENERIC
|| prev_na->na_flags & NAF_FORCE_NATIVE
#ifdef WITH_PIPES
|| prev_na->na_next_pipe > 0
#endif
) {
*na = prev_na;
goto assign_mem;
}
}
/* native support requested but not available */
if (!NM_IS_NATIVE(ifp) && i == NETMAP_ADMODE_NATIVE)
return EOPNOTSUPP;
error = generic_netmap_attach(ifp);
if (error)
return error;
*na = NA(ifp);
assign_mem:
/* only switch allocator when it is safe (no users, not owner) */
if (nmd != NULL && !((*na)->na_flags & NAF_MEM_OWNER) &&
(*na)->active_fds == 0 && ((*na)->nm_mem != nmd)) {
(*na)->nm_mem_prev = (*na)->nm_mem;
(*na)->nm_mem = netmap_mem_get(nmd);
}
return 0;
}
/*
 * Resolve a NETMAP_REQ_REGISTER request to a netmap adapter.  The
 * special backends (null, monitor, pipe, vale) are tried in order;
 * each either claims the name (setting *na), fails, or passes.  If
 * none claims it, the name is looked up as a hardware interface (a
 * reference on the ifnet is returned in *ifp).  On success a reference
 * is held on *na; on error all references are dropped.  Called with
 * the global netmap lock held.
 */
int
netmap_get_na(struct nmreq_header *hdr,
struct netmap_adapter **na, if_t *ifp,
struct netmap_mem_d *nmd, int create)
{
struct nmreq_register *req = (struct nmreq_register *)(uintptr_t)hdr->nr_body;
int error = 0;
struct netmap_adapter *ret = NULL;
int nmd_ref = 0; /* 1 if we took a local reference on nmd */
*na = NULL;
*ifp = NULL;
if (hdr->nr_reqtype != NETMAP_REQ_REGISTER) {
return EINVAL;
}
if (req->nr_mode == NR_REG_PIPE_MASTER ||
req->nr_mode == NR_REG_PIPE_SLAVE) {
nm_prerr("Deprecated pipe nr_mode, use xx{yy or xx}yy syntax");
return EINVAL;
}
NMG_LOCK_ASSERT();
/* if the request contains a memid, try to find the allocator */
if (nmd == NULL && req->nr_mem_id) {
nmd = netmap_mem_find(req->nr_mem_id);
if (nmd == NULL)
return EINVAL;
nmd_ref = 1;
}
/* cascade of special backends; first match wins */
error = netmap_get_null_na(hdr, na, nmd, create);
if (error || *na != NULL)
goto out;
error = netmap_get_monitor_na(hdr, na, nmd, create);
if (error || *na != NULL)
goto out;
error = netmap_get_pipe_na(hdr, na, nmd, create);
if (error || *na != NULL)
goto out;
error = netmap_get_vale_na(hdr, na, nmd, create);
if (error)
goto out;
if (*na != NULL)
goto out;
/* fall through: plain hardware interface lookup */
*ifp = ifunit_ref(hdr->nr_name);
if (*ifp == NULL) {
error = ENXIO;
goto out;
}
error = netmap_get_hw_na(*ifp, nmd, &ret);
if (error)
goto out;
*na = ret;
netmap_adapter_get(ret);
/* host ring counts may only be changed before the first bind */
if (((*na)->na_flags & NAF_HOST_RINGS) && (*na)->active_fds == 0) {
if (req->nr_host_tx_rings)
(*na)->num_host_tx_rings = req->nr_host_tx_rings;
if (req->nr_host_rx_rings)
(*na)->num_host_rx_rings = req->nr_host_rx_rings;
}
nm_prdis("%s: host tx %d rx %u", (*na)->name, (*na)->num_host_tx_rings,
(*na)->num_host_rx_rings);
out:
/* error path: undo the references taken above */
if (error) {
if (ret)
netmap_adapter_put(ret);
if (*ifp) {
if_rele(*ifp);
*ifp = NULL;
}
}
if (nmd_ref)
netmap_mem_put(nmd);
return error;
}
/*
 * Release the references acquired by netmap_get_na(): one on the
 * ifnet (if any) and one on the adapter (if any).
 */
void
netmap_unget_na(struct netmap_adapter *na, if_t ifp)
{
	if (ifp != NULL)
		if_rele(ifp);
	if (na != NULL)
		netmap_adapter_put(na);
}
/*
 * Helper for the *sync_prologue() validators below: if condition t
 * holds, log a rate-limited diagnostic with the full ring state and
 * return nkr_num_slots -- an out-of-range value that the callers treat
 * as "ring needs reinit".  Expects `kring`, `ring`, `head` and `cur`
 * to be in scope at the expansion site.
 */
#define NM_FAIL_ON(t) do { \
if (unlikely(t)) { \
nm_prlim(5, "%s: fail '" #t "' " \
"h %d c %d t %d " \
"rh %d rc %d rt %d " \
"hc %d ht %d", \
kring->name, \
head, cur, ring->tail, \
kring->rhead, kring->rcur, kring->rtail, \
kring->nr_hwcur, kring->nr_hwtail); \
return kring->nkr_num_slots; \
} \
} while (0)
/*
 * Validate the user-visible ring state (head/cur) of a TX ring before
 * a txsync.  head must lie in [rhead, rtail] (modulo wraparound) and
 * cur must lie in [head, rtail].  On success the trusted copies rhead
 * and rcur are updated and head is returned; on failure the error
 * value nkr_num_slots is returned (via NM_FAIL_ON).  A user-modified
 * ring->tail is restored from the trusted rtail.
 */
u_int
nm_txsync_prologue(struct netmap_kring *kring, struct netmap_ring *ring)
{
u_int head = NM_ACCESS_ONCE(ring->head);
u_int cur = NM_ACCESS_ONCE(ring->cur);
u_int n = kring->nkr_num_slots;
nm_prdis(5, "%s kcur %d ktail %d head %d cur %d tail %d",
kring->name,
kring->nr_hwcur, kring->nr_hwtail,
ring->head, ring->cur, ring->tail);
#if 1 /* kernel sanity checks; but we can trust the kring. */
NM_FAIL_ON(kring->nr_hwcur >= n || kring->rhead >= n ||
kring->rtail >= n || kring->nr_hwtail >= n);
#endif
if (kring->rtail >= kring->rhead) {
/* want rhead <= head <= rtail */
NM_FAIL_ON(head < kring->rhead || head > kring->rtail);
/* and also head <= cur <= rtail */
NM_FAIL_ON(cur < head || cur > kring->rtail);
} else { /* here rtail < rhead */
/* we need head outside rtail .. rhead */
NM_FAIL_ON(head > kring->rtail && head < kring->rhead);
if (head <= kring->rtail) {
/* want head <= cur <= rtail */
NM_FAIL_ON(cur < head || cur > kring->rtail);
} else { /* head >= rhead */
/* cur must be outside rtail..head */
NM_FAIL_ON(cur > kring->rtail && cur < head);
}
}
if (ring->tail != kring->rtail) {
nm_prlim(5, "%s tail overwritten was %d need %d", kring->name,
ring->tail, kring->rtail);
ring->tail = kring->rtail;
}
kring->rhead = head;
kring->rcur = cur;
return head;
}
/*
 * Validate the user-visible ring state (head/cur) of an RX ring before
 * an rxsync.  Unlike the TX variant, the trusted copies rcur/rhead are
 * updated BEFORE the checks, and the bounds are taken from nr_hwcur /
 * nr_hwtail.  Returns head on success, or the error value
 * nkr_num_slots (via NM_FAIL_ON).  A user-modified ring->tail is
 * restored from the trusted rtail.
 */
u_int
nm_rxsync_prologue(struct netmap_kring *kring, struct netmap_ring *ring)
{
uint32_t const n = kring->nkr_num_slots;
uint32_t head, cur;
nm_prdis(5,"%s kc %d kt %d h %d c %d t %d",
kring->name,
kring->nr_hwcur, kring->nr_hwtail,
ring->head, ring->cur, ring->tail);
/* snapshot and adopt the user values before validating them */
cur = kring->rcur = NM_ACCESS_ONCE(ring->cur);
head = kring->rhead = NM_ACCESS_ONCE(ring->head);
#if 1 /* kernel sanity checks */
NM_FAIL_ON(kring->nr_hwcur >= n || kring->nr_hwtail >= n);
#endif
if (kring->nr_hwtail >= kring->nr_hwcur) {
/* want hwcur <= head <= hwtail */
NM_FAIL_ON(head < kring->nr_hwcur || head > kring->nr_hwtail);
/* and also head <= cur <= hwtail */
NM_FAIL_ON(cur < head || cur > kring->nr_hwtail);
} else {
/* we need head outside hwtail..hwcur */
NM_FAIL_ON(head < kring->nr_hwcur && head > kring->nr_hwtail);
if (head <= kring->nr_hwtail) {
/* want head <= cur <= hwtail */
NM_FAIL_ON(cur < head || cur > kring->nr_hwtail);
} else {
/* cur must be outside hwtail..head */
NM_FAIL_ON(cur < head && cur > kring->nr_hwtail);
}
}
if (ring->tail != kring->rtail) {
nm_prlim(5, "%s tail overwritten was %d need %d",
kring->name,
ring->tail, kring->rtail);
ring->tail = kring->rtail;
}
return head;
}
/*
 * Repair a ring whose user-visible state failed validation: scan all
 * slots, zeroing out invalid buffer indexes (buf_idx 0 and 1 are
 * presumably reserved -- note the `idx < 2` check) and out-of-range
 * lengths, and if any of head/cur/tail was out of range reset them
 * from the trusted kernel view.  Returns 1 if pointers were reset,
 * 0 otherwise.
 */
int
netmap_ring_reinit(struct netmap_kring *kring)
{
struct netmap_ring *ring = kring->ring;
u_int i, lim = kring->nkr_num_slots - 1;
int errors = 0;
nm_prlim(10, "called for %s", kring->name);
/* adopt the user pointers before checking them */
kring->rhead = ring->head;
kring->rcur = ring->cur;
kring->rtail = ring->tail;
if (ring->cur > lim)
errors++;
if (ring->head > lim)
errors++;
if (ring->tail > lim)
errors++;
for (i = 0; i <= lim; i++) {
u_int idx = ring->slot[i].buf_idx;
u_int len = ring->slot[i].len;
if (idx < 2 || idx >= kring->na->na_lut.objtotal) {
nm_prlim(5, "bad index at slot %d idx %d len %d ", i, idx, len);
ring->slot[i].buf_idx = 0;
ring->slot[i].len = 0;
} else if (len > NETMAP_BUF_SIZE(kring->na)) {
ring->slot[i].len = 0;
nm_prlim(5, "bad len at slot %d idx %d len %d", i, idx, len);
}
}
if (errors) {
nm_prlim(10, "total %d errors", errors);
nm_prlim(10, "%s reinit, cur %d -> %d tail %d -> %d",
kring->name,
ring->cur, kring->nr_hwcur,
ring->tail, kring->nr_hwtail);
/* resync user and kernel views from the kernel state */
ring->head = kring->rhead = kring->nr_hwcur;
ring->cur = kring->rcur = kring->nr_hwcur;
ring->tail = kring->rtail = kring->nr_hwtail;
}
return (errors ? 1 : 0);
}
/*
 * Interpret the ring selection in a register request (nr_mode,
 * nr_ringid, nr_flags) and store the resulting half-open ranges in
 * priv->np_qfirst[t] / np_qlast[t] for both directions.  Directions
 * excluded via NR_TX_RINGS_ONLY / NR_RX_RINGS_ONLY get an empty range.
 * Also records np_flags and grants NAF_CAN_FORWARD_DOWN when all
 * hardware TX rings are selected.  Returns 0 or EINVAL.
 */
int
netmap_interp_ringid(struct netmap_priv_d *priv, struct nmreq_header *hdr)
{
struct netmap_adapter *na = priv->np_na;
struct nmreq_register *reg = (struct nmreq_register *)hdr->nr_body;
/* indexed by enum txrx: flag that excludes that direction */
int excluded_direction[] = { NR_TX_RINGS_ONLY, NR_RX_RINGS_ONLY };
enum txrx t;
u_int j;
u_int nr_flags = reg->nr_flags, nr_mode = reg->nr_mode,
nr_ringid = reg->nr_ringid;
for_rx_tx(t) {
if (nr_flags & excluded_direction[t]) {
priv->np_qfirst[t] = priv->np_qlast[t] = 0;
continue;
}
switch (nr_mode) {
case NR_REG_ALL_NIC:
case NR_REG_NULL:
/* all hardware rings, no host rings */
priv->np_qfirst[t] = 0;
priv->np_qlast[t] = nma_get_nrings(na, t);
nm_prdis("ALL/PIPE: %s %d %d", nm_txrx2str(t),
priv->np_qfirst[t], priv->np_qlast[t]);
break;
case NR_REG_SW:
case NR_REG_NIC_SW:
if (!(na->na_flags & NAF_HOST_RINGS)) {
nm_prerr("host rings not supported");
return EINVAL;
}
/* SW: host rings only; NIC_SW: hardware + host rings */
priv->np_qfirst[t] = (nr_mode == NR_REG_SW ?
nma_get_nrings(na, t) : 0);
priv->np_qlast[t] = netmap_all_rings(na, t);
nm_prdis("%s: %s %d %d", nr_mode == NR_REG_SW ? "SW" : "NIC+SW",
nm_txrx2str(t),
priv->np_qfirst[t], priv->np_qlast[t]);
break;
case NR_REG_ONE_NIC:
if (nr_ringid >= na->num_tx_rings &&
nr_ringid >= na->num_rx_rings) {
nm_prerr("invalid ring id %d", nr_ringid);
return EINVAL;
}
/* if the direction has fewer rings, select ring 0 */
j = nr_ringid;
if (j >= nma_get_nrings(na, t))
j = 0;
priv->np_qfirst[t] = j;
priv->np_qlast[t] = j + 1;
nm_prdis("ONE_NIC: %s %d %d", nm_txrx2str(t),
priv->np_qfirst[t], priv->np_qlast[t]);
break;
case NR_REG_ONE_SW:
if (!(na->na_flags & NAF_HOST_RINGS)) {
nm_prerr("host rings not supported");
return EINVAL;
}
if (nr_ringid >= na->num_host_tx_rings &&
nr_ringid >= na->num_host_rx_rings) {
nm_prerr("invalid ring id %d", nr_ringid);
return EINVAL;
}
/* if the direction has fewer host rings, select ring 0 */
j = nr_ringid;
if (j >= nma_get_host_nrings(na, t))
j = 0;
/* host rings are indexed after the hardware rings */
priv->np_qfirst[t] = nma_get_nrings(na, t) + j;
priv->np_qlast[t] = nma_get_nrings(na, t) + j + 1;
nm_prdis("ONE_SW: %s %d %d", nm_txrx2str(t),
priv->np_qfirst[t], priv->np_qlast[t]);
break;
default:
nm_prerr("invalid regif type %d", nr_mode);
return EINVAL;
}
}
priv->np_flags = nr_flags;
/* forwarding down is allowed only when all TX hw rings are owned */
if (priv->np_qfirst[NR_TX] == 0 &&
priv->np_qlast[NR_TX] >= na->num_tx_rings) {
priv->np_sync_flags |= NAF_CAN_FORWARD_DOWN;
}
if (netmap_verbose) {
nm_prinf("%s: tx [%d,%d) rx [%d,%d) id %d",
na->name,
priv->np_qfirst[NR_TX],
priv->np_qlast[NR_TX],
priv->np_qfirst[NR_RX],
priv->np_qlast[NR_RX],
nr_ringid);
}
return 0;
}
/*
 * Interpret the ring selection in the request and record the derived
 * per-fd state: the txpoll policy and the global-selinfo user counts
 * for each direction.  Returns 0 or the error from
 * netmap_interp_ringid().
 */
static int
netmap_set_ringid(struct netmap_priv_d *priv, struct nmreq_header *hdr)
{
	struct netmap_adapter *na = priv->np_na;
	struct nmreq_register *reg = (struct nmreq_register *)hdr->nr_body;
	enum txrx t;
	int error;

	error = netmap_interp_ringid(priv, hdr);
	if (error != 0)
		return error;
	/* unless NR_NO_TX_POLL is set, poll() also triggers a txsync */
	priv->np_txpoll = (reg->nr_flags & NR_NO_TX_POLL) ? 0 : 1;
	for_rx_tx(t) {
		if (nm_si_user(priv, t))
			na->si_users[t]++;
	}
	return 0;
}
/*
 * Undo netmap_set_ringid(): drop the global-selinfo user counts and
 * clear the per-fd ring selection and flags.
 */
static void
netmap_unset_ringid(struct netmap_priv_d *priv)
{
	struct netmap_adapter *na = priv->np_na;
	enum txrx t;

	for_rx_tx(t) {
		if (nm_si_user(priv, t))
			na->si_users[t]--;
		priv->np_qfirst[t] = 0;
		priv->np_qlast[t] = 0;
	}
	priv->np_flags = 0;
	priv->np_txpoll = 0;
	priv->np_kloop_state = 0;
}
/*
 * Iterate (t_, i_, kring_) over every ring selected in the private
 * descriptor p_: first the RX range [np_qfirst[NR_RX], np_qlast[NR_RX])
 * and then the TX range.  kring_ is set to the current kring on each
 * iteration.
 *
 * Fix: the loop condition used the bare identifier `t` instead of the
 * macro parameter `t_`, so the macro only compiled when the caller's
 * variable happened to be named `t`.  Use (t_) consistently.
 */
#define within_sel(p_, t_, i_) \
	((i_) < (p_)->np_qlast[(t_)])
#define nonempty_sel(p_, t_) \
	(within_sel((p_), (t_), (p_)->np_qfirst[(t_)]))
#define foreach_selected_ring(p_, t_, i_, kring_) \
	for ((t_) = nonempty_sel((p_), NR_RX) ? NR_RX : NR_TX, \
	     (i_) = (p_)->np_qfirst[(t_)]; \
	     ((t_) == NR_RX || \
	      ((t_) == NR_TX && within_sel((p_), (t_), (i_)))) && \
	      ((kring_) = NMR((p_)->np_na, (t_))[(i_)]); \
	     (i_) = within_sel((p_), (t_), (i_) + 1) ? (i_) + 1 : \
		(++(t_) < NR_TXRX ? (p_)->np_qfirst[(t_)] : (i_)))
/*
 * Grab the krings selected by priv: fail with EBUSY if any of them is
 * already owned exclusively, or has users while we request exclusive
 * ownership (NR_EXCLUSIVE).  The check pass over all rings happens
 * before any state is modified, so failure leaves everything intact.
 * On success, bump the user count (and set NKR_EXCLUSIVE if requested)
 * and mark the rings as pending-on.
 */
static int
netmap_krings_get(struct netmap_priv_d *priv)
{
struct netmap_adapter *na = priv->np_na;
u_int i;
struct netmap_kring *kring;
int excl = (priv->np_flags & NR_EXCLUSIVE);
enum txrx t;
if (netmap_debug & NM_DEBUG_ON)
nm_prinf("%s: grabbing tx [%d, %d) rx [%d, %d)",
na->name,
priv->np_qfirst[NR_TX],
priv->np_qlast[NR_TX],
priv->np_qfirst[NR_RX],
priv->np_qlast[NR_RX]);
/* first pass: check that all rings are available */
foreach_selected_ring(priv, t, i, kring) {
if ((kring->nr_kflags & NKR_EXCLUSIVE) ||
(kring->users && excl))
{
nm_prdis("ring %s busy", kring->name);
return EBUSY;
}
}
/* second pass: commit the ownership */
foreach_selected_ring(priv, t, i, kring) {
kring->users++;
if (excl)
kring->nr_kflags |= NKR_EXCLUSIVE;
kring->nr_pending_mode = NKR_NETMAP_ON;
}
return 0;
}
/*
 * Undo netmap_krings_get(): drop the user count on the rings selected
 * by priv, clear NKR_EXCLUSIVE if this fd held it, and mark rings with
 * no users left as pending-off.
 *
 * Fixes: the debug trace referenced an undeclared `na` and the typo
 * MR_RX (instead of NR_RX); both only compiled because nm_prdis()
 * expands to nothing in non-debug builds.
 */
static void
netmap_krings_put(struct netmap_priv_d *priv)
{
	u_int i;
	struct netmap_kring *kring;
	int excl = (priv->np_flags & NR_EXCLUSIVE);
	enum txrx t;

	nm_prdis("%s: releasing tx [%d, %d) rx [%d, %d)",
			priv->np_na->name,
			priv->np_qfirst[NR_TX],
			priv->np_qlast[NR_TX],
			priv->np_qfirst[NR_RX],
			priv->np_qlast[NR_RX]);

	foreach_selected_ring(priv, t, i, kring) {
		if (excl)
			kring->nr_kflags &= ~NKR_EXCLUSIVE;
		kring->users--;
		if (kring->users == 0)
			kring->nr_pending_mode = NKR_NETMAP_OFF;
	}
}
/* True when the fd has selected at least one RX ring. */
static int
nm_priv_rx_enabled(struct netmap_priv_d *priv)
{
	return priv->np_qfirst[NR_RX] != priv->np_qlast[NR_RX];
}
/*
 * Validate and install the Communication Status Block (CSB) described
 * by the NETMAP_REQ_OPT_CSB option: one atok (application-to-kernel)
 * and one ktoa (kernel-to-application) entry per selected ring, TX
 * entries first.  The user addresses are probed for alignment and
 * read/write access (atok via copyin, ktoa via a zeroing copyout),
 * then each entry is seeded with the current kring state.  Requires
 * NR_EXCLUSIVE mode and no running sync kloop.  Returns 0 or errno.
 */
static int
netmap_csb_validate(struct netmap_priv_d *priv, struct nmreq_opt_csb *csbo)
{
struct nm_csb_atok *csb_atok_base =
(struct nm_csb_atok *)(uintptr_t)csbo->csb_atok;
struct nm_csb_ktoa *csb_ktoa_base =
(struct nm_csb_ktoa *)(uintptr_t)csbo->csb_ktoa;
enum txrx t;
int num_rings[NR_TXRX], tot_rings;
size_t entry_size[2];
void *csb_start[2];
int i;
if (priv->np_kloop_state & NM_SYNC_KLOOP_RUNNING) {
nm_prerr("Cannot update CSB while kloop is running");
return EBUSY;
}
tot_rings = 0;
for_rx_tx(t) {
num_rings[t] = priv->np_qlast[t] - priv->np_qfirst[t];
tot_rings += num_rings[t];
}
if (tot_rings <= 0)
return 0;
if (!(priv->np_flags & NR_EXCLUSIVE)) {
nm_prerr("CSB mode requires NR_EXCLUSIVE");
return EINVAL;
}
/* index 0: atok array, index 1: ktoa array */
entry_size[0] = sizeof(*csb_atok_base);
entry_size[1] = sizeof(*csb_ktoa_base);
csb_start[0] = (void *)csb_atok_base;
csb_start[1] = (void *)csb_ktoa_base;
for (i = 0; i < 2; i++) {
size_t csb_size = tot_rings * entry_size[i];
void *tmp;
int err;
if ((uintptr_t)csb_start[i] & (entry_size[i]-1)) {
nm_prerr("Unaligned CSB address");
return EINVAL;
}
tmp = nm_os_malloc(csb_size);
if (!tmp)
return ENOMEM;
if (i == 0) {
/* atok: probe for read access */
err = copyin(csb_start[i], tmp, csb_size);
} else {
/* ktoa: probe for write access, zeroing the area */
memset(tmp, 0, csb_size);
err = copyout(tmp, csb_start[i], csb_size);
}
nm_os_free(tmp);
if (err) {
nm_prerr("Invalid CSB address");
return err;
}
}
priv->np_csb_atok_base = csb_atok_base;
priv->np_csb_ktoa_base = csb_ktoa_base;
/* initialize the CSB from the current kring state */
for_rx_tx(t) {
for (i = 0; i < num_rings[t]; i++) {
struct netmap_kring *kring =
NMR(priv->np_na, t)[i + priv->np_qfirst[t]];
struct nm_csb_atok *csb_atok = csb_atok_base + i;
struct nm_csb_ktoa *csb_ktoa = csb_ktoa_base + i;
if (t == NR_RX) {
/* RX entries follow the TX ones */
csb_atok += num_rings[NR_TX];
csb_ktoa += num_rings[NR_TX];
}
CSB_WRITE(csb_atok, head, kring->rhead);
CSB_WRITE(csb_atok, cur, kring->rcur);
CSB_WRITE(csb_atok, appl_need_kick, 1);
CSB_WRITE(csb_atok, sync_flags, 1);
CSB_WRITE(csb_ktoa, hwcur, kring->nr_hwcur);
CSB_WRITE(csb_ktoa, hwtail, kring->nr_hwtail);
CSB_WRITE(csb_ktoa, kern_need_kick, 1);
nm_prinf("csb_init for kring %s: head %u, cur %u, "
"hwcur %u, hwtail %u", kring->name,
kring->rhead, kring->rcur, kring->nr_hwcur,
kring->nr_hwtail);
}
}
return 0;
}
/*
 * Check that the netmap buffer size is compatible with the device MTU:
 * buffers must hold a full MTU, unless the adapter supports multi-
 * fragment packets (NAF_MOREFRAG), in which case buffers must still be
 * at least rx_buf_maxsize and the application is warned that it must
 * handle NS_MOREFRAG.  Returns 0 or EINVAL.
 */
int
netmap_buf_size_validate(const struct netmap_adapter *na, unsigned mtu) {
unsigned nbs = NETMAP_BUF_SIZE(na);
if (mtu <= na->rx_buf_maxsize) {
/* The MTU fits a single NIC slot. We only
 * Need to check that netmap buffers are
 * large enough to hold an MTU. NS_MOREFRAG
 * cannot be used in this case. */
if (nbs < mtu) {
nm_prerr("error: netmap buf size (%u) "
"< device MTU (%u)", nbs, mtu);
return EINVAL;
}
} else {
/* More NIC slots may be needed to receive
 * or transmit a single packet. Check that
 * the adapter supports NS_MOREFRAG and that
 * netmap buffers are large enough to hold
 * the maximum per-slot size. */
if (!(na->na_flags & NAF_MOREFRAG)) {
nm_prerr("error: large MTU (%d) needed "
"but %s does not support "
"NS_MOREFRAG", mtu,
if_name(na->ifp));
return EINVAL;
} else if (nbs < na->rx_buf_maxsize) {
nm_prerr("error: using NS_MOREFRAG on "
"%s requires netmap buf size "
">= %u", if_name(na->ifp),
na->rx_buf_maxsize);
return EINVAL;
} else {
nm_prinf("info: netmap application on "
"%s needs to support "
"NS_MOREFRAG "
"(MTU=%u,netmap_buf_size=%u)",
if_name(na->ifp), mtu, nbs);
}
}
return 0;
}
/*
 * Process the NETMAP_REQ_OPT_OFFSETS option: validate the requested
 * offset bit-width, maximum offset, minimum gap and initial offset,
 * then install them on every selected kring.  Rings already configured
 * by another user can only keep or restrict their current settings
 * (EBUSY otherwise).  The option status and the effective max offset
 * are written back into the option.  Returns 0 or errno.
 */
static int
netmap_offsets_init(struct netmap_priv_d *priv, struct nmreq_header *hdr)
{
struct nmreq_opt_offsets *opt;
struct netmap_adapter *na = priv->np_na;
struct netmap_kring *kring;
uint64_t mask = 0, bits = 0, maxbits = sizeof(uint64_t) * 8,
max_offset = 0, initial_offset = 0, min_gap = 0;
u_int i;
enum txrx t;
int error = 0;
opt = (struct nmreq_opt_offsets *)
nmreq_getoption(hdr, NETMAP_REQ_OPT_OFFSETS);
if (opt == NULL)
return 0;
if (!(na->na_flags & NAF_OFFSETS)) {
if (netmap_verbose)
nm_prerr("%s does not support offsets",
na->name);
error = EOPNOTSUPP;
goto out;
}
/* check sanity of the opt values */
max_offset = opt->nro_max_offset;
min_gap = opt->nro_min_gap;
initial_offset = opt->nro_initial_offset;
bits = opt->nro_offset_bits;
if (bits > maxbits) {
if (netmap_verbose)
nm_prerr("bits: %llu too large (max %llu)",
(unsigned long long)bits,
(unsigned long long)maxbits);
error = EINVAL;
goto out;
}
/* we take bits == 0 as a request to use the entire field */
if (bits == 0 || bits == maxbits) {
bits = maxbits;
mask = 0xffffffffffffffff;
} else {
mask = (1ULL << bits) - 1;
}
if (max_offset > NETMAP_BUF_SIZE(na)) {
if (netmap_verbose)
nm_prerr("max offset %llu > buf size %u",
(unsigned long long)max_offset, NETMAP_BUF_SIZE(na));
error = EINVAL;
goto out;
}
if ((max_offset & mask) != max_offset) {
if (netmap_verbose)
nm_prerr("max offset %llu to large for %llu bits",
(unsigned long long)max_offset,
(unsigned long long)bits);
error = EINVAL;
goto out;
}
if (initial_offset > max_offset) {
if (netmap_verbose)
nm_prerr("initial offset %llu > max offset %llu",
(unsigned long long)initial_offset,
(unsigned long long)max_offset);
error = EINVAL;
goto out;
}
/* initialize the kring and ring fields. */
foreach_selected_ring(priv, t, i, kring) {
/* NOTE(review): this declaration shadows the loop variable
 * `kring` with the same value -- harmless, but confirm. */
struct netmap_kring *kring = NMR(na, t)[i];
struct netmap_ring *ring = kring->ring;
u_int j;
/* it the ring is already in use we check that the
 * new request is compatible with the existing one
 */
if (kring->offset_mask) {
if ((kring->offset_mask & mask) != mask ||
kring->offset_max < max_offset) {
if (netmap_verbose)
nm_prinf("%s: cannot increase"
"offset mask and/or max"
"(current: mask=%llx,max=%llu",
kring->name,
(unsigned long long)kring->offset_mask,
(unsigned long long)kring->offset_max);
error = EBUSY;
goto out;
}
mask = kring->offset_mask;
max_offset = kring->offset_max;
} else {
kring->offset_mask = mask;
*(uint64_t *)(uintptr_t)&ring->offset_mask = mask;
kring->offset_max = max_offset;
kring->offset_gap = min_gap;
}
/* if there is an initial offset, put it into
 * all the slots
 *
 * Note: we cannot change the offsets if the
 * ring is already in use.
 */
if (!initial_offset || kring->users > 1)
continue;
for (j = 0; j < kring->nkr_num_slots; j++) {
struct netmap_slot *slot = ring->slot + j;
nm_write_offset(kring, slot, initial_offset);
}
}
out:
opt->nro_opt.nro_status = error;
if (!error) {
opt->nro_max_offset = max_offset;
}
return error;
}
static int
netmap_compute_buf_len(struct netmap_priv_d *priv)
{
enum txrx t;
u_int i;
struct netmap_kring *kring;
int error = 0;
unsigned mtu = 0;
struct netmap_adapter *na = priv->np_na;
uint64_t target;
foreach_selected_ring(priv, t, i, kring) {
if (kring->users > 1)
continue;
target = NETMAP_BUF_SIZE(kring->na) -
kring->offset_max;
if (!kring->offset_gap)
kring->offset_gap =
NETMAP_BUF_SIZE(kring->na);
if (kring->offset_gap < target)
target = kring->offset_gap;
error = kring->nm_bufcfg(kring, target);
if (error)
goto out;
*(uint64_t *)(uintptr_t)&kring->ring->buf_align = kring->buf_align;
if (mtu && t == NR_RX && kring->hwbuf_len < mtu) {
if (!(na->na_flags & NAF_MOREFRAG)) {
nm_prerr("error: large MTU (%d) needed "
"but %s does not support "
"NS_MOREFRAG", mtu,
na->name);
error = EINVAL;
goto out;
} else {
nm_prinf("info: netmap application on "
"%s needs to support "
"NS_MOREFRAG "
"(MTU=%u,buf_size=%llu)",
kring->name, mtu,
(unsigned long long)kring->hwbuf_len);
}
}
}
out:
return error;
}
/*
 * netmap_do_regif - bind the file descriptor (priv) to the adapter (na)
 * according to the request header (hdr). Called with NMG_LOCK held.
 *
 * On success priv->np_nifp points to the shared netmap_if. On failure
 * every partially-acquired resource is released in reverse order by the
 * chained error labels at the bottom; the labels must be kept in sync
 * with the acquisition order above them.
 */
int
netmap_do_regif(struct netmap_priv_d *priv, struct netmap_adapter *na,
	struct nmreq_header *hdr)
{
	struct netmap_if *nifp = NULL;
	int error;

	NMG_LOCK_ASSERT();
	priv->np_na = na;	/* store the reference */
	/* make sure the shared memory region is finalized/mapped */
	error = netmap_mem_finalize(na->nm_mem, na);
	if (error)
		goto err;

	if (na->active_fds == 0) {
		/* first open of this adapter: grab the buffer lookup
		 * table and refresh the ring configuration */
		error = netmap_mem_get_lut(na->nm_mem, &na->na_lut);
		if (error)
			goto err_drop_mem;
		nm_prdis("lut %p bufs %u size %u", na->na_lut.lut, na->na_lut.objtotal,
			na->na_lut.objsize);

		netmap_update_config(na);
	}

	/* compute the range of tx/rx rings this fd will own */
	error = netmap_set_ringid(priv, hdr);
	if (error)
		goto err_put_lut;

	if (na->active_fds == 0) {
		/* on first registration, validate the buffer size
		 * against the interface MTU, then create the krings */
		if (na->ifp && nm_priv_rx_enabled(priv)) {
			unsigned mtu = nm_os_ifnet_mtu(na->ifp);

			nm_prdis("%s: mtu %d rx_buf_maxsize %d netmap_buf_size %d",
				na->name, mtu, na->rx_buf_maxsize, NETMAP_BUF_SIZE(na));

			if (na->rx_buf_maxsize == 0) {
				nm_prerr("%s: error: rx_buf_maxsize == 0", na->name);
				error = EIO;
				goto err_drop_mem;
			}

			error = netmap_buf_size_validate(na, mtu);
			if (error)
				goto err_drop_mem;
		}

		error = na->nm_krings_create(na);
		if (error)
			goto err_put_lut;
	}

	/* take exclusive ownership of the selected rings, if requested */
	error = netmap_krings_get(priv);
	if (error)
		goto err_del_krings;

	/* create all missing netmap rings in shared memory */
	error = netmap_mem_rings_create(na);
	if (error)
		goto err_rel_excl;

	/* apply per-ring offset options, then derive buffer lengths */
	error = netmap_offsets_init(priv, hdr);
	if (error)
		goto err_rel_excl;

	error = netmap_compute_buf_len(priv);
	if (error)
		goto err_rel_excl;

	/* in all cases, create a new netmap if for this fd */
	nifp = netmap_mem_if_new(na, priv);
	if (nifp == NULL) {
		error = ENOMEM;
		goto err_rel_excl;
	}

	if (nm_kring_pending(priv)) {
		/* some krings switch mode: notify the adapter while all
		 * rings are locked against concurrent syncs */
		netmap_set_all_rings(na, NM_KR_LOCKED);
		error = na->nm_register(na, 1);
		netmap_set_all_rings(na, 0);
		if (error)
			goto err_del_if;
	}

	na->active_fds++;

	/* memory barrier before publishing nifp: np_nifp != NULL is the
	 * "registration complete" flag checked lock-free elsewhere
	 * (e.g. in netmap_ioctl/netmap_poll) */
	mb();
	priv->np_nifp = nifp;

	return 0;

	/* error unwinding: strictly reverse order of acquisition */
err_del_if:
	netmap_mem_if_delete(na, nifp);
err_rel_excl:
	netmap_krings_put(priv);
	netmap_mem_rings_delete(na);
err_del_krings:
	if (na->active_fds == 0)
		na->nm_krings_delete(na);
err_put_lut:
	if (na->active_fds == 0)
		memset(&na->na_lut, 0, sizeof(na->na_lut));
err_drop_mem:
	netmap_mem_drop(na);
err:
	priv->np_na = NULL;
	return error;
}
static inline void
nm_sync_finalize(struct netmap_kring *kring)
{
kring->ring->tail = kring->rtail = kring->nr_hwtail;
nm_prdis(5, "%s now hwcur %d hwtail %d head %d cur %d tail %d",
kring->name, kring->nr_hwcur, kring->nr_hwtail,
kring->rhead, kring->rcur, kring->rtail);
}
/*
 * Refresh the ring timestamp, unless timestamps are globally disabled
 * and this ring did not explicitly request them via NR_TIMESTAMP.
 */
static inline void
ring_timestamp_set(struct netmap_ring *ring)
{
	if ((ring->flags & NR_TIMESTAMP) || netmap_no_timestamp == 0)
		microtime(&ring->ts);
}
static int nmreq_copyin(struct nmreq_header *, int);
static int nmreq_copyout(struct nmreq_header *, int);
static int nmreq_checkoptions(struct nmreq_header *);
/*
 * netmap_ioctl - main dispatcher for ioctl requests on netmap fds.
 *
 * NIOCCTRL carries a struct nmreq_header (possibly with a chain of
 * options) that is copied into kernel space by nmreq_copyin(), handled
 * under NMG_LOCK, and copied back by nmreq_copyout(). NIOCTXSYNC and
 * NIOCRXSYNC synchronize the rings owned by this fd without the global
 * lock, relying on per-ring locking (nm_kr_tryget). Anything else goes
 * to the legacy handler.
 *
 * nr_body_is_user is nonzero when hdr->nr_body/nr_options are
 * userspace pointers that need copyin/copyout.
 */
int
netmap_ioctl(struct netmap_priv_d *priv, u_long cmd, caddr_t data,
	struct thread *td, int nr_body_is_user)
{
	struct mbq q;	/* packets from RX hw queues to host stack */
	struct netmap_adapter *na = NULL;
	struct netmap_mem_d *nmd = NULL;
	if_t ifp = NULL;
	int error = 0;
	u_int i, qfirst, qlast;
	struct netmap_kring **krings;
	int sync_flags;
	enum txrx t;

	switch (cmd) {
	case NIOCCTRL: {
		struct nmreq_header *hdr = (struct nmreq_header *)data;

		if (hdr->nr_version < NETMAP_MIN_API ||
		    hdr->nr_version > NETMAP_MAX_API) {
			nm_prerr("API mismatch: got %d need %d",
				hdr->nr_version, NETMAP_API);
			return EINVAL;
		}

		/* Make a kernel-space copy of the user-space nr_body.
		 * For convenience, the nr_body pointer and the pointers
		 * in the options list are replaced with their
		 * kernel-space counterparts. The original pointers are
		 * saved internally and later restored by nmreq_copyout. */
		error = nmreq_copyin(hdr, nr_body_is_user);
		if (error) {
			return error;
		}

		/* Sanitize hdr->nr_name (guarantee NUL termination). */
		hdr->nr_name[sizeof(hdr->nr_name) - 1] = '\0';

		switch (hdr->nr_reqtype) {
		case NETMAP_REQ_REGISTER: {
			struct nmreq_register *req =
				(struct nmreq_register *)(uintptr_t)hdr->nr_body;
			struct netmap_if *nifp;

			/* Protect access to priv from concurrent requests. */
			NMG_LOCK();
			do {
				struct nmreq_option *opt;
				u_int memflags;

				if (priv->np_nifp != NULL) {	/* thread already registered */
					error = EBUSY;
					break;
				}

#ifdef WITH_EXTMEM
				/* Check for external memory provided by the user. */
				opt = nmreq_getoption(hdr, NETMAP_REQ_OPT_EXTMEM);
				if (opt != NULL) {
					struct nmreq_opt_extmem *e =
						(struct nmreq_opt_extmem *)opt;

					nmd = netmap_mem_ext_create(e->nro_usrptr,
							&e->nro_info, &error);
					opt->nro_status = error;
					if (nmd == NULL)
						break;
				}
#endif /* WITH_EXTMEM */

				if (nmd == NULL && req->nr_mem_id) {
					/* find the allocator and get a reference */
					nmd = netmap_mem_find(req->nr_mem_id);
					if (nmd == NULL) {
						if (netmap_verbose) {
							nm_prerr("%s: failed to find mem_id %u",
									hdr->nr_name, req->nr_mem_id);
						}
						error = EINVAL;
						break;
					}
				}
				/* find the interface and a reference */
				error = netmap_get_na(hdr, &na, &ifp, nmd,
						      1 /* create */);
				if (error)
					break;
				if (NETMAP_OWNED_BY_KERN(na)) { /* busy, e.g. bridge port */
					error = EBUSY;
					break;
				}

				if (na->virt_hdr_len && !(req->nr_flags & NR_ACCEPT_VNET_HDR)) {
					nm_prerr("virt_hdr_len=%d, but application does "
						"not accept it", na->virt_hdr_len);
					error = EIO;
					break;
				}

				error = netmap_do_regif(priv, na, hdr);
				if (error) {    /* reg. failed, release priv and ref */
					break;
				}

				opt = nmreq_getoption(hdr, NETMAP_REQ_OPT_CSB);
				if (opt != NULL) {
					struct nmreq_opt_csb *csbo =
						(struct nmreq_opt_csb *)opt;
					error = netmap_csb_validate(priv, csbo);
					opt->nro_status = error;
					if (error) {
						netmap_do_unregif(priv);
						break;
					}
				}

				nifp = priv->np_nifp;

				/* return the offset of the netmap_if object */
				req->nr_rx_rings = na->num_rx_rings;
				req->nr_tx_rings = na->num_tx_rings;
				req->nr_rx_slots = na->num_rx_desc;
				req->nr_tx_slots = na->num_tx_desc;
				req->nr_host_tx_rings = na->num_host_tx_rings;
				req->nr_host_rx_rings = na->num_host_rx_rings;
				error = netmap_mem_get_info(na->nm_mem, &req->nr_memsize, &memflags,
					&req->nr_mem_id);
				if (error) {
					netmap_do_unregif(priv);
					break;
				}
				if (memflags & NETMAP_MEM_PRIVATE) {
					/* nifp->ni_flags is const for userspace */
					*(uint32_t *)(uintptr_t)&nifp->ni_flags |= NI_PRIV_MEM;
				}
				/* pick the wait queues this fd will sleep on */
				for_rx_tx(t) {
					priv->np_si[t] = nm_si_user(priv, t) ?
						&na->si[t] : &NMR(na, t)[priv->np_qfirst[t]]->si;
				}

				if (req->nr_extra_bufs) {
					if (netmap_verbose)
						nm_prinf("requested %d extra buffers",
							req->nr_extra_bufs);
					req->nr_extra_bufs = netmap_extra_alloc(na,
						&nifp->ni_bufs_head, req->nr_extra_bufs);
					if (netmap_verbose)
						nm_prinf("got %d extra buffers", req->nr_extra_bufs);
				} else {
					nifp->ni_bufs_head = 0;
				}
				req->nr_offset = netmap_mem_if_offset(na->nm_mem, nifp);

				error = nmreq_checkoptions(hdr);
				if (error) {
					netmap_do_unregif(priv);
					break;
				}

				/* store ifp reference so that priv destructor may release it */
				priv->np_ifp = ifp;
			} while (0);
			if (error) {
				netmap_unget_na(na, ifp);
			}
			/* release the reference from netmap_mem_find() or
			 * netmap_mem_ext_create() */
			if (nmd)
				netmap_mem_put(nmd);
			NMG_UNLOCK();
			break;
		}

		case NETMAP_REQ_PORT_INFO_GET: {
			struct nmreq_port_info_get *req =
				(struct nmreq_port_info_get *)(uintptr_t)hdr->nr_body;
			int nmd_ref = 0;

			NMG_LOCK();
			do {
				u_int memflags;

				if (hdr->nr_name[0] != '\0') {
					/* Build a nmreq_register out of the
					 * nmreq_port_info_get, so that we can
					 * call netmap_get_na(). */
					struct nmreq_register regreq;
					bzero(&regreq, sizeof(regreq));
					regreq.nr_mode = NR_REG_ALL_NIC;
					regreq.nr_tx_slots = req->nr_tx_slots;
					regreq.nr_rx_slots = req->nr_rx_slots;
					regreq.nr_tx_rings = req->nr_tx_rings;
					regreq.nr_rx_rings = req->nr_rx_rings;
					regreq.nr_host_tx_rings = req->nr_host_tx_rings;
					regreq.nr_host_rx_rings = req->nr_host_rx_rings;
					regreq.nr_mem_id = req->nr_mem_id;

					/* get a refcount */
					hdr->nr_reqtype = NETMAP_REQ_REGISTER;
					hdr->nr_body = (uintptr_t)&regreq;
					error = netmap_get_na(hdr, &na, &ifp, NULL, 1 /* create */);
					hdr->nr_reqtype = NETMAP_REQ_PORT_INFO_GET; /* reset */
					hdr->nr_body = (uintptr_t)req; /* reset */
					if (error) {
						na = NULL;
						ifp = NULL;
						break;
					}
					nmd = na->nm_mem; /* get memory allocator */
				} else {
					nmd = netmap_mem_find(req->nr_mem_id ? req->nr_mem_id : 1);
					if (nmd == NULL) {
						if (netmap_verbose)
							nm_prerr("%s: failed to find mem_id %u",
									hdr->nr_name,
									req->nr_mem_id ? req->nr_mem_id : 1);
						error = EINVAL;
						break;
					}
					nmd_ref = 1;
				}

				error = netmap_mem_get_info(nmd, &req->nr_memsize, &memflags,
					&req->nr_mem_id);
				if (error)
					break;
				if (na == NULL) /* only memory info */
					break;
				netmap_update_config(na);
				req->nr_rx_rings = na->num_rx_rings;
				req->nr_tx_rings = na->num_tx_rings;
				req->nr_rx_slots = na->num_rx_desc;
				req->nr_tx_slots = na->num_tx_desc;
				req->nr_host_tx_rings = na->num_host_tx_rings;
				req->nr_host_rx_rings = na->num_host_rx_rings;
			} while (0);
			netmap_unget_na(na, ifp);
			if (nmd_ref)
				netmap_mem_put(nmd);
			NMG_UNLOCK();
			break;
		}
#ifdef WITH_VALE
		case NETMAP_REQ_VALE_ATTACH: {
			error = netmap_bdg_attach(hdr, NULL /* userspace request */);
			break;
		}

		case NETMAP_REQ_VALE_DETACH: {
			error = netmap_bdg_detach(hdr, NULL /* userspace request */);
			break;
		}

		case NETMAP_REQ_PORT_HDR_SET: {
			/* set the virtio-net header length of a VALE port */
			struct nmreq_port_hdr *req =
				(struct nmreq_port_hdr *)(uintptr_t)hdr->nr_body;
			/* Build a nmreq_register out of the nmreq_port_hdr,
			 * so that we can call netmap_get_bdg_na(). */
			struct nmreq_register regreq;
			bzero(&regreq, sizeof(regreq));
			regreq.nr_mode = NR_REG_ALL_NIC;

			/* For now we only support virtio-net headers, and only for
			 * VALE ports, but this may change in future. Valid lengths
			 * for the virtio-net header are 0 (no header), 10 and 12. */
			if (req->nr_hdr_len != 0 &&
				req->nr_hdr_len != sizeof(struct nm_vnet_hdr) &&
					req->nr_hdr_len != 12) {
				if (netmap_verbose)
					nm_prerr("invalid hdr_len %u", req->nr_hdr_len);
				error = EINVAL;
				break;
			}
			NMG_LOCK();
			hdr->nr_reqtype = NETMAP_REQ_REGISTER;
			hdr->nr_body = (uintptr_t)&regreq;
			error = netmap_get_vale_na(hdr, &na, NULL, 0);
			hdr->nr_reqtype = NETMAP_REQ_PORT_HDR_SET;
			hdr->nr_body = (uintptr_t)req;
			if (na && !error) {
				struct netmap_vp_adapter *vpna =
					(struct netmap_vp_adapter *)na;
				na->virt_hdr_len = req->nr_hdr_len;
				if (na->virt_hdr_len) {
					vpna->mfs = NETMAP_BUF_SIZE(na);
				}
				if (netmap_verbose)
					nm_prinf("Using vnet_hdr_len %d for %p", na->virt_hdr_len, na);
				netmap_adapter_put(na);
			} else if (!na) {
				error = ENXIO;
			}
			NMG_UNLOCK();
			break;
		}

		case NETMAP_REQ_PORT_HDR_GET: {
			/* get the virtio-net header length of a VALE port */
			struct nmreq_port_hdr *req =
				(struct nmreq_port_hdr *)(uintptr_t)hdr->nr_body;
			/* Build a nmreq_register out of the nmreq_port_hdr,
			 * so that we can call netmap_get_bdg_na(). */
			struct nmreq_register regreq;
			if_t ifp;

			bzero(&regreq, sizeof(regreq));
			regreq.nr_mode = NR_REG_ALL_NIC;
			NMG_LOCK();
			hdr->nr_reqtype = NETMAP_REQ_REGISTER;
			hdr->nr_body = (uintptr_t)&regreq;
			error = netmap_get_na(hdr, &na, &ifp, NULL, 0);
			hdr->nr_reqtype = NETMAP_REQ_PORT_HDR_GET;
			hdr->nr_body = (uintptr_t)req;
			if (na && !error) {
				req->nr_hdr_len = na->virt_hdr_len;
			}
			netmap_unget_na(na, ifp);
			NMG_UNLOCK();
			break;
		}

		case NETMAP_REQ_VALE_LIST: {
			error = netmap_vale_list(hdr);
			break;
		}

		case NETMAP_REQ_VALE_NEWIF: {
			error = nm_vi_create(hdr);
			break;
		}

		case NETMAP_REQ_VALE_DELIF: {
			error = nm_vi_destroy(hdr->nr_name);
			break;
		}
#endif  /* WITH_VALE */

		case NETMAP_REQ_VALE_POLLING_ENABLE:
		case NETMAP_REQ_VALE_POLLING_DISABLE: {
			error = nm_bdg_polling(hdr);
			break;
		}
		case NETMAP_REQ_POOLS_INFO_GET: {
			/* get information from the memory allocator */
			struct nmreq_pools_info *req =
				(struct nmreq_pools_info *)(uintptr_t)hdr->nr_body;
			NMG_LOCK();
			do {
				/* Build a nmreq_register out of the nmreq_pools_info,
				 * so that we can call netmap_get_na(). */
				struct nmreq_register regreq;
				bzero(&regreq, sizeof(regreq));
				regreq.nr_mem_id = req->nr_mem_id;
				regreq.nr_mode = NR_REG_ALL_NIC;

				hdr->nr_reqtype = NETMAP_REQ_REGISTER;
				hdr->nr_body = (uintptr_t)&regreq;
				error = netmap_get_na(hdr, &na, &ifp, NULL, 1 /* create */);
				hdr->nr_reqtype = NETMAP_REQ_POOLS_INFO_GET; /* reset */
				hdr->nr_body = (uintptr_t)req; /* reset */
				if (error) {
					na = NULL;
					ifp = NULL;
					break;
				}
				nmd = na->nm_mem; /* grab the memory allocator */
				if (nmd == NULL) {
					error = EINVAL;
					break;
				}

				/* Finalize the memory allocator, get the pools
				 * information and release the allocator. */
				error = netmap_mem_finalize(nmd, na);
				if (error) {
					break;
				}
				error = netmap_mem_pools_info_get(req, nmd);
				netmap_mem_drop(na);
			} while (0);
			netmap_unget_na(na, ifp);
			NMG_UNLOCK();
			break;
		}

		case NETMAP_REQ_CSB_ENABLE: {
			struct nmreq_option *opt;

			opt = nmreq_getoption(hdr, NETMAP_REQ_OPT_CSB);
			if (opt == NULL) {
				error = EINVAL;
			} else {
				struct nmreq_opt_csb *csbo =
					(struct nmreq_opt_csb *)opt;
				NMG_LOCK();
				error = netmap_csb_validate(priv, csbo);
				NMG_UNLOCK();
				opt->nro_status = error;
			}
			break;
		}

		case NETMAP_REQ_SYNC_KLOOP_START: {
			error = netmap_sync_kloop(priv, hdr);
			break;
		}

		case NETMAP_REQ_SYNC_KLOOP_STOP: {
			error = netmap_sync_kloop_stop(priv);
			break;
		}

		default: {
			error = EINVAL;
			break;
		}
		}
		/* Write back request body to userspace and reset the
		 * user-space pointer. */
		error = nmreq_copyout(hdr, error);
		break;
	}

	case NIOCTXSYNC:
	case NIOCRXSYNC: {
		if (unlikely(priv->np_nifp == NULL)) {
			/* fd not yet registered */
			error = ENXIO;
			break;
		}
		mb(); /* make sure following reads are not from cache */

		if (unlikely(priv->np_csb_atok_base)) {
			nm_prerr("Invalid sync in CSB mode");
			error = EBUSY;
			break;
		}

		na = priv->np_na;      /* we have a reference */

		mbq_init(&q);
		t = (cmd == NIOCTXSYNC ? NR_TX : NR_RX);
		krings = NMR(na, t);
		qfirst = priv->np_qfirst[t];
		qlast = priv->np_qlast[t];
		sync_flags = priv->np_sync_flags;

		for (i = qfirst; i < qlast; i++) {
			struct netmap_kring *kring = krings[i];
			struct netmap_ring *ring = kring->ring;

			if (unlikely(nm_kr_tryget(kring, 1, &error))) {
				/* ring busy or stopped: skip it */
				error = (error ? EIO : 0);
				continue;
			}

			if (cmd == NIOCTXSYNC) {
				if (netmap_debug & NM_DEBUG_TXSYNC)
					nm_prinf("pre txsync ring %d cur %d hwcur %d",
					    i, ring->cur,
					    kring->nr_hwcur);
				if (nm_txsync_prologue(kring, ring) >= kring->nkr_num_slots) {
					netmap_ring_reinit(kring);
				} else if (kring->nm_sync(kring, sync_flags | NAF_FORCE_RECLAIM) == 0) {
					nm_sync_finalize(kring);
				}
				if (netmap_debug & NM_DEBUG_TXSYNC)
					nm_prinf("post txsync ring %d cur %d hwcur %d",
					    i, ring->cur,
					    kring->nr_hwcur);
			} else {
				if (nm_rxsync_prologue(kring, ring) >= kring->nkr_num_slots) {
					netmap_ring_reinit(kring);
				}
				if (nm_may_forward_up(kring)) {
					/* collect packets destined to the host stack */
					netmap_grab_packets(kring, &q, netmap_fwd);
				}
				if (kring->nm_sync(kring, sync_flags | NAF_FORCE_READ) == 0) {
					nm_sync_finalize(kring);
				}
				ring_timestamp_set(ring);
			}
			nm_kr_put(kring);
		}

		if (mbq_peek(&q)) {
			netmap_send_up(na->ifp, &q);
		}

		break;
	}

	default: {
		return netmap_ioctl_legacy(priv, cmd, data, td);
		break;
	}
	}

	return (error);
}
/*
 * Map each request type to the size of its body structure. Request
 * types that carry no body — and unknown types — map to zero.
 */
size_t
nmreq_size_by_type(uint16_t nr_reqtype)
{
	switch (nr_reqtype) {
	case NETMAP_REQ_REGISTER:
		return sizeof(struct nmreq_register);
	case NETMAP_REQ_PORT_INFO_GET:
		return sizeof(struct nmreq_port_info_get);
	case NETMAP_REQ_PORT_HDR_SET:
	case NETMAP_REQ_PORT_HDR_GET:
		return sizeof(struct nmreq_port_hdr);
	case NETMAP_REQ_VALE_ATTACH:
		return sizeof(struct nmreq_vale_attach);
	case NETMAP_REQ_VALE_DETACH:
		return sizeof(struct nmreq_vale_detach);
	case NETMAP_REQ_VALE_LIST:
		return sizeof(struct nmreq_vale_list);
	case NETMAP_REQ_VALE_NEWIF:
		return sizeof(struct nmreq_vale_newif);
	case NETMAP_REQ_VALE_POLLING_ENABLE:
	case NETMAP_REQ_VALE_POLLING_DISABLE:
		return sizeof(struct nmreq_vale_polling);
	case NETMAP_REQ_POOLS_INFO_GET:
		return sizeof(struct nmreq_pools_info);
	case NETMAP_REQ_SYNC_KLOOP_START:
		return sizeof(struct nmreq_sync_kloop_start);
	case NETMAP_REQ_VALE_DELIF:
	case NETMAP_REQ_SYNC_KLOOP_STOP:
	case NETMAP_REQ_CSB_ENABLE:
	default:
		return 0;
	}
}
/*
 * Return the number of payload bytes that follow the generic
 * struct nmreq_option for the given option type (0 for options with
 * no type-specific payload). For the eventfds option, nro_size is
 * caller-provided and taken as the total size when larger than the
 * header.
 */
static size_t
nmreq_opt_size_by_type(uint32_t nro_reqtype, uint64_t nro_size)
{
	const size_t hdrsz = sizeof(struct nmreq_option);
	size_t total = hdrsz;

#ifdef NETMAP_REQ_OPT_DEBUG
	/* debug options encode their own size in the type field */
	if (nro_reqtype & NETMAP_REQ_OPT_DEBUG)
		return (nro_reqtype & ~NETMAP_REQ_OPT_DEBUG);
#endif /* NETMAP_REQ_OPT_DEBUG */

	switch (nro_reqtype) {
#ifdef WITH_EXTMEM
	case NETMAP_REQ_OPT_EXTMEM:
		total = sizeof(struct nmreq_opt_extmem);
		break;
#endif /* WITH_EXTMEM */
	case NETMAP_REQ_OPT_SYNC_KLOOP_EVENTFDS:
		if (nro_size >= total)
			total = nro_size;
		break;
	case NETMAP_REQ_OPT_CSB:
		total = sizeof(struct nmreq_opt_csb);
		break;
	case NETMAP_REQ_OPT_SYNC_KLOOP_MODE:
		total = sizeof(struct nmreq_opt_sync_kloop_mode);
		break;
	case NETMAP_REQ_OPT_OFFSETS:
		total = sizeof(struct nmreq_opt_offsets);
		break;
	}
	/* size of the option body, excluding the generic header */
	return total - hdrsz;
}
/*
 * nmreq_copyin - bring the request body and its option list into a
 * single kernel buffer.
 *
 * Buffer layout (built in 'ker'):
 *   [ saved user nr_body ptr | saved user nr_options ptr ]
 *   [ request body (rqsz bytes) ]
 *   [ opt_tab: one slot per known option type ]
 *   [ per option: saved user nro_next ptr | option struct | payload ]
 *
 * hdr->nr_body and the option chain pointers are rewritten to point
 * into this buffer; the saved user pointers are restored later by
 * nmreq_copyout(). hdr->nr_reserved is (ab)used as the "pointers are
 * kernelized" flag.
 */
int
nmreq_copyin(struct nmreq_header *hdr, int nr_body_is_user)
{
	size_t rqsz, optsz, bufsz;
	int error = 0;
	char *ker = NULL, *p;
	struct nmreq_option **next, *src, **opt_tab, *opt;
	uint64_t *ptrs;

	if (hdr->nr_reserved) {
		if (netmap_verbose)
			nm_prerr("nr_reserved must be zero");
		return EINVAL;
	}

	if (!nr_body_is_user)
		return 0;	/* pointers are already kernel-space */

	hdr->nr_reserved = nr_body_is_user;

	/* compute the total size of the buffer */
	rqsz = nmreq_size_by_type(hdr->nr_reqtype);
	if (rqsz > NETMAP_REQ_MAXSIZE) {
		error = EMSGSIZE;
		goto out_err;
	}
	/* a body pointer must be present iff the type has a body */
	if ((rqsz && hdr->nr_body == (uintptr_t)NULL) ||
		(!rqsz && hdr->nr_body != (uintptr_t)NULL)) {
		/* Request body expected, but not found; or
		 * request body found but unexpected. */
		if (netmap_verbose)
			nm_prerr("nr_body expected but not found, or vice versa");
		error = EINVAL;
		goto out_err;
	}

	/* upper bound: saved pointers + max body + option table
	 * (option payloads are size-checked against this below) */
	bufsz = (2 + NETMAP_REQ_OPT_MAX) * sizeof(void *) + NETMAP_REQ_MAXSIZE +
		NETMAP_REQ_OPT_MAX * sizeof(opt_tab);
	ker = nm_os_malloc(bufsz);
	if (ker == NULL) {
		error = ENOMEM;
		goto out_err;
	}
	p = ker;	/* write pointer into the buffer */

	/* make a copy of the user pointers */
	ptrs = (uint64_t*)p;
	*ptrs++ = hdr->nr_body;
	*ptrs++ = hdr->nr_options;
	p = (char *)ptrs;

	/* copy the body */
	hdr->nr_body = (uintptr_t)p;
	error = copyin(*(void **)ker, p, rqsz);
	if (error)
		goto out_restore;
	p += rqsz;
	/* start of the options table */
	opt_tab = (struct nmreq_option **)p;
	p += sizeof(opt_tab) * NETMAP_REQ_OPT_MAX;

	/* 'next' points to where the kernel copy of the current option
	 * must be linked; it starts at hdr->nr_options itself */
	next = (struct nmreq_option **)&hdr->nr_options;
	src = *next;
	hdr->nr_options = 0;

	/* copy the options chain */
	while (src) {
		struct nmreq_option *nsrc;

		/* room for the saved user nro_next + the option header? */
		if (p - ker + sizeof(uint64_t*) + sizeof(*src) > bufsz) {
			error = EMSGSIZE;
			/* custom option with zero size: leave the
			 * list of options intact */
			hdr->nr_options = 0;
			goto out_restore;
		}

		/* copy the option header */
		ptrs = (uint64_t *)p;
		opt = (struct nmreq_option *)(ptrs + 1);
		error = copyin(src, opt, sizeof(*src));
		if (error)
			goto out_restore;
		rqsz += sizeof(*src);
		p = (char *)(opt + 1);

		/* make a copy of the user next pointer */
		*ptrs = opt->nro_next;
		/* append the option to the chain being rebuilt */
		*next = opt;
		nsrc = (struct nmreq_option *)opt->nro_next;
		opt->nro_next = 0;

		opt->nro_status = 0;

		/* check for invalid types */
		if (opt->nro_reqtype < 1) {
			if (netmap_verbose)
				nm_prinf("invalid option type: %u", opt->nro_reqtype);
			opt->nro_status = EINVAL;
			error = EINVAL;
			goto out_restore;
		}

		if (opt->nro_reqtype >= NETMAP_REQ_OPT_MAX) {
			/* opt->nro_status was set to 0 above; unknown
			 * options are flagged EOPNOTSUPP after the loop */
			goto next;
		}

		/* if the type is valid, index the option in the table
		 * unless it is a duplicate. */
		if (opt_tab[opt->nro_reqtype] != NULL) {
			if (netmap_verbose)
				nm_prinf("duplicate option: %u", opt->nro_reqtype);
			opt->nro_status = EINVAL;
			opt_tab[opt->nro_reqtype]->nro_status = EINVAL;
			error = EINVAL;
			goto out_restore;
		}
		opt_tab[opt->nro_reqtype] = opt;

		/* copy the option body */
		optsz = nmreq_opt_size_by_type(opt->nro_reqtype,
						opt->nro_size);
		/* check optsz and nro_size to avoid for possible integer overflows of rqsz */
		if ((optsz > NETMAP_REQ_MAXSIZE) || (opt->nro_size > NETMAP_REQ_MAXSIZE)
				|| (rqsz + optsz > NETMAP_REQ_MAXSIZE)
				|| (optsz > 0 && rqsz + optsz <= rqsz)) {
			error = EMSGSIZE;
			goto out_restore;
		}
		rqsz += optsz;
		if (optsz) {
			/* the option payload follows the option header */
			error = copyin(src + 1, p, optsz);
			if (error)
				goto out_restore;
			p += optsz;
		}

	next:		/* label, not the 'next' variable (separate namespaces) */
		/* move to next option */
		next = (struct nmreq_option **)&opt->nro_next;
		src = nsrc;
	}

	/* mark all remaining (i.e. unhandled-type) options as
	 * not supported; handlers reset this for options they consume */
	for (src = (struct nmreq_option *)hdr->nr_options; src;
	     src = (struct nmreq_option *)src->nro_next) {
		src->nro_status = EOPNOTSUPP;
	}
	return 0;

out_restore:
	nmreq_copyout(hdr, error);
out_err:
	return error;
}
/*
 * nmreq_copyout - copy the (kernelized) request body and options back
 * to userspace, restore the original user pointers saved by
 * nmreq_copyin(), and free the kernel buffer.
 *
 * rerror is the request-handling error so far; copyout errors replace
 * a zero rerror. Option payloads are copied back only when both the
 * request and the individual option succeeded.
 */
static int
nmreq_copyout(struct nmreq_header *hdr, int rerror)
{
	struct nmreq_option *src, *dst;
	void *ker = (void *)(uintptr_t)hdr->nr_body, *bufstart;
	uint64_t *ptrs;
	size_t bodysz;
	int error;

	if (!hdr->nr_reserved)
		return rerror;	/* pointers were never kernelized */

	/* restore the user pointers in the header: the two saved
	 * pointers sit immediately before the body copy */
	ptrs = (uint64_t *)ker - 2;
	bufstart = ptrs;	/* start of the allocation, freed below */
	hdr->nr_body = *ptrs++;
	src = (struct nmreq_option *)(uintptr_t)hdr->nr_options;
	hdr->nr_options = *ptrs;

	if (!rerror) {
		/* copy the body only on success */
		bodysz = nmreq_size_by_type(hdr->nr_reqtype);
		error = copyout(ker, (void *)(uintptr_t)hdr->nr_body, bodysz);
		if (error) {
			rerror = error;
			goto out;
		}
	}

	/* copy the options; 'src' walks the kernel chain, 'dst' the
	 * corresponding user-space option */
	dst = (struct nmreq_option *)(uintptr_t)hdr->nr_options;
	while (src) {
		size_t optsz;
		uint64_t next;

		/* restore the user pointer saved just before the option */
		next = src->nro_next;
		ptrs = (uint64_t *)src - 1;
		src->nro_next = *ptrs;

		/* always copy the option header back (for nro_status) */
		error = copyout(src, dst, sizeof(*src));
		if (error) {
			rerror = error;
			goto out;
		}

		/* copy the option body only if there was no error and
		 * the option was handled successfully */
		if (!rerror && !src->nro_status) {
			optsz = nmreq_opt_size_by_type(src->nro_reqtype,
							src->nro_size);
			if (optsz) {
				error = copyout(src + 1, dst + 1, optsz);
				if (error) {
					rerror = error;
					goto out;
				}
			}
		}
		src = (struct nmreq_option *)(uintptr_t)next;
		dst = (struct nmreq_option *)(uintptr_t)*ptrs;
	}

out:
	hdr->nr_reserved = 0;	/* pointers are user-space again */
	nm_os_free(bufstart);
	return rerror;
}
/*
 * Look up an option of the given type in a copied-in request.
 * Returns NULL if the request has no options or the type is absent.
 * Relies on the option table that nmreq_copyin() places immediately
 * before the first copied option.
 */
struct nmreq_option *
nmreq_getoption(struct nmreq_header *hdr, uint16_t reqtype)
{
	struct nmreq_option **tab;

	if (hdr->nr_options == 0)
		return NULL;

	tab = (struct nmreq_option **)((uintptr_t)hdr->nr_options) -
		(NETMAP_REQ_OPT_MAX + 1);
	return tab[reqtype];
}
static int
nmreq_checkoptions(struct nmreq_header *hdr)
{
struct nmreq_option *opt;
for (opt = (struct nmreq_option *)(uintptr_t)hdr->nr_options; opt;
opt = (struct nmreq_option *)(uintptr_t)opt->nro_next)
if (opt->nro_status == EOPNOTSUPP)
return EOPNOTSUPP;
return 0;
}
/*
 * netmap_poll - poll/select backend for netmap file descriptors.
 *
 * Returns the ready events (POLLIN/POLLOUT/...) for the rings owned by
 * this fd. TX rings are also synced when np_txpoll is set, so a plain
 * poll() doubles as a transmit kick. RX packets marked NR_FORWARD are
 * collected in 'q' and pushed to the host stack before returning.
 * 'sr' is the platform selrecord token (may be NULL for a non-blocking
 * scan).
 */
int
netmap_poll(struct netmap_priv_d *priv, int events, NM_SELRECORD_T *sr)
{
	struct netmap_adapter *na;
	struct netmap_kring *kring;
	struct netmap_ring *ring;
	u_int i, want[NR_TXRX], revents = 0;
	NM_SELINFO_T *si[NR_TXRX];
#define want_tx want[NR_TX]
#define want_rx want[NR_RX]
	struct mbq q;	/* packets from RX hw queues to host stack */

	/* In order to avoid nested locks, we need to "double check"
	 * txsync and rxsync if we decide to do a selrecord().
	 * retry_tx (and retry_rx, later) prevent looping forever. */
	int retry_tx = 1, retry_rx = 1;

	/* Transparent mode: send_down is 1 if we have found some
	 * packets to forward (host RX ring --> NIC) during the rx
	 * scan and we have not sent them down to the NIC yet. */
	int send_down = 0;
	int sync_flags = priv->np_sync_flags;

	mbq_init(&q);

	if (unlikely(priv->np_nifp == NULL)) {
		/* fd not registered yet */
		return POLLERR;
	}
	mb(); /* make sure following reads are not from cache */

	na = priv->np_na;

	if (unlikely(!nm_netmap_on(na)))
		return POLLERR;

	if (unlikely(priv->np_csb_atok_base)) {
		nm_prerr("Invalid poll in CSB mode");
		return POLLERR;
	}

	if (netmap_debug & NM_DEBUG_ON)
		nm_prinf("device %s events 0x%x", na->name, events);
	want_tx = events & (POLLOUT | POLLWRNORM);
	want_rx = events & (POLLIN | POLLRDNORM);

	/* wait queues this fd sleeps on (per-ring or global) */
	si[NR_RX] = priv->np_si[NR_RX];
	si[NR_TX] = priv->np_si[NR_TX];

#ifdef __FreeBSD__
	/*
	 * Optimistic check: avoid the sync entirely if some ring is
	 * already known to have room/data.
	 */
	if (want_tx) {
		const enum txrx t = NR_TX;
		for (i = priv->np_qfirst[t]; i < priv->np_qlast[t]; i++) {
			kring = NMR(na, t)[i];
			if (kring->ring->cur != kring->ring->tail) {
				revents |= want[t];
				want[t] = 0;	/* skip the txsync below */
				break;
			}
		}
	}
	if (want_rx) {
		const enum txrx t = NR_RX;
		int rxsync_needed = 0;

		/* an rxsync is needed if some ring is empty or the
		 * application moved head since our last sync */
		for (i = priv->np_qfirst[t]; i < priv->np_qlast[t]; i++) {
			kring = NMR(na, t)[i];
			if (kring->ring->cur == kring->ring->tail
				|| kring->rhead != kring->ring->head) {
				rxsync_needed = 1;
				break;
			}
		}
		if (!rxsync_needed) {
			revents |= want_rx;
			want_rx = 0;	/* skip the rxsync below */
		}
	}
#endif

#ifdef linux
	/* The selrecord must be unconditional on linux. */
	nm_os_selrecord(sr, si[NR_RX]);
	nm_os_selrecord(sr, si[NR_TX]);
#endif /* linux */

	/*
	 * TX side: sync the rings owned by this fd, then check for
	 * available slots.
	 */
	if (priv->np_txpoll || want_tx) {
		/*
		 * The first round checks if anyone is ready, if not
		 * do a selrecord and another round to handle races.
		 */
flush_tx:
		for (i = priv->np_qfirst[NR_TX]; i < priv->np_qlast[NR_TX]; i++) {
			int found = 0;

			kring = na->tx_rings[i];
			ring = kring->ring;

			/* skip rings with nothing to reclaim or send,
			 * unless we must push forwarded packets down */
			if (!send_down && !want_tx && ring->head == kring->nr_hwcur)
				continue;

			if (nm_kr_tryget(kring, 1, &revents))
				continue;

			if (nm_txsync_prologue(kring, ring) >= kring->nkr_num_slots) {
				netmap_ring_reinit(kring);
				revents |= POLLERR;
			} else {
				if (kring->nm_sync(kring, sync_flags))
					revents |= POLLERR;
				else
					nm_sync_finalize(kring);
			}

			/* any free slot after the sync? */
			found = kring->rcur != kring->rtail;
			nm_kr_put(kring);
			if (found) { /* notify other listeners */
				revents |= want_tx;
				want_tx = 0;
#ifndef linux
				kring->nm_notify(kring, 0);
#endif /* linux */
			}
		}
		/* if we were asked to push packets down, we have done it */
		send_down = 0;
		if (want_tx && retry_tx && sr) {
#ifndef linux
			nm_os_selrecord(sr, si[NR_TX]);
#endif /* !linux */
			retry_tx = 0;
			goto flush_tx;	/* second round after selrecord */
		}
	}

	/*
	 * RX side: same two-round scheme as TX.
	 */
	if (want_rx) {
		/* two rounds here for race avoidance */
do_retry_rx:
		for (i = priv->np_qfirst[NR_RX]; i < priv->np_qlast[NR_RX]; i++) {
			int found = 0;

			kring = na->rx_rings[i];
			ring = kring->ring;

			if (unlikely(nm_kr_tryget(kring, 1, &revents)))
				continue;

			if (nm_rxsync_prologue(kring, ring) >= kring->nkr_num_slots) {
				netmap_ring_reinit(kring);
				revents |= POLLERR;
			}
			/* now we can use kring->rcur, rtail */

			/* transparent mode: collect packets destined
			 * to the host stack before the sync */
			if (nm_may_forward_up(kring)) {
				netmap_grab_packets(kring, &q, netmap_fwd);
			}

			/* clear NR_FORWARD; nm_sync may set it again */
			kring->nr_kflags &= ~NR_FORWARD;
			if (kring->nm_sync(kring, sync_flags))
				revents |= POLLERR;
			else
				nm_sync_finalize(kring);
			send_down |= (kring->nr_kflags & NR_FORWARD);
			ring_timestamp_set(ring);
			found = kring->rcur != kring->rtail;
			nm_kr_put(kring);
			if (found) {
				revents |= want_rx;
				retry_rx = 0;
#ifndef linux
				kring->nm_notify(kring, 0);
#endif /* linux */
			}
		}

#ifndef linux
		if (retry_rx && sr) {
			nm_os_selrecord(sr, si[NR_RX]);
		}
#endif /* !linux */
		if (send_down || retry_rx) {
			retry_rx = 0;
			if (send_down)
				goto flush_tx; /* and retry_rx too */
			else
				goto do_retry_rx;
		}
	}

	/*
	 * Push to the host stack any packets marked for forwarding
	 * during the RX scan.
	 */
	if (mbq_peek(&q)) {
		netmap_send_up(na->ifp, &q);
	}

	return (revents);
#undef want_tx
#undef want_rx
}
int
nma_intr_enable(struct netmap_adapter *na, int onoff)
{
bool changed = false;
enum txrx t;
int i;
for_rx_tx(t) {
for (i = 0; i < nma_get_nrings(na, t); i++) {
struct netmap_kring *kring = NMR(na, t)[i];
int on = !(kring->nr_kflags & NKR_NOINTR);
if (!!onoff != !!on) {
changed = true;
}
if (onoff) {
kring->nr_kflags &= ~NKR_NOINTR;
} else {
kring->nr_kflags |= NKR_NOINTR;
}
}
}
if (!changed) {
return 0;
}
if (!na->nm_intr) {
nm_prerr("Cannot %s interrupts for %s", onoff ? "enable" : "disable",
na->name);
return -1;
}
na->nm_intr(na, onoff);
return 0;
}
static int
netmap_notify(struct netmap_kring *kring, int flags)
{
struct netmap_adapter *na = kring->notify_na;
enum txrx t = kring->tx;
nm_os_selwakeup(&kring->si);
if (na->si_users[t] > 0)
nm_os_selwakeup(&na->si[t]);
return NM_IRQ_COMPLETED;
}
/*
 * Fill in the default values for all the netmap_adapter fields that
 * the driver left unset: buffer size cap, host-ring counts, krings
 * constructor/destructor, notify callback, memory allocator and
 * bridge attach hook. Always returns 0.
 */
int
netmap_attach_common(struct netmap_adapter *na)
{
	if (na->rx_buf_maxsize == 0)
		na->rx_buf_maxsize = PAGE_SIZE;

#ifdef __FreeBSD__
	/* remember the stack input routine for host-ring forwarding */
	if ((na->na_flags & NAF_HOST_RINGS) && na->ifp != NULL)
		na->if_input = if_getinputfn(na->ifp);
	na->pdev = na; /* make sure netmap_mem_map() is called */
#endif /* __FreeBSD__ */

	if (na->na_flags & NAF_HOST_RINGS) {
		if (na->num_host_rx_rings == 0)
			na->num_host_rx_rings = 1;
		if (na->num_host_tx_rings == 0)
			na->num_host_tx_rings = 1;
	}

	if (na->nm_krings_create == NULL) {
		/* default krings constructor/destructor for hw adapters */
		na->nm_krings_create = netmap_hw_krings_create;
		na->nm_krings_delete = netmap_hw_krings_delete;
	}
	if (na->nm_notify == NULL)
		na->nm_notify = netmap_notify;

	na->active_fds = 0;

	if (na->nm_mem == NULL)
		na->nm_mem = netmap_mem_get_allocator(na);

	if (na->nm_bdg_attach == NULL)
		na->nm_bdg_attach = netmap_default_bdg_attach;

	return 0;
}
/*
 * Register/unregister wrapper for hardware adapters: forwards to the
 * driver's original nm_register under the ifnet lock, unless the
 * interface has been detached (zombie), in which case registering
 * fails with ENXIO and unregistering just clears NAF_NETMAP_ON.
 *
 * Fix vs. previous revision: the "else if (na != NULL)" guard was dead
 * code — na had already been dereferenced by nm_iszombie(na) above, so
 * it can never be NULL here.
 */
static int
netmap_hw_reg(struct netmap_adapter *na, int onoff)
{
	struct netmap_hw_adapter *hwna =
		(struct netmap_hw_adapter*)na;
	int error = 0;

	nm_os_ifnet_lock();
	if (nm_iszombie(na)) {
		if (onoff) {
			error = ENXIO;
		} else {
			na->na_flags &= ~NAF_NETMAP_ON;
		}
		goto out;
	}

	error = hwna->nm_hw_register(na, onoff);

out:
	nm_os_ifnet_unlock();
	return error;
}
/*
 * Destructor for hardware adapters: detach the netmap hook from the
 * ifnet, if one is still attached.
 */
static void
netmap_hw_dtor(struct netmap_adapter *na)
{
	if (na->ifp != NULL)
		NM_DETACH_NA(na->ifp);
}
/*
 * Attach a netmap adapter to a network interface, allocating a
 * netmap_hw_adapter of (at least) 'size' bytes and hooking it to the
 * ifnet. When override_reg is set, the driver's nm_register is
 * wrapped by netmap_hw_reg (zombie-safe registration).
 * Returns 0 on success, EINVAL/EBUSY/ENOMEM on failure.
 */
int
netmap_attach_ext(struct netmap_adapter *arg, size_t size, int override_reg)
{
	struct netmap_hw_adapter *hw = NULL;
	if_t ifp = NULL;

	/* validate the arguments before allocating anything */
	if (size < sizeof(struct netmap_hw_adapter)) {
		if (netmap_debug & NM_DEBUG_ON)
			nm_prerr("Invalid netmap adapter size %d", (int)size);
		return EINVAL;
	}

	if (arg == NULL || arg->ifp == NULL) {
		if (netmap_debug & NM_DEBUG_ON)
			nm_prerr("either arg or arg->ifp is NULL");
		return EINVAL;
	}

	if (arg->num_tx_rings == 0 || arg->num_rx_rings == 0) {
		if (netmap_debug & NM_DEBUG_ON)
			nm_prerr("%s: invalid rings tx %d rx %d",
				arg->name, arg->num_tx_rings, arg->num_rx_rings);
		return EINVAL;
	}

	ifp = arg->ifp;
	if (NM_NA_CLASH(ifp)) {
		/* another adapter is already hooked to this ifnet */
		nm_prerr("Error: netmap adapter hook is busy");
		return EBUSY;
	}

	hw = nm_os_malloc(size);
	if (hw == NULL)
		goto fail;

	hw->up = *arg;
	hw->up.na_flags |= NAF_HOST_RINGS | NAF_NATIVE;
	strlcpy(hw->up.name, if_name(ifp), sizeof(hw->up.name));
	if (override_reg) {
		hw->nm_hw_register = hw->up.nm_register;
		hw->up.nm_register = netmap_hw_reg;
	}
	if (netmap_attach_common(&hw->up)) {
		nm_os_free(hw);
		goto fail;
	}
	netmap_adapter_get(&hw->up);

	NM_ATTACH_NA(ifp, &hw->up);

	nm_os_onattach(ifp);

	if (arg->nm_dtor == NULL)
		hw->up.nm_dtor = netmap_hw_dtor;

	if_printf(ifp, "netmap queues/slots: TX %d/%d, RX %d/%d\n",
	    hw->up.num_tx_rings, hw->up.num_tx_desc,
	    hw->up.num_rx_rings, hw->up.num_rx_desc);
	return 0;

fail:
	/* NOTE(review): when attach_common fails, 'hw' has been freed
	 * and only its (dangling) pointer value is printed/tested here
	 * — preserved from the original code */
	nm_prerr("fail, arg %p ifp %p na %p", arg, ifp, hw);
	return (hw ? EINVAL : ENOMEM);
}
/*
 * Convenience wrapper around netmap_attach_ext() using the plain
 * netmap_hw_adapter size and the zombie-safe register wrapper.
 */
int
netmap_attach(struct netmap_adapter *arg)
{
	return netmap_attach_ext(arg, sizeof(struct netmap_hw_adapter),
			1 /* override nm_reg */);
}
void
NM_DBG(netmap_adapter_get)(struct netmap_adapter *na)
{
if (!na) {
return;
}
refcount_acquire(&na->na_refcount);
}
int
NM_DBG(netmap_adapter_put)(struct netmap_adapter *na)
{
if (!na)
return 1;
if (!refcount_release(&na->na_refcount))
return 0;
if (na->nm_dtor)
na->nm_dtor(na);
if (na->tx_rings) {
if (netmap_debug & NM_DEBUG_ON)
nm_prerr("freeing leftover tx_rings");
na->nm_krings_delete(na);
}
netmap_pipe_dealloc(na);
if (na->nm_mem)
netmap_mem_put(na->nm_mem);
bzero(na, sizeof(*na));
nm_os_free(na);
return 1;
}
/*
 * Kring constructor for hardware adapters: create the krings and then
 * initialize the mbuf queues backing the host RX rings.
 */
int
netmap_hw_krings_create(struct netmap_adapter *na)
{
	u_int lim, i;
	int err;

	err = netmap_krings_create(na, 0);
	if (err != 0)
		return err;

	/* the host rings follow the hardware RX rings in the array */
	lim = netmap_real_rings(na, NR_RX);
	for (i = na->num_rx_rings; i < lim; i++)
		mbq_safe_init(&NMR(na, NR_RX)[i]->rx_queue);
	nm_prdis("initialized sw rx queue %d", na->num_rx_rings);

	return 0;
}
/*
 * netmap_detach - called by the driver when the interface goes away.
 * Releases the adapter reference held by the ifnet; if userspace still
 * holds references the adapter is marked NAF_ZOMBIE instead of being
 * destroyed, and is reaped when the last fd closes.
 */
void
netmap_detach(if_t ifp)
{
	struct netmap_adapter *na;

	NMG_LOCK();
	if (!NM_NA_VALID(ifp)) {
		/* no netmap adapter attached: nothing to do */
		NMG_UNLOCK();
		return;
	}

	na = NA(ifp);
	/* quiesce the rings while we change the adapter state */
	netmap_set_all_rings(na, NM_KR_LOCKED);
	/*
	 * if the netmap adapter is not native, somebody
	 * changed it, so we can not release it here.
	 * The NAF_ZOMBIE flag will notify the new owner that
	 * the driver is gone.
	 */
	if (!(na->na_flags & NAF_NATIVE) || !netmap_adapter_put(na)) {
		na->na_flags |= NAF_ZOMBIE;
	}
	/* give active users a chance to notice that NAF_ZOMBIE has been
	 * turned on, so that they can stop and return an error to userspace.
	 * Note that this becomes a NOP if there are no active users and,
	 * therefore, the put() above has deleted the na, since now NA(ifp) is
	 * NULL.
	 */
	netmap_enable_all_rings(ifp);
	NMG_UNLOCK();
}
/*
 * netmap_transmit - intercept packets that the host stack is trying to
 * send on an interface in netmap mode. The mbuf is queued on the
 * host RX kring (selected by the mbuf's tx queue, wrapped on the
 * number of host rings) so that the netmap application can consume
 * it; the mbuf is dropped (with the OQDROPS counter bumped) if the
 * queue is full, the packet is oversized, or it requires offloads we
 * cannot honor. Returns 0 on success or an errno value.
 */
int
netmap_transmit(if_t ifp, struct mbuf *m)
{
	struct netmap_adapter *na = NA(ifp);
	struct netmap_kring *kring, *tx_kring;
	u_int len = MBUF_LEN(m);
	u_int error = ENOBUFS;
	unsigned int txr;
	struct mbq *q;
	int busy;
	u_int i;

	/* map the mbuf's tx queue onto a host RX ring */
	i = MBUF_TXQ(m);
	if (i >= na->num_host_rx_rings) {
		i = i % na->num_host_rx_rings;
	}
	kring = NMR(na, NR_RX)[nma_get_nrings(na, NR_RX) + i];

	// XXX [Linux] we do not need this lock
	// if we follow the down/configure/up protocol -gl
	// mtx_lock(&na->core_lock);

	if (!nm_netmap_on(na)) {
		nm_prerr("%s not in netmap mode anymore", na->name);
		error = ENXIO;
		goto done;
	}

	/* if the corresponding TX kring is not in netmap mode, hand the
	 * mbuf back to the regular driver transmit path */
	txr = MBUF_TXQ(m);
	if (txr >= na->num_tx_rings) {
		txr %= na->num_tx_rings;
	}
	tx_kring = NMR(na, NR_TX)[txr];

	if (tx_kring->nr_mode == NKR_NETMAP_OFF) {
		return MBUF_TRANSMIT(na, ifp, m);
	}

	q = &kring->rx_queue;

	// XXX reconsider long packets if we handle fragments
	if (len > NETMAP_BUF_SIZE(na)) { /* too long for us */
		nm_prerr("%s from_host, drop packet size %d > %d", na->name,
			len, NETMAP_BUF_SIZE(na));
		goto done;
	}

	/* packets needing checksum/segmentation offload cannot be
	 * represented in a netmap buffer unless hwcsum emulation is on */
	if (!netmap_generic_hwcsum) {
		if (nm_os_mbuf_has_csum_offld(m)) {
			nm_prlim(1, "%s drop mbuf that needs checksum offload", na->name);
			goto done;
		}
	}

	if (nm_os_mbuf_has_seg_offld(m)) {
		nm_prlim(1, "%s drop mbuf that needs generic segmentation offload", na->name);
		goto done;
	}

#ifdef __FreeBSD__
	ETHER_BPF_MTAP(ifp, m);
#endif /* __FreeBSD__ */

	/* protect against netmap_rxsync_from_host(), netmap_sw_to_nic()
	 * and maybe other instances of netmap_transmit (the mbuf queue
	 * is the synchronization point) */
	mbq_lock(q);

	/* slots occupied in the ring + mbufs already queued must leave
	 * at least one free slot */
	busy = kring->nr_hwtail - kring->nr_hwcur;
	if (busy < 0)
		busy += kring->nkr_num_slots;
	if (busy + mbq_len(q) >= kring->nkr_num_slots - 1) {
		nm_prlim(2, "%s full hwcur %d hwtail %d qlen %d", na->name,
			kring->nr_hwcur, kring->nr_hwtail, mbq_len(q));
	} else {
		mbq_enqueue(q, m);
		nm_prdis(2, "%s %d bufs in queue", na->name, mbq_len(q));
		/* ownership transferred to the queue */
		m = NULL;
		error = 0;
	}
	mbq_unlock(q);

done:
	/* on any failure the mbuf is still ours: count the drop and free it */
	if (m) {
		if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1);
		m_freem(m);
	}
	/* unconditionally wake up listeners */
	kring->nm_notify(kring, 0);
	/* this is normally netmap_notify(), but for nics
	 * connected to a bridge it is netmap_bwrap_intr_notify(),
	 * that possibly forwards the frames through the switch
	 */
	return (error);
}
/*
 * netmap_reset - called by the driver on a reinit of ring n in
 * direction tx, with the application's new current position new_cur.
 * Re-derives the kring state (hwcur/hwtail/hwofs) so that the ring
 * contents stay consistent across the hardware reset, and returns the
 * slot array (or NULL if the interface/ring is not in native netmap
 * mode, in which case the driver should proceed with a normal reset).
 */
struct netmap_slot *
netmap_reset(struct netmap_adapter *na, enum txrx tx, u_int n,
	u_int new_cur)
{
	struct netmap_kring *kring;
	u_int new_hwtail, new_hwofs;

	if (!nm_native_on(na)) {
		nm_prdis("interface not in native netmap mode");
		return NULL;	/* nothing to reinitialize */
	}

	if (tx == NR_TX) {
		if (n >= na->num_tx_rings)
			return NULL;
		kring = na->tx_rings[n];
		/*
		 * Set hwofs to rhead, so that slots[rhead] is mapped to
		 * the last slot processed by the driver (and hwtail is
		 * set to one slot before rhead, to keep the ring empty).
		 */
		new_hwofs = kring->rhead;
		new_hwtail = nm_prev(kring->rhead, kring->nkr_num_slots - 1);
	} else {
		if (n >= na->num_rx_rings)
			return NULL;
		kring = na->rx_rings[n];
		/*
		 * Set hwofs to hwtail, so that slots[hwtail] is mapped
		 * to the last slot processed by the driver (and the ring
		 * appears empty to userspace until refilled).
		 */
		new_hwofs = kring->nr_hwtail;
		new_hwtail = kring->nr_hwtail;
	}
	if (kring->nr_pending_mode == NKR_NETMAP_OFF) {
		kring->nr_mode = NKR_NETMAP_OFF;
		return NULL;
	}
	if (netmap_verbose) {
	    nm_prinf("%s, hc %u->%u, ht %u->%u, ho %u->%u", kring->name,
	        kring->nr_hwcur, kring->rhead,
	        kring->nr_hwtail, new_hwtail,
		kring->nkr_hwofs, new_hwofs);
	}
	kring->nr_hwcur = kring->rhead;
	kring->nr_hwtail = new_hwtail;
	kring->nkr_hwofs = new_hwofs;

	/*
	 * Wakeup on the individual and global selwait: the driver will
	 * do a double-check of the head and trigger a new txsync/rxsync
	 * if needed.
	 */
	kring->nr_mode = NKR_NETMAP_ON;
	kring->nm_notify(kring, 0);
	return kring->ring->slot;
}
/*
 * Dispatch a device interrupt for queue q to the matching kring.
 * The direction is inferred from work_done: non-NULL means RX
 * (and *work_done is set to 1 so the driver skips its own
 * processing), NULL means TX. Returns NM_IRQ_PASS when the queue is
 * not handled by netmap, otherwise the result of the kring notify.
 */
int
netmap_common_irq(struct netmap_adapter *na, u_int q, u_int *work_done)
{
	enum txrx t = (work_done != NULL) ? NR_RX : NR_TX;
	struct netmap_kring *kring;

	q &= NETMAP_RING_MASK;

	if (netmap_debug & (NM_DEBUG_RXINTR|NM_DEBUG_TXINTR)) {
		nm_prlim(5, "received %s queue %d", work_done ? "RX" : "TX" , q);
	}

	if (q >= nma_get_nrings(na, t))
		return NM_IRQ_PASS;	/* not a netmap-managed ring */

	kring = NMR(na, t)[q];
	if (kring->nr_mode == NKR_NETMAP_OFF)
		return NM_IRQ_PASS;

	if (t == NR_RX) {
		kring->nr_kflags |= NKR_PENDINTR;	/* may be cleared on rxsync */
		*work_done = 1;		/* do not fire napi again */
	}

	return kring->nm_notify(kring, 0);
}
/*
 * Entry point for drivers on RX (and, via the common helper, TX)
 * interrupts. Returns NM_IRQ_PASS when the interface is not in netmap
 * mode or interrupts are being skipped, so the driver falls back to
 * its normal processing.
 */
int
netmap_rx_irq(if_t ifp, u_int q, u_int *work_done)
{
	struct netmap_adapter *na = NA(ifp);

	if (!nm_netmap_on(na))
		return NM_IRQ_PASS;

	if (na->na_flags & NAF_SKIP_INTR) {
		nm_prdis("use regular interrupt");
		return NM_IRQ_PASS;
	}

	return netmap_common_irq(na, q, work_done);
}
/*
 * Turn the adapter into netmap mode (set NAF_NETMAP_ON and intercept
 * the host stack), but only on the first registration: later fds
 * find the flags already set.
 */
void
nm_set_native_flags(struct netmap_adapter *na)
{
	if_t ifp = na->ifp;

	if (na->active_fds > 0)
		return;	/* not the first registration */

	na->na_flags |= NAF_NETMAP_ON;
	nm_os_onenter(ifp);
	netmap_update_hostrings_mode(na);
}
/*
 * Undo nm_set_native_flags() when the last fd unregisters: restore
 * the host rings, give the interface back to the stack and clear
 * NAF_NETMAP_ON.
 */
void
nm_clear_native_flags(struct netmap_adapter *na)
{
	if_t ifp = na->ifp;

	if (na->active_fds > 0)
		return;	/* not the last unregistration */

	netmap_update_hostrings_mode(na);
	nm_os_onexit(ifp);
	na->na_flags &= ~NAF_NETMAP_ON;
}
/*
 * Commit the pending mode of every ring of the adapter: rings with a
 * pending switch-on become NKR_NETMAP_ON when onoff is set, rings
 * with a pending switch-off become NKR_NETMAP_OFF when it is not.
 */
void
netmap_krings_mode_commit(struct netmap_adapter *na, int onoff)
{
	enum txrx t;
	int i;

	for_rx_tx(t) {
		for (i = 0; i < netmap_real_rings(na, t); i++) {
			struct netmap_kring *kring = NMR(na, t)[i];

			if (onoff) {
				if (nm_kring_pending_on(kring))
					kring->nr_mode = NKR_NETMAP_ON;
			} else {
				if (nm_kring_pending_off(kring))
					kring->nr_mode = NKR_NETMAP_OFF;
			}
		}
	}
}
static struct cdev *netmap_dev;
extern struct cdevsw netmap_cdevsw;
void
netmap_fini(void)
{
if (netmap_dev)
destroy_dev(netmap_dev);
nm_os_ifnet_fini();
netmap_uninit_bridges();
netmap_mem_fini();
NMG_LOCK_DESTROY();
nm_prinf("netmap: unloaded module.");
}
/*
 * Module load: initialize the global lock, the memory allocator, the
 * /dev/netmap control device, the VALE bridges and the ifnet hooks.
 * On any failure everything done so far is undone by netmap_fini()
 * and EINVAL is returned (even when the underlying error differs).
 */
int
netmap_init(void)
{
	int error;

	NMG_LOCK_INIT();

	error = netmap_mem_init();
	if (error != 0)
		goto fail;

	/* the control device the API is accessed through */
	netmap_dev = make_dev_credf(MAKEDEV_ETERNAL_KLD,
		&netmap_cdevsw, 0, NULL, UID_ROOT, GID_WHEEL, 0600,
		"netmap");
	if (netmap_dev == NULL)
		goto fail;

	error = netmap_init_bridges();
	if (error)
		goto fail;

#ifdef __FreeBSD__
	nm_os_vi_init_index();
#endif

	error = nm_os_ifnet_init();
	if (error)
		goto fail;

#if !defined(__FreeBSD__) || defined(KLD_MODULE)
	nm_prinf("netmap: loaded module");
#endif
	return (0);
fail:
	netmap_fini();
	return (EINVAL);	/* also on allocation failures, by design */
}