#if defined(__FreeBSD__)
#include <sys/cdefs.h>
#include <sys/types.h>
#include <sys/errno.h>
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/conf.h>
#include <sys/sockio.h>
#include <sys/socketvar.h>
#include <sys/malloc.h>
#include <sys/poll.h>
#include <sys/rwlock.h>
#include <sys/socket.h>
#include <sys/selinfo.h>
#include <sys/sysctl.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/bpf.h>
#include <machine/bus.h>
#include <sys/endian.h>
#include <sys/refcount.h>
#include <sys/smp.h>
#elif defined(linux)
#include "bsd_glue.h"
#elif defined(__APPLE__)
#warning OSX support is only partial
#include "osx_glue.h"
#elif defined(_WIN32)
#include "win_glue.h"
#else
#error Unsupported platform
#endif
#include <net/netmap.h>
#include <dev/netmap/netmap_kern.h>
#include <dev/netmap/netmap_mem2.h>
#include <dev/netmap/netmap_bdg.h>
#ifdef WITH_VALE
#define NM_BDG_MAXRINGS 16 /* max rings per VALE port */
#define NM_BDG_MAXSLOTS 4096 /* max slots per ring */
#define NM_BRIDGE_RINGSIZE 1024 /* default ring size */
#define NM_BDG_BATCH 1024 /* entries in the forwarding buffer */
/* actual size of the forwarding tables */
#define NM_BDG_BATCH_MAX (NM_BDG_BATCH + NETMAP_MAX_FRAGS)
/* NM_FT_NULL terminates a list of slots in the forwarding table */
#define NM_FT_NULL NM_BDG_BATCH_MAX
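/* Max number of slots processed in one bridge forwarding pass;
 * tunable at runtime through the bridge_batch sysctl below. */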
static int bridge_batch = NM_BDG_BATCH;
unsigned int vale_max_bridges = NM_BRIDGES;
SYSBEGIN(vars_vale);
SYSCTL_DECL(_dev_netmap);
SYSCTL_INT(_dev_netmap, OID_AUTO, bridge_batch, CTLFLAG_RW, &bridge_batch, 0,
"Max batch size to be used in the bridge");
SYSCTL_UINT(_dev_netmap, OID_AUTO, max_bridges, CTLFLAG_RDTUN, &vale_max_bridges, 0,
"Max number of vale bridges");
SYSEND;
static int netmap_vale_vp_create(struct nmreq_header *hdr, if_t,
struct netmap_mem_d *nmd, struct netmap_vp_adapter **);
static int netmap_vale_vp_bdg_attach(const char *, struct netmap_adapter *,
struct nm_bridge *);
static int netmap_vale_bwrap_attach(const char *, struct netmap_adapter *);
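/*
 * For each output (port, ring) pair, nm_vale_q is used to build a list
 * of pending packets in the forwarding table: bq_head/bq_tail index
 * the ft entries (NM_FT_NULL when empty), bq_len counts the buffers.
 */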
struct nm_vale_q {
uint16_t bq_head;
uint16_t bq_tail;
uint32_t bq_len;
};
struct netmap_bdg_ops vale_bdg_ops = {
.lookup = netmap_vale_learning,
.config = NULL,
.dtor = NULL,
.vp_create = netmap_vale_vp_create,
.bwrap_attach = netmap_vale_bwrap_attach,
.name = NM_BDG_NAME,
};
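/*
 * Slightly optimized copy routine: short packets are copied eight
 * 64-bit words per iteration, stepping the residual length by
 * NM_BUF_ALIGN (assumed to be 64 bytes here, matching the eight
 * copies per pass). We assume there is enough room in the source and
 * destination buffers to round up; larger packets fall back to memcpy().
 */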
static inline void
pkt_copy(void *_src, void *_dst, int l)
{
uint64_t *src = _src;
uint64_t *dst = _dst;
if (unlikely(l >= 1024)) {
memcpy(dst, src, l);
return;
}
for (; likely(l > 0); l -= NM_BUF_ALIGN) {
*dst++ = *src++;
*dst++ = *src++;
*dst++ = *src++;
*dst++ = *src++;
*dst++ = *src++;
*dst++ = *src++;
*dst++ = *src++;
*dst++ = *src++;
}
}
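/* Free the forwarding tables attached to the tx rings of a VALE port. */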
static void
nm_free_bdgfwd(struct netmap_adapter *na)
{
int nrings, i;
struct netmap_kring **kring;
NMG_LOCK_ASSERT();
nrings = na->num_tx_rings;
kring = na->tx_rings;
for (i = 0; i < nrings; i++) {
if (kring[i]->nkr_ft) {
nm_os_free(kring[i]->nkr_ft);
kring[i]->nkr_ft = NULL;
}
}
}
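/*
 * Allocate the forwarding tables for the tx rings. Each ring gets a
 * single allocation holding the nm_bdg_fwd batch entries, one
 * nm_vale_q per destination (port, ring) queue plus one for broadcast,
 * and a scratch array of destination indices.
 */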
static int
nm_alloc_bdgfwd(struct netmap_adapter *na)
{
int nrings, l, i, num_dstq;
struct netmap_kring **kring;
NMG_LOCK_ASSERT();
num_dstq = NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1;
l = sizeof(struct nm_bdg_fwd) * NM_BDG_BATCH_MAX;
l += sizeof(struct nm_vale_q) * num_dstq;
l += sizeof(uint16_t) * NM_BDG_BATCH_MAX;
nrings = netmap_real_rings(na, NR_TX);
kring = na->tx_rings;
for (i = 0; i < nrings; i++) {
struct nm_bdg_fwd *ft;
struct nm_vale_q *dstq;
int j;
ft = nm_os_malloc(l);
if (!ft) {
nm_free_bdgfwd(na);
return ENOMEM;
}
dstq = (struct nm_vale_q *)(ft + NM_BDG_BATCH_MAX);
for (j = 0; j < num_dstq; j++) {
dstq[j].bq_head = dstq[j].bq_tail = NM_FT_NULL;
dstq[j].bq_len = 0;
}
kring[i]->nkr_ft = ft;
}
return 0;
}
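/*
 * Create a VALE bridge in exclusive mode and return an opaque
 * authentication token that external modules must present on later
 * attach/detach and configuration operations on the bridge.
 */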
void *
netmap_vale_create(const char *bdg_name, int *return_status)
{
struct nm_bridge *b = NULL;
void *ret = NULL;
NMG_LOCK();
b = nm_find_bridge(bdg_name, 0 /* don't create */, NULL);
if (b) {
*return_status = EEXIST;
goto unlock_bdg_create;
}
b = nm_find_bridge(bdg_name, 1 /* create */, &vale_bdg_ops);
if (!b) {
*return_status = ENOMEM;
goto unlock_bdg_create;
}
b->bdg_flags |= NM_BDG_ACTIVE | NM_BDG_EXCLUSIVE;
ret = nm_bdg_get_auth_token(b);
*return_status = 0;
unlock_bdg_create:
NMG_UNLOCK();
return ret;
}
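/*
 * Destroy a bridge previously created in exclusive mode. Fails unless
 * the caller presents the right authentication token and the bridge
 * is still in exclusive mode.
 */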
int
netmap_vale_destroy(const char *bdg_name, void *auth_token)
{
struct nm_bridge *b = NULL;
int ret = 0;
NMG_LOCK();
b = nm_find_bridge(bdg_name, 0 /* don't create */, NULL);
if (!b) {
ret = ENXIO;
goto unlock_bdg_free;
}
if (!nm_bdg_valid_auth_token(b, auth_token)) {
ret = EACCES;
goto unlock_bdg_free;
}
if (!(b->bdg_flags & NM_BDG_EXCLUSIVE)) {
ret = EINVAL;
goto unlock_bdg_free;
}
b->bdg_flags &= ~(NM_BDG_EXCLUSIVE | NM_BDG_ACTIVE);
ret = netmap_bdg_free(b);
if (ret) {
b->bdg_flags |= NM_BDG_EXCLUSIVE | NM_BDG_ACTIVE;
}
unlock_bdg_free:
NMG_UNLOCK();
return ret;
}
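/*
 * Process NETMAP_REQ_VALE_LIST: if a name is given, look up that
 * bridge/port and return its indices; otherwise iterate from the
 * given (bridge, port) pair and return the next existing port name.
 */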
int
netmap_vale_list(struct nmreq_header *hdr)
{
struct nmreq_vale_list *req =
(struct nmreq_vale_list *)(uintptr_t)hdr->nr_body;
int namelen = strlen(hdr->nr_name);
struct nm_bridge *b, *bridges;
struct netmap_vp_adapter *vpna;
int error = 0, i, j;
u_int num_bridges;
netmap_bns_getbridges(&bridges, &num_bridges);
if (namelen) {
if (strncmp(hdr->nr_name, NM_BDG_NAME,
strlen(NM_BDG_NAME))) {
return EINVAL;
}
NMG_LOCK();
b = nm_find_bridge(hdr->nr_name, 0 /* don't create */, NULL);
if (!b) {
NMG_UNLOCK();
return ENOENT;
}
req->nr_bridge_idx = b - bridges;
req->nr_port_idx = NM_BDG_NOPORT;
for (j = 0; j < b->bdg_active_ports; j++) {
i = b->bdg_port_index[j];
vpna = b->bdg_ports[i];
if (vpna == NULL) {
nm_prerr("This should not happen");
continue;
}
if (!strcmp(vpna->up.name, hdr->nr_name)) {
req->nr_port_idx = i;
break;
}
}
NMG_UNLOCK();
} else {
i = req->nr_bridge_idx;
j = req->nr_port_idx;
NMG_LOCK();
for (error = ENOENT; i < vale_max_bridges; i++) {
b = bridges + i;
for ( ; j < NM_BDG_MAXPORTS; j++) {
if (b->bdg_ports[j] == NULL)
continue;
vpna = b->bdg_ports[j];
strlcpy(hdr->nr_name, vpna->up.name,
sizeof(hdr->nr_name));
error = 0;
goto out;
}
j = 0;
}
out:
req->nr_bridge_idx = i;
req->nr_port_idx = j;
NMG_UNLOCK();
}
return error;
}
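/* nm_dtor callback for VALE ports: detach the port from its bridge
 * and, if the interface was created with autodelete, release it. */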
static void
netmap_vale_vp_dtor(struct netmap_adapter *na)
{
struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter*)na;
struct nm_bridge *b = vpna->na_bdg;
nm_prdis("%s has %d references", na->name, na->na_refcount);
if (b) {
netmap_bdg_detach_common(b, vpna->bdg_port, -1);
}
if (na->ifp != NULL && !nm_iszombie(na)) {
NM_DETACH_NA(na->ifp);
if (vpna->autodelete) {
nm_prdis("releasing %s", if_name(na->ifp));
NMG_UNLOCK();
nm_os_vi_detach(na->ifp);
NMG_LOCK();
}
}
}
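/* nm_krings_create callback for VALE ports: create the standard
 * krings, then add the leases array on the rx rings and the
 * forwarding tables on the tx rings. */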
static int
netmap_vale_vp_krings_create(struct netmap_adapter *na)
{
u_int tailroom;
int error, i;
uint32_t *leases;
u_int nrx = netmap_real_rings(na, NR_RX);
tailroom = sizeof(uint32_t) * na->num_rx_desc * nrx;
error = netmap_krings_create(na, tailroom);
if (error)
return error;
leases = na->tailroom;
for (i = 0; i < nrx; i++) {
na->rx_rings[i]->nkr_leases = leases;
leases += na->num_rx_desc;
}
error = nm_alloc_bdgfwd(na);
if (error) {
netmap_krings_delete(na);
return error;
}
return 0;
}
static void
netmap_vale_vp_krings_delete(struct netmap_adapter *na)
{
nm_free_bdgfwd(na);
netmap_krings_delete(na);
}
static int
nm_vale_flush(struct nm_bdg_fwd *ft, u_int n,
struct netmap_vp_adapter *na, u_int ring_nr);
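/*
 * Main dispatch routine for the bridge: grab packets from the kring
 * up to 'end', move them into the forwarding table associated with
 * the tx (input) port, and flush in batches of at most bridge_batch
 * slots. Returns the next position in the ring, i.e. how far we got.
 */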
static int
nm_vale_preflush(struct netmap_kring *kring, u_int end)
{
struct netmap_vp_adapter *na =
(struct netmap_vp_adapter*)kring->na;
struct netmap_ring *ring = kring->ring;
struct nm_bdg_fwd *ft;
u_int ring_nr = kring->ring_id;
u_int j = kring->nr_hwcur, lim = kring->nkr_num_slots - 1;
u_int ft_i = 0;
u_int frags = 1;
struct nm_bridge *b = na->na_bdg;
nm_prdis("wait rlock for %d packets", ((j > end ? lim+1 : 0) + end) - j);
if (na->up.na_flags & NAF_BDG_MAYSLEEP)
BDG_RLOCK(b);
else if (!BDG_RTRYLOCK(b))
return j;
nm_prdis(5, "rlock acquired for %d packets", ((j > end ? lim+1 : 0) + end) - j);
ft = kring->nkr_ft;
for (; likely(j != end); j = nm_next(j, lim)) {
struct netmap_slot *slot = &ring->slot[j];
char *buf;
ft[ft_i].ft_len = slot->len;
ft[ft_i].ft_flags = slot->flags;
ft[ft_i].ft_offset = 0;
nm_prdis("flags is 0x%x", slot->flags);
slot->flags &= ~NS_BUF_CHANGED;
ft[ft_i].ft_next = NM_FT_NULL;
buf = ft[ft_i].ft_buf = (slot->flags & NS_INDIRECT) ?
(void *)(uintptr_t)slot->ptr : NMB_O(kring, slot);
if (unlikely(buf == NULL ||
slot->len > NETMAP_BUF_SIZE(&na->up) - nm_get_offset(kring, slot))) {
nm_prlim(5, "NULL %s buffer pointer from %s slot %d len %d",
(slot->flags & NS_INDIRECT) ? "INDIRECT" : "DIRECT",
kring->name, j, ft[ft_i].ft_len);
buf = ft[ft_i].ft_buf = NETMAP_BUF_BASE(&na->up);
ft[ft_i].ft_len = 0;
ft[ft_i].ft_flags = 0;
}
__builtin_prefetch(buf);
++ft_i;
if (slot->flags & NS_MOREFRAG) {
frags++;
continue;
}
if (unlikely(netmap_verbose && frags > 1))
nm_prlim(5, "%d frags at %d", frags, ft_i - frags);
ft[ft_i - frags].ft_frags = frags;
frags = 1;
if (unlikely((int)ft_i >= bridge_batch))
ft_i = nm_vale_flush(ft, ft_i, na, ring_nr);
}
if (frags > 1) {
frags--;
ft[ft_i - 1].ft_flags &= ~NS_MOREFRAG;
ft[ft_i - frags].ft_frags = frags;
nm_prlim(5, "Truncate incomplete fragment at %d (%d frags)", ft_i, frags);
}
if (ft_i)
ft_i = nm_vale_flush(ft, ft_i, na, ring_nr);
BDG_RUNLOCK(b);
return j;
}
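/*
 * Hash mixing step adapted from the public-domain hash functions by
 * Bob Jenkins; used below to hash MAC addresses into the bridge
 * forwarding table.
 */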
#define mix(a, b, c) \
do { \
a -= b; a -= c; a ^= (c >> 13); \
b -= c; b -= a; b ^= (a << 8); \
c -= a; c -= b; c ^= (b >> 13); \
a -= b; a -= c; a ^= (c >> 12); \
b -= c; b -= a; b ^= (a << 16); \
c -= a; c -= b; c ^= (b >> 5); \
a -= b; a -= c; a ^= (c >> 3); \
b -= c; b -= a; b ^= (a << 10); \
c -= a; c -= b; c ^= (b >> 15); \
} while (0)
static __inline uint32_t
nm_vale_rthash(const uint8_t *addr)
{
uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = 0;
b += addr[5] << 8;
b += addr[4];
a += addr[3] << 24;
a += addr[2] << 16;
a += addr[1] << 8;
a += addr[0];
mix(a, b, c);
#define BRIDGE_RTHASH_MASK (NM_BDG_HASH-1)
return (c & BRIDGE_RTHASH_MASK);
}
#undef mix
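/*
 * Lookup function for a learning bridge: update the hash table with
 * the source address, then return the destination port index (or
 * broadcast). The destination ring in *dst_ring is left unchanged
 * here, so the caller's default is used.
 */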
uint32_t
netmap_vale_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring,
struct netmap_vp_adapter *na, void *private_data)
{
uint8_t *buf = ((uint8_t *)ft->ft_buf) + ft->ft_offset;
u_int buf_len = ft->ft_len - ft->ft_offset;
struct nm_hash_ent *ht = private_data;
uint32_t sh, dh;
u_int dst, mysrc = na->bdg_port;
uint64_t smac, dmac;
uint8_t indbuf[12];
if (buf_len < 14) {
return NM_BDG_NOPORT;
}
if (ft->ft_flags & NS_INDIRECT) {
if (copyin(buf, indbuf, sizeof(indbuf))) {
return NM_BDG_NOPORT;
}
buf = indbuf;
}
dmac = le64toh(*(uint64_t *)(buf)) & 0xffffffffffff;
smac = le64toh(*(uint64_t *)(buf + 4));
smac >>= 16;
if (((buf[6] & 1) == 0) && (na->last_smac != smac)) { /* valid src */
uint8_t *s = buf+6;
sh = nm_vale_rthash(s);
na->last_smac = ht[sh].mac = smac;
ht[sh].ports = mysrc;
if (netmap_debug & NM_DEBUG_VALE)
nm_prinf("src %02x:%02x:%02x:%02x:%02x:%02x on port %d",
s[0], s[1], s[2], s[3], s[4], s[5], mysrc);
}
dst = NM_BDG_BROADCAST;
if ((buf[0] & 1) == 0) { /* unicast destination */
dh = nm_vale_rthash(buf); /* hash of dst */
if (ht[dh].mac == dmac) { /* found dst */
dst = ht[dh].ports;
}
}
return dst;
}
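/*
 * Space available for new leases on the ring: the slots between the
 * current lease pointer and the region still owned by userspace (rx)
 * or the hardware tail (tx). VALE only uses this with is_rx = 1.
 */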
static inline uint32_t
nm_kr_space(struct netmap_kring *k, int is_rx)
{
int space;
if (is_rx) {
int busy = k->nkr_hwlease - k->nr_hwcur;
if (busy < 0)
busy += k->nkr_num_slots;
space = k->nkr_num_slots - 1 - busy;
} else {
space = k->nr_hwtail - k->nkr_hwlease;
if (space < 0)
space += k->nkr_num_slots;
}
#if 0
if (k->nkr_hwlease >= k->nkr_num_slots ||
k->nr_hwcur >= k->nkr_num_slots ||
k->nr_tail >= k->nkr_num_slots ||
busy < 0 ||
busy >= k->nkr_num_slots) {
nm_prerr("invalid kring, cur %d tail %d lease %d lease_idx %d lim %d",
k->nr_hwcur, k->nr_hwtail, k->nkr_hwlease,
k->nkr_lease_idx, k->nkr_num_slots);
}
#endif
return space;
}
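/*
 * Make a lease on the kring for n slots and return the lease index;
 * the lease is recorded in nkr_leases[] and completed, in order, by
 * the writers once they are done copying.
 */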
static inline uint32_t
nm_kr_lease(struct netmap_kring *k, u_int n, int is_rx)
{
uint32_t lim = k->nkr_num_slots - 1;
uint32_t lease_idx = k->nkr_lease_idx;
k->nkr_leases[lease_idx] = NR_NOSLOT;
k->nkr_lease_idx = nm_next(lease_idx, lim);
#ifdef CONFIG_NETMAP_DEBUG
if (n > nm_kr_space(k, is_rx)) {
nm_prerr("invalid request for %d slots", n);
panic("x");
}
#endif
k->nkr_hwlease += n;
if (k->nkr_hwlease > lim)
k->nkr_hwlease -= lim + 1;
#ifdef CONFIG_NETMAP_DEBUG
if (k->nkr_hwlease >= k->nkr_num_slots ||
k->nr_hwcur >= k->nkr_num_slots ||
k->nr_hwtail >= k->nkr_num_slots ||
k->nkr_lease_idx >= k->nkr_num_slots) {
nm_prerr("invalid kring %s, cur %d tail %d lease %d lease_idx %d lim %d",
k->na->name,
k->nr_hwcur, k->nr_hwtail, k->nkr_hwlease,
k->nkr_lease_idx, k->nkr_num_slots);
}
#endif
return lease_idx;
}
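/*
 * This flush routine supports only unicast and broadcast but a large
 * number of ports, and lets us replace the learn and dispatch
 * functions.
 */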
int
nm_vale_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na,
u_int ring_nr)
{
struct nm_vale_q *dst_ents, *brddst;
uint16_t num_dsts = 0, *dsts;
struct nm_bridge *b = na->na_bdg;
u_int i, me = na->bdg_port;
dst_ents = (struct nm_vale_q *)(ft + NM_BDG_BATCH_MAX);
dsts = (uint16_t *)(dst_ents + NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1);
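/* first pass: find a destination for each packet in the batch */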
for (i = 0; likely(i < n); i += ft[i].ft_frags) {
uint8_t dst_ring = ring_nr;
uint16_t dst_port, d_i;
struct nm_vale_q *d;
struct nm_bdg_fwd *start_ft = NULL;
nm_prdis("slot %d frags %d", i, ft[i].ft_frags);
if (na->up.virt_hdr_len < ft[i].ft_len) {
ft[i].ft_offset = na->up.virt_hdr_len;
start_ft = &ft[i];
} else if (na->up.virt_hdr_len == ft[i].ft_len && ft[i].ft_flags & NS_MOREFRAG) {
ft[i].ft_offset = ft[i].ft_len;
start_ft = &ft[i+1];
} else {
continue;
}
dst_port = b->bdg_ops.lookup(start_ft, &dst_ring, na, b->private_data);
if (netmap_verbose > 255)
nm_prlim(5, "slot %d port %d -> %d", i, me, dst_port);
if (dst_port >= NM_BDG_NOPORT)
continue; /* this packet is identified to be dropped */
else if (dst_port == NM_BDG_BROADCAST)
dst_ring = 0; /* broadcasts always go to ring 0 */
else if (unlikely(dst_port == me ||
!b->bdg_ports[dst_port]))
continue;
d_i = dst_port * NM_BDG_MAXRINGS + dst_ring;
d = dst_ents + d_i;
if (d->bq_head == NM_FT_NULL) {
d->bq_head = d->bq_tail = i;
if (dst_port != NM_BDG_BROADCAST)
dsts[num_dsts++] = d_i;
} else {
ft[d->bq_tail].ft_next = i;
d->bq_tail = i;
}
d->bq_len += ft[i].ft_frags;
}
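/*
 * Broadcast traffic goes to ring 0 on all destinations, so add those
 * rings to the list of ports to scan.
 */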
brddst = dst_ents + NM_BDG_BROADCAST * NM_BDG_MAXRINGS;
if (brddst->bq_head != NM_FT_NULL) {
u_int j;
for (j = 0; likely(j < b->bdg_active_ports); j++) {
uint16_t d_i;
i = b->bdg_port_index[j];
if (unlikely(i == me))
continue;
d_i = i * NM_BDG_MAXRINGS;
if (dst_ents[d_i].bq_head == NM_FT_NULL)
dsts[num_dsts++] = d_i;
}
}
nm_prdis(5, "pass 1 done %d pkts %d dsts", n, num_dsts);
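/* second pass: scan destinations and copy the queued packets out */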
for (i = 0; i < num_dsts; i++) {
struct netmap_vp_adapter *dst_na;
struct netmap_kring *kring;
struct netmap_ring *ring;
u_int dst_nr, lim, j, d_i, next, brd_next;
u_int needed, howmany;
int retry = netmap_txsync_retry;
struct nm_vale_q *d;
uint32_t my_start = 0, lease_idx = 0;
int nrings;
int virt_hdr_mismatch = 0;
d_i = dsts[i];
nm_prdis("second pass %d port %d", i, d_i);
d = dst_ents + d_i;
dst_na = b->bdg_ports[d_i/NM_BDG_MAXRINGS];
if (unlikely(dst_na == NULL))
goto cleanup;
if (dst_na->up.na_flags & NAF_SW_ONLY)
goto cleanup;
if (unlikely(!nm_netmap_on(&dst_na->up))) {
nm_prdis("not in netmap mode!");
goto cleanup;
}
brd_next = brddst->bq_head;
next = d->bq_head;
needed = d->bq_len + brddst->bq_len;
if (unlikely(dst_na->up.virt_hdr_len != na->up.virt_hdr_len)) {
if (netmap_verbose) {
nm_prlim(3, "virt_hdr_mismatch, src %d dst %d", na->up.virt_hdr_len,
dst_na->up.virt_hdr_len);
}
virt_hdr_mismatch = 1;
if (dst_na->mfs < na->mfs) {
KASSERT(dst_na->mfs > 0, ("vpna->mfs is 0"));
needed = (needed * na->mfs) /
(dst_na->mfs - WORST_CASE_GSO_HEADER) + 1;
nm_prdis(3, "srcmtu=%u, dstmtu=%u, x=%u", na->mfs, dst_na->mfs, needed);
}
}
nm_prdis(5, "pass 2 dst %d is %x %s",
i, d_i, nm_is_bwrap(&dst_na->up) ? "nic/host" : "virtual");
dst_nr = d_i & (NM_BDG_MAXRINGS-1);
nrings = dst_na->up.num_rx_rings;
if (dst_nr >= nrings)
dst_nr = dst_nr % nrings;
kring = dst_na->up.rx_rings[dst_nr];
ring = kring->ring;
if (unlikely(ring == NULL || kring->nr_mode != NKR_NETMAP_ON))
goto cleanup;
lim = kring->nkr_num_slots - 1;
retry:
if (dst_na->retry && retry) {
/* try to get some free slots from the previous run */
kring->nm_notify(kring, NAF_FORCE_RECLAIM);
}
mtx_lock(&kring->q_lock);
if (kring->nkr_stopped) {
mtx_unlock(&kring->q_lock);
goto cleanup;
}
my_start = j = kring->nkr_hwlease;
howmany = nm_kr_space(kring, 1);
if (needed < howmany)
howmany = needed;
lease_idx = nm_kr_lease(kring, howmany, 1);
mtx_unlock(&kring->q_lock);
if (retry && needed <= howmany)
retry = 0;
while (howmany > 0) {
struct netmap_slot *slot;
struct nm_bdg_fwd *ft_p, *ft_end;
u_int cnt;
if (next < brd_next) {
ft_p = ft + next;
next = ft_p->ft_next;
} else {
ft_p = ft + brd_next;
brd_next = ft_p->ft_next;
}
cnt = ft_p->ft_frags;
if (unlikely(cnt > howmany))
break;
if (netmap_verbose && cnt > 1)
nm_prlim(5, "rx %d frags to %d", cnt, j);
ft_end = ft_p + cnt;
if (unlikely(virt_hdr_mismatch)) {
bdg_mismatch_datapath(na, dst_na, ft_p, ring, &j, lim, &howmany);
} else {
howmany -= cnt;
do {
char *dst, *src = ft_p->ft_buf;
size_t copy_len = ft_p->ft_len, dst_len = copy_len;
uintptr_t src_cb;
uint64_t dstoff, dstoff_cb;
int src_co, dst_co;
const uintptr_t mask = NM_BUF_ALIGN - 1;
slot = &ring->slot[j];
dst = NMB(&dst_na->up, slot);
dstoff = nm_get_offset(kring, slot);
dstoff_cb = dstoff & ~mask;
src_cb = ((uintptr_t)src) & ~mask;
src_co = ((uintptr_t)src) & mask;
dst_co = ((uintptr_t)(dst + dstoff)) & mask;
if (dst_co < src_co) {
dstoff_cb += NM_BUF_ALIGN;
}
dstoff = dstoff_cb + src_co;
copy_len += src_co;
nm_prdis("send [%d] %d(%d) bytes at %s:%d",
i, (int)copy_len, (int)dst_len,
NM_IFPNAME(dst_ifp), j);
if (unlikely(dstoff > NETMAP_BUF_SIZE(&dst_na->up) ||
dst_len > NETMAP_BUF_SIZE(&dst_na->up) - dstoff)) {
nm_prlim(5, "dropping packet/fragment of len %zu, dest offset %llu",
dst_len, (unsigned long long)dstoff);
copy_len = dst_len = 0;
dstoff = nm_get_offset(kring, slot);
}
if (ft_p->ft_flags & NS_INDIRECT) {
if (copyin(src, dst, copy_len)) {
dst_len = 0;
}
} else {
pkt_copy((char *)src_cb, dst + dstoff_cb, (int)copy_len);
}
slot->len = dst_len;
slot->flags = (cnt << 8) | NS_MOREFRAG;
nm_write_offset(kring, slot, dstoff);
j = nm_next(j, lim);
needed--;
ft_p++;
} while (ft_p != ft_end);
slot->flags = (cnt << 8); /* clear NS_MOREFRAG on the last fragment */
}
if (next == NM_FT_NULL && brd_next == NM_FT_NULL)
break;
}
{
uint32_t *p = kring->nkr_leases;
uint32_t update_pos;
int still_locked = 1;
mtx_lock(&kring->q_lock);
if (unlikely(howmany > 0)) {
nm_prdis("leftover %d bufs", howmany);
if (nm_next(lease_idx, lim) == kring->nkr_lease_idx) {
nm_prdis("roll back nkr_hwlease to %d", j);
kring->nkr_hwlease = j;
} else {
while (howmany-- > 0) {
ring->slot[j].len = 0;
ring->slot[j].flags = 0;
j = nm_next(j, lim);
}
}
}
p[lease_idx] = j;
update_pos = kring->nr_hwtail;
if (my_start == update_pos) {
while (lease_idx != kring->nkr_lease_idx &&
p[lease_idx] != NR_NOSLOT) {
j = p[lease_idx];
p[lease_idx] = NR_NOSLOT;
lease_idx = nm_next(lease_idx, lim);
}
if (likely(j != my_start)) {
kring->nr_hwtail = j;
still_locked = 0;
mtx_unlock(&kring->q_lock);
kring->nm_notify(kring, 0);
if (dst_na->retry && retry--) {
goto retry;
}
}
}
if (still_locked)
mtx_unlock(&kring->q_lock);
}
cleanup:
d->bq_head = d->bq_tail = NM_FT_NULL;
d->bq_len = 0;
}
brddst->bq_head = brddst->bq_tail = NM_FT_NULL;
brddst->bq_len = 0;
return 0;
}
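/*
 * nm_txsync callback for VALE ports: forward the slots between
 * nr_hwcur and rhead through the bridge. hwcur advances to the last
 * forwarded slot; hwtail trails one slot behind it.
 */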
static int
netmap_vale_vp_txsync(struct netmap_kring *kring, int flags)
{
struct netmap_vp_adapter *na =
(struct netmap_vp_adapter *)kring->na;
u_int done;
u_int const lim = kring->nkr_num_slots - 1;
u_int const head = kring->rhead;
if (bridge_batch <= 0) {
done = head;
goto done;
}
if (!na->na_bdg) {
done = head;
goto done;
}
if (bridge_batch > NM_BDG_BATCH)
bridge_batch = NM_BDG_BATCH;
done = nm_vale_preflush(kring, head);
done:
if (done != head)
nm_prerr("early break at %d/ %d, tail %d", done, head, kring->nr_hwtail);
kring->nr_hwcur = done;
kring->nr_hwtail = nm_prev(done, lim);
if (netmap_debug & NM_DEBUG_TXSYNC)
nm_prinf("%s ring %d flags %d", na->up.name, kring->ring_id, flags);
return 0;
}
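/*
 * Create a netmap_vp_adapter that describes a VALE port. Only
 * persistent VALE ports (created through nm_vi_create) have a
 * non-NULL ifp.
 */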
static int
netmap_vale_vp_create(struct nmreq_header *hdr, if_t ifp,
struct netmap_mem_d *nmd, struct netmap_vp_adapter **ret)
{
struct nmreq_register *req = (struct nmreq_register *)(uintptr_t)hdr->nr_body;
struct netmap_vp_adapter *vpna;
struct netmap_adapter *na;
int error = 0;
u_int npipes = 0;
u_int extrabufs = 0;
if (hdr->nr_reqtype != NETMAP_REQ_REGISTER) {
return EINVAL;
}
vpna = nm_os_malloc(sizeof(*vpna));
if (vpna == NULL)
return ENOMEM;
na = &vpna->up;
na->ifp = ifp;
strlcpy(na->name, hdr->nr_name, sizeof(na->name));
na->num_tx_rings = req->nr_tx_rings;
nm_bound_var(&na->num_tx_rings, 1, 1, NM_BDG_MAXRINGS, NULL);
req->nr_tx_rings = na->num_tx_rings;
na->num_rx_rings = req->nr_rx_rings;
nm_bound_var(&na->num_rx_rings, 1, 1, NM_BDG_MAXRINGS, NULL);
req->nr_rx_rings = na->num_rx_rings;
nm_bound_var(&req->nr_tx_slots, NM_BRIDGE_RINGSIZE,
1, NM_BDG_MAXSLOTS, NULL);
na->num_tx_desc = req->nr_tx_slots;
nm_bound_var(&req->nr_rx_slots, NM_BRIDGE_RINGSIZE,
1, NM_BDG_MAXSLOTS, NULL);
nm_bound_var(&npipes, 2, 1, NM_MAXPIPES, NULL);
extrabufs = req->nr_extra_bufs;
nm_bound_var(&extrabufs, 0, 0,
128*NM_BDG_MAXSLOTS, NULL);
req->nr_extra_bufs = extrabufs;
na->num_rx_desc = req->nr_rx_slots;
vpna->mfs = NM_BDG_MFS_DEFAULT;
vpna->last_smac = ~0llu;
if (netmap_verbose)
nm_prinf("max frame size %u", vpna->mfs);
na->na_flags |= (NAF_BDG_MAYSLEEP | NAF_OFFSETS);
/* persistent VALE ports look like hw devices with a native netmap adapter */
if (ifp)
na->na_flags |= NAF_NATIVE;
na->nm_txsync = netmap_vale_vp_txsync;
na->nm_rxsync = netmap_vp_rxsync;
na->nm_register = netmap_vp_reg;
na->nm_krings_create = netmap_vale_vp_krings_create;
na->nm_krings_delete = netmap_vale_vp_krings_delete;
na->nm_dtor = netmap_vale_vp_dtor;
nm_prdis("nr_mem_id %d", req->nr_mem_id);
na->nm_mem = nmd ?
netmap_mem_get(nmd):
netmap_mem_private_new(
na->num_tx_rings, na->num_tx_desc,
na->num_rx_rings, na->num_rx_desc,
req->nr_extra_bufs, npipes, &error);
if (na->nm_mem == NULL)
goto err;
na->nm_bdg_attach = netmap_vale_vp_bdg_attach;
error = netmap_attach_common(na);
if (error)
goto err;
*ret = vpna;
return 0;
err:
if (na->nm_mem != NULL)
netmap_mem_put(na->nm_mem);
nm_os_free(vpna);
return error;
}
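/* nm_bdg_attach callback for VALE ports: attach directly (na_vp is
 * the port itself, no host port) unless the bridge requires a bwrap
 * or the port already belongs to a bridge. */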
static int
netmap_vale_vp_bdg_attach(const char *name, struct netmap_adapter *na,
struct nm_bridge *b)
{
struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter *)na;
if ((b->bdg_flags & NM_BDG_NEED_BWRAP) || vpna->na_bdg) {
return NM_NEED_BWRAP;
}
na->na_vp = vpna;
strlcpy(na->name, name, sizeof(na->name));
na->na_hostvp = NULL;
return 0;
}
static int
netmap_vale_bwrap_krings_create(struct netmap_adapter *na)
{
int error;
error = netmap_vale_vp_krings_create(na);
if (error)
return error;
error = netmap_bwrap_krings_create_common(na);
if (error) {
netmap_vale_vp_krings_delete(na);
}
return error;
}
static void
netmap_vale_bwrap_krings_delete(struct netmap_adapter *na)
{
netmap_bwrap_krings_delete_common(na);
netmap_vale_vp_krings_delete(na);
}
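/*
 * Build a bwrap (bridge wrapper) around a hardware adapter so it can
 * be connected to a VALE switch, including its host rings when
 * available.
 */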
static int
netmap_vale_bwrap_attach(const char *nr_name, struct netmap_adapter *hwna)
{
struct netmap_bwrap_adapter *bna;
struct netmap_adapter *na = NULL;
struct netmap_adapter *hostna = NULL;
int error;
bna = nm_os_malloc(sizeof(*bna));
if (bna == NULL) {
return ENOMEM;
}
na = &bna->up.up;
strlcpy(na->name, nr_name, sizeof(na->name));
na->nm_register = netmap_bwrap_reg;
na->nm_txsync = netmap_vale_vp_txsync;
na->nm_krings_create = netmap_vale_bwrap_krings_create;
na->nm_krings_delete = netmap_vale_bwrap_krings_delete;
na->nm_notify = netmap_bwrap_notify;
bna->nm_intr_notify = netmap_bwrap_intr_notify;
bna->up.retry = 1;
bna->up.mfs = NM_BDG_MFS_DEFAULT;
if (hwna->na_flags & NAF_HOST_RINGS) {
hostna = &bna->host.up;
hostna->nm_notify = netmap_bwrap_notify;
bna->host.mfs = NM_BDG_MFS_DEFAULT;
}
error = netmap_bwrap_attach_common(na, hwna);
if (error) {
nm_os_free(bna);
}
return error;
}
int
netmap_get_vale_na(struct nmreq_header *hdr, struct netmap_adapter **na,
struct netmap_mem_d *nmd, int create)
{
return netmap_get_bdg_na(hdr, na, nmd, create, &vale_bdg_ops);
}
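/* Create a persistent VALE port (NETMAP_REQ_VALE_NEWIF): rewrap the
 * request as a register request, then copy back the actual values. */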
int
nm_vi_create(struct nmreq_header *hdr)
{
struct nmreq_vale_newif *req =
(struct nmreq_vale_newif *)(uintptr_t)hdr->nr_body;
int error = 0;
struct nmreq_register regreq;
bzero(&regreq, sizeof(regreq));
regreq.nr_tx_slots = req->nr_tx_slots;
regreq.nr_rx_slots = req->nr_rx_slots;
regreq.nr_tx_rings = req->nr_tx_rings;
regreq.nr_rx_rings = req->nr_rx_rings;
regreq.nr_mem_id = req->nr_mem_id;
hdr->nr_reqtype = NETMAP_REQ_REGISTER;
hdr->nr_body = (uintptr_t)&regreq;
error = netmap_vi_create(hdr, 0 /* no autodelete */);
hdr->nr_reqtype = NETMAP_REQ_VALE_NEWIF;
hdr->nr_body = (uintptr_t)req;
req->nr_tx_slots = regreq.nr_tx_slots;
req->nr_rx_slots = regreq.nr_rx_slots;
req->nr_tx_rings = regreq.nr_tx_rings;
req->nr_rx_rings = regreq.nr_rx_rings;
req->nr_mem_id = regreq.nr_mem_id;
return error;
}
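/* Remove a persistent VALE port from the system. */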
int
nm_vi_destroy(const char *name)
{
if_t ifp;
struct netmap_vp_adapter *vpna;
int error;
ifp = ifunit_ref(name);
if (!ifp)
return ENXIO;
NMG_LOCK();
if (!NM_NA_VALID(ifp) || NA(ifp)->nm_register != netmap_vp_reg) {
error = EINVAL;
goto err;
}
vpna = (struct netmap_vp_adapter *)NA(ifp);
if (vpna->autodelete) {
error = EINVAL;
goto err;
}
if (NETMAP_OWNED_BY_ANY(&vpna->up) ||
vpna->up.na_refcount > 1 /* any ref besides the ifp one */) {
error = EBUSY;
goto err;
}
NMG_UNLOCK();
if (netmap_verbose)
nm_prinf("destroying a persistent vale interface %s", if_name(ifp));
netmap_detach(ifp);
if_rele(ifp);
nm_os_vi_detach(ifp);
return 0;
err:
NMG_UNLOCK();
if_rele(ifp);
return error;
}
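/* Copy the adapter's actual ring/slot configuration and memory
 * information back into the register request. */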
static int
nm_update_info(struct nmreq_register *req, struct netmap_adapter *na)
{
req->nr_rx_rings = na->num_rx_rings;
req->nr_tx_rings = na->num_tx_rings;
req->nr_rx_slots = na->num_rx_desc;
req->nr_tx_slots = na->num_tx_desc;
return netmap_mem_get_info(na->nm_mem, &req->nr_memsize, NULL,
&req->nr_mem_id);
}
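/*
 * Create a virtual interface registered to the system; it can later
 * be attached to a bridge. With autodelete set the interface is
 * released when its netmap adapter is destroyed, otherwise it
 * persists until nm_vi_destroy().
 */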
int
netmap_vi_create(struct nmreq_header *hdr, int autodelete)
{
struct nmreq_register *req = (struct nmreq_register *)(uintptr_t)hdr->nr_body;
if_t ifp;
struct netmap_vp_adapter *vpna;
struct netmap_mem_d *nmd = NULL;
int error;
if (hdr->nr_reqtype != NETMAP_REQ_REGISTER) {
return EINVAL;
}
if (!strncmp(hdr->nr_name, NM_BDG_NAME, strlen(NM_BDG_NAME)))
return EINVAL;
if (strlen(hdr->nr_name) >= IFNAMSIZ) {
return EINVAL;
}
ifp = ifunit_ref(hdr->nr_name);
if (ifp) {
error = EEXIST;
NMG_LOCK();
if (NM_NA_VALID(ifp)) {
int update_err = nm_update_info(req, NA(ifp));
if (update_err)
error = update_err;
}
NMG_UNLOCK();
if_rele(ifp);
return error;
}
error = nm_os_vi_persist(hdr->nr_name, &ifp);
if (error)
return error;
NMG_LOCK();
if (req->nr_mem_id) {
nmd = netmap_mem_find(req->nr_mem_id);
if (nmd == NULL) {
error = EINVAL;
goto err_1;
}
}
error = netmap_vale_vp_create(hdr, ifp, nmd, &vpna);
if (error) {
if (netmap_debug & NM_DEBUG_VALE)
nm_prerr("error %d", error);
goto err_1;
}
vpna->up.nm_bdg_ctl = netmap_vp_bdg_ctl;
if (!autodelete) {
netmap_adapter_get(&vpna->up);
} else {
vpna->autodelete = 1;
}
NM_ATTACH_NA(ifp, &vpna->up);
error = nm_update_info(req, &vpna->up);
if (error) {
goto err_2;
}
nm_prdis("returning nr_mem_id %d", req->nr_mem_id);
if (nmd)
netmap_mem_put(nmd);
NMG_UNLOCK();
nm_prdis("created %s", if_name(ifp));
return 0;
err_2:
netmap_detach(ifp);
err_1:
if (nmd)
netmap_mem_put(nmd);
NMG_UNLOCK();
nm_os_vi_detach(ifp);
return error;
}
#endif