root/sys/net/if_vxlan.c
/*-
 * Copyright (c) 2014, Bryan Venteicher <bryanv@FreeBSD.org>
 * All rights reserved.
 * Copyright (c) 2020, Chelsio Communications.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "opt_inet.h"
#include "opt_inet6.h"

#include <sys/param.h>
#include <sys/eventhandler.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/hash.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/refcount.h>
#include <sys/rmlock.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/sbuf.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/systm.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/if_private.h>
#include <net/if_clone.h>
#include <net/if_dl.h>
#include <net/if_media.h>
#include <net/if_types.h>
#include <net/if_vxlan.h>
#include <net/netisr.h>
#include <net/route.h>
#include <net/route/nhop.h>

#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/in_var.h>
#include <netinet/in_pcb.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/ip_var.h>
#include <netinet/udp.h>
#include <netinet/udp_var.h>
#include <netinet/in_fib.h>
#include <netinet6/in6_fib.h>

#include <netinet6/ip6_var.h>
#include <netinet6/scope6_var.h>

/*
 * Forward declaration: the list head type below is needed by
 * struct vxlan_socket before struct vxlan_softc itself is defined.
 */
struct vxlan_softc;
LIST_HEAD(vxlan_softc_head, vxlan_softc);

/* Global sx lock; its description names it the VXLAN start/stop lock. */
struct sx vxlan_sx;
SX_SYSINIT(vxlan, &vxlan_sx, "VXLAN global start/stop lock");

/*
 * One slot of the vxlso_mc[] array in struct vxlan_socket.
 * vxlan_socket_destroy() asserts that every slot's vxlsomc_gaddr family
 * is AF_UNSPEC, so a slot with that family is not in use.
 */
struct vxlan_socket_mc_info {
        /* NOTE(review): presumably the local source address -- confirm. */
        union vxlan_sockaddr             vxlsomc_saddr;
        /* NOTE(review): presumably the multicast group address -- confirm. */
        union vxlan_sockaddr             vxlsomc_gaddr;
        /* NOTE(review): presumably an interface index -- confirm. */
        int                              vxlsomc_ifidx;
        /* NOTE(review): presumably a count of users of the slot -- confirm. */
        int                              vxlsomc_users;
};

/*
 * The maximum MTU of encapsulated ethernet frame within IPv4/UDP packet.
 */
#define VXLAN_MAX_MTU   (IP_MAXPACKET - \
                60 /* Maximum IPv4 header len */ - \
                sizeof(struct udphdr) - \
                sizeof(struct vxlan_header) - \
                ETHER_HDR_LEN - ETHER_VLAN_ENCAP_LEN)
#define VXLAN_BASIC_IFCAPS (IFCAP_LINKSTATE | IFCAP_JUMBO_MTU)

#define VXLAN_SO_MC_MAX_GROUPS          32

#define VXLAN_SO_VNI_HASH_SHIFT         6
#define VXLAN_SO_VNI_HASH_SIZE          (1 << VXLAN_SO_VNI_HASH_SHIFT)
#define VXLAN_SO_VNI_HASH(_vni)         ((_vni) % VXLAN_SO_VNI_HASH_SIZE)

/*
 * State kept for one socket used by vxlan interfaces. Instances are
 * created by vxlan_socket_alloc() with a zero reference count.
 */
struct vxlan_socket {
        struct socket                   *vxlso_sock;
        /* Read-mostly lock behind the VXLAN_SO_*LOCK macros below. */
        struct rmlock                    vxlso_lock;
        /* Manipulated through VXLAN_SO_ACQUIRE()/VXLAN_SO_RELEASE(). */
        u_int                            vxlso_refcnt;
        /* Copy of the address handed to vxlan_socket_alloc(). */
        union vxlan_sockaddr             vxlso_laddr;
        /* NOTE(review): presumably linkage on vxlan_socket_list -- confirm. */
        LIST_ENTRY(vxlan_socket)         vxlso_entry;
        /* Lists of softcs; NOTE(review): presumably indexed by VXLAN_SO_VNI_HASH(). */
        struct vxlan_softc_head          vxlso_vni_hash[VXLAN_SO_VNI_HASH_SIZE];
        struct vxlan_socket_mc_info      vxlso_mc[VXLAN_SO_MC_MAX_GROUPS];
};

/* Read lock/unlock take a caller-supplied struct rm_priotracker pointer. */
#define VXLAN_SO_RLOCK(_vso, _p)        rm_rlock(&(_vso)->vxlso_lock, (_p))
#define VXLAN_SO_RUNLOCK(_vso, _p)      rm_runlock(&(_vso)->vxlso_lock, (_p))
#define VXLAN_SO_WLOCK(_vso)            rm_wlock(&(_vso)->vxlso_lock)
#define VXLAN_SO_WUNLOCK(_vso)          rm_wunlock(&(_vso)->vxlso_lock)
#define VXLAN_SO_LOCK_ASSERT(_vso) \
    rm_assert(&(_vso)->vxlso_lock, RA_LOCKED)
#define VXLAN_SO_LOCK_WASSERT(_vso) \
    rm_assert(&(_vso)->vxlso_lock, RA_WLOCKED)

/* Reference counting on vxlso_refcnt. */
#define VXLAN_SO_ACQUIRE(_vso)          refcount_acquire(&(_vso)->vxlso_refcnt)
#define VXLAN_SO_RELEASE(_vso)          refcount_release(&(_vso)->vxlso_refcnt)

/*
 * A forwarding table entry: maps a MAC address to a remote address.
 */
struct vxlan_ftable_entry {
        /* Linkage within one bucket of the softc's vxl_ftable[]. */
        LIST_ENTRY(vxlan_ftable_entry)   vxlfe_hash;
        /* VXLAN_FE_FLAG_* bits. */
        uint16_t                         vxlfe_flags;
        /* Lookup key; buckets are ordered by this address. */
        uint8_t                          vxlfe_mac[ETHER_ADDR_LEN];
        union vxlan_sockaddr             vxlfe_raddr;
        /*
         * Set to time_uptime plus the softc's vxl_ftable_timeout;
         * vxlan_ftable_expire() only acts on it for dynamic entries.
         */
        time_t                           vxlfe_expire;
};

#define VXLAN_FE_FLAG_DYNAMIC           0x01
#define VXLAN_FE_FLAG_STATIC            0x02

/* Nonzero when the entry carries VXLAN_FE_FLAG_DYNAMIC. */
#define VXLAN_FE_IS_DYNAMIC(_fe) \
    ((_fe)->vxlfe_flags & VXLAN_FE_FLAG_DYNAMIC)

/*
 * The per-softc forwarding table has 1 << 9 = 512 buckets. The bucket
 * index is vxlan_mac_hash() reduced modulo the table size.
 */
#define VXLAN_SC_FTABLE_SHIFT           9
#define VXLAN_SC_FTABLE_SIZE            (1 << VXLAN_SC_FTABLE_SHIFT)
#define VXLAN_SC_FTABLE_MASK            (VXLAN_SC_FTABLE_SIZE - 1)
#define VXLAN_SC_FTABLE_HASH(_sc, _mac) \
    (vxlan_mac_hash(_sc, _mac) % VXLAN_SC_FTABLE_SIZE)

LIST_HEAD(vxlan_ftable_head, vxlan_ftable_entry);

/*
 * Per-softc statistics.
 */
struct vxlan_statistics {
        /* Bumped when learning finds the table at vxl_ftable_max. */
        uint32_t        ftable_nospace;
        /*
         * Bumped each time vxlan_ftable_update_locked() drops the read
         * lock to take the write lock.
         */
        uint32_t        ftable_lock_upgrade_failed;
        /* NOTE(review): presumably checksum/TSO offload counters -- confirm. */
        counter_u64_t   txcsum;
        counter_u64_t   tso;
        counter_u64_t   rxcsum;
};

/*
 * Per-interface software state for one vxlan interface.
 */
struct vxlan_softc {
        struct ifnet                    *vxl_ifp;
        int                              vxl_reqcap;
        u_int                            vxl_fibnum;
        struct vxlan_socket             *vxl_sock;
        uint32_t                         vxl_vni;
        union vxlan_sockaddr             vxl_src_addr;
        /* Its port is reused as the port of learned forwarding entries. */
        union vxlan_sockaddr             vxl_dst_addr;
        /* Bitmask of the VXLAN_FLAG_* values defined below. */
        uint32_t                         vxl_flags;
#define VXLAN_FLAG_INIT         0x0001
#define VXLAN_FLAG_TEARDOWN     0x0002
#define VXLAN_FLAG_LEARN        0x0004
#define VXLAN_FLAG_USER_MTU     0x0008

        uint32_t                         vxl_port_hash_key;
        uint16_t                         vxl_min_port;
        uint16_t                         vxl_max_port;
        uint8_t                          vxl_ttl;

        /* Lookup table from MAC address to forwarding entry. */
        /* Entries currently in the table (insert ++, destroy --). */
        uint32_t                         vxl_ftable_cnt;
        /* Learning refuses new entries once vxl_ftable_cnt reaches this. */
        uint32_t                         vxl_ftable_max;
        /* Added to time_uptime to compute an entry's expiry. */
        uint32_t                         vxl_ftable_timeout;
        /* Random value chosen by vxlan_ftable_init(). */
        uint32_t                         vxl_ftable_hash_key;
        /* Array of VXLAN_SC_FTABLE_SIZE bucket heads. */
        struct vxlan_ftable_head        *vxl_ftable;

        /* Derived from vxl_dst_addr. */
        struct vxlan_ftable_entry        vxl_default_fe;

        struct ip_moptions              *vxl_im4o;
        struct ip6_moptions             *vxl_im6o;

        /* Read-mostly lock behind the VXLAN_*LOCK macros below. */
        struct rmlock                    vxl_lock;
        /* Manipulated through VXLAN_ACQUIRE()/VXLAN_RELEASE(). */
        volatile u_int                   vxl_refcnt;

        int                              vxl_unit;
        int                              vxl_vso_mc_index;
        struct vxlan_statistics          vxl_stats;
        struct sysctl_oid               *vxl_sysctl_node;
        struct sysctl_ctx_list           vxl_sysctl_ctx;
        struct callout                   vxl_callout;
        struct ether_addr                vxl_hwaddr;
        int                              vxl_mc_ifindex;
        struct ifnet                    *vxl_mc_ifp;
        struct ifmedia                   vxl_media;
        char                             vxl_mc_ifname[IFNAMSIZ];
        LIST_ENTRY(vxlan_softc)          vxl_entry;
        LIST_ENTRY(vxlan_softc)          vxl_ifdetach_list;

        /* For rate limiting errors on the tx fast path. */
        struct timeval err_time;
        int err_pps;
};

/* Read lock/unlock take a caller-supplied struct rm_priotracker pointer. */
#define VXLAN_RLOCK(_sc, _p)    rm_rlock(&(_sc)->vxl_lock, (_p))
#define VXLAN_RUNLOCK(_sc, _p)  rm_runlock(&(_sc)->vxl_lock, (_p))
#define VXLAN_WLOCK(_sc)        rm_wlock(&(_sc)->vxl_lock)
#define VXLAN_WUNLOCK(_sc)      rm_wunlock(&(_sc)->vxl_lock)
#define VXLAN_LOCK_WOWNED(_sc)  rm_wowned(&(_sc)->vxl_lock)
#define VXLAN_LOCK_ASSERT(_sc)  rm_assert(&(_sc)->vxl_lock, RA_LOCKED)
#define VXLAN_LOCK_WASSERT(_sc) rm_assert(&(_sc)->vxl_lock, RA_WLOCKED)
/*
 * Release the lock in whichever mode it is held: the write lock when
 * rm_wowned() reports ownership, otherwise the read lock tracked by _p.
 * Needed after code that may have swapped a read lock for a write lock.
 */
#define VXLAN_UNLOCK(_sc, _p) do {              \
    if (VXLAN_LOCK_WOWNED(_sc))                 \
        VXLAN_WUNLOCK(_sc);                     \
    else                                        \
        VXLAN_RUNLOCK(_sc, _p);                 \
} while (0)

/* Reference counting on vxl_refcnt. */
#define VXLAN_ACQUIRE(_sc)      refcount_acquire(&(_sc)->vxl_refcnt)
#define VXLAN_RELEASE(_sc)      refcount_release(&(_sc)->vxl_refcnt)

/* Const-preserving casts from a generic sockaddr pointer. */
#define satoconstsin(sa)        ((const struct sockaddr_in *)(sa))
#define satoconstsin6(sa)       ((const struct sockaddr_in6 *)(sa))

/* A UDP header immediately followed by a VXLAN header, with no padding. */
struct vxlanudphdr {
        struct udphdr           vxlh_udp;
        struct vxlan_header     vxlh_hdr;
} __packed;

/* Forwarding table (ftable) and forwarding entry helpers. */
static int      vxlan_ftable_addr_cmp(const uint8_t *, const uint8_t *);
static void     vxlan_ftable_init(struct vxlan_softc *);
static void     vxlan_ftable_fini(struct vxlan_softc *);
static void     vxlan_ftable_flush(struct vxlan_softc *, int);
static void     vxlan_ftable_expire(struct vxlan_softc *);
static int      vxlan_ftable_update_locked(struct vxlan_softc *,
                    const union vxlan_sockaddr *, const uint8_t *,
                    struct rm_priotracker *);
static int      vxlan_ftable_learn(struct vxlan_softc *,
                    const struct sockaddr *, const uint8_t *);
static int      vxlan_ftable_sysctl_dump(SYSCTL_HANDLER_ARGS);

static struct vxlan_ftable_entry *
                vxlan_ftable_entry_alloc(void);
static void     vxlan_ftable_entry_free(struct vxlan_ftable_entry *);
static void     vxlan_ftable_entry_init(struct vxlan_softc *,
                    struct vxlan_ftable_entry *, const uint8_t *,
                    const struct sockaddr *, uint32_t);
static void     vxlan_ftable_entry_destroy(struct vxlan_softc *,
                    struct vxlan_ftable_entry *);
static int      vxlan_ftable_entry_insert(struct vxlan_softc *,
                    struct vxlan_ftable_entry *);
static struct vxlan_ftable_entry *
                vxlan_ftable_entry_lookup(struct vxlan_softc *,
                    const uint8_t *);
static void     vxlan_ftable_entry_dump(struct vxlan_ftable_entry *,
                    struct sbuf *);

/*
 * struct vxlan_socket management: allocation and lookup, multicast group
 * bookkeeping, and the softc lists hung off a socket.
 */
static struct vxlan_socket *
                vxlan_socket_alloc(const union vxlan_sockaddr *);
static void     vxlan_socket_destroy(struct vxlan_socket *);
static void     vxlan_socket_release(struct vxlan_socket *);
static struct vxlan_socket *
                vxlan_socket_lookup(union vxlan_sockaddr *vxlsa);
static void     vxlan_socket_insert(struct vxlan_socket *);
static int      vxlan_socket_init(struct vxlan_socket *, struct ifnet *);
static int      vxlan_socket_bind(struct vxlan_socket *, struct ifnet *);
static int      vxlan_socket_create(struct ifnet *, int,
                    const union vxlan_sockaddr *, struct vxlan_socket **);
static void     vxlan_socket_ifdetach(struct vxlan_socket *,
                    struct ifnet *, struct vxlan_softc_head *);

static struct vxlan_socket *
                vxlan_socket_mc_lookup(const union vxlan_sockaddr *);
static int      vxlan_sockaddr_mc_info_match(
                    const struct vxlan_socket_mc_info *,
                    const union vxlan_sockaddr *,
                    const union vxlan_sockaddr *, int);
static int      vxlan_socket_mc_join_group(struct vxlan_socket *,
                    const union vxlan_sockaddr *, const union vxlan_sockaddr *,
                    int *, union vxlan_sockaddr *);
static int      vxlan_socket_mc_leave_group(struct vxlan_socket *,
                    const union vxlan_sockaddr *,
                    const union vxlan_sockaddr *, int);
static int      vxlan_socket_mc_add_group(struct vxlan_socket *,
                    const union vxlan_sockaddr *, const union vxlan_sockaddr *,
                    int, int *);
static void     vxlan_socket_mc_release_group_by_idx(struct vxlan_socket *,
                    int);

static struct vxlan_softc *
                vxlan_socket_lookup_softc_locked(struct vxlan_socket *,
                    uint32_t);
static struct vxlan_softc *
                vxlan_socket_lookup_softc(struct vxlan_socket *, uint32_t);
static int      vxlan_socket_insert_softc(struct vxlan_socket *,
                    struct vxlan_softc *);
static void     vxlan_socket_remove_softc(struct vxlan_socket *,
                    struct vxlan_softc *);

/* Multicast setup, interface init/teardown, ifdetach and timer. */
static struct ifnet *
                vxlan_multicast_if_ref(struct vxlan_softc *, int);
static void     vxlan_free_multicast(struct vxlan_softc *);
static int      vxlan_setup_multicast_interface(struct vxlan_softc *);

static int      vxlan_setup_multicast(struct vxlan_softc *);
static int      vxlan_setup_socket(struct vxlan_softc *);
#ifdef INET6
static void     vxlan_setup_zero_checksum_port(struct vxlan_softc *);
#endif
static void     vxlan_setup_interface_hdrlen(struct vxlan_softc *);
static int      vxlan_valid_init_config(struct vxlan_softc *);
static void     vxlan_init_wait(struct vxlan_softc *);
static void     vxlan_init_complete(struct vxlan_softc *);
static void     vxlan_init(void *);
static void     vxlan_release(struct vxlan_softc *);
static void     vxlan_teardown_wait(struct vxlan_softc *);
static void     vxlan_teardown_complete(struct vxlan_softc *);
static void     vxlan_teardown_locked(struct vxlan_softc *);
static void     vxlan_teardown(struct vxlan_softc *);
static void     vxlan_ifdetach(struct vxlan_softc *, struct ifnet *,
                    struct vxlan_softc_head *);
static void     vxlan_timer(void *);

/*
 * Control handlers referenced from vxlan_control_table[], followed by the
 * ioctl entry points.
 */
static int      vxlan_ctrl_get_config(struct vxlan_softc *, void *);
static int      vxlan_ctrl_set_vni(struct vxlan_softc *, void *);
static int      vxlan_ctrl_set_local_addr(struct vxlan_softc *, void *);
static int      vxlan_ctrl_set_remote_addr(struct vxlan_softc *, void *);
static int      vxlan_ctrl_set_local_port(struct vxlan_softc *, void *);
static int      vxlan_ctrl_set_remote_port(struct vxlan_softc *, void *);
static int      vxlan_ctrl_set_port_range(struct vxlan_softc *, void *);
static int      vxlan_ctrl_set_ftable_timeout(struct vxlan_softc *, void *);
static int      vxlan_ctrl_set_ftable_max(struct vxlan_softc *, void *);
static int      vxlan_ctrl_set_multicast_if(struct vxlan_softc * , void *);
static int      vxlan_ctrl_set_ttl(struct vxlan_softc *, void *);
static int      vxlan_ctrl_set_learn(struct vxlan_softc *, void *);
static int      vxlan_ctrl_ftable_entry_add(struct vxlan_softc *, void *);
static int      vxlan_ctrl_ftable_entry_rem(struct vxlan_softc *, void *);
static int      vxlan_ctrl_flush(struct vxlan_softc *, void *);
static int      vxlan_ioctl_drvspec(struct vxlan_softc *,
                    struct ifdrv *, int);
static int      vxlan_ioctl_ifflags(struct vxlan_softc *);
static int      vxlan_ioctl(struct ifnet *, u_long, caddr_t);

/* Encapsulation, transmit and receive paths. */
#if defined(INET) || defined(INET6)
static uint16_t vxlan_pick_source_port(struct vxlan_softc *, struct mbuf *);
static void     vxlan_encap_header(struct vxlan_softc *, struct mbuf *,
                    int, uint16_t, uint16_t);
#endif
static int      vxlan_encap4(struct vxlan_softc *,
                    const union vxlan_sockaddr *, struct mbuf *);
static int      vxlan_encap6(struct vxlan_softc *,
                    const union vxlan_sockaddr *, struct mbuf *);
static int      vxlan_transmit(struct ifnet *, struct mbuf *);
static void     vxlan_qflush(struct ifnet *);
static bool     vxlan_rcv_udp_packet(struct mbuf *, int, struct inpcb *,
                    const struct sockaddr *, void *);
static int      vxlan_input(struct vxlan_socket *, uint32_t, struct mbuf **,
                    const struct sockaddr *);

/* Statistics, configuration, capabilities, cloning, hashing and media. */
static void     vxlan_stats_alloc(struct vxlan_softc *);
static void     vxlan_stats_free(struct vxlan_softc *);
static void     vxlan_set_default_config(struct vxlan_softc *);
static int      vxlan_set_user_config(struct vxlan_softc *,
                     struct ifvxlanparam *);
static int      vxlan_set_reqcap(struct vxlan_softc *, struct ifnet *, int);
static void     vxlan_set_hwcaps(struct vxlan_softc *);
static int      vxlan_clone_create(struct if_clone *, char *, size_t,
                    struct ifc_data *, struct ifnet **);
static int      vxlan_clone_destroy(struct if_clone *, struct ifnet *, uint32_t);

static uint32_t vxlan_mac_hash(struct vxlan_softc *, const uint8_t *);
static int      vxlan_media_change(struct ifnet *);
static void     vxlan_media_status(struct ifnet *, struct ifmediareq *);

/* Address helpers, parameter checks, sysctl setup and module glue. */
static int      vxlan_sockaddr_cmp(const union vxlan_sockaddr *,
                    const struct sockaddr *);
static void     vxlan_sockaddr_copy(union vxlan_sockaddr *,
                    const struct sockaddr *);
static int      vxlan_sockaddr_in_equal(const union vxlan_sockaddr *,
                    const struct sockaddr *);
static void     vxlan_sockaddr_in_copy(union vxlan_sockaddr *,
                    const struct sockaddr *);
static int      vxlan_sockaddr_supported(const union vxlan_sockaddr *, int);
static int      vxlan_sockaddr_in_any(const union vxlan_sockaddr *);
static int      vxlan_sockaddr_in_multicast(const union vxlan_sockaddr *);
static int      vxlan_sockaddr_in6_embedscope(union vxlan_sockaddr *);

static int      vxlan_can_change_config(struct vxlan_softc *);
static int      vxlan_check_vni(uint32_t);
static int      vxlan_check_ttl(int);
static int      vxlan_check_ftable_timeout(uint32_t);
static int      vxlan_check_ftable_max(uint32_t);

static void     vxlan_sysctl_setup(struct vxlan_softc *);
static void     vxlan_sysctl_destroy(struct vxlan_softc *);
static int      vxlan_tunable_int(struct vxlan_softc *, const char *, int);

static void     vxlan_ifdetach_event(void *, struct ifnet *);
static void     vxlan_load(void);
static void     vxlan_unload(void);
static int      vxlan_modevent(module_t, int, void *);

/* Name string, also used as the short name of the M_VXLAN malloc type. */
static const char vxlan_name[] = "vxlan";
static MALLOC_DEFINE(M_VXLAN, vxlan_name,
    "Virtual eXtensible LAN Interface");
static struct if_clone *vxlan_cloner;

/*
 * Mutex behind VXLAN_LIST_LOCK()/VXLAN_LIST_UNLOCK().
 * NOTE(review): presumably protects vxlan_socket_list -- confirm.
 */
static struct mtx vxlan_list_mtx;
#define VXLAN_LIST_LOCK()       mtx_lock(&vxlan_list_mtx)
#define VXLAN_LIST_UNLOCK()     mtx_unlock(&vxlan_list_mtx)

/* List of struct vxlan_socket, linked through vxlso_entry. */
static LIST_HEAD(, vxlan_socket) vxlan_socket_list =
    LIST_HEAD_INITIALIZER(vxlan_socket_list);

static eventhandler_tag vxlan_ifdetach_event_tag;

/* net.link.vxlan sysctl subtree. */
SYSCTL_DECL(_net_link);
SYSCTL_NODE(_net_link, OID_AUTO, vxlan, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "Virtual eXtensible Local Area Network");

/* Both knobs below are declared CTLFLAG_RDTUN and default to 0. */
static int vxlan_legacy_port = 0;
SYSCTL_INT(_net_link_vxlan, OID_AUTO, legacy_port, CTLFLAG_RDTUN,
    &vxlan_legacy_port, 0, "Use legacy port");
static int vxlan_reuse_port = 0;
SYSCTL_INT(_net_link_vxlan, OID_AUTO, reuse_port, CTLFLAG_RDTUN,
    &vxlan_reuse_port, 0, "Re-use port");

/*
 * This macro sets the default upper limit on the nesting depth of vxlan
 * tunnels. The default is 3: an IPv6 vxlan tunnel adds 70 bytes of
 * overhead, so three levels add at most 210 bytes and the innermost
 * tunnel's MTU is 1290, which still meets the IPv6 minimum MTU of 1280.
 * Be careful when raising the limit: a large number of nested tunnels
 * can crash the system.
 */
#ifndef MAX_VXLAN_NEST
#define MAX_VXLAN_NEST  3
#endif
/* Exposed read-write as net.link.vxlan.max_nesting. */
static int max_vxlan_nesting = MAX_VXLAN_NEST;
SYSCTL_INT(_net_link_vxlan, OID_AUTO, max_nesting, CTLFLAG_RW,
    &max_vxlan_nesting, 0, "Max nested tunnels");

/* Default maximum number of addresses in the forwarding table. */
#ifndef VXLAN_FTABLE_MAX
#define VXLAN_FTABLE_MAX        2000
#endif

/* Timeout (in seconds) of addresses learned in the forwarding table. */
#ifndef VXLAN_FTABLE_TIMEOUT
#define VXLAN_FTABLE_TIMEOUT    (20 * 60)
#endif

/*
 * Maximum timeout (in seconds) of addresses learned in the forwarding
 * table.
 */
#ifndef VXLAN_FTABLE_MAX_TIMEOUT
#define VXLAN_FTABLE_MAX_TIMEOUT        (60 * 60 * 24)
#endif

/* Number of seconds between pruning attempts of the forwarding table. */
#ifndef VXLAN_FTABLE_PRUNE
#define VXLAN_FTABLE_PRUNE      (5 * 60)
#endif

/* Pruning period in effect, initialized from VXLAN_FTABLE_PRUNE. */
static int vxlan_ftable_prune_period = VXLAN_FTABLE_PRUNE;

/*
 * Descriptor for one driver-specific control command; see
 * vxlan_control_table[].
 */
struct vxlan_control {
        /* Handler, called with the softc and an argument buffer. */
        int     (*vxlc_func)(struct vxlan_softc *, void *);
        /* Size of the argument structure the handler expects. */
        int     vxlc_argsize;
        /* VXLAN_CTRL_FLAG_* bits. */
        int     vxlc_flags;
        /*
         * NOTE(review): presumably COPYIN/COPYOUT select the direction the
         * argument is copied to/from userland and SUSER demands privilege;
         * the code consuming these flags is outside this excerpt.
         */
#define VXLAN_CTRL_FLAG_COPYIN  0x01
#define VXLAN_CTRL_FLAG_COPYOUT 0x02
#define VXLAN_CTRL_FLAG_SUSER   0x04
};

/*
 * Control command table, indexed by VXLAN_CMD_* value. VXLAN_CMD_GET_CONFIG
 * takes a struct ifvxlancfg and is flagged COPYOUT only; every other
 * command takes a struct ifvxlancmd and is flagged COPYIN | SUSER.
 */
static const struct vxlan_control vxlan_control_table[] = {
        [VXLAN_CMD_GET_CONFIG] =
            {   vxlan_ctrl_get_config, sizeof(struct ifvxlancfg),
                VXLAN_CTRL_FLAG_COPYOUT
            },

        [VXLAN_CMD_SET_VNI] =
            {   vxlan_ctrl_set_vni, sizeof(struct ifvxlancmd),
                VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
            },

        [VXLAN_CMD_SET_LOCAL_ADDR] =
            {   vxlan_ctrl_set_local_addr, sizeof(struct ifvxlancmd),
                VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
            },

        [VXLAN_CMD_SET_REMOTE_ADDR] =
            {   vxlan_ctrl_set_remote_addr, sizeof(struct ifvxlancmd),
                VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
            },

        [VXLAN_CMD_SET_LOCAL_PORT] =
            {   vxlan_ctrl_set_local_port, sizeof(struct ifvxlancmd),
                VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
            },

        [VXLAN_CMD_SET_REMOTE_PORT] =
            {   vxlan_ctrl_set_remote_port, sizeof(struct ifvxlancmd),
                VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
            },

        [VXLAN_CMD_SET_PORT_RANGE] =
            {   vxlan_ctrl_set_port_range, sizeof(struct ifvxlancmd),
                VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
            },

        [VXLAN_CMD_SET_FTABLE_TIMEOUT] =
            {   vxlan_ctrl_set_ftable_timeout, sizeof(struct ifvxlancmd),
                VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
            },

        [VXLAN_CMD_SET_FTABLE_MAX] =
            {   vxlan_ctrl_set_ftable_max, sizeof(struct ifvxlancmd),
                VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
            },

        [VXLAN_CMD_SET_MULTICAST_IF] =
            {   vxlan_ctrl_set_multicast_if, sizeof(struct ifvxlancmd),
                VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
            },

        [VXLAN_CMD_SET_TTL] =
            {   vxlan_ctrl_set_ttl, sizeof(struct ifvxlancmd),
                VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
            },

        [VXLAN_CMD_SET_LEARN] =
            {   vxlan_ctrl_set_learn, sizeof(struct ifvxlancmd),
                VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
            },

        [VXLAN_CMD_FTABLE_ENTRY_ADD] =
            {   vxlan_ctrl_ftable_entry_add, sizeof(struct ifvxlancmd),
                VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
            },

        [VXLAN_CMD_FTABLE_ENTRY_REM] =
            {   vxlan_ctrl_ftable_entry_rem, sizeof(struct ifvxlancmd),
                VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
            },

        [VXLAN_CMD_FLUSH] =
            {   vxlan_ctrl_flush, sizeof(struct ifvxlancmd),
                VXLAN_CTRL_FLAG_COPYIN | VXLAN_CTRL_FLAG_SUSER,
            },
};

/* Number of slots in the table, for bounds-checking a command index. */
static const int vxlan_control_table_size = nitems(vxlan_control_table);

/*
 * Compare two Ethernet addresses byte by byte.
 *
 * Returns 0 when all ETHER_ADDR_LEN bytes match; otherwise the difference
 * (a[i] - b[i]) at the first mismatching byte, so the sign gives the
 * ordering of `a` relative to `b`.
 */
static int
vxlan_ftable_addr_cmp(const uint8_t *a, const uint8_t *b)
{
        int idx, delta;

        for (idx = 0; idx < ETHER_ADDR_LEN; idx++) {
                delta = (int)a[idx] - (int)b[idx];
                if (delta != 0)
                        return (delta);
        }

        return (0);
}

static void
vxlan_ftable_init(struct vxlan_softc *sc)
{
        int i;

        sc->vxl_ftable = malloc(sizeof(struct vxlan_ftable_head) *
            VXLAN_SC_FTABLE_SIZE, M_VXLAN, M_ZERO | M_WAITOK);

        for (i = 0; i < VXLAN_SC_FTABLE_SIZE; i++)
                LIST_INIT(&sc->vxl_ftable[i]);
        sc->vxl_ftable_hash_key = arc4random();
}

/*
 * Free the forwarding table's bucket array. The table must already be
 * empty: that precondition is asserted here, not established.
 */
static void
vxlan_ftable_fini(struct vxlan_softc *sc)
{
        int bucket;

        for (bucket = 0; bucket < VXLAN_SC_FTABLE_SIZE; bucket++) {
                KASSERT(LIST_EMPTY(&sc->vxl_ftable[bucket]),
                    ("%s: vxlan %p ftable[%d] not empty", __func__, sc,
                    bucket));
        }
        MPASS(sc->vxl_ftable_cnt == 0);

        free(sc->vxl_ftable, M_VXLAN);
        /* Leave no dangling pointer behind in the softc. */
        sc->vxl_ftable = NULL;
}

/*
 * Remove entries from the forwarding table: every entry when `all` is
 * nonzero, otherwise only the dynamic ones.
 */
static void
vxlan_ftable_flush(struct vxlan_softc *sc, int all)
{
        struct vxlan_ftable_head *bucket;
        struct vxlan_ftable_entry *cur, *next;
        int idx;

        for (idx = 0; idx < VXLAN_SC_FTABLE_SIZE; idx++) {
                bucket = &sc->vxl_ftable[idx];

                /* _SAFE variant: the current entry may be freed. */
                LIST_FOREACH_SAFE(cur, bucket, vxlfe_hash, next) {
                        if (!all && !VXLAN_FE_IS_DYNAMIC(cur))
                                continue;
                        vxlan_ftable_entry_destroy(sc, cur);
                }
        }
}

/*
 * Destroy every dynamic forwarding entry whose expiry time has been
 * reached. The softc write lock must be held.
 */
static void
vxlan_ftable_expire(struct vxlan_softc *sc)
{
        struct vxlan_ftable_head *bucket;
        struct vxlan_ftable_entry *cur, *next;
        int idx;

        VXLAN_LOCK_WASSERT(sc);

        for (idx = 0; idx < VXLAN_SC_FTABLE_SIZE; idx++) {
                bucket = &sc->vxl_ftable[idx];

                LIST_FOREACH_SAFE(cur, bucket, vxlfe_hash, next) {
                        /* Only dynamic entries ever time out. */
                        if (!VXLAN_FE_IS_DYNAMIC(cur))
                                continue;
                        if (time_uptime < cur->vxlfe_expire)
                                continue;
                        vxlan_ftable_entry_destroy(sc, cur);
                }
        }
}

/*
 * Record that `mac` is reachable through the remote address `vxlsa`,
 * refreshing an existing forwarding entry or creating a dynamic one.
 *
 * The softc lock must be held on entry, in either mode. If the table has
 * to be modified while only the read lock is held, the read lock (tracked
 * by `tracker`) is dropped, the write lock is taken and the lookup is
 * redone. The function can therefore return holding the write lock, which
 * is why vxlan_ftable_learn() releases with VXLAN_UNLOCK().
 *
 * Returns 0 on success, ENOSPC when the table already holds
 * vxl_ftable_max entries, or ENOMEM when no entry could be allocated.
 */
static int
vxlan_ftable_update_locked(struct vxlan_softc *sc,
    const union vxlan_sockaddr *vxlsa, const uint8_t *mac,
    struct rm_priotracker *tracker)
{
        struct vxlan_ftable_entry *fe;
        int error __unused;

        VXLAN_LOCK_ASSERT(sc);

        for (;;) {
                fe = vxlan_ftable_entry_lookup(sc, mac);
                if (fe != NULL) {
                        /* Seeing the MAC again pushes its expiry out. */
                        fe->vxlfe_expire =
                            time_uptime + sc->vxl_ftable_timeout;

                        /* Static or unchanged entries need no update. */
                        if (!VXLAN_FE_IS_DYNAMIC(fe) ||
                            vxlan_sockaddr_in_equal(&fe->vxlfe_raddr,
                            &vxlsa->sa))
                                return (0);

                        if (VXLAN_LOCK_WOWNED(sc)) {
                                vxlan_sockaddr_in_copy(&fe->vxlfe_raddr,
                                    &vxlsa->sa);
                                return (0);
                        }
                } else if (VXLAN_LOCK_WOWNED(sc)) {
                        /* No entry yet and the table may be modified. */
                        break;
                }

                /*
                 * A modification is needed but only the read lock is held:
                 * trade it for the write lock and look again, since the
                 * table may have changed while no lock was held.
                 */
                VXLAN_RUNLOCK(sc, tracker);
                VXLAN_WLOCK(sc);
                sc->vxl_stats.ftable_lock_upgrade_failed++;
        }

        if (sc->vxl_ftable_cnt >= sc->vxl_ftable_max) {
                sc->vxl_stats.ftable_nospace++;
                return (ENOSPC);
        }

        fe = vxlan_ftable_entry_alloc();
        if (fe == NULL)
                return (ENOMEM);

        vxlan_ftable_entry_init(sc, fe, mac, &vxlsa->sa,
            VXLAN_FE_FLAG_DYNAMIC);

        /* The lookup above found nothing, so the insert cannot collide. */
        error = vxlan_ftable_entry_insert(sc, fe);
        MPASS(error == 0);

        return (0);
}

static int
vxlan_ftable_learn(struct vxlan_softc *sc, const struct sockaddr *sa,
    const uint8_t *mac)
{
        struct rm_priotracker tracker;
        union vxlan_sockaddr vxlsa;
        int error;

        /*
         * The source port may be randomly selected by the remote host, so
         * use the port of the default destination address.
         */
        vxlan_sockaddr_copy(&vxlsa, sa);
        vxlsa.in4.sin_port = sc->vxl_dst_addr.in4.sin_port;

        if (VXLAN_SOCKADDR_IS_IPV6(&vxlsa)) {
                error = vxlan_sockaddr_in6_embedscope(&vxlsa);
                if (error)
                        return (error);
        }

        VXLAN_RLOCK(sc, &tracker);
        error = vxlan_ftable_update_locked(sc, &vxlsa, mac, &tracker);
        VXLAN_UNLOCK(sc, &tracker);

        return (error);
}

/*
 * Sysctl handler that renders the forwarding table of the softc passed as
 * arg1 into a string, one entry per line.
 *
 * This is mostly a development aid: the output buffer is a fixed
 * PAGE_SIZE (a proper size calculation is still to be done), so a large
 * table is cut off at the last entry that fits.
 */
static int
vxlan_ftable_sysctl_dump(SYSCTL_HANDLER_ARGS)
{
        struct rm_priotracker tracker;
        struct sbuf sb;
        struct vxlan_softc *sc;
        struct vxlan_ftable_entry *fe;
        int bucket, error;

        sc = arg1;

        sbuf_new(&sb, NULL, PAGE_SIZE, SBUF_FIXEDLEN);
        sbuf_putc(&sb, '\n');

        VXLAN_RLOCK(sc, &tracker);
        for (bucket = 0; bucket < VXLAN_SC_FTABLE_SIZE; bucket++) {
                /* Stop walking a bucket once the buffer has overflowed. */
                for (fe = LIST_FIRST(&sc->vxl_ftable[bucket]);
                    fe != NULL && sbuf_error(&sb) == 0;
                    fe = LIST_NEXT(fe, vxlfe_hash))
                        vxlan_ftable_entry_dump(fe, &sb);
        }
        VXLAN_RUNLOCK(sc, &tracker);

        /* Nothing but the leading newline: report an empty string. */
        if (sbuf_len(&sb) == 1)
                sbuf_setpos(&sb, 0);

        sbuf_finish(&sb);
        error = sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req);
        sbuf_delete(&sb);

        return (error);
}

/*
 * Allocate a zeroed forwarding entry. The allocation uses M_NOWAIT, so
 * the result may be NULL and callers must check it.
 */
static struct vxlan_ftable_entry *
vxlan_ftable_entry_alloc(void)
{

        return (malloc(sizeof(struct vxlan_ftable_entry), M_VXLAN,
            M_NOWAIT | M_ZERO));
}

/*
 * Return a forwarding entry's memory to the M_VXLAN malloc type. The
 * entry is not unlinked here; see vxlan_ftable_entry_destroy().
 */
static void
vxlan_ftable_entry_free(struct vxlan_ftable_entry *fe)
{
        free(fe, M_VXLAN);
}

/*
 * Fill in a forwarding entry: key it by `mac`, point it at the remote
 * address `sa`, store `flags`, and set its expiry to the current uptime
 * plus the softc's table timeout.
 */
static void
vxlan_ftable_entry_init(struct vxlan_softc *sc, struct vxlan_ftable_entry *fe,
    const uint8_t *mac, const struct sockaddr *sa, uint32_t flags)
{
        memcpy(fe->vxlfe_mac, mac, ETHER_ADDR_LEN);
        vxlan_sockaddr_copy(&fe->vxlfe_raddr, sa);
        fe->vxlfe_flags = flags;
        fe->vxlfe_expire = time_uptime + sc->vxl_ftable_timeout;
}

/*
 * Unlink a forwarding entry from its bucket, account for its removal in
 * vxl_ftable_cnt and free it.
 */
static void
vxlan_ftable_entry_destroy(struct vxlan_softc *sc,
    struct vxlan_ftable_entry *fe)
{
        LIST_REMOVE(fe, vxlfe_hash);
        sc->vxl_ftable_cnt--;
        vxlan_ftable_entry_free(fe);
}

static int
vxlan_ftable_entry_insert(struct vxlan_softc *sc,
    struct vxlan_ftable_entry *fe)
{
        struct vxlan_ftable_entry *lfe;
        uint32_t hash;
        int dir;

        VXLAN_LOCK_WASSERT(sc);
        hash = VXLAN_SC_FTABLE_HASH(sc, fe->vxlfe_mac);

        lfe = LIST_FIRST(&sc->vxl_ftable[hash]);
        if (lfe == NULL) {
                LIST_INSERT_HEAD(&sc->vxl_ftable[hash], fe, vxlfe_hash);
                goto out;
        }

        do {
                dir = vxlan_ftable_addr_cmp(fe->vxlfe_mac, lfe->vxlfe_mac);
                if (dir == 0)
                        return (EEXIST);
                if (dir > 0) {
                        LIST_INSERT_BEFORE(lfe, fe, vxlfe_hash);
                        goto out;
                } else if (LIST_NEXT(lfe, vxlfe_hash) == NULL) {
                        LIST_INSERT_AFTER(lfe, fe, vxlfe_hash);
                        goto out;
                } else
                        lfe = LIST_NEXT(lfe, vxlfe_hash);
        } while (lfe != NULL);

out:
        sc->vxl_ftable_cnt++;

        return (0);
}

static struct vxlan_ftable_entry *
vxlan_ftable_entry_lookup(struct vxlan_softc *sc, const uint8_t *mac)
{
        struct vxlan_ftable_entry *fe;
        uint32_t hash;
        int dir;

        VXLAN_LOCK_ASSERT(sc);
        hash = VXLAN_SC_FTABLE_HASH(sc, mac);

        LIST_FOREACH(fe, &sc->vxl_ftable[hash], vxlfe_hash) {
                dir = vxlan_ftable_addr_cmp(mac, fe->vxlfe_mac);
                if (dir == 0)
                        return (fe);
                if (dir > 0)
                        break;
        }

        return (NULL);
}

static void
vxlan_ftable_entry_dump(struct vxlan_ftable_entry *fe, struct sbuf *sb)
{
        char buf[64];
        const union vxlan_sockaddr *sa;
        const void *addr;
        int i, len, af, width;

        sa = &fe->vxlfe_raddr;
        af = sa->sa.sa_family;
        len = sbuf_len(sb);

        sbuf_printf(sb, "%c 0x%02X ", VXLAN_FE_IS_DYNAMIC(fe) ? 'D' : 'S',
            fe->vxlfe_flags);

        for (i = 0; i < ETHER_ADDR_LEN - 1; i++)
                sbuf_printf(sb, "%02X:", fe->vxlfe_mac[i]);
        sbuf_printf(sb, "%02X ", fe->vxlfe_mac[i]);

        if (af == AF_INET) {
                addr = &sa->in4.sin_addr;
                width = INET_ADDRSTRLEN - 1;
        } else {
                addr = &sa->in6.sin6_addr;
                width = INET6_ADDRSTRLEN - 1;
        }
        inet_ntop(af, addr, buf, sizeof(buf));
        sbuf_printf(sb, "%*s ", width, buf);

        sbuf_printf(sb, "%08jd", (intmax_t)fe->vxlfe_expire);

        sbuf_putc(sb, '\n');

        /* Truncate a partial line. */
        if (sbuf_error(sb) != 0)
                sbuf_setpos(sb, len);
}

/*
 * Allocate and initialize a vxlan_socket bound (logically) to 'sa'.
 *
 * The structure is zero-filled, so no kernel socket is attached yet.  Its
 * lock is initialized, its reference count starts at zero, every VNI hash
 * bucket is initialized empty, and 'sa' is recorded as the local address.
 * The allocation uses M_WAITOK.
 */
static struct vxlan_socket *
vxlan_socket_alloc(const union vxlan_sockaddr *sa)
{
        struct vxlan_socket *vso;
        int bucket;

        vso = malloc(sizeof(*vso), M_VXLAN, M_WAITOK | M_ZERO);
        vso->vxlso_laddr = *sa;
        rm_init(&vso->vxlso_lock, "vxlansorm");
        refcount_init(&vso->vxlso_refcnt, 0);
        for (bucket = 0; bucket < VXLAN_SO_VNI_HASH_SIZE; bucket++)
                LIST_INIT(&vso->vxlso_vni_hash[bucket]);

        return (vso);
}

/*
 * Tear down a vxlan_socket: close the underlying kernel socket, if one was
 * created, destroy the lock and free the structure.
 *
 * With INVARIANTS, first assert that no multicast group slot is still in
 * use and that no softc is still linked into the VNI hash.
 */
static void
vxlan_socket_destroy(struct vxlan_socket *vso)
{
        struct socket *so;
#ifdef INVARIANTS
        struct vxlan_socket_mc_info *mc;
        int i;

        /* Every multicast slot must have been released. */
        for (i = 0; i < VXLAN_SO_MC_MAX_GROUPS; i++) {
                mc = &vso->vxlso_mc[i];
                KASSERT(mc->vxlsomc_gaddr.sa.sa_family == AF_UNSPEC,
                    ("%s: socket %p mc[%d] still has address",
                     __func__, vso, i));
        }

        /* Every softc must have been removed from the socket. */
        for (i = 0; i < VXLAN_SO_VNI_HASH_SIZE; i++) {
                KASSERT(LIST_EMPTY(&vso->vxlso_vni_hash[i]),
                    ("%s: socket %p vni_hash[%d] not empty",
                     __func__, vso, i));
        }
#endif

        /* Detach the socket pointer before closing it. */
        so = vso->vxlso_sock;
        vso->vxlso_sock = NULL;
        if (so != NULL)
                soclose(so);

        rm_destroy(&vso->vxlso_lock);
        free(vso, M_VXLAN);
}

/*
 * Drop one reference on a vxlan_socket.
 *
 * The reference is released with the global socket list lock held.  When
 * VXLAN_SO_RELEASE() returns non-zero the socket is unlinked from the list
 * under that lock and then destroyed after the lock has been dropped.
 */
static void
vxlan_socket_release(struct vxlan_socket *vso)
{

        VXLAN_LIST_LOCK();
        if (VXLAN_SO_RELEASE(vso) == 0) {
                /* Other references remain; nothing more to do. */
                VXLAN_LIST_UNLOCK();
                return;
        }
        LIST_REMOVE(vso, vxlso_entry);
        VXLAN_LIST_UNLOCK();

        vxlan_socket_destroy(vso);
}

static struct vxlan_socket *
vxlan_socket_lookup(union vxlan_sockaddr *vxlsa)
{
        struct vxlan_socket *vso;

        VXLAN_LIST_LOCK();
        LIST_FOREACH(vso, &vxlan_socket_list, vxlso_entry) {
                if (vxlan_sockaddr_cmp(&vso->vxlso_laddr, &vxlsa->sa) == 0) {
                        VXLAN_SO_ACQUIRE(vso);
                        break;
                }
        }
        VXLAN_LIST_UNLOCK();

        return (vso);
}

/*
 * Publish a socket on the global socket list.  A reference is acquired on
 * the socket as part of the insertion; both steps happen under the list
 * lock.
 */
static void
vxlan_socket_insert(struct vxlan_socket *vso)
{

        VXLAN_LIST_LOCK();
        LIST_INSERT_HEAD(&vxlan_socket_list, vso, vxlso_entry);
        VXLAN_SO_ACQUIRE(vso);
        VXLAN_LIST_UNLOCK();
}

static int
vxlan_socket_init(struct vxlan_socket *vso, struct ifnet *ifp)
{
        struct thread *td;
        int error;

        td = curthread;

        error = socreate(vso->vxlso_laddr.sa.sa_family, &vso->vxlso_sock,
            SOCK_DGRAM, IPPROTO_UDP, td->td_ucred, td);
        if (error) {
                if_printf(ifp, "cannot create socket: %d\n", error);
                return (error);
        }

        error = udp_set_kernel_tunneling(vso->vxlso_sock,
            vxlan_rcv_udp_packet, NULL, vso);
        if (error) {
                if_printf(ifp, "cannot set tunneling function: %d\n", error);
                return (error);
        }

        if (vxlan_reuse_port != 0) {
                struct sockopt sopt;
                int val = 1;

                bzero(&sopt, sizeof(sopt));
                sopt.sopt_dir = SOPT_SET;
                sopt.sopt_level = IPPROTO_IP;
                sopt.sopt_name = SO_REUSEPORT;
                sopt.sopt_val = &val;
                sopt.sopt_valsize = sizeof(val);
                error = sosetopt(vso->vxlso_sock, &sopt);
                if (error) {
                        if_printf(ifp,
                            "cannot set REUSEADDR socket opt: %d\n", error);
                        return (error);
                }
        }

        return (0);
}

static int
vxlan_socket_bind(struct vxlan_socket *vso, struct ifnet *ifp)
{
        union vxlan_sockaddr laddr;
        struct thread *td;
        int error;

        td = curthread;
        laddr = vso->vxlso_laddr;

        error = sobind(vso->vxlso_sock, &laddr.sa, td);
        if (error) {
                if (error != EADDRINUSE)
                        if_printf(ifp, "cannot bind socket: %d\n", error);
                return (error);
        }

        return (0);
}

static int
vxlan_socket_create(struct ifnet *ifp, int multicast,
    const union vxlan_sockaddr *saddr, struct vxlan_socket **vsop)
{
        union vxlan_sockaddr laddr;
        struct vxlan_socket *vso;
        int error;

        laddr = *saddr;

        /*
         * If this socket will be multicast, then only the local port
         * must be specified when binding.
         */
        if (multicast != 0) {
                if (VXLAN_SOCKADDR_IS_IPV4(&laddr))
                        laddr.in4.sin_addr.s_addr = INADDR_ANY;
#ifdef INET6
                else
                        laddr.in6.sin6_addr = in6addr_any;
#endif
        }

        vso = vxlan_socket_alloc(&laddr);
        if (vso == NULL)
                return (ENOMEM);

        error = vxlan_socket_init(vso, ifp);
        if (error)
                goto fail;

        error = vxlan_socket_bind(vso, ifp);
        if (error)
                goto fail;

        /*
         * There is a small window between the bind completing and
         * inserting the socket, so that a concurrent create may fail.
         * Let's not worry about that for now.
         */
        vxlan_socket_insert(vso);
        *vsop = vso;

        return (0);

fail:
        vxlan_socket_destroy(vso);

        return (error);
}

static void
vxlan_socket_ifdetach(struct vxlan_socket *vso, struct ifnet *ifp,
    struct vxlan_softc_head *list)
{
        struct rm_priotracker tracker;
        struct vxlan_softc *sc;
        int i;

        VXLAN_SO_RLOCK(vso, &tracker);
        for (i = 0; i < VXLAN_SO_VNI_HASH_SIZE; i++) {
                LIST_FOREACH(sc, &vso->vxlso_vni_hash[i], vxl_entry)
                        vxlan_ifdetach(sc, ifp, list);
        }
        VXLAN_SO_RUNLOCK(vso, &tracker);
}

static struct vxlan_socket *
vxlan_socket_mc_lookup(const union vxlan_sockaddr *vxlsa)
{
        union vxlan_sockaddr laddr;
        struct vxlan_socket *vso;

        laddr = *vxlsa;

        if (VXLAN_SOCKADDR_IS_IPV4(&laddr))
                laddr.in4.sin_addr.s_addr = INADDR_ANY;
#ifdef INET6
        else
                laddr.in6.sin6_addr = in6addr_any;
#endif

        vso = vxlan_socket_lookup(&laddr);

        return (vso);
}

/*
 * Decide whether an in-use multicast slot 'mc' already covers a request for
 * ('group', 'local', 'ifidx').
 *
 * A wildcard 'local' address and an 'ifidx' of zero each match anything;
 * otherwise they must equal the values stored in the slot.  The group
 * address must always be equal.
 *
 * Returns 1 on a match and 0 otherwise.
 */
static int
vxlan_sockaddr_mc_info_match(const struct vxlan_socket_mc_info *mc,
    const union vxlan_sockaddr *group, const union vxlan_sockaddr *local,
    int ifidx)
{

        /* A specific local address has to agree with the stored source. */
        if (!vxlan_sockaddr_in_any(local)) {
                if (!vxlan_sockaddr_in_equal(&mc->vxlsomc_saddr, &local->sa))
                        return (0);
        }

        if (!vxlan_sockaddr_in_equal(&mc->vxlsomc_gaddr, &group->sa))
                return (0);

        return (ifidx == 0 || ifidx == mc->vxlsomc_ifidx);
}

static int
vxlan_socket_mc_join_group(struct vxlan_socket *vso,
    const union vxlan_sockaddr *group, const union vxlan_sockaddr *local,
    int *ifidx, union vxlan_sockaddr *source)
{
        struct sockopt sopt;
        int error;

        *source = *local;

        if (VXLAN_SOCKADDR_IS_IPV4(group)) {
                struct ip_mreq mreq;

                mreq.imr_multiaddr = group->in4.sin_addr;
                mreq.imr_interface = local->in4.sin_addr;

                bzero(&sopt, sizeof(sopt));
                sopt.sopt_dir = SOPT_SET;
                sopt.sopt_level = IPPROTO_IP;
                sopt.sopt_name = IP_ADD_MEMBERSHIP;
                sopt.sopt_val = &mreq;
                sopt.sopt_valsize = sizeof(mreq);
                error = sosetopt(vso->vxlso_sock, &sopt);
                if (error)
                        return (error);

                /*
                 * BMV: Ideally, there would be a formal way for us to get
                 * the local interface that was selected based on the
                 * imr_interface address. We could then update *ifidx so
                 * vxlan_sockaddr_mc_info_match() would return a match for
                 * later creates that explicitly set the multicast interface.
                 *
                 * If we really need to, we can of course look in the INP's
                 * membership list:
                 *     sotoinpcb(vso->vxlso_sock)->inp_moptions->
                 *         imo_head[]->imf_inm->inm_ifp
                 * similarly to imo_match_group().
                 */
                source->in4.sin_addr = local->in4.sin_addr;

        } else if (VXLAN_SOCKADDR_IS_IPV6(group)) {
                struct ipv6_mreq mreq;

                mreq.ipv6mr_multiaddr = group->in6.sin6_addr;
                mreq.ipv6mr_interface = *ifidx;

                bzero(&sopt, sizeof(sopt));
                sopt.sopt_dir = SOPT_SET;
                sopt.sopt_level = IPPROTO_IPV6;
                sopt.sopt_name = IPV6_JOIN_GROUP;
                sopt.sopt_val = &mreq;
                sopt.sopt_valsize = sizeof(mreq);
                error = sosetopt(vso->vxlso_sock, &sopt);
                if (error)
                        return (error);

                /*
                 * BMV: As with IPv4, we would really like to know what
                 * interface in6p_lookup_mcast_ifp() selected.
                 */
        } else
                error = EAFNOSUPPORT;

        return (error);
}

static int
vxlan_socket_mc_leave_group(struct vxlan_socket *vso,
    const union vxlan_sockaddr *group, const union vxlan_sockaddr *source,
    int ifidx)
{
        struct sockopt sopt;
        int error;

        bzero(&sopt, sizeof(sopt));
        sopt.sopt_dir = SOPT_SET;

        if (VXLAN_SOCKADDR_IS_IPV4(group)) {
                struct ip_mreq mreq;

                mreq.imr_multiaddr = group->in4.sin_addr;
                mreq.imr_interface = source->in4.sin_addr;

                sopt.sopt_level = IPPROTO_IP;
                sopt.sopt_name = IP_DROP_MEMBERSHIP;
                sopt.sopt_val = &mreq;
                sopt.sopt_valsize = sizeof(mreq);
                error = sosetopt(vso->vxlso_sock, &sopt);

        } else if (VXLAN_SOCKADDR_IS_IPV6(group)) {
                struct ipv6_mreq mreq;

                mreq.ipv6mr_multiaddr = group->in6.sin6_addr;
                mreq.ipv6mr_interface = ifidx;

                sopt.sopt_level = IPPROTO_IPV6;
                sopt.sopt_name = IPV6_LEAVE_GROUP;
                sopt.sopt_val = &mreq;
                sopt.sopt_valsize = sizeof(mreq);
                error = sosetopt(vso->vxlso_sock, &sopt);

        } else
                error = EAFNOSUPPORT;

        return (error);
}

/*
 * Register one more user of multicast group 'group' on this socket, joining
 * the group on the underlying socket if no existing slot already covers the
 * request.
 *
 * On success '*idx' is set to the index of the slot in vso->vxlso_mc[] whose
 * user count was incremented and 0 is returned.  Returns ENOSPC if no slot
 * is free, or the error from vxlan_socket_mc_join_group().
 */
static int
vxlan_socket_mc_add_group(struct vxlan_socket *vso,
    const union vxlan_sockaddr *group, const union vxlan_sockaddr *local,
    int ifidx, int *idx)
{
        union vxlan_sockaddr source;
        struct vxlan_socket_mc_info *mc;
        int i, empty, error;

        /*
         * Within a socket, the same multicast group may be used by multiple
         * interfaces, each with a different network identifier. But a socket
         * may only join a multicast group once, so keep track of the users
         * here.
         */

        /*
         * First pass: look for an in-use slot that already matches, while
         * counting the free (AF_UNSPEC) slots.
         */
        VXLAN_SO_WLOCK(vso);
        for (empty = 0, i = 0; i < VXLAN_SO_MC_MAX_GROUPS; i++) {
                mc = &vso->vxlso_mc[i];

                if (mc->vxlsomc_gaddr.sa.sa_family == AF_UNSPEC) {
                        empty++;
                        continue;
                }

                /* Jumps to 'out' with the write lock still held. */
                if (vxlan_sockaddr_mc_info_match(mc, group, local, ifidx))
                        goto out;
        }
        VXLAN_SO_WUNLOCK(vso);

        if (empty == 0)
                return (ENOSPC);

        /* The join is performed without the socket lock held. */
        error = vxlan_socket_mc_join_group(vso, group, local, &ifidx, &source);
        if (error)
                return (error);

        /*
         * Second pass: the lock was dropped for the join, so search again
         * for a free slot and record the new membership in it.
         */
        VXLAN_SO_WLOCK(vso);
        for (i = 0; i < VXLAN_SO_MC_MAX_GROUPS; i++) {
                mc = &vso->vxlso_mc[i];

                if (mc->vxlsomc_gaddr.sa.sa_family == AF_UNSPEC) {
                        vxlan_sockaddr_copy(&mc->vxlsomc_gaddr, &group->sa);
                        vxlan_sockaddr_copy(&mc->vxlsomc_saddr, &source.sa);
                        mc->vxlsomc_ifidx = ifidx;
                        goto out;
                }
        }
        VXLAN_SO_WUNLOCK(vso);

        /*
         * No free slot remained once the lock was retaken: undo the join
         * that was just performed.
         */
        error = vxlan_socket_mc_leave_group(vso, group, &source, ifidx);
        MPASS(error == 0);

        return (ENOSPC);

out:
        /* Reached with the write lock held and 'mc'/'i' naming the slot. */
        mc->vxlsomc_users++;
        VXLAN_SO_WUNLOCK(vso);

        *idx = i;

        return (0);
}

/*
 * Drop one user of the multicast slot at index 'idx'.
 *
 * When the user count reaches zero the slot is cleared under the socket
 * write lock and the group is then left on the underlying socket, using the
 * group, source and interface index that had been saved in the slot.
 */
static void
vxlan_socket_mc_release_group_by_idx(struct vxlan_socket *vso, int idx)
{
        union vxlan_sockaddr group, source;
        struct vxlan_socket_mc_info *mc;
        int ifidx;

        KASSERT(idx >= 0 && idx < VXLAN_SO_MC_MAX_GROUPS,
            ("%s: vso %p idx %d out of bounds", __func__, vso, idx));

        mc = &vso->vxlso_mc[idx];

        VXLAN_SO_WLOCK(vso);
        if (--mc->vxlsomc_users != 0) {
                /* The slot still has other users. */
                VXLAN_SO_WUNLOCK(vso);
                return;
        }

        /* Last user: snapshot what is needed to leave, then free the slot. */
        group = mc->vxlsomc_gaddr;
        source = mc->vxlsomc_saddr;
        ifidx = mc->vxlsomc_ifidx;
        bzero(mc, sizeof(*mc));
        VXLAN_SO_WUNLOCK(vso);

        /*
         * The result is deliberately ignored: the socket's membership in
         * this group may already have been removed if the join went through
         * an interface that has since been detached.
         */
        vxlan_socket_mc_leave_group(vso, &group, &source, ifidx);
}

/*
 * Find the softc attached to this socket with network identifier 'vni'.
 * The socket lock must already be held.
 *
 * Returns the softc with a reference acquired for the caller, or NULL if
 * no attached softc uses that VNI.
 */
static struct vxlan_softc *
vxlan_socket_lookup_softc_locked(struct vxlan_socket *vso, uint32_t vni)
{
        struct vxlan_softc *iter, *match;
        uint32_t bucket;

        VXLAN_SO_LOCK_ASSERT(vso);
        bucket = VXLAN_SO_VNI_HASH(vni);

        match = NULL;
        LIST_FOREACH(iter, &vso->vxlso_vni_hash[bucket], vxl_entry) {
                if (iter->vxl_vni != vni)
                        continue;

                VXLAN_ACQUIRE(iter);
                match = iter;
                break;
        }

        return (match);
}

/*
 * Locking wrapper around vxlan_socket_lookup_softc_locked(): performs the
 * VNI lookup under the socket read lock and returns its result (a
 * referenced softc, or NULL).
 */
static struct vxlan_softc *
vxlan_socket_lookup_softc(struct vxlan_socket *vso, uint32_t vni)
{
        struct rm_priotracker tracker;
        struct vxlan_softc *found;

        VXLAN_SO_RLOCK(vso, &tracker);
        found = vxlan_socket_lookup_softc_locked(vso, vni);
        VXLAN_SO_RUNLOCK(vso, &tracker);

        return (found);
}

/*
 * Attach 'sc' to the socket's VNI hash.
 *
 * Under the socket write lock, the hash is first checked for another softc
 * with the same VNI.  If there is none, a reference is taken on 'sc' and it
 * is linked into the bucket for its VNI.
 *
 * Returns 0 on success, or EEXIST if the VNI is already present on this
 * socket (the reference obtained by the lookup is dropped again).
 */
static int
vxlan_socket_insert_softc(struct vxlan_socket *vso, struct vxlan_softc *sc)
{
        struct vxlan_softc *existing;
        uint32_t vni, bucket;

        vni = sc->vxl_vni;
        bucket = VXLAN_SO_VNI_HASH(vni);

        VXLAN_SO_WLOCK(vso);
        existing = vxlan_socket_lookup_softc_locked(vso, vni);
        if (existing == NULL) {
                VXLAN_ACQUIRE(sc);
                LIST_INSERT_HEAD(&vso->vxlso_vni_hash[bucket], sc, vxl_entry);
        }
        VXLAN_SO_WUNLOCK(vso);

        if (existing != NULL) {
                /* Drop the lookup's reference outside the socket lock. */
                vxlan_release(existing);
                return (EEXIST);
        }

        return (0);
}

/*
 * Detach 'sc' from the socket's VNI hash under the socket write lock, then
 * drop a reference on the softc with vxlan_release() once the lock has been
 * released.
 */
static void
vxlan_socket_remove_softc(struct vxlan_socket *vso, struct vxlan_softc *sc)
{

        VXLAN_SO_WLOCK(vso);
        LIST_REMOVE(sc, vxl_entry);
        VXLAN_SO_WUNLOCK(vso);

        /* NOTE(review): presumably pairs with the VXLAN_ACQUIRE() taken at insert time. */
        vxlan_release(sc);
}

/*
 * Return the multicast output interface recorded in the softc's multicast
 * options, with an interface reference taken for the caller.
 *
 * 'ipv4' selects between the IPv4 (vxl_im4o) and IPv6 (vxl_im6o) options.
 * Returns NULL if the selected options structure does not exist or names no
 * interface.  The softc lock must be held.
 */
static struct ifnet *
vxlan_multicast_if_ref(struct vxlan_softc *sc, int ipv4)
{
        struct ifnet *mc_ifp;

        VXLAN_LOCK_ASSERT(sc);

        mc_ifp = NULL;
        if (ipv4) {
                if (sc->vxl_im4o != NULL)
                        mc_ifp = sc->vxl_im4o->imo_multicast_ifp;
        } else {
                if (sc->vxl_im6o != NULL)
                        mc_ifp = sc->vxl_im6o->im6o_multicast_ifp;
        }

        if (mc_ifp != NULL)
                if_ref(mc_ifp);

        return (mc_ifp);
}

/*
 * Release the softc's multicast state: drop the reference on the multicast
 * interface (clearing the cached pointer and index) and free the IPv4 and
 * IPv6 multicast option structures.  Every piece is optional and each
 * pointer is reset to NULL afterwards, so the function may be called again.
 */
static void
vxlan_free_multicast(struct vxlan_softc *sc)
{
        struct ifnet *mc_ifp;

        mc_ifp = sc->vxl_mc_ifp;
        if (mc_ifp != NULL) {
                if_rele(mc_ifp);
                sc->vxl_mc_ifp = NULL;
                sc->vxl_mc_ifindex = 0;
        }

        if (sc->vxl_im4o != NULL) {
                free(sc->vxl_im4o, M_VXLAN);
                sc->vxl_im4o = NULL;
        }

        if (sc->vxl_im6o != NULL) {
                free(sc->vxl_im6o, M_VXLAN);
                sc->vxl_im6o = NULL;
        }
}

/*
 * Resolve the configured multicast interface name to an ifnet.
 *
 * On success the referenced interface and its index are stored in the
 * softc.  Returns ENOENT if no interface has that name, or ENOTSUP if the
 * interface lacks IFF_MULTICAST (its reference is dropped again); both
 * cases are logged on the vxlan interface.
 */
static int
vxlan_setup_multicast_interface(struct vxlan_softc *sc)
{
        struct ifnet *mc_ifp;

        mc_ifp = ifunit_ref(sc->vxl_mc_ifname);
        if (mc_ifp == NULL) {
                if_printf(sc->vxl_ifp, "multicast interface %s does "
                    "not exist\n", sc->vxl_mc_ifname);
                return (ENOENT);
        }

        if ((mc_ifp->if_flags & IFF_MULTICAST) != 0) {
                sc->vxl_mc_ifp = mc_ifp;
                sc->vxl_mc_ifindex = mc_ifp->if_index;
                return (0);
        }

        if_printf(sc->vxl_ifp, "interface %s does not support "
             "multicast\n", sc->vxl_mc_ifname);
        if_rele(mc_ifp);

        return (ENOTSUP);
}

/*
 * Prepare the softc for sending to a multicast destination.
 *
 * If a multicast interface name is configured it is resolved first.  Then a
 * multicast options structure matching the destination's address family is
 * allocated and filled in with the multicast interface and the TTL (hop
 * limit for IPv6).
 *
 * Returns 0 on success or the error from
 * vxlan_setup_multicast_interface().
 */
static int
vxlan_setup_multicast(struct vxlan_softc *sc)
{
        const union vxlan_sockaddr *group;
        int error;

        if (sc->vxl_mc_ifname[0] != '\0') {
                error = vxlan_setup_multicast_interface(sc);
                if (error != 0)
                        return (error);
        }

        group = &sc->vxl_dst_addr;

        /*
         * Build a multicast options structure with just enough filled in
         * for the corresponding IP output routine.  Such a structure
         * normally lives in the socket, but our sockets may be shared by
         * several interfaces, so each softc keeps its own.
         */
        if (VXLAN_SOCKADDR_IS_IPV4(group)) {
                sc->vxl_im4o = malloc(sizeof(struct ip_moptions), M_VXLAN,
                    M_ZERO | M_WAITOK);
                sc->vxl_im4o->imo_multicast_ifp = sc->vxl_mc_ifp;
                sc->vxl_im4o->imo_multicast_ttl = sc->vxl_ttl;
                sc->vxl_im4o->imo_multicast_vif = -1;
        } else if (VXLAN_SOCKADDR_IS_IPV6(group)) {
                sc->vxl_im6o = malloc(sizeof(struct ip6_moptions), M_VXLAN,
                    M_ZERO | M_WAITOK);
                sc->vxl_im6o->im6o_multicast_ifp = sc->vxl_mc_ifp;
                sc->vxl_im6o->im6o_multicast_hlim = sc->vxl_ttl;
        }

        return (0);
}

/*
 * Attach the softc to a vxlan_socket for its configured source address,
 * creating the socket or reusing an existing one, and perform the multicast
 * setup when the destination is a multicast address.
 *
 * On success sc->vxl_sock points at the socket and the softc is linked into
 * that socket's VNI hash.  On failure everything acquired here (multicast
 * slot, multicast state, socket reference) is released again.
 *
 * Returns 0 on success or an errno value.
 */
static int
vxlan_setup_socket(struct vxlan_softc *sc)
{
        struct vxlan_socket *vso;
        struct ifnet *ifp;
        union vxlan_sockaddr *saddr, *daddr;
        int multicast, error;

        vso = NULL;
        ifp = sc->vxl_ifp;
        saddr = &sc->vxl_src_addr;
        daddr = &sc->vxl_dst_addr;

        multicast = vxlan_sockaddr_in_multicast(daddr);
        MPASS(multicast != -1);
        /* -1 marks "no multicast slot held"; the cleanup path checks it. */
        sc->vxl_vso_mc_index = -1;

        /*
         * Try to create the socket. If that fails, attempt to use an
         * existing socket.
         */
        error = vxlan_socket_create(ifp, multicast, saddr, &vso);
        if (error) {
                if (multicast != 0)
                        vso = vxlan_socket_mc_lookup(saddr);
                else
                        vso = vxlan_socket_lookup(saddr);

                if (vso == NULL) {
                        if_printf(ifp, "cannot create socket (error: %d), "
                            "and no existing socket found\n", error);
                        goto out;
                }
        }

        if (multicast != 0) {
                error = vxlan_setup_multicast(sc);
                if (error)
                        goto out;

                error = vxlan_socket_mc_add_group(vso, daddr, saddr,
                    sc->vxl_mc_ifindex, &sc->vxl_vso_mc_index);
                if (error)
                        goto out;
        }

        /* Set the back pointer before inserting; it is reset on failure. */
        sc->vxl_sock = vso;
        error = vxlan_socket_insert_softc(vso, sc);
        if (error) {
                sc->vxl_sock = NULL;
                if_printf(ifp, "network identifier %d already exists in "
                    "this socket\n", sc->vxl_vni);
                goto out;
        }

        return (0);

out:
        /*
         * Failure path.  Nothing needs undoing when no socket was obtained;
         * otherwise release the multicast slot (if one was taken), free the
         * multicast state, and drop the socket reference.
         */
        if (vso != NULL) {
                if (sc->vxl_vso_mc_index != -1) {
                        vxlan_socket_mc_release_group_by_idx(vso,
                            sc->vxl_vso_mc_index);
                        sc->vxl_vso_mc_index = -1;
                }
                if (multicast != 0)
                        vxlan_free_multicast(sc);
                vxlan_socket_release(vso);
        }

        return (error);
}

#ifdef INET6
/*
 * Set V_zero_checksum_port (reported to the user as "rfc6935_port") to this
 * interface's UDP port when that is possible.
 *
 * Nothing is done unless the source address is IPv6.  The value is only
 * set when the source and destination ports are equal and it is currently
 * zero; every other outcome except "already set to this port" is logged on
 * the vxlan interface.
 */
static void
vxlan_setup_zero_checksum_port(struct vxlan_softc *sc)
{
        int sport;

        if (!VXLAN_SOCKADDR_IS_IPV6(&sc->vxl_src_addr))
                return;

        MPASS(sc->vxl_src_addr.in6.sin6_port != 0);
        MPASS(sc->vxl_dst_addr.in6.sin6_port != 0);

        sport = ntohs(sc->vxl_src_addr.in6.sin6_port);

        /* The two ports are compared in network byte order. */
        if (sc->vxl_src_addr.in6.sin6_port != sc->vxl_dst_addr.in6.sin6_port) {
                if_printf(sc->vxl_ifp, "port %d in src address does not match "
                    "port %d in dst address, rfc6935_port (%d) not updated.\n",
                    sport,
                    ntohs(sc->vxl_dst_addr.in6.sin6_port),
                    V_zero_checksum_port);
                return;
        }

        if (V_zero_checksum_port == 0) {
                V_zero_checksum_port = sport;
                if_printf(sc->vxl_ifp, "rfc6935_port set to %d\n",
                    V_zero_checksum_port);
                return;
        }

        /* Already configured: complain only if it names a different port. */
        if (V_zero_checksum_port != sport) {
                if_printf(sc->vxl_ifp, "rfc6935_port is already set to "
                    "%d, cannot set it to %d.\n", V_zero_checksum_port,
                    sport);
        }
}
#endif

static void
vxlan_setup_interface_hdrlen(struct vxlan_softc *sc)
{
        struct ifnet *ifp;

        VXLAN_LOCK_WASSERT(sc);

        ifp = sc->vxl_ifp;
        ifp->if_hdrlen = ETHER_HDR_LEN + sizeof(struct vxlanudphdr);

        if (VXLAN_SOCKADDR_IS_IPV4(&sc->vxl_dst_addr) != 0)
                ifp->if_hdrlen += sizeof(struct ip);
        else if (VXLAN_SOCKADDR_IS_IPV6(&sc->vxl_dst_addr) != 0)
                ifp->if_hdrlen += sizeof(struct ip6_hdr);

        if ((sc->vxl_flags & VXLAN_FLAG_USER_MTU) == 0)
                ifp->if_mtu = ETHERMTU - ifp->if_hdrlen;
}

/*
 * Validate the softc's configuration before the interface is brought up.
 *
 * The checks run in order and the first failure is reported on the vxlan
 * interface: VNI validity, supported source and destination address types,
 * a non-wildcard destination, a multicast interface only together with a
 * multicast destination, matching address families when a source address is
 * given, and non-zero local and remote ports.
 *
 * Returns 0 if the configuration is usable, EINVAL otherwise.
 */
static int
vxlan_valid_init_config(struct vxlan_softc *sc)
{
        const char *reason;

        if (vxlan_check_vni(sc->vxl_vni) != 0)
                reason = "invalid virtual network identifier specified";
        else if (vxlan_sockaddr_supported(&sc->vxl_src_addr, 1) == 0)
                reason = "source address type is not supported";
        else if (vxlan_sockaddr_supported(&sc->vxl_dst_addr, 0) == 0)
                reason = "destination address type is not supported";
        else if (vxlan_sockaddr_in_any(&sc->vxl_dst_addr) != 0)
                reason = "no valid destination address specified";
        else if (vxlan_sockaddr_in_multicast(&sc->vxl_dst_addr) == 0 &&
            sc->vxl_mc_ifname[0] != '\0')
                reason = "can only specify interface with a group address";
        else if (vxlan_sockaddr_in_any(&sc->vxl_src_addr) == 0 &&
            (VXLAN_SOCKADDR_IS_IPV4(&sc->vxl_src_addr) ^
            VXLAN_SOCKADDR_IS_IPV4(&sc->vxl_dst_addr)))
                reason = "source and destination address must both "
                    "be either IPv4 or IPv6";
        else if (sc->vxl_src_addr.in4.sin_port == 0)
                reason = "local port not specified";
        else if (sc->vxl_dst_addr.in4.sin_port == 0)
                reason = "remote port not specified";
        else
                return (0);

        if_printf(sc->vxl_ifp, "cannot initialize interface: %s\n", reason);
        return (EINVAL);
}

/*
 * Sleep until VXLAN_FLAG_INIT is no longer set on the softc.
 *
 * The softc write lock must be held.  Each sleep is bounded by a timeout of
 * 'hz' ticks, after which the flag is checked again.
 */
static void
vxlan_init_wait(struct vxlan_softc *sc)
{

        VXLAN_LOCK_WASSERT(sc);
        while ((sc->vxl_flags & VXLAN_FLAG_INIT) != 0) {
                rm_sleep(sc, &sc->vxl_lock, 0, "vxlint", hz);
        }
}

/*
 * Mark initialization as finished: clear VXLAN_FLAG_INIT under the softc
 * write lock and wake any thread sleeping on the softc (for example in
 * vxlan_init_wait()).
 */
static void
vxlan_init_complete(struct vxlan_softc *sc)
{

        VXLAN_WLOCK(sc);
        sc->vxl_flags &= ~VXLAN_FLAG_INIT;
        /* The wakeup is issued while the lock is still held. */
        wakeup(sc);
        VXLAN_WUNLOCK(sc);
}

/*
 * Bring the vxlan interface up.
 *
 * 'xsc' is the softc.  Under the global vxlan_sx lock this validates the
 * configuration, sets up the socket, initializes the default forwarding
 * entry, marks the interface running, starts the forwarding-table timer,
 * reports link up and invokes the vxlan_start event handlers.  If the
 * interface is already running nothing is done.  Failures are not returned
 * to the caller; the interface simply is not marked running.
 */
static void
vxlan_init(void *xsc)
{
        /* All-zero MAC (static storage) used for the default entry. */
        static const uint8_t empty_mac[ETHER_ADDR_LEN];
        struct vxlan_softc *sc;
        struct ifnet *ifp;

        sc = xsc;
        ifp = sc->vxl_ifp;

        sx_xlock(&vxlan_sx);
        VXLAN_WLOCK(sc);
        if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
                VXLAN_WUNLOCK(sc);
                sx_xunlock(&vxlan_sx);
                return;
        }
        /*
         * Flag the initialization as in progress, then drop the softc lock
         * for the setup work below; vxlan_sx stays held throughout.
         */
        sc->vxl_flags |= VXLAN_FLAG_INIT;
        VXLAN_WUNLOCK(sc);

        if (vxlan_valid_init_config(sc) != 0)
                goto out;

        if (vxlan_setup_socket(sc) != 0)
                goto out;

#ifdef INET6
        vxlan_setup_zero_checksum_port(sc);
#endif

        /* Initialize the default forwarding entry. */
        vxlan_ftable_entry_init(sc, &sc->vxl_default_fe, empty_mac,
            &sc->vxl_dst_addr.sa, VXLAN_FE_FLAG_STATIC);

        VXLAN_WLOCK(sc);
        ifp->if_drv_flags |= IFF_DRV_RUNNING;
        callout_reset(&sc->vxl_callout, vxlan_ftable_prune_period * hz,
            vxlan_timer, sc);
        VXLAN_WUNLOCK(sc);

        if_link_state_change(ifp, LINK_STATE_UP);

        EVENTHANDLER_INVOKE(vxlan_start, ifp, sc->vxl_src_addr.in4.sin_family,
            ntohs(sc->vxl_src_addr.in4.sin_port));
out:
        /* Success and failure both clear VXLAN_FLAG_INIT and wake waiters. */
        vxlan_init_complete(sc);
        sx_xunlock(&vxlan_sx);
}

/*
 * Drop a reference on the softc and, when VXLAN_RELEASE() returns non-zero,
 * wake any thread sleeping on the softc.
 */
static void
vxlan_release(struct vxlan_softc *sc)
{

        if (VXLAN_RELEASE(sc) == 0)
                return;

        /*
         * Once our reference is gone the softc may be destroyed at any
         * moment, so this wakeup cannot be serialized with the softc lock.
         * The sleepers use a timeout, which makes a missed wakeup
         * unfortunate but not fatal.
         */
        wakeup(sc);
}

/*
 * Sleep until VXLAN_FLAG_TEARDOWN is no longer set on the softc.
 *
 * The softc write lock must be held.  Each sleep is bounded by a timeout of
 * 'hz' ticks, after which the flag is checked again.
 */
static void
vxlan_teardown_wait(struct vxlan_softc *sc)
{

        VXLAN_LOCK_WASSERT(sc);
        while ((sc->vxl_flags & VXLAN_FLAG_TEARDOWN) != 0) {
                rm_sleep(sc, &sc->vxl_lock, 0, "vxltrn", hz);
        }
}

/*
 * Mark teardown as finished: clear VXLAN_FLAG_TEARDOWN under the softc
 * write lock and wake any thread sleeping on the softc (for example in
 * vxlan_teardown_wait()).
 */
static void
vxlan_teardown_complete(struct vxlan_softc *sc)
{

        VXLAN_WLOCK(sc);
        sc->vxl_flags &= ~VXLAN_FLAG_TEARDOWN;
        /* The wakeup is issued while the lock is still held. */
        wakeup(sc);
        VXLAN_WUNLOCK(sc);
}

/*
 * Take the interface down and detach it from its socket.
 *
 * Must be entered with vxlan_sx exclusively held, the softc write lock
 * held, and VXLAN_FLAG_TEARDOWN already set.  The softc lock is dropped
 * inside this function and is NOT held on return; vxlan_sx remains held.
 */
static void
vxlan_teardown_locked(struct vxlan_softc *sc)
{
        struct ifnet *ifp;
        struct vxlan_socket *vso;
        bool running;

        sx_assert(&vxlan_sx, SA_XLOCKED);
        VXLAN_LOCK_WASSERT(sc);
        MPASS(sc->vxl_flags & VXLAN_FLAG_TEARDOWN);

        ifp = sc->vxl_ifp;
        ifp->if_flags &= ~IFF_UP;
        /* Remember whether it was running; decides the vxlan_stop event. */
        running = (ifp->if_drv_flags & IFF_DRV_RUNNING) != 0;
        ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
        callout_stop(&sc->vxl_callout);
        /* Take the socket pointer away from the softc while still locked. */
        vso = sc->vxl_sock;
        sc->vxl_sock = NULL;

        VXLAN_WUNLOCK(sc);
        if_link_state_change(ifp, LINK_STATE_DOWN);
        if (running)
                EVENTHANDLER_INVOKE(vxlan_stop, ifp,
                    sc->vxl_src_addr.in4.sin_family,
                    ntohs(sc->vxl_src_addr.in4.sin_port));

        if (vso != NULL) {
                vxlan_socket_remove_softc(vso, sc);

                if (sc->vxl_vso_mc_index != -1) {
                        vxlan_socket_mc_release_group_by_idx(vso,
                            sc->vxl_vso_mc_index);
                        sc->vxl_vso_mc_index = -1;
                }
        }

        /*
         * Wait for the softc's reference count to reach zero.  Each sleep
         * times out after 'hz' ticks and the count is then rechecked.
         */
        VXLAN_WLOCK(sc);
        while (sc->vxl_refcnt != 0)
                rm_sleep(sc, &sc->vxl_lock, 0, "vxldrn", hz);
        VXLAN_WUNLOCK(sc);

        callout_drain(&sc->vxl_callout);

        vxlan_free_multicast(sc);
        if (vso != NULL)
                vxlan_socket_release(vso);

        /* Clears VXLAN_FLAG_TEARDOWN and wakes any waiters. */
        vxlan_teardown_complete(sc);
}

/*
 * Tear the interface down, or wait for a teardown that is already in
 * progress.
 *
 * If VXLAN_FLAG_TEARDOWN is already set this only waits for the flag to
 * clear.  Otherwise the flag is set and vxlan_teardown_locked() does the
 * work.  vxlan_sx is held exclusively for the duration of the call.
 */
static void
vxlan_teardown(struct vxlan_softc *sc)
{

        sx_xlock(&vxlan_sx);
        VXLAN_WLOCK(sc);
        if ((sc->vxl_flags & VXLAN_FLAG_TEARDOWN) == 0) {
                sc->vxl_flags |= VXLAN_FLAG_TEARDOWN;
                /* Returns with the softc lock already dropped. */
                vxlan_teardown_locked(sc);
        } else {
                vxlan_teardown_wait(sc);
                VXLAN_WUNLOCK(sc);
        }
        sx_xunlock(&vxlan_sx);
}

/*
 * Handle the departure of interface 'ifp' for one softc.
 *
 * If 'ifp' is this softc's multicast interface and no teardown is already
 * flagged, VXLAN_FLAG_TEARDOWN is set and the softc is queued on 'list'.
 * Otherwise nothing is changed.  The check and update are done under the
 * softc write lock.
 */
static void
vxlan_ifdetach(struct vxlan_softc *sc, struct ifnet *ifp,
    struct vxlan_softc_head *list)
{

        VXLAN_WLOCK(sc);
        if (sc->vxl_mc_ifp == ifp &&
            (sc->vxl_flags & VXLAN_FLAG_TEARDOWN) == 0) {
                sc->vxl_flags |= VXLAN_FLAG_TEARDOWN;
                LIST_INSERT_HEAD(list, sc, vxl_ifdetach_list);
        }
        VXLAN_WUNLOCK(sc);
}

/*
 * Periodic callout handler; 'xsc' is the softc.
 *
 * Expires forwarding-table entries and re-arms the callout to fire again
 * after vxlan_ftable_prune_period seconds.  Asserts that the softc write
 * lock is held on entry.
 */
static void
vxlan_timer(void *xsc)
{
        struct vxlan_softc *sc = xsc;

        VXLAN_LOCK_WASSERT(sc);

        vxlan_ftable_expire(sc);
        callout_schedule(&sc->vxl_callout, vxlan_ftable_prune_period * hz);
}

static int
vxlan_ioctl_ifflags(struct vxlan_softc *sc)
{
        struct ifnet *ifp;

        ifp = sc->vxl_ifp;

        if (ifp->if_flags & IFF_UP) {
                if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
                        vxlan_init(sc);
        } else {
                if (ifp->if_drv_flags & IFF_DRV_RUNNING)
                        vxlan_teardown(sc);
        }

        return (0);
}

/*
 * Control handler: copy the softc's current configuration into the
 * struct ifvxlancfg that 'arg' points to.
 *
 * The output structure is zeroed first and then filled in under the softc
 * read lock.  With INET6, sa6_recoverscope() is applied afterwards to any
 * IPv6 address in the copy.  Always returns 0.
 */
static int
vxlan_ctrl_get_config(struct vxlan_softc *sc, void *arg)
{
        struct rm_priotracker tracker;
        struct ifvxlancfg *cfg;

        cfg = arg;
        bzero(cfg, sizeof(*cfg));

        VXLAN_RLOCK(sc, &tracker);

        /* Identity and tunnel endpoints. */
        cfg->vxlc_vni = sc->vxl_vni;
        memcpy(&cfg->vxlc_local_sa, &sc->vxl_src_addr,
            sizeof(union vxlan_sockaddr));
        memcpy(&cfg->vxlc_remote_sa, &sc->vxl_dst_addr,
            sizeof(union vxlan_sockaddr));
        cfg->vxlc_mc_ifindex = sc->vxl_mc_ifindex;

        /* Forwarding table state and limits. */
        cfg->vxlc_ftable_cnt = sc->vxl_ftable_cnt;
        cfg->vxlc_ftable_max = sc->vxl_ftable_max;
        cfg->vxlc_ftable_timeout = sc->vxl_ftable_timeout;

        /* Remaining tunables. */
        cfg->vxlc_port_min = sc->vxl_min_port;
        cfg->vxlc_port_max = sc->vxl_max_port;
        cfg->vxlc_learn = (sc->vxl_flags & VXLAN_FLAG_LEARN) ? 1 : 0;
        cfg->vxlc_ttl = sc->vxl_ttl;

        VXLAN_RUNLOCK(sc, &tracker);

#ifdef INET6
        if (VXLAN_SOCKADDR_IS_IPV6(&cfg->vxlc_local_sa))
                sa6_recoverscope(&cfg->vxlc_local_sa.in6);
        if (VXLAN_SOCKADDR_IS_IPV6(&cfg->vxlc_remote_sa))
                sa6_recoverscope(&cfg->vxlc_remote_sa.in6);
#endif

        return (0);
}

/*
 * Control handler: set the softc's network identifier from the
 * struct ifvxlancmd that 'arg' points to.
 *
 * Returns EINVAL if vxlan_check_vni() rejects the value, EBUSY if
 * vxlan_can_change_config() reports that the configuration cannot be
 * changed right now, and 0 once the VNI has been stored.
 */
static int
vxlan_ctrl_set_vni(struct vxlan_softc *sc, void *arg)
{
        struct ifvxlancmd *cmd;
        int error;

        cmd = arg;
        if (vxlan_check_vni(cmd->vxlcmd_vni) != 0)
                return (EINVAL);

        error = EBUSY;

        VXLAN_WLOCK(sc);
        if (vxlan_can_change_config(sc)) {
                sc->vxl_vni = cmd->vxlcmd_vni;
                error = 0;
        }
        VXLAN_WUNLOCK(sc);

        return (error);
}

static int
vxlan_ctrl_set_local_addr(struct vxlan_softc *sc, void *arg)
{
        struct ifvxlancmd *cmd;
        union vxlan_sockaddr *vxlsa;
        int error;

        cmd = arg;
        vxlsa = &cmd->vxlcmd_sa;

        if (!VXLAN_SOCKADDR_IS_IPV46(vxlsa))
                return (EINVAL);
        if (vxlan_sockaddr_in_multicast(vxlsa) != 0)
                return (EINVAL);
        if (VXLAN_SOCKADDR_IS_IPV6(vxlsa)) {
                error = vxlan_sockaddr_in6_embedscope(vxlsa);
                if (error)
                        return (error);
        }

        VXLAN_WLOCK(sc);
        if (vxlan_can_change_config(sc)) {
                vxlan_sockaddr_in_copy(&sc->vxl_src_addr, &vxlsa->sa);
                vxlan_set_hwcaps(sc);
                error = 0;
        } else
                error = EBUSY;
        VXLAN_WUNLOCK(sc);

        return (error);
}

static int
vxlan_ctrl_set_remote_addr(struct vxlan_softc *sc, void *arg)
{
        struct ifvxlancmd *cmd;
        union vxlan_sockaddr *vxlsa;
        int error;

        cmd = arg;
        vxlsa = &cmd->vxlcmd_sa;

        if (!VXLAN_SOCKADDR_IS_IPV46(vxlsa))
                return (EINVAL);
        if (VXLAN_SOCKADDR_IS_IPV6(vxlsa)) {
                error = vxlan_sockaddr_in6_embedscope(vxlsa);
                if (error)
                        return (error);
        }

        VXLAN_WLOCK(sc);
        if (vxlan_can_change_config(sc)) {
                vxlan_sockaddr_in_copy(&sc->vxl_dst_addr, &vxlsa->sa);
                vxlan_setup_interface_hdrlen(sc);
                error = 0;
        } else
                error = EBUSY;
        VXLAN_WUNLOCK(sc);

        return (error);
}

static int
vxlan_ctrl_set_local_port(struct vxlan_softc *sc, void *arg)
{
        struct ifvxlancmd *cmd;
        int error;

        cmd = arg;

        if (cmd->vxlcmd_port == 0)
                return (EINVAL);

        VXLAN_WLOCK(sc);
        if (vxlan_can_change_config(sc)) {
                sc->vxl_src_addr.in4.sin_port = htons(cmd->vxlcmd_port);
                error = 0;
        } else
                error = EBUSY;
        VXLAN_WUNLOCK(sc);

        return (error);
}

static int
vxlan_ctrl_set_remote_port(struct vxlan_softc *sc, void *arg)
{
        struct ifvxlancmd *cmd;
        int error;

        cmd = arg;

        if (cmd->vxlcmd_port == 0)
                return (EINVAL);

        VXLAN_WLOCK(sc);
        if (vxlan_can_change_config(sc)) {
                sc->vxl_dst_addr.in4.sin_port = htons(cmd->vxlcmd_port);
                error = 0;
        } else
                error = EBUSY;
        VXLAN_WUNLOCK(sc);

        return (error);
}

static int
vxlan_ctrl_set_port_range(struct vxlan_softc *sc, void *arg)
{
        struct ifvxlancmd *cmd;
        uint16_t min, max;
        int error;

        cmd = arg;
        min = cmd->vxlcmd_port_min;
        max = cmd->vxlcmd_port_max;

        if (max < min)
                return (EINVAL);

        VXLAN_WLOCK(sc);
        if (vxlan_can_change_config(sc)) {
                sc->vxl_min_port = min;
                sc->vxl_max_port = max;
                error = 0;
        } else
                error = EBUSY;
        VXLAN_WUNLOCK(sc);

        return (error);
}

/*
 * Control handler: set the forwarding table entry timeout.
 *
 * Returns EINVAL when vxlan_check_ftable_timeout() rejects the value.  No
 * vxlan_can_change_config() check is made for this setting.
 */
static int
vxlan_ctrl_set_ftable_timeout(struct vxlan_softc *sc, void *arg)
{
        struct ifvxlancmd *cmd = arg;
        int error;

        VXLAN_WLOCK(sc);
        if (vxlan_check_ftable_timeout(cmd->vxlcmd_ftable_timeout) != 0) {
                error = EINVAL;
        } else {
                sc->vxl_ftable_timeout = cmd->vxlcmd_ftable_timeout;
                error = 0;
        }
        VXLAN_WUNLOCK(sc);

        return (error);
}

/*
 * Control handler: set the maximum forwarding table size.
 *
 * Returns EINVAL when vxlan_check_ftable_max() rejects the value.  No
 * vxlan_can_change_config() check is made for this setting.
 */
static int
vxlan_ctrl_set_ftable_max(struct vxlan_softc *sc, void *arg)
{
        struct ifvxlancmd *cmd = arg;
        int error;

        VXLAN_WLOCK(sc);
        if (vxlan_check_ftable_max(cmd->vxlcmd_ftable_max) != 0) {
                error = EINVAL;
        } else {
                sc->vxl_ftable_max = cmd->vxlcmd_ftable_max;
                error = 0;
        }
        VXLAN_WUNLOCK(sc);

        return (error);
}

static int
vxlan_ctrl_set_multicast_if(struct vxlan_softc * sc, void *arg)
{
        struct ifvxlancmd *cmd;
        int error;

        cmd = arg;

        VXLAN_WLOCK(sc);
        if (vxlan_can_change_config(sc)) {
                strlcpy(sc->vxl_mc_ifname, cmd->vxlcmd_ifname, IFNAMSIZ);
                vxlan_set_hwcaps(sc);
                error = 0;
        } else
                error = EBUSY;
        VXLAN_WUNLOCK(sc);

        return (error);
}

/*
 * Control handler: set the TTL / hop limit used for encapsulated packets.
 *
 * The new value is also written into the IPv4 and IPv6 multicast option
 * structures when they exist.  Returns EINVAL when vxlan_check_ttl()
 * rejects the value.
 */
static int
vxlan_ctrl_set_ttl(struct vxlan_softc *sc, void *arg)
{
        struct ifvxlancmd *cmd = arg;
        int error;

        VXLAN_WLOCK(sc);
        if (vxlan_check_ttl(cmd->vxlcmd_ttl) != 0) {
                error = EINVAL;
        } else {
                sc->vxl_ttl = cmd->vxlcmd_ttl;
                /* Keep existing multicast options in step with the new TTL. */
                if (sc->vxl_im4o != NULL)
                        sc->vxl_im4o->imo_multicast_ttl = sc->vxl_ttl;
                if (sc->vxl_im6o != NULL)
                        sc->vxl_im6o->im6o_multicast_hlim = sc->vxl_ttl;
                error = 0;
        }
        VXLAN_WUNLOCK(sc);

        return (error);
}

/*
 * Control handler: enable or disable address learning according to the
 * VXLAN_CMD_FLAG_LEARN bit of the command flags.  Always returns 0.
 */
static int
vxlan_ctrl_set_learn(struct vxlan_softc *sc, void *arg)
{
        struct ifvxlancmd *cmd = arg;
        int enable;

        enable = (cmd->vxlcmd_flags & VXLAN_CMD_FLAG_LEARN) != 0;

        VXLAN_WLOCK(sc);
        if (enable)
                sc->vxl_flags |= VXLAN_FLAG_LEARN;
        else
                sc->vxl_flags &= ~VXLAN_FLAG_LEARN;
        VXLAN_WUNLOCK(sc);

        return (0);
}

static int
vxlan_ctrl_ftable_entry_add(struct vxlan_softc *sc, void *arg)
{
        union vxlan_sockaddr vxlsa;
        struct ifvxlancmd *cmd;
        struct vxlan_ftable_entry *fe;
        int error;

        cmd = arg;
        vxlsa = cmd->vxlcmd_sa;

        if (!VXLAN_SOCKADDR_IS_IPV46(&vxlsa))
                return (EINVAL);
        if (vxlan_sockaddr_in_any(&vxlsa) != 0)
                return (EINVAL);
        if (vxlan_sockaddr_in_multicast(&vxlsa) != 0)
                return (EINVAL);
        /* BMV: We could support both IPv4 and IPv6 later. */
        if (vxlsa.sa.sa_family != sc->vxl_dst_addr.sa.sa_family)
                return (EAFNOSUPPORT);

        if (VXLAN_SOCKADDR_IS_IPV6(&vxlsa)) {
                error = vxlan_sockaddr_in6_embedscope(&vxlsa);
                if (error)
                        return (error);
        }

        fe = vxlan_ftable_entry_alloc();
        if (fe == NULL)
                return (ENOMEM);

        if (vxlsa.in4.sin_port == 0)
                vxlsa.in4.sin_port = sc->vxl_dst_addr.in4.sin_port;

        vxlan_ftable_entry_init(sc, fe, cmd->vxlcmd_mac, &vxlsa.sa,
            VXLAN_FE_FLAG_STATIC);

        VXLAN_WLOCK(sc);
        error = vxlan_ftable_entry_insert(sc, fe);
        VXLAN_WUNLOCK(sc);

        if (error)
                vxlan_ftable_entry_free(fe);

        return (error);
}

/*
 * Control handler: remove the forwarding table entry for the MAC address in
 * the command.  Returns ENOENT when no such entry exists.
 */
static int
vxlan_ctrl_ftable_entry_rem(struct vxlan_softc *sc, void *arg)
{
        struct ifvxlancmd *cmd = arg;
        struct vxlan_ftable_entry *entry;
        int error;

        VXLAN_WLOCK(sc);
        entry = vxlan_ftable_entry_lookup(sc, cmd->vxlcmd_mac);
        if (entry == NULL) {
                error = ENOENT;
        } else {
                vxlan_ftable_entry_destroy(sc, entry);
                error = 0;
        }
        VXLAN_WUNLOCK(sc);

        return (error);
}

/*
 * Control handler: flush the forwarding table.  The VXLAN_CMD_FLAG_FLUSH_ALL
 * bit of the command flags is passed through to vxlan_ftable_flush() as its
 * "all" argument.  Always returns 0.
 */
static int
vxlan_ctrl_flush(struct vxlan_softc *sc, void *arg)
{
        struct ifvxlancmd *cmd = arg;
        int flush_all;

        flush_all = cmd->vxlcmd_flags & VXLAN_CMD_FLAG_FLUSH_ALL;

        VXLAN_WLOCK(sc);
        vxlan_ftable_flush(sc, flush_all);
        VXLAN_WUNLOCK(sc);

        return (0);
}

/*
 * Dispatch a driver-specific ioctl through vxlan_control_table.
 *
 * 'get' is non-zero for the "get" flavour of the ioctl.  The request is
 * rejected with EINVAL when the command index is out of range, when the
 * direction does not match the table entry's COPYOUT flag, or when the
 * user-supplied length differs from the entry's argument size (or would not
 * fit the local argument buffer).  Entries flagged SUSER additionally
 * require PRIV_NET_VXLAN.  Errors from copyin(), the handler and copyout()
 * are returned as-is.
 */
static int
vxlan_ioctl_drvspec(struct vxlan_softc *sc, struct ifdrv *ifd, int get)
{
        union {
                struct ifvxlancfg       cfg;
                struct ifvxlancmd       cmd;
        } args;
        const struct vxlan_control *vc;
        int copies_out, error;

        if (ifd->ifd_cmd >= vxlan_control_table_size)
                return (EINVAL);
        vc = &vxlan_control_table[ifd->ifd_cmd];

        /* A "get" must pair with a copy-out command, and vice versa. */
        copies_out = (vc->vxlc_flags & VXLAN_CTRL_FLAG_COPYOUT) != 0;
        if ((get != 0) != (copies_out != 0))
                return (EINVAL);

        if ((vc->vxlc_flags & VXLAN_CTRL_FLAG_SUSER) != 0) {
                error = priv_check(curthread, PRIV_NET_VXLAN);
                if (error != 0)
                        return (error);
        }

        if (ifd->ifd_len != vc->vxlc_argsize ||
            ifd->ifd_len > sizeof(args))
                return (EINVAL);

        /* Handlers always see a fully initialized argument buffer. */
        bzero(&args, sizeof(args));
        if ((vc->vxlc_flags & VXLAN_CTRL_FLAG_COPYIN) != 0) {
                error = copyin(ifd->ifd_data, &args, ifd->ifd_len);
                if (error != 0)
                        return (error);
        }

        error = vc->vxlc_func(sc, &args);
        if (error != 0)
                return (error);

        if (copies_out)
                error = copyout(&args, ifd->ifd_data, ifd->ifd_len);

        return (error);
}

/*
 * Interface ioctl handler.
 *
 * Handles the driver-specific, flags, media, MTU, capability and tunnel FIB
 * requests itself; multicast add/delete requests succeed without doing
 * anything, and every other request is forwarded to ether_ioctl().
 */
static int
vxlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
        struct rm_priotracker tracker;
        struct vxlan_softc *sc = ifp->if_softc;
        struct ifreq *ifr = (struct ifreq *)data;
        int error = 0;

        switch (cmd) {
        case SIOCADDMULTI:
        case SIOCDELMULTI:
                /* Nothing to do. */
                break;

        case SIOCGDRVSPEC:
        case SIOCSDRVSPEC:
                error = vxlan_ioctl_drvspec(sc, (struct ifdrv *)data,
                    cmd == SIOCGDRVSPEC);
                break;

        case SIOCSIFFLAGS:
                error = vxlan_ioctl_ifflags(sc);
                break;

        case SIOCSIFMEDIA:
        case SIOCGIFMEDIA:
                error = ifmedia_ioctl(ifp, ifr, &sc->vxl_media, cmd);
                break;

        case SIOCSIFMTU:
                if (ifr->ifr_mtu >= ETHERMIN &&
                    ifr->ifr_mtu <= VXLAN_MAX_MTU) {
                        VXLAN_WLOCK(sc);
                        ifp->if_mtu = ifr->ifr_mtu;
                        /* Remember that the MTU was set explicitly. */
                        sc->vxl_flags |= VXLAN_FLAG_USER_MTU;
                        VXLAN_WUNLOCK(sc);
                } else {
                        error = EINVAL;
                }
                break;

        case SIOCSIFCAP:
                VXLAN_WLOCK(sc);
                error = vxlan_set_reqcap(sc, ifp, ifr->ifr_reqcap);
                if (error == 0)
                        vxlan_set_hwcaps(sc);
                VXLAN_WUNLOCK(sc);
                break;

        case SIOCGTUNFIB:
                VXLAN_RLOCK(sc, &tracker);
                ifr->ifr_fib = sc->vxl_fibnum;
                VXLAN_RUNLOCK(sc, &tracker);
                break;

        case SIOCSTUNFIB:
                error = priv_check(curthread, PRIV_NET_VXLAN);
                if (error != 0)
                        break;

                if (ifr->ifr_fib < rt_numfibs) {
                        VXLAN_WLOCK(sc);
                        sc->vxl_fibnum = ifr->ifr_fib;
                        VXLAN_WUNLOCK(sc);
                } else {
                        error = EINVAL;
                }
                break;

        default:
                error = ether_ioctl(ifp, cmd, data);
                break;
        }

        return (error);
}

#if defined(INET) || defined(INET6)
/*
 * Choose the UDP source port for an outgoing frame (host byte order).
 *
 * The port is vxl_min_port plus a hash reduced modulo the size of the
 * configured [vxl_min_port, vxl_max_port] range.  The hash is the mbuf's
 * flowid when M_HASHTYPE_ISHASH() says it carries one; otherwise it is a
 * Jenkins hash over the first ETHER_HDR_LEN bytes of the frame, keyed with
 * vxl_port_hash_key.
 */
static uint16_t
vxlan_pick_source_port(struct vxlan_softc *sc, struct mbuf *m)
{
        uint32_t hash;
        int range;

        hash = M_HASHTYPE_ISHASH(m) ? m->m_pkthdr.flowid :
            jenkins_hash(m->m_data, ETHER_HDR_LEN, sc->vxl_port_hash_key);
        range = sc->vxl_max_port - sc->vxl_min_port + 1;

        return (sc->vxl_min_port + (hash % range));
}

/*
 * Fill in the UDP and VXLAN headers located 'ipoff' bytes into the mbuf.
 *
 * 'srcport' and 'dstport' are stored unchanged.  The UDP length covers
 * everything from the UDP header to the end of the packet, and the UDP
 * checksum field is zeroed.  The VXLAN header gets the "valid VNI" flag and
 * the softc's VNI.
 */
static void
vxlan_encap_header(struct vxlan_softc *sc, struct mbuf *m, int ipoff,
    uint16_t srcport, uint16_t dstport)
{
        struct vxlanudphdr *vuh;
        int len;

        len = m->m_pkthdr.len - ipoff;
        MPASS(len >= sizeof(struct vxlanudphdr));
        vuh = mtodo(m, ipoff);

        /* VXLAN header. */
        vuh->vxlh_hdr.vxlh_flags = htonl(VXLAN_HDR_FLAGS_VALID_VNI);
        vuh->vxlh_hdr.vxlh_vni = htonl(sc->vxl_vni << VXLAN_HDR_VNI_SHIFT);

        /* UDP header. */
        vuh->vxlh_udp.uh_sport = srcport;
        vuh->vxlh_udp.uh_dport = dstport;
        vuh->vxlh_udp.uh_ulen = htons(len);
        vuh->vxlh_udp.uh_sum = 0;
}
#endif

#if defined(INET6) || defined(INET)
/*
 * Translate outer CSUM_* offload requests into their CSUM_INNER_*
 * counterparts, OR-ed on top of 'encap'.
 */
static uint32_t
csum_flags_to_inner_flags(uint32_t csum_flags_in, const uint32_t encap)
{
        const uint32_t v4_mask = CSUM_IP | CSUM_IP_UDP | CSUM_IP_TCP;
        uint32_t inner = encap;

        /*
         * A request is for either IPv4 or IPv6 offload, never both.  The TSO
         * bits cannot be used to tell the two apart because tcp_output
         * always sets CSUM_TSO (CSUM_IP_TSO and CSUM_IP6_TSO together), so
         * the decision is made on the bits that accompany CSUM_TSO instead.
         */
        if ((csum_flags_in & v4_mask) != 0) {
                if ((csum_flags_in & CSUM_IP) != 0)
                        inner |= CSUM_INNER_IP;
                if ((csum_flags_in & CSUM_IP_UDP) != 0)
                        inner |= CSUM_INNER_IP_UDP;
                if ((csum_flags_in & CSUM_IP_TCP) != 0)
                        inner |= CSUM_INNER_IP_TCP;
                if ((csum_flags_in & CSUM_IP_TSO) != 0)
                        inner |= CSUM_INNER_IP_TSO;
                return (inner);
        }

        {
#ifdef INVARIANTS
                const uint32_t v6 = CSUM_IP6_UDP | CSUM_IP6_TCP;

                MPASS((csum_flags_in & v6) != 0);
#endif
                if ((csum_flags_in & CSUM_IP6_UDP) != 0)
                        inner |= CSUM_INNER_IP6_UDP;
                if ((csum_flags_in & CSUM_IP6_TCP) != 0)
                        inner |= CSUM_INNER_IP6_TCP;
                if ((csum_flags_in & CSUM_IP6_TSO) != 0)
                        inner |= CSUM_INNER_IP6_TSO;
        }

        return (inner);
}
#endif

/*
 * Encapsulate an Ethernet frame in IPv4/UDP/VXLAN and send it to the remote
 * endpoint 'fvxlsa'.
 *
 * The mbuf is always disposed of: it is freed here on the error paths, or
 * handed to ip_output().  Returns 0 on success, ENOBUFS when the headers
 * cannot be prepended, EHOSTUNREACH when a route lookup needed for offload
 * validation fails, ENXIO when the outbound interface lacks the requested
 * inner offload capabilities, or the error from ip_output().  Interface
 * output counters are updated accordingly.  Without INET the mbuf is freed
 * and ENOTSUP is returned.
 */
static int
vxlan_encap4(struct vxlan_softc *sc, const union vxlan_sockaddr *fvxlsa,
    struct mbuf *m)
{
#ifdef INET
        struct ifnet *ifp;
        struct ip *ip;
        struct in_addr srcaddr, dstaddr;
        uint16_t srcport, dstport;
        int plen, mcast, error;
        struct route route, *ro;
        struct sockaddr_in *sin;
        uint32_t csum_flags;

        NET_EPOCH_ASSERT();

        ifp = sc->vxl_ifp;
        srcaddr = sc->vxl_src_addr.in4.sin_addr;
        /* The source port is picked from the inner frame before prepending. */
        srcport = htons(vxlan_pick_source_port(sc, m));
        dstaddr = fvxlsa->in4.sin_addr;
        dstport = fvxlsa->in4.sin_port;

        /* Remember the inner frame length for the output byte counter. */
        plen = m->m_pkthdr.len;
        M_PREPEND(m, sizeof(struct ip) + sizeof(struct vxlanudphdr),
            M_NOWAIT);
        if (m == NULL) {
                if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
                return (ENOBUFS);
        }

        /*
         * Outer IPv4 header.  Fields not set here (version, header length,
         * id) are presumably filled in by ip_output() -- confirm.
         */
        ip = mtod(m, struct ip *);
        ip->ip_tos = 0;
        ip->ip_len = htons(m->m_pkthdr.len);
        ip->ip_off = 0;
        ip->ip_ttl = sc->vxl_ttl;
        ip->ip_p = IPPROTO_UDP;
        ip->ip_sum = 0;
        ip->ip_src = srcaddr;
        ip->ip_dst = dstaddr;

        vxlan_encap_header(sc, m, sizeof(struct ip), srcport, dstport);

        /*
         * Record whether the inner frame was multicast/broadcast for the
         * counters, then clear the flags on the outer packet.
         */
        mcast = (m->m_flags & (M_MCAST | M_BCAST)) ? 1 : 0;
        m->m_flags &= ~(M_MCAST | M_BCAST);

        m->m_pkthdr.csum_flags &= CSUM_FLAGS_TX;
        if (m->m_pkthdr.csum_flags != 0) {
                /*
                 * HW checksum (L3 and/or L4) or TSO has been requested.  Look
                 * up the ifnet for the outbound route and verify that the
                 * outbound ifnet can perform the requested operation on the
                 * inner frame.
                 */
                bzero(&route, sizeof(route));
                ro = &route;
                sin = (struct sockaddr_in *)&ro->ro_dst;
                sin->sin_family = AF_INET;
                sin->sin_len = sizeof(*sin);
                sin->sin_addr = ip->ip_dst;
                ro->ro_nh = fib4_lookup(M_GETFIB(m), ip->ip_dst, 0, NHR_NONE,
                    0);
                if (ro->ro_nh == NULL) {
                        m_freem(m);
                        if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
                        return (EHOSTUNREACH);
                }

                csum_flags = csum_flags_to_inner_flags(m->m_pkthdr.csum_flags,
                    CSUM_ENCAP_VXLAN);
                /* Every translated flag must be present in if_hwassist. */
                if ((csum_flags & ro->ro_nh->nh_ifp->if_hwassist) !=
                    csum_flags) {
                        /* Rate-limit the diagnostic. */
                        if (ppsratecheck(&sc->err_time, &sc->err_pps, 1)) {
                                const struct ifnet *nh_ifp = ro->ro_nh->nh_ifp;

                                if_printf(ifp, "interface %s is missing hwcaps "
                                    "0x%08x, csum_flags 0x%08x -> 0x%08x, "
                                    "hwassist 0x%08x\n", nh_ifp->if_xname,
                                    csum_flags & ~(uint32_t)nh_ifp->if_hwassist,
                                    m->m_pkthdr.csum_flags, csum_flags,
                                    (uint32_t)nh_ifp->if_hwassist);
                        }
                        m_freem(m);
                        if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
                        return (ENXIO);
                }
                m->m_pkthdr.csum_flags = csum_flags;
                if (csum_flags &
                    (CSUM_INNER_IP | CSUM_INNER_IP_UDP | CSUM_INNER_IP6_UDP |
                    CSUM_INNER_IP_TCP | CSUM_INNER_IP6_TCP)) {
                        counter_u64_add(sc->vxl_stats.txcsum, 1);
                        if (csum_flags & CSUM_INNER_TSO)
                                counter_u64_add(sc->vxl_stats.tso, 1);
                }
        } else
                ro = NULL;
        /* The route found above, if any, is passed along to ip_output(). */
        error = ip_output(m, NULL, ro, 0, sc->vxl_im4o, NULL);
        if (error == 0) {
                if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
                if_inc_counter(ifp, IFCOUNTER_OBYTES, plen);
                if (mcast != 0)
                        if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
        } else
                if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);

        return (error);
#else
        m_freem(m);
        return (ENOTSUP);
#endif
}

/*
 * Encapsulate an Ethernet frame in IPv6/UDP/VXLAN and send it to the remote
 * endpoint 'fvxlsa'.
 *
 * The mbuf is always disposed of: it is freed here on the error paths, or
 * handed to ip6_output().  Returns 0 on success, ENOBUFS when the headers
 * cannot be prepended, EHOSTUNREACH when a route lookup needed for offload
 * validation fails, ENXIO when the outbound interface lacks the requested
 * inner offload capabilities, or the error from ip6_output().  Interface
 * output counters are updated accordingly.  Without INET6 the mbuf is freed
 * and ENOTSUP is returned.
 */
static int
vxlan_encap6(struct vxlan_softc *sc, const union vxlan_sockaddr *fvxlsa,
    struct mbuf *m)
{
#ifdef INET6
        struct ifnet *ifp;
        struct ip6_hdr *ip6;
        const struct in6_addr *srcaddr, *dstaddr;
        uint16_t srcport, dstport;
        int plen, mcast, error;
        struct route_in6 route, *ro;
        struct sockaddr_in6 *sin6;
        uint32_t csum_flags;

        NET_EPOCH_ASSERT();

        ifp = sc->vxl_ifp;
        srcaddr = &sc->vxl_src_addr.in6.sin6_addr;
        /* The source port is picked from the inner frame before prepending. */
        srcport = htons(vxlan_pick_source_port(sc, m));
        dstaddr = &fvxlsa->in6.sin6_addr;
        dstport = fvxlsa->in6.sin6_port;

        /* Remember the inner frame length for the output byte counter. */
        plen = m->m_pkthdr.len;
        M_PREPEND(m, sizeof(struct ip6_hdr) + sizeof(struct vxlanudphdr),
            M_NOWAIT);
        if (m == NULL) {
                if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
                return (ENOBUFS);
        }

        /*
         * Outer IPv6 header.  The payload length is left at zero here;
         * presumably ip6_output() fills it in -- confirm.
         */
        ip6 = mtod(m, struct ip6_hdr *);
        ip6->ip6_flow = 0;              /* BMV: Keep in forwarding entry? */
        ip6->ip6_vfc = IPV6_VERSION;
        ip6->ip6_plen = 0;
        ip6->ip6_nxt = IPPROTO_UDP;
        ip6->ip6_hlim = sc->vxl_ttl;
        ip6->ip6_src = *srcaddr;
        ip6->ip6_dst = *dstaddr;

        vxlan_encap_header(sc, m, sizeof(struct ip6_hdr), srcport, dstport);

        /*
         * Record whether the inner frame was multicast/broadcast for the
         * counters, then clear the flags on the outer packet.
         */
        mcast = (m->m_flags & (M_MCAST | M_BCAST)) ? 1 : 0;
        m->m_flags &= ~(M_MCAST | M_BCAST);

        ro = NULL;
        m->m_pkthdr.csum_flags &= CSUM_FLAGS_TX;
        if (m->m_pkthdr.csum_flags != 0) {
                /*
                 * HW checksum (L3 and/or L4) or TSO has been requested.  Look
                 * up the ifnet for the outbound route and verify that the
                 * outbound ifnet can perform the requested operation on the
                 * inner frame.
                 */
                bzero(&route, sizeof(route));
                ro = &route;
                sin6 = (struct sockaddr_in6 *)&ro->ro_dst;
                sin6->sin6_family = AF_INET6;
                sin6->sin6_len = sizeof(*sin6);
                sin6->sin6_addr = ip6->ip6_dst;
                ro->ro_nh = fib6_lookup(M_GETFIB(m), &ip6->ip6_dst, 0,
                    NHR_NONE, 0);
                if (ro->ro_nh == NULL) {
                        m_freem(m);
                        if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
                        return (EHOSTUNREACH);
                }

                csum_flags = csum_flags_to_inner_flags(m->m_pkthdr.csum_flags,
                    CSUM_ENCAP_VXLAN);
                /* Every translated flag must be present in if_hwassist. */
                if ((csum_flags & ro->ro_nh->nh_ifp->if_hwassist) !=
                    csum_flags) {
                        /* Rate-limit the diagnostic. */
                        if (ppsratecheck(&sc->err_time, &sc->err_pps, 1)) {
                                const struct ifnet *nh_ifp = ro->ro_nh->nh_ifp;

                                if_printf(ifp, "interface %s is missing hwcaps "
                                    "0x%08x, csum_flags 0x%08x -> 0x%08x, "
                                    "hwassist 0x%08x\n", nh_ifp->if_xname,
                                    csum_flags & ~(uint32_t)nh_ifp->if_hwassist,
                                    m->m_pkthdr.csum_flags, csum_flags,
                                    (uint32_t)nh_ifp->if_hwassist);
                        }
                        m_freem(m);
                        if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
                        return (ENXIO);
                }
                m->m_pkthdr.csum_flags = csum_flags;
                if (csum_flags &
                    (CSUM_INNER_IP | CSUM_INNER_IP_UDP | CSUM_INNER_IP6_UDP |
                    CSUM_INNER_IP_TCP | CSUM_INNER_IP6_TCP)) {
                        counter_u64_add(sc->vxl_stats.txcsum, 1);
                        if (csum_flags & CSUM_INNER_TSO)
                                counter_u64_add(sc->vxl_stats.tso, 1);
                }
        } else if (ntohs(dstport) != V_zero_checksum_port) {
                /*
                 * No inner offload was requested and the destination port is
                 * not the configured zero-checksum port: seed the outer UDP
                 * checksum with the pseudo-header sum and mark the packet
                 * with CSUM_UDP_IPV6 so the checksum is completed later
                 * (presumably by ip6_output() or the NIC -- confirm).
                 */
                struct udphdr *hdr = mtodo(m, sizeof(struct ip6_hdr));

                hdr->uh_sum = in6_cksum_pseudo(ip6,
                    m->m_pkthdr.len - sizeof(struct ip6_hdr), IPPROTO_UDP, 0);
                m->m_pkthdr.csum_flags = CSUM_UDP_IPV6;
                m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
        }
        /* 'ro' is non-NULL only when the offload path looked up a route. */
        error = ip6_output(m, NULL, ro, 0, sc->vxl_im6o, NULL, NULL);
        if (error == 0) {
                if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
                if_inc_counter(ifp, IFCOUNTER_OBYTES, plen);
                if (mcast != 0)
                        if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
        } else
                if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);

        return (error);
#else
        m_freem(m);
        return (ENOTSUP);
#endif
}

#define MTAG_VXLAN_LOOP 0x7876706c /* vxlp */
/*
 * Transmit an Ethernet frame over the tunnel.
 *
 * The frame is tapped to BPF, tagged with the tunnel's FIB, and then
 * encapsulated towards the remote address of the matching forwarding table
 * entry (unicast destinations only) or of the default entry.  Returns
 * ENETDOWN when the interface is not running, ELOOP when
 * if_tunnel_check_nesting() reports excessive nesting, otherwise the result
 * of the IPv4/IPv6 encapsulation routine.  The mbuf is freed here on the
 * early error paths and passed to the encapsulation routine otherwise.
 */
static int
vxlan_transmit(struct ifnet *ifp, struct mbuf *m)
{
        struct rm_priotracker tracker;
        union vxlan_sockaddr vxlsa;
        struct vxlan_softc *sc;
        struct vxlan_ftable_entry *fe;
        struct ifnet *mcifp;
        struct ether_header *eh;
        int ipv4, error;

        sc = ifp->if_softc;
        eh = mtod(m, struct ether_header *);
        fe = NULL;
        mcifp = NULL;

        ETHER_BPF_MTAP(ifp, m);

        VXLAN_RLOCK(sc, &tracker);
        M_SETFIB(m, sc->vxl_fibnum);
        if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
                VXLAN_RUNLOCK(sc, &tracker);
                m_freem(m);
                return (ENETDOWN);
        }
        if (__predict_false(if_tunnel_check_nesting(ifp, m, MTAG_VXLAN_LOOP,
            max_vxlan_nesting) != 0)) {
                VXLAN_RUNLOCK(sc, &tracker);
                m_freem(m);
                if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
                return (ELOOP);
        }

        /*
         * Only unicast frames consult the forwarding table; broadcast and
         * multicast frames, and unicast misses, use the default entry.
         */
        if ((m->m_flags & (M_BCAST | M_MCAST)) == 0)
                fe = vxlan_ftable_entry_lookup(sc, eh->ether_dhost);
        if (fe == NULL)
                fe = &sc->vxl_default_fe;
        /* Work on a local copy; it is what is used once the lock is gone. */
        vxlan_sockaddr_copy(&vxlsa, &fe->vxlfe_raddr.sa);

        ipv4 = VXLAN_SOCKADDR_IS_IPV4(&vxlsa) != 0;
        /* Hold a reference on the multicast interface while sending. */
        if (vxlan_sockaddr_in_multicast(&vxlsa) != 0)
                mcifp = vxlan_multicast_if_ref(sc, ipv4);

        /*
         * Acquire the softc before dropping the read lock; the matching
         * vxlan_release() follows the encapsulation call.
         */
        VXLAN_ACQUIRE(sc);
        VXLAN_RUNLOCK(sc, &tracker);

        if (ipv4 != 0)
                error = vxlan_encap4(sc, &vxlsa, m);
        else
                error = vxlan_encap6(sc, &vxlsa, m);

        vxlan_release(sc);
        if (mcifp != NULL)
                if_rele(mcifp);

        return (error);
}

/*
 * Queue flush handler.  Intentionally a no-op; the interface argument is
 * unused.
 */
static void
vxlan_qflush(struct ifnet *ifp __unused)
{
}

/*
 * Receive handler for UDP packets arriving on a vxlan socket.
 *
 * 'offset' is the offset of the UDP header within the mbuf and 'xvso' is
 * the struct vxlan_socket the packet arrived on.  The VXLAN header is
 * validated, the outer headers are stripped, and the inner frame is passed
 * to vxlan_input().  Any mbuf not consumed by vxlan_input() is freed here.
 * Always returns true.
 */
static bool
vxlan_rcv_udp_packet(struct mbuf *m, int offset, struct inpcb *inpcb,
    const struct sockaddr *srcsa, void *xvso)
{
        struct vxlan_socket *vso;
        struct vxlan_header *vxh, vxlanhdr;
        uint32_t vni;
        int error __unused;

        M_ASSERTPKTHDR(m);
        vso = xvso;
        /* Skip the UDP header; 'offset' now points at the VXLAN header. */
        offset += sizeof(struct udphdr);

        /* Too short to hold a VXLAN header at all. */
        if (m->m_pkthdr.len < offset + sizeof(struct vxlan_header))
                goto out;

        /*
         * Read the header in place when the first mbuf holds all of it,
         * otherwise copy it out of the chain into a local buffer.
         */
        if (__predict_false(m->m_len < offset + sizeof(struct vxlan_header))) {
                m_copydata(m, offset, sizeof(struct vxlan_header),
                    (caddr_t) &vxlanhdr);
                vxh = &vxlanhdr;
        } else
                vxh = mtodo(m, offset);

        /*
         * Drop if there is a reserved bit set in either the flags or VNI
         * fields of the header. This goes against the specification, but
         * a bit set may indicate an unsupported new feature. This matches
         * the behavior of the Linux implementation.
         *
         * NOTE(review): vxlh_vni is tested against ~VXLAN_VNI_MASK without
         * ntohl(), while the VNI extraction below converts it first.
         * Whether the mask selects the reserved byte on both little- and
         * big-endian hosts depends on VXLAN_VNI_MASK's definition -- confirm.
         */
        if (vxh->vxlh_flags != htonl(VXLAN_HDR_FLAGS_VALID_VNI) ||
            vxh->vxlh_vni & ~VXLAN_VNI_MASK)
                goto out;

        vni = ntohl(vxh->vxlh_vni) >> VXLAN_HDR_VNI_SHIFT;

        /* Adjust to the start of the inner Ethernet frame. */
        m_adj_decap(m, offset + sizeof(struct vxlan_header));

        /* On success vxlan_input() consumes the mbuf and sets m to NULL. */
        error = vxlan_input(vso, vni, &m, srcsa);
        MPASS(error != 0 || m == NULL);

out:
        if (m != NULL)
                m_freem(m);

        return (true);
}

/*
 * Deliver a decapsulated Ethernet frame to the vxlan interface selected by
 * (vso, vni).
 *
 * On entry *m0 is the inner frame with the outer headers already removed.
 * On success (return 0) the frame has been passed to the interface's
 * if_input routine and *m0 is set to NULL.  On failure an errno is returned
 * and *m0 is either NULL (the mbuf is already gone) or the current head of
 * the chain, which the caller is expected to free.
 *
 * Errors:
 *   EINVAL    frame shorter than an Ethernet header
 *   ENOENT    no softc for this socket/VNI
 *   ENOBUFS   m_pullup() failed
 *   ENETDOWN  interface not running
 *   EDEADLK   frame was received on this very interface
 *
 * The softc returned by vxlan_socket_lookup_softc() is released with
 * vxlan_release() before returning.
 */
static int
vxlan_input(struct vxlan_socket *vso, uint32_t vni, struct mbuf **m0,
    const struct sockaddr *sa)
{
        struct vxlan_softc *sc;
        struct ifnet *ifp;
        struct mbuf *m;
        struct ether_header *eh;
        int error;

        m = *m0;

        if (m->m_pkthdr.len < ETHER_HDR_LEN)
                return (EINVAL);

        sc = vxlan_socket_lookup_softc(vso, vni);
        if (sc == NULL)
                return (ENOENT);

        ifp = sc->vxl_ifp;
        if (m->m_len < ETHER_HDR_LEN) {
                m = m_pullup(m, ETHER_HDR_LEN);
                /*
                 * m_pullup() may hand back a different head mbuf, or NULL
                 * after freeing the chain.  Publish the result right away so
                 * that the caller never frees a stale pointer when one of
                 * the error paths below is taken.
                 */
                *m0 = m;
                if (m == NULL) {
                        error = ENOBUFS;
                        goto out;
                }
        }
        eh = mtod(m, struct ether_header *);

        if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
                error = ENETDOWN;
                goto out;
        } else if (ifp == m->m_pkthdr.rcvif) {
                /* XXX Does not catch more complex loops. */
                error = EDEADLK;
                goto out;
        }

        if (sc->vxl_flags & VXLAN_FLAG_LEARN)
                vxlan_ftable_learn(sc, sa, eh->ether_shost);

        /* From here on the frame looks as if received on the vxlan ifnet. */
        m_clrprotoflags(m);
        m->m_pkthdr.rcvif = ifp;
        M_SETFIB(m, ifp->if_fib);
        /*
         * Translate the inner checksum results to regular ones when the
         * matching receive checksum capability is enabled: IFCAP_RXCSUM
         * when CSUM_INNER_L3_CALC is set, IFCAP_RXCSUM_IPV6 when it is not.
         * Otherwise drop all checksum state.
         */
        if (((ifp->if_capenable & IFCAP_RXCSUM &&
            m->m_pkthdr.csum_flags & CSUM_INNER_L3_CALC) ||
            (ifp->if_capenable & IFCAP_RXCSUM_IPV6 &&
            !(m->m_pkthdr.csum_flags & CSUM_INNER_L3_CALC)))) {
                uint32_t csum_flags = 0;

                if (m->m_pkthdr.csum_flags & CSUM_INNER_L3_CALC)
                        csum_flags |= CSUM_L3_CALC;
                if (m->m_pkthdr.csum_flags & CSUM_INNER_L3_VALID)
                        csum_flags |= CSUM_L3_VALID;
                if (m->m_pkthdr.csum_flags & CSUM_INNER_L4_CALC)
                        csum_flags |= CSUM_L4_CALC;
                if (m->m_pkthdr.csum_flags & CSUM_INNER_L4_VALID)
                        csum_flags |= CSUM_L4_VALID;
                m->m_pkthdr.csum_flags = csum_flags;
                counter_u64_add(sc->vxl_stats.rxcsum, 1);
        } else {
                /* clear everything */
                m->m_pkthdr.csum_flags = 0;
                m->m_pkthdr.csum_data = 0;
        }

        if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
        (*ifp->if_input)(ifp, m);
        /* The frame has been handed off; the caller must not touch it. */
        *m0 = NULL;
        error = 0;

out:
        vxlan_release(sc);
        return (error);
}

/*
 * Allocate the per-interface statistics counters (txcsum, tso, rxcsum).
 * Each allocation is made with M_WAITOK.
 */
static void
vxlan_stats_alloc(struct vxlan_softc *sc)
{

        sc->vxl_stats.txcsum = counter_u64_alloc(M_WAITOK);
        sc->vxl_stats.tso = counter_u64_alloc(M_WAITOK);
        sc->vxl_stats.rxcsum = counter_u64_alloc(M_WAITOK);
}

/*
 * Free the per-interface statistics counters (txcsum, tso, rxcsum).
 */
static void
vxlan_stats_free(struct vxlan_softc *sc)
{

        counter_u64_free(sc->vxl_stats.txcsum);
        counter_u64_free(sc->vxl_stats.tso);
        counter_u64_free(sc->vxl_stats.rxcsum);
}

static void
vxlan_set_default_config(struct vxlan_softc *sc)
{

        sc->vxl_flags |= VXLAN_FLAG_LEARN;

        sc->vxl_vni = VXLAN_VNI_MAX;
        sc->vxl_ttl = IPDEFTTL;

        if (!vxlan_tunable_int(sc, "legacy_port", vxlan_legacy_port)) {
                sc->vxl_src_addr.in4.sin_port = htons(VXLAN_PORT);
                sc->vxl_dst_addr.in4.sin_port = htons(VXLAN_PORT);
        } else {
                sc->vxl_src_addr.in4.sin_port = htons(VXLAN_LEGACY_PORT);
                sc->vxl_dst_addr.in4.sin_port = htons(VXLAN_LEGACY_PORT);
        }

        sc->vxl_min_port = V_ipport_firstauto;
        sc->vxl_max_port = V_ipport_lastauto;

        sc->vxl_ftable_max = VXLAN_FTABLE_MAX;
        sc->vxl_ftable_timeout = VXLAN_FTABLE_TIMEOUT;
}

static int
vxlan_set_user_config(struct vxlan_softc *sc, struct ifvxlanparam *vxlp)
{

#ifndef INET
        if (vxlp->vxlp_with & (VXLAN_PARAM_WITH_LOCAL_ADDR4 |
            VXLAN_PARAM_WITH_REMOTE_ADDR4))
                return (EAFNOSUPPORT);
#endif

#ifndef INET6
        if (vxlp->vxlp_with & (VXLAN_PARAM_WITH_LOCAL_ADDR6 |
            VXLAN_PARAM_WITH_REMOTE_ADDR6))
                return (EAFNOSUPPORT);
#else
        if (vxlp->vxlp_with & VXLAN_PARAM_WITH_LOCAL_ADDR6) {
                int error = vxlan_sockaddr_in6_embedscope(&vxlp->vxlp_local_sa);
                if (error)
                        return (error);
        }
        if (vxlp->vxlp_with & VXLAN_PARAM_WITH_REMOTE_ADDR6) {
                int error = vxlan_sockaddr_in6_embedscope(
                   &vxlp->vxlp_remote_sa);
                if (error)
                        return (error);
        }
#endif

        if (vxlp->vxlp_with & VXLAN_PARAM_WITH_VNI) {
                if (vxlan_check_vni(vxlp->vxlp_vni) == 0)
                        sc->vxl_vni = vxlp->vxlp_vni;
        }

        if (vxlp->vxlp_with & VXLAN_PARAM_WITH_LOCAL_ADDR4) {
                sc->vxl_src_addr.in4.sin_len = sizeof(struct sockaddr_in);
                sc->vxl_src_addr.in4.sin_family = AF_INET;
                sc->vxl_src_addr.in4.sin_addr =
                    vxlp->vxlp_local_sa.in4.sin_addr;
        } else if (vxlp->vxlp_with & VXLAN_PARAM_WITH_LOCAL_ADDR6) {
                sc->vxl_src_addr.in6.sin6_len = sizeof(struct sockaddr_in6);
                sc->vxl_src_addr.in6.sin6_family = AF_INET6;
                sc->vxl_src_addr.in6.sin6_addr =
                    vxlp->vxlp_local_sa.in6.sin6_addr;
        }

        if (vxlp->vxlp_with & VXLAN_PARAM_WITH_REMOTE_ADDR4) {
                sc->vxl_dst_addr.in4.sin_len = sizeof(struct sockaddr_in);
                sc->vxl_dst_addr.in4.sin_family = AF_INET;
                sc->vxl_dst_addr.in4.sin_addr =
                    vxlp->vxlp_remote_sa.in4.sin_addr;
        } else if (vxlp->vxlp_with & VXLAN_PARAM_WITH_REMOTE_ADDR6) {
                sc->vxl_dst_addr.in6.sin6_len = sizeof(struct sockaddr_in6);
                sc->vxl_dst_addr.in6.sin6_family = AF_INET6;
                sc->vxl_dst_addr.in6.sin6_addr =
                    vxlp->vxlp_remote_sa.in6.sin6_addr;
        }

        if (vxlp->vxlp_with & VXLAN_PARAM_WITH_LOCAL_PORT)
                sc->vxl_src_addr.in4.sin_port = htons(vxlp->vxlp_local_port);
        if (vxlp->vxlp_with & VXLAN_PARAM_WITH_REMOTE_PORT)
                sc->vxl_dst_addr.in4.sin_port = htons(vxlp->vxlp_remote_port);

        if (vxlp->vxlp_with & VXLAN_PARAM_WITH_PORT_RANGE) {
                if (vxlp->vxlp_min_port <= vxlp->vxlp_max_port) {
                        sc->vxl_min_port = vxlp->vxlp_min_port;
                        sc->vxl_max_port = vxlp->vxlp_max_port;
                }
        }

        if (vxlp->vxlp_with & VXLAN_PARAM_WITH_MULTICAST_IF)
                strlcpy(sc->vxl_mc_ifname, vxlp->vxlp_mc_ifname, IFNAMSIZ);

        if (vxlp->vxlp_with & VXLAN_PARAM_WITH_FTABLE_TIMEOUT) {
                if (vxlan_check_ftable_timeout(vxlp->vxlp_ftable_timeout) == 0)
                        sc->vxl_ftable_timeout = vxlp->vxlp_ftable_timeout;
        }

        if (vxlp->vxlp_with & VXLAN_PARAM_WITH_FTABLE_MAX) {
                if (vxlan_check_ftable_max(vxlp->vxlp_ftable_max) == 0)
                        sc->vxl_ftable_max = vxlp->vxlp_ftable_max;
        }

        if (vxlp->vxlp_with & VXLAN_PARAM_WITH_TTL) {
                if (vxlan_check_ttl(vxlp->vxlp_ttl) == 0)
                        sc->vxl_ttl = vxlp->vxlp_ttl;
        }

        if (vxlp->vxlp_with & VXLAN_PARAM_WITH_LEARN) {
                if (vxlp->vxlp_learn == 0)
                        sc->vxl_flags &= ~VXLAN_FLAG_LEARN;
        }

        return (0);
}

/*
 * Validate a requested capability set against the currently enabled one and
 * record it in sc->vxl_reqcap.
 *
 * Disabling a tx checksum capability also clears the matching TSO bit from
 * the request.  Enabling TSO while the matching tx checksum capability is
 * off is refused with EAGAIN and the stored request is left unchanged.
 * Returns 0 once the request has been recorded.
 */
static int
vxlan_set_reqcap(struct vxlan_softc *sc, struct ifnet *ifp, int reqcap)
{
        /* Bits that differ between the request and the enabled set. */
        const int changed = reqcap ^ ifp->if_capenable;

        /* Disable TSO if tx checksums are being disabled. */
        if ((changed & IFCAP_TXCSUM) != 0 && (reqcap & IFCAP_TXCSUM) == 0 &&
            (reqcap & IFCAP_TSO4) != 0) {
                reqcap &= ~IFCAP_TSO4;
                if_printf(ifp, "tso4 disabled due to -txcsum.\n");
        }
        if ((changed & IFCAP_TXCSUM_IPV6) != 0 &&
            (reqcap & IFCAP_TXCSUM_IPV6) == 0 &&
            (reqcap & IFCAP_TSO6) != 0) {
                reqcap &= ~IFCAP_TSO6;
                if_printf(ifp, "tso6 disabled due to -txcsum6.\n");
        }

        /* Refuse to enable TSO while tx checksums are disabled. */
        if ((changed & IFCAP_TSO4) != 0 && (reqcap & IFCAP_TSO4) != 0 &&
            (reqcap & IFCAP_TXCSUM) == 0) {
                if_printf(ifp, "enable txcsum first.\n");
                return (EAGAIN);
        }
        if ((changed & IFCAP_TSO6) != 0 && (reqcap & IFCAP_TSO6) != 0 &&
            (reqcap & IFCAP_TXCSUM_IPV6) == 0) {
                if_printf(ifp, "enable txcsum6 first.\n");
                return (EAGAIN);
        }

        sc->vxl_reqcap = reqcap;
        return (0);
}

/*
 * A VXLAN interface inherits the capabilities of the vxlandev or the interface
 * hosting the vxlanlocal address.
 *
 * The interface's capabilities, enabled capabilities and hwassist flags are
 * first reset to the basic set, then the checksum and TSO offloads of the
 * parent interface are added.  The enabled set is further restricted by the
 * user's request in sc->vxl_reqcap.  If no parent interface can be found the
 * interface is left with the basic capabilities only.
 */
static void
vxlan_set_hwcaps(struct vxlan_softc *sc)
{
        struct epoch_tracker et;
        struct ifnet *p;
        struct ifaddr *ifa;
        u_long hwa;
        int cap, ena;
        bool rel;
        struct ifnet *ifp = sc->vxl_ifp;

        /* reset caps */
        ifp->if_capabilities &= VXLAN_BASIC_IFCAPS;
        ifp->if_capenable &= VXLAN_BASIC_IFCAPS;
        ifp->if_hwassist = 0;

        NET_EPOCH_ENTER(et);
        CURVNET_SET(ifp->if_vnet);

        /*
         * Find the parent: the named multicast interface if one is
         * configured, otherwise the interface that owns the (non-wildcard)
         * local address.  'rel' records that a reference obtained from
         * ifunit_ref() has to be dropped with if_rele() below.
         */
        rel = false;
        p = NULL;
        if (sc->vxl_mc_ifname[0] != '\0') {
                rel = true;
                p = ifunit_ref(sc->vxl_mc_ifname);
        } else if (vxlan_sockaddr_in_any(&sc->vxl_src_addr) == 0) {
                /*
                 * Look the address up on a copy with the port cleared.
                 * NOTE(review): no reference is taken on the result; this
                 * presumably relies on the net epoch entered above.
                 */
                if (sc->vxl_src_addr.sa.sa_family == AF_INET) {
                        struct sockaddr_in in4 = sc->vxl_src_addr.in4;

                        in4.sin_port = 0;
                        ifa = ifa_ifwithaddr((struct sockaddr *)&in4);
                        if (ifa != NULL)
                                p = ifa->ifa_ifp;
                } else if (sc->vxl_src_addr.sa.sa_family == AF_INET6) {
                        struct sockaddr_in6 in6 = sc->vxl_src_addr.in6;

                        in6.sin6_port = 0;
                        ifa = ifa_ifwithaddr((struct sockaddr *)&in6);
                        if (ifa != NULL)
                                p = ifa->ifa_ifp;
                }
        }
        /* No parent: nothing to inherit and no reference to release. */
        if (p == NULL)
                goto done;

        cap = ena = hwa = 0;

        /*
         * checksum offload: the parent's CSUM_INNER_* hwassist flags are
         * translated to the corresponding plain CSUM_* flags here.
         */
        if (p->if_capabilities & IFCAP_VXLAN_HWCSUM)
                cap |= p->if_capabilities & (IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6);
        if (p->if_capenable & IFCAP_VXLAN_HWCSUM) {
                ena |= sc->vxl_reqcap & p->if_capenable &
                    (IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6);
                if (ena & IFCAP_TXCSUM) {
                        if (p->if_hwassist & CSUM_INNER_IP)
                                hwa |= CSUM_IP;
                        if (p->if_hwassist & CSUM_INNER_IP_UDP)
                                hwa |= CSUM_IP_UDP;
                        if (p->if_hwassist & CSUM_INNER_IP_TCP)
                                hwa |= CSUM_IP_TCP;
                }
                if (ena & IFCAP_TXCSUM_IPV6) {
                        if (p->if_hwassist & CSUM_INNER_IP6_UDP)
                                hwa |= CSUM_IP6_UDP;
                        if (p->if_hwassist & CSUM_INNER_IP6_TCP)
                                hwa |= CSUM_IP6_TCP;
                }
        }

        /* hardware TSO */
        if (p->if_capabilities & IFCAP_VXLAN_HWTSO) {
                cap |= p->if_capabilities & IFCAP_TSO;
                /*
                 * Use the parent's TSO limit, capped at IP_MAXPACKET less
                 * this interface's header length.
                 */
                if (p->if_hw_tsomax > IP_MAXPACKET - ifp->if_hdrlen)
                        ifp->if_hw_tsomax = IP_MAXPACKET - ifp->if_hdrlen;
                else
                        ifp->if_hw_tsomax = p->if_hw_tsomax;
                /* XXX: tsomaxsegcount decrement is cxgbe specific  */
                ifp->if_hw_tsomaxsegcount = p->if_hw_tsomaxsegcount - 1;
                ifp->if_hw_tsomaxsegsize = p->if_hw_tsomaxsegsize;
        }
        if (p->if_capenable & IFCAP_VXLAN_HWTSO) {
                ena |= sc->vxl_reqcap & p->if_capenable & IFCAP_TSO;
                if (ena & IFCAP_TSO) {
                        if (p->if_hwassist & CSUM_INNER_IP_TSO)
                                hwa |= CSUM_IP_TSO;
                        if (p->if_hwassist & CSUM_INNER_IP6_TSO)
                                hwa |= CSUM_IP6_TSO;
                }
        }

        /* Publish the inherited bits on top of the basic set. */
        ifp->if_capabilities |= cap;
        ifp->if_capenable |= ena;
        ifp->if_hwassist |= hwa;
        if (rel)
                if_rele(p);
done:
        CURVNET_RESTORE();
        NET_EPOCH_EXIT(et);
}

/*
 * Cloner create callback: allocate and configure a new vxlan interface.
 *
 * The softc is filled with the defaults and then, if creation parameters
 * were passed in ifd, with the user's settings.  On success the new ifnet is
 * attached as an Ethernet interface, stored in *ifpp and 0 is returned.  On
 * failure the error from copying in or applying the parameters is returned
 * and the softc is freed.
 */
static int
vxlan_clone_create(struct if_clone *ifc, char *name, size_t len,
    struct ifc_data *ifd, struct ifnet **ifpp)
{
        struct vxlan_softc *sc;
        struct ifnet *ifp;
        struct ifvxlanparam vxlp;
        int error;

        sc = malloc(sizeof(struct vxlan_softc), M_VXLAN, M_WAITOK | M_ZERO);
        sc->vxl_unit = ifd->unit;
        /* The interface uses the FIB of the creating process. */
        sc->vxl_fibnum = curthread->td_proc->p_fibnum;
        vxlan_set_default_config(sc);

        if (ifd->params != NULL) {
                error = ifc_copyin(ifd, &vxlp, sizeof(vxlp));
                if (error)
                        goto fail;

                error = vxlan_set_user_config(sc, &vxlp);
                if (error)
                        goto fail;
        }

        /*
         * Past this point nothing jumps to 'fail'; everything allocated
         * below is released by vxlan_clone_destroy().
         */
        vxlan_stats_alloc(sc);
        ifp = if_alloc(IFT_ETHER);
        sc->vxl_ifp = ifp;
        rm_init(&sc->vxl_lock, "vxlanrm");
        callout_init_rw(&sc->vxl_callout, &sc->vxl_lock, 0);
        sc->vxl_port_hash_key = arc4random();
        vxlan_ftable_init(sc);

        vxlan_sysctl_setup(sc);

        ifp->if_softc = sc;
        if_initname(ifp, vxlan_name, ifd->unit);
        ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
        ifp->if_init = vxlan_init;
        ifp->if_ioctl = vxlan_ioctl;
        ifp->if_transmit = vxlan_transmit;
        ifp->if_qflush = vxlan_qflush;
        ifp->if_capabilities = VXLAN_BASIC_IFCAPS;
        ifp->if_capenable = VXLAN_BASIC_IFCAPS;
        /* All bits set: request every capability the parent can offer. */
        sc->vxl_reqcap = -1;
        vxlan_set_hwcaps(sc);

        ifmedia_init(&sc->vxl_media, 0, vxlan_media_change, vxlan_media_status);
        ifmedia_add(&sc->vxl_media, IFM_ETHER | IFM_AUTO, 0, NULL);
        ifmedia_set(&sc->vxl_media, IFM_ETHER | IFM_AUTO);

        ether_gen_addr(ifp, &sc->vxl_hwaddr);
        ether_ifattach(ifp, sc->vxl_hwaddr.octet);

        ifp->if_baudrate = 0;

        VXLAN_WLOCK(sc);
        vxlan_setup_interface_hdrlen(sc);
        VXLAN_WUNLOCK(sc);
        *ifpp = ifp;

        return (0);

fail:
        /* Only the softc itself has been allocated when we get here. */
        free(sc, M_VXLAN);
        return (error);
}

/*
 * Cloner destroy callback: tear the interface down, detach and free its
 * ifnet and release everything vxlan_clone_create() set up for the softc.
 * Always returns 0.
 */
static int
vxlan_clone_destroy(struct if_clone *ifc, struct ifnet *ifp, uint32_t flags)
{
        struct vxlan_softc *sc;

        sc = ifp->if_softc;

        vxlan_teardown(sc);

        /* Flush the forwarding table before detaching the interface. */
        vxlan_ftable_flush(sc, 1);

        ether_ifdetach(ifp);
        if_free(ifp);
        ifmedia_removeall(&sc->vxl_media);

        vxlan_ftable_fini(sc);

        /* Release the remaining softc resources, then the softc itself. */
        vxlan_sysctl_destroy(sc);
        rm_destroy(&sc->vxl_lock);
        vxlan_stats_free(sc);
        free(sc, M_VXLAN);

        return (0);
}

/*
 * BMV: Taken from if_bridge.
 *
 * Hash a 6-byte Ethernet address, seeded with the softc's forwarding table
 * hash key, into a 32-bit value.
 *
 * Each address byte is converted to uint32_t before it is shifted.  Without
 * the conversion the uint8_t operand is promoted to int, and shifting a byte
 * with its top bit set left by 24 overflows a 32-bit signed int, which is
 * undefined behavior.
 */
static uint32_t
vxlan_mac_hash(struct vxlan_softc *sc, const uint8_t *addr)
{
        uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = sc->vxl_ftable_hash_key;

        b += (uint32_t)addr[5] << 8;
        b += (uint32_t)addr[4];
        a += (uint32_t)addr[3] << 24;
        a += (uint32_t)addr[2] << 16;
        a += (uint32_t)addr[1] << 8;
        a += (uint32_t)addr[0];

/*
 * The following hash function is adapted from "Hash Functions" by Bob Jenkins
 * ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
 */
#define mix(a, b, c)                                                    \
do {                                                                    \
        a -= b; a -= c; a ^= (c >> 13);                                 \
        b -= c; b -= a; b ^= (a << 8);                                  \
        c -= a; c -= b; c ^= (b >> 13);                                 \
        a -= b; a -= c; a ^= (c >> 12);                                 \
        b -= c; b -= a; b ^= (a << 16);                                 \
        c -= a; c -= b; c ^= (b >> 5);                                  \
        a -= b; a -= c; a ^= (c >> 3);                                  \
        b -= c; b -= a; b ^= (a << 10);                                 \
        c -= a; c -= b; c ^= (b >> 15);                                 \
} while (0)

        mix(a, b, c);

#undef mix

        return (c);
}

/*
 * ifmedia change callback.  Media change requests are accepted but have no
 * effect; always returns 0.
 */
static int
vxlan_media_change(struct ifnet *ifp)
{
        /* Nothing to change. */
        return (0);
}

/*
 * ifmedia status callback.  Always reports the same state: status valid and
 * active, media Ethernet full-duplex.
 */
static void
vxlan_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
{

        ifmr->ifm_active = IFM_ETHER | IFM_FDX;
        ifmr->ifm_status = IFM_AVALID | IFM_ACTIVE;
}

static int
vxlan_sockaddr_cmp(const union vxlan_sockaddr *vxladdr,
    const struct sockaddr *sa)
{

        return (bcmp(&vxladdr->sa, sa, vxladdr->sa.sa_len));
}

/*
 * Replace *vxladdr with a full copy of the IPv4 or IPv6 sockaddr 'sa'.
 * The destination is zeroed first and the length field is forced to the
 * size of the matching sockaddr structure.
 */
static void
vxlan_sockaddr_copy(union vxlan_sockaddr *vxladdr,
    const struct sockaddr *sa)
{

        MPASS(sa->sa_family == AF_INET || sa->sa_family == AF_INET6);
        bzero(vxladdr, sizeof(*vxladdr));

        switch (sa->sa_family) {
        case AF_INET:
                vxladdr->in4 = *satoconstsin(sa);
                vxladdr->in4.sin_len = sizeof(struct sockaddr_in);
                break;
        case AF_INET6:
                vxladdr->in6 = *satoconstsin6(sa);
                vxladdr->in6.sin6_len = sizeof(struct sockaddr_in6);
                break;
        default:
                /* Other families leave the address zeroed. */
                break;
        }
}

/*
 * Compare only the IP address part of 'sa' with the address stored in
 * *vxladdr, interpreting the latter according to sa's family.  Returns
 * non-zero when the addresses match, 0 when they differ or when the family
 * is neither AF_INET nor AF_INET6.
 */
static int
vxlan_sockaddr_in_equal(const union vxlan_sockaddr *vxladdr,
    const struct sockaddr *sa)
{

        switch (sa->sa_family) {
        case AF_INET:
                return (satoconstsin(sa)->sin_addr.s_addr ==
                    vxladdr->in4.sin_addr.s_addr);
        case AF_INET6:
                return (IN6_ARE_ADDR_EQUAL(&satoconstsin6(sa)->sin6_addr,
                    &vxladdr->in6.sin6_addr));
        default:
                return (0);
        }
}

/*
 * Copy only the family, length and IP address of 'sa' into *vxladdr.  The
 * other fields of the destination, such as the port, are left as they were.
 */
static void
vxlan_sockaddr_in_copy(union vxlan_sockaddr *vxladdr,
    const struct sockaddr *sa)
{

        MPASS(sa->sa_family == AF_INET || sa->sa_family == AF_INET6);

        switch (sa->sa_family) {
        case AF_INET:
                vxladdr->in4.sin_family = AF_INET;
                vxladdr->in4.sin_len = sizeof(struct sockaddr_in);
                vxladdr->in4.sin_addr = satoconstsin(sa)->sin_addr;
                break;
        case AF_INET6:
                vxladdr->in6.sin6_family = AF_INET6;
                vxladdr->in6.sin6_len = sizeof(struct sockaddr_in6);
                vxladdr->in6.sin6_addr = satoconstsin6(sa)->sin6_addr;
                break;
        default:
                break;
        }
}

/*
 * Report whether the address family of *vxladdr can be used.  AF_INET and
 * AF_INET6 are supported only when the respective protocol is compiled in;
 * AF_UNSPEC is accepted only when 'unspec' is non-zero.  Returns 1 if
 * supported and 0 otherwise.
 */
static int
vxlan_sockaddr_supported(const union vxlan_sockaddr *vxladdr, int unspec)
{

        switch (vxladdr->sa.sa_family) {
        case AF_UNSPEC:
                return (unspec != 0);
#ifdef INET
        case AF_INET:
                return (1);
#endif
#ifdef INET6
        case AF_INET6:
                return (1);
#endif
        default:
                return (0);
        }
}

/*
 * Test whether *vxladdr holds the wildcard address of its family.  Returns
 * non-zero for INADDR_ANY or the unspecified IPv6 address, 0 for any other
 * IPv4/IPv6 address, and -1 when the family is neither AF_INET nor AF_INET6.
 */
static int
vxlan_sockaddr_in_any(const union vxlan_sockaddr *vxladdr)
{

        switch (vxladdr->sa.sa_family) {
        case AF_INET:
                return (vxladdr->in4.sin_addr.s_addr == INADDR_ANY);
        case AF_INET6:
                return (IN6_IS_ADDR_UNSPECIFIED(&vxladdr->in6.sin6_addr));
        default:
                return (-1);
        }
}

/*
 * Test whether *vxladdr holds a multicast address.  Returns non-zero for an
 * IPv4 or IPv6 multicast address, 0 for any other IPv4/IPv6 address, and -1
 * when the family is neither AF_INET nor AF_INET6.
 */
static int
vxlan_sockaddr_in_multicast(const union vxlan_sockaddr *vxladdr)
{

        switch (vxladdr->sa.sa_family) {
        case AF_INET:
                return (IN_MULTICAST(ntohl(vxladdr->in4.sin_addr.s_addr)));
        case AF_INET6:
                return (IN6_IS_ADDR_MULTICAST(&vxladdr->in6.sin6_addr));
        default:
                return (-1);
        }
}

/*
 * Embed the scope zone into the IPv6 address held in *vxladdr, honoring the
 * V_ip6_use_defzone setting.  Returns the result of sa6_embedscope(), or
 * EAFNOSUPPORT when IPv6 is not compiled in.
 */
static int
vxlan_sockaddr_in6_embedscope(union vxlan_sockaddr *vxladdr)
{

        MPASS(VXLAN_SOCKADDR_IS_IPV6(vxladdr));
#ifdef INET6
        return (sa6_embedscope(&vxladdr->in6, V_ip6_use_defzone));
#else
        return (EAFNOSUPPORT);
#endif
}

/*
 * Decide whether the interface configuration may be modified right now.
 * Returns 0 while the interface is running or is in the middle of
 * initialization or teardown, and 1 otherwise.  The softc lock must be held.
 */
static int
vxlan_can_change_config(struct vxlan_softc *sc)
{

        VXLAN_LOCK_ASSERT(sc);

        if ((sc->vxl_ifp->if_drv_flags & IFF_DRV_RUNNING) != 0 ||
            (sc->vxl_flags & (VXLAN_FLAG_INIT | VXLAN_FLAG_TEARDOWN)) != 0)
                return (0);

        return (1);
}

/*
 * Sanity check a VNI.  Returns 0 when the value is below VXLAN_VNI_MAX and
 * 1 when it is out of range.
 */
static int
vxlan_check_vni(uint32_t vni)
{

        return (vni < VXLAN_VNI_MAX ? 0 : 1);
}

/*
 * Sanity check a TTL.  Returns 0 when the value does not exceed MAXTTL and
 * 1 when it does.
 */
static int
vxlan_check_ttl(int ttl)
{

        return (ttl <= MAXTTL ? 0 : 1);
}

/*
 * Sanity check a forwarding table timeout.  Returns 0 when the value does
 * not exceed VXLAN_FTABLE_MAX_TIMEOUT and 1 when it does.
 */
static int
vxlan_check_ftable_timeout(uint32_t timeout)
{

        return (timeout <= VXLAN_FTABLE_MAX_TIMEOUT ? 0 : 1);
}

/*
 * Sanity check a forwarding table size limit.  Returns 0 when the value
 * does not exceed VXLAN_FTABLE_MAX and 1 when it does.
 */
static int
vxlan_check_ftable_max(uint32_t max)
{

        return (max <= VXLAN_FTABLE_MAX ? 0 : 1);
}

/*
 * Create the per-interface sysctl tree net.link.vxlan.<unit> with two
 * read-only sub-nodes:
 *   ftable - forwarding table count, limit, timeout and a dump handler
 *   stats  - forwarding table and hardware offload statistics
 * All OIDs are registered in sc->vxl_sysctl_ctx so that
 * vxlan_sysctl_destroy() can remove them in one go.
 */
static void
vxlan_sysctl_setup(struct vxlan_softc *sc)
{
        struct sysctl_ctx_list *ctx;
        struct sysctl_oid *node;
        struct vxlan_statistics *stats;
        char namebuf[8];

        ctx = &sc->vxl_sysctl_ctx;
        stats = &sc->vxl_stats;
        /* The unit number is the name of the per-interface node. */
        snprintf(namebuf, sizeof(namebuf), "%d", sc->vxl_unit);

        sysctl_ctx_init(ctx);
        sc->vxl_sysctl_node = SYSCTL_ADD_NODE(ctx,
            SYSCTL_STATIC_CHILDREN(_net_link_vxlan), OID_AUTO, namebuf,
            CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "");

        node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(sc->vxl_sysctl_node),
            OID_AUTO, "ftable", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "");
        SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "count",
            CTLFLAG_RD, &sc->vxl_ftable_cnt, 0,
            "Number of entries in forwarding table");
        SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "max",
            CTLFLAG_RD, &sc->vxl_ftable_max, 0,
            "Maximum number of entries allowed in forwarding table");
        SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "timeout",
            CTLFLAG_RD, &sc->vxl_ftable_timeout, 0,
            "Number of seconds between prunes of the forwarding table");
        SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "dump",
            CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE | CTLFLAG_SKIP,
            sc, 0, vxlan_ftable_sysctl_dump, "A",
            "Dump the forwarding table entries");

        node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(sc->vxl_sysctl_node),
            OID_AUTO, "stats", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "");
        SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO,
            "ftable_nospace", CTLFLAG_RD, &stats->ftable_nospace, 0,
            "Forwarding table reached maximum entries");
        SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(node), OID_AUTO,
            "ftable_lock_upgrade_failed", CTLFLAG_RD,
            &stats->ftable_lock_upgrade_failed, 0,
            "Forwarding table update required lock upgrade");

        /* Hardware offload counters. */
        SYSCTL_ADD_COUNTER_U64(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "txcsum",
            CTLFLAG_RD, &stats->txcsum,
            "# of times hardware assisted with tx checksum");
        SYSCTL_ADD_COUNTER_U64(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "tso",
            CTLFLAG_RD, &stats->tso, "# of times hardware assisted with TSO");
        SYSCTL_ADD_COUNTER_U64(ctx, SYSCTL_CHILDREN(node), OID_AUTO, "rxcsum",
            CTLFLAG_RD, &stats->rxcsum,
            "# of times hardware assisted with rx checksum");
}

/*
 * Free the interface's sysctl context and clear the cached pointer to its
 * top-level node.
 */
static void
vxlan_sysctl_destroy(struct vxlan_softc *sc)
{

        sysctl_ctx_free(&sc->vxl_sysctl_ctx);
        sc->vxl_sysctl_node = NULL;
}

/*
 * Fetch the integer tunable "net.link.vxlan.<unit>.<knob>" for this
 * interface.  Returns the tunable's value, or 'def' when the fetch leaves
 * the default untouched.
 */
static int
vxlan_tunable_int(struct vxlan_softc *sc, const char *knob, int def)
{
        char tunable[64];
        int value = def;

        snprintf(tunable, sizeof(tunable), "net.link.vxlan.%d.%s",
            sc->vxl_unit, knob);
        TUNABLE_INT_FETCH(tunable, &value);

        return (value);
}

/*
 * ifnet departure event handler.  When a multicast capable interface goes
 * away, every vxlan socket is asked to collect the softcs affected by the
 * departure, and each collected softc is then torn down.
 */
static void
vxlan_ifdetach_event(void *arg __unused, struct ifnet *ifp)
{
        struct vxlan_softc_head list = LIST_HEAD_INITIALIZER(list);
        struct vxlan_socket *vso;
        struct vxlan_softc *sc, *tsc;

        /* Interfaces without multicast support are ignored. */
        if ((ifp->if_flags & IFF_MULTICAST) == 0)
                return;

        /* Gather the affected softcs on a local list under the list lock. */
        VXLAN_LIST_LOCK();
        LIST_FOREACH(vso, &vxlan_socket_list, vxlso_entry)
                vxlan_socket_ifdetach(vso, ifp, &list);
        VXLAN_LIST_UNLOCK();

        /* Tear them down one by one with the list lock released. */
        LIST_FOREACH_SAFE(sc, &list, vxl_ifdetach_list, tsc) {
                LIST_REMOVE(sc, vxl_ifdetach_list);

                sx_xlock(&vxlan_sx);
                VXLAN_WLOCK(sc);
                /* Let an initialization that is in progress finish first. */
                if (sc->vxl_flags & VXLAN_FLAG_INIT)
                        vxlan_init_wait(sc);
                /*
                 * NOTE(review): there is no VXLAN_WUNLOCK() here, so
                 * vxlan_teardown_locked() presumably drops the softc lock
                 * itself - confirm against its definition.
                 */
                vxlan_teardown_locked(sc);
                sx_xunlock(&vxlan_sx);
        }
}

/*
 * Module load: set up the socket list mutex, hook the ifnet departure event
 * and register the interface cloner with automatic unit numbering.
 */
static void
vxlan_load(void)
{
        struct if_clone_addreq req = {
                .create_f = vxlan_clone_create,
                .destroy_f = vxlan_clone_destroy,
                .flags = IFC_F_AUTOUNIT,
        };

        mtx_init(&vxlan_list_mtx, "vxlan list", NULL, MTX_DEF);
        vxlan_ifdetach_event_tag = EVENTHANDLER_REGISTER(ifnet_departure_event,
            vxlan_ifdetach_event, NULL, EVENTHANDLER_PRI_ANY);
        vxlan_cloner = ifc_attach_cloner(vxlan_name, &req);
}

/*
 * Module unload: undo vxlan_load().  The departure event handler is
 * deregistered, the cloner is detached and the socket list mutex is
 * destroyed.  The socket list is expected to be empty afterwards.
 */
static void
vxlan_unload(void)
{

        EVENTHANDLER_DEREGISTER(ifnet_departure_event,
            vxlan_ifdetach_event_tag);
        ifc_detach_cloner(vxlan_cloner);
        mtx_destroy(&vxlan_list_mtx);
        MPASS(LIST_EMPTY(&vxlan_socket_list));
}

/*
 * Module event handler.  Load and unload events are dispatched to
 * vxlan_load() and vxlan_unload() and return 0; any other event type is
 * rejected with ENOTSUP.
 */
static int
vxlan_modevent(module_t mod, int type, void *unused)
{

        if (type == MOD_LOAD) {
                vxlan_load();
                return (0);
        }
        if (type == MOD_UNLOAD) {
                vxlan_unload();
                return (0);
        }

        return (ENOTSUP);
}

/* Module description: name, event handler and (unused) handler argument. */
static moduledata_t vxlan_mod = {
        "if_vxlan",
        vxlan_modevent,
        0
};

/* Register the module with the kernel and declare its version. */
DECLARE_MODULE(if_vxlan, vxlan_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
MODULE_VERSION(if_vxlan, 1);