root/usr/src/uts/common/io/overlay/overlay_target.c
/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source.  A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2016 Joyent, Inc.
 * Copyright 2022 MNX Cloud, Inc.
 */

/*
 * Overlay device target cache management
 *
 * For more information, see the big theory statement in
 * uts/common/io/overlay/overlay.c
 */

#include <sys/types.h>
#include <sys/ethernet.h>
#include <sys/kmem.h>
#include <sys/policy.h>
#include <sys/sysmacros.h>
#include <sys/stream.h>
#include <sys/strsun.h>
#include <sys/strsubr.h>
#include <sys/mac_provider.h>
#include <sys/mac_client.h>
#include <sys/mac_client_priv.h>
#include <sys/vlan.h>
#include <sys/crc32.h>
#include <sys/cred.h>
#include <sys/file.h>
#include <sys/errno.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>

#include <sys/overlay_impl.h>
#include <sys/sdt.h>

/*
 * This is a total straw man, but at least it's a prime number. Here we're
 * going to have to go through and do a lot of evaluation and understanding as
 * to how these target caches should grow and shrink, as well as how to deal
 * with memory pressure and evictions. This just gives us a starting point
 * that'll be 'good enough', until it's not.
 *
 * The value is handed to refhash_create() as its first argument when a dynamic
 * target's MAC address hash is created in overlay_target_associate().
 */
#define OVERLAY_HSIZE   823

/*
 * We use this data structure to keep track of what requests have been actively
 * allocated to a given instance so we know what to put back on the pending
 * list.
 *
 * The trailing comment on each member names the lock that protects it. Members
 * tagged RO are presumably read-only once the handle has been set up; the code
 * that creates handles is not in this part of the file.
 */
typedef struct overlay_target_hdl {
        minor_t oth_minor;              /* RO */
        zoneid_t oth_zoneid;            /* RO */
        int oth_oflags;                 /* RO */
        list_node_t oth_link;           /* overlay_target_lock */
        kmutex_t oth_lock;
        list_t  oth_outstanding;        /* oth_lock */
} overlay_target_hdl_t;

/*
 * Callback signatures used to describe a target ioctl:
 *
 *  - copyin:  (user buffer, out: kernel buffer, out: kernel buffer size,
 *             ioctl flags); returns 0 or an errno.
 *  - ioctl:   (handle the ioctl was issued on, kernel buffer); returns 0 or
 *             an errno.
 *  - copyout: (user buffer, kernel buffer, kernel buffer size, ioctl flags);
 *             returns 0 or an errno.
 */
typedef int (*overlay_target_copyin_f)(const void *, void **, size_t *, int);
typedef int (*overlay_target_ioctl_f)(overlay_target_hdl_t *, void *);
typedef int (*overlay_target_copyout_f)(void *, void *, size_t, int);

/*
 * Description of a single target ioctl: the command number, its access and
 * copyout requirements, and the callbacks used to copy the argument in, act on
 * it and copy the result back out.
 */
typedef struct overlay_target_ioctl {
        int             oti_cmd;        /* ioctl id */
        boolean_t       oti_write;      /* ioctl requires FWRITE */
        boolean_t       oti_ncopyout;   /* copyout data? */
        overlay_target_copyin_f oti_copyin;     /* copyin func */
        overlay_target_ioctl_f oti_func; /* function to call */
        overlay_target_copyout_f oti_copyout;   /* copyout func */
        size_t          oti_size;       /* size of user level structure */
} overlay_target_ioctl_t;

/*
 * Module-wide allocation state, all created in overlay_target_init():
 * the kmem caches for overlay_target_t and overlay_target_entry_t, the id
 * space named "overlay_target_minors", and the soft state sized for
 * overlay_target_hdl_t.
 */
static kmem_cache_t *overlay_target_cache;
static kmem_cache_t *overlay_entry_cache;
static id_space_t *overlay_thdl_idspace;
static void *overlay_thdl_state;

/*
 * When we support overlay devices in the NGZ, then all of these need to become
 * zone aware, by plugging into the netstack engine and becoming per-netstack
 * data.
 *
 * overlay_thdl_list links overlay_target_hdl_t structures through oth_link.
 * overlay_target_list links overlay_target_entry_t structures through
 * ote_qlink; entries are appended by overlay_target_queue(), which also
 * signals overlay_target_condvar, and are removed by
 * overlay_target_lookup_request(). Both lists are accessed with
 * overlay_target_lock held in the code visible here.
 * NOTE(review): overlay_target_excl is not referenced in this part of the
 * file; its meaning should be confirmed against its users.
 */
static list_t overlay_thdl_list;
static kmutex_t overlay_target_lock;
static kcondvar_t overlay_target_condvar;
static list_t overlay_target_list;
static boolean_t overlay_target_excl;

/*
 * Outstanding data per hash table entry.
 *
 * overlay_target_lookup() drops a packet rather than queueing it when adding
 * the packet's size to the entry's ote_mbsize would exceed this many bytes.
 */
static int overlay_ent_size = 128 * 1024;

/*
 * kmem cache constructor for overlay_target_t buffers. Initializes the
 * target's mutex and condition variable; always returns 0.
 */
/* ARGSUSED */
static int
overlay_target_cache_constructor(void *buf, void *arg, int kmflgs)
{
        overlay_target_t *target = buf;

        mutex_init(&target->ott_lock, NULL, MUTEX_DRIVER, NULL);
        cv_init(&target->ott_cond, NULL, CV_DRIVER, NULL);

        return (0);
}

/*
 * kmem cache destructor for overlay_target_t buffers. Undoes the constructor
 * by destroying the condition variable and then the mutex.
 */
/* ARGSUSED */
static void
overlay_target_cache_destructor(void *buf, void *arg)
{
        overlay_target_t *target = buf;

        cv_destroy(&target->ott_cond);
        mutex_destroy(&target->ott_lock);
}

/*
 * kmem cache constructor for overlay_target_entry_t buffers. Zeroes the whole
 * entry and then initializes its mutex; always returns 0.
 */
/* ARGSUSED */
static int
overlay_entry_cache_constructor(void *buf, void *arg, int kmflgs)
{
        overlay_target_entry_t *ent = buf;

        bzero(ent, sizeof (overlay_target_entry_t));
        mutex_init(&ent->ote_lock, NULL, MUTEX_DRIVER, NULL);

        return (0);
}

/*
 * kmem cache destructor for overlay_target_entry_t buffers. Destroys the
 * mutex set up by the constructor.
 */
/* ARGSUSED */
static void
overlay_entry_cache_destructor(void *buf, void *arg)
{
        overlay_target_entry_t *ent = buf;

        mutex_destroy(&ent->ote_lock);
}

/*
 * refhash hash function: CRC32 over the ETHERADDRL bytes at v, seeded with
 * -1U, widened to the 64-bit return type.
 */
static uint64_t
overlay_mac_hash(const void *v)
{
        uint32_t sum;

        CRC32(sum, v, ETHERADDRL, -1U, crc32_table);

        return (sum);
}

/*
 * refhash comparison function: returns bcmp()'s verdict on the ETHERADDRL
 * bytes at a and b, i.e. zero when the two MAC addresses are identical.
 */
static int
overlay_mac_cmp(const void *a, const void *b)
{
        int differ;

        differ = bcmp(a, b, ETHERADDRL);

        return (differ);
}

/*
 * refhash destructor for a target entry. Frees any packets still chained on
 * the entry, resets every field touched while the entry was live, and returns
 * the entry to overlay_entry_cache.
 */
/* ARGSUSED */
static void
overlay_target_entry_dtor(void *arg)
{
        overlay_target_entry_t *ent = arg;

        /* Release the queued packet chain before forgetting about it. */
        freemsgchain(ent->ote_chead);
        ent->ote_chead = NULL;
        ent->ote_ctail = NULL;
        ent->ote_mbsize = 0;

        /* Scrub identity and state so the cached buffer starts out clean. */
        bzero(ent->ote_addr, ETHERADDRL);
        ent->ote_flags = 0;
        ent->ote_vtime = 0;
        ent->ote_odd = NULL;
        ent->ote_ott = NULL;

        kmem_cache_free(overlay_entry_cache, ent);
}

/*
 * AVL comparator for target entries: orders two entries by their ote_addr
 * bytes, most significant (index 0) first. Returns -1, 0 or 1.
 */
static int
overlay_mac_avl(const void *a, const void *b)
{
        const overlay_target_entry_t *lhs = a;
        const overlay_target_entry_t *rhs = b;
        int idx = 0;

        while (idx < ETHERADDRL) {
                /* The first differing byte decides the ordering. */
                if (lhs->ote_addr[idx] != rhs->ote_addr[idx]) {
                        if (lhs->ote_addr[idx] > rhs->ote_addr[idx])
                                return (1);
                        return (-1);
                }
                idx++;
        }

        return (0);
}

/*
 * One-time setup of the target subsystem's global state: the handle soft
 * state, the target and entry kmem caches, the global lock and condition
 * variable, the pending-entry and handle lists, and the minor number id space.
 */
void
overlay_target_init(void)
{
        /* Soft state backing each overlay_target_hdl_t; must not fail. */
        VERIFY(ddi_soft_state_init(&overlay_thdl_state,
            sizeof (overlay_target_hdl_t), 1) == 0);

        /* Object caches for targets and for dynamic target entries. */
        overlay_target_cache = kmem_cache_create("overlay_target",
            sizeof (overlay_target_t), 0, overlay_target_cache_constructor,
            overlay_target_cache_destructor, NULL, NULL, NULL, 0);
        overlay_entry_cache = kmem_cache_create("overlay_entry",
            sizeof (overlay_target_entry_t), 0, overlay_entry_cache_constructor,
            overlay_entry_cache_destructor, NULL, NULL, NULL, 0);

        /* Global synchronization for the lists below. */
        mutex_init(&overlay_target_lock, NULL, MUTEX_DRIVER, NULL);
        cv_init(&overlay_target_condvar, NULL, CV_DRIVER, NULL);

        /* Entries awaiting a lookup, and the currently known handles. */
        list_create(&overlay_target_list, sizeof (overlay_target_entry_t),
            offsetof(overlay_target_entry_t, ote_qlink));
        list_create(&overlay_thdl_list, sizeof (overlay_target_hdl_t),
            offsetof(overlay_target_hdl_t, oth_link));

        /* Minor numbers are handed out from [1, INT32_MAX). */
        overlay_thdl_idspace = id_space_create("overlay_target_minors",
            1, INT32_MAX);
}

/*
 * Tear down everything created by overlay_target_init(), in the reverse order
 * of its creation: the id space, both lists, the condition variable and lock,
 * both kmem caches and finally the handle soft state.
 */
void
overlay_target_fini(void)
{
        id_space_destroy(overlay_thdl_idspace);
        list_destroy(&overlay_thdl_list);
        list_destroy(&overlay_target_list);
        cv_destroy(&overlay_target_condvar);
        mutex_destroy(&overlay_target_lock);
        kmem_cache_destroy(overlay_entry_cache);
        kmem_cache_destroy(overlay_target_cache);
        ddi_soft_state_fini(&overlay_thdl_state);
}

/*
 * Release the target attached to an overlay device, if there is one. For a
 * dynamic target this empties and destroys both lookup structures before the
 * target itself goes back to its cache.
 */
void
overlay_target_free(overlay_dev_t *odd)
{
        overlay_target_t *ott = odd->odd_target;

        if (ott == NULL)
                return;

        if (ott->ott_mode == OVERLAY_TARGET_DYNAMIC) {
                refhash_t *hash = ott->ott_u.ott_dyn.ott_dhash;
                avl_tree_t *tree = &ott->ott_u.ott_dyn.ott_tree;
                overlay_target_entry_t *ent;

                /*
                 * The tree and the hash hold the same entries. Entries are
                 * merely unlinked from the tree; they are actually destroyed
                 * when they leave the hash, via the refhash dtor.
                 */
                for (ent = avl_first(tree); ent != NULL; ent = avl_first(tree))
                        avl_remove(tree, ent);
                avl_destroy(tree);

                ent = refhash_first(hash);
                while (ent != NULL) {
                        refhash_remove(hash, ent);
                        ent = refhash_next(hash, ent);
                }
                refhash_destroy(hash);
        }

        ASSERT(ott->ott_ocount == 0);
        kmem_cache_free(overlay_target_cache, ott);
}

/*
 * Report whether any target handle is currently on overlay_thdl_list.
 *
 * Returns non-zero when the list is non-empty and zero otherwise. The list is
 * sampled under overlay_target_lock, so the answer may be stale by the time
 * the caller acts on it.
 */
int
overlay_target_busy(void)
{
        int ret;

        mutex_enter(&overlay_target_lock);
        ret = !list_is_empty(&overlay_thdl_list);
        mutex_exit(&overlay_target_lock);

        return (ret);
}

/*
 * Put an entry on the global list of entries awaiting a lookup and wake one
 * waiter. Nothing is queued when the entry's target is being torn down.
 * Each queued entry is accounted for in the target's ott_ocount.
 *
 * Lock order: overlay_target_lock, then the target's ott_lock.
 */
static void
overlay_target_queue(overlay_target_entry_t *entry)
{
        overlay_target_t *ott;
        boolean_t teardown;

        mutex_enter(&overlay_target_lock);
        ott = entry->ote_ott;

        mutex_enter(&ott->ott_lock);
        teardown = (ott->ott_flags & OVERLAY_T_TEARDOWN) ? B_TRUE : B_FALSE;
        if (!teardown)
                ott->ott_ocount++;
        mutex_exit(&ott->ott_lock);

        if (!teardown) {
                list_insert_tail(&overlay_target_list, entry);
                cv_signal(&overlay_target_condvar);
        }
        mutex_exit(&overlay_target_lock);
}

/*
 * Mark a target as being torn down and block until its count of outstanding
 * entries (ott_ocount) has dropped to zero. A NULL target is a no-op.
 */
void
overlay_target_quiesce(overlay_target_t *ott)
{
        if (ott != NULL) {
                mutex_enter(&ott->ott_lock);
                ott->ott_flags |= OVERLAY_T_TEARDOWN;
                for (;;) {
                        if (ott->ott_ocount == 0)
                                break;
                        cv_wait(&ott->ott_cond, &ott->ott_lock);
                }
                mutex_exit(&ott->ott_lock);
        }
}

/*
 * Work out where the packet mp, sent on overlay device odd, should be
 * delivered, and describe that destination in sock / *slenp.
 *
 * This function assumes that the destination mode is OVERLAY_PLUGIN_D_IP |
 * OVERLAY_PLUGIN_D_PORT. As we don't have an implementation of anything else at
 * this time, say for NVGRE, we drop all packets for targets that don't match
 * this.
 *
 * Returns:
 *   OVERLAY_TARGET_OK    - sock holds an AF_INET6 sockaddr_in6 for the
 *                          destination and *slenp is its size.
 *   OVERLAY_TARGET_ASYNC - the destination is not known yet; mp has been
 *                          chained onto the target entry for its MAC address.
 *   OVERLAY_TARGET_DROP  - the packet was not resolved and not queued
 *                          (unsupported destination mode, unparseable MAC
 *                          header, allocation failure, an entry flagged to
 *                          drop, or the per-entry queue limit was reached).
 *
 * NOTE(review): sock is zeroed and written as a struct sockaddr_in6, so the
 * caller's buffer must be at least that large - confirm at the call sites.
 */
int
overlay_target_lookup(overlay_dev_t *odd, mblk_t *mp, struct sockaddr *sock,
    socklen_t *slenp)
{
        int ret;
        struct sockaddr_in6 *v6;
        overlay_target_t *ott;
        mac_header_info_t mhi;
        overlay_target_entry_t *entry;

        ASSERT(odd->odd_target != NULL);

        /*
         * At this point, the overlay device is in a mux which means that it's
         * been activated. At this point, parts of the target, such as the mode
         * and the destination are now read-only and we don't have to worry
         * about synchronization for them.
         */
        ott = odd->odd_target;
        if (ott->ott_dest != (OVERLAY_PLUGIN_D_IP | OVERLAY_PLUGIN_D_PORT))
                return (OVERLAY_TARGET_DROP);

        v6 = (struct sockaddr_in6 *)sock;
        bzero(v6, sizeof (struct sockaddr_in6));
        v6->sin6_family = AF_INET6;

        /* Point-to-point targets have a single, fixed destination. */
        if (ott->ott_mode == OVERLAY_TARGET_POINT) {
                mutex_enter(&ott->ott_lock);
                bcopy(&ott->ott_u.ott_point.otp_ip, &v6->sin6_addr,
                    sizeof (struct in6_addr));
                v6->sin6_port = htons(ott->ott_u.ott_point.otp_port);
                mutex_exit(&ott->ott_lock);
                *slenp = sizeof (struct sockaddr_in6);

                return (OVERLAY_TARGET_OK);
        }

        ASSERT(ott->ott_mode == OVERLAY_TARGET_DYNAMIC);

        /*
         * Note we only want the MAC address here, therefore we won't bother
         * using mac_vlan_header_info(). If any caller needs the vlan info at
         * this point, this should change to a call to mac_vlan_header_info().
         */
        if (mac_header_info(odd->odd_mh, mp, &mhi) != 0)
                return (OVERLAY_TARGET_DROP);
        mutex_enter(&ott->ott_lock);
        entry = refhash_lookup(ott->ott_u.ott_dyn.ott_dhash,
            mhi.mhi_daddr);
        if (entry == NULL) {
                /*
                 * First packet for this destination MAC: create an entry that
                 * carries the packet, publish it in both the hash and the
                 * tree, and queue it for a lookup.
                 */
                entry = kmem_cache_alloc(overlay_entry_cache, KM_NOSLEEP_LAZY);
                if (entry == NULL) {
                        mutex_exit(&ott->ott_lock);
                        return (OVERLAY_TARGET_DROP);
                }
                bcopy(mhi.mhi_daddr, entry->ote_addr, ETHERADDRL);
                entry->ote_chead = entry->ote_ctail = mp;
                entry->ote_mbsize = msgsize(mp);
                entry->ote_flags |= OVERLAY_ENTRY_F_PENDING;
                entry->ote_ott = ott;
                entry->ote_odd = odd;
                refhash_insert(ott->ott_u.ott_dyn.ott_dhash, entry);
                avl_add(&ott->ott_u.ott_dyn.ott_tree, entry);
                mutex_exit(&ott->ott_lock);
                overlay_target_queue(entry);
                return (OVERLAY_TARGET_ASYNC);
        }
        /* Hold the entry so it survives once ott_lock is dropped. */
        refhash_hold(ott->ott_u.ott_dyn.ott_dhash, entry);
        mutex_exit(&ott->ott_lock);

        mutex_enter(&entry->ote_lock);
        if (entry->ote_flags & OVERLAY_ENTRY_F_DROP) {
                ret = OVERLAY_TARGET_DROP;
        } else if (entry->ote_flags & OVERLAY_ENTRY_F_VALID) {
                /* Destination already resolved; hand it straight back. */
                bcopy(&entry->ote_dest.otp_ip, &v6->sin6_addr,
                    sizeof (struct in6_addr));
                v6->sin6_port = htons(entry->ote_dest.otp_port);
                *slenp = sizeof (struct sockaddr_in6);
                ret = OVERLAY_TARGET_OK;
        } else {
                size_t mlen = msgsize(mp);

                /* Bound the bytes parked on an unresolved entry. */
                if (mlen + entry->ote_mbsize > overlay_ent_size) {
                        ret = OVERLAY_TARGET_DROP;
                } else {
                        /* Append mp to the tail of the entry's chain. */
                        if (entry->ote_ctail != NULL) {
                                ASSERT(entry->ote_ctail->b_next ==
                                    NULL);
                                entry->ote_ctail->b_next = mp;
                                entry->ote_ctail = mp;
                        } else {
                                entry->ote_chead = mp;
                                entry->ote_ctail = mp;
                        }
                        entry->ote_mbsize += mlen;
                        /* Only queue the entry if it is not already pending. */
                        if ((entry->ote_flags &
                            OVERLAY_ENTRY_F_PENDING) == 0) {
                                entry->ote_flags |=
                                    OVERLAY_ENTRY_F_PENDING;
                                overlay_target_queue(entry);
                        }
                        ret = OVERLAY_TARGET_ASYNC;
                }
        }
        mutex_exit(&entry->ote_lock);

        /* Drop the hold taken above. */
        mutex_enter(&ott->ott_lock);
        refhash_rele(ott->ott_u.ott_dyn.ott_dhash, entry);
        mutex_exit(&ott->ott_lock);

        return (ret);
}

/*
 * Fill in an overlay_targ_info_t for the device named by oti_linkid: what the
 * device's plugin needs for a destination, the device's vnet id, and whether
 * it is degraded and/or activated. Returns ENOENT if no such device exists.
 */
/* ARGSUSED */
static int
overlay_target_info(overlay_target_hdl_t *thdl, void *arg)
{
        overlay_targ_info_t *info = arg;
        overlay_dev_t *dev = overlay_hold_by_dlid(info->oti_linkid);

        if (dev == NULL)
                return (ENOENT);

        /* Snapshot the device state under its lock. */
        mutex_enter(&dev->odd_lock);
        info->oti_vnetid = dev->odd_vid;
        info->oti_needs = dev->odd_plugin->ovp_dest;
        info->oti_flags = 0;
        if ((dev->odd_flags & OVERLAY_F_DEGRADED) != 0)
                info->oti_flags |= OVERLAY_TARG_INFO_F_DEGRADED;
        if ((dev->odd_flags & OVERLAY_F_ACTIVATED) != 0)
                info->oti_flags |= OVERLAY_TARG_INFO_F_ACTIVE;
        mutex_exit(&dev->odd_lock);

        overlay_hold_rele(dev);
        return (0);
}

/*
 * Associate a new target, described by the overlay_targ_associate_t in arg,
 * with the overlay device named by ota_linkid.
 *
 * Returns:
 *   0       - the target was created and attached to the device.
 *   ENOENT  - no device with that link id exists.
 *   EINVAL  - ota_id is zero, the mode is neither point nor dynamic,
 *             ota_provides does not equal what the device's plugin requires,
 *             or a point target has an unusable address or a zero port.
 *   EEXIST  - the device already has OVERLAY_F_VARPD set.
 */
/* ARGSUSED */
static int
overlay_target_associate(overlay_target_hdl_t *thdl, void *arg)
{
        overlay_dev_t *odd;
        overlay_target_t *ott;
        overlay_targ_associate_t *ota = arg;

        odd = overlay_hold_by_dlid(ota->ota_linkid);
        if (odd == NULL)
                return (ENOENT);

        if (ota->ota_id == 0) {
                overlay_hold_rele(odd);
                return (EINVAL);
        }

        if (ota->ota_mode != OVERLAY_TARGET_POINT &&
            ota->ota_mode != OVERLAY_TARGET_DYNAMIC) {
                overlay_hold_rele(odd);
                return (EINVAL);
        }

        if (ota->ota_provides != odd->odd_plugin->ovp_dest) {
                overlay_hold_rele(odd);
                return (EINVAL);
        }

        /* A point target must carry a usable address and port up front. */
        if (ota->ota_mode == OVERLAY_TARGET_POINT) {
                if (ota->ota_provides & OVERLAY_PLUGIN_D_IP) {
                        if (IN6_IS_ADDR_UNSPECIFIED(&ota->ota_point.otp_ip) ||
                            IN6_IS_ADDR_V4COMPAT(&ota->ota_point.otp_ip) ||
                            IN6_IS_ADDR_V4MAPPED_ANY(&ota->ota_point.otp_ip)) {
                                overlay_hold_rele(odd);
                                return (EINVAL);
                        }
                }

                if (ota->ota_provides & OVERLAY_PLUGIN_D_PORT) {
                        if (ota->ota_point.otp_port == 0) {
                                overlay_hold_rele(odd);
                                return (EINVAL);
                        }
                }
        }

        ott = kmem_cache_alloc(overlay_target_cache, KM_SLEEP);
        ott->ott_flags = 0;
        ott->ott_ocount = 0;
        ott->ott_mode = ota->ota_mode;
        ott->ott_dest = ota->ota_provides;
        ott->ott_id = ota->ota_id;

        if (ott->ott_mode == OVERLAY_TARGET_POINT) {
                bcopy(&ota->ota_point, &ott->ott_u.ott_point,
                    sizeof (overlay_target_point_t));
        } else {
                /*
                 * Dynamic targets index their entries by MAC address twice:
                 * in a refhash and in an AVL tree.
                 */
                ott->ott_u.ott_dyn.ott_dhash = refhash_create(OVERLAY_HSIZE,
                    overlay_mac_hash, overlay_mac_cmp,
                    overlay_target_entry_dtor, sizeof (overlay_target_entry_t),
                    offsetof(overlay_target_entry_t, ote_reflink),
                    offsetof(overlay_target_entry_t, ote_addr), KM_SLEEP);
                avl_create(&ott->ott_u.ott_dyn.ott_tree, overlay_mac_avl,
                    sizeof (overlay_target_entry_t),
                    offsetof(overlay_target_entry_t, ote_avllink));
        }
        mutex_enter(&odd->odd_lock);
        if (odd->odd_flags & OVERLAY_F_VARPD) {
                mutex_exit(&odd->odd_lock);
                /*
                 * We lost: the device is already associated. The target was
                 * never published, so its lookup structures are still empty,
                 * but a dynamic target's refhash and AVL tree must be torn
                 * down here or they are leaked when the target goes back to
                 * the cache.
                 */
                if (ott->ott_mode == OVERLAY_TARGET_DYNAMIC) {
                        avl_destroy(&ott->ott_u.ott_dyn.ott_tree);
                        refhash_destroy(ott->ott_u.ott_dyn.ott_dhash);
                }
                kmem_cache_free(overlay_target_cache, ott);
                overlay_hold_rele(odd);
                return (EEXIST);
        }

        odd->odd_flags |= OVERLAY_F_VARPD;
        odd->odd_target = ott;
        mutex_exit(&odd->odd_lock);

        overlay_hold_rele(odd);

        return (0);
}


/*
 * Mark the device named by otd_linkid as degraded, passing along the message
 * in otd_buf. Returns ENOENT if no such device exists.
 */
/* ARGSUSED */
static int
overlay_target_degrade(overlay_target_hdl_t *thdl, void *arg)
{
        overlay_targ_degrade_t *req = arg;
        overlay_dev_t *dev = overlay_hold_by_dlid(req->otd_linkid);

        if (dev == NULL)
                return (ENOENT);

        overlay_fm_degrade(dev, req->otd_buf);
        overlay_hold_rele(dev);

        return (0);
}

/*
 * Ask the fault management code to restore the device named by otid_linkid.
 * Returns ENOENT if no such device exists.
 */
/* ARGSUSED */
static int
overlay_target_restore(overlay_target_hdl_t *thdl, void *arg)
{
        overlay_targ_id_t *req = arg;
        overlay_dev_t *dev = overlay_hold_by_dlid(req->otid_linkid);

        if (dev == NULL)
                return (ENOENT);

        overlay_fm_restore(dev);
        overlay_hold_rele(dev);

        return (0);
}

/*
 * Clear OVERLAY_F_VARPD on the device named by otid_linkid. Only the flag is
 * cleared here; the device's target is left alone. Returns ENOENT if no such
 * device exists.
 */
/* ARGSUSED */
static int
overlay_target_disassociate(overlay_target_hdl_t *thdl, void *arg)
{
        overlay_targ_id_t *req = arg;
        overlay_dev_t *dev = overlay_hold_by_dlid(req->otid_linkid);

        if (dev == NULL)
                return (ENOENT);

        mutex_enter(&dev->odd_lock);
        dev->odd_flags &= ~OVERLAY_F_VARPD;
        mutex_exit(&dev->odd_lock);

        overlay_hold_rele(dev);

        return (0);
}

/*
 * Hand the next pending lookup to the caller.
 *
 * Waits for an entry to appear on overlay_target_list, fills in the
 * overlay_targ_lookup_t in arg from the first packet chained on that entry,
 * and moves the entry onto the handle's oth_outstanding list. The request id
 * reported to the caller (otl_reqid) is the entry's address; the respond,
 * drop and packet ioctls use it to find the entry again.
 *
 * Returns 0 on success, or ETIME if the wait deadline passes with nothing
 * queued.
 */
static int
overlay_target_lookup_request(overlay_target_hdl_t *thdl, void *arg)
{
        overlay_targ_lookup_t *otl = arg;
        overlay_target_entry_t *entry;
        clock_t ret, timeout;
        mac_header_info_t mhi;

        /*
         * The deadline is absolute and computed once, so every retry through
         * 'again' shares the same overall wait of MICROSEC microseconds.
         */
        timeout = ddi_get_lbolt() + drv_usectohz(MICROSEC);
again:
        mutex_enter(&overlay_target_lock);
        while (list_is_empty(&overlay_target_list)) {
                ret = cv_timedwait(&overlay_target_condvar,
                    &overlay_target_lock, timeout);
                if (ret == -1) {
                        mutex_exit(&overlay_target_lock);
                        return (ETIME);
                }
        }
        entry = list_remove_head(&overlay_target_list);
        mutex_exit(&overlay_target_lock);
        mutex_enter(&entry->ote_lock);
        /* Already answered while it sat on the queue; nothing to ask about. */
        if (entry->ote_flags & OVERLAY_ENTRY_F_VALID) {
                ASSERT(entry->ote_chead == NULL);
                mutex_exit(&entry->ote_lock);
                goto again;
        }
        ASSERT(entry->ote_chead != NULL);

        /*
         * If we have a bogon that doesn't have a valid mac header, drop it and
         * try again.
         */
        if (mac_vlan_header_info(entry->ote_odd->odd_mh, entry->ote_chead,
            &mhi) != 0) {
                boolean_t queue = B_FALSE;
                mblk_t *mp = entry->ote_chead;
                /* Unlink the head packet and fix up tail and byte count. */
                entry->ote_chead = mp->b_next;
                mp->b_next = NULL;
                if (entry->ote_ctail == mp)
                        entry->ote_ctail = entry->ote_chead;
                entry->ote_mbsize -= msgsize(mp);
                /* More packets remain: put the entry back on the queue. */
                if (entry->ote_chead != NULL)
                        queue = B_TRUE;
                mutex_exit(&entry->ote_lock);
                if (queue == B_TRUE)
                        overlay_target_queue(entry);
                freemsg(mp);
                goto again;
        }

        otl->otl_dlid = entry->ote_odd->odd_linkid;
        otl->otl_reqid = (uintptr_t)entry;
        otl->otl_varpdid = entry->ote_ott->ott_id;
        otl->otl_vnetid = entry->ote_odd->odd_vid;

        /* Describe the head packet: sizes, addresses, SAP and VLAN id. */
        otl->otl_hdrsize = mhi.mhi_hdrsize;
        otl->otl_pktsize = msgsize(entry->ote_chead) - otl->otl_hdrsize;
        bcopy(mhi.mhi_daddr, otl->otl_dstaddr, ETHERADDRL);
        bcopy(mhi.mhi_saddr, otl->otl_srcaddr, ETHERADDRL);
        otl->otl_dsttype = mhi.mhi_dsttype;
        otl->otl_sap = mhi.mhi_bindsap;
        otl->otl_vlan = VLAN_ID(mhi.mhi_tci);
        mutex_exit(&entry->ote_lock);

        /* Track the entry on this handle until it is answered or dropped. */
        mutex_enter(&thdl->oth_lock);
        list_insert_tail(&thdl->oth_outstanding, entry);
        mutex_exit(&thdl->oth_lock);

        return (0);
}

/*
 * Accept the answer to an outstanding lookup.
 *
 * The entry identified by otr_reqid is taken off the handle's outstanding
 * list, its destination is set from otr_answer and it is marked valid. The
 * packets that were waiting on the entry are then transmitted and whatever
 * overlay_m_tx() hands back is freed. Finally the target's outstanding count
 * is dropped and its condition variable signalled.
 *
 * Returns 0 on success, or EINVAL if otr_reqid does not name an entry
 * outstanding on this handle.
 */
static int
overlay_target_lookup_respond(overlay_target_hdl_t *thdl, void *arg)
{
        const overlay_targ_resp_t *resp = arg;
        overlay_target_entry_t *entry;
        overlay_target_t *ott;
        mblk_t *chain;

        /* Locate the request on this handle's outstanding list. */
        mutex_enter(&thdl->oth_lock);
        entry = list_head(&thdl->oth_outstanding);
        while (entry != NULL && (uintptr_t)entry != resp->otr_reqid)
                entry = list_next(&thdl->oth_outstanding, entry);

        if (entry == NULL) {
                mutex_exit(&thdl->oth_lock);
                return (EINVAL);
        }
        list_remove(&thdl->oth_outstanding, entry);
        mutex_exit(&thdl->oth_lock);

        /* Record the answer and detach the waiting packets. */
        mutex_enter(&entry->ote_lock);
        bcopy(&resp->otr_answer, &entry->ote_dest,
            sizeof (overlay_target_point_t));
        entry->ote_flags &= ~OVERLAY_ENTRY_F_PENDING;
        entry->ote_flags |= OVERLAY_ENTRY_F_VALID;
        chain = entry->ote_chead;
        entry->ote_chead = NULL;
        entry->ote_ctail = NULL;
        entry->ote_mbsize = 0;
        entry->ote_vtime = gethrtime();
        mutex_exit(&entry->ote_lock);

        /*
         * For now do an in-situ drain.
         */
        chain = overlay_m_tx(entry->ote_odd, chain);
        freemsgchain(chain);

        ott = entry->ote_ott;
        mutex_enter(&ott->ott_lock);
        ott->ott_ocount--;
        cv_signal(&ott->ott_cond);
        mutex_exit(&ott->ott_lock);

        return (0);
}

/*
 * Drop the packet at the head of an outstanding lookup.
 *
 * The entry identified by otr_reqid is taken off the handle's outstanding
 * list. Unless the entry has already been marked valid, its head packet is
 * unlinked and freed; if more packets remain the entry is queued again for
 * another lookup, otherwise its pending flag is cleared. In all successful
 * cases the target's outstanding count is dropped and its condition variable
 * signalled.
 *
 * Returns 0 on success, or EINVAL if otr_reqid does not name an entry
 * outstanding on this handle.
 */
static int
overlay_target_lookup_drop(overlay_target_hdl_t *thdl, void *arg)
{
        const overlay_targ_resp_t *otr = arg;
        overlay_target_entry_t *entry;
        mblk_t *mp;
        boolean_t queue = B_FALSE;

        /* Locate the request on this handle's outstanding list. */
        mutex_enter(&thdl->oth_lock);
        for (entry = list_head(&thdl->oth_outstanding); entry != NULL;
            entry = list_next(&thdl->oth_outstanding, entry)) {
                if ((uintptr_t)entry == otr->otr_reqid)
                        break;
        }

        if (entry == NULL) {
                mutex_exit(&thdl->oth_lock);
                return (EINVAL);
        }
        list_remove(&thdl->oth_outstanding, entry);
        mutex_exit(&thdl->oth_lock);

        mutex_enter(&entry->ote_lock);

        /* Safeguard against a confused varpd */
        if (entry->ote_flags & OVERLAY_ENTRY_F_VALID) {
                entry->ote_flags &= ~OVERLAY_ENTRY_F_PENDING;
                DTRACE_PROBE1(overlay__target__valid__drop,
                    overlay_target_entry_t *, entry);
                mutex_exit(&entry->ote_lock);
                goto done;
        }

        /* Unlink the head packet, if any, and fix up tail and byte count. */
        mp = entry->ote_chead;
        if (mp != NULL) {
                entry->ote_chead = mp->b_next;
                mp->b_next = NULL;
                if (entry->ote_ctail == mp)
                        entry->ote_ctail = entry->ote_chead;
                entry->ote_mbsize -= msgsize(mp);
        }
        /* Packets left over need a fresh lookup; otherwise we are idle. */
        if (entry->ote_chead != NULL) {
                queue = B_TRUE;
                entry->ote_flags |= OVERLAY_ENTRY_F_PENDING;
        } else {
                entry->ote_flags &= ~OVERLAY_ENTRY_F_PENDING;
        }
        mutex_exit(&entry->ote_lock);

        /* Queue and free only after ote_lock has been dropped. */
        if (queue == B_TRUE)
                overlay_target_queue(entry);
        freemsg(mp);

done:
        mutex_enter(&entry->ote_ott->ott_lock);
        entry->ote_ott->ott_ocount--;
        cv_signal(&entry->ote_ott->ott_cond);
        mutex_exit(&entry->ote_ott->ott_lock);

        return (0);
}

/*
 * Copy in an overlay_targ_pkt_t from userland, converting from the 32-bit
 * layout when the caller uses the ILP32 data model.
 *
 * A native-sized overlay_targ_pkt_t is always allocated; *outp and *bsize are
 * set to it and its size before the copy is attempted. On a copyin failure
 * the buffer is freed and EFAULT is returned; otherwise 0 is returned.
 */
/* ARGSUSED */
static int
overlay_target_pkt_copyin(const void *ubuf, void **outp, size_t *bsize,
    int flags)
{
        overlay_targ_pkt_t *native;
        overlay_targ_pkt32_t *compat;
        uintptr_t uaddr;

        native = kmem_alloc(sizeof (overlay_targ_pkt_t), KM_SLEEP);
        *outp = native;
        *bsize = sizeof (overlay_targ_pkt_t);

        if (ddi_model_convert_from(flags & FMODELS) != DDI_MODEL_ILP32) {
                /* Native caller: the layouts agree, copy it wholesale. */
                if (ddi_copyin(ubuf, native, *bsize, flags & FKIOCTL) != 0) {
                        kmem_free(native, *bsize);
                        return (EFAULT);
                }
                return (0);
        }

        /*
         * 32-bit caller: read the smaller structure into the same buffer and
         * then widen the user buffer pointer in place.
         */
        if (ddi_copyin(ubuf, native, sizeof (overlay_targ_pkt32_t),
            flags & FKIOCTL) != 0) {
                kmem_free(native, *bsize);
                return (EFAULT);
        }
        compat = (overlay_targ_pkt32_t *)native;
        uaddr = compat->otp_buf;
        native->otp_buf = (void *)uaddr;

        return (0);
}

/*
 * Copy an overlay_targ_pkt_t back out to userland. For an ILP32 caller the
 * buffer pointer is first narrowed in place and only the 32-bit structure's
 * worth of bytes is copied; otherwise bufsize bytes are copied.
 *
 * Returns 0 on success or EFAULT if the copyout fails.
 */
static int
overlay_target_pkt_copyout(void *ubuf, void *buf, size_t bufsize,
    int flags)
{
        size_t outsize = bufsize;

        if (ddi_model_convert_from(flags & FMODELS) == DDI_MODEL_ILP32) {
                overlay_targ_pkt_t *native = buf;
                overlay_targ_pkt32_t *compat = buf;
                uintptr_t uaddr = (uintptr_t)native->otp_buf;

                compat->otp_buf = (caddr32_t)uaddr;
                outsize = sizeof (overlay_targ_pkt32_t);
        }

        if (ddi_copyout(buf, ubuf, outsize, flags & FKIOCTL) != 0)
                return (EFAULT);

        return (0);
}

/*
 * Copy the head packet of an outstanding lookup out to the user buffer
 * described by the overlay_targ_pkt_t in arg.
 *
 * At most otp_size bytes are copied; otp_size is updated to the number of
 * bytes that will be copied, i.e. the smaller of the packet's size and the
 * size the caller asked for. The entry stays on the outstanding list.
 *
 * Returns 0 on success, EINVAL if otp_reqid does not name an entry
 * outstanding on this handle or the entry has no packet, or EFAULT if a
 * copyout fails.
 */
static int
overlay_target_packet(overlay_target_hdl_t *thdl, void *arg)
{
        overlay_targ_pkt_t *pkt = arg;
        overlay_target_entry_t *entry;
        mblk_t *mp;
        size_t mlen;
        size_t boff;

        /* Locate the request on this handle's outstanding list. */
        mutex_enter(&thdl->oth_lock);
        for (entry = list_head(&thdl->oth_outstanding); entry != NULL;
            entry = list_next(&thdl->oth_outstanding, entry)) {
                if ((uintptr_t)entry == pkt->otp_reqid)
                        break;
        }

        if (entry == NULL) {
                mutex_exit(&thdl->oth_lock);
                return (EINVAL);
        }
        /* Take the entry lock before letting go of the handle lock. */
        mutex_enter(&entry->ote_lock);
        mutex_exit(&thdl->oth_lock);
        mp = entry->ote_chead;
        /* Protect against a rogue varpd */
        if (mp == NULL) {
                mutex_exit(&entry->ote_lock);
                return (EINVAL);
        }
        mlen = MIN(msgsize(mp), pkt->otp_size);
        pkt->otp_size = mlen;
        boff = 0;
        /*
         * Walk the message's b_cont chain, copying each block to the next
         * offset in the user buffer. Note that ote_lock is held across the
         * copyouts.
         */
        while (mlen > 0) {
                size_t wlen = MIN(MBLKL(mp), mlen);
                if (ddi_copyout(mp->b_rptr,
                    (void *)((uintptr_t)pkt->otp_buf + boff),
                    wlen, 0) != 0) {
                        mutex_exit(&entry->ote_lock);
                        return (EFAULT);
                }
                mlen -= wlen;
                boff += wlen;
                mp = mp->b_cont;
        }
        mutex_exit(&entry->ote_lock);
        return (0);
}

/*
 * Inject a packet supplied by userland into an overlay device's receive path.
 *
 * The packet (at most ETHERMAX + VLAN_TAGSZ bytes) is copied in from otp_buf.
 * The device is the one named by otp_linkid, unless that is UINT64_MAX, in
 * which case it is the device of the outstanding request named by otp_reqid.
 *
 * Returns:
 *   0       - the packet was passed to mac_rx().
 *   EINVAL  - otp_size is too large.
 *   ENOMEM  - no message block could be allocated.
 *   EFAULT  - the copyin from otp_buf failed.
 *   ENOENT  - no matching device or outstanding request.
 *   EBUSY   - the device is dropping traffic or is not in a mux.
 */
static int
overlay_target_inject(overlay_target_hdl_t *thdl, void *arg)
{
        overlay_targ_pkt_t *pkt = arg;
        overlay_target_entry_t *entry;
        overlay_dev_t *odd;
        mblk_t *mp;
        boolean_t held = B_FALSE;

        if (pkt->otp_size > ETHERMAX + VLAN_TAGSZ)
                return (EINVAL);

        mp = allocb(pkt->otp_size, 0);
        if (mp == NULL)
                return (ENOMEM);

        if (ddi_copyin(pkt->otp_buf, mp->b_rptr, pkt->otp_size, 0) != 0) {
                freeb(mp);
                return (EFAULT);
        }
        mp->b_wptr += pkt->otp_size;

        if (pkt->otp_linkid != UINT64_MAX) {
                odd = overlay_hold_by_dlid(pkt->otp_linkid);
                if (odd == NULL) {
                        freeb(mp);
                        return (ENOENT);
                }
                /*
                 * Remember that we took a hold so that it is released on every
                 * exit path, as the other users of overlay_hold_by_dlid() in
                 * this file do.
                 */
                held = B_TRUE;
        } else {
                mutex_enter(&thdl->oth_lock);
                for (entry = list_head(&thdl->oth_outstanding); entry != NULL;
                    entry = list_next(&thdl->oth_outstanding, entry)) {
                        if ((uintptr_t)entry == pkt->otp_reqid)
                                break;
                }

                if (entry == NULL) {
                        mutex_exit(&thdl->oth_lock);
                        freeb(mp);
                        return (ENOENT);
                }
                /* No hold is taken here; the device comes from the entry. */
                odd = entry->ote_odd;
                mutex_exit(&thdl->oth_lock);
        }

        mutex_enter(&odd->odd_lock);
        if ((odd->odd_flags & OVERLAY_F_MDDROP) ||
            !(odd->odd_flags & OVERLAY_F_IN_MUX)) {
                /* Can't do receive... */
                mutex_exit(&odd->odd_lock);
                OVERLAY_FREEMSG(mp, "dev dropped");
                freeb(mp);
                if (held)
                        overlay_hold_rele(odd);
                return (EBUSY);
        }
        overlay_io_start(odd, OVERLAY_F_IN_RX);
        mutex_exit(&odd->odd_lock);

        mac_rx(odd->odd_mh, NULL, mp);

        mutex_enter(&odd->odd_lock);
        overlay_io_done(odd, OVERLAY_F_IN_RX);
        mutex_exit(&odd->odd_lock);

        if (held)
                overlay_hold_rele(odd);

        return (0);
}

/*
 * Transmit a packet supplied by userland on an overlay device.
 *
 * The packet (at most ETHERMAX + VLAN_TAGSZ bytes) is copied in from otp_buf.
 * The device is the one named by otp_linkid, unless that is UINT64_MAX, in
 * which case it is the device of the outstanding request named by otp_reqid.
 * Anything overlay_m_tx() hands back is freed.
 *
 * Returns:
 *   0       - the packet was handed to overlay_m_tx().
 *   EINVAL  - otp_size is too large.
 *   ENOMEM  - no message block could be allocated.
 *   EFAULT  - the copyin from otp_buf failed.
 *   ENOENT  - no matching device or outstanding request.
 */
static int
overlay_target_resend(overlay_target_hdl_t *thdl, void *arg)
{
        overlay_targ_pkt_t *pkt = arg;
        overlay_target_entry_t *entry;
        overlay_dev_t *odd;
        mblk_t *mp;
        boolean_t held = B_FALSE;

        if (pkt->otp_size > ETHERMAX + VLAN_TAGSZ)
                return (EINVAL);

        mp = allocb(pkt->otp_size, 0);
        if (mp == NULL)
                return (ENOMEM);

        if (ddi_copyin(pkt->otp_buf, mp->b_rptr, pkt->otp_size, 0) != 0) {
                freeb(mp);
                return (EFAULT);
        }
        mp->b_wptr += pkt->otp_size;

        if (pkt->otp_linkid != UINT64_MAX) {
                odd = overlay_hold_by_dlid(pkt->otp_linkid);
                if (odd == NULL) {
                        freeb(mp);
                        return (ENOENT);
                }
                /*
                 * Remember that we took a hold so that it is released once
                 * the packet has been sent, as the other users of
                 * overlay_hold_by_dlid() in this file do.
                 */
                held = B_TRUE;
        } else {
                mutex_enter(&thdl->oth_lock);
                for (entry = list_head(&thdl->oth_outstanding); entry != NULL;
                    entry = list_next(&thdl->oth_outstanding, entry)) {
                        if ((uintptr_t)entry == pkt->otp_reqid)
                                break;
                }

                if (entry == NULL) {
                        mutex_exit(&thdl->oth_lock);
                        freeb(mp);
                        return (ENOENT);
                }
                /* No hold is taken here; the device comes from the entry. */
                odd = entry->ote_odd;
                mutex_exit(&thdl->oth_lock);
        }

        mp = overlay_m_tx(odd, mp);
        freemsgchain(mp);

        if (held)
                overlay_hold_rele(odd);

        return (0);
}

/*
 * Kernel-side working copy of a device list request.
 *
 * otli_count is B_TRUE when the caller supplied no entries (otli_nents == 0),
 * and B_FALSE otherwise. otli_cur is incremented once per device visited,
 * even past otli_nents, so it ends up as the total number of devices.
 * otli_ents has room for otli_nents link ids.
 */
typedef struct overlay_targ_list_int {
        boolean_t       otli_count;
        uint32_t        otli_cur;
        uint32_t        otli_nents;
        uint32_t        otli_ents[];
} overlay_targ_list_int_t;

/*
 * Copy-in routine for OVERLAY_TARG_LIST.
 *
 * Reads the fixed overlay_targ_list_t header from userland and allocates a
 * zeroed overlay_targ_list_int_t with room for otl_nents link ids. When the
 * caller asked for at least one entry, the caller's current array contents
 * are copied in as well; when it asked for none, the request is marked as
 * count-only. On success *outp and *bsize describe the new buffer, which the
 * caller must free.
 *
 * Returns EFAULT if either copy in fails and EINVAL if otl_nents is too large.
 */
static int
overlay_target_list_copyin(const void *ubuf, void **outp, size_t *bsize,
    int flags)
{
        overlay_targ_list_t hdr;
        overlay_targ_list_int_t *list;
        const int cflags = flags & FKIOCTL;
        size_t entsz;

        if (ddi_copyin(ubuf, &hdr, sizeof (overlay_targ_list_t), cflags) != 0)
                return (EFAULT);

        /*
         * Bound the number of entries so that the size computation below
         * stays far away from overflow.
         */
        if (hdr.otl_nents >= INT32_MAX / sizeof (uint32_t))
                return (EINVAL);

        entsz = sizeof (uint32_t) * hdr.otl_nents;
        *bsize = sizeof (overlay_targ_list_int_t) + entsz;

        list = kmem_zalloc(*bsize, KM_SLEEP);
        list->otli_cur = 0;
        list->otli_nents = hdr.otl_nents;

        if (list->otli_nents == 0) {
                /* Nothing to store: the caller only wants the count. */
                list->otli_count = B_TRUE;
        } else {
                void *uents = (void *)((uintptr_t)ubuf +
                    offsetof(overlay_targ_list_t, otl_ents));

                list->otli_count = B_FALSE;
                if (ddi_copyin(uents, list->otli_ents, entsz, cflags) != 0) {
                        kmem_free(list, *bsize);
                        return (EFAULT);
                }
        }

        *outp = list;
        return (0);
}

/*
 * Per-device callback for OVERLAY_TARG_LIST. Records the device's link id in
 * the next free slot, if one remains, and always advances the running count
 * so that the total number of devices is known even when the array is full.
 */
static int
overlay_target_ioctl_list_cb(overlay_dev_t *odd, void *arg)
{
        overlay_targ_list_int_t *list = arg;
        const uint32_t slot = list->otli_cur;

        list->otli_cur = slot + 1;
        if (slot < list->otli_nents)
                list->otli_ents[slot] = odd->odd_linkid;

        return (0);
}

/*
 * OVERLAY_TARG_LIST handler: run overlay_target_ioctl_list_cb() over the
 * overlay devices via overlay_dev_iter(), accumulating results in the
 * overlay_targ_list_int_t passed as arg. Always returns 0.
 */
/* ARGSUSED */
static int
overlay_target_ioctl_list(overlay_target_hdl_t *thdl, void *arg)
{
        overlay_targ_list_int_t *list = arg;

        overlay_dev_iter(overlay_target_ioctl_list_cb, list);

        return (0);
}

/*
 * Copy-out routine for OVERLAY_TARG_LIST.
 *
 * The number of devices seen (otli_cur) is written over the first 32 bits of
 * the user structure. Unless this was a count-only request, the full
 * otli_nents-sized id array is then written at the offset of otl_ents.
 *
 * Returns EFAULT if either copy out fails, 0 otherwise.
 */
/* ARGSUSED */
static int
overlay_target_list_copyout(void *ubuf, void *buf, size_t bufsize, int flags)
{
        overlay_targ_list_int_t *list = buf;
        const int cflags = flags & FKIOCTL;
        void *uents;

        if (ddi_copyout(&list->otli_cur, ubuf, sizeof (uint32_t),
            cflags) != 0)
                return (EFAULT);

        /* Count-only requests have no array to hand back. */
        if (list->otli_count != B_FALSE)
                return (0);

        uents = (void *)((uintptr_t)ubuf +
            offsetof(overlay_targ_list_t, otl_ents));
        if (ddi_copyout(list->otli_ents, uents,
            sizeof (uint32_t) * list->otli_nents, cflags) != 0)
                return (EFAULT);

        return (0);
}

/*
 * OVERLAY_TARG_CACHE_GET handler: look up the cached destination for a MAC
 * address on the device named by otc_linkid.
 *
 * For a point target the single configured destination is returned with no
 * flags. For a dynamic target the entry for otce_mac is consulted: a drop
 * entry is reported through OVERLAY_TARGET_CACHE_DROP, otherwise the
 * destination is copied out.
 *
 * Returns ENOENT if the device does not exist or no usable entry is cached,
 * ENXIO if the device is not flagged OVERLAY_F_VARPD, ENOTSUP for any other
 * target mode, and 0 on success.
 */
/* ARGSUSED */
static int
overlay_target_cache_get(overlay_target_hdl_t *thdl, void *arg)
{
        overlay_targ_cache_t *otc = arg;
        overlay_target_entry_t *ote;
        overlay_target_t *ott;
        overlay_dev_t *odd;
        int ret = 0;

        if ((odd = overlay_hold_by_dlid(otc->otc_linkid)) == NULL)
                return (ENOENT);

        mutex_enter(&odd->odd_lock);
        if ((odd->odd_flags & OVERLAY_F_VARPD) == 0) {
                ret = ENXIO;
                goto unlock_dev;
        }
        ott = odd->odd_target;
        if (ott->ott_mode != OVERLAY_TARGET_POINT &&
            ott->ott_mode != OVERLAY_TARGET_DYNAMIC) {
                ret = ENOTSUP;
                goto unlock_dev;
        }

        /* Take the target lock before letting go of the device lock. */
        mutex_enter(&ott->ott_lock);
        mutex_exit(&odd->odd_lock);

        if (ott->ott_mode == OVERLAY_TARGET_POINT) {
                otc->otc_entry.otce_flags = 0;
                bcopy(&ott->ott_u.ott_point, &otc->otc_entry.otce_dest,
                    sizeof (overlay_target_point_t));
                goto unlock_target;
        }

        ote = refhash_lookup(ott->ott_u.ott_dyn.ott_dhash,
            otc->otc_entry.otce_mac);
        if (ote == NULL) {
                ret = ENOENT;
                goto unlock_target;
        }

        mutex_enter(&ote->ote_lock);
        if ((ote->ote_flags & OVERLAY_ENTRY_F_VALID_MASK) == 0) {
                /* The entry exists but carries no usable answer. */
                ret = ENOENT;
        } else if (ote->ote_flags & OVERLAY_ENTRY_F_DROP) {
                otc->otc_entry.otce_flags = OVERLAY_TARGET_CACHE_DROP;
                ret = 0;
        } else {
                otc->otc_entry.otce_flags = 0;
                bcopy(&ote->ote_dest, &otc->otc_entry.otce_dest,
                    sizeof (overlay_target_point_t));
                ret = 0;
        }
        mutex_exit(&ote->ote_lock);

unlock_target:
        mutex_exit(&ott->ott_lock);
        overlay_hold_rele(odd);
        return (ret);

unlock_dev:
        mutex_exit(&odd->odd_lock);
        overlay_hold_rele(odd);
        return (ret);
}

/*
 * OVERLAY_TARG_CACHE_SET handler: install or update the cache entry for
 * otce_mac on the dynamic target of the device named by otc_linkid.
 *
 * If no entry exists for the MAC address one is allocated and inserted into
 * both the hash and the AVL tree. With OVERLAY_TARGET_CACHE_DROP the entry is
 * flagged as a drop entry; otherwise it is flagged valid, the destination is
 * stored, and any packets queued on the entry are detached and transmitted
 * once the locks have been dropped.
 *
 * Returns EINVAL if any flag other than OVERLAY_TARGET_CACHE_DROP is set,
 * ENOENT if the device does not exist, ENXIO if the device is not flagged
 * OVERLAY_F_VARPD, ENOTSUP if the target is not dynamic, and 0 on success.
 */
/* ARGSUSED */
static int
overlay_target_cache_set(overlay_target_hdl_t *thdl, void *arg)
{
        overlay_dev_t *odd;
        overlay_target_t *ott;
        overlay_target_entry_t *ote;
        overlay_targ_cache_t *otc = arg;
        mblk_t *mp = NULL;

        if (otc->otc_entry.otce_flags & ~OVERLAY_TARGET_CACHE_DROP)
                return (EINVAL);

        odd = overlay_hold_by_dlid(otc->otc_linkid);
        if (odd == NULL)
                return (ENOENT);

        mutex_enter(&odd->odd_lock);
        if (!(odd->odd_flags & OVERLAY_F_VARPD)) {
                mutex_exit(&odd->odd_lock);
                overlay_hold_rele(odd);
                return (ENXIO);
        }
        ott = odd->odd_target;
        if (ott->ott_mode != OVERLAY_TARGET_DYNAMIC) {
                mutex_exit(&odd->odd_lock);
                overlay_hold_rele(odd);
                return (ENOTSUP);
        }
        /* Take the target lock before letting go of the device lock. */
        mutex_enter(&ott->ott_lock);
        mutex_exit(&odd->odd_lock);

        ote = refhash_lookup(ott->ott_u.ott_dyn.ott_dhash,
            otc->otc_entry.otce_mac);
        if (ote == NULL) {
                /*
                 * NOTE(review): ote_flags is not initialized here; presumably
                 * the kmem cache constructor leaves it zeroed -- confirm.
                 */
                ote = kmem_cache_alloc(overlay_entry_cache, KM_SLEEP);
                bcopy(otc->otc_entry.otce_mac, ote->ote_addr, ETHERADDRL);
                ote->ote_chead = ote->ote_ctail = NULL;
                ote->ote_mbsize = 0;
                ote->ote_ott = ott;
                ote->ote_odd = odd;
                /* Lock the entry before it becomes visible in the tables. */
                mutex_enter(&ote->ote_lock);
                refhash_insert(ott->ott_u.ott_dyn.ott_dhash, ote);
                avl_add(&ott->ott_u.ott_dyn.ott_tree, ote);
        } else {
                mutex_enter(&ote->ote_lock);
        }

        /*
         * NOTE(review): the flags are only ever OR-ed in, so an entry that
         * was previously valid stays valid when marked drop and vice versa.
         * Confirm that this is intended.
         */
        if (otc->otc_entry.otce_flags & OVERLAY_TARGET_CACHE_DROP) {
                ote->ote_flags |= OVERLAY_ENTRY_F_DROP;
        } else {
                ote->ote_flags |= OVERLAY_ENTRY_F_VALID;
                bcopy(&otc->otc_entry.otce_dest, &ote->ote_dest,
                    sizeof (overlay_target_point_t));
                /* Detach the queued packets so they can be sent unlocked. */
                mp = ote->ote_chead;
                ote->ote_chead = NULL;
                ote->ote_ctail = NULL;
                ote->ote_mbsize = 0;
                ote->ote_vtime = gethrtime();
        }

        mutex_exit(&ote->ote_lock);
        mutex_exit(&ott->ott_lock);

        if (mp != NULL) {
                /* Anything overlay_m_tx() hands back is freed. */
                mp = overlay_m_tx(ote->ote_odd, mp);
                freemsgchain(mp);
        }

        overlay_hold_rele(odd);

        return (0);
}

/*
 * OVERLAY_TARG_CACHE_REMOVE handler: invalidate the cache entry for otce_mac
 * on the dynamic target of the device named by otc_linkid. The entry itself
 * stays in the tables; only its OVERLAY_ENTRY_F_VALID_MASK bits are cleared.
 *
 * Returns ENOENT if the device or the entry does not exist, ENXIO if the
 * device is not flagged OVERLAY_F_VARPD, ENOTSUP if the target is not
 * dynamic, and 0 on success.
 */
/* ARGSUSED */
static int
overlay_target_cache_remove(overlay_target_hdl_t *thdl, void *arg)
{
        overlay_targ_cache_t *otc = arg;
        overlay_target_entry_t *ote;
        overlay_target_t *ott;
        overlay_dev_t *odd;
        int ret;

        if ((odd = overlay_hold_by_dlid(otc->otc_linkid)) == NULL)
                return (ENOENT);

        mutex_enter(&odd->odd_lock);
        if ((odd->odd_flags & OVERLAY_F_VARPD) == 0) {
                ret = ENXIO;
                goto unlock_dev;
        }
        ott = odd->odd_target;
        if (ott->ott_mode != OVERLAY_TARGET_DYNAMIC) {
                ret = ENOTSUP;
                goto unlock_dev;
        }

        /* Take the target lock before letting go of the device lock. */
        mutex_enter(&ott->ott_lock);
        mutex_exit(&odd->odd_lock);

        ote = refhash_lookup(ott->ott_u.ott_dyn.ott_dhash,
            otc->otc_entry.otce_mac);
        if (ote == NULL) {
                ret = ENOENT;
        } else {
                mutex_enter(&ote->ote_lock);
                ote->ote_flags &= ~OVERLAY_ENTRY_F_VALID_MASK;
                mutex_exit(&ote->ote_lock);
                ret = 0;
        }

        mutex_exit(&ott->ott_lock);
        overlay_hold_rele(odd);
        return (ret);

unlock_dev:
        mutex_exit(&odd->odd_lock);
        overlay_hold_rele(odd);
        return (ret);
}

/*
 * OVERLAY_TARG_CACHE_FLUSH handler: invalidate every cache entry on the
 * dynamic target of the device named by otc_linkid. Entries stay in the
 * tables; only their OVERLAY_ENTRY_F_VALID_MASK bits are cleared. The MAC
 * address in the request is not used.
 *
 * Returns ENOENT if the device does not exist, ENXIO if the device is not
 * flagged OVERLAY_F_VARPD, ENOTSUP if the target is not dynamic, and 0 on
 * success.
 */
/* ARGSUSED */
static int
overlay_target_cache_flush(overlay_target_hdl_t *thdl, void *arg)
{
        avl_tree_t *avl;
        overlay_dev_t *odd;
        overlay_target_t *ott;
        overlay_target_entry_t *ote;
        overlay_targ_cache_t *otc = arg;

        odd = overlay_hold_by_dlid(otc->otc_linkid);
        if (odd == NULL)
                return (ENOENT);

        mutex_enter(&odd->odd_lock);
        if (!(odd->odd_flags & OVERLAY_F_VARPD)) {
                mutex_exit(&odd->odd_lock);
                overlay_hold_rele(odd);
                return (ENXIO);
        }
        ott = odd->odd_target;
        if (ott->ott_mode != OVERLAY_TARGET_DYNAMIC) {
                mutex_exit(&odd->odd_lock);
                overlay_hold_rele(odd);
                return (ENOTSUP);
        }
        /* Take the target lock before letting go of the device lock. */
        mutex_enter(&ott->ott_lock);
        mutex_exit(&odd->odd_lock);
        avl = &ott->ott_u.ott_dyn.ott_tree;

        for (ote = avl_first(avl); ote != NULL; ote = AVL_NEXT(avl, ote)) {
                mutex_enter(&ote->ote_lock);
                ote->ote_flags &= ~OVERLAY_ENTRY_F_VALID_MASK;
                mutex_exit(&ote->ote_lock);
        }

        mutex_exit(&ott->ott_lock);
        overlay_hold_rele(odd);

        return (0);
}

/*
 * Copy-in routine for OVERLAY_TARG_CACHE_ITER.
 *
 * Reads the fixed overlay_targ_cache_iter_t header from userland and
 * allocates a buffer large enough for the header plus otci_count entries. The
 * header is copied into the new buffer; the entry array is left zeroed for
 * the handler to fill in. On success *outp and *bsize describe the buffer,
 * which the caller must free.
 *
 * Returns EFAULT if the copy in fails, E2BIG if otci_count exceeds
 * OVERLAY_TARGET_ITER_MAX and EINVAL if otci_count is zero.
 */
static int
overlay_target_cache_iter_copyin(const void *ubuf, void **outp, size_t *bsize,
    int flags)
{
        overlay_targ_cache_iter_t base, *iter;

        if (ddi_copyin(ubuf, &base, sizeof (overlay_targ_cache_iter_t),
            flags & FKIOCTL) != 0)
                return (EFAULT);

        if (base.otci_count > OVERLAY_TARGET_ITER_MAX)
                return (E2BIG);

        if (base.otci_count == 0)
                return (EINVAL);

        *bsize = sizeof (overlay_targ_cache_iter_t) +
            base.otci_count * sizeof (overlay_targ_cache_entry_t);
        /*
         * This buffer is later copied back out to userland, and not every
         * byte of every entry is necessarily written by the handler. Zero it
         * so that stale kernel heap contents can never be disclosed.
         */
        iter = kmem_zalloc(*bsize, KM_SLEEP);
        bcopy(&base, iter, sizeof (overlay_targ_cache_iter_t));
        *outp = iter;

        return (0);
}

/*
 * Kernel view of the otci_marker field carried between successive
 * OVERLAY_TARG_CACHE_ITER calls so that iteration can resume where it left
 * off.
 */
typedef struct overlay_targ_cache_marker {
        uint8_t         otcm_mac[ETHERADDRL];   /* MAC address to resume at */
        uint16_t        otcm_done;              /* non-zero: iteration over */
} overlay_targ_cache_marker_t;

/*
 * OVERLAY_TARG_CACHE_ITER handler: return up to otci_count cache entries from
 * the device named by otci_linkid, resuming from the position recorded in
 * otci_marker.
 *
 * A point target yields exactly one entry with an all-zero MAC address. For a
 * dynamic target the AVL tree is walked in order starting at the marker's MAC
 * address; entries with none of the OVERLAY_ENTRY_F_VALID_MASK bits set are
 * skipped. On return otci_count holds the number of entries written and the
 * marker is either advanced to the next entry to visit or flagged as done.
 *
 * Returns ENOENT if the device does not exist, ENXIO if the device is not
 * flagged OVERLAY_F_VARPD, ENOTSUP for unsupported target modes, and 0
 * otherwise.
 */
/* ARGSUSED */
static int
overlay_target_cache_iter(overlay_target_hdl_t *thdl, void *arg)
{
        overlay_dev_t *odd;
        overlay_target_t *ott;
        overlay_target_entry_t lookup, *ent;
        overlay_targ_cache_marker_t *mark;
        avl_index_t where;
        avl_tree_t *avl;
        uint16_t written = 0;

        overlay_targ_cache_iter_t *iter = arg;
        mark = (void *)&iter->otci_marker;

        /* A previous call already reached the end; nothing more to report. */
        if (mark->otcm_done != 0) {
                iter->otci_count = 0;
                return (0);
        }

        odd = overlay_hold_by_dlid(iter->otci_linkid);
        if (odd == NULL)
                return (ENOENT);

        mutex_enter(&odd->odd_lock);
        if (!(odd->odd_flags & OVERLAY_F_VARPD)) {
                mutex_exit(&odd->odd_lock);
                overlay_hold_rele(odd);
                return (ENXIO);
        }
        ott = odd->odd_target;
        if (ott->ott_mode != OVERLAY_TARGET_DYNAMIC &&
            ott->ott_mode != OVERLAY_TARGET_POINT) {
                mutex_exit(&odd->odd_lock);
                overlay_hold_rele(odd);
                return (ENOTSUP);
        }

        /*
         * Holding this lock across the entire iteration probably isn't very
         * good. We should perhaps add an r/w lock for the avl tree. But we'll
         * wait until we know it's necessary before we do more.
         */
        mutex_enter(&ott->ott_lock);
        mutex_exit(&odd->odd_lock);

        if (ott->ott_mode == OVERLAY_TARGET_POINT) {
                overlay_targ_cache_entry_t *out = &iter->otci_ents[0];
                bzero(out->otce_mac, ETHERADDRL);
                out->otce_flags = 0;
                bcopy(&ott->ott_u.ott_point, &out->otce_dest,
                    sizeof (overlay_target_point_t));
                written++;
                mark->otcm_done = 1;
                /*
                 * A point target has only the one destination. The dynamic
                 * table in ott_u.ott_dyn is not in use in this mode, so it
                 * must not be walked.
                 */
                goto done;
        }

        avl = &ott->ott_u.ott_dyn.ott_tree;
        bcopy(mark->otcm_mac, lookup.ote_addr, ETHERADDRL);
        ent = avl_find(avl, &lookup, &where);

        /*
         * NULL ent means that the entry does not exist, so we want to start
         * with the closest node in the tree. This means that we implicitly rely
         * on the tree's order and the first node will be the mac
         * 00:00:00:00:00:00 and the last will be ff:ff:ff:ff:ff:ff.
         */
        if (ent == NULL) {
                ent = avl_nearest(avl, where, AVL_AFTER);
                if (ent == NULL) {
                        mark->otcm_done = 1;
                        goto done;
                }
        }

        for (; ent != NULL && written < iter->otci_count;
            ent = AVL_NEXT(avl, ent)) {
                overlay_targ_cache_entry_t *out = &iter->otci_ents[written];
                mutex_enter(&ent->ote_lock);
                if ((ent->ote_flags & OVERLAY_ENTRY_F_VALID_MASK) == 0) {
                        mutex_exit(&ent->ote_lock);
                        continue;
                }
                bcopy(ent->ote_addr, out->otce_mac, ETHERADDRL);
                out->otce_flags = 0;
                if (ent->ote_flags & OVERLAY_ENTRY_F_DROP)
                        out->otce_flags |= OVERLAY_TARGET_CACHE_DROP;
                if (ent->ote_flags & OVERLAY_ENTRY_F_VALID) {
                        bcopy(&ent->ote_dest, &out->otce_dest,
                            sizeof (overlay_target_point_t));
                } else {
                        /*
                         * No destination to report. Zero the field so that
                         * whatever the buffer previously held is not handed
                         * back to userland.
                         */
                        bzero(&out->otce_dest,
                            sizeof (overlay_target_point_t));
                }
                written++;
                mutex_exit(&ent->ote_lock);
        }

        /*
         * If we stopped because the output filled up, ent is the first entry
         * that was not examined; remember it so the next call resumes there.
         */
        if (ent != NULL) {
                bcopy(ent->ote_addr, mark->otcm_mac, ETHERADDRL);
        } else {
                mark->otcm_done = 1;
        }

done:
        iter->otci_count = written;
        mutex_exit(&ott->ott_lock);
        overlay_hold_rele(odd);

        return (0);
}

/*
 * Copy-out routine for OVERLAY_TARG_CACHE_ITER. Writes the header followed by
 * only as many entries as otci_count says are present, rather than the whole
 * allocated buffer.
 *
 * Returns EFAULT if the copy out fails, 0 otherwise.
 */
/* ARGSUSED */
static int
overlay_target_cache_iter_copyout(void *ubuf, void *buf, size_t bufsize,
    int flags)
{
        const overlay_targ_cache_iter_t *iter = buf;
        size_t nbytes = sizeof (overlay_targ_cache_iter_t);

        nbytes += iter->otci_count * sizeof (overlay_targ_cache_entry_t);

        return (ddi_copyout(buf, ubuf, nbytes, flags & FKIOCTL) != 0 ?
            EFAULT : 0);
}

/*
 * Dispatch table consulted by overlay_target_ioctl(). The table is terminated
 * by an entry whose command is zero.
 *
 * Each row lists, in order: the ioctl command, two booleans, an optional
 * custom copy-in routine, the handler, an optional custom copy-out routine,
 * and the size of the fixed ioctl structure. Judging by how
 * overlay_target_ioctl() uses the fields, the first boolean says whether the
 * handle must have been opened for writing and the second whether results are
 * copied back out on success -- NOTE(review): confirm the field order against
 * the overlay_target_ioctl_t definition. A NULL copy-in or copy-out routine
 * means a plain copy of the fixed-size structure.
 */
static overlay_target_ioctl_t overlay_target_ioctab[] = {
        { OVERLAY_TARG_INFO, B_TRUE, B_TRUE,
                NULL, overlay_target_info,
                NULL, sizeof (overlay_targ_info_t)      },
        { OVERLAY_TARG_ASSOCIATE, B_TRUE, B_FALSE,
                NULL, overlay_target_associate,
                NULL, sizeof (overlay_targ_associate_t) },
        { OVERLAY_TARG_DISASSOCIATE, B_TRUE, B_FALSE,
                NULL, overlay_target_disassociate,
                NULL, sizeof (overlay_targ_id_t)        },
        { OVERLAY_TARG_DEGRADE, B_TRUE, B_FALSE,
                NULL, overlay_target_degrade,
                NULL, sizeof (overlay_targ_degrade_t)   },
        { OVERLAY_TARG_RESTORE, B_TRUE, B_FALSE,
                NULL, overlay_target_restore,
                NULL, sizeof (overlay_targ_id_t)        },
        { OVERLAY_TARG_LOOKUP, B_FALSE, B_TRUE,
                NULL, overlay_target_lookup_request,
                NULL, sizeof (overlay_targ_lookup_t)    },
        { OVERLAY_TARG_RESPOND, B_TRUE, B_FALSE,
                NULL, overlay_target_lookup_respond,
                NULL, sizeof (overlay_targ_resp_t)      },
        { OVERLAY_TARG_DROP, B_TRUE, B_FALSE,
                NULL, overlay_target_lookup_drop,
                NULL, sizeof (overlay_targ_resp_t)      },
        { OVERLAY_TARG_PKT, B_TRUE, B_TRUE,
                overlay_target_pkt_copyin,
                overlay_target_packet,
                overlay_target_pkt_copyout,
                sizeof (overlay_targ_pkt_t)             },
        { OVERLAY_TARG_INJECT, B_TRUE, B_FALSE,
                overlay_target_pkt_copyin,
                overlay_target_inject,
                NULL, sizeof (overlay_targ_pkt_t)       },
        { OVERLAY_TARG_RESEND, B_TRUE, B_FALSE,
                overlay_target_pkt_copyin,
                overlay_target_resend,
                NULL, sizeof (overlay_targ_pkt_t)       },
        { OVERLAY_TARG_LIST, B_FALSE, B_TRUE,
                overlay_target_list_copyin,
                overlay_target_ioctl_list,
                overlay_target_list_copyout,
                sizeof (overlay_targ_list_t)            },
        { OVERLAY_TARG_CACHE_GET, B_FALSE, B_TRUE,
                NULL, overlay_target_cache_get,
                NULL, sizeof (overlay_targ_cache_t)     },
        { OVERLAY_TARG_CACHE_SET, B_TRUE, B_TRUE,
                NULL, overlay_target_cache_set,
                NULL, sizeof (overlay_targ_cache_t)     },
        { OVERLAY_TARG_CACHE_REMOVE, B_TRUE, B_TRUE,
                NULL, overlay_target_cache_remove,
                NULL, sizeof (overlay_targ_cache_t)     },
        { OVERLAY_TARG_CACHE_FLUSH, B_TRUE, B_TRUE,
                NULL, overlay_target_cache_flush,
                NULL, sizeof (overlay_targ_cache_t)     },
        { OVERLAY_TARG_CACHE_ITER, B_FALSE, B_TRUE,
                overlay_target_cache_iter_copyin,
                overlay_target_cache_iter,
                overlay_target_cache_iter_copyout,
                sizeof (overlay_targ_cache_iter_t)              },
        { 0 }
};

/*
 * open(9E) entry point for the overlay target device.
 *
 * Only minor 0 may be opened; each successful open is given its own freshly
 * allocated minor number and per-open handle, and *devp is rewritten to refer
 * to it. Callers must pass the datalink configuration policy check and be in
 * the global zone. Opens for writing must also be exclusive, and only one
 * exclusive open may exist at a time.
 *
 * Returns EPERM on a policy or zone failure, ENXIO for a non-zero minor or if
 * the soft state cannot be allocated, EINVAL for an unsupported open type or
 * flag combination, EEXIST if an exclusive open already exists, and 0 on
 * success.
 */
int
overlay_target_open(dev_t *devp, int flags, int otype, cred_t *credp)
{
        minor_t mid;
        overlay_target_hdl_t *thdl;

        if (secpolicy_dl_config(credp) != 0)
                return (EPERM);

        if (getminor(*devp) != 0)
                return (ENXIO);

        /*
         * The OTYP_* values are an enumeration rather than a bit mask, and
         * OTYP_BLK is zero, so a bitwise test against it can never be true.
         * Compare for equality to actually reject block opens.
         */
        if (otype == OTYP_BLK)
                return (EINVAL);

        if (flags & ~(FREAD | FWRITE | FEXCL))
                return (EINVAL);

        /* Writers must also ask for exclusive access. */
        if ((flags & FWRITE) &&
            !(flags & FEXCL))
                return (EINVAL);

        if (!(flags & FREAD) && !(flags & FWRITE))
                return (EINVAL);

        if (crgetzoneid(credp) != GLOBAL_ZONEID)
                return (EPERM);

        mid = id_alloc(overlay_thdl_idspace);
        if (ddi_soft_state_zalloc(overlay_thdl_state, mid) != 0) {
                id_free(overlay_thdl_idspace, mid);
                return (ENXIO);
        }

        thdl = ddi_get_soft_state(overlay_thdl_state, mid);
        VERIFY(thdl != NULL);
        thdl->oth_minor = mid;
        thdl->oth_zoneid = crgetzoneid(credp);
        thdl->oth_oflags = flags;
        mutex_init(&thdl->oth_lock, NULL, MUTEX_DRIVER, NULL);
        list_create(&thdl->oth_outstanding, sizeof (overlay_target_entry_t),
            offsetof(overlay_target_entry_t, ote_qlink));
        *devp = makedevice(getmajor(*devp), mid);

        mutex_enter(&overlay_target_lock);
        if ((flags & FEXCL) && overlay_target_excl == B_TRUE) {
                /* Someone else holds the exclusive open; undo everything. */
                mutex_exit(&overlay_target_lock);
                list_destroy(&thdl->oth_outstanding);
                mutex_destroy(&thdl->oth_lock);
                ddi_soft_state_free(overlay_thdl_state, mid);
                id_free(overlay_thdl_idspace, mid);
                return (EEXIST);
        } else if ((flags & FEXCL) != 0) {
                VERIFY(overlay_target_excl == B_FALSE);
                overlay_target_excl = B_TRUE;
        }
        list_insert_tail(&overlay_thdl_list, thdl);
        mutex_exit(&overlay_target_lock);

        return (0);
}

/*
 * ioctl(9E) entry point for the overlay target device.
 *
 * The command is looked up in overlay_target_ioctab. The argument is brought
 * into a kernel buffer, either by the entry's own copy-in routine or by a
 * plain copy of oti_size bytes, and handed to the entry's handler. When the
 * handler succeeds and the entry asks for it, the buffer is copied back out,
 * again either by the entry's own routine or by a plain copy. The buffer is
 * always freed before returning.
 *
 * Returns EPERM if the caller fails the datalink configuration policy check,
 * ENXIO if the minor has no handle, ENOTTY for an unknown command, EBADF if
 * the command needs write access and the handle lacks it, EFAULT on a failed
 * copy, or whatever the copy-in, handler or copy-out routine returns.
 */
/* ARGSUSED */
int
overlay_target_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
    int *rvalp)
{
        overlay_target_ioctl_t *ioc;
        overlay_target_hdl_t *thdl;
        void *uaddr = (void *)(uintptr_t)arg;
        caddr_t buf;
        size_t bufsize;
        int ret;

        if (secpolicy_dl_config(credp) != 0)
                return (EPERM);

        thdl = ddi_get_soft_state(overlay_thdl_state, getminor(dev));
        if (thdl == NULL)
                return (ENXIO);

        /* Find the table row for this command; a zero command ends it. */
        ioc = &overlay_target_ioctab[0];
        while (ioc->oti_cmd != 0 && ioc->oti_cmd != cmd)
                ioc++;
        if (ioc->oti_cmd == 0)
                return (ENOTTY);

        if (ioc->oti_write == B_TRUE && (mode & FWRITE) == 0)
                return (EBADF);

        if (ioc->oti_copyin != NULL) {
                ret = ioc->oti_copyin(uaddr, (void **)&buf, &bufsize, mode);
                if (ret != 0)
                        return (ret);
        } else {
                bufsize = ioc->oti_size;
                buf = kmem_alloc(bufsize, KM_SLEEP);
                if (ddi_copyin(uaddr, buf, bufsize, mode & FKIOCTL) != 0) {
                        kmem_free(buf, bufsize);
                        return (EFAULT);
                }
        }

        ret = ioc->oti_func(thdl, buf);

        if (ret == 0 && ioc->oti_size != 0 && ioc->oti_ncopyout == B_TRUE) {
                if (ioc->oti_copyout != NULL) {
                        ret = ioc->oti_copyout(uaddr, buf, bufsize, mode);
                } else if (ddi_copyout(buf, uaddr, bufsize,
                    mode & FKIOCTL) != 0) {
                        ret = EFAULT;
                }
        }

        kmem_free(buf, bufsize);
        return (ret);
}

/*
 * close(9E) entry point for the overlay target device.
 *
 * The handle is taken off the global handle list, every request it still had
 * outstanding is moved to the tail of overlay_target_list, and
 * overlay_target_condvar is signalled. If the handle was the exclusive open,
 * the exclusive marker is cleared. Finally the handle's resources, soft state
 * and minor number are released.
 *
 * Returns ENXIO if the minor has no handle, 0 otherwise.
 */
/* ARGSUSED */
int
overlay_target_close(dev_t dev, int flags, int otype, cred_t *credp)
{
        overlay_target_entry_t *entry;
        overlay_target_hdl_t *thdl;
        minor_t mid = getminor(dev);

        thdl = ddi_get_soft_state(overlay_thdl_state, mid);
        if (thdl == NULL)
                return (ENXIO);

        mutex_enter(&overlay_target_lock);
        list_remove(&overlay_thdl_list, thdl);

        /* Hand back everything this handle had claimed but not answered. */
        mutex_enter(&thdl->oth_lock);
        for (;;) {
                entry = list_remove_head(&thdl->oth_outstanding);
                if (entry == NULL)
                        break;
                list_insert_tail(&overlay_target_list, entry);
        }
        cv_signal(&overlay_target_condvar);
        mutex_exit(&thdl->oth_lock);

        if ((thdl->oth_oflags & FEXCL) != 0) {
                VERIFY(overlay_target_excl == B_TRUE);
                overlay_target_excl = B_FALSE;
        }
        mutex_exit(&overlay_target_lock);

        list_destroy(&thdl->oth_outstanding);
        mutex_destroy(&thdl->oth_lock);

        /* Read the minor before the soft state that holds it is freed. */
        mid = thdl->oth_minor;
        ddi_soft_state_free(overlay_thdl_state, mid);
        id_free(overlay_thdl_idspace, mid);

        return (0);
}