root/usr/src/uts/common/os/netstack.c
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 * Copyright (c) 2017, Joyent, Inc.  All rights reserved.
 */

#include <sys/param.h>
#include <sys/sysmacros.h>
#include <sys/vm.h>
#include <sys/proc.h>
#include <sys/tuneable.h>
#include <sys/systm.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/sdt.h>
#include <sys/mutex.h>
#include <sys/bitmap.h>
#include <sys/atomic.h>
#include <sys/sunddi.h>
#include <sys/kobj.h>
#include <sys/disp.h>
#include <vm/seg_kmem.h>
#include <sys/zone.h>
#include <sys/netstack.h>

/*
 * What we use so that the zones framework can tell us about new zones,
 * which we use to create new stacks.
 */
static zone_key_t netstack_zone_key;

static int      netstack_initialized = 0;

/*
 * Track the registered netstacks.
 * The global lock protects
 * - ns_reg
 * - the list starting at netstack_head and following the netstack_next
 *   pointers.
 */
static kmutex_t netstack_g_lock;

/*
 * Registry of netstacks with their create/shutdown/destory functions.
 */
static struct netstack_registry ns_reg[NS_MAX];

/*
 * Global list of existing stacks.  We use this when a new zone with
 * an exclusive IP instance is created.
 *
 * Note that in some cases a netstack_t needs to stay around after the zone
 * has gone away. This is because there might be outstanding references
 * (from TCP TIME_WAIT connections, IPsec state, etc). The netstack_t data
 * structure and all the foo_stack_t's hanging off of it will be cleaned up
 * when the last reference to it is dropped.
 * However, the same zone might be rebooted. That is handled using the
 * assumption that the zones framework picks a new zoneid each time a zone
 * is (re)booted. We assert for that condition in netstack_zone_create().
 * Thus the old netstack_t can take its time for things to time out.
 */
static netstack_t *netstack_head;

/*
 * To support kstat_create_netstack() using kstat_zone_add we need
 * to track both
 *  - all zoneids that use the global/shared stack
 *  - all kstats that have been added for the shared stack
 */
struct shared_zone_list {
        struct shared_zone_list *sz_next;
        zoneid_t                sz_zoneid;
};

struct shared_kstat_list {
        struct shared_kstat_list *sk_next;
        kstat_t                  *sk_kstat;
};

static kmutex_t netstack_shared_lock;   /* protects the following two */
static struct shared_zone_list  *netstack_shared_zones;
static struct shared_kstat_list *netstack_shared_kstats;

static void     *netstack_zone_create(zoneid_t zoneid);
static void     netstack_zone_shutdown(zoneid_t zoneid, void *arg);
static void     netstack_zone_destroy(zoneid_t zoneid, void *arg);

static void     netstack_shared_zone_add(zoneid_t zoneid);
static void     netstack_shared_zone_remove(zoneid_t zoneid);
static void     netstack_shared_kstat_add(kstat_t *ks);
static void     netstack_shared_kstat_remove(kstat_t *ks);

typedef boolean_t applyfn_t(kmutex_t *, netstack_t *, int);

static void     apply_all_netstacks(int, applyfn_t *);
static void     apply_all_modules(netstack_t *, applyfn_t *);
static void     apply_all_modules_reverse(netstack_t *, applyfn_t *);
static boolean_t netstack_apply_create(kmutex_t *, netstack_t *, int);
static boolean_t netstack_apply_shutdown(kmutex_t *, netstack_t *, int);
static boolean_t netstack_apply_destroy(kmutex_t *, netstack_t *, int);
static boolean_t wait_for_zone_creator(netstack_t *, kmutex_t *);
static boolean_t wait_for_nms_inprogress(netstack_t *, nm_state_t *,
    kmutex_t *);

static void netstack_hold_locked(netstack_t *);

static ksema_t netstack_reap_limiter;
/*
 * Hard-coded constant, but since this is not tunable in real-time, it seems
 * making it an /etc/system tunable is better than nothing.
 */
uint_t netstack_outstanding_reaps = 1024;

void
netstack_init(void)
{
        mutex_init(&netstack_g_lock, NULL, MUTEX_DEFAULT, NULL);
        mutex_init(&netstack_shared_lock, NULL, MUTEX_DEFAULT, NULL);

        sema_init(&netstack_reap_limiter, netstack_outstanding_reaps, NULL,
            SEMA_DRIVER, NULL);

        netstack_initialized = 1;

        /*
         * We want to be informed each time a zone is created or
         * destroyed in the kernel, so we can maintain the
         * stack instance information.
         */
        zone_key_create(&netstack_zone_key, netstack_zone_create,
            netstack_zone_shutdown, netstack_zone_destroy);
}

/*
 * Register a new module with the framework.
 * This registers interest in changes to the set of netstacks.
 * The createfn and destroyfn are required, but the shutdownfn can be
 * NULL.
 * Note that due to the current zsd implementation, when the create
 * function is called the zone isn't fully present, thus functions
 * like zone_find_by_* will fail, hence the create function can not
 * use many zones kernel functions including zcmn_err().
 */
void
netstack_register(int moduleid,
    void *(*module_create)(netstackid_t, netstack_t *),
    void (*module_shutdown)(netstackid_t, void *),
    void (*module_destroy)(netstackid_t, void *))
{
        netstack_t *ns;

        ASSERT(netstack_initialized);
        ASSERT(moduleid >= 0 && moduleid < NS_MAX);
        ASSERT(module_create != NULL);

        /*
         * Make instances created after this point in time run the create
         * callback.
         */
        mutex_enter(&netstack_g_lock);
        ASSERT(ns_reg[moduleid].nr_create == NULL);
        ASSERT(ns_reg[moduleid].nr_flags == 0);
        ns_reg[moduleid].nr_create = module_create;
        ns_reg[moduleid].nr_shutdown = module_shutdown;
        ns_reg[moduleid].nr_destroy = module_destroy;
        ns_reg[moduleid].nr_flags = NRF_REGISTERED;

        /*
         * Determine the set of stacks that exist before we drop the lock.
         * Set NSS_CREATE_NEEDED for each of those.
         * netstacks which have been deleted will have NSS_CREATE_COMPLETED
         * set, but check NSF_CLOSING to be sure.
         */
        for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
                nm_state_t *nms = &ns->netstack_m_state[moduleid];

                mutex_enter(&ns->netstack_lock);
                if (!(ns->netstack_flags & NSF_CLOSING) &&
                    (nms->nms_flags & NSS_CREATE_ALL) == 0) {
                        nms->nms_flags |= NSS_CREATE_NEEDED;
                        DTRACE_PROBE2(netstack__create__needed,
                            netstack_t *, ns, int, moduleid);
                }
                mutex_exit(&ns->netstack_lock);
        }
        mutex_exit(&netstack_g_lock);

        /*
         * At this point in time a new instance can be created or an instance
         * can be destroyed, or some other module can register or unregister.
         * Make sure we either run all the create functions for this moduleid
         * or we wait for any other creators for this moduleid.
         */
        apply_all_netstacks(moduleid, netstack_apply_create);
}

void
netstack_unregister(int moduleid)
{
        netstack_t *ns;

        ASSERT(moduleid >= 0 && moduleid < NS_MAX);

        ASSERT(ns_reg[moduleid].nr_create != NULL);
        ASSERT(ns_reg[moduleid].nr_flags & NRF_REGISTERED);

        mutex_enter(&netstack_g_lock);
        /*
         * Determine the set of stacks that exist before we drop the lock.
         * Set NSS_SHUTDOWN_NEEDED and NSS_DESTROY_NEEDED for each of those.
         * That ensures that when we return all the callbacks for existing
         * instances have completed. And since we set NRF_DYING no new
         * instances can use this module.
         */
        for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
                boolean_t created = B_FALSE;
                nm_state_t *nms = &ns->netstack_m_state[moduleid];

                mutex_enter(&ns->netstack_lock);

                /*
                 * We need to be careful here. We could actually have a netstack
                 * being created as we speak waiting for us to let go of this
                 * lock to proceed. It may have set NSS_CREATE_NEEDED, but not
                 * have gotten to the point of completing it yet. If
                 * NSS_CREATE_NEEDED, we can safely just remove it here and
                 * never create the module. However, if NSS_CREATE_INPROGRESS is
                 * set, we need to still flag this module for shutdown and
                 * deletion, just as though it had reached NSS_CREATE_COMPLETED.
                 *
                 * It is safe to do that because of two different guarantees
                 * that exist in the system. The first is that before we do a
                 * create, shutdown, or destroy, we ensure that nothing else is
                 * in progress in the system for this netstack and wait for it
                 * to complete. Secondly, because the zone is being created, we
                 * know that the following call to apply_all_netstack will block
                 * on the zone finishing its initialization.
                 */
                if (nms->nms_flags & NSS_CREATE_NEEDED)
                        nms->nms_flags &= ~NSS_CREATE_NEEDED;

                if (nms->nms_flags & NSS_CREATE_INPROGRESS ||
                    nms->nms_flags & NSS_CREATE_COMPLETED)
                        created = B_TRUE;

                if (ns_reg[moduleid].nr_shutdown != NULL && created &&
                    (nms->nms_flags & NSS_CREATE_COMPLETED) &&
                    (nms->nms_flags & NSS_SHUTDOWN_ALL) == 0) {
                        nms->nms_flags |= NSS_SHUTDOWN_NEEDED;
                        DTRACE_PROBE2(netstack__shutdown__needed,
                            netstack_t *, ns, int, moduleid);
                }
                if ((ns_reg[moduleid].nr_flags & NRF_REGISTERED) &&
                    ns_reg[moduleid].nr_destroy != NULL && created &&
                    (nms->nms_flags & NSS_DESTROY_ALL) == 0) {
                        nms->nms_flags |= NSS_DESTROY_NEEDED;
                        DTRACE_PROBE2(netstack__destroy__needed,
                            netstack_t *, ns, int, moduleid);
                }
                mutex_exit(&ns->netstack_lock);
        }
        /*
         * Prevent any new netstack from calling the registered create
         * function, while keeping the function pointers in place until the
         * shutdown and destroy callbacks are complete.
         */
        ns_reg[moduleid].nr_flags |= NRF_DYING;
        mutex_exit(&netstack_g_lock);

        apply_all_netstacks(moduleid, netstack_apply_shutdown);
        apply_all_netstacks(moduleid, netstack_apply_destroy);

        /*
         * Clear the nms_flags so that we can handle this module
         * being loaded again.
         * Also remove the registered functions.
         */
        mutex_enter(&netstack_g_lock);
        ASSERT(ns_reg[moduleid].nr_flags & NRF_REGISTERED);
        ASSERT(ns_reg[moduleid].nr_flags & NRF_DYING);
        for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
                nm_state_t *nms = &ns->netstack_m_state[moduleid];

                mutex_enter(&ns->netstack_lock);
                if (nms->nms_flags & NSS_DESTROY_COMPLETED) {
                        nms->nms_flags = 0;
                        DTRACE_PROBE2(netstack__destroy__done,
                            netstack_t *, ns, int, moduleid);
                }
                mutex_exit(&ns->netstack_lock);
        }

        ns_reg[moduleid].nr_create = NULL;
        ns_reg[moduleid].nr_shutdown = NULL;
        ns_reg[moduleid].nr_destroy = NULL;
        ns_reg[moduleid].nr_flags = 0;
        mutex_exit(&netstack_g_lock);
}

/*
 * Lookup and/or allocate a netstack for this zone.
 */
static void *
netstack_zone_create(zoneid_t zoneid)
{
        netstackid_t stackid;
        netstack_t *ns;
        netstack_t **nsp;
        zone_t  *zone;
        int i;

        ASSERT(netstack_initialized);

        zone = zone_find_by_id_nolock(zoneid);
        ASSERT(zone != NULL);

        if (zone->zone_flags & ZF_NET_EXCL) {
                stackid = zoneid;
        } else {
                /* Look for the stack instance for the global */
                stackid = GLOBAL_NETSTACKID;
        }

        /* Allocate even if it isn't needed; simplifies locking */
        ns = (netstack_t *)kmem_zalloc(sizeof (netstack_t), KM_SLEEP);

        /* Look if there is a matching stack instance */
        mutex_enter(&netstack_g_lock);
        for (nsp = &netstack_head; *nsp != NULL;
            nsp = &((*nsp)->netstack_next)) {
                if ((*nsp)->netstack_stackid == stackid) {
                        /*
                         * Should never find a pre-existing exclusive stack
                         */
                        VERIFY(stackid == GLOBAL_NETSTACKID);
                        kmem_free(ns, sizeof (netstack_t));
                        ns = *nsp;
                        mutex_enter(&ns->netstack_lock);
                        ns->netstack_numzones++;
                        mutex_exit(&ns->netstack_lock);
                        mutex_exit(&netstack_g_lock);
                        DTRACE_PROBE1(netstack__inc__numzones,
                            netstack_t *, ns);
                        /* Record that we have a new shared stack zone */
                        netstack_shared_zone_add(zoneid);
                        zone->zone_netstack = ns;
                        return (ns);
                }
        }
        /* Not found */
        mutex_init(&ns->netstack_lock, NULL, MUTEX_DEFAULT, NULL);
        cv_init(&ns->netstack_cv, NULL, CV_DEFAULT, NULL);
        ns->netstack_stackid = zoneid;
        ns->netstack_numzones = 1;
        ns->netstack_refcnt = 1; /* Decremented by netstack_zone_destroy */
        ns->netstack_flags = NSF_UNINIT;
        *nsp = ns;
        zone->zone_netstack = ns;

        mutex_enter(&ns->netstack_lock);
        /*
         * Mark this netstack as having a CREATE running so
         * any netstack_register/netstack_unregister waits for
         * the existing create callbacks to complete in moduleid order
         */
        ns->netstack_flags |= NSF_ZONE_CREATE;

        /*
         * Determine the set of module create functions that need to be
         * called before we drop the lock.
         * Set NSS_CREATE_NEEDED for each of those.
         * Skip any with NRF_DYING set, since those are in the process of
         * going away, by checking for flags being exactly NRF_REGISTERED.
         */
        for (i = 0; i < NS_MAX; i++) {
                nm_state_t *nms = &ns->netstack_m_state[i];

                cv_init(&nms->nms_cv, NULL, CV_DEFAULT, NULL);

                if ((ns_reg[i].nr_flags == NRF_REGISTERED) &&
                    (nms->nms_flags & NSS_CREATE_ALL) == 0) {
                        nms->nms_flags |= NSS_CREATE_NEEDED;
                        DTRACE_PROBE2(netstack__create__needed,
                            netstack_t *, ns, int, i);
                }
        }
        mutex_exit(&ns->netstack_lock);
        mutex_exit(&netstack_g_lock);

        apply_all_modules(ns, netstack_apply_create);

        /* Tell any waiting netstack_register/netstack_unregister to proceed */
        mutex_enter(&ns->netstack_lock);
        ns->netstack_flags &= ~NSF_UNINIT;
        ASSERT(ns->netstack_flags & NSF_ZONE_CREATE);
        ns->netstack_flags &= ~NSF_ZONE_CREATE;
        cv_broadcast(&ns->netstack_cv);
        mutex_exit(&ns->netstack_lock);

        return (ns);
}

/* ARGSUSED */
static void
netstack_zone_shutdown(zoneid_t zoneid, void *arg)
{
        netstack_t *ns = (netstack_t *)arg;
        int i;

        ASSERT(arg != NULL);

        mutex_enter(&ns->netstack_lock);
        ASSERT(ns->netstack_numzones > 0);
        if (ns->netstack_numzones != 1) {
                /* Stack instance being used by other zone */
                mutex_exit(&ns->netstack_lock);
                ASSERT(ns->netstack_stackid == GLOBAL_NETSTACKID);
                return;
        }
        mutex_exit(&ns->netstack_lock);

        mutex_enter(&netstack_g_lock);
        mutex_enter(&ns->netstack_lock);
        /*
         * Mark this netstack as having a SHUTDOWN running so
         * any netstack_register/netstack_unregister waits for
         * the existing create callbacks to complete in moduleid order
         */
        ASSERT(!(ns->netstack_flags & NSF_ZONE_INPROGRESS));
        ns->netstack_flags |= NSF_ZONE_SHUTDOWN;

        /*
         * Determine the set of stacks that exist before we drop the lock.
         * Set NSS_SHUTDOWN_NEEDED for each of those.
         */
        for (i = 0; i < NS_MAX; i++) {
                nm_state_t *nms = &ns->netstack_m_state[i];

                if ((ns_reg[i].nr_flags & NRF_REGISTERED) &&
                    ns_reg[i].nr_shutdown != NULL &&
                    (nms->nms_flags & NSS_CREATE_COMPLETED) &&
                    (nms->nms_flags & NSS_SHUTDOWN_ALL) == 0) {
                        nms->nms_flags |= NSS_SHUTDOWN_NEEDED;
                        DTRACE_PROBE2(netstack__shutdown__needed,
                            netstack_t *, ns, int, i);
                }
        }
        mutex_exit(&ns->netstack_lock);
        mutex_exit(&netstack_g_lock);

        /*
         * Call the shutdown function for all registered modules for this
         * netstack.
         */
        apply_all_modules_reverse(ns, netstack_apply_shutdown);

        /* Tell any waiting netstack_register/netstack_unregister to proceed */
        mutex_enter(&ns->netstack_lock);
        ASSERT(ns->netstack_flags & NSF_ZONE_SHUTDOWN);
        ns->netstack_flags &= ~NSF_ZONE_SHUTDOWN;
        cv_broadcast(&ns->netstack_cv);
        mutex_exit(&ns->netstack_lock);
}

/*
 * Common routine to release a zone.
 * If this was the last zone using the stack instance then prepare to
 * have the refcnt dropping to zero free the zone.
 */
/* ARGSUSED */
static void
netstack_zone_destroy(zoneid_t zoneid, void *arg)
{
        netstack_t *ns = (netstack_t *)arg;

        ASSERT(arg != NULL);

        mutex_enter(&ns->netstack_lock);
        ASSERT(ns->netstack_numzones > 0);
        ns->netstack_numzones--;
        if (ns->netstack_numzones != 0) {
                /* Stack instance being used by other zone */
                mutex_exit(&ns->netstack_lock);
                ASSERT(ns->netstack_stackid == GLOBAL_NETSTACKID);
                /* Record that we a shared stack zone has gone away */
                netstack_shared_zone_remove(zoneid);
                return;
        }
        /*
         * Set CLOSING so that netstack_find_by will not find it.
         */
        ns->netstack_flags |= NSF_CLOSING;
        mutex_exit(&ns->netstack_lock);
        DTRACE_PROBE1(netstack__dec__numzones, netstack_t *, ns);
        /* No other thread can call zone_destroy for this stack */

        /*
         * Decrease refcnt to account for the one in netstack_zone_init()
         */
        netstack_rele(ns);
}

/*
 * Called when the reference count drops to zero.
 * Call the destroy functions for each registered module.
 */
static void
netstack_stack_inactive(netstack_t *ns)
{
        int i;

        mutex_enter(&netstack_g_lock);
        mutex_enter(&ns->netstack_lock);
        /*
         * Mark this netstack as having a DESTROY running so
         * any netstack_register/netstack_unregister waits for
         * the existing destroy callbacks to complete in reverse moduleid order
         */
        ASSERT(!(ns->netstack_flags & NSF_ZONE_INPROGRESS));
        ns->netstack_flags |= NSF_ZONE_DESTROY;
        /*
         * If the shutdown callback wasn't called earlier (e.g., if this is
         * a netstack shared between multiple zones), then we schedule it now.
         *
         * Determine the set of stacks that exist before we drop the lock.
         * Set NSS_DESTROY_NEEDED for each of those. That
         * ensures that when we return all the callbacks for existing
         * instances have completed.
         */
        for (i = 0; i < NS_MAX; i++) {
                nm_state_t *nms = &ns->netstack_m_state[i];

                if ((ns_reg[i].nr_flags & NRF_REGISTERED) &&
                    ns_reg[i].nr_shutdown != NULL &&
                    (nms->nms_flags & NSS_CREATE_COMPLETED) &&
                    (nms->nms_flags & NSS_SHUTDOWN_ALL) == 0) {
                        nms->nms_flags |= NSS_SHUTDOWN_NEEDED;
                        DTRACE_PROBE2(netstack__shutdown__needed,
                            netstack_t *, ns, int, i);
                }

                if ((ns_reg[i].nr_flags & NRF_REGISTERED) &&
                    ns_reg[i].nr_destroy != NULL &&
                    (nms->nms_flags & NSS_CREATE_COMPLETED) &&
                    (nms->nms_flags & NSS_DESTROY_ALL) == 0) {
                        nms->nms_flags |= NSS_DESTROY_NEEDED;
                        DTRACE_PROBE2(netstack__destroy__needed,
                            netstack_t *, ns, int, i);
                }
        }
        mutex_exit(&ns->netstack_lock);
        mutex_exit(&netstack_g_lock);

        /*
         * Call the shutdown and destroy functions for all registered modules
         * for this netstack.
         *
         * Since there are some ordering dependencies between the modules we
         * tear them down in the reverse order of what was used to create them.
         *
         * Since a netstack_t is never reused (when a zone is rebooted it gets
         * a new zoneid == netstackid i.e. a new netstack_t is allocated) we
         * leave nms_flags the way it is i.e. with NSS_DESTROY_COMPLETED set.
         * That is different than in the netstack_unregister() case.
         */
        apply_all_modules_reverse(ns, netstack_apply_shutdown);
        apply_all_modules_reverse(ns, netstack_apply_destroy);

        /* Tell any waiting netstack_register/netstack_unregister to proceed */
        mutex_enter(&ns->netstack_lock);
        ASSERT(ns->netstack_flags & NSF_ZONE_DESTROY);
        ns->netstack_flags &= ~NSF_ZONE_DESTROY;
        cv_broadcast(&ns->netstack_cv);
        mutex_exit(&ns->netstack_lock);
}

/*
 * Apply a function to all netstacks for a particular moduleid.
 *
 * If there is any zone activity (due to a zone being created, shutdown,
 * or destroyed) we wait for that to complete before we proceed. This ensures
 * that the moduleids are processed in order when a zone is created or
 * destroyed.
 *
 * The applyfn has to drop netstack_g_lock if it does some work.
 * In that case we don't follow netstack_next,
 * even if it is possible to do so without any hazards. This is
 * because we want the design to allow for the list of netstacks threaded
 * by netstack_next to change in any arbitrary way during the time the
 * lock was dropped.
 *
 * It is safe to restart the loop at netstack_head since the applyfn
 * changes netstack_m_state as it processes things, so a subsequent
 * pass through will have no effect in applyfn, hence the loop will terminate
 * in at worst O(N^2).
 */
static void
apply_all_netstacks(int moduleid, applyfn_t *applyfn)
{
        netstack_t *ns;

        mutex_enter(&netstack_g_lock);
        ns = netstack_head;
        while (ns != NULL) {
                if (wait_for_zone_creator(ns, &netstack_g_lock)) {
                        /* Lock dropped - restart at head */
                        ns = netstack_head;
                } else if ((applyfn)(&netstack_g_lock, ns, moduleid)) {
                        /* Lock dropped - restart at head */
                        ns = netstack_head;
                } else {
                        ns = ns->netstack_next;
                }
        }
        mutex_exit(&netstack_g_lock);
}

/*
 * Apply a function to all moduleids for a particular netstack.
 *
 * Since the netstack linkage doesn't matter in this case we can
 * ignore whether the function drops the lock.
 */
static void
apply_all_modules(netstack_t *ns, applyfn_t *applyfn)
{
        int i;

        mutex_enter(&netstack_g_lock);
        for (i = 0; i < NS_MAX; i++) {
                /*
                 * We don't care whether the lock was dropped
                 * since we are not iterating over netstack_head.
                 */
                (void) (applyfn)(&netstack_g_lock, ns, i);
        }
        mutex_exit(&netstack_g_lock);
}

/* Like the above but in reverse moduleid order */
static void
apply_all_modules_reverse(netstack_t *ns, applyfn_t *applyfn)
{
        int i;

        mutex_enter(&netstack_g_lock);
        for (i = NS_MAX-1; i >= 0; i--) {
                /*
                 * We don't care whether the lock was dropped
                 * since we are not iterating over netstack_head.
                 */
                (void) (applyfn)(&netstack_g_lock, ns, i);
        }
        mutex_exit(&netstack_g_lock);
}

/*
 * Call the create function for the ns and moduleid if CREATE_NEEDED
 * is set.
 * If some other thread gets here first and sets *_INPROGRESS, then
 * we wait for that thread to complete so that we can ensure that
 * all the callbacks are done when we've looped over all netstacks/moduleids.
 *
 * When we call the create function, we temporarily drop the netstack_lock
 * held by the caller, and return true to tell the caller it needs to
 * re-evalute the state.
 */
static boolean_t
netstack_apply_create(kmutex_t *lockp, netstack_t *ns, int moduleid)
{
        void *result;
        netstackid_t stackid;
        nm_state_t *nms = &ns->netstack_m_state[moduleid];
        boolean_t dropped = B_FALSE;

        ASSERT(MUTEX_HELD(lockp));
        mutex_enter(&ns->netstack_lock);

        if (wait_for_nms_inprogress(ns, nms, lockp))
                dropped = B_TRUE;

        if (nms->nms_flags & NSS_CREATE_NEEDED) {
                nms->nms_flags &= ~NSS_CREATE_NEEDED;
                nms->nms_flags |= NSS_CREATE_INPROGRESS;
                DTRACE_PROBE2(netstack__create__inprogress,
                    netstack_t *, ns, int, moduleid);
                mutex_exit(&ns->netstack_lock);
                mutex_exit(lockp);
                dropped = B_TRUE;

                ASSERT(ns_reg[moduleid].nr_create != NULL);
                stackid = ns->netstack_stackid;
                DTRACE_PROBE2(netstack__create__start,
                    netstackid_t, stackid,
                    netstack_t *, ns);
                result = (ns_reg[moduleid].nr_create)(stackid, ns);
                DTRACE_PROBE2(netstack__create__end,
                    void *, result, netstack_t *, ns);

                ASSERT(result != NULL);
                mutex_enter(lockp);
                mutex_enter(&ns->netstack_lock);
                ns->netstack_modules[moduleid] = result;
                nms->nms_flags &= ~NSS_CREATE_INPROGRESS;
                nms->nms_flags |= NSS_CREATE_COMPLETED;
                cv_broadcast(&nms->nms_cv);
                DTRACE_PROBE2(netstack__create__completed,
                    netstack_t *, ns, int, moduleid);
                mutex_exit(&ns->netstack_lock);
                return (dropped);
        } else {
                mutex_exit(&ns->netstack_lock);
                return (dropped);
        }
}

/*
 * Call the shutdown function for the ns and moduleid if SHUTDOWN_NEEDED
 * is set.
 * If some other thread gets here first and sets *_INPROGRESS, then
 * we wait for that thread to complete so that we can ensure that
 * all the callbacks are done when we've looped over all netstacks/moduleids.
 *
 * When we call the shutdown function, we temporarily drop the netstack_lock
 * held by the caller, and return true to tell the caller it needs to
 * re-evalute the state.
 */
static boolean_t
netstack_apply_shutdown(kmutex_t *lockp, netstack_t *ns, int moduleid)
{
        netstackid_t stackid;
        void * netstack_module;
        nm_state_t *nms = &ns->netstack_m_state[moduleid];
        boolean_t dropped = B_FALSE;

        ASSERT(MUTEX_HELD(lockp));
        mutex_enter(&ns->netstack_lock);

        if (wait_for_nms_inprogress(ns, nms, lockp))
                dropped = B_TRUE;

        if (nms->nms_flags & NSS_SHUTDOWN_NEEDED) {
                nms->nms_flags &= ~NSS_SHUTDOWN_NEEDED;
                nms->nms_flags |= NSS_SHUTDOWN_INPROGRESS;
                DTRACE_PROBE2(netstack__shutdown__inprogress,
                    netstack_t *, ns, int, moduleid);
                mutex_exit(&ns->netstack_lock);
                mutex_exit(lockp);
                dropped = B_TRUE;

                ASSERT(ns_reg[moduleid].nr_shutdown != NULL);
                stackid = ns->netstack_stackid;
                netstack_module = ns->netstack_modules[moduleid];
                DTRACE_PROBE2(netstack__shutdown__start,
                    netstackid_t, stackid,
                    void *, netstack_module);
                (ns_reg[moduleid].nr_shutdown)(stackid, netstack_module);
                DTRACE_PROBE1(netstack__shutdown__end,
                    netstack_t *, ns);

                mutex_enter(lockp);
                mutex_enter(&ns->netstack_lock);
                nms->nms_flags &= ~NSS_SHUTDOWN_INPROGRESS;
                nms->nms_flags |= NSS_SHUTDOWN_COMPLETED;
                cv_broadcast(&nms->nms_cv);
                DTRACE_PROBE2(netstack__shutdown__completed,
                    netstack_t *, ns, int, moduleid);
                mutex_exit(&ns->netstack_lock);
                return (dropped);
        } else {
                mutex_exit(&ns->netstack_lock);
                return (dropped);
        }
}

/*
 * Call the destroy function for the ns and moduleid if DESTROY_NEEDED
 * is set.
 * If some other thread gets here first and sets *_INPROGRESS, then
 * we wait for that thread to complete so that we can ensure that
 * all the callbacks are done when we've looped over all netstacks/moduleids.
 *
 * When we call the destroy function, we temporarily drop the netstack_lock
 * held by the caller, and return true to tell the caller it needs to
 * re-evalute the state.
 */
static boolean_t
netstack_apply_destroy(kmutex_t *lockp, netstack_t *ns, int moduleid)
{
        netstackid_t stackid;
        void * netstack_module;
        nm_state_t *nms = &ns->netstack_m_state[moduleid];
        boolean_t dropped = B_FALSE;

        ASSERT(MUTEX_HELD(lockp));
        mutex_enter(&ns->netstack_lock);

        if (wait_for_nms_inprogress(ns, nms, lockp))
                dropped = B_TRUE;

        if (nms->nms_flags & NSS_DESTROY_NEEDED) {
                nms->nms_flags &= ~NSS_DESTROY_NEEDED;
                nms->nms_flags |= NSS_DESTROY_INPROGRESS;
                DTRACE_PROBE2(netstack__destroy__inprogress,
                    netstack_t *, ns, int, moduleid);
                mutex_exit(&ns->netstack_lock);
                mutex_exit(lockp);
                dropped = B_TRUE;

                ASSERT(ns_reg[moduleid].nr_destroy != NULL);
                stackid = ns->netstack_stackid;
                netstack_module = ns->netstack_modules[moduleid];
                DTRACE_PROBE2(netstack__destroy__start,
                    netstackid_t, stackid,
                    void *, netstack_module);
                (ns_reg[moduleid].nr_destroy)(stackid, netstack_module);
                DTRACE_PROBE1(netstack__destroy__end,
                    netstack_t *, ns);

                mutex_enter(lockp);
                mutex_enter(&ns->netstack_lock);
                ns->netstack_modules[moduleid] = NULL;
                nms->nms_flags &= ~NSS_DESTROY_INPROGRESS;
                nms->nms_flags |= NSS_DESTROY_COMPLETED;
                cv_broadcast(&nms->nms_cv);
                DTRACE_PROBE2(netstack__destroy__completed,
                    netstack_t *, ns, int, moduleid);
                mutex_exit(&ns->netstack_lock);
                return (dropped);
        } else {
                mutex_exit(&ns->netstack_lock);
                return (dropped);
        }
}

/*
 * If somebody  is creating the netstack (due to a new zone being created)
 * then we wait for them to complete. This ensures that any additional
 * netstack_register() doesn't cause the create functions to run out of
 * order.
 * Note that we do not need such a global wait in the case of the shutdown
 * and destroy callbacks, since in that case it is sufficient for both
 * threads to set NEEDED and wait for INPROGRESS to ensure ordering.
 * Returns true if lockp was temporarily dropped while waiting.
 */
static boolean_t
wait_for_zone_creator(netstack_t *ns, kmutex_t *lockp)
{
        boolean_t dropped = B_FALSE;

        mutex_enter(&ns->netstack_lock);
        while (ns->netstack_flags & NSF_ZONE_CREATE) {
                DTRACE_PROBE1(netstack__wait__zone__inprogress,
                    netstack_t *, ns);
                if (lockp != NULL) {
                        dropped = B_TRUE;
                        mutex_exit(lockp);
                }
                cv_wait(&ns->netstack_cv, &ns->netstack_lock);
                if (lockp != NULL) {
                        /* First drop netstack_lock to preserve order */
                        mutex_exit(&ns->netstack_lock);
                        mutex_enter(lockp);
                        mutex_enter(&ns->netstack_lock);
                }
        }
        mutex_exit(&ns->netstack_lock);
        return (dropped);
}

/*
 * Wait for any INPROGRESS flag to be cleared for the netstack/moduleid
 * combination.
 * Returns true if lockp was temporarily dropped while waiting.
 */
static boolean_t
wait_for_nms_inprogress(netstack_t *ns, nm_state_t *nms, kmutex_t *lockp)
{
        boolean_t dropped = B_FALSE;

        while (nms->nms_flags & NSS_ALL_INPROGRESS) {
                DTRACE_PROBE2(netstack__wait__nms__inprogress,
                    netstack_t *, ns, nm_state_t *, nms);
                if (lockp != NULL) {
                        dropped = B_TRUE;
                        mutex_exit(lockp);
                }
                cv_wait(&nms->nms_cv, &ns->netstack_lock);
                if (lockp != NULL) {
                        /* First drop netstack_lock to preserve order */
                        mutex_exit(&ns->netstack_lock);
                        mutex_enter(lockp);
                        mutex_enter(&ns->netstack_lock);
                }
        }
        return (dropped);
}

/*
 * Get the stack instance used in caller's zone.
 * Increases the reference count, caller must do a netstack_rele.
 * It can't be called after zone_destroy() has started.
 */
netstack_t *
netstack_get_current(void)
{
        netstack_t *ns;

        ns = curproc->p_zone->zone_netstack;
        ASSERT(ns != NULL);
        return (netstack_hold_if_active(ns));
}

/*
 * Find a stack instance given the cred.
 * This is used by the modules to potentially allow for a future when
 * something other than the zoneid is used to determine the stack.
 */
netstack_t *
netstack_find_by_cred(const cred_t *cr)
{
        zoneid_t zoneid = crgetzoneid(cr);

        /* Handle the case when cr_zone is NULL */
        if (zoneid == (zoneid_t)-1)
                zoneid = GLOBAL_ZONEID;

        /* For performance ... */
        if (curproc->p_zone->zone_id == zoneid)
                return (netstack_get_current());
        else
                return (netstack_find_by_zoneid(zoneid));
}

/*
 * Find a stack instance given the zoneid.
 * Increases the reference count if found; caller must do a
 * netstack_rele().
 *
 * If there is no exact match then assume the shared stack instance
 * matches.
 *
 * Skip the uninitialized and closing ones.
 */
netstack_t *
netstack_find_by_zoneid(zoneid_t zoneid)
{
        netstack_t *ns;
        zone_t *zone;

        zone = zone_find_by_id(zoneid);

        if (zone == NULL)
                return (NULL);

        ASSERT(zone->zone_netstack != NULL);
        ns = netstack_hold_if_active(zone->zone_netstack);

        zone_rele(zone);
        return (ns);
}

/*
 * Find a stack instance given the zoneid. Can only be called from
 * the create callback. See the comments in zone_find_by_id_nolock why
 * that limitation exists.
 *
 * Increases the reference count if found; caller must do a
 * netstack_rele().
 *
 * If there is no exact match then assume the shared stack instance
 * matches.
 *
 * Skip the unitialized ones.
 */
netstack_t *
netstack_find_by_zoneid_nolock(zoneid_t zoneid)
{
        zone_t *zone;

        zone = zone_find_by_id_nolock(zoneid);

        if (zone == NULL)
                return (NULL);

        ASSERT(zone->zone_netstack != NULL);
        /* zone_find_by_id_nolock does not have a hold on the zone */
        return (netstack_hold_if_active(zone->zone_netstack));
}

/*
 * Find a stack instance given the stackid with exact match?
 * Increases the reference count if found; caller must do a
 * netstack_rele().
 *
 * Skip the unitialized ones.
 */
netstack_t *
netstack_find_by_stackid(netstackid_t stackid)
{
        netstack_t *ns;

        mutex_enter(&netstack_g_lock);
        for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
                /* Can't use hold_if_active because of stackid check. */
                mutex_enter(&ns->netstack_lock);
                if (ns->netstack_stackid == stackid &&
                    !(ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING))) {
                        netstack_hold_locked(ns);
                        mutex_exit(&ns->netstack_lock);
                        mutex_exit(&netstack_g_lock);
                        return (ns);
                }
                mutex_exit(&ns->netstack_lock);
        }
        mutex_exit(&netstack_g_lock);
        return (NULL);
}

boolean_t
netstack_inuse_by_stackid(netstackid_t stackid)
{
        netstack_t *ns;
        boolean_t rval = B_FALSE;

        mutex_enter(&netstack_g_lock);

        for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
                if (ns->netstack_stackid == stackid) {
                        rval = B_TRUE;
                        break;
                }
        }

        mutex_exit(&netstack_g_lock);

        return (rval);
}


static void
netstack_reap(void *arg)
{
        netstack_t **nsp, *ns = (netstack_t *)arg;
        boolean_t found;
        int i;

        /*
         * Time to call the destroy functions and free up
         * the structure
         */
        netstack_stack_inactive(ns);

        /* Make sure nothing increased the references */
        ASSERT(ns->netstack_refcnt == 0);
        ASSERT(ns->netstack_numzones == 0);

        /* Finally remove from list of netstacks */
        mutex_enter(&netstack_g_lock);
        found = B_FALSE;
        for (nsp = &netstack_head; *nsp != NULL;
            nsp = &(*nsp)->netstack_next) {
                if (*nsp == ns) {
                        *nsp = ns->netstack_next;
                        ns->netstack_next = NULL;
                        found = B_TRUE;
                        break;
                }
        }
        ASSERT(found);
        mutex_exit(&netstack_g_lock);

        /* Make sure nothing increased the references */
        ASSERT(ns->netstack_refcnt == 0);
        ASSERT(ns->netstack_numzones == 0);

        ASSERT(ns->netstack_flags & NSF_CLOSING);

        for (i = 0; i < NS_MAX; i++) {
                nm_state_t *nms = &ns->netstack_m_state[i];

                cv_destroy(&nms->nms_cv);
        }
        mutex_destroy(&ns->netstack_lock);
        cv_destroy(&ns->netstack_cv);
        kmem_free(ns, sizeof (*ns));
        /* Allow another reap to be scheduled. */
        sema_v(&netstack_reap_limiter);
}

void
netstack_rele(netstack_t *ns)
{
        int refcnt, numzones;

        mutex_enter(&ns->netstack_lock);
        ASSERT(ns->netstack_refcnt > 0);
        ns->netstack_refcnt--;
        /*
         * As we drop the lock additional netstack_rele()s can come in
         * and decrement the refcnt to zero and free the netstack_t.
         * Store pointers in local variables and if we were not the last
         * then don't reference the netstack_t after that.
         */
        refcnt = ns->netstack_refcnt;
        numzones = ns->netstack_numzones;
        DTRACE_PROBE1(netstack__dec__ref, netstack_t *, ns);
        mutex_exit(&ns->netstack_lock);

        if (refcnt == 0 && numzones == 0) {
                /*
                 * Because there are possibilities of re-entrancy in various
                 * netstack structures by callers, which might cause a lock up
                 * due to odd reference models, or other factors, we choose to
                 * schedule the actual deletion of this netstack as a deferred
                 * task on the system taskq.  This way, any such reference
                 * models won't trip over themselves.
                 *
                 * Assume we aren't in a high-priority interrupt context, so
                 * we can use KM_SLEEP and semaphores.
                 */
                if (sema_tryp(&netstack_reap_limiter) == 0) {
                        /*
                         * Indicate we're slamming against a limit.
                         */
                        hrtime_t measurement = gethrtime();

                        sema_p(&netstack_reap_limiter);
                        /* Capture delay in ns. */
                        DTRACE_PROBE1(netstack__reap__rate__limited,
                            hrtime_t, gethrtime() - measurement);
                }

                /* TQ_SLEEP should prevent taskq_dispatch() from failing. */
                (void) taskq_dispatch(system_taskq, netstack_reap, ns,
                    TQ_SLEEP);
        }
}

static void
netstack_hold_locked(netstack_t *ns)
{
        ASSERT(MUTEX_HELD(&ns->netstack_lock));
        ns->netstack_refcnt++;
        ASSERT(ns->netstack_refcnt > 0);
        DTRACE_PROBE1(netstack__inc__ref, netstack_t *, ns);
}

/*
 * If the passed-in netstack isn't active (i.e. it's uninitialized or closing),
 * return NULL, otherwise return it with its reference held.  Common code
 * for many netstack_find*() functions.
 */
netstack_t *
netstack_hold_if_active(netstack_t *ns)
{
        netstack_t *retval;

        mutex_enter(&ns->netstack_lock);
        if (ns->netstack_flags & (NSF_UNINIT | NSF_CLOSING)) {
                retval = NULL;
        } else {
                netstack_hold_locked(ns);
                retval = ns;
        }
        mutex_exit(&ns->netstack_lock);

        return (retval);
}

void
netstack_hold(netstack_t *ns)
{
        mutex_enter(&ns->netstack_lock);
        netstack_hold_locked(ns);
        mutex_exit(&ns->netstack_lock);
}

/*
 * To support kstat_create_netstack() using kstat_zone_add we need
 * to track both
 *  - all zoneids that use the global/shared stack
 *  - all kstats that have been added for the shared stack
 */
kstat_t *
kstat_create_netstack(char *ks_module, int ks_instance, char *ks_name,
    char *ks_class, uchar_t ks_type, uint_t ks_ndata, uchar_t ks_flags,
    netstackid_t ks_netstackid)
{
        kstat_t *ks;

        if (ks_netstackid == GLOBAL_NETSTACKID) {
                ks = kstat_create_zone(ks_module, ks_instance, ks_name,
                    ks_class, ks_type, ks_ndata, ks_flags, GLOBAL_ZONEID);
                if (ks != NULL)
                        netstack_shared_kstat_add(ks);
                return (ks);
        } else {
                zoneid_t zoneid = ks_netstackid;

                return (kstat_create_zone(ks_module, ks_instance, ks_name,
                    ks_class, ks_type, ks_ndata, ks_flags, zoneid));
        }
}

void
kstat_delete_netstack(kstat_t *ks, netstackid_t ks_netstackid)
{
        if (ks_netstackid == GLOBAL_NETSTACKID) {
                netstack_shared_kstat_remove(ks);
        }
        kstat_delete(ks);
}

static void
netstack_shared_zone_add(zoneid_t zoneid)
{
        struct shared_zone_list *sz;
        struct shared_kstat_list *sk;

        sz = (struct shared_zone_list *)kmem_zalloc(sizeof (*sz), KM_SLEEP);
        sz->sz_zoneid = zoneid;

        /* Insert in list */
        mutex_enter(&netstack_shared_lock);
        sz->sz_next = netstack_shared_zones;
        netstack_shared_zones = sz;

        /*
         * Perform kstat_zone_add for each existing shared stack kstat.
         * Note: Holds netstack_shared_lock lock across kstat_zone_add.
         */
        for (sk = netstack_shared_kstats; sk != NULL; sk = sk->sk_next) {
                kstat_zone_add(sk->sk_kstat, zoneid);
        }
        mutex_exit(&netstack_shared_lock);
}

static void
netstack_shared_zone_remove(zoneid_t zoneid)
{
        struct shared_zone_list **szp, *sz;
        struct shared_kstat_list *sk;

        /* Find in list */
        mutex_enter(&netstack_shared_lock);
        sz = NULL;
        for (szp = &netstack_shared_zones; *szp != NULL;
            szp = &((*szp)->sz_next)) {
                if ((*szp)->sz_zoneid == zoneid) {
                        sz = *szp;
                        break;
                }
        }
        /* We must find it */
        ASSERT(sz != NULL);
        *szp = sz->sz_next;
        sz->sz_next = NULL;

        /*
         * Perform kstat_zone_remove for each existing shared stack kstat.
         * Note: Holds netstack_shared_lock lock across kstat_zone_remove.
         */
        for (sk = netstack_shared_kstats; sk != NULL; sk = sk->sk_next) {
                kstat_zone_remove(sk->sk_kstat, zoneid);
        }
        mutex_exit(&netstack_shared_lock);

        kmem_free(sz, sizeof (*sz));
}

static void
netstack_shared_kstat_add(kstat_t *ks)
{
        struct shared_zone_list *sz;
        struct shared_kstat_list *sk;

        sk = (struct shared_kstat_list *)kmem_zalloc(sizeof (*sk), KM_SLEEP);
        sk->sk_kstat = ks;

        /* Insert in list */
        mutex_enter(&netstack_shared_lock);
        sk->sk_next = netstack_shared_kstats;
        netstack_shared_kstats = sk;

        /*
         * Perform kstat_zone_add for each existing shared stack zone.
         * Note: Holds netstack_shared_lock lock across kstat_zone_add.
         */
        for (sz = netstack_shared_zones; sz != NULL; sz = sz->sz_next) {
                kstat_zone_add(ks, sz->sz_zoneid);
        }
        mutex_exit(&netstack_shared_lock);
}

static void
netstack_shared_kstat_remove(kstat_t *ks)
{
        struct shared_zone_list *sz;
        struct shared_kstat_list **skp, *sk;

        /* Find in list */
        mutex_enter(&netstack_shared_lock);
        sk = NULL;
        for (skp = &netstack_shared_kstats; *skp != NULL;
            skp = &((*skp)->sk_next)) {
                if ((*skp)->sk_kstat == ks) {
                        sk = *skp;
                        break;
                }
        }
        /* Must find it */
        ASSERT(sk != NULL);
        *skp = sk->sk_next;
        sk->sk_next = NULL;

        /*
         * Perform kstat_zone_remove for each existing shared stack kstat.
         * Note: Holds netstack_shared_lock lock across kstat_zone_remove.
         */
        for (sz = netstack_shared_zones; sz != NULL; sz = sz->sz_next) {
                kstat_zone_remove(ks, sz->sz_zoneid);
        }
        mutex_exit(&netstack_shared_lock);
        kmem_free(sk, sizeof (*sk));
}

/*
 * If a zoneid is part of the shared zone, return true
 */
static boolean_t
netstack_find_shared_zoneid(zoneid_t zoneid)
{
        struct shared_zone_list *sz;

        mutex_enter(&netstack_shared_lock);
        for (sz = netstack_shared_zones; sz != NULL; sz = sz->sz_next) {
                if (sz->sz_zoneid == zoneid) {
                        mutex_exit(&netstack_shared_lock);
                        return (B_TRUE);
                }
        }
        mutex_exit(&netstack_shared_lock);
        return (B_FALSE);
}

/*
 * Hide the fact that zoneids and netstackids are allocated from
 * the same space in the current implementation.
 * We currently do not check that the stackid/zoneids are valid, since there
 * is no need for that. But this should only be done for ids that are
 * valid.
 */
zoneid_t
netstackid_to_zoneid(netstackid_t stackid)
{
        return (stackid);
}

netstackid_t
zoneid_to_netstackid(zoneid_t zoneid)
{
        if (netstack_find_shared_zoneid(zoneid))
                return (GLOBAL_ZONEID);
        else
                return (zoneid);
}

zoneid_t
netstack_get_zoneid(netstack_t *ns)
{
        return (netstackid_to_zoneid(ns->netstack_stackid));
}

/*
 * Simplistic support for walking all the handles.
 * Example usage:
 *      netstack_handle_t nh;
 *      netstack_t *ns;
 *
 *      netstack_next_init(&nh);
 *      while ((ns = netstack_next(&nh)) != NULL) {
 *              do something;
 *              netstack_rele(ns);
 *      }
 *      netstack_next_fini(&nh);
 */
void
netstack_next_init(netstack_handle_t *handle)
{
        *handle = 0;
}

/* ARGSUSED */
void
netstack_next_fini(netstack_handle_t *handle)
{
}

netstack_t *
netstack_next(netstack_handle_t *handle)
{
        netstack_t *ns;
        int i, end;

        end = *handle;
        /* Walk skipping *handle number of instances */

        /* Look if there is a matching stack instance */
        mutex_enter(&netstack_g_lock);
        ns = netstack_head;
        for (i = 0; i < end; i++) {
                if (ns == NULL)
                        break;
                ns = ns->netstack_next;
        }
        /*
         * Skip those that aren't really here (uninitialized or closing).
         * Can't use hold_if_active because of "end" tracking.
         */
        while (ns != NULL) {
                mutex_enter(&ns->netstack_lock);
                if ((ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING)) == 0) {
                        *handle = end + 1;
                        netstack_hold_locked(ns);
                        mutex_exit(&ns->netstack_lock);
                        break;
                }
                mutex_exit(&ns->netstack_lock);
                end++;
                ns = ns->netstack_next;
        }
        mutex_exit(&netstack_g_lock);
        return (ns);
}