/* root/usr/src/uts/common/io/nvme/nvme_lock.c */
/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source.  A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2025 Oxide Computer Company
 */

/*
 * This implements the general locking routines. See the big theory section
 * 'ioctls, Errors, and Exclusive Access' for more information.
 */

#include <sys/stddef.h>
#include <sys/nvme.h>

#include "nvme_reg.h"
#include "nvme_var.h"

/*
 * Do we have a writer or someone pending. Note, some cases require checking
 * both of these and others do not. Please see each individual check for the
 * nuance here. As a general rule of thumb, when locking, the pending writers
 * are important. However, when passing the lock on to the next owner (the
 * handoff functions below), one doesn't check it.
 */
static boolean_t
nvme_rwlock_wr_or_pend(nvme_lock_t *lock)
{
        if (lock->nl_writer != NULL) {
                return (B_TRUE);
        }

        /* No active writer; report whether one is waiting in line. */
        return (list_is_empty(&lock->nl_pend_writers) == 0 ? B_TRUE : B_FALSE);
}

/*
 * Taking a namespace read lock requires that there is no writer (or pending) on
 * the controller and the namespace.
 */
static boolean_t
nvme_rwlock_block_ns_rdlock(nvme_t *nvme, nvme_namespace_t *ns)
{
        /* A writer (active or queued) at the controller level blocks us. */
        if (nvme_rwlock_wr_or_pend(&nvme->n_lock)) {
                return (B_TRUE);
        }

        /* Likewise for a writer on the namespace itself. */
        return (nvme_rwlock_wr_or_pend(&ns->ns_lock));
}

/*
 * The following entities all block a namespace write lock from being taken:
 *
 * 1) Any active or pending writer on the controller lock. They block and starve
 *    namespace writers respectively.
 * 2) Any active or pending writers on the namespace lock. We must wait in line.
 * 3) Any active readers on the namespace lock. We ignore pending namespace
 *    readers as by definition that implies some other situation will cause
 *    this.
 */
static boolean_t
nvme_rwlock_block_ns_wrlock(nvme_t *nvme, nvme_namespace_t *ns)
{
        /* Case 1: controller writers (active or pending) starve us. */
        if (nvme_rwlock_wr_or_pend(&nvme->n_lock)) {
                return (B_TRUE);
        }

        /* Case 2: namespace writers ahead of us in line. */
        if (nvme_rwlock_wr_or_pend(&ns->ns_lock)) {
                return (B_TRUE);
        }

        /* Case 3: active readers on the namespace. */
        if (!list_is_empty(&ns->ns_lock.nl_readers)) {
                return (B_TRUE);
        }

        return (B_FALSE);
}

/*
 * The only thing that blocks acquisition of a controller read lock is if
 * there are outstanding or pending writers on the controller lock. We can
 * ignore the state of all namespaces here.
 */
static boolean_t
nvme_rwlock_block_ctrl_rdlock(nvme_t *nvme)
{
        nvme_lock_t *ctrl_lock = &nvme->n_lock;

        /* Only controller-level writers (active or pending) matter here. */
        return (nvme_rwlock_wr_or_pend(ctrl_lock));
}

/*
 * Taking the controller write lock is the most challenging of all, but also
 * takes priority. The following all block a controller write lock from being
 * taken:
 *
 * 1) Any controller write lock or pending write
 * 2) Any controller read lock. We skip pending reads because if they exist,
 *    some other condition that causes them will also trip us.
 * 3) Any namespace having a write lock. We ignore pending writes because by
 *    definition there is some condition that causes that to be the case.
 * 4) Any read lock on a namespace. We ignore pending reads like in the
 *    controller case.
 */
static boolean_t
nvme_rwlock_block_ctrl_wrlock(nvme_t *nvme)
{
        /* Controller writers (active/pending) or active controller readers. */
        if (nvme_rwlock_wr_or_pend(&nvme->n_lock)) {
                return (B_TRUE);
        }
        if (!list_is_empty(&nvme->n_lock.nl_readers)) {
                return (B_TRUE);
        }

        /* Any namespace with an active writer or active readers blocks us. */
        for (uint32_t nsid = 1; nsid <= nvme->n_namespace_count; nsid++) {
                nvme_lock_t *ns_lock = &nvme_nsid2ns(nvme, nsid)->ns_lock;

                if (ns_lock->nl_writer != NULL) {
                        return (B_TRUE);
                }
                if (!list_is_empty(&ns_lock->nl_readers)) {
                        return (B_TRUE);
                }
        }

        return (B_FALSE);
}

/*
 * Answer can we hand off the world to a pending controller write lock. This has
 * similar rules to the above; however, we critically _ignore_ pending
 * controller write lock holds, as the assumption is that they are here, so the
 * only consideration from above are controller reader locks and namespace
 * locks.
 */
static boolean_t
nvme_rwlock_handoff_ctrl_wrlock(nvme_t *nvme)
{
        /* See nvme_rwlock_wakeup() for why this invariant holds. */
        ASSERT3P(nvme->n_lock.nl_writer, ==, NULL);

        /* Active controller readers prevent the handoff. */
        if (!list_is_empty(&nvme->n_lock.nl_readers)) {
                return (B_FALSE);
        }

        /* So does any namespace with an active writer or active readers. */
        for (uint32_t nsid = 1; nsid <= nvme->n_namespace_count; nsid++) {
                nvme_lock_t *ns_lock = &nvme_nsid2ns(nvme, nsid)->ns_lock;

                if (ns_lock->nl_writer != NULL) {
                        return (B_FALSE);
                }
                if (!list_is_empty(&ns_lock->nl_readers)) {
                        return (B_FALSE);
                }
        }

        return (B_TRUE);
}

/*
 * Namespace handoff variant. It skips pending writers on the namespace lock,
 * but fully considers them on the controller due to their priority. Otherwise
 * this follows the same rules as the normal blocking check.
 */
static boolean_t
nvme_rwlock_handoff_ns_wrlock(nvme_t *nvme, nvme_namespace_t *ns)
{
        nvme_lock_t *ctrl_lock = &nvme->n_lock;
        nvme_lock_t *ns_lock = &ns->ns_lock;

        /* Controller writers (active or pending) and active readers win. */
        if (nvme_rwlock_wr_or_pend(ctrl_lock)) {
                return (B_FALSE);
        }
        if (!list_is_empty(&ctrl_lock->nl_readers)) {
                return (B_FALSE);
        }

        /* An active namespace writer or readers also block the handoff. */
        if (ns_lock->nl_writer != NULL) {
                return (B_FALSE);
        }
        if (!list_is_empty(&ns_lock->nl_readers)) {
                return (B_FALSE);
        }

        return (B_TRUE);
}

/*
 * Grant a read hold on 'lock' to the minor described by 'info'. The caller
 * has already determined that nothing blocks the acquisition.
 */
static void
nvme_rwlock_rdlock(nvme_minor_lock_info_t *info, nvme_lock_t *lock)
{
        const hrtime_t now = gethrtime();

        /* Readers may only be admitted with no writer active or queued. */
        ASSERT3U(list_is_empty(&lock->nl_pend_writers), !=, 0);
        ASSERT3P(lock->nl_writer, ==, NULL);
        ASSERT3U(info->nli_state, ==, NVME_LOCK_STATE_UNLOCKED);
        ASSERT3U(list_link_active(&info->nli_node), ==, 0);
        ASSERT3P(info->nli_minor, !=, NULL);
        ASSERT3P(info->nli_nvme, !=, NULL);
        ASSERT3U(info->nli_curlevel, ==, NVME_LOCK_L_READ);

        info->nli_state = NVME_LOCK_STATE_ACQUIRED;
        info->nli_acq_kthread = (uintptr_t)curthread;
        info->nli_acq_pid = (uint32_t)curproc->p_pid;
        info->nli_last_change = now;

        lock->nl_nread_locks++;
        list_insert_tail(&lock->nl_readers, info);
}

/*
 * Grant the write hold on 'lock' to the minor described by 'info'. The caller
 * has already determined that nothing blocks the acquisition.
 */
static void
nvme_rwlock_wrlock(nvme_minor_lock_info_t *info, nvme_lock_t *lock)
{
        const hrtime_t now = gethrtime();

        ASSERT3P(lock->nl_writer, ==, NULL);
        ASSERT3U(info->nli_state, ==, NVME_LOCK_STATE_UNLOCKED);
        ASSERT3U(list_link_active(&info->nli_node), ==, 0);
        ASSERT3P(info->nli_minor, !=, NULL);
        ASSERT3P(info->nli_nvme, !=, NULL);

        info->nli_state = NVME_LOCK_STATE_ACQUIRED;
        info->nli_curlevel = NVME_LOCK_L_WRITE;
        info->nli_acq_kthread = (uintptr_t)curthread;
        info->nli_acq_pid = (uint32_t)curproc->p_pid;
        info->nli_last_change = now;

        lock->nl_nwrite_locks++;
        lock->nl_writer = info;
}

#ifdef  DEBUG
/*
 * This is just a sanity check for our lock logic.
 */
/*
 * Sanity check (DEBUG only): is 'info' actually present on the lock's list of
 * active readers?
 */
static boolean_t
nvme_rwlock_is_reader(nvme_lock_t *lock, const nvme_minor_lock_info_t *info)
{
        nvme_minor_lock_info_t *cur = list_head(&lock->nl_readers);

        while (cur != NULL) {
                if (cur == info) {
                        return (B_TRUE);
                }
                cur = list_next(&lock->nl_readers, cur);
        }

        return (B_FALSE);
}
#endif

/*
 * Record the result of a blocked lock attempt in the waiter's ioctl state and
 * wake the corresponding minor from its cv_wait_sig().
 */
static void
nvme_rwlock_signal_one(nvme_minor_lock_info_t *info, nvme_ioctl_errno_t err)
{
        ASSERT3P(info->nli_ioc, !=, NULL);
        ASSERT3P(info->nli_minor, !=, NULL);
        ASSERT3P(info->nli_state, !=, NVME_LOCK_STATE_BLOCKED);

        if (err != NVME_IOCTL_E_OK) {
                (void) nvme_ioctl_error(info->nli_ioc, err, 0, 0);
        } else {
                nvme_ioctl_success(info->nli_ioc);
        }

        cv_signal(&info->nli_minor->nm_cv);
}

/*
 * Hand the lock to every pending reader at once. Only legal when there are no
 * active holders and no pending writers (writers trump readers).
 */
static void
nvme_rwlock_wakeup_readers(nvme_lock_t *lock)
{
        nvme_minor_lock_info_t *info;

        if (list_is_empty(&lock->nl_pend_readers)) {
                return;
        }

        ASSERT3U(list_is_empty(&lock->nl_readers), !=, 0);
        ASSERT3P(lock->nl_writer, ==, NULL);
        ASSERT3U(list_is_empty(&lock->nl_pend_writers), !=, 0);

        for (;;) {
                info = list_remove_head(&lock->nl_pend_readers);
                if (info == NULL) {
                        break;
                }

                /* Mark unlocked first; rdlock asserts on the state. */
                info->nli_state = NVME_LOCK_STATE_UNLOCKED;
                nvme_rwlock_rdlock(info, lock);
                nvme_rwlock_signal_one(info, NVME_IOCTL_E_OK);
        }
}

/*
 * An unlock occurred somewhere. We need to evaluate the total state of the
 * world. An unlock of a namespace can allow a controller lock to proceed. On
 * the other hand, dropping the controller write lock allows every namespace to
 * proceed. While we know the context of where the unlock occurred, it's simpler
 * right now to just allow everything to continue. This is somewhat expensive,
 * but this can be sped up with more cached information when it's justified. We
 * process things in the following order:
 *
 * 1) Evaluate if someone can now take a controller write lock. If so, wake up
 * the head of the list and then all subsequent processing is done.
 * 2) Evaluate if there are pending readers for the controller. If so, wake up
 * each and every waiter. Always continue to namespaces in this case.
 *
 * For each namespace:
 *
 * 1) Evaluate if there are pending writers and they can take the write lock. If
 * so, wake up the head of the list.  If so, continue to the next namespace.
 * 2) Otherwise, if there are pending readers. If so, wake up each and every
 * reader. Continue onto the next namespace.
 */
static void
nvme_rwlock_wakeup(nvme_t *nvme)
{
        nvme_lock_t *ctrl_lock = &nvme->n_lock;

        /*
         * This assertion may seem weird, but it's actually a bit of an
         * invariant. When the controller's write lock is taken, by definition
         * there are no other locks that can be taken. Therefore if we were
         * somehow unable to unlock a lock on this controller, then we'd be
         * violating our rules.
         */
        VERIFY3P(ctrl_lock->nl_writer, ==, NULL);

        /*
         * If there are pending writers, either one of them will be woken up or
         * no one will. Writers trump readers, but it's possible that we may not
         * be able to wake up a waiting writer yet. If we take this arm, we
         * should not process anything else. The same logic applies in the
         * namespace case as well.
         */
        if (list_is_empty(&ctrl_lock->nl_pend_writers) == 0) {
                nvme_minor_lock_info_t *info;

                if (!nvme_rwlock_handoff_ctrl_wrlock(nvme))
                        return;

                /*
                 * We opt to indicate that this is unlocked ahead of
                 * taking the lock for state tracking purposes.
                 */
                info = list_remove_head(&ctrl_lock->nl_pend_writers);
                info->nli_state = NVME_LOCK_STATE_UNLOCKED;
                nvme_rwlock_wrlock(info, ctrl_lock);
                nvme_rwlock_signal_one(info, NVME_IOCTL_E_OK);
                return;
        }

        /*
         * No pending controller writers: wake all pending controller readers
         * (if any) and then let each namespace make what progress it can.
         */
        nvme_rwlock_wakeup_readers(ctrl_lock);
        for (uint32_t i = 1; i <= nvme->n_namespace_count; i++) {
                nvme_namespace_t *ns = nvme_nsid2ns(nvme, i);
                nvme_lock_t *ns_lock = &ns->ns_lock;

                /*
                 * A pending namespace writer takes priority over pending
                 * namespace readers, but the handoff may still be blocked
                 * (e.g. by controller readers); in that case no one on this
                 * namespace is woken this round.
                 */
                if (list_is_empty(&ns_lock->nl_pend_writers) == 0) {
                        nvme_minor_lock_info_t *info;

                        if (!nvme_rwlock_handoff_ns_wrlock(nvme, ns))
                                continue;

                        /* As above, mark unlocked before taking the lock. */
                        info = list_remove_head(&ns_lock->nl_pend_writers);
                        info->nli_state = NVME_LOCK_STATE_UNLOCKED;
                        nvme_rwlock_wrlock(info, ns_lock);
                        nvme_rwlock_signal_one(info, NVME_IOCTL_E_OK);
                } else {
                        nvme_rwlock_wakeup_readers(ns_lock);
                }
        }
}

/*
 * This cleans up all the state in the minor for returning without a lock held.
 */
/*
 * Reset the minor's lock-tracking state so that it reflects holding nothing.
 */
static void
nvme_rwunlock_cleanup_minor(nvme_minor_lock_info_t *info)
{
        info->nli_state = NVME_LOCK_STATE_UNLOCKED;
        info->nli_curlevel = 0;
        info->nli_lock = NULL;
        info->nli_ns = NULL;
}

/*
 * We've been asked to unlock a lock. Not only must we remove our hold from this
 * lock, we must go through and wake up the next waiter. The waiters that we
 * have to wake up vary depending on our lock. See section 'ioctls, Errors, and
 * Exclusive Access' in the theory statement for more information.
 */

void
nvme_rwunlock(nvme_minor_lock_info_t *info, nvme_lock_t *lock)
{
        nvme_t *const nvme = info->nli_nvme;
        boolean_t is_read;

        VERIFY(MUTEX_HELD(&nvme->n_minor_mutex));
        VERIFY3P(info->nli_lock, ==, lock);
        VERIFY(info->nli_curlevel == NVME_LOCK_L_READ ||
            info->nli_curlevel == NVME_LOCK_L_WRITE);
        is_read = (info->nli_curlevel == NVME_LOCK_L_READ);

        /*
         * Drop our hold on the lock and scrub the corresponding state from the
         * info structure.
         */
        info->nli_last_change = gethrtime();
        if (!is_read) {
                VERIFY3U(list_link_active(&info->nli_node), ==, 0);
                VERIFY3P(lock->nl_writer, ==, info);
                lock->nl_writer = NULL;
        } else {
                VERIFY3U(list_link_active(&info->nli_node), !=, 0);
                ASSERT3U(nvme_rwlock_is_reader(lock, info), ==, B_TRUE);
                list_remove(&lock->nl_readers, info);
        }

        nvme_rwunlock_cleanup_minor(info);

        /* Our release may allow someone else to make progress. */
        nvme_rwlock_wakeup(nvme);
}

/*
 * We were just interrupted due to a signal. However, just because our block was
 * interrupted due to a signal doesn't mean that other activity didn't occur. In
 * particular, the signal wake up could race with a subsequent wake up that was
 * due to the device being removed or actually acquiring the lock. Depending on
 * which state we were in, we need to perform the appropriate clean up. In all
 * cases, the signal trumps all, which may mean actually unlocking!
 */
static void
nvme_rwlock_signal(nvme_minor_lock_info_t *info, nvme_lock_t *lock,
    boolean_t is_read)
{
        ASSERT3P(info->nli_ioc, !=, NULL);

        /*
         * We're changing the state here, so update the minor's last change
         * time.
         */
        info->nli_last_change = gethrtime();
        lock->nl_nsignals++;

        /*
         * This is the simplest case. We've already been removed from the lock
         * that we're on. All we need to do is change the error to indicate that
         * we received a signal.
         */
        if (info->nli_state == NVME_LOCK_STATE_UNLOCKED) {
                ASSERT3P(info->nli_lock, ==, NULL);
                (void) nvme_ioctl_error(info->nli_ioc,
                    NVME_IOCTL_E_LOCK_WAIT_SIGNAL, 0, 0);
                lock->nl_nsig_unlock++;
                return;
        }

        /*
         * For all others, the lock should be set here.
         */
        ASSERT3P(info->nli_lock, ==, lock);

        /*
         * For someone that was blocked, we need to remove them from the pending
         * lists.
         */
        if (info->nli_state == NVME_LOCK_STATE_BLOCKED) {
                ASSERT3S(list_link_active(&info->nli_node), !=, 0);
                if (is_read) {
                        list_remove(&lock->nl_pend_readers, info);
                } else {
                        list_remove(&lock->nl_pend_writers, info);
                }

                nvme_rwunlock_cleanup_minor(info);
                (void) nvme_ioctl_error(info->nli_ioc,
                    NVME_IOCTL_E_LOCK_WAIT_SIGNAL, 0, 0);
                lock->nl_nsig_blocks++;
                return;
        }

        /*
         * Now, the most nuanced thing that we need to do. We need to unlock
         * this node. We raced with a wake-up that actually granted us the lock,
         * but the signal trumps that: synthesize an unlock request and submit
         * it, which will also pass the lock on to any other waiters.
         */
        lock->nl_nsig_acq++;
        nvme_rwunlock(info, lock);
}

/*
 * Here we need to implement our read-write lock policy. Refer to the big theory
 * statement for more information. Here's a summary of the priority that's
 * relevant here:
 *
 * 1) Waiting writers starve waiting readers
 * 2) Waiting writers for the controller starve all namespace writers and
 *    readers
 * 3) A read lock can be taken if there are no pending or active writers on the
 *    lock (and the controller lock for a namespace).
 */
void
nvme_rwlock(nvme_minor_t *minor, nvme_ioctl_lock_t *req)
{
        nvme_t *const nvme = minor->nm_ctrl;
        const boolean_t is_nonblock = (req->nil_flags &
            NVME_LOCK_F_DONT_BLOCK) != 0;
        const boolean_t is_read = req->nil_level == NVME_LOCK_L_READ;
        const boolean_t is_ctrl = req->nil_ent == NVME_LOCK_E_CTRL;
        nvme_minor_lock_info_t *info;
        nvme_lock_t *lock;
        boolean_t waiters;
        hrtime_t sleep_time;

        VERIFY(MUTEX_HELD(&nvme->n_minor_mutex));

        /*
         * Pick out the lock and per-minor info for the requested entity and
         * determine whether anything currently blocks the acquisition.
         */
        if (is_ctrl) {
                info = &minor->nm_ctrl_lock;
                lock = &nvme->n_lock;

                if (is_read) {
                        waiters = nvme_rwlock_block_ctrl_rdlock(nvme);
                } else {
                        waiters = nvme_rwlock_block_ctrl_wrlock(nvme);
                }
        } else {
                nvme_namespace_t *ns;
                const uint32_t nsid = req->nil_common.nioc_nsid;
                info = &minor->nm_ns_lock;

                VERIFY3U(req->nil_ent, ==, NVME_LOCK_E_NS);
                ns = nvme_nsid2ns(nvme, nsid);
                minor->nm_ns_lock.nli_ns = ns;
                lock = &ns->ns_lock;

                if (is_read) {
                        waiters = nvme_rwlock_block_ns_rdlock(nvme, ns);
                } else {
                        waiters = nvme_rwlock_block_ns_wrlock(nvme, ns);
                }
        }

        /*
         * Set the information that indicates what kind of lock we're attempting
         * to acquire and that we're operating on.
         */
        info->nli_curlevel = is_read ? NVME_LOCK_L_READ : NVME_LOCK_L_WRITE;
        info->nli_lock = lock;

        /*
         * We think we can get the lock, hurrah.
         */
        if (!waiters) {
                if (is_read) {
                        nvme_rwlock_rdlock(info, lock);
                } else {
                        nvme_rwlock_wrlock(info, lock);
                }
                (void) nvme_ioctl_success(&req->nil_common);
                return;
        }

        /*
         * We failed to get the lock. At this point we will set ourselves up to
         * block. Once we go to sleep on the CV, our assumption is that anyone
         * who has woken us up will have filled in the information the status of
         * this operation and therefore after this point, all we have to do is
         * return.
         */
        if (is_nonblock) {
                /* Caller asked not to wait: undo the state we set above. */
                nvme_rwunlock_cleanup_minor(info);
                lock->nl_nnonblock++;
                (void) nvme_ioctl_error(&req->nil_common,
                    NVME_IOCTL_E_LOCK_WOULD_BLOCK, 0, 0);
                return;
        }

        /* Queue ourselves on the appropriate pending list. */
        ASSERT3P(info->nli_ioc, ==, NULL);
        info->nli_ioc = &req->nil_common;
        if (is_read) {
                list_insert_tail(&lock->nl_pend_readers, info);
                lock->nl_npend_reads++;
        } else {
                list_insert_tail(&lock->nl_pend_writers, info);
                lock->nl_npend_writes++;
        }

        ASSERT3U(info->nli_state, ==, NVME_LOCK_STATE_UNLOCKED);
        info->nli_state = NVME_LOCK_STATE_BLOCKED;
        sleep_time = gethrtime();
        info->nli_last_change = sleep_time;
        while (info->nli_state == NVME_LOCK_STATE_BLOCKED) {
                /*
                 * Block until we receive a signal. Note, a signal trumps all
                 * other processing. We may be woken up here because we acquired
                 * a lock, we may also end up woken up here if the controller is
                 * marked as dead.
                 */
                if (cv_wait_sig(&minor->nm_cv, &nvme->n_minor_mutex) == 0) {
                        nvme_rwlock_signal(info, lock, is_read);
                        break;
                }
        }

        /*
         * Before we return, clean up and sanity check our state. Whoever woke
         * us up is responsible for having set the ioctl's error already.
         */
        info->nli_ioc = NULL;
#ifdef  DEBUG
        ASSERT3S(info->nli_last_change, !=, sleep_time);
        if (info->nli_state == NVME_LOCK_STATE_UNLOCKED) {
                ASSERT3S(list_link_active(&info->nli_node), ==, 0);
                ASSERT3P(info->nli_ns, ==, NULL);
                ASSERT3U(req->nil_common.nioc_drv_err, !=, NVME_IOCTL_E_OK);
        } else {
                ASSERT3U(info->nli_state, ==, NVME_LOCK_STATE_ACQUIRED);
                ASSERT3U(req->nil_common.nioc_drv_err, ==, NVME_IOCTL_E_OK);
                if (is_read) {
                        ASSERT3S(list_link_active(&info->nli_node), !=, 0);
                } else {
                        ASSERT3P(lock->nl_writer, ==, info);
                }
        }
        ASSERT3P(info->nli_minor, ==, minor);
        ASSERT3P(info->nli_nvme, ==, minor->nm_ctrl);
#endif
}

/*
 * This is used to clean up a single minor that was blocking trying to get a
 * lock prior to a controller going dead. In particular, the key here is we need
 * to change its state to unlocked by cleaning it up and then signal it to wake
 * up and process things. The clean up also helps deal with the case of a racing
 * signal, though it does leave the state a little awkward in this intermediate
 * moment; however, since it's been removed from a list that's really the proper
 * action and no one can issue new lock ioctls at this point.
 */
static void
nvme_rwlock_ctrl_dead_cleanup_one(nvme_t *nvme, nvme_minor_lock_info_t *info)
{
        ASSERT3U(info->nli_state, ==, NVME_LOCK_STATE_BLOCKED);
        ASSERT3P(info->nli_ioc, !=, NULL);

        /*
         * Update the last time this has changed for our sanity checks.
         */
        info->nli_last_change = gethrtime();
        nvme_rwunlock_cleanup_minor(info);
        /* Report the controller's dead status to the waiter and wake it. */
        nvme_rwlock_signal_one(info, nvme->n_dead_status);
}

/*
 * We've just been informed that this controller has set n_dead. This is most
 * unfortunate for anyone trying to actively use it right now and we must notify
 * them. Anyone who has successfully obtained a lock gets to keep it until they
 * drop it (hopefully soon). Anyone who is asleep should be kicked out being
 * told they are not getting it.
 *
 * The moment we grab n_minor_mutex, no other state here can change. So we can
 * go ahead and wake up all waiters with impunity. This is being called from the
 * nvme_dead_taskq.
 *
 * While this would ideally only be used after attach(9E) has been called, this
 * can end up triggered at different points in attach(9E) in response to some of
 * the different command codes that we issue. If we haven't actually gotten far
 * enough in attach that we've indicated that we've basically finished that then
 * we should not touch the actual namespaces (and there is likely no one there).
 */
void
nvme_rwlock_ctrl_dead(void *arg)
{
        nvme_t *nvme = arg;
        nvme_lock_t *ctrl_lock = &nvme->n_lock;
        nvme_minor_lock_info_t *info;

        mutex_enter(&nvme->n_minor_mutex);
        if ((nvme->n_progress & NVME_NS_INIT) == 0) {
                mutex_exit(&nvme->n_minor_mutex);
                return;
        }

        for (uint32_t i = 1; i <= nvme->n_namespace_count; i++) {
                nvme_namespace_t *ns = nvme_nsid2ns(nvme, i);
                nvme_lock_t *ns_lock = &ns->ns_lock;

                while ((info = list_remove_head(&ns_lock->nl_pend_readers)) !=
                    NULL) {
                        nvme_rwlock_ctrl_dead_cleanup_one(nvme, info);
                }

                while ((info = list_remove_head(&ns_lock->nl_pend_writers)) !=
                    NULL) {
                        nvme_rwlock_ctrl_dead_cleanup_one(nvme, info);
                }
        }

        while ((info = list_remove_head(&ctrl_lock->nl_pend_readers)) != NULL) {
                nvme_rwlock_ctrl_dead_cleanup_one(nvme, info);
        }

        while ((info = list_remove_head(&ctrl_lock->nl_pend_writers)) != NULL) {

                nvme_rwlock_ctrl_dead_cleanup_one(nvme, info);
        }
        mutex_exit(&nvme->n_minor_mutex);
}

void
nvme_lock_fini(nvme_lock_t *lock)
{
        /* No one may still hold this lock when it is being torn down. */
        VERIFY3P(lock->nl_writer, ==, NULL);

        list_destroy(&lock->nl_readers);
        list_destroy(&lock->nl_pend_readers);
        list_destroy(&lock->nl_pend_writers);
}

void
nvme_lock_init(nvme_lock_t *lock)
{
        /* All three lists link through the same node in the lock info. */
        const size_t sz = sizeof (nvme_minor_lock_info_t);
        const size_t off = offsetof(nvme_minor_lock_info_t, nli_node);

        list_create(&lock->nl_readers, sz, off);
        list_create(&lock->nl_pend_readers, sz, off);
        list_create(&lock->nl_pend_writers, sz, off);
}