root/usr/src/uts/common/contract/device.c
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright 2019 Joyent, Inc.
 */

#include <sys/mutex.h>
#include <sys/debug.h>
#include <sys/types.h>
#include <sys/param.h>
#include <sys/kmem.h>
#include <sys/thread.h>
#include <sys/id_space.h>
#include <sys/avl.h>
#include <sys/list.h>
#include <sys/sysmacros.h>
#include <sys/proc.h>
#include <sys/contract.h>
#include <sys/contract_impl.h>
#include <sys/contract/device.h>
#include <sys/contract/device_impl.h>
#include <sys/cmn_err.h>
#include <sys/nvpair.h>
#include <sys/policy.h>
#include <sys/ddi_impldefs.h>
#include <sys/ddi_implfuncs.h>
#include <sys/systm.h>
#include <sys/stat.h>
#include <sys/sunddi.h>
#include <sys/esunddi.h>
#include <sys/ddi.h>
#include <sys/fs/dv_node.h>
#include <sys/sunndi.h>
#undef ct_lock  /* needed because clnt.h defines ct_lock as a macro */

/*
 * Device Contracts
 * -----------------
 * This file contains the core code for the device contracts framework.
 * A device contract is an agreement or a contract between a process and
 * the kernel regarding the state of the device. A device contract may be
 * created when a relationship is formed between a device and a process
 * i.e. at open(2) time, or it may be created at some point after the device
 * has been opened. A device contract once formed may be broken by either party.
 * A device contract can be broken by the process by an explicit abandon of the
 * contract or by an implicit abandon when the process exits. A device contract
 * can be broken by the kernel either asynchronously (without negotiation) or
 * synchronously (with negotiation). Exactly which happens depends on the device
 * state transition. The following state diagram shows the transitions between
 * device states. Only device state transitions currently supported by device
 * contracts are shown.
 *
 *                              <-- A -->
 *                       /-----------------> DEGRADED
 *                       |                      |
 *                       |                      |
 *                       |                      | S
 *                       |                      | |
 *                       |                      | v
 *                       v       S -->          v
 *                      ONLINE ------------> OFFLINE
 *
 *
 * In the figure above, the arrows indicate the direction of transition. The
 * letter S refers to transitions which are inherently synchronous i.e.
 * require negotiation and the letter A indicates transitions which are
 * asynchronous i.e. are done without contract negotiations. A good example
 * of a synchronous transition is the ONLINE -> OFFLINE transition. This
 * transition cannot happen as long as there are consumers which have the
 * device open. Thus some form of negotiation needs to happen between the
 * consumers and the kernel to ensure that consumers either close devices
 * or disallow the move to OFFLINE. Certain other transitions such as
 * ONLINE --> DEGRADED for example, are inherently asynchronous i.e.
 * non-negotiable. A device that suffers a fault that degrades its
 * capabilities will become degraded irrespective of what consumers it has,
 * so a negotiation in this case is pointless.
 *
 * The following device states are currently defined for device contracts:
 *
 *      CT_DEV_EV_ONLINE
 *              The device is online and functioning normally
 *      CT_DEV_EV_DEGRADED
 *              The device is online but is functioning in a degraded capacity
 *      CT_DEV_EV_OFFLINE
 *              The device is offline and is no longer configured
 *
 * A typical consumer of device contracts starts out with a contract
 * template and adds terms to that template. These include the
 * "acceptable set" (A-set) term, which is a bitset of device states which
 * are guaranteed by the contract. If the device moves out of a state in
 * the A-set, the contract is broken. The breaking of the contract can
 * be asynchronous in which case a critical contract event is sent to the
 * contract holder but no negotiations take place. If the breaking of the
 * contract is synchronous, negotiations are opened between the affected
 * consumer and the kernel. The kernel does this by sending a critical
 * event to the consumer with the CTE_NEG flag set indicating that this
 * is a negotiation event. The consumer can accept this change by sending
 * an ACK message to the kernel. Alternatively, if it has the necessary
 * privileges, it can send a NACK message to the kernel which will block
 * the device state change. To NACK a negotiable event, a process must
 * have the {PRIV_SYS_DEVICES} privilege asserted in its effective set.
 *
 * Other terms include the "minor path" term, specified explicitly if the
 * contract is not being created at open(2) time or specified implicitly
 * if the contract is being created at open time via an activated template.
 *
 * A contract event is sent on any state change to which the contract
 * owner has subscribed via the informative or critical event sets. Only
 * critical events are guaranteed to be delivered. Since all device state
 * changes are controlled by the kernel and cannot be arbitrarily generated
 * by a non-privileged user, the {PRIV_CONTRACT_EVENT} privilege does not
 * need to be asserted in a process's effective set to designate an event as
 * critical. To ensure privacy, a process must either have the same effective
 * userid as the contract holder or have the {PRIV_CONTRACT_OBSERVER} privilege
 * asserted in its effective set in order to observe device contract events
 * off the device contract type specific endpoint.
 *
 * Yet another term available with device contracts is the "non-negotiable"
 * term. This term is used to pre-specify a NACK to any contract negotiation.
 * This term is ignored for asynchronous state changes. For example, a
 * process may have the A-set {ONLINE|DEGRADED} and make the contract
 * non-negotiable. In this case, the device contract framework assumes a
 * NACK for any transition to OFFLINE and blocks the offline. If the A-set
 * is {ONLINE} and the non-negotiable term is set, transitions to OFFLINE
 * are NACKed but transitions to DEGRADE succeed.
 *
 * The OFFLINE negotiation (if OFFLINE state is not in the A-set for a contract)
 * happens just before the I/O framework attempts to offline a device
 * (i.e. detach a device and set the offline flag so that it cannot be
 * reattached). A device contract holder is expected to either NACK the offline
 * (if privileged) or release the device and allow the offline to proceed.
 *
 * The DEGRADE contract event (if DEGRADE is not in the A-set for a contract)
 * is generated just before the I/O framework transitions the device state
 * to "degraded" (i.e. DEVI_DEVICE_DEGRADED in I/O framework terminology).
 *
 * The contract holder is expected to ACK or NACK a negotiation event
 * within a certain period of time. If the ACK/NACK is not received
 * within the timeout period, the device contract framework will behave
 * as if the contract does not exist and will proceed with the event.
 *
 * Unlike a process contract a device contract does not need to exist
 * once it is abandoned, since it does not define a fault boundary. It
 * merely represents an agreement between a process and the kernel
 * regarding the state of the device. Once the process has abandoned
 * the contract (either implicitly via a process exit or explicitly)
 * the kernel has no reason to retain the contract. As a result
 * device contracts are neither inheritable nor need to exist in an
 * orphan state.
 *
 * A device unlike a process may exist in multiple contracts and has
 * a "life" outside a device contract. A device unlike a process
 * may exist without an associated contract. Unlike a process contract
 * a device contract may be formed after a binding relationship is
 * formed between a process and a device.
 *
 *      IMPLEMENTATION NOTES
 *      ====================
 * DATA STRUCTURES
 * ----------------
 *      The heart of the device contracts implementation is the device contract
 *      private cont_device_t (or ctd for short) data structure. It encapsulates
 *      the generic contract_t data structure and has a number of private
 *      fields.
 *      These include:
 *              cond_minor: The minor device that is the subject of the contract
 *              cond_aset:  The bitset of states which are guaranteed by the
 *                         contract
 *              cond_noneg: If set, indicates that the result of negotiation has
 *                          been predefined to be a NACK
 *      In addition, there are other device identifiers such the devinfo node,
 *      dev_t and spec_type of the minor node. There are also a few fields that
 *      are used during negotiation to maintain state. See
 *              uts/common/sys/contract/device_impl.h
 *      for details.
 *      The ctd structure represents the device private part of a contract of
 *      type "device"
 *
 *      Another data structure used by device contracts is ctmpl_device. It is
 *      the device contracts private part of the contract template structure. It
 *      encapsulates the generic template structure "ct_template_t" and includes
 *      the following device contract specific fields
 *              ctd_aset:   The bitset of states that should be guaranteed by a
 *                          contract
 *              ctd_noneg:  If set, indicates that contract should NACK a
 *                          negotiation
 *              ctd_minor:  The devfs_path (without the /devices prefix) of the
 *                          minor node that is the subject of the contract.
 *
 * ALGORITHMS
 * ---------
 * There are three sets of routines in this file
 *      Template related routines
 *      -------------------------
 *      These routines provide support for template related operations initiated
 *      via the generic template operations. These include routines that dup
 *      a template, free it, and set various terms in the template
 *      (such as the minor node path, the acceptable state set (or A-set)
 *      and the non-negotiable term) as well as a routine to query the
 *      device specific portion of the template for the abovementioned terms.
 *      There is also a routine to create (ctmpl_device_create) that is used to
 *      create a contract from a template. This routine calls (after initial
 *      setup) the common function used to create a device contract
 *      (contract_device_create).
 *
 *      core device contract implementation
 *      ----------------------------------
 *      These routines support the generic contract framework to provide
 *      functionality that allows contracts to be created, managed and
 *      destroyed. The contract_device_create() routine is a routine used
 *      to create a contract from a template (either via an explicit create
 *      operation on a template or implicitly via an open with an
 *      activated template.). The contract_device_free() routine assists
 *      in freeing the device contract specific parts. There are routines
 *      used to abandon (contract_device_abandon) a device contract as well
 *      as a routine to destroy (which despite its name does not destroy,
 *      it only moves a contract to a dead state) a contract.
 *      There is also a routine to return status information about a
 *      contract - the level of detail depends on what is requested by the
 *      user. A value of CTD_FIXED only returns fixed length fields such
 *      as the A-set, state of device and value of the "noneg" term. If
 *      CTD_ALL is specified, the minor node path is returned as well.
 *
 *      In addition there are interfaces (contract_device_ack/nack) which
 *      are used to support negotiation between userland processes and
 *      device contracts. These interfaces record the acknowledgement
 *      or lack thereof for negotiation events and help determine if the
 *      negotiated event should occur.
 *
 *      "backend routines"
 *      -----------------
 *      The backend routines form the interface between the I/O framework
 *      and the device contract subsystem. These routines allow the I/O
 *      framework to call into the device contract subsystem to notify it of
 *      impending changes to a device state as well as to inform of the
 *      final disposition of such attempted state changes. Routines in this
 *      class include contract_device_offline() that indicates an attempt to
 *      offline a device, contract_device_degrade() that indicates that
 *      a device is moving to the degraded state and contract_device_negend()
 *      that is used by the I/O framework to inform the contracts subsystem of
 *      the final disposition of an attempted operation.
 *
 *      SUMMARY
 *      -------
 *      A contract starts its life as a template. A process allocates a device
 *      contract template and sets various terms:
 *              The A-set
 *              The device minor node
 *              Critical and informative events
 *              The noneg i.e. no negotiation term
 *      Setting of these terms in the template is done via the
 *      ctmpl_device_set() entry point in this file. A process can query a
 *      template to determine the terms already set in the template - this is
 *      facilitated by the ctmpl_device_get() routine.
 *
 *      Once all the appropriate terms are set, the contract is instantiated via
 *      one of two methods
 *      - via an explicit create operation - this is facilitated by the
 *        ctmpl_device_create() entry point
 *      - synchronously with the open(2) system call - this is achieved via the
 *        contract_device_open() routine.
 *      The core work for both these above functions is done by
 *      contract_device_create()
 *
 *      A contract once created can be queried for its status. Support for
 *      status info is provided by both the common contracts framework and by
 *      the "device" contract type. If the level of detail requested is
 *      CTD_COMMON, only the common contract framework data is used. Higher
 *      levels of detail result in calls to contract_device_status() to supply
 *      device contract type specific status information.
 *
 *      A contract once created may be abandoned either explicitly or implicitly.
 *      In either case, the contract_device_abandon() function is invoked. This
 *      function merely calls contract_destroy() which moves the contract to
 *      the DEAD state. The device contract portion of destroy processing is
 *      provided by contract_device_destroy() which merely disassociates the
 *      contract from its device devinfo node. A contract in the DEAD state is
 *      not freed. It hangs around until all references to the contract are
 *      gone. When that happens, the contract is finally deallocated. The
 *      device contract specific portion of the free is done by
 *      contract_device_free() which finally frees the device contract specific
 *      data structure (cont_device_t).
 *
 *      When a device undergoes a state change, the I/O framework calls the
 *      corresponding device contract entry point. For example, when a device
 *      is about to go OFFLINE, the routine contract_device_offline() is
 *      invoked. Similarly if a device moves to DEGRADED state, the routine
 *      contract_device_degrade() function is called. These functions call the
 *      core routine contract_device_publish(). This function determines via
 *      the function is_sync_neg() whether an event is a synchronous (i.e.
 *      negotiable) event or not. In the former case contract_device_publish()
 *      publishes a CTE_NEG event and then waits in wait_for_acks() for ACKs
 *      and/or NACKs from contract holders. In the latter case, it simply
 *      publishes the event and does not wait. In the negotiation case, ACKs or
 *      NACKs from userland consumers results in contract_device_ack_nack()
 *      being called where the result of the negotiation is recorded in the
 *      contract data structure. Once all outstanding contract owners have
 *      responded, the device contract code in wait_for_acks() determines the
 *      final result of the negotiation. A single NACK overrides all other ACKs.
 *      If there is no NACK, then a single ACK will result in an overall ACK
 *      result. If there are no ACKs or NACKs, then the result CT_NONE is
 *      returned back to the I/O framework. Once the event is permitted or
 *      blocked, the I/O framework proceeds or aborts the state change. The
 *      I/O framework then calls contract_device_negend() with a result code
 *      indicating final disposition of the event. This call releases the
 *      barrier and other state associated with the previous negotiation,
 *      which permits the next event (if any) to come into the device contract
 *      framework.
 *
 *      Finally, a device that has outstanding contracts may be removed from
 *      the system which results in its devinfo node being freed. The devinfo
 *      free routine in the I/O framework, calls into the device contract
 *      function - contract_device_remove_dip(). This routine disassociates
 *      the dip from all contracts associated with the devinfo node being freed,
 *      allowing the devinfo node to be freed.
 *
 * LOCKING
 * ---------
 *      There are four sets of data that need to be protected by locks
 *
 *      i) device contract specific portion of the contract template - This data
 *      is protected by the template lock ctmpl_lock.
 *
 *      ii) device contract specific portion of the contract - This data is
 *      protected by the contract lock ct_lock
 *
 *      iii) The linked list of contracts hanging off a devinfo node - This
 *      list is protected by the per-devinfo node lock devi_ct_lock
 *
 *      iv) Finally there is a barrier, controlled by devi_ct_lock, devi_ct_cv
 *      and devi_ct_count that controls state changes to a dip
 *
 *      The template lock is independent in that none of the other locks in this
 *      file may be taken while holding the template lock (and vice versa).
 *
 *      The remaining three locks have the following lock order
 *
 *      devi_ct_lock  -> ct_count barrier ->  ct_lock
 *
 */

/*
 * Forward declaration of the common contract creation routine. It is
 * called by ctmpl_device_create() before its definition appears in this
 * file. On failure it returns NULL and the error code is read back through
 * errorp (see the caller).
 */
static cont_device_t *contract_device_create(ctmpl_device_t *dtmpl, dev_t dev,
    int spec_type, proc_t *owner, int *errorp);

/*
 * barrier routines
 *
 * All of these operate on a devinfo node. Per the LOCKING notes in the
 * block comment at the top of this file, the barrier is controlled by
 * devi_ct_lock, devi_ct_cv and devi_ct_count.
 */
static void ct_barrier_acquire(dev_info_t *dip);
static void ct_barrier_release(dev_info_t *dip);
static int ct_barrier_held(dev_info_t *dip);
static int ct_barrier_empty(dev_info_t *dip);
static void ct_barrier_wait_for_release(dev_info_t *dip);
static int ct_barrier_wait_for_empty(dev_info_t *dip, int secs);
static void ct_barrier_decr(dev_info_t *dip);
static void ct_barrier_incr(dev_info_t *dip);

/*
 * Contract type handle for device contracts. It is passed to ctmpl_init()
 * when the default template is built in contract_device_default().
 * NOTE(review): it is not assigned anywhere in the visible part of this
 * file; presumably set up by the type's init routine - confirm.
 */
ct_type_t *device_type;

/*
 * Macro predicates for determining when events should be sent and how.
 *
 * EVSENDP(ctd, flag)
 *      Non-zero if any event bit in "flag" is present in either the
 *      informative or the critical event set of the contract embedded
 *      in the cont_device_t pointed to by "ctd".
 *
 * EVINFOP(ctd, flag)
 *      True (1) if none of the event bits in "flag" are in the critical
 *      event set of that contract, false (0) otherwise.
 *
 * Both arguments are parenthesized in the expansion so that compound
 * expressions (e.g. "A | B" for flag, or "&obj" for ctd) are evaluated as
 * the caller intended. Note that EVSENDP evaluates "ctd" twice, so it must
 * not be passed an expression with side effects.
 */
#define EVSENDP(ctd, flag)                                              \
        (((ctd)->cond_contract.ct_ev_info |                             \
            (ctd)->cond_contract.ct_ev_crit) & (flag))

#define EVINFOP(ctd, flag)                                              \
        (((ctd)->cond_contract.ct_ev_crit & (flag)) == 0)

/*
 * Negotiation table.
 *
 * Each entry names a device state transition (st_old -> st_new) and records
 * in st_neg whether that transition is synchronous, i.e. negotiated (1), or
 * asynchronous (0). The final all-zero entry is presumably the end-of-table
 * sentinel for the lookup code - confirm against the table's consumer.
 */
struct ct_dev_negtable {
        uint_t  st_old;         /* state being left */
        uint_t  st_new;         /* state being entered */
        uint_t  st_neg;         /* 1 = negotiated, 0 = not negotiated */
};

struct ct_dev_negtable ct_dev_negtable[] = {
        { CT_DEV_EV_ONLINE,     CT_DEV_EV_OFFLINE,      1 },
        { CT_DEV_EV_ONLINE,     CT_DEV_EV_DEGRADED,     0 },
        { CT_DEV_EV_DEGRADED,   CT_DEV_EV_ONLINE,       0 },
        { CT_DEV_EV_DEGRADED,   CT_DEV_EV_OFFLINE,      1 },
        { 0, 0, 0 }
};

/*
 * Device contract template implementation
 */

/*
 * ctmpl_device_dup
 *
 * The device contract template dup entry point.
 * Produces a new template carrying a copy of every field of the original,
 * both the generic ones and the device contract specific ones (A-set,
 * noneg term and a private copy of the minor path string, if one is set).
 *
 * Locking: ctmpl_copy() returns with the original template's ctmpl_lock
 * held. The lock is dropped only around the sleeping string duplication
 * and is held again when this function returns.
 */
static struct ct_template *
ctmpl_device_dup(struct ct_template *template)
{
        ctmpl_device_t *old = template->ctmpl_data;
        ctmpl_device_t *copy;
        char *buf;
        char *path = NULL;

        /* Both allocations may sleep, so do them before any lock is held. */
        copy = kmem_zalloc(sizeof (*copy), KM_SLEEP);
        buf = kmem_alloc(MAXPATHLEN, KM_SLEEP);

        /*
         * Generic fields first.
         * ctmpl_copy returns with old template lock held
         */
        ctmpl_copy(&copy->ctd_ctmpl, template);

        /* Device contract specific fields. */
        copy->ctd_ctmpl.ctmpl_data = copy;
        copy->ctd_aset = old->ctd_aset;
        copy->ctd_noneg = old->ctd_noneg;
        copy->ctd_minor = NULL;

        /*
         * Snapshot the minor path into the scratch buffer while the lock
         * protects it; with no path set the scratch buffer is not needed.
         */
        if (old->ctd_minor == NULL) {
                kmem_free(buf, MAXPATHLEN);
                buf = NULL;
        } else {
                ASSERT(strlen(old->ctd_minor) + 1 <= MAXPATHLEN);
                bcopy(old->ctd_minor, buf, strlen(old->ctd_minor) + 1);
        }

        /* Duplicate the snapshot (KM_SLEEP) with the template lock dropped. */
        mutex_exit(&template->ctmpl_lock);
        if (buf != NULL) {
                path = i_ddi_strdup(buf, KM_SLEEP);
                kmem_free(buf, MAXPATHLEN);
                buf = NULL;
        }
        mutex_enter(&template->ctmpl_lock);

        /* path is still NULL here when the original had no minor path. */
        copy->ctd_minor = path;

        ASSERT(buf == NULL);
        return (&copy->ctd_ctmpl);
}

/*
 * ctmpl_device_free
 *
 * The device contract template free entry point. Releases the minor path
 * string, if the template has one, and then the device template structure
 * itself.
 */
static void
ctmpl_device_free(struct ct_template *template)
{
        ctmpl_device_t *dtmpl = template->ctmpl_data;
        char *path = dtmpl->ctd_minor;

        if (path != NULL) {
                /* the string was allocated to exactly its length + NUL */
                kmem_free(path, strlen(path) + 1);
        }

        kmem_free(dtmpl, sizeof (*dtmpl));
}

/*
 * SAFE_EV
 *      The events a non-privileged process may place in its critical set.
 *      An unprivileged device contract owner has no control over when a
 *      device changes state, so every device event qualifies.
 *
 * EXCESS(evset)
 *      The subset of the critical event set "evset" that would require
 *      additional privilege, i.e. the bits outside SAFE_EV. For device
 *      contracts this currently evaluates to 0.
 */
#define SAFE_EV         (CT_DEV_ALLEVENT)
#define EXCESS(evset)   ((evset) & ~SAFE_EV)


/*
 * ctmpl_device_set
 *
 * The device contract template set entry point. Sets various terms in the
 * template. The non-negotiable term can only be set if the process has
 * the {PRIV_SYS_DEVICES} privilege asserted in its effective set.
 *
 * Arguments:
 *      tmpl    - template being modified; the caller holds ctmpl_lock
 *      kparam  - kernel copy of the parameter: kparam->param carries the
 *                term id and size, kparam->ctpm_kbuf the value
 *      cr      - credentials used for the privilege checks
 *
 * Returns 0 on success, otherwise:
 *      EINVAL  - unknown term, value buffer too small (or empty for
 *                CTDP_MINOR), invalid A-set, invalid noneg value, or a
 *                minor path that is malformed or is not a character/block
 *                device
 *      ERANGE  - the minor path could not be resolved
 *      other   - the error returned by the failing secpolicy_*() check
 */
static int
ctmpl_device_set(struct ct_template *tmpl, ct_kparam_t *kparam,
    const cred_t *cr)
{
        ctmpl_device_t *dtmpl = tmpl->ctmpl_data;
        ct_param_t *param = &kparam->param;
        int error;
        dev_info_t *dip;
        int spec_type;
        uint64_t param_value;
        char *str_value = NULL;

        ASSERT(MUTEX_HELD(&tmpl->ctmpl_lock));

        /*
         * param_value is not read on the CTDP_MINOR path; it is given a
         * value up front so that it is never uninitialized.
         */
        param_value = SAFE_EV;
        if (param->ctpm_id == CTDP_MINOR) {
                /*
                 * A zero-length buffer cannot hold even the terminating
                 * NUL, and indexing it at ctpm_size - 1 would write far
                 * outside the buffer. Reject it here rather than relying
                 * on the caller to have done so.
                 */
                if (param->ctpm_size == 0)
                        return (EINVAL);
                str_value = (char *)kparam->ctpm_kbuf;
                /* force termination of the caller supplied string */
                str_value[param->ctpm_size - 1] = '\0';
        } else {
                if (param->ctpm_size < sizeof (uint64_t))
                        return (EINVAL);
                param_value = *(uint64_t *)kparam->ctpm_kbuf;
        }

        switch (param->ctpm_id) {
        case CTDP_ACCEPT:
                /*
                 * The A-set must consist of known device states only and
                 * may be neither empty nor the set of all states.
                 */
                if (param_value & ~CT_DEV_ALLEVENT)
                        return (EINVAL);
                if (param_value == 0)
                        return (EINVAL);
                if (param_value == CT_DEV_ALLEVENT)
                        return (EINVAL);

                dtmpl->ctd_aset = param_value;
                break;
        case CTDP_NONEG:
                if (param_value != CTDP_NONEG_SET &&
                    param_value != CTDP_NONEG_CLEAR)
                        return (EINVAL);

                /*
                 * only privileged processes can designate a contract
                 * non-negotiable.
                 */
                if (param_value == CTDP_NONEG_SET &&
                    (error = secpolicy_sys_devices(cr)) != 0) {
                        return (error);
                }

                dtmpl->ctd_noneg = param_value;
                break;

        case CTDP_MINOR:
                /*
                 * The path must be absolute, must not start with (or
                 * reach back into) "/devices/", and must contain a ':'.
                 */
                if (*str_value != '/' ||
                    strncmp(str_value, "/devices/",
                    strlen("/devices/")) == 0 ||
                    strstr(str_value, "../devices/") != NULL ||
                    strchr(str_value, ':') == NULL) {
                        return (EINVAL);
                }

                spec_type = 0;
                dip = NULL;
                if (resolve_pathname(str_value, &dip, NULL, &spec_type) != 0) {
                        return (ERANGE);
                }
                /*
                 * Only the spec type is needed from the lookup; presumably
                 * resolve_pathname() returned the dip held, so let it go.
                 */
                ddi_release_devi(dip);

                if (spec_type != S_IFCHR && spec_type != S_IFBLK) {
                        return (EINVAL);
                }

                /* replace any previously set minor path */
                if (dtmpl->ctd_minor != NULL) {
                        kmem_free(dtmpl->ctd_minor,
                            strlen(dtmpl->ctd_minor) + 1);
                }
                dtmpl->ctd_minor = i_ddi_strdup(str_value, KM_SLEEP);
                break;
        case CTP_EV_CRITICAL:
                /*
                 * Currently for device contracts, any event
                 * may be added to the critical set. We retain the
                 * following code however for future enhancements.
                 */
                if (EXCESS(param_value) &&
                    (error = secpolicy_contract_event(cr)) != 0)
                        return (error);
                tmpl->ctmpl_ev_crit = param_value;
                break;
        default:
                return (EINVAL);
        }

        return (0);
}

/*
 * ctmpl_device_get
 *
 * The device contract template get entry point. Copies the value of the
 * requested term into kparam->ctpm_kbuf and records the size of the value
 * in kparam->ret_size. The caller holds the template's ctmpl_lock.
 *
 * Returns 0 on success, EINVAL for an unknown term or when the buffer is
 * too small for a fixed-size term, and ENOENT when the minor path is
 * requested but has not been set.
 */
static int
ctmpl_device_get(struct ct_template *template, ct_kparam_t *kparam)
{
        ctmpl_device_t *dtmpl = template->ctmpl_data;
        ct_param_t *param = &kparam->param;
        uint64_t *valuep = kparam->ctpm_kbuf;
        size_t pathlen;

        ASSERT(MUTEX_HELD(&template->ctmpl_lock));

        switch (param->ctpm_id) {
        case CTDP_ACCEPT:
        case CTDP_NONEG:
                /* fixed-size terms are returned as a uint64_t */
                if (param->ctpm_size < sizeof (uint64_t))
                        return (EINVAL);
                kparam->ret_size = sizeof (uint64_t);

                if (param->ctpm_id == CTDP_ACCEPT)
                        *valuep = dtmpl->ctd_aset;
                else
                        *valuep = dtmpl->ctd_noneg;
                return (0);

        case CTDP_MINOR:
                if (dtmpl->ctd_minor == NULL)
                        return (ENOENT);

                /*
                 * strlcpy() reports the length of the source string, so
                 * the size handed back is that length plus the NUL, even
                 * if the copy itself was truncated to ctpm_size.
                 */
                pathlen = strlcpy((char *)kparam->ctpm_kbuf,
                    dtmpl->ctd_minor, param->ctpm_size);
                kparam->ret_size = pathlen;
                kparam->ret_size++;
                return (0);

        default:
                return (EINVAL);
        }
}

/*
 * ctmpl_device_create
 *
 * Device contract type specific portion of creating a contract from a
 * template. Resolves the template's minor path to a dev_t/spec_type pair,
 * creates the contract on behalf of curproc and hands the new contract id
 * back through ctidp.
 *
 * Returns 0 on success, EINVAL if ctidp is NULL or the template has no
 * minor path, ERANGE if the path does not resolve to a usable character or
 * block device, or the error reported by contract_device_create().
 */
/*ARGSUSED*/
int
ctmpl_device_create(ct_template_t *template, ctid_t *ctidp)
{
        ctmpl_device_t *dtmpl;
        cont_device_t *ctd;
        char *path;
        dev_t dev = NODEV;
        int spec_type = 0;
        int error;
        int unresolved;

        if (ctidp == NULL)
                return (EINVAL);

        /* sleeping allocation, done before the template lock is taken */
        path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
        dtmpl = template->ctmpl_data;

        /* Take a private copy of the minor path under the template lock. */
        mutex_enter(&template->ctmpl_lock);
        if (dtmpl->ctd_minor == NULL) {
                /* incomplete template */
                mutex_exit(&template->ctmpl_lock);
                kmem_free(path, MAXPATHLEN);
                return (EINVAL);
        }
        ASSERT(strlen(dtmpl->ctd_minor) < MAXPATHLEN);
        bcopy(dtmpl->ctd_minor, path, strlen(dtmpl->ctd_minor) + 1);
        mutex_exit(&template->ctmpl_lock);

        /*
         * The lookup must succeed and must yield a real dev_t for a
         * character or block device.
         */
        unresolved = (resolve_pathname(path, NULL, &dev, &spec_type) != 0);
        if (unresolved ||
            dev == NODEV || dev == DDI_DEV_T_ANY || dev == DDI_DEV_T_NONE ||
            (spec_type != S_IFCHR && spec_type != S_IFBLK)) {
                CT_DEBUG((CE_WARN,
                    "tmpl_create: failed to find device: %s", path));
                kmem_free(path, MAXPATHLEN);
                return (ERANGE);
        }
        kmem_free(path, MAXPATHLEN);

        ctd = contract_device_create(dtmpl, dev, spec_type, curproc, &error);
        if (ctd == NULL) {
                CT_DEBUG((CE_WARN, "Failed to create device contract for "
                    "process (%d) with device (devt = %lu, spec_type = %s)",
                    curproc->p_pid, dev,
                    spec_type == S_IFCHR ? "S_IFCHR" : "S_IFBLK"));
                return (error);
        }

        /* read the id of the new contract under its own lock */
        mutex_enter(&ctd->cond_contract.ct_lock);
        *ctidp = ctd->cond_contract.ct_id;
        mutex_exit(&ctd->cond_contract.ct_lock);

        return (0);
}

/*
 * Device contract specific template entry points
 *
 * This operations vector is handed to ctmpl_init() when the default
 * template is built in contract_device_default(). The initializer is
 * positional, so the order of the entries must match the member order of
 * ctmplops_t; each entry is labelled with the member it fills.
 */
static ctmplops_t ctmpl_device_ops = {
        ctmpl_device_dup,               /* ctop_dup */
        ctmpl_device_free,              /* ctop_free */
        ctmpl_device_set,               /* ctop_set */
        ctmpl_device_get,               /* ctop_get */
        ctmpl_device_create,            /* ctop_create */
        CT_DEV_ALLEVENT                 /* all device events bitmask */
};


/*
 * Device contract implementation
 */

/*
 * contract_device_default
 *
 * The device contract default template entry point.  Creates a
 * device contract template with a default A-set and no "noneg" ,
 * with informative degrade events and critical offline events.
 * There is no default minor path.
 */
static ct_template_t *
contract_device_default(void)
{
        ctmpl_device_t *new;

        new = kmem_zalloc(sizeof (ctmpl_device_t), KM_SLEEP);
        ctmpl_init(&new->ctd_ctmpl, &ctmpl_device_ops, device_type, new);

        new->ctd_aset = CT_DEV_EV_ONLINE | CT_DEV_EV_DEGRADED;
        new->ctd_noneg = 0;
        new->ctd_ctmpl.ctmpl_ev_info = CT_DEV_EV_DEGRADED;
        new->ctd_ctmpl.ctmpl_ev_crit = CT_DEV_EV_OFFLINE;

        return (&new->ctd_ctmpl);
}

/*
 * contract_device_free
 *
 * Destroys the device contract specific portion of a contract and
 * frees the contract.
 *
 * ct: the contract whose ct_data points at the cont_device_t to release.
 *
 * The minor path string is freed first, then (on DEBUG kernels) a series
 * of sanity checks is run over the remaining fields, and finally the
 * cont_device_t itself is freed.
 */
static void
contract_device_free(contract_t *ct)
{
        cont_device_t *ctd = ct->ct_data;

        /* every device contract has a minor path of bounded length */
        ASSERT(ctd->cond_minor);
        ASSERT(strlen(ctd->cond_minor) < MAXPATHLEN);
        kmem_free(ctd->cond_minor, strlen(ctd->cond_minor) + 1);

        /* the dev_t must be a real device, not a wildcard/placeholder */
        ASSERT(ctd->cond_devt != DDI_DEV_T_ANY &&
            ctd->cond_devt != DDI_DEV_T_NONE && ctd->cond_devt != NODEV);

        ASSERT(ctd->cond_spec == S_IFBLK || ctd->cond_spec == S_IFCHR);

        /* the A-set holds only known device states; noneg is a boolean */
        ASSERT(!(ctd->cond_aset & ~CT_DEV_ALLEVENT));
        ASSERT(ctd->cond_noneg == 0 || ctd->cond_noneg == 1);

        /* negotiation state holds only known event and ack bits */
        ASSERT(!(ctd->cond_currev_type & ~CT_DEV_ALLEVENT));
        ASSERT(!(ctd->cond_currev_ack & ~(CT_ACK | CT_NACK)));

        /*
         * Exactly one of "an event id is recorded" and "no event type is
         * recorded" holds, and an ack may only be recorded when an event
         * id is.
         */
        ASSERT((ctd->cond_currev_id > 0) ^ (ctd->cond_currev_type == 0));
        ASSERT((ctd->cond_currev_id > 0) || (ctd->cond_currev_ack == 0));

        /* the contract must no longer be linked through cond_next */
        ASSERT(!list_link_active(&ctd->cond_next));

        kmem_free(ctd, sizeof (cont_device_t));
}

/*
 * contract_device_abandon
 *
 * The device contract abandon entry point.
 *
 * ct: the contract being abandoned. The caller must hold ct->ct_lock
 *     (asserted below).
 */
static void
contract_device_abandon(contract_t *ct)
{
        ASSERT(MUTEX_HELD(&ct->ct_lock));

        /*
         * device contracts cannot be inherited or orphaned.
         * Move the contract to the DEAD_STATE. It will be freed
         * once all references to it are gone.
         */
        contract_destroy(ct);
}

/*
 * contract_device_destroy
 *
 * The device contract destroy entry point.
 * Called from contract_destroy() to do any type specific destroy. Note
 * that destroy is a misnomer - this does not free the contract, it only
 * moves it to the dead state. A contract is actually freed via
 *      contract_rele() -> contract_dtor(), contop_free()
 *
 * Unlinks the contract from the contract list hanging off its devinfo
 * node and drops the contract reference held for that linkage.
 *
 * Locking: the caller must hold ct_lock.  The lock is dropped and
 * re-acquired inside this routine (unless the contract has no devinfo
 * node) and is held again on return.
 */
static void
contract_device_destroy(contract_t *ct)
{
        cont_device_t   *ctd;
        dev_info_t      *dip;

        ASSERT(MUTEX_HELD(&ct->ct_lock));

        /*
         * Loop until devi_ct_lock has been acquired (break) or the contract
         * is found to have no devinfo node (return). cond_dip is re-read on
         * every iteration because ct_lock is dropped while we back off.
         */
        for (;;) {
                ctd = ct->ct_data;
                dip = ctd->cond_dip;
                if (dip == NULL) {
                        /*
                         * The dip has been removed, this is a dangling contract
                         * Check that dip linkages are NULL
                         */
                        ASSERT(!list_link_active(&ctd->cond_next));
                        CT_DEBUG((CE_NOTE, "contract_device_destroy:"
                            " contract has no devinfo node. contract ctid : %d",
                            ct->ct_id));
                        return;
                }

                /*
                 * The intended lock order is : devi_ct_lock -> ct_count
                 * barrier -> ct_lock.
                 * However we can't do this here as dropping the ct_lock allows
                 * a race condition with i_ddi_free_node()/
                 * contract_device_remove_dip() which may free off dip before
                 * we can take devi_ct_lock. So use mutex_tryenter to avoid
                 * dropping ct_lock until we have acquired devi_ct_lock.
                 */
                if (mutex_tryenter(&(DEVI(dip)->devi_ct_lock)) != 0)
                        break;
                /* lost the race for devi_ct_lock: back off briefly and retry */
                mutex_exit(&ct->ct_lock);
                delay(drv_usectohz(1000));
                mutex_enter(&ct->ct_lock);
        }
        /* devi_ct_lock is held; drop ct_lock before waiting on the barrier */
        mutex_exit(&ct->ct_lock);

        /*
         * Waiting for the barrier to be released is strictly speaking not
         * necessary. But it simplifies the implementation of
         * contract_device_publish() by establishing the invariant that
         * device contracts cannot go away during negotiation.
         */
        ct_barrier_wait_for_release(dip);
        mutex_enter(&ct->ct_lock);

        list_remove(&(DEVI(dip)->devi_ct), ctd);
        ctd->cond_dip = NULL; /* no longer linked to dip */
        contract_rele(ct);      /* remove hold for dip linkage */

        /*
         * Release ct_lock, then devi_ct_lock, and finally retake ct_lock so
         * that we return holding only ct_lock, as on entry.
         */
        mutex_exit(&ct->ct_lock);
        mutex_exit(&(DEVI(dip)->devi_ct_lock));
        mutex_enter(&ct->ct_lock);
}

/*
 * contract_device_status
 *
 * The device contract status entry point.  Only invoked for a "detail"
 * level of CTD_FIXED or CTD_ALL.  Fills in the common status and adds the
 * device state, A-set and noneg term to "nvl"; for CTD_ALL the minor path
 * is added as well.
 */
static void
contract_device_status(contract_t *ct, zone_t *zone, int detail, nvlist_t *nvl,
    void *status, model_t model)
{
        cont_device_t *cdp = ct->ct_data;

        ASSERT(detail == CTD_FIXED || detail == CTD_ALL);

        mutex_enter(&ct->ct_lock);
        contract_status_common(ct, zone, status, model);

        /*
         * The A-set and noneg term never change once the contract exists,
         * so strictly they could be read without ct_lock.  The lock is
         * needed for the state anyway, so everything is read under it.
         */
        VERIFY(nvlist_add_uint32(nvl, CTDS_STATE, cdp->cond_state) == 0);
        VERIFY(nvlist_add_uint32(nvl, CTDS_ASET, cdp->cond_aset) == 0);
        VERIFY(nvlist_add_uint32(nvl, CTDS_NONEG, cdp->cond_noneg) == 0);

        /* the minor path is reported only at the CTD_ALL level */
        if (detail != CTD_FIXED) {
                ASSERT(cdp->cond_minor);
                VERIFY(nvlist_add_string(nvl, CTDS_MINOR,
                    cdp->cond_minor) == 0);
        }

        mutex_exit(&ct->ct_lock);
}

/*
 * Maps a negotiation result (CT_ACK, CT_NACK or CT_NONE) to its name for
 * use in messages.  Any other value yields "UNKNOWN".
 */
static char *
result_str(uint_t result)
{
        if (result == CT_ACK)
                return ("CT_ACK");
        if (result == CT_NACK)
                return ("CT_NACK");
        if (result == CT_NONE)
                return ("CT_NONE");

        return ("UNKNOWN");
}

/*
 * Maps a device state constant (CT_DEV_EV_ONLINE, CT_DEV_EV_DEGRADED or
 * CT_DEV_EV_OFFLINE) to a printable name for use in messages.  Any other
 * value yields "UNKNOWN".
 */
static char *
state_str(uint_t state)
{
        char *name;

        switch (state) {
        case CT_DEV_EV_ONLINE:
                name = "ONLINE";
                break;
        case CT_DEV_EV_DEGRADED:
                name = "DEGRADED";
                break;
        case CT_DEV_EV_OFFLINE:
                name = "OFFLINE";
                break;
        default:
                name = "UNKNOWN";
                break;
        }

        return (name);
}

/*
 * Looks up the state transition "old" -> "new" in ct_dev_negtable to
 * determine whether it is a synchronous state change.
 *
 * Returns:
 *      -2      "old" and "new" are the same state
 *      -1      the transition is not present in ct_dev_negtable
 *      otherwise the st_neg value of the matching table entry (callers
 *      treat a non-zero value as "synchronous")
 */
static int
is_sync_neg(uint_t old, uint_t new)
{
        int     idx = 0;

        ASSERT(old & CT_DEV_ALLEVENT);
        ASSERT(new & CT_DEV_ALLEVENT);

        if (new == old) {
                CT_DEBUG((CE_WARN, "is_sync_neg: transition to same state: %s",
                    state_str(new)));
                return (-2);
        }

        /* the table is terminated by an entry whose st_new is 0 */
        while (ct_dev_negtable[idx].st_new != 0) {
                if (ct_dev_negtable[idx].st_old == old &&
                    ct_dev_negtable[idx].st_new == new) {
                        return (ct_dev_negtable[idx].st_neg);
                }
                idx++;
        }

        CT_DEBUG((CE_WARN, "is_sync_neg: Unsupported state transition: "
            "old = %s -> new = %s", state_str(old), state_str(new)));

        return (-1);
}

/*
 * Cleans up cached dv_nodes for "dip" so that, once a contract holder has
 * released the device, its devinfo node can be successfully detached.
 *
 * Returns EDEADLOCK without cleaning anything if the node that would be
 * cleaned from (the parent, or dip itself when there is no parent) is
 * busy owned; otherwise returns 0.  The result of devfs_clean() itself is
 * ignored.
 */
static int
contract_device_dvclean(dev_info_t *dip)
{
        dev_info_t      *pdip;
        dev_info_t      *busynode;
        char            *devnm;

        ASSERT(dip);

        /* pdip can be NULL if we have contracts against the root dip */
        pdip = ddi_get_parent(dip);

        /* check the parent when there is one, else the node itself */
        busynode = (pdip != NULL) ? pdip : dip;
        if (DEVI_BUSY_OWNED(busynode)) {
                char            *path = kmem_alloc(MAXPATHLEN, KM_SLEEP);

                (void) ddi_pathname(dip, path);
                CT_DEBUG((CE_WARN, "ct_dv_clean: Parent node is busy owned, "
                    "device=%s", path));
                kmem_free(path, MAXPATHLEN);
                return (EDEADLOCK);
        }

        if (pdip == NULL) {
                /* no parent: clean starting at the node itself */
                (void) devfs_clean(dip, NULL, DV_CLEAN_FORCE);
                return (0);
        }

        /*
         * Clean just this node's name under the parent.  The first
         * character of the generated name is skipped (presumably the
         * leading '/' - verify against ddi_deviname()).
         */
        devnm = kmem_alloc(MAXNAMELEN + 1, KM_SLEEP);
        (void) ddi_deviname(dip, devnm);
        (void) devfs_clean(pdip, devnm + 1, DV_CLEAN_FORCE);
        kmem_free(devnm, MAXNAMELEN + 1);

        return (0);
}

/*
 * Endpoint of a ct_ctl_ack() or ct_ctl_nack() call from userland.
 * Results in the ACK or NACK being recorded on the dip for one particular
 * contract. The device contracts framework evaluates the ACK/NACKs for all
 * contracts against a device to determine if a particular device state change
 * should be allowed.
 *
 * "cmd" is CT_ACK or CT_NACK; "evtype"/"evid" identify the event being
 * responded to.
 *
 * Returns:
 *      0               the response was recorded, or an ACK arrived for a
 *                      non-current event or for a contract whose device is
 *                      gone
 *      ETIMEDOUT       a NACK arrived for a non-current event or for a
 *                      contract whose device is gone
 *      ESRCH           the contract has no device and is already dead
 */
static int
contract_device_ack_nack(contract_t *ct, uint_t evtype, uint64_t evid,
    uint_t cmd)
{
        cont_device_t *ctd = ct->ct_data;
        dev_info_t *dip;
        ctid_t  ctid;
        int error;

        ctid = ct->ct_id;

        CT_DEBUG((CE_NOTE, "ack_nack: entered: ctid %d", ctid));

        mutex_enter(&ct->ct_lock);
        CT_DEBUG((CE_NOTE, "ack_nack: contract lock acquired: %d", ctid));

        dip = ctd->cond_dip;

        ASSERT(ctd->cond_minor);
        ASSERT(strlen(ctd->cond_minor) < MAXPATHLEN);

        /*
         * Negotiation only if new state is not in A-set
         */
        ASSERT(!(ctd->cond_aset & evtype));

        /*
         * Negotiation only if transition is synchronous
         *
         * NOTE(review): is_sync_neg() returns negative values for
         * unsupported transitions, which also satisfy this assertion.
         */
        ASSERT(is_sync_neg(ctd->cond_state, evtype));

        /*
         * We shouldn't be negotiating if the "noneg" flag is set
         */
        ASSERT(!ctd->cond_noneg);

        /* keep the devinfo node held while ct_lock is dropped for dv_clean */
        if (dip)
                ndi_hold_devi(dip);

        mutex_exit(&ct->ct_lock);

        /*
         * dv_clean only if !NACK and offline state change
         */
        if (cmd != CT_NACK && evtype == CT_DEV_EV_OFFLINE && dip) {
                CT_DEBUG((CE_NOTE, "ack_nack: dv_clean: %d", ctid));
                error = contract_device_dvclean(dip);
                if (error != 0) {
                        /*
                         * dv_clean is best effort; a failure does not stop
                         * the response from being recorded. The hold taken
                         * above must NOT be dropped here: it is released
                         * exactly once below, and releasing it on this path
                         * as well would underflow the devinfo reference
                         * count.
                         */
                        CT_DEBUG((CE_NOTE, "ack_nack: dv_clean: failed: %d",
                            ctid));
                }
        }

        mutex_enter(&ct->ct_lock);

        /* single release matching the ndi_hold_devi() above */
        if (dip)
                ddi_release_devi(dip);

        if (dip == NULL) {
                if (ctd->cond_currev_id != evid) {
                        CT_DEBUG((CE_WARN, "%sACK for non-current event "
                            "(type=%s, id=%llu) on removed device",
                            cmd == CT_NACK ? "N" : "",
                            state_str(evtype), (unsigned long long)evid));
                        CT_DEBUG((CE_NOTE, "ack_nack: error: ESRCH, ctid: %d",
                            ctid));
                } else {
                        ASSERT(ctd->cond_currev_type == evtype);
                        CT_DEBUG((CE_WARN, "contract_ack: no such device: "
                            "ctid: %d", ctid));
                }
                error = (ct->ct_state == CTS_DEAD) ? ESRCH :
                    ((cmd == CT_NACK) ? ETIMEDOUT : 0);
                mutex_exit(&ct->ct_lock);
                return (error);
        }

        /*
         * Must follow lock order: devi_ct_lock -> ct_count barrier -> ct_lock
         */
        mutex_exit(&ct->ct_lock);

        mutex_enter(&DEVI(dip)->devi_ct_lock);
        mutex_enter(&ct->ct_lock);
        if (ctd->cond_currev_id != evid) {
                char *buf;
                mutex_exit(&ct->ct_lock);
                mutex_exit(&DEVI(dip)->devi_ct_lock);
                /* hold the node only while its pathname is generated */
                ndi_hold_devi(dip);
                buf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
                (void) ddi_pathname(dip, buf);
                ddi_release_devi(dip);
                CT_DEBUG((CE_WARN, "%sACK for non-current event "
                    "(type=%s, id=%llu) on device %s",
                    cmd == CT_NACK ? "N" : "",
                    state_str(evtype), (unsigned long long)evid, buf));
                kmem_free(buf, MAXPATHLEN);
                CT_DEBUG((CE_NOTE, "ack_nack: error: %d, ctid: %d",
                    cmd == CT_NACK ? ETIMEDOUT : 0, ctid));
                return (cmd == CT_ACK ? 0 : ETIMEDOUT);
        }

        ASSERT(ctd->cond_currev_type == evtype);
        ASSERT(cmd == CT_ACK || cmd == CT_NACK);

        CT_DEBUG((CE_NOTE, "ack_nack: setting %sACK for ctid: %d",
            cmd == CT_NACK ? "N" : "", ctid));

        /* record the response on the contract */
        ctd->cond_currev_ack = cmd;
        mutex_exit(&ct->ct_lock);

        /* one fewer outstanding response on this dip */
        ct_barrier_decr(dip);
        mutex_exit(&DEVI(dip)->devi_ct_lock);

        CT_DEBUG((CE_NOTE, "ack_nack: normal exit: ctid: %d", ctid));

        return (0);
}

/*
 * The device contract ack entry point: a userland contract holder has
 * approved (ACKed) a state change.  Returns whatever
 * contract_device_ack_nack() returns for a CT_ACK response.
 */
static int
contract_device_ack(contract_t *ct, uint_t evtype, uint64_t evid)
{
        int rv;

        rv = contract_device_ack_nack(ct, evtype, evid, CT_ACK);

        return (rv);
}

/*
 * The device contract nack entry point: a userland contract holder has
 * blocked (NACKed) a state change.  Returns whatever
 * contract_device_ack_nack() returns for a CT_NACK response.
 */
static int
contract_device_nack(contract_t *ct, uint_t evtype, uint64_t evid)
{
        int rv;

        rv = contract_device_ack_nack(ct, evtype, evid, CT_NACK);

        return (rv);
}

/*
 * The device contract newct entry point.  Creating a new contract
 * synchronously with the breaking of an existing one is currently not
 * supported for device contracts, so this always fails with ENOTSUP.
 */
/*ARGSUSED*/
static int
contract_device_newct(contract_t *ct)
{
        const int unsupported = ENOTSUP;

        return (unsupported);
}

/*
 * Core device contract implementation entry points.
 *
 * This operations vector is registered for the device contract type by
 * contract_device_init().  Query-acks use the generic
 * contract_qack_notsup handler (presumably "not supported"; confirm
 * against its definition).
 */
static contops_t contract_device_ops = {
        contract_device_free,           /* contop_free */
        contract_device_abandon,        /* contop_abandon */
        contract_device_destroy,        /* contop_destroy */
        contract_device_status,         /* contop_status */
        contract_device_ack,            /* contop_ack */
        contract_device_nack,           /* contop_nack */
        contract_qack_notsup,           /* contop_qack */
        contract_device_newct           /* contop_newct */
};

/*
 * contract_device_init
 *
 * Initializes the device contract type: registers the type under the name
 * "device" together with its operations vector and default-template
 * routine, and records the resulting type in device_type.
 */
void
contract_device_init(void)
{
        device_type = contract_type_init(CTT_DEVICE, "device",
            &contract_device_ops, contract_device_default);
}

/*
 * contract_device_create
 *
 * create a device contract given template "tmpl" and the "owner" process.
 * May fail and return NULL if project.max-contracts would have been exceeded.
 *
 * Common device contract creation routine called for both open-time and
 * non-open time device contract creation
 *
 * The minor path is taken from the template; "dev" and "spec_type" are
 * recorded in the new contract.  On return *errorp is:
 *      0       success, the new device contract is returned
 *      ERANGE  no devinfo node could be found for the template's minor
 *              path (NULL is returned)
 *      EAGAIN  contract_ctor() failed (NULL is returned)
 */
static cont_device_t *
contract_device_create(ctmpl_device_t *dtmpl, dev_t dev, int spec_type,
    proc_t *owner, int *errorp)
{
        cont_device_t *ctd;
        char *minor;
        char *path;
        dev_info_t *dip;

        ASSERT(dtmpl != NULL);
        ASSERT(dev != NODEV && dev != DDI_DEV_T_ANY && dev != DDI_DEV_T_NONE);
        ASSERT(spec_type == S_IFCHR || spec_type == S_IFBLK);
        ASSERT(errorp);

        *errorp = 0;

        path = kmem_alloc(MAXPATHLEN, KM_SLEEP);

        /* take a private copy of the template's minor path under its lock */
        mutex_enter(&dtmpl->ctd_ctmpl.ctmpl_lock);
        ASSERT(strlen(dtmpl->ctd_minor) < MAXPATHLEN);
        bcopy(dtmpl->ctd_minor, path, strlen(dtmpl->ctd_minor) + 1);
        mutex_exit(&dtmpl->ctd_ctmpl.ctmpl_lock);

        /* the hold on dip taken here is dropped before every return below */
        dip = e_ddi_hold_devi_by_path(path, 0);
        if (dip == NULL) {
                cmn_err(CE_WARN, "contract_create: Cannot find devinfo node "
                    "for device path (%s)", path);
                kmem_free(path, MAXPATHLEN);
                *errorp = ERANGE;
                return (NULL);
        }

        /*
         * Lock out any parallel contract negotiations
         */
        mutex_enter(&(DEVI(dip)->devi_ct_lock));
        ct_barrier_acquire(dip);
        mutex_exit(&(DEVI(dip)->devi_ct_lock));

        /* right-sized copy of the path; owned by the contract from here on */
        minor = i_ddi_strdup(path, KM_SLEEP);
        kmem_free(path, MAXPATHLEN);

        (void) contract_type_pbundle(device_type, owner);

        ctd = kmem_zalloc(sizeof (cont_device_t), KM_SLEEP);

        /*
         * Only we hold a reference to this contract. Safe to access
         * the fields without a ct_lock
         */
        ctd->cond_minor = minor;
        /*
         * It is safe to set the dip pointer in the contract
         * as the contract will always be destroyed before the dip
         * is released
         */
        ctd->cond_dip = dip;
        ctd->cond_devt = dev;
        ctd->cond_spec = spec_type;

        /*
         * Since we are able to lookup the device, it is either
         * online or degraded
         */
        ctd->cond_state = DEVI_IS_DEVICE_DEGRADED(dip) ?
            CT_DEV_EV_DEGRADED : CT_DEV_EV_ONLINE;

        /* the template lock is held across contract_ctor() */
        mutex_enter(&dtmpl->ctd_ctmpl.ctmpl_lock);
        ctd->cond_aset = dtmpl->ctd_aset;
        ctd->cond_noneg = dtmpl->ctd_noneg;

        /*
         * contract_ctor() initializes the common portion of a contract
         * contract_dtor() destroys the common portion of a contract
         */
        if (contract_ctor(&ctd->cond_contract, device_type, &dtmpl->ctd_ctmpl,
            ctd, 0, owner, B_TRUE)) {
                mutex_exit(&dtmpl->ctd_ctmpl.ctmpl_lock);
                /*
                 * contract_device_free() destroys the type specific
                 * portion of a contract and frees the contract.
                 * The "minor" path and "cred" is a part of the type specific
                 * portion of the contract and will be freed by
                 * contract_device_free()
                 */
                contract_device_free(&ctd->cond_contract);

                /* release barrier */
                mutex_enter(&(DEVI(dip)->devi_ct_lock));
                ct_barrier_release(dip);
                mutex_exit(&(DEVI(dip)->devi_ct_lock));

                ddi_release_devi(dip);
                *errorp = EAGAIN;
                return (NULL);
        }
        mutex_exit(&dtmpl->ctd_ctmpl.ctmpl_lock);

        /* set the negotiation and quantum timers to CT_DEV_ACKTIME */
        mutex_enter(&ctd->cond_contract.ct_lock);
        ctd->cond_contract.ct_ntime.ctm_total = CT_DEV_ACKTIME;
        ctd->cond_contract.ct_qtime.ctm_total = CT_DEV_ACKTIME;
        ctd->cond_contract.ct_ntime.ctm_start = -1;
        ctd->cond_contract.ct_qtime.ctm_start = -1;
        mutex_exit(&ctd->cond_contract.ct_lock);

        /*
         * Insert device contract into list hanging off the dip
         * Bump up the ref-count on the contract to reflect this
         */
        contract_hold(&ctd->cond_contract);
        mutex_enter(&(DEVI(dip)->devi_ct_lock));
        list_insert_tail(&(DEVI(dip)->devi_ct), ctd);

        /* release barrier */
        ct_barrier_release(dip);
        mutex_exit(&(DEVI(dip)->devi_ct_lock));

        /* drop the hold taken by e_ddi_hold_devi_by_path() */
        ddi_release_devi(dip);

        return (ctd);
}

/*
 * Creates an open-time device contract, i.e. one created synchronously
 * with a successful device open, from the calling lwp's active device
 * contract template.
 *
 * If "ctpp" is non-NULL it is set to the new contract, or to NULL when no
 * contract is created.
 *
 * Returns 0 if a contract was created or none was called for (the caller
 * is not in user context, or ctmpl_dup() of the active template returned
 * NULL).  Returns 1 if the minor path could not be derived from the
 * dev_t or the contract could not be created.
 */
int
contract_device_open(dev_t dev, int spec_type, contract_t **ctpp)
{
        ct_template_t  *tmpl;
        ctmpl_device_t *dtmpl;
        cont_device_t *newctd;
        klwp_t *lwp;
        char *devpath;
        int error;

        if (ctpp != NULL)
                *ctpp = NULL;

        /*
         * Without an lwp we are not in user context and there is no
         * active template to consult.
         */
        lwp = ttolwp(curthread);
        if (lwp == NULL) {
                CT_DEBUG((CE_NOTE, "contract_open: Not user-context"));
                return (0);
        }

        /* work on a private copy of the active device template */
        tmpl = ctmpl_dup(lwp->lwp_ct_active[device_type->ct_type_index]);
        if (tmpl == NULL)
                return (0);
        dtmpl = tmpl->ctmpl_data;

        /*
         * A minor path set in the template before the open is ignored;
         * the path of the minor actually opened is used instead.
         */
        mutex_enter(&tmpl->ctmpl_lock);
        if (dtmpl->ctd_minor != NULL) {
                CT_DEBUG((CE_NOTE, "contract_device_open(): Process %d: "
                    "ignoring device minor path in active template: %s",
                    curproc->p_pid, dtmpl->ctd_minor));
                /*
                 * We own this copy of the template, so its minor path
                 * can be freed without affecting the active template.
                 */
                kmem_free(dtmpl->ctd_minor, strlen(dtmpl->ctd_minor) + 1);
                dtmpl->ctd_minor = NULL;
        }
        mutex_exit(&tmpl->ctmpl_lock);

        devpath = kmem_alloc(MAXPATHLEN, KM_SLEEP);
        if (ddi_dev_pathname(dev, spec_type, devpath) != DDI_SUCCESS) {
                CT_DEBUG((CE_NOTE, "contract_device_open(): Failed to derive "
                    "minor path from dev_t,spec {%lu, %d} for process (%d)",
                    dev, spec_type, curproc->p_pid));
                ctmpl_free(tmpl);
                kmem_free(devpath, MAXPATHLEN);
                return (1);
        }

        /* lend the derived path to the template for the create call */
        mutex_enter(&tmpl->ctmpl_lock);
        ASSERT(dtmpl->ctd_minor == NULL);
        dtmpl->ctd_minor = devpath;
        mutex_exit(&tmpl->ctmpl_lock);

        newctd = contract_device_create(dtmpl, dev, spec_type, curproc,
            &error);

        /*
         * Detach the borrowed path again before the template is freed;
         * the MAXPATHLEN buffer is freed separately below.
         */
        mutex_enter(&tmpl->ctmpl_lock);
        ASSERT(dtmpl->ctd_minor);
        dtmpl->ctd_minor = NULL;
        mutex_exit(&tmpl->ctmpl_lock);
        ctmpl_free(tmpl);
        kmem_free(devpath, MAXPATHLEN);

        if (newctd == NULL) {
                cmn_err(CE_NOTE, "contract_device_open(): Failed to "
                    "create device contract for process (%d) holding "
                    "device (devt = %lu, spec_type = %d)",
                    curproc->p_pid, dev, spec_type);
                return (1);
        }

        if (ctpp != NULL) {
                contract_t *ct = &newctd->cond_contract;

                mutex_enter(&ct->ct_lock);
                *ctpp = ct;
                mutex_exit(&ct->ct_lock);
        }

        return (0);
}

/*
 * Called during contract negotiation by the device contract framework to wait
 * for ACKs or NACKs from contract holders. If all responses are not received
 * before a specified timeout, this routine times out.
 *
 * Must be called with the dip's devi_ct_lock held.  Only contracts that
 * match "dev"/"spec_type" (every contract on the dip when dev is
 * DDI_DEV_T_ANY) and that are negotiable are examined.
 *
 * Returns:
 *      CT_NACK         as soon as a matching contract is found to have
 *                      NACKed the event
 *      CT_ACK          no NACK was found and at least one matching
 *                      contract ACKed (even if the wait timed out)
 *      CT_NONE         no matching contract responded
 */
static uint_t
wait_for_acks(dev_info_t *dip, dev_t dev, int spec_type, uint_t evtype)
{
        cont_device_t *ctd;
        int timed_out = 0;
        int result = CT_NONE;
        int ack;
        char *f = "wait_for_acks";

        /* check dip before it is dereferenced by the lock assertion */
        ASSERT(dip);
        ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock)));
        ASSERT(evtype & CT_DEV_ALLEVENT);
        ASSERT(dev != NODEV && dev != DDI_DEV_T_NONE);
        ASSERT((dev == DDI_DEV_T_ANY && spec_type == 0) ||
            (spec_type == S_IFBLK || spec_type == S_IFCHR));

        CT_DEBUG((CE_NOTE, "%s: entered: dip: %p", f, (void *)dip));

        if (ct_barrier_wait_for_empty(dip, CT_DEV_ACKTIME) == -1) {
                /*
                 * some contract owner(s) didn't respond in time
                 */
                CT_DEBUG((CE_NOTE, "%s: timed out: %p", f, (void *)dip));
                timed_out = 1;
        }

        /* tally the responses recorded on each matching contract */
        ack = 0;
        for (ctd = list_head(&(DEVI(dip)->devi_ct)); ctd != NULL;
            ctd = list_next(&(DEVI(dip)->devi_ct), ctd)) {

                mutex_enter(&ctd->cond_contract.ct_lock);

                ASSERT(ctd->cond_dip == dip);

                /* skip contracts on a different minor (dev_t / spec type) */
                if (dev != DDI_DEV_T_ANY && dev != ctd->cond_devt) {
                        mutex_exit(&ctd->cond_contract.ct_lock);
                        continue;
                }
                if (dev != DDI_DEV_T_ANY && spec_type != ctd->cond_spec) {
                        mutex_exit(&ctd->cond_contract.ct_lock);
                        continue;
                }

                /* skip if non-negotiable contract */
                if (ctd->cond_noneg) {
                        mutex_exit(&ctd->cond_contract.ct_lock);
                        continue;
                }

                ASSERT(ctd->cond_currev_type == evtype);
                if (ctd->cond_currev_ack == CT_NACK) {
                        /* a single NACK blocks the event: stop looking */
                        CT_DEBUG((CE_NOTE, "%s: found a NACK,result = NACK: %p",
                            f, (void *)dip));
                        mutex_exit(&ctd->cond_contract.ct_lock);
                        return (CT_NACK);
                } else if (ctd->cond_currev_ack == CT_ACK) {
                        ack = 1;
                        CT_DEBUG((CE_NOTE, "%s: found a ACK: %p",
                            f, (void *)dip));
                }
                mutex_exit(&ctd->cond_contract.ct_lock);
        }

        /* result is still CT_NONE unless at least one ACK was seen */
        if (ack) {
                result = CT_ACK;
                CT_DEBUG((CE_NOTE, "%s: result = ACK, dip=%p", f, (void *)dip));
        } else if (timed_out) {
                CT_DEBUG((CE_NOTE, "%s: result = NONE (timed-out), dip=%p",
                    f, (void *)dip));
        } else {
                CT_DEBUG((CE_NOTE, "%s: result = NONE, dip=%p",
                    f, (void *)dip));
        }

        return (result);
}

/*
 * Determines the current state of a device (i.e. a devinfo node) as a
 * CT_DEV_EV_* constant.  A node that is offline or down is reported as
 * OFFLINE; otherwise a degraded node is reported as DEGRADED; anything
 * else is ONLINE.
 */
static int
get_state(dev_info_t *dip)
{
        int state = CT_DEV_EV_ONLINE;

        if (DEVI_IS_DEVICE_OFFLINE(dip) || DEVI_IS_DEVICE_DOWN(dip))
                state = CT_DEV_EV_OFFLINE;
        else if (DEVI_IS_DEVICE_DEGRADED(dip))
                state = CT_DEV_EV_DEGRADED;

        return (state);
}

/*
 * Records the current state of the device, as reported by get_state(), in
 * every device contract on the dip's contract list.  The contract barrier
 * must be held by the caller.
 */
static void
set_cond_state(dev_info_t *dip)
{
        uint_t newstate = get_state(dip);
        cont_device_t *cdp;

        /* verify that barrier is held */
        ASSERT(ct_barrier_held(dip));

        cdp = list_head(&(DEVI(dip)->devi_ct));
        while (cdp != NULL) {
                contract_t *ct = &cdp->cond_contract;

                /* cond_state is updated under the contract's own lock */
                mutex_enter(&ct->ct_lock);
                ASSERT(cdp->cond_dip == dip);
                cdp->cond_state = newstate;
                mutex_exit(&ct->ct_lock);

                cdp = list_next(&(DEVI(dip)->devi_ct), cdp);
        }
}

/*
 * Core routine called by event-specific routines when an event occurs.
 * Determines if an event should be published, and if it is to be
 * published, whether a negotiation should take place. Also implements
 * NEGEND events which publish the final disposition of an event after
 * negotiations are complete.
 *
 * When an event occurs on a minor node, this routine walks the list of
 * contracts hanging off a devinfo node and for each contract on the affected
 * dip, evaluates the following cases
 *
 *      a. an event that is synchronous, breaks the contract and NONEG not set
 *              - bumps up the outstanding negotiation counts on the dip
 *              - marks the dip as undergoing negotiation (devi_ct_neg)
 *              - event of type CTE_NEG is published
 *      b. an event that is synchronous, breaks the contract and NONEG is set
 *              - sets the final result to CT_NACK, event is blocked
 *              - does not publish an event
 *      c. event is asynchronous and breaks the contract
 *              - publishes a critical event irrespective of whether the
 *                NONEG flag is set, since the contract will be broken and
 *                contract owner needs to be informed.
 *      d. No contract breakage but the owner has subscribed to the event
 *              - publishes the event irrespective of the NONEG event as the
 *                owner has explicitly subscribed to the event.
 *      e. NEGEND event
 *              - publishes a critical event. Should only be doing this
 *                if NONEG is not set.
 *      f. all other events
 *              - Since a contract is not broken and this event has not been
 *                subscribed to, this event does not need to be published
 *                for this contract.
 *
 *      Once an event is published, what happens next depends on the type of
 *      event:
 *
 *      a. NEGEND event
 *              - cleanup all state associated with the preceding negotiation
 *                and return CT_ACK to the caller of contract_device_publish()
 *      b. NACKed event
 *              - One or more contracts had the NONEG term, so the event was
 *                blocked. Return CT_NACK to the caller.
 *      c. Negotiated event
 *              - Call wait_for_acks() to wait for responses from contract
 *              holders. The end result is either CT_ACK (event is permitted),
 *              CT_NACK (event is blocked) or CT_NONE (no contract owner)
 *              responded. This result is returned back to the caller.
 *      d. All other events
 *              - If the event was asynchronous (i.e. not negotiated) or
 *              a contract was not broken return CT_ACK to the caller.
 */
/*
 * Arguments:
 *      dip       - devinfo node whose contract list (devi_ct) is walked
 *      dev       - dev_t compared with each contract's cond_devt, or
 *                  DDI_DEV_T_ANY to match every contract on the node
 *      spec_type - S_IFBLK or S_IFCHR (0 with DDI_DEV_T_ANY); compared with
 *                  cond_spec when dev is not DDI_DEV_T_ANY
 *      evtype    - device event being published, or CT_EV_NEGEND
 *      tnvl      - template nvlist; duplicated for every event that is
 *                  published and always freed here before returning
 *
 * Returns CT_ACK, CT_NACK or CT_NONE.
 */
static uint_t
contract_device_publish(dev_info_t *dip, dev_t dev, int spec_type,
    uint_t evtype, nvlist_t *tnvl)
{
        cont_device_t *ctd;
        uint_t result = CT_NONE;
        uint64_t evid = 0;
        uint64_t nevid = 0;
        char *path = NULL;
        int negend;
        int match;
        int sync = 0;
        contract_t *ct;
        ct_kevent_t *event;
        nvlist_t *nvl;
        int broken = 0;

        ASSERT(dip);
        ASSERT(dev != NODEV && dev != DDI_DEV_T_NONE);
        ASSERT((dev == DDI_DEV_T_ANY && spec_type == 0) ||
            (spec_type == S_IFBLK || spec_type == S_IFCHR));
        ASSERT(evtype == 0 || (evtype & CT_DEV_ALLEVENT));

        /*
         * Is this a synchronous state change ?
         *
         * A return of -1 or -2 from is_sync_neg() turns this call into a
         * no-op: DEVI_CT_NOP is set on the node so that a later CT_EV_NEGEND
         * call (the else branch below) clears the flag and returns CT_ACK
         * without touching any contract. Neither path takes devi_ct_lock.
         */
        if (evtype != CT_EV_NEGEND) {
                sync = is_sync_neg(get_state(dip), evtype);
                /* NOP if unsupported transition */
                if (sync == -2 || sync == -1) {
                        DEVI(dip)->devi_flags |= DEVI_CT_NOP;
                        result = (sync == -2) ? CT_ACK : CT_NONE;
                        goto out;
                }
                CT_DEBUG((CE_NOTE, "publish: is%s sync state change",
                    sync ? "" : " not"));
        } else if (DEVI(dip)->devi_flags & DEVI_CT_NOP) {
                DEVI(dip)->devi_flags &= ~DEVI_CT_NOP;
                result = CT_ACK;
                goto out;
        }

        /*
         * The device path is only consulted by the sanity ASSERT inside
         * the contract walk below; it is freed at "out".
         */
        path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
        (void) ddi_pathname(dip, path);

        mutex_enter(&(DEVI(dip)->devi_ct_lock));

        /*
         * Negotiation end - set the state of the device in the contract
         */
        if (evtype == CT_EV_NEGEND) {
                CT_DEBUG((CE_NOTE, "publish: negend: setting cond state"));
                set_cond_state(dip);
        }

        /*
         * If this device didn't go through negotiation, don't publish
         * a NEGEND event - simply release the barrier to allow other
         * device events in.
         */
        negend = 0;
        if (evtype == CT_EV_NEGEND && !DEVI(dip)->devi_ct_neg) {
                CT_DEBUG((CE_NOTE, "publish: no negend reqd. release barrier"));
                ct_barrier_release(dip);
                mutex_exit(&(DEVI(dip)->devi_ct_lock));
                result = CT_ACK;
                goto out;
        } else if (evtype == CT_EV_NEGEND) {
                /*
                 * There are negotiated contract breakages that
                 * need a NEGEND event
                 */
                ASSERT(ct_barrier_held(dip));
                negend = 1;
                CT_DEBUG((CE_NOTE, "publish: setting negend flag"));
        } else {
                /*
                 * This is a new event, not a NEGEND event. Wait for previous
                 * contract events to complete.
                 */
                ct_barrier_acquire(dip);
        }


        /*
         * Walk every contract attached to this devinfo node. devi_ct_lock
         * is held for the whole walk; each contract's ct_lock is taken
         * while its state is examined and updated.
         */
        match = 0;
        for (ctd = list_head(&(DEVI(dip)->devi_ct)); ctd != NULL;
            ctd = list_next(&(DEVI(dip)->devi_ct), ctd)) {

                ctid_t ctid;
                size_t len = strlen(path);

                mutex_enter(&ctd->cond_contract.ct_lock);

                /* cond_minor must be "<path of dip>:<something>" */
                ASSERT(ctd->cond_dip == dip);
                ASSERT(ctd->cond_minor);
                ASSERT(strncmp(ctd->cond_minor, path, len) == 0 &&
                    ctd->cond_minor[len] == ':');

                /*
                 * Unless the caller asked for DDI_DEV_T_ANY, both the dev_t
                 * and the spec type must match this contract.
                 */
                if (dev != DDI_DEV_T_ANY && dev != ctd->cond_devt) {
                        mutex_exit(&ctd->cond_contract.ct_lock);
                        continue;
                }
                if (dev != DDI_DEV_T_ANY && spec_type != ctd->cond_spec) {
                        mutex_exit(&ctd->cond_contract.ct_lock);
                        continue;
                }

                /* We have a matching contract */
                match = 1;
                ctid = ctd->cond_contract.ct_id;
                CT_DEBUG((CE_NOTE, "publish: found matching contract: %d",
                    ctid));

                /*
                 * There are 4 possible cases
                 * 1. A contract is broken (dev not in acceptable state) and
                 *    the state change is synchronous - start negotiation
                 *    by sending a CTE_NEG critical event.
                 * 2. A contract is broken and the state change is
                 *    asynchronous - just send a critical event and
                 *    break the contract.
                 * 3. Contract is not broken, but consumer has subscribed
                 *    to the event as a critical or informative event
                 *    - just send the appropriate event
                 * 4. contract waiting for negend event - just send the critical
                 *    NEGEND event.
                 */
                /*
                 * The contract is broken when this is not a NEGEND call and
                 * the event is not in the contract's cond_aset mask.
                 */
                broken = 0;
                if (!negend && !(evtype & ctd->cond_aset)) {
                        broken = 1;
                        CT_DEBUG((CE_NOTE, "publish: Contract broken: %d",
                            ctid));
                }

                /*
                 * Don't send event if
                 *      - contract is not broken AND
                 *      - contract holder has not subscribed to this event AND
                 *      - contract not waiting for a NEGEND event
                 */
                if (!broken && !EVSENDP(ctd, evtype) &&
                    !ctd->cond_neg) {
                        CT_DEBUG((CE_NOTE, "contract_device_publish(): "
                            "contract (%d): no publish reqd: event %d",
                            ctd->cond_contract.ct_id, evtype));
                        mutex_exit(&ctd->cond_contract.ct_lock);
                        continue;
                }

                /*
                 * Note: need to kmem_zalloc() the event so mutexes are
                 * initialized automatically
                 */
                ct = &ctd->cond_contract;
                event = kmem_zalloc(sizeof (ct_kevent_t), KM_SLEEP);
                event->cte_type = evtype;

                if (broken && sync) {
                        /* case 1: negotiated (synchronous) breakage */
                        CT_DEBUG((CE_NOTE, "publish: broken + sync: "
                            "ctid: %d", ctid));
                        ASSERT(!negend);
                        ASSERT(ctd->cond_currev_id == 0);
                        ASSERT(ctd->cond_currev_type == 0);
                        ASSERT(ctd->cond_currev_ack == 0);
                        ASSERT(ctd->cond_neg == 0);
                        if (ctd->cond_noneg) {
                                /*
                                 * Nothing to publish. Event has been blocked.
                                 * result is not reset later in the walk, so
                                 * one NONEG contract is enough to NACK.
                                 */
                                CT_DEBUG((CE_NOTE, "publish: sync and noneg:"
                                    "not publishing blocked ev: ctid: %d",
                                    ctid));
                                result = CT_NACK;
                                kmem_free(event, sizeof (ct_kevent_t));
                                mutex_exit(&ctd->cond_contract.ct_lock);
                                continue;
                        }
                        event->cte_flags = CTE_NEG; /* critical neg. event */
                        ctd->cond_currev_type = event->cte_type;
                        /* one more contract whose response is outstanding */
                        ct_barrier_incr(dip);
                        DEVI(dip)->devi_ct_neg = 1; /* waiting for negend */
                        ctd->cond_neg = 1;
                } else if (broken && !sync) {
                        /* case 2: asynchronous breakage */
                        CT_DEBUG((CE_NOTE, "publish: broken + async: ctid: %d",
                            ctid));
                        ASSERT(!negend);
                        ASSERT(ctd->cond_currev_id == 0);
                        ASSERT(ctd->cond_currev_type == 0);
                        ASSERT(ctd->cond_currev_ack == 0);
                        ASSERT(ctd->cond_neg == 0);
                        event->cte_flags = 0; /* critical event */
                } else if (EVSENDP(ctd, event->cte_type)) {
                        /* case 3: not broken, but the owner subscribed */
                        CT_DEBUG((CE_NOTE, "publish: event suscrib: ctid: %d",
                            ctid));
                        ASSERT(!negend);
                        ASSERT(ctd->cond_currev_id == 0);
                        ASSERT(ctd->cond_currev_type == 0);
                        ASSERT(ctd->cond_currev_ack == 0);
                        ASSERT(ctd->cond_neg == 0);
                        event->cte_flags = EVINFOP(ctd, event->cte_type) ?
                            CTE_INFO : 0;
                } else if (ctd->cond_neg) {
                        /*
                         * case 4: NEGEND for a contract that was negotiating.
                         * Remember the id of the negotiation event (nevid) for
                         * the nvlist below and clear the per-contract
                         * negotiation state.
                         */
                        CT_DEBUG((CE_NOTE, "publish: NEGEND: ctid: %d", ctid));
                        ASSERT(negend);
                        ASSERT(ctd->cond_noneg == 0);
                        nevid = ctd->cond_contract.ct_nevent ?
                            ctd->cond_contract.ct_nevent->cte_id : 0;
                        ASSERT(ctd->cond_currev_id == nevid);
                        event->cte_flags = 0;   /* NEGEND is always critical */
                        ctd->cond_currev_id = 0;
                        ctd->cond_currev_type = 0;
                        ctd->cond_currev_ack = 0;
                        ctd->cond_neg = 0;
                } else {
                        /* nothing to publish for this contract */
                        CT_DEBUG((CE_NOTE, "publish: not publishing event for "
                            "ctid: %d, evtype: %d",
                            ctd->cond_contract.ct_id, event->cte_type));
                        ASSERT(!negend);
                        ASSERT(ctd->cond_currev_id == 0);
                        ASSERT(ctd->cond_currev_type == 0);
                        ASSERT(ctd->cond_currev_ack == 0);
                        ASSERT(ctd->cond_neg == 0);
                        kmem_free(event, sizeof (ct_kevent_t));
                        mutex_exit(&ctd->cond_contract.ct_lock);
                        continue;
                }

                /*
                 * Each published event gets its own copy of the caller's
                 * nvlist. For NEGEND the copy additionally carries the id of
                 * the negotiation event (CTS_NEVID), and CTS_NEWCT is
                 * rewritten: 0 if the caller passed 1, otherwise this
                 * contract's id.
                 */
                nvl = NULL;
                if (tnvl) {
                        VERIFY(nvlist_dup(tnvl, &nvl, 0) == 0);
                        if (negend) {
                                int32_t newct = 0;
                                ASSERT(ctd->cond_noneg == 0);
                                VERIFY(nvlist_add_uint64(nvl, CTS_NEVID, nevid)
                                    == 0);
                                VERIFY(nvlist_lookup_int32(nvl, CTS_NEWCT,
                                    &newct) == 0);
                                VERIFY(nvlist_add_int32(nvl, CTS_NEWCT,
                                    newct == 1 ? 0 :
                                    ctd->cond_contract.ct_id) == 0);
                                CT_DEBUG((CE_NOTE, "publish: negend: ctid: %d "
                                    "CTS_NEVID: %llu, CTS_NEWCT: %s",
                                    ctid, (unsigned long long)nevid,
                                    newct ? "success" : "failure"));

                        }
                }

                /*
                 * A negotiation was just started on this contract: stamp
                 * both timers with the current lbolt.
                 */
                if (ctd->cond_neg) {
                        ASSERT(ctd->cond_contract.ct_ntime.ctm_start == -1);
                        ASSERT(ctd->cond_contract.ct_qtime.ctm_start == -1);
                        ctd->cond_contract.ct_ntime.ctm_start = ddi_get_lbolt();
                        ctd->cond_contract.ct_qtime.ctm_start =
                            ctd->cond_contract.ct_ntime.ctm_start;
                }

                /*
                 * by holding the dip's devi_ct_lock we ensure that
                 * all ACK/NACKs are held up until we have finished
                 * publishing to all contracts.
                 *
                 * The contract's own ct_lock is dropped around
                 * cte_publish_all() and retaken afterwards.
                 */
                mutex_exit(&ctd->cond_contract.ct_lock);
                evid = cte_publish_all(ct, event, nvl, NULL);
                mutex_enter(&ctd->cond_contract.ct_lock);

                if (ctd->cond_neg) {
                        /* record the id of the negotiation event just sent */
                        ASSERT(!negend);
                        ASSERT(broken);
                        ASSERT(sync);
                        ASSERT(!ctd->cond_noneg);
                        CT_DEBUG((CE_NOTE, "publish: sync break, setting evid"
                            ": %d", ctid));
                        ctd->cond_currev_id = evid;
                } else if (negend) {
                        /* negotiation is over: mark both timers as unset */
                        ctd->cond_contract.ct_ntime.ctm_start = -1;
                        ctd->cond_contract.ct_qtime.ctm_start = -1;
                }
                mutex_exit(&ctd->cond_contract.ct_lock);
        }

        /*
         * If "negend" set counter back to initial state (-1) so that
         * other events can be published. Also clear the negotiation flag
         * on dip.
         *
         * 0 .. n are used for counting.
         * -1 indicates counter is available for use.
         */
        if (negend) {
                /*
                 * devi_ct_count not necessarily 0. We may have
                 * timed out in which case, count will be non-zero.
                 */
                ct_barrier_release(dip);
                DEVI(dip)->devi_ct_neg = 0;
                CT_DEBUG((CE_NOTE, "publish: negend: reset dip state: dip=%p",
                    (void *)dip));
        } else if (DEVI(dip)->devi_ct_neg) {
                ASSERT(match);
                ASSERT(!ct_barrier_empty(dip));
                CT_DEBUG((CE_NOTE, "publish: sync count=%d, dip=%p",
                    DEVI(dip)->devi_ct_count, (void *)dip));
        } else {
                /*
                 * for non-negotiated events or subscribed events or no
                 * matching contracts
                 */
                ASSERT(ct_barrier_empty(dip));
                ASSERT(DEVI(dip)->devi_ct_neg == 0);
                CT_DEBUG((CE_NOTE, "publish: async/non-nego/subscrib/no-match: "
                    "dip=%p", (void *)dip));

                /*
                 * The barrier is deliberately not released here. Only a
                 * call made with CT_EV_NEGEND (i.e. on behalf of
                 * contract_device_negend()) resets the counter to the READY
                 * state, -1. This function is called that way for every
                 * event, whether a NEGEND event is needed or not, but the
                 * NEGEND event itself is only published if the event whose
                 * end it signals was a negotiated event for the contract.
                 */
        }

        if (!match) {
                /* No matching contracts */
                CT_DEBUG((CE_NOTE, "publish: No matching contract"));
                result = CT_NONE;
        } else if (result == CT_NACK) {
                /*
                 * a non-negotiable contract exists and this is a neg. event.
                 * wait_for_acks() is still called, but its return value is
                 * discarded: the result stays CT_NACK.
                 */
                CT_DEBUG((CE_NOTE, "publish: found 1 or more NONEG contract"));
                (void) wait_for_acks(dip, dev, spec_type, evtype);
        } else if (DEVI(dip)->devi_ct_neg) {
                /* one or more contracts going through negotiations */
                CT_DEBUG((CE_NOTE, "publish: sync contract: waiting"));
                result = wait_for_acks(dip, dev, spec_type, evtype);
        } else {
                /* no negotiated contracts or no broken contracts or NEGEND */
                CT_DEBUG((CE_NOTE, "publish: async/no-break/negend"));
                result = CT_ACK;
        }

        /*
         * Release the lock only now so that the only point where we
         * drop the lock is in wait_for_acks(). This is so that we don't
         * miss cv_signal/cv_broadcast from contract holders
         */
        CT_DEBUG((CE_NOTE, "publish: dropping devi_ct_lock"));
        mutex_exit(&(DEVI(dip)->devi_ct_lock));

out:
        /* Common exit: the caller's nvlist is always consumed here. */
        nvlist_free(tnvl);
        if (path)
                kmem_free(path, MAXPATHLEN);


        CT_DEBUG((CE_NOTE, "publish: result = %s", result_str(result)));
        return (result);
}


/*
 * contract_device_offline
 *
 * Called by the I/O framework when a device is being offlined. Publishes
 * CT_DEV_EV_OFFLINE through contract_device_publish() and hands the
 * outcome back to the caller.
 *
 * The nvlist allocated here is passed to contract_device_publish(), which
 * frees it.
 */
ct_ack_t
contract_device_offline(dev_info_t *dip, dev_t dev, int spec_type)
{
        nvlist_t *attrs = NULL;
        uint_t verdict;

        VERIFY(nvlist_alloc(&attrs, NV_UNIQUE_NAME, KM_SLEEP) == 0);

        verdict = contract_device_publish(dip, dev, spec_type,
            CT_DEV_EV_OFFLINE, attrs);
        if (verdict != CT_NACK)
                return (verdict);

        /*
         * The offline was NACKed. The final result is already known, so
         * the framework expects the NEGEND to be issued from here.
         */
        contract_device_negend(dip, dev, spec_type, CT_EV_FAILURE);
        return (verdict);
}

/*
 * contract_device_degrade
 *
 * Called by the I/O framework when a device moves to the degraded state.
 * Publishes CT_DEV_EV_DEGRADED; the result of the publish is ignored.
 * The nvlist allocated here is freed by contract_device_publish().
 */
/*ARGSUSED*/
void
contract_device_degrade(dev_info_t *dip, dev_t dev, int spec_type)
{
        nvlist_t *attrs = NULL;

        VERIFY(nvlist_alloc(&attrs, NV_UNIQUE_NAME, KM_SLEEP) == 0);
        (void) contract_device_publish(dip, dev, spec_type,
            CT_DEV_EV_DEGRADED, attrs);
}

/*
 * contract_device_undegrade
 *
 * Called by the I/O framework when a device moves from the degraded state
 * back to online. Publishes CT_DEV_EV_ONLINE; the result of the publish is
 * ignored. The nvlist allocated here is freed by contract_device_publish().
 */
/*ARGSUSED*/
void
contract_device_undegrade(dev_info_t *dip, dev_t dev, int spec_type)
{
        nvlist_t *attrs = NULL;

        VERIFY(nvlist_alloc(&attrs, NV_UNIQUE_NAME, KM_SLEEP) == 0);
        (void) contract_device_publish(dip, dev, spec_type,
            CT_DEV_EV_ONLINE, attrs);
}

/*
 * contract_device_negend
 *
 * Publishes a CT_EV_NEGEND event carrying the final disposition of a state
 * change. Contracts that went through a negotiation (the device left the
 * contract's acceptable state and the change was synchronous) receive the
 * event, which is always critical.
 *
 * "result" must be CT_EV_SUCCESS or CT_EV_FAILURE; it is recorded in the
 * event's nvlist as CTS_NEWCT (1 for success, 0 for failure). The nvlist
 * is freed by contract_device_publish().
 */
void
contract_device_negend(dev_info_t *dip, dev_t dev, int spec_type, int result)
{
        nvlist_t *attrs = NULL;
        int32_t newct;

        ASSERT(result == CT_EV_SUCCESS || result == CT_EV_FAILURE);

        CT_DEBUG((CE_NOTE, "contract_device_negend(): entered: result: %d, "
            "dip: %p", result, (void *)dip));

        newct = (result == CT_EV_SUCCESS) ? 1 : 0;

        VERIFY(nvlist_alloc(&attrs, NV_UNIQUE_NAME, KM_SLEEP) == 0);
        VERIFY(nvlist_add_int32(attrs, CTS_NEWCT, newct) == 0);

        (void) contract_device_publish(dip, dev, spec_type, CT_EV_NEGEND,
            attrs);

        CT_DEBUG((CE_NOTE, "contract_device_negend(): exit dip: %p",
            (void *)dip));
}

/*
 * contract_device_negotiate
 *
 * Entry point for other subsystems (such as LDI) to start negotiations
 * when a synchronous device state change occurs. Only CT_DEV_EV_OFFLINE
 * is supported; for it the result of contract_device_offline() is
 * returned. Any other event type panics the system.
 */
ct_ack_t
contract_device_negotiate(dev_info_t *dip, dev_t dev, int spec_type,
    uint_t evtype)
{
        ASSERT(dip);
        ASSERT(dev != NODEV);
        ASSERT(dev != DDI_DEV_T_ANY);
        ASSERT(dev != DDI_DEV_T_NONE);
        ASSERT(spec_type == S_IFBLK || spec_type == S_IFCHR);

        if (evtype == CT_DEV_EV_OFFLINE)
                return (contract_device_offline(dip, dev, spec_type));

        cmn_err(CE_PANIC, "contract_device_negotiate(): Negotiation "
            "not supported: event (%d) for dev_t (%lu) and spec (%d), "
            "dip (%p)", evtype, dev, spec_type, (void *)dip);

        return (CT_NACK);
}

/*
 * contract_device_finalize
 *
 * Entry point for other subsystems (such as the LDI) to finish event
 * processing for a state change:
 *
 *      CT_DEV_EV_OFFLINE  - publish NEGEND with ct_result
 *      CT_DEV_EV_DEGRADED - publish the degrade event, then NEGEND
 *      CT_DEV_EV_ONLINE   - publish the undegrade event, then NEGEND
 *
 * Any other event type panics the system.
 */
void
contract_device_finalize(dev_info_t *dip, dev_t dev, int spec_type,
    uint_t evtype, int ct_result)
{
        ASSERT(dip);
        ASSERT(dev != NODEV);
        ASSERT(dev != DDI_DEV_T_ANY);
        ASSERT(dev != DDI_DEV_T_NONE);
        ASSERT(spec_type == S_IFBLK || spec_type == S_IFCHR);

        /* First publish the state change itself, where one is required. */
        switch (evtype) {
        case CT_DEV_EV_OFFLINE:
                /* nothing to publish ahead of the NEGEND */
                break;
        case CT_DEV_EV_DEGRADED:
                contract_device_degrade(dip, dev, spec_type);
                break;
        case CT_DEV_EV_ONLINE:
                contract_device_undegrade(dip, dev, spec_type);
                break;
        default:
                cmn_err(CE_PANIC, "contract_device_finalize(): Unsupported "
                    "event (%d) for dev_t (%lu) and spec (%d), dip (%p)",
                    evtype, dev, spec_type, (void *)dip);
                return;
        }

        /* Every supported event ends with a NEGEND. */
        contract_device_negend(dip, dev, spec_type, ct_result);
}

/*
 * contract_device_remove_dip
 *
 * Called by the I/O framework when a devinfo node is freed. Waits for the
 * node's contract barrier to be released, then detaches every contract on
 * the node's devi_ct list: the contract is unlinked from the list, its
 * cond_dip back pointer is cleared and the hold taken for the dip linkage
 * is dropped.
 *
 * NOTE(review): contract_rele() is called with ct_lock held and the
 * contract is still referenced afterwards; this presumably relies on
 * another hold keeping the contract alive - confirm.
 */
void
contract_device_remove_dip(dev_info_t *dip)
{
        list_t *ctlist = &(DEVI(dip)->devi_ct);
        cont_device_t *ctd;

        mutex_enter(&(DEVI(dip)->devi_ct_lock));
        ct_barrier_wait_for_release(dip);

        ctd = list_head(ctlist);
        while (ctd != NULL) {
                /* fetch the successor before ctd leaves the list */
                cont_device_t *following = list_next(ctlist, ctd);
                contract_t *ct = &ctd->cond_contract;

                list_remove(ctlist, ctd);

                /* sever the contract's link back to the dip */
                mutex_enter(&ct->ct_lock);
                ASSERT(ctd->cond_dip == dip);
                ctd->cond_dip = NULL;
                contract_rele(ct);      /* drop the hold for the dip linkage */
                CT_DEBUG((CE_NOTE, "ct: remove_dip: removed dip from contract: "
                    "ctid: %d", ct->ct_id));
                mutex_exit(&ct->ct_lock);

                ctd = following;
        }

        ASSERT(list_is_empty(ctlist));
        mutex_exit(&(DEVI(dip)->devi_ct_lock));
}

/*
 * Barrier related routines
 */
/*
 * Take ownership of the dip's contract barrier. Blocks on devi_ct_cv until
 * devi_ct_count is -1 (barrier free), then sets the count to 0.
 * Caller must hold devi_ct_lock.
 */
static void
ct_barrier_acquire(dev_info_t *dip)
{
        struct dev_info *devi = DEVI(dip);

        ASSERT(MUTEX_HELD(&devi->devi_ct_lock));
        CT_DEBUG((CE_NOTE, "ct_barrier_acquire: waiting for barrier"));

        for (;;) {
                if (devi->devi_ct_count == -1)
                        break;
                cv_wait(&devi->devi_ct_cv, &devi->devi_ct_lock);
        }

        devi->devi_ct_count = 0;
        CT_DEBUG((CE_NOTE, "ct_barrier_acquire: thread owns barrier"));
}

/*
 * Give up the dip's contract barrier: reset devi_ct_count to -1 and wake
 * every thread waiting on devi_ct_cv. Caller must hold devi_ct_lock and
 * the barrier must currently be held.
 */
static void
ct_barrier_release(dev_info_t *dip)
{
        struct dev_info *devi = DEVI(dip);

        ASSERT(MUTEX_HELD(&devi->devi_ct_lock));
        ASSERT(devi->devi_ct_count != -1);

        devi->devi_ct_count = -1;
        cv_broadcast(&devi->devi_ct_cv);
        CT_DEBUG((CE_NOTE, "ct_barrier_release: Released barrier"));
}

/*
 * Returns non-zero if the dip's contract barrier is currently held, i.e.
 * devi_ct_count is anything other than -1. Caller must hold devi_ct_lock.
 */
static int
ct_barrier_held(dev_info_t *dip)
{
        struct dev_info *devi = DEVI(dip);

        ASSERT(MUTEX_HELD(&devi->devi_ct_lock));

        if (devi->devi_ct_count == -1)
                return (0);
        return (1);
}

/*
 * Returns non-zero if the held barrier has a count of zero. Caller must
 * hold devi_ct_lock and the barrier must be held (count != -1).
 */
static int
ct_barrier_empty(dev_info_t *dip)
{
        struct dev_info *devi = DEVI(dip);

        ASSERT(MUTEX_HELD(&devi->devi_ct_lock));
        ASSERT(devi->devi_ct_count != -1);

        if (devi->devi_ct_count != 0)
                return (0);
        return (1);
}

/*
 * Block on devi_ct_cv until the dip's contract barrier is free
 * (devi_ct_count == -1). Unlike ct_barrier_acquire(), the barrier is not
 * taken. Caller must hold devi_ct_lock.
 */
static void
ct_barrier_wait_for_release(dev_info_t *dip)
{
        struct dev_info *devi = DEVI(dip);

        ASSERT(MUTEX_HELD(&devi->devi_ct_lock));

        for (;;) {
                if (devi->devi_ct_count == -1)
                        return;
                cv_wait(&devi->devi_ct_cv, &devi->devi_ct_lock);
        }
}

/*
 * Decrement the barrier count. When the count drops to zero, wake every
 * thread waiting on devi_ct_cv. Caller must hold devi_ct_lock and the
 * count must be positive.
 */
static void
ct_barrier_decr(dev_info_t *dip)
{
        struct dev_info *devi = DEVI(dip);

        CT_DEBUG((CE_NOTE, "barrier_decr:  ct_count before decr: %d",
            devi->devi_ct_count));

        ASSERT(MUTEX_HELD(&devi->devi_ct_lock));
        ASSERT(devi->devi_ct_count > 0);

        devi->devi_ct_count -= 1;
        if (devi->devi_ct_count != 0)
                return;

        /* the count just reached zero */
        cv_broadcast(&devi->devi_ct_cv);
        CT_DEBUG((CE_NOTE, "barrier_decr: cv_broadcast"));
}

/*
 * Increment the barrier count. The barrier must already be held.
 */
static void
ct_barrier_incr(dev_info_t *dip)
{
        ASSERT(ct_barrier_held(dip));

        DEVI(dip)->devi_ct_count += 1;
}

/*
 * Wait, for at most "secs" seconds, until the barrier count (devi_ct_count)
 * drops to zero.
 *
 * Caller must hold devi_ct_lock; it is dropped and reacquired inside
 * cv_timedwait().
 *
 * Returns 0 once the count is zero, or -1 if cv_timedwait() returned -1
 * (the deadline passed) while the count was still non-zero.
 */
static int
ct_barrier_wait_for_empty(dev_info_t *dip, int secs)
{
        clock_t abstime;

        ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock)));

        /*
         * Widen to clock_t before converting seconds to microseconds:
         * "secs * 1000000" evaluated in int overflows for secs > 2147.
         * The deadline is computed once, so wakeups that find the count
         * still non-zero do not extend the total wait.
         */
        abstime = ddi_get_lbolt() + drv_usectohz((clock_t)secs * 1000000);
        while (DEVI(dip)->devi_ct_count) {
                if (cv_timedwait(&(DEVI(dip)->devi_ct_cv),
                    &(DEVI(dip)->devi_ct_lock), abstime) == -1) {
                        return (-1);
                }
        }
        return (0);
}