root/usr/src/uts/common/fs/sockfs/sockstr.c
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 1995, 2010, Oracle and/or its affiliates. All rights reserved.
 */

#include <sys/types.h>
#include <sys/inttypes.h>
#include <sys/t_lock.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/cred.h>
#include <sys/kmem.h>
#include <sys/sysmacros.h>
#include <sys/vfs.h>
#include <sys/vnode.h>
#include <sys/debug.h>
#include <sys/errno.h>
#include <sys/time.h>
#include <sys/file.h>
#include <sys/user.h>
#include <sys/stream.h>
#include <sys/strsubr.h>
#include <sys/esunddi.h>
#include <sys/flock.h>
#include <sys/modctl.h>
#include <sys/vtrace.h>
#include <sys/strsun.h>
#include <sys/cmn_err.h>
#include <sys/proc.h>
#include <sys/ddi.h>

#include <sys/suntpi.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/socketvar.h>
#include <netinet/in.h>
#include <inet/common.h>
#include <inet/proto_set.h>

#include <sys/tiuser.h>
#define _SUN_TPI_VERSION        2
#include <sys/tihdr.h>

#include <c2/audit.h>

#include <fs/sockfs/socktpi.h>
#include <fs/sockfs/socktpi_impl.h>

int so_default_version = SOV_SOCKSTREAM;

#ifdef DEBUG
/* Set sockdebug to print debug messages when SO_DEBUG is set */
int sockdebug = 0;

/* Set sockprinterr to print error messages when SO_DEBUG is set */
int sockprinterr = 0;

/*
 * Set so_default_options to SO_DEBUG is all sockets should be created
 * with SO_DEBUG set. This is needed to get debug printouts from the
 * socket() call itself.
 */
int so_default_options = 0;
#endif /* DEBUG */

#ifdef SOCK_TEST
/*
 * Set to number of ticks to limit cv_waits for code coverage testing.
 * Set to 1000 when SO_DEBUG is set to 2.
 */
clock_t sock_test_timelimit = 0;
#endif /* SOCK_TEST */

/*
 * For concurrency testing of e.g. opening /dev/ip which does not
 * handle T_INFO_REQ messages.
 */
int so_no_tinfo = 0;

/*
 * Timeout for getting a T_CAPABILITY_ACK - it is possible for a provider
 * to simply ignore the T_CAPABILITY_REQ.
 */
clock_t sock_capability_timeout = 2;    /* seconds */

static int      do_tcapability(struct sonode *so, t_uscalar_t cap_bits1);
static void     so_removehooks(struct sonode *so);

static mblk_t *strsock_proto(vnode_t *vp, mblk_t *mp,
                strwakeup_t *wakeups, strsigset_t *firstmsgsigs,
                strsigset_t *allmsgsigs, strpollset_t *pollwakeups);
static mblk_t *strsock_misc(vnode_t *vp, mblk_t *mp,
                strwakeup_t *wakeups, strsigset_t *firstmsgsigs,
                strsigset_t *allmsgsigs, strpollset_t *pollwakeups);

/*
 * Convert a socket to a stream. Invoked when the illusory sockmod
 * is popped from the stream.
 * Change the stream head back to default operation without losing
 * any messages (T_conn_ind's are moved to the stream head queue).
 */
int
so_sock2stream(struct sonode *so)
{
        struct vnode            *vp = SOTOV(so);
        queue_t                 *rq;
        mblk_t                  *mp;
        int                     error = 0;
        sotpi_info_t            *sti = SOTOTPI(so);

        ASSERT(MUTEX_HELD(&sti->sti_plumb_lock));

        mutex_enter(&so->so_lock);
        so_lock_single(so);

        ASSERT(so->so_version != SOV_STREAM);

        if (sti->sti_direct) {
                mblk_t **mpp;
                int rval;

                /*
                 * Tell the transport below that sockmod is being popped
                 */
                mutex_exit(&so->so_lock);
                error = strioctl(vp, _SIOCSOCKFALLBACK, 0, 0, K_TO_K, CRED(),
                    &rval);
                mutex_enter(&so->so_lock);
                if (error != 0) {
                        dprintso(so, 0, ("so_sock2stream(%p): "
                            "_SIOCSOCKFALLBACK failed\n", (void *)so));
                        goto exit;
                }
                sti->sti_direct = 0;

                for (mpp = &sti->sti_conn_ind_head; (mp = *mpp) != NULL;
                    mpp = &mp->b_next) {
                        struct T_conn_ind       *conn_ind;

                        /*
                         * strsock_proto() has already verified the length of
                         * this message block.
                         */
                        ASSERT(MBLKL(mp) >= sizeof (struct T_conn_ind));

                        conn_ind = (struct T_conn_ind *)mp->b_rptr;
                        if (conn_ind->OPT_length == 0 &&
                            conn_ind->OPT_offset == 0)
                                continue;

                        if (DB_REF(mp) > 1) {
                                mblk_t  *newmp;
                                size_t  length;
                                cred_t  *cr;
                                pid_t   cpid;
                                int error;      /* Dummy - error not returned */

                                /*
                                 * Copy the message block because it is used
                                 * elsewhere, too.
                                 * Can't use copyb since we want to wait
                                 * yet allow for EINTR.
                                 */
                                /* Round up size for reuse */
                                length = MAX(MBLKL(mp), 64);
                                cr = msg_getcred(mp, &cpid);
                                if (cr != NULL) {
                                        newmp = allocb_cred_wait(length, 0,
                                            &error, cr, cpid);
                                } else {
                                        newmp = allocb_wait(length, 0, 0,
                                            &error);
                                }
                                if (newmp == NULL) {
                                        error = EINTR;
                                        goto exit;
                                }
                                bcopy(mp->b_rptr, newmp->b_wptr, length);
                                newmp->b_wptr += length;
                                newmp->b_next = mp->b_next;

                                /*
                                 * Link the new message block into the queue
                                 * and free the old one.
                                 */
                                *mpp = newmp;
                                mp->b_next = NULL;
                                freemsg(mp);

                                mp = newmp;
                                conn_ind = (struct T_conn_ind *)mp->b_rptr;
                        }

                        /*
                         * Remove options added by TCP for accept fast-path.
                         */
                        conn_ind->OPT_length = 0;
                        conn_ind->OPT_offset = 0;
                }
        }

        so->so_version = SOV_STREAM;
        so->so_proto_handle = NULL;

        /*
         * Remove the hooks in the stream head to avoid queuing more
         * packets in sockfs.
         */
        mutex_exit(&so->so_lock);
        so_removehooks(so);
        mutex_enter(&so->so_lock);

        /*
         * Clear any state related to urgent data. Leave any T_EXDATA_IND
         * on the queue - the behavior of urgent data after a switch is
         * left undefined.
         */
        so->so_error = sti->sti_delayed_error = 0;
        freemsg(so->so_oobmsg);
        so->so_oobmsg = NULL;
        sti->sti_oobsigcnt = sti->sti_oobcnt = 0;

        so->so_state &= ~(SS_RCVATMARK|SS_OOBPEND|SS_HAVEOOBDATA|SS_HADOOBDATA|
            SS_SAVEDEOR);
        ASSERT(so_verify_oobstate(so));

        freemsg(sti->sti_ack_mp);
        sti->sti_ack_mp = NULL;

        /*
         * Flush the T_DISCON_IND on sti_discon_ind_mp.
         */
        so_flush_discon_ind(so);

        /*
         * Move any queued T_CONN_IND messages to stream head queue.
         */
        rq = RD(strvp2wq(vp));
        while ((mp = sti->sti_conn_ind_head) != NULL) {
                sti->sti_conn_ind_head = mp->b_next;
                mp->b_next = NULL;
                if (sti->sti_conn_ind_head == NULL) {
                        ASSERT(sti->sti_conn_ind_tail == mp);
                        sti->sti_conn_ind_tail = NULL;
                }
                dprintso(so, 0,
                    ("so_sock2stream(%p): moving T_CONN_IND\n", (void *)so));

                /* Drop lock across put() */
                mutex_exit(&so->so_lock);
                put(rq, mp);
                mutex_enter(&so->so_lock);
        }

exit:
        ASSERT(MUTEX_HELD(&so->so_lock));
        so_unlock_single(so, SOLOCKED);
        mutex_exit(&so->so_lock);
        return (error);
}

/*
 * Covert a stream back to a socket. This is invoked when the illusory
 * sockmod is pushed on a stream (where the stream was "created" by
 * popping the illusory sockmod).
 * This routine can not recreate the socket state (certain aspects of
 * it like urgent data state and the bound/connected addresses for AF_UNIX
 * sockets can not be recreated by asking the transport for information).
 * Thus this routine implicitly assumes that the socket is in an initial
 * state (as if it was just created). It flushes any messages queued on the
 * read queue to avoid dealing with e.g. TPI acks or T_exdata_ind messages.
 */
void
so_stream2sock(struct sonode *so)
{
        struct vnode *vp = SOTOV(so);
        sotpi_info_t *sti = SOTOTPI(so);

        ASSERT(MUTEX_HELD(&sti->sti_plumb_lock));

        mutex_enter(&so->so_lock);
        so_lock_single(so);
        ASSERT(so->so_version == SOV_STREAM);
        so->so_version = SOV_SOCKSTREAM;
        sti->sti_pushcnt = 0;
        mutex_exit(&so->so_lock);

        /*
         * Set a permenent error to force any thread in sorecvmsg to
         * return (and drop SOREADLOCKED). Clear the error once
         * we have SOREADLOCKED.
         * This makes a read sleeping during the I_PUSH of sockmod return
         * EIO.
         */
        strsetrerror(SOTOV(so), EIO, 1, NULL);

        /*
         * Get the read lock before flushing data to avoid
         * problems with the T_EXDATA_IND MSG_PEEK code in sorecvmsg.
         */
        mutex_enter(&so->so_lock);
        (void) so_lock_read(so, 0);     /* Set SOREADLOCKED */
        mutex_exit(&so->so_lock);

        strsetrerror(SOTOV(so), 0, 0, NULL);
        so_installhooks(so);

        /*
         * Flush everything on the read queue.
         * This ensures that no T_CONN_IND remain and that no T_EXDATA_IND
         * remain; those types of messages would confuse sockfs.
         */
        strflushrq(vp, FLUSHALL);
        mutex_enter(&so->so_lock);

        /*
         * Flush the T_DISCON_IND on sti_discon_ind_mp.
         */
        so_flush_discon_ind(so);
        so_unlock_read(so);     /* Clear SOREADLOCKED */

        so_unlock_single(so, SOLOCKED);
        mutex_exit(&so->so_lock);
}

/*
 * Install the hooks in the stream head.
 */
void
so_installhooks(struct sonode *so)
{
        struct vnode *vp = SOTOV(so);

        strsetrputhooks(vp, SH_SIGALLDATA | SH_IGN_ZEROLEN | SH_CONSOL_DATA,
            strsock_proto, strsock_misc);
        strsetwputhooks(vp, SH_SIGPIPE | SH_RECHECK_ERR, 0);
}

/*
 * Remove the hooks in the stream head.
 */
static void
so_removehooks(struct sonode *so)
{
        struct vnode *vp = SOTOV(so);

        strsetrputhooks(vp, 0, NULL, NULL);
        strsetwputhooks(vp, 0, STRTIMOUT);
        /*
         * Leave read behavior as it would have been for a normal
         * stream i.e. a read of an M_PROTO will fail.
         */
}

void
so_basic_strinit(struct sonode *so)
{
        struct vnode *vp = SOTOV(so);
        struct stdata *stp;
        mblk_t *mp;
        sotpi_info_t *sti = SOTOTPI(so);

        /* Preallocate an unbind_req message */
        mp = soallocproto(sizeof (struct T_unbind_req), _ALLOC_SLEEP, CRED());
        mutex_enter(&so->so_lock);
        sti->sti_unbind_mp = mp;
#ifdef DEBUG
        so->so_options = so_default_options;
#endif /* DEBUG */
        mutex_exit(&so->so_lock);

        so_installhooks(so);

        stp = vp->v_stream;
        /*
         * Have to keep minpsz at zero in order to allow write/send of zero
         * bytes.
         */
        mutex_enter(&stp->sd_lock);
        if (stp->sd_qn_minpsz == 1)
                stp->sd_qn_minpsz = 0;
        mutex_exit(&stp->sd_lock);
}

/*
 * Initialize the streams side of a socket including
 * T_info_req/ack processing. If tso is not NULL its values are used thereby
 * avoiding the T_INFO_REQ.
 */
int
so_strinit(struct sonode *so, struct sonode *tso)
{
        sotpi_info_t *sti = SOTOTPI(so);
        sotpi_info_t *tsti;
        int error;

        so_basic_strinit(so);

        /*
         * The T_CAPABILITY_REQ should be the first message sent down because
         * at least TCP has a fast-path for this which avoids timeouts while
         * waiting for the T_CAPABILITY_ACK under high system load.
         */
        if (tso == NULL) {
                error = do_tcapability(so, TC1_ACCEPTOR_ID | TC1_INFO);
                if (error)
                        return (error);
        } else {
                tsti = SOTOTPI(tso);

                mutex_enter(&so->so_lock);
                sti->sti_tsdu_size = tsti->sti_tsdu_size;
                sti->sti_etsdu_size = tsti->sti_etsdu_size;
                sti->sti_addr_size = tsti->sti_addr_size;
                sti->sti_opt_size = tsti->sti_opt_size;
                sti->sti_tidu_size = tsti->sti_tidu_size;
                sti->sti_serv_type = tsti->sti_serv_type;
                so->so_mode = tso->so_mode & ~SM_ACCEPTOR_ID;
                mutex_exit(&so->so_lock);

                /* the following do_tcapability may update so->so_mode */
                if ((tsti->sti_serv_type != T_CLTS) &&
                    (sti->sti_direct == 0)) {
                        error = do_tcapability(so, TC1_ACCEPTOR_ID);
                        if (error)
                                return (error);
                }
        }
        /*
         * If the addr_size is 0 we treat it as already bound
         * and connected. This is used by the routing socket.
         * We set the addr_size to something to allocate a the address
         * structures.
         */
        if (sti->sti_addr_size == 0) {
                so->so_state |= SS_ISBOUND | SS_ISCONNECTED;
                /* Address size can vary with address families. */
                if (so->so_family == AF_INET6)
                        sti->sti_addr_size =
                            (t_scalar_t)sizeof (struct sockaddr_in6);
                else
                        sti->sti_addr_size =
                            (t_scalar_t)sizeof (struct sockaddr_in);
                ASSERT(sti->sti_unbind_mp);
        }

        so_alloc_addr(so, sti->sti_addr_size);

        return (0);
}

static void
copy_tinfo(struct sonode *so, struct T_info_ack *tia)
{
        sotpi_info_t *sti = SOTOTPI(so);

        sti->sti_tsdu_size = tia->TSDU_size;
        sti->sti_etsdu_size = tia->ETSDU_size;
        sti->sti_addr_size = tia->ADDR_size;
        sti->sti_opt_size = tia->OPT_size;
        sti->sti_tidu_size = tia->TIDU_size;
        sti->sti_serv_type = tia->SERV_type;
        switch (tia->CURRENT_state) {
        case TS_UNBND:
                break;
        case TS_IDLE:
                so->so_state |= SS_ISBOUND;
                sti->sti_laddr_len = 0;
                sti->sti_laddr_valid = 0;
                break;
        case TS_DATA_XFER:
                so->so_state |= SS_ISBOUND|SS_ISCONNECTED;
                sti->sti_laddr_len = 0;
                sti->sti_faddr_len = 0;
                sti->sti_laddr_valid = 0;
                sti->sti_faddr_valid = 0;
                break;
        }

        /*
         * Heuristics for determining the socket mode flags
         * (SM_ATOMIC, SM_CONNREQUIRED, SM_ADDR, SM_FDPASSING,
         * and SM_EXDATA, SM_OPTDATA, and SM_BYTESTREAM)
         * from the info ack.
         */
        if (sti->sti_serv_type == T_CLTS) {
                so->so_mode |= SM_ATOMIC | SM_ADDR;
        } else {
                so->so_mode |= SM_CONNREQUIRED;
                if (sti->sti_etsdu_size != 0 && sti->sti_etsdu_size != -2)
                        so->so_mode |= SM_EXDATA;
        }
        if (so->so_type == SOCK_SEQPACKET || so->so_type == SOCK_RAW) {
                /* Semantics are to discard tail end of messages */
                so->so_mode |= SM_ATOMIC;
        }
        if (so->so_family == AF_UNIX) {
                so->so_mode |= SM_FDPASSING | SM_OPTDATA;
                if (sti->sti_addr_size == -1) {
                        /* MAXPATHLEN + soun_family + nul termination */
                        sti->sti_addr_size = (t_scalar_t)(MAXPATHLEN +
                            sizeof (short) + 1);
                }
                if (so->so_type == SOCK_STREAM) {
                        /*
                         * Make it into a byte-stream transport.
                         * SOCK_SEQPACKET sockets are unchanged.
                         */
                        sti->sti_tsdu_size = 0;
                }
        } else if (sti->sti_addr_size == -1) {
                /*
                 * Logic extracted from sockmod - have to pick some max address
                 * length in order to preallocate the addresses.
                 */
                sti->sti_addr_size = SOA_DEFSIZE;
        }
        if (sti->sti_tsdu_size == 0)
                so->so_mode |= SM_BYTESTREAM;
}

static int
check_tinfo(struct sonode *so)
{
        sotpi_info_t *sti = SOTOTPI(so);

        /* Consistency checks */
        if (so->so_type == SOCK_DGRAM && sti->sti_serv_type != T_CLTS) {
                eprintso(so, ("service type and socket type mismatch\n"));
                eprintsoline(so, EPROTO);
                return (EPROTO);
        }
        if (so->so_type == SOCK_STREAM && sti->sti_serv_type == T_CLTS) {
                eprintso(so, ("service type and socket type mismatch\n"));
                eprintsoline(so, EPROTO);
                return (EPROTO);
        }
        if (so->so_type == SOCK_SEQPACKET && sti->sti_serv_type == T_CLTS) {
                eprintso(so, ("service type and socket type mismatch\n"));
                eprintsoline(so, EPROTO);
                return (EPROTO);
        }
        if (so->so_family == AF_INET &&
            sti->sti_addr_size != (t_scalar_t)sizeof (struct sockaddr_in)) {
                eprintso(so,
                    ("AF_INET must have sockaddr_in address length. Got %d\n",
                    sti->sti_addr_size));
                eprintsoline(so, EMSGSIZE);
                return (EMSGSIZE);
        }
        if (so->so_family == AF_INET6 &&
            sti->sti_addr_size != (t_scalar_t)sizeof (struct sockaddr_in6)) {
                eprintso(so,
                    ("AF_INET6 must have sockaddr_in6 address length. Got %d\n",
                    sti->sti_addr_size));
                eprintsoline(so, EMSGSIZE);
                return (EMSGSIZE);
        }

        dprintso(so, 1, (
            "tinfo: serv %d tsdu %d, etsdu %d, addr %d, opt %d, tidu %d\n",
            sti->sti_serv_type, sti->sti_tsdu_size, sti->sti_etsdu_size,
            sti->sti_addr_size, sti->sti_opt_size,
            sti->sti_tidu_size));
        dprintso(so, 1, ("tinfo: so_state %s\n",
            pr_state(so->so_state, so->so_mode)));
        return (0);
}

/*
 * Send down T_info_req and wait for the ack.
 * Record interesting T_info_ack values in the sonode.
 */
static int
do_tinfo(struct sonode *so)
{
        struct T_info_req tir;
        mblk_t *mp;
        int error;

        ASSERT(MUTEX_NOT_HELD(&so->so_lock));

        if (so_no_tinfo) {
                SOTOTPI(so)->sti_addr_size = 0;
                return (0);
        }

        dprintso(so, 1, ("do_tinfo(%p)\n", (void *)so));

        /* Send T_INFO_REQ */
        tir.PRIM_type = T_INFO_REQ;
        mp = soallocproto1(&tir, sizeof (tir),
            sizeof (struct T_info_req) + sizeof (struct T_info_ack),
            _ALLOC_INTR, CRED());
        if (mp == NULL) {
                eprintsoline(so, ENOBUFS);
                return (ENOBUFS);
        }
        /* T_INFO_REQ has to be M_PCPROTO */
        DB_TYPE(mp) = M_PCPROTO;

        error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
            MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0);
        if (error) {
                eprintsoline(so, error);
                return (error);
        }
        mutex_enter(&so->so_lock);
        /* Wait for T_INFO_ACK */
        if ((error = sowaitprim(so, T_INFO_REQ, T_INFO_ACK,
            (t_uscalar_t)sizeof (struct T_info_ack), &mp, 0))) {
                mutex_exit(&so->so_lock);
                eprintsoline(so, error);
                return (error);
        }

        ASSERT(mp);
        copy_tinfo(so, (struct T_info_ack *)mp->b_rptr);
        mutex_exit(&so->so_lock);
        freemsg(mp);
        return (check_tinfo(so));
}

/*
 * Send down T_capability_req and wait for the ack.
 * Record interesting T_capability_ack values in the sonode.
 */
static int
do_tcapability(struct sonode *so, t_uscalar_t cap_bits1)
{
        struct T_capability_req tcr;
        struct T_capability_ack *tca;
        mblk_t *mp;
        int error;
        sotpi_info_t *sti = SOTOTPI(so);

        ASSERT(cap_bits1 != 0);
        ASSERT((cap_bits1 & ~(TC1_ACCEPTOR_ID | TC1_INFO)) == 0);
        ASSERT(MUTEX_NOT_HELD(&so->so_lock));

        if (sti->sti_provinfo->tpi_capability == PI_NO)
                return (do_tinfo(so));

        if (so_no_tinfo) {
                sti->sti_addr_size = 0;
                if ((cap_bits1 &= ~TC1_INFO) == 0)
                        return (0);
        }

        dprintso(so, 1, ("do_tcapability(%p)\n", (void *)so));

        /* Send T_CAPABILITY_REQ */
        tcr.PRIM_type = T_CAPABILITY_REQ;
        tcr.CAP_bits1 = cap_bits1;
        mp = soallocproto1(&tcr, sizeof (tcr),
            sizeof (struct T_capability_req) + sizeof (struct T_capability_ack),
            _ALLOC_INTR, CRED());
        if (mp == NULL) {
                eprintsoline(so, ENOBUFS);
                return (ENOBUFS);
        }
        /* T_CAPABILITY_REQ should be M_PCPROTO here */
        DB_TYPE(mp) = M_PCPROTO;

        error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
            MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0);
        if (error) {
                eprintsoline(so, error);
                return (error);
        }
        mutex_enter(&so->so_lock);
        /* Wait for T_CAPABILITY_ACK */
        if ((error = sowaitprim(so, T_CAPABILITY_REQ, T_CAPABILITY_ACK,
            (t_uscalar_t)sizeof (*tca), &mp, sock_capability_timeout * hz))) {
                mutex_exit(&so->so_lock);
                PI_PROVLOCK(sti->sti_provinfo);
                if (sti->sti_provinfo->tpi_capability == PI_DONTKNOW)
                        sti->sti_provinfo->tpi_capability = PI_NO;
                PI_PROVUNLOCK(sti->sti_provinfo);
                ASSERT((so->so_mode & SM_ACCEPTOR_ID) == 0);
                if (cap_bits1 & TC1_INFO) {
                        /*
                         * If the T_CAPABILITY_REQ timed out and then a
                         * T_INFO_REQ gets a protocol error, most likely
                         * the capability was slow (vs. unsupported). Return
                         * ENOSR for this case as a best guess.
                         */
                        if (error == ETIME) {
                                return ((error = do_tinfo(so)) == EPROTO ?
                                    ENOSR : error);
                        }
                        return (do_tinfo(so));
                }
                return (0);
        }

        ASSERT(mp);
        tca = (struct T_capability_ack *)mp->b_rptr;

        ASSERT((cap_bits1 & TC1_INFO) == (tca->CAP_bits1 & TC1_INFO));
        so_proc_tcapability_ack(so, tca);

        cap_bits1 = tca->CAP_bits1;

        mutex_exit(&so->so_lock);
        freemsg(mp);

        if (cap_bits1 & TC1_INFO)
                return (check_tinfo(so));

        return (0);
}

/*
 * Process a T_CAPABILITY_ACK
 */
void
so_proc_tcapability_ack(struct sonode *so, struct T_capability_ack *tca)
{
        sotpi_info_t *sti = SOTOTPI(so);

        if (sti->sti_provinfo->tpi_capability == PI_DONTKNOW) {
                PI_PROVLOCK(sti->sti_provinfo);
                sti->sti_provinfo->tpi_capability = PI_YES;
                PI_PROVUNLOCK(sti->sti_provinfo);
        }

        if (tca->CAP_bits1 & TC1_ACCEPTOR_ID) {
                sti->sti_acceptor_id = tca->ACCEPTOR_id;
                so->so_mode |= SM_ACCEPTOR_ID;
        }

        if (tca->CAP_bits1 & TC1_INFO)
                copy_tinfo(so, &tca->INFO_ack);
}

/*
 * Retrieve socket error, clear error if not peek.
 */
int
sogeterr(struct sonode *so, boolean_t clear_err)
{
        int error;

        ASSERT(MUTEX_HELD(&so->so_lock));

        error = so->so_error;
        if (clear_err)
                so->so_error = 0;

        return (error);
}

/*
 * This routine is registered with the stream head to retrieve read
 * side errors.
 * It does not clear the socket error for a peeking read side operation.
 * It the error is to be cleared it sets *clearerr.
 */
int
sogetrderr(vnode_t *vp, int ispeek, int *clearerr)
{
        struct sonode *so = VTOSO(vp);
        int error;

        mutex_enter(&so->so_lock);
        if (ispeek) {
                error = so->so_error;
                *clearerr = 0;
        } else {
                error = so->so_error;
                so->so_error = 0;
                *clearerr = 1;
        }
        mutex_exit(&so->so_lock);
        return (error);
}

/*
 * This routine is registered with the stream head to retrieve write
 * side errors.
 * It does not clear the socket error for a peeking read side operation.
 * It the error is to be cleared it sets *clearerr.
 */
int
sogetwrerr(vnode_t *vp, int ispeek, int *clearerr)
{
        struct sonode *so = VTOSO(vp);
        int error;

        mutex_enter(&so->so_lock);
        if (so->so_state & SS_CANTSENDMORE) {
                error = EPIPE;
                *clearerr = 0;
        } else {
                error = so->so_error;
                if (ispeek) {
                        *clearerr = 0;
                } else {
                        so->so_error = 0;
                        *clearerr = 1;
                }
        }
        mutex_exit(&so->so_lock);
        return (error);
}

/*
 * Set a nonpersistent read and write error on the socket.
 * Used when there is a T_uderror_ind for a connected socket.
 * The caller also needs to call strsetrerror and strsetwerror
 * after dropping the lock.
 */
void
soseterror(struct sonode *so, int error)
{
        ASSERT(error != 0);

        ASSERT(MUTEX_HELD(&so->so_lock));
        so->so_error = (ushort_t)error;
}

void
soisconnecting(struct sonode *so)
{
        ASSERT(MUTEX_HELD(&so->so_lock));
        so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING);
        so->so_state |= SS_ISCONNECTING;
        cv_broadcast(&so->so_state_cv);
}

void
soisconnected(struct sonode *so)
{
        ASSERT(MUTEX_HELD(&so->so_lock));
        so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING);
        so->so_state |= SS_ISCONNECTED;
        cv_broadcast(&so->so_state_cv);
}

/*
 * The caller also needs to call strsetrerror, strsetwerror and strseteof.
 */
void
soisdisconnected(struct sonode *so, int error)
{
        ASSERT(MUTEX_HELD(&so->so_lock));
        so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
        so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE);
        so->so_error = (ushort_t)error;
        if (so->so_peercred != NULL) {
                crfree(so->so_peercred);
                so->so_peercred = NULL;
        }
        cv_broadcast(&so->so_state_cv);
}

/*
 * For connected AF_UNIX SOCK_DGRAM sockets when the peer closes.
 * Does not affect write side.
 * The caller also has to call strsetrerror.
 */
static void
sobreakconn(struct sonode *so, int error)
{
        ASSERT(MUTEX_HELD(&so->so_lock));
        so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
        so->so_error = (ushort_t)error;
        cv_broadcast(&so->so_state_cv);
}

/*
 * Can no longer send.
 * Caller must also call strsetwerror.
 *
 * We mark the peer address as no longer valid for getpeername, but
 * leave it around for so_unix_close to notify the peer (that
 * transport has no addressing held at that layer).
 */
void
socantsendmore(struct sonode *so)
{
        ASSERT(MUTEX_HELD(&so->so_lock));
        so->so_state |= SS_CANTSENDMORE;
        cv_broadcast(&so->so_state_cv);
}

/*
 * The caller must call strseteof(,1) as well as this routine
 * to change the socket state.
 */
void
socantrcvmore(struct sonode *so)
{
        ASSERT(MUTEX_HELD(&so->so_lock));
        so->so_state |= SS_CANTRCVMORE;
        cv_broadcast(&so->so_state_cv);
}

/*
 * The caller has sent down a "request_prim" primitive and wants to wait for
 * an ack ("ack_prim") or an T_ERROR_ACK for it.
 * The specified "ack_prim" can be a T_OK_ACK.
 *
 * Assumes that all the TPI acks are M_PCPROTO messages.
 *
 * Note that the socket is single-threaded (using so_lock_single)
 * for all operations that generate TPI ack messages. Since
 * only TPI ack messages are M_PCPROTO we should never receive
 * anything except either the ack we are expecting or a T_ERROR_ACK
 * for the same primitive.
 */
int
sowaitprim(struct sonode *so, t_scalar_t request_prim, t_scalar_t ack_prim,
            t_uscalar_t min_size, mblk_t **mpp, clock_t wait)
{
        mblk_t *mp;
        union T_primitives *tpr;
        int error;

        dprintso(so, 1, ("sowaitprim(%p, %d, %d, %d, %p, %lu)\n",
            (void *)so, request_prim, ack_prim, min_size, (void *)mpp, wait));

        ASSERT(MUTEX_HELD(&so->so_lock));

        error = sowaitack(so, &mp, wait);
        if (error)
                return (error);

        dprintso(so, 1, ("got msg %p\n", (void *)mp));
        if (DB_TYPE(mp) != M_PCPROTO ||
            MBLKL(mp) < sizeof (tpr->type)) {
                freemsg(mp);
                eprintsoline(so, EPROTO);
                return (EPROTO);
        }
        tpr = (union T_primitives *)mp->b_rptr;
        /*
         * Did we get the primitive that we were asking for?
         * For T_OK_ACK we also check that it matches the request primitive.
         */
        if (tpr->type == ack_prim &&
            (ack_prim != T_OK_ACK ||
            tpr->ok_ack.CORRECT_prim == request_prim)) {
                if (MBLKL(mp) >= (ssize_t)min_size) {
                        /* Found what we are looking for */
                        *mpp = mp;
                        return (0);
                }
                /* Too short */
                freemsg(mp);
                eprintsoline(so, EPROTO);
                return (EPROTO);
        }

        if (tpr->type == T_ERROR_ACK &&
            tpr->error_ack.ERROR_prim == request_prim) {
                /* Error to the primitive we were looking for */
                if (tpr->error_ack.TLI_error == TSYSERR) {
                        error = tpr->error_ack.UNIX_error;
                } else {
                        error = proto_tlitosyserr(tpr->error_ack.TLI_error);
                }
                dprintso(so, 0, ("error_ack for %d: %d/%d ->%d\n",
                    tpr->error_ack.ERROR_prim, tpr->error_ack.TLI_error,
                    tpr->error_ack.UNIX_error, error));
                freemsg(mp);
                return (error);
        }
        /*
         * Wrong primitive or T_ERROR_ACK for the wrong primitive
         */
#ifdef DEBUG
        if (tpr->type == T_ERROR_ACK) {
                dprintso(so, 0, ("error_ack for %d: %d/%d\n",
                    tpr->error_ack.ERROR_prim, tpr->error_ack.TLI_error,
                    tpr->error_ack.UNIX_error));
        } else if (tpr->type == T_OK_ACK) {
                dprintso(so, 0, ("ok_ack for %d, expected %d for %d\n",
                    tpr->ok_ack.CORRECT_prim, ack_prim, request_prim));
        } else {
                dprintso(so, 0,
                    ("unexpected primitive %d, expected %d for %d\n",
                    tpr->type, ack_prim, request_prim));
        }
#endif /* DEBUG */

        freemsg(mp);
        eprintsoline(so, EPROTO);
        return (EPROTO);
}

/*
 * Wait for a T_OK_ACK for the specified primitive.
 */
int
sowaitokack(struct sonode *so, t_scalar_t request_prim)
{
        mblk_t *mp;
        int error;

        error = sowaitprim(so, request_prim, T_OK_ACK,
            (t_uscalar_t)sizeof (struct T_ok_ack), &mp, 0);
        if (error)
                return (error);
        freemsg(mp);
        return (0);
}

/*
 * Queue a received TPI ack message on sti_ack_mp.
 */
void
soqueueack(struct sonode *so, mblk_t *mp)
{
        sotpi_info_t *sti = SOTOTPI(so);

        if (DB_TYPE(mp) != M_PCPROTO) {
                zcmn_err(getzoneid(), CE_WARN,
                    "sockfs: received unexpected M_PROTO TPI ack. Prim %d\n",
                    *(t_scalar_t *)mp->b_rptr);
                freemsg(mp);
                return;
        }

        mutex_enter(&so->so_lock);
        if (sti->sti_ack_mp != NULL) {
                dprintso(so, 1, ("sti_ack_mp already set\n"));
                freemsg(sti->sti_ack_mp);
                sti->sti_ack_mp = NULL;
        }
        sti->sti_ack_mp = mp;
        cv_broadcast(&sti->sti_ack_cv);
        mutex_exit(&so->so_lock);
}

/*
 * Wait for a TPI ack ignoring signals and errors.
 */
int
sowaitack(struct sonode *so, mblk_t **mpp, clock_t wait)
{
        sotpi_info_t *sti = SOTOTPI(so);

        ASSERT(MUTEX_HELD(&so->so_lock));

        while (sti->sti_ack_mp == NULL) {
#ifdef SOCK_TEST
                if (wait == 0 && sock_test_timelimit != 0)
                        wait = sock_test_timelimit;
#endif
                if (wait != 0) {
                        /*
                         * Only wait for the time limit.
                         */
                        if (cv_reltimedwait(&sti->sti_ack_cv, &so->so_lock,
                            wait, TR_CLOCK_TICK) == -1) {
                                eprintsoline(so, ETIME);
                                return (ETIME);
                        }
                }
                else
                        cv_wait(&sti->sti_ack_cv, &so->so_lock);
        }
        *mpp = sti->sti_ack_mp;
#ifdef DEBUG
        {
                union T_primitives *tpr;
                mblk_t *mp = *mpp;

                tpr = (union T_primitives *)mp->b_rptr;
                ASSERT(DB_TYPE(mp) == M_PCPROTO);
                ASSERT(tpr->type == T_OK_ACK ||
                    tpr->type == T_ERROR_ACK ||
                    tpr->type == T_BIND_ACK ||
                    tpr->type == T_CAPABILITY_ACK ||
                    tpr->type == T_INFO_ACK ||
                    tpr->type == T_OPTMGMT_ACK);
        }
#endif /* DEBUG */
        sti->sti_ack_mp = NULL;
        return (0);
}

/*
 * Queue a received T_CONN_IND message on sti_conn_ind_head/tail.
 */
void
soqueueconnind(struct sonode *so, mblk_t *mp)
{
        sotpi_info_t *sti = SOTOTPI(so);

        if (DB_TYPE(mp) != M_PROTO) {
                zcmn_err(getzoneid(), CE_WARN,
                    "sockfs: received unexpected M_PCPROTO T_CONN_IND\n");
                freemsg(mp);
                return;
        }

        mutex_enter(&so->so_lock);
        ASSERT(mp->b_next == NULL);
        if (sti->sti_conn_ind_head == NULL) {
                sti->sti_conn_ind_head = mp;
        } else {
                ASSERT(sti->sti_conn_ind_tail->b_next == NULL);
                sti->sti_conn_ind_tail->b_next = mp;
        }
        sti->sti_conn_ind_tail = mp;
        /* Wakeup a single consumer of the T_CONN_IND */
        cv_signal(&so->so_acceptq_cv);
        mutex_exit(&so->so_lock);
}

/*
 * Wait for a T_CONN_IND.
 * Don't wait if nonblocking.
 * Accept signals and socket errors.
 */
int
sowaitconnind(struct sonode *so, int fmode, mblk_t **mpp)
{
        mblk_t *mp;
        sotpi_info_t *sti = SOTOTPI(so);
        int error = 0;

        ASSERT(MUTEX_NOT_HELD(&so->so_lock));
        mutex_enter(&so->so_lock);
check_error:
        if (so->so_error) {
                error = sogeterr(so, B_TRUE);
                if (error) {
                        mutex_exit(&so->so_lock);
                        return (error);
                }
        }

        if (sti->sti_conn_ind_head == NULL) {
                if (fmode & (FNDELAY|FNONBLOCK)) {
                        error = EWOULDBLOCK;
                        goto done;
                }

                if (so->so_state & SS_CLOSING) {
                        error = EINTR;
                        goto done;
                }

                if (!cv_wait_sig_swap(&so->so_acceptq_cv, &so->so_lock)) {
                        error = EINTR;
                        goto done;
                }
                goto check_error;
        }
        mp = sti->sti_conn_ind_head;
        sti->sti_conn_ind_head = mp->b_next;
        mp->b_next = NULL;
        if (sti->sti_conn_ind_head == NULL) {
                ASSERT(sti->sti_conn_ind_tail == mp);
                sti->sti_conn_ind_tail = NULL;
        }
        *mpp = mp;
done:
        mutex_exit(&so->so_lock);
        return (error);
}

/*
 * Flush a T_CONN_IND matching the sequence number from the list.
 * Return zero if found; non-zero otherwise.
 * This is called very infrequently thus it is ok to do a linear search.
 */
int
soflushconnind(struct sonode *so, t_scalar_t seqno)
{
        mblk_t *prevmp, *mp;
        struct T_conn_ind *tci;
        sotpi_info_t *sti = SOTOTPI(so);

        mutex_enter(&so->so_lock);
        for (prevmp = NULL, mp = sti->sti_conn_ind_head; mp != NULL;
            prevmp = mp, mp = mp->b_next) {
                tci = (struct T_conn_ind *)mp->b_rptr;
                if (tci->SEQ_number == seqno) {
                        dprintso(so, 1,
                            ("t_discon_ind: found T_CONN_IND %d\n", seqno));
                        /* Deleting last? */
                        if (sti->sti_conn_ind_tail == mp) {
                                sti->sti_conn_ind_tail = prevmp;
                        }
                        if (prevmp == NULL) {
                                /* Deleting first */
                                sti->sti_conn_ind_head = mp->b_next;
                        } else {
                                prevmp->b_next = mp->b_next;
                        }
                        mp->b_next = NULL;

                        ASSERT((sti->sti_conn_ind_head == NULL &&
                            sti->sti_conn_ind_tail == NULL) ||
                            (sti->sti_conn_ind_head != NULL &&
                            sti->sti_conn_ind_tail != NULL));

                        so->so_error = ECONNABORTED;
                        mutex_exit(&so->so_lock);

                        freemsg(mp);
                        return (0);
                }
        }
        mutex_exit(&so->so_lock);
        dprintso(so, 1, ("t_discon_ind: NOT found T_CONN_IND %d\n", seqno));
        return (-1);
}

/*
 * Wait until the socket is connected or there is an error.
 * fmode should contain any nonblocking flags. nosig should be
 * set if the caller does not want the wait to be interrupted by a signal.
 */
int
sowaitconnected(struct sonode *so, int fmode, int nosig)
{
        int error;

        ASSERT(MUTEX_HELD(&so->so_lock));

        while ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) ==
            SS_ISCONNECTING && so->so_error == 0) {

                dprintso(so, 1, ("waiting for SS_ISCONNECTED on %p\n",
                    (void *)so));
                if (fmode & (FNDELAY|FNONBLOCK))
                        return (EINPROGRESS);

                if (so->so_state & SS_CLOSING)
                        return (EINTR);

                if (nosig)
                        cv_wait(&so->so_state_cv, &so->so_lock);
                else if (!cv_wait_sig_swap(&so->so_state_cv, &so->so_lock)) {
                        /*
                         * Return EINTR and let the application use
                         * nonblocking techniques for detecting when
                         * the connection has been established.
                         */
                        return (EINTR);
                }
                dprintso(so, 1, ("awoken on %p\n", (void *)so));
        }

        if (so->so_error != 0) {
                error = sogeterr(so, B_TRUE);
                ASSERT(error != 0);
                dprintso(so, 1, ("sowaitconnected: error %d\n", error));
                return (error);
        }
        if (!(so->so_state & SS_ISCONNECTED)) {
                /*
                 * Could have received a T_ORDREL_IND or a T_DISCON_IND with
                 * zero errno. Or another thread could have consumed so_error
                 * e.g. by calling read.
                 */
                error = ECONNREFUSED;
                dprintso(so, 1, ("sowaitconnected: error %d\n", error));
                return (error);
        }
        return (0);
}


/*
 * Handle the signal generation aspect of urgent data.
 */
static void
so_oob_sig(struct sonode *so, int extrasig,
    strsigset_t *signals, strpollset_t *pollwakeups)
{
        sotpi_info_t *sti = SOTOTPI(so);

        ASSERT(MUTEX_HELD(&so->so_lock));

        ASSERT(so_verify_oobstate(so));
        ASSERT(sti->sti_oobsigcnt >= sti->sti_oobcnt);
        if (sti->sti_oobsigcnt > sti->sti_oobcnt) {
                /*
                 * Signal has already been generated once for this
                 * urgent "event". However, since TCP can receive updated
                 * urgent pointers we still generate a signal.
                 */
                ASSERT(so->so_state & SS_OOBPEND);
                if (extrasig) {
                        *signals |= S_RDBAND;
                        *pollwakeups |= POLLRDBAND;
                }
                return;
        }

        sti->sti_oobsigcnt++;
        ASSERT(sti->sti_oobsigcnt > 0); /* Wraparound */
        ASSERT(sti->sti_oobsigcnt > sti->sti_oobcnt);

        /*
         * Record (for select/poll) that urgent data is pending.
         */
        so->so_state |= SS_OOBPEND;
        /*
         * New urgent data on the way so forget about any old
         * urgent data.
         */
        so->so_state &= ~(SS_HAVEOOBDATA|SS_HADOOBDATA);
        if (so->so_oobmsg != NULL) {
                dprintso(so, 1, ("sock: discarding old oob\n"));
                freemsg(so->so_oobmsg);
                so->so_oobmsg = NULL;
        }
        *signals |= S_RDBAND;
        *pollwakeups |= POLLRDBAND;
        ASSERT(so_verify_oobstate(so));
}

/*
 * Handle the processing of the T_EXDATA_IND with urgent data.
 * Returns the T_EXDATA_IND if it should be queued on the read queue.
 */
/* ARGSUSED2 */
static mblk_t *
so_oob_exdata(struct sonode *so, mblk_t *mp,
        strsigset_t *signals, strpollset_t *pollwakeups)
{
        sotpi_info_t *sti = SOTOTPI(so);

        ASSERT(MUTEX_HELD(&so->so_lock));

        ASSERT(so_verify_oobstate(so));

        ASSERT(sti->sti_oobsigcnt > sti->sti_oobcnt);

        sti->sti_oobcnt++;
        ASSERT(sti->sti_oobcnt > 0);    /* wraparound? */
        ASSERT(sti->sti_oobsigcnt >= sti->sti_oobcnt);

        /*
         * Set MSGMARK for SIOCATMARK.
         */
        mp->b_flag |= MSGMARK;

        ASSERT(so_verify_oobstate(so));
        return (mp);
}

/*
 * Handle the processing of the actual urgent data.
 * Returns the data mblk if it should be queued on the read queue.
 */
static mblk_t *
so_oob_data(struct sonode *so, mblk_t *mp,
        strsigset_t *signals, strpollset_t *pollwakeups)
{
        sotpi_info_t *sti = SOTOTPI(so);

        ASSERT(MUTEX_HELD(&so->so_lock));

        ASSERT(so_verify_oobstate(so));

        ASSERT(sti->sti_oobsigcnt >= sti->sti_oobcnt);
        ASSERT(mp != NULL);
        /*
         * For OOBINLINE we keep the data in the T_EXDATA_IND.
         * Otherwise we store it in so_oobmsg.
         */
        ASSERT(so->so_oobmsg == NULL);
        if (so->so_options & SO_OOBINLINE) {
                *pollwakeups |= POLLIN | POLLRDNORM | POLLRDBAND;
                *signals |= S_INPUT | S_RDNORM;
        } else {
                *pollwakeups |= POLLRDBAND;
                so->so_state |= SS_HAVEOOBDATA;
                so->so_oobmsg = mp;
                mp = NULL;
        }
        ASSERT(so_verify_oobstate(so));
        return (mp);
}

/*
 * Caller must hold the mutex.
 * For delayed processing, save the T_DISCON_IND received
 * from below on sti_discon_ind_mp.
 * When the message is processed the framework will call:
 *      (*func)(so, mp);
 */
static void
so_save_discon_ind(struct sonode *so,
        mblk_t *mp,
        void (*func)(struct sonode *so, mblk_t *))
{
        sotpi_info_t *sti = SOTOTPI(so);

        ASSERT(MUTEX_HELD(&so->so_lock));

        /*
         * Discard new T_DISCON_IND if we have already received another.
         * Currently the earlier message can either be on sti_discon_ind_mp
         * or being processed.
         */
        if (sti->sti_discon_ind_mp != NULL || (so->so_flag & SOASYNC_UNBIND)) {
                zcmn_err(getzoneid(), CE_WARN,
                    "sockfs: received unexpected additional T_DISCON_IND\n");
                freemsg(mp);
                return;
        }
        mp->b_prev = (mblk_t *)func;
        mp->b_next = NULL;
        sti->sti_discon_ind_mp = mp;
}

/*
 * Caller must hold the mutex and make sure that either SOLOCKED
 * or SOASYNC_UNBIND is set. Called from so_unlock_single().
 * Perform delayed processing of T_DISCON_IND message on sti_discon_ind_mp.
 * Need to ensure that strsock_proto() will not end up sleeping for
 * SOASYNC_UNBIND, while executing this function.
 */
void
so_drain_discon_ind(struct sonode *so)
{
        mblk_t  *bp;
        void (*func)(struct sonode *so, mblk_t *);
        sotpi_info_t *sti = SOTOTPI(so);

        ASSERT(MUTEX_HELD(&so->so_lock));
        ASSERT(so->so_flag & (SOLOCKED|SOASYNC_UNBIND));

        /* Process T_DISCON_IND on sti_discon_ind_mp */
        if ((bp = sti->sti_discon_ind_mp) != NULL) {
                sti->sti_discon_ind_mp = NULL;
                func = (void (*)())bp->b_prev;
                bp->b_prev = NULL;

                /*
                 * This (*func) is supposed to generate a message downstream
                 * and we need to have a flag set until the corresponding
                 * upstream message reaches stream head.
                 * When processing T_DISCON_IND in strsock_discon_ind
                 * we hold SOASYN_UNBIND when sending T_UNBIND_REQ down and
                 * drop the flag after we get the ACK in strsock_proto.
                 */
                (void) (*func)(so, bp);
        }
}

/*
 * Caller must hold the mutex.
 * Remove the T_DISCON_IND on sti_discon_ind_mp.
 */
void
so_flush_discon_ind(struct sonode *so)
{
        mblk_t  *bp;
        sotpi_info_t *sti = SOTOTPI(so);

        ASSERT(MUTEX_HELD(&so->so_lock));

        /*
         * Remove T_DISCON_IND mblk at sti_discon_ind_mp.
         */
        if ((bp = sti->sti_discon_ind_mp) != NULL) {
                sti->sti_discon_ind_mp = NULL;
                bp->b_prev = NULL;
                freemsg(bp);
        }
}

/*
 * Caller must hold the mutex.
 *
 * This function is used to process the T_DISCON_IND message. It does
 * immediate processing when called from strsock_proto and delayed
 * processing of discon_ind saved on sti_discon_ind_mp when called from
 * so_drain_discon_ind. When a T_DISCON_IND message is saved in
 * sti_discon_ind_mp for delayed processing, this function is registered
 * as the callback function to process the message.
 *
 * SOASYNC_UNBIND should be held in this function, during the non-blocking
 * unbind operation, and should be released only after we receive the ACK
 * in strsock_proto, for the T_UNBIND_REQ sent here. Since SOLOCKED is not set,
 * no TPI messages would be sent down at this time. This is to prevent M_FLUSH
 * sent from either this function or tcp_unbind(), flushing away any TPI
 * message that is being sent down and stays in a lower module's queue.
 *
 * This function drops so_lock and grabs it again.
 */
static void
strsock_discon_ind(struct sonode *so, mblk_t *discon_mp)
{
        struct vnode *vp;
        struct stdata *stp;
        union T_primitives *tpr;
        struct T_unbind_req *ubr;
        mblk_t *mp;
        int error;
        sotpi_info_t *sti = SOTOTPI(so);

        ASSERT(MUTEX_HELD(&so->so_lock));
        ASSERT(discon_mp);
        ASSERT(discon_mp->b_rptr);

        tpr = (union T_primitives *)discon_mp->b_rptr;
        ASSERT(tpr->type == T_DISCON_IND);

        vp = SOTOV(so);
        stp = vp->v_stream;
        ASSERT(stp);

        /*
         * Not a listener
         */
        ASSERT((so->so_state & SS_ACCEPTCONN) == 0);

        /*
         * This assumes that the name space for DISCON_reason
         * is the errno name space.
         */
        soisdisconnected(so, tpr->discon_ind.DISCON_reason);
        sti->sti_laddr_valid = 0;
        sti->sti_faddr_valid = 0;

        /*
         * Unbind with the transport without blocking.
         * If we've already received a T_DISCON_IND do not unbind.
         *
         * If there is no preallocated unbind message, we have already
         * unbound with the transport
         *
         * If the socket is not bound, no need to unbind.
         */
        mp = sti->sti_unbind_mp;
        if (mp == NULL) {
                ASSERT(!(so->so_state & SS_ISBOUND));
                mutex_exit(&so->so_lock);
        } else if (!(so->so_state & SS_ISBOUND))  {
                mutex_exit(&so->so_lock);
        } else {
                sti->sti_unbind_mp = NULL;

                /*
                 * Is another T_DISCON_IND being processed.
                 */
                ASSERT((so->so_flag & SOASYNC_UNBIND) == 0);

                /*
                 * Make strsock_proto ignore T_OK_ACK and T_ERROR_ACK for
                 * this unbind. Set SOASYNC_UNBIND. This should be cleared
                 * only after we receive the ACK in strsock_proto.
                 */
                so->so_flag |= SOASYNC_UNBIND;
                ASSERT(!(so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)));
                so->so_state &= ~(SS_ISBOUND|SS_ACCEPTCONN);
                sti->sti_laddr_valid = 0;
                mutex_exit(&so->so_lock);

                /*
                 * Send down T_UNBIND_REQ ignoring flow control.
                 * XXX Assumes that MSG_IGNFLOW implies that this thread
                 * does not run service procedures.
                 */
                ASSERT(DB_TYPE(mp) == M_PROTO);
                ubr = (struct T_unbind_req *)mp->b_rptr;
                mp->b_wptr += sizeof (*ubr);
                ubr->PRIM_type = T_UNBIND_REQ;

                /*
                 * Flush the read and write side (except stream head read queue)
                 * and send down T_UNBIND_REQ.
                 */
                (void) putnextctl1(strvp2wq(SOTOV(so)), M_FLUSH, FLUSHRW);
                error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
                    MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0);
                /* LINTED - warning: statement has no consequent: if */
                if (error) {
                        eprintsoline(so, error);
                }
        }

        if (tpr->discon_ind.DISCON_reason != 0)
                strsetrerror(SOTOV(so), 0, 0, sogetrderr);
        strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
        strseteof(SOTOV(so), 1);
        /*
         * strseteof takes care of read side wakeups,
         * pollwakeups, and signals.
         */
        dprintso(so, 1, ("T_DISCON_IND: error %d\n", so->so_error));
        freemsg(discon_mp);


        pollwakeup(&stp->sd_pollist, POLLOUT);
        mutex_enter(&stp->sd_lock);

        /*
         * Wake sleeping write
         */
        if (stp->sd_flag & WSLEEP) {
                stp->sd_flag &= ~WSLEEP;
                cv_broadcast(&stp->sd_wrq->q_wait);
        }

        /*
         * strsendsig can handle multiple signals with a
         * single call.  Send SIGPOLL for S_OUTPUT event.
         */
        if (stp->sd_sigflags & S_OUTPUT)
                strsendsig(stp->sd_siglist, S_OUTPUT, 0, 0);

        mutex_exit(&stp->sd_lock);
        mutex_enter(&so->so_lock);
}

/*
 * This routine is registered with the stream head to receive M_PROTO
 * and M_PCPROTO messages.
 *
 * Returns NULL if the message was consumed.
 * Returns an mblk to make that mblk be processed (and queued) by the stream
 * head.
 *
 * Sets the return parameters (*wakeups, *firstmsgsigs, *allmsgsigs, and
 * *pollwakeups) for the stream head to take action on. Note that since
 * sockets always deliver SIGIO for every new piece of data this routine
 * never sets *firstmsgsigs; any signals are returned in *allmsgsigs.
 *
 * This routine handles all data related TPI messages independent of
 * the type of the socket i.e. it doesn't care if T_UNITDATA_IND message
 * arrive on a SOCK_STREAM.
 */
static mblk_t *
strsock_proto(vnode_t *vp, mblk_t *mp,
                strwakeup_t *wakeups, strsigset_t *firstmsgsigs,
                strsigset_t *allmsgsigs, strpollset_t *pollwakeups)
{
        union T_primitives *tpr;
        struct sonode *so;
        sotpi_info_t *sti;
        uint32_t auditing = AU_AUDITING();

        so = VTOSO(vp);
        sti = SOTOTPI(so);

        dprintso(so, 1, ("strsock_proto(%p, %p)\n", (void *)vp, (void *)mp));

        /* Set default return values */
        *firstmsgsigs = *wakeups = *allmsgsigs = *pollwakeups = 0;

        ASSERT(DB_TYPE(mp) == M_PROTO ||
            DB_TYPE(mp) == M_PCPROTO);

        if (MBLKL(mp) < sizeof (tpr->type)) {
                /* The message is too short to even contain the primitive */
                zcmn_err(getzoneid(), CE_WARN,
                    "sockfs: Too short TPI message received. Len = %ld\n",
                    (ptrdiff_t)(MBLKL(mp)));
                freemsg(mp);
                return (NULL);
        }
        if (!__TPI_PRIM_ISALIGNED(mp->b_rptr)) {
                /* The read pointer is not aligned correctly for TPI */
                zcmn_err(getzoneid(), CE_WARN,
                    "sockfs: Unaligned TPI message received. rptr = %p\n",
                    (void *)mp->b_rptr);
                freemsg(mp);
                return (NULL);
        }
        tpr = (union T_primitives *)mp->b_rptr;
        dprintso(so, 1, ("strsock_proto: primitive %d\n", tpr->type));

        switch (tpr->type) {

        case T_DATA_IND:
                if (MBLKL(mp) < sizeof (struct T_data_ind)) {
                        zcmn_err(getzoneid(), CE_WARN,
                            "sockfs: Too short T_DATA_IND. Len = %ld\n",
                            (ptrdiff_t)(MBLKL(mp)));
                        freemsg(mp);
                        return (NULL);
                }
                /*
                 * Ignore zero-length T_DATA_IND messages. These might be
                 * generated by some transports.
                 * This is needed to prevent read (which skips the M_PROTO
                 * part) to unexpectedly return 0 (or return EWOULDBLOCK
                 * on a non-blocking socket after select/poll has indicated
                 * that data is available).
                 */
                if (msgdsize(mp->b_cont) == 0) {
                        dprintso(so, 0,
                            ("strsock_proto: zero length T_DATA_IND\n"));
                        freemsg(mp);
                        return (NULL);
                }
                *allmsgsigs = S_INPUT | S_RDNORM;
                *pollwakeups = POLLIN | POLLRDNORM;
                *wakeups = RSLEEP;
                return (mp);

        case T_UNITDATA_IND: {
                struct T_unitdata_ind   *tudi = &tpr->unitdata_ind;
                void                    *addr;
                t_uscalar_t             addrlen;

                if (MBLKL(mp) < sizeof (struct T_unitdata_ind)) {
                        zcmn_err(getzoneid(), CE_WARN,
                            "sockfs: Too short T_UNITDATA_IND. Len = %ld\n",
                            (ptrdiff_t)(MBLKL(mp)));
                        freemsg(mp);
                        return (NULL);
                }

                /* Is this is not a connected datagram socket? */
                if ((so->so_mode & SM_CONNREQUIRED) ||
                    !(so->so_state & SS_ISCONNECTED)) {
                        /*
                         * Not a connected datagram socket. Look for
                         * the SO_UNIX_CLOSE option. If such an option is found
                         * discard the message (since it has no meaning
                         * unless connected).
                         */
                        if (so->so_family == AF_UNIX && msgdsize(mp) == 0 &&
                            tudi->OPT_length != 0) {
                                void *opt;
                                t_uscalar_t optlen = tudi->OPT_length;

                                opt = sogetoff(mp, tudi->OPT_offset,
                                    optlen, __TPI_ALIGN_SIZE);
                                if (opt == NULL) {
                                        /* The len/off falls outside mp */
                                        freemsg(mp);
                                        mutex_enter(&so->so_lock);
                                        soseterror(so, EPROTO);
                                        mutex_exit(&so->so_lock);
                                        zcmn_err(getzoneid(), CE_WARN,
                                            "sockfs: T_unidata_ind with "
                                            "invalid optlen/offset %u/%d\n",
                                            optlen, tudi->OPT_offset);
                                        return (NULL);
                                }
                                if (so_getopt_unix_close(opt, optlen)) {
                                        freemsg(mp);
                                        return (NULL);
                                }
                        }
                        *allmsgsigs = S_INPUT | S_RDNORM;
                        *pollwakeups = POLLIN | POLLRDNORM;
                        *wakeups = RSLEEP;
                        if (auditing)
                                audit_sock(T_UNITDATA_IND, strvp2wq(vp),
                                    mp, 0);
                        return (mp);
                }

                /*
                 * A connect datagram socket. For AF_INET{,6} we verify that
                 * the source address matches the "connected to" address.
                 * The semantics of AF_UNIX sockets is to not verify
                 * the source address.
                 * Note that this source address verification is transport
                 * specific. Thus the real fix would be to extent TPI
                 * to allow T_CONN_REQ messages to be send to connectionless
                 * transport providers and always let the transport provider
                 * do whatever filtering is needed.
                 *
                 * The verification/filtering semantics for transports
                 * other than AF_INET and AF_UNIX are unknown. The choice
                 * would be to either filter using bcmp or let all messages
                 * get through. This code does not filter other address
                 * families since this at least allows the application to
                 * work around any missing filtering.
                 *
                 * XXX Should we move filtering to UDP/ICMP???
                 * That would require passing e.g. a T_DISCON_REQ to UDP
                 * when the socket becomes unconnected.
                 */
                addrlen = tudi->SRC_length;
                /*
                 * The alignment restriction is really to strict but
                 * we want enough alignment to inspect the fields of
                 * a sockaddr_in.
                 */
                addr = sogetoff(mp, tudi->SRC_offset, addrlen,
                    __TPI_ALIGN_SIZE);
                if (addr == NULL) {
                        freemsg(mp);
                        mutex_enter(&so->so_lock);
                        soseterror(so, EPROTO);
                        mutex_exit(&so->so_lock);
                        zcmn_err(getzoneid(), CE_WARN,
                            "sockfs: T_unidata_ind with invalid "
                            "addrlen/offset %u/%d\n",
                            addrlen, tudi->SRC_offset);
                        return (NULL);
                }

                if (so->so_family == AF_INET) {
                        /*
                         * For AF_INET we allow wildcarding both sin_addr
                         * and sin_port.
                         */
                        struct sockaddr_in *faddr, *sin;

                        /* Prevent sti_faddr_sa from changing while accessed */
                        mutex_enter(&so->so_lock);
                        ASSERT(sti->sti_faddr_len ==
                            (socklen_t)sizeof (struct sockaddr_in));
                        faddr = (struct sockaddr_in *)sti->sti_faddr_sa;
                        sin = (struct sockaddr_in *)addr;
                        if (addrlen !=
                            (t_uscalar_t)sizeof (struct sockaddr_in) ||
                            (sin->sin_addr.s_addr != faddr->sin_addr.s_addr &&
                            faddr->sin_addr.s_addr != INADDR_ANY) ||
                            (so->so_type != SOCK_RAW &&
                            sin->sin_port != faddr->sin_port &&
                            faddr->sin_port != 0)) {
#ifdef DEBUG
                                dprintso(so, 0,
                                    ("sockfs: T_UNITDATA_IND mismatch: %s",
                                    pr_addr(so->so_family,
                                    (struct sockaddr *)addr, addrlen)));
                                dprintso(so, 0, (" - %s\n",
                                    pr_addr(so->so_family, sti->sti_faddr_sa,
                                    (t_uscalar_t)sti->sti_faddr_len)));
#endif /* DEBUG */
                                mutex_exit(&so->so_lock);
                                freemsg(mp);
                                return (NULL);
                        }
                        mutex_exit(&so->so_lock);
                } else if (so->so_family == AF_INET6) {
                        /*
                         * For AF_INET6 we allow wildcarding both sin6_addr
                         * and sin6_port.
                         */
                        struct sockaddr_in6 *faddr6, *sin6;
                        static struct in6_addr zeroes; /* inits to all zeros */

                        /* Prevent sti_faddr_sa from changing while accessed */
                        mutex_enter(&so->so_lock);
                        ASSERT(sti->sti_faddr_len ==
                            (socklen_t)sizeof (struct sockaddr_in6));
                        faddr6 = (struct sockaddr_in6 *)sti->sti_faddr_sa;
                        sin6 = (struct sockaddr_in6 *)addr;
                        /* XXX could we get a mapped address ::ffff:0.0.0.0 ? */
                        if (addrlen !=
                            (t_uscalar_t)sizeof (struct sockaddr_in6) ||
                            (!IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr,
                            &faddr6->sin6_addr) &&
                            !IN6_ARE_ADDR_EQUAL(&faddr6->sin6_addr, &zeroes)) ||
                            (so->so_type != SOCK_RAW &&
                            sin6->sin6_port != faddr6->sin6_port &&
                            faddr6->sin6_port != 0)) {
#ifdef DEBUG
                                dprintso(so, 0,
                                    ("sockfs: T_UNITDATA_IND mismatch: %s",
                                    pr_addr(so->so_family,
                                    (struct sockaddr *)addr, addrlen)));
                                dprintso(so, 0, (" - %s\n",
                                    pr_addr(so->so_family, sti->sti_faddr_sa,
                                    (t_uscalar_t)sti->sti_faddr_len)));
#endif /* DEBUG */
                                mutex_exit(&so->so_lock);
                                freemsg(mp);
                                return (NULL);
                        }
                        mutex_exit(&so->so_lock);
                } else if (so->so_family == AF_UNIX &&
                    msgdsize(mp->b_cont) == 0 &&
                    tudi->OPT_length != 0) {
                        /*
                         * Attempt to extract AF_UNIX
                         * SO_UNIX_CLOSE indication from options.
                         */
                        void *opt;
                        t_uscalar_t optlen = tudi->OPT_length;

                        opt = sogetoff(mp, tudi->OPT_offset,
                            optlen, __TPI_ALIGN_SIZE);
                        if (opt == NULL) {
                                /* The len/off falls outside mp */
                                freemsg(mp);
                                mutex_enter(&so->so_lock);
                                soseterror(so, EPROTO);
                                mutex_exit(&so->so_lock);
                                zcmn_err(getzoneid(), CE_WARN,
                                    "sockfs: T_unidata_ind with invalid "
                                    "optlen/offset %u/%d\n",
                                    optlen, tudi->OPT_offset);
                                return (NULL);
                        }
                        /*
                         * If we received a unix close indication mark the
                         * socket and discard this message.
                         */
                        if (so_getopt_unix_close(opt, optlen)) {
                                mutex_enter(&so->so_lock);
                                sobreakconn(so, ECONNRESET);
                                mutex_exit(&so->so_lock);
                                strsetrerror(SOTOV(so), 0, 0, sogetrderr);
                                freemsg(mp);
                                *pollwakeups = POLLIN | POLLRDNORM;
                                *allmsgsigs = S_INPUT | S_RDNORM;
                                *wakeups = RSLEEP;
                                return (NULL);
                        }
                }
                *allmsgsigs = S_INPUT | S_RDNORM;
                *pollwakeups = POLLIN | POLLRDNORM;
                *wakeups = RSLEEP;
                return (mp);
        }

        case T_OPTDATA_IND: {
                struct T_optdata_ind    *tdi = &tpr->optdata_ind;

                if (MBLKL(mp) < sizeof (struct T_optdata_ind)) {
                        zcmn_err(getzoneid(), CE_WARN,
                            "sockfs: Too short T_OPTDATA_IND. Len = %ld\n",
                            (ptrdiff_t)(MBLKL(mp)));
                        freemsg(mp);
                        return (NULL);
                }
                /*
                 * Allow zero-length messages carrying options.
                 * This is used when carrying the SO_UNIX_CLOSE option.
                 */
                if (so->so_family == AF_UNIX && msgdsize(mp->b_cont) == 0 &&
                    tdi->OPT_length != 0) {
                        /*
                         * Attempt to extract AF_UNIX close indication
                         * from the options. Ignore any other options -
                         * those are handled once the message is removed
                         * from the queue.
                         * The close indication message should not carry data.
                         */
                        void *opt;
                        t_uscalar_t optlen = tdi->OPT_length;

                        opt = sogetoff(mp, tdi->OPT_offset,
                            optlen, __TPI_ALIGN_SIZE);
                        if (opt == NULL) {
                                /* The len/off falls outside mp */
                                freemsg(mp);
                                mutex_enter(&so->so_lock);
                                soseterror(so, EPROTO);
                                mutex_exit(&so->so_lock);
                                zcmn_err(getzoneid(), CE_WARN,
                                    "sockfs: T_optdata_ind with invalid "
                                    "optlen/offset %u/%d\n",
                                    optlen, tdi->OPT_offset);
                                return (NULL);
                        }
                        /*
                         * If we received a close indication mark the
                         * socket and discard this message.
                         */
                        if (so_getopt_unix_close(opt, optlen)) {
                                mutex_enter(&so->so_lock);
                                socantsendmore(so);
                                sti->sti_faddr_valid = 0;
                                mutex_exit(&so->so_lock);
                                strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
                                freemsg(mp);
                                return (NULL);
                        }
                }
                *allmsgsigs = S_INPUT | S_RDNORM;
                *pollwakeups = POLLIN | POLLRDNORM;
                *wakeups = RSLEEP;
                return (mp);
        }

        case T_EXDATA_IND: {
                mblk_t          *mctl, *mdata;
                mblk_t *lbp;
                union T_primitives *tprp;
                struct stdata   *stp;
                queue_t *qp;

                if (MBLKL(mp) < sizeof (struct T_exdata_ind)) {
                        zcmn_err(getzoneid(), CE_WARN,
                            "sockfs: Too short T_EXDATA_IND. Len = %ld\n",
                            (ptrdiff_t)(MBLKL(mp)));
                        freemsg(mp);
                        return (NULL);
                }
                /*
                 * Ignore zero-length T_EXDATA_IND messages. These might be
                 * generated by some transports.
                 *
                 * This is needed to prevent read (which skips the M_PROTO
                 * part) to unexpectedly return 0 (or return EWOULDBLOCK
                 * on a non-blocking socket after select/poll has indicated
                 * that data is available).
                 */
                dprintso(so, 1,
                    ("T_EXDATA_IND(%p): counts %d/%d state %s\n",
                    (void *)vp, sti->sti_oobsigcnt, sti->sti_oobcnt,
                    pr_state(so->so_state, so->so_mode)));

                if (msgdsize(mp->b_cont) == 0) {
                        dprintso(so, 0,
                            ("strsock_proto: zero length T_EXDATA_IND\n"));
                        freemsg(mp);
                        return (NULL);
                }

                /*
                 * Split into the T_EXDATA_IND and the M_DATA part.
                 * We process these three pieces separately:
                 *      signal generation
                 *      handling T_EXDATA_IND
                 *      handling M_DATA component
                 */
                mctl = mp;
                mdata = mctl->b_cont;
                mctl->b_cont = NULL;
                mutex_enter(&so->so_lock);
                so_oob_sig(so, 0, allmsgsigs, pollwakeups);
                mctl = so_oob_exdata(so, mctl, allmsgsigs, pollwakeups);
                mdata = so_oob_data(so, mdata, allmsgsigs, pollwakeups);

                stp = vp->v_stream;
                ASSERT(stp != NULL);
                qp = _RD(stp->sd_wrq);

                mutex_enter(QLOCK(qp));
                lbp = qp->q_last;

                /*
                 * We want to avoid queueing up a string of T_EXDATA_IND
                 * messages with no intervening data messages at the stream
                 * head. These messages contribute to the total message
                 * count. Eventually this can lead to STREAMS flow contol
                 * and also cause TCP to advertise a zero window condition
                 * to the peer. This can happen in the degenerate case where
                 * the sender and receiver exchange only OOB data. The sender
                 * only sends messages with MSG_OOB flag and the receiver
                 * receives only MSG_OOB messages and does not use SO_OOBINLINE.
                 * An example of this scenario has been reported in applications
                 * that use OOB data to exchange heart beats. Flow control
                 * relief will never happen if the application only reads OOB
                 * data which is done directly by sorecvoob() and the
                 * T_EXDATA_IND messages at the streamhead won't be consumed.
                 * Note that there is no correctness issue in compressing the
                 * string of T_EXDATA_IND messages into a single T_EXDATA_IND
                 * message. A single read that does not specify MSG_OOB will
                 * read across all the marks in a loop in sotpi_recvmsg().
                 * Each mark is individually distinguishable only if the
                 * T_EXDATA_IND messages are separated by data messages.
                 */
                if ((qp->q_first != NULL) && (DB_TYPE(lbp) == M_PROTO)) {
                        tprp = (union T_primitives *)lbp->b_rptr;
                        if ((tprp->type == T_EXDATA_IND) &&
                            !(so->so_options & SO_OOBINLINE)) {

                                /*
                                 * free the new M_PROTO message
                                 */
                                freemsg(mctl);

                                /*
                                 * adjust the OOB count and OOB signal count
                                 * just incremented for the new OOB data.
                                 */
                                sti->sti_oobcnt--;
                                sti->sti_oobsigcnt--;
                                mutex_exit(QLOCK(qp));
                                mutex_exit(&so->so_lock);
                                return (NULL);
                        }
                }
                mutex_exit(QLOCK(qp));

                /*
                 * Pass the T_EXDATA_IND and the M_DATA back separately
                 * by using b_next linkage. (The stream head will queue any
                 * b_next linked messages separately.) This is needed
                 * since MSGMARK applies to the last by of the message
                 * hence we can not have any M_DATA component attached
                 * to the marked T_EXDATA_IND. Note that the stream head
                 * will not consolidate M_DATA messages onto an MSGMARK'ed
                 * message in order to preserve the constraint that
                 * the T_EXDATA_IND always is a separate message.
                 */
                ASSERT(mctl != NULL);
                mctl->b_next = mdata;
                mp = mctl;
#ifdef DEBUG
                if (mdata == NULL) {
                        dprintso(so, 1,
                            ("after outofline T_EXDATA_IND(%p): "
                            "counts %d/%d  poll 0x%x sig 0x%x state %s\n",
                            (void *)vp, sti->sti_oobsigcnt,
                            sti->sti_oobcnt, *pollwakeups, *allmsgsigs,
                            pr_state(so->so_state, so->so_mode)));
                } else {
                        dprintso(so, 1,
                            ("after inline T_EXDATA_IND(%p): "
                            "counts %d/%d  poll 0x%x sig 0x%x state %s\n",
                            (void *)vp, sti->sti_oobsigcnt,
                            sti->sti_oobcnt, *pollwakeups, *allmsgsigs,
                            pr_state(so->so_state, so->so_mode)));
                }
#endif /* DEBUG */
                mutex_exit(&so->so_lock);
                *wakeups = RSLEEP;
                return (mp);
        }

        case T_CONN_CON: {
                struct T_conn_con       *conn_con;
                void                    *addr;
                t_uscalar_t             addrlen;

                /*
                 * Verify the state, update the state to ISCONNECTED,
                 * record the potentially new address in the message,
                 * and drop the message.
                 */
                if (MBLKL(mp) < sizeof (struct T_conn_con)) {
                        zcmn_err(getzoneid(), CE_WARN,
                            "sockfs: Too short T_CONN_CON. Len = %ld\n",
                            (ptrdiff_t)(MBLKL(mp)));
                        freemsg(mp);
                        return (NULL);
                }

                mutex_enter(&so->so_lock);
                if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) !=
                    SS_ISCONNECTING) {
                        mutex_exit(&so->so_lock);
                        dprintso(so, 1,
                            ("T_CONN_CON: state %x\n", so->so_state));
                        freemsg(mp);
                        return (NULL);
                }

                conn_con = &tpr->conn_con;
                addrlen = conn_con->RES_length;
                /*
                 * Allow the address to be of different size than sent down
                 * in the T_CONN_REQ as long as it doesn't exceed the maxlen.
                 * For AF_UNIX require the identical length.
                 */
                if (so->so_family == AF_UNIX ?
                    addrlen != (t_uscalar_t)sizeof (sti->sti_ux_laddr) :
                    addrlen > (t_uscalar_t)sti->sti_faddr_maxlen) {
                        zcmn_err(getzoneid(), CE_WARN,
                            "sockfs: T_conn_con with different "
                            "length %u/%d\n",
                            addrlen, conn_con->RES_length);
                        soisdisconnected(so, EPROTO);
                        sti->sti_laddr_valid = 0;
                        sti->sti_faddr_valid = 0;
                        mutex_exit(&so->so_lock);
                        strsetrerror(SOTOV(so), 0, 0, sogetrderr);
                        strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
                        strseteof(SOTOV(so), 1);
                        freemsg(mp);
                        /*
                         * strseteof takes care of read side wakeups,
                         * pollwakeups, and signals.
                         */
                        *wakeups = WSLEEP;
                        *allmsgsigs = S_OUTPUT;
                        *pollwakeups = POLLOUT;
                        return (NULL);
                }
                addr = sogetoff(mp, conn_con->RES_offset, addrlen, 1);
                if (addr == NULL) {
                        zcmn_err(getzoneid(), CE_WARN,
                            "sockfs: T_conn_con with invalid "
                            "addrlen/offset %u/%d\n",
                            addrlen, conn_con->RES_offset);
                        mutex_exit(&so->so_lock);
                        strsetrerror(SOTOV(so), 0, 0, sogetrderr);
                        strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
                        strseteof(SOTOV(so), 1);
                        freemsg(mp);
                        /*
                         * strseteof takes care of read side wakeups,
                         * pollwakeups, and signals.
                         */
                        *wakeups = WSLEEP;
                        *allmsgsigs = S_OUTPUT;
                        *pollwakeups = POLLOUT;
                        return (NULL);
                }

                /*
                 * Save for getpeername.
                 */
                if (so->so_family != AF_UNIX) {
                        sti->sti_faddr_len = (socklen_t)addrlen;
                        ASSERT(sti->sti_faddr_len <= sti->sti_faddr_maxlen);
                        bcopy(addr, sti->sti_faddr_sa, addrlen);
                        sti->sti_faddr_valid = 1;
                }

                if (so->so_peercred != NULL)
                        crfree(so->so_peercred);
                so->so_peercred = msg_getcred(mp, &so->so_cpid);
                if (so->so_peercred != NULL)
                        crhold(so->so_peercred);

                /* Wakeup anybody sleeping in sowaitconnected */
                soisconnected(so);
                mutex_exit(&so->so_lock);

                /*
                 * The socket is now available for sending data.
                 */
                *wakeups = WSLEEP;
                *allmsgsigs = S_OUTPUT;
                *pollwakeups = POLLOUT;
                freemsg(mp);
                return (NULL);
        }

        case T_CONN_IND:
                /*
                 * Verify the min size and queue the message on
                 * the sti_conn_ind_head/tail list.
                 */
                if (MBLKL(mp) < sizeof (struct T_conn_ind)) {
                        zcmn_err(getzoneid(), CE_WARN,
                            "sockfs: Too short T_CONN_IND. Len = %ld\n",
                            (ptrdiff_t)(MBLKL(mp)));
                        freemsg(mp);
                        return (NULL);
                }

                if (auditing)
                        audit_sock(T_CONN_IND, strvp2wq(vp), mp, 0);
                if (!(so->so_state & SS_ACCEPTCONN)) {
                        zcmn_err(getzoneid(), CE_WARN,
                            "sockfs: T_conn_ind on non-listening socket\n");
                        freemsg(mp);
                        return (NULL);
                }

                soqueueconnind(so, mp);
                *allmsgsigs = S_INPUT | S_RDNORM;
                *pollwakeups = POLLIN | POLLRDNORM;
                *wakeups = RSLEEP;
                return (NULL);

        case T_ORDREL_IND:
                if (MBLKL(mp) < sizeof (struct T_ordrel_ind)) {
                        zcmn_err(getzoneid(), CE_WARN,
                            "sockfs: Too short T_ORDREL_IND. Len = %ld\n",
                            (ptrdiff_t)(MBLKL(mp)));
                        freemsg(mp);
                        return (NULL);
                }

                /*
                 * Some providers send this when not fully connected.
                 * SunLink X.25 needs to retrieve disconnect reason after
                 * disconnect for compatibility. It uses T_ORDREL_IND
                 * instead of T_DISCON_IND so that it may use the
                 * endpoint after a connect failure to retrieve the
                 * reason using an ioctl. Thus we explicitly clear
                 * SS_ISCONNECTING here for SunLink X.25.
                 * This is a needed TPI violation.
                 */
                mutex_enter(&so->so_lock);
                so->so_state &= ~SS_ISCONNECTING;
                socantrcvmore(so);
                mutex_exit(&so->so_lock);
                strseteof(SOTOV(so), 1);
                /*
                 * strseteof takes care of read side wakeups,
                 * pollwakeups, and signals.
                 */
                freemsg(mp);
                return (NULL);

        case T_DISCON_IND:
                if (MBLKL(mp) < sizeof (struct T_discon_ind)) {
                        zcmn_err(getzoneid(), CE_WARN,
                            "sockfs: Too short T_DISCON_IND. Len = %ld\n",
                            (ptrdiff_t)(MBLKL(mp)));
                        freemsg(mp);
                        return (NULL);
                }
                if (so->so_state & SS_ACCEPTCONN) {
                        /*
                         * This is a listener. Look for a queued T_CONN_IND
                         * with a matching sequence number and remove it
                         * from the list.
                         * It is normal to not find the sequence number since
                         * the soaccept might have already dequeued it
                         * (in which case the T_CONN_RES will fail with
                         * TBADSEQ).
                         */
                        (void) soflushconnind(so, tpr->discon_ind.SEQ_number);
                        freemsg(mp);
                        return (0);
                }

                /*
                 * Not a listener
                 *
                 * If SS_CANTRCVMORE for AF_UNIX ignore the discon_reason.
                 * Such a discon_ind appears when the peer has first done
                 * a shutdown() followed by a close() in which case we just
                 * want to record socantsendmore.
                 * In this case sockfs first receives a T_ORDREL_IND followed
                 * by a T_DISCON_IND.
                 * Note that for other transports (e.g. TCP) we need to handle
                 * the discon_ind in this case since it signals an error.
                 */
                mutex_enter(&so->so_lock);
                if ((so->so_state & SS_CANTRCVMORE) &&
                    (so->so_family == AF_UNIX)) {
                        socantsendmore(so);
                        sti->sti_faddr_valid = 0;
                        mutex_exit(&so->so_lock);
                        strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
                        dprintso(so, 1,
                            ("T_DISCON_IND: error %d\n", so->so_error));
                        freemsg(mp);
                        /*
                         * Set these variables for caller to process them.
                         * For the else part where T_DISCON_IND is processed,
                         * this will be done in the function being called
                         * (strsock_discon_ind())
                         */
                        *wakeups = WSLEEP;
                        *allmsgsigs = S_OUTPUT;
                        *pollwakeups = POLLOUT;
                } else if (so->so_flag & (SOASYNC_UNBIND | SOLOCKED)) {
                        /*
                         * Deferred processing of T_DISCON_IND
                         */
                        so_save_discon_ind(so, mp, strsock_discon_ind);
                        mutex_exit(&so->so_lock);
                } else {
                        /*
                         * Process T_DISCON_IND now
                         */
                        (void) strsock_discon_ind(so, mp);
                        mutex_exit(&so->so_lock);
                }
                return (NULL);

        case T_UDERROR_IND: {
                struct T_uderror_ind    *tudi = &tpr->uderror_ind;
                void                    *addr;
                t_uscalar_t             addrlen;
                int                     error;

                dprintso(so, 0,
                    ("T_UDERROR_IND: error %d\n", tudi->ERROR_type));

                if (MBLKL(mp) < sizeof (struct T_uderror_ind)) {
                        zcmn_err(getzoneid(), CE_WARN,
                            "sockfs: Too short T_UDERROR_IND. Len = %ld\n",
                            (ptrdiff_t)(MBLKL(mp)));
                        freemsg(mp);
                        return (NULL);
                }
                /* Ignore on connection-oriented transports */
                if (so->so_mode & SM_CONNREQUIRED) {
                        freemsg(mp);
                        eprintsoline(so, 0);
                        zcmn_err(getzoneid(), CE_WARN,
                            "sockfs: T_uderror_ind on connection-oriented "
                            "transport\n");
                        return (NULL);
                }
                addrlen = tudi->DEST_length;
                addr = sogetoff(mp, tudi->DEST_offset, addrlen, 1);
                if (addr == NULL) {
                        zcmn_err(getzoneid(), CE_WARN,
                            "sockfs: T_uderror_ind with invalid "
                            "addrlen/offset %u/%d\n",
                            addrlen, tudi->DEST_offset);
                        freemsg(mp);
                        return (NULL);
                }

                /* Verify source address for connected socket. */
                mutex_enter(&so->so_lock);
                if (so->so_state & SS_ISCONNECTED) {
                        void *faddr;
                        t_uscalar_t faddr_len;
                        boolean_t match = B_FALSE;

                        switch (so->so_family) {
                        case AF_INET: {
                                /* Compare just IP address and port */
                                struct sockaddr_in *sin1, *sin2;

                                sin1 = (struct sockaddr_in *)sti->sti_faddr_sa;
                                sin2 = (struct sockaddr_in *)addr;
                                if (addrlen == sizeof (struct sockaddr_in) &&
                                    sin1->sin_port == sin2->sin_port &&
                                    sin1->sin_addr.s_addr ==
                                    sin2->sin_addr.s_addr)
                                        match = B_TRUE;
                                break;
                        }
                        case AF_INET6: {
                                /* Compare just IP address and port. Not flow */
                                struct sockaddr_in6 *sin1, *sin2;

                                sin1 = (struct sockaddr_in6 *)sti->sti_faddr_sa;
                                sin2 = (struct sockaddr_in6 *)addr;
                                if (addrlen == sizeof (struct sockaddr_in6) &&
                                    sin1->sin6_port == sin2->sin6_port &&
                                    IN6_ARE_ADDR_EQUAL(&sin1->sin6_addr,
                                    &sin2->sin6_addr))
                                        match = B_TRUE;
                                break;
                        }
                        case AF_UNIX:
                                faddr = &sti->sti_ux_faddr;
                                faddr_len =
                                    (t_uscalar_t)sizeof (sti->sti_ux_faddr);
                                if (faddr_len == addrlen &&
                                    bcmp(addr, faddr, addrlen) == 0)
                                        match = B_TRUE;
                                break;
                        default:
                                faddr = sti->sti_faddr_sa;
                                faddr_len = (t_uscalar_t)sti->sti_faddr_len;
                                if (faddr_len == addrlen &&
                                    bcmp(addr, faddr, addrlen) == 0)
                                        match = B_TRUE;
                                break;
                        }

                        if (!match) {
#ifdef DEBUG
                                dprintso(so, 0,
                                    ("sockfs: T_UDERR_IND mismatch: %s - ",
                                    pr_addr(so->so_family,
                                    (struct sockaddr *)addr, addrlen)));
                                dprintso(so, 0, ("%s\n",
                                    pr_addr(so->so_family, sti->sti_faddr_sa,
                                    sti->sti_faddr_len)));
#endif /* DEBUG */
                                mutex_exit(&so->so_lock);
                                freemsg(mp);
                                return (NULL);
                        }
                        /*
                         * Make the write error nonpersistent. If the error
                         * is zero we use ECONNRESET.
                         * This assumes that the name space for ERROR_type
                         * is the errno name space.
                         */
                        if (tudi->ERROR_type != 0)
                                error = tudi->ERROR_type;
                        else
                                error = ECONNRESET;

                        soseterror(so, error);
                        mutex_exit(&so->so_lock);
                        strsetrerror(SOTOV(so), 0, 0, sogetrderr);
                        strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
                        *wakeups = RSLEEP | WSLEEP;
                        *allmsgsigs = S_INPUT | S_RDNORM | S_OUTPUT;
                        *pollwakeups = POLLIN | POLLRDNORM | POLLOUT;
                        freemsg(mp);
                        return (NULL);
                }
                /*
                 * If the application asked for delayed errors
                 * record the T_UDERROR_IND sti_eaddr_mp and the reason in
                 * sti_delayed_error for delayed error posting. If the reason
                 * is zero use ECONNRESET.
                 * Note that delayed error indications do not make sense for
                 * AF_UNIX sockets since sendto checks that the destination
                 * address is valid at the time of the sendto.
                 */
                if (!(so->so_options & SO_DGRAM_ERRIND)) {
                        mutex_exit(&so->so_lock);
                        freemsg(mp);
                        return (NULL);
                }
                if (sti->sti_eaddr_mp != NULL)
                        freemsg(sti->sti_eaddr_mp);

                sti->sti_eaddr_mp = mp;
                if (tudi->ERROR_type != 0)
                        error = tudi->ERROR_type;
                else
                        error = ECONNRESET;
                sti->sti_delayed_error = (ushort_t)error;
                mutex_exit(&so->so_lock);
                return (NULL);
        }

        case T_ERROR_ACK:
                dprintso(so, 0,
                    ("strsock_proto: T_ERROR_ACK for %d, error %d/%d\n",
                    tpr->error_ack.ERROR_prim,
                    tpr->error_ack.TLI_error,
                    tpr->error_ack.UNIX_error));

                if (MBLKL(mp) < sizeof (struct T_error_ack)) {
                        zcmn_err(getzoneid(), CE_WARN,
                            "sockfs: Too short T_ERROR_ACK. Len = %ld\n",
                            (ptrdiff_t)(MBLKL(mp)));
                        freemsg(mp);
                        return (NULL);
                }
                /*
                 * Check if we were waiting for the async message
                 */
                mutex_enter(&so->so_lock);
                if ((so->so_flag & SOASYNC_UNBIND) &&
                    tpr->error_ack.ERROR_prim == T_UNBIND_REQ) {
                        so_unlock_single(so, SOASYNC_UNBIND);
                        mutex_exit(&so->so_lock);
                        freemsg(mp);
                        return (NULL);
                }
                mutex_exit(&so->so_lock);
                soqueueack(so, mp);
                return (NULL);

        case T_OK_ACK:
                if (MBLKL(mp) < sizeof (struct T_ok_ack)) {
                        zcmn_err(getzoneid(), CE_WARN,
                            "sockfs: Too short T_OK_ACK. Len = %ld\n",
                            (ptrdiff_t)(MBLKL(mp)));
                        freemsg(mp);
                        return (NULL);
                }
                /*
                 * Check if we were waiting for the async message
                 */
                mutex_enter(&so->so_lock);
                if ((so->so_flag & SOASYNC_UNBIND) &&
                    tpr->ok_ack.CORRECT_prim == T_UNBIND_REQ) {
                        dprintso(so, 1,
                            ("strsock_proto: T_OK_ACK async unbind\n"));
                        so_unlock_single(so, SOASYNC_UNBIND);
                        mutex_exit(&so->so_lock);
                        freemsg(mp);
                        return (NULL);
                }
                mutex_exit(&so->so_lock);
                soqueueack(so, mp);
                return (NULL);

        case T_INFO_ACK:
                if (MBLKL(mp) < sizeof (struct T_info_ack)) {
                        zcmn_err(getzoneid(), CE_WARN,
                            "sockfs: Too short T_INFO_ACK. Len = %ld\n",
                            (ptrdiff_t)(MBLKL(mp)));
                        freemsg(mp);
                        return (NULL);
                }
                soqueueack(so, mp);
                return (NULL);

        case T_CAPABILITY_ACK:
                /*
                 * A T_capability_ack need only be large enough to hold
                 * the PRIM_type and CAP_bits1 fields; checking for anything
                 * larger might reject a correct response from an older
                 * provider.
                 */
                if (MBLKL(mp) < 2 * sizeof (t_uscalar_t)) {
                        zcmn_err(getzoneid(), CE_WARN,
                            "sockfs: Too short T_CAPABILITY_ACK. Len = %ld\n",
                            (ptrdiff_t)(MBLKL(mp)));
                        freemsg(mp);
                        return (NULL);
                }
                soqueueack(so, mp);
                return (NULL);

        case T_BIND_ACK:
                if (MBLKL(mp) < sizeof (struct T_bind_ack)) {
                        zcmn_err(getzoneid(), CE_WARN,
                            "sockfs: Too short T_BIND_ACK. Len = %ld\n",
                            (ptrdiff_t)(MBLKL(mp)));
                        freemsg(mp);
                        return (NULL);
                }
                soqueueack(so, mp);
                return (NULL);

        case T_OPTMGMT_ACK:
                if (MBLKL(mp) < sizeof (struct T_optmgmt_ack)) {
                        zcmn_err(getzoneid(), CE_WARN,
                            "sockfs: Too short T_OPTMGMT_ACK. Len = %ld\n",
                            (ptrdiff_t)(MBLKL(mp)));
                        freemsg(mp);
                        return (NULL);
                }
                soqueueack(so, mp);
                return (NULL);
        default:
#ifdef DEBUG
                zcmn_err(getzoneid(), CE_WARN,
                    "sockfs: unknown TPI primitive %d received\n",
                    tpr->type);
#endif /* DEBUG */
                freemsg(mp);
                return (NULL);
        }
}

/*
 * This routine is registered with the stream head to receive other
 * (non-data, and non-proto) messages.
 *
 * Returns NULL if the message was consumed.
 * Returns an mblk to make that mblk be processed by the stream head.
 *
 * Sets the return parameters (*wakeups, *firstmsgsigs, *allmsgsigs, and
 * *pollwakeups) for the stream head to take action on.
 */
static mblk_t *
strsock_misc(vnode_t *vp, mblk_t *mp,
                strwakeup_t *wakeups, strsigset_t *firstmsgsigs,
                strsigset_t *allmsgsigs, strpollset_t *pollwakeups)
{
        struct sonode *so;
        sotpi_info_t *sti;

        so = VTOSO(vp);
        sti = SOTOTPI(so);

        dprintso(so, 1, ("strsock_misc(%p, %p, 0x%x)\n",
            (void *)vp, (void *)mp, DB_TYPE(mp)));

        /* Set default return values */
        *wakeups = *allmsgsigs = *firstmsgsigs = *pollwakeups = 0;

        switch (DB_TYPE(mp)) {
        case M_PCSIG:
                /*
                 * This assumes that an M_PCSIG for the urgent data arrives
                 * before the corresponding T_EXDATA_IND.
                 *
                 * Note: Just like in SunOS 4.X and 4.4BSD a poll will be
                 * awoken before the urgent data shows up.
                 * For OOBINLINE this can result in select returning
                 * only exceptions as opposed to except|read.
                 */
                if (*mp->b_rptr == SIGURG) {
                        mutex_enter(&so->so_lock);
                        dprintso(so, 1,
                            ("SIGURG(%p): counts %d/%d state %s\n",
                            (void *)vp, sti->sti_oobsigcnt, sti->sti_oobcnt,
                            pr_state(so->so_state, so->so_mode)));
                        so_oob_sig(so, 1, allmsgsigs, pollwakeups);
                        dprintso(so, 1,
                            ("after SIGURG(%p): counts %d/%d "
                            " poll 0x%x sig 0x%x state %s\n",
                            (void *)vp, sti->sti_oobsigcnt, sti->sti_oobcnt,
                            *pollwakeups, *allmsgsigs,
                            pr_state(so->so_state, so->so_mode)));
                        mutex_exit(&so->so_lock);
                }
                freemsg(mp);
                return (NULL);

        case M_SIG:
        case M_HANGUP:
        case M_UNHANGUP:
        case M_ERROR:
                /* M_ERRORs etc are ignored */
                freemsg(mp);
                return (NULL);

        case M_FLUSH:
                /*
                 * Do not flush read queue. If the M_FLUSH
                 * arrives because of an impending T_discon_ind
                 * we still have to keep any queued data - this is part of
                 * socket semantics.
                 */
                if (*mp->b_rptr & FLUSHW) {
                        *mp->b_rptr &= ~FLUSHR;
                        return (mp);
                }
                freemsg(mp);
                return (NULL);

        default:
                return (mp);
        }
}


/* Register to receive signals for certain events */
int
so_set_asyncsigs(vnode_t *vp, pid_t pgrp, int events, int mode, cred_t *cr)
{
        struct strsigset ss;
        int32_t rval;

        /*
         * Note that SOLOCKED will be set except for the call from soaccept().
         */
        ASSERT(!mutex_owned(&VTOSO(vp)->so_lock));
        ss.ss_pid = pgrp;
        ss.ss_events = events;
        return (strioctl(vp, I_ESETSIG, (intptr_t)&ss, mode, K_TO_K, cr,
            &rval));
}


/* Register for events matching the SS_ASYNC flag */
int
so_set_events(struct sonode *so, vnode_t *vp, cred_t *cr)
{
        int events = so->so_state & SS_ASYNC ?
            S_RDBAND | S_BANDURG | S_RDNORM | S_OUTPUT :
            S_RDBAND | S_BANDURG;

        return (so_set_asyncsigs(vp, so->so_pgrp, events, 0, cr));
}


/* Change the SS_ASYNC flag, and update signal delivery if needed */
int
so_flip_async(struct sonode *so, vnode_t *vp, int mode, cred_t *cr)
{
        ASSERT(mutex_owned(&so->so_lock));
        if (so->so_pgrp != 0) {
                int error;
                int events = so->so_state & SS_ASYNC ?          /* Old flag */
                    S_RDBAND | S_BANDURG :                      /* New sigs */
                    S_RDBAND | S_BANDURG | S_RDNORM | S_OUTPUT;

                so_lock_single(so);
                mutex_exit(&so->so_lock);

                error = so_set_asyncsigs(vp, so->so_pgrp, events, mode, cr);

                mutex_enter(&so->so_lock);
                so_unlock_single(so, SOLOCKED);
                if (error)
                        return (error);
        }
        so->so_state ^= SS_ASYNC;
        return (0);
}

/*
 * Set new pid/pgrp for SIGPOLL (or SIGIO for FIOASYNC mode), replacing
 * any existing one.  If passed zero, just clear the existing one.
 */
int
so_set_siggrp(struct sonode *so, vnode_t *vp, pid_t pgrp, int mode, cred_t *cr)
{
        int events = so->so_state & SS_ASYNC ?
            S_RDBAND | S_BANDURG | S_RDNORM | S_OUTPUT :
            S_RDBAND | S_BANDURG;
        int error;

        ASSERT(mutex_owned(&so->so_lock));

        /*
         * Change socket process (group).
         *
         * strioctl (via so_set_asyncsigs) will perform permission check and
         * also keep a PID_HOLD to prevent the pid from being reused.
         */
        so_lock_single(so);
        mutex_exit(&so->so_lock);

        if (pgrp != 0) {
                dprintso(so, 1, ("setown: adding pgrp %d ev 0x%x\n",
                    pgrp, events));
                error = so_set_asyncsigs(vp, pgrp, events, mode, cr);
                if (error != 0) {
                        eprintsoline(so, error);
                        goto bad;
                }
        }
        /* Remove the previously registered process/group */
        if (so->so_pgrp != 0) {
                dprintso(so, 1, ("setown: removing pgrp %d\n", so->so_pgrp));
                error = so_set_asyncsigs(vp, so->so_pgrp, 0, mode, cr);
                if (error != 0) {
                        eprintsoline(so, error);
                        error = 0;
                }
        }
        mutex_enter(&so->so_lock);
        so_unlock_single(so, SOLOCKED);
        so->so_pgrp = pgrp;
        return (0);
bad:
        mutex_enter(&so->so_lock);
        so_unlock_single(so, SOLOCKED);
        return (error);
}

/*
 * Wrapper for getmsg. If the socket has been converted to a stream
 * pass the request to the stream head.
 */
int
sock_getmsg(
        struct vnode *vp,
        struct strbuf *mctl,
        struct strbuf *mdata,
        uchar_t *prip,
        int *flagsp,
        int fmode,
        rval_t *rvp
)
{
        struct sonode *so;

        ASSERT(vp->v_type == VSOCK);
        /*
         * Use the stream head to find the real socket vnode.
         * This is needed when namefs sits above sockfs.  Some
         * sockets (like SCTP) are not streams.
         */
        if (!vp->v_stream) {
                return (ENOSTR);
        }
        ASSERT(vp->v_stream->sd_vnode);
        vp = vp->v_stream->sd_vnode;
        ASSERT(vn_matchops(vp, socket_vnodeops));
        so = VTOSO(vp);

        dprintso(so, 1, ("sock_getmsg(%p) %s\n",
            (void *)so, pr_state(so->so_state, so->so_mode)));

        if (so->so_version == SOV_STREAM) {
                /* The imaginary "sockmod" has been popped - act as a stream */
                return (strgetmsg(vp, mctl, mdata, prip, flagsp, fmode, rvp));
        }
        eprintsoline(so, ENOSTR);
        return (ENOSTR);
}

/*
 * Wrapper for putmsg. If the socket has been converted to a stream
 * pass the request to the stream head.
 *
 * Note that a while a regular socket (SOV_SOCKSTREAM) does support the
 * streams ioctl set it does not support putmsg and getmsg.
 * Allowing putmsg would prevent sockfs from tracking the state of
 * the socket/transport and would also invalidate the locking in sockfs.
 */
int
sock_putmsg(
        struct vnode *vp,
        struct strbuf *mctl,
        struct strbuf *mdata,
        uchar_t pri,
        int flag,
        int fmode
)
{
        struct sonode *so;

        ASSERT(vp->v_type == VSOCK);
        /*
         * Use the stream head to find the real socket vnode.
         * This is needed when namefs sits above sockfs.
         */
        if (!vp->v_stream) {
                return (ENOSTR);
        }
        ASSERT(vp->v_stream->sd_vnode);
        vp = vp->v_stream->sd_vnode;
        ASSERT(vn_matchops(vp, socket_vnodeops));
        so = VTOSO(vp);

        dprintso(so, 1, ("sock_putmsg(%p) %s\n",
            (void *)so, pr_state(so->so_state, so->so_mode)));

        if (so->so_version == SOV_STREAM) {
                /* The imaginary "sockmod" has been popped - act as a stream */
                return (strputmsg(vp, mctl, mdata, pri, flag, fmode));
        }
        eprintsoline(so, ENOSTR);
        return (ENOSTR);
}

/*
 * Special function called only from f_getfl().
 * Returns FASYNC if the SS_ASYNC flag is set on a socket, else 0.
 * No locks are acquired here, so it is safe to use while uf_lock is held.
 * This exists solely for BSD fcntl() FASYNC compatibility.
 */
int
sock_getfasync(vnode_t *vp)
{
        struct sonode *so;

        ASSERT(vp->v_type == VSOCK);
        /*
         * For stream model, v_stream is used; For non-stream, v_stream always
         * equals NULL
         */
        if (vp->v_stream != NULL)
                so = VTOSO(vp->v_stream->sd_vnode);
        else
                so = VTOSO(vp);

        if (so->so_version == SOV_STREAM || !(so->so_state & SS_ASYNC))
                return (0);

        return (FASYNC);
}