root/usr/src/uts/common/inet/tcp/tcp_tpi.c
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
 */

/* This file contains all TCP TLI/TPI related functions */

#include <sys/types.h>
#include <sys/stream.h>
#include <sys/strsun.h>
#include <sys/strsubr.h>
#include <sys/stropts.h>
#include <sys/strlog.h>
#define _SUN_TPI_VERSION 2
#include <sys/tihdr.h>
#include <sys/suntpi.h>
#include <sys/xti_inet.h>
#include <sys/squeue_impl.h>
#include <sys/squeue.h>

#include <inet/common.h>
#include <inet/ip.h>
#include <inet/tcp.h>
#include <inet/tcp_impl.h>
#include <inet/proto_set.h>

/* Forward declarations of file-local helpers defined later in this file. */
static void     tcp_accept_swap(tcp_t *, tcp_t *, tcp_t *);
static int      tcp_conprim_opt_process(tcp_t *, mblk_t *, int *, int *, int *);

/*
 * Switch a TCP endpoint to pure TPI operation: give it an acceptor id,
 * enter it into the acceptor hash and clear its socket flag.
 */
void
tcp_use_pure_tpi(tcp_t *tcp)
{
        conn_t          *cp = tcp->tcp_connp;

        /*
         * The acceptor id is taken from the read queue pointer when
         * _ILP32 is defined and from the conn's device number otherwise.
         */
#ifdef  _ILP32
        tcp->tcp_acceptor_id = (t_uscalar_t)cp->conn_rq;
#else
        tcp->tcp_acceptor_id = cp->conn_dev;
#endif

        /* A later T_CONN_RES message may need to find us by this id. */
        tcp_acceptor_hash_insert(tcp->tcp_acceptor_id, tcp);

        /* No longer a socket endpoint; count the fallback. */
        tcp->tcp_issocket = B_FALSE;
        TCP_STAT(tcp->tcp_tcps, tcp_sock_fallback);
}

/*
 * Build a TPI error ack from the request message 'mp' via
 * mi_tpi_err_ack_alloc() and pass it up the endpoint's read queue.
 * Nothing is sent when no ack message could be obtained.
 */
void
tcp_err_ack(tcp_t *tcp, mblk_t *mp, int t_error, int sys_error)
{
        mblk_t  *ackmp = mi_tpi_err_ack_alloc(mp, t_error, sys_error);

        if (ackmp == NULL)
                return;

        putnext(tcp->tcp_connp->conn_rq, ackmp);
}

/*
 * Variant of tcp_err_ack() in which the caller names the primitive that is
 * reported in the T_ERROR_ACK explicitly instead of it being taken from
 * the message.  The ack is obtained with tpi_ack_alloc() and sent up the
 * endpoint's read queue; nothing is sent if that allocation fails.
 */
void
tcp_err_ack_prim(tcp_t *tcp, mblk_t *mp, int primitive,
    int t_error, int sys_error)
{
        struct T_error_ack      *ack;

        mp = tpi_ack_alloc(mp, sizeof (struct T_error_ack), M_PCPROTO,
            T_ERROR_ACK);
        if (mp == NULL)
                return;

        ack = (struct T_error_ack *)mp->b_rptr;
        ack->ERROR_prim = primitive;
        ack->TLI_error = t_error;
        ack->UNIX_error = sys_error;

        putnext(tcp->tcp_connp->conn_rq, mp);
}

/*
 * TPI entry point for reading an option value: resolve the conn behind
 * the queue and defer to tcp_opt_get().
 */
int
tcp_tpi_opt_get(queue_t *q, int level, int name, uchar_t *ptr)
{
        conn_t  *connp = Q_TO_CONN(q);

        return (tcp_opt_get(connp, level, name, ptr));
}

/*
 * TPI entry point for setting an option: resolve the conn behind the
 * queue and defer to tcp_opt_set() with all arguments passed through
 * unchanged.
 */
/* ARGSUSED */
int
tcp_tpi_opt_set(queue_t *q, uint_t optset_context, int level, int name,
    uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
    void *thisdg_attrs, cred_t *cr)
{
        return (tcp_opt_set(Q_TO_CONN(q), optset_context, level, name,
            inlen, invalp, outlenp, outvalp, thisdg_attrs, cr));
}

/*
 * Process the options carried by a connection primitive (T_CONN_REQ,
 * O_T_CONN_RES or T_CONN_RES) by handing the option buffer described by
 * the primitive's OPT_length/OPT_offset to tpi_optcom_buf().
 *
 * Returns 0 when option processing succeeded and -1 otherwise.  All three
 * output arguments are written on every path, including the early failure
 * returns:
 *
 *   *do_disconnectp    1 when tpi_optcom_buf() reported an absolute
 *                      requirement failure; the primitive should then be
 *                      acked locally and followed by a T_DISCON_IND.
 *                      *t_errorp and *sys_errorp are 0 in this case, which
 *                      is what tcp_tpi_connect() ASSERTs.
 *   *t_errorp          TPI error for a T_ERROR_ACK; non-zero whenever -1
 *                      is returned without *do_disconnectp being set.
 *   *sys_errorp        errno accompanying a TSYSERR, otherwise 0.
 */
static int
tcp_conprim_opt_process(tcp_t *tcp, mblk_t *mp, int *do_disconnectp,
    int *t_errorp, int *sys_errorp)
{
        int error;
        int is_absreq_failure = 0;
        t_scalar_t *opt_lenp;
        t_scalar_t opt_offset;
        int prim_type;
        struct T_conn_req *tcreqp;
        struct T_conn_res *tcresp;
        cred_t *cr;

        /*
         * Give the outputs defined values up front so that no return path
         * leaves the caller looking at uninitialized stack.
         */
        *t_errorp = 0;
        *sys_errorp = 0;
        *do_disconnectp = 0;

        /*
         * All Solaris components should pass a db_credp
         * for this TPI message, hence we ASSERT.
         * But in case there is some other M_PROTO that looks
         * like a TPI message sent by some other kernel
         * component, we check and return an error.
         */
        cr = msg_getcred(mp, NULL);
        ASSERT(cr != NULL);
        if (cr == NULL) {
                *t_errorp = TSYSERR;
                *sys_errorp = EINVAL;
                return (-1);
        }

        prim_type = ((union T_primitives *)mp->b_rptr)->type;
        ASSERT(prim_type == T_CONN_REQ || prim_type == O_T_CONN_RES ||
            prim_type == T_CONN_RES);

        switch (prim_type) {
        case T_CONN_REQ:
                tcreqp = (struct T_conn_req *)mp->b_rptr;
                opt_offset = tcreqp->OPT_offset;
                opt_lenp = (t_scalar_t *)&tcreqp->OPT_length;
                break;
        case O_T_CONN_RES:
        case T_CONN_RES:
                tcresp = (struct T_conn_res *)mp->b_rptr;
                opt_offset = tcresp->OPT_offset;
                opt_lenp = (t_scalar_t *)&tcresp->OPT_length;
                break;
        default:
                /*
                 * Not a primitive we know how to find options in.  Fail
                 * the request rather than passing a null length pointer
                 * on to tpi_optcom_buf().
                 */
                *t_errorp = TSYSERR;
                *sys_errorp = EINVAL;
                return (-1);
        }

        error = tpi_optcom_buf(tcp->tcp_connp->conn_wq, mp, opt_lenp,
            opt_offset, cr, &tcp_opt_obj,
            NULL, &is_absreq_failure);

        if (error == 0) {
                ASSERT(is_absreq_failure == 0);
                return (0);
        }

        if (is_absreq_failure != 0) {
                /*
                 * The connection request should get the local ack
                 * T_OK_ACK and then a T_DISCON_IND.  No error ack is
                 * generated, so the error outputs stay zero.
                 */
                *do_disconnectp = 1;
                return (-1);
        }

        /* Translate the errno into the TPI error to be acked. */
        switch (error) {
        case ENOPROTOOPT:
                *t_errorp = TBADOPT;
                break;
        case EACCES:
                *t_errorp = TACCES;
                break;
        default:
                *t_errorp = TSYSERR;
                *sys_errorp = error;
                break;
        }
        return (-1);
}

/*
 * Handle a TPI bind request for this endpoint.
 *
 * The request is validated, grown so that a T_bind_ack followed by the
 * largest address (sin6_t) fits, and then handed to tcp_do_listen() when
 * CONIND_number is positive or to tcp_do_bind() otherwise.  An
 * ADDR_length of zero asks for a generic port: a zeroed address of the
 * endpoint's family is built directly behind the T_bind_req.
 *
 * On success the same message, with the local port filled into the
 * address, is turned into a T_BIND_ACK and sent upstream.  On failure a
 * T_ERROR_ACK is sent instead: a positive error from the bind/listen
 * routine is reported as TSYSERR with that errno, a negative one as the
 * TPI error given by its absolute value.
 */
void
tcp_tpi_bind(tcp_t *tcp, mblk_t *mp)
{
        int     error;
        conn_t  *connp = tcp->tcp_connp;
        struct sockaddr *sa;
        mblk_t  *mp1;
        struct T_bind_req *tbr;
        int     backlog;
        socklen_t       len;
        sin_t   *sin;
        sin6_t  *sin6;
        cred_t          *cr;

        /*
         * All Solaris components should pass a db_credp
         * for this TPI message, hence we ASSERT.
         * But in case there is some other M_PROTO that looks
         * like a TPI message sent by some other kernel
         * component, we check and return an error.
         */
        cr = msg_getcred(mp, NULL);
        ASSERT(cr != NULL);
        if (cr == NULL) {
                tcp_err_ack(tcp, mp, TSYSERR, EINVAL);
                return;
        }

        ASSERT((uintptr_t)(mp->b_wptr - mp->b_rptr) <= (uintptr_t)INT_MAX);
        if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) {
                if (connp->conn_debug) {
                        (void) strlog(TCP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
                            "tcp_tpi_bind: bad req, len %u",
                            (uint_t)(mp->b_wptr - mp->b_rptr));
                }
                tcp_err_ack(tcp, mp, TPROTO, 0);
                return;
        }
        /* Make sure the largest address fits */
        mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t), 1);
        if (mp1 == NULL) {
                tcp_err_ack(tcp, mp, TSYSERR, ENOMEM);
                return;
        }
        mp = mp1;
        tbr = (struct T_bind_req *)mp->b_rptr;

        backlog = tbr->CONIND_number;
        len = tbr->ADDR_length;

        switch (len) {
        case 0:         /* request for a generic port */
                tbr->ADDR_offset = sizeof (struct T_bind_req);
                if (connp->conn_family == AF_INET) {
                        tbr->ADDR_length = sizeof (sin_t);
                        sin = (sin_t *)&tbr[1];
                        *sin = sin_null;
                        sin->sin_family = AF_INET;
                        sa = (struct sockaddr *)sin;
                        len = sizeof (sin_t);
                        mp->b_wptr = (uchar_t *)&sin[1];
                } else {
                        ASSERT(connp->conn_family == AF_INET6);
                        tbr->ADDR_length = sizeof (sin6_t);
                        sin6 = (sin6_t *)&tbr[1];
                        *sin6 = sin6_null;
                        sin6->sin6_family = AF_INET6;
                        sa = (struct sockaddr *)sin6;
                        len = sizeof (sin6_t);
                        mp->b_wptr = (uchar_t *)&sin6[1];
                }
                break;

        case sizeof (sin_t):    /* Complete IPv4 address */
                sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset,
                    sizeof (sin_t));
                break;

        case sizeof (sin6_t): /* Complete IPv6 address */
                sa = (struct sockaddr *)mi_offset_param(mp,
                    tbr->ADDR_offset, sizeof (sin6_t));
                break;

        default:
                if (connp->conn_debug) {
                        (void) strlog(TCP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
                            "tcp_tpi_bind: bad address length, %d",
                            tbr->ADDR_length);
                }
                tcp_err_ack(tcp, mp, TBADADDR, 0);
                return;
        }

        /*
         * mi_offset_param() can fail and return NULL when the offset and
         * length supplied in the request do not fit the message.  Treat
         * that as a malformed request instead of handing a NULL address to
         * tcp_do_listen()/tcp_do_bind() and dereferencing it below.
         */
        if (sa == NULL) {
                if (connp->conn_debug) {
                        (void) strlog(TCP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
                            "tcp_tpi_bind: bad address parameter, "
                            "offset %d, len %d",
                            tbr->ADDR_offset, tbr->ADDR_length);
                }
                tcp_err_ack(tcp, mp, TPROTO, 0);
                return;
        }

        /* A positive CONIND_number turns the bind into a listen. */
        if (backlog > 0) {
                error = tcp_do_listen(connp, sa, len, backlog, DB_CRED(mp),
                    tbr->PRIM_type != O_T_BIND_REQ);
        } else {
                error = tcp_do_bind(connp, sa, len, DB_CRED(mp),
                    tbr->PRIM_type != O_T_BIND_REQ);
        }

        if (error > 0) {
                tcp_err_ack(tcp, mp, TSYSERR, error);
        } else if (error < 0) {
                tcp_err_ack(tcp, mp, -error, 0);
        } else {
                /*
                 * Update port information as sockfs/tpi needs it for checking
                 */
                if (connp->conn_family == AF_INET) {
                        sin = (sin_t *)sa;
                        sin->sin_port = connp->conn_lport;
                } else {
                        sin6 = (sin6_t *)sa;
                        sin6->sin6_port = connp->conn_lport;
                }
                /* Reuse the request message as the T_BIND_ACK. */
                mp->b_datap->db_type = M_PCPROTO;
                tbr->PRIM_type = T_BIND_ACK;
                putnext(connp->conn_rq, mp);
        }
}

/*
 * tcp_tpi_unbind handles T_UNBIND_REQ messages.  The unbind itself is done
 * by tcp_do_unbind(); a positive result is acked as TSYSERR with that
 * errno, a negative one as the TPI error given by its absolute value.  On
 * success an M_FLUSH is sent upstream, followed by a T_OK_ACK.
 */
void
tcp_tpi_unbind(tcp_t *tcp, mblk_t *mp)
{
        conn_t  *connp = tcp->tcp_connp;
        int     rv = tcp_do_unbind(connp);

        if (rv != 0) {
                if (rv > 0)
                        tcp_err_ack(tcp, mp, TSYSERR, rv);
                else
                        tcp_err_ack(tcp, mp, -rv, 0);
                return;
        }

        /* Send M_FLUSH according to TPI */
        (void) putnextctl1(connp->conn_rq, M_FLUSH, FLUSHRW);

        mp = mi_tpi_ok_ack_alloc(mp);
        if (mp == NULL)
                return;
        putnext(connp->conn_rq, mp);
}

/*
 * STREAMS close routine for a TCP endpoint.  Always returns 0 and leaves
 * both queue private pointers cleared.
 */
/* ARGSUSED */
int
tcp_tpi_close(queue_t *q, int flags, cred_t *credp __unused)
{
        ASSERT(WR(q)->q_next == NULL);

        if (flags & SO_FALLBACK) {
                /*
                 * stream is being closed while in fallback
                 * simply free the resources that were allocated
                 * (the queue private pointers are handed to
                 * inet_minor_free() as the arena and the device number)
                 */
                inet_minor_free(WR(q)->q_ptr, (dev_t)(RD(q)->q_ptr));
                qprocsoff(q);
        } else {
                conn_t  *connp = Q_TO_CONN(q);

                /*
                 * We are being closed as /dev/tcp or /dev/tcp6.
                 */
                tcp_close_common(connp, flags);

                qprocsoff(q);
                inet_minor_free(connp->conn_minor_arena, connp->conn_dev);

                /*
                 * Drop IP's reference on the conn. This is the last
                 * reference on the connp if the state was less than
                 * established. If the connection has gone into timewait
                 * state, then we will have one ref for the TCP and one
                 * more ref (total of two) for the classifier connected
                 * hash list (a timewait connections stays in connected
                 * hash till closed).
                 *
                 * We can't assert the references because there might be
                 * other transient reference places because of some
                 * walkers or queued packets in squeue for the timewait
                 * state.
                 */
                CONN_DEC_REF(connp);
        }

        q->q_ptr = WR(q)->q_ptr = NULL;
        return (0);
}

/*
 * Close routine for an acceptor STREAM.  Such a stream keeps its minor
 * arena in the write queue's private pointer and its device number in the
 * read queue's private pointer; both are released here.  Always returns 0.
 */
/* ARGSUSED */
int
tcp_tpi_close_accept(queue_t *q, int flags __unused, cred_t *credp __unused)
{
        extern struct qinit tcp_acceptor_winit;
        vmem_t  *arena;
        dev_t   dev;

        ASSERT(WR(q)->q_qinfo == &tcp_acceptor_winit);

        /*
         * We had opened an acceptor STREAM for sockfs which is
         * now being closed due to some error.
         */
        qprocsoff(q);

        arena = (vmem_t *)WR(q)->q_ptr;
        dev = (dev_t)RD(q)->q_ptr;
        ASSERT(arena != NULL);
        ASSERT(dev != 0);

        inet_minor_free(arena, dev);

        q->q_ptr = WR(q)->q_ptr = NULL;
        return (0);
}

/*
 * Build a T_CONN_CON message from the peer address and port held in the
 * conn (plus the flow information of the IPv6 header, if the packet is
 * IPv6) and any option results saved from the T_CONN_REQ.
 *
 * When defermp is NULL the confirmation is delivered right away: through
 * the su_connected upcall for a non-STREAMS conn, or with putnext() on the
 * read queue otherwise.  When defermp is non-NULL the message is returned
 * through it for the caller to send.
 *
 * Returns B_FALSE if the message could not be allocated, B_TRUE otherwise.
 * Saved option results are released on the B_TRUE path.
 */
boolean_t
tcp_conn_con(tcp_t *tcp, uchar_t *iphdr, mblk_t *idmp,
    mblk_t **defermp, ip_recv_attr_t *ira)
{
        conn_t  *connp = tcp->tcp_connp;
        mblk_t  *con_mp;
        char    *optp = NULL;
        int     optlen = 0;
        sin_t   sin;
        sin6_t  sin6;

        if (defermp != NULL)
                *defermp = NULL;

        if (tcp->tcp_conn.tcp_opts_conn_req != NULL) {
                /*
                 * Return in T_CONN_CON results of option negotiation through
                 * the T_CONN_REQ. Note: If there is an real end-to-end option
                 * negotiation, then what is received from remote end needs
                 * to be taken into account but there is no such thing (yet?)
                 * in our TCP/IP.
                 * Note: We do not use mi_offset_param() here as
                 * tcp_opts_conn_req contents do not directly come from
                 * an application and are either generated in kernel or
                 * from user input that was already verified.
                 */
                mblk_t  *opts_mp = tcp->tcp_conn.tcp_opts_conn_req;
                struct T_conn_req *saved =
                    (struct T_conn_req *)opts_mp->b_rptr;

                optp = (char *)(opts_mp->b_rptr + saved->OPT_offset);
                optlen = (int)saved->OPT_length;
        }

        if (IPH_HDR_VERSION(iphdr) == IPV4_VERSION &&
            connp->conn_family == AF_INET) {
                /* IPv4 packet on an AF_INET endpoint: report a sin_t. */
                sin = sin_null;
                sin.sin_addr.s_addr = connp->conn_faddr_v4;
                sin.sin_port = connp->conn_fport;
                sin.sin_family = AF_INET;
                con_mp = mi_tpi_conn_con(NULL, (char *)&sin,
                    (int)sizeof (sin_t), optp, optlen);
        } else {
                /*
                 * Everything else is reported as a sin6_t; only a packet
                 * that is not IPv4 contributes flow information.
                 */
                sin6 = sin6_null;
                sin6.sin6_addr = connp->conn_faddr_v6;
                sin6.sin6_port = connp->conn_fport;
                sin6.sin6_family = AF_INET6;
                if (IPH_HDR_VERSION(iphdr) != IPV4_VERSION) {
                        ip6_t   *ip6h = (ip6_t *)iphdr;

                        ASSERT(IPH_HDR_VERSION(iphdr) == IPV6_VERSION);
                        ASSERT(connp->conn_family == AF_INET6);
                        sin6.sin6_flowinfo =
                            ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK;
                }
                con_mp = mi_tpi_conn_con(NULL, (char *)&sin6,
                    (int)sizeof (sin6_t), optp, optlen);
        }

        if (con_mp == NULL)
                return (B_FALSE);

        mblk_copycred(con_mp, idmp);

        if (defermp != NULL) {
                /* The caller sends the confirmation itself. */
                *defermp = con_mp;
        } else if (IPCL_IS_NONSTR(connp)) {
                (*connp->conn_upcalls->su_connected)
                    (connp->conn_upper_handle, tcp->tcp_connid,
                    ira->ira_cred, ira->ira_cpid);
                freemsg(con_mp);
        } else {
                if (ira->ira_cred != NULL) {
                        /* So that getpeerucred works for TPI sockfs */
                        mblk_setcred(con_mp, ira->ira_cred, ira->ira_cpid);
                }
                putnext(connp->conn_rq, con_mp);
        }

        if (tcp->tcp_conn.tcp_opts_conn_req != NULL)
                tcp_close_mpp(&tcp->tcp_conn.tcp_opts_conn_req);
        return (B_TRUE);
}

/*
 * Successful connect request processing begins when our client passes
 * a T_CONN_REQ message into tcp_wput(), which performs function calls into
 * IP and then passes a T_OK_ACK (or a T_ERROR_ACK) upstream.
 *
 * After various error checks are completed, tcp_tpi_connect() validates
 * the destination address, processes any options carried by the request
 * and calls tcp_do_connect().  The outcome is reported upstream with a
 * T_OK_ACK or a T_ERROR_ACK: a positive error from tcp_do_connect() is
 * acked as TSYSERR with that errno, a negative one as the TPI error given
 * by its absolute value.
 *
 * If option processing reports an absolute requirement failure, the
 * request is acked with T_OK_ACK and immediately followed by a
 * T_DISCON_IND instead.
 *
 * Option results saved for the later T_CONN_CON are released again if
 * tcp_do_connect() fails.
 */
void
tcp_tpi_connect(tcp_t *tcp, mblk_t *mp)
{
        sin_t           *sin;
        struct T_conn_req       *tcr;
        struct sockaddr *sa;
        socklen_t       len;
        int             error;
        cred_t          *cr;
        pid_t           cpid;
        conn_t          *connp = tcp->tcp_connp;
        queue_t         *q = connp->conn_wq;
        mblk_t          *conn_opts_mp = NULL;

        /*
         * All Solaris components should pass a db_credp
         * for this TPI message, hence we ASSERT.
         * But in case there is some other M_PROTO that looks
         * like a TPI message sent by some other kernel
         * component, we check and return an error.
         */
        cr = msg_getcred(mp, &cpid);
        ASSERT(cr != NULL);
        if (cr == NULL) {
                tcp_err_ack(tcp, mp, TSYSERR, EINVAL);
                return;
        }

        tcr = (struct T_conn_req *)mp->b_rptr;

        ASSERT((uintptr_t)(mp->b_wptr - mp->b_rptr) <= (uintptr_t)INT_MAX);
        if ((mp->b_wptr - mp->b_rptr) < sizeof (*tcr)) {
                tcp_err_ack(tcp, mp, TPROTO, 0);
                return;
        }

        /*
         * Pre-allocate the T_ordrel_ind mblk so that at close time, we
         * will always have that to send up.  Otherwise, we need to do
         * special handling in case the allocation fails at that time.
         * If the end point is TPI, the tcp_t can be reused and the
         * tcp_ordrel_mp may be allocated already.
         */
        if (tcp->tcp_ordrel_mp == NULL) {
                if ((tcp->tcp_ordrel_mp = mi_tpi_ordrel_ind()) == NULL) {
                        tcp_err_ack(tcp, mp, TSYSERR, ENOMEM);
                        return;
                }
        }

        /*
         * Determine packet type based on type of address passed in
         * the request should contain an IPv4 or IPv6 address.
         * Make sure that address family matches the type of
         * family of the address passed down.
         */
        switch (tcr->DEST_length) {
        default:
                tcp_err_ack(tcp, mp, TBADADDR, 0);
                return;

        case (sizeof (sin_t) - sizeof (sin->sin_zero)): {
                /*
                 * XXX: The check for valid DEST_length was not there
                 * in earlier releases and some buggy
                 * TLI apps (e.g Sybase) got away with not feeding
                 * in sin_zero part of address.
                 * We allow that bug to keep those buggy apps humming.
                 * Test suites require the check on DEST_length.
                 * We construct a new mblk with valid DEST_length
                 * free the original so the rest of the code does
                 * not have to keep track of this special shorter
                 * length address case.
                 */
                mblk_t *nmp;
                struct T_conn_req *ntcr;
                sin_t *nsin;
                uchar_t *optp = NULL;

                /* Get pointer to shorter address to copy from original mp */
                sin = (sin_t *)mi_offset_param(mp, tcr->DEST_offset,
                    tcr->DEST_length); /* extract DEST_length worth of sin_t */
                if (sin == NULL || !OK_32PTR((char *)sin)) {
                        tcp_err_ack(tcp, mp, TSYSERR, EINVAL);
                        return;
                }

                /*
                 * The options are copied by hand below, before anything
                 * else has looked at them, so make sure that the region
                 * named by OPT_offset/OPT_length really lies inside the
                 * message before sizing the new mblk from it or reading
                 * from it.
                 */
                if (tcr->OPT_length != 0) {
                        optp = (uchar_t *)mi_offset_param(mp,
                            tcr->OPT_offset, tcr->OPT_length);
                        if (optp == NULL) {
                                tcp_err_ack(tcp, mp, TBADOPT, 0);
                                return;
                        }
                }

                nmp = allocb(sizeof (struct T_conn_req) + sizeof (sin_t) +
                    tcr->OPT_length, BPRI_HI);
                if (nmp == NULL) {
                        tcp_err_ack(tcp, mp, TSYSERR, ENOMEM);
                        return;
                }
                ntcr = (struct T_conn_req *)nmp->b_rptr;
                bzero(ntcr, sizeof (struct T_conn_req)); /* zero fill */
                ntcr->PRIM_type = T_CONN_REQ;
                ntcr->DEST_length = sizeof (sin_t);
                ntcr->DEST_offset = sizeof (struct T_conn_req);

                nsin = (sin_t *)((uchar_t *)ntcr + ntcr->DEST_offset);
                *nsin = sin_null;
                nsin->sin_family = sin->sin_family;
                nsin->sin_port = sin->sin_port;
                nsin->sin_addr = sin->sin_addr;
                /* Note:nsin->sin_zero zero-fill with sin_null assign above */
                nmp->b_wptr = (uchar_t *)&nsin[1];
                if (optp != NULL) {
                        ntcr->OPT_length = tcr->OPT_length;
                        ntcr->OPT_offset = nmp->b_wptr - nmp->b_rptr;
                        bcopy(optp, (uchar_t *)ntcr + ntcr->OPT_offset,
                            tcr->OPT_length);
                        nmp->b_wptr += tcr->OPT_length;
                }
                /*
                 * Carry the sender's credential over to the replacement
                 * message: option processing below looks it up on the
                 * message again, and 'cr' must not be left pointing into
                 * a message that has been freed.
                 */
                mblk_copycred(nmp, mp);
                freemsg(mp);    /* original mp freed */
                mp = nmp;       /* re-initialize original variables */
                tcr = ntcr;
        }
        /* FALLTHRU */

        case sizeof (sin_t):
                sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset,
                    sizeof (sin_t));
                len = sizeof (sin_t);
                break;

        case sizeof (sin6_t):
                sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset,
                    sizeof (sin6_t));
                len = sizeof (sin6_t);
                break;
        }

        error = proto_verify_ip_addr(connp->conn_family, sa, len);
        if (error != 0) {
                tcp_err_ack(tcp, mp, TSYSERR, error);
                return;
        }

        /*
         * TODO: If someone in TCPS_TIME_WAIT has this dst/port we
         * should key on their sequence number and cut them loose.
         */

        /*
         * If options passed in, feed it for verification and handling
         */
        if (tcr->OPT_length != 0) {
                mblk_t  *ok_mp;
                mblk_t  *discon_mp;
                int t_error, sys_error, do_disconnect;

                if (tcp_conprim_opt_process(tcp, mp,
                    &do_disconnect, &t_error, &sys_error) < 0) {
                        if (do_disconnect) {
                                ASSERT(t_error == 0 && sys_error == 0);
                                discon_mp = mi_tpi_discon_ind(NULL,
                                    ECONNREFUSED, 0);
                                if (!discon_mp) {
                                        tcp_err_ack_prim(tcp, mp, T_CONN_REQ,
                                            TSYSERR, ENOMEM);
                                        return;
                                }
                                ok_mp = mi_tpi_ok_ack_alloc(mp);
                                if (!ok_mp) {
                                        /*
                                         * The disconnect indication will
                                         * not be sent after all; do not
                                         * leak it.
                                         */
                                        freemsg(discon_mp);
                                        tcp_err_ack_prim(tcp, NULL, T_CONN_REQ,
                                            TSYSERR, ENOMEM);
                                        return;
                                }
                                qreply(q, ok_mp);
                                qreply(q, discon_mp); /* no flush! */
                        } else {
                                ASSERT(t_error != 0);
                                tcp_err_ack_prim(tcp, mp, T_CONN_REQ, t_error,
                                    sys_error);
                        }
                        return;
                }
                /*
                 * Success in setting options, the mp option buffer represented
                 * by OPT_length/offset has been potentially modified and
                 * contains results of option processing. We copy it in
                 * another mp to save it for potentially influencing returning
                 * it in T_CONN_CONN.
                 */
                if (tcr->OPT_length != 0) { /* there are resulting options */
                        conn_opts_mp = copyb(mp);
                        if (!conn_opts_mp) {
                                tcp_err_ack_prim(tcp, mp, T_CONN_REQ,
                                    TSYSERR, ENOMEM);
                                return;
                        }
                        ASSERT(tcp->tcp_conn.tcp_opts_conn_req == NULL);
                        tcp->tcp_conn.tcp_opts_conn_req = conn_opts_mp;
                        /*
                         * Note:
                         * These resulting option negotiation can include any
                         * end-to-end negotiation options but there no such
                         * thing (yet?) in our TCP/IP.
                         */
                }
        }

        /* call the non-TPI version */
        error = tcp_do_connect(tcp->tcp_connp, sa, len, cr, cpid);

        /*
         * On failure no T_CONN_CON will follow, so blow away the option
         * results saved above.  Only the mblk stored by this call is
         * released; anything saved by an earlier request is left alone.
         */
        if (error != 0 && conn_opts_mp != NULL &&
            tcp->tcp_conn.tcp_opts_conn_req == conn_opts_mp) {
                tcp_close_mpp(&tcp->tcp_conn.tcp_opts_conn_req);
        }

        /* Turn the request message into the matching ack. */
        if (error < 0) {
                mp = mi_tpi_err_ack_alloc(mp, -error, 0);
        } else if (error > 0) {
                mp = mi_tpi_err_ack_alloc(mp, TSYSERR, error);
        } else {
                mp = mi_tpi_ok_ack_alloc(mp);
        }

        /*
         * Send the ack upstream.  If the ack could not be built from the
         * request, fall back to a freshly allocated TSYSERR/ENOMEM error
         * ack for the T_CONN_REQ.
         */
        if (mp != NULL)
                putnext(connp->conn_rq, mp);
        else {
                tcp_err_ack_prim(tcp, NULL, T_CONN_REQ,
                    TSYSERR, ENOMEM);
        }
}

/*
 * Map the endpoint's current tcp_state onto the TPI/TLI state reported to
 * the client.  An unrecognized tcp_state is logged and reported as
 * TS_UNBND.
 */
static int
tcp_tpistate(tcp_t *tcp)
{
        int     tpi_state;

        switch (tcp->tcp_state) {
        case TCPS_IDLE:
                tpi_state = TS_UNBND;
                break;

        case TCPS_LISTEN:
                /*
                 * Return whether there are outstanding T_CONN_IND waiting
                 * for the matching T_CONN_RES. Therefore don't count q0.
                 */
                tpi_state = (tcp->tcp_conn_req_cnt_q > 0) ?
                    TS_WRES_CIND : TS_IDLE;
                break;

        case TCPS_BOUND:
                tpi_state = TS_IDLE;
                break;

        case TCPS_SYN_SENT:
                tpi_state = TS_WCON_CREQ;
                break;

        case TCPS_SYN_RCVD:
                /*
                 * Note: assumption: this has to the active open SYN_RCVD.
                 * The passive instance is detached in SYN_RCVD stage of
                 * incoming connection processing so we cannot get request
                 * for T_info_ack on it.
                 */
                tpi_state = TS_WACK_CRES;
                break;

        case TCPS_ESTABLISHED:
                tpi_state = TS_DATA_XFER;
                break;

        case TCPS_CLOSE_WAIT:
                tpi_state = TS_WREQ_ORDREL;
                break;

        case TCPS_FIN_WAIT_1:
        case TCPS_FIN_WAIT_2:
                tpi_state = TS_WIND_ORDREL;
                break;

        case TCPS_CLOSING:
        case TCPS_LAST_ACK:
        case TCPS_TIME_WAIT:
        case TCPS_CLOSED:
                /*
                 * Following TS_WACK_DREQ7 is a rendition of "not
                 * yet TS_IDLE" TPI state. There is no best match to any
                 * TPI state for TCPS_{CLOSING, LAST_ACK, TIME_WAIT} but we
                 * choose a value chosen that will map to TLI/XTI level
                 * state of TSTATECHNG (state is process of changing) which
                 * captures what this dummy state represents.
                 */
                tpi_state = TS_WACK_DREQ7;
                break;

        default:
                cmn_err(CE_WARN, "tcp_tpistate: strange state (%d) %s",
                    tcp->tcp_state, tcp_display(tcp, NULL,
                    DISP_PORT_ONLY));
                tpi_state = TS_UNBND;
                break;
        }

        return (tpi_state);
}

/*
 * Fill in a T_info_ack for this endpoint: start from the global template
 * matching the conn's address family, then set the current TPI state, the
 * maximum option size and the TIDU size.
 */
static void
tcp_copy_info(struct T_info_ack *tia, tcp_t *tcp)
{
        extern struct T_info_ack tcp_g_t_info_ack;
        extern struct T_info_ack tcp_g_t_info_ack_v6;
        conn_t          *connp = tcp->tcp_connp;
        tcp_stack_t     *tcps = tcp->tcp_tcps;

        *tia = (connp->conn_family == AF_INET6) ?
            tcp_g_t_info_ack_v6 : tcp_g_t_info_ack;

        tia->CURRENT_state = tcp_tpistate(tcp);
        tia->OPT_size = tcp_max_optsize;

        if (tcp->tcp_mss != 0) {
                tia->TIDU_size = tcp->tcp_mss;
        } else if (connp->conn_ipversion == IPV4_VERSION) {
                /* Not yet set - tcp_open does not set mss */
                tia->TIDU_size = tcps->tcps_mss_def_ipv4;
        } else {
                /* Not yet set - tcp_open does not set mss */
                tia->TIDU_size = tcps->tcps_mss_def_ipv6;
        }
        /* TODO: Default ETSDU is 1.  Is that correct for tcp? */
}

/*
 * Fill in the parts of a T_capability_ack selected by cap_bits1.
 * TC1_INFO fills in INFO_ack, TC1_ACCEPTOR_ID fills in ACCEPTOR_id;
 * CAP_bits1 of the ack ends up holding exactly the bits that were
 * honoured.
 */
void
tcp_do_capability_ack(tcp_t *tcp, struct T_capability_ack *tcap,
    t_uscalar_t cap_bits1)
{
        t_uscalar_t     honoured = 0;

        if ((cap_bits1 & TC1_INFO) != 0) {
                tcp_copy_info(&tcap->INFO_ack, tcp);
                honoured |= TC1_INFO;
        }

        if ((cap_bits1 & TC1_ACCEPTOR_ID) != 0) {
                tcap->ACCEPTOR_id = tcp->tcp_acceptor_id;
                honoured |= TC1_ACCEPTOR_ID;
        }

        tcap->CAP_bits1 = honoured;
}

/*
 * Respond to a T_CAPABILITY_REQ message.  A request shorter than a
 * struct T_capability_req is dropped.  Otherwise the requested capability
 * bits are read, the message is turned into a T_CAPABILITY_ACK of the same
 * message type, filled in by tcp_do_capability_ack() and sent upstream.
 * Nothing is sent if the ack cannot be allocated.
 */
void
tcp_capability_req(tcp_t *tcp, mblk_t *mp)
{
        struct T_capability_req *req;
        struct T_capability_ack *ack;
        t_uscalar_t             wanted;
        mblk_t                  *ackmp;

        if (MBLKL(mp) < sizeof (struct T_capability_req)) {
                freemsg(mp);
                return;
        }

        /* Read what we need from the request before it is reused. */
        req = (struct T_capability_req *)mp->b_rptr;
        wanted = req->CAP_bits1;

        ackmp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack),
            mp->b_datap->db_type, T_CAPABILITY_ACK);
        if (ackmp == NULL)
                return;

        ack = (struct T_capability_ack *)ackmp->b_rptr;
        tcp_do_capability_ack(tcp, ack, wanted);

        putnext(tcp->tcp_connp->conn_rq, ackmp);
}

/*
 * This routine responds to T_INFO_REQ messages.  It is called by tcp_wput.
 * Most of the T_INFO_ACK information is copied from tcp_g_t_info_ack.
 * The current state of the stream is copied from tcp_state.
 *
 * The request message is turned into the T_INFO_ACK.  If that fails, an
 * attempt is made to send a TSYSERR/ENOMEM error ack for T_INFO_REQ from a
 * freshly allocated message.
 */
void
tcp_info_req(tcp_t *tcp, mblk_t *mp)
{
        mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO,
            T_INFO_ACK);
        if (mp == NULL) {
                /*
                 * There is no request message left to build the error ack
                 * from (mp is NULL here), so name the primitive explicitly
                 * and let tcp_err_ack_prim() allocate a new message, as
                 * tcp_tpi_connect() does in the same situation.
                 */
                tcp_err_ack_prim(tcp, NULL, T_INFO_REQ, TSYSERR, ENOMEM);
                return;
        }
        tcp_copy_info((struct T_info_ack *)mp->b_rptr, tcp);
        putnext(tcp->tcp_connp->conn_rq, mp);
}

/*
 * Respond to the TPI addr request with a T_ADDR_ACK.  The local address is
 * included once the endpoint is at least in TCPS_BOUND, the remote address
 * once it is at least in TCPS_SYN_RCVD; the remote address, when present,
 * directly follows the local one.  If the message cannot be grown to hold
 * the ack, a TSYSERR/ENOMEM error ack is sent instead.
 */
void
tcp_addr_req(tcp_t *tcp, mblk_t *mp)
{
        conn_t  *connp = tcp->tcp_connp;
        struct T_addr_ack *ack;
        mblk_t  *amp;
        uint_t  salen;

        /* Make it large enough for worst case */
        amp = reallocb(mp, sizeof (struct T_addr_ack) +
            2 * sizeof (sin6_t), 1);
        if (amp == NULL) {
                tcp_err_ack(tcp, mp, TSYSERR, ENOMEM);
                return;
        }

        /* Start from an all-zero ack with no addresses attached. */
        ack = (struct T_addr_ack *)amp->b_rptr;
        bzero(ack, sizeof (struct T_addr_ack));
        ack->PRIM_type = T_ADDR_ACK;
        amp->b_datap->db_type = M_PCPROTO;
        amp->b_wptr = (uchar_t *)&ack[1];

        salen = (connp->conn_family == AF_INET) ?
            sizeof (sin_t) : sizeof (sin6_t);

        /*
         * Note: Following code assumes 32 bit alignment of basic
         * data structures like sin_t and struct T_addr_ack.
         */
        if (tcp->tcp_state >= TCPS_BOUND) {
                /*
                 * Fill in local address first
                 */
                ack->LOCADDR_offset = sizeof (*ack);
                ack->LOCADDR_length = salen;
                (void) conn_getsockname(connp, (struct sockaddr *)&ack[1],
                    &salen);
                amp->b_wptr += salen;
        }
        if (tcp->tcp_state >= TCPS_SYN_RCVD) {
                /*
                 * Fill in Remote address
                 */
                ack->REMADDR_length = salen;
                /* assumed 32-bit alignment */
                ack->REMADDR_offset = ack->LOCADDR_offset +
                    ack->LOCADDR_length;
                (void) conn_getpeername(connp,
                    (struct sockaddr *)(amp->b_rptr + ack->REMADDR_offset),
                    &salen);
                amp->b_wptr += salen;
        }
        ASSERT(amp->b_wptr <= amp->b_datap->db_lim);
        putnext(tcp->tcp_connp->conn_rq, amp);
}

/*
 * Move a detached eager onto the acceptor's STREAM for a TLI/XTI client.
 *
 * On return the eager has taken over the acceptor's read/write queues,
 * acceptor id, device number, minor arena, credential, pid, zone and MAC
 * settings; the acceptor is marked detached and the eager has been
 * unlinked from the listener's eager lists.
 *
 * This routine is only used for non-socket (TLI/XTI) endpoints; a sockfs
 * accept is performed on the acceptor stream and does not come through
 * here.  The eager and the listener each live in their own perimeter
 * (squeue), so the code has to cope with potential races between them.
 *
 * See the block comments on top of tcp_accept() and tcp_tli_accept().
 */
static void
tcp_accept_swap(tcp_t *listener, tcp_t *acceptor, tcp_t *eager)
{
        conn_t  *new_connp = eager->tcp_connp;
        conn_t  *old_connp = acceptor->tcp_connp;

        ASSERT(new_connp->conn_rq == listener->tcp_connp->conn_rq);
        ASSERT(eager->tcp_detached && !acceptor->tcp_detached);
        ASSERT(!TCP_IS_SOCKET(acceptor));
        ASSERT(!TCP_IS_SOCKET(eager));
        ASSERT(!TCP_IS_SOCKET(listener));

        /*
         * On MLP and MAC-Exempt sockets Trusted Extensions may require a
         * security label other than the acceptor's.  When it does, that
         * label is already present in the eager's conn_ixa->ixa_tsl, and
         * because the acceptor stream is made to refer to the eager's
         * conn_t we pick it up automatically.
         */

        acceptor->tcp_detached = B_TRUE;

        /*
         * The eager inherits the acceptor id so that TLI/XTI can re-use
         * the stream.
         */
        eager->tcp_acceptor_id = acceptor->tcp_acceptor_id;

        /* Take the eager off the listener's lists. */
        mutex_enter(&listener->tcp_eager_lock);
        tcp_eager_unlink(eager);
        ASSERT(eager->tcp_eager_next_q == NULL &&
            eager->tcp_eager_last_q == NULL);
        ASSERT(eager->tcp_eager_next_q0 == NULL &&
            eager->tcp_eager_prev_q0 == NULL);
        mutex_exit(&listener->tcp_eager_lock);

        /* Cross-link the eager's conn_t and the acceptor's queue pair. */
        new_connp->conn_rq = old_connp->conn_rq;
        new_connp->conn_wq = old_connp->conn_wq;
        new_connp->conn_rq->q_ptr = new_connp;
        new_connp->conn_wq->q_ptr = new_connp;

        /*
         * For TLI/XTI loopback we are running in the listener's squeue,
         * which need not be the squeue of our peer TCP instance.  With
         * TCP Fusion the peer relies on our queues already pointing at
         * the acceptor's queues whenever tcp_detached is clear, so the
         * conn_rq/conn_wq stores above must become globally visible
         * before tcp_detached is cleared; membar_producer() enforces it.
         */
        membar_producer();
        eager->tcp_detached = B_FALSE;

        ASSERT(eager->tcp_ack_tid == 0);

        /* Device identity follows the stream. */
        new_connp->conn_dev = old_connp->conn_dev;
        new_connp->conn_minor_arena = old_connp->conn_minor_arena;
        ASSERT(new_connp->conn_minor_arena != NULL);

        /*
         * Replace the eager's credential with the acceptor's.  The
         * acceptor's hold is transferred, not duplicated, so its pointer
         * is cleared afterwards.
         */
        if (new_connp->conn_cred != NULL) {
                crfree(new_connp->conn_cred);
        }
        new_connp->conn_cred = old_connp->conn_cred;
        ASSERT(!(new_connp->conn_ixa->ixa_free_flags & IXA_FREE_CRED));
        new_connp->conn_ixa->ixa_cred = new_connp->conn_cred;
        old_connp->conn_cred = NULL;
        new_connp->conn_cpid = old_connp->conn_cpid;
        ASSERT(new_connp->conn_netstack == old_connp->conn_netstack);
        ASSERT(eager->tcp_tcps == acceptor->tcp_tcps);

        /* Zone attributes come from the acceptor as well. */
        new_connp->conn_zoneid = old_connp->conn_zoneid;
        new_connp->conn_allzones = old_connp->conn_allzones;
        new_connp->conn_ixa->ixa_zoneid = old_connp->conn_ixa->ixa_zoneid;

        /* So does the MAC mode; the acceptor reverts to the default. */
        new_connp->conn_mac_mode = old_connp->conn_mac_mode;
        new_connp->conn_zone_is_global = old_connp->conn_zone_is_global;
        old_connp->conn_mac_mode = CONN_MAC_DEFAULT;

        /* Do the IPC initialization: the eager gains a reference ... */
        CONN_INC_REF(new_connp);

        /* ... and the old IPC drops its reference on its conn_t. */
        CONN_DEC_REF(old_connp);
}

/*
 * This runs at the tail end of accept processing on the squeue of the
 * new connection.
 *
 * Arguments:
 *      arg     conn_t of the newly accepted connection.
 *      mp      A single preallocated mblk, large enough to hold either a
 *              struct T_discon_ind or a struct stroptions.  It is consumed
 *              here: rewritten in place and sent upstream, or freed.
 *      arg2, dummy
 *              Unused.
 *
 * If the connection was torn down before the accept could complete
 * (tcp_state <= TCPS_BOUND) or option processing flagged an accept error,
 * a T_DISCON_IND is sent upstream when the endpoint is a socket or when
 * tcp_send_discon_ind is set; otherwise the mblk is simply released.
 *
 * In the normal case the mblk is turned into an M_SETOPTS message carrying
 * the stream head parameters, any data queued while the connection was
 * detached is drained upstream, a pending ordrel is delivered, and the
 * keepalive timer is started if keepalive is enabled.
 */
/* ARGSUSED */
static void
tcp_accept_finish(void *arg, mblk_t *mp, void *arg2, ip_recv_attr_t *dummy)
{
        conn_t                  *connp = (conn_t *)arg;
        tcp_t                   *tcp = connp->conn_tcp;
        queue_t                 *q = connp->conn_rq;
        tcp_stack_t             *tcps = tcp->tcp_tcps;
        struct stroptions       *stropt;
        struct sock_proto_props sopp;

        /* Should never be called for non-STREAMS sockets */
        ASSERT(!IPCL_IS_NONSTR(connp));

        /* We should just receive a single mblk that fits a T_discon_ind */
        ASSERT(mp->b_cont == NULL);

        /*
         * Drop the eager's ref on the listener, that was placed when
         * this eager began life in tcp_input_listener.
         */
        CONN_DEC_REF(tcp->tcp_saved_listener->tcp_connp);

        tcp->tcp_detached = B_FALSE;

        if (tcp->tcp_state <= TCPS_BOUND || tcp->tcp_accept_error) {
                /*
                 * Someone blew off the eager before we could finish
                 * the accept.
                 *
                 * The only reason the eager exists is because we put
                 * a ref on it when the conn ind went up. We need to send
                 * a disconnect indication up while the last reference
                 * on the eager will be dropped by the squeue when we
                 * return.
                 */
                ASSERT(tcp->tcp_listener == NULL);
                if (tcp->tcp_issocket || tcp->tcp_send_discon_ind) {
                        struct  T_discon_ind    *tdi;

                        (void) putnextctl1(q, M_FLUSH, FLUSHRW);
                        /*
                         * Let us reuse the incoming mblk to avoid
                         * memory allocation failure problems. We know
                         * that the size of the incoming mblk i.e.
                         * stroptions is greater than sizeof
                         * T_discon_ind.
                         */
                        ASSERT(DB_REF(mp) == 1);
                        ASSERT(MBLKSIZE(mp) >=
                            sizeof (struct T_discon_ind));

                        DB_TYPE(mp) = M_PROTO;
                        ((union T_primitives *)mp->b_rptr)->type =
                            T_DISCON_IND;
                        tdi = (struct T_discon_ind *)mp->b_rptr;
                        if (tcp->tcp_issocket) {
                                tdi->DISCON_reason = ECONNREFUSED;
                                tdi->SEQ_number = 0;
                        } else {
                                tdi->DISCON_reason = ENOPROTOOPT;
                                tdi->SEQ_number =
                                    tcp->tcp_conn_req_seqnum;
                        }
                        mp->b_wptr = mp->b_rptr +
                            sizeof (struct T_discon_ind);
                        putnext(q, mp);
                } else {
                        /*
                         * Nothing is sent upstream in this case, so the
                         * preallocated mblk has no further use.  We own
                         * it, so release it here instead of leaking it.
                         */
                        freemsg(mp);
                }
                tcp->tcp_hard_binding = B_FALSE;
                return;
        }

        /*
         * This is the first time we run on the correct
         * queue after tcp_accept. So fix all the q parameters
         * here.
         *
         * Let us reuse the incoming mblk to avoid
         * memory allocation failure problems. We know
         * that the size of the incoming mblk is at least
         * stroptions
         */
        tcp_get_proto_props(tcp, &sopp);

        ASSERT(DB_REF(mp) == 1);
        ASSERT(MBLKSIZE(mp) >= sizeof (struct stroptions));

        DB_TYPE(mp) = M_SETOPTS;
        stropt = (struct stroptions *)mp->b_rptr;
        mp->b_wptr = mp->b_rptr + sizeof (struct stroptions);
        ASSERT(sopp.sopp_flags & (SO_HIWAT|SO_WROFF|SO_MAXBLK));
        stropt->so_flags = SO_HIWAT | SO_WROFF | SO_MAXBLK;
        stropt->so_hiwat = sopp.sopp_rxhiwat;
        stropt->so_wroff = sopp.sopp_wroff;
        stropt->so_maxblk = sopp.sopp_maxblk;

        /* Send the options up */
        putnext(q, mp);

        /*
         * Pass up any data and/or a fin that has been received.
         *
         * Adjust receive window in case it had decreased
         * (because there is data <=> tcp_rcv_list != NULL)
         * while the connection was detached. Note that
         * in case the eager was flow-controlled, w/o this
         * code, the rwnd may never open up again!
         */
        if (tcp->tcp_rcv_list != NULL) {
                /* We drain directly in case of fused tcp loopback */

                if (!tcp->tcp_fused && canputnext(q)) {
                        tcp->tcp_rwnd = connp->conn_rcvbuf;
                        if (tcp->tcp_state >= TCPS_ESTABLISHED &&
                            tcp_rwnd_reopen(tcp) == TH_ACK_NEEDED) {
                                tcp_xmit_ctl(NULL,
                                    tcp, (tcp->tcp_swnd == 0) ?
                                    tcp->tcp_suna : tcp->tcp_snxt,
                                    tcp->tcp_rnxt, TH_ACK);
                        }
                }

                (void) tcp_rcv_drain(tcp);

                /*
                 * For fused tcp loopback, back-enable peer endpoint
                 * if it's currently flow-controlled.
                 */
                if (tcp->tcp_fused) {
                        tcp_t *peer_tcp = tcp->tcp_loopback_peer;

                        ASSERT(peer_tcp != NULL);
                        ASSERT(peer_tcp->tcp_fused);

                        mutex_enter(&peer_tcp->tcp_non_sq_lock);
                        if (peer_tcp->tcp_flow_stopped) {
                                tcp_clrqfull(peer_tcp);
                                TCP_STAT(tcps, tcp_fusion_backenabled);
                        }
                        mutex_exit(&peer_tcp->tcp_non_sq_lock);
                }
        }
        ASSERT(tcp->tcp_rcv_list == NULL || tcp->tcp_fused_sigurg);
        if (tcp->tcp_fin_rcvd && !tcp->tcp_ordrel_done) {
                /* A FIN arrived while detached; deliver the ordrel now. */
                tcp->tcp_ordrel_done = B_TRUE;
                mp = tcp->tcp_ordrel_mp;
                tcp->tcp_ordrel_mp = NULL;
                putnext(q, mp);
        }
        tcp->tcp_hard_binding = B_FALSE;

        if (connp->conn_keepalive) {
                tcp->tcp_ka_last_intrvl = 0;
                tcp->tcp_ka_tid = TCP_TIMER(tcp, tcp_keepalive_timer,
                    tcp->tcp_ka_interval);
        }

        /*
         * At this point, eager is fully established and will
         * have the following references -
         *
         * 2 references for connection to exist (1 for TCP and 1 for IP).
         * 1 reference for the squeue which will be dropped by the squeue as
         *      soon as this function returns.
         * There will be 1 additional reference for being in classifier
         *      hash list provided something bad hasn't happened.
         */
        ASSERT((connp->conn_fanout != NULL && connp->conn_ref >= 4) ||
            (connp->conn_fanout == NULL && connp->conn_ref >= 3));
}

/*
 * Dequeue the oldest deferred connection indication from the listener.
 *
 * The eager that owns the indication is moved from the listener's q0 list
 * to the tail of its q list, the q0/q counters are adjusted to match, and
 * the eager's saved T_conn_ind mblk is handed back to the caller.
 *
 * The caller must hold listener->tcp_eager_lock and must already have
 * checked that a deferred conn ind is present.
 */
static mblk_t *
tcp_get_def_conn_ind(tcp_t *listener)
{
        tcp_t   *eager;
        tcp_t   *q0_prev;
        tcp_t   *q0_next;
        tcp_t   *q_last;
        mblk_t  *ind_mp;

        ASSERT(MUTEX_HELD(&listener->tcp_eager_lock));
        ASSERT(listener->tcp_eager_prev_q0->tcp_conn_def_q0);

        /*
         * The listener's tcp_eager_prev_q0 is the TAIL of the deferred
         * T_conn_ind queue.  Walk backwards to the head so that the
         * indications go up in the order the 3WHS completed: stop at the
         * first entry whose predecessor is not a deferred one.
         */
        eager = listener->tcp_eager_prev_q0;
        while (eager != listener &&
            eager->tcp_eager_prev_q0->tcp_conn_def_q0) {
                eager = eager->tcp_eager_prev_q0;
        }

        /* Claim the saved indication. */
        ind_mp = eager->tcp_conn.tcp_eager_conn_ind;
        eager->tcp_conn.tcp_eager_conn_ind = NULL;

        /* Account for the move from q0 to q. */
        ASSERT(listener->tcp_conn_req_cnt_q0 > 0);
        listener->tcp_conn_req_cnt_q0--;
        listener->tcp_conn_req_cnt_q++;

        /* Splice the eager out of the doubly linked q0 list. */
        q0_next = eager->tcp_eager_next_q0;
        q0_prev = eager->tcp_eager_prev_q0;
        q0_next->tcp_eager_prev_q0 = q0_prev;
        q0_prev->tcp_eager_next_q0 = q0_next;
        eager->tcp_eager_prev_q0 = NULL;
        eager->tcp_eager_next_q0 = NULL;
        eager->tcp_conn_def_q0 = B_FALSE;

        /* The eager must not be on the list of droppables. */
        ASSERT(eager->tcp_eager_next_drop_q0 == NULL &&
            eager->tcp_eager_prev_drop_q0 == NULL);

        /*
         * Append to the q list.  sockfs sends T_CONN_RES down in
         * chronological order, so keeping the older indications at the
         * front shortens the search.
         */
        q_last = listener->tcp_eager_last_q;
        if (q_last == NULL) {
                listener->tcp_eager_next_q = eager;
        } else {
                q_last->tcp_eager_next_q = eager;
        }
        listener->tcp_eager_last_q = eager;
        eager->tcp_eager_next_q = NULL;

        return (ind_mp);
}


/*
 * Reply to a client's T_CONN_RES TPI message. This function
 * is used only for TLI/XTI listener. Sockfs sends T_CONN_RES
 * on the acceptor STREAM and processed in tcp_accept_common().
 * Read the block comment on top of tcp_input_listener().
 *
 * Arguments:
 *      listener        the listening endpoint the T_CONN_RES arrived on.
 *      mp              the T_CONN_RES message; it is consumed here.
 *
 * Outline:
 *   1. Find the acceptor named by ACCEPTOR_id (either the listener itself
 *      or an endpoint found through the acceptor hash) and check its state.
 *   2. Find the eager on the listener's queue whose tcp_conn_req_seqnum
 *      matches SEQ_number.
 *   3. Preallocate the mblk for tcp_accept_finish(), keep a copy of the
 *      request, and build the T_OK_ACK with the eager's local address
 *      appended.
 *   4. Swap the eager onto the acceptor's queues, process any options,
 *      send the T_OK_ACK and any deferred T_CONN_IND up the listener.
 *   5. Dispose of the old acceptor and queue tcp_accept_finish() on the
 *      eager's squeue.
 *
 * Errors are reported to the listener with tcp_err_ack():
 *      TPROTO          message is shorter than a struct T_conn_res
 *      TBADF           listener is the acceptor, but there is not exactly
 *                      one outstanding indication or its sequence number
 *                      does not match
 *      TPROVMISMATCH   no acceptor found for ACCEPTOR_id
 *      TOUTSTATE       acceptor is not idle/bound, or listener is not in
 *                      TCPS_LISTEN
 *      TBADSEQ         no queued eager carries SEQ_number
 *      TSYSERR/ENOMEM  an mblk allocation failed
 *      option processing may also yield its own t_error/sys_error
 */
void
tcp_tli_accept(tcp_t *listener, mblk_t *mp)
{
        tcp_t           *acceptor;
        tcp_t           *eager;
        struct T_conn_res       *tcr;
        t_uscalar_t     acceptor_id;
        t_scalar_t      seqnum;
        mblk_t          *discon_mp = NULL;
        mblk_t          *ok_mp;
        mblk_t          *mp1;
        tcp_stack_t     *tcps = listener->tcp_tcps;
        conn_t          *econnp;

        /* The message must at least hold the fixed T_conn_res header. */
        if ((mp->b_wptr - mp->b_rptr) < sizeof (*tcr)) {
                tcp_err_ack(listener, mp, TPROTO, 0);
                return;
        }
        tcr = (struct T_conn_res *)mp->b_rptr;

        /*
         * Under ILP32 the stream head points tcr->ACCEPTOR_id at the
         * read side queue of the streams device underneath us i.e. the
         * read side queue of 'ip'. Since we can't dereference QUEUE_ptr we
         * look it up in the queue_hash.  Under LP64 it sends down the
         * minor_t of the accepting endpoint.
         *
         * Once the acceptor/eager are modified (in tcp_accept_swap) the
         * fanout hash lock is held.
         * This prevents any thread from entering the acceptor queue from
         * below (since it has not been hard bound yet i.e. any inbound
         * packets will arrive on the listener conn_t and
         * go through the classifier).
         * The CONN_INC_REF will prevent the acceptor from closing.
         *
         * XXX It is still possible for a tli application to send down data
         * on the accepting stream while another thread calls t_accept.
         * This should not be a problem for well-behaved applications since
         * the T_OK_ACK is sent after the queue swapping is completed.
         *
         * If the accepting fd is the same as the listening fd, avoid
         * queue hash lookup since that will return an eager listener in
         * an already established state.
         */
        acceptor_id = tcr->ACCEPTOR_id;
        mutex_enter(&listener->tcp_eager_lock);
        if (listener->tcp_acceptor_id == acceptor_id) {
                /* Accepting on the listening stream itself. */
                eager = listener->tcp_eager_next_q;
                /* only count how many T_CONN_INDs so don't count q0 */
                if ((listener->tcp_conn_req_cnt_q != 1) ||
                    (eager->tcp_conn_req_seqnum != tcr->SEQ_number)) {
                        mutex_exit(&listener->tcp_eager_lock);
                        tcp_err_ack(listener, mp, TBADF, 0);
                        return;
                }
                if (listener->tcp_conn_req_cnt_q0 != 0) {
                        /* Throw away all the eagers on q0. */
                        tcp_eager_cleanup(listener, 1);
                }
                if (listener->tcp_syn_defense) {
                        /* Turn SYN defense off and free its address cache. */
                        listener->tcp_syn_defense = B_FALSE;
                        if (listener->tcp_ip_addr_cache != NULL) {
                                kmem_free(listener->tcp_ip_addr_cache,
                                    IP_ADDR_CACHE_SIZE * sizeof (ipaddr_t));
                                listener->tcp_ip_addr_cache = NULL;
                        }
                }
                /*
                 * Transfer tcp_conn_req_max to the eager so that when
                 * a disconnect occurs we can revert the endpoint to the
                 * listen state.
                 */
                eager->tcp_conn_req_max = listener->tcp_conn_req_max;
                ASSERT(listener->tcp_conn_req_cnt_q0 == 0);
                /*
                 * Get a reference on the acceptor just like the
                 * tcp_acceptor_hash_lookup below.
                 */
                acceptor = listener;
                CONN_INC_REF(acceptor->tcp_connp);
        } else {
                acceptor = tcp_acceptor_hash_lookup(acceptor_id, tcps);
                if (acceptor == NULL) {
                        if (listener->tcp_connp->conn_debug) {
                                (void) strlog(TCP_MOD_ID, 0, 1,
                                    SL_ERROR|SL_TRACE,
                                    "tcp_accept: did not find acceptor 0x%x\n",
                                    acceptor_id);
                        }
                        mutex_exit(&listener->tcp_eager_lock);
                        tcp_err_ack(listener, mp, TPROVMISMATCH, 0);
                        return;
                }
                /*
                 * Verify acceptor state. The acceptable states for an acceptor
                 * include TCPS_IDLE and TCPS_BOUND.
                 */
                switch (acceptor->tcp_state) {
                case TCPS_IDLE:
                        /* FALLTHRU */
                case TCPS_BOUND:
                        break;
                default:
                        /* Give back the acceptor reference before failing. */
                        CONN_DEC_REF(acceptor->tcp_connp);
                        mutex_exit(&listener->tcp_eager_lock);
                        tcp_err_ack(listener, mp, TOUTSTATE, 0);
                        return;
                }
        }

        /* The listener must be in TCPS_LISTEN */
        if (listener->tcp_state != TCPS_LISTEN) {
                CONN_DEC_REF(acceptor->tcp_connp);
                mutex_exit(&listener->tcp_eager_lock);
                tcp_err_ack(listener, mp, TOUTSTATE, 0);
                return;
        }

        /*
         * Rendezvous with an eager connection request packet hanging off
         * 'tcp' that has the 'seqnum' tag.  We tagged the detached open
         * tcp structure when the connection packet arrived in
         * tcp_input_listener().
         */
        seqnum = tcr->SEQ_number;
        eager = listener;
        do {
                eager = eager->tcp_eager_next_q;
                if (eager == NULL) {
                        /* Ran off the end of the queue: no such seqnum. */
                        CONN_DEC_REF(acceptor->tcp_connp);
                        mutex_exit(&listener->tcp_eager_lock);
                        tcp_err_ack(listener, mp, TBADSEQ, 0);
                        return;
                }
        } while (eager->tcp_conn_req_seqnum != seqnum);
        mutex_exit(&listener->tcp_eager_lock);

        /*
         * At this point, both acceptor and listener have 2 ref
         * that they begin with. Acceptor has one additional ref
         * we placed in lookup while listener has 3 additional
         * ref for being behind the squeue (tcp_accept() is
         * done on listener's squeue); being in classifier hash;
         * and eager's ref on listener.
         */
        ASSERT(listener->tcp_connp->conn_ref >= 5);
        ASSERT(acceptor->tcp_connp->conn_ref >= 3);

        /*
         * The eager at this point is set in its own squeue and
         * could easily have been killed (tcp_accept_finish will
         * deal with that) because of a TH_RST so we can only
         * ASSERT for a single ref.
         */
        ASSERT(eager->tcp_connp->conn_ref >= 1);

        /*
         * Pre allocate the discon_ind mblk also. tcp_accept_finish will
         * use it if something failed.  It is sized to hold whichever of
         * T_discon_ind and stroptions is larger, since tcp_accept_finish
         * reuses it for one or the other.
         */
        discon_mp = allocb(MAX(sizeof (struct T_discon_ind),
            sizeof (struct stroptions)), BPRI_HI);
        if (discon_mp == NULL) {
                CONN_DEC_REF(acceptor->tcp_connp);
                CONN_DEC_REF(eager->tcp_connp);
                tcp_err_ack(listener, mp, TSYSERR, ENOMEM);
                return;
        }

        econnp = eager->tcp_connp;

        /* Hold a copy of mp, in case reallocb fails */
        if ((mp1 = copymsg(mp)) == NULL) {
                CONN_DEC_REF(acceptor->tcp_connp);
                CONN_DEC_REF(eager->tcp_connp);
                freemsg(discon_mp);
                tcp_err_ack(listener, mp, TSYSERR, ENOMEM);
                return;
        }

        /*
         * From here on read the request through the copy; mp itself is
         * consumed by the ok-ack allocation below.
         */
        tcr = (struct T_conn_res *)mp1->b_rptr;

        /*
         * This is an expanded version of mi_tpi_ok_ack_alloc()
         * which allocates a larger mblk and appends the new
         * local address to the ok_ack.  The address is copied by
         * soaccept() for getsockname().
         */
        {
                int extra;

                /* Room for the local address, sized by address family. */
                extra = (econnp->conn_family == AF_INET) ?
                    sizeof (sin_t) : sizeof (sin6_t);

                /*
                 * Try to re-use mp, if possible.  Otherwise, allocate
                 * an mblk and return it as ok_mp.  In any case, mp
                 * is no longer usable upon return.
                 */
                if ((ok_mp = mi_tpi_ok_ack_alloc_extra(mp, extra)) == NULL) {
                        CONN_DEC_REF(acceptor->tcp_connp);
                        CONN_DEC_REF(eager->tcp_connp);
                        freemsg(discon_mp);
                        /* Original mp has been freed by now, so use mp1 */
                        tcp_err_ack(listener, mp1, TSYSERR, ENOMEM);
                        return;
                }

                mp = NULL;      /* We should never use mp after this point */

                /* Append the eager's local address after the T_ok_ack. */
                switch (extra) {
                case sizeof (sin_t): {
                        sin_t *sin = (sin_t *)ok_mp->b_wptr;

                        ok_mp->b_wptr += extra;
                        sin->sin_family = AF_INET;
                        sin->sin_port = econnp->conn_lport;
                        sin->sin_addr.s_addr = econnp->conn_laddr_v4;
                        break;
                }
                case sizeof (sin6_t): {
                        sin6_t *sin6 = (sin6_t *)ok_mp->b_wptr;

                        ok_mp->b_wptr += extra;
                        sin6->sin6_family = AF_INET6;
                        sin6->sin6_port = econnp->conn_lport;
                        sin6->sin6_addr = econnp->conn_laddr_v6;
                        sin6->sin6_flowinfo = econnp->conn_flowinfo;
                        /*
                         * A scope id is only reported for a link-scope
                         * address that has one set.
                         */
                        if (IN6_IS_ADDR_LINKSCOPE(&econnp->conn_laddr_v6) &&
                            (econnp->conn_ixa->ixa_flags & IXAF_SCOPEID_SET)) {
                                sin6->sin6_scope_id =
                                    econnp->conn_ixa->ixa_scopeid;
                        } else {
                                sin6->sin6_scope_id = 0;
                        }
                        sin6->__sin6_src_id = 0;
                        break;
                }
                default:
                        break;
                }
                ASSERT(ok_mp->b_wptr <= ok_mp->b_datap->db_lim);
        }

        /*
         * If there are no options we know that the T_CONN_RES will
         * succeed. However, we can't send the T_OK_ACK upstream until
         * the tcp_accept_swap is done since it would be dangerous to
         * let the application start using the new fd prior to the swap.
         */
        tcp_accept_swap(listener, acceptor, eager);

        /*
         * tcp_accept_swap unlinks eager from listener but does not drop
         * the eager's reference on the listener.
         */
        ASSERT(eager->tcp_listener == NULL);
        ASSERT(listener->tcp_connp->conn_ref >= 5);

        /*
         * The eager is now associated with its own queue. Insert in
         * the hash so that the connection can be reused for a future
         * T_CONN_RES.
         */
        tcp_acceptor_hash_insert(acceptor_id, eager);

        /*
         * We now do the processing of options with T_CONN_RES.
         * We delay till now since we wanted to have queue to pass to
         * option processing routines that points back to the right
         * instance structure which does not happen until after
         * tcp_accept_swap().
         *
         * Note:
         * The sanity of the logic here assumes that whatever options
         * are appropriate to inherit from listener=>eager are done
         * before this point, and whatever were to be overridden (or not)
         * in transfer logic from eager=>acceptor in tcp_accept_swap().
         * [ Warning: acceptor endpoint can have T_OPTMGMT_REQ done to it
         *   before its ACCEPTOR_id comes down in T_CONN_RES ]
         * This may not be true at this point in time but can be fixed
         * independently. This option processing code starts with
         * the instantiated acceptor instance and the final queue at
         * this point.
         */

        if (tcr->OPT_length != 0) {
                /* Options to process */
                int t_error = 0;
                int sys_error = 0;
                int do_disconnect = 0;

                if (tcp_conprim_opt_process(eager, mp1,
                    &do_disconnect, &t_error, &sys_error) < 0) {
                        /* Tell tcp_accept_finish the accept has failed. */
                        eager->tcp_accept_error = 1;
                        if (do_disconnect) {
                                /*
                                 * An option failed which does not allow
                                 * connection to be accepted.
                                 *
                                 * We allow T_CONN_RES to succeed and
                                 * put a T_DISCON_IND on the eager queue.
                                 */
                                ASSERT(t_error == 0 && sys_error == 0);
                                eager->tcp_send_discon_ind = 1;
                        } else {
                                ASSERT(t_error != 0);
                                freemsg(ok_mp);
                                /*
                                 * Original mp was either freed or set
                                 * to ok_mp above, so use mp1 instead.
                                 */
                                tcp_err_ack(listener, mp1, t_error, sys_error);
                                goto finish;
                        }
                }
                /*
                 * Most likely success in setting options (except if
                 * eager->tcp_send_discon_ind set).
                 * mp1 option buffer represented by OPT_length/offset
                 * potentially modified and contains results of setting
                 * options at this point
                 */
        }

        /* We no longer need mp1, since all options processing has passed */
        freemsg(mp1);

        putnext(listener->tcp_connp->conn_rq, ok_mp);

        /*
         * With this indication answered, send up the oldest deferred
         * T_CONN_IND, if the listener has one pending.
         */
        mutex_enter(&listener->tcp_eager_lock);
        if (listener->tcp_eager_prev_q0->tcp_conn_def_q0) {
                mblk_t  *conn_ind;

                /*
                 * This path should not be executed if listener and
                 * acceptor streams are the same.
                 */
                ASSERT(listener != acceptor);
                conn_ind = tcp_get_def_conn_ind(listener);
                mutex_exit(&listener->tcp_eager_lock);
                putnext(listener->tcp_connp->conn_rq, conn_ind);
        } else {
                mutex_exit(&listener->tcp_eager_lock);
        }

        /*
         * Done with the acceptor - free it
         *
         * Note: from this point on, no access to listener should be made
         * as listener can be equal to acceptor.
         */
finish:
        ASSERT(acceptor->tcp_detached);
        acceptor->tcp_connp->conn_rq = NULL;
        ASSERT(!IPCL_IS_NONSTR(acceptor->tcp_connp));
        acceptor->tcp_connp->conn_wq = NULL;
        (void) tcp_clean_death(acceptor, 0);
        CONN_DEC_REF(acceptor->tcp_connp);

        /*
         * We pass discon_mp to tcp_accept_finish to get on the right squeue.
         *
         * It will update the setting for sockfs/stream head and also take
         * care of any data that arrived before accept() was called.
         * In case we already received a FIN then tcp_accept_finish will send up
         * the ordrel. It will also send up a window update if the window
         * has opened up.
         */

        /*
         * XXX: we currently have a problem if XTI application closes the
         * acceptor stream in between. This problem exists in on10-gate also
         * and is well known but nothing can be done short of major rewrite
         * to fix it. Now it is possible to take care of it by assigning TLI/XTI
         * eager same squeue as listener (we can distinguish non socket
         * listeners at the time of handling a SYN in tcp_input_listener)
         * and do most of the work that tcp_accept_finish does here itself
         * and then get behind the acceptor squeue to access the acceptor
         * queue.
         */
        /*
         * We already have a ref on tcp so no need to do one before squeue_enter
         */
        SQUEUE_ENTER_ONE(eager->tcp_connp->conn_sqp, discon_mp,
            tcp_accept_finish, eager->tcp_connp, NULL, SQ_FILL,
            SQTAG_TCP_ACCEPT_FINISH);
}


/*
 * This is the STREAMS entry point for T_CONN_RES coming down on
 * Acceptor STREAM when  sockfs listener does accept processing.
 * Read the block comment on top of tcp_input_listener().
 */
int
tcp_tpi_accept(queue_t *q, mblk_t *mp)
{
        queue_t *rq = RD(q);
        struct T_conn_res *conn_res;
        tcp_t *eager;
        tcp_t *listener;
        struct T_ok_ack *ok;
        t_scalar_t PRIM_type;
        mblk_t *discon_mp;
        conn_t *econnp;
        cred_t *cr;

        ASSERT(DB_TYPE(mp) == M_PROTO);

        /*
         * All Solaris components should pass a db_credp
         * for this TPI message, hence we ASSERT.
         * But in case there is some other M_PROTO that looks
         * like a TPI message sent by some other kernel
         * component, we check and return an error.
         */
        cr = msg_getcred(mp, NULL);
        ASSERT(cr != NULL);
        if (cr == NULL) {
                mp = mi_tpi_err_ack_alloc(mp, TSYSERR, EINVAL);
                if (mp != NULL)
                        putnext(rq, mp);
                return (0);
        }
        conn_res = (struct T_conn_res *)mp->b_rptr;
        ASSERT((uintptr_t)(mp->b_wptr - mp->b_rptr) <= (uintptr_t)INT_MAX);
        if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_conn_res)) {
                mp = mi_tpi_err_ack_alloc(mp, TPROTO, 0);
                if (mp != NULL)
                        putnext(rq, mp);
                return (0);
        }
        switch (conn_res->PRIM_type) {
        case O_T_CONN_RES:
        case T_CONN_RES:
                /*
                 * We pass up an err ack if allocb fails. This will
                 * cause sockfs to issue a T_DISCON_REQ which will cause
                 * tcp_eager_blowoff to be called. sockfs will then call
                 * rq->q_qinfo->qi_qclose to cleanup the acceptor stream.
                 * we need to do the allocb up here because we have to
                 * make sure rq->q_qinfo->qi_qclose still points to the
                 * correct function (tcp_tpi_close_accept) in case allocb
                 * fails.
                 */
                bcopy(mp->b_rptr + conn_res->OPT_offset,
                    &eager, conn_res->OPT_length);
                PRIM_type = conn_res->PRIM_type;
                mp->b_datap->db_type = M_PCPROTO;
                mp->b_wptr = mp->b_rptr + sizeof (struct T_ok_ack);
                ok = (struct T_ok_ack *)mp->b_rptr;
                ok->PRIM_type = T_OK_ACK;
                ok->CORRECT_prim = PRIM_type;
                econnp = eager->tcp_connp;
                econnp->conn_dev = (dev_t)RD(q)->q_ptr;
                econnp->conn_minor_arena = (vmem_t *)(WR(q)->q_ptr);
                econnp->conn_rq = rq;
                econnp->conn_wq = q;
                rq->q_ptr = econnp;
                rq->q_qinfo = &tcp_rinitv4;     /* No open - same as rinitv6 */
                q->q_ptr = econnp;
                q->q_qinfo = &tcp_winit;
                listener = eager->tcp_listener;

                /*
                 * Pre allocate the discon_ind mblk also. tcp_accept_finish will
                 * use it if something failed.
                 */
                discon_mp = allocb(MAX(sizeof (struct T_discon_ind),
                    sizeof (struct stroptions)), BPRI_HI);

                if (discon_mp == NULL) {
                        mp = mi_tpi_err_ack_alloc(mp, TPROTO, 0);
                        if (mp != NULL)
                                putnext(rq, mp);
                        return (0);
                }

                eager->tcp_issocket = B_TRUE;

                ASSERT(econnp->conn_netstack ==
                    listener->tcp_connp->conn_netstack);
                ASSERT(eager->tcp_tcps == listener->tcp_tcps);

                /* Put the ref for IP */
                CONN_INC_REF(econnp);

                /*
                 * We should have minimum of 3 references on the conn
                 * at this point. One each for TCP and IP and one for
                 * the T_conn_ind that was sent up when the 3-way handshake
                 * completed. In the normal case we would also have another
                 * reference (making a total of 4) for the conn being in the
                 * classifier hash list. However the eager could have received
                 * an RST subsequently and tcp_closei_local could have removed
                 * the eager from the classifier hash list, hence we can't
                 * assert that reference.
                 */
                ASSERT(econnp->conn_ref >= 3);

                mutex_enter(&listener->tcp_eager_lock);
                if (listener->tcp_eager_prev_q0->tcp_conn_def_q0) {
                        mblk_t *conn_ind = tcp_get_def_conn_ind(listener);

                        /* Need to get inside the listener perimeter */
                        CONN_INC_REF(listener->tcp_connp);
                        SQUEUE_ENTER_ONE(listener->tcp_connp->conn_sqp,
                            conn_ind, tcp_send_pending, listener->tcp_connp,
                            NULL, SQ_FILL, SQTAG_TCP_SEND_PENDING);
                }
                tcp_eager_unlink(eager);
                mutex_exit(&listener->tcp_eager_lock);

                /*
                 * At this point, the eager is detached from the listener
                 * but we still have an extra refs on eager (apart from the
                 * usual tcp references). The ref was placed in tcp_input_data
                 * before sending the conn_ind in tcp_send_conn_ind.
                 * The ref will be dropped in tcp_accept_finish().
                 */
                SQUEUE_ENTER_ONE(econnp->conn_sqp, discon_mp, tcp_accept_finish,
                    econnp, NULL, SQ_NODRAIN, SQTAG_TCP_ACCEPT_FINISH_Q0);

                /*
                 * Send the new local address also up to sockfs. There
                 * should already be enough space in the mp that came
                 * down from soaccept().
                 */
                if (econnp->conn_family == AF_INET) {
                        sin_t *sin;

                        ASSERT((mp->b_datap->db_lim - mp->b_datap->db_base) >=
                            (sizeof (struct T_ok_ack) + sizeof (sin_t)));
                        sin = (sin_t *)mp->b_wptr;
                        mp->b_wptr += sizeof (sin_t);
                        sin->sin_family = AF_INET;
                        sin->sin_port = econnp->conn_lport;
                        sin->sin_addr.s_addr = econnp->conn_laddr_v4;
                } else {
                        sin6_t *sin6;

                        ASSERT((mp->b_datap->db_lim - mp->b_datap->db_base) >=
                            sizeof (struct T_ok_ack) + sizeof (sin6_t));
                        sin6 = (sin6_t *)mp->b_wptr;
                        mp->b_wptr += sizeof (sin6_t);
                        sin6->sin6_family = AF_INET6;
                        sin6->sin6_port = econnp->conn_lport;
                        sin6->sin6_addr = econnp->conn_laddr_v6;
                        if (econnp->conn_ipversion == IPV4_VERSION)
                                sin6->sin6_flowinfo = 0;
                        else
                                sin6->sin6_flowinfo = econnp->conn_flowinfo;
                        if (IN6_IS_ADDR_LINKSCOPE(&econnp->conn_laddr_v6) &&
                            (econnp->conn_ixa->ixa_flags & IXAF_SCOPEID_SET)) {
                                sin6->sin6_scope_id =
                                    econnp->conn_ixa->ixa_scopeid;
                        } else {
                                sin6->sin6_scope_id = 0;
                        }
                        sin6->__sin6_src_id = 0;
                }

                putnext(rq, mp);
                break;
        default:
                mp = mi_tpi_err_ack_alloc(mp, TNOTSUPPORT, 0);
                if (mp != NULL)
                        putnext(rq, mp);
                break;
        }
        return (0);
}

/*
 * Squeue callback used to get behind the listener's perimeter and pass a
 * previously deferred T_CONN_IND up the listener's read queue.
 *
 *   arg   - the listener's conn_t.
 *   mp    - the deferred T_CONN_IND message.
 *   arg2, dummy - unused.
 *
 * mp is consumed on every path: it is freed when the listener is no longer
 * in TCPS_LISTEN, and handed to putnext() otherwise.
 *
 * The eager pointer that may be embedded in the message's option area is
 * not needed here, so it is deliberately not extracted. (For non-socket
 * listeners tcp_send_conn_ind() has already cleared OPT_offset/OPT_length
 * before the message was deferred, so there may be nothing to extract.)
 */
/* ARGSUSED */
void
tcp_send_pending(void *arg, mblk_t *mp, void *arg2, ip_recv_attr_t *dummy)
{
        conn_t  *lconnp = (conn_t *)arg;
        tcp_t   *listener = lconnp->conn_tcp;

        if (listener->tcp_state != TCPS_LISTEN) {
                /*
                 * If the listener has closed, it would have caused a
                 * cleanup/blowoff to happen for the eager, so we
                 * don't need to do anything more than drop the message.
                 */
                freemsg(mp);
                return;
        }

        putnext(lconnp->conn_rq, mp);
}

/*
 * Hand the T_CONN_IND for a freshly established eager to its listener.
 *
 * This runs once the 3-way handshake is done. Callers normally reach it
 * through the squeue so that it executes inside the listener's perimeter;
 * as an optimization it may be called directly when the listener and the
 * eager share the same perimeter.
 *
 *   arg  - the listener's conn_t.
 *   mp   - the T_CONN_IND; the eager's tcp_t pointer is carried in its
 *          option area.
 *   arg2 - unused.
 *
 * If the listener still has room on its established queue the eager is
 * moved from q0 to the tail of q and mp is sent upstream. Otherwise the
 * eager is parked at the tail of q0 with mp attached, to be delivered
 * later. If the listener is no longer listening, mp is simply freed.
 */
/* ARGSUSED */
void
tcp_send_conn_ind(void *arg, mblk_t *mp, void *arg2)
{
        conn_t                  *lconnp = (conn_t *)arg;
        tcp_t                   *listener = lconnp->conn_tcp;
        tcp_stack_t             *tcps = listener->tcp_tcps;
        struct T_conn_ind       *conn_ind = (struct T_conn_ind *)mp->b_rptr;
        boolean_t               deliver_now;
        ipaddr_t                *cache;
        tcp_t                   *eager;
        tcp_t                   *q0_next;
        tcp_t                   *q0_prev;

        /* Pull the eager's tcp_t pointer out of the option area. */
        ASSERT(conn_ind->OPT_offset != 0 &&
            conn_ind->OPT_length == sizeof (intptr_t));
        bcopy(mp->b_rptr + conn_ind->OPT_offset, &eager,
            conn_ind->OPT_length);

        /*
         * Carrying the eager as an "option" violates option semantics
         * and confuses TLI/XTI applications, which have no use for it
         * anyway. Strip it unless the listener is a socket.
         */
        if (!TCP_IS_SOCKET(listener)) {
                conn_ind->OPT_length = 0;
                conn_ind->OPT_offset = 0;
        }

        /*
         * A listener that has closed will already have triggered the
         * cleanup/blowoff of this eager; nothing is left to do here.
         */
        if (listener->tcp_state != TCPS_LISTEN) {
                freemsg(mp);
                return;
        }

        mutex_enter(&listener->tcp_eager_lock);

        /*
         * The handshake is over, so this eager must no longer be a
         * candidate on the list of droppable eagers.
         */
        MAKE_UNDROPPABLE(eager);

        /* Snapshot the eager's current neighbours on q0. */
        q0_next = eager->tcp_eager_next_q0;
        q0_prev = eager->tcp_eager_prev_q0;

        if (listener->tcp_conn_req_cnt_q >= listener->tcp_conn_req_max) {
                /*
                 * The established queue is full: mark the connection as
                 * deferred and hold on to the T_CONN_IND until space
                 * becomes available after t_accept() processing.
                 */
                eager->tcp_conn_def_q0 = B_TRUE;

                /* Unlink the eager from its current spot on q0 ... */
                q0_prev->tcp_eager_next_q0 = q0_next;
                q0_next->tcp_eager_prev_q0 = q0_prev;

                /* ... and relink it as the last element of q0. */
                eager->tcp_eager_prev_q0 = listener->tcp_eager_prev_q0;
                eager->tcp_eager_next_q0 = listener;
                listener->tcp_eager_prev_q0->tcp_eager_next_q0 = eager;
                listener->tcp_eager_prev_q0 = eager;
                eager->tcp_conn.tcp_eager_conn_ind = mp;

                deliver_now = B_FALSE;
        } else {
                /*
                 * The eager already carries an extra ref, put in
                 * tcp_input_data, that keeps it around until the accept
                 * comes back even if it drops to TCPS_CLOSED in the
                 * meantime (e.g. because of a TH_RST).
                 */
                ASSERT(listener->tcp_conn_req_cnt_q0 > 0);
                listener->tcp_conn_req_cnt_q0--;
                listener->tcp_conn_req_cnt_q++;

                /* Leave the SYN_RCVD list (q0) ... */
                q0_next->tcp_eager_prev_q0 = q0_prev;
                q0_prev->tcp_eager_next_q0 = q0_next;
                eager->tcp_eager_prev_q0 = NULL;
                eager->tcp_eager_next_q0 = NULL;

                /*
                 * ... and join the ESTABLISHED list (q) at its tail.
                 * sockfs sends T_CONN_RES down in chronological order,
                 * so keeping the oldest indications at the front keeps
                 * the search short.
                 */
                if (listener->tcp_eager_last_q != NULL)
                        listener->tcp_eager_last_q->tcp_eager_next_q = eager;
                else
                        listener->tcp_eager_next_q = eager;
                listener->tcp_eager_last_q = eager;
                eager->tcp_eager_next_q = NULL;

                /*
                 * Do not send the T_conn_ind yet: once it has gone up,
                 * the accept may complete at any moment and release the
                 * refhold we have on the eager. Finish with the eager
                 * first and send at the very end.
                 */
                deliver_now = B_TRUE;
        }

        /* This eager had a SYN_RCVD timeout counted against the listener. */
        if (eager->tcp_syn_rcvd_timeout != 0) {
                eager->tcp_syn_rcvd_timeout = 0;
                listener->tcp_syn_rcvd_timeout--;
                if (listener->tcp_syn_defense &&
                    listener->tcp_syn_rcvd_timeout <=
                    (tcps->tcps_conn_req_max_q0 >> 5) &&
                    TICK_TO_MSEC(ddi_get_lbolt64() -
                    listener->tcp_last_rcv_lbolt) > 10*MINUTES) {
                        /*
                         * The SYN attack appears to be over; leave
                         * defense mode and release the address cache.
                         */
                        listener->tcp_syn_defense = B_FALSE;
                        if (listener->tcp_ip_addr_cache != NULL) {
                                kmem_free((void *)listener->tcp_ip_addr_cache,
                                    IP_ADDR_CACHE_SIZE * sizeof (ipaddr_t));
                                listener->tcp_ip_addr_cache = NULL;
                        }
                }
        }

        /*
         * Completing the 3-way handshake proves the remote IP address is
         * good, so remember it if the listener keeps an address cache.
         */
        cache = (ipaddr_t *)(listener->tcp_ip_addr_cache);
        if (cache != NULL) {
                cache[IP_ADDR_CACHE_HASH(eager->tcp_connp->conn_faddr_v4)] =
                    eager->tcp_connp->conn_faddr_v4;
        }

        mutex_exit(&listener->tcp_eager_lock);

        if (deliver_now)
                putnext(lconnp->conn_rq, mp);
}