root/usr/src/uts/common/inet/optcom.c
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
/* Copyright (c) 1990 Mentat Inc. */

/*
 * This file contains common code for handling Options Management requests.
 */

#include <sys/types.h>
#include <sys/stream.h>
#include <sys/stropts.h>
#include <sys/strsubr.h>
#include <sys/errno.h>
#define _SUN_TPI_VERSION 2
#include <sys/tihdr.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/ddi.h>
#include <sys/debug.h>          /* for ASSERT */
#include <sys/policy.h>

#include <inet/common.h>
#include <inet/mi.h>
#include <inet/nd.h>
#include <netinet/ip6.h>
#include <inet/ip.h>
#include <inet/mib2.h>
#include <netinet/in.h>
#include "optcom.h"

#include <inet/optcom.h>
#include <inet/ipclassifier.h>
#include <inet/proto_set.h>

/*
 * Function prototypes
 */
/* Two-pass option buffer processing invoked from tpi_optcom_req(). */
static t_scalar_t process_topthdrs_first_pass(mblk_t *, cred_t *, optdb_obj_t *,
    size_t *);
static t_scalar_t do_options_second_pass(queue_t *q, mblk_t *reqmp,
    mblk_t *ack_mp, cred_t *, optdb_obj_t *dbobjp,
    t_uscalar_t *worst_statusp);
/* Remaining file-local (static) helpers. */
static t_uscalar_t get_worst_status(t_uscalar_t, t_uscalar_t);
static int do_opt_default(queue_t *, struct T_opthdr *, uchar_t **,
    t_uscalar_t *, cred_t *, optdb_obj_t *);
static void do_opt_current(queue_t *, struct T_opthdr *, uchar_t **,
    t_uscalar_t *, cred_t *cr, optdb_obj_t *);
static void do_opt_check_or_negotiate(queue_t *q, struct T_opthdr *reqopt,
    uint_t optset_context, uchar_t **resptrp, t_uscalar_t *worst_statusp,
    cred_t *, optdb_obj_t *dbobjp);
static boolean_t opt_level_valid(t_uscalar_t, optlevel_t *, uint_t);
static size_t opt_level_allopts_lengths(t_uscalar_t, opdes_t *, uint_t);
static boolean_t opt_length_ok(opdes_t *, t_uscalar_t optlen);
/* Used by svr4_optcom_req() to size its reply buffers. */
static t_uscalar_t optcom_max_optbuf_len(opdes_t *, uint_t);
static boolean_t opt_bloated_maxsize(opdes_t *);

/*
 * Common code for sending back a T_ERROR_ACK.
 *
 * Hands mp, the TPI error and the system error to mi_tpi_err_ack_alloc()
 * and replies on q with whatever message it returns.  If it returns NULL
 * nothing is sent.
 */
void
optcom_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error)
{
        mblk_t *ack_mp;

        ack_mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error);
        if (ack_mp == NULL)
                return;

        qreply(q, ack_mp);
}

/*
 * The option management routines svr4_optcom_req() and tpi_optcom_req() take
 * callback functions as arguments. The interfaces these callback functions
 * are expected to provide are described below.
 *
 *
 * (1) deffn(q, optlevel, optname, optvalp)
 *
 *      - Function only called when default value comes from protocol
 *       specific code and not the option database table (indicated by
 *        OP_DEF_FN property in option database.)
 *      - Error return is -1. Valid returns are >=0.
 *      - When valid, the return value represents the length used for storing
 *              the default value of the option.
 *      - Error return implies the called routine did not recognize this
 *              option. Something downstream could so input is left unchanged
 *              in request buffer.
 *
 * (2) getfn(q, optlevel, optname, optvalp)
 *
 *      - Error return is -1. Valid returns are >=0.
 *      - When valid, the return value represents the length used for storing
 *              the actual value of the option.
 *      - Error return implies the called routine did not recognize this
 *              option. Something downstream could so input is left unchanged
 *              in request buffer.
 *
 * (3) setfn(q, optset_context, optlevel, optname, inlen, invalp,
 *      outlenp, outvalp, attrp, cr);
 *
 *      - OK return is 0, Error code is returned as a non-zero argument.
 *      - If negative it is ignored by svr4_optcom_req(). If positive, error
 *        is returned. A negative return implies that option, while handled on
 *        this stack is not handled at this level and will be handled further
 *        downstream.
 *      - Both negative and positive errors are treated as errors in an
 *        identical manner by tpi_optcom_req(). The errors affect the "status"
 *        field of each option's T_opthdr. If successful, an appropriate
 *        success result is carried. If error, it is instantiated to "failure"
 *        at the topmost level and left unchanged at other levels. (This
 *        "failure" can turn to a success at another level).
 *      - optset_context passed for tpi_optcom_req(). It is interpreted as:
 *        - SETFN_OPTCOM_CHECKONLY
 *              semantics are to pretend to set the value and report
 *              back if it would be successful.
 *              This is used with T_CHECK semantics in XTI
 *        - SETFN_OPTCOM_NEGOTIATE
 *              set the value. Call from option management primitive
 *              T_OPTMGMT_REQ when T_NEGOTIATE flags is used.
 *        - SETFN_UD_NEGOTIATE
 *              option request came riding on a UNITDATA primitive; it most
 *              often has "this datagram" semantics, influencing properties
 *              affecting an outgoing datagram or associated with a received
 *              datagram
 *              [ Note: XTI permits this use outside of "this datagram"
 *              semantics also and permits setting "management related"
 *              options in this context and its test suite enforces it ]
 *        - SETFN_CONN_NEGOTIATE
 *              option request came riding on a CONN_REQ/RES primitive and
 *              most often has "this connection" (negotiation during
 *              "connection establishment") semantics.
 *              [ Note: XTI permits use of these outside of "this connection"
 *              semantics and permits "management related" options in this
 *              context and its test suite enforces it. ]
 *
 *      - inlen, invalp is the option length,value requested to be set.
 *      - outlenp, outvalp represent return parameters which contain the
 *        value set and it might be different from one passed on input.
 *      - attrp points to a data structure that's used by v6 modules to
 *        store ancillary data options or sticky options.
 *      - cr points to the caller's credentials
 *      - the caller might pass same buffers for input and output and the
 *        routine should protect against this case by not updating output
 *        buffers until it is done referencing input buffers and any other
 *        issues (e.g. not use bcopy() if we do not trust what it does).
 *      - If option is not known, it returns error. We randomly pick EINVAL.
 *        It can however get called with options that are handled downstream
 *        or upstream so for svr4_optcom_req(), it does not return error for
 *        negative return values.
 *
 */

/*
 * Upper Level Protocols call this routine when they receive
 * a T_SVR4_OPTMGMT_REQ message.  Through dbobjp they supply callback
 * functions for obtaining the default value of a single option
 * (odb_deffn), getting the current value of a single option (odb_getfn)
 * and setting a new value for a single option (odb_setfn), together with
 * the table of option descriptors.  svr4_optcom_req validates the option
 * management buffer passed in, and calls the appropriate routines to do
 * the job requested.
 *
 *      q       - queue on which the reply is sent (via qreply()).
 *      mp      - the request.  It is freed here, reused in place as the
 *                reply (T_NEGOTIATE), or handed to optcom_err_ack().
 *      cr      - caller's credentials; used for the permission checks and
 *                passed through to the set function.
 *      dbobjp  - option database object for the calling protocol.
 *
 * On success a T_OPTMGMT_ACK is sent as an M_PCPROTO message.  On failure
 * an error ack is requested with TBADFLAG (illegal MGMT_flags), TBADOPT
 * (malformed option buffer), TSYSERR/ENOMEM (no reply buffer) or whatever
 * proto_opt_check() / the set function reported.
 *
 * XXX Code below needs some restructuring after we have some more
 * macros to support 'struct opthdr' in the headers.
 */
void
svr4_optcom_req(queue_t *q, mblk_t *mp, cred_t *cr, optdb_obj_t *dbobjp)
{
        pfi_t   deffn = dbobjp->odb_deffn;
        pfi_t   getfn = dbobjp->odb_getfn;
        opt_set_fn setfn = dbobjp->odb_setfn;
        opdes_t *opt_arr = dbobjp->odb_opt_des_arr;
        uint_t opt_arr_cnt = dbobjp->odb_opt_arr_cnt;
        t_uscalar_t max_optbuf_len;
        int len;
        mblk_t  *mp1 = NULL;
        struct opthdr *next_opt;
        struct opthdr *opt;
        struct opthdr *opt1;
        struct opthdr *opt_end;
        struct opthdr *opt_start;
        opdes_t *optd;
        struct T_optmgmt_ack *toa;
        struct T_optmgmt_req *tor;
        int error;

        tor = (struct T_optmgmt_req *)mp->b_rptr;
        /* Verify message integrity. */
        if (mp->b_wptr - mp->b_rptr < sizeof (struct T_optmgmt_req))
                goto bad_opt;
        /* Verify MGMT_flags legal */
        switch (tor->MGMT_flags) {
        case T_DEFAULT:
        case T_NEGOTIATE:
        case T_CURRENT:
        case T_CHECK:
                /* OK - legal request flags */
                break;
        default:
                optcom_err_ack(q, mp, TBADFLAG, 0);
                return;
        }
        if (tor->MGMT_flags == T_DEFAULT) {
                /* Is it a request for default option settings? */

                /*
                 * Note: XXX TLI and TPI specification was unclear about
                 * semantics of T_DEFAULT and the following historical note
                 * and its interpretation is incorrect (it implies a request
                 * for default values of only the identified options not all.
                 * The semantics have been explained better in XTI spec.)
                 * However, we do not modify (comment or code) here to keep
                 * compatibility.
                 * We can rethink this if it ever becomes an issue.
                 * ----historical comment start------
                 * As we understand it, the input buffer is meaningless
                 * so we ditch the message.  A T_DEFAULT request is a
                 * request to obtain a buffer containing defaults for
                 * all supported options, so we allocate a maximum length
                 * reply.
                 * ----historical comment end -------
                 */
                max_optbuf_len = optcom_max_optbuf_len(opt_arr,
                    opt_arr_cnt);
                /*
                 * Allocate the reply before releasing the request.  If the
                 * allocation fails the request mblk must still be around
                 * so that it can be handed to optcom_err_ack(); freeing it
                 * first would leave only a NULL pointer to pass along.
                 */
                mp1 = allocb(max_optbuf_len, BPRI_MED);
                if (mp1 == NULL)
                        goto no_mem;
                /* T_DEFAULT not passed down; 'tor' is dead from here on. */
                freemsg(mp);
                mp = mp1;

                /* Initialize the T_optmgmt_ack header. */
                toa = (struct T_optmgmt_ack *)mp->b_rptr;
                bzero((char *)toa, max_optbuf_len);
                toa->PRIM_type = T_OPTMGMT_ACK;
                toa->OPT_offset = (t_scalar_t)sizeof (struct T_optmgmt_ack);
                /* TODO: Is T_DEFAULT the right thing to put in MGMT_flags? */
                toa->MGMT_flags = T_DEFAULT;

                /* Now walk the table of options passed in */
                opt = (struct opthdr *)&toa[1];
                for (optd = opt_arr; optd < &opt_arr[opt_arr_cnt]; optd++) {
                        /*
                         * All the options in the table of options passed
                         * in are by definition supported by the protocol
                         * calling this function.
                         */
                        if (!OA_READ_PERMISSION(optd, cr))
                                continue;
                        opt->level = optd->opdes_level;
                        opt->name = optd->opdes_name;
                        if ((optd->opdes_props & OP_DEF_FN) &&
                            (len = (*deffn)(q, opt->level,
                            opt->name, (uchar_t *)&opt[1])) >= 0) {
                                /* Protocol function supplied the default. */
                                opt->len = (t_uscalar_t)len;
                        } else {
                                /*
                                 * Fill length and value from table.
                                 *
                                 * Default value not instantiated from function
                                 * (or the protocol specific function failed it;
                                 * In this interpretation of T_DEFAULT, this is
                                 * the best we can do)
                                 */
                                switch (optd->opdes_size) {
                                /*
                                 * Since options are guaranteed aligned only
                                 * on a 4 byte boundary (t_scalar_t) any
                                 * option that is greater in size will default
                                 * to the bcopy below
                                 */
                                case sizeof (int32_t):
                                        *(int32_t *)&opt[1] =
                                            (int32_t)optd->opdes_default;
                                        break;
                                case sizeof (int16_t):
                                        *(int16_t *)&opt[1] =
                                            (int16_t)optd->opdes_default;
                                        break;
                                case sizeof (int8_t):
                                        *(int8_t *)&opt[1] =
                                            (int8_t)optd->opdes_default;
                                        break;
                                default:
                                        /*
                                         * other length but still assume
                                         * fixed - use bcopy
                                         */
                                        bcopy(optd->opdes_defbuf,
                                            &opt[1], optd->opdes_size);
                                        break;
                                }
                                opt->len = optd->opdes_size;
                        }
                        opt = (struct opthdr *)((char *)&opt[1] +
                            _TPI_ALIGN_OPT(opt->len));
                }

                /* Now record the final length. */
                toa->OPT_length = (t_scalar_t)((char *)opt - (char *)&toa[1]);
                mp->b_wptr = (uchar_t *)opt;
                mp->b_datap->db_type = M_PCPROTO;
                /* Ship it back. */
                qreply(q, mp);
                return;
        }
        /* T_DEFAULT processing complete - no more T_DEFAULT */

        /*
         * For T_NEGOTIATE, T_CURRENT, and T_CHECK requests, we make a
         * pass through the input buffer validating the details and
         * making sure each option is supported by the protocol.
         */
        if ((opt_start = (struct opthdr *)mi_offset_param(mp,
            tor->OPT_offset, tor->OPT_length)) == NULL)
                goto bad_opt;
        if (!__TPI_OPT_ISALIGNED(opt_start))
                goto bad_opt;

        opt_end = (struct opthdr *)((uchar_t *)opt_start +
            tor->OPT_length);

        for (opt = opt_start; opt < opt_end; opt = next_opt) {
                /*
                 * Verify we have room to reference the option header
                 * fields in the option buffer.
                 */
                if ((uchar_t *)opt + sizeof (struct opthdr) >
                    (uchar_t *)opt_end)
                        goto bad_opt;
                /*
                 * Compute 'next_opt', the pointer to the next option in
                 * the buffer.  The computation uses 'opt->len', which was
                 * initialized by the application and cannot be trusted.
                 * The usual "value too large" case is caught by the loop
                 * termination condition above.  We check here for the
                 * following, which it would miss:
                 *      -pointer space wraparound arithmetic overflow
                 *      -last option in buffer with 'opt->len' being too large
                 *       (only reason 'next_opt' should equal or exceed
                 *       'opt_end' for last option is roundup unless length is
                 *       too-large/invalid)
                 */
                next_opt = (struct opthdr *)((uchar_t *)&opt[1] +
                    _TPI_ALIGN_OPT(opt->len));

                if ((uchar_t *)next_opt < (uchar_t *)&opt[1] ||
                    ((next_opt >= opt_end) &&
                    (((uchar_t *)next_opt - (uchar_t *)opt_end) >=
                    __TPI_ALIGN_SIZE)))
                        goto bad_opt;

                /* sanity check */
                if (opt->name == T_ALLOPT)
                        goto bad_opt;

                /*
                 * Negative returns carry a (negated) TPI error, positive
                 * returns a system error.
                 */
                error = proto_opt_check(opt->level, opt->name, opt->len, NULL,
                    opt_arr, opt_arr_cnt,
                    tor->MGMT_flags == T_NEGOTIATE, tor->MGMT_flags == T_CHECK,
                    cr);
                if (error < 0) {
                        optcom_err_ack(q, mp, -error, 0);
                        return;
                } else if (error > 0) {
                        optcom_err_ack(q, mp, TSYSERR, error);
                        return;
                }
        } /* end for loop scanning option buffer */

        /* Now complete the operation as required. */
        switch (tor->MGMT_flags) {
        case T_CHECK:
                /*
                 * Historically used same as T_CURRENT (which was added to
                 * standard later). Code retained for compatibility.
                 */
                /* FALLTHROUGH */
        case T_CURRENT:
                /*
                 * Allocate a maximum size reply.  Perhaps we are supposed to
                 * assume that the input buffer includes space for the answers
                 * as well as the opthdrs, but we don't know that for sure.
                 * So, instead, we create a new output buffer, using the
                 * input buffer only as a list of options.
                 */
                max_optbuf_len = optcom_max_optbuf_len(opt_arr,
                    opt_arr_cnt);
                mp1 = allocb_tmpl(max_optbuf_len, mp);
                if (mp1 == NULL)
                        goto no_mem;
                /*
                 * Zero the reply, as the T_DEFAULT path does, so that the
                 * alignment padding between options never carries
                 * uninitialized buffer contents upstream.
                 */
                bzero((char *)mp1->b_rptr, max_optbuf_len);
                /* Initialize the header. */
                mp1->b_datap->db_type = M_PCPROTO;
                mp1->b_wptr = &mp1->b_rptr[sizeof (struct T_optmgmt_ack)];
                toa = (struct T_optmgmt_ack *)mp1->b_rptr;
                toa->OPT_offset = (t_scalar_t)sizeof (struct T_optmgmt_ack);
                toa->MGMT_flags = tor->MGMT_flags;
                /*
                 * Walk through the input buffer again, this time adding
                 * entries to the output buffer for each option requested.
                 * Note, sanity of option header, last option etc, verified
                 * in first pass.
                 *
                 * NOTE(review): the reply is sized by optcom_max_optbuf_len()
                 * from the option table, but nothing in this loop bounds the
                 * number of options in the request.  Confirm that a request
                 * which repeats options cannot make this loop write past
                 * the end of mp1.
                 */
                opt1 = (struct opthdr *)&toa[1];

                for (opt = opt_start; opt < opt_end; opt = next_opt) {

                        next_opt = (struct opthdr *)((uchar_t *)&opt[1] +
                            _TPI_ALIGN_OPT(opt->len));

                        opt1->name = opt->name;
                        opt1->level = opt->level;
                        len = (*getfn)(q, opt->level,
                            opt->name, (uchar_t *)&opt1[1]);
                        /*
                         * Failure means option is not recognized. Copy input
                         * buffer as is
                         */
                        if (len < 0) {
                                opt1->len = opt->len;
                                bcopy(&opt[1], &opt1[1], opt->len);
                        } else {
                                opt1->len = (t_uscalar_t)len;
                        }
                        opt1 = (struct opthdr *)((uchar_t *)&opt1[1] +
                            _TPI_ALIGN_OPT(opt1->len));
                } /* end for loop */

                /* Record the final length. */
                toa->OPT_length = (t_scalar_t)((uchar_t *)opt1 -
                    (uchar_t *)&toa[1]);
                mp1->b_wptr = (uchar_t *)opt1;
                /* Ditch the input buffer. */
                freemsg(mp);
                mp = mp1;
                break;

        case T_NEGOTIATE:
                /*
                 * Here we are expecting that the response buffer is exactly
                 * the same size as the input buffer.  We pass each opthdr
                 * to the protocol's set function.  If the protocol doesn't
                 * like it, it can update the value in its return argument.
                 */
                /*
                 * Pass each negotiated option through the protocol set
                 * function.
                 * Note: sanity check on option header values done in first
                 * pass and not repeated here.
                 */
                toa = (struct T_optmgmt_ack *)tor;

                for (opt = opt_start; opt < opt_end; opt = next_opt) {
                        /*
                         * next_opt is computed before the call because the
                         * set function may rewrite opt->len.
                         */
                        next_opt = (struct opthdr *)((uchar_t *)&opt[1] +
                            _TPI_ALIGN_OPT(opt->len));

                        error = (*setfn)(q, SETFN_OPTCOM_NEGOTIATE,
                            opt->level, opt->name,
                            opt->len, (uchar_t *)&opt[1],
                            &opt->len, (uchar_t *)&opt[1], NULL, cr);
                        /*
                         * Treat positive "errors" as real.
                         * Note: negative errors are to be treated as
                         * non-fatal by svr4_optcom_req() and are
                         * returned by setfn() when it is passed an
                         * option it does not handle. Since the option
                         * passed proto_opt_check(), it is implied that
                         * it is valid but was either handled upstream
                         * or will be handled downstream.
                         */
                        if (error > 0) {
                                optcom_err_ack(q, mp, TSYSERR, error);
                                return;
                        }
                        /*
                         * error < 0 means option is not recognized.
                         */
                }
                break;
        default:
                optcom_err_ack(q, mp, TBADFLAG, 0);
                return;
        }

        /* Set common fields in the header. */
        toa->MGMT_flags = T_SUCCESS;
        mp->b_datap->db_type = M_PCPROTO;
        toa->PRIM_type = T_OPTMGMT_ACK;
        qreply(q, mp);
        return;

no_mem:
        /* Reached only while mp still refers to the original request. */
        optcom_err_ack(q, mp, TSYSERR, ENOMEM);
        return;
bad_opt:
        optcom_err_ack(q, mp, TBADOPT, 0);
}

/*
 * New optcom_req inspired by TPI/XTI semantics.
 *
 * Validates the T_optmgmt_req header in mp, then makes two passes over the
 * option buffer: process_topthdrs_first_pass() checks the buffer and
 * reports the length needed for the reply, and do_options_second_pass()
 * acts on the request and generates the reply contents.  Two passes are
 * needed mostly to accommodate variable length options and the "T_ALLOPT"
 * option ("all options of the specified level"), both of which mean the
 * reply length can differ from the request length.
 *
 * The outcome is sent on q with qreply(): either a T_OPTMGMT_ACK carried in
 * a freshly allocated M_PCPROTO mblk (the request is then freed), or an
 * error ack requested through optcom_err_ack().
 */
void
tpi_optcom_req(queue_t *q, mblk_t *mp, cred_t *cr, optdb_obj_t *dbobjp)
{
        struct T_optmgmt_req *req = (struct T_optmgmt_req *)mp->b_rptr;
        struct T_optmgmt_ack *ack;
        mblk_t *ack_mp;
        size_t ack_len = 0;                     /* filled in by first pass */
        t_uscalar_t status = T_SUCCESS;         /* updated by second pass */
        t_scalar_t tpi_err;

        /* The message must at least hold the request header. */
        if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_optmgmt_req)) {
                optcom_err_ack(q, mp, TBADOPT, 0);
                return;
        }

        /* Only the four defined management flags are legal. */
        if (req->MGMT_flags != T_DEFAULT &&
            req->MGMT_flags != T_NEGOTIATE &&
            req->MGMT_flags != T_CURRENT &&
            req->MGMT_flags != T_CHECK) {
                optcom_err_ack(q, mp, TBADFLAG, 0);
                return;
        }

        /*
         * First pass:
         *      - estimate cumulative length needed for results
         *      - set "status" field based on permissions, option header
         *        check etc.
         */
        tpi_err = process_topthdrs_first_pass(mp, cr, dbobjp, &ack_len);
        if (tpi_err != 0) {
                optcom_err_ack(q, mp, tpi_err, 0);
                return;
        }

        /*
         * The input buffer passed validation and we know how much room the
         * reply needs, so allocate it now.
         */
        ack_mp = allocb_tmpl(ack_len, mp);
        if (ack_mp == NULL) {
                optcom_err_ack(q, mp, TSYSERR, ENOMEM);
                return;
        }

        /*
         * Second pass: act on the request based on the "status" results of
         * the first pass, expanding T_ALLOPT into all options of the given
         * level along the way.
         */
        tpi_err = do_options_second_pass(q, mp, ack_mp, cr, dbobjp, &status);
        if (tpi_err != 0) {
                freemsg(ack_mp);
                optcom_err_ack(q, mp, tpi_err, 0);
                return;
        }

        /*
         * Fill in the reply header.  The option length is derived from
         * b_wptr of the reply (presumably advanced by the second pass -
         * its body is not visible here).  The request's MGMT_flags must be
         * read before the request is freed below.
         */
        ack = (struct T_optmgmt_ack *)ack_mp->b_rptr;
        ack->PRIM_type = T_OPTMGMT_ACK;
        ack->OPT_offset = (t_scalar_t)sizeof (struct T_optmgmt_ack);
        ack->OPT_length = (t_scalar_t)(ack_mp->b_wptr - (ack_mp->b_rptr +
            sizeof (struct T_optmgmt_ack)));
        /* XXX "worst" or "OR" TPI ? */
        ack->MGMT_flags = req->MGMT_flags | status;
        ack_mp->b_datap->db_type = M_PCPROTO;

        freemsg(mp);            /* the request is no longer needed */
        qreply(q, ack_mp);
}


/*
 * First pass over the option buffer carried in the T_OPTMGMT_REQ "mp".
 *
 * For every option header found this routine
 *      - validates the header (length and alignment),
 *      - stores a preliminary result in the header's "status" field based
 *        on the option table lookup, the length check and the permission
 *        checks against "cr", and
 *      - adds the room that option may need in the ack to *toa_lenp
 *        (which starts out as sizeof (struct T_optmgmt_ack)).
 *
 * Returns 0 on success, TBADOPT for a malformed option buffer or an unknown
 * option of an invalid level, and TBADFLAG when a recognized option is
 * requested with an unrecognized MGMT_flags value.
 */

static t_scalar_t
process_topthdrs_first_pass(mblk_t *mp, cred_t *cr, optdb_obj_t *dbobjp,
    size_t *toa_lenp)
{
        opdes_t *opt_arr = dbobjp->odb_opt_des_arr;
        uint_t opt_arr_cnt = dbobjp->odb_opt_arr_cnt;
        optlevel_t *valid_level_arr = dbobjp->odb_valid_levels_arr;
        uint_t valid_level_arr_cnt = dbobjp->odb_valid_levels_arr_cnt;
        struct T_opthdr *opt;
        struct T_opthdr *opt_start, *opt_end;
        opdes_t *optd;
        size_t allopt_len;
        size_t echo_len;        /* room to echo the request option as is */
        size_t result_len;      /* room for header plus table-sized value */
        struct T_optmgmt_req *tor =
            (struct T_optmgmt_req *)mp->b_rptr;

        *toa_lenp = sizeof (struct T_optmgmt_ack); /* initial value */

        if ((opt_start = (struct T_opthdr *)
            mi_offset_param(mp, tor->OPT_offset, tor->OPT_length)) == NULL) {
                return (TBADOPT);
        }
        if (!__TPI_TOPT_ISALIGNED(opt_start))
                return (TBADOPT);

        opt_end = (struct T_opthdr *)((uchar_t *)opt_start + tor->OPT_length);

        for (opt = opt_start; opt && (opt < opt_end);
            opt = _TPI_TOPT_NEXTHDR(opt_start, tor->OPT_length, opt)) {
                /*
                 * Validate the option for length and alignment
                 * before accessing anything in it.
                 */
                if (!(_TPI_TOPT_VALID(opt, opt_start, opt_end)))
                        return (TBADOPT);

                /* Find the option in the opt_arr. */
                if (opt->name != T_ALLOPT) {
                        optd = proto_opt_lookup(opt->level, opt->name,
                            opt_arr, opt_arr_cnt);
                        if (optd == NULL) {
                                /*
                                 * Option not found
                                 *
                                 * Verify if level is "valid" or not.
                                 * Note: This check is required by XTI
                                 *
                                 * TPI provider always initializes
                                 * the "not supported" (or whatever) status
                                 * for the options. Other levels leave status
                                 * unchanged if they do not understand an
                                 * option.
                                 */
                                if (!opt_level_valid(opt->level,
                                    valid_level_arr, valid_level_arr_cnt))
                                        return (TBADOPT);
                                /*
                                 * level is valid - initialize
                                 * option as not supported
                                 */
                                opt->status = T_NOTSUPPORT;
                                *toa_lenp += _TPI_ALIGN_TOPT(opt->len);
                                continue;
                        }
                } else {
                        /*
                         * Handle T_ALLOPT case as a special case.
                         * Note: T_ALLOPT does not mean anything
                         * for T_CHECK operation.
                         */
                        allopt_len = 0;
                        if (tor->MGMT_flags == T_CHECK ||
                            ((allopt_len = opt_level_allopts_lengths(opt->level,
                            opt_arr, opt_arr_cnt)) == 0)) {
                                /*
                                 * This is confusing but correct !
                                 * It is not valid to use T_ALLOPT with
                                 * T_CHECK flag.
                                 *
                                 * opt_level_allopts_lengths() is used to verify
                                 * that "level" associated with the T_ALLOPT is
                                 * supported.
                                 *
                                 */
                                opt->status = T_FAILURE;
                                *toa_lenp += _TPI_ALIGN_TOPT(opt->len);
                                continue;
                        }
                        ASSERT(allopt_len != 0); /* remove ? */

                        *toa_lenp += allopt_len;
                        opt->status = T_SUCCESS;
                        continue;
                }

                /* Additional checks dependent on operation. */
                switch (tor->MGMT_flags) {
                case T_DEFAULT:
                case T_CURRENT:

                        /*
                         * The proto_opt_lookup() routine call above approved of
                         * this option so we can work on the status for it
                         * based on the permissions for the operation. (This
                         * can override any status for it set at higher levels)
                         * We assume this override is OK since chkfn at this
                         * level approved of this option.
                         *
                         * T_CURRENT semantics:
                         * The read access is required. Else option
                         * status is T_NOTSUPPORT.
                         *
                         * T_DEFAULT semantics:
                         * Note: specification is not clear on this but we
                         * interpret T_DEFAULT semantics such that read access
                         * is required even to obtain the default value.
                         * Otherwise the option status is T_NOTSUPPORT.
                         */
                        if (!OA_READ_PERMISSION(optd, cr)) {
                                opt->status = T_NOTSUPPORT;
                                *toa_lenp += _TPI_ALIGN_TOPT(opt->len);
                                /* skip to next */
                                continue;
                        }

                        /*
                         * T_DEFAULT/T_CURRENT semantics:
                         * We know that read access is set. If no other access
                         * is set, then status is T_READONLY.
                         */
                        if (OA_READONLY_PERMISSION(optd, cr))
                                opt->status = T_READONLY;
                        else
                                opt->status = T_SUCCESS;
                        /*
                         * Option passes all checks. Make room for it in the
                         * ack. Note: size stored in table does not include
                         * space for option header.
                         *
                         * The length of the request option is not checked
                         * for these operations, and when the default/get
                         * function fails the second pass copies the request
                         * option (all opt->len bytes) into the ack instead.
                         * Reserve the larger of the two sizes so that copy
                         * cannot run past the end of the ack buffer.
                         */
                        echo_len = _TPI_ALIGN_TOPT(opt->len);
                        result_len = sizeof (struct T_opthdr) +
                            _TPI_ALIGN_TOPT(optd->opdes_size);
                        *toa_lenp += (echo_len > result_len) ?
                            echo_len : result_len;
                        break;

                case T_CHECK:
                case T_NEGOTIATE:

                        /*
                         * T_NEGOTIATE semantics:
                         * If for fixed length option value on input is not the
                         * same as value supplied, then status is T_FAILURE.
                         *
                         * T_CHECK semantics:
                         * If value is supplied, semantics same as T_NEGOTIATE.
                         * It is however ok not to supply a value with T_CHECK.
                         */

                        if (tor->MGMT_flags == T_NEGOTIATE ||
                            (opt->len != sizeof (struct T_opthdr))) {
                                /*
                                 * Implies "value" is specified in T_CHECK or
                                 * it is a T_NEGOTIATE request.
                                 * Verify size.
                                 * Note: This can override anything about this
                                 * option request done at a higher level.
                                 */
                                if (opt->len < sizeof (struct T_opthdr) ||
                                    !opt_length_ok(optd,
                                    opt->len - sizeof (struct T_opthdr))) {
                                        /* bad size */
                                        *toa_lenp += _TPI_ALIGN_TOPT(opt->len);
                                        opt->status = T_FAILURE;
                                        continue;
                                }
                        }
                        /*
                         * The proto_opt_lookup() routine above approved of
                         * this option so we can work on the status for it based
                         * on the permissions for the operation. (This can
                         * override anything set at a higher level).
                         *
                         * T_CHECK/T_NEGOTIATE semantics:
                         * Set status to T_READONLY if read is the only access
                         * permitted
                         */
                        if (OA_READONLY_PERMISSION(optd, cr)) {
                                opt->status = T_READONLY;
                                *toa_lenp += _TPI_ALIGN_TOPT(opt->len);
                                /* skip to next */
                                continue;
                        }

                        /*
                         * T_CHECK/T_NEGOTIATE semantics:
                         * If write (or execute) access is not set, then status
                         * is T_NOTSUPPORT.
                         */
                        if (!OA_WRITE_OR_EXECUTE(optd, cr)) {
                                opt->status = T_NOTSUPPORT;
                                *toa_lenp += _TPI_ALIGN_TOPT(opt->len);
                                /* skip to next option */
                                continue;
                        }
                        /*
                         * Option passes all checks. Make room for it in the
                         * ack and set success in status.
                         * Note: size stored in table does not include header
                         * length.
                         *
                         * Reserve the larger of the result size and the
                         * size of the request option itself, so the ack can
                         * hold either the generated result or the request
                         * option returned as is.
                         */
                        opt->status = T_SUCCESS;
                        echo_len = _TPI_ALIGN_TOPT(opt->len);
                        result_len = sizeof (struct T_opthdr) +
                            _TPI_ALIGN_TOPT(optd->opdes_size);
                        *toa_lenp += (echo_len > result_len) ?
                            echo_len : result_len;
                        break;

                default:
                        return (TBADFLAG);
                }
        } /* for loop scanning input buffer */

        return (0);             /* OK return */
}

/*
 * Second pass over the option buffer of "reqmp": act on every option
 * according to the "status" the first pass left in its header and append
 * the resulting option(s) to "ack_mp", right behind the T_optmgmt_ack
 * header.  The per-operation helpers called from here also perform the
 * "expansion" of T_ALLOPT into all options of the requested level.
 *
 * The worst status encountered is accumulated in *worst_statusp.  On
 * success ack_mp->b_wptr is set to the end of the generated options and 0
 * is returned; otherwise TBADOPT or TBADFLAG is returned.
 */
static t_scalar_t
do_options_second_pass(queue_t *q, mblk_t *reqmp, mblk_t *ack_mp, cred_t *cr,
    optdb_obj_t *dbobjp, t_uscalar_t *worst_statusp)
{
        struct T_optmgmt_req *tor = (struct T_optmgmt_req *)reqmp->b_rptr;
        struct T_opthdr *first, *limit;
        struct T_opthdr *cur;
        uchar_t *outp;
        uint_t set_ctx;
        int echo_request;

        /* output begins right behind the ack header (assumed int32_t aligned) */
        outp = (uchar_t *)ack_mp->b_rptr + sizeof (struct T_optmgmt_ack);

        /*
         * Locate the option buffer in the request.
         */
        first = (struct T_opthdr *)mi_offset_param(reqmp,
            tor->OPT_offset, tor->OPT_length);
        if (first == NULL)
                return (TBADOPT);
        limit = (struct T_opthdr *)((uchar_t *)first + tor->OPT_length);
        ASSERT(__TPI_TOPT_ISALIGNED(first)); /* verified in first pass */

        for (cur = first; cur != NULL && cur < limit;
            cur = _TPI_TOPT_NEXTHDR(first, tor->OPT_length, cur)) {

                /* verified in first pass */
                ASSERT(_TPI_TOPT_VALID(cur, first, limit));

                /*
                 * Decide whether the first pass already settled this
                 * option as a failure for the MGMT_flags semantics.
                 *
                 * Note: For all practical purposes, T_READONLY status is
                 * a "success" for T_DEFAULT/T_CURRENT and a "failure" for
                 * T_CHECK/T_NEGOTIATE.
                 */
                switch (cur->status) {
                case T_NOTSUPPORT:
                case T_FAILURE:
                        echo_request = 1;
                        break;
                case T_READONLY:
                        echo_request =
                            ((tor->MGMT_flags & (T_NEGOTIATE|T_CHECK)) != 0);
                        break;
                default:
                        echo_request = 0;
                        break;
                }

                if (echo_request) {
                        /*
                         * T_CHECK/T_NEGOTIATE semantics for
                         * T_NOTSUPPORT/T_FAILURE/T_READONLY are to return
                         * the "value" part of the option untouched, so the
                         * option header and any value are copied to the
                         * output as is.
                         *
                         * T_DEFAULT/T_CURRENT semantics say input values,
                         * even if present, are to be ignored.  The
                         * specification is not clear on this, but we
                         * interpret that ignored values may still be
                         * returned as is and so handle them the same way.
                         * XXX If that interpretation is ever determined
                         * incorrect, treat T_DEFAULT/T_CURRENT differently
                         * here.
                         */
                        bcopy(cur, outp, cur->len);
                        outp += _TPI_ALIGN_TOPT(cur->len);

                        *worst_statusp = get_worst_status(cur->status,
                            *worst_statusp);

                        /* on to the next option in the buffer */
                        continue;
                }

                /*
                 * Status is T_SUCCESS or T_READONLY here; produce the
                 * value part according to the requested operation.
                 */
                ASSERT(cur->status == T_SUCCESS || cur->status == T_READONLY);
                switch (tor->MGMT_flags) {
                case T_DEFAULT:
                        /*
                         * Default value comes from the table or a protocol
                         * specific function.  If that fails, the input is
                         * passed through marked as T_FAILURE.
                         */
                        if (do_opt_default(q, cur, &outp, worst_statusp,
                            cr, dbobjp) < 0) {
                                cur->status = T_FAILURE;
                                bcopy(cur, outp, cur->len);
                                outp += _TPI_ALIGN_TOPT(cur->len);
                                *worst_statusp = get_worst_status(cur->status,
                                    *worst_statusp);
                        }
                        break;

                case T_CURRENT:
                        do_opt_current(q, cur, &outp, worst_statusp, cr,
                            dbobjp);
                        break;

                case T_CHECK:
                case T_NEGOTIATE:
                        set_ctx = (tor->MGMT_flags == T_CHECK) ?
                            SETFN_OPTCOM_CHECKONLY : SETFN_OPTCOM_NEGOTIATE;
                        do_opt_check_or_negotiate(q, cur, set_ctx,
                            &outp, worst_statusp, cr, dbobjp);
                        break;

                default:
                        return (TBADFLAG);
                }
        } /* end for loop scanning option buffer */

        ack_mp->b_wptr = outp;
        ASSERT(ack_mp->b_wptr <= ack_mp->b_datap->db_lim);

        return (0);             /* OK return */
}


/*
 * Map an option status to its place in the pecking order used by
 * get_worst_status(): 0 is best, larger is worse, and -1 means the value
 * is not one of the five status codes ranked there.
 */
static int
optcom_status_rank(t_uscalar_t status)
{
        switch (status) {
        case T_SUCCESS:
                return (0);
        case T_PARTSUCCESS:
                return (1);
        case T_FAILURE:
                return (2);
        case T_READONLY:
                return (3);
        case T_NOTSUPPORT:
                return (4);
        default:
                return (-1);
        }
}

/*
 * Return the "worst" among the arguments "status" and
 * "current_worst_status".
 *
 * The pecking order is
 *
 * T_SUCCESS ..... best
 * T_PARTSUCCESS
 * T_FAILURE
 * T_READONLY
 * T_NOTSUPPORT... worst
 *
 * "status" is returned only when both values are in the list above and
 * "status" ranks strictly worse; in every other case (equal values, a
 * better "status", or a value outside the list) "current_worst_status"
 * is returned unchanged.
 */
static t_uscalar_t
get_worst_status(t_uscalar_t status, t_uscalar_t current_worst_status)
{
        int new_rank = optcom_status_rank(status);
        int cur_rank = optcom_status_rank(current_worst_status);

        if (cur_rank < 0 || new_rank <= cur_rank)
                return (current_worst_status);

        return (status);
}

static int
do_opt_default(queue_t *q, struct T_opthdr *reqopt, uchar_t **resptrp,
    t_uscalar_t *worst_statusp, cred_t *cr, optdb_obj_t *dbobjp)
{
        pfi_t   deffn = dbobjp->odb_deffn;
        opdes_t *opt_arr = dbobjp->odb_opt_des_arr;
        uint_t opt_arr_cnt = dbobjp->odb_opt_arr_cnt;

        struct T_opthdr *topth;
        opdes_t *optd;

        if (reqopt->name != T_ALLOPT) {
                /*
                 * lookup the option in the table and fill default value
                 */
                optd = proto_opt_lookup(reqopt->level, reqopt->name,
                    opt_arr, opt_arr_cnt);

                /* Calling routine should have verified it it exists */
                ASSERT(optd != NULL);

                topth = (struct T_opthdr *)(*resptrp);
                topth->level = reqopt->level;
                topth->name = reqopt->name;
                topth->status = reqopt->status;

                *worst_statusp = get_worst_status(reqopt->status,
                    *worst_statusp);

                if (optd->opdes_props & OP_NODEFAULT) {
                        /* header only, no default "value" part */
                        topth->len = sizeof (struct T_opthdr);
                        *resptrp += sizeof (struct T_opthdr);
                } else {
                        int deflen;

                        if (optd->opdes_props & OP_DEF_FN) {
                                deflen = (*deffn)(q, reqopt->level,
                                    reqopt->name, _TPI_TOPT_DATA(topth));
                                if (deflen >= 0) {
                                        topth->len = (t_uscalar_t)
                                            (sizeof (struct T_opthdr) + deflen);
                                } else {
                                        /*
                                         * return error, this should 'pass
                                         * through' the option and maybe some
                                         * other level will fill it in or
                                         * already did.
                                         * (No change in 'resptrp' upto here)
                                         */
                                        return (-1);
                                }
                        } else {
                                /* fill length and value part */
                                switch (optd->opdes_size) {
                                /*
                                 * Since options are guaranteed aligned only
                                 * on a 4 byte boundary (t_scalar_t) any
                                 * option that is greater in size will default
                                 * to the bcopy below
                                 */
                                case sizeof (int32_t):
                                        *(int32_t *)_TPI_TOPT_DATA(topth) =
                                            (int32_t)optd->opdes_default;
                                        break;
                                case sizeof (int16_t):
                                        *(int16_t *)_TPI_TOPT_DATA(topth) =
                                            (int16_t)optd->opdes_default;
                                        break;
                                case sizeof (int8_t):
                                        *(int8_t *)_TPI_TOPT_DATA(topth) =
                                            (int8_t)optd->opdes_default;
                                        break;
                                default:
                                        /*
                                         * other length but still assume
                                         * fixed - use bcopy
                                         */
                                        bcopy(optd->opdes_defbuf,
                                            _TPI_TOPT_DATA(topth),
                                            optd->opdes_size);
                                        break;
                                }
                                topth->len = (t_uscalar_t)(optd->opdes_size +
                                    sizeof (struct T_opthdr));
                        }
                        *resptrp += _TPI_ALIGN_TOPT(topth->len);
                }
                return (0);     /* OK return */
        }

        /*
         * T_ALLOPT processing
         *
         * lookup and stuff default values of all the options of the
         * level specified
         */
        for (optd = opt_arr; optd < &opt_arr[opt_arr_cnt]; optd++) {
                if (reqopt->level != optd->opdes_level)
                        continue;
                /*
                 *
                 * T_DEFAULT semantics:
                 * XXX: we interpret T_DEFAULT semantics such that access to
                 * read value is required for access even the default value.
                 * Else option is ignored for T_ALLOPT request.
                 */
                if (!OA_READ_PERMISSION(optd, cr))
                        /* skip this one */
                        continue;

                /*
                 * Found option of same level as T_ALLOPT request
                 * that we can return.
                 */

                topth = (struct T_opthdr *)(*resptrp);
                topth->level = optd->opdes_level;
                topth->name = optd->opdes_name;

                /*
                 * T_DEFAULT semantics:
                 * We know that read access is set. If no other access is set,
                 * then status is T_READONLY
                 */
                if (OA_READONLY_PERMISSION(optd, cr)) {
                        topth->status = T_READONLY;
                        *worst_statusp = get_worst_status(T_READONLY,
                            *worst_statusp);
                } else {
                        topth->status = T_SUCCESS;
                        /*
                         * Note: *worst_statusp has to be T_SUCCESS or
                         * worse so no need to adjust
                         */
                }

                if (optd->opdes_props & OP_NODEFAULT) {
                        /* header only, no value part */
                        topth->len = sizeof (struct T_opthdr);
                        *resptrp += sizeof (struct T_opthdr);
                } else {
                        int deflen;

                        if (optd->opdes_props & OP_DEF_FN) {
                                deflen = (*deffn)(q, reqopt->level,
                                    reqopt->name, _TPI_TOPT_DATA(topth));
                                if (deflen >= 0) {
                                        topth->len = (t_uscalar_t)(deflen +
                                            sizeof (struct T_opthdr));
                                } else {
                                        /*
                                         * deffn failed.
                                         * return just the header as T_ALLOPT
                                         * expansion.
                                         * Some other level deffn may
                                         * supply value part.
                                         */
                                        topth->len = sizeof (struct T_opthdr);
                                        topth->status = T_FAILURE;
                                        *worst_statusp =
                                            get_worst_status(T_FAILURE,
                                            *worst_statusp);
                                }
                        } else {
                                /*
                                 * fill length and value part from
                                 * table
                                 */
                                switch (optd->opdes_size) {
                                /*
                                 * Since options are guaranteed aligned only
                                 * on a 4 byte boundary (t_scalar_t) any
                                 * option that is greater in size will default
                                 * to the bcopy below
                                 */
                                case sizeof (int32_t):
                                        *(int32_t *)_TPI_TOPT_DATA(topth) =
                                            (int32_t)optd->opdes_default;
                                        break;
                                case sizeof (int16_t):
                                        *(int16_t *)_TPI_TOPT_DATA(topth) =
                                            (int16_t)optd->opdes_default;
                                        break;
                                case sizeof (int8_t):
                                        *(int8_t *)_TPI_TOPT_DATA(topth) =
                                            (int8_t)optd->opdes_default;
                                        break;
                                default:
                                        /*
                                         * other length but still assume
                                         * fixed - use bcopy
                                         */
                                        bcopy(optd->opdes_defbuf,
                                            _TPI_TOPT_DATA(topth),
                                            optd->opdes_size);
                                }
                                topth->len = (t_uscalar_t)(optd->opdes_size +
                                    sizeof (struct T_opthdr));
                        }
                        *resptrp += _TPI_ALIGN_TOPT(topth->len);
                }
        }
        return (0);
}

/*
 * Generate the T_CURRENT result for the request option "reqopt" at
 * *resptrp and advance *resptrp past everything written.
 *
 * The protocol's get function supplies the current value.  For a single
 * option a failing get function produces no output of its own; for
 * T_ALLOPT each readable option of the requested level is emitted, with a
 * header-only T_FAILURE entry for those the get function rejects.  If
 * nothing at all was written, the request option itself is copied to the
 * output marked T_FAILURE.  Maybe something downstream will fill it in or
 * something upstream did.  *worst_statusp is updated with the status of
 * each option written.
 */
static void
do_opt_current(queue_t *q, struct T_opthdr *reqopt, uchar_t **resptrp,
    t_uscalar_t *worst_statusp, cred_t *cr, optdb_obj_t *dbobjp)
{
        pfi_t   getfn = dbobjp->odb_getfn;
        opdes_t *optd;
        opdes_t *optd_end;
        struct T_opthdr *topth;
        uchar_t *start = *resptrp;
        uchar_t *cur = start;           /* local output cursor */
        int vlen;

        if (reqopt->name != T_ALLOPT) {
                /* value goes right behind the header we may fill in */
                topth = (struct T_opthdr *)cur;
                vlen = (*getfn)(q, reqopt->level, reqopt->name,
                    cur + sizeof (struct T_opthdr));
                if (vlen >= 0) {
                        topth->len = (t_uscalar_t)(vlen +
                            sizeof (struct T_opthdr));
                        topth->level = reqopt->level;
                        topth->name = reqopt->name;
                        topth->status = reqopt->status;
                        cur += sizeof (struct T_opthdr);
                        cur += _TPI_ALIGN_TOPT(vlen);
                        *worst_statusp = get_worst_status(topth->status,
                            *worst_statusp);
                }
                /* on failure the cursor stays where it started */
        } else {
                /* T_ALLOPT: walk the table and get every option */
                optd_end = dbobjp->odb_opt_des_arr + dbobjp->odb_opt_arr_cnt;
                for (optd = dbobjp->odb_opt_des_arr; optd < optd_end;
                    optd++) {
                        /* only readable options of the requested level */
                        if (reqopt->level != optd->opdes_level ||
                            !OA_READ_PERMISSION(optd, cr))
                                continue;

                        topth = (struct T_opthdr *)cur;
                        cur += sizeof (struct T_opthdr);

                        vlen = (*getfn)(q, reqopt->level, optd->opdes_name,
                            cur);
                        topth->level = reqopt->level;
                        topth->name = optd->opdes_name;
                        if (vlen < 0) {
                                /*
                                 * failed, return as T_FAILURE and null value
                                 * part. Maybe something downstream will
                                 * handle this one and fill in a value. Here
                                 * it is just part of T_ALLOPT expansion.
                                 */
                                topth->len = sizeof (struct T_opthdr);
                                topth->status = T_FAILURE;
                        } else {
                                topth->len = (t_uscalar_t)(vlen +
                                    sizeof (struct T_opthdr));
                                topth->status =
                                    OA_READONLY_PERMISSION(optd, cr) ?
                                    T_READONLY : T_SUCCESS;
                                cur += _TPI_ALIGN_TOPT(vlen);
                        }
                        *worst_statusp = get_worst_status(topth->status,
                            *worst_statusp);
                }
        }

        if (cur == start) {
                /*
                 * getfn failed and does not want to handle this option.
                 */
                reqopt->status = T_FAILURE;
                bcopy(reqopt, cur, reqopt->len);
                cur += _TPI_ALIGN_TOPT(reqopt->len);
                *worst_statusp = get_worst_status(reqopt->status,
                    *worst_statusp);
        }

        *resptrp = cur;
}

/*
 * Process one request option header "reqopt" through the set function
 * and append the result to the response buffer at "*resptrp".
 *
 *  - q, cr and optset_context are passed through to the set function.
 *  - *resptrp is advanced past every option written to the response.
 *  - *worst_statusp is folded with the status of emitted options using
 *    get_worst_status().
 *  - dbobjp supplies the default-value function, the set function and the
 *    option descriptor table.
 *
 * Two cases are handled:
 *  - A specific option name: the set function is called with the value
 *    carried in the request, and writes the result value directly into the
 *    response buffer.
 *  - T_ALLOPT (only valid for SETFN_OPTCOM_NEGOTIATE): every option in the
 *    table at reqopt->level is expanded and, where permitted, set to its
 *    default value.
 *
 * If nothing at all was written to the response, the request option is
 * copied back unchanged except for a T_FAILURE status.
 */
static void
do_opt_check_or_negotiate(queue_t *q, struct T_opthdr *reqopt,
    uint_t optset_context, uchar_t **resptrp, t_uscalar_t *worst_statusp,
    cred_t *cr, optdb_obj_t *dbobjp)
{
        pfi_t   deffn = dbobjp->odb_deffn;
        opt_set_fn setfn = dbobjp->odb_setfn;
        opdes_t *opt_arr = dbobjp->odb_opt_des_arr;
        uint_t opt_arr_cnt = dbobjp->odb_opt_arr_cnt;
        struct T_opthdr *topth;
        opdes_t *optd;
        int error;
        t_uscalar_t optlen;
        t_scalar_t optsize;
        /* remembered so we can tell below whether anything was emitted */
        uchar_t *initptr = *resptrp;

        ASSERT(reqopt->status == T_SUCCESS);

        if (reqopt->name != T_ALLOPT) {
                /*
                 * Reserve room for the response header first; the set
                 * function writes the value part immediately after it.
                 */
                topth = (struct T_opthdr *)*resptrp;
                *resptrp += sizeof (struct T_opthdr);
                error = (*setfn)(q, optset_context, reqopt->level, reqopt->name,
                    reqopt->len - sizeof (struct T_opthdr),
                    _TPI_TOPT_DATA(reqopt), &optlen, _TPI_TOPT_DATA(topth),
                    NULL, cr);
                if (error) {
                        /* failed - reset "*resptrp" */
                        *resptrp -= sizeof (struct T_opthdr);
                } else {
                        /*
                         * success - "value" already filled in setfn()
                         */
                        topth->len = (t_uscalar_t)(optlen +
                            sizeof (struct T_opthdr));
                        topth->level = reqopt->level;
                        topth->name = reqopt->name;
                        topth->status = reqopt->status;
                        *resptrp += _TPI_ALIGN_TOPT(optlen);
                        *worst_statusp = get_worst_status(topth->status,
                            *worst_statusp);
                }
        } else {                /* T_ALLOPT processing */
                /* only for T_NEGOTIATE case */
                ASSERT(optset_context == SETFN_OPTCOM_NEGOTIATE);

                /* scan and set all options to default value */
                for (optd = opt_arr; optd < &opt_arr[opt_arr_cnt]; optd++) {

                        /* skip other levels */
                        if (reqopt->level != optd->opdes_level)
                                continue;

                        if (OA_EXECUTE_PERMISSION(optd, cr) ||
                            OA_NO_PERMISSION(optd, cr)) {
                                /*
                                 * skip this one too. Does not make sense to
                                 * set anything to default value for "execute"
                                 * options.
                                 */
                                continue;
                        }

                        if (OA_READONLY_PERMISSION(optd, cr)) {
                                /*
                                 * Return with T_READONLY status (and no value
                                 * part). Note: spec is not clear but
                                 * XTI test suite needs this.
                                 */
                                topth = (struct T_opthdr *)*resptrp;
                                topth->len = sizeof (struct T_opthdr);
                                *resptrp += topth->len;
                                topth->level = reqopt->level;
                                topth->name = optd->opdes_name;
                                topth->status = T_READONLY;
                                *worst_statusp = get_worst_status(topth->status,
                                    *worst_statusp);
                                continue;
                        }

                        /*
                         * It is not read only or execute type
                         * then it must have write permission
                         */
                        ASSERT(OA_WRITE_PERMISSION(optd, cr));

                        /*
                         * Emit a header-only entry up front. Every path
                         * below keeps this header in the response; only
                         * the successful set appends a value part to it.
                         */
                        topth = (struct T_opthdr *)*resptrp;
                        *resptrp += sizeof (struct T_opthdr);

                        topth->len = sizeof (struct T_opthdr);
                        topth->level = reqopt->level;
                        topth->name = optd->opdes_name;
                        if (optd->opdes_props & OP_NODEFAULT) {
                                /*
                                 * Option of "no default value" so it does not
                                 * make sense to try to set it. We just return
                                 * header with status of T_SUCCESS
                                 * XXX should this be failure ?
                                 */
                                topth->status = T_SUCCESS;
                                continue; /* skip setting */
                        }
                        if (optd->opdes_props & OP_DEF_FN) {
                                /*
                                 * The default value comes from the default
                                 * function, which is handed opdes_defbuf as
                                 * its buffer and returns the value size (or
                                 * a negative number on failure). Variable
                                 * length options are never defaulted here.
                                 */
                                if ((optd->opdes_props & OP_VARLEN) ||
                                    ((optsize = (*deffn)(q, reqopt->level,
                                    optd->opdes_name,
                                    (uchar_t *)optd->opdes_defbuf)) < 0)) {
                                        /* XXX - skip these too */
                                        topth->status = T_SUCCESS;
                                        continue; /* skip setting */
                                }
                        } else {
                                /* static default: opdes_defbuf / opdes_size */
                                optsize = optd->opdes_size;
                        }


                        /* set option of this level */
                        error = (*setfn)(q, SETFN_OPTCOM_NEGOTIATE,
                            reqopt->level, optd->opdes_name, optsize,
                            (uchar_t *)optd->opdes_defbuf, &optlen,
                            _TPI_TOPT_DATA(topth), NULL, cr);
                        if (error) {
                                /*
                                 * failed, return as T_FAILURE and null value
                                 * part. Maybe something downstream will
                                 * handle this one and fill in a value. Here
                                 * it is just part of T_ALLOPT expansion.
                                 */
                                topth->status = T_FAILURE;
                                *worst_statusp = get_worst_status(topth->status,
                                    *worst_statusp);
                        } else {
                                /* success */
                                topth->len += optlen;
                                topth->status = T_SUCCESS;
                                *resptrp += _TPI_ALIGN_TOPT(optlen);
                        }
                } /* end for loop */
                /* END T_ALLOPT */
        }

        if (*resptrp == initptr) {
                /*
                 * setfn failed and does not want to handle this option.
                 * Nothing was emitted above, so echo the request option
                 * (header and value) back with a T_FAILURE status.
                 */
                reqopt->status = T_FAILURE;
                bcopy(reqopt, *resptrp, reqopt->len);
                *resptrp += _TPI_ALIGN_TOPT(reqopt->len);
                *worst_statusp = get_worst_status(reqopt->status,
                    *worst_statusp);
        }
}

/*
 * The following routine processes the options buffer passed with
 * T_CONN_REQ, T_CONN_RES and T_UNITDATA_REQ.
 * It does the consistency check applied to the sanity of formatting
 * of multiple options packed in the buffer.
 *
 * XTI brain damage alert:
 * XTI interface adopts the notion of an option being an
 * "absolute requirement" from OSI transport service (but applies
 * it to all transports including Internet transports).
 * The main effect of that is action on failure to "negotiate" a
 * requested option to the exact requested value
 *
 *          - if the option is an "absolute requirement", the primitive
 *            is aborted (e.g T_DISCON_REQ or T_UDERR generated)
 *          - if the option is NOT an "absolute requirement" it can
 *            just be ignored.
 *
 * We would not support "negotiating" of options on connection
 * primitives for Internet transports. However just in case we are
 * forced to in order to pass strange test suites, the design here
 * tries to support these notions.
 *
 * tpi_optcom_buf(q, mp, opt_lenp, opt_offset, cred, dbobjp, thisdg_attrs,
 *      *is_absreq_failurep)
 *
 * - If the primitive in "mp" is not one of the three above, return EINVAL.
 *
 * - Verify the option buffer, if formatted badly, return ENOPROTOOPT.
 *
 * - If the scratch copy of the message cannot be allocated, return ENOMEM.
 *
 * - If it is a "permissions" failure (e.g. read-only), return EACCES.
 *
 * - Else, process the option "in place", the following can happen,
 *           - if a "privileged" option requested without the privilege,
 *             mark it as "ignored".
 *           - if "not supported", mark "ignored"
 *           - if "supported" attempt negotiation and fill result in
 *             the outcome
 *                      - if "absolute requirement", set "*is_absreq_failurep"
 *                        and return the error (EINVAL for a bad length,
 *                        otherwise whatever the set function returned)
 *                      - if NOT an "absolute requirement", then our
 *                        interpretation is to mark it as ignored if
 *                        negotiation fails (Spec allows partial success
 *                        as in OSI protocols but not failure)
 *
 *   Then delete "ignored" options from option buffer, update "*opt_lenp"
 *   to the new (possibly shorter) length and return success (0).
 *
 * - If the set function reports an output value longer than the input
 *   value, return EINVAL (the result is built in place and cannot grow).
 *
 * On any error return the option buffer length "*opt_lenp" is left
 * unchanged.
 */
int
tpi_optcom_buf(queue_t *q, mblk_t *mp, t_scalar_t *opt_lenp,
    t_scalar_t opt_offset, cred_t *cr, optdb_obj_t *dbobjp,
    void *thisdg_attrs, int *is_absreq_failurep)
{
        opt_set_fn setfn = dbobjp->odb_setfn;
        opdes_t *opt_arr = dbobjp->odb_opt_des_arr;
        uint_t opt_arr_cnt = dbobjp->odb_opt_arr_cnt;
        struct T_opthdr *opt, *opt_start, *opt_end;
        mblk_t  *copy_mp_head;
        uchar_t *optr, *init_optr;
        opdes_t *optd;
        uint_t optset_context;
        t_uscalar_t olen;
        int error = 0;

        /* the length word being updated must live inside the message */
        ASSERT((uchar_t *)opt_lenp > mp->b_rptr &&
            (uchar_t *)opt_lenp < mp->b_wptr);

        copy_mp_head = NULL;
        *is_absreq_failurep = 0;
        switch (((union T_primitives *)mp->b_rptr)->type) {
        case T_CONN_REQ:
        case T_CONN_RES:
                optset_context = SETFN_CONN_NEGOTIATE;
                break;
        case T_UNITDATA_REQ:
                optset_context = SETFN_UD_NEGOTIATE;
                break;
        default:
                /*
                 * should never get here, all possible TPI primitives
                 * where this can be called from should be accounted
                 * for in the cases above
                 */
                return (EINVAL);
        }

        if ((opt_start = (struct T_opthdr *)
            mi_offset_param(mp, opt_offset, *opt_lenp)) == NULL) {
                error = ENOPROTOOPT;
                goto error_ret;
        }
        if (!__TPI_TOPT_ISALIGNED(opt_start)) {
                error = ENOPROTOOPT;
                goto error_ret;
        }

        opt_end = (struct T_opthdr *)((uchar_t *)opt_start
            + *opt_lenp);

        /*
         * The surviving options are accumulated in a scratch copy of the
         * message and copied back over the original only once the whole
         * buffer has been processed without a fatal error.
         */
        if ((copy_mp_head = copyb(mp)) == (mblk_t *)NULL) {
                error = ENOMEM;
                goto error_ret;
        }

        init_optr = optr = (uchar_t *)&copy_mp_head->b_rptr[opt_offset];

        for (opt = opt_start; opt && (opt < opt_end);
            opt = _TPI_TOPT_NEXTHDR(opt_start, *opt_lenp, opt)) {
                /*
                 * Validate the option for length and alignment
                 * before accessing anything in it
                 */
                if (!_TPI_TOPT_VALID(opt, opt_start, opt_end)) {
                        error = ENOPROTOOPT;
                        goto error_ret;
                }

                /* Find the option in the opt_arr. */
                optd = proto_opt_lookup(opt->level, opt->name,
                    opt_arr, opt_arr_cnt);

                if (optd == NULL) {
                        /*
                         * Option not found
                         */
                        opt->status = T_NOTSUPPORT;
                        continue;
                }

                /*
                 * Weird but as in XTI spec.
                 * Sec 6.3.6 "Privileged and ReadOnly Options"
                 * Permission problems (e.g.readonly) fail with bad access
                 * BUT "privileged" option request from those NOT PRIVILEGED
                 * are to be merely "ignored".
                 * XXX Prevents "probing" of privileged options ?
                 */
                if (OA_READONLY_PERMISSION(optd, cr)) {
                        error = EACCES;
                        goto error_ret;
                }
                if (OA_MATCHED_PRIV(optd, cr)) {
                        /*
                         * For privileged options, we DO perform
                         * access checks as is common sense
                         */
                        if (!OA_WX_ANYPRIV(optd)) {
                                error = EACCES;
                                goto error_ret;
                        }
                } else {
                        /*
                         * For non privileged, we fail instead following
                         * "ignore" semantics dictated by XTI spec for
                         * permissions problems.
                         * Sec 6.3.6 "Privileged and ReadOnly Options"
                         * XXX Should we do "ignore" semantics ?
                         */
                        if (!OA_WX_NOPRIV(optd)) { /* nopriv */
                                opt->status = T_FAILURE;
                                continue;
                        }
                }
                /*
                 *
                 * If the negotiation fails, for options that
                 * are "absolute requirement", it is a fatal error.
                 * For options that are NOT "absolute requirements",
                 * and the value fails to negotiate, the XTI spec
                 * only considers the possibility of partial success
                 * (T_PARTSUCCES - not likely for Internet protocols).
                 * The spec is in denial about complete failure
                 * (T_FAILURE) to negotiate for options that are
                 * carried on T_CONN_REQ/T_CONN_RES/T_UNITDATA
                 * We interpret the T_FAILURE to negotiate an option
                 * that is NOT an absolute requirement that it is safe
                 * to ignore it.
                 */

                /* verify length */
                if (opt->len < (t_uscalar_t)sizeof (struct T_opthdr) ||
                    !opt_length_ok(optd, opt->len - sizeof (struct T_opthdr))) {
                        /* bad size */
                        if ((optd->opdes_props & OP_NOT_ABSREQ) == 0) {
                                /* option is absolute requirement */
                                *is_absreq_failurep = 1;
                                error = EINVAL;
                                goto error_ret;
                        }
                        opt->status = T_FAILURE;
                        continue;
                }

                /*
                 * verified generic attributes. Now call set function.
                 * Note: We assume the following to simplify code.
                 * XXX If this is found not to be valid, this routine
                 * will need to be rewritten. At this point it would
                 * be premature to introduce more complexity than is
                 * needed.
                 * Assumption: For variable length options, we assume
                 * that the value returned will be same or less length
                 * (size does not increase). This makes it OK to pass the
                 * same space for output as it is on input.
                 */

                /*
                 * Start from a known output length: a set function that
                 * fails may not write "olen", and the size check below
                 * must not look at an uninitialized value or at the
                 * length left over from a previous option.
                 */
                olen = 0;
                error = (*setfn)(q, optset_context, opt->level, opt->name,
                    opt->len - (t_uscalar_t)sizeof (struct T_opthdr),
                    _TPI_TOPT_DATA(opt), &olen, _TPI_TOPT_DATA(opt),
                    thisdg_attrs, cr);

                if (olen > (int)(opt->len - sizeof (struct T_opthdr))) {
                        /*
                         * Space on output more than space on input. Should
                         * not happen and we consider it a bug/error.
                         * More of a restriction than an error in our
                         * implementation. Will see if we can live with this
                         * otherwise code will get more hairy with multiple
                         * passes.
                         */
                        error = EINVAL;
                        goto error_ret;
                }
                if (error != 0) {
                        if ((optd->opdes_props & OP_NOT_ABSREQ) == 0) {
                                /* option is absolute requirement. */
                                *is_absreq_failurep = 1;
                                goto error_ret;
                        }
                        /*
                         * failed - but option "not an absolute
                         * requirement". The option is simply dropped
                         * from the output, so the failure must not be
                         * reported to the caller: clear "error" or it
                         * would leak out as the return value if no later
                         * option reaches the set function.
                         */
                        opt->status = T_FAILURE;
                        error = 0;
                        continue;
                }
                /*
                 * Fill in the only possible successful result
                 * (Note: TPI allows for T_PARTSUCCESS - partial
                 * success result code which is relevant in OSI world
                 * and not possible in Internet code)
                 */
                opt->status = T_SUCCESS;

                /*
                 * Add T_SUCCESS result code options to the "output" options.
                 * No T_FAILURES or T_NOTSUPPORT here as they are to be
                 * ignored.
                 * This code assumes output option buffer will
                 * be <= input option buffer.
                 *
                 * Copy option header+value
                 */
                bcopy(opt, optr, opt->len);
                optr +=  _TPI_ALIGN_TOPT(opt->len);
        }
        /*
         * Overwrite the input mblk option buffer now with the output
         * and update length, and contents in original mblk
         * (offset remains unchanged).
         */
        *opt_lenp = (t_scalar_t)(optr - init_optr);
        if (*opt_lenp > 0) {
                bcopy(init_optr, opt_start, *opt_lenp);
        }

error_ret:
        if (copy_mp_head != NULL)
                freeb(copy_mp_head);
        return (error);
}

/*
 * Return B_TRUE if "level" appears among the first "valid_level_arr_cnt"
 * entries of "valid_level_arr", B_FALSE otherwise (including when the
 * count is zero).
 */
static boolean_t
opt_level_valid(t_uscalar_t level, optlevel_t *valid_level_arr,
    uint_t valid_level_arr_cnt)
{
        uint_t  idx;

        for (idx = 0; idx < valid_level_arr_cnt; idx++) {
                if ((uint_t)valid_level_arr[idx] == level)
                        return (B_TRUE);
        }
        return (B_FALSE);
}


/*
 * Compute the largest possible size of an option buffer holding every
 * option of the given level in one buffer.
 *
 * The result is an upper bound: each table entry at "level" contributes
 * one T_opthdr plus its aligned maximum value size, and no filtering for
 * permissions etc. is applied. A return value of 0 means the table has
 * no option at this level.
 *
 * XXX TBD, investigate use of opt_bloated_maxsize() to avoid
 *     wastefully large buffer allocation.
 */
static size_t
opt_level_allopts_lengths(t_uscalar_t level, opdes_t *opt_arr,
    uint_t opt_arr_cnt)
{
        size_t  total = 0;      /* stays 0 if the level is never seen */
        uint_t  i;

        for (i = 0; i < opt_arr_cnt; i++) {
                /* entries belonging to other levels do not count */
                if (level != opt_arr[i].opdes_level)
                        continue;

                total += sizeof (struct T_opthdr) +
                    _TPI_ALIGN_TOPT(opt_arr[i].opdes_size);
        }
        return (total);
}

/*
 * Compute the largest possible size for an option buffer containing
 * all options of the table in one buffer - a (theoretical?) worst case
 * scenario for certain cases.
 *
 * The total starts at sizeof (struct T_info_ack) and grows by one
 * T_opthdr plus the aligned maximum value size for every table entry.
 */
t_uscalar_t
optcom_max_optbuf_len(opdes_t *opt_arr, uint_t opt_arr_cnt)
{
        t_uscalar_t     total = sizeof (struct T_info_ack);
        uint_t          i;

        for (i = 0; i < opt_arr_cnt; i++) {
                total += (t_uscalar_t)sizeof (struct T_opthdr) +
                    (t_uscalar_t)_TPI_ALIGN_TOPT(opt_arr[i].opdes_size);
        }
        return (total);
}

/*
 * Compute largest possible size for OPT_size for a transport.
 * Heuristic used is to add all but certain extremely large
 * size options; those are identified by calling opt_bloated_maxsize()
 * and left out of the sum.
 * The result affects user level allocations in TLI/XTI code using
 * t_alloc() and other TLI/XTI implementation instance structures.
 * The large size options excluded are presumed to be
 * never accessed through the (theoretical?) worst case code paths
 * through TLI/XTI as they are currently IPv6 specific options.
 *
 * As in optcom_max_optbuf_len(), the total starts at
 * sizeof (struct T_info_ack).
 */

t_uscalar_t
optcom_max_optsize(opdes_t *opt_arr, uint_t opt_arr_cnt)
{
        t_uscalar_t     total = sizeof (struct T_info_ack);
        uint_t          i;

        for (i = 0; i < opt_arr_cnt; i++) {
                opdes_t *entry = &opt_arr[i];

                /* pretend the oversized options do not exist */
                if (opt_bloated_maxsize(entry))
                        continue;

                total += (t_uscalar_t)sizeof (struct T_opthdr) +
                    (t_uscalar_t)_TPI_ALIGN_TOPT(entry->opdes_size);
        }
        return (total);
}

/*
 * The theoretical model used in optcom_max_optsize() and
 * opt_level_allopts_lengths() accounts for the worst case of all
 * possible options for the theoretical cases and results in wasteful
 * memory allocations for certain theoretically correct usage scenarios.
 * In practice, the "features" they support are rarely, if ever,
 * used and even then only by test suites for those features (VSU, VST).
 * However, they result in large allocations due to the increased transport
 * T_INFO_ACK OPT_size field affecting t_alloc() users and TLI/XTI library
 * instance data structures for applications.
 *
 * The following routine opt_bloated_maxsize() supports a hack that avoids
 * paying the tax for the bloated options by excluding them and pretending
 * they don't exist for certain features without affecting features that
 * do use them.
 *
 * XXX Currently implemented only for optcom_max_optsize()
 *     (to reduce risk late in release).
 *     TBD for future, investigate use in opt_level_allopts_lengths() and
 *     all the instances of T_ALLOPT processing to exclude "bloated options".
 *     Will not affect VSU/VST tests as they do not test with IPPROTO_IPV6
 *     level options which are the only ones that fit the "bloated maxsize"
 *     option profile now.
 */
static boolean_t
opt_bloated_maxsize(opdes_t *optd)
{
        boolean_t       bloated = B_FALSE;

        /* only IPPROTO_IPV6 level options are ever treated as bloated */
        if (optd->opdes_level == IPPROTO_IPV6) {
                switch (optd->opdes_name) {
                case IPV6_HOPOPTS:
                case IPV6_DSTOPTS:
                case IPV6_RTHDRDSTOPTS:
                case IPV6_RTHDR:
                case IPV6_PATHMTU:
                        bloated = B_TRUE;
                        break;
                default:
                        /* every other IPv6 option is counted normally */
                        break;
                }
        }
        return (bloated);
}

/*
 * Check the length of an option value against its descriptor.
 *
 * optlen is the length of the option content (the value part). The
 * caller should check that the full option length is at least
 * sizeof (struct T_opthdr) before deriving optlen from it.
 *
 * Returns B_TRUE when the length is acceptable: a variable length option
 * (OP_VARLEN) may be anything up to opdes_size, a fixed length option
 * must be exactly opdes_size.
 */
static boolean_t
opt_length_ok(opdes_t *optd, t_uscalar_t optlen)
{
        if ((optd->opdes_props & OP_VARLEN) != 0) {
                /* variable length: opdes_size is the maximum */
                return ((optlen <= optd->opdes_size) ? B_TRUE : B_FALSE);
        }

        /* fixed length: must match exactly */
        return ((optlen == optd->opdes_size) ? B_TRUE : B_FALSE);
}

/*
 * This routine manages the allocation and free of the space for
 * an extension header or option, and stores a copy of the "inlen"
 * bytes at "invalp" in it.
 *
 * "*optbufp" / "*optlenp" describe the currently held buffer and are
 * updated to describe the new one. When the length does not change the
 * existing buffer is reused; an "inlen" of 0 releases the buffer and
 * leaves "*optbufp" NULL.
 *
 * Returns 0 on success, or ENOMEM if memory can not be allocated (in
 * which case the old buffer is left untouched).
 */
int
optcom_pkt_set(uchar_t *invalp, uint_t inlen,
    uchar_t **optbufp, uint_t *optlenp)
{
        uchar_t *newbuf = NULL;

        if (*optlenp == inlen) {
                /* Unchanged length - overwrite the buffer in place */
                bcopy(invalp, *optbufp, inlen);
                return (0);
        }

        if (inlen != 0) {
                /* Get the replacement first so failure loses nothing */
                newbuf = kmem_alloc(inlen, KM_NOSLEEP);
                if (newbuf == NULL)
                        return (ENOMEM);
        }

        /* Release the previous buffer, if there was one */
        if (*optlenp != 0)
                kmem_free(*optbufp, *optlenp);

        if (inlen != 0)
                bcopy(invalp, newbuf, inlen);

        *optbufp = newbuf;
        *optlenp = inlen;
        return (0);
}

/*
 * Walk the ancillary data ("control", "controllen" bytes of cmsghdr
 * records) and hand each record to "opt_set_fn" as a SETFN_UD_NEGOTIATE
 * request on "connp". The record's content area is used both as the input
 * value and as the output buffer; "optbuf" is passed through to the set
 * function untouched.
 *
 * Returns:
 *  - EOPNOTSUPP if the control buffer holds no valid cmsghdr at all.
 *  - EINVAL if an option is not in the dbobjp table, or if the set
 *    function reports an output longer than the input.
 *  - EACCES if the caller lacks permission for an option (read-only, or
 *    not writable/executable at the caller's privilege).
 *  - the (positive) error returned by the set function.
 *  - 0 when every record was processed.
 * Processing stops at the first failing record.
 */
int
process_auxiliary_options(conn_t *connp, void *control, t_uscalar_t controllen,
    void *optbuf, optdb_obj_t *dbobjp, int (*opt_set_fn)(conn_t *,
    uint_t, int, int, uint_t, uchar_t *, uint_t *, uchar_t *, void *, cred_t *),
    cred_t *cr)
{
        struct cmsghdr *cmsg;
        opdes_t *optd;
        t_uscalar_t outlen;
        int error = EOPNOTSUPP;
        t_uscalar_t len;
        uint_t opt_arr_cnt = dbobjp->odb_opt_arr_cnt;
        opdes_t *opt_arr = dbobjp->odb_opt_des_arr;

        for (cmsg = (struct cmsghdr *)control;
            CMSG_VALID(cmsg, control, (uintptr_t)control + controllen);
            cmsg = CMSG_NEXT(cmsg)) {

                len = (t_uscalar_t)CMSG_CONTENTLEN(cmsg);
                /* Find the option in the opt_arr. */
                optd = proto_opt_lookup(cmsg->cmsg_level, cmsg->cmsg_type,
                    opt_arr, opt_arr_cnt);
                if (optd == NULL) {
                        return (EINVAL);
                }
                if (OA_READONLY_PERMISSION(optd, cr)) {
                        return (EACCES);
                }
                if (OA_MATCHED_PRIV(optd, cr)) {
                        /*
                         * For privileged options, we DO perform
                         * access checks as is common sense
                         */
                        if (!OA_WX_ANYPRIV(optd)) {
                                return (EACCES);
                        }
                } else {
                        /*
                         * For non privileged, we fail the request rather
                         * than follow the "ignore" semantics dictated by
                         * the XTI spec for permissions problems.
                         */
                        if (!OA_WX_NOPRIV(optd)) { /* nopriv */
                                return (EACCES);
                        }
                }

                /*
                 * Start from a known output length. "outlen" is examined
                 * below whenever the set function did not return a
                 * positive error, which includes negative returns; if the
                 * set function leaves "outlen" unwritten on that path,
                 * the check would otherwise read an uninitialized value
                 * or the length left over from the previous record.
                 */
                outlen = 0;
                error = opt_set_fn(connp, SETFN_UD_NEGOTIATE, optd->opdes_level,
                    optd->opdes_name, len, (uchar_t *)CMSG_CONTENT(cmsg),
                    &outlen, (uchar_t *)CMSG_CONTENT(cmsg), optbuf, cr);
                if (error > 0) {
                        return (error);
                } else if (outlen > len) {
                        /* result was built in place and must not grow */
                        return (EINVAL);
                } else {
                        /*
                         * error can be -ve if the protocol wants to
                         * pass the option to IP. We do not pass auxiliary
                         * options to IP.
                         */
                        error = 0;
                }
        }
        return (error);
}