root/usr/src/uts/common/inet/sctp/sctp_common.c
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
 */

#include <sys/types.h>
#include <sys/systm.h>
#include <sys/stream.h>
#include <sys/strsubr.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/kmem.h>
#include <sys/socket.h>
#include <sys/random.h>
#include <sys/tsol/tndb.h>
#include <sys/tsol/tnet.h>

#include <netinet/in.h>
#include <netinet/ip6.h>
#include <netinet/sctp.h>

#include <inet/common.h>
#include <inet/ip.h>
#include <inet/ip6.h>
#include <inet/ip_ire.h>
#include <inet/ip_if.h>
#include <inet/ip_ndp.h>
#include <inet/mib2.h>
#include <inet/nd.h>
#include <inet/optcom.h>
#include <inet/sctp_ip.h>
#include <inet/ipclassifier.h>

#include "sctp_impl.h"
#include "sctp_addr.h"
#include "sctp_asconf.h"

static struct kmem_cache *sctp_kmem_faddr_cache;
static void sctp_init_faddr(sctp_t *, sctp_faddr_t *, in6_addr_t *, mblk_t *);

/* Set the source address.  Refer to comments in sctp_get_dest(). */
void
sctp_set_saddr(sctp_t *sctp, sctp_faddr_t *fp)
{
        boolean_t v6 = !fp->sf_isv4;
        boolean_t addr_set;

        fp->sf_saddr = sctp_get_valid_addr(sctp, v6, &addr_set);
        /*
         * If there is no source address avaialble, mark this peer address
         * as unreachable for now.  When the heartbeat timer fires, it will
         * call sctp_get_dest() to re-check if there is any source address
         * available.
         */
        if (!addr_set)
                fp->sf_state = SCTP_FADDRS_UNREACH;
}

/*
 * Call this function to get information about a peer addr fp.
 *
 * Uses ip_attr_connect to avoid explicit use of ire and source address
 * selection.
 */
void
sctp_get_dest(sctp_t *sctp, sctp_faddr_t *fp)
{
        in6_addr_t      laddr;
        in6_addr_t      nexthop;
        sctp_saddr_ipif_t *sp;
        int             hdrlen;
        sctp_stack_t    *sctps = sctp->sctp_sctps;
        conn_t          *connp = sctp->sctp_connp;
        iulp_t          uinfo;
        uint_t          pmtu;
        int             error;
        uint32_t        flags = IPDF_VERIFY_DST | IPDF_IPSEC |
            IPDF_SELECT_SRC | IPDF_UNIQUE_DCE;

        /*
         * Tell sctp_make_mp it needs to call us again should we not
         * complete and set the saddr.
         */
        fp->sf_saddr = ipv6_all_zeros;

        /*
         * If this addr is not reachable, mark it as unconfirmed for now, the
         * state will be changed back to unreachable later in this function
         * if it is still the case.
         */
        if (fp->sf_state == SCTP_FADDRS_UNREACH) {
                fp->sf_state = SCTP_FADDRS_UNCONFIRMED;
        }

        /*
         * Socket is connected - enable PMTU discovery.
         */
        if (!sctps->sctps_ignore_path_mtu)
                fp->sf_ixa->ixa_flags |= IXAF_PMTU_DISCOVERY;

        ip_attr_nexthop(&connp->conn_xmit_ipp, fp->sf_ixa, &fp->sf_faddr,
            &nexthop);

        laddr = fp->sf_saddr;
        error = ip_attr_connect(connp, fp->sf_ixa, &laddr, &fp->sf_faddr,
            &nexthop, connp->conn_fport, &laddr, &uinfo, flags);

        if (error != 0) {
                dprint(3, ("sctp_get_dest: no ire for %x:%x:%x:%x\n",
                    SCTP_PRINTADDR(fp->sf_faddr)));
                /*
                 * It is tempting to just leave the src addr
                 * unspecified and let IP figure it out, but we
                 * *cannot* do this, since IP may choose a src addr
                 * that is not part of this association... unless
                 * this sctp has bound to all addrs.  So if the dest
                 * lookup fails, try to find one in our src addr
                 * list, unless the sctp has bound to all addrs, in
                 * which case we change the src addr to unspec.
                 *
                 * Note that if this is a v6 endpoint but it does
                 * not have any v4 address at this point (e.g. may
                 * have been  deleted), sctp_get_valid_addr() will
                 * return mapped INADDR_ANY.  In this case, this
                 * address should be marked not reachable so that
                 * it won't be used to send data.
                 */
                sctp_set_saddr(sctp, fp);
                if (fp->sf_state == SCTP_FADDRS_UNREACH)
                        return;
                goto check_current;
        }
        ASSERT(fp->sf_ixa->ixa_ire != NULL);
        ASSERT(!(fp->sf_ixa->ixa_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)));

        if (!sctp->sctp_loopback)
                sctp->sctp_loopback = uinfo.iulp_loopback;

        /* Make sure the laddr is part of this association */
        if ((sp = sctp_saddr_lookup(sctp, &laddr, 0)) != NULL &&
            !sp->saddr_ipif_dontsrc) {
                if (sp->saddr_ipif_unconfirmed == 1)
                        sp->saddr_ipif_unconfirmed = 0;
                /* We did IPsec policy lookup for laddr already */
                fp->sf_saddr = laddr;
        } else {
                dprint(2, ("sctp_get_dest: src addr is not part of assoc "
                    "%x:%x:%x:%x\n", SCTP_PRINTADDR(laddr)));

                /*
                 * Set the src to the first saddr and hope for the best.
                 * Note that this case should very seldomly
                 * happen.  One scenario this can happen is an app
                 * explicitly bind() to an address.  But that address is
                 * not the preferred source address to send to the peer.
                 */
                sctp_set_saddr(sctp, fp);
                if (fp->sf_state == SCTP_FADDRS_UNREACH) {
                        return;
                }
        }

        /*
         * Pull out RTO information for this faddr and use it if we don't
         * have any yet.
         */
        if (fp->sf_srtt == -1 && uinfo.iulp_rtt != 0) {
                /* The cached value is in ms. */
                fp->sf_srtt = MSEC_TO_TICK(uinfo.iulp_rtt);
                fp->sf_rttvar = MSEC_TO_TICK(uinfo.iulp_rtt_sd);
                fp->sf_rto = 3 * fp->sf_srtt;

                /* Bound the RTO by configured min and max values */
                if (fp->sf_rto < sctp->sctp_rto_min) {
                        fp->sf_rto = sctp->sctp_rto_min;
                }
                if (fp->sf_rto > sctp->sctp_rto_max) {
                        fp->sf_rto = sctp->sctp_rto_max;
                }
                SCTP_MAX_RTO(sctp, fp);
        }
        pmtu = uinfo.iulp_mtu;

        /*
         * Record the MTU for this faddr. If the MTU for this faddr has
         * changed, check if the assc MTU will also change.
         */
        if (fp->sf_isv4) {
                hdrlen = sctp->sctp_hdr_len;
        } else {
                hdrlen = sctp->sctp_hdr6_len;
        }
        if ((fp->sf_pmss + hdrlen) != pmtu) {
                /* Make sure that sf_pmss is a multiple of SCTP_ALIGN. */
                fp->sf_pmss = (pmtu - hdrlen) & ~(SCTP_ALIGN - 1);
                if (fp->sf_cwnd < (fp->sf_pmss * 2)) {
                        SET_CWND(fp, fp->sf_pmss,
                            sctps->sctps_slow_start_initial);
                }
        }

check_current:
        if (fp == sctp->sctp_current)
                sctp_set_faddr_current(sctp, fp);
}

void
sctp_update_dce(sctp_t *sctp)
{
        sctp_faddr_t    *fp;
        sctp_stack_t    *sctps = sctp->sctp_sctps;
        iulp_t          uinfo;
        ip_stack_t      *ipst = sctps->sctps_netstack->netstack_ip;
        uint_t          ifindex;

        for (fp = sctp->sctp_faddrs; fp != NULL; fp = fp->sf_next) {
                bzero(&uinfo, sizeof (uinfo));
                /*
                 * Only record the PMTU for this faddr if we actually have
                 * done discovery. This prevents initialized default from
                 * clobbering any real info that IP may have.
                 */
                if (fp->sf_pmtu_discovered) {
                        if (fp->sf_isv4) {
                                uinfo.iulp_mtu = fp->sf_pmss +
                                    sctp->sctp_hdr_len;
                        } else {
                                uinfo.iulp_mtu = fp->sf_pmss +
                                    sctp->sctp_hdr6_len;
                        }
                }
                if (sctps->sctps_rtt_updates != 0 &&
                    fp->sf_rtt_updates >= sctps->sctps_rtt_updates) {
                        /*
                         * dce_update_uinfo() merges these values with the
                         * old values.
                         */
                        uinfo.iulp_rtt = TICK_TO_MSEC(fp->sf_srtt);
                        uinfo.iulp_rtt_sd = TICK_TO_MSEC(fp->sf_rttvar);
                        fp->sf_rtt_updates = 0;
                }
                ifindex = 0;
                if (IN6_IS_ADDR_LINKSCOPE(&fp->sf_faddr)) {
                        /*
                         * If we are going to create a DCE we'd better have
                         * an ifindex
                         */
                        if (fp->sf_ixa->ixa_nce != NULL) {
                                ifindex = fp->sf_ixa->ixa_nce->nce_common->
                                    ncec_ill->ill_phyint->phyint_ifindex;
                        } else {
                                continue;
                        }
                }

                (void) dce_update_uinfo(&fp->sf_faddr, ifindex, &uinfo, ipst);
        }
}

/*
 * The sender must later set the total length in the IP header.
 */
mblk_t *
sctp_make_mp(sctp_t *sctp, sctp_faddr_t *fp, int trailer)
{
        mblk_t *mp;
        size_t ipsctplen;
        int isv4;
        sctp_stack_t *sctps = sctp->sctp_sctps;
        boolean_t src_changed = B_FALSE;

        ASSERT(fp != NULL);
        isv4 = fp->sf_isv4;

        if (SCTP_IS_ADDR_UNSPEC(isv4, fp->sf_saddr) ||
            (fp->sf_ixa->ixa_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE))) {
                /* Need to pick a source */
                sctp_get_dest(sctp, fp);
                /*
                 * Although we still may not get an IRE, the source address
                 * may be changed in sctp_get_ire().  Set src_changed to
                 * true so that the source address is copied again.
                 */
                src_changed = B_TRUE;
        }

        /* There is no suitable source address to use, return. */
        if (fp->sf_state == SCTP_FADDRS_UNREACH)
                return (NULL);

        ASSERT(fp->sf_ixa->ixa_ire != NULL);
        ASSERT(!SCTP_IS_ADDR_UNSPEC(isv4, fp->sf_saddr));

        if (isv4) {
                ipsctplen = sctp->sctp_hdr_len;
        } else {
                ipsctplen = sctp->sctp_hdr6_len;
        }

        mp = allocb(ipsctplen + sctps->sctps_wroff_xtra + trailer, BPRI_MED);
        if (mp == NULL) {
                ip1dbg(("sctp_make_mp: error making mp..\n"));
                return (NULL);
        }
        mp->b_rptr += sctps->sctps_wroff_xtra;
        mp->b_wptr = mp->b_rptr + ipsctplen;

        ASSERT(OK_32PTR(mp->b_wptr));

        if (isv4) {
                ipha_t *iph = (ipha_t *)mp->b_rptr;

                bcopy(sctp->sctp_iphc, mp->b_rptr, ipsctplen);
                if (fp != sctp->sctp_current || src_changed) {
                        /* Fix the source and destination addresses. */
                        IN6_V4MAPPED_TO_IPADDR(&fp->sf_faddr, iph->ipha_dst);
                        IN6_V4MAPPED_TO_IPADDR(&fp->sf_saddr, iph->ipha_src);
                }
                /* set or clear the don't fragment bit */
                if (fp->sf_df) {
                        iph->ipha_fragment_offset_and_flags = htons(IPH_DF);
                } else {
                        iph->ipha_fragment_offset_and_flags = 0;
                }
        } else {
                bcopy(sctp->sctp_iphc6, mp->b_rptr, ipsctplen);
                if (fp != sctp->sctp_current || src_changed) {
                        /* Fix the source and destination addresses. */
                        ((ip6_t *)(mp->b_rptr))->ip6_dst = fp->sf_faddr;
                        ((ip6_t *)(mp->b_rptr))->ip6_src = fp->sf_saddr;
                }
        }
        ASSERT(sctp->sctp_connp != NULL);
        return (mp);
}

/*
 * Notify upper layers about preferred write offset, write size.
 */
void
sctp_set_ulp_prop(sctp_t *sctp)
{
        int hdrlen;
        struct sock_proto_props sopp;

        sctp_stack_t *sctps = sctp->sctp_sctps;

        if (sctp->sctp_current->sf_isv4) {
                hdrlen = sctp->sctp_hdr_len;
        } else {
                hdrlen = sctp->sctp_hdr6_len;
        }
        ASSERT(sctp->sctp_ulpd);

        sctp->sctp_connp->conn_wroff = sctps->sctps_wroff_xtra + hdrlen +
            sizeof (sctp_data_hdr_t);

        ASSERT(sctp->sctp_current->sf_pmss == sctp->sctp_mss);
        bzero(&sopp, sizeof (sopp));
        sopp.sopp_flags = SOCKOPT_MAXBLK|SOCKOPT_WROFF;
        sopp.sopp_wroff = sctp->sctp_connp->conn_wroff;
        sopp.sopp_maxblk = sctp->sctp_mss - sizeof (sctp_data_hdr_t);
        sctp->sctp_ulp_prop(sctp->sctp_ulpd, &sopp);
}

/*
 * Set the lengths in the packet and the transmit attributes.
 */
void
sctp_set_iplen(sctp_t *sctp, mblk_t *mp, ip_xmit_attr_t *ixa)
{
        uint16_t        sum = 0;
        ipha_t          *iph;
        ip6_t           *ip6h;
        mblk_t          *pmp = mp;
        boolean_t       isv4;

        isv4 = (IPH_HDR_VERSION(mp->b_rptr) == IPV4_VERSION);
        for (; pmp; pmp = pmp->b_cont)
                sum += pmp->b_wptr - pmp->b_rptr;

        ixa->ixa_pktlen = sum;
        if (isv4) {
                iph = (ipha_t *)mp->b_rptr;
                iph->ipha_length = htons(sum);
                ixa->ixa_ip_hdr_length = sctp->sctp_ip_hdr_len;
        } else {
                ip6h = (ip6_t *)mp->b_rptr;
                ip6h->ip6_plen = htons(sum - IPV6_HDR_LEN);
                ixa->ixa_ip_hdr_length = sctp->sctp_ip_hdr6_len;
        }
}

int
sctp_compare_faddrsets(sctp_faddr_t *a1, sctp_faddr_t *a2)
{
        int na1 = 0;
        int overlap = 0;
        int equal = 1;
        int onematch;
        sctp_faddr_t *fp1, *fp2;

        for (fp1 = a1; fp1; fp1 = fp1->sf_next) {
                onematch = 0;
                for (fp2 = a2; fp2; fp2 = fp2->sf_next) {
                        if (IN6_ARE_ADDR_EQUAL(&fp1->sf_faddr,
                            &fp2->sf_faddr)) {
                                overlap++;
                                onematch = 1;
                                break;
                        }
                        if (!onematch) {
                                equal = 0;
                        }
                }
                na1++;
        }

        if (equal) {
                return (SCTP_ADDR_EQUAL);
        }
        if (overlap == na1) {
                return (SCTP_ADDR_SUBSET);
        }
        if (overlap) {
                return (SCTP_ADDR_OVERLAP);
        }
        return (SCTP_ADDR_DISJOINT);
}

/*
 * Returns 0 on success, ENOMEM on memory allocation failure, EHOSTUNREACH
 * if the connection credentials fail remote host accreditation or
 * if the new destination does not support the previously established
 * connection security label. If sleep is true, this function should
 * never fail for a memory allocation failure. The boolean parameter
 * "first" decides whether the newly created faddr structure should be
 * added at the beginning of the list or at the end.
 *
 * Note: caller must hold conn fanout lock.
 */
int
sctp_add_faddr(sctp_t *sctp, in6_addr_t *addr, int sleep, boolean_t first)
{
        sctp_faddr_t    *faddr;
        mblk_t          *timer_mp;
        int             err;
        conn_t          *connp = sctp->sctp_connp;

        if (is_system_labeled()) {
                ip_xmit_attr_t  *ixa = connp->conn_ixa;
                ts_label_t      *effective_tsl = NULL;

                ASSERT(ixa->ixa_tsl != NULL);

                /*
                 * Verify the destination is allowed to receive packets
                 * at the security label of the connection we are initiating.
                 *
                 * tsol_check_dest() will create a new effective label for
                 * this connection with a modified label or label flags only
                 * if there are changes from the original label.
                 *
                 * Accept whatever label we get if this is the first
                 * destination address for this connection. The security
                 * label and label flags must match any previuous settings
                 * for all subsequent destination addresses.
                 */
                if (IN6_IS_ADDR_V4MAPPED(addr)) {
                        uint32_t dst;
                        IN6_V4MAPPED_TO_IPADDR(addr, dst);
                        err = tsol_check_dest(ixa->ixa_tsl,
                            &dst, IPV4_VERSION, connp->conn_mac_mode,
                            connp->conn_zone_is_global, &effective_tsl);
                } else {
                        err = tsol_check_dest(ixa->ixa_tsl,
                            addr, IPV6_VERSION, connp->conn_mac_mode,
                            connp->conn_zone_is_global, &effective_tsl);
                }
                if (err != 0)
                        return (err);

                if (sctp->sctp_faddrs == NULL && effective_tsl != NULL) {
                        ip_xmit_attr_replace_tsl(ixa, effective_tsl);
                } else if (effective_tsl != NULL) {
                        label_rele(effective_tsl);
                        return (EHOSTUNREACH);
                }
        }

        if ((faddr = kmem_cache_alloc(sctp_kmem_faddr_cache, sleep)) == NULL)
                return (ENOMEM);
        bzero(faddr, sizeof (*faddr));
        timer_mp = sctp_timer_alloc((sctp), sctp_rexmit_timer, sleep);
        if (timer_mp == NULL) {
                kmem_cache_free(sctp_kmem_faddr_cache, faddr);
                return (ENOMEM);
        }
        ((sctpt_t *)(timer_mp->b_rptr))->sctpt_faddr = faddr;

        /* Start with any options set on the conn */
        faddr->sf_ixa = conn_get_ixa_exclusive(connp);
        if (faddr->sf_ixa == NULL) {
                freemsg(timer_mp);
                kmem_cache_free(sctp_kmem_faddr_cache, faddr);
                return (ENOMEM);
        }
        faddr->sf_ixa->ixa_notify_cookie = connp->conn_sctp;

        sctp_init_faddr(sctp, faddr, addr, timer_mp);
        ASSERT(faddr->sf_ixa->ixa_cred != NULL);

        /* ip_attr_connect didn't allow broadcats/multicast dest */
        ASSERT(faddr->sf_next == NULL);

        if (sctp->sctp_faddrs == NULL) {
                ASSERT(sctp->sctp_lastfaddr == NULL);
                /* only element on list; first and last are same */
                sctp->sctp_faddrs = sctp->sctp_lastfaddr = faddr;
        } else if (first) {
                ASSERT(sctp->sctp_lastfaddr != NULL);
                faddr->sf_next = sctp->sctp_faddrs;
                sctp->sctp_faddrs = faddr;
        } else {
                sctp->sctp_lastfaddr->sf_next = faddr;
                sctp->sctp_lastfaddr = faddr;
        }
        sctp->sctp_nfaddrs++;

        return (0);
}

sctp_faddr_t *
sctp_lookup_faddr(sctp_t *sctp, in6_addr_t *addr)
{
        sctp_faddr_t *fp;

        for (fp = sctp->sctp_faddrs; fp != NULL; fp = fp->sf_next) {
                if (IN6_ARE_ADDR_EQUAL(&fp->sf_faddr, addr))
                        break;
        }

        return (fp);
}

sctp_faddr_t *
sctp_lookup_faddr_nosctp(sctp_faddr_t *fp, in6_addr_t *addr)
{
        for (; fp; fp = fp->sf_next) {
                if (IN6_ARE_ADDR_EQUAL(&fp->sf_faddr, addr)) {
                        break;
                }
        }

        return (fp);
}

/*
 * To change the currently used peer address to the specified one.
 */
void
sctp_set_faddr_current(sctp_t *sctp, sctp_faddr_t *fp)
{
        /* Now setup the composite header. */
        if (fp->sf_isv4) {
                IN6_V4MAPPED_TO_IPADDR(&fp->sf_faddr,
                    sctp->sctp_ipha->ipha_dst);
                IN6_V4MAPPED_TO_IPADDR(&fp->sf_saddr,
                    sctp->sctp_ipha->ipha_src);
                /* update don't fragment bit */
                if (fp->sf_df) {
                        sctp->sctp_ipha->ipha_fragment_offset_and_flags =
                            htons(IPH_DF);
                } else {
                        sctp->sctp_ipha->ipha_fragment_offset_and_flags = 0;
                }
        } else {
                sctp->sctp_ip6h->ip6_dst = fp->sf_faddr;
                sctp->sctp_ip6h->ip6_src = fp->sf_saddr;
        }

        sctp->sctp_current = fp;
        sctp->sctp_mss = fp->sf_pmss;

        /* Update the uppper layer for the change. */
        if (!SCTP_IS_DETACHED(sctp))
                sctp_set_ulp_prop(sctp);
}

void
sctp_redo_faddr_srcs(sctp_t *sctp)
{
        sctp_faddr_t *fp;

        for (fp = sctp->sctp_faddrs; fp != NULL; fp = fp->sf_next) {
                sctp_get_dest(sctp, fp);
        }
}

void
sctp_faddr_alive(sctp_t *sctp, sctp_faddr_t *fp)
{
        int64_t now = LBOLT_FASTPATH64;

        /*
         * If we are under memory pressure, we abort association waiting
         * in zero window probing state for too long.  We do this by not
         * resetting sctp_strikes.  So if sctp_zero_win_probe continues
         * while under memory pressure, this association will eventually
         * time out.
         */
        if (!sctp->sctp_zero_win_probe || !sctp->sctp_sctps->sctps_reclaim) {
                sctp->sctp_strikes = 0;
        }
        fp->sf_strikes = 0;
        fp->sf_lastactive = now;
        fp->sf_hb_expiry = now + SET_HB_INTVL(fp);
        fp->sf_hb_pending = B_FALSE;
        if (fp->sf_state != SCTP_FADDRS_ALIVE) {
                fp->sf_state = SCTP_FADDRS_ALIVE;
                sctp_intf_event(sctp, fp->sf_faddr, SCTP_ADDR_AVAILABLE, 0);
                /* Should have a full IRE now */
                sctp_get_dest(sctp, fp);

                /*
                 * If this is the primary, switch back to it now.  And
                 * we probably want to reset the source addr used to reach
                 * it.
                 * Note that if we didn't find a source in sctp_get_dest
                 * then we'd be unreachable at this point in time.
                 */
                if (fp == sctp->sctp_primary &&
                    fp->sf_state != SCTP_FADDRS_UNREACH) {
                        sctp_set_faddr_current(sctp, fp);
                        return;
                }
        }
}

/*
 * Return B_TRUE if there is still an active peer address with zero strikes;
 * otherwise rturn B_FALSE.
 */
boolean_t
sctp_is_a_faddr_clean(sctp_t *sctp)
{
        sctp_faddr_t *fp;

        for (fp = sctp->sctp_faddrs; fp; fp = fp->sf_next) {
                if (fp->sf_state == SCTP_FADDRS_ALIVE && fp->sf_strikes == 0) {
                        return (B_TRUE);
                }
        }

        return (B_FALSE);
}

/*
 * Returns 0 if there is at leave one other active faddr, -1 if there
 * are none. If there are none left, faddr_dead() will start killing the
 * association.
 * If the downed faddr was the current faddr, a new current faddr
 * will be chosen.
 */
int
sctp_faddr_dead(sctp_t *sctp, sctp_faddr_t *fp, int newstate)
{
        sctp_faddr_t *ofp;
        sctp_stack_t *sctps = sctp->sctp_sctps;

        if (fp->sf_state == SCTP_FADDRS_ALIVE) {
                sctp_intf_event(sctp, fp->sf_faddr, SCTP_ADDR_UNREACHABLE, 0);
        }
        fp->sf_state = newstate;

        dprint(1, ("sctp_faddr_dead: %x:%x:%x:%x down (state=%d)\n",
            SCTP_PRINTADDR(fp->sf_faddr), newstate));

        if (fp == sctp->sctp_current) {
                /* Current faddr down; need to switch it */
                sctp->sctp_current = NULL;
        }

        /* Find next alive faddr */
        ofp = fp;
        for (fp = fp->sf_next; fp != NULL; fp = fp->sf_next) {
                if (fp->sf_state == SCTP_FADDRS_ALIVE) {
                        break;
                }
        }

        if (fp == NULL) {
                /* Continue from beginning of list */
                for (fp = sctp->sctp_faddrs; fp != ofp; fp = fp->sf_next) {
                        if (fp->sf_state == SCTP_FADDRS_ALIVE) {
                                break;
                        }
                }
        }

        /*
         * Find a new fp, so if the current faddr is dead, use the new fp
         * as the current one.
         */
        if (fp != ofp) {
                if (sctp->sctp_current == NULL) {
                        dprint(1, ("sctp_faddr_dead: failover->%x:%x:%x:%x\n",
                            SCTP_PRINTADDR(fp->sf_faddr)));
                        /*
                         * Note that we don't need to reset the source addr
                         * of the new fp.
                         */
                        sctp_set_faddr_current(sctp, fp);
                }
                return (0);
        }


        /* All faddrs are down; kill the association */
        dprint(1, ("sctp_faddr_dead: all faddrs down, killing assoc\n"));
        SCTPS_BUMP_MIB(sctps, sctpAborted);
        sctp_assoc_event(sctp, sctp->sctp_state < SCTPS_ESTABLISHED ?
            SCTP_CANT_STR_ASSOC : SCTP_COMM_LOST, 0, NULL);
        sctp_clean_death(sctp, sctp->sctp_client_errno ?
            sctp->sctp_client_errno : ETIMEDOUT);

        return (-1);
}

sctp_faddr_t *
sctp_rotate_faddr(sctp_t *sctp, sctp_faddr_t *ofp)
{
        sctp_faddr_t *nfp = NULL;
        sctp_faddr_t *saved_fp = NULL;
        int min_strikes;

        if (ofp == NULL) {
                ofp = sctp->sctp_current;
        }
        /* Nothing to do */
        if (sctp->sctp_nfaddrs < 2)
                return (ofp);

        /*
         * Find the next live peer address with zero strikes. In case
         * there is none, find the one with the lowest number of strikes.
         */
        min_strikes = ofp->sf_strikes;
        nfp = ofp->sf_next;
        while (nfp != ofp) {
                /* If reached end of list, continue scan from the head */
                if (nfp == NULL) {
                        nfp = sctp->sctp_faddrs;
                        continue;
                }
                if (nfp->sf_state == SCTP_FADDRS_ALIVE) {
                        if (nfp->sf_strikes == 0)
                                break;
                        if (nfp->sf_strikes < min_strikes) {
                                min_strikes = nfp->sf_strikes;
                                saved_fp = nfp;
                        }
                }
                nfp = nfp->sf_next;
        }
        /* If reached the old address, there is no zero strike path */
        if (nfp == ofp)
                nfp = NULL;

        /*
         * If there is a peer address with zero strikes  we use that, if not
         * return a peer address with fewer strikes than the one last used,
         * if neither exist we may as well stay with the old one.
         */
        if (nfp != NULL)
                return (nfp);
        if (saved_fp != NULL)
                return (saved_fp);
        return (ofp);
}

void
sctp_unlink_faddr(sctp_t *sctp, sctp_faddr_t *fp)
{
        sctp_faddr_t *fpp;

        fpp = NULL;

        if (!sctp->sctp_faddrs) {
                return;
        }

        if (fp->sf_timer_mp != NULL) {
                sctp_timer_free(fp->sf_timer_mp);
                fp->sf_timer_mp = NULL;
                fp->sf_timer_running = 0;
        }
        if (fp->sf_rc_timer_mp != NULL) {
                sctp_timer_free(fp->sf_rc_timer_mp);
                fp->sf_rc_timer_mp = NULL;
                fp->sf_rc_timer_running = 0;
        }
        if (fp->sf_ixa != NULL) {
                ixa_refrele(fp->sf_ixa);
                fp->sf_ixa = NULL;
        }

        if (fp == sctp->sctp_faddrs) {
                goto gotit;
        }

        for (fpp = sctp->sctp_faddrs; fpp->sf_next != fp; fpp = fpp->sf_next)
                ;

gotit:
        ASSERT(sctp->sctp_conn_tfp != NULL);
        mutex_enter(&sctp->sctp_conn_tfp->tf_lock);
        if (fp == sctp->sctp_faddrs) {
                sctp->sctp_faddrs = fp->sf_next;
        } else {
                fpp->sf_next = fp->sf_next;
        }
        mutex_exit(&sctp->sctp_conn_tfp->tf_lock);
        kmem_cache_free(sctp_kmem_faddr_cache, fp);
        sctp->sctp_nfaddrs--;
}

void
sctp_zap_faddrs(sctp_t *sctp, int caller_holds_lock)
{
        sctp_faddr_t *fp, *fpn;

        if (sctp->sctp_faddrs == NULL) {
                ASSERT(sctp->sctp_lastfaddr == NULL);
                return;
        }

        ASSERT(sctp->sctp_lastfaddr != NULL);
        sctp->sctp_lastfaddr = NULL;
        sctp->sctp_current = NULL;
        sctp->sctp_primary = NULL;

        sctp_free_faddr_timers(sctp);

        if (sctp->sctp_conn_tfp != NULL && !caller_holds_lock) {
                /* in conn fanout; need to hold lock */
                mutex_enter(&sctp->sctp_conn_tfp->tf_lock);
        }

        for (fp = sctp->sctp_faddrs; fp; fp = fpn) {
                fpn = fp->sf_next;
                if (fp->sf_ixa != NULL) {
                        ixa_refrele(fp->sf_ixa);
                        fp->sf_ixa = NULL;
                }
                kmem_cache_free(sctp_kmem_faddr_cache, fp);
                sctp->sctp_nfaddrs--;
        }

        sctp->sctp_faddrs = NULL;
        ASSERT(sctp->sctp_nfaddrs == 0);
        if (sctp->sctp_conn_tfp != NULL && !caller_holds_lock) {
                mutex_exit(&sctp->sctp_conn_tfp->tf_lock);
        }

}

void
sctp_zap_addrs(sctp_t *sctp)
{
        sctp_zap_faddrs(sctp, 0);
        sctp_free_saddrs(sctp);
}

/*
 * Build two SCTP header templates; one for IPv4 and one for IPv6.
 * Store them in sctp_iphc and sctp_iphc6 respectively (and related fields).
 * There are no IP addresses in the templates, but the port numbers and
 * verifier are field in from the conn_t and sctp_t.
 *
 * Returns failure if can't allocate memory, or if there is a problem
 * with a routing header/option.
 *
 * We allocate space for the minimum sctp header (sctp_hdr_t).
 *
 * We massage an routing option/header. There is no checksum implication
 * for a routing header for sctp.
 *
 * Caller needs to update conn_wroff if desired.
 *
 * TSol notes: This assumes that a SCTP association has a single peer label
 * since we only track a single pair of ipp_label_v4/v6 and not a separate one
 * for each faddr.
 */
int
sctp_build_hdrs(sctp_t *sctp, int sleep)
{
        conn_t          *connp = sctp->sctp_connp;
        ip_pkt_t        *ipp = &connp->conn_xmit_ipp;
        uint_t          ip_hdr_length;
        uchar_t         *hdrs;
        uint_t          hdrs_len;
        uint_t          ulp_hdr_length = sizeof (sctp_hdr_t);
        ipha_t          *ipha;
        ip6_t           *ip6h;
        sctp_hdr_t      *sctph;
        in6_addr_t      v6src, v6dst;
        ipaddr_t        v4src, v4dst;

        v4src = connp->conn_saddr_v4;
        v4dst = connp->conn_faddr_v4;
        v6src = connp->conn_saddr_v6;
        v6dst = connp->conn_faddr_v6;

        /* First do IPv4 header */
        ip_hdr_length = ip_total_hdrs_len_v4(ipp);

        /* In case of TX label and IP options it can be too much */
        if (ip_hdr_length > IP_MAX_HDR_LENGTH) {
                /* Preserves existing TX errno for this */
                return (EHOSTUNREACH);
        }
        hdrs_len = ip_hdr_length + ulp_hdr_length;
        ASSERT(hdrs_len != 0);

        if (hdrs_len != sctp->sctp_iphc_len) {
                /* Allocate new before we free any old */
                hdrs = kmem_alloc(hdrs_len, sleep);
                if (hdrs == NULL)
                        return (ENOMEM);

                if (sctp->sctp_iphc != NULL)
                        kmem_free(sctp->sctp_iphc, sctp->sctp_iphc_len);
                sctp->sctp_iphc = hdrs;
                sctp->sctp_iphc_len = hdrs_len;
        } else {
                hdrs = sctp->sctp_iphc;
        }
        sctp->sctp_hdr_len = sctp->sctp_iphc_len;
        sctp->sctp_ip_hdr_len = ip_hdr_length;

        sctph = (sctp_hdr_t *)(hdrs + ip_hdr_length);
        sctp->sctp_sctph = sctph;
        sctph->sh_sport = connp->conn_lport;
        sctph->sh_dport = connp->conn_fport;
        sctph->sh_verf = sctp->sctp_fvtag;
        sctph->sh_chksum = 0;

        ipha = (ipha_t *)hdrs;
        sctp->sctp_ipha = ipha;

        ipha->ipha_src = v4src;
        ipha->ipha_dst = v4dst;
        ip_build_hdrs_v4(hdrs, ip_hdr_length, ipp, connp->conn_proto);
        ipha->ipha_length = htons(hdrs_len);
        ipha->ipha_fragment_offset_and_flags = 0;

        if (ipp->ipp_fields & IPPF_IPV4_OPTIONS)
                (void) ip_massage_options(ipha, connp->conn_netstack);

        /* Now IPv6 */
        ip_hdr_length = ip_total_hdrs_len_v6(ipp);
        hdrs_len = ip_hdr_length + ulp_hdr_length;
        ASSERT(hdrs_len != 0);

        if (hdrs_len != sctp->sctp_iphc6_len) {
                /* Allocate new before we free any old */
                hdrs = kmem_alloc(hdrs_len, sleep);
                if (hdrs == NULL)
                        return (ENOMEM);

                if (sctp->sctp_iphc6 != NULL)
                        kmem_free(sctp->sctp_iphc6, sctp->sctp_iphc6_len);
                sctp->sctp_iphc6 = hdrs;
                sctp->sctp_iphc6_len = hdrs_len;
        } else {
                hdrs = sctp->sctp_iphc6;
        }
        sctp->sctp_hdr6_len = sctp->sctp_iphc6_len;
        sctp->sctp_ip_hdr6_len = ip_hdr_length;

        sctph = (sctp_hdr_t *)(hdrs + ip_hdr_length);
        sctp->sctp_sctph6 = sctph;
        sctph->sh_sport = connp->conn_lport;
        sctph->sh_dport = connp->conn_fport;
        sctph->sh_verf = sctp->sctp_fvtag;
        sctph->sh_chksum = 0;

        ip6h = (ip6_t *)hdrs;
        sctp->sctp_ip6h = ip6h;

        ip6h->ip6_src = v6src;
        ip6h->ip6_dst = v6dst;
        ip_build_hdrs_v6(hdrs, ip_hdr_length, ipp, connp->conn_proto,
            connp->conn_flowinfo);
        ip6h->ip6_plen = htons(hdrs_len - IPV6_HDR_LEN);

        if (ipp->ipp_fields & IPPF_RTHDR) {
                uint8_t         *end;
                ip6_rthdr_t     *rth;

                end = (uint8_t *)ip6h + ip_hdr_length;
                rth = ip_find_rthdr_v6(ip6h, end);
                if (rth != NULL) {
                        (void) ip_massage_options_v6(ip6h, rth,
                            connp->conn_netstack);
                }

                /*
                 * Verify that the first hop isn't a mapped address.
                 * Routers along the path need to do this verification
                 * for subsequent hops.
                 */
                if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst))
                        return (EADDRNOTAVAIL);
        }
        return (0);
}

static int
sctp_v4_label(sctp_t *sctp, sctp_faddr_t *fp)
{
        conn_t *connp = sctp->sctp_connp;

        ASSERT(fp->sf_ixa->ixa_flags & IXAF_IS_IPV4);
        return (conn_update_label(connp, fp->sf_ixa, &fp->sf_faddr,
            &connp->conn_xmit_ipp));
}

static int
sctp_v6_label(sctp_t *sctp, sctp_faddr_t *fp)
{
        conn_t *connp = sctp->sctp_connp;

        ASSERT(!(fp->sf_ixa->ixa_flags & IXAF_IS_IPV4));
        return (conn_update_label(connp, fp->sf_ixa, &fp->sf_faddr,
            &connp->conn_xmit_ipp));
}

/*
 * XXX implement more sophisticated logic
 *
 * Tsol note: We have already verified the addresses using tsol_check_dest
 * in sctp_add_faddr, thus no need to redo that here.
 * We do setup ipp_label_v4 and ipp_label_v6 based on which addresses
 * we have.
 */
int
sctp_set_hdraddrs(sctp_t *sctp)
{
        sctp_faddr_t *fp;
        int gotv4 = 0;
        int gotv6 = 0;
        conn_t *connp = sctp->sctp_connp;

        ASSERT(sctp->sctp_faddrs != NULL);
        ASSERT(sctp->sctp_nsaddrs > 0);

        /* Set up using the primary first */
        connp->conn_faddr_v6 = sctp->sctp_primary->sf_faddr;
        /* saddr may be unspec; make_mp() will handle this */
        connp->conn_saddr_v6 = sctp->sctp_primary->sf_saddr;
        connp->conn_laddr_v6 = connp->conn_saddr_v6;
        if (IN6_IS_ADDR_V4MAPPED(&sctp->sctp_primary->sf_faddr)) {
                if (!is_system_labeled() ||
                    sctp_v4_label(sctp, sctp->sctp_primary) == 0) {
                        gotv4 = 1;
                        if (connp->conn_family == AF_INET) {
                                goto done;
                        }
                }
        } else {
                if (!is_system_labeled() ||
                    sctp_v6_label(sctp, sctp->sctp_primary) == 0) {
                        gotv6 = 1;
                }
        }

        for (fp = sctp->sctp_faddrs; fp; fp = fp->sf_next) {
                if (!gotv4 && IN6_IS_ADDR_V4MAPPED(&fp->sf_faddr)) {
                        if (!is_system_labeled() ||
                            sctp_v4_label(sctp, fp) == 0) {
                                gotv4 = 1;
                                if (connp->conn_family == AF_INET || gotv6) {
                                        break;
                                }
                        }
                } else if (!gotv6 && !IN6_IS_ADDR_V4MAPPED(&fp->sf_faddr)) {
                        if (!is_system_labeled() ||
                            sctp_v6_label(sctp, fp) == 0) {
                                gotv6 = 1;
                                if (gotv4)
                                        break;
                        }
                }
        }

done:
        if (!gotv4 && !gotv6)
                return (EACCES);

        return (0);
}

/*
 * got_errchunk is set B_TRUE only if called from validate_init_params(), when
 * an ERROR chunk is already prepended the size of which needs updating for
 * additional unrecognized parameters. Other callers either prepend the ERROR
 * chunk with the correct size after calling this function, or they are calling
 * to add an invalid parameter to an INIT_ACK chunk, in that case no ERROR chunk
 * exists, the CAUSE blocks go into the INIT_ACK directly.
 *
 * *errmp will be non-NULL both when adding an additional CAUSE block to an
 * existing prepended COOKIE ERROR chunk (processing params of an INIT_ACK),
 * and when adding unrecognized parameters after the first, to an INIT_ACK
 * (processing params of an INIT chunk).
 */
void
sctp_add_unrec_parm(sctp_parm_hdr_t *uph, mblk_t **errmp,
    boolean_t got_errchunk)
{
        mblk_t *mp;
        sctp_parm_hdr_t *ph;
        size_t len;
        int pad;
        sctp_chunk_hdr_t *ecp;

        len = sizeof (*ph) + ntohs(uph->sph_len);
        if ((pad = len % SCTP_ALIGN) != 0) {
                pad = SCTP_ALIGN - pad;
                len += pad;
        }
        mp = allocb(len, BPRI_MED);
        if (mp == NULL) {
                return;
        }

        ph = (sctp_parm_hdr_t *)(mp->b_rptr);
        ph->sph_type = htons(PARM_UNRECOGNIZED);
        ph->sph_len = htons(len - pad);

        /* copy in the unrecognized parameter */
        bcopy(uph, ph + 1, ntohs(uph->sph_len));

        if (pad != 0)
                bzero((mp->b_rptr + len - pad), pad);

        mp->b_wptr = mp->b_rptr + len;
        if (*errmp != NULL) {
                /*
                 * Update total length if an ERROR chunk, then link
                 * this CAUSE block to the possible chain of CAUSE
                 * blocks attached to the ERROR chunk or INIT_ACK
                 * being created.
                 */
                if (got_errchunk) {
                        /* ERROR chunk already prepended */
                        ecp = (sctp_chunk_hdr_t *)((*errmp)->b_rptr);
                        ecp->sch_len = htons(ntohs(ecp->sch_len) + len);
                }
                linkb(*errmp, mp);
        } else {
                *errmp = mp;
        }
}

/*
 * o Bounds checking
 * o Updates remaining
 * o Checks alignment
 */
sctp_parm_hdr_t *
sctp_next_parm(sctp_parm_hdr_t *current, ssize_t *remaining)
{
        int pad;
        uint16_t len;

        len = ntohs(current->sph_len);
        *remaining -= len;
        if (*remaining < sizeof (*current) || len < sizeof (*current)) {
                return (NULL);
        }
        if ((pad = len & (SCTP_ALIGN - 1)) != 0) {
                pad = SCTP_ALIGN - pad;
                *remaining -= pad;
        }
        /*LINTED pointer cast may result in improper alignment*/
        current = (sctp_parm_hdr_t *)((char *)current + len + pad);
        return (current);
}

/*
 * Sets the address parameters given in the INIT chunk into sctp's
 * faddrs; if psctp is non-NULL, copies psctp's saddrs. If there are
 * no address parameters in the INIT chunk, a single faddr is created
 * from the ip hdr at the beginning of pkt.
 * If there already are existing addresses hanging from sctp, merge
 * them in, if the old info contains addresses which are not present
 * in this new info, get rid of them, and clean the pointers if there's
 * messages which have this as their target address.
 *
 * We also re-adjust the source address list here since the list may
 * contain more than what is actually part of the association. If
 * we get here from sctp_send_cookie_echo(), we are on the active
 * side and psctp will be NULL and ich will be the INIT-ACK chunk.
 * If we get here from sctp_accept_comm(), ich will be the INIT chunk
 * and psctp will the listening endpoint.
 *
 * INIT processing: When processing the INIT we inherit the src address
 * list from the listener. For a loopback or linklocal association, we
 * delete the list and just take the address from the IP header (since
 * that's how we created the INIT-ACK). Additionally, for loopback we
 * ignore the address params in the INIT. For determining which address
 * types were sent in the INIT-ACK we follow the same logic as in
 * creating the INIT-ACK. We delete addresses of the type that are not
 * supported by the peer.
 *
 * INIT-ACK processing: When processing the INIT-ACK since we had not
 * included addr params for loopback or linklocal addresses when creating
 * the INIT, we just use the address from the IP header. Further, for
 * loopback we ignore the addr param list. We mark addresses of the
 * type not supported by the peer as unconfirmed.
 *
 * In case of INIT processing we look for supported address types in the
 * supported address param, if present. In both cases the address type in
 * the IP header is supported as well as types for addresses in the param
 * list, if any.
 *
 * Once we have the supported address types sctp_check_saddr() runs through
 * the source address list and deletes or marks as unconfirmed address of
 * types not supported by the peer.
 *
 * Returns 0 on success, sys errno on failure
 */
int
sctp_get_addrparams(sctp_t *sctp, sctp_t *psctp, mblk_t *pkt,
    sctp_chunk_hdr_t *ich, uint_t *sctp_options)
{
        sctp_init_chunk_t       *init;
        ipha_t                  *iph;
        ip6_t                   *ip6h;
        in6_addr_t              hdrsaddr[1];
        in6_addr_t              hdrdaddr[1];
        sctp_parm_hdr_t         *ph;
        ssize_t                 remaining;
        int                     isv4;
        int                     err;
        sctp_faddr_t            *fp;
        int                     supp_af = 0;
        boolean_t               check_saddr = B_TRUE;
        in6_addr_t              curaddr;
        sctp_stack_t            *sctps = sctp->sctp_sctps;
        conn_t                  *connp = sctp->sctp_connp;

        if (sctp_options != NULL)
                *sctp_options = 0;

        /* extract the address from the IP header */
        isv4 = (IPH_HDR_VERSION(pkt->b_rptr) == IPV4_VERSION);
        if (isv4) {
                iph = (ipha_t *)pkt->b_rptr;
                IN6_IPADDR_TO_V4MAPPED(iph->ipha_src, hdrsaddr);
                IN6_IPADDR_TO_V4MAPPED(iph->ipha_dst, hdrdaddr);
                supp_af |= PARM_SUPP_V4;
        } else {
                ip6h = (ip6_t *)pkt->b_rptr;
                hdrsaddr[0] = ip6h->ip6_src;
                hdrdaddr[0] = ip6h->ip6_dst;
                supp_af |= PARM_SUPP_V6;
        }

        /*
         * Unfortunately, we can't delay this because adding an faddr
         * looks for the presence of the source address (from the ire
         * for the faddr) in the source address list. We could have
         * delayed this if, say, this was a loopback/linklocal connection.
         * Now, we just end up nuking this list and taking the addr from
         * the IP header for loopback/linklocal.
         */
        if (psctp != NULL && psctp->sctp_nsaddrs > 0) {
                ASSERT(sctp->sctp_nsaddrs == 0);

                err = sctp_dup_saddrs(psctp, sctp, KM_NOSLEEP);
                if (err != 0)
                        return (err);
        }
        /*
         * We will add the faddr before parsing the address list as this
         * might be a loopback connection and we would not have to
         * go through the list.
         *
         * Make sure the header's addr is in the list
         */
        fp = sctp_lookup_faddr(sctp, hdrsaddr);
        if (fp == NULL) {
                /* not included; add it now */
                err = sctp_add_faddr(sctp, hdrsaddr, KM_NOSLEEP, B_TRUE);
                if (err != 0)
                        return (err);

                /* sctp_faddrs will be the hdr addr */
                fp = sctp->sctp_faddrs;
        }
        /* make the header addr the primary */

        if (cl_sctp_assoc_change != NULL && psctp == NULL)
                curaddr = sctp->sctp_current->sf_faddr;

        sctp->sctp_primary = fp;
        sctp->sctp_current = fp;
        sctp->sctp_mss = fp->sf_pmss;

        /* For loopback connections & linklocal get address from the header */
        if (sctp->sctp_loopback || sctp->sctp_linklocal) {
                if (sctp->sctp_nsaddrs != 0)
                        sctp_free_saddrs(sctp);
                if ((err = sctp_saddr_add_addr(sctp, hdrdaddr, 0)) != 0)
                        return (err);
                /* For loopback ignore address list */
                if (sctp->sctp_loopback)
                        return (0);
                check_saddr = B_FALSE;
        }

        /* Walk the params in the INIT [ACK], pulling out addr params */
        remaining = ntohs(ich->sch_len) - sizeof (*ich) -
            sizeof (sctp_init_chunk_t);
        if (remaining < sizeof (*ph)) {
                if (check_saddr) {
                        sctp_check_saddr(sctp, supp_af, psctp == NULL ?
                            B_FALSE : B_TRUE, hdrdaddr);
                }
                ASSERT(sctp_saddr_lookup(sctp, hdrdaddr, 0) != NULL);
                return (0);
        }

        init = (sctp_init_chunk_t *)(ich + 1);
        ph = (sctp_parm_hdr_t *)(init + 1);

        /* params will have already been byteordered when validating */
        while (ph != NULL) {
                if (ph->sph_type == htons(PARM_SUPP_ADDRS)) {
                        int             plen;
                        uint16_t        *p;
                        uint16_t        addrtype;

                        ASSERT(psctp != NULL);
                        plen = ntohs(ph->sph_len);
                        p = (uint16_t *)(ph + 1);
                        while (plen > 0) {
                                addrtype = ntohs(*p);
                                switch (addrtype) {
                                        case PARM_ADDR6:
                                                supp_af |= PARM_SUPP_V6;
                                                break;
                                        case PARM_ADDR4:
                                                supp_af |= PARM_SUPP_V4;
                                                break;
                                        default:
                                                break;
                                }
                                p++;
                                plen -= sizeof (*p);
                        }
                } else if (ph->sph_type == htons(PARM_ADDR4)) {
                        if (remaining >= PARM_ADDR4_LEN) {
                                in6_addr_t addr;
                                ipaddr_t ta;

                                supp_af |= PARM_SUPP_V4;
                                /*
                                 * Screen out broad/multicasts & loopback.
                                 * If the endpoint only accepts v6 address,
                                 * go to the next one.
                                 *
                                 * Subnet broadcast check is done in
                                 * sctp_add_faddr().  If the address is
                                 * a broadcast address, it won't be added.
                                 */
                                bcopy(ph + 1, &ta, sizeof (ta));
                                if (ta == 0 ||
                                    ta == INADDR_BROADCAST ||
                                    ta == htonl(INADDR_LOOPBACK) ||
                                    CLASSD(ta) || connp->conn_ipv6_v6only) {
                                        goto next;
                                }
                                IN6_INADDR_TO_V4MAPPED((struct in_addr *)
                                    (ph + 1), &addr);

                                /* Check for duplicate. */
                                if (sctp_lookup_faddr(sctp, &addr) != NULL)
                                        goto next;

                                /* OK, add it to the faddr set */
                                err = sctp_add_faddr(sctp, &addr, KM_NOSLEEP,
                                    B_FALSE);
                                /* Something is wrong...  Try the next one. */
                                if (err != 0)
                                        goto next;
                        }
                } else if (ph->sph_type == htons(PARM_ADDR6) &&
                    connp->conn_family == AF_INET6) {
                        /* An v4 socket should not take v6 addresses. */
                        if (remaining >= PARM_ADDR6_LEN) {
                                in6_addr_t *addr6;

                                supp_af |= PARM_SUPP_V6;
                                addr6 = (in6_addr_t *)(ph + 1);
                                /*
                                 * Screen out link locals, mcast, loopback
                                 * and bogus v6 address.
                                 */
                                if (IN6_IS_ADDR_LINKLOCAL(addr6) ||
                                    IN6_IS_ADDR_MULTICAST(addr6) ||
                                    IN6_IS_ADDR_LOOPBACK(addr6) ||
                                    IN6_IS_ADDR_V4MAPPED(addr6)) {
                                        goto next;
                                }
                                /* Check for duplicate. */
                                if (sctp_lookup_faddr(sctp, addr6) != NULL)
                                        goto next;

                                err = sctp_add_faddr(sctp,
                                    (in6_addr_t *)(ph + 1), KM_NOSLEEP,
                                    B_FALSE);
                                /* Something is wrong...  Try the next one. */
                                if (err != 0)
                                        goto next;
                        }
                } else if (ph->sph_type == htons(PARM_FORWARD_TSN)) {
                        if (sctp_options != NULL)
                                *sctp_options |= SCTP_PRSCTP_OPTION;
                } /* else; skip */

next:
                ph = sctp_next_parm(ph, &remaining);
        }
        if (check_saddr) {
                sctp_check_saddr(sctp, supp_af, psctp == NULL ? B_FALSE :
                    B_TRUE, hdrdaddr);
        }
        ASSERT(sctp_saddr_lookup(sctp, hdrdaddr, 0) != NULL);
        /*
         * We have the right address list now, update clustering's
         * knowledge because when we sent the INIT we had just added
         * the address the INIT was sent to.
         */
        if (psctp == NULL && cl_sctp_assoc_change != NULL) {
                uchar_t *alist;
                size_t  asize;
                uchar_t *dlist;
                size_t  dsize;

                asize = sizeof (in6_addr_t) * sctp->sctp_nfaddrs;
                alist = kmem_alloc(asize, KM_NOSLEEP);
                if (alist == NULL) {
                        SCTP_KSTAT(sctps, sctp_cl_assoc_change);
                        return (ENOMEM);
                }
                /*
                 * Just include the address the INIT was sent to in the
                 * delete list and send the entire faddr list. We could
                 * do it differently (i.e include all the addresses in the
                 * add list even if it contains the original address OR
                 * remove the original address from the add list etc.), but
                 * this seems reasonable enough.
                 */
                dsize = sizeof (in6_addr_t);
                dlist = kmem_alloc(dsize, KM_NOSLEEP);
                if (dlist == NULL) {
                        kmem_free(alist, asize);
                        SCTP_KSTAT(sctps, sctp_cl_assoc_change);
                        return (ENOMEM);
                }
                bcopy(&curaddr, dlist, sizeof (curaddr));
                sctp_get_faddr_list(sctp, alist, asize);
                (*cl_sctp_assoc_change)(connp->conn_family, alist, asize,
                    sctp->sctp_nfaddrs, dlist, dsize, 1, SCTP_CL_PADDR,
                    (cl_sctp_handle_t)sctp);
                /* alist and dlist will be freed by the clustering module */
        }
        return (0);
}

/*
 * Returns 0 if the check failed and the restart should be refused,
 * 1 if the check succeeded.
 */
int
sctp_secure_restart_check(mblk_t *pkt, sctp_chunk_hdr_t *ich, uint32_t ports,
    int sleep, sctp_stack_t *sctps, ip_recv_attr_t *ira)
{
        sctp_faddr_t *fp, *fphead = NULL;
        sctp_parm_hdr_t *ph;
        ssize_t remaining;
        int isv4;
        ipha_t *iph;
        ip6_t *ip6h;
        in6_addr_t hdraddr[1];
        int retval = 0;
        sctp_tf_t *tf;
        sctp_t *sctp;
        int compres;
        sctp_init_chunk_t *init;
        int nadded = 0;

        /* extract the address from the IP header */
        isv4 = (IPH_HDR_VERSION(pkt->b_rptr) == IPV4_VERSION);
        if (isv4) {
                iph = (ipha_t *)pkt->b_rptr;
                IN6_IPADDR_TO_V4MAPPED(iph->ipha_src, hdraddr);
        } else {
                ip6h = (ip6_t *)pkt->b_rptr;
                hdraddr[0] = ip6h->ip6_src;
        }

        /* Walk the params in the INIT [ACK], pulling out addr params */
        remaining = ntohs(ich->sch_len) - sizeof (*ich) -
            sizeof (sctp_init_chunk_t);
        if (remaining < sizeof (*ph)) {
                /* no parameters; restart OK */
                return (1);
        }
        init = (sctp_init_chunk_t *)(ich + 1);
        ph = (sctp_parm_hdr_t *)(init + 1);

        while (ph != NULL) {
                sctp_faddr_t *fpa = NULL;

                /* params will have already been byteordered when validating */
                if (ph->sph_type == htons(PARM_ADDR4)) {
                        if (remaining >= PARM_ADDR4_LEN) {
                                in6_addr_t addr;
                                IN6_INADDR_TO_V4MAPPED((struct in_addr *)
                                    (ph + 1), &addr);
                                fpa = kmem_cache_alloc(sctp_kmem_faddr_cache,
                                    sleep);
                                if (fpa == NULL) {
                                        goto done;
                                }
                                bzero(fpa, sizeof (*fpa));
                                fpa->sf_faddr = addr;
                                fpa->sf_next = NULL;
                        }
                } else if (ph->sph_type == htons(PARM_ADDR6)) {
                        if (remaining >= PARM_ADDR6_LEN) {
                                fpa = kmem_cache_alloc(sctp_kmem_faddr_cache,
                                    sleep);
                                if (fpa == NULL) {
                                        goto done;
                                }
                                bzero(fpa, sizeof (*fpa));
                                bcopy(ph + 1, &fpa->sf_faddr,
                                    sizeof (fpa->sf_faddr));
                                fpa->sf_next = NULL;
                        }
                }
                /* link in the new addr, if it was an addr param */
                if (fpa != NULL) {
                        if (fphead == NULL) {
                                fphead = fpa;
                        } else {
                                fpa->sf_next = fphead;
                                fphead = fpa;
                        }
                }

                ph = sctp_next_parm(ph, &remaining);
        }

        if (fphead == NULL) {
                /* no addr parameters; restart OK */
                return (1);
        }

        /*
         * got at least one; make sure the header's addr is
         * in the list
         */
        fp = sctp_lookup_faddr_nosctp(fphead, hdraddr);
        if (fp == NULL) {
                /* not included; add it now */
                fp = kmem_cache_alloc(sctp_kmem_faddr_cache, sleep);
                if (fp == NULL) {
                        goto done;
                }
                bzero(fp, sizeof (*fp));
                fp->sf_faddr = *hdraddr;
                fp->sf_next = fphead;
                fphead = fp;
        }

        /*
         * Now, we can finally do the check: For each sctp instance
         * on the hash line for ports, compare its faddr set against
         * the new one. If the new one is a strict subset of any
         * existing sctp's faddrs, the restart is OK. However, if there
         * is an overlap, this could be an attack, so return failure.
         * If all sctp's faddrs are disjoint, this is a legitimate new
         * association.
         */
        tf = &(sctps->sctps_conn_fanout[SCTP_CONN_HASH(sctps, ports)]);
        mutex_enter(&tf->tf_lock);

        for (sctp = tf->tf_sctp; sctp; sctp = sctp->sctp_conn_hash_next) {
                if (ports != sctp->sctp_connp->conn_ports) {
                        continue;
                }
                compres = sctp_compare_faddrsets(fphead, sctp->sctp_faddrs);
                if (compres <= SCTP_ADDR_SUBSET) {
                        retval = 1;
                        mutex_exit(&tf->tf_lock);
                        goto done;
                }
                if (compres == SCTP_ADDR_OVERLAP) {
                        dprint(1,
                            ("new assoc from %x:%x:%x:%x overlaps with %p\n",
                            SCTP_PRINTADDR(*hdraddr), (void *)sctp));
                        /*
                         * While we still hold the lock, we need to
                         * figure out which addresses have been
                         * added so we can include them in the abort
                         * we will send back. Since these faddrs will
                         * never be used, we overload the rto field
                         * here, setting it to 0 if the address was
                         * not added, 1 if it was added.
                         */
                        for (fp = fphead; fp; fp = fp->sf_next) {
                                if (sctp_lookup_faddr(sctp, &fp->sf_faddr)) {
                                        fp->sf_rto = 0;
                                } else {
                                        fp->sf_rto = 1;
                                        nadded++;
                                }
                        }
                        mutex_exit(&tf->tf_lock);
                        goto done;
                }
        }
        mutex_exit(&tf->tf_lock);

        /* All faddrs are disjoint; legit new association */
        retval = 1;

done:
        /* If are attempted adds, send back an abort listing the addrs */
        if (nadded > 0) {
                void *dtail;
                size_t dlen;

                dtail = kmem_alloc(PARM_ADDR6_LEN * nadded, KM_NOSLEEP);
                if (dtail == NULL) {
                        goto cleanup;
                }

                ph = dtail;
                dlen = 0;
                for (fp = fphead; fp; fp = fp->sf_next) {
                        if (fp->sf_rto == 0) {
                                continue;
                        }
                        if (IN6_IS_ADDR_V4MAPPED(&fp->sf_faddr)) {
                                ipaddr_t addr4;

                                ph->sph_type = htons(PARM_ADDR4);
                                ph->sph_len = htons(PARM_ADDR4_LEN);
                                IN6_V4MAPPED_TO_IPADDR(&fp->sf_faddr, addr4);
                                ph++;
                                bcopy(&addr4, ph, sizeof (addr4));
                                ph = (sctp_parm_hdr_t *)
                                    ((char *)ph + sizeof (addr4));
                                dlen += PARM_ADDR4_LEN;
                        } else {
                                ph->sph_type = htons(PARM_ADDR6);
                                ph->sph_len = htons(PARM_ADDR6_LEN);
                                ph++;
                                bcopy(&fp->sf_faddr, ph, sizeof (fp->sf_faddr));
                                ph = (sctp_parm_hdr_t *)
                                    ((char *)ph + sizeof (fp->sf_faddr));
                                dlen += PARM_ADDR6_LEN;
                        }
                }

                /* Send off the abort */
                sctp_send_abort(sctp, sctp_init2vtag(ich),
                    SCTP_ERR_RESTART_NEW_ADDRS, dtail, dlen, pkt, 0, B_TRUE,
                    ira);

                kmem_free(dtail, PARM_ADDR6_LEN * nadded);
        }

cleanup:
        /* Clean up */
        if (fphead) {
                sctp_faddr_t *fpn;
                for (fp = fphead; fp; fp = fpn) {
                        fpn = fp->sf_next;
                        if (fp->sf_ixa != NULL) {
                                ixa_refrele(fp->sf_ixa);
                                fp->sf_ixa = NULL;
                        }
                        kmem_cache_free(sctp_kmem_faddr_cache, fp);
                }
        }

        return (retval);
}

/*
 * Reset any state related to transmitted chunks.
 */
void
sctp_congest_reset(sctp_t *sctp)
{
        sctp_faddr_t    *fp;
        sctp_stack_t    *sctps = sctp->sctp_sctps;
        mblk_t          *mp;

        for (fp = sctp->sctp_faddrs; fp != NULL; fp = fp->sf_next) {
                fp->sf_ssthresh = sctps->sctps_initial_mtu;
                SET_CWND(fp, fp->sf_pmss, sctps->sctps_slow_start_initial);
                fp->sf_suna = 0;
                fp->sf_pba = 0;
        }
        /*
         * Clean up the transmit list as well since we have reset accounting
         * on all the fps. Send event upstream, if required.
         */
        while ((mp = sctp->sctp_xmit_head) != NULL) {
                sctp->sctp_xmit_head = mp->b_next;
                mp->b_next = NULL;
                if (sctp->sctp_xmit_head != NULL)
                        sctp->sctp_xmit_head->b_prev = NULL;
                sctp_sendfail_event(sctp, mp, 0, B_TRUE);
        }
        sctp->sctp_xmit_head = NULL;
        sctp->sctp_xmit_tail = NULL;
        sctp->sctp_xmit_unacked = NULL;

        sctp->sctp_unacked = 0;
        /*
         * Any control message as well. We will clean-up this list as well.
         * This contains any pending ASCONF request that we have queued/sent.
         * If we do get an ACK we will just drop it. However, given that
         * we are restarting chances are we aren't going to get any.
         */
        if (sctp->sctp_cxmit_list != NULL)
                sctp_asconf_free_cxmit(sctp, NULL);
        sctp->sctp_cxmit_list = NULL;
        sctp->sctp_cchunk_pend = 0;

        sctp->sctp_rexmitting = B_FALSE;
        sctp->sctp_rxt_nxttsn = 0;
        sctp->sctp_rxt_maxtsn = 0;

        sctp->sctp_zero_win_probe = B_FALSE;
}

static void
sctp_init_faddr(sctp_t *sctp, sctp_faddr_t *fp, in6_addr_t *addr,
    mblk_t *timer_mp)
{
        sctp_stack_t    *sctps = sctp->sctp_sctps;

        ASSERT(fp->sf_ixa != NULL);

        bcopy(addr, &fp->sf_faddr, sizeof (*addr));
        if (IN6_IS_ADDR_V4MAPPED(addr)) {
                fp->sf_isv4 = 1;
                /* Make sure that sf_pmss is a multiple of SCTP_ALIGN. */
                fp->sf_pmss =
                    (sctps->sctps_initial_mtu - sctp->sctp_hdr_len) &
                    ~(SCTP_ALIGN - 1);
                fp->sf_ixa->ixa_flags |= IXAF_IS_IPV4;
        } else {
                fp->sf_isv4 = 0;
                fp->sf_pmss =
                    (sctps->sctps_initial_mtu - sctp->sctp_hdr6_len) &
                    ~(SCTP_ALIGN - 1);
                fp->sf_ixa->ixa_flags &= ~IXAF_IS_IPV4;
        }
        fp->sf_cwnd = sctps->sctps_slow_start_initial * fp->sf_pmss;
        fp->sf_rto = MIN(sctp->sctp_rto_initial, sctp->sctp_rto_max_init);
        SCTP_MAX_RTO(sctp, fp);
        fp->sf_srtt = -1;
        fp->sf_rtt_updates = 0;
        fp->sf_strikes = 0;
        fp->sf_max_retr = sctp->sctp_pp_max_rxt;
        /* Mark it as not confirmed. */
        fp->sf_state = SCTP_FADDRS_UNCONFIRMED;
        fp->sf_hb_interval = sctp->sctp_hb_interval;
        fp->sf_ssthresh = sctps->sctps_initial_ssthresh;
        fp->sf_suna = 0;
        fp->sf_pba = 0;
        fp->sf_acked = 0;
        fp->sf_lastactive = fp->sf_hb_expiry = ddi_get_lbolt64();
        fp->sf_timer_mp = timer_mp;
        fp->sf_hb_pending = B_FALSE;
        fp->sf_hb_enabled = B_TRUE;
        fp->sf_df = 1;
        fp->sf_pmtu_discovered = 0;
        fp->sf_next = NULL;
        fp->sf_T3expire = 0;
        (void) random_get_pseudo_bytes((uint8_t *)&fp->sf_hb_secret,
            sizeof (fp->sf_hb_secret));
        fp->sf_rxt_unacked = 0;

        sctp_get_dest(sctp, fp);
}

/*ARGSUSED*/
static int
faddr_constructor(void *buf, void *arg, int flags)
{
        sctp_faddr_t *fp = buf;

        fp->sf_timer_mp = NULL;
        fp->sf_timer_running = 0;

        fp->sf_rc_timer_mp = NULL;
        fp->sf_rc_timer_running = 0;

        return (0);
}

/*ARGSUSED*/
static void
faddr_destructor(void *buf, void *arg)
{
        sctp_faddr_t *fp = buf;

        ASSERT(fp->sf_timer_mp == NULL);
        ASSERT(fp->sf_timer_running == 0);

        ASSERT(fp->sf_rc_timer_mp == NULL);
        ASSERT(fp->sf_rc_timer_running == 0);
}

void
sctp_faddr_init(void)
{
        sctp_kmem_faddr_cache = kmem_cache_create("sctp_faddr_cache",
            sizeof (sctp_faddr_t), 0, faddr_constructor, faddr_destructor,
            NULL, NULL, NULL, 0);
}

void
sctp_faddr_fini(void)
{
        kmem_cache_destroy(sctp_kmem_faddr_cache);
}