root/usr/src/uts/common/inet/tcp/tcp_bind.c
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright 2013 Nexenta Systems, Inc.  All rights reserved.
 * Copyright (c) 2016 by Delphix. All rights reserved.
 * Copyright 2024 Bill Sommerfeld <sommerfeld@hamachi.org>
 */

#include <sys/types.h>
#include <sys/stream.h>
#include <sys/strsun.h>
#include <sys/strsubr.h>
#include <sys/stropts.h>
#include <sys/strlog.h>
#define _SUN_TPI_VERSION 2
#include <sys/tihdr.h>
#include <sys/suntpi.h>
#include <sys/xti_inet.h>
#include <sys/policy.h>
#include <sys/squeue_impl.h>
#include <sys/squeue.h>
#include <sys/tsol/tnet.h>

#include <rpc/pmap_prot.h>

#include <inet/common.h>
#include <inet/ip.h>
#include <inet/tcp.h>
#include <inet/tcp_impl.h>
#include <inet/proto_set.h>
#include <inet/ipsec_impl.h>

/* Setable in /etc/system */
/* If set to 0, pick ephemeral port sequentially; otherwise randomly. */
static uint32_t tcp_random_anon_port = 1;

static int      tcp_bind_select_lport(tcp_t *, in_port_t *, boolean_t,
                    cred_t *cr);
static in_port_t        tcp_get_next_priv_port(const tcp_t *);

/*
 * Hash list insertion routine for tcp_t structures. Each hash bucket
 * contains a list of tcp_t entries, and each entry is bound to a unique
 * port. If there are multiple tcp_t's that are bound to the same port, then
 * one of them will be linked into the hash bucket list, and the rest will
 * hang off of that one entry. For each port, entries bound to a specific IP
 * address will be inserted before those those bound to INADDR_ANY.
 */
void
tcp_bind_hash_insert(tf_t *tbf, tcp_t *tcp, int caller_holds_lock)
{
        tcp_t   **tcpp;
        tcp_t   *tcpnext;
        tcp_t   *tcphash;
        conn_t  *connp = tcp->tcp_connp;
        conn_t  *connext;

        if (tcp->tcp_ptpbhn != NULL) {
                ASSERT(!caller_holds_lock);
                tcp_bind_hash_remove(tcp);
        }
        tcpp = &tbf->tf_tcp;
        if (!caller_holds_lock) {
                mutex_enter(&tbf->tf_lock);
        } else {
                ASSERT(MUTEX_HELD(&tbf->tf_lock));
        }
        tcphash = tcpp[0];
        tcpnext = NULL;
        if (tcphash != NULL) {
                /* Look for an entry using the same port */
                while ((tcphash = tcpp[0]) != NULL &&
                    connp->conn_lport != tcphash->tcp_connp->conn_lport)
                        tcpp = &(tcphash->tcp_bind_hash);

                /* The port was not found, just add to the end */
                if (tcphash == NULL)
                        goto insert;

                /*
                 * OK, there already exists an entry bound to the
                 * same port.
                 *
                 * If the new tcp bound to the INADDR_ANY address
                 * and the first one in the list is not bound to
                 * INADDR_ANY we skip all entries until we find the
                 * first one bound to INADDR_ANY.
                 * This makes sure that applications binding to a
                 * specific address get preference over those binding to
                 * INADDR_ANY.
                 */
                tcpnext = tcphash;
                connext = tcpnext->tcp_connp;
                tcphash = NULL;
                if (V6_OR_V4_INADDR_ANY(connp->conn_bound_addr_v6) &&
                    !V6_OR_V4_INADDR_ANY(connext->conn_bound_addr_v6)) {
                        while ((tcpnext = tcpp[0]) != NULL) {
                                connext = tcpnext->tcp_connp;
                                if (!V6_OR_V4_INADDR_ANY(
                                    connext->conn_bound_addr_v6))
                                        tcpp = &(tcpnext->tcp_bind_hash_port);
                                else
                                        break;
                        }
                        if (tcpnext != NULL) {
                                tcpnext->tcp_ptpbhn = &tcp->tcp_bind_hash_port;
                                tcphash = tcpnext->tcp_bind_hash;
                                if (tcphash != NULL) {
                                        tcphash->tcp_ptpbhn =
                                            &(tcp->tcp_bind_hash);
                                        tcpnext->tcp_bind_hash = NULL;
                                }
                        }
                } else {
                        tcpnext->tcp_ptpbhn = &tcp->tcp_bind_hash_port;
                        tcphash = tcpnext->tcp_bind_hash;
                        if (tcphash != NULL) {
                                tcphash->tcp_ptpbhn =
                                    &(tcp->tcp_bind_hash);
                                tcpnext->tcp_bind_hash = NULL;
                        }
                }
        }
insert:
        tcp->tcp_bind_hash_port = tcpnext;
        tcp->tcp_bind_hash = tcphash;
        tcp->tcp_ptpbhn = tcpp;
        tcpp[0] = tcp;
        if (!caller_holds_lock)
                mutex_exit(&tbf->tf_lock);
}

/*
 * Hash list removal routine for tcp_t structures.
 */
void
tcp_bind_hash_remove(tcp_t *tcp)
{
        tcp_t   *tcpnext;
        kmutex_t *lockp;
        tcp_stack_t     *tcps = tcp->tcp_tcps;
        conn_t          *connp = tcp->tcp_connp;

        if (tcp->tcp_ptpbhn == NULL)
                return;

        /*
         * Extract the lock pointer in case there are concurrent
         * hash_remove's for this instance.
         */
        ASSERT(connp->conn_lport != 0);
        lockp = &tcps->tcps_bind_fanout[TCP_BIND_HASH(
            connp->conn_lport)].tf_lock;

        ASSERT(lockp != NULL);
        mutex_enter(lockp);
        if (tcp->tcp_ptpbhn) {
                tcpnext = tcp->tcp_bind_hash_port;
                if (tcpnext != NULL) {
                        tcp->tcp_bind_hash_port = NULL;
                        tcpnext->tcp_ptpbhn = tcp->tcp_ptpbhn;
                        tcpnext->tcp_bind_hash = tcp->tcp_bind_hash;
                        if (tcpnext->tcp_bind_hash != NULL) {
                                tcpnext->tcp_bind_hash->tcp_ptpbhn =
                                    &(tcpnext->tcp_bind_hash);
                                tcp->tcp_bind_hash = NULL;
                        }
                } else if ((tcpnext = tcp->tcp_bind_hash) != NULL) {
                        tcpnext->tcp_ptpbhn = tcp->tcp_ptpbhn;
                        tcp->tcp_bind_hash = NULL;
                }
                *tcp->tcp_ptpbhn = tcpnext;
                tcp->tcp_ptpbhn = NULL;
        }
        mutex_exit(lockp);
}

/*
 * Don't let port fall into the privileged range.
 * Since the extra privileged ports can be arbitrary we also
 * ensure that we exclude those from consideration.
 * tcp_g_epriv_ports is not sorted thus we loop over it until
 * there are no changes.
 *
 * Note: No locks are held when inspecting tcp_g_*epriv_ports
 * but instead the code relies on:
 * - the fact that the address of the array and its size never changes
 * - the atomic assignment of the elements of the array
 *
 * Returns 0 if there are no more ports available.
 *
 * TS note: skip multilevel ports.
 */
in_port_t
tcp_update_next_port(in_port_t port, const tcp_t *tcp, boolean_t random)
{
        int i, bump;
        boolean_t restart = B_FALSE;
        tcp_stack_t *tcps = tcp->tcp_tcps;

        if (random && tcp_random_anon_port != 0) {
                (void) random_get_pseudo_bytes((uint8_t *)&port,
                    sizeof (in_port_t));
                /*
                 * Unless changed by a sys admin, the smallest anon port
                 * is 32768 and the largest anon port is 65535.  It is
                 * very likely (50%) for the random port to be smaller
                 * than the smallest anon port.  When that happens,
                 * add port % (anon port range) to the smallest anon
                 * port to get the random port.  It should fall into the
                 * valid anon port range.
                 */
                if ((port < tcps->tcps_smallest_anon_port) ||
                    (port > tcps->tcps_largest_anon_port)) {
                        if (tcps->tcps_smallest_anon_port ==
                            tcps->tcps_largest_anon_port) {
                                bump = 0;
                        } else {
                                bump = port % (tcps->tcps_largest_anon_port -
                                    tcps->tcps_smallest_anon_port);
                        }
                        port = tcps->tcps_smallest_anon_port + bump;
                }
        }

retry:
        if (port < tcps->tcps_smallest_anon_port)
                port = (in_port_t)tcps->tcps_smallest_anon_port;

        if (port > tcps->tcps_largest_anon_port) {
                if (restart)
                        return (0);
                restart = B_TRUE;
                port = (in_port_t)tcps->tcps_smallest_anon_port;
        }

        if (port < tcps->tcps_smallest_nonpriv_port)
                port = (in_port_t)tcps->tcps_smallest_nonpriv_port;

        for (i = 0; i < tcps->tcps_g_num_epriv_ports; i++) {
                if (port == tcps->tcps_g_epriv_ports[i]) {
                        port++;
                        /*
                         * Make sure whether the port is in the
                         * valid range.
                         */
                        goto retry;
                }
        }
        if (is_system_labeled() &&
            (i = tsol_next_port(crgetzone(tcp->tcp_connp->conn_cred), port,
            IPPROTO_TCP, B_TRUE)) != 0) {
                port = i;
                goto retry;
        }
        return (port);
}

/*
 * Return the next anonymous port in the privileged port range for
 * bind checking.  It starts at IPPORT_RESERVED - 1 and goes
 * downwards.  This is the same behavior as documented in the userland
 * library call rresvport(3SOCKET).
 *
 * TS note: skip multilevel ports.
 */
static in_port_t
tcp_get_next_priv_port(const tcp_t *tcp)
{
        static in_port_t next_priv_port = IPPORT_RESERVED - 1;
        in_port_t nextport;
        boolean_t restart = B_FALSE;
        tcp_stack_t *tcps = tcp->tcp_tcps;
retry:
        if (next_priv_port < tcps->tcps_min_anonpriv_port ||
            next_priv_port >= IPPORT_RESERVED) {
                next_priv_port = IPPORT_RESERVED - 1;
                if (restart)
                        return (0);
                restart = B_TRUE;
        }
        if (is_system_labeled() &&
            (nextport = tsol_next_port(crgetzone(tcp->tcp_connp->conn_cred),
            next_priv_port, IPPROTO_TCP, B_FALSE)) != 0) {
                next_priv_port = nextport;
                goto retry;
        }
        return (next_priv_port--);
}

static int
tcp_bind_select_lport(tcp_t *tcp, in_port_t *requested_port_ptr,
    boolean_t bind_to_req_port_only, cred_t *cr)
{
        in_port_t       mlp_port;
        mlp_type_t      addrtype, mlptype;
        boolean_t       user_specified;
        in_port_t       allocated_port;
        in_port_t       requested_port = *requested_port_ptr;
        conn_t          *connp = tcp->tcp_connp;
        zone_t          *zone;
        tcp_stack_t     *tcps = tcp->tcp_tcps;
        in6_addr_t      v6addr = connp->conn_laddr_v6;

        zone = NULL;
        /*
         * XXX It's up to the caller to specify bind_to_req_port_only or not.
         */
        ASSERT(cr != NULL);

        /*
         * Get a valid port (within the anonymous range and should not
         * be a privileged one) to use if the user has not given a port.
         * If multiple threads are here, they may all start with
         * with the same initial port. But, it should be fine as long as
         * tcp_bindi will ensure that no two threads will be assigned
         * the same port.
         *
         * NOTE: XXX If a privileged process asks for an anonymous port, we
         * still check for ports only in the range > tcp_smallest_non_priv_port,
         * unless TCP_ANONPRIVBIND option is set.
         */
        mlptype = mlptSingle;
        mlp_port = requested_port;
        if (requested_port == 0) {
                requested_port = connp->conn_anon_priv_bind ?
                    tcp_get_next_priv_port(tcp) :
                    tcp_update_next_port(tcps->tcps_next_port_to_try,
                    tcp, B_TRUE);
                if (requested_port == 0) {
                        return (-TNOADDR);
                }
                user_specified = B_FALSE;

                /*
                 * If the user went through one of the RPC interfaces to create
                 * this socket and RPC is MLP in this zone, then give them an
                 * anonymous MLP.
                 */
                if (connp->conn_anon_mlp && is_system_labeled()) {
                        zone = crgetzone(cr);
                        addrtype = tsol_mlp_addr_type(
                            connp->conn_allzones ? ALL_ZONES : zone->zone_id,
                            IPV6_VERSION, &v6addr,
                            tcps->tcps_netstack->netstack_ip);
                        if (addrtype == mlptSingle) {
                                return (-TNOADDR);
                        }
                        mlptype = tsol_mlp_port_type(zone, IPPROTO_TCP,
                            PMAPPORT, addrtype);
                        mlp_port = PMAPPORT;
                }
        } else {
                int i;
                boolean_t priv = B_FALSE;

                /*
                 * If the requested_port is in the well-known privileged range,
                 * verify that the stream was opened by a privileged user.
                 * Note: No locks are held when inspecting tcp_g_*epriv_ports
                 * but instead the code relies on:
                 * - the fact that the address of the array and its size never
                 *   changes
                 * - the atomic assignment of the elements of the array
                 */
                if (requested_port < tcps->tcps_smallest_nonpriv_port) {
                        priv = B_TRUE;
                } else {
                        for (i = 0; i < tcps->tcps_g_num_epriv_ports; i++) {
                                if (requested_port ==
                                    tcps->tcps_g_epriv_ports[i]) {
                                        priv = B_TRUE;
                                        break;
                                }
                        }
                }
                if (priv) {
                        if (secpolicy_net_privaddr(cr, requested_port,
                            IPPROTO_TCP) != 0) {
                                if (connp->conn_debug) {
                                        (void) strlog(TCP_MOD_ID, 0, 1,
                                            SL_ERROR|SL_TRACE,
                                            "tcp_bind: no priv for port %d",
                                            requested_port);
                                }
                                return (-TACCES);
                        }
                }
                user_specified = B_TRUE;

                connp = tcp->tcp_connp;
                if (is_system_labeled()) {
                        zone = crgetzone(cr);
                        addrtype = tsol_mlp_addr_type(
                            connp->conn_allzones ? ALL_ZONES : zone->zone_id,
                            IPV6_VERSION, &v6addr,
                            tcps->tcps_netstack->netstack_ip);
                        if (addrtype == mlptSingle) {
                                return (-TNOADDR);
                        }
                        mlptype = tsol_mlp_port_type(zone, IPPROTO_TCP,
                            requested_port, addrtype);
                }
        }

        if (mlptype != mlptSingle) {
                if (secpolicy_net_bindmlp(cr) != 0) {
                        if (connp->conn_debug) {
                                (void) strlog(TCP_MOD_ID, 0, 1,
                                    SL_ERROR|SL_TRACE,
                                    "tcp_bind: no priv for multilevel port %d",
                                    requested_port);
                        }
                        return (-TACCES);
                }

                /*
                 * If we're specifically binding a shared IP address and the
                 * port is MLP on shared addresses, then check to see if this
                 * zone actually owns the MLP.  Reject if not.
                 */
                if (mlptype == mlptShared && addrtype == mlptShared) {
                        /*
                         * No need to handle exclusive-stack zones since
                         * ALL_ZONES only applies to the shared stack.
                         */
                        zoneid_t mlpzone;

                        mlpzone = tsol_mlp_findzone(IPPROTO_TCP,
                            htons(mlp_port));
                        if (connp->conn_zoneid != mlpzone) {
                                if (connp->conn_debug) {
                                        (void) strlog(TCP_MOD_ID, 0, 1,
                                            SL_ERROR|SL_TRACE,
                                            "tcp_bind: attempt to bind port "
                                            "%d on shared addr in zone %d "
                                            "(should be %d)",
                                            mlp_port, connp->conn_zoneid,
                                            mlpzone);
                                }
                                return (-TACCES);
                        }
                }

                if (!user_specified) {
                        int err;
                        err = tsol_mlp_anon(zone, mlptype, connp->conn_proto,
                            requested_port, B_TRUE);
                        if (err != 0) {
                                if (connp->conn_debug) {
                                        (void) strlog(TCP_MOD_ID, 0, 1,
                                            SL_ERROR|SL_TRACE,
                                            "tcp_bind: cannot establish anon "
                                            "MLP for port %d",
                                            requested_port);
                                }
                                return (err);
                        }
                        connp->conn_anon_port = B_TRUE;
                }
                connp->conn_mlp_type = mlptype;
        }

        allocated_port = tcp_bindi(tcp, requested_port, &v6addr,
            connp->conn_reuseaddr, B_FALSE, bind_to_req_port_only,
            user_specified);

        if (allocated_port == 0) {
                connp->conn_mlp_type = mlptSingle;
                if (connp->conn_anon_port) {
                        connp->conn_anon_port = B_FALSE;
                        (void) tsol_mlp_anon(zone, mlptype, connp->conn_proto,
                            requested_port, B_FALSE);
                }
                if (bind_to_req_port_only) {
                        if (connp->conn_debug) {
                                (void) strlog(TCP_MOD_ID, 0, 1,
                                    SL_ERROR|SL_TRACE,
                                    "tcp_bind: requested addr busy");
                        }
                        return (-TADDRBUSY);
                } else {
                        /* If we are out of ports, fail the bind. */
                        if (connp->conn_debug) {
                                (void) strlog(TCP_MOD_ID, 0, 1,
                                    SL_ERROR|SL_TRACE,
                                    "tcp_bind: out of ports?");
                        }
                        return (-TNOADDR);
                }
        }

        /* Pass the allocated port back */
        *requested_port_ptr = allocated_port;
        return (0);
}

/*
 * Check the address and check/pick a local port number.
 */
int
tcp_bind_check(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr,
    boolean_t bind_to_req_port_only)
{
        tcp_t   *tcp = connp->conn_tcp;
        sin_t   *sin;
        sin6_t  *sin6;
        in_port_t       requested_port;
        ipaddr_t        v4addr;
        in6_addr_t      v6addr;
        ip_laddr_t      laddr_type = IPVL_UNICAST_UP;   /* INADDR_ANY */
        zoneid_t        zoneid = IPCL_ZONEID(connp);
        ip_stack_t      *ipst = connp->conn_netstack->netstack_ip;
        uint_t          scopeid = 0;
        int             error = 0;
        ip_xmit_attr_t  *ixa = connp->conn_ixa;

        ASSERT((uintptr_t)len <= (uintptr_t)INT_MAX);

        if (tcp->tcp_state == TCPS_BOUND) {
                return (0);
        } else if (tcp->tcp_state > TCPS_BOUND) {
                if (connp->conn_debug) {
                        (void) strlog(TCP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
                            "tcp_bind: bad state, %d", tcp->tcp_state);
                }
                return (-TOUTSTATE);
        }

        ASSERT(sa != NULL && len != 0);

        if (!OK_32PTR((char *)sa)) {
                if (connp->conn_debug) {
                        (void) strlog(TCP_MOD_ID, 0, 1,
                            SL_ERROR|SL_TRACE,
                            "tcp_bind: bad address parameter, "
                            "address %p, len %d",
                            (void *)sa, len);
                }
                return (-TPROTO);
        }

        error = proto_verify_ip_addr(connp->conn_family, sa, len);
        if (error != 0) {
                return (error);
        }

        switch (len) {
        case sizeof (sin_t):    /* Complete IPv4 address */
                sin = (sin_t *)sa;
                requested_port = ntohs(sin->sin_port);
                v4addr = sin->sin_addr.s_addr;
                IN6_IPADDR_TO_V4MAPPED(v4addr, &v6addr);
                if (v4addr != INADDR_ANY) {
                        laddr_type = ip_laddr_verify_v4(v4addr, zoneid, ipst,
                            B_FALSE);
                }
                break;

        case sizeof (sin6_t): /* Complete IPv6 address */
                sin6 = (sin6_t *)sa;
                v6addr = sin6->sin6_addr;
                requested_port = ntohs(sin6->sin6_port);
                if (IN6_IS_ADDR_V4MAPPED(&v6addr)) {
                        if (connp->conn_ipv6_v6only)
                                return (EADDRNOTAVAIL);

                        IN6_V4MAPPED_TO_IPADDR(&v6addr, v4addr);
                        if (v4addr != INADDR_ANY) {
                                laddr_type = ip_laddr_verify_v4(v4addr,
                                    zoneid, ipst, B_FALSE);
                        }
                } else {
                        if (!IN6_IS_ADDR_UNSPECIFIED(&v6addr)) {
                                if (IN6_IS_ADDR_LINKSCOPE(&v6addr))
                                        scopeid = sin6->sin6_scope_id;
                                laddr_type = ip_laddr_verify_v6(&v6addr,
                                    zoneid, ipst, B_FALSE, scopeid);
                        }
                }
                break;

        default:
                if (connp->conn_debug) {
                        (void) strlog(TCP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
                            "tcp_bind: bad address length, %d", len);
                }
                return (EAFNOSUPPORT);
                /* return (-TBADADDR); */
        }

        /* Is the local address a valid unicast address? */
        if (laddr_type == IPVL_BAD)
                return (EADDRNOTAVAIL);

        connp->conn_bound_addr_v6 = v6addr;
        if (scopeid != 0) {
                ixa->ixa_flags |= IXAF_SCOPEID_SET;
                ixa->ixa_scopeid = scopeid;
                connp->conn_incoming_ifindex = scopeid;
        } else {
                ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
                connp->conn_incoming_ifindex = connp->conn_bound_if;
        }

        connp->conn_laddr_v6 = v6addr;
        connp->conn_saddr_v6 = v6addr;

        bind_to_req_port_only = requested_port != 0 && bind_to_req_port_only;

        error = tcp_bind_select_lport(tcp, &requested_port,
            bind_to_req_port_only, cr);
        if (error != 0) {
                connp->conn_laddr_v6 = ipv6_all_zeros;
                connp->conn_saddr_v6 = ipv6_all_zeros;
                connp->conn_bound_addr_v6 = ipv6_all_zeros;
        }
        return (error);
}

/*
 * If the "bind_to_req_port_only" parameter is set, if the requested port
 * number is available, return it, If not return 0
 *
 * If "bind_to_req_port_only" parameter is not set and
 * If the requested port number is available, return it.  If not, return
 * the first anonymous port we happen across.  If no anonymous ports are
 * available, return 0. addr is the requested local address, if any.
 *
 * In either case, when succeeding update the tcp_t to record the port number
 * and insert it in the bind hash table.
 *
 * Note that TCP over IPv4 and IPv6 sockets can use the same port number
 * without setting SO_REUSEADDR. This is needed so that they
 * can be viewed as two independent transport protocols.
 */
in_port_t
tcp_bindi(tcp_t *tcp, in_port_t port, const in6_addr_t *laddr,
    int reuseaddr, boolean_t quick_connect,
    boolean_t bind_to_req_port_only, boolean_t user_specified)
{
        /* number of times we have run around the loop */
        int count = 0;
        /* maximum number of times to run around the loop */
        int loopmax;
        conn_t *connp = tcp->tcp_connp;
        tcp_stack_t     *tcps = tcp->tcp_tcps;

        /*
         * Lookup for free addresses is done in a loop and "loopmax"
         * influences how long we spin in the loop
         */
        if (bind_to_req_port_only) {
                /*
                 * If the requested port is busy, don't bother to look
                 * for a new one. Setting loop maximum count to 1 has
                 * that effect.
                 */
                loopmax = 1;
        } else {
                /*
                 * If the requested port is busy, look for a free one
                 * in the anonymous port range.
                 * Set loopmax appropriately so that one does not look
                 * forever in the case all of the anonymous ports are in use.
                 */
                if (connp->conn_anon_priv_bind) {
                        /*
                         * loopmax =
                         *      (IPPORT_RESERVED-1) - tcp_min_anonpriv_port + 1
                         */
                        loopmax = IPPORT_RESERVED -
                            tcps->tcps_min_anonpriv_port;
                } else {
                        loopmax = (tcps->tcps_largest_anon_port -
                            tcps->tcps_smallest_anon_port + 1);
                }
        }
        do {
                uint16_t        lport;
                tf_t            *tbf;
                tcp_t           *ltcp;
                conn_t          *lconnp;

                lport = htons(port);

                /*
                 * Ensure that the tcp_t is not currently in the bind hash.
                 * Hold the lock on the hash bucket to ensure that
                 * the duplicate check plus the insertion is an atomic
                 * operation.
                 *
                 * This function does an inline lookup on the bind hash list
                 * Make sure that we access only members of tcp_t
                 * and that we don't look at tcp_tcp, since we are not
                 * doing a CONN_INC_REF.
                 */
                tcp_bind_hash_remove(tcp);
                tbf = &tcps->tcps_bind_fanout[TCP_BIND_HASH(lport)];
                mutex_enter(&tbf->tf_lock);
                for (ltcp = tbf->tf_tcp; ltcp != NULL;
                    ltcp = ltcp->tcp_bind_hash) {
                        if (lport == ltcp->tcp_connp->conn_lport)
                                break;
                }

                for (; ltcp != NULL; ltcp = ltcp->tcp_bind_hash_port) {
                        boolean_t not_socket;
                        boolean_t exclbind;

                        lconnp = ltcp->tcp_connp;

                        /*
                         * On a labeled system, we must treat bindings to ports
                         * on shared IP addresses by sockets with MAC exemption
                         * privilege as being in all zones, as there's
                         * otherwise no way to identify the right receiver.
                         */
                        if (!IPCL_BIND_ZONE_MATCH(lconnp, connp))
                                continue;

                        /*
                         * allow multiple interface-specific binds to coexist.
                         */
                        if (connp->conn_incoming_ifindex !=
                            lconnp->conn_incoming_ifindex) {
                                if ((connp->conn_incoming_ifindex != 0) &&
                                    (lconnp->conn_incoming_ifindex != 0))
                                        continue;
                        }

                        /*
                         * If TCP_EXCLBIND is set for either the bound or
                         * binding endpoint, the semantics of bind
                         * is changed according to the following.
                         *
                         * spec = specified address (v4 or v6)
                         * unspec = unspecified address (v4 or v6)
                         * A = specified addresses are different for endpoints
                         *
                         * bound        bind to         allowed
                         * -------------------------------------
                         * unspec       unspec          no
                         * unspec       spec            no
                         * spec         unspec          no
                         * spec         spec            yes if A
                         *
                         * For labeled systems, SO_MAC_EXEMPT behaves the same
                         * as TCP_EXCLBIND, except that zoneid is ignored.
                         *
                         * Note:
                         *
                         * 1. Because of TLI semantics, an endpoint can go
                         * back from, say TCP_ESTABLISHED to TCPS_LISTEN or
                         * TCPS_BOUND, depending on whether it is originally
                         * a listener or not.  That is why we need to check
                         * for states greater than or equal to TCPS_BOUND
                         * here.
                         *
                         * 2. Ideally, we should only check for state equals
                         * to TCPS_LISTEN. And the following check should be
                         * added.
                         *
                         * if (ltcp->tcp_state == TCPS_LISTEN ||
                         *      !reuseaddr || !lconnp->conn_reuseaddr) {
                         *              ...
                         * }
                         *
                         * The semantics will be changed to this.  If the
                         * endpoint on the list is in state not equal to
                         * TCPS_LISTEN and both endpoints have SO_REUSEADDR
                         * set, let the bind succeed.
                         *
                         * Because of (1), we cannot do that for TLI
                         * endpoints.  But we can do that for socket endpoints.
                         * If in future, we can change this going back
                         * semantics, we can use the above check for TLI also.
                         */
                        not_socket = !(TCP_IS_SOCKET(ltcp) &&
                            TCP_IS_SOCKET(tcp));
                        exclbind = lconnp->conn_exclbind ||
                            connp->conn_exclbind;

                        if ((lconnp->conn_mac_mode != CONN_MAC_DEFAULT) ||
                            (connp->conn_mac_mode != CONN_MAC_DEFAULT) ||
                            (exclbind && (not_socket ||
                            ltcp->tcp_state <= TCPS_ESTABLISHED))) {
                                if (V6_OR_V4_INADDR_ANY(
                                    lconnp->conn_bound_addr_v6) ||
                                    V6_OR_V4_INADDR_ANY(*laddr) ||
                                    IN6_ARE_ADDR_EQUAL(laddr,
                                    &lconnp->conn_bound_addr_v6)) {
                                        break;
                                }
                                continue;
                        }

                        /*
                         * Check ipversion to allow IPv4 and IPv6 sockets to
                         * have disjoint port number spaces, if *_EXCLBIND
                         * is not set and only if the application binds to a
                         * specific port. We use the same autoassigned port
                         * number space for IPv4 and IPv6 sockets.
                         */
                        if (connp->conn_ipversion != lconnp->conn_ipversion &&
                            bind_to_req_port_only)
                                continue;

                        /*
                         * Ideally, we should make sure that the source
                         * address, remote address, and remote port in the
                         * four tuple for this tcp-connection is unique.
                         * However, trying to find out the local source
                         * address would require too much code duplication
                         * with IP, since IP needs needs to have that code
                         * to support userland TCP implementations.
                         */
                        if (quick_connect &&
                            (ltcp->tcp_state > TCPS_LISTEN) &&
                            ((connp->conn_fport != lconnp->conn_fport) ||
                            !IN6_ARE_ADDR_EQUAL(&connp->conn_faddr_v6,
                            &lconnp->conn_faddr_v6)))
                                continue;

                        if (!reuseaddr) {
                                /*
                                 * No socket option SO_REUSEADDR.
                                 * If existing port is bound to
                                 * a non-wildcard IP address
                                 * and the requesting stream is
                                 * bound to a distinct
                                 * different IP addresses
                                 * (non-wildcard, also), keep
                                 * going.
                                 */
                                if (!V6_OR_V4_INADDR_ANY(*laddr) &&
                                    !V6_OR_V4_INADDR_ANY(
                                    lconnp->conn_bound_addr_v6) &&
                                    !IN6_ARE_ADDR_EQUAL(laddr,
                                    &lconnp->conn_bound_addr_v6))
                                        continue;
                                if (ltcp->tcp_state >= TCPS_BOUND) {
                                        /*
                                         * This port is being used and
                                         * its state is >= TCPS_BOUND,
                                         * so we can't bind to it.
                                         */
                                        break;
                                }
                        } else {
                                /*
                                 * socket option SO_REUSEADDR is set on the
                                 * binding tcp_t.
                                 *
                                 * If two streams are bound to
                                 * same IP address or both addr
                                 * and bound source are wildcards
                                 * (INADDR_ANY), we want to stop
                                 * searching.
                                 * We have found a match of IP source
                                 * address and source port, which is
                                 * refused regardless of the
                                 * SO_REUSEADDR setting, so we break.
                                 */
                                if (IN6_ARE_ADDR_EQUAL(laddr,
                                    &lconnp->conn_bound_addr_v6) &&
                                    (ltcp->tcp_state == TCPS_LISTEN ||
                                    ltcp->tcp_state == TCPS_BOUND))
                                        break;
                        }
                }
                if (ltcp != NULL) {
                        /* The port number is busy */
                        mutex_exit(&tbf->tf_lock);
                } else {
                        /*
                         * This port is ours. Insert in fanout and mark as
                         * bound to prevent others from getting the port
                         * number.
                         */
                        tcp->tcp_state = TCPS_BOUND;
                        DTRACE_TCP6(state__change, void, NULL,
                            ip_xmit_attr_t *, connp->conn_ixa,
                            void, NULL, tcp_t *, tcp, void, NULL,
                            int32_t, TCPS_IDLE);

                        connp->conn_lport = htons(port);

                        ASSERT(&tcps->tcps_bind_fanout[TCP_BIND_HASH(
                            connp->conn_lport)] == tbf);
                        tcp_bind_hash_insert(tbf, tcp, 1);

                        mutex_exit(&tbf->tf_lock);

                        /*
                         * We don't want tcp_next_port_to_try to "inherit"
                         * a port number supplied by the user in a bind.
                         */
                        if (user_specified)
                                return (port);

                        /*
                         * This is the only place where tcp_next_port_to_try
                         * is updated. After the update, it may or may not
                         * be in the valid range.
                         */
                        if (!connp->conn_anon_priv_bind)
                                tcps->tcps_next_port_to_try = port + 1;
                        return (port);
                }

                if (connp->conn_anon_priv_bind) {
                        port = tcp_get_next_priv_port(tcp);
                } else {
                        if (count == 0 && user_specified) {
                                /*
                                 * We may have to return an anonymous port. So
                                 * get one to start with.
                                 */
                                port =
                                    tcp_update_next_port(
                                    tcps->tcps_next_port_to_try,
                                    tcp, B_TRUE);
                                user_specified = B_FALSE;
                        } else {
                                port = tcp_update_next_port(port + 1, tcp,
                                    B_FALSE);
                        }
                }
                if (port == 0)
                        break;

                /*
                 * Don't let this loop run forever in the case where
                 * all of the anonymous ports are in use.
                 */
        } while (++count < loopmax);
        return (0);
}