#include <sys/types.h>
#include <sys/stream.h>
#include <sys/strsun.h>
#include <sys/strsubr.h>
#include <sys/stropts.h>
#include <sys/strlog.h>
#define _SUN_TPI_VERSION 2
#include <sys/tihdr.h>
#include <sys/timod.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/suntpi.h>
#include <sys/xti_inet.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/sdt.h>
#include <sys/vtrace.h>
#include <sys/kmem.h>
#include <sys/ethernet.h>
#include <sys/cpuvar.h>
#include <sys/dlpi.h>
#include <sys/pattr.h>
#include <sys/policy.h>
#include <sys/priv.h>
#include <sys/zone.h>
#include <sys/sunldi.h>
#include <sys/errno.h>
#include <sys/signal.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sockio.h>
#include <sys/isa_defs.h>
#include <sys/md5.h>
#include <sys/random.h>
#include <sys/uio.h>
#include <sys/systm.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <netinet/ip6.h>
#include <netinet/icmp6.h>
#include <net/if.h>
#include <net/route.h>
#include <inet/ipsec_impl.h>
#include <inet/tcp_sig.h>
#include <inet/common.h>
#include <inet/cc.h>
#include <inet/ip.h>
#include <inet/ip_impl.h>
#include <inet/ip6.h>
#include <inet/ip_ndp.h>
#include <inet/proto_set.h>
#include <inet/mib2.h>
#include <inet/optcom.h>
#include <inet/snmpcom.h>
#include <inet/kstatcom.h>
#include <inet/tcp.h>
#include <inet/tcp_impl.h>
#include <inet/tcp_cluster.h>
#include <inet/udp_impl.h>
#include <net/pfkeyv2.h>
#include <inet/ipdrop.h>
#include <inet/ipclassifier.h>
#include <inet/ip_ire.h>
#include <inet/ip_ftable.h>
#include <inet/ip_if.h>
#include <inet/ipp_common.h>
#include <inet/ip_rts.h>
#include <inet/ip_netinfo.h>
#include <sys/squeue_impl.h>
#include <sys/squeue.h>
#include <sys/tsol/label.h>
#include <sys/tsol/tnet.h>
#include <rpc/pmap_prot.h>
#include <sys/callo.h>
/*
 * Squeue entry tunables.  tcp_squeue_flag is the flag value this file hands
 * to SQUEUE_ENTER_ONE() on the close path.
 * NOTE(review): tcp_squeue_wput is presumably the knob tcp_squeue_flag is
 * derived from (see the tcp_squeue_switch() prototype below) -- the
 * derivation is not in this part of the file; confirm.
 */
int tcp_squeue_wput = 2;
int tcp_squeue_flag;
/*
 * NOTE(review): presumably the cap on the per-squeue tcp_free_list that
 * tcp_get_conn() recycles from, with 0 meaning "not yet computed" --
 * confirm where it is set.
 */
uint_t tcp_free_list_max_cnt = 0;
/* Value used for the TIDU_size slot of the T_info_ack templates below. */
#define TIDUSZ 4096
/*
 * Number of buckets in the acceptor hash.  It has to stay a power of two
 * because TCP_ACCEPTOR_HASH() reduces an id with "& (size - 1)".
 */
#define TCP_ACCEPTOR_FANOUT_SIZE 512
/*
 * Map an acceptor id to a bucket index.  The _ILP32 variant discards the
 * low 8 bits of the id before masking; the other variant masks the id
 * as is.
 */
#ifdef _ILP32
#define TCP_ACCEPTOR_HASH(accid) \
(((uint_t)(accid) >> 8) & (TCP_ACCEPTOR_FANOUT_SIZE - 1))
#else
#define TCP_ACCEPTOR_HASH(accid) \
((uint_t)(accid) & (TCP_ACCEPTOR_FANOUT_SIZE - 1))
#endif
/*
 * NOTE(review): not referenced in the visible part of the file; presumably
 * a floor applied to per-listener connection limits -- confirm.
 */
static uint32_t tcp_min_conn_listener = 2;
/*
 * NOTE(review): not referenced in the visible part of the file; meaning and
 * units need to be confirmed at the use site.
 */
uint32_t tcp_early_abort = 30;
/*
 * Pairs a function pointer with the tcp_t it goes with.
 * NOTE(review): presumably the TCP timer control record (callback plus its
 * argument) -- confirm at the use sites, which are outside this chunk.
 */
typedef struct tcpt_s {
/* Function to invoke. */
pfv_t tcpt_pfv;
/* The tcp_t associated with that function. */
tcp_t *tcpt_tcp;
} tcpt_t;
/*
 * Forward declarations.  tcp_input_data() and tcp_random() are declared
 * without "static" and therefore have external linkage; every "static"
 * entry is private to this file.
 */
void tcp_input_data(void *arg, mblk_t *mp, void *arg2,
ip_recv_attr_t *ira);
/* Squeue callback queued by tcp_close_common() when a lingering close is interrupted. */
static void tcp_linger_interrupted(void *arg, mblk_t *mp, void *arg2,
ip_recv_attr_t *dummy);
static void tcp_random_init(void);
int tcp_random(void);
/* Active-open helpers, one per address family. */
static int tcp_connect_ipv4(tcp_t *tcp, ipaddr_t *dstaddrp,
in_port_t dstport, uint_t srcid);
static int tcp_connect_ipv6(tcp_t *tcp, in6_addr_t *dstaddrp,
in_port_t dstport, uint32_t flowinfo,
uint_t srcid, uint32_t scope_id);
static void tcp_iss_init(tcp_t *tcp);
/* Endpoint reset helpers (tcp_reinit() calls tcp_reinit_values()). */
static void tcp_reinit(tcp_t *tcp);
static void tcp_reinit_values(tcp_t *tcp);
/* Write-side service routine referenced from the qinit tables below. */
static int tcp_wsrv(queue_t *q);
static void tcp_update_lso(tcp_t *tcp, ip_xmit_attr_t *ixa);
static void tcp_update_zcopy(tcp_t *tcp);
/* Installed as conn_ixa->ixa_notify by tcp_get_conn(). */
static void tcp_notify(void *, ip_xmit_attr_t *, ixa_notify_type_t,
ixa_notify_arg_t);
/* Per-netstack constructor/destructor pair. */
static void *tcp_stack_init(netstackid_t stackid, netstack_t *ns);
static void tcp_stack_fini(netstackid_t stackid, void *arg);
static int tcp_squeue_switch(int);
/* STREAMS open entry points; the v4/v6 variants are the ones in the qinit tables. */
static int tcp_open(queue_t *, dev_t *, int, int, cred_t *, boolean_t);
static int tcp_openv4(queue_t *, dev_t *, int, int, cred_t *);
static int tcp_openv6(queue_t *, dev_t *, int, int, cred_t *);
static void tcp_squeue_add(squeue_t *);
/*
 * STREAMS glue.  All initializers below are positional:
 *   module_info: id, name, min packet size, max packet size, high water
 *                mark, low water mark;
 *   qinit:       put, service, open, close, admin, module_info.
 * (Field order per the STREAMS DDI module_info/qinit structures.)
 */
/* Read-side limits, shared by both read-side qinit tables. */
struct module_info tcp_rinfo = {
TCP_MOD_ID, TCP_MOD_NAME, 0, INFPSZ, TCP_RECV_HIWATER, TCP_RECV_LOWATER
};
/* Write-side limits; also reused by the acceptor tables below. */
static struct module_info tcp_winfo = {
TCP_MOD_ID, TCP_MOD_NAME, 0, INFPSZ, 127, 16
};
/* Read side for IPv4 opens: no put routine, tcp_rsrv as service routine. */
struct qinit tcp_rinitv4 = {
NULL, tcp_rsrv, tcp_openv4, tcp_tpi_close, NULL, &tcp_rinfo
};
/* Read side for IPv6 opens; differs from the v4 table only in the open routine. */
struct qinit tcp_rinitv6 = {
NULL, tcp_rsrv, tcp_openv6, tcp_tpi_close, NULL, &tcp_rinfo
};
/* Default write side: tcp_wput with tcp_wsrv as service routine. */
struct qinit tcp_winit = {
tcp_wput, tcp_wsrv, NULL, NULL, NULL, &tcp_winfo
};
/* Write side using tcp_wput_sock as put routine. */
struct qinit tcp_sock_winit = {
tcp_wput_sock, tcp_wsrv, NULL, NULL, NULL, &tcp_winfo
};
/* Write side using tcp_wput_fallback; note there is no service routine. */
struct qinit tcp_fallback_sock_winit = {
tcp_wput_fallback, NULL, NULL, NULL, NULL, &tcp_winfo
};
/*
 * Read side for an acceptor stream: closes through tcp_tpi_close_accept.
 * NOTE(review): this read-side table points at tcp_winfo rather than
 * tcp_rinfo -- confirm that is intentional.
 */
struct qinit tcp_acceptor_rinit = {
NULL, tcp_rsrv, NULL, tcp_tpi_close_accept, NULL, &tcp_winfo
};
/* Write side for an acceptor stream: every message goes to tcp_tpi_accept. */
struct qinit tcp_acceptor_winit = {
tcp_tpi_accept, NULL, NULL, NULL, NULL, &tcp_winfo
};
/* Stream tables: { read qinit, write qinit }, one per address family. */
struct streamtab tcpinfov4 = {
&tcp_rinitv4, &tcp_winit
};
struct streamtab tcpinfov6 = {
&tcp_rinitv6, &tcp_winit
};
/*
 * Round x up to the next multiple of sizeof (int32_t).  The argument is
 * expanded exactly once.  Because the mask is built from sizeof, the
 * arithmetic is carried out in (at least) size_t.
 */
#define ROUNDUP32(x) \
(((x) + (sizeof (int32_t) - 1)) & ~(sizeof (int32_t) - 1))
/*
 * Template T_INFO_ACK replies for IPv4 and IPv6 endpoints.  The two differ
 * only in the address size (sizeof (sin_t) vs. sizeof (sin6_t)).
 *
 * The initializers are positional.  Per the TPI definition of
 * struct T_info_ack the slots are, in order: PRIM_type, TSDU_size,
 * ETSDU_size, CDATA_size, DDATA_size, ADDR_size, OPT_size, TIDU_size,
 * SERV_type, CURRENT_state, PROVIDER_flag.
 * NOTE(review): OPT_size is 0 here; presumably filled in at run time --
 * confirm.
 */
struct T_info_ack tcp_g_t_info_ack = {
T_INFO_ACK,
0,
T_INFINITE,
T_INVALID,
T_INVALID,
sizeof (sin_t),
0,
TIDUSZ,
T_COTS_ORD,
TCPS_IDLE,
(XPG4_1|EXPINLINE)
};
/* IPv6 flavour of the template above. */
struct T_info_ack tcp_g_t_info_ack_v6 = {
T_INFO_ACK,
0,
T_INFINITE,
T_INVALID,
T_INVALID,
sizeof (sin6_t),
0,
TIDUSZ,
T_COTS_ORD,
TCPS_IDLE,
(XPG4_1|EXPINLINE)
};
/* Property table and its entry count; both are defined in another file. */
extern mod_prop_info_t tcp_propinfo_tbl[];
extern int tcp_propinfo_count;
/* True when the mblk's data block has the STRUIO_ZC flag set. */
#define IS_VMLOANED_MBLK(mp) \
(((mp)->b_datap->db_struioflag & STRUIO_ZC) != 0)
/* NOTE(review): presumably the global zero-copy on/off switch -- confirm. */
uint32_t do_tcpzcopy = 1;
/*
 * NOTE(review): not referenced in the visible part of the file; presumably
 * consulted when the maximum packet size is set -- confirm.
 */
boolean_t tcp_static_maxpsz = B_FALSE;
/*
 * Push a new receive threshold up to the socket layer.
 *
 * Only non-STREAMS (direct socket) connections are affected; for any other
 * connection this is a no-op.  The requested value is capped at one eighth
 * of SOCKET_RECVHIWATER before it is passed to the su_set_proto_props
 * upcall.
 */
static void
tcp_set_recv_threshold(tcp_t *tcp, uint32_t new_rcvthresh)
{
	conn_t			*connp = tcp->tcp_connp;
	struct sock_proto_props	props;
	uint32_t		ceiling = SOCKET_RECVHIWATER >> 3;

	if (!IPCL_IS_NONSTR(connp))
		return;

	/* Never hand the socket layer more than the ceiling. */
	props.sopp_flags = SOCKOPT_RCVTHRESH;
	props.sopp_rcvthresh =
	    (new_rcvthresh > ceiling) ? ceiling : new_rcvthresh;

	(*connp->conn_upcalls->su_set_proto_props)(connp->conn_upper_handle,
	    &props);
}
/*
 * Derive the receive window scale shift from the current receive window.
 *
 * The shift is the number of times tcp_rwnd has to be halved before it no
 * longer exceeds TCP_MAXWIN, limited to TCP_MAX_WINSHIFT.  The result is
 * stored in tcp_rcv_ws; tcp_rwnd itself is left untouched.
 */
void
tcp_set_ws_value(tcp_t *tcp)
{
	uint32_t	window = tcp->tcp_rwnd;
	int		shift = 0;

	while (window > TCP_MAXWIN && shift < TCP_MAX_WINSHIFT) {
		shift++;
		window >>= 1;
	}
	tcp->tcp_rcv_ws = shift;
}
/*
 * Drop every IPsec reference cached on the connection behind `tcp`.
 *
 * Each of conn_latch, conn_latch_in_policy, conn_latch_in_action and
 * conn_policy is released through its own REFRELE macro when set, and the
 * field is cleared afterwards, so calling this again finds nothing left
 * to release.
 */
void
tcp_ipsec_cleanup(tcp_t *tcp)
{
conn_t *connp = tcp->tcp_connp;
/* Only conn_t's flagged as TCP connections are expected here. */
ASSERT(connp->conn_flags & IPCL_TCPCONN);
if (connp->conn_latch != NULL) {
IPLATCH_REFRELE(connp->conn_latch);
connp->conn_latch = NULL;
}
if (connp->conn_latch_in_policy != NULL) {
IPPOL_REFRELE(connp->conn_latch_in_policy);
connp->conn_latch_in_policy = NULL;
}
if (connp->conn_latch_in_action != NULL) {
IPACT_REFRELE(connp->conn_latch_in_action);
connp->conn_latch_in_action = NULL;
}
/* The policy head release also takes the conn's netstack. */
if (connp->conn_policy != NULL) {
IPPH_REFRELE(connp->conn_policy, connp->conn_netstack);
connp->conn_policy = NULL;
}
}
/*
 * Strip a tcp_t/conn_t pair down to a blank shell.
 *
 * Releases what the connection owns (bind-hash entry, IPsec references,
 * transmit attributes, the cached header template, transmit packet options,
 * everything tcp_free() handles, the credential), takes the conn off the
 * global hash, drops the netstack hold and zeroes the tcp_t.  Only three
 * tcp_t fields survive the bzero(): tcp_timercache, tcp_rsrv_mp and the
 * tcp_connp back pointer.  On return conn_flags is exactly IPCL_TCPCONN,
 * conn_state_flags is exactly CONN_INCIPIENT, and the conn is expected to
 * hold a single reference.
 *
 * NOTE(review): the resulting state is what tcp_get_conn() asserts for
 * entries taken off tcp_free_list (tcp_tcps == NULL, conn_netstack == NULL,
 * tcp_rsrv_mp != NULL), so this is presumably run before a tcp_t is parked
 * on that list -- confirm at the caller.
 */
void
tcp_cleanup(tcp_t *tcp)
{
mblk_t *mp;
conn_t *connp = tcp->tcp_connp;
/* Captured up front: tcp_tcps is cleared and the tcp_t zeroed below. */
tcp_stack_t *tcps = tcp->tcp_tcps;
netstack_t *ns = tcps->tcps_netstack;
mblk_t *tcp_rsrv_mp;
tcp_bind_hash_remove(tcp);
/* These run while conn_netstack is still set (tcp_ipsec_cleanup reads it). */
tcp_ipsec_cleanup(tcp);
ixa_cleanup(connp->conn_ixa);
/*
 * Free the header template buffer and clear every pointer/length that
 * described it, including the tcp_t's IPv4/IPv6/TCP header pointers.
 */
if (connp->conn_ht_iphc != NULL) {
kmem_free(connp->conn_ht_iphc, connp->conn_ht_iphc_allocated);
connp->conn_ht_iphc = NULL;
connp->conn_ht_iphc_allocated = 0;
connp->conn_ht_iphc_len = 0;
connp->conn_ht_ulp = NULL;
connp->conn_ht_ulp_len = 0;
tcp->tcp_ipha = NULL;
tcp->tcp_ip6h = NULL;
tcp->tcp_tcpha = NULL;
}
ip_pkt_free(&connp->conn_xmit_ipp);
tcp_free(tcp);
/* Off the global hash before the structure is wiped. */
ipcl_globalhash_remove(connp);
/* Remember the two mblks that must survive the bzero() below. */
mp = tcp->tcp_timercache;
tcp_rsrv_mp = tcp->tcp_rsrv_mp;
if (connp->conn_cred != NULL) {
crfree(connp->conn_cred);
connp->conn_cred = NULL;
}
ipcl_conn_cleanup(connp);
/* Reset the flags to just "this is a TCP conn". */
connp->conn_flags = IPCL_TCPCONN;
/* Detach from the netstack first, then drop the hold on it. */
connp->conn_netstack = NULL;
connp->conn_ixa->ixa_ipst = NULL;
netstack_rele(ns);
ASSERT(tcps != NULL);
tcp->tcp_tcps = NULL;
bzero(tcp, sizeof (tcp_t));
/* Put back the state saved above. */
tcp->tcp_timercache = mp;
tcp->tcp_rsrv_mp = tcp_rsrv_mp;
tcp->tcp_connp = connp;
ASSERT(connp->conn_tcp == tcp);
ASSERT(connp->conn_flags & IPCL_TCPCONN);
connp->conn_state_flags = CONN_INCIPIENT;
ASSERT(connp->conn_proto == IPPROTO_TCP);
ASSERT(connp->conn_ref == 1);
}
/*
 * Resolve the destination of a connection and seed TCP parameters from the
 * metrics that come back.
 *
 * Calls conn_connect() (under conn_lock) to look up the destination, builds
 * the header template with tcp_build_hdrs(), then copies any metrics found
 * in the returned iulp_t (RTT, ssthresh, send/receive pipe sizes, RTO max,
 * option permissions) into the tcp_t.  Finally it computes tcp_mss from the
 * path MTU, updates LSO state, initializes the ISS and clears
 * CONN_INCIPIENT on the conn.
 *
 * Returns 0 on success, or the error from conn_connect()/tcp_build_hdrs().
 */
int
tcp_set_destination(tcp_t *tcp)
{
	uint32_t	mss_max;
	uint32_t	mss;
	boolean_t	tcp_detached = TCP_IS_DETACHED(tcp);
	conn_t		*connp = tcp->tcp_connp;
	tcp_stack_t	*tcps = tcp->tcp_tcps;
	iulp_t		uinfo;
	int		error;
	uint32_t	flags;

	flags = IPDF_LSO | IPDF_ZCOPY;
	flags |= IPDF_UNIQUE_DCE;

	if (!tcps->tcps_ignore_path_mtu)
		connp->conn_ixa->ixa_flags |= IXAF_PMTU_DISCOVERY;

	/* conn_connect() is called with conn_lock held. */
	mutex_enter(&connp->conn_lock);
	error = conn_connect(connp, &uinfo, flags);
	mutex_exit(&connp->conn_lock);
	if (error != 0)
		return (error);

	error = tcp_build_hdrs(tcp);
	if (error != 0)
		return (error);

	tcp->tcp_localnet = uinfo.iulp_localnet;

	/* A cached RTT seeds the smoothed RTT, its deviation and the RTO. */
	if (uinfo.iulp_rtt != 0) {
		tcp->tcp_rtt_sa = MSEC2NSEC(uinfo.iulp_rtt);
		tcp->tcp_rtt_sd = MSEC2NSEC(uinfo.iulp_rtt_sd);
		tcp->tcp_rto = tcp_calculate_rto(tcp, tcps, 0);
	}
	if (uinfo.iulp_ssthresh != 0)
		tcp->tcp_cwnd_ssthresh = uinfo.iulp_ssthresh;
	else
		tcp->tcp_cwnd_ssthresh = TCP_MAX_LARGEWIN;

	/* Send pipe metric: bounded by tcps_max_buf; low water mark follows. */
	if (uinfo.iulp_spipe > 0) {
		connp->conn_sndbuf = MIN(uinfo.iulp_spipe,
		    tcps->tcps_max_buf);
		if (tcps->tcps_snd_lowat_fraction != 0) {
			connp->conn_sndlowat = connp->conn_sndbuf /
			    tcps->tcps_snd_lowat_fraction;
		}
		(void) tcp_maxpsz_set(tcp, B_TRUE);
	}

	/* Receive pipe metric overrides tcp_rwnd, bounded the same way. */
	if (uinfo.iulp_rpipe > 0) {
		tcp->tcp_rwnd = MIN(uinfo.iulp_rpipe,
		    tcps->tcps_max_buf);
	}

	if (uinfo.iulp_rtomax > 0) {
		tcp->tcp_second_timer_threshold =
		    uinfo.iulp_rtomax;
	}

	/*
	 * The metrics can only switch options on, never off.  A non-detached
	 * endpoint honours all four settings (SACK only when iulp_sack is
	 * exactly 2); a detached one only looks at SACK, and accepts any
	 * positive iulp_sack value.
	 */
	if (!tcp_detached) {
		if (uinfo.iulp_tstamp_ok)
			tcp->tcp_snd_ts_ok = B_TRUE;
		if (uinfo.iulp_wscale_ok)
			tcp->tcp_snd_ws_ok = B_TRUE;
		if (uinfo.iulp_sack == 2)
			tcp->tcp_snd_sack_ok = B_TRUE;
		if (uinfo.iulp_ecn_ok)
			tcp->tcp_ecn_ok = B_TRUE;
	} else {
		if (uinfo.iulp_sack > 0) {
			tcp->tcp_snd_sack_ok = B_TRUE;
		}
	}

	ASSERT(uinfo.iulp_mtu != 0);
	mss = tcp->tcp_initial_pmtu = uinfo.iulp_mtu;

	if (connp->conn_ipversion == IPV4_VERSION)
		mss_max = tcps->tcps_mss_max_ipv4;
	else
		mss_max = tcps->tcps_mss_max_ipv6;

	if (tcp->tcp_ipsec_overhead == 0)
		tcp->tcp_ipsec_overhead = conn_ipsec_length(connp);

	/*
	 * mss is unsigned, so subtracting an overhead larger than the MTU
	 * would wrap to a huge value, slip past the minimum check below and
	 * end up clamped to mss_max.  Saturate at zero instead so that the
	 * minimum check applies.
	 */
	if (mss > tcp->tcp_ipsec_overhead)
		mss -= tcp->tcp_ipsec_overhead;
	else
		mss = 0;

	if (mss < tcps->tcps_mss_min)
		mss = tcps->tcps_mss_min;
	if (mss > mss_max)
		mss = mss_max;

	tcp->tcp_mss = mss;

	tcp_update_lso(tcp, connp->conn_ixa);

	/* Done here, after the addresses and ports have been settled above. */
	tcp_iss_init(tcp);

	tcp->tcp_loopback = (uinfo.iulp_loopback | uinfo.iulp_local);

	/* Unconditionally clear the incipient flag. */
	mutex_enter(&connp->conn_lock);
	connp->conn_state_flags &= ~CONN_INCIPIENT;
	mutex_exit(&connp->conn_lock);
	return (0);
}
/*
 * Squeue-callback shaped wrapper around tcp_clean_death().
 *
 * arg is the conn_t; mp is consumed (freed) unconditionally.  The
 * connection is torn down with ETIMEDOUT only if its state is beyond
 * TCPS_BOUND.  arg2 and dummy are unused.
 */
void
tcp_clean_death_wrapper(void *arg, mblk_t *mp, void *arg2,
    ip_recv_attr_t *dummy)
{
	conn_t	*connp = (conn_t *)arg;
	tcp_t	*tcp = connp->conn_tcp;

	freemsg(mp);

	if (tcp->tcp_state <= TCPS_BOUND)
		return;

	(void) tcp_clean_death(connp->conn_tcp, ETIMEDOUT);
}
/*
 * Tear down a connection that has died, optionally telling the upper layer
 * why.
 *
 * err is the error to report; 0 suppresses the disconnect notification.
 *
 * Returns 0 when the tcp_t was closed locally (detached endpoints and
 * non-STREAMS eagers), and -1 when the endpoint was kept and reset with
 * tcp_reinit().
 */
int
tcp_clean_death(tcp_t *tcp, int err)
{
mblk_t *mp;
queue_t *q;
conn_t *connp = tcp->tcp_connp;
tcp_stack_t *tcps = tcp->tcp_tcps;
if (tcp->tcp_fused)
tcp_unfuse(tcp);
/* A pending linger timer that can still be cancelled ends the linger now. */
if (tcp->tcp_linger_tid != 0 &&
TCP_TIMER_CANCEL(tcp, tcp->tcp_linger_tid) >= 0) {
tcp_stop_lingering(tcp);
}
/* NOTE(review): tcp has already been dereferenced above, so this check comes late. */
ASSERT(tcp != NULL);
/* AF_INET implies IPv4; AF_INET6 may carry either IP version. */
ASSERT((connp->conn_family == AF_INET &&
connp->conn_ipversion == IPV4_VERSION) ||
(connp->conn_family == AF_INET6 &&
(connp->conn_ipversion == IPV4_VERSION ||
connp->conn_ipversion == IPV6_VERSION)));
if (TCP_IS_DETACHED(tcp)) {
if (tcp->tcp_hard_binding) {
/*
 * Close locally.  If tcp_tconnind_started is not set, drop a
 * reference here; otherwise leave the tcp_t in TCPS_BOUND.
 * tcp_closei_local() has just set the state to TCPS_CLOSED, which is
 * why the probe reports TCPS_CLOSED as the previous state.
 */
tcp_closei_local(tcp);
if (!tcp->tcp_tconnind_started) {
CONN_DEC_REF(connp);
} else {
tcp->tcp_state = TCPS_BOUND;
DTRACE_TCP6(state__change, void, NULL,
ip_xmit_attr_t *, connp->conn_ixa,
void, NULL, tcp_t *, tcp, void, NULL,
int32_t, TCPS_CLOSED);
}
} else {
tcp_close_detached(tcp);
}
return (0);
}
TCP_STAT(tcps, tcp_clean_death_nondetached);
if (tcp->tcp_listen_cnt != NULL)
TCP_DECR_LISTEN_CNT(tcp);
/* Connection counter is only adjusted for ESTABLISHED up to, not including, TIME_WAIT. */
if (tcp->tcp_state >= TCPS_ESTABLISHED &&
tcp->tcp_state < TCPS_TIME_WAIT) {
TCPS_CONN_DEC(tcps);
}
q = connp->conn_rq;
/* STREAMS endpoints: discard everything queued on the read side. */
if (!IPCL_IS_NONSTR(connp)) {
ASSERT(q != NULL);
flushq(q, FLUSHALL);
}
/* Notify the upper layer only if at least SYN_SENT and an error was given. */
if ((tcp->tcp_state >= TCPS_SYN_SENT) && err) {
/* M_FLUSH(FLUSHR) goes up only for established, non-socket endpoints. */
if (tcp->tcp_state >= TCPS_ESTABLISHED &&
!TCP_IS_SOCKET(tcp)) {
(void) putnextctl1(q, M_FLUSH, FLUSHR);
}
if (connp->conn_debug) {
(void) strlog(TCP_MOD_ID, 0, 1, SL_TRACE|SL_ERROR,
"tcp_clean_death: discon err %d", err);
}
if (IPCL_IS_NONSTR(connp)) {
/* Direct socket: report through the su_disconnected upcall. */
(*connp->conn_upcalls->su_disconnected)(
connp->conn_upper_handle, tcp->tcp_connid, err);
} else {
/* STREAMS: send a disconnect indication, or M_ERROR if none could be built. */
mp = mi_tpi_discon_ind(NULL, err, 0);
if (mp != NULL) {
putnext(q, mp);
} else {
if (connp->conn_debug) {
(void) strlog(TCP_MOD_ID, 0, 1,
SL_ERROR|SL_TRACE,
"tcp_clean_death, sending M_ERROR");
}
(void) putnextctl1(q, M_ERROR, EPROTO);
}
}
/* MIB accounting: failed attempt vs. reset of an established connection. */
if (tcp->tcp_state <= TCPS_SYN_RCVD) {
TCPS_BUMP_MIB(tcps, tcpAttemptFails);
} else if (tcp->tcp_state <= TCPS_CLOSE_WAIT) {
TCPS_BUMP_MIB(tcps, tcpEstabResets);
}
}
/*
 * A non-STREAMS endpoint that still has a listener is closed locally and
 * left in TCPS_BOUND instead of being reinitialized (tcp_reinit() asserts
 * that tcp_listener is NULL).
 */
if (tcp->tcp_listener != NULL && IPCL_IS_NONSTR(connp)) {
tcp_closei_local(tcp);
tcp->tcp_state = TCPS_BOUND;
DTRACE_TCP6(state__change, void, NULL, ip_xmit_attr_t *,
connp->conn_ixa, void, NULL, tcp_t *, tcp, void, NULL,
int32_t, TCPS_CLOSED);
return (0);
}
tcp_reinit(tcp);
if (IPCL_IS_NONSTR(connp))
(void) tcp_do_unbind(connp);
return (-1);
}
/*
 * End a lingering close and let the closing thread proceed.
 *
 * Clears tcp_linger_tid, then either detaches the tcp_t (state beyond
 * TCPS_LISTEN) or closes it locally and drops a conn reference (any other
 * state).  In both cases the closer waiting in tcp_close_common() is woken
 * through tcp_closed/tcp_closecv, and a non-STREAMS endpoint gets its
 * su_closed upcall.
 */
void
tcp_stop_lingering(tcp_t *tcp)
{
clock_t delta = 0;
tcp_stack_t *tcps = tcp->tcp_tcps;
conn_t *connp = tcp->tcp_connp;
tcp->tcp_linger_tid = 0;
if (tcp->tcp_state > TCPS_LISTEN) {
tcp_acceptor_hash_remove(tcp);
/* Lift write-side flow control if it was asserted. */
mutex_enter(&tcp->tcp_non_sq_lock);
if (tcp->tcp_flow_stopped) {
tcp_clrqfull(tcp);
}
mutex_exit(&tcp->tcp_non_sq_lock);
/* Cancel the main timer, keeping the value the cancel returned. */
if (tcp->tcp_timer_tid != 0) {
delta = TCP_TIMER_CANCEL(tcp, tcp->tcp_timer_tid);
tcp->tcp_timer_tid = 0;
}
/* Remaining timers are stopped before the queue pointers are cleared. */
tcp_timers_stop(tcp);
tcp->tcp_detached = B_TRUE;
connp->conn_rq = NULL;
connp->conn_wq = NULL;
/* TIME_WAIT connections go onto the time-wait list; no timer restart. */
if (tcp->tcp_state == TCPS_TIME_WAIT) {
tcp_time_wait_append(tcp);
TCP_DBGSTAT(tcps, tcp_detach_time_wait);
goto finish;
}
/*
 * Re-arm tcp_timer unless the cancel returned a negative value.  A delta of
 * zero (including "no timer was pending") is bumped to 1.
 */
if (delta >= 0) {
tcp->tcp_timer_tid = TCP_TIMER(tcp, tcp_timer,
delta ? delta : 1);
}
} else {
tcp_closei_local(tcp);
CONN_DEC_REF(connp);
}
finish:
/* Repeated for the path above that did not go through the detach branch. */
tcp->tcp_detached = B_TRUE;
connp->conn_rq = NULL;
connp->conn_wq = NULL;
/* Wake the thread waiting on tcp_closecv in tcp_close_common(). */
mutex_enter(&tcp->tcp_closelock);
tcp->tcp_closed = 1;
cv_signal(&tcp->tcp_closecv);
mutex_exit(&tcp->tcp_closelock);
if (IPCL_IS_NONSTR(connp)) {
sock_upcalls_t *upcalls = connp->conn_upcalls;
sock_upper_handle_t handle = connp->conn_upper_handle;
ASSERT(upcalls != NULL);
ASSERT(upcalls->su_closed != NULL);
ASSERT(handle != NULL);
/*
 * Clear both fields under conn_lock before making the upcall; the upcall
 * itself uses the copies taken above.
 */
mutex_enter(&connp->conn_lock);
connp->conn_upper_handle = NULL;
connp->conn_upcalls = NULL;
mutex_exit(&connp->conn_lock);
upcalls->su_closed(handle);
}
}
/*
 * Common close path: mark the conn as closing, queue tcp_close_output() on
 * the connection's squeue and, unless this is a non-lingering non-STREAMS
 * socket, block until the close has completed.
 *
 * flags is stored (truncated to 8 bits) in tcp_closeflags for the squeue
 * side to pick up.  The caller must already hold at least two references
 * on connp.
 */
void
tcp_close_common(conn_t *connp, int flags)
{
tcp_t *tcp = connp->conn_tcp;
/* The close request travels in an mblk embedded in the tcp_t itself. */
mblk_t *mp = &tcp->tcp_closemp;
boolean_t conn_ioctl_cleanup_reqd = B_FALSE;
mblk_t *bp;
ASSERT(connp->conn_ref >= 2);
/*
 * Under conn_lock: flag the conn as closing, note whether an ioctl is
 * pending on an ill, and take the reference that accompanies the squeue
 * entry below.
 */
mutex_enter(&connp->conn_lock);
connp->conn_state_flags |= CONN_CLOSING;
if (connp->conn_oper_pending_ill != NULL)
conn_ioctl_cleanup_reqd = B_TRUE;
CONN_INC_REF_LOCKED(connp);
mutex_exit(&connp->conn_lock);
tcp->tcp_closeflags = (uint8_t)flags;
ASSERT(connp->conn_ref >= 3);
/*
 * The embedded mblk must not already be in use (b_prev set); that would
 * mean two closes are racing, which is treated as fatal.
 */
if (mp->b_prev == NULL)
tcp->tcp_closemp_used = B_TRUE;
else
cmn_err(CE_PANIC, "tcp_close: concurrent use of tcp_closemp: "
"connp %p tcp %p\n", (void *)connp, (void *)tcp);
TCP_DEBUG_GETPCSTACK(tcp->tcmp_stk, 15);
if (conn_ioctl_cleanup_reqd)
conn_ioctl_cleanup(connp);
/* Wait for any ioctl references still outstanding to drain. */
mutex_enter(&connp->conn_lock);
while (connp->conn_ioctlref > 0)
cv_wait(&connp->conn_cv, &connp->conn_lock);
ASSERT(connp->conn_ioctlref == 0);
ASSERT(connp->conn_oper_pending_ill == NULL);
mutex_exit(&connp->conn_lock);
SQUEUE_ENTER_ONE(connp->conn_sqp, mp, tcp_close_output, connp,
NULL, tcp_squeue_flag, SQTAG_IP_TCP_CLOSE);
/* Non-STREAMS sockets without linger do not wait for completion here. */
if (IPCL_IS_NONSTR(connp) && connp->conn_linger == 0)
goto nowait;
mutex_enter(&tcp->tcp_closelock);
/*
 * First wait interruptibly.  On a signal: if the endpoint is lingering
 * with a positive linger time, queue tcp_linger_interrupted() (with its own
 * conn reference) to cut the linger short.  Either way, leave this loop and
 * fall through to the uninterruptible wait below.
 */
while (!tcp->tcp_closed) {
if (!cv_wait_sig(&tcp->tcp_closecv, &tcp->tcp_closelock)) {
if (connp->conn_linger && connp->conn_lingertime > 0) {
mutex_exit(&tcp->tcp_closelock);
CONN_INC_REF(connp);
bp = allocb_wait(0, BPRI_HI, STR_NOSIG, NULL);
SQUEUE_ENTER_ONE(connp->conn_sqp, bp,
tcp_linger_interrupted, connp, NULL,
tcp_squeue_flag, SQTAG_IP_TCP_CLOSE);
mutex_enter(&tcp->tcp_closelock);
}
break;
}
}
/* Uninterruptible wait until the squeue side sets tcp_closed. */
while (!tcp->tcp_closed)
cv_wait(&tcp->tcp_closecv, &tcp->tcp_closelock);
mutex_exit(&tcp->tcp_closelock);
/*
 * STREAMS endpoints flagged tcp_wait_for_eagers additionally wait until
 * ours is the only reference left on the conn.
 */
if (tcp->tcp_wait_for_eagers && !IPCL_IS_NONSTR(connp)) {
mutex_enter(&connp->conn_lock);
while (connp->conn_ref != 1) {
cv_wait(&connp->conn_cv, &connp->conn_lock);
}
mutex_exit(&connp->conn_lock);
}
nowait:
connp->conn_cpid = NOPID;
}
/*
 * Squeue callback queued by tcp_close_common() when the thread waiting for
 * a lingering close is interrupted.
 *
 * arg is the conn_t; mp is only a carrier and is freed right away.  If a
 * linger timer is pending and can still be cancelled, the linger is ended
 * and EINTR is recorded in tcp_client_errno.  arg2 and dummy are unused.
 */
static void
tcp_linger_interrupted(void *arg, mblk_t *mp, void *arg2, ip_recv_attr_t *dummy)
{
	conn_t	*connp = (conn_t *)arg;
	tcp_t	*tcp = connp->conn_tcp;

	freeb(mp);

	/* Nothing to do unless a linger timer is outstanding. */
	if (tcp->tcp_linger_tid == 0)
		return;

	/* The timer could not be cancelled; leave things alone. */
	if (TCP_TIMER_CANCEL(tcp, tcp->tcp_linger_tid) < 0)
		return;

	tcp_stop_lingering(tcp);
	tcp->tcp_client_errno = EINTR;
}
/*
 * Free the message that *mpp points to and set *mpp to NULL.
 *
 * Before the message is freed, b_next and b_prev are cleared on every block
 * along its b_cont chain.  A NULL *mpp is a no-op.
 */
void
tcp_close_mpp(mblk_t **mpp)
{
	mblk_t	*head = *mpp;
	mblk_t	*blk;

	if (head == NULL)
		return;

	for (blk = head; blk != NULL; blk = blk->b_cont) {
		blk->b_next = NULL;
		blk->b_prev = NULL;
	}

	*mpp = NULL;
	freemsg(head);
}
/*
 * Close a detached tcp_t: undo fusion if needed, run the local close and
 * give up one reference on the underlying conn.
 */
void
tcp_close_detached(tcp_t *tcp)
{
	if (tcp->tcp_fused) {
		tcp_unfuse(tcp);
	}

	tcp_closei_local(tcp);

	/* Release a reference now that the local close is done. */
	CONN_DEC_REF(tcp->tcp_connp);
}
/*
 * Local (no packets sent) part of closing a tcp_t.
 *
 * Removes the tcp_t from the acceptor, bind, time-wait and connection
 * hashes/lists it may be on, unlinks it from its listener when the
 * connection indication has not been started, stops its timers, moves it to
 * TCPS_CLOSED, cleans up transmit attributes and IPsec state and marks the
 * conn CONN_CONDEMNED.  No conn reference on the tcp_t itself is dropped
 * here; callers in this file do that separately.
 */
void
tcp_closei_local(tcp_t *tcp)
{
conn_t *connp = tcp->tcp_connp;
tcp_stack_t *tcps = tcp->tcp_tcps;
int32_t oldstate;
if (!TCP_IS_SOCKET(tcp))
tcp_acceptor_hash_remove(tcp);
/* Connection counter is only adjusted for ESTABLISHED up to, not including, TIME_WAIT. */
if (tcp->tcp_state >= TCPS_ESTABLISHED &&
tcp->tcp_state < TCPS_TIME_WAIT) {
TCPS_CONN_DEC(tcps);
}
if (tcp->tcp_listener != NULL) {
tcp_t *listener = tcp->tcp_listener;
mutex_enter(&listener->tcp_eager_lock);
/*
 * If tcp_tconnind_started is not set, unlink from the listener, clear the
 * queue pointers and release the reference held on the listener's conn.
 * Otherwise the eager stays linked to the listener.
 */
if (!tcp->tcp_tconnind_started) {
tcp_eager_unlink(tcp);
mutex_exit(&listener->tcp_eager_lock);
ASSERT(tcp->tcp_detached);
connp->conn_rq = NULL;
connp->conn_wq = NULL;
CONN_DEC_REF(listener->tcp_connp);
} else {
mutex_exit(&listener->tcp_eager_lock);
}
}
tcp_timers_stop(tcp);
/* Listeners may own an IP address cache; free it. */
if (tcp->tcp_state == TCPS_LISTEN) {
if (tcp->tcp_ip_addr_cache) {
kmem_free((void *)tcp->tcp_ip_addr_cache,
IP_ADDR_CACHE_SIZE * sizeof (ipaddr_t));
tcp->tcp_ip_addr_cache = NULL;
}
}
if (tcp->tcp_listen_cnt != NULL)
TCP_DECR_LISTEN_CNT(tcp);
/* Lift write-side flow control if it was asserted. */
mutex_enter(&tcp->tcp_non_sq_lock);
if (tcp->tcp_flow_stopped)
tcp_clrqfull(tcp);
mutex_exit(&tcp->tcp_non_sq_lock);
tcp_bind_hash_remove(tcp);
/* Time-wait list removal happens before the connection hash removal. */
if (tcp->tcp_state == TCPS_TIME_WAIT)
(void) tcp_time_wait_remove(tcp, NULL);
CL_INET_DISCONNECT(connp);
ipcl_hash_remove(connp);
oldstate = tcp->tcp_state;
tcp->tcp_state = TCPS_CLOSED;
/* The probe is given conn_ixa, so it fires before ixa_cleanup() runs. */
DTRACE_TCP6(state__change, void, NULL, ip_xmit_attr_t *,
connp->conn_ixa, void, NULL, tcp_t *, tcp, void, NULL,
int32_t, oldstate);
ixa_cleanup(connp->conn_ixa);
mutex_enter(&connp->conn_lock);
connp->conn_state_flags |= CONN_CONDEMNED;
mutex_exit(&connp->conn_lock);
/* By now the tcp_t must be completely off the time-wait list. */
ASSERT(tcp->tcp_time_wait_next == NULL);
ASSERT(tcp->tcp_time_wait_prev == NULL);
ASSERT(tcp->tcp_time_wait_expire == 0);
tcp_ipsec_cleanup(tcp);
}
/*
 * Release the memory and references hanging off a tcp_t.
 *
 * Frees queued transmit/reassembly/receive data, urgent-data mblks, the
 * pre-allocated fused-SIGURG and ordrel mblks, SACK bookkeeping, saved IPv6
 * extension headers, pending connection indications, TCP signature SAs and
 * congestion-control state, and clears conn_rq/conn_wq.  If the conn still
 * has an upper handle it is cleared, and for a non-STREAMS conn the
 * su_closed upcall is made and the tcp_t marked detached.
 *
 * The tcp_t itself is not freed.  Every pointer released here is set to
 * NULL afterwards, so no freed pointer is left behind in the structure.
 */
void
tcp_free(tcp_t *tcp)
{
	mblk_t	*mp;
	conn_t	*connp;

	/* Check the argument before it is dereferenced for the first time. */
	ASSERT(tcp != NULL);
	connp = tcp->tcp_connp;
	ASSERT(tcp->tcp_ptpahn == NULL && tcp->tcp_acceptor_hash == NULL);

	connp->conn_rq = NULL;
	connp->conn_wq = NULL;

	tcp_close_mpp(&tcp->tcp_xmit_head);
	tcp_close_mpp(&tcp->tcp_reass_head);
	if (tcp->tcp_rcv_list != NULL) {
		tcp_close_mpp(&tcp->tcp_rcv_list);
	}

	/*
	 * Clear the urgent-data pointers once freed, as tcp_reinit() does,
	 * so they cannot be freed or used a second time.
	 */
	if ((mp = tcp->tcp_urp_mp) != NULL) {
		freemsg(mp);
		tcp->tcp_urp_mp = NULL;
	}
	if ((mp = tcp->tcp_urp_mark_mp) != NULL) {
		freemsg(mp);
		tcp->tcp_urp_mark_mp = NULL;
	}

	/* These two pre-allocated mblks only exist on STREAMS endpoints. */
	if (tcp->tcp_fused_sigurg_mp != NULL) {
		ASSERT(!IPCL_IS_NONSTR(tcp->tcp_connp));
		freeb(tcp->tcp_fused_sigurg_mp);
		tcp->tcp_fused_sigurg_mp = NULL;
	}
	if (tcp->tcp_ordrel_mp != NULL) {
		ASSERT(!IPCL_IS_NONSTR(tcp->tcp_connp));
		freeb(tcp->tcp_ordrel_mp);
		tcp->tcp_ordrel_mp = NULL;
	}

	TCP_NOTSACK_REMOVE_ALL(tcp->tcp_notsack_list, tcp);
	bzero(&tcp->tcp_sack_info, sizeof (tcp_sack_info_t));

	/*
	 * Saved IPv6 extension headers.  Each length must already be zero
	 * whenever the matching pointer is NULL.
	 */
	if (tcp->tcp_hopopts != NULL) {
		mi_free(tcp->tcp_hopopts);
		tcp->tcp_hopopts = NULL;
		tcp->tcp_hopoptslen = 0;
	}
	ASSERT(tcp->tcp_hopoptslen == 0);
	if (tcp->tcp_dstopts != NULL) {
		mi_free(tcp->tcp_dstopts);
		tcp->tcp_dstopts = NULL;
		tcp->tcp_dstoptslen = 0;
	}
	ASSERT(tcp->tcp_dstoptslen == 0);
	if (tcp->tcp_rthdrdstopts != NULL) {
		mi_free(tcp->tcp_rthdrdstopts);
		tcp->tcp_rthdrdstopts = NULL;
		tcp->tcp_rthdrdstoptslen = 0;
	}
	ASSERT(tcp->tcp_rthdrdstoptslen == 0);
	if (tcp->tcp_rthdr != NULL) {
		mi_free(tcp->tcp_rthdr);
		tcp->tcp_rthdr = NULL;
		tcp->tcp_rthdrlen = 0;
	}
	ASSERT(tcp->tcp_rthdrlen == 0);

	/* Any connection indications still queued are discarded. */
	tcp_close_mpp(&tcp->tcp_conn.tcp_eager_conn_ind);

	/* Drop the inbound and outbound TCP signature SAs. */
	if (tcp->tcp_sig_sa_in != NULL) {
		tcpsig_sa_rele(tcp->tcp_sig_sa_in);
		tcp->tcp_sig_sa_in = NULL;
	}
	if (tcp->tcp_sig_sa_out != NULL) {
		tcpsig_sa_rele(tcp->tcp_sig_sa_out);
		tcp->tcp_sig_sa_out = NULL;
	}

	/* Let the congestion-control algorithm release its state, if any. */
	if (tcp->tcp_cc_algo != NULL && tcp->tcp_cc_algo->cb_destroy != NULL)
		tcp->tcp_cc_algo->cb_destroy(&tcp->tcp_ccv);

	if (connp->conn_upper_handle != NULL) {
		sock_upcalls_t *upcalls = connp->conn_upcalls;
		sock_upper_handle_t handle = connp->conn_upper_handle;

		/*
		 * Clear both fields under conn_lock before any upcall is
		 * made; the upcall uses the copies taken above.
		 */
		mutex_enter(&connp->conn_lock);
		connp->conn_upper_handle = NULL;
		connp->conn_upcalls = NULL;
		mutex_exit(&connp->conn_lock);

		/* Only non-STREAMS conns get the su_closed upcall. */
		if (IPCL_IS_NONSTR(connp)) {
			ASSERT(upcalls != NULL);
			ASSERT(upcalls->su_closed != NULL);
			ASSERT(handle != NULL);
			upcalls->su_closed(handle);
			tcp->tcp_detached = B_TRUE;
		}
	}
}
/*
 * Obtain a conn_t/tcp_t pair for the given stack.
 *
 * arg is the squeue whose private TCP data holds a free list of recycled
 * tcp_t's.  If that list is non-empty, its first entry is taken and rebound
 * to tcps; otherwise a new conn is created with a non-sleeping allocation.
 *
 * Returns the conn_t, or NULL if a fresh allocation was needed and failed.
 */
conn_t *
tcp_get_conn(void *arg, tcp_stack_t *tcps)
{
tcp_t *tcp = NULL;
conn_t *connp = NULL;
squeue_t *sqp = (squeue_t *)arg;
tcp_squeue_priv_t *tcp_time_wait;
netstack_t *ns;
mblk_t *tcp_rsrv_mp = NULL;
tcp_time_wait =
*((tcp_squeue_priv_t **)squeue_getprivate(sqp, SQPRIVATE_TCP));
/* The free list is manipulated under tcp_time_wait_lock. */
mutex_enter(&tcp_time_wait->tcp_time_wait_lock);
tcp = tcp_time_wait->tcp_free_list;
/* List head and count must agree: non-NULL head iff count is non-zero. */
ASSERT((tcp != NULL) ^ (tcp_time_wait->tcp_free_list_cnt == 0));
if (tcp != NULL) {
/* Recycle path: pop the head (entries are linked via tcp_time_wait_next). */
tcp_time_wait->tcp_free_list = tcp->tcp_time_wait_next;
tcp_time_wait->tcp_free_list_cnt--;
mutex_exit(&tcp_time_wait->tcp_time_wait_lock);
tcp->tcp_time_wait_next = NULL;
connp = tcp->tcp_connp;
connp->conn_flags |= IPCL_REUSED;
/* A recycled entry has no stack binding but keeps its tcp_rsrv_mp. */
ASSERT(tcp->tcp_tcps == NULL);
ASSERT(connp->conn_netstack == NULL);
ASSERT(tcp->tcp_rsrv_mp != NULL);
/* Bind to the caller's netstack, taking a hold on it. */
ns = tcps->tcps_netstack;
netstack_hold(ns);
connp->conn_netstack = ns;
connp->conn_ixa->ixa_ipst = ns->netstack_ip;
tcp->tcp_tcps = tcps;
ipcl_globalhash_insert(connp);
/*
 * The notify cookie and conn_recv are set again here; the notify, ICMP
 * and verify callbacks are expected to still be in place.
 */
connp->conn_ixa->ixa_notify_cookie = tcp;
ASSERT(connp->conn_ixa->ixa_notify == tcp_notify);
connp->conn_recv = tcp_input_data;
ASSERT(connp->conn_recvicmp == tcp_icmp_input);
ASSERT(connp->conn_verifyicmp == tcp_verifyicmp);
return (connp);
}
mutex_exit(&tcp_time_wait->tcp_time_wait_lock);
/* Fresh path: allocate tcp_rsrv_mp first so failure needs no conn teardown. */
tcp_rsrv_mp = allocb(0, BPRI_HI);
if (tcp_rsrv_mp == NULL)
return (NULL);
if ((connp = ipcl_conn_create(IPCL_TCPCONN, KM_NOSLEEP,
tcps->tcps_netstack)) == NULL) {
freeb(tcp_rsrv_mp);
return (NULL);
}
tcp = connp->conn_tcp;
tcp->tcp_rsrv_mp = tcp_rsrv_mp;
mutex_init(&tcp->tcp_rsrv_mp_lock, NULL, MUTEX_DEFAULT, NULL);
tcp->tcp_tcps = tcps;
/* Install the receive, ICMP and notification callbacks. */
connp->conn_recv = tcp_input_data;
connp->conn_recvicmp = tcp_icmp_input;
connp->conn_verifyicmp = tcp_verifyicmp;
connp->conn_ixa->ixa_notify = tcp_notify;
connp->conn_ixa->ixa_notify_cookie = tcp;
return (connp);
}
/*
 * Set up an active open to an IPv4 destination.
 *
 * dstaddrp is in/out: an INADDR_ANY destination is rewritten to the
 * loopback address, both locally and in the caller's copy.  srcid, when
 * non-zero, selects the local address if the endpoint has none yet.
 *
 * Returns 0 on success (state is then TCPS_SYN_SENT), a positive errno
 * (EADDRNOTAVAIL, or whatever tcp_set_destination()/ipcl_conn_insert_v4()
 * return), or a negated TPI error (-TNOADDR, -TBADADDR).
 */
static int
tcp_connect_ipv4(tcp_t *tcp, ipaddr_t *dstaddrp, in_port_t dstport,
uint_t srcid)
{
ipaddr_t dstaddr = *dstaddrp;
uint16_t lport;
conn_t *connp = tcp->tcp_connp;
tcp_stack_t *tcps = tcp->tcp_tcps;
int error;
ASSERT(connp->conn_ipversion == IPV4_VERSION);
/* Connecting to INADDR_ANY is treated as connecting to loopback. */
if (dstaddr == INADDR_ANY) {
dstaddr = htonl(INADDR_LOOPBACK);
*dstaddrp = dstaddr;
}
/* Resolve the source id into a local address if none is bound yet. */
if (srcid != 0 && connp->conn_laddr_v4 == INADDR_ANY) {
if (!ip_srcid_find_id(srcid, &connp->conn_laddr_v6,
IPCL_ZONEID(connp), B_TRUE, tcps->tcps_netstack)) {
return (EADDRNOTAVAIL);
}
connp->conn_saddr_v6 = connp->conn_laddr_v6;
}
/* The foreign address is stored in IPv4-mapped form. */
IN6_IPADDR_TO_V4MAPPED(dstaddr, &connp->conn_faddr_v6);
connp->conn_fport = dstport;
/* An endpoint still in TCPS_IDLE is bound to a port picked here. */
if (tcp->tcp_state == TCPS_IDLE) {
lport = tcp_update_next_port(tcps->tcps_next_port_to_try,
tcp, B_TRUE);
lport = tcp_bindi(tcp, lport, &connp->conn_laddr_v6, 0, B_TRUE,
B_FALSE, B_FALSE);
if (lport == 0)
return (-TNOADDR);
}
error = tcp_set_destination(tcp);
if (error != 0)
return (error);
/* Refuse to connect an endpoint to its own address and port. */
if (connp->conn_faddr_v4 == connp->conn_laddr_v4 &&
connp->conn_fport == connp->conn_lport)
return (-TBADADDR);
tcp->tcp_state = TCPS_SYN_SENT;
return (ipcl_conn_insert_v4(connp));
}
/*
 * Set up an active open to an IPv6 destination.
 *
 * dstaddrp is in/out: an unspecified destination is rewritten to the IPv6
 * loopback address in the caller's copy.  srcid, when non-zero, selects the
 * local address if the endpoint has none yet.  scope_id is only honoured
 * for link-scope destinations.
 *
 * Returns 0 on success (state is then TCPS_SYN_SENT), a positive errno
 * (EADDRNOTAVAIL, or whatever tcp_set_destination()/ipcl_conn_insert_v6()
 * return), or a negated TPI error (-TNOADDR, -TBADADDR).
 */
static int
tcp_connect_ipv6(tcp_t *tcp, in6_addr_t *dstaddrp, in_port_t dstport,
uint32_t flowinfo, uint_t srcid, uint32_t scope_id)
{
uint16_t lport;
conn_t *connp = tcp->tcp_connp;
tcp_stack_t *tcps = tcp->tcp_tcps;
int error;
ASSERT(connp->conn_family == AF_INET6);
/* An AF_INET6 endpoint operating as IPv4 cannot be used here. */
if (connp->conn_ipversion != IPV6_VERSION)
return (-TBADADDR);
/* Connecting to the unspecified address is treated as loopback. */
if (IN6_IS_ADDR_UNSPECIFIED(dstaddrp))
*dstaddrp = ipv6_loopback;
/* Resolve the source id into a local address if none is bound yet. */
if (srcid != 0 && IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6)) {
if (!ip_srcid_find_id(srcid, &connp->conn_laddr_v6,
IPCL_ZONEID(connp), B_FALSE, tcps->tcps_netstack)) {
return (EADDRNOTAVAIL);
}
connp->conn_saddr_v6 = connp->conn_laddr_v6;
}
/* Record the scope id for link-scope destinations; clear the flag otherwise. */
if (scope_id != 0 && IN6_IS_ADDR_LINKSCOPE(dstaddrp)) {
connp->conn_ixa->ixa_flags |= IXAF_SCOPEID_SET;
connp->conn_ixa->ixa_scopeid = scope_id;
} else {
connp->conn_ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
}
connp->conn_flowinfo = flowinfo;
connp->conn_faddr_v6 = *dstaddrp;
connp->conn_fport = dstport;
/* An endpoint still in TCPS_IDLE is bound to a port picked here. */
if (tcp->tcp_state == TCPS_IDLE) {
lport = tcp_update_next_port(tcps->tcps_next_port_to_try,
tcp, B_TRUE);
lport = tcp_bindi(tcp, lport, &connp->conn_laddr_v6, 0, B_TRUE,
B_FALSE, B_FALSE);
if (lport == 0)
return (-TNOADDR);
}
error = tcp_set_destination(tcp);
if (error != 0)
return (error);
/* Refuse to connect an endpoint to its own address and port. */
if (IN6_ARE_ADDR_EQUAL(&connp->conn_faddr_v6, &connp->conn_laddr_v6) &&
connp->conn_fport == connp->conn_lport)
return (-TBADADDR);
tcp->tcp_state = TCPS_SYN_SENT;
return (ipcl_conn_insert_v6(connp));
}
/*
 * Core of a disconnect request.
 *
 * With seqnum == -1, or on an endpoint that is not a listener
 * (tcp_conn_req_max == 0), the whole endpoint is disconnected: a RST|ACK is
 * sent, pending eagers are cleaned up and the endpoint is reset with
 * tcp_reinit().  Otherwise seqnum names one pending connection, which is
 * handed to tcp_eager_blowoff().
 *
 * Returns 0 on success, TOUTSTATE if the endpoint is in TCPS_BOUND or an
 * earlier state, or TBADSEQ if tcp_eager_blowoff() rejects seqnum.
 */
static int
tcp_disconnect_common(tcp_t *tcp, t_scalar_t seqnum)
{
conn_t *lconnp;
tcp_stack_t *tcps = tcp->tcp_tcps;
conn_t *connp = tcp->tcp_connp;
/* Nothing to disconnect at or below TCPS_BOUND. */
if (tcp->tcp_state <= TCPS_BOUND) {
if (connp->conn_debug) {
(void) strlog(TCP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
"tcp_disconnect: bad state, %d", tcp->tcp_state);
}
return (TOUTSTATE);
} else if (tcp->tcp_state >= TCPS_ESTABLISHED) {
TCPS_CONN_DEC(tcps);
}
if (seqnum == -1 || tcp->tcp_conn_req_max == 0) {
int old_state = tcp->tcp_state;
ip_stack_t *ipst = tcps->tcps_netstack->netstack_ip;
ASSERT(tcp->tcp_time_wait_next == NULL);
ASSERT(tcp->tcp_time_wait_prev == NULL);
ASSERT(tcp->tcp_time_wait_expire == 0);
/*
 * Look for a listener on our local port/address.  The result decides below
 * whether a former listener may go back to TCPS_LISTEN.
 */
if (connp->conn_ipversion == IPV4_VERSION) {
lconnp = ipcl_lookup_listener_v4(connp->conn_lport,
connp->conn_laddr_v4, IPCL_ZONEID(connp), ipst);
} else {
uint_t ifindex = 0;
if (connp->conn_ixa->ixa_flags & IXAF_SCOPEID_SET)
ifindex = connp->conn_ixa->ixa_scopeid;
lconnp = ipcl_lookup_listener_v6(connp->conn_lport,
&connp->conn_laddr_v6, ifindex, IPCL_ZONEID(connp),
ipst);
}
/*
 * Former listener and no listener found: return to TCPS_LISTEN.
 * Otherwise fall back to TCPS_BOUND, give up the listener role and drop
 * the listener connection count if one is attached.
 */
if (tcp->tcp_conn_req_max && lconnp == NULL) {
tcp->tcp_state = TCPS_LISTEN;
DTRACE_TCP6(state__change, void, NULL, ip_xmit_attr_t *,
connp->conn_ixa, void, NULL, tcp_t *, tcp, void,
NULL, int32_t, old_state);
} else if (old_state > TCPS_BOUND) {
tcp->tcp_conn_req_max = 0;
tcp->tcp_state = TCPS_BOUND;
DTRACE_TCP6(state__change, void, NULL, ip_xmit_attr_t *,
connp->conn_ixa, void, NULL, tcp_t *, tcp, void,
NULL, int32_t, old_state);
if (tcp->tcp_listen_cnt != NULL)
TCP_DECR_LISTEN_CNT(tcp);
}
/* Release the reference on the listener that was found, if any. */
if (lconnp != NULL)
CONN_DEC_REF(lconnp);
/* MIB accounting based on the state we were in before. */
switch (old_state) {
case TCPS_SYN_SENT:
case TCPS_SYN_RCVD:
TCPS_BUMP_MIB(tcps, tcpAttemptFails);
break;
case TCPS_ESTABLISHED:
case TCPS_CLOSE_WAIT:
TCPS_BUMP_MIB(tcps, tcpEstabResets);
break;
}
if (tcp->tcp_fused)
tcp_unfuse(tcp);
/* Clean up any eagers still queued on q0 or q. */
mutex_enter(&tcp->tcp_eager_lock);
if ((tcp->tcp_conn_req_cnt_q0 != 0) ||
(tcp->tcp_conn_req_cnt_q != 0)) {
tcp_eager_cleanup(tcp, 0);
}
mutex_exit(&tcp->tcp_eager_lock);
/* Send the reset before the sequence state is wiped by tcp_reinit(). */
tcp_xmit_ctl("tcp_disconnect", tcp, tcp->tcp_snxt,
tcp->tcp_rnxt, TH_RST | TH_ACK);
tcp_reinit(tcp);
return (0);
} else if (!tcp_eager_blowoff(tcp, seqnum)) {
return (TBADSEQ);
}
return (0);
}
/*
 * Handle a T_DISCON_REQ message on a STREAMS endpoint.
 *
 * mp must hold at least a struct T_discon_req; shorter messages are
 * answered with a TPROTO error ack.  The request's SEQ_number is passed to
 * tcp_disconnect_common(); a failure there is answered with an error ack
 * carrying its return value.  On success an M_FLUSH(FLUSHRW) is sent
 * upstream if the endpoint is (still) at or beyond TCPS_ESTABLISHED,
 * followed by an ok ack when one can be allocated.  mp is consumed in all
 * cases.
 */
void
tcp_disconnect(tcp_t *tcp, mblk_t *mp)
{
	conn_t		*connp = tcp->tcp_connp;
	t_scalar_t	seqnum;
	int		error;

	ASSERT((uintptr_t)(mp->b_wptr - mp->b_rptr) <= (uintptr_t)INT_MAX);
	if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_discon_req)) {
		tcp_err_ack(tcp, mp, TPROTO, 0);
		return;
	}

	seqnum = ((struct T_discon_req *)mp->b_rptr)->SEQ_number;
	error = tcp_disconnect_common(tcp, seqnum);
	if (error != 0) {
		tcp_err_ack(tcp, mp, error, 0);
		return;
	}

	if (tcp->tcp_state >= TCPS_ESTABLISHED)
		(void) putnextctl1(connp->conn_rq, M_FLUSH, FLUSHRW);

	/* The request mblk is reused for the acknowledgement. */
	mp = mi_tpi_ok_ack_alloc(mp);
	if (mp != NULL)
		putnext(connp->conn_rq, mp);
}
/*
 * Reset a non-eager endpoint after its connection has gone away, leaving it
 * either bound (TCPS_BOUND) or, if it was a listener
 * (tcp_conn_req_max != 0), listening again (TCPS_LISTEN).
 *
 * All queued data and pre-allocated mblks are released, per-connection
 * values are reset (tcp_reinit_values() followed by tcp_init_values()),
 * the conn is taken out of the connection hash and the local/source
 * addresses are restored from the bound address.
 */
static void
tcp_reinit(tcp_t *tcp)
{
mblk_t *mp;
tcp_stack_t *tcps = tcp->tcp_tcps;
conn_t *connp = tcp->tcp_connp;
int32_t oldstate;
/* Eagers (tcp_t's that still have a listener) must not come through here. */
ASSERT(tcp->tcp_listener == NULL);
ASSERT((connp->conn_family == AF_INET &&
connp->conn_ipversion == IPV4_VERSION) ||
(connp->conn_family == AF_INET6 &&
(connp->conn_ipversion == IPV4_VERSION ||
connp->conn_ipversion == IPV6_VERSION)));
tcp_timers_stop(tcp);
/* Discard the transmit list and reset its bookkeeping. */
tcp_close_mpp(&tcp->tcp_xmit_head);
if (tcp->tcp_snd_zcopy_aware)
tcp_zcopy_notify(tcp);
tcp->tcp_xmit_last = tcp->tcp_xmit_tail = NULL;
tcp->tcp_unsent = tcp->tcp_xmit_tail_unsent = 0;
/* Lift flow control if the unsent byte count is at or below the low water mark. */
mutex_enter(&tcp->tcp_non_sq_lock);
if (tcp->tcp_flow_stopped &&
TCP_UNSENT_BYTES(tcp) <= connp->conn_sndlowat) {
tcp_clrqfull(tcp);
}
mutex_exit(&tcp->tcp_non_sq_lock);
/* Discard the reassembly queue and any received-but-undelivered data. */
tcp_close_mpp(&tcp->tcp_reass_head);
tcp->tcp_reass_tail = NULL;
if (tcp->tcp_rcv_list != NULL) {
tcp_close_mpp(&tcp->tcp_rcv_list);
tcp->tcp_rcv_last_head = NULL;
tcp->tcp_rcv_last_tail = NULL;
tcp->tcp_rcv_cnt = 0;
}
/* NOTE(review): redundant when the branch above ran; harmless. */
tcp->tcp_rcv_last_tail = NULL;
/* Urgent-data mblks. */
if ((mp = tcp->tcp_urp_mp) != NULL) {
freemsg(mp);
tcp->tcp_urp_mp = NULL;
}
if ((mp = tcp->tcp_urp_mark_mp) != NULL) {
freemsg(mp);
tcp->tcp_urp_mark_mp = NULL;
}
/* Pre-allocated mblks that only exist on STREAMS endpoints. */
if (tcp->tcp_fused_sigurg_mp != NULL) {
ASSERT(!IPCL_IS_NONSTR(tcp->tcp_connp));
freeb(tcp->tcp_fused_sigurg_mp);
tcp->tcp_fused_sigurg_mp = NULL;
}
if (tcp->tcp_ordrel_mp != NULL) {
ASSERT(!IPCL_IS_NONSTR(tcp->tcp_connp));
freeb(tcp->tcp_ordrel_mp);
tcp->tcp_ordrel_mp = NULL;
}
tcp_close_mpp(&tcp->tcp_conn.tcp_eager_conn_ind);
CL_INET_DISCONNECT(connp);
/* The endpoint must not be on the time-wait list. */
ASSERT(tcp->tcp_time_wait_next == NULL);
ASSERT(tcp->tcp_time_wait_prev == NULL);
ASSERT(tcp->tcp_time_wait_expire == 0);
tcp_reinit_values(tcp);
ipcl_hash_remove(connp);
ixa_cleanup(connp->conn_ixa);
tcp_ipsec_cleanup(tcp);
/* Go back to the address the endpoint was originally bound to. */
connp->conn_laddr_v6 = connp->conn_bound_addr_v6;
connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
oldstate = tcp->tcp_state;
if (tcp->tcp_conn_req_max != 0) {
/*
 * Listener: return to TCPS_LISTEN with empty (self-linked) q0 lists,
 * no foreign address/port, and re-insert into the bind fanout.
 */
tcp->tcp_state = TCPS_LISTEN;
tcp->tcp_eager_next_q0 = tcp->tcp_eager_prev_q0 = tcp;
tcp->tcp_eager_next_drop_q0 = tcp;
tcp->tcp_eager_prev_drop_q0 = tcp;
connp->conn_recv = tcp_input_listener_unbound;
connp->conn_proto = IPPROTO_TCP;
connp->conn_faddr_v6 = ipv6_all_zeros;
connp->conn_fport = 0;
(void) ipcl_bind_insert(connp);
} else {
tcp->tcp_state = TCPS_BOUND;
}
tcp_init_values(tcp, NULL);
DTRACE_TCP6(state__change, void, NULL, ip_xmit_attr_t *,
connp->conn_ixa, void, NULL, tcp_t *, tcp, void, NULL,
int32_t, oldstate);
/* The endpoint is expected to still be in the bind hash. */
ASSERT(tcp->tcp_ptpbhn != NULL);
/* Receive window from the configured buffer size; MSS from the per-version default. */
tcp->tcp_rwnd = connp->conn_rcvbuf;
tcp->tcp_mss = connp->conn_ipversion != IPV4_VERSION ?
tcps->tcps_mss_def_ipv6 : tcps->tcps_mss_def_ipv4;
}
/*
 * Reset the fields of `tcp' and of its conn_t so the endpoint can be reused.
 *
 * The two local macros expand to nothing and serve purely as annotations:
 *   DONTCARE(x) - the field is deliberately left untouched here.
 *   PRESERVE(x) - the field keeps its current value on purpose.
 * The ASSERTs list fields that must already be clear (NULL / 0) on entry.
 * Everything else is explicitly zeroed, freed or released below.
 */
static void
tcp_reinit_values(tcp_t *tcp)
{
tcp_stack_t *tcps = tcp->tcp_tcps;
conn_t *connp = tcp->tcp_connp;
#define DONTCARE(x)
#define PRESERVE(x)
/* Bind/acceptor hash linkage is kept as is. */
PRESERVE(tcp->tcp_bind_hash_port);
PRESERVE(tcp->tcp_bind_hash);
PRESERVE(tcp->tcp_ptpbhn);
PRESERVE(tcp->tcp_acceptor_hash);
PRESERVE(tcp->tcp_ptpahn);
/* The endpoint must not be linked on a time-wait list. */
ASSERT(tcp->tcp_time_wait_next == NULL);
ASSERT(tcp->tcp_time_wait_prev == NULL);
ASSERT(tcp->tcp_time_wait_expire == 0);
PRESERVE(tcp->tcp_state);
PRESERVE(connp->conn_rq);
PRESERVE(connp->conn_wq);
/* The transmit list must already be empty. */
ASSERT(tcp->tcp_xmit_head == NULL);
ASSERT(tcp->tcp_xmit_last == NULL);
ASSERT(tcp->tcp_unsent == 0);
ASSERT(tcp->tcp_xmit_tail == NULL);
ASSERT(tcp->tcp_xmit_tail_unsent == 0);
tcp->tcp_snxt = 0;
tcp->tcp_suna = 0;
tcp->tcp_swnd = 0;
DONTCARE(tcp->tcp_cwnd);
/*
 * Free the header template buffer, if any, and clear every pointer and
 * length that referred to it (including the tcp_t's header pointers).
 */
if (connp->conn_ht_iphc != NULL) {
kmem_free(connp->conn_ht_iphc, connp->conn_ht_iphc_allocated);
connp->conn_ht_iphc = NULL;
connp->conn_ht_iphc_allocated = 0;
connp->conn_ht_iphc_len = 0;
connp->conn_ht_ulp = NULL;
connp->conn_ht_ulp_len = 0;
tcp->tcp_ipha = NULL;
tcp->tcp_ip6h = NULL;
tcp->tcp_tcpha = NULL;
}
ip_pkt_free(&connp->conn_xmit_ipp);
DONTCARE(tcp->tcp_naglim);
DONTCARE(tcp->tcp_ipha);
DONTCARE(tcp->tcp_ip6h);
DONTCARE(tcp->tcp_tcpha);
tcp->tcp_valid_bits = 0;
DONTCARE(tcp->tcp_timer_backoff);
DONTCARE(tcp->tcp_last_recv_time);
tcp->tcp_last_rcv_lbolt = 0;
tcp->tcp_init_cwnd = 0;
/* Clear the per-connection state flags. */
tcp->tcp_urp_last_valid = 0;
tcp->tcp_hard_binding = 0;
tcp->tcp_fin_acked = 0;
tcp->tcp_fin_rcvd = 0;
tcp->tcp_fin_sent = 0;
tcp->tcp_ordrel_done = 0;
tcp->tcp_detached = 0;
tcp->tcp_snd_ws_ok = B_FALSE;
tcp->tcp_snd_ts_ok = B_FALSE;
tcp->tcp_zero_win_probe = 0;
tcp->tcp_loopback = 0;
tcp->tcp_localnet = 0;
tcp->tcp_syn_defense = 0;
tcp->tcp_set_timer = 0;
tcp->tcp_active_open = 0;
tcp->tcp_rexmit = B_FALSE;
tcp->tcp_xmit_zc_clean = B_FALSE;
tcp->tcp_snd_sack_ok = B_FALSE;
tcp->tcp_hwcksum = B_FALSE;
DONTCARE(tcp->tcp_maxpsz_multiplier);
tcp->tcp_conn_def_q0 = 0;
tcp->tcp_ip_forward_progress = B_FALSE;
tcp->tcp_ecn_ok = B_FALSE;
tcp->tcp_cwr = B_FALSE;
tcp->tcp_ecn_echo_on = B_FALSE;
tcp->tcp_is_wnd_shrnk = B_FALSE;
/* Drop the not-SACKed list, then wipe the whole SACK info structure. */
TCP_NOTSACK_REMOVE_ALL(tcp->tcp_notsack_list, tcp);
bzero(&tcp->tcp_sack_info, sizeof (tcp_sack_info_t));
tcp->tcp_rcv_ws = 0;
tcp->tcp_snd_ws = 0;
tcp->tcp_ts_recent = 0;
tcp->tcp_rnxt = 0;
DONTCARE(tcp->tcp_rwnd);
tcp->tcp_initial_pmtu = 0;
/* Reassembly and receive lists must already be empty. */
ASSERT(tcp->tcp_reass_head == NULL);
ASSERT(tcp->tcp_reass_tail == NULL);
tcp->tcp_cwnd_cnt = 0;
ASSERT(tcp->tcp_rcv_list == NULL);
ASSERT(tcp->tcp_rcv_last_head == NULL);
ASSERT(tcp->tcp_rcv_last_tail == NULL);
ASSERT(tcp->tcp_rcv_cnt == 0);
DONTCARE(tcp->tcp_cwnd_ssthresh);
DONTCARE(tcp->tcp_cwnd_max);
tcp->tcp_csuna = 0;
tcp->tcp_rto = 0;
DONTCARE(tcp->tcp_rtt_sa);
DONTCARE(tcp->tcp_rtt_sd);
tcp->tcp_rtt_update = 0;
tcp->tcp_rtt_sum = 0;
tcp->tcp_rtt_cnt = 0;
DONTCARE(tcp->tcp_swl1);
DONTCARE(tcp->tcp_swl2);
tcp->tcp_rack = 0;
tcp->tcp_rack_cnt = 0;
tcp->tcp_rack_cur_max = 0;
tcp->tcp_rack_abs_max = 0;
tcp->tcp_max_swnd = 0;
ASSERT(tcp->tcp_listener == NULL);
DONTCARE(tcp->tcp_irs);
DONTCARE(tcp->tcp_iss);
DONTCARE(tcp->tcp_fss);
DONTCARE(tcp->tcp_urg);
/* No pending connection requests may remain; the limits are kept. */
ASSERT(tcp->tcp_conn_req_cnt_q == 0);
ASSERT(tcp->tcp_conn_req_cnt_q0 == 0);
PRESERVE(tcp->tcp_conn_req_max);
PRESERVE(tcp->tcp_conn_req_seqnum);
DONTCARE(tcp->tcp_first_timer_threshold);
DONTCARE(tcp->tcp_second_timer_threshold);
DONTCARE(tcp->tcp_first_ctimer_threshold);
DONTCARE(tcp->tcp_second_ctimer_threshold);
DONTCARE(tcp->tcp_urp_last);
ASSERT(tcp->tcp_urp_mp == NULL);
ASSERT(tcp->tcp_urp_mark_mp == NULL);
ASSERT(tcp->tcp_fused_sigurg_mp == NULL);
ASSERT(tcp->tcp_eager_next_q == NULL);
ASSERT(tcp->tcp_eager_last_q == NULL);
/* The q0 links are either both NULL or equal to each other. */
ASSERT((tcp->tcp_eager_next_q0 == NULL &&
tcp->tcp_eager_prev_q0 == NULL) ||
tcp->tcp_eager_next_q0 == tcp->tcp_eager_prev_q0);
ASSERT(tcp->tcp_conn.tcp_eager_conn_ind == NULL);
/* Same invariant for the drop-q0 links. */
ASSERT((tcp->tcp_eager_next_drop_q0 == NULL &&
tcp->tcp_eager_prev_drop_q0 == NULL) ||
tcp->tcp_eager_next_drop_q0 == tcp->tcp_eager_prev_drop_q0);
DONTCARE(tcp->tcp_ka_rinterval);
DONTCARE(tcp->tcp_ka_abort_thres);
DONTCARE(tcp->tcp_ka_cnt);
tcp->tcp_client_errno = 0;
DONTCARE(connp->conn_sum);
/* Forget the foreign address/port; the bound address and local port stay. */
connp->conn_faddr_v6 = ipv6_all_zeros;
PRESERVE(connp->conn_bound_addr_v6);
tcp->tcp_last_sent_len = 0;
tcp->tcp_dupack_cnt = 0;
connp->conn_fport = 0;
PRESERVE(connp->conn_lport);
PRESERVE(tcp->tcp_acceptor_lockp);
ASSERT(tcp->tcp_ordrel_mp == NULL);
PRESERVE(tcp->tcp_acceptor_id);
DONTCARE(tcp->tcp_ipsec_overhead);
PRESERVE(connp->conn_family);
/*
 * Re-derive the IP version and the default MSS from the address family
 * the endpoint was created with.
 */
if (connp->conn_family == AF_INET6) {
connp->conn_ipversion = IPV6_VERSION;
tcp->tcp_mss = tcps->tcps_mss_def_ipv6;
} else {
connp->conn_ipversion = IPV4_VERSION;
tcp->tcp_mss = tcps->tcps_mss_def_ipv4;
}
connp->conn_bound_if = 0;
connp->conn_recv_ancillary.crb_all = 0;
tcp->tcp_recvifindex = 0;
tcp->tcp_recvhops = 0;
tcp->tcp_closed = 0;
/*
 * Release the saved option/extension-header buffers.  Each length must
 * be 0 afterwards, whether or not a buffer was present.
 */
if (tcp->tcp_hopopts != NULL) {
mi_free(tcp->tcp_hopopts);
tcp->tcp_hopopts = NULL;
tcp->tcp_hopoptslen = 0;
}
ASSERT(tcp->tcp_hopoptslen == 0);
if (tcp->tcp_dstopts != NULL) {
mi_free(tcp->tcp_dstopts);
tcp->tcp_dstopts = NULL;
tcp->tcp_dstoptslen = 0;
}
ASSERT(tcp->tcp_dstoptslen == 0);
if (tcp->tcp_rthdrdstopts != NULL) {
mi_free(tcp->tcp_rthdrdstopts);
tcp->tcp_rthdrdstopts = NULL;
tcp->tcp_rthdrdstoptslen = 0;
}
ASSERT(tcp->tcp_rthdrdstoptslen == 0);
if (tcp->tcp_rthdr != NULL) {
mi_free(tcp->tcp_rthdr);
tcp->tcp_rthdr = NULL;
tcp->tcp_rthdrlen = 0;
}
ASSERT(tcp->tcp_rthdrlen == 0);
/* Reset the fusion-related fields. */
tcp->tcp_fused = B_FALSE;
tcp->tcp_unfusable = B_FALSE;
tcp->tcp_fused_sigurg = B_FALSE;
tcp->tcp_loopback_peer = NULL;
tcp->tcp_lso = B_FALSE;
tcp->tcp_in_ack_unsent = 0;
tcp->tcp_cork = B_FALSE;
tcp->tcp_tconnind_started = B_FALSE;
PRESERVE(tcp->tcp_squeue_bytes);
tcp->tcp_closemp_used = B_FALSE;
PRESERVE(tcp->tcp_rsrv_mp);
PRESERVE(tcp->tcp_rsrv_mp_lock);
#ifdef DEBUG
DONTCARE(tcp->tcmp_stk[0]);
#endif
PRESERVE(tcp->tcp_connid);
/* Release the inbound and outbound signature SA references, if held. */
if (tcp->tcp_sig_sa_in != NULL) {
tcpsig_sa_rele(tcp->tcp_sig_sa_in);
tcp->tcp_sig_sa_in = NULL;
}
if (tcp->tcp_sig_sa_out != NULL) {
tcpsig_sa_rele(tcp->tcp_sig_sa_out);
tcp->tcp_sig_sa_out = NULL;
}
ASSERT(tcp->tcp_listen_cnt == NULL);
ASSERT(tcp->tcp_reass_tid == 0);
/*
 * Let the congestion control algorithm tear down its state, then detach
 * it.  Note that tcp_cc_algo is dereferenced without a NULL check, so it
 * must be set on entry.
 */
if (tcp->tcp_cc_algo->cb_destroy != NULL)
tcp->tcp_cc_algo->cb_destroy(&tcp->tcp_ccv);
tcp->tcp_cc_algo = NULL;
#undef DONTCARE
#undef PRESERVE
}
/*
 * Give `tcp' its starting configuration.
 *
 * When `parent' is non-NULL the tunables (congestion control algorithm,
 * Nagle limit, RTO bounds, timer thresholds, keepalive settings, quickack,
 * md5sig and initial cwnd) are copied from it; otherwise the per-stack
 * defaults from tcp->tcp_tcps are used.  The remaining fields are then
 * initialised the same way in both cases.
 */
void
tcp_init_values(tcp_t *tcp, tcp_t *parent)
{
	conn_t		*connp = tcp->tcp_connp;
	tcp_stack_t	*tcps = tcp->tcp_tcps;

	ASSERT((connp->conn_family == AF_INET &&
	    connp->conn_ipversion == IPV4_VERSION) ||
	    (connp->conn_family == AF_INET6 &&
	    (connp->conn_ipversion == IPV4_VERSION ||
	    connp->conn_ipversion == IPV6_VERSION)));

	/* Tie the congestion control variables back to this connection. */
	tcp->tcp_ccv.type = IPPROTO_TCP;
	tcp->tcp_ccv.ccvc.tcp = tcp;

	if (parent != NULL) {
		/* Inherit everything tunable from the parent. */
		tcp->tcp_cc_algo = parent->tcp_cc_algo;
		tcp->tcp_naglim = parent->tcp_naglim;

		tcp->tcp_rto_initial = parent->tcp_rto_initial;
		tcp->tcp_rto_min = parent->tcp_rto_min;
		tcp->tcp_rto_max = parent->tcp_rto_max;

		tcp->tcp_first_ctimer_threshold =
		    parent->tcp_first_ctimer_threshold;
		tcp->tcp_second_ctimer_threshold =
		    parent->tcp_second_ctimer_threshold;
		tcp->tcp_first_timer_threshold =
		    parent->tcp_first_timer_threshold;
		tcp->tcp_second_timer_threshold =
		    parent->tcp_second_timer_threshold;

		tcp->tcp_fin_wait_2_flush_interval =
		    parent->tcp_fin_wait_2_flush_interval;

		tcp->tcp_quickack = parent->tcp_quickack;
		tcp->tcp_md5sig = parent->tcp_md5sig;

		tcp->tcp_ka_interval = parent->tcp_ka_interval;
		tcp->tcp_ka_abort_thres = parent->tcp_ka_abort_thres;
		tcp->tcp_ka_cnt = parent->tcp_ka_cnt;
		tcp->tcp_ka_rinterval = parent->tcp_ka_rinterval;

		tcp->tcp_init_cwnd = parent->tcp_init_cwnd;
	} else {
		/* No parent: fall back to the stack-wide defaults. */
		tcp->tcp_cc_algo = tcps->tcps_default_cc_algo;
		tcp->tcp_naglim = tcps->tcps_naglim_def;

		tcp->tcp_rto_initial = tcps->tcps_rexmit_interval_initial;
		tcp->tcp_rto_min = tcps->tcps_rexmit_interval_min;
		tcp->tcp_rto_max = tcps->tcps_rexmit_interval_max;

		tcp->tcp_first_ctimer_threshold =
		    tcps->tcps_ip_notify_cinterval;
		tcp->tcp_second_ctimer_threshold =
		    tcps->tcps_ip_abort_cinterval;
		tcp->tcp_first_timer_threshold = tcps->tcps_ip_notify_interval;
		tcp->tcp_second_timer_threshold = tcps->tcps_ip_abort_interval;

		tcp->tcp_fin_wait_2_flush_interval =
		    tcps->tcps_fin_wait_2_flush_interval;

		tcp->tcp_ka_interval = tcps->tcps_keepalive_interval;
		tcp->tcp_ka_abort_thres = tcps->tcps_keepalive_abort_interval;
		tcp->tcp_ka_cnt = 0;
		tcp->tcp_ka_rinterval = 0;
	}

	/* The chosen algorithm may need to set up per-connection state. */
	if (tcp->tcp_cc_algo->cb_init != NULL)
		VERIFY(tcp->tcp_cc_algo->cb_init(&tcp->tcp_ccv) == 0);

	/*
	 * Seed the smoothed RTT and its deviation from the initial RTO, then
	 * derive the first RTO from them.
	 */
	tcp->tcp_rtt_sa = MSEC2NSEC(tcp->tcp_rto_initial) << 2;
	tcp->tcp_rtt_sd = MSEC2NSEC(tcp->tcp_rto_initial) >> 1;
	tcp->tcp_rto = tcp_calculate_rto(tcp, tcps,
	    tcps->tcps_conn_grace_period);

	tcp->tcp_timer_backoff = 0;
	tcp->tcp_ms_we_have_waited = 0;
	tcp->tcp_last_recv_time = ddi_get_lbolt();

	tcp->tcp_cwnd_max = tcps->tcps_cwnd_max_;
	tcp->tcp_cwnd_ssthresh = TCP_MAX_LARGEWIN;
	tcp->tcp_maxpsz_multiplier = tcps->tcps_maxpsz_multiplier;

	/* Fusion state starts out cleared. */
	tcp->tcp_fused = B_FALSE;
	tcp->tcp_unfusable = B_FALSE;
	tcp->tcp_fused_sigurg = B_FALSE;
	tcp->tcp_loopback_peer = NULL;

	connp->conn_mlp_type = mlptSingle;

	/* Start with the largest window shift and the full receive buffer. */
	tcp->tcp_rcv_ws = TCP_MAX_WINSHIFT;
	tcp->tcp_rwnd = connp->conn_rcvbuf;

	tcp->tcp_cork = B_FALSE;

	/* Only pick up the stack-wide debug setting if none is set yet. */
	if (!connp->conn_debug)
		connp->conn_debug = tcps->tcps_dbg;
}
/*
 * Recompute the MSS from the current path MTU and apply it.
 *
 * Nothing happens when path MTU handling is disabled for the stack, when the
 * connection has not reached TCPS_ESTABLISHED, when the MSS would not
 * change, or when it would grow while `decrease_only' is set.
 */
void
tcp_update_pmtu(tcp_t *tcp, boolean_t decrease_only)
{
	conn_t		*connp = tcp->tcp_connp;
	ip_xmit_attr_t	*ixa = connp->conn_ixa;
	iaflags_t	ixaflags;
	uint32_t	pmtu;
	int32_t		mss;

	if (tcp->tcp_tcps->tcps_ignore_path_mtu ||
	    tcp->tcp_state < TCPS_ESTABLISHED)
		return;

	pmtu = ip_get_pmtu(ixa);
	ixaflags = ixa->ixa_flags;

	/* What is left of the PMTU after the headers and IPsec overhead. */
	mss = pmtu - connp->conn_ht_iphc_len - conn_ipsec_length(connp);

	if (mss == tcp->tcp_mss ||
	    (decrease_only && mss > tcp->tcp_mss))
		return;

	DTRACE_PROBE2(tcp_update_pmtu, int32_t, tcp->tcp_mss, uint32_t, mss);

	ixa->ixa_pmtu = pmtu;
	ixa->ixa_fragsize = pmtu;
	tcp_mss_set(tcp, mss);

	/*
	 * Once the PMTU is flagged as too small, stop insisting on
	 * don't-fragment.
	 */
	if (mss < tcp->tcp_tcps->tcps_mss_min)
		ixaflags |= IXAF_PMTU_TOO_SMALL;
	if (ixaflags & IXAF_PMTU_TOO_SMALL)
		ixaflags &= ~(IXAF_DONTFRAG | IXAF_PMTU_IPV4_DF);

	/* Keep the IPv4 header template consistent with the DF flag. */
	if (connp->conn_ipversion == IPV4_VERSION &&
	    (ixaflags & IXAF_PMTU_IPV4_DF) == 0)
		tcp->tcp_ipha->ipha_fragment_offset_and_flags = 0;

	ixa->ixa_flags = ixaflags;
}
/*
 * Compute and publish the maximum packet size (maxpsz) for this endpoint,
 * and optionally the transmit maximum block size as well.
 *
 * Returns the maximum block size: the current MSS, or INFPSZ when the
 * endpoint is fused or the maxpsz multiplier is 0.  A detached endpoint is
 * left untouched and simply gets its MSS back.
 */
int
tcp_maxpsz_set(tcp_t *tcp, boolean_t set_maxblk)
{
	conn_t	*connp = tcp->tcp_connp;
	queue_t	*rq = connp->conn_rq;
	int32_t	mss = tcp->tcp_mss;
	int32_t	maxblk = mss;
	int	maxpsz;

	if (TCP_IS_DETACHED(tcp))
		return (mss);

	if (tcp->tcp_fused) {
		maxpsz = tcp_fuse_maxpsz(tcp);
		maxblk = INFPSZ;
	} else if (tcp->tcp_maxpsz_multiplier == 0) {
		/* No multiplier: size from the whole send buffer. */
		maxpsz = MSS_ROUNDUP(connp->conn_sndbuf, mss);
		maxblk = INFPSZ;
	} else {
		/* A multiple of the MSS, capped at half the send buffer. */
		maxpsz = tcp->tcp_maxpsz_multiplier * mss;
		if (maxpsz > connp->conn_sndbuf / 2) {
			maxpsz = connp->conn_sndbuf / 2;
			maxpsz = MSS_ROUNDUP(maxpsz, mss);
		}
	}

	(void) proto_set_maxpsz(rq, connp, maxpsz);

	/* Only STREAMS endpoints have a write queue to update. */
	if (!(IPCL_IS_NONSTR(connp)))
		connp->conn_wq->q_maxpsz = maxpsz;

	if (set_maxblk)
		(void) proto_set_tx_maxblk(rq, connp, maxblk);

	return (maxblk);
}
/*
 * Open entry point for the IPv4 TCP device: tcp_open() with isv6 = B_FALSE.
 */
static int
tcp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
{
	int	rv;

	rv = tcp_open(q, devp, flag, sflag, credp, B_FALSE);
	return (rv);
}
/*
 * Open entry point for the IPv6 TCP device: tcp_open() with isv6 = B_TRUE.
 */
static int
tcp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
{
	int	rv;

	rv = tcp_open(q, devp, flag, sflag, credp, B_TRUE);
	return (rv);
}
/*
 * Allocate and initialise a new TCP endpoint on behalf of `credp'.
 *
 * isv6 selects the address family / IP version, issocket marks the endpoint
 * as a socket.  On success the new conn_t is returned in state TCPS_IDLE.
 * On failure NULL is returned and *errorp holds either the error from
 * secpolicy_basic_net_access() or ENOSR when no conn could be obtained.
 */
conn_t *
tcp_create_common(cred_t *credp, boolean_t isv6, boolean_t issocket,
    int *errorp)
{
	tcp_stack_t	*tcps;
	zoneid_t	zoneid;
	squeue_t	*sqp;
	conn_t		*connp;
	ip_xmit_attr_t	*ixa;
	tcp_t		*tcp = NULL;

	ASSERT(errorp != NULL);

	/* Work out which stack instance and zone the endpoint belongs to. */
	if (credp == kcred && nfs_global_client_only != 0) {
		zoneid = GLOBAL_ZONEID;
		tcps = netstack_find_by_stackid(GLOBAL_NETSTACKID)->
		    netstack_tcp;
		ASSERT(tcps != NULL);
	} else {
		netstack_t	*ns;
		int		err = secpolicy_basic_net_access(credp);

		if (err != 0) {
			*errorp = err;
			return (NULL);
		}

		ns = netstack_find_by_cred(credp);
		ASSERT(ns != NULL);
		tcps = ns->netstack_tcp;
		ASSERT(tcps != NULL);

		/* A non-global netstack is run with the global zone id. */
		if (tcps->tcps_netstack->netstack_stackid == GLOBAL_NETSTACKID)
			zoneid = crgetzoneid(credp);
		else
			zoneid = GLOBAL_ZONEID;
	}

	sqp = IP_SQUEUE_GET((uint_t)gethrtime());
	connp = tcp_get_conn(sqp, tcps);

	/* The lookup reference on the netstack is dropped either way. */
	netstack_rele(tcps->tcps_netstack);
	if (connp == NULL) {
		*errorp = ENOSR;
		return (NULL);
	}
	ASSERT(connp->conn_ixa->ixa_protocol == connp->conn_proto);

	ixa = connp->conn_ixa;
	tcp = connp->conn_tcp;

	connp->conn_sqp = sqp;
	connp->conn_initial_sqp = sqp;
	ixa->ixa_sqp = sqp;

	ixa->ixa_flags |= IXAF_SET_ULP_CKSUM | IXAF_VERIFY_SOURCE |
	    IXAF_VERIFY_PMTU | IXAF_VERIFY_LSO;
	if (!tcps->tcps_dev_flow_ctl)
		ixa->ixa_flags |= IXAF_NO_DEV_FLOW_CTL;

	/* Address-family specific defaults. */
	if (!isv6) {
		connp->conn_ipversion = IPV4_VERSION;
		connp->conn_family = AF_INET;
		tcp->tcp_mss = tcps->tcps_mss_def_ipv4;
		connp->conn_default_ttl = tcps->tcps_ipv4_ttl;
	} else {
		ixa->ixa_src_preferences = IPV6_PREFER_SRC_DEFAULT;
		connp->conn_ipversion = IPV6_VERSION;
		connp->conn_family = AF_INET6;
		tcp->tcp_mss = tcps->tcps_mss_def_ipv6;
		connp->conn_default_ttl = tcps->tcps_ipv6_hoplimit;
	}
	connp->conn_xmit_ipp.ipp_unicast_hops = connp->conn_default_ttl;

	/* The conn keeps its own hold on the caller's credentials. */
	crhold(credp);
	connp->conn_cred = credp;
	connp->conn_cpid = curproc->p_pid;
	connp->conn_open_time = ddi_get_lbolt64();

	ASSERT(!(connp->conn_ixa->ixa_free_flags & IXA_FREE_CRED));
	ixa->ixa_cred = credp;
	ixa->ixa_cpid = connp->conn_cpid;

	connp->conn_zoneid = zoneid;
	ixa->ixa_zoneid = zoneid;
	connp->conn_mlp_type = mlptSingle;
	ASSERT(connp->conn_netstack == tcps->tcps_netstack);
	ASSERT(tcp->tcp_tcps == tcps);

	if (getpflags(NET_MAC_AWARE, credp) != 0)
		connp->conn_mac_mode = CONN_MAC_AWARE;

	connp->conn_zone_is_global = (crgetzoneid(credp) == GLOBAL_ZONEID);

	if (issocket)
		tcp->tcp_issocket = 1;

	/* Buffer sizes and the send low-water mark. */
	connp->conn_rcvbuf = tcps->tcps_recv_hiwat;
	connp->conn_sndbuf = tcps->tcps_xmit_hiwat;
	if (tcps->tcps_snd_lowat_fraction == 0) {
		connp->conn_sndlowat = tcps->tcps_xmit_lowat;
	} else {
		connp->conn_sndlowat = connp->conn_sndbuf /
		    tcps->tcps_snd_lowat_fraction;
	}

	connp->conn_so_type = SOCK_STREAM;
	connp->conn_wroff = connp->conn_ht_iphc_allocated +
	    tcps->tcps_wroff_xtra;

	SOCK_CONNID_INIT(tcp->tcp_connid);
	tcp->tcp_state = TCPS_IDLE;
	tcp_init_values(tcp, NULL);

	return (connp);
}
/*
 * Common STREAMS open routine behind tcp_openv4() and tcp_openv6().
 *
 * Returns 0 on success (including a re-open of a queue that already has
 * q_ptr set), EINVAL for a module open, EBUSY when no minor number can be
 * allocated, or the error reported by tcp_create_common().
 */
static int
tcp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp,
    boolean_t isv6)
{
	vmem_t		*minor_arena = NULL;
	conn_t		*connp = NULL;
	tcp_t		*tcp = NULL;
	dev_t		conn_dev = 0;
	boolean_t	issocket;
	int		err;

	if (q->q_ptr != NULL)
		return (0);		/* already open */

	if (sflag == MODOPEN)
		return (EINVAL);

	/*
	 * Socket streams try the large arena first when it exists; everyone
	 * else, and any failure there, uses the small arena.
	 */
	if (ip_minor_arena_la != NULL && (flag & SO_SOCKSTR)) {
		conn_dev = inet_minor_alloc(ip_minor_arena_la);
		if (conn_dev != 0)
			minor_arena = ip_minor_arena_la;
	}
	if (minor_arena == NULL) {
		conn_dev = inet_minor_alloc(ip_minor_arena_sa);
		if (conn_dev == 0)
			return (EBUSY);
		minor_arena = ip_minor_arena_sa;
	}
	ASSERT(minor_arena != NULL);

	*devp = makedevice(getmajor(*devp), (minor_t)conn_dev);

	/*
	 * Fallback and acceptor streams get no conn_t: the queues only
	 * remember the minor number and the arena it came from.
	 */
	if (flag & (SO_FALLBACK | SO_ACCEPTOR)) {
		if (flag & SO_FALLBACK) {
			WR(q)->q_qinfo = &tcp_fallback_sock_winit;
		} else {
			q->q_qinfo = &tcp_acceptor_rinit;
			WR(q)->q_qinfo = &tcp_acceptor_winit;
		}
		RD(q)->q_ptr = (void *)conn_dev;
		WR(q)->q_ptr = (void *)minor_arena;
		qprocson(q);
		return (0);
	}

	issocket = flag & SO_SOCKSTR;
	connp = tcp_create_common(credp, isv6, issocket, &err);
	if (connp == NULL) {
		/* Give the minor number back and leave the queue pair clean. */
		inet_minor_free(minor_arena, conn_dev);
		q->q_ptr = WR(q)->q_ptr = NULL;
		return (err);
	}

	/* Cross-link the conn and the queue pair. */
	connp->conn_rq = q;
	connp->conn_wq = WR(q);
	q->q_ptr = WR(q)->q_ptr = connp;

	connp->conn_dev = conn_dev;
	connp->conn_minor_arena = minor_arena;

	ASSERT(q->q_qinfo == &tcp_rinitv4 || q->q_qinfo == &tcp_rinitv6);
	ASSERT(WR(q)->q_qinfo == &tcp_winit);

	tcp = connp->conn_tcp;
	if (!issocket) {
		/* TLI/XTI endpoints are registered in the acceptor hash. */
#ifdef _ILP32
		tcp->tcp_acceptor_id = (t_uscalar_t)RD(q);
#else
		tcp->tcp_acceptor_id = conn_dev;
#endif
		tcp_acceptor_hash_insert(tcp->tcp_acceptor_id, tcp);
	} else {
		WR(q)->q_qinfo = &tcp_sock_winit;
	}

	/* Take a second reference and mark the conn as no longer incipient. */
	mutex_enter(&connp->conn_lock);
	CONN_INC_REF_LOCKED(connp);
	ASSERT(connp->conn_ref == 2);
	connp->conn_state_flags &= ~CONN_INCIPIENT;
	mutex_exit(&connp->conn_lock);

	qprocson(q);
	return (0);
}
/*
 * (Re)build the IP + TCP header template for the connection.
 *
 * If a TCP header already exists in the template it is saved across the
 * rebuild and copied back; otherwise a fresh minimal header is filled in.
 * Returns 0 on success or the error from conn_build_hdr_template().
 */
int
tcp_build_hdrs(tcp_t *tcp)
{
	conn_t		*connp = tcp->tcp_connp;
	tcp_stack_t	*tcps = tcp->tcp_tcps;
	uint_t		extralen = TCP_MAX_TCP_OPTIONS_LENGTH;
	uint_t		ulplen = TCP_MIN_HEADER_LENGTH;
	uint_t		buflen = connp->conn_ht_ulp_len;
	char		buf[TCP_MAX_HDR_LENGTH];
	tcpha_t		*tcpha;
	uint32_t	cksum;
	int		error;

	/* Stash the current TCP header; the rebuild may move or free it. */
	if (buflen != 0) {
		bcopy(connp->conn_ht_ulp, buf, buflen);
		extralen -= buflen - ulplen;
		ulplen = buflen;
	}

	mutex_enter(&connp->conn_lock);
	error = conn_build_hdr_template(connp, ulplen, extralen,
	    &connp->conn_laddr_v6, &connp->conn_faddr_v6, connp->conn_flowinfo);
	mutex_exit(&connp->conn_lock);
	if (error != 0)
		return (error);

	tcpha = (tcpha_t *)connp->conn_ht_ulp;
	tcp->tcp_tcpha = tcpha;

	if (buflen == 0) {
		/* First build: a 20-byte header with only the ports set. */
		tcpha->tha_sum = 0;
		tcpha->tha_urp = 0;
		tcpha->tha_ack = 0;
		tcpha->tha_offset_and_reserved = (5 << 4);
		tcpha->tha_lport = connp->conn_lport;
		tcpha->tha_fport = connp->conn_fport;
	} else {
		bcopy(buf, connp->conn_ht_ulp, buflen);
	}

	/*
	 * Pre-load the checksum field with the header size plus conn_sum,
	 * folded into 16 bits.
	 */
	cksum = sizeof (tcpha_t) + connp->conn_sum;
	cksum = (cksum >> 16) + (cksum & 0xFFFF);
	ASSERT(cksum < 0x10000);
	tcpha->tha_sum = htons(cksum);

	/* Point the version-specific IP header pointer at the template. */
	if (connp->conn_ipversion != IPV4_VERSION)
		tcp->tcp_ip6h = (ip6_t *)connp->conn_ht_iphc;
	else
		tcp->tcp_ipha = (ipha_t *)connp->conn_ht_iphc;

	/* The write offset only ever grows. */
	if (connp->conn_ht_iphc_allocated + tcps->tcps_wroff_xtra >
	    connp->conn_wroff) {
		connp->conn_wroff = connp->conn_ht_iphc_allocated +
		    tcps->tcps_wroff_xtra;
		(void) proto_set_tx_wroff(connp->conn_rq, connp,
		    connp->conn_wroff);
	}

	return (0);
}
/*
 * Set the receive window (receive buffer size) of `tcp' to `rwnd'.
 *
 * The requested value is first raised to at least
 * tcps_recv_hiwat_minmss * MSS.  Returns the value actually used: the
 * adjusted rwnd, or for a fused endpoint the high-water mark computed by
 * tcp_fuse_set_rcv_hiwat().
 */
int
tcp_rwnd_set(tcp_t *tcp, uint32_t rwnd)
{
uint32_t mss = tcp->tcp_mss;
uint32_t old_max_rwnd;
uint32_t max_transmittable_rwnd;
boolean_t tcp_detached = TCP_IS_DETACHED(tcp);
tcp_stack_t *tcps = tcp->tcp_tcps;
conn_t *connp = tcp->tcp_connp;
/* Enforce the minimum window size. */
rwnd = MAX(rwnd, tcps->tcps_recv_hiwat_minmss * mss);
if (tcp->tcp_fused) {
/*
 * Fused endpoint: the fusion code computes the high-water mark, which
 * is published upstream unless the endpoint is detached.
 */
size_t sth_hiwat;
tcp_t *peer_tcp = tcp->tcp_loopback_peer;
ASSERT(peer_tcp != NULL);
sth_hiwat = tcp_fuse_set_rcv_hiwat(tcp, rwnd);
if (!tcp_detached) {
(void) proto_set_rx_hiwat(connp->conn_rq, connp,
sth_hiwat);
tcp_set_recv_threshold(tcp, sth_hiwat >> 3);
}
/* Refresh the advertised window in the header template, if any. */
if (tcp->tcp_tcpha != NULL) {
tcp->tcp_tcpha->tha_win =
htons(tcp->tcp_rwnd >> tcp->tcp_rcv_ws);
}
if ((tcp->tcp_rcv_ws > 0) && rwnd > tcp->tcp_cwnd_max)
tcp->tcp_cwnd_max = rwnd;
/* The peer's maxpsz is recomputed as well. */
(void) tcp_maxpsz_set(peer_tcp, B_TRUE);
return (sth_hiwat);
}
/* The previous maximum: tcp_rwnd when detached, else conn_rcvbuf. */
if (tcp_detached)
old_max_rwnd = tcp->tcp_rwnd;
else
old_max_rwnd = connp->conn_rcvbuf;
/*
 * Past TCPS_SYN_SENT the window is never reduced below the previous
 * maximum; that maximum is only re-rounded to a multiple of the MSS.
 */
if (rwnd < old_max_rwnd && tcp->tcp_state > TCPS_SYN_SENT) {
rwnd = MSS_ROUNDUP(old_max_rwnd, mss);
}
/*
 * Cap the window at what the current window shift can express, rounded
 * down to a multiple of the MSS (unless that would drop below one MSS).
 * In that case tcp_rwnd and old_max_rwnd are both forced to the capped
 * value, so the increment further down adds nothing.
 */
max_transmittable_rwnd = TCP_MAXWIN << tcp->tcp_rcv_ws;
if (rwnd > max_transmittable_rwnd) {
rwnd = max_transmittable_rwnd -
(max_transmittable_rwnd % mss);
if (rwnd < mss)
rwnd = max_transmittable_rwnd;
tcp->tcp_rwnd = old_max_rwnd = rwnd;
}
/*
 * Absolute limit on deferred ACKs: the stack tunable for local or
 * non-local peers, but never more than half the window in segments.
 */
if (tcp->tcp_localnet) {
tcp->tcp_rack_abs_max =
MIN(tcps->tcps_local_dacks_max, rwnd / mss / 2);
} else {
tcp->tcp_rack_abs_max =
MIN(tcps->tcps_deferred_acks_max, rwnd / mss / 2);
}
/* Clamp the current limit to the new absolute one, otherwise reset it. */
if (tcp->tcp_rack_cur_max > tcp->tcp_rack_abs_max)
tcp->tcp_rack_cur_max = tcp->tcp_rack_abs_max;
else
tcp->tcp_rack_cur_max = 0;
/* Adjust the current window by the amount the maximum changed. */
tcp->tcp_rwnd += rwnd - old_max_rwnd;
connp->conn_rcvbuf = rwnd;
/* Refresh the advertised window in the header template, if any. */
if (tcp->tcp_tcpha != NULL) {
tcp->tcp_tcpha->tha_win =
htons(tcp->tcp_rwnd >> tcp->tcp_rcv_ws);
}
if ((tcp->tcp_rcv_ws > 0) && rwnd > tcp->tcp_cwnd_max)
tcp->tcp_cwnd_max = rwnd;
/* A detached endpoint has nothing to be told upstream. */
if (tcp_detached)
return (rwnd);
tcp_set_recv_threshold(tcp, rwnd >> 3);
(void) proto_set_rx_hiwat(connp->conn_rq, connp, rwnd);
return (rwnd);
}
/*
 * Unbind an endpoint that is in TCPS_BOUND or TCPS_LISTEN, returning it to
 * TCPS_IDLE.  Returns 0 on success, or -TOUTSTATE when the endpoint is in
 * any other state.
 */
int
tcp_do_unbind(conn_t *connp)
{
	tcp_t	*tcp = connp->conn_tcp;
	int32_t	oldstate;

	if (tcp->tcp_state != TCPS_BOUND && tcp->tcp_state != TCPS_LISTEN)
		return (-TOUTSTATE);

	/* Get rid of any eagers still queued on this endpoint. */
	mutex_enter(&tcp->tcp_eager_lock);
	if (tcp->tcp_conn_req_cnt_q0 != 0 || tcp->tcp_conn_req_cnt_q != 0)
		tcp_eager_cleanup(tcp, 0);
	mutex_exit(&tcp->tcp_eager_lock);

	/* Drop the listener connection counter, if one is attached. */
	if (tcp->tcp_listen_cnt != NULL)
		TCP_DECR_LISTEN_CNT(tcp);

	connp->conn_saddr_v6 = ipv6_all_zeros;
	connp->conn_laddr_v6 = ipv6_all_zeros;
	tcp_bind_hash_remove(tcp);

	oldstate = tcp->tcp_state;
	tcp->tcp_state = TCPS_IDLE;
	DTRACE_TCP6(state__change, void, NULL, ip_xmit_attr_t *,
	    connp->conn_ixa, void, NULL, tcp_t *, tcp, void, NULL,
	    int32_t, oldstate);

	ip_unbind(connp);
	bzero(&connp->conn_ports, sizeof (connp->conn_ports));

	return (0);
}
/*
 * Fill in `sopp' with the socket protocol properties of `tcp': maximum
 * block size, receive high-water mark, write offset and, for loopback
 * connections, the loopback flag.
 */
void
tcp_get_proto_props(tcp_t *tcp, struct sock_proto_props *sopp)
{
	conn_t	*connp = tcp->tcp_connp;

	sopp->sopp_flags = SOCKOPT_RCVHIWAT | SOCKOPT_MAXBLK | SOCKOPT_WROFF;
	sopp->sopp_maxblk = tcp_maxpsz_set(tcp, B_FALSE);

	if (tcp->tcp_fused) {
		sopp->sopp_rxhiwat =
		    tcp_fuse_set_rcv_hiwat(tcp, connp->conn_rcvbuf);

		ASSERT(tcp->tcp_loopback);
		ASSERT(tcp->tcp_loopback_peer != NULL);

		/* Fused: write offset 0, and refresh the peer's maxpsz. */
		sopp->sopp_wroff = 0;
		(void) tcp_maxpsz_set(tcp->tcp_loopback_peer, B_TRUE);
	} else {
		sopp->sopp_rxhiwat = connp->conn_rcvbuf;

		/*
		 * With SACK the write offset is based on the allocated
		 * template size, otherwise on its current length.  The extra
		 * write offset is skipped for loopback.
		 */
		if (tcp->tcp_snd_sack_ok) {
			sopp->sopp_wroff = connp->conn_ht_iphc_allocated +
			    (tcp->tcp_loopback ? 0 :
			    tcp->tcp_tcps->tcps_wroff_xtra);
		} else {
			sopp->sopp_wroff = connp->conn_ht_iphc_len +
			    (tcp->tcp_loopback ? 0 :
			    tcp->tcp_tcps->tcps_wroff_xtra);
		}
	}

	if (tcp->tcp_loopback) {
		sopp->sopp_flags |= SOCKOPT_LOOPBACK;
		sopp->sopp_loopback = B_TRUE;
	}
}
/*
 * Decide whether zero-copy transmit is enabled for `tcp' and record the
 * decision in tcp_snd_zcopy_on.  Unless the endpoint is detached, the
 * IXAF_VERIFY_ZCOPY flag and the upper layer's copy option are updated too.
 *
 * do_tcpzcopy == 2 always enables; do_tcpzcopy == 1 enables only when the
 * transmit attributes carry IXAF_ZCOPY_CAPAB.  Returns the decision.
 */
boolean_t
tcp_zcopy_check(tcp_t *tcp)
{
	conn_t		*connp = tcp->tcp_connp;
	ip_xmit_attr_t	*ixa = connp->conn_ixa;
	tcp_stack_t	*tcps = tcp->tcp_tcps;
	boolean_t	zc_enabled;

	if (do_tcpzcopy == 2 ||
	    (do_tcpzcopy == 1 && (ixa->ixa_flags & IXAF_ZCOPY_CAPAB)))
		zc_enabled = B_TRUE;
	else
		zc_enabled = B_FALSE;

	tcp->tcp_snd_zcopy_on = zc_enabled;

	if (TCP_IS_DETACHED(tcp))
		return (zc_enabled);

	if (!zc_enabled) {
		ixa->ixa_flags &= ~IXAF_VERIFY_ZCOPY;
		(void) proto_set_tx_copyopt(connp->conn_rq, connp,
		    ZCVMUNSAFE);
		TCP_STAT(tcps, tcp_zcopy_off);
	} else {
		ixa->ixa_flags |= IXAF_VERIFY_ZCOPY;
		(void) proto_set_tx_copyopt(connp->conn_rq, connp,
		    ZCVMSAFE);
		TCP_STAT(tcps, tcp_zcopy_on);
	}

	return (zc_enabled);
}
/*
 * Walk the b_cont chain starting at `bp' and replace every VM-loaned
 * (zero-copy) mblk with an ordinary copy made by copyb().
 *
 * With `fix_xmitlist' set, the b_prev/b_next links of a replaced block are
 * carried over, tcp_xmit_tail is redirected if it pointed at that block,
 * and at the end tcp_xmit_last is set to the last block and
 * tcp_xmit_zc_clean to B_TRUE.
 *
 * Returns the head of the resulting chain.  If copyb() fails, the walk stops
 * there: tcp_xmit_zc_clean is cleared, the part already processed is linked
 * to the unprocessed remainder, and the head of that chain is returned.
 */
mblk_t *
tcp_zcopy_backoff(tcp_t *tcp, mblk_t *bp, boolean_t fix_xmitlist)
{
mblk_t *nbp;
mblk_t *head = NULL;
mblk_t *tail = NULL;
tcp_stack_t *tcps = tcp->tcp_tcps;
ASSERT(bp != NULL);
while (bp != NULL) {
if (IS_VMLOANED_MBLK(bp)) {
TCP_STAT(tcps, tcp_zcopy_backoff);
if ((nbp = copyb(bp)) == NULL) {
/* Out of memory: hand back what we have, still unclean. */
tcp->tcp_xmit_zc_clean = B_FALSE;
if (tail != NULL)
tail->b_cont = bp;
return ((head == NULL) ? bp : head);
}
/*
 * The original carried a zero-copy notification request: either
 * notify right away, or pass the request on to the copy.
 */
if (bp->b_datap->db_struioflag & STRUIO_ZCNOTIFY) {
if (fix_xmitlist)
tcp_zcopy_notify(tcp);
else
nbp->b_datap->db_struioflag |=
STRUIO_ZCNOTIFY;
}
nbp->b_cont = bp->b_cont;
/* Carry over the saved links and fix up tcp_xmit_tail. */
if (fix_xmitlist) {
nbp->b_prev = bp->b_prev;
nbp->b_next = bp->b_next;
if (tcp->tcp_xmit_tail == bp)
tcp->tcp_xmit_tail = nbp;
}
/* Detach and free the original block; continue with the copy. */
bp->b_prev = NULL;
bp->b_next = NULL;
freeb(bp);
bp = nbp;
}
/* Append the (possibly replaced) block to the result chain. */
if (head == NULL) {
head = bp;
}
if (tail == NULL) {
tail = bp;
} else {
tail->b_cont = bp;
tail = bp;
}
bp = bp->b_cont;
}
if (fix_xmitlist) {
tcp->tcp_xmit_last = tail;
tcp->tcp_xmit_zc_clean = B_TRUE;
}
return (head);
}
/*
 * Tell the layer above that zero-copy buffers can be reclaimed.  Detached
 * endpoints are ignored.  Non-STREAMS sockets get the su_zcopy_notify
 * upcall; STREAMS endpoints get STZCNOTIFY set on the stream head and any
 * waiters on sd_zcopy_wait woken up.
 */
void
tcp_zcopy_notify(tcp_t *tcp)
{
	conn_t		*connp;
	struct stdata	*stp;

	if (tcp->tcp_detached)
		return;

	connp = tcp->tcp_connp;
	if (!IPCL_IS_NONSTR(connp)) {
		stp = STREAM(connp->conn_rq);
		mutex_enter(&stp->sd_lock);
		stp->sd_flag |= STZCNOTIFY;
		cv_broadcast(&stp->sd_zcopy_wait);
		mutex_exit(&stp->sd_lock);
	} else {
		(*connp->conn_upcalls->su_zcopy_notify)
		    (connp->conn_upper_handle);
	}
}
/*
 * React to a change in the LSO capability described by `ixa'.
 *
 * Nothing is done when the IP header length is not the plain header length
 * for the IP version in use.  Otherwise LSO is switched on (with
 * tcp_lso_max refreshed) or off, and maxpsz is recomputed.
 */
static void
tcp_update_lso(tcp_t *tcp, ip_xmit_attr_t *ixa)
{
	int	plain_hdr_len;

	plain_hdr_len = (ixa->ixa_flags & IXAF_IS_IPV4) ?
	    IP_SIMPLE_HDR_LENGTH : IPV6_HDR_LEN;
	if (ixa->ixa_ip_hdr_length != plain_hdr_len)
		return;

	if (!(ixa->ixa_flags & IXAF_LSO_CAPAB)) {
		/* LSO is not usable (any more). */
		DTRACE_PROBE3(tcp_update_lso, boolean_t, tcp->tcp_lso,
		    boolean_t, B_FALSE, uint32_t, tcp->tcp_lso_max);

		if (tcp->tcp_lso) {
			/* Back to the default multiplier and PMTU fragsize. */
			tcp->tcp_maxpsz_multiplier =
			    tcp->tcp_tcps->tcps_maxpsz_multiplier;
			ixa->ixa_fragsize = ixa->ixa_pmtu;
			tcp->tcp_lso = B_FALSE;
			TCP_STAT(tcp->tcp_tcps, tcp_lso_disabled);
		}
	} else {
		/* LSO is usable: newly so, or with changed limits. */
		ill_lso_capab_t *lsoc = &ixa->ixa_lso_capab;
		uint_t lso_max = (ixa->ixa_flags & IXAF_IS_IPV4) ?
		    lsoc->ill_lso_max_tcpv4 : lsoc->ill_lso_max_tcpv6;

		ASSERT3U(lso_max, >, 0);
		tcp->tcp_lso_max = MIN(TCP_MAX_LSO_LENGTH, lso_max);

		DTRACE_PROBE3(tcp_update_lso, boolean_t, tcp->tcp_lso,
		    boolean_t, B_TRUE, uint32_t, tcp->tcp_lso_max);

		/* On the off -> on transition the multiplier is zeroed. */
		if (!tcp->tcp_lso)
			tcp->tcp_maxpsz_multiplier = 0;

		tcp->tcp_lso = B_TRUE;
		TCP_STAT(tcp->tcp_tcps, tcp_lso_enabled);
	}

	(void) tcp_maxpsz_set(tcp, B_TRUE);
}
/*
 * Toggle the zero-copy transmit state of `tcp'.  Unless the endpoint is
 * detached, the upper layer's copy option is switched to match and the
 * corresponding statistic is bumped.
 */
static void
tcp_update_zcopy(tcp_t *tcp)
{
	tcp_stack_t	*tcps = tcp->tcp_tcps;
	conn_t		*connp = tcp->tcp_connp;
	boolean_t	was_on = tcp->tcp_snd_zcopy_on ? B_TRUE : B_FALSE;

	tcp->tcp_snd_zcopy_on = was_on ? B_FALSE : B_TRUE;

	if (TCP_IS_DETACHED(tcp))
		return;

	if (was_on) {
		(void) proto_set_tx_copyopt(connp->conn_rq, connp,
		    ZCVMUNSAFE);
		TCP_STAT(tcps, tcp_zcopy_off);
	} else {
		(void) proto_set_tx_copyopt(connp->conn_rq, connp,
		    ZCVMSAFE);
		TCP_STAT(tcps, tcp_zcopy_on);
	}
}
/*
 * Transmit-attribute notification callback; `arg' is the tcp_t.  LSO, PMTU
 * and zero-copy notifications go to their update routines; any other
 * notification type is ignored.  The `ixa' and `narg' arguments are unused:
 * the LSO update works on the conn's own conn_ixa.
 */
static void
tcp_notify(void *arg, ip_xmit_attr_t *ixa, ixa_notify_type_t ntype,
    ixa_notify_arg_t narg)
{
	tcp_t	*tcp = (tcp_t *)arg;
	conn_t	*connp = tcp->tcp_connp;

	if (ntype == IXAN_LSO) {
		tcp_update_lso(tcp, connp->conn_ixa);
	} else if (ntype == IXAN_PMTU) {
		tcp_update_pmtu(tcp, B_FALSE);
	} else if (ntype == IXAN_ZCOPY) {
		tcp_update_zcopy(tcp);
	}
}
/*
 * Write-side service routine.  It only counts the invocation in the
 * per-stack statistics and always returns 0.
 */
static int
tcp_wsrv(queue_t *q)
{
	tcp_t		*tcp = Q_TO_TCP(q);
	tcp_stack_t	*tcps = tcp->tcp_tcps;

	TCP_STAT(tcps, tcp_wsrv_called);

	return (0);
}
/*
 * Look up the endpoint registered under acceptor id `id' in the acceptor
 * fanout of `tcps'.  On a hit the conn's reference count is bumped while
 * the bucket lock is still held and the tcp_t is returned; otherwise NULL.
 */
tcp_t *
tcp_acceptor_hash_lookup(t_uscalar_t id, tcp_stack_t *tcps)
{
	tf_t	*tf = &tcps->tcps_acceptor_fanout[TCP_ACCEPTOR_HASH(id)];
	tcp_t	*tcp;

	mutex_enter(&tf->tf_lock);

	tcp = tf->tf_tcp;
	while (tcp != NULL && tcp->tcp_acceptor_id != id)
		tcp = tcp->tcp_acceptor_hash;

	if (tcp != NULL)
		CONN_INC_REF(tcp->tcp_connp);

	mutex_exit(&tf->tf_lock);
	return (tcp);
}
/*
 * Put `tcp' at the head of the acceptor hash bucket selected by `id'.  An
 * endpoint that is already hashed is removed first.  The bucket lock is
 * remembered in tcp_acceptor_lockp for use by the remove routine.
 */
void
tcp_acceptor_hash_insert(t_uscalar_t id, tcp_t *tcp)
{
	tcp_stack_t	*tcps = tcp->tcp_tcps;
	tf_t		*tf = &tcps->tcps_acceptor_fanout[TCP_ACCEPTOR_HASH(id)];
	tcp_t		**headp = &tf->tf_tcp;
	tcp_t		*first;

	if (tcp->tcp_ptpahn != NULL)
		tcp_acceptor_hash_remove(tcp);

	mutex_enter(&tf->tf_lock);

	/* The old head, if any, now hangs off our forward link. */
	first = *headp;
	if (first != NULL)
		first->tcp_ptpahn = &tcp->tcp_acceptor_hash;

	tcp->tcp_acceptor_hash = first;
	tcp->tcp_ptpahn = headp;
	*headp = tcp;
	tcp->tcp_acceptor_lockp = &tf->tf_lock;

	mutex_exit(&tf->tf_lock);
}
/*
 * Unlink `tcp' from the acceptor hash.  A no-op if it is not hashed.  The
 * bucket lock recorded at insert time is used, and forgotten afterwards.
 */
void
tcp_acceptor_hash_remove(tcp_t *tcp)
{
	kmutex_t	*lockp = tcp->tcp_acceptor_lockp;

	if (tcp->tcp_ptpahn == NULL)
		return;

	ASSERT(lockp != NULL);
	mutex_enter(lockp);

	/* Check again now that the bucket lock is held. */
	if (tcp->tcp_ptpahn != NULL) {
		tcp_t	*succ = tcp->tcp_acceptor_hash;

		if (succ != NULL) {
			succ->tcp_ptpahn = tcp->tcp_ptpahn;
			tcp->tcp_acceptor_hash = NULL;
		}
		*tcp->tcp_ptpahn = succ;
		tcp->tcp_ptpahn = NULL;
	}

	mutex_exit(lockp);
	tcp->tcp_acceptor_lockp = NULL;
}
/*
 * State for tcp_random().
 *
 * DEG_3 is the number of state words; SEP_3 is the initial distance between
 * the front and rear pointers.  The table has DEG_3 + 1 entries; the state
 * proper starts at index 1 (tcp_random_state) and tcp_random_end_ptr points
 * one past its last word.
 */
#define DEG_3 31
#define SEP_3 3
static int tcp_randtbl[DEG_3 + 1];
/* Front and rear pointers into the state; advanced by tcp_random(). */
static int *tcp_random_fptr = &tcp_randtbl[SEP_3 + 1];
static int *tcp_random_rptr = &tcp_randtbl[1];
/* First state word, and one past the last state word. */
static int *tcp_random_state = &tcp_randtbl[1];
static int *tcp_random_end_ptr = &tcp_randtbl[DEG_3 + 1];
/* Held by tcp_random_init() and tcp_random() while they touch the state. */
kmutex_t tcp_random_lock;
/*
 * Seed the generator behind tcp_random().
 *
 * The seed is the wall-clock time XORed with the high-resolution timer.
 * The first state word is that 64-bit value folded to 32 bits, the rest of
 * the state is filled by a linear congruential recurrence, and the
 * generator is then run 10 * DEG_3 times to mix the state.
 *
 * tcp_random_lock must have been initialised before this is called.
 */
void
tcp_random_init(void)
{
	int		i;
	hrtime_t	hrt;
	time_t		wallclock;
	uint64_t	result;

	hrt = gethrtime();
	(void) drv_getparm(TIME, &wallclock);
	result = (uint64_t)wallclock ^ (uint64_t)hrt;

	mutex_enter(&tcp_random_lock);

	/*
	 * Fold the two 32-bit halves together with XOR so every seed bit
	 * contributes.  Multiplying the halves instead yields 0 whenever
	 * either half is 0 (e.g. while the combined value still fits in 32
	 * bits) and an even number three times out of four.
	 */
	tcp_random_state[0] = ((result >> 32) & 0xffffffff) ^
	    (result & 0xffffffff);

	for (i = 1; i < DEG_3; i++)
		tcp_random_state[i] = 1103515245 * tcp_random_state[i - 1]
		    + 12345;

	tcp_random_fptr = &tcp_random_state[SEP_3];
	tcp_random_rptr = &tcp_random_state[0];
	mutex_exit(&tcp_random_lock);

	/* Discard the first outputs; tcp_random() takes the lock itself. */
	for (i = 0; i < 10 * DEG_3; i++)
		(void) tcp_random();
}
/*
 * Return the next pseudo-random number, in the range 1 .. 0x20000.
 *
 * The word at the front pointer is incremented by the word at the rear
 * pointer; bits 15..31 of the sum, plus one, are the result.  Both pointers
 * then advance, wrapping back to the start of the state at the end.
 */
int
tcp_random(void)
{
	int	rv;

	mutex_enter(&tcp_random_lock);

	*tcp_random_fptr += *tcp_random_rptr;
	rv = ((*tcp_random_fptr >> 15) & 0x1ffff) + 1;

	/*
	 * Advance both pointers.  When the front pointer wraps, the rear
	 * pointer is not checked; otherwise the rear pointer may wrap.
	 */
	++tcp_random_fptr;
	++tcp_random_rptr;
	if (tcp_random_fptr >= tcp_random_end_ptr)
		tcp_random_fptr = tcp_random_state;
	else if (tcp_random_rptr >= tcp_random_end_ptr)
		tcp_random_rptr = tcp_random_state;

	mutex_exit(&tcp_random_lock);
	return (rv);
}
/* Number of pass-phrase bytes that go into the ISS key material. */
#define PASSWD_SIZE 16

/*
 * (Re)initialise the per-stack ISS key: an MD5 context primed with the
 * current time, a pseudo-random number, a fixed pad value, the local
 * ethernet address and up to PASSWD_SIZE bytes of `phrase' (`len' bytes
 * long).  The context is only initialised and updated here, not finalised.
 */
void
tcp_iss_key_init(uint8_t *phrase, int len, tcp_stack_t *tcps)
{
	struct {
		int32_t current_time;
		uint32_t randnum;
		uint16_t pad;
		uint8_t ether[6];
		uint8_t passwd[PASSWD_SIZE];
	} cookie;
	time_t now;

	/* Current absolute time. */
	(void) drv_getparm(TIME, &now);
	cookie.current_time = now;

	cookie.randnum = (uint32_t)(gethrtime() + tcp_random());
	cookie.pad = 0x365c;

	/*
	 * Start the password field from the CPU type info, then lay the
	 * caller's phrase over it.
	 */
	bcopy(&cpu_list->cpu_type_info, &cookie.passwd,
	    min(PASSWD_SIZE, sizeof (cpu_list->cpu_type_info)));
	bcopy(phrase, &cookie.passwd, min(PASSWD_SIZE, len));

	(void) localetheraddr(NULL, (struct ether_addr *)&cookie.ether);

	/* Hash the whole cookie into the stack's key context. */
	mutex_enter(&tcps->tcps_iss_key_lock);
	MD5Init(&tcps->tcps_iss_key);
	MD5Update(&tcps->tcps_iss_key, (uchar_t *)&cookie, sizeof (cookie));
	mutex_exit(&tcps->tcps_iss_key_lock);
}
/*
 * Global (not per-stack) initialisation of the TCP module: creates the
 * kmem caches, sets up the random number generator, registers the squeue
 * callback and the global kstats, and finally registers the per-netstack
 * init/fini callbacks.
 */
void
tcp_ddi_g_init(void)
{
/* Each timer cache object has room for a tcp_timer_t plus an mblk_t. */
tcp_timercache = kmem_cache_create("tcp_timercache",
sizeof (tcp_timer_t) + sizeof (mblk_t), 0,
NULL, NULL, NULL, NULL, NULL, 0);
tcp_notsack_blk_cache = kmem_cache_create("tcp_notsack_blk_cache",
sizeof (notsack_blk_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
/* The lock must exist before tcp_random_init() takes it. */
mutex_init(&tcp_random_lock, NULL, MUTEX_DEFAULT, NULL);
tcp_random_init();
ip_squeue_init(tcp_squeue_add);
tcp_g_kstat = tcp_g_kstat_init(&tcp_g_statistics);
tcp_squeue_flag = tcp_squeue_switch(tcp_squeue_wput);
/* tcp_stack_init/tcp_stack_fini are the per-netstack create/destroy hooks. */
netstack_register(NS_TCP, tcp_stack_init, NULL, tcp_stack_fini);
}
/* Module name used to look up the major number for the LDI identifier. */
#define INET_NAME "ip"
/*
 * Per-netstack constructor, registered with netstack_register().
 * Allocates and initialises the tcp_stack_t for the stack `stackid' / `ns'
 * and returns it.  All allocations here use KM_SLEEP.
 */
static void *
tcp_stack_init(netstackid_t stackid, netstack_t *ns)
{
tcp_stack_t *tcps;
int i;
int error = 0;
major_t major;
size_t arrsz;
tcps = (tcp_stack_t *)kmem_zalloc(sizeof (*tcps), KM_SLEEP);
tcps->tcps_netstack = ns;
mutex_init(&tcps->tcps_iss_key_lock, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&tcps->tcps_epriv_port_lock, NULL, MUTEX_DEFAULT, NULL);
/* Default extra privileged ports and the lowest anonymous privileged port. */
tcps->tcps_g_num_epriv_ports = TCP_NUM_EPRIV_PORTS;
tcps->tcps_g_epriv_ports[0] = ULP_DEF_EPRIV_PORT1;
tcps->tcps_g_epriv_ports[1] = ULP_DEF_EPRIV_PORT2;
tcps->tcps_min_anonpriv_port = 512;
/* Bind and acceptor fanout tables, each bucket with its own lock. */
tcps->tcps_bind_fanout = kmem_zalloc(sizeof (tf_t) *
TCP_BIND_FANOUT_SIZE, KM_SLEEP);
tcps->tcps_acceptor_fanout = kmem_zalloc(sizeof (tf_t) *
TCP_ACCEPTOR_FANOUT_SIZE, KM_SLEEP);
for (i = 0; i < TCP_BIND_FANOUT_SIZE; i++) {
mutex_init(&tcps->tcps_bind_fanout[i].tf_lock, NULL,
MUTEX_DEFAULT, NULL);
}
for (i = 0; i < TCP_ACCEPTOR_FANOUT_SIZE; i++) {
mutex_init(&tcps->tcps_acceptor_fanout[i].tf_lock, NULL,
MUTEX_DEFAULT, NULL);
}
ip_drop_register(&tcps->tcps_dropper, "TCP IPsec policy enforcement");
/* Each stack gets its own copy of the global property table. */
arrsz = tcp_propinfo_count * sizeof (mod_prop_info_t);
tcps->tcps_propinfo_tbl = (mod_prop_info_t *)kmem_alloc(arrsz,
KM_SLEEP);
bcopy(tcp_propinfo_tbl, tcps->tcps_propinfo_tbl, arrsz);
/* Note: tcp_max_optsize is a global, recomputed on every stack creation. */
tcp_max_optsize = optcom_max_optsize(tcp_opt_obj.odb_opt_des_arr,
tcp_opt_obj.odb_opt_arr_cnt);
/* Seed the ISS key, using the global T_INFO_ACK bytes as the phrase. */
tcp_iss_key_init((uint8_t *)&tcp_g_t_info_ack,
sizeof (tcp_g_t_info_ack), tcps);
tcps->tcps_kstat = tcp_kstat2_init(stackid);
tcps->tcps_mibkp = tcp_kstat_init(stackid);
major = mod_name_to_major(INET_NAME);
error = ldi_ident_from_major(major, &tcps->tcps_ldi_ident);
ASSERT(error == 0);
/* Pre-allocated message plus the CVs/lock used for ixa cleanup. */
tcps->tcps_ixa_cleanup_mp = allocb_wait(0, BPRI_MED, STR_NOSIG, NULL);
ASSERT(tcps->tcps_ixa_cleanup_mp != NULL);
cv_init(&tcps->tcps_ixa_cleanup_ready_cv, NULL, CV_DEFAULT, NULL);
cv_init(&tcps->tcps_ixa_cleanup_done_cv, NULL, CV_DEFAULT, NULL);
mutex_init(&tcps->tcps_ixa_cleanup_lock, NULL, MUTEX_DEFAULT, NULL);
/* Reclaim state: initially off, with no timer pending. */
mutex_init(&tcps->tcps_reclaim_lock, NULL, MUTEX_DEFAULT, NULL);
tcps->tcps_reclaim = B_FALSE;
tcps->tcps_reclaim_tid = 0;
tcps->tcps_reclaim_period = tcps->tcps_rexmit_interval_max;
/*
 * Per-CPU statistics: the pointer array has max_ncpus slots, but only
 * MAX(ncpus, boot_ncpus) structures are allocated up front.
 */
mutex_enter(&cpu_lock);
tcps->tcps_sc_cnt = MAX(ncpus, boot_ncpus);
mutex_exit(&cpu_lock);
tcps->tcps_sc = kmem_zalloc(max_ncpus * sizeof (tcp_stats_cpu_t *),
KM_SLEEP);
for (i = 0; i < tcps->tcps_sc_cnt; i++) {
tcps->tcps_sc[i] = kmem_zalloc(sizeof (tcp_stats_cpu_t),
KM_SLEEP);
}
mutex_init(&tcps->tcps_listener_conf_lock, NULL, MUTEX_DEFAULT, NULL);
list_create(&tcps->tcps_listener_conf, sizeof (tcp_listener_t),
offsetof(tcp_listener_t, tl_link));
/* The default congestion control algorithm must be loadable. */
tcps->tcps_default_cc_algo = cc_load_algo(CC_DEFAULT_ALGO_NAME);
VERIFY3P(tcps->tcps_default_cc_algo, !=, NULL);
tcpsig_init(tcps);
return (tcps);
}
/*
 * Global teardown, the counterpart of tcp_ddi_g_init(): removes the global
 * kstats, clears the global statistics, destroys the random lock and the
 * kmem caches, and unregisters the netstack callbacks.
 */
void
tcp_ddi_g_destroy(void)
{
tcp_g_kstat_fini(tcp_g_kstat);
tcp_g_kstat = NULL;
bzero(&tcp_g_statistics, sizeof (tcp_g_statistics));
mutex_destroy(&tcp_random_lock);
kmem_cache_destroy(tcp_timercache);
kmem_cache_destroy(tcp_notsack_blk_cache);
netstack_unregister(NS_TCP);
}
/*
 * Per-netstack destructor, registered with netstack_register().  `arg' is
 * the tcp_stack_t returned by tcp_stack_init(); everything allocated or
 * initialised there is released here and the structure itself is freed.
 */
static void
tcp_stack_fini(netstackid_t stackid, void *arg)
{
tcp_stack_t *tcps = (tcp_stack_t *)arg;
int i;
/* ixa cleanup message, condition variables and lock. */
freeb(tcps->tcps_ixa_cleanup_mp);
tcps->tcps_ixa_cleanup_mp = NULL;
cv_destroy(&tcps->tcps_ixa_cleanup_ready_cv);
cv_destroy(&tcps->tcps_ixa_cleanup_done_cv);
mutex_destroy(&tcps->tcps_ixa_cleanup_lock);
/*
 * Clear the reclaim flag under its lock, then cancel a pending reclaim
 * timeout (if any) before the lock is destroyed.
 */
mutex_enter(&tcps->tcps_reclaim_lock);
tcps->tcps_reclaim = B_FALSE;
mutex_exit(&tcps->tcps_reclaim_lock);
if (tcps->tcps_reclaim_tid != 0)
(void) untimeout(tcps->tcps_reclaim_tid);
mutex_destroy(&tcps->tcps_reclaim_lock);
tcp_listener_conf_cleanup(tcps);
/* Per-CPU statistics: the allocated entries first, then the pointer array. */
for (i = 0; i < tcps->tcps_sc_cnt; i++)
kmem_free(tcps->tcps_sc[i], sizeof (tcp_stats_cpu_t));
kmem_free(tcps->tcps_sc, max_ncpus * sizeof (tcp_stats_cpu_t *));
kmem_free(tcps->tcps_propinfo_tbl,
tcp_propinfo_count * sizeof (mod_prop_info_t));
tcps->tcps_propinfo_tbl = NULL;
/* Every fanout bucket must be empty before its lock is destroyed. */
for (i = 0; i < TCP_BIND_FANOUT_SIZE; i++) {
ASSERT(tcps->tcps_bind_fanout[i].tf_tcp == NULL);
mutex_destroy(&tcps->tcps_bind_fanout[i].tf_lock);
}
for (i = 0; i < TCP_ACCEPTOR_FANOUT_SIZE; i++) {
ASSERT(tcps->tcps_acceptor_fanout[i].tf_tcp == NULL);
mutex_destroy(&tcps->tcps_acceptor_fanout[i].tf_lock);
}
kmem_free(tcps->tcps_bind_fanout, sizeof (tf_t) * TCP_BIND_FANOUT_SIZE);
tcps->tcps_bind_fanout = NULL;
kmem_free(tcps->tcps_acceptor_fanout, sizeof (tf_t) *
TCP_ACCEPTOR_FANOUT_SIZE);
tcps->tcps_acceptor_fanout = NULL;
mutex_destroy(&tcps->tcps_iss_key_lock);
mutex_destroy(&tcps->tcps_epriv_port_lock);
ip_drop_unregister(&tcps->tcps_dropper);
/* kstats for this stack. */
tcp_kstat2_fini(stackid, tcps->tcps_kstat);
tcps->tcps_kstat = NULL;
tcp_kstat_fini(stackid, tcps->tcps_mibkp);
tcps->tcps_mibkp = NULL;
tcpsig_fini(tcps);
ldi_ident_release(tcps->tcps_ldi_ident);
kmem_free(tcps, sizeof (*tcps));
}
/*
 * Pick the initial send sequence number (ISS) for tcp and initialize the
 * send-side sequence fields derived from it.
 *
 * The starting value is the stack-wide tcps_iss_incr_extra, which is itself
 * advanced by half of tcps_iss_incr on every call.  What is added on top
 * depends on tcps_strong_iss:
 *
 *   2	   an XOR-fold of the MD5 digest computed from the stack's
 *	   tcps_iss_key context updated with (ports, local address,
 *	   remote address), followed by the case 1 increment as well;
 *   1	   (gethrtime() >> ISS_NSEC_SHT) + tcp_random();
 *   other gethrestime_sec() * tcps_iss_incr.
 */
static void
tcp_iss_init(tcp_t *tcp)
{
MD5_CTX context;
struct { uint32_t ports; in6_addr_t src; in6_addr_t dst; } arg;
uint32_t answer[4];
tcp_stack_t *tcps = tcp->tcp_tcps;
conn_t *connp = tcp->tcp_connp;
/* Advance the shared base and start this connection's ISS from it. */
tcps->tcps_iss_incr_extra += (tcps->tcps_iss_incr >> 1);
tcp->tcp_iss = tcps->tcps_iss_incr_extra;
switch (tcps->tcps_strong_iss) {
case 2:
/*
 * Take a private copy of the keyed MD5 context under the lock so the
 * hash below can run without holding it and without disturbing the key.
 */
mutex_enter(&tcps->tcps_iss_key_lock);
context = tcps->tcps_iss_key;
mutex_exit(&tcps->tcps_iss_key_lock);
arg.ports = connp->conn_ports;
arg.src = connp->conn_laddr_v6;
arg.dst = connp->conn_faddr_v6;
MD5Update(&context, (uchar_t *)&arg, sizeof (arg));
MD5Final((uchar_t *)answer, &context);
/* Fold the four 32-bit digest words into a single 32-bit offset. */
tcp->tcp_iss += answer[0] ^ answer[1] ^ answer[2] ^ answer[3];
/* FALLTHRU: case 2 also applies the case 1 increment. */
case 1:
tcp->tcp_iss += (gethrtime() >> ISS_NSEC_SHT) + tcp_random();
break;
default:
tcp->tcp_iss += (uint32_t)gethrestime_sec() *
tcps->tcps_iss_incr;
break;
}
/*
 * Seed the send state from the chosen ISS: suna is the ISS itself and
 * snxt is one past it (presumably because the SYN consumes one sequence
 * number -- confirm); rexmit_nxt and csuna start at snxt, fss at ISS - 1.
 */
tcp->tcp_valid_bits = TCP_ISS_VALID;
tcp->tcp_fss = tcp->tcp_iss - 1;
tcp->tcp_suna = tcp->tcp_iss;
tcp->tcp_snxt = tcp->tcp_iss + 1;
tcp->tcp_rexmit_nxt = tcp->tcp_snxt;
tcp->tcp_csuna = tcp->tcp_snxt;
}
/*
 * Mark the connection's queue as full via conn_setqfull(), which updates
 * tcp_flow_stopped.  Nothing is done for a tcp that is already closed.
 * When the connection ends up flow-stopped the tcp_flwctl_on statistic
 * is bumped.
 */
void
tcp_setqfull(tcp_t *tcp)
{
	if (!tcp->tcp_closed) {
		tcp_stack_t	*tcps = tcp->tcp_tcps;

		conn_setqfull(tcp->tcp_connp, &tcp->tcp_flow_stopped);
		if (tcp->tcp_flow_stopped) {
			TCP_STAT(tcps, tcp_flwctl_on);
		}
	}
}
/*
 * Counterpart of tcp_setqfull(): clear the queue-full condition via
 * conn_clrqfull(), which updates tcp_flow_stopped.  A closed tcp is left
 * untouched.
 */
void
tcp_clrqfull(tcp_t *tcp)
{
	if (!tcp->tcp_closed) {
		conn_clrqfull(tcp->tcp_connp, &tcp->tcp_flow_stopped);
	}
}
/*
 * Translate an integer tunable value into an squeue processing flag:
 * 1 maps to SQ_NODRAIN, 2 maps to SQ_PROCESS, and every other value maps
 * to SQ_FILL.
 */
static int
tcp_squeue_switch(int val)
{
	if (val == 1)
		return (SQ_NODRAIN);
	if (val == 2)
		return (SQ_PROCESS);
	return (SQ_FILL);
}
/*
 * Attach TCP's private per-squeue state to sqp.
 *
 * A zeroed tcp_squeue_priv_t is allocated (sleeping if necessary) and
 * stored in the squeue's SQPRIVATE_TCP slot.  The first time through, when
 * tcp_free_list_max_cnt is still 0, the global free-list cap is computed
 * as (freemem * PAGESIZE) / (ncpus * sizeof (tcp_t) * 100), where ncpus is
 * boot_max_ncpus unless that is -1, in which case max_ncpus is used.
 */
static void
tcp_squeue_add(squeue_t *sqp)
{
	tcp_squeue_priv_t	*priv;

	priv = kmem_zalloc(sizeof (*priv), KM_SLEEP);
	*squeue_getprivate(sqp, SQPRIVATE_TCP) = (intptr_t)priv;

	if (tcp_free_list_max_cnt == 0) {
		int	cpu_limit;

		if (boot_max_ncpus == -1)
			cpu_limit = max_ncpus;
		else
			cpu_limit = boot_max_ncpus;

		tcp_free_list_max_cnt = (freemem * PAGESIZE) /
		    (cpu_limit * sizeof (tcp_t) * 100);
	}

	priv->tcp_free_list_cnt = 0;
}
/*
 * Bind connp to the address in sa (len bytes) on behalf of cred cr.
 *
 * Only permitted while the endpoint's state is below TCPS_BOUND; otherwise
 * -TOUTSTATE is returned (and logged via strlog when conn_debug is set).
 * The actual work is delegated to tcp_bind_check(); its non-zero result is
 * returned unchanged.  On success the endpoint is in TCPS_BOUND and
 * tcp_conn_req_max is reset to 0, and 0 is returned.
 */
int
tcp_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr,
    boolean_t bind_to_req_port_only)
{
	tcp_t	*tcp = connp->conn_tcp;
	int	rc;

	if (tcp->tcp_state < TCPS_BOUND) {
		rc = tcp_bind_check(connp, sa, len, cr,
		    bind_to_req_port_only);
		if (rc == 0) {
			ASSERT(tcp->tcp_state == TCPS_BOUND);
			tcp->tcp_conn_req_max = 0;
		}
		return (rc);
	}

	/* Already bound (or further along): refuse. */
	if (connp->conn_debug) {
		(void) strlog(TCP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
		    "tcp_bind: bad state, %d", tcp->tcp_state);
	}
	return (-TOUTSTATE);
}
/*
 * Start an active open on connp towards the address in sa.
 *
 * sa/len	destination; len must be sizeof (sin_t) or sizeof (sin6_t)
 * cr, pid	credential and pid of the caller; both are recorded on the
 *		conn and cached in its transmit attributes
 *
 * Returns 0 once the connect has been set up (a SYN is sent if one could
 * be built).  On failure returns either a positive errno (EINVAL,
 * EAFNOSUPPORT, EADDRNOTAVAIL, EOPNOTSUPP, or whatever the
 * tcp_connect_ipv4/ipv6 or CL_INET_CONNECT step reported) or a negated TPI
 * error (-TBADADDR, -TOUTSTATE).
 */
int
tcp_do_connect(conn_t *connp, const struct sockaddr *sa, socklen_t len,
cred_t *cr, pid_t pid)
{
tcp_t *tcp = connp->conn_tcp;
sin_t *sin = (sin_t *)sa;
sin6_t *sin6 = (sin6_t *)sa;
ipaddr_t *dstaddrp;
in_port_t dstport;
uint_t srcid;
int error;
uint32_t mss;
mblk_t *syn_mp;
tcp_stack_t *tcps = tcp->tcp_tcps;
int32_t oldstate;
ip_xmit_attr_t *ixa = connp->conn_ixa;
/* Remembered so the connect_failed path can restore it. */
oldstate = tcp->tcp_state;
/*
 * The address length selects how sa is interpreted; any other length is
 * rejected.  A zero destination port is never acceptable.
 */
switch (len) {
default:
return (EINVAL);
case sizeof (sin_t):
sin = (sin_t *)sa;
if (sin->sin_port == 0) {
return (-TBADADDR);
}
/* A v6-only endpoint cannot connect to a plain IPv4 address. */
if (connp->conn_ipv6_v6only) {
return (EAFNOSUPPORT);
}
break;
case sizeof (sin6_t):
sin6 = (sin6_t *)sa;
if (sin6->sin6_port == 0) {
return (-TBADADDR);
}
break;
}
/*
 * An AF_INET6 endpoint connecting to an IPv4-mapped address is switched
 * to IPv4 operation, unless it is v6-only, in which case the connect is
 * refused.
 */
if (connp->conn_family == AF_INET6 &&
connp->conn_ipversion == IPV6_VERSION &&
IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
if (connp->conn_ipv6_v6only)
return (EADDRNOTAVAIL);
connp->conn_ipversion = IPV4_VERSION;
}
/*
 * Only IDLE, BOUND and LISTEN endpoints may connect, and LISTEN is
 * refused with EOPNOTSUPP when IPCL_IS_NONSTR(connp) is true.
 */
switch (tcp->tcp_state) {
case TCPS_LISTEN:
if (IPCL_IS_NONSTR(connp))
return (EOPNOTSUPP);
/* FALLTHRU */
case TCPS_IDLE:
case TCPS_BOUND:
break;
default:
return (-TOUTSTATE);
}
/*
 * Adopt the caller's credential: take a hold on the new one before
 * releasing the old one.
 */
if (connp->conn_cred != cr) {
crhold(cr);
crfree(connp->conn_cred);
connp->conn_cred = cr;
}
connp->conn_cpid = pid;
/*
 * Cache the cred and pid in the transmit attributes.  No additional hold
 * is taken here; the ASSERT checks that IXA_FREE_CRED is not set
 * (presumably meaning the ixa does not own a cred reference -- confirm).
 */
ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
ixa->ixa_cred = cr;
ixa->ixa_cpid = pid;
if (is_system_labeled()) {
ip_xmit_attr_restore_tsl(ixa, ixa->ixa_cred);
}
/* Dispatch to the IPv6 or IPv4 connect routine. */
if (connp->conn_family == AF_INET6) {
if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
error = tcp_connect_ipv6(tcp, &sin6->sin6_addr,
sin6->sin6_port, sin6->sin6_flowinfo,
sin6->__sin6_src_id, sin6->sin6_scope_id);
} else {
/*
 * Mapped destination: the bound address must be unspecified or mapped
 * too.
 * NOTE(review): this returns directly rather than via connect_failed;
 * the cred, pid and conn_ipversion updates above are left in place.
 */
if (!IN6_IS_ADDR_UNSPECIFIED(
&connp->conn_bound_addr_v6) &&
!IN6_IS_ADDR_V4MAPPED(&connp->conn_bound_addr_v6)) {
return (EADDRNOTAVAIL);
}
dstaddrp = &V4_PART_OF_V6((sin6->sin6_addr));
dstport = sin6->sin6_port;
srcid = sin6->__sin6_src_id;
error = tcp_connect_ipv4(tcp, dstaddrp, dstport,
srcid);
}
} else {
dstaddrp = &sin->sin_addr.s_addr;
dstport = sin->sin_port;
srcid = 0;
error = tcp_connect_ipv4(tcp, dstaddrp, dstport, srcid);
}
if (error != 0)
goto connect_failed;
/* CL_INET_CONNECT reports its result through error. */
CL_INET_CONNECT(connp, B_TRUE, error);
if (error != 0)
goto connect_failed;
TCPS_BUMP_MIB(tcps, tcpActiveOpens);
tcp->tcp_active_open = 1;
/* Working MSS: tcp_mss less the conn's header length. */
mss = tcp->tcp_mss - connp->conn_ht_iphc_len;
/*
 * Receive window: start from conn_rcvbuf, apply MSS_ROUNDUP (presumably
 * rounds up to a multiple of mss -- confirm), enforce a floor of
 * tcps_recv_hiwat_minmss segments, and write the result back to
 * conn_rcvbuf.
 */
tcp->tcp_rwnd = connp->conn_rcvbuf;
tcp->tcp_rwnd = MAX(MSS_ROUNDUP(tcp->tcp_rwnd, mss),
tcps->tcps_recv_hiwat_minmss * mss);
connp->conn_rcvbuf = tcp->tcp_rwnd;
tcp_set_ws_value(tcp);
/* Header window field: rwnd shifted by tcp_rcv_ws, network order. */
tcp->tcp_tcpha->tha_win = htons(tcp->tcp_rwnd >> tcp->tcp_rcv_ws);
if (tcp->tcp_rcv_ws > 0 || tcps->tcps_wscale_always)
tcp->tcp_snd_ws_ok = B_TRUE;
/* Timestamps: always, or only alongside window scaling, per stack. */
if (tcps->tcps_tstamp_always ||
(tcp->tcp_rcv_ws && tcps->tcps_tstamp_if_wscale)) {
tcp->tcp_snd_ts_ok = B_TRUE;
}
/* SACK and ECN are enabled here only when the stack setting is 2. */
if (tcps->tcps_sack_permitted == 2) {
ASSERT(tcp->tcp_num_sack_blk == 0);
ASSERT(tcp->tcp_notsack_list == NULL);
tcp->tcp_snd_sack_ok = B_TRUE;
}
if (tcps->tcps_ecn_permitted == 2)
tcp->tcp_ecn_ok = B_TRUE;
/*
 * NOTE(review): the probe's last argument is always TCPS_BOUND, although
 * oldstate may have been TCPS_IDLE or TCPS_LISTEN -- confirm intended.
 */
DTRACE_TCP6(state__change, void, NULL, ip_xmit_attr_t *,
connp->conn_ixa, void, NULL, tcp_t *, tcp, void, NULL,
int32_t, TCPS_BOUND);
/* Arm the timer with the current RTO, then build and send the SYN. */
TCP_TIMER_RESTART(tcp, tcp->tcp_rto);
syn_mp = tcp_xmit_mp(tcp, NULL, 0, NULL, NULL,
tcp->tcp_iss, B_FALSE, NULL, B_FALSE);
/*
 * If no SYN could be built nothing is sent now, but 0 is still returned
 * (presumably the timer armed above retries -- confirm).
 */
if (syn_mp != NULL) {
/* The connection id is bumped before the SYN leaves. */
SOCK_CONNID_BUMP(tcp->tcp_connid);
DTRACE_TCP5(connect__request, mblk_t *, NULL,
ip_xmit_attr_t *, connp->conn_ixa,
void_ip_t *, syn_mp->b_rptr, tcp_t *, tcp,
tcph_t *,
&syn_mp->b_rptr[connp->conn_ixa->ixa_ip_hdr_length]);
tcp_send_data(tcp, syn_mp);
}
/* Any saved connect-request options are released on both exits. */
if (tcp->tcp_conn.tcp_opts_conn_req != NULL)
tcp_close_mpp(&tcp->tcp_conn.tcp_opts_conn_req);
return (0);
connect_failed:
/* Undo: forget the peer address/port and restore the entry state. */
connp->conn_faddr_v6 = ipv6_all_zeros;
connp->conn_fport = 0;
tcp->tcp_state = oldstate;
if (tcp->tcp_conn.tcp_opts_conn_req != NULL)
tcp_close_mpp(&tcp->tcp_conn.tcp_opts_conn_req);
return (error);
}
/*
 * Put connp into (or keep it in) the listening state.
 *
 * sa/len	   address to bind to when the endpoint is not yet bound;
 *		   sa may be NULL, in which case a wildcard address of the
 *		   conn's family is used
 * backlog	   requested connection backlog; clamped to the stack's
 *		   [tcps_conn_req_min, tcps_conn_req_max_q] range when
 *		   non-zero
 * cr		   caller's credential (must not be NULL)
 * bind_to_req_port_only
 *		   passed through to tcp_bind_check()
 *
 * Returns 0 on success, -TOUTSTATE for an endpoint in an unsuitable state,
 * or the error from tcp_bind_check() / ip_laddr_fanout_insert().
 */
int
tcp_do_listen(conn_t *connp, struct sockaddr *sa, socklen_t len,
int backlog, cred_t *cr, boolean_t bind_to_req_port_only)
{
tcp_t *tcp = connp->conn_tcp;
int error = 0;
tcp_stack_t *tcps = tcp->tcp_tcps;
int32_t oldstate;
ASSERT(cr != NULL);
if (tcp->tcp_state >= TCPS_BOUND) {
/*
 * Already bound or listening: a positive backlog just (re)applies the
 * backlog without rebinding.  Anything else is a state error.
 */
if ((tcp->tcp_state == TCPS_BOUND ||
tcp->tcp_state == TCPS_LISTEN) && backlog > 0) {
goto do_listen;
}
if (connp->conn_debug) {
(void) strlog(TCP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
"tcp_listen: bad state, %d", tcp->tcp_state);
}
return (-TOUTSTATE);
} else {
sin6_t addr;
sin_t *sin;
sin6_t *sin6;
/*
 * Not bound yet.  With no address supplied, build a null address of the
 * conn's family in the local sin6_t buffer (large enough for either
 * form) and bind to that.
 */
if (sa == NULL) {
ASSERT(IPCL_IS_NONSTR(connp));
if (connp->conn_family == AF_INET) {
len = sizeof (sin_t);
sin = (sin_t *)&addr;
*sin = sin_null;
sin->sin_family = AF_INET;
} else {
ASSERT(connp->conn_family == AF_INET6);
len = sizeof (sin6_t);
sin6 = (sin6_t *)&addr;
*sin6 = sin6_null;
sin6->sin6_family = AF_INET6;
}
sa = (struct sockaddr *)&addr;
}
error = tcp_bind_check(connp, sa, len, cr,
bind_to_req_port_only);
if (error)
return (error);
/* Bound successfully; continue into do_listen below. */
}
do_listen:
ASSERT(tcp->tcp_state == TCPS_BOUND || tcp->tcp_state == TCPS_LISTEN);
tcp->tcp_conn_req_max = backlog;
if (tcp->tcp_conn_req_max) {
/* Clamp a non-zero backlog to the per-stack limits. */
if (tcp->tcp_conn_req_max < tcps->tcps_conn_req_min)
tcp->tcp_conn_req_max = tcps->tcps_conn_req_min;
if (tcp->tcp_conn_req_max > tcps->tcps_conn_req_max_q)
tcp->tcp_conn_req_max = tcps->tcps_conn_req_max_q;
/*
 * First transition into LISTEN: initialize the q0 and drop-q0 eager
 * lists as empty circular lists pointing at the listener itself.  An
 * endpoint that is already listening keeps its existing lists.
 */
if (tcp->tcp_state != TCPS_LISTEN) {
tcp->tcp_state = TCPS_LISTEN;
DTRACE_TCP6(state__change, void, NULL, ip_xmit_attr_t *,
connp->conn_ixa, void, NULL, tcp_t *, tcp,
void, NULL, int32_t, TCPS_BOUND);
tcp->tcp_eager_next_q0 = tcp->tcp_eager_prev_q0 = tcp;
tcp->tcp_eager_next_drop_q0 = tcp;
tcp->tcp_eager_prev_drop_q0 = tcp;
tcp->tcp_second_ctimer_threshold =
tcps->tcps_ip_abort_linterval;
}
}
/*
 * The receive function is set before the conn is inserted into the
 * fanout (presumably so an arriving packet never finds a conn without
 * one -- confirm).
 */
connp->conn_recv = tcp_input_listener_unbound;
error = ip_laddr_fanout_insert(connp);
if (error != 0) {
/* Insertion failed: undo the bind and return to TCPS_IDLE. */
oldstate = tcp->tcp_state;
tcp->tcp_state = TCPS_IDLE;
DTRACE_TCP6(state__change, void, NULL, ip_xmit_attr_t *,
connp->conn_ixa, void, NULL, tcp_t *, tcp, void, NULL,
int32_t, oldstate);
connp->conn_bound_addr_v6 = ipv6_all_zeros;
connp->conn_laddr_v6 = ipv6_all_zeros;
connp->conn_saddr_v6 = ipv6_all_zeros;
connp->conn_ports = 0;
if (connp->conn_anon_port) {
zone_t *zone;
zone = crgetzone(cr);
connp->conn_anon_port = B_FALSE;
/*
 * NOTE(review): conn_ports was zeroed just above; if conn_lport aliases
 * part of conn_ports this passes port 0 -- confirm against the conn_t
 * definition.
 */
(void) tsol_mlp_anon(zone, connp->conn_mlp_type,
connp->conn_proto, connp->conn_lport, B_FALSE);
}
connp->conn_mlp_type = mlptSingle;
tcp_bind_hash_remove(tcp);
return (error);
} else {
/*
 * If listener limits are configured and this listener has no counter
 * yet (it may already have one from an earlier call), look up the ratio
 * for its local port and, when non-zero, allocate the counter.
 */
if (!list_is_empty(&tcps->tcps_listener_conf) &&
tcp->tcp_listen_cnt == NULL) {
tcp_listen_cnt_t *tlc;
uint32_t ratio;
ratio = tcp_find_listener_conf(tcps,
ntohs(connp->conn_lport));
if (ratio != 0) {
uint32_t mem_ratio, tot_buf;
tlc = kmem_alloc(sizeof (tcp_listen_cnt_t),
KM_SLEEP);
/*
 * Limit = maxusers / ratio, scaled up when the combined send and
 * receive buffer size is below MB and scaled down when it is MB or
 * more.
 * NOTE(review): assumes conn_rcvbuf + conn_sndbuf is non-zero;
 * MB / tot_buf would otherwise divide by zero.
 */
if ((tot_buf = connp->conn_rcvbuf +
connp->conn_sndbuf) < MB) {
mem_ratio = MB / tot_buf;
tlc->tlc_max = maxusers / ratio *
mem_ratio;
} else {
mem_ratio = tot_buf / MB;
tlc->tlc_max = maxusers / ratio /
mem_ratio;
}
/* Never go below the configured minimum. */
if (tlc->tlc_max <= tcp_min_conn_listener)
tlc->tlc_max = tcp_min_conn_listener;
/* Count starts at 1 (presumably the listener itself -- confirm). */
tlc->tlc_cnt = 1;
tlc->tlc_drop = 0;
tcp->tcp_listen_cnt = tlc;
}
}
}
return (error);
}