#include <sys/types.h>
#include <sys/strlog.h>
#include <sys/policy.h>
#include <sys/strsun.h>
#include <sys/squeue_impl.h>
#include <sys/squeue.h>
#include <inet/common.h>
#include <inet/ip.h>
#include <inet/tcp.h>
#include <inet/tcp_impl.h>
/* Gate for tcp_conn_reclaim(): when B_FALSE that routine returns at once. */
static boolean_t tcp_do_reclaim = B_TRUE;

/*
 * Forward declarations for the TCP_IOC_ABORT_CONN implementation.
 * tcp_ioctl_abort_conn() is the only one with external linkage.
 */
void tcp_ioctl_abort_conn(queue_t *q, mblk_t *mp);
static int tcp_ioctl_abort(tcp_ioc_abort_conn_t *acp, tcp_stack_t *tcps);
static int tcp_ioctl_abort_bucket(tcp_ioc_abort_conn_t *acp, int index,
    int *count, boolean_t exact, tcp_stack_t *tcps);
static mblk_t *tcp_ioctl_abort_build_msg(tcp_ioc_abort_conn_t *acp,
    tcp_t *tp);
static void tcp_ioctl_abort_dump(tcp_ioc_abort_conn_t *acp);
static void tcp_ioctl_abort_handler(void *arg, mblk_t *mp, void *arg2,
    ip_recv_attr_t *dummy);
/*
 * Accessors for the local/remote endpoints carried in a
 * tcp_ioc_abort_conn_t.  ac_local and ac_remote are reinterpreted as
 * sin_t (IPv4) or sin6_t (IPv6); the caller is responsible for picking
 * the variant that matches ss_family.  Every accessor expands to an
 * lvalue, so it can be used for both reads and assignments.
 */

/* IPv4 view: sockaddr pointers, then address and port fields. */
#define TCP_AC_V4LADDR(acp) ((sin_t *)&(acp)->ac_local)
#define TCP_AC_V4RADDR(acp) ((sin_t *)&(acp)->ac_remote)
#define TCP_AC_V4LOCAL(acp) (TCP_AC_V4LADDR(acp)->sin_addr.s_addr)
#define TCP_AC_V4REMOTE(acp) (TCP_AC_V4RADDR(acp)->sin_addr.s_addr)
#define TCP_AC_V4LPORT(acp) (TCP_AC_V4LADDR(acp)->sin_port)
#define TCP_AC_V4RPORT(acp) (TCP_AC_V4RADDR(acp)->sin_port)
/* IPv6 view: sockaddr pointers, then address and port fields. */
#define TCP_AC_V6LADDR(acp) ((sin6_t *)&(acp)->ac_local)
#define TCP_AC_V6RADDR(acp) ((sin6_t *)&(acp)->ac_remote)
#define TCP_AC_V6LOCAL(acp) (TCP_AC_V6LADDR(acp)->sin6_addr)
#define TCP_AC_V6REMOTE(acp) (TCP_AC_V6RADDR(acp)->sin6_addr)
#define TCP_AC_V6LPORT(acp) (TCP_AC_V6LADDR(acp)->sin6_port)
#define TCP_AC_V6RPORT(acp) (TCP_AC_V6RADDR(acp)->sin6_port)
/*
 * TCP_AC_GET_ERRCODE(state, err)
 *
 * Store in `err` the errno value to use when aborting a connection that
 * is currently in TCP state `state`:
 *
 *   SYN_SENT, SYN_RCVD                              -> ECONNREFUSED
 *   ESTABLISHED, FIN_WAIT_1, FIN_WAIT_2, CLOSE_WAIT -> ECONNRESET
 *   CLOSING, LAST_ACK, TIME_WAIT                    -> 0
 *   anything else                                   -> ENXIO
 *
 * `err` must be a modifiable lvalue.  The body is wrapped in
 * do { } while (0) so that the macro followed by ';' forms exactly one
 * statement and can be used safely as the unbraced body of an if/else.
 */
#define	TCP_AC_GET_ERRCODE(state, err) do {	\
	switch ((state)) {			\
	case TCPS_SYN_SENT:			\
	case TCPS_SYN_RCVD:			\
		(err) = ECONNREFUSED;		\
		break;				\
	case TCPS_ESTABLISHED:			\
	case TCPS_FIN_WAIT_1:			\
	case TCPS_FIN_WAIT_2:			\
	case TCPS_CLOSE_WAIT:			\
		(err) = ECONNRESET;		\
		break;				\
	case TCPS_CLOSING:			\
	case TCPS_LAST_ACK:			\
	case TCPS_TIME_WAIT:			\
		(err) = 0;			\
		break;				\
	default:				\
		(err) = ENXIO;			\
		break;				\
	}					\
} while (0)
/*
 * TCP_AC_ADDR_MATCH(acp, connp, tcp)
 *
 * Non-zero when connection `connp` (whose TCP state lives in `tcp`)
 * satisfies the address/port/state filter in `acp`.
 *
 * The IPv4 comparison is used when acp->ac_local.ss_family is AF_INET;
 * any other family falls through to the IPv6 comparison.  In both cases:
 *   - a wildcard address (INADDR_ANY / unspecified IPv6) matches any
 *     address, otherwise it must equal the connection's local/foreign
 *     address;
 *   - a port of 0 matches any port, otherwise it must equal
 *     conn_lport / conn_fport as stored in the conn;
 *   - tcp_state must lie in the inclusive range [ac_start, ac_end].
 *
 * Arguments are evaluated more than once; pass side-effect-free
 * expressions only.
 */
#define TCP_AC_ADDR_MATCH(acp, connp, tcp) \
(((acp)->ac_local.ss_family == AF_INET) ? \
((TCP_AC_V4LOCAL((acp)) == INADDR_ANY || \
TCP_AC_V4LOCAL((acp)) == (connp)->conn_laddr_v4) && \
(TCP_AC_V4REMOTE((acp)) == INADDR_ANY || \
TCP_AC_V4REMOTE((acp)) == (connp)->conn_faddr_v4) && \
(TCP_AC_V4LPORT((acp)) == 0 || \
TCP_AC_V4LPORT((acp)) == (connp)->conn_lport) && \
(TCP_AC_V4RPORT((acp)) == 0 || \
TCP_AC_V4RPORT((acp)) == (connp)->conn_fport) && \
(acp)->ac_start <= (tcp)->tcp_state && \
(acp)->ac_end >= (tcp)->tcp_state) : \
((IN6_IS_ADDR_UNSPECIFIED(&TCP_AC_V6LOCAL((acp))) || \
IN6_ARE_ADDR_EQUAL(&TCP_AC_V6LOCAL((acp)), \
&(connp)->conn_laddr_v6)) && \
(IN6_IS_ADDR_UNSPECIFIED(&TCP_AC_V6REMOTE((acp))) || \
IN6_ARE_ADDR_EQUAL(&TCP_AC_V6REMOTE((acp)), \
&(connp)->conn_faddr_v6)) && \
(TCP_AC_V6LPORT((acp)) == 0 || \
TCP_AC_V6LPORT((acp)) == (connp)->conn_lport) && \
(TCP_AC_V6RPORT((acp)) == 0 || \
TCP_AC_V6RPORT((acp)) == (connp)->conn_fport) && \
(acp)->ac_start <= (tcp)->tcp_state && \
(acp)->ac_end >= (tcp)->tcp_state))
/*
 * TCP_AC_MATCH(acp, connp, tcp)
 *
 * Zone-aware wrapper around TCP_AC_ADDR_MATCH: the address/port/state
 * filter is only consulted when acp->ac_zoneid is ALL_ZONES or equals the
 * connection's conn_zoneid; otherwise the result is 0 (no match).
 */
#define TCP_AC_MATCH(acp, connp, tcp) \
(((acp)->ac_zoneid == ALL_ZONES || \
(acp)->ac_zoneid == (connp)->conn_zoneid) ? \
TCP_AC_ADDR_MATCH(acp, connp, tcp) : 0)
/*
 * Allocate and fill the per-connection abort message.
 *
 * Layout of the returned mblk: a uint32_t holding TCP_IOC_ABORT_CONN,
 * immediately followed by a tcp_ioc_abort_conn_t.  The state range and
 * zone id are copied from the request `acp`; the address family follows
 * acp->ac_local.ss_family (AF_INET, otherwise AF_INET6) and the
 * addresses/ports are taken from tp's conn, so the message names one
 * concrete connection rather than the (possibly wildcarded) filter.
 *
 * Returns the new mblk, or NULL if allocb() fails.
 */
static mblk_t *
tcp_ioctl_abort_build_msg(tcp_ioc_abort_conn_t *acp, tcp_t *tp)
{
	tcp_ioc_abort_conn_t *req;
	mblk_t *msg;

	msg = allocb(sizeof (uint32_t) + sizeof (*acp), BPRI_LO);
	if (msg == NULL)
		return (NULL);

	/* Command word first, request structure right behind it. */
	*((uint32_t *)msg->b_rptr) = TCP_IOC_ABORT_CONN;
	req = (tcp_ioc_abort_conn_t *)((uchar_t *)msg->b_rptr +
	    sizeof (uint32_t));

	if (acp->ac_local.ss_family != AF_INET) {
		req->ac_local.ss_family = AF_INET6;
		req->ac_remote.ss_family = AF_INET6;
		TCP_AC_V6LOCAL(req) = tp->tcp_connp->conn_laddr_v6;
		TCP_AC_V6REMOTE(req) = tp->tcp_connp->conn_faddr_v6;
		TCP_AC_V6LPORT(req) = tp->tcp_connp->conn_lport;
		TCP_AC_V6RPORT(req) = tp->tcp_connp->conn_fport;
	} else {
		req->ac_local.ss_family = AF_INET;
		req->ac_remote.ss_family = AF_INET;
		TCP_AC_V4LOCAL(req) = tp->tcp_connp->conn_laddr_v4;
		TCP_AC_V4REMOTE(req) = tp->tcp_connp->conn_faddr_v4;
		TCP_AC_V4LPORT(req) = tp->tcp_connp->conn_lport;
		TCP_AC_V4RPORT(req) = tp->tcp_connp->conn_fport;
	}

	req->ac_zoneid = acp->ac_zoneid;
	req->ac_start = acp->ac_start;
	req->ac_end = acp->ac_end;

	/* Valid data ends right after the request structure. */
	msg->b_wptr = (uchar_t *)(req + 1);
	return (msg);
}
/*
 * Log the contents of an abort request through strlog(): printable
 * local/remote address, ports converted with ntohs(), and the state
 * range.  SL_CONSOLE is added to the log flags only when the request
 * targets GLOBAL_ZONEID or ALL_ZONES.
 *
 * Any family other than AF_INET is formatted through the IPv6 accessors.
 */
static void
tcp_ioctl_abort_dump(tcp_ioc_abort_conn_t *acp)
{
	char local_str[128];
	char remote_str[128];
	const void *local_addr;
	const void *remote_addr;
	in_port_t local_port, remote_port;
	sa_family_t family = acp->ac_local.ss_family;
	ushort_t flags = SL_TRACE | SL_NOTE;

	if (family == AF_INET) {
		local_addr = (const void *)&TCP_AC_V4LOCAL(acp);
		remote_addr = (const void *)&TCP_AC_V4REMOTE(acp);
		local_port = ntohs(TCP_AC_V4LPORT(acp));
		remote_port = ntohs(TCP_AC_V4RPORT(acp));
	} else {
		local_addr = (const void *)&TCP_AC_V6LOCAL(acp);
		remote_addr = (const void *)&TCP_AC_V6REMOTE(acp);
		local_port = ntohs(TCP_AC_V6LPORT(acp));
		remote_port = ntohs(TCP_AC_V6RPORT(acp));
	}
	(void) inet_ntop(family, local_addr, local_str, sizeof (local_str));
	(void) inet_ntop(family, remote_addr, remote_str,
	    sizeof (remote_str));

	if (acp->ac_zoneid == GLOBAL_ZONEID || acp->ac_zoneid == ALL_ZONES)
		flags |= SL_CONSOLE;

	(void) strlog(TCP_MOD_ID, 0, 1, flags,
	    "TCP_IOC_ABORT_CONN: local = %s:%d, remote = %s:%d, "
	    "start = %d, end = %d\n", local_str, local_port, remote_str,
	    remote_port, acp->ac_start, acp->ac_end);
}
/*
 * Squeue callback that carries out the abort of a single connection.
 *
 *   arg   - the conn_t of the connection to abort
 *   mp    - message produced by tcp_ioctl_abort_build_msg(); always
 *           freed here
 *   arg2, dummy - unused
 *
 * Nothing is done if the TCP has meanwhile reached TCPS_CLOSED or
 * TCPS_BOUND.  Otherwise, if its state is still at or below the
 * request's ac_end, the connection is torn down with tcp_clean_death()
 * using the errno chosen by TCP_AC_GET_ERRCODE for the current state.
 */
static void
tcp_ioctl_abort_handler(void *arg, mblk_t *mp, void *arg2,
    ip_recv_attr_t *dummy)
{
	conn_t *connp = (conn_t *)arg;
	tcp_t *tcp = connp->conn_tcp;

	if (tcp->tcp_state != TCPS_CLOSED && tcp->tcp_state != TCPS_BOUND) {
		tcp_ioc_abort_conn_t *req;

		/* The request follows the leading uint32_t command word. */
		req = (tcp_ioc_abort_conn_t *)(mp->b_rptr +
		    sizeof (uint32_t));
		if (tcp->tcp_state <= req->ac_end) {
			int errcode;

			TCP_AC_GET_ERRCODE(tcp->tcp_state, errcode);
			(void) tcp_clean_death(tcp, errcode);
		}
	}
	freemsg(mp);
}
/*
 * Abort every matching connection in one bucket of the IP stack's
 * connection fanout (ips_ipcl_conn_fanout[index]).
 *
 *   acp   - filter (zone, addresses, ports, state range)
 *   index - bucket to scan
 *   count - incremented by the number of connections matched
 *   exact - when set, stop scanning after the first match
 *   tcps  - TCP stack owning the fanout
 *
 * Work is done in passes.  In each pass the bucket lock is held while up
 * to 500 matching conns are referenced (CONN_INC_REF) and an abort
 * message is built for each; the lock is then dropped and every message
 * is handed to its connection's squeue with tcp_ioctl_abort_handler as
 * the callback.  A pass that filled its quota of 500 is followed by
 * another pass over the bucket from the head.
 *
 * Returns 0, or ENOMEM if a message could not be allocated (the
 * reference just taken is dropped; messages already built in that pass
 * are still dispatched and counted).
 */
static int
tcp_ioctl_abort_bucket(tcp_ioc_abort_conn_t *acp, int index, int *count,
    boolean_t exact, tcp_stack_t *tcps)
{
	ip_stack_t *ipst = tcps->tcps_netstack->netstack_ip;
	connf_t *bucket = &ipst->ips_ipcl_conn_fanout[index];
	MBLKP pending = NULL;
	int err = 0;
	int matched;

	do {
		MBLKP tail = NULL;
		MBLKP msg;
		conn_t *cp;
		tcp_t *tcp;

		matched = 0;

		/* Collect matches while the bucket is locked. */
		mutex_enter(&bucket->connf_lock);
		for (cp = bucket->connf_head; cp != NULL;
		    cp = cp->conn_next) {
			tcp = cp->conn_tcp;
			if (TCP_AC_MATCH(acp, cp, tcp)) {
				CONN_INC_REF(cp);
				msg = tcp_ioctl_abort_build_msg(acp, tcp);
				if (msg == NULL) {
					err = ENOMEM;
					CONN_DEC_REF(cp);
					break;
				}
				/* Remember the target until dispatch. */
				msg->b_prev = (mblk_t *)tcp;
				if (pending == NULL)
					pending = msg;
				else
					tail->b_next = msg;
				tail = msg;
				matched++;
				if (exact)
					break;
			}
			/* Bound the work done under one lock hold. */
			if (matched >= 500)
				break;
		}
		mutex_exit(&bucket->connf_lock);

		/* Dispatch each message to its connection's squeue. */
		while ((msg = pending) != NULL) {
			pending = pending->b_next;
			tcp = (tcp_t *)msg->b_prev;
			msg->b_next = msg->b_prev = NULL;
			SQUEUE_ENTER_ONE(tcp->tcp_connp->conn_sqp, msg,
			    tcp_ioctl_abort_handler, tcp->tcp_connp, NULL,
			    SQ_FILL, SQTAG_TCP_ABORT_BUCKET);
		}

		*count += matched;
	} while (matched >= 500 && err == 0);

	return (err);
}
/*
 * Abort all connections in `tcps` that match the filter `acp`.
 *
 * Every bucket of the IP stack's connection fanout is scanned with
 * tcp_ioctl_abort_bucket(); scanning stops at the first bucket that
 * reports an error.  `exact` is passed down so that a bucket scan stops
 * after its first match; it is set when the remote address, both ports
 * and the local address are all non-wildcard.
 *
 * The number of aborted connections is logged through strlog()
 * (SL_CONSOLE only for GLOBAL_ZONEID / ALL_ZONES requests).
 *
 * Returns 0 on success, the error from tcp_ioctl_abort_bucket(), or
 * ENOENT when no error occurred but nothing matched.
 *
 * NOTE(review): earlier revisions of this function built a `ports` word
 * and tested `index != -1` to call a single bucket, but `index` was never
 * computed, so that branch was unreachable and the stores were dead.
 * Both have been removed; behaviour is unchanged (all buckets are always
 * scanned).  If a direct hash lookup for fully specified requests is
 * wanted, it needs to be added back deliberately.
 */
static int
tcp_ioctl_abort(tcp_ioc_abort_conn_t *acp, tcp_stack_t *tcps)
{
	int err = 0, count = 0;
	boolean_t exact = B_FALSE;
	int index;
	ushort_t logflags;
	ip_stack_t *ipst = tcps->tcps_netstack->netstack_ip;

	if (acp->ac_local.ss_family == AF_INET) {
		if (TCP_AC_V4REMOTE(acp) != INADDR_ANY &&
		    TCP_AC_V4LPORT(acp) != 0 && TCP_AC_V4RPORT(acp) != 0) {
			exact = (TCP_AC_V4LOCAL(acp) != INADDR_ANY);
		}
	} else {
		if (!IN6_IS_ADDR_UNSPECIFIED(&TCP_AC_V6REMOTE(acp)) &&
		    TCP_AC_V6LPORT(acp) != 0 && TCP_AC_V6RPORT(acp) != 0) {
			exact = !IN6_IS_ADDR_UNSPECIFIED(&TCP_AC_V6LOCAL(acp));
		}
	}

	/* Walk the whole fanout; give up on the first failing bucket. */
	for (index = 0; index < ipst->ips_ipcl_conn_fanout_size; index++) {
		err = tcp_ioctl_abort_bucket(acp, index, &count, exact, tcps);
		if (err != 0)
			break;
	}

	logflags = SL_TRACE | SL_NOTE;
	if (acp->ac_zoneid == GLOBAL_ZONEID || acp->ac_zoneid == ALL_ZONES)
		logflags |= SL_CONSOLE;
	(void) strlog(TCP_MOD_ID, 0, 1, logflags, "TCP_IOC_ABORT_CONN: "
	    "aborted %d connection%c\n", count, ((count > 1) ? 's' : ' '));

	if (err == 0 && count == 0)
		err = ENOENT;
	return (err);
}
/*
 * STREAMS entry point for the TCP_IOC_ABORT_CONN ioctl.
 *
 *   q  - queue the ioctl arrived on (its conn supplies the TCP stack)
 *   mp - M_IOCTL message; mp->b_cont must carry exactly one
 *        tcp_ioc_abort_conn_t
 *
 * The request is rejected with
 *   EINVAL - missing payload or wrong ioc_count; a specific ac_zoneid
 *            that zone_find_by_id() cannot find; a state range outside
 *            [TCPS_SYN_SENT, TCPS_TIME_WAIT] or with start > end;
 *            mismatching or unsupported address families
 *   EPERM  - secpolicy_ip_config() denies the caller
 * otherwise the request is logged and handed to tcp_ioctl_abort(), whose
 * result becomes the ioctl result.
 *
 * When the TCP stack is not the global netstack, ac_zoneid is rewritten
 * to GLOBAL_ZONEID before matching.
 *
 * The payload (mp->b_cont) is always freed, and the ioctl is always
 * answered: miocack() on success, miocnak() with the error otherwise.
 */
void
tcp_ioctl_abort_conn(queue_t *q, mblk_t *mp)
{
	int err;
	IOCP iocp;
	MBLKP mp1;
	sa_family_t laf, raf;
	tcp_ioc_abort_conn_t *acp;
	zone_t *zptr;
	conn_t *connp = Q_TO_CONN(q);
	tcp_t *tcp = connp->conn_tcp;
	tcp_stack_t *tcps = tcp->tcp_tcps;

	iocp = (IOCP)mp->b_rptr;
	if ((mp1 = mp->b_cont) == NULL ||
	    iocp->ioc_count != sizeof (tcp_ioc_abort_conn_t)) {
		err = EINVAL;
		goto out;
	}

	/* Privilege check before looking at the payload. */
	if (secpolicy_ip_config(iocp->ioc_cr, B_FALSE) != 0) {
		err = EPERM;
		goto out;
	}

	/* Only the first data block is used; drop anything chained to it. */
	if (mp1->b_cont != NULL) {
		freemsg(mp1->b_cont);
		mp1->b_cont = NULL;
	}

	acp = (tcp_ioc_abort_conn_t *)mp1->b_rptr;
	laf = acp->ac_local.ss_family;
	raf = acp->ac_remote.ss_family;

	/*
	 * A specific zone was requested: make sure it exists.  The lookup
	 * must use the zone id from the request; looking up the caller's
	 * own zone would always succeed and validate nothing.
	 */
	if (acp->ac_zoneid != GLOBAL_ZONEID && acp->ac_zoneid != ALL_ZONES) {
		zptr = zone_find_by_id(acp->ac_zoneid);
		if (zptr != NULL) {
			zone_rele(zptr);
		} else {
			err = EINVAL;
			goto out;
		}
	}

	/* Non-global netstack: match as if in the global zone. */
	if (tcps->tcps_netstack->netstack_stackid != GLOBAL_NETSTACKID)
		acp->ac_zoneid = GLOBAL_ZONEID;

	if (acp->ac_start < TCPS_SYN_SENT || acp->ac_end > TCPS_TIME_WAIT ||
	    acp->ac_start > acp->ac_end || laf != raf ||
	    (laf != AF_INET && laf != AF_INET6)) {
		err = EINVAL;
		goto out;
	}

	tcp_ioctl_abort_dump(acp);
	err = tcp_ioctl_abort(acp, tcps);

out:
	if (mp1 != NULL) {
		freemsg(mp1);
		mp->b_cont = NULL;
	}
	if (err != 0)
		miocnak(q, mp, 0, err);
	else
		miocack(q, mp, 0, 0);
}
/*
 * timeout(9F) callback that re-evaluates whether the TCP stack passed in
 * `arg` should stay in reclaim mode (tcps_reclaim).
 *
 * The per-CPU connection counters are summed without holding the reclaim
 * lock.  Then, under tcps_reclaim_lock:
 *   - if tcps_reclaim is already clear, nothing is touched (in
 *     particular tcps_reclaim_tid is left as is);
 *   - if free memory has recovered (freemem >= lotsfree + needfree) or
 *     the connection total is below maxusers, reclaim mode is cleared
 *     and tcps_reclaim_tid reset to 0;
 *   - otherwise the timer is re-armed for another tcps_reclaim_period
 *     milliseconds.
 */
void
tcp_reclaim_timer(void *arg)
{
	extern pgcnt_t lotsfree, needfree;
	tcp_stack_t *tcps = (tcp_stack_t *)arg;
	int64_t tot_conn = 0;
	int cpu;

	for (cpu = 0; cpu < tcps->tcps_sc_cnt; cpu++)
		tot_conn += tcps->tcps_sc[cpu]->tcp_sc_conn_cnt;

	mutex_enter(&tcps->tcps_reclaim_lock);
	if (tcps->tcps_reclaim) {
		if (freemem < lotsfree + needfree && tot_conn >= maxusers) {
			/* Still under pressure: check again later. */
			tcps->tcps_reclaim_tid = timeout(tcp_reclaim_timer,
			    tcps, MSEC_TO_TICK(tcps->tcps_reclaim_period));
		} else {
			tcps->tcps_reclaim = B_FALSE;
			tcps->tcps_reclaim_tid = 0;
		}
	}
	mutex_exit(&tcps->tcps_reclaim_lock);
}
/*
 * Put TCP stacks into reclaim mode when memory is short.
 *
 * Does nothing when tcp_do_reclaim is clear or when
 * freemem >= lotsfree + needfree.  Otherwise every netstack is visited;
 * a stack is skipped when it has no tcp_stack_t attached or when the sum
 * of its per-CPU connection counters is below maxusers.  For the
 * remaining stacks, if tcps_reclaim is not already set, it is set, the
 * tcp_reclaim_timer() timeout is armed for tcps_reclaim_period
 * milliseconds and the tcp_reclaim_cnt statistic is bumped, all under
 * tcps_reclaim_lock.
 *
 * `arg` is unused.
 */
void
tcp_conn_reclaim(void *arg)
{
	extern pgcnt_t lotsfree, needfree;
	netstack_handle_t nh;
	netstack_t *ns;
	tcp_stack_t *tcps;

	if (!tcp_do_reclaim || freemem >= lotsfree + needfree)
		return;

	netstack_next_init(&nh);
	while ((ns = netstack_next(&nh)) != NULL) {
		tcps = ns->netstack_tcp;
		if (tcps != NULL) {
			int64_t tot_conn = 0;
			int cpu;

			for (cpu = 0; cpu < tcps->tcps_sc_cnt; cpu++)
				tot_conn += tcps->tcps_sc[cpu]->tcp_sc_conn_cnt;

			if (tot_conn >= maxusers) {
				mutex_enter(&tcps->tcps_reclaim_lock);
				if (!tcps->tcps_reclaim) {
					tcps->tcps_reclaim = B_TRUE;
					tcps->tcps_reclaim_tid = timeout(
					    tcp_reclaim_timer, tcps,
					    MSEC_TO_TICK(
					    tcps->tcps_reclaim_period));
					TCP_STAT(tcps, tcp_reclaim_cnt);
				}
				mutex_exit(&tcps->tcps_reclaim_lock);
			}
		}
		/* Drop the hold taken by netstack_next(). */
		netstack_rele(ns);
	}
	netstack_next_fini(&nh);
}
/*
 * Look up the listener configuration entry for `port` in the stack's
 * tcps_listener_conf list, under tcps_listener_conf_lock.
 *
 * Returns the tl_ratio of the first entry whose tl_port equals `port`,
 * or 0 when the list holds no such entry.
 */
uint32_t
tcp_find_listener_conf(tcp_stack_t *tcps, in_port_t port)
{
	tcp_listener_t *entry;
	uint32_t ratio = 0;

	mutex_enter(&tcps->tcps_listener_conf_lock);
	entry = list_head(&tcps->tcps_listener_conf);
	while (entry != NULL && entry->tl_port != port)
		entry = list_next(&tcps->tcps_listener_conf, entry);
	if (entry != NULL)
		ratio = entry->tl_ratio;
	mutex_exit(&tcps->tcps_listener_conf_lock);

	return (ratio);
}
/*
 * Tear down the listener configuration of a TCP stack: free every
 * tcp_listener_t on tcps_listener_conf, then destroy the list and its
 * lock.  After this call neither may be used again without being
 * re-initialised.
 */
void
tcp_listener_conf_cleanup(tcp_stack_t *tcps)
{
	tcp_listener_t *tl;

	mutex_enter(&tcps->tcps_listener_conf_lock);
	while ((tl = list_head(&tcps->tcps_listener_conf)) != NULL) {
		list_remove(&tcps->tcps_listener_conf, tl);
		kmem_free(tl, sizeof (tcp_listener_t));
	}
	/*
	 * Release the lock before destroying it: mutex_destroy() is meant
	 * to be called on a mutex that no thread holds.
	 */
	mutex_exit(&tcps->tcps_listener_conf_lock);
	mutex_destroy(&tcps->tcps_listener_conf_lock);
	list_destroy(&tcps->tcps_listener_conf);
}
/*
 * Make sure per-CPU statistics exist for every CPU sequence id up to and
 * including `cpu_seqid`.
 *
 * If cpu_seqid is already covered by tcps_sc_cnt nothing happens.
 * Otherwise a zeroed tcp_stats_cpu_t is allocated (KM_SLEEP) for each
 * missing slot of tcps_sc[], and only after a membar_producer() is
 * tcps_sc_cnt raised to cpu_seqid + 1, so the new count is stored after
 * the slot pointers it covers.
 */
void
tcp_stack_cpu_add(tcp_stack_t *tcps, processorid_t cpu_seqid)
{
	if (cpu_seqid >= tcps->tcps_sc_cnt) {
		int slot = tcps->tcps_sc_cnt;

		while (slot <= cpu_seqid) {
			ASSERT(tcps->tcps_sc[slot] == NULL);
			tcps->tcps_sc[slot] = kmem_zalloc(
			    sizeof (tcp_stats_cpu_t), KM_SLEEP);
			slot++;
		}
		/* Publish the slots before publishing the new count. */
		membar_producer();
		tcps->tcps_sc_cnt = cpu_seqid + 1;
	}
}
/*
 * Format a short human-readable description of a TCP endpoint.
 *
 *   tcp     - endpoint to describe; NULL yields the literal "NULL_TCP"
 *   sup_buf - caller-supplied output buffer, or NULL to use a function-
 *             static buffer (shared by all callers that pass NULL, so
 *             the result is overwritten by the next such call)
 *   format  - DISP_ADDR_AND_PORT prints "[laddr.lport, faddr.fport] STATE";
 *             any other value prints "[lport, fport] STATE"
 *
 * Addresses are always rendered through inet_ntop(AF_INET6, ...); IPv4
 * endpoints are converted to v4-mapped form first.  Ports are converted
 * with ntohs().  A state outside the known set is printed as
 * "TCPUnkState(<n>)".
 *
 * Returns the buffer written to (sup_buf or the static buffer), or the
 * string literal "NULL_TCP".
 */
char *
tcp_display(tcp_t *tcp, char *sup_buf, char format)
{
	static char priv_buf[INET6_ADDRSTRLEN * 2 + 80];
	char unknown_state[30];
	char laddr_str[INET6_ADDRSTRLEN];
	char faddr_str[INET6_ADDRSTRLEN];
	in6_addr_t laddr, faddr;
	char *state_name;
	char *out;
	conn_t *connp;

	if (tcp == NULL)
		return ("NULL_TCP");

	out = (sup_buf != NULL) ? sup_buf : priv_buf;
	connp = tcp->tcp_connp;

	switch (tcp->tcp_state) {
	case TCPS_CLOSED:
		state_name = "TCP_CLOSED";
		break;
	case TCPS_IDLE:
		state_name = "TCP_IDLE";
		break;
	case TCPS_BOUND:
		state_name = "TCP_BOUND";
		break;
	case TCPS_LISTEN:
		state_name = "TCP_LISTEN";
		break;
	case TCPS_SYN_SENT:
		state_name = "TCP_SYN_SENT";
		break;
	case TCPS_SYN_RCVD:
		state_name = "TCP_SYN_RCVD";
		break;
	case TCPS_ESTABLISHED:
		state_name = "TCP_ESTABLISHED";
		break;
	case TCPS_CLOSE_WAIT:
		state_name = "TCP_CLOSE_WAIT";
		break;
	case TCPS_FIN_WAIT_1:
		state_name = "TCP_FIN_WAIT_1";
		break;
	case TCPS_FIN_WAIT_2:
		state_name = "TCP_FIN_WAIT_2";
		break;
	case TCPS_CLOSING:
		state_name = "TCP_CLOSING";
		break;
	case TCPS_LAST_ACK:
		state_name = "TCP_LAST_ACK";
		break;
	case TCPS_TIME_WAIT:
		state_name = "TCP_TIME_WAIT";
		break;
	default:
		(void) mi_sprintf(unknown_state, "TCPUnkState(%d)",
		    tcp->tcp_state);
		state_name = unknown_state;
		break;
	}

	/* Everything except DISP_ADDR_AND_PORT gets the port-only form. */
	if (format != DISP_ADDR_AND_PORT) {
		(void) mi_sprintf(out, "[%u, %u] %s",
		    ntohs(connp->conn_lport), ntohs(connp->conn_fport),
		    state_name);
		return (out);
	}

	if (connp->conn_ipversion == IPV4_VERSION) {
		IN6_IPADDR_TO_V4MAPPED(connp->conn_laddr_v4, &laddr);
		IN6_IPADDR_TO_V4MAPPED(connp->conn_faddr_v4, &faddr);
	} else {
		laddr = connp->conn_laddr_v6;
		faddr = connp->conn_faddr_v6;
	}
	(void) inet_ntop(AF_INET6, &laddr, laddr_str, sizeof (laddr_str));
	(void) inet_ntop(AF_INET6, &faddr, faddr_str, sizeof (faddr_str));
	(void) mi_sprintf(out, "[%s.%u, %s.%u] %s",
	    laddr_str, ntohs(connp->conn_lport), faddr_str,
	    ntohs(connp->conn_fport), state_name);
	return (out);
}