#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_ipsec.h"
#include "opt_kern_tls.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/arb.h>
#include <sys/domain.h>
#ifdef TCP_HHOOK
#include <sys/hhook.h>
#endif
#include <sys/kernel.h>
#ifdef KERN_TLS
#include <sys/ktls.h>
#endif
#include <sys/lock.h>
#include <sys/mbuf.h>
#include <sys/mutex.h>
#include <sys/protosw.h>
#include <sys/qmath.h>
#include <sys/sdt.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>
#include <sys/stats.h>
#include <net/if.h>
#include <net/route.h>
#include <net/route/nhop.h>
#include <net/vnet.h>
#include <netinet/in.h>
#include <netinet/in_kdtrace.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <netinet/in_pcb.h>
#include <netinet/ip_var.h>
#include <netinet/ip_options.h>
#ifdef INET6
#include <netinet6/in6_pcb.h>
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
#endif
#include <netinet/tcp.h>
#define TCPOUTFLAGS
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_var.h>
#include <netinet/tcp_log_buf.h>
#include <netinet/tcp_syncache.h>
#include <netinet/tcp_timer.h>
#include <netinet/tcpip.h>
#include <netinet/cc/cc.h>
#include <netinet/tcp_fastopen.h>
#ifdef TCP_OFFLOAD
#include <netinet/tcp_offload.h>
#endif
#include <netinet/tcp_ecn.h>
#include <netipsec/ipsec_support.h>
#include <netinet/udp.h>
#include <netinet/udp_var.h>
#include <machine/in_cksum.h>
#include <security/mac/mac_framework.h>
/*
 * Per-VNET tunables used by the TCP output path.  Each variable is defined
 * with VNET_DEFINE and exported read/write (CTLFLAG_VNET | CTLFLAG_RW) under
 * the _net_inet_tcp sysctl node with the description string shown.
 */

/* Path MTU discovery switch; defaults to 1. */
VNET_DEFINE(int, path_mtu_discovery) = 1;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, path_mtu_discovery, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(path_mtu_discovery), 1,
"Enable Path MTU Discovery");
/* TCP segmentation offload switch; defaults to 1. */
VNET_DEFINE(int, tcp_do_tso) = 1;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, tso, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(tcp_do_tso), 0,
"Enable TCP Segmentation Offload");
/* Initial send socket buffer size; defaults to 32 KiB. */
VNET_DEFINE(int, tcp_sendspace) = 1024*32;
#define V_tcp_sendspace VNET(tcp_sendspace)
SYSCTL_INT(_net_inet_tcp, TCPCTL_SENDSPACE, sendspace, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(tcp_sendspace), 0, "Initial send socket buffer size");
/* Automatic send buffer sizing switch; defaults to 1. */
VNET_DEFINE(int, tcp_do_autosndbuf) = 1;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, sendbuf_auto, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(tcp_do_autosndbuf), 0,
"Enable automatic send buffer sizing");
/* Step by which the automatic send buffer grows; defaults to 8 KiB. */
VNET_DEFINE(int, tcp_autosndbuf_inc) = 8*1024;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, sendbuf_inc, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(tcp_autosndbuf_inc), 0,
"Incrementor step size of automatic send buffer");
/* Upper bound for the automatic send buffer; defaults to 8 MiB. */
VNET_DEFINE(int, tcp_autosndbuf_max) = 8*1024*1024;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, sendbuf_max, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(tcp_autosndbuf_max), 0,
"Max size of automatic send buffer");
/*
 * When non-zero, the socket's sb_lowat is subtracted from the growth
 * thresholds used by tcp_sndbuf_autoscale(); defaults to 0.
 */
VNET_DEFINE(int, tcp_sendbuf_auto_lowat) = 0;
#define V_tcp_sendbuf_auto_lowat VNET(tcp_sendbuf_auto_lowat)
SYSCTL_INT(_net_inet_tcp, OID_AUTO, sendbuf_auto_lowat, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(tcp_sendbuf_auto_lowat), 0,
"Modify threshold for auto send buffer growth to account for SO_SNDLOWAT");
/*
 * Assert that, unless the segment carries no data and neither SYN nor FIN
 * (i.e. it consumes no sequence space), either the retransmit timer or the
 * persist timer is active on tp.
 */
#define TCP_XMIT_TIMER_ASSERT(tp, len, th_flags) \
KASSERT(((len) == 0 && ((th_flags) & (TH_SYN | TH_FIN)) == 0) ||\
tcp_timer_active((tp), TT_REXMT) || \
tcp_timer_active((tp), TT_PERSIST), \
("neither rexmt nor persist timer is set"))
#ifdef TCP_HHOOK
/*
 * Invoke the helper hooks attached to the HHOOK_TCP_EST_OUT hook point,
 * passing them the connection, the TCP header and options of the outgoing
 * segment, its payload length and whether TSO is in use.  Does nothing when
 * no hooks are registered.
 */
void
hhook_run_tcp_est_out(struct tcpcb *tp, struct tcphdr *th,
    struct tcpopt *to, uint32_t len, int tso)
{
	struct tcp_hhook_data ctx;

	/* Skip the setup entirely when nobody is listening. */
	if (V_tcp_hhh[HHOOK_TCP_EST_OUT]->hhh_nhooks <= 0)
		return;

	ctx.tp = tp;
	ctx.th = th;
	ctx.to = to;
	ctx.len = len;
	ctx.tso = tso;

	hhook_run_hooks(V_tcp_hhh[HHOOK_TCP_EST_OUT], &ctx, &tp->t_osd);
}
#endif
/*
 * Give the connection's congestion control algorithm a chance to react to
 * the connection having been idle, if it provides an after_idle callback.
 * The inpcb write lock must be held.
 */
void
cc_after_idle(struct tcpcb *tp)
{
	INP_WLOCK_ASSERT(tptoinpcb(tp));

	/* The callback is optional. */
	if (CC_ALGO(tp)->after_idle == NULL)
		return;

	CC_ALGO(tp)->after_idle(&tp->t_ccv);
}
/*
 * Default TCP output routine.
 *
 * Decides whether a segment should be transmitted for connection tp and, if
 * so, builds the headers, attaches data from the socket send buffer and
 * hands the packet to ip_output() or ip6_output().  May loop (via the
 * "again" label) to send several segments in one call.
 *
 * Must be called inside the network epoch with the inpcb write lock held
 * (see the assertions at the top of the body).
 *
 * Returns 0 when nothing had to be sent, when the send succeeded, or for
 * errors that are recorded as soft errors or otherwise absorbed; returns an
 * errno value for the remaining failures.
 */
int
tcp_default_output(struct tcpcb *tp)
{
struct socket *so = tptosocket(tp);
struct inpcb *inp = tptoinpcb(tp);
int32_t len;
uint32_t recwin, sendwin;
uint16_t flags;
int off, error = 0;
u_int if_hw_tsomaxsegcount = 0;
u_int if_hw_tsomaxsegsize = 0;
struct mbuf *m;
struct ip *ip = NULL;
struct tcphdr *th;
u_char opt[TCP_MAXOLEN];
unsigned ipoptlen, optlen, hdrlen, ulen;
unsigned ipsec_optlen = 0;
int idle, sendalot, curticks;
int sack_bytes_rxmt;
struct sackhole *p;
int tso, mtu;
struct tcpopt to;
struct udphdr *udp = NULL;
struct tcp_log_buffer *lgb;
unsigned int wanted_cookie = 0;
unsigned int dont_sendalot = 0;
bool sack_rxmit;
#ifdef INET6
struct ip6_hdr *ip6 = NULL;
const bool isipv6 = (inp->inp_vflag & INP_IPV6) != 0;
#endif
#ifdef KERN_TLS
const bool hw_tls = tp->t_nic_ktls_xmit != 0;
#else
const bool hw_tls = false;
#endif
NET_EPOCH_ASSERT();
INP_WLOCK_ASSERT(inp);
#ifdef TCP_OFFLOAD
/* Offloaded connections are handled entirely by the offload layer. */
if (tp->t_flags & TF_TOE)
return (tcp_offload_output(tp));
#endif
/*
 * Fast Open connection still in SYN_SENT/SYN_RECEIVED: once something has
 * been sent (snd_max > snd_una), send nothing further unless snd_nxt has
 * been pulled back to snd_una.
 */
if ((tp->t_flags & TF_FASTOPEN) &&
((tp->t_state == TCPS_SYN_SENT) ||
(tp->t_state == TCPS_SYN_RECEIVED)) &&
SEQ_GT(tp->snd_max, tp->snd_una) &&
(tp->snd_nxt != tp->snd_una))
return (0);
/*
 * The connection counts as idle when nothing is outstanding or the previous
 * call left TF_LASTIDLE set.  If it has additionally been quiet for at
 * least t_rxtcur ticks, notify the congestion control module.
 */
idle = (tp->t_flags & TF_LASTIDLE) || (tp->snd_max == tp->snd_una);
if (idle && (((ticks - tp->t_rcvtime) >= tp->t_rxtcur) ||
(tp->t_sndtime && ((ticks - tp->t_sndtime) >= tp->t_rxtcur))))
cc_after_idle(tp);
tp->t_flags &= ~TF_LASTIDLE;
if (idle) {
/* More data is about to be queued: remember idleness for the next call. */
if (tp->t_flags & TF_MORETOCOME) {
tp->t_flags |= TF_LASTIDLE;
idle = 0;
}
}
/*
 * Per-segment loop entry.  Re-entered when sendalot is set after a
 * successful send and after an EMSGSIZE error for which a new MTU was
 * obtained.
 */
again:
sendwin = 0;
if ((tp->t_flags & TF_SACK_PERMIT) &&
(tp->sackhint.nexthole != NULL) &&
!IN_FASTRECOVERY(tp->t_flags)) {
sendwin = tcp_sack_adjust(tp);
}
sendalot = 0;
tso = 0;
mtu = 0;
/* off: offset of snd_nxt from the start of unacknowledged data. */
off = tp->snd_nxt - tp->snd_una;
/* Usable window: peer's window capped by cwnd plus the SACK adjustment. */
sendwin = min(tp->snd_wnd, tp->snd_cwnd + sendwin);
flags = tcp_outflags[tp->t_state];
sack_bytes_rxmt = 0;
len = 0;
/*
 * Look for a SACK hole to retransmit.  When one is chosen, len and off are
 * set from the hole and sack_rxmit is true; otherwise p is NULL and
 * sack_rxmit is false.
 */
if ((tp->t_flags & TF_SACK_PERMIT) &&
(IN_FASTRECOVERY(tp->t_flags) ||
(SEQ_LT(tp->snd_nxt, tp->snd_max) && (tp->t_dupacks >= tcprexmtthresh))) &&
(p = tcp_sack_output(tp, &sack_bytes_rxmt))) {
int32_t cwin;
if (IN_FASTRECOVERY(tp->t_flags)) {
cwin = imax(sendwin - tcp_compute_pipe(tp), 0);
} else {
cwin = imax(sendwin - off, 0);
}
/* Never retransmit hole data beyond snd_recover. */
if (SEQ_GT(p->end, tp->snd_recover)) {
if (SEQ_GEQ(p->rxmit, tp->snd_recover)) {
/* Nothing in this hole lies below snd_recover. */
p = NULL;
sack_rxmit = false;
goto after_sack_rexmit;
} else {
/* Only the part of the hole below snd_recover is eligible. */
len = SEQ_SUB(tp->snd_recover, p->rxmit);
if (cwin <= len) {
len = cwin;
} else {
sendalot = 1;
}
}
} else {
len = SEQ_SUB(p->end, p->rxmit);
if (cwin <= len) {
len = cwin;
} else {
sendalot = 1;
}
}
if (len > 0) {
off = SEQ_SUB(p->rxmit, tp->snd_una);
KASSERT(off >= 0,("%s: sack block to the left of una : %d",
__func__, off));
}
sack_rxmit = true;
} else {
p = NULL;
sack_rxmit = false;
}
after_sack_rexmit:
/* Add FIN/SYN requested through the TF_NEEDFIN/TF_NEEDSYN state flags. */
if (tp->t_flags & TF_NEEDFIN)
flags |= TH_FIN;
if (tp->t_flags & TF_NEEDSYN)
flags |= TH_SYN;
/* The send buffer lock is held from here until one of the unlocks below. */
SOCK_SENDBUF_LOCK(so);
if (tp->t_flags & TF_FORCEDATA) {
if (sendwin == 0) {
/*
 * Forced send into a zero window: open the window by one byte.
 * FIN is dropped if send buffer data remains past off.
 */
if (off < sbused(&so->so_snd))
flags &= ~TH_FIN;
sendwin = 1;
} else {
/* Window is open: stop the persist timer and reset the backoff. */
tcp_timer_activate(tp, TT_PERSIST, 0);
tp->t_rxtshift = 0;
}
}
/*
 * When not retransmitting from the SACK scoreboard, compute len from the
 * send buffer and the usable window.  The result may be negative here; it
 * is clamped below.
 */
if (!sack_rxmit) {
if ((sack_bytes_rxmt == 0) || SEQ_LT(tp->snd_nxt, tp->snd_max)) {
len = imin(sbavail(&so->so_snd), sendwin) - off;
} else {
len = imax(
imin(sbavail(&so->so_snd), sendwin) -
imax(tcp_compute_pipe(tp), off), 0);
}
}
/*
 * SYN already sent (snd_nxt is past snd_una): drop the SYN flag except in
 * SYN_RECEIVED without Fast Open, and adjust off/len by the one sequence
 * number the SYN occupies.
 */
if ((flags & TH_SYN) && SEQ_GT(tp->snd_nxt, tp->snd_una)) {
if (tp->t_state != TCPS_SYN_RECEIVED)
flags &= ~TH_SYN;
if ((tp->t_flags & TF_FASTOPEN) &&
(tp->t_state == TCPS_SYN_RECEIVED))
flags &= ~TH_SYN;
off--, len++;
}
/* With options disabled, a SYN carries neither data nor FIN. */
if ((flags & TH_SYN) && (tp->t_flags & TF_NOOPT)) {
len = 0;
flags &= ~TH_FIN;
}
/*
 * Fast Open: send no data on a retransmitted SYN, in SYN_SENT with a
 * zero-length client cookie, or on a RST.
 */
if ((tp->t_flags & TF_FASTOPEN) &&
(((flags & TH_SYN) && (tp->t_rxtshift > 0)) ||
((tp->t_state == TCPS_SYN_SENT) &&
(tp->t_tfo_client_cookie_len == 0)) ||
(flags & TH_RST)))
len = 0;
/* Without Fast Open a SYN never carries data. */
if ((flags & TH_SYN) && !(tp->t_flags & TF_FASTOPEN)) {
len = 0;
}
if (len <= 0) {
len = 0;
/*
 * Zero window on an established connection with unsent data and no
 * persist timer: cancel the retransmit timer, pull snd_nxt back to
 * snd_una and start the persist timer.
 */
if ((sendwin == 0) && (TCPS_HAVEESTABLISHED(tp->t_state)) &&
(off < (int) sbavail(&so->so_snd)) &&
!tcp_timer_active(tp, TT_PERSIST)) {
tcp_timer_activate(tp, TT_REXMT, 0);
tp->t_rxtshift = 0;
tp->snd_nxt = tp->snd_una;
if (!tcp_timer_active(tp, TT_PERSIST))
tcp_setpersist(tp);
}
}
KASSERT(len >= 0, ("[%s:%d]: len < 0", __func__, __LINE__));
tcp_sndbuf_autoscale(tp, so, sendwin);
/* Work out the IPsec header size and the IP option length. */
#if defined(IPSEC) || defined(IPSEC_SUPPORT)
#ifdef INET6
if (isipv6 && IPSEC_ENABLED(ipv6))
ipsec_optlen = IPSEC_HDRSIZE(ipv6, inp);
#ifdef INET
else
#endif
#endif
#ifdef INET
if (IPSEC_ENABLED(ipv4))
ipsec_optlen = IPSEC_HDRSIZE(ipv4, inp);
#endif
#endif
#ifdef INET6
if (isipv6)
ipoptlen = ip6_optlen(inp);
else
#endif
if (inp->inp_options)
ipoptlen = inp->inp_options->m_len -
offsetof(struct ipoption, ipopt_list);
else
ipoptlen = 0;
ipoptlen += ipsec_optlen;
/*
 * TSO is considered only for more than one segment's worth of data, with no
 * UDP tunneling port, no TCP signature, no IP options other than IPsec
 * overhead (and then only with TF2_IPSEC_TSO), no SYN, and for SACK
 * retransmissions only when V_tcp_sack_tso is set.
 */
if ((tp->t_flags & TF_TSO) && V_tcp_do_tso && len > tp->t_maxseg &&
(tp->t_port == 0) &&
((tp->t_flags & TF_SIGNATURE) == 0) &&
(!sack_rxmit || V_tcp_sack_tso) &&
(ipoptlen == 0 || (ipoptlen == ipsec_optlen &&
(tp->t_flags2 & TF2_IPSEC_TSO) != 0)) &&
!(flags & TH_SYN))
tso = 1;
/* Drop FIN if this segment does not reach the end of the send buffer. */
if (SEQ_LT((sack_rxmit ? p->rxmit : tp->snd_nxt) + len,
tp->snd_una + sbused(&so->so_snd))) {
flags &= ~TH_FIN;
}
/* Receive window: free receive buffer space, capped by the scaled maximum. */
recwin = lmin(lmax(sbspace(&so->so_rcv), 0),
(long)TCP_MAXWIN << tp->rcv_scale);
/*
 * Decide whether the pending data justifies sending now.  Any one of the
 * conditions below is sufficient.
 */
if (len) {
/* A full-sized segment. */
if (len >= tp->t_maxseg)
goto send;
/* Full-sized once signature/timestamp option space is counted. */
if ((((tp->t_flags & TF_SIGNATURE) ?
PADTCPOLEN(TCPOLEN_SIGNATURE) : 0) +
((tp->t_flags & TF_RCVD_TSTMP) ?
PADTCPOLEN(TCPOLEN_TIMESTAMP) : 0) +
len) >= tp->t_maxseg)
goto send;
/*
 * This empties the send buffer, and the connection is idle or has
 * TF_NODELAY, with neither TF_MORETOCOME nor TF_NOPUSH set.
 */
if (!(tp->t_flags & TF_MORETOCOME) &&
(idle || (tp->t_flags & TF_NODELAY)) &&
(uint32_t)len + (uint32_t)off >= sbavail(&so->so_snd) &&
(tp->t_flags & TF_NOPUSH) == 0) {
goto send;
}
/* Forced output. */
if (tp->t_flags & TF_FORCEDATA)
goto send;
/* At least half of the largest window the peer has offered. */
if (len >= tp->max_sndwnd / 2 && tp->max_sndwnd > 0)
goto send;
/* Retransmission. */
if (SEQ_LT(tp->snd_nxt, tp->snd_max))
goto send;
if (sack_rxmit)
goto send;
}
/*
 * Decide whether a stand-alone window update should be sent.  adv is how
 * far the advertised window could move beyond what was already advertised.
 */
if (recwin > 0 && !(tp->t_flags & TF_NEEDSYN) &&
!(tp->t_flags & TF_DELACK) &&
!TCPS_HAVERCVDFIN(tp->t_state)) {
int32_t adv;
int oldwin;
adv = recwin;
if (SEQ_GT(tp->rcv_adv, tp->rcv_nxt)) {
oldwin = (tp->rcv_adv - tp->rcv_nxt);
if (adv > oldwin)
adv -= oldwin;
else
adv = 0;
} else
oldwin = 0;
/* No update if the scaled window value would not grow. */
if (oldwin >> tp->rcv_scale >= (adv + oldwin) >> tp->rcv_scale)
goto dontupdate;
if (adv >= (int32_t)(2 * tp->t_maxseg) &&
(adv >= (int32_t)(so->so_rcv.sb_hiwat / 4) ||
recwin <= (so->so_rcv.sb_hiwat / 8) ||
so->so_rcv.sb_hiwat <= 8 * tp->t_maxseg ||
adv >= TCP_MAXWIN << tp->rcv_scale))
goto send;
if (2 * adv >= (int32_t)so->so_rcv.sb_hiwat)
goto send;
}
dontupdate:
/* Send if an immediate ACK, a RST, a SYN, urgent data or a FIN is due. */
if (tp->t_flags & TF_ACKNOW)
goto send;
if ((flags & TH_RST) ||
((flags & TH_SYN) && (tp->t_flags & TF_NEEDSYN) == 0))
goto send;
if (SEQ_GT(tp->snd_up, tp->snd_una))
goto send;
if (flags & TH_FIN &&
((tp->t_flags & TF_SENTFIN) == 0 || tp->snd_nxt == tp->snd_una))
goto send;
/*
 * Nothing to send.  With SACK and outstanding data but no running timer,
 * make sure the retransmit timer is armed.
 */
if ((tp->t_flags & TF_SACK_PERMIT) &&
SEQ_GT(tp->snd_max, tp->snd_una) &&
!tcp_timer_active(tp, TT_REXMT) &&
!tcp_timer_active(tp, TT_PERSIST)) {
tcp_timer_activate(tp, TT_REXMT, TP_RXTCUR(tp));
goto just_return;
}
/* Data is queued but no timer is running: start the persist timer. */
if (sbavail(&so->so_snd) && !tcp_timer_active(tp, TT_REXMT) &&
!tcp_timer_active(tp, TT_PERSIST)) {
tp->t_rxtshift = 0;
tcp_setpersist(tp);
}
just_return:
SOCK_SENDBUF_UNLOCK(so);
return (0);
send:
SOCK_SENDBUF_LOCK_ASSERT(so);
/* Record whether this data segment is at least one full t_maxseg. */
if (len > 0) {
if (len >= tp->t_maxseg)
tp->t_flags2 |= TF2_PLPMTU_MAXSEGSNT;
else
tp->t_flags2 &= ~TF2_PLPMTU_MAXSEGSNT;
}
optlen = 0;
#ifdef INET6
if (isipv6)
hdrlen = sizeof (struct ip6_hdr) + sizeof (struct tcphdr);
else
#endif
hdrlen = sizeof (struct tcpiphdr);
/* A SYN always starts at the initial send sequence number. */
if (flags & TH_SYN) {
tp->snd_nxt = tp->iss;
}
/* Collect the TCP options for this segment unless options are disabled. */
to.to_flags = 0;
if ((tp->t_flags & TF_NOOPT) == 0) {
/* MSS, and possibly a Fast Open cookie, go on SYN segments. */
if (flags & TH_SYN) {
to.to_mss = tcp_mssopt(&inp->inp_inc);
if (tp->t_port)
to.to_mss -= V_tcp_udp_tunneling_overhead;
to.to_flags |= TOF_MSS;
/* The Fast Open option is only added when this is not a retransmit. */
if ((tp->t_flags & TF_FASTOPEN) &&
(tp->t_rxtshift == 0)) {
if (tp->t_state == TCPS_SYN_RECEIVED) {
to.to_tfo_len = TCP_FASTOPEN_COOKIE_LEN;
to.to_tfo_cookie =
(u_int8_t *)&tp->t_tfo_cookie.server;
to.to_flags |= TOF_FASTOPEN;
wanted_cookie = 1;
} else if (tp->t_state == TCPS_SYN_SENT) {
to.to_tfo_len =
tp->t_tfo_client_cookie_len;
to.to_tfo_cookie =
tp->t_tfo_cookie.client;
to.to_flags |= TOF_FASTOPEN;
wanted_cookie = 1;
/* Suppresses sendalot below when the data must be truncated. */
dont_sendalot = 1;
}
}
}
/* Window scale, on SYN only. */
if ((flags & TH_SYN) && (tp->t_flags & TF_REQ_SCALE)) {
to.to_wscale = tp->request_r_scale;
to.to_flags |= TOF_SCALE;
}
/* Timestamps. */
if ((tp->t_flags & TF_RCVD_TSTMP) ||
((flags & TH_SYN) && (tp->t_flags & TF_REQ_TSTMP))) {
curticks = tcp_ts_getticks();
to.to_tsval = curticks + tp->ts_offset;
to.to_tsecr = tp->ts_recent;
to.to_flags |= TOF_TS;
if (tp->t_rxtshift == 1)
tp->t_badrxtwin = curticks;
}
/* Initialize rfbuf_ts if unset and the receive buffer is auto-sized. */
if (tp->rfbuf_ts == 0 &&
(so->so_rcv.sb_flags & SB_AUTOSIZE))
tp->rfbuf_ts = tcp_ts_getticks();
/* SACK-permitted on SYN; SACK blocks once established. */
if (tp->t_flags & TF_SACK_PERMIT) {
if (flags & TH_SYN)
to.to_flags |= TOF_SACKPERM;
else if (TCPS_HAVEESTABLISHED(tp->t_state) &&
tp->rcv_numsacks > 0) {
to.to_flags |= TOF_SACK;
to.to_nsacks = tp->rcv_numsacks;
to.to_sacks = (u_char *)tp->sackblks;
}
}
#if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE)
/* Reserve room for the signature option; it is filled in later. */
if (tp->t_flags & TF_SIGNATURE)
to.to_flags |= TOF_SIGNATURE;
#endif
/* Serialize the options into opt[] and account for their length. */
hdrlen += optlen = tcp_addoptions(&to, opt);
/* A wanted Fast Open option that did not fit means no data is sent. */
if ((tp->t_flags & TF_FASTOPEN) && wanted_cookie &&
!(to.to_flags & TOF_FASTOPEN))
len = 0;
}
/* UDP-tunneled connection: fail if no tunneling port is configured. */
if (tp->t_port) {
if (V_tcp_udp_tunneling_port == 0) {
SOCK_SENDBUF_UNLOCK(so);
return (EHOSTUNREACH);
}
hdrlen += sizeof(struct udphdr);
}
/*
 * Data plus options exceed t_maxseg: trim len.  FIN is cleared because the
 * tail of the data is cut off.
 */
if (len + optlen + ipoptlen > tp->t_maxseg) {
flags &= ~TH_FIN;
if (tso) {
u_int if_hw_tsomax;
u_int moff;
int max_len;
if_hw_tsomax = tp->t_tsomax;
if_hw_tsomaxsegcount = tp->t_tsomaxsegcount;
if_hw_tsomaxsegsize = tp->t_tsomaxsegsize;
KASSERT(ipoptlen == ipsec_optlen,
("%s: TSO can't do IP options", __func__));
/* Cap the burst at t_tsomax, when that limit is non-zero. */
if (if_hw_tsomax != 0) {
max_len = if_hw_tsomax - hdrlen -
ipsec_optlen - max_linkhdr;
if (max_len <= 0) {
len = 0;
} else if (len > max_len) {
sendalot = 1;
len = max_len;
}
}
/*
 * Unless this empties the send buffer, round len down to a
 * multiple of the per-segment payload size.
 */
max_len = tp->t_maxseg - optlen - ipsec_optlen;
if (((uint32_t)off + (uint32_t)len) <
sbavail(&so->so_snd)) {
moff = len % max_len;
if (moff != 0) {
len -= moff;
sendalot = 1;
}
}
/* Not more than one segment left: send it without TSO. */
if (len <= max_len) {
len = max_len;
sendalot = 1;
tso = 0;
}
/* A pending FIN is sent on a later pass through the loop. */
if (tp->t_flags & TF_NEEDFIN)
sendalot = 1;
} else {
/* Options alone fill the segment: nothing can be sent. */
if (optlen + ipoptlen >= tp->t_maxseg) {
SOCK_SENDBUF_UNLOCK(so);
error = EMSGSIZE;
sack_rxmit = false;
goto out;
}
len = tp->t_maxseg - optlen - ipoptlen;
sendalot = 1;
if (dont_sendalot)
sendalot = 0;
}
} else
tso = 0;
KASSERT(len + hdrlen + ipoptlen <= IP_MAXPACKET,
("%s: len > IP_MAXPACKET", __func__));
#ifdef INET6
if (max_linkhdr + hdrlen > MCLBYTES)
#else
if (max_linkhdr + hdrlen > MHLEN)
#endif
panic("tcphdr too big");
KASSERT(len >= 0, ("[%s:%d]: len < 0", __func__, __LINE__));
/*
 * Allocate the header mbuf.  For a data segment the payload is either
 * copied into it or chained behind it; the send buffer lock is dropped on
 * every path out of this if/else.
 */
if (len) {
struct mbuf *mb;
struct sockbuf *msb;
u_int moff;
/* Statistics: window probe, retransmission or new data. */
if ((tp->t_flags & TF_FORCEDATA) && len == 1) {
TCPSTAT_INC(tcps_sndprobe);
#ifdef STATS
if (SEQ_LT(tp->snd_nxt, tp->snd_max))
stats_voi_update_abs_u32(tp->t_stats,
VOI_TCP_RETXPB, len);
else
stats_voi_update_abs_u64(tp->t_stats,
VOI_TCP_TXPB, len);
#endif
} else if (SEQ_LT(tp->snd_nxt, tp->snd_max) || sack_rxmit) {
tp->t_sndrexmitpack++;
TCPSTAT_INC(tcps_sndrexmitpack);
TCPSTAT_ADD(tcps_sndrexmitbyte, len);
if (sack_rxmit) {
TCPSTAT_INC(tcps_sack_rexmits);
if (tso) {
TCPSTAT_INC(tcps_sack_rexmits_tso);
}
TCPSTAT_ADD(tcps_sack_rexmit_bytes, len);
}
#ifdef STATS
stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_RETXPB,
len);
#endif
} else {
TCPSTAT_INC(tcps_sndpack);
TCPSTAT_ADD(tcps_sndbyte, len);
#ifdef STATS
stats_voi_update_abs_u64(tp->t_stats, VOI_TCP_TXPB,
len);
#endif
}
#ifdef INET6
/* Headers do not fit in a plain header mbuf: use a cluster. */
if (MHLEN < hdrlen + max_linkhdr)
m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
else
#endif
m = m_gethdr(M_NOWAIT, MT_DATA);
if (m == NULL) {
SOCK_SENDBUF_UNLOCK(so);
error = ENOBUFS;
sack_rxmit = false;
goto out;
}
/* Leave room in front for the link-layer header. */
m->m_data += max_linkhdr;
m->m_len = hdrlen;
/* Locate the send buffer mbuf and offset that correspond to off. */
mb = sbsndptr_noadv(&so->so_snd, off, &moff);
if (len <= MHLEN - hdrlen - max_linkhdr && !hw_tls) {
/* Small payload: copy it directly behind the headers. */
m_copydata(mb, moff, len,
mtod(m, caddr_t) + hdrlen);
if (SEQ_LT(tp->snd_nxt, tp->snd_max))
sbsndptr_adv(&so->so_snd, mb, len);
m->m_len += len;
} else {
int32_t old_len;
/* The send buffer is only passed along when not retransmitting. */
if (SEQ_LT(tp->snd_nxt, tp->snd_max))
msb = NULL;
else
msb = &so->so_snd;
old_len = len;
/* tcp_m_copym() may shorten len. */
m->m_next = tcp_m_copym(mb, moff,
&len, if_hw_tsomaxsegcount,
if_hw_tsomaxsegsize, msb, hw_tls);
/* A shortened copy no longer reaches the end: drop FIN. */
if (old_len != len)
flags &= ~TH_FIN;
/* Now fits in one segment: TSO is not needed. */
if (len <= (tp->t_maxseg - optlen)) {
tso = 0;
}
if (m->m_next == NULL) {
SOCK_SENDBUF_UNLOCK(so);
(void) m_free(m);
error = ENOBUFS;
sack_rxmit = false;
goto out;
}
}
/* Set PUSH when this segment sends everything in the send buffer. */
if (((uint32_t)off + (uint32_t)len == sbused(&so->so_snd)) &&
!(flags & TH_SYN))
flags |= TH_PUSH;
SOCK_SENDBUF_UNLOCK(so);
} else {
SOCK_SENDBUF_UNLOCK(so);
/* Statistics for segments without data. */
if (tp->t_flags & TF_ACKNOW)
TCPSTAT_INC(tcps_sndacks);
else if (flags & (TH_SYN|TH_FIN|TH_RST))
TCPSTAT_INC(tcps_sndctrl);
else if (SEQ_GT(tp->snd_up, tp->snd_una))
TCPSTAT_INC(tcps_sndurg);
else
TCPSTAT_INC(tcps_sndwinup);
m = m_gethdr(M_NOWAIT, MT_DATA);
if (m == NULL) {
error = ENOBUFS;
sack_rxmit = false;
goto out;
}
#ifdef INET6
if (isipv6 && (MHLEN < hdrlen + max_linkhdr) &&
MHLEN >= hdrlen) {
M_ALIGN(m, hdrlen);
} else
#endif
m->m_data += max_linkhdr;
m->m_len = hdrlen;
}
SOCK_SENDBUF_UNLOCK_ASSERT(so);
m->m_pkthdr.rcvif = (struct ifnet *)0;
#ifdef MAC
mac_inpcb_create_mbuf(inp, m);
#endif
/*
 * Locate the IP, optional UDP and TCP headers inside the mbuf and fill in
 * the header templates.
 */
#ifdef INET6
if (isipv6) {
ip6 = mtod(m, struct ip6_hdr *);
if (tp->t_port) {
udp = (struct udphdr *)((caddr_t)ip6 + sizeof(struct ip6_hdr));
udp->uh_sport = htons(V_tcp_udp_tunneling_port);
udp->uh_dport = tp->t_port;
ulen = hdrlen + len - sizeof(struct ip6_hdr);
udp->uh_ulen = htons(ulen);
th = (struct tcphdr *)(udp + 1);
} else {
th = (struct tcphdr *)(ip6 + 1);
}
tcpip_fillheaders(inp, tp->t_port, ip6, th);
} else
#endif
{
ip = mtod(m, struct ip *);
if (tp->t_port) {
udp = (struct udphdr *)((caddr_t)ip + sizeof(struct ip));
udp->uh_sport = htons(V_tcp_udp_tunneling_port);
udp->uh_dport = tp->t_port;
ulen = hdrlen + len - sizeof(struct ip);
udp->uh_ulen = htons(ulen);
th = (struct tcphdr *)(udp + 1);
} else
th = (struct tcphdr *)(ip + 1);
tcpip_fillheaders(inp, tp->t_port, ip, th);
}
/* Resending a FIN: step back so it reuses its sequence number. */
if (flags & TH_FIN && tp->t_flags & TF_SENTFIN &&
tp->snd_nxt == tp->snd_max)
tp->snd_nxt--;
/* ECN: add the SYN-time flags, then set the ECN bits in the IP header. */
if (tp->t_state == TCPS_SYN_SENT && V_tcp_do_ecn) {
flags |= tcp_ecn_output_syn_sent(tp);
}
if ((TCPS_HAVERCVDSYN(tp->t_state)) &&
(tp->t_flags2 & (TF2_ECN_PERMIT | TF2_ACE_PERMIT))) {
int ect = tcp_ecn_output_established(tp, &flags, len, sack_rxmit);
if ((tp->t_state == TCPS_SYN_RECEIVED) &&
(tp->t_flags2 & TF2_ECN_SND_ECE))
tp->t_flags2 &= ~TF2_ECN_SND_ECE;
#ifdef INET6
if (isipv6) {
ip6->ip6_flow &= ~htonl(IPTOS_ECN_MASK << IPV6_FLOWLABEL_LEN);
ip6->ip6_flow |= htonl(ect << IPV6_FLOWLABEL_LEN);
}
else
#endif
{
ip->ip_tos &= ~IPTOS_ECN_MASK;
ip->ip_tos |= ect;
}
}
/*
 * Sequence number: snd_nxt for segments with data, SYN or FIN, or while the
 * persist timer runs; snd_max for everything else.  SACK retransmissions
 * use the hole's rxmit pointer and advance it.
 */
if (!sack_rxmit) {
if (len || (flags & (TH_SYN|TH_FIN)) ||
tcp_timer_active(tp, TT_PERSIST))
th->th_seq = htonl(tp->snd_nxt);
else
th->th_seq = htonl(tp->snd_max);
} else {
th->th_seq = htonl(p->rxmit);
p->rxmit += len;
if ((tp->t_flags & TF_LRD) && SEQ_GEQ(p->rxmit, p->end))
p->rxmit = SEQ_MAX(p->rxmit, tp->snd_recover);
tp->sackhint.sack_bytes_rexmit += len;
}
/* Count every byte sent while in recovery. */
if (IN_RECOVERY(tp->t_flags)) {
tp->sackhint.prr_out += len;
}
th->th_ack = htonl(tp->rcv_nxt);
if (optlen) {
bcopy(opt, th + 1, optlen);
th->th_off = (sizeof (struct tcphdr) + optlen) >> 2;
}
/*
 * Receive window to advertise: zero on RST; otherwise a small window is
 * rounded down to zero, and the window is never advertised below what was
 * already advertised (rcv_adv).
 */
if (flags & TH_RST) {
recwin = 0;
} else {
if (recwin < (so->so_rcv.sb_hiwat / 4) &&
recwin < tp->t_maxseg)
recwin = 0;
if (SEQ_GT(tp->rcv_adv, tp->rcv_nxt) &&
recwin < (tp->rcv_adv - tp->rcv_nxt))
recwin = (tp->rcv_adv - tp->rcv_nxt);
}
/* The window in a SYN is not scaled; all other windows are. */
if (flags & TH_SYN)
th->th_win = htons((u_short)
(min(sbspace(&so->so_rcv), TCP_MAXWIN)));
else {
recwin = roundup2(recwin, 1 << tp->rcv_scale);
th->th_win = htons((u_short)(recwin >> tp->rcv_scale));
}
/* Track whether a zero window was advertised. */
if (th->th_win == 0) {
tp->t_sndzerowin++;
tp->t_flags |= TF_RXWIN0SENT;
} else
tp->t_flags &= ~TF_RXWIN0SENT;
/* Urgent pointer; with no urgent data pending, snd_up follows snd_una. */
if (SEQ_GT(tp->snd_up, tp->snd_nxt)) {
th->th_urp = htons((u_short)(tp->snd_up - tp->snd_nxt));
flags |= TH_URG;
} else {
tp->snd_up = tp->snd_una;
}
tcp_set_flags(th, flags);
m->m_pkthdr.len = hdrlen + len;
#if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE)
/*
 * Compute the TCP signature into the space reserved by tcp_addoptions().
 * to_signature points into opt[], so the same offset is applied to the
 * option area that follows th.  The segment is dropped on failure.
 */
if (to.to_flags & TOF_SIGNATURE) {
if (!TCPMD5_ENABLED() || (error = TCPMD5_OUTPUT(m, th,
(u_char *)(th + 1) + (to.to_signature - opt))) != 0) {
m_freem(m);
goto out;
}
}
#endif
/*
 * Request checksum offload and store the pseudo-header checksum.  For
 * UDP-tunneled segments the UDP checksum is requested and th_sum is zero.
 */
#ifdef INET6
if (isipv6) {
if (tp->t_port) {
m->m_pkthdr.csum_flags = CSUM_UDP_IPV6;
m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
udp->uh_sum = in6_cksum_pseudo(ip6, ulen, IPPROTO_UDP, 0);
th->th_sum = htons(0);
UDPSTAT_INC(udps_opackets);
} else {
m->m_pkthdr.csum_flags = CSUM_TCP_IPV6;
m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
th->th_sum = in6_cksum_pseudo(ip6,
sizeof(struct tcphdr) + optlen + len, IPPROTO_TCP,
0);
}
}
#endif
#if defined(INET6) && defined(INET)
else
#endif
#ifdef INET
{
if (tp->t_port) {
m->m_pkthdr.csum_flags = CSUM_UDP;
m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
udp->uh_sum = in_pseudo(ip->ip_src.s_addr,
ip->ip_dst.s_addr, htons(ulen + IPPROTO_UDP));
th->th_sum = htons(0);
UDPSTAT_INC(udps_opackets);
} else {
m->m_pkthdr.csum_flags = CSUM_TCP;
m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum);
th->th_sum = in_pseudo(ip->ip_src.s_addr,
ip->ip_dst.s_addr, htons(sizeof(struct tcphdr) +
IPPROTO_TCP + len + optlen));
}
KASSERT(ip->ip_v == IPVERSION,
("%s: IP version incorrect: %d", __func__, ip->ip_v));
}
#endif
/* Request TSO and record the per-segment payload size. */
if (tso) {
KASSERT(len > tp->t_maxseg - optlen - ipsec_optlen,
("%s: len <= tso_segsz", __func__));
m->m_pkthdr.csum_flags |= CSUM_TSO;
m->m_pkthdr.tso_segsz = tp->t_maxseg - optlen - ipsec_optlen;
}
KASSERT(len + hdrlen == m_length(m, NULL),
("%s: mbuf chain shorter than expected: %d + %u != %u",
__func__, len, hdrlen, m_length(m, NULL)));
#ifdef TCP_HHOOK
hhook_run_tcp_est_out(tp, th, &to, len, tso);
#endif
TCP_PROBE3(debug__output, tp, th, m);
/* Log the send if logging is on; the errno is filled in after output. */
if (tcp_bblogging_on(tp))
lgb = tcp_log_event(tp, th, &so->so_rcv, &so->so_snd,
TCP_LOG_OUT, ERRNO_UNK, len, NULL, false, NULL, NULL, 0,
NULL);
else
lgb = NULL;
/*
 * Finish the IP header and pass the packet to the IP layer.  On EMSGSIZE
 * the route's MTU, if available, is saved in mtu for the retry below.
 */
#ifdef INET6
if (isipv6) {
ip6->ip6_hlim = in6_selecthlim(inp, NULL);
ip6->ip6_plen = htons(m->m_pkthdr.len - sizeof(*ip6));
if (V_path_mtu_discovery && tp->t_maxseg > V_tcp_minmss)
tp->t_flags2 |= TF2_PLPMTU_PMTUD;
else
tp->t_flags2 &= ~TF2_PLPMTU_PMTUD;
if (tp->t_state == TCPS_SYN_SENT)
TCP_PROBE5(connect__request, NULL, tp, ip6, tp, th);
TCP_PROBE5(send, NULL, tp, ip6, tp, th);
error = ip6_output(m, inp->in6p_outputopts, &inp->inp_route6,
((so->so_options & SO_DONTROUTE) ? IP_ROUTETOIF : 0),
NULL, NULL, inp);
if (error == EMSGSIZE && inp->inp_route6.ro_nh != NULL)
mtu = inp->inp_route6.ro_nh->nh_mtu;
}
#endif
#if defined(INET) && defined(INET6)
else
#endif
#ifdef INET
{
ip->ip_len = htons(m->m_pkthdr.len);
#ifdef INET6
if (inp->inp_vflag & INP_IPV6PROTO)
ip->ip_ttl = in6_selecthlim(inp, NULL);
#endif
/*
 * With path MTU discovery active, set DF unless this is a UDP-tunneled
 * segment carrying at least V_tcp_minmss bytes.
 */
if (V_path_mtu_discovery && tp->t_maxseg > V_tcp_minmss) {
tp->t_flags2 |= TF2_PLPMTU_PMTUD;
if (tp->t_port == 0 || len < V_tcp_minmss) {
ip->ip_off |= htons(IP_DF);
}
} else {
tp->t_flags2 &= ~TF2_PLPMTU_PMTUD;
}
if (tp->t_state == TCPS_SYN_SENT)
TCP_PROBE5(connect__request, NULL, tp, ip, tp, th);
TCP_PROBE5(send, NULL, tp, ip, tp, th);
error = ip_output(m, inp->inp_options, &inp->inp_route,
((so->so_options & SO_DONTROUTE) ? IP_ROUTETOIF : 0), 0, inp);
if (error == EMSGSIZE && inp->inp_route.ro_nh != NULL)
mtu = inp->inp_route.ro_nh->nh_mtu;
}
#endif
if (lgb != NULL) {
lgb->tlb_errno = error;
lgb = NULL;
}
/*
 * Common exit for successful sends and for the error paths above.  The send
 * buffer lock is no longer held here.
 */
out:
if (error == 0)
tcp_account_for_send(tp, len, (tp->snd_nxt != tp->snd_max), 0, hw_tls);
if (flags & TH_RST) {
/* A RST needs no sequence or timer bookkeeping. */
TCPSTAT_INC(tcps_sndtotal);
return (0);
} else if ((tp->t_flags & TF_FORCEDATA) == 0 ||
!tcp_timer_active(tp, TT_PERSIST)) {
/*
 * Normal transmit: advance snd_nxt over the sequence space just sent and
 * arrange for a retransmit or persist timer.
 */
tcp_seq startseq = tp->snd_nxt;
if (flags & (TH_SYN|TH_FIN)) {
if (flags & TH_SYN)
tp->snd_nxt++;
if (flags & TH_FIN) {
tp->snd_nxt++;
tp->t_flags |= TF_SENTFIN;
}
}
/* SACK retransmissions do not advance snd_nxt by len. */
if (sack_rxmit)
goto timer;
tp->snd_nxt += len;
if (SEQ_GT(tp->snd_nxt, tp->snd_max)) {
/* New data: update snd_max and start timing it if nothing is timed. */
if (tp->snd_una == tp->snd_max)
tp->t_acktime = ticks;
tp->snd_max = tp->snd_nxt;
tp->t_sndtime = ticks;
if (tp->t_rtttime == 0) {
tp->t_rtttime = ticks;
tp->t_rtseq = startseq;
TCPSTAT_INC(tcps_segstimed);
}
#ifdef STATS
if (!(tp->t_flags & TF_GPUTINPROG) && len) {
tp->t_flags |= TF_GPUTINPROG;
tp->gput_seq = startseq;
tp->gput_ack = startseq +
ulmin(sbavail(&so->so_snd) - off, sendwin);
tp->gput_ts = tcp_ts_getticks();
}
#endif
}
timer:
/*
 * Arm the retransmit timer if it is not running and something is
 * outstanding, cancelling the persist timer first.  Otherwise, if
 * nothing was sent although data is queued and no timer runs, start
 * the persist timer.
 */
if (!tcp_timer_active(tp, TT_REXMT) &&
((sack_rxmit && tp->snd_nxt != tp->snd_max) ||
(tp->snd_nxt != tp->snd_una))) {
if (tcp_timer_active(tp, TT_PERSIST)) {
tcp_timer_activate(tp, TT_PERSIST, 0);
tp->t_rxtshift = 0;
}
tcp_timer_activate(tp, TT_REXMT, TP_RXTCUR(tp));
} else if (len == 0 && sbavail(&so->so_snd) &&
!tcp_timer_active(tp, TT_REXMT) &&
!tcp_timer_active(tp, TT_PERSIST)) {
tp->t_rxtshift = 0;
tcp_setpersist(tp);
}
} else {
/* Forced send while persisting: only snd_max moves, not snd_nxt. */
int xlen = len;
if (flags & TH_SYN)
++xlen;
if (flags & TH_FIN) {
++xlen;
tp->t_flags |= TF_SENTFIN;
}
if (SEQ_GT(tp->snd_nxt + xlen, tp->snd_max))
tp->snd_max = tp->snd_nxt + xlen;
}
/* SACK blocks were sent successfully: clean up any D-SACK blocks. */
if ((error == 0) &&
(tp->rcv_numsacks > 0) &&
TCPS_HAVEESTABLISHED(tp->t_state) &&
(tp->t_flags & TF_SACK_PERMIT)) {
tcp_clean_dsack_blocks(tp);
}
/* After a SACK retransmission, pull snd_nxt along if it lags the hole. */
if ((error == 0) &&
sack_rxmit &&
SEQ_LT(tp->snd_nxt, SEQ_MIN(p->rxmit, p->end))) {
tp->snd_nxt = SEQ_MIN(p->rxmit, p->end);
}
if (error) {
/*
 * The segment was not sent.  Undo the sequence-space advance, except
 * for forced sends while persisting, SYN segments and EPERM.
 */
if (((tp->t_flags & TF_FORCEDATA) == 0 ||
!tcp_timer_active(tp, TT_PERSIST)) &&
((flags & TH_SYN) == 0) &&
(error != EPERM)) {
if (sack_rxmit) {
p->rxmit = SEQ_MIN(p->end, p->rxmit) - len;
tp->sackhint.sack_bytes_rexmit -= len;
KASSERT(tp->sackhint.sack_bytes_rexmit >= 0,
("sackhint bytes rtx >= 0"));
KASSERT((flags & TH_FIN) == 0,
("error while FIN with SACK rxmit"));
} else {
tp->snd_nxt -= len;
if (flags & TH_FIN)
tp->snd_nxt--;
}
if (IN_RECOVERY(tp->t_flags))
tp->sackhint.prr_out -= len;
}
SOCK_SENDBUF_UNLOCK_ASSERT(so);
switch (error) {
case EACCES:
case EPERM:
/* Record as a soft error and report it to the caller. */
tp->t_softerror = error;
return (error);
case ENOBUFS:
/* Out of buffers: shrink cwnd to one segment and report success. */
TCP_XMIT_TIMER_ASSERT(tp, len, flags);
tp->snd_cwnd = tcp_maxseg(tp);
return (0);
case EMSGSIZE:
/*
 * Packet too big: turn TSO off for this connection if it was used,
 * and retry with the updated MSS when an MTU was obtained.
 */
if (tso)
tp->t_flags &= ~TF_TSO;
if (mtu != 0) {
tcp_mss_update(tp, -1, mtu, NULL, NULL);
goto again;
}
return (error);
case EHOSTDOWN:
case EHOSTUNREACH:
case ENETDOWN:
case ENETUNREACH:
/* Once a SYN has been received these are only soft errors. */
if (TCPS_HAVERCVDSYN(tp->t_state)) {
tp->t_softerror = error;
return (0);
}
/* FALLTHROUGH */
default:
return (error);
}
}
TCPSTAT_INC(tcps_sndtotal);
/*
 * The segment was sent.  Remember the furthest window edge advertised and
 * the last ACK sent, and clear any pending ACK state.
 */
if (SEQ_GT(tp->rcv_nxt + recwin, tp->rcv_adv))
tp->rcv_adv = tp->rcv_nxt + recwin;
tp->last_ack_sent = tp->rcv_nxt;
tp->t_flags &= ~(TF_ACKNOW | TF_DELACK);
if (tcp_timer_active(tp, TT_DELACK))
tcp_timer_activate(tp, TT_DELACK, 0);
/* More segments can go out right away. */
if (sendalot)
goto again;
return (0);
}
/*
 * Start (or restart) the persist timer for tp and bump the backoff shift.
 *
 * The timeout is derived from the smoothed RTT estimate and the current
 * backoff, clamped to [tcp_persmin, tcp_persmax], and further limited by
 * the time remaining until the maximum-unacknowledged-time limit when one
 * is configured.  Panics if the retransmit timer is pending.  Does nothing
 * beyond clearing TF_PREVVALID when the connection is already closed.
 */
void
tcp_setpersist(struct tcpcb *tp)
{
	int base, timeout;

	base = ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1;

	tp->t_flags &= ~TF_PREVVALID;
	if (tcp_timer_active(tp, TT_REXMT))
		panic("tcp_setpersist: retransmit pending");

	/* Nothing to arm on a closed connection. */
	if (tp->t_state == TCPS_CLOSED)
		return;

	TCPT_RANGESET(timeout, base * tcp_backoff[tp->t_rxtshift],
	    tcp_persmin, tcp_persmax);

	if (TP_MAXUNACKTIME(tp) && tp->t_acktime) {
		/* Ticks left before the unacknowledged-time limit, at least 1. */
		int remaining = tp->t_acktime + TP_MAXUNACKTIME(tp) - ticks;

		remaining = (remaining < 1) ? 1 : remaining;
		timeout = (remaining < timeout) ? remaining : timeout;
	}

	tcp_timer_activate(tp, TT_PERSIST, timeout);

	/* Back off further next time, up to the retry limit. */
	if (tp->t_rxtshift < V_tcp_retries)
		tp->t_rxtshift++;
}
/*
 * Serialize the TCP options selected in to->to_flags into the buffer optp.
 *
 * Options are emitted in ascending order of their TOF_* flag bit.  Each
 * option is preceded by as many NOPs as its alignment rule needs, and is
 * skipped if it no longer fits within TCP_MAXOLEN bytes.  The result is
 * terminated with EOL and padded to a multiple of four bytes.
 *
 * Side effects on *to:
 *  - to_mss, to_tsval and to_tsecr are converted to network byte order in
 *    place;
 *  - to_signature is set to point at the signature bytes reserved in optp;
 *  - TOF_SIGNATURE and TOF_FASTOPEN are cleared from to_flags when the
 *    corresponding option does not fit (other skipped options keep their
 *    flag set).
 *
 * Returns the total number of option bytes written.
 */
int
tcp_addoptions(struct tcpopt *to, u_char *optp)
{
u_int32_t mask, optlen = 0;
for (mask = 1; mask < TOF_MAXOPT; mask <<= 1) {
/* Skip flag bits that are not requested. */
if ((to->to_flags & mask) != mask)
continue;
/* Option space is completely used up. */
if (optlen == TCP_MAXOLEN)
break;
/*
 * NOTE(review): in the cases below the alignment NOPs are written before
 * the space check, so they stay in the buffer (and in optlen) even when
 * the option itself is then skipped.
 */
switch (to->to_flags & mask) {
case TOF_MSS:
/* Start on a 4-byte boundary. */
while (optlen % 4) {
optlen += TCPOLEN_NOP;
*optp++ = TCPOPT_NOP;
}
if (TCP_MAXOLEN - optlen < TCPOLEN_MAXSEG)
continue;
optlen += TCPOLEN_MAXSEG;
*optp++ = TCPOPT_MAXSEG;
*optp++ = TCPOLEN_MAXSEG;
to->to_mss = htons(to->to_mss);
bcopy((u_char *)&to->to_mss, optp, sizeof(to->to_mss));
optp += sizeof(to->to_mss);
break;
case TOF_SCALE:
/* Start at an odd offset. */
while (!optlen || optlen % 2 != 1) {
optlen += TCPOLEN_NOP;
*optp++ = TCPOPT_NOP;
}
if (TCP_MAXOLEN - optlen < TCPOLEN_WINDOW)
continue;
optlen += TCPOLEN_WINDOW;
*optp++ = TCPOPT_WINDOW;
*optp++ = TCPOLEN_WINDOW;
*optp++ = to->to_wscale;
break;
case TOF_SACKPERM:
/* Start at an even offset. */
while (optlen % 2) {
optlen += TCPOLEN_NOP;
*optp++ = TCPOPT_NOP;
}
if (TCP_MAXOLEN - optlen < TCPOLEN_SACK_PERMITTED)
continue;
optlen += TCPOLEN_SACK_PERMITTED;
*optp++ = TCPOPT_SACK_PERMITTED;
*optp++ = TCPOLEN_SACK_PERMITTED;
break;
case TOF_TS:
/* Start at offset 2 mod 4, so the two values that follow are 4-byte aligned. */
while (!optlen || optlen % 4 != 2) {
optlen += TCPOLEN_NOP;
*optp++ = TCPOPT_NOP;
}
if (TCP_MAXOLEN - optlen < TCPOLEN_TIMESTAMP)
continue;
optlen += TCPOLEN_TIMESTAMP;
*optp++ = TCPOPT_TIMESTAMP;
*optp++ = TCPOLEN_TIMESTAMP;
to->to_tsval = htonl(to->to_tsval);
to->to_tsecr = htonl(to->to_tsecr);
bcopy((u_char *)&to->to_tsval, optp, sizeof(to->to_tsval));
optp += sizeof(to->to_tsval);
bcopy((u_char *)&to->to_tsecr, optp, sizeof(to->to_tsecr));
optp += sizeof(to->to_tsecr);
break;
case TOF_SIGNATURE:
{
/* Signature payload size: option length minus kind and length bytes. */
int siglen = TCPOLEN_SIGNATURE - 2;
while (!optlen || optlen % 4 != 2) {
optlen += TCPOLEN_NOP;
*optp++ = TCPOPT_NOP;
}
if (TCP_MAXOLEN - optlen < TCPOLEN_SIGNATURE) {
/* Tell the caller that no signature space was reserved. */
to->to_flags &= ~TOF_SIGNATURE;
continue;
}
optlen += TCPOLEN_SIGNATURE;
*optp++ = TCPOPT_SIGNATURE;
*optp++ = TCPOLEN_SIGNATURE;
/* Remember where the signature goes and zero the placeholder. */
to->to_signature = optp;
while (siglen--)
*optp++ = 0;
break;
}
case TOF_SACK:
{
int sackblks = 0;
struct sackblk *sack = (struct sackblk *)to->to_sacks;
tcp_seq sack_seq;
while (!optlen || optlen % 4 != 2) {
optlen += TCPOLEN_NOP;
*optp++ = TCPOPT_NOP;
}
/* Need room for the option header plus at least one block. */
if (TCP_MAXOLEN - optlen < TCPOLEN_SACKHDR + TCPOLEN_SACK)
continue;
optlen += TCPOLEN_SACKHDR;
*optp++ = TCPOPT_SACK;
/* Emit as many blocks as requested and as still fit. */
sackblks = min(to->to_nsacks,
(TCP_MAXOLEN - optlen) / TCPOLEN_SACK);
*optp++ = TCPOLEN_SACKHDR + sackblks * TCPOLEN_SACK;
while (sackblks--) {
sack_seq = htonl(sack->start);
bcopy((u_char *)&sack_seq, optp, sizeof(sack_seq));
optp += sizeof(sack_seq);
sack_seq = htonl(sack->end);
bcopy((u_char *)&sack_seq, optp, sizeof(sack_seq));
optp += sizeof(sack_seq);
optlen += TCPOLEN_SACK;
sack++;
}
TCPSTAT_INC(tcps_sack_send_blocks);
break;
}
case TOF_FASTOPEN:
{
int total_len;
/* No alignment padding is added for this option. */
total_len = TCPOLEN_FAST_OPEN_EMPTY + to->to_tfo_len;
if (TCP_MAXOLEN - optlen < total_len) {
/* Tell the caller that the option was left out. */
to->to_flags &= ~TOF_FASTOPEN;
continue;
}
*optp++ = TCPOPT_FAST_OPEN;
*optp++ = total_len;
if (to->to_tfo_len > 0) {
bcopy(to->to_tfo_cookie, optp, to->to_tfo_len);
optp += to->to_tfo_len;
}
optlen += total_len;
break;
}
default:
panic("%s: unknown TCP option type", __func__);
break;
}
}
/* Terminate with EOL if the options do not end on a 4-byte boundary. */
if (optlen % 4) {
optlen += TCPOLEN_EOL;
*optp++ = TCPOPT_EOL;
}
/* Fill the rest of the last 4-byte word with padding. */
while (optlen % 4) {
optlen += TCPOLEN_PAD;
*optp++ = TCPOPT_PAD;
}
KASSERT(optlen <= TCP_MAXOLEN, ("%s: TCP options too long", __func__));
return (optlen);
}
/*
 * Copy up to *plen bytes of the mbuf chain m, starting off0 bytes into it,
 * into a newly allocated chain and return the head of that chain.
 *
 * m        - first mbuf of the source chain.
 * off0     - byte offset into the source chain at which copying starts.
 * plen     - in: number of bytes wanted; out: updated to the number of
 *            bytes actually covered when the copy is cut short (end of
 *            chain, segment limit reached, or TLS session change).
 * seglimit - when non-zero, upper bound on the number of segments the copy
 *            may consume; segsize is the size of one such segment.
 * sb       - optional send buffer; when given, its sb_sndptr/sb_sndptroff
 *            hint is advanced past source mbufs that are fully consumed.
 * hw_tls   - when true (and KERN_TLS is configured), stop copying at the
 *            first mbuf whose TLS session differs from the first one's.
 *
 * Data held in external storage (M_EXT / M_EXTPG) is shared via
 * mb_dupcl(); other data is copied with bcopy().
 *
 * Returns NULL if an mbuf or packet header could not be allocated (any
 * partial copy is freed), or if nothing was copied.
 */
struct mbuf *
tcp_m_copym(struct mbuf *m, int32_t off0, int32_t *plen,
    int32_t seglimit, int32_t segsize, struct sockbuf *sb, bool hw_tls)
{
#ifdef KERN_TLS
	struct ktls_session *tls, *ntls;
	struct mbuf *start __diagused;
#endif
	struct mbuf *n, **np;
	struct mbuf *top;
	int32_t off = off0;
	int32_t len = *plen;
	int32_t fragsize;
	int32_t len_cp = 0;
	int32_t *pkthdrlen;
	uint32_t mlen, frags;
	bool copyhdr;

	KASSERT(off >= 0, ("tcp_m_copym, negative off %d", off));
	KASSERT(len >= 0, ("tcp_m_copym, negative len %d", len));
	/* Only a copy that starts at a packet header duplicates that header. */
	if (off == 0 && m->m_flags & M_PKTHDR)
		copyhdr = true;
	else
		copyhdr = false;

	/* Skip whole mbufs that lie before the starting offset. */
	while (off > 0) {
		KASSERT(m != NULL, ("tcp_m_copym, offset > size of mbuf chain"));
		if (off < m->m_len)
			break;
		off -= m->m_len;
		if ((sb) && (m == sb->sb_sndptr)) {
			sb->sb_sndptroff += m->m_len;
			sb->sb_sndptr = m->m_next;
		}
		m = m->m_next;
	}

	/* np always points at the link where the next copied mbuf is stored. */
	np = &top;
	top = NULL;
	pkthdrlen = NULL;
#ifdef KERN_TLS
	if (hw_tls && (m->m_flags & M_EXTPG))
		tls = m->m_epg_tls;
	else
		tls = NULL;
	start = m;
#endif
	while (len > 0) {
		if (m == NULL) {
			/* Ran off the end of the chain: report what was copied. */
			KASSERT(len == M_COPYALL,
			    ("tcp_m_copym, length > size of mbuf chain"));
			*plen = len_cp;
			if (pkthdrlen != NULL)
				*pkthdrlen = len_cp;
			break;
		}
#ifdef KERN_TLS
		if (hw_tls) {
			if (m->m_flags & M_EXTPG)
				ntls = m->m_epg_tls;
			else
				ntls = NULL;
			/* Stop where the TLS session changes. */
			if (tls != ntls) {
				MPASS(m != start);
				*plen = len_cp;
				if (pkthdrlen != NULL)
					*pkthdrlen = len_cp;
				break;
			}
		}
#endif
		mlen = min(len, m->m_len - off);
		if (seglimit) {
			/*
			 * Account for the segments this mbuf will consume.
			 * M_EXTPG mbufs are charged three extra segments up
			 * front.
			 */
			if (m->m_flags & M_EXTPG) {
				fragsize = min(segsize, PAGE_SIZE);
				frags = 3;
			} else {
				fragsize = segsize;
				frags = 0;
			}
			/* No room for even one more segment: stop here. */
			if ((frags + 1) >= seglimit) {
				*plen = len_cp;
				if (pkthdrlen != NULL)
					*pkthdrlen = len_cp;
				break;
			}
			/*
			 * The whole mbuf does not fit: copy only what does and
			 * set len so that the loop ends after this mbuf.
			 */
			if ((frags + howmany(mlen, fragsize)) >= seglimit) {
				mlen = (seglimit - frags - 1) * fragsize;
				len = mlen;
				*plen = len_cp + len;
				if (pkthdrlen != NULL)
					*pkthdrlen = *plen;
			}
			frags += howmany(mlen, fragsize);
			if (frags == 0)
				frags++;
			seglimit -= frags;
			KASSERT(seglimit > 0,
			    ("%s: seglimit went too low", __func__));
		}
		if (copyhdr)
			n = m_gethdr(M_NOWAIT, m->m_type);
		else
			n = m_get(M_NOWAIT, m->m_type);
		*np = n;
		if (n == NULL)
			goto nospace;
		if (copyhdr) {
			if (!m_dup_pkthdr(n, m, M_NOWAIT))
				goto nospace;
			if (len == M_COPYALL)
				n->m_pkthdr.len -= off0;
			else
				n->m_pkthdr.len = len;
			/* Kept so the length can be corrected if cut short. */
			pkthdrlen = &n->m_pkthdr.len;
			copyhdr = false;
		}
		n->m_len = mlen;
		len_cp += n->m_len;
		if (m->m_flags & (M_EXT | M_EXTPG)) {
			/* Share the external storage instead of copying it. */
			n->m_data = m->m_data + off;
			mb_dupcl(n, m);
		} else
			bcopy(mtod(m, caddr_t)+off, mtod(n, caddr_t),
			    (u_int)n->m_len);
		/* Advance the send pointer hint past a fully consumed mbuf. */
		if (sb && (sb->sb_sndptr == m) &&
		    ((n->m_len + off) >= m->m_len) && m->m_next) {
			sb->sb_sndptroff += m->m_len;
			sb->sb_sndptr = m->m_next;
		}
		off = 0;
		if (len != M_COPYALL) {
			len -= n->m_len;
		}
		m = m->m_next;
		np = &n->m_next;
	}
	return (top);
nospace:
	m_freem(top);
	return (NULL);
}
/*
 * Grow the socket's send buffer by one increment when automatic sizing is
 * enabled and all of the following hold:
 *  1. the peer's window, scaled by 5/4, is at least the buffer's high-water
 *     mark (less the optional low-water adjustment);
 *  2. the buffer is filled to at least 7/8 of its high-water mark (less the
 *     same adjustment);
 *  3. the buffer fill is below the automatic maximum;
 *  4. the send window is at least the amount of buffered data not yet sent.
 *
 * If the reservation fails, automatic sizing is turned off for this socket.
 */
void
tcp_sndbuf_autoscale(struct tcpcb *tp, struct socket *so, uint32_t sendwin)
{
	int lowat;

	/* Automatic sizing disabled globally or for this socket. */
	if (!V_tcp_do_autosndbuf || !(so->so_snd.sb_flags & SB_AUTOSIZE))
		return;

	lowat = V_tcp_sendbuf_auto_lowat ? so->so_snd.sb_lowat : 0;

	/* Criterion 1: peer window versus current buffer size. */
	if ((tp->snd_wnd / 4 * 5) < so->so_snd.sb_hiwat - lowat)
		return;
	/* Criterion 2: buffer is nearly full. */
	if (sbused(&so->so_snd) < (so->so_snd.sb_hiwat / 8 * 7) - lowat)
		return;
	/* Criterion 3: still below the automatic maximum. */
	if (sbused(&so->so_snd) >= V_tcp_autosndbuf_max)
		return;
	/* Criterion 4: send window covers the unsent data. */
	if (sendwin < (sbused(&so->so_snd) - (tp->snd_nxt - tp->snd_una)))
		return;

	if (!sbreserve_locked(so, SO_SND,
	    min(so->so_snd.sb_hiwat + V_tcp_autosndbuf_inc,
	    V_tcp_autosndbuf_max), curthread))
		so->so_snd.sb_flags &= ~SB_AUTOSIZE;
}