#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/hhook.h>
#include <sys/khelp.h>
#include <sys/module_khelp.h>
#include <sys/socket.h>
#include <sys/sockopt.h>
#include <net/vnet.h>
#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_var.h>
#include <netinet/khelp/h_ertt.h>
#include <vm/uma.h>
/*
 * UMA zone backing every struct txseginfo record; created in ertt_mod_init()
 * and destroyed in ertt_mod_destroy().
 */
uma_zone_t txseginfo_zone;
/* Ceiling for the dlyack_rx counter: it is only incremented while below this. */
#define DLYACK_SMOOTH 5
/*
 * TCP timestamps are only consulted while timestamp_errors is below this
 * value.
 */
#define MAX_TS_ERR 10
/*
 * Forward declarations.  The two *_hook functions are referenced from the
 * ertt_hooks table, the mod_* functions from ertt_helper, and the ctor/dtor
 * pair from the KHELP_DECLARE_MOD_UMA() invocation at the end of the file.
 */
static int ertt_packet_measurement_hook(int hhook_type, int hhook_id,
void *udata, void *ctx_data, void *hdata, struct osd *hosd);
static int ertt_add_tx_segment_info_hook(int hhook_type, int hhook_id,
void *udata, void *ctx_data, void *hdata, struct osd *hosd);
static int ertt_mod_init(void);
static int ertt_mod_destroy(void);
static int ertt_uma_ctor(void *mem, int size, void *arg, int flags);
static void ertt_uma_dtor(void *mem, int size, void *arg);
/*
 * One record per transmitted segment that carried data.  Records are queued
 * on the txsegi_q list of struct ertt by the output hook and consumed by the
 * input hook when matching acknowledgements arrive.
 */
struct txseginfo {
	/* Length recorded at transmit; shrunk on a partial ACK of a TSO record. */
	uint32_t	len;
	/* Sequence number of the first byte (th_seq converted with ntohl()). */
	tcp_seq		seq;
	/* Transmit timestamp: TS option value minus ts_offset, or current ticks. */
	uint32_t	tx_ts;
	/* Echo-reply timestamp sent with the segment, or 0 if no TS option. */
	uint32_t	rx_ts;
	/* Bitwise OR of TXSI_* values. */
	uint32_t	flags;
	/* Linkage on the txsegi_q list. */
	TAILQ_ENTRY(txseginfo) txsegi_lnk;
};
/* Values for the flags member of struct txseginfo. */
/* The record was created with the tso indication set in the hook data. */
#define TXSI_TSO 0x01
/* This record starts a marked-packet RTT measurement. */
#define TXSI_RTT_MEASURE_START 0x02
/* NOTE(review): not referenced anywhere in the visible part of this file. */
#define TXSI_RX_MEASURE_END 0x04
/*
 * Helper descriptor handed to KHELP_DECLARE_MOD_UMA(): names the module
 * init/destroy callbacks and declares the helper as a TCP-class helper that
 * needs OSD.
 */
struct helper ertt_helper = {
	.mod_init	= ertt_mod_init,
	.mod_destroy	= ertt_mod_destroy,
	.h_flags	= HELPER_NEEDS_OSD,
	.h_classes	= HELPER_CLASS_TCP,
};
/*
 * Hook table: the measurement hook is attached to HHOOK_TCP_EST_IN and the
 * segment-recording hook to HHOOK_TCP_EST_OUT.  Neither uses private udata.
 */
struct hookinfo ertt_hooks[] = {
	{
		.hook_type	= HHOOK_TYPE_TCP,
		.hook_id	= HHOOK_TCP_EST_IN,
		.hook_udata	= NULL,
		.hook_func	= ertt_packet_measurement_hook,
	},
	{
		.hook_type	= HHOOK_TYPE_TCP,
		.hook_id	= HHOOK_TCP_EST_OUT,
		.hook_udata	= NULL,
		.hook_func	= ertt_add_tx_segment_info_hook,
	},
};
/* Values for the mflag argument of marked_packet_rtt(). */
#define	MULTI_ACK		0x01	/* ACK reaches past the end of the record. */
#define	OLD_TXSI		0x02	/* Record is older than the echoed timestamp. */
#define	CORRECT_ACK		0x04	/* ACK falls inside the record. */
#define	FORCED_MEASUREMENT	0x08	/* Measure from the stashed values. */
/*
 * Complete, or postpone, the marked-packet RTT measurement.
 *
 * mflag selects the behaviour:
 *  - MULTI_ACK or OLD_TXSI: nothing is measured.  The transmit timestamp and
 *    length of txsi are stashed in *pmeasurenext and *pmeasurenext_len, and
 *    the length is added to *prtt_bytes_adjust.
 *  - FORCED_MEASUREMENT: the measurement is computed from the stashed
 *    *pmeasurenext / *pmeasurenext_len values; txsi is not dereferenced.
 *  - otherwise: the measurement is computed from txsi->tx_ts.
 *
 * Completing a measurement stores markedpkt_rtt, bytes_tx_in_marked_rtt and
 * a copy of tp->snd_cwnd in e_t, clears ERTT_MEASUREMENT_IN_PROGRESS and
 * sets ERTT_NEW_MEASUREMENT.  If TF_TSO is set on tp it is cleared and
 * ERTT_TSO_DISABLED is set so the output hook can restore it later.
 */
static inline void
marked_packet_rtt(struct txseginfo *txsi, struct ertt *e_t, struct tcpcb *tp,
    uint32_t *pmeasurenext, int *pmeasurenext_len, int *prtt_bytes_adjust,
    int mflag)
{

	/* Cannot measure on this record: remember it and bail out. */
	if ((mflag & (MULTI_ACK | OLD_TXSI)) != 0) {
		*pmeasurenext = txsi->tx_ts;
		*pmeasurenext_len = txsi->len;
		*prtt_bytes_adjust += *pmeasurenext_len;
		return;
	}

	if ((mflag & FORCED_MEASUREMENT) == 0) {
		/* Regular case: time the record we were handed. */
		e_t->markedpkt_rtt = tcp_ts_getticks() - txsi->tx_ts + 1;
		e_t->bytes_tx_in_marked_rtt = e_t->bytes_tx_in_rtt -
		    *prtt_bytes_adjust;
	} else {
		/* Forced case: only the stashed values are used, never txsi. */
		e_t->markedpkt_rtt = tcp_ts_getticks() - *pmeasurenext + 1;
		e_t->bytes_tx_in_marked_rtt = e_t->bytes_tx_in_rtt +
		    *pmeasurenext_len - *prtt_bytes_adjust;
	}
	e_t->marked_snd_cwnd = tp->snd_cwnd;

	/* Measurement done: a new one may start, and this one is available. */
	e_t->flags = (e_t->flags & ~ERTT_MEASUREMENT_IN_PROGRESS) |
	    ERTT_NEW_MEASUREMENT;

	if ((tp->t_flags & TF_TSO) != 0) {
		/* Switch TSO off and note that we were the ones to do so. */
		tp->t_flags &= ~TF_TSO;
		e_t->flags |= ERTT_TSO_DISABLED;
	}
}
/*
 * Inbound hook (listed for HHOOK_TCP_EST_IN in ertt_hooks): match the
 * acknowledgement carried by the received segment against the queue of
 * recorded transmissions and update the RTT state kept in struct ertt.
 *
 * ctx_data is read as a struct tcp_hhook_data (its tp, th and to members are
 * used) and hdata as the struct ertt; both are asserted non-NULL.
 * hhook_type, hhook_id, udata and hosd are not used.  Always returns 0.
 */
static int
ertt_packet_measurement_hook(int hhook_type, int hhook_id, void *udata,
void *ctx_data, void *hdata, struct osd *hosd)
{
struct ertt *e_t;
struct tcpcb *tp;
struct tcphdr *th;
struct tcpopt *to;
struct tcp_hhook_data *thdp;
struct txseginfo *txsi;
int acked, measurenext_len, multiack, new_sacked_bytes, rtt_bytes_adjust;
uint32_t measurenext, rts;
tcp_seq ack;
KASSERT(ctx_data != NULL, ("%s: ctx_data is NULL!", __func__));
KASSERT(hdata != NULL, ("%s: hdata is NULL!", __func__));
e_t = (struct ertt *)hdata;
thdp = ctx_data;
tp = thdp->tp;
th = thdp->th;
to = thdp->to;
/* A non-zero last_sack_ack is treated as "this segment SACKed data". */
new_sacked_bytes = (tp->sackhint.last_sack_ack != 0);
measurenext = measurenext_len = multiack = rts = rtt_bytes_adjust = 0;
/*
 * Distance of the cumulative ACK beyond snd_una.  th_ack is used without a
 * byte swap here -- presumably already in host order on this path.
 */
acked = th->th_ack - tp->snd_una;
INP_WLOCK_ASSERT(tptoinpcb(tp));
/* Nothing to do unless the segment acknowledges or SACKs something. */
if (acked > 0 || new_sacked_bytes) {
/* With no cumulative progress, match against the SACK value instead. */
if (acked == 0 && new_sacked_bytes) {
ack = tp->sackhint.last_sack_ack;
} else
ack = th->th_ack;
/* Walk the transmit records starting from the head of the queue. */
txsi = TAILQ_FIRST(&e_t->txsegi_q);
while (txsi != NULL) {
rts = 0;
/*
 * The ACK reaches past the end of this record.  If the record was the
 * marked one (or a measurement is already pending), postpone the marked
 * measurement; then discard the record and look at the next one.
 */
if (SEQ_GT(ack, txsi->seq + txsi->len)) {
if (txsi->flags & TXSI_RTT_MEASURE_START ||
measurenext) {
marked_packet_rtt(txsi, e_t, tp,
&measurenext, &measurenext_len,
&rtt_bytes_adjust, MULTI_ACK);
}
TAILQ_REMOVE(&e_t->txsegi_q, txsi, txsegi_lnk);
uma_zfree(txseginfo_zone, txsi);
txsi = TAILQ_FIRST(&e_t->txsegi_q);
continue;
}
/*
 * Delayed-ACK heuristic (skipped when SACK information is present):
 * dlyack_rx is a counter kept within [0, DLYACK_SMOOTH].  It goes up when
 * the ACK covers more than t_maxseg or more than this record, and down
 * when it covers exactly t_maxseg or exactly this record; otherwise it is
 * left alone.
 */
if (!new_sacked_bytes) {
if (acked > tp->t_maxseg) {
e_t->dlyack_rx +=
(e_t->dlyack_rx < DLYACK_SMOOTH) ?
1 : 0;
multiack = 1;
} else if (acked > txsi->len) {
multiack = 1;
e_t->dlyack_rx +=
(e_t->dlyack_rx < DLYACK_SMOOTH) ?
1 : 0;
} else if (acked == tp->t_maxseg ||
acked == txsi->len) {
e_t->dlyack_rx -=
(e_t->dlyack_rx > 0) ? 1 : 0;
}
}
/*
 * Use the timestamp option to help match the ACK to a record, but only
 * while fewer than MAX_TS_ERR inconsistencies have been seen and the
 * segment carries a non-zero echo reply.
 */
if (e_t->timestamp_errors < MAX_TS_ERR &&
(to->to_flags & TOF_TS) != 0 && to->to_tsecr) {
rts = to->to_tsecr;
/*
 * Echoed timestamp is older than this record: stop, unless delayed
 * ACKs are suspected (dlyack_rx non-zero), in which case an older echo
 * is tolerated.
 */
if (!e_t->dlyack_rx && TSTMP_LT(rts, txsi->tx_ts))
break;
/*
 * Echoed timestamp is newer than this record: the record is stale.
 * Postpone any marked measurement tied to it, drop it and continue
 * with the next record.
 */
if (TSTMP_GT(rts, txsi->tx_ts)) {
if (txsi->flags & TXSI_RTT_MEASURE_START
|| measurenext) {
marked_packet_rtt(txsi, e_t, tp,
&measurenext, &measurenext_len,
&rtt_bytes_adjust, OLD_TXSI);
}
TAILQ_REMOVE(&e_t->txsegi_q, txsi,
txsegi_lnk);
uma_zfree(txseginfo_zone, txsi);
txsi = TAILQ_FIRST(&e_t->txsegi_q);
continue;
}
/*
 * Same transmit timestamp, but the peer's timestamp value is earlier
 * than the rx_ts stored in the record: count a timestamp error.
 */
if (rts == txsi->tx_ts &&
TSTMP_LT(to->to_tsval, txsi->rx_ts)) {
e_t->timestamp_errors++;
}
}
/* The ACK does not reach into this record at all: stop. */
if (SEQ_LEQ(ack, txsi->seq))
break;
/*
 * From here on: txsi->seq < ack <= txsi->seq + txsi->len.
 * Take a per-packet RTT sample unless delayed ACKs are suspected and this
 * is neither a multi-segment ACK nor a SACK; track min/max (0 means "not
 * set yet").
 */
if (!e_t->dlyack_rx || multiack || new_sacked_bytes) {
e_t->rtt = tcp_ts_getticks() - txsi->tx_ts + 1;
if (e_t->rtt < e_t->minrtt || e_t->minrtt == 0)
e_t->minrtt = e_t->rtt;
if (e_t->rtt > e_t->maxrtt || e_t->maxrtt == 0)
e_t->maxrtt = e_t->rtt;
}
/* Complete the marked measurement on this record if one is due. */
if (txsi->flags & TXSI_RTT_MEASURE_START || measurenext)
marked_packet_rtt(txsi, e_t, tp,
&measurenext, &measurenext_len,
&rtt_bytes_adjust, CORRECT_ACK);
/*
 * A TSO record that is only partly acknowledged is trimmed by acked bytes
 * from the front and kept on the queue, no longer marked as a measurement
 * start.  The trimming treats the ACK as covering the first bytes of the
 * record.
 */
if (txsi->flags & TXSI_TSO) {
if (txsi->len > acked) {
txsi->len -= acked;
txsi->seq += acked;
txsi->flags &= ~TXSI_RTT_MEASURE_START;
break;
}
txsi->len = 0;
}
/* Record fully consumed: unlink, free and stop scanning. */
TAILQ_REMOVE(&e_t->txsegi_q, txsi, txsegi_lnk);
uma_zfree(txseginfo_zone, txsi);
break;
}
/*
 * A postponed measurement is still outstanding: compute it from the
 * stashed timestamp/length.
 * NOTE(review): txsi may be NULL or may point at a record freed just above;
 * marked_packet_rtt() does not dereference it for FORCED_MEASUREMENT, but
 * passing NULL would make that explicit.
 * NOTE(review): measurenext is not cleared when the CORRECT_ACK call above
 * completed a measurement, so this call then runs as well and overwrites
 * that result -- confirm this is intended.
 */
if (measurenext) {
marked_packet_rtt(txsi, e_t, tp,
&measurenext, &measurenext_len,
&rtt_bytes_adjust, FORCED_MEASUREMENT);
}
}
return (0);
}
/*
 * Outbound hook (listed for HHOOK_TCP_EST_OUT in ertt_hooks): record one
 * struct txseginfo for the segment being sent and append it to the queue in
 * struct ertt.
 *
 * ctx_data is read as a struct tcp_hhook_data (tp, th, to, len and tso are
 * used) and hdata as the struct ertt; both are asserted non-NULL.
 * hhook_type, hhook_id, udata and hosd are not used.  Segments with a zero
 * length are ignored, and if no record can be allocated (M_NOWAIT) the
 * segment is simply not tracked.  Always returns 0.
 */
static int
ertt_add_tx_segment_info_hook(int hhook_type, int hhook_id, void *udata,
    void *ctx_data, void *hdata, struct osd *hosd)
{
	struct tcp_hhook_data *thdp;
	struct txseginfo *txsi;
	struct tcphdr *th;
	struct tcpopt *to;
	struct tcpcb *tp;
	struct ertt *e_t;
	uint32_t len;
	int tso;

	KASSERT(ctx_data != NULL, ("%s: ctx_data is NULL!", __func__));
	KASSERT(hdata != NULL, ("%s: hdata is NULL!", __func__));

	e_t = (struct ertt *)hdata;
	thdp = ctx_data;
	tp = thdp->tp;
	th = thdp->th;
	to = thdp->to;
	len = thdp->len;
	tso = thdp->tso;

	INP_WLOCK_ASSERT(tptoinpcb(tp));

	/* Only segments with a non-zero length are recorded. */
	if (len == 0)
		return (0);

	txsi = uma_zalloc(txseginfo_zone, M_NOWAIT);
	if (txsi == NULL)
		return (0);

	/* The zone has no constructor, so every field is set explicitly. */
	txsi->flags = 0;
	txsi->seq = ntohl(th->th_seq);
	txsi->len = len;

	if (tso) {
		txsi->flags |= TXSI_TSO;
	} else if ((e_t->flags & ERTT_TSO_DISABLED) != 0) {
		/* TSO was switched off for a measurement; switch it back on. */
		tp->t_flags |= TF_TSO;
		e_t->flags &= ~ERTT_TSO_DISABLED;
	}

	if ((e_t->flags & ERTT_MEASUREMENT_IN_PROGRESS) == 0) {
		/* No measurement running: this segment becomes the marked one. */
		txsi->flags |= TXSI_RTT_MEASURE_START;
		e_t->flags |= ERTT_MEASUREMENT_IN_PROGRESS;
		e_t->bytes_tx_in_rtt = len;
	} else {
		e_t->bytes_tx_in_rtt += len;
	}

	if ((tp->t_flags & TF_NOOPT) != 0 || (to->to_flags & TOF_TS) == 0) {
		/* No timestamp option on this segment: fall back to ticks. */
		txsi->tx_ts = tcp_ts_getticks();
		txsi->rx_ts = 0;
	} else {
		txsi->tx_ts = ntohl(to->to_tsval) - tp->ts_offset;
		txsi->rx_ts = ntohl(to->to_tsecr);
	}

	TAILQ_INSERT_TAIL(&e_t->txsegi_q, txsi, txsegi_lnk);

	return (0);
}
/*
 * Module initialisation callback (ertt_helper.mod_init): create the UMA zone
 * that struct txseginfo records are allocated from.  The zone has no
 * ctor/dtor/init/fini callbacks.  Always returns 0.
 */
static int
ertt_mod_init(void)
{

	txseginfo_zone = uma_zcreate("ertt_txseginfo",
	    sizeof(struct txseginfo),
	    NULL, NULL, NULL, NULL,
	    0, 0);

	return (0);
}
/*
 * Module teardown callback (ertt_helper.mod_destroy): release the UMA zone
 * created by ertt_mod_init().  Always returns 0.
 */
static int
ertt_mod_destroy(void)
{

	uma_zdestroy(txseginfo_zone);

	return (0);
}
/*
 * UMA constructor for struct ertt (passed to KHELP_DECLARE_MOD_UMA()).
 *
 * Puts a freshly allocated item into a known state: an empty transmit-record
 * queue and every counter, RTT value and flag reset to zero.  The item is
 * not zeroed as a whole here, so each field is reset explicitly.  That
 * includes bytes_tx_in_marked_rtt and marked_snd_cwnd, which are otherwise
 * only written when marked_packet_rtt() completes a measurement and would
 * hold stale data until then.
 *
 * mem is the item to initialise; size, arg and flags are unused.
 * Always returns 0.
 */
static int
ertt_uma_ctor(void *mem, int size, void *arg, int flags)
{
	struct ertt *e_t;

	e_t = mem;

	TAILQ_INIT(&e_t->txsegi_q);
	e_t->timestamp_errors = 0;
	e_t->minrtt = 0;
	e_t->maxrtt = 0;
	e_t->rtt = 0;
	e_t->flags = 0;
	e_t->dlyack_rx = 0;
	e_t->bytes_tx_in_rtt = 0;
	e_t->bytes_tx_in_marked_rtt = 0;
	e_t->markedpkt_rtt = 0;
	e_t->marked_snd_cwnd = 0;

	return (0);
}
/*
 * UMA destructor for struct ertt (passed to KHELP_DECLARE_MOD_UMA()).
 *
 * Returns every transmit record still on the queue to txseginfo_zone.  The
 * records are freed without being unlinked, so the queue head is left
 * untouched; ertt_uma_ctor() re-initialises it.
 *
 * mem is the item being destroyed; size and arg are unused.
 */
static void
ertt_uma_dtor(void *mem, int size, void *arg)
{
	struct txseginfo *cur, *following;
	struct ertt *e_t;

	e_t = mem;

	for (cur = TAILQ_FIRST(&e_t->txsegi_q); cur != NULL; cur = following) {
		/* Fetch the successor first: cur is gone after uma_zfree(). */
		following = TAILQ_NEXT(cur, txsegi_lnk);
		uma_zfree(txseginfo_zone, cur);
	}
}
/*
 * Declare the "ertt" Khelp module: ertt_helper describes the helper,
 * ertt_hooks lists its hook functions, and each helper data block is
 * sizeof(struct ertt) bytes, set up by ertt_uma_ctor() and torn down by
 * ertt_uma_dtor().
 * NOTE(review): the literal 1 is presumably the module version -- confirm
 * against the KHELP_DECLARE_MOD_UMA() definition.
 */
KHELP_DECLARE_MOD_UMA(ertt, &ertt_helper, ertt_hooks, 1, sizeof(struct ertt),
ertt_uma_ctor, ertt_uma_dtor);