#include <sys/errno.h>
#include <sys/types.h>
#include <sys/kmem.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/modctl.h>
#include <sys/time.h>
#include <inet/tcp_impl.h>
#include <inet/cc.h>
#include <inet/cc/cc_cubic.h>
#include <inet/cc/cc_module.h>
/*
 * Module-specific linkage record for this module.  It is referenced from
 * cc_cubic_modlinkage and carries the mod_miscops operations vector plus a
 * text label (presumably the description reported for the module).
 */
static struct modlmisc cc_cubic_modlmisc = {
&mod_miscops,
"Cubic Congestion Control"
};
/*
 * Top-level linkage structure handed to mod_install() in _init() and to
 * mod_info() in _info().  It lists the single cc_cubic_modlmisc record,
 * followed by a NULL entry.
 */
static struct modlinkage cc_cubic_modlinkage = {
MODREV_1,
&cc_cubic_modlmisc,
NULL
};
/*
 * Handle on the "newreno" algorithm, looked up by _init() through
 * cc_load_algo().  The cubic hooks call its ack_received and after_idle
 * entry points.
 */
static struct cc_algo *newreno_cc_algo;

/*
 * Forward declarations for the hooks and helpers defined later in this file.
 */
static void cubic_ack_received(struct cc_var *ccv, uint16_t type);
static void cubic_cb_destroy(struct cc_var *ccv);
static int cubic_cb_init(struct cc_var *ccv);
static void cubic_cong_signal(struct cc_var *ccv, uint32_t type);
static void cubic_conn_init(struct cc_var *ccv);
static void cubic_post_recovery(struct cc_var *ccv);
static void cubic_record_rtt(struct cc_var *ccv);
static void cubic_ssthresh_update(struct cc_var *ccv);
static void cubic_after_idle(struct cc_var *ccv);
/*
 * Per-connection private state.  One instance is allocated by
 * cubic_cb_init(), stored in ccv->cc_data and released by cubic_cb_destroy().
 */
struct cubic {
/* Value returned by cubic_k(); passed to cubic_cwnd() on each ACK. */
int64_t K;
/* Running sum of the RTT samples added by cubic_record_rtt(). */
hrtime_t sum_rtt_nsecs;
/* cwnd captured when the latest congestion signal was handled. */
uint32_t max_cwnd;
/* Value max_cwnd held before the latest NDUPACK/ECN signal. */
uint32_t prev_max_cwnd;
/* Count of congestion signals handled by cubic_cong_signal(). */
uint32_t num_cong_events;
/* Smallest RTT sample seen; TCPTV_SRTTBASE until one is recorded. */
hrtime_t min_rtt_nsecs;
/* sum_rtt_nsecs / epoch_ack_count, refreshed in cubic_post_recovery(). */
hrtime_t mean_rtt_nsecs;
/* Number of samples accumulated in sum_rtt_nsecs since the last reset. */
int epoch_ack_count;
/* gethrtime() stamp taken at init, after idle, ECN/RTO and post-recovery. */
hrtime_t t_last_cong;
};
/*
 * Algorithm descriptor registered under the name "cubic" by _init() via
 * cc_register_algo().  It wires the hooks implemented in this file;
 * cubic_record_rtt() and cubic_ssthresh_update() are internal helpers and
 * are therefore not listed here.
 */
struct cc_algo cubic_cc_algo = {
.name = "cubic",
.ack_received = cubic_ack_received,
.cb_destroy = cubic_cb_destroy,
.cb_init = cubic_cb_init,
.cong_signal = cubic_cong_signal,
.conn_init = cubic_conn_init,
.post_recovery = cubic_post_recovery,
.after_idle = cubic_after_idle,
};
/*
 * Module load entry point.
 *
 * Looks up the "newreno" algorithm (its hooks are reused by this module),
 * registers cubic_cc_algo and then installs the module.  If the install
 * step fails the algorithm registration is undone.
 *
 * Returns 0 on success, EINVAL when "newreno" cannot be loaded, or the
 * error reported by cc_register_algo() / mod_install().
 */
int
_init(void)
{
	int rc;

	newreno_cc_algo = cc_load_algo("newreno");
	if (newreno_cc_algo == NULL) {
		return (EINVAL);
	}

	rc = cc_register_algo(&cubic_cc_algo);
	if (rc != 0) {
		return (rc);
	}

	rc = mod_install(&cc_cubic_modlinkage);
	if (rc != 0) {
		/* Roll back the registration; its result is not actionable. */
		(void) cc_deregister_algo(&cubic_cc_algo);
	}

	return (rc);
}
/*
 * Module unload entry point.  Unconditionally returns EBUSY, so this module
 * never agrees to be removed once loaded.
 */
int
_fini(void)
{
return (EBUSY);
}
/*
 * Module information entry point.  Forwards the request to mod_info() with
 * this module's linkage structure and returns whatever mod_info() returns.
 */
int
_info(struct modinfo *modinfop)
{
return (mod_info(&cc_cubic_modlinkage, modinfop));
}
/*
 * ACK hook.
 *
 * An RTT sample is recorded on every call.  The congestion window is only
 * touched for a CC_ACK received outside recovery while the connection is
 * flagged CCF_CWND_LIMITED, and (when ABC is enabled and cwnd is above
 * ssthresh) only once CCF_ABC_SENTAWND is set.
 *
 * While cwnd <= ssthresh, or while no minimum RTT has been recorded yet,
 * growth is delegated to the newreno ack_received hook.  Otherwise cwnd is
 * raised towards the larger of the tf_cwnd() and cubic_cwnd() estimates.
 */
static void
cubic_ack_received(struct cc_var *ccv, uint16_t type)
{
	struct cubic *cd = ccv->cc_data;
	hrtime_t elapsed;
	uint32_t tf_target, cubic_target;

	cubic_record_rtt(ccv);

	/* Only plain ACKs on a cwnd-limited flow outside recovery qualify. */
	if (type != CC_ACK || IN_RECOVERY(ccv->flags) ||
	    !(ccv->flags & CCF_CWND_LIMITED)) {
		return;
	}

	/*
	 * With ABC enabled and cwnd above ssthresh, nothing happens until
	 * CCF_ABC_SENTAWND has been set.
	 */
	if (CC_ABC(ccv) &&
	    CCV(ccv, tcp_cwnd) > CCV(ccv, tcp_cwnd_ssthresh) &&
	    !(ccv->flags & CCF_ABC_SENTAWND)) {
		return;
	}

	/* At or below ssthresh, or no RTT measurement yet: use newreno. */
	if (CCV(ccv, tcp_cwnd) <= CCV(ccv, tcp_cwnd_ssthresh) ||
	    cd->min_rtt_nsecs == TCPTV_SRTTBASE) {
		newreno_cc_algo->ack_received(ccv, type);
		return;
	}

	elapsed = gethrtime() - cd->t_last_cong;

	tf_target = tf_cwnd(elapsed, cd->mean_rtt_nsecs, cd->max_cwnd,
	    CCV(ccv, tcp_mss));

	/* The cubic estimate is evaluated one mean RTT into the future. */
	cubic_target = cubic_cwnd(elapsed + cd->mean_rtt_nsecs, cd->max_cwnd,
	    CCV(ccv, tcp_mss), cd->K);

	ccv->flags &= ~CCF_ABC_SENTAWND;

	if (cubic_target < tf_target) {
		/* tf_cwnd() estimate is larger: jump up to it if below. */
		if (CCV(ccv, tcp_cwnd) < tf_target) {
			CCV(ccv, tcp_cwnd) = tf_target;
		}
	} else if (CCV(ccv, tcp_cwnd) < cubic_target) {
		/* cubic_cwnd() estimate is at least as large: follow it. */
		if (CC_ABC(ccv)) {
			CCV(ccv, tcp_cwnd) = MIN(cubic_target, INT_MAX);
		} else {
			/* Step part of the way, by at least one byte. */
			CCV(ccv, tcp_cwnd) += MAX(1,
			    ((MIN(cubic_target, INT_MAX) -
			    CCV(ccv, tcp_cwnd)) *
			    CCV(ccv, tcp_mss)) /
			    CCV(ccv, tcp_cwnd));
		}
	}

	/*
	 * Before any congestion event has been seen, keep max_cwnd (and the
	 * K derived from it) tracking the highest cwnd reached.
	 */
	if (cd->num_cong_events == 0 && cd->max_cwnd < CCV(ccv, tcp_cwnd)) {
		cd->max_cwnd = CCV(ccv, tcp_cwnd);
		cd->K = cubic_k(cd->max_cwnd / CCV(ccv, tcp_mss));
	}
}
/*
 * After-idle hook.
 *
 * Raises max_cwnd to the current cwnd if that is larger, recomputes K from
 * it, lets the newreno after_idle hook run, and finally restarts the
 * congestion-epoch clock.
 */
static void
cubic_after_idle(struct cc_var *ccv)
{
	struct cubic *cd = ccv->cc_data;

	/* Refresh max_cwnd and K before newreno gets to touch the connection. */
	cd->max_cwnd = max(cd->max_cwnd, CCV(ccv, tcp_cwnd));
	cd->K = cubic_k(cd->max_cwnd / CCV(ccv, tcp_mss));

	newreno_cc_algo->after_idle(ccv);

	cd->t_last_cong = gethrtime();
}
/*
 * Control-block teardown hook: frees the struct cubic attached to the
 * connection, if there is one.
 */
static void
cubic_cb_destroy(struct cc_var *ccv)
{
	void *state = ccv->cc_data;

	if (state == NULL) {
		return;
	}

	kmem_free(state, sizeof (struct cubic));
}
/*
 * Control-block setup hook.
 *
 * Allocates a zeroed struct cubic without sleeping, seeds its RTT fields
 * and congestion timestamp, and attaches it to ccv->cc_data.
 *
 * Returns 0 on success or ENOMEM when the allocation fails.
 */
static int
cubic_cb_init(struct cc_var *ccv)
{
	struct cubic *cd = kmem_zalloc(sizeof (struct cubic), KM_NOSLEEP);

	if (cd == NULL) {
		return (ENOMEM);
	}

	/* TCPTV_SRTTBASE marks "no minimum RTT recorded yet". */
	cd->min_rtt_nsecs = TCPTV_SRTTBASE;
	cd->mean_rtt_nsecs = 1;
	cd->t_last_cong = gethrtime();

	ccv->cc_data = cd;
	return (0);
}
/*
 * Congestion-signal hook.
 *
 * CC_NDUPACK: unless already in fast recovery, enter recovery; if the
 *	connection was not in congestion recovery either, first update
 *	ssthresh, count the event, remember the pre-event cwnd in max_cwnd
 *	(saving the old value in prev_max_cwnd) and drop cwnd to ssthresh.
 * CC_ECN: unless already in congestion recovery, do the same bookkeeping,
 *	stamp the event time and enter congestion recovery.
 * CC_RTO: count the event, stamp the time, update ssthresh, remember the
 *	pre-event cwnd and collapse cwnd to one MSS.
 *
 * Any other signal type is ignored.
 */
static void
cubic_cong_signal(struct cc_var *ccv, uint32_t type)
{
	struct cubic *cd = ccv->cc_data;
	uint32_t cwnd_at_signal = CCV(ccv, tcp_cwnd);
	uint32_t seg_size = CCV(ccv, tcp_mss);

	switch (type) {
	case CC_NDUPACK:
		if (IN_FASTRECOVERY(ccv->flags)) {
			break;
		}
		if (!IN_CONGRECOVERY(ccv->flags)) {
			cubic_ssthresh_update(ccv);
			cd->num_cong_events++;
			cd->prev_max_cwnd = cd->max_cwnd;
			cd->max_cwnd = cwnd_at_signal;
			CCV(ccv, tcp_cwnd) = CCV(ccv, tcp_cwnd_ssthresh);
		}
		ENTER_RECOVERY(ccv->flags);
		break;

	case CC_ECN:
		if (IN_CONGRECOVERY(ccv->flags)) {
			break;
		}
		cubic_ssthresh_update(ccv);
		cd->num_cong_events++;
		cd->prev_max_cwnd = cd->max_cwnd;
		cd->max_cwnd = cwnd_at_signal;
		cd->t_last_cong = gethrtime();
		CCV(ccv, tcp_cwnd) = CCV(ccv, tcp_cwnd_ssthresh);
		ENTER_CONGRECOVERY(ccv->flags);
		break;

	case CC_RTO:
		/*
		 * NOTE(review): the event counter is bumped before
		 * cubic_ssthresh_update() here, unlike the two cases above,
		 * so the "first event" branch of that helper is never taken
		 * on a timeout -- confirm this is intended.
		 */
		cd->num_cong_events++;
		cd->t_last_cong = gethrtime();
		cubic_ssthresh_update(ccv);
		cd->max_cwnd = cwnd_at_signal;
		CCV(ccv, tcp_cwnd) = seg_size;
		break;

	default:
		break;
	}
}
/*
 * Connection-init hook: seeds max_cwnd with the connection's current cwnd.
 */
static void
cubic_conn_init(struct cc_var *ccv)
{
	struct cubic *cd = ccv->cc_data;

	cd->max_cwnd = CCV(ccv, tcp_cwnd);
}
/*
 * Post-recovery hook.
 *
 * 1. If the latest max_cwnd is below the previous one, scale it down by
 *    CUBIC_FC_FACTOR / 2^CUBIC_SHIFT.
 * 2. Pick the cwnd to resume with:
 *    - leaving fast recovery with less data in flight than ssthresh:
 *      MAX(pipe, mss) + mss;
 *    - leaving fast recovery otherwise: max_cwnd scaled by
 *      CUBIC_BETA / 2^CUBIC_SHIFT, but never below one MSS;
 *    - not in fast recovery: the current cwnd, but never below one MSS.
 * 3. Restart the congestion-epoch clock, fold the RTT samples gathered
 *    during the epoch into mean_rtt_nsecs, reset the accumulators and
 *    recompute K from the (possibly reduced) max_cwnd.
 *
 * The fixed-point products are computed in 64 bits: max_cwnd is a uint32_t
 * and multiplying it by the scale factor in 32-bit arithmetic wraps for
 * large windows, which would yield a far too small window.
 */
static void
cubic_post_recovery(struct cc_var *ccv)
{
	struct cubic *cubic_data;
	uint32_t mss, pipe;

	cubic_data = ccv->cc_data;

	if (cubic_data->max_cwnd < cubic_data->prev_max_cwnd) {
		cubic_data->max_cwnd = (uint32_t)
		    (((uint64_t)cubic_data->max_cwnd * CUBIC_FC_FACTOR) >>
		    CUBIC_SHIFT);
	}

	mss = CCV(ccv, tcp_mss);

	if (IN_FASTRECOVERY(ccv->flags)) {
		/* Bytes sent but not yet acknowledged. */
		pipe = CCV(ccv, tcp_snxt) - CCV(ccv, tcp_suna);
		if (pipe < CCV(ccv, tcp_cwnd_ssthresh)) {
			CCV(ccv, tcp_cwnd) = MAX(pipe, mss) + mss;
		} else {
			CCV(ccv, tcp_cwnd) = max(mss, (uint32_t)
			    (((uint64_t)CUBIC_BETA * cubic_data->max_cwnd) >>
			    CUBIC_SHIFT));
		}
	} else {
		CCV(ccv, tcp_cwnd) = max(mss, CCV(ccv, tcp_cwnd));
	}

	cubic_data->t_last_cong = gethrtime();

	/*
	 * Only replace the mean when at least one sample was taken and the
	 * quotient would be non-zero.
	 */
	if (cubic_data->epoch_ack_count > 0 &&
	    cubic_data->sum_rtt_nsecs >= cubic_data->epoch_ack_count) {
		cubic_data->mean_rtt_nsecs =
		    (cubic_data->sum_rtt_nsecs / cubic_data->epoch_ack_count);
	}

	cubic_data->epoch_ack_count = 0;
	cubic_data->sum_rtt_nsecs = 0;
	cubic_data->K = cubic_k(cubic_data->max_cwnd / mss);
}
static void
cubic_record_rtt(struct cc_var *ccv)
{
struct cubic *cubic_data;
int t_srtt_nsecs;
if (CCV(ccv, tcp_rtt_update) >= CUBIC_MIN_RTT_SAMPLES) {
cubic_data = ccv->cc_data;
t_srtt_nsecs = CCV(ccv, tcp_rtt_sa) >> 3;
if ((t_srtt_nsecs < cubic_data->min_rtt_nsecs ||
cubic_data->min_rtt_nsecs == TCPTV_SRTTBASE)) {
cubic_data->min_rtt_nsecs = max(1, t_srtt_nsecs);
if (cubic_data->min_rtt_nsecs >
cubic_data->mean_rtt_nsecs)
cubic_data->mean_rtt_nsecs =
cubic_data->min_rtt_nsecs;
}
cubic_data->sum_rtt_nsecs += t_srtt_nsecs;
cubic_data->epoch_ack_count++;
}
}
/*
 * Recomputes the slow-start threshold from the current cwnd.
 *
 * If no congestion event has been counted yet, ssthresh becomes half of
 * cwnd; otherwise it becomes cwnd scaled by CUBIC_BETA / 2^CUBIC_SHIFT.
 *
 * The scaled product is computed in 64 bits so that a large cwnd cannot
 * wrap a 32-bit multiply before the shift is applied.
 */
static void
cubic_ssthresh_update(struct cc_var *ccv)
{
	struct cubic *cubic_data;
	uint64_t cwnd;

	cubic_data = ccv->cc_data;

	if (cubic_data->num_cong_events == 0) {
		CCV(ccv, tcp_cwnd_ssthresh) = CCV(ccv, tcp_cwnd) >> 1;
	} else {
		cwnd = CCV(ccv, tcp_cwnd);
		CCV(ccv, tcp_cwnd_ssthresh) =
		    (cwnd * CUBIC_BETA) >> CUBIC_SHIFT;
	}
}