#ifndef _NETINET_TCP_RACK_H_
#define _NETINET_TCP_RACK_H_
/*
 * Flag bits carried in rack_sendmap.r_flags.  That field is declared
 * 24 bits wide below, so the highest usable bit is 0x800000 (RACK_IS_PCM).
 * Glosses below are derived from the flag names; exact set/clear points
 * live in the rack.c implementation.
 */
#define RACK_ACKED 0x000001 /* block has been acknowledged */
#define RACK_TO_REXT 0x000002 /* retransmitted due to a timeout (TO) */
#define RACK_DEFERRED 0x000004 /* NOTE(review): meaning not derivable here -- see rack.c */
#define RACK_OVERMAX 0x000008 /* more retransmits than we track (RACK_NUM_OF_RETRANS) */
#define RACK_SACK_PASSED 0x000010 /* a SACK arrived for data beyond this block */
#define RACK_WAS_SACKPASS 0x000020 /* block previously had SACK-passed state */
#define RACK_HAS_FIN 0x000040 /* segment carries a FIN */
#define RACK_TLP 0x000080 /* sent as a tail-loss probe */
#define RACK_RWND_COLLAPSED 0x000100 /* peer's receive window collapsed over this block */
#define RACK_APP_LIMITED 0x000200 /* send was application limited */
#define RACK_WAS_ACKED 0x000400 /* had been acked at some earlier point */
#define RACK_HAS_SYN 0x000800 /* segment carries a SYN */
#define RACK_SENT_W_DSACK 0x001000 /* sent while DSACK state was in effect */
#define RACK_SENT_SP 0x002000 /* sent via the slow path */
#define RACK_SENT_FP 0x004000 /* sent via the fast path (see rack_fast_send_blk) */
#define RACK_HAD_PUSH 0x008000 /* PUSH was set on the original send */
#define RACK_MUST_RXT 0x010000 /* block must be retransmitted */
#define RACK_IN_GP_WIN 0x020000 /* inside the goodput measurement window */
#define RACK_SHUFFLED 0x040000 /* NOTE(review): set during map manipulation -- confirm in rack.c */
#define RACK_MERGED 0x080000 /* block was merged with a neighbor */
#define RACK_PMTU_CHG 0x100000 /* path-MTU change affected this block */
#define RACK_STRADDLE 0x200000 /* NOTE(review): block straddles some boundary -- confirm in rack.c */
#define RACK_WAS_LOST 0x400000 /* block was declared lost */
#define RACK_IS_PCM 0x800000 /* part of a PCM measurement (see rack_pcm_info) */
#define RACK_NUM_OF_RETRANS 3 /* depth of r_tim_lastsent[]: send timestamps retained */
#define RACK_INITIAL_RTO 1000000 /* initial RTO; presumably usec (1 second) -- confirm units */
#define RACK_REQ_AVG 3 /* NOTE(review): samples required before averaging -- confirm */
/*
 * One tracked block of sent data.  The sendmap is RACK's per-connection
 * record of everything outstanding: when each block was (re)sent, how it
 * was acked/sacked, and where its data lives in the socket-buffer mbuf
 * chain.  Allocated/merged/split as sends and (S)ACKs arrive.
 */
struct rack_sendmap {
TAILQ_ENTRY(rack_sendmap) next; /* linkage in the tailq_hash bucket (rc_free uses it too) */
TAILQ_ENTRY(rack_sendmap) r_tnext; /* linkage in the time-ordered rc_tmap (see r_in_tmap) */
uint32_t bindex; /* NOTE(review): bucket index into tailq_hash -- confirm */
uint32_t r_start; /* first TCP sequence number of this block */
uint32_t r_end; /* sequence number just past the block -- confirm inclusivity in rack.c */
uint32_t r_rtr_bytes; /* bytes retransmitted from this block */
uint32_t r_flags : 24, /* RACK_* flag bits above */
r_rtr_cnt : 8; /* number of transmissions (capped; see RACK_OVERMAX) */
uint32_t r_act_rxt_cnt; /* actual retransmit count, not capped like r_rtr_cnt */
struct mbuf *m; /* mbuf holding the start of this block's data */
uint32_t soff; /* offset of the data within mbuf m */
uint32_t orig_m_len; /* m->m_len when (m, soff) was recorded -- detects sbuf changes */
uint32_t orig_t_space; /* NOTE(review): cached trailing space of m -- confirm */
uint32_t r_nseq_appl; /* NOTE(review): next seq for app-limited marking -- confirm */
uint8_t r_dupack; /* duplicate-ack count observed against this block */
uint8_t r_in_tmap; /* non-zero when linked into rc_tmap via r_tnext */
uint8_t r_limit_type; /* allocation-limit class (RACK_LIMIT_TYPE_SPLIT) */
uint8_t r_just_ret : 1, /* NOTE(review): "just returned/idle" marker -- confirm */
r_one_out_nr : 1,
r_no_rtt_allowed : 1, /* do not use this block for RTT measurement */
r_hw_tls : 1, /* data is hardware-TLS encrypted */
r_avail : 4; /* unused bits */
uint64_t r_tim_lastsent[RACK_NUM_OF_RETRANS]; /* timestamps of the last sends */
uint64_t r_ack_arrival; /* when the ack for this block arrived */
uint32_t r_fas; /* NOTE(review): flight at send -- confirm expansion */
uint8_t r_bas; /* NOTE(review): bursts at send? -- confirm expansion */
};
/*
 * A socket option whose application was deferred (see the defer_options
 * bit in struct tcp_rack and the opt_list def_opt_head in rack_control):
 * the raw optname/optval pair is queued for later processing.
 */
struct deferred_opt_list {
TAILQ_ENTRY(deferred_opt_list) next; /* linkage on rack_control.opt_list */
int optname; /* option identifier as passed in */
uint64_t optval; /* option value as passed in */
};
static inline uint64_t
rack_to_usec_ts(struct timeval *tv)
{
	/* Flatten a timeval into a single microsecond timestamp. */
	uint64_t secs_as_usec;

	secs_as_usec = tv->tv_sec * HPTS_USEC_IN_SEC;
	return (secs_as_usec + tv->tv_usec);
}
static inline uint32_t
rack_ts_to_msec(uint64_t ts)
{
	/* Scale a microsecond timestamp down to milliseconds. */
	uint64_t msecs;

	msecs = ts / HPTS_MSEC_IN_SEC;
	return ((uint32_t)msecs);
}
TAILQ_HEAD(rack_head, rack_sendmap); /* list head type for sendmap blocks (rc_tmap, rc_free) */
TAILQ_HEAD(def_opt_head, deferred_opt_list); /* list head type for deferred options */
/*
 * Codes naming the sendmap-manipulation operations (merge, split, alloc,
 * free and the distinct SACK-processing cases M1..M5); presumably used to
 * tag map-change log records -- confirm against the rack.c logging code.
 */
#define MAP_MERGE 0x01
#define MAP_SPLIT 0x02
#define MAP_NEW 0x03
#define MAP_SACK_M1 0x04
#define MAP_SACK_M2 0x05
#define MAP_SACK_M3 0x06
#define MAP_SACK_M4 0x07
#define MAP_SACK_M5 0x08
#define MAP_FREE 0x09
#define MAP_TRIM_HEAD 0x0a
#define RACK_LIMIT_TYPE_SPLIT 1 /* r_limit_type: block exists due to a split (counts vs rc_split_limit) */
/* rack_rtt_sample.rs_flags values */
#define RACK_RTT_EMPTY 0x00000001 /* no RTT sample gathered yet */
#define RACK_RTT_VALID 0x00000002 /* the sample fields are populated */
/*
 * Aggregated RTT measurements for one round of sampling (rack_control.rack_rs).
 * Lowest/highest/total/count allow min, max and mean to be derived.
 */
struct rack_rtt_sample {
uint32_t rs_flags; /* RACK_RTT_EMPTY / RACK_RTT_VALID */
uint32_t rs_rtt_lowest; /* smallest RTT seen this round */
uint32_t rs_rtt_highest; /* largest RTT seen this round */
uint32_t rs_rtt_cnt; /* number of samples folded in */
uint32_t rs_us_rtt; /* microsecond-precision RTT -- confirm which sample it keeps */
int32_t confidence; /* quality of rs_us_rtt; signed -- confirm scale in rack.c */
uint64_t rs_rtt_tot; /* sum of all samples (for averaging) */
uint16_t rs_us_rtrcnt; /* NOTE(review): retransmit count tied to rs_us_rtt -- confirm */
};
#define RACK_LOG_TYPE_ACK 0x01
#define RACK_LOG_TYPE_OUT 0x02
#define RACK_LOG_TYPE_TO 0x03
#define RACK_LOG_TYPE_ALLOC 0x04
#define RACK_LOG_TYPE_FREE 0x05
#define RACK_TO_FRM_TMR 1
#define RACK_TO_FRM_TLP 2
#define RACK_TO_FRM_RACK 3
#define RACK_TO_FRM_KEEP 4
#define RACK_TO_FRM_PERSIST 5
#define RACK_TO_FRM_DELACK 6
#define RCV_PATH_RTT_MS 10
/*
 * One uint64_t counter per RACK socket option / feature, bumped via
 * RACK_OPTS_INC()/RACK_OPTS_ADD() when the corresponding option is set.
 *
 * IMPORTANT: RACK_OPTS_ADD (below) indexes the global rack_opts_arry by
 * offsetof(struct rack_opts_stats, field) / sizeof(uint64_t), so this
 * struct's layout maps 1:1 onto that counter array.  Only append fields;
 * reordering or inserting in the middle silently remaps every counter.
 */
struct rack_opts_stats {
uint64_t tcp_rack_tlp_reduce;
uint64_t tcp_rack_pace_always;
uint64_t tcp_rack_pace_reduce;
uint64_t tcp_rack_max_seg;
uint64_t tcp_rack_prr_sendalot;
uint64_t tcp_rack_min_to;
uint64_t tcp_rack_early_seg;
uint64_t tcp_rack_reord_thresh;
uint64_t tcp_rack_reord_fade;
uint64_t tcp_rack_tlp_thresh;
uint64_t tcp_rack_pkt_delay;
uint64_t tcp_rack_tlp_inc_var;
uint64_t tcp_tlp_use;
uint64_t tcp_rack_idle_reduce;
uint64_t tcp_rack_idle_reduce_high;
uint64_t rack_no_timer_in_hpts;
uint64_t tcp_rack_min_pace_seg;
uint64_t tcp_rack_pace_rate_ca;
uint64_t tcp_rack_rr;
uint64_t tcp_rack_rrr_no_conf_rate;
uint64_t tcp_initial_rate;
uint64_t tcp_initial_win;
uint64_t tcp_hdwr_pacing;
uint64_t tcp_gp_inc_ss;
uint64_t tcp_gp_inc_ca;
uint64_t tcp_gp_inc_rec;
uint64_t tcp_rack_force_max_seg;
uint64_t tcp_rack_pace_rate_ss;
uint64_t tcp_rack_pace_rate_rec;
/* SACK attack / processing path counters. */
uint64_t tcp_sack_path_1;
uint64_t tcp_sack_path_2a;
uint64_t tcp_sack_path_2b;
uint64_t tcp_sack_path_3;
uint64_t tcp_sack_path_4;
uint64_t tcp_rack_scwnd;
uint64_t tcp_rack_noprr;
uint64_t tcp_rack_cfg_rate;
uint64_t tcp_timely_dyn;
uint64_t tcp_rack_mbufq;
uint64_t tcp_fillcw;
uint64_t tcp_npush;
uint64_t tcp_lscwnd;
uint64_t tcp_profile;
uint64_t tcp_hdwr_rate_cap;
uint64_t tcp_pacing_rate_cap;
uint64_t tcp_pacing_up_only;
uint64_t tcp_use_cmp_acks;
uint64_t tcp_rack_abc_val;
uint64_t tcp_rec_abc_val;
uint64_t tcp_rack_measure_cnt;
uint64_t tcp_rack_delayed_ack;
uint64_t tcp_rack_rtt_use;
uint64_t tcp_data_after_close;
uint64_t tcp_defer_opt;
uint64_t tcp_pol_detect;
uint64_t tcp_rack_beta;
uint64_t tcp_rack_beta_ecn;
uint64_t tcp_rack_timer_slop;
uint64_t tcp_rack_dsack_opt;
uint64_t tcp_rack_hi_beta;
uint64_t tcp_split_limit;
uint64_t tcp_rack_pacing_divisor;
uint64_t tcp_rack_min_seg;
uint64_t tcp_dgp_in_rec;
uint64_t tcp_notimely;
uint64_t tcp_honor_hpts;
uint64_t tcp_dyn_rec;
uint64_t tcp_fillcw_rate_cap;
uint64_t tcp_pol_mss;
};
/* RACK_RTTS_*: reason codes logged for probe-RTT / RTT state changes. */
#define RACK_RTTS_INIT 0
#define RACK_RTTS_NEWRTT 1
#define RACK_RTTS_EXITPROBE 2
#define RACK_RTTS_ENTERPROBE 3
#define RACK_RTTS_REACHTARGET 4
#define RACK_RTTS_SEEHBP 5
#define RACK_RTTS_NOBACKOFF 6
#define RACK_RTTS_SAFETY 7
/* RACK_USE_*: which send timestamp of a block to use for RTT (rc_rate_sample_method). */
#define RACK_USE_BEG 1 /* first transmit time */
#define RACK_USE_END 2 /* last transmit time */
#define RACK_USE_END_OR_THACK 3 /* last transmit, or timestamp-echo when available -- confirm */
/* TLP_USE_*: tail-loss-probe threshold methods (rack_tlp_threshold_use). */
#define TLP_USE_ID 1
#define TLP_USE_TWO_ONE 2
#define TLP_USE_TWO_TWO 3
#define RACK_MIN_BW 8000 /* bandwidth floor; presumably bytes/sec (64kbps) -- confirm units */
/* Bits for rack_control.side_chan_dis_mask. */
#define CCSP_DIS_MASK 0x0001
#define HYBRID_DIS_MASK 0x0002
/* RACK_QUALITY_*: confidence classification of a goodput measurement. */
#define RACK_QUALITY_NONE 0
#define RACK_QUALITY_HIGH 1
#define RACK_QUALITY_APPLIMITED 2
#define RACK_QUALITY_PERSIST 3
#define RACK_QUALITY_PROBERTT 4
#define RACK_QUALITY_ALLACKED 5
#define MIN_GP_WIN 6 /* minimum goodput window; presumably in MSS units -- confirm */
#ifdef _KERNEL
#define RACK_OPTS_SIZE (sizeof(struct rack_opts_stats)/sizeof(uint64_t))
extern counter_u64_t rack_opts_arry[RACK_OPTS_SIZE];
/*
 * Bump the per-option counter named by a rack_opts_stats field: the field's
 * offset divided by sizeof(uint64_t) indexes rack_opts_arry, which is why
 * that struct's layout must never be reordered.
 */
#define RACK_OPTS_ADD(name, amm) counter_u64_add(rack_opts_arry[(offsetof(struct rack_opts_stats, name)/sizeof(uint64_t))], (amm))
#define RACK_OPTS_INC(name) RACK_OPTS_ADD(name, 1)
#endif
#define TT_RACK_FR_TMR 0x2000 /* timer-type bit for RACK's fast-recovery timer -- confirm vs tcp_timer.h */
#define RACK_GP_HIST 4 /* goodput history depth */
#define RETRAN_CNT_SIZE 16 /* entries in rack_control.rc_cnt_of_retran[] */
#define RACK_NUM_FSB_DEBUG 16 /* NOTE(review): fsb debug entry count; not referenced in this header */
#ifdef _KERNEL
/*
 * Fast-send block: a prebuilt header template plus a cached position in
 * the socket-buffer mbuf chain, used by the fast output path (sends
 * tagged RACK_SENT_FP).  The o_* fields let the fast path detect that
 * the socket buffer changed underneath the cached (m, off) position.
 */
struct rack_fast_send_blk {
uint32_t left_to_send; /* bytes remaining for the current fast-output run */
uint16_t tcp_ip_hdr_len; /* length of the prebuilt header template */
uint8_t tcp_flags; /* TCP flags to place in the template */
uint8_t hoplimit; /* IP TTL / hop limit for outgoing packets */
uint8_t *tcp_ip_hdr; /* prebuilt IP+TCP header bytes */
uint32_t recwin; /* receive window to advertise */
uint32_t off; /* offset of the send position -- presumably into the sockbuf; confirm */
struct tcphdr *th; /* TCP header inside the tcp_ip_hdr template */
struct udphdr *udp; /* UDP header when tunneled encapsulation is used -- confirm */
struct mbuf *m; /* cached mbuf at the send position */
uint32_t o_m_len; /* m->m_len observed when the position was cached */
uint32_t o_t_len; /* NOTE(review): cached total/trailing length -- confirm in rack.c */
uint32_t rfo_apply_push : 1, /* set PUSH on the fast-output send */
hw_tls : 1, /* data is hardware-TLS mbufs */
unused : 30;
};
struct tailq_hash; /* opaque; the sendmap hash is defined in the rack implementation */
/*
 * State of an in-progress PCM measurement round: the sequence range
 * being measured, when it was sent, and how many rack_pcm_stats entries
 * have been filled/allocated for it.
 */
struct rack_pcm_info {
uint64_t send_time; /* when the measured range was transmitted */
uint32_t sseq; /* start sequence of the measured range */
uint32_t eseq; /* end sequence of the measured range */
uint16_t cnt; /* rack_pcm_stats entries currently used */
uint16_t cnt_alloc; /* rack_pcm_stats entries allocated (pcm_s array size) */
};
#define RACK_DEFAULT_PCM_ARRAY 16 /* default number of rack_pcm_stats entries to allocate */
/* One PCM observation: ack arrival time for a sequence range. */
struct rack_pcm_stats {
uint32_t sseq; /* start sequence covered by this ack */
uint32_t eseq; /* end sequence covered by this ack */
uint64_t ack_time; /* when the ack arrived */
};
/*
 * rack_control -- the bulk of RACK's per-connection state: sendmap
 * bookkeeping, PRR accounting, pacing/goodput measurement, probe-RTT
 * state and assorted counters.  Comments are derived from field names
 * and usages visible in this header; authoritative semantics are in the
 * rack.c implementation.  Embedded in struct tcp_rack as r_ctl.
 */
struct rack_control {
/* Sendmap bookkeeping. */
struct tailq_hash *tqh; /* hash of all outstanding rack_sendmap blocks */
struct rack_head rc_tmap; /* time-ordered list of sent blocks (via r_tnext) */
struct rack_sendmap *rc_tlpsend; /* block chosen for a tail-loss probe */
struct rack_sendmap *rc_resend; /* block queued for retransmission */
struct rack_fast_send_blk fsb; /* fast-path send state (header template, mbuf pos) */
/* Timer and pacing configuration. */
uint32_t timer_slop; /* extra slop added to timer expirations */
uint16_t pace_len_divisor; /* divisor used when sizing paced sends -- confirm */
uint16_t rc_user_set_min_segs; /* user-configured minimum segments */
uint32_t rc_hpts_flags; /* flags for our hpts (pacing timer system) state */
uint32_t rc_fixed_pacing_rate_ca; /* fixed pacing rate, congestion avoidance */
uint32_t rc_fixed_pacing_rate_rec; /* fixed pacing rate, recovery */
uint32_t rc_fixed_pacing_rate_ss; /* fixed pacing rate, slow start */
uint32_t cwnd_to_use; /* cwnd selected for the current output pass */
uint32_t rc_timer_exp; /* when the current timer expires */
uint32_t rc_rack_min_rtt; /* smallest RTT seen (0 presumably = none) -- confirm */
uint32_t rc_rack_largest_cwnd; /* largest cwnd observed */
struct rack_head rc_free; /* cache of free sendmap blocks (see rc_free_cnt) */
/* Hardware-rate and bandwidth caps. */
uint64_t last_hw_bw_req; /* last bandwidth requested from hardware pacing */
uint64_t crte_prev_rate; /* previous hardware rate-entry rate */
uint64_t bw_rate_cap; /* cap applied to the pacing bandwidth */
uint64_t last_cumack_advance; /* time the cum-ack last advanced */
/* Loss recovery / PRR / TLP accounting. */
uint32_t rc_reorder_ts; /* timestamp of last observed reordering */
uint32_t rc_tlp_new_data; /* new data available to send for a TLP */
uint32_t rc_prr_out; /* bytes sent during PRR */
uint32_t rc_prr_recovery_fs; /* flight size at entry to recovery (PRR base) */
uint32_t rc_prr_sndcnt; /* bytes PRR currently allows us to send */
uint32_t rc_sacked; /* bytes currently sacked */
uint32_t last_sent_tlp_seq; /* sequence of the last TLP sent */
uint32_t rc_prr_delivered; /* bytes delivered during PRR */
uint16_t rc_tlp_cnt_out; /* consecutive TLPs outstanding */
uint16_t last_sent_tlp_len; /* length of the last TLP */
uint32_t rc_loss_count; /* total blocks declared lost */
uint32_t rc_reorder_fade; /* how long reordering state persists -- confirm units */
uint32_t rc_rack_tmit_time; /* transmit time used by the RACK loss algorithm */
uint32_t rc_holes_rxt; /* bytes of holes retransmitted */
/* Allocation accounting. */
uint32_t rc_num_maps_alloced; /* sendmap blocks currently allocated */
uint32_t rc_rcvtime; /* last time we heard from the peer -- confirm */
uint32_t rc_num_split_allocs; /* blocks that exist due to splits */
uint32_t rc_split_limit; /* max split allocations allowed */
uint32_t rc_last_output_to; /* last timeout programmed at output -- confirm */
uint32_t rc_went_idle_time; /* when the connection went idle */
/* Cached sendmap positions and SACK filtering. */
struct rack_sendmap *rc_sacklast; /* last block looked at in SACK processing */
struct rack_sendmap *rc_first_appl; /* first app-limited block */
struct rack_sendmap *rc_end_appl; /* last app-limited block */
struct sack_filter rack_sf; /* filter deduplicating/validating inbound SACKs */
/* Pacing sizing and goodput multipliers. */
uint32_t rc_pace_max_segs; /* largest burst (bytes) pacing will emit */
uint32_t rc_pace_min_segs; /* smallest pacing segment size */
uint32_t rc_app_limited_cnt; /* count of app-limited marks outstanding */
uint16_t rack_per_of_gp_ss; /* percent of goodput to pace at, slow start */
uint16_t rack_per_of_gp_ca; /* percent of goodput to pace at, cong. avoidance */
uint16_t rack_per_of_gp_rec; /* percent of goodput to pace at, recovery */
uint16_t rack_per_of_gp_probertt; /* percent of goodput during probe-RTT */
uint32_t rc_high_rwnd; /* highest receive window seen */
struct rack_rtt_sample rack_rs; /* current round's RTT aggregation */
const struct tcp_hwrate_limit_table *crte; /* hardware rate-limit table entry in use */
uint32_t rc_agg_early; /* aggregate of how early we ran -- pacing drift */
uint32_t rc_agg_delayed; /* aggregate of how late we ran -- pacing drift */
uint32_t rc_tlp_rxt_last_time; /* last TLP/retransmit timer event time */
/* Goodput measurement timing. */
uint64_t rc_gp_output_ts; /* send timestamp anchoring the gp measure */
uint64_t rc_gp_cumack_ts; /* cum-ack timestamp anchoring the gp measure */
struct timeval act_rcv_time; /* actual receive time of the last packet */
uint64_t gp_bw; /* current goodput bandwidth estimate */
uint64_t init_rate; /* configured initial rate, if any */
#ifdef NETFLIX_SHARED_CWND
struct shared_cwnd *rc_scw; /* shared congestion window state */
#endif
uint64_t last_gp_comp_bw; /* last computed goodput bandwidth */
uint64_t last_max_bw; /* largest bandwidth recently seen -- confirm window */
struct time_filter_small rc_gp_min_rtt; /* windowed-min filter of RTT for gp */
struct def_opt_head opt_list; /* deferred socket options awaiting application */
/* Long-term (lt) bandwidth tracking. */
uint64_t lt_bw_time; /* accumulated time with lt sampling active */
uint64_t lt_bw_bytes; /* accumulated bytes for lt bandwidth */
uint64_t lt_timemark; /* timestamp lt accumulation last started */
struct tcp_sendfile_track *rc_last_sft; /* last sendfile-tracking entry (hybrid pacing) */
uint32_t lt_seq; /* sequence at which lt accumulation started */
int32_t rc_rtt_diff; /* RTT trend (signed) used by timely logic */
uint64_t last_tmit_time_acked; /* transmit time of the last acked block -- confirm */
uint64_t last_sendtime; /* time of our last send */
uint64_t last_gpest; /* last goodput estimate */
uint64_t last_tm_mark; /* NOTE(review): time mark for lt/gp bookkeeping -- confirm */
uint64_t fillcw_cap; /* rate cap applied when filling the cwnd */
/* PCM measurement state. */
struct rack_pcm_info pcm_i; /* in-progress PCM round */
struct rack_pcm_stats *pcm_s; /* array of PCM ack observations (pcm_i.cnt_alloc long) */
uint32_t gp_gain_req; /* goodput gain required -- see gp_rnd_thresh logic */
uint32_t last_rnd_of_gp_rise; /* last round where goodput rose */
uint32_t gp_rnd_thresh; /* rounds without rise before action -- confirm */
uint32_t ss_hi_fs; /* NOTE(review): slow-start high flight size -- confirm */
uint32_t gate_to_fs; /* NOTE(review): gate to flight size -- confirm */
uint32_t pcm_max_seg; /* largest segment burst for PCM probing */
uint32_t last_pcm_round; /* round number of the last PCM measurement */
uint32_t pcm_idle_rounds; /* rounds to stay idle between PCM probes */
/* Round / RTT history. */
uint32_t rc_gp_srtt; /* smoothed RTT used for goodput pacing */
uint32_t rc_prev_gp_srtt; /* previous rc_gp_srtt */
uint32_t rc_entry_gp_rtt; /* gp RTT at entry (to probe-rtt?) -- confirm */
uint32_t rc_loss_at_start; /* loss count at measurement start */
uint32_t rc_considered_lost; /* bytes currently considered lost */
uint32_t dsack_round_end; /* sequence ending the current DSACK round */
uint32_t current_round; /* current round-trip round number */
uint32_t roundends; /* sequence at which the current round ends */
uint32_t num_dsack; /* DSACK events seen */
uint32_t forced_ack_ts; /* timestamp of the last forced ack (rcv-path RTT) */
uint32_t last_collapse_point; /* sequence where rwnd last collapsed */
uint32_t high_collapse_point; /* highest collapse sequence seen */
/* Probe-RTT state. */
uint32_t rc_lower_rtt_us_cts; /* time we last saw a lower RTT */
uint32_t rc_time_probertt_entered; /* when probe-RTT was entered */
uint32_t rc_time_probertt_starts; /* when probe-RTT measurement starts */
uint32_t rc_lowest_us_rtt; /* lowest microsecond RTT seen */
uint32_t rc_highest_us_rtt; /* highest microsecond RTT seen */
uint32_t rc_last_us_rtt; /* most recent microsecond RTT */
uint32_t rc_time_of_last_probertt; /* when probe-RTT last completed */
uint32_t rc_target_probertt_flight; /* flight-size target during probe-RTT */
uint32_t rc_probertt_sndmax_atexit; /* snd_max when probe-RTT exited */
/* Recovery snapshots and misc counters. */
uint32_t rc_cwnd_at_erec; /* cwnd at entry to recovery */
uint32_t rc_ssthresh_at_erec; /* ssthresh at entry to recovery */
uint32_t dsack_byte_cnt; /* bytes reported in DSACKs */
uint32_t retran_during_recovery; /* bytes retransmitted while in recovery */
uint32_t rc_gp_lowrtt; /* lowest RTT within the gp measure */
uint32_t rc_gp_high_rwnd; /* highest rwnd within the gp measure */
uint32_t rc_snd_max_at_rto; /* snd_max when the RTO fired */
uint32_t rc_out_at_rto; /* bytes outstanding when the RTO fired */
int32_t rc_scw_index; /* index into the shared-cwnd structure */
uint32_t max_reduction; /* NOTE(review): max reduction allowed -- confirm of what */
uint32_t side_chan_dis_mask; /* CCSP_DIS_MASK / HYBRID_DIS_MASK bits */
uint32_t rc_tlp_threshold; /* threshold driving TLP scheduling */
uint32_t rc_last_timeout_snduna; /* snd_una at the last timeout */
uint32_t last_tlp_acked_start; /* start seq of the last acked TLP */
uint32_t last_tlp_acked_end; /* end seq of the last acked TLP */
uint32_t rc_min_to; /* minimum timeout value */
uint32_t rc_pkt_delay; /* extra per-packet delay (reorder mitigation) -- confirm */
uint32_t persist_lost_ends; /* probes lost at the end of persist state -- confirm */
uint32_t cleared_app_ack_seq; /* sequence at which app-ack state was cleared */
uint32_t last_rcv_tstmp_for_rtt; /* last peer timestamp used for rcv-path RTT */
uint32_t last_time_of_arm_rcv; /* when the rcv-path RTT probe was armed */
uint32_t rto_ssthresh; /* ssthresh to restore/apply around RTO -- confirm */
uint32_t recovery_rxt_cnt; /* retransmit count within recovery -- confirm */
/* Saved congestion-control parameters (see rack_hibeta / tcp_rack_beta opts). */
uint32_t rc_saved_beta; /* saved CC beta value */
uint32_t rc_saved_beta_ecn; /* saved CC beta_ecn value */
uint16_t rc_cnt_of_retran[RETRAN_CNT_SIZE]; /* histogram of retransmit counts */
uint16_t rc_early_recovery_segs; /* segments for early recovery */
uint16_t rc_reorder_shift; /* shift applied to RTT for reorder window */
uint8_t rack_per_upper_bound_ss; /* upper bound on rack_per_of_gp_ss */
uint8_t rack_per_upper_bound_ca; /* upper bound on rack_per_of_gp_ca */
uint8_t cleared_app_ack; /* flag: app-ack state has been cleared */
uint8_t dsack_persist; /* rounds DSACK-based adjustments persist */
uint8_t rc_no_push_at_mrtt; /* do not push when at min RTT -- confirm */
uint8_t num_measurements; /* gp measurements taken so far */
uint8_t req_measurements; /* gp measurements required (see RACK_REQ_AVG) */
uint8_t saved_hibeta; /* saved high-beta setting */
uint8_t rc_tlp_cwnd_reduce; /* reduce cwnd on TLP -- option-controlled */
uint8_t rc_prr_sendalot; /* PRR send-a-lot option value */
uint8_t rc_rate_sample_method; /* RACK_USE_BEG/END/END_OR_THACK */
uint8_t client_suggested_maxseg; /* hybrid-pacing client-suggested max segments */
uint8_t use_gp_not_last; /* use gp estimate rather than last measure -- confirm */
uint8_t pacing_method; /* RACK_PACING_NONE / RACK_DGP_PACING / RACK_REG_PACING bits */
};
#endif
/* Values/bits for rack_control.pacing_method. */
#define RACK_PACING_NONE 0x00
#define RACK_DGP_PACING 0x01 /* dynamic goodput pacing */
#define RACK_REG_PACING 0x02 /* regular (fixed/configured) pacing */
/* HYBRID_LOG_*: event codes for hybrid-pacing logging. */
#define HYBRID_LOG_NO_ROOM 0
#define HYBRID_LOG_TURNED_OFF 1
#define HYBRID_LOG_NO_PACING 2
#define HYBRID_LOG_RULES_SET 3
#define HYBRID_LOG_NO_RANGE 4
#define HYBRID_LOG_RULES_APP 5
#define HYBRID_LOG_REQ_COMP 6
#define HYBRID_LOG_BW_MEASURE 7
#define HYBRID_LOG_RATE_CAP 8
#define HYBRID_LOG_CAP_CALC 9
#define HYBRID_LOG_ISSAME 10
#define HYBRID_LOG_ALLSENT 11
#define HYBRID_LOG_OUTOFTIME 12
#define HYBRID_LOG_CAPERROR 13
#define HYBRID_LOG_EXTEND 14
#define HYBRID_LOG_SENT_LOST 15
#define RACK_TIMELY_CNT_BOOST 5 /* timely counter boost -- confirm use in rack.c */
/* Hystart operating modes (SC = ?, hedged -- confirm expansion in rack.c). */
#define RACK_HYSTART_OFF 0
#define RACK_HYSTART_ON 1
#define RACK_HYSTART_ON_W_SC 2
#define RACK_HYSTART_ON_W_SC_C 3
#define MAX_USER_SET_SEG 0x3f /* cap for rc_user_set_max_segs (fits its 7-bit field) */
#define RACK_FREE_CNT_MAX 0x2f /* cap for cached free sendmaps (fits the 6-bit rc_free_cnt) */
#ifdef _KERNEL
/*
 * Per-connection RACK stack state.  Mostly densely packed 1-bit feature
 * flags plus the large r_ctl sub-structure; cache-line aligned.  Flag
 * glosses below are derived from their names -- behavior lives in rack.c.
 */
struct tcp_rack {
/*
 * Per-TCP-state input handler; dispatched on segment arrival.  The
 * signature mirrors the stack's segment-processing entry -- confirm
 * argument meanings against the rack.c substate functions.
 */
int32_t(*r_substate) (struct mbuf *, struct tcphdr *,
struct socket *, struct tcpcb *, struct tcpopt *,
int32_t, int32_t, uint32_t, int, int, uint8_t);
struct tcpcb *rc_tp; /* back pointer to our tcpcb */
struct inpcb *rc_inp; /* back pointer to our inpcb */
uint8_t rc_free_cnt : 6, /* entries on the rc_free cache (<= RACK_FREE_CNT_MAX) */
rc_skip_timely : 1,
pcm_enabled : 1; /* PCM measurement is enabled */
uint8_t client_bufferlvl : 3, /* hybrid-pacing client buffer level */
rack_deferred_inited : 1,
full_size_rxt: 1,
shape_rxt_to_pacing_min : 1,
rc_ack_required: 1,
r_use_hpts_min : 1;
uint8_t no_prr_addback : 1,
gp_ready : 1, /* enough gp measurements to pace from the estimate */
defer_options: 1, /* queue options on r_ctl.opt_list instead of applying */
dis_lt_bw : 1, /* long-term bandwidth use disabled */
rc_ack_can_sendout_data: 1,
rc_pacing_cc_set: 1,
rc_rack_tmr_std_based :1,
rc_rack_use_dsack: 1; /* use DSACK feedback (see dsack_* in r_ctl) */
uint8_t rc_dsack_round_seen: 1,
rc_last_tlp_acked_set: 1,
rc_last_tlp_past_cumack: 1,
rc_last_sent_tlp_seq_valid: 1,
rc_last_sent_tlp_past_cumack: 1,
probe_not_answered: 1,
rack_hibeta : 1,
lt_bw_up : 1;
uint32_t rc_rack_rtt; /* RTT the RACK loss algorithm is using */
uint16_t r_mbuf_queue : 1,
rtt_limit_mul : 4,
r_limit_scw : 1,
r_must_retran : 1, /* forced retransmit pending (RACK_MUST_RXT blocks) */
r_use_cmp_ack: 1, /* compressed-ack processing enabled */
r_ent_rec_ns: 1,
r_might_revert: 1,
r_fast_output: 1, /* fast output path (fsb) is usable now */
r_fsb_inited: 1,
r_rack_hw_rate_caps: 1,
r_up_only: 1,
r_via_fill_cw : 1,
r_rcvpath_rtt_up : 1; /* receive-path RTT probe armed */
uint8_t rc_user_set_max_segs : 7, /* user max segments (<= MAX_USER_SET_SEG) */
rc_fillcw_apply_discount;
uint8_t rc_labc; /* local ABC (appropriate byte counting) value */
uint16_t forced_ack : 1,
rc_gp_incr : 1, /* timely: in increase phase */
rc_gp_bwred : 1, /* timely: in bandwidth-reduction phase */
rc_gp_timely_inc_cnt : 3,
rc_gp_timely_dec_cnt : 3,
r_use_labc_for_rec: 1,
rc_highly_buffered: 1,
rc_dragged_bottom: 1,
rc_pace_dnd : 1, /* NOTE(review): dnd = "do not disturb"? -- confirm */
rc_initial_ss_comp : 1, /* initial slow start completed */
rc_gp_filled : 1, /* have a goodput estimate (gp_bw valid) */
rc_hw_nobuf : 1;
uint8_t r_state : 4, /* TCP state we have cached for r_substate dispatch */
rc_catch_up : 1, /* hybrid pacing catch-up mode */
rc_hybrid_mode : 1, /* hybrid pacing engaged */
rc_suspicious : 1,
rc_new_rnd_needed: 1;
uint8_t rc_tmr_stopped : 7, /* which timer types are stopped (bitmask) */
t_timers_stopped : 1;
uint8_t rc_enobuf : 7, /* consecutive ENOBUFS backoff count -- confirm */
rc_on_min_to : 1;
uint8_t r_timer_override : 1, /* unconditional self-send pending */
r_is_v6 : 1, /* connection is IPv6 */
rc_in_persist : 1,
rc_tlp_in_progress : 1,
rc_always_pace : 1, /* pacing enabled for all sends */
rc_pace_to_cwnd : 1,
rc_pace_fill_if_rttin_range : 1,
rc_srtt_measure_made : 1;
uint8_t app_limited_needs_set : 1,
use_fixed_rate : 1,
rc_has_collapsed : 1, /* peer collapsed the receive window */
use_lesser_lt_bw : 1,
cspr_is_fcc : 1,
rack_hdrw_pacing : 1, /* currently using hardware pacing */
rack_hdw_pace_ena : 1, /* hardware pacing allowed */
rack_attempt_hdwr_pace : 1,
/* (next byte) TLP/scwnd configuration */
rack_tlp_threshold_use : 3, /* TLP_USE_* method */
rack_rec_nonrxt_use_cr : 1,
rack_enable_scwnd : 1,
rack_attempted_scwnd : 1,
rack_no_prr : 1, /* PRR disabled */
rack_scwnd_is_idle : 1;
uint8_t rc_allow_data_af_clo: 1, /* allow data after close */
delayed_ack : 1,
set_pacing_done_a_iw : 1, /* pacing set after an initial window */
use_rack_rr : 1, /* rack rapid recovery enabled -- confirm expansion */
alloc_limit_reported : 1,
rack_avail : 2,
rc_force_max_seg : 1;
uint8_t r_early : 1, /* last pacing wakeup ran early (see rc_agg_early) */
r_late : 1, /* last pacing wakeup ran late (see rc_agg_delayed) */
r_wanted_output: 1, /* output pass requested */
r_rr_config : 2,
r_persist_lt_bw_off : 1,
r_collapse_point_valid : 1, /* r_ctl.last_collapse_point is valid */
dgp_on : 1; /* dynamic goodput pacing active */
uint16_t rto_from_rec: 1, /* RTO fired from within recovery */
avail_bit: 4, /* unused bits */
pcm_in_progress: 1,
pcm_needed: 1,
rc_sendvars_notset : 1,
rc_gp_rtt_set : 1,
rc_gp_dyn_mul : 1, /* timely dynamic multipliers enabled */
rc_gp_saw_rec : 1, /* measurement saw recovery */
rc_gp_saw_ca : 1, /* measurement saw congestion avoidance */
rc_gp_saw_ss : 1, /* measurement saw slow start */
rc_gp_no_rec_chg : 1,
in_probe_rtt : 1, /* currently in probe-RTT */
measure_saw_probe_rtt : 1;
struct rack_control r_ctl; /* the big bag of state above */
} __aligned(CACHE_LINE_SIZE);
/*
 * Fold an arriving ack covering sequence range ss..es into the PCM
 * measurement state (r_ctl.pcm_i / pcm_s); was_cumack distinguishes a
 * cumulative ack from a SACK-derived range.  Range inclusivity is not
 * derivable from this header -- see the rack.c definition.
 */
void rack_update_pcm_ack(struct tcp_rack *rack, int was_cumack,
uint32_t ss, uint32_t es);
#endif
#endif