The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/netinet/tcp_stacks/tcp_bbr.h

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 2016-2020 Netflix, Inc.
    3  *
    4  * Redistribution and use in source and binary forms, with or without
    5  * modification, are permitted provided that the following conditions
    6  * are met:
    7  * 1. Redistributions of source code must retain the above copyright
    8  *    notice, this list of conditions and the following disclaimer.
    9  * 2. Redistributions in binary form must reproduce the above copyright
   10  *    notice, this list of conditions and the following disclaimer in the
   11  *    documentation and/or other materials provided with the distribution.
   12  *
   13  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   14  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   16  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   17  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   18  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   19  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   20  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   21  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   22  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   23  * SUCH DAMAGE.
   24  *
   25  * $FreeBSD$
   26  */
   27 
   28 #ifndef _NETINET_TCP_BBR_H_
   29 #define _NETINET_TCP_BBR_H_
   30 
   31 #define BBR_INITIAL_RTO  1000000        /* 1 second in micro-seconds */
   32 /* Send map flags (bit values for bbr_sendmap.r_flags) */
   33 #define BBR_ACKED         0x0001        /* The remote endpoint acked this */
   34 #define BBR_WAS_RENEGED   0x0002        /* The peer reneged the ack  */
   35 #define BBR_RXT_CLEARED   0x0004        /* ACK Cleared by the RXT timer  */
   36 #define BBR_OVERMAX       0x0008        /* We have more retran's than we can
   37                                          * fit */
   38 #define BBR_SACK_PASSED   0x0010        /* A sack was done above this block */
   39 #define BBR_WAS_SACKPASS  0x0020        /* We retransmitted due to SACK pass */
   40 #define BBR_HAS_FIN       0x0040        /* segment is sent with fin */
   41 #define BBR_TLP           0x0080        /* segment sent as tail-loss-probe */
   42 #define BBR_HAS_SYN       0x0100        /* segment has the syn */
   43 #define BBR_MARKED_LOST   0x0200        /*
   44                                          * This segment is lost and
   45                                          * totaled into bbr->rc_ctl.rc_lost
   46                                          */
   47 #define BBR_RWND_COLLAPSED 0x0400       /* The peer collapsed the rwnd on the segment */
   48 #define BBR_NUM_OF_RETRANS 7            /* Length of bbr_sendmap.r_tim_lastsent[] */
   49 
   50 /* Defines for socket options to set pacing overheads */
   51 #define BBR_INCL_ENET_OH 0x01
   52 #define BBR_INCL_IP_OH   0x02
   53 #define BBR_INCL_TCP_OH  0x03   /* NOTE(review): 0x03 == (BBR_INCL_ENET_OH | BBR_INCL_IP_OH), not a distinct bit; if users test these with '&', 0x04 may have been intended -- confirm */
   54 
   55 /*
   56  * With the addition of both measurement algorithms
   57  * I had to move over the size of a
   58  * cache line (unfortunately). For now there is
   59  * no way around this. We may be able to cut back
   60  * at some point I hope.
   61  */
   62 struct bbr_sendmap {    /* One send-map entry: the sequence range [r_start, r_end) plus per-send state */
   63         TAILQ_ENTRY(bbr_sendmap) r_next;        /* seq number arrayed next */
   64         TAILQ_ENTRY(bbr_sendmap) r_tnext;       /* Time of tmit based next */
   65         uint32_t r_start;       /* Sequence number of the segment */
   66         uint32_t r_end;         /* End seq (exclusive): this is 1 beyond the last seq actually covered */
   67 
   68         uint32_t r_rtr_bytes;   /* How many bytes have been retransmitted */
   69         uint32_t r_delivered;   /* Delivered amount at send */
   70 
   71         uint32_t r_del_time;    /* The time of the last delivery update */
   72         uint8_t r_rtr_cnt:4,    /* Retran count, index this -1 (into r_tim_lastsent[]) to get time
   73                                  * sent */
   74                 r_rtt_not_allowed:1,    /* No rtt measurement allowed */
   75                 r_is_drain:1,   /* In a draining cycle */
   76                 r_app_limited:1,/* We went app limited */
   77                 r_ts_valid:1;   /* Timestamp field is valid (r_del_ack_ts) */
   78         uint8_t r_dupack;       /* Dup ack count */
   79         uint8_t r_in_tmap:1,    /* Flag to see if its in the r_tnext array */
   80                 r_is_smallmap:1,/* Was logged as a small-map send-map item */
   81                 r_is_gain:1,    /* Was in gain cycle */
   82                 r_bbr_state:5;  /* The BBR state at send (presumably a BBR_STATE_* value -- confirm) */
   83         uint8_t r_limit_type;   /* is this entry counted against a limit? (presumably BBR_LIMIT_TYPE_* -- confirm) */
   84 
   85         uint16_t r_flags;       /* Send map flags (BBR_ACKED ... BBR_RWND_COLLAPSED) as defined above */
   86         uint16_t r_spare16;
   87         uint32_t r_del_ack_ts;  /* At send what timestamp of peer was (if r_ts_valid set) */
   88         /****************Cache line*****************/
   89         uint32_t r_tim_lastsent[BBR_NUM_OF_RETRANS];    /* Send times, one slot per (re)transmission; see r_rtr_cnt */
   90         /*
   91          * Question, should we instead just grab the sending b/w
   92          * from the filter with the gain and store it in a
   93          * uint64_t instead?
   94          */
   95         uint32_t r_first_sent_time; /* Time of first pkt in flight sent */
   96         uint32_t r_pacing_delay;        /* pacing delay of this send */
   97         uint32_t r_flight_at_send;      /* flight at the time of the send */
   98 #ifdef _KERNEL
   99 }           __aligned(CACHE_LINE_SIZE);
  100 #else
  101 };
  102 #endif
  103 #define BBR_LIMIT_TYPE_SPLIT    1       /* presumably a value for bbr_sendmap.r_limit_type -- confirm */
  104 
  105 TAILQ_HEAD(bbr_head, bbr_sendmap);      /* Declares struct bbr_head: tail-queue head of bbr_sendmap entries */
  106 
  107 #define BBR_SEGMENT_TIME_SIZE 1500      /* How many bytes in time_between */
  108 
  109 #define BBR_MIN_SEG 1460                /* MSS size */
  110 #define BBR_MAX_GAIN_VALUE 0xffff       /* Largest value that fits in 16 bits */
  111 
  112 #define BBR_TIMER_FUDGE  1500   /* 1.5ms in micro seconds */
  113 
  114 /* BW twiddle secret codes (reason codes for bandwidth-reduction events) */
  115 #define BBR_RED_BW_CONGSIG       0      /* We enter recovery and set using b/w */
  116 #define BBR_RED_BW_RATECAL       1      /* We are calculating the loss rate */
  117 #define BBR_RED_BW_USELRBW       2      /* We are dropping the lower b/w with
  118                                          * cDR */
  119 #define BBR_RED_BW_SETHIGHLOSS   3      /* We have set our highloss value at
  120                                          * exit from probe-rtt */
  121 #define BBR_RED_BW_PE_CLREARLY   4      /* We have decided to clear the
  122                                          * reduction early */
  123 #define BBR_RED_BW_PE_CLAFDEL    5      /* We are clearing it on schedule
  124                                          * delayed */
  125 #define BBR_RED_BW_REC_ENDCLL    6      /* Recover exits save high if needed
  126                                          * and clear to start measuring */
  127 #define BBR_RED_BW_PE_NOEARLY_OUT 7     /* Set pkt epoch judged that we do not
  128                                          * get out of jail early */
  129 /* For calculating a rate: selects which rate is being computed */
  130 #define BBR_CALC_BW     1
  131 #define BBR_CALC_LOSS   2
  132 
  133 #define BBR_RTT_BY_TIMESTAMP    0       /* BBR_RTT_BY_*: sequential codes 0-5, not bit flags */
  134 #define BBR_RTT_BY_EXACTMATCH   1
  135 #define BBR_RTT_BY_EARLIER_RET  2
  136 #define BBR_RTT_BY_THIS_RETRAN  3
  137 #define BBR_RTT_BY_SOME_RETRAN  4
  138 #define BBR_RTT_BY_TSMATCHING   5
  139 
  140 /* Markers to track where we enter persists from */
  141 #define BBR_PERSISTS_FROM_1     1
  142 #define BBR_PERSISTS_FROM_2     2
  143 #define BBR_PERSISTS_FROM_3     3
  144 #define BBR_PERSISTS_FROM_4     4
  145 #define BBR_PERSISTS_FROM_5     5
  146 
  147 /* magic cookies to ask for the RTT (selects which RTT value is requested) */
  148 #define BBR_RTT_PROP    0
  149 #define BBR_RTT_RACK    1
  150 #define BBR_RTT_PKTRTT  2
  151 #define BBR_SRTT        3
  152 
  153 #define BBR_SACKED 0
  154 #define BBR_CUM_ACKED  1
  155 
  156 /* Threshold in microseconds where we consider we need a higher min cwnd */
  157 #define BBR_HIGH_SPEED 1000
  158 #define BBR_HIGHSPEED_NUM_MSS 12        /* presumably that higher min cwnd, in MSS units -- confirm */
  159 
  160 #define MAX_REDUCE_RXT 3        /* What is the maximum number of times we are willing to
  161                                  * reduce b/w in RTX's. Setting this has a
  162                                  * multiplicative effect e.g. if we are
  163                                  * reducing by 20% then setting it to 3 means
  164                                  * you will have reduced the b/w estimate by >
  165                                  * 60% before you stop. NOTE(review): three compounded 20% cuts leave 0.8^3 = 51.2% (a ~49% reduction); "> 60%" only holds if the cuts add -- confirm. */
  166 /*
  167  * We use the rate sample structure to
  168  * assist in single sack/ack rate and rtt
  169  * calculation. In the future we will expand
  170  * this in BBR to do forward rate sample
  171  * b/w estimation.
  172  */
  173 #define BBR_RS_RTT_EMPTY 0x00000001     /* Nothing yet stored in RTT's */
  174 #define BBR_RS_BW_EMPTY  0x00000002     /* Nothing yet stored in cDR */
  175 #define BBR_RS_RTT_VALID 0x00000004     /* We have at least one valid RTT */
  176 #define BBR_RS_BW_VAILD  0x00000008     /* We have a valid cDR (identifier spelling "VAILD" is part of the name; do not "fix" without updating users) */
  177 #define BBR_RS_EMPTY   (BBR_RS_RTT_EMPTY|BBR_RS_BW_EMPTY)       /* Both RTT and cDR empty */
  178 struct bbr_rtt_sample {
  179         uint32_t rs_flags;              /* presumably the BBR_RS_* bits above -- confirm */
  180         uint32_t rs_rtt_lowest;
  181         uint32_t rs_rtt_lowest_sendtime;
  182         uint32_t rs_rtt_low_seq_start;
  183 
  184         uint32_t rs_rtt_highest;
  185         uint32_t rs_rtt_cnt;
  186 
  187         uint64_t rs_rtt_tot;            /* presumably the running total over rs_rtt_cnt samples -- confirm */
  188         uint32_t cur_rtt;
  189         uint32_t cur_rtt_bytecnt;
  190 
  191         uint32_t cur_rtt_rsmcnt;
  192         uint32_t rc_crtt_set:1,
  193                 avail_bits:31;          /* Remaining 31 bits of this word are available */
  194         uint64_t rs_cDR;                /* The "cDR" value the BBR_RS_BW_* flags refer to */
  195 };
  196 
  197 /* RTT shrink reasons */
  198 #define BBR_RTTS_INIT     0
  199 #define BBR_RTTS_NEWRTT   1
  200 #define BBR_RTTS_RTTPROBE 2
  201 #define BBR_RTTS_WASIDLE  3
  202 #define BBR_RTTS_PERSIST  4
  203 #define BBR_RTTS_REACHTAR 5
  204 #define BBR_RTTS_ENTERPROBE 6
  205 #define BBR_RTTS_SHRINK_PG 7
  206 #define BBR_RTTS_SHRINK_PG_FINAL 8
  207 #define BBR_RTTS_NEW_TARGET 9
  208 #define BBR_RTTS_LEAVE_DRAIN 10
  209 #define BBR_RTTS_RESETS_VALUES 11
  210 
  211 #define BBR_NUM_RATES 5
  212 /* Rate flags */
  213 #define BBR_RT_FLAG_FREE       0x00     /* Is on the free list (value 0: no bit set, so it cannot be tested with '&') */
  214 #define BBR_RT_FLAG_INUSE      0x01     /* Has been allocated */
  215 #define BBR_RT_FLAG_READY      0x02     /* Ready to initiate a measurement. */
  216 #define BBR_RT_FLAG_CAPPED_PRE 0x04     /* Ready to cap if we send the next segment */
  217 #define BBR_RT_FLAG_CAPPED     0x08     /* Measurement is capped */
  218 #define BBR_RT_FLAG_PASTFA     0x10     /* Past the first ack. */
  219 #define BBR_RT_FLAG_LIMITED    0x20     /* Saw application/cwnd or rwnd limited period */
  220 #define BBR_RT_SEEN_A_ACK      0x40     /* A ack has been saved */
  221 #define BBR_RT_PREV_RTT_SET    0x80     /* There was a RTT set in */
  222 #define BBR_RT_PREV_SEND_TIME  0x100    /* (first flag above 0xff: needs a flags field wider than 8 bits)
  223                                          * There was a RTT send time set that can be used
  224                                          * no snd_limits
  225                                          */
  226 #define BBR_RT_SET_GRADIENT    0x200
  227 #define BBR_RT_TS_VALID        0x400
  228 
  229 struct bbr_log {        /* One log record; the element type of bbr_log_sysctl_out.entries */
  230         union {         /* Anonymous union: which member is meaningful depends on the event, per the comments below */
  231                 struct bbr_sendmap *rsm;        /* For alloc/free */
  232                 uint64_t sb_acc;        /* For out/ack or t-o */
  233         };
  234         struct tcpcb *tp;
  235         uint32_t t_flags;
  236         uint32_t th_seq;
  237         uint32_t th_ack;
  238         uint32_t snd_una;
  239         uint32_t snd_nxt;
  240         uint32_t snd_max;
  241         uint32_t snd_cwnd;
  242         uint32_t snd_wnd;
  243         uint32_t rc_lost;
  244         uint32_t target_cwnd;   /* UU */
  245         uint32_t inflight;      /* UU */
  246         uint32_t applimited;    /* UU */
  247         /* Things for BBR */
  248         uint32_t delivered;     /* UU */
  249         uint64_t cur_del_rate;  /* UU */
  250         uint64_t delRate;       /* UU */
  251         uint64_t rttProp;       /* UU */
  252         uint64_t lt_bw;         /* UU */
  253         uint32_t timeStamp;
  254         uint32_t time;
  255         uint32_t slot;          /* UU */
  256         uint32_t delayed_by;
  257         uint32_t exp_del;
  258         uint32_t pkts_out;
  259         uint32_t new_win;
  260         uint32_t hptsi_gain;    /* UU */
  261         uint32_t cwnd_gain;     /* UU */
  262         uint32_t epoch;         /* UU */
  263         uint32_t lt_epoch;      /* UU */
  264         /* Sack fun */
  265         uint32_t blk_start[4];  /* xx */
  266         uint32_t blk_end[4];
  267         uint32_t len;           /* Timeout T3=1, TLP=2, RACK=3 -- NOTE(review): these values mirror BBR_TO_FRM_TMR/TLP/RACK and likely describe `type` below rather than `len`; confirm */
  268         uint8_t type;
  269         uint8_t n_sackblks;     /* presumably how many blk_start[]/blk_end[] pairs are in use (max 4) -- confirm */
  270         uint8_t applied;        /* UU */
  271         uint8_t inhpts;         /* UU */
  272         uint8_t __spare;        /* UU */
  273         uint8_t use_lt_bw;      /* UU */
  274 };
  275 
  276 struct bbr_log_sysctl_out {     /* Fixed header followed by a variable number of struct bbr_log records */
  277         uint32_t bbr_log_at;
  278         uint32_t bbr_log_max;
  279         struct bbr_log entries[];       /* C99 flexible array member (replaces the GNU zero-length entries[0]); sizeof(struct) is unchanged */
  280 };
  281 
  282 /*
  283  * Magic numbers for logging timeout events if the
  284  * logging is enabled.
  285  */
  286 #define BBR_TO_FRM_TMR  1       /* "T3=1" in the struct bbr_log field comment */
  287 #define BBR_TO_FRM_TLP  2       /* Tail-loss-probe ("TLP=2" in struct bbr_log) */
  288 #define BBR_TO_FRM_RACK 3       /* "RACK=3" in the struct bbr_log field comment */
  289 #define BBR_TO_FRM_KEEP 4
  290 #define BBR_TO_FRM_PERSIST 5
  291 #define BBR_TO_FRM_DELACK 6
  292 
  293 #define BBR_SEES_STRETCH_ACK 1          /* Distinct bits (1, 2), so both can be recorded together */
  294 #define BBR_SEES_COMPRESSED_ACKS 2
  295 
  296 /*
  297  * As we get each SACK we wade through the
  298  * rc_map and mark off what is acked.
  299  * We also increment rc_sacked as well.
  300  *
  301  * We also pay attention to missing entries
  302  * based on the time and possibly mark them
  303  * for retransmit. If we do and we are not already
  304  * in recovery we enter recovery. In doing
  305  * so we clear prr_delivered/holes_rxt and prr_sent_dur_rec.
  306  * We also setup rc_next/rc_snd_nxt/rc_send_end so
  307  * we will know where to send from. When not in
  308  * recovery rc_next will be NULL and rc_snd_nxt should
  309  * equal snd_max.
  310  *
  311  * Whenever we retransmit from recovery we increment
  312  * rc_holes_rxt as we retran a block and mark it as retransmitted
  313  * with the time it was sent. During non-recovery sending we
  314  * add to our map and note the time down of any send expanding
  315  * the rc_map at the tail and moving rc_snd_nxt up with snd_max.
  316  *
  317  * In recovery during SACK/ACK processing if a chunk has
  318  * been retransmitted and it is now acked, we decrement rc_holes_rxt.
  319  * When we retransmit from the scoreboard we use
  320  * rc_next and rc_snd_nxt/rc_send_end to help us
  321  * find what needs to be retran.
  322  *
  323  * To calculate pipe we simply take (snd_max - snd_una) + rc_holes_rxt
  324  * This gets us the effect of RFC6675 pipe, counting twice for
  325  * bytes retransmitted.
  326  */
  327 
  328 #define TT_BBR_FR_TMR   0x2001
  329 
  330 #define BBR_SCALE 8                     /* Shift that defines BBR_UNIT */
  331 #define BBR_UNIT (1 << BBR_SCALE)       /* 256; the unit that gains are expressed against (e.g. "5/4 BBR_UNIT") */
  332 
  333 #define BBR_NUM_RTTS_FOR_DEL_LIMIT 8    /* How many pkt-rtts do we keep
  334                                          * Delivery rate for */
  335 #define BBR_NUM_RTTS_FOR_GOOG_DEL_LIMIT 10      /* How many pkt-rtts do we keep
  336                                                  * Delivery rate for google */
  337 
  338 #define BBR_SECONDS_NO_RTT 10   /* 10 seconds with no RTT shrinkage */
  339 #define BBR_PROBERTT_MAX 200    /* 200ms */
  340 #define BBR_PROBERTT_NUM_MSS 4
  341 #define BBR_STARTUP_EPOCHS 3
  342 #define USECS_IN_MSEC 1000      /* Microseconds per millisecond */
  343 #define BBR_TIME_TO_SECONDS(a) ((a) / USECS_IN_SECOND)  /* (a) is parenthesized so an argument like x + y is divided as a whole */
  344 #define BBR_TIME_TO_MILLI(a) ((a) / MS_IN_USEC)         /* same: the argument is parenthesized before the divide */
  345 
  346 /* BBR keeps time in usec's so we divide by 1000 and round up; the argument is parenthesized before adding 999 */
  347 #define BBR_TS_TO_MS(t)  (((t) + 999) / MS_IN_USEC)
  348 
  349 /*
  350  * Locking for the rack control block.
  351  * a) Locked by INP_WLOCK
  352  * b) Locked by the hpts-mutex
  353  *
  354  */
  355 #define BBR_STATE_STARTUP   0x01        /* BBR_STATE_* are sequential codes, not bit flags (0x03 == 0x01|0x02) */
  356 #define BBR_STATE_DRAIN     0x02
  357 #define BBR_STATE_PROBE_BW  0x03
  358 #define BBR_STATE_PROBE_RTT 0x04
  359 #define BBR_STATE_IDLE_EXIT 0x05
  360 
  361 /* Substate defines for STATE == PROBE_BW */
  362 #define BBR_SUB_GAIN  0         /* State 0 where we are 5/4 BBR_UNIT */
  363 #define BBR_SUB_DRAIN 1         /* State 1 where we are at 3/4 BBR_UNIT */
  364 #define BBR_SUB_LEVEL1 2        /* State 2, first BBR_UNIT */
  365 #define BBR_SUB_LEVEL2 3        /* State 3, 2nd BBR_UNIT */
  366 #define BBR_SUB_LEVEL3 4        /* State 4, 3rd BBR_UNIT */
  367 #define BBR_SUB_LEVEL4 5        /* State 5, 4th BBR_UNIT */
  368 #define BBR_SUB_LEVEL5 6        /* State 6, 5th BBR_UNIT */
  369 #define BBR_SUB_LEVEL6 7        /* State 7, last BBR_UNIT */
  370 #define BBR_SUBSTATE_COUNT 8    /* Number of substates above (0 through 7) */
  371 
  372 /* Single remaining reduce log */
  373 #define BBR_REDUCE_AT_FR 5
  374 
  375 #define BBR_BIG_LOG_SIZE 300000
  376 
  377 struct bbr_stats {      /* All-uint64_t counter layout; the trailing number on each field is its slot in bbr_stat_arry[] (offsetof / sizeof(uint64_t), see BBR_STAT_ADD) */
  378         uint64_t bbr_badfr;             /* 0 */
  379         uint64_t bbr_badfr_bytes;       /* 1 */
  380         uint64_t bbr_saw_oerr;          /* 2 */
  381         uint64_t bbr_saw_emsgsiz;       /* 3 */
  382         uint64_t bbr_reorder_seen;      /* 4 */
  383         uint64_t bbr_tlp_tot;           /* 5 */
  384         uint64_t bbr_tlp_newdata;       /* 6 */
  385         uint64_t bbr_offset_recovery;   /* 7 */
  386         uint64_t bbr_tlp_retran_fail;   /* 8 */
  387         uint64_t bbr_to_tot;            /* 9 */
  388         uint64_t bbr_to_arm_rack;       /* 10 */
  389         uint64_t bbr_enter_probertt;    /* 11 */
  390         uint64_t bbr_tlp_set;           /* 12 */
  391         uint64_t bbr_resends_set;       /* 13 */
  392         uint64_t bbr_force_output;      /* 14 */
  393         uint64_t bbr_to_arm_tlp;        /* 15 */
  394         uint64_t bbr_paced_segments;    /* 16 */
  395         uint64_t bbr_saw_enobuf;        /* 17 */
  396         uint64_t bbr_to_alloc_failed;   /* 18 */
  397         uint64_t bbr_to_alloc_emerg;    /* 19 */
  398         uint64_t bbr_sack_proc_all;     /* 20 */
  399         uint64_t bbr_sack_proc_short;   /* 21 */
  400         uint64_t bbr_sack_proc_restart; /* 22 */
  401         uint64_t bbr_to_alloc;          /* 23 */
  402         uint64_t bbr_offset_drop;       /* 24 */
  403         uint64_t bbr_runt_sacks;        /* 25 */
  404         uint64_t bbr_sack_passed;       /* 26 */
  405         uint64_t bbr_rlock_left_ret0;   /* 27 */
  406         uint64_t bbr_rlock_left_ret1;   /* 28 */
  407         uint64_t bbr_dynamic_rwnd;      /* 29 */
  408         uint64_t bbr_static_rwnd;       /* 30 */
  409         uint64_t bbr_sack_blocks;       /* 31 */
  410         uint64_t bbr_sack_blocks_skip;  /* 32 */
  411         uint64_t bbr_sack_search_both;  /* 33 */
  412         uint64_t bbr_sack_search_fwd;   /* 34 */
  413         uint64_t bbr_sack_search_back;  /* 35 */
  414         uint64_t bbr_plain_acks;        /* 36 */
  415         uint64_t bbr_acks_with_sacks;   /* 37 */
  416         uint64_t bbr_progress_drops;    /* 38 */
  417         uint64_t bbr_early;             /* 39 */
  418         uint64_t bbr_reneges_seen;      /* 40 */
  419         uint64_t bbr_persist_reneg;     /* 41 */
  420         uint64_t bbr_dropped_af_data;   /* 42 */
  421         uint64_t bbr_failed_mbuf_aloc;  /* 43 */
  422         uint64_t bbr_cwnd_limited;      /* 44 */
  423         uint64_t bbr_rwnd_limited;      /* 45 */
  424         uint64_t bbr_app_limited;       /* 46 */
  425         uint64_t bbr_force_timer_start; /* 47 */
  426         uint64_t bbr_hpts_min_time;     /* 48 */
  427         uint64_t bbr_meets_tso_thresh;  /* 49 */
  428         uint64_t bbr_miss_tso_rwnd;     /* 50 */
  429         uint64_t bbr_miss_tso_cwnd;     /* 51 */
  430         uint64_t bbr_miss_tso_app;      /* 52 */
  431         uint64_t bbr_miss_retran;       /* 53 */
  432         uint64_t bbr_miss_tlp;          /* 54 */
  433         uint64_t bbr_miss_unknown;      /* 55 */
  434         uint64_t bbr_hdwr_rl_add_ok;    /* 56 */
  435         uint64_t bbr_hdwr_rl_add_fail;  /* 57 */
  436         uint64_t bbr_hdwr_rl_mod_ok;    /* 58 */
  437         uint64_t bbr_hdwr_rl_mod_fail;  /* 59 */
  438         uint64_t bbr_collapsed_win;     /* 60 */
  439         uint64_t bbr_alloc_limited;     /* 61 */
  440         uint64_t bbr_alloc_limited_conns; /* 62 */
  441         uint64_t bbr_split_limited;     /* 63 (last of 64 counters) */
  442 };
  443 
  444 /*
  445  * The structure bbr_opts_stats is a simple
  446  * way to see how many options are being
  447  * changed in the stack.
  448  */
  449 struct bbr_opts_stats { /* All-uint64_t layout; BBR_OPTS_ADD indexes bbr_opts_arry[] by offsetof(field) / sizeof(uint64_t) */
  450         uint64_t tcp_bbr_pace_per_sec;
  451         uint64_t tcp_bbr_pace_del_tar;
  452         uint64_t tcp_bbr_pace_seg_max;
  453         uint64_t tcp_bbr_pace_seg_min;
  454         uint64_t tcp_bbr_pace_cross;
  455         uint64_t tcp_bbr_drain_inc_extra;
  456         uint64_t tcp_bbr_unlimited;
  457         uint64_t tcp_bbr_iwintso;
  458         uint64_t tcp_bbr_rec_over_hpts;
  459         uint64_t tcp_bbr_recforce;
  460         uint64_t tcp_bbr_startup_pg;
  461         uint64_t tcp_bbr_drain_pg;
  462         uint64_t tcp_bbr_rwnd_is_app;
  463         uint64_t tcp_bbr_probe_rtt_int;
  464         uint64_t tcp_bbr_one_retran;
  465         uint64_t tcp_bbr_startup_loss_exit;
  466         uint64_t tcp_bbr_use_lowgain;
  467         uint64_t tcp_bbr_lowgain_thresh;
  468         uint64_t tcp_bbr_lowgain_half;
  469         uint64_t tcp_bbr_lowgain_fd;
  470         uint64_t tcp_bbr_usedel_rate;
  471         uint64_t tcp_bbr_min_rto;
  472         uint64_t tcp_bbr_max_rto;
  473         uint64_t tcp_rack_pace_max_seg;
  474         uint64_t tcp_rack_min_to;
  475         uint64_t tcp_rack_reord_thresh;
  476         uint64_t tcp_rack_reord_fade;
  477         uint64_t tcp_rack_tlp_thresh;
  478         uint64_t tcp_rack_pkt_delay;
  479         uint64_t tcp_bbr_startup_exit_epoch;
  480         uint64_t tcp_bbr_ack_comp_alg;
  481         uint64_t tcp_rack_cheat;
  482         uint64_t tcp_iwnd_tso;
  483         uint64_t tcp_utter_max_tso;
  484         uint64_t tcp_hdwr_pacing;
  485         uint64_t tcp_extra_state;
  486         uint64_t tcp_floor_min_tso;
  487         /* New */
  488         uint64_t tcp_bbr_algorithm;
  489         uint64_t tcp_bbr_tslimits;
  490         uint64_t tcp_bbr_probertt_len;
  491         uint64_t tcp_bbr_probertt_gain;
  492         uint64_t tcp_bbr_topaceout;
  493         uint64_t tcp_use_rackcheat;
  494         uint64_t tcp_delack;
  495         uint64_t tcp_maxpeak;
  496         uint64_t tcp_retran_wtso;
  497         uint64_t tcp_data_ac;
  498         uint64_t tcp_ts_raises;
  499         uint64_t tcp_pacing_oh_tmr;
  500         uint64_t tcp_pacing_oh;
  501         uint64_t tcp_policer_det;
  502 };
  503 
  504 #ifdef _KERNEL
  505 #define BBR_STAT_SIZE (sizeof(struct bbr_stats)/sizeof(uint64_t))       /* Number of uint64_t counters in struct bbr_stats */
  506 extern counter_u64_t bbr_stat_arry[BBR_STAT_SIZE];
  507 #define BBR_STAT_ADD(name, amm) counter_u64_add(bbr_stat_arry[(offsetof(struct bbr_stats, name)/sizeof(uint64_t))], (amm))     /* Add amm to the counter whose slot is the field's position in struct bbr_stats */
  508 #define BBR_STAT_INC(name) BBR_STAT_ADD(name, 1)
  509 #define BBR_OPTS_SIZE (sizeof(struct bbr_opts_stats)/sizeof(uint64_t))  /* Sized from struct bbr_opts_stats, the struct BBR_OPTS_ADD indexes by (was sized from struct bbr_stats) */
  510 extern counter_u64_t bbr_opts_arry[BBR_OPTS_SIZE];
  511 #define BBR_OPTS_ADD(name, amm) counter_u64_add(bbr_opts_arry[(offsetof(struct bbr_opts_stats, name)/sizeof(uint64_t))], (amm))        /* Add amm to the counter whose slot is the field's position in struct bbr_opts_stats */
  512 #define BBR_OPTS_INC(name) BBR_OPTS_ADD(name, 1)
  513 #endif
  514 
  515 #define BBR_NUM_LOSS_RATES 3
  516 #define BBR_NUM_BW_RATES 3
  517 
  518 #define BBR_RECOVERY_LOWRTT 1           /* BBR_RECOVERY_*RTT: four ascending codes, low (1) to extreme (4); presumably RTT classes used in recovery -- confirm */
  519 #define BBR_RECOVERY_MEDRTT 2
  520 #define BBR_RECOVERY_HIGHRTT 3
  521 #define BBR_RECOVERY_EXTREMERTT 4
  522 
  523 struct bbr_control {
  524         /*******************************/
  525         /* Cache line 2 from bbr start */
  526         /*******************************/
  527         struct bbr_head rc_map; /* List of all segments Lock(a) */
  528         struct bbr_head rc_tmap;        /* List in transmit order Lock(a) */
  529         struct bbr_sendmap *rc_resend;  /* something we have been asked to
  530                                          * resend */
  531         uint32_t rc_last_delay_val;     /* How much we expect to delay Lock(a) */
  532         uint32_t rc_bbr_hptsi_gain:16,  /* Current hptsi gain Lock(a) */
  533                  rc_hpts_flags:16;      /* flags on whats on the pacer wheel */
  534 
  535         uint32_t rc_delivered;  /* BRR delivered amount Lock(a) */
  536         uint32_t rc_hptsi_agg_delay;    /* How much time are we behind */
  537 
  538         uint32_t rc_flight_at_input;
  539         uint32_t rc_lost_bytes;         /* Total bytes currently marked lost */
  540         /*******************************/
  541         /* Cache line 3 from bbr start */
  542         /*******************************/
  543         struct time_filter rc_delrate;
  544         /*******************************/
  545         /* Cache line 4 from bbr start */
  546         /*******************************/
  547         struct bbr_head rc_free;        /* List of Free map entries Lock(a) */
  548         struct bbr_sendmap *rc_tlp_send;        /* something we have been
  549                                                  * asked to resend */
  550         uint32_t rc_del_time;
  551         uint32_t rc_target_at_state;    /* Target for a state */
  552 
  553         uint16_t rc_free_cnt;   /* Number of free entries on the rc_free list
  554                                  * Lock(a) */
  555         uint16_t rc_startup_pg;
  556 
  557         uint32_t cur_rtt;       /* Last RTT from ack */
  558 
  559         uint32_t rc_went_idle_time;     /* Used for persits to see if its
  560                                          * probe-rtt qualified */
  561         uint32_t rc_pace_max_segs:17,   /* How much in any single TSO we send Lock(a) */
  562                  rc_pace_min_segs:15;   /* The minimum single segment size before we enter persists */
  563 
  564         uint32_t rc_rtt_shrinks;        /* Time of last rtt shrinkage Lock(a) */
  565         uint32_t r_app_limited_until;
  566         uint32_t rc_timer_exp;  /* If a timer ticks of expiry */
  567         uint32_t rc_rcv_epoch_start;    /* Start time of the Epoch Lock(a) */
  568 
  569         /*******************************/
  570         /* Cache line 5 from bbr start */
  571         /*******************************/
  572 
  573         uint32_t rc_lost_at_pktepoch;   /* what the lost value was at the last
  574                                          * pkt-epoch */
  575         uint32_t r_measurement_count;   /* count of measurement applied lock(a) */
  576 
  577         uint32_t rc_last_tlp_seq;       /* Last tlp sequence Lock(a) */
  578         uint16_t rc_reorder_shift;      /* Socket option value Lock(a) */
  579         uint16_t rc_pkt_delay;  /* Socket option value Lock(a) */
  580 
  581         struct bbr_sendmap *rc_sacklast;        /* sack remembered place
  582                                                  * Lock(a) */
  583         struct bbr_sendmap *rc_next;    /* remembered place where we next
  584                                          * retransmit at Lock(a) */
  585 
  586         uint32_t rc_sacked;     /* Tot sacked on scoreboard Lock(a) */
  587         uint32_t rc_holes_rxt;  /* Tot retraned from scoreboard Lock(a) */
  588 
  589         uint32_t rc_reorder_ts; /* Last time we saw reordering Lock(a) */
  590         uint32_t rc_init_rwnd;  /* Initial rwnd when we transitioned */
  591                                 /*- ---
  592                                  * used only initial and close
  593                                  */
  594         uint32_t rc_high_rwnd;  /* Highest rwnd seen */
  595         uint32_t rc_lowest_rtt; /* Smallest RTT we have seen */
  596 
  597         uint32_t rc_last_rtt;   /* Last valid measured RTT that ack'd data */
  598         uint32_t bbr_cross_over;
  599 
  600         /*******************************/
  601         /* Cache line 6 from bbr start */
  602         /*******************************/
  603         struct sack_filter bbr_sf;
  604 
  605         /*******************************/
  606         /* Cache line 7 from bbr start */
  607         /*******************************/
  608         struct time_filter_small rc_rttprop;
  609         uint32_t last_inbound_ts;       /* Peers last timestamp */
  610 
  611         uint32_t rc_inc_tcp_oh: 1,
  612                  rc_inc_ip_oh: 1,
  613                  rc_inc_enet_oh:1,
  614                  rc_incr_tmrs:1,
  615                  restrict_growth:28;
  616         uint32_t rc_lt_epoch_use;       /* When we started lt-bw use Lock(a) */
  617 
  618         uint32_t rc_recovery_start;     /* Time we start recovery Lock(a) */
  619         uint32_t rc_lt_del;     /* Delivered at lt bw sampling start Lock(a) */
  620 
  621         uint64_t rc_bbr_cur_del_rate;   /* Current measured delivery rate
  622                                          * Lock(a) */
  623 
  624         /*******************************/
  625         /* Cache line 8 from bbr start */
  626         /*******************************/
  627         uint32_t rc_cwnd_on_ent;        /* On entry to recovery the cwnd
  628                                          * Lock(a) */
  629         uint32_t rc_agg_early;  /* aggregate amount early */
  630 
  631         uint32_t rc_rcvtime;    /* When we last received data Lock(a) */
  632         uint32_t rc_pkt_epoch_del;      /* seq num that we need for RTT epoch */
  633 
  634         uint32_t rc_pkt_epoch;  /* Epoch based on packet RTTs */
  635         uint32_t rc_pkt_epoch_time;     /* Time we started the pkt epoch */
  636 
  637         uint32_t rc_pkt_epoch_rtt;      /* RTT using the packet epoch */
  638         uint32_t rc_rtt_epoch;  /* Current RTT epoch, it ticks every rttProp
  639                                  * Lock(a) */
  640         uint32_t lowest_rtt;
  641         uint32_t bbr_smallest_srtt_this_state;
  642 
  643         uint32_t rc_lt_epoch;   /* LT epoch start of bw_sampling */
  644         uint32_t rc_lost_at_startup;
  645 
  646         uint32_t rc_bbr_state_atflight;
  647         uint32_t rc_bbr_last_startup_epoch;     /* Last startup epoch where we
  648                                                  * increased 20% */
  649         uint32_t rc_bbr_enters_probertt;        /* Timestamp we entered
  650                                                  * probertt Lock(a) */
  651         uint32_t rc_lt_time;    /* Time of lt sampling start Lock(a) */
  652 
  653         /*******************************/
  654         /* Cache line 9 from bbr start */
  655         /*******************************/
  656         uint64_t rc_lt_bw;      /* LT bw calculated Lock(a) */
  657         uint64_t rc_bbr_lastbtlbw;      /* For startup, what was last btlbw I
  658                                          * saw to check the 20% gain Lock(a) */
  659 
  660         uint32_t rc_bbr_cwnd_gain;      /* Current cwnd gain Lock(a) */
  661         uint32_t rc_pkt_epoch_loss_rate;        /* pkt-epoch loss rate */
  662 
  663         uint32_t rc_saved_cwnd; /* Saved cwnd during Probe-rtt drain Lock(a) */
  664         uint32_t substate_pe;
  665 
  666         uint32_t rc_lost;       /* Number of bytes lost Lock(a) */
  667         uint32_t rc_exta_time_gd; /* How much extra time we got in d/g */
  668 
  669         uint32_t rc_lt_lost;    /* Number of lt bytes lost at sampling start
  670                                  * Lock(a) */
  671         uint32_t rc_bbr_state_time;
  672 
  673         uint32_t rc_min_to;     /* Socket option value Lock(a) */
  674         uint32_t rc_initial_hptsi_bw;   /* Our initial startup bw Lock(a) */
  675 
  676         uint32_t bbr_lost_at_state;     /* Temp counter debug lost value as we
  677                                          * enter a state */
  678         /*******************************/
  679         /* Cache line 10 from bbr start */
  680         /*******************************/
  681         uint32_t rc_level_state_extra;
  682         uint32_t rc_red_cwnd_pe;
  683         const struct tcp_hwrate_limit_table *crte;
  684         uint64_t red_bw;
  685 
  686         uint32_t rc_probertt_int;
  687         uint32_t rc_probertt_srttchktim;        /* Time we last did a srtt
  688                                                  * check  */
  689         uint32_t gain_epoch;    /* Epoch we should be out of gain */
  690         uint32_t rc_min_rto_ms;
  691 
  692         uint32_t rc_reorder_fade;       /* Socket option value Lock(a) */
  693         uint32_t last_startup_measure;
  694 
  695         int32_t bbr_hptsi_per_second;
  696         int32_t bbr_hptsi_segments_delay_tar;
  697 
  698         int32_t bbr_hptsi_segments_max;
  699         uint32_t bbr_rttprobe_gain_val;
  700         /*******************************/
  701         /* Cache line 11 from bbr start */
  702         /*******************************/
  703         uint32_t cur_rtt_send_time;     /* Time we sent our rtt measured packet */
  704         uint32_t bbr_peer_tsratio;      /* Our calculated ts ratio to multply */
  705         uint32_t bbr_ts_check_tstmp;    /* When we filled it the TS that came on the ack */
  706         uint32_t bbr_ts_check_our_cts;  /* When we filled it the cts of the send */
  707         uint32_t rc_tlp_rxt_last_time;
  708         uint32_t bbr_smallest_srtt_state2;
  709         uint32_t bbr_hdwr_cnt_noset_snt;        /* count of hw pacing sends during delay */
  710         uint32_t startup_last_srtt;
  711         uint32_t rc_ack_hdwr_delay;
  712         uint32_t highest_hdwr_delay;            /* Largest delay we have seen from hardware */
  713         uint32_t non_gain_extra;
  714         uint32_t recovery_lr;                   /* The sum of the loss rate from the pe's during recovery */
  715         uint32_t last_in_probertt;
  716         uint32_t flightsize_at_drain;           /* In draining what was the last marked flight size */
  717         uint32_t rc_pe_of_prtt;                 /* PE we went into probe-rtt */
  718         uint32_t ts_in;                         /* ts that went with the last rtt */
  719 
  720         uint16_t rc_tlp_seg_send_cnt;   /* Number of times we have TLP sent
  721                                          * rc_last_tlp_seq Lock(a) */
  722         uint16_t rc_drain_pg;
  723         uint32_t rc_num_maps_alloced;           /* num send map entries allocated */
  724         uint32_t rc_num_split_allocs;           /* num split map entries allocated */
  725         uint16_t rc_num_small_maps_alloced;     /* Number of sack blocks
  726                                                  * allocated */
  727         uint16_t bbr_hptsi_bytes_min;
  728 
  729         uint16_t bbr_hptsi_segments_floor;
  730         uint16_t bbr_utter_max;
  731         uint16_t bbr_google_discount;
  732 
  733 };
  734 
  735 struct socket;  /* Forward declaration; referenced by pointer in r_substate below */
      /*
       * struct tcp_bbr -- top-level state block of the BBR TCP stack.
       *
       * Contents, in declaration order: a handler function pointer
       * (r_substate), back-pointers to the tcpcb and inpcb, a timeval, a
       * run of small counters and packed flag bit-fields, and finally the
       * embedded struct bbr_control (r_ctl).
       *
       * Layout notes:
       *  - Field order and bit-field widths determine the in-memory layout;
       *    the "cache line" markers are the original author's layout notes.
       *  - With _KERNEL defined the struct is declared
       *    __aligned(CACHE_LINE_SIZE); otherwise no alignment attribute
       *    is applied.
       *  - "Lock(a)" refers to a locking key that is defined outside this
       *    part of the file -- NOTE(review): confirm its meaning there.
       *  - NOTE(review): the "xxx_" prefix on two members presumably marks
       *    fields that are no longer used but kept for layout -- confirm.
       */
  736 struct tcp_bbr {
  737         /* First cache line 0x00 */
              /*
               * Handler function pointer.  NOTE(review): presumably the input
               * routine for the current connection state -- confirm against
               * the stack's .c file.
               */
  738         int32_t(*r_substate) (struct mbuf *, struct tcphdr *,
  739             struct socket *, struct tcpcb *, struct tcpopt *,
  740             int32_t, int32_t, uint32_t, int32_t, int32_t, uint8_t);     /* Lock(a) */
  741         struct tcpcb *rc_tp;    /* The tcpcb Lock(a) */
  742         struct inpcb *rc_inp;   /* The inpcb Lock(a) */
  743         struct timeval rc_tv;
  744         uint32_t rc_pacer_started;  /* Time we started the pacer */
              /*
               * NOTE(review): the widths in this group add up to 17 bits
               * (8+1+1+1+1+4+1), one more than a uint16_t holds, so
               * hw_pacing_set cannot share the 16-bit unit with the fields
               * before it.  Where the compiler places it is
               * implementation-defined; confirm the layout is as intended.
               */
  745         uint16_t no_pacing_until:8, /* No pacing until N packet epochs */
  746                  ts_can_raise:1,/* TS b/w calculations can raise the bw higher */
  747                  skip_gain:1,   /* Skip the gain cycle (hardware pacing) */
  748                  gain_is_limited:1,     /* With hardware pacing we are limiting gain */
  749                  output_error_seen:1,
  750                  oerror_cnt:4,
  751                 hw_pacing_set:1;        /* Long enough has passed for us to start pacing */
  752         uint16_t xxx_r_ack_count;       /* During recovery, count of ACKs received
  753                                  * that added data since output */
  754         uint16_t bbr_segs_rcvd; /* In Segment count since we sent an ACK */
  755 
              /* 4 + 1 + 1 + 1 + 1 = 8 bits: fills the uint8_t exactly. */
  756         uint8_t bbr_timer_src:4,        /* Used for debugging Lock(a) */
  757                 bbr_use_rack_cheat:1,   /* Use the rack cheat */
  758                 bbr_init_win_cheat:1,   /* Send full IW for TSO */
  759                 bbr_attempt_hdwr_pace:1,/* Try to do hardware pacing */
  760                 bbr_hdrw_pacing:1;      /* Hardware pacing is available */
  761         uint8_t bbr_hdw_pace_ena:1,     /* Does the connection allow hardware pacing to be attempted */
  762                 bbr_prev_in_rec:1,      /* We were previously in recovery */
  763                 pkt_conservation:1,
  764                 use_policer_detection:1,
  765                 xxx_bbr_hdw_pace_idx:4; /* If hardware pacing is on, index to slot in pace tbl */
              /* Sixteen 1-bit flags: fills the uint16_t exactly. */
  766         uint16_t r_wanted_output:1,
  767                  rtt_valid:1,
  768                  rc_timer_first:1,
  769                  rc_output_starts_timer:1,
  770                  rc_resends_use_tso:1,
  771                  rc_all_timers_stopped:1,
  772                  rc_loss_exit:1,
  773                  rc_ack_was_delayed:1,
  774                  rc_lt_is_sampling:1,
  775                  rc_filled_pipe:1,
  776                  rc_tlp_new_data:1,
  777                  rc_hit_state_1:1,
  778                  rc_ts_valid:1,
  779                  rc_prtt_set_ts:1,
  780                  rc_is_pkt_epoch_now:1,
  781                  rc_has_collapsed:1;
  782 
  783         uint8_t r_state:4,      /* Current bbr state Lock(a) */
  784                 r_agg_early_set:1,      /* Did we get called early */
  785                 r_init_rtt:1,
  786                 r_use_policer:1,        /* For google mode only */
  787                 r_recovery_bw:1;
              /* Seven 1-bit flags declared here; one bit of the uint8_t is unused. */
  788         uint8_t r_timer_override:1,     /* pacer override Lock(a)  0/1 */
  789                 rc_in_persist:1,
  790                 rc_lt_use_bw:1,
  791                 rc_allow_data_af_clo:1,
  792                 rc_tlp_rtx_out:1,       /* A TLP is in flight  */
  793                 rc_tlp_in_progress:1,   /* A TLP timer is running (needed?) */
  794                 rc_use_idle_restart:1;   /* Do we restart fast after idle (persist or applim) */
  795         uint8_t rc_bbr_state:3, /* What is the major BBR state */
  796                 rc_bbr_substate:3,      /* For probeBW state */
  797                 r_is_v6:1,
  798                 rc_past_init_win:1;
  799         uint8_t rc_last_options;
  800         uint8_t rc_tlp_threshold;       /* Socket option value Lock(a) */
  801         uint8_t rc_max_rto_sec;
  802         uint8_t rc_cwnd_limited:1,      /* We are cwnd limited */
  803                 rc_tmr_stopped:7;       /* What timers have been stopped  */
  804         uint8_t rc_use_google:1,
  805                 rc_use_ts_limit:1,
  806                 rc_ts_data_set:1,       /* We have filled a set point to determine */
  807                 rc_ts_clock_set:1,      /* We have determined the ts type */
  808                 rc_ts_cant_be_used:1,   /* We determined we can't use ts values */
  809                 rc_ack_is_cumack:1,
  810                 rc_no_pacing:1,
  811                 alloc_limit_reported:1;
  812         uint8_t rc_init_win;
  813         /* Cache line 2 0x40 */
  814         struct bbr_control r_ctl;       /* Embedded control block (struct bbr_control) */
  815 #ifdef _KERNEL
  816 }       __aligned(CACHE_LINE_SIZE);     /* Kernel build: cache-line aligned */
  817 #else   /* !_KERNEL */
  818 };      /* Non-kernel build: no alignment attribute */
  819 #endif  /* _KERNEL */
  820 
  821 #endif

Cache object: 775f158029d0c0a1db1bb237161eec9e


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.