The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/netinet/cc/cc_cubic.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
    3  *
    4  * Copyright (c) 2008-2010 Lawrence Stewart <lstewart@freebsd.org>
    5  * Copyright (c) 2010 The FreeBSD Foundation
    6  * All rights reserved.
    7  *
    8  * This software was developed by Lawrence Stewart while studying at the Centre
    9  * for Advanced Internet Architectures, Swinburne University of Technology, made
   10  * possible in part by a grant from the Cisco University Research Program Fund
   11  * at Community Foundation Silicon Valley.
   12  *
   13  * Portions of this software were developed at the Centre for Advanced
   14  * Internet Architectures, Swinburne University of Technology, Melbourne,
   15  * Australia by David Hayes under sponsorship from the FreeBSD Foundation.
   16  *
   17  * Redistribution and use in source and binary forms, with or without
   18  * modification, are permitted provided that the following conditions
   19  * are met:
   20  * 1. Redistributions of source code must retain the above copyright
   21  *    notice, this list of conditions and the following disclaimer.
   22  * 2. Redistributions in binary form must reproduce the above copyright
   23  *    notice, this list of conditions and the following disclaimer in the
   24  *    documentation and/or other materials provided with the distribution.
   25  *
   26  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   29  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   36  * SUCH DAMAGE.
   37  */
   38 
   39 /*
   40  * An implementation of the CUBIC congestion control algorithm for FreeBSD,
   41  * based on the Internet Draft "draft-rhee-tcpm-cubic-02" by Rhee, Xu and Ha.
   42  * Originally released as part of the NewTCP research project at Swinburne
   43  * University of Technology's Centre for Advanced Internet Architectures,
   44  * Melbourne, Australia, which was made possible in part by a grant from the
   45  * Cisco University Research Program Fund at Community Foundation Silicon
   46  * Valley. More details are available at:
   47  *   http://caia.swin.edu.au/urp/newtcp/
   48  */
   49 
   50 #include <sys/cdefs.h>
   51 __FBSDID("$FreeBSD$");
   52 
   53 #include <sys/param.h>
   54 #include <sys/kernel.h>
   55 #include <sys/limits.h>
   56 #include <sys/malloc.h>
   57 #include <sys/module.h>
   58 #include <sys/socket.h>
   59 #include <sys/socketvar.h>
   60 #include <sys/sysctl.h>
   61 #include <sys/systm.h>
   62 
   63 #include <net/vnet.h>
   64 
   65 #include <net/route.h>
   66 #include <net/route/nhop.h>
   67 
   68 #include <netinet/in_pcb.h>
   69 #include <netinet/tcp.h>
   70 #include <netinet/tcp_seq.h>
   71 #include <netinet/tcp_timer.h>
   72 #include <netinet/tcp_var.h>
   73 #include <netinet/tcp_log_buf.h>
   74 #include <netinet/tcp_hpts.h>
   75 #include <netinet/cc/cc.h>
   76 #include <netinet/cc/cc_cubic.h>
   77 #include <netinet/cc/cc_module.h>
   78 
   79 static void     cubic_ack_received(struct cc_var *ccv, uint16_t type);
   80 static void     cubic_cb_destroy(struct cc_var *ccv);
   81 static int      cubic_cb_init(struct cc_var *ccv, void *ptr);
   82 static void     cubic_cong_signal(struct cc_var *ccv, uint32_t type);
   83 static void     cubic_conn_init(struct cc_var *ccv);
   84 static int      cubic_mod_init(void);
   85 static void     cubic_post_recovery(struct cc_var *ccv);
   86 static void     cubic_record_rtt(struct cc_var *ccv);
   87 static void     cubic_ssthresh_update(struct cc_var *ccv, uint32_t maxseg);
   88 static void     cubic_after_idle(struct cc_var *ccv);
   89 static size_t   cubic_data_sz(void);
   90 static void     cubic_newround(struct cc_var *ccv, uint32_t round_cnt);
   91 static void     cubic_rttsample(struct cc_var *ccv, uint32_t usec_rtt,
   92        uint32_t rxtcnt, uint32_t fas);
   93 
   94 struct cc_algo cubic_cc_algo = {
   95         .name = "cubic",
   96         .ack_received = cubic_ack_received,
   97         .cb_destroy = cubic_cb_destroy,
   98         .cb_init = cubic_cb_init,
   99         .cong_signal = cubic_cong_signal,
  100         .conn_init = cubic_conn_init,
  101         .mod_init = cubic_mod_init,
  102         .post_recovery = cubic_post_recovery,
  103         .after_idle = cubic_after_idle,
  104         .cc_data_sz = cubic_data_sz,
  105         .rttsample = cubic_rttsample,
  106         .newround = cubic_newround
  107 };
  108 
  109 static void
  110 cubic_log_hystart_event(struct cc_var *ccv, struct cubic *cubicd, uint8_t mod, uint32_t flex1)
  111 {
  112         /*
  113          * Types of logs (mod value)
  114          * 1 - rtt_thresh in flex1, checking to see if RTT is to great.
  115          * 2 - rtt is too great, rtt_thresh in flex1.
  116          * 3 - CSS is active incr in flex1
  117          * 4 - A new round is beginning flex1 is round count
  118          * 5 - A new RTT measurement flex1 is the new measurement.
  119          * 6 - We enter CA ssthresh is also in flex1.
  120          * 7 - Socket option to change hystart executed opt.val in flex1.
  121          * 8 - Back out of CSS into SS, flex1 is the css_baseline_minrtt
  122          * 9 - We enter CA, via an ECN mark.
  123          * 10 - We enter CA, via a loss.
  124          * 11 - We have slipped out of SS into CA via cwnd growth.
  125          * 12 - After idle has re-enabled hystart++
  126          */
  127         struct tcpcb *tp;
  128 
  129         if (hystart_bblogs == 0)
  130                 return;
  131         tp = ccv->ccvc.tcp;
  132         if (tp->t_logstate != TCP_LOG_STATE_OFF) {
  133                 union tcp_log_stackspecific log;
  134                 struct timeval tv;
  135 
  136                 memset(&log, 0, sizeof(log));
  137                 log.u_bbr.flex1 = flex1;
  138                 log.u_bbr.flex2 = cubicd->css_current_round_minrtt;
  139                 log.u_bbr.flex3 = cubicd->css_lastround_minrtt;
  140                 log.u_bbr.flex4 = cubicd->css_rttsample_count;
  141                 log.u_bbr.flex5 = cubicd->css_entered_at_round;
  142                 log.u_bbr.flex6 = cubicd->css_baseline_minrtt;
  143                 /* We only need bottom 16 bits of flags */
  144                 log.u_bbr.flex7 = cubicd->flags & 0x0000ffff;
  145                 log.u_bbr.flex8 = mod;
  146                 log.u_bbr.epoch = cubicd->css_current_round;
  147                 log.u_bbr.timeStamp = tcp_get_usecs(&tv);
  148                 log.u_bbr.lt_epoch = cubicd->css_fas_at_css_entry;
  149                 log.u_bbr.pkts_out = cubicd->css_last_fas;
  150                 log.u_bbr.delivered = cubicd->css_lowrtt_fas;
  151                 log.u_bbr.pkt_epoch = ccv->flags;
  152                 TCP_LOG_EVENTP(tp, NULL,
  153                     &tptosocket(tp)->so_rcv,
  154                     &tptosocket(tp)->so_snd,
  155                     TCP_HYSTART, 0,
  156                     0, &log, false, &tv);
  157         }
  158 }
  159 
  160 static void
  161 cubic_does_slow_start(struct cc_var *ccv, struct cubic *cubicd)
  162 {
  163         /*
  164          * In slow-start with ABC enabled and no RTO in sight?
  165          * (Must not use abc_l_var > 1 if slow starting after
  166          * an RTO. On RTO, snd_nxt = snd_una, so the
  167          * snd_nxt == snd_max check is sufficient to
  168          * handle this).
  169          *
  170          * XXXLAS: Find a way to signal SS after RTO that
  171          * doesn't rely on tcpcb vars.
  172          */
  173         u_int cw = CCV(ccv, snd_cwnd);
  174         u_int incr = CCV(ccv, t_maxseg);
  175         uint16_t abc_val;
  176 
  177         cubicd->flags |= CUBICFLAG_IN_SLOWSTART;
  178         if (ccv->flags & CCF_USE_LOCAL_ABC)
  179                 abc_val = ccv->labc;
  180         else
  181                 abc_val = V_tcp_abc_l_var;
  182         if ((ccv->flags & CCF_HYSTART_ALLOWED) &&
  183             (cubicd->flags & CUBICFLAG_HYSTART_ENABLED) &&
  184             ((cubicd->flags & CUBICFLAG_HYSTART_IN_CSS) == 0)) {
  185                 /*
  186                  * Hystart is allowed and still enabled and we are not yet
  187                  * in CSS. Lets check to see if we can make a decision on
  188                  * if we need to go into CSS.
  189                  */
  190                 if ((cubicd->css_rttsample_count >= hystart_n_rttsamples) &&
  191                     (cubicd->css_current_round_minrtt != 0xffffffff) &&
  192                     (cubicd->css_lastround_minrtt != 0xffffffff)) {
  193                         uint32_t rtt_thresh;
  194 
  195                         /* Clamp (minrtt_thresh, lastround/8, maxrtt_thresh) */
  196                         rtt_thresh = (cubicd->css_lastround_minrtt >> 3);
  197                         if (rtt_thresh < hystart_minrtt_thresh)
  198                                 rtt_thresh = hystart_minrtt_thresh;
  199                         if (rtt_thresh > hystart_maxrtt_thresh)
  200                                 rtt_thresh = hystart_maxrtt_thresh;
  201                         cubic_log_hystart_event(ccv, cubicd, 1, rtt_thresh);
  202 
  203                         if (cubicd->css_current_round_minrtt >= (cubicd->css_lastround_minrtt + rtt_thresh)) {
  204                                 /* Enter CSS */
  205                                 cubicd->flags |= CUBICFLAG_HYSTART_IN_CSS;
  206                                 cubicd->css_fas_at_css_entry = cubicd->css_lowrtt_fas;
  207                                 /* 
  208                                  * The draft (v4) calls for us to set baseline to css_current_round_min
  209                                  * but that can cause an oscillation. We probably shoudl be using
  210                                  * css_lastround_minrtt, but the authors insist that will cause
  211                                  * issues on exiting early. We will leave the draft version for now
  212                                  * but I suspect this is incorrect.
  213                                  */
  214                                 cubicd->css_baseline_minrtt = cubicd->css_current_round_minrtt;
  215                                 cubicd->css_entered_at_round = cubicd->css_current_round;
  216                                 cubic_log_hystart_event(ccv, cubicd, 2, rtt_thresh);
  217                         }
  218                 }
  219         }
  220         if (CCV(ccv, snd_nxt) == CCV(ccv, snd_max))
  221                 incr = min(ccv->bytes_this_ack,
  222                            ccv->nsegs * abc_val *
  223                            CCV(ccv, t_maxseg));
  224         else
  225                 incr = min(ccv->bytes_this_ack, CCV(ccv, t_maxseg));
  226 
  227         /* Only if Hystart is enabled will the flag get set */
  228         if (cubicd->flags & CUBICFLAG_HYSTART_IN_CSS) {
  229                 incr /= hystart_css_growth_div;
  230                 cubic_log_hystart_event(ccv, cubicd, 3, incr);
  231         }
  232         /* ABC is on by default, so incr equals 0 frequently. */
  233         if (incr > 0)
  234                 CCV(ccv, snd_cwnd) = min((cw + incr),
  235                                          TCP_MAXWIN << CCV(ccv, snd_scale));
  236 }
  237 
  238 static void
  239 cubic_ack_received(struct cc_var *ccv, uint16_t type)
  240 {
  241         struct cubic *cubic_data;
  242         unsigned long w_tf, w_cubic_next;
  243         int ticks_since_cong;
  244 
  245         cubic_data = ccv->cc_data;
  246         cubic_record_rtt(ccv);
  247 
  248         /*
  249          * For a regular ACK and we're not in cong/fast recovery and
  250          * we're cwnd limited, always recalculate cwnd.
  251          */
  252         if (type == CC_ACK && !IN_RECOVERY(CCV(ccv, t_flags)) &&
  253             (ccv->flags & CCF_CWND_LIMITED)) {
  254                  /* Use the logic in NewReno ack_received() for slow start. */
  255                 if (CCV(ccv, snd_cwnd) <= CCV(ccv, snd_ssthresh) ||
  256                     cubic_data->min_rtt_ticks == TCPTV_SRTTBASE) {
  257                         cubic_does_slow_start(ccv, cubic_data);
  258                 } else {
  259                         if (cubic_data->flags & CUBICFLAG_HYSTART_IN_CSS) {
  260                                 /*
  261                                  * We have slipped into CA with
  262                                  * CSS active. Deactivate all.
  263                                  */
  264                                 /* Turn off the CSS flag */
  265                                 cubic_data->flags &= ~CUBICFLAG_HYSTART_IN_CSS;
  266                                 /* Disable use of CSS in the future except long idle  */
  267                                 cubic_data->flags &= ~CUBICFLAG_HYSTART_ENABLED;
  268                                 cubic_log_hystart_event(ccv, cubic_data, 11, CCV(ccv, snd_ssthresh));
  269                         }
  270                         if ((cubic_data->flags & CUBICFLAG_RTO_EVENT) &&
  271                             (cubic_data->flags & CUBICFLAG_IN_SLOWSTART)) {
  272                                 /* RFC8312 Section 4.7 */
  273                                 cubic_data->flags &= ~(CUBICFLAG_RTO_EVENT |
  274                                                        CUBICFLAG_IN_SLOWSTART);
  275                                 cubic_data->max_cwnd = CCV(ccv, snd_cwnd);
  276                                 cubic_data->K = 0;
  277                         } else if (cubic_data->flags & (CUBICFLAG_IN_SLOWSTART |
  278                                                  CUBICFLAG_IN_APPLIMIT)) {
  279                                 cubic_data->flags &= ~(CUBICFLAG_IN_SLOWSTART |
  280                                                        CUBICFLAG_IN_APPLIMIT);
  281                                 cubic_data->t_last_cong = ticks;
  282                                 cubic_data->K = cubic_k(cubic_data->max_cwnd /
  283                                                         CCV(ccv, t_maxseg));
  284                         }
  285                         if ((ticks_since_cong =
  286                             ticks - cubic_data->t_last_cong) < 0) {
  287                                 /*
  288                                  * dragging t_last_cong along
  289                                  */
  290                                 ticks_since_cong = INT_MAX;
  291                                 cubic_data->t_last_cong = ticks - INT_MAX;
  292                         }
  293                         /*
  294                          * The mean RTT is used to best reflect the equations in
  295                          * the I-D. Using min_rtt in the tf_cwnd calculation
  296                          * causes w_tf to grow much faster than it should if the
  297                          * RTT is dominated by network buffering rather than
  298                          * propagation delay.
  299                          */
  300                         w_tf = tf_cwnd(ticks_since_cong,
  301                             cubic_data->mean_rtt_ticks, cubic_data->max_cwnd,
  302                             CCV(ccv, t_maxseg));
  303 
  304                         w_cubic_next = cubic_cwnd(ticks_since_cong +
  305                             cubic_data->mean_rtt_ticks, cubic_data->max_cwnd,
  306                             CCV(ccv, t_maxseg), cubic_data->K);
  307 
  308                         ccv->flags &= ~CCF_ABC_SENTAWND;
  309 
  310                         if (w_cubic_next < w_tf) {
  311                                 /*
  312                                  * TCP-friendly region, follow tf
  313                                  * cwnd growth.
  314                                  */
  315                                 if (CCV(ccv, snd_cwnd) < w_tf)
  316                                         CCV(ccv, snd_cwnd) = ulmin(w_tf, INT_MAX);
  317                         } else if (CCV(ccv, snd_cwnd) < w_cubic_next) {
  318                                 /*
  319                                  * Concave or convex region, follow CUBIC
  320                                  * cwnd growth.
  321                                  * Only update snd_cwnd, if it doesn't shrink.
  322                                  */
  323                                 CCV(ccv, snd_cwnd) = ulmin(w_cubic_next,
  324                                     INT_MAX);
  325                         }
  326 
  327                         /*
  328                          * If we're not in slow start and we're probing for a
  329                          * new cwnd limit at the start of a connection
  330                          * (happens when hostcache has a relevant entry),
  331                          * keep updating our current estimate of the
  332                          * max_cwnd.
  333                          */
  334                         if (((cubic_data->flags & CUBICFLAG_CONG_EVENT) == 0) &&
  335                             cubic_data->max_cwnd < CCV(ccv, snd_cwnd)) {
  336                                 cubic_data->max_cwnd = CCV(ccv, snd_cwnd);
  337                                 cubic_data->K = cubic_k(cubic_data->max_cwnd /
  338                                     CCV(ccv, t_maxseg));
  339                         }
  340                 }
  341         } else if (type == CC_ACK && !IN_RECOVERY(CCV(ccv, t_flags)) &&
  342             !(ccv->flags & CCF_CWND_LIMITED)) {
  343                 cubic_data->flags |= CUBICFLAG_IN_APPLIMIT;
  344         }
  345 }
  346 
  347 /*
  348  * This is a CUBIC specific implementation of after_idle.
  349  *   - Reset cwnd by calling New Reno implementation of after_idle.
  350  *   - Reset t_last_cong.
  351  */
  352 static void
  353 cubic_after_idle(struct cc_var *ccv)
  354 {
  355         struct cubic *cubic_data;
  356 
  357         cubic_data = ccv->cc_data;
  358 
  359         cubic_data->max_cwnd = ulmax(cubic_data->max_cwnd, CCV(ccv, snd_cwnd));
  360         cubic_data->K = cubic_k(cubic_data->max_cwnd / CCV(ccv, t_maxseg));
  361         if ((cubic_data->flags & CUBICFLAG_HYSTART_ENABLED) == 0) {
  362                 /*
  363                  * Re-enable hystart if we have been idle.
  364                  */
  365                 cubic_data->flags &= ~CUBICFLAG_HYSTART_IN_CSS;
  366                 cubic_data->flags |= CUBICFLAG_HYSTART_ENABLED;
  367                 cubic_log_hystart_event(ccv, cubic_data, 12, CCV(ccv, snd_ssthresh));
  368         }
  369         newreno_cc_after_idle(ccv);
  370         cubic_data->t_last_cong = ticks;
  371 }
  372 
  373 static void
  374 cubic_cb_destroy(struct cc_var *ccv)
  375 {
  376         free(ccv->cc_data, M_CC_MEM);
  377 }
  378 
  379 static size_t
  380 cubic_data_sz(void)
  381 {
  382         return (sizeof(struct cubic));
  383 }
  384 
  385 static int
  386 cubic_cb_init(struct cc_var *ccv, void *ptr)
  387 {
  388         struct cubic *cubic_data;
  389 
  390         INP_WLOCK_ASSERT(tptoinpcb(ccv->ccvc.tcp));
  391         if (ptr == NULL) {
  392                 cubic_data = malloc(sizeof(struct cubic), M_CC_MEM, M_NOWAIT|M_ZERO);
  393                 if (cubic_data == NULL)
  394                         return (ENOMEM);
  395         } else
  396                 cubic_data = ptr;
  397 
  398         /* Init some key variables with sensible defaults. */
  399         cubic_data->t_last_cong = ticks;
  400         cubic_data->min_rtt_ticks = TCPTV_SRTTBASE;
  401         cubic_data->mean_rtt_ticks = 1;
  402 
  403         ccv->cc_data = cubic_data;
  404         cubic_data->flags = CUBICFLAG_HYSTART_ENABLED;
  405         /* At init set both to infinity */
  406         cubic_data->css_lastround_minrtt = 0xffffffff;
  407         cubic_data->css_current_round_minrtt = 0xffffffff;
  408         cubic_data->css_current_round = 0;
  409         cubic_data->css_baseline_minrtt = 0xffffffff;
  410         cubic_data->css_rttsample_count = 0;
  411         cubic_data->css_entered_at_round = 0;
  412         cubic_data->css_fas_at_css_entry = 0;
  413         cubic_data->css_lowrtt_fas = 0;
  414         cubic_data->css_last_fas = 0;
  415 
  416         return (0);
  417 }
  418 
  419 /*
  420  * Perform any necessary tasks before we enter congestion recovery.
  421  */
  422 static void
  423 cubic_cong_signal(struct cc_var *ccv, uint32_t type)
  424 {
  425         struct cubic *cubic_data;
  426         u_int mss;
  427 
  428         cubic_data = ccv->cc_data;
  429         mss = tcp_maxseg(ccv->ccvc.tcp);
  430 
  431         switch (type) {
  432         case CC_NDUPACK:
  433                 if (cubic_data->flags & CUBICFLAG_HYSTART_ENABLED) {
  434                         /* Make sure the flags are all off we had a loss */
  435                         cubic_data->flags &= ~CUBICFLAG_HYSTART_ENABLED;
  436                         cubic_data->flags &= ~CUBICFLAG_HYSTART_IN_CSS;
  437                         cubic_log_hystart_event(ccv, cubic_data, 10, CCV(ccv, snd_ssthresh));
  438                 }
  439                 if (!IN_FASTRECOVERY(CCV(ccv, t_flags))) {
  440                         if (!IN_CONGRECOVERY(CCV(ccv, t_flags))) {
  441                                 cubic_ssthresh_update(ccv, mss);
  442                                 cubic_data->flags |= CUBICFLAG_CONG_EVENT;
  443                                 cubic_data->t_last_cong = ticks;
  444                                 cubic_data->K = cubic_k(cubic_data->max_cwnd / mss);
  445                         }
  446                         ENTER_RECOVERY(CCV(ccv, t_flags));
  447                 }
  448                 break;
  449 
  450         case CC_ECN:
  451                 if (cubic_data->flags & CUBICFLAG_HYSTART_ENABLED) {
  452                         /* Make sure the flags are all off we had a loss */
  453                         cubic_data->flags &= ~CUBICFLAG_HYSTART_ENABLED;
  454                         cubic_data->flags &= ~CUBICFLAG_HYSTART_IN_CSS;
  455                         cubic_log_hystart_event(ccv, cubic_data, 9, CCV(ccv, snd_ssthresh));
  456                 }
  457                 if (!IN_CONGRECOVERY(CCV(ccv, t_flags))) {
  458                         cubic_ssthresh_update(ccv, mss);
  459                         cubic_data->flags |= CUBICFLAG_CONG_EVENT;
  460                         cubic_data->t_last_cong = ticks;
  461                         cubic_data->K = cubic_k(cubic_data->max_cwnd / mss);
  462                         CCV(ccv, snd_cwnd) = CCV(ccv, snd_ssthresh);
  463                         ENTER_CONGRECOVERY(CCV(ccv, t_flags));
  464                 }
  465                 break;
  466 
  467         case CC_RTO:
  468                 /* RFC8312 Section 4.7 */
  469                 if (CCV(ccv, t_rxtshift) == 1) {
  470                         cubic_data->t_last_cong_prev = cubic_data->t_last_cong;
  471                         cubic_data->prev_max_cwnd_cp = cubic_data->prev_max_cwnd;
  472                 }
  473                 cubic_data->flags |= CUBICFLAG_CONG_EVENT | CUBICFLAG_RTO_EVENT;
  474                 cubic_data->prev_max_cwnd = cubic_data->max_cwnd;
  475                 CCV(ccv, snd_ssthresh) = ((uint64_t)CCV(ccv, snd_cwnd) *
  476                                           CUBIC_BETA) >> CUBIC_SHIFT;
  477                 CCV(ccv, snd_cwnd) = mss;
  478                 break;
  479 
  480         case CC_RTO_ERR:
  481                 cubic_data->flags &= ~(CUBICFLAG_CONG_EVENT | CUBICFLAG_RTO_EVENT);
  482                 cubic_data->max_cwnd = cubic_data->prev_max_cwnd;
  483                 cubic_data->prev_max_cwnd = cubic_data->prev_max_cwnd_cp;
  484                 cubic_data->t_last_cong = cubic_data->t_last_cong_prev;
  485                 cubic_data->K = cubic_k(cubic_data->max_cwnd / mss);
  486                 break;
  487         }
  488 }
  489 
  490 static void
  491 cubic_conn_init(struct cc_var *ccv)
  492 {
  493         struct cubic *cubic_data;
  494 
  495         cubic_data = ccv->cc_data;
  496 
  497         /*
  498          * Ensure we have a sane initial value for max_cwnd recorded. Without
  499          * this here bad things happen when entries from the TCP hostcache
  500          * get used.
  501          */
  502         cubic_data->max_cwnd = CCV(ccv, snd_cwnd);
  503 }
  504 
  505 static int
  506 cubic_mod_init(void)
  507 {
  508         return (0);
  509 }
  510 
  511 /*
  512  * Perform any necessary tasks before we exit congestion recovery.
  513  */
  514 static void
  515 cubic_post_recovery(struct cc_var *ccv)
  516 {
  517         struct cubic *cubic_data;
  518         int pipe;
  519 
  520         cubic_data = ccv->cc_data;
  521         pipe = 0;
  522 
  523         if (IN_FASTRECOVERY(CCV(ccv, t_flags))) {
  524                 /*
  525                  * If inflight data is less than ssthresh, set cwnd
  526                  * conservatively to avoid a burst of data, as suggested in
  527                  * the NewReno RFC. Otherwise, use the CUBIC method.
  528                  *
  529                  * XXXLAS: Find a way to do this without needing curack
  530                  */
  531                 if (V_tcp_do_newsack)
  532                         pipe = tcp_compute_pipe(ccv->ccvc.tcp);
  533                 else
  534                         pipe = CCV(ccv, snd_max) - ccv->curack;
  535 
  536                 if (pipe < CCV(ccv, snd_ssthresh))
  537                         /*
  538                          * Ensure that cwnd does not collapse to 1 MSS under
  539                          * adverse conditions. Implements RFC6582
  540                          */
  541                         CCV(ccv, snd_cwnd) = max(pipe, CCV(ccv, t_maxseg)) +
  542                             CCV(ccv, t_maxseg);
  543                 else
  544                         /* Update cwnd based on beta and adjusted max_cwnd. */
  545                         CCV(ccv, snd_cwnd) = max(((uint64_t)cubic_data->max_cwnd *
  546                             CUBIC_BETA) >> CUBIC_SHIFT,
  547                             2 * CCV(ccv, t_maxseg));
  548         }
  549 
  550         /* Calculate the average RTT between congestion epochs. */
  551         if (cubic_data->epoch_ack_count > 0 &&
  552             cubic_data->sum_rtt_ticks >= cubic_data->epoch_ack_count) {
  553                 cubic_data->mean_rtt_ticks = (int)(cubic_data->sum_rtt_ticks /
  554                     cubic_data->epoch_ack_count);
  555         }
  556 
  557         cubic_data->epoch_ack_count = 0;
  558         cubic_data->sum_rtt_ticks = 0;
  559 }
  560 
  561 /*
  562  * Record the min RTT and sum samples for the epoch average RTT calculation.
  563  */
  564 static void
  565 cubic_record_rtt(struct cc_var *ccv)
  566 {
  567         struct cubic *cubic_data;
  568         int t_srtt_ticks;
  569 
  570         /* Ignore srtt until a min number of samples have been taken. */
  571         if (CCV(ccv, t_rttupdated) >= CUBIC_MIN_RTT_SAMPLES) {
  572                 cubic_data = ccv->cc_data;
  573                 t_srtt_ticks = CCV(ccv, t_srtt) / TCP_RTT_SCALE;
  574 
  575                 /*
  576                  * Record the current SRTT as our minrtt if it's the smallest
  577                  * we've seen or minrtt is currently equal to its initialised
  578                  * value.
  579                  *
  580                  * XXXLAS: Should there be some hysteresis for minrtt?
  581                  */
  582                 if ((t_srtt_ticks < cubic_data->min_rtt_ticks ||
  583                     cubic_data->min_rtt_ticks == TCPTV_SRTTBASE)) {
  584                         cubic_data->min_rtt_ticks = max(1, t_srtt_ticks);
  585 
  586                         /*
  587                          * If the connection is within its first congestion
  588                          * epoch, ensure we prime mean_rtt_ticks with a
  589                          * reasonable value until the epoch average RTT is
  590                          * calculated in cubic_post_recovery().
  591                          */
  592                         if (cubic_data->min_rtt_ticks >
  593                             cubic_data->mean_rtt_ticks)
  594                                 cubic_data->mean_rtt_ticks =
  595                                     cubic_data->min_rtt_ticks;
  596                 }
  597 
  598                 /* Sum samples for epoch average RTT calculation. */
  599                 cubic_data->sum_rtt_ticks += t_srtt_ticks;
  600                 cubic_data->epoch_ack_count++;
  601         }
  602 }
  603 
  604 /*
  605  * Update the ssthresh in the event of congestion.
  606  */
  607 static void
  608 cubic_ssthresh_update(struct cc_var *ccv, uint32_t maxseg)
  609 {
  610         struct cubic *cubic_data;
  611         uint32_t ssthresh;
  612         uint32_t cwnd;
  613 
  614         cubic_data = ccv->cc_data;
  615         cwnd = CCV(ccv, snd_cwnd);
  616 
  617         /* Fast convergence heuristic. */
  618         if (cwnd < cubic_data->max_cwnd) {
  619                 cwnd = ((uint64_t)cwnd * CUBIC_FC_FACTOR) >> CUBIC_SHIFT;
  620         }
  621         cubic_data->prev_max_cwnd = cubic_data->max_cwnd;
  622         cubic_data->max_cwnd = cwnd;
  623 
  624         /*
  625          * On the first congestion event, set ssthresh to cwnd * 0.5
  626          * and reduce max_cwnd to cwnd * beta. This aligns the cubic concave
  627          * region appropriately. On subsequent congestion events, set
  628          * ssthresh to cwnd * beta.
  629          */
  630         if ((cubic_data->flags & CUBICFLAG_CONG_EVENT) == 0) {
  631                 ssthresh = cwnd >> 1;
  632                 cubic_data->max_cwnd = ((uint64_t)cwnd *
  633                     CUBIC_BETA) >> CUBIC_SHIFT;
  634         } else {
  635                 ssthresh = ((uint64_t)cwnd *
  636                     CUBIC_BETA) >> CUBIC_SHIFT;
  637         }
  638         CCV(ccv, snd_ssthresh) = max(ssthresh, 2 * maxseg);
  639 }
  640 
  641 static void
  642 cubic_rttsample(struct cc_var *ccv, uint32_t usec_rtt, uint32_t rxtcnt, uint32_t fas)
  643 {
  644         struct cubic *cubicd;
  645 
  646         cubicd = ccv->cc_data;
  647         if (rxtcnt > 1) {
  648                 /*
  649                  * Only look at RTT's that are non-ambiguous.
  650                  */
  651                 return;
  652         }
  653         cubicd->css_rttsample_count++;
  654         cubicd->css_last_fas = fas;
  655         if (cubicd->css_current_round_minrtt > usec_rtt) {
  656                 cubicd->css_current_round_minrtt = usec_rtt;
  657                 cubicd->css_lowrtt_fas = cubicd->css_last_fas;
  658         }
  659         if ((cubicd->css_rttsample_count >= hystart_n_rttsamples) &&
  660             (cubicd->css_current_round_minrtt != 0xffffffff) &&
  661             (cubicd->css_current_round_minrtt < cubicd->css_baseline_minrtt) &&
  662             (cubicd->css_lastround_minrtt != 0xffffffff)) {
  663                 /*
  664                  * We were in CSS and the RTT is now less, we
  665                  * entered CSS erroneously.
  666                  */
  667                 cubicd->flags &= ~CUBICFLAG_HYSTART_IN_CSS;
  668                 cubic_log_hystart_event(ccv, cubicd, 8, cubicd->css_baseline_minrtt);
  669                 cubicd->css_baseline_minrtt = 0xffffffff;
  670         }
  671         if (cubicd->flags & CUBICFLAG_HYSTART_ENABLED)
  672                 cubic_log_hystart_event(ccv, cubicd, 5, usec_rtt);
  673 }
  674 
  675 static void
  676 cubic_newround(struct cc_var *ccv, uint32_t round_cnt)
  677 {
  678         struct cubic *cubicd;
  679 
  680         cubicd = ccv->cc_data;
  681         /* We have entered a new round */
  682         cubicd->css_lastround_minrtt = cubicd->css_current_round_minrtt;
  683         cubicd->css_current_round_minrtt = 0xffffffff;
  684         cubicd->css_rttsample_count = 0;
  685         cubicd->css_current_round = round_cnt;
  686         if ((cubicd->flags & CUBICFLAG_HYSTART_IN_CSS) &&
  687             ((round_cnt - cubicd->css_entered_at_round) >= hystart_css_rounds)) {
  688                 /* Enter CA */
  689                 if (ccv->flags & CCF_HYSTART_CAN_SH_CWND) {
  690                         /*
  691                          * We engage more than snd_ssthresh, engage
  692                          * the brakes!! Though we will stay in SS to
  693                          * creep back up again, so lets leave CSS active
  694                          * and give us hystart_css_rounds more rounds.
  695                          */
  696                         if (ccv->flags & CCF_HYSTART_CONS_SSTH) {
  697                                 CCV(ccv, snd_ssthresh) = ((cubicd->css_lowrtt_fas + cubicd->css_fas_at_css_entry) / 2);
  698                         } else {
  699                                 CCV(ccv, snd_ssthresh) = cubicd->css_lowrtt_fas;
  700                         }
  701                         CCV(ccv, snd_cwnd) = cubicd->css_fas_at_css_entry;
  702                         cubicd->css_entered_at_round = round_cnt;
  703                 } else {
  704                         CCV(ccv, snd_ssthresh) = CCV(ccv, snd_cwnd);
  705                         /* Turn off the CSS flag */
  706                         cubicd->flags &= ~CUBICFLAG_HYSTART_IN_CSS;
  707                         /* Disable use of CSS in the future except long idle  */
  708                         cubicd->flags &= ~CUBICFLAG_HYSTART_ENABLED;
  709                 }
  710                 cubic_log_hystart_event(ccv, cubicd, 6, CCV(ccv, snd_ssthresh));
  711         }
  712         if (cubicd->flags & CUBICFLAG_HYSTART_ENABLED)
  713                 cubic_log_hystart_event(ccv, cubicd, 4, round_cnt);
  714 }
  715 
  716 DECLARE_CC_MODULE(cubic, &cubic_cc_algo);	/* Declare the "cubic" CC module, handing the macro a pointer to cubic_cc_algo (macro defined elsewhere). */
  717 MODULE_VERSION(cubic, 2);	/* Tag the "cubic" module with version 2. */

Cache object: 14f4034179edb00781fe6f7aa253634e


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.