The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/netinet/cc/cc_cdg.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
    3  *
    4  * Copyright (c) 2009-2013
    5  *      Swinburne University of Technology, Melbourne, Australia
    6  * All rights reserved.
    7  *
    8  * This software was developed at the Centre for Advanced Internet
    9  * Architectures, Swinburne University of Technology, by David Hayes, made
   10  * possible in part by a gift from The Cisco University Research Program Fund,
   11  * a corporate advised fund of Silicon Valley Community Foundation. Development
   12  * and testing were further assisted by a grant from the FreeBSD Foundation.
   13  *
   14  * Redistribution and use in source and binary forms, with or without
   15  * modification, are permitted provided that the following conditions
   16  * are met:
   17  * 1. Redistributions of source code must retain the above copyright
   18  *    notice, this list of conditions and the following disclaimer.
   19  * 2. Redistributions in binary form must reproduce the above copyright
   20  *    notice, this list of conditions and the following disclaimer in the
   21  *    documentation and/or other materials provided with the distribution.
   22  *
   23  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   26  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   33  * SUCH DAMAGE.
   34  */
   35 
   36 /*
   37  * CAIA Delay-Gradient (CDG) congestion control algorithm
   38  *
   39  * An implemention of the delay-gradient congestion control algorithm proposed
   40  * in the following paper:
   41  *
   42  * D. A. Hayes and G. Armitage, "Revisiting TCP Congestion Control using Delay
   43  * Gradients", in IFIP Networking, Valencia, Spain, 9-13 May 2011.
   44  *
   45  * Developed as part of the NewTCP research project at Swinburne University of
   46  * Technology's Centre for Advanced Internet Architectures, Melbourne,
   47  * Australia. More details are available at:
   48  *   http://caia.swin.edu.au/urp/newtcp/
   49  */
   50 
   51 #include <sys/cdefs.h>
   52 __FBSDID("$FreeBSD$");
   53 
   54 #include <sys/param.h>
   55 #include <sys/hhook.h>
   56 #include <sys/kernel.h>
   57 #include <sys/khelp.h>
   58 #include <sys/limits.h>
   59 #include <sys/lock.h>
   60 #include <sys/malloc.h>
   61 #include <sys/module.h>
   62 #include <sys/queue.h>
   63 #include <sys/socket.h>
   64 #include <sys/socketvar.h>
   65 #include <sys/sysctl.h>
   66 #include <sys/systm.h>
   67 
   68 #include <net/vnet.h>
   69 
   70 #include <net/route.h>
   71 #include <net/route/nhop.h>
   72 
   73 #include <netinet/in_pcb.h>
   74 #include <netinet/tcp.h>
   75 #include <netinet/tcp_seq.h>
   76 #include <netinet/tcp_timer.h>
   77 #include <netinet/tcp_var.h>
   78 #include <netinet/cc/cc.h>
   79 #include <netinet/cc/cc_module.h>
   80 
   81 #include <netinet/khelp/h_ertt.h>
   82 
   83 #include <vm/uma.h>
   84 
   85 #define CDG_VERSION "0.1"
   86 
   87 /* Private delay-gradient induced congestion control signal. */
   88 #define CC_CDG_DELAY 0x01000000
   89 
   90 /* NewReno window deflation factor on loss (as a percentage). */
   91 #define RENO_BETA 50
   92 
   93 /* Queue states. */
   94 #define CDG_Q_EMPTY     1
   95 #define CDG_Q_RISING    2
   96 #define CDG_Q_FALLING   3
   97 #define CDG_Q_FULL      4
   98 #define CDG_Q_UNKNOWN   9999
   99 
  100 /* Number of bit shifts used in probexp lookup table. */
  101 #define EXP_PREC 15
  102 
  103 /* Largest gradient represented in probexp lookup table. */
  104 #define MAXGRAD 5
  105 
  106 /*
  107  * Delay Precision Enhance - number of bit shifts used for qtrend related
  108  * integer arithmetic precision.
  109  */
  110 #define D_P_E 7
  111 
  112 struct qdiff_sample {
  113         long qdiff;
  114         STAILQ_ENTRY(qdiff_sample) qdiff_lnk;
  115 };
  116 
  117 struct cdg {
  118         long max_qtrend;
  119         long min_qtrend;
  120         STAILQ_HEAD(minrtts_head, qdiff_sample) qdiffmin_q;
  121         STAILQ_HEAD(maxrtts_head, qdiff_sample) qdiffmax_q;
  122         long window_incr;
  123         /* rttcount for window increase when in congestion avoidance */
  124         long rtt_count;
  125         /* maximum measured rtt within an rtt period */
  126         int maxrtt_in_rtt;
  127         /* maximum measured rtt within prev rtt period */
  128         int maxrtt_in_prevrtt;
  129         /* minimum measured rtt within an rtt period */
  130         int minrtt_in_rtt;
  131         /* minimum measured rtt within prev rtt period */
  132         int minrtt_in_prevrtt;
  133         /* consecutive congestion episode counter */
  134         uint32_t consec_cong_cnt;
  135         /* when tracking a new reno type loss window */
  136         uint32_t shadow_w;
  137         /* maximum number of samples in the moving average queue */
  138         int sample_q_size;
  139         /* number of samples in the moving average queue */
  140         int num_samples;
  141         /* estimate of the queue state of the path */
  142         int queue_state;
  143 };
  144 
  145 /*
  146  * Lookup table for:
  147  *   (1 - exp(-x)) << EXP_PREC, where x = [0,MAXGRAD] in 2^-7 increments
  148  *
  149  * Note: probexp[0] is set to 10 (not 0) as a safety for very low increase
  150  * gradients.
  151  */
  152 static const int probexp[641] = {
  153    10,255,508,759,1008,1255,1501,1744,1985,2225,2463,2698,2932,3165,3395,3624,
  154    3850,4075,4299,4520,4740,4958,5175,5389,5602,5814,6024,6232,6438,6643,6846,
  155    7048,7248,7447,7644,7839,8033,8226,8417,8606,8794,8981,9166,9350,9532,9713,
  156    9892,10070,10247,10422,10596,10769,10940,11110,11278,11445,11611,11776,11939,
  157    12101,12262,12422,12580,12737,12893,13048,13201,13354,13505,13655,13803,13951,
  158    14097,14243,14387,14530,14672,14813,14952,15091,15229,15365,15500,15635,15768,
  159    15900,16032,16162,16291,16419,16547,16673,16798,16922,17046,17168,17289,17410,
  160    17529,17648,17766,17882,17998,18113,18227,18340,18453,18564,18675,18784,18893,
  161    19001,19108,19215,19320,19425,19529,19632,19734,19835,19936,20036,20135,20233,
  162    20331,20427,20523,20619,20713,20807,20900,20993,21084,21175,21265,21355,21444,
  163    21532,21619,21706,21792,21878,21962,22046,22130,22213,22295,22376,22457,22537,
  164    22617,22696,22774,22852,22929,23006,23082,23157,23232,23306,23380,23453,23525,
  165    23597,23669,23739,23810,23879,23949,24017,24085,24153,24220,24286,24352,24418,
  166    24483,24547,24611,24675,24738,24800,24862,24924,24985,25045,25106,25165,25224,
  167    25283,25341,25399,25456,25513,25570,25626,25681,25737,25791,25846,25899,25953,
  168    26006,26059,26111,26163,26214,26265,26316,26366,26416,26465,26514,26563,26611,
  169    26659,26707,26754,26801,26847,26893,26939,26984,27029,27074,27118,27162,27206,
  170    27249,27292,27335,27377,27419,27460,27502,27543,27583,27624,27664,27703,27743,
  171    27782,27821,27859,27897,27935,27973,28010,28047,28084,28121,28157,28193,28228,
  172    28263,28299,28333,28368,28402,28436,28470,28503,28536,28569,28602,28634,28667,
  173    28699,28730,28762,28793,28824,28854,28885,28915,28945,28975,29004,29034,29063,
  174    29092,29120,29149,29177,29205,29232,29260,29287,29314,29341,29368,29394,29421,
  175    29447,29472,29498,29524,29549,29574,29599,29623,29648,29672,29696,29720,29744,
  176    29767,29791,29814,29837,29860,29882,29905,29927,29949,29971,29993,30014,30036,
  177    30057,30078,30099,30120,30141,30161,30181,30201,30221,30241,30261,30280,30300,
  178    30319,30338,30357,30376,30394,30413,30431,30449,30467,30485,30503,30521,30538,
  179    30555,30573,30590,30607,30624,30640,30657,30673,30690,30706,30722,30738,30753,
  180    30769,30785,30800,30815,30831,30846,30861,30876,30890,30905,30919,30934,30948,
  181    30962,30976,30990,31004,31018,31031,31045,31058,31072,31085,31098,31111,31124,
  182    31137,31149,31162,31174,31187,31199,31211,31223,31235,31247,31259,31271,31283,
  183    31294,31306,31317,31328,31339,31351,31362,31373,31383,31394,31405,31416,31426,
  184    31436,31447,31457,31467,31477,31487,31497,31507,31517,31527,31537,31546,31556,
  185    31565,31574,31584,31593,31602,31611,31620,31629,31638,31647,31655,31664,31673,
  186    31681,31690,31698,31706,31715,31723,31731,31739,31747,31755,31763,31771,31778,
  187    31786,31794,31801,31809,31816,31824,31831,31838,31846,31853,31860,31867,31874,
  188    31881,31888,31895,31902,31908,31915,31922,31928,31935,31941,31948,31954,31960,
  189    31967,31973,31979,31985,31991,31997,32003,32009,32015,32021,32027,32033,32038,
  190    32044,32050,32055,32061,32066,32072,32077,32083,32088,32093,32098,32104,32109,
  191    32114,32119,32124,32129,32134,32139,32144,32149,32154,32158,32163,32168,32173,
  192    32177,32182,32186,32191,32195,32200,32204,32209,32213,32217,32222,32226,32230,
  193    32234,32238,32242,32247,32251,32255,32259,32263,32267,32270,32274,32278,32282,
  194    32286,32290,32293,32297,32301,32304,32308,32311,32315,32318,32322,32325,32329,
  195    32332,32336,32339,32342,32346,32349,32352,32356,32359,32362,32365,32368,32371,
  196    32374,32377,32381,32384,32387,32389,32392,32395,32398,32401,32404,32407,32410,
  197    32412,32415,32418,32421,32423,32426,32429,32431,32434,32437,32439,32442,32444,
  198    32447,32449,32452,32454,32457,32459,32461,32464,32466,32469,32471,32473,32476,
  199    32478,32480,32482,32485,32487,32489,32491,32493,32495,32497,32500,32502,32504,
  200    32506,32508,32510,32512,32514,32516,32518,32520,32522,32524,32526,32527,32529,
  201    32531,32533,32535,32537,32538,32540,32542,32544,32545,32547};
  202 
  203 static uma_zone_t qdiffsample_zone;
  204 static int ertt_id;
  205 
  206 VNET_DEFINE_STATIC(uint32_t, cdg_alpha_inc);
  207 VNET_DEFINE_STATIC(uint32_t, cdg_beta_delay);
  208 VNET_DEFINE_STATIC(uint32_t, cdg_beta_loss);
  209 VNET_DEFINE_STATIC(uint32_t, cdg_smoothing_factor);
  210 VNET_DEFINE_STATIC(uint32_t, cdg_exp_backoff_scale);
  211 VNET_DEFINE_STATIC(uint32_t, cdg_consec_cong);
  212 VNET_DEFINE_STATIC(uint32_t, cdg_hold_backoff);
  213 #define V_cdg_alpha_inc         VNET(cdg_alpha_inc)
  214 #define V_cdg_beta_delay        VNET(cdg_beta_delay)
  215 #define V_cdg_beta_loss         VNET(cdg_beta_loss)
  216 #define V_cdg_smoothing_factor  VNET(cdg_smoothing_factor)
  217 #define V_cdg_exp_backoff_scale VNET(cdg_exp_backoff_scale)
  218 #define V_cdg_consec_cong       VNET(cdg_consec_cong)
  219 #define V_cdg_hold_backoff      VNET(cdg_hold_backoff)
  220 
  221 /* Function prototypes. */
  222 static int cdg_mod_init(void);
  223 static int cdg_mod_destroy(void);
  224 static void cdg_conn_init(struct cc_var *ccv);
  225 static int cdg_cb_init(struct cc_var *ccv, void *ptr);
  226 static void cdg_cb_destroy(struct cc_var *ccv);
  227 static void cdg_cong_signal(struct cc_var *ccv, uint32_t signal_type);
  228 static void cdg_ack_received(struct cc_var *ccv, uint16_t ack_type);
  229 static size_t cdg_data_sz(void);
  230 
  231 struct cc_algo cdg_cc_algo = {
  232         .name = "cdg",
  233         .mod_init = cdg_mod_init,
  234         .ack_received = cdg_ack_received,
  235         .cb_destroy = cdg_cb_destroy,
  236         .cb_init = cdg_cb_init,
  237         .conn_init = cdg_conn_init,
  238         .cong_signal = cdg_cong_signal,
  239         .mod_destroy = cdg_mod_destroy,
  240         .cc_data_sz = cdg_data_sz,
  241         .post_recovery = newreno_cc_post_recovery,
  242         .after_idle = newreno_cc_after_idle,
  243 };
  244 
  245 /* Vnet created and being initialised. */
  246 static void
  247 cdg_init_vnet(const void *unused __unused)
  248 {
  249 
  250         V_cdg_alpha_inc = 0;
  251         V_cdg_beta_delay = 70;
  252         V_cdg_beta_loss = 50;
  253         V_cdg_smoothing_factor = 8;
  254         V_cdg_exp_backoff_scale = 3;
  255         V_cdg_consec_cong = 5;
  256         V_cdg_hold_backoff = 5;
  257 }
  258 
  259 static int
  260 cdg_mod_init(void)
  261 {
  262         VNET_ITERATOR_DECL(v);
  263 
  264         ertt_id = khelp_get_id("ertt");
  265         if (ertt_id <= 0)
  266                 return (EINVAL);
  267 
  268         qdiffsample_zone = uma_zcreate("cdg_qdiffsample",
  269             sizeof(struct qdiff_sample), NULL, NULL, NULL, NULL, 0, 0);
  270 
  271         VNET_LIST_RLOCK();
  272         VNET_FOREACH(v) {
  273                 CURVNET_SET(v);
  274                 cdg_init_vnet(NULL);
  275                 CURVNET_RESTORE();
  276         }
  277         VNET_LIST_RUNLOCK();
  278         return (0);
  279 }
  280 
  281 static int
  282 cdg_mod_destroy(void)
  283 {
  284 
  285         uma_zdestroy(qdiffsample_zone);
  286         return (0);
  287 }
  288 
  289 static size_t
  290 cdg_data_sz(void)
  291 {
  292         return (sizeof(struct cdg));
  293 }
  294 
  295 static int
  296 cdg_cb_init(struct cc_var *ccv, void *ptr)
  297 {
  298         struct cdg *cdg_data;
  299 
  300         INP_WLOCK_ASSERT(tptoinpcb(ccv->ccvc.tcp));
  301         if (ptr == NULL) {
  302                 cdg_data = malloc(sizeof(struct cdg), M_CC_MEM, M_NOWAIT);
  303                 if (cdg_data == NULL)
  304                         return (ENOMEM);
  305         } else {
  306                 cdg_data = ptr;
  307         }
  308         cdg_data->shadow_w = 0;
  309         cdg_data->max_qtrend = 0;
  310         cdg_data->min_qtrend = 0;
  311         cdg_data->queue_state = CDG_Q_UNKNOWN;
  312         cdg_data->maxrtt_in_rtt = 0;
  313         cdg_data->maxrtt_in_prevrtt = 0;
  314         cdg_data->minrtt_in_rtt = INT_MAX;
  315         cdg_data->minrtt_in_prevrtt = 0;
  316         cdg_data->window_incr = 0;
  317         cdg_data->rtt_count = 0;
  318         cdg_data->consec_cong_cnt = 0;
  319         cdg_data->sample_q_size = V_cdg_smoothing_factor;
  320         cdg_data->num_samples = 0;
  321         STAILQ_INIT(&cdg_data->qdiffmin_q);
  322         STAILQ_INIT(&cdg_data->qdiffmax_q);
  323 
  324         ccv->cc_data = cdg_data;
  325 
  326         return (0);
  327 }
  328 
  329 static void
  330 cdg_conn_init(struct cc_var *ccv)
  331 {
  332         struct cdg *cdg_data = ccv->cc_data;
  333 
  334         /*
  335          * Initialise the shadow_cwnd in case we are competing with loss based
  336          * flows from the start
  337          */
  338         cdg_data->shadow_w = CCV(ccv, snd_cwnd);
  339 }
  340 
  341 static void
  342 cdg_cb_destroy(struct cc_var *ccv)
  343 {
  344         struct cdg *cdg_data;
  345         struct qdiff_sample *qds, *qds_n;
  346 
  347         cdg_data = ccv->cc_data;
  348 
  349         qds = STAILQ_FIRST(&cdg_data->qdiffmin_q);
  350         while (qds != NULL) {
  351                 qds_n = STAILQ_NEXT(qds, qdiff_lnk);
  352                 uma_zfree(qdiffsample_zone,qds);
  353                 qds = qds_n;
  354         }
  355 
  356         qds = STAILQ_FIRST(&cdg_data->qdiffmax_q);
  357         while (qds != NULL) {
  358                 qds_n = STAILQ_NEXT(qds, qdiff_lnk);
  359                 uma_zfree(qdiffsample_zone,qds);
  360                 qds = qds_n;
  361         }
  362 
  363         free(ccv->cc_data, M_CC_MEM);
  364 }
  365 
  366 static int
  367 cdg_beta_handler(SYSCTL_HANDLER_ARGS)
  368 {
  369         int error;
  370         uint32_t new;
  371 
  372         new = *(uint32_t *)arg1;
  373         error = sysctl_handle_int(oidp, &new, 0, req);
  374         if (error == 0 && req->newptr != NULL) {
  375                 if (new == 0 || new > 100)
  376                         error = EINVAL;
  377                 else
  378                         *(uint32_t *)arg1 = new;
  379         }
  380 
  381         return (error);
  382 }
  383 
  384 static int
  385 cdg_exp_backoff_scale_handler(SYSCTL_HANDLER_ARGS)
  386 {
  387         int error;
  388         uint32_t new;
  389 
  390         new = *(uint32_t *)arg1;
  391         error = sysctl_handle_int(oidp, &new, 0, req);
  392         if (error == 0 && req->newptr != NULL) {
  393                 if (new < 1)
  394                         error = EINVAL;
  395                 else
  396                         *(uint32_t *)arg1 = new;
  397         }
  398 
  399         return (error);
  400 }
  401 
  402 static inline uint32_t
  403 cdg_window_decrease(struct cc_var *ccv, unsigned long owin, unsigned int beta)
  404 {
  405 
  406         return ((ulmin(CCV(ccv, snd_wnd), owin) * beta) / 100);
  407 }
  408 
  409 /*
  410  * Window increase function
  411  * This window increase function is independent of the initial window size
  412  * to ensure small window flows are not discriminated against (i.e. fairness).
  413  * It increases at 1pkt/rtt like Reno for alpha_inc rtts, and then 2pkts/rtt for
  414  * the next alpha_inc rtts, etc.
  415  */
  416 static void
  417 cdg_window_increase(struct cc_var *ccv, int new_measurement)
  418 {
  419         struct cdg *cdg_data;
  420         int incr, s_w_incr;
  421 
  422         cdg_data = ccv->cc_data;
  423         incr = s_w_incr = 0;
  424 
  425         if (CCV(ccv, snd_cwnd) <= CCV(ccv, snd_ssthresh)) {
  426                 /* Slow start. */
  427                 incr = CCV(ccv, t_maxseg);
  428                 s_w_incr = incr;
  429                 cdg_data->window_incr = cdg_data->rtt_count = 0;
  430         } else {
  431                 /* Congestion avoidance. */
  432                 if (new_measurement) {
  433                         s_w_incr = CCV(ccv, t_maxseg);
  434                         if (V_cdg_alpha_inc == 0) {
  435                                 incr = CCV(ccv, t_maxseg);
  436                         } else {
  437                                 if (++cdg_data->rtt_count >= V_cdg_alpha_inc) {
  438                                         cdg_data->window_incr++;
  439                                         cdg_data->rtt_count = 0;
  440                                 }
  441                                 incr = CCV(ccv, t_maxseg) *
  442                                     cdg_data->window_incr;
  443                         }
  444                 }
  445         }
  446 
  447         if (cdg_data->shadow_w > 0)
  448                 cdg_data->shadow_w = ulmin(cdg_data->shadow_w + s_w_incr,
  449                     TCP_MAXWIN << CCV(ccv, snd_scale));
  450 
  451         CCV(ccv, snd_cwnd) = ulmin(CCV(ccv, snd_cwnd) + incr,
  452             TCP_MAXWIN << CCV(ccv, snd_scale));
  453 }
  454 
  455 static void
  456 cdg_cong_signal(struct cc_var *ccv, uint32_t signal_type)
  457 {
  458         struct cdg *cdg_data = ccv->cc_data;
  459 
  460         switch(signal_type) {
  461         case CC_CDG_DELAY:
  462                 CCV(ccv, snd_ssthresh) = cdg_window_decrease(ccv,
  463                     CCV(ccv, snd_cwnd), V_cdg_beta_delay);
  464                 CCV(ccv, snd_cwnd) = CCV(ccv, snd_ssthresh);
  465                 CCV(ccv, snd_recover) = CCV(ccv, snd_max);
  466                 cdg_data->window_incr = cdg_data->rtt_count = 0;
  467                 ENTER_CONGRECOVERY(CCV(ccv, t_flags));
  468                 break;
  469         case CC_NDUPACK:
  470                 /*
  471                  * If already responding to congestion OR we have guessed no
  472                  * queue in the path is full.
  473                  */
  474                 if (IN_CONGRECOVERY(CCV(ccv, t_flags)) ||
  475                     cdg_data->queue_state < CDG_Q_FULL) {
  476                         CCV(ccv, snd_ssthresh) = CCV(ccv, snd_cwnd);
  477                         CCV(ccv, snd_recover) = CCV(ccv, snd_max);
  478                 } else {
  479                         /*
  480                          * Loss is likely to be congestion related. We have
  481                          * inferred a queue full state, so have shadow window
  482                          * react to loss as NewReno would.
  483                          */
  484                         if (cdg_data->shadow_w > 0)
  485                                 cdg_data->shadow_w = cdg_window_decrease(ccv,
  486                                     cdg_data->shadow_w, RENO_BETA);
  487 
  488                         CCV(ccv, snd_ssthresh) = max(cdg_data->shadow_w,
  489                             cdg_window_decrease(ccv, CCV(ccv, snd_cwnd),
  490                             V_cdg_beta_loss));
  491 
  492                         cdg_data->window_incr = cdg_data->rtt_count = 0;
  493                 }
  494                 ENTER_RECOVERY(CCV(ccv, t_flags));
  495                 break;
  496         default:
  497                 newreno_cc_cong_signal(ccv, signal_type);
  498                 break;
  499         }
  500 }
  501 
  502 /*
  503  * Using a negative exponential probabilistic backoff so that sources with
  504  * varying RTTs which share the same link will, on average, have the same
  505  * probability of backoff over time.
  506  *
  507  * Prob_backoff = 1 - exp(-qtrend / V_cdg_exp_backoff_scale), where
  508  * V_cdg_exp_backoff_scale is the average qtrend for the exponential backoff.
  509  */
  510 static inline int
  511 prob_backoff(long qtrend)
  512 {
  513         int backoff, idx, p;
  514 
  515         backoff = (qtrend > ((MAXGRAD * V_cdg_exp_backoff_scale) << D_P_E));
  516 
  517         if (!backoff) {
  518                 if (V_cdg_exp_backoff_scale > 1)
  519                         idx = (qtrend + V_cdg_exp_backoff_scale / 2) /
  520                             V_cdg_exp_backoff_scale;
  521                 else
  522                         idx = qtrend;
  523 
  524                 /* Backoff probability proportional to rate of queue growth. */
  525                 p = (INT_MAX / (1 << EXP_PREC)) * probexp[idx];
  526                 backoff = (random() < p);
  527         }
  528 
  529         return (backoff);
  530 }
  531 
  532 static inline void
  533 calc_moving_average(struct cdg *cdg_data, long qdiff_max, long qdiff_min)
  534 {
  535         struct qdiff_sample *qds;
  536 
  537         ++cdg_data->num_samples;
  538         if (cdg_data->num_samples > cdg_data->sample_q_size) {
  539                 /* Minimum RTT. */
  540                 qds = STAILQ_FIRST(&cdg_data->qdiffmin_q);
  541                 cdg_data->min_qtrend =  cdg_data->min_qtrend +
  542                     (qdiff_min - qds->qdiff) / cdg_data->sample_q_size;
  543                 STAILQ_REMOVE_HEAD(&cdg_data->qdiffmin_q, qdiff_lnk);
  544                 qds->qdiff = qdiff_min;
  545                 STAILQ_INSERT_TAIL(&cdg_data->qdiffmin_q, qds, qdiff_lnk);
  546 
  547                 /* Maximum RTT. */
  548                 qds = STAILQ_FIRST(&cdg_data->qdiffmax_q);
  549                 cdg_data->max_qtrend =  cdg_data->max_qtrend +
  550                     (qdiff_max - qds->qdiff) / cdg_data->sample_q_size;
  551                 STAILQ_REMOVE_HEAD(&cdg_data->qdiffmax_q, qdiff_lnk);
  552                 qds->qdiff = qdiff_max;
  553                 STAILQ_INSERT_TAIL(&cdg_data->qdiffmax_q, qds, qdiff_lnk);
  554                 --cdg_data->num_samples;
  555         } else {
  556                 qds = uma_zalloc(qdiffsample_zone, M_NOWAIT);
  557                 if (qds != NULL) {
  558                         cdg_data->min_qtrend = cdg_data->min_qtrend +
  559                             qdiff_min / cdg_data->sample_q_size;
  560                         qds->qdiff = qdiff_min;
  561                         STAILQ_INSERT_TAIL(&cdg_data->qdiffmin_q, qds,
  562                             qdiff_lnk);
  563                 }
  564 
  565                 qds = uma_zalloc(qdiffsample_zone, M_NOWAIT);
  566                 if (qds) {
  567                         cdg_data->max_qtrend = cdg_data->max_qtrend +
  568                             qdiff_max / cdg_data->sample_q_size;
  569                         qds->qdiff = qdiff_max;
  570                         STAILQ_INSERT_TAIL(&cdg_data->qdiffmax_q, qds,
  571                             qdiff_lnk);
  572                 }
  573         }
  574 }
  575 
  576 static void
  577 cdg_ack_received(struct cc_var *ccv, uint16_t ack_type)
  578 {
  579         struct cdg *cdg_data;
  580         struct ertt *e_t;
  581         long qdiff_max, qdiff_min;
  582         int congestion, new_measurement, slowstart;
  583 
  584         cdg_data = ccv->cc_data;
  585         e_t = (struct ertt *)khelp_get_osd(&CCV(ccv, t_osd), ertt_id);
  586         new_measurement = e_t->flags & ERTT_NEW_MEASUREMENT;
  587         congestion = 0;
  588         cdg_data->maxrtt_in_rtt = imax(e_t->rtt, cdg_data->maxrtt_in_rtt);
  589         cdg_data->minrtt_in_rtt = imin(e_t->rtt, cdg_data->minrtt_in_rtt);
  590 
  591         if (new_measurement) {
  592                 slowstart = (CCV(ccv, snd_cwnd) <= CCV(ccv, snd_ssthresh));
  593                 /*
  594                  * Update smoothed gradient measurements. Since we are only
  595                  * using one measurement per RTT, use max or min rtt_in_rtt.
  596                  * This is also less noisy than a sample RTT measurement. Max
  597                  * RTT measurements can have trouble due to OS issues.
  598                  */
  599                 if (cdg_data->maxrtt_in_prevrtt) {
  600                         qdiff_max = ((long)(cdg_data->maxrtt_in_rtt -
  601                             cdg_data->maxrtt_in_prevrtt) << D_P_E );
  602                         qdiff_min = ((long)(cdg_data->minrtt_in_rtt -
  603                             cdg_data->minrtt_in_prevrtt) << D_P_E );
  604 
  605                         if (cdg_data->sample_q_size == 0) {
  606                                 cdg_data->max_qtrend = qdiff_max;
  607                                 cdg_data->min_qtrend = qdiff_min;
  608                         } else
  609                                 calc_moving_average(cdg_data, qdiff_max, qdiff_min);
  610 
  611                         /* Probabilistic backoff with respect to gradient. */
  612                         if (slowstart && qdiff_min > 0)
  613                                 congestion = prob_backoff(qdiff_min);
  614                         else if (cdg_data->min_qtrend > 0)
  615                                 congestion = prob_backoff(cdg_data->min_qtrend);
  616                         else if (slowstart && qdiff_max > 0)
  617                                 congestion = prob_backoff(qdiff_max);
  618                         else if (cdg_data->max_qtrend > 0)
  619                                 congestion = prob_backoff(cdg_data->max_qtrend);
  620 
  621                         /* Update estimate of queue state. */
  622                         if (cdg_data->min_qtrend > 0 &&
  623                             cdg_data->max_qtrend <= 0) {
  624                                 cdg_data->queue_state = CDG_Q_FULL;
  625                         } else if (cdg_data->min_qtrend >= 0 &&
  626                             cdg_data->max_qtrend < 0) {
  627                                 cdg_data->queue_state = CDG_Q_EMPTY;
  628                                 cdg_data->shadow_w = 0;
  629                         } else if (cdg_data->min_qtrend > 0 &&
  630                             cdg_data->max_qtrend > 0) {
  631                                 cdg_data->queue_state = CDG_Q_RISING;
  632                         } else if (cdg_data->min_qtrend < 0 &&
  633                             cdg_data->max_qtrend < 0) {
  634                                 cdg_data->queue_state = CDG_Q_FALLING;
  635                         }
  636 
  637                         if (cdg_data->min_qtrend < 0 ||
  638                             cdg_data->max_qtrend < 0)
  639                                 cdg_data->consec_cong_cnt = 0;
  640                 }
  641 
  642                 cdg_data->minrtt_in_prevrtt = cdg_data->minrtt_in_rtt;
  643                 cdg_data->minrtt_in_rtt = INT_MAX;
  644                 cdg_data->maxrtt_in_prevrtt = cdg_data->maxrtt_in_rtt;
  645                 cdg_data->maxrtt_in_rtt = 0;
  646                 e_t->flags &= ~ERTT_NEW_MEASUREMENT;
  647         }
  648 
  649         if (congestion) {
  650                 cdg_data->consec_cong_cnt++;
  651                 if (!IN_RECOVERY(CCV(ccv, t_flags))) {
  652                         if (cdg_data->consec_cong_cnt <= V_cdg_consec_cong)
  653                                 cdg_cong_signal(ccv, CC_CDG_DELAY);
  654                         else
  655                                 /*
  656                                  * We have been backing off but the queue is not
  657                                  * falling. Assume we are competing with
  658                                  * loss-based flows and don't back off for the
  659                                  * next V_cdg_hold_backoff RTT periods.
  660                                  */
  661                                 if (cdg_data->consec_cong_cnt >=
  662                                     V_cdg_consec_cong + V_cdg_hold_backoff)
  663                                         cdg_data->consec_cong_cnt = 0;
  664 
  665                         /* Won't see effect until 2nd RTT. */
  666                         cdg_data->maxrtt_in_prevrtt = 0;
  667                         /*
  668                          * Resync shadow window in case we are competing with a
  669                          * loss based flow
  670                          */
  671                         cdg_data->shadow_w = ulmax(CCV(ccv, snd_cwnd),
  672                             cdg_data->shadow_w);
  673                 }
  674         } else if (ack_type == CC_ACK)
  675                 cdg_window_increase(ccv, new_measurement);
  676 }
  677 
  678 /* When a vnet is created and being initialised, init the per-stack CDG vars. */
  679 VNET_SYSINIT(cdg_init_vnet, SI_SUB_PROTO_BEGIN, SI_ORDER_FIRST,
  680     cdg_init_vnet, NULL);
  681 
  682 SYSCTL_DECL(_net_inet_tcp_cc_cdg);
  683 SYSCTL_NODE(_net_inet_tcp_cc, OID_AUTO, cdg, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL,
  684     "CAIA delay-gradient congestion control related settings");
  685 
  686 SYSCTL_STRING(_net_inet_tcp_cc_cdg, OID_AUTO, version,
  687     CTLFLAG_RD, CDG_VERSION, sizeof(CDG_VERSION) - 1,
  688     "Current algorithm/implementation version number");
  689 
  690 SYSCTL_UINT(_net_inet_tcp_cc_cdg, OID_AUTO, alpha_inc,
  691     CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(cdg_alpha_inc), 0,
  692     "Increment the window increase factor alpha by 1 MSS segment every "
  693     "alpha_inc RTTs during congestion avoidance mode.");
  694 
  695 SYSCTL_PROC(_net_inet_tcp_cc_cdg, OID_AUTO, beta_delay,
  696     CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
  697     &VNET_NAME(cdg_beta_delay), 70, &cdg_beta_handler, "IU",
  698     "Delay-based window decrease factor as a percentage "
  699     "(on delay-based backoff, w = w * beta_delay / 100)");
  700 
  701 SYSCTL_PROC(_net_inet_tcp_cc_cdg, OID_AUTO, beta_loss,
  702     CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
  703     &VNET_NAME(cdg_beta_loss), 50, &cdg_beta_handler, "IU",
  704     "Loss-based window decrease factor as a percentage "
  705     "(on loss-based backoff, w = w * beta_loss / 100)");
  706 
  707 SYSCTL_PROC(_net_inet_tcp_cc_cdg, OID_AUTO, exp_backoff_scale,
  708     CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
  709     &VNET_NAME(cdg_exp_backoff_scale), 2, &cdg_exp_backoff_scale_handler, "IU",
  710     "Scaling parameter for the probabilistic exponential backoff");
  711 
  712 SYSCTL_UINT(_net_inet_tcp_cc_cdg,  OID_AUTO, smoothing_factor,
  713     CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(cdg_smoothing_factor), 8,
  714     "Number of samples used for moving average smoothing (0 = no smoothing)");
  715 
  716 SYSCTL_UINT(_net_inet_tcp_cc_cdg, OID_AUTO, loss_compete_consec_cong,
  717     CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(cdg_consec_cong), 5,
  718     "Number of consecutive delay-gradient based congestion episodes which will "
  719     "trigger loss based CC compatibility");
  720 
  721 SYSCTL_UINT(_net_inet_tcp_cc_cdg, OID_AUTO, loss_compete_hold_backoff,
  722     CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(cdg_hold_backoff), 5,
  723     "Number of consecutive delay-gradient based congestion episodes to hold "
  724     "the window backoff for loss based CC compatibility");
  725 
  726 DECLARE_CC_MODULE(cdg, &cdg_cc_algo);
  727 MODULE_VERSION(cdg, 2);
  728 MODULE_DEPEND(cdg, ertt, 1, 1, 1);

Cache object: a6fa463af8aa3ad046913955acdc8afd


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.