FreeBSD/Linux Kernel Cross Reference
sys/netinet/cc/cc.c


    1 /*-
    2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
    3  *
    4  * Copyright (c) 2007-2008
    5  *      Swinburne University of Technology, Melbourne, Australia.
    6  * Copyright (c) 2009-2010 Lawrence Stewart <lstewart@freebsd.org>
    7  * Copyright (c) 2010 The FreeBSD Foundation
    8  * All rights reserved.
    9  *
   10  * This software was developed at the Centre for Advanced Internet
   11  * Architectures, Swinburne University of Technology, by Lawrence Stewart and
   12  * James Healy, made possible in part by a grant from the Cisco University
   13  * Research Program Fund at Community Foundation Silicon Valley.
   14  *
   15  * Portions of this software were developed at the Centre for Advanced
   16  * Internet Architectures, Swinburne University of Technology, Melbourne,
   17  * Australia by David Hayes under sponsorship from the FreeBSD Foundation.
   18  *
   19  * Redistribution and use in source and binary forms, with or without
   20  * modification, are permitted provided that the following conditions
   21  * are met:
   22  * 1. Redistributions of source code must retain the above copyright
   23  *    notice, this list of conditions and the following disclaimer.
   24  * 2. Redistributions in binary form must reproduce the above copyright
   25  *    notice, this list of conditions and the following disclaimer in the
   26  *    documentation and/or other materials provided with the distribution.
   27  *
   28  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   29  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   30  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   31  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   32  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   33  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   34  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   35  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   36  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   37  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   38  * SUCH DAMAGE.
   39  */
   40 
   41 /*
   42  * This software was first released in 2007 by James Healy and Lawrence Stewart
   43  * whilst working on the NewTCP research project at Swinburne University of
   44  * Technology's Centre for Advanced Internet Architectures, Melbourne,
   45  * Australia, which was made possible in part by a grant from the Cisco
   46  * University Research Program Fund at Community Foundation Silicon Valley.
   47  * More details are available at:
   48  *   http://caia.swin.edu.au/urp/newtcp/
   49  */
   50 
   51 #include <sys/cdefs.h>
   52 __FBSDID("$FreeBSD$");
   53 #include <opt_cc.h>
   54 #include <sys/param.h>
   55 #include <sys/kernel.h>
   56 #include <sys/libkern.h>
   57 #include <sys/lock.h>
   58 #include <sys/malloc.h>
   59 #include <sys/module.h>
   60 #include <sys/mutex.h>
   61 #include <sys/queue.h>
   62 #include <sys/rwlock.h>
   63 #include <sys/sbuf.h>
   64 #include <sys/socket.h>
   65 #include <sys/socketvar.h>
   66 #include <sys/sysctl.h>
   67 
   68 #include <net/vnet.h>
   69 
   70 #include <netinet/in.h>
   71 #include <netinet/in_pcb.h>
   72 #include <netinet/tcp.h>
   73 #include <netinet/tcp_seq.h>
   74 #include <netinet/tcp_var.h>
   75 #include <netinet/tcp_log_buf.h>
   76 #include <netinet/tcp_hpts.h>
   77 #include <netinet/cc/cc.h>
   78 #include <netinet/cc/cc_module.h>
   79 
   80 /*
   81  * Have a sane default if no CC_DEFAULT is specified in the kernel config file.
   82  */
   83 #ifndef CC_DEFAULT
   84 #define CC_DEFAULT "cubic"
   85 #endif
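       /*
        * Illustrative note (not part of the original file): CC_DEFAULT comes
        * in via opt_cc.h, so a kernel configuration file would typically
        * override the fallback above with a line such as
        *
        *   options CC_DEFAULT=\"newreno\"
        *
        * (the escaped quotes being required for string-valued options).
        */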
   86 
   87 uint32_t hystart_minrtt_thresh = 4000;
   88 uint32_t hystart_maxrtt_thresh = 16000;
   89 uint32_t hystart_n_rttsamples = 8;
   90 uint32_t hystart_css_growth_div = 4;
   91 uint32_t hystart_css_rounds = 5;
   92 uint32_t hystart_bblogs = 0;
   93 
   94 MALLOC_DEFINE(M_CC_MEM, "CC Mem", "Congestion Control State memory");
   95 
   96 /*
    97  * List of available cc algorithms on the current system. The per-vnet
    98  * default CC algorithm is tracked separately via V_default_cc_ptr.
   99  */
  100 struct cc_head cc_list = STAILQ_HEAD_INITIALIZER(cc_list);
  101 
   102 /* Protects the cc_list STAILQ. */
  103 struct rwlock cc_list_lock;
  104 
  105 VNET_DEFINE(struct cc_algo *, default_cc_ptr) = NULL;
  106 
  107 VNET_DEFINE(uint32_t, newreno_beta) = 50;
  108 #define V_newreno_beta VNET(newreno_beta)
  109 VNET_DEFINE(uint32_t, newreno_beta_ecn) = 80;
  110 
  111 void
  112 cc_refer(struct cc_algo *algo)
  113 {
  114         CC_LIST_LOCK_ASSERT();
  115         refcount_acquire(&algo->cc_refcount);
  116 }
  117 
  118 void
  119 cc_release(struct cc_algo *algo)
  120 {
  121         CC_LIST_LOCK_ASSERT();
  122         refcount_release(&algo->cc_refcount);
  123 }
  124 
  125 
  126 void
  127 cc_attach(struct tcpcb *tp, struct cc_algo *algo)
  128 {
  129         /*
  130          * Attach the tcpcb to the algorithm.
  131          */
  132         CC_LIST_RLOCK();
  133         CC_ALGO(tp) = algo;
  134         cc_refer(algo);
  135         CC_LIST_RUNLOCK();
  136 }
  137 
  138 void
  139 cc_detach(struct tcpcb *tp)
  140 {
  141         struct cc_algo *algo;
  142 
  143         CC_LIST_RLOCK();
  144         algo = CC_ALGO(tp);
  145         CC_ALGO(tp) = NULL;
  146         cc_release(algo);
  147         CC_LIST_RUNLOCK();
  148 }
  149 
  150 /*
  151  * Sysctl handler to show and change the default CC algorithm.
  152  */
  153 static int
  154 cc_default_algo(SYSCTL_HANDLER_ARGS)
  155 {
  156         char default_cc[TCP_CA_NAME_MAX];
  157         struct cc_algo *funcs;
  158         int error;
  159 
  160         /* Get the current default: */
  161         CC_LIST_RLOCK();
  162         if (CC_DEFAULT_ALGO() != NULL)
  163                 strlcpy(default_cc, CC_DEFAULT_ALGO()->name, sizeof(default_cc));
  164         else
  165                 memset(default_cc, 0, TCP_CA_NAME_MAX);
  166         CC_LIST_RUNLOCK();
  167 
  168         error = sysctl_handle_string(oidp, default_cc, sizeof(default_cc), req);
  169 
  170         /* Check for error or no change */
  171         if (error != 0 || req->newptr == NULL)
  172                 goto done;
  173 
  174         error = ESRCH;
  175         /* Find algo with specified name and set it to default. */
  176         CC_LIST_RLOCK();
  177         STAILQ_FOREACH(funcs, &cc_list, entries) {
  178                 if (strncmp(default_cc, funcs->name, sizeof(default_cc)))
  179                         continue;
  180                 if (funcs->flags & CC_MODULE_BEING_REMOVED) {
   181                         /* It's being removed, so it's not eligible. */
  182                         continue;
  183                 }
  184                 V_default_cc_ptr = funcs;
  185                 error = 0;
  186                 break;
  187         }
  188         CC_LIST_RUNLOCK();
  189 done:
  190         return (error);
  191 }
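       /*
        * Usage sketch (not part of the original file): the handler above
        * backs the net.inet.tcp.cc.algorithm sysctl declared at the bottom
        * of this file, so the per-vnet default can be inspected or changed
        * from userland, e.g.:
        *
        *   sysctl net.inet.tcp.cc.algorithm            (show the default)
        *   sysctl net.inet.tcp.cc.algorithm=newreno    (switch; fails with
        *                                                ESRCH if no module of
        *                                                that name is loaded)
        */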
  192 
  193 /*
  194  * Sysctl handler to display the list of available CC algorithms.
  195  */
  196 static int
  197 cc_list_available(SYSCTL_HANDLER_ARGS)
  198 {
  199         struct cc_algo *algo;
  200         int error, nalgos;
  201         int linesz;
  202         char *buffer, *cp;
  203         size_t bufsz, outsz;
  204 
  205         error = nalgos = 0;
  206         CC_LIST_RLOCK();
  207         STAILQ_FOREACH(algo, &cc_list, entries) {
  208                 nalgos++;
  209         }
  210         CC_LIST_RUNLOCK();
  211         if (nalgos == 0) {
  212                 return (ENOENT);
  213         }
  214         bufsz = (nalgos+2) * ((TCP_CA_NAME_MAX + 13) + 1);
  215         buffer = malloc(bufsz, M_TEMP, M_WAITOK);
  216         cp = buffer;
  217 
  218         linesz = snprintf(cp, bufsz, "\n%-16s%c %s\n", "CCmod", 'D',
  219             "PCB count");
  220         cp += linesz;
  221         bufsz -= linesz;
  222         outsz = linesz;
  223         CC_LIST_RLOCK();
  224         STAILQ_FOREACH(algo, &cc_list, entries) {
  225                 linesz = snprintf(cp, bufsz, "%-16s%c %u\n",
  226                     algo->name,
  227                     (algo == CC_DEFAULT_ALGO()) ? '*' : ' ',
  228                     algo->cc_refcount);
  229                 if (linesz >= bufsz) {
  230                         error = EOVERFLOW;
  231                         break;
  232                 }
  233                 cp += linesz;
  234                 bufsz -= linesz;
  235                 outsz += linesz;
  236         }
  237         CC_LIST_RUNLOCK();
  238         if (error == 0)
  239                 error = sysctl_handle_string(oidp, buffer, outsz + 1, req);
  240         free(buffer, M_TEMP);
  241         return (error);
  242 }
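       /*
        * Example output (illustrative values, not part of the original
        * file): reading net.inet.tcp.cc.available yields a small table in
        * the format built above, e.g.:
        *
        *   CCmod           D PCB count
        *   newreno         * 3
        *   cubic             0
        *
        * where '*' marks the current default and the count is the
        * algorithm's cc_refcount.
        */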
  243 
  244 /*
   245  * Return the number of vnets in which the proposed remove_cc is
   246  * currently in use as the default.
  247  */
  248 static int
  249 cc_check_default(struct cc_algo *remove_cc)
  250 {
  251         int cnt = 0;
  252         VNET_ITERATOR_DECL(vnet_iter);
  253 
  254         CC_LIST_LOCK_ASSERT();
  255 
  256         VNET_LIST_RLOCK_NOSLEEP();
  257         VNET_FOREACH(vnet_iter) {
  258                 CURVNET_SET(vnet_iter);
  259                 if ((CC_DEFAULT_ALGO() != NULL) &&
  260                     strncmp(CC_DEFAULT_ALGO()->name,
  261                             remove_cc->name,
  262                             TCP_CA_NAME_MAX) == 0) {
  263                         cnt++;
  264                 }
  265                 CURVNET_RESTORE();
  266         }
  267         VNET_LIST_RUNLOCK_NOSLEEP();
  268         return (cnt);
  269 }
  270 
  271 /*
  272  * Initialise CC subsystem on system boot.
  273  */
  274 static void
  275 cc_init(void)
  276 {
  277         CC_LIST_LOCK_INIT();
  278         STAILQ_INIT(&cc_list);
  279 }
  280 
  281 /*
   282  * Returns 0 on success, non-zero on failure.
  283  */
  284 static int
  285 cc_deregister_algo_locked(struct cc_algo *remove_cc)
  286 {
  287         struct cc_algo *funcs;
  288         int found = 0;
  289 
  290         /* This is unlikely to fail */
  291         STAILQ_FOREACH(funcs, &cc_list, entries) {
  292                 if (funcs == remove_cc)
  293                         found = 1;
  294         }
  295         if (found == 0) {
  296                 /* Nothing to remove? */
  297                 return (ENOENT);
  298         }
  299         /* We assert it should have been MOD_QUIESCE'd */
  300         KASSERT((remove_cc->flags & CC_MODULE_BEING_REMOVED),
  301                 ("remove_cc:%p does not have CC_MODULE_BEING_REMOVED flag", remove_cc));
  302         if (cc_check_default(remove_cc)) {
  303                 return(EBUSY);
  304         }
  305         if (remove_cc->cc_refcount != 0) {
  306                 return (EBUSY);
  307         }
  308         /* Remove algo from cc_list so that new connections can't use it. */
  309         STAILQ_REMOVE(&cc_list, remove_cc, cc_algo, entries);
  310         return (0);
  311 }
  312 
  313 /*
   314  * Returns 0 on success, non-zero on failure.
  315  */
  316 int
  317 cc_deregister_algo(struct cc_algo *remove_cc)
  318 {
  319         int ret;
  320 
  321         CC_LIST_WLOCK();
  322         ret = cc_deregister_algo_locked(remove_cc);
  323         CC_LIST_WUNLOCK();
  324         return (ret);
  325 }
  326 
  327 /*
  328  * Returns 0 on success, non-zero on failure.
  329  */
  330 int
  331 cc_register_algo(struct cc_algo *add_cc)
  332 {
  333         struct cc_algo *funcs;
  334         int err;
  335 
  336         err = 0;
  337 
  338         /*
  339          * Iterate over list of registered CC algorithms and make sure
  340          * we're not trying to add a duplicate.
  341          */
  342         CC_LIST_WLOCK();
  343         STAILQ_FOREACH(funcs, &cc_list, entries) {
  344                 if (funcs == add_cc ||
  345                     strncmp(funcs->name, add_cc->name,
  346                             TCP_CA_NAME_MAX) == 0) {
  347                         err = EEXIST;
  348                         break;
  349                 }
  350         }
  351         /* Init its reference count */
  352         if (err == 0)
  353                 refcount_init(&add_cc->cc_refcount, 0);
  354         /*
   355          * The first loaded congestion control module becomes the
   356          * default; it is superseded once the module named by
   357          * "CC_DEFAULT" in the kernel config (if any) registers.
  358          */
  359         if (!err) {
  360                 STAILQ_INSERT_TAIL(&cc_list, add_cc, entries);
  361                 if (strcmp(add_cc->name, CC_DEFAULT) == 0) {
  362                         V_default_cc_ptr = add_cc;
  363                 } else if (V_default_cc_ptr == NULL) {
  364                         V_default_cc_ptr = add_cc;
  365                 }
  366         }
  367         CC_LIST_WUNLOCK();
  368 
  369         return (err);
  370 }
  371 
  372 static void
  373 vnet_cc_sysinit(void *arg)
  374 {
  375         struct cc_algo *cc;
  376 
  377         if (IS_DEFAULT_VNET(curvnet))
  378                 return;
  379 
  380         CURVNET_SET(vnet0);
  381         cc = V_default_cc_ptr;
  382         CURVNET_RESTORE();
  383 
  384         V_default_cc_ptr = cc;
  385 }
  386 VNET_SYSINIT(vnet_cc_sysinit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
  387     vnet_cc_sysinit, NULL);
  388 
  389 /*
  390  * Perform any necessary tasks before we exit congestion recovery.
  391  */
  392 void
  393 newreno_cc_post_recovery(struct cc_var *ccv)
  394 {
  395         int pipe;
  396 
  397         if (IN_FASTRECOVERY(CCV(ccv, t_flags))) {
  398                 /*
  399                  * Fast recovery will conclude after returning from this
  400                  * function. Window inflation should have left us with
  401                  * approximately snd_ssthresh outstanding data. But in case we
  402                  * would be inclined to send a burst, better to do it via the
  403                  * slow start mechanism.
  404                  *
  405                  * XXXLAS: Find a way to do this without needing curack
  406                  */
  407                 if (V_tcp_do_newsack)
  408                         pipe = tcp_compute_pipe(ccv->ccvc.tcp);
  409                 else
  410                         pipe = CCV(ccv, snd_max) - ccv->curack;
  411                 if (pipe < CCV(ccv, snd_ssthresh))
  412                         /*
  413                          * Ensure that cwnd does not collapse to 1 MSS under
  414                          * adverse conditions. Implements RFC6582
  415                          */
  416                         CCV(ccv, snd_cwnd) = max(pipe, CCV(ccv, t_maxseg)) +
  417                             CCV(ccv, t_maxseg);
  418                 else
  419                         CCV(ccv, snd_cwnd) = CCV(ccv, snd_ssthresh);
  420         }
  421 }
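       /*
        * Worked example (illustrative numbers, not part of the original
        * file): with snd_ssthresh = 20 segments but only pipe = 4 segments
        * still outstanding when fast recovery ends, setting cwnd straight to
        * ssthresh would permit a 16-segment burst.  The code above instead
        * sets cwnd = max(pipe, 1 MSS) + 1 MSS (5 segments here), so the rest
        * of the window is reopened via slow start, per RFC 6582.
        */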
  422 
  423 void
  424 newreno_cc_after_idle(struct cc_var *ccv)
  425 {
  426         uint32_t rw;
  427         /*
  428          * If we've been idle for more than one retransmit timeout the old
  429          * congestion window is no longer current and we have to reduce it to
  430          * the restart window before we can transmit again.
  431          *
  432          * The restart window is the initial window or the last CWND, whichever
  433          * is smaller.
  434          *
  435          * This is done to prevent us from flooding the path with a full CWND at
  436          * wirespeed, overloading router and switch buffers along the way.
  437          *
  438          * See RFC5681 Section 4.1. "Restarting Idle Connections".
  439          *
  440          * In addition, per RFC2861 Section 2, the ssthresh is set to the
  441          * maximum of the former ssthresh or 3/4 of the old cwnd, to
  442          * not exit slow-start prematurely.
  443          */
  444         rw = tcp_compute_initwnd(tcp_maxseg(ccv->ccvc.tcp));
  445 
  446         CCV(ccv, snd_ssthresh) = max(CCV(ccv, snd_ssthresh),
  447             CCV(ccv, snd_cwnd)-(CCV(ccv, snd_cwnd)>>2));
  448 
  449         CCV(ccv, snd_cwnd) = min(rw, CCV(ccv, snd_cwnd));
  450 }
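       /*
        * Worked example (illustrative numbers, not part of the original
        * file): after an idle period with cwnd = 40 segments, ssthresh = 8
        * segments and an initial window of 10 segments, the code above first
        * raises ssthresh to max(8, 40 - 40/4) = 30 segments and then
        * restarts with cwnd = min(10, 40) = 10 segments, per RFC 5681
        * Section 4.1 and RFC 2861 Section 2.
        */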
  451 
  452 /*
  453  * Perform any necessary tasks before we enter congestion recovery.
  454  */
  455 void
  456 newreno_cc_cong_signal(struct cc_var *ccv, uint32_t type)
  457 {
  458         uint32_t cwin, factor;
  459         u_int mss;
  460 
  461         cwin = CCV(ccv, snd_cwnd);
  462         mss = tcp_fixed_maxseg(ccv->ccvc.tcp);
  463         /*
   464          * Other TCP congestion controls use newreno_cc_cong_signal(), but
  465          * with their own private cc_data. Make sure the cc_data is used
  466          * correctly.
  467          */
  468         factor = V_newreno_beta;
  469 
  470         /* Catch algos which mistakenly leak private signal types. */
  471         KASSERT((type & CC_SIGPRIVMASK) == 0,
  472             ("%s: congestion signal type 0x%08x is private\n", __func__, type));
  473 
  474         cwin = max(((uint64_t)cwin * (uint64_t)factor) / (100ULL * (uint64_t)mss),
  475             2) * mss;
  476 
  477         switch (type) {
  478         case CC_NDUPACK:
  479                 if (!IN_FASTRECOVERY(CCV(ccv, t_flags))) {
  480                         if (!IN_CONGRECOVERY(CCV(ccv, t_flags)))
  481                                 CCV(ccv, snd_ssthresh) = cwin;
  482                         ENTER_RECOVERY(CCV(ccv, t_flags));
  483                 }
  484                 break;
  485         case CC_ECN:
  486                 if (!IN_CONGRECOVERY(CCV(ccv, t_flags))) {
  487                         CCV(ccv, snd_ssthresh) = cwin;
  488                         CCV(ccv, snd_cwnd) = cwin;
  489                         ENTER_CONGRECOVERY(CCV(ccv, t_flags));
  490                 }
  491                 break;
  492         case CC_RTO:
  493                 CCV(ccv, snd_ssthresh) = max(min(CCV(ccv, snd_wnd),
  494                                                  CCV(ccv, snd_cwnd)) / 2 / mss,
  495                                              2) * mss;
  496                 CCV(ccv, snd_cwnd) = mss;
  497                 break;
  498         }
  499 }
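       /*
        * Worked example (illustrative numbers, not part of the original
        * file): with snd_cwnd = 100000 bytes, mss = 1000 bytes and the
        * NewReno beta of 50, the calculation above gives
        * cwin = max((100000 * 50) / (100 * 1000), 2) * 1000 = 50000 bytes,
        * i.e. the window is halved (but never reduced below 2 MSS) on
        * CC_NDUPACK/CC_ECN, whereas CC_RTO collapses cwnd to a single MSS.
        */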
  500 
  501 void
  502 newreno_cc_ack_received(struct cc_var *ccv, uint16_t type)
  503 {
  504         if (type == CC_ACK && !IN_RECOVERY(CCV(ccv, t_flags)) &&
  505             (ccv->flags & CCF_CWND_LIMITED)) {
  506                 u_int cw = CCV(ccv, snd_cwnd);
  507                 u_int incr = CCV(ccv, t_maxseg);
  508 
  509                 /*
  510                  * Regular in-order ACK, open the congestion window.
  511                  * Method depends on which congestion control state we're
  512                  * in (slow start or cong avoid) and if ABC (RFC 3465) is
  513                  * enabled.
  514                  *
  515                  * slow start: cwnd <= ssthresh
  516                  * cong avoid: cwnd > ssthresh
  517                  *
  518                  * slow start and ABC (RFC 3465):
  519                  *   Grow cwnd exponentially by the amount of data
  520                  *   ACKed capping the max increment per ACK to
  521                  *   (abc_l_var * maxseg) bytes.
  522                  *
  523                  * slow start without ABC (RFC 5681):
  524                  *   Grow cwnd exponentially by maxseg per ACK.
  525                  *
  526                  * cong avoid and ABC (RFC 3465):
  527                  *   Grow cwnd linearly by maxseg per RTT for each
  528                  *   cwnd worth of ACKed data.
  529                  *
  530                  * cong avoid without ABC (RFC 5681):
  531                  *   Grow cwnd linearly by approximately maxseg per RTT using
  532                  *   maxseg^2 / cwnd per ACK as the increment.
  533                  *   If cwnd > maxseg^2, fix the cwnd increment at 1 byte to
  534                  *   avoid capping cwnd.
  535                  */
  536                 if (cw > CCV(ccv, snd_ssthresh)) {
  537                         if (V_tcp_do_rfc3465) {
  538                                 if (ccv->flags & CCF_ABC_SENTAWND)
  539                                         ccv->flags &= ~CCF_ABC_SENTAWND;
  540                                 else
  541                                         incr = 0;
  542                         } else
  543                                 incr = max((incr * incr / cw), 1);
  544                 } else if (V_tcp_do_rfc3465) {
  545                         /*
  546                          * In slow-start with ABC enabled and no RTO in sight?
  547                          * (Must not use abc_l_var > 1 if slow starting after
  548                          * an RTO. On RTO, snd_nxt = snd_una, so the
  549                          * snd_nxt == snd_max check is sufficient to
  550                          * handle this).
  551                          *
  552                          * XXXLAS: Find a way to signal SS after RTO that
  553                          * doesn't rely on tcpcb vars.
  554                          */
  555                         uint16_t abc_val;
  556 
  557                         if (ccv->flags & CCF_USE_LOCAL_ABC)
  558                                 abc_val = ccv->labc;
  559                         else
  560                                 abc_val = V_tcp_abc_l_var;
  561                         if (CCV(ccv, snd_nxt) == CCV(ccv, snd_max))
  562                                 incr = min(ccv->bytes_this_ack,
  563                                     ccv->nsegs * abc_val *
  564                                     CCV(ccv, t_maxseg));
  565                         else
  566                                 incr = min(ccv->bytes_this_ack, CCV(ccv, t_maxseg));
  567 
  568                 }
  569                 /* ABC is on by default, so incr equals 0 frequently. */
  570                 if (incr > 0)
  571                         CCV(ccv, snd_cwnd) = min(cw + incr,
  572                             TCP_MAXWIN << CCV(ccv, snd_scale));
  573         }
  574 }
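       /*
        * Worked example (illustrative numbers, not part of the original
        * file, assuming ABC/RFC 3465 is enabled with abc_l_var = 2,
        * nsegs = 1 and maxseg = 1000 bytes): in slow start an ACK covering
        * 3000 bytes grows cwnd by min(3000, 1 * 2 * 1000) = 2000 bytes; in
        * congestion avoidance cwnd instead grows by one maxseg only after a
        * full cwnd of data has been ACKed (CCF_ABC_SENTAWND), giving the
        * usual exponential-then-linear growth.
        */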
  575 
  576 static int
  577 cc_stop_new_assignments(struct cc_algo *algo)
  578 {
  579         CC_LIST_WLOCK();
  580         if (cc_check_default(algo)) {
  581                 /* A default cannot be removed */
  582                 CC_LIST_WUNLOCK();
  583                 return (EBUSY);
  584         }
  585         algo->flags |= CC_MODULE_BEING_REMOVED;
  586         CC_LIST_WUNLOCK();
  587         return (0);
  588 }
  589 
  590 /*
  591  * Handles kld related events. Returns 0 on success, non-zero on failure.
  592  */
  593 int
  594 cc_modevent(module_t mod, int event_type, void *data)
  595 {
  596         struct cc_algo *algo;
  597         int err;
  598 
  599         err = 0;
  600         algo = (struct cc_algo *)data;
  601 
  602         switch(event_type) {
  603         case MOD_LOAD:
  604                 if ((algo->cc_data_sz == NULL) && (algo->cb_init != NULL)) {
  605                         /*
   606                          * A module must have a cc_data_sz function;
   607                          * even if it has no data it should return 0.
   608                          */
   609                         printf("Module load failed: it lacks a cc_data_sz() function but has a cb_init()!\n");
  610                         err = EINVAL;
  611                         break;
  612                 }
  613                 if (algo->mod_init != NULL)
  614                         err = algo->mod_init();
  615                 if (!err)
  616                         err = cc_register_algo(algo);
  617                 break;
  618 
  619         case MOD_SHUTDOWN:
  620                 break;
  621         case MOD_QUIESCE:
   622                 /* Stop any new assignments. */
  623                 err = cc_stop_new_assignments(algo);
  624                 break;
  625         case MOD_UNLOAD:
  626                 /* 
  627                  * Deregister and remove the module from the list 
  628                  */
  629                 CC_LIST_WLOCK();
   630                 /* Even with -f we can't unload if it's the default. */
  631                 if (cc_check_default(algo)) {
  632                         /* A default cannot be removed */
  633                         CC_LIST_WUNLOCK();
  634                         return (EBUSY);
  635                 }
  636                 /*
  637                  * If -f was used and users are still attached to
   638                  * the algorithm, things are going to go boom.
  639                  */
  640                 err = cc_deregister_algo_locked(algo);
  641                 CC_LIST_WUNLOCK();
  642                 if ((err == 0) && (algo->mod_destroy != NULL)) {
  643                         algo->mod_destroy();
  644                 }
  645                 break;
  646         default:
  647                 err = EINVAL;
  648                 break;
  649         }
  650 
  651         return (err);
  652 }
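       /*
        * Sketch (not part of the original file): a congestion control module
        * reaches this handler via the DECLARE_CC_MODULE() macro from
        * <netinet/cc/cc_module.h>, which registers cc_modevent() as the
        * module event handler with the algorithm's struct cc_algo as 'data'.
        * A hypothetical module named "example" would do roughly:
        *
        *   struct cc_algo example_cc_algo = {
        *           .name = "example",
        *           .ack_received = example_ack_received,
        *           .cc_data_sz = example_data_sz,
        *   };
        *   DECLARE_CC_MODULE(example, &example_cc_algo);
        *   MODULE_VERSION(example, 1);
        */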
  653 
  654 SYSINIT(cc, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_FIRST, cc_init, NULL);
  655 
  656 /* Declare sysctl tree and populate it. */
  657 SYSCTL_NODE(_net_inet_tcp, OID_AUTO, cc, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL,
  658     "Congestion control related settings");
  659 
  660 SYSCTL_PROC(_net_inet_tcp_cc, OID_AUTO, algorithm,
  661     CTLFLAG_VNET | CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE,
  662     NULL, 0, cc_default_algo, "A",
  663     "Default congestion control algorithm");
  664 
  665 SYSCTL_PROC(_net_inet_tcp_cc, OID_AUTO, available,
  666     CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE,
  667     NULL, 0, cc_list_available, "A",
  668     "List available congestion control algorithms");
  669 
  670 SYSCTL_NODE(_net_inet_tcp_cc, OID_AUTO, hystartplusplus,
  671     CTLFLAG_RW | CTLFLAG_MPSAFE, NULL,
  672     "New Reno related HyStart++ settings");
  673 
  674 SYSCTL_UINT(_net_inet_tcp_cc_hystartplusplus, OID_AUTO, minrtt_thresh,
  675     CTLFLAG_RW,
  676     &hystart_minrtt_thresh, 4000,
   677    "HyStart++ minimum RTT threshold used in clamp (in microseconds)");
  678 
  679 SYSCTL_UINT(_net_inet_tcp_cc_hystartplusplus, OID_AUTO, maxrtt_thresh,
  680     CTLFLAG_RW,
  681     &hystart_maxrtt_thresh, 16000,
   682    "HyStart++ maximum RTT threshold used in clamp (in microseconds)");
  683 
  684 SYSCTL_UINT(_net_inet_tcp_cc_hystartplusplus, OID_AUTO, n_rttsamples,
  685     CTLFLAG_RW,
  686     &hystart_n_rttsamples, 8,
  687    "The number of RTT samples that must be seen to consider HyStart++");
  688 
  689 SYSCTL_UINT(_net_inet_tcp_cc_hystartplusplus, OID_AUTO, css_growth_div,
  690     CTLFLAG_RW,
  691     &hystart_css_growth_div, 4,
   692    "The divisor applied to cwnd growth while in HyStart++ CSS");
  693 
  694 SYSCTL_UINT(_net_inet_tcp_cc_hystartplusplus, OID_AUTO, css_rounds,
  695     CTLFLAG_RW,
  696     &hystart_css_rounds, 5,
  697    "The number of rounds HyStart++ lasts in CSS before falling to CA");
  698 
  699 SYSCTL_UINT(_net_inet_tcp_cc_hystartplusplus, OID_AUTO, bblogs,
  700     CTLFLAG_RW,
  701     &hystart_bblogs, 0,
   702    "Enable generation of HyStart++ Black Box logs when BB logging is on");
  703 
  704 VNET_DEFINE(int, cc_do_abe) = 0;
  705 SYSCTL_INT(_net_inet_tcp_cc, OID_AUTO, abe, CTLFLAG_VNET | CTLFLAG_RW,
  706     &VNET_NAME(cc_do_abe), 0,
  707     "Enable draft-ietf-tcpm-alternativebackoff-ecn (TCP Alternative Backoff with ECN)");
  708 
  709 VNET_DEFINE(int, cc_abe_frlossreduce) = 0;
  710 SYSCTL_INT(_net_inet_tcp_cc, OID_AUTO, abe_frlossreduce, CTLFLAG_VNET | CTLFLAG_RW,
  711     &VNET_NAME(cc_abe_frlossreduce), 0,
  712     "Apply standard beta instead of ABE-beta during ECN-signalled congestion "
  713     "recovery episodes if loss also needs to be repaired");
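       /*
        * Usage sketch (not part of the original file): ABE can be toggled at
        * run time, e.g. "sysctl net.inet.tcp.cc.abe=1"; congestion control
        * modules that honour it (such as NewReno) then apply their
        * ECN-specific beta (newreno_beta_ecn above) instead of the standard
        * beta when backing off in response to ECN-signalled congestion.
        */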
