FreeBSD/Linux Kernel Cross Reference
sys/altq/altq_rmclass.c


    1 /*      $NetBSD: altq_rmclass.c,v 1.18 2006/11/16 01:32:37 christos Exp $       */
    2 /*      $KAME: altq_rmclass.c,v 1.19 2005/04/13 03:44:25 suz Exp $      */
    3 
    4 /*
    5  * Copyright (c) 1991-1997 Regents of the University of California.
    6  * All rights reserved.
    7  *
    8  * Redistribution and use in source and binary forms, with or without
    9  * modification, are permitted provided that the following conditions
   10  * are met:
   11  * 1. Redistributions of source code must retain the above copyright
   12  *    notice, this list of conditions and the following disclaimer.
   13  * 2. Redistributions in binary form must reproduce the above copyright
   14  *    notice, this list of conditions and the following disclaimer in the
   15  *    documentation and/or other materials provided with the distribution.
   16  * 3. All advertising materials mentioning features or use of this software
   17  *    must display the following acknowledgement:
   18  *      This product includes software developed by the Network Research
   19  *      Group at Lawrence Berkeley Laboratory.
   20  * 4. Neither the name of the University nor of the Laboratory may be used
   21  *    to endorse or promote products derived from this software without
   22  *    specific prior written permission.
   23  *
   24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   34  * SUCH DAMAGE.
   35  *
   36  * LBL code modified by speer@eng.sun.com, May 1997.
   37  * For questions and/or comments, please send mail to cbq@ee.lbl.gov
   38  */
   39 
   40 #include <sys/cdefs.h>
   41 __KERNEL_RCSID(0, "$NetBSD: altq_rmclass.c,v 1.18 2006/11/16 01:32:37 christos Exp $");
   42 
   43 #ident "@(#)rm_class.c  1.48     97/12/05 SMI"
   44 
   45 #ifdef _KERNEL_OPT
   46 #include "opt_altq.h"
   47 #include "opt_inet.h"
   48 #endif
   49 
   50 #ifdef ALTQ_CBQ /* cbq is enabled by ALTQ_CBQ option in opt_altq.h */
   51 
   52 #include <sys/param.h>
   53 #include <sys/malloc.h>
   54 #include <sys/mbuf.h>
   55 #include <sys/socket.h>
   56 #include <sys/systm.h>
   57 #include <sys/errno.h>
   58 #include <sys/time.h>
   59 #ifdef ALTQ3_COMPAT
   60 #include <sys/kernel.h>
   61 #endif
   62 
   63 #include <net/if.h>
   64 #ifdef ALTQ3_COMPAT
   65 #include <netinet/in.h>
   66 #include <netinet/in_systm.h>
   67 #include <netinet/ip.h>
   68 #endif
   69 
   70 #include <altq/altq.h>
   71 #include <altq/altq_rmclass.h>
   72 #include <altq/altq_rmclass_debug.h>
   73 #include <altq/altq_red.h>
   74 #include <altq/altq_rio.h>
   75 
   76 /*
   77  * Local Macros
   78  */
   79 
   80 #define reset_cutoff(ifd)       { ifd->cutoff_ = RM_MAXDEPTH; }
   81 
   82 /*
   83  * Local routines.
   84  */
   85 
   86 static int      rmc_satisfied(struct rm_class *, struct timeval *);
   87 static void     rmc_wrr_set_weights(struct rm_ifdat *);
   88 static void     rmc_depth_compute(struct rm_class *);
   89 static void     rmc_depth_recompute(rm_class_t *);
   90 
   91 static mbuf_t   *_rmc_wrr_dequeue_next(struct rm_ifdat *, int);
   92 static mbuf_t   *_rmc_prr_dequeue_next(struct rm_ifdat *, int);
   93 
   94 static int      _rmc_addq(rm_class_t *, mbuf_t *);
   95 static void     _rmc_dropq(rm_class_t *);
   96 static mbuf_t   *_rmc_getq(rm_class_t *);
   97 static mbuf_t   *_rmc_pollq(rm_class_t *);
   98 
   99 static int      rmc_under_limit(struct rm_class *, struct timeval *);
  100 static void     rmc_tl_satisfied(struct rm_ifdat *, struct timeval *);
  101 static void     rmc_drop_action(struct rm_class *);
  102 static void     rmc_restart(struct rm_class *);
  103 static void     rmc_root_overlimit(struct rm_class *, struct rm_class *);
  104 
  105 #define BORROW_OFFTIME
  106 /*
  107  * BORROW_OFFTIME (experimental):
  108  * borrow the offtime of the class being borrowed from.
  109  * the reason is that when its own offtime is set, the class is unable
  110  * to borrow much, especially when cutoff is taking effect.
  111  * but when the borrowed class is overloaded (avgidle is close to minidle),
  112  * use the borrowing class's offtime to avoid overload.
  113  */
  114 #define ADJUST_CUTOFF
  115 /*
  116  * ADJUST_CUTOFF (experimental):
  117  * if no underlimit class is found due to cutoff, increase cutoff and
  118  * retry the scheduling loop.
  119  * also, don't invoke delay_actions while cutoff is taking effect,
  120  * since a sleeping class won't have a chance to be scheduled in the
  121  * next loop.
  122  *
  123  * now the heuristics for setting the top-level variable (cutoff_) become:
  124  *      1. if a packet arrives for a not-overlimit class, set cutoff
  125  *         to the depth of the class.
  126  *      2. if cutoff is i, and a packet arrives for an overlimit class
  127  *         with an underlimit ancestor at a lower level than i (say j),
  128  *         then set cutoff to j.
  129  *      3. at scheduling a packet, if there is no underlimit class
  130  *         due to the current cutoff level, increase cutoff by 1 and
  131  *         then try to schedule again.
  132  */
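
/*
 * Editor's illustration (not in the original source): a minimal sketch of
 * heuristics 1 and 2 above, assuming the real rm_class/rm_ifdat fields.
 * rmc_queue_packet() below implements the same logic; this function is
 * hypothetical and for exposition only.
 */
#ifdef notdef
static void
cutoff_heuristics_sketch(struct rm_ifdat *ifd, struct rm_class *cl,
    struct timeval *now)
{
        struct rm_class *b;

        if (TV_LT(&cl->undertime_, now)) {
                /* rule 1: an underlimit class lowers cutoff to its depth */
                if (ifd->cutoff_ > cl->depth_)
                        ifd->cutoff_ = cl->depth_;
                return;
        }
        /* rule 2: lower cutoff to the shallowest underlimit ancestor */
        for (b = cl->borrow_; b != NULL; b = b->borrow_)
                if (b->depth_ < ifd->cutoff_ && TV_LT(&b->undertime_, now)) {
                        ifd->cutoff_ = b->depth_;
                        break;
                }
}
#endif /* notdef */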
  133 
  134 /*
  135  * rm_class_t *
  136  * rmc_newclass(...) - Create a new resource management class at priority
  137  * 'pri' on the interface given by 'ifd'.
  138  *
  139  * nsecPerByte  is the data rate of the interface in nanoseconds/byte.
  140  *              E.g., 800 for a 10Mb/s ethernet.  If the class gets less
  141  *              than 100% of the bandwidth, this number should be the
  142  *              'effective' rate for the class.  Let f be the
  143  *              bandwidth fraction allocated to this class, and let
  144  *              nsPerByte be the data rate of the output link in
  145  *              nanoseconds/byte.  Then nsecPerByte is set to
  146  *              nsPerByte / f.  E.g., 1600 (= 800 / .5)
  147  *              for a class that gets 50% of an ethernet's bandwidth.
  148  *
  149  * action       the routine to call when the class is over limit.
  150  *
  151  * maxq         max allowable queue size for class (in packets).
  152  *
  153  * parent       parent class pointer.
  154  *
  155  * borrow       class to borrow from (should be either 'parent' or null).
  156  *
  157  * maxidle      max value allowed for class 'idle' time estimate (this
  158  *              parameter determines how large an initial burst of packets
  159  *              can be before overlimit action is invoked).
  160  *
  161  * offtime      how long 'delay' action will delay when class goes over
  162  *              limit (this parameter determines the steady-state burst
  163  *              size when a class is running over its limit).
  164  *
  165  * Maxidle and offtime have to be computed from the following:  If the
  166  * average packet size is s, the bandwidth fraction allocated to this
  167  * class is f, we want to allow b packet bursts, and the gain of the
  168  * averaging filter is g (= 1 - 2^(-RM_FILTER_GAIN)), then:
  169  *
  170  *   ptime = s * nsPerByte * (1 - f) / f
  171  *   maxidle = ptime * (1 - g^b) / g^b
  172  *   minidle = -ptime * (1 / (f - 1))
  173  *   offtime = ptime * (1 + 1/(1 - g) * (1 - g^(b - 1)) / g^(b - 1))
  174  *
  175  * Operationally, it's convenient to specify maxidle & offtime in units
  176  * independent of the link bandwidth so the maxidle & offtime passed to
  177  * this routine are the above values multiplied by 8*f/(1000*nsPerByte).
  178  * (The constant factor is a scale factor needed to make the parameters
  179  * integers.  This scaling also means that the 'unscaled' values of
  180  * maxidle*nsecPerByte/8 and offtime*nsecPerByte/8 will be in microseconds,
  181  * not nanoseconds.)  Also note that the 'idle' filter computation keeps
  182  * an estimate scaled upward by 2^RM_FILTER_GAIN so the passed value of
  183  * maxidle also must be scaled upward by this value.  Thus, the passed
  184  * values for maxidle and offtime can be computed as follows:
  185  *
  186  * maxidle = maxidle * 2^RM_FILTER_GAIN * 8 / (1000 * nsecPerByte)
  187  * offtime = offtime * 8 / (1000 * nsecPerByte)
  188  *
  189  * When USE_HRTIME is employed, then maxidle and offtime become:
  190  *      maxidle = maxidle * (8.0 / nsecPerByte);
  191  *      offtime = offtime * (8.0 / nsecPerByte);
  192  */
  193 struct rm_class *
  194 rmc_newclass(int pri, struct rm_ifdat *ifd, u_int nsecPerByte,
  195     void (*action)(rm_class_t *, rm_class_t *), int maxq,
  196     struct rm_class *parent, struct rm_class *borrow, u_int maxidle,
  197     int minidle, u_int offtime, int pktsize, int flags)
  198 {
  199         struct rm_class *cl;
  200         struct rm_class *peer;
  201         int              s;
  202 
  203         if (pri >= RM_MAXPRIO)
  204                 return (NULL);
  205 #ifndef ALTQ_RED
  206         if (flags & RMCF_RED) {
  207 #ifdef ALTQ_DEBUG
  208                 printf("rmc_newclass: RED not configured for CBQ!\n");
  209 #endif
  210                 return (NULL);
  211         }
  212 #endif
  213 #ifndef ALTQ_RIO
  214         if (flags & RMCF_RIO) {
  215 #ifdef ALTQ_DEBUG
  216                 printf("rmc_newclass: RIO not configured for CBQ!\n");
  217 #endif
  218                 return (NULL);
  219         }
  220 #endif
  221 
  222         cl = malloc(sizeof(struct rm_class), M_DEVBUF, M_WAITOK|M_ZERO);
  223         if (cl == NULL)
  224                 return (NULL);
  225         CALLOUT_INIT(&cl->callout_);
  226 
  227         cl->q_ = malloc(sizeof(class_queue_t), M_DEVBUF, M_WAITOK|M_ZERO);
  228         if (cl->q_ == NULL) {
  229                 free(cl, M_DEVBUF);
  230                 return (NULL);
  231         }
  232 
  233         /*
  234          * Class initialization.
  235          */
  236         cl->children_ = NULL;
  237         cl->parent_ = parent;
  238         cl->borrow_ = borrow;
  239         cl->leaf_ = 1;
  240         cl->ifdat_ = ifd;
  241         cl->pri_ = pri;
  242         cl->allotment_ = RM_NS_PER_SEC / nsecPerByte; /* Bytes per sec */
  243         cl->depth_ = 0;
  244         cl->qthresh_ = 0;
  245         cl->ns_per_byte_ = nsecPerByte;
  246 
  247         qlimit(cl->q_) = maxq;
  248         qtype(cl->q_) = Q_DROPHEAD;
  249         qlen(cl->q_) = 0;
  250         cl->flags_ = flags;
  251 
  252 #if 1 /* minidle is also scaled in ALTQ */
  253         cl->minidle_ = (minidle * (int)nsecPerByte) / 8;
  254         if (cl->minidle_ > 0)
  255                 cl->minidle_ = 0;
  256 #else
  257         cl->minidle_ = minidle;
  258 #endif
  259         cl->maxidle_ = (maxidle * nsecPerByte) / 8;
  260         if (cl->maxidle_ == 0)
  261                 cl->maxidle_ = 1;
  262 #if 1 /* offtime is also scaled in ALTQ */
  263         cl->avgidle_ = cl->maxidle_;
  264         cl->offtime_ = ((offtime * nsecPerByte) / 8) >> RM_FILTER_GAIN;
  265         if (cl->offtime_ == 0)
  266                 cl->offtime_ = 1;
  267 #else
  268         cl->avgidle_ = 0;
  269         cl->offtime_ = (offtime * nsecPerByte) / 8;
  270 #endif
  271         cl->overlimit = action;
  272 
  273 #ifdef ALTQ_RED
  274         if (flags & (RMCF_RED|RMCF_RIO)) {
  275                 int red_flags, red_pkttime;
  276 
  277                 red_flags = 0;
  278                 if (flags & RMCF_ECN)
  279                         red_flags |= REDF_ECN;
  280                 if (flags & RMCF_FLOWVALVE)
  281                         red_flags |= REDF_FLOWVALVE;
  282 #ifdef ALTQ_RIO
  283                 if (flags & RMCF_CLEARDSCP)
  284                         red_flags |= RIOF_CLEARDSCP;
  285 #endif
  286                 red_pkttime = nsecPerByte * pktsize  / 1000;
  287 
  288                 if (flags & RMCF_RED) {
  289                         cl->red_ = red_alloc(0, 0,
  290                             qlimit(cl->q_) * 10/100,
  291                             qlimit(cl->q_) * 30/100,
  292                             red_flags, red_pkttime);
  293                         if (cl->red_ != NULL)
  294                                 qtype(cl->q_) = Q_RED;
  295                 }
  296 #ifdef ALTQ_RIO
  297                 else {
  298                         cl->red_ = (red_t *)rio_alloc(0, NULL,
  299                                                       red_flags, red_pkttime);
  300                         if (cl->red_ != NULL)
  301                                 qtype(cl->q_) = Q_RIO;
  302                 }
  303 #endif
  304         }
  305 #endif /* ALTQ_RED */
  306 
  307         /*
  308          * put the class into the class tree
  309          */
  310         s = splnet();
  311         if ((peer = ifd->active_[pri]) != NULL) {
  312                 /* find the last class at this pri */
  313                 cl->peer_ = peer;
  314                 while (peer->peer_ != ifd->active_[pri])
  315                         peer = peer->peer_;
  316                 peer->peer_ = cl;
  317         } else {
  318                 ifd->active_[pri] = cl;
  319                 cl->peer_ = cl;
  320         }
  321 
  322         if (cl->parent_) {
  323                 cl->next_ = parent->children_;
  324                 parent->children_ = cl;
  325                 parent->leaf_ = 0;
  326         }
  327 
  328         /*
  329          * Compute the depth of this class and its ancestors in the class
  330          * hierarchy.
  331          */
  332         rmc_depth_compute(cl);
  333 
  334         /*
  335          * If CBQ's WRR is enabled, then initialize the class WRR state.
  336          */
  337         if (ifd->wrr_) {
  338                 ifd->num_[pri]++;
  339                 ifd->alloc_[pri] += cl->allotment_;
  340                 rmc_wrr_set_weights(ifd);
  341         }
  342         splx(s);
  343         return (cl);
  344 }
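
/*
 * Editor's illustration (not in the original source): a userland-style
 * sketch of the maxidle/offtime scaling described above rmc_newclass().
 * Floating point is used for clarity, so this is not kernel code; the
 * helper name and arguments are hypothetical.  s is the average packet
 * size, f the bandwidth fraction, b the allowed burst in packets, and
 * nsecPerByte the class's effective rate (link nsPerByte / f).
 */
#ifdef notdef
#include <math.h>

static void
cbq_scale_params_sketch(double s, double f, int b, u_int nsecPerByte,
    u_int *maxidlep, u_int *offtimep)
{
        double g = 1.0 - 1.0 / (1 << RM_FILTER_GAIN);
        double nsPerByte = nsecPerByte * f;             /* link rate */
        double ptime = s * nsPerByte * (1.0 - f) / f;
        double gb = pow(g, (double)b);
        double gb1 = pow(g, (double)(b - 1));
        double maxidle = ptime * (1.0 - gb) / gb;
        double offtime = ptime * (1.0 + 1.0/(1.0 - g) * (1.0 - gb1) / gb1);

        /* apply the 2^RM_FILTER_GAIN and 8/(1000*nsecPerByte) scaling */
        *maxidlep = (u_int)(maxidle * (1 << RM_FILTER_GAIN) * 8 /
            (1000 * nsecPerByte));
        *offtimep = (u_int)(offtime * 8 / (1000 * nsecPerByte));
}
#endif /* notdef */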
  345 
  346 int
  347 rmc_modclass(struct rm_class *cl, u_int nsecPerByte, int maxq, u_int maxidle,
  348     int minidle, u_int offtime, int pktsize)
  349 {
  350         struct rm_ifdat *ifd;
  351         u_int            old_allotment;
  352         int              s;
  353 
  354         ifd = cl->ifdat_;
  355         old_allotment = cl->allotment_;
  356 
  357         s = splnet();
  358         cl->allotment_ = RM_NS_PER_SEC / nsecPerByte; /* Bytes per sec */
  359         cl->qthresh_ = 0;
  360         cl->ns_per_byte_ = nsecPerByte;
  361 
  362         qlimit(cl->q_) = maxq;
  363 
  364 #if 1 /* minidle is also scaled in ALTQ */
  365         cl->minidle_ = (minidle * nsecPerByte) / 8;
  366         if (cl->minidle_ > 0)
  367                 cl->minidle_ = 0;
  368 #else
  369         cl->minidle_ = minidle;
  370 #endif
  371         cl->maxidle_ = (maxidle * nsecPerByte) / 8;
  372         if (cl->maxidle_ == 0)
  373                 cl->maxidle_ = 1;
  374 #if 1 /* offtime is also scaled in ALTQ */
  375         cl->avgidle_ = cl->maxidle_;
  376         cl->offtime_ = ((offtime * nsecPerByte) / 8) >> RM_FILTER_GAIN;
  377         if (cl->offtime_ == 0)
  378                 cl->offtime_ = 1;
  379 #else
  380         cl->avgidle_ = 0;
  381         cl->offtime_ = (offtime * nsecPerByte) / 8;
  382 #endif
  383 
  384         /*
  385          * If CBQ's WRR is enabled, then initialize the class WRR state.
  386          */
  387         if (ifd->wrr_) {
  388                 ifd->alloc_[cl->pri_] += cl->allotment_ - old_allotment;
  389                 rmc_wrr_set_weights(ifd);
  390         }
  391         splx(s);
  392         return (0);
  393 }
  394 
  395 /*
  396  * static void
  397  * rmc_wrr_set_weights(struct rm_ifdat *ifdat) - This function computes
  398  *      the appropriate round-robin weights for the CBQ weighted round-robin
  399  *      algorithm.
  400  *
  401  *      Returns: NONE
  402  */
  403 
  404 static void
  405 rmc_wrr_set_weights(struct rm_ifdat *ifd)
  406 {
  407         int             i;
  408         struct rm_class *cl, *clh;
  409 
  410         for (i = 0; i < RM_MAXPRIO; i++) {
  411                 /*
  412                  * This is inverted from that of the simulator to
  413                  * maintain precision.
  414                  */
  415                 if (ifd->num_[i] == 0)
  416                         ifd->M_[i] = 0;
  417                 else
  418                         ifd->M_[i] = ifd->alloc_[i] /
  419                                 (ifd->num_[i] * ifd->maxpkt_);
  420                 /*
  421                  * Compute the weighted allotment for each class.
  422                  * This takes the expensive div instruction out
  423                  * of the main loop for the wrr scheduling path.
  424                  * These only get recomputed when a class comes or
  425                  * goes.
  426                  */
  427                 if (ifd->active_[i] != NULL) {
  428                         clh = cl = ifd->active_[i];
  429                         do {
  430                                 /* safe-guard for slow link or alloc_ == 0 */
  431                                 if (ifd->M_[i] == 0)
  432                                         cl->w_allotment_ = 0;
  433                                 else
  434                                         cl->w_allotment_ = cl->allotment_ /
  435                                                 ifd->M_[i];
  436                                 cl = cl->peer_;
  437                         } while ((cl != NULL) && (cl != clh));
  438                 }
  439         }
  440 }
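
/*
 * Editor's worked example (not in the original source), assuming a
 * 10Mb/s link (nsPerByte = 800) and maxpkt_ = 1500: a 50% class has
 * nsecPerByte = 1600, so allotment_ = 10^9/1600 = 625000 bytes/s, and
 * a 25% class has allotment_ = 312500.  With both at one priority,
 *      alloc_ = 937500, num_ = 2,
 *      M_ = 937500 / (2 * 1500) = 312,
 *      w_allotment = 625000/312 = 2003 and 312500/312 = 1001 bytes,
 * so each WRR round lets the classes send roughly 2003 and 1001 bytes,
 * preserving the 2:1 bandwidth ratio without a divide per packet.
 */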
  441 
  442 int
  443 rmc_get_weight(struct rm_ifdat *ifd, int pri)
  444 {
  445         if ((pri >= 0) && (pri < RM_MAXPRIO))
  446                 return (ifd->M_[pri]);
  447         else
  448                 return (0);
  449 }
  450 
  451 /*
  452  * static void
  453  * rmc_depth_compute(struct rm_class *cl) - This function computes the
  454  *      appropriate depth of class 'cl' and its ancestors.
  455  *
  456  *      Returns:        NONE
  457  */
  458 
  459 static void
  460 rmc_depth_compute(struct rm_class *cl)
  461 {
  462         rm_class_t      *t = cl, *p;
  463 
  464         /*
  465          * Recompute the depth for the branch of the tree.
  466          */
  467         while (t != NULL) {
  468                 p = t->parent_;
  469                 if (p && (t->depth_ >= p->depth_)) {
  470                         p->depth_ = t->depth_ + 1;
  471                         t = p;
  472                 } else
  473                         t = NULL;
  474         }
  475 }
  476 
  477 /*
  478  * static void
  479  * rmc_depth_recompute(struct rm_class *cl) - This function re-computes
  480  *      the depth of the tree after a class has been deleted.
  481  *
  482  *      Returns:        NONE
  483  */
  484 
  485 static void
  486 rmc_depth_recompute(rm_class_t *cl)
  487 {
  488 #if 1 /* ALTQ */
  489         rm_class_t      *p, *t;
  490 
  491         p = cl;
  492         while (p != NULL) {
  493                 if ((t = p->children_) == NULL) {
  494                         p->depth_ = 0;
  495                 } else {
  496                         int cdepth = 0;
  497 
  498                         while (t != NULL) {
  499                                 if (t->depth_ > cdepth)
  500                                         cdepth = t->depth_;
  501                                 t = t->next_;
  502                         }
  503 
  504                         if (p->depth_ == cdepth + 1)
  505                                 /* no change to this parent */
  506                                 return;
  507 
  508                         p->depth_ = cdepth + 1;
  509                 }
  510 
  511                 p = p->parent_;
  512         }
  513 #else
  514         rm_class_t      *t;
  515 
  516         if (cl->depth_ >= 1) {
  517                 if (cl->children_ == NULL) {
  518                         cl->depth_ = 0;
  519                 } else if ((t = cl->children_) != NULL) {
  520                         while (t != NULL) {
  521                                 if (t->children_ != NULL)
  522                                         rmc_depth_recompute(t);
  523                                 t = t->next_;
  524                         }
  525                 } else
  526                         rmc_depth_compute(cl);
  527         }
  528 #endif
  529 }
  530 
  531 /*
  532  * void
  533  * rmc_delete_class(struct rm_ifdat *ifdat, struct rm_class *cl) - This
  534  *      function deletes a class from the link-sharing structure and frees
  535  *      all resources associated with the class.
  536  *
  537  *      Returns: NONE
  538  */
  539 
  540 void
  541 rmc_delete_class(struct rm_ifdat *ifd, struct rm_class *cl)
  542 {
  543         struct rm_class *p, *head, *previous;
  544         int              s;
  545 
  546         ASSERT(cl->children_ == NULL);
  547 
  548         if (cl->sleeping_)
  549                 CALLOUT_STOP(&cl->callout_);
  550 
  551         s = splnet();
  552         /*
  553          * Free packets in the packet queue.
  554          * XXX - this may not be a desired behavior.  Packets should be
  555          *              re-queued.
  556          */
  557         rmc_dropall(cl);
  558 
  559         /*
   560          * If the class has a parent, then remove the class from its
   561          * parent's children chain.
  562          */
  563         if (cl->parent_ != NULL) {
  564                 head = cl->parent_->children_;
  565                 p = previous = head;
  566                 if (head->next_ == NULL) {
  567                         ASSERT(head == cl);
  568                         cl->parent_->children_ = NULL;
  569                         cl->parent_->leaf_ = 1;
  570                 } else while (p != NULL) {
  571                         if (p == cl) {
  572                                 if (cl == head)
  573                                         cl->parent_->children_ = cl->next_;
  574                                 else
  575                                         previous->next_ = cl->next_;
  576                                 cl->next_ = NULL;
  577                                 p = NULL;
  578                         } else {
  579                                 previous = p;
  580                                 p = p->next_;
  581                         }
  582                 }
  583         }
  584 
  585         /*
  586          * Delete class from class priority peer list.
  587          */
  588         if ((p = ifd->active_[cl->pri_]) != NULL) {
  589                 /*
  590                  * If there is more than one member of this priority
  591                  * level, then look for class(cl) in the priority level.
  592                  */
  593                 if (p != p->peer_) {
  594                         while (p->peer_ != cl)
  595                                 p = p->peer_;
  596                         p->peer_ = cl->peer_;
  597 
  598                         if (ifd->active_[cl->pri_] == cl)
  599                                 ifd->active_[cl->pri_] = cl->peer_;
  600                 } else {
  601                         ASSERT(p == cl);
  602                         ifd->active_[cl->pri_] = NULL;
  603                 }
  604         }
  605 
  606         /*
  607          * Recompute the WRR weights.
  608          */
  609         if (ifd->wrr_) {
  610                 ifd->alloc_[cl->pri_] -= cl->allotment_;
  611                 ifd->num_[cl->pri_]--;
  612                 rmc_wrr_set_weights(ifd);
  613         }
  614 
  615         /*
  616          * Re-compute the depth of the tree.
  617          */
  618 #if 1 /* ALTQ */
  619         rmc_depth_recompute(cl->parent_);
  620 #else
  621         rmc_depth_recompute(ifd->root_);
  622 #endif
  623 
  624         splx(s);
  625 
  626         /*
  627          * Free the class structure.
  628          */
  629         if (cl->red_ != NULL) {
  630 #ifdef ALTQ_RIO
  631                 if (q_is_rio(cl->q_))
  632                         rio_destroy((rio_t *)cl->red_);
  633 #endif
  634 #ifdef ALTQ_RED
  635                 if (q_is_red(cl->q_))
  636                         red_destroy(cl->red_);
  637 #endif
  638         }
  639         free(cl->q_, M_DEVBUF);
  640         free(cl, M_DEVBUF);
  641 }
  642 
  643 
  644 /*
  645  * int
  646  * rmc_init(...) - Initialize the resource management data structures
  647  *      associated with the output portion of interface 'ifp'.  'ifd' is
  648  *      where the structures will be built (for backwards compatibility, the
  649  *      structures aren't kept in the ifnet struct).  'nsecPerByte'
  650  *      gives the link speed (inverse of bandwidth) in nanoseconds/byte.
  651  *      'restart' is the driver-specific routine that the generic 'delay
  652  *      until under limit' action will call to restart output.  `maxq'
  653  *      is the queue size of the 'link' & 'default' classes.  'maxqueued'
  654  *      is the maximum number of packets that the resource management
  655  *      code will allow to be queued 'downstream' (this is typically 1).
  656  *
  657  *      Returns:        0 on success
  658  */
  659 
  660 int
  661 rmc_init(struct ifaltq *ifq, struct rm_ifdat *ifd, u_int nsecPerByte,
  662     void (*restart)(struct ifaltq *), int maxq, int maxqueued, u_int maxidle,
  663     int minidle, u_int offtime, int flags)
  664 {
  665         int i, mtu;
  666 
  667         /*
  668          * Initialize the CBQ tracing/debug facility.
  669          */
  670         CBQTRACEINIT();
  671 
  672         mtu = ifq->altq_ifp->if_mtu;
  673         if (mtu < 1) {
  674                 printf("altq: %s: invalid MTU (interface not initialized?)\n",
  675                     ifq->altq_ifp->if_xname);
  676                 return (EINVAL);
  677         }
  678 
  679         (void)memset((char *)ifd, 0, sizeof (*ifd));
  680         ifd->ifq_ = ifq;
  681         ifd->restart = restart;
  682         ifd->maxqueued_ = maxqueued;
  683         ifd->ns_per_byte_ = nsecPerByte;
  684         ifd->maxpkt_ = mtu;
  685         ifd->wrr_ = (flags & RMCF_WRR) ? 1 : 0;
  686         ifd->efficient_ = (flags & RMCF_EFFICIENT) ? 1 : 0;
  687 #if 1
  688         ifd->maxiftime_ = mtu * nsecPerByte / 1000 * 16;
  689         if (mtu * nsecPerByte > 10 * 1000000)
  690                 ifd->maxiftime_ /= 4;
  691 #endif
  692 
  693         reset_cutoff(ifd);
  694         CBQTRACE(rmc_init, 'INIT', ifd->cutoff_);
  695 
  696         /*
  697          * Initialize the CBQ's WRR state.
  698          */
  699         for (i = 0; i < RM_MAXPRIO; i++) {
  700                 ifd->alloc_[i] = 0;
  701                 ifd->M_[i] = 0;
  702                 ifd->num_[i] = 0;
  703                 ifd->na_[i] = 0;
  704                 ifd->active_[i] = NULL;
  705         }
  706 
  707         /*
  708          * Initialize current packet state.
  709          */
  710         ifd->qi_ = 0;
  711         ifd->qo_ = 0;
  712         for (i = 0; i < RM_MAXQUEUED; i++) {
  713                 ifd->class_[i] = NULL;
  714                 ifd->curlen_[i] = 0;
  715                 ifd->borrowed_[i] = NULL;
  716         }
  717 
  718         /*
  719          * Create the root class of the link-sharing structure.
  720          */
  721         if ((ifd->root_ = rmc_newclass(0, ifd,
  722                                        nsecPerByte,
  723                                        rmc_root_overlimit, maxq, 0, 0,
  724                                        maxidle, minidle, offtime,
  725                                        0, 0)) == NULL) {
  726                 printf("rmc_init: root class not allocated\n");
  727                 return (ENOMEM);
  728         }
  729         ifd->root_->depth_ = 0;
  730 
  731         return (0);
  732 }
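
/*
 * Editor's usage sketch (not in the original source): how a discipline
 * such as CBQ might bring up the link-sharing structure and add a leaf
 * class that gets 50% of a 10Mb/s link.  'my_restart' and the numeric
 * parameters are placeholders; see altq_cbq.c for the real caller.
 */
#ifdef notdef
static void my_restart(struct ifaltq *);        /* driver restart hook */

static int
example_attach(struct ifaltq *ifq, struct rm_ifdat *ifd)
{
        u_int nsPerByte = 800;                  /* 10Mb/s link */
        int error;

        error = rmc_init(ifq, ifd, nsPerByte, my_restart, 64 /* maxq */,
            RM_MAXQUEUED, 0 /* maxidle */, 0 /* minidle */,
            0 /* offtime */, RMCF_WRR);
        if (error != 0)
                return (error);

        /* a leaf at pri 1, borrowing from the root when it is overlimit */
        if (rmc_newclass(1, ifd, nsPerByte * 2 /* 50% share */,
            rmc_delay_action, 64, ifd->root_, ifd->root_,
            0, 0, 0, 1500 /* pktsize */, 0) == NULL)
                return (ENOMEM);
        return (0);
}
#endif /* notdef */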
  733 
  734 /*
  735  * void
  736  * rmc_queue_packet(struct rm_class *cl, mbuf_t *m) - Add packet given by
  737  *      mbuf 'm' to queue for resource class 'cl'.  This routine is called
  738  *      by a driver's if_output routine.  This routine must be called with
  739  *      output packet completion interrupts locked out (to avoid racing with
  740  *      rmc_dequeue_next).
  741  *
  742  *      Returns:        0 on successful queueing
  743  *                      -1 when packet drop occurs
  744  */
  745 int
  746 rmc_queue_packet(struct rm_class *cl, mbuf_t *m)
  747 {
  748         struct timeval   now;
  749         struct rm_ifdat *ifd = cl->ifdat_;
  750         int              cpri = cl->pri_;
  751         int              is_empty = qempty(cl->q_);
  752 
  753         RM_GETTIME(now);
  754         if (ifd->cutoff_ > 0) {
  755                 if (TV_LT(&cl->undertime_, &now)) {
  756                         if (ifd->cutoff_ > cl->depth_)
  757                                 ifd->cutoff_ = cl->depth_;
  758                         CBQTRACE(rmc_queue_packet, 'ffoc', cl->depth_);
  759                 }
  760 #if 1 /* ALTQ */
  761                 else {
  762                         /*
  763                          * the class is overlimit. if the class has
  764                          * underlimit ancestors, set cutoff to the lowest
  765                          * depth among them.
  766                          */
  767                         struct rm_class *borrow = cl->borrow_;
  768 
  769                         while (borrow != NULL &&
  770                                borrow->depth_ < ifd->cutoff_) {
  771                                 if (TV_LT(&borrow->undertime_, &now)) {
  772                                         ifd->cutoff_ = borrow->depth_;
  773                                         CBQTRACE(rmc_queue_packet, 'ffob', ifd->cutoff_);
  774                                         break;
  775                                 }
  776                                 borrow = borrow->borrow_;
  777                         }
  778                 }
  779 #else /* !ALTQ */
  780                 else if ((ifd->cutoff_ > 1) && cl->borrow_) {
  781                         if (TV_LT(&cl->borrow_->undertime_, &now)) {
  782                                 ifd->cutoff_ = cl->borrow_->depth_;
  783                                 CBQTRACE(rmc_queue_packet, 'ffob',
  784                                          cl->borrow_->depth_);
  785                         }
  786                 }
  787 #endif /* !ALTQ */
  788         }
  789 
  790         if (_rmc_addq(cl, m) < 0)
  791                 /* failed */
  792                 return (-1);
  793 
  794         if (is_empty) {
  795                 CBQTRACE(rmc_queue_packet, 'ytpe', cl->stats_.handle);
  796                 ifd->na_[cpri]++;
  797         }
  798 
  799         if (qlen(cl->q_) > qlimit(cl->q_)) {
  800                 /* note: qlimit can be set to 0 or 1 */
  801                 rmc_drop_action(cl);
  802                 return (-1);
  803         }
  804         return (0);
  805 }
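
/*
 * Editor's enqueue-side sketch (not in the original source): a
 * discipline's enqueue routine maps the packet to an rm_class and
 * hands it to rmc_queue_packet(); on -1 the packet has already been
 * dropped and freed by the drop action.  'classify' is hypothetical.
 */
#ifdef notdef
static struct rm_class *classify(struct rm_ifdat *, mbuf_t *);

static int
example_enqueue(struct rm_ifdat *ifd, mbuf_t *m)
{
        struct rm_class *cl;

        cl = classify(ifd, m);          /* pick a leaf class, else default */
        if (rmc_queue_packet(cl, m) < 0)
                return (ENOBUFS);       /* consumed by rmc_drop_action() */
        return (0);
}
#endif /* notdef */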
  806 
  807 /*
  808  * void
  809  * rmc_tl_satisfied(struct rm_ifdat *ifd, struct timeval *now) - Check all
  810  *      classes to see if they are satisfied.
  811  */
  812 
  813 static void
  814 rmc_tl_satisfied(struct rm_ifdat *ifd, struct timeval *now)
  815 {
  816         int              i;
  817         rm_class_t      *p, *bp;
  818 
  819         for (i = RM_MAXPRIO - 1; i >= 0; i--) {
  820                 if ((bp = ifd->active_[i]) != NULL) {
  821                         p = bp;
  822                         do {
  823                                 if (!rmc_satisfied(p, now)) {
  824                                         ifd->cutoff_ = p->depth_;
  825                                         return;
  826                                 }
  827                                 p = p->peer_;
  828                         } while (p != bp);
  829                 }
  830         }
  831 
  832         reset_cutoff(ifd);
  833 }
  834 
  835 /*
  836  * rmc_satisfied - Return 1 if the class is satisfied, 0 otherwise.
  837  */
  838 
  839 static int
  840 rmc_satisfied(struct rm_class *cl, struct timeval *now)
  841 {
  842         rm_class_t      *p;
  843 
  844         if (cl == NULL)
  845                 return (1);
  846         if (TV_LT(now, &cl->undertime_))
  847                 return (1);
  848         if (cl->depth_ == 0) {
  849                 if (!cl->sleeping_ && (qlen(cl->q_) > cl->qthresh_))
  850                         return (0);
  851                 else
  852                         return (1);
  853         }
  854         if (cl->children_ != NULL) {
  855                 p = cl->children_;
  856                 while (p != NULL) {
  857                         if (!rmc_satisfied(p, now))
  858                                 return (0);
  859                         p = p->next_;
  860                 }
  861         }
  862 
  863         return (1);
  864 }
  865 
  866 /*
  867  * Return 1 if class 'cl' is under limit or can borrow from a parent,
  868  * 0 if overlimit.  As a side-effect, this routine will invoke the
  869  * class overlimit action if the class is overlimit.
  870  */
  871 
  872 static int
  873 rmc_under_limit(struct rm_class *cl, struct timeval *now)
  874 {
  875         rm_class_t      *p = cl;
  876         rm_class_t      *top;
  877         struct rm_ifdat *ifd = cl->ifdat_;
  878 
  879         ifd->borrowed_[ifd->qi_] = NULL;
  880         /*
  881          * If cl is the root class, then always return that it is
  882          * underlimit.  Otherwise, check to see if the class is underlimit.
  883          */
  884         if (cl->parent_ == NULL)
  885                 return (1);
  886 
  887         if (cl->sleeping_) {
  888                 if (TV_LT(now, &cl->undertime_))
  889                         return (0);
  890 
  891                 CALLOUT_STOP(&cl->callout_);
  892                 cl->sleeping_ = 0;
  893                 cl->undertime_.tv_sec = 0;
  894                 return (1);
  895         }
  896 
  897         top = NULL;
  898         while (cl->undertime_.tv_sec && TV_LT(now, &cl->undertime_)) {
  899                 if (((cl = cl->borrow_) == NULL) ||
  900                     (cl->depth_ > ifd->cutoff_)) {
  901 #ifdef ADJUST_CUTOFF
  902                         if (cl != NULL)
  903                                 /* cutoff is taking effect, just
  904                                    return false without calling
  905                                    the delay action. */
  906                                 return (0);
  907 #endif
  908 #ifdef BORROW_OFFTIME
  909                         /*
  910                          * check if the class can borrow offtime too.
  911                          * borrow offtime from the top of the borrow
  912                          * chain if the top class is not overloaded.
  913                          */
  914                         if (cl != NULL) {
  915                                 /* cutoff is taking effect, use this class as top. */
  916                                 top = cl;
  917                                 CBQTRACE(rmc_under_limit, 'ffou', ifd->cutoff_);
  918                         }
  919                         if (top != NULL && top->avgidle_ == top->minidle_)
  920                                 top = NULL;
  921                         p->overtime_ = *now;
  922                         (p->overlimit)(p, top);
  923 #else
  924                         p->overtime_ = *now;
  925                         (p->overlimit)(p, NULL);
  926 #endif
  927                         return (0);
  928                 }
  929                 top = cl;
  930         }
  931 
  932         if (cl != p)
  933                 ifd->borrowed_[ifd->qi_] = cl;
  934         return (1);
  935 }
  936 
  937 /*
  938  * _rmc_wrr_dequeue_next() - This is the scheduler for WRR, as opposed
  939  *      to packet-by-packet round robin.
  940  *
  941  * The heart of the weighted round-robin scheduler, which decides which
  942  * class next gets to send a packet.  Highest priority first, then
  943  * weighted round-robin within priorities.
  944  *
  945  * Each able-to-send class gets to send until its byte allocation is
  946  * exhausted.  Thus, the active pointer is only changed after a class has
  947  * exhausted its allocation.
  948  *
  949  * If the scheduler finds no class that is underlimit or able to borrow,
  950  * then the first class found that had a nonzero queue and is allowed to
  951  * borrow gets to send.
  952  */
  953 
  954 static mbuf_t *
  955 _rmc_wrr_dequeue_next(struct rm_ifdat *ifd, int op)
  956 {
  957         struct rm_class *cl = NULL, *first = NULL;
  958         u_int            deficit;
  959         int              cpri;
  960         mbuf_t          *m;
  961         struct timeval   now;
  962 
  963         RM_GETTIME(now);
  964 
  965         /*
  966          * if the driver polls the top of the queue and then removes
  967          * the polled packet, we must return the same packet.
  968          */
  969         if (op == ALTDQ_REMOVE && ifd->pollcache_) {
  970                 cl = ifd->pollcache_;
  971                 cpri = cl->pri_;
  972                 if (ifd->efficient_) {
  973                         /* check if this class is overlimit */
  974                         if (cl->undertime_.tv_sec != 0 &&
  975                             rmc_under_limit(cl, &now) == 0)
  976                                 first = cl;
  977                 }
  978                 ifd->pollcache_ = NULL;
  979                 goto _wrr_out;
  980         }
  981         else {
  982                 /* mode == ALTDQ_POLL || pollcache == NULL */
  983                 ifd->pollcache_ = NULL;
  984                 ifd->borrowed_[ifd->qi_] = NULL;
  985         }
  986 #ifdef ADJUST_CUTOFF
  987  _again:
  988 #endif
  989         for (cpri = RM_MAXPRIO - 1; cpri >= 0; cpri--) {
  990                 if (ifd->na_[cpri] == 0)
  991                         continue;
  992                 deficit = 0;
  993                 /*
  994                  * Loop through twice for a priority level, if some class
  995                  * was unable to send a packet the first round because
  996                  * of the weighted round-robin mechanism.
  997                  * During the second loop at this level, deficit==2.
  998                  * (This second loop is not needed if for every class,
   999                  * "M[cl->pri_]" times "cl->allotment" is greater than
 1000                  * the byte size for the largest packet in the class.)
 1001                  */
 1002  _wrr_loop:
 1003                 cl = ifd->active_[cpri];
 1004                 ASSERT(cl != NULL);
 1005                 do {
 1006                         if ((deficit < 2) && (cl->bytes_alloc_ <= 0))
 1007                                 cl->bytes_alloc_ += cl->w_allotment_;
 1008                         if (!qempty(cl->q_)) {
 1009                                 if ((cl->undertime_.tv_sec == 0) ||
 1010                                     rmc_under_limit(cl, &now)) {
 1011                                         if (cl->bytes_alloc_ > 0 || deficit > 1)
 1012                                                 goto _wrr_out;
 1013 
 1014                                         /* underlimit but no alloc */
 1015                                         deficit = 1;
 1016 #if 1
 1017                                         ifd->borrowed_[ifd->qi_] = NULL;
 1018 #endif
 1019                                 }
 1020                                 else if (first == NULL && cl->borrow_ != NULL)
 1021                                         first = cl; /* borrowing candidate */
 1022                         }
 1023 
 1024                         cl->bytes_alloc_ = 0;
 1025                         cl = cl->peer_;
 1026                 } while (cl != ifd->active_[cpri]);
 1027 
 1028                 if (deficit == 1) {
 1029                         /* first loop found an underlimit class with deficit */
 1030                         /* Loop on same priority level, with new deficit.  */
 1031                         deficit = 2;
 1032                         goto _wrr_loop;
 1033                 }
 1034         }
 1035 
 1036 #ifdef ADJUST_CUTOFF
 1037         /*
 1038          * no underlimit class found.  if cutoff is taking effect,
 1039          * increase cutoff and try again.
 1040          */
 1041         if (first != NULL && ifd->cutoff_ < ifd->root_->depth_) {
 1042                 ifd->cutoff_++;
 1043                 CBQTRACE(_rmc_wrr_dequeue_next, 'ojda', ifd->cutoff_);
 1044                 goto _again;
 1045         }
 1046 #endif /* ADJUST_CUTOFF */
 1047         /*
 1048          * If LINK_EFFICIENCY is turned on, then the first overlimit
 1049          * class we encounter will send a packet if all the classes
 1050          * of the link-sharing structure are overlimit.
 1051          */
 1052         reset_cutoff(ifd);
 1053         CBQTRACE(_rmc_wrr_dequeue_next, 'otsr', ifd->cutoff_);
 1054 
 1055         if (!ifd->efficient_ || first == NULL)
 1056                 return (NULL);
 1057 
 1058         cl = first;
 1059         cpri = cl->pri_;
 1060 #if 0   /* too time-consuming for nothing */
 1061         if (cl->sleeping_)
 1062                 CALLOUT_STOP(&cl->callout_);
 1063         cl->sleeping_ = 0;
 1064         cl->undertime_.tv_sec = 0;
 1065 #endif
 1066         ifd->borrowed_[ifd->qi_] = cl->borrow_;
 1067         ifd->cutoff_ = cl->borrow_->depth_;
 1068 
 1069         /*
  1070          * Dequeue the packet and do the bookkeeping...
 1071          */
 1072  _wrr_out:
 1073         if (op == ALTDQ_REMOVE) {
 1074                 m = _rmc_getq(cl);
 1075                 if (m == NULL)
 1076                         panic("_rmc_wrr_dequeue_next");
 1077                 if (qempty(cl->q_))
 1078                         ifd->na_[cpri]--;
 1079 
 1080                 /*
 1081                  * Update class statistics and link data.
 1082                  */
 1083                 if (cl->bytes_alloc_ > 0)
 1084                         cl->bytes_alloc_ -= m_pktlen(m);
 1085 
 1086                 if ((cl->bytes_alloc_ <= 0) || first == cl)
 1087                         ifd->active_[cl->pri_] = cl->peer_;
 1088                 else
 1089                         ifd->active_[cl->pri_] = cl;
 1090 
 1091                 ifd->class_[ifd->qi_] = cl;
 1092                 ifd->curlen_[ifd->qi_] = m_pktlen(m);
 1093                 ifd->now_[ifd->qi_] = now;
 1094                 ifd->qi_ = (ifd->qi_ + 1) % ifd->maxqueued_;
 1095                 ifd->queued_++;
 1096         } else {
  1097                 /* mode == ALTDQ_POLL */
 1098                 m = _rmc_pollq(cl);
 1099                 ifd->pollcache_ = cl;
 1100         }
 1101         return (m);
 1102 }
 1103 
 1104 /*
 1105  * Dequeue & return next packet from the highest priority class that
 1106  * has a packet to send & has enough allocation to send it.  This
 1107  * routine is called by a driver whenever it needs a new packet to
 1108  * output.
 1109  */
 1110 static mbuf_t *
 1111 _rmc_prr_dequeue_next(struct rm_ifdat *ifd, int op)
 1112 {
 1113         mbuf_t          *m;
 1114         int              cpri;
 1115         struct rm_class *cl, *first = NULL;
 1116         struct timeval   now;
 1117 
 1118         RM_GETTIME(now);
 1119 
 1120         /*
 1121          * if the driver polls the top of the queue and then removes
 1122          * the polled packet, we must return the same packet.
 1123          */
 1124         if (op == ALTDQ_REMOVE && ifd->pollcache_) {
 1125                 cl = ifd->pollcache_;
 1126                 cpri = cl->pri_;
 1127                 ifd->pollcache_ = NULL;
 1128                 goto _prr_out;
 1129         } else {
 1130                 /* mode == ALTDQ_POLL || pollcache == NULL */
 1131                 ifd->pollcache_ = NULL;
 1132                 ifd->borrowed_[ifd->qi_] = NULL;
 1133         }
 1134 #ifdef ADJUST_CUTOFF
 1135  _again:
 1136 #endif
 1137         for (cpri = RM_MAXPRIO - 1; cpri >= 0; cpri--) {
 1138                 if (ifd->na_[cpri] == 0)
 1139                         continue;
 1140                 cl = ifd->active_[cpri];
 1141                 ASSERT(cl != NULL);
 1142                 do {
 1143                         if (!qempty(cl->q_)) {
 1144                                 if ((cl->undertime_.tv_sec == 0) ||
 1145                                     rmc_under_limit(cl, &now))
 1146                                         goto _prr_out;
 1147                                 if (first == NULL && cl->borrow_ != NULL)
 1148                                         first = cl;
 1149                         }
 1150                         cl = cl->peer_;
 1151                 } while (cl != ifd->active_[cpri]);
 1152         }
 1153 
 1154 #ifdef ADJUST_CUTOFF
 1155         /*
 1156          * no underlimit class found.  if cutoff is taking effect, increase
 1157          * cutoff and try again.
 1158          */
 1159         if (first != NULL && ifd->cutoff_ < ifd->root_->depth_) {
 1160                 ifd->cutoff_++;
 1161                 goto _again;
 1162         }
 1163 #endif /* ADJUST_CUTOFF */
 1164         /*
 1165          * If LINK_EFFICIENCY is turned on, then the first overlimit
 1166          * class we encounter will send a packet if all the classes
 1167          * of the link-sharing structure are overlimit.
 1168          */
 1169         reset_cutoff(ifd);
 1170         if (!ifd->efficient_ || first == NULL)
 1171                 return (NULL);
 1172 
 1173         cl = first;
 1174         cpri = cl->pri_;
 1175 #if 0   /* too time-consuming for nothing */
 1176         if (cl->sleeping_)
 1177                 CALLOUT_STOP(&cl->callout_);
 1178         cl->sleeping_ = 0;
 1179         cl->undertime_.tv_sec = 0;
 1180 #endif
 1181         ifd->borrowed_[ifd->qi_] = cl->borrow_;
 1182         ifd->cutoff_ = cl->borrow_->depth_;
 1183 
 1184         /*
  1185          * Dequeue the packet and do the bookkeeping...
 1186          */
 1187  _prr_out:
 1188         if (op == ALTDQ_REMOVE) {
 1189                 m = _rmc_getq(cl);
 1190                 if (m == NULL)
 1191                         panic("_rmc_prr_dequeue_next");
 1192                 if (qempty(cl->q_))
 1193                         ifd->na_[cpri]--;
 1194 
 1195                 ifd->active_[cpri] = cl->peer_;
 1196 
 1197                 ifd->class_[ifd->qi_] = cl;
 1198                 ifd->curlen_[ifd->qi_] = m_pktlen(m);
 1199                 ifd->now_[ifd->qi_] = now;
 1200                 ifd->qi_ = (ifd->qi_ + 1) % ifd->maxqueued_;
 1201                 ifd->queued_++;
 1202         } else {
 1203                 /* mode == ALTDQ_POLL */
 1204                 m = _rmc_pollq(cl);
 1205                 ifd->pollcache_ = cl;
 1206         }
 1207         return (m);
 1208 }
 1209 
 1210 /*
 1211  * mbuf_t *
 1212  * rmc_dequeue_next(struct rm_ifdat *ifd, struct timeval *now) - this function
 1213  *      is invoked by the packet driver to get the next packet to be
 1214  *      dequeued and output on the link.  If WRR is enabled, then the
  1215  *      WRR dequeue next routine will determine the next packet to be sent.
 1216  *      Otherwise, packet-by-packet round robin is invoked.
 1217  *
 1218  *      Returns:        NULL, if a packet is not available or if all
 1219  *                      classes are overlimit.
 1220  *
 1221  *                      Otherwise, Pointer to the next packet.
 1222  */
 1223 
 1224 mbuf_t *
 1225 rmc_dequeue_next(struct rm_ifdat *ifd, int mode)
 1226 {
 1227         if (ifd->queued_ >= ifd->maxqueued_)
 1228                 return (NULL);
 1229         else if (ifd->wrr_)
 1230                 return (_rmc_wrr_dequeue_next(ifd, mode));
 1231         else
 1232                 return (_rmc_prr_dequeue_next(ifd, mode));
 1233 }
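
/*
 * Editor's dequeue-side sketch (not in the original source): the
 * POLL/REMOVE contract as a driver start routine might use it.  A
 * POLL must be followed by a REMOVE returning the same packet, and
 * rmc_update_class_util() runs at (estimated) transmit completion,
 * as altq_cbq.c does after each ALTDQ_REMOVE.
 */
#ifdef notdef
static void
example_start(struct rm_ifdat *ifd)
{
        mbuf_t *m;

        while ((m = rmc_dequeue_next(ifd, ALTDQ_POLL)) != NULL) {
                /* hardware has room, so commit to sending this packet */
                m = rmc_dequeue_next(ifd, ALTDQ_REMOVE);
                /* ... hand 'm' to the hardware ... */
                rmc_update_class_util(ifd);     /* estimated completion */
        }
}
#endif /* notdef */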
 1234 
 1235 /*
 1236  * Update the utilization estimate for the packet that just completed.
 1237  * The packet's class & the parent(s) of that class all get their
 1238  * estimators updated.  This routine is called by the driver's output-
 1239  * packet-completion interrupt service routine.
 1240  */
 1241 
 1242 /*
 1243  * a macro to approximate "divide by 1000" that gives 0.000999,
 1244  * if a value has enough effective digits.
 1245  * (on pentium, mul takes 9 cycles but div takes 46!)
 1246  */
 1247 #define NSEC_TO_USEC(t) (((t) >> 10) + ((t) >> 16) + ((t) >> 17))
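
/*
 * Editor's check (not in the original source): the shifts sum to
 *      2^-10 + 2^-16 + 2^-17 = 0.0009765625 + 0.0000152588 + 0.0000076294
 *                            ~= 0.00099945,
 * i.e. within about 0.06% of a true divide by 1000.
 */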
 1248 void
 1249 rmc_update_class_util(struct rm_ifdat *ifd)
 1250 {
 1251         int              idle, avgidle, pktlen;
 1252         int              pkt_time, tidle;
 1253         rm_class_t      *cl, *borrowed;
 1254         rm_class_t      *borrows;
 1255         struct timeval  *nowp;
 1256 
 1257         /*
 1258          * Get the most recent completed class.
 1259          */
 1260         if ((cl = ifd->class_[ifd->qo_]) == NULL)
 1261                 return;
 1262 
 1263         pktlen = ifd->curlen_[ifd->qo_];
 1264         borrowed = ifd->borrowed_[ifd->qo_];
 1265         borrows = borrowed;
 1266 
 1267         PKTCNTR_ADD(&cl->stats_.xmit_cnt, pktlen);
 1268 
 1269         /*
 1270          * Run estimator on class and its ancestors.
 1271          */
 1272         /*
  1273          * rmc_update_class_util is designed to be called when the
  1274          * transfer is completed by a transmit-complete interrupt,
  1275          * but most drivers don't implement an upcall for that,
  1276          * so we just use the estimated completion time.
  1277          * as a result, ifd->qi_ and ifd->qo_ are always synced.
 1278          */
 1279         nowp = &ifd->now_[ifd->qo_];
 1280         /* get pkt_time (for link) in usec */
 1281 #if 1  /* use approximation */
 1282         pkt_time = ifd->curlen_[ifd->qo_] * ifd->ns_per_byte_;
 1283         pkt_time = NSEC_TO_USEC(pkt_time);
 1284 #else
 1285         pkt_time = ifd->curlen_[ifd->qo_] * ifd->ns_per_byte_ / 1000;
 1286 #endif
 1287 #if 1 /* ALTQ4PPP */
 1288         if (TV_LT(nowp, &ifd->ifnow_)) {
 1289                 int iftime;
 1290 
 1291                 /*
 1292                  * make sure the estimated completion time does not go
 1293                  * too far.  it can happen when the link layer supports
 1294                  * data compression or the interface speed is set to
 1295                  * a much lower value.
 1296                  */
 1297                 TV_DELTA(&ifd->ifnow_, nowp, iftime);
 1298                 if (iftime+pkt_time < ifd->maxiftime_) {
 1299                         TV_ADD_DELTA(&ifd->ifnow_, pkt_time, &ifd->ifnow_);
 1300                 } else {
 1301                         TV_ADD_DELTA(nowp, ifd->maxiftime_, &ifd->ifnow_);
 1302                 }
 1303         } else {
 1304                 TV_ADD_DELTA(nowp, pkt_time, &ifd->ifnow_);
 1305         }
 1306 #else
 1307         if (TV_LT(nowp, &ifd->ifnow_)) {
 1308                 TV_ADD_DELTA(&ifd->ifnow_, pkt_time, &ifd->ifnow_);
 1309         } else {
 1310                 TV_ADD_DELTA(nowp, pkt_time, &ifd->ifnow_);
 1311         }
 1312 #endif
 1313 
 1314         while (cl != NULL) {
 1315                 TV_DELTA(&ifd->ifnow_, &cl->last_, idle);
 1316                 if (idle >= 2000000)
 1317                         /*
 1318                          * this class is idle enough, reset avgidle.
 1319                          * (TV_DELTA returns 2000000 us when delta is large.)
 1320                          */
 1321                         cl->avgidle_ = cl->maxidle_;
 1322 
 1323                 /* get pkt_time (for class) in usec */
 1324 #if 1  /* use approximation */
 1325                 pkt_time = pktlen * cl->ns_per_byte_;
 1326                 pkt_time = NSEC_TO_USEC(pkt_time);
 1327 #else
 1328                 pkt_time = pktlen * cl->ns_per_byte_ / 1000;
 1329 #endif
 1330                 idle -= pkt_time;
 1331 
 1332                 avgidle = cl->avgidle_;
 1333                 avgidle += idle - (avgidle >> RM_FILTER_GAIN);
 1334                 cl->avgidle_ = avgidle;
 1335 
 1336                 /* Are we overlimit ? */
 1337                 if (avgidle <= 0) {
 1338                         CBQTRACE(rmc_update_class_util, 'milo', cl->stats_.handle);
 1339 #if 1 /* ALTQ */
 1340                         /*
 1341                          * need some lower bound for avgidle, otherwise
 1342                          * a borrowing class gets unbounded penalty.
 1343                          */
 1344                         if (avgidle < cl->minidle_)
 1345                                 avgidle = cl->avgidle_ = cl->minidle_;
 1346 #endif
 1347                         /* set next idle to make avgidle 0 */
 1348                         tidle = pkt_time +
 1349                                 (((1 - RM_POWER) * avgidle) >> RM_FILTER_GAIN);
 1350                         TV_ADD_DELTA(nowp, tidle, &cl->undertime_);
 1351                         ++cl->stats_.over;
 1352                 } else {
 1353                         cl->avgidle_ =
 1354                             (avgidle > cl->maxidle_) ? cl->maxidle_ : avgidle;
 1355                         cl->undertime_.tv_sec = 0;
 1356                         if (cl->sleeping_) {
 1357                                 CALLOUT_STOP(&cl->callout_);
 1358                                 cl->sleeping_ = 0;
 1359                         }
 1360                 }
 1361 
 1362                 if (borrows != NULL) {
 1363                         if (borrows != cl)
 1364                                 ++cl->stats_.borrows;
 1365                         else
 1366                                 borrows = NULL;
 1367                 }
 1368                 cl->last_ = ifd->ifnow_;
 1369                 cl->last_pkttime_ = pkt_time;
 1370 
 1371 #if 1
 1372                 if (cl->parent_ == NULL) {
 1373                         /* take stats of root class */
 1374                         PKTCNTR_ADD(&cl->stats_.xmit_cnt, pktlen);
 1375                 }
 1376 #endif
 1377 
 1378                 cl = cl->parent_;
 1379         }
 1380 
 1381         /*
  1382          * Check to see if cutoff needs to be set to a new level.
 1383          */
 1384         cl = ifd->class_[ifd->qo_];
 1385         if (borrowed && (ifd->cutoff_ >= borrowed->depth_)) {
 1386 #if 1 /* ALTQ */
 1387                 if ((qlen(cl->q_) <= 0) || TV_LT(nowp, &borrowed->undertime_)) {
 1388                         rmc_tl_satisfied(ifd, nowp);
 1389                         CBQTRACE(rmc_update_class_util, 'broe', ifd->cutoff_);
 1390                 } else {
 1391                         ifd->cutoff_ = borrowed->depth_;
 1392                         CBQTRACE(rmc_update_class_util, 'ffob', borrowed->depth_);
 1393                 }
 1394 #else /* !ALTQ */
 1395                 if ((qlen(cl->q_) <= 1) || TV_LT(&now, &borrowed->undertime_)) {
 1396                         reset_cutoff(ifd);
 1397 #ifdef notdef
 1398                         rmc_tl_satisfied(ifd, &now);
 1399 #endif
 1400                         CBQTRACE(rmc_update_class_util, 'broe', ifd->cutoff_);
 1401                 } else {
 1402                         ifd->cutoff_ = borrowed->depth_;
 1403                         CBQTRACE(rmc_update_class_util, 'ffob', borrowed->depth_);
 1404                 }
 1405 #endif /* !ALTQ */
 1406         }
 1407 
 1408         /*
 1409          * Release class slot
 1410          */
 1411         ifd->borrowed_[ifd->qo_] = NULL;
 1412         ifd->class_[ifd->qo_] = NULL;
 1413         ifd->qo_ = (ifd->qo_ + 1) % ifd->maxqueued_;
 1414         ifd->queued_--;
 1415 }
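
/*
 * A worked example of the avgidle bookkeeping above (a sketch, assuming
 * RM_FILTER_GAIN is 5 and RM_POWER is (1 << RM_FILTER_GAIN), per
 * altq_rmclass.h).  The update
 *
 *      avgidle += idle - (avgidle >> RM_FILTER_GAIN);
 *
 * is avgidle = (31/32) * avgidle + idle, i.e. an exponentially weighted
 * average of the per-packet idle time kept scaled by RM_POWER: with a
 * constant idle, the fixed point is avgidle = 32 * idle.  A class sending
 * faster than its allotment sees negative idle values, so avgidle sinks
 * below zero and the class goes overlimit.  The suspension term
 *
 *      tidle = pkt_time + (((1 - RM_POWER) * avgidle) >> RM_FILTER_GAIN);
 *
 * is pkt_time - (31/32) * avgidle.  For avgidle = -3200 us and
 * pkt_time = 100 us, tidle = 100 + 3100 = 3200 us, the idle time needed
 * to pull avgidle back up to zero (hence the "set next idle to make
 * avgidle 0" comment).  Note also that the #if 1 branch converts
 * pkt_time from ns to us with the NSEC_TO_USEC() approximation rather
 * than the exact division by 1000 in the #else branch.
 */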
 1416 
 1417 /*
 1418  * void
 1419  * rmc_drop_action(struct rm_class *cl) - Generic (not protocol-specific)
 1420  *      over-limit action routines.  These get invoked by rmc_under_limit()
 1421  *      if a class with packets to send is over its bandwidth limit & can't
 1422  *      borrow from a parent class.
 1423  *
 1424  *      Returns: NONE
 1425  */
 1426 
 1427 static void
 1428 rmc_drop_action(struct rm_class *cl)
 1429 {
 1430         struct rm_ifdat *ifd = cl->ifdat_;
 1431 
 1432         ASSERT(qlen(cl->q_) > 0);
 1433         _rmc_dropq(cl);
 1434         if (qempty(cl->q_))
 1435                 ifd->na_[cl->pri_]--;
 1436 }
 1437 
 1438 void
 1439 rmc_dropall(struct rm_class *cl)
 1440 {
 1441         struct rm_ifdat *ifd = cl->ifdat_;
 1442 
 1443         if (!qempty(cl->q_)) {
 1444                 _flushq(cl->q_);
 1445 
 1446                 ifd->na_[cl->pri_]--;
 1447         }
 1448 }
 1449 
 1450 #if (__FreeBSD_version > 300000)
 1451 /* hzto() was removed in FreeBSD-3.0; provide a local replacement */
 1452 static int hzto(struct timeval *);
 1453 
 1454 static int
 1455 hzto(struct timeval *tv)
 1456 {
 1457         struct timeval t2;
 1458 
 1459         getmicrotime(&t2);
 1460         t2.tv_sec = tv->tv_sec - t2.tv_sec;
 1461         t2.tv_usec = tv->tv_usec - t2.tv_usec;
 1462         return (tvtohz(&t2));
 1463 }
 1464 #endif /* __FreeBSD_version > 300000 */
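
/*
 * hzto() turns an absolute timeval into a relative tick count by taking
 * the difference from the current time and handing it to tvtohz().  A
 * minimal sketch with hypothetical numbers: at hz = 100 (10 ms per tick),
 * an undertime_ that lies 50 ms in the future yields roughly 5 ticks,
 * which rmc_delay_action() below passes to CALLOUT_RESET().
 */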
 1465 
 1466 /*
 1467  * void
 1468  * rmc_delay_action(struct rm_class *cl) - This function is the generic CBQ
 1469  *      delay action routine.  It is invoked via rmc_under_limit when the
 1470  *      packet is discovered to be overlimit.
 1471  *
 1472  *      If the delay action is the result of a borrowed class being overlimit,
 1473  *      then the delay is for the offtime of the borrowed class that is overlimit.
 1474  *
 1475  *      Returns: NONE
 1476  */
 1477 
 1478 void
 1479 rmc_delay_action(struct rm_class *cl, struct rm_class *borrow)
 1480 {
 1481         int     ndelay, t, extradelay;
 1482 
 1483         cl->stats_.overactions++;
 1484         TV_DELTA(&cl->undertime_, &cl->overtime_, ndelay);
 1485 #ifndef BORROW_OFFTIME
 1486         ndelay += cl->offtime_;
 1487 #endif
 1488 
 1489         if (!cl->sleeping_) {
 1490                 CBQTRACE(rmc_delay_action, 'yled', cl->stats_.handle);
 1491 #ifdef BORROW_OFFTIME
 1492                 if (borrow != NULL)
 1493                         extradelay = borrow->offtime_;
 1494                 else
 1495 #endif
 1496                         extradelay = cl->offtime_;
 1497 
 1498 #ifdef ALTQ
 1499                 /*
 1500                  * XXX recalculate suspend time:
 1501                  * current undertime is (tidle + pkt_time) calculated
 1502                  * from the last transmission.
 1503                  *      tidle: time required to bring avgidle back to 0
 1504                  *      pkt_time: target waiting time for this class
 1505                  * we need to replace pkt_time by offtime
 1506                  */
 1507                 extradelay -= cl->last_pkttime_;
 1508 #endif
 1509                 if (extradelay > 0) {
 1510                         TV_ADD_DELTA(&cl->undertime_, extradelay, &cl->undertime_);
 1511                         ndelay += extradelay;
 1512                 }
 1513 
 1514                 cl->sleeping_ = 1;
 1515                 cl->stats_.delays++;
 1516 
 1517                 /*
 1518                  * Since packets are phased randomly with respect to the
 1519                  * clock, 1 tick (the next clock tick) can be an arbitrarily
 1520                  * short time so we have to wait for at least two ticks.
 1521                  * NOTE:  If there's no other traffic, we need the timer as
 1522                  * a 'backstop' to restart this class.
 1523                  */
 1524                 if (ndelay > tick * 2) {
 1525 #ifdef __FreeBSD__
 1526                         /* FreeBSD rounds up the tick */
 1527                         t = hzto(&cl->undertime_);
 1528 #else
 1529                         /* other BSDs round down the tick */
 1530                         t = hzto(&cl->undertime_) + 1;
 1531 #endif
 1532                 } else
 1533                         t = 2;
 1534                 CALLOUT_RESET(&cl->callout_, t,
 1535                               (timeout_t *)rmc_restart, (caddr_t)cl);
 1536         }
 1537 }
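
/*
 * The two-tick floor above, in concrete numbers (a sketch, assuming the
 * classic BSD globals hz and tick with tick = 1000000 / hz): at hz = 100,
 * tick is 10000 us, so any ndelay of 20000 us or less becomes a two-tick
 * (20 ms) sleep, and only longer delays go through hzto().  This keeps
 * the callout from firing on the very next clock tick, which could come
 * arbitrarily soon after CALLOUT_RESET().
 */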
 1538 
 1539 /*
 1540  * void
 1541  * rmc_restart() - is just a helper routine for rmc_delay_action -- it is
 1542  *      called by the system timer code & is responsible for checking if the
 1543  *      class is still sleeping (it might have been restarted as a side
 1544  *      effect of the queue scan on a packet arrival) and, if so, restarting
 1545  *      output for the class.  Inspecting the class state & restarting output
 1546  *      require locking the class structure.  In general the driver is
 1547  *      responsible for locking but this is the only routine that is not
 1548  *      called directly or indirectly from the interface driver, so it has
 1549  *      to know about system locking conventions.  Under BSD, locking is done
 1550  *      by raising IPL to splnet so that's what's implemented here.  On a
 1551  *      different system this would probably need to be changed.
 1552  *
 1553  *      Returns:        NONE
 1554  */
 1555 
 1556 static void
 1557 rmc_restart(struct rm_class *cl)
 1558 {
 1559         struct rm_ifdat *ifd = cl->ifdat_;
 1560         int              s;
 1561 
 1562         s = splnet();
 1563         if (cl->sleeping_) {
 1564                 cl->sleeping_ = 0;
 1565                 cl->undertime_.tv_sec = 0;
 1566 
 1567                 if (ifd->queued_ < ifd->maxqueued_ && ifd->restart != NULL) {
 1568                         CBQTRACE(rmc_restart, 'trts', cl->stats_.handle);
 1569                         (ifd->restart)(ifd->ifq_);
 1570                 }
 1571         }
 1572         splx(s);
 1573 }
 1574 
 1575 /*
 1576  * void
 1577  * rmc_root_overlimit(struct rm_class *cl) - This is the generic overlimit
 1578  *      handling routine for the root class of the link sharing structure.
 1579  *
 1580  *      Returns: NONE
 1581  */
 1582 
 1583 static void
 1584 rmc_root_overlimit(struct rm_class *cl,
 1585     struct rm_class *borrow)
 1586 {
 1587         panic("rmc_root_overlimit");
 1588 }
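
/*
 * The panic is intentional: the root class sits at the top of the
 * link-sharing tree and has no parent to borrow from, so by construction
 * it should never be found overlimit; reaching this handler indicates
 * the scheduler's state is inconsistent.
 */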
 1589 
 1590 /*
 1591  * Packet queue handling routines.  Eventually, these are meant to localize
 1592  *      the code's dependence on whether the queues are RED queues or
 1593  *      drop-tail queues.
 1594  */
 1595 
 1596 static int
 1597 _rmc_addq(rm_class_t *cl, mbuf_t *m)
 1598 {
 1599 #ifdef ALTQ_RIO
 1600         if (q_is_rio(cl->q_))
 1601                 return rio_addq((rio_t *)cl->red_, cl->q_, m, cl->pktattr_);
 1602 #endif
 1603 #ifdef ALTQ_RED
 1604         if (q_is_red(cl->q_))
 1605                 return red_addq(cl->red_, cl->q_, m, cl->pktattr_);
 1606 #endif /* ALTQ_RED */
 1607 
 1608         if (cl->flags_ & RMCF_CLEARDSCP)
 1609                 write_dsfield(m, cl->pktattr_, 0);
 1610 
 1611         _addq(cl->q_, m);
 1612         return (0);
 1613 }
 1614 
 1615 /* note: _rmc_dropq is not called for red */
 1616 static void
 1617 _rmc_dropq(rm_class_t *cl)
 1618 {
 1619         mbuf_t  *m;
 1620 
 1621         if ((m = _getq(cl->q_)) != NULL)
 1622                 m_freem(m);
 1623 }
 1624 
 1625 static mbuf_t *
 1626 _rmc_getq(rm_class_t *cl)
 1627 {
 1628 #ifdef ALTQ_RIO
 1629         if (q_is_rio(cl->q_))
 1630                 return rio_getq((rio_t *)cl->red_, cl->q_);
 1631 #endif
 1632 #ifdef ALTQ_RED
 1633         if (q_is_red(cl->q_))
 1634                 return red_getq(cl->red_, cl->q_);
 1635 #endif
 1636         return _getq(cl->q_);
 1637 }
 1638 
 1639 static mbuf_t *
 1640 _rmc_pollq(rm_class_t *cl)
 1641 {
 1642         return qhead(cl->q_);
 1643 }
 1644 
 1645 #ifdef CBQ_TRACE
 1646 
 1647 struct cbqtrace          cbqtrace_buffer[NCBQTRACE+1];
 1648 struct cbqtrace         *cbqtrace_ptr = NULL;
 1649 int                      cbqtrace_count;
 1650 
 1651 /*
 1652  * DDB hook to trace cbq events:
 1653  *  the last 1024 events are held in a circular buffer.
 1654  *  use "call cbqtrace_dump(N)" to display 20 events starting from the Nth event.
 1655  */
 1656 void cbqtrace_dump(int);
 1657 static char *rmc_funcname(void *);
 1658 
 1659 static struct rmc_funcs {
 1660         void    *func;
 1661         char    *name;
 1662 } rmc_funcs[] =
 1663 {
 1664         rmc_init,               "rmc_init",
 1665         rmc_queue_packet,       "rmc_queue_packet",
 1666         rmc_under_limit,        "rmc_under_limit",
 1667         rmc_update_class_util,  "rmc_update_class_util",
 1668         rmc_delay_action,       "rmc_delay_action",
 1669         rmc_restart,            "rmc_restart",
 1670         _rmc_wrr_dequeue_next,  "_rmc_wrr_dequeue_next",
 1671         NULL,                   NULL
 1672 };
 1673 
 1674 static char *
 1675 rmc_funcname(void *func)
 1676 {
 1677         struct rmc_funcs *fp;
 1678 
 1679         for (fp = rmc_funcs; fp->func != NULL; fp++)
 1680                 if (fp->func == func)
 1681                         return (fp->name);
 1682         return ("unknown");
 1683 }
 1684 
 1685 void
 1686 cbqtrace_dump(int counter)
 1687 {
 1688         int      i, *p;
 1689         char    *cp;
 1690 
 1691         counter = counter % NCBQTRACE;
 1692         p = (int *)&cbqtrace_buffer[counter];
 1693 
 1694         for (i=0; i<20; i++) {
 1695                 printf("[0x%x] ", *p++);
 1696                 printf("%s: ", rmc_funcname((void *)*p++));
 1697                 cp = (char *)p++;
 1698                 printf("%c%c%c%c: ", cp[0], cp[1], cp[2], cp[3]);
 1699                 printf("%d\n",*p++);
 1700 
 1701                 if (p >= (int *)&cbqtrace_buffer[NCBQTRACE])
 1702                         p = (int *)cbqtrace_buffer;
 1703         }
 1704 }
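
/*
 * A hypothetical ddb session using the hook above (output format per the
 * printf calls in cbqtrace_dump(); the event tags are written as reversed
 * multi-character constants in the CBQTRACE call sites, so they print in
 * readable order on a little-endian machine):
 *
 *      db> call cbqtrace_dump(0)
 *      [0x3e1] rmc_delay_action: dely: 42
 *      [0x3e2] rmc_restart: strt: 42
 *      ...
 *
 * The values shown are illustrative only.
 */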
 1705 #endif /* CBQ_TRACE */
 1706 #endif /* ALTQ_CBQ */
 1707 
 1708 #if defined(ALTQ_CBQ) || defined(ALTQ_RED) || defined(ALTQ_RIO) || defined(ALTQ_HFSC) || defined(ALTQ_PRIQ)
 1709 #if !defined(__GNUC__) || defined(ALTQ_DEBUG)
 1710 
 1711 void
 1712 _addq(class_queue_t *q, mbuf_t *m)
 1713 {
 1714         mbuf_t  *m0;
 1715 
 1716         if ((m0 = qtail(q)) != NULL)
 1717                 m->m_nextpkt = m0->m_nextpkt;
 1718         else
 1719                 m0 = m;
 1720         m0->m_nextpkt = m;
 1721         qtail(q) = m;
 1722         qlen(q)++;
 1723 }
 1724 
 1725 mbuf_t *
 1726 _getq(class_queue_t *q)
 1727 {
 1728         mbuf_t  *m, *m0;
 1729 
 1730         if ((m = qtail(q)) == NULL)
 1731                 return (NULL);
 1732         if ((m0 = m->m_nextpkt) != m)
 1733                 m->m_nextpkt = m0->m_nextpkt;
 1734         else {
 1735                 ASSERT(qlen(q) == 1);
 1736                 qtail(q) = NULL;
 1737         }
 1738         qlen(q)--;
 1739         m0->m_nextpkt = NULL;
 1740         return (m0);
 1741 }
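
/*
 * The invariant behind _addq() and _getq() above: class_queue_t threads a
 * circular singly-linked list through m_nextpkt, with qtail(q) pointing
 * at the most recently added packet and the tail's m_nextpkt pointing
 * back at the head.  With packets a, b, c enqueued in that order:
 *
 *      qtail(q) -> c,  c->m_nextpkt -> a,  a->m_nextpkt -> b,
 *      b->m_nextpkt -> c
 *
 * so _addq() splices in after the tail in O(1), and _getq() dequeues the
 * head (tail->m_nextpkt) in O(1), with no separate head pointer needed.
 */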
 1742 
 1743 /* drop a packet at the tail of the queue */
 1744 mbuf_t *
 1745 _getq_tail(class_queue_t *q)
 1746 {
 1747         mbuf_t  *m, *m0, *prev;
 1748 
 1749         if ((m = m0 = qtail(q)) == NULL)
 1750                 return NULL;
 1751         do {
 1752                 prev = m0;
 1753                 m0 = m0->m_nextpkt;
 1754         } while (m0 != m);
 1755         prev->m_nextpkt = m->m_nextpkt;
 1756         if (prev == m)  {
 1757                 ASSERT(qlen(q) == 1);
 1758                 qtail(q) = NULL;
 1759         } else
 1760                 qtail(q) = prev;
 1761         qlen(q)--;
 1762         m->m_nextpkt = NULL;
 1763         return (m);
 1764 }
 1765 
 1766 /* randomly select a packet in the queue */
 1767 mbuf_t *
 1768 _getq_random(class_queue_t *q)
 1769 {
 1770         struct mbuf     *m;
 1771         int              i, n;
 1772 
 1773         if ((m = qtail(q)) == NULL)
 1774                 return NULL;
 1775         if (m->m_nextpkt == m) {
 1776                 ASSERT(qlen(q) == 1);
 1777                 qtail(q) = NULL;
 1778         } else {
 1779                 struct mbuf *prev = NULL;
 1780 
 1781                 n = arc4random() % qlen(q) + 1;
 1782                 for (i = 0; i < n; i++) {
 1783                         prev = m;
 1784                         m = m->m_nextpkt;
 1785                 }
 1786                 prev->m_nextpkt = m->m_nextpkt;
 1787                 if (m == qtail(q))
 1788                         qtail(q) = prev;
 1789         }
 1790         qlen(q)--;
 1791         m->m_nextpkt = NULL;
 1792         return (m);
 1793 }
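
/*
 * In _getq_random() above, n is uniform on [1, qlen(q)] and the walk
 * starts at the tail, so n = 1 picks the head and n = qlen(q) wraps all
 * the way around to the tail itself; every queued packet is selected
 * with essentially equal probability, up to the small modulo bias of
 * arc4random() % qlen(q) when qlen is not a power of two.
 */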
 1794 
 1795 void
 1796 _removeq(class_queue_t *q, mbuf_t *m)
 1797 {
 1798         mbuf_t  *m0, *prev;
 1799 
 1800         m0 = qtail(q);
 1801         do {
 1802                 prev = m0;
 1803                 m0 = m0->m_nextpkt;
 1804         } while (m0 != m);
 1805         prev->m_nextpkt = m->m_nextpkt;
 1806         if (prev == m)
 1807                 qtail(q) = NULL;
 1808         else if (qtail(q) == m)
 1809                 qtail(q) = prev;
 1810         qlen(q)--;
 1811 }
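
/*
 * Note that _removeq() trusts its caller: the do/while walk terminates
 * only by finding m, so it must be called with a packet that is actually
 * on q (and with a non-empty q); otherwise it would loop forever or
 * dereference a NULL qtail.
 */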
 1812 
 1813 void
 1814 _flushq(class_queue_t *q)
 1815 {
 1816         mbuf_t *m;
 1817 
 1818         while ((m = _getq(q)) != NULL)
 1819                 m_freem(m);
 1820         ASSERT(qlen(q) == 0);
 1821 }
 1822 
 1823 #endif /* !__GNUC__ || ALTQ_DEBUG */
 1824 #endif /* ALTQ_CBQ || ALTQ_RED || ALTQ_RIO || ALTQ_HFSC || ALTQ_PRIQ */
