The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/net/altq/altq_rio.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (C) 1998-2003
    3  *      Sony Computer Science Laboratories Inc.  All rights reserved.
    4  *
    5  * Redistribution and use in source and binary forms, with or without
    6  * modification, are permitted provided that the following conditions
    7  * are met:
    8  * 1. Redistributions of source code must retain the above copyright
    9  *    notice, this list of conditions and the following disclaimer.
   10  * 2. Redistributions in binary form must reproduce the above copyright
   11  *    notice, this list of conditions and the following disclaimer in the
   12  *    documentation and/or other materials provided with the distribution.
   13  *
   14  * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
   15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   17  * ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
   18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   24  * SUCH DAMAGE.
   25  */
   26 /*-
   27  * Copyright (c) 1990-1994 Regents of the University of California.
   28  * All rights reserved.
   29  *
   30  * Redistribution and use in source and binary forms, with or without
   31  * modification, are permitted provided that the following conditions
   32  * are met:
   33  * 1. Redistributions of source code must retain the above copyright
   34  *    notice, this list of conditions and the following disclaimer.
   35  * 2. Redistributions in binary form must reproduce the above copyright
   36  *    notice, this list of conditions and the following disclaimer in the
   37  *    documentation and/or other materials provided with the distribution.
   38  * 3. All advertising materials mentioning features or use of this software
   39  *    must display the following acknowledgement:
   40  *      This product includes software developed by the Computer Systems
   41  *      Engineering Group at Lawrence Berkeley Laboratory.
   42  * 4. Neither the name of the University nor of the Laboratory may be used
   43  *    to endorse or promote products derived from this software without
   44  *    specific prior written permission.
   45  *
   46  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   47  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   48  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   49  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   50  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   51  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   52  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   53  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   54  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   55  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   56  * SUCH DAMAGE.
   57  *
   58  * $KAME: altq_rio.c,v 1.17 2003/07/10 12:07:49 kjc Exp $
   59  * $FreeBSD$
   60  */
   61 
   62 #include "opt_altq.h"
   63 #include "opt_inet.h"
   64 #include "opt_inet6.h"
   65 #ifdef ALTQ_RIO /* rio is enabled by ALTQ_RIO option in opt_altq.h */
   66 
   67 #include <sys/param.h>
   68 #include <sys/malloc.h>
   69 #include <sys/mbuf.h>
   70 #include <sys/socket.h>
   71 #include <sys/systm.h>
   72 #include <sys/errno.h>
   73 #if 1 /* ALTQ3_COMPAT */
   74 #include <sys/proc.h>
   75 #include <sys/sockio.h>
   76 #include <sys/kernel.h>
   77 #endif
   78 
   79 #include <net/if.h>
   80 #include <net/if_var.h>
   81 
   82 #include <netinet/in.h>
   83 #include <netinet/in_systm.h>
   84 #include <netinet/ip.h>
   85 #ifdef INET6
   86 #include <netinet/ip6.h>
   87 #endif
   88 
   89 #include <netpfil/pf/pf.h>
   90 #include <netpfil/pf/pf_altq.h>
   91 #include <net/altq/altq.h>
   92 #include <net/altq/altq_cdnr.h>
   93 #include <net/altq/altq_red.h>
   94 #include <net/altq/altq_rio.h>
   95 
   96 /*
   97  * RIO: RED with IN/OUT bit
   98  *   described in
   99  *      "Explicit Allocation of Best Effort Packet Delivery Service"
  100  *      David D. Clark and Wenjia Fang, MIT Lab for Computer Science
  101  *      http://diffserv.lcs.mit.edu/Papers/exp-alloc-ddc-wf.{ps,pdf}
  102  *
  103  * this implementation is extended to support more than 2 drop precedence
  104  * values as described in RFC2597 (Assured Forwarding PHB Group).
  105  *
  106  */
  107 /*
  108  * AF DS (differentiated service) codepoints.
  109  * (classes can be mapped to CBQ or H-FSC classes.)
  110  *
  111  *      0   1   2   3   4   5   6   7
  112  *    +---+---+---+---+---+---+---+---+
  113  *    |   CLASS   |DropPre| 0 |  CU   |
  114  *    +---+---+---+---+---+---+---+---+
  115  *
  116  *    class 1: 001
  117  *    class 2: 010
  118  *    class 3: 011
  119  *    class 4: 100
  120  *
  121  *    low drop prec:    01
  122  *    medium drop prec: 10
  123  *    high drop prec:   01
  124  */
  125 
  126 /* normal red parameters */
  127 #define W_WEIGHT        512     /* inverse of weight of EWMA (511/512) */
  128                                 /* q_weight = 0.00195 */
  129 
  130 /* red parameters for a slow link */
  131 #define W_WEIGHT_1      128     /* inverse of weight of EWMA (127/128) */
  132                                 /* q_weight = 0.0078125 */
  133 
  134 /* red parameters for a very slow link (e.g., dialup) */
  135 #define W_WEIGHT_2      64      /* inverse of weight of EWMA (63/64) */
  136                                 /* q_weight = 0.015625 */
  137 
  138 /* fixed-point uses 12-bit decimal places */
  139 #define FP_SHIFT        12      /* fixed-point shift */
  140 
  141 /* red parameters for drop probability */
  142 #define INV_P_MAX       10      /* inverse of max drop probability */
  143 #define TH_MIN           5      /* min threshold */
  144 #define TH_MAX          15      /* max threshold */
  145 
  146 #define RIO_LIMIT       60      /* default max queue length */
  147 #define RIO_STATS               /* collect statistics */
  148 
  149 #define TV_DELTA(a, b, delta) {                                 \
  150         int     xxs;                                            \
  151                                                                 \
  152         delta = (a)->tv_usec - (b)->tv_usec;                    \
  153         if ((xxs = (a)->tv_sec - (b)->tv_sec) != 0) {           \
  154                 if (xxs < 0) {                                  \
  155                         delta = 60000000;                       \
  156                 } else if (xxs > 4)  {                          \
  157                         if (xxs > 60)                           \
  158                                 delta = 60000000;               \
  159                         else                                    \
  160                                 delta += xxs * 1000000;         \
  161                 } else while (xxs > 0) {                        \
  162                         delta += 1000000;                       \
  163                         xxs--;                                  \
  164                 }                                               \
  165         }                                                       \
  166 }
  167 
  168 /* default rio parameter values */
  169 static struct redparams default_rio_params[RIO_NDROPPREC] = {
  170   /* th_min,             th_max,     inv_pmax */
  171   { TH_MAX * 2 + TH_MIN, TH_MAX * 3, INV_P_MAX }, /* low drop precedence */
  172   { TH_MAX + TH_MIN,     TH_MAX * 2, INV_P_MAX }, /* medium drop precedence */
  173   { TH_MIN,              TH_MAX,     INV_P_MAX }  /* high drop precedence */
  174 };
  175 
  176 /* internal function prototypes */
  177 static int dscp2index(u_int8_t);
  178 
  179 rio_t *
  180 rio_alloc(int weight, struct redparams *params, int flags, int pkttime)
  181 {
  182         rio_t   *rp;
  183         int      w, i;
  184         int      npkts_per_sec;
  185 
  186         rp = malloc(sizeof(rio_t), M_DEVBUF, M_NOWAIT | M_ZERO);
  187         if (rp == NULL)
  188                 return (NULL);
  189 
  190         rp->rio_flags = flags;
  191         if (pkttime == 0)
  192                 /* default packet time: 1000 bytes / 10Mbps * 8 * 1000000 */
  193                 rp->rio_pkttime = 800;
  194         else
  195                 rp->rio_pkttime = pkttime;
  196 
  197         if (weight != 0)
  198                 rp->rio_weight = weight;
  199         else {
  200                 /* use default */
  201                 rp->rio_weight = W_WEIGHT;
  202 
  203                 /* when the link is very slow, adjust red parameters */
  204                 npkts_per_sec = 1000000 / rp->rio_pkttime;
  205                 if (npkts_per_sec < 50) {
  206                         /* up to about 400Kbps */
  207                         rp->rio_weight = W_WEIGHT_2;
  208                 } else if (npkts_per_sec < 300) {
  209                         /* up to about 2.4Mbps */
  210                         rp->rio_weight = W_WEIGHT_1;
  211                 }
  212         }
  213 
  214         /* calculate wshift.  weight must be power of 2 */
  215         w = rp->rio_weight;
  216         for (i = 0; w > 1; i++)
  217                 w = w >> 1;
  218         rp->rio_wshift = i;
  219         w = 1 << rp->rio_wshift;
  220         if (w != rp->rio_weight) {
  221                 printf("invalid weight value %d for red! use %d\n",
  222                        rp->rio_weight, w);
  223                 rp->rio_weight = w;
  224         }
  225 
  226         /* allocate weight table */
  227         rp->rio_wtab = wtab_alloc(rp->rio_weight);
  228 
  229         for (i = 0; i < RIO_NDROPPREC; i++) {
  230                 struct dropprec_state *prec = &rp->rio_precstate[i];
  231 
  232                 prec->avg = 0;
  233                 prec->idle = 1;
  234 
  235                 if (params == NULL || params[i].inv_pmax == 0)
  236                         prec->inv_pmax = default_rio_params[i].inv_pmax;
  237                 else
  238                         prec->inv_pmax = params[i].inv_pmax;
  239                 if (params == NULL || params[i].th_min == 0)
  240                         prec->th_min = default_rio_params[i].th_min;
  241                 else
  242                         prec->th_min = params[i].th_min;
  243                 if (params == NULL || params[i].th_max == 0)
  244                         prec->th_max = default_rio_params[i].th_max;
  245                 else
  246                         prec->th_max = params[i].th_max;
  247 
  248                 /*
  249                  * th_min_s and th_max_s are scaled versions of th_min
  250                  * and th_max to be compared with avg.
  251                  */
  252                 prec->th_min_s = prec->th_min << (rp->rio_wshift + FP_SHIFT);
  253                 prec->th_max_s = prec->th_max << (rp->rio_wshift + FP_SHIFT);
  254 
  255                 /*
  256                  * precompute probability denominator
  257                  *  probd = (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point
  258                  */
  259                 prec->probd = (2 * (prec->th_max - prec->th_min)
  260                                * prec->inv_pmax) << FP_SHIFT;
  261 
  262                 microtime(&prec->last);
  263         }
  264 
  265         return (rp);
  266 }
  267 
  268 void
  269 rio_destroy(rio_t *rp)
  270 {
  271         wtab_destroy(rp->rio_wtab);
  272         free(rp, M_DEVBUF);
  273 }
  274 
  275 void
  276 rio_getstats(rio_t *rp, struct redstats *sp)
  277 {
  278         int     i;
  279 
  280         for (i = 0; i < RIO_NDROPPREC; i++) {
  281                 bcopy(&rp->q_stats[i], sp, sizeof(struct redstats));
  282                 sp->q_avg = rp->rio_precstate[i].avg >> rp->rio_wshift;
  283                 sp++;
  284         }
  285 }
  286 
  287 #if (RIO_NDROPPREC == 3)
  288 /*
  289  * internally, a drop precedence value is converted to an index
  290  * starting from 0.
  291  */
  292 static int
  293 dscp2index(u_int8_t dscp)
  294 {
  295         int     dpindex = dscp & AF_DROPPRECMASK;
  296 
  297         if (dpindex == 0)
  298                 return (0);
  299         return ((dpindex >> 3) - 1);
  300 }
  301 #endif
  302 
  303 #if 1
  304 /*
  305  * kludge: when a packet is dequeued, we need to know its drop precedence
  306  * in order to keep the queue length of each drop precedence.
  307  * use m_pkthdr.rcvif to pass this info.
  308  */
  309 #define RIOM_SET_PRECINDEX(m, idx)      \
  310         do { (m)->m_pkthdr.rcvif = (void *)((long)(idx)); } while (0)
  311 #define RIOM_GET_PRECINDEX(m)   \
  312         ({ long idx; idx = (long)((m)->m_pkthdr.rcvif); \
  313         (m)->m_pkthdr.rcvif = NULL; idx; })
  314 #endif
  315 
  316 int
  317 rio_addq(rio_t *rp, class_queue_t *q, struct mbuf *m,
  318     struct altq_pktattr *pktattr)
  319 {
  320         int                      avg, droptype;
  321         u_int8_t                 dsfield, odsfield;
  322         int                      dpindex, i, n, t;
  323         struct timeval           now;
  324         struct dropprec_state   *prec;
  325 
  326         dsfield = odsfield = read_dsfield(m, pktattr);
  327         dpindex = dscp2index(dsfield);
  328 
  329         /*
  330          * update avg of the precedence states whose drop precedence
  331          * is larger than or equal to the drop precedence of the packet
  332          */
  333         now.tv_sec = 0;
  334         for (i = dpindex; i < RIO_NDROPPREC; i++) {
  335                 prec = &rp->rio_precstate[i];
  336                 avg = prec->avg;
  337                 if (prec->idle) {
  338                         prec->idle = 0;
  339                         if (now.tv_sec == 0)
  340                                 microtime(&now);
  341                         t = (now.tv_sec - prec->last.tv_sec);
  342                         if (t > 60)
  343                                 avg = 0;
  344                         else {
  345                                 t = t * 1000000 +
  346                                         (now.tv_usec - prec->last.tv_usec);
  347                                 n = t / rp->rio_pkttime;
  348                                 /* calculate (avg = (1 - Wq)^n * avg) */
  349                                 if (n > 0)
  350                                         avg = (avg >> FP_SHIFT) *
  351                                                 pow_w(rp->rio_wtab, n);
  352                         }
  353                 }
  354 
  355                 /* run estimator. (avg is scaled by WEIGHT in fixed-point) */
  356                 avg += (prec->qlen << FP_SHIFT) - (avg >> rp->rio_wshift);
  357                 prec->avg = avg;                /* save the new value */
  358                 /*
  359                  * count keeps a tally of arriving traffic that has not
  360                  * been dropped.
  361                  */
  362                 prec->count++;
  363         }
  364 
  365         prec = &rp->rio_precstate[dpindex];
  366         avg = prec->avg;
  367 
  368         /* see if we drop early */
  369         droptype = DTYPE_NODROP;
  370         if (avg >= prec->th_min_s && prec->qlen > 1) {
  371                 if (avg >= prec->th_max_s) {
  372                         /* avg >= th_max: forced drop */
  373                         droptype = DTYPE_FORCED;
  374                 } else if (prec->old == 0) {
  375                         /* first exceeds th_min */
  376                         prec->count = 1;
  377                         prec->old = 1;
  378                 } else if (drop_early((avg - prec->th_min_s) >> rp->rio_wshift,
  379                                       prec->probd, prec->count)) {
  380                         /* unforced drop by red */
  381                         droptype = DTYPE_EARLY;
  382                 }
  383         } else {
  384                 /* avg < th_min */
  385                 prec->old = 0;
  386         }
  387 
  388         /*
  389          * if the queue length hits the hard limit, it's a forced drop.
  390          */
  391         if (droptype == DTYPE_NODROP && qlen(q) >= qlimit(q))
  392                 droptype = DTYPE_FORCED;
  393 
  394         if (droptype != DTYPE_NODROP) {
  395                 /* always drop incoming packet (as opposed to randomdrop) */
  396                 for (i = dpindex; i < RIO_NDROPPREC; i++)
  397                         rp->rio_precstate[i].count = 0;
  398 #ifdef RIO_STATS
  399                 if (droptype == DTYPE_EARLY)
  400                         rp->q_stats[dpindex].drop_unforced++;
  401                 else
  402                         rp->q_stats[dpindex].drop_forced++;
  403                 PKTCNTR_ADD(&rp->q_stats[dpindex].drop_cnt, m_pktlen(m));
  404 #endif
  405                 m_freem(m);
  406                 return (-1);
  407         }
  408 
  409         for (i = dpindex; i < RIO_NDROPPREC; i++)
  410                 rp->rio_precstate[i].qlen++;
  411 
  412         /* save drop precedence index in mbuf hdr */
  413         RIOM_SET_PRECINDEX(m, dpindex);
  414 
  415         if (rp->rio_flags & RIOF_CLEARDSCP)
  416                 dsfield &= ~DSCP_MASK;
  417 
  418         if (dsfield != odsfield)
  419                 write_dsfield(m, pktattr, dsfield);
  420 
  421         _addq(q, m);
  422 
  423 #ifdef RIO_STATS
  424         PKTCNTR_ADD(&rp->q_stats[dpindex].xmit_cnt, m_pktlen(m));
  425 #endif
  426         return (0);
  427 }
  428 
  429 struct mbuf *
  430 rio_getq(rio_t *rp, class_queue_t *q)
  431 {
  432         struct mbuf     *m;
  433         int              dpindex, i;
  434 
  435         if ((m = _getq(q)) == NULL)
  436                 return NULL;
  437 
  438         dpindex = RIOM_GET_PRECINDEX(m);
  439         for (i = dpindex; i < RIO_NDROPPREC; i++) {
  440                 if (--rp->rio_precstate[i].qlen == 0) {
  441                         if (rp->rio_precstate[i].idle == 0) {
  442                                 rp->rio_precstate[i].idle = 1;
  443                                 microtime(&rp->rio_precstate[i].last);
  444                         }
  445                 }
  446         }
  447         return (m);
  448 }
  449 
  450 #endif /* ALTQ_RIO */

Cache object: 1e6273f83cb44c1d30eee840ed26d4cf


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.