The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/altq/altq_rio.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*      $NetBSD: altq_rio.c,v 1.6 2004/02/13 18:02:05 wiz Exp $ */
    2 /*      $KAME: altq_rio.c,v 1.8 2000/12/14 08:12:46 thorpej Exp $       */
    3 
    4 /*
    5  * Copyright (C) 1998-2000
    6  *      Sony Computer Science Laboratories Inc.  All rights reserved.
    7  *
    8  * Redistribution and use in source and binary forms, with or without
    9  * modification, are permitted provided that the following conditions
   10  * are met:
   11  * 1. Redistributions of source code must retain the above copyright
   12  *    notice, this list of conditions and the following disclaimer.
   13  * 2. Redistributions in binary form must reproduce the above copyright
   14  *    notice, this list of conditions and the following disclaimer in the
   15  *    documentation and/or other materials provided with the distribution.
   16  *
   17  * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
   18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   20  * ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
   21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   27  * SUCH DAMAGE.
   28  */
   29 /*
   30  * Copyright (c) 1990-1994 Regents of the University of California.
   31  * All rights reserved.
   32  *
   33  * Redistribution and use in source and binary forms, with or without
   34  * modification, are permitted provided that the following conditions
   35  * are met:
   36  * 1. Redistributions of source code must retain the above copyright
   37  *    notice, this list of conditions and the following disclaimer.
   38  * 2. Redistributions in binary form must reproduce the above copyright
   39  *    notice, this list of conditions and the following disclaimer in the
   40  *    documentation and/or other materials provided with the distribution.
   41  * 3. All advertising materials mentioning features or use of this software
   42  *    must display the following acknowledgement:
   43  *      This product includes software developed by the Computer Systems
   44  *      Engineering Group at Lawrence Berkeley Laboratory.
   45  * 4. Neither the name of the University nor of the Laboratory may be used
   46  *    to endorse or promote products derived from this software without
   47  *    specific prior written permission.
   48  *
   49  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   50  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   51  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   52  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   53  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   54  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   55  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   56  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   57  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   58  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   59  * SUCH DAMAGE.
   60  */
   61 
   62 #include <sys/cdefs.h>
   63 __KERNEL_RCSID(0, "$NetBSD: altq_rio.c,v 1.6 2004/02/13 18:02:05 wiz Exp $");
   64 
   65 #if defined(__FreeBSD__) || defined(__NetBSD__)
   66 #include "opt_altq.h"
   67 #if (__FreeBSD__ != 2)
   68 #include "opt_inet.h"
   69 #ifdef __FreeBSD__
   70 #include "opt_inet6.h"
   71 #endif
   72 #endif
   73 #endif /* __FreeBSD__ || __NetBSD__ */
   74 #ifdef ALTQ_RIO /* rio is enabled by ALTQ_RIO option in opt_altq.h */
   75 
   76 #include <sys/param.h>
   77 #include <sys/malloc.h>
   78 #include <sys/mbuf.h>
   79 #include <sys/socket.h>
   80 #include <sys/sockio.h>
   81 #include <sys/systm.h>
   82 #include <sys/proc.h>
   83 #include <sys/errno.h>
   84 #include <sys/kernel.h>
   85 
   86 #include <net/if.h>
   87 #include <net/if_types.h>
   88 
   89 #include <netinet/in.h>
   90 #include <netinet/in_systm.h>
   91 #include <netinet/ip.h>
   92 #ifdef INET6
   93 #include <netinet/ip6.h>
   94 #endif
   95 
   96 #include <altq/altq.h>
   97 #include <altq/altq_conf.h>
   98 #include <altq/altq_cdnr.h>
   99 #include <altq/altq_red.h>
  100 #include <altq/altq_rio.h>
  101 
  102 /*
  103  * RIO: RED with IN/OUT bit
  104  *   described in
  105  *      "Explicit Allocation of Best Effort Packet Delivery Service"
  106  *      David D. Clark and Wenjia Fang, MIT Lab for Computer Science
  107  *      http://diffserv.lcs.mit.edu/Papers/exp-alloc-ddc-wf.{ps,pdf}
  108  *
  109  * this implementation is extended to support more than 2 drop precedence
  110  * values as described in RFC2597 (Assured Forwarding PHB Group).
  111  *
  112  */
  113 /*
  114  * AF DS (differentiated service) codepoints.
  115  * (classes can be mapped to CBQ or H-FSC classes.)
  116  * 
  117  *      0   1   2   3   4   5   6   7
  118  *    +---+---+---+---+---+---+---+---+
  119  *    |   CLASS   |DropPre| 0 |  CU   |
  120  *    +---+---+---+---+---+---+---+---+
  121  *
  122  *    class 1: 001
  123  *    class 2: 010
  124  *    class 3: 011
  125  *    class 4: 100
  126  *
  127  *    low drop prec:    01
  128  *    medium drop prec: 10
  129  *    high drop prec:   11
  130  */
  131 
  132 /* normal red parameters */
  133 #define W_WEIGHT        512     /* inverse of weight of EWMA (511/512) */
  134                                 /* q_weight = 0.00195 */
  135 
  136 /* red parameters for a slow link */
  137 #define W_WEIGHT_1      128     /* inverse of weight of EWMA (127/128) */
  138                                 /* q_weight = 0.0078125 */
  139 
  140 /* red parameters for a very slow link (e.g., dialup) */
  141 #define W_WEIGHT_2      64      /* inverse of weight of EWMA (63/64) */
  142                                 /* q_weight = 0.015625 */
  143 
  144 /* fixed-point uses 12-bit decimal places */
  145 #define FP_SHIFT        12      /* fixed-point shift */
  146 
  147 /* red parameters for drop probability */
  148 #define INV_P_MAX       10      /* inverse of max drop probability */
  149 #define TH_MIN           5      /* min threshold */
  150 #define TH_MAX          15      /* max threshold */
  151 
  152 #define RIO_LIMIT       60      /* default max queue length */
  153 
  154 #define TV_DELTA(a, b, delta) {                                 \
  155         register int    xxs;                                    \
  156                                                                 \
  157         delta = (a)->tv_usec - (b)->tv_usec;                    \
  158         if ((xxs = (a)->tv_sec - (b)->tv_sec) != 0) {           \
  159                 if (xxs < 0) {                                  \
  160                         printf("rm_class: bogus time values");  \
  161                         delta = 60000000;                       \
  162                 } else if (xxs > 4)  {                          \
  163                         if (xxs > 60)                           \
  164                                 delta = 60000000;               \
  165                         else                                    \
  166                                 delta += xxs * 1000000;         \
  167                 } else while (xxs > 0) {                        \
  168                         delta += 1000000;                       \
  169                         xxs--;                                  \
  170                 }                                               \
  171         }                                                       \
  172 }
  173 
  174 /* rio_list keeps all rio_queue_t's allocated. */
  175 static rio_queue_t *rio_list = NULL;
  176 /* default rio parameter values */
  177 static struct redparams default_rio_params[RIO_NDROPPREC] = {
  178   /* th_min,             th_max,     inv_pmax */
  179   { TH_MAX * 2 + TH_MIN, TH_MAX * 3, INV_P_MAX }, /* low drop precedence */
  180   { TH_MAX + TH_MIN,     TH_MAX * 2, INV_P_MAX }, /* medium drop precedence */
  181   { TH_MIN,              TH_MAX,     INV_P_MAX }  /* high drop precedence */
  182 };
  183 
  184 /* internal function prototypes */
  185 static int rio_enqueue __P((struct ifaltq *, struct mbuf *,
  186                             struct altq_pktattr *));
  187 static struct mbuf *rio_dequeue __P((struct ifaltq *, int));
  188 static int rio_request __P((struct ifaltq *, int, void *));
  189 static int rio_detach __P((rio_queue_t *));
  190 static int dscp2index __P((u_int8_t));
  191 
  192 /*
  193  * rio device interface
  194  */
  195 altqdev_decl(rio);
  196 
  197 int
  198 rioopen(dev, flag, fmt, p)
  199         dev_t dev;
  200         int flag, fmt;
  201         struct proc *p;
  202 {
  203         /* everything will be done when the queueing scheme is attached. */
  204         return 0;
  205 }
  206 
  207 int
  208 rioclose(dev, flag, fmt, p)
  209         dev_t dev;
  210         int flag, fmt;
  211         struct proc *p;
  212 {
  213         rio_queue_t *rqp;
  214         int err, error = 0;
  215 
  216         while ((rqp = rio_list) != NULL) {
  217                 /* destroy all */
  218                 err = rio_detach(rqp);
  219                 if (err != 0 && error == 0)
  220                         error = err;
  221         }
  222 
  223         return error;
  224 }
  225 
  226 int
  227 rioioctl(dev, cmd, addr, flag, p)
  228         dev_t dev;
  229         ioctlcmd_t cmd;
  230         caddr_t addr;
  231         int flag;
  232         struct proc *p;
  233 {
  234         rio_queue_t *rqp;
  235         struct rio_interface *ifacep;
  236         struct ifnet *ifp;
  237         int     error = 0;
  238 
  239         /* check super-user privilege */
  240         switch (cmd) {
  241         case RIO_GETSTATS:
  242                 break;
  243         default:
  244 #if (__FreeBSD_version > 400000)
  245                 if ((error = suser(p)) != 0)
  246                         return (error);
  247 #else
  248                 if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
  249                         return (error);
  250 #endif
  251                 break;
  252         }
  253     
  254         switch (cmd) {
  255 
  256         case RIO_ENABLE:
  257                 ifacep = (struct rio_interface *)addr;
  258                 if ((rqp = altq_lookup(ifacep->rio_ifname, ALTQT_RIO)) == NULL) {
  259                         error = EBADF;
  260                         break;
  261                 }
  262                 error = altq_enable(rqp->rq_ifq);
  263                 break;
  264 
  265         case RIO_DISABLE:
  266                 ifacep = (struct rio_interface *)addr;
  267                 if ((rqp = altq_lookup(ifacep->rio_ifname, ALTQT_RIO)) == NULL) {
  268                         error = EBADF;
  269                         break;
  270                 }
  271                 error = altq_disable(rqp->rq_ifq);
  272                 break;
  273 
  274         case RIO_IF_ATTACH:
  275                 ifp = ifunit(((struct rio_interface *)addr)->rio_ifname);
  276                 if (ifp == NULL) {
  277                         error = ENXIO;
  278                         break;
  279                 }
  280 
  281                 /* allocate and initialize rio_queue_t */
  282                 MALLOC(rqp, rio_queue_t *, sizeof(rio_queue_t), M_DEVBUF, M_WAITOK);
  283                 if (rqp == NULL) {
  284                         error = ENOMEM;
  285                         break;
  286                 }
  287                 (void)memset(rqp, 0, sizeof(rio_queue_t));
  288 
  289                 MALLOC(rqp->rq_q, class_queue_t *, sizeof(class_queue_t),
  290                        M_DEVBUF, M_WAITOK);
  291                 if (rqp->rq_q == NULL) {
  292                         FREE(rqp, M_DEVBUF);
  293                         error = ENOMEM;
  294                         break;
  295                 }
  296                 (void)memset(rqp->rq_q, 0, sizeof(class_queue_t));
  297 
  298                 rqp->rq_rio = rio_alloc(0, NULL, 0, 0);
  299                 if (rqp->rq_rio == NULL) {
  300                         FREE(rqp->rq_q, M_DEVBUF);
  301                         FREE(rqp, M_DEVBUF);
  302                         error = ENOMEM;
  303                         break;
  304                 }
  305 
  306                 rqp->rq_ifq = &ifp->if_snd;
  307                 qtail(rqp->rq_q) = NULL;
  308                 qlen(rqp->rq_q) = 0;
  309                 qlimit(rqp->rq_q) = RIO_LIMIT;
  310                 qtype(rqp->rq_q) = Q_RIO;
  311 
  312                 /*
  313                  * set RIO to this ifnet structure.
  314                  */
  315                 error = altq_attach(rqp->rq_ifq, ALTQT_RIO, rqp,
  316                                     rio_enqueue, rio_dequeue, rio_request,
  317                                     NULL, NULL);
  318                 if (error) {
  319                         rio_destroy(rqp->rq_rio);
  320                         FREE(rqp->rq_q, M_DEVBUF);
  321                         FREE(rqp, M_DEVBUF);
  322                         break;
  323                 }
  324 
  325                 /* add this state to the rio list */
  326                 rqp->rq_next = rio_list;
  327                 rio_list = rqp;
  328                 break;
  329 
  330         case RIO_IF_DETACH:
  331                 ifacep = (struct rio_interface *)addr;
  332                 if ((rqp = altq_lookup(ifacep->rio_ifname, ALTQT_RIO)) == NULL) {
  333                         error = EBADF;
  334                         break;
  335                 }
  336                 error = rio_detach(rqp);
  337                 break;
  338 
  339         case RIO_GETSTATS:
  340                 do {
  341                         struct rio_stats *q_stats;
  342                         rio_t *rp;
  343                         int i;
  344 
  345                         q_stats = (struct rio_stats *)addr;
  346                         if ((rqp = altq_lookup(q_stats->iface.rio_ifname,
  347                                                ALTQT_RIO)) == NULL) {
  348                                 error = EBADF;
  349                                 break;
  350                         }
  351 
  352                         rp = rqp->rq_rio;
  353 
  354                         q_stats->q_limit = qlimit(rqp->rq_q);
  355                         q_stats->weight = rp->rio_weight;
  356                         q_stats->flags = rp->rio_flags;
  357 
  358                         for (i = 0; i < RIO_NDROPPREC; i++) {
  359                                 q_stats->q_len[i] = rp->rio_precstate[i].qlen;
  360                                 (void)memcpy(&q_stats->q_stats[i],
  361                                     &rp->q_stats[i], sizeof(struct redstats));
  362                                 q_stats->q_stats[i].q_avg =
  363                                     rp->rio_precstate[i].avg >> rp->rio_wshift;
  364 
  365                                 q_stats->q_params[i].inv_pmax
  366                                         = rp->rio_precstate[i].inv_pmax;
  367                                 q_stats->q_params[i].th_min
  368                                         = rp->rio_precstate[i].th_min;
  369                                 q_stats->q_params[i].th_max
  370                                         = rp->rio_precstate[i].th_max;
  371                         }
  372                 } while (0);
  373                 break;
  374 
  375         case RIO_CONFIG:
  376                 do {
  377                         struct rio_conf *fc;
  378                         rio_t   *new;
  379                         int s, limit, i;
  380 
  381                         fc = (struct rio_conf *)addr;
  382                         if ((rqp = altq_lookup(fc->iface.rio_ifname,
  383                                                ALTQT_RIO)) == NULL) {
  384                                 error = EBADF;
  385                                 break;
  386                         }
  387 
  388                         new = rio_alloc(fc->rio_weight, &fc->q_params[0],
  389                                         fc->rio_flags, fc->rio_pkttime);
  390                         if (new == NULL) {
  391                                 error = ENOMEM;
  392                                 break;
  393                         }
  394 
  395                         s = splnet();
  396                         _flushq(rqp->rq_q);
  397                         limit = fc->rio_limit;
  398                         if (limit < fc->q_params[RIO_NDROPPREC-1].th_max)
  399                                 limit = fc->q_params[RIO_NDROPPREC-1].th_max;
  400                         qlimit(rqp->rq_q) = limit;
  401 
  402                         rio_destroy(rqp->rq_rio);
  403                         rqp->rq_rio = new;
  404 
  405                         splx(s);
  406 
  407                         /* write back new values */
  408                         fc->rio_limit = limit;
  409                         for (i = 0; i < RIO_NDROPPREC; i++) {
  410                                 fc->q_params[i].inv_pmax =
  411                                         rqp->rq_rio->rio_precstate[i].inv_pmax;
  412                                 fc->q_params[i].th_min =
  413                                         rqp->rq_rio->rio_precstate[i].th_min;
  414                                 fc->q_params[i].th_max =
  415                                         rqp->rq_rio->rio_precstate[i].th_max;
  416                         }
  417                 } while (0);
  418                 break;
  419 
  420         case RIO_SETDEFAULTS:
  421                 do {
  422                         struct redparams *rp;
  423                         int i;
  424 
  425                         rp = (struct redparams *)addr;
  426                         for (i = 0; i < RIO_NDROPPREC; i++)
  427                                 default_rio_params[i] = rp[i];
  428                 } while (0);
  429                 break;
  430 
  431         default:
  432                 error = EINVAL;
  433                 break;
  434         }
  435 
  436         return error;
  437 }
  438 
  439 static int
  440 rio_detach(rqp)
  441         rio_queue_t *rqp;
  442 {
  443         rio_queue_t *tmp;
  444         int error = 0;
  445 
  446         if (ALTQ_IS_ENABLED(rqp->rq_ifq))
  447                 altq_disable(rqp->rq_ifq);
  448 
  449         if ((error = altq_detach(rqp->rq_ifq)))
  450                 return (error);
  451 
  452         if (rio_list == rqp)
  453                 rio_list = rqp->rq_next;
  454         else {
  455                 for (tmp = rio_list; tmp != NULL; tmp = tmp->rq_next)
  456                         if (tmp->rq_next == rqp) {
  457                                 tmp->rq_next = rqp->rq_next;
  458                                 break;
  459                         }
  460                 if (tmp == NULL)
  461                         printf("rio_detach: no state found in rio_list!\n");
  462         }
  463 
  464         rio_destroy(rqp->rq_rio);
  465         FREE(rqp->rq_q, M_DEVBUF);
  466         FREE(rqp, M_DEVBUF);
  467         return (error);
  468 }
  469 
  470 /*
  471  * rio support routines
  472  */
  473 static int
  474 rio_request(ifq, req, arg)
  475         struct ifaltq *ifq;
  476         int req;
  477         void *arg;
  478 {
  479         rio_queue_t *rqp = (rio_queue_t *)ifq->altq_disc;
  480 
  481         switch (req) {
  482         case ALTRQ_PURGE:
  483                 _flushq(rqp->rq_q);
  484                 if (ALTQ_IS_ENABLED(ifq))
  485                         ifq->ifq_len = 0;
  486                 break;
  487         }
  488         return (0);
  489 }
  490 
  491 
  492 rio_t *
  493 rio_alloc(weight, params, flags, pkttime)
  494         int     weight;
  495         struct redparams *params;
  496         int     flags, pkttime;
  497 {
  498         rio_t   *rp;
  499         int     w, i;
  500         int     npkts_per_sec;
  501         
  502         MALLOC(rp, rio_t *, sizeof(rio_t), M_DEVBUF, M_WAITOK);
  503         if (rp == NULL)
  504                 return (NULL);
  505         (void)memset(rp, 0, sizeof(rio_t));
  506 
  507         rp->rio_flags = flags;
  508         if (pkttime == 0)
  509                 /* default packet time: 1000 bytes / 10Mbps * 8 * 1000000 */
  510                 rp->rio_pkttime = 800;
  511         else 
  512                 rp->rio_pkttime = pkttime;
  513 
  514         if (weight != 0)
  515                 rp->rio_weight = weight;
  516         else {
  517                 /* use derfault */
  518                 rp->rio_weight = W_WEIGHT;
  519 
  520                 /* when the link is very slow, adjust red parameters */
  521                 npkts_per_sec = 1000000 / rp->rio_pkttime;
  522                 if (npkts_per_sec < 50) {
  523                         /* up to about 400Kbps */
  524                         rp->rio_weight = W_WEIGHT_2;
  525                 } else if (npkts_per_sec < 300) {
  526                         /* up to about 2.4Mbps */
  527                         rp->rio_weight = W_WEIGHT_1;
  528                 }
  529         }
  530 
  531         /* calculate wshift.  weight must be power of 2 */
  532         w = rp->rio_weight;
  533         for (i = 0; w > 1; i++)
  534                 w = w >> 1;
  535         rp->rio_wshift = i;
  536         w = 1 << rp->rio_wshift;
  537         if (w != rp->rio_weight) {
  538                 printf("invalid weight value %d for red! use %d\n",
  539                        rp->rio_weight, w);
  540                 rp->rio_weight = w;
  541         }
  542 
  543         /* allocate weight table */
  544         rp->rio_wtab = wtab_alloc(rp->rio_weight);
  545 
  546         for (i = 0; i < RIO_NDROPPREC; i++) {
  547                 struct dropprec_state *prec = &rp->rio_precstate[i];
  548 
  549                 prec->avg = 0;
  550                 prec->idle = 1;
  551 
  552                 if (params == NULL || params[i].inv_pmax == 0)
  553                         prec->inv_pmax = default_rio_params[i].inv_pmax;
  554                 else
  555                         prec->inv_pmax = params[i].inv_pmax;
  556                 if (params == NULL || params[i].th_min == 0)
  557                         prec->th_min = default_rio_params[i].th_min;
  558                 else
  559                         prec->th_min = params[i].th_min;
  560                 if (params == NULL || params[i].th_max == 0)
  561                         prec->th_max = default_rio_params[i].th_max;
  562                 else
  563                         prec->th_max = params[i].th_max;
  564 
  565                 /*
  566                  * th_min_s and th_max_s are scaled versions of th_min
  567                  * and th_max to be compared with avg.
  568                  */
  569                 prec->th_min_s = prec->th_min << (rp->rio_wshift + FP_SHIFT);
  570                 prec->th_max_s = prec->th_max << (rp->rio_wshift + FP_SHIFT);
  571 
  572                 /*
  573                  * precompute probability denominator
  574                  *  probd = (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point
  575                  */
  576                 prec->probd = (2 * (prec->th_max - prec->th_min)
  577                                * prec->inv_pmax) << FP_SHIFT;
  578 
  579                 microtime(&prec->last);
  580         }
  581 
  582         return (rp);
  583 }
  584 
  585 void
  586 rio_destroy(rp)
  587         rio_t *rp;
  588 {
  589         wtab_destroy(rp->rio_wtab);
  590         FREE(rp, M_DEVBUF);
  591 }
  592 
  593 void 
  594 rio_getstats(rp, sp)
  595         rio_t *rp;
  596         struct redstats *sp;
  597 {
  598         int i;
  599         
  600         for (i = 0; i < RIO_NDROPPREC; i++) {
  601                 (void)memcpy(sp, &rp->q_stats[i], sizeof(struct redstats));
  602                 sp->q_avg = rp->rio_precstate[i].avg >> rp->rio_wshift;
  603                 sp++;
  604         }
  605 }
  606 
  607 /*
  608  * enqueue routine:
  609  *
  610  *      returns: 0 when successfully queued.
  611  *               ENOBUFS when drop occurs.
  612  */
  613 static int
  614 rio_enqueue(ifq, m, pktattr)
  615         struct ifaltq *ifq;
  616         struct mbuf *m;
  617         struct altq_pktattr *pktattr;
  618 {
  619         rio_queue_t *rqp = (rio_queue_t *)ifq->altq_disc;
  620         int error = 0;
  621 
  622         if (rio_addq(rqp->rq_rio, rqp->rq_q, m, pktattr) == 0)
  623                 ifq->ifq_len++;
  624         else
  625                 error = ENOBUFS;
  626         return error;
  627 }
  628 
  629 #if (RIO_NDROPPREC == 3)
  630 /*
  631  * internally, a drop precedence value is converted to an index
  632  * starting from 0.
  633  */
  634 static int
  635 dscp2index(u_int8_t dscp)
  636 {
  637         int dpindex = dscp & AF_DROPPRECMASK;
  638 
  639         if (dpindex == 0)
  640                 return (0);
  641         return ((dpindex >> 3) - 1);
  642 }
  643 #endif
  644 
  645 #if 1
  646 /*
  647  * kludge: when a packet is dequeued, we need to know its drop precedence
  648  * in order to keep the queue length of each drop precedence.
  649  * use m_pkthdr.rcvif to pass this info.
  650  */
  651 #define RIOM_SET_PRECINDEX(m, idx)      \
  652         do { (m)->m_pkthdr.rcvif = (struct ifnet *)((long)(idx)); } while (0)
  653 #define RIOM_GET_PRECINDEX(m)   \
  654         ({ long idx; idx = (long)((m)->m_pkthdr.rcvif); \
  655         (m)->m_pkthdr.rcvif = NULL; idx; })
  656 #endif
  657 
  658 int
  659 rio_addq(rp, q, m, pktattr)
  660         rio_t *rp;
  661         class_queue_t *q;
  662         struct mbuf *m;
  663         struct altq_pktattr *pktattr;
  664 {
  665         int avg, droptype;
  666         u_int8_t dsfield, odsfield;
  667         int dpindex, i, n, t;
  668         struct timeval now;
  669         struct dropprec_state *prec;
  670 
  671         dsfield = odsfield = read_dsfield(m, pktattr);
  672         dpindex = dscp2index(dsfield);
  673 
  674         /*
  675          * update avg of the precedence states whose drop precedence
  676          * is larger than or equal to the drop precedence of the packet
  677          */
  678         now.tv_sec = 0;
  679         for (i = dpindex; i < RIO_NDROPPREC; i++) {
  680                 prec = &rp->rio_precstate[i];
  681                 avg = prec->avg;
  682                 if (prec->idle) {
  683                         prec->idle = 0;
  684                         if (now.tv_sec == 0)
  685                                 microtime(&now);
  686                         t = (now.tv_sec - prec->last.tv_sec);
  687                         if (t > 60)
  688                                 avg = 0;
  689                         else {
  690                                 t = t * 1000000 +
  691                                         (now.tv_usec - prec->last.tv_usec);
  692                                 n = t / rp->rio_pkttime;
  693                                 /* calculate (avg = (1 - Wq)^n * avg) */
  694                                 if (n > 0)
  695                                         avg = (avg >> FP_SHIFT) *
  696                                                 pow_w(rp->rio_wtab, n);
  697                         }
  698                 }
  699 
  700                 /* run estimator. (avg is scaled by WEIGHT in fixed-point) */
  701                 avg += (prec->qlen << FP_SHIFT) - (avg >> rp->rio_wshift);
  702                 prec->avg = avg;                /* save the new value */
  703                 /*
  704                  * count keeps a tally of arriving traffic that has not
  705                  * been dropped.
  706                  */
  707                 prec->count++;
  708         }
  709 
  710         prec = &rp->rio_precstate[dpindex];
  711         avg = prec->avg;
  712     
  713         /* see if we drop early */
  714         droptype = DTYPE_NODROP;
  715         if (avg >= prec->th_min_s && prec->qlen > 1) {
  716                 if (avg >= prec->th_max_s) {
  717                         /* avg >= th_max: forced drop */
  718                         droptype = DTYPE_FORCED;
  719                 } else if (prec->old == 0) {
  720                         /* first exceeds th_min */
  721                         prec->count = 1;
  722                         prec->old = 1;
  723                 } else if (drop_early((avg - prec->th_min_s) >> rp->rio_wshift,
  724                                       prec->probd, prec->count)) {
  725                         /* unforced drop by red */
  726                         droptype = DTYPE_EARLY;
  727                 }
  728         } else {
  729                 /* avg < th_min */
  730                 prec->old = 0;
  731         }
  732 
  733         /*
  734          * if the queue length hits the hard limit, it's a forced drop.
  735          */
  736         if (droptype == DTYPE_NODROP && qlen(q) >= qlimit(q))
  737                 droptype = DTYPE_FORCED;
  738 
  739         if (droptype != DTYPE_NODROP) {
  740                 /* always drop incoming packet (as opposed to randomdrop) */
  741                 for (i = dpindex; i < RIO_NDROPPREC; i++)
  742                         rp->rio_precstate[i].count = 0;
  743 #ifdef RIO_STATS
  744                 if (droptype == DTYPE_EARLY)
  745                         rp->q_stats[dpindex].drop_unforced++;
  746                 else
  747                         rp->q_stats[dpindex].drop_forced++;
  748                 PKTCNTR_ADD(&rp->q_stats[dpindex].drop_cnt, m_pktlen(m));
  749 #endif
  750                 m_freem(m);
  751                 return (-1);
  752         }
  753 
  754         for (i = dpindex; i < RIO_NDROPPREC; i++)
  755                 rp->rio_precstate[i].qlen++;
  756 
  757         /* save drop precedence index in mbuf hdr */
  758         RIOM_SET_PRECINDEX(m, dpindex);
  759 
  760         if (rp->rio_flags & RIOF_CLEARDSCP)
  761                 dsfield &= ~DSCP_MASK;
  762 
  763         if (dsfield != odsfield)
  764                 write_dsfield(m, pktattr, dsfield);
  765 
  766         _addq(q, m);
  767 
  768 #ifdef RIO_STATS
  769         PKTCNTR_ADD(&rp->q_stats[dpindex].xmit_cnt, m_pktlen(m));
  770 #endif
  771         return (0);
  772 }
  773 
  774 /*
  775  * dequeue routine:
  776  *      must be called in splnet.
  777  *
  778  *      returns: mbuf dequeued.
  779  *               NULL when no packet is available in the queue.
  780  */
  781 
  782 static struct mbuf *
  783 rio_dequeue(ifq, op)
  784         struct ifaltq *ifq;
  785         int op;
  786 {
  787         rio_queue_t *rqp = (rio_queue_t *)ifq->altq_disc;
  788         struct mbuf *m = NULL;
  789 
  790         if (op == ALTDQ_POLL)
  791                 return qhead(rqp->rq_q);
  792 
  793         m = rio_getq(rqp->rq_rio, rqp->rq_q);
  794         if (m != NULL)
  795                 ifq->ifq_len--;
  796         return m;
  797 }
  798 
  799 struct mbuf *
  800 rio_getq(rp, q)
  801         rio_t *rp;
  802         class_queue_t *q;
  803 {
  804         struct mbuf *m;
  805         int dpindex, i;
  806 
  807         if ((m = _getq(q)) == NULL)
  808                 return NULL;
  809 
  810         dpindex = RIOM_GET_PRECINDEX(m);
  811         for (i = dpindex; i < RIO_NDROPPREC; i++) {
  812                 if (--rp->rio_precstate[i].qlen == 0) {
  813                         if (rp->rio_precstate[i].idle == 0) {
  814                                 rp->rio_precstate[i].idle = 1;
  815                                 microtime(&rp->rio_precstate[i].last);
  816                         }
  817                 }
  818         }
  819         return (m);
  820 }
  821 
  822 #ifdef KLD_MODULE
  823 
  824 static struct altqsw rio_sw =
  825         {"rio", rioopen, rioclose, rioioctl};
  826 
  827 ALTQ_MODULE(altq_rio, ALTQT_RIO, &rio_sw);
  828 
  829 #endif /* KLD_MODULE */
  830 
  831 #endif /* ALTQ_RIO */

Cache object: 9c9fbef430b93c252b8e9da1d7c148c1


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.