The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/altq/altq_red.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*      $NetBSD: altq_red.c,v 1.10 2004/02/13 18:02:05 wiz Exp $        */
    2 /*      $KAME: altq_red.c,v 1.9 2002/01/07 11:25:40 kjc Exp $   */
    3 
    4 /*
    5  * Copyright (C) 1997-2000
    6  *      Sony Computer Science Laboratories Inc.  All rights reserved.
    7  *
    8  * Redistribution and use in source and binary forms, with or without
    9  * modification, are permitted provided that the following conditions
   10  * are met:
   11  * 1. Redistributions of source code must retain the above copyright
   12  *    notice, this list of conditions and the following disclaimer.
   13  * 2. Redistributions in binary form must reproduce the above copyright
   14  *    notice, this list of conditions and the following disclaimer in the
   15  *    documentation and/or other materials provided with the distribution.
   16  *
   17  * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
   18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   20  * ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
   21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   27  * SUCH DAMAGE.
   28  *
   29  */
   30 /*
   31  * Copyright (c) 1990-1994 Regents of the University of California.
   32  * All rights reserved.
   33  *
   34  * Redistribution and use in source and binary forms, with or without
   35  * modification, are permitted provided that the following conditions
   36  * are met:
   37  * 1. Redistributions of source code must retain the above copyright
   38  *    notice, this list of conditions and the following disclaimer.
   39  * 2. Redistributions in binary form must reproduce the above copyright
   40  *    notice, this list of conditions and the following disclaimer in the
   41  *    documentation and/or other materials provided with the distribution.
   42  * 3. All advertising materials mentioning features or use of this software
   43  *    must display the following acknowledgement:
   44  *      This product includes software developed by the Computer Systems
   45  *      Engineering Group at Lawrence Berkeley Laboratory.
   46  * 4. Neither the name of the University nor of the Laboratory may be used
   47  *    to endorse or promote products derived from this software without
   48  *    specific prior written permission.
   49  *
   50  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   51  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   52  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   53  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   54  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   55  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   56  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   57  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   58  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   59  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   60  * SUCH DAMAGE.
   61  */
   62 
   63 #include <sys/cdefs.h>
   64 __KERNEL_RCSID(0, "$NetBSD: altq_red.c,v 1.10 2004/02/13 18:02:05 wiz Exp $");
   65 
   66 #if defined(__FreeBSD__) || defined(__NetBSD__)
   67 #include "opt_altq.h"
   68 #if (__FreeBSD__ != 2)
   69 #include "opt_inet.h"
   70 #ifdef __FreeBSD__
   71 #include "opt_inet6.h"
   72 #endif
   73 #endif
   74 #endif /* __FreeBSD__ || __NetBSD__ */
   75 #ifdef ALTQ_RED /* red is enabled by ALTQ_RED option in opt_altq.h */
   76 
   77 #include <sys/param.h>
   78 #include <sys/malloc.h>
   79 #include <sys/mbuf.h>
   80 #include <sys/socket.h>
   81 #include <sys/sockio.h>
   82 #include <sys/systm.h>
   83 #include <sys/proc.h>
   84 #include <sys/errno.h>
   85 #include <sys/kernel.h>
   86 #ifdef ALTQ_FLOWVALVE
   87 #include <sys/queue.h>
   88 #include <sys/time.h>
   89 #endif
   90 
   91 #include <net/if.h>
   92 #include <net/if_types.h>
   93 
   94 #include <netinet/in.h>
   95 #include <netinet/in_systm.h>
   96 #include <netinet/ip.h>
   97 #ifdef INET6
   98 #include <netinet/ip6.h>
   99 #endif
  100 
  101 #include <altq/altq.h>
  102 #include <altq/altq_conf.h>
  103 #include <altq/altq_red.h>
  104 #ifdef ALTQ_FLOWVALVE
  105 #include <altq/altq_flowvalve.h>
  106 #endif
  107 
  108 /*
  109  * ALTQ/RED (Random Early Detection) implementation using 32-bit
  110  * fixed-point calculation.
  111  *
  112  * written by kjc using the ns code as a reference.
  113  * you can learn more about red and ns from Sally's home page at
  114  * http://www-nrg.ee.lbl.gov/floyd/
  115  *
  116  * most of the red parameter values are fixed in this implementation
  117  * to prevent fixed-point overflow/underflow.
  118  * if you change the parameters, watch out for overflow/underflow!
  119  *
  120  * the parameters used are recommended values by Sally.
  121  * the corresponding ns config looks:
  122  *      q_weight=0.00195
  123  *      minthresh=5 maxthresh=15 queue-size=60
  124  *      linterm=30
  125  *      dropmech=drop-tail
  126  *      bytes=false (can't be handled by 32-bit fixed-point)
  127  *      doubleq=false dqthresh=false 
  128  *      wait=true
  129  */
  130 /*
  131  * alternative red parameters for a slow link.
  132  *
  133  * assume the queue length becomes from zero to L and keeps L, it takes
  134  * N packets for q_avg to reach 63% of L.
  135  * when q_weight is 0.002, N is about 500 packets.
  136  * for a slow link like dial-up, 500 packets takes more than 1 minute!
  137  * when q_weight is 0.008, N is about 127 packets.
  138  * when q_weight is 0.016, N is about 63 packets.
  139  * bursts of 50 packets are allowd for 0.002, bursts of 25 packets
  140  * are allowed for 0.016.
  141  * see Sally's paper for more details.
  142  */
  143 /* normal red parameters */
  144 #define W_WEIGHT        512     /* inverse of weight of EWMA (511/512) */
  145                                 /* q_weight = 0.00195 */
  146 
  147 /* red parameters for a slow link */
  148 #define W_WEIGHT_1      128     /* inverse of weight of EWMA (127/128) */
  149                                 /* q_weight = 0.0078125 */
  150 
  151 /* red parameters for a very slow link (e.g., dialup) */
  152 #define W_WEIGHT_2      64      /* inverse of weight of EWMA (63/64) */
  153                                 /* q_weight = 0.015625 */
  154 
  155 /* fixed-point uses 12-bit decimal places */
  156 #define FP_SHIFT        12      /* fixed-point shift */
  157 
  158 /* red parameters for drop probability */
  159 #define INV_P_MAX       10      /* inverse of max drop probability */
  160 #define TH_MIN          5       /* min threshold */
  161 #define TH_MAX          15      /* max threshold */
  162 
  163 #define RED_LIMIT       60      /* default max queue length */
  164 
  165 /*
  166  * our default policy for forced-drop is drop-tail.
  167  * (in altq-1.1.2 or earlier, the default was random-drop.
  168  * but it makes more sense to punish the cause of the surge.)
  169  * to switch to the random-drop policy, define "RED_RANDOM_DROP".
  170  */
  171 
  172 #ifdef ALTQ_FLOWVALVE
  173 /*
  174  * flow-valve is an extension to protect red from unresponsive flows
  175  * and to promote end-to-end congestion control.
  176  * flow-valve observes the average drop rates of the flows that have
  177  * experienced packet drops in the recent past.
  178  * when the average drop rate exceeds the threshold, the flow is
  179  * blocked by the flow-valve.  the trapped flow should back off
  180  * exponentially to escape from the flow-valve.
  181  */
  182 #ifdef RED_RANDOM_DROP
  183 #error "random-drop can't be used with flow-valve!"
  184 #endif
  185 #endif /* ALTQ_FLOWVALVE */
  186 
  187 /* red_list keeps all red_queue_t's allocated. */
  188 static red_queue_t *red_list = NULL;
  189 
  190 /* default red parameter values */
  191 static int default_th_min = TH_MIN;
  192 static int default_th_max = TH_MAX;
  193 static int default_inv_pmax = INV_P_MAX;
  194 
  195 /* internal function prototypes */
  196 static int red_enqueue __P((struct ifaltq *, struct mbuf *,
  197                             struct altq_pktattr *));
  198 static struct mbuf *red_dequeue __P((struct ifaltq *, int));
  199 static int red_request __P((struct ifaltq *, int, void *));
  200 static void red_purgeq __P((red_queue_t *));
  201 static int red_detach __P((red_queue_t *));
  202 #ifdef ALTQ_FLOWVALVE
  203 static __inline struct fve *flowlist_lookup __P((struct flowvalve *,
  204                          struct altq_pktattr *, struct timeval *));
  205 static __inline struct fve *flowlist_reclaim __P((struct flowvalve *,
  206                                                   struct altq_pktattr *));
  207 static __inline void flowlist_move_to_head __P((struct flowvalve *,
  208                                                 struct fve *));
  209 static __inline int fv_p2f __P((struct flowvalve *, int));
  210 static struct flowvalve *fv_alloc __P((struct red *));
  211 static void fv_destroy __P((struct flowvalve *));
  212 static int fv_checkflow __P((struct flowvalve *, struct altq_pktattr *,
  213                              struct fve **));
  214 static void fv_dropbyred __P((struct flowvalve *fv, struct altq_pktattr *,
  215                               struct fve *));
  216 #endif
  217 
  218 /*
  219  * red device interface
  220  */
  221 altqdev_decl(red);
  222 
  223 int
  224 redopen(dev, flag, fmt, p)
  225         dev_t dev;
  226         int flag, fmt;
  227         struct proc *p;
  228 {
  229         /* everything will be done when the queueing scheme is attached. */
  230         return 0;
  231 }
  232 
  233 int
  234 redclose(dev, flag, fmt, p)
  235         dev_t dev;
  236         int flag, fmt;
  237         struct proc *p;
  238 {
  239         red_queue_t *rqp;
  240         int err, error = 0;
  241 
  242         while ((rqp = red_list) != NULL) {
  243                 /* destroy all */
  244                 err = red_detach(rqp);
  245                 if (err != 0 && error == 0)
  246                         error = err;
  247         }
  248 
  249         return error;
  250 }
  251 
  252 int
  253 redioctl(dev, cmd, addr, flag, p)
  254         dev_t dev;
  255         ioctlcmd_t cmd;
  256         caddr_t addr;
  257         int flag;
  258         struct proc *p;
  259 {
  260         red_queue_t *rqp;
  261         struct red_interface *ifacep;
  262         struct ifnet *ifp;
  263         int     error = 0;
  264 
  265         /* check super-user privilege */
  266         switch (cmd) {
  267         case RED_GETSTATS:
  268                 break;
  269         default:
  270 #if (__FreeBSD_version > 400000)
  271                 if ((error = suser(p)) != 0)
  272 #else
  273                 if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
  274 #endif
  275                         return (error);
  276                 break;
  277         }
  278     
  279         switch (cmd) {
  280 
  281         case RED_ENABLE:
  282                 ifacep = (struct red_interface *)addr;
  283                 if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
  284                         error = EBADF;
  285                         break;
  286                 }
  287                 error = altq_enable(rqp->rq_ifq);
  288                 break;
  289 
  290         case RED_DISABLE:
  291                 ifacep = (struct red_interface *)addr;
  292                 if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
  293                         error = EBADF;
  294                         break;
  295                 }
  296                 error = altq_disable(rqp->rq_ifq);
  297                 break;
  298 
  299         case RED_IF_ATTACH:
  300                 ifp = ifunit(((struct red_interface *)addr)->red_ifname);
  301                 if (ifp == NULL) {
  302                         error = ENXIO;
  303                         break;
  304                 }
  305 
  306                 /* allocate and initialize red_queue_t */
  307                 MALLOC(rqp, red_queue_t *, sizeof(red_queue_t), M_DEVBUF, M_WAITOK);
  308                 if (rqp == NULL) {
  309                         error = ENOMEM;
  310                         break;
  311                 }
  312                 (void)memset(rqp, 0, sizeof(red_queue_t));
  313 
  314                 MALLOC(rqp->rq_q, class_queue_t *, sizeof(class_queue_t),
  315                        M_DEVBUF, M_WAITOK);
  316                 if (rqp->rq_q == NULL) {
  317                         FREE(rqp, M_DEVBUF);
  318                         error = ENOMEM;
  319                         break;
  320                 }
  321                 (void)memset(rqp->rq_q, 0, sizeof(class_queue_t));
  322 
  323                 rqp->rq_red = red_alloc(0, 0, 0, 0, 0, 0);
  324                 if (rqp->rq_red == NULL) {
  325                         FREE(rqp->rq_q, M_DEVBUF);
  326                         FREE(rqp, M_DEVBUF);
  327                         error = ENOMEM;
  328                         break;
  329                 }
  330 
  331                 rqp->rq_ifq = &ifp->if_snd;
  332                 qtail(rqp->rq_q) = NULL;
  333                 qlen(rqp->rq_q) = 0;
  334                 qlimit(rqp->rq_q) = RED_LIMIT;
  335                 qtype(rqp->rq_q) = Q_RED;
  336 
  337                 /*
  338                  * set RED to this ifnet structure.
  339                  */
  340                 error = altq_attach(rqp->rq_ifq, ALTQT_RED, rqp,
  341                                     red_enqueue, red_dequeue, red_request,
  342                                     NULL, NULL);
  343                 if (error) {
  344                         red_destroy(rqp->rq_red);
  345                         FREE(rqp->rq_q, M_DEVBUF);
  346                         FREE(rqp, M_DEVBUF);
  347                         break;
  348                 }
  349 
  350                 /* add this state to the red list */
  351                 rqp->rq_next = red_list;
  352                 red_list = rqp;
  353                 break;
  354 
  355         case RED_IF_DETACH:
  356                 ifacep = (struct red_interface *)addr;
  357                 if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
  358                         error = EBADF;
  359                         break;
  360                 }
  361                 error = red_detach(rqp);
  362                 break;
  363 
  364         case RED_GETSTATS:
  365                 do {
  366                         struct red_stats *q_stats;
  367                         red_t *rp;
  368 
  369                         q_stats = (struct red_stats *)addr;
  370                         if ((rqp = altq_lookup(q_stats->iface.red_ifname,
  371                                              ALTQT_RED)) == NULL) {
  372                                 error = EBADF;
  373                                 break;
  374                         }
  375 
  376                         q_stats->q_len     = qlen(rqp->rq_q);
  377                         q_stats->q_limit   = qlimit(rqp->rq_q);
  378 
  379                         rp = rqp->rq_red;
  380                         q_stats->q_avg     = rp->red_avg >> rp->red_wshift;
  381                         q_stats->xmit_cnt  = rp->red_stats.xmit_cnt;
  382                         q_stats->drop_cnt  = rp->red_stats.drop_cnt;
  383                         q_stats->drop_forced   = rp->red_stats.drop_forced;
  384                         q_stats->drop_unforced = rp->red_stats.drop_unforced;
  385                         q_stats->marked_packets = rp->red_stats.marked_packets;
  386 
  387                         q_stats->weight         = rp->red_weight;
  388                         q_stats->inv_pmax       = rp->red_inv_pmax;
  389                         q_stats->th_min         = rp->red_thmin;
  390                         q_stats->th_max         = rp->red_thmax;
  391 
  392 #ifdef ALTQ_FLOWVALVE
  393                         if (rp->red_flowvalve != NULL) {
  394                                 struct flowvalve *fv = rp->red_flowvalve;
  395                                 q_stats->fv_flows    = fv->fv_flows;
  396                                 q_stats->fv_pass     = fv->fv_stats.pass;
  397                                 q_stats->fv_predrop  = fv->fv_stats.predrop;
  398                                 q_stats->fv_alloc    = fv->fv_stats.alloc;
  399                                 q_stats->fv_escape   = fv->fv_stats.escape;
  400                         } else {
  401 #endif /* ALTQ_FLOWVALVE */
  402                                 q_stats->fv_flows    = 0;
  403                                 q_stats->fv_pass     = 0;
  404                                 q_stats->fv_predrop  = 0;
  405                                 q_stats->fv_alloc    = 0;
  406                                 q_stats->fv_escape   = 0;
  407 #ifdef ALTQ_FLOWVALVE
  408                         }
  409 #endif /* ALTQ_FLOWVALVE */
  410                 } while (0);
  411                 break;
  412 
  413         case RED_CONFIG:
  414                 do {
  415                         struct red_conf *fc;
  416                         red_t *new;
  417                         int s, limit;
  418 
  419                         fc = (struct red_conf *)addr;
  420                         if ((rqp = altq_lookup(fc->iface.red_ifname,
  421                                                ALTQT_RED)) == NULL) {
  422                                 error = EBADF;
  423                                 break;
  424                         }
  425                         new = red_alloc(fc->red_weight,
  426                                         fc->red_inv_pmax,
  427                                         fc->red_thmin,
  428                                         fc->red_thmax,
  429                                         fc->red_flags,
  430                                         fc->red_pkttime);
  431                         if (new == NULL) {
  432                                 error = ENOMEM;
  433                                 break;
  434                         }
  435 
  436                         s = splnet();
  437                         red_purgeq(rqp);
  438                         limit = fc->red_limit;
  439                         if (limit < fc->red_thmax)
  440                                 limit = fc->red_thmax;
  441                         qlimit(rqp->rq_q) = limit;
  442                         fc->red_limit = limit;  /* write back the new value */
  443 
  444                         red_destroy(rqp->rq_red);
  445                         rqp->rq_red = new;
  446 
  447                         splx(s);
  448 
  449                         /* write back new values */
  450                         fc->red_limit = limit;
  451                         fc->red_inv_pmax = rqp->rq_red->red_inv_pmax;
  452                         fc->red_thmin = rqp->rq_red->red_thmin;
  453                         fc->red_thmax = rqp->rq_red->red_thmax;
  454 
  455                 } while (0);
  456                 break;
  457 
  458         case RED_SETDEFAULTS:
  459                 do {
  460                         struct redparams *rp;
  461 
  462                         rp = (struct redparams *)addr;
  463 
  464                         default_th_min = rp->th_min;
  465                         default_th_max = rp->th_max;
  466                         default_inv_pmax = rp->inv_pmax;
  467                 } while (0);
  468                 break;
  469                 
  470         default:
  471                 error = EINVAL;
  472                 break;
  473         }
  474         return error;
  475 }
  476 
  477 static int
  478 red_detach(rqp)
  479         red_queue_t *rqp;
  480 {
  481         red_queue_t *tmp;
  482         int error = 0;
  483 
  484         if (ALTQ_IS_ENABLED(rqp->rq_ifq))
  485                 altq_disable(rqp->rq_ifq);
  486 
  487         if ((error = altq_detach(rqp->rq_ifq)))
  488                 return (error);
  489 
  490         if (red_list == rqp)
  491                 red_list = rqp->rq_next;
  492         else {
  493                 for (tmp = red_list; tmp != NULL; tmp = tmp->rq_next)
  494                         if (tmp->rq_next == rqp) {
  495                                 tmp->rq_next = rqp->rq_next;
  496                                 break;
  497                         }
  498                 if (tmp == NULL)
  499                         printf("red_detach: no state found in red_list!\n");
  500         }
  501 
  502         red_destroy(rqp->rq_red);
  503         FREE(rqp->rq_q, M_DEVBUF);
  504         FREE(rqp, M_DEVBUF);
  505         return (error);
  506 }
  507 
  508 /*
  509  * red support routines
  510  */
  511 
  512 red_t *
  513 red_alloc(weight, inv_pmax, th_min, th_max, flags, pkttime)
  514         int     weight, inv_pmax, th_min, th_max;
  515         int     flags, pkttime;
  516 {
  517         red_t   *rp;
  518         int     w, i;
  519         int     npkts_per_sec;
  520         
  521         MALLOC(rp, red_t *, sizeof(red_t), M_DEVBUF, M_WAITOK);
  522         if (rp == NULL)
  523                 return (NULL);
  524         (void)memset(rp, 0, sizeof(red_t));
  525 
  526         rp->red_avg = 0;
  527         rp->red_idle = 1;
  528 
  529         if (weight == 0)
  530                 rp->red_weight = W_WEIGHT;
  531         else
  532                 rp->red_weight = weight;
  533         if (inv_pmax == 0)
  534                 rp->red_inv_pmax = default_inv_pmax;
  535         else
  536                 rp->red_inv_pmax = inv_pmax;
  537         if (th_min == 0)
  538                 rp->red_thmin = default_th_min;
  539         else
  540                 rp->red_thmin = th_min;
  541         if (th_max == 0)
  542                 rp->red_thmax = default_th_max;
  543         else
  544                 rp->red_thmax = th_max;
  545 
  546         rp->red_flags = flags;
  547 
  548         if (pkttime == 0)
  549                 /* default packet time: 1000 bytes / 10Mbps * 8 * 1000000 */
  550                 rp->red_pkttime = 800;
  551         else 
  552                 rp->red_pkttime = pkttime;
  553 
  554         if (weight == 0) {
  555                 /* when the link is very slow, adjust red parameters */
  556                 npkts_per_sec = 1000000 / rp->red_pkttime;
  557                 if (npkts_per_sec < 50) {
  558                         /* up to about 400Kbps */
  559                         rp->red_weight = W_WEIGHT_2;
  560                 } else if (npkts_per_sec < 300) {
  561                         /* up to about 2.4Mbps */
  562                         rp->red_weight = W_WEIGHT_1;
  563                 }
  564         }
  565 
  566         /* calculate wshift.  weight must be power of 2 */
  567         w = rp->red_weight;
  568         for (i = 0; w > 1; i++)
  569                 w = w >> 1;
  570         rp->red_wshift = i;
  571         w = 1 << rp->red_wshift;
  572         if (w != rp->red_weight) {
  573                 printf("invalid weight value %d for red! use %d\n",
  574                        rp->red_weight, w);
  575                 rp->red_weight = w;
  576         }
  577         
  578         /*
  579          * thmin_s and thmax_s are scaled versions of th_min and th_max
  580          * to be compared with avg.
  581          */
  582         rp->red_thmin_s = rp->red_thmin << (rp->red_wshift + FP_SHIFT);
  583         rp->red_thmax_s = rp->red_thmax << (rp->red_wshift + FP_SHIFT);
  584 
  585         /*
  586          * precompute probability denominator
  587          *  probd = (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point
  588          */
  589         rp->red_probd = (2 * (rp->red_thmax - rp->red_thmin)
  590                          * rp->red_inv_pmax) << FP_SHIFT;
  591 
  592         /* allocate weight table */
  593         rp->red_wtab = wtab_alloc(rp->red_weight);
  594 
  595         microtime(&rp->red_last);
  596 #ifdef ALTQ_FLOWVALVE
  597         if (flags & REDF_FLOWVALVE)
  598                 rp->red_flowvalve = fv_alloc(rp);
  599         /* if fv_alloc failes, flowvalve is just disabled */
  600 #endif
  601         return (rp);
  602 }
  603 
  604 void
  605 red_destroy(rp)
  606         red_t *rp;
  607 {
  608 #ifdef ALTQ_FLOWVALVE
  609         if (rp->red_flowvalve != NULL)
  610                 fv_destroy(rp->red_flowvalve);
  611 #endif
  612         wtab_destroy(rp->red_wtab);
  613         FREE(rp, M_DEVBUF);
  614 }
  615 
  616 void 
  617 red_getstats(rp, sp)
  618         red_t *rp;
  619         struct redstats *sp;
  620 {
  621         sp->q_avg               = rp->red_avg >> rp->red_wshift;
  622         sp->xmit_cnt            = rp->red_stats.xmit_cnt;
  623         sp->drop_cnt            = rp->red_stats.drop_cnt;
  624         sp->drop_forced         = rp->red_stats.drop_forced;
  625         sp->drop_unforced       = rp->red_stats.drop_unforced;
  626         sp->marked_packets      = rp->red_stats.marked_packets;
  627 }
  628 
  629 /*
  630  * enqueue routine:
  631  *
  632  *      returns: 0 when successfully queued.
  633  *               ENOBUFS when drop occurs.
  634  */
  635 static int
  636 red_enqueue(ifq, m, pktattr)
  637         struct ifaltq *ifq;
  638         struct mbuf *m;
  639         struct altq_pktattr *pktattr;
  640 {
  641         red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
  642 
  643         if (red_addq(rqp->rq_red, rqp->rq_q, m, pktattr) < 0)
  644                 return ENOBUFS;
  645         ifq->ifq_len++;
  646         return 0;
  647 }
  648 
  649 int
  650 red_addq(rp, q, m, pktattr)
  651         red_t *rp;
  652         class_queue_t *q;
  653         struct mbuf *m;
  654         struct altq_pktattr *pktattr;
  655 {
  656         int avg, droptype;
  657         int n;
  658 #ifdef ALTQ_FLOWVALVE
  659         struct fve *fve = NULL;
  660 
  661         if (rp->red_flowvalve != NULL && rp->red_flowvalve->fv_flows > 0)
  662                 if (fv_checkflow(rp->red_flowvalve, pktattr, &fve)) {
  663                         m_freem(m);
  664                         return (-1);
  665                 }
  666 #endif
  667 
  668         avg = rp->red_avg;
  669 
  670         /*
  671          * if we were idle, we pretend that n packets arrived during
  672          * the idle period.
  673          */
  674         if (rp->red_idle) {
  675                 struct timeval now;
  676                 int t;
  677 
  678                 rp->red_idle = 0;
  679                 microtime(&now);
  680                 t = (now.tv_sec - rp->red_last.tv_sec);
  681                 if (t > 60) {
  682                         /*
  683                          * being idle for more than 1 minute, set avg to zero.
  684                          * this prevents t from overflow.
  685                          */
  686                         avg = 0;
  687                 } else {
  688                         t = t * 1000000 + (now.tv_usec - rp->red_last.tv_usec);
  689                         n = t / rp->red_pkttime - 1;
  690 
  691                         /* the following line does (avg = (1 - Wq)^n * avg) */
  692                         if (n > 0)
  693                                 avg = (avg >> FP_SHIFT) *
  694                                     pow_w(rp->red_wtab, n);
  695                 }
  696         }
  697 
  698         /* run estimator. (note: avg is scaled by WEIGHT in fixed-point) */
  699         avg += (qlen(q) << FP_SHIFT) - (avg >> rp->red_wshift);
  700         rp->red_avg = avg;              /* save the new value */
  701 
  702         /*
  703          * red_count keeps a tally of arriving traffic that has not
  704          * been dropped.
  705          */
  706         rp->red_count++;
  707 
  708         /* see if we drop early */
  709         droptype = DTYPE_NODROP;
  710         if (avg >= rp->red_thmin_s && qlen(q) > 1) {
  711                 if (avg >= rp->red_thmax_s) {
  712                         /* avg >= th_max: forced drop */
  713                         droptype = DTYPE_FORCED;
  714                 } else if (rp->red_old == 0) {
  715                         /* first exceeds th_min */
  716                         rp->red_count = 1;
  717                         rp->red_old = 1;
  718                 } else if (drop_early((avg - rp->red_thmin_s) >> rp->red_wshift,
  719                                       rp->red_probd, rp->red_count)) {
  720                         /* mark or drop by red */
  721                         if ((rp->red_flags & REDF_ECN) &&
  722                             mark_ecn(m, pktattr, rp->red_flags)) {
  723                                 /* successfully marked.  do not drop. */
  724                                 rp->red_count = 0;
  725 #ifdef RED_STATS
  726                                 rp->red_stats.marked_packets++;
  727 #endif
  728                         } else { 
  729                                 /* unforced drop by red */
  730                                 droptype = DTYPE_EARLY;
  731                         }
  732                 }
  733         } else {
  734                 /* avg < th_min */
  735                 rp->red_old = 0;
  736         }
  737 
  738         /*
  739          * if the queue length hits the hard limit, it's a forced drop.
  740          */
  741         if (droptype == DTYPE_NODROP && qlen(q) >= qlimit(q))
  742                 droptype = DTYPE_FORCED;
  743 
  744 #ifdef RED_RANDOM_DROP
  745         /* if successful or forced drop, enqueue this packet. */
  746         if (droptype != DTYPE_EARLY)
  747                 _addq(q, m);
  748 #else
  749         /* if successful, enqueue this packet. */
  750         if (droptype == DTYPE_NODROP)
  751                 _addq(q, m);
  752 #endif
  753         if (droptype != DTYPE_NODROP) {
  754                 if (droptype == DTYPE_EARLY) {
  755                         /* drop the incoming packet */
  756 #ifdef RED_STATS
  757                         rp->red_stats.drop_unforced++;
  758 #endif
  759                 } else {
  760                         /* forced drop, select a victim packet in the queue. */
  761 #ifdef RED_RANDOM_DROP
  762                         m = _getq_random(q);
  763 #endif
  764 #ifdef RED_STATS
  765                         rp->red_stats.drop_forced++;
  766 #endif
  767                 }
  768 #ifdef RED_STATS
  769                 PKTCNTR_ADD(&rp->red_stats.drop_cnt, m_pktlen(m));
  770 #endif
  771                 rp->red_count = 0;
  772 #ifdef ALTQ_FLOWVALVE
  773                 if (rp->red_flowvalve != NULL)
  774                         fv_dropbyred(rp->red_flowvalve, pktattr, fve);
  775 #endif
  776                 m_freem(m);
  777                 return (-1);
  778         }
  779         /* successfully queued */
  780 #ifdef RED_STATS
  781         PKTCNTR_ADD(&rp->red_stats.xmit_cnt, m_pktlen(m));
  782 #endif
  783         return (0);
  784 }
  785 
  786 /*
  787  * early-drop probability is calculated as follows:
  788  *   prob = p_max * (avg - th_min) / (th_max - th_min)
  789  *   prob_a = prob / (2 - count*prob)
  790  *          = (avg-th_min) / (2*(th_max-th_min)*inv_p_max - count*(avg-th_min))
  791  * here prob_a increases as successive undrop count increases.
  792  * (prob_a starts from prob/2, becomes prob when (count == (1 / prob)),
  793  * becomes 1 when (count >= (2 / prob))).
  794  */
  795 int
  796 drop_early(fp_len, fp_probd, count)
  797         int fp_len;     /* (avg - TH_MIN) in fixed-point */
  798         int fp_probd;   /* (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point */
  799         int count;      /* how many successive undropped packets */
  800 {
  801         int d;          /* denominator of drop-probability */
  802 
  803         d = fp_probd - count * fp_len;
  804         if (d <= 0)
  805                 /* count exceeds the hard limit: drop or mark */
  806                 return (1);
  807 
  808         /*
  809          * now the range of d is [1..600] in fixed-point. (when
  810          * th_max-th_min=10 and p_max=1/30)
  811          * drop probability = (avg - TH_MIN) / d
  812          */
  813 
  814         if ((random() % d) < fp_len) {
  815                 /* drop or mark */
  816                 return (1);
  817         }
  818         /* no drop/mark */
  819         return (0);
  820 }
  821 
  822 /*
  823  * try to mark CE bit to the packet.
  824  *    returns 1 if successfully marked, 0 otherwise.
  825  */
  826 int
  827 mark_ecn(m, pktattr, flags)
  828         struct mbuf *m;
  829         struct altq_pktattr *pktattr;
  830         int flags;
  831 {
  832         struct mbuf *m0;
  833 
  834         if (pktattr == NULL ||
  835             (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6))
  836                 return (0);
  837 
  838         /* verify that pattr_hdr is within the mbuf data */
  839         for (m0 = m; m0 != NULL; m0 = m0->m_next)
  840                 if ((pktattr->pattr_hdr >= m0->m_data) &&
  841                     (pktattr->pattr_hdr < m0->m_data + m0->m_len))
  842                         break;
  843         if (m0 == NULL) {
  844                 /* ick, pattr_hdr is stale */
  845                 pktattr->pattr_af = AF_UNSPEC;
  846                 return (0);
  847         }
  848 
  849         switch (pktattr->pattr_af) {
  850         case AF_INET:
  851                 if (flags & REDF_ECN4) {
  852                         struct ip *ip = (struct ip *)pktattr->pattr_hdr;
  853                         u_int8_t otos;
  854                         int sum;
  855             
  856                         if (ip->ip_v != 4)
  857                                 return (0);     /* version mismatch! */
  858 
  859                         if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_NOTECT)
  860                                 return (0);     /* not-ECT */
  861                         if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_CE)
  862                                 return (1);     /* already marked */
  863 
  864                         /*
  865                          * ecn-capable but not marked,
  866                          * mark CE and update checksum
  867                          */
  868                         otos = ip->ip_tos;
  869                         ip->ip_tos |= IPTOS_ECN_CE;
  870                         /*
  871                          * update checksum (from RFC1624)
  872                          *         HC' = ~(~HC + ~m + m')
  873                          */
  874                         sum = ~ntohs(ip->ip_sum) & 0xffff;
  875                         sum += (~otos & 0xffff) + ip->ip_tos;
  876                         sum = (sum >> 16) + (sum & 0xffff);
  877                         sum += (sum >> 16);  /* add carry */
  878                         ip->ip_sum = htons(~sum & 0xffff);
  879                         return (1);
  880                 }
  881                 break;
  882 #ifdef INET6
  883         case AF_INET6:
  884                 if (flags & REDF_ECN6) {
  885                         struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
  886                         u_int32_t flowlabel;
  887 
  888                         flowlabel = ntohl(ip6->ip6_flow);
  889                         if ((flowlabel >> 28) != 6)
  890                                 return (0);     /* version mismatch! */
  891                         if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
  892                             (IPTOS_ECN_NOTECT << 20))
  893                                 return (0);     /* not-ECT */
  894                         if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
  895                             (IPTOS_ECN_CE << 20))
  896                                 return (1);     /* already marked */
  897                         /*
  898                          * ecn-capable but not marked,  mark CE
  899                          */
  900                         flowlabel |= (IPTOS_ECN_CE << 20);
  901                         ip6->ip6_flow = htonl(flowlabel);
  902                         return (1);
  903                 }
  904                 break;
  905 #endif  /* INET6 */
  906         }
  907 
  908         /* not marked */
  909         return (0);
  910 }
  911 
  912 /*
  913  * dequeue routine:
  914  *      must be called in splnet.
  915  *
  916  *      returns: mbuf dequeued.
  917  *               NULL when no packet is available in the queue.
  918  */
  919 
  920 static struct mbuf *
  921 red_dequeue(ifq, op)
  922         struct ifaltq *ifq;
  923         int op;
  924 {
  925         red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
  926         struct mbuf *m;
  927 
  928         if (op == ALTDQ_POLL)
  929                 return qhead(rqp->rq_q);
  930 
  931         /* op == ALTDQ_REMOVE */
  932         m =  red_getq(rqp->rq_red, rqp->rq_q);
  933         if (m != NULL)
  934                 ifq->ifq_len--;
  935         return (m);
  936 }
  937 
  938 struct mbuf *
  939 red_getq(rp, q)
  940         red_t *rp;
  941         class_queue_t *q;
  942 {
  943         struct mbuf *m;
  944         
  945         if ((m = _getq(q)) == NULL) {
  946                 if (rp->red_idle == 0) {
  947                         rp->red_idle = 1;
  948                         microtime(&rp->red_last);
  949                 }
  950                 return NULL;
  951         }
  952 
  953         rp->red_idle = 0;
  954         return (m);
  955 }
  956 
  957 static int
  958 red_request(ifq, req, arg)
  959         struct ifaltq *ifq;
  960         int req;
  961         void *arg;
  962 {
  963         red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
  964 
  965         switch (req) {
  966         case ALTRQ_PURGE:
  967                 red_purgeq(rqp);
  968                 break;
  969         }
  970         return (0);
  971 }
  972 
  973 static void
  974 red_purgeq(rqp)
  975         red_queue_t *rqp;
  976 {
  977         _flushq(rqp->rq_q);
  978         if (ALTQ_IS_ENABLED(rqp->rq_ifq))
  979                 rqp->rq_ifq->ifq_len = 0;
  980 }
  981 
  982 
  983 /*
  984  * helper routine to calibrate avg during idle.
  985  * pow_w(wtab, n) returns (1 - Wq)^n in fixed-point
  986  * here Wq = 1/weight and the code assumes Wq is close to zero.
  987  *
  988  * w_tab[n] holds ((1 - Wq)^(2^n)) in fixed-point.
  989  */
  990 static struct wtab *wtab_list = NULL;   /* pointer to wtab list */
  991 
  992 struct wtab *
  993 wtab_alloc(weight)
  994         int weight;
  995 {
  996         struct wtab *w;
  997         int i;
  998 
  999         for (w = wtab_list; w != NULL; w = w->w_next)
 1000                 if (w->w_weight == weight) {
 1001                         w->w_refcount++;
 1002                         return (w);
 1003                 }
 1004 
 1005         MALLOC(w, struct wtab *, sizeof(struct wtab), M_DEVBUF, M_WAITOK);
 1006         if (w == NULL)
 1007                 panic("wtab_alloc: malloc failed!");
 1008         (void)memset(w, 0, sizeof(struct wtab));
 1009         w->w_weight = weight;
 1010         w->w_refcount = 1;
 1011         w->w_next = wtab_list;
 1012         wtab_list = w;
 1013 
 1014         /* initialize the weight table */
 1015         w->w_tab[0] = ((weight - 1) << FP_SHIFT) / weight;
 1016         for (i = 1; i < 32; i++) {
 1017                 w->w_tab[i] = (w->w_tab[i-1] * w->w_tab[i-1]) >> FP_SHIFT;
 1018                 if (w->w_tab[i] == 0 && w->w_param_max == 0)
 1019                         w->w_param_max = 1 << i;
 1020         }
 1021 
 1022         return (w);
 1023 }
 1024 
 1025 int 
 1026 wtab_destroy(w)
 1027         struct wtab *w;
 1028 {
 1029         struct wtab *prev;
 1030 
 1031         if (--w->w_refcount > 0)
 1032                 return (0);
 1033 
 1034         if (wtab_list == w)
 1035                 wtab_list = w->w_next;
 1036         else for (prev = wtab_list; prev->w_next != NULL; prev = prev->w_next)
 1037                 if (prev->w_next == w) {
 1038                         prev->w_next = w->w_next;
 1039                         break;
 1040                 }
 1041 
 1042         FREE(w, M_DEVBUF);
 1043         return (0);
 1044 }
 1045 
 1046 int32_t 
 1047 pow_w(w, n)
 1048         struct wtab *w;
 1049         int n;
 1050 {
 1051         int     i, bit;
 1052         int32_t val;
 1053 
 1054         if (n >= w->w_param_max)
 1055                 return (0);
 1056 
 1057         val = 1 << FP_SHIFT;
 1058         if (n <= 0)
 1059                 return (val);
 1060 
 1061         bit = 1;
 1062         i = 0;
 1063         while (n) {
 1064                 if (n & bit) {
 1065                         val = (val * w->w_tab[i]) >> FP_SHIFT;
 1066                         n &= ~bit;
 1067                 }
 1068                 i++;
 1069                 bit <<=  1;
 1070         }
 1071         return (val);
 1072 }
 1073 
 1074 #ifdef ALTQ_FLOWVALVE
 1075 
 1076 #define FV_PSHIFT       7       /* weight of average drop rate -- 1/128 */
 1077 #define FV_PSCALE(x)    ((x) << FV_PSHIFT)
 1078 #define FV_PUNSCALE(x)  ((x) >> FV_PSHIFT)
 1079 #define FV_FSHIFT       5       /* weight of average fraction -- 1/32 */
 1080 #define FV_FSCALE(x)    ((x) << FV_FSHIFT)
 1081 #define FV_FUNSCALE(x)  ((x) >> FV_FSHIFT)
 1082 
 1083 #define FV_TIMER        (3 * hz)        /* timer value for garbage collector */
 1084 #define FV_FLOWLISTSIZE         64      /* how many flows in flowlist */
 1085 
 1086 #define FV_N                    10      /* update fve_f every FV_N packets */
 1087 
 1088 #define FV_BACKOFFTHRESH        1  /* backoff threshold interval in second */
 1089 #define FV_TTHRESH              3  /* time threshold to delete fve */  
 1090 #define FV_ALPHA                5  /* extra packet count */
 1091 
 1092 #if (__FreeBSD_version > 300000)
 1093 #define FV_TIMESTAMP(tp)        getmicrotime(tp)
 1094 #else
 1095 #define FV_TIMESTAMP(tp)        { (*(tp)) = time; }
 1096 #endif
 1097 
 1098 /*
 1099  * Brtt table: 127 entry table to convert drop rate (p) to
 1100  * the corresponding bandwidth fraction (f)
 1101  * the following equation is implemented to use scaled values,
 1102  * fve_p and fve_f, in the fixed point format.
 1103  *
 1104  *   Brtt(p) = 1 /(sqrt(4*p/3) + min(1,3*sqrt(p*6/8)) * p * (1+32 * p*p))
 1105  *   f = Brtt(p) / (max_th + alpha)
 1106  */
 1107 #define BRTT_SIZE       128
 1108 #define BRTT_SHIFT      12
 1109 #define BRTT_MASK       0x0007f000
 1110 #define BRTT_PMAX       (1 << (FV_PSHIFT + FP_SHIFT))
 1111 
 1112 const int brtt_tab[BRTT_SIZE] = {
 1113         0, 1262010, 877019, 703694, 598706, 525854, 471107, 427728, 
 1114         392026, 361788, 335598, 312506, 291850, 273158, 256081, 240361, 
 1115         225800, 212247, 199585, 187788, 178388, 169544, 161207, 153333, 
 1116         145888, 138841, 132165, 125836, 119834, 114141, 108739, 103612, 
 1117         98747, 94129, 89746, 85585, 81637, 77889, 74333, 70957, 
 1118         67752, 64711, 61824, 59084, 56482, 54013, 51667, 49440, 
 1119         47325, 45315, 43406, 41591, 39866, 38227, 36667, 35184, 
 1120         33773, 32430, 31151, 29933, 28774, 27668, 26615, 25611, 
 1121         24653, 23740, 22868, 22035, 21240, 20481, 19755, 19062, 
 1122         18399, 17764, 17157, 16576, 16020, 15487, 14976, 14487, 
 1123         14017, 13567, 13136, 12721, 12323, 11941, 11574, 11222, 
 1124         10883, 10557, 10243, 9942, 9652, 9372, 9103, 8844, 
 1125         8594, 8354, 8122, 7898, 7682, 7474, 7273, 7079, 
 1126         6892, 6711, 6536, 6367, 6204, 6046, 5893, 5746, 
 1127         5603, 5464, 5330, 5201, 5075, 4954, 4836, 4722, 
 1128         4611, 4504, 4400, 4299, 4201, 4106, 4014, 3924
 1129 };
 1130 
 1131 static __inline struct fve *
 1132 flowlist_lookup(fv, pktattr, now)
 1133         struct flowvalve *fv;
 1134         struct altq_pktattr *pktattr;
 1135         struct timeval *now;
 1136 {
 1137         struct fve *fve;
 1138         int flows;
 1139         struct ip *ip;
 1140 #ifdef INET6
 1141         struct ip6_hdr *ip6;
 1142 #endif
 1143         struct timeval tthresh;
 1144 
 1145         if (pktattr == NULL)
 1146                 return (NULL);
 1147 
 1148         tthresh.tv_sec = now->tv_sec - FV_TTHRESH;
 1149         flows = 0;
 1150         /*
 1151          * search the flow list
 1152          */
 1153         switch (pktattr->pattr_af) {
 1154         case AF_INET:
 1155                 ip = (struct ip *)pktattr->pattr_hdr;
 1156                 TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){
 1157                         if (fve->fve_lastdrop.tv_sec == 0)
 1158                                 break;
 1159                         if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) {
 1160                                 fve->fve_lastdrop.tv_sec = 0;
 1161                                 break;
 1162                         }
 1163                         if (fve->fve_flow.flow_af == AF_INET &&
 1164                             fve->fve_flow.flow_ip.ip_src.s_addr ==
 1165                             ip->ip_src.s_addr &&
 1166                             fve->fve_flow.flow_ip.ip_dst.s_addr ==
 1167                             ip->ip_dst.s_addr)
 1168                                 return (fve);
 1169                         flows++;
 1170                 }
 1171                 break;
 1172 #ifdef INET6
 1173         case AF_INET6:
 1174                 ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
 1175                 TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){
 1176                         if (fve->fve_lastdrop.tv_sec == 0)
 1177                                 break;
 1178                         if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) {
 1179                                 fve->fve_lastdrop.tv_sec = 0;
 1180                                 break;
 1181                         }
 1182                         if (fve->fve_flow.flow_af == AF_INET6 &&
 1183                             IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_src,
 1184                                                &ip6->ip6_src) &&
 1185                             IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_dst,
 1186                                                &ip6->ip6_dst))
 1187                                 return (fve);
 1188                         flows++;
 1189                 }
 1190                 break;
 1191 #endif /* INET6 */
 1192 
 1193         default:
 1194                 /* unknown protocol.  no drop. */
 1195                 return (NULL);
 1196         }
 1197         fv->fv_flows = flows;   /* save the number of active fve's */
 1198         return (NULL);
 1199 }
 1200 
 1201 static __inline struct fve *
 1202 flowlist_reclaim(fv, pktattr)
 1203         struct flowvalve *fv;
 1204         struct altq_pktattr *pktattr;
 1205 {
 1206         struct fve *fve;
 1207         struct ip *ip;
 1208 #ifdef INET6
 1209         struct ip6_hdr *ip6;
 1210 #endif
 1211 
 1212         /*
 1213          * get an entry from the tail of the LRU list.
 1214          */
 1215         fve = TAILQ_LAST(&fv->fv_flowlist, fv_flowhead);
 1216 
 1217         switch (pktattr->pattr_af) {
 1218         case AF_INET:
 1219                 ip = (struct ip *)pktattr->pattr_hdr;
 1220                 fve->fve_flow.flow_af = AF_INET;
 1221                 fve->fve_flow.flow_ip.ip_src = ip->ip_src;
 1222                 fve->fve_flow.flow_ip.ip_dst = ip->ip_dst;
 1223                 break;
 1224 #ifdef INET6
 1225         case AF_INET6:
 1226                 ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
 1227                 fve->fve_flow.flow_af = AF_INET6;
 1228                 fve->fve_flow.flow_ip6.ip6_src = ip6->ip6_src;
 1229                 fve->fve_flow.flow_ip6.ip6_dst = ip6->ip6_dst;
 1230                 break;
 1231 #endif
 1232         }
 1233 
 1234         fve->fve_state = Green;
 1235         fve->fve_p = 0.0;
 1236         fve->fve_f = 0.0;
 1237         fve->fve_ifseq = fv->fv_ifseq - 1;
 1238         fve->fve_count = 0;
 1239 
 1240         fv->fv_flows++;
 1241 #ifdef FV_STATS
 1242         fv->fv_stats.alloc++;
 1243 #endif
 1244         return (fve);
 1245 }
 1246 
 1247 static __inline void
 1248 flowlist_move_to_head(fv, fve)
 1249         struct flowvalve *fv;
 1250         struct fve *fve;
 1251 {
 1252         if (TAILQ_FIRST(&fv->fv_flowlist) != fve) {
 1253                 TAILQ_REMOVE(&fv->fv_flowlist, fve, fve_lru);
 1254                 TAILQ_INSERT_HEAD(&fv->fv_flowlist, fve, fve_lru);
 1255         }
 1256 }
 1257 
 1258 /*
 1259  * allocate flowvalve structure
 1260  */
 1261 static struct flowvalve *
 1262 fv_alloc(rp)
 1263         struct red *rp;
 1264 {
 1265         struct flowvalve *fv;
 1266         struct fve *fve;
 1267         int i, num;
 1268 
 1269         num = FV_FLOWLISTSIZE;
 1270         MALLOC(fv, struct flowvalve *, sizeof(struct flowvalve),
 1271                M_DEVBUF, M_WAITOK);
 1272         if (fv == NULL)
 1273                 return (NULL);
 1274         (void)memset(fv, 0, sizeof(struct flowvalve));
 1275 
 1276         MALLOC(fv->fv_fves, struct fve *, sizeof(struct fve) * num,
 1277                M_DEVBUF, M_WAITOK);
 1278         if (fv->fv_fves == NULL) {
 1279                 FREE(fv, M_DEVBUF);
 1280                 return (NULL);
 1281         }
 1282         (void)memset(fv->fv_fves, 0, sizeof(struct fve) * num);
 1283 
 1284         fv->fv_flows = 0;
 1285         TAILQ_INIT(&fv->fv_flowlist);
 1286         for (i = 0; i < num; i++) {
 1287                 fve = &fv->fv_fves[i];
 1288                 fve->fve_lastdrop.tv_sec = 0;
 1289                 TAILQ_INSERT_TAIL(&fv->fv_flowlist, fve, fve_lru);
 1290         }
 1291 
 1292         /* initialize drop rate threshold in scaled fixed-point */
 1293         fv->fv_pthresh = (FV_PSCALE(1) << FP_SHIFT) / rp->red_inv_pmax;
 1294 
 1295         /* initialize drop rate to fraction table */
 1296         MALLOC(fv->fv_p2ftab, int *, sizeof(int) * BRTT_SIZE,
 1297                M_DEVBUF, M_WAITOK);
 1298         if (fv->fv_p2ftab == NULL) {
 1299                 FREE(fv->fv_fves, M_DEVBUF);
 1300                 FREE(fv, M_DEVBUF);
 1301                 return (NULL);
 1302         }
 1303         /*
 1304          * create the p2f table.
 1305          * (shift is used to keep the precision)
 1306          */
 1307         for (i = 1; i < BRTT_SIZE; i++) {
 1308                 int f;
 1309 
 1310                 f = brtt_tab[i] << 8;
 1311                 fv->fv_p2ftab[i] = (f / (rp->red_thmax + FV_ALPHA)) >> 8;
 1312         }
 1313 
 1314         return (fv);
 1315 }
 1316 
 1317 static void fv_destroy(fv)
 1318         struct flowvalve *fv;
 1319 {
 1320         FREE(fv->fv_p2ftab, M_DEVBUF);
 1321         FREE(fv->fv_fves, M_DEVBUF);
 1322         FREE(fv, M_DEVBUF);
 1323 }
 1324 
 1325 static __inline int
 1326 fv_p2f(fv, p)
 1327         struct flowvalve        *fv;
 1328         int     p;
 1329 {
 1330         int val, f;
 1331 
 1332         if (p >= BRTT_PMAX)
 1333                 f = fv->fv_p2ftab[BRTT_SIZE-1];
 1334         else if ((val = (p & BRTT_MASK)))
 1335                 f = fv->fv_p2ftab[(val >> BRTT_SHIFT)];
 1336         else
 1337                 f = fv->fv_p2ftab[1];
 1338         return (f);
 1339 }
 1340 
 1341 /*
 1342  * check if an arriving packet should be pre-dropped.
 1343  * called from red_addq() when a packet arrives.
 1344  * returns 1 when the packet should be pre-dropped.
 1345  * should be called in splnet.
 1346  */
 1347 static int
 1348 fv_checkflow(fv, pktattr, fcache)
 1349         struct flowvalve *fv;
 1350         struct altq_pktattr *pktattr;
 1351         struct fve **fcache;
 1352 {
 1353         struct fve *fve;
 1354         struct timeval now;
 1355 
 1356         fv->fv_ifseq++;
 1357         FV_TIMESTAMP(&now);
 1358 
 1359         if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL)
 1360                 /* no matching entry in the flowlist */
 1361                 return (0);
 1362 
 1363         *fcache = fve;
 1364 
 1365         /* update fraction f for every FV_N packets */
 1366         if (++fve->fve_count == FV_N) {
 1367                 /*
 1368                  * f = Wf * N / (fv_ifseq - fve_ifseq) + (1 - Wf) * f
 1369                  */
 1370                 fve->fve_f =
 1371                         (FV_N << FP_SHIFT) / (fv->fv_ifseq - fve->fve_ifseq)
 1372                         + fve->fve_f - FV_FUNSCALE(fve->fve_f);
 1373                 fve->fve_ifseq = fv->fv_ifseq;
 1374                 fve->fve_count = 0;
 1375         }
 1376 
 1377         /*
 1378          * overpumping test
 1379          */
 1380         if (fve->fve_state == Green && fve->fve_p > fv->fv_pthresh) {
 1381                 int fthresh;
 1382 
 1383                 /* calculate a threshold */
 1384                 fthresh = fv_p2f(fv, fve->fve_p);
 1385                 if (fve->fve_f > fthresh)
 1386                         fve->fve_state = Red;
 1387         }
 1388 
 1389         if (fve->fve_state == Red) {
 1390                 /*
 1391                  * backoff test
 1392                  */
 1393                 if (now.tv_sec - fve->fve_lastdrop.tv_sec > FV_BACKOFFTHRESH) {
 1394                         /* no drop for at least FV_BACKOFFTHRESH sec */
 1395                         fve->fve_p = 0;
 1396                         fve->fve_state = Green;
 1397 #ifdef FV_STATS
 1398                         fv->fv_stats.escape++;
 1399 #endif
 1400                 } else {
 1401                         /* block this flow */
 1402                         flowlist_move_to_head(fv, fve);
 1403                         fve->fve_lastdrop = now;
 1404 #ifdef FV_STATS
 1405                         fv->fv_stats.predrop++;
 1406 #endif
 1407                         return (1);
 1408                 }
 1409         }
 1410 
 1411         /*
 1412          * p = (1 - Wp) * p
 1413          */
 1414         fve->fve_p -= FV_PUNSCALE(fve->fve_p);
 1415         if (fve->fve_p < 0)
 1416                 fve->fve_p = 0;
 1417 #ifdef FV_STATS
 1418         fv->fv_stats.pass++;
 1419 #endif
 1420         return (0);
 1421 }
 1422 
 1423 /*
 1424  * called from red_addq when a packet is dropped by red.
 1425  * should be called in splnet.
 1426  */
 1427 static void fv_dropbyred(fv, pktattr, fcache)
 1428         struct flowvalve *fv;
 1429         struct altq_pktattr *pktattr;
 1430         struct fve *fcache;
 1431 {
 1432         struct fve *fve;
 1433         struct timeval now;
 1434 
 1435         if (pktattr == NULL)
 1436                 return;
 1437         FV_TIMESTAMP(&now);
 1438 
 1439         if (fcache != NULL)
 1440                 /* the fve of this packet is already cached */
 1441                 fve = fcache;
 1442         else if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL)
 1443                 fve = flowlist_reclaim(fv, pktattr);
 1444 
 1445         flowlist_move_to_head(fv, fve);
 1446 
 1447         /*
 1448          * update p:  the following line cancels the update
 1449          *            in fv_checkflow() and calculate
 1450          *      p = Wp + (1 - Wp) * p
 1451          */
 1452         fve->fve_p = (1 << FP_SHIFT) + fve->fve_p;
 1453 
 1454         fve->fve_lastdrop = now;
 1455 }
 1456 
 1457 #endif /* ALTQ_FLOWVALVE */
 1458 
 1459 #ifdef KLD_MODULE
 1460 
 1461 static struct altqsw red_sw =
 1462         {"red", redopen, redclose, redioctl};
 1463 
 1464 ALTQ_MODULE(altq_red, ALTQT_RED, &red_sw);
 1465 
 1466 #endif /* KLD_MODULE */
 1467 
 1468 #endif /* ALTQ_RED */

Cache object: ba043d2a6358aec1c839d69116277dcd


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.