The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/netinet/ip_dummynet.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 1998-2002 Luigi Rizzo, Universita` di Pisa
    3  * Portions Copyright (c) 2000 Akamba Corp.
    4  * All rights reserved
    5  *
    6  * Redistribution and use in source and binary forms, with or without
    7  * modification, are permitted provided that the following conditions
    8  * are met:
    9  * 1. Redistributions of source code must retain the above copyright
   10  *    notice, this list of conditions and the following disclaimer.
   11  * 2. Redistributions in binary form must reproduce the above copyright
   12  *    notice, this list of conditions and the following disclaimer in the
   13  *    documentation and/or other materials provided with the distribution.
   14  *
   15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   25  * SUCH DAMAGE.
   26  *
   27  * $FreeBSD: releng/6.1/sys/netinet/ip_dummynet.c 155946 2006-02-23 08:28:15Z ume $
   28  */
   29 
   30 #define DUMMYNET_DEBUG
   31 
   32 #include "opt_inet6.h"
   33 
   34 /*
   35  * This module implements IP dummynet, a bandwidth limiter/delay emulator
   36  * used in conjunction with the ipfw package.
   37  * Description of the data structures used is in ip_dummynet.h
   38  * Here you mainly find the following blocks of code:
   39  *  + variable declarations;
   40  *  + heap management functions;
   41  *  + scheduler and dummynet functions;
   42  *  + configuration and initialization.
   43  *
   44  * NOTA BENE: critical sections are protected by the "dummynet lock".
   45  *
   46  * Most important Changes:
   47  *
   48  * 011004: KLDable
   49  * 010124: Fixed WF2Q behaviour
   50  * 010122: Fixed spl protection.
   51  * 000601: WF2Q support
   52  * 000106: large rewrite, use heaps to handle very many pipes.
   53  * 980513:      initial release
   54  *
   55  * include files marked with XXX are probably not needed
   56  */
   57 
   58 #include <sys/param.h>
   59 #include <sys/systm.h>
   60 #include <sys/malloc.h>
   61 #include <sys/mbuf.h>
   62 #include <sys/kernel.h>
   63 #include <sys/module.h>
   64 #include <sys/proc.h>
   65 #include <sys/socket.h>
   66 #include <sys/socketvar.h>
   67 #include <sys/time.h>
   68 #include <sys/sysctl.h>
   69 #include <net/if.h>
   70 #include <net/route.h>
   71 #include <netinet/in.h>
   72 #include <netinet/in_systm.h>
   73 #include <netinet/in_var.h>
   74 #include <netinet/ip.h>
   75 #include <netinet/ip_fw.h>
   76 #include <netinet/ip_dummynet.h>
   77 #include <netinet/ip_var.h>
   78 
   79 #include <netinet/if_ether.h> /* for struct arpcom */
   80 #include <net/bridge.h>
   81 
   82 #include <netinet/ip6.h>       /* for ip6_input, ip6_output prototypes */
   83 #include <netinet6/ip6_var.h>
   84 
   85 /*
   86  * We keep a private variable for the simulation time, but we could
   87  * probably use an existing one ("softticks" in sys/kern/kern_timeout.c)
   88  */
   89 static dn_key curr_time = 0 ; /* current simulation time */
   90 
   91 static int dn_hash_size = 64 ;  /* default hash size */
   92 
   93 /* statistics on number of queue searches and search steps */
   94 static int searches, search_steps ;
   95 static int pipe_expire = 1 ;   /* expire queue if empty */
   96 static int dn_max_ratio = 16 ; /* max queues/buckets ratio */
   97 
   98 static int red_lookup_depth = 256;      /* RED - default lookup table depth */
   99 static int red_avg_pkt_size = 512;      /* RED - default medium packet size */
  100 static int red_max_pkt_size = 1500;     /* RED - default max packet size */
  101 
  102 /*
  103  * Three heaps contain queues and pipes that the scheduler handles:
  104  *
  105  * ready_heap contains all dn_flow_queue related to fixed-rate pipes.
  106  *
  107  * wfq_ready_heap contains the pipes associated with WF2Q flows
  108  *
  109  * extract_heap contains pipes associated with delay lines.
  110  *
  111  */
  112 
  113 MALLOC_DEFINE(M_DUMMYNET, "dummynet", "dummynet heap");
  114 
  115 static struct dn_heap ready_heap, extract_heap, wfq_ready_heap ;
  116 
  117 static int      heap_init(struct dn_heap *h, int size);
  118 static int      heap_insert (struct dn_heap *h, dn_key key1, void *p);
  119 static void     heap_extract(struct dn_heap *h, void *obj);
  120 static void     transmit_event(struct dn_pipe *pipe, struct mbuf **head,
  121                     struct mbuf **tail);
  122 static void     ready_event(struct dn_flow_queue *q, struct mbuf **head,
  123                     struct mbuf **tail);
  124 static void     ready_event_wfq(struct dn_pipe *p, struct mbuf **head,
  125                     struct mbuf **tail);
  126 
  127 #define HASHSIZE        16
  128 #define HASH(num)       ((((num) >> 8) ^ ((num) >> 4) ^ (num)) & 0x0f)
  129 static struct dn_pipe_head      pipehash[HASHSIZE];     /* all pipes */
  130 static struct dn_flow_set_head  flowsethash[HASHSIZE];  /* all flowsets */
  131 
  132 static struct callout dn_timeout;
  133 
  134 extern  void (*bridge_dn_p)(struct mbuf *, struct ifnet *);
  135 
  136 #ifdef SYSCTL_NODE
  137 SYSCTL_NODE(_net_inet_ip, OID_AUTO, dummynet,
  138                 CTLFLAG_RW, 0, "Dummynet");
  139 SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, hash_size,
  140             CTLFLAG_RW, &dn_hash_size, 0, "Default hash table size");
  141 SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, curr_time,
  142             CTLFLAG_RD, &curr_time, 0, "Current tick");
  143 SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, ready_heap,
  144             CTLFLAG_RD, &ready_heap.size, 0, "Size of ready heap");
  145 SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, extract_heap,
  146             CTLFLAG_RD, &extract_heap.size, 0, "Size of extract heap");
  147 SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, searches,
  148             CTLFLAG_RD, &searches, 0, "Number of queue searches");
  149 SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, search_steps,
  150             CTLFLAG_RD, &search_steps, 0, "Number of queue search steps");
  151 SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, expire,
  152             CTLFLAG_RW, &pipe_expire, 0, "Expire queue if empty");
  153 SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, max_chain_len,
  154             CTLFLAG_RW, &dn_max_ratio, 0,
  155         "Max ratio between dynamic queues and buckets");
  156 SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, red_lookup_depth,
  157         CTLFLAG_RD, &red_lookup_depth, 0, "Depth of RED lookup table");
  158 SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, red_avg_pkt_size,
  159         CTLFLAG_RD, &red_avg_pkt_size, 0, "RED Medium packet size");
  160 SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, red_max_pkt_size,
  161         CTLFLAG_RD, &red_max_pkt_size, 0, "RED Max packet size");
  162 #endif
  163 
  164 #ifdef DUMMYNET_DEBUG
  165 int     dummynet_debug = 0;
  166 #ifdef SYSCTL_NODE
  167 SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, debug, CTLFLAG_RW, &dummynet_debug,
  168             0, "control debugging printfs");
  169 #endif
  170 #define DPRINTF(X)      if (dummynet_debug) printf X
  171 #else
  172 #define DPRINTF(X)
  173 #endif
  174 
  175 static struct mtx dummynet_mtx;
  176 /*
  177  * NB: Recursion is needed to deal with re-entry via ICMP.  That is,
  178  *     a packet may be dispatched via ip_input from dummynet_io and
  179  *     re-enter through ip_output.  Yech.
  180  */
  181 #define DUMMYNET_LOCK_INIT() \
  182         mtx_init(&dummynet_mtx, "dummynet", NULL, MTX_DEF | MTX_RECURSE)
  183 #define DUMMYNET_LOCK_DESTROY() mtx_destroy(&dummynet_mtx)
  184 #define DUMMYNET_LOCK()         mtx_lock(&dummynet_mtx)
  185 #define DUMMYNET_UNLOCK()       mtx_unlock(&dummynet_mtx)
  186 #define DUMMYNET_LOCK_ASSERT()  do {                            \
  187         mtx_assert(&dummynet_mtx, MA_OWNED);                    \
  188         NET_ASSERT_GIANT();                                     \
  189 } while (0)
  190 
  191 static int config_pipe(struct dn_pipe *p);
  192 static int ip_dn_ctl(struct sockopt *sopt);
  193 
  194 static void dummynet(void *);
  195 static void dummynet_flush(void);
  196 static void dummynet_send(struct mbuf *);
  197 void dummynet_drain(void);
  198 static ip_dn_io_t dummynet_io;
  199 static void dn_rule_delete(void *);
  200 
  201 /*
  202  * Heap management functions.
  203  *
  204  * In the heap, first node is element 0. Children of i are 2i+1 and 2i+2.
  205  * Some macros help finding parent/children so we can optimize them.
  206  *
  207  * heap_init() is called to expand the heap when needed.
  208  * Increment size in blocks of 16 entries.
  209  * XXX failure to allocate a new element is a pretty bad failure
  210  * as we basically stall a whole queue forever!!
  211  * Returns 1 on error, 0 on success
  212  */
  213 #define HEAP_FATHER(x) ( ( (x) - 1 ) / 2 )
  214 #define HEAP_LEFT(x) ( 2*(x) + 1 )
  215 #define HEAP_IS_LEFT(x) ( (x) & 1 )
  216 #define HEAP_RIGHT(x) ( 2*(x) + 2 )
  217 #define HEAP_SWAP(a, b, buffer) { buffer = a ; a = b ; b = buffer ; }
  218 #define HEAP_INCREMENT  15
  219 
  220 static int
  221 heap_init(struct dn_heap *h, int new_size)
  222 {
  223     struct dn_heap_entry *p;
  224 
  225     if (h->size >= new_size ) {
  226         printf("dummynet: %s, Bogus call, have %d want %d\n", __func__,
  227                 h->size, new_size);
  228         return 0 ;
  229     }
  230     new_size = (new_size + HEAP_INCREMENT ) & ~HEAP_INCREMENT ;
  231     p = malloc(new_size * sizeof(*p), M_DUMMYNET, M_NOWAIT);
  232     if (p == NULL) {
  233         printf("dummynet: %s, resize %d failed\n", __func__, new_size );
  234         return 1 ; /* error */
  235     }
  236     if (h->size > 0) {
  237         bcopy(h->p, p, h->size * sizeof(*p) );
  238         free(h->p, M_DUMMYNET);
  239     }
  240     h->p = p ;
  241     h->size = new_size ;
  242     return 0 ;
  243 }
  244 
  245 /*
  246  * Insert element in heap. Normally, p != NULL, we insert p in
  247  * a new position and bubble up. If p == NULL, then the element is
  248  * already in place, and key is the position where to start the
  249  * bubble-up.
  250  * Returns 1 on failure (cannot allocate new heap entry)
  251  *
  252  * If offset > 0 the position (index, int) of the element in the heap is
  253  * also stored in the element itself at the given offset in bytes.
  254  */
  255 #define SET_OFFSET(heap, node) \
  256     if (heap->offset > 0) \
  257             *((int *)((char *)(heap->p[node].object) + heap->offset)) = node ;
  258 /*
  259  * RESET_OFFSET is used for sanity checks. It sets offset to an invalid value.
  260  */
  261 #define RESET_OFFSET(heap, node) \
  262     if (heap->offset > 0) \
  263             *((int *)((char *)(heap->p[node].object) + heap->offset)) = -1 ;
  264 static int
  265 heap_insert(struct dn_heap *h, dn_key key1, void *p)
  266 {
  267     int son = h->elements ;
  268 
  269     if (p == NULL)      /* data already there, set starting point */
  270         son = key1 ;
  271     else {              /* insert new element at the end, possibly resize */
  272         son = h->elements ;
  273         if (son == h->size) /* need resize... */
  274             if (heap_init(h, h->elements+1) )
  275                 return 1 ; /* failure... */
  276         h->p[son].object = p ;
  277         h->p[son].key = key1 ;
  278         h->elements++ ;
  279     }
  280     while (son > 0) {                           /* bubble up */
  281         int father = HEAP_FATHER(son) ;
  282         struct dn_heap_entry tmp  ;
  283 
  284         if (DN_KEY_LT( h->p[father].key, h->p[son].key ) )
  285             break ; /* found right position */
  286         /* son smaller than father, swap and repeat */
  287         HEAP_SWAP(h->p[son], h->p[father], tmp) ;
  288         SET_OFFSET(h, son);
  289         son = father ;
  290     }
  291     SET_OFFSET(h, son);
  292     return 0 ;
  293 }
  294 
  295 /*
  296  * remove top element from heap, or obj if obj != NULL
  297  */
  298 static void
  299 heap_extract(struct dn_heap *h, void *obj)
  300 {
  301     int child, father, max = h->elements - 1 ;
  302 
  303     if (max < 0) {
  304         printf("dummynet: warning, extract from empty heap 0x%p\n", h);
  305         return ;
  306     }
  307     father = 0 ; /* default: move up smallest child */
  308     if (obj != NULL) { /* extract specific element, index is at offset */
  309         if (h->offset <= 0)
  310             panic("dummynet: heap_extract from middle not supported on this heap!!!\n");
  311         father = *((int *)((char *)obj + h->offset)) ;
  312         if (father < 0 || father >= h->elements) {
  313             printf("dummynet: heap_extract, father %d out of bound 0..%d\n",
  314                 father, h->elements);
  315             panic("dummynet: heap_extract");
  316         }
  317     }
  318     RESET_OFFSET(h, father);
  319     child = HEAP_LEFT(father) ;         /* left child */
  320     while (child <= max) {              /* valid entry */
  321         if (child != max && DN_KEY_LT(h->p[child+1].key, h->p[child].key) )
  322             child = child+1 ;           /* take right child, otherwise left */
  323         h->p[father] = h->p[child] ;
  324         SET_OFFSET(h, father);
  325         father = child ;
  326         child = HEAP_LEFT(child) ;   /* left child for next loop */
  327     }
  328     h->elements-- ;
  329     if (father != max) {
  330         /*
  331          * Fill hole with last entry and bubble up, reusing the insert code
  332          */
  333         h->p[father] = h->p[max] ;
  334         heap_insert(h, father, NULL); /* this one cannot fail */
  335     }
  336 }
  337 
  338 #if 0
  339 /*
  340  * change object position and update references
  341  * XXX this one is never used!
  342  */
  343 static void
  344 heap_move(struct dn_heap *h, dn_key new_key, void *object)
  345 {
  346     int temp;
  347     int i ;
  348     int max = h->elements-1 ;
  349     struct dn_heap_entry buf ;
  350 
  351     if (h->offset <= 0)
  352         panic("cannot move items on this heap");
  353 
  354     i = *((int *)((char *)object + h->offset));
  355     if (DN_KEY_LT(new_key, h->p[i].key) ) { /* must move up */
  356         h->p[i].key = new_key ;
  357         for (; i>0 && DN_KEY_LT(new_key, h->p[(temp = HEAP_FATHER(i))].key) ;
  358                  i = temp ) { /* bubble up */
  359             HEAP_SWAP(h->p[i], h->p[temp], buf) ;
  360             SET_OFFSET(h, i);
  361         }
  362     } else {            /* must move down */
  363         h->p[i].key = new_key ;
  364         while ( (temp = HEAP_LEFT(i)) <= max ) { /* found left child */
  365             if ((temp != max) && DN_KEY_GT(h->p[temp].key, h->p[temp+1].key))
  366                 temp++ ; /* select child with min key */
  367             if (DN_KEY_GT(new_key, h->p[temp].key)) { /* go down */
  368                 HEAP_SWAP(h->p[i], h->p[temp], buf) ;
  369                 SET_OFFSET(h, i);
  370             } else
  371                 break ;
  372             i = temp ;
  373         }
  374     }
  375     SET_OFFSET(h, i);
  376 }
  377 #endif /* heap_move, unused */
  378 
  379 /*
  380  * heapify() will reorganize data inside an array to maintain the
  381  * heap property. It is needed when we delete a bunch of entries.
  382  */
  383 static void
  384 heapify(struct dn_heap *h)
  385 {
  386     int i ;
  387 
  388     for (i = 0 ; i < h->elements ; i++ )
  389         heap_insert(h, i , NULL) ;
  390 }
  391 
  392 /*
  393  * cleanup the heap and free data structure
  394  */
  395 static void
  396 heap_free(struct dn_heap *h)
  397 {
  398     if (h->size >0 )
  399         free(h->p, M_DUMMYNET);
  400     bzero(h, sizeof(*h) );
  401 }
  402 
  403 /*
  404  * --- end of heap management functions ---
  405  */
  406 
  407 /*
  408  * Return the mbuf tag holding the dummynet state.  As an optimization
  409  * this is assumed to be the first tag on the list.  If this turns out
  410  * wrong we'll need to search the list.
  411  */
  412 static struct dn_pkt_tag *
  413 dn_tag_get(struct mbuf *m)
  414 {
  415     struct m_tag *mtag = m_tag_first(m);
  416     KASSERT(mtag != NULL &&
  417             mtag->m_tag_cookie == MTAG_ABI_COMPAT &&
  418             mtag->m_tag_id == PACKET_TAG_DUMMYNET,
  419             ("packet on dummynet queue w/o dummynet tag!"));
  420     return (struct dn_pkt_tag *)(mtag+1);
  421 }
  422 
  423 /*
  424  * Scheduler functions:
  425  *
  426  * transmit_event() is called when the delay-line needs to enter
  427  * the scheduler, either because of existing pkts getting ready,
  428  * or new packets entering the queue. The event handled is the delivery
  429  * time of the packet.
  430  *
  431  * ready_event() does something similar with fixed-rate queues, and the
  432  * event handled is the finish time of the head pkt.
  433  *
  434  * wfq_ready_event() does something similar with WF2Q queues, and the
  435  * event handled is the start time of the head pkt.
  436  *
  437  * In all cases, we make sure that the data structures are consistent
  438  * before passing pkts out, because this might trigger recursive
  439  * invocations of the procedures.
  440  */
  441 static void
  442 transmit_event(struct dn_pipe *pipe, struct mbuf **head, struct mbuf **tail)
  443 {
  444         struct mbuf *m;
  445         struct dn_pkt_tag *pkt;
  446 
  447         DUMMYNET_LOCK_ASSERT();
  448 
  449         while ((m = pipe->head) != NULL) {
  450                 pkt = dn_tag_get(m);
  451                 if (!DN_KEY_LEQ(pkt->output_time, curr_time))
  452                         break;
  453 
  454                 pipe->head = m->m_nextpkt;
  455                 if (*tail != NULL)
  456                         (*tail)->m_nextpkt = m;
  457                 else
  458                         *head = m;
  459                 *tail = m;
  460         }
  461         if (*tail != NULL)
  462                 (*tail)->m_nextpkt = NULL;
  463 
  464         /* If there are leftover packets, put into the heap for next event. */
  465         if ((m = pipe->head) != NULL) {
  466                 pkt = dn_tag_get(m);
  467                 /*
  468                  * XXX: Should check errors on heap_insert, by draining the
  469                  * whole pipe p and hoping in the future we are more successful.
  470                  */
  471                 heap_insert(&extract_heap, pkt->output_time, pipe);
  472         }
  473 }
  474 
  475 /*
  476  * the following macro computes how many ticks we have to wait
  477  * before being able to transmit a packet. The credit is taken from
  478  * either a pipe (WF2Q) or a flow_queue (per-flow queueing)
  479  */
  480 #define SET_TICKS(_m, q, p)     \
  481     ((_m)->m_pkthdr.len*8*hz - (q)->numbytes + p->bandwidth - 1 ) / \
  482             p->bandwidth ;
  483 
  484 /*
  485  * extract pkt from queue, compute output time (could be now)
  486  * and put into delay line (p_queue)
  487  */
  488 static void
  489 move_pkt(struct mbuf *pkt, struct dn_flow_queue *q,
  490         struct dn_pipe *p, int len)
  491 {
  492     struct dn_pkt_tag *dt = dn_tag_get(pkt);
  493 
  494     q->head = pkt->m_nextpkt ;
  495     q->len-- ;
  496     q->len_bytes -= len ;
  497 
  498     dt->output_time = curr_time + p->delay ;
  499 
  500     if (p->head == NULL)
  501         p->head = pkt;
  502     else
  503         p->tail->m_nextpkt = pkt;
  504     p->tail = pkt;
  505     p->tail->m_nextpkt = NULL;
  506 }
  507 
  508 /*
  509  * ready_event() is invoked every time the queue must enter the
  510  * scheduler, either because the first packet arrives, or because
  511  * a previously scheduled event fired.
  512  * On invokation, drain as many pkts as possible (could be 0) and then
  513  * if there are leftover packets reinsert the pkt in the scheduler.
  514  */
  515 static void
  516 ready_event(struct dn_flow_queue *q, struct mbuf **head, struct mbuf **tail)
  517 {
  518     struct mbuf *pkt;
  519     struct dn_pipe *p = q->fs->pipe ;
  520     int p_was_empty ;
  521 
  522     DUMMYNET_LOCK_ASSERT();
  523 
  524     if (p == NULL) {
  525         printf("dummynet: ready_event- pipe is gone\n");
  526         return ;
  527     }
  528     p_was_empty = (p->head == NULL) ;
  529 
  530     /*
  531      * schedule fixed-rate queues linked to this pipe:
  532      * Account for the bw accumulated since last scheduling, then
  533      * drain as many pkts as allowed by q->numbytes and move to
  534      * the delay line (in p) computing output time.
  535      * bandwidth==0 (no limit) means we can drain the whole queue,
  536      * setting len_scaled = 0 does the job.
  537      */
  538     q->numbytes += ( curr_time - q->sched_time ) * p->bandwidth;
  539     while ( (pkt = q->head) != NULL ) {
  540         int len = pkt->m_pkthdr.len;
  541         int len_scaled = p->bandwidth ? len*8*hz : 0 ;
  542         if (len_scaled > q->numbytes )
  543             break ;
  544         q->numbytes -= len_scaled ;
  545         move_pkt(pkt, q, p, len);
  546     }
  547     /*
  548      * If we have more packets queued, schedule next ready event
  549      * (can only occur when bandwidth != 0, otherwise we would have
  550      * flushed the whole queue in the previous loop).
  551      * To this purpose we record the current time and compute how many
  552      * ticks to go for the finish time of the packet.
  553      */
  554     if ( (pkt = q->head) != NULL ) { /* this implies bandwidth != 0 */
  555         dn_key t = SET_TICKS(pkt, q, p); /* ticks i have to wait */
  556         q->sched_time = curr_time ;
  557         heap_insert(&ready_heap, curr_time + t, (void *)q );
  558         /* XXX should check errors on heap_insert, and drain the whole
  559          * queue on error hoping next time we are luckier.
  560          */
  561     } else {    /* RED needs to know when the queue becomes empty */
  562         q->q_time = curr_time;
  563         q->numbytes = 0;
  564     }
  565     /*
  566      * If the delay line was empty call transmit_event() now.
  567      * Otherwise, the scheduler will take care of it.
  568      */
  569     if (p_was_empty)
  570         transmit_event(p, head, tail);
  571 }
  572 
  573 /*
  574  * Called when we can transmit packets on WF2Q queues. Take pkts out of
  575  * the queues at their start time, and enqueue into the delay line.
  576  * Packets are drained until p->numbytes < 0. As long as
  577  * len_scaled >= p->numbytes, the packet goes into the delay line
  578  * with a deadline p->delay. For the last packet, if p->numbytes<0,
  579  * there is an additional delay.
  580  */
  581 static void
  582 ready_event_wfq(struct dn_pipe *p, struct mbuf **head, struct mbuf **tail)
  583 {
  584     int p_was_empty = (p->head == NULL) ;
  585     struct dn_heap *sch = &(p->scheduler_heap);
  586     struct dn_heap *neh = &(p->not_eligible_heap) ;
  587 
  588     DUMMYNET_LOCK_ASSERT();
  589 
  590     if (p->if_name[0] == 0) /* tx clock is simulated */
  591         p->numbytes += ( curr_time - p->sched_time ) * p->bandwidth;
  592     else { /* tx clock is for real, the ifq must be empty or this is a NOP */
  593         if (p->ifp && p->ifp->if_snd.ifq_head != NULL)
  594             return ;
  595         else {
  596             DPRINTF(("dummynet: pipe %d ready from %s --\n",
  597                 p->pipe_nr, p->if_name));
  598         }
  599     }
  600 
  601     /*
  602      * While we have backlogged traffic AND credit, we need to do
  603      * something on the queue.
  604      */
  605     while ( p->numbytes >=0 && (sch->elements>0 || neh->elements >0) ) {
  606         if (sch->elements > 0) { /* have some eligible pkts to send out */
  607             struct dn_flow_queue *q = sch->p[0].object ;
  608             struct mbuf *pkt = q->head;
  609             struct dn_flow_set *fs = q->fs;
  610             u_int64_t len = pkt->m_pkthdr.len;
  611             int len_scaled = p->bandwidth ? len*8*hz : 0 ;
  612 
  613             heap_extract(sch, NULL); /* remove queue from heap */
  614             p->numbytes -= len_scaled ;
  615             move_pkt(pkt, q, p, len);
  616 
  617             p->V += (len<<MY_M) / p->sum ; /* update V */
  618             q->S = q->F ; /* update start time */
  619             if (q->len == 0) { /* Flow not backlogged any more */
  620                 fs->backlogged-- ;
  621                 heap_insert(&(p->idle_heap), q->F, q);
  622             } else { /* still backlogged */
  623                 /*
  624                  * update F and position in backlogged queue, then
  625                  * put flow in not_eligible_heap (we will fix this later).
  626                  */
  627                 len = (q->head)->m_pkthdr.len;
  628                 q->F += (len<<MY_M)/(u_int64_t) fs->weight ;
  629                 if (DN_KEY_LEQ(q->S, p->V))
  630                     heap_insert(neh, q->S, q);
  631                 else
  632                     heap_insert(sch, q->F, q);
  633             }
  634         }
  635         /*
  636          * now compute V = max(V, min(S_i)). Remember that all elements in sch
  637          * have by definition S_i <= V so if sch is not empty, V is surely
  638          * the max and we must not update it. Conversely, if sch is empty
  639          * we only need to look at neh.
  640          */
  641         if (sch->elements == 0 && neh->elements > 0)
  642             p->V = MAX64 ( p->V, neh->p[0].key );
  643         /* move from neh to sch any packets that have become eligible */
  644         while (neh->elements > 0 && DN_KEY_LEQ(neh->p[0].key, p->V) ) {
  645             struct dn_flow_queue *q = neh->p[0].object ;
  646             heap_extract(neh, NULL);
  647             heap_insert(sch, q->F, q);
  648         }
  649 
  650         if (p->if_name[0] != '\0') {/* tx clock is from a real thing */
  651             p->numbytes = -1 ; /* mark not ready for I/O */
  652             break ;
  653         }
  654     }
  655     if (sch->elements == 0 && neh->elements == 0 && p->numbytes >= 0
  656             && p->idle_heap.elements > 0) {
  657         /*
  658          * no traffic and no events scheduled. We can get rid of idle-heap.
  659          */
  660         int i ;
  661 
  662         for (i = 0 ; i < p->idle_heap.elements ; i++) {
  663             struct dn_flow_queue *q = p->idle_heap.p[i].object ;
  664 
  665             q->F = 0 ;
  666             q->S = q->F + 1 ;
  667         }
  668         p->sum = 0 ;
  669         p->V = 0 ;
  670         p->idle_heap.elements = 0 ;
  671     }
  672     /*
  673      * If we are getting clocks from dummynet (not a real interface) and
  674      * If we are under credit, schedule the next ready event.
  675      * Also fix the delivery time of the last packet.
  676      */
  677     if (p->if_name[0]==0 && p->numbytes < 0) { /* this implies bandwidth >0 */
  678         dn_key t=0 ; /* number of ticks i have to wait */
  679 
  680         if (p->bandwidth > 0)
  681             t = ( p->bandwidth -1 - p->numbytes) / p->bandwidth ;
  682         dn_tag_get(p->tail)->output_time += t ;
  683         p->sched_time = curr_time ;
  684         heap_insert(&wfq_ready_heap, curr_time + t, (void *)p);
  685         /* XXX should check errors on heap_insert, and drain the whole
  686          * queue on error hoping next time we are luckier.
  687          */
  688     }
  689     /*
  690      * If the delay line was empty call transmit_event() now.
  691      * Otherwise, the scheduler will take care of it.
  692      */
  693     if (p_was_empty)
  694         transmit_event(p, head, tail);
  695 }
  696 
  697 /*
  698  * This is called once per tick, or HZ times per second. It is used to
  699  * increment the current tick counter and schedule expired events.
  700  */
  701 static void
  702 dummynet(void * __unused unused)
  703 {
  704     struct mbuf *head = NULL, *tail = NULL;
  705     struct dn_pipe *pipe;
  706     struct dn_heap *heaps[3];
  707     struct dn_heap *h;
  708     void *p; /* generic parameter to handler */
  709     int i;
  710 
  711     heaps[0] = &ready_heap ;            /* fixed-rate queues */
  712     heaps[1] = &wfq_ready_heap ;        /* wfq queues */
  713     heaps[2] = &extract_heap ;          /* delay line */
  714 
  715     DUMMYNET_LOCK();
  716     curr_time++ ;
  717     for (i=0; i < 3 ; i++) {
  718         h = heaps[i];
  719         while (h->elements > 0 && DN_KEY_LEQ(h->p[0].key, curr_time) ) {
  720             if (h->p[0].key > curr_time)
  721                 printf("dummynet: warning, heap %d is %d ticks late\n",
  722                     i, (int)(curr_time - h->p[0].key));
  723             p = h->p[0].object ; /* store a copy before heap_extract */
  724             heap_extract(h, NULL); /* need to extract before processing */
  725             if (i == 0)
  726                 ready_event(p, &head, &tail);
  727             else if (i == 1) {
  728                 struct dn_pipe *pipe = p;
  729                 if (pipe->if_name[0] != '\0')
  730                     printf("dummynet: bad ready_event_wfq for pipe %s\n",
  731                         pipe->if_name);
  732                 else
  733                     ready_event_wfq(p, &head, &tail);
  734             } else
  735                 transmit_event(p, &head, &tail);
  736         }
  737     }
  738     /* Sweep pipes trying to expire idle flow_queues. */
  739     for (i = 0; i < HASHSIZE; i++)
  740         SLIST_FOREACH(pipe, &pipehash[i], next)
  741                 if (pipe->idle_heap.elements > 0 &&
  742                     DN_KEY_LT(pipe->idle_heap.p[0].key, pipe->V) ) {
  743                         struct dn_flow_queue *q = pipe->idle_heap.p[0].object;
  744 
  745                         heap_extract(&(pipe->idle_heap), NULL);
  746                         q->S = q->F + 1; /* Mark timestamp as invalid. */
  747                         pipe->sum -= q->fs->weight;
  748                 }
  749 
  750     DUMMYNET_UNLOCK();
  751 
  752     if (head != NULL)
  753         dummynet_send(head);
  754 
  755     callout_reset(&dn_timeout, 1, dummynet, NULL);
  756 }
  757 
  758 static void
  759 dummynet_send(struct mbuf *m)
  760 {
  761         struct dn_pkt_tag *pkt;
  762         struct mbuf *n;
  763         struct ip *ip;
  764 
  765         for (; m != NULL; m = n) {
  766                 n = m->m_nextpkt;
  767                 m->m_nextpkt = NULL;
  768                 pkt = dn_tag_get(m);
  769                 switch (pkt->dn_dir) {
  770                 case DN_TO_IP_OUT:
  771                         ip_output(m, NULL, NULL, IP_FORWARDING, NULL, NULL);
  772                         break ;
  773                   case DN_TO_IP_IN :
  774                         ip = mtod(m, struct ip *);
  775                         ip->ip_len = htons(ip->ip_len);
  776                         ip->ip_off = htons(ip->ip_off);
  777                         ip_input(m);
  778                         break;
  779 #ifdef INET6
  780                 case DN_TO_IP6_IN:
  781                         ip6_input(m);
  782                         break;
  783 
  784                 case DN_TO_IP6_OUT:
  785                         ip6_output(m, NULL, NULL, IPV6_FORWARDING, NULL, NULL, NULL);
  786                         break;
  787 #endif
  788                 case DN_TO_IFB_FWD:
  789                         if (bridge_dn_p != NULL)
  790                                 ((*bridge_dn_p)(m, pkt->ifp));
  791                         else
  792                                 printf("dummynet: if_bridge not loaded\n");
  793 
  794                         break;
  795                 case DN_TO_BDG_FWD :
  796                         /*
  797                          * The bridge requires/assumes the Ethernet header is
  798                          * contiguous in the first mbuf header.  Ensure this
  799                          * is true.
  800                          */
  801                         if (BDG_LOADED) {
  802                                 if (m->m_len < ETHER_HDR_LEN &&
  803                                     (m = m_pullup(m, ETHER_HDR_LEN)) == NULL) {
  804                                         printf("dummynet/bridge: pullup fail, "
  805                                             "dropping pkt\n");
  806                                         break;
  807                                 }
  808                                 m = bdg_forward_ptr(m, pkt->ifp);
  809                         } else {
  810                                 /*
  811                                  * somebody unloaded the bridge module.
  812                                  * Drop pkt
  813                                  */
  814                                 /* XXX rate limit */
  815                                 printf("dummynet: dropping bridged packet "
  816                                     "trapped in pipe\n");
  817                         }
  818                         if (m)
  819                                 m_freem(m);
  820                         break;
  821                 case DN_TO_ETH_DEMUX:
  822                         /*
  823                          * The Ethernet code assumes the Ethernet header is
  824                          * contiguous in the first mbuf header.
  825                          * Insure this is true.
  826                          */
  827                         if (m->m_len < ETHER_HDR_LEN &&
  828                             (m = m_pullup(m, ETHER_HDR_LEN)) == NULL) {
  829                                 printf("dummynet/ether: pullup failed, "
  830                                     "dropping packet\n");
  831                                 break;
  832                         }
  833                         ether_demux(m->m_pkthdr.rcvif, m);
  834                         break;
  835                 case DN_TO_ETH_OUT:
  836                         ether_output_frame(pkt->ifp, m);
  837                         break;
  838                 default:
  839                         printf("dummynet: bad switch %d!\n", pkt->dn_dir);
  840                         m_freem(m);
  841                         break;
  842                 }
  843         }
  844 }
  845 
  846 /*
  847  * Unconditionally expire empty queues in case of shortage.
  848  * Returns the number of queues freed.
  849  */
  850 static int
  851 expire_queues(struct dn_flow_set *fs)
  852 {
  853     struct dn_flow_queue *q, *prev ;
  854     int i, initial_elements = fs->rq_elements ;
  855 
  856     if (fs->last_expired == time_second)
  857         return 0 ;
  858     fs->last_expired = time_second ;
  859     for (i = 0 ; i <= fs->rq_size ; i++) /* last one is overflow */
  860         for (prev=NULL, q = fs->rq[i] ; q != NULL ; )
  861             if (q->head != NULL || q->S != q->F+1) {
  862                 prev = q ;
  863                 q = q->next ;
  864             } else { /* entry is idle, expire it */
  865                 struct dn_flow_queue *old_q = q ;
  866 
  867                 if (prev != NULL)
  868                     prev->next = q = q->next ;
  869                 else
  870                     fs->rq[i] = q = q->next ;
  871                 fs->rq_elements-- ;
  872                 free(old_q, M_DUMMYNET);
  873             }
  874     return initial_elements - fs->rq_elements ;
  875 }
  876 
  877 /*
  878  * If room, create a new queue and put at head of slot i;
  879  * otherwise, create or use the default queue.
  880  */
  881 static struct dn_flow_queue *
  882 create_queue(struct dn_flow_set *fs, int i)
  883 {
  884     struct dn_flow_queue *q ;
  885 
  886     if (fs->rq_elements > fs->rq_size * dn_max_ratio &&
  887             expire_queues(fs) == 0) {
  888         /*
  889          * No way to get room, use or create overflow queue.
  890          */
  891         i = fs->rq_size ;
  892         if ( fs->rq[i] != NULL )
  893             return fs->rq[i] ;
  894     }
  895     q = malloc(sizeof(*q), M_DUMMYNET, M_NOWAIT | M_ZERO);
  896     if (q == NULL) {
  897         printf("dummynet: sorry, cannot allocate queue for new flow\n");
  898         return NULL ;
  899     }
  900     q->fs = fs ;
  901     q->hash_slot = i ;
  902     q->next = fs->rq[i] ;
  903     q->S = q->F + 1;   /* hack - mark timestamp as invalid */
  904     fs->rq[i] = q ;
  905     fs->rq_elements++ ;
  906     return q ;
  907 }
  908 
  909 /*
  910  * Given a flow_set and a pkt in last_pkt, find a matching queue
  911  * after appropriate masking. The queue is moved to front
  912  * so that further searches take less time.
  913  */
  914 static struct dn_flow_queue *
  915 find_queue(struct dn_flow_set *fs, struct ipfw_flow_id *id)
  916 {
  917     int i = 0 ; /* we need i and q for new allocations */
  918     struct dn_flow_queue *q, *prev;
  919     int is_v6 = IS_IP6_FLOW_ID(id);
  920 
  921     if ( !(fs->flags_fs & DN_HAVE_FLOW_MASK) )
  922         q = fs->rq[0] ;
  923     else {
  924         /* first, do the masking, then hash */
  925         id->dst_port &= fs->flow_mask.dst_port ;
  926         id->src_port &= fs->flow_mask.src_port ;
  927         id->proto &= fs->flow_mask.proto ;
  928         id->flags = 0 ; /* we don't care about this one */
  929         if (is_v6) {
  930             APPLY_MASK(&id->dst_ip6, &fs->flow_mask.dst_ip6);
  931             APPLY_MASK(&id->src_ip6, &fs->flow_mask.src_ip6);
  932             id->flow_id6 &= fs->flow_mask.flow_id6;
  933 
  934             i = ((id->dst_ip6.__u6_addr.__u6_addr32[0]) & 0xffff)^
  935                 ((id->dst_ip6.__u6_addr.__u6_addr32[1]) & 0xffff)^
  936                 ((id->dst_ip6.__u6_addr.__u6_addr32[2]) & 0xffff)^
  937                 ((id->dst_ip6.__u6_addr.__u6_addr32[3]) & 0xffff)^
  938 
  939                 ((id->dst_ip6.__u6_addr.__u6_addr32[0] >> 15) & 0xffff)^
  940                 ((id->dst_ip6.__u6_addr.__u6_addr32[1] >> 15) & 0xffff)^
  941                 ((id->dst_ip6.__u6_addr.__u6_addr32[2] >> 15) & 0xffff)^
  942                 ((id->dst_ip6.__u6_addr.__u6_addr32[3] >> 15) & 0xffff)^
  943 
  944                 ((id->src_ip6.__u6_addr.__u6_addr32[0] << 1) & 0xfffff)^
  945                 ((id->src_ip6.__u6_addr.__u6_addr32[1] << 1) & 0xfffff)^
  946                 ((id->src_ip6.__u6_addr.__u6_addr32[2] << 1) & 0xfffff)^
  947                 ((id->src_ip6.__u6_addr.__u6_addr32[3] << 1) & 0xfffff)^
  948 
  949                 ((id->src_ip6.__u6_addr.__u6_addr32[0] << 16) & 0xffff)^
  950                 ((id->src_ip6.__u6_addr.__u6_addr32[1] << 16) & 0xffff)^
  951                 ((id->src_ip6.__u6_addr.__u6_addr32[2] << 16) & 0xffff)^
  952                 ((id->src_ip6.__u6_addr.__u6_addr32[3] << 16) & 0xffff)^
  953 
  954                 (id->dst_port << 1) ^ (id->src_port) ^
  955                 (id->proto ) ^
  956                 (id->flow_id6);
  957         } else {
  958             id->dst_ip &= fs->flow_mask.dst_ip ;
  959             id->src_ip &= fs->flow_mask.src_ip ;
  960 
  961             i = ( (id->dst_ip) & 0xffff ) ^
  962                 ( (id->dst_ip >> 15) & 0xffff ) ^
  963                 ( (id->src_ip << 1) & 0xffff ) ^
  964                 ( (id->src_ip >> 16 ) & 0xffff ) ^
  965                 (id->dst_port << 1) ^ (id->src_port) ^
  966                 (id->proto );
  967         }
  968         i = i % fs->rq_size ;
  969         /* finally, scan the current list for a match */
  970         searches++ ;
  971         for (prev=NULL, q = fs->rq[i] ; q ; ) {
  972             search_steps++;
  973             if (is_v6 &&
  974                     IN6_ARE_ADDR_EQUAL(&id->dst_ip6,&q->id.dst_ip6) &&  
  975                     IN6_ARE_ADDR_EQUAL(&id->src_ip6,&q->id.src_ip6) &&  
  976                     id->dst_port == q->id.dst_port &&
  977                     id->src_port == q->id.src_port &&
  978                     id->proto == q->id.proto &&
  979                     id->flags == q->id.flags &&
  980                     id->flow_id6 == q->id.flow_id6)
  981                 break ; /* found */
  982 
  983             if (!is_v6 && id->dst_ip == q->id.dst_ip &&
  984                     id->src_ip == q->id.src_ip &&
  985                     id->dst_port == q->id.dst_port &&
  986                     id->src_port == q->id.src_port &&
  987                     id->proto == q->id.proto &&
  988                     id->flags == q->id.flags)
  989                 break ; /* found */
  990 
  991             /* No match. Check if we can expire the entry */
  992             if (pipe_expire && q->head == NULL && q->S == q->F+1 ) {
  993                 /* entry is idle and not in any heap, expire it */
  994                 struct dn_flow_queue *old_q = q ;
  995 
  996                 if (prev != NULL)
  997                     prev->next = q = q->next ;
  998                 else
  999                     fs->rq[i] = q = q->next ;
 1000                 fs->rq_elements-- ;
 1001                 free(old_q, M_DUMMYNET);
 1002                 continue ;
 1003             }
 1004             prev = q ;
 1005             q = q->next ;
 1006         }
 1007         if (q && prev != NULL) { /* found and not in front */
 1008             prev->next = q->next ;
 1009             q->next = fs->rq[i] ;
 1010             fs->rq[i] = q ;
 1011         }
 1012     }
 1013     if (q == NULL) { /* no match, need to allocate a new entry */
 1014         q = create_queue(fs, i);
 1015         if (q != NULL)
 1016         q->id = *id ;
 1017     }
 1018     return q ;
 1019 }
 1020 
 1021 static int
 1022 red_drops(struct dn_flow_set *fs, struct dn_flow_queue *q, int len)
 1023 {
 1024     /*
 1025      * RED algorithm
 1026      *
 1027      * RED calculates the average queue size (avg) using a low-pass filter
 1028      * with an exponential weighted (w_q) moving average:
 1029      *  avg  <-  (1-w_q) * avg + w_q * q_size
 1030      * where q_size is the queue length (measured in bytes or * packets).
 1031      *
 1032      * If q_size == 0, we compute the idle time for the link, and set
 1033      *  avg = (1 - w_q)^(idle/s)
 1034      * where s is the time needed for transmitting a medium-sized packet.
 1035      *
 1036      * Now, if avg < min_th the packet is enqueued.
 1037      * If avg > max_th the packet is dropped. Otherwise, the packet is
 1038      * dropped with probability P function of avg.
 1039      *
 1040      */
 1041 
 1042     int64_t p_b = 0;
 1043     /* queue in bytes or packets ? */
 1044     u_int q_size = (fs->flags_fs & DN_QSIZE_IS_BYTES) ? q->len_bytes : q->len;
 1045 
 1046     DPRINTF(("\ndummynet: %d q: %2u ", (int) curr_time, q_size));
 1047 
 1048     /* average queue size estimation */
 1049     if (q_size != 0) {
 1050         /*
 1051          * queue is not empty, avg <- avg + (q_size - avg) * w_q
 1052          */
 1053         int diff = SCALE(q_size) - q->avg;
 1054         int64_t v = SCALE_MUL((int64_t) diff, (int64_t) fs->w_q);
 1055 
 1056         q->avg += (int) v;
 1057     } else {
 1058         /*
 1059          * queue is empty, find for how long the queue has been
 1060          * empty and use a lookup table for computing
 1061          * (1 - * w_q)^(idle_time/s) where s is the time to send a
 1062          * (small) packet.
 1063          * XXX check wraps...
 1064          */
 1065         if (q->avg) {
 1066             u_int t = (curr_time - q->q_time) / fs->lookup_step;
 1067 
 1068             q->avg = (t < fs->lookup_depth) ?
 1069                     SCALE_MUL(q->avg, fs->w_q_lookup[t]) : 0;
 1070         }
 1071     }
 1072     DPRINTF(("dummynet: avg: %u ", SCALE_VAL(q->avg)));
 1073 
 1074     /* should i drop ? */
 1075 
 1076     if (q->avg < fs->min_th) {
 1077         q->count = -1;
 1078         return 0; /* accept packet ; */
 1079     }
 1080     if (q->avg >= fs->max_th) { /* average queue >=  max threshold */
 1081         if (fs->flags_fs & DN_IS_GENTLE_RED) {
 1082             /*
 1083              * According to Gentle-RED, if avg is greater than max_th the
 1084              * packet is dropped with a probability
 1085              *  p_b = c_3 * avg - c_4
 1086              * where c_3 = (1 - max_p) / max_th, and c_4 = 1 - 2 * max_p
 1087              */
 1088             p_b = SCALE_MUL((int64_t) fs->c_3, (int64_t) q->avg) - fs->c_4;
 1089         } else {
 1090             q->count = -1;
 1091             DPRINTF(("dummynet: - drop"));
 1092             return 1 ;
 1093         }
 1094     } else if (q->avg > fs->min_th) {
 1095         /*
 1096          * we compute p_b using the linear dropping function p_b = c_1 *
 1097          * avg - c_2, where c_1 = max_p / (max_th - min_th), and c_2 =
 1098          * max_p * min_th / (max_th - min_th)
 1099          */
 1100         p_b = SCALE_MUL((int64_t) fs->c_1, (int64_t) q->avg) - fs->c_2;
 1101     }
 1102     if (fs->flags_fs & DN_QSIZE_IS_BYTES)
 1103         p_b = (p_b * len) / fs->max_pkt_size;
 1104     if (++q->count == 0)
 1105         q->random = random() & 0xffff;
 1106     else {
 1107         /*
 1108          * q->count counts packets arrived since last drop, so a greater
 1109          * value of q->count means a greater packet drop probability.
 1110          */
 1111         if (SCALE_MUL(p_b, SCALE((int64_t) q->count)) > q->random) {
 1112             q->count = 0;
 1113             DPRINTF(("dummynet: - red drop"));
 1114             /* after a drop we calculate a new random value */
 1115             q->random = random() & 0xffff;
 1116             return 1;    /* drop */
 1117         }
 1118     }
 1119     /* end of RED algorithm */
 1120     return 0 ; /* accept */
 1121 }
 1122 
 1123 static __inline struct dn_flow_set *
 1124 locate_flowset(int fs_nr)
 1125 {
 1126         struct dn_flow_set *fs;
 1127 
 1128         SLIST_FOREACH(fs, &flowsethash[HASH(fs_nr)], next)
 1129                 if (fs->fs_nr == fs_nr)
 1130                         return (fs);
 1131 
 1132         return (NULL);
 1133 }
 1134 
 1135 static __inline struct dn_pipe *
 1136 locate_pipe(int pipe_nr)
 1137 {
 1138         struct dn_pipe *pipe;
 1139 
 1140         SLIST_FOREACH(pipe, &pipehash[HASH(pipe_nr)], next)
 1141                 if (pipe->pipe_nr == pipe_nr)
 1142                         return (pipe);
 1143 
 1144         return (NULL);
 1145 }
 1146 
 1147 /*
 1148  * dummynet hook for packets. Below 'pipe' is a pipe or a queue
 1149  * depending on whether WF2Q or fixed bw is used.
 1150  *
 1151  * pipe_nr      pipe or queue the packet is destined for.
 1152  * dir          where shall we send the packet after dummynet.
 1153  * m            the mbuf with the packet
 1154  * ifp          the 'ifp' parameter from the caller.
 1155  *              NULL in ip_input, destination interface in ip_output,
 1156  *              real_dst in bdg_forward
 1157  * rule         matching rule, in case of multiple passes
 1158  *
 1159  */
 1160 static int
 1161 dummynet_io(struct mbuf *m, int dir, struct ip_fw_args *fwa)
 1162 {
 1163     struct mbuf *head = NULL, *tail = NULL;
 1164     struct dn_pkt_tag *pkt;
 1165     struct m_tag *mtag;
 1166     struct dn_flow_set *fs = NULL;
 1167     struct dn_pipe *pipe ;
 1168     u_int64_t len = m->m_pkthdr.len ;
 1169     struct dn_flow_queue *q = NULL ;
 1170     int is_pipe;
 1171     ipfw_insn *cmd = ACTION_PTR(fwa->rule);
 1172 
 1173     KASSERT(m->m_nextpkt == NULL,
 1174         ("dummynet_io: mbuf queue passed to dummynet"));
 1175 
 1176     if (cmd->opcode == O_LOG)
 1177         cmd += F_LEN(cmd);
 1178     is_pipe = (cmd->opcode == O_PIPE);
 1179 
 1180     DUMMYNET_LOCK();
 1181     /*
 1182      * This is a dummynet rule, so we expect an O_PIPE or O_QUEUE rule.
 1183      *
 1184      * XXXGL: probably the pipe->fs and fs->pipe logic here
 1185      * below can be simplified.
 1186      */
 1187     if (is_pipe) {
 1188         pipe = locate_pipe(fwa->cookie);
 1189         if (pipe != NULL)
 1190                 fs = &(pipe->fs);
 1191     } else
 1192         fs = locate_flowset(fwa->cookie);
 1193 
 1194     if (fs == NULL)
 1195         goto dropit;    /* This queue/pipe does not exist! */
 1196     pipe = fs->pipe;
 1197     if (pipe == NULL) { /* Must be a queue, try find a matching pipe. */
 1198         pipe = locate_pipe(fs->parent_nr);
 1199         if (pipe != NULL)
 1200             fs->pipe = pipe;
 1201         else {
 1202             printf("dummynet: no pipe %d for queue %d, drop pkt\n",
 1203                 fs->parent_nr, fs->fs_nr);
 1204             goto dropit ;
 1205         }
 1206     }
 1207     q = find_queue(fs, &(fwa->f_id));
 1208     if ( q == NULL )
 1209         goto dropit ;           /* cannot allocate queue                */
 1210     /*
 1211      * update statistics, then check reasons to drop pkt
 1212      */
 1213     q->tot_bytes += len ;
 1214     q->tot_pkts++ ;
 1215     if ( fs->plr && random() < fs->plr )
 1216         goto dropit ;           /* random pkt drop                      */
 1217     if ( fs->flags_fs & DN_QSIZE_IS_BYTES) {
 1218         if (q->len_bytes > fs->qsize)
 1219             goto dropit ;       /* queue size overflow                  */
 1220     } else {
 1221         if (q->len >= fs->qsize)
 1222             goto dropit ;       /* queue count overflow                 */
 1223     }
 1224     if ( fs->flags_fs & DN_IS_RED && red_drops(fs, q, len) )
 1225         goto dropit ;
 1226 
 1227     /* XXX expensive to zero, see if we can remove it*/
 1228     mtag = m_tag_get(PACKET_TAG_DUMMYNET,
 1229                 sizeof(struct dn_pkt_tag), M_NOWAIT|M_ZERO);
 1230     if ( mtag == NULL )
 1231         goto dropit ;           /* cannot allocate packet header        */
 1232     m_tag_prepend(m, mtag);     /* attach to mbuf chain */
 1233 
 1234     pkt = (struct dn_pkt_tag *)(mtag+1);
 1235     /* ok, i can handle the pkt now... */
 1236     /* build and enqueue packet + parameters */
 1237     pkt->rule = fwa->rule ;
 1238     pkt->dn_dir = dir ;
 1239 
 1240     pkt->ifp = fwa->oif;
 1241 
 1242     if (q->head == NULL)
 1243         q->head = m;
 1244     else
 1245         q->tail->m_nextpkt = m;
 1246     q->tail = m;
 1247     q->len++;
 1248     q->len_bytes += len ;
 1249 
 1250     if ( q->head != m )         /* flow was not idle, we are done */
 1251         goto done;
 1252     /*
 1253      * If we reach this point the flow was previously idle, so we need
 1254      * to schedule it. This involves different actions for fixed-rate or
 1255      * WF2Q queues.
 1256      */
 1257     if (is_pipe) {
 1258         /*
 1259          * Fixed-rate queue: just insert into the ready_heap.
 1260          */
 1261         dn_key t = 0 ;
 1262         if (pipe->bandwidth)
 1263             t = SET_TICKS(m, q, pipe);
 1264         q->sched_time = curr_time ;
 1265         if (t == 0)     /* must process it now */
 1266             ready_event(q, &head, &tail);
 1267         else
 1268             heap_insert(&ready_heap, curr_time + t , q );
 1269     } else {
 1270         /*
 1271          * WF2Q. First, compute start time S: if the flow was idle (S=F+1)
 1272          * set S to the virtual time V for the controlling pipe, and update
 1273          * the sum of weights for the pipe; otherwise, remove flow from
 1274          * idle_heap and set S to max(F,V).
 1275          * Second, compute finish time F = S + len/weight.
 1276          * Third, if pipe was idle, update V=max(S, V).
 1277          * Fourth, count one more backlogged flow.
 1278          */
 1279         if (DN_KEY_GT(q->S, q->F)) { /* means timestamps are invalid */
 1280             q->S = pipe->V ;
 1281             pipe->sum += fs->weight ; /* add weight of new queue */
 1282         } else {
 1283             heap_extract(&(pipe->idle_heap), q);
 1284             q->S = MAX64(q->F, pipe->V ) ;
 1285         }
 1286         q->F = q->S + ( len<<MY_M )/(u_int64_t) fs->weight;
 1287 
 1288         if (pipe->not_eligible_heap.elements == 0 &&
 1289                 pipe->scheduler_heap.elements == 0)
 1290             pipe->V = MAX64 ( q->S, pipe->V );
 1291         fs->backlogged++ ;
 1292         /*
 1293          * Look at eligibility. A flow is not eligibile if S>V (when
 1294          * this happens, it means that there is some other flow already
 1295          * scheduled for the same pipe, so the scheduler_heap cannot be
 1296          * empty). If the flow is not eligible we just store it in the
 1297          * not_eligible_heap. Otherwise, we store in the scheduler_heap
 1298          * and possibly invoke ready_event_wfq() right now if there is
 1299          * leftover credit.
 1300          * Note that for all flows in scheduler_heap (SCH), S_i <= V,
 1301          * and for all flows in not_eligible_heap (NEH), S_i > V .
 1302          * So when we need to compute max( V, min(S_i) ) forall i in SCH+NEH,
 1303          * we only need to look into NEH.
 1304          */
 1305         if (DN_KEY_GT(q->S, pipe->V) ) { /* not eligible */
 1306             if (pipe->scheduler_heap.elements == 0)
 1307                 printf("dummynet: ++ ouch! not eligible but empty scheduler!\n");
 1308             heap_insert(&(pipe->not_eligible_heap), q->S, q);
 1309         } else {
 1310             heap_insert(&(pipe->scheduler_heap), q->F, q);
 1311             if (pipe->numbytes >= 0) { /* pipe is idle */
 1312                 if (pipe->scheduler_heap.elements != 1)
 1313                     printf("dummynet: OUCH! pipe should have been idle!\n");
 1314                 DPRINTF(("dummynet: waking up pipe %d at %d\n",
 1315                         pipe->pipe_nr, (int)(q->F >> MY_M)));
 1316                 pipe->sched_time = curr_time ;
 1317                 ready_event_wfq(pipe, &head, &tail);
 1318             }
 1319         }
 1320     }
 1321 done:
 1322     DUMMYNET_UNLOCK();
 1323     if (head != NULL)
 1324         dummynet_send(head);
 1325     return 0;
 1326 
 1327 dropit:
 1328     if (q)
 1329         q->drops++ ;
 1330     DUMMYNET_UNLOCK();
 1331     m_freem(m);
 1332     return ( (fs && (fs->flags_fs & DN_NOERROR)) ? 0 : ENOBUFS);
 1333 }
 1334 
 1335 /*
 1336  * Below, the rt_unref is only needed when (pkt->dn_dir == DN_TO_IP_OUT)
 1337  * Doing this would probably save us the initial bzero of dn_pkt
 1338  */
 1339 #define DN_FREE_PKT(_m) do {                            \
 1340         m_freem(_m);                                    \
 1341 } while (0)
 1342 
 1343 /*
 1344  * Dispose all packets and flow_queues on a flow_set.
 1345  * If all=1, also remove red lookup table and other storage,
 1346  * including the descriptor itself.
 1347  * For the one in dn_pipe MUST also cleanup ready_heap...
 1348  */
 1349 static void
 1350 purge_flow_set(struct dn_flow_set *fs, int all)
 1351 {
 1352     struct dn_flow_queue *q, *qn ;
 1353     int i ;
 1354 
 1355     DUMMYNET_LOCK_ASSERT();
 1356 
 1357     for (i = 0 ; i <= fs->rq_size ; i++ ) {
 1358         for (q = fs->rq[i] ; q ; q = qn ) {
 1359             struct mbuf *m, *mnext;
 1360 
 1361             mnext = q->head;
 1362             while ((m = mnext) != NULL) {
 1363                 mnext = m->m_nextpkt;
 1364                 DN_FREE_PKT(m);
 1365             }
 1366             qn = q->next ;
 1367             free(q, M_DUMMYNET);
 1368         }
 1369         fs->rq[i] = NULL ;
 1370     }
 1371     fs->rq_elements = 0 ;
 1372     if (all) {
 1373         /* RED - free lookup table */
 1374         if (fs->w_q_lookup)
 1375             free(fs->w_q_lookup, M_DUMMYNET);
 1376         if (fs->rq)
 1377             free(fs->rq, M_DUMMYNET);
 1378         /* if this fs is not part of a pipe, free it */
 1379         if (fs->pipe && fs != &(fs->pipe->fs) )
 1380             free(fs, M_DUMMYNET);
 1381     }
 1382 }
 1383 
 1384 /*
 1385  * Dispose all packets queued on a pipe (not a flow_set).
 1386  * Also free all resources associated to a pipe, which is about
 1387  * to be deleted.
 1388  */
 1389 static void
 1390 purge_pipe(struct dn_pipe *pipe)
 1391 {
 1392     struct mbuf *m, *mnext;
 1393 
 1394     purge_flow_set( &(pipe->fs), 1 );
 1395 
 1396     mnext = pipe->head;
 1397     while ((m = mnext) != NULL) {
 1398         mnext = m->m_nextpkt;
 1399         DN_FREE_PKT(m);
 1400     }
 1401 
 1402     heap_free( &(pipe->scheduler_heap) );
 1403     heap_free( &(pipe->not_eligible_heap) );
 1404     heap_free( &(pipe->idle_heap) );
 1405 }
 1406 
 1407 /*
 1408  * Delete all pipes and heaps returning memory. Must also
 1409  * remove references from all ipfw rules to all pipes.
 1410  */
 1411 static void
 1412 dummynet_flush(void)
 1413 {
 1414         struct dn_pipe *pipe, *pipe1;
 1415         struct dn_flow_set *fs, *fs1;
 1416         int i;
 1417 
 1418         DUMMYNET_LOCK();
 1419         /* Free heaps so we don't have unwanted events. */
 1420         heap_free(&ready_heap);
 1421         heap_free(&wfq_ready_heap);
 1422         heap_free(&extract_heap);
 1423 
 1424         /*
 1425          * Now purge all queued pkts and delete all pipes.
 1426          *
 1427          * XXXGL: can we merge the for(;;) cycles into one or not?
 1428          */
 1429         for (i = 0; i < HASHSIZE; i++)
 1430                 SLIST_FOREACH_SAFE(fs, &flowsethash[i], next, fs1) {
 1431                         SLIST_REMOVE(&flowsethash[i], fs, dn_flow_set, next);
 1432                         purge_flow_set(fs, 1);
 1433                 }
 1434         for (i = 0; i < HASHSIZE; i++)
 1435                 SLIST_FOREACH_SAFE(pipe, &pipehash[i], next, pipe1) {
 1436                         SLIST_REMOVE(&pipehash[i], pipe, dn_pipe, next);
 1437                         purge_pipe(pipe);
 1438                         free(pipe, M_DUMMYNET);
 1439                 }
 1440         DUMMYNET_UNLOCK();
 1441 }
 1442 
 1443 extern struct ip_fw *ip_fw_default_rule ;
 1444 static void
 1445 dn_rule_delete_fs(struct dn_flow_set *fs, void *r)
 1446 {
 1447     int i ;
 1448     struct dn_flow_queue *q ;
 1449     struct mbuf *m ;
 1450 
 1451     for (i = 0 ; i <= fs->rq_size ; i++) /* last one is ovflow */
 1452         for (q = fs->rq[i] ; q ; q = q->next )
 1453             for (m = q->head ; m ; m = m->m_nextpkt ) {
 1454                 struct dn_pkt_tag *pkt = dn_tag_get(m) ;
 1455                 if (pkt->rule == r)
 1456                     pkt->rule = ip_fw_default_rule ;
 1457             }
 1458 }
 1459 /*
 1460  * when a firewall rule is deleted, scan all queues and remove the flow-id
 1461  * from packets matching this rule.
 1462  */
 1463 void
 1464 dn_rule_delete(void *r)
 1465 {
 1466     struct dn_pipe *pipe;
 1467     struct dn_flow_set *fs;
 1468     struct dn_pkt_tag *pkt;
 1469     struct mbuf *m;
 1470     int i;
 1471 
 1472     DUMMYNET_LOCK();
 1473     /*
 1474      * If the rule references a queue (dn_flow_set), then scan
 1475      * the flow set, otherwise scan pipes. Should do either, but doing
 1476      * both does not harm.
 1477      */
 1478     for (i = 0; i < HASHSIZE; i++)
 1479         SLIST_FOREACH(fs, &flowsethash[i], next)
 1480                 dn_rule_delete_fs(fs, r);
 1481 
 1482     for (i = 0; i < HASHSIZE; i++)
 1483         SLIST_FOREACH(pipe, &pipehash[i], next) {
 1484                 fs = &(pipe->fs);
 1485                 dn_rule_delete_fs(fs, r);
 1486                 for (m = pipe->head ; m ; m = m->m_nextpkt ) {
 1487                         pkt = dn_tag_get(m);
 1488                         if (pkt->rule == r)
 1489                                 pkt->rule = ip_fw_default_rule;
 1490                 }
 1491         }
 1492     DUMMYNET_UNLOCK();
 1493 }
 1494 
 1495 /*
 1496  * setup RED parameters
 1497  */
 1498 static int
 1499 config_red(struct dn_flow_set *p, struct dn_flow_set * x)
 1500 {
 1501     int i;
 1502 
 1503     x->w_q = p->w_q;
 1504     x->min_th = SCALE(p->min_th);
 1505     x->max_th = SCALE(p->max_th);
 1506     x->max_p = p->max_p;
 1507 
 1508     x->c_1 = p->max_p / (p->max_th - p->min_th);
 1509     x->c_2 = SCALE_MUL(x->c_1, SCALE(p->min_th));
 1510     if (x->flags_fs & DN_IS_GENTLE_RED) {
 1511         x->c_3 = (SCALE(1) - p->max_p) / p->max_th;
 1512         x->c_4 = (SCALE(1) - 2 * p->max_p);
 1513     }
 1514 
 1515     /* if the lookup table already exist, free and create it again */
 1516     if (x->w_q_lookup) {
 1517         free(x->w_q_lookup, M_DUMMYNET);
 1518         x->w_q_lookup = NULL ;
 1519     }
 1520     if (red_lookup_depth == 0) {
 1521         printf("\ndummynet: net.inet.ip.dummynet.red_lookup_depth must be > 0\n");
 1522         free(x, M_DUMMYNET);
 1523         return EINVAL;
 1524     }
 1525     x->lookup_depth = red_lookup_depth;
 1526     x->w_q_lookup = (u_int *) malloc(x->lookup_depth * sizeof(int),
 1527             M_DUMMYNET, M_NOWAIT);
 1528     if (x->w_q_lookup == NULL) {
 1529         printf("dummynet: sorry, cannot allocate red lookup table\n");
 1530         free(x, M_DUMMYNET);
 1531         return ENOSPC;
 1532     }
 1533 
 1534     /* fill the lookup table with (1 - w_q)^x */
 1535     x->lookup_step = p->lookup_step ;
 1536     x->lookup_weight = p->lookup_weight ;
 1537     x->w_q_lookup[0] = SCALE(1) - x->w_q;
 1538     for (i = 1; i < x->lookup_depth; i++)
 1539         x->w_q_lookup[i] = SCALE_MUL(x->w_q_lookup[i - 1], x->lookup_weight);
 1540     if (red_avg_pkt_size < 1)
 1541         red_avg_pkt_size = 512 ;
 1542     x->avg_pkt_size = red_avg_pkt_size ;
 1543     if (red_max_pkt_size < 1)
 1544         red_max_pkt_size = 1500 ;
 1545     x->max_pkt_size = red_max_pkt_size ;
 1546     return 0 ;
 1547 }
 1548 
 1549 static int
 1550 alloc_hash(struct dn_flow_set *x, struct dn_flow_set *pfs)
 1551 {
 1552     if (x->flags_fs & DN_HAVE_FLOW_MASK) {     /* allocate some slots */
 1553         int l = pfs->rq_size;
 1554 
 1555         if (l == 0)
 1556             l = dn_hash_size;
 1557         if (l < 4)
 1558             l = 4;
 1559         else if (l > DN_MAX_HASH_SIZE)
 1560             l = DN_MAX_HASH_SIZE;
 1561         x->rq_size = l;
 1562     } else                  /* one is enough for null mask */
 1563         x->rq_size = 1;
 1564     x->rq = malloc((1 + x->rq_size) * sizeof(struct dn_flow_queue *),
 1565             M_DUMMYNET, M_NOWAIT | M_ZERO);
 1566     if (x->rq == NULL) {
 1567         printf("dummynet: sorry, cannot allocate queue\n");
 1568         return (ENOMEM);
 1569     }
 1570     x->rq_elements = 0;
 1571     return 0 ;
 1572 }
 1573 
 1574 static void
 1575 set_fs_parms(struct dn_flow_set *x, struct dn_flow_set *src)
 1576 {
 1577     x->flags_fs = src->flags_fs;
 1578     x->qsize = src->qsize;
 1579     x->plr = src->plr;
 1580     x->flow_mask = src->flow_mask;
 1581     if (x->flags_fs & DN_QSIZE_IS_BYTES) {
 1582         if (x->qsize > 1024*1024)
 1583             x->qsize = 1024*1024 ;
 1584     } else {
 1585         if (x->qsize == 0)
 1586             x->qsize = 50 ;
 1587         if (x->qsize > 100)
 1588             x->qsize = 50 ;
 1589     }
 1590     /* configuring RED */
 1591     if ( x->flags_fs & DN_IS_RED )
 1592         config_red(src, x) ;    /* XXX should check errors */
 1593 }
 1594 
 1595 /*
 1596  * setup pipe or queue parameters.
 1597  */
 1598 
 1599 static int
 1600 config_pipe(struct dn_pipe *p)
 1601 {
 1602     struct dn_flow_set *pfs = &(p->fs);
 1603     struct dn_flow_queue *q;
 1604     int i, error;
 1605 
 1606     /*
 1607      * The config program passes parameters as follows:
 1608      * bw = bits/second (0 means no limits),
 1609      * delay = ms, must be translated into ticks.
 1610      * qsize = slots/bytes
 1611      */
 1612     p->delay = ( p->delay * hz ) / 1000 ;
 1613     /* We need either a pipe number or a flow_set number */
 1614     if (p->pipe_nr == 0 && pfs->fs_nr == 0)
 1615         return EINVAL ;
 1616     if (p->pipe_nr != 0 && pfs->fs_nr != 0)
 1617         return EINVAL ;
 1618     if (p->pipe_nr != 0) { /* this is a pipe */
 1619         struct dn_pipe *pipe;
 1620 
 1621         DUMMYNET_LOCK();
 1622         pipe = locate_pipe(p->pipe_nr); /* locate pipe */
 1623 
 1624         if (pipe == NULL) {     /* new pipe */
 1625             pipe = malloc(sizeof(struct dn_pipe), M_DUMMYNET,
 1626                 M_NOWAIT | M_ZERO);
 1627             if (pipe == NULL) {
 1628                 DUMMYNET_UNLOCK();
 1629                 printf("dummynet: no memory for new pipe\n");
 1630                 return (ENOMEM);
 1631             }
 1632             pipe->pipe_nr = p->pipe_nr;
 1633             pipe->fs.pipe = pipe ;
 1634             /* idle_heap is the only one from which we extract from the middle.
 1635              */
 1636             pipe->idle_heap.size = pipe->idle_heap.elements = 0 ;
 1637             pipe->idle_heap.offset=OFFSET_OF(struct dn_flow_queue, heap_pos);
 1638         } else
 1639             /* Flush accumulated credit for all queues */
 1640             for (i = 0; i <= pipe->fs.rq_size; i++)
 1641                 for (q = pipe->fs.rq[i]; q; q = q->next)
 1642                     q->numbytes = 0;
 1643 
 1644         pipe->bandwidth = p->bandwidth ;
 1645         pipe->numbytes = 0; /* just in case... */
 1646         bcopy(p->if_name, pipe->if_name, sizeof(p->if_name) );
 1647         pipe->ifp = NULL ; /* reset interface ptr */
 1648         pipe->delay = p->delay ;
 1649         set_fs_parms(&(pipe->fs), pfs);
 1650 
 1651 
 1652         if (pipe->fs.rq == NULL) {      /* a new pipe */
 1653             error = alloc_hash(&(pipe->fs), pfs);
 1654             if (error) {
 1655                 DUMMYNET_UNLOCK();
 1656                 free(pipe, M_DUMMYNET);
 1657                 return (error);
 1658             }
 1659             SLIST_INSERT_HEAD(&pipehash[HASH(pipe->pipe_nr)], pipe, next);
 1660         }
 1661         DUMMYNET_UNLOCK();
 1662     } else { /* config queue */
 1663         struct dn_flow_set *fs;
 1664 
 1665         DUMMYNET_LOCK();
 1666         fs = locate_flowset(pfs->fs_nr); /* locate flow_set */
 1667 
 1668         if (fs == NULL) {       /* new */
 1669             if (pfs->parent_nr == 0) {  /* need link to a pipe */
 1670                 DUMMYNET_UNLOCK();
 1671                 return EINVAL ;
 1672             }
 1673             fs = malloc(sizeof(struct dn_flow_set), M_DUMMYNET,
 1674                 M_NOWAIT|M_ZERO);
 1675             if (fs == NULL) {
 1676                 DUMMYNET_UNLOCK();
 1677                 printf("dummynet: no memory for new flow_set\n");
 1678                 return (ENOMEM);
 1679             }
 1680             fs->fs_nr = pfs->fs_nr;
 1681             fs->parent_nr = pfs->parent_nr;
 1682             fs->weight = pfs->weight;
 1683             if (fs->weight == 0)
 1684                 fs->weight = 1;
 1685             else if (fs->weight > 100)
 1686                 fs->weight = 100;
 1687         } else {
 1688             /* Change parent pipe not allowed; must delete and recreate */
 1689             if (pfs->parent_nr != 0 && fs->parent_nr != pfs->parent_nr) {
 1690                 DUMMYNET_UNLOCK();
 1691                 return EINVAL ;
 1692             }
 1693         }
 1694         set_fs_parms(fs, pfs);
 1695 
 1696         if (fs->rq == NULL) {   /* a new flow_set */
 1697             error = alloc_hash(fs, pfs);
 1698             if (error) {
 1699                 DUMMYNET_UNLOCK();
 1700                 free(fs, M_DUMMYNET);
 1701                 return (error);
 1702             }
 1703             SLIST_INSERT_HEAD(&flowsethash[HASH(fs->fs_nr)], fs, next);
 1704         }
 1705         DUMMYNET_UNLOCK();
 1706     }
 1707     return 0 ;
 1708 }
 1709 
 1710 /*
 1711  * Helper function to remove from a heap queues which are linked to
 1712  * a flow_set about to be deleted.
 1713  */
 1714 static void
 1715 fs_remove_from_heap(struct dn_heap *h, struct dn_flow_set *fs)
 1716 {
 1717     int i = 0, found = 0 ;
 1718     for (; i < h->elements ;)
 1719         if ( ((struct dn_flow_queue *)h->p[i].object)->fs == fs) {
 1720             h->elements-- ;
 1721             h->p[i] = h->p[h->elements] ;
 1722             found++ ;
 1723         } else
 1724             i++ ;
 1725     if (found)
 1726         heapify(h);
 1727 }
 1728 
 1729 /*
 1730  * helper function to remove a pipe from a heap (can be there at most once)
 1731  */
 1732 static void
 1733 pipe_remove_from_heap(struct dn_heap *h, struct dn_pipe *p)
 1734 {
 1735     if (h->elements > 0) {
 1736         int i = 0 ;
 1737         for (i=0; i < h->elements ; i++ ) {
 1738             if (h->p[i].object == p) { /* found it */
 1739                 h->elements-- ;
 1740                 h->p[i] = h->p[h->elements] ;
 1741                 heapify(h);
 1742                 break ;
 1743             }
 1744         }
 1745     }
 1746 }
 1747 
 1748 /*
 1749  * drain all queues. Called in case of severe mbuf shortage.
 1750  */
 1751 void
 1752 dummynet_drain()
 1753 {
 1754     struct dn_flow_set *fs;
 1755     struct dn_pipe *pipe;
 1756     struct mbuf *m, *mnext;
 1757     int i;
 1758 
 1759     DUMMYNET_LOCK_ASSERT();
 1760 
 1761     heap_free(&ready_heap);
 1762     heap_free(&wfq_ready_heap);
 1763     heap_free(&extract_heap);
 1764     /* remove all references to this pipe from flow_sets */
 1765     for (i = 0; i < HASHSIZE; i++)
 1766         SLIST_FOREACH(fs, &flowsethash[i], next)
 1767                 purge_flow_set(fs, 0);
 1768 
 1769     for (i = 0; i < HASHSIZE; i++) {
 1770         SLIST_FOREACH(pipe, &pipehash[i], next) {
 1771                 purge_flow_set(&(pipe->fs), 0);
 1772 
 1773                 mnext = pipe->head;
 1774                 while ((m = mnext) != NULL) {
 1775                         mnext = m->m_nextpkt;
 1776                         DN_FREE_PKT(m);
 1777                 }
 1778                 pipe->head = pipe->tail = NULL;
 1779         }
 1780     }
 1781 }
 1782 
 1783 /*
 1784  * Fully delete a pipe or a queue, cleaning up associated info.
 1785  */
 1786 static int
 1787 delete_pipe(struct dn_pipe *p)
 1788 {
 1789     if (p->pipe_nr == 0 && p->fs.fs_nr == 0)
 1790         return EINVAL ;
 1791     if (p->pipe_nr != 0 && p->fs.fs_nr != 0)
 1792         return EINVAL ;
 1793     if (p->pipe_nr != 0) { /* this is an old-style pipe */
 1794         struct dn_pipe *pipe;
 1795         struct dn_flow_set *fs;
 1796         int i;
 1797 
 1798         DUMMYNET_LOCK();
 1799         pipe = locate_pipe(p->pipe_nr); /* locate pipe */
 1800 
 1801         if (pipe == NULL) {
 1802             DUMMYNET_UNLOCK();
 1803             return (ENOENT);    /* not found */
 1804         }
 1805 
 1806         /* Unlink from list of pipes. */
 1807         SLIST_REMOVE(&pipehash[HASH(pipe->pipe_nr)], pipe, dn_pipe, next);
 1808 
 1809         /* Remove all references to this pipe from flow_sets. */
 1810         for (i = 0; i < HASHSIZE; i++)
 1811             SLIST_FOREACH(fs, &flowsethash[i], next)
 1812                 if (fs->pipe == pipe) {
 1813                         printf("dummynet: ++ ref to pipe %d from fs %d\n",
 1814                             p->pipe_nr, fs->fs_nr);
 1815                         fs->pipe = NULL ;
 1816                         purge_flow_set(fs, 0);
 1817                 }
 1818         fs_remove_from_heap(&ready_heap, &(pipe->fs));
 1819         purge_pipe(pipe); /* remove all data associated to this pipe */
 1820         /* remove reference to here from extract_heap and wfq_ready_heap */
 1821         pipe_remove_from_heap(&extract_heap, pipe);
 1822         pipe_remove_from_heap(&wfq_ready_heap, pipe);
 1823         DUMMYNET_UNLOCK();
 1824 
 1825         free(pipe, M_DUMMYNET);
 1826     } else { /* this is a WF2Q queue (dn_flow_set) */
 1827         struct dn_flow_set *fs;
 1828 
 1829         DUMMYNET_LOCK();
 1830         fs = locate_flowset(p->fs.fs_nr); /* locate set */
 1831 
 1832         if (fs == NULL) {
 1833             DUMMYNET_UNLOCK();
 1834             return (ENOENT); /* not found */
 1835         }
 1836 
 1837         /* Unlink from list of flowsets. */
 1838         SLIST_REMOVE( &flowsethash[HASH(fs->fs_nr)], fs, dn_flow_set, next);
 1839 
 1840         if (fs->pipe != NULL) {
 1841             /* Update total weight on parent pipe and cleanup parent heaps. */
 1842             fs->pipe->sum -= fs->weight * fs->backlogged ;
 1843             fs_remove_from_heap(&(fs->pipe->not_eligible_heap), fs);
 1844             fs_remove_from_heap(&(fs->pipe->scheduler_heap), fs);
 1845 #if 1   /* XXX should i remove from idle_heap as well ? */
 1846             fs_remove_from_heap(&(fs->pipe->idle_heap), fs);
 1847 #endif
 1848         }
 1849         purge_flow_set(fs, 1);
 1850         DUMMYNET_UNLOCK();
 1851     }
 1852     return 0 ;
 1853 }
 1854 
 1855 /*
 1856  * helper function used to copy data from kernel in DUMMYNET_GET
 1857  */
 1858 static char *
 1859 dn_copy_set(struct dn_flow_set *set, char *bp)
 1860 {
 1861     int i, copied = 0 ;
 1862     struct dn_flow_queue *q, *qp = (struct dn_flow_queue *)bp;
 1863 
 1864     DUMMYNET_LOCK_ASSERT();
 1865 
 1866     for (i = 0 ; i <= set->rq_size ; i++)
 1867         for (q = set->rq[i] ; q ; q = q->next, qp++ ) {
 1868             if (q->hash_slot != i)
 1869                 printf("dummynet: ++ at %d: wrong slot (have %d, "
 1870                     "should be %d)\n", copied, q->hash_slot, i);
 1871             if (q->fs != set)
 1872                 printf("dummynet: ++ at %d: wrong fs ptr (have %p, should be %p)\n",
 1873                         i, q->fs, set);
 1874             copied++ ;
 1875             bcopy(q, qp, sizeof( *q ) );
 1876             /* cleanup pointers */
 1877             qp->next = NULL ;
 1878             qp->head = qp->tail = NULL ;
 1879             qp->fs = NULL ;
 1880         }
 1881     if (copied != set->rq_elements)
 1882         printf("dummynet: ++ wrong count, have %d should be %d\n",
 1883             copied, set->rq_elements);
 1884     return (char *)qp ;
 1885 }
 1886 
 1887 static size_t
 1888 dn_calc_size(void)
 1889 {
 1890     struct dn_flow_set *fs;
 1891     struct dn_pipe *pipe;
 1892     size_t size = 0;
 1893     int i;
 1894 
 1895     DUMMYNET_LOCK_ASSERT();
 1896     /*
 1897      * Compute size of data structures: list of pipes and flow_sets.
 1898      */
 1899     for (i = 0; i < HASHSIZE; i++) {
 1900         SLIST_FOREACH(pipe, &pipehash[i], next)
 1901                 size += sizeof(*pipe) +
 1902                     pipe->fs.rq_elements * sizeof(struct dn_flow_queue);
 1903         SLIST_FOREACH(fs, &flowsethash[i], next)
 1904                 size += sizeof (*fs) +
 1905                     fs->rq_elements * sizeof(struct dn_flow_queue);
 1906     }
 1907     return size;
 1908 }
 1909 
 1910 static int
 1911 dummynet_get(struct sockopt *sopt)
 1912 {
 1913     char *buf, *bp ; /* bp is the "copy-pointer" */
 1914     size_t size ;
 1915     struct dn_flow_set *fs;
 1916     struct dn_pipe *pipe;
 1917     int error=0, i ;
 1918 
 1919     /* XXX lock held too long */
 1920     DUMMYNET_LOCK();
 1921     /*
 1922      * XXX: Ugly, but we need to allocate memory with M_WAITOK flag and we
 1923      *      cannot use this flag while holding a mutex.
 1924      */
 1925     for (i = 0; i < 10; i++) {
 1926         size = dn_calc_size();
 1927         DUMMYNET_UNLOCK();
 1928         buf = malloc(size, M_TEMP, M_WAITOK);
 1929         DUMMYNET_LOCK();
 1930         if (size == dn_calc_size())
 1931                 break;
 1932         free(buf, M_TEMP);
 1933         buf = NULL;
 1934     }
 1935     if (buf == NULL) {
 1936         DUMMYNET_UNLOCK();
 1937         return ENOBUFS ;
 1938     }
 1939     bp = buf;
 1940     for (i = 0; i < HASHSIZE; i++)
 1941         SLIST_FOREACH(pipe, &pipehash[i], next) {
 1942                 struct dn_pipe *pipe_bp = (struct dn_pipe *)bp;
 1943 
 1944                 /*
 1945                  * Copy pipe descriptor into *bp, convert delay back to ms,
 1946                  * then copy the flow_set descriptor(s) one at a time.
 1947                  * After each flow_set, copy the queue descriptor it owns.
 1948                  */
 1949                 bcopy(pipe, bp, sizeof(*pipe));
 1950                 pipe_bp->delay = (pipe_bp->delay * 1000) / hz;
 1951                 /*
 1952                  * XXX the following is a hack based on ->next being the
 1953                  * first field in dn_pipe and dn_flow_set. The correct
 1954                  * solution would be to move the dn_flow_set to the beginning
 1955                  * of struct dn_pipe.
 1956                  */
 1957                 pipe_bp->next.sle_next = (struct dn_pipe *)DN_IS_PIPE;
 1958                 /* Clean pointers. */
 1959                 pipe_bp->head = pipe_bp->tail = NULL;
 1960                 pipe_bp->fs.next.sle_next = NULL;
 1961                 pipe_bp->fs.pipe = NULL;
 1962                 pipe_bp->fs.rq = NULL;
 1963 
 1964                 bp += sizeof(*pipe) ;
 1965                 bp = dn_copy_set(&(pipe->fs), bp);
 1966         }
 1967 
 1968     for (i = 0; i < HASHSIZE; i++)
 1969         SLIST_FOREACH(fs, &flowsethash[i], next) {
 1970                 struct dn_flow_set *fs_bp = (struct dn_flow_set *)bp;
 1971 
 1972                 bcopy(fs, bp, sizeof(*fs));
 1973                 /* XXX same hack as above */
 1974                 fs_bp->next.sle_next = (struct dn_flow_set *)DN_IS_QUEUE;
 1975                 fs_bp->pipe = NULL;
 1976                 fs_bp->rq = NULL;
 1977                 bp += sizeof(*fs);
 1978                 bp = dn_copy_set(fs, bp);
 1979         }
 1980 
 1981     DUMMYNET_UNLOCK();
 1982 
 1983     error = sooptcopyout(sopt, buf, size);
 1984     free(buf, M_TEMP);
 1985     return error ;
 1986 }
 1987 
 1988 /*
 1989  * Handler for the various dummynet socket options (get, flush, config, del)
 1990  */
 1991 static int
 1992 ip_dn_ctl(struct sockopt *sopt)
 1993 {
 1994     int error = 0 ;
 1995     struct dn_pipe *p, tmp_pipe;
 1996 
 1997     /* Disallow sets in really-really secure mode. */
 1998     if (sopt->sopt_dir == SOPT_SET) {
 1999 #if __FreeBSD_version >= 500034
 2000         error =  securelevel_ge(sopt->sopt_td->td_ucred, 3);
 2001         if (error)
 2002             return (error);
 2003 #else
 2004         if (securelevel >= 3)
 2005             return (EPERM);
 2006 #endif
 2007     }
 2008 
 2009     switch (sopt->sopt_name) {
 2010     default :
 2011         printf("dummynet: -- unknown option %d", sopt->sopt_name);
 2012         return EINVAL ;
 2013 
 2014     case IP_DUMMYNET_GET :
 2015         error = dummynet_get(sopt);
 2016         break ;
 2017 
 2018     case IP_DUMMYNET_FLUSH :
 2019         dummynet_flush() ;
 2020         break ;
 2021 
 2022     case IP_DUMMYNET_CONFIGURE :
 2023         p = &tmp_pipe ;
 2024         error = sooptcopyin(sopt, p, sizeof *p, sizeof *p);
 2025         if (error)
 2026             break ;
 2027         error = config_pipe(p);
 2028         break ;
 2029 
 2030     case IP_DUMMYNET_DEL :      /* remove a pipe or queue */
 2031         p = &tmp_pipe ;
 2032         error = sooptcopyin(sopt, p, sizeof *p, sizeof *p);
 2033         if (error)
 2034             break ;
 2035 
 2036         error = delete_pipe(p);
 2037         break ;
 2038     }
 2039     return error ;
 2040 }
 2041 
 2042 static void
 2043 ip_dn_init(void)
 2044 {
 2045     int i;
 2046 
 2047     if (bootverbose)
 2048             printf("DUMMYNET with IPv6 initialized (040826)\n");
 2049 
 2050     DUMMYNET_LOCK_INIT();
 2051 
 2052     for (i = 0; i < HASHSIZE; i++) {
 2053         SLIST_INIT(&pipehash[i]);
 2054         SLIST_INIT(&flowsethash[i]);
 2055     }
 2056     ready_heap.size = ready_heap.elements = 0 ;
 2057     ready_heap.offset = 0 ;
 2058 
 2059     wfq_ready_heap.size = wfq_ready_heap.elements = 0 ;
 2060     wfq_ready_heap.offset = 0 ;
 2061 
 2062     extract_heap.size = extract_heap.elements = 0 ;
 2063     extract_heap.offset = 0 ;
 2064 
 2065     ip_dn_ctl_ptr = ip_dn_ctl;
 2066     ip_dn_io_ptr = dummynet_io;
 2067     ip_dn_ruledel_ptr = dn_rule_delete;
 2068 
 2069     callout_init(&dn_timeout, NET_CALLOUT_MPSAFE);
 2070     callout_reset(&dn_timeout, 1, dummynet, NULL);
 2071 }
 2072 
 2073 #ifdef KLD_MODULE
 2074 static void
 2075 ip_dn_destroy(void)
 2076 {
 2077     ip_dn_ctl_ptr = NULL;
 2078     ip_dn_io_ptr = NULL;
 2079     ip_dn_ruledel_ptr = NULL;
 2080 
 2081     callout_stop(&dn_timeout);
 2082     dummynet_flush();
 2083 
 2084     DUMMYNET_LOCK_DESTROY();
 2085 }
 2086 #endif /* KLD_MODULE */
 2087 
 2088 static int
 2089 dummynet_modevent(module_t mod, int type, void *data)
 2090 {
 2091         switch (type) {
 2092         case MOD_LOAD:
 2093                 if (DUMMYNET_LOADED) {
 2094                     printf("DUMMYNET already loaded\n");
 2095                     return EEXIST ;
 2096                 }
 2097                 ip_dn_init();
 2098                 break;
 2099 
 2100         case MOD_UNLOAD:
 2101 #if !defined(KLD_MODULE)
 2102                 printf("dummynet statically compiled, cannot unload\n");
 2103                 return EINVAL ;
 2104 #else
 2105                 ip_dn_destroy();
 2106 #endif
 2107                 break ;
 2108         default:
 2109                 return EOPNOTSUPP;
 2110                 break ;
 2111         }
 2112         return 0 ;
 2113 }
 2114 
 2115 static moduledata_t dummynet_mod = {
 2116         "dummynet",
 2117         dummynet_modevent,
 2118         NULL
 2119 };
 2120 DECLARE_MODULE(dummynet, dummynet_mod, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY);
 2121 MODULE_DEPEND(dummynet, ipfw, 2, 2, 2);
 2122 MODULE_VERSION(dummynet, 1);

Cache object: ea8e38f4499f21216915f411806fc3b4


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.