The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/netinet/ip_dummynet.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*
    2  * Copyright (c) 1998-2000 Luigi Rizzo, Universita` di Pisa
    3  * Portions Copyright (c) 2000 Akamba Corp.
    4  * All rights reserved
    5  *
    6  * Redistribution and use in source and binary forms, with or without
    7  * modification, are permitted provided that the following conditions
    8  * are met:
    9  * 1. Redistributions of source code must retain the above copyright
   10  *    notice, this list of conditions and the following disclaimer.
   11  * 2. Redistributions in binary form must reproduce the above copyright
   12  *    notice, this list of conditions and the following disclaimer in the
   13  *    documentation and/or other materials provided with the distribution.
   14  *
   15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   25  * SUCH DAMAGE.
   26  *
   27  * $FreeBSD$
   28  */
   29 
   /*
    * Debug wrappers for optional statements (typically printf calls).
    * DEB(x) expands to nothing, so whatever is wrapped in it is compiled out;
    * DDB(x) expands to x, so whatever is wrapped in it is compiled in.
    */
   30 #define DEB(x)
   31 #define DDB(x)  x
   32 
   33 /*
   34  * This module implements IP dummynet, a bandwidth limiter/delay emulator
   35  * used in conjunction with the ipfw package.
   36  *
   37  * Most important Changes:
   38  *
   39  * 000106: large rewrite, use heaps to handle very many pipes.
   40  * 980513:      initial release
   41  *
   42  * include files marked with XXX are probably not needed
   43  */
   44 
   45 #include <sys/param.h>
   46 #include <sys/systm.h>
   47 #include <sys/malloc.h>
   48 #include <sys/mbuf.h>
   49 #include <sys/queue.h>                  /* XXX */
   50 #include <sys/kernel.h>
   51 #include <sys/socket.h>
   52 #include <sys/socketvar.h>
   53 #include <sys/time.h>
   54 #include <sys/sysctl.h>
   55 #include <net/if.h>
   56 #include <net/route.h>
   57 #include <netinet/in.h>
   58 #include <netinet/in_systm.h>
   59 #include <netinet/in_var.h>
   60 #include <netinet/ip.h>
   61 #include <netinet/ip_fw.h>
   62 #include <netinet/ip_dummynet.h>
   63 #include <netinet/ip_var.h>
   64 
   65 #include "opt_bdg.h"
   66 #ifdef BRIDGE
   67 #include <netinet/if_ether.h> /* for struct arpcom */
   68 #include <net/bridge.h>
   69 #endif
   70 
   71 /*
   72  * we keep a private variable for the simulation time, but probably
   73  * it would be better to use the already existing one "softticks"
   74  * (in sys/kern/kern_timer.c)
   75  */
   /* current simulation time; advanced by one on every call to dummynet() */
   76 static dn_key curr_time = 0 ; /* current simulation time */
   77 
   78 static int dn_hash_size = 64 ;  /* default hash size */
   79 
   80 /* statistics on number of queue searches and search steps */
   81 static int searches, search_steps ;
   82 static int pipe_expire = 0 ;    /* expire queue if empty */
   83 static int dn_max_ratio = 16 ; /* max queues/buckets ratio */
   84 
   /*
    * Event heaps. ready_heap holds flow queues, keyed by the tick at which
    * ready_event() must run for them; extract_heap holds pipes, keyed by the
    * output time of the packet at the head of their delay line, for
    * transmit_event().
    */
   85 static struct dn_heap ready_heap, extract_heap ;
   /* heap primitives: heap_init() and heap_insert() return 1 on failure */
   86 static int heap_init(struct dn_heap *h, int size) ;
   87 static int heap_insert (struct dn_heap *h, dn_key key1, void *p);
   88 static void heap_extract(struct dn_heap *h);
   /* scheduler entry points, see the "Scheduler functions" comment below */
   89 static void transmit_event(struct dn_pipe *pipe);
   90 static void ready_event(struct dn_flow_queue *q);
   91 
   92 static struct dn_pipe *all_pipes = NULL ;       /* list of all pipes */
   93 
   /*
    * sysctl variables, all attached to a "dummynet" node created under
    * _net_inet_ip. CTLFLAG_RD entries export counters and heap sizes,
    * CTLFLAG_RW entries are tunables. Note that the "expire" entry maps to
    * pipe_expire and the "max_chain_len" entry maps to dn_max_ratio.
    * NOTE(review): curr_time is declared as dn_key but exported through
    * SYSCTL_INT; confirm that dn_key is int-sized in this tree.
    */
   94 #ifdef SYSCTL_NODE
   95 SYSCTL_NODE(_net_inet_ip, OID_AUTO, dummynet,
   96                 CTLFLAG_RW, 0, "Dummynet");
   97 SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, hash_size,
   98             CTLFLAG_RW, &dn_hash_size, 0, "Default hash table size");
   99 SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, curr_time,
  100             CTLFLAG_RD, &curr_time, 0, "Current tick");
  101 SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, ready_heap,
  102             CTLFLAG_RD, &ready_heap.size, 0, "Size of ready heap");
  103 SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, extract_heap,
  104             CTLFLAG_RD, &extract_heap.size, 0, "Size of extract heap");
  105 SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, searches,
  106             CTLFLAG_RD, &searches, 0, "Number of queue searches");
  107 SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, search_steps,
  108             CTLFLAG_RD, &search_steps, 0, "Number of queue search steps");
  109 SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, expire,
  110             CTLFLAG_RW, &pipe_expire, 0, "Expire queue if empty");
  111 SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, max_chain_len,
  112             CTLFLAG_RW, &dn_max_ratio, 0, 
  113         "Max ratio between dynamic queues and buckets");
  114 #endif
  115 
  /* handler for the dummynet socket options (get, flush, config, del) */
  116 static int ip_dn_ctl(struct sockopt *sopt);
  117 
  /* drop one reference on a route entry; NULL is accepted */
  118 static void rt_unref(struct rtentry *);
  /* per-tick handler: advances curr_time and runs the expired heap events */
  119 static void dummynet(void *);
  /* delete all pipes and both heaps */
  120 static void dummynet_flush(void);
  121 
  122 /*
  123  * ip_fw_chain is used when deleting a pipe, because ipfw rules can
  124  * hold references to the pipe.
  125  */
  126 extern LIST_HEAD (ip_fw_head, ip_fw_chain) ip_fw_chain;
  127 
  128 static void
  129 rt_unref(struct rtentry *rt)
  130 {
  131     if (rt == NULL)
  132         return ;
  133     if (rt->rt_refcnt <= 0)
  134         printf("-- warning, refcnt now %d, decreasing\n", rt->rt_refcnt);
  135     RTFREE(rt);
  136 }
  137 
  /*
   * Heap management functions.
   *
   * The heap is stored in an array; the first node is element 0 and the
   * children of node i are 2i+1 (left) and 2i+2 (right). Left children
   * therefore always have odd indexes. The macros below do the index
   * arithmetic so that it can be optimized in one place.
   *
   * heap_init() is called to expand the heap when needed.
   * The size is rounded up to a multiple of HEAP_INCREMENT+1 = 256 entries
   * (HEAP_INCREMENT is used as a bit mask, so it must be a power of 2
   * minus 1).
   * XXX failure to allocate a new element is a pretty bad failure
   * as we basically stall a whole queue forever!!
   * heap_init() returns 1 on error, 0 on success.
   */
  #define HEAP_FATHER(x) ( ( (x) - 1 ) / 2 )      /* parent of node x, x > 0 */
  #define HEAP_LEFT(x) ( 2*(x) + 1 )              /* left child of node x */
  #define HEAP_IS_LEFT(x) ( (x) & 1 )             /* true if x is a left child */
  #define HEAP_RIGHT(x) ( 2*(x) + 2 )             /* right child of node x */
  /* exchange a and b, using buffer (same type) as scratch space */
  #define HEAP_SWAP(a, b, buffer) { buffer = a ; a = b ; b = buffer ; }
  #define HEAP_INCREMENT  255
  156 
  157 static int
  158 heap_init(struct dn_heap *h, int new_size)
  159 {       
  160     struct dn_heap_entry *p;
  161 
  162     if (h->size >= new_size ) {
  163         printf("heap_init, Bogus call, have %d want %d\n",
  164                 h->size, new_size);
  165         return 0 ;
  166     }   
  167     new_size = (new_size + HEAP_INCREMENT ) & ~HEAP_INCREMENT ;
  168     p = malloc(new_size * sizeof(*p), M_IPFW, M_DONTWAIT );
  169     if (p == NULL) {
  170         printf(" heap_init, resize %d failed\n", new_size );
  171         return 1 ; /* error */
  172     }
  173     if (h->size > 0) {
  174         bcopy(h->p, p, h->size * sizeof(*p) );
  175         free(h->p, M_IPFW);
  176     }
  177     h->p = p ;
  178     h->size = new_size ;
  179     return 0 ;
  180 }
  181 
  182 /*
  183  * Insert element in heap. Normally, p != NULL, we insert p in
  184  * a new position and bubble up. If p == NULL, then the element is
  185  * already in place, and key is the position where to start the
  186  * bubble-up.
  187  * Returns 1 on failure (cannot allocate new heap entry)
  188  */
  189 static int
  190 heap_insert(struct dn_heap *h, dn_key key1, void *p)
  191 {   
  192     int son = h->elements ;
  193 
  194     if (p == NULL)      /* data already there, set starting point */
  195         son = key1 ;
  196     else {              /* insert new element at the end, possibly resize */
  197         son = h->elements ;
  198         if (son == h->size) /* need resize... */
  199             if (heap_init(h, h->elements+1) )
  200                 return 1 ; /* failure... */
  201         h->p[son].object = p ;
  202         h->p[son].key = key1 ;
  203         h->elements++ ;
  204     }
  205     while (son > 0) {                           /* bubble up */
  206         int father = HEAP_FATHER(son) ;
  207         struct dn_heap_entry tmp  ;
  208 
  209         if (DN_KEY_LT( h->p[father].key, h->p[son].key ) )
  210             break ; /* found right position */ 
  211         /* son smaller than father, swap and try again */
  212         HEAP_SWAP(h->p[son], h->p[father], tmp) ;
  213         son = father ;
  214     }
  215     return 0 ;
  216 }
  217 
  218 /*
  219  * remove top element from heap
  220  */
  221 static void
  222 heap_extract(struct dn_heap *h)
  223 {  
  224     int child, father, max = h->elements - 1 ;
  225     if (max < 0)
  226         return ;
  227 
  228     /* move up smallest child */
  229     father = 0 ;
  230     child = HEAP_LEFT(father) ;         /* left child */
  231     while (child <= max) {              /* valid entry */
  232         if (child != max && DN_KEY_LT(h->p[child+1].key, h->p[child].key) )
  233             child = child+1 ;           /* take right child, otherwise left */
  234         h->p[father] = h->p[child] ;
  235         father = child ;
  236         child = HEAP_LEFT(child) ;   /* left child for next loop */
  237     }   
  238     h->elements-- ;
  239     if (father != max) {
  240         /*
  241          * Fill hole with last entry and bubble up, reusing the insert code
  242          */
  243         h->p[father] = h->p[max] ;
  244         heap_insert(h, father, NULL); /* this one cannot fail */
  245     }   
  246 }           
  247 
  248 /*
  249  * heapify() will reorganize data inside an array to maintain the
  250  * heap property. It is needed when we delete a bunch of entries.
  251  */
  252 static void
  253 heapify(struct dn_heap *h)
  254 {
  255     int father, i ;
  256     struct dn_heap_entry tmp ;
  257 
  258     for (i = h->elements - 1 ; i > 0 ; i-- ) {
  259         father = HEAP_FATHER(i) ;
  260         if ( DN_KEY_LT(h->p[i].key, h->p[father].key) )
  261             HEAP_SWAP(h->p[father], h->p[i], tmp) ;
  262     }
  263 }
  264 /*
  265  * --- end of heap management functions ---
  266  */
  267 
  268 /*
  269  * Scheduler functions -- transmit_event(), ready_event()
  270  *
  271  * transmit_event() is called when the delay-line needs to enter
  272  * the scheduler, either because of existing pkts getting ready,
  273  * or new packets entering the queue. The event handled is the delivery
  274  * time of the packet.
  275  *
  276  * ready_event() does something similar with flow queues, and the
  277  * event handled is the finish time of the head pkt.
  278  *
  279  * In both cases, we make sure that the data structures are consistent
  280  * before passing pkts out, because this might trigger recursive
  281  * invocations of the procedures.
  282  */
  283 static void
  284 transmit_event(struct dn_pipe *pipe)
  285 {
  286     struct dn_pkt *pkt ;
  287 
  288     while ( (pkt = pipe->p.head) && DN_KEY_LEQ(pkt->output_time, curr_time) ) {
  289         /*
  290          * first unlink, then call procedures, since ip_input() can invoke
  291          * ip_output() and viceversa, thus causing nested calls
  292          */
  293         pipe->p.head = DN_NEXT(pkt) ;
  294 
  295         /*
  296          * The actual mbuf is preceded by a struct dn_pkt, resembling an mbuf
  297          * (NOT A REAL one, just a small block of malloc'ed memory) with
  298          *     m_type = MT_DUMMYNET
  299          *     m_next = actual mbuf to be processed by ip_input/output
  300          *     m_data = the matching rule
  301          * and some other fields.
  302          * The block IS FREED HERE because it contains parameters passed
  303          * to the called routine.
  304          */
  305         switch (pkt->dn_dir) {
  306         case DN_TO_IP_OUT:
  307             (void)ip_output((struct mbuf *)pkt, NULL, NULL, 0, NULL);
  308             rt_unref (pkt->ro.ro_rt) ;
  309             break ;
  310 
  311         case DN_TO_IP_IN :
  312             ip_input((struct mbuf *)pkt) ;
  313             break ;
  314 
  315 #ifdef BRIDGE
  316         case DN_TO_BDG_FWD : {
  317             struct mbuf *m = (struct mbuf *)pkt ;
  318             bdg_forward(&m, pkt->ifp);
  319             if (m)
  320                 m_freem(m);
  321             }
  322             break ;
  323 #endif
  324 
  325         default:
  326             printf("dummynet: bad switch %d!\n", pkt->dn_dir);
  327             m_freem(pkt->dn_m);
  328             break ;
  329         }
  330         FREE(pkt, M_IPFW);
  331     }
  332     /* if there are leftover packets, put into the heap for next event */
  333     if ( (pkt = pipe->p.head) )
  334          heap_insert(&extract_heap, pkt->output_time, pipe ) ;
  335     /* XXX should check errors on heap_insert, by draining the
  336      * whole pipe p and hoping in the future we are more successful
  337      */
  338 }
  339 
  340 /*
  341  * ready_event() is invoked every time the queue must enter the
  342  * scheduler, either because the first packet arrives, or because
  343  * a previously scheduled event fired.
  344  * On invokation, drain as many pkts as possible (could be 0) and then
  345  * if there are leftover packets reinsert the pkt in the scheduler.
  346  */
  347 static void
  348 ready_event(struct dn_flow_queue *q)
  349 {
  350     struct dn_pkt *pkt;
  351     struct dn_pipe *p = q->p ;
  352     int p_was_empty = (p->p.head == NULL) ;
  353 
  354     while ( (pkt = q->r.head) != NULL ) {
  355         int len = pkt->dn_m->m_pkthdr.len;
  356         int len_scaled = p->bandwidth ? len*8*hz : 0 ;
  357         /*
  358          * bandwidth==0 (no limit) means we can drain as many pkts as
  359          * needed from the queue. Setting len_scaled = 0 does the job.
  360          */
  361         if (len_scaled > q->numbytes )
  362             break ;
  363         /*
  364          * extract pkt from queue, compute output time (could be now)
  365          * and put into delay line (p_queue)
  366          */
  367         q->numbytes -= len_scaled ;
  368         q->r.head = DN_NEXT(pkt) ;
  369         q->len-- ;
  370         q->len_bytes -= len ;
  371 
  372         pkt->output_time = curr_time + p->delay ;
  373 
  374         if (p->p.head == NULL)
  375             p->p.head = pkt;
  376         else
  377             DN_NEXT(p->p.tail) = pkt;
  378         p->p.tail = pkt;
  379         DN_NEXT(p->p.tail) = NULL;
  380     }
  381     /*
  382      * If we have more packets queued, schedule next ready event
  383      * (can only occur when bandwidth != 0, otherwise we would have
  384      * flushed the whole queue in the previous loop).
  385      * To this purpose compute how many ticks to go for the next
  386      * event, accounting for packet size and residual credit. This means
  387      * we compute the finish time of the packet.
  388      */
  389     if ( (pkt = q->r.head) != NULL ) { /* this implies bandwidth != 0 */
  390         dn_key t ;
  391         t = (pkt->dn_m->m_pkthdr.len*8*hz - q->numbytes + p->bandwidth - 1 ) /
  392                 p->bandwidth ;
  393         q->numbytes += t * p->bandwidth ;
  394         heap_insert(&ready_heap, curr_time + t, (void *)q );
  395         /* XXX should check errors on heap_insert, and drain the whole
  396          * queue on error hoping next time we are luckier.
  397          */
  398     }
  399     /*
  400      * If the delay line was empty call transmit_event(p) now.
  401      * Otherwise, the scheduler will take care of it.
  402      */
  403     if (p_was_empty)
  404         transmit_event(p);
  405 }
  406 
  407 /*
  408  * this is called once per tick, or HZ times per second. It is used to
  409  * increment the current tick counter and schedule expired events.
  410  */
  411 static void
  412 dummynet(void * __unused unused)
  413 {
  414     void *p ; /* generic parameter to handler */
  415     struct dn_heap *h ;
  416     int s ;
  417 
  418     s = splnet(); /* avoid network interrupts... */
  419     curr_time++ ;
  420     h = &ready_heap ;
  421     while (h->elements > 0 && DN_KEY_LEQ(h->p[0].key, curr_time) ) {
  422         /*
  423          * XXX if the event is late, we should probably credit the queue
  424          * by q->p->bandwidth * (delta_ticks). On the other hand, i dont
  425          * think this can ever occur with this code (i.e. curr_time will
  426          * still be incremented by one at each tick. Things might be
  427          * different if we were using the counter from the high priority
  428          * timer.
  429          */
  430         if (h->p[0].key != curr_time)
  431             printf("-- dummynet: warning, event is %d ticks late\n",
  432                 curr_time - h->p[0].key);
  433         p = h->p[0].object ;
  434         heap_extract(h); /* need to extract before processing */
  435         ready_event(p) ;
  436     }
  437     h = &extract_heap ;
  438     while (h->elements > 0 && DN_KEY_LEQ(h->p[0].key, curr_time) ) {
  439         if (h->p[0].key != curr_time)   /* XXX same as above */
  440             printf("-- dummynet: warning, event is %d ticks late\n",
  441                 curr_time - h->p[0].key);
  442         p = h->p[0].object ;
  443         heap_extract(&extract_heap);
  444         transmit_event(p);
  445     }
  446     splx(s);
  447     timeout(dummynet, NULL, 1);
  448 }
  449  
  450 /*
  451  * Unconditionally expire empty queues in case of shortage.
  452  * Returns the number of queues freed.
  453  */
  454 static int
  455 expire_queues(struct dn_pipe *pipe)
  456 {
  457     struct dn_flow_queue *q, *prev ;
  458     int i, initial_elements = pipe->rq_elements ;
  459 
  460     if (pipe->last_expired == time_second)
  461         return 0 ;
  462     pipe->last_expired = time_second ;
  463     for (i = 0 ; i <= pipe->rq_size ; i++) /* last one is overflow */
  464         for (prev=NULL, q = pipe->rq[i] ; q != NULL ; )
  465             if (q->r.head != NULL) {
  466                 prev = q ;
  467                 q = q->next ;
  468             } else { /* entry is idle, expire it */
  469                 struct dn_flow_queue *old_q = q ;
  470 
  471                 if (prev != NULL)
  472                     prev->next = q = q->next ;
  473                 else
  474                     pipe->rq[i] = q = q->next ;
  475                 pipe->rq_elements-- ;
  476                 free(old_q, M_IPFW);
  477             }
  478     return initial_elements - pipe->rq_elements ;
  479 }
  480 
  481 /*
  482  * If room, create a new queue and put at head of slot i;
  483  * otherwise, create or use the default queue.
  484  */
  485 static struct dn_flow_queue *
  486 create_queue(struct dn_pipe *pipe, int i)
  487 {
  488     struct dn_flow_queue *q ;
  489 
  490     if (pipe->rq_elements > pipe->rq_size * dn_max_ratio &&
  491             expire_queues(pipe) == 0) {
  492         /*
  493          * No way to get room, use or create overflow queue.
  494          */
  495         i = pipe->rq_size ;
  496         if ( pipe->rq[i] != NULL )
  497             return pipe->rq[i] ;
  498     }
  499     q = malloc(sizeof(*q), M_IPFW, M_DONTWAIT) ;
  500     if (q == NULL) {
  501         printf("sorry, cannot allocate queue for new flow\n");
  502         return NULL ;
  503     }
  504     bzero(q, sizeof(*q) );      /* needed */
  505     q->p = pipe ;
  506     q->hash_slot = i ;
  507     q->next = pipe->rq[i] ;
  508     pipe->rq[i] = q ;
  509     pipe->rq_elements++ ;
  510     return q ;
  511 }
  512 
  513 /*
  514  * Given a pipe and a pkt in last_pkt, find a matching queue
  515  * after appropriate masking. The queue is moved to front
  516  * so that further searches take less time.
  517  */
  518 static struct dn_flow_queue *
  519 find_queue(struct dn_pipe *pipe)
  520 {
  521     int i = 0 ; /* we need i and q for new allocations */
  522     struct dn_flow_queue *q, *prev;
  523 
  524     if ( !(pipe->flags & DN_HAVE_FLOW_MASK) )
  525         q = pipe->rq[0] ;
  526     else {
  527         /* first, do the masking */
  528         last_pkt.dst_ip &= pipe->flow_mask.dst_ip ;
  529         last_pkt.src_ip &= pipe->flow_mask.src_ip ;
  530         last_pkt.dst_port &= pipe->flow_mask.dst_port ;
  531         last_pkt.src_port &= pipe->flow_mask.src_port ;
  532         last_pkt.proto &= pipe->flow_mask.proto ;
  533         last_pkt.flags = 0 ; /* we dont care about this one */
  534         /* then, hash function */
  535         i = ( (last_pkt.dst_ip) & 0xffff ) ^
  536             ( (last_pkt.dst_ip >> 15) & 0xffff ) ^
  537             ( (last_pkt.src_ip << 1) & 0xffff ) ^
  538             ( (last_pkt.src_ip >> 16 ) & 0xffff ) ^
  539             (last_pkt.dst_port << 1) ^ (last_pkt.src_port) ^
  540             (last_pkt.proto );
  541         i = i % pipe->rq_size ;
  542         /* finally, scan the current list for a match */
  543         searches++ ;
  544         for (prev=NULL, q = pipe->rq[i] ; q ; ) {
  545             search_steps++;
  546             if (bcmp(&last_pkt, &(q->id), sizeof(q->id) ) == 0)
  547                 break ; /* found */
  548             else if (pipe_expire && q->r.head == NULL) {
  549                 /* entry is idle, expire it */
  550                 struct dn_flow_queue *old_q = q ;
  551 
  552                 if (prev != NULL)
  553                     prev->next = q = q->next ;
  554                 else
  555                     pipe->rq[i] = q = q->next ;
  556                 pipe->rq_elements-- ;
  557                 free(old_q, M_IPFW);
  558                 continue ;
  559             }
  560             prev = q ;
  561             q = q->next ;
  562         }
  563         if (q && prev != NULL) { /* found and not in front */
  564             prev->next = q->next ;
  565             q->next = pipe->rq[i] ;
  566             pipe->rq[i] = q ;
  567         }
  568     }
  569     if (q == NULL) { /* no match, need to allocate a new entry */
  570         q = create_queue(pipe, i);
  571         if (q != NULL)
  572             q->id = last_pkt ;
  573     }
  574     return q ;
  575 }
  576 
  577 /*
  578  * dummynet hook for packets.
  579  */
  580 int
  581 dummynet_io(int pipe_nr, int dir,
  582         struct mbuf *m, struct ifnet *ifp, struct route *ro,
  583         struct sockaddr_in *dst,
  584         struct ip_fw_chain *rule)
  585 {
  586     struct dn_pkt *pkt;
  587     struct dn_pipe *p;
  588     int len = m->m_pkthdr.len ;
  589     struct dn_flow_queue *q = NULL ;
  590     int s ;
  591 
  592     s = splimp();
  593     /* XXX check the spl protection. It might be unnecessary since we
  594      * run this at splnet() already.
  595      */
  596 
  597     DEB(printf("-- last_pkt dst 0x%08x/0x%04x src 0x%08x/0x%04x\n",
  598         last_pkt.dst_ip, last_pkt.dst_port,
  599         last_pkt.src_ip, last_pkt.src_port);)
  600 
  601     pipe_nr &= 0xffff ;
  602     /*
  603      * locate pipe. First time is expensive, next have direct access.
  604      */
  605     if ( (p = rule->rule->pipe_ptr) == NULL ) {
  606         for (p = all_pipes; p && p->pipe_nr != pipe_nr; p = p->next)
  607             ;
  608         if (p == NULL)
  609             goto dropit ;       /* this pipe does not exist! */
  610         rule->rule->pipe_ptr = p ; /* record pipe ptr for the future    */
  611     }
  612     q = find_queue(p);
  613     /*
  614      * update statistics, then do various check on reasons to drop pkt
  615      */
  616     if ( q == NULL )
  617         goto dropit ;           /* cannot allocate queue                */
  618     q->tot_bytes += len ;
  619     q->tot_pkts++ ;
  620     if ( p->plr && random() < p->plr )
  621         goto dropit ;           /* random pkt drop                      */
  622     if ( p->queue_size && q->len >= p->queue_size)
  623         goto dropit ;           /* queue count overflow                 */
  624     if ( p->queue_size_bytes && len + q->len_bytes > p->queue_size_bytes)
  625         goto dropit ;           /* queue size overflow                  */
  626     /*
  627      * can implement RED drops here if needed.
  628      */
  629 
  630     pkt = (struct dn_pkt *)malloc(sizeof (*pkt), M_IPFW, M_NOWAIT) ;
  631     if ( pkt == NULL )
  632         goto dropit ;           /* cannot allocate packet header        */
  633     /* ok, i can handle the pkt now... */
  634     bzero(pkt, sizeof(*pkt) ); /* XXX expensive, see if we can remove it*/
  635     /* build and enqueue packet + parameters */
  636     pkt->hdr.mh_type = MT_DUMMYNET ;
  637     (struct ip_fw_chain *)pkt->hdr.mh_data = rule ;
  638     DN_NEXT(pkt) = NULL;
  639     pkt->dn_m = m;
  640     pkt->dn_dir = dir ;
  641 
  642     pkt->ifp = ifp;
  643     if (dir == DN_TO_IP_OUT) {
  644         /*
  645          * We need to copy *ro because for ICMP pkts (and maybe others)
  646          * the caller passed a pointer into the stack; dst might also be
  647          * a pointer into *ro so it needs to be updated.
  648          */
  649         pkt->ro = *ro;
  650         if (ro->ro_rt)
  651             ro->ro_rt->rt_refcnt++ ; /* XXX */
  652         if (dst == (struct sockaddr_in *)&ro->ro_dst) /* dst points into ro */
  653             dst = (struct sockaddr_in *)&(pkt->ro.ro_dst) ;
  654 
  655         pkt->dn_dst = dst;
  656     }
  657     if (q->r.head == NULL)
  658         q->r.head = pkt;
  659     else
  660         DN_NEXT(q->r.tail) = pkt;
  661     q->r.tail = pkt;
  662     q->len++;
  663     q->len_bytes += len ;
  664 
  665     /*
  666      * If queue was empty (this is first pkt) then call ready_event()
  667      * now to make the pkt go out at the right time. Otherwise we are done,
  668      * as there must be a ready event already scheduled.
  669      */
  670     if (q->r.head == pkt) /* r_queue was empty */
  671         ready_event( q );
  672     splx(s);
  673     return 0;
  674 
  675 dropit:
  676     splx(s);
  677     if (q)
  678         q->drops++ ;
  679     m_freem(m);
  680     return 0 ; /* XXX should I return an error ? */
  681 }
  682 
  /*
   * DN_FREE_PKT(pkt) releases the packet pkt points to (route reference,
   * mbuf chain and the dn_pkt itself) and advances pkt to the next packet
   * in the list, so it can be used as the body of a loop on pkt.
   * pkt must be a modifiable variable and is evaluated more than once.
   *
   * The rt_unref is only needed when (pkt->dn_dir == DN_TO_IP_OUT);
   * making it conditional on that would probably save us the initial bzero
   * of dn_pkt.
   */
  #define DN_FREE_PKT(pkt)        {                       \
          struct dn_pkt *dn_victim = pkt ;                \
          rt_unref ( dn_victim->ro.ro_rt ) ;              \
          m_freem(dn_victim->dn_m);                       \
          pkt = DN_NEXT(dn_victim) ;                      \
          free(dn_victim, M_IPFW) ;       }
  693 /*
  694  * dispose all packets queued on a pipe
  695  */
  696 static void
  697 purge_pipe(struct dn_pipe *pipe)
  698 {
  699     struct dn_pkt *pkt ;
  700     struct dn_flow_queue *q, *qn ;
  701     int i ;
  702 
  703     for (i = 0 ; i <= pipe->rq_size ; i++ ) /* XXX last one is overflow */
  704         for (q = pipe->rq[i] ; q ; q = qn ) {
  705             for (pkt = q->r.head ; pkt ; )
  706                 DN_FREE_PKT(pkt) ;
  707             qn = q->next ;
  708             free(q, M_IPFW);
  709         }
  710     for (pkt = pipe->p.head ; pkt ; )
  711         DN_FREE_PKT(pkt) ;
  712 }
  713 
  714 /*
  715  * Delete all pipes and heaps returning memory. Must also
  716  * remove references from all ipfw rules to all pipes.
  717  */
  718 static void
  719 dummynet_flush()
  720 {
  721     struct dn_pipe *curr_p, *p ;
  722     struct ip_fw_chain *chain ;
  723     int s ;
  724 
  725     s = splnet() ;
  726 
  727     /* remove all references to pipes ...*/
  728     for (chain= ip_fw_chain.lh_first ; chain; chain = chain->chain.le_next)
  729         chain->rule->pipe_ptr = NULL ;
  730     /* prevent future matches... */
  731     p = all_pipes ;
  732     all_pipes = NULL ; 
  733     /* and free heaps so we don't have unwanted events */
  734     if (ready_heap.size >0 )
  735         free(ready_heap.p, M_IPFW);
  736     ready_heap.elements = ready_heap.size = 0 ;
  737     if (extract_heap.size >0 )
  738         free(extract_heap.p, M_IPFW);
  739     extract_heap.elements = extract_heap.size = 0 ;
  740     splx(s) ;
  741     /*
  742      * Now purge all queued pkts and delete all pipes
  743      */
  744     for ( ; p ; ) {
  745         purge_pipe(p);
  746         curr_p = p ;
  747         p = p->next ;   
  748         free(curr_p->rq, M_IPFW);
  749         free(curr_p, M_IPFW);
  750     }
  751 }
  752 
  753 extern struct ip_fw_chain *ip_fw_default_rule ;
  754 /*
  755  * when a firewall rule is deleted, scan all queues and remove the flow-id
  756  * from packets matching this rule.
  757  */
  758 void
  759 dn_rule_delete(void *r)
  760 {
  761     struct dn_pipe *p ;
  762     struct dn_flow_queue *q ;
  763     struct dn_pkt *pkt ;
  764     int i ;
  765 
  766     for ( p = all_pipes ; p ; p = p->next ) {
  767         for (i = 0 ; i <= p->rq_size ; i++) /* XXX last one is ovflow */
  768             for (q = p->rq[i] ; q ; q = q->next )
  769                 for (pkt = q->r.head ; pkt ; pkt = DN_NEXT(pkt) )
  770                     if (pkt->hdr.mh_data == r)
  771                         pkt->hdr.mh_data = (void *)ip_fw_default_rule ;
  772         for (pkt = p->p.head ; pkt ; pkt = DN_NEXT(pkt) )
  773             if (pkt->hdr.mh_data == r)
  774                 pkt->hdr.mh_data = (void *)ip_fw_default_rule ;
  775     }
  776 }
  777 
  778 /*
  779  * Handler for the various dummynet socket options (get, flush, config, del)
  780  */
  781 static int
  782 ip_dn_ctl(struct sockopt *sopt)
  783 {
  784     int error = 0 ;
  785     struct dn_pipe *p, tmp_pipe ;
  786 
  787     struct dn_pipe *a, *b ;
  788 
  789     /* Disallow sets in really-really secure mode. */
  790     if (sopt->sopt_dir == SOPT_SET && securelevel >= 3)
  791         return (EPERM);
  792 
  793     switch (sopt->sopt_name) {
  794     default :
  795         panic("ip_dn_ctl -- unknown option");
  796 
  797     case IP_DUMMYNET_GET :
  798         {
  799             char *buf, *bp ; /* bp is the "copy-pointer" */
  800             size_t size ;
  801             int s ;
  802 
  803             s = splnet() ; /* to avoid thing change while we work! */
  804             for (p = all_pipes, size = 0 ; p ; p = p->next )
  805                 size += sizeof( *p ) +
  806                     p->rq_elements * sizeof(struct dn_flow_queue);
  807             buf = malloc(size, M_TEMP, M_DONTWAIT);
  808             if (buf == 0) {
  809                 error = ENOBUFS ;
  810                 splx(s);
  811                 break ;
  812             }
  813             for (p = all_pipes, bp = buf ; p ; p = p->next ) {
  814                 int i ;
  815                 struct dn_pipe *pipe_bp = (struct dn_pipe *)bp ;
  816                 struct dn_flow_queue *q;
  817                 int copied = 0 ;
  818 
  819                 /*
  820                  * copy pipe descriptor into *bp, convert delay back to ms,
  821                  * then copy the queue descriptor(s) one at a time.
  822                  */
  823                 bcopy(p, bp, sizeof( *p ) );
  824                 pipe_bp->delay = (pipe_bp->delay * 1000) / hz ;
  825                 bp += sizeof( *p ) ;
  826                 for (i = 0 ; i <= p->rq_size ; i++)
  827                     for (q = p->rq[i] ; q ; q = q->next, bp += sizeof(*q) ) {
  828                         if (q->hash_slot != i)
  829                             printf("++ at %d: wrong slot (have %d, should be %d)\n", copied, q->hash_slot, i);
  830                         copied++ ;
  831                         bcopy(q, bp, sizeof( *q ) );
  832                     }
  833                 if (copied != p->rq_elements)
  834                     printf("++ wrong count, have %d should be %d\n",
  835                         copied, p->rq_elements);
  836             }
  837             splx(s);
  838             error = sooptcopyout(sopt, buf, size);
  839             FREE(buf, M_TEMP);
  840         }
  841         break ;
  842 
  843     case IP_DUMMYNET_FLUSH :
  844         dummynet_flush() ;
  845         break ;
  846 
  847     case IP_DUMMYNET_CONFIGURE :
  848         p = &tmp_pipe ;
  849         error = sooptcopyin(sopt, p, sizeof *p, sizeof *p);
  850         if (error)
  851             break ;
  852         /*
  853          * The config program passes parameters as follows:
  854          * bw = bits/second (0 means no limits),
  855          * delay = ms, must be translated into ticks.
  856          * queue_size = slots (0 means no limit)
  857          * queue_size_bytes = bytes (0 means no limit)
  858          *        only one can be set, must be bound-checked
  859          */
  860         p->delay = ( p->delay * hz ) / 1000 ;
  861         if (p->queue_size == 0 && p->queue_size_bytes == 0)
  862             p->queue_size = 50 ;
  863         if (p->queue_size != 0 )        /* buffers are prevailing */
  864             p->queue_size_bytes = 0 ;
  865         if (p->queue_size > 100)
  866             p->queue_size = 50 ;
  867         if (p->queue_size_bytes > 1024*1024)
  868             p->queue_size_bytes = 1024*1024 ;
  869         for (a = NULL , b = all_pipes ; b && b->pipe_nr < p->pipe_nr ;
  870                  a = b , b = b->next) ;
  871         if (b && b->pipe_nr == p->pipe_nr) {
  872             b->bandwidth = p->bandwidth ;
  873             b->delay = p->delay ;
  874             b->queue_size = p->queue_size ;
  875             b->queue_size_bytes = p->queue_size_bytes ;
  876             b->plr = p->plr ;
  877             b->flow_mask = p->flow_mask ;
  878             b->flags = p->flags ;
  879         } else { /* brand new pipe */
  880             int s ;
  881             struct dn_pipe *x;
  882             x = malloc(sizeof(struct dn_pipe), M_IPFW, M_DONTWAIT) ;
  883             if (x == NULL) {
  884                 printf("ip_dummynet.c: no memory for new pipe\n");
  885                 error = ENOSPC ;
  886                 break ;
  887             }
  888             bzero(x, sizeof(*x) );
  889             x->bandwidth = p->bandwidth ;
  890             x->delay = p->delay ;
  891             x->pipe_nr = p->pipe_nr ;
  892             x->queue_size = p->queue_size ;
  893             x->queue_size_bytes = p->queue_size_bytes ;
  894             x->plr = p->plr ;
  895             x->flow_mask = p->flow_mask ;
  896             x->flags = p->flags ;
  897             if (x->flags & DN_HAVE_FLOW_MASK) {/* allocate some slots */
  898                 int l = p->rq_size ;
  899                 if (l == 0)
  900                     l = dn_hash_size ;
  901                 if (l < 4)
  902                     l = 4 ;
  903                 else if (l > 1024)
  904                     l = 1024 ;
  905                 x->rq_size = l ;
  906             } else /* one is enough for null mask */
  907                 x->rq_size = 1 ;
  908             x->rq = malloc((1 + x->rq_size) * sizeof(struct dn_flow_queue *),
  909                     M_IPFW, M_DONTWAIT) ;
  910             if (x->rq == NULL ) {
  911                 printf("sorry, cannot allocate queue\n");
  912                 free(x, M_IPFW);
  913                 error = ENOSPC ;
  914                 break ;
  915             }
  916             bzero(x->rq, (1+x->rq_size) * sizeof(struct dn_flow_queue *) );
  917             x->rq_elements = 0 ;
  918 
  919             s = splnet() ;
  920             x->next = b ;
  921             if (a == NULL)
  922                 all_pipes = x ;
  923             else
  924                 a->next = x ;
  925             splx(s);
  926         }
  927         break ;
  928 
  929     case IP_DUMMYNET_DEL :
  930         p = &tmp_pipe ;
  931         error = sooptcopyin(sopt, p, sizeof *p, sizeof *p);
  932         if (error)
  933             break ;
  934 
  935         for (a = NULL , b = all_pipes ; b && b->pipe_nr < p->pipe_nr ;
  936                  a = b , b = b->next) ;
  937         if (b && b->pipe_nr == p->pipe_nr) {    /* found pipe */
  938             int s ;
  939             struct ip_fw_chain *chain ;
  940 
  941             s = splnet() ;
  942             chain = ip_fw_chain.lh_first;
  943 
  944             if (a == NULL)
  945                 all_pipes = b->next ;
  946             else
  947                 a->next = b->next ;
  948             /*
  949              * remove references to this pipe from the ip_fw rules.
  950              */
  951             for (; chain; chain = chain->chain.le_next)
  952                 if (chain->rule->pipe_ptr == b)
  953                     chain->rule->pipe_ptr = NULL ;
  954             /* remove all references to b from heaps */
  955             if (ready_heap.elements > 0) {
  956                 struct dn_heap *h = &ready_heap ;
  957                 int i = 0, found = 0 ;
  958                 while ( i < h->elements ) {
  959                     if (((struct dn_flow_queue *)(h->p[i].object))->p == b) {
  960                         /* found one */
  961                         h->elements-- ;
  962                         h->p[i] = h->p[h->elements] ;
  963                         found++ ;
  964                     } else
  965                         i++ ;
  966                 }
  967                 if (found)
  968                     heapify(h);
  969             }
  970             if (extract_heap.elements > 0) {
  971                 struct dn_heap *h = &extract_heap ;
  972                 int i = 0, found = 0 ;
  973                 while ( i < h->elements ) {
  974                     if (h->p[i].object == b) { /* found one */
  975                         h->elements-- ;
  976                         h->p[i] = h->p[h->elements] ;
  977                         found++ ;
  978                     } else
  979                         i++ ;
  980                 }
  981                 if (found)
  982                     heapify(h);
  983             }
  984             splx(s);
  985             purge_pipe(b);      /* remove pkts from here */
  986             free(b->rq, M_IPFW);
  987             free(b, M_IPFW);
  988         }
  989         break ;
  990     }
  991     return error ;
  992 }
  993 
  994 void
  995 ip_dn_init(void)
  996 {
  997     printf("DUMMYNET initialized (000212)\n");
  998     all_pipes = NULL ;
  999     ready_heap.size = ready_heap.elements = 0 ;
 1000     extract_heap.size = extract_heap.elements = 0 ;
 1001     ip_dn_ctl_ptr = ip_dn_ctl;
 1002     timeout(dummynet, NULL, 1);
 1003 }
 1004 
 1005 #ifdef DUMMYNET_MODULE
 1006 
 1007 #include <sys/exec.h>
 1008 #include <sys/sysent.h>
 1009 #include <sys/lkm.h>
 1010 
 1011 MOD_MISC(dummynet);
 1012 
 1013 static ip_dn_ctl_t *old_dn_ctl_ptr ;
 1014 
 1015 static int
 1016 dummynet_load(struct lkm_table *lkmtp, int cmd)
 1017 {
 1018         int s=splnet();
 1019         old_dn_ctl_ptr = ip_dn_ctl_ptr;
 1020         ip_dn_init();
 1021         splx(s);
 1022         return 0;
 1023 }
 1024 
 1025 static int
 1026 dummynet_unload(struct lkm_table *lkmtp, int cmd)
 1027 {
 1028         int s=splnet();
 1029         ip_dn_ctl_ptr =  old_dn_ctl_ptr;
 1030         splx(s);
 1031         dummynet_flush();
 1032         printf("DUMMYNET unloaded\n");
 1033         return 0;
 1034 }
 1035 
 1036 int
 1037 dummynet_mod(struct lkm_table *lkmtp, int cmd, int ver)
 1038 {
 1039     DISPATCH(lkmtp, cmd, ver, dummynet_load, dummynet_unload, lkm_nullcmd);
 1040 }
 1041 #endif

Cache object: b01c776b047d3c46cb8321b381f30425


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.