/*
 * Copyright (c) 1998-2000 Luigi Rizzo, Universita` di Pisa
 * Portions Copyright (c) 2000 Akamba Corp.
 * All rights reserved
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#define DEB(x)
#define DDB(x) x

/*
 * This module implements IP dummynet, a bandwidth limiter/delay emulator
 * used in conjunction with the ipfw package.
 *
 * Most important Changes:
 *
 * 000106: large rewrite, use heaps to handle very many pipes.
 * 980513: initial release
 *
 * include files marked with XXX are probably not needed
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/queue.h>              /* XXX */
#include <sys/kernel.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/time.h>
#include <sys/sysctl.h>
#include <net/if.h>
#include <net/route.h>
#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/in_var.h>
#include <netinet/ip.h>
#include <netinet/ip_fw.h>
#include <netinet/ip_dummynet.h>
#include <netinet/ip_var.h>

#include "opt_bdg.h"
#ifdef BRIDGE
#include <netinet/if_ether.h>       /* for struct arpcom */
#include <net/bridge.h>
#endif
/*
 * we keep a private variable for the simulation time, but probably
 * it would be better to use the already existing one "softticks"
 * (in sys/kern/kern_timeout.c)
 */
static dn_key curr_time = 0 ;       /* current simulation time */

static int dn_hash_size = 64 ;      /* default hash size */

/* statistics on number of queue searches and search steps */
static int searches, search_steps ;
static int pipe_expire = 0 ;        /* expire queue if empty */
static int dn_max_ratio = 16 ;      /* max queues/buckets ratio */

static struct dn_heap ready_heap, extract_heap ;
static int heap_init(struct dn_heap *h, int size) ;
static int heap_insert (struct dn_heap *h, dn_key key1, void *p);
static void heap_extract(struct dn_heap *h);
static void transmit_event(struct dn_pipe *pipe);
static void ready_event(struct dn_flow_queue *q);

static struct dn_pipe *all_pipes = NULL ;   /* list of all pipes */

#ifdef SYSCTL_NODE
SYSCTL_NODE(_net_inet_ip, OID_AUTO, dummynet,
    CTLFLAG_RW, 0, "Dummynet");
SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, hash_size,
    CTLFLAG_RW, &dn_hash_size, 0, "Default hash table size");
SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, curr_time,
    CTLFLAG_RD, &curr_time, 0, "Current tick");
SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, ready_heap,
    CTLFLAG_RD, &ready_heap.size, 0, "Size of ready heap");
SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, extract_heap,
    CTLFLAG_RD, &extract_heap.size, 0, "Size of extract heap");
SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, searches,
    CTLFLAG_RD, &searches, 0, "Number of queue searches");
SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, search_steps,
    CTLFLAG_RD, &search_steps, 0, "Number of queue search steps");
SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, expire,
    CTLFLAG_RW, &pipe_expire, 0, "Expire queue if empty");
SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, max_chain_len,
    CTLFLAG_RW, &dn_max_ratio, 0,
    "Max ratio between dynamic queues and buckets");
#endif
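
/*
 * Illustrative note (added here, not part of the original sources): the
 * knobs declared above show up under "net.inet.ip.dummynet" and can be
 * inspected or tuned from userland with sysctl(8), e.g.
 *
 *	sysctl net.inet.ip.dummynet.hash_size=128
 *	sysctl net.inet.ip.dummynet.expire=1
 *
 * The CTLFLAG_RD entries (curr_time, the heap sizes and the search
 * counters) are read-only statistics.
 */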

static int ip_dn_ctl(struct sockopt *sopt);

static void rt_unref(struct rtentry *);
static void dummynet(void *);
static void dummynet_flush(void);

/*
 * ip_fw_chain is used when deleting a pipe, because ipfw rules can
 * hold references to the pipe.
 */
extern LIST_HEAD (ip_fw_head, ip_fw_chain) ip_fw_chain;

static void
rt_unref(struct rtentry *rt)
{
    if (rt == NULL)
        return ;
    if (rt->rt_refcnt <= 0)
        printf("-- warning, refcnt now %ld, decreasing\n", rt->rt_refcnt);
    RTFREE(rt);
}

/*
 * Heap management functions.
 *
 * In the heap, first node is element 0. Children of i are 2i+1 and 2i+2.
 * Some macros help finding parent/children so we can optimize them.
 *
 * heap_init() is called to expand the heap when needed.
 * Increment size in blocks of 256 entries (which makes one 4KB page)
 * XXX failure to allocate a new element is a pretty bad failure
 * as we basically stall a whole queue forever!!
 * Returns 1 on error, 0 on success
 */
#define HEAP_FATHER(x)		( ( (x) - 1 ) / 2 )
#define HEAP_LEFT(x)		( 2*(x) + 1 )
#define HEAP_IS_LEFT(x)		( (x) & 1 )
#define HEAP_RIGHT(x)		( 2*(x) + 2 )
#define HEAP_SWAP(a, b, buffer)	{ buffer = a ; a = b ; b = buffer ; }
#define HEAP_INCREMENT		255
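
/*
 * Worked example (added for clarity, not in the original text): with the
 * macros above, node 3 has children HEAP_LEFT(3) == 7 and HEAP_RIGHT(3) == 8,
 * and HEAP_FATHER(7) == HEAP_FATHER(8) == 3 ; HEAP_IS_LEFT(7) is non-zero
 * because left children always sit at odd indexes. heap_init() below rounds
 * the requested size up to a multiple of 256 entries, so e.g. a request for
 * 300 slots allocates 512.
 */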

static int
heap_init(struct dn_heap *h, int new_size)
{
    struct dn_heap_entry *p;

    if (h->size >= new_size ) {
        printf("heap_init, Bogus call, have %d want %d\n",
            h->size, new_size);
        return 0 ;
    }
    new_size = (new_size + HEAP_INCREMENT ) & ~HEAP_INCREMENT ;
    p = malloc(new_size * sizeof(*p), M_IPFW, M_DONTWAIT );
    if (p == NULL) {
        printf(" heap_init, resize %d failed\n", new_size );
        return 1 ; /* error */
    }
    if (h->size > 0) {
        bcopy(h->p, p, h->size * sizeof(*p) );
        free(h->p, M_IPFW);
    }
    h->p = p ;
    h->size = new_size ;
    return 0 ;
}

/*
 * Insert element in heap. Normally p != NULL and we insert p in
 * a new position and bubble up. If p == NULL, then the element is
 * already in place, and key is the position where to start the
 * bubble-up.
 * Returns 1 on failure (cannot allocate new heap entry)
 */
static int
heap_insert(struct dn_heap *h, dn_key key1, void *p)
{
    int son = h->elements ;

    if (p == NULL) /* data already there, set starting point */
        son = key1 ;
    else { /* insert new element at the end, possibly resize */
        son = h->elements ;
        if (son == h->size) /* need resize... */
            if (heap_init(h, h->elements+1) )
                return 1 ; /* failure... */
        h->p[son].object = p ;
        h->p[son].key = key1 ;
        h->elements++ ;
    }
    while (son > 0) { /* bubble up */
        int father = HEAP_FATHER(son) ;
        struct dn_heap_entry tmp ;

        if (DN_KEY_LT( h->p[father].key, h->p[son].key ) )
            break ; /* found right position */
        /* son smaller than father, swap and try again */
        HEAP_SWAP(h->p[son], h->p[father], tmp) ;
        son = father ;
    }
    return 0 ;
}

/*
 * remove top element from heap
 */
static void
heap_extract(struct dn_heap *h)
{
    int child, father, max = h->elements - 1 ;
    if (max < 0)
        return ;

    /* move up smallest child */
    father = 0 ;
    child = HEAP_LEFT(father) ;         /* left child */
    while (child <= max) {              /* valid entry */
        if (child != max && DN_KEY_LT(h->p[child+1].key, h->p[child].key) )
            child = child+1 ;           /* take right child, otherwise left */
        h->p[father] = h->p[child] ;
        father = child ;
        child = HEAP_LEFT(child) ;      /* left child for next loop */
    }
    h->elements-- ;
    if (father != max) {
        /*
         * Fill hole with last entry and bubble up, reusing the insert code
         */
        h->p[father] = h->p[max] ;
        heap_insert(h, father, NULL);   /* this one cannot fail */
    }
}

/*
 * heapify() will reorganize data inside an array to maintain the
 * heap property. It is needed when we delete a bunch of entries.
 */
static void
heapify(struct dn_heap *h)
{
    int father, i ;
    struct dn_heap_entry tmp ;

    for (i = h->elements - 1 ; i > 0 ; i-- ) {
        father = HEAP_FATHER(i) ;
        if ( DN_KEY_LT(h->p[i].key, h->p[father].key) )
            HEAP_SWAP(h->p[father], h->p[i], tmp) ;
    }
}
/*
 * --- end of heap management functions ---
 */

/*
 * Scheduler functions -- transmit_event(), ready_event()
 *
 * transmit_event() is called when the delay-line needs to enter
 * the scheduler, either because of existing pkts getting ready,
 * or new packets entering the queue. The event handled is the delivery
 * time of the packet.
 *
 * ready_event() does something similar with flow queues, and the
 * event handled is the finish time of the head pkt.
 *
 * In both cases, we make sure that the data structures are consistent
 * before passing pkts out, because this might trigger recursive
 * invocations of the procedures.
 */
static void
transmit_event(struct dn_pipe *pipe)
{
    struct dn_pkt *pkt ;

    while ( (pkt = pipe->p.head) && DN_KEY_LEQ(pkt->output_time, curr_time) ) {
        /*
         * first unlink, then call procedures, since ip_input() can invoke
         * ip_output() and vice versa, thus causing nested calls
         */
        pipe->p.head = DN_NEXT(pkt) ;

        /*
         * The actual mbuf is preceded by a struct dn_pkt, resembling an mbuf
         * (NOT A REAL one, just a small block of malloc'ed memory) with
         *     m_type = MT_DUMMYNET
         *     m_next = actual mbuf to be processed by ip_input/output
         *     m_data = the matching rule
         * and some other fields.
         * The block IS FREED HERE because it contains parameters passed
         * to the called routine.
         */
        switch (pkt->dn_dir) {
        case DN_TO_IP_OUT:
            (void)ip_output((struct mbuf *)pkt, NULL, NULL, 0, NULL);
            rt_unref (pkt->ro.ro_rt) ;
            break ;

        case DN_TO_IP_IN :
            ip_input((struct mbuf *)pkt) ;
            break ;

#ifdef BRIDGE
        case DN_TO_BDG_FWD : {
            struct mbuf *m = (struct mbuf *)pkt ;
            bdg_forward(&m, pkt->ifp);
            if (m)
                m_freem(m);
            }
            break ;
#endif

        default:
            printf("dummynet: bad switch %d!\n", pkt->dn_dir);
            m_freem(pkt->dn_m);
            break ;
        }
        FREE(pkt, M_IPFW);
    }
    /* if there are leftover packets, put them into the heap for the next event */
    if ( (pkt = pipe->p.head) )
        heap_insert(&extract_heap, pkt->output_time, pipe ) ;
    /* XXX should check the result of heap_insert; on failure we could drain
     * the whole pipe p and hope to be more successful in the future.
     */
}

/*
 * ready_event() is invoked every time the queue must enter the
 * scheduler, either because the first packet arrives, or because
 * a previously scheduled event fired.
 * On invocation, drain as many pkts as possible (could be 0) and then,
 * if there are leftover packets, reinsert the queue in the scheduler.
 */
static void
ready_event(struct dn_flow_queue *q)
{
    struct dn_pkt *pkt;
    struct dn_pipe *p = q->p ;
    int p_was_empty = (p->p.head == NULL) ;

    while ( (pkt = q->r.head) != NULL ) {
        int len = pkt->dn_m->m_pkthdr.len;
        int len_scaled = p->bandwidth ? len*8*hz : 0 ;
        /*
         * bandwidth==0 (no limit) means we can drain as many pkts as
         * needed from the queue. Setting len_scaled = 0 does the job.
         */
        if (len_scaled > q->numbytes )
            break ;
        /*
         * extract pkt from queue, compute output time (could be now)
         * and put into delay line (p_queue)
         */
        q->numbytes -= len_scaled ;
        q->r.head = DN_NEXT(pkt) ;
        q->len-- ;
        q->len_bytes -= len ;

        pkt->output_time = curr_time + p->delay ;

        if (p->p.head == NULL)
            p->p.head = pkt;
        else
            DN_NEXT(p->p.tail) = pkt;
        p->p.tail = pkt;
        DN_NEXT(p->p.tail) = NULL;
    }
    /*
     * If we have more packets queued, schedule the next ready event
     * (this can only occur when bandwidth != 0, otherwise we would have
     * flushed the whole queue in the previous loop).
     * For this purpose compute how many ticks to go for the next
     * event, accounting for packet size and residual credit. This means
     * we compute the finish time of the packet.
     */
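    /*
     * Illustrative example (added, not from the original sources): credit
     * and cost are both expressed in bit*hz units. With hz = 1000 and
     * p->bandwidth = 1,000,000 bit/s, a 1500-byte packet costs
     * 1500*8*1000 = 12,000,000 units, while the queue earns p->bandwidth =
     * 1,000,000 units per tick; so with numbytes == 0 the finish time below
     * is t = 12 ticks, i.e. 12 ms -- exactly 12,000 bits at 1 Mbit/s.
     */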
    if ( (pkt = q->r.head) != NULL ) { /* this implies bandwidth != 0 */
        dn_key t ;
        t = (pkt->dn_m->m_pkthdr.len*8*hz - q->numbytes + p->bandwidth - 1 ) /
                p->bandwidth ;
        q->numbytes += t * p->bandwidth ;
        heap_insert(&ready_heap, curr_time + t, (void *)q );
        /* XXX should check errors on heap_insert, and drain the whole
         * queue on error hoping next time we are luckier.
         */
    }
    /*
     * If the delay line was empty call transmit_event(p) now.
     * Otherwise, the scheduler will take care of it.
     */
    if (p_was_empty)
        transmit_event(p);
}

/*
 * This is called once per tick, i.e. HZ times per second. It is used to
 * increment the current tick counter and schedule expired events.
 */
static void
dummynet(void * __unused unused)
{
    void *p ;           /* generic parameter to handler */
    struct dn_heap *h ;
    int s ;

    s = splnet();       /* avoid network interrupts... */
    curr_time++ ;
    h = &ready_heap ;
    while (h->elements > 0 && DN_KEY_LEQ(h->p[0].key, curr_time) ) {
        /*
         * XXX if the event is late, we should probably credit the queue
         * by q->p->bandwidth * (delta_ticks). On the other hand, I don't
         * think this can ever occur with this code (i.e. curr_time will
         * still be incremented by one at each tick). Things might be
         * different if we were using the counter from the high priority
         * timer.
         */
        if (h->p[0].key != curr_time)
            printf("-- dummynet: warning, event is %d ticks late\n",
                (int)(curr_time - h->p[0].key));
        p = h->p[0].object ;
        heap_extract(h);    /* need to extract before processing */
        ready_event(p) ;
    }
    h = &extract_heap ;
    while (h->elements > 0 && DN_KEY_LEQ(h->p[0].key, curr_time) ) {
        if (h->p[0].key != curr_time)   /* XXX same as above */
            printf("-- dummynet: warning, event is %d ticks late\n",
                (int)(curr_time - h->p[0].key));
        p = h->p[0].object ;
        heap_extract(&extract_heap);
        transmit_event(p);
    }
    splx(s);
    timeout(dummynet, NULL, 1);
}

/*
 * Unconditionally expire empty queues in case of shortage.
 * Returns the number of queues freed.
 */
static int
expire_queues(struct dn_pipe *pipe)
{
    struct dn_flow_queue *q, *prev ;
    int i, initial_elements = pipe->rq_elements ;

    if (pipe->last_expired == time_second)
        return 0 ;
    pipe->last_expired = time_second ;
    for (i = 0 ; i <= pipe->rq_size ; i++) /* last one is overflow */
        for (prev=NULL, q = pipe->rq[i] ; q != NULL ; )
            if (q->r.head != NULL) {
                prev = q ;
                q = q->next ;
            } else { /* entry is idle, expire it */
                struct dn_flow_queue *old_q = q ;

                if (prev != NULL)
                    prev->next = q = q->next ;
                else
                    pipe->rq[i] = q = q->next ;
                pipe->rq_elements-- ;
                free(old_q, M_IPFW);
            }
    return initial_elements - pipe->rq_elements ;
}

/*
 * If room, create a new queue and put at head of slot i;
 * otherwise, create or use the default queue.
 */
static struct dn_flow_queue *
create_queue(struct dn_pipe *pipe, int i)
{
    struct dn_flow_queue *q ;

    if (pipe->rq_elements > pipe->rq_size * dn_max_ratio &&
            expire_queues(pipe) == 0) {
        /*
         * No way to get room, use or create overflow queue.
         */
        i = pipe->rq_size ;
        if ( pipe->rq[i] != NULL )
            return pipe->rq[i] ;
    }
    q = malloc(sizeof(*q), M_IPFW, M_DONTWAIT) ;
    if (q == NULL) {
        printf("sorry, cannot allocate queue for new flow\n");
        return NULL ;
    }
    bzero(q, sizeof(*q) ); /* needed */
    q->p = pipe ;
    q->hash_slot = i ;
    q->next = pipe->rq[i] ;
    pipe->rq[i] = q ;
    pipe->rq_elements++ ;
    return q ;
}

/*
 * Given a pipe and a pkt in last_pkt, find a matching queue
 * after appropriate masking. The queue is moved to front
 * so that further searches take less time.
 */
static struct dn_flow_queue *
find_queue(struct dn_pipe *pipe)
{
    int i = 0 ; /* we need i and q for new allocations */
    struct dn_flow_queue *q, *prev;

    if ( !(pipe->flags & DN_HAVE_FLOW_MASK) )
        q = pipe->rq[0] ;
    else {
        /* first, do the masking */
        last_pkt.dst_ip &= pipe->flow_mask.dst_ip ;
        last_pkt.src_ip &= pipe->flow_mask.src_ip ;
        last_pkt.dst_port &= pipe->flow_mask.dst_port ;
        last_pkt.src_port &= pipe->flow_mask.src_port ;
        last_pkt.proto &= pipe->flow_mask.proto ;
        last_pkt.flags = 0 ; /* we don't care about this one */
        /* then, hash function */
        i = ( (last_pkt.dst_ip) & 0xffff ) ^
            ( (last_pkt.dst_ip >> 15) & 0xffff ) ^
            ( (last_pkt.src_ip << 1) & 0xffff ) ^
            ( (last_pkt.src_ip >> 16 ) & 0xffff ) ^
            (last_pkt.dst_port << 1) ^ (last_pkt.src_port) ^
            (last_pkt.proto );
        i = i % pipe->rq_size ;
        /* finally, scan the current list for a match */
        searches++ ;
        for (prev=NULL, q = pipe->rq[i] ; q ; ) {
            search_steps++;
            if (bcmp(&last_pkt, &(q->id), sizeof(q->id) ) == 0)
                break ; /* found */
            else if (pipe_expire && q->r.head == NULL) {
                /* entry is idle, expire it */
                struct dn_flow_queue *old_q = q ;

                if (prev != NULL)
                    prev->next = q = q->next ;
                else
                    pipe->rq[i] = q = q->next ;
                pipe->rq_elements-- ;
                free(old_q, M_IPFW);
                continue ;
            }
            prev = q ;
            q = q->next ;
        }
        if (q && prev != NULL) { /* found and not in front */
            prev->next = q->next ;
            q->next = pipe->rq[i] ;
            pipe->rq[i] = q ;
        }
    }
    if (q == NULL) { /* no match, need to allocate a new entry */
        q = create_queue(pipe, i);
        if (q != NULL)
            q->id = last_pkt ;
    }
    return q ;
}
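
/*
 * Usage note (added as an illustration, not part of the original text):
 * dynamic per-flow queues only exist when the pipe was configured with a
 * flow mask, e.g. with something along the lines of
 *
 *	ipfw pipe 1 config bw 64Kbit/s mask src-ip 0x000000ff
 *
 * so that every masked source address gets its own queue, each emulating
 * the configured bandwidth and delay independently. Without a mask,
 * rq_size is 1 and all traffic through the pipe shares the single queue
 * in rq[0].
 */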

/*
 * dummynet hook for packets.
 */
int
dummynet_io(int pipe_nr, int dir,
    struct mbuf *m, struct ifnet *ifp, struct route *ro,
    struct sockaddr_in *dst,
    struct ip_fw_chain *rule)
{
    struct dn_pkt *pkt;
    struct dn_pipe *p;
    int len = m->m_pkthdr.len ;
    struct dn_flow_queue *q = NULL ;
    int s ;

    s = splimp();
    /* XXX check the spl protection. It might be unnecessary since we
     * run this at splnet() already.
     */

    DEB(printf("-- last_pkt dst 0x%08x/0x%04x src 0x%08x/0x%04x\n",
        last_pkt.dst_ip, last_pkt.dst_port,
        last_pkt.src_ip, last_pkt.src_port);)

    pipe_nr &= 0xffff ;
    /*
     * locate pipe. The first lookup is expensive, later ones have
     * direct access through the cached pointer.
     */
    if ( (p = rule->rule->pipe_ptr) == NULL ) {
        for (p = all_pipes; p && p->pipe_nr != pipe_nr; p = p->next)
            ;
        if (p == NULL)
            goto dropit ;   /* this pipe does not exist! */
        rule->rule->pipe_ptr = p ; /* record pipe ptr for the future */
    }
    q = find_queue(p);
    /*
     * update statistics, then check the various reasons to drop the pkt
     */
    if ( q == NULL )
        goto dropit ;       /* cannot allocate queue */
    q->tot_bytes += len ;
    q->tot_pkts++ ;
    if ( p->plr && random() < p->plr )
        goto dropit ;       /* random pkt drop */
    if ( p->queue_size && q->len >= p->queue_size)
        goto dropit ;       /* queue count overflow */
    if ( p->queue_size_bytes && len + q->len_bytes > p->queue_size_bytes)
        goto dropit ;       /* queue size overflow */
    /*
     * can implement RED drops here if needed.
     */

    pkt = (struct dn_pkt *)malloc(sizeof (*pkt), M_IPFW, M_NOWAIT) ;
    if ( pkt == NULL )
        goto dropit ;       /* cannot allocate packet header */
    /* ok, I can handle the pkt now... */
    bzero(pkt, sizeof(*pkt) ); /* XXX expensive, see if we can remove it */
    /* build and enqueue packet + parameters */
    pkt->hdr.mh_type = MT_DUMMYNET ;
    pkt->hdr.mh_data = (caddr_t)rule ;
    DN_NEXT(pkt) = NULL;
    pkt->dn_m = m;
    pkt->dn_dir = dir ;

    pkt->ifp = ifp;
    if (dir == DN_TO_IP_OUT) {
        /*
         * We need to copy *ro because for ICMP pkts (and maybe others)
         * the caller passed a pointer into the stack; dst might also be
         * a pointer into *ro so it needs to be updated.
         */
        pkt->ro = *ro;
        if (ro->ro_rt)
            ro->ro_rt->rt_refcnt++ ; /* XXX */
        if (dst == (struct sockaddr_in *)&ro->ro_dst) /* dst points into ro */
            dst = (struct sockaddr_in *)&(pkt->ro.ro_dst) ;

        pkt->dn_dst = dst;
    }
    if (q->r.head == NULL)
        q->r.head = pkt;
    else
        DN_NEXT(q->r.tail) = pkt;
    q->r.tail = pkt;
    q->len++;
    q->len_bytes += len ;

    /*
     * If the queue was empty (this is the first pkt) then call ready_event()
     * now to make the pkt go out at the right time. Otherwise we are done,
     * as there must be a ready event already scheduled.
     */
    if (q->r.head == pkt)       /* r_queue was empty */
        ready_event( q );
    splx(s);
    return 0;

dropit:
    splx(s);
    if (q)
        q->drops++ ;
    m_freem(m);
    return 0 ; /* XXX should I return an error ? */
}

/*
 * The rt_unref below is only needed when (pkt->dn_dir == DN_TO_IP_OUT).
 * Checking for that would probably let us skip the initial bzero of dn_pkt.
 */
#define DN_FREE_PKT(pkt) {		\
	struct dn_pkt *n = pkt ;	\
	rt_unref ( n->ro.ro_rt ) ;	\
	m_freem(n->dn_m);		\
	pkt = DN_NEXT(n) ;		\
	free(n, M_IPFW) ; }
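
/*
 * Note (added for clarity): DN_FREE_PKT() advances its argument to the
 * next packet in the list before freeing the current one, which is why
 * the loops in purge_pipe() below can simply say
 *
 *	for (pkt = q->r.head ; pkt ; )
 *		DN_FREE_PKT(pkt) ;
 *
 * without an explicit increment expression.
 */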
/*
 * dispose of all packets queued on a pipe
 */
static void
purge_pipe(struct dn_pipe *pipe)
{
    struct dn_pkt *pkt ;
    struct dn_flow_queue *q, *qn ;
    int i ;

    for (i = 0 ; i <= pipe->rq_size ; i++ ) /* XXX last one is overflow */
        for (q = pipe->rq[i] ; q ; q = qn ) {
            for (pkt = q->r.head ; pkt ; )
                DN_FREE_PKT(pkt) ;
            qn = q->next ;
            free(q, M_IPFW);
        }
    for (pkt = pipe->p.head ; pkt ; )
        DN_FREE_PKT(pkt) ;
}

/*
 * Delete all pipes and heaps, returning their memory. Must also
 * remove references from all ipfw rules to all pipes.
 */
static void
dummynet_flush(void)
{
    struct dn_pipe *curr_p, *p ;
    struct ip_fw_chain *chain ;
    int s ;

    s = splnet() ;

    /* remove all references to pipes ... */
    for (chain= ip_fw_chain.lh_first ; chain; chain = chain->chain.le_next)
        chain->rule->pipe_ptr = NULL ;
    /* prevent future matches... */
    p = all_pipes ;
    all_pipes = NULL ;
    /* and free heaps so we don't have unwanted events */
    if (ready_heap.size >0 )
        free(ready_heap.p, M_IPFW);
    ready_heap.elements = ready_heap.size = 0 ;
    if (extract_heap.size >0 )
        free(extract_heap.p, M_IPFW);
    extract_heap.elements = extract_heap.size = 0 ;
    splx(s) ;
    /*
     * Now purge all queued pkts and delete all pipes
     */
    for ( ; p ; ) {
        purge_pipe(p);
        curr_p = p ;
        p = p->next ;
        free(curr_p->rq, M_IPFW);
        free(curr_p, M_IPFW);
    }
}

extern struct ip_fw_chain *ip_fw_default_rule ;
/*
 * When a firewall rule is deleted, scan all pipes and queues; any queued
 * packet still pointing to that rule is re-pointed to the default rule.
 */
void
dn_rule_delete(void *r)
{
    struct dn_pipe *p ;
    struct dn_flow_queue *q ;
    struct dn_pkt *pkt ;
    int i ;

    for ( p = all_pipes ; p ; p = p->next ) {
        for (i = 0 ; i <= p->rq_size ; i++) /* XXX last one is overflow */
            for (q = p->rq[i] ; q ; q = q->next )
                for (pkt = q->r.head ; pkt ; pkt = DN_NEXT(pkt) )
                    if (pkt->hdr.mh_data == r)
                        pkt->hdr.mh_data = (void *)ip_fw_default_rule ;
        for (pkt = p->p.head ; pkt ; pkt = DN_NEXT(pkt) )
            if (pkt->hdr.mh_data == r)
                pkt->hdr.mh_data = (void *)ip_fw_default_rule ;
    }
}

/*
 * Handler for the various dummynet socket options (get, flush, config, del)
 */
static int
ip_dn_ctl(struct sockopt *sopt)
{
    int error = 0 ;
    struct dn_pipe *p, tmp_pipe ;

    struct dn_pipe *a, *b ;

    /* Disallow sets in really-really secure mode. */
    if (sopt->sopt_dir == SOPT_SET && securelevel >= 3)
        return (EPERM);

    switch (sopt->sopt_name) {
    default :
        panic("ip_dn_ctl -- unknown option");

    case IP_DUMMYNET_GET :
    {
        char *buf, *bp ;    /* bp is the "copy-pointer" */
        size_t size ;
        int s ;

        s = splnet() ;      /* to avoid things changing while we work! */
        for (p = all_pipes, size = 0 ; p ; p = p->next )
            size += sizeof( *p ) +
                p->rq_elements * sizeof(struct dn_flow_queue);
        buf = malloc(size, M_TEMP, M_DONTWAIT);
        if (buf == NULL) {
            error = ENOBUFS ;
            splx(s);
            break ;
        }
        for (p = all_pipes, bp = buf ; p ; p = p->next ) {
            int i ;
            struct dn_pipe *pipe_bp = (struct dn_pipe *)bp ;
            struct dn_flow_queue *q;
            int copied = 0 ;

            /*
             * copy the pipe descriptor into *bp, convert delay back to ms,
             * then copy the queue descriptor(s) one at a time.
             */
            bcopy(p, bp, sizeof( *p ) );
            pipe_bp->delay = (pipe_bp->delay * 1000) / hz ;
            bp += sizeof( *p ) ;
            for (i = 0 ; i <= p->rq_size ; i++)
                for (q = p->rq[i] ; q ; q = q->next, bp += sizeof(*q) ) {
                    if (q->hash_slot != i)
                        printf("++ at %d: wrong slot (have %d, should be %d)\n",
                            copied, q->hash_slot, i);
                    copied++ ;
                    bcopy(q, bp, sizeof( *q ) );
                }
            if (copied != p->rq_elements)
                printf("++ wrong count, have %d should be %d\n",
                    copied, p->rq_elements);
        }
        splx(s);
        error = sooptcopyout(sopt, buf, size);
        FREE(buf, M_TEMP);
    }
        break ;

    case IP_DUMMYNET_FLUSH :
        dummynet_flush() ;
        break ;

    case IP_DUMMYNET_CONFIGURE :
        p = &tmp_pipe ;
        error = sooptcopyin(sopt, p, sizeof *p, sizeof *p);
        if (error)
            break ;
        /*
         * The config program passes parameters as follows:
         * bw = bits/second (0 means no limits),
         * delay = ms, must be translated into ticks.
         * queue_size = slots (0 means no limit)
         * queue_size_bytes = bytes (0 means no limit)
         *    only one of the two queue limits can be set; both must be
         *    bound-checked.
         */
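        /*
         * Numeric example (added for illustration): with hz = 100 a
         * configured delay of 250 ms becomes 250 * 100 / 1000 = 25 ticks;
         * sub-tick precision is lost, so e.g. 5 ms with hz = 100 rounds
         * down to 0 ticks. The reverse conversion (ticks back to ms) is
         * done in the IP_DUMMYNET_GET handler above.
         */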
        p->delay = ( p->delay * hz ) / 1000 ;
        if (p->queue_size == 0 && p->queue_size_bytes == 0)
            p->queue_size = 50 ;
        if (p->queue_size != 0 )    /* buffers are prevailing */
            p->queue_size_bytes = 0 ;
        if (p->queue_size > 100)
            p->queue_size = 50 ;
        if (p->queue_size_bytes > 1024*1024)
            p->queue_size_bytes = 1024*1024 ;
        for (a = NULL , b = all_pipes ; b && b->pipe_nr < p->pipe_nr ;
                a = b , b = b->next) ;
        if (b && b->pipe_nr == p->pipe_nr) {
            b->bandwidth = p->bandwidth ;
            b->delay = p->delay ;
            b->queue_size = p->queue_size ;
            b->queue_size_bytes = p->queue_size_bytes ;
            b->plr = p->plr ;
            b->flow_mask = p->flow_mask ;
            b->flags = p->flags ;
        } else { /* brand new pipe */
            int s ;
            struct dn_pipe *x;
            x = malloc(sizeof(struct dn_pipe), M_IPFW, M_DONTWAIT) ;
            if (x == NULL) {
                printf("ip_dummynet.c: no memory for new pipe\n");
                error = ENOSPC ;
                break ;
            }
            bzero(x, sizeof(*x) );
            x->bandwidth = p->bandwidth ;
            x->delay = p->delay ;
            x->pipe_nr = p->pipe_nr ;
            x->queue_size = p->queue_size ;
            x->queue_size_bytes = p->queue_size_bytes ;
            x->plr = p->plr ;
            x->flow_mask = p->flow_mask ;
            x->flags = p->flags ;
            if (x->flags & DN_HAVE_FLOW_MASK) { /* allocate some slots */
                int l = p->rq_size ;
                if (l == 0)
                    l = dn_hash_size ;
                if (l < 4)
                    l = 4 ;
                else if (l > 1024)
                    l = 1024 ;
                x->rq_size = l ;
            } else /* one is enough for null mask */
                x->rq_size = 1 ;
            x->rq = malloc((1 + x->rq_size) * sizeof(struct dn_flow_queue *),
                    M_IPFW, M_DONTWAIT) ;
            if (x->rq == NULL ) {
                printf("sorry, cannot allocate queue\n");
                free(x, M_IPFW);
                error = ENOSPC ;
                break ;
            }
            bzero(x->rq, (1+x->rq_size) * sizeof(struct dn_flow_queue *) );
            x->rq_elements = 0 ;

            s = splnet() ;
            x->next = b ;
            if (a == NULL)
                all_pipes = x ;
            else
                a->next = x ;
            splx(s);
        }
        break ;

    case IP_DUMMYNET_DEL :
        p = &tmp_pipe ;
        error = sooptcopyin(sopt, p, sizeof *p, sizeof *p);
        if (error)
            break ;

        for (a = NULL , b = all_pipes ; b && b->pipe_nr < p->pipe_nr ;
                a = b , b = b->next) ;
        if (b && b->pipe_nr == p->pipe_nr) { /* found pipe */
            int s ;
            struct ip_fw_chain *chain ;

            s = splnet() ;
            chain = ip_fw_chain.lh_first;

            if (a == NULL)
                all_pipes = b->next ;
            else
                a->next = b->next ;
            /*
             * remove references to this pipe from the ip_fw rules.
             */
            for (; chain; chain = chain->chain.le_next)
                if (chain->rule->pipe_ptr == b)
                    chain->rule->pipe_ptr = NULL ;
            /* remove all references to b from heaps */
            if (ready_heap.elements > 0) {
                struct dn_heap *h = &ready_heap ;
                int i = 0, found = 0 ;
                while ( i < h->elements ) {
                    if (((struct dn_flow_queue *)(h->p[i].object))->p == b) {
                        /* found one */
                        h->elements-- ;
                        h->p[i] = h->p[h->elements] ;
                        found++ ;
                    } else
                        i++ ;
                }
                if (found)
                    heapify(h);
            }
            if (extract_heap.elements > 0) {
                struct dn_heap *h = &extract_heap ;
                int i = 0, found = 0 ;
                while ( i < h->elements ) {
                    if (h->p[i].object == b) { /* found one */
                        h->elements-- ;
                        h->p[i] = h->p[h->elements] ;
                        found++ ;
                    } else
                        i++ ;
                }
                if (found)
                    heapify(h);
            }
            splx(s);
            purge_pipe(b);  /* remove pkts from here */
            free(b->rq, M_IPFW);
            free(b, M_IPFW);
        }
        break ;
    }
    return error ;
}

void
ip_dn_init(void)
{
    printf("DUMMYNET initialized (000212)\n");
    all_pipes = NULL ;
    ready_heap.size = ready_heap.elements = 0 ;
    extract_heap.size = extract_heap.elements = 0 ;
    ip_dn_ctl_ptr = ip_dn_ctl;
    timeout(dummynet, NULL, 1);
}

#ifdef DUMMYNET_MODULE

#include <sys/exec.h>
#include <sys/sysent.h>
#include <sys/lkm.h>

MOD_MISC(dummynet);

static ip_dn_ctl_t *old_dn_ctl_ptr ;

static int
dummynet_load(struct lkm_table *lkmtp, int cmd)
{
    int s = splnet();
    old_dn_ctl_ptr = ip_dn_ctl_ptr;
    ip_dn_init();
    splx(s);
    return 0;
}

static int
dummynet_unload(struct lkm_table *lkmtp, int cmd)
{
    int s = splnet();
    ip_dn_ctl_ptr = old_dn_ctl_ptr;
    splx(s);
    dummynet_flush();
    printf("DUMMYNET unloaded\n");
    return 0;
}

int
dummynet_mod(struct lkm_table *lkmtp, int cmd, int ver)
{
    DISPATCH(lkmtp, cmd, ver, dummynet_load, dummynet_unload, lkm_nullcmd);
}
#endif