1 /*
2 * Copyright (c) 1998 Luigi Rizzo
3 *
4 * Redistribution and use in source forms, with and without modification,
5 * are permitted provided that this entire comment appears intact.
6 *
7 * Redistribution in binary form may occur without any restrictions.
8 * Obviously, it would be nice if you gave credit where credit is due
9 * but requiring it would be too onerous.
10 *
11 * This software is provided ``AS IS'' without any warranties of any kind.
12 *
13 * $FreeBSD: src/sys/netinet/ip_dummynet.c,v 1.1.2.6 1999/09/05 08:18:26 peter Exp $
14 */
15
16 /*
17 * This module implements IP dummynet, a bandwidth limiter/delay emulator
18 * used in conjunction with the ipfw package.
19 *
20 * Changes:
21 *
22 * 980821: changed conventions in the queueing logic
23 * packets passed from dummynet to ip_in/out are prepended with
24 * a vestigial mbuf type MT_DUMMYNET which contains a pointer
25 * to the matching rule.
26 * ip_input/output will extract the parameters, free the vestigial mbuf,
27 * and do the processing.
28 *
29 * 980519: fixed behaviour when deleting rules.
30 * 980518: added splimp()/splx() to protect against races
31 * 980513: initial release
32 */
33
34 /* include files marked with XXX are probably not needed */
35
36 #include <sys/param.h>
37 #include <sys/systm.h>
38 #include <sys/malloc.h>
39 #include <sys/mbuf.h>
40 #include <sys/queue.h> /* XXX */
41 #include <sys/kernel.h>
42 #include <sys/socket.h>
43 #include <sys/time.h>
44 #include <sys/sysctl.h>
45 #include <net/if.h>
46 #include <net/route.h>
47 #include <netinet/in.h>
48 #include <netinet/in_systm.h>
49 #include <netinet/in_var.h>
50 #include <netinet/ip.h>
51 #include <netinet/ip_fw.h>
52 #include <netinet/ip_dummynet.h>
53
54 #ifdef BRIDGE
55 #include <netinet/if_ether.h> /* for struct arpcom */
56 #include <net/bridge.h>
57 #endif
58
59 static struct dn_pipe *all_pipes = NULL ; /* list of all pipes */
60
61 static int dn_debug = 0 ; /* verbose */
62 static int dn_calls = 0 ; /* number of calls */
63 static int dn_idle = 1;
64 #ifdef SYSCTL_NODE
65 SYSCTL_NODE(_net_inet_ip, OID_AUTO, dummynet, CTLFLAG_RW, 0, "Dummynet");
66 SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, debug, CTLFLAG_RW, &dn_debug, 0, "");
67 SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, calls, CTLFLAG_RD, &dn_calls, 0, "");
68 SYSCTL_INT(_net_inet_ip_dummynet, OID_AUTO, idle, CTLFLAG_RD, &dn_idle, 0, "");
69 #endif
70
71 static int ip_dn_ctl(int optname, struct mbuf **mm);
72
73 static void dummynet(void);
74 static void dn_restart(void);
75 static void dn_move(struct dn_pipe *pipe, int immediate);
76
77 /*
78 * the following is needed when deleting a pipe, because rules can
79 * hold references to the pipe.
80 */
81 extern LIST_HEAD (ip_fw_head, ip_fw_chain) ip_fw_chain;
82
83 /*
84 * invoked to reschedule the periodic task if necessary.
85 * Should only be called when dn_idle = 1 ;
86 */
87 static void
88 dn_restart()
89 {
90 struct dn_pipe *pipe;
91
92 if (!dn_idle)
93 return;
94
95 for (pipe = all_pipes ; pipe ; pipe = pipe->next ) {
96 /* if there any pipe that needs work, restart */
97 if (pipe->r.head || pipe->p.head || pipe->numbytes < 0 ) {
98 dn_idle = 0;
99 timeout(dummynet, (caddr_t)NULL, 1);
100 return ;
101 }
102 }
103 }
104
105 /*
106 * move packets from R-queue to P-queue
107 */
108 static void
109 dn_move(struct dn_pipe *pipe, int immediate)
110 {
111 struct dn_pkt *tmp, *pkt;
112
113 /*
114 * consistency check, should catch new pipes which are
115 * not initialized properly.
116 */
117 if ( pipe->p.head == NULL &&
118 pipe->ticks_from_last_insert != pipe->delay) {
119 printf("Warning, empty pipe and delay %d (should be %a)d\n",
120 pipe->ticks_from_last_insert, pipe->delay);
121 pipe->ticks_from_last_insert = pipe->delay;
122 }
123 /* this ought to go in dn_dequeue() */
124 if (!immediate && pipe->ticks_from_last_insert < pipe->delay)
125 pipe->ticks_from_last_insert++;
126 if ( pkt = pipe->r.head ) {
127 /*
128 * Move at most numbytes bytes from src and move to dst.
129 * delay is set to ticks_from_last_insert, which
130 * is reset after the first insertion;
131 */
132 while ( pkt ) {
133 int len = pkt->dn_m->m_pkthdr.len ;
134
135 /*
136 * queue limitation: pass packets down if the len is
137 * such that the pkt would go out before the next tick.
138 */
139 if (pipe->bandwidth) {
140 int len_scaled = len*8*hz ;
141 /* numbytes is in bit/sec, scaled 8*hz ... */
142 if (pipe->numbytes < len_scaled)
143 break;
144 pipe->numbytes -= len_scaled;
145 }
146 pipe->r_len--; /* elements in queue */
147 pipe->r_len_bytes -= len ;
148
149 /*
150 * to add delay jitter, must act here. A lower value
151 * (bounded to 0) means lower delay.
152 */
153 pkt->delay = pipe->ticks_from_last_insert;
154 pipe->ticks_from_last_insert = 0;
155 /* compensate the decrement done next in dn_dequeue */
156 if (!immediate && pkt->delay >0 && pipe->p.head==NULL)
157 pkt->delay++;
158 if (pipe->p.head == NULL)
159 pipe->p.head = pkt;
160 else
161 (struct dn_pkt *)pipe->p.tail->dn_next = pkt;
162 pipe->p.tail = pkt;
163 pkt = (struct dn_pkt *)pkt->dn_next;
164 pipe->p.tail->dn_next = NULL;
165 }
166 pipe->r.head = pkt;
167
168 /*** XXX just a sanity check */
169 if ( ( pkt == NULL && pipe->r_len != 0) ||
170 ( pkt != NULL && pipe->r_len == 0) )
171 printf("-- Warning, pipe head %x len %d\n",
172 pkt, pipe->r_len);
173 }
174
175 /*
176 * deliver packets downstream after the delay in the P-queue.
177 */
178
179 if (pipe->p.head == NULL)
180 return;
181 if (!immediate)
182 pipe->p.head->delay--;
183 while ( (pkt = pipe->p.head) && pkt->delay < 1) {
184 /*
185 * first unlink, then call procedures since ip_input()
186 * can result in a call to ip_output cnd viceversa,
187 * thus causing nested calls
188 */
189 pipe->p.head = (struct dn_pkt *) pkt->dn_next ;
190
191 /*
192 * the trick to avoid flow-id settings here is to prepend a
193 * vestigial mbuf to the packet, with the following values:
194 * m_type = MT_DUMMYNET
195 * m_next = the actual mbuf to be processed by ip_input/output
196 * m_data = the matching rule
197 * The vestigial element is the same memory area used by
198 * the dn_pkt, and IS FREED HERE because it can contain
199 * parameters passed to the called routine. The buffer IS NOT
200 * A REAL MBUF, just a block of memory acquired with malloc().
201 */
202 switch (pkt->dn_dir) {
203 case DN_TO_IP_OUT: {
204 struct rtentry *tmp_rt = pkt->ro.ro_rt ;
205
206 (void)ip_output((struct mbuf *)pkt, (struct mbuf *)pkt->ifp,
207 &(pkt->ro), pkt->dn_dst, NULL);
208 if (tmp_rt)
209 tmp_rt->rt_refcnt--; /* XXX return a reference count */
210 }
211 break ;
212 case DN_TO_IP_IN :
213 ip_input((struct mbuf *)pkt) ;
214 break ;
215 #ifdef BRIDGE
216 case DN_TO_BDG_FWD : {
217 struct mbuf *m = pkt ;
218 bdg_forward( &m, pkt->ifp);
219 if (m)
220 m_freem( m );
221 }
222 break ;
223 #endif
224 default:
225 printf("dummynet: bad switch %d!\n", pkt->dn_dir);
226 m_freem(pkt->dn_m);
227 break ;
228 }
229 FREE(pkt, M_IPFW);
230 }
231 }
232
233 /*
234 * this is the periodic task that moves packets between the R-
235 * and the P- queue
236 */
237 void
238 dummynet()
239 {
240 struct dn_pipe *p ;
241 int s ;
242
243 dn_calls++ ;
244 for (p = all_pipes ; p ; p = p->next ) {
245 /*
246 * Increment the amount of data that can be sent. However,
247 * don't do that if the channel is idle
248 * (r.head == NULL && numbytes >= bandwidth).
249 * This bug fix is from tim shepard (shep@bbn.com)
250 */
251 s = splimp();
252 if (p->r.head != NULL || p->numbytes < p->bandwidth )
253 p->numbytes += p->bandwidth ;
254 dn_move(p, 0); /* is it really 0 (also below) ? */
255 splx(s);
256 }
257
258 /*
259 * finally, if some queue has data, restart the timer.
260 */
261 s = splimp();
262 dn_idle = 1;
263 dn_restart();
264 splx(s);
265 }
266
267 /*
268 * dummynet hook for packets.
269 * input and output use the same code, so i use bit 16 in the pipe
270 * number to chose the direction: 1 for output packets, 0 for input.
271 * for input, only m is significant. For output, also the others.
272 */
273 int
274 dummynet_io(int pipe_nr, int dir,
275 struct mbuf *m, struct ifnet *ifp, struct route *ro,
276 struct sockaddr_in *dst,
277 struct ip_fw_chain *rule)
278 {
279 struct dn_pkt *pkt;
280 struct dn_pipe *pipe;
281 int len = m->m_pkthdr.len ;
282
283 int s=splimp();
284
285 pipe_nr &= 0xffff ;
286 /*
287 * locate pipe. First time is expensive, next have direct access.
288 */
289
290 if ( (pipe = rule->rule->pipe_ptr) == NULL ) {
291 for (pipe=all_pipes; pipe && pipe->pipe_nr !=pipe_nr; pipe=pipe->next)
292 ;
293 if (pipe == NULL) {
294 splx(s);
295 if (dn_debug)
296 printf("warning, pkt for no pipe %d\n", pipe_nr);
297 m_freem(m);
298 return 0 ;
299 } else
300 rule->rule->pipe_ptr = pipe ;
301 }
302
303 /*
304 * should i drop ?
305 * This section implements random packet drop.
306 */
307 if ( (pipe->plr && random() < pipe->plr) ||
308 (pipe->queue_size && pipe->r_len >= pipe->queue_size) ||
309 (pipe->queue_size_bytes &&
310 len + pipe->r_len_bytes > pipe->queue_size_bytes) ||
311 (pkt = (struct dn_pkt *)malloc(sizeof (*pkt),
312 M_IPFW, M_NOWAIT) ) == NULL ) {
313 splx(s);
314 if (dn_debug)
315 printf("-- dummynet: drop from pipe %d, have %d pks, %d bytes\n",
316 pipe_nr, pipe->r_len, pipe->r_len_bytes);
317 pipe->r_drops++ ;
318 m_freem(m);
319 return 0 ; /* XXX error */
320 }
321 bzero(pkt, sizeof(*pkt) );
322 /* build and enqueue packet */
323 pkt->hdr.mh_type = MT_DUMMYNET ;
324 (struct ip_fw_chain *)pkt->hdr.mh_data = rule ;
325 pkt->dn_next = NULL;
326 pkt->dn_m = m;
327 pkt->dn_dir = dir ;
328 pkt->delay = 0;
329
330 pkt->ifp = ifp;
331 if (dir == DN_TO_IP_OUT) {
332 /*
333 * we need to copy *ro because for icmp pkts (and maybe others)
334 * the caller passed a pointer into the stack.
335 */
336 pkt->ro = *ro;
337 if (ro->ro_rt)
338 ro->ro_rt->rt_refcnt++ ; /* XXX */
339 /*
340 * and again, dst might be a pointer into *ro...
341 */
342 if (dst == &ro->ro_dst) /* dst points into ro */
343 dst = &(pkt->ro.ro_dst) ;
344
345 pkt->dn_dst = dst;
346 }
347 if (pipe->r.head == NULL)
348 pipe->r.head = pkt;
349 else
350 (struct dn_pkt *)pipe->r.tail->dn_next = pkt;
351 pipe->r.tail = pkt;
352 pipe->r_len++;
353 pipe->r_len_bytes += len ;
354
355 /*
356 * here we could implement RED if we like to
357 */
358
359 if (pipe->r.head == pkt) { /* process immediately */
360 dn_move(pipe, 1);
361 }
362 if (dn_idle)
363 dn_restart();
364 splx(s);
365 return 0;
366 }
367
368 /*
369 * dispose all packets queued on a pipe
370 */
371 static void
372 purge_pipe(struct dn_pipe *pipe)
373 {
374 struct dn_pkt *pkt, *n ;
375 struct rtentry *tmp_rt ;
376
377 for (pkt = pipe->r.head ; pkt ; ) {
378 if (tmp_rt = pkt->ro.ro_rt )
379 tmp_rt->rt_refcnt--; /* XXX return a reference count */
380 m_freem(pkt->dn_m);
381 n = pkt ;
382 pkt = (struct dn_pkt *)pkt->dn_next ;
383 free(n, M_IPFW) ;
384 }
385 for (pkt = pipe->p.head ; pkt ; ) {
386 if (tmp_rt = pkt->ro.ro_rt )
387 tmp_rt->rt_refcnt--; /* XXX return a reference count */
388 m_freem(pkt->dn_m);
389 n = pkt ;
390 pkt = (struct dn_pkt *)pkt->dn_next ;
391 free(n, M_IPFW) ;
392 }
393 }
394
395 /*
396 * delete all pipes returning memory
397 */
398 static void
399 dummynet_flush()
400 {
401 struct dn_pipe *q, *p = all_pipes ;
402 int s = splnet() ;
403
404 all_pipes = NULL ;
405 splx(s) ;
406 /*
407 * purge all queued pkts and delete all pipes
408 */
409 for ( ; p ; ) {
410 purge_pipe(p);
411 q = p ;
412 p = p->next ;
413 free(q, M_IPFW);
414 }
415 }
416
417 extern struct ip_fw_chain *ip_fw_default_rule ;
418 /*
419 * when a firewall rule is deleted, scan all pipes and remove the flow-id
420 * from packets matching this rule.
421 */
422 void
423 dn_rule_delete(void *r)
424 {
425
426 struct dn_pipe *q, *p = all_pipes ;
427
428 for ( p= all_pipes ; p ; p = p->next ) {
429 struct dn_pkt *x ;
430 for (x = p->r.head ; x ; x = (struct dn_pkt *)x->dn_next )
431 if (x->hdr.mh_data == r)
432 x->hdr.mh_data = (void *)ip_fw_default_rule ;
433 for (x = p->p.head ; x ; x = (struct dn_pkt *)x->dn_next )
434 if (x->hdr.mh_data == r)
435 x->hdr.mh_data = (void *)ip_fw_default_rule ;
436 }
437 }
438
439 /*
440 * handler for the various dummynet socket options
441 * (get, flush, config, del)
442 */
443 static int
444 ip_dn_ctl(int optname, struct mbuf **mm)
445 {
446 struct mbuf *m ;
447 if (optname == IP_DUMMYNET_GET) {
448 struct dn_pipe *p = all_pipes ;
449 *mm = m = m_get(M_WAIT, MT_SOOPTS);
450 m->m_len = 0 ;
451 m->m_next = NULL ;
452 for (; p ; p = p->next ) {
453 struct dn_pipe *q = mtod(m,struct dn_pipe *) ;
454 memcpy( m->m_data, p, sizeof(*p) );
455 /*
456 * return bw and delay in bits/s and ms, respectively
457 */
458 q->delay = (q->delay * 1000) / hz ;
459
460 m->m_len = sizeof(*p) ;
461 m->m_next = m_get(M_WAIT, MT_SOOPTS);
462 m = m->m_next ;
463 m->m_len = 0 ;
464 }
465 return 0 ;
466 }
467 if (securelevel > 2) { /* like in the firewall code... */
468 if (m) (void)m_free(m);
469 return (EPERM) ;
470 }
471 m = *mm ;
472 if (optname == IP_DUMMYNET_FLUSH) {
473 dummynet_flush() ;
474 if (m) (void)m_free(m);
475 return 0 ;
476 }
477 if (!m) /* need an argument for the following */
478 return (EINVAL);
479 if (optname == IP_DUMMYNET_CONFIGURE) {
480 struct dn_pipe *p = mtod(m,struct dn_pipe *) ;
481 struct dn_pipe *x, *a, *b ;
482 if (m->m_len != sizeof (*p) ) {
483 printf("dn_pipe Invalid length, %d instead of %d\n",
484 m->m_len, sizeof(*p) );
485 (void)m_free(m);
486 return (EINVAL);
487 }
488 /*
489 * The config program passes parameters as follows:
490 * bandwidth = bits/second (0 = no limits);
491 * delay = ms
492 * must be translated in ticks.
493 * queue_size = slots (0 = no limit)
494 * queue_size_bytes = bytes (0 = no limit)
495 * only one can be set, must be bound-checked
496 */
497 p->delay = ( p->delay * hz ) / 1000 ;
498 if (p->queue_size == 0 && p->queue_size_bytes == 0)
499 p->queue_size = 50 ;
500 if (p->queue_size != 0 ) /* buffers are prevailing */
501 p->queue_size_bytes = 0 ;
502 if (p->queue_size > 100)
503 p->queue_size = 100 ;
504 if (p->queue_size_bytes > 1024*1024)
505 p->queue_size_bytes = 1024*1024 ;
506 #if 0
507 printf("ip_dn: config pipe %d %d bit/s %d ms %d bufs\n",
508 p->pipe_nr,
509 p->bandwidth * 8 * hz ,
510 p->delay * 1000 / hz , p->queue_size);
511 #endif
512 for (a = NULL , b = all_pipes ; b && b->pipe_nr < p->pipe_nr ;
513 a = b , b = b->next) ;
514 if (b && b->pipe_nr == p->pipe_nr) {
515 /* XXX should spl and flush old pipe... */
516 b->bandwidth = p->bandwidth ;
517 b->delay = p->delay ;
518 b->ticks_from_last_insert = p->delay ;
519 b->queue_size = p->queue_size ;
520 b->queue_size_bytes = p->queue_size_bytes ;
521 b->plr = p->plr ;
522 } else {
523 int s ;
524 x = malloc(sizeof(struct dn_pipe), M_IPFW, M_DONTWAIT) ;
525 if (x == NULL) {
526 printf("ip_dummynet.c: sorry no memory\n");
527 return (ENOSPC) ;
528 }
529 bzero(x, sizeof(*x) );
530 x->bandwidth = p->bandwidth ;
531 x->delay = p->delay ;
532 x->ticks_from_last_insert = p->delay ;
533 x->pipe_nr = p->pipe_nr ;
534 x->queue_size = p->queue_size ;
535 x->queue_size_bytes = p->queue_size_bytes ;
536 x->plr = p->plr ;
537
538 s = splnet() ;
539 x->next = b ;
540 if (a == NULL)
541 all_pipes = x ;
542 else
543 a->next = x ;
544 splx(s);
545 }
546 (void)m_free(m);
547 return 0 ;
548 }
549 if (optname == IP_DUMMYNET_DEL) {
550 struct dn_pipe *p = mtod(m,struct dn_pipe *) ;
551 struct dn_pipe *x, *a, *b ;
552
553 for (a = NULL , b = all_pipes ; b && b->pipe_nr < p->pipe_nr ;
554 a = b , b = b->next) ;
555 if (b && b->pipe_nr == p->pipe_nr) { /* found pipe */
556 int s = splnet() ;
557 struct ip_fw_chain *chain = ip_fw_chain.lh_first;
558
559 if (a == NULL)
560 all_pipes = b->next ;
561 else
562 a->next = b->next ;
563 /*
564 * remove references to this pipe from the ip_fw rules.
565 */
566 for (; chain; chain = chain->chain.le_next) {
567 register struct ip_fw *const f = chain->rule;
568 if (f->pipe_ptr == b)
569 f->pipe_ptr = NULL ;
570 }
571 splx(s);
572 purge_pipe(b); /* remove pkts from here */
573 free(b, M_IPFW);
574 }
575 }
576 return 0 ;
577 }
578
579 void
580 ip_dn_init(void)
581 {
582 printf("DUMMYNET initialized (990504)\n");
583 all_pipes = NULL ;
584 ip_dn_ctl_ptr = ip_dn_ctl;
585 }
586
587 #ifdef DUMMYNET_MODULE
588
589 #include <sys/exec.h>
590 #include <sys/sysent.h>
591 #include <sys/lkm.h>
592
593 MOD_MISC(dummynet);
594
595 static ip_dn_ctl_t *old_dn_ctl_ptr ;
596
597 static int
598 dummynet_load(struct lkm_table *lkmtp, int cmd)
599 {
600 int s=splnet();
601 old_dn_ctl_ptr = ip_dn_ctl_ptr;
602 ip_dn_init();
603 splx(s);
604 return 0;
605 }
606
607 static int
608 dummynet_unload(struct lkm_table *lkmtp, int cmd)
609 {
610 int s=splnet();
611 ip_dn_ctl_ptr = old_dn_ctl_ptr;
612 splx(s);
613 dummynet_flush();
614 printf("DUMMYNET unloaded\n");
615 return 0;
616 }
617
618 int
619 dummynet_mod(struct lkm_table *lkmtp, int cmd, int ver)
620 {
621 DISPATCH(lkmtp, cmd, ver, dummynet_load, dummynet_unload, lkm_nullcmd);
622 }
623 #endif
Cache object: 732b09f2e22be1b533dc9441f6fc6d38
|