FreeBSD/Linux Kernel Cross Reference
sys/altq/altq_red.c
/*	$NetBSD: altq_red.c,v 1.10 2004/02/13 18:02:05 wiz Exp $	*/
/*	$KAME: altq_red.c,v 1.9 2002/01/07 11:25:40 kjc Exp $	*/

/*
 * Copyright (C) 1997-2000
 *	Sony Computer Science Laboratories Inc.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */
/*
 * Copyright (c) 1990-1994 Regents of the University of California.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the Computer Systems
 *	Engineering Group at Lawrence Berkeley Laboratory.
 * 4. Neither the name of the University nor of the Laboratory may be used
 *    to endorse or promote products derived from this software without
 *    specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: altq_red.c,v 1.10 2004/02/13 18:02:05 wiz Exp $");

#if defined(__FreeBSD__) || defined(__NetBSD__)
#include "opt_altq.h"
#if (__FreeBSD__ != 2)
#include "opt_inet.h"
#ifdef __FreeBSD__
#include "opt_inet6.h"
#endif
#endif
#endif /* __FreeBSD__ || __NetBSD__ */
#ifdef ALTQ_RED	/* red is enabled by ALTQ_RED option in opt_altq.h */

#include <sys/param.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/errno.h>
#include <sys/kernel.h>
#ifdef ALTQ_FLOWVALVE
#include <sys/queue.h>
#include <sys/time.h>
#endif

#include <net/if.h>
#include <net/if_types.h>

#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#ifdef INET6
#include <netinet/ip6.h>
#endif

#include <altq/altq.h>
#include <altq/altq_conf.h>
#include <altq/altq_red.h>
#ifdef ALTQ_FLOWVALVE
#include <altq/altq_flowvalve.h>
#endif

/*
 * ALTQ/RED (Random Early Detection) implementation using 32-bit
 * fixed-point calculation.
 *
 * written by kjc using the ns code as a reference.
 * you can learn more about red and ns from Sally's home page at
 * http://www-nrg.ee.lbl.gov/floyd/
 *
 * most of the red parameter values are fixed in this implementation
 * to prevent fixed-point overflow/underflow.
 * if you change the parameters, watch out for overflow/underflow!
 *
 * the parameters used are recommended values by Sally.
 * the corresponding ns config looks like:
 *	q_weight=0.00195
 *	minthresh=5 maxthresh=15 queue-size=60
 *	linterm=30
 *	dropmech=drop-tail
 *	bytes=false (can't be handled by 32-bit fixed-point)
 *	doubleq=false dqthresh=false
 *	wait=true
 */
/*
 * alternative red parameters for a slow link.
 *
 * assume the queue length grows from zero to L and then stays at L;
 * it takes N packets for q_avg to reach 63% of L.
 * when q_weight is 0.002, N is about 500 packets.
 * for a slow link like dial-up, 500 packets takes more than 1 minute!
 * when q_weight is 0.008, N is about 127 packets.
 * when q_weight is 0.016, N is about 63 packets.
 * bursts of 50 packets are allowed for 0.002, bursts of 25 packets
 * are allowed for 0.016.
 * see Sally's paper for more details.
 */
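
/*
 * Illustrative sketch (not compiled in): the packet counts above come
 * from iterating the same EWMA update that red_addq() performs below.
 * Counting how many back-to-back packets it takes avg to climb to 63%
 * (about 1 - 1/e) of a constant queue length L gives roughly the
 * inverse of q_weight, e.g. about 500 for q_weight = 1/512.
 */
#if 0
static int
ewma_rise_npkts(int inv_w, int L)
{
	int avg = 0, n = 0;	/* avg scaled by 2^12, as in this file */

	while (avg < ((L << 12) * 63) / 100) {
		/* avg += Wq * (L - avg), Wq = 1/inv_w */
		avg += (L << 12) / inv_w - avg / inv_w;
		n++;
	}
	return (n);
}
#endif
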
/* normal red parameters */
#define	W_WEIGHT	512	/* inverse of weight of EWMA (511/512) */
				/* q_weight = 0.00195 */

/* red parameters for a slow link */
#define	W_WEIGHT_1	128	/* inverse of weight of EWMA (127/128) */
				/* q_weight = 0.0078125 */

/* red parameters for a very slow link (e.g., dialup) */
#define	W_WEIGHT_2	64	/* inverse of weight of EWMA (63/64) */
				/* q_weight = 0.015625 */

/* fixed-point uses 12-bit decimal places */
#define	FP_SHIFT	12	/* fixed-point shift */

/* red parameters for drop probability */
#define	INV_P_MAX	10	/* inverse of max drop probability */
#define	TH_MIN		5	/* min threshold */
#define	TH_MAX		15	/* max threshold */

#define	RED_LIMIT	60	/* default max queue length */
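
/*
 * A quick note on the fixed-point convention (illustration only, not
 * compiled in): a real value x is stored as x << FP_SHIFT, so 1.0 is
 * 4096, and a product of two scaled values must be shifted back down.
 */
#if 0
static int
fp_mul(int a, int b)	/* a, b scaled by 2^FP_SHIFT; small values only */
{
	return ((a * b) >> FP_SHIFT);
}
/* fp_mul(2048, 2048) == 1024, i.e. 0.5 * 0.5 == 0.25 */
#endif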

/*
 * our default policy for forced-drop is drop-tail.
 * (in altq-1.1.2 or earlier, the default was random-drop.
 * but it makes more sense to punish the cause of the surge.)
 * to switch to the random-drop policy, define "RED_RANDOM_DROP".
 */

#ifdef ALTQ_FLOWVALVE
/*
 * flow-valve is an extension to protect red from unresponsive flows
 * and to promote end-to-end congestion control.
 * flow-valve observes the average drop rates of the flows that have
 * experienced packet drops in the recent past.
 * when the average drop rate exceeds the threshold, the flow is
 * blocked by the flow-valve.  the trapped flow should back off
 * exponentially to escape from the flow-valve.
 */
#ifdef RED_RANDOM_DROP
#error "random-drop can't be used with flow-valve!"
#endif
#endif /* ALTQ_FLOWVALVE */

/* red_list keeps all red_queue_t's allocated. */
static red_queue_t *red_list = NULL;

/* default red parameter values */
static int default_th_min = TH_MIN;
static int default_th_max = TH_MAX;
static int default_inv_pmax = INV_P_MAX;

/* internal function prototypes */
static int red_enqueue __P((struct ifaltq *, struct mbuf *,
			    struct altq_pktattr *));
static struct mbuf *red_dequeue __P((struct ifaltq *, int));
static int red_request __P((struct ifaltq *, int, void *));
static void red_purgeq __P((red_queue_t *));
static int red_detach __P((red_queue_t *));
#ifdef ALTQ_FLOWVALVE
static __inline struct fve *flowlist_lookup __P((struct flowvalve *,
			 struct altq_pktattr *, struct timeval *));
static __inline struct fve *flowlist_reclaim __P((struct flowvalve *,
						  struct altq_pktattr *));
static __inline void flowlist_move_to_head __P((struct flowvalve *,
						struct fve *));
static __inline int fv_p2f __P((struct flowvalve *, int));
static struct flowvalve *fv_alloc __P((struct red *));
static void fv_destroy __P((struct flowvalve *));
static int fv_checkflow __P((struct flowvalve *, struct altq_pktattr *,
			     struct fve **));
static void fv_dropbyred __P((struct flowvalve *fv, struct altq_pktattr *,
			      struct fve *));
#endif

/*
 * red device interface
 */
altqdev_decl(red);

int
redopen(dev, flag, fmt, p)
	dev_t dev;
	int flag, fmt;
	struct proc *p;
{
	/* everything will be done when the queueing scheme is attached. */
	return 0;
}

int
redclose(dev, flag, fmt, p)
	dev_t dev;
	int flag, fmt;
	struct proc *p;
{
	red_queue_t *rqp;
	int err, error = 0;

	while ((rqp = red_list) != NULL) {
		/* destroy all */
		err = red_detach(rqp);
		if (err != 0 && error == 0)
			error = err;
	}

	return error;
}

int
redioctl(dev, cmd, addr, flag, p)
	dev_t dev;
	ioctlcmd_t cmd;
	caddr_t addr;
	int flag;
	struct proc *p;
{
	red_queue_t *rqp;
	struct red_interface *ifacep;
	struct ifnet *ifp;
	int error = 0;

	/* check super-user privilege */
	switch (cmd) {
	case RED_GETSTATS:
		break;
	default:
#if (__FreeBSD_version > 400000)
		if ((error = suser(p)) != 0)
#else
		if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
#endif
			return (error);
		break;
	}

	switch (cmd) {

	case RED_ENABLE:
		ifacep = (struct red_interface *)addr;
		if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
			error = EBADF;
			break;
		}
		error = altq_enable(rqp->rq_ifq);
		break;

	case RED_DISABLE:
		ifacep = (struct red_interface *)addr;
		if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
			error = EBADF;
			break;
		}
		error = altq_disable(rqp->rq_ifq);
		break;

	case RED_IF_ATTACH:
		ifp = ifunit(((struct red_interface *)addr)->red_ifname);
		if (ifp == NULL) {
			error = ENXIO;
			break;
		}

		/* allocate and initialize red_queue_t */
		MALLOC(rqp, red_queue_t *, sizeof(red_queue_t), M_DEVBUF, M_WAITOK);
		if (rqp == NULL) {
			error = ENOMEM;
			break;
		}
		(void)memset(rqp, 0, sizeof(red_queue_t));

		MALLOC(rqp->rq_q, class_queue_t *, sizeof(class_queue_t),
		       M_DEVBUF, M_WAITOK);
		if (rqp->rq_q == NULL) {
			FREE(rqp, M_DEVBUF);
			error = ENOMEM;
			break;
		}
		(void)memset(rqp->rq_q, 0, sizeof(class_queue_t));

		rqp->rq_red = red_alloc(0, 0, 0, 0, 0, 0);
		if (rqp->rq_red == NULL) {
			FREE(rqp->rq_q, M_DEVBUF);
			FREE(rqp, M_DEVBUF);
			error = ENOMEM;
			break;
		}

		rqp->rq_ifq = &ifp->if_snd;
		qtail(rqp->rq_q) = NULL;
		qlen(rqp->rq_q) = 0;
		qlimit(rqp->rq_q) = RED_LIMIT;
		qtype(rqp->rq_q) = Q_RED;

		/*
		 * set RED to this ifnet structure.
		 */
		error = altq_attach(rqp->rq_ifq, ALTQT_RED, rqp,
				    red_enqueue, red_dequeue, red_request,
				    NULL, NULL);
		if (error) {
			red_destroy(rqp->rq_red);
			FREE(rqp->rq_q, M_DEVBUF);
			FREE(rqp, M_DEVBUF);
			break;
		}

		/* add this state to the red list */
		rqp->rq_next = red_list;
		red_list = rqp;
		break;

	case RED_IF_DETACH:
		ifacep = (struct red_interface *)addr;
		if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
			error = EBADF;
			break;
		}
		error = red_detach(rqp);
		break;

	case RED_GETSTATS:
		do {
			struct red_stats *q_stats;
			red_t *rp;

			q_stats = (struct red_stats *)addr;
			if ((rqp = altq_lookup(q_stats->iface.red_ifname,
					       ALTQT_RED)) == NULL) {
				error = EBADF;
				break;
			}

			q_stats->q_len = qlen(rqp->rq_q);
			q_stats->q_limit = qlimit(rqp->rq_q);

			rp = rqp->rq_red;
			q_stats->q_avg = rp->red_avg >> rp->red_wshift;
			q_stats->xmit_cnt = rp->red_stats.xmit_cnt;
			q_stats->drop_cnt = rp->red_stats.drop_cnt;
			q_stats->drop_forced = rp->red_stats.drop_forced;
			q_stats->drop_unforced = rp->red_stats.drop_unforced;
			q_stats->marked_packets = rp->red_stats.marked_packets;

			q_stats->weight = rp->red_weight;
			q_stats->inv_pmax = rp->red_inv_pmax;
			q_stats->th_min = rp->red_thmin;
			q_stats->th_max = rp->red_thmax;

#ifdef ALTQ_FLOWVALVE
			if (rp->red_flowvalve != NULL) {
				struct flowvalve *fv = rp->red_flowvalve;
				q_stats->fv_flows = fv->fv_flows;
				q_stats->fv_pass = fv->fv_stats.pass;
				q_stats->fv_predrop = fv->fv_stats.predrop;
				q_stats->fv_alloc = fv->fv_stats.alloc;
				q_stats->fv_escape = fv->fv_stats.escape;
			} else {
#endif /* ALTQ_FLOWVALVE */
				q_stats->fv_flows = 0;
				q_stats->fv_pass = 0;
				q_stats->fv_predrop = 0;
				q_stats->fv_alloc = 0;
				q_stats->fv_escape = 0;
#ifdef ALTQ_FLOWVALVE
			}
#endif /* ALTQ_FLOWVALVE */
		} while (0);
		break;

	case RED_CONFIG:
		do {
			struct red_conf *fc;
			red_t *new;
			int s, limit;

			fc = (struct red_conf *)addr;
			if ((rqp = altq_lookup(fc->iface.red_ifname,
					       ALTQT_RED)) == NULL) {
				error = EBADF;
				break;
			}
			new = red_alloc(fc->red_weight,
					fc->red_inv_pmax,
					fc->red_thmin,
					fc->red_thmax,
					fc->red_flags,
					fc->red_pkttime);
			if (new == NULL) {
				error = ENOMEM;
				break;
			}

			s = splnet();
			red_purgeq(rqp);
			limit = fc->red_limit;
			if (limit < fc->red_thmax)
				limit = fc->red_thmax;
			qlimit(rqp->rq_q) = limit;
			fc->red_limit = limit;	/* write back the new value */

			red_destroy(rqp->rq_red);
			rqp->rq_red = new;

			splx(s);

			/* write back new values */
			fc->red_limit = limit;
			fc->red_inv_pmax = rqp->rq_red->red_inv_pmax;
			fc->red_thmin = rqp->rq_red->red_thmin;
			fc->red_thmax = rqp->rq_red->red_thmax;

		} while (0);
		break;

	case RED_SETDEFAULTS:
		do {
			struct redparams *rp;

			rp = (struct redparams *)addr;

			default_th_min = rp->th_min;
			default_th_max = rp->th_max;
			default_inv_pmax = rp->inv_pmax;
		} while (0);
		break;

	default:
		error = EINVAL;
		break;
	}
	return error;
}

static int
red_detach(rqp)
	red_queue_t *rqp;
{
	red_queue_t *tmp;
	int error = 0;

	if (ALTQ_IS_ENABLED(rqp->rq_ifq))
		altq_disable(rqp->rq_ifq);

	if ((error = altq_detach(rqp->rq_ifq)))
		return (error);

	if (red_list == rqp)
		red_list = rqp->rq_next;
	else {
		for (tmp = red_list; tmp != NULL; tmp = tmp->rq_next)
			if (tmp->rq_next == rqp) {
				tmp->rq_next = rqp->rq_next;
				break;
			}
		if (tmp == NULL)
			printf("red_detach: no state found in red_list!\n");
	}

	red_destroy(rqp->rq_red);
	FREE(rqp->rq_q, M_DEVBUF);
	FREE(rqp, M_DEVBUF);
	return (error);
}

/*
 * red support routines
 */

red_t *
red_alloc(weight, inv_pmax, th_min, th_max, flags, pkttime)
	int weight, inv_pmax, th_min, th_max;
	int flags, pkttime;
{
	red_t *rp;
	int w, i;
	int npkts_per_sec;

	MALLOC(rp, red_t *, sizeof(red_t), M_DEVBUF, M_WAITOK);
	if (rp == NULL)
		return (NULL);
	(void)memset(rp, 0, sizeof(red_t));

	rp->red_avg = 0;
	rp->red_idle = 1;

	if (weight == 0)
		rp->red_weight = W_WEIGHT;
	else
		rp->red_weight = weight;
	if (inv_pmax == 0)
		rp->red_inv_pmax = default_inv_pmax;
	else
		rp->red_inv_pmax = inv_pmax;
	if (th_min == 0)
		rp->red_thmin = default_th_min;
	else
		rp->red_thmin = th_min;
	if (th_max == 0)
		rp->red_thmax = default_th_max;
	else
		rp->red_thmax = th_max;

	rp->red_flags = flags;

	if (pkttime == 0)
		/* default packet time: 1000 bytes / 10Mbps * 8 * 1000000 */
		rp->red_pkttime = 800;
	else
		rp->red_pkttime = pkttime;

	if (weight == 0) {
		/* when the link is very slow, adjust red parameters */
		npkts_per_sec = 1000000 / rp->red_pkttime;
		if (npkts_per_sec < 50) {
			/* up to about 400Kbps */
			rp->red_weight = W_WEIGHT_2;
		} else if (npkts_per_sec < 300) {
			/* up to about 2.4Mbps */
			rp->red_weight = W_WEIGHT_1;
		}
	}

	/* calculate wshift.  weight must be power of 2 */
	w = rp->red_weight;
	for (i = 0; w > 1; i++)
		w = w >> 1;
	rp->red_wshift = i;
	w = 1 << rp->red_wshift;
	if (w != rp->red_weight) {
		printf("invalid weight value %d for red! use %d\n",
		       rp->red_weight, w);
		rp->red_weight = w;
	}

	/*
	 * thmin_s and thmax_s are scaled versions of th_min and th_max
	 * to be compared with avg.
	 */
	rp->red_thmin_s = rp->red_thmin << (rp->red_wshift + FP_SHIFT);
	rp->red_thmax_s = rp->red_thmax << (rp->red_wshift + FP_SHIFT);

	/*
	 * precompute probability denominator
	 *  probd = (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point
	 */
	rp->red_probd = (2 * (rp->red_thmax - rp->red_thmin)
			 * rp->red_inv_pmax) << FP_SHIFT;

	/* allocate weight table */
	rp->red_wtab = wtab_alloc(rp->red_weight);

	microtime(&rp->red_last);
#ifdef ALTQ_FLOWVALVE
	if (flags & REDF_FLOWVALVE)
		rp->red_flowvalve = fv_alloc(rp);
	/* if fv_alloc fails, flowvalve is just disabled */
#endif
	return (rp);
}
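
/*
 * Worked example of the precomputed values above (illustration only),
 * using the defaults th_min = 5, th_max = 15, inv_pmax = 10 and
 * weight = 512 (wshift = 9):
 *
 *	thmin_s = 5  << (9 + 12) = 10485760
 *	thmax_s = 15 << (9 + 12) = 31457280
 *	probd   = (2 * (15 - 5) * 10) << 12 = 819200
 *
 * avg in red_addq() carries the same 2^(wshift + FP_SHIFT) scaling,
 * so it can be compared against thmin_s/thmax_s directly.
 */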

void
red_destroy(rp)
	red_t *rp;
{
#ifdef ALTQ_FLOWVALVE
	if (rp->red_flowvalve != NULL)
		fv_destroy(rp->red_flowvalve);
#endif
	wtab_destroy(rp->red_wtab);
	FREE(rp, M_DEVBUF);
}

void
red_getstats(rp, sp)
	red_t *rp;
	struct redstats *sp;
{
	sp->q_avg = rp->red_avg >> rp->red_wshift;
	sp->xmit_cnt = rp->red_stats.xmit_cnt;
	sp->drop_cnt = rp->red_stats.drop_cnt;
	sp->drop_forced = rp->red_stats.drop_forced;
	sp->drop_unforced = rp->red_stats.drop_unforced;
	sp->marked_packets = rp->red_stats.marked_packets;
}

/*
 * enqueue routine:
 *
 *	returns: 0 when successfully queued.
 *		 ENOBUFS when drop occurs.
 */
static int
red_enqueue(ifq, m, pktattr)
	struct ifaltq *ifq;
	struct mbuf *m;
	struct altq_pktattr *pktattr;
{
	red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;

	if (red_addq(rqp->rq_red, rqp->rq_q, m, pktattr) < 0)
		return ENOBUFS;
	ifq->ifq_len++;
	return 0;
}

int
red_addq(rp, q, m, pktattr)
	red_t *rp;
	class_queue_t *q;
	struct mbuf *m;
	struct altq_pktattr *pktattr;
{
	int avg, droptype;
	int n;
#ifdef ALTQ_FLOWVALVE
	struct fve *fve = NULL;

	if (rp->red_flowvalve != NULL && rp->red_flowvalve->fv_flows > 0)
		if (fv_checkflow(rp->red_flowvalve, pktattr, &fve)) {
			m_freem(m);
			return (-1);
		}
#endif

	avg = rp->red_avg;

	/*
	 * if we were idle, we pretend that n packets arrived during
	 * the idle period.
	 */
	if (rp->red_idle) {
		struct timeval now;
		int t;

		rp->red_idle = 0;
		microtime(&now);
		t = (now.tv_sec - rp->red_last.tv_sec);
		if (t > 60) {
			/*
			 * being idle for more than 1 minute, set avg to zero.
			 * this prevents t from overflowing.
			 */
			avg = 0;
		} else {
			t = t * 1000000 + (now.tv_usec - rp->red_last.tv_usec);
			n = t / rp->red_pkttime - 1;

			/* the following line does (avg = (1 - Wq)^n * avg) */
			if (n > 0)
				avg = (avg >> FP_SHIFT) *
					pow_w(rp->red_wtab, n);
		}
	}

	/* run estimator.  (note: avg is scaled by WEIGHT in fixed-point) */
	avg += (qlen(q) << FP_SHIFT) - (avg >> rp->red_wshift);
	rp->red_avg = avg;		/* save the new value */

	/*
	 * red_count keeps a tally of arriving traffic that has not
	 * been dropped.
	 */
	rp->red_count++;

	/* see if we drop early */
	droptype = DTYPE_NODROP;
	if (avg >= rp->red_thmin_s && qlen(q) > 1) {
		if (avg >= rp->red_thmax_s) {
			/* avg >= th_max: forced drop */
			droptype = DTYPE_FORCED;
		} else if (rp->red_old == 0) {
			/* first exceeds th_min */
			rp->red_count = 1;
			rp->red_old = 1;
		} else if (drop_early((avg - rp->red_thmin_s) >> rp->red_wshift,
				      rp->red_probd, rp->red_count)) {
			/* mark or drop by red */
			if ((rp->red_flags & REDF_ECN) &&
			    mark_ecn(m, pktattr, rp->red_flags)) {
				/* successfully marked.  do not drop. */
				rp->red_count = 0;
#ifdef RED_STATS
				rp->red_stats.marked_packets++;
#endif
			} else {
				/* unforced drop by red */
				droptype = DTYPE_EARLY;
			}
		}
	} else {
		/* avg < th_min */
		rp->red_old = 0;
	}

	/*
	 * if the queue length hits the hard limit, it's a forced drop.
	 */
	if (droptype == DTYPE_NODROP && qlen(q) >= qlimit(q))
		droptype = DTYPE_FORCED;

#ifdef RED_RANDOM_DROP
	/* if successful or forced drop, enqueue this packet. */
	if (droptype != DTYPE_EARLY)
		_addq(q, m);
#else
	/* if successful, enqueue this packet. */
	if (droptype == DTYPE_NODROP)
		_addq(q, m);
#endif
	if (droptype != DTYPE_NODROP) {
		if (droptype == DTYPE_EARLY) {
			/* drop the incoming packet */
#ifdef RED_STATS
			rp->red_stats.drop_unforced++;
#endif
		} else {
			/* forced drop, select a victim packet in the queue. */
#ifdef RED_RANDOM_DROP
			m = _getq_random(q);
#endif
#ifdef RED_STATS
			rp->red_stats.drop_forced++;
#endif
		}
#ifdef RED_STATS
		PKTCNTR_ADD(&rp->red_stats.drop_cnt, m_pktlen(m));
#endif
		rp->red_count = 0;
#ifdef ALTQ_FLOWVALVE
		if (rp->red_flowvalve != NULL)
			fv_dropbyred(rp->red_flowvalve, pktattr, fve);
#endif
		m_freem(m);
		return (-1);
	}
	/* successfully queued */
#ifdef RED_STATS
	PKTCNTR_ADD(&rp->red_stats.xmit_cnt, m_pktlen(m));
#endif
	return (0);
}
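
/*
 * The estimator line in red_addq() above is the usual EWMA
 *
 *	avg = (1 - 1/2^wshift) * avg + (1/2^wshift) * qlen
 *
 * kept in a scaling of 2^(wshift + FP_SHIFT).  For example, with
 * wshift = 9 and a previous avg of 0, one packet with qlen = 10 gives
 * avg += (10 << 12) - 0, i.e. 40960, which unscales to 40960 / 2^21 =
 * 10/512: exactly one weight's worth of the instantaneous length.
 */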

/*
 * early-drop probability is calculated as follows:
 *   prob = p_max * (avg - th_min) / (th_max - th_min)
 *   prob_a = prob / (2 - count*prob)
 *	    = (avg-th_min) / (2*(th_max-th_min)*inv_p_max - count*(avg-th_min))
 * here prob_a increases as the count of successive undropped packets
 * increases.
 * (prob_a starts from prob/2, becomes prob when (count == (1 / prob)),
 * becomes 1 when (count >= (2 / prob))).
 */
int
drop_early(fp_len, fp_probd, count)
	int fp_len;	/* (avg - TH_MIN) in fixed-point */
	int fp_probd;	/* (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point */
	int count;	/* how many successive undropped packets */
{
	int d;		/* denominator of drop-probability */

	d = fp_probd - count * fp_len;
	if (d <= 0)
		/* count exceeds the hard limit: drop or mark */
		return (1);

	/*
	 * now the range of d is [1..600] in fixed-point. (when
	 * th_max-th_min=10 and p_max=1/30)
	 * drop probability = (avg - TH_MIN) / d
	 */

	if ((random() % d) < fp_len) {
		/* drop or mark */
		return (1);
	}
	/* no drop/mark */
	return (0);
}
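
/*
 * Worked example of drop_early() (illustration only), using the
 * default probd = 819200 computed in red_alloc().  If avg sits
 * halfway between the thresholds, (avg - th_min) = 5 unscaled, so
 * fp_len = 5 << 12 = 20480.  With count = 1:
 *
 *	d = 819200 - 1 * 20480 = 798720
 *	P(drop) = 20480 / 798720, about 0.026
 *
 * which is roughly prob/2 for prob = p_max * 5/10 = 0.05, as the
 * comment above predicts; d shrinks (and P grows) as count rises.
 */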

/*
 * try to set the CE bit in the packet.
 * returns 1 if successfully marked, 0 otherwise.
 */
int
mark_ecn(m, pktattr, flags)
	struct mbuf *m;
	struct altq_pktattr *pktattr;
	int flags;
{
	struct mbuf *m0;

	if (pktattr == NULL ||
	    (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6))
		return (0);

	/* verify that pattr_hdr is within the mbuf data */
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		if ((pktattr->pattr_hdr >= m0->m_data) &&
		    (pktattr->pattr_hdr < m0->m_data + m0->m_len))
			break;
	if (m0 == NULL) {
		/* ick, pattr_hdr is stale */
		pktattr->pattr_af = AF_UNSPEC;
		return (0);
	}

	switch (pktattr->pattr_af) {
	case AF_INET:
		if (flags & REDF_ECN4) {
			struct ip *ip = (struct ip *)pktattr->pattr_hdr;
			u_int8_t otos;
			int sum;

			if (ip->ip_v != 4)
				return (0);	/* version mismatch! */

			if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_NOTECT)
				return (0);	/* not-ECT */
			if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_CE)
				return (1);	/* already marked */

			/*
			 * ecn-capable but not marked,
			 * mark CE and update checksum
			 */
			otos = ip->ip_tos;
			ip->ip_tos |= IPTOS_ECN_CE;
			/*
			 * update checksum (from RFC1624)
			 *	   HC' = ~(~HC + ~m + m')
			 */
			sum = ~ntohs(ip->ip_sum) & 0xffff;
			sum += (~otos & 0xffff) + ip->ip_tos;
			sum = (sum >> 16) + (sum & 0xffff);
			sum += (sum >> 16);	/* add carry */
			ip->ip_sum = htons(~sum & 0xffff);
			return (1);
		}
		break;
#ifdef INET6
	case AF_INET6:
		if (flags & REDF_ECN6) {
			struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
			u_int32_t flowlabel;

			flowlabel = ntohl(ip6->ip6_flow);
			if ((flowlabel >> 28) != 6)
				return (0);	/* version mismatch! */
			if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
			    (IPTOS_ECN_NOTECT << 20))
				return (0);	/* not-ECT */
			if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
			    (IPTOS_ECN_CE << 20))
				return (1);	/* already marked */
			/*
			 * ecn-capable but not marked, mark CE
			 */
			flowlabel |= (IPTOS_ECN_CE << 20);
			ip6->ip6_flow = htonl(flowlabel);
			return (1);
		}
		break;
#endif /* INET6 */
	}

	/* not marked */
	return (0);
}
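
/*
 * Standalone sketch (not compiled in) of the RFC 1624 incremental
 * checksum update used above: when a 16-bit word changes from m to
 * m', the ones-complement checksum HC becomes HC' = ~(~HC + ~m + m')
 * without re-summing the whole header.
 */
#if 0
static u_int16_t
cksum_update16(u_int16_t hc, u_int16_t m, u_int16_t mnew)
{
	u_int32_t sum;

	sum = (~hc & 0xffff) + (~m & 0xffff) + mnew;
	sum = (sum >> 16) + (sum & 0xffff);	/* fold the carries */
	sum += (sum >> 16);
	return (~sum & 0xffff);
}
#endif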

/*
 * dequeue routine:
 *	must be called in splnet.
 *
 *	returns: mbuf dequeued.
 *		 NULL when no packet is available in the queue.
 */

static struct mbuf *
red_dequeue(ifq, op)
	struct ifaltq *ifq;
	int op;
{
	red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
	struct mbuf *m;

	if (op == ALTDQ_POLL)
		return qhead(rqp->rq_q);

	/* op == ALTDQ_REMOVE */
	m = red_getq(rqp->rq_red, rqp->rq_q);
	if (m != NULL)
		ifq->ifq_len--;
	return (m);
}

struct mbuf *
red_getq(rp, q)
	red_t *rp;
	class_queue_t *q;
{
	struct mbuf *m;

	if ((m = _getq(q)) == NULL) {
		if (rp->red_idle == 0) {
			rp->red_idle = 1;
			microtime(&rp->red_last);
		}
		return NULL;
	}

	rp->red_idle = 0;
	return (m);
}

static int
red_request(ifq, req, arg)
	struct ifaltq *ifq;
	int req;
	void *arg;
{
	red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;

	switch (req) {
	case ALTRQ_PURGE:
		red_purgeq(rqp);
		break;
	}
	return (0);
}

static void
red_purgeq(rqp)
	red_queue_t *rqp;
{
	_flushq(rqp->rq_q);
	if (ALTQ_IS_ENABLED(rqp->rq_ifq))
		rqp->rq_ifq->ifq_len = 0;
}


/*
 * helper routine to calibrate avg during idle.
 * pow_w(wtab, n) returns (1 - Wq)^n in fixed-point.
 * here Wq = 1/weight and the code assumes Wq is close to zero.
 *
 * w_tab[n] holds ((1 - Wq)^(2^n)) in fixed-point.
 */
static struct wtab *wtab_list = NULL;	/* pointer to wtab list */

struct wtab *
wtab_alloc(weight)
	int weight;
{
	struct wtab *w;
	int i;

	for (w = wtab_list; w != NULL; w = w->w_next)
		if (w->w_weight == weight) {
			w->w_refcount++;
			return (w);
		}

	MALLOC(w, struct wtab *, sizeof(struct wtab), M_DEVBUF, M_WAITOK);
	if (w == NULL)
		panic("wtab_alloc: malloc failed!");
	(void)memset(w, 0, sizeof(struct wtab));
	w->w_weight = weight;
	w->w_refcount = 1;
	w->w_next = wtab_list;
	wtab_list = w;

	/* initialize the weight table */
	w->w_tab[0] = ((weight - 1) << FP_SHIFT) / weight;
	for (i = 1; i < 32; i++) {
		w->w_tab[i] = (w->w_tab[i-1] * w->w_tab[i-1]) >> FP_SHIFT;
		if (w->w_tab[i] == 0 && w->w_param_max == 0)
			w->w_param_max = 1 << i;
	}

	return (w);
}

int
wtab_destroy(w)
	struct wtab *w;
{
	struct wtab *prev;

	if (--w->w_refcount > 0)
		return (0);

	if (wtab_list == w)
		wtab_list = w->w_next;
	else for (prev = wtab_list; prev->w_next != NULL; prev = prev->w_next)
		if (prev->w_next == w) {
			prev->w_next = w->w_next;
			break;
		}

	FREE(w, M_DEVBUF);
	return (0);
}

int32_t
pow_w(w, n)
	struct wtab *w;
	int n;
{
	int i, bit;
	int32_t val;

	if (n >= w->w_param_max)
		return (0);

	val = 1 << FP_SHIFT;
	if (n <= 0)
		return (val);

	bit = 1;
	i = 0;
	while (n) {
		if (n & bit) {
			val = (val * w->w_tab[i]) >> FP_SHIFT;
			n &= ~bit;
		}
		i++;
		bit <<= 1;
	}
	return (val);
}
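
/*
 * Usage note for pow_w() (illustration only): n is decomposed in
 * binary and the precomputed squarings are multiplied together, so
 * e.g. n = 5 (binary 101) costs two fixed-point multiplies:
 *
 *	(1 - Wq)^5 = w_tab[0] * w_tab[2]
 *
 * this is how red_addq() decays avg after an idle gap of n packet
 * times without looping n times.
 */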

#ifdef ALTQ_FLOWVALVE

#define	FV_PSHIFT	7	/* weight of average drop rate -- 1/128 */
#define	FV_PSCALE(x)	((x) << FV_PSHIFT)
#define	FV_PUNSCALE(x)	((x) >> FV_PSHIFT)
#define	FV_FSHIFT	5	/* weight of average fraction -- 1/32 */
#define	FV_FSCALE(x)	((x) << FV_FSHIFT)
#define	FV_FUNSCALE(x)	((x) >> FV_FSHIFT)

#define	FV_TIMER	(3 * hz)	/* timer value for garbage collector */
#define	FV_FLOWLISTSIZE	64	/* how many flows in flowlist */

#define	FV_N		10	/* update fve_f every FV_N packets */

#define	FV_BACKOFFTHRESH 1	/* backoff threshold interval in seconds */
#define	FV_TTHRESH	3	/* time threshold to delete fve */
#define	FV_ALPHA	5	/* extra packet count */

#if (__FreeBSD_version > 300000)
#define	FV_TIMESTAMP(tp)	getmicrotime(tp)
#else
#define	FV_TIMESTAMP(tp)	{ (*(tp)) = time; }
#endif

/*
 * Brtt table: 127 entry table to convert drop rate (p) to
 * the corresponding bandwidth fraction (f)
 * the following equation is implemented to use scaled values,
 * fve_p and fve_f, in the fixed point format.
 *
 *   Brtt(p) = 1 / (sqrt(4*p/3) + min(1, 3*sqrt(p*6/8)) * p * (1 + 32*p*p))
 *   f = Brtt(p) / (max_th + alpha)
 */
#define	BRTT_SIZE	128
#define	BRTT_SHIFT	12
#define	BRTT_MASK	0x0007f000
#define	BRTT_PMAX	(1 << (FV_PSHIFT + FP_SHIFT))

const int brtt_tab[BRTT_SIZE] = {
	0, 1262010, 877019, 703694, 598706, 525854, 471107, 427728,
	392026, 361788, 335598, 312506, 291850, 273158, 256081, 240361,
	225800, 212247, 199585, 187788, 178388, 169544, 161207, 153333,
	145888, 138841, 132165, 125836, 119834, 114141, 108739, 103612,
	98747, 94129, 89746, 85585, 81637, 77889, 74333, 70957,
	67752, 64711, 61824, 59084, 56482, 54013, 51667, 49440,
	47325, 45315, 43406, 41591, 39866, 38227, 36667, 35184,
	33773, 32430, 31151, 29933, 28774, 27668, 26615, 25611,
	24653, 23740, 22868, 22035, 21240, 20481, 19755, 19062,
	18399, 17764, 17157, 16576, 16020, 15487, 14976, 14487,
	14017, 13567, 13136, 12721, 12323, 11941, 11574, 11222,
	10883, 10557, 10243, 9942, 9652, 9372, 9103, 8844,
	8594, 8354, 8122, 7898, 7682, 7474, 7273, 7079,
	6892, 6711, 6536, 6367, 6204, 6046, 5893, 5746,
	5603, 5464, 5330, 5201, 5075, 4954, 4836, 4722,
	4611, 4504, 4400, 4299, 4201, 4106, 4014, 3924
};
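
/*
 * Userland sketch of how a table like brtt_tab[] can be generated (an
 * assumption for illustration; no generator ships with this file).
 * Index i encodes the drop rate p = i/128, and each entry is Brtt(p)
 * scaled by 2^(FP_SHIFT + FV_FSHIFT) to match the scaling of fve_f.
 * Compile in userland and link with -lm.
 */
#if 0
#include <math.h>

static int
brtt_entry(int i)
{
	double p, brtt;

	if (i == 0)
		return (0);
	p = (double)i / 128;
	brtt = 1.0 / (sqrt(4 * p / 3) +
	    fmin(1.0, 3 * sqrt(p * 6 / 8)) * p * (1 + 32 * p * p));
	return ((int)(brtt * (1 << (FP_SHIFT + FV_FSHIFT))));
}
#endif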

static __inline struct fve *
flowlist_lookup(fv, pktattr, now)
	struct flowvalve *fv;
	struct altq_pktattr *pktattr;
	struct timeval *now;
{
	struct fve *fve;
	int flows;
	struct ip *ip;
#ifdef INET6
	struct ip6_hdr *ip6;
#endif
	struct timeval tthresh;

	if (pktattr == NULL)
		return (NULL);

	tthresh.tv_sec = now->tv_sec - FV_TTHRESH;
	flows = 0;
	/*
	 * search the flow list
	 */
	switch (pktattr->pattr_af) {
	case AF_INET:
		ip = (struct ip *)pktattr->pattr_hdr;
		TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru) {
			if (fve->fve_lastdrop.tv_sec == 0)
				break;
			if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) {
				fve->fve_lastdrop.tv_sec = 0;
				break;
			}
			if (fve->fve_flow.flow_af == AF_INET &&
			    fve->fve_flow.flow_ip.ip_src.s_addr ==
			    ip->ip_src.s_addr &&
			    fve->fve_flow.flow_ip.ip_dst.s_addr ==
			    ip->ip_dst.s_addr)
				return (fve);
			flows++;
		}
		break;
#ifdef INET6
	case AF_INET6:
		ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
		TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru) {
			if (fve->fve_lastdrop.tv_sec == 0)
				break;
			if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) {
				fve->fve_lastdrop.tv_sec = 0;
				break;
			}
			if (fve->fve_flow.flow_af == AF_INET6 &&
			    IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_src,
					       &ip6->ip6_src) &&
			    IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_dst,
					       &ip6->ip6_dst))
				return (fve);
			flows++;
		}
		break;
#endif /* INET6 */

	default:
		/* unknown protocol.  no drop. */
		return (NULL);
	}
	fv->fv_flows = flows;	/* save the number of active fve's */
	return (NULL);
}

static __inline struct fve *
flowlist_reclaim(fv, pktattr)
	struct flowvalve *fv;
	struct altq_pktattr *pktattr;
{
	struct fve *fve;
	struct ip *ip;
#ifdef INET6
	struct ip6_hdr *ip6;
#endif

	/*
	 * get an entry from the tail of the LRU list.
	 */
	fve = TAILQ_LAST(&fv->fv_flowlist, fv_flowhead);

	switch (pktattr->pattr_af) {
	case AF_INET:
		ip = (struct ip *)pktattr->pattr_hdr;
		fve->fve_flow.flow_af = AF_INET;
		fve->fve_flow.flow_ip.ip_src = ip->ip_src;
		fve->fve_flow.flow_ip.ip_dst = ip->ip_dst;
		break;
#ifdef INET6
	case AF_INET6:
		ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
		fve->fve_flow.flow_af = AF_INET6;
		fve->fve_flow.flow_ip6.ip6_src = ip6->ip6_src;
		fve->fve_flow.flow_ip6.ip6_dst = ip6->ip6_dst;
		break;
#endif
	}

	fve->fve_state = Green;
	fve->fve_p = 0;
	fve->fve_f = 0;
	fve->fve_ifseq = fv->fv_ifseq - 1;
	fve->fve_count = 0;

	fv->fv_flows++;
#ifdef FV_STATS
	fv->fv_stats.alloc++;
#endif
	return (fve);
}

static __inline void
flowlist_move_to_head(fv, fve)
	struct flowvalve *fv;
	struct fve *fve;
{
	if (TAILQ_FIRST(&fv->fv_flowlist) != fve) {
		TAILQ_REMOVE(&fv->fv_flowlist, fve, fve_lru);
		TAILQ_INSERT_HEAD(&fv->fv_flowlist, fve, fve_lru);
	}
}

/*
 * allocate flowvalve structure
 */
static struct flowvalve *
fv_alloc(rp)
	struct red *rp;
{
	struct flowvalve *fv;
	struct fve *fve;
	int i, num;

	num = FV_FLOWLISTSIZE;
	MALLOC(fv, struct flowvalve *, sizeof(struct flowvalve),
	       M_DEVBUF, M_WAITOK);
	if (fv == NULL)
		return (NULL);
	(void)memset(fv, 0, sizeof(struct flowvalve));

	MALLOC(fv->fv_fves, struct fve *, sizeof(struct fve) * num,
	       M_DEVBUF, M_WAITOK);
	if (fv->fv_fves == NULL) {
		FREE(fv, M_DEVBUF);
		return (NULL);
	}
	(void)memset(fv->fv_fves, 0, sizeof(struct fve) * num);

	fv->fv_flows = 0;
	TAILQ_INIT(&fv->fv_flowlist);
	for (i = 0; i < num; i++) {
		fve = &fv->fv_fves[i];
		fve->fve_lastdrop.tv_sec = 0;
		TAILQ_INSERT_TAIL(&fv->fv_flowlist, fve, fve_lru);
	}

	/* initialize drop rate threshold in scaled fixed-point */
	fv->fv_pthresh = (FV_PSCALE(1) << FP_SHIFT) / rp->red_inv_pmax;

	/* initialize drop rate to fraction table */
	MALLOC(fv->fv_p2ftab, int *, sizeof(int) * BRTT_SIZE,
	       M_DEVBUF, M_WAITOK);
	if (fv->fv_p2ftab == NULL) {
		FREE(fv->fv_fves, M_DEVBUF);
		FREE(fv, M_DEVBUF);
		return (NULL);
	}
	/*
	 * create the p2f table.
	 * (shift is used to keep the precision)
	 */
	for (i = 1; i < BRTT_SIZE; i++) {
		int f;

		f = brtt_tab[i] << 8;
		fv->fv_p2ftab[i] = (f / (rp->red_thmax + FV_ALPHA)) >> 8;
	}

	return (fv);
}

static void
fv_destroy(fv)
	struct flowvalve *fv;
{
	FREE(fv->fv_p2ftab, M_DEVBUF);
	FREE(fv->fv_fves, M_DEVBUF);
	FREE(fv, M_DEVBUF);
}

static __inline int
fv_p2f(fv, p)
	struct flowvalve *fv;
	int p;
{
	int val, f;

	if (p >= BRTT_PMAX)
		f = fv->fv_p2ftab[BRTT_SIZE-1];
	else if ((val = (p & BRTT_MASK)))
		f = fv->fv_p2ftab[(val >> BRTT_SHIFT)];
	else
		f = fv->fv_p2ftab[1];
	return (f);
}

/*
 * check if an arriving packet should be pre-dropped.
 * called from red_addq() when a packet arrives.
 * returns 1 when the packet should be pre-dropped.
 * should be called in splnet.
 */
static int
fv_checkflow(fv, pktattr, fcache)
	struct flowvalve *fv;
	struct altq_pktattr *pktattr;
	struct fve **fcache;
{
	struct fve *fve;
	struct timeval now;

	fv->fv_ifseq++;
	FV_TIMESTAMP(&now);

	if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL)
		/* no matching entry in the flowlist */
		return (0);

	*fcache = fve;

	/* update fraction f for every FV_N packets */
	if (++fve->fve_count == FV_N) {
		/*
		 * f = Wf * N / (fv_ifseq - fve_ifseq) + (1 - Wf) * f
		 */
		fve->fve_f =
			(FV_N << FP_SHIFT) / (fv->fv_ifseq - fve->fve_ifseq)
			+ fve->fve_f - FV_FUNSCALE(fve->fve_f);
		fve->fve_ifseq = fv->fv_ifseq;
		fve->fve_count = 0;
	}

	/*
	 * overpumping test
	 */
	if (fve->fve_state == Green && fve->fve_p > fv->fv_pthresh) {
		int fthresh;

		/* calculate a threshold */
		fthresh = fv_p2f(fv, fve->fve_p);
		if (fve->fve_f > fthresh)
			fve->fve_state = Red;
	}

	if (fve->fve_state == Red) {
		/*
		 * backoff test
		 */
		if (now.tv_sec - fve->fve_lastdrop.tv_sec > FV_BACKOFFTHRESH) {
			/* no drop for at least FV_BACKOFFTHRESH sec */
			fve->fve_p = 0;
			fve->fve_state = Green;
#ifdef FV_STATS
			fv->fv_stats.escape++;
#endif
		} else {
			/* block this flow */
			flowlist_move_to_head(fv, fve);
			fve->fve_lastdrop = now;
#ifdef FV_STATS
			fv->fv_stats.predrop++;
#endif
			return (1);
		}
	}

	/*
	 * p = (1 - Wp) * p
	 */
	fve->fve_p -= FV_PUNSCALE(fve->fve_p);
	if (fve->fve_p < 0)
		fve->fve_p = 0;
#ifdef FV_STATS
	fv->fv_stats.pass++;
#endif
	return (0);
}
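
/*
 * Worked example of the fraction update in fv_checkflow() above
 * (illustration only).  fve_f is scaled by 2^(FP_SHIFT + FV_FSHIFT);
 * since Wf = 1/2^FV_FSHIFT, the new-sample term Wf * N/(ifseq delta)
 * appears simply as (FV_N << FP_SHIFT) / (ifseq delta).  If this flow
 * sent FV_N = 10 of the last 40 packets on the interface:
 *
 *	sample term = (10 << 12) / 40 = 1024
 *	fve_f       = 1024 + fve_f - fve_f/32
 *
 * which converges toward 0.25 * 2^17 for a flow holding a quarter of
 * the link.
 */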

/*
 * called from red_addq when a packet is dropped by red.
 * should be called in splnet.
 */
static void
fv_dropbyred(fv, pktattr, fcache)
	struct flowvalve *fv;
	struct altq_pktattr *pktattr;
	struct fve *fcache;
{
	struct fve *fve;
	struct timeval now;

	if (pktattr == NULL)
		return;
	FV_TIMESTAMP(&now);

	if (fcache != NULL)
		/* the fve of this packet is already cached */
		fve = fcache;
	else if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL)
		fve = flowlist_reclaim(fv, pktattr);

	flowlist_move_to_head(fv, fve);

	/*
	 * update p:  the following line cancels the update
	 * in fv_checkflow() and calculates
	 *	p = Wp + (1 - Wp) * p
	 */
	fve->fve_p = (1 << FP_SHIFT) + fve->fve_p;

	fve->fve_lastdrop = now;
}
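
/*
 * Note on the scaled arithmetic above (illustration only): fve_p
 * carries a factor of 2^(FP_SHIFT + FV_PSHIFT) and Wp = 1/2^FV_PSHIFT,
 * so the constant Wp appears as (1 << FP_SHIFT).  A flow dropped on
 * every packet converges to fve_p = 2^19 (p = 1.0, cf. BRTT_PMAX),
 * while fv_checkflow() decays p by a factor of 127/128 on every
 * accepted packet.
 */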

#endif /* ALTQ_FLOWVALVE */

#ifdef KLD_MODULE

static struct altqsw red_sw =
	{"red", redopen, redclose, redioctl};

ALTQ_MODULE(altq_red, ALTQT_RED, &red_sw);

#endif /* KLD_MODULE */

#endif /* ALTQ_RED */