FreeBSD/Linux Kernel Cross Reference
sys/altq/altq_red.c
1 /* $NetBSD: altq_red.c,v 1.24 2006/11/16 01:32:37 christos Exp $ */
2 /* $KAME: altq_red.c,v 1.20 2005/04/13 03:44:25 suz Exp $ */
3
4 /*
5 * Copyright (C) 1997-2003
6 * Sony Computer Science Laboratories Inc. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 */
30 /*
31 * Copyright (c) 1990-1994 Regents of the University of California.
32 * All rights reserved.
33 *
34 * Redistribution and use in source and binary forms, with or without
35 * modification, are permitted provided that the following conditions
36 * are met:
37 * 1. Redistributions of source code must retain the above copyright
38 * notice, this list of conditions and the following disclaimer.
39 * 2. Redistributions in binary form must reproduce the above copyright
40 * notice, this list of conditions and the following disclaimer in the
41 * documentation and/or other materials provided with the distribution.
42 * 3. All advertising materials mentioning features or use of this software
43 * must display the following acknowledgement:
44 * This product includes software developed by the Computer Systems
45 * Engineering Group at Lawrence Berkeley Laboratory.
46 * 4. Neither the name of the University nor of the Laboratory may be used
47 * to endorse or promote products derived from this software without
48 * specific prior written permission.
49 *
50 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
53 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
60 * SUCH DAMAGE.
61 */
62
63 #include <sys/cdefs.h>
64 __KERNEL_RCSID(0, "$NetBSD: altq_red.c,v 1.24 2006/11/16 01:32:37 christos Exp $");
65
66 #ifdef _KERNEL_OPT
67 #include "opt_altq.h"
68 #include "opt_inet.h"
69 #include "pf.h"
70 #endif
71
72 #ifdef ALTQ_RED /* red is enabled by ALTQ_RED option in opt_altq.h */
73
74 #include <sys/param.h>
75 #include <sys/malloc.h>
76 #include <sys/mbuf.h>
77 #include <sys/socket.h>
78 #include <sys/systm.h>
79 #include <sys/errno.h>
80 #include <sys/kauth.h>
81 #if 1 /* ALTQ3_COMPAT */
82 #include <sys/sockio.h>
83 #include <sys/proc.h>
84 #include <sys/kernel.h>
85 #include <sys/kauth.h>
86 #ifdef ALTQ_FLOWVALVE
87 #include <sys/queue.h>
88 #include <sys/time.h>
89 #endif
90 #endif /* ALTQ3_COMPAT */
91
92 #include <net/if.h>
93
94 #include <netinet/in.h>
95 #include <netinet/in_systm.h>
96 #include <netinet/ip.h>
97 #ifdef INET6
98 #include <netinet/ip6.h>
99 #endif
100
101 #if NPF > 0
102 #include <net/pfvar.h>
103 #endif
104 #include <altq/altq.h>
105 #include <altq/altq_red.h>
106 #ifdef ALTQ3_COMPAT
107 #include <altq/altq_conf.h>
108 #ifdef ALTQ_FLOWVALVE
109 #include <altq/altq_flowvalve.h>
110 #endif
111 #endif
112
113 /*
114 * ALTQ/RED (Random Early Detection) implementation using 32-bit
115 * fixed-point calculation.
116 *
117 * written by kjc using the ns code as a reference.
118 * you can learn more about red and ns from Sally's home page at
119 * http://www-nrg.ee.lbl.gov/floyd/
120 *
121 * most of the red parameter values are fixed in this implementation
122 * to prevent fixed-point overflow/underflow.
123 * if you change the parameters, watch out for overflow/underflow!
124 *
125 * the parameters used are recommended values by Sally.
126 * the corresponding ns config looks:
127 * q_weight=0.00195
128 * minthresh=5 maxthresh=15 queue-size=60
129 * linterm=30
130 * dropmech=drop-tail
131 * bytes=false (can't be handled by 32-bit fixed-point)
132 * doubleq=false dqthresh=false
133 * wait=true
134 */
135 /*
136 * alternative red parameters for a slow link.
137 *
138 * assume the queue length becomes from zero to L and keeps L, it takes
139 * N packets for q_avg to reach 63% of L.
140 * when q_weight is 0.002, N is about 500 packets.
141 * for a slow link like dial-up, 500 packets takes more than 1 minute!
142 * when q_weight is 0.008, N is about 127 packets.
143 * when q_weight is 0.016, N is about 63 packets.
144 * bursts of 50 packets are allowed for 0.002, bursts of 25 packets
145 * are allowed for 0.016.
146 * see Sally's paper for more details.
147 */
148 /* normal red parameters */
149 #define W_WEIGHT 512 /* inverse of weight of EWMA (511/512) */
150 /* q_weight = 0.00195 */
151
152 /* red parameters for a slow link */
153 #define W_WEIGHT_1 128 /* inverse of weight of EWMA (127/128) */
154 /* q_weight = 0.0078125 */
155
156 /* red parameters for a very slow link (e.g., dialup) */
157 #define W_WEIGHT_2 64 /* inverse of weight of EWMA (63/64) */
158 /* q_weight = 0.015625 */
159
160 /* fixed-point uses 12-bit decimal places */
161 #define FP_SHIFT 12 /* fixed-point shift */
162
163 /* red parameters for drop probability */
164 #define INV_P_MAX 10 /* inverse of max drop probability */
165 #define TH_MIN 5 /* min threshold */
166 #define TH_MAX 15 /* max threshold */
167
#define RED_LIMIT	60	/* default max queue length */
169 #define RED_STATS /* collect statistics */
170
171 /*
172 * our default policy for forced-drop is drop-tail.
173 * (in altq-1.1.2 or earlier, the default was random-drop.
174 * but it makes more sense to punish the cause of the surge.)
175 * to switch to the random-drop policy, define "RED_RANDOM_DROP".
176 */
177
178 #ifdef ALTQ3_COMPAT
179 #ifdef ALTQ_FLOWVALVE
180 /*
 * flow-valve is an extension to protect red from unresponsive flows
182 * and to promote end-to-end congestion control.
183 * flow-valve observes the average drop rates of the flows that have
184 * experienced packet drops in the recent past.
185 * when the average drop rate exceeds the threshold, the flow is
186 * blocked by the flow-valve. the trapped flow should back off
187 * exponentially to escape from the flow-valve.
188 */
189 #ifdef RED_RANDOM_DROP
190 #error "random-drop can't be used with flow-valve!"
191 #endif
192 #endif /* ALTQ_FLOWVALVE */
193
194 /* red_list keeps all red_queue_t's allocated. */
195 static red_queue_t *red_list = NULL;
196
197 #endif /* ALTQ3_COMPAT */
198
199 /* default red parameter values */
200 static int default_th_min = TH_MIN;
201 static int default_th_max = TH_MAX;
202 static int default_inv_pmax = INV_P_MAX;
203
204 #ifdef ALTQ3_COMPAT
205 /* internal function prototypes */
206 static int red_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *);
207 static struct mbuf *red_dequeue(struct ifaltq *, int);
208 static int red_request(struct ifaltq *, int, void *);
209 static void red_purgeq(red_queue_t *);
210 static int red_detach(red_queue_t *);
211 #ifdef ALTQ_FLOWVALVE
212 static inline struct fve *flowlist_lookup(struct flowvalve *,
213 struct altq_pktattr *, struct timeval *);
214 static inline struct fve *flowlist_reclaim(struct flowvalve *,
215 struct altq_pktattr *);
216 static inline void flowlist_move_to_head(struct flowvalve *, struct fve *);
217 static inline int fv_p2f(struct flowvalve *, int);
218 static struct flowvalve *fv_alloc(struct red *);
219 static void fv_destroy(struct flowvalve *);
220 static int fv_checkflow(struct flowvalve *, struct altq_pktattr *,
221 struct fve **);
222 static void fv_dropbyred(struct flowvalve *fv, struct altq_pktattr *,
223 struct fve *);
224 #endif
225 #endif /* ALTQ3_COMPAT */
226
227 /*
228 * red support routines
229 */
/*
 * red_alloc: allocate and initialize a red state block.
 *
 * weight:	inverse of the EWMA weight; 0 selects a default adapted to
 *		the link speed derived from pkttime.  must be a power of 2
 *		(rounded down with a console warning otherwise).
 * inv_pmax:	inverse of the maximum drop probability (0 => default).
 * th_min:	minimum average-queue threshold in packets (0 => default).
 * th_max:	maximum average-queue threshold in packets (0 => default).
 * flags:	REDF_* flags (e.g. ECN marking, flowvalve).
 * pkttime:	average packet transmission time in usec
 *		(0 => 800 usec, i.e. 1000 bytes at 10Mbps).
 *
 * returns the new red_t, or NULL if the allocation fails.
 */
red_t *
red_alloc(int weight, int inv_pmax, int th_min, int th_max, int flags,
    int pkttime)
{
	red_t	*rp;
	int	 w, i;
	int	 npkts_per_sec;

	rp = malloc(sizeof(red_t), M_DEVBUF, M_WAITOK|M_ZERO);
	if (rp == NULL)
		return (NULL);

	/* start idle with a zero average queue length. */
	rp->red_avg = 0;
	rp->red_idle = 1;

	if (weight == 0)
		rp->red_weight = W_WEIGHT;
	else
		rp->red_weight = weight;
	if (inv_pmax == 0)
		rp->red_inv_pmax = default_inv_pmax;
	else
		rp->red_inv_pmax = inv_pmax;
	if (th_min == 0)
		rp->red_thmin = default_th_min;
	else
		rp->red_thmin = th_min;
	if (th_max == 0)
		rp->red_thmax = default_th_max;
	else
		rp->red_thmax = th_max;

	rp->red_flags = flags;

	if (pkttime == 0)
		/* default packet time: 1000 bytes / 10Mbps * 8 * 1000000 */
		rp->red_pkttime = 800;
	else
		rp->red_pkttime = pkttime;

	if (weight == 0) {
		/* when the link is very slow, adjust red parameters */
		npkts_per_sec = 1000000 / rp->red_pkttime;
		if (npkts_per_sec < 50) {
			/* up to about 400Kbps */
			rp->red_weight = W_WEIGHT_2;
		} else if (npkts_per_sec < 300) {
			/* up to about 2.4Mbps */
			rp->red_weight = W_WEIGHT_1;
		}
	}

	/* calculate wshift.  weight must be power of 2 */
	w = rp->red_weight;
	for (i = 0; w > 1; i++)
		w = w >> 1;
	rp->red_wshift = i;
	w = 1 << rp->red_wshift;
	if (w != rp->red_weight) {
		/* weight was not a power of 2: round down and warn. */
		printf("invalid weight value %d for red! use %d\n",
		    rp->red_weight, w);
		rp->red_weight = w;
	}

	/*
	 * thmin_s and thmax_s are scaled versions of th_min and th_max
	 * to be compared with avg.
	 */
	rp->red_thmin_s = rp->red_thmin << (rp->red_wshift + FP_SHIFT);
	rp->red_thmax_s = rp->red_thmax << (rp->red_wshift + FP_SHIFT);

	/*
	 * precompute probability denominator
	 *  probd = (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point
	 */
	rp->red_probd = (2 * (rp->red_thmax - rp->red_thmin)
	    * rp->red_inv_pmax) << FP_SHIFT;

	/* allocate weight table (shared and refcounted by weight value) */
	rp->red_wtab = wtab_alloc(rp->red_weight);

	microtime(&rp->red_last);
#ifdef ALTQ3_COMPAT
#ifdef ALTQ_FLOWVALVE
	if (flags & REDF_FLOWVALVE)
		rp->red_flowvalve = fv_alloc(rp);
	/* if fv_alloc fails, flowvalve is just disabled */
#endif
#endif /* ALTQ3_COMPAT */
	return (rp);
}
321
/*
 * red_destroy: release a red state block created by red_alloc(),
 * including its weight-table reference and any attached flowvalve.
 */
void
red_destroy(red_t *rp)
{
#ifdef ALTQ3_COMPAT
#ifdef ALTQ_FLOWVALVE
	if (rp->red_flowvalve != NULL)
		fv_destroy(rp->red_flowvalve);
#endif
#endif /* ALTQ3_COMPAT */
	/* drops one reference; the table may be shared by other instances. */
	wtab_destroy(rp->red_wtab);
	free(rp, M_DEVBUF);
}
334
335 void
336 red_getstats(red_t *rp, struct redstats *sp)
337 {
338 sp->q_avg = rp->red_avg >> rp->red_wshift;
339 sp->xmit_cnt = rp->red_stats.xmit_cnt;
340 sp->drop_cnt = rp->red_stats.drop_cnt;
341 sp->drop_forced = rp->red_stats.drop_forced;
342 sp->drop_unforced = rp->red_stats.drop_unforced;
343 sp->marked_packets = rp->red_stats.marked_packets;
344 }
345
/*
 * red_addq: enqueue a packet on a red-managed queue.
 *
 * runs the EWMA queue-length estimator, then decides among: no drop,
 * unforced (early) drop or ECN mark, and forced drop.  on a drop the
 * mbuf is freed here (with RED_RANDOM_DROP a random victim already in
 * the queue is dropped instead) and -1 is returned; returns 0 when the
 * packet was successfully queued.
 */
int
red_addq(red_t *rp, class_queue_t *q, struct mbuf *m,
    struct altq_pktattr *pktattr)
{
	int avg, droptype;
	int n;
#ifdef ALTQ3_COMPAT
#ifdef ALTQ_FLOWVALVE
	struct fve *fve = NULL;

	if (rp->red_flowvalve != NULL && rp->red_flowvalve->fv_flows > 0)
		if (fv_checkflow(rp->red_flowvalve, pktattr, &fve)) {
			/* the flow is currently blocked by the flowvalve. */
			m_freem(m);
			return (-1);
		}
#endif
#endif /* ALTQ3_COMPAT */

	avg = rp->red_avg;

	/*
	 * if we were idle, we pretend that n packets arrived during
	 * the idle period.
	 */
	if (rp->red_idle) {
		struct timeval now;
		int t;

		rp->red_idle = 0;
		microtime(&now);
		t = (now.tv_sec - rp->red_last.tv_sec);
		if (t > 60) {
			/*
			 * being idle for more than 1 minute, set avg to zero.
			 * this prevents t from overflow.
			 */
			avg = 0;
		} else {
			t = t * 1000000 + (now.tv_usec - rp->red_last.tv_usec);
			n = t / rp->red_pkttime - 1;

			/* the following line does (avg = (1 - Wq)^n * avg) */
			if (n > 0)
				avg = (avg >> FP_SHIFT) *
				    pow_w(rp->red_wtab, n);
		}
	}

	/* run estimator. (note: avg is scaled by WEIGHT in fixed-point) */
	avg += (qlen(q) << FP_SHIFT) - (avg >> rp->red_wshift);
	rp->red_avg = avg;		/* save the new value */

	/*
	 * red_count keeps a tally of arriving traffic that has not
	 * been dropped.
	 */
	rp->red_count++;

	/* see if we drop early */
	droptype = DTYPE_NODROP;
	if (avg >= rp->red_thmin_s && qlen(q) > 1) {
		if (avg >= rp->red_thmax_s) {
			/* avg >= th_max: forced drop */
			droptype = DTYPE_FORCED;
		} else if (rp->red_old == 0) {
			/* first time avg exceeds th_min: restart the count */
			rp->red_count = 1;
			rp->red_old = 1;
		} else if (drop_early((avg - rp->red_thmin_s) >> rp->red_wshift,
		    rp->red_probd, rp->red_count)) {
			/* mark or drop by red */
			if ((rp->red_flags & REDF_ECN) &&
			    mark_ecn(m, pktattr, rp->red_flags)) {
				/* successfully marked.  do not drop. */
				rp->red_count = 0;
#ifdef RED_STATS
				rp->red_stats.marked_packets++;
#endif
			} else {
				/* unforced drop by red */
				droptype = DTYPE_EARLY;
			}
		}
	} else {
		/* avg < th_min */
		rp->red_old = 0;
	}

	/*
	 * if the queue length hits the hard limit, it's a forced drop.
	 */
	if (droptype == DTYPE_NODROP && qlen(q) >= qlimit(q))
		droptype = DTYPE_FORCED;

#ifdef RED_RANDOM_DROP
	/* if successful or forced drop, enqueue this packet. */
	if (droptype != DTYPE_EARLY)
		_addq(q, m);
#else
	/* if successful, enqueue this packet. */
	if (droptype == DTYPE_NODROP)
		_addq(q, m);
#endif
	if (droptype != DTYPE_NODROP) {
		if (droptype == DTYPE_EARLY) {
			/* drop the incoming packet */
#ifdef RED_STATS
			rp->red_stats.drop_unforced++;
#endif
		} else {
			/* forced drop, select a victim packet in the queue. */
#ifdef RED_RANDOM_DROP
			m = _getq_random(q);
#endif
#ifdef RED_STATS
			rp->red_stats.drop_forced++;
#endif
		}
#ifdef RED_STATS
		PKTCNTR_ADD(&rp->red_stats.drop_cnt, m_pktlen(m));
#endif
		/* the drop resets the undropped-arrival tally. */
		rp->red_count = 0;
#ifdef ALTQ3_COMPAT
#ifdef ALTQ_FLOWVALVE
		if (rp->red_flowvalve != NULL)
			fv_dropbyred(rp->red_flowvalve, pktattr, fve);
#endif
#endif /* ALTQ3_COMPAT */
		m_freem(m);
		return (-1);
	}
	/* successfully queued */
#ifdef RED_STATS
	PKTCNTR_ADD(&rp->red_stats.xmit_cnt, m_pktlen(m));
#endif
	return (0);
}
483
484 /*
485 * early-drop probability is calculated as follows:
486 * prob = p_max * (avg - th_min) / (th_max - th_min)
487 * prob_a = prob / (2 - count*prob)
488 * = (avg-th_min) / (2*(th_max-th_min)*inv_p_max - count*(avg-th_min))
489 * here prob_a increases as successive undrop count increases.
490 * (prob_a starts from prob/2, becomes prob when (count == (1 / prob)),
491 * becomes 1 when (count >= (2 / prob))).
492 */
493 int
494 drop_early(int fp_len, int fp_probd, int count)
495 {
496 int d; /* denominator of drop-probability */
497
498 d = fp_probd - count * fp_len;
499 if (d <= 0)
500 /* count exceeds the hard limit: drop or mark */
501 return (1);
502
503 /*
504 * now the range of d is [1..600] in fixed-point. (when
505 * th_max-th_min=10 and p_max=1/30)
506 * drop probability = (avg - TH_MIN) / d
507 */
508
509 if ((arc4random() % d) < fp_len) {
510 /* drop or mark */
511 return (1);
512 }
513 /* no drop/mark */
514 return (0);
515 }
516
517 /*
518 * try to mark CE bit to the packet.
519 * returns 1 if successfully marked, 0 otherwise.
520 */
/*
 * mark_ecn: try to set the CE (congestion experienced) codepoint in the
 * packet's IP header instead of dropping the packet.
 *
 * the header location and address family come from the pf mbuf tag, or
 * (ALTQ3 compat) from pktattr.  returns 1 if the packet is ECN-capable
 * and now carries CE (or already did), 0 if it cannot be marked.
 */
int
mark_ecn(struct mbuf *m, struct altq_pktattr *pktattr, int flags)
{
	struct mbuf	*m0;
	struct m_tag	*t;
	struct altq_tag	*at;
	void		*hdr;
	int		 af;

	t = m_tag_find(m, PACKET_TAG_PF_QID, NULL);
	if (t != NULL) {
		at = (struct altq_tag *)(t + 1);
		if (at == NULL)
			return (0);
		af = at->af;
		hdr = at->hdr;
#ifdef ALTQ3_COMPAT
	} else if (pktattr != NULL) {
		af = pktattr->pattr_af;
		hdr = pktattr->pattr_hdr;
#endif /* ALTQ3_COMPAT */
	} else
		return (0);

	if (af != AF_INET && af != AF_INET6)
		return (0);

	/* verify that pattr_hdr is within the mbuf data */
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		if (((caddr_t)hdr >= m0->m_data) &&
		    ((caddr_t)hdr < m0->m_data + m0->m_len))
			break;
	if (m0 == NULL) {
		/* ick, tag info is stale */
		return (0);
	}

	switch (af) {
	case AF_INET:
		if (flags & REDF_ECN4) {
			struct ip *ip = hdr;
			u_int8_t otos;
			int sum;

			if (ip->ip_v != 4)
				return (0);	/* version mismatch! */

			if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_NOTECT)
				return (0);	/* not-ECT */
			if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_CE)
				return (1);	/* already marked */

			/*
			 * ecn-capable but not marked,
			 * mark CE and update checksum
			 */
			otos = ip->ip_tos;
			ip->ip_tos |= IPTOS_ECN_CE;
			/*
			 * update checksum (from RFC1624)
			 *	    HC' = ~(~HC + ~m + m')
			 */
			sum = ~ntohs(ip->ip_sum) & 0xffff;
			sum += (~otos & 0xffff) + ip->ip_tos;
			sum = (sum >> 16) + (sum & 0xffff);
			sum += (sum >> 16);	/* add carry */
			ip->ip_sum = htons(~sum & 0xffff);
			return (1);
		}
		break;
#ifdef INET6
	case AF_INET6:
		if (flags & REDF_ECN6) {
			struct ip6_hdr *ip6 = hdr;
			u_int32_t flowlabel;

			/* v6 keeps the ECN bits inside the flow word. */
			flowlabel = ntohl(ip6->ip6_flow);
			if ((flowlabel >> 28) != 6)
				return (0);	/* version mismatch! */
			if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
			    (IPTOS_ECN_NOTECT << 20))
				return (0);	/* not-ECT */
			if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
			    (IPTOS_ECN_CE << 20))
				return (1);	/* already marked */
			/*
			 * ecn-capable but not marked, mark CE
			 */
			flowlabel |= (IPTOS_ECN_CE << 20);
			ip6->ip6_flow = htonl(flowlabel);
			return (1);
		}
		break;
#endif /* INET6 */
	}

	/* not marked */
	return (0);
}
620
621 struct mbuf *
622 red_getq(red_t *rp, class_queue_t *q)
623 {
624 struct mbuf *m;
625
626 if ((m = _getq(q)) == NULL) {
627 if (rp->red_idle == 0) {
628 rp->red_idle = 1;
629 microtime(&rp->red_last);
630 }
631 return NULL;
632 }
633
634 rp->red_idle = 0;
635 return (m);
636 }
637
638 /*
639 * helper routine to calibrate avg during idle.
640 * pow_w(wtab, n) returns (1 - Wq)^n in fixed-point
641 * here Wq = 1/weight and the code assumes Wq is close to zero.
642 *
643 * w_tab[n] holds ((1 - Wq)^(2^n)) in fixed-point.
644 */
645 static struct wtab *wtab_list = NULL; /* pointer to wtab list */
646
/*
 * wtab_alloc: obtain a weight table for the given weight.
 * tables are shared: an existing table with the same weight gets an
 * extra reference; otherwise a new one is built and linked onto
 * wtab_list.  never returns NULL (panics on allocation failure).
 */
struct wtab *
wtab_alloc(int weight)
{
	struct wtab	*w;
	int		 i;

	for (w = wtab_list; w != NULL; w = w->w_next)
		if (w->w_weight == weight) {
			w->w_refcount++;
			return (w);
		}

	w = malloc(sizeof(struct wtab), M_DEVBUF, M_WAITOK|M_ZERO);
	if (w == NULL)
		panic("wtab_alloc: malloc failed!");
	w->w_weight = weight;
	w->w_refcount = 1;
	w->w_next = wtab_list;
	wtab_list = w;

	/* initialize the weight table */
	/* w_tab[0] = (1 - 1/weight) in fixed-point */
	w->w_tab[0] = ((weight - 1) << FP_SHIFT) / weight;
	for (i = 1; i < 32; i++) {
		/* w_tab[i] = (1 - 1/weight)^(2^i) by repeated squaring */
		w->w_tab[i] = (w->w_tab[i-1] * w->w_tab[i-1]) >> FP_SHIFT;
		/* record the first power where the value underflows to 0 */
		if (w->w_tab[i] == 0 && w->w_param_max == 0)
			w->w_param_max = 1 << i;
	}

	return (w);
}
677
678 int
679 wtab_destroy(struct wtab *w)
680 {
681 struct wtab *prev;
682
683 if (--w->w_refcount > 0)
684 return (0);
685
686 if (wtab_list == w)
687 wtab_list = w->w_next;
688 else for (prev = wtab_list; prev->w_next != NULL; prev = prev->w_next)
689 if (prev->w_next == w) {
690 prev->w_next = w->w_next;
691 break;
692 }
693
694 free(w, M_DEVBUF);
695 return (0);
696 }
697
698 int32_t
699 pow_w(struct wtab *w, int n)
700 {
701 int i, bit;
702 int32_t val;
703
704 if (n >= w->w_param_max)
705 return (0);
706
707 val = 1 << FP_SHIFT;
708 if (n <= 0)
709 return (val);
710
711 bit = 1;
712 i = 0;
713 while (n) {
714 if (n & bit) {
715 val = (val * w->w_tab[i]) >> FP_SHIFT;
716 n &= ~bit;
717 }
718 i++;
719 bit <<= 1;
720 }
721 return (val);
722 }
723
724 #ifdef ALTQ3_COMPAT
725 /*
726 * red device interface
727 */
728 altqdev_decl(red);
729
730 int
731 redopen(dev_t dev, int flag, int fmt,
732 struct lwp *l)
733 {
734 /* everything will be done when the queueing scheme is attached. */
735 return 0;
736 }
737
738 int
739 redclose(dev_t dev, int flag, int fmt,
740 struct lwp *l)
741 {
742 red_queue_t *rqp;
743 int err, error = 0;
744
745 while ((rqp = red_list) != NULL) {
746 /* destroy all */
747 err = red_detach(rqp);
748 if (err != 0 && error == 0)
749 error = err;
750 }
751
752 return error;
753 }
754
/*
 * redioctl: ioctl handler for the ALTQ3 red device.
 *
 * supports attaching/detaching red to an interface's send queue,
 * enabling/disabling it, reconfiguring parameters, reading statistics
 * and setting the global defaults.  every command except RED_GETSTATS
 * requires network-configuration privilege.
 */
int
redioctl(dev_t dev, ioctlcmd_t cmd, caddr_t addr, int flag,
    struct lwp *l)
{
	red_queue_t *rqp;
	struct red_interface *ifacep;
	struct ifnet *ifp;
	struct proc *p = l->l_proc;
	int	error = 0;

	/* check super-user privilege */
	switch (cmd) {
	case RED_GETSTATS:
		break;
	default:
#if (__FreeBSD_version > 400000)
		if ((error = suser(p)) != 0)
#else
		if ((error = kauth_authorize_network(p->p_cred,
		    KAUTH_NETWORK_ALTQ, KAUTH_REQ_NETWORK_ALTQ_RED, NULL,
		    NULL, NULL)) != 0)
#endif
			return (error);
		break;
	}

	switch (cmd) {

	case RED_ENABLE:
		ifacep = (struct red_interface *)addr;
		if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
			error = EBADF;
			break;
		}
		error = altq_enable(rqp->rq_ifq);
		break;

	case RED_DISABLE:
		ifacep = (struct red_interface *)addr;
		if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
			error = EBADF;
			break;
		}
		error = altq_disable(rqp->rq_ifq);
		break;

	case RED_IF_ATTACH:
		ifp = ifunit(((struct red_interface *)addr)->red_ifname);
		if (ifp == NULL) {
			error = ENXIO;
			break;
		}

		/* allocate and initialize red_queue_t */
		rqp = malloc(sizeof(red_queue_t), M_DEVBUF, M_WAITOK|M_ZERO);
		if (rqp == NULL) {
			error = ENOMEM;
			break;
		}

		rqp->rq_q = malloc(sizeof(class_queue_t), M_DEVBUF,
		    M_WAITOK|M_ZERO);
		if (rqp->rq_q == NULL) {
			free(rqp, M_DEVBUF);
			error = ENOMEM;
			break;
		}

		/* all-zero arguments select the built-in defaults. */
		rqp->rq_red = red_alloc(0, 0, 0, 0, 0, 0);
		if (rqp->rq_red == NULL) {
			free(rqp->rq_q, M_DEVBUF);
			free(rqp, M_DEVBUF);
			error = ENOMEM;
			break;
		}

		rqp->rq_ifq = &ifp->if_snd;
		qtail(rqp->rq_q) = NULL;
		qlen(rqp->rq_q) = 0;
		qlimit(rqp->rq_q) = RED_LIMIT;
		qtype(rqp->rq_q) = Q_RED;

		/*
		 * set RED to this ifnet structure.
		 */
		error = altq_attach(rqp->rq_ifq, ALTQT_RED, rqp,
		    red_enqueue, red_dequeue, red_request,
		    NULL, NULL);
		if (error) {
			red_destroy(rqp->rq_red);
			free(rqp->rq_q, M_DEVBUF);
			free(rqp, M_DEVBUF);
			break;
		}

		/* add this state to the red list */
		rqp->rq_next = red_list;
		red_list = rqp;
		break;

	case RED_IF_DETACH:
		ifacep = (struct red_interface *)addr;
		if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
			error = EBADF;
			break;
		}
		error = red_detach(rqp);
		break;

	case RED_GETSTATS:
		do {
			struct red_stats *q_stats;
			red_t *rp;

			q_stats = (struct red_stats *)addr;
			if ((rqp = altq_lookup(q_stats->iface.red_ifname,
			    ALTQT_RED)) == NULL) {
				error = EBADF;
				break;
			}

			q_stats->q_len = qlen(rqp->rq_q);
			q_stats->q_limit = qlimit(rqp->rq_q);

			rp = rqp->rq_red;
			/* descale the EWMA average for the caller. */
			q_stats->q_avg = rp->red_avg >> rp->red_wshift;
			q_stats->xmit_cnt = rp->red_stats.xmit_cnt;
			q_stats->drop_cnt = rp->red_stats.drop_cnt;
			q_stats->drop_forced = rp->red_stats.drop_forced;
			q_stats->drop_unforced = rp->red_stats.drop_unforced;
			q_stats->marked_packets = rp->red_stats.marked_packets;

			q_stats->weight = rp->red_weight;
			q_stats->inv_pmax = rp->red_inv_pmax;
			q_stats->th_min = rp->red_thmin;
			q_stats->th_max = rp->red_thmax;

#ifdef ALTQ_FLOWVALVE
			if (rp->red_flowvalve != NULL) {
				struct flowvalve *fv = rp->red_flowvalve;
				q_stats->fv_flows = fv->fv_flows;
				q_stats->fv_pass = fv->fv_stats.pass;
				q_stats->fv_predrop = fv->fv_stats.predrop;
				q_stats->fv_alloc = fv->fv_stats.alloc;
				q_stats->fv_escape = fv->fv_stats.escape;
			} else {
#endif /* ALTQ_FLOWVALVE */
				q_stats->fv_flows = 0;
				q_stats->fv_pass = 0;
				q_stats->fv_predrop = 0;
				q_stats->fv_alloc = 0;
				q_stats->fv_escape = 0;
#ifdef ALTQ_FLOWVALVE
			}
#endif /* ALTQ_FLOWVALVE */
		} while (/*CONSTCOND*/ 0);
		break;

	case RED_CONFIG:
		do {
			struct red_conf *fc;
			red_t *new;
			int s, limit;

			fc = (struct red_conf *)addr;
			if ((rqp = altq_lookup(fc->iface.red_ifname,
			    ALTQT_RED)) == NULL) {
				error = EBADF;
				break;
			}
			/* build the replacement state before touching the
			 * live one, so failure leaves the old config intact. */
			new = red_alloc(fc->red_weight,
			    fc->red_inv_pmax,
			    fc->red_thmin,
			    fc->red_thmax,
			    fc->red_flags,
			    fc->red_pkttime);
			if (new == NULL) {
				error = ENOMEM;
				break;
			}

			s = splnet();
			red_purgeq(rqp);
			limit = fc->red_limit;
			/* the hard queue limit must at least cover th_max. */
			if (limit < fc->red_thmax)
				limit = fc->red_thmax;
			qlimit(rqp->rq_q) = limit;
			fc->red_limit = limit;	/* write back the new value */

			red_destroy(rqp->rq_red);
			rqp->rq_red = new;

			splx(s);

			/* write back new values */
			/* (red_limit was already written back above; this
			 * repeats it, harmlessly.) */
			fc->red_limit = limit;
			fc->red_inv_pmax = rqp->rq_red->red_inv_pmax;
			fc->red_thmin = rqp->rq_red->red_thmin;
			fc->red_thmax = rqp->rq_red->red_thmax;

		} while (/*CONSTCOND*/ 0);
		break;

	case RED_SETDEFAULTS:
		do {
			struct redparams *rp;

			rp = (struct redparams *)addr;

			/* applied to subsequently created red states. */
			default_th_min = rp->th_min;
			default_th_max = rp->th_max;
			default_inv_pmax = rp->inv_pmax;
		} while (/*CONSTCOND*/ 0);
		break;

	default:
		error = EINVAL;
		break;
	}
	return error;
}
976
/*
 * red_detach: disable and detach red from its interface queue, unlink
 * rqp from the global red_list, and free all associated state.
 * returns 0, or the error from altq_detach (in which case nothing is
 * freed).
 */
static int
red_detach(red_queue_t *rqp)
{
	red_queue_t *tmp;
	int error = 0;

	if (ALTQ_IS_ENABLED(rqp->rq_ifq))
		altq_disable(rqp->rq_ifq);

	if ((error = altq_detach(rqp->rq_ifq)))
		return (error);

	/* unlink from the global list of red states. */
	if (red_list == rqp)
		red_list = rqp->rq_next;
	else {
		for (tmp = red_list; tmp != NULL; tmp = tmp->rq_next)
			if (tmp->rq_next == rqp) {
				tmp->rq_next = rqp->rq_next;
				break;
			}
		if (tmp == NULL)
			printf("red_detach: no state found in red_list!\n");
	}

	red_destroy(rqp->rq_red);
	free(rqp->rq_q, M_DEVBUF);
	free(rqp, M_DEVBUF);
	return (error);
}
1006
1007 /*
1008 * enqueue routine:
1009 *
1010 * returns: 0 when successfully queued.
1011 * ENOBUFS when drop occurs.
1012 */
1013 static int
1014 red_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr)
1015 {
1016 red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
1017
1018 if (red_addq(rqp->rq_red, rqp->rq_q, m, pktattr) < 0)
1019 return ENOBUFS;
1020 ifq->ifq_len++;
1021 return 0;
1022 }
1023
1024 /*
1025 * dequeue routine:
1026 * must be called in splnet.
1027 *
1028 * returns: mbuf dequeued.
1029 * NULL when no packet is available in the queue.
1030 */
1031
1032 static struct mbuf *
1033 red_dequeue(struct ifaltq *ifq, int op)
1034 {
1035 red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
1036 struct mbuf *m;
1037
1038 if (op == ALTDQ_POLL)
1039 return qhead(rqp->rq_q);
1040
1041 /* op == ALTDQ_REMOVE */
1042 m = red_getq(rqp->rq_red, rqp->rq_q);
1043 if (m != NULL)
1044 ifq->ifq_len--;
1045 return (m);
1046 }
1047
1048 static int
1049 red_request(struct ifaltq *ifq, int req, void *arg)
1050 {
1051 red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
1052
1053 switch (req) {
1054 case ALTRQ_PURGE:
1055 red_purgeq(rqp);
1056 break;
1057 }
1058 return (0);
1059 }
1060
1061 static void
1062 red_purgeq(red_queue_t *rqp)
1063 {
1064 _flushq(rqp->rq_q);
1065 if (ALTQ_IS_ENABLED(rqp->rq_ifq))
1066 rqp->rq_ifq->ifq_len = 0;
1067 }
1068
1069 #ifdef ALTQ_FLOWVALVE
1070
1071 #define FV_PSHIFT 7 /* weight of average drop rate -- 1/128 */
1072 #define FV_PSCALE(x) ((x) << FV_PSHIFT)
1073 #define FV_PUNSCALE(x) ((x) >> FV_PSHIFT)
1074 #define FV_FSHIFT 5 /* weight of average fraction -- 1/32 */
1075 #define FV_FSCALE(x) ((x) << FV_FSHIFT)
1076 #define FV_FUNSCALE(x) ((x) >> FV_FSHIFT)
1077
1078 #define FV_TIMER (3 * hz) /* timer value for garbage collector */
1079 #define FV_FLOWLISTSIZE 64 /* how many flows in flowlist */
1080
1081 #define FV_N 10 /* update fve_f every FV_N packets */
1082
1083 #define FV_BACKOFFTHRESH 1 /* backoff threshold interval in second */
1084 #define FV_TTHRESH 3 /* time threshold to delete fve */
1085 #define FV_ALPHA 5 /* extra packet count */
1086
1087 #define FV_STATS
1088
1089 #if (__FreeBSD_version > 300000) || defined(__HAVE_TIMECOUNTER)
1090 #define FV_TIMESTAMP(tp) getmicrotime(tp)
1091 #else
1092 #define FV_TIMESTAMP(tp) { (*(tp)) = time; }
1093 #endif
1094
1095 /*
1096 * Brtt table: 127 entry table to convert drop rate (p) to
1097 * the corresponding bandwidth fraction (f)
1098 * the following equation is implemented to use scaled values,
1099 * fve_p and fve_f, in the fixed point format.
1100 *
1101 * Brtt(p) = 1 /(sqrt(4*p/3) + min(1,3*sqrt(p*6/8)) * p * (1+32 * p*p))
1102 * f = Brtt(p) / (max_th + alpha)
1103 */
1104 #define BRTT_SIZE 128
1105 #define BRTT_SHIFT 12
1106 #define BRTT_MASK 0x0007f000
1107 #define BRTT_PMAX (1 << (FV_PSHIFT + FP_SHIFT))
1108
/*
 * precomputed Brtt(p) values in fixed-point; presumably indexed by the
 * scaled drop rate via BRTT_MASK/BRTT_SHIFT -- the lookup site is
 * outside this chunk, so confirm against the flowvalve code.
 */
const int brtt_tab[BRTT_SIZE] = {
	0, 1262010, 877019, 703694, 598706, 525854, 471107, 427728,
	392026, 361788, 335598, 312506, 291850, 273158, 256081, 240361,
	225800, 212247, 199585, 187788, 178388, 169544, 161207, 153333,
	145888, 138841, 132165, 125836, 119834, 114141, 108739, 103612,
	98747, 94129, 89746, 85585, 81637, 77889, 74333, 70957,
	67752, 64711, 61824, 59084, 56482, 54013, 51667, 49440,
	47325, 45315, 43406, 41591, 39866, 38227, 36667, 35184,
	33773, 32430, 31151, 29933, 28774, 27668, 26615, 25611,
	24653, 23740, 22868, 22035, 21240, 20481, 19755, 19062,
	18399, 17764, 17157, 16576, 16020, 15487, 14976, 14487,
	14017, 13567, 13136, 12721, 12323, 11941, 11574, 11222,
	10883, 10557, 10243, 9942, 9652, 9372, 9103, 8844,
	8594, 8354, 8122, 7898, 7682, 7474, 7273, 7079,
	6892, 6711, 6536, 6367, 6204, 6046, 5893, 5746,
	5603, 5464, 5330, 5201, 5075, 4954, 4836, 4722,
	4611, 4504, 4400, 4299, 4201, 4106, 4014, 3924
};
1127
/*
 * flowlist_lookup: find the flowvalve entry matching the flow (src/dst
 * address pair) of the packet described by pktattr.
 *
 * the flowlist is kept in LRU order; entries whose last drop is older
 * than FV_TTHRESH seconds are considered dead, so the scan stops at the
 * first stale or never-used entry (invalidating a stale one on the way).
 * when no match is found, fv->fv_flows is updated as a side effect with
 * the number of active entries scanned.
 *
 * returns the matching fve, or NULL if none (or unknown protocol).
 */
static inline struct fve *
flowlist_lookup(struct flowvalve *fv, struct altq_pktattr *pktattr,
    struct timeval *now)
{
	struct fve *fve;
	int flows;
	struct ip *ip;
#ifdef INET6
	struct ip6_hdr *ip6;
#endif
	struct timeval tthresh;

	if (pktattr == NULL)
		return (NULL);

	tthresh.tv_sec = now->tv_sec - FV_TTHRESH;
	flows = 0;
	/*
	 * search the flow list
	 */
	switch (pktattr->pattr_af) {
	case AF_INET:
		ip = (struct ip *)pktattr->pattr_hdr;
		TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){
			if (fve->fve_lastdrop.tv_sec == 0)
				break;
			if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) {
				/* entry too old: invalidate it and stop. */
				fve->fve_lastdrop.tv_sec = 0;
				break;
			}
			if (fve->fve_flow.flow_af == AF_INET &&
			    fve->fve_flow.flow_ip.ip_src.s_addr ==
			    ip->ip_src.s_addr &&
			    fve->fve_flow.flow_ip.ip_dst.s_addr ==
			    ip->ip_dst.s_addr)
				return (fve);
			flows++;
		}
		break;
#ifdef INET6
	case AF_INET6:
		ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
		TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){
			if (fve->fve_lastdrop.tv_sec == 0)
				break;
			if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) {
				/* entry too old: invalidate it and stop. */
				fve->fve_lastdrop.tv_sec = 0;
				break;
			}
			if (fve->fve_flow.flow_af == AF_INET6 &&
			    IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_src,
			    &ip6->ip6_src) &&
			    IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_dst,
			    &ip6->ip6_dst))
				return (fve);
			flows++;
		}
		break;
#endif /* INET6 */

	default:
		/* unknown protocol.  no drop. */
		return (NULL);
	}
	fv->fv_flows = flows;	/* save the number of active fve's */
	return (NULL);
}
1195
1196 static inline struct fve *
1197 flowlist_reclaim(struct flowvalve *fv, struct altq_pktattr *pktattr)
1198 {
1199 struct fve *fve;
1200 struct ip *ip;
1201 #ifdef INET6
1202 struct ip6_hdr *ip6;
1203 #endif
1204
1205 /*
1206 * get an entry from the tail of the LRU list.
1207 */
1208 fve = TAILQ_LAST(&fv->fv_flowlist, fv_flowhead);
1209
1210 switch (pktattr->pattr_af) {
1211 case AF_INET:
1212 ip = (struct ip *)pktattr->pattr_hdr;
1213 fve->fve_flow.flow_af = AF_INET;
1214 fve->fve_flow.flow_ip.ip_src = ip->ip_src;
1215 fve->fve_flow.flow_ip.ip_dst = ip->ip_dst;
1216 break;
1217 #ifdef INET6
1218 case AF_INET6:
1219 ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
1220 fve->fve_flow.flow_af = AF_INET6;
1221 fve->fve_flow.flow_ip6.ip6_src = ip6->ip6_src;
1222 fve->fve_flow.flow_ip6.ip6_dst = ip6->ip6_dst;
1223 break;
1224 #endif
1225 }
1226
1227 fve->fve_state = Green;
1228 fve->fve_p = 0.0;
1229 fve->fve_f = 0.0;
1230 fve->fve_ifseq = fv->fv_ifseq - 1;
1231 fve->fve_count = 0;
1232
1233 fv->fv_flows++;
1234 #ifdef FV_STATS
1235 fv->fv_stats.alloc++;
1236 #endif
1237 return (fve);
1238 }
1239
1240 static inline void
1241 flowlist_move_to_head(struct flowvalve *fv, struct fve *fve)
1242 {
1243 if (TAILQ_FIRST(&fv->fv_flowlist) != fve) {
1244 TAILQ_REMOVE(&fv->fv_flowlist, fve, fve_lru);
1245 TAILQ_INSERT_HEAD(&fv->fv_flowlist, fve, fve_lru);
1246 }
1247 }
1248
1249 /*
1250 * allocate flowvalve structure
1251 */
1252 static struct flowvalve *
1253 fv_alloc(struct red *rp)
1254 {
1255 struct flowvalve *fv;
1256 struct fve *fve;
1257 int i, num;
1258
1259 num = FV_FLOWLISTSIZE;
1260 fv = malloc(sizeof(struct flowvalve), M_DEVBUF, M_WAITOK|M_ZERO);
1261 if (fv == NULL)
1262 return (NULL);
1263
1264 fv->fv_fves = malloc(sizeof(struct fve) * num, M_DEVBUF,
1265 M_WAITOK|M_ZERO);
1266 if (fv->fv_fves == NULL) {
1267 free(fv, M_DEVBUF);
1268 return (NULL);
1269 }
1270
1271 fv->fv_flows = 0;
1272 TAILQ_INIT(&fv->fv_flowlist);
1273 for (i = 0; i < num; i++) {
1274 fve = &fv->fv_fves[i];
1275 fve->fve_lastdrop.tv_sec = 0;
1276 TAILQ_INSERT_TAIL(&fv->fv_flowlist, fve, fve_lru);
1277 }
1278
1279 /* initialize drop rate threshold in scaled fixed-point */
1280 fv->fv_pthresh = (FV_PSCALE(1) << FP_SHIFT) / rp->red_inv_pmax;
1281
1282 /* initialize drop rate to fraction table */
1283 fv->fv_p2ftab = malloc(sizeof(int) * BRTT_SIZE, M_DEVBUF, M_WAITOK);
1284 if (fv->fv_p2ftab == NULL) {
1285 free(fv->fv_fves, M_DEVBUF);
1286 free(fv, M_DEVBUF);
1287 return (NULL);
1288 }
1289 /*
1290 * create the p2f table.
1291 * (shift is used to keep the precision)
1292 */
1293 for (i = 1; i < BRTT_SIZE; i++) {
1294 int f;
1295
1296 f = brtt_tab[i] << 8;
1297 fv->fv_p2ftab[i] = (f / (rp->red_thmax + FV_ALPHA)) >> 8;
1298 }
1299
1300 return (fv);
1301 }
1302
1303 static void
1304 fv_destroy(struct flowvalve *fv)
1305 {
1306 free(fv->fv_p2ftab, M_DEVBUF);
1307 free(fv->fv_fves, M_DEVBUF);
1308 free(fv, M_DEVBUF);
1309 }
1310
1311 static inline int
1312 fv_p2f(struct flowvalve *fv, int p)
1313 {
1314 int val, f;
1315
1316 if (p >= BRTT_PMAX)
1317 f = fv->fv_p2ftab[BRTT_SIZE-1];
1318 else if ((val = (p & BRTT_MASK)))
1319 f = fv->fv_p2ftab[(val >> BRTT_SHIFT)];
1320 else
1321 f = fv->fv_p2ftab[1];
1322 return (f);
1323 }
1324
1325 /*
1326 * check if an arriving packet should be pre-dropped.
1327 * called from red_addq() when a packet arrives.
1328 * returns 1 when the packet should be pre-dropped.
1329 * should be called in splnet.
1330 */
/*
 * check if an arriving packet should be pre-dropped.
 * called from red_addq() when a packet arrives.
 * returns 1 when the packet should be pre-dropped.
 * should be called in splnet.
 *
 * On a flowlist hit the entry is returned through *fcache so that
 * fv_dropbyred() can reuse it without a second lookup; *fcache is left
 * untouched on a miss.
 */
static int
fv_checkflow(struct flowvalve *fv, struct altq_pktattr *pktattr,
    struct fve **fcache)
{
	struct fve *fve;
	struct timeval now;

	fv->fv_ifseq++;		/* per-interface packet sequence number */
	FV_TIMESTAMP(&now);

	if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL)
		/* no matching entry in the flowlist */
		return (0);

	*fcache = fve;

	/* update fraction f for every FV_N packets */
	if (++fve->fve_count == FV_N) {
		/*
		 * f = Wf * N / (fv_ifseq - fve_ifseq) + (1 - Wf) * f
		 * (EWMA of this flow's share of arriving packets; the
		 * sequence-number gap is the interval over which the
		 * last FV_N packets of this flow were seen)
		 */
		fve->fve_f =
			(FV_N << FP_SHIFT) / (fv->fv_ifseq - fve->fve_ifseq)
			+ fve->fve_f - FV_FUNSCALE(fve->fve_f);
		fve->fve_ifseq = fv->fv_ifseq;
		fve->fve_count = 0;
	}

	/*
	 * overpumping test: a Green flow whose drop rate exceeds the
	 * threshold turns Red when its bandwidth fraction is larger
	 * than Brtt(p) allows.
	 */
	if (fve->fve_state == Green && fve->fve_p > fv->fv_pthresh) {
		int fthresh;

		/* calculate a threshold */
		fthresh = fv_p2f(fv, fve->fve_p);
		if (fve->fve_f > fthresh)
			fve->fve_state = Red;
	}

	if (fve->fve_state == Red) {
		/*
		 * backoff test
		 */
		if (now.tv_sec - fve->fve_lastdrop.tv_sec > FV_BACKOFFTHRESH) {
			/* no drop for at least FV_BACKOFFTHRESH sec */
			fve->fve_p = 0;
			fve->fve_state = Green;
#ifdef FV_STATS
			fv->fv_stats.escape++;
#endif
		} else {
			/* block this flow */
			flowlist_move_to_head(fv, fve);
			fve->fve_lastdrop = now;
#ifdef FV_STATS
			fv->fv_stats.predrop++;
#endif
			return (1);
		}
	}

	/*
	 * p = (1 - Wp) * p
	 * (decay the drop rate on every forwarded packet; cancelled by
	 * fv_dropbyred() when the packet is dropped after all)
	 */
	fve->fve_p -= FV_PUNSCALE(fve->fve_p);
	if (fve->fve_p < 0)
		fve->fve_p = 0;
#ifdef FV_STATS
	fv->fv_stats.pass++;
#endif
	return (0);
}
1404
1405 /*
1406 * called from red_addq when a packet is dropped by red.
1407 * should be called in splnet.
1408 */
1409 static void
1410 fv_dropbyred(struct flowvalve *fv, struct altq_pktattr *pktattr,
1411 struct fve *fcache)
1412 {
1413 struct fve *fve;
1414 struct timeval now;
1415
1416 if (pktattr == NULL)
1417 return;
1418 FV_TIMESTAMP(&now);
1419
1420 if (fcache != NULL)
1421 /* the fve of this packet is already cached */
1422 fve = fcache;
1423 else if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL)
1424 fve = flowlist_reclaim(fv, pktattr);
1425
1426 flowlist_move_to_head(fv, fve);
1427
1428 /*
1429 * update p: the following line cancels the update
1430 * in fv_checkflow() and calculate
1431 * p = Wp + (1 - Wp) * p
1432 */
1433 fve->fve_p = (1 << FP_SHIFT) + fve->fve_p;
1434
1435 fve->fve_lastdrop = now;
1436 }
1437
1438 #endif /* ALTQ_FLOWVALVE */
1439
#ifdef KLD_MODULE

/*
 * ALTQ switch-table entry for the loadable module: discipline name and
 * its open/close/ioctl entry points.
 */
static struct altqsw red_sw =
	{"red", redopen, redclose, redioctl};

ALTQ_MODULE(altq_red, ALTQT_RED, &red_sw);
MODULE_VERSION(altq_red, 1);

#endif /* KLD_MODULE */
1449 #endif /* ALTQ3_COMPAT */
1450
1451 #endif /* ALTQ_RED */
Cache object: ecddd6cde00cb513a8cbd532b1b6c86b
|