The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/netpfil/ipfw/dn_sched_qfq.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
    3  *
    4  * Copyright (c) 2010 Fabio Checconi, Luigi Rizzo, Paolo Valente
    5  * All rights reserved
    6  *
    7  * Redistribution and use in source and binary forms, with or without
    8  * modification, are permitted provided that the following conditions
    9  * are met:
   10  * 1. Redistributions of source code must retain the above copyright
   11  *    notice, this list of conditions and the following disclaimer.
   12  * 2. Redistributions in binary form must reproduce the above copyright
   13  *    notice, this list of conditions and the following disclaimer in the
   14  *    documentation and/or other materials provided with the distribution.
   15  *
   16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   26  * SUCH DAMAGE.
   27  */
   28 
   29 /*
   30  * $FreeBSD$
   31  */
   32 
   33 #ifdef _KERNEL
   34 #include <sys/malloc.h>
   35 #include <sys/socket.h>
   36 #include <sys/socketvar.h>
   37 #include <sys/kernel.h>
   38 #include <sys/lock.h>
   39 #include <sys/mbuf.h>
   40 #include <sys/module.h>
   41 #include <sys/rwlock.h>
   42 #include <net/if.h>     /* IFNAMSIZ */
   43 #include <netinet/in.h>
   44 #include <netinet/ip_var.h>             /* ipfw_rule_ref */
   45 #include <netinet/ip_fw.h>      /* flow_id */
   46 #include <netinet/ip_dummynet.h>
   47 #include <netpfil/ipfw/ip_fw_private.h>
   48 #include <netpfil/ipfw/dn_heap.h>
   49 #include <netpfil/ipfw/ip_dn_private.h>
   50 #ifdef NEW_AQM
   51 #include <netpfil/ipfw/dn_aqm.h>
   52 #endif
   53 #include <netpfil/ipfw/dn_sched.h>
   54 #else
   55 #include <dn_test.h>
   56 #endif
   57 
   58 #ifdef QFQ_DEBUG
   59 #define _P64    unsigned long long      /* cast for printing uint64_t */
   60 struct qfq_sched;
   61 static void dump_sched(struct qfq_sched *q, const char *msg);
   62 #define NO(x)   x
   63 #else
   64 #define NO(x)
   65 #endif
   66 #define DN_SCHED_QFQ    4 // XXX Where?
   67 typedef unsigned long   bitmap;
   68 
   69 /*
   70  * bitmaps ops are critical. Some linux versions have __fls
   71  * and the bitmap ops. Some machines have ffs
   72  * NOTE: fls() returns 1 for the least significant bit,
   73  *       __fls() returns 0 for the same case.
   74  * We use the base-0 version __fls() to match the description in
   75  * the ToN QFQ paper
   76  */
   77 #if defined(_WIN32) || (defined(__MIPSEL__) && defined(LINUX_24))
   78 int fls(unsigned int n)
   79 {
   80         int i = 0;
   81         for (i = 0; n > 0; n >>= 1, i++)
   82                 ;
   83         return i;
   84 }
   85 #endif
   86 
   87 #if !defined(_KERNEL) || defined( __FreeBSD__ ) || defined(_WIN32) || (defined(__MIPSEL__) && defined(LINUX_24))
   88 static inline unsigned long __fls(unsigned long word)
   89 {
   90         return fls(word) - 1;
   91 }
   92 #endif
   93 
   94 #if !defined(_KERNEL) || !defined(__linux__)
   95 #ifdef QFQ_DEBUG
   96 static int test_bit(int ix, bitmap *p)
   97 {
   98         if (ix < 0 || ix > 31)
   99                 D("bad index %d", ix);
  100         return *p & (1<<ix);
  101 }
  102 static void __set_bit(int ix, bitmap *p)
  103 {
  104         if (ix < 0 || ix > 31)
  105                 D("bad index %d", ix);
  106         *p |= (1<<ix);
  107 }
  108 static void __clear_bit(int ix, bitmap *p)
  109 {
  110         if (ix < 0 || ix > 31)
  111                 D("bad index %d", ix);
  112         *p &= ~(1<<ix);
  113 }
  114 #else /* !QFQ_DEBUG */
  115 /* XXX do we have fast version, or leave it to the compiler ? */
  116 #define test_bit(ix, pData)     ((*pData) & (1<<(ix)))
  117 #define __set_bit(ix, pData)    (*pData) |= (1<<(ix))
  118 #define __clear_bit(ix, pData)  (*pData) &= ~(1<<(ix))
  119 #endif /* !QFQ_DEBUG */
  120 #endif /* !__linux__ */
  121 
  122 #ifdef __MIPSEL__
  123 #define __clear_bit(ix, pData)  (*pData) &= ~(1<<(ix))
  124 #endif
  125 
  126 /*-------------------------------------------*/
  127 /*
  128 
  129 Virtual time computations.
  130 
  131 S, F and V are all computed in fixed point arithmetic with
  132 FRAC_BITS decimal bits.
  133 
  134    QFQ_MAX_INDEX is the maximum index allowed for a group. We need
  135         one bit per index.
  136    QFQ_MAX_WSHIFT is the maximum power of two supported as a weight.
  137    The layout of the bits is as below:
  138   
  139                    [ MTU_SHIFT ][      FRAC_BITS    ]
  140                    [ MAX_INDEX    ][ MIN_SLOT_SHIFT ]
  141                                  ^.__grp->index = 0
  142                                  *.__grp->slot_shift
  143   
  144    where MIN_SLOT_SHIFT is derived by difference from the others.
  145 
  146 The max group index corresponds to Lmax/w_min, where
  147 Lmax=1<<MTU_SHIFT, w_min = 1 .
  148 From this, and knowing how many groups (MAX_INDEX) we want,
  149 we can derive the shift corresponding to each group.
  150 
  151 Because we often need to compute
  152         F = S + len/w_i  and V = V + len/wsum
  153 instead of storing w_i store the value
  154         inv_w = (1<<FRAC_BITS)/w_i
  155 so we can do F = S + len * inv_w * wsum.
  156 We use W_TOT in the formulas so we can easily move between
  157 static and adaptive weight sum.
  158 
  159 The per-scheduler-instance data contain all the data structures
  160 for the scheduler: bitmaps and bucket lists.
  161 
  162  */
  163 /*
  164  * Maximum number of consecutive slots occupied by backlogged classes
  165  * inside a group. This is approx lmax/lmin + 5.
  166  * XXX check because it poses constraints on MAX_INDEX
  167  */
  168 #define QFQ_MAX_SLOTS   32
  169 /*
  170  * Shifts used for class<->group mapping. Class weights are
  171  * in the range [1, QFQ_MAX_WEIGHT], we to map each class i to the
  172  * group with the smallest index that can support the L_i / r_i
  173  * configured for the class.
  174  *
  175  * grp->index is the index of the group; and grp->slot_shift
  176  * is the shift for the corresponding (scaled) sigma_i.
  177  *
  178  * When computing the group index, we do (len<<FP_SHIFT)/weight,
  179  * then compute an FLS (which is like a log2()), and if the result
  180  * is below the MAX_INDEX region we use 0 (which is the same as
  181  * using a larger len).
  182  */
  183 #define QFQ_MAX_INDEX           19
  184 #define QFQ_MAX_WSHIFT          16      /* log2(max_weight) */
  185 
  186 #define QFQ_MAX_WEIGHT          (1<<QFQ_MAX_WSHIFT)
  187 #define QFQ_MAX_WSUM            (2*QFQ_MAX_WEIGHT)
  188 
  189 #define FRAC_BITS               30      /* fixed point arithmetic */
  190 #define ONE_FP                  (1UL << FRAC_BITS)
  191 
  192 #define QFQ_MTU_SHIFT           11      /* log2(max_len) */
  193 #define QFQ_MIN_SLOT_SHIFT      (FRAC_BITS + QFQ_MTU_SHIFT - QFQ_MAX_INDEX)
  194 
  195 /*
  196  * Possible group states, also indexes for the bitmaps array in
  197  * struct qfq_queue. We rely on ER, IR, EB, IB being numbered 0..3
  198  */
  199 enum qfq_state { ER, IR, EB, IB, QFQ_MAX_STATE };
  200 
  201 struct qfq_group;
  202 /*
  203  * additional queue info. Some of this info should come from
  204  * the flowset, we copy them here for faster processing.
  205  * This is an overlay of the struct dn_queue
  206  */
  207 struct qfq_class {
  208         struct dn_queue _q;
  209         uint64_t S, F;          /* flow timestamps (exact) */
  210         struct qfq_class *next; /* Link for the slot list. */
  211 
  212         /* group we belong to. In principle we would need the index,
  213          * which is log_2(lmax/weight), but we never reference it
  214          * directly, only the group.
  215          */
  216         struct qfq_group *grp;
  217 
  218         /* these are copied from the flowset. */
  219         uint32_t        inv_w;  /* ONE_FP/weight */
  220         uint32_t        lmax;   /* Max packet size for this flow. */
  221 };
  222 
  223 /* Group descriptor, see the paper for details.
  224  * Basically this contains the bucket lists
  225  */
  226 struct qfq_group {
  227         uint64_t S, F;                  /* group timestamps (approx). */
  228         unsigned int slot_shift;        /* Slot shift. */
  229         unsigned int index;             /* Group index. */
  230         unsigned int front;             /* Index of the front slot. */
  231         bitmap full_slots;              /* non-empty slots */
  232 
  233         /* Array of lists of active classes. */
  234         struct qfq_class *slots[QFQ_MAX_SLOTS];
  235 };
  236 
  237 /* scheduler instance descriptor. */
  238 struct qfq_sched {
  239         uint64_t        V;              /* Precise virtual time. */
  240         uint32_t        wsum;           /* weight sum */
  241         uint32_t        iwsum;          /* inverse weight sum */
  242         NO(uint32_t     i_wsum;)        /* ONE_FP/w_sum */
  243         NO(uint32_t     queued;)        /* debugging */
  244         NO(uint32_t     loops;)         /* debugging */
  245         bitmap bitmaps[QFQ_MAX_STATE];  /* Group bitmaps. */
  246         struct qfq_group groups[QFQ_MAX_INDEX + 1]; /* The groups. */
  247 };
  248 
  249 /*---- support functions ----------------------------*/
  250 
  251 /* Generic comparison function, handling wraparound. */
  252 static inline int qfq_gt(uint64_t a, uint64_t b)
  253 {
  254         return (int64_t)(a - b) > 0;
  255 }
  256 
  257 /* Round a precise timestamp to its slotted value. */
  258 static inline uint64_t qfq_round_down(uint64_t ts, unsigned int shift)
  259 {
  260         return ts & ~((1ULL << shift) - 1);
  261 }
  262 
  263 /* return the pointer to the group with lowest index in the bitmap */
  264 static inline struct qfq_group *qfq_ffs(struct qfq_sched *q,
  265                                         unsigned long bitmap)
  266 {
  267         int index = ffs(bitmap) - 1; // zero-based
  268         return &q->groups[index];
  269 }
  270 
  271 /*
  272  * Calculate a flow index, given its weight and maximum packet length.
  273  * index = log_2(maxlen/weight) but we need to apply the scaling.
  274  * This is used only once at flow creation.
  275  */
  276 static int qfq_calc_index(uint32_t inv_w, unsigned int maxlen)
  277 {
  278         uint64_t slot_size = (uint64_t)maxlen *inv_w;
  279         unsigned long size_map;
  280         int index = 0;
  281 
  282         size_map = (unsigned long)(slot_size >> QFQ_MIN_SLOT_SHIFT);
  283         if (!size_map)
  284                 goto out;
  285 
  286         index = __fls(size_map) + 1;    // basically a log_2()
  287         index -= !(slot_size - (1ULL << (index + QFQ_MIN_SLOT_SHIFT - 1)));
  288 
  289         if (index < 0)
  290                 index = 0;
  291 
  292 out:
  293         ND("W = %d, L = %d, I = %d\n", ONE_FP/inv_w, maxlen, index);
  294         return index;
  295 }
  296 /*---- end support functions ----*/
  297 
  298 /*-------- API calls --------------------------------*/
  299 /*
  300  * Validate and copy parameters from flowset.
  301  */
  302 static int
  303 qfq_new_queue(struct dn_queue *_q)
  304 {
  305         struct qfq_sched *q = (struct qfq_sched *)(_q->_si + 1);
  306         struct qfq_class *cl = (struct qfq_class *)_q;
  307         int i;
  308         uint32_t w;     /* approximated weight */
  309 
  310         /* import parameters from the flowset. They should be correct
  311          * already.
  312          */
  313         w = _q->fs->fs.par[0];
  314         cl->lmax = _q->fs->fs.par[1];
  315         if (!w || w > QFQ_MAX_WEIGHT) {
  316                 w = 1;
  317                 D("rounding weight to 1");
  318         }
  319         cl->inv_w = ONE_FP/w;
  320         w = ONE_FP/cl->inv_w;   
  321         if (q->wsum + w > QFQ_MAX_WSUM)
  322                 return EINVAL;
  323 
  324         i = qfq_calc_index(cl->inv_w, cl->lmax);
  325         cl->grp = &q->groups[i];
  326         q->wsum += w;
  327         q->iwsum = ONE_FP / q->wsum; /* XXX note theory */
  328         // XXX cl->S = q->V; ?
  329         return 0;
  330 }
  331 
  332 /* remove an empty queue */
  333 static int
  334 qfq_free_queue(struct dn_queue *_q)
  335 {
  336         struct qfq_sched *q = (struct qfq_sched *)(_q->_si + 1);
  337         struct qfq_class *cl = (struct qfq_class *)_q;
  338         if (cl->inv_w) {
  339                 q->wsum -= ONE_FP/cl->inv_w;
  340                 if (q->wsum != 0)
  341                         q->iwsum = ONE_FP / q->wsum;
  342                 cl->inv_w = 0; /* reset weight to avoid run twice */
  343         }
  344         return 0;
  345 }
  346 
  347 /* Calculate a mask to mimic what would be ffs_from(). */
  348 static inline unsigned long
  349 mask_from(unsigned long bitmap, int from)
  350 {
  351         return bitmap & ~((1UL << from) - 1);
  352 }
  353 
  354 /*
  355  * The state computation relies on ER=0, IR=1, EB=2, IB=3
  356  * First compute eligibility comparing grp->S, q->V,
  357  * then check if someone is blocking us and possibly add EB
  358  */
  359 static inline unsigned int
  360 qfq_calc_state(struct qfq_sched *q, struct qfq_group *grp)
  361 {
  362         /* if S > V we are not eligible */
  363         unsigned int state = qfq_gt(grp->S, q->V);
  364         unsigned long mask = mask_from(q->bitmaps[ER], grp->index);
  365         struct qfq_group *next;
  366 
  367         if (mask) {
  368                 next = qfq_ffs(q, mask);
  369                 if (qfq_gt(grp->F, next->F))
  370                         state |= EB;
  371         }
  372 
  373         return state;
  374 }
  375 
  376 /*
  377  * In principle
  378  *      q->bitmaps[dst] |= q->bitmaps[src] & mask;
  379  *      q->bitmaps[src] &= ~mask;
  380  * but we should make sure that src != dst
  381  */
  382 static inline void
  383 qfq_move_groups(struct qfq_sched *q, unsigned long mask, int src, int dst)
  384 {
  385         q->bitmaps[dst] |= q->bitmaps[src] & mask;
  386         q->bitmaps[src] &= ~mask;
  387 }
  388 
  389 static inline void
  390 qfq_unblock_groups(struct qfq_sched *q, int index, uint64_t old_finish)
  391 {
  392         unsigned long mask = mask_from(q->bitmaps[ER], index + 1);
  393         struct qfq_group *next;
  394 
  395         if (mask) {
  396                 next = qfq_ffs(q, mask);
  397                 if (!qfq_gt(next->F, old_finish))
  398                         return;
  399         }
  400 
  401         mask = (1UL << index) - 1;
  402         qfq_move_groups(q, mask, EB, ER);
  403         qfq_move_groups(q, mask, IB, IR);
  404 }
  405 
  406 /*
  407  * perhaps
  408  *
  409         old_V ^= q->V;
  410         old_V >>= QFQ_MIN_SLOT_SHIFT;
  411         if (old_V) {
  412                 ...
  413         }
  414  *
  415  */
  416 static inline void
  417 qfq_make_eligible(struct qfq_sched *q, uint64_t old_V)
  418 {
  419         unsigned long mask, vslot, old_vslot;
  420 
  421         vslot = q->V >> QFQ_MIN_SLOT_SHIFT;
  422         old_vslot = old_V >> QFQ_MIN_SLOT_SHIFT;
  423 
  424         if (vslot != old_vslot) {
  425                 /* must be 2ULL, see ToN QFQ article fig.5, we use base-0 fls */
  426                 mask = (2ULL << (__fls(vslot ^ old_vslot))) - 1;
  427                 qfq_move_groups(q, mask, IR, ER);
  428                 qfq_move_groups(q, mask, IB, EB);
  429         }
  430 }
  431 
  432 /*
  433  * XXX we should make sure that slot becomes less than 32.
  434  * This is guaranteed by the input values.
  435  * roundedS is always cl->S rounded on grp->slot_shift bits.
  436  */
  437 static inline void
  438 qfq_slot_insert(struct qfq_group *grp, struct qfq_class *cl, uint64_t roundedS)
  439 {
  440         uint64_t slot = (roundedS - grp->S) >> grp->slot_shift;
  441         unsigned int i = (grp->front + slot) % QFQ_MAX_SLOTS;
  442 
  443         cl->next = grp->slots[i];
  444         grp->slots[i] = cl;
  445         __set_bit(slot, &grp->full_slots);
  446 }
  447 
  448 /*
  449  * remove the entry from the slot
  450  */
  451 static inline void
  452 qfq_front_slot_remove(struct qfq_group *grp)
  453 {
  454         struct qfq_class **h = &grp->slots[grp->front];
  455 
  456         *h = (*h)->next;
  457         if (!*h)
  458                 __clear_bit(0, &grp->full_slots);
  459 }
  460 
  461 /*
  462  * Returns the first full queue in a group. As a side effect,
  463  * adjust the bucket list so the first non-empty bucket is at
  464  * position 0 in full_slots.
  465  */
  466 static inline struct qfq_class *
  467 qfq_slot_scan(struct qfq_group *grp)
  468 {
  469         int i;
  470 
  471         ND("grp %d full %x", grp->index, grp->full_slots);
  472         if (!grp->full_slots)
  473                 return NULL;
  474 
  475         i = ffs(grp->full_slots) - 1; // zero-based
  476         if (i > 0) {
  477                 grp->front = (grp->front + i) % QFQ_MAX_SLOTS;
  478                 grp->full_slots >>= i;
  479         }
  480 
  481         return grp->slots[grp->front];
  482 }
  483 
  484 /*
  485  * adjust the bucket list. When the start time of a group decreases,
  486  * we move the index down (modulo QFQ_MAX_SLOTS) so we don't need to
  487  * move the objects. The mask of occupied slots must be shifted
  488  * because we use ffs() to find the first non-empty slot.
  489  * This covers decreases in the group's start time, but what about
  490  * increases of the start time ?
  491  * Here too we should make sure that i is less than 32
  492  */
  493 static inline void
  494 qfq_slot_rotate(struct qfq_sched *q, struct qfq_group *grp, uint64_t roundedS)
  495 {
  496         unsigned int i = (grp->S - roundedS) >> grp->slot_shift;
  497 
  498         (void)q;
  499         grp->full_slots <<= i;
  500         grp->front = (grp->front - i) % QFQ_MAX_SLOTS;
  501 }
  502 
  503 static inline void
  504 qfq_update_eligible(struct qfq_sched *q, uint64_t old_V)
  505 {
  506         bitmap ineligible;
  507 
  508         ineligible = q->bitmaps[IR] | q->bitmaps[IB];
  509         if (ineligible) {
  510                 if (!q->bitmaps[ER]) {
  511                         struct qfq_group *grp;
  512                         grp = qfq_ffs(q, ineligible);
  513                         if (qfq_gt(grp->S, q->V))
  514                                 q->V = grp->S;
  515                 }
  516                 qfq_make_eligible(q, old_V);
  517         }
  518 }
  519 
  520 /*
  521  * Updates the class, returns true if also the group needs to be updated.
  522  */
  523 static inline int
  524 qfq_update_class(struct qfq_sched *q, struct qfq_group *grp,
  525             struct qfq_class *cl)
  526 {
  527 
  528         (void)q;
  529         cl->S = cl->F;
  530         if (cl->_q.mq.head == NULL)  {
  531                 qfq_front_slot_remove(grp);
  532         } else {
  533                 unsigned int len;
  534                 uint64_t roundedS;
  535 
  536                 len = cl->_q.mq.head->m_pkthdr.len;
  537                 cl->F = cl->S + (uint64_t)len * cl->inv_w;
  538                 roundedS = qfq_round_down(cl->S, grp->slot_shift);
  539                 if (roundedS == grp->S)
  540                         return 0;
  541 
  542                 qfq_front_slot_remove(grp);
  543                 qfq_slot_insert(grp, cl, roundedS);
  544         }
  545         return 1;
  546 }
  547 
  548 static struct mbuf *
  549 qfq_dequeue(struct dn_sch_inst *si)
  550 {
  551         struct qfq_sched *q = (struct qfq_sched *)(si + 1);
  552         struct qfq_group *grp;
  553         struct qfq_class *cl;
  554         struct mbuf *m;
  555         uint64_t old_V;
  556 
  557         NO(q->loops++;)
  558         if (!q->bitmaps[ER]) {
  559                 NO(if (q->queued)
  560                         dump_sched(q, "start dequeue");)
  561                 return NULL;
  562         }
  563 
  564         grp = qfq_ffs(q, q->bitmaps[ER]);
  565 
  566         cl = grp->slots[grp->front];
  567         /* extract from the first bucket in the bucket list */
  568         m = dn_dequeue(&cl->_q);
  569 
  570         if (!m) {
  571                 D("BUG/* non-workconserving leaf */");
  572                 return NULL;
  573         }
  574         NO(q->queued--;)
  575         old_V = q->V;
  576         q->V += (uint64_t)m->m_pkthdr.len * q->iwsum;
  577         ND("m is %p F 0x%llx V now 0x%llx", m, cl->F, q->V);
  578 
  579         if (qfq_update_class(q, grp, cl)) {
  580                 uint64_t old_F = grp->F;
  581                 cl = qfq_slot_scan(grp);
  582                 if (!cl) { /* group gone, remove from ER */
  583                         __clear_bit(grp->index, &q->bitmaps[ER]);
  584                         // grp->S = grp->F + 1; // XXX debugging only
  585                 } else {
  586                         uint64_t roundedS = qfq_round_down(cl->S, grp->slot_shift);
  587                         unsigned int s;
  588 
  589                         if (grp->S == roundedS)
  590                                 goto skip_unblock;
  591                         grp->S = roundedS;
  592                         grp->F = roundedS + (2ULL << grp->slot_shift);
  593                         /* remove from ER and put in the new set */
  594                         __clear_bit(grp->index, &q->bitmaps[ER]);
  595                         s = qfq_calc_state(q, grp);
  596                         __set_bit(grp->index, &q->bitmaps[s]);
  597                 }
  598                 /* we need to unblock even if the group has gone away */
  599                 qfq_unblock_groups(q, grp->index, old_F);
  600         }
  601 
  602 skip_unblock:
  603         qfq_update_eligible(q, old_V);
  604         NO(if (!q->bitmaps[ER] && q->queued)
  605                 dump_sched(q, "end dequeue");)
  606 
  607         return m;
  608 }
  609 
  610 /*
  611  * Assign a reasonable start time for a new flow k in group i.
  612  * Admissible values for \hat(F) are multiples of \sigma_i
  613  * no greater than V+\sigma_i . Larger values mean that
  614  * we had a wraparound so we consider the timestamp to be stale.
  615  *
  616  * If F is not stale and F >= V then we set S = F.
  617  * Otherwise we should assign S = V, but this may violate
  618  * the ordering in ER. So, if we have groups in ER, set S to
  619  * the F_j of the first group j which would be blocking us.
  620  * We are guaranteed not to move S backward because
  621  * otherwise our group i would still be blocked.
  622  */
  623 static inline void
  624 qfq_update_start(struct qfq_sched *q, struct qfq_class *cl)
  625 {
  626         unsigned long mask;
  627         uint64_t limit, roundedF;
  628         int slot_shift = cl->grp->slot_shift;
  629 
  630         roundedF = qfq_round_down(cl->F, slot_shift);
  631         limit = qfq_round_down(q->V, slot_shift) + (1ULL << slot_shift);
  632 
  633         if (!qfq_gt(cl->F, q->V) || qfq_gt(roundedF, limit)) {
  634                 /* timestamp was stale */
  635                 mask = mask_from(q->bitmaps[ER], cl->grp->index);
  636                 if (mask) {
  637                         struct qfq_group *next = qfq_ffs(q, mask);
  638                         if (qfq_gt(roundedF, next->F)) {
  639                                 /* from pv 71261956973ba9e0637848a5adb4a5819b4bae83 */
  640                                 if (qfq_gt(limit, next->F))
  641                                         cl->S = next->F;
  642                                 else /* preserve timestamp correctness */
  643                                         cl->S = limit;
  644                                 return;
  645                         }
  646                 }
  647                 cl->S = q->V;
  648         } else { /* timestamp is not stale */
  649                 cl->S = cl->F;
  650         }
  651 }
  652 
  653 static int
  654 qfq_enqueue(struct dn_sch_inst *si, struct dn_queue *_q, struct mbuf *m)
  655 {
  656         struct qfq_sched *q = (struct qfq_sched *)(si + 1);
  657         struct qfq_group *grp;
  658         struct qfq_class *cl = (struct qfq_class *)_q;
  659         uint64_t roundedS;
  660         int s;
  661 
  662         NO(q->loops++;)
  663         DX(4, "len %d flow %p inv_w 0x%x grp %d", m->m_pkthdr.len,
  664                 _q, cl->inv_w, cl->grp->index);
  665         /* XXX verify that the packet obeys the parameters */
  666         if (m != _q->mq.head) {
  667                 if (dn_enqueue(_q, m, 0)) /* packet was dropped */
  668                         return 1;
  669                 NO(q->queued++;)
  670                 if (m != _q->mq.head)
  671                         return 0;
  672         }
  673         /* If reach this point, queue q was idle */
  674         grp = cl->grp;
  675         qfq_update_start(q, cl); /* adjust start time */
  676         /* compute new finish time and rounded start. */
  677         cl->F = cl->S + (uint64_t)(m->m_pkthdr.len) * cl->inv_w;
  678         roundedS = qfq_round_down(cl->S, grp->slot_shift);
  679 
  680         /*
  681          * insert cl in the correct bucket.
  682          * If cl->S >= grp->S we don't need to adjust the
  683          * bucket list and simply go to the insertion phase.
  684          * Otherwise grp->S is decreasing, we must make room
  685          * in the bucket list, and also recompute the group state.
  686          * Finally, if there were no flows in this group and nobody
  687          * was in ER make sure to adjust V.
  688          */
  689         if (grp->full_slots) {
  690                 if (!qfq_gt(grp->S, cl->S))
  691                         goto skip_update;
  692                 /* create a slot for this cl->S */
  693                 qfq_slot_rotate(q, grp, roundedS);
  694                 /* group was surely ineligible, remove */
  695                 __clear_bit(grp->index, &q->bitmaps[IR]);
  696                 __clear_bit(grp->index, &q->bitmaps[IB]);
  697         } else if (!q->bitmaps[ER] && qfq_gt(roundedS, q->V))
  698                 q->V = roundedS;
  699 
  700         grp->S = roundedS;
  701         grp->F = roundedS + (2ULL << grp->slot_shift); // i.e. 2\sigma_i
  702         s = qfq_calc_state(q, grp);
  703         __set_bit(grp->index, &q->bitmaps[s]);
  704         ND("new state %d 0x%x", s, q->bitmaps[s]);
  705         ND("S %llx F %llx V %llx", cl->S, cl->F, q->V);
  706 skip_update:
  707         qfq_slot_insert(grp, cl, roundedS);
  708 
  709         return 0;
  710 }
  711 
  712 #if 0
  713 static inline void
  714 qfq_slot_remove(struct qfq_sched *q, struct qfq_group *grp,
  715         struct qfq_class *cl, struct qfq_class **pprev)
  716 {
  717         unsigned int i, offset;
  718         uint64_t roundedS;
  719 
  720         roundedS = qfq_round_down(cl->S, grp->slot_shift);
  721         offset = (roundedS - grp->S) >> grp->slot_shift;
  722         i = (grp->front + offset) % QFQ_MAX_SLOTS;
  723 
  724 #ifdef notyet
  725         if (!pprev) {
  726                 pprev = &grp->slots[i];
  727                 while (*pprev && *pprev != cl)
  728                         pprev = &(*pprev)->next;
  729         }
  730 #endif
  731 
  732         *pprev = cl->next;
  733         if (!grp->slots[i])
  734                 __clear_bit(offset, &grp->full_slots);
  735 }
  736 
  737 /*
  738  * called to forcibly destroy a queue.
  739  * If the queue is not in the front bucket, or if it has
  740  * other queues in the front bucket, we can simply remove
  741  * the queue with no other side effects.
  742  * Otherwise we must propagate the event up.
  743  * XXX description to be completed.
  744  */
  745 static void
  746 qfq_deactivate_class(struct qfq_sched *q, struct qfq_class *cl,
  747                                  struct qfq_class **pprev)
  748 {
  749         struct qfq_group *grp = &q->groups[cl->index];
  750         unsigned long mask;
  751         uint64_t roundedS;
  752         int s;
  753 
  754         cl->F = cl->S;  // not needed if the class goes away.
  755         qfq_slot_remove(q, grp, cl, pprev);
  756 
  757         if (!grp->full_slots) {
  758                 /* nothing left in the group, remove from all sets.
  759                  * Do ER last because if we were blocking other groups
  760                  * we must unblock them.
  761                  */
  762                 __clear_bit(grp->index, &q->bitmaps[IR]);
  763                 __clear_bit(grp->index, &q->bitmaps[EB]);
  764                 __clear_bit(grp->index, &q->bitmaps[IB]);
  765 
  766                 if (test_bit(grp->index, &q->bitmaps[ER]) &&
  767                     !(q->bitmaps[ER] & ~((1UL << grp->index) - 1))) {
  768                         mask = q->bitmaps[ER] & ((1UL << grp->index) - 1);
  769                         if (mask)
  770                                 mask = ~((1UL << __fls(mask)) - 1);
  771                         else
  772                                 mask = ~0UL;
  773                         qfq_move_groups(q, mask, EB, ER);
  774                         qfq_move_groups(q, mask, IB, IR);
  775                 }
  776                 __clear_bit(grp->index, &q->bitmaps[ER]);
  777         } else if (!grp->slots[grp->front]) {
  778                 cl = qfq_slot_scan(grp);
  779                 roundedS = qfq_round_down(cl->S, grp->slot_shift);
  780                 if (grp->S != roundedS) {
  781                         __clear_bit(grp->index, &q->bitmaps[ER]);
  782                         __clear_bit(grp->index, &q->bitmaps[IR]);
  783                         __clear_bit(grp->index, &q->bitmaps[EB]);
  784                         __clear_bit(grp->index, &q->bitmaps[IB]);
  785                         grp->S = roundedS;
  786                         grp->F = roundedS + (2ULL << grp->slot_shift);
  787                         s = qfq_calc_state(q, grp);
  788                         __set_bit(grp->index, &q->bitmaps[s]);
  789                 }
  790         }
  791         qfq_update_eligible(q, q->V);
  792 }
  793 #endif
  794 
  795 static int
  796 qfq_new_fsk(struct dn_fsk *f)
  797 {
  798         ipdn_bound_var(&f->fs.par[0], 1, 1, QFQ_MAX_WEIGHT, "qfq weight");
  799         ipdn_bound_var(&f->fs.par[1], 1500, 1, 2000, "qfq maxlen");
  800         ND("weight %d len %d\n", f->fs.par[0], f->fs.par[1]);
  801         return 0;
  802 }
  803 
  804 /*
  805  * initialize a new scheduler instance
  806  */
  807 static int
  808 qfq_new_sched(struct dn_sch_inst *si)
  809 {
  810         struct qfq_sched *q = (struct qfq_sched *)(si + 1);
  811         struct qfq_group *grp;
  812         int i;
  813 
  814         for (i = 0; i <= QFQ_MAX_INDEX; i++) {
  815                 grp = &q->groups[i];
  816                 grp->index = i;
  817                 grp->slot_shift = QFQ_MTU_SHIFT + FRAC_BITS -
  818                                         (QFQ_MAX_INDEX - i);
  819         }
  820         return 0;
  821 }
  822 
  823 /*
  824  * QFQ scheduler descriptor
  825  */
  826 static struct dn_alg qfq_desc = {
  827         _SI( .type = ) DN_SCHED_QFQ,
  828         _SI( .name = ) "QFQ",
  829         _SI( .flags = ) DN_MULTIQUEUE,
  830 
  831         _SI( .schk_datalen = ) 0,
  832         _SI( .si_datalen = ) sizeof(struct qfq_sched),
  833         _SI( .q_datalen = ) sizeof(struct qfq_class) - sizeof(struct dn_queue),
  834 
  835         _SI( .enqueue = ) qfq_enqueue,
  836         _SI( .dequeue = ) qfq_dequeue,
  837 
  838         _SI( .config = )  NULL,
  839         _SI( .destroy = )  NULL,
  840         _SI( .new_sched = ) qfq_new_sched,
  841         _SI( .free_sched = )  NULL,
  842         _SI( .new_fsk = ) qfq_new_fsk,
  843         _SI( .free_fsk = )  NULL,
  844         _SI( .new_queue = ) qfq_new_queue,
  845         _SI( .free_queue = ) qfq_free_queue,
  846 #ifdef NEW_AQM
  847         _SI( .getconfig = )  NULL,
  848 #endif
  849 };
  850 
  851 DECLARE_DNSCHED_MODULE(dn_qfq, &qfq_desc);
  852 
  853 #ifdef QFQ_DEBUG
  854 static void
  855 dump_groups(struct qfq_sched *q, uint32_t mask)
  856 {
  857         int i, j;
  858 
  859         for (i = 0; i < QFQ_MAX_INDEX + 1; i++) {
  860                 struct qfq_group *g = &q->groups[i];
  861 
  862                 if (0 == (mask & (1<<i)))
  863                         continue;
  864                 for (j = 0; j < QFQ_MAX_SLOTS; j++) {
  865                         if (g->slots[j])
  866                                 D("    bucket %d %p", j, g->slots[j]);
  867                 }
  868                 D("full_slots 0x%llx", (_P64)g->full_slots);
  869                 D("        %2d S 0x%20llx F 0x%llx %c", i,
  870                         (_P64)g->S, (_P64)g->F,
  871                         mask & (1<<i) ? '1' : '');
  872         }
  873 }
  874 
  875 static void
  876 dump_sched(struct qfq_sched *q, const char *msg)
  877 {
  878         D("--- in %s: ---", msg);
  879         D("loops %d queued %d V 0x%llx", q->loops, q->queued, (_P64)q->V);
  880         D("    ER 0x%08x", (unsigned)q->bitmaps[ER]);
  881         D("    EB 0x%08x", (unsigned)q->bitmaps[EB]);
  882         D("    IR 0x%08x", (unsigned)q->bitmaps[IR]);
  883         D("    IB 0x%08x", (unsigned)q->bitmaps[IB]);
  884         dump_groups(q, 0xffffffff);
  885 };
  886 #endif /* QFQ_DEBUG */

Cache object: 14590e6a1b48142281fc2dd3db646411


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.