The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/netpfil/ipfw/dn_sched_wf2q.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
    3  *
    4  * Copyright (c) 2010 Riccardo Panicucci, Universita` di Pisa
    5  * Copyright (c) 2000-2002 Luigi Rizzo, Universita` di Pisa
    6  * All rights reserved
    7  *
    8  * Redistribution and use in source and binary forms, with or without
    9  * modification, are permitted provided that the following conditions
   10  * are met:
   11  * 1. Redistributions of source code must retain the above copyright
   12  *    notice, this list of conditions and the following disclaimer.
   13  * 2. Redistributions in binary form must reproduce the above copyright
   14  *    notice, this list of conditions and the following disclaimer in the
   15  *    documentation and/or other materials provided with the distribution.
   16  *
   17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   27  * SUCH DAMAGE.
   28  */
   29 
   30 /*
   31  * $FreeBSD$
   32  */
   33 
   34 #ifdef _KERNEL
   35 #include <sys/malloc.h>
   36 #include <sys/socket.h>
   37 #include <sys/socketvar.h>
   38 #include <sys/kernel.h>
   39 #include <sys/lock.h>
   40 #include <sys/mbuf.h>
   41 #include <sys/module.h>
   42 #include <sys/rwlock.h>
   43 #include <net/if.h>     /* IFNAMSIZ */
   44 #include <netinet/in.h>
   45 #include <netinet/ip_var.h>             /* ipfw_rule_ref */
   46 #include <netinet/ip_fw.h>      /* flow_id */
   47 #include <netinet/ip_dummynet.h>
   48 #include <netpfil/ipfw/ip_fw_private.h>
   49 #include <netpfil/ipfw/dn_heap.h>
   50 #include <netpfil/ipfw/ip_dn_private.h>
   51 #ifdef NEW_AQM
   52 #include <netpfil/ipfw/dn_aqm.h>
   53 #endif
   54 #include <netpfil/ipfw/dn_sched.h>
   55 #else
   56 #include <dn_test.h>
   57 #endif
   58 
   /*
    * MAX64(x, y): the later of two 64-bit timestamps.
    *
    * The comparison is done on the signed 64-bit difference (y - x), so the
    * result remains meaningful when the timestamps have wrapped around.
    * The whole expansion is parenthesized so that the macro can be used
    * inside a larger expression (e.g. "1 + MAX64(a, b)").
    * Each argument may be evaluated twice: do not pass expressions with
    * side effects.
    */
   #ifndef MAX64
   #define MAX64(x, y)  ((((int64_t)((y) - (x))) > 0) ? (y) : (x))
   #endif
   62 
   /*
    * Timestamps are 64-bit fixed-point numbers.
    *
    * LMAX_BITS and WMAX_BITS are the maximum number of bits for the packet
    * length and for the sum of weights, respectively; FRAC_BITS is the
    * number of fractional bits.  We want:
    *  - FRAC_BITS much larger than WMAX_BITS, to avoid too large errors
    *    when computing the inverse of a weight;
    *  - FRAC_BITS < 32, so that 1/w can be computed with an unsigned
    *    32-bit division;
    *  - LMAX_BITS + WMAX_BITS + FRAC_BITS well below 64, to avoid
    *    wraparounds.
    * As an example: FRAC_BITS = 26, LMAX_BITS = 14, WMAX_BITS = 19.
    *
    * ONE_FP is the value 1.0 in this fixed-point format.  It has its own
    * guard so that it is still defined when FRAC_BITS is supplied by the
    * build environment.
    */
   #ifndef FRAC_BITS
   #define FRAC_BITS    28 /* shift for fixed point arithmetic */
   #endif
   #ifndef ONE_FP
   #define ONE_FP  (1UL << FRAC_BITS)
   #endif
   78 
   79 /*
   80  * Private information for the scheduler instance:
   81  * sch_heap (key is Finish time) returns the next queue to serve
   82  * ne_heap (key is Start time) stores not-eligible queues
   83  * idle_heap (key=start/finish time) stores idle flows. It must
   84  *      support extract-from-middle.
   85  * A flow is only in 1 of the three heaps.
   86  * XXX todo: use a more efficient data structure, e.g. a tree sorted
   87  * by F with min_subtree(S) in each node
   88  */
   89 struct wf2qp_si {
   90     struct dn_heap sch_heap;    /* top extract - key Finish  time */
   91     struct dn_heap ne_heap;     /* top extract - key Start   time */
   92     struct dn_heap idle_heap;   /* random extract - key Start=Finish time */
   93     uint64_t V;                 /* virtual time */
   94     uint32_t inv_wsum;          /* inverse of sum of weights */
   95     uint32_t wsum;              /* sum of weights */
   96 };
   97 
   98 struct wf2qp_queue {
   99     struct dn_queue _q;
  100     uint64_t S, F;              /* start time, finish time */
  101     uint32_t inv_w;             /* ONE_FP / weight */
  102     int32_t heap_pos;           /* position (index) of struct in heap */
  103 };
  104 
  105 /*
  106  * This file implements a WF2Q+ scheduler as it has been in dummynet
  107  * since 2000.
  108  * The scheduler supports per-flow queues and has O(log N) complexity.
  109  *
  110  * WF2Q+ needs to drain entries from the idle heap so that we
  111  * can keep the sum of weights up to date. We can do it whenever
  112  * we get a chance, or periodically, or following some other
  113  * strategy. The function idle_check() drains at most N elements
  114  * from the idle heap.
  115  */
  116 static void
  117 idle_check(struct wf2qp_si *si, int n, int force)
  118 {
  119     struct dn_heap *h = &si->idle_heap;
  120     while (n-- > 0 && h->elements > 0 &&
  121                 (force || DN_KEY_LT(HEAP_TOP(h)->key, si->V))) {
  122         struct dn_queue *q = HEAP_TOP(h)->object;
  123         struct wf2qp_queue *alg_fq = (struct wf2qp_queue *)q;
  124 
  125         heap_extract(h, NULL);
  126         /* XXX to let the flowset delete the queue we should
  127          * mark it as 'unused' by the scheduler.
  128          */
  129         alg_fq->S = alg_fq->F + 1; /* Mark timestamp as invalid. */
  130         si->wsum -= q->fs->fs.par[0];   /* adjust sum of weights */
  131         if (si->wsum > 0)
  132                 si->inv_wsum = ONE_FP/si->wsum;
  133     }
  134 }
  135 
  136 static int
  137 wf2qp_enqueue(struct dn_sch_inst *_si, struct dn_queue *q, struct mbuf *m)
  138 {
  139     struct dn_fsk *fs = q->fs;
  140     struct wf2qp_si *si = (struct wf2qp_si *)(_si + 1);
  141     struct wf2qp_queue *alg_fq;
  142     uint64_t len = m->m_pkthdr.len;
  143 
  144     if (m != q->mq.head) {
  145         if (dn_enqueue(q, m, 0)) /* packet was dropped */
  146             return 1;
  147         if (m != q->mq.head)    /* queue was already busy */
  148             return 0;
  149     }
  150 
  151     /* If reach this point, queue q was idle */
  152     alg_fq = (struct wf2qp_queue *)q;
  153 
  154     if (DN_KEY_LT(alg_fq->F, alg_fq->S)) {
  155         /* F<S means timestamps are invalid ->brand new queue. */
  156         alg_fq->S = si->V;              /* init start time */
  157         si->wsum += fs->fs.par[0];      /* add weight of new queue. */
  158         si->inv_wsum = ONE_FP/si->wsum;
  159     } else { /* if it was idle then it was in the idle heap */
  160         heap_extract(&si->idle_heap, q);
  161         alg_fq->S = MAX64(alg_fq->F, si->V);    /* compute new S */
  162     }
  163     alg_fq->F = alg_fq->S + len * alg_fq->inv_w;
  164 
  165     /* if nothing is backlogged, make sure this flow is eligible */
  166     if (si->ne_heap.elements == 0 && si->sch_heap.elements == 0)
  167         si->V = MAX64(alg_fq->S, si->V);
  168 
  169     /*
  170      * Look at eligibility. A flow is not eligibile if S>V (when
  171      * this happens, it means that there is some other flow already
  172      * scheduled for the same pipe, so the sch_heap cannot be
  173      * empty). If the flow is not eligible we just store it in the
  174      * ne_heap. Otherwise, we store in the sch_heap.
  175      * Note that for all flows in sch_heap (SCH), S_i <= V,
  176      * and for all flows in ne_heap (NEH), S_i > V.
  177      * So when we need to compute max(V, min(S_i)) forall i in
  178      * SCH+NEH, we only need to look into NEH.
  179      */
  180     if (DN_KEY_LT(si->V, alg_fq->S)) {
  181         /* S>V means flow Not eligible. */
  182         if (si->sch_heap.elements == 0)
  183             D("++ ouch! not eligible but empty scheduler!");
  184         heap_insert(&si->ne_heap, alg_fq->S, q);
  185     } else {
  186         heap_insert(&si->sch_heap, alg_fq->F, q);
  187     }
  188     return 0;
  189 }
  190 
  191 /* XXX invariant: sch > 0 || V >= min(S in neh) */
  192 static struct mbuf *
  193 wf2qp_dequeue(struct dn_sch_inst *_si)
  194 {
  195         /* Access scheduler instance private data */
  196         struct wf2qp_si *si = (struct wf2qp_si *)(_si + 1);
  197         struct mbuf *m;
  198         struct dn_queue *q;
  199         struct dn_heap *sch = &si->sch_heap;
  200         struct dn_heap *neh = &si->ne_heap;
  201         struct wf2qp_queue *alg_fq;
  202 
  203         if (sch->elements == 0 && neh->elements == 0) {
  204                 /* we have nothing to do. We could kill the idle heap
  205                  * altogether and reset V
  206                  */
  207                 idle_check(si, 0x7fffffff, 1);
  208                 si->V = 0;
  209                 si->wsum = 0;   /* should be set already */
  210                 return NULL;    /* quick return if nothing to do */
  211         }
  212         idle_check(si, 1, 0);   /* drain something from the idle heap */
  213 
  214         /* make sure at least one element is eligible, bumping V
  215          * and moving entries that have become eligible.
  216          * We need to repeat the first part twice, before and
  217          * after extracting the candidate, or enqueue() will
  218          * find the data structure in a wrong state.
  219          */
  220   m = NULL;
  221   for(;;) {
  222         /*
  223          * Compute V = max(V, min(S_i)). Remember that all elements
  224          * in sch have by definition S_i <= V so if sch is not empty,
  225          * V is surely the max and we must not update it. Conversely,
  226          * if sch is empty we only need to look at neh.
  227          * We don't need to move the queues, as it will be done at the
  228          * next enqueue
  229          */
  230         if (sch->elements == 0 && neh->elements > 0) {
  231                 si->V = MAX64(si->V, HEAP_TOP(neh)->key);
  232         }
  233         while (neh->elements > 0 &&
  234                     DN_KEY_LEQ(HEAP_TOP(neh)->key, si->V)) {
  235                 q = HEAP_TOP(neh)->object;
  236                 alg_fq = (struct wf2qp_queue *)q;
  237                 heap_extract(neh, NULL);
  238                 heap_insert(sch, alg_fq->F, q);
  239         }
  240         if (m) /* pkt found in previous iteration */
  241                 break;
  242         /* ok we have at least one eligible pkt */
  243         q = HEAP_TOP(sch)->object;
  244         alg_fq = (struct wf2qp_queue *)q;
  245         m = dn_dequeue(q);
  246         if (m == NULL)
  247                 return NULL;
  248         heap_extract(sch, NULL); /* Remove queue from heap. */
  249         si->V += (uint64_t)(m->m_pkthdr.len) * si->inv_wsum;
  250         alg_fq->S = alg_fq->F;  /* Update start time. */
  251         if (q->mq.head == 0) {  /* not backlogged any more. */
  252                 heap_insert(&si->idle_heap, alg_fq->F, q);
  253         } else {                        /* Still backlogged. */
  254                 /* Update F, store in neh or sch */
  255                 uint64_t len = q->mq.head->m_pkthdr.len;
  256                 alg_fq->F += len * alg_fq->inv_w;
  257                 if (DN_KEY_LEQ(alg_fq->S, si->V)) {
  258                         heap_insert(sch, alg_fq->F, q);
  259                 } else {
  260                         heap_insert(neh, alg_fq->S, q);
  261                 }
  262         }
  263     }
  264         return m;
  265 }
  266 
  267 static int
  268 wf2qp_new_sched(struct dn_sch_inst *_si)
  269 {
  270         struct wf2qp_si *si = (struct wf2qp_si *)(_si + 1);
  271         int ofs = offsetof(struct wf2qp_queue, heap_pos);
  272 
  273         /* all heaps support extract from middle */
  274         if (heap_init(&si->idle_heap, 16, ofs) ||
  275             heap_init(&si->sch_heap, 16, ofs) ||
  276             heap_init(&si->ne_heap, 16, ofs)) {
  277                 heap_free(&si->ne_heap);
  278                 heap_free(&si->sch_heap);
  279                 heap_free(&si->idle_heap);
  280                 return ENOMEM;
  281         }
  282         return 0;
  283 }
  284 
  285 static int
  286 wf2qp_free_sched(struct dn_sch_inst *_si)
  287 {
  288         struct wf2qp_si *si = (struct wf2qp_si *)(_si + 1);
  289 
  290         heap_free(&si->sch_heap);
  291         heap_free(&si->ne_heap);
  292         heap_free(&si->idle_heap);
  293 
  294         return 0;
  295 }
  296 
  297 static int
  298 wf2qp_new_fsk(struct dn_fsk *fs)
  299 {
  300         ipdn_bound_var(&fs->fs.par[0], 1,
  301                 1, 100, "WF2Q+ weight");
  302         return 0;
  303 }
  304 
  305 static int
  306 wf2qp_new_queue(struct dn_queue *_q)
  307 {
  308         struct wf2qp_queue *q = (struct wf2qp_queue *)_q;
  309 
  310         _q->ni.oid.subtype = DN_SCHED_WF2QP;
  311         q->F = 0;       /* not strictly necessary */
  312         q->S = q->F + 1;    /* mark timestamp as invalid. */
  313         q->inv_w = ONE_FP / _q->fs->fs.par[0];
  314         if (_q->mq.head != NULL) {
  315                 wf2qp_enqueue(_q->_si, _q, _q->mq.head);
  316         }
  317         return 0;
  318 }
  319 
  320 /*
  321  * Called when the infrastructure removes a queue (e.g. flowset
  322  * is reconfigured). Nothing to do if we did not 'own' the queue,
  323  * otherwise remove it from the right heap and adjust the sum
  324  * of weights.
  325  */
  326 static int
  327 wf2qp_free_queue(struct dn_queue *q)
  328 {
  329         struct wf2qp_queue *alg_fq = (struct wf2qp_queue *)q;
  330         struct wf2qp_si *si = (struct wf2qp_si *)(q->_si + 1);
  331 
  332         if (alg_fq->S >= alg_fq->F + 1)
  333                 return 0;       /* nothing to do, not in any heap */
  334         si->wsum -= q->fs->fs.par[0];
  335         if (si->wsum > 0)
  336                 si->inv_wsum = ONE_FP/si->wsum;
  337 
  338         /* extract from the heap. XXX TODO we may need to adjust V
  339          * to make sure the invariants hold.
  340          */
  341         if (q->mq.head == NULL) {
  342                 heap_extract(&si->idle_heap, q);
  343         } else if (DN_KEY_LT(si->V, alg_fq->S)) {
  344                 heap_extract(&si->ne_heap, q);
  345         } else {
  346                 heap_extract(&si->sch_heap, q);
  347         }
  348         return 0;
  349 }
  350 
  351 /*
  352  * WF2Q+ scheduler descriptor
  353  * contains the type of the scheduler, the name, the size of the
  354  * structures and function pointers.
  355  */
  356 static struct dn_alg wf2qp_desc = {
  357         _SI( .type = ) DN_SCHED_WF2QP,
  358         _SI( .name = ) "WF2Q+",
  359         _SI( .flags = ) DN_MULTIQUEUE,
  360 
  361         /* we need extra space in the si and the queue */
  362         _SI( .schk_datalen = ) 0,
  363         _SI( .si_datalen = ) sizeof(struct wf2qp_si),
  364         _SI( .q_datalen = ) sizeof(struct wf2qp_queue) -
  365                                 sizeof(struct dn_queue),
  366 
  367         _SI( .enqueue = ) wf2qp_enqueue,
  368         _SI( .dequeue = ) wf2qp_dequeue,
  369 
  370         _SI( .config = )  NULL,
  371         _SI( .destroy = )  NULL,
  372         _SI( .new_sched = ) wf2qp_new_sched,
  373         _SI( .free_sched = ) wf2qp_free_sched,
  374 
  375         _SI( .new_fsk = ) wf2qp_new_fsk,
  376         _SI( .free_fsk = )  NULL,
  377 
  378         _SI( .new_queue = ) wf2qp_new_queue,
  379         _SI( .free_queue = ) wf2qp_free_queue,
  380 #ifdef NEW_AQM
  381         _SI( .getconfig = )  NULL,
  382 #endif
  383 
  384 };
  385 
  386 DECLARE_DNSCHED_MODULE(dn_wf2qp, &wf2qp_desc);

Cache object: 8cb23bcf06c9ca98ef59224a50a506d3


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.