The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/bufq_priocscan.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*      $NetBSD: bufq_priocscan.c,v 1.21 2017/05/04 11:03:27 kamil Exp $        */
    2 
    3 /*-
    4  * Copyright (c)2004,2005,2006,2008,2009,2011,2012 YAMAMOTO Takashi,
    5  * All rights reserved.
    6  *
    7  * Redistribution and use in source and binary forms, with or without
    8  * modification, are permitted provided that the following conditions
    9  * are met:
   10  * 1. Redistributions of source code must retain the above copyright
   11  *    notice, this list of conditions and the following disclaimer.
   12  * 2. Redistributions in binary form must reproduce the above copyright
   13  *    notice, this list of conditions and the following disclaimer in the
   14  *    documentation and/or other materials provided with the distribution.
   15  *
   16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   26  * SUCH DAMAGE.
   27  */
   28 
   29 #include <sys/cdefs.h>
   30 __KERNEL_RCSID(0, "$NetBSD: bufq_priocscan.c,v 1.21 2017/05/04 11:03:27 kamil Exp $");
   31 
   32 #include <sys/param.h>
   33 #include <sys/systm.h>
   34 #include <sys/buf.h>
   35 #include <sys/bufq.h>
   36 #include <sys/bufq_impl.h>
   37 #include <sys/kmem.h>
   38 #include <sys/rbtree.h>
   39 #include <sys/module.h>
   40 
   41 #undef  PRIOCSCAN_USE_GLOBAL_POSITION
   42 
   43 /*
   44  * Cyclical scan (CSCAN)
   45  */
   46 
/*
 * cscan_key: the scan-head position within a cscan_queue.  Mirrors the
 * struct buf fields (b_rawblkno/b_cylinder) used by the sort comparators.
 */
struct cscan_key {
        daddr_t k_rawblkno;     /* raw block number */
        int k_cylinder;         /* cylinder number */
};
   51 
/*
 * cscan_queue: one cyclical-scan (CSCAN) queue.  Buffers are kept in an
 * rbtree ordered by the chosen sort discipline so the next request at or
 * beyond the current head position can be located in O(log n).
 */
struct cscan_queue {
        rb_tree_t cq_buffers;           /* ordered list of buffers */
#if !defined(PRIOCSCAN_USE_GLOBAL_POSITION)
        struct cscan_key cq_lastkey;    /* key of last request */
#endif /* !defined(PRIOCSCAN_USE_GLOBAL_POSITION) */
        int cq_sortby;                  /* BUFQ_SORT_MASK */
        rb_tree_ops_t cq_ops;           /* per-queue ops copy; see cscan_init */
};
   60 
   61 static signed int
   62 buf_cmp(const struct buf *b1, const struct buf *b2, int sortby)
   63 {
   64 
   65         if (buf_inorder(b2, b1, sortby)) {
   66                 return 1;       /* b1 > b2 */
   67         }
   68         if (buf_inorder(b1, b2, sortby)) {
   69                 return -1;      /* b1 < b2 */
   70         }
   71         return 0;
   72 }
   73 
   74 /* return positive if n1 > n2 */
   75 static signed int
   76 cscan_tree_compare_nodes(void *context, const void *n1, const void *n2)
   77 {
   78         const struct cscan_queue * const q = context;
   79         const struct buf * const b1 = n1;
   80         const struct buf * const b2 = n2;
   81         const int sortby = q->cq_sortby;
   82         const int diff = buf_cmp(b1, b2, sortby);
   83 
   84         /*
   85          * XXX rawblkno/cylinder might not be unique.  eg. unbuffered i/o
   86          */
   87 
   88         if (diff != 0) {
   89                 return diff;
   90         }
   91 
   92         /*
   93          * XXX rawblkno/cylinder might not be unique.  eg. unbuffered i/o
   94          */
   95         if (b1 > b2) {
   96                 return 1;
   97         }
   98         if (b1 < b2) {
   99                 return -1;
  100         }
  101         return 0;
  102 }
  103 
  104 /* return positive if n1 > k2 */
  105 static signed int
  106 cscan_tree_compare_key(void *context, const void *n1, const void *k2)
  107 {
  108         const struct cscan_queue * const q = context;
  109         const struct buf * const b1 = n1;
  110         const struct cscan_key * const key = k2;
  111         const struct buf tmp = {
  112                 .b_rawblkno = key->k_rawblkno,
  113                 .b_cylinder = key->k_cylinder,
  114         };
  115         const struct buf *b2 = &tmp;
  116         const int sortby = q->cq_sortby;
  117 
  118         return buf_cmp(b1, b2, sortby);
  119 }
  120 
/*
 * cscan_dump: print all queued buffers in tree order.  Debugging aid;
 * not referenced by normal builds, hence __unused.
 */
static void __unused
cscan_dump(struct cscan_queue *cq)
{
        const int sortby = cq->cq_sortby;
        struct buf *bp;

        RB_TREE_FOREACH(bp, &cq->cq_buffers) {
                if (sortby == BUFQ_SORT_RAWBLOCK) {
                        printf(" %jd", (intmax_t)bp->b_rawblkno);
                } else {
                        /* cylinder-major sort; show both sort fields */
                        printf(" %jd/%jd",
                            (intmax_t)bp->b_cylinder, (intmax_t)bp->b_rawblkno);
                }
        }
}
  136 
/*
 * cscan_empty: true iff the queue holds no buffers.
 */
static inline bool
cscan_empty(struct cscan_queue *q)
{

        /* XXX this might do more work than necessary */
        return rb_tree_iterate(&q->cq_buffers, NULL, RB_DIR_LEFT) == NULL;
}
  144 
/*
 * cscan_put: insert a buffer into the queue.
 *
 * The insert cannot collide: the node comparator breaks ties by buffer
 * address, so rb_tree_insert_node always returns the node we passed in.
 */
static void
cscan_put(struct cscan_queue *q, struct buf *bp)
{
        struct buf *obp __diagused;

        obp = rb_tree_insert_node(&q->cq_buffers, bp);
        KASSERT(obp == bp); /* see cscan_tree_compare_nodes */
}
  153 
/*
 * cscan_get: return the next buffer in cyclical-scan order.
 *
 * Finds the first buffer at or beyond the head position *key; if there
 * is none, wraps around to the lowest-keyed buffer.  When "remove" is
 * nonzero the buffer is taken off the queue and *key is advanced to
 * just past it.  Returns NULL only when the queue is empty.
 */
static struct buf *
cscan_get(struct cscan_queue *q, int remove, struct cscan_key *key)
{
        struct buf *bp;

        bp = rb_tree_find_node_geq(&q->cq_buffers, key);
        KDASSERT(bp == NULL || cscan_tree_compare_key(q, bp, key) >= 0);
        if (bp == NULL) {
                /* wrapped past the end: restart from the leftmost buffer */
                bp = rb_tree_iterate(&q->cq_buffers, NULL, RB_DIR_LEFT);
                KDASSERT(cscan_tree_compare_key(q, bp, key) < 0);
        }
        if (bp != NULL && remove) {
#if defined(DEBUG)
                struct buf *nbp;
#endif /* defined(DEBUG) */

                rb_tree_remove_node(&q->cq_buffers, bp);
                /*
                 * remember the head position.
                 */
                key->k_cylinder = bp->b_cylinder;
                key->k_rawblkno = bp->b_rawblkno + (bp->b_bcount >> DEV_BSHIFT);
#if defined(DEBUG)
                /* sanity check: nothing still queued may sort before bp */
                nbp = rb_tree_find_node_geq(&q->cq_buffers, key);
                if (nbp != NULL && cscan_tree_compare_nodes(q, nbp, bp) < 0) {
                        panic("%s: wrong order %p < %p\n", __func__,
                            nbp, bp);
                }
#endif /* defined(DEBUG) */
        }
        return bp;
}
  186 
/*
 * cscan_init: initialize a cscan queue sorted by "sortby"
 * (one of the BUFQ_SORT_MASK disciplines).
 */
static void
cscan_init(struct cscan_queue *q, int sortby)
{
        static const rb_tree_ops_t cscan_tree_ops = {
                .rbto_compare_nodes = cscan_tree_compare_nodes,
                .rbto_compare_key = cscan_tree_compare_key,
                .rbto_node_offset = offsetof(struct buf, b_u.u_rbnode),
                .rbto_context = NULL,
        };

        q->cq_sortby = sortby;
        /* XXX copy ops to workaround rbtree.h API limitation */
        q->cq_ops = cscan_tree_ops;
        q->cq_ops.rbto_context = q;     /* comparators need the queue for cq_sortby */
        rb_tree_init(&q->cq_buffers, &q->cq_ops);
}
  203 
  204 /*
 * Per-priority CSCAN.
  206  *
  207  * XXX probably we should have a way to raise
  208  * priority of the on-queue requests.
  209  */
  210 #define PRIOCSCAN_NQUEUE        3
  211 
struct priocscan_queue {
        struct cscan_queue q_queue;     /* requests at this priority */
        unsigned int q_burst;           /* remaining burst budget; see priocscan_burst */
};
  216 
/*
 * bufq_priocscan: per-bufq private state; one cscan queue per
 * priority level, highest priority first.
 */
struct bufq_priocscan {
        struct priocscan_queue bq_queue[PRIOCSCAN_NQUEUE];

#if defined(PRIOCSCAN_USE_GLOBAL_POSITION)
        /*
         * XXX using "global" head position can reduce positioning time
         * when switching between queues.
         * although it might affect fairness.
         */
        struct cscan_key bq_lastkey;
#endif
};
  229 
/*
 * how many requests to serve when having pending requests on other queues.
 *
 * XXX tune
 * be careful: while making these values larger likely
 * increases the total throughput, it can also increase latencies
 * for some workloads.
 */
const int priocscan_burst[] = {
        64, 16, 4       /* indexed by queue: 0 is the highest priority */
};
  241 
  242 static void bufq_priocscan_init(struct bufq_state *);
  243 static void bufq_priocscan_put(struct bufq_state *, struct buf *);
  244 static struct buf *bufq_priocscan_get(struct bufq_state *, int);
  245 
  246 BUFQ_DEFINE(priocscan, 40, bufq_priocscan_init);
  247 
  248 static inline struct cscan_queue *bufq_priocscan_selectqueue(
  249     struct bufq_priocscan *, const struct buf *);
  250 
/*
 * bufq_priocscan_selectqueue: map a buffer's i/o priority (BIO_GETPRIO)
 * to the corresponding cscan queue; queue 0 serves the most critical i/o.
 */
static inline struct cscan_queue *
bufq_priocscan_selectqueue(struct bufq_priocscan *q, const struct buf *bp)
{
        static const int priocscan_priomap[] = {
                [BPRIO_TIMENONCRITICAL] = 2,
                [BPRIO_TIMELIMITED] = 1,
                [BPRIO_TIMECRITICAL] = 0
        };

        return &q->bq_queue[priocscan_priomap[BIO_GETPRIO(bp)]].q_queue;
}
  262 
  263 static void
  264 bufq_priocscan_put(struct bufq_state *bufq, struct buf *bp)
  265 {
  266         struct bufq_priocscan *q = bufq_private(bufq);
  267         struct cscan_queue *cq;
  268 
  269         cq = bufq_priocscan_selectqueue(q, bp);
  270         cscan_put(cq, bp);
  271 }
  272 
/*
 * bufq_priocscan_get: pick the next buffer to serve, or NULL if all
 * queues are empty.  With "remove" nonzero the buffer is dequeued and
 * burst accounting is consumed; otherwise this is a peek and no state
 * is modified.
 *
 * When several queues are non-empty, the highest-priority queue whose
 * burst count has not yet been exhausted is served; once every
 * non-empty queue has used up its burst, the counts are refilled from
 * priocscan_burst[] so lower priorities cannot be starved forever.
 */
static struct buf *
bufq_priocscan_get(struct bufq_state *bufq, int remove)
{
        struct bufq_priocscan *q = bufq_private(bufq);
        struct priocscan_queue *pq, *npq;
        struct priocscan_queue *first; /* highest priority non-empty queue */
        const struct priocscan_queue *epq;
        struct buf *bp;
        bool single; /* true if there's only one non-empty queue */

        /*
         * find the highest priority non-empty queue.
         */
        pq = &q->bq_queue[0];
        epq = pq + PRIOCSCAN_NQUEUE;
        for (; pq < epq; pq++) {
                if (!cscan_empty(&pq->q_queue)) {
                        break;
                }
        }
        if (pq == epq) {
                /*
                 * all our queues are empty.  there's nothing to serve.
                 */
                return NULL;
        }
        first = pq;

        /*
         * scan the rest of queues.
         *
         * if we have two or more non-empty queues, we serve the highest
         * priority one with non-zero burst count.
         */
        single = true;
        for (npq = pq + 1; npq < epq; npq++) {
                if (!cscan_empty(&npq->q_queue)) {
                        /*
                         * we found another non-empty queue.
                         * it means that a queue needs to consume its burst
                         * count to be served.
                         */
                        single = false;

                        /*
                         * check if our current candidate queue has already
                         * exhausted its burst count.
                         */
                        if (pq->q_burst > 0) {
                                break;
                        }
                        pq = npq;
                }
        }
        if (single) {
                /*
                 * there's only a non-empty queue.
                 * just serve it without consuming its burst count.
                 */
                KASSERT(pq == first);
        } else {
                /*
                 * there are two or more non-empty queues.
                 */
                if (pq->q_burst == 0) {
                        /*
                         * no queues can be served because they have already
                         * exhausted their burst count.
                         */
                        unsigned int i;
#ifdef DEBUG
                        /* every non-empty queue must be at zero burst here */
                        for (i = 0; i < PRIOCSCAN_NQUEUE; i++) {
                                pq = &q->bq_queue[i];
                                if (!cscan_empty(&pq->q_queue) && pq->q_burst) {
                                        panic("%s: inconsist", __func__);
                                }
                        }
#endif /* DEBUG */
                        /*
                         * reset burst counts.
                         */
                        if (remove) {
                                for (i = 0; i < PRIOCSCAN_NQUEUE; i++) {
                                        pq = &q->bq_queue[i];
                                        pq->q_burst = priocscan_burst[i];
                                }
                        }

                        /*
                         * serve the highest priority non-empty queue.
                         */
                        pq = first;
                }
                /*
                 * consume the burst count.
                 *
                 * XXX account only by number of requests.  is it good enough?
                 */
                if (remove) {
                        KASSERT(pq->q_burst > 0);
                        pq->q_burst--;
                }
        }

        /*
         * finally, get a request from the selected queue.
         */
        KDASSERT(!cscan_empty(&pq->q_queue));
        bp = cscan_get(&pq->q_queue, remove,
#if defined(PRIOCSCAN_USE_GLOBAL_POSITION)
            &q->bq_lastkey
#else /* defined(PRIOCSCAN_USE_GLOBAL_POSITION) */
            &pq->q_queue.cq_lastkey
#endif /* defined(PRIOCSCAN_USE_GLOBAL_POSITION) */
            );
        KDASSERT(bp != NULL);
        KDASSERT(&pq->q_queue == bufq_priocscan_selectqueue(q, bp));

        return bp;
}
  393 
/*
 * bufq_priocscan_cancel: remove a specific pending buffer from whichever
 * queue holds it.  Returns bp on success, NULL if bp was not queued.
 */
static struct buf *
bufq_priocscan_cancel(struct bufq_state *bufq, struct buf *bp)
{
        struct bufq_priocscan * const q = bufq_private(bufq);
        unsigned int i;

        for (i = 0; i < PRIOCSCAN_NQUEUE; i++) {
                struct cscan_queue * const cq = &q->bq_queue[i].q_queue;
                struct buf *it;

                /*
                 * XXX probably could be faster but the cancel functionality
                 * is not widely used anyway.
                 */
                RB_TREE_FOREACH(it, &cq->cq_buffers) {
                        if (it == bp) {
                                rb_tree_remove_node(&cq->cq_buffers, bp);
                                return bp;
                        }
                }
        }
        return NULL;
}
  417 
/*
 * bufq_priocscan_fini: release the private state allocated by
 * bufq_priocscan_init.
 */
static void
bufq_priocscan_fini(struct bufq_state *bufq)
{

        KASSERT(bufq->bq_private != NULL);
        kmem_free(bufq->bq_private, sizeof(struct bufq_priocscan));
}
  425 
/*
 * bufq_priocscan_init: attach the priocscan strategy to a bufq_state.
 * Installs the method table and allocates the zeroed private state
 * (KM_SLEEP: may block but does not fail), then initializes each
 * per-priority cscan queue with the bufq's sort discipline.
 */
static void
bufq_priocscan_init(struct bufq_state *bufq)
{
        struct bufq_priocscan *q;
        const int sortby = bufq->bq_flags & BUFQ_SORT_MASK;
        unsigned int i;

        bufq->bq_get = bufq_priocscan_get;
        bufq->bq_put = bufq_priocscan_put;
        bufq->bq_cancel = bufq_priocscan_cancel;
        bufq->bq_fini = bufq_priocscan_fini;
        bufq->bq_private = kmem_zalloc(sizeof(struct bufq_priocscan), KM_SLEEP);

        q = bufq->bq_private;
        for (i = 0; i < PRIOCSCAN_NQUEUE; i++) {
                struct cscan_queue *cq = &q->bq_queue[i].q_queue;

                cscan_init(cq, sortby);
        }
}
  446 
  447 MODULE(MODULE_CLASS_BUFQ, bufq_priocscan, NULL);
  448 
  449 static int
  450 bufq_priocscan_modcmd(modcmd_t cmd, void *opaque)
  451 {
  452 
  453         switch (cmd) {
  454         case MODULE_CMD_INIT:
  455                 return bufq_register(&bufq_strat_priocscan);
  456         case MODULE_CMD_FINI:
  457                 return bufq_unregister(&bufq_strat_priocscan);
  458         default:
  459                 return ENOTTY;
  460         }
  461 }

Cache object: 8a777ae5832de37c428329d7e41043c7


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.