FreeBSD/Linux Kernel Cross Reference
sys/geom/sched/gs_rr.c


/*-
 * Copyright (c) 2009-2010 Fabio Checconi
 * Copyright (c) 2009-2010 Luigi Rizzo, Universita` di Pisa
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * $Id$
 * $FreeBSD: releng/8.3/sys/geom/sched/gs_rr.c 206899 2010-04-20 15:23:12Z luigi $
 *
 * A round-robin (RR) anticipatory scheduler, with per-client queues.
 *
 * The goal of this implementation is to improve throughput compared
 * to the pure elevator algorithm, and to ensure some fairness among
 * clients.
 *
 * Requests coming from the same client are put in the same queue.
 * We use anticipation to help reduce seeks, and each queue
 * is never served continuously for more than a given amount of
 * time or data. Queues are then served in a round-robin fashion.
 *
 * Each queue can be in any of the following states:
 *     READY    immediately serve the first pending request;
 *     BUSY     one request is under service, wait for completion;
 *     IDLING   do not serve incoming requests immediately, unless
 *              they are "eligible" as defined later.
 *
 * Scheduling decisions are made by looking at the status of all
 * queues, and the first one in round-robin order is privileged.
 */
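
/*
 * Overview of the state transitions for the active queue (an editor's
 * sketch derived from g_rr_next(), g_rr_done(), g_rr_start() and
 * g_rr_wait_timeout() below):
 *
 *   READY  -- last queued bio dispatched (g_rr_next) -----> BUSY
 *   BUSY   -- bio completes (g_rr_done) ------------------> IDLING,
 *             anticipation timer armed
 *   IDLING -- new bio arrives from owner (g_rr_start) ----> READY,
 *             timer stopped
 *   IDLING -- timer fires (g_rr_wait_timeout) ------------> queue
 *             released, next queue served
 */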

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/bio.h>
#include <sys/callout.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/sysctl.h>
#include "gs_scheduler.h"

/* possible states of the scheduler */
enum g_rr_state {
        G_QUEUE_READY = 0,      /* Ready to dispatch. */
        G_QUEUE_BUSY,           /* Waiting for a completion. */
        G_QUEUE_IDLING          /* Waiting for a new request. */
};

/* possible queue flags */
enum g_rr_flags {
        G_FLAG_COMPLETED = 1,   /* Completed a req. in the current budget. */
};

struct g_rr_softc;

/*
 * Queue descriptor, containing a reference count, the scheduling
 * state, a queue of pending requests, and configuration parameters.
 * Queues with pending request(s) and not under service are also
 * stored in a Round Robin (RR) list.
 */
struct g_rr_queue {
        struct g_rr_softc *q_sc;        /* link to the parent */

        enum g_rr_state q_status;
        unsigned int    q_service;      /* service received so far */
        int             q_slice_end;    /* actual slice end in ticks */
        enum g_rr_flags q_flags;        /* queue flags */
        struct bio_queue_head q_bioq;

        /* Scheduling parameters */
        unsigned int    q_budget;       /* slice size in bytes */
        unsigned int    q_slice_duration; /* slice size in ticks */
        unsigned int    q_wait_ticks;   /* wait time for anticipation */

        /* Stats to drive the various heuristics. */
        struct g_savg   q_thinktime;    /* Thinktime average. */
        struct g_savg   q_seekdist;     /* Seek distance average. */

        int             q_bionum;       /* Number of requests. */

        off_t           q_lastoff;      /* Last submitted req. offset. */
        int             q_lastsub;      /* Last submitted req. time. */

        /* Expiration deadline for an empty queue. */
        int             q_expire;

        TAILQ_ENTRY(g_rr_queue) q_tailq; /* RR list link field */
};

/* List types. */
TAILQ_HEAD(g_rr_tailq, g_rr_queue);

/* list of scheduler instances */
LIST_HEAD(g_scheds, g_rr_softc);

/* Default quantum for RR between queues: 8 MiB. */
#define G_RR_DEFAULT_BUDGET     0x00800000

/*
 * Per device descriptor, holding the Round Robin list of queues
 * accessing the disk, a reference to the geom, and the timer.
 */
struct g_rr_softc {
        struct g_geom   *sc_geom;

        /*
         * sc_active is the queue we are anticipating for.
         * It is set only in g_rr_next(), and possibly cleared
         * only in g_rr_next() or on a timeout.
         * The active queue is never in the Round Robin list
         * even if it has requests queued.
         */
        struct g_rr_queue *sc_active;
        struct callout  sc_wait;        /* timer for sc_active */

        struct g_rr_tailq sc_rr_tailq;  /* the round-robin list */
        int             sc_nqueues;     /* number of queues */

        /* Statistics */
        int             sc_in_flight;   /* requests in the driver */

        LIST_ENTRY(g_rr_softc)  sc_next;
};

/* Descriptor for bounded values; min and max are constant. */
struct x_bound {
        const int       x_min;
        int             x_cur;
        const int       x_max;
};

/*
 * Parameters, config and stats.
 */
struct g_rr_params {
        int     queues;                 /* total number of queues */
        int     w_anticipate;           /* anticipate writes */
        int     bypass;                 /* bypass the scheduler */

        int     units;                  /* how many instances */
        /* sc_head is used for debugging */
        struct g_scheds sc_head;        /* first scheduler instance */

        struct x_bound queue_depth;     /* max parallel requests */
        struct x_bound wait_ms;         /* wait time, milliseconds */
        struct x_bound quantum_ms;      /* quantum size, milliseconds */
        struct x_bound quantum_kb;      /* quantum size, KiB (1024 bytes) */

        /* statistics */
        int     wait_hit;               /* success in anticipation */
        int     wait_miss;              /* failure in anticipation */
};

/*
 * Default parameters for the scheduler.  The quantum sizes target
 * an 80 MB/s disk; if the hardware is faster or slower the minimum
 * of the two will take effect: the clients will still be isolated
 * but the fairness may be limited.  A complete solution would involve
 * the on-line measurement of the actual disk throughput to derive
 * these parameters.  Or we may just choose to ignore service domain
 * fairness and accept what can be achieved with time-only budgets.
 */
static struct g_rr_params me = {
        .sc_head = LIST_HEAD_INITIALIZER(&me.sc_head),
        .w_anticipate = 1,
        .queue_depth =  { 1,    1,      50 },
        .wait_ms =      { 1,    10,     30 },
        .quantum_ms =   { 1,    100,    500 },
        .quantum_kb =   { 16,   8192,   65536 },
};
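
/*
 * Consistency check on the defaults above: at the targeted 80 MB/s,
 * quantum_ms = 100 ms of service is 80 MB/s * 0.1 s = 8 MB, which is
 * exactly quantum_kb = 8192 KiB.  On a faster disk the byte budget
 * expires first; on a slower one the time budget does.
 */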

struct g_rr_params *gs_rr_me = &me;

SYSCTL_DECL(_kern_geom_sched);
SYSCTL_NODE(_kern_geom_sched, OID_AUTO, rr, CTLFLAG_RW, 0,
    "GEOM_SCHED round-robin scheduler");
SYSCTL_UINT(_kern_geom_sched_rr, OID_AUTO, units, CTLFLAG_RD,
    &me.units, 0, "Scheduler instances");
SYSCTL_UINT(_kern_geom_sched_rr, OID_AUTO, queues, CTLFLAG_RD,
    &me.queues, 0, "Total rr queues");
SYSCTL_UINT(_kern_geom_sched_rr, OID_AUTO, wait_ms, CTLFLAG_RW,
    &me.wait_ms.x_cur, 0, "Wait time, milliseconds");
SYSCTL_UINT(_kern_geom_sched_rr, OID_AUTO, quantum_ms, CTLFLAG_RW,
    &me.quantum_ms.x_cur, 0, "Quantum size, milliseconds");
SYSCTL_UINT(_kern_geom_sched_rr, OID_AUTO, bypass, CTLFLAG_RW,
    &me.bypass, 0, "Bypass scheduler");
SYSCTL_UINT(_kern_geom_sched_rr, OID_AUTO, w_anticipate, CTLFLAG_RW,
    &me.w_anticipate, 0, "Do anticipation on writes");
SYSCTL_UINT(_kern_geom_sched_rr, OID_AUTO, quantum_kb, CTLFLAG_RW,
    &me.quantum_kb.x_cur, 0, "Quantum size, KiB");
SYSCTL_UINT(_kern_geom_sched_rr, OID_AUTO, queue_depth, CTLFLAG_RW,
    &me.queue_depth.x_cur, 0, "Maximum simultaneous requests");
SYSCTL_UINT(_kern_geom_sched_rr, OID_AUTO, wait_hit, CTLFLAG_RW,
    &me.wait_hit, 0, "Hits in anticipation");
SYSCTL_UINT(_kern_geom_sched_rr, OID_AUTO, wait_miss, CTLFLAG_RW,
    &me.wait_miss, 0, "Misses in anticipation");
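
/*
 * Usage sketch (editor's example, values are hypothetical): the knobs
 * above live under kern.geom.sched.rr, e.g.
 *
 *   # sysctl kern.geom.sched.rr.wait_ms=5
 *   # sysctl kern.geom.sched.rr.quantum_kb=4096
 *
 * Each value is clamped to the x_min/x_max bounds declared in "me"
 * when it is actually used; see get_bounded() below.
 */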

#ifdef DEBUG_QUEUES
/* Print the status of a queue. */
static void
gs_rr_dump_q(struct g_rr_queue *qp, int index)
{
        int l = 0;
        struct bio *bp;

        TAILQ_FOREACH(bp, &(qp->q_bioq.queue), bio_queue) {
                l++;
        }
        printf("--- rr queue %d %p status %d len %d ---\n",
            index, qp, qp->q_status, l);
}

/*
 * Dump the status of every scheduler instance when writing to this
 * sysctl variable.  Only used for debugging.
 */
static int
gs_rr_sysctl_status(SYSCTL_HANDLER_ARGS)
{
        int error, val = 0;
        struct g_rr_softc *sc;

        error = sysctl_handle_int(oidp, &val, 0, req);
        if (error || !req->newptr)
                return (error);

        printf("called %s\n", __func__);

        LIST_FOREACH(sc, &me.sc_head, sc_next) {
                int tot = 0;
                struct g_rr_queue *qp;

                printf("--- sc %p active %p nqueues %d "
                    "callout %d in_flight %d ---\n",
                    sc, sc->sc_active, sc->sc_nqueues,
                    callout_active(&sc->sc_wait),
                    sc->sc_in_flight);
                /*
                 * Only the active queue and the queues waiting in the
                 * RR list are reachable from here.
                 */
                if (sc->sc_active != NULL)
                        gs_rr_dump_q(sc->sc_active, tot++);
                TAILQ_FOREACH(qp, &sc->sc_rr_tailq, q_tailq)
                        gs_rr_dump_q(qp, tot++);
        }
        return (0);
}

SYSCTL_PROC(_kern_geom_sched_rr, OID_AUTO, status,
    CTLTYPE_UINT | CTLFLAG_RW,
    0, sizeof(int), gs_rr_sysctl_status, "I", "status");

#endif  /* DEBUG_QUEUES */

/*
 * Get a bounded value; if t_min is nonzero, also convert the value
 * from milliseconds to ticks and enforce a floor of t_min ticks.
 */
static int
get_bounded(struct x_bound *v, int t_min)
{
        int x;

        x = v->x_cur;
        if (x < v->x_min)
                x = v->x_min;
        else if (x > v->x_max)
                x = v->x_max;
        if (t_min) {
                x = x * hz / 1000;      /* convert to ticks */
                if (x < t_min)
                        x = t_min;
        }
        return (x);
}
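
/*
 * Worked example (assuming hz = 1000): with me.wait_ms = { 1, 10, 30 },
 * get_bounded(&me.wait_ms, 2) clamps x_cur = 10 into [1, 30], converts
 * it to ticks (10 * 1000 / 1000 = 10) and applies the 2-tick floor,
 * returning 10.  With hz = 100 the same call yields 10 * 100 / 1000 = 1,
 * which is raised to the floor, returning 2.
 */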

/*
 * Get a reference to the queue for bp, using the generic
 * classification mechanism.
 */
static struct g_rr_queue *
g_rr_queue_get(struct g_rr_softc *sc, struct bio *bp)
{

        return (g_sched_get_class(sc->sc_geom, bp));
}

static int
g_rr_init_class(void *data, void *priv)
{
        struct g_rr_softc *sc = data;
        struct g_rr_queue *qp = priv;

        gs_bioq_init(&qp->q_bioq);

        /*
         * Set the initial parameters for the client:
         * slice size in bytes and ticks, and wait ticks.
         * Right now these are constant, but we could have
         * autoconfiguration code to adjust the values based on
         * the actual workload.
         */
        qp->q_budget = 1024 * get_bounded(&me.quantum_kb, 0);
        qp->q_slice_duration = get_bounded(&me.quantum_ms, 2);
        qp->q_wait_ticks = get_bounded(&me.wait_ms, 2);

        qp->q_sc = sc;          /* link to the parent */
        qp->q_sc->sc_nqueues++;
        me.queues++;

        return (0);
}
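
/*
 * With the defaults above (quantum_kb.x_cur = 8192), q_budget becomes
 * 1024 * 8192 = 8 MiB, matching G_RR_DEFAULT_BUDGET (0x00800000).
 */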

/*
 * Release a reference to the queue.
 */
static void
g_rr_queue_put(struct g_rr_queue *qp)
{

        g_sched_put_class(qp->q_sc->sc_geom, qp);
}

static void
g_rr_fini_class(void *data, void *priv)
{
        struct g_rr_queue *qp = priv;

        KASSERT(gs_bioq_first(&qp->q_bioq) == NULL,
            ("released nonempty queue"));
        qp->q_sc->sc_nqueues--;
        me.queues--;
}

static inline int
g_rr_queue_expired(struct g_rr_queue *qp)
{

        if (qp->q_service >= qp->q_budget)
                return (1);

        if ((qp->q_flags & G_FLAG_COMPLETED) &&
            ticks - qp->q_slice_end >= 0)
                return (1);

        return (0);
}
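
/*
 * Note on the time check above: "ticks - qp->q_slice_end >= 0" is the
 * wraparound-safe way to write "ticks >= qp->q_slice_end", since the
 * global ticks counter is a signed int that eventually overflows.
 */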

static inline int
g_rr_should_anticipate(struct g_rr_queue *qp, struct bio *bp)
{
        int wait = get_bounded(&me.wait_ms, 2);

        if (!me.w_anticipate && (bp->bio_cmd & BIO_WRITE))
                return (0);

        if (g_savg_valid(&qp->q_thinktime) &&
            g_savg_read(&qp->q_thinktime) > wait)
                return (0);

        if (g_savg_valid(&qp->q_seekdist) &&
            g_savg_read(&qp->q_seekdist) > 8192)
                return (0);

        return (1);
}
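
/*
 * In words: anticipation is worthwhile only when the client is likely
 * to submit another nearby request soon, i.e. when its average
 * thinktime is within the anticipation window and its average seek
 * distance is small (at most 8192 bytes).  Writes qualify only if
 * w_anticipate is set.
 */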

/*
 * Called on a request arrival, timeout or completion.
 * Try to serve a request among those queued.
 */
static struct bio *
g_rr_next(void *data, int force)
{
        struct g_rr_softc *sc = data;
        struct g_rr_queue *qp;
        struct bio *bp, *next;
        int expired;

        qp = sc->sc_active;
        if (me.bypass == 0 && !force) {
                if (sc->sc_in_flight >= get_bounded(&me.queue_depth, 0))
                        return (NULL);

                /* Try with the queue under service first. */
                if (qp != NULL && qp->q_status != G_QUEUE_READY) {
                        /*
                         * Queue is anticipating, ignore request.
                         * We should check that we are not past
                         * the timeout, but in that case the timeout
                         * will fire immediately afterwards so we
                         * don't bother.
                         */
                        return (NULL);
                }
        } else if (qp != NULL && qp->q_status != G_QUEUE_READY) {
                g_rr_queue_put(qp);
                sc->sc_active = qp = NULL;
        }

        /*
         * No queue under service, look for the first in RR order.
         * If we find one, select it as sc_active and clear its
         * service counter; the end time of the slice is set at the
         * first completion, in g_rr_done().
         */
        if (qp == NULL) {
                qp = TAILQ_FIRST(&sc->sc_rr_tailq);
                if (qp == NULL)
                        return (NULL); /* no queues at all, return */
                /* otherwise select the new queue for service. */
                TAILQ_REMOVE(&sc->sc_rr_tailq, qp, q_tailq);
                sc->sc_active = qp;
                qp->q_service = 0;
                qp->q_flags &= ~G_FLAG_COMPLETED;
        }

        bp = gs_bioq_takefirst(&qp->q_bioq);    /* surely not NULL */
        qp->q_service += bp->bio_length;        /* charge the service */

        /*
         * The request at the head of the active queue is always
         * dispatched, and g_rr_next() will be called again
         * immediately.
         * We need to prepare for what to do next:
         *
         * 1. have we reached the end of the (time or service) slice?
         *    If so, clear sc_active and possibly requeue the previous
         *    active queue if it has more requests pending;
         * 2. do we have more requests in sc_active?
         *    If yes, do not anticipate, as g_rr_next() will run again;
         *    if no, decide whether or not to anticipate depending
         *    on reads or writes (e.g., anticipate only on reads).
         */
        expired = g_rr_queue_expired(qp);       /* are we expired? */
        next = gs_bioq_first(&qp->q_bioq);      /* do we have one more? */
        if (expired) {
                sc->sc_active = NULL;
                /* Either requeue or release reference. */
                if (next != NULL)
                        TAILQ_INSERT_TAIL(&sc->sc_rr_tailq, qp, q_tailq);
                else
                        g_rr_queue_put(qp);
        } else if (next != NULL) {
                qp->q_status = G_QUEUE_READY;
        } else {
                if (!force && g_rr_should_anticipate(qp, bp)) {
                        /* anticipate once this request completes */
                        qp->q_status = G_QUEUE_BUSY;
                } else {
                        /* do not anticipate, release reference */
                        g_rr_queue_put(qp);
                        sc->sc_active = NULL;
                }
        }
        /* If sc_active != NULL, its q_status is always correct. */

        sc->sc_in_flight++;

        return (bp);
}

static inline void
g_rr_update_thinktime(struct g_rr_queue *qp)
{
        int delta = ticks - qp->q_lastsub, wait = get_bounded(&me.wait_ms, 2);

        if (qp->q_sc->sc_active != qp)
                return;

        qp->q_lastsub = ticks;
        delta = (delta > 2 * wait) ? 2 * wait : delta;
        if (qp->q_bionum > 7)
                g_savg_add_sample(&qp->q_thinktime, delta);
}

static inline void
g_rr_update_seekdist(struct g_rr_queue *qp, struct bio *bp)
{
        off_t dist;

        if (qp->q_lastoff > bp->bio_offset)
                dist = qp->q_lastoff - bp->bio_offset;
        else
                dist = bp->bio_offset - qp->q_lastoff;

        if (dist > (8192 * 8))
                dist = 8192 * 8;

        qp->q_lastoff = bp->bio_offset + bp->bio_length;

        if (qp->q_bionum > 7)
                g_savg_add_sample(&qp->q_seekdist, dist);
}
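
/*
 * Both helpers feed the g_savg smoothed averages declared in the
 * queue descriptor.  Thinktime samples are clamped to twice the
 * anticipation window and seek distance samples to 64 KiB (8192 * 8),
 * so a single outlier cannot dominate the average; samples are only
 * collected once the queue has seen a sustained stream of requests
 * (q_bionum > 7).
 */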

/*
 * Called when a real request for disk I/O arrives.
 * Locate the queue associated with the client.
 * If the queue is the one we are anticipating for, reset its timeout;
 * if the queue is not in the round robin list, insert it in the list.
 * On any error, do not queue the request and return -1; the caller
 * will take care of the request.
 */
static int
g_rr_start(void *data, struct bio *bp)
{
        struct g_rr_softc *sc = data;
        struct g_rr_queue *qp;

        if (me.bypass)
                return (-1);    /* bypass the scheduler */

        /* Get the queue for the request. */
        qp = g_rr_queue_get(sc, bp);
        if (qp == NULL)
                return (-1); /* allocation failed, tell upstream */

        if (gs_bioq_first(&qp->q_bioq) == NULL) {
                /*
                 * We are inserting into an empty queue.
                 * Reset its state if it is sc_active,
                 * otherwise insert it in the RR list.
                 */
                if (qp == sc->sc_active) {
                        qp->q_status = G_QUEUE_READY;
                        callout_stop(&sc->sc_wait);
                } else {
                        g_sched_priv_ref(qp);
                        TAILQ_INSERT_TAIL(&sc->sc_rr_tailq, qp, q_tailq);
                }
        }

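        /*
         * q_bionum is a saturating arrival counter: from 0 it climbs
         * 1, 2, ..., 8 and then stays at 8, since 1 + 8 - (8 >> 3) = 8.
         * The "q_bionum > 7" tests in the update helpers above thus
         * mean "only trust the averages after about eight requests".
         */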
        qp->q_bionum = 1 + qp->q_bionum - (qp->q_bionum >> 3);

        g_rr_update_thinktime(qp);
        g_rr_update_seekdist(qp, bp);

        /* Inherit the reference returned by g_rr_queue_get(). */
        bp->bio_caller1 = qp;
        gs_bioq_disksort(&qp->q_bioq, bp);

        return (0);
}

/*
 * Callout executed when a queue times out anticipating a new request.
 */
static void
g_rr_wait_timeout(void *data)
{
        struct g_rr_softc *sc = data;
        struct g_geom *geom = sc->sc_geom;

        g_sched_lock(geom);
        /*
         * We can race with other events, so check if
         * sc_active is still valid.
         */
        if (sc->sc_active != NULL) {
                /* Release the reference to the queue. */
                g_rr_queue_put(sc->sc_active);
                sc->sc_active = NULL;
                me.wait_hit--;
                me.wait_miss++; /* record the miss */
        }
        g_sched_dispatch(geom);
        g_sched_unlock(geom);
}
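
/*
 * Accounting note: g_rr_done() optimistically bumps me.wait_hit when
 * it arms the timer; if the timer fires before a new request arrives,
 * the code above takes the hit back and records a miss instead, so
 * each anticipation is counted exactly once in wait_hit + wait_miss.
 */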

/*
 * Module glue: allocate descriptor, initialize its fields.
 */
static void *
g_rr_init(struct g_geom *geom)
{
        struct g_rr_softc *sc;

        /* XXX check whether we can sleep */
        sc = malloc(sizeof *sc, M_GEOM_SCHED, M_NOWAIT | M_ZERO);
        if (sc == NULL)         /* M_NOWAIT may fail, do not dereference */
                return (NULL);
        sc->sc_geom = geom;
        TAILQ_INIT(&sc->sc_rr_tailq);
        callout_init(&sc->sc_wait, CALLOUT_MPSAFE);
        LIST_INSERT_HEAD(&me.sc_head, sc, sc_next);
        me.units++;

        return (sc);
}

/*
 * Module glue -- drain the callout, unlink the instance from the
 * global list, and free the descriptor.
 */
static void
g_rr_fini(void *data)
{
        struct g_rr_softc *sc = data;

        callout_drain(&sc->sc_wait);
        KASSERT(sc->sc_active == NULL, ("still a queue under service"));
        KASSERT(TAILQ_EMPTY(&sc->sc_rr_tailq), ("still scheduled queues"));

        LIST_REMOVE(sc, sc_next);
        me.units--;
        free(sc, M_GEOM_SCHED);
}

/*
 * Called when the request under service terminates.
 * Start the anticipation timer if needed.
 */
static void
g_rr_done(void *data, struct bio *bp)
{
        struct g_rr_softc *sc = data;
        struct g_rr_queue *qp;

        sc->sc_in_flight--;

        qp = bp->bio_caller1;
        if (qp == sc->sc_active && qp->q_status == G_QUEUE_BUSY) {
                if (!(qp->q_flags & G_FLAG_COMPLETED)) {
                        qp->q_flags |= G_FLAG_COMPLETED;
                        /* in case we want to make the slice adaptive */
                        qp->q_slice_duration = get_bounded(&me.quantum_ms, 2);
                        qp->q_slice_end = ticks + qp->q_slice_duration;
                }

                /* The queue is trying anticipation, start the timer. */
                qp->q_status = G_QUEUE_IDLING;
                /* may make this adaptive */
                qp->q_wait_ticks = get_bounded(&me.wait_ms, 2);
                me.wait_hit++;  /* optimistic, see g_rr_wait_timeout() */
                callout_reset(&sc->sc_wait, qp->q_wait_ticks,
                    g_rr_wait_timeout, sc);
        } else
                g_sched_dispatch(sc->sc_geom);

        /* Release a reference to the queue. */
        g_rr_queue_put(qp);
}

static void
g_rr_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
    struct g_consumer *cp, struct g_provider *pp)
{
        if (indent == NULL) {   /* plaintext */
                sbuf_printf(sb, " units %d queues %d",
                    me.units, me.queues);
        }
}

static struct g_gsched g_rr = {
        .gs_name = "rr",
        .gs_priv_size = sizeof(struct g_rr_queue),
        .gs_init = g_rr_init,
        .gs_fini = g_rr_fini,
        .gs_start = g_rr_start,
        .gs_done = g_rr_done,
        .gs_next = g_rr_next,
        .gs_dumpconf = g_rr_dumpconf,
        .gs_init_class = g_rr_init_class,
        .gs_fini_class = g_rr_fini_class,
};

DECLARE_GSCHED_MODULE(rr, &g_rr);
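
/*
 * Usage sketch (an editor's note; consult gsched(8) for the exact
 * syntax on your release): once the geom_sched framework and this
 * module are loaded, the scheduler can be inserted on a provider,
 * e.g.
 *
 *   # kldload geom_sched gsched_rr
 *   # geom sched insert -a rr da0     # schedule I/O for provider da0
 *
 * after which the tunables appear under kern.geom.sched.rr.
 */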
