FreeBSD/Linux Kernel Cross Reference
sys/geom/sched/gs_rr.c


    1 /*-
    2  * Copyright (c) 2009-2010 Fabio Checconi
    3  * Copyright (c) 2009-2010 Luigi Rizzo, Universita` di Pisa
    4  * All rights reserved.
    5  *
    6  * Redistribution and use in source and binary forms, with or without
    7  * modification, are permitted provided that the following conditions
    8  * are met:
    9  * 1. Redistributions of source code must retain the above copyright
   10  *    notice, this list of conditions and the following disclaimer.
   11  * 2. Redistributions in binary form must reproduce the above copyright
   12  *    notice, this list of conditions and the following disclaimer in the
   13  *    documentation and/or other materials provided with the distribution.
   14  *
   15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   25  * SUCH DAMAGE.
   26  */
   27 
   28 /*
   29  * $Id$
   30  * $FreeBSD: releng/10.0/sys/geom/sched/gs_rr.c 227309 2011-11-07 15:43:11Z ed $
   31  *
   32  * A round-robin (RR) anticipatory scheduler, with per-client queues.
   33  *
    34  * The goal of this implementation is to improve throughput compared
    35  * to the pure elevator algorithm, and to ensure some fairness among
    36  * clients.
    37  *
    38  * Requests coming from the same client are put in the same queue.
    39  * We use anticipation to help reduce seeks, and no queue is ever
    40  * served continuously for more than a given amount of time or data.
    41  * Queues are served in a round-robin fashion.
   42  *
   43  * Each queue can be in any of the following states:
   44  *     READY    immediately serve the first pending request;
   45  *     BUSY     one request is under service, wait for completion;
   46  *     IDLING   do not serve incoming requests immediately, unless
   47  *              they are "eligible" as defined later.
   48  *
    49  * Scheduling decisions are made by looking at the status of all
    50  * queues; the first one in round-robin order is privileged.
   51  */
   52 
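As a reading aid (a standalone sketch, not part of the driver), the per-queue life cycle described above can be modeled as a small state machine driven by the three events handled later in this file: dispatching the last queued request (g_rr_next), request completion (g_rr_done), and the anticipation timeout (g_rr_wait_timeout).

/*
 * Reading aid only: a toy model of the per-queue states.  A "hit" means the
 * anticipated client submitted a new request before the timeout fired.
 */
#include <stdio.h>

enum toy_state { TOY_READY, TOY_BUSY, TOY_IDLING, TOY_RELEASED };
enum toy_event { EV_DISPATCH_LAST, EV_COMPLETION, EV_NEW_REQUEST, EV_TIMEOUT };

static enum toy_state
toy_step(enum toy_state s, enum toy_event e)
{
	switch (s) {
	case TOY_READY:		/* last request dispatched, anticipation chosen */
		return (e == EV_DISPATCH_LAST ? TOY_BUSY : s);
	case TOY_BUSY:		/* the in-flight request completed */
		return (e == EV_COMPLETION ? TOY_IDLING : s);
	case TOY_IDLING:
		if (e == EV_NEW_REQUEST)
			return (TOY_READY);	/* anticipation hit */
		if (e == EV_TIMEOUT)
			return (TOY_RELEASED);	/* anticipation miss */
		return (s);
	default:
		return (s);
	}
}

int
main(void)
{
	enum toy_state s = TOY_READY;

	s = toy_step(s, EV_DISPATCH_LAST);	/* READY  -> BUSY   */
	s = toy_step(s, EV_COMPLETION);		/* BUSY   -> IDLING */
	s = toy_step(s, EV_NEW_REQUEST);	/* IDLING -> READY  */
	printf("final state: %d\n", (int)s);
	return (0);
}
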
   53 #include <sys/param.h>
   54 #include <sys/systm.h>
   55 #include <sys/kernel.h>
   56 #include <sys/bio.h>
   57 #include <sys/callout.h>
   58 #include <sys/malloc.h>
   59 #include <sys/module.h>
   60 #include <sys/proc.h>
   61 #include <sys/queue.h>
   62 #include <sys/sbuf.h>
   63 #include <sys/sysctl.h>
   64 #include "gs_scheduler.h"
   65 
    66 /* possible states of a queue */
   67 enum g_rr_state {
   68         G_QUEUE_READY = 0,      /* Ready to dispatch. */
   69         G_QUEUE_BUSY,           /* Waiting for a completion. */
   70         G_QUEUE_IDLING          /* Waiting for a new request. */
   71 };
   72 
   73 /* possible queue flags */
   74 enum g_rr_flags {
   75         /* G_FLAG_COMPLETED means that the field q_slice_end is valid. */
   76         G_FLAG_COMPLETED = 1,   /* Completed a req. in the current budget. */
   77 };
   78 
   79 struct g_rr_softc;
   80 
    81 /*
    82  * Queue descriptor, containing the reference count, scheduling
    83  * state, a queue of pending requests and configuration parameters.
    84  * Queues with pending request(s) that are not under service are
    85  * also stored in a Round Robin (RR) list.
    86  */
   87 struct g_rr_queue {
   88         struct g_rr_softc *q_sc;        /* link to the parent */
   89 
   90         enum g_rr_state q_status;
   91         unsigned int    q_service;      /* service received so far */
   92         int             q_slice_end;    /* actual slice end time, in ticks */
   93         enum g_rr_flags q_flags;        /* queue flags */
   94         struct bio_queue_head q_bioq;
   95 
   96         /* Scheduling parameters */
   97         unsigned int    q_budget;       /* slice size in bytes */
   98         unsigned int    q_slice_duration; /* slice size in ticks */
   99         unsigned int    q_wait_ticks;   /* wait time for anticipation */
  100 
  101         /* Stats to drive the various heuristics. */
  102         struct g_savg   q_thinktime;    /* Thinktime average. */
  103         struct g_savg   q_seekdist;     /* Seek distance average. */
  104 
  105         int             q_bionum;       /* Number of requests. */
  106 
  107         off_t           q_lastoff;      /* Last submitted req. offset. */
  108         int             q_lastsub;      /* Last submitted req. time. */
  109 
  110         /* Expiration deadline for an empty queue. */
  111         int             q_expire;
  112 
  113         TAILQ_ENTRY(g_rr_queue) q_tailq; /* RR list link field */
  114 };
  115 
  116 /* List types. */
  117 TAILQ_HEAD(g_rr_tailq, g_rr_queue);
  118 
  119 /* list of scheduler instances */
  120 LIST_HEAD(g_scheds, g_rr_softc);
  121 
  122 /* Default quantum for RR between queues. */
   123 #define G_RR_DEFAULT_BUDGET     0x00800000      /* 8 MiB */
  124 
  125 /*
  126  * Per device descriptor, holding the Round Robin list of queues
  127  * accessing the disk, a reference to the geom, and the timer.
  128  */
  129 struct g_rr_softc {
  130         struct g_geom   *sc_geom;
  131 
  132         /*
  133          * sc_active is the queue we are anticipating for.
   134          * It is set only in g_rr_next(), and possibly cleared
   135          * only in g_rr_next() or on a timeout.
  136          * The active queue is never in the Round Robin list
  137          * even if it has requests queued.
  138          */
  139         struct g_rr_queue *sc_active;
  140         struct callout  sc_wait;        /* timer for sc_active */
  141 
  142         struct g_rr_tailq sc_rr_tailq;  /* the round-robin list */
  143         int             sc_nqueues;     /* number of queues */
  144 
  145         /* Statistics */
  146         int             sc_in_flight;   /* requests in the driver */
  147 
  148         LIST_ENTRY(g_rr_softc)  sc_next;
  149 };
  150 
  151 /* Descriptor for bounded values, min and max are constant. */
  152 struct x_bound {                
  153         const int       x_min;
  154         int             x_cur;
  155         const int       x_max;
  156 };
  157 
  158 /*
  159  * parameters, config and stats
  160  */
  161 struct g_rr_params {
  162         int     queues;                 /* total number of queues */
  163         int     w_anticipate;           /* anticipate writes */
   164         int     bypass;                 /* bypass the scheduler entirely */
  165 
  166         int     units;                  /* how many instances */
  167         /* sc_head is used for debugging */
  168         struct g_scheds sc_head;        /* first scheduler instance */
  169 
  170         struct x_bound queue_depth;     /* max parallel requests */
  171         struct x_bound wait_ms;         /* wait time, milliseconds */
  172         struct x_bound quantum_ms;      /* quantum size, milliseconds */
   173         struct x_bound quantum_kb;      /* quantum size, KiB (1024 bytes) */
  174 
  175         /* statistics */
  176         int     wait_hit;               /* success in anticipation */
  177         int     wait_miss;              /* failure in anticipation */
  178 };
  179 
   180 /*
   181  * Default parameters for the scheduler.  The quantum sizes target
   182  * an 80 MB/s disk; if the hardware is faster or slower, the smaller
   183  * of the two limits (time or bytes) takes effect: the clients will
   184  * still be isolated, but fairness may be limited.  A complete
   185  * solution would involve measuring the actual disk throughput
   186  * on-line to derive these parameters; or we may just ignore
   187  * service-domain fairness and accept what time-only budgets achieve.
   188  */
  189 static struct g_rr_params me = {
  190         .sc_head = LIST_HEAD_INITIALIZER(&me.sc_head),
  191         .w_anticipate = 1,
  192         .queue_depth =  { 1,    1,      50 },
  193         .wait_ms =      { 1,    10,     30 },
  194         .quantum_ms =   { 1,    100,    500 },
  195         .quantum_kb =   { 16,   8192,   65536 },
  196 };
  197 
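As a back-of-the-envelope check on these defaults (an illustration only, not driver code, using the 80 MB/s figure assumed in the comment above), the default time quantum and byte quantum bound roughly the same amount of service, so whichever limit is reached first ends a slice:

/*
 * Illustration: at the assumed 80 MB/s rate, the default 100 ms quantum
 * moves about 8,000,000 bytes, close to the 8192 KiB (8,388,608 byte)
 * byte quantum configured above.
 */
#define ASSUMED_RATE_BPS        (80LL * 1000 * 1000)    /* 80 MB/s */
#define DEFAULT_QUANTUM_MS      100                     /* me.quantum_ms default */
#define BYTES_PER_QUANTUM       (ASSUMED_RATE_BPS * DEFAULT_QUANTUM_MS / 1000)
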
  198 struct g_rr_params *gs_rr_me = &me;
  199 
  200 SYSCTL_DECL(_kern_geom_sched);
  201 static SYSCTL_NODE(_kern_geom_sched, OID_AUTO, rr, CTLFLAG_RW, 0,
  202     "GEOM_SCHED ROUND ROBIN stuff");
  203 SYSCTL_INT(_kern_geom_sched_rr, OID_AUTO, units, CTLFLAG_RD,
  204     &me.units, 0, "Scheduler instances");
  205 SYSCTL_INT(_kern_geom_sched_rr, OID_AUTO, queues, CTLFLAG_RD,
  206     &me.queues, 0, "Total rr queues");
  207 SYSCTL_INT(_kern_geom_sched_rr, OID_AUTO, wait_ms, CTLFLAG_RW,
  208     &me.wait_ms.x_cur, 0, "Wait time milliseconds");
  209 SYSCTL_INT(_kern_geom_sched_rr, OID_AUTO, quantum_ms, CTLFLAG_RW,
  210     &me.quantum_ms.x_cur, 0, "Quantum size milliseconds");
  211 SYSCTL_INT(_kern_geom_sched_rr, OID_AUTO, bypass, CTLFLAG_RW,
  212     &me.bypass, 0, "Bypass scheduler");
  213 SYSCTL_INT(_kern_geom_sched_rr, OID_AUTO, w_anticipate, CTLFLAG_RW,
  214     &me.w_anticipate, 0, "Do anticipation on writes");
  215 SYSCTL_INT(_kern_geom_sched_rr, OID_AUTO, quantum_kb, CTLFLAG_RW,
  216     &me.quantum_kb.x_cur, 0, "Quantum size Kbytes");
  217 SYSCTL_INT(_kern_geom_sched_rr, OID_AUTO, queue_depth, CTLFLAG_RW,
  218     &me.queue_depth.x_cur, 0, "Maximum simultaneous requests");
  219 SYSCTL_INT(_kern_geom_sched_rr, OID_AUTO, wait_hit, CTLFLAG_RW,
  220     &me.wait_hit, 0, "Hits in anticipation");
  221 SYSCTL_INT(_kern_geom_sched_rr, OID_AUTO, wait_miss, CTLFLAG_RW,
  222     &me.wait_miss, 0, "Misses in anticipation");
  223 
  224 #ifdef DEBUG_QUEUES
  225 /* print the status of a queue */
  226 static void
  227 gs_rr_dump_q(struct g_rr_queue *qp, int index)
  228 {
  229         int l = 0;
  230         struct bio *bp;
  231 
  232         TAILQ_FOREACH(bp, &(qp->q_bioq.queue), bio_queue) {
  233                 l++;
  234         }
  235         printf("--- rr queue %d %p status %d len %d ---\n",
  236             index, qp, qp->q_status, l);
  237 }
  238 
   239 /*
   240  * Dump the scheduler status when writing to this sysctl variable.
   241  * XXX only the state reachable from each softc (the active queue and
   242  * the RR list) is dumped; acceptable, as this is only for debugging.
   243  */
  244 static int
  245 gs_rr_sysctl_status(SYSCTL_HANDLER_ARGS)
  246 {
  247         int error, val = 0;
  248         struct g_rr_softc *sc;
  249 
  250         error = sysctl_handle_int(oidp, &val, 0, req);
   251         if (error || !req->newptr)
  252                 return (error);
  253 
  254         printf("called %s\n", __FUNCTION__);
  255 
  256         LIST_FOREACH(sc, &me.sc_head, sc_next) {
   257                 struct g_rr_queue *qp;
   258                 int tot = 0;
   259 
   260                 printf("--- sc %p active %p nqueues %d "
   261                     "callout %d in_flight %d ---\n",
   262                     sc, sc->sc_active, sc->sc_nqueues,
   263                     callout_active(&sc->sc_wait),
   264                     sc->sc_in_flight);
   265                 /* Queues live in the classifier; dump what is reachable. */
   266                 if (sc->sc_active != NULL)
   267                         gs_rr_dump_q(sc->sc_active, tot++);
   268                 TAILQ_FOREACH(qp, &sc->sc_rr_tailq, q_tailq)
   269                         gs_rr_dump_q(qp, tot++);
  270         }
  271         return (0);
  272 }
  273 
  274 SYSCTL_PROC(_kern_geom_sched_rr, OID_AUTO, status,
  275         CTLTYPE_UINT | CTLFLAG_RW,
  276     0, sizeof(int), gs_rr_sysctl_status, "I", "status");
  277 
  278 #endif  /* DEBUG_QUEUES */
  279 
  280 /*
   281  * Get a bounded value; if t_min != 0, convert ms to ticks, floor t_min.
  282  */
  283 static int
  284 get_bounded(struct x_bound *v, int t_min)
  285 {
  286         int x;
  287 
  288         x = v->x_cur;
  289         if (x < v->x_min)
  290                 x = v->x_min;
  291         else if (x > v->x_max)
  292                 x = v->x_max;
  293         if (t_min) {
  294                 x = x * hz / 1000;      /* convert to ticks */
  295                 if (x < t_min)
  296                         x = t_min;
  297         }
   298         return (x);
  299 }
  300 
  301 /*
  302  * Get a reference to the queue for bp, using the generic
  303  * classification mechanism.
  304  */
  305 static struct g_rr_queue *
  306 g_rr_queue_get(struct g_rr_softc *sc, struct bio *bp)
  307 {
  308 
  309         return (g_sched_get_class(sc->sc_geom, bp));
  310 }
  311 
  312 static int
  313 g_rr_init_class(void *data, void *priv)
  314 {
  315         struct g_rr_softc *sc = data;
  316         struct g_rr_queue *qp = priv;
  317 
  318         gs_bioq_init(&qp->q_bioq);
  319 
  320         /*
  321          * Set the initial parameters for the client:
  322          * slice size in bytes and ticks, and wait ticks.
  323          * Right now these are constant, but we could have
  324          * autoconfiguration code to adjust the values based on
  325          * the actual workload.
  326          */
  327         qp->q_budget = 1024 * get_bounded(&me.quantum_kb, 0);
  328         qp->q_slice_duration = get_bounded(&me.quantum_ms, 2);
  329         qp->q_wait_ticks = get_bounded(&me.wait_ms, 2);
  330 
  331         qp->q_sc = sc;          /* link to the parent */
  332         qp->q_sc->sc_nqueues++;
  333         me.queues++;
  334 
  335         return (0);
  336 }
  337 
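With the defaults above and an assumed hz of 1000 (a worked example, not part of the driver), the three get_bounded() calls give a newly initialized queue roughly the following values:

/*
 * Worked example only, assuming hz = 1000 and the default parameters:
 * the per-queue values set by g_rr_init_class().
 */
enum example_queue_defaults {
	EXAMPLE_Q_BUDGET        = 1024 * 8192,  /* 8 MiB of service per slice */
	EXAMPLE_SLICE_TICKS     = 100,          /* 100 ms -> 100 ticks */
	EXAMPLE_WAIT_TICKS      = 10            /* 10 ms  -> 10 ticks  */
};
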
  338 /*
  339  * Release a reference to the queue.
  340  */
  341 static void
  342 g_rr_queue_put(struct g_rr_queue *qp)
  343 {
  344 
  345         g_sched_put_class(qp->q_sc->sc_geom, qp);
  346 }
  347 
  348 static void
  349 g_rr_fini_class(void *data, void *priv)
  350 {
  351         struct g_rr_queue *qp = priv;
  352 
  353         KASSERT(gs_bioq_first(&qp->q_bioq) == NULL,
  354                         ("released nonempty queue"));
  355         qp->q_sc->sc_nqueues--;
  356         me.queues--;
  357 }
  358 
  359 static inline int
  360 g_rr_queue_expired(struct g_rr_queue *qp)
  361 {
  362 
  363         if (qp->q_service >= qp->q_budget)
  364                 return (1);
  365 
  366         if ((qp->q_flags & G_FLAG_COMPLETED) &&
  367             ticks - qp->q_slice_end >= 0)
  368                 return (1);
  369 
  370         return (0);
  371 }
  372 
  373 static inline int
  374 g_rr_should_anticipate(struct g_rr_queue *qp, struct bio *bp)
  375 {
  376         int wait = get_bounded(&me.wait_ms, 2);
  377 
  378         if (!me.w_anticipate && (bp->bio_cmd & BIO_WRITE))
  379                 return (0);
  380 
  381         if (g_savg_valid(&qp->q_thinktime) &&
  382             g_savg_read(&qp->q_thinktime) > wait)
  383                 return (0);
  384 
  385         if (g_savg_valid(&qp->q_seekdist) &&
  386             g_savg_read(&qp->q_seekdist) > 8192)
  387                 return (0);
  388 
  389         return (1);
  390 }
  391 
  392 /*
  393  * Called on a request arrival, timeout or completion.
  394  * Try to serve a request among those queued.
  395  */
  396 static struct bio *
  397 g_rr_next(void *data, int force)
  398 {
  399         struct g_rr_softc *sc = data;
  400         struct g_rr_queue *qp;
  401         struct bio *bp, *next;
  402         int expired;
  403 
  404         qp = sc->sc_active;
  405         if (me.bypass == 0 && !force) {
  406                 if (sc->sc_in_flight >= get_bounded(&me.queue_depth, 0))
  407                         return (NULL);
  408 
  409                 /* Try with the queue under service first. */
  410                 if (qp != NULL && qp->q_status != G_QUEUE_READY) {
  411                         /*
  412                          * Queue is anticipating, ignore request.
  413                          * We should check that we are not past
  414                          * the timeout, but in that case the timeout
  415                          * will fire immediately afterwards so we
  416                          * don't bother.
  417                          */
  418                         return (NULL);
  419                 }
  420         } else if (qp != NULL && qp->q_status != G_QUEUE_READY) {
  421                 g_rr_queue_put(qp);
  422                 sc->sc_active = qp = NULL;
  423         }
  424 
  425         /*
  426          * No queue under service, look for the first in RR order.
   427          * If we find one, select it as sc_active and reset its service
   428          * counter; the slice end time is set at the first completion.
  429          */
  430         if (qp == NULL) {
  431                 qp = TAILQ_FIRST(&sc->sc_rr_tailq);
  432                 if (qp == NULL)
  433                         return (NULL); /* no queues at all, return */
  434                 /* otherwise select the new queue for service. */
  435                 TAILQ_REMOVE(&sc->sc_rr_tailq, qp, q_tailq);
  436                 sc->sc_active = qp;
  437                 qp->q_service = 0;
  438                 qp->q_flags &= ~G_FLAG_COMPLETED;
  439         }
  440 
  441         bp = gs_bioq_takefirst(&qp->q_bioq);    /* surely not NULL */
  442         qp->q_service += bp->bio_length;        /* charge the service */
  443 
   444         /*
   445          * The request at the head of the active queue is always
   446          * dispatched, and g_rr_next() will be called again
   447          * immediately.
   448          * We need to prepare for what to do next:
   449          *
   450          * 1. have we reached the end of the (time or service) slice?
   451          *    If so, clear sc_active and possibly requeue the previous
   452          *    active queue if it has more requests pending;
   453          * 2. do we have more requests in sc_active?
   454          *    If yes, do not anticipate, as g_rr_next() will run again;
   455          *    if no, decide whether or not to anticipate depending
   456          *    on reads vs. writes (e.g., anticipate only on reads).
   457          */
  458         expired = g_rr_queue_expired(qp);       /* are we expired ? */
  459         next = gs_bioq_first(&qp->q_bioq);      /* do we have one more ? */
  460         if (expired) {
  461                 sc->sc_active = NULL;
  462                 /* Either requeue or release reference. */
  463                 if (next != NULL)
  464                         TAILQ_INSERT_TAIL(&sc->sc_rr_tailq, qp, q_tailq);
  465                 else
  466                         g_rr_queue_put(qp);
  467         } else if (next != NULL) {
  468                 qp->q_status = G_QUEUE_READY;
  469         } else {
  470                 if (!force && g_rr_should_anticipate(qp, bp)) {
  471                         /* anticipate */
  472                         qp->q_status = G_QUEUE_BUSY;
  473                 } else {
  474                         /* do not anticipate, release reference */
  475                         g_rr_queue_put(qp);
  476                         sc->sc_active = NULL;
  477                 }
  478         }
  479         /* If sc_active != NULL, its q_status is always correct. */
  480 
  481         sc->sc_in_flight++;
  482 
  483         return (bp);
  484 }
  485 
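As a reading aid (a simplified standalone sketch, not part of the driver, ignoring the force flag, locking and reference counting), the post-dispatch bookkeeping above reduces to one decision about the active queue:

/* Simplified model of what g_rr_next() decides after dispatching a request. */
enum after_dispatch {
	AD_REQUEUE,	/* slice expired, more requests: back of the RR list */
	AD_RELEASE,	/* slice expired and empty, or anticipation declined  */
	AD_KEEP_READY,	/* slice still running, more requests queued         */
	AD_ANTICIPATE	/* slice still running, queue empty: idle the disk   */
};

static enum after_dispatch
after_dispatch(int expired, int has_more, int should_anticipate)
{
	if (expired)
		return (has_more ? AD_REQUEUE : AD_RELEASE);
	if (has_more)
		return (AD_KEEP_READY);
	return (should_anticipate ? AD_ANTICIPATE : AD_RELEASE);
}
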
  486 static inline void
  487 g_rr_update_thinktime(struct g_rr_queue *qp)
  488 {
  489         int delta = ticks - qp->q_lastsub, wait = get_bounded(&me.wait_ms, 2);
  490 
  491         if (qp->q_sc->sc_active != qp)
  492                 return;
  493 
  494         qp->q_lastsub = ticks;
  495         delta = (delta > 2 * wait) ? 2 * wait : delta;
  496         if (qp->q_bionum > 7)
  497                 g_savg_add_sample(&qp->q_thinktime, delta);
  498 }
  499 
  500 static inline void
  501 g_rr_update_seekdist(struct g_rr_queue *qp, struct bio *bp)
  502 {
  503         off_t dist;
  504 
  505         if (qp->q_lastoff > bp->bio_offset)
  506                 dist = qp->q_lastoff - bp->bio_offset;
  507         else
  508                 dist = bp->bio_offset - qp->q_lastoff;
  509 
  510         if (dist > (8192 * 8))
  511                 dist = 8192 * 8;
  512 
  513         qp->q_lastoff = bp->bio_offset + bp->bio_length;
  514 
  515         if (qp->q_bionum > 7)
  516                 g_savg_add_sample(&qp->q_seekdist, dist);
  517 }
  518 
  519 /*
  520  * Called when a real request for disk I/O arrives.
  521  * Locate the queue associated with the client.
  522  * If the queue is the one we are anticipating for, reset its timeout;
  523  * if the queue is not in the round robin list, insert it in the list.
   524  * On any error, do not queue the request and return -1; the caller
   525  * will take care of the request.
  526  */
  527 static int
  528 g_rr_start(void *data, struct bio *bp)
  529 {
  530         struct g_rr_softc *sc = data;
  531         struct g_rr_queue *qp;
  532 
  533         if (me.bypass)
  534                 return (-1);    /* bypass the scheduler */
  535 
  536         /* Get the queue for the request. */
  537         qp = g_rr_queue_get(sc, bp);
  538         if (qp == NULL)
  539                 return (-1); /* allocation failed, tell upstream */
  540 
  541         if (gs_bioq_first(&qp->q_bioq) == NULL) {
  542                 /*
  543                  * We are inserting into an empty queue.
  544                  * Reset its state if it is sc_active,
  545                  * otherwise insert it in the RR list.
  546                  */
  547                 if (qp == sc->sc_active) {
  548                         qp->q_status = G_QUEUE_READY;
  549                         callout_stop(&sc->sc_wait);
  550                 } else {
  551                         g_sched_priv_ref(qp);
  552                         TAILQ_INSERT_TAIL(&sc->sc_rr_tailq, qp, q_tailq);
  553                 }
  554         }
  555 
  556         qp->q_bionum = 1 + qp->q_bionum - (qp->q_bionum >> 3);
  557 
  558         g_rr_update_thinktime(qp);
  559         g_rr_update_seekdist(qp, bp);
  560 
  561         /* Inherit the reference returned by g_rr_queue_get(). */
  562         bp->bio_caller1 = qp;
  563         gs_bioq_disksort(&qp->q_bioq, bp);
  564 
  565         return (0);
  566 }
  567 
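The q_bionum update in g_rr_start() above is a decaying counter: each request adds one and leaks one eighth of the current value, so the counter converges to 8 and the "q_bionum > 7" checks in the heuristics only pass once a client has some recent request history. A standalone illustration (plain C, outside the kernel):

/*
 * Standalone illustration of the q_bionum update rule
 * x = 1 + x - (x >> 3): starting from 0 it reaches 8 after eight
 * requests and then stays there.
 */
#include <stdio.h>

int
main(void)
{
	int i, x = 0;

	for (i = 0; i < 12; i++) {
		x = 1 + x - (x >> 3);
		printf("after request %2d: q_bionum = %d\n", i + 1, x);
	}
	return (0);
}
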
  568 /*
  569  * Callout executed when a queue times out anticipating a new request.
  570  */
  571 static void
  572 g_rr_wait_timeout(void *data)
  573 {
  574         struct g_rr_softc *sc = data;
  575         struct g_geom *geom = sc->sc_geom;
  576 
  577         g_sched_lock(geom);
  578         /*
  579          * We can race with other events, so check if
  580          * sc_active is still valid.
  581          */
  582         if (sc->sc_active != NULL) {
  583                 /* Release the reference to the queue. */
  584                 g_rr_queue_put(sc->sc_active);
  585                 sc->sc_active = NULL;
   586                 me.wait_hit--;  /* undo the hit counted in g_rr_done() */
  587                 me.wait_miss++; /* record the miss */
  588         }
  589         g_sched_dispatch(geom);
  590         g_sched_unlock(geom);
  591 }
  592 
  593 /*
  594  * Module glue: allocate descriptor, initialize its fields.
  595  */
  596 static void *
  597 g_rr_init(struct g_geom *geom)
  598 {
  599         struct g_rr_softc *sc;
  600 
  601         /* XXX check whether we can sleep */
  602         sc = malloc(sizeof *sc, M_GEOM_SCHED, M_NOWAIT | M_ZERO);
  603         sc->sc_geom = geom;
  604         TAILQ_INIT(&sc->sc_rr_tailq);
  605         callout_init(&sc->sc_wait, CALLOUT_MPSAFE);
  606         LIST_INSERT_HEAD(&me.sc_head, sc, sc_next);
  607         me.units++;
  608 
  609         return (sc);
  610 }
  611 
   612 /*
   613  * Module glue -- drain the callout structure, remove the instance
   614  * from the list of schedulers, and free the descriptor.
   615  */
  616 static void
  617 g_rr_fini(void *data)
  618 {
  619         struct g_rr_softc *sc = data;
  620 
  621         callout_drain(&sc->sc_wait);
  622         KASSERT(sc->sc_active == NULL, ("still a queue under service"));
  623         KASSERT(TAILQ_EMPTY(&sc->sc_rr_tailq), ("still scheduled queues"));
  624 
  625         LIST_REMOVE(sc, sc_next);
  626         me.units--;
  627         free(sc, M_GEOM_SCHED);
  628 }
  629 
  630 /*
  631  * Called when the request under service terminates.
  632  * Start the anticipation timer if needed.
  633  */
  634 static void
  635 g_rr_done(void *data, struct bio *bp)
  636 {
  637         struct g_rr_softc *sc = data;
  638         struct g_rr_queue *qp;
  639 
  640         sc->sc_in_flight--;
  641 
  642         qp = bp->bio_caller1;
  643 
  644         /*
  645          * When the first request for this queue completes, update the
  646          * duration and end of the slice. We do not do it when the
  647          * slice starts to avoid charging to the queue the time for
  648          * the first seek.
  649          */
  650         if (!(qp->q_flags & G_FLAG_COMPLETED)) {
  651                 qp->q_flags |= G_FLAG_COMPLETED;
  652                 /*
   653                  * Recompute the slice duration, in case we want
   654                  * to make it adaptive.  This is not used right now.
   655                  * XXX should we do the same for q_budget and q_wait_ticks?
  656                  */
  657                 qp->q_slice_duration = get_bounded(&me.quantum_ms, 2);
  658                 qp->q_slice_end = ticks + qp->q_slice_duration;
  659         }
  660 
  661         if (qp == sc->sc_active && qp->q_status == G_QUEUE_BUSY) {
  662                 /* The queue is trying anticipation, start the timer. */
  663                 qp->q_status = G_QUEUE_IDLING;
  664                 /* may make this adaptive */
  665                 qp->q_wait_ticks = get_bounded(&me.wait_ms, 2);
  666                 me.wait_hit++;
  667                 callout_reset(&sc->sc_wait, qp->q_wait_ticks,
  668                     g_rr_wait_timeout, sc);
  669         } else
  670                 g_sched_dispatch(sc->sc_geom);
  671 
  672         /* Release a reference to the queue. */
  673         g_rr_queue_put(qp);
  674 }
  675 
  676 static void
  677 g_rr_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
  678     struct g_consumer *cp, struct g_provider *pp)
  679 {
  680         if (indent == NULL) {   /* plaintext */
  681                 sbuf_printf(sb, " units %d queues %d",
  682                         me.units, me.queues);
  683         }
  684 }
  685 
  686 static struct g_gsched g_rr = {
  687         .gs_name = "rr",
  688         .gs_priv_size = sizeof(struct g_rr_queue),
  689         .gs_init = g_rr_init,
  690         .gs_fini = g_rr_fini,
  691         .gs_start = g_rr_start,
  692         .gs_done = g_rr_done,
  693         .gs_next = g_rr_next,
  694         .gs_dumpconf = g_rr_dumpconf,
  695         .gs_init_class = g_rr_init_class,
  696         .gs_fini_class = g_rr_fini_class,
  697 };
  698 
  699 DECLARE_GSCHED_MODULE(rr, &g_rr);



This page is part of the FreeBSD/Linux Kernel Cross-Reference and was automatically generated using a modified version of the LXR engine.