FreeBSD/Linux Kernel Cross Reference
sys/dev/raidframe/rf_diskqueue.c

/*      $NetBSD: rf_diskqueue.c,v 1.35 2004/03/23 02:34:10 oster Exp $  */
/*
 * Copyright (c) 1995 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Author: Mark Holland
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/****************************************************************************
 *
 * rf_diskqueue.c -- higher-level disk queue code
 *
 * The routines here are a generic wrapper around the actual queueing
 * routines.  The code here implements thread scheduling, synchronization,
 * and locking ops (see below) on top of the lower-level queueing code.
 *
 * To support atomic RMW, we implement "locking operations".  When a
 * locking op is dispatched to the lower levels of the driver, the
 * queue is locked, and no further I/Os are dispatched until the queue
 * receives and completes a corresponding "unlocking operation".  This
 * code relies on the higher layers to guarantee that a locking op
 * will always eventually be followed by an unlocking op.  The model
 * is that the higher layers are structured so that locking and
 * unlocking ops occur in pairs, i.e. an unlocking op cannot be
 * generated until after a locking op reports completion.  There is no
 * good way to check that an unlocking op "corresponds" to the op that
 * currently has the queue locked, so we make no such attempt.  Since
 * by definition there can be only one locking op outstanding on a
 * disk, this should not be a problem.  (A sketch of this pairing
 * contract follows the includes below.)
 *
 * In the kernel, we allow multiple I/Os to be concurrently dispatched
 * to the disk driver.  In order to support locking ops in this
 * environment, when we decide to do a locking op, we stop dispatching
 * new I/Os and wait until all dispatched I/Os have completed before
 * dispatching the locking op.
 *
 * Unfortunately, the code differs across the three environments in
 * which it runs (user level, kernel, simulator).  In the kernel, I/O
 * is non-blocking, and we have no disk threads to dispatch for us.
 * Therefore, we have to dispatch new I/Os to the SCSI driver at the
 * time of enqueue, and also at the time of completion.  At user
 * level, I/O is blocking, and so only the disk threads may dispatch
 * I/Os.  Thus at user level, all we can do at enqueue time is enqueue
 * and wake up the disk thread to do the dispatch.
 *
 ****************************************************************************/

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: rf_diskqueue.c,v 1.35 2004/03/23 02:34:10 oster Exp $");

#include <dev/raidframe/raidframevar.h>

#include "rf_threadstuff.h"
#include "rf_raid.h"
#include "rf_diskqueue.h"
#include "rf_alloclist.h"
#include "rf_acctrace.h"
#include "rf_etimer.h"
#include "rf_general.h"
#include "rf_debugprint.h"
#include "rf_shutdown.h"
#include "rf_cvscan.h"
#include "rf_sstf.h"
#include "rf_fifo.h"
#include "rf_kintf.h"

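/*
 * Illustrative sketch (not part of the original driver): the pairing
 * contract described in the header comment above, as a hypothetical
 * higher layer might honor it.  The helper below and its request
 * arguments are invented for illustration; only rf_DiskIOEnqueue(),
 * RF_IO_NORMAL_PRIORITY, and the lock/unlock pairing rule come from
 * this file.  Locking and unlocking ops are assumed to be NOP
 * requests carrying the appropriate queue-lock flags.
 */
#if 0
static void
example_locked_rmw(RF_DiskQueue_t *q,
    RF_DiskQueueData_t *lock_nop,	/* NOP req flagged as a locking op */
    RF_DiskQueueData_t *unlock_nop)	/* NOP req flagged as an unlocking op */
{
	/* 1. Enqueue the locking op.  It is dispatched once all
	 *    previously dispatched I/Os have drained; from then on no
	 *    further I/Os are issued from this queue. */
	rf_DiskIOEnqueue(q, lock_nop, RF_IO_NORMAL_PRIORITY);

	/* 2. When lock_nop's completion callback fires, the caller
	 *    owns the queue and performs the read-modify-write (not
	 *    shown; in real code this happens in the callback). */

	/* 3. An unlocking op must eventually follow.  Unlocking
	 *    requests are always dispatched immediately, and
	 *    rf_DiskIOComplete() unlocks the queue when one finishes. */
	rf_DiskIOEnqueue(q, unlock_nop, RF_IO_NORMAL_PRIORITY);
}
#endif
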
static void rf_ShutdownDiskQueueSystem(void *);

#ifndef RF_DEBUG_DISKQUEUE
#define RF_DEBUG_DISKQUEUE 0
#endif

#if RF_DEBUG_DISKQUEUE
#define Dprintf1(s,a)         if (rf_queueDebug) rf_debug_printf(s,(void *)((unsigned long)a),NULL,NULL,NULL,NULL,NULL,NULL,NULL)
#define Dprintf2(s,a,b)       if (rf_queueDebug) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),NULL,NULL,NULL,NULL,NULL,NULL)
#define Dprintf3(s,a,b,c)     if (rf_queueDebug) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),(void *)((unsigned long)c),NULL,NULL,NULL,NULL,NULL)
#else
#define Dprintf1(s,a)
#define Dprintf2(s,a,b)
#define Dprintf3(s,a,b,c)
#endif
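
/*
 * Note (added commentary): the Dprintf* tracing above is compiled out
 * by default.  Building with RF_DEBUG_DISKQUEUE defined to a nonzero
 * value compiles the calls in, and they fire only while the RAIDframe
 * debug variable rf_queueDebug (declared elsewhere in RAIDframe's
 * debug support, not in this file) is set nonzero.
 */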

/*****************************************************************************
 *
 * The disk queue switch defines all the functions used by the
 * different queueing disciplines: the queue type name, plus the
 * create, enqueue, dequeue, peek, and promote routines.
 *
 ****************************************************************************/

static const RF_DiskQueueSW_t diskqueuesw[] = {
	{"fifo",		/* FIFO */
		rf_FifoCreate,
		rf_FifoEnqueue,
		rf_FifoDequeue,
		rf_FifoPeek,
		rf_FifoPromote},

	{"cvscan",		/* cvscan */
		rf_CvscanCreate,
		rf_CvscanEnqueue,
		rf_CvscanDequeue,
		rf_CvscanPeek,
		rf_CvscanPromote},

	{"sstf",		/* shortest seek time first */
		rf_SstfCreate,
		rf_SstfEnqueue,
		rf_SstfDequeue,
		rf_SstfPeek,
		rf_SstfPromote},

	{"scan",		/* SCAN (two-way elevator) */
		rf_ScanCreate,
		rf_SstfEnqueue,
		rf_ScanDequeue,
		rf_ScanPeek,
		rf_SstfPromote},

	{"cscan",		/* CSCAN (one-way elevator) */
		rf_CscanCreate,
		rf_SstfEnqueue,
		rf_CscanDequeue,
		rf_CscanPeek,
		rf_SstfPromote},

};
#define NUM_DISK_QUEUE_TYPES (sizeof(diskqueuesw)/sizeof(RF_DiskQueueSW_t))

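/*
 * Illustrative sketch (not part of the original driver): adding a new
 * discipline means implementing the five routines and appending an
 * entry to diskqueuesw[] above; NUM_DISK_QUEUE_TYPES picks up the new
 * entry automatically.  The "elevator" name and the rf_Elevator*
 * routines below are hypothetical.
 */
#if 0
	{"elevator",		/* hypothetical new discipline */
		rf_ElevatorCreate,
		rf_ElevatorEnqueue,
		rf_ElevatorDequeue,
		rf_ElevatorPeek,
		rf_ElevatorPromote},
#endif
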
#define RF_MAX_FREE_DQD 256
#define RF_MIN_FREE_DQD  64

#include <sys/buf.h>

/* Configure a single disk queue. */

int
rf_ConfigureDiskQueue(RF_Raid_t *raidPtr, RF_DiskQueue_t *diskqueue,
		      RF_RowCol_t c, const RF_DiskQueueSW_t *p,
		      RF_SectorCount_t sectPerDisk, dev_t dev,
		      int maxOutstanding, RF_ShutdownList_t **listp,
		      RF_AllocListElem_t *clList)
{
	diskqueue->col = c;
	diskqueue->qPtr = p;
	diskqueue->qHdr = (p->Create) (sectPerDisk, clList, listp);
	diskqueue->dev = dev;
	diskqueue->numOutstanding = 0;
	diskqueue->queueLength = 0;
	diskqueue->maxOutstanding = maxOutstanding;
	diskqueue->curPriority = RF_IO_NORMAL_PRIORITY;
	diskqueue->nextLockingOp = NULL;
	diskqueue->flags = 0;
	diskqueue->raidPtr = raidPtr;
	diskqueue->rf_cinfo = &raidPtr->raid_cinfo[c];
	rf_mutex_init(&diskqueue->mutex);
	diskqueue->cond = 0;
	return (0);
}

static void
rf_ShutdownDiskQueueSystem(void *ignored)
{
	pool_destroy(&rf_pools.dqd);
}

int
rf_ConfigureDiskQueueSystem(RF_ShutdownList_t **listp)
{

	rf_pool_init(&rf_pools.dqd, sizeof(RF_DiskQueueData_t),
		     "rf_dqd_pl", RF_MIN_FREE_DQD, RF_MAX_FREE_DQD);
	rf_ShutdownCreate(listp, rf_ShutdownDiskQueueSystem, NULL);

	return (0);
}

int
rf_ConfigureDiskQueues(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
		       RF_Config_t *cfgPtr)
{
	RF_DiskQueue_t *diskQueues, *spareQueues;
	const RF_DiskQueueSW_t *p;
	RF_RowCol_t r, c;
	int     rc, i;

	raidPtr->maxQueueDepth = cfgPtr->maxOutstandingDiskReqs;

	for (p = NULL, i = 0; i < NUM_DISK_QUEUE_TYPES; i++) {
		if (!strcmp(diskqueuesw[i].queueType, cfgPtr->diskQueueType)) {
			p = &diskqueuesw[i];
			break;
		}
	}
	if (p == NULL) {
		RF_ERRORMSG2("Unknown queue type \"%s\".  Using %s\n", cfgPtr->diskQueueType, diskqueuesw[0].queueType);
		p = &diskqueuesw[0];
	}
	raidPtr->qType = p;

	RF_MallocAndAdd(diskQueues,
			(raidPtr->numCol + RF_MAXSPARE) *
			sizeof(RF_DiskQueue_t), (RF_DiskQueue_t *),
			raidPtr->cleanupList);
	if (diskQueues == NULL)
		return (ENOMEM);
	raidPtr->Queues = diskQueues;

	for (c = 0; c < raidPtr->numCol; c++) {
		rc = rf_ConfigureDiskQueue(raidPtr, &diskQueues[c],
					   c, p,
					   raidPtr->sectorsPerDisk,
					   raidPtr->Disks[c].dev,
					   cfgPtr->maxOutstandingDiskReqs,
					   listp, raidPtr->cleanupList);
		if (rc)
			return (rc);
	}

	spareQueues = &raidPtr->Queues[raidPtr->numCol];
	for (r = 0; r < raidPtr->numSpare; r++) {
		rc = rf_ConfigureDiskQueue(raidPtr, &spareQueues[r],
					   raidPtr->numCol + r, p,
					   raidPtr->sectorsPerDisk,
					   raidPtr->Disks[raidPtr->numCol + r].dev,
					   cfgPtr->maxOutstandingDiskReqs, listp,
					   raidPtr->cleanupList);
		if (rc)
			return (rc);
	}
	return (0);
}
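
/*
 * Illustrative sketch (not part of the original driver): how a
 * configuration might request a discipline and queue depth before
 * rf_ConfigureDiskQueues() runs.  The RF_Config_t field names match
 * their use above; the helper itself, the chosen values, and the
 * assumption that diskQueueType is a char array are invented for
 * illustration.
 */
#if 0
static void
example_configure(RF_Config_t *cfg)
{
	/* Any queueType name from diskqueuesw[]: "fifo", "cvscan",
	 * "sstf", "scan", or "cscan".  Unknown names fall back to
	 * "fifo" with a warning (see the lookup loop above). */
	strlcpy(cfg->diskQueueType, "fifo", sizeof(cfg->diskQueueType));

	/* Upper bound on concurrently dispatched I/Os per disk
	 * (becomes maxOutstanding in each RF_DiskQueue_t). */
	cfg->maxOutstandingDiskReqs = 10;
}
#endif
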
/* Enqueue a disk I/O
 *
 * Unfortunately, we have to do things differently in the different
 * environments (simulator, user-level, kernel).
 * At user level, all I/O is blocking, so we have one or more threads
 * per disk, and the thread that enqueues is different from the thread
 * that dequeues.
 * In the kernel, I/O is non-blocking, and so we'd like to have
 * multiple I/Os outstanding on the physical disks when possible.
 *
 * When any request arrives at a queue, we have two choices:
 *    dispatch it to the lower levels
 *    queue it up
 *
 * kernel rules for when to do what:
 *    locking request:  queue empty => dispatch and lock queue,
 *                      else queue it
 *    unlocking req  :  always dispatch it
 *    normal req     :  queue empty => dispatch it & set priority
 *                      queue not full & priority is ok => dispatch it
 *                      else queue it
 *
 * user-level rules:
 *    always enqueue.  In the special case of an unlocking op, enqueue
 *    in a special way that will cause the unlocking op to be the next
 *    thing dequeued.
 *
 * simulator rules:
 *    Do the same as at user level, with the sleeps and wakeups suppressed.
 */
void
rf_DiskIOEnqueue(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req, int pri)
{
	RF_ETIMER_START(req->qtime);
	RF_ASSERT(req->type == RF_IO_TYPE_NOP || req->numSector);
	req->priority = pri;

#if RF_DEBUG_DISKQUEUE
	if (rf_queueDebug && (req->numSector == 0)) {
		printf("Warning: Enqueueing zero-sector access\n");
	}
#endif
	/*
	 * kernel
	 */
	RF_LOCK_QUEUE_MUTEX(queue, "DiskIOEnqueue");
	if (RF_LOCKING_REQ(req)) {
		/* locking request */
		if (RF_QUEUE_EMPTY(queue)) {
			Dprintf2("Dispatching pri %d locking op to c %d (queue empty)\n", pri, queue->col);
			RF_LOCK_QUEUE(queue);
			rf_DispatchKernelIO(queue, req);
		} else {
			/* increment count of requests waiting in this queue */
			queue->queueLength++;
			Dprintf2("Enqueueing pri %d locking op to c %d (queue not empty)\n", pri, queue->col);
			req->queue = (void *) queue;
			(queue->qPtr->Enqueue) (queue->qHdr, req, pri);
		}
	} else if (RF_UNLOCKING_REQ(req)) {
		/* unlocking request: we'll do the actual unlock when this
		 * I/O completes */
		Dprintf2("Dispatching pri %d unlocking op to c %d\n", pri, queue->col);
		RF_ASSERT(RF_QUEUE_LOCKED(queue));
		rf_DispatchKernelIO(queue, req);
	} else if (RF_OK_TO_DISPATCH(queue, req)) {
		/* normal request, and there is room to dispatch it */
		Dprintf2("Dispatching pri %d regular op to c %d (ok to dispatch)\n", pri, queue->col);
		rf_DispatchKernelIO(queue, req);
	} else {
		/* increment count of requests waiting in this queue */
		queue->queueLength++;
		Dprintf2("Enqueueing pri %d regular op to c %d (not ok to dispatch)\n", pri, queue->col);
		req->queue = (void *) queue;
		(queue->qPtr->Enqueue) (queue->qHdr, req, pri);
	}
	RF_UNLOCK_QUEUE_MUTEX(queue, "DiskIOEnqueue");
}
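
/*
 * Illustrative sketch (not part of the original driver): a typical
 * kernel-side submission, pairing rf_CreateDiskQueueData() (defined
 * later in this file) with rf_DiskIOEnqueue().  The wrapper function
 * and its completion callback are invented for illustration.
 */
#if 0
static int
example_done(void *arg, int status)
{
	/* ... wake up whoever is waiting on this I/O ... */
	return (0);
}

static void
example_submit_read(RF_Raid_t *raidPtr, RF_DiskQueue_t *q,
    RF_SectorNum_t start, RF_SectorCount_t nsect, caddr_t buf)
{
	RF_DiskQueueData_t *req;

	req = rf_CreateDiskQueueData(RF_IO_TYPE_READ, start, nsect, buf,
	    0 /* parityStripeID */, 0 /* which_ru */,
	    example_done, NULL /* arg */,
	    NULL /* next */, NULL /* tracerec */, raidPtr,
	    0 /* flags */, NULL /* kb_proc */);
	if (req != NULL)
		rf_DiskIOEnqueue(q, req, RF_IO_NORMAL_PRIORITY);
}
#endif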


/* Get the next set of I/Os started; kernel version only. */
void
rf_DiskIOComplete(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req, int status)
{
	int     done = 0;

	RF_LOCK_QUEUE_MUTEX(queue, "DiskIOComplete");

	/* unlock the queue: (1) after an unlocking req completes, (2) after
	 * a locking req fails */
	if (RF_UNLOCKING_REQ(req) || (RF_LOCKING_REQ(req) && status)) {
		Dprintf1("DiskIOComplete: unlocking queue at c %d\n", queue->col);
		RF_ASSERT(RF_QUEUE_LOCKED(queue));
		RF_UNLOCK_QUEUE(queue);
	}
	queue->numOutstanding--;
	RF_ASSERT(queue->numOutstanding >= 0);

	/* Dispatch requests to the disk until we find one that we can't.
	 * There is no reason to continue once we've filled up the queue,
	 * and no reason to even start if the queue is locked. */

	while (!done && !RF_QUEUE_FULL(queue) && !RF_QUEUE_LOCKED(queue)) {
		if (queue->nextLockingOp) {
			req = queue->nextLockingOp;
			queue->nextLockingOp = NULL;
			Dprintf2("DiskIOComplete: a pri %d locking req was pending at c %d\n", req->priority, queue->col);
		} else {
			req = (queue->qPtr->Dequeue) (queue->qHdr);
			if (req != NULL) {
				Dprintf2("DiskIOComplete: extracting pri %d req from queue at c %d\n", req->priority, queue->col);
			} else {
				Dprintf1("DiskIOComplete: no more requests to extract.\n", "");
			}
		}
		if (req) {
			/* decrement count of requests waiting in this queue */
			queue->queueLength--;
			RF_ASSERT(queue->queueLength >= 0);
		}
		if (!req) {
			done = 1;
		} else if (RF_LOCKING_REQ(req)) {
			if (RF_QUEUE_EMPTY(queue)) {	/* dispatch it */
				Dprintf2("DiskIOComplete: dispatching pri %d locking req to c %d (queue empty)\n", req->priority, queue->col);
				RF_LOCK_QUEUE(queue);
				rf_DispatchKernelIO(queue, req);
				done = 1;
			} else {
				/* put it aside to wait for the queue to
				 * drain */
				Dprintf2("DiskIOComplete: postponing pri %d locking req to c %d\n", req->priority, queue->col);
				RF_ASSERT(queue->nextLockingOp == NULL);
				queue->nextLockingOp = req;
				done = 1;
			}
		} else if (RF_UNLOCKING_REQ(req)) {
			/* should not happen: unlocking ops should not get
			 * queued.  Support it anyway for the future. */
			RF_ASSERT(RF_QUEUE_LOCKED(queue));
			Dprintf2("DiskIOComplete: dispatching pri %d unl req to c %d (SHOULD NOT SEE THIS)\n", req->priority, queue->col);
			rf_DispatchKernelIO(queue, req);
			done = 1;
		} else if (RF_OK_TO_DISPATCH(queue, req)) {
			Dprintf2("DiskIOComplete: dispatching pri %d regular req to c %d (ok to dispatch)\n", req->priority, queue->col);
			rf_DispatchKernelIO(queue, req);
		} else {
			/* We can't dispatch it, so just re-enqueue it.
			 * Potential trouble here if disk queues batch reqs. */
			Dprintf2("DiskIOComplete: re-enqueueing pri %d regular req to c %d\n", req->priority, queue->col);
			queue->queueLength++;
			(queue->qPtr->Enqueue) (queue->qHdr, req, req->priority);
			done = 1;
		}
	}

	RF_UNLOCK_QUEUE_MUTEX(queue, "DiskIOComplete");
}
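
/*
 * Illustrative sketch (not part of the original driver): where
 * rf_DiskIOComplete() fits.  When a dispatched I/O finishes, the disk
 * interface code first lets the queue unlock and refill, then notifies
 * the requester.  The handler below is invented; the call order
 * mirrors how the completion path uses this file's API.
 */
#if 0
static void
example_io_done(RF_DiskQueue_t *q, RF_DiskQueueData_t *req, int error)
{
	/* Update queue state and dispatch whatever can now be started. */
	rf_DiskIOComplete(q, req, error);

	/* Then notify the layer that issued the request. */
	if (req->CompleteFunc)
		(req->CompleteFunc) (req->argument, error);
}
#endif
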
/* Promote accesses tagged with the given parityStripeID from low priority
 * to normal priority.  This promotion is optional, meaning that a queue
 * need not implement it.  If there is no promotion routine associated with
 * a queue, this routine does nothing and returns -1.
 */
int
rf_DiskIOPromote(RF_DiskQueue_t *queue, RF_StripeNum_t parityStripeID,
		 RF_ReconUnitNum_t which_ru)
{
	int     retval;

	if (!queue->qPtr->Promote)
		return (-1);
	RF_LOCK_QUEUE_MUTEX(queue, "DiskIOPromote");
	retval = (queue->qPtr->Promote) (queue->qHdr, parityStripeID, which_ru);
	RF_UNLOCK_QUEUE_MUTEX(queue, "DiskIOPromote");
	return (retval);
}
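
/*
 * Illustrative sketch (not part of the original driver): a caller such
 * as reconstruction code might promote any queued low-priority
 * accesses for a parity stripe it is about to touch.  The wrapper is
 * invented; the -1 convention is documented above.
 */
#if 0
static void
example_promote(RF_DiskQueue_t *q, RF_StripeNum_t psid,
    RF_ReconUnitNum_t which_ru)
{
	int retval;

	retval = rf_DiskIOPromote(q, psid, which_ru);
	if (retval < 0) {
		/* This queueing discipline has no Promote routine. */
	}
	/* Otherwise retval is whatever the discipline's Promote
	 * routine returned. */
}
#endif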

RF_DiskQueueData_t *
rf_CreateDiskQueueData(RF_IoType_t typ, RF_SectorNum_t ssect,
		       RF_SectorCount_t nsect, caddr_t buf,
		       RF_StripeNum_t parityStripeID,
		       RF_ReconUnitNum_t which_ru,
		       int (*wakeF) (void *, int), void *arg,
		       RF_DiskQueueData_t *next,
		       RF_AccTraceEntry_t *tracerec, void *raidPtr,
		       RF_DiskQueueDataFlags_t flags, void *kb_proc)
{
	RF_DiskQueueData_t *p;
	int s;

	p = pool_get(&rf_pools.dqd, PR_WAITOK);
	memset(p, 0, sizeof(RF_DiskQueueData_t));
	/* Need to be at splbio to access bufpool! */
	s = splbio();
	p->bp = pool_get(&bufpool, PR_NOWAIT); /* XXX: make up our minds here.
						  WAITOK, or NOWAIT?? */
	splx(s);
	if (p->bp == NULL) {
		/* no memory for the buffer!?!? */
		pool_put(&rf_pools.dqd, p);
		return (NULL);
	}

	memset(p->bp, 0, sizeof(struct buf));
	p->sectorOffset = ssect + rf_protectedSectors;
	p->numSector = nsect;
	p->type = typ;
	p->buf = buf;
	p->parityStripeID = parityStripeID;
	p->which_ru = which_ru;
	p->CompleteFunc = wakeF;
	p->argument = arg;
	p->next = next;
	p->tracerec = tracerec;
	p->priority = RF_IO_NORMAL_PRIORITY;
	p->raidPtr = raidPtr;
	p->flags = flags;
	p->b_proc = kb_proc;
	return (p);
}

void
rf_FreeDiskQueueData(RF_DiskQueueData_t *p)
{
	pool_put(&bufpool, p->bp);
	pool_put(&rf_pools.dqd, p);
}
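
/*
 * Summary sketch (added commentary, not original code): the life cycle
 * of a request through this file's API.
 *
 *	req = rf_CreateDiskQueueData(...);	allocate the dqd and its struct buf
 *	rf_DiskIOEnqueue(queue, req, pri);	dispatch now, or queue for later
 *	...rf_DispatchKernelIO() issues the I/O; on completion the disk
 *	interface calls rf_DiskIOComplete(queue, req, status), which
 *	unlocks/refills the queue, and then req->CompleteFunc(req->argument,
 *	status) notifies the requester...
 *	rf_FreeDiskQueueData(req);		return the buf and dqd to their pools
 */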
