The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/dev/raidframe/rf_paritylog.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*      $NetBSD: rf_paritylog.c,v 1.9 2002/09/14 17:53:58 oster Exp $   */
    2 /*
    3  * Copyright (c) 1995 Carnegie-Mellon University.
    4  * All rights reserved.
    5  *
    6  * Author: William V. Courtright II
    7  *
    8  * Permission to use, copy, modify and distribute this software and
    9  * its documentation is hereby granted, provided that both the copyright
   10  * notice and this permission notice appear in all copies of the
   11  * software, derivative works or modified versions, and any portions
   12  * thereof, and that both notices appear in supporting documentation.
   13  *
   14  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
   15  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
   16  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
   17  *
   18  * Carnegie Mellon requests users of this software to return to
   19  *
   20  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
   21  *  School of Computer Science
   22  *  Carnegie Mellon University
   23  *  Pittsburgh PA 15213-3890
   24  *
   25  * any improvements or extensions that they make and grant Carnegie the
   26  * rights to redistribute these changes.
   27  */
   28 
   29 /* Code for manipulating in-core parity logs
   30  *
   31  */
   32 
   33 #include <sys/cdefs.h>
   34 __KERNEL_RCSID(0, "$NetBSD: rf_paritylog.c,v 1.9 2002/09/14 17:53:58 oster Exp $");
   35 
   36 #include "rf_archs.h"
   37 
   38 #if RF_INCLUDE_PARITYLOGGING > 0
   39 
   40 /*
   41  * Append-only log for recording parity "update" and "overwrite" records
   42  */
   43 
   44 #include <dev/raidframe/raidframevar.h>
   45 
   46 #include "rf_threadstuff.h"
   47 #include "rf_mcpair.h"
   48 #include "rf_raid.h"
   49 #include "rf_dag.h"
   50 #include "rf_dagfuncs.h"
   51 #include "rf_desc.h"
   52 #include "rf_layout.h"
   53 #include "rf_diskqueue.h"
   54 #include "rf_etimer.h"
   55 #include "rf_paritylog.h"
   56 #include "rf_general.h"
   57 #include "rf_map.h"
   58 #include "rf_paritylogging.h"
   59 #include "rf_paritylogDiskMgr.h"
   60 
   61 static RF_CommonLogData_t *
   62 AllocParityLogCommonData(RF_Raid_t * raidPtr)
   63 {
   64         RF_CommonLogData_t *common = NULL;
   65         int     rc;
   66 
   67         /* Return a struct for holding common parity log information from the
   68          * free list (rf_parityLogDiskQueue.freeCommonList).  If the free list
   69          * is empty, call RF_Malloc to create a new structure. NON-BLOCKING */
   70 
   71         RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
   72         if (raidPtr->parityLogDiskQueue.freeCommonList) {
   73                 common = raidPtr->parityLogDiskQueue.freeCommonList;
   74                 raidPtr->parityLogDiskQueue.freeCommonList = raidPtr->parityLogDiskQueue.freeCommonList->next;
   75                 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
   76         } else {
   77                 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
   78                 RF_Malloc(common, sizeof(RF_CommonLogData_t), (RF_CommonLogData_t *));
   79                 rc = rf_mutex_init(&common->mutex);
   80                 if (rc) {
   81                         rf_print_unable_to_init_mutex(__FILE__, __LINE__, rc);
   82                         RF_Free(common, sizeof(RF_CommonLogData_t));
   83                         common = NULL;
   84                 }
   85         }
   86         common->next = NULL;
   87         return (common);
   88 }
   89 
   90 static void 
   91 FreeParityLogCommonData(RF_CommonLogData_t * common)
   92 {
   93         RF_Raid_t *raidPtr;
   94 
   95         /* Insert a single struct for holding parity log information (data)
   96          * into the free list (rf_parityLogDiskQueue.freeCommonList).
   97          * NON-BLOCKING */
   98 
   99         raidPtr = common->raidPtr;
  100         RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
  101         common->next = raidPtr->parityLogDiskQueue.freeCommonList;
  102         raidPtr->parityLogDiskQueue.freeCommonList = common;
  103         RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
  104 }
  105 
  106 static RF_ParityLogData_t *
  107 AllocParityLogData(RF_Raid_t * raidPtr)
  108 {
  109         RF_ParityLogData_t *data = NULL;
  110 
  111         /* Return a struct for holding parity log information from the free
  112          * list (rf_parityLogDiskQueue.freeList).  If the free list is empty,
  113          * call RF_Malloc to create a new structure. NON-BLOCKING */
  114 
  115         RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
  116         if (raidPtr->parityLogDiskQueue.freeDataList) {
  117                 data = raidPtr->parityLogDiskQueue.freeDataList;
  118                 raidPtr->parityLogDiskQueue.freeDataList = raidPtr->parityLogDiskQueue.freeDataList->next;
  119                 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
  120         } else {
  121                 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
  122                 RF_Malloc(data, sizeof(RF_ParityLogData_t), (RF_ParityLogData_t *));
  123         }
  124         data->next = NULL;
  125         data->prev = NULL;
  126         return (data);
  127 }
  128 
  129 
  130 static void 
  131 FreeParityLogData(RF_ParityLogData_t * data)
  132 {
  133         RF_ParityLogData_t *nextItem;
  134         RF_Raid_t *raidPtr;
  135 
  136         /* Insert a linked list of structs for holding parity log information
  137          * (data) into the free list (parityLogDiskQueue.freeList).
  138          * NON-BLOCKING */
  139 
  140         raidPtr = data->common->raidPtr;
  141         RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
  142         while (data) {
  143                 nextItem = data->next;
  144                 data->next = raidPtr->parityLogDiskQueue.freeDataList;
  145                 raidPtr->parityLogDiskQueue.freeDataList = data;
  146                 data = nextItem;
  147         }
  148         RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
  149 }
  150 
  151 
  152 static void 
  153 EnqueueParityLogData(
  154     RF_ParityLogData_t * data,
  155     RF_ParityLogData_t ** head,
  156     RF_ParityLogData_t ** tail)
  157 {
  158         RF_Raid_t *raidPtr;
  159 
  160         /* Insert an in-core parity log (*data) into the head of a disk queue
  161          * (*head, *tail). NON-BLOCKING */
  162 
  163         raidPtr = data->common->raidPtr;
  164         if (rf_parityLogDebug)
  165                 printf("[enqueueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector);
  166         RF_ASSERT(data->prev == NULL);
  167         RF_ASSERT(data->next == NULL);
  168         RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
  169         if (*head) {
  170                 /* insert into head of queue */
  171                 RF_ASSERT((*head)->prev == NULL);
  172                 RF_ASSERT((*tail)->next == NULL);
  173                 data->next = *head;
  174                 (*head)->prev = data;
  175                 *head = data;
  176         } else {
  177                 /* insert into empty list */
  178                 RF_ASSERT(*head == NULL);
  179                 RF_ASSERT(*tail == NULL);
  180                 *head = data;
  181                 *tail = data;
  182         }
  183         RF_ASSERT((*head)->prev == NULL);
  184         RF_ASSERT((*tail)->next == NULL);
  185         RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
  186 }
  187 
  188 static RF_ParityLogData_t *
  189 DequeueParityLogData(
  190     RF_Raid_t * raidPtr,
  191     RF_ParityLogData_t ** head,
  192     RF_ParityLogData_t ** tail,
  193     int ignoreLocks)
  194 {
  195         RF_ParityLogData_t *data;
  196 
  197         /* Remove and return an in-core parity log from the tail of a disk
  198          * queue (*head, *tail). NON-BLOCKING */
  199 
  200         /* remove from tail, preserving FIFO order */
  201         if (!ignoreLocks)
  202                 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
  203         data = *tail;
  204         if (data) {
  205                 if (*head == *tail) {
  206                         /* removing last item from queue */
  207                         *head = NULL;
  208                         *tail = NULL;
  209                 } else {
  210                         *tail = (*tail)->prev;
  211                         (*tail)->next = NULL;
  212                         RF_ASSERT((*head)->prev == NULL);
  213                         RF_ASSERT((*tail)->next == NULL);
  214                 }
  215                 data->next = NULL;
  216                 data->prev = NULL;
  217                 if (rf_parityLogDebug)
  218                         printf("[dequeueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector);
  219         }
  220         if (*head) {
  221                 RF_ASSERT((*head)->prev == NULL);
  222                 RF_ASSERT((*tail)->next == NULL);
  223         }
  224         if (!ignoreLocks)
  225                 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
  226         return (data);
  227 }
  228 
  229 
  230 static void 
  231 RequeueParityLogData(
  232     RF_ParityLogData_t * data,
  233     RF_ParityLogData_t ** head,
  234     RF_ParityLogData_t ** tail)
  235 {
  236         RF_Raid_t *raidPtr;
  237 
  238         /* Insert an in-core parity log (*data) into the tail of a disk queue
  239          * (*head, *tail). NON-BLOCKING */
  240 
  241         raidPtr = data->common->raidPtr;
  242         RF_ASSERT(data);
  243         if (rf_parityLogDebug)
  244                 printf("[requeueing parity log data, region %d, raidAddress %d, numSector %d]\n", data->regionID, (int) data->diskAddress.raidAddress, (int) data->diskAddress.numSector);
  245         RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
  246         if (*tail) {
  247                 /* append to tail of list */
  248                 data->prev = *tail;
  249                 data->next = NULL;
  250                 (*tail)->next = data;
  251                 *tail = data;
  252         } else {
  253                 /* inserting into an empty list */
  254                 *head = data;
  255                 *tail = data;
  256                 (*head)->prev = NULL;
  257                 (*tail)->next = NULL;
  258         }
  259         RF_ASSERT((*head)->prev == NULL);
  260         RF_ASSERT((*tail)->next == NULL);
  261         RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
  262 }
  263 
  264 RF_ParityLogData_t *
  265 rf_CreateParityLogData(
  266     RF_ParityRecordType_t operation,
  267     RF_PhysDiskAddr_t * pda,
  268     caddr_t bufPtr,
  269     RF_Raid_t * raidPtr,
  270     int (*wakeFunc) (RF_DagNode_t * node, int status),
  271     void *wakeArg,
  272     RF_AccTraceEntry_t * tracerec,
  273     RF_Etimer_t startTime)
  274 {
  275         RF_ParityLogData_t *data, *resultHead = NULL, *resultTail = NULL;
  276         RF_CommonLogData_t *common;
  277         RF_PhysDiskAddr_t *diskAddress;
  278         int     boundary, offset = 0;
  279 
  280         /* Return an initialized struct of info to be logged. Build one item
  281          * per physical disk address, one item per region.
  282          * 
  283          * NON-BLOCKING */
  284 
  285         diskAddress = pda;
  286         common = AllocParityLogCommonData(raidPtr);
  287         RF_ASSERT(common);
  288 
  289         common->operation = operation;
  290         common->bufPtr = bufPtr;
  291         common->raidPtr = raidPtr;
  292         common->wakeFunc = wakeFunc;
  293         common->wakeArg = wakeArg;
  294         common->tracerec = tracerec;
  295         common->startTime = startTime;
  296         common->cnt = 0;
  297 
  298         if (rf_parityLogDebug)
  299                 printf("[entering CreateParityLogData]\n");
  300         while (diskAddress) {
  301                 common->cnt++;
  302                 data = AllocParityLogData(raidPtr);
  303                 RF_ASSERT(data);
  304                 data->common = common;
  305                 data->next = NULL;
  306                 data->prev = NULL;
  307                 data->regionID = rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector);
  308                 if (data->regionID == rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector + diskAddress->numSector - 1)) {
  309                         /* disk address does not cross a region boundary */
  310                         data->diskAddress = *diskAddress;
  311                         data->bufOffset = offset;
  312                         offset = offset + diskAddress->numSector;
  313                         EnqueueParityLogData(data, &resultHead, &resultTail);
  314                         /* adjust disk address */
  315                         diskAddress = diskAddress->next;
  316                 } else {
  317                         /* disk address crosses a region boundary */
  318                         /* find address where region is crossed */
  319                         boundary = 0;
  320                         while (data->regionID == rf_MapRegionIDParityLogging(raidPtr, diskAddress->startSector + boundary))
  321                                 boundary++;
  322 
  323                         /* enter data before the boundary */
  324                         data->diskAddress = *diskAddress;
  325                         data->diskAddress.numSector = boundary;
  326                         data->bufOffset = offset;
  327                         offset += boundary;
  328                         EnqueueParityLogData(data, &resultHead, &resultTail);
  329                         /* adjust disk address */
  330                         diskAddress->startSector += boundary;
  331                         diskAddress->numSector -= boundary;
  332                 }
  333         }
  334         if (rf_parityLogDebug)
  335                 printf("[leaving CreateParityLogData]\n");
  336         return (resultHead);
  337 }
  338 
  339 
  340 RF_ParityLogData_t *
  341 rf_SearchAndDequeueParityLogData(
  342     RF_Raid_t * raidPtr,
  343     int regionID,
  344     RF_ParityLogData_t ** head,
  345     RF_ParityLogData_t ** tail,
  346     int ignoreLocks)
  347 {
  348         RF_ParityLogData_t *w;
  349 
  350         /* Remove and return an in-core parity log from a specified region
  351          * (regionID). If a matching log is not found, return NULL.
  352          * 
  353          * NON-BLOCKING. */
  354 
  355         /* walk backward through a list, looking for an entry with a matching
  356          * region ID */
  357         if (!ignoreLocks)
  358                 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
  359         w = (*tail);
  360         while (w) {
  361                 if (w->regionID == regionID) {
  362                         /* remove an element from the list */
  363                         if (w == *tail) {
  364                                 if (*head == *tail) {
  365                                         /* removing only element in the list */
  366                                         *head = NULL;
  367                                         *tail = NULL;
  368                                 } else {
  369                                         /* removing last item in the list */
  370                                         *tail = (*tail)->prev;
  371                                         (*tail)->next = NULL;
  372                                         RF_ASSERT((*head)->prev == NULL);
  373                                         RF_ASSERT((*tail)->next == NULL);
  374                                 }
  375                         } else {
  376                                 if (w == *head) {
  377                                         /* removing first item in the list */
  378                                         *head = (*head)->next;
  379                                         (*head)->prev = NULL;
  380                                         RF_ASSERT((*head)->prev == NULL);
  381                                         RF_ASSERT((*tail)->next == NULL);
  382                                 } else {
  383                                         /* removing an item from the middle of
  384                                          * the list */
  385                                         w->prev->next = w->next;
  386                                         w->next->prev = w->prev;
  387                                         RF_ASSERT((*head)->prev == NULL);
  388                                         RF_ASSERT((*tail)->next == NULL);
  389                                 }
  390                         }
  391                         w->prev = NULL;
  392                         w->next = NULL;
  393                         if (rf_parityLogDebug)
  394                                 printf("[dequeueing parity log data, region %d, raidAddress %d, numSector %d]\n", w->regionID, (int) w->diskAddress.raidAddress, (int) w->diskAddress.numSector);
  395                         return (w);
  396                 } else
  397                         w = w->prev;
  398         }
  399         if (!ignoreLocks)
  400                 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
  401         return (NULL);
  402 }
  403 
  404 static RF_ParityLogData_t *
  405 DequeueMatchingLogData(
  406     RF_Raid_t * raidPtr,
  407     RF_ParityLogData_t ** head,
  408     RF_ParityLogData_t ** tail)
  409 {
  410         RF_ParityLogData_t *logDataList, *logData;
  411         int     regionID;
  412 
  413         /* Remove and return an in-core parity log from the tail of a disk
  414          * queue (*head, *tail).  Then remove all matching (identical
  415          * regionIDs) logData and return as a linked list.
  416          * 
  417          * NON-BLOCKING */
  418 
  419         logDataList = DequeueParityLogData(raidPtr, head, tail, RF_TRUE);
  420         if (logDataList) {
  421                 regionID = logDataList->regionID;
  422                 logData = logDataList;
  423                 logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, head, tail, RF_TRUE);
  424                 while (logData->next) {
  425                         logData = logData->next;
  426                         logData->next = rf_SearchAndDequeueParityLogData(raidPtr, regionID, head, tail, RF_TRUE);
  427                 }
  428         }
  429         return (logDataList);
  430 }
  431 
  432 
  433 static RF_ParityLog_t *
  434 AcquireParityLog(
  435     RF_ParityLogData_t * logData,
  436     int finish)
  437 {
  438         RF_ParityLog_t *log = NULL;
  439         RF_Raid_t *raidPtr;
  440 
  441         /* Grab a log buffer from the pool and return it. If no buffers are
  442          * available, return NULL. NON-BLOCKING */
  443         raidPtr = logData->common->raidPtr;
  444         RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex);
  445         if (raidPtr->parityLogPool.parityLogs) {
  446                 log = raidPtr->parityLogPool.parityLogs;
  447                 raidPtr->parityLogPool.parityLogs = raidPtr->parityLogPool.parityLogs->next;
  448                 log->regionID = logData->regionID;
  449                 log->numRecords = 0;
  450                 log->next = NULL;
  451                 raidPtr->logsInUse++;
  452                 RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs);
  453         } else {
  454                 /* no logs available, so place ourselves on the queue of work
  455                  * waiting on log buffers this is done while
  456                  * parityLogPool.mutex is held, to ensure synchronization with
  457                  * ReleaseParityLogs. */
  458                 if (rf_parityLogDebug)
  459                         printf("[blocked on log, region %d, finish %d]\n", logData->regionID, finish);
  460                 if (finish)
  461                         RequeueParityLogData(logData, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
  462                 else
  463                         EnqueueParityLogData(logData, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
  464         }
  465         RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex);
  466         return (log);
  467 }
  468 
  469 void 
  470 rf_ReleaseParityLogs(
  471     RF_Raid_t * raidPtr,
  472     RF_ParityLog_t * firstLog)
  473 {
  474         RF_ParityLogData_t *logDataList;
  475         RF_ParityLog_t *log, *lastLog;
  476         int     cnt;
  477 
  478         /* Insert a linked list of parity logs (firstLog) to the free list
  479          * (parityLogPool.parityLogPool)
  480          * 
  481          * NON-BLOCKING. */
  482 
  483         RF_ASSERT(firstLog);
  484 
  485         /* Before returning logs to global free list, service all requests
  486          * which are blocked on logs.  Holding mutexes for parityLogPool and
  487          * parityLogDiskQueue forces synchronization with AcquireParityLog(). */
  488         RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex);
  489         RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
  490         logDataList = DequeueMatchingLogData(raidPtr, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
  491         log = firstLog;
  492         if (firstLog)
  493                 firstLog = firstLog->next;
  494         log->numRecords = 0;
  495         log->next = NULL;
  496         while (logDataList && log) {
  497                 RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex);
  498                 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
  499                 rf_ParityLogAppend(logDataList, RF_TRUE, &log, RF_FALSE);
  500                 if (rf_parityLogDebug)
  501                         printf("[finishing up buf-blocked log data, region %d]\n", logDataList->regionID);
  502                 if (log == NULL) {
  503                         log = firstLog;
  504                         if (firstLog) {
  505                                 firstLog = firstLog->next;
  506                                 log->numRecords = 0;
  507                                 log->next = NULL;
  508                         }
  509                 }
  510                 RF_LOCK_MUTEX(raidPtr->parityLogPool.mutex);
  511                 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
  512                 if (log)
  513                         logDataList = DequeueMatchingLogData(raidPtr, &raidPtr->parityLogDiskQueue.logBlockHead, &raidPtr->parityLogDiskQueue.logBlockTail);
  514         }
  515         /* return remaining logs to pool */
  516         if (log) {
  517                 log->next = firstLog;
  518                 firstLog = log;
  519         }
  520         if (firstLog) {
  521                 lastLog = firstLog;
  522                 raidPtr->logsInUse--;
  523                 RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs);
  524                 while (lastLog->next) {
  525                         lastLog = lastLog->next;
  526                         raidPtr->logsInUse--;
  527                         RF_ASSERT(raidPtr->logsInUse >= 0 && raidPtr->logsInUse <= raidPtr->numParityLogs);
  528                 }
  529                 lastLog->next = raidPtr->parityLogPool.parityLogs;
  530                 raidPtr->parityLogPool.parityLogs = firstLog;
  531                 cnt = 0;
  532                 log = raidPtr->parityLogPool.parityLogs;
  533                 while (log) {
  534                         cnt++;
  535                         log = log->next;
  536                 }
  537                 RF_ASSERT(cnt + raidPtr->logsInUse == raidPtr->numParityLogs);
  538         }
  539         RF_UNLOCK_MUTEX(raidPtr->parityLogPool.mutex);
  540         RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
  541 }
  542 
  543 static void 
  544 ReintLog(
  545     RF_Raid_t * raidPtr,
  546     int regionID,
  547     RF_ParityLog_t * log)
  548 {
  549         RF_ASSERT(log);
  550 
  551         /* Insert an in-core parity log (log) into the disk queue of
  552          * reintegration work.  Set the flag (reintInProgress) for the
  553          * specified region (regionID) to indicate that reintegration is in
  554          * progress for this region. NON-BLOCKING */
  555 
  556         RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
  557         raidPtr->regionInfo[regionID].reintInProgress = RF_TRUE;        /* cleared when reint
  558                                                                          * complete */
  559 
  560         if (rf_parityLogDebug)
  561                 printf("[requesting reintegration of region %d]\n", log->regionID);
  562         /* move record to reintegration queue */
  563         RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
  564         log->next = raidPtr->parityLogDiskQueue.reintQueue;
  565         raidPtr->parityLogDiskQueue.reintQueue = log;
  566         RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
  567         RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
  568         RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);
  569 }
  570 
  571 static void 
  572 FlushLog(
  573     RF_Raid_t * raidPtr,
  574     RF_ParityLog_t * log)
  575 {
  576         /* insert a core log (log) into a list of logs
  577          * (parityLogDiskQueue.flushQueue) waiting to be written to disk.
  578          * NON-BLOCKING */
  579 
  580         RF_ASSERT(log);
  581         RF_ASSERT(log->numRecords == raidPtr->numSectorsPerLog);
  582         RF_ASSERT(log->next == NULL);
  583         /* move log to flush queue */
  584         RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
  585         log->next = raidPtr->parityLogDiskQueue.flushQueue;
  586         raidPtr->parityLogDiskQueue.flushQueue = log;
  587         RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
  588         RF_SIGNAL_COND(raidPtr->parityLogDiskQueue.cond);
  589 }
  590 
  591 static int 
  592 DumpParityLogToDisk(
  593     int finish,
  594     RF_ParityLogData_t * logData)
  595 {
  596         int     i, diskCount, regionID = logData->regionID;
  597         RF_ParityLog_t *log;
  598         RF_Raid_t *raidPtr;
  599 
  600         raidPtr = logData->common->raidPtr;
  601 
  602         /* Move a core log to disk.  If the log disk is full, initiate
  603          * reintegration.
  604          * 
  605          * Return (0) if we can enqueue the dump immediately, otherwise return
  606          * (1) to indicate we are blocked on reintegration and control of the
  607          * thread should be relinquished.
  608          * 
  609          * Caller must hold regionInfo[regionID].mutex
  610          * 
  611          * NON-BLOCKING */
  612 
  613         if (rf_parityLogDebug)
  614                 printf("[dumping parity log to disk, region %d]\n", regionID);
  615         log = raidPtr->regionInfo[regionID].coreLog;
  616         RF_ASSERT(log->numRecords == raidPtr->numSectorsPerLog);
  617         RF_ASSERT(log->next == NULL);
  618 
  619         /* if reintegration is in progress, must queue work */
  620         RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
  621         if (raidPtr->regionInfo[regionID].reintInProgress) {
  622                 /* Can not proceed since this region is currently being
  623                  * reintegrated. We can not block, so queue remaining work and
  624                  * return */
  625                 if (rf_parityLogDebug)
  626                         printf("[region %d waiting on reintegration]\n", regionID);
  627                 /* XXX not sure about the use of finish - shouldn't this
  628                  * always be "Enqueue"? */
  629                 if (finish)
  630                         RequeueParityLogData(logData, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail);
  631                 else
  632                         EnqueueParityLogData(logData, &raidPtr->parityLogDiskQueue.reintBlockHead, &raidPtr->parityLogDiskQueue.reintBlockTail);
  633                 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
  634                 return (1);     /* relenquish control of this thread */
  635         }
  636         RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
  637         raidPtr->regionInfo[regionID].coreLog = NULL;
  638         if ((raidPtr->regionInfo[regionID].diskCount) < raidPtr->regionInfo[regionID].capacity)
  639                 /* IMPORTANT!! this loop bound assumes region disk holds an
  640                  * integral number of core logs */
  641         {
  642                 /* update disk map for this region */
  643                 diskCount = raidPtr->regionInfo[regionID].diskCount;
  644                 for (i = 0; i < raidPtr->numSectorsPerLog; i++) {
  645                         raidPtr->regionInfo[regionID].diskMap[i + diskCount].operation = log->records[i].operation;
  646                         raidPtr->regionInfo[regionID].diskMap[i + diskCount].parityAddr = log->records[i].parityAddr;
  647                 }
  648                 log->diskOffset = diskCount;
  649                 raidPtr->regionInfo[regionID].diskCount += raidPtr->numSectorsPerLog;
  650                 FlushLog(raidPtr, log);
  651         } else {
  652                 /* no room for log on disk, send it to disk manager and
  653                  * request reintegration */
  654                 RF_ASSERT(raidPtr->regionInfo[regionID].diskCount == raidPtr->regionInfo[regionID].capacity);
  655                 ReintLog(raidPtr, regionID, log);
  656         }
  657         if (rf_parityLogDebug)
  658                 printf("[finished dumping parity log to disk, region %d]\n", regionID);
  659         return (0);
  660 }
  661 
  662 int 
  663 rf_ParityLogAppend(
  664     RF_ParityLogData_t * logData,
  665     int finish,
  666     RF_ParityLog_t ** incomingLog,
  667     int clearReintFlag)
  668 {
  669         int     regionID, logItem, itemDone;
  670         RF_ParityLogData_t *item;
  671         int     punt, done = RF_FALSE;
  672         RF_ParityLog_t *log;
  673         RF_Raid_t *raidPtr;
  674         RF_Etimer_t timer;
  675         int     (*wakeFunc) (RF_DagNode_t * node, int status);
  676         void   *wakeArg;
  677 
  678         /* Add parity to the appropriate log, one sector at a time. This
  679          * routine is called by dag functions ParityLogUpdateFunc
  680          * and ParityLogOverwriteFunc and therefore MUST BE NONBLOCKING.
  681          * 
  682          * Parity to be logged is contained in a linked-list (logData).  When
  683          * this routine returns, every sector in the list will be in one of
  684          * three places: 1) entered into the parity log 2) queued, waiting on
  685          * reintegration 3) queued, waiting on a core log
  686          * 
  687          * Blocked work is passed to the ParityLoggingDiskManager for completion.
  688          * Later, as conditions which required the block are removed, the work
  689          * reenters this routine with the "finish" parameter set to "RF_TRUE."
  690          * 
  691          * NON-BLOCKING */
  692 
  693         raidPtr = logData->common->raidPtr;
  694         /* lock the region for the first item in logData */
  695         RF_ASSERT(logData != NULL);
  696         regionID = logData->regionID;
  697         RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
  698         RF_ASSERT(raidPtr->regionInfo[regionID].loggingEnabled);
  699 
  700         if (clearReintFlag) {
  701                 /* Enable flushing for this region.  Holding both locks
  702                  * provides a synchronization barrier with DumpParityLogToDisk */
  703                 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);
  704                 RF_LOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
  705                 RF_ASSERT(raidPtr->regionInfo[regionID].reintInProgress == RF_TRUE);
  706                 raidPtr->regionInfo[regionID].diskCount = 0;
  707                 raidPtr->regionInfo[regionID].reintInProgress = RF_FALSE;
  708                 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].reintMutex);      /* flushing is now
  709                                                                                  * enabled */
  710                 RF_UNLOCK_MUTEX(raidPtr->parityLogDiskQueue.mutex);
  711         }
  712         /* process each item in logData */
  713         while (logData) {
  714                 /* remove an item from logData */
  715                 item = logData;
  716                 logData = logData->next;
  717                 item->next = NULL;
  718                 item->prev = NULL;
  719 
  720                 if (rf_parityLogDebug)
  721                         printf("[appending parity log data, region %d, raidAddress %d, numSector %d]\n", item->regionID, (int) item->diskAddress.raidAddress, (int) item->diskAddress.numSector);
  722 
  723                 /* see if we moved to a new region */
  724                 if (regionID != item->regionID) {
  725                         RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
  726                         regionID = item->regionID;
  727                         RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
  728                         RF_ASSERT(raidPtr->regionInfo[regionID].loggingEnabled);
  729                 }
  730                 punt = RF_FALSE;/* Set to RF_TRUE if work is blocked.  This
  731                                  * can happen in one of two ways: 1) no core
  732                                  * log (AcquireParityLog) 2) waiting on
  733                                  * reintegration (DumpParityLogToDisk) If punt
  734                                  * is RF_TRUE, the dataItem was queued, so
  735                                  * skip to next item. */
  736 
  737                 /* process item, one sector at a time, until all sectors
  738                  * processed or we punt */
  739                 if (item->diskAddress.numSector > 0)
  740                         done = RF_FALSE;
  741                 else
  742                         RF_ASSERT(0);
  743                 while (!punt && !done) {
  744                         /* verify that a core log exists for this region */
  745                         if (!raidPtr->regionInfo[regionID].coreLog) {
  746                                 /* Attempt to acquire a parity log. If
  747                                  * acquisition fails, queue remaining work in
  748                                  * data item and move to nextItem. */
  749                                 if (incomingLog)
  750                                         if (*incomingLog) {
  751                                                 RF_ASSERT((*incomingLog)->next == NULL);
  752                                                 raidPtr->regionInfo[regionID].coreLog = *incomingLog;
  753                                                 raidPtr->regionInfo[regionID].coreLog->regionID = regionID;
  754                                                 *incomingLog = NULL;
  755                                         } else
  756                                                 raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
  757                                 else
  758                                         raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
  759                                 /* Note: AcquireParityLog either returns a log
  760                                  * or enqueues currentItem */
  761                         }
  762                         if (!raidPtr->regionInfo[regionID].coreLog)
  763                                 punt = RF_TRUE; /* failed to find a core log */
  764                         else {
  765                                 RF_ASSERT(raidPtr->regionInfo[regionID].coreLog->next == NULL);
  766                                 /* verify that the log has room for new
  767                                  * entries */
  768                                 /* if log is full, dump it to disk and grab a
  769                                  * new log */
  770                                 if (raidPtr->regionInfo[regionID].coreLog->numRecords == raidPtr->numSectorsPerLog) {
  771                                         /* log is full, dump it to disk */
  772                                         if (DumpParityLogToDisk(finish, item))
  773                                                 punt = RF_TRUE; /* dump unsuccessful,
  774                                                                  * blocked on
  775                                                                  * reintegration */
  776                                         else {
  777                                                 /* dump was successful */
  778                                                 if (incomingLog)
  779                                                         if (*incomingLog) {
  780                                                                 RF_ASSERT((*incomingLog)->next == NULL);
  781                                                                 raidPtr->regionInfo[regionID].coreLog = *incomingLog;
  782                                                                 raidPtr->regionInfo[regionID].coreLog->regionID = regionID;
  783                                                                 *incomingLog = NULL;
  784                                                         } else
  785                                                                 raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
  786                                                 else
  787                                                         raidPtr->regionInfo[regionID].coreLog = AcquireParityLog(item, finish);
  788                                                 /* if a core log is not
  789                                                  * available, must queue work
  790                                                  * and return */
  791                                                 if (!raidPtr->regionInfo[regionID].coreLog)
  792                                                         punt = RF_TRUE; /* blocked on log
  793                                                                          * availability */
  794                                         }
  795                                 }
  796                         }
  797                         /* if we didn't punt on this item, attempt to add a
  798                          * sector to the core log */
  799                         if (!punt) {
  800                                 RF_ASSERT(raidPtr->regionInfo[regionID].coreLog->next == NULL);
  801                                 /* at this point, we have a core log with
  802                                  * enough room for a sector */
  803                                 /* copy a sector into the log */
  804                                 log = raidPtr->regionInfo[regionID].coreLog;
  805                                 RF_ASSERT(log->numRecords < raidPtr->numSectorsPerLog);
  806                                 logItem = log->numRecords++;
  807                                 log->records[logItem].parityAddr = item->diskAddress;
  808                                 RF_ASSERT(log->records[logItem].parityAddr.startSector >= raidPtr->regionInfo[regionID].parityStartAddr);
  809                                 RF_ASSERT(log->records[logItem].parityAddr.startSector < raidPtr->regionInfo[regionID].parityStartAddr + raidPtr->regionInfo[regionID].numSectorsParity);
  810                                 log->records[logItem].parityAddr.numSector = 1;
  811                                 log->records[logItem].operation = item->common->operation;
  812                                 memcpy(log->bufPtr + (logItem * (1 << item->common->raidPtr->logBytesPerSector)), (item->common->bufPtr + (item->bufOffset++ * (1 << item->common->raidPtr->logBytesPerSector))), (1 << item->common->raidPtr->logBytesPerSector));
  813                                 item->diskAddress.numSector--;
  814                                 item->diskAddress.startSector++;
  815                                 if (item->diskAddress.numSector == 0)
  816                                         done = RF_TRUE;
  817                         }
  818                 }
  819 
  820                 if (!punt) {
  821                         /* Processed this item completely, decrement count of
  822                          * items to be processed. */
  823                         RF_ASSERT(item->diskAddress.numSector == 0);
  824                         RF_LOCK_MUTEX(item->common->mutex);
  825                         item->common->cnt--;
  826                         if (item->common->cnt == 0)
  827                                 itemDone = RF_TRUE;
  828                         else
  829                                 itemDone = RF_FALSE;
  830                         RF_UNLOCK_MUTEX(item->common->mutex);
  831                         if (itemDone) {
  832                                 /* Finished processing all log data for this
  833                                  * IO Return structs to free list and invoke
  834                                  * wakeup function. */
  835                                 timer = item->common->startTime;        /* grab initial value of
  836                                                                          * timer */
  837                                 RF_ETIMER_STOP(timer);
  838                                 RF_ETIMER_EVAL(timer);
  839                                 item->common->tracerec->plog_us += RF_ETIMER_VAL_US(timer);
  840                                 if (rf_parityLogDebug)
  841                                         printf("[waking process for region %d]\n", item->regionID);
  842                                 wakeFunc = item->common->wakeFunc;
  843                                 wakeArg = item->common->wakeArg;
  844                                 FreeParityLogCommonData(item->common);
  845                                 FreeParityLogData(item);
  846                                 (wakeFunc) (wakeArg, 0);
  847                         } else
  848                                 FreeParityLogData(item);
  849                 }
  850         }
  851         RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
  852         if (rf_parityLogDebug)
  853                 printf("[exiting ParityLogAppend]\n");
  854         return (0);
  855 }
  856 
  857 
  858 void 
  859 rf_EnableParityLogging(RF_Raid_t * raidPtr)
  860 {
  861         int     regionID;
  862 
  863         for (regionID = 0; regionID < rf_numParityRegions; regionID++) {
  864                 RF_LOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
  865                 raidPtr->regionInfo[regionID].loggingEnabled = RF_TRUE;
  866                 RF_UNLOCK_MUTEX(raidPtr->regionInfo[regionID].mutex);
  867         }
  868         if (rf_parityLogDebug)
  869                 printf("[parity logging enabled]\n");
  870 }
  871 #endif                          /* RF_INCLUDE_PARITYLOGGING > 0 */

Cache object: b937a06a91beaa8e862033316c10138e


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.