The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/dev/raidframe/rf_dagdegrd.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*      $NetBSD: rf_dagdegrd.c,v 1.21.2.1 2004/08/30 08:44:32 tron Exp $        */
    2 /*
    3  * Copyright (c) 1995 Carnegie-Mellon University.
    4  * All rights reserved.
    5  *
    6  * Author: Mark Holland, Daniel Stodolsky, William V. Courtright II
    7  *
    8  * Permission to use, copy, modify and distribute this software and
    9  * its documentation is hereby granted, provided that both the copyright
   10  * notice and this permission notice appear in all copies of the
   11  * software, derivative works or modified versions, and any portions
   12  * thereof, and that both notices appear in supporting documentation.
   13  *
   14  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
   15  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
   16  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
   17  *
   18  * Carnegie Mellon requests users of this software to return to
   19  *
   20  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
   21  *  School of Computer Science
   22  *  Carnegie Mellon University
   23  *  Pittsburgh PA 15213-3890
   24  *
   25  * any improvements or extensions that they make and grant Carnegie the
   26  * rights to redistribute these changes.
   27  */
   28 
   29 /*
   30  * rf_dagdegrd.c
   31  *
   32  * code for creating degraded read DAGs
   33  */
   34 
   35 #include <sys/cdefs.h>
   36 __KERNEL_RCSID(0, "$NetBSD: rf_dagdegrd.c,v 1.21.2.1 2004/08/30 08:44:32 tron Exp $");
   37 
   38 #include <dev/raidframe/raidframevar.h>
   39 
   40 #include "rf_archs.h"
   41 #include "rf_raid.h"
   42 #include "rf_dag.h"
   43 #include "rf_dagutils.h"
   44 #include "rf_dagfuncs.h"
   45 #include "rf_debugMem.h"
   46 #include "rf_general.h"
   47 #include "rf_dagdegrd.h"
   48 #include "rf_map.h"
   49 
   50 
   51 /******************************************************************************
   52  *
   53  * General comments on DAG creation:
   54  *
   55  * All DAGs in this file use roll-away error recovery.  Each DAG has a single
   56  * commit node, usually called "Cmt."  If an error occurs before the Cmt node
   57  * is reached, the execution engine will halt forward execution and work
   58  * backward through the graph, executing the undo functions.  Assuming that
   59  * each node in the graph prior to the Cmt node is undoable and atomic - or -
   60  * does not make changes to permanent state, the graph will fail atomically.
   61  * If an error occurs after the Cmt node executes, the engine will roll-forward
   62  * through the graph, blindly executing nodes until it reaches the end.
   63  * If a graph reaches the end, it is assumed to have completed successfully.
   64  *
   65  * A graph has only 1 Cmt node.
   66  *
   67  */
   68 
   69 
   70 /******************************************************************************
   71  *
   72  * The following wrappers map the standard DAG creation interface to the
   73  * DAG creation routines.  Additionally, these wrappers enable experimentation
   74  * with new DAG structures by providing an extra level of indirection, allowing
   75  * the DAG creation routines to be replaced at this single point.
   76  */
   77 
   78 void 
   79 rf_CreateRaidFiveDegradedReadDAG(RF_Raid_t *raidPtr,
   80                                  RF_AccessStripeMap_t *asmap,
   81                                  RF_DagHeader_t *dag_h,
   82                                  void *bp,
   83                                  RF_RaidAccessFlags_t flags,
   84                                  RF_AllocListElem_t *allocList)
   85 {
   86         rf_CreateDegradedReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList,
   87             &rf_xorRecoveryFuncs);
   88 }
   89 
   90 
   91 /******************************************************************************
   92  *
   93  * DAG creation code begins here
   94  */
   95 
   96 
   97 /******************************************************************************
   98  * Create a degraded read DAG for RAID level 1
   99  *
  100  * Hdr -> Nil -> R(p/s)d -> Commit -> Trm
  101  *
  102  * The "Rd" node reads data from the surviving disk in the mirror pair
  103  *   Rpd - read of primary copy
  104  *   Rsd - read of secondary copy
  105  *
  106  * Parameters:  raidPtr   - description of the physical array
  107  *              asmap     - logical & physical addresses for this access
  108  *              bp        - buffer ptr (for holding write data)
  109  *              flags     - general flags (e.g. disk locking)
  110  *              allocList - list of memory allocated in DAG creation
  111  *****************************************************************************/
  112 
  113 void 
  114 rf_CreateRaidOneDegradedReadDAG(RF_Raid_t *raidPtr,
  115                                 RF_AccessStripeMap_t *asmap,
  116                                 RF_DagHeader_t *dag_h,
  117                                 void *bp,
  118                                 RF_RaidAccessFlags_t flags,
  119                                 RF_AllocListElem_t *allocList)
  120 {
  121         RF_DagNode_t *rdNode, *blockNode, *commitNode, *termNode;
  122         RF_StripeNum_t parityStripeID;
  123         RF_ReconUnitNum_t which_ru;
  124         RF_PhysDiskAddr_t *pda;
  125         int     useMirror;
  126 
  127         useMirror = 0;
  128         parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout),
  129             asmap->raidAddress, &which_ru);
  130 #if RF_DEBUG_DAG
  131         if (rf_dagDebug) {
  132                 printf("[Creating RAID level 1 degraded read DAG]\n");
  133         }
  134 #endif
  135         dag_h->creator = "RaidOneDegradedReadDAG";
  136         /* alloc the Wnd nodes and the Wmir node */
  137         if (asmap->numDataFailed == 0)
  138                 useMirror = RF_FALSE;
  139         else
  140                 useMirror = RF_TRUE;
  141 
  142         /* total number of nodes = 1 + (block + commit + terminator) */
  143 
  144         rdNode = rf_AllocDAGNode();
  145         rdNode->list_next = dag_h->nodes;
  146         dag_h->nodes = rdNode;
  147 
  148         blockNode = rf_AllocDAGNode();
  149         blockNode->list_next = dag_h->nodes;
  150         dag_h->nodes = blockNode;
  151 
  152         commitNode = rf_AllocDAGNode();
  153         commitNode->list_next = dag_h->nodes;
  154         dag_h->nodes = commitNode;
  155 
  156         termNode = rf_AllocDAGNode();
  157         termNode->list_next = dag_h->nodes;
  158         dag_h->nodes = termNode;
  159 
  160         /* this dag can not commit until the commit node is reached.   errors
  161          * prior to the commit point imply the dag has failed and must be
  162          * retried */
  163         dag_h->numCommitNodes = 1;
  164         dag_h->numCommits = 0;
  165         dag_h->numSuccedents = 1;
  166 
  167         /* initialize the block, commit, and terminator nodes */
  168         rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc,
  169             NULL, 1, 0, 0, 0, dag_h, "Nil", allocList);
  170         rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc,
  171             NULL, 1, 1, 0, 0, dag_h, "Cmt", allocList);
  172         rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc,
  173             NULL, 0, 1, 0, 0, dag_h, "Trm", allocList);
  174 
  175         pda = asmap->physInfo;
  176         RF_ASSERT(pda != NULL);
  177         /* parityInfo must describe entire parity unit */
  178         RF_ASSERT(asmap->parityInfo->next == NULL);
  179 
  180         /* initialize the data node */
  181         if (!useMirror) {
  182                 /* read primary copy of data */
  183                 rf_InitNode(rdNode, rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc,
  184                     rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Rpd", allocList);
  185                 rdNode->params[0].p = pda;
  186                 rdNode->params[1].p = pda->bufPtr;
  187                 rdNode->params[2].v = parityStripeID;
  188                 rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
  189                                                        which_ru);
  190         } else {
  191                 /* read secondary copy of data */
  192                 rf_InitNode(rdNode, rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc,
  193                     rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Rsd", allocList);
  194                 rdNode->params[0].p = asmap->parityInfo;
  195                 rdNode->params[1].p = pda->bufPtr;
  196                 rdNode->params[2].v = parityStripeID;
  197                 rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
  198                                                        which_ru);
  199         }
  200 
  201         /* connect header to block node */
  202         RF_ASSERT(dag_h->numSuccedents == 1);
  203         RF_ASSERT(blockNode->numAntecedents == 0);
  204         dag_h->succedents[0] = blockNode;
  205 
  206         /* connect block node to rdnode */
  207         RF_ASSERT(blockNode->numSuccedents == 1);
  208         RF_ASSERT(rdNode->numAntecedents == 1);
  209         blockNode->succedents[0] = rdNode;
  210         rdNode->antecedents[0] = blockNode;
  211         rdNode->antType[0] = rf_control;
  212 
  213         /* connect rdnode to commit node */
  214         RF_ASSERT(rdNode->numSuccedents == 1);
  215         RF_ASSERT(commitNode->numAntecedents == 1);
  216         rdNode->succedents[0] = commitNode;
  217         commitNode->antecedents[0] = rdNode;
  218         commitNode->antType[0] = rf_control;
  219 
  220         /* connect commit node to terminator */
  221         RF_ASSERT(commitNode->numSuccedents == 1);
  222         RF_ASSERT(termNode->numAntecedents == 1);
  223         RF_ASSERT(termNode->numSuccedents == 0);
  224         commitNode->succedents[0] = termNode;
  225         termNode->antecedents[0] = commitNode;
  226         termNode->antType[0] = rf_control;
  227 }
  228 
  229 
  230 
  231 /******************************************************************************
  232  *
  233  * creates a DAG to perform a degraded-mode read of data within one stripe.
  234  * This DAG is as follows:
  235  *
  236  * Hdr -> Block -> Rud -> Xor -> Cmt -> T
  237  *              -> Rrd ->
  238  *              -> Rp -->
  239  *
  240  * Each R node is a successor of the L node
  241  * One successor arc from each R node goes to C, and the other to X
  242  * There is one Rud for each chunk of surviving user data requested by the
  243  * user, and one Rrd for each chunk of surviving user data _not_ being read by
  244  * the user
  245  * R = read, ud = user data, rd = recovery (surviving) data, p = parity
  246  * X = XOR, C = Commit, T = terminate
  247  *
  248  * The block node guarantees a single source node.
  249  *
  250  * Note:  The target buffer for the XOR node is set to the actual user buffer
  251  * where the failed data is supposed to end up.  This buffer is zero'd by the
  252  * code here.  Thus, if you create a degraded read dag, use it, and then
  253  * re-use, you have to be sure to zero the target buffer prior to the re-use.
  254  *
  255  * The recfunc argument at the end specifies the name and function used for
  256  * the redundancy
  257  * recovery function.
  258  *
  259  *****************************************************************************/
  260 
  261 void 
  262 rf_CreateDegradedReadDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
  263                          RF_DagHeader_t *dag_h, void *bp,
  264                          RF_RaidAccessFlags_t flags,
  265                          RF_AllocListElem_t *allocList,
  266                          const RF_RedFuncs_t *recFunc)
  267 {
  268         RF_DagNode_t *rudNodes, *rrdNodes, *xorNode, *blockNode;
  269         RF_DagNode_t *commitNode, *rpNode, *termNode;
  270         RF_DagNode_t *tmpNode, *tmprudNode, *tmprrdNode;
  271         int     nNodes, nRrdNodes, nRudNodes, nXorBufs, i;
  272         int     j, paramNum;
  273         RF_SectorCount_t sectorsPerSU;
  274         RF_ReconUnitNum_t which_ru;
  275         char    overlappingPDAs[RF_MAXCOL];/* a temporary array of flags */
  276         RF_AccessStripeMapHeader_t *new_asm_h[2];
  277         RF_PhysDiskAddr_t *pda, *parityPDA;
  278         RF_StripeNum_t parityStripeID;
  279         RF_PhysDiskAddr_t *failedPDA;
  280         RF_RaidLayout_t *layoutPtr;
  281         char   *rpBuf;
  282 
  283         layoutPtr = &(raidPtr->Layout);
  284         /* failedPDA points to the pda within the asm that targets the failed
  285          * disk */
  286         failedPDA = asmap->failedPDAs[0];
  287         parityStripeID = rf_RaidAddressToParityStripeID(layoutPtr,
  288             asmap->raidAddress, &which_ru);
  289         sectorsPerSU = layoutPtr->sectorsPerStripeUnit;
  290 
  291 #if RF_DEBUG_DAG
  292         if (rf_dagDebug) {
  293                 printf("[Creating degraded read DAG]\n");
  294         }
  295 #endif
  296         RF_ASSERT(asmap->numDataFailed == 1);
  297         dag_h->creator = "DegradedReadDAG";
  298 
  299         /*
  300          * generate two ASMs identifying the surviving data we need
  301          * in order to recover the lost data
  302          */
  303 
  304         /* overlappingPDAs array must be zero'd */
  305         memset(overlappingPDAs, 0, RF_MAXCOL);
  306         rf_GenerateFailedAccessASMs(raidPtr, asmap, failedPDA, dag_h, new_asm_h, &nXorBufs,
  307             &rpBuf, overlappingPDAs, allocList);
  308 
  309         /*
  310          * create all the nodes at once
  311          *
  312          * -1 because no access is generated for the failed pda
  313          */
  314         nRudNodes = asmap->numStripeUnitsAccessed - 1;
  315         nRrdNodes = ((new_asm_h[0]) ? new_asm_h[0]->stripeMap->numStripeUnitsAccessed : 0) +
  316             ((new_asm_h[1]) ? new_asm_h[1]->stripeMap->numStripeUnitsAccessed : 0);
  317         nNodes = 5 + nRudNodes + nRrdNodes;     /* lock, unlock, xor, Rp, Rud,
  318                                                  * Rrd */
  319 
  320         blockNode = rf_AllocDAGNode();
  321         blockNode->list_next = dag_h->nodes;
  322         dag_h->nodes = blockNode;
  323 
  324         commitNode = rf_AllocDAGNode();
  325         commitNode->list_next = dag_h->nodes;
  326         dag_h->nodes = commitNode;
  327 
  328         xorNode = rf_AllocDAGNode();
  329         xorNode->list_next = dag_h->nodes;
  330         dag_h->nodes = xorNode;
  331 
  332         rpNode = rf_AllocDAGNode();
  333         rpNode->list_next = dag_h->nodes;
  334         dag_h->nodes = rpNode;
  335 
  336         termNode = rf_AllocDAGNode();
  337         termNode->list_next = dag_h->nodes;
  338         dag_h->nodes = termNode;
  339 
  340         for (i = 0; i < nRudNodes; i++) {
  341                 tmpNode = rf_AllocDAGNode();
  342                 tmpNode->list_next = dag_h->nodes;
  343                 dag_h->nodes = tmpNode;
  344         }
  345         rudNodes = dag_h->nodes;
  346 
  347         for (i = 0; i < nRrdNodes; i++) {
  348                 tmpNode = rf_AllocDAGNode();
  349                 tmpNode->list_next = dag_h->nodes;
  350                 dag_h->nodes = tmpNode;
  351         }
  352         rrdNodes = dag_h->nodes;
  353 
  354         /* initialize nodes */
  355         dag_h->numCommitNodes = 1;
  356         dag_h->numCommits = 0;
  357         /* this dag can not commit until the commit node is reached errors
  358          * prior to the commit point imply the dag has failed */
  359         dag_h->numSuccedents = 1;
  360 
  361         rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc,
  362             NULL, nRudNodes + nRrdNodes + 1, 0, 0, 0, dag_h, "Nil", allocList);
  363         rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc,
  364             NULL, 1, 1, 0, 0, dag_h, "Cmt", allocList);
  365         rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc,
  366             NULL, 0, 1, 0, 0, dag_h, "Trm", allocList);
  367         rf_InitNode(xorNode, rf_wait, RF_FALSE, recFunc->simple, rf_NullNodeUndoFunc,
  368             NULL, 1, nRudNodes + nRrdNodes + 1, 2 * nXorBufs + 2, 1, dag_h,
  369             recFunc->SimpleName, allocList);
  370 
  371         /* fill in the Rud nodes */
  372         tmprudNode = rudNodes;
  373         for (pda = asmap->physInfo, i = 0; i < nRudNodes; i++, pda = pda->next) {
  374                 if (pda == failedPDA) {
  375                         i--;
  376                         continue;
  377                 }
  378                 rf_InitNode(tmprudNode, rf_wait, RF_FALSE, rf_DiskReadFunc,
  379                     rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h,
  380                     "Rud", allocList);
  381                 RF_ASSERT(pda);
  382                 tmprudNode->params[0].p = pda;
  383                 tmprudNode->params[1].p = pda->bufPtr;
  384                 tmprudNode->params[2].v = parityStripeID;
  385                 tmprudNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
  386                 tmprudNode = tmprudNode->list_next;
  387         }
  388 
  389         /* fill in the Rrd nodes */
  390         i = 0;
  391         tmprrdNode = rrdNodes;
  392         if (new_asm_h[0]) {
  393                 for (pda = new_asm_h[0]->stripeMap->physInfo;
  394                     i < new_asm_h[0]->stripeMap->numStripeUnitsAccessed;
  395                     i++, pda = pda->next) {
  396                         rf_InitNode(tmprrdNode, rf_wait, RF_FALSE, rf_DiskReadFunc,
  397                             rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0,
  398                             dag_h, "Rrd", allocList);
  399                         RF_ASSERT(pda);
  400                         tmprrdNode->params[0].p = pda;
  401                         tmprrdNode->params[1].p = pda->bufPtr;
  402                         tmprrdNode->params[2].v = parityStripeID;
  403                         tmprrdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
  404                         tmprrdNode = tmprrdNode->list_next;
  405                 }
  406         }
  407         if (new_asm_h[1]) {
  408                 /* tmprrdNode = rrdNodes; */ /* don't set this here -- old code was using i+j, which means
  409                    we need to just continue using tmprrdNode for the next 'j' elements. */
  410                 for (j = 0, pda = new_asm_h[1]->stripeMap->physInfo;
  411                     j < new_asm_h[1]->stripeMap->numStripeUnitsAccessed;
  412                     j++, pda = pda->next) {
  413                         rf_InitNode(tmprrdNode, rf_wait, RF_FALSE, rf_DiskReadFunc,
  414                             rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0,
  415                             dag_h, "Rrd", allocList);
  416                         RF_ASSERT(pda);
  417                         tmprrdNode->params[0].p = pda;
  418                         tmprrdNode->params[1].p = pda->bufPtr;
  419                         tmprrdNode->params[2].v = parityStripeID;
  420                         tmprrdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
  421                         tmprrdNode = tmprrdNode->list_next;
  422                 }
  423         }
  424         /* make a PDA for the parity unit */
  425         parityPDA = rf_AllocPhysDiskAddr();
  426         parityPDA->next = dag_h->pda_cleanup_list;
  427         dag_h->pda_cleanup_list = parityPDA;
  428         parityPDA->col = asmap->parityInfo->col;
  429         parityPDA->startSector = ((asmap->parityInfo->startSector / sectorsPerSU)
  430             * sectorsPerSU) + (failedPDA->startSector % sectorsPerSU);
  431         parityPDA->numSector = failedPDA->numSector;
  432 
  433         /* initialize the Rp node */
  434         rf_InitNode(rpNode, rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc,
  435             rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Rp ", allocList);
  436         rpNode->params[0].p = parityPDA;
  437         rpNode->params[1].p = rpBuf;
  438         rpNode->params[2].v = parityStripeID;
  439         rpNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
  440 
  441         /*
  442          * the last and nastiest step is to assign all
  443          * the parameters of the Xor node
  444          */
  445         paramNum = 0;
  446         tmprrdNode = rrdNodes;
  447         for (i = 0; i < nRrdNodes; i++) {
  448                 /* all the Rrd nodes need to be xored together */
  449                 xorNode->params[paramNum++] = tmprrdNode->params[0];
  450                 xorNode->params[paramNum++] = tmprrdNode->params[1];
  451                 tmprrdNode = tmprrdNode->list_next;
  452         }
  453         tmprudNode = rudNodes;
  454         for (i = 0; i < nRudNodes; i++) {
  455                 /* any Rud nodes that overlap the failed access need to be
  456                  * xored in */
  457                 if (overlappingPDAs[i]) {
  458                         pda = rf_AllocPhysDiskAddr();
  459                         memcpy((char *) pda, (char *) tmprudNode->params[0].p, sizeof(RF_PhysDiskAddr_t));
  460                         /* add it into the pda_cleanup_list *after* the copy, TYVM */
  461                         pda->next = dag_h->pda_cleanup_list;
  462                         dag_h->pda_cleanup_list = pda;
  463                         rf_RangeRestrictPDA(raidPtr, failedPDA, pda, RF_RESTRICT_DOBUFFER, 0);
  464                         xorNode->params[paramNum++].p = pda;
  465                         xorNode->params[paramNum++].p = pda->bufPtr;
  466                 }
  467                 tmprudNode = tmprudNode->list_next;
  468         }
  469 
  470         /* install parity pda as last set of params to be xor'd */
  471         xorNode->params[paramNum++].p = parityPDA;
  472         xorNode->params[paramNum++].p = rpBuf;
  473 
  474         /*
  475          * the last 2 params to the recovery xor node are
  476          * the failed PDA and the raidPtr
  477          */
  478         xorNode->params[paramNum++].p = failedPDA;
  479         xorNode->params[paramNum++].p = raidPtr;
  480         RF_ASSERT(paramNum == 2 * nXorBufs + 2);
  481 
  482         /*
  483          * The xor node uses results[0] as the target buffer.
  484          * Set pointer and zero the buffer. In the kernel, this
  485          * may be a user buffer in which case we have to remap it.
  486          */
  487         xorNode->results[0] = failedPDA->bufPtr;
  488         memset(failedPDA->bufPtr, 0, rf_RaidAddressToByte(raidPtr,
  489                 failedPDA->numSector));
  490 
  491         /* connect nodes to form graph */
  492         /* connect the header to the block node */
  493         RF_ASSERT(dag_h->numSuccedents == 1);
  494         RF_ASSERT(blockNode->numAntecedents == 0);
  495         dag_h->succedents[0] = blockNode;
  496 
  497         /* connect the block node to the read nodes */
  498         RF_ASSERT(blockNode->numSuccedents == (1 + nRrdNodes + nRudNodes));
  499         RF_ASSERT(rpNode->numAntecedents == 1);
  500         blockNode->succedents[0] = rpNode;
  501         rpNode->antecedents[0] = blockNode;
  502         rpNode->antType[0] = rf_control;
  503         tmprrdNode = rrdNodes;
  504         for (i = 0; i < nRrdNodes; i++) {
  505                 RF_ASSERT(tmprrdNode->numSuccedents == 1);
  506                 blockNode->succedents[1 + i] = tmprrdNode;
  507                 tmprrdNode->antecedents[0] = blockNode;
  508                 tmprrdNode->antType[0] = rf_control;
  509                 tmprrdNode = tmprrdNode->list_next;
  510         }
  511         tmprudNode = rudNodes;
  512         for (i = 0; i < nRudNodes; i++) {
  513                 RF_ASSERT(tmprudNode->numSuccedents == 1);
  514                 blockNode->succedents[1 + nRrdNodes + i] = tmprudNode;
  515                 tmprudNode->antecedents[0] = blockNode;
  516                 tmprudNode->antType[0] = rf_control;
  517                 tmprudNode = tmprudNode->list_next;
  518         }
  519 
  520         /* connect the read nodes to the xor node */
  521         RF_ASSERT(xorNode->numAntecedents == (1 + nRrdNodes + nRudNodes));
  522         RF_ASSERT(rpNode->numSuccedents == 1);
  523         rpNode->succedents[0] = xorNode;
  524         xorNode->antecedents[0] = rpNode;
  525         xorNode->antType[0] = rf_trueData;
  526         tmprrdNode = rrdNodes;
  527         for (i = 0; i < nRrdNodes; i++) {
  528                 RF_ASSERT(tmprrdNode->numSuccedents == 1);
  529                 tmprrdNode->succedents[0] = xorNode;
  530                 xorNode->antecedents[1 + i] = tmprrdNode;
  531                 xorNode->antType[1 + i] = rf_trueData;
  532                 tmprrdNode = tmprrdNode->list_next;
  533         }
  534         tmprudNode = rudNodes;
  535         for (i = 0; i < nRudNodes; i++) {
  536                 RF_ASSERT(tmprudNode->numSuccedents == 1);
  537                 tmprudNode->succedents[0] = xorNode;
  538                 xorNode->antecedents[1 + nRrdNodes + i] = tmprudNode;
  539                 xorNode->antType[1 + nRrdNodes + i] = rf_trueData;
  540                 tmprudNode = tmprudNode->list_next;
  541         }
  542 
  543         /* connect the xor node to the commit node */
  544         RF_ASSERT(xorNode->numSuccedents == 1);
  545         RF_ASSERT(commitNode->numAntecedents == 1);
  546         xorNode->succedents[0] = commitNode;
  547         commitNode->antecedents[0] = xorNode;
  548         commitNode->antType[0] = rf_control;
  549 
  550         /* connect the termNode to the commit node */
  551         RF_ASSERT(commitNode->numSuccedents == 1);
  552         RF_ASSERT(termNode->numAntecedents == 1);
  553         RF_ASSERT(termNode->numSuccedents == 0);
  554         commitNode->succedents[0] = termNode;
  555         termNode->antType[0] = rf_control;
  556         termNode->antecedents[0] = commitNode;
  557 }
  558 
  559 #if (RF_INCLUDE_CHAINDECLUSTER > 0)
  560 /******************************************************************************
  561  * Create a degraded read DAG for Chained Declustering
  562  *
  563  * Hdr -> Nil -> R(p/s)d -> Cmt -> Trm
  564  *
  565  * The "Rd" node reads data from the surviving disk in the mirror pair
  566  *   Rpd - read of primary copy
  567  *   Rsd - read of secondary copy
  568  *
  569  * Parameters:  raidPtr   - description of the physical array
  570  *              asmap     - logical & physical addresses for this access
  571  *              bp        - buffer ptr (for holding write data)
  572  *              flags     - general flags (e.g. disk locking)
  573  *              allocList - list of memory allocated in DAG creation
  574  *****************************************************************************/
  575 
  576 void 
  577 rf_CreateRaidCDegradedReadDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
  578                               RF_DagHeader_t *dag_h, void *bp,
  579                               RF_RaidAccessFlags_t flags,
  580                               RF_AllocListElem_t *allocList)
  581 {
  582         RF_DagNode_t *nodes, *rdNode, *blockNode, *commitNode, *termNode;
  583         RF_StripeNum_t parityStripeID;
  584         int     useMirror, i, shiftable;
  585         RF_ReconUnitNum_t which_ru;
  586         RF_PhysDiskAddr_t *pda;
  587 
  588         if ((asmap->numDataFailed + asmap->numParityFailed) == 0) {
  589                 shiftable = RF_TRUE;
  590         } else {
  591                 shiftable = RF_FALSE;
  592         }
  593         useMirror = 0;
  594         parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout),
  595             asmap->raidAddress, &which_ru);
  596 
  597 #if RF_DEBUG_DAG
  598         if (rf_dagDebug) {
  599                 printf("[Creating RAID C degraded read DAG]\n");
  600         }
  601 #endif
  602         dag_h->creator = "RaidCDegradedReadDAG";
  603         /* alloc the Wnd nodes and the Wmir node */
  604         if (asmap->numDataFailed == 0)
  605                 useMirror = RF_FALSE;
  606         else
  607                 useMirror = RF_TRUE;
  608 
  609         /* total number of nodes = 1 + (block + commit + terminator) */
  610         RF_MallocAndAdd(nodes, 4 * sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList);
  611         i = 0;
  612         rdNode = &nodes[i];
  613         i++;
  614         blockNode = &nodes[i];
  615         i++;
  616         commitNode = &nodes[i];
  617         i++;
  618         termNode = &nodes[i];
  619         i++;
  620 
  621         /*
  622          * This dag can not commit until the commit node is reached.
  623          * Errors prior to the commit point imply the dag has failed
  624          * and must be retried.
  625          */
  626         dag_h->numCommitNodes = 1;
  627         dag_h->numCommits = 0;
  628         dag_h->numSuccedents = 1;
  629 
  630         /* initialize the block, commit, and terminator nodes */
  631         rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc,
  632             NULL, 1, 0, 0, 0, dag_h, "Nil", allocList);
  633         rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc,
  634             NULL, 1, 1, 0, 0, dag_h, "Cmt", allocList);
  635         rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc,
  636             NULL, 0, 1, 0, 0, dag_h, "Trm", allocList);
  637 
  638         pda = asmap->physInfo;
  639         RF_ASSERT(pda != NULL);
  640         /* parityInfo must describe entire parity unit */
  641         RF_ASSERT(asmap->parityInfo->next == NULL);
  642 
  643         /* initialize the data node */
  644         if (!useMirror) {
  645                 rf_InitNode(rdNode, rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc,
  646                     rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Rpd", allocList);
  647                 if (shiftable && rf_compute_workload_shift(raidPtr, pda)) {
  648                         /* shift this read to the next disk in line */
  649                         rdNode->params[0].p = asmap->parityInfo;
  650                         rdNode->params[1].p = pda->bufPtr;
  651                         rdNode->params[2].v = parityStripeID;
  652                         rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
  653                 } else {
  654                         /* read primary copy */
  655                         rdNode->params[0].p = pda;
  656                         rdNode->params[1].p = pda->bufPtr;
  657                         rdNode->params[2].v = parityStripeID;
  658                         rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
  659                 }
  660         } else {
  661                 /* read secondary copy of data */
  662                 rf_InitNode(rdNode, rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc,
  663                     rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Rsd", allocList);
  664                 rdNode->params[0].p = asmap->parityInfo;
  665                 rdNode->params[1].p = pda->bufPtr;
  666                 rdNode->params[2].v = parityStripeID;
  667                 rdNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
  668         }
  669 
  670         /* connect header to block node */
  671         RF_ASSERT(dag_h->numSuccedents == 1);
  672         RF_ASSERT(blockNode->numAntecedents == 0);
  673         dag_h->succedents[0] = blockNode;
  674 
  675         /* connect block node to rdnode */
  676         RF_ASSERT(blockNode->numSuccedents == 1);
  677         RF_ASSERT(rdNode->numAntecedents == 1);
  678         blockNode->succedents[0] = rdNode;
  679         rdNode->antecedents[0] = blockNode;
  680         rdNode->antType[0] = rf_control;
  681 
  682         /* connect rdnode to commit node */
  683         RF_ASSERT(rdNode->numSuccedents == 1);
  684         RF_ASSERT(commitNode->numAntecedents == 1);
  685         rdNode->succedents[0] = commitNode;
  686         commitNode->antecedents[0] = rdNode;
  687         commitNode->antType[0] = rf_control;
  688 
  689         /* connect commit node to terminator */
  690         RF_ASSERT(commitNode->numSuccedents == 1);
  691         RF_ASSERT(termNode->numAntecedents == 1);
  692         RF_ASSERT(termNode->numSuccedents == 0);
  693         commitNode->succedents[0] = termNode;
  694         termNode->antecedents[0] = commitNode;
  695         termNode->antType[0] = rf_control;
  696 }
  697 #endif /* (RF_INCLUDE_CHAINDECLUSTER > 0) */
  698 
  699 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) || (RF_INCLUDE_EVENODD > 0)
  700 /*
  701  * XXX move this elsewhere?
  702  */
  703 void 
  704 rf_DD_GenerateFailedAccessASMs(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
  705                                RF_PhysDiskAddr_t **pdap, int *nNodep,
  706                                RF_PhysDiskAddr_t **pqpdap, int *nPQNodep,
  707                                RF_AllocListElem_t *allocList)
  708 {
  709         RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
  710         int     PDAPerDisk, i;
  711         RF_SectorCount_t secPerSU = layoutPtr->sectorsPerStripeUnit;
  712         int     numDataCol = layoutPtr->numDataCol;
  713         int     state;
  714         RF_SectorNum_t suoff, suend;
  715         unsigned firstDataCol, napdas, count;
  716         RF_SectorNum_t fone_start, fone_end, ftwo_start = 0, ftwo_end = 0;
  717         RF_PhysDiskAddr_t *fone = asmap->failedPDAs[0], *ftwo = asmap->failedPDAs[1];
  718         RF_PhysDiskAddr_t *pda_p;
  719         RF_PhysDiskAddr_t *phys_p;
  720         RF_RaidAddr_t sosAddr;
  721 
  722         /* determine how many pda's we will have to generate per unaccess
  723          * stripe. If there is only one failed data unit, it is one; if two,
  724          * possibly two, depending wether they overlap. */
  725 
  726         fone_start = rf_StripeUnitOffset(layoutPtr, fone->startSector);
  727         fone_end = fone_start + fone->numSector;
  728 
  729 #define CONS_PDA(if,start,num) \
  730   pda_p->col = asmap->if->col; \
  731   pda_p->startSector = ((asmap->if->startSector / secPerSU) * secPerSU) + start; \
  732   pda_p->numSector = num; \
  733   pda_p->next = NULL; \
  734   RF_MallocAndAdd(pda_p->bufPtr,rf_RaidAddressToByte(raidPtr,num),(char *), allocList)
  735 
  736         if (asmap->numDataFailed == 1) {
  737                 PDAPerDisk = 1;
  738                 state = 1;
  739                 RF_MallocAndAdd(*pqpdap, 2 * sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList);
  740                 pda_p = *pqpdap;
  741                 /* build p */
  742                 CONS_PDA(parityInfo, fone_start, fone->numSector);
  743                 pda_p->type = RF_PDA_TYPE_PARITY;
  744                 pda_p++;
  745                 /* build q */
  746                 CONS_PDA(qInfo, fone_start, fone->numSector);
  747                 pda_p->type = RF_PDA_TYPE_Q;
  748         } else {
  749                 ftwo_start = rf_StripeUnitOffset(layoutPtr, ftwo->startSector);
  750                 ftwo_end = ftwo_start + ftwo->numSector;
  751                 if (fone->numSector + ftwo->numSector > secPerSU) {
  752                         PDAPerDisk = 1;
  753                         state = 2;
  754                         RF_MallocAndAdd(*pqpdap, 2 * sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList);
  755                         pda_p = *pqpdap;
  756                         CONS_PDA(parityInfo, 0, secPerSU);
  757                         pda_p->type = RF_PDA_TYPE_PARITY;
  758                         pda_p++;
  759                         CONS_PDA(qInfo, 0, secPerSU);
  760                         pda_p->type = RF_PDA_TYPE_Q;
  761                 } else {
  762                         PDAPerDisk = 2;
  763                         state = 3;
  764                         /* four of them, fone, then ftwo */
  765                         RF_MallocAndAdd(*pqpdap, 4 * sizeof(RF_PhysDiskAddr_t), (RF_PhysDiskAddr_t *), allocList);
  766                         pda_p = *pqpdap;
  767                         CONS_PDA(parityInfo, fone_start, fone->numSector);
  768                         pda_p->type = RF_PDA_TYPE_PARITY;
  769                         pda_p++;
  770                         CONS_PDA(qInfo, fone_start, fone->numSector);
  771                         pda_p->type = RF_PDA_TYPE_Q;
  772                         pda_p++;
  773                         CONS_PDA(parityInfo, ftwo_start, ftwo->numSector);
  774                         pda_p->type = RF_PDA_TYPE_PARITY;
  775                         pda_p++;
  776                         CONS_PDA(qInfo, ftwo_start, ftwo->numSector);
  777                         pda_p->type = RF_PDA_TYPE_Q;
  778                 }
  779         }
  780         /* figure out number of nonaccessed pda */
  781         napdas = PDAPerDisk * (numDataCol - asmap->numStripeUnitsAccessed - (ftwo == NULL ? 1 : 0));
  782         *nPQNodep = PDAPerDisk;
  783 
  784         /* sweep over the over accessed pda's, figuring out the number of
  785          * additional pda's to generate. Of course, skip the failed ones */
  786 
  787         count = 0;
  788         for (pda_p = asmap->physInfo; pda_p; pda_p = pda_p->next) {
  789                 if ((pda_p == fone) || (pda_p == ftwo))
  790                         continue;
  791                 suoff = rf_StripeUnitOffset(layoutPtr, pda_p->startSector);
  792                 suend = suoff + pda_p->numSector;
  793                 switch (state) {
  794                 case 1: /* one failed PDA to overlap */
  795                         /* if a PDA doesn't contain the failed unit, it can
  796                          * only miss the start or end, not both */
  797                         if ((suoff > fone_start) || (suend < fone_end))
  798                                 count++;
  799                         break;
  800                 case 2: /* whole stripe */
  801                         if (suoff)      /* leak at begining */
  802                                 count++;
  803                         if (suend < numDataCol) /* leak at end */
  804                                 count++;
  805                         break;
  806                 case 3: /* two disjoint units */
  807                         if ((suoff > fone_start) || (suend < fone_end))
  808                                 count++;
  809                         if ((suoff > ftwo_start) || (suend < ftwo_end))
  810                                 count++;
  811                         break;
  812                 default:
  813                         RF_PANIC();
  814                 }
  815         }
  816 
  817         napdas += count;
  818         *nNodep = napdas;
  819         if (napdas == 0)
  820                 return;         /* short circuit */
  821 
  822         /* allocate up our list of pda's */
  823 
  824         RF_MallocAndAdd(pda_p, napdas * sizeof(RF_PhysDiskAddr_t), 
  825                         (RF_PhysDiskAddr_t *), allocList);
  826         *pdap = pda_p;
  827 
  828         /* linkem together */
  829         for (i = 0; i < (napdas - 1); i++)
  830                 pda_p[i].next = pda_p + (i + 1);
  831 
  832         /* march through the one's up to the first accessed disk */
  833         firstDataCol = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), asmap->physInfo->raidAddress) % numDataCol;
  834         sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress);
  835         for (i = 0; i < firstDataCol; i++) {
  836                 if ((pda_p - (*pdap)) == napdas)
  837                         continue;
  838                 pda_p->type = RF_PDA_TYPE_DATA;
  839                 pda_p->raidAddress = sosAddr + (i * secPerSU);
  840                 (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->col), &(pda_p->startSector), 0);
  841                 /* skip over dead disks */
  842                 if (RF_DEAD_DISK(raidPtr->Disks[pda_p->col].status))
  843                         continue;
  844                 switch (state) {
  845                 case 1: /* fone */
  846                         pda_p->numSector = fone->numSector;
  847                         pda_p->raidAddress += fone_start;
  848                         pda_p->startSector += fone_start;
  849                         RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList);
  850                         break;
  851                 case 2: /* full stripe */
  852                         pda_p->numSector = secPerSU;
  853                         RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, secPerSU), (char *), allocList);
  854                         break;
  855                 case 3: /* two slabs */
  856                         pda_p->numSector = fone->numSector;
  857                         pda_p->raidAddress += fone_start;
  858                         pda_p->startSector += fone_start;
  859                         RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList);
  860                         pda_p++;
  861                         pda_p->type = RF_PDA_TYPE_DATA;
  862                         pda_p->raidAddress = sosAddr + (i * secPerSU);
  863                         (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->col), &(pda_p->startSector), 0);
  864                         pda_p->numSector = ftwo->numSector;
  865                         pda_p->raidAddress += ftwo_start;
  866                         pda_p->startSector += ftwo_start;
  867                         RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList);
  868                         break;
  869                 default:
  870                         RF_PANIC();
  871                 }
  872                 pda_p++;
  873         }
  874 
  875         /* march through the touched stripe units */
  876         for (phys_p = asmap->physInfo; phys_p; phys_p = phys_p->next, i++) {
  877                 if ((phys_p == asmap->failedPDAs[0]) || (phys_p == asmap->failedPDAs[1]))
  878                         continue;
  879                 suoff = rf_StripeUnitOffset(layoutPtr, phys_p->startSector);
  880                 suend = suoff + phys_p->numSector;
  881                 switch (state) {
  882                 case 1: /* single buffer */
  883                         if (suoff > fone_start) {
  884                                 RF_ASSERT(suend >= fone_end);
  885                                 /* The data read starts after the mapped
  886                                  * access, snip off the begining */
  887                                 pda_p->numSector = suoff - fone_start;
  888                                 pda_p->raidAddress = sosAddr + (i * secPerSU) + fone_start;
  889                                 (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->col), &(pda_p->startSector), 0);
  890                                 RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList);
  891                                 pda_p++;
  892                         }
  893                         if (suend < fone_end) {
  894                                 RF_ASSERT(suoff <= fone_start);
  895                                 /* The data read stops before the end of the
  896                                  * failed access, extend */
  897                                 pda_p->numSector = fone_end - suend;
  898                                 pda_p->raidAddress = sosAddr + (i * secPerSU) + suend;  /* off by one? */
  899                                 (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->col), &(pda_p->startSector), 0);
  900                                 RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList);
  901                                 pda_p++;
  902                         }
  903                         break;
  904                 case 2: /* whole stripe unit */
  905                         RF_ASSERT((suoff == 0) || (suend == secPerSU));
  906                         if (suend < secPerSU) { /* short read, snip from end
  907                                                  * on */
  908                                 pda_p->numSector = secPerSU - suend;
  909                                 pda_p->raidAddress = sosAddr + (i * secPerSU) + suend;  /* off by one? */
  910                                 (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->col), &(pda_p->startSector), 0);
  911                                 RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList);
  912                                 pda_p++;
  913                         } else
  914                                 if (suoff > 0) {        /* short at front */
  915                                         pda_p->numSector = suoff;
  916                                         pda_p->raidAddress = sosAddr + (i * secPerSU);
  917                                         (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->col), &(pda_p->startSector), 0);
  918                                         RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList);
  919                                         pda_p++;
  920                                 }
  921                         break;
  922                 case 3: /* two nonoverlapping failures */
  923                         if ((suoff > fone_start) || (suend < fone_end)) {
  924                                 if (suoff > fone_start) {
  925                                         RF_ASSERT(suend >= fone_end);
  926                                         /* The data read starts after the
  927                                          * mapped access, snip off the
  928                                          * begining */
  929                                         pda_p->numSector = suoff - fone_start;
  930                                         pda_p->raidAddress = sosAddr + (i * secPerSU) + fone_start;
  931                                         (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->col), &(pda_p->startSector), 0);
  932                                         RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList);
  933                                         pda_p++;
  934                                 }
  935                                 if (suend < fone_end) {
  936                                         RF_ASSERT(suoff <= fone_start);
  937                                         /* The data read stops before the end
  938                                          * of the failed access, extend */
  939                                         pda_p->numSector = fone_end - suend;
  940                                         pda_p->raidAddress = sosAddr + (i * secPerSU) + suend;  /* off by one? */
  941                                         (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->col), &(pda_p->startSector), 0);
  942                                         RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList);
  943                                         pda_p++;
  944                                 }
  945                         }
  946                         if ((suoff > ftwo_start) || (suend < ftwo_end)) {
  947                                 if (suoff > ftwo_start) {
  948                                         RF_ASSERT(suend >= ftwo_end);
  949                                         /* The data read starts after the
  950                                          * mapped access, snip off the
  951                                          * begining */
  952                                         pda_p->numSector = suoff - ftwo_start;
  953                                         pda_p->raidAddress = sosAddr + (i * secPerSU) + ftwo_start;
  954                                         (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->col), &(pda_p->startSector), 0);
  955                                         RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList);
  956                                         pda_p++;
  957                                 }
  958                                 if (suend < ftwo_end) {
  959                                         RF_ASSERT(suoff <= ftwo_start);
  960                                         /* The data read stops before the end
  961                                          * of the failed access, extend */
  962                                         pda_p->numSector = ftwo_end - suend;
  963                                         pda_p->raidAddress = sosAddr + (i * secPerSU) + suend;  /* off by one? */
  964                                         (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->col), &(pda_p->startSector), 0);
  965                                         RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList);
  966                                         pda_p++;
  967                                 }
  968                         }
  969                         break;
  970                 default:
  971                         RF_PANIC();
  972                 }
  973         }
  974 
  975         /* after the last accessed disk */
  976         for (; i < numDataCol; i++) {
  977                 if ((pda_p - (*pdap)) == napdas)
  978                         continue;
  979                 pda_p->type = RF_PDA_TYPE_DATA;
  980                 pda_p->raidAddress = sosAddr + (i * secPerSU);
  981                 (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->col), &(pda_p->startSector), 0);
  982                 /* skip over dead disks */
  983                 if (RF_DEAD_DISK(raidPtr->Disks[pda_p->col].status))
  984                         continue;
  985                 switch (state) {
  986                 case 1: /* fone */
  987                         pda_p->numSector = fone->numSector;
  988                         pda_p->raidAddress += fone_start;
  989                         pda_p->startSector += fone_start;
  990                         RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList);
  991                         break;
  992                 case 2: /* full stripe */
  993                         pda_p->numSector = secPerSU;
  994                         RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, secPerSU), (char *), allocList);
  995                         break;
  996                 case 3: /* two slabs */
  997                         pda_p->numSector = fone->numSector;
  998                         pda_p->raidAddress += fone_start;
  999                         pda_p->startSector += fone_start;
 1000                         RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList);
 1001                         pda_p++;
 1002                         pda_p->type = RF_PDA_TYPE_DATA;
 1003                         pda_p->raidAddress = sosAddr + (i * secPerSU);
 1004                         (raidPtr->Layout.map->MapSector) (raidPtr, pda_p->raidAddress, &(pda_p->col), &(pda_p->startSector), 0);
 1005                         pda_p->numSector = ftwo->numSector;
 1006                         pda_p->raidAddress += ftwo_start;
 1007                         pda_p->startSector += ftwo_start;
 1008                         RF_MallocAndAdd(pda_p->bufPtr, rf_RaidAddressToByte(raidPtr, pda_p->numSector), (char *), allocList);
 1009                         break;
 1010                 default:
 1011                         RF_PANIC();
 1012                 }
 1013                 pda_p++;
 1014         }
 1015 
 1016         RF_ASSERT(pda_p - *pdap == napdas);
 1017         return;
 1018 }
 1019 #define INIT_DISK_NODE(node,name) \
 1020 rf_InitNode(node, rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 2,1,4,0, dag_h, name, allocList); \
 1021 (node)->succedents[0] = unblockNode; \
 1022 (node)->succedents[1] = recoveryNode; \
 1023 (node)->antecedents[0] = blockNode; \
 1024 (node)->antType[0] = rf_control
 1025 
 1026 #define DISK_NODE_PARAMS(_node_,_p_) \
 1027   (_node_).params[0].p = _p_ ; \
 1028   (_node_).params[1].p = (_p_)->bufPtr; \
 1029   (_node_).params[2].v = parityStripeID; \
 1030   (_node_).params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru)
 1031 
 1032 void 
 1033 rf_DoubleDegRead(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
 1034                  RF_DagHeader_t *dag_h, void *bp, RF_RaidAccessFlags_t flags,
 1035                  RF_AllocListElem_t *allocList,
 1036                  char *redundantReadNodeName, char *recoveryNodeName,
 1037                  int (*recovFunc) (RF_DagNode_t *))
 1038 {
 1039         RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
 1040         RF_DagNode_t *nodes, *rudNodes, *rrdNodes, *recoveryNode, *blockNode,
 1041                *unblockNode, *rpNodes, *rqNodes, *termNode;
 1042         RF_PhysDiskAddr_t *pda, *pqPDAs;
 1043         RF_PhysDiskAddr_t *npdas;
 1044         int     nNodes, nRrdNodes, nRudNodes, i;
 1045         RF_ReconUnitNum_t which_ru;
 1046         int     nReadNodes, nPQNodes;
 1047         RF_PhysDiskAddr_t *failedPDA = asmap->failedPDAs[0];
 1048         RF_PhysDiskAddr_t *failedPDAtwo = asmap->failedPDAs[1];
 1049         RF_StripeNum_t parityStripeID = rf_RaidAddressToParityStripeID(layoutPtr, asmap->raidAddress, &which_ru);
 1050 
 1051 #if RF_DEBUG_DAG
 1052         if (rf_dagDebug)
 1053                 printf("[Creating Double Degraded Read DAG]\n");
 1054 #endif
 1055         rf_DD_GenerateFailedAccessASMs(raidPtr, asmap, &npdas, &nRrdNodes, &pqPDAs, &nPQNodes, allocList);
 1056 
 1057         nRudNodes = asmap->numStripeUnitsAccessed - (asmap->numDataFailed);
 1058         nReadNodes = nRrdNodes + nRudNodes + 2 * nPQNodes;
 1059         nNodes = 4 /* block, unblock, recovery, term */ + nReadNodes;
 1060 
 1061         RF_MallocAndAdd(nodes, nNodes * sizeof(RF_DagNode_t), (RF_DagNode_t *), allocList);
 1062         i = 0;
 1063         blockNode = &nodes[i];
 1064         i += 1;
 1065         unblockNode = &nodes[i];
 1066         i += 1;
 1067         recoveryNode = &nodes[i];
 1068         i += 1;
 1069         termNode = &nodes[i];
 1070         i += 1;
 1071         rudNodes = &nodes[i];
 1072         i += nRudNodes;
 1073         rrdNodes = &nodes[i];
 1074         i += nRrdNodes;
 1075         rpNodes = &nodes[i];
 1076         i += nPQNodes;
 1077         rqNodes = &nodes[i];
 1078         i += nPQNodes;
 1079         RF_ASSERT(i == nNodes);
 1080 
 1081         dag_h->numSuccedents = 1;
 1082         dag_h->succedents[0] = blockNode;
 1083         dag_h->creator = "DoubleDegRead";
 1084         dag_h->numCommits = 0;
 1085         dag_h->numCommitNodes = 1;      /* unblock */
 1086 
 1087         rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, NULL, 0, 2, 0, 0, dag_h, "Trm", allocList);
 1088         termNode->antecedents[0] = unblockNode;
 1089         termNode->antType[0] = rf_control;
 1090         termNode->antecedents[1] = recoveryNode;
 1091         termNode->antType[1] = rf_control;
 1092 
 1093         /* init the block and unblock nodes */
 1094         /* The block node has all nodes except itself, unblock and recovery as
 1095          * successors. Similarly for predecessors of the unblock. */
 1096         rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, nReadNodes, 0, 0, 0, dag_h, "Nil", allocList);
 1097         rf_InitNode(unblockNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, 1, nReadNodes, 0, 0, dag_h, "Nil", allocList);
 1098 
 1099         for (i = 0; i < nReadNodes; i++) {
 1100                 blockNode->succedents[i] = rudNodes + i;
 1101                 unblockNode->antecedents[i] = rudNodes + i;
 1102                 unblockNode->antType[i] = rf_control;
 1103         }
 1104         unblockNode->succedents[0] = termNode;
 1105 
 1106         /* The recovery node has all the reads as predecessors, and the term
 1107          * node as successors. It gets a pda as a param from each of the read
 1108          * nodes plus the raidPtr. For each failed unit is has a result pda. */
 1109         rf_InitNode(recoveryNode, rf_wait, RF_FALSE, recovFunc, rf_NullNodeUndoFunc, NULL,
 1110             1,                  /* succesors */
 1111             nReadNodes,         /* preds */
 1112             nReadNodes + 2,     /* params */
 1113             asmap->numDataFailed,       /* results */
 1114             dag_h, recoveryNodeName, allocList);
 1115 
 1116         recoveryNode->succedents[0] = termNode;
 1117         for (i = 0; i < nReadNodes; i++) {
 1118                 recoveryNode->antecedents[i] = rudNodes + i;
 1119                 recoveryNode->antType[i] = rf_trueData;
 1120         }
 1121 
 1122         /* build the read nodes, then come back and fill in recovery params
 1123          * and results */
 1124         pda = asmap->physInfo;
 1125         for (i = 0; i < nRudNodes; pda = pda->next) {
 1126                 if ((pda == failedPDA) || (pda == failedPDAtwo))
 1127                         continue;
 1128                 INIT_DISK_NODE(rudNodes + i, "Rud");
 1129                 RF_ASSERT(pda);
 1130                 DISK_NODE_PARAMS(rudNodes[i], pda);
 1131                 i++;
 1132         }
 1133 
 1134         pda = npdas;
 1135         for (i = 0; i < nRrdNodes; i++, pda = pda->next) {
 1136                 INIT_DISK_NODE(rrdNodes + i, "Rrd");
 1137                 RF_ASSERT(pda);
 1138                 DISK_NODE_PARAMS(rrdNodes[i], pda);
 1139         }
 1140 
 1141         /* redundancy pdas */
 1142         pda = pqPDAs;
 1143         INIT_DISK_NODE(rpNodes, "Rp");
 1144         RF_ASSERT(pda);
 1145         DISK_NODE_PARAMS(rpNodes[0], pda);
 1146         pda++;
 1147         INIT_DISK_NODE(rqNodes, redundantReadNodeName);
 1148         RF_ASSERT(pda);
 1149         DISK_NODE_PARAMS(rqNodes[0], pda);
 1150         if (nPQNodes == 2) {
 1151                 pda++;
 1152                 INIT_DISK_NODE(rpNodes + 1, "Rp");
 1153                 RF_ASSERT(pda);
 1154                 DISK_NODE_PARAMS(rpNodes[1], pda);
 1155                 pda++;
 1156                 INIT_DISK_NODE(rqNodes + 1, redundantReadNodeName);
 1157                 RF_ASSERT(pda);
 1158                 DISK_NODE_PARAMS(rqNodes[1], pda);
 1159         }
 1160         /* fill in recovery node params */
 1161         for (i = 0; i < nReadNodes; i++)
 1162                 recoveryNode->params[i] = rudNodes[i].params[0];        /* pda */
 1163         recoveryNode->params[i++].p = (void *) raidPtr;
 1164         recoveryNode->params[i++].p = (void *) asmap;
 1165         recoveryNode->results[0] = failedPDA;
 1166         if (asmap->numDataFailed == 2)
 1167                 recoveryNode->results[1] = failedPDAtwo;
 1168 
 1169         /* zero fill the target data buffers? */
 1170 }
 1171 
 1172 #endif /* (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) || (RF_INCLUDE_EVENODD > 0) */

Cache object: 1af5ad52cb711e95c8acc9572df0eac5


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.