The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)

[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*      $NetBSD: rf_dagffwr.c,v 2004/08/30 08:44:40 tron Exp $ */
    2 /*
    3  * Copyright (c) 1995 Carnegie-Mellon University.
    4  * All rights reserved.
    5  *
    6  * Author: Mark Holland, Daniel Stodolsky, William V. Courtright II
    7  *
    8  * Permission to use, copy, modify and distribute this software and
    9  * its documentation is hereby granted, provided that both the copyright
   10  * notice and this permission notice appear in all copies of the
   11  * software, derivative works or modified versions, and any portions
   12  * thereof, and that both notices appear in supporting documentation.
   13  *
   17  *
   18  * Carnegie Mellon requests users of this software to return to
   19  *
   20  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
   21  *  School of Computer Science
   22  *  Carnegie Mellon University
   23  *  Pittsburgh PA 15213-3890
   24  *
   25  * any improvements or extensions that they make and grant Carnegie the
   26  * rights to redistribute these changes.
   27  */
   29 /*
   30  * rf_dagffwr.c
   31  *
   32  * code for creating fault-free DAGs
   33  *
   34  */
   36 #include <sys/cdefs.h>
   37 __KERNEL_RCSID(0, "$NetBSD: rf_dagffwr.c,v 2004/08/30 08:44:40 tron Exp $");
   39 #include <dev/raidframe/raidframevar.h>
   41 #include "rf_raid.h"
   42 #include "rf_dag.h"
   43 #include "rf_dagutils.h"
   44 #include "rf_dagfuncs.h"
   45 #include "rf_debugMem.h"
   46 #include "rf_dagffrd.h"
   47 #include "rf_general.h"
   48 #include "rf_dagffwr.h"
   49 #include "rf_map.h"
   51 /******************************************************************************
   52  *
   53  * General comments on DAG creation:
   54  *
   55  * All DAGs in this file use roll-away error recovery.  Each DAG has a single
   56  * commit node, usually called "Cmt."  If an error occurs before the Cmt node
   57  * is reached, the execution engine will halt forward execution and work
   58  * backward through the graph, executing the undo functions.  Assuming that
   59  * each node in the graph prior to the Cmt node is undoable and atomic - or -
   60  * does not make changes to permanent state, the graph will fail atomically.
   61  * If an error occurs after the Cmt node executes, the engine will roll-forward
   62  * through the graph, blindly executing nodes until it reaches the end.
   63  * If a graph reaches the end, it is assumed to have completed successfully.
   64  *
   65  * A graph has only 1 Cmt node.
   66  *
   67  */
   70 /******************************************************************************
   71  *
   72  * The following wrappers map the standard DAG creation interface to the
   73  * DAG creation routines.  Additionally, these wrappers enable experimentation
   74  * with new DAG structures by providing an extra level of indirection, allowing
   75  * the DAG creation routines to be replaced at this single point.
   76  */
   79 void 
   80 rf_CreateNonRedundantWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
   81                               RF_DagHeader_t *dag_h, void *bp,
   82                               RF_RaidAccessFlags_t flags,
   83                               RF_AllocListElem_t *allocList,
   84                               RF_IoType_t type)
   85 {
   86         rf_CreateNonredundantDAG(raidPtr, asmap, dag_h, bp, flags, allocList,
   87                                  RF_IO_TYPE_WRITE);
   88 }
   90 void 
   91 rf_CreateRAID0WriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
   92                        RF_DagHeader_t *dag_h, void *bp,
   93                        RF_RaidAccessFlags_t flags,
   94                        RF_AllocListElem_t *allocList,
   95                        RF_IoType_t type)
   96 {
   97         rf_CreateNonredundantDAG(raidPtr, asmap, dag_h, bp, flags, allocList,
   98                                  RF_IO_TYPE_WRITE);
   99 }
  101 void 
  102 rf_CreateSmallWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
  103                        RF_DagHeader_t *dag_h, void *bp,
  104                        RF_RaidAccessFlags_t flags,
  105                        RF_AllocListElem_t *allocList)
  106 {
  107         /* "normal" rollaway */
  108         rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags, 
  109                                      allocList, &rf_xorFuncs, NULL);
  110 }
  112 void 
  113 rf_CreateLargeWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
  114                        RF_DagHeader_t *dag_h, void *bp,
  115                        RF_RaidAccessFlags_t flags,
  116                        RF_AllocListElem_t *allocList)
  117 {
  118         /* "normal" rollaway */
  119         rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags, 
  120                                      allocList, 1, rf_RegularXorFunc, RF_TRUE);
  121 }
  124 /******************************************************************************
  125  *
  126  * DAG creation code begins here
  127  */
  130 /******************************************************************************
  131  *
  132  * creates a DAG to perform a large-write operation:
  133  *
  134  *           / Rod \           / Wnd \
  135  * H -- block- Rod - Xor - Cmt - Wnd --- T
  136  *           \ Rod /          \  Wnp /
  137  *                             \[Wnq]/
  138  *
  139  * The XOR node also does the Q calculation in the P+Q architecture.
  140  * All nodes are before the commit node (Cmt) are assumed to be atomic and
  141  * undoable - or - they make no changes to permanent state.
  142  *
  143  * Rod = read old data
  144  * Cmt = commit node
  145  * Wnp = write new parity
  146  * Wnd = write new data
  147  * Wnq = write new "q"
  148  * [] denotes optional segments in the graph
  149  *
  150  * Parameters:  raidPtr   - description of the physical array
  151  *              asmap     - logical & physical addresses for this access
  152  *              bp        - buffer ptr (holds write data)
  153  *              flags     - general flags (e.g. disk locking)
  154  *              allocList - list of memory allocated in DAG creation
  155  *              nfaults   - number of faults array can tolerate
  156  *                          (equal to # redundancy units in stripe)
  157  *              redfuncs  - list of redundancy generating functions
  158  *
  159  *****************************************************************************/
  161 void 
  162 rf_CommonCreateLargeWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
  163                              RF_DagHeader_t *dag_h, void *bp,
  164                              RF_RaidAccessFlags_t flags,
  165                              RF_AllocListElem_t *allocList,
  166                              int nfaults, int (*redFunc) (RF_DagNode_t *),
  167                              int allowBufferRecycle)
  168 {
  169         RF_DagNode_t *wndNodes, *rodNodes, *xorNode, *wnpNode, *tmpNode;
  170         RF_DagNode_t *wnqNode, *blockNode, *commitNode, *termNode;
  171         int     nWndNodes, nRodNodes, i, nodeNum, asmNum;
  172         RF_AccessStripeMapHeader_t *new_asm_h[2];
  173         RF_StripeNum_t parityStripeID;
  174         char   *sosBuffer, *eosBuffer;
  175         RF_ReconUnitNum_t which_ru;
  176         RF_RaidLayout_t *layoutPtr;
  177         RF_PhysDiskAddr_t *pda;
  179         layoutPtr = &(raidPtr->Layout);
  180         parityStripeID = rf_RaidAddressToParityStripeID(layoutPtr, 
  181                                                         asmap->raidAddress,
  182                                                         &which_ru);
  184 #if RF_DEBUG_DAG
  185         if (rf_dagDebug) {
  186                 printf("[Creating large-write DAG]\n");
  187         }
  188 #endif
  189         dag_h->creator = "LargeWriteDAG";
  191         dag_h->numCommitNodes = 1;
  192         dag_h->numCommits = 0;
  193         dag_h->numSuccedents = 1;
  195         /* alloc the nodes: Wnd, xor, commit, block, term, and  Wnp */
  196         nWndNodes = asmap->numStripeUnitsAccessed;
  198         for (i = 0; i < nWndNodes; i++) {
  199                 tmpNode = rf_AllocDAGNode();
  200                 tmpNode->list_next = dag_h->nodes;
  201                 dag_h->nodes = tmpNode;
  202         }
  203         wndNodes = dag_h->nodes;
  205         xorNode = rf_AllocDAGNode();
  206         xorNode->list_next = dag_h->nodes;
  207         dag_h->nodes = xorNode;
  209         wnpNode = rf_AllocDAGNode();
  210         wnpNode->list_next = dag_h->nodes;
  211         dag_h->nodes = wnpNode;
  213         blockNode = rf_AllocDAGNode();
  214         blockNode->list_next = dag_h->nodes;
  215         dag_h->nodes = blockNode;
  217         commitNode = rf_AllocDAGNode();
  218         commitNode->list_next = dag_h->nodes;
  219         dag_h->nodes = commitNode;
  221         termNode = rf_AllocDAGNode();
  222         termNode->list_next = dag_h->nodes;
  223         dag_h->nodes = termNode;
  225 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
  226         if (nfaults == 2) {
  227                 wnqNode = rf_AllocDAGNode();
  228         } else {
  229 #endif
  230                 wnqNode = NULL;
  231 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
  232         }
  233 #endif
  234         rf_MapUnaccessedPortionOfStripe(raidPtr, layoutPtr, asmap, dag_h, 
  235                                         new_asm_h, &nRodNodes, &sosBuffer, 
  236                                         &eosBuffer, allocList);
  237         if (nRodNodes > 0) {
  238                 for (i = 0; i < nRodNodes; i++) {
  239                         tmpNode = rf_AllocDAGNode();
  240                         tmpNode->list_next = dag_h->nodes;
  241                         dag_h->nodes = tmpNode;
  242                 }
  243                 rodNodes = dag_h->nodes;
  244         } else {
  245                 rodNodes = NULL;
  246         }
  248         /* begin node initialization */
  249         if (nRodNodes > 0) {
  250                 rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, 
  251                             rf_NullNodeUndoFunc, NULL, nRodNodes, 0, 0, 0, 
  252                             dag_h, "Nil", allocList);
  253         } else {
  254                 rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, 
  255                             rf_NullNodeUndoFunc, NULL, 1, 0, 0, 0, 
  256                             dag_h, "Nil", allocList);
  257         }
  259         rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, 
  260                     rf_NullNodeUndoFunc, NULL, nWndNodes + nfaults, 1, 0, 0, 
  261                     dag_h, "Cmt", allocList);
  262         rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, 
  263                     rf_TerminateUndoFunc, NULL, 0, nWndNodes + nfaults, 0, 0, 
  264                     dag_h, "Trm", allocList);
  266         /* initialize the Rod nodes */
  267         tmpNode = rodNodes;
  268         for (nodeNum = asmNum = 0; asmNum < 2; asmNum++) {
  269                 if (new_asm_h[asmNum]) {
  270                         pda = new_asm_h[asmNum]->stripeMap->physInfo;
  271                         while (pda) {
  272                                 rf_InitNode(tmpNode, rf_wait, 
  273                                             RF_FALSE, rf_DiskReadFunc,
  274                                             rf_DiskReadUndoFunc, 
  275                                             rf_GenericWakeupFunc, 
  276                                             1, 1, 4, 0, dag_h,
  277                                             "Rod", allocList);
  278                                 tmpNode->params[0].p = pda;
  279                                 tmpNode->params[1].p = pda->bufPtr;
  280                                 tmpNode->params[2].v = parityStripeID;
  281                                 tmpNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
  282                                     which_ru);
  283                                 nodeNum++;
  284                                 pda = pda->next;
  285                                 tmpNode = tmpNode->list_next;
  286                         }
  287                 }
  288         }
  289         RF_ASSERT(nodeNum == nRodNodes);
  291         /* initialize the wnd nodes */
  292         pda = asmap->physInfo;
  293         tmpNode = wndNodes;
  294         for (i = 0; i < nWndNodes; i++) {
  295                 rf_InitNode(tmpNode, rf_wait, RF_FALSE, 
  296                             rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
  297                             rf_GenericWakeupFunc, 1, 1, 4, 0, 
  298                             dag_h, "Wnd", allocList);
  299                 RF_ASSERT(pda != NULL);
  300                 tmpNode->params[0].p = pda;
  301                 tmpNode->params[1].p = pda->bufPtr;
  302                 tmpNode->params[2].v = parityStripeID;
  303                 tmpNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
  304                 pda = pda->next;
  305                 tmpNode = tmpNode->list_next;
  306         }
  308         /* initialize the redundancy node */
  309         if (nRodNodes > 0) {
  310                 rf_InitNode(xorNode, rf_wait, RF_FALSE, redFunc, 
  311                             rf_NullNodeUndoFunc, NULL, 1,
  312                             nRodNodes, 2 * (nWndNodes + nRodNodes) + 1, 
  313                             nfaults, dag_h, "Xr ", allocList);
  314         } else {
  315                 rf_InitNode(xorNode, rf_wait, RF_FALSE, redFunc, 
  316                             rf_NullNodeUndoFunc, NULL, 1,
  317                             1, 2 * (nWndNodes + nRodNodes) + 1, 
  318                             nfaults, dag_h, "Xr ", allocList);
  319         }
  320         xorNode->flags |= RF_DAGNODE_FLAG_YIELD;
  321         tmpNode = wndNodes;
  322         for (i = 0; i < nWndNodes; i++) {
  323                 /* pda */
  324                 xorNode->params[2 * i + 0] = tmpNode->params[0];
  325                 /* buf ptr */ 
  326                 xorNode->params[2 * i + 1] = tmpNode->params[1];
  327                 tmpNode = tmpNode->list_next;
  328         }
  329         tmpNode = rodNodes;
  330         for (i = 0; i < nRodNodes; i++) {
  331                 /* pda */
  332                 xorNode->params[2 * (nWndNodes + i) + 0] = tmpNode->params[0];
  333                 /* buf ptr */
  334                 xorNode->params[2 * (nWndNodes + i) + 1] = tmpNode->params[1];
  335                 tmpNode = tmpNode->list_next;
  336         }
  337         /* xor node needs to get at RAID information */
  338         xorNode->params[2 * (nWndNodes + nRodNodes)].p = raidPtr;
  340         /*
  341          * Look for an Rod node that reads a complete SU. If none,
  342          * alloc a buffer to receive the parity info. Note that we
  343          * can't use a new data buffer because it will not have gotten
  344          * written when the xor occurs.  */
  345         if (allowBufferRecycle) {
  346                 tmpNode = rodNodes;
  347                 for (i = 0; i < nRodNodes; i++) {
  348                         if (((RF_PhysDiskAddr_t *) tmpNode->params[0].p)->numSector == raidPtr->Layout.sectorsPerStripeUnit)
  349                                 break;
  350                         tmpNode = tmpNode->list_next;
  351                 }
  352         }
  353         if ((!allowBufferRecycle) || (i == nRodNodes)) {
  354                 xorNode->results[0] = rf_AllocBuffer(raidPtr, dag_h, rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit));
  355         } else {
  356                 /* this works because the only way we get here is if
  357                    allowBufferRecycle is true and we went through the
  358                    above for loop, and exited via the break before
  359                    i==nRodNodes was true.  That means tmpNode will
  360                    still point to a valid node -- the one we want for
  361                    here! */
  362                 xorNode->results[0] = tmpNode->params[1].p;
  363         }
  365         /* initialize the Wnp node */
  366         rf_InitNode(wnpNode, rf_wait, RF_FALSE, rf_DiskWriteFunc, 
  367                     rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, 
  368                     dag_h, "Wnp", allocList);
  369         wnpNode->params[0].p = asmap->parityInfo;
  370         wnpNode->params[1].p = xorNode->results[0];
  371         wnpNode->params[2].v = parityStripeID;
  372         wnpNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
  373         /* parityInfo must describe entire parity unit */
  374         RF_ASSERT(asmap->parityInfo->next == NULL);
  376 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
  377         if (nfaults == 2) {
  378                 /*
  379                  * We never try to recycle a buffer for the Q calcuation
  380                  * in addition to the parity. This would cause two buffers
  381                  * to get smashed during the P and Q calculation, guaranteeing
  382                  * one would be wrong.
  383                  */
  384                 RF_MallocAndAdd(xorNode->results[1],
  385                                 rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit),
  386                                 (void *), allocList);
  387                 rf_InitNode(wnqNode, rf_wait, RF_FALSE, rf_DiskWriteFunc, 
  388                             rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 
  389                             1, 1, 4, 0, dag_h, "Wnq", allocList);
  390                 wnqNode->params[0].p = asmap->qInfo;
  391                 wnqNode->params[1].p = xorNode->results[1];
  392                 wnqNode->params[2].v = parityStripeID;
  393                 wnqNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
  394                 /* parityInfo must describe entire parity unit */
  395                 RF_ASSERT(asmap->parityInfo->next == NULL);
  396         }
  397 #endif
  398         /*
  399          * Connect nodes to form graph.
  400          */
  402         /* connect dag header to block node */
  403         RF_ASSERT(blockNode->numAntecedents == 0);
  404         dag_h->succedents[0] = blockNode;
  406         if (nRodNodes > 0) {
  407                 /* connect the block node to the Rod nodes */
  408                 RF_ASSERT(blockNode->numSuccedents == nRodNodes);
  409                 RF_ASSERT(xorNode->numAntecedents == nRodNodes);
  410                 tmpNode = rodNodes;
  411                 for (i = 0; i < nRodNodes; i++) {
  412                         RF_ASSERT(tmpNode->numAntecedents == 1);
  413                         blockNode->succedents[i] = tmpNode;
  414                         tmpNode->antecedents[0] = blockNode;
  415                         tmpNode->antType[0] = rf_control;
  417                         /* connect the Rod nodes to the Xor node */
  418                         RF_ASSERT(tmpNode->numSuccedents == 1);
  419                         tmpNode->succedents[0] = xorNode;
  420                         xorNode->antecedents[i] = tmpNode;
  421                         xorNode->antType[i] = rf_trueData;
  422                         tmpNode = tmpNode->list_next;
  423                 }
  424         } else {
  425                 /* connect the block node to the Xor node */
  426                 RF_ASSERT(blockNode->numSuccedents == 1);
  427                 RF_ASSERT(xorNode->numAntecedents == 1);
  428                 blockNode->succedents[0] = xorNode;
  429                 xorNode->antecedents[0] = blockNode;
  430                 xorNode->antType[0] = rf_control;
  431         }
  433         /* connect the xor node to the commit node */
  434         RF_ASSERT(xorNode->numSuccedents == 1);
  435         RF_ASSERT(commitNode->numAntecedents == 1);
  436         xorNode->succedents[0] = commitNode;
  437         commitNode->antecedents[0] = xorNode;
  438         commitNode->antType[0] = rf_control;
  440         /* connect the commit node to the write nodes */
  441         RF_ASSERT(commitNode->numSuccedents == nWndNodes + nfaults);
  442         tmpNode = wndNodes;
  443         for (i = 0; i < nWndNodes; i++) {
  444                 RF_ASSERT(wndNodes->numAntecedents == 1);
  445                 commitNode->succedents[i] = tmpNode;
  446                 tmpNode->antecedents[0] = commitNode;
  447                 tmpNode->antType[0] = rf_control;
  448                 tmpNode = tmpNode->list_next;
  449         }
  450         RF_ASSERT(wnpNode->numAntecedents == 1);
  451         commitNode->succedents[nWndNodes] = wnpNode;
  452         wnpNode->antecedents[0] = commitNode;
  453         wnpNode->antType[0] = rf_trueData;
  454 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
  455         if (nfaults == 2) {
  456                 RF_ASSERT(wnqNode->numAntecedents == 1);
  457                 commitNode->succedents[nWndNodes + 1] = wnqNode;
  458                 wnqNode->antecedents[0] = commitNode;
  459                 wnqNode->antType[0] = rf_trueData;
  460         }
  461 #endif
  462         /* connect the write nodes to the term node */
  463         RF_ASSERT(termNode->numAntecedents == nWndNodes + nfaults);
  464         RF_ASSERT(termNode->numSuccedents == 0);
  465         tmpNode = wndNodes;
  466         for (i = 0; i < nWndNodes; i++) {
  467                 RF_ASSERT(wndNodes->numSuccedents == 1);
  468                 tmpNode->succedents[0] = termNode;
  469                 termNode->antecedents[i] = tmpNode;
  470                 termNode->antType[i] = rf_control;
  471                 tmpNode = tmpNode->list_next;
  472         }
  473         RF_ASSERT(wnpNode->numSuccedents == 1);
  474         wnpNode->succedents[0] = termNode;
  475         termNode->antecedents[nWndNodes] = wnpNode;
  476         termNode->antType[nWndNodes] = rf_control;
  477 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
  478         if (nfaults == 2) {
  479                 RF_ASSERT(wnqNode->numSuccedents == 1);
  480                 wnqNode->succedents[0] = termNode;
  481                 termNode->antecedents[nWndNodes + 1] = wnqNode;
  482                 termNode->antType[nWndNodes + 1] = rf_control;
  483         }
  484 #endif
  485 }
  486 /******************************************************************************
  487  *
  488  * creates a DAG to perform a small-write operation (either raid 5 or pq),
  489  * which is as follows:
  490  *
  491  * Hdr -> Nil -> Rop -> Xor -> Cmt ----> Wnp [Unp] --> Trm
  492  *            \- Rod X      /     \----> Wnd [Und]-/
  493  *           [\- Rod X     /       \---> Wnd [Und]-/]
  494  *           [\- Roq -> Q /         \--> Wnq [Unq]-/]
  495  *
  496  * Rop = read old parity
  497  * Rod = read old data
  498  * Roq = read old "q"
  499  * Cmt = commit node
  500  * Und = unlock data disk
  501  * Unp = unlock parity disk
  502  * Unq = unlock q disk
  503  * Wnp = write new parity
  504  * Wnd = write new data
  505  * Wnq = write new "q"
  506  * [ ] denotes optional segments in the graph
  507  *
  508  * Parameters:  raidPtr   - description of the physical array
  509  *              asmap     - logical & physical addresses for this access
  510  *              bp        - buffer ptr (holds write data)
  511  *              flags     - general flags (e.g. disk locking)
  512  *              allocList - list of memory allocated in DAG creation
  513  *              pfuncs    - list of parity generating functions
  514  *              qfuncs    - list of q generating functions
  515  *
  516  * A null qfuncs indicates single fault tolerant
  517  *****************************************************************************/
  519 void 
  520 rf_CommonCreateSmallWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
  521                              RF_DagHeader_t *dag_h, void *bp,
  522                              RF_RaidAccessFlags_t flags,
  523                              RF_AllocListElem_t *allocList,
  524                              const RF_RedFuncs_t *pfuncs,
  525                              const RF_RedFuncs_t *qfuncs)
  526 {
  527         RF_DagNode_t *readDataNodes, *readParityNodes, *readQNodes, *termNode;
  528         RF_DagNode_t *tmpNode, *tmpreadDataNode, *tmpreadParityNode;
  529         RF_DagNode_t *xorNodes, *qNodes, *blockNode, *commitNode;
  530         RF_DagNode_t *writeDataNodes, *writeParityNodes, *writeQNodes;
  531         RF_DagNode_t *tmpxorNode, *tmpqNode, *tmpwriteDataNode, *tmpreadQNode;
  532         RF_DagNode_t *tmpwriteParityNode;
  533 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
  534         RF_DagNode_t *tmpwriteQNode;
  535 #endif
  536         int     i, j, nNodes, totalNumNodes;
  537         RF_ReconUnitNum_t which_ru;
  538         int     (*func) (RF_DagNode_t *), (*undoFunc) (RF_DagNode_t *);
  539         int     (*qfunc) (RF_DagNode_t *);
  540         int     numDataNodes, numParityNodes;
  541         RF_StripeNum_t parityStripeID;
  542         RF_PhysDiskAddr_t *pda;
  543         char   *name, *qname;
  544         long    nfaults;
  546         nfaults = qfuncs ? 2 : 1;
  548         parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout),
  549             asmap->raidAddress, &which_ru);
  550         pda = asmap->physInfo;
  551         numDataNodes = asmap->numStripeUnitsAccessed;
  552         numParityNodes = (asmap->parityInfo->next) ? 2 : 1;
  554 #if RF_DEBUG_DAG
  555         if (rf_dagDebug) {
  556                 printf("[Creating small-write DAG]\n");
  557         }
  558 #endif
  559         RF_ASSERT(numDataNodes > 0);
  560         dag_h->creator = "SmallWriteDAG";
  562         dag_h->numCommitNodes = 1;
  563         dag_h->numCommits = 0;
  564         dag_h->numSuccedents = 1;
  566         /*
  567          * DAG creation occurs in four steps:
  568          * 1. count the number of nodes in the DAG
  569          * 2. create the nodes
  570          * 3. initialize the nodes
  571          * 4. connect the nodes
  572          */
  574         /*
  575          * Step 1. compute number of nodes in the graph
  576          */
  578         /* number of nodes: a read and write for each data unit a
  579          * redundancy computation node for each parity node (nfaults *
  580          * nparity) a read and write for each parity unit a block and
  581          * commit node (2) a terminate node if atomic RMW an unlock
  582          * node for each data unit, redundancy unit */
  583         totalNumNodes = (2 * numDataNodes) + (nfaults * numParityNodes)
  584             + (nfaults * 2 * numParityNodes) + 3;
  585         /*
  586          * Step 2. create the nodes
  587          */
  589         blockNode = rf_AllocDAGNode();
  590         blockNode->list_next = dag_h->nodes;
  591         dag_h->nodes = blockNode;
  593         commitNode = rf_AllocDAGNode();
  594         commitNode->list_next = dag_h->nodes;
  595         dag_h->nodes = commitNode;
  597         for (i = 0; i < numDataNodes; i++) {
  598                 tmpNode = rf_AllocDAGNode();
  599                 tmpNode->list_next = dag_h->nodes;
  600                 dag_h->nodes = tmpNode;
  601         }
  602         readDataNodes = dag_h->nodes;
  604         for (i = 0; i < numParityNodes; i++) {
  605                 tmpNode = rf_AllocDAGNode();
  606                 tmpNode->list_next = dag_h->nodes;
  607                 dag_h->nodes = tmpNode;
  608         }
  609         readParityNodes = dag_h->nodes;
  611         for (i = 0; i < numDataNodes; i++) {
  612                 tmpNode = rf_AllocDAGNode();
  613                 tmpNode->list_next = dag_h->nodes;
  614                 dag_h->nodes = tmpNode;
  615         }
  616         writeDataNodes = dag_h->nodes;
  618         for (i = 0; i < numParityNodes; i++) {
  619                 tmpNode = rf_AllocDAGNode();
  620                 tmpNode->list_next = dag_h->nodes;
  621                 dag_h->nodes = tmpNode;
  622         }
  623         writeParityNodes = dag_h->nodes;
  625         for (i = 0; i < numParityNodes; i++) {
  626                 tmpNode = rf_AllocDAGNode();
  627                 tmpNode->list_next = dag_h->nodes;
  628                 dag_h->nodes = tmpNode;
  629         }
  630         xorNodes = dag_h->nodes;
  632         termNode = rf_AllocDAGNode();
  633         termNode->list_next = dag_h->nodes;
  634         dag_h->nodes = termNode;
  636 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
  637         if (nfaults == 2) {
  638                 for (i = 0; i < numParityNodes; i++) {
  639                         tmpNode = rf_AllocDAGNode();
  640                         tmpNode->list_next = dag_h->nodes;
  641                         dag_h->nodes = tmpNode;
  642                 }
  643                 readQNodes = dag_h->nodes;
  645                 for (i = 0; i < numParityNodes; i++) {
  646                         tmpNode = rf_AllocDAGNode();
  647                         tmpNode->list_next = dag_h->nodes;
  648                         dag_h->nodes = tmpNode;
  649                 }
  650                 writeQNodes = dag_h->nodes;
  652                 for (i = 0; i < numParityNodes; i++) {
  653                         tmpNode = rf_AllocDAGNode();
  654                         tmpNode->list_next = dag_h->nodes;
  655                         dag_h->nodes = tmpNode;
  656                 }
  657                 qNodes = dag_h->nodes;
  658         } else {
  659 #endif
  660                 readQNodes = writeQNodes = qNodes = NULL;
  661 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
  662         }
  663 #endif
  665         /*
  666          * Step 3. initialize the nodes
  667          */
  668         /* initialize block node (Nil) */
  669         nNodes = numDataNodes + (nfaults * numParityNodes);
  670         rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, 
  671                     rf_NullNodeUndoFunc, NULL, nNodes, 0, 0, 0, 
  672                     dag_h, "Nil", allocList);
  674         /* initialize commit node (Cmt) */
  675         rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, 
  676                     rf_NullNodeUndoFunc, NULL, nNodes, 
  677                     (nfaults * numParityNodes), 0, 0, dag_h, "Cmt", allocList);
  679         /* initialize terminate node (Trm) */
  680         rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, 
  681                     rf_TerminateUndoFunc, NULL, 0, nNodes, 0, 0, 
  682                     dag_h, "Trm", allocList);
  684         /* initialize nodes which read old data (Rod) */
  685         tmpreadDataNode = readDataNodes;
  686         for (i = 0; i < numDataNodes; i++) {
  687                 rf_InitNode(tmpreadDataNode, rf_wait, RF_FALSE, 
  688                             rf_DiskReadFunc, rf_DiskReadUndoFunc,
  689                             rf_GenericWakeupFunc, (nfaults * numParityNodes), 
  690                             1, 4, 0, dag_h, "Rod", allocList);
  691                 RF_ASSERT(pda != NULL);
  692                 /* physical disk addr desc */
  693                 tmpreadDataNode->params[0].p = pda;
  694                 /* buffer to hold old data */
  695                 tmpreadDataNode->params[1].p = rf_AllocBuffer(raidPtr, dag_h, pda->numSector << raidPtr->logBytesPerSector);
  696                 tmpreadDataNode->params[2].v = parityStripeID;
  697                 tmpreadDataNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
  698                     which_ru);
  699                 pda = pda->next;
  700                 for (j = 0; j < tmpreadDataNode->numSuccedents; j++) {
  701                         tmpreadDataNode->propList[j] = NULL;
  702                 }
  703                 tmpreadDataNode = tmpreadDataNode->list_next;
  704         }
  706         /* initialize nodes which read old parity (Rop) */
  707         pda = asmap->parityInfo;
  708         i = 0;
  709         tmpreadParityNode = readParityNodes;
  710         for (i = 0; i < numParityNodes; i++) {
  711                 RF_ASSERT(pda != NULL);
  712                 rf_InitNode(tmpreadParityNode, rf_wait, RF_FALSE, 
  713                             rf_DiskReadFunc, rf_DiskReadUndoFunc,
  714                             rf_GenericWakeupFunc, numParityNodes, 1, 4, 0, 
  715                             dag_h, "Rop", allocList);
  716                 tmpreadParityNode->params[0].p = pda;
  717                 /* buffer to hold old parity */
  718                 tmpreadParityNode->params[1].p = rf_AllocBuffer(raidPtr, dag_h, pda->numSector << raidPtr->logBytesPerSector);
  719                 tmpreadParityNode->params[2].v = parityStripeID;
  720                 tmpreadParityNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
  721                     which_ru);
  722                 pda = pda->next;
  723                 for (j = 0; j < tmpreadParityNode->numSuccedents; j++) {
  724                         tmpreadParityNode->propList[0] = NULL;
  725                 }
  726                 tmpreadParityNode = tmpreadParityNode->list_next;
  727         }
  729 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
  730         /* initialize nodes which read old Q (Roq) */
  731         if (nfaults == 2) {
  732                 pda = asmap->qInfo;
  733                 tmpreadQNode = readQNodes;
  734                 for (i = 0; i < numParityNodes; i++) {
  735                         RF_ASSERT(pda != NULL);
  736                         rf_InitNode(tmpreadQNode, rf_wait, RF_FALSE, 
  737                                     rf_DiskReadFunc, rf_DiskReadUndoFunc,
  738                                     rf_GenericWakeupFunc, numParityNodes, 
  739                                     1, 4, 0, dag_h, "Roq", allocList);
  740                         tmpreadQNode->params[0].p = pda;
  741                         /* buffer to hold old Q */
  742                         tmpreadQNode->params[1].p = rf_AllocBuffer(raidPtr, dag_h,
  743                                                                    pda->numSector << raidPtr->logBytesPerSector);
  744                         tmpreadQNode->params[2].v = parityStripeID;
  745                         tmpreadQNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
  746                             which_ru);
  747                         pda = pda->next;
  748                         for (j = 0; j < tmpreadQNode->numSuccedents; j++) {
  749                                 tmpreadQNode->propList[0] = NULL;
  750                         }
  751                         tmpreadQNode = tmpreadQNode->list_next;
  752                 }
  753         }
  754 #endif
  755         /* initialize nodes which write new data (Wnd) */
  756         pda = asmap->physInfo;
  757         tmpwriteDataNode = writeDataNodes;
  758         for (i = 0; i < numDataNodes; i++) {
  759                 RF_ASSERT(pda != NULL);
  760                 rf_InitNode(tmpwriteDataNode, rf_wait, RF_FALSE, 
  761                             rf_DiskWriteFunc, rf_DiskWriteUndoFunc, 
  762                             rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h,
  763                             "Wnd", allocList);
  764                 /* physical disk addr desc */
  765                 tmpwriteDataNode->params[0].p = pda;
  766                 /* buffer holding new data to be written */
  767                 tmpwriteDataNode->params[1].p = pda->bufPtr;
  768                 tmpwriteDataNode->params[2].v = parityStripeID;
  769                 tmpwriteDataNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
  770                     which_ru);
  771                 pda = pda->next;
  772                 tmpwriteDataNode = tmpwriteDataNode->list_next;
  773         }
  775         /*
  776          * Initialize nodes which compute new parity and Q.
  777          */
  778         /*
  779          * We use the simple XOR func in the double-XOR case, and when
  780          * we're accessing only a portion of one stripe unit. The
  781          * distinction between the two is that the regular XOR func
  782          * assumes that the targbuf is a full SU in size, and examines
  783          * the pda associated with the buffer to decide where within
  784          * the buffer to XOR the data, whereas the simple XOR func
  785          * just XORs the data into the start of the buffer.  */
  786         if ((numParityNodes == 2) || ((numDataNodes == 1)
  787                 && (asmap->totalSectorsAccessed < 
  788                     raidPtr->Layout.sectorsPerStripeUnit))) {
  789                 func = pfuncs->simple;
  790                 undoFunc = rf_NullNodeUndoFunc;
  791                 name = pfuncs->SimpleName;
  792                 if (qfuncs) {
  793                         qfunc = qfuncs->simple;
  794                         qname = qfuncs->SimpleName;
  795                 } else {
  796                         qfunc = NULL;
  797                         qname = NULL;
  798                 }
  799         } else {
  800                 func = pfuncs->regular;
  801                 undoFunc = rf_NullNodeUndoFunc;
  802                 name = pfuncs->RegularName;
  803                 if (qfuncs) {
  804                         qfunc = qfuncs->regular;
  805                         qname = qfuncs->RegularName;
  806                 } else {
  807                         qfunc = NULL;
  808                         qname = NULL;
  809                 }
  810         }
  811         /*
  812          * Initialize the xor nodes: params are {pda,buf}
  813          * from {Rod,Wnd,Rop} nodes, and raidPtr
  814          */
  815         if (numParityNodes == 2) {
  816                 /* double-xor case */
  817                 tmpxorNode = xorNodes;
  818                 tmpreadDataNode = readDataNodes;
  819                 tmpreadParityNode = readParityNodes;
  820                 tmpwriteDataNode = writeDataNodes;
  821                 tmpqNode = qNodes;
  822                 tmpreadQNode = readQNodes;
  823                 for (i = 0; i < numParityNodes; i++) {
  824                         /* note: no wakeup func for xor */
  825                         rf_InitNode(tmpxorNode, rf_wait, RF_FALSE, func, 
  826                                     undoFunc, NULL, 1, 
  827                                     (numDataNodes + numParityNodes), 
  828                                     7, 1, dag_h, name, allocList);
  829                         tmpxorNode->flags |= RF_DAGNODE_FLAG_YIELD;
  830                         tmpxorNode->params[0] = tmpreadDataNode->params[0];
  831                         tmpxorNode->params[1] = tmpreadDataNode->params[1];
  832                         tmpxorNode->params[2] = tmpreadParityNode->params[0];
  833                         tmpxorNode->params[3] = tmpreadParityNode->params[1];
  834                         tmpxorNode->params[4] = tmpwriteDataNode->params[0];
  835                         tmpxorNode->params[5] = tmpwriteDataNode->params[1];
  836                         tmpxorNode->params[6].p = raidPtr;
  837                         /* use old parity buf as target buf */
  838                         tmpxorNode->results[0] = tmpreadParityNode->params[1].p;
  839 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
  840                         if (nfaults == 2) {
  841                                 /* note: no wakeup func for qor */
  842                                 rf_InitNode(tmpqNode, rf_wait, RF_FALSE, 
  843                                             qfunc, undoFunc, NULL, 1,
  844                                             (numDataNodes + numParityNodes), 
  845                                             7, 1, dag_h, qname, allocList);
  846                                 tmpqNode->params[0] = tmpreadDataNode->params[0];
  847                                 tmpqNode->params[1] = tmpreadDataNode->params[1];
  848                                 tmpqNode->params[2] = tmpreadQNode->.params[0];
  849                                 tmpqNode->params[3] = tmpreadQNode->params[1];
  850                                 tmpqNode->params[4] = tmpwriteDataNode->params[0];
  851                                 tmpqNode->params[5] = tmpwriteDataNode->params[1];
  852                                 tmpqNode->params[6].p = raidPtr;
  853                                 /* use old Q buf as target buf */
  854                                 tmpqNode->results[0] = tmpreadQNode->params[1].p;
  855                                 tmpqNode = tmpqNode->list_next;
  856                                 tmpreadQNodes = tmpreadQNodes->list_next;
  857                         }
  858 #endif
  859                         tmpxorNode = tmpxorNode->list_next;
  860                         tmpreadDataNode = tmpreadDataNode->list_next;
  861                         tmpreadParityNode = tmpreadParityNode->list_next;
  862                         tmpwriteDataNode = tmpwriteDataNode->list_next;
  863                 }
  864         } else {
  865                 /* there is only one xor node in this case */
  866                 rf_InitNode(xorNodes, rf_wait, RF_FALSE, func, 
  867                             undoFunc, NULL, 1, (numDataNodes + numParityNodes),
  868                             (2 * (numDataNodes + numDataNodes + 1) + 1), 1, 
  869                             dag_h, name, allocList);
  870                 xorNodes->flags |= RF_DAGNODE_FLAG_YIELD;
  871                 tmpreadDataNode = readDataNodes;
  872                 for (i = 0; i < numDataNodes; i++) { /* used to be "numDataNodes + 1" until we factored
  873                                                         out the "+1" into the "deal with Rop separately" below */
  874                         /* set up params related to Rod nodes */
  875                         xorNodes->params[2 * i + 0] = tmpreadDataNode->params[0];       /* pda */
  876                         xorNodes->params[2 * i + 1] = tmpreadDataNode->params[1];       /* buffer ptr */
  877                         tmpreadDataNode = tmpreadDataNode->list_next;
  878                 }
  879                 /* deal with Rop separately */
  880                 xorNodes->params[2 * numDataNodes + 0] = readParityNodes->params[0];    /* pda */
  881                 xorNodes->params[2 * numDataNodes + 1] = readParityNodes->params[1];    /* buffer ptr */
  883                 tmpwriteDataNode = writeDataNodes;
  884                 for (i = 0; i < numDataNodes; i++) {
  885                         /* set up params related to Wnd and Wnp nodes */
  886                         xorNodes->params[2 * (numDataNodes + 1 + i) + 0] =      /* pda */
  887                             tmpwriteDataNode->params[0];
  888                         xorNodes->params[2 * (numDataNodes + 1 + i) + 1] =      /* buffer ptr */
  889                             tmpwriteDataNode->params[1];
  890                         tmpwriteDataNode = tmpwriteDataNode->list_next;
  891                 }
  892                 /* xor node needs to get at RAID information */
  893                 xorNodes->params[2 * (numDataNodes + numDataNodes + 1)].p = raidPtr;
  894                 xorNodes->results[0] = readParityNodes->params[1].p;
  895 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
  896                 if (nfaults == 2) {
  897                         rf_InitNode(qNodes, rf_wait, RF_FALSE, qfunc, 
  898                                     undoFunc, NULL, 1,
  899                                     (numDataNodes + numParityNodes),
  900                                     (2 * (numDataNodes + numDataNodes + 1) + 1), 1,
  901                                     dag_h, qname, allocList);
  902                         tmpreadDataNode = readDataNodes;
  903                         for (i = 0; i < numDataNodes; i++) {
  904                                 /* set up params related to Rod */
  905                                 qNodes->params[2 * i + 0] = tmpreadDataNode->params[0]; /* pda */
  906                                 qNodes->params[2 * i + 1] = tmpreadDataNode->params[1]; /* buffer ptr */
  907                                 tmpreadDataNode = tmpreadDataNode->list_next;
  908                         }
  909                         /* and read old q */
  910                         qNodes->params[2 * numDataNodes + 0] =  /* pda */
  911                             readQNodes->params[0];
  912                         qNodes->params[2 * numDataNodes + 1] =  /* buffer ptr */
  913                             readQNodes->params[1];
  914                         tmpwriteDataNode = writeDataNodes;
  915                         for (i = 0; i < numDataNodes; i++) {
  916                                 /* set up params related to Wnd nodes */
  917                                 qNodes->params[2 * (numDataNodes + 1 + i) + 0] =        /* pda */
  918                                     tmpwriteDataNode->params[0];
  919                                 qNodes->params[2 * (numDataNodes + 1 + i) + 1] =        /* buffer ptr */
  920                                     tmpwriteDataNode->params[1];
  921                                 tmpwriteDataNode = tmpwriteDataNode->list_next;
  922                         }
  923                         /* xor node needs to get at RAID information */
  924                         qNodes->params[2 * (numDataNodes + numDataNodes + 1)].p = raidPtr;
  925                         qNodes->results[0] = readQNodes->params[1].p;
  926                 }
  927 #endif
  928         }
  930         /* initialize nodes which write new parity (Wnp) */
  931         pda = asmap->parityInfo;
  932         tmpwriteParityNode = writeParityNodes;
  933         tmpxorNode = xorNodes;
  934         for (i = 0; i < numParityNodes; i++) {
  935                 rf_InitNode(tmpwriteParityNode, rf_wait, RF_FALSE, 
  936                             rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
  937                             rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h,
  938                             "Wnp", allocList);
  939                 RF_ASSERT(pda != NULL);
  940                 tmpwriteParityNode->params[0].p = pda;  /* param 1 (bufPtr)
  941                                                          * filled in by xor node */
  942                 tmpwriteParityNode->params[1].p = tmpxorNode->results[0];       /* buffer pointer for
  943                                                                                  * parity write
  944                                                                                  * operation */
  945                 tmpwriteParityNode->params[2].v = parityStripeID;
  946                 tmpwriteParityNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
  947                     which_ru);
  948                 pda = pda->next;
  949                 tmpwriteParityNode = tmpwriteParityNode->list_next;
  950                 tmpxorNode = tmpxorNode->list_next;
  951         }
  953 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
  954         /* initialize nodes which write new Q (Wnq) */
  955         if (nfaults == 2) {
  956                 pda = asmap->qInfo;
  957                 tmpwriteQNode = writeQNodes;
  958                 tmpqNode = qNodes;
  959                 for (i = 0; i < numParityNodes; i++) {
  960                         rf_InitNode(tmpwriteQNode, rf_wait, RF_FALSE, 
  961                                     rf_DiskWriteFunc, rf_DiskWriteUndoFunc, 
  962                                     rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h,
  963                                     "Wnq", allocList);
  964                         RF_ASSERT(pda != NULL);
  965                         tmpwriteQNode->params[0].p = pda;       /* param 1 (bufPtr)
  966                                                                  * filled in by xor node */
  967                         tmpwriteQNode->params[1].p = tmpqNode->results[0];      /* buffer pointer for
  968                                                                                  * parity write
  969                                                                                  * operation */
  970                         tmpwriteQNode->params[2].v = parityStripeID;
  971                         tmpwriteQNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY,
  972                             which_ru);
  973                         pda = pda->next;
  974                         tmpwriteQNode = tmpwriteQNode->list_next;
  975                         tmpqNode = tmpqNode->list_next;
  976                 }
  977         }
  978 #endif
  979         /*
  980          * Step 4. connect the nodes.
  981          */
  983         /* connect header to block node */
  984         dag_h->succedents[0] = blockNode;
  986         /* connect block node to read old data nodes */
  987         RF_ASSERT(blockNode->numSuccedents == (numDataNodes + (numParityNodes * nfaults)));
  988         tmpreadDataNode = readDataNodes;
  989         for (i = 0; i < numDataNodes; i++) {
  990                 blockNode->succedents[i] = tmpreadDataNode;
  991                 RF_ASSERT(tmpreadDataNode->numAntecedents == 1);
  992                 tmpreadDataNode->antecedents[0] = blockNode;
  993                 tmpreadDataNode->antType[0] = rf_control;
  994                 tmpreadDataNode = tmpreadDataNode->list_next;
  995         }
  997         /* connect block node to read old parity nodes */
  998         tmpreadParityNode = readParityNodes;
  999         for (i = 0; i < numParityNodes; i++) {
 1000                 blockNode->succedents[numDataNodes + i] = tmpreadParityNode;
 1001                 RF_ASSERT(tmpreadParityNode->numAntecedents == 1);
 1002                 tmpreadParityNode->antecedents[0] = blockNode;
 1003                 tmpreadParityNode->antType[0] = rf_control;
 1004                 tmpreadParityNode = tmpreadParityNode->list_next;
 1005         }
 1007 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
 1008         /* connect block node to read old Q nodes */
 1009         if (nfaults == 2) {
 1010                 tmpreadQNode = readQNodes;
 1011                 for (i = 0; i < numParityNodes; i++) {
 1012                         blockNode->succedents[numDataNodes + numParityNodes + i] = tmpreadQNode;
 1013                         RF_ASSERT(tmpreadQNode->numAntecedents == 1);
 1014                         tmpreadQNode->antecedents[0] = blockNode;
 1015                         tmpreadQNode->antType[0] = rf_control;
 1016                         tmpreadQNode = tmpreadQNode->list_next;
 1017                 }
 1018         }
 1019 #endif
 1020         /* connect read old data nodes to xor nodes */
 1021         tmpreadDataNode = readDataNodes;
 1022         for (i = 0; i < numDataNodes; i++) {
 1023                 RF_ASSERT(tmpreadDataNode->numSuccedents == (nfaults * numParityNodes));
 1024                 tmpxorNode = xorNodes;
 1025                 for (j = 0; j < numParityNodes; j++) {
 1026                         RF_ASSERT(tmpxorNode->numAntecedents == numDataNodes + numParityNodes);
 1027                         tmpreadDataNode->succedents[j] = tmpxorNode;
 1028                         tmpxorNode->antecedents[i] = tmpreadDataNode;
 1029                         tmpxorNode->antType[i] = rf_trueData;
 1030                         tmpxorNode = tmpxorNode->list_next;
 1031                 }
 1032                 tmpreadDataNode = tmpreadDataNode->list_next;
 1033         }
 1035 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
 1036         /* connect read old data nodes to q nodes */
 1037         if (nfaults == 2) {
 1038                 tmpreadDataNode = readDataNodes;
 1039                 for (i = 0; i < numDataNodes; i++) {
 1040                         tmpqNode = qNodes;
 1041                         for (j = 0; j < numParityNodes; j++) {
 1042                                 RF_ASSERT(tmpqNode->numAntecedents == numDataNodes + numParityNodes);
 1043                                 tmpreadDataNode->succedents[numParityNodes + j] = tmpqNode;
 1044                                 tmpqNode->antecedents[i] = tmpreadDataNode;
 1045                                 tmpqNode->antType[i] = rf_trueData;
 1046                                 tmpqNode = tmpqNode->list_next;
 1047                         }
 1048                         tmpreadDataNode = tmpreadDataNode->list_next;
 1049                 }
 1050         }
 1051 #endif
 1052         /* connect read old parity nodes to xor nodes */
 1053         tmpreadParityNode = readParityNodes;
 1054         for (i = 0; i < numParityNodes; i++) {
 1055                 RF_ASSERT(tmpreadParityNode->numSuccedents == numParityNodes);
 1056                 tmpxorNode = xorNodes;
 1057                 for (j = 0; j < numParityNodes; j++) {
 1058                         tmpreadParityNode->succedents[j] = tmpxorNode;
 1059                         tmpxorNode->antecedents[numDataNodes + i] = tmpreadParityNode;
 1060                         tmpxorNode->antType[numDataNodes + i] = rf_trueData;
 1061                         tmpxorNode = tmpxorNode->list_next;
 1062                 }
 1063                 tmpreadParityNode = tmpreadParityNode->list_next;
 1064         }
 1066 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
 1067         /* connect read old q nodes to q nodes */
 1068         if (nfaults == 2) {
 1069                 tmpreadParityNode = readParityNodes;
 1070                 tmpreadQNode = readQNodes;
 1071                 for (i = 0; i < numParityNodes; i++) {
 1072                         RF_ASSERT(tmpreadParityNode->numSuccedents == numParityNodes);
 1073                         tmpqNode = qNodes;
 1074                         for (j = 0; j < numParityNodes; j++) {
 1075                                 tmpreadQNode->succedents[j] = tmpqNode;
 1076                                 tmpqNode->antecedents[numDataNodes + i] = tmpreadQNodes;
 1077                                 tmpqNode->antType[numDataNodes + i] = rf_trueData;
 1078                                 tmpqNode = tmpqNode->list_next;
 1079                         }
 1080                         tmpreadParityNode = tmpreadParityNode->list_next;
 1081                         tmpreadQNode = tmpreadQNode->list_next;
 1082                 }
 1083         }
 1084 #endif
 1085         /* connect xor nodes to commit node */
 1086         RF_ASSERT(commitNode->numAntecedents == (nfaults * numParityNodes));
 1087         tmpxorNode = xorNodes;
 1088         for (i = 0; i < numParityNodes; i++) {
 1089                 RF_ASSERT(tmpxorNode->numSuccedents == 1);
 1090                 tmpxorNode->succedents[0] = commitNode;
 1091                 commitNode->antecedents[i] = tmpxorNode;
 1092                 commitNode->antType[i] = rf_control;
 1093                 tmpxorNode = tmpxorNode->list_next;
 1094         }
 1096 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
 1097         /* connect q nodes to commit node */
 1098         if (nfaults == 2) {
 1099                 tmpqNode = qNodes;
 1100                 for (i = 0; i < numParityNodes; i++) {
 1101                         RF_ASSERT(tmpqNode->numSuccedents == 1);
 1102                         tmpqNode->succedents[0] = commitNode;
 1103                         commitNode->antecedents[i + numParityNodes] = tmpqNode;
 1104                         commitNode->antType[i + numParityNodes] = rf_control;
 1105                         tmpqNode = tmpqNode->list_next;
 1106                 }
 1107         }
 1108 #endif
 1109         /* connect commit node to write nodes */
 1110         RF_ASSERT(commitNode->numSuccedents == (numDataNodes + (nfaults * numParityNodes)));
 1111         tmpwriteDataNode = writeDataNodes;
 1112         for (i = 0; i < numDataNodes; i++) {
 1113                 RF_ASSERT(tmpwriteDataNode->numAntecedents == 1);
 1114                 commitNode->succedents[i] = tmpwriteDataNode;
 1115                 tmpwriteDataNode->antecedents[0] = commitNode;
 1116                 tmpwriteDataNode->antType[0] = rf_trueData;
 1117                 tmpwriteDataNode = tmpwriteDataNode->list_next;
 1118         }
 1119         tmpwriteParityNode = writeParityNodes;
 1120         for (i = 0; i < numParityNodes; i++) {
 1121                 RF_ASSERT(tmpwriteParityNode->numAntecedents == 1);
 1122                 commitNode->succedents[i + numDataNodes] = tmpwriteParityNode;
 1123                 tmpwriteParityNode->antecedents[0] = commitNode;
 1124                 tmpwriteParityNode->antType[0] = rf_trueData;
 1125                 tmpwriteParityNode = tmpwriteParityNode->list_next;
 1126         }
 1127 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
 1128         if (nfaults == 2) {
 1129                 tmpwriteQNode = writeQNodes;
 1130                 for (i = 0; i < numParityNodes; i++) {
 1131                         RF_ASSERT(tmpwriteQNode->numAntecedents == 1);
 1132                         commitNode->succedents[i + numDataNodes + numParityNodes] = tmpwriteQNode;
 1133                         tmpwriteQNode->antecedents[0] = commitNode;
 1134                         tmpwriteQNode->antType[0] = rf_trueData;
 1135                         tmpwriteQNode = tmpwriteQNode->list_next;
 1136                 }
 1137         }
 1138 #endif
 1139         RF_ASSERT(termNode->numAntecedents == (numDataNodes + (nfaults * numParityNodes)));
 1140         RF_ASSERT(termNode->numSuccedents == 0);
 1141         tmpwriteDataNode = writeDataNodes;
 1142         for (i = 0; i < numDataNodes; i++) {
 1143                 /* connect write new data nodes to term node */
 1144                 RF_ASSERT(tmpwriteDataNode->numSuccedents == 1);
 1145                 RF_ASSERT(termNode->numAntecedents == (numDataNodes + (nfaults * numParityNodes)));
 1146                 tmpwriteDataNode->succedents[0] = termNode;
 1147                 termNode->antecedents[i] = tmpwriteDataNode;
 1148                 termNode->antType[i] = rf_control;
 1149                 tmpwriteDataNode = tmpwriteDataNode->list_next;
 1150         }
 1152         tmpwriteParityNode = writeParityNodes;
 1153         for (i = 0; i < numParityNodes; i++) {
 1154                 RF_ASSERT(tmpwriteParityNode->numSuccedents == 1);
 1155                 tmpwriteParityNode->succedents[0] = termNode;
 1156                 termNode->antecedents[numDataNodes + i] = tmpwriteParityNode;
 1157                 termNode->antType[numDataNodes + i] = rf_control;
 1158                 tmpwriteParityNode = tmpwriteParityNode->list_next;
 1159         }
 1161 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
 1162         if (nfaults == 2) {
 1163                 tmpwriteQNode = writeQNodes;
 1164                 for (i = 0; i < numParityNodes; i++) {
 1165                         RF_ASSERT(tmpwriteQNode->numSuccedents == 1);
 1166                         tmpwriteQNode->succedents[0] = termNode;
 1167                         termNode->antecedents[numDataNodes + numParityNodes + i] = tmpwriteQNode;
 1168                         termNode->antType[numDataNodes + numParityNodes + i] = rf_control;
 1169                         tmpwriteQNode = tmpwriteQNode->list_next;
 1170                 }
 1171         }
 1172 #endif
 1173 }
 1176 /******************************************************************************
 1177  * create a write graph (fault-free or degraded) for RAID level 1
 1178  *
 1179  * Hdr -> Commit -> Wpd -> Nil -> Trm
 1180  *               -> Wsd ->
 1181  *
 1182  * The "Wpd" node writes data to the primary copy in the mirror pair
 1183  * The "Wsd" node writes data to the secondary copy in the mirror pair
 1184  *
 1185  * Parameters:  raidPtr   - description of the physical array
 1186  *              asmap     - logical & physical addresses for this access
       *              dag_h     - DAG header that this routine fills in
 1187  *              bp        - buffer ptr (holds write data)
 1188  *              flags     - general flags (e.g. disk locking)
 1189  *              allocList - list of memory allocated in DAG creation
 1190  *****************************************************************************/
 1192 void 
 1193 rf_CreateRaidOneWriteDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
 1194                          RF_DagHeader_t *dag_h, void *bp,
 1195                          RF_RaidAccessFlags_t flags,
 1196                          RF_AllocListElem_t *allocList)
 1197 {
 1198         RF_DagNode_t *unblockNode, *termNode, *commitNode;
 1199         RF_DagNode_t *wndNode, *wmirNode;
 1200         RF_DagNode_t *tmpNode, *tmpwndNode, *tmpwmirNode;
 1201         int     nWndNodes, nWmirNodes, i;
 1202         RF_ReconUnitNum_t which_ru;
 1203         RF_PhysDiskAddr_t *pda, *pdaP;
 1204         RF_StripeNum_t parityStripeID;
 1206         parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout),
 1207             asmap->raidAddress, &which_ru);
 1208 #if RF_DEBUG_DAG
 1209         if (rf_dagDebug) {
 1210                 printf("[Creating RAID level 1 write DAG]\n");
 1211         }
 1212 #endif
 1213         dag_h->creator = "RaidOneWriteDAG";
 1215         /* 2 implies access not SU aligned */
 1216         nWmirNodes = (asmap->parityInfo->next) ? 2 : 1;
 1217         nWndNodes = (asmap->physInfo->next) ? 2 : 1;
 1219         /* alloc the Wnd nodes and the Wmir node */
 1220         if (asmap->numDataFailed == 1)
 1221                 nWndNodes--;
 1222         if (asmap->numParityFailed == 1)
 1223                 nWmirNodes--;
 1225         /* total number of nodes = nWndNodes + nWmirNodes + (commit + unblock
 1226          * + terminator) */
 1227         for (i = 0; i < nWndNodes; i++) {
 1228                 tmpNode = rf_AllocDAGNode();
 1229                 tmpNode->list_next = dag_h->nodes;
 1230                 dag_h->nodes = tmpNode;
 1231         }
 1232         wndNode = dag_h->nodes;
 1234         for (i = 0; i < nWmirNodes; i++) {
 1235                 tmpNode = rf_AllocDAGNode();
 1236                 tmpNode->list_next = dag_h->nodes;
 1237                 dag_h->nodes = tmpNode;
 1238         }
 1239         wmirNode = dag_h->nodes;
 1241         commitNode = rf_AllocDAGNode();
 1242         commitNode->list_next = dag_h->nodes;
 1243         dag_h->nodes = commitNode;
 1245         unblockNode = rf_AllocDAGNode();
 1246         unblockNode->list_next = dag_h->nodes;
 1247         dag_h->nodes = unblockNode;
 1249         termNode = rf_AllocDAGNode();
 1250         termNode->list_next = dag_h->nodes;
 1251         dag_h->nodes = termNode;
 1253         /* this dag can commit immediately */
 1254         dag_h->numCommitNodes = 1;
 1255         dag_h->numCommits = 0;
 1256         dag_h->numSuccedents = 1;
 1258         /* initialize the commit, unblock, and term nodes */
 1259         rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, 
 1260                     rf_NullNodeUndoFunc, NULL, (nWndNodes + nWmirNodes), 
 1261                     0, 0, 0, dag_h, "Cmt", allocList);
 1262         rf_InitNode(unblockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, 
 1263                     rf_NullNodeUndoFunc, NULL, 1, (nWndNodes + nWmirNodes), 
 1264                     0, 0, dag_h, "Nil", allocList);
 1265         rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, 
 1266                     rf_TerminateUndoFunc, NULL, 0, 1, 0, 0, 
 1267                     dag_h, "Trm", allocList);
 1269         /* initialize the wnd nodes */
 1270         if (nWndNodes > 0) {
 1271                 pda = asmap->physInfo;
 1272                 tmpwndNode = wndNode;
 1273                 for (i = 0; i < nWndNodes; i++) {
 1274                         rf_InitNode(tmpwndNode, rf_wait, RF_FALSE, 
 1275                                     rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
 1276                                     rf_GenericWakeupFunc, 1, 1, 4, 0, 
 1277                                     dag_h, "Wpd", allocList);
 1278                         RF_ASSERT(pda != NULL);
 1279                         tmpwndNode->params[0].p = pda;
 1280                         tmpwndNode->params[1].p = pda->bufPtr;
 1281                         tmpwndNode->params[2].v = parityStripeID;
 1282                         tmpwndNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
 1283                         pda = pda->next;
 1284                         tmpwndNode = tmpwndNode->list_next;
 1285                 }
 1286                 RF_ASSERT(pda == NULL);
 1287         }
 1288         /* initialize the mirror nodes */
 1289         if (nWmirNodes > 0) {
 1290                 pda = asmap->physInfo;
 1291                 pdaP = asmap->parityInfo;
 1292                 tmpwmirNode = wmirNode;
 1293                 for (i = 0; i < nWmirNodes; i++) {
 1294                         rf_InitNode(tmpwmirNode, rf_wait, RF_FALSE, 
 1295                                     rf_DiskWriteFunc, rf_DiskWriteUndoFunc,
 1296                                     rf_GenericWakeupFunc, 1, 1, 4, 0, 
 1297                                     dag_h, "Wsd", allocList);
 1298                         RF_ASSERT(pda != NULL);
 1299                         tmpwmirNode->params[0].p = pdaP;
 1300                         tmpwmirNode->params[1].p = pda->bufPtr;
 1301                         tmpwmirNode->params[2].v = parityStripeID;
 1302                         tmpwmirNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
 1303                         pda = pda->next;
 1304                         pdaP = pdaP->next;
 1305                         tmpwmirNode = tmpwmirNode->list_next;
 1306                 }
 1307                 RF_ASSERT(pda == NULL);
 1308                 RF_ASSERT(pdaP == NULL);
 1309         }
 1310         /* link the header node to the commit node */
 1311         RF_ASSERT(dag_h->numSuccedents == 1);
 1312         RF_ASSERT(commitNode->numAntecedents == 0);
 1313         dag_h->succedents[0] = commitNode;
 1315         /* link the commit node to the write nodes */
 1316         RF_ASSERT(commitNode->numSuccedents == (nWndNodes + nWmirNodes));
 1317         tmpwndNode = wndNode;
 1318         for (i = 0; i < nWndNodes; i++) {
 1319                 RF_ASSERT(tmpwndNode->numAntecedents == 1);
 1320                 commitNode->succedents[i] = tmpwndNode;
 1321                 tmpwndNode->antecedents[0] = commitNode;
 1322                 tmpwndNode->antType[0] = rf_control;
 1323                 tmpwndNode = tmpwndNode->list_next;
 1324         }
 1325         tmpwmirNode = wmirNode;
 1326         for (i = 0; i < nWmirNodes; i++) {
 1327                 RF_ASSERT(tmpwmirNode->numAntecedents == 1);
 1328                 commitNode->succedents[i + nWndNodes] = tmpwmirNode;
 1329                 tmpwmirNode->antecedents[0] = commitNode;
 1330                 tmpwmirNode->antType[0] = rf_control;
 1331                 tmpwmirNode = tmpwmirNode->list_next;
 1332         }
 1334         /* link the write nodes to the unblock node */
 1335         RF_ASSERT(unblockNode->numAntecedents == (nWndNodes + nWmirNodes));
 1336         tmpwndNode = wndNode;
 1337         for (i = 0; i < nWndNodes; i++) {
 1338                 RF_ASSERT(tmpwndNode->numSuccedents == 1);
 1339                 tmpwndNode->succedents[0] = unblockNode;
 1340                 unblockNode->antecedents[i] = tmpwndNode;
 1341                 unblockNode->antType[i] = rf_control;
 1342                 tmpwndNode = tmpwndNode->list_next;
 1343         }
 1344         tmpwmirNode = wmirNode;
 1345         for (i = 0; i < nWmirNodes; i++) {
 1346                 RF_ASSERT(tmpwmirNode->numSuccedents == 1);
 1347                 tmpwmirNode->succedents[0] = unblockNode;
 1348                 unblockNode->antecedents[i + nWndNodes] = tmpwmirNode;
 1349                 unblockNode->antType[i + nWndNodes] = rf_control;
 1350                 tmpwmirNode = tmpwmirNode->list_next;
 1351         }
 1353         /* link the unblock node to the term node */
 1354         RF_ASSERT(unblockNode->numSuccedents == 1);
 1355         RF_ASSERT(termNode->numAntecedents == 1);
 1356         RF_ASSERT(termNode->numSuccedents == 0);
 1357         unblockNode->succedents[0] = termNode;
 1358         termNode->antecedents[0] = unblockNode;
 1359         termNode->antType[0] = rf_control;
 1360 }

Cache object: 4229b75ce92b20ebc587b9c388454a6e

[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.