FreeBSD/Linux Kernel Cross Reference
sys/dev/raidframe/rf_parityloggingdags.c


/*      $NetBSD: rf_parityloggingdags.c,v 1.13 2004/01/10 00:56:28 oster Exp $  */
/*
 * Copyright (c) 1995 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Author: William V. Courtright II
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

/*
 * DAGs specific to parity logging are created here.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: rf_parityloggingdags.c,v 1.13 2004/01/10 00:56:28 oster Exp $");

#include "rf_archs.h"
#include "opt_raid_diagnostic.h"

#if RF_INCLUDE_PARITYLOGGING > 0

#include <dev/raidframe/raidframevar.h>

#include "rf_raid.h"
#include "rf_dag.h"
#include "rf_dagutils.h"
#include "rf_dagfuncs.h"
#include "rf_debugMem.h"
#include "rf_paritylog.h"
#include "rf_general.h"

#include "rf_parityloggingdags.h"

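/*
 * For orientation, a caller-side sketch of how a parity-logging layout
 * might route a full-stripe write to the large-write creator defined below
 * and everything else to the small-write creator.  This hook is
 * hypothetical and not part of this file; its name and exact signature are
 * assumptions made for illustration only.
 *
 *      static void
 *      example_dag_select(RF_Raid_t *raidPtr, RF_IoType_t type,
 *          RF_AccessStripeMap_t *asmap, RF_VoidFuncPtr *createFunc)
 *      {
 *              if (type == RF_IO_TYPE_WRITE &&
 *                  asmap->numStripeUnitsAccessed ==
 *                  raidPtr->Layout.numDataCol)
 *                      *createFunc = (RF_VoidFuncPtr)
 *                          rf_CreateParityLoggingLargeWriteDAG;
 *              else
 *                      *createFunc = (RF_VoidFuncPtr)
 *                          rf_CreateParityLoggingSmallWriteDAG;
 *      }
 */
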
/******************************************************************************
 *
 * creates a DAG to perform a large-write operation:
 *
 *         / Rod \     / Wnd \
 * H -- NIL- Rod - NIL - Wnd ------ NIL - T
 *         \ Rod /     \ Xor - Lpo /
 *
 * The writes are not started until all the reads complete because, if they
 * were issued in parallel, a failure on one of the reads could leave the
 * parity in an inconsistent state, and the retry with a new DAG would then
 * produce erroneous parity.
 *
 * Note:  this DAG has the nasty property that none of the buffers allocated
 *        for reading old data can be freed until the XOR node fires.  Need
 *        to fix this.
 *
 * The last two arguments are the number of faults tolerated and the function
 * used for the redundancy calculation.  The undo for the redundancy
 * calculation is assumed to be null.
 *
 *****************************************************************************/
   72 
   73 void 
   74 rf_CommonCreateParityLoggingLargeWriteDAG(
   75     RF_Raid_t * raidPtr,
   76     RF_AccessStripeMap_t * asmap,
   77     RF_DagHeader_t * dag_h,
   78     void *bp,
   79     RF_RaidAccessFlags_t flags,
   80     RF_AllocListElem_t * allocList,
   81     int nfaults,
   82     int (*redFunc) (RF_DagNode_t *))
   83 {
   84         RF_DagNode_t *nodes, *wndNodes, *rodNodes = NULL, *syncNode, *xorNode,
   85                *lpoNode, *blockNode, *unblockNode, *termNode;
   86         int     nWndNodes, nRodNodes, i;
   87         RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
   88         RF_AccessStripeMapHeader_t *new_asm_h[2];
   89         int     nodeNum, asmNum;
   90         RF_ReconUnitNum_t which_ru;
   91         char   *sosBuffer, *eosBuffer;
   92         RF_PhysDiskAddr_t *pda;
   93         RF_StripeNum_t parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout), asmap->raidAddress, &which_ru);
   94 
   95         if (rf_dagDebug)
   96                 printf("[Creating parity-logging large-write DAG]\n");
   97         RF_ASSERT(nfaults == 1);/* this arch only single fault tolerant */
   98         dag_h->creator = "ParityLoggingLargeWriteDAG";
   99 
  100         /* alloc the Wnd nodes, the xor node, and the Lpo node */
  101         nWndNodes = asmap->numStripeUnitsAccessed;
  102         RF_MallocAndAdd(nodes, (nWndNodes + 6) * sizeof(RF_DagNode_t), 
  103                         (RF_DagNode_t *), allocList);
  104         i = 0;
  105         wndNodes = &nodes[i];
  106         i += nWndNodes;
  107         xorNode = &nodes[i];
  108         i += 1;
  109         lpoNode = &nodes[i];
  110         i += 1;
  111         blockNode = &nodes[i];
  112         i += 1;
  113         syncNode = &nodes[i];
  114         i += 1;
  115         unblockNode = &nodes[i];
  116         i += 1;
  117         termNode = &nodes[i];
  118         i += 1;
  119 
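        /*
         * Resulting layout of the nodes[] allocation above (nWndNodes + 6
         * slots), shown for orientation; it follows directly from the
         * pointer arithmetic just performed:
         *
         *   [0 .. nWndNodes-1]  Wnd   write-new-data nodes
         *   [nWndNodes + 0]     Xor   redundancy computation
         *   [nWndNodes + 1]     Lpo   parity-log overwrite
         *   [nWndNodes + 2]     Blk   block (head) node
         *   [nWndNodes + 3]     Sync  read/write barrier
         *   [nWndNodes + 4]     Unbl  unblock (tail) node
         *   [nWndNodes + 5]     Trm   terminator
         */
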
        dag_h->numCommitNodes = nWndNodes + 1;
        dag_h->numCommits = 0;
        dag_h->numSuccedents = 1;

        rf_MapUnaccessedPortionOfStripe(raidPtr, layoutPtr, asmap, dag_h, new_asm_h, &nRodNodes, &sosBuffer, &eosBuffer, allocList);
        if (nRodNodes > 0)
                RF_MallocAndAdd(rodNodes, nRodNodes * sizeof(RF_DagNode_t),
                                (RF_DagNode_t *), allocList);

        /* begin node initialization */
        rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, nRodNodes + 1, 0, 0, 0, dag_h, "Nil", allocList);
        rf_InitNode(unblockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, 1, nWndNodes + 1, 0, 0, dag_h, "Nil", allocList);
        rf_InitNode(syncNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, nWndNodes + 1, nRodNodes + 1, 0, 0, dag_h, "Nil", allocList);
        rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, NULL, 0, 1, 0, 0, dag_h, "Trm", allocList);

        /* initialize the Rod nodes */
        for (nodeNum = asmNum = 0; asmNum < 2; asmNum++) {
                if (new_asm_h[asmNum]) {
                        pda = new_asm_h[asmNum]->stripeMap->physInfo;
                        while (pda) {
                                rf_InitNode(&rodNodes[nodeNum], rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Rod", allocList);
                                rodNodes[nodeNum].params[0].p = pda;
                                rodNodes[nodeNum].params[1].p = pda->bufPtr;
                                rodNodes[nodeNum].params[2].v = parityStripeID;
                                rodNodes[nodeNum].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
                                nodeNum++;
                                pda = pda->next;
                        }
                }
        }
        RF_ASSERT(nodeNum == nRodNodes);

        /* initialize the Wnd nodes */
        pda = asmap->physInfo;
        for (i = 0; i < nWndNodes; i++) {
                rf_InitNode(&wndNodes[i], rf_wait, RF_TRUE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 4, 0, dag_h, "Wnd", allocList);
                RF_ASSERT(pda != NULL);
                wndNodes[i].params[0].p = pda;
                wndNodes[i].params[1].p = pda->bufPtr;
                wndNodes[i].params[2].v = parityStripeID;
                wndNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
                pda = pda->next;
        }

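        /*
         * Editorial note on the parameter convention above: every disk I/O
         * node in this file carries the same four params, consumed by the
         * read/write do-functions in this order (the ordering is taken from
         * the assignments in this file, not from any external documentation):
         *
         *   params[0].p  RF_PhysDiskAddr_t describing the sectors to access
         *   params[1].p  buffer to read into / write from
         *   params[2].v  parity stripe ID, used for stripe locking
         *   params[3].v  I/O priority and reconstruction unit, packed
         *                together by RF_CREATE_PARAM3()
         */
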
        /* initialize the redundancy node */
        rf_InitNode(xorNode, rf_wait, RF_TRUE, redFunc, rf_NullNodeUndoFunc, NULL, 1, 1, 2 * (nWndNodes + nRodNodes) + 1, 1, dag_h, "Xr ", allocList);
        xorNode->flags |= RF_DAGNODE_FLAG_YIELD;
        for (i = 0; i < nWndNodes; i++) {
                xorNode->params[2 * i + 0] = wndNodes[i].params[0];     /* pda */
                xorNode->params[2 * i + 1] = wndNodes[i].params[1];     /* buf ptr */
        }
        for (i = 0; i < nRodNodes; i++) {
                xorNode->params[2 * (nWndNodes + i) + 0] = rodNodes[i].params[0];       /* pda */
                xorNode->params[2 * (nWndNodes + i) + 1] = rodNodes[i].params[1];       /* buf ptr */
        }
        xorNode->params[2 * (nWndNodes + nRodNodes)].p = raidPtr;       /* xor node needs to get
                                                                         * at RAID information */

        /* look for a Rod node that reads a complete SU.  If none, alloc a
         * buffer to receive the parity info.  Note that we can't use a new
         * data buffer because it will not have been written when the xor
         * occurs. */
        for (i = 0; i < nRodNodes; i++)
                if (((RF_PhysDiskAddr_t *) rodNodes[i].params[0].p)->numSector == raidPtr->Layout.sectorsPerStripeUnit)
                        break;
        if (i == nRodNodes) {
                RF_MallocAndAdd(xorNode->results[0],
                                rf_RaidAddressToByte(raidPtr, raidPtr->Layout.sectorsPerStripeUnit), (void *), allocList);
        } else {
                xorNode->results[0] = rodNodes[i].params[1].p;
        }
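
        /*
         * Worked example for the search above (sector count illustrative
         * only): with 64-sector stripe units, a Rod node whose pda covers
         * all 64 sectors already owns an SU-sized buffer of old data, so
         * that buffer can double as the XOR target.  A new-data buffer
         * cannot serve as the target: the Wnd writes have not yet been
         * issued when the XOR fires, so every new-data buffer is still live
         * as both XOR input and write source; hence the fallback allocation
         * of a fresh SU-sized result buffer.
         */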

        /* initialize the Lpo node */
        rf_InitNode(lpoNode, rf_wait, RF_FALSE, rf_ParityLogOverwriteFunc, rf_ParityLogOverwriteUndoFunc, rf_GenericWakeupFunc, 1, 1, 2, 0, dag_h, "Lpo", allocList);

        lpoNode->params[0].p = asmap->parityInfo;
        lpoNode->params[1].p = xorNode->results[0];
        RF_ASSERT(asmap->parityInfo->next == NULL);     /* parityInfo must
                                                         * describe entire
                                                         * parity unit */

        /* connect nodes to form graph */

        /* connect dag header to block node */
        RF_ASSERT(dag_h->numSuccedents == 1);
        RF_ASSERT(blockNode->numAntecedents == 0);
        dag_h->succedents[0] = blockNode;

        /* connect the block node to the Rod nodes */
        RF_ASSERT(blockNode->numSuccedents == nRodNodes + 1);
        for (i = 0; i < nRodNodes; i++) {
                RF_ASSERT(rodNodes[i].numAntecedents == 1);
                blockNode->succedents[i] = &rodNodes[i];
                rodNodes[i].antecedents[0] = blockNode;
                rodNodes[i].antType[0] = rf_control;
        }

        /* connect the block node to the sync node; this edge is necessary
         * even when nRodNodes == 0 */
        RF_ASSERT(syncNode->numAntecedents == nRodNodes + 1);
        blockNode->succedents[nRodNodes] = syncNode;
        syncNode->antecedents[0] = blockNode;
        syncNode->antType[0] = rf_control;

        /* connect the Rod nodes to the sync node */
        for (i = 0; i < nRodNodes; i++) {
                rodNodes[i].succedents[0] = syncNode;
                syncNode->antecedents[1 + i] = &rodNodes[i];
                syncNode->antType[1 + i] = rf_control;
        }

        /* connect the sync node to the xor node */
        RF_ASSERT(syncNode->numSuccedents == nWndNodes + 1);
        RF_ASSERT(xorNode->numAntecedents == 1);
        syncNode->succedents[0] = xorNode;
        xorNode->antecedents[0] = syncNode;
        xorNode->antType[0] = rf_trueData;      /* carry forward from sync */

        /* connect the sync node to the Wnd nodes */
        for (i = 0; i < nWndNodes; i++) {
                RF_ASSERT(wndNodes[i].numAntecedents == 1);
                syncNode->succedents[1 + i] = &wndNodes[i];
                wndNodes[i].antecedents[0] = syncNode;
                wndNodes[i].antType[0] = rf_control;
        }

        /* connect the xor node to the Lpo node */
        RF_ASSERT(xorNode->numSuccedents == 1);
        RF_ASSERT(lpoNode->numAntecedents == 1);
        xorNode->succedents[0] = lpoNode;
        lpoNode->antecedents[0] = xorNode;
        lpoNode->antType[0] = rf_trueData;

        /* connect the Wnd nodes to the unblock node */
        RF_ASSERT(unblockNode->numAntecedents == nWndNodes + 1);
        for (i = 0; i < nWndNodes; i++) {
                RF_ASSERT(wndNodes[i].numSuccedents == 1);
                wndNodes[i].succedents[0] = unblockNode;
                unblockNode->antecedents[i] = &wndNodes[i];
                unblockNode->antType[i] = rf_control;
        }

        /* connect the Lpo node to the unblock node */
        RF_ASSERT(lpoNode->numSuccedents == 1);
        lpoNode->succedents[0] = unblockNode;
        unblockNode->antecedents[nWndNodes] = lpoNode;
        unblockNode->antType[nWndNodes] = rf_control;

        /* connect the unblock node to the terminator */
        RF_ASSERT(unblockNode->numSuccedents == 1);
        RF_ASSERT(termNode->numAntecedents == 1);
        RF_ASSERT(termNode->numSuccedents == 0);
        unblockNode->succedents[0] = termNode;
        termNode->antecedents[0] = unblockNode;
        termNode->antType[0] = rf_control;
}


/******************************************************************************
 *
 * creates a DAG to perform a small-write operation (either RAID 5 or PQ),
 * which is as follows:
 *
 *                                     Header
 *                                       |
 *                                     Block
 *                                 / |  ... \   \
 *                                /  |       \   \
 *                             Rod  Rod      Rod  Rop
 *                             | \ /| \    / |  \/ |
 *                             |    |        |  /\ |
 *                             Wnd  Wnd      Wnd   X
 *                              |    \       /     |
 *                              |     \     /      |
 *                               \     \   /      Lpo
 *                                \     \ /       /
 *                                 +-> Unblock <-+
 *                                       |
 *                                       T
 *
 *
 * R = Read, W = Write, X = Xor, o = old, n = new, d = data, p = parity.
 * When the access spans a stripe unit boundary and is less than one SU in
 * size, there will be two Rop -- X -- Wnp branches.  I call this the
 * "double-XOR" case.
 * The second output from each Rod node goes to the X node.  In the
 * double-XOR case, there are exactly 2 Rod nodes, and each sends one output
 * to one X node.
 * There is one Rod -- Wnd -- T branch for each stripe unit being updated.
 *
 * The block and unblock nodes are unused.  See the comment above
 * CreateFaultFreeReadDAG.
 *
 * Note:  this DAG ignores all the optimizations related to making the RMWs
 *        atomic.  It also has the nasty property that none of the buffers
 *        allocated for reading old data & parity can be freed until the XOR
 *        node fires.  Need to fix this.
 *
 * A null qfuncs indicates single-fault tolerance.
 *****************************************************************************/

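/*
 * Concrete instance of the "double-XOR" case described above, derived from
 * the conditions used later in this function (the sector numbers are
 * illustrative only): with 64-sector stripe units, a 32-sector write at
 * sector offset 48 touches the tail of one SU and the head of the next, so
 * asmap->parityInfo holds two pdas, numParityNodes becomes 2, and the DAG
 * grows two independent Rop -- X -- Lpu chains, one per parity region.
 */
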
void
rf_CommonCreateParityLoggingSmallWriteDAG(
    RF_Raid_t * raidPtr,
    RF_AccessStripeMap_t * asmap,
    RF_DagHeader_t * dag_h,
    void *bp,
    RF_RaidAccessFlags_t flags,
    RF_AllocListElem_t * allocList,
    RF_RedFuncs_t * pfuncs,
    RF_RedFuncs_t * qfuncs)
{
        RF_DagNode_t *xorNodes, *blockNode, *unblockNode, *nodes;
        RF_DagNode_t *readDataNodes, *readParityNodes;
        RF_DagNode_t *writeDataNodes, *lpuNodes;
        RF_DagNode_t *unlockDataNodes = NULL, *termNode;
        RF_PhysDiskAddr_t *pda = asmap->physInfo;
        int     numDataNodes = asmap->numStripeUnitsAccessed;
        int     numParityNodes = (asmap->parityInfo->next) ? 2 : 1;
        int     i, j, nNodes, totalNumNodes;
        RF_ReconUnitNum_t which_ru;
        int     (*func) (RF_DagNode_t * node), (*undoFunc) (RF_DagNode_t * node);
        int     (*qfunc) (RF_DagNode_t * node);
        char   *name, *qname;
        RF_StripeNum_t parityStripeID = rf_RaidAddressToParityStripeID(&(raidPtr->Layout), asmap->raidAddress, &which_ru);
#ifdef RAID_DIAGNOSTIC
        long    nfaults = qfuncs ? 2 : 1;
#endif /* RAID_DIAGNOSTIC */

        if (rf_dagDebug)
                printf("[Creating parity-logging small-write DAG]\n");
        RF_ASSERT(numDataNodes > 0);
        RF_ASSERT(nfaults == 1);
        dag_h->creator = "ParityLoggingSmallWriteDAG";

        /* DAG creation occurs in four steps:
         * 1. count the number of nodes in the DAG
         * 2. create the nodes
         * 3. initialize the nodes
         * 4. connect the nodes */

        /* Step 1. compute the number of nodes in the graph */

        /* Number of nodes:
         *   a read and a write for each data unit (2 * numDataNodes),
         *   a redundancy computation node for each parity unit
         *     (numParityNodes),
         *   a read and an Lpu for each parity unit (2 * numParityNodes),
         *   a block node, an unblock node, and a terminator node (3).
         * Unlock nodes for atomic RMW are not generated; see the note in
         * the header comment above. */
        totalNumNodes = (2 * numDataNodes) + numParityNodes + (2 * numParityNodes) + 3;

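        /*
         * Worked example of the count above: a two-SU write within one
         * parity stripe gives numDataNodes = 2 and numParityNodes = 1, so
         *
         *   totalNumNodes = (2 * 2) + 1 + (2 * 1) + 3 = 10
         *
         * i.e. 2 Rod + 2 Wnd + 1 Rop + 1 Xor + 1 Lpu + block + unblock
         * + term.
         */
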
        nNodes = numDataNodes + numParityNodes;

        dag_h->numCommitNodes = numDataNodes + numParityNodes;
        dag_h->numCommits = 0;
        dag_h->numSuccedents = 1;

        /* Step 2. create the nodes */
        RF_MallocAndAdd(nodes, totalNumNodes * sizeof(RF_DagNode_t),
                        (RF_DagNode_t *), allocList);
        i = 0;
        blockNode = &nodes[i];
        i += 1;
        unblockNode = &nodes[i];
        i += 1;
        readDataNodes = &nodes[i];
        i += numDataNodes;
        readParityNodes = &nodes[i];
        i += numParityNodes;
        writeDataNodes = &nodes[i];
        i += numDataNodes;
        lpuNodes = &nodes[i];
        i += numParityNodes;
        xorNodes = &nodes[i];
        i += numParityNodes;
        termNode = &nodes[i];
        i += 1;

        RF_ASSERT(i == totalNumNodes);

        /* Step 3. initialize the nodes */
        /* initialize block node (Nil) */
        rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, nNodes, 0, 0, 0, dag_h, "Nil", allocList);

        /* initialize unblock node (Nil) */
        rf_InitNode(unblockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc, NULL, 1, nNodes, 0, 0, dag_h, "Nil", allocList);

        /* initialize terminator node (Trm) */
        rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc, NULL, 0, 1, 0, 0, dag_h, "Trm", allocList);

        /* initialize nodes which read old data (Rod) */
        for (i = 0; i < numDataNodes; i++) {
                rf_InitNode(&readDataNodes[i], rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, rf_GenericWakeupFunc, nNodes, 1, 4, 0, dag_h, "Rod", allocList);
                RF_ASSERT(pda != NULL);
                readDataNodes[i].params[0].p = pda;     /* physical disk addr
                                                         * desc */
                readDataNodes[i].params[1].p = rf_AllocBuffer(raidPtr, dag_h, pda, allocList);  /* buffer to hold old
                                                                                                 * data */
                readDataNodes[i].params[2].v = parityStripeID;
                readDataNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
                pda = pda->next;
                readDataNodes[i].propList[0] = NULL;
                readDataNodes[i].propList[1] = NULL;
        }

        /* initialize nodes which read old parity (Rop) */
        pda = asmap->parityInfo;
        for (i = 0; i < numParityNodes; i++) {
                RF_ASSERT(pda != NULL);
                rf_InitNode(&readParityNodes[i], rf_wait, RF_FALSE, rf_DiskReadFunc, rf_DiskReadUndoFunc, rf_GenericWakeupFunc, nNodes, 1, 4, 0, dag_h, "Rop", allocList);
                readParityNodes[i].params[0].p = pda;
                readParityNodes[i].params[1].p = rf_AllocBuffer(raidPtr, dag_h, pda, allocList);        /* buffer to hold old
                                                                                                         * parity */
                readParityNodes[i].params[2].v = parityStripeID;
                readParityNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);
                readParityNodes[i].propList[0] = NULL;
                pda = pda->next;
        }

        /* initialize nodes which write new data (Wnd) */
        pda = asmap->physInfo;
        for (i = 0; i < numDataNodes; i++) {
                RF_ASSERT(pda != NULL);
                rf_InitNode(&writeDataNodes[i], rf_wait, RF_TRUE, rf_DiskWriteFunc, rf_DiskWriteUndoFunc, rf_GenericWakeupFunc, 1, nNodes, 4, 0, dag_h, "Wnd", allocList);
                writeDataNodes[i].params[0].p = pda;    /* physical disk addr
                                                         * desc */
                writeDataNodes[i].params[1].p = pda->bufPtr;    /* buffer holding new
                                                                 * data to be written */
                writeDataNodes[i].params[2].v = parityStripeID;
                writeDataNodes[i].params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, which_ru);

                pda = pda->next;
        }

        /* initialize nodes which compute new parity */
        /* We use the simple XOR func in the double-XOR case, and when we're
         * accessing only a portion of one stripe unit.  The distinction
         * between the two is that the regular XOR func assumes that the
         * targbuf is a full SU in size, and examines the pda associated with
         * the buffer to decide where within the buffer to XOR the data,
         * whereas the simple XOR func just XORs the data into the start of
         * the buffer. */
        if ((numParityNodes == 2) || ((numDataNodes == 1) && (asmap->totalSectorsAccessed < raidPtr->Layout.sectorsPerStripeUnit))) {
                func = pfuncs->simple;
                undoFunc = rf_NullNodeUndoFunc;
                name = pfuncs->SimpleName;
                if (qfuncs) {
                        qfunc = qfuncs->simple;
                        qname = qfuncs->SimpleName;
                }
        } else {
                func = pfuncs->regular;
                undoFunc = rf_NullNodeUndoFunc;
                name = pfuncs->RegularName;
                if (qfuncs) {
                        qfunc = qfuncs->regular;
                        qname = qfuncs->RegularName;
                }
        }
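
        /*
         * Illustration of the distinction drawn in the comment above
         * (offsets hypothetical): with 64-sector SUs and an access covering
         * sectors 16..31 of one SU, the regular XOR treats its target as a
         * full SU and, guided by the pda, folds the data in at the
         * 16-sector offset within that buffer; the simple XOR instead XORs
         * the data starting at byte 0 of a target sized to the access
         * itself.
         */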
        /* initialize the xor nodes: params are {pda,buf} from {Rod,Wnd,Rop}
         * nodes, and raidPtr */
        if (numParityNodes == 2) {      /* double-xor case */
                for (i = 0; i < numParityNodes; i++) {
                        rf_InitNode(&xorNodes[i], rf_wait, RF_TRUE, func, undoFunc, NULL, 1, nNodes, 7, 1, dag_h, name, allocList);     /* no wakeup func for
                                                                                                                                         * xor */
                        xorNodes[i].flags |= RF_DAGNODE_FLAG_YIELD;
                        xorNodes[i].params[0] = readDataNodes[i].params[0];
                        xorNodes[i].params[1] = readDataNodes[i].params[1];
                        xorNodes[i].params[2] = readParityNodes[i].params[0];
                        xorNodes[i].params[3] = readParityNodes[i].params[1];
                        xorNodes[i].params[4] = writeDataNodes[i].params[0];
                        xorNodes[i].params[5] = writeDataNodes[i].params[1];
                        xorNodes[i].params[6].p = raidPtr;
                        xorNodes[i].results[0] = readParityNodes[i].params[1].p;        /* use old parity buf as
                                                                                         * target buf */
                }
        } else {
                /* there is only one xor node in this case */
                rf_InitNode(&xorNodes[0], rf_wait, RF_TRUE, func, undoFunc, NULL, 1, nNodes, (2 * (numDataNodes + numDataNodes + 1) + 1), 1, dag_h, name, allocList);
                xorNodes[0].flags |= RF_DAGNODE_FLAG_YIELD;
                for (i = 0; i < numDataNodes + 1; i++) {
                        /* set up params related to Rod and Rop nodes */
                        xorNodes[0].params[2 * i + 0] = readDataNodes[i].params[0];     /* pda */
                        xorNodes[0].params[2 * i + 1] = readDataNodes[i].params[1];     /* buffer pointer */
                }
                for (i = 0; i < numDataNodes; i++) {
                        /* set up params related to Wnd and Wnp nodes */
                        xorNodes[0].params[2 * (numDataNodes + 1 + i) + 0] = writeDataNodes[i].params[0];       /* pda */
                        xorNodes[0].params[2 * (numDataNodes + 1 + i) + 1] = writeDataNodes[i].params[1];       /* buffer pointer */
                }
                xorNodes[0].params[2 * (numDataNodes + numDataNodes + 1)].p = raidPtr;  /* xor node needs to get
                                                                                         * at RAID information */
                xorNodes[0].results[0] = readParityNodes[0].params[1].p;
        }
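
        /*
         * Editorial note on the first loop in the else-branch above:
         * iterating i up to and including numDataNodes deliberately reads
         * one element past the end of readDataNodes[].  This is safe only
         * because readParityNodes[] was carved out of the same nodes[]
         * allocation immediately after readDataNodes[] (see Step 2), so
         * readDataNodes[numDataNodes] aliases readParityNodes[0]; that is
         * how the single Rop node's {pda, buffer} pair lands in the XOR
         * parameter list.
         */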

        /* initialize the log node(s) */
        pda = asmap->parityInfo;
        for (i = 0; i < numParityNodes; i++) {
                RF_ASSERT(pda);
                rf_InitNode(&lpuNodes[i], rf_wait, RF_FALSE, rf_ParityLogUpdateFunc, rf_ParityLogUpdateUndoFunc, rf_GenericWakeupFunc, 1, 1, 2, 0, dag_h, "Lpu", allocList);
                lpuNodes[i].params[0].p = pda;  /* PhysDiskAddr of parity */
                lpuNodes[i].params[1].p = xorNodes[i].results[0];       /* buffer pointer to
                                                                         * parity */
                pda = pda->next;
        }

        /* Step 4. connect the nodes */

        /* connect header to block node */
        RF_ASSERT(dag_h->numSuccedents == 1);
        RF_ASSERT(blockNode->numAntecedents == 0);
        dag_h->succedents[0] = blockNode;

        /* connect block node to read old data nodes */
        RF_ASSERT(blockNode->numSuccedents == (numDataNodes + numParityNodes));
        for (i = 0; i < numDataNodes; i++) {
                blockNode->succedents[i] = &readDataNodes[i];
                RF_ASSERT(readDataNodes[i].numAntecedents == 1);
                readDataNodes[i].antecedents[0] = blockNode;
                readDataNodes[i].antType[0] = rf_control;
        }

        /* connect block node to read old parity nodes */
        for (i = 0; i < numParityNodes; i++) {
                blockNode->succedents[numDataNodes + i] = &readParityNodes[i];
                RF_ASSERT(readParityNodes[i].numAntecedents == 1);
                readParityNodes[i].antecedents[0] = blockNode;
                readParityNodes[i].antType[0] = rf_control;
        }

        /* connect read old data nodes to write new data nodes */
        for (i = 0; i < numDataNodes; i++) {
                RF_ASSERT(readDataNodes[i].numSuccedents == numDataNodes + numParityNodes);
                for (j = 0; j < numDataNodes; j++) {
                        RF_ASSERT(writeDataNodes[j].numAntecedents == numDataNodes + numParityNodes);
                        readDataNodes[i].succedents[j] = &writeDataNodes[j];
                        writeDataNodes[j].antecedents[i] = &readDataNodes[i];
                        if (i == j)
                                writeDataNodes[j].antType[i] = rf_antiData;
                        else
                                writeDataNodes[j].antType[i] = rf_control;
                }
        }
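
        /*
         * Why the antType distinction just above: when i == j, Wnd[j]
         * overwrites the very sectors Rod[i] reads, so the edge is a true
         * anti-dependence (the read must complete before its source sectors
         * are overwritten); for i != j the edge merely sequences the graph,
         * hence rf_control.
         */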

        /* connect read old data nodes to xor nodes */
        for (i = 0; i < numDataNodes; i++)
                for (j = 0; j < numParityNodes; j++) {
                        RF_ASSERT(xorNodes[j].numAntecedents == numDataNodes + numParityNodes);
                        readDataNodes[i].succedents[numDataNodes + j] = &xorNodes[j];
                        xorNodes[j].antecedents[i] = &readDataNodes[i];
                        xorNodes[j].antType[i] = rf_trueData;
                }

        /* connect read old parity nodes to write new data nodes */
        for (i = 0; i < numParityNodes; i++) {
                RF_ASSERT(readParityNodes[i].numSuccedents == numDataNodes + numParityNodes);
                for (j = 0; j < numDataNodes; j++) {
                        readParityNodes[i].succedents[j] = &writeDataNodes[j];
                        writeDataNodes[j].antecedents[numDataNodes + i] = &readParityNodes[i];
                        writeDataNodes[j].antType[numDataNodes + i] = rf_control;
                }
        }

        /* connect read old parity nodes to xor nodes */
        for (i = 0; i < numParityNodes; i++)
                for (j = 0; j < numParityNodes; j++) {
                        readParityNodes[i].succedents[numDataNodes + j] = &xorNodes[j];
                        xorNodes[j].antecedents[numDataNodes + i] = &readParityNodes[i];
                        xorNodes[j].antType[numDataNodes + i] = rf_trueData;
                }

        /* connect xor nodes to parity log update (Lpu) nodes */
        for (i = 0; i < numParityNodes; i++) {
                RF_ASSERT(xorNodes[i].numSuccedents == 1);
                RF_ASSERT(lpuNodes[i].numAntecedents == 1);
                xorNodes[i].succedents[0] = &lpuNodes[i];
                lpuNodes[i].antecedents[0] = &xorNodes[i];
                lpuNodes[i].antType[0] = rf_trueData;
        }

        /* connect write new data nodes to unblock node */
        for (i = 0; i < numDataNodes; i++) {
                RF_ASSERT(writeDataNodes[i].numSuccedents == 1);
                RF_ASSERT(unblockNode->numAntecedents == (numDataNodes + (nfaults * numParityNodes)));
                writeDataNodes[i].succedents[0] = unblockNode;
                unblockNode->antecedents[i] = &writeDataNodes[i];
                unblockNode->antType[i] = rf_control;
        }

        /* connect Lpu nodes to unblock node */
        for (i = 0; i < numParityNodes; i++) {
                RF_ASSERT(lpuNodes[i].numSuccedents == 1);
                lpuNodes[i].succedents[0] = unblockNode;
                unblockNode->antecedents[numDataNodes + i] = &lpuNodes[i];
                unblockNode->antType[numDataNodes + i] = rf_control;
        }

        /* connect unblock node to terminator */
        RF_ASSERT(unblockNode->numSuccedents == 1);
        RF_ASSERT(termNode->numAntecedents == 1);
        RF_ASSERT(termNode->numSuccedents == 0);
        unblockNode->succedents[0] = termNode;
        termNode->antecedents[0] = unblockNode;
        termNode->antType[0] = rf_control;
}

void
rf_CreateParityLoggingSmallWriteDAG(
    RF_Raid_t * raidPtr,
    RF_AccessStripeMap_t * asmap,
    RF_DagHeader_t * dag_h,
    void *bp,
    RF_RaidAccessFlags_t flags,
    RF_AllocListElem_t * allocList,
    RF_RedFuncs_t * pfuncs,
    RF_RedFuncs_t * qfuncs)
{
        dag_h->creator = "ParityLoggingSmallWriteDAG";
        rf_CommonCreateParityLoggingSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_xorFuncs, NULL);
}
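
/*
 * Editorial note: the pfuncs and qfuncs arguments of the wrapper above are
 * intentionally not forwarded; the common routine is always handed
 * rf_xorFuncs and a NULL qfuncs, pinning the DAG to the single-fault-
 * tolerant XOR path.  The same pattern appears in the large-write wrapper
 * below, which passes a hardwired nfaults of 1 and rf_RegularXorFunc
 * regardless of its own nfaults/redFunc arguments.
 */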

void
rf_CreateParityLoggingLargeWriteDAG(
    RF_Raid_t * raidPtr,
    RF_AccessStripeMap_t * asmap,
    RF_DagHeader_t * dag_h,
    void *bp,
    RF_RaidAccessFlags_t flags,
    RF_AllocListElem_t * allocList,
    int nfaults,
    int (*redFunc) (RF_DagNode_t *))
{
        dag_h->creator = "ParityLoggingLargeWriteDAG";
        rf_CommonCreateParityLoggingLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, 1, rf_RegularXorFunc);
}
#endif                          /* RF_INCLUDE_PARITYLOGGING > 0 */
