The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/dev/raidframe/rf_dagffrd.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*      $NetBSD: rf_dagffrd.c,v 1.13 2004/03/18 16:40:05 oster Exp $    */
    2 /*
    3  * Copyright (c) 1995 Carnegie-Mellon University.
    4  * All rights reserved.
    5  *
    6  * Author: Mark Holland, Daniel Stodolsky, William V. Courtright II
    7  *
    8  * Permission to use, copy, modify and distribute this software and
    9  * its documentation is hereby granted, provided that both the copyright
   10  * notice and this permission notice appear in all copies of the
   11  * software, derivative works or modified versions, and any portions
   12  * thereof, and that both notices appear in supporting documentation.
   13  *
   14  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
   15  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
   16  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
   17  *
   18  * Carnegie Mellon requests users of this software to return to
   19  *
   20  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
   21  *  School of Computer Science
   22  *  Carnegie Mellon University
   23  *  Pittsburgh PA 15213-3890
   24  *
   25  * any improvements or extensions that they make and grant Carnegie the
   26  * rights to redistribute these changes.
   27  */
   28 
   29 /*
   30  * rf_dagffrd.c
   31  *
   32  * code for creating fault-free read DAGs
   33  *
   34  */
   35 
   36 #include <sys/cdefs.h>
   37 __KERNEL_RCSID(0, "$NetBSD: rf_dagffrd.c,v 1.13 2004/03/18 16:40:05 oster Exp $");
   38 
   39 #include <dev/raidframe/raidframevar.h>
   40 
   41 #include "rf_raid.h"
   42 #include "rf_dag.h"
   43 #include "rf_dagutils.h"
   44 #include "rf_dagfuncs.h"
   45 #include "rf_debugMem.h"
   46 #include "rf_general.h"
   47 #include "rf_dagffrd.h"
   48 
   49 /******************************************************************************
   50  *
   51  * General comments on DAG creation:
   52  *
   53  * All DAGs in this file use roll-away error recovery.  Each DAG has a single
   54  * commit node, usually called "Cmt."  If an error occurs before the Cmt node
   55  * is reached, the execution engine will halt forward execution and work
   56  * backward through the graph, executing the undo functions.  Assuming that
   57  * each node in the graph prior to the Cmt node are undoable and atomic - or -
   58  * does not make changes to permanent state, the graph will fail atomically.
   59  * If an error occurs after the Cmt node executes, the engine will roll-forward
   60  * through the graph, blindly executing nodes until it reaches the end.
   61  * If a graph reaches the end, it is assumed to have completed successfully.
   62  *
   63  * A graph has only 1 Cmt node.
   64  *
   65  */
   66 
   67 
   68 /******************************************************************************
   69  *
   70  * The following wrappers map the standard DAG creation interface to the
   71  * DAG creation routines.  Additionally, these wrappers enable experimentation
   72  * with new DAG structures by providing an extra level of indirection, allowing
   73  * the DAG creation routines to be replaced at this single point.
   74  */
   75 
   76 void 
   77 rf_CreateFaultFreeReadDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
   78                           RF_DagHeader_t *dag_h, void *bp,
   79                           RF_RaidAccessFlags_t flags,
   80                           RF_AllocListElem_t *allocList)
   81 {
   82         rf_CreateNonredundantDAG(raidPtr, asmap, dag_h, bp, flags, allocList,
   83             RF_IO_TYPE_READ);
   84 }
   85 
   86 
   87 /******************************************************************************
   88  *
   89  * DAG creation code begins here
   90  */
   91 
   92 /******************************************************************************
   93  *
   94  * creates a DAG to perform a nonredundant read or write of data within one
   95  * stripe.
   96  * For reads, this DAG is as follows:
   97  *
   98  *                   /---- read ----\
   99  *    Header -- Block ---- read ---- Commit -- Terminate
  100  *                   \---- read ----/
  101  *
  102  * For writes, this DAG is as follows:
  103  *
  104  *                    /---- write ----\
  105  *    Header -- Commit ---- write ---- Block -- Terminate
  106  *                    \---- write ----/
  107  *
  108  * There is one disk node per stripe unit accessed, and all disk nodes are in
  109  * parallel.
  110  *
  111  * Tricky point here:  The first disk node (read or write) is created
  112  * normally.  Subsequent disk nodes are created by copying the first one,
  113  * and modifying a few params.  The "succedents" and "antecedents" fields are
  114  * _not_ re-created in each node, but rather left pointing to the same array
  115  * that was malloc'd when the first node was created.  Thus, it's essential
  116  * that when this DAG is freed, the succedents and antecedents fields be freed
  117  * in ONLY ONE of the read nodes.  This does not apply to the "params" field
  118  * because it is recreated for each READ node.
  119  *
  120  * Note that normal-priority accesses do not need to be tagged with their
  121  * parity stripe ID, because they will never be promoted.  Hence, I've
  122  * commented-out the code to do this, and marked it with UNNEEDED.
  123  *
  124  *****************************************************************************/
  125 
  126 void 
  127 rf_CreateNonredundantDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
  128                          RF_DagHeader_t *dag_h, void *bp,
  129                          RF_RaidAccessFlags_t flags,
  130                          RF_AllocListElem_t *allocList,
  131                          RF_IoType_t type)
  132 {
  133         RF_DagNode_t *diskNodes, *blockNode, *commitNode, *termNode;
  134         RF_DagNode_t *tmpNode, *tmpdiskNode;
  135         RF_PhysDiskAddr_t *pda = asmap->physInfo;
  136         int     (*doFunc) (RF_DagNode_t *), (*undoFunc) (RF_DagNode_t *);
  137         int     i, n, totalNumNodes;
  138         char   *name;
  139 
  140         n = asmap->numStripeUnitsAccessed;
  141         dag_h->creator = "NonredundantDAG";
  142 
  143         RF_ASSERT(RF_IO_IS_R_OR_W(type));
  144         switch (type) {
  145         case RF_IO_TYPE_READ:
  146                 doFunc = rf_DiskReadFunc;
  147                 undoFunc = rf_DiskReadUndoFunc;
  148                 name = "R  ";
  149 #if RF_DEBUG_DAG
  150                 if (rf_dagDebug)
  151                         printf("[Creating non-redundant read DAG]\n");
  152 #endif
  153                 break;
  154         case RF_IO_TYPE_WRITE:
  155                 doFunc = rf_DiskWriteFunc;
  156                 undoFunc = rf_DiskWriteUndoFunc;
  157                 name = "W  ";
  158 #if RF_DEBUG_DAG
  159                 if (rf_dagDebug)
  160                         printf("[Creating non-redundant write DAG]\n");
  161 #endif
  162                 break;
  163         default:
  164                 RF_PANIC();
  165         }
  166 
  167         /*
  168          * For reads, the dag can not commit until the block node is reached.
  169          * for writes, the dag commits immediately.
  170          */
  171         dag_h->numCommitNodes = 1;
  172         dag_h->numCommits = 0;
  173         dag_h->numSuccedents = 1;
  174 
  175         /*
  176          * Node count:
  177          * 1 block node
  178          * n data reads (or writes)
  179          * 1 commit node
  180          * 1 terminator node
  181          */
  182         RF_ASSERT(n > 0);
  183         totalNumNodes = n + 3;
  184 
  185         for (i = 0; i < n; i++) {
  186                 tmpNode = rf_AllocDAGNode();
  187                 tmpNode->list_next = dag_h->nodes;
  188                 dag_h->nodes = tmpNode;
  189         }
  190         diskNodes = dag_h->nodes;
  191 
  192         blockNode = rf_AllocDAGNode();
  193         blockNode->list_next = dag_h->nodes;
  194         dag_h->nodes = blockNode;
  195 
  196         commitNode = rf_AllocDAGNode();
  197         commitNode->list_next = dag_h->nodes;
  198         dag_h->nodes = commitNode;
  199 
  200         termNode = rf_AllocDAGNode();
  201         termNode->list_next = dag_h->nodes;
  202         dag_h->nodes = termNode;
  203 
  204         /* initialize nodes */
  205         switch (type) {
  206         case RF_IO_TYPE_READ:
  207                 rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc,
  208                     NULL, n, 0, 0, 0, dag_h, "Nil", allocList);
  209                 rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc,
  210                     NULL, 1, n, 0, 0, dag_h, "Cmt", allocList);
  211                 rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc,
  212                     NULL, 0, 1, 0, 0, dag_h, "Trm", allocList);
  213                 break;
  214         case RF_IO_TYPE_WRITE:
  215                 rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc, rf_NullNodeUndoFunc,
  216                     NULL, 1, 0, 0, 0, dag_h, "Nil", allocList);
  217                 rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc, rf_NullNodeUndoFunc,
  218                     NULL, n, 1, 0, 0, dag_h, "Cmt", allocList);
  219                 rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc, rf_TerminateUndoFunc,
  220                     NULL, 0, n, 0, 0, dag_h, "Trm", allocList);
  221                 break;
  222         default:
  223                 RF_PANIC();
  224         }
  225 
  226         tmpdiskNode = diskNodes;
  227         for (i = 0; i < n; i++) {
  228                 RF_ASSERT(pda != NULL);
  229                 rf_InitNode(tmpdiskNode, rf_wait, RF_FALSE, doFunc, undoFunc, rf_GenericWakeupFunc,
  230                     1, 1, 4, 0, dag_h, name, allocList);
  231                 tmpdiskNode->params[0].p = pda;
  232                 tmpdiskNode->params[1].p = pda->bufPtr;
  233                 /* parity stripe id is not necessary */
  234                 tmpdiskNode->params[2].v = 0;
  235                 tmpdiskNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0);
  236                 pda = pda->next;
  237                 tmpdiskNode = tmpdiskNode->list_next;
  238         }
  239 
  240         /*
  241          * Connect nodes.
  242          */
  243 
  244         /* connect hdr to block node */
  245         RF_ASSERT(blockNode->numAntecedents == 0);
  246         dag_h->succedents[0] = blockNode;
  247 
  248         if (type == RF_IO_TYPE_READ) {
  249                 /* connecting a nonredundant read DAG */
  250                 RF_ASSERT(blockNode->numSuccedents == n);
  251                 RF_ASSERT(commitNode->numAntecedents == n);
  252                 tmpdiskNode = diskNodes;
  253                 for (i = 0; i < n; i++) {
  254                         /* connect block node to each read node */
  255                         RF_ASSERT(tmpdiskNode->numAntecedents == 1);
  256                         blockNode->succedents[i] = tmpdiskNode;
  257                         tmpdiskNode->antecedents[0] = blockNode;
  258                         tmpdiskNode->antType[0] = rf_control;
  259 
  260                         /* connect each read node to the commit node */
  261                         RF_ASSERT(tmpdiskNode->numSuccedents == 1);
  262                         tmpdiskNode->succedents[0] = commitNode;
  263                         commitNode->antecedents[i] = tmpdiskNode;
  264                         commitNode->antType[i] = rf_control;
  265                         tmpdiskNode = tmpdiskNode->list_next;
  266                 }
  267                 /* connect the commit node to the term node */
  268                 RF_ASSERT(commitNode->numSuccedents == 1);
  269                 RF_ASSERT(termNode->numAntecedents == 1);
  270                 RF_ASSERT(termNode->numSuccedents == 0);
  271                 commitNode->succedents[0] = termNode;
  272                 termNode->antecedents[0] = commitNode;
  273                 termNode->antType[0] = rf_control;
  274         } else {
  275                 /* connecting a nonredundant write DAG */
  276                 /* connect the block node to the commit node */
  277                 RF_ASSERT(blockNode->numSuccedents == 1);
  278                 RF_ASSERT(commitNode->numAntecedents == 1);
  279                 blockNode->succedents[0] = commitNode;
  280                 commitNode->antecedents[0] = blockNode;
  281                 commitNode->antType[0] = rf_control;
  282 
  283                 RF_ASSERT(commitNode->numSuccedents == n);
  284                 RF_ASSERT(termNode->numAntecedents == n);
  285                 RF_ASSERT(termNode->numSuccedents == 0);
  286                 tmpdiskNode = diskNodes;
  287                 for (i = 0; i < n; i++) {
  288                         /* connect the commit node to each write node */
  289                         RF_ASSERT(tmpdiskNode->numAntecedents == 1);
  290                         commitNode->succedents[i] = tmpdiskNode;
  291                         tmpdiskNode->antecedents[0] = commitNode;
  292                         tmpdiskNode->antType[0] = rf_control;
  293 
  294                         /* connect each write node to the term node */
  295                         RF_ASSERT(tmpdiskNode->numSuccedents == 1);
  296                         tmpdiskNode->succedents[0] = termNode;
  297                         termNode->antecedents[i] = tmpdiskNode;
  298                         termNode->antType[i] = rf_control;
  299                         tmpdiskNode = tmpdiskNode->list_next;
  300                 }
  301         }
  302 }
  303 /******************************************************************************
  304  * Create a fault-free read DAG for RAID level 1
  305  *
  306  * Hdr -> Nil -> Rmir -> Cmt -> Trm
  307  *
  308  * The "Rmir" node schedules a read from the disk in the mirror pair with the
  309  * shortest disk queue.  the proper queue is selected at Rmir execution.  this
  310  * deferred mapping is unlike other archs in RAIDframe which generally fix
  311  * mapping at DAG creation time.
  312  *
  313  * Parameters:  raidPtr   - description of the physical array
  314  *              asmap     - logical & physical addresses for this access
  315  *              bp        - buffer ptr (for holding read data)
  316  *              flags     - general flags (e.g. disk locking)
  317  *              allocList - list of memory allocated in DAG creation
  318  *****************************************************************************/
  319 
  320 static void 
  321 CreateMirrorReadDAG(RF_Raid_t *raidPtr, RF_AccessStripeMap_t *asmap,
  322                     RF_DagHeader_t *dag_h, void *bp,
  323                     RF_RaidAccessFlags_t flags, 
  324                     RF_AllocListElem_t *allocList,
  325                     int (*readfunc) (RF_DagNode_t * node))
  326 {
  327         RF_DagNode_t *readNodes, *blockNode, *commitNode, *termNode;
  328         RF_DagNode_t *tmpNode, *tmpreadNode;
  329         RF_PhysDiskAddr_t *data_pda = asmap->physInfo;
  330         RF_PhysDiskAddr_t *parity_pda = asmap->parityInfo;
  331         int     i, n, totalNumNodes;
  332 
  333         n = asmap->numStripeUnitsAccessed;
  334         dag_h->creator = "RaidOneReadDAG";
  335 #if RF_DEBUG_DAG
  336         if (rf_dagDebug) {
  337                 printf("[Creating RAID level 1 read DAG]\n");
  338         }
  339 #endif
  340         /*
  341          * This dag can not commit until the commit node is reached
  342          * errors prior to the commit point imply the dag has failed.
  343          */
  344         dag_h->numCommitNodes = 1;
  345         dag_h->numCommits = 0;
  346         dag_h->numSuccedents = 1;
  347 
  348         /*
  349          * Node count:
  350          * n data reads
  351          * 1 block node
  352          * 1 commit node
  353          * 1 terminator node
  354          */
  355         RF_ASSERT(n > 0);
  356         totalNumNodes = n + 3;
  357 
  358         for (i = 0; i < n; i++) {
  359                 tmpNode = rf_AllocDAGNode();
  360                 tmpNode->list_next = dag_h->nodes;
  361                 dag_h->nodes = tmpNode;
  362         }
  363         readNodes = dag_h->nodes;
  364 
  365         blockNode = rf_AllocDAGNode();
  366         blockNode->list_next = dag_h->nodes;
  367         dag_h->nodes = blockNode;
  368 
  369         commitNode = rf_AllocDAGNode();
  370         commitNode->list_next = dag_h->nodes;
  371         dag_h->nodes = commitNode;
  372 
  373         termNode = rf_AllocDAGNode();
  374         termNode->list_next = dag_h->nodes;
  375         dag_h->nodes = termNode;
  376 
  377         /* initialize nodes */
  378         rf_InitNode(blockNode, rf_wait, RF_FALSE, rf_NullNodeFunc,
  379             rf_NullNodeUndoFunc, NULL, n, 0, 0, 0, dag_h, "Nil", allocList);
  380         rf_InitNode(commitNode, rf_wait, RF_TRUE, rf_NullNodeFunc,
  381             rf_NullNodeUndoFunc, NULL, 1, n, 0, 0, dag_h, "Cmt", allocList);
  382         rf_InitNode(termNode, rf_wait, RF_FALSE, rf_TerminateFunc,
  383             rf_TerminateUndoFunc, NULL, 0, 1, 0, 0, dag_h, "Trm", allocList);
  384 
  385         tmpreadNode = readNodes;
  386         for (i = 0; i < n; i++) {
  387                 RF_ASSERT(data_pda != NULL);
  388                 RF_ASSERT(parity_pda != NULL);
  389                 rf_InitNode(tmpreadNode, rf_wait, RF_FALSE, readfunc,
  390                     rf_DiskReadMirrorUndoFunc, rf_GenericWakeupFunc, 1, 1, 5, 0, dag_h,
  391                     "Rmir", allocList);
  392                 tmpreadNode->params[0].p = data_pda;
  393                 tmpreadNode->params[1].p = data_pda->bufPtr;
  394                 /* parity stripe id is not necessary */
  395                 tmpreadNode->params[2].p = 0;
  396                 tmpreadNode->params[3].v = RF_CREATE_PARAM3(RF_IO_NORMAL_PRIORITY, 0);
  397                 tmpreadNode->params[4].p = parity_pda;
  398                 data_pda = data_pda->next;
  399                 parity_pda = parity_pda->next;
  400                 tmpreadNode = tmpreadNode->list_next;
  401         }
  402 
  403         /*
  404          * Connect nodes
  405          */
  406 
  407         /* connect hdr to block node */
  408         RF_ASSERT(blockNode->numAntecedents == 0);
  409         dag_h->succedents[0] = blockNode;
  410 
  411         /* connect block node to read nodes */
  412         RF_ASSERT(blockNode->numSuccedents == n);
  413         tmpreadNode = readNodes;
  414         for (i = 0; i < n; i++) {
  415                 RF_ASSERT(tmpreadNode->numAntecedents == 1);
  416                 blockNode->succedents[i] = tmpreadNode;
  417                 tmpreadNode->antecedents[0] = blockNode;
  418                 tmpreadNode->antType[0] = rf_control;
  419                 tmpreadNode = tmpreadNode->list_next;
  420         }
  421 
  422         /* connect read nodes to commit node */
  423         RF_ASSERT(commitNode->numAntecedents == n);
  424         tmpreadNode = readNodes;
  425         for (i = 0; i < n; i++) {
  426                 RF_ASSERT(tmpreadNode->numSuccedents == 1);
  427                 tmpreadNode->succedents[0] = commitNode;
  428                 commitNode->antecedents[i] = tmpreadNode;
  429                 commitNode->antType[i] = rf_control;
  430                 tmpreadNode = tmpreadNode->list_next;
  431         }
  432 
  433         /* connect commit node to term node */
  434         RF_ASSERT(commitNode->numSuccedents == 1);
  435         RF_ASSERT(termNode->numAntecedents == 1);
  436         RF_ASSERT(termNode->numSuccedents == 0);
  437         commitNode->succedents[0] = termNode;
  438         termNode->antecedents[0] = commitNode;
  439         termNode->antType[0] = rf_control;
  440 }
  441 
  442 void 
  443 rf_CreateMirrorIdleReadDAG(
  444     RF_Raid_t * raidPtr,
  445     RF_AccessStripeMap_t * asmap,
  446     RF_DagHeader_t * dag_h,
  447     void *bp,
  448     RF_RaidAccessFlags_t flags,
  449     RF_AllocListElem_t * allocList)
  450 {
  451         CreateMirrorReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList,
  452             rf_DiskReadMirrorIdleFunc);
  453 }
  454 
  455 #if (RF_INCLUDE_CHAINDECLUSTER > 0) || (RF_INCLUDE_INTERDECLUSTER > 0)
  456 
  457 void 
  458 rf_CreateMirrorPartitionReadDAG(RF_Raid_t *raidPtr,
  459                                 RF_AccessStripeMap_t *asmap,
  460                                 RF_DagHeader_t *dag_h, void *bp,
  461                                 RF_RaidAccessFlags_t flags,
  462                                 RF_AllocListElem_t *allocList)
  463 {
  464         CreateMirrorReadDAG(raidPtr, asmap, dag_h, bp, flags, allocList,
  465             rf_DiskReadMirrorPartitionFunc);
  466 }
  467 #endif

Cache object: 0de00ede565179b02ec61df95eaa1e5b


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.