The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/dev/raidframe/rf_evenodd_dagfuncs.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*      $NetBSD: rf_evenodd_dagfuncs.c,v 1.13 2003/12/29 02:38:17 oster Exp $   */
    2 /*
    3  * Copyright (c) 1995 Carnegie-Mellon University.
    4  * All rights reserved.
    5  *
    6  * Author: ChangMing Wu
    7  *
    8  * Permission to use, copy, modify and distribute this software and
    9  * its documentation is hereby granted, provided that both the copyright
   10  * notice and this permission notice appear in all copies of the
   11  * software, derivative works or modified versions, and any portions
   12  * thereof, and that both notices appear in supporting documentation.
   13  *
   14  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
   15  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
   16  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
   17  *
   18  * Carnegie Mellon requests users of this software to return to
   19  *
   20  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
   21  *  School of Computer Science
   22  *  Carnegie Mellon University
   23  *  Pittsburgh PA 15213-3890
   24  *
   25  * any improvements or extensions that they make and grant Carnegie the
   26  * rights to redistribute these changes.
   27  */
   28 
   29 /*
   30  * Code for RAID-EVENODD  architecture.
   31  */
   32 
   33 #include <sys/cdefs.h>
   34 __KERNEL_RCSID(0, "$NetBSD: rf_evenodd_dagfuncs.c,v 1.13 2003/12/29 02:38:17 oster Exp $");
   35 
   36 #include "rf_archs.h"
   37 #include "opt_raid_diagnostic.h"
   38 
   39 #if RF_INCLUDE_EVENODD > 0
   40 
   41 #include <dev/raidframe/raidframevar.h>
   42 
   43 #include "rf_raid.h"
   44 #include "rf_dag.h"
   45 #include "rf_dagffrd.h"
   46 #include "rf_dagffwr.h"
   47 #include "rf_dagdegrd.h"
   48 #include "rf_dagdegwr.h"
   49 #include "rf_dagutils.h"
   50 #include "rf_dagfuncs.h"
   51 #include "rf_etimer.h"
   52 #include "rf_general.h"
   53 #include "rf_parityscan.h"
   54 #include "rf_evenodd.h"
   55 #include "rf_evenodd_dagfuncs.h"
   56 
   57 /* These redundant functions are for small write */
   58 RF_RedFuncs_t rf_EOSmallWritePFuncs = {rf_RegularXorFunc, "Regular Old-New P", rf_SimpleXorFunc, "Simple Old-New P"};
   59 RF_RedFuncs_t rf_EOSmallWriteEFuncs = {rf_RegularONEFunc, "Regular Old-New E", rf_SimpleONEFunc, "Regular Old-New E"};
   60 /* These redundant functions are for degraded read */
   61 RF_RedFuncs_t rf_eoPRecoveryFuncs = {rf_RecoveryXorFunc, "Recovery Xr", rf_RecoveryXorFunc, "Recovery Xr"};
   62 RF_RedFuncs_t rf_eoERecoveryFuncs = {rf_RecoveryEFunc, "Recovery E Func", rf_RecoveryEFunc, "Recovery E Func"};
/**********************************************************************************************
 *   The following encoding node function is used in EO_000_CreateLargeWriteDAG
 **********************************************************************************************/
   66 int 
   67 rf_RegularPEFunc(node)
   68         RF_DagNode_t *node;
   69 {
   70         rf_RegularESubroutine(node, node->results[1]);
   71         rf_RegularXorFunc(node);/* does the wakeup here! */
   72 #if 1
   73         return (0);             /* XXX This was missing... GO */
   74 #endif
   75 }
   76 
   77 
/************************************************************************************************
 *  For EO_001_CreateSmallWriteDAG, there are two functions to choose from: (i) RegularONEFunc()
 *  and (ii) SimpleONEFunc(). The former is used when the write accesses at least a full stripe
 *  unit's worth of sectors. The latter is used when the write accesses two stripe units but the
 *  total number of sectors is less than the sectors per SU. In this case, the accessed areas of
 *  parity and 'E' are disconnected within their stripe units, and the parity write and the 'E'
 *  write are each divided into two distinct writes (four in total). The simple old-new write
 *  and the regular old-new write happen as in RAID-5.
 ************************************************************************************************/
   86 
/* Algorithm:
     1. Store the difference of old data and new data in the Rod buffer.
     2. Then encode this buffer into the buffer which already has the old 'E' information
        inside it; the result can be shown to be the new 'E' information.
     3. Xor the Wnd buffer into the difference buffer to recover the original old data.
   There is an alternative: allocate a temporary buffer for storing the difference of
   old data and new data, then encode the temporary buffer into the old 'E' buffer to form
   the new 'E'. That approach runs at the same speed as the one above but needs more memory.
*/
   96 int 
   97 rf_RegularONEFunc(node)
   98         RF_DagNode_t *node;
   99 {
  100         RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
  101         RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
  102         int     EpdaIndex = (node->numParams - 1) / 2 - 1;      /* the parameter of node
  103                                                                  * where you can find
  104                                                                  * e-pda */
  105         int     i, k, retcode = 0;
  106         int     suoffset, length;
  107         RF_RowCol_t scol;
  108         char   *srcbuf, *destbuf;
  109         RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
  110         RF_Etimer_t timer;
  111         RF_PhysDiskAddr_t *pda;
  112 #ifdef RAID_DIAGNOSTIC
  113         RF_PhysDiskAddr_t *EPDA =
  114             (RF_PhysDiskAddr_t *) node->params[EpdaIndex].p;
  115         int     ESUOffset = rf_StripeUnitOffset(layoutPtr, EPDA->startSector);
  116 #endif /* RAID_DIAGNOSTIC */
  117 
  118         RF_ASSERT(EPDA->type == RF_PDA_TYPE_Q);
  119         RF_ASSERT(ESUOffset == 0);
  120 
  121         RF_ETIMER_START(timer);
  122 
  123         /* Xor the Wnd buffer into Rod buffer, the difference of old data and
  124          * new data is stored in Rod buffer */
  125         for (k = 0; k < EpdaIndex; k += 2) {
  126                 length = rf_RaidAddressToByte(raidPtr, ((RF_PhysDiskAddr_t *) node->params[k].p)->numSector);
  127                 retcode = rf_bxor(node->params[k + EpdaIndex + 3].p, node->params[k + 1].p, length, node->dagHdr->bp);
  128         }
  129         /* Start to encoding the buffer storing the difference of old data and
  130          * new data into 'E' buffer  */
  131         for (i = 0; i < EpdaIndex; i += 2)
  132                 if (node->params[i + 1].p != node->results[0]) {        /* results[0] is buf ptr
  133                                                                          * of E */
  134                         pda = (RF_PhysDiskAddr_t *) node->params[i].p;
  135                         srcbuf = (char *) node->params[i + 1].p;
  136                         scol = rf_EUCol(layoutPtr, pda->raidAddress);
  137                         suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
  138                         destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr, suoffset);
  139                         rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector);
  140                 }
  141         /* Recover the original old data to be used by parity encoding
  142          * function in XorNode */
  143         for (k = 0; k < EpdaIndex; k += 2) {
  144                 length = rf_RaidAddressToByte(raidPtr, ((RF_PhysDiskAddr_t *) node->params[k].p)->numSector);
  145                 retcode = rf_bxor(node->params[k + EpdaIndex + 3].p, node->params[k + 1].p, length, node->dagHdr->bp);
  146         }
  147         RF_ETIMER_STOP(timer);
  148         RF_ETIMER_EVAL(timer);
  149         tracerec->q_us += RF_ETIMER_VAL_US(timer);
  150         rf_GenericWakeupFunc(node, 0);
  151 #if 1
  152         return (0);             /* XXX this was missing.. GO */
  153 #endif
  154 }
  155 
  156 int 
  157 rf_SimpleONEFunc(node)
  158         RF_DagNode_t *node;
  159 {
  160         RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
  161         RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
  162         RF_PhysDiskAddr_t *pda = (RF_PhysDiskAddr_t *) node->params[0].p;
  163         int     retcode = 0;
  164         char   *srcbuf, *destbuf;
  165         RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
  166         int     length;
  167         RF_RowCol_t scol;
  168         RF_Etimer_t timer;
  169 
  170         RF_ASSERT(((RF_PhysDiskAddr_t *) node->params[2].p)->type == RF_PDA_TYPE_Q);
  171         if (node->dagHdr->status == rf_enable) {
  172                 RF_ETIMER_START(timer);
  173                 length = rf_RaidAddressToByte(raidPtr, ((RF_PhysDiskAddr_t *) node->params[4].p)->numSector);   /* this is a pda of
  174                                                                                                                  * writeDataNodes */
  175                 /* bxor to buffer of readDataNodes */
  176                 retcode = rf_bxor(node->params[5].p, node->params[1].p, length, node->dagHdr->bp);
  177                 /* find out the corresponding colume in encoding matrix for
  178                  * write colume to be encoded into redundant disk 'E' */
  179                 scol = rf_EUCol(layoutPtr, pda->raidAddress);
  180                 srcbuf = node->params[1].p;
  181                 destbuf = node->params[3].p;
  182                 /* Start encoding process */
  183                 rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector);
  184                 rf_bxor(node->params[5].p, node->params[1].p, length, node->dagHdr->bp);
  185                 RF_ETIMER_STOP(timer);
  186                 RF_ETIMER_EVAL(timer);
  187                 tracerec->q_us += RF_ETIMER_VAL_US(timer);
  188 
  189         }
  190         return (rf_GenericWakeupFunc(node, retcode));   /* call wake func
  191                                                          * explicitly since no
  192                                                          * I/O in this node */
  193 }
  194 
  195 
  196 /****** called by rf_RegularPEFunc(node) and rf_RegularEFunc(node) in f.f. large write  ********/
  197 void 
  198 rf_RegularESubroutine(node, ebuf)
  199         RF_DagNode_t *node;
  200         char   *ebuf;
  201 {
  202         RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
  203         RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
  204         RF_PhysDiskAddr_t *pda;
  205         int     i, suoffset;
  206         RF_RowCol_t scol;
  207         char   *srcbuf, *destbuf;
  208         RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
  209         RF_Etimer_t timer;
  210 
  211         RF_ETIMER_START(timer);
  212         for (i = 0; i < node->numParams - 2; i += 2) {
  213                 RF_ASSERT(node->params[i + 1].p != ebuf);
  214                 pda = (RF_PhysDiskAddr_t *) node->params[i].p;
  215                 suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
  216                 scol = rf_EUCol(layoutPtr, pda->raidAddress);
  217                 srcbuf = (char *) node->params[i + 1].p;
  218                 destbuf = ebuf + rf_RaidAddressToByte(raidPtr, suoffset);
  219                 rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector);
  220         }
  221         RF_ETIMER_STOP(timer);
  222         RF_ETIMER_EVAL(timer);
  223         tracerec->xor_us += RF_ETIMER_VAL_US(timer);
  224 }
  225 
  226 
  227 /*******************************************************************************************
  228  *                       Used in  EO_001_CreateLargeWriteDAG
  229  ******************************************************************************************/
  230 int 
  231 rf_RegularEFunc(node)
  232         RF_DagNode_t *node;
  233 {
  234         rf_RegularESubroutine(node, node->results[0]);
  235         rf_GenericWakeupFunc(node, 0);
  236 #if 1
  237         return (0);             /* XXX this was missing?.. GO */
  238 #endif
  239 }
/*******************************************************************************************
 * This degraded function allows only two cases:
 *  1. When the write accesses the full failed stripe unit; the access can then span more
 *     than one stripe unit.
 *  2. When the write accesses only part of the failed SU; we assume accesses of more than
 *     one stripe unit are not allowed, so that the write can be dealt with like a
 *     large write.
 *  The following function is based on these assumptions. So except in the second case,
 *  it looks the same as a large write encoding function. But this is not exactly the
 *  normal way of doing a degraded write, since RAIDframe has to break accesses
 *  other than the above two into smaller accesses. We may have to change
 *  DegrESubroutine in the future.
 *******************************************************************************************/
  253 void 
  254 rf_DegrESubroutine(node, ebuf)
  255         RF_DagNode_t *node;
  256         char   *ebuf;
  257 {
  258         RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
  259         RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
  260         RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p;
  261         RF_PhysDiskAddr_t *pda;
  262         int     i, suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr, failedPDA->startSector);
  263         RF_RowCol_t scol;
  264         char   *srcbuf, *destbuf;
  265         RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
  266         RF_Etimer_t timer;
  267 
  268         RF_ETIMER_START(timer);
  269         for (i = 0; i < node->numParams - 2; i += 2) {
  270                 RF_ASSERT(node->params[i + 1].p != ebuf);
  271                 pda = (RF_PhysDiskAddr_t *) node->params[i].p;
  272                 suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
  273                 scol = rf_EUCol(layoutPtr, pda->raidAddress);
  274                 srcbuf = (char *) node->params[i + 1].p;
  275                 destbuf = ebuf + rf_RaidAddressToByte(raidPtr, suoffset - failedSUOffset);
  276                 rf_e_encToBuf(raidPtr, scol, srcbuf, RF_EO_MATRIX_DIM - 2, destbuf, pda->numSector);
  277         }
  278 
  279         RF_ETIMER_STOP(timer);
  280         RF_ETIMER_EVAL(timer);
  281         tracerec->q_us += RF_ETIMER_VAL_US(timer);
  282 }
  283 
  284 
  285 /**************************************************************************************
  286  * This function is used in case where one data disk failed and both redundant disks
  287  * alive. It is used in the EO_100_CreateWriteDAG. Note: if there is another disk
  288  * failed in the stripe but not accessed at this time, then we should, instead, use
  289  * the rf_EOWriteDoubleRecoveryFunc().
  290  **************************************************************************************/
  291 int 
  292 rf_Degraded_100_EOFunc(node)
  293         RF_DagNode_t *node;
  294 {
  295         rf_DegrESubroutine(node, node->results[1]);
  296         rf_RecoveryXorFunc(node);       /* does the wakeup here! */
  297 #if 1
  298         return (0);             /* XXX this was missing... SHould these be
  299                                  * void functions??? GO */
  300 #endif
  301 }
  302 /**************************************************************************************
  303  * This function is to encode one sector in one of the data disks to the E disk.
  304  * However, in evenodd this function can also be used as decoding function to recover
  305  * data from dead disk in the case of parity failure and a single data failure.
  306  **************************************************************************************/
  307 void 
  308 rf_e_EncOneSect(
  309     RF_RowCol_t srcLogicCol,
  310     char *srcSecbuf,
  311     RF_RowCol_t destLogicCol,
  312     char *destSecbuf,
  313     int bytesPerSector)
  314 {
  315         int     S_index;        /* index of the EU in the src col which need
  316                                  * be Xored into all EUs in a dest sector */
  317         int     numRowInEncMatix = (RF_EO_MATRIX_DIM) - 1;
  318         RF_RowCol_t j, indexInDest,     /* row index of an encoding unit in
  319                                          * the destination colume of encoding
  320                                          * matrix */
  321                 indexInSrc;     /* row index of an encoding unit in the source
  322                                  * colume used for recovery */
  323         int     bytesPerEU = bytesPerSector / numRowInEncMatix;
  324 
  325 #if RF_EO_MATRIX_DIM > 17
  326         int     shortsPerEU = bytesPerEU / sizeof(short);
  327         short  *destShortBuf, *srcShortBuf1, *srcShortBuf2;
  328         short temp1;
  329 #elif RF_EO_MATRIX_DIM == 17
  330         int     longsPerEU = bytesPerEU / sizeof(long);
  331         long   *destLongBuf, *srcLongBuf1, *srcLongBuf2;
  332         long temp1;
  333 #endif
  334 
  335 #if RF_EO_MATRIX_DIM > 17
  336         RF_ASSERT(sizeof(short) == 2 || sizeof(short) == 1);
  337         RF_ASSERT(bytesPerEU % sizeof(short) == 0);
  338 #elif RF_EO_MATRIX_DIM == 17
  339         RF_ASSERT(sizeof(long) == 8 || sizeof(long) == 4);
  340         RF_ASSERT(bytesPerEU % sizeof(long) == 0);
  341 #endif
  342 
  343         S_index = rf_EO_Mod((RF_EO_MATRIX_DIM - 1 + destLogicCol - srcLogicCol), RF_EO_MATRIX_DIM);
  344 #if RF_EO_MATRIX_DIM > 17
  345         srcShortBuf1 = (short *) (srcSecbuf + S_index * bytesPerEU);
  346 #elif RF_EO_MATRIX_DIM == 17
  347         srcLongBuf1 = (long *) (srcSecbuf + S_index * bytesPerEU);
  348 #endif
  349 
  350         for (indexInDest = 0; indexInDest < numRowInEncMatix; indexInDest++) {
  351                 indexInSrc = rf_EO_Mod((indexInDest + destLogicCol - srcLogicCol), RF_EO_MATRIX_DIM);
  352 
  353 #if RF_EO_MATRIX_DIM > 17
  354                 destShortBuf = (short *) (destSecbuf + indexInDest * bytesPerEU);
  355                 srcShortBuf2 = (short *) (srcSecbuf + indexInSrc * bytesPerEU);
  356                 for (j = 0; j < shortsPerEU; j++) {
  357                         temp1 = destShortBuf[j] ^ srcShortBuf1[j];
  358                         /* note: S_index won't be at the end row for any src
  359                          * col! */
  360                         if (indexInSrc != RF_EO_MATRIX_DIM - 1)
  361                                 destShortBuf[j] = (srcShortBuf2[j]) ^ temp1;
  362                         /* if indexInSrc is at the end row, ie.
  363                          * RF_EO_MATRIX_DIM -1, then all elements are zero! */
  364                         else
  365                                 destShortBuf[j] = temp1;
  366                 }
  367 
  368 #elif RF_EO_MATRIX_DIM == 17
  369                 destLongBuf = (long *) (destSecbuf + indexInDest * bytesPerEU);
  370                 srcLongBuf2 = (long *) (srcSecbuf + indexInSrc * bytesPerEU);
  371                 for (j = 0; j < longsPerEU; j++) {
  372                         temp1 = destLongBuf[j] ^ srcLongBuf1[j];
  373                         if (indexInSrc != RF_EO_MATRIX_DIM - 1)
  374                                 destLongBuf[j] = (srcLongBuf2[j]) ^ temp1;
  375                         else
  376                                 destLongBuf[j] = temp1;
  377                 }
  378 #endif
  379         }
  380 }
  381 
  382 void 
  383 rf_e_encToBuf(
  384     RF_Raid_t * raidPtr,
  385     RF_RowCol_t srcLogicCol,
  386     char *srcbuf,
  387     RF_RowCol_t destLogicCol,
  388     char *destbuf,
  389     int numSector)
  390 {
  391         int     i, bytesPerSector = rf_RaidAddressToByte(raidPtr, 1);
  392 
  393         for (i = 0; i < numSector; i++) {
  394                 rf_e_EncOneSect(srcLogicCol, srcbuf, destLogicCol, destbuf, bytesPerSector);
  395                 srcbuf += bytesPerSector;
  396                 destbuf += bytesPerSector;
  397         }
  398 }
/**************************************************************************************
 * When parity and one data disk have both died, we use the second redundant
 * information, 'E', to recover the data on the dead disk. This function is used in
 * the recovery node for EO_110_CreateReadDAG.
 **************************************************************************************/
  404 int 
  405 rf_RecoveryEFunc(node)
  406         RF_DagNode_t *node;
  407 {
  408         RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
  409         RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
  410         RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p;
  411         RF_RowCol_t scol,       /* source logical column */
  412                 fcol = rf_EUCol(layoutPtr, failedPDA->raidAddress);     /* logical column of
  413                                                                          * failed SU */
  414         int     i;
  415         RF_PhysDiskAddr_t *pda;
  416         int     suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr, failedPDA->startSector);
  417         char   *srcbuf, *destbuf;
  418         RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
  419         RF_Etimer_t timer;
  420 
  421         memset((char *) node->results[0], 0,
  422             rf_RaidAddressToByte(raidPtr, failedPDA->numSector));
  423         if (node->dagHdr->status == rf_enable) {
  424                 RF_ETIMER_START(timer);
  425                 for (i = 0; i < node->numParams - 2; i += 2)
  426                         if (node->params[i + 1].p != node->results[0]) {
  427                                 pda = (RF_PhysDiskAddr_t *) node->params[i].p;
  428                                 if (i == node->numParams - 4)
  429                                         scol = RF_EO_MATRIX_DIM - 2;    /* the colume of
  430                                                                          * redundant E */
  431                                 else
  432                                         scol = rf_EUCol(layoutPtr, pda->raidAddress);
  433                                 srcbuf = (char *) node->params[i + 1].p;
  434                                 suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
  435                                 destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr, suoffset - failedSUOffset);
  436                                 rf_e_encToBuf(raidPtr, scol, srcbuf, fcol, destbuf, pda->numSector);
  437                         }
  438                 RF_ETIMER_STOP(timer);
  439                 RF_ETIMER_EVAL(timer);
  440                 tracerec->xor_us += RF_ETIMER_VAL_US(timer);
  441         }
  442         return (rf_GenericWakeupFunc(node, 0)); /* node execute successfully */
  443 }
/**************************************************************************************
 * This function is used in the case where one data disk and the parity have failed
 * (in EO_110_CreateWriteDAG).
 **************************************************************************************/
  448 int 
  449 rf_EO_DegradedWriteEFunc(RF_DagNode_t * node)
  450 {
  451         rf_DegrESubroutine(node, node->results[0]);
  452         rf_GenericWakeupFunc(node, 0);
  453 #if 1
  454         return (0);             /* XXX Yet another one!! GO */
  455 #endif
  456 }
  457 
  458 
  459 
  460 /**************************************************************************************
  461  *              THE FUNCTION IS FOR DOUBLE DEGRADED READ AND WRITE CASES
  462  **************************************************************************************/
  463 
  464 void 
  465 rf_doubleEOdecode(
  466     RF_Raid_t * raidPtr,
  467     char **rrdbuf,
  468     char **dest,
  469     RF_RowCol_t * fcol,
  470     char *pbuf,
  471     char *ebuf)
  472 {
  473         RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & (raidPtr->Layout);
  474         int     i, j, k, f1, f2, row;
  475         int     rrdrow, erow, count = 0;
  476         int     bytesPerSector = rf_RaidAddressToByte(raidPtr, 1);
  477         int     numRowInEncMatix = (RF_EO_MATRIX_DIM) - 1;
  478 #if 0
  479         int     pcol = (RF_EO_MATRIX_DIM) - 1;
  480 #endif
  481         int     ecol = (RF_EO_MATRIX_DIM) - 2;
  482         int     bytesPerEU = bytesPerSector / numRowInEncMatix;
  483         int     numDataCol = layoutPtr->numDataCol;
  484 #if RF_EO_MATRIX_DIM > 17
  485         int     shortsPerEU = bytesPerEU / sizeof(short);
  486         short  *rrdbuf_current, *pbuf_current, *ebuf_current;
  487         short  *dest_smaller, *dest_smaller_current, *dest_larger, *dest_larger_current;
  488         short *temp;
  489         short  *P;
  490 
  491         RF_ASSERT(bytesPerEU % sizeof(short) == 0);
  492         RF_Malloc(P, bytesPerEU, (short *));
  493         RF_Malloc(temp, bytesPerEU, (short *));
  494 #elif RF_EO_MATRIX_DIM == 17
  495         int     longsPerEU = bytesPerEU / sizeof(long);
  496         long   *rrdbuf_current, *pbuf_current, *ebuf_current;
  497         long   *dest_smaller, *dest_smaller_current, *dest_larger, *dest_larger_current;
  498         long *temp;
  499         long   *P;
  500 
  501         RF_ASSERT(bytesPerEU % sizeof(long) == 0);
  502         RF_Malloc(P, bytesPerEU, (long *));
  503         RF_Malloc(temp, bytesPerEU, (long *));
  504 #endif
  505         RF_ASSERT(*((long *) dest[0]) == 0);
  506         RF_ASSERT(*((long *) dest[1]) == 0);
  507         memset((char *) P, 0, bytesPerEU);
  508         memset((char *) temp, 0, bytesPerEU);
  509         RF_ASSERT(*P == 0);
  510         /* calculate the 'P' parameter, which, not parity, is the Xor of all
  511          * elements in the last two column, ie. 'E' and 'parity' colume, see
  512          * the Ref. paper by Blaum, et al 1993  */
  513         for (i = 0; i < numRowInEncMatix; i++)
  514                 for (k = 0; k < longsPerEU; k++) {
  515 #if RF_EO_MATRIX_DIM > 17
  516                         ebuf_current = ((short *) ebuf) + i * shortsPerEU + k;
  517                         pbuf_current = ((short *) pbuf) + i * shortsPerEU + k;
  518 #elif RF_EO_MATRIX_DIM == 17
  519                         ebuf_current = ((long *) ebuf) + i * longsPerEU + k;
  520                         pbuf_current = ((long *) pbuf) + i * longsPerEU + k;
  521 #endif
  522                         P[k] ^= *ebuf_current;
  523                         P[k] ^= *pbuf_current;
  524                 }
  525         RF_ASSERT(fcol[0] != fcol[1]);
  526         if (fcol[0] < fcol[1]) {
  527 #if RF_EO_MATRIX_DIM > 17
  528                 dest_smaller = (short *) (dest[0]);
  529                 dest_larger = (short *) (dest[1]);
  530 #elif RF_EO_MATRIX_DIM == 17
  531                 dest_smaller = (long *) (dest[0]);
  532                 dest_larger = (long *) (dest[1]);
  533 #endif
  534                 f1 = fcol[0];
  535                 f2 = fcol[1];
  536         } else {
  537 #if RF_EO_MATRIX_DIM > 17
  538                 dest_smaller = (short *) (dest[1]);
  539                 dest_larger = (short *) (dest[0]);
  540 #elif RF_EO_MATRIX_DIM == 17
  541                 dest_smaller = (long *) (dest[1]);
  542                 dest_larger = (long *) (dest[0]);
  543 #endif
  544                 f1 = fcol[1];
  545                 f2 = fcol[0];
  546         }
  547         row = (RF_EO_MATRIX_DIM) - 1;
  548         while ((row = rf_EO_Mod((row + f1 - f2), RF_EO_MATRIX_DIM)) != ((RF_EO_MATRIX_DIM) - 1)) {
  549 #if RF_EO_MATRIX_DIM > 17
  550                 dest_larger_current = dest_larger + row * shortsPerEU;
  551                 dest_smaller_current = dest_smaller + row * shortsPerEU;
  552 #elif RF_EO_MATRIX_DIM == 17
  553                 dest_larger_current = dest_larger + row * longsPerEU;
  554                 dest_smaller_current = dest_smaller + row * longsPerEU;
  555 #endif
  556                 /**    Do the diagonal recovery. Initially, temp[k] = (failed 1),
  557                        which is the failed data in the colume which has smaller col index. **/
  558                 /* step 1:  ^(SUM of nonfailed in-diagonal A(rrdrow,0..m-3))         */
  559                 for (j = 0; j < numDataCol; j++) {
  560                         if (j == f1 || j == f2)
  561                                 continue;
  562                         rrdrow = rf_EO_Mod((row + f2 - j), RF_EO_MATRIX_DIM);
  563                         if (rrdrow != (RF_EO_MATRIX_DIM) - 1) {
  564 #if RF_EO_MATRIX_DIM > 17
  565                                 rrdbuf_current = (short *) (rrdbuf[j]) + rrdrow * shortsPerEU;
  566                                 for (k = 0; k < shortsPerEU; k++)
  567                                         temp[k] ^= *(rrdbuf_current + k);
  568 #elif RF_EO_MATRIX_DIM == 17
  569                                 rrdbuf_current = (long *) (rrdbuf[j]) + rrdrow * longsPerEU;
  570                                 for (k = 0; k < longsPerEU; k++)
  571                                         temp[k] ^= *(rrdbuf_current + k);
  572 #endif
  573                         }
  574                 }
  575                 /* step 2:  ^E(erow,m-2), If erow is at the buttom row, don't
  576                  * Xor into it  E(erow,m-2) = (principle diagonal) ^ (failed
  577                  * 1) ^ (failed 2) ^ ( SUM of nonfailed in-diagonal
  578                  * A(rrdrow,0..m-3) ) After this step, temp[k] = (principle
  579                  * diagonal) ^ (failed 2)       */
  580 
  581                 erow = rf_EO_Mod((row + f2 - ecol), (RF_EO_MATRIX_DIM));
  582                 if (erow != (RF_EO_MATRIX_DIM) - 1) {
  583 #if RF_EO_MATRIX_DIM > 17
  584                         ebuf_current = (short *) ebuf + shortsPerEU * erow;
  585                         for (k = 0; k < shortsPerEU; k++)
  586                                 temp[k] ^= *(ebuf_current + k);
  587 #elif RF_EO_MATRIX_DIM == 17
  588                         ebuf_current = (long *) ebuf + longsPerEU * erow;
  589                         for (k = 0; k < longsPerEU; k++)
  590                                 temp[k] ^= *(ebuf_current + k);
  591 #endif
  592                 }
  593                 /* step 3: ^P to obtain the failed data (failed 2).  P can be
  594                  * proved to be actually  (principle diagonal)  After this
  595                  * step, temp[k] = (failed 2), the failed data to be recovered */
  596 #if RF_EO_MATRIX_DIM > 17
  597                 for (k = 0; k < shortsPerEU; k++)
  598                         temp[k] ^= P[k];
  599                 /* Put the data to the destination buffer                              */
  600                 for (k = 0; k < shortsPerEU; k++)
  601                         dest_larger_current[k] = temp[k];
  602 #elif RF_EO_MATRIX_DIM == 17
  603                 for (k = 0; k < longsPerEU; k++)
  604                         temp[k] ^= P[k];
  605                 /* Put the data to the destination buffer                              */
  606                 for (k = 0; k < longsPerEU; k++)
  607                         dest_larger_current[k] = temp[k];
  608 #endif
  609 
  610                 /**          THE FOLLOWING DO THE HORIZONTAL XOR                **/
  611                 /* step 1:  ^(SUM of A(row,0..m-3)), ie. all nonfailed data
  612                  * columes    */
  613                 for (j = 0; j < numDataCol; j++) {
  614                         if (j == f1 || j == f2)
  615                                 continue;
  616 #if RF_EO_MATRIX_DIM > 17
  617                         rrdbuf_current = (short *) (rrdbuf[j]) + row * shortsPerEU;
  618                         for (k = 0; k < shortsPerEU; k++)
  619                                 temp[k] ^= *(rrdbuf_current + k);
  620 #elif RF_EO_MATRIX_DIM == 17
  621                         rrdbuf_current = (long *) (rrdbuf[j]) + row * longsPerEU;
  622                         for (k = 0; k < longsPerEU; k++)
  623                                 temp[k] ^= *(rrdbuf_current + k);
  624 #endif
  625                 }
  626                 /* step 2: ^A(row,m-1) */
  627                 /* step 3: Put the data to the destination buffer                                */
  628 #if RF_EO_MATRIX_DIM > 17
  629                 pbuf_current = (short *) pbuf + shortsPerEU * row;
  630                 for (k = 0; k < shortsPerEU; k++)
  631                         temp[k] ^= *(pbuf_current + k);
  632                 for (k = 0; k < shortsPerEU; k++)
  633                         dest_smaller_current[k] = temp[k];
  634 #elif RF_EO_MATRIX_DIM == 17
  635                 pbuf_current = (long *) pbuf + longsPerEU * row;
  636                 for (k = 0; k < longsPerEU; k++)
  637                         temp[k] ^= *(pbuf_current + k);
  638                 for (k = 0; k < longsPerEU; k++)
  639                         dest_smaller_current[k] = temp[k];
  640 #endif
  641                 count++;
  642         }
  643         /* Check if all Encoding Unit in the data buffer have been decoded,
  644          * according EvenOdd theory, if "RF_EO_MATRIX_DIM" is a prime number,
  645          * this algorithm will covered all buffer                                */
  646         RF_ASSERT(count == numRowInEncMatix);
  647         RF_Free((char *) P, bytesPerEU);
  648         RF_Free((char *) temp, bytesPerEU);
  649 }
  650 
  651 
  652 /***************************************************************************************
  653 *       This function is called by double degragded read
  654 *       EO_200_CreateReadDAG
  655 *
  656 ***************************************************************************************/
  657 int 
  658 rf_EvenOddDoubleRecoveryFunc(node)
  659         RF_DagNode_t *node;
  660 {
  661         int     ndataParam = 0;
  662         int     np = node->numParams;
  663         RF_AccessStripeMap_t *asmap = (RF_AccessStripeMap_t *) node->params[np - 1].p;
  664         RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 2].p;
  665         RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & (raidPtr->Layout);
  666         int     i, prm, sector, nresults = node->numResults;
  667         RF_SectorCount_t secPerSU = layoutPtr->sectorsPerStripeUnit;
  668         unsigned sosAddr;
  669         int     two = 0, mallc_one = 0, mallc_two = 0;  /* flags to indicate if
  670                                                          * memory is allocated */
  671         int     bytesPerSector = rf_RaidAddressToByte(raidPtr, 1);
  672         RF_PhysDiskAddr_t *ppda, *ppda2, *epda, *epda2, *pda, *pda0, *pda1,
  673                 npda;
  674         RF_RowCol_t fcol[2], fsuoff[2], fsuend[2], numDataCol = layoutPtr->numDataCol;
  675         char  **buf, *ebuf, *pbuf, *dest[2];
  676         long   *suoff = NULL, *suend = NULL, *prmToCol = NULL, psuoff, esuoff;
  677         RF_SectorNum_t startSector, endSector;
  678         RF_Etimer_t timer;
  679         RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
  680 
  681         RF_ETIMER_START(timer);
  682 
  683         /* Find out the number of parameters which are pdas for data
  684          * information */
  685         for (i = 0; i <= np; i++)
  686                 if (((RF_PhysDiskAddr_t *) node->params[i].p)->type != RF_PDA_TYPE_DATA) {
  687                         ndataParam = i;
  688                         break;
  689                 }
  690         RF_Malloc(buf, numDataCol * sizeof(char *), (char **));
  691         if (ndataParam != 0) {
  692                 RF_Malloc(suoff, ndataParam * sizeof(long), (long *));
  693                 RF_Malloc(suend, ndataParam * sizeof(long), (long *));
  694                 RF_Malloc(prmToCol, ndataParam * sizeof(long), (long *));
  695         }
  696         if (asmap->failedPDAs[1] &&
  697             (asmap->failedPDAs[1]->numSector + asmap->failedPDAs[0]->numSector < secPerSU)) {
  698                 RF_ASSERT(0);   /* currently, no support for this situation */
  699                 ppda = node->params[np - 6].p;
  700                 ppda2 = node->params[np - 5].p;
  701                 RF_ASSERT(ppda2->type == RF_PDA_TYPE_PARITY);
  702                 epda = node->params[np - 4].p;
  703                 epda2 = node->params[np - 3].p;
  704                 RF_ASSERT(epda2->type == RF_PDA_TYPE_Q);
  705                 two = 1;
  706         } else {
  707                 ppda = node->params[np - 4].p;
  708                 epda = node->params[np - 3].p;
  709                 psuoff = rf_StripeUnitOffset(layoutPtr, ppda->startSector);
  710                 esuoff = rf_StripeUnitOffset(layoutPtr, epda->startSector);
  711                 RF_ASSERT(psuoff == esuoff);
  712         }
  713         /*
  714             the followings have three goals:
  715             1. determine the startSector to begin decoding and endSector to end decoding.
  716             2. determine the colume numbers of the two failed disks.
  717             3. determine the offset and end offset of the access within each failed stripe unit.
  718          */
  719         if (nresults == 1) {
  720                 /* find the startSector to begin decoding */
  721                 pda = node->results[0];
  722                 memset(pda->bufPtr, 0, bytesPerSector * pda->numSector);
  723                 fsuoff[0] = rf_StripeUnitOffset(layoutPtr, pda->startSector);
  724                 fsuend[0] = fsuoff[0] + pda->numSector;
  725                 startSector = fsuoff[0];
  726                 endSector = fsuend[0];
  727 
  728                 /* find out the column of failed disk being accessed */
  729                 fcol[0] = rf_EUCol(layoutPtr, pda->raidAddress);
  730 
  731                 /* find out the other failed colume not accessed */
  732                 sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress);
  733                 for (i = 0; i < numDataCol; i++) {
  734                         npda.raidAddress = sosAddr + (i * secPerSU);
  735                         (raidPtr->Layout.map->MapSector) (raidPtr, npda.raidAddress, &(npda.col), &(npda.startSector), 0);
  736                         /* skip over dead disks */
  737                         if (RF_DEAD_DISK(raidPtr->Disks[npda.col].status))
  738                                 if (i != fcol[0])
  739                                         break;
  740                 }
  741                 RF_ASSERT(i < numDataCol);
  742                 fcol[1] = i;
  743         } else {
  744                 RF_ASSERT(nresults == 2);
  745                 pda0 = node->results[0];
  746                 memset(pda0->bufPtr, 0, bytesPerSector * pda0->numSector);
  747                 pda1 = node->results[1];
  748                 memset(pda1->bufPtr, 0, bytesPerSector * pda1->numSector);
  749                 /* determine the failed colume numbers of the two failed
  750                  * disks. */
  751                 fcol[0] = rf_EUCol(layoutPtr, pda0->raidAddress);
  752                 fcol[1] = rf_EUCol(layoutPtr, pda1->raidAddress);
  753                 /* determine the offset and end offset of the access within
  754                  * each failed stripe unit. */
  755                 fsuoff[0] = rf_StripeUnitOffset(layoutPtr, pda0->startSector);
  756                 fsuend[0] = fsuoff[0] + pda0->numSector;
  757                 fsuoff[1] = rf_StripeUnitOffset(layoutPtr, pda1->startSector);
  758                 fsuend[1] = fsuoff[1] + pda1->numSector;
  759                 /* determine the startSector to begin decoding */
  760                 startSector = RF_MIN(pda0->startSector, pda1->startSector);
  761                 /* determine the endSector to end decoding */
  762                 endSector = RF_MAX(fsuend[0], fsuend[1]);
  763         }
  764         /*
  765               assign the beginning sector and the end sector for each parameter
  766               find out the corresponding colume # for each parameter
  767         */
  768         for (prm = 0; prm < ndataParam; prm++) {
  769                 pda = node->params[prm].p;
  770                 suoff[prm] = rf_StripeUnitOffset(layoutPtr, pda->startSector);
  771                 suend[prm] = suoff[prm] + pda->numSector;
  772                 prmToCol[prm] = rf_EUCol(layoutPtr, pda->raidAddress);
  773         }
  774         /* 'sector' is the sector for the current decoding algorithm. For each
  775          * sector in the failed SU, find out the corresponding parameters that
  776          * cover the current sector and that are needed for decoding of this
  777          * sector in failed SU. 2.  Find out if sector is in the shadow of any
  778          * accessed failed SU. If not, malloc a temporary space of a sector in
  779          * size. */
  780         for (sector = startSector; sector < endSector; sector++) {
  781                 if (nresults == 2)
  782                         if (!(fsuoff[0] <= sector && sector < fsuend[0]) && !(fsuoff[1] <= sector && sector < fsuend[1]))
  783                                 continue;
  784                 for (prm = 0; prm < ndataParam; prm++)
  785                         if (suoff[prm] <= sector && sector < suend[prm])
  786                                 buf[(prmToCol[prm])] = ((RF_PhysDiskAddr_t *) node->params[prm].p)->bufPtr +
  787                                     rf_RaidAddressToByte(raidPtr, sector - suoff[prm]);
  788                 /* find out if sector is in the shadow of any accessed failed
  789                  * SU. If yes, assign dest[0], dest[1] to point at suitable
  790                  * position of the buffer corresponding to failed SUs. if no,
  791                  * malloc a temporary space of a sector in size for
  792                  * destination of decoding. */
  793                 RF_ASSERT(nresults == 1 || nresults == 2);
  794                 if (nresults == 1) {
  795                         dest[0] = ((RF_PhysDiskAddr_t *) node->results[0])->bufPtr + rf_RaidAddressToByte(raidPtr, sector - fsuoff[0]);
  796                         /* Always malloc temp buffer to dest[1]  */
  797                         RF_Malloc(dest[1], bytesPerSector, (char *));
  798                         memset(dest[1], 0, bytesPerSector);
  799                         mallc_two = 1;
  800                 } else {
  801                         if (fsuoff[0] <= sector && sector < fsuend[0])
  802                                 dest[0] = ((RF_PhysDiskAddr_t *) node->results[0])->bufPtr + rf_RaidAddressToByte(raidPtr, sector - fsuoff[0]);
  803                         else {
  804                                 RF_Malloc(dest[0], bytesPerSector, (char *));
  805                                 memset(dest[0], 0, bytesPerSector);
  806                                 mallc_one = 1;
  807                         }
  808                         if (fsuoff[1] <= sector && sector < fsuend[1])
  809                                 dest[1] = ((RF_PhysDiskAddr_t *) node->results[1])->bufPtr + rf_RaidAddressToByte(raidPtr, sector - fsuoff[1]);
  810                         else {
  811                                 RF_Malloc(dest[1], bytesPerSector, (char *));
  812                                 memset(dest[1], 0, bytesPerSector);
  813                                 mallc_two = 1;
  814                         }
  815                         RF_ASSERT(mallc_one == 0 || mallc_two == 0);
  816                 }
  817                 pbuf = ppda->bufPtr + rf_RaidAddressToByte(raidPtr, sector - psuoff);
  818                 ebuf = epda->bufPtr + rf_RaidAddressToByte(raidPtr, sector - esuoff);
  819                 /*
  820                  * After finish finding all needed sectors, call doubleEOdecode function for decoding
  821                  * one sector to destination.
  822                  */
  823                 rf_doubleEOdecode(raidPtr, buf, dest, fcol, pbuf, ebuf);
  824                 /* free all allocated memory, and mark flag to indicate no
  825                  * memory is being allocated */
  826                 if (mallc_one == 1)
  827                         RF_Free(dest[0], bytesPerSector);
  828                 if (mallc_two == 1)
  829                         RF_Free(dest[1], bytesPerSector);
  830                 mallc_one = mallc_two = 0;
  831         }
  832         RF_Free(buf, numDataCol * sizeof(char *));
  833         if (ndataParam != 0) {
  834                 RF_Free(suoff, ndataParam * sizeof(long));
  835                 RF_Free(suend, ndataParam * sizeof(long));
  836                 RF_Free(prmToCol, ndataParam * sizeof(long));
  837         }
  838         RF_ETIMER_STOP(timer);
  839         RF_ETIMER_EVAL(timer);
  840         if (tracerec) {
  841                 tracerec->q_us += RF_ETIMER_VAL_US(timer);
  842         }
  843         rf_GenericWakeupFunc(node, 0);
  844 #if 1
  845         return (0);             /* XXX is this even close!!?!?!!? GO */
  846 #endif
  847 }
  848 
  849 
  850 /* currently, only access of one of the two failed SU is allowed in this function.
  851  * also, asmap->numStripeUnitsAccessed is limited to be one, the RaidFrame will break large access into
  852  * many accesses of single stripe unit.
  853  */
  854 
  855 int 
  856 rf_EOWriteDoubleRecoveryFunc(node)
  857         RF_DagNode_t *node;
  858 {
  859         int     np = node->numParams;
  860         RF_AccessStripeMap_t *asmap = (RF_AccessStripeMap_t *) node->params[np - 1].p;
  861         RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 2].p;
  862         RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & (raidPtr->Layout);
  863         RF_SectorNum_t sector;
  864         RF_RowCol_t col, scol;
  865         int     prm, i, j;
  866         RF_SectorCount_t secPerSU = layoutPtr->sectorsPerStripeUnit;
  867         unsigned sosAddr;
  868         unsigned bytesPerSector = rf_RaidAddressToByte(raidPtr, 1);
  869         RF_int64 numbytes;
  870         RF_SectorNum_t startSector, endSector;
  871         RF_PhysDiskAddr_t *ppda, *epda, *pda, *fpda, npda;
  872         RF_RowCol_t fcol[2], numDataCol = layoutPtr->numDataCol;
  873         char  **buf;            /* buf[0], buf[1], buf[2], ...etc. point to
  874                                  * buffer storing data read from col0, col1,
  875                                  * col2 */
  876         char   *ebuf, *pbuf, *dest[2], *olddata[2];
  877         RF_Etimer_t timer;
  878         RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
  879 
  880         RF_ASSERT(asmap->numDataFailed == 1);   /* currently only support this
  881                                                  * case, the other failed SU
  882                                                  * is not being accessed */
  883         RF_ETIMER_START(timer);
  884         RF_Malloc(buf, numDataCol * sizeof(char *), (char **));
  885 
  886         ppda = node->results[0];/* Instead of being buffers, node->results[0]
  887                                  * and [1] are Ppda and Epda  */
  888         epda = node->results[1];
  889         fpda = asmap->failedPDAs[0];
  890 
  891         /* First, recovery the failed old SU using EvenOdd double decoding      */
  892         /* determine the startSector and endSector for decoding */
  893         startSector = rf_StripeUnitOffset(layoutPtr, fpda->startSector);
  894         endSector = startSector + fpda->numSector;
  895         /* Assign buf[col] pointers to point to each non-failed colume  and
  896          * initialize the pbuf and ebuf to point at the beginning of each
  897          * source buffers and destination buffers */
  898         for (prm = 0; prm < numDataCol - 2; prm++) {
  899                 pda = (RF_PhysDiskAddr_t *) node->params[prm].p;
  900                 col = rf_EUCol(layoutPtr, pda->raidAddress);
  901                 buf[col] = pda->bufPtr;
  902         }
  903         /* pbuf and ebuf:  they will change values as double recovery decoding
  904          * goes on */
  905         pbuf = ppda->bufPtr;
  906         ebuf = epda->bufPtr;
  907         /* find out the logical colume numbers in the encoding matrix of the
  908          * two failed columes */
  909         fcol[0] = rf_EUCol(layoutPtr, fpda->raidAddress);
  910 
  911         /* find out the other failed colume not accessed this time */
  912         sosAddr = rf_RaidAddressOfPrevStripeBoundary(layoutPtr, asmap->raidAddress);
  913         for (i = 0; i < numDataCol; i++) {
  914                 npda.raidAddress = sosAddr + (i * secPerSU);
  915                 (raidPtr->Layout.map->MapSector) (raidPtr, npda.raidAddress, &(npda.col), &(npda.startSector), 0);
  916                 /* skip over dead disks */
  917                 if (RF_DEAD_DISK(raidPtr->Disks[npda.col].status))
  918                         if (i != fcol[0])
  919                                 break;
  920         }
  921         RF_ASSERT(i < numDataCol);
  922         fcol[1] = i;
  923         /* assign temporary space to put recovered failed SU */
  924         numbytes = fpda->numSector * bytesPerSector;
  925         RF_Malloc(olddata[0], numbytes, (char *));
  926         RF_Malloc(olddata[1], numbytes, (char *));
  927         dest[0] = olddata[0];
  928         dest[1] = olddata[1];
  929         memset(olddata[0], 0, numbytes);
  930         memset(olddata[1], 0, numbytes);
  931         /* Begin the recovery decoding, initially buf[j],  ebuf, pbuf, dest[j]
  932          * have already pointed at the beginning of each source buffers and
  933          * destination buffers */
  934         for (sector = startSector, i = 0; sector < endSector; sector++, i++) {
  935                 rf_doubleEOdecode(raidPtr, buf, dest, fcol, pbuf, ebuf);
  936                 for (j = 0; j < numDataCol; j++)
  937                         if ((j != fcol[0]) && (j != fcol[1]))
  938                                 buf[j] += bytesPerSector;
  939                 dest[0] += bytesPerSector;
  940                 dest[1] += bytesPerSector;
  941                 ebuf += bytesPerSector;
  942                 pbuf += bytesPerSector;
  943         }
  944         /* after recovery, the buffer pointed by olddata[0] is the old failed
  945          * data. With new writing data and this old data, use small write to
  946          * calculate the new redundant informations */
  947         /* node->params[ 0, ... PDAPerDisk * (numDataCol - 2)-1 ] are Pdas of
  948          * Rrd; params[ PDAPerDisk*(numDataCol - 2), ... PDAPerDisk*numDataCol
  949          * -1 ] are Pdas of Rp, ( Rp2 ), Re, ( Re2 ) ; params[
  950          * PDAPerDisk*numDataCol, ... PDAPerDisk*numDataCol
  951          * +asmap->numStripeUnitsAccessed -asmap->numDataFailed-1] are Pdas of
  952          * wudNodes; For current implementation, we assume the simplest case:
  953          * asmap->numStripeUnitsAccessed == 1 and asmap->numDataFailed == 1
  954          * ie. PDAPerDisk = 1 then node->params[numDataCol] must be the new
  955          * data to be writen to the failed disk. We first bxor the new data
  956          * into the old recovered data, then do the same things as small
  957          * write. */
  958 
  959         rf_bxor(((RF_PhysDiskAddr_t *) node->params[numDataCol].p)->bufPtr, olddata[0], numbytes, node->dagHdr->bp);
  960         /* do new 'E' calculation  */
  961         /* find out the corresponding colume in encoding matrix for write
  962          * colume to be encoded into redundant disk 'E' */
  963         scol = rf_EUCol(layoutPtr, fpda->raidAddress);
  964         /* olddata[0] now is source buffer pointer; epda->bufPtr is the dest
  965          * buffer pointer               */
  966         rf_e_encToBuf(raidPtr, scol, olddata[0], RF_EO_MATRIX_DIM - 2, epda->bufPtr, fpda->numSector);
  967 
  968         /* do new 'P' calculation  */
  969         rf_bxor(olddata[0], ppda->bufPtr, numbytes, node->dagHdr->bp);
  970         /* Free the allocated buffer  */
  971         RF_Free(olddata[0], numbytes);
  972         RF_Free(olddata[1], numbytes);
  973         RF_Free(buf, numDataCol * sizeof(char *));
  974 
  975         RF_ETIMER_STOP(timer);
  976         RF_ETIMER_EVAL(timer);
  977         if (tracerec) {
  978                 tracerec->q_us += RF_ETIMER_VAL_US(timer);
  979         }
  980         rf_GenericWakeupFunc(node, 0);
  981         return (0);
  982 }
  983 #endif                          /* RF_INCLUDE_EVENODD > 0 */

Cache object: aa3c5ae6cdb853531707f08b12bd8dca


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.