The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/dev/raidframe/rf_pq.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*      $NetBSD: rf_pq.c,v 1.13 2003/11/16 20:32:05 oster Exp $ */
    2 /*
    3  * Copyright (c) 1995 Carnegie-Mellon University.
    4  * All rights reserved.
    5  *
    6  * Author: Daniel Stodolsky
    7  *
    8  * Permission to use, copy, modify and distribute this software and
    9  * its documentation is hereby granted, provided that both the copyright
   10  * notice and this permission notice appear in all copies of the
   11  * software, derivative works or modified versions, and any portions
   12  * thereof, and that both notices appear in supporting documentation.
   13  *
   14  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
   15  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
   16  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
   17  *
   18  * Carnegie Mellon requests users of this software to return to
   19  *
   20  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
   21  *  School of Computer Science
   22  *  Carnegie Mellon University
   23  *  Pittsburgh PA 15213-3890
   24  *
   25  * any improvements or extensions that they make and grant Carnegie the
   26  * rights to redistribute these changes.
   27  */
   28 
   29 /*
   30  * Code for RAID level 6 (P + Q) disk array architecture.
   31  */
   32 
   33 #include <sys/cdefs.h>
   34 __KERNEL_RCSID(0, "$NetBSD: rf_pq.c,v 1.13 2003/11/16 20:32:05 oster Exp $");
   35 
   36 #include "rf_archs.h"
   37 
   38 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) || (RF_INCLUDE_EVENODD > 0)
   39 
   40 #include <dev/raidframe/raidframevar.h>
   41 
   42 #include "rf_raid.h"
   43 #include "rf_dag.h"
   44 #include "rf_dagffrd.h"
   45 #include "rf_dagffwr.h"
   46 #include "rf_dagdegrd.h"
   47 #include "rf_dagdegwr.h"
   48 #include "rf_dagutils.h"
   49 #include "rf_dagfuncs.h"
   50 #include "rf_etimer.h"
   51 #include "rf_pqdeg.h"
   52 #include "rf_general.h"
   53 #include "rf_map.h"
   54 #include "rf_pq.h"
   55 
   56 RF_RedFuncs_t rf_pFuncs = {rf_RegularONPFunc, "Regular Old-New P", rf_SimpleONPFunc, "Simple Old-New P"};
   57 RF_RedFuncs_t rf_pRecoveryFuncs = {rf_RecoveryPFunc, "Recovery P Func", rf_RecoveryPFunc, "Recovery P Func"};
   58 
   59 int 
   60 rf_RegularONPFunc(node)
   61         RF_DagNode_t *node;
   62 {
   63         return (rf_RegularXorFunc(node));
   64 }
   65 /*
   66    same as simpleONQ func, but the coefficient is always 1
   67 */
   68 
   69 int 
   70 rf_SimpleONPFunc(node)
   71         RF_DagNode_t *node;
   72 {
   73         return (rf_SimpleXorFunc(node));
   74 }
   75 
   76 int 
   77 rf_RecoveryPFunc(node)
   78         RF_DagNode_t *node;
   79 {
   80         return (rf_RecoveryXorFunc(node));
   81 }
   82 
   83 int 
   84 rf_RegularPFunc(node)
   85         RF_DagNode_t *node;
   86 {
   87         return (rf_RegularXorFunc(node));
   88 }
   89 #endif /* (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) || (RF_INCLUDE_EVENODD > 0) */
   90 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
   91 
   92 static void 
   93 QDelta(char *dest, char *obuf, char *nbuf, unsigned length,
   94     unsigned char coeff);
   95 static void 
   96 rf_InvertQ(unsigned long *qbuf, unsigned long *abuf,
   97     unsigned length, unsigned coeff);
   98 
   99 RF_RedFuncs_t rf_qFuncs = {rf_RegularONQFunc, "Regular Old-New Q", rf_SimpleONQFunc, "Simple Old-New Q"};
  100 RF_RedFuncs_t rf_qRecoveryFuncs = {rf_RecoveryQFunc, "Recovery Q Func", rf_RecoveryQFunc, "Recovery Q Func"};
  101 RF_RedFuncs_t rf_pqRecoveryFuncs = {rf_RecoveryPQFunc, "Recovery PQ Func", rf_RecoveryPQFunc, "Recovery PQ Func"};
  102 
  103 void 
  104 rf_PQDagSelect(
  105     RF_Raid_t * raidPtr,
  106     RF_IoType_t type,
  107     RF_AccessStripeMap_t * asmap,
  108     RF_VoidFuncPtr * createFunc)
  109 {
  110         RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
  111         unsigned ndfail = asmap->numDataFailed;
  112         unsigned npfail = asmap->numParityFailed;
  113         unsigned ntfail = npfail + ndfail;
  114 
  115         RF_ASSERT(RF_IO_IS_R_OR_W(type));
  116         if (ntfail > 2) {
  117                 RF_ERRORMSG("more than two disks failed in a single group!  Aborting I/O operation.\n");
  118                 *createFunc = NULL;
  119                 return;
  120         }
  121         /* ok, we can do this I/O */
  122         if (type == RF_IO_TYPE_READ) {
  123                 switch (ndfail) {
  124                 case 0:
  125                         /* fault free read */
  126                         *createFunc = (RF_VoidFuncPtr) rf_CreateFaultFreeReadDAG;       /* same as raid 5 */
  127                         break;
  128                 case 1:
  129                         /* lost a single data unit */
  130                         /* two cases: (1) parity is not lost. do a normal raid
  131                          * 5 reconstruct read. (2) parity is lost. do a
  132                          * reconstruct read using "q". */
  133                         if (ntfail == 2) {      /* also lost redundancy */
  134                                 if (asmap->failedPDAs[1]->type == RF_PDA_TYPE_PARITY)
  135                                         *createFunc = (RF_VoidFuncPtr) rf_PQ_110_CreateReadDAG;
  136                                 else
  137                                         *createFunc = (RF_VoidFuncPtr) rf_PQ_101_CreateReadDAG;
  138                         } else {
  139                                 /* P and Q are ok. But is there a failure in
  140                                  * some unaccessed data unit? */
  141                                 if (rf_NumFailedDataUnitsInStripe(raidPtr, asmap) == 2)
  142                                         *createFunc = (RF_VoidFuncPtr) rf_PQ_200_CreateReadDAG;
  143                                 else
  144                                         *createFunc = (RF_VoidFuncPtr) rf_PQ_100_CreateReadDAG;
  145                         }
  146                         break;
  147                 case 2:
  148                         /* lost two data units */
  149                         *createFunc = (RF_VoidFuncPtr) rf_PQ_200_CreateReadDAG;
  150                         break;
  151                 }
  152                 return;
  153         }
  154         /* a write */
  155         switch (ntfail) {
  156         case 0:         /* fault free */
  157                 if (rf_suppressLocksAndLargeWrites ||
  158                     (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) && (layoutPtr->numDataCol != 1)) ||
  159                         (asmap->parityInfo->next != NULL) || (asmap->qInfo->next != NULL) || rf_CheckStripeForFailures(raidPtr, asmap))) {
  160 
  161                         *createFunc = (RF_VoidFuncPtr) rf_PQCreateSmallWriteDAG;
  162                 } else {
  163                         *createFunc = (RF_VoidFuncPtr) rf_PQCreateLargeWriteDAG;
  164                 }
  165                 break;
  166 
  167         case 1:         /* single disk fault */
  168                 if (npfail == 1) {
  169                         RF_ASSERT((asmap->failedPDAs[0]->type == RF_PDA_TYPE_PARITY) || (asmap->failedPDAs[0]->type == RF_PDA_TYPE_Q));
  170                         if (asmap->failedPDAs[0]->type == RF_PDA_TYPE_Q) {      /* q died, treat like
  171                                                                                  * normal mode raid5
  172                                                                                  * write. */
  173                                 if (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) || (asmap->numStripeUnitsAccessed == 1))
  174                                     || rf_NumFailedDataUnitsInStripe(raidPtr, asmap))
  175                                         *createFunc = (RF_VoidFuncPtr) rf_PQ_001_CreateSmallWriteDAG;
  176                                 else
  177                                         *createFunc = (RF_VoidFuncPtr) rf_PQ_001_CreateLargeWriteDAG;
  178                         } else {/* parity died, small write only updating Q */
  179                                 if (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) || (asmap->numStripeUnitsAccessed == 1))
  180                                     || rf_NumFailedDataUnitsInStripe(raidPtr, asmap))
  181                                         *createFunc = (RF_VoidFuncPtr) rf_PQ_010_CreateSmallWriteDAG;
  182                                 else
  183                                         *createFunc = (RF_VoidFuncPtr) rf_PQ_010_CreateLargeWriteDAG;
  184                         }
  185                 } else {        /* data missing. Do a P reconstruct write if
  186                                  * only a single data unit is lost in the
  187                                  * stripe, otherwise a PQ reconstruct write. */
  188                         if (rf_NumFailedDataUnitsInStripe(raidPtr, asmap) == 2)
  189                                 *createFunc = (RF_VoidFuncPtr) rf_PQ_200_CreateWriteDAG;
  190                         else
  191                                 *createFunc = (RF_VoidFuncPtr) rf_PQ_100_CreateWriteDAG;
  192                 }
  193                 break;
  194 
  195         case 2:         /* two disk faults */
  196                 switch (npfail) {
  197                 case 2: /* both p and q dead */
  198                         *createFunc = (RF_VoidFuncPtr) rf_PQ_011_CreateWriteDAG;
  199                         break;
  200                 case 1: /* either p or q and dead data */
  201                         RF_ASSERT(asmap->failedPDAs[0]->type == RF_PDA_TYPE_DATA);
  202                         RF_ASSERT((asmap->failedPDAs[1]->type == RF_PDA_TYPE_PARITY) || (asmap->failedPDAs[1]->type == RF_PDA_TYPE_Q));
  203                         if (asmap->failedPDAs[1]->type == RF_PDA_TYPE_Q)
  204                                 *createFunc = (RF_VoidFuncPtr) rf_PQ_101_CreateWriteDAG;
  205                         else
  206                                 *createFunc = (RF_VoidFuncPtr) rf_PQ_110_CreateWriteDAG;
  207                         break;
  208                 case 0: /* double data loss */
  209                         *createFunc = (RF_VoidFuncPtr) rf_PQ_200_CreateWriteDAG;
  210                         break;
  211                 }
  212                 break;
  213 
  214         default:                /* more than 2 disk faults */
  215                 *createFunc = NULL;
  216                 RF_PANIC();
  217         }
  218         return;
  219 }
  220 /*
  221    Used as a stop gap info function
  222 */
  223 #if 0
  224 static void 
  225 PQOne(raidPtr, nSucc, nAnte, asmap)
  226         RF_Raid_t *raidPtr;
  227         int    *nSucc;
  228         int    *nAnte;
  229         RF_AccessStripeMap_t *asmap;
  230 {
  231         *nSucc = *nAnte = 1;
  232 }
  233 
  234 static void 
  235 PQOneTwo(raidPtr, nSucc, nAnte, asmap)
  236         RF_Raid_t *raidPtr;
  237         int    *nSucc;
  238         int    *nAnte;
  239         RF_AccessStripeMap_t *asmap;
  240 {
  241         *nSucc = 1;
  242         *nAnte = 2;
  243 }
  244 #endif
  245 
  246 RF_CREATE_DAG_FUNC_DECL(rf_PQCreateLargeWriteDAG)
  247 {
  248         rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, 2,
  249             rf_RegularPQFunc, RF_FALSE);
  250 }
  251 
  252 int 
  253 rf_RegularONQFunc(node)
  254         RF_DagNode_t *node;
  255 {
  256         int     np = node->numParams;
  257         int     d;
  258         RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
  259         int     i;
  260         RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
  261         RF_Etimer_t timer;
  262         char   *qbuf, *qpbuf;
  263         char   *obuf, *nbuf;
  264         RF_PhysDiskAddr_t *old, *new;
  265         unsigned long coeff;
  266         unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit;
  267 
  268         RF_ETIMER_START(timer);
  269 
  270         d = (np - 3) / 4;
  271         RF_ASSERT(4 * d + 3 == np);
  272         qbuf = (char *) node->params[2 * d + 1].p;      /* q buffer */
  273         for (i = 0; i < d; i++) {
  274                 old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
  275                 obuf = (char *) node->params[2 * i + 1].p;
  276                 new = (RF_PhysDiskAddr_t *) node->params[2 * (d + 1 + i)].p;
  277                 nbuf = (char *) node->params[2 * (d + 1 + i) + 1].p;
  278                 RF_ASSERT(new->numSector == old->numSector);
  279                 RF_ASSERT(new->raidAddress == old->raidAddress);
  280                 /* the stripe unit within the stripe tells us the coefficient
  281                  * to use for the multiply. */
  282                 coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), new->raidAddress);
  283                 /* compute the data unit offset within the column, then add
  284                  * one */
  285                 coeff = (coeff % raidPtr->Layout.numDataCol);
  286                 qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, old->startSector % secPerSU);
  287                 QDelta(qpbuf, obuf, nbuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff);
  288         }
  289 
  290         RF_ETIMER_STOP(timer);
  291         RF_ETIMER_EVAL(timer);
  292         tracerec->q_us += RF_ETIMER_VAL_US(timer);
  293         rf_GenericWakeupFunc(node, 0);  /* call wake func explicitly since no
  294                                          * I/O in this node */
  295         return (0);
  296 }
  297 /*
  298    See the SimpleXORFunc for the difference between a simple and regular func.
  299    These Q functions should be used for
  300 
  301          new q = Q(data,old data,old q)
  302 
  303    style updates and not for
  304 
  305          q = ( new data, new data, .... )
  306 
  307    computations.
  308 
  309    The simple q takes 2(2d+1)+1 params, where d is the number
  310    of stripes written. The order of params is
  311    old data pda_0, old data buffer_0, old data pda_1, old data buffer_1, ... old data pda_d, old data buffer_d
  312    [2d] old q pda_0, old q buffer
  313    [2d+2] new data pda_0, new data buffer_0, ...                                    new data pda_d, new data buffer_d
  314    raidPtr
  315 */
  316 
  317 int 
  318 rf_SimpleONQFunc(node)
  319         RF_DagNode_t *node;
  320 {
  321         int     np = node->numParams;
  322         int     d;
  323         RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
  324         int     i;
  325         RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
  326         RF_Etimer_t timer;
  327         char   *qbuf;
  328         char   *obuf, *nbuf;
  329         RF_PhysDiskAddr_t *old, *new;
  330         unsigned long coeff;
  331 
  332         RF_ETIMER_START(timer);
  333 
  334         d = (np - 3) / 4;
  335         RF_ASSERT(4 * d + 3 == np);
  336         qbuf = (char *) node->params[2 * d + 1].p;      /* q buffer */
  337         for (i = 0; i < d; i++) {
  338                 old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
  339                 obuf = (char *) node->params[2 * i + 1].p;
  340                 new = (RF_PhysDiskAddr_t *) node->params[2 * (d + 1 + i)].p;
  341                 nbuf = (char *) node->params[2 * (d + 1 + i) + 1].p;
  342                 RF_ASSERT(new->numSector == old->numSector);
  343                 RF_ASSERT(new->raidAddress == old->raidAddress);
  344                 /* the stripe unit within the stripe tells us the coefficient
  345                  * to use for the multiply. */
  346                 coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), new->raidAddress);
  347                 /* compute the data unit offset within the column, then add
  348                  * one */
  349                 coeff = (coeff % raidPtr->Layout.numDataCol);
  350                 QDelta(qbuf, obuf, nbuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff);
  351         }
  352 
  353         RF_ETIMER_STOP(timer);
  354         RF_ETIMER_EVAL(timer);
  355         tracerec->q_us += RF_ETIMER_VAL_US(timer);
  356         rf_GenericWakeupFunc(node, 0);  /* call wake func explicitly since no
  357                                          * I/O in this node */
  358         return (0);
  359 }
  360 RF_CREATE_DAG_FUNC_DECL(rf_PQCreateSmallWriteDAG)
  361 {
  362         rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_pFuncs, &rf_qFuncs);
  363 }
  364 
  365 static void RegularQSubr(RF_DagNode_t *node, char   *qbuf);
  366 
  367 static void 
  368 RegularQSubr(node, qbuf)
  369         RF_DagNode_t *node;
  370         char   *qbuf;
  371 {
  372         int     np = node->numParams;
  373         int     d;
  374         RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
  375         unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit;
  376         int     i;
  377         RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
  378         RF_Etimer_t timer;
  379         char   *obuf, *qpbuf;
  380         RF_PhysDiskAddr_t *old;
  381         unsigned long coeff;
  382 
  383         RF_ETIMER_START(timer);
  384 
  385         d = (np - 1) / 2;
  386         RF_ASSERT(2 * d + 1 == np);
  387         for (i = 0; i < d; i++) {
  388                 old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
  389                 obuf = (char *) node->params[2 * i + 1].p;
  390                 coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), old->raidAddress);
  391                 /* compute the data unit offset within the column, then add
  392                  * one */
  393                 coeff = (coeff % raidPtr->Layout.numDataCol);
  394                 /* the input buffers may not all be aligned with the start of
  395                  * the stripe. so shift by their sector offset within the
  396                  * stripe unit */
  397                 qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, old->startSector % secPerSU);
  398                 rf_IncQ((unsigned long *) qpbuf, (unsigned long *) obuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff);
  399         }
  400 
  401         RF_ETIMER_STOP(timer);
  402         RF_ETIMER_EVAL(timer);
  403         tracerec->q_us += RF_ETIMER_VAL_US(timer);
  404 }
  405 /*
  406    used in degraded writes.
  407 */
  408 
  409 static void DegrQSubr(RF_DagNode_t *node);
  410 
  411 static void 
  412 DegrQSubr(node)
  413         RF_DagNode_t *node;
  414 {
  415         int     np = node->numParams;
  416         int     d;
  417         RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
  418         unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit;
  419         int     i;
  420         RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
  421         RF_Etimer_t timer;
  422         char   *qbuf = node->results[1];
  423         char   *obuf, *qpbuf;
  424         RF_PhysDiskAddr_t *old;
  425         unsigned long coeff;
  426         unsigned fail_start;
  427         int     j;
  428 
  429         old = (RF_PhysDiskAddr_t *) node->params[np - 2].p;
  430         fail_start = old->startSector % secPerSU;
  431 
  432         RF_ETIMER_START(timer);
  433 
  434         d = (np - 2) / 2;
  435         RF_ASSERT(2 * d + 2 == np);
  436         for (i = 0; i < d; i++) {
  437                 old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
  438                 obuf = (char *) node->params[2 * i + 1].p;
  439                 coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), old->raidAddress);
  440                 /* compute the data unit offset within the column, then add
  441                  * one */
  442                 coeff = (coeff % raidPtr->Layout.numDataCol);
  443                 /* the input buffers may not all be aligned with the start of
  444                  * the stripe. so shift by their sector offset within the
  445                  * stripe unit */
  446                 j = old->startSector % secPerSU;
  447                 RF_ASSERT(j >= fail_start);
  448                 qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, j - fail_start);
  449                 rf_IncQ((unsigned long *) qpbuf, (unsigned long *) obuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff);
  450         }
  451 
  452         RF_ETIMER_STOP(timer);
  453         RF_ETIMER_EVAL(timer);
  454         tracerec->q_us += RF_ETIMER_VAL_US(timer);
  455 }
  456 /*
  457    Called by large write code to compute the new parity and the new q.
  458 
  459    structure of the params:
  460 
  461    pda_0, buffer_0, pda_1 , buffer_1, ... , pda_d, buffer_d (d = numDataCol)
  462    raidPtr
  463 
  464    for a total of 2d+1 arguments.
  465    The result buffers results[0], results[1] are the buffers for the p and q,
  466    respectively.
  467 
  468    We compute Q first, then compute P. The P calculation may try to reuse
  469    one of the input buffers for its output, so if we computed P first, we would
  470    corrupt the input for the q calculation.
  471 */
  472 
  473 int 
  474 rf_RegularPQFunc(node)
  475         RF_DagNode_t *node;
  476 {
  477         RegularQSubr(node, node->results[1]);
  478         return (rf_RegularXorFunc(node));       /* does the wakeup */
  479 }
  480 
  481 int 
  482 rf_RegularQFunc(node)
  483         RF_DagNode_t *node;
  484 {
  485         /* Almost ... adjust Qsubr args */
  486         RegularQSubr(node, node->results[0]);
  487         rf_GenericWakeupFunc(node, 0);  /* call wake func explicitly since no
  488                                          * I/O in this node */
  489         return (0);
  490 }
  491 /*
  492    Called by singly degraded write code to compute the new parity and the new q.
  493 
  494    structure of the params:
  495 
  496    pda_0, buffer_0, pda_1 , buffer_1, ... , pda_d, buffer_d
  497    failedPDA raidPtr
  498 
  499    for a total of 2d+2 arguments.
  500    The result buffers results[0], results[1] are the buffers for the parity and q,
  501    respectively.
  502 
  503    We compute Q first, then compute parity. The parity calculation may try to reuse
  504    one of the input buffers for its output, so if we computed parity first, we would
  505    corrupt the input for the q calculation.
  506 
  507    We treat this identically to the regularPQ case, ignoring the failedPDA extra argument.
  508 */
  509 
  510 void 
  511 rf_Degraded_100_PQFunc(node)
  512         RF_DagNode_t *node;
  513 {
  514         int     np = node->numParams;
  515 
  516         RF_ASSERT(np >= 2);
  517         DegrQSubr(node);
  518         rf_RecoveryXorFunc(node);
  519 }
  520 
  521 
  522 /*
  523    The two below are used when reading a stripe with a single lost data unit.
  524    The parameters are
  525 
  526    pda_0, buffer_0, .... pda_n, buffer_n, P pda, P buffer, failedPDA, raidPtr
  527 
  528    and results[0] contains the data buffer. Which is originally zero-filled.
  529 
  530 */
  531 
  532 /* this Q func is used by the degraded-mode dag functions to recover lost data.
  533  * the second-to-last parameter is the PDA for the failed portion of the access.
  534  * the code here looks at this PDA and assumes that the xor target buffer is
  535  * equal in size to the number of sectors in the failed PDA.  It then uses
  536  * the other PDAs in the parameter list to determine where within the target
  537  * buffer the corresponding data should be xored.
  538  *
  539  * Recall the basic equation is
  540  *
  541  *     Q = ( data_1 + 2 * data_2 ... + k * data_k  ) mod 256
  542  *
  543  * so to recover data_j we need
  544  *
  545  *    J data_j = (Q - data_1 - 2 data_2 ....- k* data_k) mod 256
  546  *
  547  * So the coefficient for each buffer is (255 - data_col), and j should be initialized by
  548  * copying Q into it. Then we need to do a table lookup to solve
  549  *   data_j /= J
  550  *
  551  *
  552  */
  553 int 
  554 rf_RecoveryQFunc(node)
  555         RF_DagNode_t *node;
  556 {
  557         RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
  558         RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
  559         RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p;
  560         int     i;
  561         RF_PhysDiskAddr_t *pda;
  562         RF_RaidAddr_t suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr, failedPDA->startSector);
  563         char   *srcbuf, *destbuf;
  564         RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
  565         RF_Etimer_t timer;
  566         unsigned long coeff;
  567 
  568         RF_ETIMER_START(timer);
  569         /* start by copying Q into the buffer */
  570         memcpy(node->results[0], node->params[node->numParams - 3].p,
  571             rf_RaidAddressToByte(raidPtr, failedPDA->numSector));
  572         for (i = 0; i < node->numParams - 4; i += 2) {
  573                 RF_ASSERT(node->params[i + 1].p != node->results[0]);
  574                 pda = (RF_PhysDiskAddr_t *) node->params[i].p;
  575                 srcbuf = (char *) node->params[i + 1].p;
  576                 suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
  577                 destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr, suoffset - failedSUOffset);
  578                 coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), pda->raidAddress);
  579                 /* compute the data unit offset within the column */
  580                 coeff = (coeff % raidPtr->Layout.numDataCol);
  581                 rf_IncQ((unsigned long *) destbuf, (unsigned long *) srcbuf, rf_RaidAddressToByte(raidPtr, pda->numSector), coeff);
  582         }
  583         /* Do the nasty inversion now */
  584         coeff = (rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), failedPDA->startSector) % raidPtr->Layout.numDataCol);
  585         rf_InvertQ(node->results[0], node->results[0], rf_RaidAddressToByte(raidPtr, pda->numSector), coeff);
  586         RF_ETIMER_STOP(timer);
  587         RF_ETIMER_EVAL(timer);
  588         tracerec->q_us += RF_ETIMER_VAL_US(timer);
  589         rf_GenericWakeupFunc(node, 0);
  590         return (0);
  591 }
  592 
  593 int 
  594 rf_RecoveryPQFunc(node)
  595         RF_DagNode_t *node;
  596 {
  597         RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
  598         printf("raid%d: Recovery from PQ not implemented.\n",raidPtr->raidid);
  599         return (1);
  600 }
  601 /*
  602    Degraded write Q subroutine.
  603    Used when P is dead.
  604    Large-write style Q computation.
  605    Parameters
  606 
  607    (pda,buf),(pda,buf),.....,(failedPDA,bufPtr),failedPDA,raidPtr.
  608 
  609    We ignore failedPDA.
  610 
  611    This is a "simple style" recovery func.
  612 */
  613 
  614 void 
  615 rf_PQ_DegradedWriteQFunc(node)
  616         RF_DagNode_t *node;
  617 {
  618         int     np = node->numParams;
  619         int     d;
  620         RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
  621         unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit;
  622         int     i;
  623         RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
  624         RF_Etimer_t timer;
  625         char   *qbuf = node->results[0];
  626         char   *obuf, *qpbuf;
  627         RF_PhysDiskAddr_t *old;
  628         unsigned long coeff;
  629         int     fail_start, j;
  630 
  631         old = (RF_PhysDiskAddr_t *) node->params[np - 2].p;
  632         fail_start = old->startSector % secPerSU;
  633 
  634         RF_ETIMER_START(timer);
  635 
  636         d = (np - 2) / 2;
  637         RF_ASSERT(2 * d + 2 == np);
  638 
  639         for (i = 0; i < d; i++) {
  640                 old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
  641                 obuf = (char *) node->params[2 * i + 1].p;
  642                 coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), old->raidAddress);
  643                 /* compute the data unit offset within the column, then add
  644                  * one */
  645                 coeff = (coeff % raidPtr->Layout.numDataCol);
  646                 j = old->startSector % secPerSU;
  647                 RF_ASSERT(j >= fail_start);
  648                 qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, j - fail_start);
  649                 rf_IncQ((unsigned long *) qpbuf, (unsigned long *) obuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff);
  650         }
  651 
  652         RF_ETIMER_STOP(timer);
  653         RF_ETIMER_EVAL(timer);
  654         tracerec->q_us += RF_ETIMER_VAL_US(timer);
  655         rf_GenericWakeupFunc(node, 0);
  656 }
  657 
  658 
  659 
  660 
  661 /* Q computations */
  662 
  663 /*
  664    coeff - column;
  665 
  666    compute  dest ^= qfor[28-coeff][rn[coeff+1] a]
  667 
  668    on 5-bit basis;
  669    length in bytes;
  670 */
  671 
  672 void 
  673 rf_IncQ(dest, buf, length, coeff)
  674         unsigned long *dest;
  675         unsigned long *buf;
  676         unsigned length;
  677         unsigned coeff;
  678 {
  679         unsigned long a, d, new;
  680         unsigned long a1, a2;
  681         unsigned int *q = &(rf_qfor[28 - coeff][0]);
  682         unsigned r = rf_rn[coeff + 1];
  683 
  684 #define EXTRACT(a,i) ((a >> (5L*i)) & 0x1f)
  685 #define INSERT(a,i) (a << (5L*i))
  686 
  687         length /= 8;
  688         /* 13 5 bit quants in a 64 bit word */
  689         while (length) {
  690                 a = *buf++;
  691                 d = *dest;
  692                 a1 = EXTRACT(a, 0) ^ r;
  693                 a2 = EXTRACT(a, 1) ^ r;
  694                 new = INSERT(a2, 1) | a1;
  695                 a1 = EXTRACT(a, 2) ^ r;
  696                 a2 = EXTRACT(a, 3) ^ r;
  697                 a1 = q[a1];
  698                 a2 = q[a2];
  699                 new = new | INSERT(a1, 2) | INSERT(a2, 3);
  700                 a1 = EXTRACT(a, 4) ^ r;
  701                 a2 = EXTRACT(a, 5) ^ r;
  702                 a1 = q[a1];
  703                 a2 = q[a2];
  704                 new = new | INSERT(a1, 4) | INSERT(a2, 5);
  705                 a1 = EXTRACT(a, 5) ^ r;
  706                 a2 = EXTRACT(a, 6) ^ r;
  707                 a1 = q[a1];
  708                 a2 = q[a2];
  709                 new = new | INSERT(a1, 5) | INSERT(a2, 6);
  710 #if RF_LONGSHIFT > 2
  711                 a1 = EXTRACT(a, 7) ^ r;
  712                 a2 = EXTRACT(a, 8) ^ r;
  713                 a1 = q[a1];
  714                 a2 = q[a2];
  715                 new = new | INSERT(a1, 7) | INSERT(a2, 8);
  716                 a1 = EXTRACT(a, 9) ^ r;
  717                 a2 = EXTRACT(a, 10) ^ r;
  718                 a1 = q[a1];
  719                 a2 = q[a2];
  720                 new = new | INSERT(a1, 9) | INSERT(a2, 10);
  721                 a1 = EXTRACT(a, 11) ^ r;
  722                 a2 = EXTRACT(a, 12) ^ r;
  723                 a1 = q[a1];
  724                 a2 = q[a2];
  725                 new = new | INSERT(a1, 11) | INSERT(a2, 12);
  726 #endif                          /* RF_LONGSHIFT > 2 */
  727                 d ^= new;
  728                 *dest++ = d;
  729                 length--;
  730         }
  731 }
  732 /*
  733    compute
  734 
  735    dest ^= rf_qfor[28-coeff][rf_rn[coeff+1] (old^new) ]
  736 
  737    on a five bit basis.
  738    optimization: compute old ^ new on 64 bit basis.
  739 
  740    length in bytes.

         State of the code below (it does not evaluate the formula above):
         - with _KERNEL defined, dest is zero-filled for length bytes and
           nothing else happens;
         - otherwise r is reset to 0 and q to NULL before the loop, so every
           table lookup q[...] in the loop indexes through a null pointer.
         dest, obuf and nbuf are char pointers, so each "*p++" in the loop
         touches one char, although the loop count is length / 8.
         NOTE(review): presumably the buffers were meant to be accessed as
         unsigned long words, as rf_IncQ does -- confirm before enabling
         the non-kernel path.
  741 */
  742 
  743 static void 
  744 QDelta(
  745     char *dest,
  746     char *obuf,
  747     char *nbuf,
  748     unsigned length,
  749     unsigned char coeff)
  750 {
  751         unsigned long a, d, new;
  752         unsigned long a1, a2;
  753         unsigned int *q = &(rf_qfor[28 - coeff][0]);
  754         unsigned int r = rf_rn[coeff + 1];
  755 
              /*
               * The two assignments below discard the table pointer and the
               * constant that were just looked up; every local ends up 0/NULL.
               */
  756         r = a1 = a2 = new = d = a = 0; /* XXX for now... */
  757         q = NULL; /* XXX for now */
  758 
  759 #ifdef _KERNEL
  760         /* PQ in kernel currently not supported because the encoding/decoding
  761          * table is not present */
  762         memset(dest, 0, length);
  763 #else                           /* KERNEL */
  764         /* this code probably doesn't work and should be rewritten  -wvcii */
  765         /* 13 5 bit quants in a 64 bit word */
  766         length /= 8;
  767         while (length) {
  768                 a = *obuf++;    /* XXX need to reorg to avoid cache conflicts */
  769                 a ^= *nbuf++;
  770                 d = *dest;
                      /* quanta are handled in pairs: XOR with r, look up in q, re-pack */
  771                 a1 = EXTRACT(a, 0) ^ r;
  772                 a2 = EXTRACT(a, 1) ^ r;
  773                 a1 = q[a1];
  774                 a2 = q[a2];
  775                 new = INSERT(a2, 1) | a1;
  776                 a1 = EXTRACT(a, 2) ^ r;
  777                 a2 = EXTRACT(a, 3) ^ r;
  778                 a1 = q[a1];
  779                 a2 = q[a2];
  780                 new = new | INSERT(a1, 2) | INSERT(a2, 3);
  781                 a1 = EXTRACT(a, 4) ^ r;
  782                 a2 = EXTRACT(a, 5) ^ r;
  783                 a1 = q[a1];
  784                 a2 = q[a2];
  785                 new = new | INSERT(a1, 4) | INSERT(a2, 5);
                      /* quantum 5 is processed a second time here, paired with 6 */
  786                 a1 = EXTRACT(a, 5) ^ r;
  787                 a2 = EXTRACT(a, 6) ^ r;
  788                 a1 = q[a1];
  789                 a2 = q[a2];
  790                 new = new | INSERT(a1, 5) | INSERT(a2, 6);
  791 #if RF_LONGSHIFT > 2
  792                 a1 = EXTRACT(a, 7) ^ r;
  793                 a2 = EXTRACT(a, 8) ^ r;
  794                 a1 = q[a1];
  795                 a2 = q[a2];
  796                 new = new | INSERT(a1, 7) | INSERT(a2, 8);
  797                 a1 = EXTRACT(a, 9) ^ r;
  798                 a2 = EXTRACT(a, 10) ^ r;
  799                 a1 = q[a1];
  800                 a2 = q[a2];
  801                 new = new | INSERT(a1, 9) | INSERT(a2, 10);
  802                 a1 = EXTRACT(a, 11) ^ r;
  803                 a2 = EXTRACT(a, 12) ^ r;
  804                 a1 = q[a1];
  805                 a2 = q[a2];
  806                 new = new | INSERT(a1, 11) | INSERT(a2, 12);
  807 #endif                          /* RF_LONGSHIFT > 2 */
  808                 d ^= new;
  809                 *dest++ = d;
  810                 length--;
  811         }
  812 #endif                          /* _KERNEL */
  813 }
  814 /*
  815    recover columns a and b from the given p and q into
  816    bufs abuf and bbuf. All bufs are word aligned.
  817    Length is in bytes.
  818 */
  819 
  820 
  821 /*
  822  * XXX
  823  *
  824  * Everything about this seems wrong.
  825  */
  826 void 
  827 rf_PQ_recover(pbuf, qbuf, abuf, bbuf, length, coeff_a, coeff_b)
  828         unsigned long *pbuf;
  829         unsigned long *qbuf;
  830         unsigned long *abuf;
  831         unsigned long *bbuf;
  832         unsigned length;
  833         unsigned coeff_a;
  834         unsigned coeff_b;
  835 {
  836         unsigned long p, q, a, a0, a1;
  837         int     col = (29 * coeff_a) + coeff_b;
  838         unsigned char *q0 = &(rf_qinv[col][0]);
  839 
  840         length /= 8;
  841         while (length) {
  842                 p = *pbuf++;
  843                 q = *qbuf++;
  844                 a0 = EXTRACT(p, 0);
  845                 a1 = EXTRACT(q, 0);
  846                 a = q0[a0 << 5 | a1];
  847 #define MF(i) \
  848       a0 = EXTRACT(p,i); \
  849       a1 = EXTRACT(q,i); \
  850       a  = a | INSERT(q0[a0<<5 | a1],i)
  851 
  852                 MF(1);
  853                 MF(2);
  854                 MF(3);
  855                 MF(4);
  856                 MF(5);
  857                 MF(6);
  858 #if 0
  859                 MF(7);
  860                 MF(8);
  861                 MF(9);
  862                 MF(10);
  863                 MF(11);
  864                 MF(12);
  865 #endif                          /* 0 */
  866                 *abuf++ = a;
  867                 *bbuf++ = a ^ p;
  868                 length--;
  869         }
  870 }
  871 /*
  872    Lost parity and a data column. Recover that data column.
  873    Assume col coeff is lost. Let q the contents of Q after
  874    all surviving data columns have been q-xored out of it.
  875    Then we have the equation
  876 
  877    q[28-coeff][a_i ^ r_i+1] = q
  878 
  879    but q is cyclic with period 31.
  880    So q[3+coeff][q[28-coeff][a_i ^ r_{i+1}]] =
  881       q[31][a_i ^ r_{i+1}] = a_i ^ r_{i+1} .
  882 
  883    so a_i = r_{coeff+1} ^ q[3+coeff][q]
  884 
  885    The routine is passed q buffer and the buffer
  886    the data is to be recoverd into. They can be the same.
  887 */
  888 
  889 
  890 
  891 static void 
  892 rf_InvertQ(
  893     unsigned long *qbuf,
  894     unsigned long *abuf,
  895     unsigned length,
  896     unsigned coeff)
  897 {
  898         unsigned long a, new;
  899         unsigned long a1, a2;
  900         unsigned int *q = &(rf_qfor[3 + coeff][0]);
  901         unsigned r = rf_rn[coeff + 1];
  902 
  903         /* 13 5 bit quants in a 64 bit word */
  904         length /= 8;
  905         while (length) {
  906                 a = *qbuf++;
  907                 a1 = EXTRACT(a, 0);
  908                 a2 = EXTRACT(a, 1);
  909                 a1 = r ^ q[a1];
  910                 a2 = r ^ q[a2];
  911                 new = INSERT(a2, 1) | a1;
  912 #define M(i,j) \
  913       a1 = EXTRACT(a,i); \
  914       a2 = EXTRACT(a,j); \
  915       a1 = r ^ q[a1]; \
  916       a2 = r ^ q[a2]; \
  917       new = new | INSERT(a1,i) | INSERT(a2,j)
  918 
  919                 M(2, 3);
  920                 M(4, 5);
  921                 M(5, 6);
  922 #if RF_LONGSHIFT > 2
  923                 M(7, 8);
  924                 M(9, 10);
  925                 M(11, 12);
  926 #endif                          /* RF_LONGSHIFT > 2 */
  927                 *abuf++ = new;
  928                 length--;
  929         }
  930 }
  931 #endif                          /* (RF_INCLUDE_DECL_PQ > 0) ||
  932                                  * (RF_INCLUDE_RAID6 > 0) */

Cache object: 51f8544460fb5b57011fd03a93fd3cc4


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.