1 /* $NetBSD: rf_pq.c,v 1.13 2003/11/16 20:32:05 oster Exp $ */
2 /*
3 * Copyright (c) 1995 Carnegie-Mellon University.
4 * All rights reserved.
5 *
6 * Author: Daniel Stodolsky
7 *
8 * Permission to use, copy, modify and distribute this software and
9 * its documentation is hereby granted, provided that both the copyright
10 * notice and this permission notice appear in all copies of the
11 * software, derivative works or modified versions, and any portions
12 * thereof, and that both notices appear in supporting documentation.
13 *
14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
15 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
16 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
17 *
18 * Carnegie Mellon requests users of this software to return to
19 *
20 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
21 * School of Computer Science
22 * Carnegie Mellon University
23 * Pittsburgh PA 15213-3890
24 *
25 * any improvements or extensions that they make and grant Carnegie the
26 * rights to redistribute these changes.
27 */
28
29 /*
30 * Code for RAID level 6 (P + Q) disk array architecture.
31 */
32
33 #include <sys/cdefs.h>
34 __KERNEL_RCSID(0, "$NetBSD: rf_pq.c,v 1.13 2003/11/16 20:32:05 oster Exp $");
35
36 #include "rf_archs.h"
37
38 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) || (RF_INCLUDE_EVENODD > 0)
39
40 #include <dev/raidframe/raidframevar.h>
41
42 #include "rf_raid.h"
43 #include "rf_dag.h"
44 #include "rf_dagffrd.h"
45 #include "rf_dagffwr.h"
46 #include "rf_dagdegrd.h"
47 #include "rf_dagdegwr.h"
48 #include "rf_dagutils.h"
49 #include "rf_dagfuncs.h"
50 #include "rf_etimer.h"
51 #include "rf_pqdeg.h"
52 #include "rf_general.h"
53 #include "rf_map.h"
54 #include "rf_pq.h"
55
56 RF_RedFuncs_t rf_pFuncs = {rf_RegularONPFunc, "Regular Old-New P", rf_SimpleONPFunc, "Simple Old-New P"};
57 RF_RedFuncs_t rf_pRecoveryFuncs = {rf_RecoveryPFunc, "Recovery P Func", rf_RecoveryPFunc, "Recovery P Func"};
58
59 int
60 rf_RegularONPFunc(node)
61 RF_DagNode_t *node;
62 {
63 return (rf_RegularXorFunc(node));
64 }
65 /*
66 same as simpleONQ func, but the coefficient is always 1
67 */
68
69 int
70 rf_SimpleONPFunc(node)
71 RF_DagNode_t *node;
72 {
73 return (rf_SimpleXorFunc(node));
74 }
75
76 int
77 rf_RecoveryPFunc(node)
78 RF_DagNode_t *node;
79 {
80 return (rf_RecoveryXorFunc(node));
81 }
82
83 int
84 rf_RegularPFunc(node)
85 RF_DagNode_t *node;
86 {
87 return (rf_RegularXorFunc(node));
88 }
89 #endif /* (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0) || (RF_INCLUDE_EVENODD > 0) */
90 #if (RF_INCLUDE_DECL_PQ > 0) || (RF_INCLUDE_RAID6 > 0)
91
/* Prototypes for the file-local Q helpers. */
static void QDelta(char *dest, char *obuf, char *nbuf, unsigned length,
    unsigned char coeff);
static void rf_InvertQ(unsigned long *qbuf, unsigned long *abuf,
    unsigned length, unsigned coeff);
98
99 RF_RedFuncs_t rf_qFuncs = {rf_RegularONQFunc, "Regular Old-New Q", rf_SimpleONQFunc, "Simple Old-New Q"};
100 RF_RedFuncs_t rf_qRecoveryFuncs = {rf_RecoveryQFunc, "Recovery Q Func", rf_RecoveryQFunc, "Recovery Q Func"};
101 RF_RedFuncs_t rf_pqRecoveryFuncs = {rf_RecoveryPQFunc, "Recovery PQ Func", rf_RecoveryPQFunc, "Recovery PQ Func"};
102
103 void
104 rf_PQDagSelect(
105 RF_Raid_t * raidPtr,
106 RF_IoType_t type,
107 RF_AccessStripeMap_t * asmap,
108 RF_VoidFuncPtr * createFunc)
109 {
110 RF_RaidLayout_t *layoutPtr = &(raidPtr->Layout);
111 unsigned ndfail = asmap->numDataFailed;
112 unsigned npfail = asmap->numParityFailed;
113 unsigned ntfail = npfail + ndfail;
114
115 RF_ASSERT(RF_IO_IS_R_OR_W(type));
116 if (ntfail > 2) {
117 RF_ERRORMSG("more than two disks failed in a single group! Aborting I/O operation.\n");
118 *createFunc = NULL;
119 return;
120 }
121 /* ok, we can do this I/O */
122 if (type == RF_IO_TYPE_READ) {
123 switch (ndfail) {
124 case 0:
125 /* fault free read */
126 *createFunc = (RF_VoidFuncPtr) rf_CreateFaultFreeReadDAG; /* same as raid 5 */
127 break;
128 case 1:
129 /* lost a single data unit */
130 /* two cases: (1) parity is not lost. do a normal raid
131 * 5 reconstruct read. (2) parity is lost. do a
132 * reconstruct read using "q". */
133 if (ntfail == 2) { /* also lost redundancy */
134 if (asmap->failedPDAs[1]->type == RF_PDA_TYPE_PARITY)
135 *createFunc = (RF_VoidFuncPtr) rf_PQ_110_CreateReadDAG;
136 else
137 *createFunc = (RF_VoidFuncPtr) rf_PQ_101_CreateReadDAG;
138 } else {
139 /* P and Q are ok. But is there a failure in
140 * some unaccessed data unit? */
141 if (rf_NumFailedDataUnitsInStripe(raidPtr, asmap) == 2)
142 *createFunc = (RF_VoidFuncPtr) rf_PQ_200_CreateReadDAG;
143 else
144 *createFunc = (RF_VoidFuncPtr) rf_PQ_100_CreateReadDAG;
145 }
146 break;
147 case 2:
148 /* lost two data units */
149 *createFunc = (RF_VoidFuncPtr) rf_PQ_200_CreateReadDAG;
150 break;
151 }
152 return;
153 }
154 /* a write */
155 switch (ntfail) {
156 case 0: /* fault free */
157 if (rf_suppressLocksAndLargeWrites ||
158 (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) && (layoutPtr->numDataCol != 1)) ||
159 (asmap->parityInfo->next != NULL) || (asmap->qInfo->next != NULL) || rf_CheckStripeForFailures(raidPtr, asmap))) {
160
161 *createFunc = (RF_VoidFuncPtr) rf_PQCreateSmallWriteDAG;
162 } else {
163 *createFunc = (RF_VoidFuncPtr) rf_PQCreateLargeWriteDAG;
164 }
165 break;
166
167 case 1: /* single disk fault */
168 if (npfail == 1) {
169 RF_ASSERT((asmap->failedPDAs[0]->type == RF_PDA_TYPE_PARITY) || (asmap->failedPDAs[0]->type == RF_PDA_TYPE_Q));
170 if (asmap->failedPDAs[0]->type == RF_PDA_TYPE_Q) { /* q died, treat like
171 * normal mode raid5
172 * write. */
173 if (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) || (asmap->numStripeUnitsAccessed == 1))
174 || rf_NumFailedDataUnitsInStripe(raidPtr, asmap))
175 *createFunc = (RF_VoidFuncPtr) rf_PQ_001_CreateSmallWriteDAG;
176 else
177 *createFunc = (RF_VoidFuncPtr) rf_PQ_001_CreateLargeWriteDAG;
178 } else {/* parity died, small write only updating Q */
179 if (((asmap->numStripeUnitsAccessed <= (layoutPtr->numDataCol / 2)) || (asmap->numStripeUnitsAccessed == 1))
180 || rf_NumFailedDataUnitsInStripe(raidPtr, asmap))
181 *createFunc = (RF_VoidFuncPtr) rf_PQ_010_CreateSmallWriteDAG;
182 else
183 *createFunc = (RF_VoidFuncPtr) rf_PQ_010_CreateLargeWriteDAG;
184 }
185 } else { /* data missing. Do a P reconstruct write if
186 * only a single data unit is lost in the
187 * stripe, otherwise a PQ reconstruct write. */
188 if (rf_NumFailedDataUnitsInStripe(raidPtr, asmap) == 2)
189 *createFunc = (RF_VoidFuncPtr) rf_PQ_200_CreateWriteDAG;
190 else
191 *createFunc = (RF_VoidFuncPtr) rf_PQ_100_CreateWriteDAG;
192 }
193 break;
194
195 case 2: /* two disk faults */
196 switch (npfail) {
197 case 2: /* both p and q dead */
198 *createFunc = (RF_VoidFuncPtr) rf_PQ_011_CreateWriteDAG;
199 break;
200 case 1: /* either p or q and dead data */
201 RF_ASSERT(asmap->failedPDAs[0]->type == RF_PDA_TYPE_DATA);
202 RF_ASSERT((asmap->failedPDAs[1]->type == RF_PDA_TYPE_PARITY) || (asmap->failedPDAs[1]->type == RF_PDA_TYPE_Q));
203 if (asmap->failedPDAs[1]->type == RF_PDA_TYPE_Q)
204 *createFunc = (RF_VoidFuncPtr) rf_PQ_101_CreateWriteDAG;
205 else
206 *createFunc = (RF_VoidFuncPtr) rf_PQ_110_CreateWriteDAG;
207 break;
208 case 0: /* double data loss */
209 *createFunc = (RF_VoidFuncPtr) rf_PQ_200_CreateWriteDAG;
210 break;
211 }
212 break;
213
214 default: /* more than 2 disk faults */
215 *createFunc = NULL;
216 RF_PANIC();
217 }
218 return;
219 }
/*
 * Stop-gap info functions.  Currently compiled out.
 *
 * PQOne reports one successor and one antecedent; PQOneTwo reports one
 * successor and two antecedents.  raidPtr and asmap are not consulted.
 */
#if 0
static void
PQOne(RF_Raid_t *raidPtr, int *nSucc, int *nAnte, RF_AccessStripeMap_t *asmap)
{
	*nSucc = *nAnte = 1;
}

static void
PQOneTwo(RF_Raid_t *raidPtr, int *nSucc, int *nAnte,
    RF_AccessStripeMap_t *asmap)
{
	*nSucc = 1;
	*nAnte = 2;
}
#endif
245
246 RF_CREATE_DAG_FUNC_DECL(rf_PQCreateLargeWriteDAG)
247 {
248 rf_CommonCreateLargeWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, 2,
249 rf_RegularPQFunc, RF_FALSE);
250 }
251
252 int
253 rf_RegularONQFunc(node)
254 RF_DagNode_t *node;
255 {
256 int np = node->numParams;
257 int d;
258 RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
259 int i;
260 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
261 RF_Etimer_t timer;
262 char *qbuf, *qpbuf;
263 char *obuf, *nbuf;
264 RF_PhysDiskAddr_t *old, *new;
265 unsigned long coeff;
266 unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit;
267
268 RF_ETIMER_START(timer);
269
270 d = (np - 3) / 4;
271 RF_ASSERT(4 * d + 3 == np);
272 qbuf = (char *) node->params[2 * d + 1].p; /* q buffer */
273 for (i = 0; i < d; i++) {
274 old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
275 obuf = (char *) node->params[2 * i + 1].p;
276 new = (RF_PhysDiskAddr_t *) node->params[2 * (d + 1 + i)].p;
277 nbuf = (char *) node->params[2 * (d + 1 + i) + 1].p;
278 RF_ASSERT(new->numSector == old->numSector);
279 RF_ASSERT(new->raidAddress == old->raidAddress);
280 /* the stripe unit within the stripe tells us the coefficient
281 * to use for the multiply. */
282 coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), new->raidAddress);
283 /* compute the data unit offset within the column, then add
284 * one */
285 coeff = (coeff % raidPtr->Layout.numDataCol);
286 qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, old->startSector % secPerSU);
287 QDelta(qpbuf, obuf, nbuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff);
288 }
289
290 RF_ETIMER_STOP(timer);
291 RF_ETIMER_EVAL(timer);
292 tracerec->q_us += RF_ETIMER_VAL_US(timer);
293 rf_GenericWakeupFunc(node, 0); /* call wake func explicitly since no
294 * I/O in this node */
295 return (0);
296 }
297 /*
298 See the SimpleXORFunc for the difference between a simple and regular func.
299 These Q functions should be used for
300
301 new q = Q(data,old data,old q)
302
303 style updates and not for
304
305 q = ( new data, new data, .... )
306
307 computations.
308
309 The simple q takes 2(2d+1)+1 params, where d is the number
310 of stripes written. The order of params is
311 old data pda_0, old data buffer_0, old data pda_1, old data buffer_1, ... old data pda_d, old data buffer_d
312 [2d] old q pda_0, old q buffer
313 [2d_2] new data pda_0, new data buffer_0, ... new data pda_d, new data buffer_d
314 raidPtr
315 */
316
317 int
318 rf_SimpleONQFunc(node)
319 RF_DagNode_t *node;
320 {
321 int np = node->numParams;
322 int d;
323 RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
324 int i;
325 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
326 RF_Etimer_t timer;
327 char *qbuf;
328 char *obuf, *nbuf;
329 RF_PhysDiskAddr_t *old, *new;
330 unsigned long coeff;
331
332 RF_ETIMER_START(timer);
333
334 d = (np - 3) / 4;
335 RF_ASSERT(4 * d + 3 == np);
336 qbuf = (char *) node->params[2 * d + 1].p; /* q buffer */
337 for (i = 0; i < d; i++) {
338 old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
339 obuf = (char *) node->params[2 * i + 1].p;
340 new = (RF_PhysDiskAddr_t *) node->params[2 * (d + 1 + i)].p;
341 nbuf = (char *) node->params[2 * (d + 1 + i) + 1].p;
342 RF_ASSERT(new->numSector == old->numSector);
343 RF_ASSERT(new->raidAddress == old->raidAddress);
344 /* the stripe unit within the stripe tells us the coefficient
345 * to use for the multiply. */
346 coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), new->raidAddress);
347 /* compute the data unit offset within the column, then add
348 * one */
349 coeff = (coeff % raidPtr->Layout.numDataCol);
350 QDelta(qbuf, obuf, nbuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff);
351 }
352
353 RF_ETIMER_STOP(timer);
354 RF_ETIMER_EVAL(timer);
355 tracerec->q_us += RF_ETIMER_VAL_US(timer);
356 rf_GenericWakeupFunc(node, 0); /* call wake func explicitly since no
357 * I/O in this node */
358 return (0);
359 }
360 RF_CREATE_DAG_FUNC_DECL(rf_PQCreateSmallWriteDAG)
361 {
362 rf_CommonCreateSmallWriteDAG(raidPtr, asmap, dag_h, bp, flags, allocList, &rf_pFuncs, &rf_qFuncs);
363 }
364
365 static void RegularQSubr(RF_DagNode_t *node, char *qbuf);
366
367 static void
368 RegularQSubr(node, qbuf)
369 RF_DagNode_t *node;
370 char *qbuf;
371 {
372 int np = node->numParams;
373 int d;
374 RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
375 unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit;
376 int i;
377 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
378 RF_Etimer_t timer;
379 char *obuf, *qpbuf;
380 RF_PhysDiskAddr_t *old;
381 unsigned long coeff;
382
383 RF_ETIMER_START(timer);
384
385 d = (np - 1) / 2;
386 RF_ASSERT(2 * d + 1 == np);
387 for (i = 0; i < d; i++) {
388 old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
389 obuf = (char *) node->params[2 * i + 1].p;
390 coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), old->raidAddress);
391 /* compute the data unit offset within the column, then add
392 * one */
393 coeff = (coeff % raidPtr->Layout.numDataCol);
394 /* the input buffers may not all be aligned with the start of
395 * the stripe. so shift by their sector offset within the
396 * stripe unit */
397 qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, old->startSector % secPerSU);
398 rf_IncQ((unsigned long *) qpbuf, (unsigned long *) obuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff);
399 }
400
401 RF_ETIMER_STOP(timer);
402 RF_ETIMER_EVAL(timer);
403 tracerec->q_us += RF_ETIMER_VAL_US(timer);
404 }
405 /*
406 used in degraded writes.
407 */
408
409 static void DegrQSubr(RF_DagNode_t *node);
410
411 static void
412 DegrQSubr(node)
413 RF_DagNode_t *node;
414 {
415 int np = node->numParams;
416 int d;
417 RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
418 unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit;
419 int i;
420 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
421 RF_Etimer_t timer;
422 char *qbuf = node->results[1];
423 char *obuf, *qpbuf;
424 RF_PhysDiskAddr_t *old;
425 unsigned long coeff;
426 unsigned fail_start;
427 int j;
428
429 old = (RF_PhysDiskAddr_t *) node->params[np - 2].p;
430 fail_start = old->startSector % secPerSU;
431
432 RF_ETIMER_START(timer);
433
434 d = (np - 2) / 2;
435 RF_ASSERT(2 * d + 2 == np);
436 for (i = 0; i < d; i++) {
437 old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
438 obuf = (char *) node->params[2 * i + 1].p;
439 coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), old->raidAddress);
440 /* compute the data unit offset within the column, then add
441 * one */
442 coeff = (coeff % raidPtr->Layout.numDataCol);
443 /* the input buffers may not all be aligned with the start of
444 * the stripe. so shift by their sector offset within the
445 * stripe unit */
446 j = old->startSector % secPerSU;
447 RF_ASSERT(j >= fail_start);
448 qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, j - fail_start);
449 rf_IncQ((unsigned long *) qpbuf, (unsigned long *) obuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff);
450 }
451
452 RF_ETIMER_STOP(timer);
453 RF_ETIMER_EVAL(timer);
454 tracerec->q_us += RF_ETIMER_VAL_US(timer);
455 }
456 /*
457 Called by large write code to compute the new parity and the new q.
458
459 structure of the params:
460
461 pda_0, buffer_0, pda_1 , buffer_1, ... , pda_d, buffer_d ( d = numDataCol
462 raidPtr
463
464 for a total of 2d+1 arguments.
465 The result buffers results[0], results[1] are the buffers for the p and q,
466 respectively.
467
468 We compute Q first, then compute P. The P calculation may try to reuse
469 one of the input buffers for its output, so if we computed P first, we would
470 corrupt the input for the q calculation.
471 */
472
473 int
474 rf_RegularPQFunc(node)
475 RF_DagNode_t *node;
476 {
477 RegularQSubr(node, node->results[1]);
478 return (rf_RegularXorFunc(node)); /* does the wakeup */
479 }
480
481 int
482 rf_RegularQFunc(node)
483 RF_DagNode_t *node;
484 {
485 /* Almost ... adjust Qsubr args */
486 RegularQSubr(node, node->results[0]);
487 rf_GenericWakeupFunc(node, 0); /* call wake func explicitly since no
488 * I/O in this node */
489 return (0);
490 }
491 /*
492 Called by singly degraded write code to compute the new parity and the new q.
493
494 structure of the params:
495
496 pda_0, buffer_0, pda_1 , buffer_1, ... , pda_d, buffer_d
497 failedPDA raidPtr
498
499 for a total of 2d+2 arguments.
500 The result buffers results[0], results[1] are the buffers for the parity and q,
501 respectively.
502
503 We compute Q first, then compute parity. The parity calculation may try to reuse
504 one of the input buffers for its output, so if we computed parity first, we would
505 corrupt the input for the q calculation.
506
507 We treat this identically to the regularPQ case, ignoring the failedPDA extra argument.
508 */
509
510 void
511 rf_Degraded_100_PQFunc(node)
512 RF_DagNode_t *node;
513 {
514 int np = node->numParams;
515
516 RF_ASSERT(np >= 2);
517 DegrQSubr(node);
518 rf_RecoveryXorFunc(node);
519 }
520
521
522 /*
523 The two below are used when reading a stripe with a single lost data unit.
524 The parameters are
525
526 pda_0, buffer_0, .... pda_n, buffer_n, P pda, P buffer, failedPDA, raidPtr
527
528 and results[0] contains the data buffer. Which is originally zero-filled.
529
530 */
531
532 /* this Q func is used by the degraded-mode dag functions to recover lost data.
533 * the second-to-last parameter is the PDA for the failed portion of the access.
534 * the code here looks at this PDA and assumes that the xor target buffer is
535 * equal in size to the number of sectors in the failed PDA. It then uses
536 * the other PDAs in the parameter list to determine where within the target
537 * buffer the corresponding data should be xored.
538 *
539 * Recall the basic equation is
540 *
541 * Q = ( data_1 + 2 * data_2 ... + k * data_k ) mod 256
542 *
543 * so to recover data_j we need
544 *
545 * J data_j = (Q - data_1 - 2 data_2 ....- k* data_k) mod 256
546 *
 * So the coefficient for each buffer is (255 - data_col), and data_j should be
 * initialized by copying Q into it. Finally a table lookup is needed to divide
 * by J, i.e. to solve for data_j.
550 *
551 *
552 */
553 int
554 rf_RecoveryQFunc(node)
555 RF_DagNode_t *node;
556 {
557 RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
558 RF_RaidLayout_t *layoutPtr = (RF_RaidLayout_t *) & raidPtr->Layout;
559 RF_PhysDiskAddr_t *failedPDA = (RF_PhysDiskAddr_t *) node->params[node->numParams - 2].p;
560 int i;
561 RF_PhysDiskAddr_t *pda;
562 RF_RaidAddr_t suoffset, failedSUOffset = rf_StripeUnitOffset(layoutPtr, failedPDA->startSector);
563 char *srcbuf, *destbuf;
564 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
565 RF_Etimer_t timer;
566 unsigned long coeff;
567
568 RF_ETIMER_START(timer);
569 /* start by copying Q into the buffer */
570 memcpy(node->results[0], node->params[node->numParams - 3].p,
571 rf_RaidAddressToByte(raidPtr, failedPDA->numSector));
572 for (i = 0; i < node->numParams - 4; i += 2) {
573 RF_ASSERT(node->params[i + 1].p != node->results[0]);
574 pda = (RF_PhysDiskAddr_t *) node->params[i].p;
575 srcbuf = (char *) node->params[i + 1].p;
576 suoffset = rf_StripeUnitOffset(layoutPtr, pda->startSector);
577 destbuf = ((char *) node->results[0]) + rf_RaidAddressToByte(raidPtr, suoffset - failedSUOffset);
578 coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), pda->raidAddress);
579 /* compute the data unit offset within the column */
580 coeff = (coeff % raidPtr->Layout.numDataCol);
581 rf_IncQ((unsigned long *) destbuf, (unsigned long *) srcbuf, rf_RaidAddressToByte(raidPtr, pda->numSector), coeff);
582 }
583 /* Do the nasty inversion now */
584 coeff = (rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), failedPDA->startSector) % raidPtr->Layout.numDataCol);
585 rf_InvertQ(node->results[0], node->results[0], rf_RaidAddressToByte(raidPtr, pda->numSector), coeff);
586 RF_ETIMER_STOP(timer);
587 RF_ETIMER_EVAL(timer);
588 tracerec->q_us += RF_ETIMER_VAL_US(timer);
589 rf_GenericWakeupFunc(node, 0);
590 return (0);
591 }
592
593 int
594 rf_RecoveryPQFunc(node)
595 RF_DagNode_t *node;
596 {
597 RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[node->numParams - 1].p;
598 printf("raid%d: Recovery from PQ not implemented.\n",raidPtr->raidid);
599 return (1);
600 }
601 /*
602 Degraded write Q subroutine.
603 Used when P is dead.
604 Large-write style Q computation.
605 Parameters
606
607 (pda,buf),(pda,buf),.....,(failedPDA,bufPtr),failedPDA,raidPtr.
608
609 We ignore failedPDA.
610
611 This is a "simple style" recovery func.
612 */
613
614 void
615 rf_PQ_DegradedWriteQFunc(node)
616 RF_DagNode_t *node;
617 {
618 int np = node->numParams;
619 int d;
620 RF_Raid_t *raidPtr = (RF_Raid_t *) node->params[np - 1].p;
621 unsigned secPerSU = raidPtr->Layout.sectorsPerStripeUnit;
622 int i;
623 RF_AccTraceEntry_t *tracerec = node->dagHdr->tracerec;
624 RF_Etimer_t timer;
625 char *qbuf = node->results[0];
626 char *obuf, *qpbuf;
627 RF_PhysDiskAddr_t *old;
628 unsigned long coeff;
629 int fail_start, j;
630
631 old = (RF_PhysDiskAddr_t *) node->params[np - 2].p;
632 fail_start = old->startSector % secPerSU;
633
634 RF_ETIMER_START(timer);
635
636 d = (np - 2) / 2;
637 RF_ASSERT(2 * d + 2 == np);
638
639 for (i = 0; i < d; i++) {
640 old = (RF_PhysDiskAddr_t *) node->params[2 * i].p;
641 obuf = (char *) node->params[2 * i + 1].p;
642 coeff = rf_RaidAddressToStripeUnitID(&(raidPtr->Layout), old->raidAddress);
643 /* compute the data unit offset within the column, then add
644 * one */
645 coeff = (coeff % raidPtr->Layout.numDataCol);
646 j = old->startSector % secPerSU;
647 RF_ASSERT(j >= fail_start);
648 qpbuf = qbuf + rf_RaidAddressToByte(raidPtr, j - fail_start);
649 rf_IncQ((unsigned long *) qpbuf, (unsigned long *) obuf, rf_RaidAddressToByte(raidPtr, old->numSector), coeff);
650 }
651
652 RF_ETIMER_STOP(timer);
653 RF_ETIMER_EVAL(timer);
654 tracerec->q_us += RF_ETIMER_VAL_US(timer);
655 rf_GenericWakeupFunc(node, 0);
656 }
657
658
659
660
661 /* Q computations */
662
663 /*
664 coeff - colummn;
665
666 compute dest ^= qfor[28-coeff][rn[coeff+1] a]
667
668 on 5-bit basis;
669 length in bytes;
670 */
671
672 void
673 rf_IncQ(dest, buf, length, coeff)
674 unsigned long *dest;
675 unsigned long *buf;
676 unsigned length;
677 unsigned coeff;
678 {
679 unsigned long a, d, new;
680 unsigned long a1, a2;
681 unsigned int *q = &(rf_qfor[28 - coeff][0]);
682 unsigned r = rf_rn[coeff + 1];
683
684 #define EXTRACT(a,i) ((a >> (5L*i)) & 0x1f)
685 #define INSERT(a,i) (a << (5L*i))
686
687 length /= 8;
688 /* 13 5 bit quants in a 64 bit word */
689 while (length) {
690 a = *buf++;
691 d = *dest;
692 a1 = EXTRACT(a, 0) ^ r;
693 a2 = EXTRACT(a, 1) ^ r;
694 new = INSERT(a2, 1) | a1;
695 a1 = EXTRACT(a, 2) ^ r;
696 a2 = EXTRACT(a, 3) ^ r;
697 a1 = q[a1];
698 a2 = q[a2];
699 new = new | INSERT(a1, 2) | INSERT(a2, 3);
700 a1 = EXTRACT(a, 4) ^ r;
701 a2 = EXTRACT(a, 5) ^ r;
702 a1 = q[a1];
703 a2 = q[a2];
704 new = new | INSERT(a1, 4) | INSERT(a2, 5);
705 a1 = EXTRACT(a, 5) ^ r;
706 a2 = EXTRACT(a, 6) ^ r;
707 a1 = q[a1];
708 a2 = q[a2];
709 new = new | INSERT(a1, 5) | INSERT(a2, 6);
710 #if RF_LONGSHIFT > 2
711 a1 = EXTRACT(a, 7) ^ r;
712 a2 = EXTRACT(a, 8) ^ r;
713 a1 = q[a1];
714 a2 = q[a2];
715 new = new | INSERT(a1, 7) | INSERT(a2, 8);
716 a1 = EXTRACT(a, 9) ^ r;
717 a2 = EXTRACT(a, 10) ^ r;
718 a1 = q[a1];
719 a2 = q[a2];
720 new = new | INSERT(a1, 9) | INSERT(a2, 10);
721 a1 = EXTRACT(a, 11) ^ r;
722 a2 = EXTRACT(a, 12) ^ r;
723 a1 = q[a1];
724 a2 = q[a2];
725 new = new | INSERT(a1, 11) | INSERT(a2, 12);
726 #endif /* RF_LONGSHIFT > 2 */
727 d ^= new;
728 *dest++ = d;
729 length--;
730 }
731 }
732 /*
733 compute
734
735 dest ^= rf_qfor[28-coeff][rf_rn[coeff+1] (old^new) ]
736
737 on a five bit basis.
738 optimization: compute old ^ new on 64 bit basis.
739
740 length in bytes.
741 */
742
743 static void
744 QDelta(
745 char *dest,
746 char *obuf,
747 char *nbuf,
748 unsigned length,
749 unsigned char coeff)
750 {
751 unsigned long a, d, new;
752 unsigned long a1, a2;
753 unsigned int *q = &(rf_qfor[28 - coeff][0]);
754 unsigned int r = rf_rn[coeff + 1];
755
756 r = a1 = a2 = new = d = a = 0; /* XXX for now... */
757 q = NULL; /* XXX for now */
758
759 #ifdef _KERNEL
760 /* PQ in kernel currently not supported because the encoding/decoding
761 * table is not present */
762 memset(dest, 0, length);
763 #else /* KERNEL */
764 /* this code probably doesn't work and should be rewritten -wvcii */
765 /* 13 5 bit quants in a 64 bit word */
766 length /= 8;
767 while (length) {
768 a = *obuf++; /* XXX need to reorg to avoid cache conflicts */
769 a ^= *nbuf++;
770 d = *dest;
771 a1 = EXTRACT(a, 0) ^ r;
772 a2 = EXTRACT(a, 1) ^ r;
773 a1 = q[a1];
774 a2 = q[a2];
775 new = INSERT(a2, 1) | a1;
776 a1 = EXTRACT(a, 2) ^ r;
777 a2 = EXTRACT(a, 3) ^ r;
778 a1 = q[a1];
779 a2 = q[a2];
780 new = new | INSERT(a1, 2) | INSERT(a2, 3);
781 a1 = EXTRACT(a, 4) ^ r;
782 a2 = EXTRACT(a, 5) ^ r;
783 a1 = q[a1];
784 a2 = q[a2];
785 new = new | INSERT(a1, 4) | INSERT(a2, 5);
786 a1 = EXTRACT(a, 5) ^ r;
787 a2 = EXTRACT(a, 6) ^ r;
788 a1 = q[a1];
789 a2 = q[a2];
790 new = new | INSERT(a1, 5) | INSERT(a2, 6);
791 #if RF_LONGSHIFT > 2
792 a1 = EXTRACT(a, 7) ^ r;
793 a2 = EXTRACT(a, 8) ^ r;
794 a1 = q[a1];
795 a2 = q[a2];
796 new = new | INSERT(a1, 7) | INSERT(a2, 8);
797 a1 = EXTRACT(a, 9) ^ r;
798 a2 = EXTRACT(a, 10) ^ r;
799 a1 = q[a1];
800 a2 = q[a2];
801 new = new | INSERT(a1, 9) | INSERT(a2, 10);
802 a1 = EXTRACT(a, 11) ^ r;
803 a2 = EXTRACT(a, 12) ^ r;
804 a1 = q[a1];
805 a2 = q[a2];
806 new = new | INSERT(a1, 11) | INSERT(a2, 12);
807 #endif /* RF_LONGSHIFT > 2 */
808 d ^= new;
809 *dest++ = d;
810 length--;
811 }
812 #endif /* _KERNEL */
813 }
814 /*
815 recover columns a and b from the given p and q into
816 bufs abuf and bbuf. All bufs are word aligned.
817 Length is in bytes.
818 */
819
820
821 /*
822 * XXX
823 *
824 * Everything about this seems wrong.
825 */
826 void
827 rf_PQ_recover(pbuf, qbuf, abuf, bbuf, length, coeff_a, coeff_b)
828 unsigned long *pbuf;
829 unsigned long *qbuf;
830 unsigned long *abuf;
831 unsigned long *bbuf;
832 unsigned length;
833 unsigned coeff_a;
834 unsigned coeff_b;
835 {
836 unsigned long p, q, a, a0, a1;
837 int col = (29 * coeff_a) + coeff_b;
838 unsigned char *q0 = &(rf_qinv[col][0]);
839
840 length /= 8;
841 while (length) {
842 p = *pbuf++;
843 q = *qbuf++;
844 a0 = EXTRACT(p, 0);
845 a1 = EXTRACT(q, 0);
846 a = q0[a0 << 5 | a1];
847 #define MF(i) \
848 a0 = EXTRACT(p,i); \
849 a1 = EXTRACT(q,i); \
850 a = a | INSERT(q0[a0<<5 | a1],i)
851
852 MF(1);
853 MF(2);
854 MF(3);
855 MF(4);
856 MF(5);
857 MF(6);
858 #if 0
859 MF(7);
860 MF(8);
861 MF(9);
862 MF(10);
863 MF(11);
864 MF(12);
865 #endif /* 0 */
866 *abuf++ = a;
867 *bbuf++ = a ^ p;
868 length--;
869 }
870 }
871 /*
872 Lost parity and a data column. Recover that data column.
873 Assume col coeff is lost. Let q the contents of Q after
874 all surviving data columns have been q-xored out of it.
875 Then we have the equation
876
877 q[28-coeff][a_i ^ r_i+1] = q
878
879 but q is cyclic with period 31.
880 So q[3+coeff][q[28-coeff][a_i ^ r_{i+1}]] =
881 q[31][a_i ^ r_{i+1}] = a_i ^ r_{i+1} .
882
883 so a_i = r_{coeff+1} ^ q[3+coeff][q]
884
885 The routine is passed q buffer and the buffer
886 the data is to be recoverd into. They can be the same.
887 */
888
889
890
891 static void
892 rf_InvertQ(
893 unsigned long *qbuf,
894 unsigned long *abuf,
895 unsigned length,
896 unsigned coeff)
897 {
898 unsigned long a, new;
899 unsigned long a1, a2;
900 unsigned int *q = &(rf_qfor[3 + coeff][0]);
901 unsigned r = rf_rn[coeff + 1];
902
903 /* 13 5 bit quants in a 64 bit word */
904 length /= 8;
905 while (length) {
906 a = *qbuf++;
907 a1 = EXTRACT(a, 0);
908 a2 = EXTRACT(a, 1);
909 a1 = r ^ q[a1];
910 a2 = r ^ q[a2];
911 new = INSERT(a2, 1) | a1;
912 #define M(i,j) \
913 a1 = EXTRACT(a,i); \
914 a2 = EXTRACT(a,j); \
915 a1 = r ^ q[a1]; \
916 a2 = r ^ q[a2]; \
917 new = new | INSERT(a1,i) | INSERT(a2,j)
918
919 M(2, 3);
920 M(4, 5);
921 M(5, 6);
922 #if RF_LONGSHIFT > 2
923 M(7, 8);
924 M(9, 10);
925 M(11, 12);
926 #endif /* RF_LONGSHIFT > 2 */
927 *abuf++ = new;
928 length--;
929 }
930 }
931 #endif /* (RF_INCLUDE_DECL_PQ > 0) ||
932 * (RF_INCLUDE_RAID6 > 0) */
Cache object: 51f8544460fb5b57011fd03a93fd3cc4
|