1 /* $OpenBSD: softraid_raid6.c,v 1.72 2021/05/16 15:12:37 deraadt Exp $ */
2 /*
3 * Copyright (c) 2009 Marco Peereboom <marco@peereboom.us>
4 * Copyright (c) 2009 Jordan Hargrave <jordan@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18
19 #include "bio.h"
20
21 #include <sys/param.h>
22 #include <sys/systm.h>
23 #include <sys/buf.h>
24 #include <sys/device.h>
25 #include <sys/ioctl.h>
26 #include <sys/malloc.h>
27 #include <sys/kernel.h>
28 #include <sys/disk.h>
29 #include <sys/rwlock.h>
30 #include <sys/queue.h>
31 #include <sys/fcntl.h>
32 #include <sys/mount.h>
33 #include <sys/sensors.h>
34 #include <sys/stat.h>
35 #include <sys/task.h>
36 #include <sys/conf.h>
37 #include <sys/uio.h>
38
39 #include <scsi/scsi_all.h>
40 #include <scsi/scsiconf.h>
41 #include <scsi/scsi_disk.h>
42
43 #include <dev/softraidvar.h>
44
/*
 * GF(2^8) arithmetic tables shared by all RAID 6 volumes (polynomial
 * 0x11D, built by gf_init()).
 *
 * gf_map[n] is a lazily allocated 256-entry multiply-by-n lookup table
 * (see gf_premul()).  gf_pow stores the generator powers twice over so
 * gf_mul() can index gf_log[a] + gf_log[b] without a modulo; gf_log[0]
 * is a sentinel (512) that lands in the never-written, zero-filled tail
 * of gf_pow so multiplying by zero yields zero.
 */
uint8_t *gf_map[256];
uint8_t gf_pow[768];
int gf_log[256];
48
/* RAID 6 functions. */
int sr_raid6_create(struct sr_discipline *, struct bioc_createraid *,
    int, int64_t);
int sr_raid6_assemble(struct sr_discipline *, struct bioc_createraid *,
    int, void *);
int sr_raid6_init(struct sr_discipline *);
int sr_raid6_rw(struct sr_workunit *);
int sr_raid6_openings(struct sr_discipline *);
void sr_raid6_intr(struct buf *);
int sr_raid6_wu_done(struct sr_workunit *);
void sr_raid6_set_chunk_state(struct sr_discipline *, int, int);
void sr_raid6_set_vol_state(struct sr_discipline *);

void sr_raid6_xorp(void *, void *, int);
void sr_raid6_xorq(void *, void *, int, int);
int sr_raid6_addio(struct sr_workunit *wu, int, daddr_t, long,
    void *, int, int, void *, void *, int);
void sr_raid6_scrub(struct sr_discipline *);
int sr_failio(struct sr_workunit *);

void gf_init(void);
uint8_t gf_inv(uint8_t);
int gf_premul(uint8_t);
uint8_t gf_mul(uint8_t, uint8_t);

/*
 * Per-stripe failure flags used by sr_raid6_rw(): which of the data
 * chunks (X, and a second failed data chunk Y) and parity chunks
 * (P, Q) touched by the current stripe are unavailable.
 */
#define SR_NOFAIL 0x00
#define SR_FAILX (1L << 0)
#define SR_FAILY (1L << 1)
#define SR_FAILP (1L << 2)
#define SR_FAILQ (1L << 3)

/*
 * Per-ccb reconstruction context: on I/O completion the data read is
 * XORed into pbuf and, multiplied by the GF(2^8) factor gn, into qbuf
 * (see sr_raid6_intr()).  Either buffer may be NULL.
 */
struct sr_raid6_opaque {
	int gn;		/* GF(2^8) multiplier applied for qbuf */
	void *pbuf;	/* XOR (P-style) accumulation buffer */
	void *qbuf;	/* GF (Q-style) accumulation buffer */
};
85
86 /* discipline initialisation. */
/*
 * Initialise the RAID 6 discipline: build the GF(2^8) tables, fill in
 * the generic discipline description and install the RAID 6 specific
 * entry points used by the softraid core.
 */
void
sr_raid6_discipline_init(struct sr_discipline *sd)
{
	/* Initialize GF256 tables. */
	gf_init();

	/* Fill out discipline members. */
	sd->sd_type = SR_MD_RAID6;
	strlcpy(sd->sd_name, "RAID 6", sizeof(sd->sd_name));
	sd->sd_capabilities = SR_CAP_SYSTEM_DISK | SR_CAP_AUTO_ASSEMBLE |
	    SR_CAP_REDUNDANT;
	sd->sd_max_wu = SR_RAID6_NOWU;

	/* Setup discipline specific function pointers. */
	sd->sd_assemble = sr_raid6_assemble;
	sd->sd_create = sr_raid6_create;
	sd->sd_openings = sr_raid6_openings;
	sd->sd_scsi_rw = sr_raid6_rw;
	sd->sd_scsi_intr = sr_raid6_intr;
	sd->sd_scsi_wu_done = sr_raid6_wu_done;
	sd->sd_set_chunk_state = sr_raid6_set_chunk_state;
	sd->sd_set_vol_state = sr_raid6_set_vol_state;
}
110
111 int
112 sr_raid6_create(struct sr_discipline *sd, struct bioc_createraid *bc,
113 int no_chunk, int64_t coerced_size)
114 {
115 if (no_chunk < 4) {
116 sr_error(sd->sd_sc, "%s requires four or more chunks",
117 sd->sd_name);
118 return EINVAL;
119 }
120
121 /*
122 * XXX add variable strip size later even though MAXPHYS is really
123 * the clever value, users like * to tinker with that type of stuff.
124 */
125 sd->sd_meta->ssdi.ssd_strip_size = MAXPHYS;
126 sd->sd_meta->ssdi.ssd_size = (coerced_size &
127 ~(((u_int64_t)sd->sd_meta->ssdi.ssd_strip_size >>
128 DEV_BSHIFT) - 1)) * (no_chunk - 2);
129
130 return sr_raid6_init(sd);
131 }
132
/*
 * Assemble an existing RAID 6 volume.  Nothing discipline-specific is
 * needed beyond the common runtime initialisation.
 */
int
sr_raid6_assemble(struct sr_discipline *sd, struct bioc_createraid *bc,
    int no_chunk, void *data)
{
	return sr_raid6_init(sd);
}
139
140 int
141 sr_raid6_init(struct sr_discipline *sd)
142 {
143 /* Initialise runtime values. */
144 sd->mds.mdd_raid6.sr6_strip_bits =
145 sr_validate_stripsize(sd->sd_meta->ssdi.ssd_strip_size);
146 if (sd->mds.mdd_raid6.sr6_strip_bits == -1) {
147 sr_error(sd->sd_sc, "invalid strip size");
148 return EINVAL;
149 }
150
151 /* only if stripsize <= MAXPHYS */
152 sd->sd_max_ccb_per_wu = max(6, 2 * sd->sd_meta->ssdi.ssd_chunk_no);
153
154 return 0;
155 }
156
157 int
158 sr_raid6_openings(struct sr_discipline *sd)
159 {
160 return (sd->sd_max_wu >> 1); /* 2 wu's per IO */
161 }
162
/*
 * Move chunk c to new_state, validating the transition against the
 * chunk state machine; an invalid transition panics.  On an actual
 * change the volume state is re-derived and a metadata flush is
 * scheduled.
 */
void
sr_raid6_set_chunk_state(struct sr_discipline *sd, int c, int new_state)
{
	int old_state, s;

	/* XXX this is for RAID 0 */
	DNPRINTF(SR_D_STATE, "%s: %s: %s: sr_raid_set_chunk_state %d -> %d\n",
	    DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname,
	    sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_devname, c, new_state);

	/* ok to go to splbio since this only happens in error path */
	s = splbio();
	old_state = sd->sd_vol.sv_chunks[c]->src_meta.scm_status;

	/* multiple IOs to the same chunk that fail will come through here */
	if (old_state == new_state)
		goto done;

	/* Validate the transition; anything unexpected falls through to die. */
	switch (old_state) {
	case BIOC_SDONLINE:
		switch (new_state) {
		case BIOC_SDOFFLINE:
		case BIOC_SDSCRUB:
			break;
		default:
			goto die;
		}
		break;

	case BIOC_SDOFFLINE:
		/* An offline chunk may only come back via a rebuild. */
		if (new_state == BIOC_SDREBUILD) {
			;
		} else
			goto die;
		break;

	case BIOC_SDSCRUB:
		switch (new_state) {
		case BIOC_SDONLINE:
		case BIOC_SDOFFLINE:
			break;
		default:
			goto die;
		}
		break;

	case BIOC_SDREBUILD:
		switch (new_state) {
		case BIOC_SDONLINE:
		case BIOC_SDOFFLINE:
			break;
		default:
			goto die;
		}
		break;

	default:
die:
		splx(s); /* XXX */
		panic("%s: %s: %s: invalid chunk state transition %d -> %d",
		    DEVNAME(sd->sd_sc),
		    sd->sd_meta->ssd_devname,
		    sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_devname,
		    old_state, new_state);
		/* NOTREACHED */
	}

	/* Commit the chunk state and re-derive the volume state. */
	sd->sd_vol.sv_chunks[c]->src_meta.scm_status = new_state;
	sd->sd_set_vol_state(sd);

	/* Persist the state change to the on-disk metadata. */
	sd->sd_must_flush = 1;
	task_add(systq, &sd->sd_meta_save_task);
done:
	splx(s);
}
238
/*
 * Recompute the volume state from the per-chunk states and validate
 * the resulting volume state transition; an invalid transition
 * panics.  RAID 6 tolerates up to two unavailable chunks before the
 * volume goes offline.
 */
void
sr_raid6_set_vol_state(struct sr_discipline *sd)
{
	int states[SR_MAX_STATES];
	int new_state, i, s, nd;
	int old_state = sd->sd_vol_status;

	/* XXX this is for RAID 0 */

	DNPRINTF(SR_D_STATE, "%s: %s: sr_raid_set_vol_state\n",
	    DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname);

	nd = sd->sd_meta->ssdi.ssd_chunk_no;

	/* Tally the chunk states. */
	for (i = 0; i < SR_MAX_STATES; i++)
		states[i] = 0;

	for (i = 0; i < nd; i++) {
		s = sd->sd_vol.sv_chunks[i]->src_meta.scm_status;
		if (s >= SR_MAX_STATES)
			panic("%s: %s: %s: invalid chunk state",
			    DEVNAME(sd->sd_sc),
			    sd->sd_meta->ssd_devname,
			    sd->sd_vol.sv_chunks[i]->src_meta.scmi.scm_devname);
		states[s]++;
	}

	/*
	 * Derive the new volume state: fully online, offline when more
	 * than two chunks are missing, otherwise scrubbing, rebuilding
	 * or degraded in that order of precedence.
	 */
	if (states[BIOC_SDONLINE] == nd)
		new_state = BIOC_SVONLINE;
	else if (states[BIOC_SDONLINE] < nd - 2)
		new_state = BIOC_SVOFFLINE;
	else if (states[BIOC_SDSCRUB] != 0)
		new_state = BIOC_SVSCRUB;
	else if (states[BIOC_SDREBUILD] != 0)
		new_state = BIOC_SVREBUILD;
	else if (states[BIOC_SDONLINE] < nd)
		new_state = BIOC_SVDEGRADED;
	else {
		printf("old_state = %d, ", old_state);
		for (i = 0; i < nd; i++)
			printf("%d = %d, ", i,
			    sd->sd_vol.sv_chunks[i]->src_meta.scm_status);
		panic("invalid new_state");
	}

	DNPRINTF(SR_D_STATE, "%s: %s: sr_raid_set_vol_state %d -> %d\n",
	    DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname,
	    old_state, new_state);

	/* Validate the old_state -> new_state transition. */
	switch (old_state) {
	case BIOC_SVONLINE:
		switch (new_state) {
		case BIOC_SVONLINE: /* can go to same state */
		case BIOC_SVOFFLINE:
		case BIOC_SVDEGRADED:
		case BIOC_SVREBUILD: /* happens on boot */
			break;
		default:
			goto die;
		}
		break;

	case BIOC_SVOFFLINE:
		/* XXX this might be a little too much */
		goto die;

	case BIOC_SVDEGRADED:
		switch (new_state) {
		case BIOC_SVOFFLINE:
		case BIOC_SVREBUILD:
		case BIOC_SVDEGRADED: /* can go to the same state */
			break;
		default:
			goto die;
		}
		break;

	case BIOC_SVBUILDING:
		switch (new_state) {
		case BIOC_SVONLINE:
		case BIOC_SVOFFLINE:
		case BIOC_SVBUILDING: /* can go to the same state */
			break;
		default:
			goto die;
		}
		break;

	case BIOC_SVSCRUB:
		switch (new_state) {
		case BIOC_SVONLINE:
		case BIOC_SVOFFLINE:
		case BIOC_SVDEGRADED:
		case BIOC_SVSCRUB: /* can go to same state */
			break;
		default:
			goto die;
		}
		break;

	case BIOC_SVREBUILD:
		switch (new_state) {
		case BIOC_SVONLINE:
		case BIOC_SVOFFLINE:
		case BIOC_SVDEGRADED:
		case BIOC_SVREBUILD: /* can go to the same state */
			break;
		default:
			goto die;
		}
		break;

	default:
die:
		panic("%s: %s: invalid volume state transition %d -> %d",
		    DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname,
		    old_state, new_state);
		/* NOTREACHED */
	}

	sd->sd_vol_status = new_state;
}
361
362 /* modes:
363 * readq: sr_raid6_addio(i, lba, length, NULL, SCSI_DATA_IN,
364 * 0, qbuf, NULL, 0);
365 * readp: sr_raid6_addio(i, lba, length, NULL, SCSI_DATA_IN,
366 * 0, pbuf, NULL, 0);
367 * readx: sr_raid6_addio(i, lba, length, NULL, SCSI_DATA_IN,
368 * 0, pbuf, qbuf, gf_pow[i]);
369 */
370
/*
 * Start a RAID 6 read or write work unit.
 *
 * Reads from a healthy data chunk are issued directly.  Reads whose
 * data chunk is unavailable are reconstructed from the surviving
 * chunks: via P (plain XOR) when P survives, via Q and GF(2^8)
 * arithmetic when P is also gone, and via both P and Q when a second
 * data chunk has failed.  Writes use read-modify-write: a second work
 * unit (wu_r) reads the old data and both parities, while the
 * original work unit, carrying the new data and recomputed P/Q, is
 * deferred until wu_r completes.
 *
 * Returns 0 once the work unit is scheduled, 1 on failure.
 */
int
sr_raid6_rw(struct sr_workunit *wu)
{
	struct sr_workunit *wu_r = NULL;
	struct sr_discipline *sd = wu->swu_dis;
	struct scsi_xfer *xs = wu->swu_xs;
	struct sr_chunk *scp;
	int s, fail, i, gxinv, pxinv;
	daddr_t blkno, lba;
	int64_t chunk_offs, lbaoffs, offset, strip_offs;
	int64_t strip_no, strip_size, strip_bits, row_size;
	int64_t fchunk, no_chunk, chunk, qchunk, pchunk;
	long length, datalen;
	void *pbuf, *data, *qbuf;

	/* blkno and scsi error will be handled by sr_validate_io */
	if (sr_validate_io(wu, &blkno, "sr_raid6_rw"))
		goto bad;

	/* no_chunk counts data chunks only; two chunks hold P and Q. */
	strip_size = sd->sd_meta->ssdi.ssd_strip_size;
	strip_bits = sd->mds.mdd_raid6.sr6_strip_bits;
	no_chunk = sd->sd_meta->ssdi.ssd_chunk_no - 2;
	row_size = (no_chunk << strip_bits) >> DEV_BSHIFT;

	data = xs->data;
	datalen = xs->datalen;
	lbaoffs = blkno << DEV_BSHIFT;

	if (xs->flags & SCSI_DATA_OUT) {
		/* Writes need a second wu to read old data and parity. */
		if ((wu_r = sr_scsi_wu_get(sd, SCSI_NOSLEEP)) == NULL){
			printf("%s: can't get wu_r", DEVNAME(sd->sd_sc));
			goto bad;
		}
		wu_r->swu_state = SR_WU_INPROGRESS;
		wu_r->swu_flags |= SR_WUF_DISCIPLINE;
	}

	wu->swu_blk_start = 0;
	while (datalen != 0) {
		/* Locate the strip and the offset within it. */
		strip_no = lbaoffs >> strip_bits;
		strip_offs = lbaoffs & (strip_size - 1);
		chunk_offs = (strip_no / no_chunk) << strip_bits;
		offset = chunk_offs + strip_offs;

		/* get size remaining in this stripe */
		length = MIN(strip_size - strip_offs, datalen);

		/* map disk offset to parity/data drive */
		chunk = strip_no % no_chunk;

		/*
		 * Q rotates backwards one chunk per row, with P
		 * immediately before it (wrapping around); data chunk
		 * indices are then shifted past the two parity slots.
		 */
		qchunk = (no_chunk + 1) - ((strip_no / no_chunk) % (no_chunk+2));
		if (qchunk == 0)
			pchunk = no_chunk + 1;
		else
			pchunk = qchunk - 1;
		if (chunk >= pchunk)
			chunk++;
		if (chunk >= qchunk)
			chunk++;

		lba = offset >> DEV_BSHIFT;

		/* XXX big hammer.. exclude I/O from entire stripe */
		if (wu->swu_blk_start == 0)
			wu->swu_blk_start = (strip_no / no_chunk) * row_size;
		wu->swu_blk_end = (strip_no / no_chunk) * row_size + (row_size - 1);

		fail = 0;
		fchunk = -1;

		/* Get disk-fail flags */
		for (i=0; i< no_chunk+2; i++) {
			scp = sd->sd_vol.sv_chunks[i];
			switch (scp->src_meta.scm_status) {
			case BIOC_SDOFFLINE:
			case BIOC_SDREBUILD:
			case BIOC_SDHOTSPARE:
				if (i == qchunk)
					fail |= SR_FAILQ;
				else if (i == pchunk)
					fail |= SR_FAILP;
				else if (i == chunk)
					fail |= SR_FAILX;
				else {
					/* dual data-disk failure */
					fail |= SR_FAILY;
					fchunk = i;
				}
				break;
			}
		}
		if (xs->flags & SCSI_DATA_IN) {
			if (!(fail & SR_FAILX)) {
				/* drive is good. issue single read request */
				if (sr_raid6_addio(wu, chunk, lba, length,
				    data, xs->flags, 0, NULL, NULL, 0))
					goto bad;
			} else if (fail & SR_FAILP) {
				/* Dx, P failed */
				printf("Disk %llx offline, "
				    "regenerating Dx+P\n", chunk);

				gxinv = gf_inv(gf_pow[chunk]);

				/* Calculate: Dx = (Q^Dz*gz)*inv(gx) */
				memset(data, 0, length);
				if (sr_raid6_addio(wu, qchunk, lba, length,
				    NULL, SCSI_DATA_IN, 0, NULL, data, gxinv))
					goto bad;

				/* Read Dz * gz * inv(gx) */
				for (i = 0; i < no_chunk+2; i++) {
					if (i == qchunk || i == pchunk || i == chunk)
						continue;

					if (sr_raid6_addio(wu, i, lba, length,
					    NULL, SCSI_DATA_IN, 0, NULL, data,
					    gf_mul(gf_pow[i], gxinv)))
						goto bad;
				}

				/* data will contain correct value on completion */
			} else if (fail & SR_FAILY) {
				/* Dx, Dy failed */
				printf("Disk %llx & %llx offline, "
				    "regenerating Dx+Dy\n", chunk, fchunk);

				gxinv = gf_inv(gf_pow[chunk] ^ gf_pow[fchunk]);
				pxinv = gf_mul(gf_pow[fchunk], gxinv);

				/* read Q * inv(gx + gy) */
				memset(data, 0, length);
				if (sr_raid6_addio(wu, qchunk, lba, length,
				    NULL, SCSI_DATA_IN, 0, NULL, data, gxinv))
					goto bad;

				/* read P * gy * inv(gx + gy) */
				if (sr_raid6_addio(wu, pchunk, lba, length,
				    NULL, SCSI_DATA_IN, 0, NULL, data, pxinv))
					goto bad;

				/* Calculate: Dx*gx^Dy*gy = Q^(Dz*gz) ; Dx^Dy = P^Dz
				 *   Q:  sr_raid6_xorp(qbuf, --, length);
				 *   P:  sr_raid6_xorp(pbuf, --, length);
				 *   Dz: sr_raid6_xorp(pbuf, --, length);
				 *	 sr_raid6_xorq(qbuf, --, length, gf_pow[i]);
				 */
				for (i = 0; i < no_chunk+2; i++) {
					if (i == qchunk || i == pchunk ||
					    i == chunk || i == fchunk)
						continue;

					/* read Dz * (gz + gy) * inv(gx + gy) */
					if (sr_raid6_addio(wu, i, lba, length,
					    NULL, SCSI_DATA_IN, 0, NULL, data,
					    pxinv ^ gf_mul(gf_pow[i], gxinv)))
						goto bad;
				}
			} else {
				/* Two cases: single disk (Dx) or (Dx+Q)
				 *   Dx = Dz ^ P (same as RAID5)
				 */
				printf("Disk %llx offline, "
				    "regenerating Dx%s\n", chunk,
				    fail & SR_FAILQ ? "+Q" : " single");

				/* Calculate: Dx = P^Dz
				 *   P:  sr_raid6_xorp(data, ---, length);
				 *   Dz: sr_raid6_xorp(data, ---, length);
				 */
				memset(data, 0, length);
				for (i = 0; i < no_chunk+2; i++) {
					if (i != chunk && i != qchunk) {
						/* Read Dz */
						if (sr_raid6_addio(wu, i, lba,
						    length, NULL, SCSI_DATA_IN,
						    0, data, NULL, 0))
							goto bad;
					}
				}

				/* data will contain correct value on completion */
			}
		} else {
			/* XXX handle writes to failed/offline disk? */
			if (fail & (SR_FAILX|SR_FAILQ|SR_FAILP))
				goto bad;

			/*
			 * initialize pbuf with contents of new data to be
			 * written. This will be XORed with old data and old
			 * parity in the intr routine. The result in pbuf
			 * is the new parity data.
			 */
			qbuf = sr_block_get(sd, length);
			if (qbuf == NULL)
				goto bad;

			pbuf = sr_block_get(sd, length);
			if (pbuf == NULL)
				goto bad;

			/* Calculate P = Dn; Q = gn * Dn */
			if (gf_premul(gf_pow[chunk]))
				goto bad;
			sr_raid6_xorp(pbuf, data, length);
			sr_raid6_xorq(qbuf, data, length, gf_pow[chunk]);

			/* Read old data: P ^= Dn' ; Q ^= (gn * Dn') */
			if (sr_raid6_addio(wu_r, chunk, lba, length, NULL,
			    SCSI_DATA_IN, 0, pbuf, qbuf, gf_pow[chunk]))
				goto bad;

			/* Read old xor-parity: P ^= P' */
			if (sr_raid6_addio(wu_r, pchunk, lba, length, NULL,
			    SCSI_DATA_IN, 0, pbuf, NULL, 0))
				goto bad;

			/* Read old q-parity: Q ^= Q' */
			if (sr_raid6_addio(wu_r, qchunk, lba, length, NULL,
			    SCSI_DATA_IN, 0, qbuf, NULL, 0))
				goto bad;

			/* write new data */
			if (sr_raid6_addio(wu, chunk, lba, length, data,
			    xs->flags, 0, NULL, NULL, 0))
				goto bad;

			/* write new xor-parity */
			if (sr_raid6_addio(wu, pchunk, lba, length, pbuf,
			    xs->flags, SR_CCBF_FREEBUF, NULL, NULL, 0))
				goto bad;

			/* write new q-parity */
			if (sr_raid6_addio(wu, qchunk, lba, length, qbuf,
			    xs->flags, SR_CCBF_FREEBUF, NULL, NULL, 0))
				goto bad;
		}

		/* advance to next block */
		lbaoffs += length;
		datalen -= length;
		data += length;
	}

	s = splbio();
	if (wu_r) {
		/* collide write request with reads */
		wu_r->swu_blk_start = wu->swu_blk_start;
		wu_r->swu_blk_end = wu->swu_blk_end;

		/* Defer the write until the read-old wu has completed. */
		wu->swu_state = SR_WU_DEFERRED;
		wu_r->swu_collider = wu;
		TAILQ_INSERT_TAIL(&sd->sd_wu_defq, wu, swu_link);

		wu = wu_r;
	}
	splx(s);

	sr_schedule_wu(wu);

	return (0);
bad:
	/* XXX - can leak pbuf/qbuf on error. */
	/* wu is unwound by sr_wu_put */
	if (wu_r)
		sr_scsi_wu_put(sd, wu_r);
	return (1);
}
640
641 /* Handle failure I/O completion */
642 int
643 sr_failio(struct sr_workunit *wu)
644 {
645 struct sr_discipline *sd = wu->swu_dis;
646 struct sr_ccb *ccb;
647
648 if (!(wu->swu_flags & SR_WUF_FAIL))
649 return (0);
650
651 /* Wu is a 'fake'.. don't do real I/O just intr */
652 TAILQ_INSERT_TAIL(&sd->sd_wu_pendq, wu, swu_link);
653 TAILQ_FOREACH(ccb, &wu->swu_ccb, ccb_link)
654 sr_raid6_intr(&ccb->ccb_buf);
655 return (1);
656 }
657
/*
 * Per-ccb I/O completion.  For reads carrying reconstruction context
 * (sr_raid6_opaque), fold the data into the P and/or Q accumulation
 * buffers, then release any temporary buffer and finish the ccb's
 * share of the work unit.
 */
void
sr_raid6_intr(struct buf *bp)
{
	struct sr_ccb *ccb = (struct sr_ccb *)bp;
	struct sr_workunit *wu = ccb->ccb_wu;
	struct sr_discipline *sd = wu->swu_dis;
	struct sr_raid6_opaque *pq = ccb->ccb_opaque;
	int s;

	DNPRINTF(SR_D_INTR, "%s: sr_raid6_intr bp %p xs %p\n",
	    DEVNAME(sd->sd_sc), bp, wu->swu_xs);

	s = splbio();
	sr_ccb_done(ccb);

	/* XOR data to result. */
	if (ccb->ccb_state == SR_CCB_OK && pq) {
		if (pq->pbuf)
			/* Calculate xor-parity */
			sr_raid6_xorp(pq->pbuf, ccb->ccb_buf.b_data,
			    ccb->ccb_buf.b_bcount);
		if (pq->qbuf)
			/* Calculate q-parity */
			sr_raid6_xorq(pq->qbuf, ccb->ccb_buf.b_data,
			    ccb->ccb_buf.b_bcount, pq->gn);
		free(pq, M_DEVBUF, 0);
		ccb->ccb_opaque = NULL;
	}

	/* Free allocated data buffer. */
	if (ccb->ccb_flags & SR_CCBF_FREEBUF) {
		sr_block_put(sd, ccb->ccb_buf.b_data, ccb->ccb_buf.b_bcount);
		ccb->ccb_buf.b_data = NULL;
	}

	sr_wu_done(wu);
	splx(s);
}
696
/*
 * Decide the final disposition of a completed work unit: internal
 * (discipline) wu's always succeed, reads that failed entirely are
 * retried once through sd_scsi_rw, and failed writes are reported to
 * the caller as permanent errors.
 */
int
sr_raid6_wu_done(struct sr_workunit *wu)
{
	struct sr_discipline *sd = wu->swu_dis;
	struct scsi_xfer *xs = wu->swu_xs;

	/* XXX - we have no way of propagating errors... */
	if (wu->swu_flags & SR_WUF_DISCIPLINE)
		return SR_WU_OK;

	/* XXX - This is insufficient for RAID 6. */
	if (wu->swu_ios_succeeded > 0) {
		xs->error = XS_NOERROR;
		return SR_WU_OK;
	}

	if (xs->flags & SCSI_DATA_IN) {
		/* Re-issue the read; degraded paths may still recover it. */
		printf("%s: retrying read on block %lld\n",
		    sd->sd_meta->ssd_devname, (long long)wu->swu_blk_start);
		sr_wu_release_ccbs(wu);
		wu->swu_state = SR_WU_RESTART;
		if (sd->sd_scsi_rw(wu) == 0)
			return SR_WU_RESTART;
	} else {
		printf("%s: permanently fail write on block %lld\n",
		    sd->sd_meta->ssd_devname, (long long)wu->swu_blk_start);
	}

	wu->swu_state = SR_WU_FAILED;
	xs->error = XS_DRIVER_STUFFUP;

	return SR_WU_FAILED;
}
730
731 int
732 sr_raid6_addio(struct sr_workunit *wu, int chunk, daddr_t blkno,
733 long len, void *data, int xsflags, int ccbflags, void *pbuf,
734 void *qbuf, int gn)
735 {
736 struct sr_discipline *sd = wu->swu_dis;
737 struct sr_ccb *ccb;
738 struct sr_raid6_opaque *pqbuf;
739
740 DNPRINTF(SR_D_DIS, "sr_raid6_addio: %s %d.%lld %ld %p:%p\n",
741 (xsflags & SCSI_DATA_IN) ? "read" : "write", chunk,
742 (long long)blkno, len, pbuf, qbuf);
743
744 /* Allocate temporary buffer. */
745 if (data == NULL) {
746 data = sr_block_get(sd, len);
747 if (data == NULL)
748 return (-1);
749 ccbflags |= SR_CCBF_FREEBUF;
750 }
751
752 ccb = sr_ccb_rw(sd, chunk, blkno, len, data, xsflags, ccbflags);
753 if (ccb == NULL) {
754 if (ccbflags & SR_CCBF_FREEBUF)
755 sr_block_put(sd, data, len);
756 return (-1);
757 }
758 if (pbuf || qbuf) {
759 /* XXX - can leak data and ccb on failure. */
760 if (qbuf && gf_premul(gn))
761 return (-1);
762
763 /* XXX - should be preallocated? */
764 pqbuf = malloc(sizeof(struct sr_raid6_opaque),
765 M_DEVBUF, M_ZERO | M_NOWAIT);
766 if (pqbuf == NULL) {
767 sr_ccb_put(ccb);
768 return (-1);
769 }
770 pqbuf->pbuf = pbuf;
771 pqbuf->qbuf = qbuf;
772 pqbuf->gn = gn;
773 ccb->ccb_opaque = pqbuf;
774 }
775 sr_wu_enqueue_ccb(wu, ccb);
776
777 return (0);
778 }
779
/* Perform RAID6 parity calculation.
 * P=xor parity, Q=GF256 parity, D=data, gn=disk# */
/*
 * XOR buffer d into the running P parity p, one 32-bit word at a
 * time; len is in bytes (any remainder past a multiple of 4 is
 * ignored).
 */
void
sr_raid6_xorp(void *p, void *d, int len)
{
	uint32_t *dst = p, *src = d;
	int i, words;

	words = len >> 2;
	for (i = 0; i < words; i++)
		dst[i] ^= src[i];
}
791
792 void
793 sr_raid6_xorq(void *q, void *d, int len, int gn)
794 {
795 uint32_t *qbuf = q, *data = d, x;
796 uint8_t *gn_map = gf_map[gn];
797
798 len >>= 2;
799 while (len--) {
800 x = *data++;
801 *qbuf++ ^= (((uint32_t)gn_map[x & 0xff]) |
802 ((uint32_t)gn_map[(x >> 8) & 0xff] << 8) |
803 ((uint32_t)gn_map[(x >> 16) & 0xff] << 16) |
804 ((uint32_t)gn_map[(x >> 24) & 0xff] << 24));
805 }
806 }
807
808 /* Create GF256 log/pow tables: polynomial = 0x11D */
809 void
810 gf_init(void)
811 {
812 int i;
813 uint8_t p = 1;
814
815 /* use 2N pow table to avoid using % in multiply */
816 for (i=0; i<256; i++) {
817 gf_log[p] = i;
818 gf_pow[i] = gf_pow[i+255] = p;
819 p = ((p << 1) ^ ((p & 0x80) ? 0x1D : 0x00));
820 }
821 gf_log[0] = 512;
822 }
823
824 uint8_t
825 gf_inv(uint8_t a)
826 {
827 return gf_pow[255 - gf_log[a]];
828 }
829
830 uint8_t
831 gf_mul(uint8_t a, uint8_t b)
832 {
833 return gf_pow[gf_log[a] + gf_log[b]];
834 }
835
836 /* Precalculate multiplication tables for drive gn */
837 int
838 gf_premul(uint8_t gn)
839 {
840 int i;
841
842 if (gf_map[gn] != NULL)
843 return (0);
844
845 if ((gf_map[gn] = malloc(256, M_DEVBUF, M_ZERO | M_NOWAIT)) == NULL)
846 return (-1);
847
848 for (i=0; i<256; i++)
849 gf_map[gn][i] = gf_pow[gf_log[i] + gf_log[gn]];
850 return (0);
851 }
Cache object: 2591c69175af608d80ba4fd4fb0bfc43
|