The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/dev/softraid_raid6.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /* $OpenBSD: softraid_raid6.c,v 1.72 2021/05/16 15:12:37 deraadt Exp $ */
    2 /*
    3  * Copyright (c) 2009 Marco Peereboom <marco@peereboom.us>
    4  * Copyright (c) 2009 Jordan Hargrave <jordan@openbsd.org>
    5  *
    6  * Permission to use, copy, modify, and distribute this software for any
    7  * purpose with or without fee is hereby granted, provided that the above
    8  * copyright notice and this permission notice appear in all copies.
    9  *
   10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
   11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
   12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
   13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
   14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
   15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
   16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
   17  */
   18 
   19 #include "bio.h"
   20 
   21 #include <sys/param.h>
   22 #include <sys/systm.h>
   23 #include <sys/buf.h>
   24 #include <sys/device.h>
   25 #include <sys/ioctl.h>
   26 #include <sys/malloc.h>
   27 #include <sys/kernel.h>
   28 #include <sys/disk.h>
   29 #include <sys/rwlock.h>
   30 #include <sys/queue.h>
   31 #include <sys/fcntl.h>
   32 #include <sys/mount.h>
   33 #include <sys/sensors.h>
   34 #include <sys/stat.h>
   35 #include <sys/task.h>
   36 #include <sys/conf.h>
   37 #include <sys/uio.h>
   38 
   39 #include <scsi/scsi_all.h>
   40 #include <scsi/scsiconf.h>
   41 #include <scsi/scsi_disk.h>
   42 
   43 #include <dev/softraidvar.h>
   44 
   45 uint8_t *gf_map[256];       /* 256 table pointers; not referenced in the visible code, presumably filled by gf_init()/gf_premul() */
   46 uint8_t gf_pow[768];        /* gf_pow[n] is used below as the Q-parity coefficient "gn" for chunk n */
   47 int     gf_log[256];        /* not referenced in the visible code; presumably the log table matching gf_pow - TODO confirm */
   48 
   49 /* RAID 6 functions: discipline callbacks installed by sr_raid6_discipline_init(), plus local helpers. */
   50 int     sr_raid6_create(struct sr_discipline *, struct bioc_createraid *,
   51             int, int64_t);
   52 int     sr_raid6_assemble(struct sr_discipline *, struct bioc_createraid *,
   53             int, void *);
   54 int     sr_raid6_init(struct sr_discipline *);
   55 int     sr_raid6_rw(struct sr_workunit *);
   56 int     sr_raid6_openings(struct sr_discipline *);
   57 void    sr_raid6_intr(struct buf *);
   58 int     sr_raid6_wu_done(struct sr_workunit *);
   59 void    sr_raid6_set_chunk_state(struct sr_discipline *, int, int);
   60 void    sr_raid6_set_vol_state(struct sr_discipline *);
   61 
   62 void    sr_raid6_xorp(void *, void *, int);             /* called as (dst, src, len) to fold src into a P buffer */
   63 void    sr_raid6_xorq(void *, void *, int, int);        /* called as (dst, src, len, gn) to fold src into a Q buffer */
   64 int     sr_raid6_addio(struct sr_workunit *wu, int, daddr_t, long,
   65             void *, int, int, void *, void *, int);
   66 void    sr_raid6_scrub(struct sr_discipline *);
   67 int     sr_failio(struct sr_workunit *);                /* completes a "fake" failed work unit without real I/O */
   68 
   69 void    gf_init(void);                                  /* initializes the GF256 tables */
   70 uint8_t gf_inv(uint8_t);
   71 int     gf_premul(uint8_t);                             /* non-zero return is treated as failure by callers */
   72 uint8_t gf_mul(uint8_t, uint8_t);
   73 
   74 #define SR_NOFAIL               0x00            /* no relevant chunk is unavailable */
   75 #define SR_FAILX                (1L << 0)       /* the data chunk being accessed is unavailable */
   76 #define SR_FAILY                (1L << 1)       /* some other data chunk is unavailable */
   77 #define SR_FAILP                (1L << 2)       /* the chunk holding xor-parity (P) is unavailable */
   78 #define SR_FAILQ                (1L << 3)       /* the chunk holding q-parity (Q) is unavailable */
   79 
   80 struct sr_raid6_opaque {        /* per-ccb context hung off ccb_opaque and consumed by sr_raid6_intr() */
   81         int     gn;             /* coefficient passed to sr_raid6_xorq() for this ccb's data */
   82         void    *pbuf;          /* if set, completed data is folded in with sr_raid6_xorp() */
   83         void    *qbuf;          /* if set, completed data is folded in with sr_raid6_xorq() using gn */
   84 };
   85 
   86 /* discipline initialisation. */
   87 void
   88 sr_raid6_discipline_init(struct sr_discipline *sd)
   89 {
   90         /* Initialize GF256 tables. */
   91         gf_init();
   92 
   93         /* Fill out discipline members. */
   94         sd->sd_type = SR_MD_RAID6;
   95         strlcpy(sd->sd_name, "RAID 6", sizeof(sd->sd_name));
   96         sd->sd_capabilities = SR_CAP_SYSTEM_DISK | SR_CAP_AUTO_ASSEMBLE |
   97             SR_CAP_REDUNDANT;
   98         sd->sd_max_wu = SR_RAID6_NOWU;
   99 
  100         /* Setup discipline specific function pointers. */
  101         sd->sd_assemble = sr_raid6_assemble;
  102         sd->sd_create = sr_raid6_create;
  103         sd->sd_openings = sr_raid6_openings;
  104         sd->sd_scsi_rw = sr_raid6_rw;
  105         sd->sd_scsi_intr = sr_raid6_intr;
  106         sd->sd_scsi_wu_done = sr_raid6_wu_done;
  107         sd->sd_set_chunk_state = sr_raid6_set_chunk_state;
  108         sd->sd_set_vol_state = sr_raid6_set_vol_state;
  109 }
  110 
  111 int
  112 sr_raid6_create(struct sr_discipline *sd, struct bioc_createraid *bc,
  113     int no_chunk, int64_t coerced_size)
  114 {
  115         if (no_chunk < 4) {
  116                 sr_error(sd->sd_sc, "%s requires four or more chunks",
  117                     sd->sd_name);
  118                 return EINVAL;
  119         }
  120 
  121         /*
  122          * XXX add variable strip size later even though MAXPHYS is really
  123          * the clever value, users like * to tinker with that type of stuff.
  124          */
  125         sd->sd_meta->ssdi.ssd_strip_size = MAXPHYS;
  126         sd->sd_meta->ssdi.ssd_size = (coerced_size &
  127             ~(((u_int64_t)sd->sd_meta->ssdi.ssd_strip_size >>
  128             DEV_BSHIFT) - 1)) * (no_chunk - 2);
  129 
  130         return sr_raid6_init(sd);
  131 }
  132 
  133 int
  134 sr_raid6_assemble(struct sr_discipline *sd, struct bioc_createraid *bc,
  135     int no_chunk, void *data)
  136 {
  137         return sr_raid6_init(sd);
  138 }
  139 
  140 int
  141 sr_raid6_init(struct sr_discipline *sd)
  142 {
  143         /* Initialise runtime values. */
  144         sd->mds.mdd_raid6.sr6_strip_bits =
  145             sr_validate_stripsize(sd->sd_meta->ssdi.ssd_strip_size);
  146         if (sd->mds.mdd_raid6.sr6_strip_bits == -1) {
  147                 sr_error(sd->sd_sc, "invalid strip size");
  148                 return EINVAL;
  149         }
  150 
  151         /* only if stripsize <= MAXPHYS */
  152         sd->sd_max_ccb_per_wu = max(6, 2 * sd->sd_meta->ssdi.ssd_chunk_no);
  153 
  154         return 0;
  155 }
  156 
  157 int
  158 sr_raid6_openings(struct sr_discipline *sd)
  159 {
  160         return (sd->sd_max_wu >> 1); /* 2 wu's per IO */
  161 }
  162 
  163 void
  164 sr_raid6_set_chunk_state(struct sr_discipline *sd, int c, int new_state)
  165 {
  166         int                     old_state, s;
  167 
  168         /* XXX this is for RAID 0 */
  169         DNPRINTF(SR_D_STATE, "%s: %s: %s: sr_raid_set_chunk_state %d -> %d\n",
  170             DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname,
  171             sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_devname, c, new_state);
  172 
  173         /* ok to go to splbio since this only happens in error path */
  174         s = splbio();
  175         old_state = sd->sd_vol.sv_chunks[c]->src_meta.scm_status;
  176 
  177         /* multiple IOs to the same chunk that fail will come through here */
  178         if (old_state == new_state)
  179                 goto done;
  180 
  181         switch (old_state) {
  182         case BIOC_SDONLINE:
  183                 switch (new_state) {
  184                 case BIOC_SDOFFLINE:
  185                 case BIOC_SDSCRUB:
  186                         break;
  187                 default:
  188                         goto die;
  189                 }
  190                 break;
  191 
  192         case BIOC_SDOFFLINE:
  193                 if (new_state == BIOC_SDREBUILD) {
  194                         ;
  195                 } else
  196                         goto die;
  197                 break;
  198 
  199         case BIOC_SDSCRUB:
  200                 switch (new_state) {
  201                 case BIOC_SDONLINE:
  202                 case BIOC_SDOFFLINE:
  203                         break;
  204                 default:
  205                         goto die;
  206                 }
  207                 break;
  208 
  209         case BIOC_SDREBUILD:
  210                 switch (new_state) {
  211                 case BIOC_SDONLINE:
  212                 case BIOC_SDOFFLINE:
  213                         break;
  214                 default:
  215                         goto die;
  216                 }
  217                 break;
  218 
  219         default:
  220 die:
  221                 splx(s); /* XXX */
  222                 panic("%s: %s: %s: invalid chunk state transition %d -> %d",
  223                     DEVNAME(sd->sd_sc),
  224                     sd->sd_meta->ssd_devname,
  225                     sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_devname,
  226                     old_state, new_state);
  227                 /* NOTREACHED */
  228         }
  229 
  230         sd->sd_vol.sv_chunks[c]->src_meta.scm_status = new_state;
  231         sd->sd_set_vol_state(sd);
  232 
  233         sd->sd_must_flush = 1;
  234         task_add(systq, &sd->sd_meta_save_task);
  235 done:
  236         splx(s);
  237 }
  238 
  239 void
  240 sr_raid6_set_vol_state(struct sr_discipline *sd)
  241 {
  242         int                     states[SR_MAX_STATES];
  243         int                     new_state, i, s, nd;
  244         int                     old_state = sd->sd_vol_status;
  245 
  246         /* XXX this is for RAID 0 */
  247 
  248         DNPRINTF(SR_D_STATE, "%s: %s: sr_raid_set_vol_state\n",
  249             DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname);
  250 
  251         nd = sd->sd_meta->ssdi.ssd_chunk_no;
  252 
  253         for (i = 0; i < SR_MAX_STATES; i++)
  254                 states[i] = 0;
  255 
  256         for (i = 0; i < nd; i++) {
  257                 s = sd->sd_vol.sv_chunks[i]->src_meta.scm_status;
  258                 if (s >= SR_MAX_STATES)
  259                         panic("%s: %s: %s: invalid chunk state",
  260                             DEVNAME(sd->sd_sc),
  261                             sd->sd_meta->ssd_devname,
  262                             sd->sd_vol.sv_chunks[i]->src_meta.scmi.scm_devname);
  263                 states[s]++;
  264         }
  265 
  266         if (states[BIOC_SDONLINE] == nd)
  267                 new_state = BIOC_SVONLINE;
  268         else if (states[BIOC_SDONLINE] < nd - 2)
  269                 new_state = BIOC_SVOFFLINE;
  270         else if (states[BIOC_SDSCRUB] != 0)
  271                 new_state = BIOC_SVSCRUB;
  272         else if (states[BIOC_SDREBUILD] != 0)
  273                 new_state = BIOC_SVREBUILD;
  274         else if (states[BIOC_SDONLINE] < nd)
  275                 new_state = BIOC_SVDEGRADED;
  276         else {
  277                 printf("old_state = %d, ", old_state);
  278                 for (i = 0; i < nd; i++)
  279                         printf("%d = %d, ", i,
  280                             sd->sd_vol.sv_chunks[i]->src_meta.scm_status);
  281                 panic("invalid new_state");
  282         }
  283 
  284         DNPRINTF(SR_D_STATE, "%s: %s: sr_raid_set_vol_state %d -> %d\n",
  285             DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname,
  286             old_state, new_state);
  287 
  288         switch (old_state) {
  289         case BIOC_SVONLINE:
  290                 switch (new_state) {
  291                 case BIOC_SVONLINE: /* can go to same state */
  292                 case BIOC_SVOFFLINE:
  293                 case BIOC_SVDEGRADED:
  294                 case BIOC_SVREBUILD: /* happens on boot */
  295                         break;
  296                 default:
  297                         goto die;
  298                 }
  299                 break;
  300 
  301         case BIOC_SVOFFLINE:
  302                 /* XXX this might be a little too much */
  303                 goto die;
  304 
  305         case BIOC_SVDEGRADED:
  306                 switch (new_state) {
  307                 case BIOC_SVOFFLINE:
  308                 case BIOC_SVREBUILD:
  309                 case BIOC_SVDEGRADED: /* can go to the same state */
  310                         break;
  311                 default:
  312                         goto die;
  313                 }
  314                 break;
  315 
  316         case BIOC_SVBUILDING:
  317                 switch (new_state) {
  318                 case BIOC_SVONLINE:
  319                 case BIOC_SVOFFLINE:
  320                 case BIOC_SVBUILDING: /* can go to the same state */
  321                         break;
  322                 default:
  323                         goto die;
  324                 }
  325                 break;
  326 
  327         case BIOC_SVSCRUB:
  328                 switch (new_state) {
  329                 case BIOC_SVONLINE:
  330                 case BIOC_SVOFFLINE:
  331                 case BIOC_SVDEGRADED:
  332                 case BIOC_SVSCRUB: /* can go to same state */
  333                         break;
  334                 default:
  335                         goto die;
  336                 }
  337                 break;
  338 
  339         case BIOC_SVREBUILD:
  340                 switch (new_state) {
  341                 case BIOC_SVONLINE:
  342                 case BIOC_SVOFFLINE:
  343                 case BIOC_SVDEGRADED:
  344                 case BIOC_SVREBUILD: /* can go to the same state */
  345                         break;
  346                 default:
  347                         goto die;
  348                 }
  349                 break;
  350 
  351         default:
  352 die:
  353                 panic("%s: %s: invalid volume state transition %d -> %d",
  354                     DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname,
  355                     old_state, new_state);
  356                 /* NOTREACHED */
  357         }
  358 
  359         sd->sd_vol_status = new_state;
  360 }
  361 
  362 /*  modes (argument order follows sr_raid6_addio(wu, chunk, blkno, len,
      *         data, xsflags, ccbflags, pbuf, qbuf, gn)):
  363  *   readq: sr_raid6_addio(wu, i, lba, length, NULL, SCSI_DATA_IN,
  364  *              0, qbuf, NULL, 0);
  365  *   readp: sr_raid6_addio(wu, i, lba, length, NULL, SCSI_DATA_IN,
  366  *              0, pbuf, NULL, 0);
  367  *   readx: sr_raid6_addio(wu, i, lba, length, NULL, SCSI_DATA_IN,
  368  *              0, pbuf, qbuf, gf_pow[i]);
  369  */
  370 
  371 int
  372 sr_raid6_rw(struct sr_workunit *wu)
  373 {
  374         struct sr_workunit      *wu_r = NULL;
  375         struct sr_discipline    *sd = wu->swu_dis;
  376         struct scsi_xfer        *xs = wu->swu_xs;
  377         struct sr_chunk         *scp;
  378         int                     s, fail, i, gxinv, pxinv;
  379         daddr_t                 blkno, lba;
  380         int64_t                 chunk_offs, lbaoffs, offset, strip_offs;
  381         int64_t                 strip_no, strip_size, strip_bits, row_size;
  382         int64_t                 fchunk, no_chunk, chunk, qchunk, pchunk;
  383         long                    length, datalen;
  384         void                    *pbuf, *data, *qbuf;
  385 
  386         /* blkno and scsi error will be handled by sr_validate_io */
  387         if (sr_validate_io(wu, &blkno, "sr_raid6_rw"))
  388                 goto bad;
  389 
  390         strip_size = sd->sd_meta->ssdi.ssd_strip_size;
  391         strip_bits = sd->mds.mdd_raid6.sr6_strip_bits;
  392         no_chunk = sd->sd_meta->ssdi.ssd_chunk_no - 2;
  393         row_size = (no_chunk << strip_bits) >> DEV_BSHIFT;
  394 
  395         data = xs->data;
  396         datalen = xs->datalen;
  397         lbaoffs = blkno << DEV_BSHIFT;
  398 
  399         if (xs->flags & SCSI_DATA_OUT) {
  400                 if ((wu_r = sr_scsi_wu_get(sd, SCSI_NOSLEEP)) == NULL){
  401                         printf("%s: can't get wu_r", DEVNAME(sd->sd_sc));
  402                         goto bad;
  403                 }
  404                 wu_r->swu_state = SR_WU_INPROGRESS;
  405                 wu_r->swu_flags |= SR_WUF_DISCIPLINE;
  406         }
  407 
  408         wu->swu_blk_start = 0;
  409         while (datalen != 0) {
  410                 strip_no = lbaoffs >> strip_bits;
  411                 strip_offs = lbaoffs & (strip_size - 1);
  412                 chunk_offs = (strip_no / no_chunk) << strip_bits;
  413                 offset = chunk_offs + strip_offs;
  414 
  415                 /* get size remaining in this stripe */
  416                 length = MIN(strip_size - strip_offs, datalen);
  417 
  418                 /* map disk offset to parity/data drive */
  419                 chunk = strip_no % no_chunk;
  420 
  421                 qchunk = (no_chunk + 1) - ((strip_no / no_chunk) % (no_chunk+2));
  422                 if (qchunk == 0)
  423                         pchunk = no_chunk + 1;
  424                 else
  425                         pchunk = qchunk - 1;
  426                 if (chunk >= pchunk)
  427                         chunk++;
  428                 if (chunk >= qchunk)
  429                         chunk++;
  430 
  431                 lba = offset >> DEV_BSHIFT;
  432 
  433                 /* XXX big hammer.. exclude I/O from entire stripe */
  434                 if (wu->swu_blk_start == 0)
  435                         wu->swu_blk_start = (strip_no / no_chunk) * row_size;
  436                 wu->swu_blk_end = (strip_no / no_chunk) * row_size + (row_size - 1);
  437 
  438                 fail = 0;
  439                 fchunk = -1;
  440 
  441                 /* Get disk-fail flags */
  442                 for (i=0; i< no_chunk+2; i++) {
  443                         scp = sd->sd_vol.sv_chunks[i];
  444                         switch (scp->src_meta.scm_status) {
  445                         case BIOC_SDOFFLINE:
  446                         case BIOC_SDREBUILD:
  447                         case BIOC_SDHOTSPARE:
  448                                 if (i == qchunk)
  449                                         fail |= SR_FAILQ;
  450                                 else if (i == pchunk)
  451                                         fail |= SR_FAILP;
  452                                 else if (i == chunk)
  453                                         fail |= SR_FAILX;
  454                                 else {
  455                                         /* dual data-disk failure */
  456                                         fail |= SR_FAILY;
  457                                         fchunk = i;
  458                                 }
  459                                 break;
  460                         }
  461                 }
  462                 if (xs->flags & SCSI_DATA_IN) {
  463                         if (!(fail & SR_FAILX)) {
  464                                 /* drive is good. issue single read request */
  465                                 if (sr_raid6_addio(wu, chunk, lba, length,
  466                                     data, xs->flags, 0, NULL, NULL, 0))
  467                                         goto bad;
  468                         } else if (fail & SR_FAILP) {
  469                                 /* Dx, P failed */
  470                                 printf("Disk %llx offline, "
  471                                     "regenerating Dx+P\n", chunk);
  472 
  473                                 gxinv = gf_inv(gf_pow[chunk]);
  474 
  475                                 /* Calculate: Dx = (Q^Dz*gz)*inv(gx) */
  476                                 memset(data, 0, length);
  477                                 if (sr_raid6_addio(wu, qchunk, lba, length,
  478                                     NULL, SCSI_DATA_IN, 0, NULL, data, gxinv))
  479                                         goto bad;
  480 
  481                                 /* Read Dz * gz * inv(gx) */
  482                                 for (i = 0; i < no_chunk+2; i++) {
  483                                         if  (i == qchunk || i == pchunk || i == chunk)
  484                                                 continue;
  485 
  486                                         if (sr_raid6_addio(wu, i, lba, length,
  487                                             NULL, SCSI_DATA_IN, 0, NULL, data,
  488                                             gf_mul(gf_pow[i], gxinv)))
  489                                                 goto bad;
  490                                 }
  491 
  492                                 /* data will contain correct value on completion */
  493                         } else if (fail & SR_FAILY) {
  494                                 /* Dx, Dy failed */
  495                                 printf("Disk %llx & %llx offline, "
  496                                     "regenerating Dx+Dy\n", chunk, fchunk);
  497 
  498                                 gxinv = gf_inv(gf_pow[chunk] ^ gf_pow[fchunk]);
  499                                 pxinv = gf_mul(gf_pow[fchunk], gxinv);
  500 
  501                                 /* read Q * inv(gx + gy) */
  502                                 memset(data, 0, length);
  503                                 if (sr_raid6_addio(wu, qchunk, lba, length,
  504                                     NULL, SCSI_DATA_IN, 0, NULL, data, gxinv))
  505                                         goto bad;
  506 
  507                                 /* read P * gy * inv(gx + gy) */
  508                                 if (sr_raid6_addio(wu, pchunk, lba, length,
  509                                     NULL, SCSI_DATA_IN, 0, NULL, data, pxinv))
  510                                         goto bad;
  511 
  512                                 /* Calculate: Dx*gx^Dy*gy = Q^(Dz*gz) ; Dx^Dy = P^Dz
  513                                  *   Q:  sr_raid6_xorp(qbuf, --, length);
  514                                  *   P:  sr_raid6_xorp(pbuf, --, length);
  515                                  *   Dz: sr_raid6_xorp(pbuf, --, length);
  516                                  *       sr_raid6_xorq(qbuf, --, length, gf_pow[i]);
  517                                  */
  518                                 for (i = 0; i < no_chunk+2; i++) {
  519                                         if (i == qchunk || i == pchunk ||
  520                                             i == chunk || i == fchunk)
  521                                                 continue;
  522 
  523                                         /* read Dz * (gz + gy) * inv(gx + gy) */
  524                                         if (sr_raid6_addio(wu, i, lba, length,
  525                                             NULL, SCSI_DATA_IN, 0, NULL, data,
  526                                             pxinv ^ gf_mul(gf_pow[i], gxinv)))
  527                                                 goto bad;
  528                                 }
  529                         } else {
  530                                 /* Two cases: single disk (Dx) or (Dx+Q)
  531                                  *   Dx = Dz ^ P (same as RAID5)
  532                                  */
  533                                 printf("Disk %llx offline, "
  534                                     "regenerating Dx%s\n", chunk,
  535                                     fail & SR_FAILQ ? "+Q" : " single");
  536 
  537                                 /* Calculate: Dx = P^Dz
  538                                  *   P:  sr_raid6_xorp(data, ---, length);
  539                                  *   Dz: sr_raid6_xorp(data, ---, length);
  540                                  */
  541                                 memset(data, 0, length);
  542                                 for (i = 0; i < no_chunk+2; i++) {
  543                                         if (i != chunk && i != qchunk) {
  544                                                 /* Read Dz */
  545                                                 if (sr_raid6_addio(wu, i, lba,
  546                                                     length, NULL, SCSI_DATA_IN,
  547                                                     0, data, NULL, 0))
  548                                                         goto bad;
  549                                         }
  550                                 }
  551 
  552                                 /* data will contain correct value on completion */
  553                         }
  554                 } else {
  555                         /* XXX handle writes to failed/offline disk? */
  556                         if (fail & (SR_FAILX|SR_FAILQ|SR_FAILP))
  557                                 goto bad;
  558 
  559                         /*
  560                          * initialize pbuf with contents of new data to be
  561                          * written. This will be XORed with old data and old
  562                          * parity in the intr routine. The result in pbuf
  563                          * is the new parity data.
  564                          */
  565                         qbuf = sr_block_get(sd, length);
  566                         if (qbuf == NULL)
  567                                 goto bad;
  568 
  569                         pbuf = sr_block_get(sd, length);
  570                         if (pbuf == NULL)
  571                                 goto bad;
  572 
  573                         /* Calculate P = Dn; Q = gn * Dn */
  574                         if (gf_premul(gf_pow[chunk]))
  575                                 goto bad;
  576                         sr_raid6_xorp(pbuf, data, length);
  577                         sr_raid6_xorq(qbuf, data, length, gf_pow[chunk]);
  578 
  579                         /* Read old data: P ^= Dn' ; Q ^= (gn * Dn') */
  580                         if (sr_raid6_addio(wu_r, chunk, lba, length, NULL,
  581                                 SCSI_DATA_IN, 0, pbuf, qbuf, gf_pow[chunk]))
  582                                 goto bad;
  583 
  584                         /* Read old xor-parity: P ^= P' */
  585                         if (sr_raid6_addio(wu_r, pchunk, lba, length, NULL,
  586                                 SCSI_DATA_IN, 0, pbuf, NULL, 0))
  587                                 goto bad;
  588 
  589                         /* Read old q-parity: Q ^= Q' */
  590                         if (sr_raid6_addio(wu_r, qchunk, lba, length, NULL,
  591                                 SCSI_DATA_IN, 0, qbuf, NULL, 0))
  592                                 goto bad;
  593 
  594                         /* write new data */
  595                         if (sr_raid6_addio(wu, chunk, lba, length, data,
  596                             xs->flags, 0, NULL, NULL, 0))
  597                                 goto bad;
  598 
  599                         /* write new xor-parity */
  600                         if (sr_raid6_addio(wu, pchunk, lba, length, pbuf,
  601                             xs->flags, SR_CCBF_FREEBUF, NULL, NULL, 0))
  602                                 goto bad;
  603 
  604                         /* write new q-parity */
  605                         if (sr_raid6_addio(wu, qchunk, lba, length, qbuf,
  606                             xs->flags, SR_CCBF_FREEBUF, NULL, NULL, 0))
  607                                 goto bad;
  608                 }
  609 
  610                 /* advance to next block */
  611                 lbaoffs += length;
  612                 datalen -= length;
  613                 data += length;
  614         }
  615 
  616         s = splbio();
  617         if (wu_r) {
  618                 /* collide write request with reads */
  619                 wu_r->swu_blk_start = wu->swu_blk_start;
  620                 wu_r->swu_blk_end = wu->swu_blk_end;
  621 
  622                 wu->swu_state = SR_WU_DEFERRED;
  623                 wu_r->swu_collider = wu;
  624                 TAILQ_INSERT_TAIL(&sd->sd_wu_defq, wu, swu_link);
  625 
  626                 wu = wu_r;
  627         }
  628         splx(s);
  629 
  630         sr_schedule_wu(wu);
  631 
  632         return (0);
  633 bad:
  634         /* XXX - can leak pbuf/qbuf on error. */
  635         /* wu is unwound by sr_wu_put */
  636         if (wu_r)
  637                 sr_scsi_wu_put(sd, wu_r);
  638         return (1);
  639 }
  640 
  641 /* Handle failure I/O completion */
  642 int
  643 sr_failio(struct sr_workunit *wu)
  644 {
  645         struct sr_discipline    *sd = wu->swu_dis;
  646         struct sr_ccb           *ccb;
  647 
  648         if (!(wu->swu_flags & SR_WUF_FAIL))
  649                 return (0);
  650 
  651         /* Wu is a 'fake'.. don't do real I/O just intr */
  652         TAILQ_INSERT_TAIL(&sd->sd_wu_pendq, wu, swu_link);
  653         TAILQ_FOREACH(ccb, &wu->swu_ccb, ccb_link)
  654                 sr_raid6_intr(&ccb->ccb_buf);
  655         return (1);
  656 }
  657 
  658 void
  659 sr_raid6_intr(struct buf *bp)
  660 {
  661         struct sr_ccb           *ccb = (struct sr_ccb *)bp;
  662         struct sr_workunit      *wu = ccb->ccb_wu;
  663         struct sr_discipline    *sd = wu->swu_dis;
  664         struct sr_raid6_opaque  *pq = ccb->ccb_opaque;
  665         int                     s;
  666 
  667         DNPRINTF(SR_D_INTR, "%s: sr_raid6_intr bp %p xs %p\n",
  668             DEVNAME(sd->sd_sc), bp, wu->swu_xs);
  669 
  670         s = splbio();
  671         sr_ccb_done(ccb);
  672 
  673         /* XOR data to result. */
  674         if (ccb->ccb_state == SR_CCB_OK && pq) {
  675                 if (pq->pbuf)
  676                         /* Calculate xor-parity */
  677                         sr_raid6_xorp(pq->pbuf, ccb->ccb_buf.b_data,
  678                             ccb->ccb_buf.b_bcount);
  679                 if (pq->qbuf)
  680                         /* Calculate q-parity */
  681                         sr_raid6_xorq(pq->qbuf, ccb->ccb_buf.b_data,
  682                             ccb->ccb_buf.b_bcount, pq->gn);
  683                 free(pq, M_DEVBUF, 0);
  684                 ccb->ccb_opaque = NULL;
  685         }
  686 
  687         /* Free allocated data buffer. */
  688         if (ccb->ccb_flags & SR_CCBF_FREEBUF) {
  689                 sr_block_put(sd, ccb->ccb_buf.b_data, ccb->ccb_buf.b_bcount);
  690                 ccb->ccb_buf.b_data = NULL;
  691         }
  692 
  693         sr_wu_done(wu);
  694         splx(s);
  695 }
  696 
  697 int
  698 sr_raid6_wu_done(struct sr_workunit *wu)
  699 {
  700         struct sr_discipline    *sd = wu->swu_dis;
  701         struct scsi_xfer        *xs = wu->swu_xs;
  702 
  703         /* XXX - we have no way of propagating errors... */
  704         if (wu->swu_flags & SR_WUF_DISCIPLINE)
  705                 return SR_WU_OK;
  706 
  707         /* XXX - This is insufficient for RAID 6. */
  708         if (wu->swu_ios_succeeded > 0) {
  709                 xs->error = XS_NOERROR;
  710                 return SR_WU_OK;
  711         }
  712 
  713         if (xs->flags & SCSI_DATA_IN) {
  714                 printf("%s: retrying read on block %lld\n",
  715                     sd->sd_meta->ssd_devname, (long long)wu->swu_blk_start);
  716                 sr_wu_release_ccbs(wu);
  717                 wu->swu_state = SR_WU_RESTART;
  718                 if (sd->sd_scsi_rw(wu) == 0)
  719                         return SR_WU_RESTART;
  720         } else {
  721                 printf("%s: permanently fail write on block %lld\n",
  722                     sd->sd_meta->ssd_devname, (long long)wu->swu_blk_start);
  723         }
  724 
  725         wu->swu_state = SR_WU_FAILED;
  726         xs->error = XS_DRIVER_STUFFUP;
  727 
  728         return SR_WU_FAILED;
  729 }
  730 
  731 int
  732 sr_raid6_addio(struct sr_workunit *wu, int chunk, daddr_t blkno,
  733     long len, void *data, int xsflags, int ccbflags, void *pbuf,
  734     void *qbuf, int gn)
  735 {
  736         struct sr_discipline    *sd = wu->swu_dis;
  737         struct sr_ccb           *ccb;
  738         struct sr_raid6_opaque  *pqbuf;
  739 
  740         DNPRINTF(SR_D_DIS, "sr_raid6_addio: %s %d.%lld %ld %p:%p\n",
  741             (xsflags & SCSI_DATA_IN) ? "read" : "write", chunk,
  742             (long long)blkno, len, pbuf, qbuf);
  743 
  744         /* Allocate temporary buffer. */
  745         if (data == NULL) {
  746                 data = sr_block_get(sd, len);
  747                 if (data == NULL)
  748                         return (-1);
  749                 ccbflags |= SR_CCBF_FREEBUF;
  750         }
  751 
  752         ccb = sr_ccb_rw(sd, chunk, blkno, len, data, xsflags, ccbflags);
  753         if (ccb == NULL) {
  754                 if (ccbflags & SR_CCBF_FREEBUF)
  755                         sr_block_put(sd, data, len);
  756                 return (-1);
  757         }
  758         if (pbuf || qbuf) {
  759                 /* XXX - can leak data and ccb on failure. */
  760                 if (qbuf && gf_premul(gn))
  761                         return (-1);
  762 
  763                 /* XXX - should be preallocated? */
  764                 pqbuf = malloc(sizeof(struct sr_raid6_opaque),
  765                     M_DEVBUF, M_ZERO | M_NOWAIT);
  766                 if (pqbuf == NULL) {
  767                         sr_ccb_put(ccb);
  768                         return (-1);
  769                 }
  770                 pqbuf->pbuf = pbuf;
  771                 pqbuf->qbuf = qbuf;
  772                 pqbuf->gn = gn;
  773                 ccb->ccb_opaque = pqbuf;
  774         }
  775         sr_wu_enqueue_ccb(wu, ccb);
  776 
  777         return (0);
  778 }
  779 
  /*
   * RAID6 parity helpers.
   *   P = xor parity, Q = GF256 parity, D = data, gn = disk#
   *
   * sr_raid6_xorp: fold data block d into P-parity buffer p by xor, one
   * 32-bit word at a time.  len is in bytes; only the whole words in len
   * (len / 4) are processed.
   */
  void
  sr_raid6_xorp(void *p, void *d, int len)
  {
          uint32_t        *parity = p;
          uint32_t        *src = d;
          int              words;

          for (words = len >> 2; words != 0; words--) {
                  *parity ^= *src;
                  parity++;
                  src++;
          }
  }
  791 
  792 void
  793 sr_raid6_xorq(void *q, void *d, int len, int gn)
  794 {
  795         uint32_t        *qbuf = q, *data = d, x;
  796         uint8_t         *gn_map = gf_map[gn];
  797 
  798         len >>= 2;
  799         while (len--) {
  800                 x = *data++;
  801                 *qbuf++ ^= (((uint32_t)gn_map[x & 0xff]) |
  802                             ((uint32_t)gn_map[(x >> 8) & 0xff] << 8) |
  803                             ((uint32_t)gn_map[(x >> 16) & 0xff] << 16) |
  804                             ((uint32_t)gn_map[(x >> 24) & 0xff] << 24));
  805         }
  806 }
  807 
  808 /* Create GF256 log/pow tables: polynomial = 0x11D */
  809 void
  810 gf_init(void)
  811 {
  812         int i;
  813         uint8_t p = 1;
  814 
  815         /* use 2N pow table to avoid using % in multiply */
  816         for (i=0; i<256; i++) {
  817                 gf_log[p] = i;
  818                 gf_pow[i] = gf_pow[i+255] = p;
  819                 p = ((p << 1) ^ ((p & 0x80) ? 0x1D : 0x00));
  820         }
  821         gf_log[0] = 512;
  822 }
  823 
  824 uint8_t
  825 gf_inv(uint8_t a)
  826 {
  827         return gf_pow[255 - gf_log[a]];
  828 }
  829 
  830 uint8_t
  831 gf_mul(uint8_t a, uint8_t b)
  832 {
  833         return gf_pow[gf_log[a] + gf_log[b]];
  834 }
  835 
  836 /* Precalculate multiplication tables for drive gn */
  837 int
  838 gf_premul(uint8_t gn)
  839 {
  840         int i;
  841 
  842         if (gf_map[gn] != NULL)
  843                 return (0);
  844 
  845         if ((gf_map[gn] = malloc(256, M_DEVBUF, M_ZERO | M_NOWAIT)) == NULL)
  846                 return (-1);
  847 
  848         for (i=0; i<256; i++)
  849                 gf_map[gn][i] = gf_pow[gf_log[i] + gf_log[gn]];
  850         return (0);
  851 }

Cache object: 2591c69175af608d80ba4fd4fb0bfc43


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.