FreeBSD/Linux Kernel Cross Reference
sys/geom/vinum/geom_vinum_raid5.c


/*-
 * Copyright (c) 2004, 2007 Lukas Ertl
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: releng/10.0/sys/geom/vinum/geom_vinum_raid5.c 191856 2009-05-06 19:34:32Z lulf $");

#include <sys/param.h>
#include <sys/bio.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/systm.h>

#include <geom/geom.h>
#include <geom/vinum/geom_vinum_var.h>
#include <geom/vinum/geom_vinum_raid5.h>
#include <geom/vinum/geom_vinum.h>

static int              gv_raid5_offset(struct gv_plex *, off_t, off_t,
                            off_t *, off_t *, int *, int *, int);
static struct bio *     gv_raid5_clone_bio(struct bio *, struct gv_sd *,
                            struct gv_raid5_packet *, caddr_t, int);
static int      gv_raid5_request(struct gv_plex *, struct gv_raid5_packet *,
                    struct bio *, caddr_t, off_t, off_t, int *);
static int      gv_raid5_check(struct gv_plex *, struct gv_raid5_packet *,
                    struct bio *, caddr_t, off_t, off_t);
static int      gv_raid5_rebuild(struct gv_plex *, struct gv_raid5_packet *,
                    struct bio *, caddr_t, off_t, off_t);

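/*
 * Build a work packet for a request on a RAID5 plex and queue the resulting
 * BIOs on the plex.  Returns the packet, or NULL if the request was delayed
 * or an error has already been delivered to the original BIO.
 */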
struct gv_raid5_packet *
gv_raid5_start(struct gv_plex *p, struct bio *bp, caddr_t addr, off_t boff,
    off_t bcount)
{
        struct bio *cbp;
        struct gv_raid5_packet *wp, *wp2;
        struct gv_bioq *bq, *bq2;
        int err, delay;

        delay = 0;
        wp = g_malloc(sizeof(*wp), M_WAITOK | M_ZERO);
        wp->bio = bp;
        wp->waiting = NULL;
        wp->parity = NULL;
        TAILQ_INIT(&wp->bits);

        if (bp->bio_pflags & GV_BIO_REBUILD)
                err = gv_raid5_rebuild(p, wp, bp, addr, boff, bcount);
        else if (bp->bio_pflags & GV_BIO_CHECK)
                err = gv_raid5_check(p, wp, bp, addr, boff, bcount);
        else
                err = gv_raid5_request(p, wp, bp, addr, boff, bcount, &delay);

        /* The request was delayed and queued; discard the unused packet. */
        if (delay) {
                g_free(wp);
                return (NULL);
        }

        /*
         * Building the sub-request failed, we probably need to clean up a lot.
         */
        if (err) {
                G_VINUM_LOGREQ(0, bp, "raid5 plex request failed.");
                TAILQ_FOREACH_SAFE(bq, &wp->bits, queue, bq2) {
                        TAILQ_REMOVE(&wp->bits, bq, queue);
                        g_free(bq);
                }
                if (wp->waiting != NULL) {
                        if (wp->waiting->bio_cflags & GV_BIO_MALLOC)
                                g_free(wp->waiting->bio_data);
                        g_destroy_bio(wp->waiting);
                }
                if (wp->parity != NULL) {
                        if (wp->parity->bio_cflags & GV_BIO_MALLOC)
                                g_free(wp->parity->bio_data);
                        g_destroy_bio(wp->parity);
                }
                g_free(wp);

                TAILQ_FOREACH_SAFE(wp, &p->packets, list, wp2) {
                        if (wp->bio != bp)
                                continue;

                        TAILQ_REMOVE(&p->packets, wp, list);
                        TAILQ_FOREACH_SAFE(bq, &wp->bits, queue, bq2) {
                                TAILQ_REMOVE(&wp->bits, bq, queue);
                                g_free(bq);
                        }
                        g_free(wp);
                }

                cbp = bioq_takefirst(p->bqueue);
                while (cbp != NULL) {
                        if (cbp->bio_cflags & GV_BIO_MALLOC)
                                g_free(cbp->bio_data);
                        g_destroy_bio(cbp);
                        cbp = bioq_takefirst(p->bqueue);
                }

                /* If internal, stop and reset state. */
                if (bp->bio_pflags & GV_BIO_INTERNAL) {
                        if (bp->bio_pflags & GV_BIO_MALLOC)
                                g_free(bp->bio_data);
                        g_destroy_bio(bp);
                        /* Reset flags. */
                        p->flags &= ~(GV_PLEX_SYNCING | GV_PLEX_REBUILDING |
                            GV_PLEX_GROWING);
                        return (NULL);
                }
                g_io_deliver(bp, err);
                return (NULL);
        }

        return (wp);
}

/*
 * Check if the stripe that the work packet wants is already being used by
 * some other work packet.
 */
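/*
 * Two packets collide when their locked ranges
 * [lockbase, lockbase + length] intersect; e.g. a packet locking
 * [0, 128k] overlaps a packet locking [64k, 192k], so the newer of the
 * two has to wait.
 */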
int
gv_stripe_active(struct gv_plex *p, struct bio *bp)
{
        struct gv_raid5_packet *wp, *owp;
        int overlap;

        wp = bp->bio_caller2;
        if (wp->lockbase == -1)
                return (0);

        overlap = 0;
        TAILQ_FOREACH(owp, &p->packets, list) {
                if (owp == wp)
                        break;
                if ((wp->lockbase >= owp->lockbase) &&
                    (wp->lockbase <= owp->lockbase + owp->length)) {
                        overlap++;
                        break;
                }
                if ((wp->lockbase <= owp->lockbase) &&
                    (wp->lockbase + wp->length >= owp->lockbase)) {
                        overlap++;
                        break;
                }
        }

        return (overlap);
}

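/*
 * Check the parity of a RAID5 plex: read all data stripes as well as the
 * on-disk parity, and set up a shadow BIO (wp->parity) that serves as the
 * XOR buffer and, if a rebuild of the parity was requested, writes the
 * recomputed parity back out.
 */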
static int
gv_raid5_check(struct gv_plex *p, struct gv_raid5_packet *wp, struct bio *bp,
    caddr_t addr, off_t boff, off_t bcount)
{
        struct gv_sd *parity, *s;
        struct gv_bioq *bq;
        struct bio *cbp;
        int i, psdno;
        off_t real_len, real_off;

        if (p == NULL || LIST_EMPTY(&p->subdisks))
                return (ENXIO);

        gv_raid5_offset(p, boff, bcount, &real_off, &real_len, NULL, &psdno, 1);

        /* Find the right subdisk. */
        parity = NULL;
        i = 0;
        LIST_FOREACH(s, &p->subdisks, in_plex) {
                if (i == psdno) {
                        parity = s;
                        break;
                }
                i++;
        }

        /* Parity subdisk not found. */
        if (parity == NULL)
                return (ENXIO);

        if (parity->state != GV_SD_UP)
                return (ENXIO);

        wp->length = real_len;
        wp->data = addr;
        wp->lockbase = real_off;

        /* Read all subdisks. */
        LIST_FOREACH(s, &p->subdisks, in_plex) {
                /* Skip the parity subdisk. */
                if (s == parity)
                        continue;
                /* Skip growing subdisks. */
                if (s->flags & GV_SD_GROW)
                        continue;

                cbp = gv_raid5_clone_bio(bp, s, wp, NULL, 1);
                if (cbp == NULL)
                        return (ENOMEM);
                cbp->bio_cmd = BIO_READ;

                bioq_insert_tail(p->bqueue, cbp);

                bq = g_malloc(sizeof(*bq), M_WAITOK | M_ZERO);
                bq->bp = cbp;
                TAILQ_INSERT_TAIL(&wp->bits, bq, queue);
        }

        /* Read the parity data. */
        cbp = gv_raid5_clone_bio(bp, parity, wp, NULL, 1);
        if (cbp == NULL)
                return (ENOMEM);
        cbp->bio_cmd = BIO_READ;
        wp->waiting = cbp;

        /*
         * In case we want to rebuild the parity, create an extra BIO to write
         * it out.  It also acts as buffer for the XOR operations.
         */
        cbp = gv_raid5_clone_bio(bp, parity, wp, addr, 1);
        if (cbp == NULL)
                return (ENOMEM);
        wp->parity = cbp;

        return (0);
}

/* Rebuild a degraded RAID5 plex. */
static int
gv_raid5_rebuild(struct gv_plex *p, struct gv_raid5_packet *wp, struct bio *bp,
    caddr_t addr, off_t boff, off_t bcount)
{
        struct gv_sd *broken, *s;
        struct gv_bioq *bq;
        struct bio *cbp;
        off_t real_len, real_off;

        if (p == NULL || LIST_EMPTY(&p->subdisks))
                return (ENXIO);

        gv_raid5_offset(p, boff, bcount, &real_off, &real_len, NULL, NULL, 1);

        /* Find the broken subdisk. */
        broken = NULL;
        LIST_FOREACH(s, &p->subdisks, in_plex) {
                if (s->state != GV_SD_UP)
                        broken = s;
        }

        /* Broken subdisk not found. */
        if (broken == NULL)
                return (ENXIO);

        switch (broken->state) {
        case GV_SD_UP:
                return (EINVAL);

        case GV_SD_STALE:
                if (!(bp->bio_pflags & GV_BIO_REBUILD))
                        return (ENXIO);

                G_VINUM_DEBUG(1, "sd %s is reviving", broken->name);
                gv_set_sd_state(broken, GV_SD_REVIVING, GV_SETSTATE_FORCE);
                /*
                 * Set this flag now; strictly it should only be set once
                 * the rebuild has completed.
                 */
                broken->flags |= GV_SD_CANGOUP;
                break;

        case GV_SD_REVIVING:
                break;

        default:
                /* All other subdisk states mean it's not accessible. */
                return (ENXIO);
        }

        wp->length = real_len;
        wp->data = addr;
        wp->lockbase = real_off;

        KASSERT(wp->length >= 0, ("gv_raid5_rebuild: wp->length < 0"));

        /* Read all subdisks. */
        LIST_FOREACH(s, &p->subdisks, in_plex) {
                /* Skip the broken subdisk. */
                if (s == broken)
                        continue;

                /* Skip growing subdisks. */
                if (s->flags & GV_SD_GROW)
                        continue;

                cbp = gv_raid5_clone_bio(bp, s, wp, NULL, 1);
                if (cbp == NULL)
                        return (ENOMEM);
                cbp->bio_cmd = BIO_READ;

                bioq_insert_tail(p->bqueue, cbp);

                bq = g_malloc(sizeof(*bq), M_WAITOK | M_ZERO);
                bq->bp = cbp;
                TAILQ_INSERT_TAIL(&wp->bits, bq, queue);
        }

        /*
         * The BIO that writes the reconstructed data back to the broken
         * subdisk; it doubles as the XOR buffer.
         */
        cbp = gv_raid5_clone_bio(bp, broken, wp, NULL, 1);
        if (cbp == NULL)
                return (ENOMEM);
        wp->parity = cbp;

        /* Record how far the rebuild has progressed. */
        p->synced = boff;

        return (0);
}

/* Build a request group to perform (part of) a RAID5 request. */
static int
gv_raid5_request(struct gv_plex *p, struct gv_raid5_packet *wp,
    struct bio *bp, caddr_t addr, off_t boff, off_t bcount, int *delay)
{
        struct g_geom *gp;
        struct gv_sd *broken, *original, *parity, *s;
        struct gv_bioq *bq;
        struct bio *cbp;
        int i, psdno, sdno, type, grow;
        off_t real_len, real_off;

        gp = bp->bio_to->geom;

        if (p == NULL || LIST_EMPTY(&p->subdisks))
                return (ENXIO);

        /* We are optimistic and assume that this request will be OK. */
#define REQ_TYPE_NORMAL         0
#define REQ_TYPE_DEGRADED       1
#define REQ_TYPE_NOPARITY       2
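/*
 * REQ_TYPE_NORMAL: all involved subdisks are up.
 * REQ_TYPE_DEGRADED: the data subdisk is down; its contents must be
 * reconstructed from the surviving stripes and the parity.
 * REQ_TYPE_NOPARITY: the parity subdisk is down; data is read or written
 * directly and no parity is maintained.
 */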

        type = REQ_TYPE_NORMAL;
        original = parity = broken = NULL;

        /*
         * XXX: The resize won't crash with rebuild or sync, but we should
         * still be aware of it.  This should perhaps be done on rebuild/check
         * as well.
         */
        /* Past the synced offset, so the region is not re-striped yet;
         * use the original layout. */
        if (boff >= p->synced) {
                grow = 1;
        /* Entirely below the synced offset; use the grown layout with all
         * subdisks. */
        } else if (boff + bcount <= p->synced) {
                grow = 0;
        /* The request straddles the synced offset; delay it for now. */
        } else {
                bioq_disksort(p->rqueue, bp);
                *delay = 1;
                return (0);
        }
        gv_raid5_offset(p, boff, bcount, &real_off, &real_len,
            &sdno, &psdno, grow);

        /* Find the right subdisks. */
        i = 0;
        LIST_FOREACH(s, &p->subdisks, in_plex) {
                if (i == sdno)
                        original = s;
                if (i == psdno)
                        parity = s;
                if (s->state != GV_SD_UP)
                        broken = s;
                i++;
        }

        if ((original == NULL) || (parity == NULL))
                return (ENXIO);

        /* Our data stripe is missing. */
        if (original->state != GV_SD_UP)
                type = REQ_TYPE_DEGRADED;

        /* For a synchronizing write with both subdisks stale, just write. */
        if (original->state == GV_SD_STALE && parity->state == GV_SD_STALE &&
            bp->bio_pflags & GV_BIO_SYNCREQ && bp->bio_cmd == BIO_WRITE) {
                type = REQ_TYPE_NORMAL;
        /* Our parity stripe is missing. */
        } else if (parity->state != GV_SD_UP) {
                /* We cannot take another failure if we're already degraded. */
                if (type != REQ_TYPE_NORMAL)
                        return (ENXIO);
                else
                        type = REQ_TYPE_NOPARITY;
        }

        wp->length = real_len;
        wp->data = addr;
        wp->lockbase = real_off;

        KASSERT(wp->length >= 0, ("gv_raid5_request: wp->length < 0"));

        if ((p->flags & GV_PLEX_REBUILDING) && (boff + real_len < p->synced))
                type = REQ_TYPE_NORMAL;

        if ((p->flags & GV_PLEX_REBUILDING) && (boff + real_len >= p->synced)) {
                bioq_disksort(p->rqueue, bp);
                *delay = 1;
                return (0);
        }

        switch (bp->bio_cmd) {
        case BIO_READ:
                /*
                 * For a degraded read we need to read in all stripes except
                 * the broken one plus the parity stripe and then recalculate
                 * the desired data.
                 */
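                /*
                 * The missing stripe is the XOR of all surviving stripes
                 * including the parity; the reads are collected in wp->bits
                 * and the XOR itself is performed in the request-completion
                 * path (not here) once all of them have finished.
                 */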
                if (type == REQ_TYPE_DEGRADED) {
                        bzero(wp->data, wp->length);
                        LIST_FOREACH(s, &p->subdisks, in_plex) {
                                /* Skip the broken subdisk. */
                                if (s == broken)
                                        continue;
                                /* Skip growing if within offset. */
                                if (grow && s->flags & GV_SD_GROW)
                                        continue;
                                cbp = gv_raid5_clone_bio(bp, s, wp, NULL, 1);
                                if (cbp == NULL)
                                        return (ENOMEM);

                                bioq_insert_tail(p->bqueue, cbp);

                                bq = g_malloc(sizeof(*bq), M_WAITOK | M_ZERO);
                                bq->bp = cbp;
                                TAILQ_INSERT_TAIL(&wp->bits, bq, queue);
                        }

                /* A normal read can be fulfilled with the original subdisk. */
                } else {
                        cbp = gv_raid5_clone_bio(bp, original, wp, addr, 0);
                        if (cbp == NULL)
                                return (ENOMEM);

                        bioq_insert_tail(p->bqueue, cbp);
                }
                wp->lockbase = -1;

                break;

        case BIO_WRITE:
                /*
                 * A degraded write means we cannot write to the original data
                 * subdisk.  Thus we read in all surviving stripes,
                 * recalculate the parity from them and the new data, and
                 * write only the parity stripe back out.
                 */
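                /*
                 * The parity BIO set up below starts out as a zeroed buffer
                 * into which the new data is copied; the surviving stripes
                 * are XORed in as their reads complete, so what finally goes
                 * out is the recomputed parity.  The broken subdisk itself
                 * is never written.
                 */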
                if (type == REQ_TYPE_DEGRADED) {
                        /* Read all subdisks. */
                        LIST_FOREACH(s, &p->subdisks, in_plex) {
                                /* Skip the broken and the parity subdisk. */
                                if ((s == broken) || (s == parity))
                                        continue;
                                /* Skip growing if within offset. */
                                if (grow && s->flags & GV_SD_GROW)
                                        continue;

                                cbp = gv_raid5_clone_bio(bp, s, wp, NULL, 1);
                                if (cbp == NULL)
                                        return (ENOMEM);
                                cbp->bio_cmd = BIO_READ;

                                bioq_insert_tail(p->bqueue, cbp);

                                bq = g_malloc(sizeof(*bq), M_WAITOK | M_ZERO);
                                bq->bp = cbp;
                                TAILQ_INSERT_TAIL(&wp->bits, bq, queue);
                        }

                        /* Write the parity data. */
                        cbp = gv_raid5_clone_bio(bp, parity, wp, NULL, 1);
                        if (cbp == NULL)
                                return (ENOMEM);
                        bcopy(addr, cbp->bio_data, wp->length);
                        wp->parity = cbp;

                /*
                 * When the parity stripe is missing we just write out the
                 * data.
                 */
                } else if (type == REQ_TYPE_NOPARITY) {
                        cbp = gv_raid5_clone_bio(bp, original, wp, addr, 1);
                        if (cbp == NULL)
                                return (ENOMEM);

                        bioq_insert_tail(p->bqueue, cbp);

                        bq = g_malloc(sizeof(*bq), M_WAITOK | M_ZERO);
                        bq->bp = cbp;
                        TAILQ_INSERT_TAIL(&wp->bits, bq, queue);

                /*
                 * A normal write is a read-modify-write: read the old data
                 * and the old parity, write the new data, then compute the
                 * new parity (old parity XOR old data XOR new data) and
                 * write it out as well.
                 */
                } else {
                        /* Read the old parity. */
                        cbp = gv_raid5_clone_bio(bp, parity, wp, NULL, 1);
                        if (cbp == NULL)
                                return (ENOMEM);
                        cbp->bio_cmd = BIO_READ;

                        bioq_insert_tail(p->bqueue, cbp);

                        bq = g_malloc(sizeof(*bq), M_WAITOK | M_ZERO);
                        bq->bp = cbp;
                        TAILQ_INSERT_TAIL(&wp->bits, bq, queue);

                        /* Read the old data. */
                        cbp = gv_raid5_clone_bio(bp, original, wp, NULL, 1);
                        if (cbp == NULL)
                                return (ENOMEM);
                        cbp->bio_cmd = BIO_READ;

                        bioq_insert_tail(p->bqueue, cbp);

                        bq = g_malloc(sizeof(*bq), M_WAITOK | M_ZERO);
                        bq->bp = cbp;
                        TAILQ_INSERT_TAIL(&wp->bits, bq, queue);

                        /* Write the new data. */
                        cbp = gv_raid5_clone_bio(bp, original, wp, addr, 1);
                        if (cbp == NULL)
                                return (ENOMEM);

                        /*
                         * We must not write the new data until the old data
                         * has been read, so hold this BIO back until we're
                         * ready for it.
                         */
                        wp->waiting = cbp;

                        /* The final BIO for the parity. */
                        cbp = gv_raid5_clone_bio(bp, parity, wp, NULL, 1);
                        if (cbp == NULL)
                                return (ENOMEM);

                        /* Remember that this is the BIO for the parity data. */
                        wp->parity = cbp;
                }
                break;

        default:
                return (EINVAL);
        }

        return (0);
}

/*
 * Calculate the offsets in the various subdisks for a RAID5 request.  Also
 * take care of new subdisks in an expanded RAID5 array.
 * XXX: This assumes that new subdisks are appended after the existing ones
 * (which is okay as long as plex_offset is larger); if subdisks are inserted
 * earlier in the plex list, we get problems.
 */
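/*
 * Worked example (hypothetical numbers): with sdcount = 3 and a stripesize
 * of 64k, each stripe holds 128k of data.  For boff = 160k:
 * psd = 3 - 1 - (160k / 128k) % 3 = 1; stripeoff = 160k % 128k = 32k, so
 * sd = 32k / 64k = 0 (below psd, no skip); stripestart = (160k - 32k) / 2
 * = 64k and real_off = 64k + (32k % 64k) = 96k.  Only 32k remain until the
 * stripe boundary, so real_len is capped at 32k.
 */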
static int
gv_raid5_offset(struct gv_plex *p, off_t boff, off_t bcount, off_t *real_off,
    off_t *real_len, int *sdno, int *psdno, int growing)
{
        struct gv_sd *s;
        int sd, psd, sdcount;
        off_t len_left, stripeend, stripeoff, stripestart;

        sdcount = p->sdcount;
        if (growing) {
                LIST_FOREACH(s, &p->subdisks, in_plex) {
                        if (s->flags & GV_SD_GROW)
                                sdcount--;
                }
        }

        /* The number of the subdisk containing the parity stripe. */
        psd = sdcount - 1 - (boff / (p->stripesize * (sdcount - 1))) %
            sdcount;
        KASSERT(psd >= 0, ("gv_raid5_offset: psd < 0"));

        /* Offset of the start address from the start of the stripe. */
        stripeoff = boff % (p->stripesize * (sdcount - 1));
        KASSERT(stripeoff >= 0, ("gv_raid5_offset: stripeoff < 0"));

        /* The number of the subdisk where the stripe resides. */
        sd = stripeoff / p->stripesize;
        KASSERT(sd >= 0, ("gv_raid5_offset: sd < 0"));

        /* At or past the parity subdisk: skip over it. */
        if (sd >= psd)
                sd++;

        /* The offset of the stripe on this subdisk. */
        stripestart = (boff - stripeoff) / (sdcount - 1);
        KASSERT(stripestart >= 0, ("gv_raid5_offset: stripestart < 0"));

        stripeoff %= p->stripesize;

        /* The offset of the request on this subdisk. */
        *real_off = stripestart + stripeoff;

        stripeend = stripestart + p->stripesize;
        len_left = stripeend - *real_off;
        KASSERT(len_left >= 0, ("gv_raid5_offset: len_left < 0"));

        *real_len = (bcount <= len_left) ? bcount : len_left;

        if (sdno != NULL)
                *sdno = sd;
        if (psdno != NULL)
                *psdno = psd;

        return (0);
}

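/*
 * Clone the original BIO for a single subdisk.  If no buffer is supplied,
 * a zeroed scratch buffer is allocated and flagged for release on
 * completion; the clone is aimed at the packet's offset within the
 * subdisk's backing drive.
 */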
static struct bio *
gv_raid5_clone_bio(struct bio *bp, struct gv_sd *s, struct gv_raid5_packet *wp,
    caddr_t addr, int use_wp)
{
        struct bio *cbp;

        cbp = g_clone_bio(bp);
        if (cbp == NULL)
                return (NULL);
        if (addr == NULL) {
                cbp->bio_data = g_malloc(wp->length, M_WAITOK | M_ZERO);
                cbp->bio_cflags |= GV_BIO_MALLOC;
        } else
                cbp->bio_data = addr;
        cbp->bio_offset = wp->lockbase + s->drive_offset;
        cbp->bio_length = wp->length;
        cbp->bio_done = gv_done;
        cbp->bio_caller1 = s;
        if (use_wp)
                cbp->bio_caller2 = wp;

        return (cbp);
}
