The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/geom/vinum/geom_vinum_raid5.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
    3  *
    4  * Copyright (c) 2004, 2007 Lukas Ertl
    5  * All rights reserved.
    6  *
    7  * Redistribution and use in source and binary forms, with or without
    8  * modification, are permitted provided that the following conditions
    9  * are met:
   10  * 1. Redistributions of source code must retain the above copyright
   11  *    notice, this list of conditions and the following disclaimer.
   12  * 2. Redistributions in binary form must reproduce the above copyright
   13  *    notice, this list of conditions and the following disclaimer in the
   14  *    documentation and/or other materials provided with the distribution.
   15  *
   16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   26  * SUCH DAMAGE.
   27  */
   28 
   29 #include <sys/cdefs.h>
   30 __FBSDID("$FreeBSD$");
   31 
   32 #include <sys/param.h>
   33 #include <sys/bio.h>
   34 #include <sys/lock.h>
   35 #include <sys/malloc.h>
   36 #include <sys/systm.h>
   37 
   38 #include <geom/geom.h>
   39 #include <geom/geom_dbg.h>
   40 #include <geom/vinum/geom_vinum_var.h>
   41 #include <geom/vinum/geom_vinum_raid5.h>
   42 #include <geom/vinum/geom_vinum.h>
   43 
   44 static int              gv_raid5_offset(struct gv_plex *, off_t, off_t,
   45                             off_t *, off_t *, int *, int *, int);
   46 static struct bio *     gv_raid5_clone_bio(struct bio *, struct gv_sd *,
   47                             struct gv_raid5_packet *, caddr_t, int);
   48 static int      gv_raid5_request(struct gv_plex *, struct gv_raid5_packet *,
   49                     struct bio *, caddr_t, off_t, off_t, int *);
   50 static int      gv_raid5_check(struct gv_plex *, struct gv_raid5_packet *,
   51                     struct bio *, caddr_t, off_t, off_t);
   52 static int      gv_raid5_rebuild(struct gv_plex *, struct gv_raid5_packet *,
   53                     struct bio *, caddr_t, off_t, off_t);
   54 
   55 struct gv_raid5_packet *
   56 gv_raid5_start(struct gv_plex *p, struct bio *bp, caddr_t addr, off_t boff,
   57     off_t bcount)
   58 {
   59         struct bio *cbp;
   60         struct gv_raid5_packet *wp, *wp2;
   61         struct gv_bioq *bq, *bq2;
   62         int err, delay;
   63 
   64         delay = 0;
   65         wp = g_malloc(sizeof(*wp), M_WAITOK | M_ZERO);
   66         wp->bio = bp;
   67         wp->waiting = NULL;
   68         wp->parity = NULL;
   69         TAILQ_INIT(&wp->bits);
   70 
   71         if (bp->bio_pflags & GV_BIO_REBUILD)
   72                 err = gv_raid5_rebuild(p, wp, bp, addr, boff, bcount);
   73         else if (bp->bio_pflags & GV_BIO_CHECK)
   74                 err = gv_raid5_check(p, wp, bp, addr, boff, bcount);
   75         else
   76                 err = gv_raid5_request(p, wp, bp, addr, boff, bcount, &delay);
   77 
   78         /* Means we have a delayed request. */
   79         if (delay) {
   80                 g_free(wp);
   81                 return (NULL);
   82         }
   83 
   84         /*
   85          * Building the sub-request failed, we probably need to clean up a lot.
   86          */
   87         if (err) {
   88                 G_VINUM_LOGREQ(0, bp, "raid5 plex request failed.");
   89                 TAILQ_FOREACH_SAFE(bq, &wp->bits, queue, bq2) {
   90                         TAILQ_REMOVE(&wp->bits, bq, queue);
   91                         g_free(bq);
   92                 }
   93                 if (wp->waiting != NULL) {
   94                         if (wp->waiting->bio_cflags & GV_BIO_MALLOC)
   95                                 g_free(wp->waiting->bio_data);
   96                         gv_drive_done(wp->waiting->bio_caller1);
   97                         g_destroy_bio(wp->waiting);
   98                 }
   99                 if (wp->parity != NULL) {
  100                         if (wp->parity->bio_cflags & GV_BIO_MALLOC)
  101                                 g_free(wp->parity->bio_data);
  102                         gv_drive_done(wp->parity->bio_caller1);
  103                         g_destroy_bio(wp->parity);
  104                 }
  105                 g_free(wp);
  106 
  107                 TAILQ_FOREACH_SAFE(wp, &p->packets, list, wp2) {
  108                         if (wp->bio != bp)
  109                                 continue;
  110 
  111                         TAILQ_REMOVE(&p->packets, wp, list);
  112                         TAILQ_FOREACH_SAFE(bq, &wp->bits, queue, bq2) {
  113                                 TAILQ_REMOVE(&wp->bits, bq, queue);
  114                                 g_free(bq);
  115                         }
  116                         g_free(wp);
  117                 }
  118 
  119                 cbp = bioq_takefirst(p->bqueue);
  120                 while (cbp != NULL) {
  121                         if (cbp->bio_cflags & GV_BIO_MALLOC)
  122                                 g_free(cbp->bio_data);
  123                         gv_drive_done(cbp->bio_caller1);
  124                         g_destroy_bio(cbp);
  125                         cbp = bioq_takefirst(p->bqueue);
  126                 }
  127 
  128                 /* If internal, stop and reset state. */
  129                 if (bp->bio_pflags & GV_BIO_INTERNAL) {
  130                         if (bp->bio_pflags & GV_BIO_MALLOC)
  131                                 g_free(bp->bio_data);
  132                         g_destroy_bio(bp);
  133                         /* Reset flags. */
  134                         p->flags &= ~(GV_PLEX_SYNCING | GV_PLEX_REBUILDING |
  135                             GV_PLEX_GROWING);
  136                         return (NULL);
  137                 }
  138                 g_io_deliver(bp, err);
  139                 return (NULL);
  140         }
  141 
  142         return (wp);
  143 }
  144 
  145 /*
  146  * Check if the stripe that the work packet wants is already being used by
  147  * some other work packet.
  148  */
  149 int
  150 gv_stripe_active(struct gv_plex *p, struct bio *bp)
  151 {
  152         struct gv_raid5_packet *wp, *owp;
  153         int overlap;
  154 
  155         wp = bp->bio_caller2;
  156         if (wp->lockbase == -1)
  157                 return (0);
  158 
  159         overlap = 0;
  160         TAILQ_FOREACH(owp, &p->packets, list) {
  161                 if (owp == wp)
  162                         break;
  163                 if ((wp->lockbase >= owp->lockbase) &&
  164                     (wp->lockbase <= owp->lockbase + owp->length)) {
  165                         overlap++;
  166                         break;
  167                 }
  168                 if ((wp->lockbase <= owp->lockbase) &&
  169                     (wp->lockbase + wp->length >= owp->lockbase)) {
  170                         overlap++;
  171                         break;
  172                 }
  173         }
  174 
  175         return (overlap);
  176 }
  177 
  178 static int
  179 gv_raid5_check(struct gv_plex *p, struct gv_raid5_packet *wp, struct bio *bp,
  180     caddr_t addr, off_t boff, off_t bcount)
  181 {
  182         struct gv_sd *parity, *s;
  183         struct gv_bioq *bq;
  184         struct bio *cbp;
  185         int i, psdno;
  186         off_t real_len, real_off;
  187 
  188         if (p == NULL || LIST_EMPTY(&p->subdisks))
  189                 return (ENXIO);
  190 
  191         gv_raid5_offset(p, boff, bcount, &real_off, &real_len, NULL, &psdno, 1);
  192 
  193         /* Find the right subdisk. */
  194         parity = NULL;
  195         i = 0;
  196         LIST_FOREACH(s, &p->subdisks, in_plex) {
  197                 if (i == psdno) {
  198                         parity = s;
  199                         break;
  200                 }
  201                 i++;
  202         }
  203 
  204         /* Parity stripe not found. */
  205         if (parity == NULL)
  206                 return (ENXIO);
  207 
  208         if (parity->state != GV_SD_UP)
  209                 return (ENXIO);
  210 
  211         wp->length = real_len;
  212         wp->data = addr;
  213         wp->lockbase = real_off;
  214 
  215         /* Read all subdisks. */
  216         LIST_FOREACH(s, &p->subdisks, in_plex) {
  217                 /* Skip the parity subdisk. */
  218                 if (s == parity)
  219                         continue;
  220                 /* Skip growing subdisks. */
  221                 if (s->flags & GV_SD_GROW)
  222                         continue;
  223 
  224                 cbp = gv_raid5_clone_bio(bp, s, wp, NULL, 1);
  225                 if (cbp == NULL)
  226                         return (ENOMEM);
  227                 cbp->bio_cmd = BIO_READ;
  228 
  229                 bioq_insert_tail(p->bqueue, cbp);
  230 
  231                 bq = g_malloc(sizeof(*bq), M_WAITOK | M_ZERO);
  232                 bq->bp = cbp;
  233                 TAILQ_INSERT_TAIL(&wp->bits, bq, queue);
  234         }
  235 
  236         /* Read the parity data. */
  237         cbp = gv_raid5_clone_bio(bp, parity, wp, NULL, 1);
  238         if (cbp == NULL)
  239                 return (ENOMEM);
  240         cbp->bio_cmd = BIO_READ;
  241         wp->waiting = cbp;
  242 
  243         /*
  244          * In case we want to rebuild the parity, create an extra BIO to write
  245          * it out.  It also acts as buffer for the XOR operations.
  246          */
  247         cbp = gv_raid5_clone_bio(bp, parity, wp, addr, 1);
  248         if (cbp == NULL)
  249                 return (ENOMEM);
  250         wp->parity = cbp;
  251 
  252         return (0);
  253 }
  254 
  255 /* Rebuild a degraded RAID5 plex. */
  256 static int
  257 gv_raid5_rebuild(struct gv_plex *p, struct gv_raid5_packet *wp, struct bio *bp,
  258     caddr_t addr, off_t boff, off_t bcount)
  259 {
  260         struct gv_sd *broken, *s;
  261         struct gv_bioq *bq;
  262         struct bio *cbp;
  263         off_t real_len, real_off;
  264 
  265         if (p == NULL || LIST_EMPTY(&p->subdisks))
  266                 return (ENXIO);
  267 
  268         gv_raid5_offset(p, boff, bcount, &real_off, &real_len, NULL, NULL, 1);
  269 
  270         /* Find the right subdisk. */
  271         broken = NULL;
  272         LIST_FOREACH(s, &p->subdisks, in_plex) {
  273                 if (s->state != GV_SD_UP)
  274                         broken = s;
  275         }
  276 
  277         /* Broken stripe not found. */
  278         if (broken == NULL)
  279                 return (ENXIO);
  280 
  281         switch (broken->state) {
  282         case GV_SD_UP:
  283                 return (EINVAL);
  284 
  285         case GV_SD_STALE:
  286                 if (!(bp->bio_pflags & GV_BIO_REBUILD))
  287                         return (ENXIO);
  288 
  289                 G_VINUM_DEBUG(1, "sd %s is reviving", broken->name);
  290                 gv_set_sd_state(broken, GV_SD_REVIVING, GV_SETSTATE_FORCE);
  291                 /* Set this bit now, but should be set at end. */
  292                 broken->flags |= GV_SD_CANGOUP;
  293                 break;
  294 
  295         case GV_SD_REVIVING:
  296                 break;
  297 
  298         default:
  299                 /* All other subdisk states mean it's not accessible. */
  300                 return (ENXIO);
  301         }
  302 
  303         wp->length = real_len;
  304         wp->data = addr;
  305         wp->lockbase = real_off;
  306 
  307         KASSERT(wp->length >= 0, ("gv_rebuild_raid5: wp->length < 0"));
  308 
  309         /* Read all subdisks. */
  310         LIST_FOREACH(s, &p->subdisks, in_plex) {
  311                 /* Skip the broken subdisk. */
  312                 if (s == broken)
  313                         continue;
  314 
  315                 /* Skip growing subdisks. */
  316                 if (s->flags & GV_SD_GROW)
  317                         continue;
  318 
  319                 cbp = gv_raid5_clone_bio(bp, s, wp, NULL, 1);
  320                 if (cbp == NULL)
  321                         return (ENOMEM);
  322                 cbp->bio_cmd = BIO_READ;
  323 
  324                 bioq_insert_tail(p->bqueue, cbp);
  325 
  326                 bq = g_malloc(sizeof(*bq), M_WAITOK | M_ZERO);
  327                 bq->bp = cbp;
  328                 TAILQ_INSERT_TAIL(&wp->bits, bq, queue);
  329         }
  330 
  331         /* Write the parity data. */
  332         cbp = gv_raid5_clone_bio(bp, broken, wp, NULL, 1);
  333         if (cbp == NULL)
  334                 return (ENOMEM);
  335         wp->parity = cbp;
  336 
  337         p->synced = boff;
  338 
  339         /* Post notification that we're finished. */
  340         return (0);
  341 }
  342 
  343 /* Build a request group to perform (part of) a RAID5 request. */
  344 static int
  345 gv_raid5_request(struct gv_plex *p, struct gv_raid5_packet *wp,
  346     struct bio *bp, caddr_t addr, off_t boff, off_t bcount, int *delay)
  347 {
  348         struct gv_sd *broken, *original, *parity, *s;
  349         struct gv_bioq *bq;
  350         struct bio *cbp;
  351         int i, psdno, sdno, type, grow;
  352         off_t real_len, real_off;
  353 
  354         if (p == NULL || LIST_EMPTY(&p->subdisks))
  355                 return (ENXIO);
  356 
  357         /* We are optimistic and assume that this request will be OK. */
  358 #define REQ_TYPE_NORMAL         0
  359 #define REQ_TYPE_DEGRADED       1
  360 #define REQ_TYPE_NOPARITY       2
  361 
  362         type = REQ_TYPE_NORMAL;
  363         original = parity = broken = NULL;
  364 
  365         /* XXX: The resize won't crash with rebuild or sync, but we should still
  366          * be aware of it. Also this should perhaps be done on rebuild/check as
  367          * well?
  368          */
  369         /* If we're over, we must use the old. */ 
  370         if (boff >= p->synced) {
  371                 grow = 1;
  372         /* Or if over the resized offset, we use all drives. */
  373         } else if (boff + bcount <= p->synced) {
  374                 grow = 0;
  375         /* Else, we're in the middle, and must wait a bit. */
  376         } else {
  377                 bioq_disksort(p->rqueue, bp);
  378                 *delay = 1;
  379                 return (0);
  380         }
  381         gv_raid5_offset(p, boff, bcount, &real_off, &real_len,
  382             &sdno, &psdno, grow);
  383 
  384         /* Find the right subdisks. */
  385         i = 0;
  386         LIST_FOREACH(s, &p->subdisks, in_plex) {
  387                 if (i == sdno)
  388                         original = s;
  389                 if (i == psdno)
  390                         parity = s;
  391                 if (s->state != GV_SD_UP)
  392                         broken = s;
  393                 i++;
  394         }
  395 
  396         if ((original == NULL) || (parity == NULL))
  397                 return (ENXIO);
  398 
  399         /* Our data stripe is missing. */
  400         if (original->state != GV_SD_UP)
  401                 type = REQ_TYPE_DEGRADED;
  402 
  403         /* If synchronizing request, just write it if disks are stale. */
  404         if (original->state == GV_SD_STALE && parity->state == GV_SD_STALE &&
  405             bp->bio_pflags & GV_BIO_SYNCREQ && bp->bio_cmd == BIO_WRITE) {
  406                 type = REQ_TYPE_NORMAL;
  407         /* Our parity stripe is missing. */
  408         } else if (parity->state != GV_SD_UP) {
  409                 /* We cannot take another failure if we're already degraded. */
  410                 if (type != REQ_TYPE_NORMAL)
  411                         return (ENXIO);
  412                 else
  413                         type = REQ_TYPE_NOPARITY;
  414         }
  415 
  416         wp->length = real_len;
  417         wp->data = addr;
  418         wp->lockbase = real_off;
  419 
  420         KASSERT(wp->length >= 0, ("gv_build_raid5_request: wp->length < 0"));
  421 
  422         if ((p->flags & GV_PLEX_REBUILDING) && (boff + real_len < p->synced))
  423                 type = REQ_TYPE_NORMAL;
  424 
  425         if ((p->flags & GV_PLEX_REBUILDING) && (boff + real_len >= p->synced)) {
  426                 bioq_disksort(p->rqueue, bp);
  427                 *delay = 1;
  428                 return (0);
  429         }
  430 
  431         switch (bp->bio_cmd) {
  432         case BIO_READ:
  433                 /*
  434                  * For a degraded read we need to read in all stripes except
  435                  * the broken one plus the parity stripe and then recalculate
  436                  * the desired data.
  437                  */
  438                 if (type == REQ_TYPE_DEGRADED) {
  439                         bzero(wp->data, wp->length);
  440                         LIST_FOREACH(s, &p->subdisks, in_plex) {
  441                                 /* Skip the broken subdisk. */
  442                                 if (s == broken)
  443                                         continue;
  444                                 /* Skip growing if within offset. */
  445                                 if (grow && s->flags & GV_SD_GROW)
  446                                         continue;
  447                                 cbp = gv_raid5_clone_bio(bp, s, wp, NULL, 1);
  448                                 if (cbp == NULL)
  449                                         return (ENOMEM);
  450 
  451                                 bioq_insert_tail(p->bqueue, cbp);
  452 
  453                                 bq = g_malloc(sizeof(*bq), M_WAITOK | M_ZERO);
  454                                 bq->bp = cbp;
  455                                 TAILQ_INSERT_TAIL(&wp->bits, bq, queue);
  456                         }
  457 
  458                 /* A normal read can be fulfilled with the original subdisk. */
  459                 } else {
  460                         cbp = gv_raid5_clone_bio(bp, original, wp, addr, 0);
  461                         if (cbp == NULL)
  462                                 return (ENOMEM);
  463 
  464                         bioq_insert_tail(p->bqueue, cbp);
  465                 }
  466                 wp->lockbase = -1;
  467 
  468                 break;
  469 
  470         case BIO_WRITE:
  471                 /*
  472                  * A degraded write means we cannot write to the original data
  473                  * subdisk.  Thus we need to read in all valid stripes,
  474                  * recalculate the parity from the original data, and then
  475                  * write the parity stripe back out.
  476                  */
  477                 if (type == REQ_TYPE_DEGRADED) {
  478                         /* Read all subdisks. */
  479                         LIST_FOREACH(s, &p->subdisks, in_plex) {
  480                                 /* Skip the broken and the parity subdisk. */
  481                                 if ((s == broken) || (s == parity))
  482                                         continue;
  483                                 /* Skip growing if within offset. */
  484                                 if (grow && s->flags & GV_SD_GROW)
  485                                         continue;
  486 
  487                                 cbp = gv_raid5_clone_bio(bp, s, wp, NULL, 1);
  488                                 if (cbp == NULL)
  489                                         return (ENOMEM);
  490                                 cbp->bio_cmd = BIO_READ;
  491 
  492                                 bioq_insert_tail(p->bqueue, cbp);
  493 
  494                                 bq = g_malloc(sizeof(*bq), M_WAITOK | M_ZERO);
  495                                 bq->bp = cbp;
  496                                 TAILQ_INSERT_TAIL(&wp->bits, bq, queue);
  497                         }
  498 
  499                         /* Write the parity data. */
  500                         cbp = gv_raid5_clone_bio(bp, parity, wp, NULL, 1);
  501                         if (cbp == NULL)
  502                                 return (ENOMEM);
  503                         bcopy(addr, cbp->bio_data, wp->length);
  504                         wp->parity = cbp;
  505 
  506                 /*
  507                  * When the parity stripe is missing we just write out the data.
  508                  */
  509                 } else if (type == REQ_TYPE_NOPARITY) {
  510                         cbp = gv_raid5_clone_bio(bp, original, wp, addr, 1);
  511                         if (cbp == NULL)
  512                                 return (ENOMEM);
  513 
  514                         bioq_insert_tail(p->bqueue, cbp);
  515 
  516                         bq = g_malloc(sizeof(*bq), M_WAITOK | M_ZERO);
  517                         bq->bp = cbp;
  518                         TAILQ_INSERT_TAIL(&wp->bits, bq, queue);
  519 
  520                 /*
  521                  * A normal write request goes to the original subdisk, then we
  522                  * read in all other stripes, recalculate the parity and write
  523                  * out the parity again.
  524                  */
  525                 } else {
  526                         /* Read old parity. */
  527                         cbp = gv_raid5_clone_bio(bp, parity, wp, NULL, 1);
  528                         if (cbp == NULL)
  529                                 return (ENOMEM);
  530                         cbp->bio_cmd = BIO_READ;
  531 
  532                         bioq_insert_tail(p->bqueue, cbp);
  533 
  534                         bq = g_malloc(sizeof(*bq), M_WAITOK | M_ZERO);
  535                         bq->bp = cbp;
  536                         TAILQ_INSERT_TAIL(&wp->bits, bq, queue);
  537 
  538                         /* Read old data. */
  539                         cbp = gv_raid5_clone_bio(bp, original, wp, NULL, 1);
  540                         if (cbp == NULL)
  541                                 return (ENOMEM);
  542                         cbp->bio_cmd = BIO_READ;
  543 
  544                         bioq_insert_tail(p->bqueue, cbp);
  545 
  546                         bq = g_malloc(sizeof(*bq), M_WAITOK | M_ZERO);
  547                         bq->bp = cbp;
  548                         TAILQ_INSERT_TAIL(&wp->bits, bq, queue);
  549 
  550                         /* Write new data. */
  551                         cbp = gv_raid5_clone_bio(bp, original, wp, addr, 1);
  552                         if (cbp == NULL)
  553                                 return (ENOMEM);
  554 
  555                         /*
  556                          * We must not write the new data until the old data
  557                          * was read, so hold this BIO back until we're ready
  558                          * for it.
  559                          */
  560                         wp->waiting = cbp;
  561 
  562                         /* The final bio for the parity. */
  563                         cbp = gv_raid5_clone_bio(bp, parity, wp, NULL, 1);
  564                         if (cbp == NULL)
  565                                 return (ENOMEM);
  566 
  567                         /* Remember that this is the BIO for the parity data. */
  568                         wp->parity = cbp;
  569                 }
  570                 break;
  571 
  572         default:
  573                 return (EINVAL);
  574         }
  575 
  576         return (0);
  577 }
  578 
  579 /*
  580  * Calculate the offsets in the various subdisks for a RAID5 request. Also take
  581  * care of new subdisks in an expanded RAID5 array. 
  582  * XXX: This assumes that the new subdisks are inserted after the others (which
  583  * is okay as long as plex_offset is larger). If subdisks are inserted into the
  584  * plexlist before, we get problems.
  585  */
  586 static int
  587 gv_raid5_offset(struct gv_plex *p, off_t boff, off_t bcount, off_t *real_off,
  588     off_t *real_len, int *sdno, int *psdno, int growing)
  589 {
  590         struct gv_sd *s;
  591         int sd, psd, sdcount;
  592         off_t len_left, stripeend, stripeoff, stripestart;
  593 
  594         sdcount = p->sdcount;
  595         if (growing) {
  596                 LIST_FOREACH(s, &p->subdisks, in_plex) {
  597                         if (s->flags & GV_SD_GROW)
  598                                 sdcount--;
  599                 }
  600         }
  601 
  602         /* The number of the subdisk containing the parity stripe. */
  603         psd = sdcount - 1 - ( boff / (p->stripesize * (sdcount - 1))) %
  604             sdcount;
  605         KASSERT(psdno >= 0, ("gv_raid5_offset: psdno < 0"));
  606 
  607         /* Offset of the start address from the start of the stripe. */
  608         stripeoff = boff % (p->stripesize * (sdcount - 1));
  609         KASSERT(stripeoff >= 0, ("gv_raid5_offset: stripeoff < 0"));
  610 
  611         /* The number of the subdisk where the stripe resides. */
  612         sd = stripeoff / p->stripesize;
  613         KASSERT(sdno >= 0, ("gv_raid5_offset: sdno < 0"));
  614 
  615         /* At or past parity subdisk. */
  616         if (sd >= psd)
  617                 sd++;
  618 
  619         /* The offset of the stripe on this subdisk. */
  620         stripestart = (boff - stripeoff) / (sdcount - 1);
  621         KASSERT(stripestart >= 0, ("gv_raid5_offset: stripestart < 0"));
  622 
  623         stripeoff %= p->stripesize;
  624 
  625         /* The offset of the request on this subdisk. */
  626         *real_off = stripestart + stripeoff;
  627 
  628         stripeend = stripestart + p->stripesize;
  629         len_left = stripeend - *real_off;
  630         KASSERT(len_left >= 0, ("gv_raid5_offset: len_left < 0"));
  631 
  632         *real_len = (bcount <= len_left) ? bcount : len_left;
  633 
  634         if (sdno != NULL)
  635                 *sdno = sd;
  636         if (psdno != NULL)
  637                 *psdno = psd;
  638 
  639         return (0);
  640 }
  641 
  642 static struct bio *
  643 gv_raid5_clone_bio(struct bio *bp, struct gv_sd *s, struct gv_raid5_packet *wp,
  644     caddr_t addr, int use_wp)
  645 {
  646         struct bio *cbp;
  647 
  648         cbp = g_clone_bio(bp);
  649         if (cbp == NULL)
  650                 return (NULL);
  651         if (addr == NULL) {
  652                 cbp->bio_data = g_malloc(wp->length, M_WAITOK | M_ZERO);
  653                 cbp->bio_cflags |= GV_BIO_MALLOC;
  654         } else
  655                 cbp->bio_data = addr;
  656         cbp->bio_offset = wp->lockbase + s->drive_offset;
  657         cbp->bio_length = wp->length;
  658         cbp->bio_done = gv_done;
  659         cbp->bio_caller1 = s;
  660         s->drive_sc->active++;
  661         if (use_wp)
  662                 cbp->bio_caller2 = wp;
  663 
  664         return (cbp);
  665 }

Cache object: 65567ec86dd8eb3fd248842fdeb7bcd6


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.