The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/geom/vinum/geom_vinum_plex.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 2004, 2007 Lukas Ertl
    3  * Copyright (c) 2007, 2009 Ulf Lilleengen
    4  * All rights reserved.
    5  *
    6  * Redistribution and use in source and binary forms, with or without
    7  * modification, are permitted provided that the following conditions
    8  * are met:
    9  * 1. Redistributions of source code must retain the above copyright
   10  *    notice, this list of conditions and the following disclaimer.
   11  * 2. Redistributions in binary form must reproduce the above copyright
   12  *    notice, this list of conditions and the following disclaimer in the
   13  *    documentation and/or other materials provided with the distribution.
   14  *
   15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   25  * SUCH DAMAGE.
   26  */
   27 
   28 #include <sys/cdefs.h>
   29 __FBSDID("$FreeBSD: releng/8.3/sys/geom/vinum/geom_vinum_plex.c 191856 2009-05-06 19:34:32Z lulf $");
   30 
   31 #include <sys/param.h>
   32 #include <sys/bio.h>
   33 #include <sys/lock.h>
   34 #include <sys/malloc.h>
   35 #include <sys/systm.h>
   36 
   37 #include <geom/geom.h>
   38 #include <geom/vinum/geom_vinum_var.h>
   39 #include <geom/vinum/geom_vinum_raid5.h>
   40 #include <geom/vinum/geom_vinum.h>
   41 
   42 static int      gv_check_parity(struct gv_plex *, struct bio *,
   43                     struct gv_raid5_packet *);
   44 static int      gv_normal_parity(struct gv_plex *, struct bio *,
   45                     struct gv_raid5_packet *);
   46 static void     gv_plex_flush(struct gv_plex *);
   47 static int      gv_plex_offset(struct gv_plex *, off_t, off_t, off_t *, off_t *,
   48                     int *, int);
   49 static int      gv_plex_normal_request(struct gv_plex *, struct bio *, off_t,
   50                     off_t,  caddr_t);
   51 static void     gv_post_bio(struct gv_softc *, struct bio *);
   52 
   53 void
   54 gv_plex_start(struct gv_plex *p, struct bio *bp)
   55 {
   56         struct bio *cbp;
   57         struct gv_sd *s;
   58         struct gv_raid5_packet *wp;
   59         caddr_t addr;
   60         off_t bcount, boff, len;
   61 
   62         bcount = bp->bio_length;
   63         addr = bp->bio_data;
   64         boff = bp->bio_offset;
   65 
   66         /* Walk over the whole length of the request, we might split it up. */
   67         while (bcount > 0) {
   68                 wp = NULL;
   69 
   70                 /*
   71                  * RAID5 plexes need special treatment, as a single request
   72                  * might involve several read/write sub-requests.
   73                  */
   74                 if (p->org == GV_PLEX_RAID5) {
   75                         wp = gv_raid5_start(p, bp, addr, boff, bcount);
   76                         if (wp == NULL)
   77                                 return;
   78  
   79                         len = wp->length;
   80 
   81                         if (TAILQ_EMPTY(&wp->bits))
   82                                 g_free(wp);
   83                         else if (wp->lockbase != -1)
   84                                 TAILQ_INSERT_TAIL(&p->packets, wp, list);
   85 
   86                 /*
   87                  * Requests to concatenated and striped plexes go straight
   88                  * through.
   89                  */
   90                 } else {
   91                         len = gv_plex_normal_request(p, bp, boff, bcount, addr);
   92                 }
   93                 if (len < 0)
   94                         return;
   95                         
   96                 bcount -= len;
   97                 addr += len;
   98                 boff += len;
   99         }
  100 
  101         /*
  102          * Fire off all sub-requests.  We get the correct consumer (== drive)
  103          * to send each request to via the subdisk that was stored in
  104          * cbp->bio_caller1.
  105          */
  106         cbp = bioq_takefirst(p->bqueue);
  107         while (cbp != NULL) {
  108                 /*
  109                  * RAID5 sub-requests need to come in correct order, otherwise
  110                  * we trip over the parity, as it might be overwritten by
  111                  * another sub-request.  We abuse cbp->bio_caller2 to mark
  112                  * potential overlap situations. 
  113                  */
  114                 if (cbp->bio_caller2 != NULL && gv_stripe_active(p, cbp)) {
  115                         /* Park the bio on the waiting queue. */
  116                         cbp->bio_pflags |= GV_BIO_ONHOLD;
  117                         bioq_disksort(p->wqueue, cbp);
  118                 } else {
  119                         s = cbp->bio_caller1;
  120                         g_io_request(cbp, s->drive_sc->consumer);
  121                 }
  122                 cbp = bioq_takefirst(p->bqueue);
  123         }
  124 }
  125 
  126 static int
  127 gv_plex_offset(struct gv_plex *p, off_t boff, off_t bcount, off_t *real_off,
  128     off_t *real_len, int *sdno, int growing)
  129 {
  130         struct gv_sd *s;
  131         int i, sdcount;
  132         off_t len_left, stripeend, stripeno, stripestart;
  133 
  134         switch (p->org) {
  135         case GV_PLEX_CONCAT:
  136                 /*
  137                  * Find the subdisk where this request starts.  The subdisks in
  138                  * this list must be ordered by plex_offset.
  139                  */
  140                 i = 0;
  141                 LIST_FOREACH(s, &p->subdisks, in_plex) {
  142                         if (s->plex_offset <= boff &&
  143                             s->plex_offset + s->size > boff) {
  144                                 *sdno = i;
  145                                 break;
  146                         }
  147                         i++;
  148                 }
  149                 if (s == NULL || s->drive_sc == NULL)
  150                         return (GV_ERR_NOTFOUND);
  151 
  152                 /* Calculate corresponding offsets on disk. */
  153                 *real_off = boff - s->plex_offset;
  154                 len_left = s->size - (*real_off);
  155                 KASSERT(len_left >= 0, ("gv_plex_offset: len_left < 0"));
  156                 *real_len = (bcount > len_left) ? len_left : bcount;
  157                 break;
  158 
  159         case GV_PLEX_STRIPED:
  160                 /* The number of the stripe where the request starts. */
  161                 stripeno = boff / p->stripesize;
  162                 KASSERT(stripeno >= 0, ("gv_plex_offset: stripeno < 0"));
  163 
  164                 /* Take growing subdisks into account when calculating. */
  165                 sdcount = gv_sdcount(p, (boff >= p->synced));
  166 
  167                 if (!(boff + bcount <= p->synced) &&
  168                     (p->flags & GV_PLEX_GROWING) &&
  169                     !growing)
  170                         return (GV_ERR_ISBUSY);
  171                 *sdno = stripeno % sdcount;
  172 
  173                 KASSERT(sdno >= 0, ("gv_plex_offset: sdno < 0"));
  174                 stripestart = (stripeno / sdcount) *
  175                     p->stripesize;
  176                 KASSERT(stripestart >= 0, ("gv_plex_offset: stripestart < 0"));
  177                 stripeend = stripestart + p->stripesize;
  178                 *real_off = boff - (stripeno * p->stripesize) +
  179                     stripestart;
  180                 len_left = stripeend - *real_off;
  181                 KASSERT(len_left >= 0, ("gv_plex_offset: len_left < 0"));
  182 
  183                 *real_len = (bcount <= len_left) ? bcount : len_left;
  184                 break;
  185 
  186         default:
  187                 return (GV_ERR_PLEXORG);
  188         }
  189         return (0);
  190 }
  191 
  192 /*
  193  * Prepare a normal plex request.
  194  */
  195 static int 
  196 gv_plex_normal_request(struct gv_plex *p, struct bio *bp, off_t boff,
  197     off_t bcount,  caddr_t addr)
  198 {
  199         struct gv_sd *s;
  200         struct bio *cbp;
  201         off_t real_len, real_off;
  202         int i, err, sdno;
  203 
  204         s = NULL;
  205         sdno = -1;
  206         real_len = real_off = 0;
  207 
  208         err = ENXIO;
  209 
  210         if (p == NULL || LIST_EMPTY(&p->subdisks)) 
  211                 goto bad;
  212 
  213         err = gv_plex_offset(p, boff, bcount, &real_off,
  214             &real_len, &sdno, (bp->bio_pflags & GV_BIO_GROW));
  215         /* If the request was blocked, put it into wait. */
  216         if (err == GV_ERR_ISBUSY) {
  217                 bioq_disksort(p->rqueue, bp);
  218                 return (-1); /* "Fail", and delay request. */
  219         }
  220         if (err) {
  221                 err = ENXIO;
  222                 goto bad;
  223         }
  224         err = ENXIO;
  225 
  226         /* Find the right subdisk. */
  227         i = 0;
  228         LIST_FOREACH(s, &p->subdisks, in_plex) {
  229                 if (i == sdno)
  230                         break;
  231                 i++;
  232         }
  233 
  234         /* Subdisk not found. */
  235         if (s == NULL || s->drive_sc == NULL)
  236                 goto bad;
  237 
  238         /* Now check if we can handle the request on this subdisk. */
  239         switch (s->state) {
  240         case GV_SD_UP:
  241                 /* If the subdisk is up, just continue. */
  242                 break;
  243         case GV_SD_DOWN:
  244                 if (bp->bio_pflags & GV_BIO_INTERNAL)
  245                         G_VINUM_DEBUG(0, "subdisk must be in the stale state in"
  246                             " order to perform administrative requests");
  247                 goto bad;
  248         case GV_SD_STALE:
  249                 if (!(bp->bio_pflags & GV_BIO_SYNCREQ)) {
  250                         G_VINUM_DEBUG(0, "subdisk stale, unable to perform "
  251                             "regular requests");
  252                         goto bad;
  253                 }
  254 
  255                 G_VINUM_DEBUG(1, "sd %s is initializing", s->name);
  256                 gv_set_sd_state(s, GV_SD_INITIALIZING, GV_SETSTATE_FORCE);
  257                 break;
  258         case GV_SD_INITIALIZING:
  259                 if (bp->bio_cmd == BIO_READ)
  260                         goto bad;
  261                 break;
  262         default:
  263                 /* All other subdisk states mean it's not accessible. */
  264                 goto bad;
  265         }
  266 
  267         /* Clone the bio and adjust the offsets and sizes. */
  268         cbp = g_clone_bio(bp);
  269         if (cbp == NULL) {
  270                 err = ENOMEM;
  271                 goto bad;
  272         }
  273         cbp->bio_offset = real_off + s->drive_offset;
  274         cbp->bio_length = real_len;
  275         cbp->bio_data = addr;
  276         cbp->bio_done = gv_done;
  277         cbp->bio_caller1 = s;
  278 
  279         /* Store the sub-requests now and let others issue them. */
  280         bioq_insert_tail(p->bqueue, cbp); 
  281         return (real_len);
  282 bad:
  283         G_VINUM_LOGREQ(0, bp, "plex request failed.");
  284         /* Building the sub-request failed. If internal BIO, do not deliver. */
  285         if (bp->bio_pflags & GV_BIO_INTERNAL) {
  286                 if (bp->bio_pflags & GV_BIO_MALLOC)
  287                         g_free(bp->bio_data);
  288                 g_destroy_bio(bp);
  289                 p->flags &= ~(GV_PLEX_SYNCING | GV_PLEX_REBUILDING |
  290                     GV_PLEX_GROWING);
  291                 return (-1);
  292         }
  293         g_io_deliver(bp, err);
  294         return (-1);
  295 }
  296 
  297 /*
  298  * Handle a completed request to a striped or concatenated plex.
  299  */
  300 void
  301 gv_plex_normal_done(struct gv_plex *p, struct bio *bp)
  302 {
  303         struct bio *pbp;
  304 
  305         pbp = bp->bio_parent;
  306         if (pbp->bio_error == 0)
  307                 pbp->bio_error = bp->bio_error;
  308         g_destroy_bio(bp);
  309         pbp->bio_inbed++;
  310         if (pbp->bio_children == pbp->bio_inbed) {
  311                 /* Just set it to length since multiple plexes will
  312                  * screw things up. */
  313                 pbp->bio_completed = pbp->bio_length;
  314                 if (pbp->bio_pflags & GV_BIO_SYNCREQ)
  315                         gv_sync_complete(p, pbp);
  316                 else if (pbp->bio_pflags & GV_BIO_GROW)
  317                         gv_grow_complete(p, pbp);
  318                 else
  319                         g_io_deliver(pbp, pbp->bio_error);
  320         }
  321 }
  322 
  323 /*
  324  * Handle a completed request to a RAID-5 plex.
  325  */
  326 void
  327 gv_plex_raid5_done(struct gv_plex *p, struct bio *bp)
  328 {
  329         struct gv_softc *sc;
  330         struct bio *cbp, *pbp;
  331         struct gv_bioq *bq, *bq2;
  332         struct gv_raid5_packet *wp;
  333         off_t completed;
  334         int i;
  335 
  336         completed = 0;
  337         sc = p->vinumconf;
  338         wp = bp->bio_caller2;
  339 
  340         switch (bp->bio_parent->bio_cmd) {
  341         case BIO_READ:
  342                 if (wp == NULL) {
  343                         completed = bp->bio_completed;
  344                         break;
  345                 }
  346 
  347                 TAILQ_FOREACH_SAFE(bq, &wp->bits, queue, bq2) {
  348                         if (bq->bp != bp)
  349                                 continue;
  350                         TAILQ_REMOVE(&wp->bits, bq, queue);
  351                         g_free(bq);
  352                         for (i = 0; i < wp->length; i++)
  353                                 wp->data[i] ^= bp->bio_data[i];
  354                         break;
  355                 }
  356                 if (TAILQ_EMPTY(&wp->bits)) {
  357                         completed = wp->length;
  358                         if (wp->lockbase != -1) {
  359                                 TAILQ_REMOVE(&p->packets, wp, list);
  360                                 /* Bring the waiting bios back into the game. */
  361                                 pbp = bioq_takefirst(p->wqueue);
  362                                 while (pbp != NULL) {
  363                                         gv_post_bio(sc, pbp);
  364                                         pbp = bioq_takefirst(p->wqueue);
  365                                 }
  366                         }
  367                         g_free(wp);
  368                 }
  369 
  370                 break;
  371 
  372         case BIO_WRITE:
  373                 /* XXX can this ever happen? */
  374                 if (wp == NULL) {
  375                         completed = bp->bio_completed;
  376                         break;
  377                 }
  378 
  379                 /* Check if we need to handle parity data. */
  380                 TAILQ_FOREACH_SAFE(bq, &wp->bits, queue, bq2) {
  381                         if (bq->bp != bp)
  382                                 continue;
  383                         TAILQ_REMOVE(&wp->bits, bq, queue);
  384                         g_free(bq);
  385                         cbp = wp->parity;
  386                         if (cbp != NULL) {
  387                                 for (i = 0; i < wp->length; i++)
  388                                         cbp->bio_data[i] ^= bp->bio_data[i];
  389                         }
  390                         break;
  391                 }
  392 
  393                 /* Handle parity data. */
  394                 if (TAILQ_EMPTY(&wp->bits)) {
  395                         if (bp->bio_parent->bio_pflags & GV_BIO_CHECK)
  396                                 i = gv_check_parity(p, bp, wp);
  397                         else
  398                                 i = gv_normal_parity(p, bp, wp);
  399 
  400                         /* All of our sub-requests have finished. */
  401                         if (i) {
  402                                 completed = wp->length;
  403                                 TAILQ_REMOVE(&p->packets, wp, list);
  404                                 /* Bring the waiting bios back into the game. */
  405                                 pbp = bioq_takefirst(p->wqueue);
  406                                 while (pbp != NULL) {
  407                                         gv_post_bio(sc, pbp);
  408                                         pbp = bioq_takefirst(p->wqueue);
  409                                 }
  410                                 g_free(wp);
  411                         }
  412                 }
  413 
  414                 break;
  415         }
  416 
  417         pbp = bp->bio_parent;
  418         if (pbp->bio_error == 0)
  419                 pbp->bio_error = bp->bio_error;
  420         pbp->bio_completed += completed;
  421 
  422         /* When the original request is finished, we deliver it. */
  423         pbp->bio_inbed++;
  424         if (pbp->bio_inbed == pbp->bio_children) {
  425                 /* Hand it over for checking or delivery. */
  426                 if (pbp->bio_cmd == BIO_WRITE &&
  427                     (pbp->bio_pflags & GV_BIO_CHECK)) {
  428                         gv_parity_complete(p, pbp);
  429                 } else if (pbp->bio_cmd == BIO_WRITE &&
  430                     (pbp->bio_pflags & GV_BIO_REBUILD)) {
  431                         gv_rebuild_complete(p, pbp);
  432                 } else if (pbp->bio_pflags & GV_BIO_INIT) {
  433                         gv_init_complete(p, pbp);
  434                 } else if (pbp->bio_pflags & GV_BIO_SYNCREQ) {
  435                         gv_sync_complete(p, pbp);
  436                 } else if (pbp->bio_pflags & GV_BIO_GROW) {
  437                         gv_grow_complete(p, pbp);
  438                 } else {
  439                         g_io_deliver(pbp, pbp->bio_error);
  440                 }
  441         }
  442 
  443         /* Clean up what we allocated. */
  444         if (bp->bio_cflags & GV_BIO_MALLOC)
  445                 g_free(bp->bio_data);
  446         g_destroy_bio(bp);
  447 }
  448 
  449 static int
  450 gv_check_parity(struct gv_plex *p, struct bio *bp, struct gv_raid5_packet *wp)
  451 {
  452         struct bio *pbp;
  453         struct gv_sd *s;
  454         int err, finished, i;
  455 
  456         err = 0;
  457         finished = 1;
  458 
  459         if (wp->waiting != NULL) {
  460                 pbp = wp->waiting;
  461                 wp->waiting = NULL;
  462                 s = pbp->bio_caller1;
  463                 g_io_request(pbp, s->drive_sc->consumer);
  464                 finished = 0;
  465 
  466         } else if (wp->parity != NULL) {
  467                 pbp = wp->parity;
  468                 wp->parity = NULL;
  469 
  470                 /* Check if the parity is correct. */
  471                 for (i = 0; i < wp->length; i++) {
  472                         if (bp->bio_data[i] != pbp->bio_data[i]) {
  473                                 err = 1;
  474                                 break;
  475                         }
  476                 }
  477 
  478                 /* The parity is not correct... */
  479                 if (err) {
  480                         bp->bio_parent->bio_error = EAGAIN;
  481 
  482                         /* ... but we rebuild it. */
  483                         if (bp->bio_parent->bio_pflags & GV_BIO_PARITY) {
  484                                 s = pbp->bio_caller1;
  485                                 g_io_request(pbp, s->drive_sc->consumer);
  486                                 finished = 0;
  487                         }
  488                 }
  489 
  490                 /*
  491                  * Clean up the BIO we would have used for rebuilding the
  492                  * parity.
  493                  */
  494                 if (finished) {
  495                         bp->bio_parent->bio_inbed++;
  496                         g_destroy_bio(pbp);
  497                 }
  498 
  499         }
  500 
  501         return (finished);
  502 }
  503 
  504 static int
  505 gv_normal_parity(struct gv_plex *p, struct bio *bp, struct gv_raid5_packet *wp)
  506 {
  507         struct bio *cbp, *pbp;
  508         struct gv_sd *s;
  509         int finished, i;
  510 
  511         finished = 1;
  512 
  513         if (wp->waiting != NULL) {
  514                 pbp = wp->waiting;
  515                 wp->waiting = NULL;
  516                 cbp = wp->parity;
  517                 for (i = 0; i < wp->length; i++)
  518                         cbp->bio_data[i] ^= pbp->bio_data[i];
  519                 s = pbp->bio_caller1;
  520                 g_io_request(pbp, s->drive_sc->consumer);
  521                 finished = 0;
  522 
  523         } else if (wp->parity != NULL) {
  524                 cbp = wp->parity;
  525                 wp->parity = NULL;
  526                 s = cbp->bio_caller1;
  527                 g_io_request(cbp, s->drive_sc->consumer);
  528                 finished = 0;
  529         }
  530 
  531         return (finished);
  532 }
  533 
  534 /* Flush the queue with delayed requests. */
  535 static void
  536 gv_plex_flush(struct gv_plex *p)
  537 {
  538         struct gv_softc *sc;
  539         struct bio *bp;
  540 
  541         sc = p->vinumconf;
  542         bp = bioq_takefirst(p->rqueue);
  543         while (bp != NULL) {
  544                 gv_plex_start(p, bp);
  545                 bp = bioq_takefirst(p->rqueue);
  546         }
  547 }
  548 
  549 static void
  550 gv_post_bio(struct gv_softc *sc, struct bio *bp)
  551 {
  552 
  553         KASSERT(sc != NULL, ("NULL sc"));
  554         KASSERT(bp != NULL, ("NULL bp"));
  555         mtx_lock(&sc->bqueue_mtx);
  556         bioq_disksort(sc->bqueue_down, bp);
  557         wakeup(sc);
  558         mtx_unlock(&sc->bqueue_mtx);
  559 }
  560 
  561 int
  562 gv_sync_request(struct gv_plex *from, struct gv_plex *to, off_t offset,
  563     off_t length, int type, caddr_t data)
  564 {
  565         struct gv_softc *sc;
  566         struct bio *bp;
  567 
  568         KASSERT(from != NULL, ("NULL from"));
  569         KASSERT(to != NULL, ("NULL to"));
  570         sc = from->vinumconf;
  571         KASSERT(sc != NULL, ("NULL sc"));
  572 
  573         bp = g_new_bio();
  574         if (bp == NULL) {
  575                 G_VINUM_DEBUG(0, "sync from '%s' failed at offset "
  576                     " %jd; out of memory", from->name, offset);
  577                 return (ENOMEM);
  578         }
  579         bp->bio_length = length;
  580         bp->bio_done = gv_done;
  581         bp->bio_pflags |= GV_BIO_SYNCREQ;
  582         bp->bio_offset = offset;
  583         bp->bio_caller1 = from;         
  584         bp->bio_caller2 = to;
  585         bp->bio_cmd = type;
  586         if (data == NULL)
  587                 data = g_malloc(length, M_WAITOK);
  588         bp->bio_pflags |= GV_BIO_MALLOC; /* Free on the next run. */
  589         bp->bio_data = data;
  590 
  591         /* Send down next. */
  592         gv_post_bio(sc, bp);
  593         //gv_plex_start(from, bp);
  594         return (0);
  595 }
  596 
  597 /*
  598  * Handle a finished plex sync bio.
  599  */
  600 int
  601 gv_sync_complete(struct gv_plex *to, struct bio *bp)
  602 {
  603         struct gv_plex *from, *p;
  604         struct gv_sd *s;
  605         struct gv_volume *v;
  606         struct gv_softc *sc;
  607         off_t offset;
  608         int err;
  609 
  610         g_topology_assert_not();
  611 
  612         err = 0;
  613         KASSERT(to != NULL, ("NULL to"));
  614         KASSERT(bp != NULL, ("NULL bp"));
  615         from = bp->bio_caller2;
  616         KASSERT(from != NULL, ("NULL from"));
  617         v = to->vol_sc;
  618         KASSERT(v != NULL, ("NULL v"));
  619         sc = v->vinumconf;
  620         KASSERT(sc != NULL, ("NULL sc"));
  621 
  622         /* If it was a read, write it. */
  623         if (bp->bio_cmd == BIO_READ) {
  624                 err = gv_sync_request(from, to, bp->bio_offset, bp->bio_length,
  625                     BIO_WRITE, bp->bio_data);
  626         /* If it was a write, read the next one. */
  627         } else if (bp->bio_cmd == BIO_WRITE) {
  628                 if (bp->bio_pflags & GV_BIO_MALLOC)
  629                         g_free(bp->bio_data);
  630                 to->synced += bp->bio_length;
  631                 /* If we're finished, clean up. */
  632                 if (bp->bio_offset + bp->bio_length >= from->size) {
  633                         G_VINUM_DEBUG(1, "syncing of %s from %s completed",
  634                             to->name, from->name);
  635                         /* Update our state. */
  636                         LIST_FOREACH(s, &to->subdisks, in_plex)
  637                                 gv_set_sd_state(s, GV_SD_UP, 0);
  638                         gv_update_plex_state(to);
  639                         to->flags &= ~GV_PLEX_SYNCING;
  640                         to->synced = 0;
  641                         gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0);
  642                 } else {
  643                         offset = bp->bio_offset + bp->bio_length;
  644                         err = gv_sync_request(from, to, offset,
  645                             MIN(bp->bio_length, from->size - offset),
  646                             BIO_READ, NULL);
  647                 }
  648         }
  649         g_destroy_bio(bp);
  650         /* Clean up if there was an error. */
  651         if (err) {
  652                 to->flags &= ~GV_PLEX_SYNCING;
  653                 G_VINUM_DEBUG(0, "error syncing plexes: error code %d", err);
  654         }
  655 
  656         /* Check if all plexes are synced, and lower refcounts. */
  657         g_topology_lock();
  658         LIST_FOREACH(p, &v->plexes, in_volume) {
  659                 if (p->flags & GV_PLEX_SYNCING) {
  660                         g_topology_unlock();
  661                         return (-1);
  662                 }
  663         }
  664         /* If we came here, all plexes are synced, and we're free. */
  665         gv_access(v->provider, -1, -1, 0);
  666         g_topology_unlock();
  667         G_VINUM_DEBUG(1, "plex sync completed");
  668         gv_volume_flush(v);
  669         return (0);
  670 }
  671 
  672 /*
  673  * Create a new bio struct for the next grow request.
  674  */
  675 int
  676 gv_grow_request(struct gv_plex *p, off_t offset, off_t length, int type,
  677     caddr_t data)
  678 {
  679         struct gv_softc *sc;
  680         struct bio *bp;
  681 
  682         KASSERT(p != NULL, ("gv_grow_request: NULL p"));
  683         sc = p->vinumconf;
  684         KASSERT(sc != NULL, ("gv_grow_request: NULL sc"));
  685 
  686         bp = g_new_bio();
  687         if (bp == NULL) {
  688                 G_VINUM_DEBUG(0, "grow of %s failed creating bio: "
  689                     "out of memory", p->name);
  690                 return (ENOMEM);
  691         }
  692 
  693         bp->bio_cmd = type;
  694         bp->bio_done = gv_done;
  695         bp->bio_error = 0;
  696         bp->bio_caller1 = p;
  697         bp->bio_offset = offset;
  698         bp->bio_length = length;
  699         bp->bio_pflags |= GV_BIO_GROW;
  700         if (data == NULL)
  701                 data = g_malloc(length, M_WAITOK);
  702         bp->bio_pflags |= GV_BIO_MALLOC;
  703         bp->bio_data = data;
  704 
  705         gv_post_bio(sc, bp);
  706         //gv_plex_start(p, bp);
  707         return (0);
  708 }
  709 
  710 /*
  711  * Finish handling of a bio to a growing plex.
  712  */
  713 void
  714 gv_grow_complete(struct gv_plex *p, struct bio *bp)
  715 {
  716         struct gv_softc *sc;
  717         struct gv_sd *s;
  718         struct gv_volume *v;
  719         off_t origsize, offset;
  720         int sdcount, err;
  721 
  722         v = p->vol_sc;
  723         KASSERT(v != NULL, ("gv_grow_complete: NULL v"));
  724         sc = v->vinumconf;
  725         KASSERT(sc != NULL, ("gv_grow_complete: NULL sc"));
  726         err = 0;
  727 
  728         /* If it was a read, write it. */
  729         if (bp->bio_cmd == BIO_READ) {
  730                 p->synced += bp->bio_length;
  731                 err = gv_grow_request(p, bp->bio_offset, bp->bio_length,
  732                     BIO_WRITE, bp->bio_data);
  733         /* If it was a write, read next. */
  734         } else if (bp->bio_cmd == BIO_WRITE) {
  735                 if (bp->bio_pflags & GV_BIO_MALLOC)
  736                         g_free(bp->bio_data);
  737 
  738                 /* Find the real size of the plex. */
  739                 sdcount = gv_sdcount(p, 1);
  740                 s = LIST_FIRST(&p->subdisks);
  741                 KASSERT(s != NULL, ("NULL s"));
  742                 origsize = (s->size * (sdcount - 1));
  743                 if (bp->bio_offset + bp->bio_length >= origsize) {
  744                         G_VINUM_DEBUG(1, "growing of %s completed", p->name);
  745                         p->flags &= ~GV_PLEX_GROWING;
  746                         LIST_FOREACH(s, &p->subdisks, in_plex) {
  747                                 s->flags &= ~GV_SD_GROW;
  748                                 gv_set_sd_state(s, GV_SD_UP, 0);
  749                         }
  750                         p->size = gv_plex_size(p);
  751                         gv_update_vol_size(v, gv_vol_size(v));
  752                         gv_set_plex_state(p, GV_PLEX_UP, 0);
  753                         g_topology_lock();
  754                         gv_access(v->provider, -1, -1, 0);
  755                         g_topology_unlock();
  756                         p->synced = 0;
  757                         gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0);
  758                         /* Issue delayed requests. */
  759                         gv_plex_flush(p);
  760                 } else {
  761                         offset = bp->bio_offset + bp->bio_length;
  762                         err = gv_grow_request(p, offset,
  763                            MIN(bp->bio_length, origsize - offset),
  764                            BIO_READ, NULL);
  765                 }
  766         }
  767         g_destroy_bio(bp);
  768 
  769         if (err) {
  770                 p->flags &= ~GV_PLEX_GROWING;
  771                 G_VINUM_DEBUG(0, "error growing plex: error code %d", err);
  772         }
  773 }
  774 
  775 
  776 /*
  777  * Create an initialization BIO and send it off to the consumer. Assume that
  778  * we're given initialization data as parameter.
  779  */
  780 void
  781 gv_init_request(struct gv_sd *s, off_t start, caddr_t data, off_t length)
  782 {
  783         struct gv_drive *d;
  784         struct g_consumer *cp;
  785         struct bio *bp, *cbp;
  786 
  787         KASSERT(s != NULL, ("gv_init_request: NULL s"));
  788         d = s->drive_sc;
  789         KASSERT(d != NULL, ("gv_init_request: NULL d"));
  790         cp = d->consumer;
  791         KASSERT(cp != NULL, ("gv_init_request: NULL cp"));
  792 
  793         bp = g_new_bio();
  794         if (bp == NULL) {
  795                 G_VINUM_DEBUG(0, "subdisk '%s' init: write failed at offset %jd"
  796                     " (drive offset %jd); out of memory", s->name,
  797                     (intmax_t)s->initialized, (intmax_t)start);
  798                 return; /* XXX: Error codes. */
  799         }
  800         bp->bio_cmd = BIO_WRITE;
  801         bp->bio_data = data;
  802         bp->bio_done = gv_done;
  803         bp->bio_error = 0;
  804         bp->bio_length = length;
  805         bp->bio_pflags |= GV_BIO_INIT;
  806         bp->bio_offset = start;
  807         bp->bio_caller1 = s;
  808 
  809         /* Then ofcourse, we have to clone it. */
  810         cbp = g_clone_bio(bp);
  811         if (cbp == NULL) {
  812                 G_VINUM_DEBUG(0, "subdisk '%s' init: write failed at offset %jd"
  813                     " (drive offset %jd); out of memory", s->name,
  814                     (intmax_t)s->initialized, (intmax_t)start);
  815                 return; /* XXX: Error codes. */
  816         }
  817         cbp->bio_done = gv_done;
  818         cbp->bio_caller1 = s;
  819         /* Send it off to the consumer. */
  820         g_io_request(cbp, cp);
  821 }
  822 
  823 /*
  824  * Handle a finished initialization BIO.
  825  */
  826 void
  827 gv_init_complete(struct gv_plex *p, struct bio *bp)
  828 {
  829         struct gv_softc *sc;
  830         struct gv_drive *d;
  831         struct g_consumer *cp;
  832         struct gv_sd *s;
  833         off_t start, length;
  834         caddr_t data;
  835         int error;
  836 
  837         s = bp->bio_caller1;
  838         start = bp->bio_offset;
  839         length = bp->bio_length;
  840         error = bp->bio_error;
  841         data = bp->bio_data;
  842 
  843         KASSERT(s != NULL, ("gv_init_complete: NULL s"));
  844         d = s->drive_sc;
  845         KASSERT(d != NULL, ("gv_init_complete: NULL d"));
  846         cp = d->consumer;
  847         KASSERT(cp != NULL, ("gv_init_complete: NULL cp"));
  848         sc = p->vinumconf;
  849         KASSERT(sc != NULL, ("gv_init_complete: NULL sc"));
  850 
  851         g_destroy_bio(bp);
  852 
  853         /*
  854          * First we need to find out if it was okay, and abort if it's not.
  855          * Then we need to free previous buffers, find out the correct subdisk,
  856          * as well as getting the correct starting point and length of the BIO.
  857          */
  858         if (start >= s->drive_offset + s->size) {
  859                 /* Free the data we initialized. */
  860                 if (data != NULL)
  861                         g_free(data);
  862                 g_topology_assert_not();
  863                 g_topology_lock();
  864                 g_access(cp, 0, -1, 0);
  865                 g_topology_unlock();
  866                 if (error) {
  867                         gv_set_sd_state(s, GV_SD_STALE, GV_SETSTATE_FORCE |
  868                             GV_SETSTATE_CONFIG);
  869                 } else {
  870                         gv_set_sd_state(s, GV_SD_UP, GV_SETSTATE_CONFIG);
  871                         s->initialized = 0;
  872                         gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0);
  873                         G_VINUM_DEBUG(1, "subdisk '%s' init: finished "
  874                             "successfully", s->name);
  875                 }
  876                 return;
  877         }
  878         s->initialized += length;
  879         start += length;
  880         gv_init_request(s, start, data, length);
  881 }
  882 
  883 /*
  884  * Create a new bio struct for the next parity rebuild. Used both by internal
  885  * rebuild of degraded plexes as well as user initiated rebuilds/checks.
  886  */
  887 void
  888 gv_parity_request(struct gv_plex *p, int flags, off_t offset)
  889 {
  890         struct gv_softc *sc;
  891         struct bio *bp;
  892 
  893         KASSERT(p != NULL, ("gv_parity_request: NULL p"));
  894         sc = p->vinumconf;
  895         KASSERT(sc != NULL, ("gv_parity_request: NULL sc"));
  896 
  897         bp = g_new_bio();
  898         if (bp == NULL) {
  899                 G_VINUM_DEBUG(0, "rebuild of %s failed creating bio: "
  900                     "out of memory", p->name);
  901                 return;
  902         }
  903 
  904         bp->bio_cmd = BIO_WRITE;
  905         bp->bio_done = gv_done;
  906         bp->bio_error = 0;
  907         bp->bio_length = p->stripesize;
  908         bp->bio_caller1 = p;
  909 
  910         /*
  911          * Check if it's a rebuild of a degraded plex or a user request of
  912          * parity rebuild.
  913          */
  914         if (flags & GV_BIO_REBUILD)
  915                 bp->bio_data = g_malloc(GV_DFLT_SYNCSIZE, M_WAITOK);
  916         else if (flags & GV_BIO_CHECK)
  917                 bp->bio_data = g_malloc(p->stripesize, M_WAITOK | M_ZERO);
  918         else {
  919                 G_VINUM_DEBUG(0, "invalid flags given in rebuild");
  920                 return;
  921         }
  922 
  923         bp->bio_pflags = flags;
  924         bp->bio_pflags |= GV_BIO_MALLOC;
  925 
  926         /* We still have more parity to build. */
  927         bp->bio_offset = offset;
  928         gv_post_bio(sc, bp);
  929         //gv_plex_start(p, bp); /* Send it down to the plex. */
  930 }
  931 
  932 /*
  933  * Handle a finished parity write.
  934  */
  935 void
  936 gv_parity_complete(struct gv_plex *p, struct bio *bp)
  937 {
  938         struct gv_softc *sc;
  939         int error, flags;
  940 
  941         error = bp->bio_error;
  942         flags = bp->bio_pflags;
  943         flags &= ~GV_BIO_MALLOC;
  944 
  945         sc = p->vinumconf;
  946         KASSERT(sc != NULL, ("gv_parity_complete: NULL sc"));
  947 
  948         /* Clean up what we allocated. */
  949         if (bp->bio_pflags & GV_BIO_MALLOC)
  950                 g_free(bp->bio_data);
  951         g_destroy_bio(bp);
  952 
  953         if (error == EAGAIN) {
  954                 G_VINUM_DEBUG(0, "parity incorrect at offset 0x%jx",
  955                     (intmax_t)p->synced);
  956         }
  957 
  958         /* Any error is fatal, except EAGAIN when we're rebuilding. */
  959         if (error && !(error == EAGAIN && (flags & GV_BIO_PARITY))) {
  960                 /* Make sure we don't have the lock. */
  961                 g_topology_assert_not();
  962                 g_topology_lock();
  963                 gv_access(p->vol_sc->provider, -1, -1, 0);
  964                 g_topology_unlock();
  965                 G_VINUM_DEBUG(0, "parity check on %s failed at 0x%jx "
  966                     "errno %d", p->name, (intmax_t)p->synced, error);
  967                 return;
  968         } else {
  969                 p->synced += p->stripesize;
  970         }
  971 
  972         if (p->synced >= p->size) {
  973                 /* Make sure we don't have the lock. */
  974                 g_topology_assert_not();
  975                 g_topology_lock();
  976                 gv_access(p->vol_sc->provider, -1, -1, 0);
  977                 g_topology_unlock();
  978                 /* We're finished. */
  979                 G_VINUM_DEBUG(1, "parity operation on %s finished", p->name);
  980                 p->synced = 0;
  981                 gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0);
  982                 return;
  983         }
  984 
  985         /* Send down next. It will determine if we need to itself. */
  986         gv_parity_request(p, flags, p->synced);
  987 }
  988 
  989 /*
  990  * Handle a finished plex rebuild bio.
  991  */
  992 void
  993 gv_rebuild_complete(struct gv_plex *p, struct bio *bp)
  994 {
  995         struct gv_softc *sc;
  996         struct gv_sd *s;
  997         int error, flags;
  998         off_t offset;
  999 
 1000         error = bp->bio_error;
 1001         flags = bp->bio_pflags;
 1002         offset = bp->bio_offset;
 1003         flags &= ~GV_BIO_MALLOC;
 1004         sc = p->vinumconf;
 1005         KASSERT(sc != NULL, ("gv_rebuild_complete: NULL sc"));
 1006 
 1007         /* Clean up what we allocated. */
 1008         if (bp->bio_pflags & GV_BIO_MALLOC)
 1009                 g_free(bp->bio_data);
 1010         g_destroy_bio(bp);
 1011 
 1012         if (error) {
 1013                 g_topology_assert_not();
 1014                 g_topology_lock();
 1015                 gv_access(p->vol_sc->provider, -1, -1, 0);
 1016                 g_topology_unlock();
 1017         
 1018                 G_VINUM_DEBUG(0, "rebuild of %s failed at offset %jd errno: %d",
 1019                     p->name, (intmax_t)offset, error);
 1020                 p->flags &= ~GV_PLEX_REBUILDING;
 1021                 p->synced = 0;
 1022                 gv_plex_flush(p); /* Flush out remaining rebuild BIOs. */
 1023                 return;
 1024         }
 1025 
 1026         offset += (p->stripesize * (gv_sdcount(p, 1) - 1));
 1027         if (offset >= p->size) {
 1028                 /* We're finished. */
 1029                 g_topology_assert_not();
 1030                 g_topology_lock();
 1031                 gv_access(p->vol_sc->provider, -1, -1, 0);
 1032                 g_topology_unlock();
 1033         
 1034                 G_VINUM_DEBUG(1, "rebuild of %s finished", p->name);
 1035                 gv_save_config(p->vinumconf);
 1036                 p->flags &= ~GV_PLEX_REBUILDING;
 1037                 p->synced = 0;
 1038                 /* Try to up all subdisks. */
 1039                 LIST_FOREACH(s, &p->subdisks, in_plex)
 1040                         gv_update_sd_state(s);
 1041                 gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0);
 1042                 gv_plex_flush(p); /* Flush out remaining rebuild BIOs. */
 1043                 return;
 1044         }
 1045 
 1046         /* Send down next. It will determine if we need to itself. */
 1047         gv_parity_request(p, flags, offset);
 1048 }

Cache object: a27975a7942922a27fec7ef67e2a70ae


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.