The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/geom/vinum/geom_vinum_plex.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
    3  *
    4  * Copyright (c) 2004, 2007 Lukas Ertl
    5  * Copyright (c) 2007, 2009 Ulf Lilleengen
    6  * All rights reserved.
    7  *
    8  * Redistribution and use in source and binary forms, with or without
    9  * modification, are permitted provided that the following conditions
   10  * are met:
   11  * 1. Redistributions of source code must retain the above copyright
   12  *    notice, this list of conditions and the following disclaimer.
   13  * 2. Redistributions in binary form must reproduce the above copyright
   14  *    notice, this list of conditions and the following disclaimer in the
   15  *    documentation and/or other materials provided with the distribution.
   16  *
   17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   27  * SUCH DAMAGE.
   28  */
   29 
   30 #include <sys/cdefs.h>
   31 __FBSDID("$FreeBSD$");
   32 
   33 #include <sys/param.h>
   34 #include <sys/bio.h>
   35 #include <sys/lock.h>
   36 #include <sys/malloc.h>
   37 #include <sys/systm.h>
   38 
   39 #include <geom/geom.h>
   40 #include <geom/geom_dbg.h>
   41 #include <geom/vinum/geom_vinum_var.h>
   42 #include <geom/vinum/geom_vinum_raid5.h>
   43 #include <geom/vinum/geom_vinum.h>
   44 
   45 static int      gv_check_parity(struct gv_plex *, struct bio *,
   46                     struct gv_raid5_packet *);
   47 static int      gv_normal_parity(struct gv_plex *, struct bio *,
   48                     struct gv_raid5_packet *);
   49 static void     gv_plex_flush(struct gv_plex *);
   50 static int      gv_plex_offset(struct gv_plex *, off_t, off_t, off_t *, off_t *,
   51                     int *, int);
   52 static int      gv_plex_normal_request(struct gv_plex *, struct bio *, off_t,
   53                     off_t,  caddr_t);
   54 static void     gv_post_bio(struct gv_softc *, struct bio *);
   55 
   56 void
   57 gv_plex_start(struct gv_plex *p, struct bio *bp)
   58 {
   59         struct bio *cbp;
   60         struct gv_sd *s;
   61         struct gv_raid5_packet *wp;
   62         caddr_t addr;
   63         off_t bcount, boff, len;
   64 
   65         bcount = bp->bio_length;
   66         addr = bp->bio_data;
   67         boff = bp->bio_offset;
   68 
   69         /* Walk over the whole length of the request, we might split it up. */
   70         while (bcount > 0) {
   71                 wp = NULL;
   72 
   73                 /*
   74                  * RAID5 plexes need special treatment, as a single request
   75                  * might involve several read/write sub-requests.
   76                  */
   77                 if (p->org == GV_PLEX_RAID5) {
   78                         wp = gv_raid5_start(p, bp, addr, boff, bcount);
   79                         if (wp == NULL)
   80                                 return;
   81 
   82                         len = wp->length;
   83 
   84                         if (TAILQ_EMPTY(&wp->bits))
   85                                 g_free(wp);
   86                         else if (wp->lockbase != -1)
   87                                 TAILQ_INSERT_TAIL(&p->packets, wp, list);
   88 
   89                 /*
   90                  * Requests to concatenated and striped plexes go straight
   91                  * through.
   92                  */
   93                 } else {
   94                         len = gv_plex_normal_request(p, bp, boff, bcount, addr);
   95                 }
   96                 if (len < 0)
   97                         return;
   98                         
   99                 bcount -= len;
  100                 addr += len;
  101                 boff += len;
  102         }
  103 
  104         /*
  105          * Fire off all sub-requests.  We get the correct consumer (== drive)
  106          * to send each request to via the subdisk that was stored in
  107          * cbp->bio_caller1.
  108          */
  109         cbp = bioq_takefirst(p->bqueue);
  110         while (cbp != NULL) {
  111                 /*
  112                  * RAID5 sub-requests need to come in correct order, otherwise
  113                  * we trip over the parity, as it might be overwritten by
  114                  * another sub-request.  We abuse cbp->bio_caller2 to mark
  115                  * potential overlap situations. 
  116                  */
  117                 if (cbp->bio_caller2 != NULL && gv_stripe_active(p, cbp)) {
  118                         /* Park the bio on the waiting queue. */
  119                         cbp->bio_pflags |= GV_BIO_ONHOLD;
  120                         bioq_disksort(p->wqueue, cbp);
  121                 } else {
  122                         s = cbp->bio_caller1;
  123                         g_io_request(cbp, s->drive_sc->consumer);
  124                 }
  125                 cbp = bioq_takefirst(p->bqueue);
  126         }
  127 }
  128 
  129 static int
  130 gv_plex_offset(struct gv_plex *p, off_t boff, off_t bcount, off_t *real_off,
  131     off_t *real_len, int *sdno, int growing)
  132 {
  133         struct gv_sd *s;
  134         int i, sdcount;
  135         off_t len_left, stripeend, stripeno, stripestart;
  136 
  137         switch (p->org) {
  138         case GV_PLEX_CONCAT:
  139                 /*
  140                  * Find the subdisk where this request starts.  The subdisks in
  141                  * this list must be ordered by plex_offset.
  142                  */
  143                 i = 0;
  144                 LIST_FOREACH(s, &p->subdisks, in_plex) {
  145                         if (s->plex_offset <= boff &&
  146                             s->plex_offset + s->size > boff) {
  147                                 *sdno = i;
  148                                 break;
  149                         }
  150                         i++;
  151                 }
  152                 if (s == NULL || s->drive_sc == NULL)
  153                         return (GV_ERR_NOTFOUND);
  154 
  155                 /* Calculate corresponding offsets on disk. */
  156                 *real_off = boff - s->plex_offset;
  157                 len_left = s->size - (*real_off);
  158                 KASSERT(len_left >= 0, ("gv_plex_offset: len_left < 0"));
  159                 *real_len = (bcount > len_left) ? len_left : bcount;
  160                 break;
  161 
  162         case GV_PLEX_STRIPED:
  163                 /* The number of the stripe where the request starts. */
  164                 stripeno = boff / p->stripesize;
  165                 KASSERT(stripeno >= 0, ("gv_plex_offset: stripeno < 0"));
  166 
  167                 /* Take growing subdisks into account when calculating. */
  168                 sdcount = gv_sdcount(p, (boff >= p->synced));
  169 
  170                 if (!(boff + bcount <= p->synced) &&
  171                     (p->flags & GV_PLEX_GROWING) &&
  172                     !growing)
  173                         return (GV_ERR_ISBUSY);
  174                 *sdno = stripeno % sdcount;
  175 
  176                 KASSERT(sdno >= 0, ("gv_plex_offset: sdno < 0"));
  177                 stripestart = (stripeno / sdcount) *
  178                     p->stripesize;
  179                 KASSERT(stripestart >= 0, ("gv_plex_offset: stripestart < 0"));
  180                 stripeend = stripestart + p->stripesize;
  181                 *real_off = boff - (stripeno * p->stripesize) +
  182                     stripestart;
  183                 len_left = stripeend - *real_off;
  184                 KASSERT(len_left >= 0, ("gv_plex_offset: len_left < 0"));
  185 
  186                 *real_len = (bcount <= len_left) ? bcount : len_left;
  187                 break;
  188 
  189         default:
  190                 return (GV_ERR_PLEXORG);
  191         }
  192         return (0);
  193 }
  194 
  195 /*
  196  * Prepare a normal plex request.
  197  */
  198 static int 
  199 gv_plex_normal_request(struct gv_plex *p, struct bio *bp, off_t boff,
  200     off_t bcount,  caddr_t addr)
  201 {
  202         struct gv_sd *s;
  203         struct bio *cbp;
  204         off_t real_len, real_off;
  205         int i, err, sdno;
  206 
  207         s = NULL;
  208         sdno = -1;
  209         real_len = real_off = 0;
  210 
  211         err = ENXIO;
  212 
  213         if (p == NULL || LIST_EMPTY(&p->subdisks)) 
  214                 goto bad;
  215 
  216         err = gv_plex_offset(p, boff, bcount, &real_off,
  217             &real_len, &sdno, (bp->bio_pflags & GV_BIO_GROW));
  218         /* If the request was blocked, put it into wait. */
  219         if (err == GV_ERR_ISBUSY) {
  220                 bioq_disksort(p->rqueue, bp);
  221                 return (-1); /* "Fail", and delay request. */
  222         }
  223         if (err) {
  224                 err = ENXIO;
  225                 goto bad;
  226         }
  227         err = ENXIO;
  228 
  229         /* Find the right subdisk. */
  230         i = 0;
  231         LIST_FOREACH(s, &p->subdisks, in_plex) {
  232                 if (i == sdno)
  233                         break;
  234                 i++;
  235         }
  236 
  237         /* Subdisk not found. */
  238         if (s == NULL || s->drive_sc == NULL)
  239                 goto bad;
  240 
  241         /* Now check if we can handle the request on this subdisk. */
  242         switch (s->state) {
  243         case GV_SD_UP:
  244                 /* If the subdisk is up, just continue. */
  245                 break;
  246         case GV_SD_DOWN:
  247                 if (bp->bio_pflags & GV_BIO_INTERNAL)
  248                         G_VINUM_DEBUG(0, "subdisk must be in the stale state in"
  249                             " order to perform administrative requests");
  250                 goto bad;
  251         case GV_SD_STALE:
  252                 if (!(bp->bio_pflags & GV_BIO_SYNCREQ)) {
  253                         G_VINUM_DEBUG(0, "subdisk stale, unable to perform "
  254                             "regular requests");
  255                         goto bad;
  256                 }
  257 
  258                 G_VINUM_DEBUG(1, "sd %s is initializing", s->name);
  259                 gv_set_sd_state(s, GV_SD_INITIALIZING, GV_SETSTATE_FORCE);
  260                 break;
  261         case GV_SD_INITIALIZING:
  262                 if (bp->bio_cmd == BIO_READ)
  263                         goto bad;
  264                 break;
  265         default:
  266                 /* All other subdisk states mean it's not accessible. */
  267                 goto bad;
  268         }
  269 
  270         /* Clone the bio and adjust the offsets and sizes. */
  271         cbp = g_clone_bio(bp);
  272         if (cbp == NULL) {
  273                 err = ENOMEM;
  274                 goto bad;
  275         }
  276         cbp->bio_offset = real_off + s->drive_offset;
  277         cbp->bio_length = real_len;
  278         cbp->bio_data = addr;
  279         cbp->bio_done = gv_done;
  280         cbp->bio_caller1 = s;
  281         s->drive_sc->active++;
  282 
  283         /* Store the sub-requests now and let others issue them. */
  284         bioq_insert_tail(p->bqueue, cbp); 
  285         return (real_len);
  286 bad:
  287         G_VINUM_LOGREQ(0, bp, "plex request failed.");
  288         /* Building the sub-request failed. If internal BIO, do not deliver. */
  289         if (bp->bio_pflags & GV_BIO_INTERNAL) {
  290                 if (bp->bio_pflags & GV_BIO_MALLOC)
  291                         g_free(bp->bio_data);
  292                 g_destroy_bio(bp);
  293                 p->flags &= ~(GV_PLEX_SYNCING | GV_PLEX_REBUILDING |
  294                     GV_PLEX_GROWING);
  295                 return (-1);
  296         }
  297         g_io_deliver(bp, err);
  298         return (-1);
  299 }
  300 
  301 /*
  302  * Handle a completed request to a striped or concatenated plex.
  303  */
  304 void
  305 gv_plex_normal_done(struct gv_plex *p, struct bio *bp)
  306 {
  307         struct bio *pbp;
  308 
  309         pbp = bp->bio_parent;
  310         if (pbp->bio_error == 0)
  311                 pbp->bio_error = bp->bio_error;
  312         g_destroy_bio(bp);
  313         pbp->bio_inbed++;
  314         if (pbp->bio_children == pbp->bio_inbed) {
  315                 /* Just set it to length since multiple plexes will
  316                  * screw things up. */
  317                 pbp->bio_completed = pbp->bio_length;
  318                 if (pbp->bio_pflags & GV_BIO_SYNCREQ)
  319                         gv_sync_complete(p, pbp);
  320                 else if (pbp->bio_pflags & GV_BIO_GROW)
  321                         gv_grow_complete(p, pbp);
  322                 else
  323                         g_io_deliver(pbp, pbp->bio_error);
  324         }
  325 }
  326 
  327 /*
  328  * Handle a completed request to a RAID-5 plex.
  329  */
  330 void
  331 gv_plex_raid5_done(struct gv_plex *p, struct bio *bp)
  332 {
  333         struct gv_softc *sc;
  334         struct bio *cbp, *pbp;
  335         struct gv_bioq *bq, *bq2;
  336         struct gv_raid5_packet *wp;
  337         off_t completed;
  338         int i;
  339 
  340         completed = 0;
  341         sc = p->vinumconf;
  342         wp = bp->bio_caller2;
  343 
  344         switch (bp->bio_parent->bio_cmd) {
  345         case BIO_READ:
  346                 if (wp == NULL) {
  347                         completed = bp->bio_completed;
  348                         break;
  349                 }
  350 
  351                 TAILQ_FOREACH_SAFE(bq, &wp->bits, queue, bq2) {
  352                         if (bq->bp != bp)
  353                                 continue;
  354                         TAILQ_REMOVE(&wp->bits, bq, queue);
  355                         g_free(bq);
  356                         for (i = 0; i < wp->length; i++)
  357                                 wp->data[i] ^= bp->bio_data[i];
  358                         break;
  359                 }
  360                 if (TAILQ_EMPTY(&wp->bits)) {
  361                         completed = wp->length;
  362                         if (wp->lockbase != -1) {
  363                                 TAILQ_REMOVE(&p->packets, wp, list);
  364                                 /* Bring the waiting bios back into the game. */
  365                                 pbp = bioq_takefirst(p->wqueue);
  366                                 while (pbp != NULL) {
  367                                         gv_post_bio(sc, pbp);
  368                                         pbp = bioq_takefirst(p->wqueue);
  369                                 }
  370                         }
  371                         g_free(wp);
  372                 }
  373 
  374                 break;
  375 
  376         case BIO_WRITE:
  377                 /* XXX can this ever happen? */
  378                 if (wp == NULL) {
  379                         completed = bp->bio_completed;
  380                         break;
  381                 }
  382 
  383                 /* Check if we need to handle parity data. */
  384                 TAILQ_FOREACH_SAFE(bq, &wp->bits, queue, bq2) {
  385                         if (bq->bp != bp)
  386                                 continue;
  387                         TAILQ_REMOVE(&wp->bits, bq, queue);
  388                         g_free(bq);
  389                         cbp = wp->parity;
  390                         if (cbp != NULL) {
  391                                 for (i = 0; i < wp->length; i++)
  392                                         cbp->bio_data[i] ^= bp->bio_data[i];
  393                         }
  394                         break;
  395                 }
  396 
  397                 /* Handle parity data. */
  398                 if (TAILQ_EMPTY(&wp->bits)) {
  399                         if (bp->bio_parent->bio_pflags & GV_BIO_CHECK)
  400                                 i = gv_check_parity(p, bp, wp);
  401                         else
  402                                 i = gv_normal_parity(p, bp, wp);
  403 
  404                         /* All of our sub-requests have finished. */
  405                         if (i) {
  406                                 completed = wp->length;
  407                                 TAILQ_REMOVE(&p->packets, wp, list);
  408                                 /* Bring the waiting bios back into the game. */
  409                                 pbp = bioq_takefirst(p->wqueue);
  410                                 while (pbp != NULL) {
  411                                         gv_post_bio(sc, pbp);
  412                                         pbp = bioq_takefirst(p->wqueue);
  413                                 }
  414                                 g_free(wp);
  415                         }
  416                 }
  417 
  418                 break;
  419         }
  420 
  421         pbp = bp->bio_parent;
  422         if (pbp->bio_error == 0)
  423                 pbp->bio_error = bp->bio_error;
  424         pbp->bio_completed += completed;
  425 
  426         /* When the original request is finished, we deliver it. */
  427         pbp->bio_inbed++;
  428         if (pbp->bio_inbed == pbp->bio_children) {
  429                 /* Hand it over for checking or delivery. */
  430                 if (pbp->bio_cmd == BIO_WRITE &&
  431                     (pbp->bio_pflags & GV_BIO_CHECK)) {
  432                         gv_parity_complete(p, pbp);
  433                 } else if (pbp->bio_cmd == BIO_WRITE &&
  434                     (pbp->bio_pflags & GV_BIO_REBUILD)) {
  435                         gv_rebuild_complete(p, pbp);
  436                 } else if (pbp->bio_pflags & GV_BIO_INIT) {
  437                         gv_init_complete(p, pbp);
  438                 } else if (pbp->bio_pflags & GV_BIO_SYNCREQ) {
  439                         gv_sync_complete(p, pbp);
  440                 } else if (pbp->bio_pflags & GV_BIO_GROW) {
  441                         gv_grow_complete(p, pbp);
  442                 } else {
  443                         g_io_deliver(pbp, pbp->bio_error);
  444                 }
  445         }
  446 
  447         /* Clean up what we allocated. */
  448         if (bp->bio_cflags & GV_BIO_MALLOC)
  449                 g_free(bp->bio_data);
  450         g_destroy_bio(bp);
  451 }
  452 
  453 static int
  454 gv_check_parity(struct gv_plex *p, struct bio *bp, struct gv_raid5_packet *wp)
  455 {
  456         struct bio *pbp;
  457         struct gv_sd *s;
  458         int err, finished, i;
  459 
  460         err = 0;
  461         finished = 1;
  462 
  463         if (wp->waiting != NULL) {
  464                 pbp = wp->waiting;
  465                 wp->waiting = NULL;
  466                 s = pbp->bio_caller1;
  467                 g_io_request(pbp, s->drive_sc->consumer);
  468                 finished = 0;
  469 
  470         } else if (wp->parity != NULL) {
  471                 pbp = wp->parity;
  472                 wp->parity = NULL;
  473 
  474                 /* Check if the parity is correct. */
  475                 for (i = 0; i < wp->length; i++) {
  476                         if (bp->bio_data[i] != pbp->bio_data[i]) {
  477                                 err = 1;
  478                                 break;
  479                         }
  480                 }
  481 
  482                 /* The parity is not correct... */
  483                 if (err) {
  484                         bp->bio_parent->bio_error = EAGAIN;
  485 
  486                         /* ... but we rebuild it. */
  487                         if (bp->bio_parent->bio_pflags & GV_BIO_PARITY) {
  488                                 s = pbp->bio_caller1;
  489                                 g_io_request(pbp, s->drive_sc->consumer);
  490                                 finished = 0;
  491                         }
  492                 }
  493 
  494                 /*
  495                  * Clean up the BIO we would have used for rebuilding the
  496                  * parity.
  497                  */
  498                 if (finished) {
  499                         bp->bio_parent->bio_inbed++;
  500                         g_destroy_bio(pbp);
  501                 }
  502         }
  503 
  504         return (finished);
  505 }
  506 
  507 static int
  508 gv_normal_parity(struct gv_plex *p, struct bio *bp, struct gv_raid5_packet *wp)
  509 {
  510         struct bio *cbp, *pbp;
  511         struct gv_sd *s;
  512         int finished, i;
  513 
  514         finished = 1;
  515 
  516         if (wp->waiting != NULL) {
  517                 pbp = wp->waiting;
  518                 wp->waiting = NULL;
  519                 cbp = wp->parity;
  520                 for (i = 0; i < wp->length; i++)
  521                         cbp->bio_data[i] ^= pbp->bio_data[i];
  522                 s = pbp->bio_caller1;
  523                 g_io_request(pbp, s->drive_sc->consumer);
  524                 finished = 0;
  525 
  526         } else if (wp->parity != NULL) {
  527                 cbp = wp->parity;
  528                 wp->parity = NULL;
  529                 s = cbp->bio_caller1;
  530                 g_io_request(cbp, s->drive_sc->consumer);
  531                 finished = 0;
  532         }
  533 
  534         return (finished);
  535 }
  536 
  537 /* Flush the queue with delayed requests. */
  538 static void
  539 gv_plex_flush(struct gv_plex *p)
  540 {
  541         struct bio *bp;
  542 
  543         bp = bioq_takefirst(p->rqueue);
  544         while (bp != NULL) {
  545                 gv_plex_start(p, bp);
  546                 bp = bioq_takefirst(p->rqueue);
  547         }
  548 }
  549 
  550 static void
  551 gv_post_bio(struct gv_softc *sc, struct bio *bp)
  552 {
  553 
  554         KASSERT(sc != NULL, ("NULL sc"));
  555         KASSERT(bp != NULL, ("NULL bp"));
  556         mtx_lock(&sc->bqueue_mtx);
  557         bioq_disksort(sc->bqueue_down, bp);
  558         wakeup(sc);
  559         mtx_unlock(&sc->bqueue_mtx);
  560 }
  561 
  562 int
  563 gv_sync_request(struct gv_plex *from, struct gv_plex *to, off_t offset,
  564     off_t length, int type, caddr_t data)
  565 {
  566         struct gv_softc *sc;
  567         struct bio *bp;
  568 
  569         KASSERT(from != NULL, ("NULL from"));
  570         KASSERT(to != NULL, ("NULL to"));
  571         sc = from->vinumconf;
  572         KASSERT(sc != NULL, ("NULL sc"));
  573 
  574         bp = g_new_bio();
  575         if (bp == NULL) {
  576                 G_VINUM_DEBUG(0, "sync from '%s' failed at offset "
  577                     " %jd; out of memory", from->name, offset);
  578                 return (ENOMEM);
  579         }
  580         bp->bio_length = length;
  581         bp->bio_done = NULL;
  582         bp->bio_pflags |= GV_BIO_SYNCREQ;
  583         bp->bio_offset = offset;
  584         bp->bio_caller1 = from;
  585         bp->bio_caller2 = to;
  586         bp->bio_cmd = type;
  587         if (data == NULL)
  588                 data = g_malloc(length, M_WAITOK);
  589         bp->bio_pflags |= GV_BIO_MALLOC; /* Free on the next run. */
  590         bp->bio_data = data;
  591 
  592         /* Send down next. */
  593         gv_post_bio(sc, bp);
  594         //gv_plex_start(from, bp);
  595         return (0);
  596 }
  597 
  598 /*
  599  * Handle a finished plex sync bio.
  600  */
  601 int
  602 gv_sync_complete(struct gv_plex *to, struct bio *bp)
  603 {
  604         struct gv_plex *from, *p;
  605         struct gv_sd *s;
  606         struct gv_volume *v;
  607         struct gv_softc *sc;
  608         off_t offset;
  609         int err;
  610 
  611         g_topology_assert_not();
  612 
  613         err = 0;
  614         KASSERT(to != NULL, ("NULL to"));
  615         KASSERT(bp != NULL, ("NULL bp"));
  616         from = bp->bio_caller2;
  617         KASSERT(from != NULL, ("NULL from"));
  618         v = to->vol_sc;
  619         KASSERT(v != NULL, ("NULL v"));
  620         sc = v->vinumconf;
  621         KASSERT(sc != NULL, ("NULL sc"));
  622 
  623         /* If it was a read, write it. */
  624         if (bp->bio_cmd == BIO_READ) {
  625                 err = gv_sync_request(from, to, bp->bio_offset, bp->bio_length,
  626                     BIO_WRITE, bp->bio_data);
  627         /* If it was a write, read the next one. */
  628         } else if (bp->bio_cmd == BIO_WRITE) {
  629                 if (bp->bio_pflags & GV_BIO_MALLOC)
  630                         g_free(bp->bio_data);
  631                 to->synced += bp->bio_length;
  632                 /* If we're finished, clean up. */
  633                 if (bp->bio_offset + bp->bio_length >= from->size) {
  634                         G_VINUM_DEBUG(1, "syncing of %s from %s completed",
  635                             to->name, from->name);
  636                         /* Update our state. */
  637                         LIST_FOREACH(s, &to->subdisks, in_plex)
  638                                 gv_set_sd_state(s, GV_SD_UP, 0);
  639                         gv_update_plex_state(to);
  640                         to->flags &= ~GV_PLEX_SYNCING;
  641                         to->synced = 0;
  642                         gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0);
  643                 } else {
  644                         offset = bp->bio_offset + bp->bio_length;
  645                         err = gv_sync_request(from, to, offset,
  646                             MIN(bp->bio_length, from->size - offset),
  647                             BIO_READ, NULL);
  648                 }
  649         }
  650         g_destroy_bio(bp);
  651         /* Clean up if there was an error. */
  652         if (err) {
  653                 to->flags &= ~GV_PLEX_SYNCING;
  654                 G_VINUM_DEBUG(0, "error syncing plexes: error code %d", err);
  655         }
  656 
  657         /* Check if all plexes are synced, and lower refcounts. */
  658         g_topology_lock();
  659         LIST_FOREACH(p, &v->plexes, in_volume) {
  660                 if (p->flags & GV_PLEX_SYNCING) {
  661                         g_topology_unlock();
  662                         return (-1);
  663                 }
  664         }
  665         /* If we came here, all plexes are synced, and we're free. */
  666         gv_access(v->provider, -1, -1, 0);
  667         g_topology_unlock();
  668         G_VINUM_DEBUG(1, "plex sync completed");
  669         gv_volume_flush(v);
  670         return (0);
  671 }
  672 
  673 /*
  674  * Create a new bio struct for the next grow request.
  675  */
  676 int
  677 gv_grow_request(struct gv_plex *p, off_t offset, off_t length, int type,
  678     caddr_t data)
  679 {
  680         struct gv_softc *sc;
  681         struct bio *bp;
  682 
  683         KASSERT(p != NULL, ("gv_grow_request: NULL p"));
  684         sc = p->vinumconf;
  685         KASSERT(sc != NULL, ("gv_grow_request: NULL sc"));
  686 
  687         bp = g_new_bio();
  688         if (bp == NULL) {
  689                 G_VINUM_DEBUG(0, "grow of %s failed creating bio: "
  690                     "out of memory", p->name);
  691                 return (ENOMEM);
  692         }
  693 
  694         bp->bio_cmd = type;
  695         bp->bio_done = NULL;
  696         bp->bio_error = 0;
  697         bp->bio_caller1 = p;
  698         bp->bio_offset = offset;
  699         bp->bio_length = length;
  700         bp->bio_pflags |= GV_BIO_GROW;
  701         if (data == NULL)
  702                 data = g_malloc(length, M_WAITOK);
  703         bp->bio_pflags |= GV_BIO_MALLOC;
  704         bp->bio_data = data;
  705 
  706         gv_post_bio(sc, bp);
  707         //gv_plex_start(p, bp);
  708         return (0);
  709 }
  710 
  711 /*
  712  * Finish handling of a bio to a growing plex.
  713  */
  714 void
  715 gv_grow_complete(struct gv_plex *p, struct bio *bp)
  716 {
  717         struct gv_softc *sc;
  718         struct gv_sd *s;
  719         struct gv_volume *v;
  720         off_t origsize, offset;
  721         int sdcount, err;
  722 
  723         v = p->vol_sc;
  724         KASSERT(v != NULL, ("gv_grow_complete: NULL v"));
  725         sc = v->vinumconf;
  726         KASSERT(sc != NULL, ("gv_grow_complete: NULL sc"));
  727         err = 0;
  728 
  729         /* If it was a read, write it. */
  730         if (bp->bio_cmd == BIO_READ) {
  731                 p->synced += bp->bio_length;
  732                 err = gv_grow_request(p, bp->bio_offset, bp->bio_length,
  733                     BIO_WRITE, bp->bio_data);
  734         /* If it was a write, read next. */
  735         } else if (bp->bio_cmd == BIO_WRITE) {
  736                 if (bp->bio_pflags & GV_BIO_MALLOC)
  737                         g_free(bp->bio_data);
  738 
  739                 /* Find the real size of the plex. */
  740                 sdcount = gv_sdcount(p, 1);
  741                 s = LIST_FIRST(&p->subdisks);
  742                 KASSERT(s != NULL, ("NULL s"));
  743                 origsize = (s->size * (sdcount - 1));
  744                 if (bp->bio_offset + bp->bio_length >= origsize) {
  745                         G_VINUM_DEBUG(1, "growing of %s completed", p->name);
  746                         p->flags &= ~GV_PLEX_GROWING;
  747                         LIST_FOREACH(s, &p->subdisks, in_plex) {
  748                                 s->flags &= ~GV_SD_GROW;
  749                                 gv_set_sd_state(s, GV_SD_UP, 0);
  750                         }
  751                         p->size = gv_plex_size(p);
  752                         gv_update_vol_size(v, gv_vol_size(v));
  753                         gv_set_plex_state(p, GV_PLEX_UP, 0);
  754                         g_topology_lock();
  755                         gv_access(v->provider, -1, -1, 0);
  756                         g_topology_unlock();
  757                         p->synced = 0;
  758                         gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0);
  759                         /* Issue delayed requests. */
  760                         gv_plex_flush(p);
  761                 } else {
  762                         offset = bp->bio_offset + bp->bio_length;
  763                         err = gv_grow_request(p, offset,
  764                            MIN(bp->bio_length, origsize - offset),
  765                            BIO_READ, NULL);
  766                 }
  767         }
  768         g_destroy_bio(bp);
  769 
  770         if (err) {
  771                 p->flags &= ~GV_PLEX_GROWING;
  772                 G_VINUM_DEBUG(0, "error growing plex: error code %d", err);
  773         }
  774 }
  775 
  776 /*
  777  * Create an initialization BIO and send it off to the consumer. Assume that
  778  * we're given initialization data as parameter.
  779  */
  780 void
  781 gv_init_request(struct gv_sd *s, off_t start, caddr_t data, off_t length)
  782 {
  783         struct gv_drive *d;
  784         struct g_consumer *cp;
  785         struct bio *bp, *cbp;
  786 
  787         KASSERT(s != NULL, ("gv_init_request: NULL s"));
  788         d = s->drive_sc;
  789         KASSERT(d != NULL, ("gv_init_request: NULL d"));
  790         cp = d->consumer;
  791         KASSERT(cp != NULL, ("gv_init_request: NULL cp"));
  792 
  793         bp = g_new_bio();
  794         if (bp == NULL) {
  795                 G_VINUM_DEBUG(0, "subdisk '%s' init: write failed at offset %jd"
  796                     " (drive offset %jd); out of memory", s->name,
  797                     (intmax_t)s->initialized, (intmax_t)start);
  798                 return; /* XXX: Error codes. */
  799         }
  800         bp->bio_cmd = BIO_WRITE;
  801         bp->bio_data = data;
  802         bp->bio_done = NULL;
  803         bp->bio_error = 0;
  804         bp->bio_length = length;
  805         bp->bio_pflags |= GV_BIO_INIT;
  806         bp->bio_offset = start;
  807         bp->bio_caller1 = s;
  808 
  809         /* Then ofcourse, we have to clone it. */
  810         cbp = g_clone_bio(bp);
  811         if (cbp == NULL) {
  812                 G_VINUM_DEBUG(0, "subdisk '%s' init: write failed at offset %jd"
  813                     " (drive offset %jd); out of memory", s->name,
  814                     (intmax_t)s->initialized, (intmax_t)start);
  815                 return; /* XXX: Error codes. */
  816         }
  817         cbp->bio_done = gv_done;
  818         cbp->bio_caller1 = s;
  819         d->active++;
  820         /* Send it off to the consumer. */
  821         g_io_request(cbp, cp);
  822 }
  823 
  824 /*
  825  * Handle a finished initialization BIO.
  826  */
  827 void
  828 gv_init_complete(struct gv_plex *p, struct bio *bp)
  829 {
  830         struct gv_softc *sc;
  831         struct gv_drive *d;
  832         struct g_consumer *cp;
  833         struct gv_sd *s;
  834         off_t start, length;
  835         caddr_t data;
  836         int error;
  837 
  838         s = bp->bio_caller1;
  839         start = bp->bio_offset;
  840         length = bp->bio_length;
  841         error = bp->bio_error;
  842         data = bp->bio_data;
  843 
  844         KASSERT(s != NULL, ("gv_init_complete: NULL s"));
  845         d = s->drive_sc;
  846         KASSERT(d != NULL, ("gv_init_complete: NULL d"));
  847         cp = d->consumer;
  848         KASSERT(cp != NULL, ("gv_init_complete: NULL cp"));
  849         sc = p->vinumconf;
  850         KASSERT(sc != NULL, ("gv_init_complete: NULL sc"));
  851 
  852         g_destroy_bio(bp);
  853 
  854         /*
  855          * First we need to find out if it was okay, and abort if it's not.
  856          * Then we need to free previous buffers, find out the correct subdisk,
  857          * as well as getting the correct starting point and length of the BIO.
  858          */
  859         if (start >= s->drive_offset + s->size) {
  860                 /* Free the data we initialized. */
  861                 g_free(data);
  862                 g_topology_assert_not();
  863                 g_topology_lock();
  864                 g_access(cp, 0, -1, 0);
  865                 g_topology_unlock();
  866                 if (error) {
  867                         gv_set_sd_state(s, GV_SD_STALE, GV_SETSTATE_FORCE |
  868                             GV_SETSTATE_CONFIG);
  869                 } else {
  870                         gv_set_sd_state(s, GV_SD_UP, GV_SETSTATE_CONFIG);
  871                         s->initialized = 0;
  872                         gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0);
  873                         G_VINUM_DEBUG(1, "subdisk '%s' init: finished "
  874                             "successfully", s->name);
  875                 }
  876                 return;
  877         }
  878         s->initialized += length;
  879         start += length;
  880         gv_init_request(s, start, data, length);
  881 }
  882 
  883 /*
  884  * Create a new bio struct for the next parity rebuild. Used both by internal
  885  * rebuild of degraded plexes as well as user initiated rebuilds/checks.
  886  */
  887 void
  888 gv_parity_request(struct gv_plex *p, int flags, off_t offset)
  889 {
  890         struct gv_softc *sc;
  891         struct bio *bp;
  892 
  893         KASSERT(p != NULL, ("gv_parity_request: NULL p"));
  894         sc = p->vinumconf;
  895         KASSERT(sc != NULL, ("gv_parity_request: NULL sc"));
  896 
  897         bp = g_new_bio();
  898         if (bp == NULL) {
  899                 G_VINUM_DEBUG(0, "rebuild of %s failed creating bio: "
  900                     "out of memory", p->name);
  901                 return;
  902         }
  903 
  904         bp->bio_cmd = BIO_WRITE;
  905         bp->bio_done = NULL;
  906         bp->bio_error = 0;
  907         bp->bio_length = p->stripesize;
  908         bp->bio_caller1 = p;
  909 
  910         /*
  911          * Check if it's a rebuild of a degraded plex or a user request of
  912          * parity rebuild.
  913          */
  914         if (flags & GV_BIO_REBUILD)
  915                 bp->bio_data = g_malloc(GV_DFLT_SYNCSIZE, M_WAITOK);
  916         else if (flags & GV_BIO_CHECK)
  917                 bp->bio_data = g_malloc(p->stripesize, M_WAITOK | M_ZERO);
  918         else {
  919                 G_VINUM_DEBUG(0, "invalid flags given in rebuild");
  920                 return;
  921         }
  922 
  923         bp->bio_pflags = flags;
  924         bp->bio_pflags |= GV_BIO_MALLOC;
  925 
  926         /* We still have more parity to build. */
  927         bp->bio_offset = offset;
  928         gv_post_bio(sc, bp);
  929         //gv_plex_start(p, bp); /* Send it down to the plex. */
  930 }
  931 
  932 /*
  933  * Handle a finished parity write.
  934  */
  935 void
  936 gv_parity_complete(struct gv_plex *p, struct bio *bp)
  937 {
  938         struct gv_softc *sc;
  939         int error, flags;
  940 
  941         error = bp->bio_error;
  942         flags = bp->bio_pflags;
  943         flags &= ~GV_BIO_MALLOC;
  944 
  945         sc = p->vinumconf;
  946         KASSERT(sc != NULL, ("gv_parity_complete: NULL sc"));
  947 
  948         /* Clean up what we allocated. */
  949         if (bp->bio_pflags & GV_BIO_MALLOC)
  950                 g_free(bp->bio_data);
  951         g_destroy_bio(bp);
  952 
  953         if (error == EAGAIN) {
  954                 G_VINUM_DEBUG(0, "parity incorrect at offset 0x%jx",
  955                     (intmax_t)p->synced);
  956         }
  957 
  958         /* Any error is fatal, except EAGAIN when we're rebuilding. */
  959         if (error && !(error == EAGAIN && (flags & GV_BIO_PARITY))) {
  960                 /* Make sure we don't have the lock. */
  961                 g_topology_assert_not();
  962                 g_topology_lock();
  963                 gv_access(p->vol_sc->provider, -1, -1, 0);
  964                 g_topology_unlock();
  965                 G_VINUM_DEBUG(0, "parity check on %s failed at 0x%jx "
  966                     "errno %d", p->name, (intmax_t)p->synced, error);
  967                 return;
  968         } else {
  969                 p->synced += p->stripesize;
  970         }
  971 
  972         if (p->synced >= p->size) {
  973                 /* Make sure we don't have the lock. */
  974                 g_topology_assert_not();
  975                 g_topology_lock();
  976                 gv_access(p->vol_sc->provider, -1, -1, 0);
  977                 g_topology_unlock();
  978                 /* We're finished. */
  979                 G_VINUM_DEBUG(1, "parity operation on %s finished", p->name);
  980                 p->synced = 0;
  981                 gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0);
  982                 return;
  983         }
  984 
  985         /* Send down next. It will determine if we need to itself. */
  986         gv_parity_request(p, flags, p->synced);
  987 }
  988 
  989 /*
  990  * Handle a finished plex rebuild bio.
  991  */
  992 void
  993 gv_rebuild_complete(struct gv_plex *p, struct bio *bp)
  994 {
  995         struct gv_softc *sc;
  996         struct gv_sd *s;
  997         int error, flags;
  998         off_t offset;
  999 
 1000         error = bp->bio_error;
 1001         flags = bp->bio_pflags;
 1002         offset = bp->bio_offset;
 1003         flags &= ~GV_BIO_MALLOC;
 1004         sc = p->vinumconf;
 1005         KASSERT(sc != NULL, ("gv_rebuild_complete: NULL sc"));
 1006 
 1007         /* Clean up what we allocated. */
 1008         if (bp->bio_pflags & GV_BIO_MALLOC)
 1009                 g_free(bp->bio_data);
 1010         g_destroy_bio(bp);
 1011 
 1012         if (error) {
 1013                 g_topology_assert_not();
 1014                 g_topology_lock();
 1015                 gv_access(p->vol_sc->provider, -1, -1, 0);
 1016                 g_topology_unlock();
 1017 
 1018                 G_VINUM_DEBUG(0, "rebuild of %s failed at offset %jd errno: %d",
 1019                     p->name, (intmax_t)offset, error);
 1020                 p->flags &= ~GV_PLEX_REBUILDING;
 1021                 p->synced = 0;
 1022                 gv_plex_flush(p); /* Flush out remaining rebuild BIOs. */
 1023                 return;
 1024         }
 1025 
 1026         offset += (p->stripesize * (gv_sdcount(p, 1) - 1));
 1027         if (offset >= p->size) {
 1028                 /* We're finished. */
 1029                 g_topology_assert_not();
 1030                 g_topology_lock();
 1031                 gv_access(p->vol_sc->provider, -1, -1, 0);
 1032                 g_topology_unlock();
 1033 
 1034                 G_VINUM_DEBUG(1, "rebuild of %s finished", p->name);
 1035                 gv_save_config(p->vinumconf);
 1036                 p->flags &= ~GV_PLEX_REBUILDING;
 1037                 p->synced = 0;
 1038                 /* Try to up all subdisks. */
 1039                 LIST_FOREACH(s, &p->subdisks, in_plex)
 1040                         gv_update_sd_state(s);
 1041                 gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0);
 1042                 gv_plex_flush(p); /* Flush out remaining rebuild BIOs. */
 1043                 return;
 1044         }
 1045 
 1046         /* Send down next. It will determine if we need to itself. */
 1047         gv_parity_request(p, flags, offset);
 1048 }

Cache object: 4b756707bb54d96b2c1de0d1f06ef9a3


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.