FreeBSD/Linux Kernel Cross Reference
sys/geom/vinum/geom_vinum_plex.c

/*-
 * Copyright (c) 2004 Lukas Ertl
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/bio.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/libkern.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/mutex.h>
#include <sys/systm.h>

#include <geom/geom.h>
#include <geom/vinum/geom_vinum_var.h>
#include <geom/vinum/geom_vinum_raid5.h>
#include <geom/vinum/geom_vinum.h>

static void gv_plex_completed_request(struct gv_plex *, struct bio *);
static void gv_plex_normal_request(struct gv_plex *, struct bio *);
static void gv_plex_worker(void *);
static int gv_check_parity(struct gv_plex *, struct bio *,
    struct gv_raid5_packet *);
static int gv_normal_parity(struct gv_plex *, struct bio *,
    struct gv_raid5_packet *);

/* XXX: is this the place to catch dying subdisks? */
static void
gv_plex_orphan(struct g_consumer *cp)
{
        struct g_geom *gp;
        struct gv_plex *p;
        int error;

        g_topology_assert();
        gp = cp->geom;
        g_trace(G_T_TOPOLOGY, "gv_plex_orphan(%s)", gp->name);

        if (cp->acr != 0 || cp->acw != 0 || cp->ace != 0)
                g_access(cp, -cp->acr, -cp->acw, -cp->ace);
        error = cp->provider->error;
        if (error == 0)
                error = ENXIO;
        g_detach(cp);
        g_destroy_consumer(cp);
        if (!LIST_EMPTY(&gp->consumer))
                return;

        p = gp->softc;
        if (p != NULL) {
                gv_kill_plex_thread(p);
                p->geom = NULL;
                p->provider = NULL;
                p->consumer = NULL;
        }
        gp->softc = NULL;
        g_wither_geom(gp, error);
}

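/*
 * Note (editorial): this completion callback runs from GEOM's completion
 * path, so it only flags the bio as done and hands it back to the plex
 * worker thread via the queue; the actual post-processing happens there.
 */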
void
gv_plex_done(struct bio *bp)
{
        struct gv_plex *p;

        p = bp->bio_from->geom->softc;
        bp->bio_cflags |= GV_BIO_DONE;
        mtx_lock(&p->bqueue_mtx);
        bioq_insert_tail(p->bqueue, bp);
        wakeup(p);
        mtx_unlock(&p->bqueue_mtx);
}

/* Find the correct subdisk to send the bio to and build a bio to send. */
static int
gv_plexbuffer(struct gv_plex *p, struct bio *bp, caddr_t addr, off_t boff, off_t bcount)
{
        struct g_geom *gp;
        struct gv_sd *s;
        struct bio *cbp, *pbp;
        int i, sdno;
        off_t len_left, real_len, real_off;
        off_t stripeend, stripeno, stripestart;

        if (p == NULL || LIST_EMPTY(&p->subdisks))
                return (ENXIO);

        s = NULL;
        gp = bp->bio_to->geom;

        /*
         * We only handle concatenated and striped plexes here.  RAID5 plexes
         * are handled in build_raid5_request().
         */
        switch (p->org) {
        case GV_PLEX_CONCAT:
                /*
                 * Find the subdisk where this request starts.  The subdisks in
                 * this list must be ordered by plex_offset.
                 */
                LIST_FOREACH(s, &p->subdisks, in_plex) {
                        if (s->plex_offset <= boff &&
                            s->plex_offset + s->size > boff)
                                break;
                }
                /* Subdisk not found. */
                if (s == NULL)
                        return (ENXIO);

                /* Calculate corresponding offsets on disk. */
                real_off = boff - s->plex_offset;
                len_left = s->size - real_off;
                real_len = (bcount > len_left) ? len_left : bcount;
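                /*
                 * Example (illustrative sizes): with two 1 GB subdisks, sd0
                 * covers plex offsets [0, 1G) and sd1 covers [1G, 2G), so a
                 * request at boff = 1.5G maps to sd1 with real_off = 0.5G,
                 * and real_len clips it to what is left of sd1.
                 */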
                break;

        case GV_PLEX_STRIPED:
                /* The number of the stripe where the request starts. */
                stripeno = boff / p->stripesize;

                /* The number of the subdisk where the stripe resides. */
                sdno = stripeno % p->sdcount;

                /* Find the right subdisk. */
                i = 0;
                LIST_FOREACH(s, &p->subdisks, in_plex) {
                        if (i == sdno)
                                break;
                        i++;
                }

                /* Subdisk not found. */
                if (s == NULL)
                        return (ENXIO);

                /* The offset of the stripe from the start of the subdisk. */
                stripestart = (stripeno / p->sdcount) *
                    p->stripesize;

                /* The offset at the end of the stripe. */
                stripeend = stripestart + p->stripesize;

                /* The offset of the request on this subdisk. */
                real_off = boff - (stripeno * p->stripesize) +
                    stripestart;

                /* The length left in this stripe. */
                len_left = stripeend - real_off;

                real_len = (bcount <= len_left) ? bcount : len_left;
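                /*
                 * Worked example (illustrative values): stripesize = 64k,
                 * sdcount = 3, boff = 200k.  Then stripeno = 200k / 64k = 3,
                 * sdno = 3 % 3 = 0, stripestart = (3 / 3) * 64k = 64k, and
                 * real_off = 200k - 3 * 64k + 64k = 72k, i.e. 8k into the
                 * second stripe kept on subdisk 0.  len_left = 128k - 72k =
                 * 56k, so a larger request is split at the stripe boundary.
                 */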
                break;

        default:
                return (EINVAL);
        }

        /* Now check if we can handle the request on this subdisk. */
        switch (s->state) {
        case GV_SD_UP:
                /* If the subdisk is up, just continue. */
                break;

        case GV_SD_STALE:
                if (!(bp->bio_cflags & GV_BIO_SYNCREQ))
                        return (ENXIO);

                printf("GEOM_VINUM: sd %s is initializing\n", s->name);
                gv_set_sd_state(s, GV_SD_INITIALIZING, GV_SETSTATE_FORCE);
                break;

        case GV_SD_INITIALIZING:
                if (bp->bio_cmd == BIO_READ)
                        return (ENXIO);
                break;

        default:
                /* All other subdisk states mean it's not accessible. */
                return (ENXIO);
        }

        /* Clone the bio and adjust the offsets and sizes. */
        cbp = g_clone_bio(bp);
        if (cbp == NULL)
                return (ENOMEM);
        cbp->bio_offset = real_off;
        cbp->bio_length = real_len;
        cbp->bio_data = addr;
        cbp->bio_done = g_std_done;
        cbp->bio_caller2 = s->consumer;
        if ((bp->bio_cflags & GV_BIO_SYNCREQ)) {
                cbp->bio_cflags |= GV_BIO_SYNCREQ;
                cbp->bio_done = gv_plex_done;
        }

        if (bp->bio_driver1 == NULL) {
                bp->bio_driver1 = cbp;
        } else {
                pbp = bp->bio_driver1;
                while (pbp->bio_caller1 != NULL)
                        pbp = pbp->bio_caller1;
                pbp->bio_caller1 = cbp;
        }

        return (0);
}
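
/*
 * Note on the convention used above: gv_plexbuffer() queues each clone on a
 * singly linked list rooted at bp->bio_driver1 and chained through
 * bio_caller1, with bio_caller2 of every clone holding the consumer it will
 * later be issued to by gv_plex_normal_request().
 */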

static void
gv_plex_start(struct bio *bp)
{
        struct gv_plex *p;

        switch (bp->bio_cmd) {
        case BIO_READ:
        case BIO_WRITE:
        case BIO_DELETE:
                break;
        case BIO_GETATTR:
        default:
                g_io_deliver(bp, EOPNOTSUPP);
                return;
        }

        /*
         * We cannot handle this request if too many of our subdisks are
         * inaccessible.
         */
        p = bp->bio_to->geom->softc;
        if ((p->state < GV_PLEX_DEGRADED) &&
            !(bp->bio_cflags & GV_BIO_SYNCREQ)) {
                g_io_deliver(bp, ENXIO);
                return;
        }

        mtx_lock(&p->bqueue_mtx);
        bioq_disksort(p->bqueue, bp);
        wakeup(p);
        mtx_unlock(&p->bqueue_mtx);
}

static void
gv_plex_worker(void *arg)
{
        struct bio *bp;
        struct gv_plex *p;
        struct gv_sd *s;

        p = arg;
        KASSERT(p != NULL, ("NULL p"));

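        /*
         * Main loop: three kinds of bios arrive here -- completed
         * sub-requests (GV_BIO_DONE), sub-requests held back earlier because
         * of a stripe collision (GV_BIO_ONHOLD), and fresh requests queued by
         * gv_plex_start().
         */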
        mtx_lock(&p->bqueue_mtx);
        for (;;) {
                /* We were signaled to exit. */
                if (p->flags & GV_PLEX_THREAD_DIE)
                        break;

                /* Take the first BIO from our queue. */
                bp = bioq_takefirst(p->bqueue);
                if (bp == NULL) {
                        msleep(p, &p->bqueue_mtx, PRIBIO, "-", hz/10);
                        continue;
                }
                mtx_unlock(&p->bqueue_mtx);

                /* A completed request. */
                if (bp->bio_cflags & GV_BIO_DONE) {
                        if (bp->bio_cflags & GV_BIO_SYNCREQ ||
                            bp->bio_cflags & GV_BIO_REBUILD) {
                                s = bp->bio_to->private;
                                if (bp->bio_error == 0)
                                        s->initialized += bp->bio_length;
                                if (s->initialized >= s->size) {
                                        g_topology_lock();
                                        gv_set_sd_state(s, GV_SD_UP,
                                            GV_SETSTATE_CONFIG);
                                        g_topology_unlock();
                                        s->initialized = 0;
                                }
                        }

                        if (bp->bio_cflags & GV_BIO_SYNCREQ)
                                g_std_done(bp);
                        else
                                gv_plex_completed_request(p, bp);
                /*
                 * A sub-request that was held back because it interfered
                 * with another sub-request.
                 */
                } else if (bp->bio_cflags & GV_BIO_ONHOLD) {
                        /* Is it still locked out? */
                        if (gv_stripe_active(p, bp)) {
                                /* Park the bio on the waiting queue. */
                                mtx_lock(&p->bqueue_mtx);
                                bioq_disksort(p->wqueue, bp);
                                mtx_unlock(&p->bqueue_mtx);
                        } else {
                                bp->bio_cflags &= ~GV_BIO_ONHOLD;
                                g_io_request(bp, bp->bio_caller2);
                        }

                /* A normal request to this plex. */
                } else
                        gv_plex_normal_request(p, bp);

                mtx_lock(&p->bqueue_mtx);
        }
        mtx_unlock(&p->bqueue_mtx);
        p->flags |= GV_PLEX_THREAD_DEAD;
        wakeup(p);

        kthread_exit(ENXIO);
}

static int
gv_normal_parity(struct gv_plex *p, struct bio *bp, struct gv_raid5_packet *wp)
{
        struct bio *cbp, *pbp;
        int finished, i;

        finished = 1;

        if (wp->waiting != NULL) {
                pbp = wp->waiting;
                wp->waiting = NULL;
                cbp = wp->parity;
                for (i = 0; i < wp->length; i++)
                        cbp->bio_data[i] ^= pbp->bio_data[i];
                g_io_request(pbp, pbp->bio_caller2);
                finished = 0;

        } else if (wp->parity != NULL) {
                cbp = wp->parity;
                wp->parity = NULL;
                g_io_request(cbp, cbp->bio_caller2);
                finished = 0;
        }

        return (finished);
}
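
/*
 * RAID5 parity recap: for a full stripe, P = D0 ^ D1 ^ ... ^ Dn-1.
 * XOR-accumulating each finished data buffer into the parity buffer (as
 * done above and in gv_plex_completed_request()) therefore leaves that
 * buffer holding the new parity once the last data sub-request is in.
 */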

static int
gv_check_parity(struct gv_plex *p, struct bio *bp, struct gv_raid5_packet *wp)
{
        struct bio *pbp;
        int err, finished, i;

        err = 0;
        finished = 1;

        if (wp->waiting != NULL) {
                pbp = wp->waiting;
                wp->waiting = NULL;
                g_io_request(pbp, pbp->bio_caller2);
                finished = 0;

        } else if (wp->parity != NULL) {
                pbp = wp->parity;
                wp->parity = NULL;

                /* Check if the parity is correct. */
                for (i = 0; i < wp->length; i++) {
                        if (bp->bio_data[i] != pbp->bio_data[i]) {
                                err = 1;
                                break;
                        }
                }

                /* The parity is not correct... */
                if (err) {
                        bp->bio_parent->bio_error = EAGAIN;

                        /* ... but we rewrite it if we were asked to. */
                        if (bp->bio_parent->bio_cflags & GV_BIO_PARITY) {
                                g_io_request(pbp, pbp->bio_caller2);
                                finished = 0;
                        }
                }

                /*
                 * Clean up the BIO we would have used for rebuilding the
                 * parity.
                 */
                if (finished) {
                        bp->bio_parent->bio_inbed++;
                        g_destroy_bio(pbp);
                }

        }

        return (finished);
}

void
gv_plex_completed_request(struct gv_plex *p, struct bio *bp)
{
        struct bio *cbp, *pbp;
        struct gv_bioq *bq, *bq2;
        struct gv_raid5_packet *wp;
        int i;

        wp = bp->bio_driver1;

        switch (bp->bio_parent->bio_cmd) {
        case BIO_READ:
                if (wp == NULL)
                        break;

                TAILQ_FOREACH_SAFE(bq, &wp->bits, queue, bq2) {
                        if (bq->bp == bp) {
                                TAILQ_REMOVE(&wp->bits, bq, queue);
                                g_free(bq);
                                for (i = 0; i < wp->length; i++)
                                        wp->data[i] ^= bp->bio_data[i];
                                break;
                        }
                }
                if (TAILQ_EMPTY(&wp->bits)) {
                        bp->bio_parent->bio_completed += wp->length;
                        if (wp->lockbase != -1) {
                                TAILQ_REMOVE(&p->packets, wp, list);
                                /* Bring the waiting bios back into the game. */
                                mtx_lock(&p->bqueue_mtx);
                                pbp = bioq_takefirst(p->wqueue);
                                while (pbp != NULL) {
                                        bioq_disksort(p->bqueue, pbp);
                                        pbp = bioq_takefirst(p->wqueue);
                                }
                                mtx_unlock(&p->bqueue_mtx);
                        }
                        g_free(wp);
                }

                break;

        case BIO_WRITE:
                if (wp == NULL)
                        break;

                /* Check if we need to handle parity data. */
                TAILQ_FOREACH_SAFE(bq, &wp->bits, queue, bq2) {
                        if (bq->bp == bp) {
                                TAILQ_REMOVE(&wp->bits, bq, queue);
                                g_free(bq);
                                cbp = wp->parity;
                                if (cbp != NULL) {
                                        for (i = 0; i < wp->length; i++)
                                                cbp->bio_data[i] ^=
                                                    bp->bio_data[i];
                                }
                                break;
                        }
                }

                /* Handle parity data. */
                if (TAILQ_EMPTY(&wp->bits)) {
                        if (bp->bio_parent->bio_cflags & GV_BIO_CHECK)
                                i = gv_check_parity(p, bp, wp);
                        else
                                i = gv_normal_parity(p, bp, wp);

                        /* All of our sub-requests have finished. */
                        if (i) {
                                bp->bio_parent->bio_completed += wp->length;
                                TAILQ_REMOVE(&p->packets, wp, list);
                                /* Bring the waiting bios back into the game. */
                                mtx_lock(&p->bqueue_mtx);
                                pbp = bioq_takefirst(p->wqueue);
                                while (pbp != NULL) {
                                        bioq_disksort(p->bqueue, pbp);
                                        pbp = bioq_takefirst(p->wqueue);
                                }
                                mtx_unlock(&p->bqueue_mtx);
                                g_free(wp);
                        }
                }

                break;
        }

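        /*
         * GEOM convention: a parent bio is complete once bio_inbed (the
         * number of finished children) reaches bio_children (the number of
         * clones issued), so we deliver it on the last completion.
         */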
        pbp = bp->bio_parent;
        if (pbp->bio_error == 0)
                pbp->bio_error = bp->bio_error;

        /* When the original request is finished, we deliver it. */
        pbp->bio_inbed++;
        if (pbp->bio_inbed == pbp->bio_children)
                g_io_deliver(pbp, pbp->bio_error);

        /* Clean up what we allocated. */
        if (bp->bio_cflags & GV_BIO_MALLOC)
                g_free(bp->bio_data);
        g_destroy_bio(bp);
}

void
gv_plex_normal_request(struct gv_plex *p, struct bio *bp)
{
        struct bio *cbp, *pbp;
        struct gv_bioq *bq, *bq2;
        struct gv_raid5_packet *wp, *wp2;
        caddr_t addr;
        off_t bcount, boff;
        int err;

        bcount = bp->bio_length;
        addr = bp->bio_data;
        boff = bp->bio_offset;

        /* Walk over the whole length of the request, we might split it up. */
        while (bcount > 0) {
                wp = NULL;

                /*
                 * RAID5 plexes need special treatment, as a single write
                 * request involves several read/write sub-requests.
                 */
                if (p->org == GV_PLEX_RAID5) {
                        wp = g_malloc(sizeof(*wp), M_WAITOK | M_ZERO);
                        wp->bio = bp;
                        TAILQ_INIT(&wp->bits);

                        if (bp->bio_cflags & GV_BIO_REBUILD)
                                err = gv_rebuild_raid5(p, wp, bp, addr,
                                    boff, bcount);
                        else if (bp->bio_cflags & GV_BIO_CHECK)
                                err = gv_check_raid5(p, wp, bp, addr,
                                    boff, bcount);
                        else
                                err = gv_build_raid5_req(p, wp, bp, addr,
                                    boff, bcount);

                        /*
                         * Building the sub-request failed, we probably need to
                         * clean up a lot.
                         */
                        if (err) {
                                printf("GEOM_VINUM: plex request failed for ");
                                g_print_bio(bp);
                                printf("\n");
                                TAILQ_FOREACH_SAFE(bq, &wp->bits, queue, bq2) {
                                        TAILQ_REMOVE(&wp->bits, bq, queue);
                                        g_free(bq);
                                }
                                if (wp->waiting != NULL) {
                                        if (wp->waiting->bio_cflags &
                                            GV_BIO_MALLOC)
                                                g_free(wp->waiting->bio_data);
                                        g_destroy_bio(wp->waiting);
                                }
                                if (wp->parity != NULL) {
                                        if (wp->parity->bio_cflags &
                                            GV_BIO_MALLOC)
                                                g_free(wp->parity->bio_data);
                                        g_destroy_bio(wp->parity);
                                }
                                g_free(wp);

                                TAILQ_FOREACH_SAFE(wp, &p->packets, list, wp2) {
                                        if (wp->bio == bp) {
                                                TAILQ_REMOVE(&p->packets, wp,
                                                    list);
                                                TAILQ_FOREACH_SAFE(bq,
                                                    &wp->bits, queue, bq2) {
                                                        TAILQ_REMOVE(&wp->bits,
                                                            bq, queue);
                                                        g_free(bq);
                                                }
                                                g_free(wp);
                                        }
                                }

                                cbp = bp->bio_driver1;
                                while (cbp != NULL) {
                                        pbp = cbp->bio_caller1;
                                        if (cbp->bio_cflags & GV_BIO_MALLOC)
                                                g_free(cbp->bio_data);
                                        g_destroy_bio(cbp);
                                        cbp = pbp;
                                }

                                g_io_deliver(bp, err);
                                return;
                        }

                        if (TAILQ_EMPTY(&wp->bits))
                                g_free(wp);
                        else if (wp->lockbase != -1)
                                TAILQ_INSERT_TAIL(&p->packets, wp, list);

                /*
                 * Requests to concatenated and striped plexes go straight
                 * through.
                 */
                } else {
                        err = gv_plexbuffer(p, bp, addr, boff, bcount);

                        /* Building the sub-request failed. */
                        if (err) {
                                printf("GEOM_VINUM: plex request failed for ");
                                g_print_bio(bp);
                                printf("\n");
                                cbp = bp->bio_driver1;
                                while (cbp != NULL) {
                                        pbp = cbp->bio_caller1;
                                        g_destroy_bio(cbp);
                                        cbp = pbp;
                                }
                                g_io_deliver(bp, err);
                                return;
                        }
                }

                /* Abuse bio_caller1 as a linked list. */
                pbp = bp->bio_driver1;
                while (pbp->bio_caller1 != NULL)
                        pbp = pbp->bio_caller1;
                bcount -= pbp->bio_length;
                addr += pbp->bio_length;
                boff += pbp->bio_length;
        }

        /* Fire off all sub-requests. */
        pbp = bp->bio_driver1;
        while (pbp != NULL) {
                /*
                 * RAID5 sub-requests need to come in the correct order,
                 * otherwise we trip over the parity, as it might be
                 * overwritten by another sub-request.
                 */
                if (pbp->bio_driver1 != NULL &&
                    gv_stripe_active(p, pbp)) {
                        /* Park the bio on the waiting queue. */
                        pbp->bio_cflags |= GV_BIO_ONHOLD;
                        mtx_lock(&p->bqueue_mtx);
                        bioq_disksort(p->wqueue, pbp);
                        mtx_unlock(&p->bqueue_mtx);
                } else
                        g_io_request(pbp, pbp->bio_caller2);
                pbp = pbp->bio_caller1;
        }
}

static int
gv_plex_access(struct g_provider *pp, int dr, int dw, int de)
{
        struct gv_plex *p;
        struct g_geom *gp;
        struct g_consumer *cp, *cp2;
        int error;

        gp = pp->geom;
        p = gp->softc;
        KASSERT(p != NULL, ("NULL p"));

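        /*
         * RAID5 writes are read-modify-write cycles (the old data and parity
         * must be read back to compute the new parity), so a write-only open
         * of the plex still needs read access on the consumers below.
         */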
        if (p->org == GV_PLEX_RAID5) {
                if (dw > 0 && dr == 0)
                        dr = 1;
                else if (dw < 0 && dr == 0)
                        dr = -1;
        }

        LIST_FOREACH(cp, &gp->consumer, consumer) {
                error = g_access(cp, dr, dw, de);
                if (error) {
                        LIST_FOREACH(cp2, &gp->consumer, consumer) {
                                if (cp == cp2)
                                        break;
                                g_access(cp2, -dr, -dw, -de);
                        }
                        return (error);
                }
        }
        return (0);
}

static struct g_geom *
gv_plex_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
{
        struct g_geom *gp;
        struct g_consumer *cp, *cp2;
        struct g_provider *pp2;
        struct gv_plex *p;
        struct gv_sd *s;
        struct gv_softc *sc;
        int error;

        g_trace(G_T_TOPOLOGY, "gv_plex_taste(%s, %s)", mp->name, pp->name);
        g_topology_assert();

        /* We only want to attach to subdisks. */
        if (strcmp(pp->geom->class->name, "VINUMDRIVE"))
                return (NULL);

        /* Find the VINUM class and its associated geom. */
        gp = find_vinum_geom();
        if (gp == NULL)
                return (NULL);
        sc = gp->softc;
        KASSERT(sc != NULL, ("gv_plex_taste: NULL sc"));

        /* Find out which subdisk the offered provider corresponds to. */
        s = pp->private;
        KASSERT(s != NULL, ("gv_plex_taste: NULL s"));

        /* Now find the plex to which this subdisk belongs. */
        p = gv_find_plex(sc, s->plex);
        if (p == NULL) {
                printf("gv_plex_taste: NULL p for '%s'\n", s->name);
                return (NULL);
        }

        /*
         * Add this subdisk to this plex.  Since we trust the on-disk
         * configuration, we don't check the given value (should we?).
         * XXX: shouldn't be done here
         */
        gv_sd_to_plex(p, s, 0);

        /* Now check if there's already a geom for this plex. */
        gp = p->geom;

        /* Yes, there is already a geom, so we just add the consumer. */
        if (gp != NULL) {
                cp2 = LIST_FIRST(&gp->consumer);
                /* Need to attach a new consumer to this subdisk. */
                cp = g_new_consumer(gp);
                error = g_attach(cp, pp);
                if (error) {
                        printf("geom_vinum: couldn't attach consumer to %s\n",
                            pp->name);
                        g_destroy_consumer(cp);
                        return (NULL);
                }
                /* Adjust the access counts of the new consumer. */
                if ((cp2 != NULL) && (cp2->acr || cp2->acw || cp2->ace)) {
                        error = g_access(cp, cp2->acr, cp2->acw, cp2->ace);
                        if (error) {
                                printf("geom_vinum: couldn't set access counts"
                                    " for consumer on %s\n", pp->name);
                                g_detach(cp);
                                g_destroy_consumer(cp);
                                return (NULL);
                        }
                }
                s->consumer = cp;

                /* Adjust the size of the providers this plex has. */
                LIST_FOREACH(pp2, &gp->provider, provider)
                        pp2->mediasize = p->size;

                /* Update the size of the volume this plex is attached to. */
                if (p->vol_sc != NULL)
                        gv_update_vol_size(p->vol_sc, p->size);

                /*
                 * If necessary, create bio queues, a queue mutex and a worker
                 * thread.
                 */
                if (p->bqueue == NULL) {
                        p->bqueue = g_malloc(sizeof(struct bio_queue_head),
                            M_WAITOK | M_ZERO);
                        bioq_init(p->bqueue);
                }
                if (p->wqueue == NULL) {
                        p->wqueue = g_malloc(sizeof(struct bio_queue_head),
                            M_WAITOK | M_ZERO);
                        bioq_init(p->wqueue);
                }
                if (mtx_initialized(&p->bqueue_mtx) == 0)
                        mtx_init(&p->bqueue_mtx, "gv_plex", NULL, MTX_DEF);
                if (!(p->flags & GV_PLEX_THREAD_ACTIVE)) {
                        kthread_create(gv_plex_worker, p, NULL, 0, 0, "gv_p %s",
                            p->name);
                        p->flags |= GV_PLEX_THREAD_ACTIVE;
                }

                return (NULL);

        /* We need to create a new geom. */
        } else {
                gp = g_new_geomf(mp, "%s", p->name);
                gp->start = gv_plex_start;
                gp->orphan = gv_plex_orphan;
                gp->access = gv_plex_access;
                gp->softc = p;
                p->geom = gp;

                TAILQ_INIT(&p->packets);
                p->bqueue = g_malloc(sizeof(struct bio_queue_head),
                    M_WAITOK | M_ZERO);
                bioq_init(p->bqueue);
                p->wqueue = g_malloc(sizeof(struct bio_queue_head),
                    M_WAITOK | M_ZERO);
                bioq_init(p->wqueue);
                mtx_init(&p->bqueue_mtx, "gv_plex", NULL, MTX_DEF);
                kthread_create(gv_plex_worker, p, NULL, 0, 0, "gv_p %s",
                    p->name);
                p->flags |= GV_PLEX_THREAD_ACTIVE;

                /* Attach a consumer to this provider. */
                cp = g_new_consumer(gp);
                g_attach(cp, pp);
                s->consumer = cp;

                /* Create a provider for the outside world. */
                pp2 = g_new_providerf(gp, "gvinum/plex/%s", p->name);
                pp2->mediasize = p->size;
                pp2->sectorsize = pp->sectorsize;
                p->provider = pp2;
                g_error_provider(pp2, 0);
                return (gp);
        }
}

static int
gv_plex_destroy_geom(struct gctl_req *req, struct g_class *mp,
    struct g_geom *gp)
{
        struct gv_plex *p;

        g_trace(G_T_TOPOLOGY, "gv_plex_destroy_geom: %s", gp->name);
        g_topology_assert();

        p = gp->softc;

        KASSERT(p != NULL, ("gv_plex_destroy_geom: null p of '%s'", gp->name));

        /*
         * If this is a RAID5 plex, check if its worker thread is still active
         * and signal it to self-destruct.
         */
        gv_kill_plex_thread(p);
        /* g_free(sc); */
        g_wither_geom(gp, ENXIO);
        return (0);
}

#define VINUMPLEX_CLASS_NAME "VINUMPLEX"

static struct g_class g_vinum_plex_class = {
        .name = VINUMPLEX_CLASS_NAME,
        .version = G_VERSION,
        .taste = gv_plex_taste,
        .destroy_geom = gv_plex_destroy_geom,
};

DECLARE_GEOM_CLASS(g_vinum_plex_class, g_vinum_plex);
