The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/geom/geom_ccd.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * SPDX-License-Identifier: (BSD-2-Clause-NetBSD AND BSD-3-Clause)
    3  *
    4  * Copyright (c) 2003 Poul-Henning Kamp.
    5  * Copyright (c) 1996, 1997 The NetBSD Foundation, Inc.
    6  * All rights reserved.
    7  *
    8  * This code is derived from software contributed to The NetBSD Foundation
    9  * by Jason R. Thorpe.
   10  *
   11  * Redistribution and use in source and binary forms, with or without
   12  * modification, are permitted provided that the following conditions
   13  * are met:
   14  * 1. Redistributions of source code must retain the above copyright
   15  *    notice, this list of conditions and the following disclaimer.
   16  * 2. Redistributions in binary form must reproduce the above copyright
   17  *    notice, this list of conditions and the following disclaimer in the
   18  *    documentation and/or other materials provided with the distribution.
   19  *
   20  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
   21  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
   22  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
   23  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
   24  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   30  * POSSIBILITY OF SUCH DAMAGE.
   31  *
   32  * $NetBSD: ccd.c,v 1.22 1995/12/08 19:13:26 thorpej Exp $ 
   33  */
   34 
   35 /*-
   36  * Copyright (c) 1988 University of Utah.
   37  * Copyright (c) 1990, 1993
   38  *      The Regents of the University of California.  All rights reserved.
   39  *
   40  * This code is derived from software contributed to Berkeley by
   41  * the Systems Programming Group of the University of Utah Computer
   42  * Science Department.
   43  *
   44  * Redistribution and use in source and binary forms, with or without
   45  * modification, are permitted provided that the following conditions
   46  * are met:
   47  * 1. Redistributions of source code must retain the above copyright
   48  *    notice, this list of conditions and the following disclaimer.
   49  * 2. Redistributions in binary form must reproduce the above copyright
   50  *    notice, this list of conditions and the following disclaimer in the
   51  *    documentation and/or other materials provided with the distribution.
   52  * 3. Neither the name of the University nor the names of its contributors
   53  *    may be used to endorse or promote products derived from this software
   54  *    without specific prior written permission.
   55  *
   56  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   57  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   58  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   59  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   60  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   61  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   62  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   63  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   64  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   65  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   66  * SUCH DAMAGE.
   67  *
   68  * from: Utah $Hdr: cd.c 1.6 90/11/28$
   69  *
   70  *      @(#)cd.c        8.2 (Berkeley) 11/16/93
   71  */
   72 
   73 /*
   74  * Dynamic configuration and disklabel support by:
   75  *      Jason R. Thorpe <thorpej@nas.nasa.gov>
   76  *      Numerical Aerodynamic Simulation Facility
   77  *      Mail Stop 258-6
   78  *      NASA Ames Research Center
   79  *      Moffett Field, CA 94035
   80  */
   81 
   82 #include <sys/cdefs.h>
   83 __FBSDID("$FreeBSD: releng/12.0/sys/geom/geom_ccd.c 332387 2018-04-10 19:18:16Z kevans $");
   84 
   85 #include <sys/param.h>
   86 #include <sys/systm.h>
   87 #include <sys/kernel.h>
   88 #include <sys/module.h>
   89 #include <sys/bio.h>
   90 #include <sys/malloc.h>
   91 #include <sys/sbuf.h>
   92 #include <geom/geom.h>
   93 
    94 /*
    95  * Number of blocks to leave untouched in front of a component partition.
    96  * This is to avoid violating its disklabel area when it starts at the
    97  * beginning of the slice.
    98  */
    99 #if !defined(CCD_OFFSET)
   100 #define CCD_OFFSET 16
   101 #endif
   102 
   103 /* sc_flags */
   104 #define CCDF_UNIFORM    0x02    /* size every component as the smallest one */
   105 #define CCDF_MIRROR     0x04    /* use mirroring */
   106 #define CCDF_NO_OFFSET  0x08    /* do not skip CCD_OFFSET blocks in front */
   107 #define CCDF_LINUX      0x10    /* Linux compat: no offset, interleave doubled */
   108 
   109 /* Mask of user-settable ccd flags (NO_OFFSET and LINUX are not part of it). */
   110 #define CCDF_USERMASK   (CCDF_UNIFORM|CCDF_MIRROR)
  111 
   112 /*
   113  * Interleave description table.
   114  * Computed by ccdinterleave() to speed irregular-interleave lookups.
   115  * The idea is that we interleave in "groups".  First we interleave
   116  * evenly over all component disks up to the size of the smallest
   117  * component (the first group), then we interleave evenly over all
   118  * remaining disks up to the size of the next-smallest (second group),
   119  * and so on.
   120  *
   121  * Each table entry describes the interleave characteristics of one
   122  * of these groups.  For example if a concatenated disk consisted of
   123  * three components of 5, 3, and 7 DEV_BSIZE blocks interleaved at
   124  * DEV_BSIZE (1), the table would have three entries:
   125  *
   126  *      ndisk   startblk        startoff        dev
   127  *      3       0               0               0, 1, 2
   128  *      2       9               3               0, 2
   129  *      1       13              5               2
   130  *      0       -               -               -
   131  *
   132  * which says that the first nine blocks (0-8) are interleaved over
   133  * 3 disks (0, 1, 2) starting at block offset 0 on any component disk,
   134  * the next 4 blocks (9-12) are interleaved over 2 disks (0, 2) starting
   135  * at component block 3, and the remaining blocks (13-14) are on disk
   136  * 2 starting at offset 5.
       *
       * When the interleave is 0 the table instead holds one entry per
       * component, each with ii_ndisk == 1, ii_startoff == 0 and an
       * unscaled ii_startblk.
   137  */
   138 struct ccdiinfo {
   139         int     ii_ndisk;       /* # of disks in range; 0 ends the table */
   140         daddr_t ii_startblk;    /* first ccd block of range, interleave units */
   141         daddr_t ii_startoff;    /* start on a component, interleave units */
   142         int     *ii_index;      /* ordered list of components in range */
   143 };
  144 
   145 /*
   146  * Component info table.
   147  * Describes a single component of a concatenated disk.
   148  */
   149 struct ccdcinfo {
   150         daddr_t         ci_size;                /* usable size, DEV_BSIZE blocks */
   151         struct g_provider *ci_provider;         /* provider backing the component */
   152         struct g_consumer *ci_consumer;         /* consumer component I/O is sent from */
   153 };
  154 
   155 /*
   156  * A concatenated disk is described by this structure.
   157  */
   158 
   159 struct ccd_s {
   160         LIST_ENTRY(ccd_s) list;
   161 
   162         int              sc_unit;               /* logical unit number */
   163         int              sc_flags;              /* CCDF_* flags */
   164         daddr_t          sc_size;               /* size of ccd, DEV_BSIZE blocks */
   165         int              sc_ileave;             /* interleave in blocks, 0 = none */
   166         u_int            sc_ndisks;             /* number of components */
   167         struct ccdcinfo  *sc_cinfo;             /* component info */
   168         struct ccdiinfo  *sc_itable;            /* interleave table */
   169         u_int32_t        sc_secsize;            /* largest component sector size */
   170         int              sc_pick;               /* side of mirror picked */
   171         daddr_t          sc_blk[2];             /* block after last read, per side */
   172         u_int32_t        sc_offset;             /* blocks skipped on each component */
   173 };
  174 
      /* Forward declarations for routines defined further down in this file. */
   175 static g_start_t g_ccd_start;
   176 static void ccdiodone(struct bio *bp);
   177 static void ccdinterleave(struct ccd_s *);
   178 static int ccdinit(struct gctl_req *req, struct ccd_s *);
   179 static int ccdbuffer(struct bio **ret, struct ccd_s *,
   180                       struct bio *, daddr_t, caddr_t, long);
  181 
  /*
   * Orphan handler for ccd consumers.  Deliberately a no-op.
   */
  static void
  g_ccd_orphan(struct g_consumer *cp)
  {

          /*
           * XXX: Nothing is done with cp.  It is not obvious what the
           * XXX: right reaction would be, so this keeps the behaviour of
           * XXX: the previous code: ignore it and let the user cope.
           */
  }
  191 
  192 static int
  193 g_ccd_access(struct g_provider *pp, int dr, int dw, int de)
  194 {
  195         struct g_geom *gp;
  196         struct g_consumer *cp1, *cp2;
  197         int error;
  198 
  199         de += dr;
  200         de += dw;
  201 
  202         gp = pp->geom;
  203         error = ENXIO;
  204         LIST_FOREACH(cp1, &gp->consumer, consumer) {
  205                 error = g_access(cp1, dr, dw, de);
  206                 if (error) {
  207                         LIST_FOREACH(cp2, &gp->consumer, consumer) {
  208                                 if (cp1 == cp2)
  209                                         break;
  210                                 g_access(cp2, -dr, -dw, -de);
  211                         }
  212                         break;
  213                 }
  214         }
  215         return (error);
  216 }
  217 
  218 /*
  219  * Free the softc and its substructures.
  220  */
  221 static void
  222 g_ccd_freesc(struct ccd_s *sc)
  223 {
  224         struct ccdiinfo *ii;
  225 
  226         g_free(sc->sc_cinfo);
  227         if (sc->sc_itable != NULL) {
  228                 for (ii = sc->sc_itable; ii->ii_ndisk > 0; ii++)
  229                         if (ii->ii_index != NULL)
  230                                 g_free(ii->ii_index);
  231                 g_free(sc->sc_itable);
  232         }
  233         g_free(sc);
  234 }
  235 
  236 
  237 static int
  238 ccdinit(struct gctl_req *req, struct ccd_s *cs)
  239 {
  240         struct ccdcinfo *ci;
  241         daddr_t size;
  242         int ix;
  243         daddr_t minsize;
  244         int maxsecsize;
  245         off_t mediasize;
  246         u_int sectorsize;
  247 
  248         cs->sc_size = 0;
  249 
  250         maxsecsize = 0;
  251         minsize = 0;
  252 
  253         if (cs->sc_flags & CCDF_LINUX) {
  254                 cs->sc_offset = 0;
  255                 cs->sc_ileave *= 2;
  256                 if (cs->sc_flags & CCDF_MIRROR && cs->sc_ndisks != 2)
  257                         gctl_error(req, "Mirror mode for Linux raids is "
  258                                         "only supported with 2 devices");
  259         } else {
  260                 if (cs->sc_flags & CCDF_NO_OFFSET)
  261                         cs->sc_offset = 0;
  262                 else
  263                         cs->sc_offset = CCD_OFFSET;
  264 
  265         }
  266         for (ix = 0; ix < cs->sc_ndisks; ix++) {
  267                 ci = &cs->sc_cinfo[ix];
  268 
  269                 mediasize = ci->ci_provider->mediasize;
  270                 sectorsize = ci->ci_provider->sectorsize;
  271                 if (sectorsize > maxsecsize)
  272                         maxsecsize = sectorsize;
  273                 size = mediasize / DEV_BSIZE - cs->sc_offset;
  274 
  275                 /* Truncate to interleave boundary */
  276 
  277                 if (cs->sc_ileave > 1)
  278                         size -= size % cs->sc_ileave;
  279 
  280                 if (size == 0) {
  281                         gctl_error(req, "Component %s has effective size zero",
  282                             ci->ci_provider->name);
  283                         return(ENODEV);
  284                 }
  285 
  286                 if (minsize == 0 || size < minsize)
  287                         minsize = size;
  288                 ci->ci_size = size;
  289                 cs->sc_size += size;
  290         }
  291 
  292         /*
  293          * Don't allow the interleave to be smaller than
  294          * the biggest component sector.
  295          */
  296         if ((cs->sc_ileave > 0) &&
  297             (cs->sc_ileave < (maxsecsize / DEV_BSIZE))) {
  298                 gctl_error(req, "Interleave to small for sector size");
  299                 return(EINVAL);
  300         }
  301 
  302         /*
  303          * If uniform interleave is desired set all sizes to that of
  304          * the smallest component.  This will guarantee that a single
  305          * interleave table is generated.
  306          *
  307          * Lost space must be taken into account when calculating the
  308          * overall size.  Half the space is lost when CCDF_MIRROR is
  309          * specified.
  310          */
  311         if (cs->sc_flags & CCDF_UNIFORM) {
  312                 for (ix = 0; ix < cs->sc_ndisks; ix++) {
  313                         ci = &cs->sc_cinfo[ix];
  314                         ci->ci_size = minsize;
  315                 }
  316                 cs->sc_size = cs->sc_ndisks * minsize;
  317         }
  318 
  319         if (cs->sc_flags & CCDF_MIRROR) {
  320                 /*
  321                  * Check to see if an even number of components
  322                  * have been specified.  The interleave must also
  323                  * be non-zero in order for us to be able to 
  324                  * guarantee the topology.
  325                  */
  326                 if (cs->sc_ndisks % 2) {
  327                         gctl_error(req,
  328                               "Mirroring requires an even number of disks");
  329                         return(EINVAL);
  330                 }
  331                 if (cs->sc_ileave == 0) {
  332                         gctl_error(req,
  333                              "An interleave must be specified when mirroring");
  334                         return(EINVAL);
  335                 }
  336                 cs->sc_size = (cs->sc_ndisks/2) * minsize;
  337         } 
  338 
  339         /*
  340          * Construct the interleave table.
  341          */
  342         ccdinterleave(cs);
  343 
  344         /*
  345          * Create pseudo-geometry based on 1MB cylinders.  It's
  346          * pretty close.
  347          */
  348         cs->sc_secsize = maxsecsize;
  349 
  350         return (0);
  351 }
  352 
  353 static void
  354 ccdinterleave(struct ccd_s *cs)
  355 {
  356         struct ccdcinfo *ci, *smallci;
  357         struct ccdiinfo *ii;
  358         daddr_t bn, lbn;
  359         int ix;
  360         daddr_t size;
  361 
  362 
  363         /*
  364          * Allocate an interleave table.  The worst case occurs when each
  365          * of N disks is of a different size, resulting in N interleave
  366          * tables.
  367          *
  368          * Chances are this is too big, but we don't care.
  369          */
  370         size = (cs->sc_ndisks + 1) * sizeof(struct ccdiinfo);
  371         cs->sc_itable = g_malloc(size, M_WAITOK | M_ZERO);
  372 
  373         /*
  374          * Trivial case: no interleave (actually interleave of disk size).
  375          * Each table entry represents a single component in its entirety.
  376          *
  377          * An interleave of 0 may not be used with a mirror setup.
  378          */
  379         if (cs->sc_ileave == 0) {
  380                 bn = 0;
  381                 ii = cs->sc_itable;
  382 
  383                 for (ix = 0; ix < cs->sc_ndisks; ix++) {
  384                         /* Allocate space for ii_index. */
  385                         ii->ii_index = g_malloc(sizeof(int), M_WAITOK);
  386                         ii->ii_ndisk = 1;
  387                         ii->ii_startblk = bn;
  388                         ii->ii_startoff = 0;
  389                         ii->ii_index[0] = ix;
  390                         bn += cs->sc_cinfo[ix].ci_size;
  391                         ii++;
  392                 }
  393                 ii->ii_ndisk = 0;
  394                 return;
  395         }
  396 
  397         /*
  398          * The following isn't fast or pretty; it doesn't have to be.
  399          */
  400         size = 0;
  401         bn = lbn = 0;
  402         for (ii = cs->sc_itable; ; ii++) {
  403                 /*
  404                  * Allocate space for ii_index.  We might allocate more then
  405                  * we use.
  406                  */
  407                 ii->ii_index = g_malloc((sizeof(int) * cs->sc_ndisks),
  408                     M_WAITOK);
  409 
  410                 /*
  411                  * Locate the smallest of the remaining components
  412                  */
  413                 smallci = NULL;
  414                 for (ci = cs->sc_cinfo; ci < &cs->sc_cinfo[cs->sc_ndisks]; 
  415                     ci++) {
  416                         if (ci->ci_size > size &&
  417                             (smallci == NULL ||
  418                              ci->ci_size < smallci->ci_size)) {
  419                                 smallci = ci;
  420                         }
  421                 }
  422 
  423                 /*
  424                  * Nobody left, all done
  425                  */
  426                 if (smallci == NULL) {
  427                         ii->ii_ndisk = 0;
  428                         g_free(ii->ii_index);
  429                         ii->ii_index = NULL;
  430                         break;
  431                 }
  432 
  433                 /*
  434                  * Record starting logical block using an sc_ileave blocksize.
  435                  */
  436                 ii->ii_startblk = bn / cs->sc_ileave;
  437 
  438                 /*
  439                  * Record starting component block using an sc_ileave 
  440                  * blocksize.  This value is relative to the beginning of
  441                  * a component disk.
  442                  */
  443                 ii->ii_startoff = lbn;
  444 
  445                 /*
  446                  * Determine how many disks take part in this interleave
  447                  * and record their indices.
  448                  */
  449                 ix = 0;
  450                 for (ci = cs->sc_cinfo; 
  451                     ci < &cs->sc_cinfo[cs->sc_ndisks]; ci++) {
  452                         if (ci->ci_size >= smallci->ci_size) {
  453                                 ii->ii_index[ix++] = ci - cs->sc_cinfo;
  454                         }
  455                 }
  456                 ii->ii_ndisk = ix;
  457                 bn += ix * (smallci->ci_size - size);
  458                 lbn = smallci->ci_size / cs->sc_ileave;
  459                 size = smallci->ci_size;
  460         }
  461 }
  462 
  463 static void
  464 g_ccd_start(struct bio *bp)
  465 {
  466         long bcount, rcount;
  467         struct bio *cbp[2];
  468         caddr_t addr;
  469         daddr_t bn;
  470         int err;
  471         struct ccd_s *cs;
  472 
  473         cs = bp->bio_to->geom->softc;
  474 
  475         /*
  476          * Block all GETATTR requests, we wouldn't know which of our
  477          * subdevices we should ship it off to.
  478          * XXX: this may not be the right policy.
  479          */
  480         if(bp->bio_cmd == BIO_GETATTR) {
  481                 g_io_deliver(bp, EINVAL);
  482                 return;
  483         }
  484 
  485         /*
  486          * Translate the partition-relative block number to an absolute.
  487          */
  488         bn = bp->bio_offset / cs->sc_secsize;
  489 
  490         /*
  491          * Allocate component buffers and fire off the requests
  492          */
  493         addr = bp->bio_data;
  494         for (bcount = bp->bio_length; bcount > 0; bcount -= rcount) {
  495                 err = ccdbuffer(cbp, cs, bp, bn, addr, bcount);
  496                 if (err) {
  497                         bp->bio_completed += bcount;
  498                         if (bp->bio_error == 0)
  499                                 bp->bio_error = err;
  500                         if (bp->bio_completed == bp->bio_length)
  501                                 g_io_deliver(bp, bp->bio_error);
  502                         return;
  503                 }
  504                 rcount = cbp[0]->bio_length;
  505 
  506                 if (cs->sc_flags & CCDF_MIRROR) {
  507                         /*
  508                          * Mirroring.  Writes go to both disks, reads are
  509                          * taken from whichever disk seems most appropriate.
  510                          *
  511                          * We attempt to localize reads to the disk whos arm
  512                          * is nearest the read request.  We ignore seeks due
  513                          * to writes when making this determination and we
  514                          * also try to avoid hogging.
  515                          */
  516                         if (cbp[0]->bio_cmd != BIO_READ) {
  517                                 g_io_request(cbp[0], cbp[0]->bio_from);
  518                                 g_io_request(cbp[1], cbp[1]->bio_from);
  519                         } else {
  520                                 int pick = cs->sc_pick;
  521                                 daddr_t range = cs->sc_size / 16;
  522 
  523                                 if (bn < cs->sc_blk[pick] - range ||
  524                                     bn > cs->sc_blk[pick] + range
  525                                 ) {
  526                                         cs->sc_pick = pick = 1 - pick;
  527                                 }
  528                                 cs->sc_blk[pick] = bn + btodb(rcount);
  529                                 g_io_request(cbp[pick], cbp[pick]->bio_from);
  530                         }
  531                 } else {
  532                         /*
  533                          * Not mirroring
  534                          */
  535                         g_io_request(cbp[0], cbp[0]->bio_from);
  536                 }
  537                 bn += btodb(rcount);
  538                 addr += rcount;
  539         }
  540 }
  541 
  542 /*
  543  * Build a component buffer header.
  544  */
  545 static int
  546 ccdbuffer(struct bio **cb, struct ccd_s *cs, struct bio *bp, daddr_t bn, caddr_t addr, long bcount)
  547 {
  548         struct ccdcinfo *ci, *ci2 = NULL;
  549         struct bio *cbp;
  550         daddr_t cbn, cboff;
  551         off_t cbc;
  552 
  553         /*
  554          * Determine which component bn falls in.
  555          */
  556         cbn = bn;
  557         cboff = 0;
  558 
  559         if (cs->sc_ileave == 0) {
  560                 /*
  561                  * Serially concatenated and neither a mirror nor a parity
  562                  * config.  This is a special case.
  563                  */
  564                 daddr_t sblk;
  565 
  566                 sblk = 0;
  567                 for (ci = cs->sc_cinfo; cbn >= sblk + ci->ci_size; ci++)
  568                         sblk += ci->ci_size;
  569                 cbn -= sblk;
  570         } else {
  571                 struct ccdiinfo *ii;
  572                 int ccdisk, off;
  573 
  574                 /*
  575                  * Calculate cbn, the logical superblock (sc_ileave chunks),
  576                  * and cboff, a normal block offset (DEV_BSIZE chunks) relative
  577                  * to cbn.
  578                  */
  579                 cboff = cbn % cs->sc_ileave;    /* DEV_BSIZE gran */
  580                 cbn = cbn / cs->sc_ileave;      /* DEV_BSIZE * ileave gran */
  581 
  582                 /*
  583                  * Figure out which interleave table to use.
  584                  */
  585                 for (ii = cs->sc_itable; ii->ii_ndisk; ii++) {
  586                         if (ii->ii_startblk > cbn)
  587                                 break;
  588                 }
  589                 ii--;
  590 
  591                 /*
  592                  * off is the logical superblock relative to the beginning 
  593                  * of this interleave block.  
  594                  */
  595                 off = cbn - ii->ii_startblk;
  596 
  597                 /*
  598                  * We must calculate which disk component to use (ccdisk),
  599                  * and recalculate cbn to be the superblock relative to
  600                  * the beginning of the component.  This is typically done by
  601                  * adding 'off' and ii->ii_startoff together.  However, 'off'
  602                  * must typically be divided by the number of components in
  603                  * this interleave array to be properly convert it from a
  604                  * CCD-relative logical superblock number to a 
  605                  * component-relative superblock number.
  606                  */
  607                 if (ii->ii_ndisk == 1) {
  608                         /*
  609                          * When we have just one disk, it can't be a mirror
  610                          * or a parity config.
  611                          */
  612                         ccdisk = ii->ii_index[0];
  613                         cbn = ii->ii_startoff + off;
  614                 } else {
  615                         if (cs->sc_flags & CCDF_MIRROR) {
  616                                 /*
  617                                  * We have forced a uniform mapping, resulting
  618                                  * in a single interleave array.  We double
  619                                  * up on the first half of the available
  620                                  * components and our mirror is in the second
  621                                  * half.  This only works with a single 
  622                                  * interleave array because doubling up
  623                                  * doubles the number of sectors, so there
  624                                  * cannot be another interleave array because
  625                                  * the next interleave array's calculations
  626                                  * would be off.
  627                                  */
  628                                 int ndisk2 = ii->ii_ndisk / 2;
  629                                 ccdisk = ii->ii_index[off % ndisk2];
  630                                 cbn = ii->ii_startoff + off / ndisk2;
  631                                 ci2 = &cs->sc_cinfo[ccdisk + ndisk2];
  632                         } else {
  633                                 ccdisk = ii->ii_index[off % ii->ii_ndisk];
  634                                 cbn = ii->ii_startoff + off / ii->ii_ndisk;
  635                         }
  636                 }
  637 
  638                 ci = &cs->sc_cinfo[ccdisk];
  639 
  640                 /*
  641                  * Convert cbn from a superblock to a normal block so it
  642                  * can be used to calculate (along with cboff) the normal
  643                  * block index into this particular disk.
  644                  */
  645                 cbn *= cs->sc_ileave;
  646         }
  647 
  648         /*
  649          * Fill in the component buf structure.
  650          */
  651         cbp = g_clone_bio(bp);
  652         if (cbp == NULL)
  653                 return (ENOMEM);
  654         cbp->bio_done = g_std_done;
  655         cbp->bio_offset = dbtob(cbn + cboff + cs->sc_offset);
  656         cbp->bio_data = addr;
  657         if (cs->sc_ileave == 0)
  658               cbc = dbtob((off_t)(ci->ci_size - cbn));
  659         else
  660               cbc = dbtob((off_t)(cs->sc_ileave - cboff));
  661         cbp->bio_length = (cbc < bcount) ? cbc : bcount;
  662 
  663         cbp->bio_from = ci->ci_consumer;
  664         cb[0] = cbp;
  665 
  666         if (cs->sc_flags & CCDF_MIRROR) {
  667                 cbp = g_clone_bio(bp);
  668                 if (cbp == NULL)
  669                         return (ENOMEM);
  670                 cbp->bio_done = cb[0]->bio_done = ccdiodone;
  671                 cbp->bio_offset = cb[0]->bio_offset;
  672                 cbp->bio_data = cb[0]->bio_data;
  673                 cbp->bio_length = cb[0]->bio_length;
  674                 cbp->bio_from = ci2->ci_consumer;
  675                 cbp->bio_caller1 = cb[0];
  676                 cb[0]->bio_caller1 = cbp;
  677                 cb[1] = cbp;
  678         }
  679         return (0);
  680 }
  681 
  682 /*
  683  * Called only for mirrored operations.
  684  */
  685 static void
  686 ccdiodone(struct bio *cbp)
  687 {
  688         struct bio *mbp, *pbp;
  689 
  690         mbp = cbp->bio_caller1;
  691         pbp = cbp->bio_parent;
  692 
  693         if (pbp->bio_cmd == BIO_READ) {
  694                 if (cbp->bio_error == 0) {
  695                         /* We will not be needing the partner bio */
  696                         if (mbp != NULL) {
  697                                 pbp->bio_inbed++;
  698                                 g_destroy_bio(mbp);
  699                         }
  700                         g_std_done(cbp);
  701                         return;
  702                 }
  703                 if (mbp != NULL) {
  704                         /* Try partner the bio instead */
  705                         mbp->bio_caller1 = NULL;
  706                         pbp->bio_inbed++;
  707                         g_destroy_bio(cbp);
  708                         g_io_request(mbp, mbp->bio_from);
  709                         /*
  710                          * XXX: If this comes back OK, we should actually
  711                          * try to write the good data on the failed mirror
  712                          */
  713                         return;
  714                 }
  715                 g_std_done(cbp);
  716                 return;
  717         }
  718         if (mbp != NULL) {
  719                 mbp->bio_caller1 = NULL;
  720                 pbp->bio_inbed++;
  721                 if (cbp->bio_error != 0 && pbp->bio_error == 0)
  722                         pbp->bio_error = cbp->bio_error;
  723                 g_destroy_bio(cbp);
  724                 return;
  725         }
  726         g_std_done(cbp);
  727 }
  728 
  729 static void
  730 g_ccd_create(struct gctl_req *req, struct g_class *mp)
  731 {
  732         int *unit, *ileave, *nprovider;
  733         struct g_geom *gp;
  734         struct g_consumer *cp;
  735         struct g_provider *pp;
  736         struct ccd_s *sc;
  737         struct sbuf *sb;
  738         char buf[20];
  739         int i, error;
  740 
  741         g_topology_assert();
  742         unit = gctl_get_paraml(req, "unit", sizeof (*unit));
  743         if (unit == NULL) {
  744                 gctl_error(req, "unit parameter not given");
  745                 return;
  746         }
  747         ileave = gctl_get_paraml(req, "ileave", sizeof (*ileave));
  748         if (ileave == NULL) {
  749                 gctl_error(req, "ileave parameter not given");
  750                 return;
  751         }
  752         nprovider = gctl_get_paraml(req, "nprovider", sizeof (*nprovider));
  753         if (nprovider == NULL) {
  754                 gctl_error(req, "nprovider parameter not given");
  755                 return;
  756         }
  757 
  758         /* Check for duplicate unit */
  759         LIST_FOREACH(gp, &mp->geom, geom) {
  760                 sc = gp->softc;
  761                 if (sc != NULL && sc->sc_unit == *unit) {
  762                         gctl_error(req, "Unit %d already configured", *unit);
  763                         return;
  764                 }
  765         }
  766 
  767         if (*nprovider <= 0) {
  768                 gctl_error(req, "Bogus nprovider argument (= %d)", *nprovider);
  769                 return;
  770         }
  771 
  772         /* Check all providers are valid */
  773         for (i = 0; i < *nprovider; i++) {
  774                 sprintf(buf, "provider%d", i);
  775                 pp = gctl_get_provider(req, buf);
  776                 if (pp == NULL)
  777                         return;
  778         }
  779 
  780         gp = g_new_geomf(mp, "ccd%d", *unit);
  781         sc = g_malloc(sizeof *sc, M_WAITOK | M_ZERO);
  782         gp->softc = sc;
  783         sc->sc_ndisks = *nprovider;
  784 
  785         /* Allocate space for the component info. */
  786         sc->sc_cinfo = g_malloc(sc->sc_ndisks * sizeof(struct ccdcinfo),
  787             M_WAITOK | M_ZERO);
  788 
  789         /* Create consumers and attach to all providers */
  790         for (i = 0; i < *nprovider; i++) {
  791                 sprintf(buf, "provider%d", i);
  792                 pp = gctl_get_provider(req, buf);
  793                 cp = g_new_consumer(gp);
  794                 error = g_attach(cp, pp);
  795                 KASSERT(error == 0, ("attach to %s failed", pp->name));
  796                 sc->sc_cinfo[i].ci_consumer = cp;
  797                 sc->sc_cinfo[i].ci_provider = pp;
  798         }
  799 
  800         sc->sc_unit = *unit;
  801         sc->sc_ileave = *ileave;
  802 
  803         if (gctl_get_param(req, "no_offset", NULL))
  804                 sc->sc_flags |= CCDF_NO_OFFSET;
  805         if (gctl_get_param(req, "linux", NULL))
  806                 sc->sc_flags |= CCDF_LINUX;
  807 
  808         if (gctl_get_param(req, "uniform", NULL))
  809                 sc->sc_flags |= CCDF_UNIFORM;
  810         if (gctl_get_param(req, "mirror", NULL))
  811                 sc->sc_flags |= CCDF_MIRROR;
  812 
  813         if (sc->sc_ileave == 0 && (sc->sc_flags & CCDF_MIRROR)) {
  814                 printf("%s: disabling mirror, interleave is 0\n", gp->name);
  815                 sc->sc_flags &= ~(CCDF_MIRROR);
  816         }
  817 
  818         if ((sc->sc_flags & CCDF_MIRROR) && !(sc->sc_flags & CCDF_UNIFORM)) {
  819                 printf("%s: mirror/parity forces uniform flag\n", gp->name);
  820                 sc->sc_flags |= CCDF_UNIFORM;
  821         }
  822 
  823         error = ccdinit(req, sc);
  824         if (error != 0) {
  825                 g_ccd_freesc(sc);
  826                 gp->softc = NULL;
  827                 g_wither_geom(gp, ENXIO);
  828                 return;
  829         }
  830 
  831         pp = g_new_providerf(gp, "%s", gp->name);
  832         pp->mediasize = sc->sc_size * (off_t)sc->sc_secsize;
  833         pp->sectorsize = sc->sc_secsize;
  834         g_error_provider(pp, 0);
  835 
  836         sb = sbuf_new_auto();
  837         sbuf_printf(sb, "ccd%d: %d components ", sc->sc_unit, *nprovider);
  838         for (i = 0; i < *nprovider; i++) {
  839                 sbuf_printf(sb, "%s%s",
  840                     i == 0 ? "(" : ", ", 
  841                     sc->sc_cinfo[i].ci_provider->name);
  842         }
  843         sbuf_printf(sb, "), %jd blocks ", (off_t)pp->mediasize / DEV_BSIZE);
  844         if (sc->sc_ileave != 0)
  845                 sbuf_printf(sb, "interleaved at %d blocks\n",
  846                         sc->sc_ileave);
  847         else
  848                 sbuf_printf(sb, "concatenated\n");
  849         sbuf_finish(sb);
  850         gctl_set_param_err(req, "output", sbuf_data(sb), sbuf_len(sb) + 1);
  851         sbuf_delete(sb);
  852 }
  853 
  854 static int
  855 g_ccd_destroy_geom(struct gctl_req *req, struct g_class *mp, struct g_geom *gp)
  856 {
  857         struct g_provider *pp;
  858         struct ccd_s *sc;
  859 
  860         g_topology_assert();
  861         sc = gp->softc;
  862         pp = LIST_FIRST(&gp->provider);
  863         if (sc == NULL || pp == NULL)
  864                 return (EBUSY);
  865         if (pp->acr != 0 || pp->acw != 0 || pp->ace != 0) {
  866                 gctl_error(req, "%s is open(r%dw%de%d)", gp->name,
  867                     pp->acr, pp->acw, pp->ace);
  868                 return (EBUSY);
  869         }
  870         g_ccd_freesc(sc);
  871         gp->softc = NULL;
  872         g_wither_geom(gp, ENXIO);
  873         return (0);
  874 }
  875 
  876 static void
  877 g_ccd_list(struct gctl_req *req, struct g_class *mp)
  878 {
  879         struct sbuf *sb;
  880         struct ccd_s *cs;
  881         struct g_geom *gp;
  882         int i, unit, *up;
  883 
  884         up = gctl_get_paraml(req, "unit", sizeof (*up));
  885         if (up == NULL) {
  886                 gctl_error(req, "unit parameter not given");
  887                 return;
  888         }
  889         unit = *up;
  890         sb = sbuf_new_auto();
  891         LIST_FOREACH(gp, &mp->geom, geom) {
  892                 cs = gp->softc;
  893                 if (cs == NULL || (unit >= 0 && unit != cs->sc_unit))
  894                         continue;
  895                 sbuf_printf(sb, "ccd%d\t\t%d\t%d\t",
  896                     cs->sc_unit, cs->sc_ileave, cs->sc_flags & CCDF_USERMASK);
  897                         
  898                 for (i = 0; i < cs->sc_ndisks; ++i) {
  899                         sbuf_printf(sb, "%s/dev/%s", i == 0 ? "" : " ",
  900                             cs->sc_cinfo[i].ci_provider->name);
  901                 }
  902                 sbuf_printf(sb, "\n");
  903         }
  904         sbuf_finish(sb);
  905         gctl_set_param_err(req, "output", sbuf_data(sb), sbuf_len(sb) + 1);
  906         sbuf_delete(sb);
  907 }
  908 
  909 static void
  910 g_ccd_config(struct gctl_req *req, struct g_class *mp, char const *verb)
  911 {
  912         struct g_geom *gp;
  913 
  914         g_topology_assert();
  915         if (!strcmp(verb, "create geom")) {
  916                 g_ccd_create(req, mp);
  917         } else if (!strcmp(verb, "destroy geom")) {
  918                 gp = gctl_get_geom(req, mp, "geom");
  919                 if (gp != NULL)
  920                 g_ccd_destroy_geom(req, mp, gp);
  921         } else if (!strcmp(verb, "list")) {
  922                 g_ccd_list(req, mp);
  923         } else {
  924                 gctl_error(req, "unknown verb");
  925         }
  926 }
  927 
  928 static struct g_class g_ccd_class = {
  929         .name = "CCD",
  930         .version = G_VERSION,
  931         .ctlreq = g_ccd_config,
  932         .destroy_geom = g_ccd_destroy_geom,
  933         .start = g_ccd_start,
  934         .orphan = g_ccd_orphan,
  935         .access = g_ccd_access,
  936 };
  937 
  938 DECLARE_GEOM_CLASS(g_ccd_class, g_ccd);
  939 MODULE_VERSION(geom_ccd, 0);

Cache object: c93871028fe89a21e3dea59de72dd34d


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.