The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/geom/geom_ccd.c

Version: -  FREEBSD  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-2  -  FREEBSD-11-1  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-4  -  FREEBSD-10-3  -  FREEBSD-10-2  -  FREEBSD-10-1  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-3  -  FREEBSD-9-2  -  FREEBSD-9-1  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-4  -  FREEBSD-8-3  -  FREEBSD-8-2  -  FREEBSD-8-1  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-4  -  FREEBSD-7-3  -  FREEBSD-7-2  -  FREEBSD-7-1  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-4  -  FREEBSD-6-3  -  FREEBSD-6-2  -  FREEBSD-6-1  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-5  -  FREEBSD-5-4  -  FREEBSD-5-3  -  FREEBSD-5-2  -  FREEBSD-5-1  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  linux-2.6  -  linux-2.4.22  -  MK83  -  MK84  -  PLAN9  -  DFBSD  -  NETBSD  -  NETBSD5  -  NETBSD4  -  NETBSD3  -  NETBSD20  -  OPENBSD  -  xnu-517  -  xnu-792  -  xnu-792.6.70  -  xnu-1228  -  xnu-1456.1.26  -  xnu-1699.24.8  -  xnu-2050.18.24  -  OPENSOLARIS  -  minix-3-1-1 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 2003 Poul-Henning Kamp.
    3  * Copyright (c) 1995 Jason R. Thorpe.
    4  * Copyright (c) 1990, 1993
    5  *      The Regents of the University of California.  All rights reserved.
    6  * All rights reserved.
    7  * Copyright (c) 1988 University of Utah.
    8  *
    9  * This code is derived from software contributed to Berkeley by
   10  * the Systems Programming Group of the University of Utah Computer
   11  * Science Department.
   12  *
   13  * Redistribution and use in source and binary forms, with or without
   14  * modification, are permitted provided that the following conditions
   15  * are met:
   16  * 1. Redistributions of source code must retain the above copyright
   17  *    notice, this list of conditions and the following disclaimer.
   18  * 2. Redistributions in binary form must reproduce the above copyright
   19  *    notice, this list of conditions and the following disclaimer in the
   20  *    documentation and/or other materials provided with the distribution.
   21  * 3. All advertising materials mentioning features or use of this software
   22  *    must display the following acknowledgement:
   23  *      This product includes software developed for the NetBSD Project
   24  *      by Jason R. Thorpe.
   25  * 4. The names of the authors may not be used to endorse or promote products
   26  *    derived from this software without specific prior written permission.
   27  *
   28  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
   29  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   30  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
   31  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
   32  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
   33  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   34  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
   35  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
   36  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   37  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   38  * SUCH DAMAGE.
   39  *
   40  * Dynamic configuration and disklabel support by:
   41  *      Jason R. Thorpe <thorpej@nas.nasa.gov>
   42  *      Numerical Aerodynamic Simulation Facility
   43  *      Mail Stop 258-6
   44  *      NASA Ames Research Center
   45  *      Moffett Field, CA 94035
   46  *
   47  * from: Utah $Hdr: cd.c 1.6 90/11/28$
   48  *      @(#)cd.c        8.2 (Berkeley) 11/16/93
   49  *      $NetBSD: ccd.c,v 1.22 1995/12/08 19:13:26 thorpej Exp $ 
   50  */
   51 
   52 #include <sys/cdefs.h>
   53 __FBSDID("$FreeBSD: releng/10.0/sys/geom/geom_ccd.c 223921 2011-07-11 05:22:31Z ae $");
   54 
   55 #include <sys/param.h>
   56 #include <sys/systm.h>
   57 #include <sys/kernel.h>
   58 #include <sys/module.h>
   59 #include <sys/bio.h>
   60 #include <sys/malloc.h>
   61 #include <sys/sbuf.h>
   62 #include <geom/geom.h>
   63 
   64 /*
   65  * Number of blocks to leave untouched in front of a component partition.
   66  * This is to avoid violating its disklabel area when it starts at the
   67  * beginning of the slice.
       *
       * The value is counted in DEV_BSIZE blocks.  ccdinit() uses it as
       * sc_offset unless CCDF_NO_OFFSET or CCDF_LINUX is set.  The default
       * applies only when CCD_OFFSET has not been defined already.
   68  */
   69 #if !defined(CCD_OFFSET)
   70 #define CCD_OFFSET 16
   71 #endif
   72 
   73 /* sc_flags: configuration bits kept in ccd_s.sc_flags */
   74 #define CCDF_UNIFORM    0x02    /* use LCCD of sizes for uniform interleave */
   75 #define CCDF_MIRROR     0x04    /* use mirroring */
   76 #define CCDF_NO_OFFSET  0x08    /* do not leave space in front */
   77 #define CCDF_LINUX      0x10    /* use Linux compatibility mode */
      /*
       * As applied by ccdinit(): CCDF_UNIFORM sizes every component like the
       * smallest one; CCDF_MIRROR requires an even component count and a
       * non-zero interleave; CCDF_NO_OFFSET and CCDF_LINUX both set sc_offset
       * to 0, and CCDF_LINUX additionally doubles sc_ileave.
       */
   78 
   79 /* Mask of user-settable ccd flags (excludes CCDF_NO_OFFSET and CCDF_LINUX). */
   80 #define CCDF_USERMASK   (CCDF_UNIFORM|CCDF_MIRROR)
   81 
   82 /*
   83  * Interleave description table.
   84  * Computed at boot time to speed irregular-interleave lookups.
   85  * The idea is that we interleave in "groups".  First we interleave
   86  * evenly over all component disks up to the size of the smallest
   87  * component (the first group), then we interleave evenly over all
   88  * remaining disks up to the size of the next-smallest (second group),
   89  * and so on.
   90  *
   91  * Each table entry describes the interleave characteristics of one
   92  * of these groups.  For example if a concatenated disk consisted of
   93  * three components of 5, 3, and 7 DEV_BSIZE blocks interleaved at
   94  * DEV_BSIZE (1), the table would have three entries:
   95  *
   96  *      ndisk   startblk        startoff        dev
   97  *      3       0               0               0, 1, 2
   98  *      2       9               3               0, 2
   99  *      1       13              5               2
  100  *      0       -               -               -
  101  *
  102  * which says that the first nine blocks (0-8) are interleaved over
  103  * 3 disks (0, 1, 2) starting at block offset 0 on any component disk,
  104  * the next 4 blocks (9-12) are interleaved over 2 disks (0, 2) starting
  105  * at component block 3, and the remaining blocks (13-14) are on disk
  106  * 2 starting at offset 5.
  107  */
  108 struct ccdiinfo {
              /*
               * One entry per interleave group.  ccdinterleave() fills the table
               * and terminates it with an entry whose ii_ndisk is 0.
               */
  109         int     ii_ndisk;       /* # of disks range is interleaved over */
              /*
               * When an interleave is configured, ii_startblk and ii_startoff are
               * stored in units of sc_ileave blocks.  With sc_ileave == 0,
               * ii_startblk is a plain block number and ii_startoff is 0.
               */
  110         daddr_t ii_startblk;    /* starting scaled block # for range */
  111         daddr_t ii_startoff;    /* starting component offset (block #) */
              /* Allocated array of indices into sc_cinfo; freed by g_ccd_freesc(). */
  112         int     *ii_index;      /* ordered list of components in range */
  113 };
  114 
  115 /*
  116  * Component info table.
  117  * Describes a single component of a concatenated disk.
  118  */
  119 struct ccdcinfo {
              /*
               * Usable size in DEV_BSIZE blocks as computed by ccdinit(): the
               * provider's media size less sc_offset, truncated to a multiple of
               * the interleave, and replaced by the smallest component's size
               * when CCDF_UNIFORM is set.
               */
  120         daddr_t         ci_size;                /* size */
  121         struct g_provider *ci_provider;         /* provider */
              /* Consumer attached to ci_provider; component I/O is issued on it. */
  122         struct g_consumer *ci_consumer;         /* consumer */
  123 };
  124 
  125 /*
  126  * A concatenated disk is described by this structure.
  127  */
  128 
  129 struct ccd_s {
  130         LIST_ENTRY(ccd_s) list;
  131 
  132         int              sc_unit;               /* logical unit number */
  133         int              sc_flags;              /* CCDF_* flags */
              /* Total size in DEV_BSIZE blocks; halved component sum when mirroring. */
  134         daddr_t          sc_size;               /* size of ccd */
              /* Interleave in DEV_BSIZE blocks; 0 means serial concatenation. */
  135         int              sc_ileave;             /* interleave */
  136         u_int            sc_ndisks;             /* number of components */
              /* Array of sc_ndisks entries. */
  137         struct ccdcinfo  *sc_cinfo;             /* component info */
              /* Built by ccdinterleave(); terminated by an entry with ii_ndisk == 0. */
  138         struct ccdiinfo  *sc_itable;            /* interleave table */
              /* Largest sector size found among the components (set by ccdinit()). */
  139         u_int32_t        sc_secsize;            /* # bytes per sector */
              /* Index (0 or 1) into sc_blk of the side that served the last read. */
  140         int              sc_pick;               /* side of mirror picked */
              /* Per side: block number just past the last read sent to that side. */
  141         daddr_t          sc_blk[2];             /* mirror localization */
              /* DEV_BSIZE blocks skipped at the start of every component. */
  142         u_int32_t        sc_offset;             /* actual offset used */
  143 };
  144 
  145 static g_start_t g_ccd_start;   /* splits a request and issues the pieces */
      /* Completion handler installed on both children of a mirrored request. */
  146 static void ccdiodone(struct bio *bp);
      /* Builds the interleave table sc_itable from the component sizes. */
  147 static void ccdinterleave(struct ccd_s *);
      /* Validates the configuration and computes component and total sizes. */
  148 static int ccdinit(struct gctl_req *req, struct ccd_s *);
      /* Builds the child bio (two when mirroring) for one piece of a request. */
  149 static int ccdbuffer(struct bio **ret, struct ccd_s *,
  150                       struct bio *, daddr_t, caddr_t, long);
  151 
      /*
       * Orphan handler for component consumers.  Deliberately empty: the
       * consumer cp is neither closed nor detached here and the ccd is left
       * configured.
       *
       * NOTE(review): presumably installed as this class's orphan method; the
       * registration is not visible in this part of the file.
       */
  152 static void
  153 g_ccd_orphan(struct g_consumer *cp)
  154 {
  155         /*
  156          * XXX: We don't do anything here.  It is not obvious
  157          * XXX: what DTRT would be, so we do what the previous
  158          * XXX: code did: ignore it and let the user cope.
  159          */
  160 }
  161 
      /*
       * Access method for the ccd provider: pass a change in access counts on
       * to every component consumer of the geom.
       *
       * dr, dw and de are the read, write and exclusive count deltas.  The
       * exclusive delta handed to the components is de + dr + dw, so every
       * access to the ccd is also requested exclusively from its components.
       *
       * Returns 0 when g_access() succeeded on all consumers.  If one call
       * fails, the consumers updated before it are reverted and that error is
       * returned.  ENXIO is returned when the geom has no consumers.
       */
  162 static int
  163 g_ccd_access(struct g_provider *pp, int dr, int dw, int de)
  164 {
  165         struct g_geom *geom = pp->geom;
  166         struct g_consumer *cur, *undo;
  167         int rc = ENXIO;
  168 
  169         /* Fold the read and write deltas into the exclusive delta. */
  170         de += dr + dw;
  171 
  172         LIST_FOREACH(cur, &geom->consumer, consumer) {
  173                 rc = g_access(cur, dr, dw, de);
  174                 if (rc == 0)
  175                         continue;
  176 
  177                 /* Back out the consumers that precede the failing one. */
  178                 LIST_FOREACH(undo, &geom->consumer, consumer) {
  179                         if (undo == cur)
  180                                 break;
  181                         g_access(undo, -dr, -dw, -de);
  182                 }
  183                 break;
  184         }
  185         return (rc);
  186 }
  187 
  188 /*
  189  * Release a softc together with everything hanging off it: the component
       * array, the index array of each interleave entry, and the interleave
       * table itself.  The table is walked up to its terminating entry
       * (ii_ndisk == 0); a NULL sc_itable is skipped.  sc is freed last and
       * must not be used afterwards.
  190  */
  191 static void
  192 g_ccd_freesc(struct ccd_s *sc)
  193 {
  194         struct ccdiinfo *entry = sc->sc_itable;
  195 
  196         g_free(sc->sc_cinfo);
  197         for (; entry != NULL && entry->ii_ndisk > 0; entry++) {
  198                 if (entry->ii_index != NULL)
  199                         g_free(entry->ii_index);
  200         }
  201         if (sc->sc_itable != NULL)
  202                 g_free(sc->sc_itable);
  203         g_free(sc);
  204 }
  205 
  206 
      /*
       * Validate the configuration stored in cs and derive the component and
       * overall sizes from it.
       *
       * For each component the usable size is its media size in DEV_BSIZE
       * blocks, minus sc_offset, truncated to a multiple of the interleave.
       * sc_size becomes the sum of those sizes, adjusted for CCDF_UNIFORM and
       * CCDF_MIRROR.  On success the interleave table is built with
       * ccdinterleave() and sc_secsize is set to the largest component sector
       * size.
       *
       * req is used only to report errors with gctl_error().
       *
       * Returns 0 on success.  Returns ENODEV when a component has no usable
       * space and EINVAL when the configuration is inconsistent.  All error
       * returns happen before ccdinterleave() is called, so nothing has been
       * allocated by this function when it fails.
       *
       * Side effect: in CCDF_LINUX mode sc_ileave is doubled in place.
       */
  207 static int
  208 ccdinit(struct gctl_req *req, struct ccd_s *cs)
  209 {
  210         struct ccdcinfo *ci;
  211         daddr_t size;
  212         int ix;
  213         daddr_t minsize;
  214         int maxsecsize;
  215         off_t mediasize;
  216         u_int sectorsize;
  217 
  218         cs->sc_size = 0;
  219 
  220         maxsecsize = 0;
  221         minsize = 0;
  222 
  223         if (cs->sc_flags & CCDF_LINUX) {
                      /*
                       * Linux compatibility: nothing is reserved in front of the
                       * components and the interleave is doubled.
                       * NOTE(review): presumably a unit conversion from 1K chunks
                       * to DEV_BSIZE blocks -- confirm.
                       */
  224                 cs->sc_offset = 0;
  225                 cs->sc_ileave *= 2;
  226                 if (cs->sc_flags & CCDF_MIRROR && cs->sc_ndisks != 2) {
  227                         gctl_error(req, "Mirror mode for Linux raids is "
  228                                         "only supported with 2 devices");
                              /* Refuse the configuration like every other check. */
                              return (EINVAL);
                      }
  229         } else {
  230                 if (cs->sc_flags & CCDF_NO_OFFSET)
  231                         cs->sc_offset = 0;
  232                 else
  233                         cs->sc_offset = CCD_OFFSET;
  234 
  235         }
  236         for (ix = 0; ix < cs->sc_ndisks; ix++) {
  237                 ci = &cs->sc_cinfo[ix];
  238 
  239                 mediasize = ci->ci_provider->mediasize;
  240                 sectorsize = ci->ci_provider->sectorsize;
  241                 if (sectorsize > maxsecsize)
  242                         maxsecsize = sectorsize;
  243                 size = mediasize / DEV_BSIZE - cs->sc_offset;
  244 
  245                 /* Truncate to interleave boundary */
  246 
  247                 if (cs->sc_ileave > 1)
  248                         size -= size % cs->sc_ileave;
  249 
                      /*
                       * A component smaller than sc_offset yields a negative
                       * size; treat that like an empty component instead of
                       * letting it corrupt minsize and sc_size.
                       */
  250                 if (size <= 0) {
  251                         gctl_error(req, "Component %s has effective size zero",
  252                             ci->ci_provider->name);
  253                         return(ENODEV);
  254                 }
  255 
  256                 if (minsize == 0 || size < minsize)
  257                         minsize = size;
  258                 ci->ci_size = size;
  259                 cs->sc_size += size;
  260         }
  261 
  262         /*
  263          * Don't allow the interleave to be smaller than
  264          * the biggest component sector.
  265          */
  266         if ((cs->sc_ileave > 0) &&
  267             (cs->sc_ileave < (maxsecsize / DEV_BSIZE))) {
  268                 gctl_error(req, "Interleave to small for sector size");
  269                 return(EINVAL);
  270         }
  271 
  272         /*
  273          * If uniform interleave is desired set all sizes to that of
  274          * the smallest component.  This will guarantee that a single
  275          * interleave table is generated.
  276          *
  277          * Lost space must be taken into account when calculating the
  278          * overall size.  Half the space is lost when CCDF_MIRROR is
  279          * specified.
  280          */
  281         if (cs->sc_flags & CCDF_UNIFORM) {
  282                 for (ix = 0; ix < cs->sc_ndisks; ix++) {
  283                         ci = &cs->sc_cinfo[ix];
  284                         ci->ci_size = minsize;
  285                 }
  286                 cs->sc_size = cs->sc_ndisks * minsize;
  287         }
  288 
  289         if (cs->sc_flags & CCDF_MIRROR) {
  290                 /*
  291                  * Check to see if an even number of components
  292                  * have been specified.  The interleave must also
  293                  * be non-zero in order for us to be able to
  294                  * guarantee the topology.
  295                  */
  296                 if (cs->sc_ndisks % 2) {
  297                         gctl_error(req,
  298                               "Mirroring requires an even number of disks");
  299                         return(EINVAL);
  300                 }
  301                 if (cs->sc_ileave == 0) {
  302                         gctl_error(req,
  303                              "An interleave must be specified when mirroring");
  304                         return(EINVAL);
  305                 }
  306                 cs->sc_size = (cs->sc_ndisks/2) * minsize;
  307         }
  308 
  309         /*
  310          * Construct the interleave table.
  311          */
  312         ccdinterleave(cs);
  313 
  314         /*
  315          * Record the largest component sector size; g_ccd_start() uses
  316          * it to turn a request's byte offset into a block number.
  317          */
  318         cs->sc_secsize = maxsecsize;
  319 
  320         return (0);
  321 }
  322 
      /*
       * Build the interleave table (sc_itable) of cs from the component sizes
       * in sc_cinfo and the interleave sc_ileave.
       *
       * The table is allocated zeroed with sc_ndisks + 1 entries and is always
       * terminated by an entry whose ii_ndisk is 0.  Every used entry owns a
       * separately allocated ii_index array; g_ccd_freesc() releases both.
       * All allocations use M_WAITOK.
       *
       * With sc_ileave == 0 one single-component entry is produced per disk,
       * with ii_startblk in plain blocks.  Otherwise one entry is produced per
       * distinct component size, smallest first, and ii_startblk/ii_startoff
       * are expressed in units of sc_ileave blocks.
       */
  323 static void
  324 ccdinterleave(struct ccd_s *cs)
  325 {
  326         struct ccdcinfo *ci, *smallci;
  327         struct ccdiinfo *ii;
  328         daddr_t bn, lbn;
  329         int ix;
  330         daddr_t size;
  331 
  332 
  333         /*
  334          * Allocate an interleave table.  The worst case occurs when each
  335          * of N disks is of a different size, resulting in N interleave
  336          * tables.  One extra entry is allocated for the terminator.
  337          *
  338          * Chances are this is too big, but we don't care.
  339          */
  340         size = (cs->sc_ndisks + 1) * sizeof(struct ccdiinfo);
  341         cs->sc_itable = g_malloc(size, M_WAITOK | M_ZERO);
  342 
  343         /*
  344          * Trivial case: no interleave (actually interleave of disk size).
  345          * Each table entry represents a single component in its entirety.
  346          *
  347          * An interleave of 0 may not be used with a mirror setup.
  348          */
  349         if (cs->sc_ileave == 0) {
  350                 bn = 0;
  351                 ii = cs->sc_itable;
  352 
  353                 for (ix = 0; ix < cs->sc_ndisks; ix++) {
  354                         /* Allocate space for ii_index. */
  355                         ii->ii_index = g_malloc(sizeof(int), M_WAITOK);
  356                         ii->ii_ndisk = 1;
  357                         ii->ii_startblk = bn;
  358                         ii->ii_startoff = 0;
  359                         ii->ii_index[0] = ix;
  360                         bn += cs->sc_cinfo[ix].ci_size;
  361                         ii++;
  362                 }
  363                 ii->ii_ndisk = 0;
  364                 return;
  365         }
  366 
  367         /*
  368          * The following isn't fast or pretty; it doesn't have to be.
           *
           * From here on "size" is reused as the component size already covered
           * by earlier groups; bn is the ccd block at which the next group
           * starts and lbn the matching per-component offset in interleave
           * units.
  369          */
  370         size = 0;
  371         bn = lbn = 0;
  372         for (ii = cs->sc_itable; ; ii++) {
  373                 /*
  374                  * Allocate space for ii_index.  We might allocate more than
  375                  * we use.
  376                  */
  377                 ii->ii_index = g_malloc((sizeof(int) * cs->sc_ndisks),
  378                     M_WAITOK);
  379 
  380                 /*
  381                  * Locate the smallest of the remaining components
  382                  */
  383                 smallci = NULL;
  384                 for (ci = cs->sc_cinfo; ci < &cs->sc_cinfo[cs->sc_ndisks]; 
  385                     ci++) {
  386                         if (ci->ci_size > size &&
  387                             (smallci == NULL ||
  388                              ci->ci_size < smallci->ci_size)) {
  389                                 smallci = ci;
  390                         }
  391                 }
  392 
  393                 /*
  394                  * Nobody left, all done.  This entry becomes the terminator,
                       * so the index array allocated above is released again.
  395                  */
  396                 if (smallci == NULL) {
  397                         ii->ii_ndisk = 0;
  398                         g_free(ii->ii_index);
  399                         ii->ii_index = NULL;
  400                         break;
  401                 }
  402 
  403                 /*
  404                  * Record starting logical block using an sc_ileave blocksize.
  405                  */
  406                 ii->ii_startblk = bn / cs->sc_ileave;
  407 
  408                 /*
  409                  * Record starting component block using an sc_ileave 
  410                  * blocksize.  This value is relative to the beginning of
  411                  * a component disk.
  412                  */
  413                 ii->ii_startoff = lbn;
  414 
  415                 /*
  416                  * Determine how many disks take part in this interleave
  417                  * and record their indices.
  418                  */
  419                 ix = 0;
  420                 for (ci = cs->sc_cinfo; 
  421                     ci < &cs->sc_cinfo[cs->sc_ndisks]; ci++) {
  422                         if (ci->ci_size >= smallci->ci_size) {
  423                                 ii->ii_index[ix++] = ci - cs->sc_cinfo;
  424                         }
  425                 }
  426                 ii->ii_ndisk = ix;
                      /*
                       * Each of the ix members contributes the blocks between the
                       * previous group's size and this one's; the next group then
                       * starts where this one ends on every remaining component.
                       */
  427                 bn += ix * (smallci->ci_size - size);
  428                 lbn = smallci->ci_size / cs->sc_ileave;
  429                 size = smallci->ci_size;
  430         }
  431 }
  432 
      /*
       * Start method: split the request bp into per-component child bios and
       * issue them.
       *
       * Each pass of the loop asks ccdbuffer() for the next child (two when
       * mirroring), issues it on the consumer stored in its bio_from, and
       * advances the block number and data pointer by the child's length.
       * BIO_GETATTR requests are refused with EINVAL.
       *
       * If ccdbuffer() fails, the bytes not yet mapped are added to
       * bio_completed, the error is stored in bio_error if none is set, and bp
       * is delivered here only when bio_completed then equals bio_length.
       *
       * NOTE(review): bn is derived with sc_secsize but advanced with btodb()
       * and consumed by ccdbuffer() as a DEV_BSIZE block number; these agree
       * only when sc_secsize equals DEV_BSIZE -- confirm.
       */
  433 static void
  434 g_ccd_start(struct bio *bp)
  435 {
  436         long bcount, rcount;
  437         struct bio *cbp[2];
  438         caddr_t addr;
  439         daddr_t bn;
  440         int err;
  441         struct ccd_s *cs;
  442 
  443         cs = bp->bio_to->geom->softc;
  444 
  445         /*
  446          * Block all GETATTR requests, we wouldn't know which of our
  447          * subdevices we should ship it off to.
  448          * XXX: this may not be the right policy.
  449          */
  450         if(bp->bio_cmd == BIO_GETATTR) {
  451                 g_io_deliver(bp, EINVAL);
  452                 return;
  453         }
  454 
  455         /*
  456          * Translate the partition-relative block number to an absolute.
  457          */
  458         bn = bp->bio_offset / cs->sc_secsize;
  459 
  460         /*
  461          * Allocate component buffers and fire off the requests
  462          */
  463         addr = bp->bio_data;
  464         for (bcount = bp->bio_length; bcount > 0; bcount -= rcount) {
  465                 err = ccdbuffer(cbp, cs, bp, bn, addr, bcount);
  466                 if (err) {
                              /* Account for everything that will never be issued. */
  467                         bp->bio_completed += bcount;
  468                         if (bp->bio_error == 0)
  469                                 bp->bio_error = err;
  470                         if (bp->bio_completed == bp->bio_length)
  471                                 g_io_deliver(bp, bp->bio_error);
  472                         return;
  473                 }
                      /* Both mirror children carry the same length as cbp[0]. */
  474                 rcount = cbp[0]->bio_length;
  475 
  476                 if (cs->sc_flags & CCDF_MIRROR) {
  477                         /*
  478                          * Mirroring.  Writes go to both disks, reads are
  479                          * taken from whichever disk seems most appropriate.
  480                          *
  481                          * We attempt to localize reads to the disk whose arm
  482                          * is nearest the read request.  We ignore seeks due
  483                          * to writes when making this determination and we
  484                          * also try to avoid hogging.
  485                          */
  486                         if (cbp[0]->bio_cmd != BIO_READ) {
  487                                 g_io_request(cbp[0], cbp[0]->bio_from);
  488                                 g_io_request(cbp[1], cbp[1]->bio_from);
  489                         } else {
  490                                 int pick = cs->sc_pick;
  491                                 daddr_t range = cs->sc_size / 16;
  492 
                                      /*
                                       * Stay on the current side while the read
                                       * lies within sc_size / 16 blocks of where
                                       * that side last finished; otherwise switch
                                       * sides.  The unused child is disposed of
                                       * by ccdiodone().
                                       */
  493                                 if (bn < cs->sc_blk[pick] - range ||
  494                                     bn > cs->sc_blk[pick] + range
  495                                 ) {
  496                                         cs->sc_pick = pick = 1 - pick;
  497                                 }
  498                                 cs->sc_blk[pick] = bn + btodb(rcount);
  499                                 g_io_request(cbp[pick], cbp[pick]->bio_from);
  500                         }
  501                 } else {
  502                         /*
  503                          * Not mirroring
  504                          */
  505                         g_io_request(cbp[0], cbp[0]->bio_from);
  506                 }
  507                 bn += btodb(rcount);
  508                 addr += rcount;
  509         }
  510 }
  511 
  512 /*
  513  * Build a component buffer header.
       *
       * Maps the ccd block number bn onto a component and creates the child
       * bio for the piece of the parent request bp that starts there.
       *
       *   cb      out: cb[0] receives the child bio; with CCDF_MIRROR cb[1]
       *           receives a second child aimed at the mirror component
       *   cs      the ccd being addressed
       *   bp      parent request the children are cloned from
       *   bn      starting block within the ccd, in DEV_BSIZE blocks
       *   addr    data pointer for this piece
       *   bcount  bytes of the request that remain to be mapped
       *
       * The child's length is bcount, capped at the end of the current
       * interleave chunk (or of the component when sc_ileave is 0); the caller
       * reads it back from cb[0]->bio_length.  Children are not issued here.
       *
       * Returns 0 on success or ENOMEM when a bio cannot be cloned.  On
       * failure no child is left allocated and bp's bio_children count is
       * unchanged.
  514  */
  515 static int
  516 ccdbuffer(struct bio **cb, struct ccd_s *cs, struct bio *bp, daddr_t bn, caddr_t addr, long bcount)
  517 {
  518         struct ccdcinfo *ci, *ci2 = NULL;
  519         struct bio *cbp;
  520         daddr_t cbn, cboff;
  521         off_t cbc;
  522 
  523         /*
  524          * Determine which component bn falls in.
  525          */
  526         cbn = bn;
  527         cboff = 0;
  528 
  529         if (cs->sc_ileave == 0) {
  530                 /*
  531                  * Serially concatenated and neither a mirror nor a parity
  532                  * config.  This is a special case.
                       *
                       * NOTE(review): the walk has no bound on the component
                       * array; it relies on bn lying inside the ccd.
  533                  */
  534                 daddr_t sblk;
  535 
  536                 sblk = 0;
  537                 for (ci = cs->sc_cinfo; cbn >= sblk + ci->ci_size; ci++)
  538                         sblk += ci->ci_size;
  539                 cbn -= sblk;
  540         } else {
  541                 struct ccdiinfo *ii;
  542                 int ccdisk;
                      /*
                       * Block-number difference: must be as wide as daddr_t, an
                       * int would truncate on large volumes with a small
                       * interleave.
                       */
                      daddr_t off;
  543 
  544                 /*
  545                  * Calculate cbn, the logical superblock (sc_ileave chunks),
  546                  * and cboff, a normal block offset (DEV_BSIZE chunks) relative
  547                  * to cbn.
  548                  */
  549                 cboff = cbn % cs->sc_ileave;    /* DEV_BSIZE gran */
  550                 cbn = cbn / cs->sc_ileave;      /* DEV_BSIZE * ileave gran */
  551 
  552                 /*
  553                  * Figure out which interleave table to use: the last entry
                       * whose starting superblock is not beyond cbn.
  554                  */
  555                 for (ii = cs->sc_itable; ii->ii_ndisk; ii++) {
  556                         if (ii->ii_startblk > cbn)
  557                                 break;
  558                 }
  559                 ii--;
  560 
  561                 /*
  562                  * off is the logical superblock relative to the beginning
  563                  * of this interleave block.
  564                  */
  565                 off = cbn - ii->ii_startblk;
  566 
  567                 /*
  568                  * We must calculate which disk component to use (ccdisk),
  569                  * and recalculate cbn to be the superblock relative to
  570                  * the beginning of the component.  This is typically done by
  571                  * adding 'off' and ii->ii_startoff together.  However, 'off'
  572                  * must typically be divided by the number of components in
  573                  * this interleave array to properly convert it from a
  574                  * CCD-relative logical superblock number to a
  575                  * component-relative superblock number.
  576                  */
  577                 if (ii->ii_ndisk == 1) {
  578                         /*
  579                          * When we have just one disk, it can't be a mirror
  580                          * or a parity config.
  581                          */
  582                         ccdisk = ii->ii_index[0];
  583                         cbn = ii->ii_startoff + off;
  584                 } else {
  585                         if (cs->sc_flags & CCDF_MIRROR) {
  586                                 /*
  587                                  * We have forced a uniform mapping, resulting
  588                                  * in a single interleave array.  We double
  589                                  * up on the first half of the available
  590                                  * components and our mirror is in the second
  591                                  * half.  This only works with a single
  592                                  * interleave array because doubling up
  593                                  * doubles the number of sectors, so there
  594                                  * cannot be another interleave array because
  595                                  * the next interleave array's calculations
  596                                  * would be off.
  597                                  */
  598                                 int ndisk2 = ii->ii_ndisk / 2;
  599                                 ccdisk = ii->ii_index[off % ndisk2];
  600                                 cbn = ii->ii_startoff + off / ndisk2;
  601                                 ci2 = &cs->sc_cinfo[ccdisk + ndisk2];
  602                         } else {
  603                                 ccdisk = ii->ii_index[off % ii->ii_ndisk];
  604                                 cbn = ii->ii_startoff + off / ii->ii_ndisk;
  605                         }
  606                 }
  607 
  608                 ci = &cs->sc_cinfo[ccdisk];
  609 
  610                 /*
  611                  * Convert cbn from a superblock to a normal block so it
  612                  * can be used to calculate (along with cboff) the normal
  613                  * block index into this particular disk.
  614                  */
  615                 cbn *= cs->sc_ileave;
  616         }
  617 
  618         /*
  619          * Fill in the component buf structure.
  620          */
  621         cbp = g_clone_bio(bp);
  622         if (cbp == NULL)
  623                 return (ENOMEM);
  624         cbp->bio_done = g_std_done;
  625         cbp->bio_offset = dbtob(cbn + cboff + cs->sc_offset);
  626         cbp->bio_data = addr;
              /* Bytes left in this component (serial) or interleave chunk. */
  627         if (cs->sc_ileave == 0)
  628               cbc = dbtob((off_t)(ci->ci_size - cbn));
  629         else
  630               cbc = dbtob((off_t)(cs->sc_ileave - cboff));
  631         cbp->bio_length = (cbc < bcount) ? cbc : bcount;
  632 
  633         cbp->bio_from = ci->ci_consumer;
  634         cb[0] = cbp;
  635 
  636         if (cs->sc_flags & CCDF_MIRROR) {
  637                 cbp = g_clone_bio(bp);
  638                 if (cbp == NULL) {
                              /*
                               * Do not leak the first clone: destroy it and
                               * undo the child count g_clone_bio() added to
                               * the parent for it.
                               */
                              g_destroy_bio(cb[0]);
                              bp->bio_children--;
                              cb[0] = NULL;
  639                         return (ENOMEM);
                      }
                      /*
                       * Same offset, data and length on the mirror component.
                       * The two children point at each other through
                       * bio_caller1 so ccdiodone() can find the partner.
                       */
  640                 cbp->bio_done = cb[0]->bio_done = ccdiodone;
  641                 cbp->bio_offset = cb[0]->bio_offset;
  642                 cbp->bio_data = cb[0]->bio_data;
  643                 cbp->bio_length = cb[0]->bio_length;
  644                 cbp->bio_from = ci2->ci_consumer;
  645                 cbp->bio_caller1 = cb[0];
  646                 cb[0]->bio_caller1 = cbp;
  647                 cb[1] = cbp;
  648         }
  649         return (0);
  650 }
  651 
  652 /*
  653  * Called only for mirrored operations.
       *
       * ccdbuffer() installs this as bio_done on both children of a mirrored
       * request and links the two through bio_caller1.  A NULL bio_caller1
       * means the partner has already been dealt with, and the bio is simply
       * handed to g_std_done().
       *
       * Reads (g_ccd_start() issued only one of the two children):
       *   - success: the unissued partner is counted in the parent's
       *     bio_inbed and destroyed, then cbp goes to g_std_done();
       *   - failure: cbp is counted and destroyed and the partner is issued
       *     in its place; the partner's own result is then final.
       *
       * Other commands (both children were issued): the first child to
       * complete unlinks itself from its partner, stores its error in the
       * parent if none is recorded yet, is counted and destroyed; the second
       * child then goes to g_std_done().
  654  */
  655 static void
  656 ccdiodone(struct bio *cbp)
  657 {
  658         struct bio *mbp, *pbp;
  659 
              /* mbp: mirror partner (or NULL), pbp: the original request. */
  660         mbp = cbp->bio_caller1;
  661         pbp = cbp->bio_parent;
  662 
  663         if (pbp->bio_cmd == BIO_READ) {
  664                 if (cbp->bio_error == 0) {
  665                         /* We will not be needing the partner bio */
  666                         if (mbp != NULL) {
  667                                 pbp->bio_inbed++;
  668                                 g_destroy_bio(mbp);
  669                         }
  670                         g_std_done(cbp);
  671                         return;
  672                 }
  673                 if (mbp != NULL) {
  674                         /* Try the partner bio instead */
  675                         mbp->bio_caller1 = NULL;
  676                         pbp->bio_inbed++;
  677                         g_destroy_bio(cbp);
  678                         g_io_request(mbp, mbp->bio_from);
  679                         /*
  680                          * XXX: If this comes back OK, we should actually
  681                          * try to write the good data on the failed mirror
  682                          */
  683                         return;
  684                 }
                      /* Second attempt failed as well: report it. */
  685                 g_std_done(cbp);
  686                 return;
  687         }
  688         if (mbp != NULL) {
                      /* First of the two children to finish. */
  689                 mbp->bio_caller1 = NULL;
  690                 pbp->bio_inbed++;
  691                 if (cbp->bio_error != 0 && pbp->bio_error == 0)
  692                         pbp->bio_error = cbp->bio_error;
  693                 g_destroy_bio(cbp);
  694                 return;
  695         }
  696         g_std_done(cbp);
  697 }
  698 
  699 static void
  700 g_ccd_create(struct gctl_req *req, struct g_class *mp)
  701 {
  702         int *unit, *ileave, *nprovider;
  703         struct g_geom *gp;
  704         struct g_consumer *cp;
  705         struct g_provider *pp;
  706         struct ccd_s *sc;
  707         struct sbuf *sb;
  708         char buf[20];
  709         int i, error;
  710 
  711         g_topology_assert();
  712         unit = gctl_get_paraml(req, "unit", sizeof (*unit));
  713         if (unit == NULL) {
  714                 gctl_error(req, "unit parameter not given");
  715                 return;
  716         }
  717         ileave = gctl_get_paraml(req, "ileave", sizeof (*ileave));
  718         if (ileave == NULL) {
  719                 gctl_error(req, "ileave parameter not given");
  720                 return;
  721         }
  722         nprovider = gctl_get_paraml(req, "nprovider", sizeof (*nprovider));
  723         if (nprovider == NULL) {
  724                 gctl_error(req, "nprovider parameter not given");
  725                 return;
  726         }
  727 
  728         /* Check for duplicate unit */
  729         LIST_FOREACH(gp, &mp->geom, geom) {
  730                 sc = gp->softc;
  731                 if (sc != NULL && sc->sc_unit == *unit) {
  732                         gctl_error(req, "Unit %d already configured", *unit);
  733                         return;
  734                 }
  735         }
  736 
  737         if (*nprovider <= 0) {
  738                 gctl_error(req, "Bogus nprovider argument (= %d)", *nprovider);
  739                 return;
  740         }
  741 
  742         /* Check all providers are valid */
  743         for (i = 0; i < *nprovider; i++) {
  744                 sprintf(buf, "provider%d", i);
  745                 pp = gctl_get_provider(req, buf);
  746                 if (pp == NULL)
  747                         return;
  748         }
  749 
  750         gp = g_new_geomf(mp, "ccd%d", *unit);
  751         sc = g_malloc(sizeof *sc, M_WAITOK | M_ZERO);
  752         gp->softc = sc;
  753         sc->sc_ndisks = *nprovider;
  754 
  755         /* Allocate space for the component info. */
  756         sc->sc_cinfo = g_malloc(sc->sc_ndisks * sizeof(struct ccdcinfo),
  757             M_WAITOK | M_ZERO);
  758 
  759         /* Create consumers and attach to all providers */
  760         for (i = 0; i < *nprovider; i++) {
  761                 sprintf(buf, "provider%d", i);
  762                 pp = gctl_get_provider(req, buf);
  763                 cp = g_new_consumer(gp);
  764                 error = g_attach(cp, pp);
  765                 KASSERT(error == 0, ("attach to %s failed", pp->name));
  766                 sc->sc_cinfo[i].ci_consumer = cp;
  767                 sc->sc_cinfo[i].ci_provider = pp;
  768         }
  769 
  770         sc->sc_unit = *unit;
  771         sc->sc_ileave = *ileave;
  772 
  773         if (gctl_get_param(req, "no_offset", NULL))
  774                 sc->sc_flags |= CCDF_NO_OFFSET;
  775         if (gctl_get_param(req, "linux", NULL))
  776                 sc->sc_flags |= CCDF_LINUX;
  777 
  778         if (gctl_get_param(req, "uniform", NULL))
  779                 sc->sc_flags |= CCDF_UNIFORM;
  780         if (gctl_get_param(req, "mirror", NULL))
  781                 sc->sc_flags |= CCDF_MIRROR;
  782 
  783         if (sc->sc_ileave == 0 && (sc->sc_flags & CCDF_MIRROR)) {
  784                 printf("%s: disabling mirror, interleave is 0\n", gp->name);
  785                 sc->sc_flags &= ~(CCDF_MIRROR);
  786         }
  787 
  788         if ((sc->sc_flags & CCDF_MIRROR) && !(sc->sc_flags & CCDF_UNIFORM)) {
  789                 printf("%s: mirror/parity forces uniform flag\n", gp->name);
  790                 sc->sc_flags |= CCDF_UNIFORM;
  791         }
  792 
  793         error = ccdinit(req, sc);
  794         if (error != 0) {
  795                 g_ccd_freesc(sc);
  796                 gp->softc = NULL;
  797                 g_wither_geom(gp, ENXIO);
  798                 return;
  799         }
  800 
  801         pp = g_new_providerf(gp, "%s", gp->name);
  802         pp->mediasize = sc->sc_size * (off_t)sc->sc_secsize;
  803         pp->sectorsize = sc->sc_secsize;
  804         g_error_provider(pp, 0);
  805 
  806         sb = sbuf_new_auto();
  807         sbuf_printf(sb, "ccd%d: %d components ", sc->sc_unit, *nprovider);
  808         for (i = 0; i < *nprovider; i++) {
  809                 sbuf_printf(sb, "%s%s",
  810                     i == 0 ? "(" : ", ", 
  811                     sc->sc_cinfo[i].ci_provider->name);
  812         }
  813         sbuf_printf(sb, "), %jd blocks ", (off_t)pp->mediasize / DEV_BSIZE);
  814         if (sc->sc_ileave != 0)
  815                 sbuf_printf(sb, "interleaved at %d blocks\n",
  816                         sc->sc_ileave);
  817         else
  818                 sbuf_printf(sb, "concatenated\n");
  819         sbuf_finish(sb);
  820         gctl_set_param_err(req, "output", sbuf_data(sb), sbuf_len(sb) + 1);
  821         sbuf_delete(sb);
  822 }
  823 
  824 static int
  825 g_ccd_destroy_geom(struct gctl_req *req, struct g_class *mp, struct g_geom *gp)
  826 {
  827         struct g_provider *pp;
  828         struct ccd_s *sc;
  829 
  830         g_topology_assert();
  831         sc = gp->softc;
  832         pp = LIST_FIRST(&gp->provider);
  833         if (sc == NULL || pp == NULL)
  834                 return (EBUSY);
  835         if (pp->acr != 0 || pp->acw != 0 || pp->ace != 0) {
  836                 gctl_error(req, "%s is open(r%dw%de%d)", gp->name,
  837                     pp->acr, pp->acw, pp->ace);
  838                 return (EBUSY);
  839         }
  840         g_ccd_freesc(sc);
  841         gp->softc = NULL;
  842         g_wither_geom(gp, ENXIO);
  843         return (0);
  844 }
  845 
  846 static void
  847 g_ccd_list(struct gctl_req *req, struct g_class *mp)
  848 {
  849         struct sbuf *sb;
  850         struct ccd_s *cs;
  851         struct g_geom *gp;
  852         int i, unit, *up;
  853 
  854         up = gctl_get_paraml(req, "unit", sizeof (*up));
  855         if (up == NULL) {
  856                 gctl_error(req, "unit parameter not given");
  857                 return;
  858         }
  859         unit = *up;
  860         sb = sbuf_new_auto();
  861         LIST_FOREACH(gp, &mp->geom, geom) {
  862                 cs = gp->softc;
  863                 if (cs == NULL || (unit >= 0 && unit != cs->sc_unit))
  864                         continue;
  865                 sbuf_printf(sb, "ccd%d\t\t%d\t%d\t",
  866                     cs->sc_unit, cs->sc_ileave, cs->sc_flags & CCDF_USERMASK);
  867                         
  868                 for (i = 0; i < cs->sc_ndisks; ++i) {
  869                         sbuf_printf(sb, "%s/dev/%s", i == 0 ? "" : " ",
  870                             cs->sc_cinfo[i].ci_provider->name);
  871                 }
  872                 sbuf_printf(sb, "\n");
  873         }
  874         sbuf_finish(sb);
  875         gctl_set_param_err(req, "output", sbuf_data(sb), sbuf_len(sb) + 1);
  876         sbuf_delete(sb);
  877 }
  878 
  879 static void
  880 g_ccd_config(struct gctl_req *req, struct g_class *mp, char const *verb)
  881 {
  882         struct g_geom *gp;
  883 
  884         g_topology_assert();
  885         if (!strcmp(verb, "create geom")) {
  886                 g_ccd_create(req, mp);
  887         } else if (!strcmp(verb, "destroy geom")) {
  888                 gp = gctl_get_geom(req, mp, "geom");
  889                 if (gp != NULL)
  890                 g_ccd_destroy_geom(req, mp, gp);
  891         } else if (!strcmp(verb, "list")) {
  892                 g_ccd_list(req, mp);
  893         } else {
  894                 gctl_error(req, "unknown verb");
  895         }
  896 }
  897 
  898 static struct g_class g_ccd_class = {
  899         .name = "CCD",
  900         .version = G_VERSION,
  901         .ctlreq = g_ccd_config,
  902         .destroy_geom = g_ccd_destroy_geom,
  903         .start = g_ccd_start,
  904         .orphan = g_ccd_orphan,
  905         .access = g_ccd_access,
  906 };
  907 
  908 DECLARE_GEOM_CLASS(g_ccd_class, g_ccd);

Cache object: 79741ec9f9e88fc617e9da2fcf53ed94


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.