The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/geom/union/g_union.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
    3  *
    4  * Copyright (c) 2022 Marshall Kirk McKusick <mckusick@mckusick.com>
    5  *
    6  * Redistribution and use in source and binary forms, with or without
    7  * modification, are permitted provided that the following conditions
    8  * are met:
    9  * 1. Redistributions of source code must retain the above copyright
   10  *    notice, this list of conditions and the following disclaimer.
   11  * 2. Redistributions in binary form must reproduce the above copyright
   12  *    notice, this list of conditions and the following disclaimer in the
   13  *    documentation and/or other materials provided with the distribution.
   14  *
   15  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
   16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
   19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   25  * SUCH DAMAGE.
   26  */
   27 
   28 #include <sys/param.h>
   29 #include <sys/bio.h>
   30 #include <sys/buf.h>
   31 #include <sys/ctype.h>
   32 #include <sys/kernel.h>
   33 #include <sys/lock.h>
   34 #include <sys/malloc.h>
   35 #include <sys/module.h>
   36 #include <sys/reboot.h>
   37 #include <sys/rwlock.h>
   38 #include <sys/sbuf.h>
   39 #include <sys/sysctl.h>
   40 
   41 #include <geom/geom.h>
   42 #include <geom/geom_dbg.h>
   43 #include <geom/union/g_union.h>
   44 
/*
 * Sysctl plumbing for the union class.  A "union" node is attached under
 * kern.geom, and a single read/write unsigned knob, "debug", is attached
 * under that node.  The knob is backed by g_union_debug, which starts at 0.
 */
SYSCTL_DECL(_kern_geom);
static SYSCTL_NODE(_kern_geom, OID_AUTO, union, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "GEOM_UNION stuff");
/* Current debug level; presumably consulted by G_UNION_DEBUG() -- confirm. */
static u_int g_union_debug = 0;
SYSCTL_UINT(_kern_geom_union, OID_AUTO, debug, CTLFLAG_RW, &g_union_debug, 0,
    "Debug level");
   51 
/*
 * Forward declarations of the class methods that are installed in
 * g_union_class below.
 */
static void g_union_config(struct gctl_req *req, struct g_class *mp,
    const char *verb);
static g_access_t g_union_access;
static g_start_t g_union_start;
static g_dumpconf_t g_union_dumpconf;
static g_orphan_t g_union_orphan;
static int g_union_destroy_geom(struct gctl_req *req, struct g_class *mp,
    struct g_geom *gp);
static g_provgone_t g_union_providergone;
static g_resize_t g_union_resize;

/*
 * The GEOM class descriptor for union devices.  It names the class and
 * binds each class method slot to the corresponding g_union_*() routine;
 * control requests are routed to g_union_config().
 */
struct g_class g_union_class = {
        .name = G_UNION_CLASS_NAME,
        .version = G_VERSION,
        .ctlreq = g_union_config,
        .access = g_union_access,
        .start = g_union_start,
        .dumpconf = g_union_dumpconf,
        .orphan = g_union_orphan,
        .destroy_geom = g_union_destroy_geom,
        .providergone = g_union_providergone,
        .resize = g_union_resize,
};
   75 
/*
 * Prototypes for the file-local helpers.  The g_union_ctl_*() routines are
 * the per-verb handlers dispatched from g_union_config(); each takes the
 * control request, the class, and the caller's "verbose" setting.
 */
static void g_union_ctl_create(struct gctl_req *req, struct g_class *mp, bool);
static intmax_t g_union_fetcharg(struct gctl_req *req, const char *name);
static bool g_union_verify_nprefix(const char *name);
static void g_union_ctl_destroy(struct gctl_req *req, struct g_class *mp, bool);
static struct g_geom *g_union_find_geom(struct g_class *mp, const char *name);
static void g_union_ctl_reset(struct gctl_req *req, struct g_class *mp, bool);
static void g_union_ctl_revert(struct gctl_req *req, struct g_class *mp, bool);
static void g_union_revert(struct g_union_softc *sc);
static void g_union_doio(struct g_union_wip *wip);
static void g_union_ctl_commit(struct gctl_req *req, struct g_class *mp, bool);
/* Write-map accessors and I/O path helpers. */
static void g_union_setmap(struct bio *bp, struct g_union_softc *sc);
static bool g_union_getmap(struct bio *bp, struct g_union_softc *sc,
        off_t *len2read);
static void g_union_done(struct bio *bp);
static void g_union_kerneldump(struct bio *bp, struct g_union_softc *sc);
static int g_union_dumper(void *, void *, off_t, size_t);
static int g_union_destroy(struct gctl_req *req, struct g_geom *gp, bool force);
   93 
   94 /*
   95  * Operate on union-specific configuration commands.
   96  */
   97 static void
   98 g_union_config(struct gctl_req *req, struct g_class *mp, const char *verb)
   99 {
  100         uint32_t *version, *verbose;
  101 
  102         g_topology_assert();
  103 
  104         version = gctl_get_paraml(req, "version", sizeof(*version));
  105         if (version == NULL) {
  106                 gctl_error(req, "No '%s' argument.", "version");
  107                 return;
  108         }
  109         if (*version != G_UNION_VERSION) {
  110                 gctl_error(req, "Userland and kernel parts are out of sync.");
  111                 return;
  112         }
  113         verbose = gctl_get_paraml(req, "verbose", sizeof(*verbose));
  114         if (verbose == NULL) {
  115                 gctl_error(req, "No '%s' argument.", "verbose");
  116                 return;
  117         }
  118         if (strcmp(verb, "create") == 0) {
  119                 g_union_ctl_create(req, mp, *verbose);
  120                 return;
  121         } else if (strcmp(verb, "destroy") == 0) {
  122                 g_union_ctl_destroy(req, mp, *verbose);
  123                 return;
  124         } else if (strcmp(verb, "reset") == 0) {
  125                 g_union_ctl_reset(req, mp, *verbose);
  126                 return;
  127         } else if (strcmp(verb, "revert") == 0) {
  128                 g_union_ctl_revert(req, mp, *verbose);
  129                 return;
  130         } else if (strcmp(verb, "commit") == 0) {
  131                 g_union_ctl_commit(req, mp, *verbose);
  132                 return;
  133         }
  134 
  135         gctl_error(req, "Unknown verb.");
  136 }
  137 
  138 /*
  139  * Create a union device.
  140  */
  141 static void
  142 g_union_ctl_create(struct gctl_req *req, struct g_class *mp, bool verbose)
  143 {
  144         struct g_provider *upperpp, *lowerpp, *newpp;
  145         struct g_consumer *uppercp, *lowercp;
  146         struct g_union_softc *sc;
  147         struct g_geom_alias *gap;
  148         struct g_geom *gp;
  149         intmax_t offset, secsize, size, needed;
  150         const char *gunionname;
  151         int *nargs, error, i, n;
  152         char name[64];
  153 
  154         g_topology_assert();
  155 
  156         nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs));
  157         if (nargs == NULL) {
  158                 gctl_error(req, "No '%s' argument.", "nargs");
  159                 return;
  160         }
  161         if (*nargs < 2) {
  162                 gctl_error(req, "Missing device(s).");
  163                 return;
  164         }
  165         if (*nargs > 2) {
  166                 gctl_error(req, "Extra device(s).");
  167                 return;
  168         }
  169 
  170         offset = g_union_fetcharg(req, "offset");
  171         size = g_union_fetcharg(req, "size");
  172         secsize = g_union_fetcharg(req, "secsize");
  173         gunionname = gctl_get_asciiparam(req, "gunionname");
  174 
  175         upperpp = gctl_get_provider(req, "arg0");
  176         lowerpp = gctl_get_provider(req, "arg1");
  177         if (upperpp == NULL || lowerpp == NULL)
  178                 /* error message provided by gctl_get_provider() */
  179                 return;
  180         /* Create the union */
  181         if (secsize == 0)
  182                 secsize = lowerpp->sectorsize;
  183         else if ((secsize % lowerpp->sectorsize) != 0) {
  184                 gctl_error(req, "Sector size %jd is not a multiple of lower "
  185                     "provider %s's %jd sector size.", (intmax_t)secsize,
  186                     lowerpp->name, (intmax_t)lowerpp->sectorsize);
  187                 return;
  188         }
  189         if (secsize > maxphys) {
  190                 gctl_error(req, "Too big secsize %jd for lower provider %s.",
  191                     (intmax_t)secsize, lowerpp->name);
  192                 return;
  193         }
  194         if (secsize % upperpp->sectorsize != 0) {
  195                 gctl_error(req, "Sector size %jd is not a multiple of upper "
  196                     "provider %s's %jd sector size.", (intmax_t)secsize,
  197                     upperpp->name, (intmax_t)upperpp->sectorsize);
  198                 return;
  199         }
  200         if ((offset % secsize) != 0) {
  201                 gctl_error(req, "Offset %jd is not a multiple of lower "
  202                     "provider %s's %jd sector size.", (intmax_t)offset,
  203                     lowerpp->name, (intmax_t)lowerpp->sectorsize);
  204                 return;
  205         }
  206         if (size == 0)
  207                 size = lowerpp->mediasize - offset;
  208         else
  209                 size -= offset;
  210         if ((size % secsize) != 0) {
  211                 gctl_error(req, "Size %jd is not a multiple of sector size "
  212                     "%jd.", (intmax_t)size, (intmax_t)secsize);
  213                 return;
  214         }
  215         if (offset + size < lowerpp->mediasize) {
  216                 gctl_error(req, "Size %jd is too small for lower provider %s, "
  217                     "needs %jd.", (intmax_t)(offset + size), lowerpp->name,
  218                     lowerpp->mediasize);
  219                 return;
  220         }
  221         if (size > upperpp->mediasize) {
  222                 gctl_error(req, "Upper provider %s size (%jd) is too small, "
  223                     "needs %jd.", upperpp->name, (intmax_t)upperpp->mediasize,
  224                     (intmax_t)size);
  225                 return;
  226         }
  227         if (gunionname != NULL && !g_union_verify_nprefix(gunionname)) {
  228                 gctl_error(req, "Gunion name %s must be alphanumeric.",
  229                     gunionname);
  230                 return;
  231         }
  232         if (gunionname != NULL) {
  233                 n = snprintf(name, sizeof(name), "%s%s", gunionname,
  234                     G_UNION_SUFFIX);
  235         } else {
  236                 n = snprintf(name, sizeof(name), "%s-%s%s", upperpp->name,
  237                     lowerpp->name, G_UNION_SUFFIX);
  238         }
  239         if (n <= 0 || n >= sizeof(name)) {
  240                 gctl_error(req, "Invalid provider name.");
  241                 return;
  242         }
  243         LIST_FOREACH(gp, &mp->geom, geom) {
  244                 if (strcmp(gp->name, name) == 0) {
  245                         gctl_error(req, "Provider %s already exists.", name);
  246                         return;
  247                 }
  248         }
  249         gp = g_new_geomf(mp, "%s", name);
  250         sc = g_malloc(sizeof(*sc), M_WAITOK | M_ZERO);
  251         rw_init(&sc->sc_rwlock, "gunion");
  252         TAILQ_INIT(&sc->sc_wiplist);
  253         sc->sc_offset = offset;
  254         sc->sc_size = size;
  255         sc->sc_sectorsize = secsize;
  256         sc->sc_reads = 0;
  257         sc->sc_writes = 0;
  258         sc->sc_deletes = 0;
  259         sc->sc_getattrs = 0;
  260         sc->sc_flushes = 0;
  261         sc->sc_speedups = 0;
  262         sc->sc_cmd0s = 0;
  263         sc->sc_cmd1s = 0;
  264         sc->sc_cmd2s = 0;
  265         sc->sc_readbytes = 0;
  266         sc->sc_wrotebytes = 0;
  267         sc->sc_writemap_memory = 0;
  268         gp->softc = sc;
  269 
  270         newpp = g_new_providerf(gp, "%s", gp->name);
  271         newpp->flags |= G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE;
  272         newpp->mediasize = size;
  273         newpp->sectorsize = secsize;
  274         LIST_FOREACH(gap, &upperpp->aliases, ga_next)
  275                 g_provider_add_alias(newpp, "%s%s", gap->ga_alias,
  276                     G_UNION_SUFFIX);
  277         LIST_FOREACH(gap, &lowerpp->aliases, ga_next)
  278                 g_provider_add_alias(newpp, "%s%s", gap->ga_alias,
  279                     G_UNION_SUFFIX);
  280         lowercp = g_new_consumer(gp);
  281         lowercp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
  282         if ((error = g_attach(lowercp, lowerpp)) != 0) {
  283                 gctl_error(req, "Error %d: cannot attach to provider %s.",
  284                     error, lowerpp->name);
  285                 goto fail1;
  286         }
  287         /* request read and exclusive access for lower */
  288         if ((error = g_access(lowercp, 1, 0, 1)) != 0) {
  289                 gctl_error(req, "Error %d: cannot obtain exclusive access to "
  290                     "%s.\n\tMust be unmounted or mounted read-only.", error,
  291                     lowerpp->name);
  292                 goto fail2;
  293         }
  294         uppercp = g_new_consumer(gp);
  295         uppercp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
  296         if ((error = g_attach(uppercp, upperpp)) != 0) {
  297                 gctl_error(req, "Error %d: cannot attach to provider %s.",
  298                     error, upperpp->name);
  299                 goto fail3;
  300         }
  301         /* request read, write, and exclusive access for upper */
  302         if ((error = g_access(uppercp, 1, 1, 1)) != 0) {
  303                 gctl_error(req, "Error %d: cannot obtain write access to %s.",
  304                     error, upperpp->name);
  305                 goto fail4;
  306         }
  307         sc->sc_uppercp = uppercp;
  308         sc->sc_lowercp = lowercp;
  309 
  310         newpp->flags |= (upperpp->flags & G_PF_ACCEPT_UNMAPPED) &
  311             (lowerpp->flags & G_PF_ACCEPT_UNMAPPED);
  312         g_error_provider(newpp, 0);
  313         /*
  314          * Allocate the map that tracks the sectors that have been written
  315          * to the top layer. We use a 2-level hierarchy as that lets us
  316          * map up to 1 petabyte using allocations of less than 33 Mb
  317          * when using 4K byte sectors (or 268 Mb with 512 byte sectors).
  318          *
  319          * We totally populate the leaf nodes rather than allocating them
  320          * as they are first used because their usage occurs in the
  321          * g_union_start() routine that may be running in the g_down
  322          * thread which cannot sleep.
  323          */
  324         sc->sc_map_size = roundup(size / secsize, BITS_PER_ENTRY);
  325         needed = sc->sc_map_size / BITS_PER_ENTRY;
  326         for (sc->sc_root_size = 1;
  327              sc->sc_root_size * sc->sc_root_size < needed;
  328              sc->sc_root_size++)
  329                 continue;
  330         sc->sc_writemap_root = g_malloc(sc->sc_root_size * sizeof(uint64_t *),
  331             M_WAITOK | M_ZERO);
  332         sc->sc_leaf_size = sc->sc_root_size;
  333         sc->sc_bits_per_leaf = sc->sc_leaf_size * BITS_PER_ENTRY;
  334         sc->sc_leafused = g_malloc(roundup(sc->sc_root_size, BITS_PER_ENTRY),
  335             M_WAITOK | M_ZERO);
  336         for (i = 0; i < sc->sc_root_size; i++)
  337                 sc->sc_writemap_root[i] =
  338                     g_malloc(sc->sc_leaf_size * sizeof(uint64_t),
  339                     M_WAITOK | M_ZERO);
  340         sc->sc_writemap_memory =
  341             (sc->sc_root_size + sc->sc_root_size * sc->sc_leaf_size) *
  342             sizeof(uint64_t) + roundup(sc->sc_root_size, BITS_PER_ENTRY);
  343         if (verbose)
  344                 gctl_msg(req, 0, "Device %s created with memory map size %jd.",
  345                     gp->name, (intmax_t)sc->sc_writemap_memory);
  346         gctl_post_messages(req);
  347         G_UNION_DEBUG(1, "Device %s created with memory map size %jd.",
  348             gp->name, (intmax_t)sc->sc_writemap_memory);
  349         return;
  350 
  351 fail4:
  352         g_detach(uppercp);
  353 fail3:
  354         g_destroy_consumer(uppercp);
  355         g_access(lowercp, -1, 0, -1);
  356 fail2:
  357         g_detach(lowercp);
  358 fail1:
  359         g_destroy_consumer(lowercp);
  360         g_destroy_provider(newpp);
  361         g_destroy_geom(gp);
  362 }
  363 
  364 /*
  365  * Fetch named option and verify that it is positive.
  366  */
  367 static intmax_t
  368 g_union_fetcharg(struct gctl_req *req, const char *name)
  369 {
  370         intmax_t *val;
  371 
  372         val = gctl_get_paraml_opt(req, name, sizeof(*val));
  373         if (val == NULL)
  374                 return (0);
  375         if (*val >= 0)
  376                 return (*val);
  377         gctl_msg(req, EINVAL, "Invalid '%s' (%jd): negative value, "
  378             "using default.", name, *val);
  379         return (0);
  380 }
  381 
  382 /*
  383  * Verify that a name is alphanumeric.
  384  */
  385 static bool
  386 g_union_verify_nprefix(const char *name)
  387 {
  388         int i;
  389 
  390         for (i = 0; i < strlen(name); i++) {
  391                 if (isalpha(name[i]) == 0 && isdigit(name[i]) == 0) {
  392                         return (false);
  393                 }
  394         }
  395         return (true);
  396 }
  397 
  398 /*
  399  * Destroy a union device.
  400  */
  401 static void
  402 g_union_ctl_destroy(struct gctl_req *req, struct g_class *mp, bool verbose)
  403 {
  404         int *nargs, *force, error, i;
  405         struct g_geom *gp;
  406         const char *name;
  407         char param[16];
  408 
  409         g_topology_assert();
  410 
  411         nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs));
  412         if (nargs == NULL) {
  413                 gctl_error(req, "No '%s' argument.", "nargs");
  414                 return;
  415         }
  416         if (*nargs <= 0) {
  417                 gctl_error(req, "Missing device(s).");
  418                 return;
  419         }
  420         force = gctl_get_paraml(req, "force", sizeof(*force));
  421         if (force == NULL) {
  422                 gctl_error(req, "No 'force' argument.");
  423                 return;
  424         }
  425 
  426         for (i = 0; i < *nargs; i++) {
  427                 snprintf(param, sizeof(param), "arg%d", i);
  428                 name = gctl_get_asciiparam(req, param);
  429                 if (name == NULL) {
  430                         gctl_msg(req, EINVAL, "No '%s' argument.", param);
  431                         continue;
  432                 }
  433                 if (strncmp(name, _PATH_DEV, strlen(_PATH_DEV)) == 0)
  434                         name += strlen(_PATH_DEV);
  435                 gp = g_union_find_geom(mp, name);
  436                 if (gp == NULL) {
  437                         gctl_msg(req, EINVAL, "Device %s is invalid.", name);
  438                         continue;
  439                 }
  440                 error = g_union_destroy(verbose ? req : NULL, gp, *force);
  441                 if (error != 0)
  442                         gctl_msg(req, error, "Error %d: "
  443                             "cannot destroy device %s.", error, gp->name);
  444         }
  445         gctl_post_messages(req);
  446 }
  447 
  448 /*
  449  * Find a union geom.
  450  */
  451 static struct g_geom *
  452 g_union_find_geom(struct g_class *mp, const char *name)
  453 {
  454         struct g_geom *gp;
  455 
  456         LIST_FOREACH(gp, &mp->geom, geom) {
  457                 if (strcmp(gp->name, name) == 0)
  458                         return (gp);
  459         }
  460         return (NULL);
  461 }
  462 
  463 /*
  464  * Zero out all the statistics associated with a union device.
  465  */
  466 static void
  467 g_union_ctl_reset(struct gctl_req *req, struct g_class *mp, bool verbose)
  468 {
  469         struct g_union_softc *sc;
  470         struct g_provider *pp;
  471         struct g_geom *gp;
  472         char param[16];
  473         int i, *nargs;
  474 
  475         g_topology_assert();
  476 
  477         nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs));
  478         if (nargs == NULL) {
  479                 gctl_error(req, "No '%s' argument.", "nargs");
  480                 return;
  481         }
  482         if (*nargs <= 0) {
  483                 gctl_error(req, "Missing device(s).");
  484                 return;
  485         }
  486 
  487         for (i = 0; i < *nargs; i++) {
  488                 snprintf(param, sizeof(param), "arg%d", i);
  489                 pp = gctl_get_provider(req, param);
  490                 if (pp == NULL) {
  491                         gctl_msg(req, EINVAL, "No '%s' argument.", param);
  492                         continue;
  493                 }
  494                 gp = pp->geom;
  495                 if (gp->class != mp) {
  496                         gctl_msg(req, EINVAL, "Provider %s is invalid.",
  497                             pp->name);
  498                         continue;
  499                 }
  500                 sc = gp->softc;
  501                 sc->sc_reads = 0;
  502                 sc->sc_writes = 0;
  503                 sc->sc_deletes = 0;
  504                 sc->sc_getattrs = 0;
  505                 sc->sc_flushes = 0;
  506                 sc->sc_speedups = 0;
  507                 sc->sc_cmd0s = 0;
  508                 sc->sc_cmd1s = 0;
  509                 sc->sc_cmd2s = 0;
  510                 sc->sc_readbytes = 0;
  511                 sc->sc_wrotebytes = 0;
  512                 if (verbose)
  513                         gctl_msg(req, 0, "Device %s has been reset.", pp->name);
  514                 G_UNION_DEBUG(1, "Device %s has been reset.", pp->name);
  515         }
  516         gctl_post_messages(req);
  517 }
  518 
  519 /*
  520  * Revert all write requests made to the top layer of the union.
  521  */
  522 static void
  523 g_union_ctl_revert(struct gctl_req *req, struct g_class *mp, bool verbose)
  524 {
  525         struct g_union_softc *sc;
  526         struct g_provider *pp;
  527         struct g_geom *gp;
  528         char param[16];
  529         int i, *nargs;
  530 
  531         g_topology_assert();
  532 
  533         nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs));
  534         if (nargs == NULL) {
  535                 gctl_error(req, "No '%s' argument.", "nargs");
  536                 return;
  537         }
  538         if (*nargs <= 0) {
  539                 gctl_error(req, "Missing device(s).");
  540                 return;
  541         }
  542 
  543         for (i = 0; i < *nargs; i++) {
  544                 snprintf(param, sizeof(param), "arg%d", i);
  545                 pp = gctl_get_provider(req, param);
  546                 if (pp == NULL) {
  547                         gctl_msg(req, EINVAL, "No '%s' argument.", param);
  548                         continue;
  549                 }
  550                 gp = pp->geom;
  551                 if (gp->class != mp) {
  552                         gctl_msg(req, EINVAL, "Provider %s is invalid.",
  553                             pp->name);
  554                         continue;
  555                 }
  556                 sc = gp->softc;
  557                 if (g_union_get_writelock(sc) != 0) {
  558                         gctl_msg(req, EINVAL, "Revert already in progress for "
  559                             "provider %s.", pp->name);
  560                         continue;
  561                 }
  562                 /*
  563                  * No mount or other use of union is allowed.
  564                  */
  565                 if (pp->acr > 0 || pp->acw > 0 || pp->ace > 0) {
  566                         gctl_msg(req, EPERM, "Unable to get exclusive access "
  567                             "for reverting of %s;\n\t%s cannot be mounted or "
  568                             "otherwise open during a revert.",
  569                              pp->name, pp->name);
  570                         g_union_rel_writelock(sc);
  571                         continue;
  572                 }
  573                 g_union_revert(sc);
  574                 g_union_rel_writelock(sc);
  575                 if (verbose)
  576                         gctl_msg(req, 0, "Device %s has been reverted.",
  577                             pp->name);
  578                 G_UNION_DEBUG(1, "Device %s has been reverted.", pp->name);
  579         }
  580         gctl_post_messages(req);
  581 }
  582 
  583 /*
  584  * Revert union writes by zero'ing out the writemap.
  585  */
  586 static void
  587 g_union_revert(struct g_union_softc *sc)
  588 {
  589         int i;
  590 
  591         G_WLOCK(sc);
  592         for (i = 0; i < sc->sc_root_size; i++)
  593                 memset(sc->sc_writemap_root[i], 0,
  594                     sc->sc_leaf_size * sizeof(uint64_t));
  595         memset(sc->sc_leafused, 0, roundup(sc->sc_root_size, BITS_PER_ENTRY));
  596         G_WUNLOCK(sc);
  597 }
  598 
  599 /*
  600  * Commit all the writes made in the top layer to the lower layer.
  601  */
  602 static void
  603 g_union_ctl_commit(struct gctl_req *req, struct g_class *mp, bool verbose)
  604 {
  605         struct g_union_softc *sc;
  606         struct g_provider *pp, *lowerpp;
  607         struct g_consumer *lowercp;
  608         struct g_geom *gp;
  609         struct bio *bp;
  610         char param[16];
  611         off_t len2rd, len2wt, savelen;
  612         int i, error, error1, *nargs, *force, *reboot;
  613 
  614         g_topology_assert();
  615 
  616         nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs));
  617         if (nargs == NULL) {
  618                 gctl_error(req, "No '%s' argument.", "nargs");
  619                 return;
  620         }
  621         if (*nargs <= 0) {
  622                 gctl_error(req, "Missing device(s).");
  623                 return;
  624         }
  625         force = gctl_get_paraml(req, "force", sizeof(*force));
  626         if (force == NULL) {
  627                 gctl_error(req, "No 'force' argument.");
  628                 return;
  629         }
  630         reboot = gctl_get_paraml(req, "reboot", sizeof(*reboot));
  631         if (reboot == NULL) {
  632                 gctl_error(req, "No 'reboot' argument.");
  633                 return;
  634         }
  635 
  636         /* Get a bio buffer to do our I/O */
  637         bp = g_alloc_bio();
  638         bp->bio_data = g_malloc(MAXBSIZE, M_WAITOK);
  639         bp->bio_done = biodone;
  640         for (i = 0; i < *nargs; i++) {
  641                 snprintf(param, sizeof(param), "arg%d", i);
  642                 pp = gctl_get_provider(req, param);
  643                 if (pp == NULL) {
  644                         gctl_msg(req, EINVAL, "No '%s' argument.", param);
  645                         continue;
  646                 }
  647                 gp = pp->geom;
  648                 if (gp->class != mp) {
  649                         gctl_msg(req, EINVAL, "Provider %s is invalid.",
  650                             pp->name);
  651                         continue;
  652                 }
  653                 sc = gp->softc;
  654                 if (g_union_get_writelock(sc) != 0) {
  655                         gctl_msg(req, EINVAL, "Commit already in progress for "
  656                             "provider %s.", pp->name);
  657                         continue;
  658                 }
  659         
  660                 /* upgrade to write access for lower */
  661                 lowercp = sc->sc_lowercp;
  662                 lowerpp = lowercp->provider;
  663                 /*
  664                  * No mount or other use of union is allowed, unless the
  665                  * -f flag is given which allows read-only mount or usage.
  666                  */
  667                 if ((*force == false && pp->acr > 0) || pp->acw > 0 ||
  668                      pp->ace > 0) {
  669                         gctl_msg(req, EPERM, "Unable to get exclusive access "
  670                             "for writing of %s.\n\tNote that %s cannot be "
  671                             "mounted or otherwise\n\topen during a commit "
  672                             "unless the -f flag is used.", pp->name, pp->name);
  673                         g_union_rel_writelock(sc);
  674                         continue;
  675                 }
  676                 /*
  677                  * No mount or other use of lower media is allowed, unless the
  678                  * -f flag is given which allows read-only mount or usage.
  679                  */
  680                 if ((*force == false && lowerpp->acr > lowercp->acr) ||
  681                      lowerpp->acw > lowercp->acw ||
  682                      lowerpp->ace > lowercp->ace) {
  683                         gctl_msg(req, EPERM, "provider %s is unable to get "
  684                             "exclusive access to %s\n\tfor writing. Note that "
  685                             "%s cannot be mounted or otherwise open\n\tduring "
  686                             "a commit unless the -f flag is used.", pp->name,
  687                             lowerpp->name, lowerpp->name);
  688                         g_union_rel_writelock(sc);
  689                         continue;
  690                 }
  691                 if ((error = g_access(lowercp, 0, 1, 0)) != 0) {
  692                         gctl_msg(req, error, "Error %d: provider %s is unable "
  693                             "to access %s for writing.", error, pp->name,
  694                             lowerpp->name);
  695                         g_union_rel_writelock(sc);
  696                         continue;
  697                 }
  698                 g_topology_unlock();
  699                 /* Loop over write map copying across written blocks */
  700                 bp->bio_offset = 0;
  701                 bp->bio_length = sc->sc_map_size * sc->sc_sectorsize;
  702                 G_RLOCK(sc);
  703                 error = 0;
  704                 while (bp->bio_length > 0) {
  705                         if (!g_union_getmap(bp, sc, &len2rd)) {
  706                                 /* not written, so skip */
  707                                 bp->bio_offset += len2rd;
  708                                 bp->bio_length -= len2rd;
  709                                 continue;
  710                         }
  711                         G_RUNLOCK(sc);
  712                         /* need to read then write len2rd sectors */
  713                         for ( ; len2rd > 0; len2rd -= len2wt) {
  714                                 /* limit ourselves to MAXBSIZE size I/Os */
  715                                 len2wt = len2rd;
  716                                 if (len2wt > MAXBSIZE)
  717                                         len2wt = MAXBSIZE;
  718                                 savelen = bp->bio_length;
  719                                 bp->bio_length = len2wt;
  720                                 bp->bio_cmd = BIO_READ;
  721                                 g_io_request(bp, sc->sc_uppercp);
  722                                 if ((error = biowait(bp, "rdunion")) != 0) {
  723                                         gctl_msg(req, error, "Commit read "
  724                                             "error %d in provider %s, commit "
  725                                             "aborted.", error, pp->name);
  726                                         goto cleanup;
  727                                 }
  728                                 bp->bio_flags &= ~BIO_DONE;
  729                                 bp->bio_cmd = BIO_WRITE;
  730                                 g_io_request(bp, lowercp);
  731                                 if ((error = biowait(bp, "wtunion")) != 0) {
  732                                         gctl_msg(req, error, "Commit write "
  733                                             "error %d in provider %s, commit "
  734                                             "aborted.", error, pp->name);
  735                                         goto cleanup;
  736                                 }
  737                                 bp->bio_flags &= ~BIO_DONE;
  738                                 bp->bio_offset += len2wt;
  739                                 bp->bio_length = savelen - len2wt;
  740                         }
  741                         G_RLOCK(sc);
  742                 }
  743                 G_RUNLOCK(sc);
  744                 /* clear the write map */
  745                 g_union_revert(sc);
  746 cleanup:
  747                 g_topology_lock();
  748                 /* return lower to previous access */
  749                 if ((error1 = g_access(lowercp, 0, -1, 0)) != 0) {
  750                         G_UNION_DEBUG(2, "Error %d: device %s could not reset "
  751                             "access to %s (r=0 w=-1 e=0).", error1, pp->name,
  752                             lowerpp->name);
  753                 }
  754                 g_union_rel_writelock(sc);
  755                 if (error == 0 && verbose)
  756                         gctl_msg(req, 0, "Device %s has been committed.",
  757                             pp->name);
  758                 G_UNION_DEBUG(1, "Device %s has been committed.", pp->name);
  759         }
  760         gctl_post_messages(req);
  761         g_free(bp->bio_data);
  762         g_destroy_bio(bp);
  763         if (*reboot)
  764                 kern_reboot(RB_AUTOBOOT);
  765 }
  766 
  767 /*
  768  * Generally allow access unless a commit is in progress.
  769  */
  770 static int
  771 g_union_access(struct g_provider *pp, int r, int w, int e)
  772 {
  773         struct g_union_softc *sc;
  774 
  775         sc = pp->geom->softc;
  776         if (sc == NULL) {
  777                 if (r <= 0 && w <= 0 && e <= 0)
  778                         return (0);
  779                 return (ENXIO);
  780         }
  781         r += pp->acr;
  782         w += pp->acw;
  783         e += pp->ace;
  784         if (g_union_get_writelock(sc) != 0) {
  785                 if ((pp->acr + pp->acw + pp->ace) > 0 && (r + w + e) == 0)
  786                         return (0);
  787                 return (EBUSY);
  788         }
  789         g_union_rel_writelock(sc);
  790         return (0);
  791 }
  792 
  793 /*
  794  * Initiate an I/O operation on the union device.
  795  */
  796 static void
  797 g_union_start(struct bio *bp)
  798 {
  799         struct g_union_softc *sc;
  800         struct g_union_wip *wip;
  801         struct bio *cbp;
  802 
  803         sc = bp->bio_to->geom->softc;
  804         if (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE) {
  805                 wip = g_malloc(sizeof(*wip), M_NOWAIT);
  806                 if (wip == NULL) {
  807                         g_io_deliver(bp, ENOMEM);
  808                         return;
  809                 }
  810                 TAILQ_INIT(&wip->wip_waiting);
  811                 wip->wip_bp = bp;
  812                 wip->wip_sc = sc;
  813                 wip->wip_start = bp->bio_offset + sc->sc_offset;
  814                 wip->wip_end = wip->wip_start + bp->bio_length - 1;
  815                 wip->wip_numios = 1;
  816                 wip->wip_error = 0;
  817                 g_union_doio(wip);
  818                 return;
  819         }
  820 
  821         /*
  822          * All commands other than read and write are passed through to
  823          * the upper-level device since it is writable and thus able to
  824          * respond to delete, flush, and speedup requests.
  825          */
  826         cbp = g_clone_bio(bp);
  827         if (cbp == NULL) {
  828                 g_io_deliver(bp, ENOMEM);
  829                 return;
  830         }
  831         cbp->bio_offset = bp->bio_offset + sc->sc_offset;
  832         cbp->bio_done = g_std_done;
  833 
  834         switch (cbp->bio_cmd) {
  835         case BIO_DELETE:
  836                 G_UNION_LOGREQ(cbp, "Delete request received.");
  837                 atomic_add_long(&sc->sc_deletes, 1);
  838                 break;
  839         case BIO_GETATTR:
  840                 G_UNION_LOGREQ(cbp, "Getattr request received.");
  841                 atomic_add_long(&sc->sc_getattrs, 1);
  842                 if (strcmp(cbp->bio_attribute, "GEOM::kerneldump") != 0)
  843                         /* forward the GETATTR to the lower-level device */
  844                         break;
  845                 g_union_kerneldump(bp, sc);
  846                 return;
  847         case BIO_FLUSH:
  848                 G_UNION_LOGREQ(cbp, "Flush request received.");
  849                 atomic_add_long(&sc->sc_flushes, 1);
  850                 break;
  851         case BIO_SPEEDUP:
  852                 G_UNION_LOGREQ(cbp, "Speedup request received.");
  853                 atomic_add_long(&sc->sc_speedups, 1);
  854                 break;
  855         case BIO_CMD0:
  856                 G_UNION_LOGREQ(cbp, "Cmd0 request received.");
  857                 atomic_add_long(&sc->sc_cmd0s, 1);
  858                 break;
  859         case BIO_CMD1:
  860                 G_UNION_LOGREQ(cbp, "Cmd1 request received.");
  861                 atomic_add_long(&sc->sc_cmd1s, 1);
  862                 break;
  863         case BIO_CMD2:
  864                 G_UNION_LOGREQ(cbp, "Cmd2 request received.");
  865                 atomic_add_long(&sc->sc_cmd2s, 1);
  866                 break;
  867         default:
  868                 G_UNION_LOGREQ(cbp, "Unknown (%d) request received.",
  869                     cbp->bio_cmd);
  870                 break;
  871         }
  872         g_io_request(cbp, sc->sc_uppercp);
  873 }
  874 
  875 /*
  876  * Initiate a read or write operation on the union device.
  877  */
  878 static void
  879 g_union_doio(struct g_union_wip *wip)
  880 {
  881         struct g_union_softc *sc;
  882         struct g_consumer *cp, *firstcp;
  883         struct g_union_wip *activewip;
  884         struct bio *cbp, *firstbp;
  885         off_t rdlen, len2rd, offset;
  886         int iocnt, needstoblock;
  887         char *level;
  888 
  889         /*
  890          * To maintain consistency, we cannot allow concurrent reads
  891          * or writes to the same block.
  892          *
  893          * A work-in-progress (wip) structure is allocated for each
  894          * read or write request. All active requests are kept on the
  895          * softc sc_wiplist. As each request arrives, it is checked to
  896          * see if it overlaps any of the active entries. If it does not
  897          * overlap, then it is added to the active list and initiated.
  898          * If it does overlap an active entry, it is added to the
  899          * wip_waiting list for the active entry that it overlaps.
  900          * When an active entry completes, it restarts all the requests
  901          * on its wip_waiting list.
  902          */
  903         sc = wip->wip_sc;
  904         G_WLOCK(sc);
  905         TAILQ_FOREACH(activewip, &sc->sc_wiplist, wip_next) {
  906                 if (wip->wip_end < activewip->wip_start ||
  907                     wip->wip_start > activewip->wip_end)
  908                         continue;
  909                 needstoblock = 1;
  910                 if (wip->wip_bp->bio_cmd == BIO_WRITE)
  911                         if (activewip->wip_bp->bio_cmd == BIO_WRITE)
  912                                 sc->sc_writeblockwrite += 1;
  913                         else
  914                                 sc->sc_readblockwrite += 1;
  915                 else
  916                         if (activewip->wip_bp->bio_cmd == BIO_WRITE)
  917                                 sc->sc_writeblockread += 1;
  918                         else {
  919                                 sc->sc_readcurrentread += 1;
  920                                 needstoblock = 0;
  921                         }
  922                 /* Put request on a waiting list if necessary */
  923                 if (needstoblock) {
  924                         TAILQ_INSERT_TAIL(&activewip->wip_waiting, wip,
  925                             wip_next);
  926                         G_WUNLOCK(sc);
  927                         return;
  928                 }
  929         }
  930         /* Put request on the active list */
  931         TAILQ_INSERT_TAIL(&sc->sc_wiplist, wip, wip_next);
  932 
  933         /*
  934          * Process I/O requests that have been cleared to go.
  935          */
  936         cbp = g_clone_bio(wip->wip_bp);
  937         if (cbp == NULL) {
  938                 TAILQ_REMOVE(&sc->sc_wiplist, wip, wip_next);
  939                 G_WUNLOCK(sc);
  940                 KASSERT(TAILQ_FIRST(&wip->wip_waiting) == NULL,
  941                     ("g_union_doio: non-empty work-in-progress waiting queue"));
  942                 g_io_deliver(wip->wip_bp, ENOMEM);
  943                 g_free(wip);
  944                 return;
  945         }
  946         G_WUNLOCK(sc);
  947         cbp->bio_caller1 = wip;
  948         cbp->bio_done = g_union_done;
  949         cbp->bio_offset = wip->wip_start;
  950 
  951         /*
  952          * Writes are always done to the top level. The blocks that
  953          * are written are recorded in the bitmap when the I/O completes.
  954          */
  955         if (cbp->bio_cmd == BIO_WRITE) {
  956                 G_UNION_LOGREQ(cbp, "Sending %jd byte write request to upper "
  957                     "level.", cbp->bio_length);
  958                 atomic_add_long(&sc->sc_writes, 1);
  959                 atomic_add_long(&sc->sc_wrotebytes, cbp->bio_length);
  960                 g_io_request(cbp, sc->sc_uppercp);
  961                 return;
  962         }
  963         /*
  964          * The usual read case is that we either read the top layer
  965          * if the block has been previously written or the bottom layer
  966          * if it has not been written. However, it is possible that
  967          * only part of the block has been written, For example we may
  968          * have written a UFS/FFS file fragment comprising several
  969          * sectors out of an 8-sector block.  Here, if the entire
  970          * 8-sector block is read for example by a snapshot needing
  971          * to copy the full block, then we need to read the written
  972          * sectors from the upper level and the unwritten sectors from
  973          * the lower level. We do this by alternately reading from the
  974          * top and bottom layers until we complete the read. We
  975          * simplify for the common case to just do the I/O and return.
  976          */
  977         atomic_add_long(&sc->sc_reads, 1);
  978         atomic_add_long(&sc->sc_readbytes, cbp->bio_length);
  979         rdlen = cbp->bio_length;
  980         offset = 0;
  981         for (iocnt = 0; ; iocnt++) {
  982                 if (g_union_getmap(cbp, sc, &len2rd)) {
  983                         /* read top */
  984                         cp = sc->sc_uppercp;
  985                         level = "upper";
  986                 } else {
  987                         /* read bottom */
  988                         cp = sc->sc_lowercp;
  989                         level = "lower";
  990                 }
  991                 /* Check if only a single read is required */
  992                 if (iocnt == 0 && rdlen == len2rd) {
  993                         G_UNION_LOGREQLVL((cp == sc->sc_uppercp) ?
  994                             3 : 4, cbp, "Sending %jd byte read "
  995                             "request to %s level.", len2rd, level);
  996                         g_io_request(cbp, cp);
  997                         return;
  998                 }
  999                 cbp->bio_length = len2rd;
 1000                 if ((cbp->bio_flags & BIO_UNMAPPED) != 0)
 1001                         cbp->bio_ma_offset += offset;
 1002                 else
 1003                         cbp->bio_data += offset;
 1004                 offset += len2rd;
 1005                 rdlen -= len2rd;
 1006                 G_UNION_LOGREQLVL(3, cbp, "Sending %jd byte read "
 1007                     "request to %s level.", len2rd, level);
 1008                 /*
 1009                  * To avoid prematurely notifying our consumer
 1010                  * that their I/O has completed, we have to delay
 1011                  * issuing our first I/O request until we have
 1012                  * issued all the additional I/O requests.
 1013                  */
 1014                 if (iocnt > 0) {
 1015                         atomic_add_long(&wip->wip_numios, 1);
 1016                         g_io_request(cbp, cp);
 1017                 } else {
 1018                         firstbp = cbp;
 1019                         firstcp = cp;
 1020                 }
 1021                 if (rdlen == 0)
 1022                         break;
 1023                 /* set up for next read */
 1024                 cbp = g_clone_bio(wip->wip_bp);
 1025                 if (cbp == NULL) {
 1026                         wip->wip_error = ENOMEM;
 1027                         atomic_add_long(&wip->wip_numios, -1);
 1028                         break;
 1029                 }
 1030                 cbp->bio_caller1 = wip;
 1031                 cbp->bio_done = g_union_done;
 1032                 cbp->bio_offset += offset;
 1033                 cbp->bio_length = rdlen;
 1034                 atomic_add_long(&sc->sc_reads, 1);
 1035         }
 1036         /* We have issued all our I/O, so start the first one */
 1037         g_io_request(firstbp, firstcp);
 1038         return;
 1039 }
 1040 
 1041 /*
 1042  * Used when completing a union I/O operation.
 1043  */
 1044 static void
 1045 g_union_done(struct bio *bp)
 1046 {
 1047         struct g_union_wip *wip, *waitingwip;
 1048         struct g_union_softc *sc;
 1049 
 1050         wip = bp->bio_caller1;
 1051         if (wip->wip_error != 0 && bp->bio_error == 0)
 1052                 bp->bio_error = wip->wip_error;
 1053         wip->wip_error = 0;
 1054         if (atomic_fetchadd_long(&wip->wip_numios, -1) == 1) {
 1055                 sc = wip->wip_sc;
 1056                 G_WLOCK(sc);
 1057                 if (bp->bio_cmd == BIO_WRITE)
 1058                         g_union_setmap(bp, sc);
 1059                 TAILQ_REMOVE(&sc->sc_wiplist, wip, wip_next);
 1060                 G_WUNLOCK(sc);
 1061                 while ((waitingwip = TAILQ_FIRST(&wip->wip_waiting)) != NULL) {
 1062                         TAILQ_REMOVE(&wip->wip_waiting, waitingwip, wip_next);
 1063                         g_union_doio(waitingwip);
 1064                 }
 1065                 g_free(wip);
 1066         }
 1067         g_std_done(bp);
 1068 }
 1069 
 1070 /*
 1071  * Record blocks that have been written in the map.
 1072  */
 1073 static void
 1074 g_union_setmap(struct bio *bp, struct g_union_softc *sc)
 1075 {
 1076         size_t root_idx;
 1077         uint64_t **leaf;
 1078         uint64_t *wordp;
 1079         off_t start, numsec;
 1080 
 1081         G_WLOCKOWNED(sc);
 1082         KASSERT(bp->bio_offset % sc->sc_sectorsize == 0,
 1083             ("g_union_setmap: offset not on sector boundry"));
 1084         KASSERT(bp->bio_length % sc->sc_sectorsize == 0,
 1085             ("g_union_setmap: length not a multiple of sectors"));
 1086         start = bp->bio_offset / sc->sc_sectorsize;
 1087         numsec = bp->bio_length / sc->sc_sectorsize;
 1088         KASSERT(start + numsec <= sc->sc_map_size,
 1089             ("g_union_setmap: block %jd is out of range", start + numsec));
 1090         for ( ; numsec > 0; numsec--, start++) {
 1091                 root_idx = start / sc->sc_bits_per_leaf;
 1092                 leaf = &sc->sc_writemap_root[root_idx];
 1093                 wordp = &(*leaf)
 1094                     [(start % sc->sc_bits_per_leaf) / BITS_PER_ENTRY];
 1095                 *wordp |= 1ULL << (start % BITS_PER_ENTRY);
 1096                 sc->sc_leafused[root_idx / BITS_PER_ENTRY] |=
 1097                     1ULL << (root_idx % BITS_PER_ENTRY);
 1098         }
 1099 }
 1100 
 1101 /*
 1102  * Check map to determine whether blocks have been written.
 1103  *
 1104  * Return true if they have been written so should be read from the top
 1105  * layer. Return false if they have not been written so should be read
 1106  * from the bottom layer. Return in len2read the bytes to be read. See
 1107  * the comment above the BIO_READ implementation in g_union_start() for
 1108  * an explantion of why len2read may be shorter than the buffer length.
 1109  */
 1110 static bool
 1111 g_union_getmap(struct bio *bp, struct g_union_softc *sc, off_t *len2read)
 1112 {
 1113         off_t start, numsec, leafresid, bitloc;
 1114         bool first, maptype, retval;
 1115         uint64_t *leaf, word;
 1116         size_t root_idx;
 1117 
 1118         KASSERT(bp->bio_offset % sc->sc_sectorsize == 0,
 1119             ("g_union_getmap: offset not on sector boundry"));
 1120         KASSERT(bp->bio_length % sc->sc_sectorsize == 0,
 1121             ("g_union_getmap: length not a multiple of sectors"));
 1122         start = bp->bio_offset / sc->sc_sectorsize;
 1123         numsec = bp->bio_length / sc->sc_sectorsize;
 1124         G_UNION_DEBUG(4, "g_union_getmap: check %jd sectors starting at %jd\n",
 1125             numsec, start);
 1126         KASSERT(start + numsec <= sc->sc_map_size,
 1127             ("g_union_getmap: block %jd is out of range", start + numsec));
 1128                 root_idx = start / sc->sc_bits_per_leaf;
 1129         first = true;
 1130         maptype = false;
 1131         while (numsec > 0) {
 1132                 /* Check first if the leaf records any written sectors */
 1133                 root_idx = start / sc->sc_bits_per_leaf;
 1134                 leafresid = sc->sc_bits_per_leaf -
 1135                     (start % sc->sc_bits_per_leaf);
 1136                 if (((sc->sc_leafused[root_idx / BITS_PER_ENTRY]) &
 1137                     (1ULL << (root_idx % BITS_PER_ENTRY))) == 0) {
 1138                         if (first) {
 1139                                 maptype = false;
 1140                                 first = false;
 1141                         }
 1142                         if (maptype)
 1143                                 break;
 1144                         numsec -= leafresid;
 1145                         start += leafresid;
 1146                         continue;
 1147                 }
 1148                 /* Check up to a word boundry, then check word by word */
 1149                 leaf = sc->sc_writemap_root[root_idx];
 1150                 word = leaf[(start % sc->sc_bits_per_leaf) / BITS_PER_ENTRY];
 1151                 bitloc = start % BITS_PER_ENTRY;
 1152                 if (bitloc == 0 && (word == 0 || word == ~0)) {
 1153                         if (first) {
 1154                                 if (word == 0)
 1155                                         maptype = false;
 1156                                 else
 1157                                         maptype = true;
 1158                                 first = false;
 1159                         }
 1160                         if ((word == 0 && maptype) ||
 1161                             (word == ~0 && !maptype))
 1162                                 break;
 1163                         numsec -= BITS_PER_ENTRY;
 1164                         start += BITS_PER_ENTRY;
 1165                         continue;
 1166                 }
 1167                 for ( ; bitloc < BITS_PER_ENTRY; bitloc ++) {
 1168                         retval = (word & (1ULL << bitloc)) != 0;
 1169                         if (first) {
 1170                                 maptype = retval;
 1171                                 first = false;
 1172                         }
 1173                         if (maptype == retval) {
 1174                                 numsec--;
 1175                                 start++;
 1176                                 continue;
 1177                         }
 1178                         goto out;
 1179                 }
 1180         }
 1181 out:
 1182         if (numsec < 0) {
 1183                 start += numsec;
 1184                 numsec = 0;
 1185         }
 1186         *len2read = bp->bio_length - (numsec * sc->sc_sectorsize);
 1187         G_UNION_DEBUG(maptype ? 3 : 4,
 1188             "g_union_getmap: return maptype %swritten for %jd "
 1189             "sectors ending at %jd\n", maptype ? "" : "NOT ",
 1190             *len2read / sc->sc_sectorsize, start - 1);
 1191         return (maptype);
 1192 }
 1193 
 1194 /*
 1195  * Fill in details for a BIO_GETATTR request.
 1196  */
 1197 static void
 1198 g_union_kerneldump(struct bio *bp, struct g_union_softc *sc)
 1199 {
 1200         struct g_kerneldump *gkd;
 1201         struct g_geom *gp;
 1202         struct g_provider *pp;
 1203 
 1204         gkd = (struct g_kerneldump *)bp->bio_data;
 1205         gp = bp->bio_to->geom;
 1206         g_trace(G_T_TOPOLOGY, "%s(%s, %jd, %jd)", __func__, gp->name,
 1207             (intmax_t)gkd->offset, (intmax_t)gkd->length);
 1208 
 1209         pp = LIST_FIRST(&gp->provider);
 1210 
 1211         gkd->di.dumper = g_union_dumper;
 1212         gkd->di.priv = sc;
 1213         gkd->di.blocksize = pp->sectorsize;
 1214         gkd->di.maxiosize = DFLTPHYS;
 1215         gkd->di.mediaoffset = sc->sc_offset + gkd->offset;
 1216         if (gkd->offset > sc->sc_size) {
 1217                 g_io_deliver(bp, ENODEV);
 1218                 return;
 1219         }
 1220         if (gkd->offset + gkd->length > sc->sc_size)
 1221                 gkd->length = sc->sc_size - gkd->offset;
 1222         gkd->di.mediasize = gkd->length;
 1223         g_io_deliver(bp, 0);
 1224 }
 1225 
 /*
  * Dump routine installed by g_union_kerneldump().
  *
  * None of the arguments are used and no I/O is performed; every call
  * simply reports success.
  */
 static int
 g_union_dumper(void *priv, void *virtual, off_t offset, size_t length)
 {
         return (0);
 }
 1235 
 1236 /*
 1237  * List union statistics.
 1238  */
 1239 static void
 1240 g_union_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
 1241     struct g_consumer *cp, struct g_provider *pp)
 1242 {
 1243         struct g_union_softc *sc;
 1244 
 1245         if (pp != NULL || cp != NULL || gp->softc == NULL)
 1246                 return;
 1247         sc = gp->softc;
 1248         sbuf_printf(sb, "%s<Reads>%ju</Reads>\n", indent,
 1249             (uintmax_t)sc->sc_reads);
 1250         sbuf_printf(sb, "%s<Writes>%ju</Writes>\n", indent,
 1251             (uintmax_t)sc->sc_writes);
 1252         sbuf_printf(sb, "%s<Deletes>%ju</Deletes>\n", indent,
 1253             (uintmax_t)sc->sc_deletes);
 1254         sbuf_printf(sb, "%s<Getattrs>%ju</Getattrs>\n", indent,
 1255             (uintmax_t)sc->sc_getattrs);
 1256         sbuf_printf(sb, "%s<Flushes>%ju</Flushes>\n", indent,
 1257             (uintmax_t)sc->sc_flushes);
 1258         sbuf_printf(sb, "%s<Speedups>%ju</Speedups>\n", indent,
 1259             (uintmax_t)sc->sc_speedups);
 1260         sbuf_printf(sb, "%s<Cmd0s>%ju</Cmd0s>\n", indent,
 1261             (uintmax_t)sc->sc_cmd0s);
 1262         sbuf_printf(sb, "%s<Cmd1s>%ju</Cmd1s>\n", indent,
 1263             (uintmax_t)sc->sc_cmd1s);
 1264         sbuf_printf(sb, "%s<Cmd2s>%ju</Cmd2s>\n", indent,
 1265             (uintmax_t)sc->sc_cmd2s);
 1266         sbuf_printf(sb, "%s<ReadCurrentRead>%ju</ReadCurrentRead>\n", indent,
 1267             (uintmax_t)sc->sc_readcurrentread);
 1268         sbuf_printf(sb, "%s<ReadBlockWrite>%ju</ReadBlockWrite>\n", indent,
 1269             (uintmax_t)sc->sc_readblockwrite);
 1270         sbuf_printf(sb, "%s<WriteBlockRead>%ju</WriteBlockRead>\n", indent,
 1271             (uintmax_t)sc->sc_writeblockread);
 1272         sbuf_printf(sb, "%s<WriteBlockWrite>%ju</WriteBlockWrite>\n", indent,
 1273             (uintmax_t)sc->sc_writeblockwrite);
 1274         sbuf_printf(sb, "%s<ReadBytes>%ju</ReadBytes>\n", indent,
 1275             (uintmax_t)sc->sc_readbytes);
 1276         sbuf_printf(sb, "%s<WroteBytes>%ju</WroteBytes>\n", indent,
 1277             (uintmax_t)sc->sc_wrotebytes);
 1278         sbuf_printf(sb, "%s<Offset>%jd</Offset>\n", indent,
 1279             (intmax_t)sc->sc_offset);
 1280 }
 1281 
 1282 /*
 1283  * Clean up an orphaned geom.
 1284  */
 1285 static void
 1286 g_union_orphan(struct g_consumer *cp)
 1287 {
 1288 
 1289         g_topology_assert();
 1290         g_union_destroy(NULL, cp->geom, true);
 1291 }
 1292 
 /*
  * Class destroy_geom method: attempt a non-forced destroy of the geom
  * and return the result of g_union_destroy(). The req and mp arguments
  * are not used.
  */
 static int
 g_union_destroy_geom(struct gctl_req *req, struct g_class *mp,
     struct g_geom *gp)
 {
         int error;

         error = g_union_destroy(NULL, gp, false);
         return (error);
 }
 1303 
 1304 /*
 1305  * Clean up a union device.
 1306  */
 1307 static int
 1308 g_union_destroy(struct gctl_req *req, struct g_geom *gp, bool force)
 1309 {
 1310         struct g_union_softc *sc;
 1311         struct g_provider *pp;
 1312         int error;
 1313 
 1314         g_topology_assert();
 1315         sc = gp->softc;
 1316         if (sc == NULL)
 1317                 return (ENXIO);
 1318         pp = LIST_FIRST(&gp->provider);
 1319         if ((sc->sc_flags & DOING_COMMIT) != 0 ||
 1320             (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0))) {
 1321                 if (force) {
 1322                         if (req != NULL)
 1323                                 gctl_msg(req, 0, "Device %s is still in use, "
 1324                                     "so is being forcibly removed.", gp->name);
 1325                         G_UNION_DEBUG(1, "Device %s is still in use, so "
 1326                             "is being forcibly removed.", gp->name);
 1327                 } else {
 1328                         if (req != NULL)
 1329                                 gctl_msg(req, EBUSY, "Device %s is still open "
 1330                                     "(r=%d w=%d e=%d).", gp->name, pp->acr,
 1331                                     pp->acw, pp->ace);
 1332                         G_UNION_DEBUG(1, "Device %s is still open "
 1333                             "(r=%d w=%d e=%d).", gp->name, pp->acr,
 1334                             pp->acw, pp->ace);
 1335                         return (EBUSY);
 1336                 }
 1337         } else {
 1338                 if (req != NULL)
 1339                         gctl_msg(req, 0, "Device %s removed.", gp->name);
 1340                 G_UNION_DEBUG(1, "Device %s removed.", gp->name);
 1341         }
 1342         /* Close consumers */
 1343         if ((error = g_access(sc->sc_lowercp, -1, 0, -1)) != 0)
 1344                 G_UNION_DEBUG(2, "Error %d: device %s could not reset access "
 1345                     "to %s.", error, gp->name, sc->sc_lowercp->provider->name);
 1346         if ((error = g_access(sc->sc_uppercp, -1, -1, -1)) != 0)
 1347                 G_UNION_DEBUG(2, "Error %d: device %s could not reset access "
 1348                     "to %s.", error, gp->name, sc->sc_uppercp->provider->name);
 1349 
 1350         g_wither_geom(gp, ENXIO);
 1351 
 1352         return (0);
 1353 }
 1354 
 1355 /*
 1356  * Clean up a union provider.
 1357  */
 1358 static void
 1359 g_union_providergone(struct g_provider *pp)
 1360 {
 1361         struct g_geom *gp;
 1362         struct g_union_softc *sc;
 1363         size_t i;
 1364 
 1365         gp = pp->geom;
 1366         sc = gp->softc;
 1367         gp->softc = NULL;
 1368         for (i = 0; i < sc->sc_root_size; i++)
 1369                 g_free(sc->sc_writemap_root[i]);
 1370         g_free(sc->sc_writemap_root);
 1371         g_free(sc->sc_leafused);
 1372         rw_destroy(&sc->sc_rwlock);
 1373         g_free(sc);
 1374 }
 1375 
 1376 /*
 1377  * Respond to a resized provider.
 1378  */
 1379 static void
 1380 g_union_resize(struct g_consumer *cp)
 1381 {
 1382         struct g_union_softc *sc;
 1383         struct g_geom *gp;
 1384 
 1385         g_topology_assert();
 1386 
 1387         gp = cp->geom;
 1388         sc = gp->softc;
 1389 
 1390         /*
 1391          * If size has gotten bigger, ignore it and just keep using
 1392          * the space we already had. Otherwise we are done.
 1393          */
 1394         if (sc->sc_size < cp->provider->mediasize - sc->sc_offset)
 1395                 return;
 1396         g_union_destroy(NULL, gp, true);
 1397 }
 1398 
 /*
  * Module-level declarations: declare g_union_class as a GEOM class under
  * the name g_union, and give the geom_union module version 0.
  */
 1399 DECLARE_GEOM_CLASS(g_union_class, g_union);
 1400 MODULE_VERSION(geom_union, 0);

Cache object: 4295bf54014fad13c1df4be16424a2a2


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.