FreeBSD/Linux Kernel Cross Reference
sys/dev/md/md.c


    1 /*-
    2  * ----------------------------------------------------------------------------
    3  * "THE BEER-WARE LICENSE" (Revision 42):
    4  * <phk@FreeBSD.ORG> wrote this file.  As long as you retain this notice you
    5  * can do whatever you want with this stuff. If we meet some day, and you think
    6  * this stuff is worth it, you can buy me a beer in return.   Poul-Henning Kamp
    7  * ----------------------------------------------------------------------------
    8  *
    9  * $FreeBSD: releng/8.4/sys/dev/md/md.c 243618 2012-11-27 16:37:36Z jh $
   10  *
   11  */
   12 
   13 /*-
   14  * The following functions are based in the vn(4) driver: mdstart_swap(),
   15  * mdstart_vnode(), mdcreate_swap(), mdcreate_vnode() and mddestroy(),
   16  * and as such under the following copyright:
   17  *
   18  * Copyright (c) 1988 University of Utah.
   19  * Copyright (c) 1990, 1993
   20  *      The Regents of the University of California.  All rights reserved.
   21  *
   22  * This code is derived from software contributed to Berkeley by
   23  * the Systems Programming Group of the University of Utah Computer
   24  * Science Department.
   25  *
   26  * Redistribution and use in source and binary forms, with or without
   27  * modification, are permitted provided that the following conditions
   28  * are met:
   29  * 1. Redistributions of source code must retain the above copyright
   30  *    notice, this list of conditions and the following disclaimer.
   31  * 2. Redistributions in binary form must reproduce the above copyright
   32  *    notice, this list of conditions and the following disclaimer in the
   33  *    documentation and/or other materials provided with the distribution.
   34  * 4. Neither the name of the University nor the names of its contributors
   35  *    may be used to endorse or promote products derived from this software
   36  *    without specific prior written permission.
   37  *
   38  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   39  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   40  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   41  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   42  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   43  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   44  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   45  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   46  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   47  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   48  * SUCH DAMAGE.
   49  *
   50  * from: Utah Hdr: vn.c 1.13 94/04/02
   51  *
   52  *      from: @(#)vn.c  8.6 (Berkeley) 4/1/94
   53  * From: src/sys/dev/vn/vn.c,v 1.122 2000/12/16 16:06:03
   54  */
   55 
   56 #include "opt_geom.h"
   57 #include "opt_md.h"
   58 
   59 #include <sys/param.h>
   60 #include <sys/systm.h>
   61 #include <sys/bio.h>
   62 #include <sys/conf.h>
   63 #include <sys/devicestat.h>
   64 #include <sys/fcntl.h>
   65 #include <sys/kernel.h>
   66 #include <sys/kthread.h>
   67 #include <sys/limits.h>
   68 #include <sys/linker.h>
   69 #include <sys/lock.h>
   70 #include <sys/malloc.h>
   71 #include <sys/mdioctl.h>
   72 #include <sys/mount.h>
   73 #include <sys/mutex.h>
   74 #include <sys/sx.h>
   75 #include <sys/namei.h>
   76 #include <sys/proc.h>
   77 #include <sys/queue.h>
   78 #include <sys/sched.h>
   79 #include <sys/sf_buf.h>
   80 #include <sys/sysctl.h>
   81 #include <sys/vnode.h>
   82 
   83 #include <geom/geom.h>
   84 
   85 #include <vm/vm.h>
   86 #include <vm/vm_object.h>
   87 #include <vm/vm_page.h>
   88 #include <vm/vm_pager.h>
   89 #include <vm/swap_pager.h>
   90 #include <vm/uma.h>
   91 
   92 #define MD_MODVER 1
   93 
   94 #define MD_SHUTDOWN     0x10000         /* Tell worker thread to terminate. */
   95 #define MD_EXITING      0x20000         /* Worker thread is exiting. */
   96 
   97 #ifndef MD_NSECT
   98 #define MD_NSECT (10000 * 2)
   99 #endif
  100 
  101 static MALLOC_DEFINE(M_MD, "md_disk", "Memory Disk");
  102 static MALLOC_DEFINE(M_MDSECT, "md_sectors", "Memory Disk Sectors");
  103 
  104 static int md_debug;
  105 SYSCTL_INT(_debug, OID_AUTO, mddebug, CTLFLAG_RW, &md_debug, 0, "");
  106 static int md_malloc_wait;
  107 SYSCTL_INT(_vm, OID_AUTO, md_malloc_wait, CTLFLAG_RW, &md_malloc_wait, 0, "");
  108 
  109 #if defined(MD_ROOT) && defined(MD_ROOT_SIZE)
  110 /*
  111  * Preloaded image gets put here.
  112  * Applications that patch the object with the image can determine
   113  * the size by looking at the start and end markers (strings),
  114  * so we want them contiguous.
  115  */
  116 static struct {
  117         u_char start[MD_ROOT_SIZE*1024];
  118         u_char end[128];
  119 } mfs_root = {
  120         .start = "MFS Filesystem goes here",
  121         .end = "MFS Filesystem had better STOP here",
  122 };
  123 #endif
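       /*
        * For example, a post-build tool can locate the start marker
        * string in the kernel object and copy a filesystem image of up
        * to MD_ROOT_SIZE KB over mfs_root.start; the end marker bounds
        * the patched region.
        */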
  124 
  125 static g_init_t g_md_init;
  126 static g_fini_t g_md_fini;
  127 static g_start_t g_md_start;
  128 static g_access_t g_md_access;
  129 static void g_md_dumpconf(struct sbuf *sb, const char *indent,
  130     struct g_geom *gp, struct g_consumer *cp __unused, struct g_provider *pp);
  131 
  132 static int mdunits;
  133 static struct cdev *status_dev = 0;
  134 static struct sx md_sx;
  135 static struct unrhdr *md_uh;
  136 
  137 static d_ioctl_t mdctlioctl;
  138 
  139 static struct cdevsw mdctl_cdevsw = {
  140         .d_version =    D_VERSION,
  141         .d_ioctl =      mdctlioctl,
  142         .d_name =       MD_NAME,
  143 };
  144 
  145 struct g_class g_md_class = {
  146         .name = "MD",
  147         .version = G_VERSION,
  148         .init = g_md_init,
  149         .fini = g_md_fini,
  150         .start = g_md_start,
  151         .access = g_md_access,
  152         .dumpconf = g_md_dumpconf,
  153 };
  154 
  155 DECLARE_GEOM_CLASS(g_md_class, g_md);
  156 
  157 
  158 static LIST_HEAD(, md_s) md_softc_list = LIST_HEAD_INITIALIZER(md_softc_list);
  159 
  160 #define NINDIR  (PAGE_SIZE / sizeof(uintptr_t))
  161 #define NMASK   (NINDIR-1)
  162 static int nshift;
  163 
  164 struct indir {
  165         uintptr_t       *array;
  166         u_int           total;
  167         u_int           used;
  168         u_int           shift;
  169 };
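       /*
        * Sizing example: with 4 KB pages and 8-byte pointers, NINDIR is
        * 512 and nshift works out to 9, so each level of the tree
        * resolves 9 bits of the sector number and two levels already
        * span 512 * 512 sectors.
        */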
  170 
  171 struct md_s {
  172         int unit;
  173         LIST_ENTRY(md_s) list;
  174         struct bio_queue_head bio_queue;
  175         struct mtx queue_mtx;
  176         struct cdev *dev;
  177         enum md_types type;
  178         off_t mediasize;
  179         unsigned sectorsize;
  180         unsigned opencount;
  181         unsigned fwheads;
  182         unsigned fwsectors;
  183         unsigned flags;
  184         char name[20];
  185         struct proc *procp;
  186         struct g_geom *gp;
  187         struct g_provider *pp;
  188         int (*start)(struct md_s *sc, struct bio *bp);
  189         struct devstat *devstat;
  190 
  191         /* MD_MALLOC related fields */
  192         struct indir *indir;
  193         uma_zone_t uma;
  194 
  195         /* MD_PRELOAD related fields */
  196         u_char *pl_ptr;
  197         size_t pl_len;
  198 
  199         /* MD_VNODE related fields */
  200         struct vnode *vnode;
  201         char file[PATH_MAX];
  202         struct ucred *cred;
  203 
  204         /* MD_SWAP related fields */
  205         vm_object_t object;
  206 };
  207 
  208 static struct indir *
  209 new_indir(u_int shift)
  210 {
  211         struct indir *ip;
  212 
  213         ip = malloc(sizeof *ip, M_MD, (md_malloc_wait ? M_WAITOK : M_NOWAIT)
  214             | M_ZERO);
  215         if (ip == NULL)
  216                 return (NULL);
  217         ip->array = malloc(sizeof(uintptr_t) * NINDIR,
  218             M_MDSECT, (md_malloc_wait ? M_WAITOK : M_NOWAIT) | M_ZERO);
  219         if (ip->array == NULL) {
  220                 free(ip, M_MD);
  221                 return (NULL);
  222         }
  223         ip->total = NINDIR;
  224         ip->shift = shift;
  225         return (ip);
  226 }
  227 
  228 static void
  229 del_indir(struct indir *ip)
  230 {
  231 
  232         free(ip->array, M_MDSECT);
  233         free(ip, M_MD);
  234 }
  235 
  236 static void
  237 destroy_indir(struct md_s *sc, struct indir *ip)
  238 {
  239         int i;
  240 
  241         for (i = 0; i < NINDIR; i++) {
  242                 if (!ip->array[i])
  243                         continue;
  244                 if (ip->shift)
  245                         destroy_indir(sc, (struct indir*)(ip->array[i]));
  246                 else if (ip->array[i] > 255)
  247                         uma_zfree(sc->uma, (void *)(ip->array[i]));
  248         }
  249         del_indir(ip);
  250 }
  251 
  252 /*
  253  * This function does the math and allocates the top level "indir" structure
  254  * for a device of "size" sectors.
  255  */
  256 
  257 static struct indir *
  258 dimension(off_t size)
  259 {
  260         off_t rcnt;
  261         struct indir *ip;
  262         int layer;
  263 
  264         rcnt = size;
  265         layer = 0;
  266         while (rcnt > NINDIR) {
  267                 rcnt /= NINDIR;
  268                 layer++;
  269         }
  270 
  271         /*
  272          * XXX: the top layer is probably not fully populated, so we allocate
  273          * too much space for ip->array in here.
  274          */
  275         ip = malloc(sizeof *ip, M_MD, M_WAITOK | M_ZERO);
  276         ip->array = malloc(sizeof(uintptr_t) * NINDIR,
  277             M_MDSECT, M_WAITOK | M_ZERO);
  278         ip->total = NINDIR;
  279         ip->shift = layer * nshift;
  280         return (ip);
  281 }
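       /*
        * Worked example: a 1 GB device with 512-byte sectors holds
        * 2097152 sectors.  With NINDIR == 512 the loop above divides
        * twice (2097152 -> 4096 -> 8), giving layer == 2 and a root
        * shift of 2 * nshift (18 when nshift == 9).
        */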
  282 
  283 /*
  284  * Read a given sector
  285  */
  286 
  287 static uintptr_t
  288 s_read(struct indir *ip, off_t offset)
  289 {
  290         struct indir *cip;
  291         int idx;
  292         uintptr_t up;
  293 
  294         if (md_debug > 1)
  295                 printf("s_read(%jd)\n", (intmax_t)offset);
  296         up = 0;
  297         for (cip = ip; cip != NULL;) {
  298                 if (cip->shift) {
  299                         idx = (offset >> cip->shift) & NMASK;
  300                         up = cip->array[idx];
  301                         cip = (struct indir *)up;
  302                         continue;
  303                 }
  304                 idx = offset & NMASK;
  305                 return (cip->array[idx]);
  306         }
  307         return (0);
  308 }
  309 
  310 /*
  311  * Write a given sector, prune the tree if the value is 0
  312  */
  313 
  314 static int
  315 s_write(struct indir *ip, off_t offset, uintptr_t ptr)
  316 {
  317         struct indir *cip, *lip[10];
  318         int idx, li;
  319         uintptr_t up;
  320 
  321         if (md_debug > 1)
  322                 printf("s_write(%jd, %p)\n", (intmax_t)offset, (void *)ptr);
  323         up = 0;
  324         li = 0;
  325         cip = ip;
  326         for (;;) {
  327                 lip[li++] = cip;
  328                 if (cip->shift) {
  329                         idx = (offset >> cip->shift) & NMASK;
  330                         up = cip->array[idx];
  331                         if (up != 0) {
  332                                 cip = (struct indir *)up;
  333                                 continue;
  334                         }
  335                         /* Allocate branch */
  336                         cip->array[idx] =
  337                             (uintptr_t)new_indir(cip->shift - nshift);
  338                         if (cip->array[idx] == 0)
  339                                 return (ENOSPC);
  340                         cip->used++;
  341                         up = cip->array[idx];
  342                         cip = (struct indir *)up;
  343                         continue;
  344                 }
  345                 /* leafnode */
  346                 idx = offset & NMASK;
  347                 up = cip->array[idx];
  348                 if (up != 0)
  349                         cip->used--;
  350                 cip->array[idx] = ptr;
  351                 if (ptr != 0)
  352                         cip->used++;
  353                 break;
  354         }
  355         if (cip->used != 0 || li == 1)
  356                 return (0);
  357         li--;
  358         while (cip->used == 0 && cip != ip) {
  359                 li--;
  360                 idx = (offset >> lip[li]->shift) & NMASK;
  361                 up = lip[li]->array[idx];
  362                 KASSERT(up == (uintptr_t)cip, ("md screwed up"));
  363                 del_indir(cip);
  364                 lip[li]->array[idx] = 0;
  365                 lip[li]->used--;
  366                 cip = lip[li];
  367         }
  368         return (0);
  369 }
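       /*
        * The pruning pass above walks back up the lip[] trail: when
        * storing 0 in a leaf drops a node's "used" count to zero, the
        * node is freed and its slot in the parent cleared, and the
        * check repeats toward the root, so emptying a subtree releases
        * all of its interior nodes.
        */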
  370 
  371 
  372 static int
  373 g_md_access(struct g_provider *pp, int r, int w, int e)
  374 {
  375         struct md_s *sc;
  376 
  377         sc = pp->geom->softc;
  378         if (sc == NULL) {
  379                 if (r <= 0 && w <= 0 && e <= 0)
  380                         return (0);
  381                 return (ENXIO);
  382         }
  383         r += pp->acr;
  384         w += pp->acw;
  385         e += pp->ace;
  386         if ((sc->flags & MD_READONLY) != 0 && w > 0)
  387                 return (EROFS);
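               /*
                * r, w and e arrive as deltas; having added the
                * provider's current counts above, they now hold the
                * requested totals, so the tests below catch the
                * first-open and last-close transitions.
                */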
  388         if ((pp->acr + pp->acw + pp->ace) == 0 && (r + w + e) > 0) {
  389                 sc->opencount = 1;
  390         } else if ((pp->acr + pp->acw + pp->ace) > 0 && (r + w + e) == 0) {
  391                 sc->opencount = 0;
  392         }
  393         return (0);
  394 }
  395 
  396 static void
  397 g_md_start(struct bio *bp)
  398 {
  399         struct md_s *sc;
  400 
  401         sc = bp->bio_to->geom->softc;
  402         if ((bp->bio_cmd == BIO_READ) || (bp->bio_cmd == BIO_WRITE))
  403                 devstat_start_transaction_bio(sc->devstat, bp);
  404         mtx_lock(&sc->queue_mtx);
  405         bioq_disksort(&sc->bio_queue, bp);
  406         mtx_unlock(&sc->queue_mtx);
  407         wakeup(sc);
  408 }
  409 
  410 static int
  411 mdstart_malloc(struct md_s *sc, struct bio *bp)
  412 {
  413         int i, error;
  414         u_char *dst;
  415         off_t secno, nsec, uc;
  416         uintptr_t sp, osp;
  417 
  418         switch (bp->bio_cmd) {
  419         case BIO_READ:
  420         case BIO_WRITE:
  421         case BIO_DELETE:
  422                 break;
  423         default:
  424                 return (EOPNOTSUPP);
  425         }
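               /*
                * Leaf values from s_read() are encoded: 0 is an
                * unallocated (all-zero) sector, 1..255 is a sector
                * filled entirely with that byte value (the compressed
                * case), and anything larger is the address of a
                * malloced sector buffer.
                */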
  426 
  427         nsec = bp->bio_length / sc->sectorsize;
  428         secno = bp->bio_offset / sc->sectorsize;
  429         dst = bp->bio_data;
  430         error = 0;
  431         while (nsec--) {
  432                 osp = s_read(sc->indir, secno);
  433                 if (bp->bio_cmd == BIO_DELETE) {
  434                         if (osp != 0)
  435                                 error = s_write(sc->indir, secno, 0);
  436                 } else if (bp->bio_cmd == BIO_READ) {
  437                         if (osp == 0)
  438                                 bzero(dst, sc->sectorsize);
  439                         else if (osp <= 255)
  440                                 memset(dst, osp, sc->sectorsize);
  441                         else {
  442                                 bcopy((void *)osp, dst, sc->sectorsize);
  443                                 cpu_flush_dcache(dst, sc->sectorsize);
  444                         }
  445                         osp = 0;
  446                 } else if (bp->bio_cmd == BIO_WRITE) {
  447                         if (sc->flags & MD_COMPRESS) {
  448                                 uc = dst[0];
  449                                 for (i = 1; i < sc->sectorsize; i++)
  450                                         if (dst[i] != uc)
  451                                                 break;
  452                         } else {
  453                                 i = 0;
  454                                 uc = 0;
  455                         }
  456                         if (i == sc->sectorsize) {
  457                                 if (osp != uc)
  458                                         error = s_write(sc->indir, secno, uc);
  459                         } else {
  460                                 if (osp <= 255) {
  461                                         sp = (uintptr_t)uma_zalloc(sc->uma,
  462                                             md_malloc_wait ? M_WAITOK :
  463                                             M_NOWAIT);
  464                                         if (sp == 0) {
  465                                                 error = ENOSPC;
  466                                                 break;
  467                                         }
  468                                         bcopy(dst, (void *)sp, sc->sectorsize);
  469                                         error = s_write(sc->indir, secno, sp);
  470                                 } else {
  471                                         bcopy(dst, (void *)osp, sc->sectorsize);
  472                                         osp = 0;
  473                                 }
  474                         }
  475                 } else {
  476                         error = EOPNOTSUPP;
  477                 }
  478                 if (osp > 255)
  479                         uma_zfree(sc->uma, (void*)osp);
  480                 if (error != 0)
  481                         break;
  482                 secno++;
  483                 dst += sc->sectorsize;
  484         }
  485         bp->bio_resid = 0;
  486         return (error);
  487 }
  488 
  489 static int
  490 mdstart_preload(struct md_s *sc, struct bio *bp)
  491 {
  492 
  493         switch (bp->bio_cmd) {
  494         case BIO_READ:
  495                 bcopy(sc->pl_ptr + bp->bio_offset, bp->bio_data,
  496                     bp->bio_length);
  497                 cpu_flush_dcache(bp->bio_data, bp->bio_length);
  498                 break;
  499         case BIO_WRITE:
  500                 bcopy(bp->bio_data, sc->pl_ptr + bp->bio_offset,
  501                     bp->bio_length);
  502                 break;
  503         }
  504         bp->bio_resid = 0;
  505         return (0);
  506 }
  507 
  508 static int
  509 mdstart_vnode(struct md_s *sc, struct bio *bp)
  510 {
  511         int error, vfslocked;
  512         struct uio auio;
  513         struct iovec aiov;
  514         struct mount *mp;
  515         struct vnode *vp;
  516         struct thread *td;
  517 
  518         switch (bp->bio_cmd) {
  519         case BIO_READ:
  520         case BIO_WRITE:
  521         case BIO_FLUSH:
  522                 break;
  523         default:
  524                 return (EOPNOTSUPP);
  525         }
  526 
  527         td = curthread;
  528         vp = sc->vnode;
  529 
  530         /*
  531          * VNODE I/O
  532          *
  533          * If an error occurs, we set BIO_ERROR but we do not set
  534          * B_INVAL because (for a write anyway), the buffer is
  535          * still valid.
  536          */
  537 
  538         if (bp->bio_cmd == BIO_FLUSH) {
  539                 vfslocked = VFS_LOCK_GIANT(vp->v_mount);
  540                 (void) vn_start_write(vp, &mp, V_WAIT);
  541                 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
  542                 error = VOP_FSYNC(vp, MNT_WAIT, td);
  543                 VOP_UNLOCK(vp, 0);
  544                 vn_finished_write(mp);
  545                 VFS_UNLOCK_GIANT(vfslocked);
  546                 return (error);
  547         }
  548 
  549         bzero(&auio, sizeof(auio));
  550 
  551         aiov.iov_base = bp->bio_data;
  552         aiov.iov_len = bp->bio_length;
  553         auio.uio_iov = &aiov;
  554         auio.uio_iovcnt = 1;
  555         auio.uio_offset = (vm_ooffset_t)bp->bio_offset;
  556         auio.uio_segflg = UIO_SYSSPACE;
  557         if (bp->bio_cmd == BIO_READ)
  558                 auio.uio_rw = UIO_READ;
  559         else if (bp->bio_cmd == BIO_WRITE)
  560                 auio.uio_rw = UIO_WRITE;
  561         else
  562                 panic("wrong BIO_OP in mdstart_vnode");
  563         auio.uio_resid = bp->bio_length;
  564         auio.uio_td = td;
  565         /*
  566          * When reading set IO_DIRECT to try to avoid double-caching
  567          * the data.  When writing IO_DIRECT is not optimal.
  568          */
  569         vfslocked = VFS_LOCK_GIANT(vp->v_mount);
  570         if (bp->bio_cmd == BIO_READ) {
  571                 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
  572                 error = VOP_READ(vp, &auio, IO_DIRECT, sc->cred);
  573                 VOP_UNLOCK(vp, 0);
  574         } else {
  575                 (void) vn_start_write(vp, &mp, V_WAIT);
  576                 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
  577                 error = VOP_WRITE(vp, &auio, sc->flags & MD_ASYNC ? 0 : IO_SYNC,
  578                     sc->cred);
  579                 VOP_UNLOCK(vp, 0);
  580                 vn_finished_write(mp);
  581         }
  582         VFS_UNLOCK_GIANT(vfslocked);
  583         bp->bio_resid = auio.uio_resid;
  584         return (error);
  585 }
  586 
  587 static int
  588 mdstart_swap(struct md_s *sc, struct bio *bp)
  589 {
  590         struct sf_buf *sf;
  591         int rv, offs, len, lastend;
  592         vm_pindex_t i, lastp;
  593         vm_page_t m;
  594         u_char *p;
  595 
  596         switch (bp->bio_cmd) {
  597         case BIO_READ:
  598         case BIO_WRITE:
  599         case BIO_DELETE:
  600                 break;
  601         default:
  602                 return (EOPNOTSUPP);
  603         }
  604 
  605         p = bp->bio_data;
  606 
  607         /*
  608          * offs is the offset at which to start operating on the
  609          * next (ie, first) page.  lastp is the last page on
  610          * which we're going to operate.  lastend is the ending
  611          * position within that last page (ie, PAGE_SIZE if
  612          * we're operating on complete aligned pages).
  613          */
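               /*
                * Example with 4 KB pages: bio_offset 6144 and
                * bio_length 8192 touch bytes 6144..14335, so offs is
                * 2048, the loop starts at page 1, lastp is 3 and
                * lastend is 2048.
                */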
  614         offs = bp->bio_offset % PAGE_SIZE;
  615         lastp = (bp->bio_offset + bp->bio_length - 1) / PAGE_SIZE;
  616         lastend = (bp->bio_offset + bp->bio_length - 1) % PAGE_SIZE + 1;
  617 
  618         rv = VM_PAGER_OK;
  619         VM_OBJECT_LOCK(sc->object);
  620         vm_object_pip_add(sc->object, 1);
  621         for (i = bp->bio_offset / PAGE_SIZE; i <= lastp; i++) {
  622                 len = ((i == lastp) ? lastend : PAGE_SIZE) - offs;
  623 
  624                 m = vm_page_grab(sc->object, i,
  625                     VM_ALLOC_NORMAL|VM_ALLOC_RETRY);
  626                 VM_OBJECT_UNLOCK(sc->object);
  627                 sched_pin();
  628                 sf = sf_buf_alloc(m, SFB_CPUPRIVATE);
  629                 VM_OBJECT_LOCK(sc->object);
  630                 if (bp->bio_cmd == BIO_READ) {
  631                         if (m->valid != VM_PAGE_BITS_ALL)
  632                                 rv = vm_pager_get_pages(sc->object, &m, 1, 0);
  633                         if (rv == VM_PAGER_ERROR) {
  634                                 sf_buf_free(sf);
  635                                 sched_unpin();
  636                                 vm_page_wakeup(m);
  637                                 break;
  638                         }
  639                         bcopy((void *)(sf_buf_kva(sf) + offs), p, len);
  640                         cpu_flush_dcache(p, len);
  641                 } else if (bp->bio_cmd == BIO_WRITE) {
  642                         if (len != PAGE_SIZE && m->valid != VM_PAGE_BITS_ALL)
  643                                 rv = vm_pager_get_pages(sc->object, &m, 1, 0);
  644                         if (rv == VM_PAGER_ERROR) {
  645                                 sf_buf_free(sf);
  646                                 sched_unpin();
  647                                 vm_page_wakeup(m);
  648                                 break;
  649                         }
  650                         bcopy(p, (void *)(sf_buf_kva(sf) + offs), len);
  651                         m->valid = VM_PAGE_BITS_ALL;
  652                 } else if (bp->bio_cmd == BIO_DELETE) {
  653                         if (len != PAGE_SIZE && m->valid != VM_PAGE_BITS_ALL)
  654                                 rv = vm_pager_get_pages(sc->object, &m, 1, 0);
  655                         if (rv == VM_PAGER_ERROR) {
  656                                 sf_buf_free(sf);
  657                                 sched_unpin();
  658                                 vm_page_wakeup(m);
  659                                 break;
  660                         }
  661                         if (len != PAGE_SIZE) {
  662                                 bzero((void *)(sf_buf_kva(sf) + offs), len);
  663                                 vm_page_clear_dirty(m, offs, len);
  664                                 m->valid = VM_PAGE_BITS_ALL;
  665                         } else
  666                                 vm_pager_page_unswapped(m);
  667                 }
  668                 sf_buf_free(sf);
  669                 sched_unpin();
  670                 vm_page_wakeup(m);
  671                 vm_page_lock_queues();
  672                 if (bp->bio_cmd == BIO_DELETE && len == PAGE_SIZE)
  673                         vm_page_free(m);
  674                 else
  675                         vm_page_activate(m);
  676                 if (bp->bio_cmd == BIO_WRITE)
  677                         vm_page_dirty(m);
  678                 vm_page_unlock_queues();
  679 
  680                 /* Actions on further pages start at offset 0 */
  681                 p += PAGE_SIZE - offs;
  682                 offs = 0;
  683         }
  684         vm_object_pip_subtract(sc->object, 1);
  685         vm_object_set_writeable_dirty(sc->object);
  686         VM_OBJECT_UNLOCK(sc->object);
  687         return (rv != VM_PAGER_ERROR ? 0 : ENOSPC);
  688 }
  689 
  690 static void
  691 md_kthread(void *arg)
  692 {
  693         struct md_s *sc;
  694         struct bio *bp;
  695         int error;
  696 
  697         sc = arg;
  698         thread_lock(curthread);
  699         sched_prio(curthread, PRIBIO);
  700         thread_unlock(curthread);
  701         if (sc->type == MD_VNODE)
  702                 curthread->td_pflags |= TDP_NORUNNINGBUF;
  703 
  704         for (;;) {
  705                 mtx_lock(&sc->queue_mtx);
  706                 if (sc->flags & MD_SHUTDOWN) {
  707                         sc->flags |= MD_EXITING;
  708                         mtx_unlock(&sc->queue_mtx);
  709                         kproc_exit(0);
  710                 }
  711                 bp = bioq_takefirst(&sc->bio_queue);
  712                 if (!bp) {
  713                         msleep(sc, &sc->queue_mtx, PRIBIO | PDROP, "mdwait", 0);
  714                         continue;
  715                 }
  716                 mtx_unlock(&sc->queue_mtx);
  717                 if (bp->bio_cmd == BIO_GETATTR) {
  718                         if ((sc->fwsectors && sc->fwheads &&
  719                             (g_handleattr_int(bp, "GEOM::fwsectors",
  720                             sc->fwsectors) ||
  721                             g_handleattr_int(bp, "GEOM::fwheads",
  722                             sc->fwheads))) ||
  723                             g_handleattr_int(bp, "GEOM::candelete", 1))
  724                                 error = -1;
  725                         else
  726                                 error = EOPNOTSUPP;
  727                 } else {
  728                         error = sc->start(sc, bp);
  729                 }
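                       /*
                        * An "error" of -1 means g_handleattr_int()
                        * matched the attribute and already completed
                        * the bio itself, so it must not be delivered
                        * again below.
                        */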
  730 
  731                 if (error != -1) {
  732                         bp->bio_completed = bp->bio_length;
  733                         if ((bp->bio_cmd == BIO_READ) || (bp->bio_cmd == BIO_WRITE))
  734                                 devstat_end_transaction_bio(sc->devstat, bp);
  735                         g_io_deliver(bp, error);
  736                 }
  737         }
  738 }
  739 
  740 static struct md_s *
  741 mdfind(int unit)
  742 {
  743         struct md_s *sc;
  744 
  745         LIST_FOREACH(sc, &md_softc_list, list) {
  746                 if (sc->unit == unit)
  747                         break;
  748         }
  749         return (sc);
  750 }
  751 
  752 static struct md_s *
  753 mdnew(int unit, int *errp, enum md_types type)
  754 {
  755         struct md_s *sc;
  756         int error;
  757 
  758         *errp = 0;
  759         if (unit == -1)
  760                 unit = alloc_unr(md_uh);
  761         else
  762                 unit = alloc_unr_specific(md_uh, unit);
  763 
  764         if (unit == -1) {
  765                 *errp = EBUSY;
  766                 return (NULL);
  767         }
  768 
  769         sc = (struct md_s *)malloc(sizeof *sc, M_MD, M_WAITOK | M_ZERO);
  770         sc->type = type;
  771         bioq_init(&sc->bio_queue);
  772         mtx_init(&sc->queue_mtx, "md bio queue", NULL, MTX_DEF);
  773         sc->unit = unit;
  774         sprintf(sc->name, "md%d", unit);
  775         LIST_INSERT_HEAD(&md_softc_list, sc, list);
   776         error = kproc_create(md_kthread, sc, &sc->procp, 0, 0, "%s", sc->name);
  777         if (error == 0)
  778                 return (sc);
  779         LIST_REMOVE(sc, list);
  780         mtx_destroy(&sc->queue_mtx);
  781         free_unr(md_uh, sc->unit);
  782         free(sc, M_MD);
  783         *errp = error;
  784         return (NULL);
  785 }
  786 
  787 static void
  788 mdinit(struct md_s *sc)
  789 {
  790         struct g_geom *gp;
  791         struct g_provider *pp;
  792 
  793         g_topology_lock();
  794         gp = g_new_geomf(&g_md_class, "md%d", sc->unit);
  795         gp->softc = sc;
  796         pp = g_new_providerf(gp, "md%d", sc->unit);
  797         pp->mediasize = sc->mediasize;
  798         pp->sectorsize = sc->sectorsize;
  799         sc->gp = gp;
  800         sc->pp = pp;
  801         g_error_provider(pp, 0);
  802         g_topology_unlock();
  803         sc->devstat = devstat_new_entry("md", sc->unit, sc->sectorsize,
  804             DEVSTAT_ALL_SUPPORTED, DEVSTAT_TYPE_DIRECT, DEVSTAT_PRIORITY_MAX);
  805 }
  806 
  807 /*
  808  * XXX: we should check that the range they feed us is mapped.
  809  * XXX: we should implement read-only.
  810  */
  811 
  812 static int
  813 mdcreate_preload(struct md_s *sc, struct md_ioctl *mdio)
  814 {
  815 
  816         if (mdio->md_options & ~(MD_AUTOUNIT | MD_FORCE))
  817                 return (EINVAL);
  818         if (mdio->md_base == 0)
  819                 return (EINVAL);
  820         sc->flags = mdio->md_options & MD_FORCE;
  821         /* Cast to pointer size, then to pointer to avoid warning */
  822         sc->pl_ptr = (u_char *)(uintptr_t)mdio->md_base;
  823         sc->pl_len = (size_t)sc->mediasize;
  824         return (0);
  825 }
  826 
  827 
  828 static int
  829 mdcreate_malloc(struct md_s *sc, struct md_ioctl *mdio)
  830 {
  831         uintptr_t sp;
  832         int error;
  833         off_t u;
  834 
  835         error = 0;
  836         if (mdio->md_options & ~(MD_AUTOUNIT | MD_COMPRESS | MD_RESERVE))
  837                 return (EINVAL);
  838         if (mdio->md_sectorsize != 0 && !powerof2(mdio->md_sectorsize))
  839                 return (EINVAL);
  840         /* Compression doesn't make sense if we have reserved space */
  841         if (mdio->md_options & MD_RESERVE)
  842                 mdio->md_options &= ~MD_COMPRESS;
  843         if (mdio->md_fwsectors != 0)
  844                 sc->fwsectors = mdio->md_fwsectors;
  845         if (mdio->md_fwheads != 0)
  846                 sc->fwheads = mdio->md_fwheads;
  847         sc->flags = mdio->md_options & (MD_COMPRESS | MD_FORCE);
  848         sc->indir = dimension(sc->mediasize / sc->sectorsize);
  849         sc->uma = uma_zcreate(sc->name, sc->sectorsize, NULL, NULL, NULL, NULL,
  850             0x1ff, 0);
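               /*
                * The 0x1ff passed as the align argument is a mask
                * requesting 512-byte alignment of the per-sector
                * allocations.
                */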
  851         if (mdio->md_options & MD_RESERVE) {
  852                 off_t nsectors;
  853 
  854                 nsectors = sc->mediasize / sc->sectorsize;
  855                 for (u = 0; u < nsectors; u++) {
  856                         sp = (uintptr_t)uma_zalloc(sc->uma, (md_malloc_wait ?
  857                             M_WAITOK : M_NOWAIT) | M_ZERO);
  858                         if (sp != 0)
  859                                 error = s_write(sc->indir, u, sp);
  860                         else
  861                                 error = ENOMEM;
  862                         if (error != 0)
  863                                 break;
  864                 }
  865         }
  866         return (error);
  867 }
  868 
  869 
  870 static int
  871 mdsetcred(struct md_s *sc, struct ucred *cred)
  872 {
  873         char *tmpbuf;
  874         int error = 0;
  875 
  876         /*
   877          * Set the credentials in our softc
  878          */
  879 
  880         if (sc->cred)
  881                 crfree(sc->cred);
  882         sc->cred = crhold(cred);
  883 
  884         /*
  885          * Horrible kludge to establish credentials for NFS  XXX.
  886          */
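       /*
        * The dummy sector read below exercises the vnode with the new
        * credential, so problems such as NFS permission checks surface
        * at configuration time instead of later in the worker thread.
        */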
  887 
  888         if (sc->vnode) {
  889                 struct uio auio;
  890                 struct iovec aiov;
  891 
  892                 tmpbuf = malloc(sc->sectorsize, M_TEMP, M_WAITOK);
  893                 bzero(&auio, sizeof(auio));
  894 
  895                 aiov.iov_base = tmpbuf;
  896                 aiov.iov_len = sc->sectorsize;
  897                 auio.uio_iov = &aiov;
  898                 auio.uio_iovcnt = 1;
  899                 auio.uio_offset = 0;
  900                 auio.uio_rw = UIO_READ;
  901                 auio.uio_segflg = UIO_SYSSPACE;
  902                 auio.uio_resid = aiov.iov_len;
  903                 vn_lock(sc->vnode, LK_EXCLUSIVE | LK_RETRY);
  904                 error = VOP_READ(sc->vnode, &auio, 0, sc->cred);
  905                 VOP_UNLOCK(sc->vnode, 0);
  906                 free(tmpbuf, M_TEMP);
  907         }
  908         return (error);
  909 }
  910 
  911 static int
  912 mdcreate_vnode(struct md_s *sc, struct md_ioctl *mdio, struct thread *td)
  913 {
  914         struct vattr vattr;
  915         struct nameidata nd;
  916         int error, flags, vfslocked;
  917 
  918         error = copyinstr(mdio->md_file, sc->file, sizeof(sc->file), NULL);
  919         if (error != 0)
  920                 return (error);
  921         flags = FREAD|FWRITE;
  922         /*
  923          * If the user specified that this is a read only device, unset the
  924          * FWRITE mask before trying to open the backing store.
  925          */
  926         if ((mdio->md_options & MD_READONLY) != 0)
  927                 flags &= ~FWRITE;
  928         NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, UIO_SYSSPACE, sc->file, td);
  929         error = vn_open(&nd, &flags, 0, NULL);
  930         if (error != 0)
  931                 return (error);
  932         vfslocked = NDHASGIANT(&nd);
  933         NDFREE(&nd, NDF_ONLY_PNBUF);
  934         if (nd.ni_vp->v_type != VREG) {
  935                 error = EINVAL;
  936                 goto bad;
  937         }
  938         error = VOP_GETATTR(nd.ni_vp, &vattr, td->td_ucred);
  939         if (error != 0)
  940                 goto bad;
  941         if (VOP_ISLOCKED(nd.ni_vp) != LK_EXCLUSIVE) {
  942                 vn_lock(nd.ni_vp, LK_UPGRADE | LK_RETRY);
  943                 if (nd.ni_vp->v_iflag & VI_DOOMED) {
  944                         /* Forced unmount. */
  945                         error = EBADF;
  946                         goto bad;
  947                 }
  948         }
  949         nd.ni_vp->v_vflag |= VV_MD;
  950         VOP_UNLOCK(nd.ni_vp, 0);
  951 
  952         if (mdio->md_fwsectors != 0)
  953                 sc->fwsectors = mdio->md_fwsectors;
  954         if (mdio->md_fwheads != 0)
  955                 sc->fwheads = mdio->md_fwheads;
  956         sc->flags = mdio->md_options & (MD_FORCE | MD_ASYNC);
  957         if (!(flags & FWRITE))
  958                 sc->flags |= MD_READONLY;
  959         sc->vnode = nd.ni_vp;
  960 
  961         error = mdsetcred(sc, td->td_ucred);
  962         if (error != 0) {
  963                 sc->vnode = NULL;
  964                 vn_lock(nd.ni_vp, LK_EXCLUSIVE | LK_RETRY);
  965                 nd.ni_vp->v_vflag &= ~VV_MD;
  966                 goto bad;
  967         }
  968         VFS_UNLOCK_GIANT(vfslocked);
  969         return (0);
  970 bad:
  971         VOP_UNLOCK(nd.ni_vp, 0);
  972         (void)vn_close(nd.ni_vp, flags, td->td_ucred, td);
  973         VFS_UNLOCK_GIANT(vfslocked);
  974         return (error);
  975 }
  976 
  977 static int
  978 mddestroy(struct md_s *sc, struct thread *td)
  979 {
  980         int vfslocked;
  981 
  982         if (sc->gp) {
  983                 sc->gp->softc = NULL;
  984                 g_topology_lock();
  985                 g_wither_geom(sc->gp, ENXIO);
  986                 g_topology_unlock();
  987                 sc->gp = NULL;
  988                 sc->pp = NULL;
  989         }
  990         if (sc->devstat) {
  991                 devstat_remove_entry(sc->devstat);
  992                 sc->devstat = NULL;
  993         }
  994         mtx_lock(&sc->queue_mtx);
  995         sc->flags |= MD_SHUTDOWN;
  996         wakeup(sc);
  997         while (!(sc->flags & MD_EXITING))
  998                 msleep(sc->procp, &sc->queue_mtx, PRIBIO, "mddestroy", hz / 10);
  999         mtx_unlock(&sc->queue_mtx);
 1000         mtx_destroy(&sc->queue_mtx);
 1001         if (sc->vnode != NULL) {
 1002                 vfslocked = VFS_LOCK_GIANT(sc->vnode->v_mount);
 1003                 vn_lock(sc->vnode, LK_EXCLUSIVE | LK_RETRY);
 1004                 sc->vnode->v_vflag &= ~VV_MD;
 1005                 VOP_UNLOCK(sc->vnode, 0);
 1006                 (void)vn_close(sc->vnode, sc->flags & MD_READONLY ?
 1007                     FREAD : (FREAD|FWRITE), sc->cred, td);
 1008                 VFS_UNLOCK_GIANT(vfslocked);
 1009         }
 1010         if (sc->cred != NULL)
 1011                 crfree(sc->cred);
 1012         if (sc->object != NULL)
 1013                 vm_object_deallocate(sc->object);
 1014         if (sc->indir)
 1015                 destroy_indir(sc, sc->indir);
 1016         if (sc->uma)
 1017                 uma_zdestroy(sc->uma);
 1018 
 1019         LIST_REMOVE(sc, list);
 1020         free_unr(md_uh, sc->unit);
 1021         free(sc, M_MD);
 1022         return (0);
 1023 }
 1024 
 1025 static int
 1026 mdcreate_swap(struct md_s *sc, struct md_ioctl *mdio, struct thread *td)
 1027 {
 1028         vm_ooffset_t npage;
 1029         int error;
 1030 
 1031         /*
  1032          * Range check.  Disallow non-positive sizes and any size that
  1033          * is not an exact multiple of the page size.
 1034          */
 1035         if (sc->mediasize <= 0 || (sc->mediasize % PAGE_SIZE) != 0)
 1036                 return (EDOM);
 1037 
 1038         /*
 1039          * Allocate an OBJT_SWAP object.
 1040          *
 1041          * Note the truncation.
 1042          */
 1043 
 1044         npage = mdio->md_mediasize / PAGE_SIZE;
 1045         if (mdio->md_fwsectors != 0)
 1046                 sc->fwsectors = mdio->md_fwsectors;
 1047         if (mdio->md_fwheads != 0)
 1048                 sc->fwheads = mdio->md_fwheads;
 1049         sc->object = vm_pager_allocate(OBJT_SWAP, NULL, PAGE_SIZE * npage,
 1050             VM_PROT_DEFAULT, 0, td->td_ucred);
 1051         if (sc->object == NULL)
 1052                 return (ENOMEM);
 1053         sc->flags = mdio->md_options & MD_FORCE;
 1054         if (mdio->md_options & MD_RESERVE) {
 1055                 if (swap_pager_reserve(sc->object, 0, npage) < 0) {
 1056                         error = EDOM;
 1057                         goto finish;
 1058                 }
 1059         }
 1060         error = mdsetcred(sc, td->td_ucred);
 1061  finish:
 1062         if (error != 0) {
 1063                 vm_object_deallocate(sc->object);
 1064                 sc->object = NULL;
 1065         }
 1066         return (error);
 1067 }
 1068 
 1069 
 1070 static int
 1071 xmdctlioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, struct thread *td)
 1072 {
 1073         struct md_ioctl *mdio;
 1074         struct md_s *sc;
 1075         int error, i;
 1076         unsigned sectsize;
 1077 
 1078         if (md_debug)
 1079                 printf("mdctlioctl(%s %lx %p %x %p)\n",
 1080                         devtoname(dev), cmd, addr, flags, td);
 1081 
 1082         mdio = (struct md_ioctl *)addr;
 1083         if (mdio->md_version != MDIOVERSION)
 1084                 return (EINVAL);
 1085 
 1086         /*
  1087          * Every ioctl handled here reads an mdio, so the version
  1088          * number is asserted once up front rather than in the
  1089          * individual handlers.  The correct return value for an
  1090          * unknown ioctl is still ENOIOCTL, not EINVAL, which the
  1091          * default case of the switch below returns.
 1092          */
 1093         error = 0;
 1094         switch (cmd) {
 1095         case MDIOCATTACH:
 1096                 switch (mdio->md_type) {
 1097                 case MD_MALLOC:
 1098                 case MD_PRELOAD:
 1099                 case MD_VNODE:
 1100                 case MD_SWAP:
 1101                         break;
 1102                 default:
 1103                         return (EINVAL);
 1104                 }
 1105                 if (mdio->md_sectorsize == 0)
 1106                         sectsize = DEV_BSIZE;
 1107                 else
 1108                         sectsize = mdio->md_sectorsize;
 1109                 if (sectsize > MAXPHYS || mdio->md_mediasize < sectsize)
 1110                         return (EINVAL);
 1111                 if (mdio->md_options & MD_AUTOUNIT)
 1112                         sc = mdnew(-1, &error, mdio->md_type);
 1113                 else {
 1114                         if (mdio->md_unit > INT_MAX)
 1115                                 return (EINVAL);
 1116                         sc = mdnew(mdio->md_unit, &error, mdio->md_type);
 1117                 }
 1118                 if (sc == NULL)
 1119                         return (error);
 1120                 if (mdio->md_options & MD_AUTOUNIT)
 1121                         mdio->md_unit = sc->unit;
 1122                 sc->mediasize = mdio->md_mediasize;
 1123                 sc->sectorsize = sectsize;
 1124                 error = EDOOFUS;
 1125                 switch (sc->type) {
 1126                 case MD_MALLOC:
 1127                         sc->start = mdstart_malloc;
 1128                         error = mdcreate_malloc(sc, mdio);
 1129                         break;
 1130                 case MD_PRELOAD:
 1131                         sc->start = mdstart_preload;
 1132                         error = mdcreate_preload(sc, mdio);
 1133                         break;
 1134                 case MD_VNODE:
 1135                         sc->start = mdstart_vnode;
 1136                         error = mdcreate_vnode(sc, mdio, td);
 1137                         break;
 1138                 case MD_SWAP:
 1139                         sc->start = mdstart_swap;
 1140                         error = mdcreate_swap(sc, mdio, td);
 1141                         break;
 1142                 }
 1143                 if (error != 0) {
 1144                         mddestroy(sc, td);
 1145                         return (error);
 1146                 }
 1147 
 1148                 /* Prune off any residual fractional sector */
 1149                 i = sc->mediasize % sc->sectorsize;
 1150                 sc->mediasize -= i;
 1151 
 1152                 mdinit(sc);
 1153                 return (0);
 1154         case MDIOCDETACH:
 1155                 if (mdio->md_mediasize != 0 ||
 1156                     (mdio->md_options & ~MD_FORCE) != 0)
 1157                         return (EINVAL);
 1158 
 1159                 sc = mdfind(mdio->md_unit);
 1160                 if (sc == NULL)
 1161                         return (ENOENT);
 1162                 if (sc->opencount != 0 && !(sc->flags & MD_FORCE) &&
 1163                     !(mdio->md_options & MD_FORCE))
 1164                         return (EBUSY);
 1165                 return (mddestroy(sc, td));
 1166         case MDIOCQUERY:
 1167                 sc = mdfind(mdio->md_unit);
 1168                 if (sc == NULL)
 1169                         return (ENOENT);
 1170                 mdio->md_type = sc->type;
 1171                 mdio->md_options = sc->flags;
 1172                 mdio->md_mediasize = sc->mediasize;
 1173                 mdio->md_sectorsize = sc->sectorsize;
 1174                 if (sc->type == MD_VNODE)
 1175                         error = copyout(sc->file, mdio->md_file,
 1176                             strlen(sc->file) + 1);
 1177                 return (error);
 1178         case MDIOCLIST:
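               /*
                * Report the active units packed into md_pad[]:
                * md_pad[0] holds the count, md_pad[1..] the unit
                * numbers, and the last slot is set to -1 if the list
                * had to be truncated.
                */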
 1179                 i = 1;
 1180                 LIST_FOREACH(sc, &md_softc_list, list) {
 1181                         if (i == MDNPAD - 1)
 1182                                 mdio->md_pad[i] = -1;
 1183                         else
 1184                                 mdio->md_pad[i++] = sc->unit;
 1185                 }
 1186                 mdio->md_pad[0] = i - 1;
 1187                 return (0);
 1188         default:
 1189                 return (ENOIOCTL);
  1190         }
 1191 }
 1192 
 1193 static int
 1194 mdctlioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, struct thread *td)
 1195 {
 1196         int error;
 1197 
 1198         sx_xlock(&md_sx);
 1199         error = xmdctlioctl(dev, cmd, addr, flags, td);
 1200         sx_xunlock(&md_sx);
 1201         return (error);
 1202 }
 1203 
 1204 static void
 1205 md_preloaded(u_char *image, size_t length)
 1206 {
 1207         struct md_s *sc;
 1208         int error;
 1209 
 1210         sc = mdnew(-1, &error, MD_PRELOAD);
 1211         if (sc == NULL)
 1212                 return;
 1213         sc->mediasize = length;
 1214         sc->sectorsize = DEV_BSIZE;
 1215         sc->pl_ptr = image;
 1216         sc->pl_len = length;
 1217         sc->start = mdstart_preload;
 1218 #ifdef MD_ROOT
 1219         if (sc->unit == 0)
 1220                 rootdevnames[0] = "ufs:/dev/md0";
 1221 #endif
 1222         mdinit(sc);
 1223 }
 1224 
 1225 static void
 1226 g_md_init(struct g_class *mp __unused)
 1227 {
 1228         caddr_t mod;
 1229         caddr_t c;
 1230         u_char *ptr, *name, *type;
 1231         unsigned len;
 1232         int i;
 1233 
 1234         /* figure out log2(NINDIR) */
 1235         for (i = NINDIR, nshift = -1; i; nshift++)
 1236                 i >>= 1;
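       /* With 4 KB pages and 8-byte pointers this yields nshift == 9. */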
 1237 
 1238         mod = NULL;
 1239         sx_init(&md_sx, "MD config lock");
 1240         g_topology_unlock();
 1241         md_uh = new_unrhdr(0, INT_MAX, NULL);
 1242 #ifdef MD_ROOT_SIZE
 1243         sx_xlock(&md_sx);
 1244         md_preloaded(mfs_root.start, sizeof(mfs_root.start));
 1245         sx_xunlock(&md_sx);
 1246 #endif
 1247         /* XXX: are preload_* static or do they need Giant ? */
 1248         while ((mod = preload_search_next_name(mod)) != NULL) {
 1249                 name = (char *)preload_search_info(mod, MODINFO_NAME);
 1250                 if (name == NULL)
 1251                         continue;
 1252                 type = (char *)preload_search_info(mod, MODINFO_TYPE);
 1253                 if (type == NULL)
 1254                         continue;
 1255                 if (strcmp(type, "md_image") && strcmp(type, "mfs_root"))
 1256                         continue;
 1257                 c = preload_search_info(mod, MODINFO_ADDR);
 1258                 ptr = *(u_char **)c;
 1259                 c = preload_search_info(mod, MODINFO_SIZE);
 1260                 len = *(size_t *)c;
 1261                 printf("%s%d: Preloaded image <%s> %d bytes at %p\n",
 1262                     MD_NAME, mdunits, name, len, ptr);
 1263                 sx_xlock(&md_sx);
 1264                 md_preloaded(ptr, len);
 1265                 sx_xunlock(&md_sx);
 1266         }
 1267         status_dev = make_dev(&mdctl_cdevsw, INT_MAX, UID_ROOT, GID_WHEEL,
 1268             0600, MDCTL_NAME);
 1269         g_topology_lock();
 1270 }
 1271 
 1272 static void
 1273 g_md_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
 1274     struct g_consumer *cp __unused, struct g_provider *pp)
 1275 {
 1276         struct md_s *mp;
 1277         char *type;
 1278 
 1279         mp = gp->softc;
 1280         if (mp == NULL)
 1281                 return;
 1282 
 1283         switch (mp->type) {
 1284         case MD_MALLOC:
 1285                 type = "malloc";
 1286                 break;
 1287         case MD_PRELOAD:
 1288                 type = "preload";
 1289                 break;
 1290         case MD_VNODE:
 1291                 type = "vnode";
 1292                 break;
 1293         case MD_SWAP:
 1294                 type = "swap";
 1295                 break;
 1296         default:
 1297                 type = "unknown";
 1298                 break;
 1299         }
 1300 
 1301         if (pp != NULL) {
 1302                 if (indent == NULL) {
 1303                         sbuf_printf(sb, " u %d", mp->unit);
 1304                         sbuf_printf(sb, " s %ju", (uintmax_t) mp->sectorsize);
 1305                         sbuf_printf(sb, " f %ju", (uintmax_t) mp->fwheads);
 1306                         sbuf_printf(sb, " fs %ju", (uintmax_t) mp->fwsectors);
 1307                         sbuf_printf(sb, " l %ju", (uintmax_t) mp->mediasize);
 1308                         sbuf_printf(sb, " t %s", type);
 1309                         if (mp->type == MD_VNODE && mp->vnode != NULL)
 1310                                 sbuf_printf(sb, " file %s", mp->file);
 1311                 } else {
 1312                         sbuf_printf(sb, "%s<unit>%d</unit>\n", indent,
 1313                             mp->unit);
 1314                         sbuf_printf(sb, "%s<sectorsize>%ju</sectorsize>\n",
 1315                             indent, (uintmax_t) mp->sectorsize);
 1316                         sbuf_printf(sb, "%s<fwheads>%ju</fwheads>\n",
 1317                             indent, (uintmax_t) mp->fwheads);
 1318                         sbuf_printf(sb, "%s<fwsectors>%ju</fwsectors>\n",
 1319                             indent, (uintmax_t) mp->fwsectors);
 1320                         sbuf_printf(sb, "%s<length>%ju</length>\n",
 1321                             indent, (uintmax_t) mp->mediasize);
 1322                         sbuf_printf(sb, "%s<type>%s</type>\n", indent,
 1323                             type);
 1324                         if (mp->type == MD_VNODE && mp->vnode != NULL)
 1325                                 sbuf_printf(sb, "%s<file>%s</file>\n",
 1326                                     indent, mp->file);
 1327                 }
 1328         }
 1329 }
 1330 
 1331 static void
 1332 g_md_fini(struct g_class *mp __unused)
 1333 {
 1334 
 1335         sx_destroy(&md_sx);
 1336         if (status_dev != NULL)
 1337                 destroy_dev(status_dev);
 1338         delete_unrhdr(md_uh);
 1339 }
