The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/dev/md/md.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * ----------------------------------------------------------------------------
    3  * "THE BEER-WARE LICENSE" (Revision 42):
    4  * <phk@FreeBSD.ORG> wrote this file.  As long as you retain this notice you
    5  * can do whatever you want with this stuff. If we meet some day, and you think
    6  * this stuff is worth it, you can buy me a beer in return.   Poul-Henning Kamp
    7  * ----------------------------------------------------------------------------
    8  *
    9  * $FreeBSD: releng/9.0/sys/dev/md/md.c 223920 2011-07-11 05:19:28Z ae $
   10  *
   11  */
   12 
   13 /*-
   14  * The following functions are based on the vn(4) driver: mdstart_swap(),
   15  * mdstart_vnode(), mdcreate_swap(), mdcreate_vnode() and mddestroy(),
   16  * and as such are under the following copyright:
   17  *
   18  * Copyright (c) 1988 University of Utah.
   19  * Copyright (c) 1990, 1993
   20  *      The Regents of the University of California.  All rights reserved.
   21  *
   22  * This code is derived from software contributed to Berkeley by
   23  * the Systems Programming Group of the University of Utah Computer
   24  * Science Department.
   25  *
   26  * Redistribution and use in source and binary forms, with or without
   27  * modification, are permitted provided that the following conditions
   28  * are met:
   29  * 1. Redistributions of source code must retain the above copyright
   30  *    notice, this list of conditions and the following disclaimer.
   31  * 2. Redistributions in binary form must reproduce the above copyright
   32  *    notice, this list of conditions and the following disclaimer in the
   33  *    documentation and/or other materials provided with the distribution.
   34  * 4. Neither the name of the University nor the names of its contributors
   35  *    may be used to endorse or promote products derived from this software
   36  *    without specific prior written permission.
   37  *
   38  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   39  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   40  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   41  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   42  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   43  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   44  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   45  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   46  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   47  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   48  * SUCH DAMAGE.
   49  *
   50  * from: Utah Hdr: vn.c 1.13 94/04/02
   51  *
   52  *      from: @(#)vn.c  8.6 (Berkeley) 4/1/94
   53  * From: src/sys/dev/vn/vn.c,v 1.122 2000/12/16 16:06:03
   54  */
   55 
   56 #include "opt_geom.h"
   57 #include "opt_md.h"
   58 
   59 #include <sys/param.h>
   60 #include <sys/systm.h>
   61 #include <sys/bio.h>
   62 #include <sys/conf.h>
   63 #include <sys/devicestat.h>
   64 #include <sys/fcntl.h>
   65 #include <sys/kernel.h>
   66 #include <sys/kthread.h>
   67 #include <sys/limits.h>
   68 #include <sys/linker.h>
   69 #include <sys/lock.h>
   70 #include <sys/malloc.h>
   71 #include <sys/mdioctl.h>
   72 #include <sys/mount.h>
   73 #include <sys/mutex.h>
   74 #include <sys/sx.h>
   75 #include <sys/namei.h>
   76 #include <sys/proc.h>
   77 #include <sys/queue.h>
   78 #include <sys/sbuf.h>
   79 #include <sys/sched.h>
   80 #include <sys/sf_buf.h>
   81 #include <sys/sysctl.h>
   82 #include <sys/vnode.h>
   83 
   84 #include <geom/geom.h>
   85 
   86 #include <vm/vm.h>
   87 #include <vm/vm_object.h>
   88 #include <vm/vm_page.h>
   89 #include <vm/vm_pager.h>
   90 #include <vm/swap_pager.h>
   91 #include <vm/uma.h>
   92 
   93 #include <machine/vmparam.h>
   94 
   95 #define MD_MODVER 1
   96 
   97 #define MD_SHUTDOWN     0x10000         /* Tell worker thread to terminate. */
   98 #define MD_EXITING      0x20000         /* Worker thread is exiting. */
   99 
  100 #ifndef MD_NSECT
  101 #define MD_NSECT (10000 * 2)
  102 #endif
  103 
  104 static MALLOC_DEFINE(M_MD, "md_disk", "Memory Disk");
  105 static MALLOC_DEFINE(M_MDSECT, "md_sectors", "Memory Disk Sectors");
  106 
  107 static int md_debug;
  108 SYSCTL_INT(_debug, OID_AUTO, mddebug, CTLFLAG_RW, &md_debug, 0, "");
  109 static int md_malloc_wait;
  110 SYSCTL_INT(_vm, OID_AUTO, md_malloc_wait, CTLFLAG_RW, &md_malloc_wait, 0, "");
  111 
  #if defined(MD_ROOT) && defined(MD_ROOT_SIZE)
  /*
   * Storage for an image that is patched into the built object.
   *
   * Tools that patch the object find the usable region by searching for
   * the start and end marker strings, so both arrays live in one struct
   * to keep them contiguous.  MD_ROOT_SIZE is expressed in kilobytes.
   */
  static struct {
          u_char start[MD_ROOT_SIZE*1024];        /* image area, begins with marker */
          u_char end[128];                        /* trailing marker */
  } mfs_root = {
          .start = "MFS Filesystem goes here",
          .end = "MFS Filesystem had better STOP here",
  };
  #endif
  127 
  128 static g_init_t g_md_init;
  129 static g_fini_t g_md_fini;
  130 static g_start_t g_md_start;
  131 static g_access_t g_md_access;
  132 static void g_md_dumpconf(struct sbuf *sb, const char *indent,
  133     struct g_geom *gp, struct g_consumer *cp __unused, struct g_provider *pp);
  134 
  135 static int mdunits;
  136 static struct cdev *status_dev = 0;
  137 static struct sx md_sx;
  138 static struct unrhdr *md_uh;
  139 
  140 static d_ioctl_t mdctlioctl;
  141 
  142 static struct cdevsw mdctl_cdevsw = {
  143         .d_version =    D_VERSION,
  144         .d_ioctl =      mdctlioctl,
  145         .d_name =       MD_NAME,
  146 };
  147 
  148 struct g_class g_md_class = {
  149         .name = "MD",
  150         .version = G_VERSION,
  151         .init = g_md_init,
  152         .fini = g_md_fini,
  153         .start = g_md_start,
  154         .access = g_md_access,
  155         .dumpconf = g_md_dumpconf,
  156 };
  157 
  158 DECLARE_GEOM_CLASS(g_md_class, g_md);
  159 
  160 
  161 static LIST_HEAD(, md_s) md_softc_list = LIST_HEAD_INITIALIZER(md_softc_list);
  162 
  163 #define NINDIR  (PAGE_SIZE / sizeof(uintptr_t))
  164 #define NMASK   (NINDIR-1)
  165 static int nshift;
  166 
  167 struct indir {
  168         uintptr_t       *array;
  169         u_int           total;
  170         u_int           used;
  171         u_int           shift;
  172 };
  173 
  174 struct md_s {
  175         int unit;
  176         LIST_ENTRY(md_s) list;
  177         struct bio_queue_head bio_queue;
  178         struct mtx queue_mtx;
  179         struct cdev *dev;
  180         enum md_types type;
  181         off_t mediasize;
  182         unsigned sectorsize;
  183         unsigned opencount;
  184         unsigned fwheads;
  185         unsigned fwsectors;
  186         unsigned flags;
  187         char name[20];
  188         struct proc *procp;
  189         struct g_geom *gp;
  190         struct g_provider *pp;
  191         int (*start)(struct md_s *sc, struct bio *bp);
  192         struct devstat *devstat;
  193 
  194         /* MD_MALLOC related fields */
  195         struct indir *indir;
  196         uma_zone_t uma;
  197 
  198         /* MD_PRELOAD related fields */
  199         u_char *pl_ptr;
  200         size_t pl_len;
  201 
  202         /* MD_VNODE related fields */
  203         struct vnode *vnode;
  204         char file[PATH_MAX];
  205         struct ucred *cred;
  206 
  207         /* MD_SWAP related fields */
  208         vm_object_t object;
  209 };
  210 
  211 static struct indir *
  212 new_indir(u_int shift)
  213 {
  214         struct indir *ip;
  215 
  216         ip = malloc(sizeof *ip, M_MD, (md_malloc_wait ? M_WAITOK : M_NOWAIT)
  217             | M_ZERO);
  218         if (ip == NULL)
  219                 return (NULL);
  220         ip->array = malloc(sizeof(uintptr_t) * NINDIR,
  221             M_MDSECT, (md_malloc_wait ? M_WAITOK : M_NOWAIT) | M_ZERO);
  222         if (ip->array == NULL) {
  223                 free(ip, M_MD);
  224                 return (NULL);
  225         }
  226         ip->total = NINDIR;
  227         ip->shift = shift;
  228         return (ip);
  229 }
  230 
  231 static void
  232 del_indir(struct indir *ip)
  233 {
  234 
  235         free(ip->array, M_MDSECT);
  236         free(ip, M_MD);
  237 }
  238 
  239 static void
  240 destroy_indir(struct md_s *sc, struct indir *ip)
  241 {
  242         int i;
  243 
  244         for (i = 0; i < NINDIR; i++) {
  245                 if (!ip->array[i])
  246                         continue;
  247                 if (ip->shift)
  248                         destroy_indir(sc, (struct indir*)(ip->array[i]));
  249                 else if (ip->array[i] > 255)
  250                         uma_zfree(sc->uma, (void *)(ip->array[i]));
  251         }
  252         del_indir(ip);
  253 }
  254 
  255 /*
  256  * This function does the math and allocates the top level "indir" structure
  257  * for a device of "size" sectors.
  258  */
  259 
  260 static struct indir *
  261 dimension(off_t size)
  262 {
  263         off_t rcnt;
  264         struct indir *ip;
  265         int layer;
  266 
  267         rcnt = size;
  268         layer = 0;
  269         while (rcnt > NINDIR) {
  270                 rcnt /= NINDIR;
  271                 layer++;
  272         }
  273 
  274         /*
  275          * XXX: the top layer is probably not fully populated, so we allocate
  276          * too much space for ip->array in here.
  277          */
  278         ip = malloc(sizeof *ip, M_MD, M_WAITOK | M_ZERO);
  279         ip->array = malloc(sizeof(uintptr_t) * NINDIR,
  280             M_MDSECT, M_WAITOK | M_ZERO);
  281         ip->total = NINDIR;
  282         ip->shift = layer * nshift;
  283         return (ip);
  284 }
  285 
  286 /*
  287  * Read a given sector
  288  */
  289 
  290 static uintptr_t
  291 s_read(struct indir *ip, off_t offset)
  292 {
  293         struct indir *cip;
  294         int idx;
  295         uintptr_t up;
  296 
  297         if (md_debug > 1)
  298                 printf("s_read(%jd)\n", (intmax_t)offset);
  299         up = 0;
  300         for (cip = ip; cip != NULL;) {
  301                 if (cip->shift) {
  302                         idx = (offset >> cip->shift) & NMASK;
  303                         up = cip->array[idx];
  304                         cip = (struct indir *)up;
  305                         continue;
  306                 }
  307                 idx = offset & NMASK;
  308                 return (cip->array[idx]);
  309         }
  310         return (0);
  311 }
  312 
  313 /*
  314  * Write a given sector, prune the tree if the value is 0
  315  */
  316 
  317 static int
  318 s_write(struct indir *ip, off_t offset, uintptr_t ptr)
  319 {
  320         struct indir *cip, *lip[10];
  321         int idx, li;
  322         uintptr_t up;
  323 
  324         if (md_debug > 1)
  325                 printf("s_write(%jd, %p)\n", (intmax_t)offset, (void *)ptr);
  326         up = 0;
  327         li = 0;
  328         cip = ip;
  329         for (;;) {
  330                 lip[li++] = cip;
  331                 if (cip->shift) {
  332                         idx = (offset >> cip->shift) & NMASK;
  333                         up = cip->array[idx];
  334                         if (up != 0) {
  335                                 cip = (struct indir *)up;
  336                                 continue;
  337                         }
  338                         /* Allocate branch */
  339                         cip->array[idx] =
  340                             (uintptr_t)new_indir(cip->shift - nshift);
  341                         if (cip->array[idx] == 0)
  342                                 return (ENOSPC);
  343                         cip->used++;
  344                         up = cip->array[idx];
  345                         cip = (struct indir *)up;
  346                         continue;
  347                 }
  348                 /* leafnode */
  349                 idx = offset & NMASK;
  350                 up = cip->array[idx];
  351                 if (up != 0)
  352                         cip->used--;
  353                 cip->array[idx] = ptr;
  354                 if (ptr != 0)
  355                         cip->used++;
  356                 break;
  357         }
  358         if (cip->used != 0 || li == 1)
  359                 return (0);
  360         li--;
  361         while (cip->used == 0 && cip != ip) {
  362                 li--;
  363                 idx = (offset >> lip[li]->shift) & NMASK;
  364                 up = lip[li]->array[idx];
  365                 KASSERT(up == (uintptr_t)cip, ("md screwed up"));
  366                 del_indir(cip);
  367                 lip[li]->array[idx] = 0;
  368                 lip[li]->used--;
  369                 cip = lip[li];
  370         }
  371         return (0);
  372 }
  373 
  374 
  375 static int
  376 g_md_access(struct g_provider *pp, int r, int w, int e)
  377 {
  378         struct md_s *sc;
  379 
  380         sc = pp->geom->softc;
  381         if (sc == NULL) {
  382                 if (r <= 0 && w <= 0 && e <= 0)
  383                         return (0);
  384                 return (ENXIO);
  385         }
  386         r += pp->acr;
  387         w += pp->acw;
  388         e += pp->ace;
  389         if ((sc->flags & MD_READONLY) != 0 && w > 0)
  390                 return (EROFS);
  391         if ((pp->acr + pp->acw + pp->ace) == 0 && (r + w + e) > 0) {
  392                 sc->opencount = 1;
  393         } else if ((pp->acr + pp->acw + pp->ace) > 0 && (r + w + e) == 0) {
  394                 sc->opencount = 0;
  395         }
  396         return (0);
  397 }
  398 
  399 static void
  400 g_md_start(struct bio *bp)
  401 {
  402         struct md_s *sc;
  403 
  404         sc = bp->bio_to->geom->softc;
  405         if ((bp->bio_cmd == BIO_READ) || (bp->bio_cmd == BIO_WRITE))
  406                 devstat_start_transaction_bio(sc->devstat, bp);
  407         mtx_lock(&sc->queue_mtx);
  408         bioq_disksort(&sc->bio_queue, bp);
  409         mtx_unlock(&sc->queue_mtx);
  410         wakeup(sc);
  411 }
  412 
  413 static int
  414 mdstart_malloc(struct md_s *sc, struct bio *bp)
  415 {
  416         int i, error;
  417         u_char *dst;
  418         off_t secno, nsec, uc;
  419         uintptr_t sp, osp;
  420 
  421         switch (bp->bio_cmd) {
  422         case BIO_READ:
  423         case BIO_WRITE:
  424         case BIO_DELETE:
  425                 break;
  426         default:
  427                 return (EOPNOTSUPP);
  428         }
  429 
  430         nsec = bp->bio_length / sc->sectorsize;
  431         secno = bp->bio_offset / sc->sectorsize;
  432         dst = bp->bio_data;
  433         error = 0;
  434         while (nsec--) {
  435                 osp = s_read(sc->indir, secno);
  436                 if (bp->bio_cmd == BIO_DELETE) {
  437                         if (osp != 0)
  438                                 error = s_write(sc->indir, secno, 0);
  439                 } else if (bp->bio_cmd == BIO_READ) {
  440                         if (osp == 0)
  441                                 bzero(dst, sc->sectorsize);
  442                         else if (osp <= 255)
  443                                 memset(dst, osp, sc->sectorsize);
  444                         else {
  445                                 bcopy((void *)osp, dst, sc->sectorsize);
  446                                 cpu_flush_dcache(dst, sc->sectorsize);
  447                         }
  448                         osp = 0;
  449                 } else if (bp->bio_cmd == BIO_WRITE) {
  450                         if (sc->flags & MD_COMPRESS) {
  451                                 uc = dst[0];
  452                                 for (i = 1; i < sc->sectorsize; i++)
  453                                         if (dst[i] != uc)
  454                                                 break;
  455                         } else {
  456                                 i = 0;
  457                                 uc = 0;
  458                         }
  459                         if (i == sc->sectorsize) {
  460                                 if (osp != uc)
  461                                         error = s_write(sc->indir, secno, uc);
  462                         } else {
  463                                 if (osp <= 255) {
  464                                         sp = (uintptr_t)uma_zalloc(sc->uma,
  465                                             md_malloc_wait ? M_WAITOK :
  466                                             M_NOWAIT);
  467                                         if (sp == 0) {
  468                                                 error = ENOSPC;
  469                                                 break;
  470                                         }
  471                                         bcopy(dst, (void *)sp, sc->sectorsize);
  472                                         error = s_write(sc->indir, secno, sp);
  473                                 } else {
  474                                         bcopy(dst, (void *)osp, sc->sectorsize);
  475                                         osp = 0;
  476                                 }
  477                         }
  478                 } else {
  479                         error = EOPNOTSUPP;
  480                 }
  481                 if (osp > 255)
  482                         uma_zfree(sc->uma, (void*)osp);
  483                 if (error != 0)
  484                         break;
  485                 secno++;
  486                 dst += sc->sectorsize;
  487         }
  488         bp->bio_resid = 0;
  489         return (error);
  490 }
  491 
  492 static int
  493 mdstart_preload(struct md_s *sc, struct bio *bp)
  494 {
  495 
  496         switch (bp->bio_cmd) {
  497         case BIO_READ:
  498                 bcopy(sc->pl_ptr + bp->bio_offset, bp->bio_data,
  499                     bp->bio_length);
  500                 cpu_flush_dcache(bp->bio_data, bp->bio_length);
  501                 break;
  502         case BIO_WRITE:
  503                 bcopy(bp->bio_data, sc->pl_ptr + bp->bio_offset,
  504                     bp->bio_length);
  505                 break;
  506         }
  507         bp->bio_resid = 0;
  508         return (0);
  509 }
  510 
  511 static int
  512 mdstart_vnode(struct md_s *sc, struct bio *bp)
  513 {
  514         int error, vfslocked;
  515         struct uio auio;
  516         struct iovec aiov;
  517         struct mount *mp;
  518         struct vnode *vp;
  519         struct thread *td;
  520         off_t end, zerosize;
  521 
  522         switch (bp->bio_cmd) {
  523         case BIO_READ:
  524         case BIO_WRITE:
  525         case BIO_DELETE:
  526         case BIO_FLUSH:
  527                 break;
  528         default:
  529                 return (EOPNOTSUPP);
  530         }
  531 
  532         td = curthread;
  533         vp = sc->vnode;
  534 
  535         /*
  536          * VNODE I/O
  537          *
  538          * If an error occurs, we set BIO_ERROR but we do not set
  539          * B_INVAL because (for a write anyway), the buffer is
  540          * still valid.
  541          */
  542 
  543         if (bp->bio_cmd == BIO_FLUSH) {
  544                 vfslocked = VFS_LOCK_GIANT(vp->v_mount);
  545                 (void) vn_start_write(vp, &mp, V_WAIT);
  546                 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
  547                 error = VOP_FSYNC(vp, MNT_WAIT, td);
  548                 VOP_UNLOCK(vp, 0);
  549                 vn_finished_write(mp);
  550                 VFS_UNLOCK_GIANT(vfslocked);
  551                 return (error);
  552         }
  553 
  554         bzero(&auio, sizeof(auio));
  555 
  556         /*
  557          * Special case for BIO_DELETE.  On the surface, this is very
  558          * similar to BIO_WRITE, except that we write from our own
  559          * fixed-length buffer, so we have to loop.  The net result is
  560          * that the two cases end up having very little in common.
  561          */
  562         if (bp->bio_cmd == BIO_DELETE) {
  563                 zerosize = ZERO_REGION_SIZE -
  564                     (ZERO_REGION_SIZE % sc->sectorsize);
  565                 auio.uio_iov = &aiov;
  566                 auio.uio_iovcnt = 1;
  567                 auio.uio_offset = (vm_ooffset_t)bp->bio_offset;
  568                 auio.uio_segflg = UIO_SYSSPACE;
  569                 auio.uio_rw = UIO_WRITE;
  570                 auio.uio_td = td;
  571                 end = bp->bio_offset + bp->bio_length;
  572                 vfslocked = VFS_LOCK_GIANT(vp->v_mount);
  573                 (void) vn_start_write(vp, &mp, V_WAIT);
  574                 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
  575                 error = 0;
  576                 while (auio.uio_offset < end) {
  577                         aiov.iov_base = __DECONST(void *, zero_region);
  578                         aiov.iov_len = end - auio.uio_offset;
  579                         if (aiov.iov_len > zerosize)
  580                                 aiov.iov_len = zerosize;
  581                         auio.uio_resid = aiov.iov_len;
  582                         error = VOP_WRITE(vp, &auio,
  583                             sc->flags & MD_ASYNC ? 0 : IO_SYNC, sc->cred);
  584                         if (error != 0)
  585                                 break;
  586                 }
  587                 VOP_UNLOCK(vp, 0);
  588                 vn_finished_write(mp);
  589                 bp->bio_resid = end - auio.uio_offset;
  590                 VFS_UNLOCK_GIANT(vfslocked);
  591                 return (error);
  592         }
  593 
  594         aiov.iov_base = bp->bio_data;
  595         aiov.iov_len = bp->bio_length;
  596         auio.uio_iov = &aiov;
  597         auio.uio_iovcnt = 1;
  598         auio.uio_offset = (vm_ooffset_t)bp->bio_offset;
  599         auio.uio_segflg = UIO_SYSSPACE;
  600         if (bp->bio_cmd == BIO_READ)
  601                 auio.uio_rw = UIO_READ;
  602         else if (bp->bio_cmd == BIO_WRITE)
  603                 auio.uio_rw = UIO_WRITE;
  604         else
  605                 panic("wrong BIO_OP in mdstart_vnode");
  606         auio.uio_resid = bp->bio_length;
  607         auio.uio_td = td;
  608         /*
  609          * When reading set IO_DIRECT to try to avoid double-caching
  610          * the data.  When writing IO_DIRECT is not optimal.
  611          */
  612         vfslocked = VFS_LOCK_GIANT(vp->v_mount);
  613         if (bp->bio_cmd == BIO_READ) {
  614                 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
  615                 error = VOP_READ(vp, &auio, IO_DIRECT, sc->cred);
  616                 VOP_UNLOCK(vp, 0);
  617         } else {
  618                 (void) vn_start_write(vp, &mp, V_WAIT);
  619                 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
  620                 error = VOP_WRITE(vp, &auio, sc->flags & MD_ASYNC ? 0 : IO_SYNC,
  621                     sc->cred);
  622                 VOP_UNLOCK(vp, 0);
  623                 vn_finished_write(mp);
  624         }
  625         VFS_UNLOCK_GIANT(vfslocked);
  626         bp->bio_resid = auio.uio_resid;
  627         return (error);
  628 }
  629 
  630 static int
  631 mdstart_swap(struct md_s *sc, struct bio *bp)
  632 {
  633         struct sf_buf *sf;
  634         int rv, offs, len, lastend;
  635         vm_pindex_t i, lastp;
  636         vm_page_t m;
  637         u_char *p;
  638 
  639         switch (bp->bio_cmd) {
  640         case BIO_READ:
  641         case BIO_WRITE:
  642         case BIO_DELETE:
  643                 break;
  644         default:
  645                 return (EOPNOTSUPP);
  646         }
  647 
  648         p = bp->bio_data;
  649 
  650         /*
  651          * offs is the offset at which to start operating on the
  652          * next (ie, first) page.  lastp is the last page on
  653          * which we're going to operate.  lastend is the ending
  654          * position within that last page (ie, PAGE_SIZE if
  655          * we're operating on complete aligned pages).
  656          */
  657         offs = bp->bio_offset % PAGE_SIZE;
  658         lastp = (bp->bio_offset + bp->bio_length - 1) / PAGE_SIZE;
  659         lastend = (bp->bio_offset + bp->bio_length - 1) % PAGE_SIZE + 1;
  660 
  661         rv = VM_PAGER_OK;
  662         VM_OBJECT_LOCK(sc->object);
  663         vm_object_pip_add(sc->object, 1);
  664         for (i = bp->bio_offset / PAGE_SIZE; i <= lastp; i++) {
  665                 len = ((i == lastp) ? lastend : PAGE_SIZE) - offs;
  666 
  667                 m = vm_page_grab(sc->object, i,
  668                     VM_ALLOC_NORMAL|VM_ALLOC_RETRY);
  669                 VM_OBJECT_UNLOCK(sc->object);
  670                 sched_pin();
  671                 sf = sf_buf_alloc(m, SFB_CPUPRIVATE);
  672                 VM_OBJECT_LOCK(sc->object);
  673                 if (bp->bio_cmd == BIO_READ) {
  674                         if (m->valid != VM_PAGE_BITS_ALL)
  675                                 rv = vm_pager_get_pages(sc->object, &m, 1, 0);
  676                         if (rv == VM_PAGER_ERROR) {
  677                                 sf_buf_free(sf);
  678                                 sched_unpin();
  679                                 vm_page_wakeup(m);
  680                                 break;
  681                         }
  682                         bcopy((void *)(sf_buf_kva(sf) + offs), p, len);
  683                         cpu_flush_dcache(p, len);
  684                 } else if (bp->bio_cmd == BIO_WRITE) {
  685                         if (len != PAGE_SIZE && m->valid != VM_PAGE_BITS_ALL)
  686                                 rv = vm_pager_get_pages(sc->object, &m, 1, 0);
  687                         if (rv == VM_PAGER_ERROR) {
  688                                 sf_buf_free(sf);
  689                                 sched_unpin();
  690                                 vm_page_wakeup(m);
  691                                 break;
  692                         }
  693                         bcopy(p, (void *)(sf_buf_kva(sf) + offs), len);
  694                         m->valid = VM_PAGE_BITS_ALL;
  695                 } else if (bp->bio_cmd == BIO_DELETE) {
  696                         if (len != PAGE_SIZE && m->valid != VM_PAGE_BITS_ALL)
  697                                 rv = vm_pager_get_pages(sc->object, &m, 1, 0);
  698                         if (rv == VM_PAGER_ERROR) {
  699                                 sf_buf_free(sf);
  700                                 sched_unpin();
  701                                 vm_page_wakeup(m);
  702                                 break;
  703                         }
  704                         if (len != PAGE_SIZE) {
  705                                 bzero((void *)(sf_buf_kva(sf) + offs), len);
  706                                 vm_page_clear_dirty(m, offs, len);
  707                                 m->valid = VM_PAGE_BITS_ALL;
  708                         } else
  709                                 vm_pager_page_unswapped(m);
  710                 }
  711                 sf_buf_free(sf);
  712                 sched_unpin();
  713                 vm_page_wakeup(m);
  714                 vm_page_lock(m);
  715                 if (bp->bio_cmd == BIO_DELETE && len == PAGE_SIZE)
  716                         vm_page_free(m);
  717                 else
  718                         vm_page_activate(m);
  719                 vm_page_unlock(m);
  720                 if (bp->bio_cmd == BIO_WRITE)
  721                         vm_page_dirty(m);
  722 
  723                 /* Actions on further pages start at offset 0 */
  724                 p += PAGE_SIZE - offs;
  725                 offs = 0;
  726 #if 0
  727 if (bootverbose || bp->bio_offset / PAGE_SIZE < 17)
  728 printf("wire_count %d busy %d flags %x hold_count %d act_count %d queue %d valid %d dirty %d @ %d\n",
  729     m->wire_count, m->busy,
  730     m->flags, m->hold_count, m->act_count, m->queue, m->valid, m->dirty, i);
  731 #endif
  732         }
  733         vm_object_pip_subtract(sc->object, 1);
  734         VM_OBJECT_UNLOCK(sc->object);
  735         return (rv != VM_PAGER_ERROR ? 0 : ENOSPC);
  736 }
  737 
  738 static void
  739 md_kthread(void *arg)
  740 {
  741         struct md_s *sc;
  742         struct bio *bp;
  743         int error;
  744 
  745         sc = arg;
  746         thread_lock(curthread);
  747         sched_prio(curthread, PRIBIO);
  748         thread_unlock(curthread);
  749         if (sc->type == MD_VNODE)
  750                 curthread->td_pflags |= TDP_NORUNNINGBUF;
  751 
  752         for (;;) {
  753                 mtx_lock(&sc->queue_mtx);
  754                 if (sc->flags & MD_SHUTDOWN) {
  755                         sc->flags |= MD_EXITING;
  756                         mtx_unlock(&sc->queue_mtx);
  757                         kproc_exit(0);
  758                 }
  759                 bp = bioq_takefirst(&sc->bio_queue);
  760                 if (!bp) {
  761                         msleep(sc, &sc->queue_mtx, PRIBIO | PDROP, "mdwait", 0);
  762                         continue;
  763                 }
  764                 mtx_unlock(&sc->queue_mtx);
  765                 if (bp->bio_cmd == BIO_GETATTR) {
  766                         if ((sc->fwsectors && sc->fwheads &&
  767                             (g_handleattr_int(bp, "GEOM::fwsectors",
  768                             sc->fwsectors) ||
  769                             g_handleattr_int(bp, "GEOM::fwheads",
  770                             sc->fwheads))) ||
  771                             g_handleattr_int(bp, "GEOM::candelete", 1))
  772                                 error = -1;
  773                         else
  774                                 error = EOPNOTSUPP;
  775                 } else {
  776                         error = sc->start(sc, bp);
  777                 }
  778 
  779                 if (error != -1) {
  780                         bp->bio_completed = bp->bio_length;
  781                         if ((bp->bio_cmd == BIO_READ) || (bp->bio_cmd == BIO_WRITE))
  782                                 devstat_end_transaction_bio(sc->devstat, bp);
  783                         g_io_deliver(bp, error);
  784                 }
  785         }
  786 }
  787 
  788 static struct md_s *
  789 mdfind(int unit)
  790 {
  791         struct md_s *sc;
  792 
  793         LIST_FOREACH(sc, &md_softc_list, list) {
  794                 if (sc->unit == unit)
  795                         break;
  796         }
  797         return (sc);
  798 }
  799 
  800 static struct md_s *
  801 mdnew(int unit, int *errp, enum md_types type)
  802 {
  803         struct md_s *sc;
  804         int error;
  805 
  806         *errp = 0;
  807         if (unit == -1)
  808                 unit = alloc_unr(md_uh);
  809         else
  810                 unit = alloc_unr_specific(md_uh, unit);
  811 
  812         if (unit == -1) {
  813                 *errp = EBUSY;
  814                 return (NULL);
  815         }
  816 
  817         sc = (struct md_s *)malloc(sizeof *sc, M_MD, M_WAITOK | M_ZERO);
  818         sc->type = type;
  819         bioq_init(&sc->bio_queue);
  820         mtx_init(&sc->queue_mtx, "md bio queue", NULL, MTX_DEF);
  821         sc->unit = unit;
  822         sprintf(sc->name, "md%d", unit);
  823         LIST_INSERT_HEAD(&md_softc_list, sc, list);
  824         error = kproc_create(md_kthread, sc, &sc->procp, 0, 0,"%s", sc->name);
  825         if (error == 0)
  826                 return (sc);
  827         LIST_REMOVE(sc, list);
  828         mtx_destroy(&sc->queue_mtx);
  829         free_unr(md_uh, sc->unit);
  830         free(sc, M_MD);
  831         *errp = error;
  832         return (NULL);
  833 }
  834 
  835 static void
  836 mdinit(struct md_s *sc)
  837 {
  838         struct g_geom *gp;
  839         struct g_provider *pp;
  840 
  841         g_topology_lock();
  842         gp = g_new_geomf(&g_md_class, "md%d", sc->unit);
  843         gp->softc = sc;
  844         pp = g_new_providerf(gp, "md%d", sc->unit);
  845         pp->mediasize = sc->mediasize;
  846         pp->sectorsize = sc->sectorsize;
  847         sc->gp = gp;
  848         sc->pp = pp;
  849         g_error_provider(pp, 0);
  850         g_topology_unlock();
  851         sc->devstat = devstat_new_entry("md", sc->unit, sc->sectorsize,
  852             DEVSTAT_ALL_SUPPORTED, DEVSTAT_TYPE_DIRECT, DEVSTAT_PRIORITY_MAX);
  853 }
  854 
  855 /*
  856  * XXX: we should check that the range they feed us is mapped.
  857  * XXX: we should implement read-only.
  858  */
  859 
  860 static int
  861 mdcreate_preload(struct md_s *sc, struct md_ioctl *mdio)
  862 {
  863 
  864         if (mdio->md_options & ~(MD_AUTOUNIT | MD_FORCE))
  865                 return (EINVAL);
  866         if (mdio->md_base == 0)
  867                 return (EINVAL);
  868         sc->flags = mdio->md_options & MD_FORCE;
  869         /* Cast to pointer size, then to pointer to avoid warning */
  870         sc->pl_ptr = (u_char *)(uintptr_t)mdio->md_base;
  871         sc->pl_len = (size_t)sc->mediasize;
  872         return (0);
  873 }
  874 
  875 
  876 static int
  877 mdcreate_malloc(struct md_s *sc, struct md_ioctl *mdio)
  878 {
  879         uintptr_t sp;
  880         int error;
  881         off_t u;
  882 
  883         error = 0;
  884         if (mdio->md_options & ~(MD_AUTOUNIT | MD_COMPRESS | MD_RESERVE))
  885                 return (EINVAL);
  886         if (mdio->md_sectorsize != 0 && !powerof2(mdio->md_sectorsize))
  887                 return (EINVAL);
  888         /* Compression doesn't make sense if we have reserved space */
  889         if (mdio->md_options & MD_RESERVE)
  890                 mdio->md_options &= ~MD_COMPRESS;
  891         if (mdio->md_fwsectors != 0)
  892                 sc->fwsectors = mdio->md_fwsectors;
  893         if (mdio->md_fwheads != 0)
  894                 sc->fwheads = mdio->md_fwheads;
  895         sc->flags = mdio->md_options & (MD_COMPRESS | MD_FORCE);
  896         sc->indir = dimension(sc->mediasize / sc->sectorsize);
  897         sc->uma = uma_zcreate(sc->name, sc->sectorsize, NULL, NULL, NULL, NULL,
  898             0x1ff, 0);
  899         if (mdio->md_options & MD_RESERVE) {
  900                 off_t nsectors;
  901 
  902                 nsectors = sc->mediasize / sc->sectorsize;
  903                 for (u = 0; u < nsectors; u++) {
  904                         sp = (uintptr_t)uma_zalloc(sc->uma, (md_malloc_wait ?
  905                             M_WAITOK : M_NOWAIT) | M_ZERO);
  906                         if (sp != 0)
  907                                 error = s_write(sc->indir, u, sp);
  908                         else
  909                                 error = ENOMEM;
  910                         if (error != 0)
  911                                 break;
  912                 }
  913         }
  914         return (error);
  915 }
  916 
  917 
  918 static int
  919 mdsetcred(struct md_s *sc, struct ucred *cred)
  920 {
  921         char *tmpbuf;
  922         int error = 0;
  923 
  924         /*
  925          * Set credits in our softc
  926          */
  927 
  928         if (sc->cred)
  929                 crfree(sc->cred);
  930         sc->cred = crhold(cred);
  931 
  932         /*
  933          * Horrible kludge to establish credentials for NFS  XXX.
  934          */
  935 
  936         if (sc->vnode) {
  937                 struct uio auio;
  938                 struct iovec aiov;
  939 
  940                 tmpbuf = malloc(sc->sectorsize, M_TEMP, M_WAITOK);
  941                 bzero(&auio, sizeof(auio));
  942 
  943                 aiov.iov_base = tmpbuf;
  944                 aiov.iov_len = sc->sectorsize;
  945                 auio.uio_iov = &aiov;
  946                 auio.uio_iovcnt = 1;
  947                 auio.uio_offset = 0;
  948                 auio.uio_rw = UIO_READ;
  949                 auio.uio_segflg = UIO_SYSSPACE;
  950                 auio.uio_resid = aiov.iov_len;
  951                 vn_lock(sc->vnode, LK_EXCLUSIVE | LK_RETRY);
  952                 error = VOP_READ(sc->vnode, &auio, 0, sc->cred);
  953                 VOP_UNLOCK(sc->vnode, 0);
  954                 free(tmpbuf, M_TEMP);
  955         }
  956         return (error);
  957 }
  958 
  959 static int
  960 mdcreate_vnode(struct md_s *sc, struct md_ioctl *mdio, struct thread *td)
  961 {
  962         struct vattr vattr;
  963         struct nameidata nd;
  964         char *fname;
  965         int error, flags, vfslocked;
  966 
  967         /*
  968          * Kernel-originated requests must have the filename appended
  969          * to the mdio structure to protect against malicious software.
  970          */
  971         fname = mdio->md_file;
  972         if ((void *)fname != (void *)(mdio + 1)) {
  973                 error = copyinstr(fname, sc->file, sizeof(sc->file), NULL);
  974                 if (error != 0)
  975                         return (error);
  976         } else
  977                 strlcpy(sc->file, fname, sizeof(sc->file));
  978 
  979         /*
  980          * If the user specified that this is a read only device, don't
  981          * set the FWRITE mask before trying to open the backing store.
  982          */
  983         flags = FREAD | ((mdio->md_options & MD_READONLY) ? 0 : FWRITE);
  984         NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, UIO_SYSSPACE, sc->file, td);
  985         error = vn_open(&nd, &flags, 0, NULL);
  986         if (error != 0)
  987                 return (error);
  988         vfslocked = NDHASGIANT(&nd);
  989         NDFREE(&nd, NDF_ONLY_PNBUF);
  990         if (nd.ni_vp->v_type != VREG) {
  991                 error = EINVAL;
  992                 goto bad;
  993         }
  994         error = VOP_GETATTR(nd.ni_vp, &vattr, td->td_ucred);
  995         if (error != 0)
  996                 goto bad;
  997         if (VOP_ISLOCKED(nd.ni_vp) != LK_EXCLUSIVE) {
  998                 vn_lock(nd.ni_vp, LK_UPGRADE | LK_RETRY);
  999                 if (nd.ni_vp->v_iflag & VI_DOOMED) {
 1000                         /* Forced unmount. */
 1001                         error = EBADF;
 1002                         goto bad;
 1003                 }
 1004         }
 1005         nd.ni_vp->v_vflag |= VV_MD;
 1006         VOP_UNLOCK(nd.ni_vp, 0);
 1007 
 1008         if (mdio->md_fwsectors != 0)
 1009                 sc->fwsectors = mdio->md_fwsectors;
 1010         if (mdio->md_fwheads != 0)
 1011                 sc->fwheads = mdio->md_fwheads;
 1012         sc->flags = mdio->md_options & (MD_FORCE | MD_ASYNC);
 1013         if (!(flags & FWRITE))
 1014                 sc->flags |= MD_READONLY;
 1015         sc->vnode = nd.ni_vp;
 1016 
 1017         error = mdsetcred(sc, td->td_ucred);
 1018         if (error != 0) {
 1019                 sc->vnode = NULL;
 1020                 vn_lock(nd.ni_vp, LK_EXCLUSIVE | LK_RETRY);
 1021                 nd.ni_vp->v_vflag &= ~VV_MD;
 1022                 goto bad;
 1023         }
 1024         VFS_UNLOCK_GIANT(vfslocked);
 1025         return (0);
 1026 bad:
 1027         VOP_UNLOCK(nd.ni_vp, 0);
 1028         (void)vn_close(nd.ni_vp, flags, td->td_ucred, td);
 1029         VFS_UNLOCK_GIANT(vfslocked);
 1030         return (error);
 1031 }
 1032 
 1033 static int
 1034 mddestroy(struct md_s *sc, struct thread *td)
 1035 {
 1036         int vfslocked;
 1037 
 1038         if (sc->gp) {
 1039                 sc->gp->softc = NULL;
 1040                 g_topology_lock();
 1041                 g_wither_geom(sc->gp, ENXIO);
 1042                 g_topology_unlock();
 1043                 sc->gp = NULL;
 1044                 sc->pp = NULL;
 1045         }
 1046         if (sc->devstat) {
 1047                 devstat_remove_entry(sc->devstat);
 1048                 sc->devstat = NULL;
 1049         }
 1050         mtx_lock(&sc->queue_mtx);
 1051         sc->flags |= MD_SHUTDOWN;
 1052         wakeup(sc);
 1053         while (!(sc->flags & MD_EXITING))
 1054                 msleep(sc->procp, &sc->queue_mtx, PRIBIO, "mddestroy", hz / 10);
 1055         mtx_unlock(&sc->queue_mtx);
 1056         mtx_destroy(&sc->queue_mtx);
 1057         if (sc->vnode != NULL) {
 1058                 vfslocked = VFS_LOCK_GIANT(sc->vnode->v_mount);
 1059                 vn_lock(sc->vnode, LK_EXCLUSIVE | LK_RETRY);
 1060                 sc->vnode->v_vflag &= ~VV_MD;
 1061                 VOP_UNLOCK(sc->vnode, 0);
 1062                 (void)vn_close(sc->vnode, sc->flags & MD_READONLY ?
 1063                     FREAD : (FREAD|FWRITE), sc->cred, td);
 1064                 VFS_UNLOCK_GIANT(vfslocked);
 1065         }
 1066         if (sc->cred != NULL)
 1067                 crfree(sc->cred);
 1068         if (sc->object != NULL)
 1069                 vm_object_deallocate(sc->object);
 1070         if (sc->indir)
 1071                 destroy_indir(sc, sc->indir);
 1072         if (sc->uma)
 1073                 uma_zdestroy(sc->uma);
 1074 
 1075         LIST_REMOVE(sc, list);
 1076         free_unr(md_uh, sc->unit);
 1077         free(sc, M_MD);
 1078         return (0);
 1079 }
 1080 
 1081 static int
 1082 mdcreate_swap(struct md_s *sc, struct md_ioctl *mdio, struct thread *td)
 1083 {
 1084         vm_ooffset_t npage;
 1085         int error;
 1086 
 1087         /*
 1088          * Range check.  Disallow negative sizes or any size less then the
 1089          * size of a page.  Then round to a page.
 1090          */
 1091         if (sc->mediasize == 0 || (sc->mediasize % PAGE_SIZE) != 0)
 1092                 return (EDOM);
 1093 
 1094         /*
 1095          * Allocate an OBJT_SWAP object.
 1096          *
 1097          * Note the truncation.
 1098          */
 1099 
 1100         npage = mdio->md_mediasize / PAGE_SIZE;
 1101         if (mdio->md_fwsectors != 0)
 1102                 sc->fwsectors = mdio->md_fwsectors;
 1103         if (mdio->md_fwheads != 0)
 1104                 sc->fwheads = mdio->md_fwheads;
 1105         sc->object = vm_pager_allocate(OBJT_SWAP, NULL, PAGE_SIZE * npage,
 1106             VM_PROT_DEFAULT, 0, td->td_ucred);
 1107         if (sc->object == NULL)
 1108                 return (ENOMEM);
 1109         sc->flags = mdio->md_options & MD_FORCE;
 1110         if (mdio->md_options & MD_RESERVE) {
 1111                 if (swap_pager_reserve(sc->object, 0, npage) < 0) {
 1112                         error = EDOM;
 1113                         goto finish;
 1114                 }
 1115         }
 1116         error = mdsetcred(sc, td->td_ucred);
 1117  finish:
 1118         if (error != 0) {
 1119                 vm_object_deallocate(sc->object);
 1120                 sc->object = NULL;
 1121         }
 1122         return (error);
 1123 }
 1124 
 1125 
 1126 static int
 1127 xmdctlioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, struct thread *td)
 1128 {
 1129         struct md_ioctl *mdio;
 1130         struct md_s *sc;
 1131         int error, i;
 1132 
 1133         if (md_debug)
 1134                 printf("mdctlioctl(%s %lx %p %x %p)\n",
 1135                         devtoname(dev), cmd, addr, flags, td);
 1136 
 1137         mdio = (struct md_ioctl *)addr;
 1138         if (mdio->md_version != MDIOVERSION)
 1139                 return (EINVAL);
 1140 
 1141         /*
 1142          * We assert the version number in the individual ioctl
 1143          * handlers instead of out here because (a) it is possible we
 1144          * may add another ioctl in the future which doesn't read an
 1145          * mdio, and (b) the correct return value for an unknown ioctl
 1146          * is ENOIOCTL, not EINVAL.
 1147          */
 1148         error = 0;
 1149         switch (cmd) {
 1150         case MDIOCATTACH:
 1151                 switch (mdio->md_type) {
 1152                 case MD_MALLOC:
 1153                 case MD_PRELOAD:
 1154                 case MD_VNODE:
 1155                 case MD_SWAP:
 1156                         break;
 1157                 default:
 1158                         return (EINVAL);
 1159                 }
 1160                 if (mdio->md_options & MD_AUTOUNIT)
 1161                         sc = mdnew(-1, &error, mdio->md_type);
 1162                 else {
 1163                         if (mdio->md_unit > INT_MAX)
 1164                                 return (EINVAL);
 1165                         sc = mdnew(mdio->md_unit, &error, mdio->md_type);
 1166                 }
 1167                 if (sc == NULL)
 1168                         return (error);
 1169                 if (mdio->md_options & MD_AUTOUNIT)
 1170                         mdio->md_unit = sc->unit;
 1171                 sc->mediasize = mdio->md_mediasize;
 1172                 if (mdio->md_sectorsize == 0)
 1173                         sc->sectorsize = DEV_BSIZE;
 1174                 else
 1175                         sc->sectorsize = mdio->md_sectorsize;
 1176                 error = EDOOFUS;
 1177                 switch (sc->type) {
 1178                 case MD_MALLOC:
 1179                         sc->start = mdstart_malloc;
 1180                         error = mdcreate_malloc(sc, mdio);
 1181                         break;
 1182                 case MD_PRELOAD:
 1183                         sc->start = mdstart_preload;
 1184                         error = mdcreate_preload(sc, mdio);
 1185                         break;
 1186                 case MD_VNODE:
 1187                         sc->start = mdstart_vnode;
 1188                         error = mdcreate_vnode(sc, mdio, td);
 1189                         break;
 1190                 case MD_SWAP:
 1191                         sc->start = mdstart_swap;
 1192                         error = mdcreate_swap(sc, mdio, td);
 1193                         break;
 1194                 }
 1195                 if (error != 0) {
 1196                         mddestroy(sc, td);
 1197                         return (error);
 1198                 }
 1199 
 1200                 /* Prune off any residual fractional sector */
 1201                 i = sc->mediasize % sc->sectorsize;
 1202                 sc->mediasize -= i;
 1203 
 1204                 mdinit(sc);
 1205                 return (0);
 1206         case MDIOCDETACH:
 1207                 if (mdio->md_mediasize != 0 ||
 1208                     (mdio->md_options & ~MD_FORCE) != 0)
 1209                         return (EINVAL);
 1210 
 1211                 sc = mdfind(mdio->md_unit);
 1212                 if (sc == NULL)
 1213                         return (ENOENT);
 1214                 if (sc->opencount != 0 && !(sc->flags & MD_FORCE) &&
 1215                     !(mdio->md_options & MD_FORCE))
 1216                         return (EBUSY);
 1217                 return (mddestroy(sc, td));
 1218         case MDIOCQUERY:
 1219                 sc = mdfind(mdio->md_unit);
 1220                 if (sc == NULL)
 1221                         return (ENOENT);
 1222                 mdio->md_type = sc->type;
 1223                 mdio->md_options = sc->flags;
 1224                 mdio->md_mediasize = sc->mediasize;
 1225                 mdio->md_sectorsize = sc->sectorsize;
 1226                 if (sc->type == MD_VNODE)
 1227                         error = copyout(sc->file, mdio->md_file,
 1228                             strlen(sc->file) + 1);
 1229                 return (error);
 1230         case MDIOCLIST:
 1231                 i = 1;
 1232                 LIST_FOREACH(sc, &md_softc_list, list) {
 1233                         if (i == MDNPAD - 1)
 1234                                 mdio->md_pad[i] = -1;
 1235                         else
 1236                                 mdio->md_pad[i++] = sc->unit;
 1237                 }
 1238                 mdio->md_pad[0] = i - 1;
 1239                 return (0);
 1240         default:
 1241                 return (ENOIOCTL);
 1242         };
 1243 }
 1244 
 1245 static int
 1246 mdctlioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, struct thread *td)
 1247 {
 1248         int error;
 1249 
 1250         sx_xlock(&md_sx);
 1251         error = xmdctlioctl(dev, cmd, addr, flags, td);
 1252         sx_xunlock(&md_sx);
 1253         return (error);
 1254 }
 1255 
 1256 static void
 1257 md_preloaded(u_char *image, size_t length)
 1258 {
 1259         struct md_s *sc;
 1260         int error;
 1261 
 1262         sc = mdnew(-1, &error, MD_PRELOAD);
 1263         if (sc == NULL)
 1264                 return;
 1265         sc->mediasize = length;
 1266         sc->sectorsize = DEV_BSIZE;
 1267         sc->pl_ptr = image;
 1268         sc->pl_len = length;
 1269         sc->start = mdstart_preload;
 1270 #ifdef MD_ROOT
 1271         if (sc->unit == 0)
 1272                 rootdevnames[0] = "ufs:/dev/md0";
 1273 #endif
 1274         mdinit(sc);
 1275 }
 1276 
 1277 static void
 1278 g_md_init(struct g_class *mp __unused)
 1279 {
 1280         caddr_t mod;
 1281         u_char *ptr, *name, *type;
 1282         unsigned len;
 1283         int i;
 1284 
 1285         /* figure out log2(NINDIR) */
 1286         for (i = NINDIR, nshift = -1; i; nshift++)
 1287                 i >>= 1;
 1288 
 1289         mod = NULL;
 1290         sx_init(&md_sx, "MD config lock");
 1291         g_topology_unlock();
 1292         md_uh = new_unrhdr(0, INT_MAX, NULL);
 1293 #ifdef MD_ROOT_SIZE
 1294         sx_xlock(&md_sx);
 1295         md_preloaded(mfs_root.start, sizeof(mfs_root.start));
 1296         sx_xunlock(&md_sx);
 1297 #endif
 1298         /* XXX: are preload_* static or do they need Giant ? */
 1299         while ((mod = preload_search_next_name(mod)) != NULL) {
 1300                 name = (char *)preload_search_info(mod, MODINFO_NAME);
 1301                 if (name == NULL)
 1302                         continue;
 1303                 type = (char *)preload_search_info(mod, MODINFO_TYPE);
 1304                 if (type == NULL)
 1305                         continue;
 1306                 if (strcmp(type, "md_image") && strcmp(type, "mfs_root"))
 1307                         continue;
 1308                 ptr = preload_fetch_addr(mod);
 1309                 len = preload_fetch_size(mod);
 1310                 if (ptr != NULL && len != 0) {
 1311                         printf("%s%d: Preloaded image <%s> %d bytes at %p\n",
 1312                             MD_NAME, mdunits, name, len, ptr);
 1313                         sx_xlock(&md_sx);
 1314                         md_preloaded(ptr, len);
 1315                         sx_xunlock(&md_sx);
 1316                 }
 1317         }
 1318         status_dev = make_dev(&mdctl_cdevsw, INT_MAX, UID_ROOT, GID_WHEEL,
 1319             0600, MDCTL_NAME);
 1320         g_topology_lock();
 1321 }
 1322 
 1323 static void
 1324 g_md_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
 1325     struct g_consumer *cp __unused, struct g_provider *pp)
 1326 {
 1327         struct md_s *mp;
 1328         char *type;
 1329 
 1330         mp = gp->softc;
 1331         if (mp == NULL)
 1332                 return;
 1333 
 1334         switch (mp->type) {
 1335         case MD_MALLOC:
 1336                 type = "malloc";
 1337                 break;
 1338         case MD_PRELOAD:
 1339                 type = "preload";
 1340                 break;
 1341         case MD_VNODE:
 1342                 type = "vnode";
 1343                 break;
 1344         case MD_SWAP:
 1345                 type = "swap";
 1346                 break;
 1347         default:
 1348                 type = "unknown";
 1349                 break;
 1350         }
 1351 
 1352         if (pp != NULL) {
 1353                 if (indent == NULL) {
 1354                         sbuf_printf(sb, " u %d", mp->unit);
 1355                         sbuf_printf(sb, " s %ju", (uintmax_t) mp->sectorsize);
 1356                         sbuf_printf(sb, " f %ju", (uintmax_t) mp->fwheads);
 1357                         sbuf_printf(sb, " fs %ju", (uintmax_t) mp->fwsectors);
 1358                         sbuf_printf(sb, " l %ju", (uintmax_t) mp->mediasize);
 1359                         sbuf_printf(sb, " t %s", type);
 1360                         if (mp->type == MD_VNODE && mp->vnode != NULL)
 1361                                 sbuf_printf(sb, " file %s", mp->file);
 1362                 } else {
 1363                         sbuf_printf(sb, "%s<unit>%d</unit>\n", indent,
 1364                             mp->unit);
 1365                         sbuf_printf(sb, "%s<sectorsize>%ju</sectorsize>\n",
 1366                             indent, (uintmax_t) mp->sectorsize);
 1367                         sbuf_printf(sb, "%s<fwheads>%ju</fwheads>\n",
 1368                             indent, (uintmax_t) mp->fwheads);
 1369                         sbuf_printf(sb, "%s<fwsectors>%ju</fwsectors>\n",
 1370                             indent, (uintmax_t) mp->fwsectors);
 1371                         sbuf_printf(sb, "%s<length>%ju</length>\n",
 1372                             indent, (uintmax_t) mp->mediasize);
 1373                         sbuf_printf(sb, "%s<type>%s</type>\n", indent,
 1374                             type);
 1375                         if (mp->type == MD_VNODE && mp->vnode != NULL)
 1376                                 sbuf_printf(sb, "%s<file>%s</file>\n",
 1377                                     indent, mp->file);
 1378                 }
 1379         }
 1380 }
 1381 
 1382 static void
 1383 g_md_fini(struct g_class *mp __unused)
 1384 {
 1385 
 1386         sx_destroy(&md_sx);
 1387         if (status_dev != NULL)
 1388                 destroy_dev(status_dev);
 1389         delete_unrhdr(md_uh);
 1390 }

Cache object: c7550d6da7497ca87cfde6b1eb0044d2


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.