The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/dev/vnd.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*      $NetBSD: vnd.c,v 1.160.2.2 2008/04/08 20:19:27 jdc Exp $        */
    2 
    3 /*-
    4  * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
    5  * All rights reserved.
    6  *
    7  * This code is derived from software contributed to The NetBSD Foundation
    8  * by Jason R. Thorpe.
    9  *
   10  * Redistribution and use in source and binary forms, with or without
   11  * modification, are permitted provided that the following conditions
   12  * are met:
   13  * 1. Redistributions of source code must retain the above copyright
   14  *    notice, this list of conditions and the following disclaimer.
   15  * 2. Redistributions in binary form must reproduce the above copyright
   16  *    notice, this list of conditions and the following disclaimer in the
   17  *    documentation and/or other materials provided with the distribution.
   18  * 3. All advertising materials mentioning features or use of this software
   19  *    must display the following acknowledgement:
   20  *      This product includes software developed by the NetBSD
   21  *      Foundation, Inc. and its contributors.
   22  * 4. Neither the name of The NetBSD Foundation nor the names of its
   23  *    contributors may be used to endorse or promote products derived
   24  *    from this software without specific prior written permission.
   25  *
   26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
   27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
   28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
   29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
   30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   36  * POSSIBILITY OF SUCH DAMAGE.
   37  */
   38 
   39 /*
   40  * Copyright (c) 1990, 1993
   41  *      The Regents of the University of California.  All rights reserved.
   42  *
   43  * This code is derived from software contributed to Berkeley by
   44  * the Systems Programming Group of the University of Utah Computer
   45  * Science Department.
   46  *
   47  * Redistribution and use in source and binary forms, with or without
   48  * modification, are permitted provided that the following conditions
   49  * are met:
   50  * 1. Redistributions of source code must retain the above copyright
   51  *    notice, this list of conditions and the following disclaimer.
   52  * 2. Redistributions in binary form must reproduce the above copyright
   53  *    notice, this list of conditions and the following disclaimer in the
   54  *    documentation and/or other materials provided with the distribution.
   55  * 3. Neither the name of the University nor the names of its contributors
   56  *    may be used to endorse or promote products derived from this software
   57  *    without specific prior written permission.
   58  *
   59  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   60  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   61  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   62  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   63  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   64  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   65  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   66  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   67  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   68  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   69  * SUCH DAMAGE.
   70  *
   71  * from: Utah $Hdr: vn.c 1.13 94/04/02$
   72  *
   73  *      @(#)vn.c        8.9 (Berkeley) 5/14/95
   74  */
   75 
   76 /*
   77  * Copyright (c) 1988 University of Utah.
   78  *
   79  * This code is derived from software contributed to Berkeley by
   80  * the Systems Programming Group of the University of Utah Computer
   81  * Science Department.
   82  *
   83  * Redistribution and use in source and binary forms, with or without
   84  * modification, are permitted provided that the following conditions
   85  * are met:
   86  * 1. Redistributions of source code must retain the above copyright
   87  *    notice, this list of conditions and the following disclaimer.
   88  * 2. Redistributions in binary form must reproduce the above copyright
   89  *    notice, this list of conditions and the following disclaimer in the
   90  *    documentation and/or other materials provided with the distribution.
   91  * 3. All advertising materials mentioning features or use of this software
   92  *    must display the following acknowledgement:
   93  *      This product includes software developed by the University of
   94  *      California, Berkeley and its contributors.
   95  * 4. Neither the name of the University nor the names of its contributors
   96  *    may be used to endorse or promote products derived from this software
   97  *    without specific prior written permission.
   98  *
   99  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  100  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  101  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  102  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  103  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  104  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  105  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  106  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  107  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  108  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  109  * SUCH DAMAGE.
  110  *
  111  * from: Utah $Hdr: vn.c 1.13 94/04/02$
  112  *
  113  *      @(#)vn.c        8.9 (Berkeley) 5/14/95
  114  */
  115 
  116 /*
  117  * Vnode disk driver.
  118  *
  119  * Block/character interface to a vnode.  Allows one to treat a file
  120  * as a disk (e.g. build a filesystem in it, mount it, etc.).
  121  *
  122  * NOTE 1: If the vnode supports the VOP_BMAP and VOP_STRATEGY operations,
  123  * this uses them to avoid distorting the local buffer cache.  If those
  124  * block-level operations are not available, this falls back to the regular
  125  * read and write calls.  Using these may distort the cache in some cases,
  126  * but it is better to have the driver working than to prevent it from
  127  * working on file systems where the block-level operations are not
  128  * implemented for whatever reason.
  129  *
  130  * NOTE 2: There is a security issue involved with this driver.
  131  * Once mounted all access to the contents of the "mapped" file via
  132  * the special file is controlled by the permissions on the special
  133  * file, the protection of the mapped file is ignored (effectively,
  134  * by using root credentials in all transactions).
  135  *
  136  * NOTE 3: Doesn't interact with leases, should it?
  137  */
  138 
  139 #include <sys/cdefs.h>
  140 __KERNEL_RCSID(0, "$NetBSD: vnd.c,v 1.160.2.2 2008/04/08 20:19:27 jdc Exp $");
  141 
  142 #if defined(_KERNEL_OPT)
  143 #include "fs_nfs.h"
  144 #include "opt_vnd.h"
  145 #endif
  146 
  147 #include <sys/param.h>
  148 #include <sys/systm.h>
  149 #include <sys/namei.h>
  150 #include <sys/proc.h>
  151 #include <sys/kthread.h>
  152 #include <sys/errno.h>
  153 #include <sys/buf.h>
  154 #include <sys/bufq.h>
  155 #include <sys/malloc.h>
  156 #include <sys/ioctl.h>
  157 #include <sys/disklabel.h>
  158 #include <sys/device.h>
  159 #include <sys/disk.h>
  160 #include <sys/stat.h>
  161 #include <sys/mount.h>
  162 #include <sys/vnode.h>
  163 #include <sys/file.h>
  164 #include <sys/uio.h>
  165 #include <sys/conf.h>
  166 #include <sys/kauth.h>
  167 
  168 #include <net/zlib.h>
  169 
  170 #include <miscfs/genfs/genfs.h>
  171 #include <miscfs/specfs/specdev.h>
  172 
  173 #include <dev/vndvar.h>
  174 
  175 #include <prop/proplib.h>
  176 
  177 #if defined(VNDDEBUG) && !defined(DEBUG)
  178 #define DEBUG
  179 #endif
  180 
  181 #ifdef DEBUG
  182 int dovndcluster = 1;
  183 #define VDB_FOLLOW      0x01
  184 #define VDB_INIT        0x02
  185 #define VDB_IO          0x04
  186 #define VDB_LABEL       0x08
  187 int vnddebug = 0x00;
  188 #endif
  189 
  190 #define vndunit(x)      DISKUNIT(x)
  191 
  192 struct vndxfer {
  193         struct buf vx_buf;
  194         struct vnd_softc *vx_vnd;
  195 };
  196 #define VND_BUFTOXFER(bp)       ((struct vndxfer *)(void *)bp)
  197 
  198 #define VND_GETXFER(vnd)        pool_get(&(vnd)->sc_vxpool, PR_WAITOK)
  199 #define VND_PUTXFER(vnd, vx)    pool_put(&(vnd)->sc_vxpool, (vx))
  200 
  201 #define VNDLABELDEV(dev) \
  202     (MAKEDISKDEV(major((dev)), vndunit((dev)), RAW_PART))
  203 
  204 /* called by main() at boot time (XXX: and the LKM driver) */
  205 void    vndattach(int);
  206 
  207 static void     vndclear(struct vnd_softc *, int);
  208 static int      vndsetcred(struct vnd_softc *, kauth_cred_t);
  209 static void     vndthrottle(struct vnd_softc *, struct vnode *);
  210 static void     vndiodone(struct buf *);
  211 #if 0
  212 static void     vndshutdown(void);
  213 #endif
  214 
  215 static void     vndgetdefaultlabel(struct vnd_softc *, struct disklabel *);
  216 static void     vndgetdisklabel(dev_t, struct vnd_softc *);
  217 
  218 static int      vndlock(struct vnd_softc *);
  219 static void     vndunlock(struct vnd_softc *);
  220 #ifdef VND_COMPRESSION
  221 static void     compstrategy(struct buf *, off_t);
  222 static void     *vnd_alloc(void *, u_int, u_int);
  223 static void     vnd_free(void *, void *);
  224 #endif /* VND_COMPRESSION */
  225 
  226 static void     vndthread(void *);
  227 static boolean_t vnode_has_op(const struct vnode *, int);
  228 static void     handle_with_rdwr(struct vnd_softc *, const struct buf *,
  229                     struct buf *);
  230 static void     handle_with_strategy(struct vnd_softc *, const struct buf *,
  231                     struct buf *);
  232 static void     vnd_set_properties(struct vnd_softc *);
  233 
  234 static dev_type_open(vndopen);
  235 static dev_type_close(vndclose);
  236 static dev_type_read(vndread);
  237 static dev_type_write(vndwrite);
  238 static dev_type_ioctl(vndioctl);
  239 static dev_type_strategy(vndstrategy);
  240 static dev_type_dump(vnddump);
  241 static dev_type_size(vndsize);
  242 
  243 const struct bdevsw vnd_bdevsw = {
  244         vndopen, vndclose, vndstrategy, vndioctl, vnddump, vndsize, D_DISK
  245 };
  246 
  247 const struct cdevsw vnd_cdevsw = {
  248         vndopen, vndclose, vndread, vndwrite, vndioctl,
  249         nostop, notty, nopoll, nommap, nokqfilter, D_DISK
  250 };
  251 
  252 static int      vnd_match(struct device *, struct cfdata *, void *);
  253 static void     vnd_attach(struct device *, struct device *, void *);
  254 static int      vnd_detach(struct device *, int);
  255 
  256 CFATTACH_DECL(vnd, sizeof(struct vnd_softc),
  257     vnd_match, vnd_attach, vnd_detach, NULL);
  258 extern struct cfdriver vnd_cd;
  259 
  260 static struct vnd_softc *vnd_spawn(int);
  261 int     vnd_destroy(struct device *);
  262 
  263 void
  264 vndattach(int num)
  265 {
  266         int error;
  267 
  268         error = config_cfattach_attach(vnd_cd.cd_name, &vnd_ca);
  269         if (error)
  270                 aprint_error("%s: unable to register cfattach\n",
  271                     vnd_cd.cd_name);
  272 }
  273 
  274 static int
  275 vnd_match(struct device *self, struct cfdata *cfdata,
  276     void *aux)
  277 {
  278         return 1;
  279 }
  280 
  281 static void
  282 vnd_attach(struct device *parent, struct device *self,
  283     void *aux)
  284 {
  285         struct vnd_softc *sc = (struct vnd_softc *)self;
  286 
  287         sc->sc_comp_offsets = NULL;
  288         sc->sc_comp_buff = NULL;
  289         sc->sc_comp_decombuf = NULL;
  290         bufq_alloc(&sc->sc_tab, "disksort", BUFQ_SORT_RAWBLOCK);
  291         pseudo_disk_init(&sc->sc_dkdev);
  292 }
  293 
  294 static int
  295 vnd_detach(struct device *self, int flags)
  296 {
  297         struct vnd_softc *sc = (struct vnd_softc *)self;
  298         if (sc->sc_flags & VNF_INITED)
  299                 return EBUSY;
  300 
  301         bufq_free(sc->sc_tab);
  302 
  303         return 0;
  304 }
  305 
  306 static struct vnd_softc *
  307 vnd_spawn(int unit)
  308 {
  309         struct cfdata *cf;
  310 
  311         cf = malloc(sizeof(*cf), M_DEVBUF, M_WAITOK);
  312         cf->cf_name = vnd_cd.cd_name;
  313         cf->cf_atname = vnd_cd.cd_name;
  314         cf->cf_unit = unit;
  315         cf->cf_fstate = FSTATE_STAR;
  316 
  317         return (struct vnd_softc *)config_attach_pseudo(cf);
  318 }
  319 
  320 int
  321 vnd_destroy(struct device *dev)
  322 {
  323         int error;
  324         struct cfdata *cf;
  325 
  326         cf = device_cfdata(dev);
  327         error = config_detach(dev, DETACH_QUIET);
  328         if (error)
  329                 return error;
  330         free(cf, M_DEVBUF);
  331         return 0;
  332 }
  333 
  334 static int
  335 vndopen(dev_t dev, int flags, int mode, struct lwp *l)
  336 {
  337         int unit = vndunit(dev);
  338         struct vnd_softc *sc;
  339         int error = 0, part, pmask;
  340         struct disklabel *lp;
  341 
  342 #ifdef DEBUG
  343         if (vnddebug & VDB_FOLLOW)
  344                 printf("vndopen(0x%x, 0x%x, 0x%x, %p)\n", dev, flags, mode, l);
  345 #endif
  346         sc = device_lookup(&vnd_cd, unit);
  347         if (sc == NULL) {
  348                 sc = vnd_spawn(unit);
  349                 if (sc == NULL)
  350                         return ENOMEM;
  351         }
  352 
  353         if ((error = vndlock(sc)) != 0)
  354                 return (error);
  355 
  356         lp = sc->sc_dkdev.dk_label;
  357 
  358         part = DISKPART(dev);
  359         pmask = (1 << part);
  360 
  361         /*
  362          * If we're initialized, check to see if there are any other
  363          * open partitions.  If not, then it's safe to update the
  364          * in-core disklabel.  Only read the disklabel if it is
  365          * not already valid.
  366          */
  367         if ((sc->sc_flags & (VNF_INITED|VNF_VLABEL)) == VNF_INITED &&
  368             sc->sc_dkdev.dk_openmask == 0)
  369                 vndgetdisklabel(dev, sc);
  370 
  371         /* Check that the partitions exists. */
  372         if (part != RAW_PART) {
  373                 if (((sc->sc_flags & VNF_INITED) == 0) ||
  374                     ((part >= lp->d_npartitions) ||
  375                      (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
  376                         error = ENXIO;
  377                         goto done;
  378                 }
  379         }
  380 
  381         /* Prevent our unit from being unconfigured while open. */
  382         switch (mode) {
  383         case S_IFCHR:
  384                 sc->sc_dkdev.dk_copenmask |= pmask;
  385                 break;
  386 
  387         case S_IFBLK:
  388                 sc->sc_dkdev.dk_bopenmask |= pmask;
  389                 break;
  390         }
  391         sc->sc_dkdev.dk_openmask =
  392             sc->sc_dkdev.dk_copenmask | sc->sc_dkdev.dk_bopenmask;
  393 
  394  done:
  395         vndunlock(sc);
  396         return (error);
  397 }
  398 
  399 static int
  400 vndclose(dev_t dev, int flags, int mode, struct lwp *l)
  401 {
  402         int unit = vndunit(dev);
  403         struct vnd_softc *sc;
  404         int error = 0, part;
  405 
  406 #ifdef DEBUG
  407         if (vnddebug & VDB_FOLLOW)
  408                 printf("vndclose(0x%x, 0x%x, 0x%x, %p)\n", dev, flags, mode, l);
  409 #endif
  410         sc = device_lookup(&vnd_cd, unit);
  411         if (sc == NULL)
  412                 return ENXIO;
  413 
  414         if ((error = vndlock(sc)) != 0)
  415                 return (error);
  416 
  417         part = DISKPART(dev);
  418 
  419         /* ...that much closer to allowing unconfiguration... */
  420         switch (mode) {
  421         case S_IFCHR:
  422                 sc->sc_dkdev.dk_copenmask &= ~(1 << part);
  423                 break;
  424 
  425         case S_IFBLK:
  426                 sc->sc_dkdev.dk_bopenmask &= ~(1 << part);
  427                 break;
  428         }
  429         sc->sc_dkdev.dk_openmask =
  430             sc->sc_dkdev.dk_copenmask | sc->sc_dkdev.dk_bopenmask;
  431 
  432         vndunlock(sc);
  433 
  434         if ((sc->sc_flags & VNF_INITED) == 0) {
  435                 if ((error = vnd_destroy((struct device *)sc)) != 0) {
  436                         aprint_error("%s: unable to detach instance\n",
  437                             sc->sc_dev.dv_xname);
  438                         return error;
  439                 }
  440         }
  441 
  442         return (0);
  443 }
  444 
  445 /*
  446  * Queue the request, and wakeup the kernel thread to handle it.
  447  */
  448 static void
  449 vndstrategy(struct buf *bp)
  450 {
  451         int unit = vndunit(bp->b_dev);
  452         struct vnd_softc *vnd =
  453             (struct vnd_softc *)device_lookup(&vnd_cd, unit);
  454         struct disklabel *lp = vnd->sc_dkdev.dk_label;
  455         daddr_t blkno;
  456         int s = splbio();
  457 
  458         if ((vnd->sc_flags & VNF_INITED) == 0) {
  459                 bp->b_error = ENXIO;
  460                 goto bad;
  461         }
  462 
  463         /*
  464          * The transfer must be a whole number of blocks.
  465          */
  466         if ((bp->b_bcount % lp->d_secsize) != 0) {
  467                 bp->b_error = EINVAL;
  468                 goto bad;
  469         }
  470 
  471         /*
  472          * check if we're read-only.
  473          */
  474         if ((vnd->sc_flags & VNF_READONLY) && !(bp->b_flags & B_READ)) {
  475                 bp->b_error = EACCES;
  476                 bp->b_flags |= B_ERROR;
  477                 goto done;
  478         }
  479 
  480         /*
  481          * Do bounds checking and adjust transfer.  If there's an error,
  482          * the bounds check will flag that for us.
  483          */
  484         if (DISKPART(bp->b_dev) == RAW_PART) {
  485                 if (bounds_check_with_mediasize(bp, DEV_BSIZE,
  486                     vnd->sc_size) <= 0)
  487                         goto done;
  488         } else {
  489                 if (bounds_check_with_label(&vnd->sc_dkdev,
  490                     bp, vnd->sc_flags & (VNF_WLABEL|VNF_LABELLING)) <= 0)
  491                         goto done;
  492         }
  493 
  494         /*
  495          * Put the block number in terms of the logical blocksize
  496          * of the "device".
  497          */
  498 
  499         blkno = bp->b_blkno / (lp->d_secsize / DEV_BSIZE);
  500 
  501         /*
  502          * Translate the partition-relative block number to an absolute.
  503          */
  504         if (DISKPART(bp->b_dev) != RAW_PART) {
  505                 struct partition *pp;
  506 
  507                 pp = &vnd->sc_dkdev.dk_label->d_partitions[
  508                     DISKPART(bp->b_dev)];
  509                 blkno += pp->p_offset;
  510         }
  511         bp->b_rawblkno = blkno;
  512 
  513 #ifdef DEBUG
  514         if (vnddebug & VDB_FOLLOW)
  515                 printf("vndstrategy(%p): unit %d\n", bp, unit);
  516 #endif
  517         BUFQ_PUT(vnd->sc_tab, bp);
  518         wakeup(&vnd->sc_tab);
  519         splx(s);
  520         return;
  521 bad:
  522         bp->b_flags |= B_ERROR;
  523 done:
  524         bp->b_resid = bp->b_bcount;
  525         biodone(bp);
  526         splx(s);
  527 }
  528 
  529 static void
  530 vndthread(void *arg)
  531 {
  532         struct vnd_softc *vnd = arg;
  533         boolean_t usestrategy;
  534         int s;
  535 
  536         /* Determine whether we can use VOP_BMAP and VOP_STRATEGY to
  537          * directly access the backing vnode.  If we can, use these two
  538          * operations to avoid messing with the local buffer cache.
  539          * Otherwise fall back to regular VOP_READ/VOP_WRITE operations
  540          * which are guaranteed to work with any file system. */
  541         usestrategy = vnode_has_op(vnd->sc_vp, VOFFSET(vop_bmap)) &&
  542             vnode_has_op(vnd->sc_vp, VOFFSET(vop_strategy));
  543 
  544 #ifdef DEBUG
  545         if (vnddebug & VDB_INIT)
  546                 printf("vndthread: vp %p, %s\n", vnd->sc_vp,
  547                     usestrategy ?
  548                     "using bmap/strategy operations" :
  549                     "using read/write operations");
  550 #endif
  551 
  552         s = splbio();
  553         vnd->sc_flags |= VNF_KTHREAD;
  554         wakeup(&vnd->sc_kthread);
  555 
  556         /*
  557          * Dequeue requests and serve them depending on the available
  558          * vnode operations.
  559          */
  560         while ((vnd->sc_flags & VNF_VUNCONF) == 0) {
  561                 struct vndxfer *vnx;
  562                 int flags;
  563                 struct buf *obp;
  564                 struct buf *bp;
  565 
  566                 obp = BUFQ_GET(vnd->sc_tab);
  567                 if (obp == NULL) {
  568                         tsleep(&vnd->sc_tab, PRIBIO, "vndbp", 0);
  569                         continue;
  570                 };
  571                 splx(s);
  572                 flags = obp->b_flags;
  573 #ifdef DEBUG
  574                 if (vnddebug & VDB_FOLLOW)
  575                         printf("vndthread(%p\n", obp);
  576 #endif
  577 
  578                 if (vnd->sc_vp->v_mount == NULL) {
  579                         obp->b_error = ENXIO;
  580                         obp->b_flags |= B_ERROR;
  581                         goto done;
  582                 }
  583 #ifdef VND_COMPRESSION
  584                 /* handle a compressed read */
  585                 if ((flags & B_READ) != 0 && (vnd->sc_flags & VNF_COMP)) {
  586                         off_t bn;
  587                         
  588                         /* Convert to a byte offset within the file. */
  589                         bn = obp->b_rawblkno *
  590                             vnd->sc_dkdev.dk_label->d_secsize;
  591 
  592                         compstrategy(obp, bn);
  593                         goto done;
  594                 }
  595 #endif /* VND_COMPRESSION */
  596                 
  597                 /*
  598                  * Allocate a header for this transfer and link it to the
  599                  * buffer
  600                  */
  601                 s = splbio();
  602                 vnx = VND_GETXFER(vnd);
  603                 splx(s);
  604                 vnx->vx_vnd = vnd;
  605 
  606                 s = splbio();
  607                 while (vnd->sc_active >= vnd->sc_maxactive) {
  608                         tsleep(&vnd->sc_tab, PRIBIO, "vndac", 0);
  609                 }
  610                 vnd->sc_active++;
  611                 splx(s);
  612 
  613                 /* Instrumentation. */
  614                 disk_busy(&vnd->sc_dkdev);
  615 
  616                 bp = &vnx->vx_buf;
  617                 BUF_INIT(bp);
  618                 bp->b_flags = (obp->b_flags & B_READ) | B_CALL;
  619                 bp->b_iodone = vndiodone;
  620                 bp->b_private = obp;
  621                 bp->b_vp = vnd->sc_vp;
  622                 bp->b_data = obp->b_data;
  623                 bp->b_bcount = obp->b_bcount;
  624                 BIO_COPYPRIO(bp, obp);
  625 
  626                 /* Handle the request using the appropriate operations. */
  627                 if (usestrategy)
  628                         handle_with_strategy(vnd, obp, bp);
  629                 else
  630                         handle_with_rdwr(vnd, obp, bp);
  631 
  632                 s = splbio();
  633                 continue;
  634 
  635 done:
  636                 biodone(obp);
  637                 s = splbio();
  638         }
  639 
  640         vnd->sc_flags &= (~VNF_KTHREAD | VNF_VUNCONF);
  641         wakeup(&vnd->sc_kthread);
  642         splx(s);
  643         kthread_exit(0);
  644 }
  645 
  646 /*
  647  * Checks if the given vnode supports the requested operation.
  648  * The operation is specified the offset returned by VOFFSET.
  649  *
  650  * XXX The test below used to determine this is quite fragile
  651  * because it relies on the file system to use genfs to specify
  652  * unimplemented operations.  There might be another way to do
  653  * it more cleanly.
  654  */
  655 static boolean_t
  656 vnode_has_op(const struct vnode *vp, int opoffset)
  657 {
  658         int (*defaultp)(void *);
  659         int (*opp)(void *);
  660 
  661         defaultp = vp->v_op[VOFFSET(vop_default)];
  662         opp = vp->v_op[opoffset];
  663 
  664         return opp != defaultp && opp != genfs_eopnotsupp &&
  665             opp != genfs_badop && opp != genfs_nullop;
  666 }
  667 
  668 /*
  669  * Handes the read/write request given in 'bp' using the vnode's VOP_READ
  670  * and VOP_WRITE operations.
  671  *
  672  * 'obp' is a pointer to the original request fed to the vnd device.
  673  */
  674 static void
  675 handle_with_rdwr(struct vnd_softc *vnd, const struct buf *obp, struct buf *bp)
  676 {
  677         boolean_t doread;
  678         off_t offset;
  679         size_t resid;
  680         struct vnode *vp;
  681 
  682         doread = bp->b_flags & B_READ;
  683         offset = obp->b_rawblkno * vnd->sc_dkdev.dk_label->d_secsize;
  684         vp = vnd->sc_vp;
  685 
  686 #if defined(DEBUG)
  687         if (vnddebug & VDB_IO)
  688                 printf("vnd (rdwr): vp %p, %s, rawblkno 0x%" PRIx64
  689                     ", secsize %d, offset %" PRIu64
  690                     ", bcount %d, resid %d\n",
  691                     vp, doread ? "read" : "write", obp->b_rawblkno,
  692                     vnd->sc_dkdev.dk_label->d_secsize, offset,
  693                     bp->b_bcount, bp->b_resid);
  694 #endif
  695 
  696         /* Issue the read or write operation. */
  697         bp->b_error =
  698             vn_rdwr(doread ? UIO_READ : UIO_WRITE,
  699             vp, bp->b_data, bp->b_bcount, offset,
  700             UIO_SYSSPACE, 0, vnd->sc_cred, &resid, NULL);
  701         bp->b_resid = resid;
  702         if (bp->b_error != 0)
  703                 bp->b_flags |= B_ERROR;
  704         else
  705                 KASSERT(!(bp->b_flags & B_ERROR));
  706 
  707         /* We need to increase the number of outputs on the vnode if
  708          * there was any write to it. */
  709         if (!doread)
  710                 V_INCR_NUMOUTPUT(vp);
  711 
  712         biodone(bp);
  713 }
  714 
  715 /*
  716  * Handes the read/write request given in 'bp' using the vnode's VOP_BMAP
  717  * and VOP_STRATEGY operations.
  718  *
  719  * 'obp' is a pointer to the original request fed to the vnd device.
  720  */
  721 static void
  722 handle_with_strategy(struct vnd_softc *vnd, const struct buf *obp,
  723     struct buf *bp)
  724 {
  725         int bsize, error, flags, skipped;
  726         size_t resid, sz;
  727         off_t bn, offset;
  728         struct mount *mp;
  729 
  730         flags = obp->b_flags;
  731 
  732         mp = NULL;
  733         if (!(flags & B_READ)) {
  734                 int s;
  735                 
  736                 s = splbio();
  737                 V_INCR_NUMOUTPUT(bp->b_vp);
  738                 splx(s);
  739 
  740                 vn_start_write(vnd->sc_vp, &mp, V_WAIT);
  741                 KASSERT(mp != NULL);
  742         }
  743 
  744         /* convert to a byte offset within the file. */
  745         bn = obp->b_rawblkno * vnd->sc_dkdev.dk_label->d_secsize;
  746 
  747         bsize = vnd->sc_vp->v_mount->mnt_stat.f_iosize;
  748         skipped = 0;
  749 
  750         /*
  751          * Break the request into bsize pieces and feed them
  752          * sequentially using VOP_BMAP/VOP_STRATEGY.
  753          * We do it this way to keep from flooding NFS servers if we
  754          * are connected to an NFS file.  This places the burden on
  755          * the client rather than the server.
  756          */
  757         error = 0;
  758         bp->b_resid = bp->b_bcount;
  759         for (offset = 0, resid = bp->b_resid; resid;
  760             resid -= sz, offset += sz) {
  761                 struct buf *nbp;
  762                 struct vnode *vp;
  763                 daddr_t nbn;
  764                 int off, nra;
  765 
  766                 nra = 0;
  767                 vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY | LK_CANRECURSE);
  768                 error = VOP_BMAP(vnd->sc_vp, bn / bsize, &vp, &nbn, &nra);
  769                 VOP_UNLOCK(vnd->sc_vp, 0);
  770 
  771                 if (error == 0 && (long)nbn == -1)
  772                         error = EIO;
  773 
  774                 /*
  775                  * If there was an error or a hole in the file...punt.
  776                  * Note that we may have to wait for any operations
  777                  * that we have already fired off before releasing
  778                  * the buffer.
  779                  *
  780                  * XXX we could deal with holes here but it would be
  781                  * a hassle (in the write case).
  782                  */
  783                 if (error) {
  784                         skipped += resid;
  785                         break;
  786                 }
  787 
  788 #ifdef DEBUG
  789                 if (!dovndcluster)
  790                         nra = 0;
  791 #endif
  792 
  793                 off = bn % bsize;
  794                 sz = MIN(((off_t)1 + nra) * bsize - off, resid);
  795 #ifdef  DEBUG
  796                 if (vnddebug & VDB_IO)
  797                         printf("vndstrategy: vp %p/%p bn 0x%qx/0x%" PRIx64
  798                                " sz 0x%zx\n",
  799                             vnd->sc_vp, vp, (long long)bn, nbn, sz);
  800 #endif
  801 
  802                 nbp = getiobuf();
  803                 nestiobuf_setup(bp, nbp, offset, sz);
  804                 nbp->b_blkno = nbn + btodb(off);
  805 
  806 #if 0 /* XXX #ifdef DEBUG */
  807                 if (vnddebug & VDB_IO)
  808                         printf("vndstart(%ld): bp %p vp %p blkno "
  809                             "0x%" PRIx64 " flags %x addr %p cnt 0x%x\n",
  810                             (long) (vnd-vnd_softc), &nbp->vb_buf,
  811                             nbp->vb_buf.b_vp, nbp->vb_buf.b_blkno,
  812                             nbp->vb_buf.b_flags, nbp->vb_buf.b_data,
  813                             nbp->vb_buf.b_bcount);
  814 #endif
  815                 VOP_STRATEGY(vp, nbp);
  816                 bn += sz;
  817         }
  818         nestiobuf_done(bp, skipped, error);
  819 
  820         if (!(flags & B_READ)) {
  821                 KASSERT(mp != NULL);
  822                 vn_finished_write(mp, 0);
  823         }
  824 }
  825 
  826 static void
  827 vndiodone(struct buf *bp)
  828 {
  829         struct vndxfer *vnx = VND_BUFTOXFER(bp);
  830         struct vnd_softc *vnd = vnx->vx_vnd;
  831         struct buf *obp = bp->b_private;
  832 
  833         KASSERT(&vnx->vx_buf == bp);
  834         KASSERT(vnd->sc_active > 0);
  835 #ifdef DEBUG
  836         if (vnddebug & VDB_IO) {
  837                 printf("vndiodone1: bp %p iodone: error %d\n",
  838                     bp, (bp->b_flags & B_ERROR) != 0 ? bp->b_error : 0);
  839         }
  840 #endif
  841         disk_unbusy(&vnd->sc_dkdev, bp->b_bcount - bp->b_resid,
  842             (bp->b_flags & B_READ));
  843         vnd->sc_active--;
  844         if (vnd->sc_active == 0) {
  845                 wakeup(&vnd->sc_tab);
  846         }
  847         obp->b_flags |= bp->b_flags & B_ERROR;
  848         obp->b_error = bp->b_error;
  849         obp->b_resid = bp->b_resid;
  850         VND_PUTXFER(vnd, vnx);
  851         biodone(obp);
  852 }
  853 
  854 /* ARGSUSED */
  855 static int
  856 vndread(dev_t dev, struct uio *uio, int flags)
  857 {
  858         int unit = vndunit(dev);
  859         struct vnd_softc *sc;
  860 
  861 #ifdef DEBUG
  862         if (vnddebug & VDB_FOLLOW)
  863                 printf("vndread(0x%x, %p)\n", dev, uio);
  864 #endif
  865 
  866         sc = device_lookup(&vnd_cd, unit);
  867         if (sc == NULL)
  868                 return ENXIO;
  869 
  870         if ((sc->sc_flags & VNF_INITED) == 0)
  871                 return (ENXIO);
  872 
  873         return (physio(vndstrategy, NULL, dev, B_READ, minphys, uio));
  874 }
  875 
  876 /* ARGSUSED */
  877 static int
  878 vndwrite(dev_t dev, struct uio *uio, int flags)
  879 {
  880         int unit = vndunit(dev);
  881         struct vnd_softc *sc;
  882 
  883 #ifdef DEBUG
  884         if (vnddebug & VDB_FOLLOW)
  885                 printf("vndwrite(0x%x, %p)\n", dev, uio);
  886 #endif
  887 
  888         sc = device_lookup(&vnd_cd, unit);
  889         if (sc == NULL)
  890                 return ENXIO;
  891 
  892         if ((sc->sc_flags & VNF_INITED) == 0)
  893                 return (ENXIO);
  894 
  895         return (physio(vndstrategy, NULL, dev, B_WRITE, minphys, uio));
  896 }
  897 
  898 static int
  899 vnd_cget(struct lwp *l, int unit, int *un, struct vattr *va)
  900 {
  901         struct vnd_softc *vnd;
  902 
  903         if (*un == -1)
  904                 *un = unit;
  905         if (*un < 0)
  906                 return EINVAL;
  907 
  908         vnd = device_lookup(&vnd_cd, *un);
  909         if (vnd == NULL)
  910                 return (*un >= vnd_cd.cd_ndevs) ? ENXIO : -1;
  911 
  912         if ((vnd->sc_flags & VNF_INITED) == 0)
  913                 return -1;
  914 
  915         return VOP_GETATTR(vnd->sc_vp, va, l->l_cred, l);
  916 }
  917 
  918 /* ARGSUSED */
  919 static int
  920 vndioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct lwp *l)
  921 {
  922         int unit = vndunit(dev);
  923         struct vnd_softc *vnd;
  924         struct vnd_ioctl *vio;
  925         struct vattr vattr;
  926         struct nameidata nd;
  927         int error, part, pmask;
  928         size_t geomsize;
  929         int fflags;
  930 #ifdef __HAVE_OLD_DISKLABEL
  931         struct disklabel newlabel;
  932 #endif
  933 
  934 #ifdef DEBUG
  935         if (vnddebug & VDB_FOLLOW)
  936                 printf("vndioctl(0x%x, 0x%lx, %p, 0x%x, %p): unit %d\n",
  937                     dev, cmd, data, flag, l->l_proc, unit);
  938 #endif
  939         vnd = device_lookup(&vnd_cd, unit);
  940         if (vnd == NULL &&
  941 #ifdef COMPAT_30
  942             cmd != VNDIOOCGET &&
  943 #endif
  944             cmd != VNDIOCGET)
  945                 return ENXIO;
  946         vio = (struct vnd_ioctl *)data;
  947 
  948         /* Must be open for writes for these commands... */
  949         switch (cmd) {
  950         case VNDIOCSET:
  951         case VNDIOCCLR:
  952         case DIOCSDINFO:
  953         case DIOCWDINFO:
  954 #ifdef __HAVE_OLD_DISKLABEL
  955         case ODIOCSDINFO:
  956         case ODIOCWDINFO:
  957 #endif
  958         case DIOCKLABEL:
  959         case DIOCWLABEL:
  960                 if ((flag & FWRITE) == 0)
  961                         return (EBADF);
  962         }
  963 
  964         /* Must be initialized for these... */
  965         switch (cmd) {
  966         case VNDIOCCLR:
  967         case DIOCGDINFO:
  968         case DIOCSDINFO:
  969         case DIOCWDINFO:
  970         case DIOCGPART:
  971         case DIOCKLABEL:
  972         case DIOCWLABEL:
  973         case DIOCGDEFLABEL:
  974 #ifdef __HAVE_OLD_DISKLABEL
  975         case ODIOCGDINFO:
  976         case ODIOCSDINFO:
  977         case ODIOCWDINFO:
  978         case ODIOCGDEFLABEL:
  979 #endif
  980                 if ((vnd->sc_flags & VNF_INITED) == 0)
  981                         return (ENXIO);
  982         }
  983 
  984         switch (cmd) {
  985         case VNDIOCSET:
  986                 if (vnd->sc_flags & VNF_INITED)
  987                         return (EBUSY);
  988 
  989                 if ((error = vndlock(vnd)) != 0)
  990                         return (error);
  991 
  992                 fflags = FREAD;
  993                 if ((vio->vnd_flags & VNDIOF_READONLY) == 0)
  994                         fflags |= FWRITE;
  995                 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, vio->vnd_file, l);
  996                 if ((error = vn_open(&nd, fflags, 0)) != 0)
  997                         goto unlock_and_exit;
  998                 KASSERT(l);
  999                 error = VOP_GETATTR(nd.ni_vp, &vattr, l->l_cred, l);
 1000                 if (!error && nd.ni_vp->v_type != VREG)
 1001                         error = EOPNOTSUPP;
 1002                 if (error) {
 1003                         VOP_UNLOCK(nd.ni_vp, 0);
 1004                         goto close_and_exit;
 1005                 }
 1006 
 1007                 /* If using a compressed file, initialize its info */
 1008                 /* (or abort with an error if kernel has no compression) */
 1009                 if (vio->vnd_flags & VNF_COMP) {
 1010 #ifdef VND_COMPRESSION
 1011                         struct vnd_comp_header *ch;
 1012                         int i;
 1013                         u_int32_t comp_size;
 1014                         u_int32_t comp_maxsize;
 1015  
 1016                         /* allocate space for compresed file header */
 1017                         ch = malloc(sizeof(struct vnd_comp_header),
 1018                         M_TEMP, M_WAITOK);
 1019  
 1020                         /* read compressed file header */
 1021                         error = vn_rdwr(UIO_READ, nd.ni_vp, (caddr_t)ch,
 1022                           sizeof(struct vnd_comp_header), 0, UIO_SYSSPACE,
 1023                           IO_UNIT|IO_NODELOCKED, l->l_cred, NULL, NULL);
 1024                         if(error) {
 1025                                 free(ch, M_TEMP);
 1026                                 VOP_UNLOCK(nd.ni_vp, 0);
 1027                                 goto close_and_exit;
 1028                         }
 1029  
 1030                         /* save some header info */
 1031                         vnd->sc_comp_blksz = ntohl(ch->block_size);
 1032                         /* note last offset is the file byte size */
 1033                         vnd->sc_comp_numoffs = ntohl(ch->num_blocks)+1;
 1034                         free(ch, M_TEMP);
 1035                         if (vnd->sc_comp_blksz == 0 ||
 1036                             vnd->sc_comp_blksz % DEV_BSIZE !=0) {
 1037                                 VOP_UNLOCK(nd.ni_vp, 0);
 1038                                 error = EINVAL;
 1039                                 goto close_and_exit;
 1040                         }
 1041                         if(sizeof(struct vnd_comp_header) +
 1042                           sizeof(u_int64_t) * vnd->sc_comp_numoffs >
 1043                           vattr.va_size) {
 1044                                 VOP_UNLOCK(nd.ni_vp, 0);
 1045                                 error = EINVAL;
 1046                                 goto close_and_exit;
 1047                         }
 1048  
 1049                         /* set decompressed file size */
 1050                         vattr.va_size =
 1051                             ((u_quad_t)vnd->sc_comp_numoffs - 1) *
 1052                              (u_quad_t)vnd->sc_comp_blksz;
 1053  
 1054                         /* allocate space for all the compressed offsets */
 1055                         vnd->sc_comp_offsets =
 1056                         malloc(sizeof(u_int64_t) * vnd->sc_comp_numoffs,
 1057                         M_DEVBUF, M_WAITOK);
 1058  
 1059                         /* read in the offsets */
 1060                         error = vn_rdwr(UIO_READ, nd.ni_vp,
 1061                           (caddr_t)vnd->sc_comp_offsets,
 1062                           sizeof(u_int64_t) * vnd->sc_comp_numoffs,
 1063                           sizeof(struct vnd_comp_header), UIO_SYSSPACE,
 1064                           IO_UNIT|IO_NODELOCKED, l->l_cred, NULL, NULL);
 1065                         if(error) {
 1066                                 VOP_UNLOCK(nd.ni_vp, 0);
 1067                                 goto close_and_exit;
 1068                         }
 1069                         /*
 1070                          * find largest block size (used for allocation limit).
 1071                          * Also convert offset to native byte order.
 1072                          */
 1073                         comp_maxsize = 0;
 1074                         for (i = 0; i < vnd->sc_comp_numoffs - 1; i++) {
 1075                                 vnd->sc_comp_offsets[i] =
 1076                                   be64toh(vnd->sc_comp_offsets[i]);
 1077                                 comp_size = be64toh(vnd->sc_comp_offsets[i + 1])
 1078                                   - vnd->sc_comp_offsets[i];
 1079                                 if (comp_size > comp_maxsize)
 1080                                         comp_maxsize = comp_size;
 1081                         }
 1082                         vnd->sc_comp_offsets[vnd->sc_comp_numoffs - 1] =
 1083                           be64toh(vnd->sc_comp_offsets[vnd->sc_comp_numoffs - 1]);
 1084  
 1085                         /* create compressed data buffer */
 1086                         vnd->sc_comp_buff = malloc(comp_maxsize,
 1087                           M_DEVBUF, M_WAITOK);
 1088  
 1089                         /* create decompressed buffer */
 1090                         vnd->sc_comp_decombuf = malloc(vnd->sc_comp_blksz,
 1091                           M_DEVBUF, M_WAITOK);
 1092                         vnd->sc_comp_buffblk = -1;
 1093  
 1094                         /* Initialize decompress stream */
 1095                         bzero(&vnd->sc_comp_stream, sizeof(z_stream));
 1096                         vnd->sc_comp_stream.zalloc = vnd_alloc;
 1097                         vnd->sc_comp_stream.zfree = vnd_free;
 1098                         error = inflateInit2(&vnd->sc_comp_stream, MAX_WBITS);
 1099                         if(error) {
 1100                                 if(vnd->sc_comp_stream.msg)
 1101                                         printf("vnd%d: compressed file, %s\n",
 1102                                           unit, vnd->sc_comp_stream.msg);
 1103                                 VOP_UNLOCK(nd.ni_vp, 0);
 1104                                 error = EINVAL;
 1105                                 goto close_and_exit;
 1106                         }
 1107  
 1108                         vnd->sc_flags |= VNF_COMP | VNF_READONLY;
 1109 #else /* !VND_COMPRESSION */
 1110                         VOP_UNLOCK(nd.ni_vp, 0);
 1111                         error = EOPNOTSUPP;
 1112                         goto close_and_exit;
 1113 #endif /* VND_COMPRESSION */
 1114                 }
 1115  
 1116                 VOP_UNLOCK(nd.ni_vp, 0);
 1117                 vnd->sc_vp = nd.ni_vp;
 1118                 vnd->sc_size = btodb(vattr.va_size);    /* note truncation */
 1119 
 1120                 /*
 1121                  * Use pseudo-geometry specified.  If none was provided,
 1122                  * use "standard" Adaptec fictitious geometry.
 1123                  */
 1124                 if (vio->vnd_flags & VNDIOF_HASGEOM) {
 1125 
 1126                         memcpy(&vnd->sc_geom, &vio->vnd_geom,
 1127                             sizeof(vio->vnd_geom));
 1128 
 1129                         /*
 1130                          * Sanity-check the sector size.
 1131                          * XXX Don't allow secsize < DEV_BSIZE.  Should
 1132                          * XXX we?
 1133                          */
 1134                         if (vnd->sc_geom.vng_secsize < DEV_BSIZE ||
 1135                             (vnd->sc_geom.vng_secsize % DEV_BSIZE) != 0 ||
 1136                             vnd->sc_geom.vng_ncylinders == 0 ||
 1137                             (vnd->sc_geom.vng_ntracks *
 1138                              vnd->sc_geom.vng_nsectors) == 0) {
 1139                                 error = EINVAL;
 1140                                 goto close_and_exit;
 1141                         }
 1142 
 1143                         /*
 1144                          * Compute the size (in DEV_BSIZE blocks) specified
 1145                          * by the geometry.
 1146                          */
 1147                         geomsize = (vnd->sc_geom.vng_nsectors *
 1148                             vnd->sc_geom.vng_ntracks *
 1149                             vnd->sc_geom.vng_ncylinders) *
 1150                             (vnd->sc_geom.vng_secsize / DEV_BSIZE);
 1151 
 1152                         /*
 1153                          * Sanity-check the size against the specified
 1154                          * geometry.
 1155                          */
 1156                         if (vnd->sc_size < geomsize) {
 1157                                 error = EINVAL;
 1158                                 goto close_and_exit;
 1159                         }
 1160                 } else if (vnd->sc_size >= (32 * 64)) {
 1161                         /*
 1162                          * Size must be at least 2048 DEV_BSIZE blocks
 1163                          * (1M) in order to use this geometry.
 1164                          */
 1165                         vnd->sc_geom.vng_secsize = DEV_BSIZE;
 1166                         vnd->sc_geom.vng_nsectors = 32;
 1167                         vnd->sc_geom.vng_ntracks = 64;
 1168                         vnd->sc_geom.vng_ncylinders = vnd->sc_size / (64 * 32);
 1169                 } else {
 1170                         vnd->sc_geom.vng_secsize = DEV_BSIZE;
 1171                         vnd->sc_geom.vng_nsectors = 1;
 1172                         vnd->sc_geom.vng_ntracks = 1;
 1173                         vnd->sc_geom.vng_ncylinders = vnd->sc_size;
 1174                 }
 1175 
 1176                 vnd_set_properties(vnd);
 1177 
 1178                 if (vio->vnd_flags & VNDIOF_READONLY) {
 1179                         vnd->sc_flags |= VNF_READONLY;
 1180                 }
 1181 
 1182                 if ((error = vndsetcred(vnd, l->l_cred)) != 0)
 1183                         goto close_and_exit;
 1184 
 1185                 vndthrottle(vnd, vnd->sc_vp);
 1186                 vio->vnd_size = dbtob(vnd->sc_size);
 1187                 vnd->sc_flags |= VNF_INITED;
 1188 
 1189                 /* create the kernel thread, wait for it to be up */
 1190                 error = kthread_create1(vndthread, vnd, &vnd->sc_kthread,
 1191                     vnd->sc_dev.dv_xname);
 1192                 if (error)
 1193                         goto close_and_exit;
 1194                 while ((vnd->sc_flags & VNF_KTHREAD) == 0) {
 1195                         tsleep(&vnd->sc_kthread, PRIBIO, "vndthr", 0);
 1196                 }
 1197 #ifdef DEBUG
 1198                 if (vnddebug & VDB_INIT)
 1199                         printf("vndioctl: SET vp %p size 0x%lx %d/%d/%d/%d\n",
 1200                             vnd->sc_vp, (unsigned long) vnd->sc_size,
 1201                             vnd->sc_geom.vng_secsize,
 1202                             vnd->sc_geom.vng_nsectors,
 1203                             vnd->sc_geom.vng_ntracks,
 1204                             vnd->sc_geom.vng_ncylinders);
 1205 #endif
 1206 
 1207                 /* Attach the disk. */
 1208                 vnd->sc_dkdev.dk_name = vnd->sc_dev.dv_xname;
 1209                 pseudo_disk_attach(&vnd->sc_dkdev);
 1210 
 1211                 /* Initialize the xfer and buffer pools. */
 1212                 pool_init(&vnd->sc_vxpool, sizeof(struct vndxfer), 0,
 1213                     0, 0, "vndxpl", NULL);
 1214 
 1215                 /* Try and read the disklabel. */
 1216                 vndgetdisklabel(dev, vnd);
 1217 
 1218                 vndunlock(vnd);
 1219 
 1220                 break;
 1221 
 1222 close_and_exit:
 1223                 (void) vn_close(nd.ni_vp, fflags, l->l_cred, l);
 1224 unlock_and_exit:
 1225 #ifdef VND_COMPRESSION
 1226                 /* free any allocated memory (for compressed file) */
 1227                 if(vnd->sc_comp_offsets) {
 1228                         free(vnd->sc_comp_offsets, M_DEVBUF);
 1229                         vnd->sc_comp_offsets = NULL;
 1230                 }
 1231                 if(vnd->sc_comp_buff) {
 1232                         free(vnd->sc_comp_buff, M_DEVBUF);
 1233                         vnd->sc_comp_buff = NULL;
 1234                 }
 1235                 if(vnd->sc_comp_decombuf) {
 1236                         free(vnd->sc_comp_decombuf, M_DEVBUF);
 1237                         vnd->sc_comp_decombuf = NULL;
 1238                 }
 1239 #endif /* VND_COMPRESSION */
 1240                 vndunlock(vnd);
 1241                 return (error);
 1242 
 1243         case VNDIOCCLR:
 1244                 if ((error = vndlock(vnd)) != 0)
 1245                         return (error);
 1246 
 1247                 /*
 1248                  * Don't unconfigure if any other partitions are open
 1249                  * or if both the character and block flavors of this
 1250                  * partition are open.
 1251                  */
 1252                 part = DISKPART(dev);
 1253                 pmask = (1 << part);
 1254                 if (((vnd->sc_dkdev.dk_openmask & ~pmask) ||
 1255                     ((vnd->sc_dkdev.dk_bopenmask & pmask) &&
 1256                     (vnd->sc_dkdev.dk_copenmask & pmask))) &&
 1257                         !(vio->vnd_flags & VNDIOF_FORCE)) {
 1258                         vndunlock(vnd);
 1259                         return (EBUSY);
 1260                 }
 1261 
 1262                 /*
 1263                  * XXX vndclear() might call vndclose() implicitely;
 1264                  * release lock to avoid recursion
 1265                  */
 1266                 vndunlock(vnd);
 1267                 vndclear(vnd, minor(dev));
 1268 #ifdef DEBUG
 1269                 if (vnddebug & VDB_INIT)
 1270                         printf("vndioctl: CLRed\n");
 1271 #endif
 1272 
 1273                 /* Destroy the xfer and buffer pools. */
 1274                 pool_destroy(&vnd->sc_vxpool);
 1275 
 1276                 /* Detatch the disk. */
 1277                 pseudo_disk_detach(&vnd->sc_dkdev);
 1278                 break;
 1279 
 1280 #ifdef COMPAT_30
 1281         case VNDIOOCGET: {
 1282                 struct vnd_ouser *vnu;
 1283                 struct vattr va;
 1284                 vnu = (struct vnd_ouser *)data;
 1285                 KASSERT(l);
 1286                 switch (error = vnd_cget(l, unit, &vnu->vnu_unit, &va)) {
 1287                 case 0:
 1288                         vnu->vnu_dev = va.va_fsid;
 1289                         vnu->vnu_ino = va.va_fileid;
 1290                         break;
 1291                 case -1:
 1292                         /* unused is not an error */
 1293                         vnu->vnu_dev = 0;
 1294                         vnu->vnu_ino = 0;
 1295                         break;
 1296                 default:
 1297                         return error;
 1298                 }
 1299                 break;
 1300         }
 1301 #endif
 1302         case VNDIOCGET: {
 1303                 struct vnd_user *vnu;
 1304                 struct vattr va;
 1305                 vnu = (struct vnd_user *)data;
 1306                 KASSERT(l);
 1307                 switch (error = vnd_cget(l, unit, &vnu->vnu_unit, &va)) {
 1308                 case 0:
 1309                         vnu->vnu_dev = va.va_fsid;
 1310                         vnu->vnu_ino = va.va_fileid;
 1311                         break;
 1312                 case -1:
 1313                         /* unused is not an error */
 1314                         vnu->vnu_dev = 0;
 1315                         vnu->vnu_ino = 0;
 1316                         break;
 1317                 default:
 1318                         return error;
 1319                 }
 1320                 break;
 1321         }
 1322 
 1323         case DIOCGDINFO:
 1324                 *(struct disklabel *)data = *(vnd->sc_dkdev.dk_label);
 1325                 break;
 1326 
 1327 #ifdef __HAVE_OLD_DISKLABEL
 1328         case ODIOCGDINFO:
 1329                 newlabel = *(vnd->sc_dkdev.dk_label);
 1330                 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
 1331                         return ENOTTY;
 1332                 memcpy(data, &newlabel, sizeof (struct olddisklabel));
 1333                 break;
 1334 #endif
 1335 
 1336         case DIOCGPART:
 1337                 ((struct partinfo *)data)->disklab = vnd->sc_dkdev.dk_label;
 1338                 ((struct partinfo *)data)->part =
 1339                     &vnd->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
 1340                 break;
 1341 
 1342         case DIOCWDINFO:
 1343         case DIOCSDINFO:
 1344 #ifdef __HAVE_OLD_DISKLABEL
 1345         case ODIOCWDINFO:
 1346         case ODIOCSDINFO:
 1347 #endif
 1348         {
 1349                 struct disklabel *lp;
 1350 
 1351                 if ((error = vndlock(vnd)) != 0)
 1352                         return (error);
 1353 
 1354                 vnd->sc_flags |= VNF_LABELLING;
 1355 
 1356 #ifdef __HAVE_OLD_DISKLABEL
 1357                 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
 1358                         memset(&newlabel, 0, sizeof newlabel);
 1359                         memcpy(&newlabel, data, sizeof (struct olddisklabel));
 1360                         lp = &newlabel;
 1361                 } else
 1362 #endif
 1363                 lp = (struct disklabel *)data;
 1364 
 1365                 error = setdisklabel(vnd->sc_dkdev.dk_label,
 1366                     lp, 0, vnd->sc_dkdev.dk_cpulabel);
 1367                 if (error == 0) {
 1368                         if (cmd == DIOCWDINFO
 1369 #ifdef __HAVE_OLD_DISKLABEL
 1370                             || cmd == ODIOCWDINFO
 1371 #endif
 1372                            )
 1373                                 error = writedisklabel(VNDLABELDEV(dev),
 1374                                     vndstrategy, vnd->sc_dkdev.dk_label,
 1375                                     vnd->sc_dkdev.dk_cpulabel);
 1376                 }
 1377 
 1378                 vnd->sc_flags &= ~VNF_LABELLING;
 1379 
 1380                 vndunlock(vnd);
 1381 
 1382                 if (error)
 1383                         return (error);
 1384                 break;
 1385         }
 1386 
 1387         case DIOCKLABEL:
 1388                 if (*(int *)data != 0)
 1389                         vnd->sc_flags |= VNF_KLABEL;
 1390                 else
 1391                         vnd->sc_flags &= ~VNF_KLABEL;
 1392                 break;
 1393 
 1394         case DIOCWLABEL:
 1395                 if (*(int *)data != 0)
 1396                         vnd->sc_flags |= VNF_WLABEL;
 1397                 else
 1398                         vnd->sc_flags &= ~VNF_WLABEL;
 1399                 break;
 1400 
 1401         case DIOCGDEFLABEL:
 1402                 vndgetdefaultlabel(vnd, (struct disklabel *)data);
 1403                 break;
 1404 
 1405 #ifdef __HAVE_OLD_DISKLABEL
 1406         case ODIOCGDEFLABEL:
 1407                 vndgetdefaultlabel(vnd, &newlabel);
 1408                 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
 1409                         return ENOTTY;
 1410                 memcpy(data, &newlabel, sizeof (struct olddisklabel));
 1411                 break;
 1412 #endif
 1413 
 1414         default:
 1415                 return (ENOTTY);
 1416         }
 1417 
 1418         return (0);
 1419 }
 1420 
 1421 /*
 1422  * Duplicate the current processes' credentials.  Since we are called only
 1423  * as the result of a SET ioctl and only root can do that, any future access
 1424  * to this "disk" is essentially as root.  Note that credentials may change
 1425  * if some other uid can write directly to the mapped file (NFS).
 1426  */
 1427 static int
 1428 vndsetcred(struct vnd_softc *vnd, kauth_cred_t cred)
 1429 {
 1430         struct uio auio;
 1431         struct iovec aiov;
 1432         char *tmpbuf;
 1433         int error;
 1434 
 1435         vnd->sc_cred = kauth_cred_dup(cred);
 1436         tmpbuf = malloc(DEV_BSIZE, M_TEMP, M_WAITOK);
 1437 
 1438         /* XXX: Horrible kludge to establish credentials for NFS */
 1439         aiov.iov_base = tmpbuf;
 1440         aiov.iov_len = min(DEV_BSIZE, dbtob(vnd->sc_size));
 1441         auio.uio_iov = &aiov;
 1442         auio.uio_iovcnt = 1;
 1443         auio.uio_offset = 0;
 1444         auio.uio_rw = UIO_READ;
 1445         auio.uio_resid = aiov.iov_len;
 1446         UIO_SETUP_SYSSPACE(&auio);
 1447         vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY);
 1448         error = VOP_READ(vnd->sc_vp, &auio, 0, vnd->sc_cred);
 1449         if (error == 0) {
 1450                 /*
 1451                  * Because vnd does all IO directly through the vnode
 1452                  * we need to flush (at least) the buffer from the above
 1453                  * VOP_READ from the buffer cache to prevent cache
 1454                  * incoherencies.  Also, be careful to write dirty
 1455                  * buffers back to stable storage.
 1456                  */
 1457                 error = vinvalbuf(vnd->sc_vp, V_SAVE, vnd->sc_cred,
 1458                             curlwp, 0, 0);
 1459         }
 1460         VOP_UNLOCK(vnd->sc_vp, 0);
 1461 
 1462         free(tmpbuf, M_TEMP);
 1463         return (error);
 1464 }
 1465 
 1466 /*
 1467  * Set maxactive based on FS type
 1468  */
 1469 static void
 1470 vndthrottle(struct vnd_softc *vnd, struct vnode *vp)
 1471 {
 1472 #ifdef NFS
 1473         extern int (**nfsv2_vnodeop_p)(void *);
 1474 
 1475         if (vp->v_op == nfsv2_vnodeop_p)
 1476                 vnd->sc_maxactive = 2;
 1477         else
 1478 #endif
 1479                 vnd->sc_maxactive = 8;
 1480 
 1481         if (vnd->sc_maxactive < 1)
 1482                 vnd->sc_maxactive = 1;
 1483 }
 1484 
#if 0
/*
 * Disabled: unconfigure every initialized vnd unit.
 *
 * NOTE(review): this block is compiled out and has gone stale.  It calls
 * vndclear() with a single argument, whereas vndclear() as defined in
 * this file takes (vnd, myminor).  It also walks a vnd_softc[] array
 * bounded by numvnd; the live code in this file finds units through
 * device_lookup(&vnd_cd, ...) instead -- confirm whether those symbols
 * still exist before re-enabling.
 */
static void
vndshutdown(void)
{
	struct vnd_softc *vnd;

	for (vnd = &vnd_softc[0]; vnd < &vnd_softc[numvnd]; vnd++)
		if (vnd->sc_flags & VNF_INITED)
			vndclear(vnd);
}
#endif
 1496 
 1497 static void
 1498 vndclear(struct vnd_softc *vnd, int myminor)
 1499 {
 1500         struct vnode *vp = vnd->sc_vp;
 1501         struct lwp *l = curlwp;
 1502         int fflags = FREAD;
 1503         int bmaj, cmaj, i, mn;
 1504         int s;
 1505 
 1506 #ifdef DEBUG
 1507         if (vnddebug & VDB_FOLLOW)
 1508                 printf("vndclear(%p): vp %p\n", vnd, vp);
 1509 #endif
 1510         /* locate the major number */
 1511         bmaj = bdevsw_lookup_major(&vnd_bdevsw);
 1512         cmaj = cdevsw_lookup_major(&vnd_cdevsw);
 1513 
 1514         /* Nuke the vnodes for any open instances */
 1515         for (i = 0; i < MAXPARTITIONS; i++) {
 1516                 mn = DISKMINOR(device_unit(&vnd->sc_dev), i);
 1517                 vdevgone(bmaj, mn, mn, VBLK);
 1518                 if (mn != myminor) /* XXX avoid to kill own vnode */
 1519                         vdevgone(cmaj, mn, mn, VCHR);
 1520         }
 1521 
 1522         if ((vnd->sc_flags & VNF_READONLY) == 0)
 1523                 fflags |= FWRITE;
 1524 
 1525         s = splbio();
 1526         bufq_drain(vnd->sc_tab);
 1527         splx(s);
 1528 
 1529         vnd->sc_flags |= VNF_VUNCONF;
 1530         wakeup(&vnd->sc_tab);
 1531         while (vnd->sc_flags & VNF_KTHREAD)
 1532                 tsleep(&vnd->sc_kthread, PRIBIO, "vnthr", 0);
 1533 
 1534 #ifdef VND_COMPRESSION
 1535         /* free the compressed file buffers */
 1536         if(vnd->sc_flags & VNF_COMP) {
 1537                 if(vnd->sc_comp_offsets) {
 1538                         free(vnd->sc_comp_offsets, M_DEVBUF);
 1539                         vnd->sc_comp_offsets = NULL;
 1540                 }
 1541                 if(vnd->sc_comp_buff) {
 1542                         free(vnd->sc_comp_buff, M_DEVBUF);
 1543                         vnd->sc_comp_buff = NULL;
 1544                 }
 1545                 if(vnd->sc_comp_decombuf) {
 1546                         free(vnd->sc_comp_decombuf, M_DEVBUF);
 1547                         vnd->sc_comp_decombuf = NULL;
 1548                 }
 1549         }
 1550 #endif /* VND_COMPRESSION */
 1551         vnd->sc_flags &=
 1552             ~(VNF_INITED | VNF_READONLY | VNF_VLABEL
 1553               | VNF_VUNCONF | VNF_COMP);
 1554         if (vp == (struct vnode *)0)
 1555                 panic("vndclear: null vp");
 1556         (void) vn_close(vp, fflags, vnd->sc_cred, l);
 1557         kauth_cred_free(vnd->sc_cred);
 1558         vnd->sc_vp = (struct vnode *)0;
 1559         vnd->sc_cred = (kauth_cred_t)0;
 1560         vnd->sc_size = 0;
 1561 }
 1562 
 1563 static int
 1564 vndsize(dev_t dev)
 1565 {
 1566         struct vnd_softc *sc;
 1567         struct disklabel *lp;
 1568         int part, unit, omask;
 1569         int size;
 1570 
 1571         unit = vndunit(dev);
 1572         sc = (struct vnd_softc *)device_lookup(&vnd_cd, unit);
 1573         if (sc == NULL)
 1574                 return -1;
 1575 
 1576         if ((sc->sc_flags & VNF_INITED) == 0)
 1577                 return (-1);
 1578 
 1579         part = DISKPART(dev);
 1580         omask = sc->sc_dkdev.dk_openmask & (1 << part);
 1581         lp = sc->sc_dkdev.dk_label;
 1582 
 1583         if (omask == 0 && vndopen(dev, 0, S_IFBLK, curlwp))     /* XXX */
 1584                 return (-1);
 1585 
 1586         if (lp->d_partitions[part].p_fstype != FS_SWAP)
 1587                 size = -1;
 1588         else
 1589                 size = lp->d_partitions[part].p_size *
 1590                     (lp->d_secsize / DEV_BSIZE);
 1591 
 1592         if (omask == 0 && vndclose(dev, 0, S_IFBLK, curlwp))    /* XXX */
 1593                 return (-1);
 1594 
 1595         return (size);
 1596 }
 1597 
 1598 static int
 1599 vnddump(dev_t dev, daddr_t blkno, caddr_t va,
 1600     size_t size)
 1601 {
 1602 
 1603         /* Not implemented. */
 1604         return ENXIO;
 1605 }
 1606 
 1607 static void
 1608 vndgetdefaultlabel(struct vnd_softc *sc, struct disklabel *lp)
 1609 {
 1610         struct vndgeom *vng = &sc->sc_geom;
 1611         struct partition *pp;
 1612 
 1613         memset(lp, 0, sizeof(*lp));
 1614 
 1615         lp->d_secperunit = sc->sc_size / (vng->vng_secsize / DEV_BSIZE);
 1616         lp->d_secsize = vng->vng_secsize;
 1617         lp->d_nsectors = vng->vng_nsectors;
 1618         lp->d_ntracks = vng->vng_ntracks;
 1619         lp->d_ncylinders = vng->vng_ncylinders;
 1620         lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
 1621 
 1622         strncpy(lp->d_typename, "vnd", sizeof(lp->d_typename));
 1623         lp->d_type = DTYPE_VND;
 1624         strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
 1625         lp->d_rpm = 3600;
 1626         lp->d_interleave = 1;
 1627         lp->d_flags = 0;
 1628 
 1629         pp = &lp->d_partitions[RAW_PART];
 1630         pp->p_offset = 0;
 1631         pp->p_size = lp->d_secperunit;
 1632         pp->p_fstype = FS_UNUSED;
 1633         lp->d_npartitions = RAW_PART + 1;
 1634 
 1635         lp->d_magic = DISKMAGIC;
 1636         lp->d_magic2 = DISKMAGIC;
 1637         lp->d_checksum = dkcksum(lp);
 1638 }
 1639 
 1640 /*
 1641  * Read the disklabel from a vnd.  If one is not present, create a fake one.
 1642  */
 1643 static void
 1644 vndgetdisklabel(dev_t dev, struct vnd_softc *sc)
 1645 {
 1646         const char *errstring;
 1647         struct disklabel *lp = sc->sc_dkdev.dk_label;
 1648         struct cpu_disklabel *clp = sc->sc_dkdev.dk_cpulabel;
 1649         int i;
 1650 
 1651         memset(clp, 0, sizeof(*clp));
 1652 
 1653         vndgetdefaultlabel(sc, lp);
 1654 
 1655         /*
 1656          * Call the generic disklabel extraction routine.
 1657          */
 1658         errstring = readdisklabel(VNDLABELDEV(dev), vndstrategy, lp, clp);
 1659         if (errstring) {
 1660                 /*
 1661                  * Lack of disklabel is common, but we print the warning
 1662                  * anyway, since it might contain other useful information.
 1663                  */
 1664                 printf("%s: %s\n", sc->sc_dev.dv_xname, errstring);
 1665 
 1666                 /*
 1667                  * For historical reasons, if there's no disklabel
 1668                  * present, all partitions must be FS_BSDFFS and
 1669                  * occupy the entire disk.
 1670                  */
 1671                 for (i = 0; i < MAXPARTITIONS; i++) {
 1672                         /*
 1673                          * Don't wipe out port specific hack (such as
 1674                          * dos partition hack of i386 port).
 1675                          */
 1676                         if (lp->d_partitions[i].p_size != 0)
 1677                                 continue;
 1678 
 1679                         lp->d_partitions[i].p_size = lp->d_secperunit;
 1680                         lp->d_partitions[i].p_offset = 0;
 1681                         lp->d_partitions[i].p_fstype = FS_BSDFFS;
 1682                 }
 1683 
 1684                 strncpy(lp->d_packname, "default label",
 1685                     sizeof(lp->d_packname));
 1686 
 1687                 lp->d_npartitions = MAXPARTITIONS;
 1688                 lp->d_checksum = dkcksum(lp);
 1689         }
 1690 
 1691         /* In-core label now valid. */
 1692         sc->sc_flags |= VNF_VLABEL;
 1693 }
 1694 
 1695 /*
 1696  * Wait interruptibly for an exclusive lock.
 1697  *
 1698  * XXX
 1699  * Several drivers do this; it should be abstracted and made MP-safe.
 1700  */
 1701 static int
 1702 vndlock(struct vnd_softc *sc)
 1703 {
 1704         int error;
 1705 
 1706         while ((sc->sc_flags & VNF_LOCKED) != 0) {
 1707                 sc->sc_flags |= VNF_WANTED;
 1708                 if ((error = tsleep(sc, PRIBIO | PCATCH, "vndlck", 0)) != 0)
 1709                         return (error);
 1710         }
 1711         sc->sc_flags |= VNF_LOCKED;
 1712         return (0);
 1713 }
 1714 
 1715 /*
 1716  * Unlock and wake up any waiters.
 1717  */
 1718 static void
 1719 vndunlock(struct vnd_softc *sc)
 1720 {
 1721 
 1722         sc->sc_flags &= ~VNF_LOCKED;
 1723         if ((sc->sc_flags & VNF_WANTED) != 0) {
 1724                 sc->sc_flags &= ~VNF_WANTED;
 1725                 wakeup(sc);
 1726         }
 1727 }
 1728 
 1729 #ifdef VND_COMPRESSION
 1730 /* compressed file read */
 1731 static void
 1732 compstrategy(struct buf *bp, off_t bn)
 1733 {
 1734         int error;
 1735         int unit = vndunit(bp->b_dev);
 1736         struct vnd_softc *vnd =
 1737             (struct vnd_softc *)device_lookup(&vnd_cd, unit);
 1738         u_int32_t comp_block;
 1739         struct uio auio;
 1740         caddr_t addr;
 1741         int s;
 1742 
 1743         /* set up constants for data move */
 1744         auio.uio_rw = UIO_READ;
 1745         UIO_SETUP_SYSSPACE(&auio);
 1746 
 1747         /* read, and transfer the data */
 1748         addr = bp->b_data;
 1749         bp->b_resid = bp->b_bcount;
 1750         s = splbio();
 1751         while (bp->b_resid > 0) {
 1752                 unsigned length;
 1753                 size_t length_in_buffer;
 1754                 u_int32_t offset_in_buffer;
 1755                 struct iovec aiov;
 1756 
 1757                 /* calculate the compressed block number */
 1758                 comp_block = bn / (off_t)vnd->sc_comp_blksz;
 1759 
 1760                 /* check for good block number */
 1761                 if (comp_block >= vnd->sc_comp_numoffs) {
 1762                         bp->b_error = EINVAL;
 1763                         bp->b_flags |= B_ERROR;
 1764                         splx(s);
 1765                         return;
 1766                 }
 1767 
 1768                 /* read in the compressed block, if not in buffer */
 1769                 if (comp_block != vnd->sc_comp_buffblk) {
 1770                         length = vnd->sc_comp_offsets[comp_block + 1] -
 1771                             vnd->sc_comp_offsets[comp_block];
 1772                         vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY);
 1773                         error = vn_rdwr(UIO_READ, vnd->sc_vp, vnd->sc_comp_buff,
 1774                             length, vnd->sc_comp_offsets[comp_block],
 1775                             UIO_SYSSPACE, IO_UNIT, vnd->sc_cred, NULL, NULL);
 1776                         if (error) {
 1777                                 bp->b_error = error;
 1778                                 bp->b_flags |= B_ERROR;
 1779                                 VOP_UNLOCK(vnd->sc_vp, 0);
 1780                                 splx(s);
 1781                                 return;
 1782                         }
 1783                         /* uncompress the buffer */
 1784                         vnd->sc_comp_stream.next_in = vnd->sc_comp_buff;
 1785                         vnd->sc_comp_stream.avail_in = length;
 1786                         vnd->sc_comp_stream.next_out = vnd->sc_comp_decombuf;
 1787                         vnd->sc_comp_stream.avail_out = vnd->sc_comp_blksz;
 1788                         inflateReset(&vnd->sc_comp_stream);
 1789                         error = inflate(&vnd->sc_comp_stream, Z_FINISH);
 1790                         if (error != Z_STREAM_END) {
 1791                                 if (vnd->sc_comp_stream.msg)
 1792                                         printf("%s: compressed file, %s\n",
 1793                                             vnd->sc_dev.dv_xname,
 1794                                             vnd->sc_comp_stream.msg);
 1795                                 bp->b_error = EBADMSG;
 1796                                 bp->b_flags |= B_ERROR;
 1797                                 VOP_UNLOCK(vnd->sc_vp, 0);
 1798                                 splx(s);
 1799                                 return;
 1800                         }
 1801                         vnd->sc_comp_buffblk = comp_block;
 1802                         VOP_UNLOCK(vnd->sc_vp, 0);
 1803                 }
 1804 
 1805                 /* transfer the usable uncompressed data */
 1806                 offset_in_buffer = bn % (off_t)vnd->sc_comp_blksz;
 1807                 length_in_buffer = vnd->sc_comp_blksz - offset_in_buffer;
 1808                 if (length_in_buffer > bp->b_resid)
 1809                         length_in_buffer = bp->b_resid;
 1810                 auio.uio_iov = &aiov;
 1811                 auio.uio_iovcnt = 1;
 1812                 aiov.iov_base = addr;
 1813                 aiov.iov_len = length_in_buffer;
 1814                 auio.uio_resid = aiov.iov_len;
 1815                 auio.uio_offset = 0;
 1816                 error = uiomove(vnd->sc_comp_decombuf + offset_in_buffer,
 1817                     length_in_buffer, &auio);
 1818                 if (error) {
 1819                         bp->b_error = error;
 1820                         bp->b_flags |= B_ERROR;
 1821                         splx(s);
 1822                         return;
 1823                 }
 1824 
 1825                 bn += length_in_buffer;
 1826                 addr += length_in_buffer;
 1827                 bp->b_resid -= length_in_buffer;
 1828         }
 1829         splx(s);
 1830 }
 1831 
 1832 /* compression memory allocation routines */
 1833 static void *
 1834 vnd_alloc(void *aux, u_int items, u_int siz)
 1835 {
 1836         return malloc(items * siz, M_TEMP, M_NOWAIT);
 1837 }
 1838 
 1839 static void
 1840 vnd_free(void *aux, void *ptr)
 1841 {
 1842         free(ptr, M_TEMP);
 1843 }
 1844 #endif /* VND_COMPRESSION */
 1845 
 1846 static void
 1847 vnd_set_properties(struct vnd_softc *vnd)
 1848 {
 1849         prop_dictionary_t disk_info, odisk_info, geom;
 1850 
 1851         disk_info = prop_dictionary_create();
 1852 
 1853         geom = prop_dictionary_create();
 1854 
 1855         prop_dictionary_set_uint64(geom, "sectors-per-unit",
 1856             vnd->sc_geom.vng_nsectors * vnd->sc_geom.vng_ntracks *
 1857             vnd->sc_geom.vng_ncylinders);
 1858 
 1859         prop_dictionary_set_uint32(geom, "sector-size",
 1860             vnd->sc_geom.vng_secsize);
 1861 
 1862         prop_dictionary_set_uint16(geom, "sectors-per-track",
 1863             vnd->sc_geom.vng_nsectors);
 1864 
 1865         prop_dictionary_set_uint16(geom, "tracks-per-cylinder",
 1866             vnd->sc_geom.vng_ntracks);
 1867 
 1868         prop_dictionary_set_uint64(geom, "cylinders-per-unit",
 1869             vnd->sc_geom.vng_ncylinders);
 1870 
 1871         prop_dictionary_set(disk_info, "geometry", geom);
 1872         prop_object_release(geom);
 1873 
 1874         prop_dictionary_set(device_properties(&vnd->sc_dev),
 1875             "disk-info", disk_info);
 1876 
 1877         /*
 1878          * Don't release disk_info here; we keep a reference to it.
 1879          * disk_detach() will release it when we go away.
 1880          */
 1881 
 1882         odisk_info = vnd->sc_dkdev.dk_info;
 1883         vnd->sc_dkdev.dk_info = disk_info;
 1884         if (odisk_info)
 1885                 prop_object_release(odisk_info);
 1886 }

Cache object: 02902177f4cd61497d500bcac115f36e


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.