The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/dev/vnd.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*      $NetBSD: vnd.c,v 1.187.4.4 2010/01/30 19:00:46 snj Exp $        */
    2 
    3 /*-
    4  * Copyright (c) 1996, 1997, 1998, 2008 The NetBSD Foundation, Inc.
    5  * All rights reserved.
    6  *
    7  * This code is derived from software contributed to The NetBSD Foundation
    8  * by Jason R. Thorpe.
    9  *
   10  * Redistribution and use in source and binary forms, with or without
   11  * modification, are permitted provided that the following conditions
   12  * are met:
   13  * 1. Redistributions of source code must retain the above copyright
   14  *    notice, this list of conditions and the following disclaimer.
   15  * 2. Redistributions in binary form must reproduce the above copyright
   16  *    notice, this list of conditions and the following disclaimer in the
   17  *    documentation and/or other materials provided with the distribution.
   18  *
   19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
   20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
   21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
   22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
   23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   29  * POSSIBILITY OF SUCH DAMAGE.
   30  */
   31 
   32 /*
   33  * Copyright (c) 1990, 1993
   34  *      The Regents of the University of California.  All rights reserved.
   35  *
   36  * This code is derived from software contributed to Berkeley by
   37  * the Systems Programming Group of the University of Utah Computer
   38  * Science Department.
   39  *
   40  * Redistribution and use in source and binary forms, with or without
   41  * modification, are permitted provided that the following conditions
   42  * are met:
   43  * 1. Redistributions of source code must retain the above copyright
   44  *    notice, this list of conditions and the following disclaimer.
   45  * 2. Redistributions in binary form must reproduce the above copyright
   46  *    notice, this list of conditions and the following disclaimer in the
   47  *    documentation and/or other materials provided with the distribution.
   48  * 3. Neither the name of the University nor the names of its contributors
   49  *    may be used to endorse or promote products derived from this software
   50  *    without specific prior written permission.
   51  *
   52  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   54  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   55  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   56  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   57  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   58  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   59  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   60  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   61  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   62  * SUCH DAMAGE.
   63  *
   64  * from: Utah $Hdr: vn.c 1.13 94/04/02$
   65  *
   66  *      @(#)vn.c        8.9 (Berkeley) 5/14/95
   67  */
   68 
   69 /*
   70  * Copyright (c) 1988 University of Utah.
   71  *
   72  * This code is derived from software contributed to Berkeley by
   73  * the Systems Programming Group of the University of Utah Computer
   74  * Science Department.
   75  *
   76  * Redistribution and use in source and binary forms, with or without
   77  * modification, are permitted provided that the following conditions
   78  * are met:
   79  * 1. Redistributions of source code must retain the above copyright
   80  *    notice, this list of conditions and the following disclaimer.
   81  * 2. Redistributions in binary form must reproduce the above copyright
   82  *    notice, this list of conditions and the following disclaimer in the
   83  *    documentation and/or other materials provided with the distribution.
   84  * 3. All advertising materials mentioning features or use of this software
   85  *    must display the following acknowledgement:
   86  *      This product includes software developed by the University of
   87  *      California, Berkeley and its contributors.
   88  * 4. Neither the name of the University nor the names of its contributors
   89  *    may be used to endorse or promote products derived from this software
   90  *    without specific prior written permission.
   91  *
   92  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   93  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   94  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   95  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   96  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   97  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   98  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   99  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  100  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  101  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  102  * SUCH DAMAGE.
  103  *
  104  * from: Utah $Hdr: vn.c 1.13 94/04/02$
  105  *
  106  *      @(#)vn.c        8.9 (Berkeley) 5/14/95
  107  */
  108 
  109 /*
  110  * Vnode disk driver.
  111  *
  112  * Block/character interface to a vnode.  Allows one to treat a file
  113  * as a disk (e.g. build a filesystem in it, mount it, etc.).
  114  *
  115  * NOTE 1: If the vnode supports the VOP_BMAP and VOP_STRATEGY operations,
  116  * this uses them to avoid distorting the local buffer cache.  If those
  117  * block-level operations are not available, this falls back to the regular
  118  * read and write calls.  Using these may distort the cache in some cases,
  119  * but it is better to have the driver work than to prevent it from working
  120  * on file systems where the block-level operations are not implemented for
  121  * whatever reason.
  122  *
  123  * NOTE 2: There is a security issue involved with this driver.
  124  * Once mounted all access to the contents of the "mapped" file via
  125  * the special file is controlled by the permissions on the special
  126  * file, the protection of the mapped file is ignored (effectively,
  127  * by using root credentials in all transactions).
  128  *
  129  * NOTE 3: Doesn't interact with leases, should it?
  130  */
  131 
  132 #include <sys/cdefs.h>
  133 __KERNEL_RCSID(0, "$NetBSD: vnd.c,v 1.187.4.4 2010/01/30 19:00:46 snj Exp $");
  134 
  135 #if defined(_KERNEL_OPT)
  136 #include "fs_nfs.h"
  137 #include "opt_vnd.h"
  138 #endif
  139 
  140 #include <sys/param.h>
  141 #include <sys/systm.h>
  142 #include <sys/namei.h>
  143 #include <sys/proc.h>
  144 #include <sys/kthread.h>
  145 #include <sys/errno.h>
  146 #include <sys/buf.h>
  147 #include <sys/bufq.h>
  148 #include <sys/malloc.h>
  149 #include <sys/ioctl.h>
  150 #include <sys/disklabel.h>
  151 #include <sys/device.h>
  152 #include <sys/disk.h>
  153 #include <sys/stat.h>
  154 #include <sys/mount.h>
  155 #include <sys/vnode.h>
  156 #include <sys/file.h>
  157 #include <sys/uio.h>
  158 #include <sys/conf.h>
  159 #include <sys/kauth.h>
  160 
  161 #include <net/zlib.h>
  162 
  163 #include <miscfs/genfs/genfs.h>
  164 #include <miscfs/specfs/specdev.h>
  165 
  166 #include <dev/vndvar.h>
  167 
  168 #include <prop/proplib.h>
  169 
  170 #if defined(VNDDEBUG) && !defined(DEBUG)
  171 #define DEBUG
  172 #endif
  173 
  174 #ifdef DEBUG
      /*
       * Debug-only knobs.
       *
       * dovndcluster: when zero, handle_with_strategy() discards the
       * run length reported by VOP_BMAP (forces nra to 0).
       */
  175 int dovndcluster = 1;
      /*
       * Bits for vnddebug.  In the code visible here VDB_FOLLOW gates the
       * entry traces in vndopen/vndclose/vndstrategy/vndthread, VDB_INIT
       * the vndthread start-up message, and VDB_IO the per-transfer
       * messages.  VDB_LABEL is not referenced in this part of the file.
       */
  176 #define VDB_FOLLOW      0x01
  177 #define VDB_INIT        0x02
  178 #define VDB_IO          0x04
  179 #define VDB_LABEL       0x08
      /* Mask of VDB_* bits; all tracing is off by default. */
  180 int vnddebug = 0x00;
  181 #endif
  182 
  183 #define vndunit(x)      DISKUNIT(x)
  184 
  185 struct vndxfer {
  186         struct buf vx_buf;
  187         struct vnd_softc *vx_vnd;
  188 };
  189 #define VND_BUFTOXFER(bp)       ((struct vndxfer *)(void *)bp)
  190 
  191 #define VND_GETXFER(vnd)        pool_get(&(vnd)->sc_vxpool, PR_WAITOK)
  192 #define VND_PUTXFER(vnd, vx)    pool_put(&(vnd)->sc_vxpool, (vx))
  193 
  194 #define VNDLABELDEV(dev) \
  195     (MAKEDISKDEV(major((dev)), vndunit((dev)), RAW_PART))
  196 
  197 /* called by main() at boot time (XXX: and the LKM driver) */
  198 void    vndattach(int);
  199 
  200 static void     vndclear(struct vnd_softc *, int);
  201 static int      vndsetcred(struct vnd_softc *, kauth_cred_t);
  202 static void     vndthrottle(struct vnd_softc *, struct vnode *);
  203 static void     vndiodone(struct buf *);
  204 #if 0
  205 static void     vndshutdown(void);
  206 #endif
  207 
  208 static void     vndgetdefaultlabel(struct vnd_softc *, struct disklabel *);
  209 static void     vndgetdisklabel(dev_t, struct vnd_softc *);
  210 
  211 static int      vndlock(struct vnd_softc *);
  212 static void     vndunlock(struct vnd_softc *);
  213 #ifdef VND_COMPRESSION
  214 static void     compstrategy(struct buf *, off_t);
  215 static void     *vnd_alloc(void *, u_int, u_int);
  216 static void     vnd_free(void *, void *);
  217 #endif /* VND_COMPRESSION */
  218 
  219 static void     vndthread(void *);
  220 static bool     vnode_has_op(const struct vnode *, int);
  221 static void     handle_with_rdwr(struct vnd_softc *, const struct buf *,
  222                     struct buf *);
  223 static void     handle_with_strategy(struct vnd_softc *, const struct buf *,
  224                     struct buf *);
  225 static void     vnd_set_properties(struct vnd_softc *);
  226 
  227 static dev_type_open(vndopen);
  228 static dev_type_close(vndclose);
  229 static dev_type_read(vndread);
  230 static dev_type_write(vndwrite);
  231 static dev_type_ioctl(vndioctl);
  232 static dev_type_strategy(vndstrategy);
  233 static dev_type_dump(vnddump);
  234 static dev_type_size(vndsize);
  235 
      /*
       * Block-device switch for vnd.  The initializer is positional, so the
       * order of the entries must not be changed.
       */
  236 const struct bdevsw vnd_bdevsw = {
  237         vndopen, vndclose, vndstrategy, vndioctl, vnddump, vndsize, D_DISK
  238 };
  239 
      /*
       * Character-device switch for vnd.  Shares open/close/ioctl with the
       * block device and adds vndread/vndwrite; the remaining slots are
       * filled with the generic no* entries.
       */
  240 const struct cdevsw vnd_cdevsw = {
  241         vndopen, vndclose, vndread, vndwrite, vndioctl,
  242         nostop, notty, nopoll, nommap, nokqfilter, D_DISK
  243 };
  244 
  245 static int      vnd_match(device_t, cfdata_t, void *);
  246 static void     vnd_attach(device_t, device_t, void *);
  247 static int      vnd_detach(device_t, int);
  248 
  249 CFATTACH_DECL_NEW(vnd, sizeof(struct vnd_softc),
  250     vnd_match, vnd_attach, vnd_detach, NULL);
  251 extern struct cfdriver vnd_cd;
  252 
  253 static struct vnd_softc *vnd_spawn(int);
  254 int     vnd_destroy(device_t);
  255 
  256 void
  257 vndattach(int num)
  258 {
  259         int error;
  260 
  261         error = config_cfattach_attach(vnd_cd.cd_name, &vnd_ca);
  262         if (error)
  263                 aprint_error("%s: unable to register cfattach\n",
  264                     vnd_cd.cd_name);
  265 }
  266 
  267 static int
  268 vnd_match(device_t self, cfdata_t cfdata, void *aux)
  269 {
  270 
  271         return 1;
  272 }
  273 
  274 static void
  275 vnd_attach(device_t parent, device_t self, void *aux)
  276 {
  277         struct vnd_softc *sc = device_private(self);
  278 
  279         sc->sc_dev = self;
  280         sc->sc_comp_offsets = NULL;
  281         sc->sc_comp_buff = NULL;
  282         sc->sc_comp_decombuf = NULL;
  283         bufq_alloc(&sc->sc_tab, "disksort", BUFQ_SORT_RAWBLOCK);
  284         disk_init(&sc->sc_dkdev, device_xname(self), NULL);
  285         if (!pmf_device_register(self, NULL, NULL))
  286                 aprint_error_dev(self, "couldn't establish power handler\n");
  287 }
  288 
  289 static int
  290 vnd_detach(device_t self, int flags)
  291 {
  292         struct vnd_softc *sc = device_private(self);
  293         if (sc->sc_flags & VNF_INITED)
  294                 return EBUSY;
  295 
  296         pmf_device_deregister(self);
  297         bufq_free(sc->sc_tab);
  298         disk_destroy(&sc->sc_dkdev);
  299 
  300         return 0;
  301 }
  302 
  303 static struct vnd_softc *
  304 vnd_spawn(int unit)
  305 {
  306         struct cfdata *cf;
  307 
  308         cf = malloc(sizeof(*cf), M_DEVBUF, M_WAITOK);
  309         cf->cf_name = vnd_cd.cd_name;
  310         cf->cf_atname = vnd_cd.cd_name;
  311         cf->cf_unit = unit;
  312         cf->cf_fstate = FSTATE_STAR;
  313 
  314         return device_private(config_attach_pseudo(cf));
  315 }
  316 
  317 int
  318 vnd_destroy(device_t dev)
  319 {
  320         int error;
  321         cfdata_t cf;
  322 
  323         cf = device_cfdata(dev);
  324         error = config_detach(dev, DETACH_QUIET);
  325         if (error)
  326                 return error;
  327         free(cf, M_DEVBUF);
  328         return 0;
  329 }
  330 
      /*
       * Open entry point shared by the block and character devices.
       *
       * Looks up the softc for the unit encoded in 'dev', creating an
       * instance with vnd_spawn() when none exists.  With the unit locked it
       * reads the disklabel if needed, validates the requested partition and
       * records the open in the character or block open mask, depending on
       * 'mode'.
       *
       * Returns 0 on success, ENOMEM if no instance could be created, the
       * error from vndlock(), or ENXIO for a non-raw partition that is not
       * usable.
       *
       * NOTE(review): an instance spawned here does not appear to be torn
       * down again when a later step fails - confirm whether that is intended.
       */
  331 static int
  332 vndopen(dev_t dev, int flags, int mode, struct lwp *l)
  333 {
  334         int unit = vndunit(dev);
  335         struct vnd_softc *sc;
  336         int error = 0, part, pmask;
  337         struct disklabel *lp;
  338 
  339 #ifdef DEBUG
  340         if (vnddebug & VDB_FOLLOW)
  341                 printf("vndopen(0x%x, 0x%x, 0x%x, %p)\n", dev, flags, mode, l);
  342 #endif
  343         sc = device_lookup_private(&vnd_cd, unit);
  344         if (sc == NULL) {
  345                 sc = vnd_spawn(unit);
  346                 if (sc == NULL)
  347                         return ENOMEM;
  348         }
  349 
  350         if ((error = vndlock(sc)) != 0)
  351                 return (error);
  352 
  353         lp = sc->sc_dkdev.dk_label;
  354 
  355         part = DISKPART(dev);
  356         pmask = (1 << part);
  357 
  358         /*
  359          * If we're initialized, check to see if there are any other
  360          * open partitions.  If not, then it's safe to update the
  361          * in-core disklabel.  Only read the disklabel if it is
  362          * not already valid.
  363          */
  364         if ((sc->sc_flags & (VNF_INITED|VNF_VLABEL)) == VNF_INITED &&
  365             sc->sc_dkdev.dk_openmask == 0)
  366                 vndgetdisklabel(dev, sc);
  367 
  368         /* Check that the partition exists. */
  369         if (part != RAW_PART) {
  370                 if (((sc->sc_flags & VNF_INITED) == 0) ||
  371                     ((part >= lp->d_npartitions) ||
  372                      (lp->d_partitions[part].p_fstype == FS_UNUSED))) {
  373                         error = ENXIO;
  374                         goto done;
  375                 }
  376         }
  377 
  378         /* Prevent our unit from being unconfigured while open. */
  379         switch (mode) {
  380         case S_IFCHR:
  381                 sc->sc_dkdev.dk_copenmask |= pmask;
  382                 break;
  383 
  384         case S_IFBLK:
  385                 sc->sc_dkdev.dk_bopenmask |= pmask;
  386                 break;
  387         }
                    /* The combined mask is always the union of both per-type masks. */
  388         sc->sc_dkdev.dk_openmask =
  389             sc->sc_dkdev.dk_copenmask | sc->sc_dkdev.dk_bopenmask;
  390 
  391  done:
  392         vndunlock(sc);
  393         return (error);
  394 }
  395 
      /*
       * Close entry point shared by the block and character devices.
       *
       * With the unit locked, clears the partition's bit in the character or
       * block open mask (selected by 'mode') and recomputes the combined
       * mask.  After unlocking, an instance that does not have VNF_INITED set
       * is destroyed with vnd_destroy().
       *
       * Returns 0 on success, ENXIO if the unit has no softc, the error from
       * vndlock(), or the error from vnd_destroy().
       */
  396 static int
  397 vndclose(dev_t dev, int flags, int mode, struct lwp *l)
  398 {
  399         int unit = vndunit(dev);
  400         struct vnd_softc *sc;
  401         int error = 0, part;
  402 
  403 #ifdef DEBUG
  404         if (vnddebug & VDB_FOLLOW)
  405                 printf("vndclose(0x%x, 0x%x, 0x%x, %p)\n", dev, flags, mode, l);
  406 #endif
  407         sc = device_lookup_private(&vnd_cd, unit);
  408         if (sc == NULL)
  409                 return ENXIO;
  410 
  411         if ((error = vndlock(sc)) != 0)
  412                 return (error);
  413 
  414         part = DISKPART(dev);
  415 
  416         /* ...that much closer to allowing unconfiguration... */
  417         switch (mode) {
  418         case S_IFCHR:
  419                 sc->sc_dkdev.dk_copenmask &= ~(1 << part);
  420                 break;
  421 
  422         case S_IFBLK:
  423                 sc->sc_dkdev.dk_bopenmask &= ~(1 << part);
  424                 break;
  425         }
  426         sc->sc_dkdev.dk_openmask =
  427             sc->sc_dkdev.dk_copenmask | sc->sc_dkdev.dk_bopenmask;
  428 
  429         vndunlock(sc);
  430 
                    /*
                     * An instance without VNF_INITED is released again.  Note
                     * that this runs after the unit lock has been dropped.
                     */
  431         if ((sc->sc_flags & VNF_INITED) == 0) {
  432                 if ((error = vnd_destroy(sc->sc_dev)) != 0) {
  433                         aprint_error_dev(sc->sc_dev,
  434                             "unable to detach instance\n");
  435                         return error;
  436                 }
  437         }
  438 
  439         return (0);
  440 }
  441 
  442 /*
  443  * Queue the request, and wakeup the kernel thread to handle it.
       *
       * The whole routine runs at splbio.  A request that fails validation
       * (no such unit, unit without VNF_INITED, size not a multiple of the
       * sector size, write to a read-only unit, failed bounds check), as well
       * as a zero-length request, is completed right here through the 'done'
       * label with b_resid set to b_bcount.  Everything else gets its absolute
       * block number stored in b_rawblkno and is put on the unit's queue for
       * vndthread().
  444  */
  445 static void
  446 vndstrategy(struct buf *bp)
  447 {
  448         int unit = vndunit(bp->b_dev);
  449         struct vnd_softc *vnd =
  450             device_lookup_private(&vnd_cd, unit);
  451         struct disklabel *lp;
  452         daddr_t blkno;
  453         int s = splbio();
  454 
  455         if (vnd == NULL) {
  456                 bp->b_error = ENXIO;
  457                 goto done;
  458         }
  459         lp = vnd->sc_dkdev.dk_label;
  460 
  461         if ((vnd->sc_flags & VNF_INITED) == 0) {
  462                 bp->b_error = ENXIO;
  463                 goto done;
  464         }
  465 
  466         /*
  467          * The transfer must be a whole number of blocks.
  468          */
  469         if ((bp->b_bcount % lp->d_secsize) != 0) {
  470                 bp->b_error = EINVAL;
  471                 goto done;
  472         }
  473 
  474         /*
  475          * check if we're read-only.
  476          */
  477         if ((vnd->sc_flags & VNF_READONLY) && !(bp->b_flags & B_READ)) {
  478                 bp->b_error = EACCES;
  479                 goto done;
  480         }
  481 
  482         /* If it's a nil transfer, wake up the top half now. */
  483         if (bp->b_bcount == 0) {
  484                 goto done;
  485         }
  486 
  487         /*
  488          * Do bounds checking and adjust transfer.  If there's an error,
  489          * the bounds check will flag that for us.
  490          */
  491         if (DISKPART(bp->b_dev) == RAW_PART) {
  492                 if (bounds_check_with_mediasize(bp, DEV_BSIZE,
  493                     vnd->sc_size) <= 0)
  494                         goto done;
  495         } else {
  496                 if (bounds_check_with_label(&vnd->sc_dkdev,
  497                     bp, vnd->sc_flags & (VNF_WLABEL|VNF_LABELLING)) <= 0)
  498                         goto done;
  499         }
  500 
  501         /*
  502          * Put the block number in terms of the logical blocksize
  503          * of the "device".
                     *
                     * NOTE(review): this assumes d_secsize is at least DEV_BSIZE;
                     * a smaller value would make the divisor zero - confirm that
                     * the label code guarantees it.
  504          */
  505 
  506         blkno = bp->b_blkno / (lp->d_secsize / DEV_BSIZE);
  507 
  508         /*
  509          * Translate the partition-relative block number to an absolute.
  510          */
  511         if (DISKPART(bp->b_dev) != RAW_PART) {
  512                 struct partition *pp;
  513 
  514                 pp = &vnd->sc_dkdev.dk_label->d_partitions[
  515                     DISKPART(bp->b_dev)];
  516                 blkno += pp->p_offset;
  517         }
  518         bp->b_rawblkno = blkno;
  519 
  520 #ifdef DEBUG
  521         if (vnddebug & VDB_FOLLOW)
  522                 printf("vndstrategy(%p): unit %d\n", bp, unit);
  523 #endif
                    /* Hand the request to vndthread(), which sleeps on &sc_tab. */
  524         BUFQ_PUT(vnd->sc_tab, bp);
  525         wakeup(&vnd->sc_tab);
  526         splx(s);
  527         return;
  528 
  529 done:
                    /* Complete the request without transferring anything. */
  530         bp->b_resid = bp->b_bcount;
  531         biodone(bp);
  532         splx(s);
  533 }
  534 
  535 static bool
  536 vnode_has_strategy(struct vnd_softc *vnd)
  537 {
  538         return vnode_has_op(vnd->sc_vp, VOFFSET(vop_bmap)) &&
  539             vnode_has_op(vnd->sc_vp, VOFFSET(vop_strategy));
  540 }
  541 
  542 /* XXX this function needs a reliable check to detect
  543  * sparse files. Otherwise, bmap/strategy may be used
  544  * and fail on non-allocated blocks. VOP_READ/VOP_WRITE
  545  * works on sparse files.
       *
       * Disabled code (compiled only if 'notyet' is defined to non-zero).
       * Returns false when the vnode lacks bmap/strategy, or when VOP_BMAP
       * of logical block 0 succeeds but reports block -1; returns true in
       * every other case, including when VOP_BMAP itself fails.
  546  */
  547 #if notyet
  548 static bool
  549 vnode_strategy_probe(struct vnd_softc *vnd)
  550 {
  551         int error;
  552         daddr_t nbn;
  553 
  554         if (!vnode_has_strategy(vnd))
  555                 return false;
  556 
  557         /* Convert the first logical block number to its
  558          * physical block number.
  559          */
  560         error = 0;
  561         vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY | LK_CANRECURSE);
  562         error = VOP_BMAP(vnd->sc_vp, 0, NULL, &nbn, NULL);
  563         VOP_UNLOCK(vnd->sc_vp, 0);
  564 
  565         /* Test if that worked. */
  566         if (error == 0 && (long)nbn == -1)
  567                 return false;
  568 
  569         return true;
  570 }
  571 #endif
  572 
      /*
       * Per-unit worker thread; 'arg' is the unit's softc.
       *
       * Announces itself by setting VNF_KTHREAD and waking sleepers on
       * &sc_kthread, then serves the requests queued by vndstrategy() until
       * VNF_VUNCONF is set.  Each request is either completed directly
       * (backing vnode without a mount, compressed read) or wrapped in a
       * vndxfer and passed to handle_with_strategy() / handle_with_rdwr().
       *
       * The loop condition and the queue are examined at splbio; every path
       * through the loop body raises splbio again before iterating.
       */
  573 static void
  574 vndthread(void *arg)
  575 {
  576         struct vnd_softc *vnd = arg;
  577         bool usestrategy;
  578         int s;
  579 
  580         /* Determine whether we can *use* VOP_BMAP and VOP_STRATEGY to
  581          * directly access the backing vnode.  If we can, use these two
  582          * operations to avoid messing with the local buffer cache.
  583          * Otherwise fall back to regular VOP_READ/VOP_WRITE operations
  584          * which are guaranteed to work with any file system. */
  585         usestrategy = vnode_has_strategy(vnd);
  586 
  587 #ifdef DEBUG
  588         if (vnddebug & VDB_INIT)
  589                 printf("vndthread: vp %p, %s\n", vnd->sc_vp,
  590                     usestrategy ?
  591                     "using bmap/strategy operations" :
  592                     "using read/write operations");
  593 #endif
  594 
  595         s = splbio();
  596         vnd->sc_flags |= VNF_KTHREAD;
  597         wakeup(&vnd->sc_kthread);
  598 
  599         /*
  600          * Dequeue requests and serve them depending on the available
  601          * vnode operations.
  602          */
  603         while ((vnd->sc_flags & VNF_VUNCONF) == 0) {
  604                 struct vndxfer *vnx;
  605                 int flags;
  606                 struct buf *obp;
  607                 struct buf *bp;
  608 
  609                 obp = BUFQ_GET(vnd->sc_tab);
  610                 if (obp == NULL) {
                                    /* Queue empty: wait for vndstrategy()'s wakeup. */
  611                         tsleep(&vnd->sc_tab, PRIBIO, "vndbp", 0);
  612                         continue;
  613                 };
  614                 splx(s);
                            /* 'flags' is only consulted by the VND_COMPRESSION code below. */
  615                 flags = obp->b_flags;
  616 #ifdef DEBUG
  617                 if (vnddebug & VDB_FOLLOW)
  618                         printf("vndthread(%p)\n", obp);
  619 #endif
  620 
                            /* Fail the request if the backing vnode has no mount. */
  621                 if (vnd->sc_vp->v_mount == NULL) {
  622                         obp->b_error = ENXIO;
  623                         goto done;
  624                 }
  625 #ifdef VND_COMPRESSION
  626                 /* handle a compressed read */
  627                 if ((flags & B_READ) != 0 && (vnd->sc_flags & VNF_COMP)) {
  628                         off_t bn;
  629                         
  630                         /* Convert to a byte offset within the file. */
  631                         bn = obp->b_rawblkno *
  632                             vnd->sc_dkdev.dk_label->d_secsize;
  633 
  634                         compstrategy(obp, bn);
  635                         goto done;
  636                 }
  637 #endif /* VND_COMPRESSION */
  638                 
  639                 /*
  640                  * Allocate a header for this transfer and link it to the
  641                  * buffer
  642                  */
  643                 s = splbio();
  644                 vnx = VND_GETXFER(vnd);
  645                 splx(s);
  646                 vnx->vx_vnd = vnd;
  647 
                            /*
                             * Throttle: wait while sc_maxactive transfers are in
                             * flight.  The wakeup is presumably issued on &sc_tab
                             * by the completion path - confirm in vndiodone().
                             */
  648                 s = splbio();
  649                 while (vnd->sc_active >= vnd->sc_maxactive) {
  650                         tsleep(&vnd->sc_tab, PRIBIO, "vndac", 0);
  651                 }
  652                 vnd->sc_active++;
  653                 splx(s);
  654 
  655                 /* Instrumentation. */
  656                 disk_busy(&vnd->sc_dkdev);
  657 
                            /*
                             * Set up the buf embedded in the transfer header as a
                             * copy of the original request, aimed at the backing
                             * vnode and completing through vndiodone().
                             */
  658                 bp = &vnx->vx_buf;
  659                 buf_init(bp);
  660                 bp->b_flags = (obp->b_flags & B_READ);
  661                 bp->b_oflags = obp->b_oflags;
  662                 bp->b_cflags = obp->b_cflags;
  663                 bp->b_iodone = vndiodone;
  664                 bp->b_private = obp;
  665                 bp->b_vp = vnd->sc_vp;
  666                 bp->b_objlock = &bp->b_vp->v_interlock;
  667                 bp->b_data = obp->b_data;
  668                 bp->b_bcount = obp->b_bcount;
  669                 BIO_COPYPRIO(bp, obp);
  670 
  671                 /* Handle the request using the appropriate operations. */
  672                 if (usestrategy)
  673                         handle_with_strategy(vnd, obp, bp);
  674                 else
  675                         handle_with_rdwr(vnd, obp, bp);
  676 
  677                 s = splbio();
  678                 continue;
  679 
  680 done:
                            /* Request finished (or failed) without a transfer header. */
  681                 biodone(obp);
  682                 s = splbio();
  683         }
  684 
                    /*
                     * Clear VNF_KTHREAD and tell sleepers on &sc_kthread that the
                     * thread is gone.  NOTE(review): assuming the two flags are
                     * distinct bits, OR-ing VNF_VUNCONF into ~VNF_KTHREAD does not
                     * change the mask, so only VNF_KTHREAD is cleared here.
                     */
  685         vnd->sc_flags &= (~VNF_KTHREAD | VNF_VUNCONF);
  686         wakeup(&vnd->sc_kthread);
  687         splx(s);
  688         kthread_exit(0);
  689 }
  690 
  691 /*
  692  * Checks if the given vnode supports the requested operation.
  693  * The operation is specified the offset returned by VOFFSET.
  694  *
  695  * XXX The test below used to determine this is quite fragile
  696  * because it relies on the file system to use genfs to specify
  697  * unimplemented operations.  There might be another way to do
  698  * it more cleanly.
  699  */
  700 static bool
  701 vnode_has_op(const struct vnode *vp, int opoffset)
  702 {
  703         int (*defaultp)(void *);
  704         int (*opp)(void *);
  705 
  706         defaultp = vp->v_op[VOFFSET(vop_default)];
  707         opp = vp->v_op[opoffset];
  708 
  709         return opp != defaultp && opp != genfs_eopnotsupp &&
  710             opp != genfs_badop && opp != genfs_nullop;
  711 }
  712 
  713 /*
  714  * Handles the read/write request given in 'bp' using the vnode's VOP_READ
  715  * and VOP_WRITE operations.
  716  *
  717  * 'obp' is a pointer to the original request fed to the vnd device.
       *
       * The transfer is carried out with a single vn_rdwr() call using the
       * unit's credentials; its return value and residual count are stored
       * in 'bp', which is then completed with biodone().
  718  */
  719 static void
  720 handle_with_rdwr(struct vnd_softc *vnd, const struct buf *obp, struct buf *bp)
  721 {
  722         bool doread;
  723         off_t offset;
  724         size_t resid;
  725         struct vnode *vp;
  726 
  727         doread = bp->b_flags & B_READ;
                    /* Byte offset within the backing file. */
  728         offset = obp->b_rawblkno * vnd->sc_dkdev.dk_label->d_secsize;
  729         vp = vnd->sc_vp;
  730 
  731 #if defined(DEBUG)
  732         if (vnddebug & VDB_IO)
  733                 printf("vnd (rdwr): vp %p, %s, rawblkno 0x%" PRIx64
  734                     ", secsize %d, offset %" PRIu64
  735                     ", bcount %d\n",
  736                     vp, doread ? "read" : "write", obp->b_rawblkno,
  737                     vnd->sc_dkdev.dk_label->d_secsize, offset,
  738                     bp->b_bcount);
  739 #endif
  740 
  741         /* Issue the read or write operation. */
  742         bp->b_error =
  743             vn_rdwr(doread ? UIO_READ : UIO_WRITE,
  744             vp, bp->b_data, bp->b_bcount, offset,
  745             UIO_SYSSPACE, 0, vnd->sc_cred, &resid, NULL);
  746         bp->b_resid = resid;
  747 
  748         /* We need to increase the number of outputs on the vnode if
  749          * there was any write to it. */
                    /*
                     * NOTE(review): presumably this balances a decrement done
                     * when 'bp' is completed below - confirm in biodone().
                     */
  750         if (!doread) {
  751                 mutex_enter(&vp->v_interlock);
  752                 vp->v_numoutput++;
  753                 mutex_exit(&vp->v_interlock);
  754         }
  755 
  756         biodone(bp);
  757 }
  758 
  759 /*
  760  * Handles the read/write request given in 'bp' using the vnode's VOP_BMAP
  761  * and VOP_STRATEGY operations.
  762  *
  763  * 'obp' is a pointer to the original request fed to the vnd device.
       *
       * The request is cut into pieces; each piece is mapped with VOP_BMAP
       * and issued with VOP_STRATEGY as a nested buffer of 'bp'.  On a
       * mapping error or a hole the remaining bytes are counted as skipped.
       * nestiobuf_done() is called on 'bp' once all pieces have been issued.
  764  */
  765 static void
  766 handle_with_strategy(struct vnd_softc *vnd, const struct buf *obp,
  767     struct buf *bp)
  768 {
  769         int bsize, error, flags, skipped;
  770         size_t resid, sz;
  771         off_t bn, offset;
  772         struct vnode *vp;
  773 
  774         flags = obp->b_flags;
  775 
                    /* For a write, account one pending output on the backing vnode. */
  776         if (!(flags & B_READ)) {
  777                 vp = bp->b_vp;
  778                 mutex_enter(&vp->v_interlock);
  779                 vp->v_numoutput++;
  780                 mutex_exit(&vp->v_interlock);
  781         }
  782 
  783         /* convert to a byte offset within the file. */
  784         bn = obp->b_rawblkno * vnd->sc_dkdev.dk_label->d_secsize;
  785 
                    /* Piece size: the I/O size reported by the backing mount. */
  786         bsize = vnd->sc_vp->v_mount->mnt_stat.f_iosize;
  787         skipped = 0;
  788 
  789         /*
  790          * Break the request into bsize pieces and feed them
  791          * sequentially using VOP_BMAP/VOP_STRATEGY.
  792          * We do it this way to keep from flooding NFS servers if we
  793          * are connected to an NFS file.  This places the burden on
  794          * the client rather than the server.
  795          */
  796         error = 0;
  797         bp->b_resid = bp->b_bcount;
  798         for (offset = 0, resid = bp->b_resid; resid;
  799             resid -= sz, offset += sz) {
  800                 struct buf *nbp;
  801                 daddr_t nbn;
  802                 int off, nra;
  803 
                            /*
                             * Map the file block containing 'bn'.  VOP_BMAP
                             * overwrites 'vp' with the vnode to issue the I/O on,
                             * 'nbn' with the block number there and 'nra' with a
                             * run length that is used below to enlarge the piece.
                             */
  804                 nra = 0;
  805                 vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY | LK_CANRECURSE);
  806                 error = VOP_BMAP(vnd->sc_vp, bn / bsize, &vp, &nbn, &nra);
  807                 VOP_UNLOCK(vnd->sc_vp, 0);
  808 
                            /* A block number of -1 is treated as a hole: I/O error. */
  809                 if (error == 0 && (long)nbn == -1)
  810                         error = EIO;
  811 
  812                 /*
  813                  * If there was an error or a hole in the file...punt.
  814                  * Note that we may have to wait for any operations
  815                  * that we have already fired off before releasing
  816                  * the buffer.
  817                  *
  818                  * XXX we could deal with holes here but it would be
  819                  * a hassle (in the write case).
  820                  */
  821                 if (error) {
  822                         skipped += resid;
  823                         break;
  824                 }
  825 
  826 #ifdef DEBUG
  827                 if (!dovndcluster)
  828                         nra = 0;
  829 #endif
  830 
                            /*
                             * 'off' is the offset of 'bn' inside its block.  The
                             * piece runs to the end of the (1 + nra)-block run,
                             * but never beyond what is left of the request.
                             */
  831                 off = bn % bsize;
  832                 sz = MIN(((off_t)1 + nra) * bsize - off, resid);
  833 #ifdef  DEBUG
  834                 if (vnddebug & VDB_IO)
  835                         printf("vndstrategy: vp %p/%p bn 0x%qx/0x%" PRIx64
  836                             " sz 0x%zx\n", vnd->sc_vp, vp, (long long)bn,
  837                             nbn, sz);
  838 #endif
  839 
                            /* Nested buffer covering [offset, offset + sz) of 'bp'. */
  840                 nbp = getiobuf(vp, true);
  841                 nestiobuf_setup(bp, nbp, offset, sz);
  842                 nbp->b_blkno = nbn + btodb(off);
  843 
  844 #if 0 /* XXX #ifdef DEBUG */
  845                 if (vnddebug & VDB_IO)
  846                         printf("vndstart(%ld): bp %p vp %p blkno "
  847                             "0x%" PRIx64 " flags %x addr %p cnt 0x%x\n",
  848                             (long) (vnd-vnd_softc), &nbp->vb_buf,
  849                             nbp->vb_buf.b_vp, nbp->vb_buf.b_blkno,
  850                             nbp->vb_buf.b_flags, nbp->vb_buf.b_data,
  851                             nbp->vb_buf.b_bcount);
  852 #endif
  853                 VOP_STRATEGY(vp, nbp);
  854                 bn += sz;
  855         }
                    /* Report the bytes that were never issued, with the error if any. */
  856         nestiobuf_done(bp, skipped, error);
  857 }
  858 
  859 static void
  860 vndiodone(struct buf *bp)
  861 {
  862         struct vndxfer *vnx = VND_BUFTOXFER(bp);
  863         struct vnd_softc *vnd = vnx->vx_vnd;
  864         struct buf *obp = bp->b_private;
  865         int s = splbio();
  866 
  867         KASSERT(&vnx->vx_buf == bp);
  868         KASSERT(vnd->sc_active > 0);
  869 #ifdef DEBUG
  870         if (vnddebug & VDB_IO) {
  871                 printf("vndiodone1: bp %p iodone: error %d\n",
  872                     bp, bp->b_error);
  873         }
  874 #endif
  875         disk_unbusy(&vnd->sc_dkdev, bp->b_bcount - bp->b_resid,
  876             (bp->b_flags & B_READ));
  877         vnd->sc_active--;
  878         if (vnd->sc_active == 0) {
  879                 wakeup(&vnd->sc_tab);
  880         }
  881         splx(s);
  882         obp->b_error = bp->b_error;
  883         obp->b_resid = bp->b_resid;
  884         buf_destroy(bp);
  885         VND_PUTXFER(vnd, vnx);
  886         biodone(obp);
  887 }
  888 
  889 /* ARGSUSED */
  890 static int
  891 vndread(dev_t dev, struct uio *uio, int flags)
  892 {
  893         int unit = vndunit(dev);
  894         struct vnd_softc *sc;
  895 
  896 #ifdef DEBUG
  897         if (vnddebug & VDB_FOLLOW)
  898                 printf("vndread(0x%x, %p)\n", dev, uio);
  899 #endif
  900 
  901         sc = device_lookup_private(&vnd_cd, unit);
  902         if (sc == NULL)
  903                 return ENXIO;
  904 
  905         if ((sc->sc_flags & VNF_INITED) == 0)
  906                 return (ENXIO);
  907 
  908         return (physio(vndstrategy, NULL, dev, B_READ, minphys, uio));
  909 }
  910 
  911 /* ARGSUSED */
  912 static int
  913 vndwrite(dev_t dev, struct uio *uio, int flags)
  914 {
  915         int unit = vndunit(dev);
  916         struct vnd_softc *sc;
  917 
  918 #ifdef DEBUG
  919         if (vnddebug & VDB_FOLLOW)
  920                 printf("vndwrite(0x%x, %p)\n", dev, uio);
  921 #endif
  922 
  923         sc = device_lookup_private(&vnd_cd, unit);
  924         if (sc == NULL)
  925                 return ENXIO;
  926 
  927         if ((sc->sc_flags & VNF_INITED) == 0)
  928                 return (ENXIO);
  929 
  930         return (physio(vndstrategy, NULL, dev, B_WRITE, minphys, uio));
  931 }
  932 
  933 static int
  934 vnd_cget(struct lwp *l, int unit, int *un, struct vattr *va)
  935 {
  936         struct vnd_softc *vnd;
  937 
  938         if (*un == -1)
  939                 *un = unit;
  940         if (*un < 0)
  941                 return EINVAL;
  942 
  943         vnd = device_lookup_private(&vnd_cd, *un);
  944         if (vnd == NULL)
  945                 return (*un >= vnd_cd.cd_ndevs) ? ENXIO : -1;
  946 
  947         if ((vnd->sc_flags & VNF_INITED) == 0)
  948                 return -1;
  949 
  950         return VOP_GETATTR(vnd->sc_vp, va, l->l_cred);
  951 }
  952 
  953 /* ARGSUSED */
  954 static int
  955 vndioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
  956 {
  957         int unit = vndunit(dev);
  958         struct vnd_softc *vnd;
  959         struct vnd_ioctl *vio;
  960         struct vattr vattr;
  961         struct nameidata nd;
  962         int error, part, pmask;
  963         size_t geomsize;
  964         int fflags;
  965 #ifdef __HAVE_OLD_DISKLABEL
  966         struct disklabel newlabel;
  967 #endif
  968 
  969 #ifdef DEBUG
  970         if (vnddebug & VDB_FOLLOW)
  971                 printf("vndioctl(0x%x, 0x%lx, %p, 0x%x, %p): unit %d\n",
  972                     dev, cmd, data, flag, l->l_proc, unit);
  973 #endif
  974         vnd = device_lookup_private(&vnd_cd, unit);
  975         if (vnd == NULL &&
  976 #ifdef COMPAT_30
  977             cmd != VNDIOOCGET &&
  978 #endif
  979             cmd != VNDIOCGET)
  980                 return ENXIO;
  981         vio = (struct vnd_ioctl *)data;
  982 
  983         /* Must be open for writes for these commands... */
  984         switch (cmd) {
  985         case VNDIOCSET:
  986         case VNDIOCCLR:
  987         case DIOCSDINFO:
  988         case DIOCWDINFO:
  989 #ifdef __HAVE_OLD_DISKLABEL
  990         case ODIOCSDINFO:
  991         case ODIOCWDINFO:
  992 #endif
  993         case DIOCKLABEL:
  994         case DIOCWLABEL:
  995                 if ((flag & FWRITE) == 0)
  996                         return (EBADF);
  997         }
  998 
  999         /* Must be initialized for these... */
 1000         switch (cmd) {
 1001         case VNDIOCCLR:
 1002         case DIOCGDINFO:
 1003         case DIOCSDINFO:
 1004         case DIOCWDINFO:
 1005         case DIOCGPART:
 1006         case DIOCKLABEL:
 1007         case DIOCWLABEL:
 1008         case DIOCGDEFLABEL:
 1009         case DIOCCACHESYNC:
 1010 #ifdef __HAVE_OLD_DISKLABEL
 1011         case ODIOCGDINFO:
 1012         case ODIOCSDINFO:
 1013         case ODIOCWDINFO:
 1014         case ODIOCGDEFLABEL:
 1015 #endif
 1016                 if ((vnd->sc_flags & VNF_INITED) == 0)
 1017                         return (ENXIO);
 1018         }
 1019 
 1020         switch (cmd) {
 1021         case VNDIOCSET:
 1022                 if (vnd->sc_flags & VNF_INITED)
 1023                         return (EBUSY);
 1024 
 1025                 if ((error = vndlock(vnd)) != 0)
 1026                         return (error);
 1027 
 1028                 fflags = FREAD;
 1029                 if ((vio->vnd_flags & VNDIOF_READONLY) == 0)
 1030                         fflags |= FWRITE;
 1031                 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, vio->vnd_file);
 1032                 if ((error = vn_open(&nd, fflags, 0)) != 0)
 1033                         goto unlock_and_exit;
 1034                 KASSERT(l);
 1035                 error = VOP_GETATTR(nd.ni_vp, &vattr, l->l_cred);
 1036                 if (!error && nd.ni_vp->v_type != VREG)
 1037                         error = EOPNOTSUPP;
 1038                 if (error) {
 1039                         VOP_UNLOCK(nd.ni_vp, 0);
 1040                         goto close_and_exit;
 1041                 }
 1042 
 1043                 /* If using a compressed file, initialize its info */
 1044                 /* (or abort with an error if kernel has no compression) */
 1045                 if (vio->vnd_flags & VNF_COMP) {
 1046 #ifdef VND_COMPRESSION
 1047                         struct vnd_comp_header *ch;
 1048                         int i;
 1049                         u_int32_t comp_size;
 1050                         u_int32_t comp_maxsize;
 1051  
 1052                         /* allocate space for compresed file header */
 1053                         ch = malloc(sizeof(struct vnd_comp_header),
 1054                         M_TEMP, M_WAITOK);
 1055  
 1056                         /* read compressed file header */
 1057                         error = vn_rdwr(UIO_READ, nd.ni_vp, (void *)ch,
 1058                           sizeof(struct vnd_comp_header), 0, UIO_SYSSPACE,
 1059                           IO_UNIT|IO_NODELOCKED, l->l_cred, NULL, NULL);
 1060                         if(error) {
 1061                                 free(ch, M_TEMP);
 1062                                 VOP_UNLOCK(nd.ni_vp, 0);
 1063                                 goto close_and_exit;
 1064                         }
 1065  
 1066                         /* save some header info */
 1067                         vnd->sc_comp_blksz = ntohl(ch->block_size);
 1068                         /* note last offset is the file byte size */
 1069                         vnd->sc_comp_numoffs = ntohl(ch->num_blocks)+1;
 1070                         free(ch, M_TEMP);
 1071                         if (vnd->sc_comp_blksz == 0 ||
 1072                             vnd->sc_comp_blksz % DEV_BSIZE !=0) {
 1073                                 VOP_UNLOCK(nd.ni_vp, 0);
 1074                                 error = EINVAL;
 1075                                 goto close_and_exit;
 1076                         }
 1077                         if(sizeof(struct vnd_comp_header) +
 1078                           sizeof(u_int64_t) * vnd->sc_comp_numoffs >
 1079                           vattr.va_size) {
 1080                                 VOP_UNLOCK(nd.ni_vp, 0);
 1081                                 error = EINVAL;
 1082                                 goto close_and_exit;
 1083                         }
 1084  
 1085                         /* set decompressed file size */
 1086                         vattr.va_size =
 1087                             ((u_quad_t)vnd->sc_comp_numoffs - 1) *
 1088                              (u_quad_t)vnd->sc_comp_blksz;
 1089  
 1090                         /* allocate space for all the compressed offsets */
 1091                         vnd->sc_comp_offsets =
 1092                         malloc(sizeof(u_int64_t) * vnd->sc_comp_numoffs,
 1093                         M_DEVBUF, M_WAITOK);
 1094  
 1095                         /* read in the offsets */
 1096                         error = vn_rdwr(UIO_READ, nd.ni_vp,
 1097                           (void *)vnd->sc_comp_offsets,
 1098                           sizeof(u_int64_t) * vnd->sc_comp_numoffs,
 1099                           sizeof(struct vnd_comp_header), UIO_SYSSPACE,
 1100                           IO_UNIT|IO_NODELOCKED, l->l_cred, NULL, NULL);
 1101                         if(error) {
 1102                                 VOP_UNLOCK(nd.ni_vp, 0);
 1103                                 goto close_and_exit;
 1104                         }
 1105                         /*
 1106                          * find largest block size (used for allocation limit).
 1107                          * Also convert offset to native byte order.
 1108                          */
 1109                         comp_maxsize = 0;
 1110                         for (i = 0; i < vnd->sc_comp_numoffs - 1; i++) {
 1111                                 vnd->sc_comp_offsets[i] =
 1112                                   be64toh(vnd->sc_comp_offsets[i]);
 1113                                 comp_size = be64toh(vnd->sc_comp_offsets[i + 1])
 1114                                   - vnd->sc_comp_offsets[i];
 1115                                 if (comp_size > comp_maxsize)
 1116                                         comp_maxsize = comp_size;
 1117                         }
 1118                         vnd->sc_comp_offsets[vnd->sc_comp_numoffs - 1] =
 1119                           be64toh(vnd->sc_comp_offsets[vnd->sc_comp_numoffs - 1]);
 1120  
 1121                         /* create compressed data buffer */
 1122                         vnd->sc_comp_buff = malloc(comp_maxsize,
 1123                           M_DEVBUF, M_WAITOK);
 1124  
 1125                         /* create decompressed buffer */
 1126                         vnd->sc_comp_decombuf = malloc(vnd->sc_comp_blksz,
 1127                           M_DEVBUF, M_WAITOK);
 1128                         vnd->sc_comp_buffblk = -1;
 1129  
 1130                         /* Initialize decompress stream */
 1131                         bzero(&vnd->sc_comp_stream, sizeof(z_stream));
 1132                         vnd->sc_comp_stream.zalloc = vnd_alloc;
 1133                         vnd->sc_comp_stream.zfree = vnd_free;
 1134                         error = inflateInit2(&vnd->sc_comp_stream, MAX_WBITS);
 1135                         if(error) {
 1136                                 if(vnd->sc_comp_stream.msg)
 1137                                         printf("vnd%d: compressed file, %s\n",
 1138                                           unit, vnd->sc_comp_stream.msg);
 1139                                 VOP_UNLOCK(nd.ni_vp, 0);
 1140                                 error = EINVAL;
 1141                                 goto close_and_exit;
 1142                         }
 1143  
 1144                         vnd->sc_flags |= VNF_COMP | VNF_READONLY;
 1145 #else /* !VND_COMPRESSION */
 1146                         VOP_UNLOCK(nd.ni_vp, 0);
 1147                         error = EOPNOTSUPP;
 1148                         goto close_and_exit;
 1149 #endif /* VND_COMPRESSION */
 1150                 }
 1151  
 1152                 VOP_UNLOCK(nd.ni_vp, 0);
 1153                 vnd->sc_vp = nd.ni_vp;
 1154                 vnd->sc_size = btodb(vattr.va_size);    /* note truncation */
 1155 
 1156                 /*
 1157                  * Use pseudo-geometry specified.  If none was provided,
 1158                  * use "standard" Adaptec fictitious geometry.
 1159                  */
 1160                 if (vio->vnd_flags & VNDIOF_HASGEOM) {
 1161 
 1162                         memcpy(&vnd->sc_geom, &vio->vnd_geom,
 1163                             sizeof(vio->vnd_geom));
 1164 
 1165                         /*
 1166                          * Sanity-check the sector size.
 1167                          * XXX Don't allow secsize < DEV_BSIZE.  Should
 1168                          * XXX we?
 1169                          */
 1170                         if (vnd->sc_geom.vng_secsize < DEV_BSIZE ||
 1171                             (vnd->sc_geom.vng_secsize % DEV_BSIZE) != 0 ||
 1172                             vnd->sc_geom.vng_ncylinders == 0 ||
 1173                             (vnd->sc_geom.vng_ntracks *
 1174                              vnd->sc_geom.vng_nsectors) == 0) {
 1175                                 error = EINVAL;
 1176                                 goto close_and_exit;
 1177                         }
 1178 
 1179                         /*
 1180                          * Compute the size (in DEV_BSIZE blocks) specified
 1181                          * by the geometry.
 1182                          */
 1183                         geomsize = (vnd->sc_geom.vng_nsectors *
 1184                             vnd->sc_geom.vng_ntracks *
 1185                             vnd->sc_geom.vng_ncylinders) *
 1186                             (vnd->sc_geom.vng_secsize / DEV_BSIZE);
 1187 
 1188                         /*
 1189                          * Sanity-check the size against the specified
 1190                          * geometry.
 1191                          */
 1192                         if (vnd->sc_size < geomsize) {
 1193                                 error = EINVAL;
 1194                                 goto close_and_exit;
 1195                         }
 1196                 } else if (vnd->sc_size >= (32 * 64)) {
 1197                         /*
 1198                          * Size must be at least 2048 DEV_BSIZE blocks
 1199                          * (1M) in order to use this geometry.
 1200                          */
 1201                         vnd->sc_geom.vng_secsize = DEV_BSIZE;
 1202                         vnd->sc_geom.vng_nsectors = 32;
 1203                         vnd->sc_geom.vng_ntracks = 64;
 1204                         vnd->sc_geom.vng_ncylinders = vnd->sc_size / (64 * 32);
 1205                 } else {
 1206                         vnd->sc_geom.vng_secsize = DEV_BSIZE;
 1207                         vnd->sc_geom.vng_nsectors = 1;
 1208                         vnd->sc_geom.vng_ntracks = 1;
 1209                         vnd->sc_geom.vng_ncylinders = vnd->sc_size;
 1210                 }
 1211 
 1212                 vnd_set_properties(vnd);
 1213 
 1214                 if (vio->vnd_flags & VNDIOF_READONLY) {
 1215                         vnd->sc_flags |= VNF_READONLY;
 1216                 }
 1217 
 1218                 if ((error = vndsetcred(vnd, l->l_cred)) != 0)
 1219                         goto close_and_exit;
 1220 
 1221                 vndthrottle(vnd, vnd->sc_vp);
 1222                 vio->vnd_size = dbtob(vnd->sc_size);
 1223                 vnd->sc_flags |= VNF_INITED;
 1224 
 1225                 /* create the kernel thread, wait for it to be up */
 1226                 error = kthread_create(PRI_NONE, 0, NULL, vndthread, vnd,
 1227                     &vnd->sc_kthread, device_xname(vnd->sc_dev));
 1228                 if (error)
 1229                         goto close_and_exit;
 1230                 while ((vnd->sc_flags & VNF_KTHREAD) == 0) {
 1231                         tsleep(&vnd->sc_kthread, PRIBIO, "vndthr", 0);
 1232                 }
 1233 #ifdef DEBUG
 1234                 if (vnddebug & VDB_INIT)
 1235                         printf("vndioctl: SET vp %p size 0x%lx %d/%d/%d/%d\n",
 1236                             vnd->sc_vp, (unsigned long) vnd->sc_size,
 1237                             vnd->sc_geom.vng_secsize,
 1238                             vnd->sc_geom.vng_nsectors,
 1239                             vnd->sc_geom.vng_ntracks,
 1240                             vnd->sc_geom.vng_ncylinders);
 1241 #endif
 1242 
 1243                 /* Attach the disk. */
 1244                 disk_attach(&vnd->sc_dkdev);
 1245 
 1246                 /* Initialize the xfer and buffer pools. */
 1247                 pool_init(&vnd->sc_vxpool, sizeof(struct vndxfer), 0,
 1248                     0, 0, "vndxpl", NULL, IPL_BIO);
 1249 
 1250                 /* Try and read the disklabel. */
 1251                 vndgetdisklabel(dev, vnd);
 1252 
 1253                 vndunlock(vnd);
 1254 
 1255                 break;
 1256 
 1257 close_and_exit:
 1258                 (void) vn_close(nd.ni_vp, fflags, l->l_cred);
 1259 unlock_and_exit:
 1260 #ifdef VND_COMPRESSION
 1261                 /* free any allocated memory (for compressed file) */
 1262                 if(vnd->sc_comp_offsets) {
 1263                         free(vnd->sc_comp_offsets, M_DEVBUF);
 1264                         vnd->sc_comp_offsets = NULL;
 1265                 }
 1266                 if(vnd->sc_comp_buff) {
 1267                         free(vnd->sc_comp_buff, M_DEVBUF);
 1268                         vnd->sc_comp_buff = NULL;
 1269                 }
 1270                 if(vnd->sc_comp_decombuf) {
 1271                         free(vnd->sc_comp_decombuf, M_DEVBUF);
 1272                         vnd->sc_comp_decombuf = NULL;
 1273                 }
 1274 #endif /* VND_COMPRESSION */
 1275                 vndunlock(vnd);
 1276                 return (error);
 1277 
 1278         case VNDIOCCLR:
 1279                 if ((error = vndlock(vnd)) != 0)
 1280                         return (error);
 1281 
 1282                 /*
 1283                  * Don't unconfigure if any other partitions are open
 1284                  * or if both the character and block flavors of this
 1285                  * partition are open.
 1286                  */
 1287                 part = DISKPART(dev);
 1288                 pmask = (1 << part);
 1289                 if (((vnd->sc_dkdev.dk_openmask & ~pmask) ||
 1290                     ((vnd->sc_dkdev.dk_bopenmask & pmask) &&
 1291                     (vnd->sc_dkdev.dk_copenmask & pmask))) &&
 1292                         !(vio->vnd_flags & VNDIOF_FORCE)) {
 1293                         vndunlock(vnd);
 1294                         return (EBUSY);
 1295                 }
 1296 
 1297                 /*
 1298                  * XXX vndclear() might call vndclose() implicitely;
 1299                  * release lock to avoid recursion
 1300                  */
 1301                 vndunlock(vnd);
 1302                 vndclear(vnd, minor(dev));
 1303 #ifdef DEBUG
 1304                 if (vnddebug & VDB_INIT)
 1305                         printf("vndioctl: CLRed\n");
 1306 #endif
 1307 
 1308                 /* Destroy the xfer and buffer pools. */
 1309                 pool_destroy(&vnd->sc_vxpool);
 1310 
 1311                 /* Detatch the disk. */
 1312                 disk_detach(&vnd->sc_dkdev);
 1313                 break;
 1314 
 1315 #ifdef COMPAT_30
 1316         case VNDIOOCGET: {
 1317                 struct vnd_ouser *vnu;
 1318                 struct vattr va;
 1319                 vnu = (struct vnd_ouser *)data;
 1320                 KASSERT(l);
 1321                 switch (error = vnd_cget(l, unit, &vnu->vnu_unit, &va)) {
 1322                 case 0:
 1323                         vnu->vnu_dev = va.va_fsid;
 1324                         vnu->vnu_ino = va.va_fileid;
 1325                         break;
 1326                 case -1:
 1327                         /* unused is not an error */
 1328                         vnu->vnu_dev = 0;
 1329                         vnu->vnu_ino = 0;
 1330                         break;
 1331                 default:
 1332                         return error;
 1333                 }
 1334                 break;
 1335         }
 1336 #endif
 1337         case VNDIOCGET: {
 1338                 struct vnd_user *vnu;
 1339                 struct vattr va;
 1340                 vnu = (struct vnd_user *)data;
 1341                 KASSERT(l);
 1342                 switch (error = vnd_cget(l, unit, &vnu->vnu_unit, &va)) {
 1343                 case 0:
 1344                         vnu->vnu_dev = va.va_fsid;
 1345                         vnu->vnu_ino = va.va_fileid;
 1346                         break;
 1347                 case -1:
 1348                         /* unused is not an error */
 1349                         vnu->vnu_dev = 0;
 1350                         vnu->vnu_ino = 0;
 1351                         break;
 1352                 default:
 1353                         return error;
 1354                 }
 1355                 break;
 1356         }
 1357 
 1358         case DIOCGDINFO:
 1359                 *(struct disklabel *)data = *(vnd->sc_dkdev.dk_label);
 1360                 break;
 1361 
 1362 #ifdef __HAVE_OLD_DISKLABEL
 1363         case ODIOCGDINFO:
 1364                 newlabel = *(vnd->sc_dkdev.dk_label);
 1365                 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
 1366                         return ENOTTY;
 1367                 memcpy(data, &newlabel, sizeof (struct olddisklabel));
 1368                 break;
 1369 #endif
 1370 
 1371         case DIOCGPART:
 1372                 ((struct partinfo *)data)->disklab = vnd->sc_dkdev.dk_label;
 1373                 ((struct partinfo *)data)->part =
 1374                     &vnd->sc_dkdev.dk_label->d_partitions[DISKPART(dev)];
 1375                 break;
 1376 
 1377         case DIOCWDINFO:
 1378         case DIOCSDINFO:
 1379 #ifdef __HAVE_OLD_DISKLABEL
 1380         case ODIOCWDINFO:
 1381         case ODIOCSDINFO:
 1382 #endif
 1383         {
 1384                 struct disklabel *lp;
 1385 
 1386                 if ((error = vndlock(vnd)) != 0)
 1387                         return (error);
 1388 
 1389                 vnd->sc_flags |= VNF_LABELLING;
 1390 
 1391 #ifdef __HAVE_OLD_DISKLABEL
 1392                 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) {
 1393                         memset(&newlabel, 0, sizeof newlabel);
 1394                         memcpy(&newlabel, data, sizeof (struct olddisklabel));
 1395                         lp = &newlabel;
 1396                 } else
 1397 #endif
 1398                 lp = (struct disklabel *)data;
 1399 
 1400                 error = setdisklabel(vnd->sc_dkdev.dk_label,
 1401                     lp, 0, vnd->sc_dkdev.dk_cpulabel);
 1402                 if (error == 0) {
 1403                         if (cmd == DIOCWDINFO
 1404 #ifdef __HAVE_OLD_DISKLABEL
 1405                             || cmd == ODIOCWDINFO
 1406 #endif
 1407                            )
 1408                                 error = writedisklabel(VNDLABELDEV(dev),
 1409                                     vndstrategy, vnd->sc_dkdev.dk_label,
 1410                                     vnd->sc_dkdev.dk_cpulabel);
 1411                 }
 1412 
 1413                 vnd->sc_flags &= ~VNF_LABELLING;
 1414 
 1415                 vndunlock(vnd);
 1416 
 1417                 if (error)
 1418                         return (error);
 1419                 break;
 1420         }
 1421 
 1422         case DIOCKLABEL:
 1423                 if (*(int *)data != 0)
 1424                         vnd->sc_flags |= VNF_KLABEL;
 1425                 else
 1426                         vnd->sc_flags &= ~VNF_KLABEL;
 1427                 break;
 1428 
 1429         case DIOCWLABEL:
 1430                 if (*(int *)data != 0)
 1431                         vnd->sc_flags |= VNF_WLABEL;
 1432                 else
 1433                         vnd->sc_flags &= ~VNF_WLABEL;
 1434                 break;
 1435 
 1436         case DIOCGDEFLABEL:
 1437                 vndgetdefaultlabel(vnd, (struct disklabel *)data);
 1438                 break;
 1439 
 1440 #ifdef __HAVE_OLD_DISKLABEL
 1441         case ODIOCGDEFLABEL:
 1442                 vndgetdefaultlabel(vnd, &newlabel);
 1443                 if (newlabel.d_npartitions > OLDMAXPARTITIONS)
 1444                         return ENOTTY;
 1445                 memcpy(data, &newlabel, sizeof (struct olddisklabel));
 1446                 break;
 1447 #endif
 1448 
 1449         case DIOCCACHESYNC:
 1450                 vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY);
 1451                 error = VOP_FSYNC(vnd->sc_vp, vnd->sc_cred,
 1452                     FSYNC_WAIT | FSYNC_DATAONLY | FSYNC_CACHE, 0, 0);
 1453                 VOP_UNLOCK(vnd->sc_vp, 0);
 1454                 return error;
 1455 
 1456         default:
 1457                 return (ENOTTY);
 1458         }
 1459 
 1460         return (0);
 1461 }
 1462 
 1463 /*
 1464  * Duplicate the current processes' credentials.  Since we are called only
 1465  * as the result of a SET ioctl and only root can do that, any future access
 1466  * to this "disk" is essentially as root.  Note that credentials may change
 1467  * if some other uid can write directly to the mapped file (NFS).
 1468  */
 1469 static int
 1470 vndsetcred(struct vnd_softc *vnd, kauth_cred_t cred)
 1471 {
 1472         struct uio auio;
 1473         struct iovec aiov;
 1474         char *tmpbuf;
 1475         int error;
 1476 
 1477         vnd->sc_cred = kauth_cred_dup(cred);
 1478         tmpbuf = malloc(DEV_BSIZE, M_TEMP, M_WAITOK);
 1479 
 1480         /* XXX: Horrible kludge to establish credentials for NFS */
 1481         aiov.iov_base = tmpbuf;
 1482         aiov.iov_len = min(DEV_BSIZE, dbtob(vnd->sc_size));
 1483         auio.uio_iov = &aiov;
 1484         auio.uio_iovcnt = 1;
 1485         auio.uio_offset = 0;
 1486         auio.uio_rw = UIO_READ;
 1487         auio.uio_resid = aiov.iov_len;
 1488         UIO_SETUP_SYSSPACE(&auio);
 1489         vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY);
 1490         error = VOP_READ(vnd->sc_vp, &auio, 0, vnd->sc_cred);
 1491         if (error == 0) {
 1492                 /*
 1493                  * Because vnd does all IO directly through the vnode
 1494                  * we need to flush (at least) the buffer from the above
 1495                  * VOP_READ from the buffer cache to prevent cache
 1496                  * incoherencies.  Also, be careful to write dirty
 1497                  * buffers back to stable storage.
 1498                  */
 1499                 error = vinvalbuf(vnd->sc_vp, V_SAVE, vnd->sc_cred,
 1500                             curlwp, 0, 0);
 1501         }
 1502         VOP_UNLOCK(vnd->sc_vp, 0);
 1503 
 1504         free(tmpbuf, M_TEMP);
 1505         return (error);
 1506 }
 1507 
 1508 /*
 1509  * Set maxactive based on FS type
 1510  */
 1511 static void
 1512 vndthrottle(struct vnd_softc *vnd, struct vnode *vp)
 1513 {
 1514 #ifdef NFS
 1515         extern int (**nfsv2_vnodeop_p)(void *);
 1516 
 1517         if (vp->v_op == nfsv2_vnodeop_p)
 1518                 vnd->sc_maxactive = 2;
 1519         else
 1520 #endif
 1521                 vnd->sc_maxactive = 8;
 1522 
 1523         if (vnd->sc_maxactive < 1)
 1524                 vnd->sc_maxactive = 1;
 1525 }
 1526 
#if 0
/*
 * vndshutdown: unconfigure every unit that is currently initialized.
 *
 * This function is compiled out.
 * NOTE(review): it calls vndclear() with a single argument, whereas
 * vndclear() in this file takes (vnd, myminor); it also iterates over
 * vnd_softc[]/numvnd, which presumably no longer exist now that units
 * are looked up with device_lookup_private() -- confirm and update
 * before re-enabling.
 */
static void
vndshutdown(void)
{
        struct vnd_softc *vnd;

        for (vnd = &vnd_softc[0]; vnd < &vnd_softc[numvnd]; vnd++)
                if (vnd->sc_flags & VNF_INITED)
                        vndclear(vnd);
}
#endif
 1538 
 1539 static void
 1540 vndclear(struct vnd_softc *vnd, int myminor)
 1541 {
 1542         struct vnode *vp = vnd->sc_vp;
 1543         int fflags = FREAD;
 1544         int bmaj, cmaj, i, mn;
 1545         int s;
 1546 
 1547 #ifdef DEBUG
 1548         if (vnddebug & VDB_FOLLOW)
 1549                 printf("vndclear(%p): vp %p\n", vnd, vp);
 1550 #endif
 1551         /* locate the major number */
 1552         bmaj = bdevsw_lookup_major(&vnd_bdevsw);
 1553         cmaj = cdevsw_lookup_major(&vnd_cdevsw);
 1554 
 1555         /* Nuke the vnodes for any open instances */
 1556         for (i = 0; i < MAXPARTITIONS; i++) {
 1557                 mn = DISKMINOR(device_unit(vnd->sc_dev), i);
 1558                 vdevgone(bmaj, mn, mn, VBLK);
 1559                 if (mn != myminor) /* XXX avoid to kill own vnode */
 1560                         vdevgone(cmaj, mn, mn, VCHR);
 1561         }
 1562 
 1563         if ((vnd->sc_flags & VNF_READONLY) == 0)
 1564                 fflags |= FWRITE;
 1565 
 1566         s = splbio();
 1567         bufq_drain(vnd->sc_tab);
 1568         splx(s);
 1569 
 1570         vnd->sc_flags |= VNF_VUNCONF;
 1571         wakeup(&vnd->sc_tab);
 1572         while (vnd->sc_flags & VNF_KTHREAD)
 1573                 tsleep(&vnd->sc_kthread, PRIBIO, "vnthr", 0);
 1574 
 1575 #ifdef VND_COMPRESSION
 1576         /* free the compressed file buffers */
 1577         if(vnd->sc_flags & VNF_COMP) {
 1578                 if(vnd->sc_comp_offsets) {
 1579                         free(vnd->sc_comp_offsets, M_DEVBUF);
 1580                         vnd->sc_comp_offsets = NULL;
 1581                 }
 1582                 if(vnd->sc_comp_buff) {
 1583                         free(vnd->sc_comp_buff, M_DEVBUF);
 1584                         vnd->sc_comp_buff = NULL;
 1585                 }
 1586                 if(vnd->sc_comp_decombuf) {
 1587                         free(vnd->sc_comp_decombuf, M_DEVBUF);
 1588                         vnd->sc_comp_decombuf = NULL;
 1589                 }
 1590         }
 1591 #endif /* VND_COMPRESSION */
 1592         vnd->sc_flags &=
 1593             ~(VNF_INITED | VNF_READONLY | VNF_VLABEL
 1594               | VNF_VUNCONF | VNF_COMP);
 1595         if (vp == (struct vnode *)0)
 1596                 panic("vndclear: null vp");
 1597         (void) vn_close(vp, fflags, vnd->sc_cred);
 1598         kauth_cred_free(vnd->sc_cred);
 1599         vnd->sc_vp = (struct vnode *)0;
 1600         vnd->sc_cred = (kauth_cred_t)0;
 1601         vnd->sc_size = 0;
 1602 }
 1603 
 1604 static int
 1605 vndsize(dev_t dev)
 1606 {
 1607         struct vnd_softc *sc;
 1608         struct disklabel *lp;
 1609         int part, unit, omask;
 1610         int size;
 1611 
 1612         unit = vndunit(dev);
 1613         sc = device_lookup_private(&vnd_cd, unit);
 1614         if (sc == NULL)
 1615                 return -1;
 1616 
 1617         if ((sc->sc_flags & VNF_INITED) == 0)
 1618                 return (-1);
 1619 
 1620         part = DISKPART(dev);
 1621         omask = sc->sc_dkdev.dk_openmask & (1 << part);
 1622         lp = sc->sc_dkdev.dk_label;
 1623 
 1624         if (omask == 0 && vndopen(dev, 0, S_IFBLK, curlwp))     /* XXX */
 1625                 return (-1);
 1626 
 1627         if (lp->d_partitions[part].p_fstype != FS_SWAP)
 1628                 size = -1;
 1629         else
 1630                 size = lp->d_partitions[part].p_size *
 1631                     (lp->d_secsize / DEV_BSIZE);
 1632 
 1633         if (omask == 0 && vndclose(dev, 0, S_IFBLK, curlwp))    /* XXX */
 1634                 return (-1);
 1635 
 1636         return (size);
 1637 }
 1638 
 1639 static int
 1640 vnddump(dev_t dev, daddr_t blkno, void *va,
 1641     size_t size)
 1642 {
 1643 
 1644         /* Not implemented. */
 1645         return ENXIO;
 1646 }
 1647 
 1648 static void
 1649 vndgetdefaultlabel(struct vnd_softc *sc, struct disklabel *lp)
 1650 {
 1651         struct vndgeom *vng = &sc->sc_geom;
 1652         struct partition *pp;
 1653 
 1654         memset(lp, 0, sizeof(*lp));
 1655 
 1656         lp->d_secperunit = sc->sc_size / (vng->vng_secsize / DEV_BSIZE);
 1657         lp->d_secsize = vng->vng_secsize;
 1658         lp->d_nsectors = vng->vng_nsectors;
 1659         lp->d_ntracks = vng->vng_ntracks;
 1660         lp->d_ncylinders = vng->vng_ncylinders;
 1661         lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
 1662 
 1663         strncpy(lp->d_typename, "vnd", sizeof(lp->d_typename));
 1664         lp->d_type = DTYPE_VND;
 1665         strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname));
 1666         lp->d_rpm = 3600;
 1667         lp->d_interleave = 1;
 1668         lp->d_flags = 0;
 1669 
 1670         pp = &lp->d_partitions[RAW_PART];
 1671         pp->p_offset = 0;
 1672         pp->p_size = lp->d_secperunit;
 1673         pp->p_fstype = FS_UNUSED;
 1674         lp->d_npartitions = RAW_PART + 1;
 1675 
 1676         lp->d_magic = DISKMAGIC;
 1677         lp->d_magic2 = DISKMAGIC;
 1678         lp->d_checksum = dkcksum(lp);
 1679 }
 1680 
 1681 /*
 1682  * Read the disklabel from a vnd.  If one is not present, create a fake one.
 1683  */
 1684 static void
 1685 vndgetdisklabel(dev_t dev, struct vnd_softc *sc)
 1686 {
 1687         const char *errstring;
 1688         struct disklabel *lp = sc->sc_dkdev.dk_label;
 1689         struct cpu_disklabel *clp = sc->sc_dkdev.dk_cpulabel;
 1690         int i;
 1691 
 1692         memset(clp, 0, sizeof(*clp));
 1693 
 1694         vndgetdefaultlabel(sc, lp);
 1695 
 1696         /*
 1697          * Call the generic disklabel extraction routine.
 1698          */
 1699         errstring = readdisklabel(VNDLABELDEV(dev), vndstrategy, lp, clp);
 1700         if (errstring) {
 1701                 /*
 1702                  * Lack of disklabel is common, but we print the warning
 1703                  * anyway, since it might contain other useful information.
 1704                  */
 1705                 aprint_normal_dev(sc->sc_dev, "%s\n", errstring);
 1706 
 1707                 /*
 1708                  * For historical reasons, if there's no disklabel
 1709                  * present, all partitions must be FS_BSDFFS and
 1710                  * occupy the entire disk.
 1711                  */
 1712                 for (i = 0; i < MAXPARTITIONS; i++) {
 1713                         /*
 1714                          * Don't wipe out port specific hack (such as
 1715                          * dos partition hack of i386 port).
 1716                          */
 1717                         if (lp->d_partitions[i].p_size != 0)
 1718                                 continue;
 1719 
 1720                         lp->d_partitions[i].p_size = lp->d_secperunit;
 1721                         lp->d_partitions[i].p_offset = 0;
 1722                         lp->d_partitions[i].p_fstype = FS_BSDFFS;
 1723                 }
 1724 
 1725                 strncpy(lp->d_packname, "default label",
 1726                     sizeof(lp->d_packname));
 1727 
 1728                 lp->d_npartitions = MAXPARTITIONS;
 1729                 lp->d_checksum = dkcksum(lp);
 1730         }
 1731 
 1732         /* In-core label now valid. */
 1733         sc->sc_flags |= VNF_VLABEL;
 1734 }
 1735 
 1736 /*
 1737  * Wait interruptibly for an exclusive lock.
 1738  *
 1739  * XXX
 1740  * Several drivers do this; it should be abstracted and made MP-safe.
 1741  */
 1742 static int
 1743 vndlock(struct vnd_softc *sc)
 1744 {
 1745         int error;
 1746 
 1747         while ((sc->sc_flags & VNF_LOCKED) != 0) {
 1748                 sc->sc_flags |= VNF_WANTED;
 1749                 if ((error = tsleep(sc, PRIBIO | PCATCH, "vndlck", 0)) != 0)
 1750                         return (error);
 1751         }
 1752         sc->sc_flags |= VNF_LOCKED;
 1753         return (0);
 1754 }
 1755 
 1756 /*
 1757  * Unlock and wake up any waiters.
 1758  */
 1759 static void
 1760 vndunlock(struct vnd_softc *sc)
 1761 {
 1762 
 1763         sc->sc_flags &= ~VNF_LOCKED;
 1764         if ((sc->sc_flags & VNF_WANTED) != 0) {
 1765                 sc->sc_flags &= ~VNF_WANTED;
 1766                 wakeup(sc);
 1767         }
 1768 }
 1769 
 1770 #ifdef VND_COMPRESSION
 1771 /* compressed file read */
 1772 static void
 1773 compstrategy(struct buf *bp, off_t bn)
 1774 {
 1775         int error;
 1776         int unit = vndunit(bp->b_dev);
 1777         struct vnd_softc *vnd =
 1778             device_lookup_private(&vnd_cd, unit);
 1779         u_int32_t comp_block;
 1780         struct uio auio;
 1781         char *addr;
 1782         int s;
 1783 
 1784         /* set up constants for data move */
 1785         auio.uio_rw = UIO_READ;
 1786         UIO_SETUP_SYSSPACE(&auio);
 1787 
 1788         /* read, and transfer the data */
 1789         addr = bp->b_data;
 1790         bp->b_resid = bp->b_bcount;
 1791         s = splbio();
 1792         while (bp->b_resid > 0) {
 1793                 unsigned length;
 1794                 size_t length_in_buffer;
 1795                 u_int32_t offset_in_buffer;
 1796                 struct iovec aiov;
 1797 
 1798                 /* calculate the compressed block number */
 1799                 comp_block = bn / (off_t)vnd->sc_comp_blksz;
 1800 
 1801                 /* check for good block number */
 1802                 if (comp_block >= vnd->sc_comp_numoffs) {
 1803                         bp->b_error = EINVAL;
 1804                         splx(s);
 1805                         return;
 1806                 }
 1807 
 1808                 /* read in the compressed block, if not in buffer */
 1809                 if (comp_block != vnd->sc_comp_buffblk) {
 1810                         length = vnd->sc_comp_offsets[comp_block + 1] -
 1811                             vnd->sc_comp_offsets[comp_block];
 1812                         vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY);
 1813                         error = vn_rdwr(UIO_READ, vnd->sc_vp, vnd->sc_comp_buff,
 1814                             length, vnd->sc_comp_offsets[comp_block],
 1815                             UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, vnd->sc_cred,
 1816                             NULL, NULL);
 1817                         if (error) {
 1818                                 bp->b_error = error;
 1819                                 VOP_UNLOCK(vnd->sc_vp, 0);
 1820                                 splx(s);
 1821                                 return;
 1822                         }
 1823                         /* uncompress the buffer */
 1824                         vnd->sc_comp_stream.next_in = vnd->sc_comp_buff;
 1825                         vnd->sc_comp_stream.avail_in = length;
 1826                         vnd->sc_comp_stream.next_out = vnd->sc_comp_decombuf;
 1827                         vnd->sc_comp_stream.avail_out = vnd->sc_comp_blksz;
 1828                         inflateReset(&vnd->sc_comp_stream);
 1829                         error = inflate(&vnd->sc_comp_stream, Z_FINISH);
 1830                         if (error != Z_STREAM_END) {
 1831                                 if (vnd->sc_comp_stream.msg)
 1832                                         aprint_normal_dev(vnd->sc_dev,
 1833                                             "compressed file, %s\n",
 1834                                             vnd->sc_comp_stream.msg);
 1835                                 bp->b_error = EBADMSG;
 1836                                 VOP_UNLOCK(vnd->sc_vp, 0);
 1837                                 splx(s);
 1838                                 return;
 1839                         }
 1840                         vnd->sc_comp_buffblk = comp_block;
 1841                         VOP_UNLOCK(vnd->sc_vp, 0);
 1842                 }
 1843 
 1844                 /* transfer the usable uncompressed data */
 1845                 offset_in_buffer = bn % (off_t)vnd->sc_comp_blksz;
 1846                 length_in_buffer = vnd->sc_comp_blksz - offset_in_buffer;
 1847                 if (length_in_buffer > bp->b_resid)
 1848                         length_in_buffer = bp->b_resid;
 1849                 auio.uio_iov = &aiov;
 1850                 auio.uio_iovcnt = 1;
 1851                 aiov.iov_base = addr;
 1852                 aiov.iov_len = length_in_buffer;
 1853                 auio.uio_resid = aiov.iov_len;
 1854                 auio.uio_offset = 0;
 1855                 error = uiomove(vnd->sc_comp_decombuf + offset_in_buffer,
 1856                     length_in_buffer, &auio);
 1857                 if (error) {
 1858                         bp->b_error = error;
 1859                         splx(s);
 1860                         return;
 1861                 }
 1862 
 1863                 bn += length_in_buffer;
 1864                 addr += length_in_buffer;
 1865                 bp->b_resid -= length_in_buffer;
 1866         }
 1867         splx(s);
 1868 }
 1869 
 1870 /* compression memory allocation routines */
 1871 static void *
 1872 vnd_alloc(void *aux, u_int items, u_int siz)
 1873 {
 1874         return malloc(items * siz, M_TEMP, M_NOWAIT);
 1875 }
 1876 
 1877 static void
 1878 vnd_free(void *aux, void *ptr)
 1879 {
 1880         free(ptr, M_TEMP);
 1881 }
 1882 #endif /* VND_COMPRESSION */
 1883 
 1884 static void
 1885 vnd_set_properties(struct vnd_softc *vnd)
 1886 {
 1887         prop_dictionary_t disk_info, odisk_info, geom;
 1888 
 1889         disk_info = prop_dictionary_create();
 1890 
 1891         geom = prop_dictionary_create();
 1892 
 1893         prop_dictionary_set_uint64(geom, "sectors-per-unit",
 1894             vnd->sc_geom.vng_nsectors * vnd->sc_geom.vng_ntracks *
 1895             vnd->sc_geom.vng_ncylinders);
 1896 
 1897         prop_dictionary_set_uint32(geom, "sector-size",
 1898             vnd->sc_geom.vng_secsize);
 1899 
 1900         prop_dictionary_set_uint16(geom, "sectors-per-track",
 1901             vnd->sc_geom.vng_nsectors);
 1902 
 1903         prop_dictionary_set_uint16(geom, "tracks-per-cylinder",
 1904             vnd->sc_geom.vng_ntracks);
 1905 
 1906         prop_dictionary_set_uint64(geom, "cylinders-per-unit",
 1907             vnd->sc_geom.vng_ncylinders);
 1908 
 1909         prop_dictionary_set(disk_info, "geometry", geom);
 1910         prop_object_release(geom);
 1911 
 1912         prop_dictionary_set(device_properties(vnd->sc_dev),
 1913             "disk-info", disk_info);
 1914 
 1915         /*
 1916          * Don't release disk_info here; we keep a reference to it.
 1917          * disk_detach() will release it when we go away.
 1918          */
 1919 
 1920         odisk_info = vnd->sc_dkdev.dk_info;
 1921         vnd->sc_dkdev.dk_info = disk_info;
 1922         if (odisk_info)
 1923                 prop_object_release(odisk_info);
 1924 }

Cache object: 80d371d5248c75c82cc1019da1f92bde


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.