The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/vfs_subr.c

Version: -  FREEBSD  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-2  -  FREEBSD-11-1  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-4  -  FREEBSD-10-3  -  FREEBSD-10-2  -  FREEBSD-10-1  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-3  -  FREEBSD-9-2  -  FREEBSD-9-1  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-4  -  FREEBSD-8-3  -  FREEBSD-8-2  -  FREEBSD-8-1  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-4  -  FREEBSD-7-3  -  FREEBSD-7-2  -  FREEBSD-7-1  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-4  -  FREEBSD-6-3  -  FREEBSD-6-2  -  FREEBSD-6-1  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-5  -  FREEBSD-5-4  -  FREEBSD-5-3  -  FREEBSD-5-2  -  FREEBSD-5-1  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  linux-2.6  -  linux-2.4.22  -  MK83  -  MK84  -  PLAN9  -  DFBSD  -  NETBSD  -  NETBSD5  -  NETBSD4  -  NETBSD3  -  NETBSD20  -  OPENBSD  -  xnu-517  -  xnu-792  -  xnu-792.6.70  -  xnu-1228  -  xnu-1456.1.26  -  xnu-1699.24.8  -  xnu-2050.18.24  -  OPENSOLARIS  -  minix-3-1-1 
SearchContext: -  none  -  3  -  10 

    1 /*      $OpenBSD: vfs_subr.c,v 1.175 2008/11/10 11:53:16 pedro Exp $    */
    2 /*      $NetBSD: vfs_subr.c,v 1.53 1996/04/22 01:39:13 christos Exp $   */
    3 
    4 /*
    5  * Copyright (c) 1989, 1993
    6  *      The Regents of the University of California.  All rights reserved.
    7  * (c) UNIX System Laboratories, Inc.
    8  * All or some portions of this file are derived from material licensed
    9  * to the University of California by American Telephone and Telegraph
   10  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
   11  * the permission of UNIX System Laboratories, Inc.
   12  *
   13  * Redistribution and use in source and binary forms, with or without
   14  * modification, are permitted provided that the following conditions
   15  * are met:
   16  * 1. Redistributions of source code must retain the above copyright
   17  *    notice, this list of conditions and the following disclaimer.
   18  * 2. Redistributions in binary form must reproduce the above copyright
   19  *    notice, this list of conditions and the following disclaimer in the
   20  *    documentation and/or other materials provided with the distribution.
   21  * 3. Neither the name of the University nor the names of its contributors
   22  *    may be used to endorse or promote products derived from this software
   23  *    without specific prior written permission.
   24  *
   25  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   28  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   29  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   30  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   31  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   32  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   33  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   34  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   35  * SUCH DAMAGE.
   36  *
   37  *      @(#)vfs_subr.c  8.13 (Berkeley) 4/18/94
   38  */
   39 
   40 /*
   41  * External virtual filesystem routines
   42  */
   43 
   44 #include <sys/param.h>
   45 #include <sys/systm.h>
   46 #include <sys/proc.h>
   47 #include <sys/mount.h>
   48 #include <sys/time.h>
   49 #include <sys/fcntl.h>
   50 #include <sys/kernel.h>
   51 #include <sys/vnode.h>
   52 #include <sys/stat.h>
   53 #include <sys/namei.h>
   54 #include <sys/ucred.h>
   55 #include <sys/buf.h>
   56 #include <sys/errno.h>
   57 #include <sys/malloc.h>
   58 #include <sys/domain.h>
   59 #include <sys/mbuf.h>
   60 #include <sys/syscallargs.h>
   61 #include <sys/pool.h>
   62 
   63 #include <uvm/uvm_extern.h>
   64 #include <sys/sysctl.h>
   65 
   66 #include <miscfs/specfs/specdev.h>
   67 
/*
 * Map the 4-bit file-type field of an inode mode (the S_IFMT nibble)
 * to the corresponding vnode type.  Unused type codes map to VNON;
 * index 15 maps to VBAD.
 */
enum vtype iftovt_tab[16] = {
	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
};
   72 
/*
 * Inverse of iftovt_tab: map an enum vtype (VNON..VBAD) to the
 * matching S_IF* mode bits.
 */
int     vttoif_tab[9] = {
	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
	S_IFSOCK, S_IFIFO, S_IFMT,
};
   77 
int doforce = 1;                /* 1 => permit forcible unmounting */
int prtactive = 0;              /* 1 => print out reclaim of active vnodes */
int suid_clear = 1;             /* 1 => clear SUID / SGID on owner change */

/*
 * Insq/Remq for the vnode usage lists.
 * bufremvn() also marks the buffer off-list by pointing its link
 * field at NOLIST.
 */
#define bufinsvn(bp, dp)        LIST_INSERT_HEAD(dp, bp, b_vnbufs)
#define bufremvn(bp) {                                                  \
	LIST_REMOVE(bp, b_vnbufs);                                      \
	LIST_NEXT(bp, b_vnbufs) = NOLIST;                               \
}

struct freelst vnode_hold_list; /* list of vnodes referencing buffers */
struct freelst vnode_free_list; /* vnode free list */

struct mntlist mountlist;       /* mounted filesystem list */

/* Prototypes for file-local helpers defined below. */
void    vclean(struct vnode *, int, struct proc *);
void    vhold(struct vnode *);
void    vdrop(struct vnode *);

void insmntque(struct vnode *, struct mount *);
int getdevvp(dev_t, struct vnode **, enum vtype);

int vfs_hang_addrlist(struct mount *, struct netexport *,
				  struct export_args *);
int vfs_free_netcred(struct radix_node *, void *);
void vfs_free_addrlist(struct netexport *);
void vputonfreelist(struct vnode *);

int vflush_vnode(struct vnode *, void *);
int maxvnodes;			/* soft ceiling on vnodes; set in vntblinit() */

#ifdef DEBUG
void printlockedvnodes(void);
#endif

struct pool vnode_pool;		/* backing allocator for struct vnode */
  117 
/*
 * Initialize the vnode management data structures.
 * Called once at boot, before any vnodes are allocated.
 */
void
vntblinit(void)
{
	/* buffer cache may need a vnode for each buffer */
	maxvnodes = desiredvnodes;
	/* Pool from which getnewvnode() draws fresh vnodes. */
	pool_init(&vnode_pool, sizeof(struct vnode), 0, 0, 0, "vnodes",
	    &pool_allocator_nointr);
	TAILQ_INIT(&vnode_hold_list);
	TAILQ_INIT(&vnode_free_list);
	CIRCLEQ_INIT(&mountlist);
	/*
	 * Initialize the filesystem syncer.
	 */
	vn_initialize_syncerd();
}
  136 
  137 /*
  138  * Mark a mount point as busy. Used to synchronize access and to delay
  139  * unmounting.
  140  *
  141  * Default behaviour is to attempt getting a READ lock and in case of an
  142  * ongoing unmount, to wait for it to finish and then return failure.
  143  */
  144 int
  145 vfs_busy(struct mount *mp, int flags)
  146 {
  147         int rwflags = 0;
  148 
  149         /* new mountpoints need their lock initialised */
  150         if (mp->mnt_lock.rwl_name == NULL)
  151                 rw_init(&mp->mnt_lock, "vfslock");
  152 
  153         if (flags & VB_WRITE)
  154                 rwflags |= RW_WRITE;
  155         else
  156                 rwflags |= RW_READ;
  157 
  158         if (flags & VB_WAIT)
  159                 rwflags |= RW_SLEEPFAIL;
  160         else
  161                 rwflags |= RW_NOSLEEP;
  162 
  163         if (rw_enter(&mp->mnt_lock, rwflags))
  164                 return (EBUSY);
  165 
  166         return (0);
  167 }
  168 
/*
 * Free a busy file system
 *
 * Releases the read or write lock taken by vfs_busy().
 */
void
vfs_unbusy(struct mount *mp)
{
	rw_exit(&mp->mnt_lock);
}
  177 
  178 int
  179 vfs_isbusy(struct mount *mp) 
  180 {
  181         if (RWLOCK_OWNER(&mp->mnt_lock) > 0)
  182                 return (1);
  183         else
  184                 return (0);
  185 }
  186 
  187 /*
  188  * Lookup a filesystem type, and if found allocate and initialize
  189  * a mount structure for it.
  190  *
  191  * Devname is usually updated by mount(8) after booting.
  192  */
  193 int
  194 vfs_rootmountalloc(char *fstypename, char *devname, struct mount **mpp)
  195 {
  196         struct vfsconf *vfsp;
  197         struct mount *mp;
  198 
  199         for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
  200                 if (!strcmp(vfsp->vfc_name, fstypename))
  201                         break;
  202         if (vfsp == NULL)
  203                 return (ENODEV);
  204         mp = malloc(sizeof(struct mount), M_MOUNT, M_WAITOK|M_ZERO);
  205         (void)vfs_busy(mp, VB_READ|VB_NOWAIT);
  206         LIST_INIT(&mp->mnt_vnodelist);
  207         mp->mnt_vfc = vfsp;
  208         mp->mnt_op = vfsp->vfc_vfsops;
  209         mp->mnt_flag = MNT_RDONLY;
  210         mp->mnt_vnodecovered = NULLVP;
  211         vfsp->vfc_refcount++;
  212         mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
  213         strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
  214         mp->mnt_stat.f_mntonname[0] = '/';
  215         (void)copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
  216         *mpp = mp;
  217         return (0);
  218  }
  219 
  220 /*
  221  * Lookup a mount point by filesystem identifier.
  222  */
  223 struct mount *
  224 vfs_getvfs(fsid_t *fsid)
  225 {
  226         struct mount *mp;
  227 
  228         CIRCLEQ_FOREACH(mp, &mountlist, mnt_list) {
  229                 if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
  230                     mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
  231                         return (mp);
  232                 }
  233         }
  234 
  235         return (NULL);
  236 }
  237 
  238 
/*
 * Get a new unique fsid
 *
 * val[0] encodes a pseudo device number (type offset past the real
 * block devices, plus a rolling counter as the minor); val[1] is the
 * filesystem type.  The candidate is bumped until no mounted
 * filesystem already uses it.
 */
void
vfs_getnewfsid(struct mount *mp)
{
	static u_short xxxfs_mntid;	/* rolling minor-number counter */

	fsid_t tfsid;
	int mtype;

	mtype = mp->mnt_vfc->vfc_typenum;
	mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
	mp->mnt_stat.f_fsid.val[1] = mtype;
	/* Minor 0 is the provisional id set above; start counting at 1. */
	if (xxxfs_mntid == 0)
		++xxxfs_mntid;
	tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid);
	tfsid.val[1] = mtype;
	if (!CIRCLEQ_EMPTY(&mountlist)) {
		/* Probe until the candidate collides with nothing. */
		while (vfs_getvfs(&tfsid)) {
			tfsid.val[0]++;
			xxxfs_mntid++;
		}
	}
	mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
}
  265 
  266 /*
  267  * Make a 'unique' number from a mount type name.
  268  * Note that this is no longer used for ffs which
  269  * now has an on-disk filesystem id.
  270  */
  271 long
  272 makefstype(char *type)
  273 {
  274         long rv;
  275 
  276         for (rv = 0; *type; type++) {
  277                 rv <<= 2;
  278                 rv ^= *type;
  279         }
  280         return rv;
  281 }
  282 
/*
 * Set vnode attributes to VNOVAL
 *
 * Marks every attribute "not specified" so callers can fill in only
 * the fields they intend to change.  va_type becomes VNON and
 * va_vaflags is cleared; everything else is set to VNOVAL.
 */
void
vattr_null(struct vattr *vap)
{

	vap->va_type = VNON;
	/* XXX These next two used to be one line, but for a GCC bug. */
	vap->va_size = VNOVAL;
	vap->va_bytes = VNOVAL;
	/*
	 * NOTE(review): the chained assignment funnels VNOVAL through
	 * each field's type in turn; since VNOVAL is an all-ones
	 * pattern this matches assigning each field directly.
	 */
	vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid =
		vap->va_fsid = vap->va_fileid =
		vap->va_blocksize = vap->va_rdev =
		vap->va_atime.tv_sec = vap->va_atime.tv_nsec =
		vap->va_mtime.tv_sec = vap->va_mtime.tv_nsec =
		vap->va_ctime.tv_sec = vap->va_ctime.tv_nsec =
		vap->va_flags = vap->va_gen = VNOVAL;
	vap->va_vaflags = 0;
}
  303 
/*
 * Routines having to do with the management of the vnode table.
 */
extern int (**dead_vnodeop_p)(void *);
long numvnodes;		/* current count of allocated vnodes */
  309 
/*
 * Return the next vnode from the free list.
 *
 * On success stores the new vnode (v_usecount == 1, type VNON, no
 * data) in *vpp and returns 0; returns ENFILE with *vpp == NULL if
 * every free-list candidate is locked.
 */
int
getnewvnode(enum vtagtype tag, struct mount *mp, int (**vops)(void *),
    struct vnode **vpp)
{
	struct proc *p = curproc;
	struct freelst *listhd;
	static int toggle;
	struct vnode *vp;
	int s;

	/*
	 * We must choose whether to allocate a new vnode or recycle an
	 * existing one. The criterion for allocating a new one is that
	 * the total number of vnodes is less than the number desired or
	 * there are no vnodes on either free list. Generally we only
	 * want to recycle vnodes that have no buffers associated with
	 * them, so we look first on the vnode_free_list. If it is empty,
	 * we next consider vnodes with referencing buffers on the
	 * vnode_hold_list. The toggle ensures that half the time we
	 * will use a buffer from the vnode_hold_list, and half the time
	 * we will allocate a new one unless the list has grown to twice
	 * the desired size. We are reticent to recycle vnodes from the
	 * vnode_hold_list because we will lose the identity of all its
	 * referencing buffers.
	 */
	toggle ^= 1;
	if (numvnodes > 2 * maxvnodes)
		toggle = 0;

	s = splbio();
	/*
	 * Allocate fresh when below the target, or when the chosen
	 * free list(s) have nothing to offer.  Note the side effect:
	 * the TAILQ_FIRST() tests also select listhd for the recycle
	 * path below.
	 */
	if ((numvnodes < maxvnodes) ||
	    ((TAILQ_FIRST(listhd = &vnode_free_list) == NULL) &&
	    ((TAILQ_FIRST(listhd = &vnode_hold_list) == NULL) || toggle))) {
		splx(s);
		vp = pool_get(&vnode_pool, PR_WAITOK | PR_ZERO);
		numvnodes++;
	} else {
		/* Skip over locked vnodes at the front of the list. */
		for (vp = TAILQ_FIRST(listhd); vp != NULLVP;
		    vp = TAILQ_NEXT(vp, v_freelist)) {
			if (VOP_ISLOCKED(vp) == 0)
				break;
		}
		/*
		 * Unless this is a bad time of the month, at most
		 * the first NCPUS items on the free list are
		 * locked, so this is close enough to being empty.
		 */
		if (vp == NULL) {
			splx(s);
			tablefull("vnode");
			*vpp = 0;
			return (ENFILE);
		}

#ifdef DIAGNOSTIC
		if (vp->v_usecount) {
			vprint("free vnode", vp);
			panic("free vnode isn't");
		}
#endif

		TAILQ_REMOVE(listhd, vp, v_freelist);
		vp->v_bioflag &= ~VBIOONFREELIST;
		splx(s);

		/* Detach the vnode from its old identity, if it has one. */
		if (vp->v_type != VBAD)
			vgonel(vp, p);
#ifdef DIAGNOSTIC
		if (vp->v_data) {
			vprint("cleaned vnode", vp);
			panic("cleaned vnode isn't");
		}
		s = splbio();
		if (vp->v_numoutput)
			panic("Clean vnode has pending I/O's");
		splx(s);
#endif
		vp->v_flag = 0;
		vp->v_socket = 0;
	}
	/* Common (re)initialisation for both fresh and recycled vnodes. */
	vp->v_type = VNON;
	cache_purge(vp);
	vp->v_tag = tag;
	vp->v_op = vops;
	insmntque(vp, mp);
	*vpp = vp;
	vp->v_usecount = 1;
	vp->v_data = 0;
	simple_lock_init(&vp->v_uvm.u_obj.vmobjlock);
	return (0);
}
  404 
  405 /*
  406  * Move a vnode from one mount queue to another.
  407  */
  408 void
  409 insmntque(struct vnode *vp, struct mount *mp)
  410 {
  411         /*
  412          * Delete from old mount point vnode list, if on one.
  413          */
  414         if (vp->v_mount != NULL)
  415                 LIST_REMOVE(vp, v_mntvnodes);
  416         /*
  417          * Insert into list of vnodes for the new mount point, if available.
  418          */
  419         if ((vp->v_mount = mp) != NULL)
  420                 LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
  421 }
  422 
/*
 * Create a vnode for a block device.
 * Used for root filesystem, argdev, and swap areas.
 * Also used for memory file system special devices.
 *
 * Thin wrapper around getdevvp() with type VBLK.
 */
int
bdevvp(dev_t dev, struct vnode **vpp)
{
	return (getdevvp(dev, vpp, VBLK));
}
  433 
/*
 * Create a vnode for a character device.
 * Used for console handling.
 *
 * Thin wrapper around getdevvp() with type VCHR.
 */
int
cdevvp(dev_t dev, struct vnode **vpp)
{
	return (getdevvp(dev, vpp, VCHR));
}
  443 
  444 /*
  445  * Create a vnode for a device.
  446  * Used by bdevvp (block device) for root file system etc.,
  447  * and by cdevvp (character device) for console.
  448  */
  449 int
  450 getdevvp(dev_t dev, struct vnode **vpp, enum vtype type)
  451 {
  452         struct vnode *vp;
  453         struct vnode *nvp;
  454         int error;
  455 
  456         if (dev == NODEV) {
  457                 *vpp = NULLVP;
  458                 return (0);
  459         }
  460         error = getnewvnode(VT_NON, NULL, spec_vnodeop_p, &nvp);
  461         if (error) {
  462                 *vpp = NULLVP;
  463                 return (error);
  464         }
  465         vp = nvp;
  466         vp->v_type = type;
  467         if ((nvp = checkalias(vp, dev, NULL)) != 0) {
  468                 vput(vp);
  469                 vp = nvp;
  470         }
  471         *vpp = vp;
  472         return (0);
  473 }
  474 
/*
 * Check to see if the new vnode represents a special device
 * for which we already have a vnode (either because of
 * bdevvp() or because of a different vnode representing
 * the same block device). If such an alias exists, deallocate
 * the existing contents and return the aliased vnode. The
 * caller is responsible for filling it with its new contents.
 */
struct vnode *
checkalias(struct vnode *nvp, dev_t nvp_rdev, struct mount *mp)
{
	struct proc *p = curproc;
	struct vnode *vp;
	struct vnode **vpp;

	/* Only block and character devices can be aliased. */
	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
		return (NULLVP);

	vpp = &speclisth[SPECHASH(nvp_rdev)];
loop:
	/* Walk the hash chain for a vnode with the same dev and type. */
	for (vp = *vpp; vp; vp = vp->v_specnext) {
		if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type) {
			continue;
		}
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vp->v_usecount == 0) {
			vgonel(vp, p);
			/* The chain may have changed: rescan from the top. */
			goto loop;
		}
		if (vget(vp, LK_EXCLUSIVE, p)) {
			goto loop;
		}
		break;
	}

	/*
	 * Common case is actually in the if statement
	 */
	if (vp == NULL || !(vp->v_tag == VT_NON && vp->v_type == VBLK)) {
		/* Give nvp its own specinfo and link it into the chain. */
		nvp->v_specinfo = malloc(sizeof(struct specinfo), M_VNODE,
			M_WAITOK);
		nvp->v_rdev = nvp_rdev;
		nvp->v_hashchain = vpp;
		nvp->v_specnext = *vpp;
		nvp->v_specmountpoint = NULL;
		nvp->v_speclockf = NULL;
		bzero(nvp->v_specbitmap, sizeof(nvp->v_specbitmap));
		*vpp = nvp;
		if (vp != NULLVP) {
			/* Both vnodes now refer to the same device. */
			nvp->v_flag |= VALIASED;
			vp->v_flag |= VALIASED;
			vput(vp);
		}
		return (NULLVP);
	}

	/*
	 * This code is the uncommon case. It is called in case
	 * we found an alias that was VT_NON && vtype of VBLK
	 * This means we found a block device that was created
	 * using bdevvp.
	 * An example of such a vnode is the root partition device vnode
	 * created in ffs_mountroot.
	 *
	 * The vnodes created by bdevvp should not be aliased (why?).
	 */

	VOP_UNLOCK(vp, 0, p);
	vclean(vp, 0, p);
	/* Take over the old vnode under the caller's ops and tag. */
	vp->v_op = nvp->v_op;
	vp->v_tag = nvp->v_tag;
	nvp->v_type = VNON;
	insmntque(vp, mp);
	return (vp);
}
  552 
/*
 * Grab a particular vnode from the free list, increment its
 * reference count and lock it. If the vnode lock bit is set,
 * the vnode is being eliminated in vgone. In that case, we
 * cannot grab it, so the process is awakened when the
 * transition is completed, and an error code is returned to
 * indicate that the vnode is no longer usable, possibly
 * having been changed to a new file system type.
 */
int
vget(struct vnode *vp, int flags, struct proc *p)
{
	int error, s, onfreelist;

	/*
	 * If the vnode is in the process of being cleaned out for
	 * another use, we wait for the cleaning to finish and then
	 * return failure. Cleaning is determined by checking that
	 * the VXLOCK flag is set.
	 */

	if (vp->v_flag & VXLOCK) {
		if (flags & LK_NOWAIT) {
			return (EBUSY);
		}

		/* Ask vclean()/vgone() to wake us when the flag clears. */
		vp->v_flag |= VXWANT;
		tsleep(vp, PINOD, "vget", 0);
		return (ENOENT);
	}

	/* An unreferenced vnode sits on a free list; pull it off. */
	onfreelist = vp->v_bioflag & VBIOONFREELIST;
	if (vp->v_usecount == 0 && onfreelist) {
		s = splbio();
		if (vp->v_holdcnt > 0)
			TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
		else
			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		vp->v_bioflag &= ~VBIOONFREELIST;
		splx(s);
	}

	vp->v_usecount++;
	if (flags & LK_TYPE_MASK) {
		/* On lock failure, undo the reference we just took. */
		if ((error = vn_lock(vp, flags, p)) != 0) {
			vp->v_usecount--;
			if (vp->v_usecount == 0 && onfreelist)
				vputonfreelist(vp);
		}
		return (error);
	}

	return (0);
}
  607 
  608 
/*
 * Vnode reference.  The caller must already hold a reference
 * (v_usecount > 0); use vget() for a vnode that may be on a
 * free list.
 */
void
vref(struct vnode *vp)
{
#ifdef DIAGNOSTIC
	if (vp->v_usecount == 0)
		panic("vref used where vget required");
#endif
	vp->v_usecount++;
}
  619 
  620 void
  621 vputonfreelist(struct vnode *vp)
  622 {
  623         int s;
  624         struct freelst *lst;
  625 
  626         s = splbio();
  627 #ifdef DIAGNOSTIC
  628         if (vp->v_usecount != 0)
  629                 panic("Use count is not zero!");
  630 
  631         if (vp->v_bioflag & VBIOONFREELIST) {
  632                 vprint("vnode already on free list: ", vp);
  633                 panic("vnode already on free list");
  634         }
  635 #endif
  636 
  637         vp->v_bioflag |= VBIOONFREELIST;
  638 
  639         if (vp->v_holdcnt > 0)
  640                 lst = &vnode_hold_list;
  641         else
  642                 lst = &vnode_free_list;
  643 
  644         if (vp->v_type == VBAD)
  645                 TAILQ_INSERT_HEAD(lst, vp, v_freelist);
  646         else
  647                 TAILQ_INSERT_TAIL(lst, vp, v_freelist);
  648 
  649         splx(s);
  650 }
  651 
/*
 * vput(), just unlock and vrele()
 *
 * Drops one reference and the caller's lock.  When the last
 * reference goes away, the vnode is deactivated via VOP_INACTIVE
 * (which unlocks it) and returned to the free list.
 */
void
vput(struct vnode *vp)
{
	struct proc *p = curproc;

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vput: null vp");
#endif

#ifdef DIAGNOSTIC
	if (vp->v_usecount == 0) {
		vprint("vput: bad ref count", vp);
		panic("vput: ref cnt");
	}
#endif
	vp->v_usecount--;
	/* Still referenced elsewhere: just release our lock. */
	if (vp->v_usecount > 0) {
		VOP_UNLOCK(vp, 0, p);
		return;
	}

#ifdef DIAGNOSTIC
	if (vp->v_writecount != 0) {
		vprint("vput: bad writecount", vp);
		panic("vput: v_writecount != 0");
	}
#endif

	/* Last reference: deactivate (VOP_INACTIVE unlocks the vnode). */
	VOP_INACTIVE(vp, p);

	if (vp->v_usecount == 0 && !(vp->v_bioflag & VBIOONFREELIST))
		vputonfreelist(vp);
}
  689 
/*
 * Vnode release - use for active VNODES.
 * If count drops to zero, call inactive routine and return to freelist.
 * Returns 0 if it did not sleep.
 */
int
vrele(struct vnode *vp)
{
	struct proc *p = curproc;

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vrele: null vp");
#endif
#ifdef DIAGNOSTIC
	if (vp->v_usecount == 0) {
		vprint("vrele: bad ref count", vp);
		panic("vrele: ref cnt");
	}
#endif
	vp->v_usecount--;
	/* Still referenced elsewhere: nothing more to do, no sleep. */
	if (vp->v_usecount > 0) {
		return (0);
	}

#ifdef DIAGNOSTIC
	if (vp->v_writecount != 0) {
		vprint("vrele: bad writecount", vp);
		panic("vrele: v_writecount != 0");
	}
#endif

	/* Need the lock before deactivating; vn_lock may sleep. */
	if (vn_lock(vp, LK_EXCLUSIVE, p)) {
#ifdef DIAGNOSTIC
		vprint("vrele: cannot lock", vp);
#endif
		return (1);
	}

	/* VOP_INACTIVE unlocks the vnode. */
	VOP_INACTIVE(vp, p);

	if (vp->v_usecount == 0 && !(vp->v_bioflag & VBIOONFREELIST))
		vputonfreelist(vp);
	return (1);
}
  735 
/*
 * Page or buffer structure gets a reference.
 * Increments v_holdcnt; a held, unreferenced vnode migrates from
 * the free list to the hold list so its buffers are not lost to
 * recycling.
 */
void
vhold(struct vnode *vp)
{
	/*
	 * If it is on the freelist and the hold count is currently
	 * zero, move it to the hold list.
	 */
	if ((vp->v_bioflag & VBIOONFREELIST) &&
	    vp->v_holdcnt == 0 && vp->v_usecount == 0) {
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
	}
	vp->v_holdcnt++;
}
  751 
/*
 * Lose interest in a vnode.
 * Decrements v_holdcnt, the counterpart of vhold(); when the last
 * hold goes away the vnode migrates back to the regular free list.
 */
void
vdrop(struct vnode *vp)
{
#ifdef DIAGNOSTIC
	if (vp->v_holdcnt == 0)
		panic("vdrop: zero holdcnt");
#endif

	vp->v_holdcnt--;

	/*
	 * If it is on the holdlist and the hold count drops to
	 * zero, move it to the free list.
	 */
	if ((vp->v_bioflag & VBIOONFREELIST) &&
	    vp->v_holdcnt == 0 && vp->v_usecount == 0) {
		TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
	}
}
  773 
/*
 * Remove any vnodes in the vnode table belonging to mount point mp.
 *
 * If MNT_NOFORCE is specified, there should not be any active ones,
 * return error if any are found (nb: this is a user error, not a
 * system error). If MNT_FORCE is specified, detach any active vnodes
 * that are found.
 */
#ifdef DEBUG
int busyprt = 0;        /* print out busy vnodes */
struct ctldebug debug1 = { "busyprt", &busyprt };
#endif
  786 
/*
 * Apply "func" to every vnode on the mount's vnode list.  Stops at
 * the first non-zero return from func and passes that value back;
 * returns 0 if all vnodes were visited.
 */
int
vfs_mount_foreach_vnode(struct mount *mp, 
    int (*func)(struct vnode *, void *), void *arg) {
	struct vnode *vp, *nvp;
	int error = 0;

loop:
	for (vp = LIST_FIRST(&mp->mnt_vnodelist); vp != NULL; vp = nvp) {
		/*
		 * The vnode was moved to another mount point: our list
		 * position is no longer valid, restart the scan.
		 */
		if (vp->v_mount != mp)
			goto loop;
		/* Save the successor before func() can disturb the list. */
		nvp = LIST_NEXT(vp, v_mntvnodes);

		error = func(vp, arg);

		if (error != 0)
			break;
	}

	return (error);
}
  807 
/* Argument bundle passed from vflush() to vflush_vnode(). */
struct vflush_args {
	struct vnode *skipvp;	/* vnode to leave untouched, may be NULL */
	int busy;		/* out: number of busy vnodes encountered */
	int flags;		/* SKIPSYSTEM / WRITECLOSE / FORCECLOSE */
};
  813 
/*
 * Per-vnode worker for vflush(): clean out or skip one vnode
 * according to the flags in the vflush_args bundle.  Always returns
 * 0 so the iteration continues; busy vnodes are only counted.
 */
int
vflush_vnode(struct vnode *vp, void *arg) {
	struct vflush_args *va = arg;
	struct proc *p = curproc;

	/* The caller asked us to leave this one alone. */
	if (vp == va->skipvp) {
		return (0);
	}

	/* Skip vnodes marked VSYSTEM when SKIPSYSTEM was requested. */
	if ((va->flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
		return (0);
	}

	/*
	 * If WRITECLOSE is set, only flush out regular file
	 * vnodes open for writing.
	 */
	if ((va->flags & WRITECLOSE) &&
	    (vp->v_writecount == 0 || vp->v_type != VREG)) {
		return (0);
	}

	/*
	 * With v_usecount == 0, all we need to do is clear
	 * out the vnode data structures and we are done.
	 */
	if (vp->v_usecount == 0) {
		vgonel(vp, p);
		return (0);
	}

	/*
	 * If FORCECLOSE is set, forcibly close the vnode.
	 * For block or character devices, revert to an
	 * anonymous device. For all other files, just kill them.
	 */
	if (va->flags & FORCECLOSE) {
		if (vp->v_type != VBLK && vp->v_type != VCHR) {
			vgonel(vp, p);
		} else {
			vclean(vp, 0, p);
			vp->v_op = spec_vnodeop_p;
			insmntque(vp, (struct mount *)0);
		}
		return (0);
	}

#ifdef DEBUG
	if (busyprt)
		vprint("vflush: busy vnode", vp);
#endif
	/* Active vnode we may not touch: record it for the caller. */
	va->busy++;
	return (0);
}
  868 
  869 int
  870 vflush(struct mount *mp, struct vnode *skipvp, int flags)
  871 {
  872         struct vflush_args va;
  873         va.skipvp = skipvp;
  874         va.busy = 0;
  875         va.flags = flags;
  876 
  877         vfs_mount_foreach_vnode(mp, vflush_vnode, &va);
  878 
  879         if (va.busy)
  880                 return (EBUSY);
  881         return (0);
  882 }
  883 
/*
 * Disassociate the underlying file system from a vnode.
 *
 * If "flags" contains DOCLOSE, dirty buffers are flushed and the vnode
 * is closed before being reclaimed.  On return the vnode is dead: its
 * v_op points at the dead-filesystem operations vector and any sleepers
 * waiting on VXWANT have been woken.
 */
void
vclean(struct vnode *vp, int flags, struct proc *p)
{
	int active;	/* nonzero if the vnode was referenced on entry */

	/*
	 * Check to see if the vnode is in use.
	 * If so we have to reference it before we clean it out
	 * so that its count cannot fall to zero and generate a
	 * race against ourselves to recycle it.
	 */
	if ((active = vp->v_usecount) != 0)
		vp->v_usecount++;

	/*
	 * Prevent the vnode from being recycled or
	 * brought into use while we clean it out.
	 */
	if (vp->v_flag & VXLOCK)
		panic("vclean: deadlock");
	vp->v_flag |= VXLOCK;
	/*
	 * Even if the count is zero, the VOP_INACTIVE routine may still
	 * have the object locked while it cleans it out. The VOP_LOCK
	 * ensures that the VOP_INACTIVE routine is done with its work.
	 * For active vnodes, it ensures that no other activity can
	 * occur while the underlying object is being cleaned out.
	 */
	VOP_LOCK(vp, LK_DRAIN, p);

	/*
	 * Clean out any VM data associated with the vnode.
	 */
	uvm_vnp_terminate(vp);
	/*
	 * Clean out any buffers associated with the vnode.
	 */
	if (flags & DOCLOSE)
		vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);
	/*
	 * If purging an active vnode, it must be closed and
	 * deactivated before being reclaimed. Note that the
	 * VOP_INACTIVE will unlock the vnode
	 */
	if (active) {
		if (flags & DOCLOSE)
			VOP_CLOSE(vp, FNONBLOCK, NOCRED, p);
		VOP_INACTIVE(vp, p);
	} else {
		/*
		 * Any other processes trying to obtain this lock must first
		 * wait for VXLOCK to clear, then call the new lock operation.
		 */
		VOP_UNLOCK(vp, 0, p);
	}

	/*
	 * Reclaim the vnode.
	 */
	if (VOP_RECLAIM(vp, p))
		panic("vclean: cannot reclaim");
	if (active) {
		/* Drop the extra reference taken at the top. */
		vp->v_usecount--;
		if (vp->v_usecount == 0) {
			if (vp->v_holdcnt > 0)
				panic("vclean: not clean");
			vputonfreelist(vp);
		}
	}
	cache_purge(vp);

	/*
	 * Done with purge, notify sleepers of the grim news.
	 */
	vp->v_op = dead_vnodeop_p;
	VN_KNOTE(vp, NOTE_REVOKE);
	vp->v_tag = VT_NON;
	vp->v_flag &= ~VXLOCK;
#ifdef VFSDEBUG
	vp->v_flag &= ~VLOCKSWORK;
#endif
	if (vp->v_flag & VXWANT) {
		vp->v_flag &= ~VXWANT;
		wakeup(vp);
	}
}
  973 
  974 /*
  975  * Recycle an unused vnode to the front of the free list.
  976  */
  977 int
  978 vrecycle(struct vnode *vp, struct proc *p)
  979 {
  980         if (vp->v_usecount == 0) {
  981                 vgonel(vp, p);
  982                 return (1);
  983         }
  984         return (0);
  985 }
  986 
  987 /*
  988  * Eliminate all activity associated with a vnode
  989  * in preparation for reuse.
  990  */
  991 void
  992 vgone(struct vnode *vp)
  993 {
  994         struct proc *p = curproc;
  995         vgonel(vp, p);
  996 }
  997 
/*
 * vgone, with struct proc.
 *
 * Kills the vnode: cleans it via vclean(DOCLOSE), removes it from its
 * mount's vnode list and (for devices) from the special-device alias
 * chain, marks it VBAD, and moves it to the head of the free list if
 * it is already there and unreferenced.
 */
void
vgonel(struct vnode *vp, struct proc *p)
{
	struct vnode *vq;
	struct vnode *vx;

	/*
	 * If a vgone (or vclean) is already in progress,
	 * wait until it is done and return.
	 */
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		tsleep(vp, PINOD, "vgone", 0);
		return;
	}

	/*
	 * Clean out the filesystem specific data.
	 */
	vclean(vp, DOCLOSE, p);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		insmntque(vp, (struct mount *)0);
	/*
	 * If special device, remove it from special device alias list
	 * if it is on one.
	 */
	if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) {
		/* Unlink vp from its hash chain (head or interior). */
		if (*vp->v_hashchain == vp) {
			*vp->v_hashchain = vp->v_specnext;
		} else {
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_specnext != vp)
					continue;
				vq->v_specnext = vp->v_specnext;
				break;
			}
			if (vq == NULL)
				panic("missing bdev");
		}
		if (vp->v_flag & VALIASED) {
			/*
			 * Count remaining aliases of this (rdev, type).
			 * If exactly one (vx) is left, it is no longer
			 * aliased; vq non-NULL means the scan stopped at
			 * a second alias, so VALIASED stays on vx.
			 */
			vx = NULL;
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type)
					continue;
				if (vx)
					break;
				vx = vq;
			}
			if (vx == NULL)
				panic("missing alias");
			if (vq == NULL)
				vx->v_flag &= ~VALIASED;
			vp->v_flag &= ~VALIASED;
		}
		free(vp->v_specinfo, M_VNODE);
		vp->v_specinfo = NULL;
	}
	/*
	 * Mark the vnode dead.
	 */
	vp->v_type = VBAD;

	/*
	 * Move onto the free list, unless we were called from
	 * getnewvnode and we're not on any free list
	 */
	if (vp->v_usecount == 0 &&
	    (vp->v_bioflag & VBIOONFREELIST)) {
		int s;

		s = splbio();

		if (vp->v_holdcnt > 0)
			panic("vgonel: not clean");

		/* Put it at the head so it is reused first. */
		if (TAILQ_FIRST(&vnode_free_list) != vp) {
			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
			TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
		}
		splx(s);
	}
}
 1088 
 1089 /*
 1090  * Lookup a vnode by device number.
 1091  */
 1092 int
 1093 vfinddev(dev_t dev, enum vtype type, struct vnode **vpp)
 1094 {
 1095         struct vnode *vp;
 1096         int rc =0;
 1097 
 1098         for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
 1099                 if (dev != vp->v_rdev || type != vp->v_type)
 1100                         continue;
 1101                 *vpp = vp;
 1102                 rc = 1;
 1103                 break;
 1104         }
 1105         return (rc);
 1106 }
 1107 
 1108 /*
 1109  * Revoke all the vnodes corresponding to the specified minor number
 1110  * range (endpoints inclusive) of the specified major.
 1111  */
 1112 void
 1113 vdevgone(int maj, int minl, int minh, enum vtype type)
 1114 {
 1115         struct vnode *vp;
 1116         int mn;
 1117 
 1118         for (mn = minl; mn <= minh; mn++)
 1119                 if (vfinddev(makedev(maj, mn), type, &vp))
 1120                         VOP_REVOKE(vp, REVOKEALL);
 1121 }
 1122 
/*
 * Calculate the total number of references to a special device.
 *
 * For an aliased device, every vnode on the hash chain with the same
 * (v_rdev, v_type) contributes its v_usecount.  Unused aliases are
 * flushed with vgone() along the way; that invalidates the chain, so
 * the scan restarts from the top.
 */
int
vcount(struct vnode *vp)
{
	struct vnode *vq, *vnext;
	int count;

loop:
	/* An unaliased device has a single vnode; its count is the answer. */
	if ((vp->v_flag & VALIASED) == 0)
		return (vp->v_usecount);
	for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
		/* Fetch the successor first: vgone() may free vq. */
		vnext = vq->v_specnext;
		if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vq->v_usecount == 0 && vq != vp) {
			vgone(vq);
			goto loop;
		}
		count += vq->v_usecount;
	}
	return (count);
}
 1150 
 1151 #if defined(DEBUG) || defined(DIAGNOSTIC)
 1152 /*
 1153  * Print out a description of a vnode.
 1154  */
/* Printable names for vnode types, indexed by enum vtype (v_type). */
static char *typename[] =
   { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" };
 1157 
/*
 * Print a description of a vnode to the console: address, type,
 * reference counts and any set flag bits, followed by the filesystem's
 * own VOP_PRINT output when v_data is attached.
 * "label" is an optional prefix; pass NULL for none.
 */
void
vprint(char *label, struct vnode *vp)
{
	char buf[64];	/* accumulates "|FLAG|FLAG..." */

	if (label != NULL)
		printf("%s: ", label);
	printf("%p, type %s, use %u, write %u, hold %u,",
		vp, typename[vp->v_type], vp->v_usecount, vp->v_writecount,
		vp->v_holdcnt);
	buf[0] = '\0';
	if (vp->v_flag & VROOT)
		strlcat(buf, "|VROOT", sizeof buf);
	if (vp->v_flag & VTEXT)
		strlcat(buf, "|VTEXT", sizeof buf);
	if (vp->v_flag & VSYSTEM)
		strlcat(buf, "|VSYSTEM", sizeof buf);
	if (vp->v_flag & VXLOCK)
		strlcat(buf, "|VXLOCK", sizeof buf);
	if (vp->v_flag & VXWANT)
		strlcat(buf, "|VXWANT", sizeof buf);
	if (vp->v_bioflag & VBIOWAIT)
		strlcat(buf, "|VBIOWAIT", sizeof buf);
	if (vp->v_bioflag & VBIOONFREELIST)
		strlcat(buf, "|VBIOONFREELIST", sizeof buf);
	if (vp->v_bioflag & VBIOONSYNCLIST)
		strlcat(buf, "|VBIOONSYNCLIST", sizeof buf);
	if (vp->v_flag & VALIASED)
		strlcat(buf, "|VALIASED", sizeof buf);
	if (buf[0] != '\0')
		printf(" flags (%s)", &buf[1]);	/* &buf[1] skips leading '|' */
	if (vp->v_data == NULL) {
		printf("\n");
	} else {
		printf("\n\t");
		VOP_PRINT(vp);
	}
}
 1196 #endif /* DEBUG || DIAGNOSTIC */
 1197 
 1198 #ifdef DEBUG
/*
 * List all of the locked vnodes in the system.
 * Called when debugging the kernel.
 */
void
printlockedvnodes(void)
{
	struct mount *mp, *nmp;
	struct vnode *vp;

	printf("Locked vnodes\n");

	/*
	 * Walk the mount list, skipping (without blocking on) file
	 * systems that are currently busy.  The successor is computed
	 * before vfs_unbusy() so list changes cannot strand the walk.
	 */
	for (mp = CIRCLEQ_FIRST(&mountlist); mp != CIRCLEQ_END(&mountlist);
	    mp = nmp) {
		if (vfs_busy(mp, VB_READ|VB_NOWAIT)) {
			nmp = CIRCLEQ_NEXT(mp, mnt_list);
			continue;
		}
		LIST_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
			if (VOP_ISLOCKED(vp))
				vprint((char *)0, vp);
		}
		nmp = CIRCLEQ_NEXT(mp, mnt_list);
		vfs_unbusy(mp);
	}

}
 1226 #endif
 1227 
/*
 * Top level filesystem related information gathering.
 *
 * Names other than VFS_GENERIC are forwarded to the matching
 * filesystem's own vfs_sysctl handler.  VFS_GENERIC handles
 * VFS_MAXTYPENUM, VFS_CONF, and VFS_BCACHESTAT here.
 */
int
vfs_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
    size_t newlen, struct proc *p)
{
	struct vfsconf *vfsp, *tmpvfsp;
	int ret;

	/* all sysctl names at this level are at least name and field */
	if (namelen < 2)
		return (ENOTDIR);		/* overloaded */

	if (name[0] != VFS_GENERIC) {
		/* Find the filesystem by type number and forward to it. */
		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
			if (vfsp->vfc_typenum == name[0])
				break;

		if (vfsp == NULL)
			return (EOPNOTSUPP);

		return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1,
		    oldp, oldlenp, newp, newlen, p));
	}

	switch (name[1]) {
	case VFS_MAXTYPENUM:
		return (sysctl_rdint(oldp, oldlenp, newp, maxvfsconf));

	case VFS_CONF:
		if (namelen < 3)
			return (ENOTDIR);	/* overloaded */

		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
			if (vfsp->vfc_typenum == name[2])
				break;

		if (vfsp == NULL)
			return (EOPNOTSUPP);

		/* Make a copy, clear out kernel pointers */
		tmpvfsp = malloc(sizeof(*tmpvfsp), M_TEMP, M_WAITOK);
		bcopy(vfsp, tmpvfsp, sizeof(*tmpvfsp));
		tmpvfsp->vfc_vfsops = NULL;
		tmpvfsp->vfc_next = NULL;

		ret = sysctl_rdstruct(oldp, oldlenp, newp, tmpvfsp,
		    sizeof(struct vfsconf));

		free(tmpvfsp, M_TEMP);
		return (ret);
	case VFS_BCACHESTAT:	/* buffer cache statistics */
		ret = sysctl_rdstruct(oldp, oldlenp, newp, &bcstats,
		    sizeof(struct bcachestats));
		return(ret);
	}
	return (EOPNOTSUPP);
}
 1287 
/* When set, sysctl_vnode() logs vnodes whose mount changes mid-dump. */
int kinfo_vdebug = 1;
/* Size-estimate headroom for vnodes created while a dump is running. */
#define KINFO_VNODESLOP 10
/*
 * Dump vnode list (via sysctl).
 * Copyout address of vnode followed by vnode.
 *
 * A NULL "where" returns only an upper-bound size estimate in *sizep.
 * Returns ENOMEM when the caller's buffer is too small, or an error
 * from copyout(); on success *sizep holds the bytes written.
 */
/* ARGSUSED */
int
sysctl_vnode(char *where, size_t *sizep, struct proc *p)
{
	struct mount *mp, *nmp;
	struct vnode *vp, *nvp;
	char *bp = where, *savebp;	/* output cursor / per-mount rollback */
	char *ewhere;			/* end of the caller's buffer */
	int error;

	if (where == NULL) {
		/* Size estimate only; pad for vnodes created meanwhile. */
		*sizep = (numvnodes + KINFO_VNODESLOP) * sizeof(struct e_vnode);
		return (0);
	}
	ewhere = where + *sizep;

	for (mp = CIRCLEQ_FIRST(&mountlist); mp != CIRCLEQ_END(&mountlist);
	    mp = nmp) {
		if (vfs_busy(mp, VB_READ|VB_NOWAIT)) {
			nmp = CIRCLEQ_NEXT(mp, mnt_list);
			continue;
		}
		savebp = bp;
again:
		for (vp = LIST_FIRST(&mp->mnt_vnodelist); vp != NULL;
		    vp = nvp) {
			/*
			 * Check that the vp is still associated with
			 * this filesystem.  RACE: could have been
			 * recycled onto the same filesystem.
			 */
			if (vp->v_mount != mp) {
				if (kinfo_vdebug)
					printf("kinfo: vp changed\n");
				/* Restart this mount's dump from scratch. */
				bp = savebp;
				goto again;
			}
			nvp = LIST_NEXT(vp, v_mntvnodes);
			if (bp + sizeof(struct e_vnode) > ewhere) {
				*sizep = bp - where;
				vfs_unbusy(mp);
				return (ENOMEM);
			}
			/* Copy out the vnode's address, then its contents. */
			if ((error = copyout(&vp,
			    &((struct e_vnode *)bp)->vptr,
			    sizeof(struct vnode *))) ||
			   (error = copyout(vp,
			    &((struct e_vnode *)bp)->vnode,
			    sizeof(struct vnode)))) {
				vfs_unbusy(mp);
				return (error);
			}
			bp += sizeof(struct e_vnode);
		}

		nmp = CIRCLEQ_NEXT(mp, mnt_list);
		vfs_unbusy(mp);
	}

	*sizep = bp - where;

	return (0);
}
 1357 
 1358 /*
 1359  * Check to see if a filesystem is mounted on a block device.
 1360  */
 1361 int
 1362 vfs_mountedon(struct vnode *vp)
 1363 {
 1364         struct vnode *vq;
 1365         int error = 0;
 1366 
 1367         if (vp->v_specmountpoint != NULL)
 1368                 return (EBUSY);
 1369         if (vp->v_flag & VALIASED) {
 1370                 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
 1371                         if (vq->v_rdev != vp->v_rdev ||
 1372                             vq->v_type != vp->v_type)
 1373                                 continue;
 1374                         if (vq->v_specmountpoint != NULL) {
 1375                                 error = EBUSY;
 1376                                 break;
 1377                         }
 1378                 }
 1379         }
 1380         return (error);
 1381 }
 1382 
/*
 * Build hash lists of net addresses and hang them off the mount point.
 * Called by ufs_mount() to set up the lists of export addresses.
 *
 * ex_addrlen == 0 installs the mount's default export.  Otherwise the
 * address (and optional mask) are copied in from userland into a single
 * allocation with the netcred, and inserted into the per-address-family
 * radix tree (created on demand).  Returns 0 or an errno value.
 */
int
vfs_hang_addrlist(struct mount *mp, struct netexport *nep,
    struct export_args *argp)
{
	struct netcred *np;
	struct radix_node_head *rnh;
	int i;
	struct radix_node *rn;
	struct sockaddr *saddr, *smask = 0;
	struct domain *dom;
	int error;

	if (argp->ex_addrlen == 0) {
		/* Default export: only one may be installed per mount. */
		if (mp->mnt_flag & MNT_DEFEXPORTED)
			return (EPERM);
		np = &nep->ne_defexported;
		np->netc_exflags = argp->ex_flags;
		np->netc_anon = argp->ex_anon;
		np->netc_anon.cr_ref = 1;
		mp->mnt_flag |= MNT_DEFEXPORTED;
		return (0);
	}
	if (argp->ex_addrlen > MLEN || argp->ex_masklen > MLEN ||
	    argp->ex_addrlen < 0 || argp->ex_masklen < 0)
		return (EINVAL);
	/* One allocation holds the netcred, the address, and the mask. */
	i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
	np = (struct netcred *)malloc(i, M_NETADDR, M_WAITOK|M_ZERO);
	saddr = (struct sockaddr *)(np + 1);
	error = copyin(argp->ex_addr, saddr, argp->ex_addrlen);
	if (error)
		goto out;
	/* Clamp the claimed sa_len to what was actually copied in. */
	if (saddr->sa_len > argp->ex_addrlen)
		saddr->sa_len = argp->ex_addrlen;
	if (argp->ex_masklen) {
		smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen);
		error = copyin(argp->ex_mask, smask, argp->ex_masklen);
		if (error)
			goto out;
		if (smask->sa_len > argp->ex_masklen)
			smask->sa_len = argp->ex_masklen;
	}
	i = saddr->sa_family;
	if (i < 0 || i > AF_MAX) {
		error = EINVAL;
		goto out;
	}
	if ((rnh = nep->ne_rtable[i]) == 0) {
		/*
		 * Seems silly to initialize every AF when most are not
		 * used, do so on demand here
		 */
		for (dom = domains; dom; dom = dom->dom_next)
			if (dom->dom_family == i && dom->dom_rtattach) {
				dom->dom_rtattach((void **)&nep->ne_rtable[i],
					dom->dom_rtoffset);
				break;
			}
		if ((rnh = nep->ne_rtable[i]) == 0) {
			error = ENOBUFS;
			goto out;
		}
	}
	rn = (*rnh->rnh_addaddr)((caddr_t)saddr, (caddr_t)smask, rnh,
		np->netc_rnodes, 0);
	if (rn == 0 || np != (struct netcred *)rn) { /* already exists */
		error = EPERM;
		goto out;
	}
	np->netc_exflags = argp->ex_flags;
	np->netc_anon = argp->ex_anon;
	np->netc_anon.cr_ref = 1;
	return (0);
out:
	free(np, M_NETADDR);
	return (error);
}
 1463 
 1464 /* ARGSUSED */
 1465 int
 1466 vfs_free_netcred(struct radix_node *rn, void *w)
 1467 {
 1468         struct radix_node_head *rnh = (struct radix_node_head *)w;
 1469 
 1470         (*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh, NULL);
 1471         free(rn, M_NETADDR);
 1472         return (0);
 1473 }
 1474 
 1475 /*
 1476  * Free the net address hash lists that are hanging off the mount points.
 1477  */
 1478 void
 1479 vfs_free_addrlist(struct netexport *nep)
 1480 {
 1481         int i;
 1482         struct radix_node_head *rnh;
 1483 
 1484         for (i = 0; i <= AF_MAX; i++)
 1485                 if ((rnh = nep->ne_rtable[i]) != NULL) {
 1486                         (*rnh->rnh_walktree)(rnh, vfs_free_netcred, rnh);
 1487                         free(rnh, M_RTABLE);
 1488                         nep->ne_rtable[i] = 0;
 1489                 }
 1490 }
 1491 
 1492 int
 1493 vfs_export(struct mount *mp, struct netexport *nep, struct export_args *argp)
 1494 {
 1495         int error;
 1496 
 1497         if (argp->ex_flags & MNT_DELEXPORT) {
 1498                 vfs_free_addrlist(nep);
 1499                 mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
 1500         }
 1501         if (argp->ex_flags & MNT_EXPORTED) {
 1502                 if ((error = vfs_hang_addrlist(mp, nep, argp)) != 0)
 1503                         return (error);
 1504                 mp->mnt_flag |= MNT_EXPORTED;
 1505         }
 1506         return (0);
 1507 }
 1508 
 1509 struct netcred *
 1510 vfs_export_lookup(struct mount *mp, struct netexport *nep, struct mbuf *nam)
 1511 {
 1512         struct netcred *np;
 1513         struct radix_node_head *rnh;
 1514         struct sockaddr *saddr;
 1515 
 1516         np = NULL;
 1517         if (mp->mnt_flag & MNT_EXPORTED) {
 1518                 /*
 1519                  * Lookup in the export list first.
 1520                  */
 1521                 if (nam != NULL) {
 1522                         saddr = mtod(nam, struct sockaddr *);
 1523                         rnh = nep->ne_rtable[saddr->sa_family];
 1524                         if (rnh != NULL) {
 1525                                 np = (struct netcred *)
 1526                                         (*rnh->rnh_matchaddr)((caddr_t)saddr,
 1527                                             rnh);
 1528                                 if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
 1529                                         np = NULL;
 1530                         }
 1531                 }
 1532                 /*
 1533                  * If no address match, use the default if it exists.
 1534                  */
 1535                 if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
 1536                         np = &nep->ne_defexported;
 1537         }
 1538         return (np);
 1539 }
 1540 
 1541 /*
 1542  * Do the usual access checking.
 1543  * file_mode, uid and gid are from the vnode in question,
 1544  * while acc_mode and cred are from the VOP_ACCESS parameter list
 1545  */
 1546 int
 1547 vaccess(enum vtype type, mode_t file_mode, uid_t uid, gid_t gid,
 1548     mode_t acc_mode, struct ucred *cred)
 1549 {
 1550         mode_t mask;
 1551 
 1552         /* User id 0 always gets read/write access. */
 1553         if (cred->cr_uid == 0) {
 1554                 /* For VEXEC, at least one of the execute bits must be set. */
 1555                 if ((acc_mode & VEXEC) && type != VDIR &&
 1556                     (file_mode & (S_IXUSR|S_IXGRP|S_IXOTH)) == 0)
 1557                         return EACCES;
 1558                 return 0;
 1559         }
 1560 
 1561         mask = 0;
 1562 
 1563         /* Otherwise, check the owner. */
 1564         if (cred->cr_uid == uid) {
 1565                 if (acc_mode & VEXEC)
 1566                         mask |= S_IXUSR;
 1567                 if (acc_mode & VREAD)
 1568                         mask |= S_IRUSR;
 1569                 if (acc_mode & VWRITE)
 1570                         mask |= S_IWUSR;
 1571                 return (file_mode & mask) == mask ? 0 : EACCES;
 1572         }
 1573 
 1574         /* Otherwise, check the groups. */
 1575         if (cred->cr_gid == gid || groupmember(gid, cred)) {
 1576                 if (acc_mode & VEXEC)
 1577                         mask |= S_IXGRP;
 1578                 if (acc_mode & VREAD)
 1579                         mask |= S_IRGRP;
 1580                 if (acc_mode & VWRITE)
 1581                         mask |= S_IWGRP;
 1582                 return (file_mode & mask) == mask ? 0 : EACCES;
 1583         }
 1584 
 1585         /* Otherwise, check everyone else. */
 1586         if (acc_mode & VEXEC)
 1587                 mask |= S_IXOTH;
 1588         if (acc_mode & VREAD)
 1589                 mask |= S_IROTH;
 1590         if (acc_mode & VWRITE)
 1591                 mask |= S_IWOTH;
 1592         return (file_mode & mask) == mask ? 0 : EACCES;
 1593 }
 1594 
/*
 * Unmount all file systems.
 * We traverse the list in reverse order under the assumption that doing so
 * will avoid needing to worry about dependencies.
 *
 * Unmounts are forced (MNT_FORCE).  If any fail, the entire pass is
 * retried once before giving up with a console warning.
 */
void
vfs_unmountall(void)
{
	struct mount *mp, *nmp;
	int allerror, error, again = 1;

 retry:
	allerror = 0;
	for (mp = CIRCLEQ_LAST(&mountlist); mp != CIRCLEQ_END(&mountlist);
	    mp = nmp) {
		/* Grab the predecessor before mp can be removed. */
		nmp = CIRCLEQ_PREV(mp, mnt_list);
		if ((vfs_busy(mp, VB_WRITE|VB_NOWAIT)) != 0)
			continue;
		if ((error = dounmount(mp, MNT_FORCE, curproc, NULL)) != 0) {
			printf("unmount of %s failed with error %d\n",
			    mp->mnt_stat.f_mntonname, error);
			allerror = 1;
		}
	}

	if (allerror) {
		printf("WARNING: some file systems would not unmount\n");
		if (again) {
			printf("retrying\n");
			again = 0;
			goto retry;
		}
	}
}
 1629 
/*
 * Sync and unmount file systems before shutting down.
 *
 * When called after a panic (panicstr set), the sync/unmount pass is
 * skipped and only the buffer flush in vfs_syncwait() is attempted.
 */
void
vfs_shutdown(void)
{
#ifdef ACCOUNTING
	extern void acct_shutdown(void);

	acct_shutdown();
#endif

	/* XXX Should suspend scheduling. */
	(void) spl0();

	printf("syncing disks... ");

	if (panicstr == 0) {
		/* Sync before unmount, in case we hang on something. */
		sys_sync(&proc0, (void *)0, (register_t *)0);

		/* Unmount file systems. */
		vfs_unmountall();
	}

	/* Flush any remaining dirty buffers, reporting progress. */
	if (vfs_syncwait(1))
		printf("giving up\n");
	else
		printf("done\n");
}
 1660 
/*
 * perform sync() operation and wait for buffers to flush.
 * assumptions: called w/ scheduler disabled and physical io enabled
 * for now called at spl0() XXX
 *
 * Returns 0 when all buffers have flushed, otherwise a nonzero count
 * of buffers still busy (or 1 after abandoning a rewrite storm).
 * When "verbose" is set, progress is printed to the console.
 */
int
vfs_syncwait(int verbose)
{
	struct buf *bp;
	int iter, nbusy, dcount, s;
	struct proc *p;

	p = curproc? curproc : &proc0;
	sys_sync(p, (void *)0, (register_t *)0);

	/* Wait for sync to finish. */
	dcount = 10000;		/* cap on dirty buffers we will rewrite */
	for (iter = 0; iter < 20; iter++) {
		nbusy = 0;
		LIST_FOREACH(bp, &bufhead, b_list) {
			if ((bp->b_flags & (B_BUSY|B_INVAL|B_READ)) == B_BUSY)
				nbusy++;
			/*
			 * With soft updates, some buffers that are
			 * written will be remarked as dirty until other
			 * buffers are written.
			 */
			if (bp->b_flags & B_DELWRI) {
				s = splbio();
				bremfree(bp);
				buf_acquire(bp);
				splx(s);
				nbusy++;
				/* Start an async write of the dirty buffer. */
				bawrite(bp);
				if (dcount-- <= 0) {
					if (verbose)
						printf("softdep ");
					return 1;
				}
			}
		}
		if (nbusy == 0)
			break;
		if (verbose)
			printf("%d ", nbusy);
		/* Back off progressively longer on each pass. */
		DELAY(40000 * iter);
	}

	return nbusy;
}
 1711 
 1712 /*
 1713  * posix file system related system variables.
 1714  */
 1715 int
 1716 fs_posix_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp,
 1717     void *newp, size_t newlen, struct proc *p)
 1718 {
 1719         /* all sysctl names at this level are terminal */
 1720         if (namelen != 1)
 1721                 return (ENOTDIR);
 1722 
 1723         switch (name[0]) {
 1724         case FS_POSIX_SETUID:
 1725                 if (newp && securelevel > 0)
 1726                         return (EPERM);
 1727                 return(sysctl_int(oldp, oldlenp, newp, newlen, &suid_clear));
 1728         default:
 1729                 return (EOPNOTSUPP);
 1730         }
 1731         /* NOTREACHED */
 1732 }
 1733 
 1734 /*
 1735  * file system related system variables.
 1736  */
 1737 int
 1738 fs_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
 1739     size_t newlen, struct proc *p)
 1740 {
 1741         sysctlfn *fn;
 1742 
 1743         switch (name[0]) {
 1744         case FS_POSIX:
 1745                 fn = fs_posix_sysctl;
 1746                 break;
 1747         default:
 1748                 return (EOPNOTSUPP);
 1749         }
 1750         return (*fn)(name + 1, namelen - 1, oldp, oldlenp, newp, newlen, p);
 1751 }
 1752 
 1753 
 1754 /*
 1755  * Routines dealing with vnodes and buffers
 1756  */
 1757 
 1758 /*
 1759  * Wait for all outstanding I/Os to complete
 1760  *
 1761  * Manipulates v_numoutput. Must be called at splbio()
 1762  */
 1763 int
 1764 vwaitforio(struct vnode *vp, int slpflag, char *wmesg, int timeo)
 1765 {
 1766         int error = 0;
 1767 
 1768         splassert(IPL_BIO);
 1769 
 1770         while (vp->v_numoutput) {
 1771                 vp->v_bioflag |= VBIOWAIT;
 1772                 error = tsleep(&vp->v_numoutput,
 1773                     slpflag | (PRIBIO + 1), wmesg, timeo);
 1774                 if (error)
 1775                         break;
 1776         }
 1777 
 1778         return (error);
 1779 }
 1780 
 1781 /*
 1782  * Update outstanding I/O count and do wakeup if requested.
 1783  *
 1784  * Manipulates v_numoutput. Must be called at splbio()
 1785  */
 1786 void
 1787 vwakeup(struct vnode *vp)
 1788 {
 1789         splassert(IPL_BIO);
 1790 
 1791         if (vp != NULL) {
 1792                 if (vp->v_numoutput-- == 0)
 1793                         panic("vwakeup: neg numoutput");
 1794                 if ((vp->v_bioflag & VBIOWAIT) && vp->v_numoutput == 0) {
 1795                         vp->v_bioflag &= ~VBIOWAIT;
 1796                         wakeup(&vp->v_numoutput);
 1797                 }
 1798         }
 1799 }
 1800 
/*
 * Flush out and invalidate all buffers associated with a vnode.
 * Called with the underlying object locked.
 *
 * flags:  V_SAVE     - fsync dirty buffers to disk before invalidating;
 *         V_SAVEMETA - leave buffers with negative logical block numbers
 *                      (indirect-block metadata) on the queues.
 * slpflag/slptimeo are forwarded to tsleep() when waiting on busy bufs.
 *
 * Returns 0 on success, or an error from VOP_FSYNC()/tsleep().
 */
int
vinvalbuf(struct vnode *vp, int flags, struct ucred *cred, struct proc *p,
    int slpflag, int slptimeo)
{
	struct buf *bp;
	struct buf *nbp, *blist;
	int s, error;

#ifdef VFSDEBUG
	if ((vp->v_flag & VLOCKSWORK) && !VOP_ISLOCKED(vp))
		panic("vinvalbuf(): vp isn't locked");
#endif

	if (flags & V_SAVE) {
		s = splbio();
		/* Let in-flight writes drain before deciding to fsync. */
		vwaitforio(vp, 0, "vinvalbuf", 0);
		if (!LIST_EMPTY(&vp->v_dirtyblkhd)) {
			splx(s);
			if ((error = VOP_FSYNC(vp, cred, MNT_WAIT, p)) != 0)
				return (error);
			s = splbio();
			/* A synchronous fsync must have flushed everything. */
			if (vp->v_numoutput > 0 ||
			    !LIST_EMPTY(&vp->v_dirtyblkhd))
				panic("vinvalbuf: dirty bufs");
		}
		splx(s);
	}
loop:
	s = splbio();
	for (;;) {
		/*
		 * Pick a starting buffer: prefer the clean list, then the
		 * dirty list.  With V_SAVEMETA, skip past metadata buffers
		 * (negative lblkno) which must be preserved.
		 */
		if ((blist = LIST_FIRST(&vp->v_cleanblkhd)) &&
		    (flags & V_SAVEMETA))
			while (blist && blist->b_lblkno < 0)
				blist = LIST_NEXT(blist, b_vnbufs);
		if (blist == NULL &&
		    (blist = LIST_FIRST(&vp->v_dirtyblkhd)) &&
		    (flags & V_SAVEMETA))
			while (blist && blist->b_lblkno < 0)
				blist = LIST_NEXT(blist, b_vnbufs);
		if (!blist)
			break;

		for (bp = blist; bp; bp = nbp) {
			nbp = LIST_NEXT(bp, b_vnbufs);
			if (flags & V_SAVEMETA && bp->b_lblkno < 0)
				continue;
			if (bp->b_flags & B_BUSY) {
				/*
				 * Someone else owns the buffer: wait for it
				 * and restart the inner scan, since the
				 * lists may have changed while we slept.
				 */
				bp->b_flags |= B_WANTED;
				error = tsleep(bp, slpflag | (PRIBIO + 1),
				    "vinvalbuf", slptimeo);
				if (error) {
					splx(s);
					return (error);
				}
				break;
			}
			bremfree(bp);
			buf_acquire(bp);
			/*
			 * XXX Since there are no node locks for NFS, I believe
			 * there is a slight chance that a delayed write will
			 * occur while sleeping just above, so check for it.
			 */
			if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
				splx(s);
				(void) VOP_BWRITE(bp);
				goto loop;
			}
			bp->b_flags |= B_INVAL;
			brelse(bp);
		}
	}
	/* Without V_SAVEMETA both queues must now be empty. */
	if (!(flags & V_SAVEMETA) &&
	    (!LIST_EMPTY(&vp->v_dirtyblkhd) || !LIST_EMPTY(&vp->v_cleanblkhd)))
		panic("vinvalbuf: flush failed");
	splx(s);
	return (0);
}
 1883 
/*
 * Write out (without invalidating) all dirty buffers on a vnode.
 *
 * If sync is non-zero, buffers belonging to other vnodes (indirect
 * blocks) are written synchronously, and we wait until the vnode's
 * dirty list drains before returning; otherwise all writes are
 * started asynchronously.
 */
void
vflushbuf(struct vnode *vp, int sync)
{
	struct buf *bp, *nbp;
	int s;

loop:
	s = splbio();
	for (bp = LIST_FIRST(&vp->v_dirtyblkhd);
	    bp != LIST_END(&vp->v_dirtyblkhd); bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		if ((bp->b_flags & B_BUSY))
			continue;
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("vflushbuf: not dirty");
		bremfree(bp);
		buf_acquire(bp);
		splx(s);
		/*
		 * Wait for I/O associated with indirect blocks to complete,
		 * since there is no way to quickly wait for them below.
		 */
		if (bp->b_vp == vp || sync == 0)
			(void) bawrite(bp);
		else
			(void) bwrite(bp);
		/* List may have changed while we were at low spl; rescan. */
		goto loop;
	}
	if (sync == 0) {
		splx(s);
		return;
	}
	vwaitforio(vp, 0, "vflushbuf", 0);
	if (!LIST_EMPTY(&vp->v_dirtyblkhd)) {
		/* New dirty buffers appeared while waiting; start over. */
		splx(s);
#ifdef DIAGNOSTIC
		vprint("vflushbuf: dirty", vp);
#endif
		goto loop;
	}
	splx(s);
}
 1926 
 1927 /*
 1928  * Associate a buffer with a vnode.
 1929  *
 1930  * Manipulates buffer vnode queues. Must be called at splbio().
 1931  */
 1932 void
 1933 bgetvp(struct vnode *vp, struct buf *bp)
 1934 {
 1935         splassert(IPL_BIO);
 1936 
 1937 
 1938         if (bp->b_vp)
 1939                 panic("bgetvp: not free");
 1940         vhold(vp);
 1941         bp->b_vp = vp;
 1942         if (vp->v_type == VBLK || vp->v_type == VCHR)
 1943                 bp->b_dev = vp->v_rdev;
 1944         else
 1945                 bp->b_dev = NODEV;
 1946         /*
 1947          * Insert onto list for new vnode.
 1948          */
 1949         bufinsvn(bp, &vp->v_cleanblkhd);
 1950 }
 1951 
 1952 /*
 1953  * Disassociate a buffer from a vnode.
 1954  *
 1955  * Manipulates vnode buffer queues. Must be called at splbio().
 1956  */
 1957 void
 1958 brelvp(struct buf *bp)
 1959 {
 1960         struct vnode *vp;
 1961 
 1962         splassert(IPL_BIO);
 1963 
 1964         if ((vp = bp->b_vp) == (struct vnode *) 0)
 1965                 panic("brelvp: NULL");
 1966         /*
 1967          * Delete from old vnode list, if on one.
 1968          */
 1969         if (LIST_NEXT(bp, b_vnbufs) != NOLIST)
 1970                 bufremvn(bp);
 1971         if ((vp->v_bioflag & VBIOONSYNCLIST) &&
 1972             LIST_FIRST(&vp->v_dirtyblkhd) == NULL) {
 1973                 vp->v_bioflag &= ~VBIOONSYNCLIST;
 1974                 LIST_REMOVE(vp, v_synclist);
 1975         }
 1976         bp->b_vp = NULL;
 1977 
 1978         vdrop(vp);
 1979 }
 1980 
/*
 * Replaces the current vnode associated with the buffer, if any,
 * with a new vnode.
 *
 * If an output I/O is pending on the buffer, the old vnode
 * I/O count is adjusted.
 *
 * Ignores vnode buffer queues. Must be called at splbio().
 */
void
buf_replacevnode(struct buf *bp, struct vnode *newvp)
{
	struct vnode *oldvp = bp->b_vp;

	splassert(IPL_BIO);

	if (oldvp)
		brelvp(bp);

	/*
	 * A write still in flight is accounted in v_numoutput: charge
	 * it to the new vnode and credit the old one via vwakeup(),
	 * which tolerates a NULL vnode.
	 */
	if ((bp->b_flags & (B_READ | B_DONE)) == 0) {
		newvp->v_numoutput++;	/* put it on swapdev */
		vwakeup(oldvp);
	}

	bgetvp(newvp, bp);
	/*
	 * bgetvp() placed the buffer on newvp's clean list, but queue
	 * placement is the caller's business here; take it off again.
	 */
	bufremvn(bp);
}
 2008 
 2009 /*
 2010  * Used to assign buffers to the appropriate clean or dirty list on
 2011  * the vnode and to add newly dirty vnodes to the appropriate
 2012  * filesystem syncer list.
 2013  *
 2014  * Manipulates vnode buffer queues. Must be called at splbio().
 2015  */
 2016 void
 2017 reassignbuf(struct buf *bp)
 2018 {
 2019         struct buflists *listheadp;
 2020         int delay;
 2021         struct vnode *vp = bp->b_vp;
 2022 
 2023         splassert(IPL_BIO);
 2024 
 2025         /*
 2026          * Delete from old vnode list, if on one.
 2027          */
 2028         if (LIST_NEXT(bp, b_vnbufs) != NOLIST)
 2029                 bufremvn(bp);
 2030 
 2031         /*
 2032          * If dirty, put on list of dirty buffers;
 2033          * otherwise insert onto list of clean buffers.
 2034          */
 2035         if ((bp->b_flags & B_DELWRI) == 0) {
 2036                 listheadp = &vp->v_cleanblkhd;
 2037                 if ((vp->v_bioflag & VBIOONSYNCLIST) &&
 2038                     LIST_FIRST(&vp->v_dirtyblkhd) == NULL) {
 2039                         vp->v_bioflag &= ~VBIOONSYNCLIST;
 2040                         LIST_REMOVE(vp, v_synclist);
 2041                 }
 2042         } else {
 2043                 listheadp = &vp->v_dirtyblkhd;
 2044                 if ((vp->v_bioflag & VBIOONSYNCLIST) == 0) {
 2045                         switch (vp->v_type) {
 2046                         case VDIR:
 2047                                 delay = syncdelay / 2;
 2048                                 break;
 2049                         case VBLK:
 2050                                 if (vp->v_specmountpoint != NULL) {
 2051                                         delay = syncdelay / 3;
 2052                                         break;
 2053                                 }
 2054                                 /* FALLTHROUGH */
 2055                         default:
 2056                                 delay = syncdelay;
 2057                         }
 2058                         vn_syncer_add_to_worklist(vp, delay);
 2059                 }
 2060         }
 2061         bufinsvn(bp, listheadp);
 2062 }
 2063 
 2064 int
 2065 vfs_register(struct vfsconf *vfs)
 2066 {
 2067         struct vfsconf *vfsp;
 2068         struct vfsconf **vfspp;
 2069 
 2070 #ifdef DIAGNOSTIC
 2071         /* Paranoia? */
 2072         if (vfs->vfc_refcount != 0)
 2073                 printf("vfs_register called with vfc_refcount > 0\n");
 2074 #endif
 2075 
 2076         /* Check if filesystem already known */
 2077         for (vfspp = &vfsconf, vfsp = vfsconf; vfsp;
 2078             vfspp = &vfsp->vfc_next, vfsp = vfsp->vfc_next)
 2079                 if (strcmp(vfsp->vfc_name, vfs->vfc_name) == 0)
 2080                         return (EEXIST);
 2081 
 2082         if (vfs->vfc_typenum > maxvfsconf)
 2083                 maxvfsconf = vfs->vfc_typenum;
 2084 
 2085         vfs->vfc_next = NULL;
 2086 
 2087         /* Add to the end of the list */
 2088         *vfspp = vfs;
 2089 
 2090         /* Call vfs_init() */
 2091         if (vfs->vfc_vfsops->vfs_init)
 2092                 (*(vfs->vfc_vfsops->vfs_init))(vfs);
 2093 
 2094         return 0;
 2095 }
 2096 
 2097 int
 2098 vfs_unregister(struct vfsconf *vfs)
 2099 {
 2100         struct vfsconf *vfsp;
 2101         struct vfsconf **vfspp;
 2102         int maxtypenum;
 2103 
 2104         /* Find our vfsconf struct */
 2105         for (vfspp = &vfsconf, vfsp = vfsconf; vfsp;
 2106             vfspp = &vfsp->vfc_next, vfsp = vfsp->vfc_next) {
 2107                 if (strcmp(vfsp->vfc_name, vfs->vfc_name) == 0)
 2108                         break;
 2109         }
 2110 
 2111         if (!vfsp)                      /* Not found */
 2112                 return (ENOENT);
 2113 
 2114         if (vfsp->vfc_refcount)         /* In use */
 2115                 return (EBUSY);
 2116 
 2117         /* Remove from list and free */
 2118         *vfspp = vfsp->vfc_next;
 2119 
 2120         maxtypenum = 0;
 2121 
 2122         for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
 2123                 if (vfsp->vfc_typenum > maxtypenum)
 2124                         maxtypenum = vfsp->vfc_typenum;
 2125 
 2126         maxvfsconf = maxtypenum;
 2127         return 0;
 2128 }
 2129 
 2130 /*
 2131  * Check if vnode represents a disk device
 2132  */
 2133 int
 2134 vn_isdisk(struct vnode *vp, int *errp)
 2135 {
 2136         if (vp->v_type != VBLK && vp->v_type != VCHR)
 2137                 return (0);
 2138 
 2139         return (1);
 2140 }
 2141 
 2142 #ifdef DDB
 2143 #include <machine/db_machdep.h>
 2144 #include <ddb/db_interface.h>
 2145 #include <ddb/db_output.h>
 2146 
/*
 * DDB helper: dump the interesting fields of a struct buf through
 * the supplied printf-like callback.  With full set, also print any
 * soft-dependency state attached to the buffer.
 */
void
vfs_buf_print(struct buf *bp, int full, int (*pr)(const char *, ...))
{

	/* Identity: owning vnode, block addresses, device, flags. */
	(*pr)("  vp %p lblkno 0x%llx blkno 0x%llx dev 0x%x\n"
	      "  proc %p error %d flags %b\n",
	    bp->b_vp, (int64_t)bp->b_lblkno, (int64_t)bp->b_blkno, bp->b_dev,
	    bp->b_proc, bp->b_error, bp->b_flags, B_BITS);

	/* Sizes, data pointers and completion callback. */
	(*pr)("  bufsize 0x%lx bcount 0x%lx resid 0x%lx sync 0x%x\n"
	      "  data %p saveaddr %p dep %p iodone %p\n",
	    bp->b_bufsize, bp->b_bcount, (long)bp->b_resid, bp->b_synctime,
	    bp->b_data, bp->b_saveaddr, LIST_FIRST(&bp->b_dep), bp->b_iodone);

	/* Dirty and valid byte ranges within the buffer. */
	(*pr)("  dirty {off 0x%x end 0x%x} valid {off 0x%x end 0x%x}\n",
	    bp->b_dirtyoff, bp->b_dirtyend, bp->b_validoff, bp->b_validend);

#ifdef FFS_SOFTUPDATES
	if (full)
		softdep_print(bp, full, pr);
#endif
}
 2169 
/* Printable names for vnode types/tags, indexed by v_type and v_tag. */
const char *vtypes[] = { VTYPE_NAMES };
const char *vtags[] = { VTAG_NAMES };
 2172 
 2173 void
 2174 vfs_vnode_print(struct vnode *vp, int full, int (*pr)(const char *, ...))
 2175 {
 2176 
 2177 #define NENTS(n)        (sizeof n / sizeof(n[0]))
 2178         (*pr)("tag %s(%d) type %s(%d) mount %p typedata %p\n",
 2179               vp->v_tag > NENTS(vtags)? "<unk>":vtags[vp->v_tag], vp->v_tag,
 2180               vp->v_type > NENTS(vtypes)? "<unk>":vtypes[vp->v_type],
 2181               vp->v_type, vp->v_mount, vp->v_mountedhere);
 2182 
 2183         (*pr)("data %p usecount %d writecount %ld holdcnt %ld numoutput %d\n",
 2184               vp->v_data, vp->v_usecount, vp->v_writecount,
 2185               vp->v_holdcnt, vp->v_numoutput);
 2186 
 2187         /* uvm_object_printit(&vp->v_uobj, full, pr); */
 2188 
 2189         if (full) {
 2190                 struct buf *bp;
 2191 
 2192                 (*pr)("clean bufs:\n");
 2193                 LIST_FOREACH(bp, &vp->v_cleanblkhd, b_vnbufs) {
 2194                         (*pr)(" bp %p\n", bp);
 2195                         vfs_buf_print(bp, full, pr);
 2196                 }
 2197 
 2198                 (*pr)("dirty bufs:\n");
 2199                 LIST_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs) {
 2200                         (*pr)(" bp %p\n", bp);
 2201                         vfs_buf_print(bp, full, pr);
 2202                 }
 2203         }
 2204 }
 2205 
 2206 void
 2207 vfs_mount_print(struct mount *mp, int full, int (*pr)(const char *, ...))
 2208 {
 2209         struct vfsconf *vfc = mp->mnt_vfc;
 2210         struct vnode *vp;
 2211         int cnt = 0;
 2212 
 2213         (*pr)("flags %b\nvnodecovered %p syncer %p data %p\n",
 2214             mp->mnt_flag, MNT_BITS,
 2215             mp->mnt_vnodecovered, mp->mnt_syncer, mp->mnt_data);
 2216 
 2217         (*pr)("vfsconf: ops %p name \"%s\" num %d ref %d flags 0x%x\n",
 2218             vfc->vfc_vfsops, vfc->vfc_name, vfc->vfc_typenum,
 2219             vfc->vfc_refcount, vfc->vfc_flags);
 2220 
 2221         (*pr)("statvfs cache: bsize %x iosize %x\nblocks %llu free %llu avail %lld\n",
 2222             mp->mnt_stat.f_bsize, mp->mnt_stat.f_iosize, mp->mnt_stat.f_blocks,
 2223             mp->mnt_stat.f_bfree, mp->mnt_stat.f_bavail);
 2224 
 2225         (*pr)("  files %llu ffiles %llu favail $lld\n", mp->mnt_stat.f_files,
 2226             mp->mnt_stat.f_ffree, mp->mnt_stat.f_favail);
 2227 
 2228         (*pr)("  f_fsidx {0x%x, 0x%x} owner %u ctime 0x%x\n",
 2229             mp->mnt_stat.f_fsid.val[0], mp->mnt_stat.f_fsid.val[1],
 2230             mp->mnt_stat.f_owner, mp->mnt_stat.f_ctime);
 2231 
 2232         (*pr)("  syncwrites %llu asyncwrites = %llu\n",
 2233             mp->mnt_stat.f_syncwrites, mp->mnt_stat.f_asyncwrites);
 2234 
 2235         (*pr)("  syncreads %llu asyncreads = %llu\n",
 2236             mp->mnt_stat.f_syncreads, mp->mnt_stat.f_asyncreads);
 2237 
 2238         (*pr)("  fstype \"%s\" mnton \"%s\" mntfrom \"%s\"\n",
 2239             mp->mnt_stat.f_fstypename, mp->mnt_stat.f_mntonname,
 2240             mp->mnt_stat.f_mntfromname);
 2241 
 2242         (*pr)("locked vnodes:");
 2243         /* XXX would take mountlist lock, except ddb has no context */
 2244         LIST_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes)
 2245                 if (VOP_ISLOCKED(vp)) {
 2246                         if (!LIST_NEXT(vp, v_mntvnodes))
 2247                                 (*pr)(" %p", vp);
 2248                         else if (!(cnt++ % (72 / (sizeof(void *) * 2 + 4))))
 2249                                 (*pr)("\n\t%p", vp);
 2250                         else
 2251                                 (*pr)(", %p", vp);
 2252                 }
 2253         (*pr)("\n");
 2254 
 2255         if (full) {
 2256                 (*pr)("all vnodes:\n\t");
 2257                 /* XXX would take mountlist lock, except ddb has no context */
 2258                 LIST_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes)
 2259                         if (!LIST_NEXT(vp, v_mntvnodes))
 2260                                 (*pr)(" %p", vp);
 2261                         else if (!(cnt++ % (72 / (sizeof(void *) * 2 + 4))))
 2262                                 (*pr)(" %p,\n\t", vp);
 2263                         else
 2264                                 (*pr)(" %p,", vp);
 2265                 (*pr)("\n");
 2266         }
 2267 }
 2268 #endif /* DDB */
 2269 
/*
 * Fill a caller-supplied statfs structure from a mount point's cached
 * statistics (mp->mnt_stat), refreshing the filesystem type name.
 * When sbp IS the mount's own mnt_stat, only the name is updated.
 */
void
copy_statfs_info(struct statfs *sbp, const struct mount *mp)
{
	const struct statfs *mbp;

	/*
	 * NOTE(review): strncpy() does not NUL-terminate when vfc_name
	 * is exactly MFSNAMELEN bytes — presumably names are always
	 * shorter; confirm against vfs_register() callers.
	 */
	strncpy(sbp->f_fstypename, mp->mnt_vfc->vfc_name, MFSNAMELEN);

	/* Copying mnt_stat onto itself would be pointless. */
	if (sbp == (mbp = &mp->mnt_stat))
		return;

	sbp->f_fsid = mbp->f_fsid;
	sbp->f_owner = mbp->f_owner;
	sbp->f_flags = mbp->f_flags;
	sbp->f_syncwrites = mbp->f_syncwrites;
	sbp->f_asyncwrites = mbp->f_asyncwrites;
	sbp->f_syncreads = mbp->f_syncreads;
	sbp->f_asyncreads = mbp->f_asyncreads;
	sbp->f_namemax = mbp->f_namemax;
	bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
	bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
	bcopy(&mp->mnt_stat.mount_info.ufs_args, &sbp->mount_info.ufs_args,
	    sizeof(struct ufs_args));
}
 2293 

Cache object: 48ef301cd5dee3922780ca649e4ad0ae


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.