
FreeBSD/Linux Kernel Cross Reference
sys/kern/vfs_subr.c


    1 /*
    2  * Copyright (c) 1989, 1993
    3  *      The Regents of the University of California.  All rights reserved.
    4  * (c) UNIX System Laboratories, Inc.
    5  * All or some portions of this file are derived from material licensed
    6  * to the University of California by American Telephone and Telegraph
    7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
    8  * the permission of UNIX System Laboratories, Inc.
    9  *
   10  * Redistribution and use in source and binary forms, with or without
   11  * modification, are permitted provided that the following conditions
   12  * are met:
   13  * 1. Redistributions of source code must retain the above copyright
   14  *    notice, this list of conditions and the following disclaimer.
   15  * 2. Redistributions in binary form must reproduce the above copyright
   16  *    notice, this list of conditions and the following disclaimer in the
   17  *    documentation and/or other materials provided with the distribution.
   18  * 3. All advertising materials mentioning features or use of this software
   19  *    must display the following acknowledgement:
   20  *      This product includes software developed by the University of
   21  *      California, Berkeley and its contributors.
   22  * 4. Neither the name of the University nor the names of its contributors
   23  *    may be used to endorse or promote products derived from this software
   24  *    without specific prior written permission.
   25  *
   26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   36  * SUCH DAMAGE.
   37  *
   38  *      @(#)vfs_subr.c  8.31 (Berkeley) 5/26/95
   39  * $FreeBSD: stable/4/sys/kern/vfs_subr.c 145953 2005-05-06 02:50:00Z cperciva $
   40  */
   41 
   42 /*
   43  * External virtual filesystem routines
   44  */
   45 #include "opt_ddb.h"
   46 
   47 #include <sys/param.h>
   48 #include <sys/systm.h>
   49 #include <sys/buf.h>
   50 #include <sys/conf.h>
   51 #include <sys/dirent.h>
   52 #include <sys/domain.h>
   53 #include <sys/eventhandler.h>
   54 #include <sys/fcntl.h>
   55 #include <sys/kernel.h>
   56 #include <sys/kthread.h>
   57 #include <sys/malloc.h>
   58 #include <sys/mbuf.h>
   59 #include <sys/mount.h>
   60 #include <sys/namei.h>
   61 #include <sys/proc.h>
   62 #include <sys/reboot.h>
   63 #include <sys/socket.h>
   64 #include <sys/stat.h>
   65 #include <sys/sysctl.h>
   66 #include <sys/syslog.h>
   67 #include <sys/vmmeter.h>
   68 #include <sys/vnode.h>
   69 
   70 #include <machine/limits.h>
   71 
   72 #include <vm/vm.h>
   73 #include <vm/vm_object.h>
   74 #include <vm/vm_extern.h>
   75 #include <vm/vm_kern.h>
   76 #include <vm/pmap.h>
   77 #include <vm/vm_map.h>
   78 #include <vm/vm_page.h>
   79 #include <vm/vm_pager.h>
   80 #include <vm/vnode_pager.h>
   81 #include <vm/vm_zone.h>
   82 
   83 static MALLOC_DEFINE(M_NETADDR, "Export Host", "Export host address structure");
   84 
   85 static void     insmntque __P((struct vnode *vp, struct mount *mp));
   86 static void     vclean __P((struct vnode *vp, int flags, struct proc *p));
   87 static unsigned long    numvnodes;
   88 static void     vlruvp(struct vnode *vp);
   89 SYSCTL_INT(_debug, OID_AUTO, numvnodes, CTLFLAG_RD, &numvnodes, 0, "");
   90 
   91 enum vtype iftovt_tab[16] = {
   92         VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
   93         VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
   94 };
   95 int vttoif_tab[9] = {
   96         0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
   97         S_IFSOCK, S_IFIFO, S_IFMT,
   98 };
   99 
  100 static TAILQ_HEAD(freelst, vnode) vnode_free_list;      /* vnode free list */
  101 
  102 static u_long wantfreevnodes = 25;
  103 SYSCTL_INT(_debug, OID_AUTO, wantfreevnodes, CTLFLAG_RW, &wantfreevnodes, 0, "");
  104 static u_long freevnodes = 0;
  105 SYSCTL_INT(_debug, OID_AUTO, freevnodes, CTLFLAG_RD, &freevnodes, 0, "");
  106 
  107 static int reassignbufcalls;
  108 SYSCTL_INT(_vfs, OID_AUTO, reassignbufcalls, CTLFLAG_RW, &reassignbufcalls, 0, "");
  109 static int reassignbufloops;
  110 SYSCTL_INT(_vfs, OID_AUTO, reassignbufloops, CTLFLAG_RW, &reassignbufloops, 0, "");
  111 static int reassignbufsortgood;
  112 SYSCTL_INT(_vfs, OID_AUTO, reassignbufsortgood, CTLFLAG_RW, &reassignbufsortgood, 0, "");
  113 static int reassignbufsortbad;
  114 SYSCTL_INT(_vfs, OID_AUTO, reassignbufsortbad, CTLFLAG_RW, &reassignbufsortbad, 0, "");
  115 static int reassignbufmethod = 1;
  116 SYSCTL_INT(_vfs, OID_AUTO, reassignbufmethod, CTLFLAG_RW, &reassignbufmethod, 0, "");
  117 static int nameileafonly = 0;
  118 SYSCTL_INT(_vfs, OID_AUTO, nameileafonly, CTLFLAG_RW, &nameileafonly, 0, "");
  119 
  120 #ifdef ENABLE_VFS_IOOPT
  121 int vfs_ioopt = 0;
  122 SYSCTL_INT(_vfs, OID_AUTO, ioopt, CTLFLAG_RW, &vfs_ioopt, 0, "");
  123 #endif
  124 
  125 struct mntlist mountlist = TAILQ_HEAD_INITIALIZER(mountlist); /* mounted fs */
  126 struct simplelock mountlist_slock;
  127 struct simplelock mntvnode_slock;
  128 int     nfs_mount_type = -1;
  129 #ifndef NULL_SIMPLELOCKS
  130 static struct simplelock mntid_slock;
  131 static struct simplelock vnode_free_list_slock;
  132 static struct simplelock spechash_slock;
  133 #endif
  134 struct nfs_public nfs_pub;      /* publicly exported FS */
  135 static vm_zone_t vnode_zone;
  136 
  137 /*
  138  * The workitem queue.
  139  */
  140 #define SYNCER_MAXDELAY         32
  141 static int syncer_maxdelay = SYNCER_MAXDELAY;   /* maximum delay time */
  142 time_t syncdelay = 30;          /* max time to delay syncing data */
  143 time_t filedelay = 30;          /* time to delay syncing files */
  144 SYSCTL_INT(_kern, OID_AUTO, filedelay, CTLFLAG_RW, &filedelay, 0, "");
  145 time_t dirdelay = 29;           /* time to delay syncing directories */
  146 SYSCTL_INT(_kern, OID_AUTO, dirdelay, CTLFLAG_RW, &dirdelay, 0, "");
  147 time_t metadelay = 28;          /* time to delay syncing metadata */
  148 SYSCTL_INT(_kern, OID_AUTO, metadelay, CTLFLAG_RW, &metadelay, 0, "");
  149 static int rushjob;                     /* number of slots to run ASAP */
  150 static int stat_rush_requests;  /* number of times I/O speeded up */
  151 SYSCTL_INT(_debug, OID_AUTO, rush_requests, CTLFLAG_RW, &stat_rush_requests, 0, "");
  152 
  153 static int syncer_delayno = 0;
  154 static long syncer_mask; 
  155 LIST_HEAD(synclist, vnode);
  156 static struct synclist *syncer_workitem_pending;
  157 
  158 int desiredvnodes;
  159 SYSCTL_INT(_kern, KERN_MAXVNODES, maxvnodes, CTLFLAG_RW, 
  160     &desiredvnodes, 0, "Maximum number of vnodes");
  161 static int minvnodes;
  162 SYSCTL_INT(_kern, OID_AUTO, minvnodes, CTLFLAG_RW, 
  163     &minvnodes, 0, "Minimum number of vnodes");
  164 static int vnlru_nowhere = 0;
  165 SYSCTL_INT(_debug, OID_AUTO, vnlru_nowhere, CTLFLAG_RW, &vnlru_nowhere, 0,
  166     "Number of times the vnlru process ran without success");
  167 
  168 static void     vfs_free_addrlist __P((struct netexport *nep));
  169 static int      vfs_free_netcred __P((struct radix_node *rn, void *w));
  170 static int      vfs_hang_addrlist __P((struct mount *mp, struct netexport *nep,
  171                                        struct export_args *argp));
  172 
  173 /*
  174  * Initialize the vnode management data structures.
  175  */
  176 void
  177 vntblinit()
  178 {
  179 
  180         /*
  181          * Desiredvnodes is a function of the physical memory size and
  182          * the kernel's heap size.  Specifically, desiredvnodes scales
  183          * in proportion to the physical memory size until two fifths
  184          * of the kernel's heap size is consumed by vnodes and vm
  185          * objects.  
  186          */
  187         desiredvnodes = min(maxproc + cnt.v_page_count / 4, 2 * vm_kmem_size /
  188             (5 * (sizeof(struct vm_object) + sizeof(struct vnode))));
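               /*
                * Editor's example (illustrative figures, not from the source):
                * with 4 KB pages and 1 GB of RAM, cnt.v_page_count is about
                * 262144, so the first term allows roughly maxproc + 65536
                * vnodes; the second term caps the total so that vnodes and
                * their vm objects consume at most two fifths of vm_kmem_size.
                */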
  189         minvnodes = desiredvnodes / 4;
  190         simple_lock_init(&mntvnode_slock);
  191         simple_lock_init(&mntid_slock);
  192         simple_lock_init(&spechash_slock);
  193         TAILQ_INIT(&vnode_free_list);
  194         simple_lock_init(&vnode_free_list_slock);
  195         vnode_zone = zinit("VNODE", sizeof (struct vnode), 0, 0, 5);
  196         /*
  197          * Initialize the filesystem syncer.
  198          */     
  199         syncer_workitem_pending = hashinit(syncer_maxdelay, M_VNODE, 
  200                 &syncer_mask);
  201         syncer_maxdelay = syncer_mask + 1;
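               /*
                * Editor's note: with the default SYNCER_MAXDELAY of 32, the
                * hash table is expected to come back with 32 entries and a
                * mask of 31, so the syncer wheel wraps once every 32 seconds.
                */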
  202 }
  203 
  204 /*
  205  * Mark a mount point as busy. Used to synchronize access and to delay
  206  * unmounting. Interlock is not released on failure.
  207  */
  208 int
  209 vfs_busy(mp, flags, interlkp, p)
  210         struct mount *mp;
  211         int flags;
  212         struct simplelock *interlkp;
  213         struct proc *p;
  214 {
  215         int lkflags;
  216 
  217         if (mp->mnt_kern_flag & MNTK_UNMOUNT) {
  218                 if (flags & LK_NOWAIT)
  219                         return (ENOENT);
  220                 mp->mnt_kern_flag |= MNTK_MWAIT;
  221                 if (interlkp) {
  222                         simple_unlock(interlkp);
  223                 }
  224                 /*
  225                  * Since all busy locks are shared except the exclusive
  226                  * lock granted when unmounting, the only place that a
  227                  * wakeup needs to be done is at the release of the
  228                  * exclusive lock at the end of dounmount.
  229                  */
  230                 tsleep((caddr_t)mp, PVFS, "vfs_busy", 0);
  231                 if (interlkp) {
  232                         simple_lock(interlkp);
  233                 }
  234                 return (ENOENT);
  235         }
  236         lkflags = LK_SHARED | LK_NOPAUSE;
  237         if (interlkp)
  238                 lkflags |= LK_INTERLOCK;
  239         if (lockmgr(&mp->mnt_lock, lkflags, interlkp, p))
  240                 panic("vfs_busy: unexpected lock failure");
  241         return (0);
  242 }
  243 
  244 /*
  245  * Free a busy filesystem.
  246  */
  247 void
  248 vfs_unbusy(mp, p)
  249         struct mount *mp;
  250         struct proc *p;
  251 {
  252 
  253         lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, p);
  254 }
  255 
  256 /*
  257  * Lookup a filesystem type, and if found allocate and initialize
  258  * a mount structure for it.
  259  *
  260  * Devname is usually updated by mount(8) after booting.
  261  */
  262 int
  263 vfs_rootmountalloc(fstypename, devname, mpp)
  264         char *fstypename;
  265         char *devname;
  266         struct mount **mpp;
  267 {
  268         struct proc *p = curproc;       /* XXX */
  269         struct vfsconf *vfsp;
  270         struct mount *mp;
  271 
  272         if (fstypename == NULL)
  273                 return (ENODEV);
  274         for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
  275                 if (!strcmp(vfsp->vfc_name, fstypename))
  276                         break;
  277         if (vfsp == NULL)
  278                 return (ENODEV);
  279         mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
  280         bzero((char *)mp, (u_long)sizeof(struct mount));
  281         lockinit(&mp->mnt_lock, PVFS, "vfslock", VLKTIMEOUT, LK_NOPAUSE);
  282         (void)vfs_busy(mp, LK_NOWAIT, 0, p);
  283         TAILQ_INIT(&mp->mnt_nvnodelist);
  284         TAILQ_INIT(&mp->mnt_reservedvnlist);
  285         mp->mnt_nvnodelistsize = 0;
  286         mp->mnt_vfc = vfsp;
  287         mp->mnt_op = vfsp->vfc_vfsops;
  288         mp->mnt_flag = MNT_RDONLY;
  289         mp->mnt_vnodecovered = NULLVP;
  290         vfsp->vfc_refcount++;
  291         mp->mnt_iosize_max = DFLTPHYS;
  292         mp->mnt_stat.f_type = vfsp->vfc_typenum;
  293         mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
  294         strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
  295         mp->mnt_stat.f_mntonname[0] = '/';
  296         mp->mnt_stat.f_mntonname[1] = 0;
  297         (void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
  298         *mpp = mp;
  299         return (0);
  300 }
  301 
  302 /*
  303  * Find an appropriate filesystem to use for the root. If a filesystem
  304  * has not been preselected, walk through the list of known filesystems
   305  * trying those that have mountroot routines until one works or we
   306  * have tried them all.
  307  */
  308 #ifdef notdef   /* XXX JH */
  309 int
  310 lite2_vfs_mountroot()
  311 {
  312         struct vfsconf *vfsp;
  313         extern int (*lite2_mountroot) __P((void));
  314         int error;
  315 
  316         if (lite2_mountroot != NULL)
  317                 return ((*lite2_mountroot)());
  318         for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
  319                 if (vfsp->vfc_mountroot == NULL)
  320                         continue;
  321                 if ((error = (*vfsp->vfc_mountroot)()) == 0)
  322                         return (0);
  323                 printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error);
  324         }
  325         return (ENODEV);
  326 }
  327 #endif
  328 
  329 /*
  330  * Lookup a mount point by filesystem identifier.
  331  */
  332 struct mount *
  333 vfs_getvfs(fsid)
  334         fsid_t *fsid;
  335 {
  336         register struct mount *mp;
  337 
  338         simple_lock(&mountlist_slock);
  339         TAILQ_FOREACH(mp, &mountlist, mnt_list) {
  340                 if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
  341                     mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
  342                         simple_unlock(&mountlist_slock);
  343                         return (mp);
   344                 }
  345         }
  346         simple_unlock(&mountlist_slock);
  347         return ((struct mount *) 0);
  348 }
  349 
  350 /*
  351  * Get a new unique fsid.  Try to make its val[0] unique, since this value
  352  * will be used to create fake device numbers for stat().  Also try (but
   353  * not so hard) to make its val[0] unique mod 2^16, since some emulators only
  354  * support 16-bit device numbers.  We end up with unique val[0]'s for the
  355  * first 2^16 calls and unique val[0]'s mod 2^16 for the first 2^8 calls.
  356  *
  357  * Keep in mind that several mounts may be running in parallel.  Starting
  358  * the search one past where the previous search terminated is both a
  359  * micro-optimization and a defense against returning the same fsid to
  360  * different mounts.
  361  */
  362 void
  363 vfs_getnewfsid(mp)
  364         struct mount *mp;
  365 {
  366         static u_int16_t mntid_base;
  367         fsid_t tfsid;
  368         int mtype;
  369 
  370         simple_lock(&mntid_slock);
  371         mtype = mp->mnt_vfc->vfc_typenum;
  372         tfsid.val[1] = mtype;
  373         mtype = (mtype & 0xFF) << 24;
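               /*
                * Editor's note (layout read from the code below): the minor
                * portion of val[0] carries the filesystem type number in bits
                * 24-31, the high byte of mntid_base in bits 16-23, and the low
                * byte of mntid_base in bits 0-7, so successive mounts of the
                * same type still get distinct val[0] values.
                */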
  374         for (;;) {
  375                 tfsid.val[0] = makeudev(255,
  376                     mtype | ((mntid_base & 0xFF00) << 8) | (mntid_base & 0xFF));
  377                 mntid_base++;
  378                 if (vfs_getvfs(&tfsid) == NULL)
  379                         break;
  380         }
  381         mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
  382         mp->mnt_stat.f_fsid.val[1] = tfsid.val[1];
  383         simple_unlock(&mntid_slock);
  384 }
  385 
  386 /*
  387  * Knob to control the precision of file timestamps:
  388  *
  389  *   0 = seconds only; nanoseconds zeroed.
  390  *   1 = seconds and nanoseconds, accurate within 1/HZ.
  391  *   2 = seconds and nanoseconds, truncated to microseconds.
  392  * >=3 = seconds and nanoseconds, maximum precision.
  393  */
  394 enum { TSP_SEC, TSP_HZ, TSP_USEC, TSP_NSEC };
  395 
  396 static int timestamp_precision = TSP_SEC;
  397 SYSCTL_INT(_vfs, OID_AUTO, timestamp_precision, CTLFLAG_RW,
  398     &timestamp_precision, 0, "");
  399 
  400 /*
  401  * Get a current timestamp.
  402  */
  403 void
  404 vfs_timestamp(tsp)
  405         struct timespec *tsp;
  406 {
  407         struct timeval tv;
  408 
  409         switch (timestamp_precision) {
  410         case TSP_SEC:
  411                 tsp->tv_sec = time_second;
  412                 tsp->tv_nsec = 0;
  413                 break;
  414         case TSP_HZ:
  415                 getnanotime(tsp);
  416                 break;
  417         case TSP_USEC:
  418                 microtime(&tv);
  419                 TIMEVAL_TO_TIMESPEC(&tv, tsp);
  420                 break;
  421         case TSP_NSEC:
  422         default:
  423                 nanotime(tsp);
  424                 break;
  425         }
  426 }
  427 
  428 /*
  429  * Set vnode attributes to VNOVAL
  430  */
  431 void
  432 vattr_null(vap)
  433         register struct vattr *vap;
  434 {
  435 
  436         vap->va_type = VNON;
  437         vap->va_size = VNOVAL;
  438         vap->va_bytes = VNOVAL;
  439         vap->va_mode = VNOVAL;
  440         vap->va_nlink = VNOVAL;
  441         vap->va_uid = VNOVAL;
  442         vap->va_gid = VNOVAL;
  443         vap->va_fsid = VNOVAL;
  444         vap->va_fileid = VNOVAL;
  445         vap->va_blocksize = VNOVAL;
  446         vap->va_rdev = VNOVAL;
  447         vap->va_atime.tv_sec = VNOVAL;
  448         vap->va_atime.tv_nsec = VNOVAL;
  449         vap->va_mtime.tv_sec = VNOVAL;
  450         vap->va_mtime.tv_nsec = VNOVAL;
  451         vap->va_ctime.tv_sec = VNOVAL;
  452         vap->va_ctime.tv_nsec = VNOVAL;
  453         vap->va_flags = VNOVAL;
  454         vap->va_gen = VNOVAL;
  455         vap->va_vaflags = 0;
  456 }
  457 
  458 /*
  459  * This routine is called when we have too many vnodes.  It attempts
  460  * to free <count> vnodes and will potentially free vnodes that still
  461  * have VM backing store (VM backing store is typically the cause
  462  * of a vnode blowout so we want to do this).  Therefore, this operation
  463  * is not considered cheap.
  464  *
  465  * A number of conditions may prevent a vnode from being reclaimed.
   466  * The buffer cache may have references on the vnode, a directory
   467  * vnode may still have references due to the namei cache representing
   468  * underlying files, or the vnode may be in active use.  It is not
   469  * desirable to reuse such vnodes.  These conditions may cause the
  470  * number of vnodes to reach some minimum value regardless of what
  471  * you set kern.maxvnodes to.  Do not set kern.maxvnodes too low.
  472  */
  473 static int
  474 vlrureclaim(struct mount *mp)
  475 {
  476         struct vnode *vp;
  477         int done;
  478         int trigger;
  479         int usevnodes;
  480         int count;
  481 
  482         /*
  483          * Calculate the trigger point, don't allow user
  484          * screwups to blow us up.   This prevents us from
  485          * recycling vnodes with lots of resident pages.  We
  486          * aren't trying to free memory, we are trying to
  487          * free vnodes.
  488          */
  489         usevnodes = desiredvnodes;
  490         if (usevnodes <= 0)
  491                 usevnodes = 1;
  492         trigger = cnt.v_page_count * 2 / usevnodes;
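               /*
                * Editor's example (illustrative figures, not from the source):
                * with 262144 pages and desiredvnodes at 65536, trigger works
                * out to 8, so vnodes caching 8 or more resident pages are
                * skipped by the loop below.
                */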
  493 
  494         done = 0;
  495         simple_lock(&mntvnode_slock);
  496         count = mp->mnt_nvnodelistsize / 10 + 1;
  497         while (count && (vp = TAILQ_FIRST(&mp->mnt_nvnodelist)) != NULL) {
  498                 TAILQ_REMOVE(&mp->mnt_nvnodelist, vp, v_nmntvnodes);
  499                 TAILQ_INSERT_TAIL(&mp->mnt_nvnodelist, vp, v_nmntvnodes);
  500 
  501                 if (vp->v_type != VNON &&
  502                     vp->v_type != VBAD &&
  503                     VMIGHTFREE(vp) &&           /* critical path opt */
  504                     (vp->v_object == NULL || vp->v_object->resident_page_count < trigger) &&
  505                     simple_lock_try(&vp->v_interlock)
  506                 ) {
  507                         simple_unlock(&mntvnode_slock);
  508                         if (VMIGHTFREE(vp)) {
  509                                 vgonel(vp, curproc);
  510                                 done++;
  511                         } else {
  512                                 simple_unlock(&vp->v_interlock);
  513                         }
  514                         simple_lock(&mntvnode_slock);
  515                 }
  516                 --count;
  517         }
  518         simple_unlock(&mntvnode_slock);
  519         return done;
  520 }
  521 
  522 /*
  523  * Attempt to recycle vnodes in a context that is always safe to block.
   524  * Calling vlrureclaim() from the bowels of file system code has some
  525  * interesting deadlock problems.
  526  */
  527 static struct proc *vnlruproc;
  528 static int vnlruproc_sig;
  529 
  530 static void 
  531 vnlru_proc(void)
  532 {
  533         struct mount *mp, *nmp;
  534         int s;
  535         int done;
  536         struct proc *p = vnlruproc;
  537 
  538         EVENTHANDLER_REGISTER(shutdown_pre_sync, shutdown_kproc, p,
  539             SHUTDOWN_PRI_FIRST);   
  540 
  541         s = splbio();
  542         for (;;) {
  543                 kproc_suspend_loop(p);
  544                 if (numvnodes - freevnodes <= desiredvnodes * 9 / 10) {
  545                         vnlruproc_sig = 0;
  546                         wakeup(&vnlruproc_sig);
  547                         tsleep(vnlruproc, PVFS, "vlruwt", hz);
  548                         continue;
  549                 }
  550                 done = 0;
  551                 simple_lock(&mountlist_slock);
  552                 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
  553                         if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
  554                                 nmp = TAILQ_NEXT(mp, mnt_list);
  555                                 continue;
  556                         }
  557                         done += vlrureclaim(mp);
  558                         simple_lock(&mountlist_slock);
  559                         nmp = TAILQ_NEXT(mp, mnt_list);
  560                         vfs_unbusy(mp, p);
  561                 }
  562                 simple_unlock(&mountlist_slock);
  563                 if (done == 0) {
  564                         vnlru_nowhere++;
  565                         tsleep(vnlruproc, PPAUSE, "vlrup", hz * 3);
  566                 }
  567         }
  568         splx(s);
  569 }
  570 
  571 static struct kproc_desc vnlru_kp = {
  572         "vnlru",
  573         vnlru_proc,
  574         &vnlruproc
  575 };
  576 SYSINIT(vnlru, SI_SUB_KTHREAD_UPDATE, SI_ORDER_FIRST, kproc_start, &vnlru_kp)
  577 
  578 /*
  579  * Routines having to do with the management of the vnode table.
  580  */
  581 extern vop_t **dead_vnodeop_p;
  582 
  583 /*
  584  * Return the next vnode from the free list.
  585  */
  586 int
  587 getnewvnode(tag, mp, vops, vpp)
  588         enum vtagtype tag;
  589         struct mount *mp;
  590         vop_t **vops;
  591         struct vnode **vpp;
  592 {
  593         int s;
  594         struct proc *p = curproc;       /* XXX */
  595         struct vnode *vp = NULL;
  596         vm_object_t object;
  597 
  598         s = splbio();
  599 
  600         /*
   601          * Try to reuse vnodes if we hit the max.  This case only
   602          * occurs in certain large-memory (2G+) configurations.  We cannot
  603          * attempt to directly reclaim vnodes due to nasty recursion
  604          * problems.
  605          */
  606         while (numvnodes - freevnodes > desiredvnodes) {
  607                 if (vnlruproc_sig == 0) {
  608                         vnlruproc_sig = 1;      /* avoid unnecessary wakeups */
  609                         wakeup(vnlruproc);
  610                 }
  611                 tsleep(&vnlruproc_sig, PVFS, "vlruwk", hz);
  612         }
  613 
  614 
  615         /*
  616          * Attempt to reuse a vnode already on the free list, allocating
   617          * a new vnode if we can't find one or if we have not yet reached
   618          * the minimum needed for good LRU performance.
  619          */
  620         simple_lock(&vnode_free_list_slock);
  621         if (freevnodes >= wantfreevnodes && numvnodes >= minvnodes) {
  622                 int count;
  623 
  624                 for (count = 0; count < freevnodes; count++) {
  625                         vp = TAILQ_FIRST(&vnode_free_list);
  626                         if (vp == NULL || vp->v_usecount)
  627                                 panic("getnewvnode: free vnode isn't");
  628 
  629                         TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
  630                         if ((VOP_GETVOBJECT(vp, &object) == 0 &&
  631                             (object->resident_page_count || object->ref_count)) ||
  632                             !simple_lock_try(&vp->v_interlock)) {
  633                                 TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
  634                                 vp = NULL;
  635                                 continue;
  636                         }
  637                         if (LIST_FIRST(&vp->v_cache_src)) {
  638                                 /*
  639                                  * note: nameileafonly sysctl is temporary,
  640                                  * for debugging only, and will eventually be
  641                                  * removed.
  642                                  */
  643                                 if (nameileafonly > 0) {
  644                                         /*
  645                                          * Do not reuse namei-cached directory
  646                                          * vnodes that have cached
  647                                          * subdirectories.
  648                                          */
  649                                         if (cache_leaf_test(vp) < 0) {
  650                                                 simple_unlock(&vp->v_interlock);
  651                                                 TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
  652                                                 vp = NULL;
  653                                                 continue;
  654                                         }
  655                                 } else if (nameileafonly < 0 || 
  656                                             vmiodirenable == 0) {
  657                                         /*
  658                                          * Do not reuse namei-cached directory
  659                                          * vnodes if nameileafonly is -1 or
  660                                          * if VMIO backing for directories is
  661                                          * turned off (otherwise we reuse them
  662                                          * too quickly).
  663                                          */
  664                                         simple_unlock(&vp->v_interlock);
  665                                         TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
  666                                         vp = NULL;
  667                                         continue;
  668                                 }
  669                         }
  670                         break;
  671                 }
  672         }
  673 
  674         if (vp) {
  675                 vp->v_flag |= VDOOMED;
  676                 vp->v_flag &= ~VFREE;
  677                 freevnodes--;
  678                 simple_unlock(&vnode_free_list_slock);
  679                 cache_purge(vp);
  680                 vp->v_lease = NULL;
  681                 if (vp->v_type != VBAD) {
  682                         vgonel(vp, p);
  683                 } else {
  684                         simple_unlock(&vp->v_interlock);
  685                 }
  686 
  687 #ifdef INVARIANTS
  688                 {
  689                         int s;
  690 
  691                         if (vp->v_data)
  692                                 panic("cleaned vnode isn't");
  693                         s = splbio();
  694                         if (vp->v_numoutput)
  695                                 panic("Clean vnode has pending I/O's");
  696                         splx(s);
  697                 }
  698 #endif
  699                 vp->v_flag = 0;
  700                 vp->v_lastw = 0;
  701                 vp->v_lasta = 0;
  702                 vp->v_cstart = 0;
  703                 vp->v_clen = 0;
  704                 vp->v_socket = 0;
  705                 vp->v_writecount = 0;   /* XXX */
  706         } else {
  707                 simple_unlock(&vnode_free_list_slock);
  708                 vp = (struct vnode *) zalloc(vnode_zone);
  709                 bzero((char *) vp, sizeof *vp);
  710                 simple_lock_init(&vp->v_interlock);
  711                 vp->v_dd = vp;
  712                 cache_purge(vp);
  713                 LIST_INIT(&vp->v_cache_src);
  714                 TAILQ_INIT(&vp->v_cache_dst);
  715                 numvnodes++;
  716         }
  717 
  718         TAILQ_INIT(&vp->v_cleanblkhd);
  719         TAILQ_INIT(&vp->v_dirtyblkhd);
  720         vp->v_type = VNON;
  721         vp->v_tag = tag;
  722         vp->v_op = vops;
  723         insmntque(vp, mp);
  724         *vpp = vp;
  725         vp->v_usecount = 1;
  726         vp->v_data = 0;
  727         splx(s);
  728 
  729         vfs_object_create(vp, p, p->p_ucred);
  730         return (0);
  731 }
  732 
  733 /*
  734  * Move a vnode from one mount queue to another.
  735  */
  736 static void
  737 insmntque(vp, mp)
  738         register struct vnode *vp;
  739         register struct mount *mp;
  740 {
  741 
  742         simple_lock(&mntvnode_slock);
  743         /*
  744          * Delete from old mount point vnode list, if on one.
  745          */
  746         if (vp->v_mount != NULL) {
  747                 KASSERT(vp->v_mount->mnt_nvnodelistsize > 0,
  748                         ("bad mount point vnode list size"));
  749                 TAILQ_REMOVE(&vp->v_mount->mnt_nvnodelist, vp, v_nmntvnodes);
  750                 vp->v_mount->mnt_nvnodelistsize--;
  751         }
  752         /*
  753          * Insert into list of vnodes for the new mount point, if available.
  754          */
  755         if ((vp->v_mount = mp) == NULL) {
  756                 simple_unlock(&mntvnode_slock);
  757                 return;
  758         }
  759         TAILQ_INSERT_TAIL(&mp->mnt_nvnodelist, vp, v_nmntvnodes);
  760         mp->mnt_nvnodelistsize++;
  761         simple_unlock(&mntvnode_slock);
  762 }
  763 
  764 /*
  765  * Update outstanding I/O count and do wakeup if requested.
  766  */
  767 void
  768 vwakeup(bp)
  769         register struct buf *bp;
  770 {
  771         register struct vnode *vp;
  772 
  773         bp->b_flags &= ~B_WRITEINPROG;
  774         if ((vp = bp->b_vp)) {
  775                 vp->v_numoutput--;
  776                 if (vp->v_numoutput < 0)
  777                         panic("vwakeup: neg numoutput");
  778                 if ((vp->v_numoutput == 0) && (vp->v_flag & VBWAIT)) {
  779                         vp->v_flag &= ~VBWAIT;
  780                         wakeup((caddr_t) &vp->v_numoutput);
  781                 }
  782         }
  783 }
  784 
  785 /*
  786  * Flush out and invalidate all buffers associated with a vnode.
  787  * Called with the underlying object locked.
  788  */
  789 int
  790 vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
  791         register struct vnode *vp;
  792         int flags;
  793         struct ucred *cred;
  794         struct proc *p;
  795         int slpflag, slptimeo;
  796 {
  797         register struct buf *bp;
  798         struct buf *nbp, *blist;
  799         int s, error;
  800         vm_object_t object;
  801 
  802         if (flags & V_SAVE) {
  803                 s = splbio();
  804                 while (vp->v_numoutput) {
  805                         vp->v_flag |= VBWAIT;
  806                         error = tsleep((caddr_t)&vp->v_numoutput,
  807                             slpflag | (PRIBIO + 1), "vinvlbuf", slptimeo);
  808                         if (error) {
  809                                 splx(s);
  810                                 return (error);
  811                         }
  812                 }
  813                 if (!TAILQ_EMPTY(&vp->v_dirtyblkhd)) {
  814                         splx(s);
  815                         if ((error = VOP_FSYNC(vp, cred, MNT_WAIT, p)) != 0)
  816                                 return (error);
  817                         s = splbio();
  818                         if (vp->v_numoutput > 0 ||
  819                             !TAILQ_EMPTY(&vp->v_dirtyblkhd))
  820                                 panic("vinvalbuf: dirty bufs");
  821                 }
  822                 splx(s);
  823         }
  824         s = splbio();
  825         for (;;) {
  826                 blist = TAILQ_FIRST(&vp->v_cleanblkhd);
  827                 if (!blist)
  828                         blist = TAILQ_FIRST(&vp->v_dirtyblkhd);
  829                 if (!blist)
  830                         break;
  831 
  832                 for (bp = blist; bp; bp = nbp) {
  833                         nbp = TAILQ_NEXT(bp, b_vnbufs);
  834                         if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) {
  835                                 error = BUF_TIMELOCK(bp,
  836                                     LK_EXCLUSIVE | LK_SLEEPFAIL,
  837                                     "vinvalbuf", slpflag, slptimeo);
  838                                 if (error == ENOLCK)
  839                                         break;
  840                                 splx(s);
  841                                 return (error);
  842                         }
  843                         /*
  844                          * XXX Since there are no node locks for NFS, I
  845                          * believe there is a slight chance that a delayed
  846                          * write will occur while sleeping just above, so
  847                          * check for it.  Note that vfs_bio_awrite expects
  848                          * buffers to reside on a queue, while VOP_BWRITE and
  849                          * brelse do not.
  850                          */
  851                         if (((bp->b_flags & (B_DELWRI | B_INVAL)) == B_DELWRI) &&
  852                                 (flags & V_SAVE)) {
  853 
  854                                 if (bp->b_vp == vp) {
  855                                         if (bp->b_flags & B_CLUSTEROK) {
  856                                                 BUF_UNLOCK(bp);
  857                                                 vfs_bio_awrite(bp);
  858                                         } else {
  859                                                 bremfree(bp);
  860                                                 bp->b_flags |= B_ASYNC;
  861                                                 VOP_BWRITE(bp->b_vp, bp);
  862                                         }
  863                                 } else {
  864                                         bremfree(bp);
  865                                         (void) VOP_BWRITE(bp->b_vp, bp);
  866                                 }
  867                                 break;
  868                         }
  869                         bremfree(bp);
  870                         bp->b_flags |= (B_INVAL | B_NOCACHE | B_RELBUF);
  871                         bp->b_flags &= ~B_ASYNC;
  872                         brelse(bp);
  873                 }
  874         }
  875 
  876         /*
  877          * Wait for I/O to complete.  XXX needs cleaning up.  The vnode can
   878  * have write I/O in progress, but if there is a VM object then the
   879  * VM object can also have read I/O in progress.
  880          */
  881         do {
  882                 while (vp->v_numoutput > 0) {
  883                         vp->v_flag |= VBWAIT;
  884                         tsleep(&vp->v_numoutput, PVM, "vnvlbv", 0);
  885                 }
  886                 if (VOP_GETVOBJECT(vp, &object) == 0) {
  887                         while (object->paging_in_progress)
  888                                 vm_object_pip_sleep(object, "vnvlbx");
  889                 }
  890         } while (vp->v_numoutput > 0);
  891 
  892         splx(s);
  893 
  894         /*
  895          * Destroy the copy in the VM cache, too.
  896          */
  897         simple_lock(&vp->v_interlock);
  898         if (VOP_GETVOBJECT(vp, &object) == 0) {
  899                 vm_object_page_remove(object, 0, 0,
  900                         (flags & V_SAVE) ? TRUE : FALSE);
  901         }
  902         simple_unlock(&vp->v_interlock);
  903 
  904         if (!TAILQ_EMPTY(&vp->v_dirtyblkhd) || !TAILQ_EMPTY(&vp->v_cleanblkhd))
  905                 panic("vinvalbuf: flush failed");
  906         return (0);
  907 }
  908 
  909 /*
   910  * Truncate a file's buffers and pages to a specified length.  This
  911  * is in lieu of the old vinvalbuf mechanism, which performed unneeded
  912  * sync activity.
  913  */
  914 int
  915 vtruncbuf(vp, cred, p, length, blksize)
  916         register struct vnode *vp;
  917         struct ucred *cred;
  918         struct proc *p;
  919         off_t length;
  920         int blksize;
  921 {
  922         register struct buf *bp;
  923         struct buf *nbp;
  924         int s, anyfreed;
  925         int trunclbn;
  926 
  927         /*
  928          * Round up to the *next* lbn.
  929          */
  930         trunclbn = (length + blksize - 1) / blksize;
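               /*
                * Editor's example (assuming a 512-byte block size): length 0
                * gives trunclbn 0, so every buffer is invalidated, while any
                * length from 1 to 512 gives trunclbn 1, preserving the
                * partially valid first block.
                */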
  931 
  932         s = splbio();
  933 restart:
  934         anyfreed = 1;
  935         for (;anyfreed;) {
  936                 anyfreed = 0;
  937                 for (bp = TAILQ_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
  938                         nbp = TAILQ_NEXT(bp, b_vnbufs);
  939                         if (bp->b_lblkno >= trunclbn) {
  940                                 if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) {
  941                                         BUF_LOCK(bp, LK_EXCLUSIVE|LK_SLEEPFAIL);
  942                                         goto restart;
  943                                 } else {
  944                                         bremfree(bp);
  945                                         bp->b_flags |= (B_INVAL | B_RELBUF);
  946                                         bp->b_flags &= ~B_ASYNC;
  947                                         brelse(bp);
  948                                         anyfreed = 1;
  949                                 }
  950                                 if (nbp &&
  951                                     (((nbp->b_xflags & BX_VNCLEAN) == 0) ||
  952                                     (nbp->b_vp != vp) ||
  953                                     (nbp->b_flags & B_DELWRI))) {
  954                                         goto restart;
  955                                 }
  956                         }
  957                 }
  958 
  959                 for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
  960                         nbp = TAILQ_NEXT(bp, b_vnbufs);
  961                         if (bp->b_lblkno >= trunclbn) {
  962                                 if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) {
  963                                         BUF_LOCK(bp, LK_EXCLUSIVE|LK_SLEEPFAIL);
  964                                         goto restart;
  965                                 } else {
  966                                         bremfree(bp);
  967                                         bp->b_flags |= (B_INVAL | B_RELBUF);
  968                                         bp->b_flags &= ~B_ASYNC;
  969                                         brelse(bp);
  970                                         anyfreed = 1;
  971                                 }
  972                                 if (nbp &&
  973                                     (((nbp->b_xflags & BX_VNDIRTY) == 0) ||
  974                                     (nbp->b_vp != vp) ||
  975                                     (nbp->b_flags & B_DELWRI) == 0)) {
  976                                         goto restart;
  977                                 }
  978                         }
  979                 }
  980         }
  981 
  982         if (length > 0) {
  983 restartsync:
  984                 for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
  985                         nbp = TAILQ_NEXT(bp, b_vnbufs);
  986                         if ((bp->b_flags & B_DELWRI) && (bp->b_lblkno < 0)) {
  987                                 if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) {
  988                                         BUF_LOCK(bp, LK_EXCLUSIVE|LK_SLEEPFAIL);
  989                                         goto restart;
  990                                 } else {
  991                                         bremfree(bp);
  992                                         if (bp->b_vp == vp) {
  993                                                 bp->b_flags |= B_ASYNC;
  994                                         } else {
  995                                                 bp->b_flags &= ~B_ASYNC;
  996                                         }
  997                                         VOP_BWRITE(bp->b_vp, bp);
  998                                 }
  999                                 goto restartsync;
 1000                         }
 1001 
 1002                 }
 1003         }
 1004 
 1005         while (vp->v_numoutput > 0) {
 1006                 vp->v_flag |= VBWAIT;
 1007                 tsleep(&vp->v_numoutput, PVM, "vbtrunc", 0);
 1008         }
 1009 
 1010         splx(s);
 1011 
 1012         vnode_pager_setsize(vp, length);
 1013 
 1014         return (0);
 1015 }
 1016 
 1017 /*
 1018  * Associate a buffer with a vnode.
 1019  */
 1020 void
 1021 bgetvp(vp, bp)
 1022         register struct vnode *vp;
 1023         register struct buf *bp;
 1024 {
 1025         int s;
 1026 
 1027         KASSERT(bp->b_vp == NULL, ("bgetvp: not free"));
 1028 
 1029         vhold(vp);
 1030         bp->b_vp = vp;
 1031         bp->b_dev = vn_todev(vp);
 1032         /*
 1033          * Insert onto list for new vnode.
 1034          */
 1035         s = splbio();
 1036         bp->b_xflags |= BX_VNCLEAN;
 1037         bp->b_xflags &= ~BX_VNDIRTY;
 1038         TAILQ_INSERT_TAIL(&vp->v_cleanblkhd, bp, b_vnbufs);
 1039         splx(s);
 1040 }
 1041 
 1042 /*
 1043  * Disassociate a buffer from a vnode.
 1044  */
 1045 void
 1046 brelvp(bp)
 1047         register struct buf *bp;
 1048 {
 1049         struct vnode *vp;
 1050         struct buflists *listheadp;
 1051         int s;
 1052 
 1053         KASSERT(bp->b_vp != NULL, ("brelvp: NULL"));
 1054 
 1055         /*
 1056          * Delete from old vnode list, if on one.
 1057          */
 1058         vp = bp->b_vp;
 1059         s = splbio();
 1060         if (bp->b_xflags & (BX_VNDIRTY | BX_VNCLEAN)) {
 1061                 if (bp->b_xflags & BX_VNDIRTY)
 1062                         listheadp = &vp->v_dirtyblkhd;
 1063                 else 
 1064                         listheadp = &vp->v_cleanblkhd;
 1065                 TAILQ_REMOVE(listheadp, bp, b_vnbufs);
 1066                 bp->b_xflags &= ~(BX_VNDIRTY | BX_VNCLEAN);
 1067         }
 1068         if ((vp->v_flag & VONWORKLST) && TAILQ_EMPTY(&vp->v_dirtyblkhd)) {
 1069                 vp->v_flag &= ~VONWORKLST;
 1070                 LIST_REMOVE(vp, v_synclist);
 1071         }
 1072         splx(s);
 1073         bp->b_vp = (struct vnode *) 0;
 1074         vdrop(vp);
 1075 }
 1076 
 1077 /*
 1078  * The workitem queue.
 1079  * 
 1080  * It is useful to delay writes of file data and filesystem metadata
 1081  * for tens of seconds so that quickly created and deleted files need
 1082  * not waste disk bandwidth being created and removed. To realize this,
 1083  * we append vnodes to a "workitem" queue. When running with a soft
 1084  * updates implementation, most pending metadata dependencies should
  1085  * not wait for more than a few seconds. Thus, metadata written to
  1086  * block-device vnodes is delayed only about half the time that file
  1087  * data is delayed. Similarly, directory updates are more critical, so
  1088  * they are delayed only about a third of that time. Thus, there are
 1089  * SYNCER_MAXDELAY queues that are processed round-robin at a rate of
 1090  * one each second (driven off the filesystem syncer process). The
 1091  * syncer_delayno variable indicates the next queue that is to be processed.
 1092  * Items that need to be processed soon are placed in this queue:
 1093  *
 1094  *      syncer_workitem_pending[syncer_delayno]
 1095  *
  1096  * A delay of fifteen seconds is achieved by placing the request fifteen
 1097  * entries later in the queue:
 1098  *
 1099  *      syncer_workitem_pending[(syncer_delayno + 15) & syncer_mask]
 1100  *
 1101  */
 1102 
 1103 /*
 1104  * Add an item to the syncer work queue.
 1105  */
 1106 static void
 1107 vn_syncer_add_to_worklist(struct vnode *vp, int delay)
 1108 {
 1109         int s, slot;
 1110 
 1111         s = splbio();
 1112 
 1113         if (vp->v_flag & VONWORKLST) {
 1114                 LIST_REMOVE(vp, v_synclist);
 1115         }
 1116 
 1117         if (delay > syncer_maxdelay - 2)
 1118                 delay = syncer_maxdelay - 2;
 1119         slot = (syncer_delayno + delay) & syncer_mask;
 1120 
 1121         LIST_INSERT_HEAD(&syncer_workitem_pending[slot], vp, v_synclist);
 1122         vp->v_flag |= VONWORKLST;
 1123         splx(s);
 1124 }
 1125 
 1126 struct  proc *updateproc;
 1127 static void sched_sync __P((void));
 1128 static struct kproc_desc up_kp = {
 1129         "syncer",
 1130         sched_sync,
 1131         &updateproc
 1132 };
 1133 SYSINIT(syncer, SI_SUB_KTHREAD_UPDATE, SI_ORDER_FIRST, kproc_start, &up_kp)
 1134 
 1135 /*
 1136  * System filesystem synchronizer daemon.
 1137  */
 1138 void 
 1139 sched_sync(void)
 1140 {
 1141         struct synclist *slp;
 1142         struct vnode *vp;
 1143         long starttime;
 1144         int s;
 1145         struct proc *p = updateproc;
 1146 
 1147         EVENTHANDLER_REGISTER(shutdown_pre_sync, shutdown_kproc, p,
 1148             SHUTDOWN_PRI_LAST);   
 1149 
 1150         for (;;) {
 1151                 kproc_suspend_loop(p);
 1152 
 1153                 starttime = time_second;
 1154 
 1155                 /*
 1156                  * Push files whose dirty time has expired.  Be careful
 1157                  * of interrupt race on slp queue.
 1158                  */
 1159                 s = splbio();
 1160                 slp = &syncer_workitem_pending[syncer_delayno];
 1161                 syncer_delayno += 1;
 1162                 if (syncer_delayno == syncer_maxdelay)
 1163                         syncer_delayno = 0;
 1164                 splx(s);
 1165 
 1166                 while ((vp = LIST_FIRST(slp)) != NULL) {
 1167                         if (VOP_ISLOCKED(vp, NULL) == 0) {
 1168                                 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
 1169                                 (void) VOP_FSYNC(vp, p->p_ucred, MNT_LAZY, p);
 1170                                 VOP_UNLOCK(vp, 0, p);
 1171                         }
 1172                         s = splbio();
 1173                         if (LIST_FIRST(slp) == vp) {
 1174                                 /*
 1175                                  * Note: v_tag VT_VFS vps can remain on the
 1176                                  * worklist too with no dirty blocks, but 
 1177                                  * since sync_fsync() moves it to a different 
 1178                                  * slot we are safe.
 1179                                  */
 1180                                 if (TAILQ_EMPTY(&vp->v_dirtyblkhd) &&
 1181                                     !vn_isdisk(vp, NULL))
 1182                                         panic("sched_sync: fsync failed vp %p tag %d", vp, vp->v_tag);
 1183                                 /*
 1184                                  * Put us back on the worklist.  The worklist
 1185                                  * routine will remove us from our current
 1186                                  * position and then add us back in at a later
 1187                                  * position.
 1188                                  */
 1189                                 vn_syncer_add_to_worklist(vp, syncdelay);
 1190                         }
 1191                         splx(s);
 1192                 }
 1193 
 1194                 /*
 1195                  * Do soft update processing.
 1196                  */
 1197                 if (bioops.io_sync)
 1198                         (*bioops.io_sync)(NULL);
 1199 
 1200                 /*
 1201                  * The variable rushjob allows the kernel to speed up the
 1202                  * processing of the filesystem syncer process. A rushjob
 1203                  * value of N tells the filesystem syncer to process the next
 1204                  * N seconds worth of work on its queue ASAP. Currently rushjob
 1205                  * is used by the soft update code to speed up the filesystem
 1206                  * syncer process when the incore state is getting so far
 1207                  * ahead of the disk that the kernel memory pool is being
 1208                  * threatened with exhaustion.
 1209                  */
 1210                 if (rushjob > 0) {
 1211                         rushjob -= 1;
 1212                         continue;
 1213                 }
 1214                 /*
 1215                  * If it has taken us less than a second to process the
 1216                  * current work, then wait. Otherwise start right over
 1217                  * again. We can still lose time if any single round
 1218                  * takes more than two seconds, but it does not really
 1219                  * matter as we are just trying to generally pace the
 1220                  * filesystem activity.
 1221                  */
 1222                 if (time_second == starttime)
 1223                         tsleep(&lbolt, PPAUSE, "syncer", 0);
 1224         }
 1225 }
 1226 
 1227 /*
 1228  * Request the syncer daemon to speed up its work.
 1229  * We never push it to speed up more than half of its
 1230  * normal turn time, otherwise it could take over the cpu.
 1231  */
 1232 int
 1233 speedup_syncer()
 1234 {
 1235         int s;
 1236 
 1237         s = splhigh();
 1238         if (updateproc->p_wchan == &lbolt)
 1239                 setrunnable(updateproc);
 1240         splx(s);
 1241         if (rushjob < syncdelay / 2) {
 1242                 rushjob += 1;
 1243                 stat_rush_requests += 1;
 1244                 return (1);
 1245         }
 1246         return(0);
 1247 }
 1248 
 1249 /*
 1250  * Associate a p-buffer with a vnode.
 1251  *
 1252  * Also sets B_PAGING flag to indicate that vnode is not fully associated
  1253  * with the buffer, i.e., the bp has not been linked into the vnode or
 1254  * ref-counted.
 1255  */
 1256 void
 1257 pbgetvp(vp, bp)
 1258         register struct vnode *vp;
 1259         register struct buf *bp;
 1260 {
 1261 
 1262         KASSERT(bp->b_vp == NULL, ("pbgetvp: not free"));
 1263 
 1264         bp->b_vp = vp;
 1265         bp->b_flags |= B_PAGING;
 1266         bp->b_dev = vn_todev(vp);
 1267 }
 1268 
 1269 /*
 1270  * Disassociate a p-buffer from a vnode.
 1271  */
 1272 void
 1273 pbrelvp(bp)
 1274         register struct buf *bp;
 1275 {
 1276 
 1277         KASSERT(bp->b_vp != NULL, ("pbrelvp: NULL"));
 1278 
 1279         /* XXX REMOVE ME */
 1280         if (TAILQ_NEXT(bp, b_vnbufs) != NULL) {
 1281                 panic(
 1282                     "relpbuf(): b_vp was probably reassignbuf()d %p %x", 
 1283                     bp,
 1284                     (int)bp->b_flags
 1285                 );
 1286         }
 1287         bp->b_vp = (struct vnode *) 0;
 1288         bp->b_flags &= ~B_PAGING;
 1289 }
 1290 
 1291 void
 1292 pbreassignbuf(bp, newvp)
 1293         struct buf *bp;
 1294         struct vnode *newvp;
 1295 {
 1296         if ((bp->b_flags & B_PAGING) == 0) {
 1297                 panic(
 1298                     "pbreassignbuf() on non phys bp %p", 
 1299                     bp
 1300                 );
 1301         }
 1302         bp->b_vp = newvp;
 1303 }
 1304 
 1305 /*
 1306  * Reassign a buffer from one vnode to another.
 1307  * Used to assign file specific control information
 1308  * (indirect blocks) to the vnode to which they belong.
 1309  */
 1310 void
 1311 reassignbuf(bp, newvp)
 1312         register struct buf *bp;
 1313         register struct vnode *newvp;
 1314 {
 1315         struct buflists *listheadp;
 1316         int delay;
 1317         int s;
 1318 
 1319         if (newvp == NULL) {
 1320                 printf("reassignbuf: NULL");
 1321                 return;
 1322         }
 1323         ++reassignbufcalls;
 1324 
 1325         /*
 1326          * B_PAGING flagged buffers cannot be reassigned because their vp
 1327          * is not fully linked in.
 1328          */
 1329         if (bp->b_flags & B_PAGING)
 1330                 panic("cannot reassign paging buffer");
 1331 
 1332         s = splbio();
 1333         /*
 1334          * Delete from old vnode list, if on one.
 1335          */
 1336         if (bp->b_xflags & (BX_VNDIRTY | BX_VNCLEAN)) {
 1337                 if (bp->b_xflags & BX_VNDIRTY)
 1338                         listheadp = &bp->b_vp->v_dirtyblkhd;
 1339                 else 
 1340                         listheadp = &bp->b_vp->v_cleanblkhd;
 1341                 TAILQ_REMOVE(listheadp, bp, b_vnbufs);
 1342                 bp->b_xflags &= ~(BX_VNDIRTY | BX_VNCLEAN);
 1343                 if (bp->b_vp != newvp) {
 1344                         vdrop(bp->b_vp);
 1345                         bp->b_vp = NULL;        /* for clarification */
 1346                 }
 1347         }
 1348         /*
 1349          * If dirty, put on list of dirty buffers; otherwise insert onto list
 1350          * of clean buffers.
 1351          */
 1352         if (bp->b_flags & B_DELWRI) {
 1353                 struct buf *tbp;
 1354 
 1355                 listheadp = &newvp->v_dirtyblkhd;
 1356                 if ((newvp->v_flag & VONWORKLST) == 0) {
 1357                         switch (newvp->v_type) {
 1358                         case VDIR:
 1359                                 delay = dirdelay;
 1360                                 break;
 1361                         case VCHR:
 1362                         case VBLK:
 1363                                 if (newvp->v_specmountpoint != NULL) {
 1364                                         delay = metadelay;
 1365                                         break;
 1366                                 }
 1367                                 /* fall through */
 1368                         default:
 1369                                 delay = filedelay;
 1370                         }
 1371                         vn_syncer_add_to_worklist(newvp, delay);
 1372                 }
 1373                 bp->b_xflags |= BX_VNDIRTY;
 1374                 tbp = TAILQ_FIRST(listheadp);
 1375                 if (tbp == NULL ||
 1376                     bp->b_lblkno == 0 ||
 1377                     (bp->b_lblkno > 0 && tbp->b_lblkno < 0) ||
 1378                     (bp->b_lblkno > 0 && bp->b_lblkno < tbp->b_lblkno)) {
 1379                         TAILQ_INSERT_HEAD(listheadp, bp, b_vnbufs);
 1380                         ++reassignbufsortgood;
 1381                 } else if (bp->b_lblkno < 0) {
 1382                         TAILQ_INSERT_TAIL(listheadp, bp, b_vnbufs);
 1383                         ++reassignbufsortgood;
 1384                 } else if (reassignbufmethod == 1) {
 1385                         /*
 1386                          * New sorting algorithm: only handle the sequential case;
 1387                          * otherwise append to the end (but before metadata).
 1388                          */
 1389                         if ((tbp = gbincore(newvp, bp->b_lblkno - 1)) != NULL &&
 1390                             (tbp->b_xflags & BX_VNDIRTY)) {
 1391                                 /*
 1392                                  * Found the best place to insert the buffer
 1393                                  */
 1394                                 TAILQ_INSERT_AFTER(listheadp, tbp, bp, b_vnbufs);
 1395                                 ++reassignbufsortgood;
 1396                         } else {
 1397                                 /*
 1398                                  * Missed, append to end, but before meta-data.
 1399                                  * We know that the head buffer in the list is
 1400                                  * not meta-data due to prior conditionals.
 1401                                  *
 1402                                  * Indirect effects:  NFS second stage write
 1403                                  * tends to wind up here, giving maximum 
 1404                                  * distance between the unstable write and the
 1405                                  * commit rpc.
 1406                                  */
 1407                                 tbp = TAILQ_LAST(listheadp, buflists);
 1408                                 while (tbp && tbp->b_lblkno < 0)
 1409                                         tbp = TAILQ_PREV(tbp, buflists, b_vnbufs);
 1410                                 TAILQ_INSERT_AFTER(listheadp, tbp, bp, b_vnbufs);
 1411                                 ++reassignbufsortbad;
 1412                         }
 1413                 } else {
 1414                         /*
 1415                          * Old sorting algorithm, scan queue and insert
 1416                          */
 1417                         struct buf *ttbp;
 1418                         while ((ttbp = TAILQ_NEXT(tbp, b_vnbufs)) &&
 1419                             (ttbp->b_lblkno < bp->b_lblkno)) {
 1420                                 ++reassignbufloops;
 1421                                 tbp = ttbp;
 1422                         }
 1423                         TAILQ_INSERT_AFTER(listheadp, tbp, bp, b_vnbufs);
 1424                 }
 1425         } else {
 1426                 bp->b_xflags |= BX_VNCLEAN;
 1427                 TAILQ_INSERT_TAIL(&newvp->v_cleanblkhd, bp, b_vnbufs);
 1428                 if ((newvp->v_flag & VONWORKLST) &&
 1429                     TAILQ_EMPTY(&newvp->v_dirtyblkhd)) {
 1430                         newvp->v_flag &= ~VONWORKLST;
 1431                         LIST_REMOVE(newvp, v_synclist);
 1432                 }
 1433         }
 1434         if (bp->b_vp != newvp) {
 1435                 bp->b_vp = newvp;
 1436                 vhold(bp->b_vp);
 1437         }
 1438         splx(s);
 1439 }
 1440 
 1441 /*
 1442  * Create a vnode for a block device.
 1443  * Used for mounting the root file system.
 1444  */
 1445 int
 1446 bdevvp(dev, vpp)
 1447         dev_t dev;
 1448         struct vnode **vpp;
 1449 {
 1450         register struct vnode *vp;
 1451         struct vnode *nvp;
 1452         int error;
 1453 
 1454         if (dev == NODEV) {
 1455                 *vpp = NULLVP;
 1456                 return (ENXIO);
 1457         }
 1458         error = getnewvnode(VT_NON, (struct mount *)0, spec_vnodeop_p, &nvp);
 1459         if (error) {
 1460                 *vpp = NULLVP;
 1461                 return (error);
 1462         }
 1463         vp = nvp;
 1464         vp->v_type = VBLK;
 1465         addalias(vp, dev);
 1466         *vpp = vp;
 1467         return (0);
 1468 }
 1469 
 1470 /*
 1471  * Add vnode to the alias list hung off the dev_t.
 1472  *
 1473  * The reason for this gunk is that multiple vnodes can reference
 1474  * the same physical device, so checking vp->v_usecount to see
 1475  * how many users there are is inadequate; the v_usecount fields of
 1476  * all the aliased vnodes need to be accumulated.  vcount() does that.
 1477  */
 1478 void
 1479 addaliasu(nvp, nvp_rdev)
 1480         struct vnode *nvp;
 1481         udev_t nvp_rdev;
 1482 {
 1483 
 1484         if (nvp->v_type != VBLK && nvp->v_type != VCHR)
 1485                 panic("addaliasu on non-special vnode");
 1486         addalias(nvp, udev2dev(nvp_rdev, nvp->v_type == VBLK ? 1 : 0));
 1487 }
 1488 
 1489 void
 1490 addalias(nvp, dev)
 1491         struct vnode *nvp;
 1492         dev_t dev;
 1493 {
 1494 
 1495         if (nvp->v_type != VBLK && nvp->v_type != VCHR)
 1496                 panic("addalias on non-special vnode");
 1497 
 1498         nvp->v_rdev = dev;
 1499         simple_lock(&spechash_slock);
 1500         SLIST_INSERT_HEAD(&dev->si_hlist, nvp, v_specnext);
 1501         simple_unlock(&spechash_slock);
 1502 }
 1503 
 1504 /*
 1505  * Grab a particular vnode from the free list, increment its
 1506  * reference count and lock it. The vnode lock bit is set if the
 1507  * vnode is being eliminated in vgone. The process is awakened
 1508  * when the transition is completed, and an error returned to
 1509  * indicate that the vnode is no longer usable (possibly having
 1510  * been changed to a new file system type).
 1511  */
 1512 int
 1513 vget(vp, flags, p)
 1514         register struct vnode *vp;
 1515         int flags;
 1516         struct proc *p;
 1517 {
 1518         int error;
 1519 
 1520         /*
 1521          * If the vnode is in the process of being cleaned out for
 1522          * another use, we wait for the cleaning to finish and then
 1523          * return failure. Cleaning is determined by checking that
 1524          * the VXLOCK flag is set.
 1525          */
 1526         if ((flags & LK_INTERLOCK) == 0) {
 1527                 simple_lock(&vp->v_interlock);
 1528         }
 1529         if (vp->v_flag & VXLOCK) {
 1530                 if (vp->v_vxproc == curproc) {
 1531 #if 0
 1532                         /* this can now occur in normal operation */
 1533                         log(LOG_INFO, "VXLOCK interlock avoided\n");
 1534 #endif
 1535                 } else {
 1536                         vp->v_flag |= VXWANT;
 1537                         simple_unlock(&vp->v_interlock);
 1538                         tsleep((caddr_t)vp, PINOD, "vget", 0);
 1539                         return (ENOENT);
 1540                 }
 1541         }
 1542 
 1543         vp->v_usecount++;
 1544 
 1545         if (VSHOULDBUSY(vp))
 1546                 vbusy(vp);
 1547         if (flags & LK_TYPE_MASK) {
 1548                 if ((error = vn_lock(vp, flags | LK_INTERLOCK, p)) != 0) {
 1549                         /*
 1550                          * must expand vrele here because we do not want
 1551                          * to call VOP_INACTIVE if the reference count
 1552                          * drops back to zero since it was never really
 1553                          * active. We must remove it from the free list
 1554                          * before sleeping so that multiple processes do
 1555                          * not try to recycle it.
 1556                          */
 1557                         simple_lock(&vp->v_interlock);
 1558                         vp->v_usecount--;
 1559                         if (VSHOULDFREE(vp))
 1560                                 vfree(vp);
 1561                         else
 1562                                 vlruvp(vp);
 1563                         simple_unlock(&vp->v_interlock);
 1564                 }
 1565                 return (error);
 1566         }
 1567         simple_unlock(&vp->v_interlock);
 1568         return (0);
 1569 }
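
/*
 * Illustrative sketch (not part of this file): the common caller
 * pattern for vget() described above.  The function name is made up;
 * everything else uses interfaces defined or used in this file.
 */
static int
example_use_vnode(struct vnode *vp, struct proc *p)
{
        int error;

        /* Take a reference and an exclusive lock in one step. */
        error = vget(vp, LK_EXCLUSIVE, p);
        if (error)
                return (error);         /* e.g. ENOENT if being reclaimed */
        /* ... operate on the locked, referenced vnode ... */
        vput(vp);                       /* drop both the lock and the ref */
        return (0);
}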
 1570 
 1571 void
 1572 vref(struct vnode *vp)
 1573 {
 1574         simple_lock(&vp->v_interlock);
 1575         vp->v_usecount++;
 1576         simple_unlock(&vp->v_interlock);
 1577 }
 1578 
 1579 /*
 1580  * Vnode put/release.
 1581  * If count drops to zero, call inactive routine and return to freelist.
 1582  */
 1583 void
 1584 vrele(vp)
 1585         struct vnode *vp;
 1586 {
 1587         struct proc *p = curproc;       /* XXX */
 1588 
 1589         KASSERT(vp != NULL, ("vrele: null vp"));
 1590 
 1591         simple_lock(&vp->v_interlock);
 1592 
 1593         if (vp->v_usecount > 1) {
 1594 
 1595                 vp->v_usecount--;
 1596                 simple_unlock(&vp->v_interlock);
 1597 
 1598                 return;
 1599         }
 1600 
 1601         if (vp->v_usecount == 1) {
 1602                 vp->v_usecount--;
 1603                 /*
 1604                  * We must call VOP_INACTIVE with the node locked.
 1605                  * If we are doing a vput(), the node is already locked,
 1606                  * but in the case of vrele() we must explicitly lock
 1607                  * the vnode before calling VOP_INACTIVE.
 1608                  */
 1609 
 1610                 if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK, p) == 0)
 1611                         VOP_INACTIVE(vp, p);
 1612                 if (VSHOULDFREE(vp))
 1613                         vfree(vp);
 1614                 else
 1615                         vlruvp(vp);
 1616         } else {
 1617 #ifdef DIAGNOSTIC
 1618                 vprint("vrele: negative ref count", vp);
 1619                 simple_unlock(&vp->v_interlock);
 1620 #endif
 1621                 panic("vrele: negative ref cnt");
 1622         }
 1623 }
 1624 
 1625 void
 1626 vput(vp)
 1627         struct vnode *vp;
 1628 {
 1629         struct proc *p = curproc;       /* XXX */
 1630 
 1631         KASSERT(vp != NULL, ("vput: null vp"));
 1632 
 1633         simple_lock(&vp->v_interlock);
 1634 
 1635         if (vp->v_usecount > 1) {
 1636                 vp->v_usecount--;
 1637                 VOP_UNLOCK(vp, LK_INTERLOCK, p);
 1638                 return;
 1639         }
 1640 
 1641         if (vp->v_usecount == 1) {
 1642                 vp->v_usecount--;
 1643                 /*
 1644                  * We must call VOP_INACTIVE with the node locked.
 1645                  * Since we are doing a vput(), the node is already locked,
 1646                  * so we just need to release the vnode mutex.
 1647                  */
 1648                 simple_unlock(&vp->v_interlock);
 1649                 VOP_INACTIVE(vp, p);
 1650                 if (VSHOULDFREE(vp))
 1651                         vfree(vp);
 1652                 else
 1653                         vlruvp(vp);
 1654         } else {
 1655 #ifdef DIAGNOSTIC
 1656                 vprint("vput: negative ref count", vp);
 1657 #endif
 1658                 panic("vput: negative ref cnt");
 1659         }
 1660 }
 1661 
 1662 /*
 1663  * Somebody doesn't want the vnode recycled.
 1664  */
 1665 void
 1666 vhold(vp)
 1667         register struct vnode *vp;
 1668 {
 1669         int s;
 1670 
 1671         s = splbio();
 1672         vp->v_holdcnt++;
 1673         if (VSHOULDBUSY(vp))
 1674                 vbusy(vp);
 1675         splx(s);
 1676 }
 1677 
 1678 /*
 1679  * One less who cares about this vnode.
 1680  */
 1681 void
 1682 vdrop(vp)
 1683         register struct vnode *vp;
 1684 {
 1685         int s;
 1686 
 1687         s = splbio();
 1688         if (vp->v_holdcnt <= 0)
 1689                 panic("vdrop: holdcnt");
 1690         vp->v_holdcnt--;
 1691         if (VSHOULDFREE(vp))
 1692                 vfree(vp);
 1693         splx(s);
 1694 }
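
/*
 * Illustrative sketch (not part of this file): the vhold()/vdrop()
 * pairing.  A subsystem that stashes a vnode pointer without holding a
 * v_usecount reference keeps the vnode off the free list by bumping
 * v_holdcnt, just as reassignbuf() above does for dirty/clean buffers.
 * The "slot" storage is hypothetical.
 */
static void
example_stash_vnode(struct vnode **slot, struct vnode *vp)
{

        if (*slot != NULL)
                vdrop(*slot);           /* no longer interested in the old one */
        *slot = vp;
        if (vp != NULL)
                vhold(vp);              /* keep the new one from being recycled */
}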
 1695 
 1696 /*
 1697  * Remove any vnodes in the vnode table belonging to mount point mp.
 1698  *
 1699  * If FORCECLOSE is not specified, there should not be any active ones,
 1700  * return error if any are found (nb: this is a user error, not a
 1701  * system error). If FORCECLOSE is specified, detach any active vnodes
 1702  * that are found.
 1703  *
 1704  * If WRITECLOSE is set, only flush out regular file vnodes open for
 1705  * writing.
 1706  *
 1707  * SKIPSYSTEM causes any vnodes marked VSYSTEM to be skipped.
 1708  *
 1709  * `rootrefs' specifies the base reference count for the root vnode
 1710  * of this filesystem. The root vnode is considered busy if its
 1711  * v_usecount exceeds this value. On a successful return, vflush()
 1712  * will call vrele() on the root vnode exactly rootrefs times.
 1713  * If the SKIPSYSTEM or WRITECLOSE flags are specified, rootrefs must
 1714  * be zero.
 1715  */
 1716 #ifdef DIAGNOSTIC
 1717 static int busyprt = 0;         /* print out busy vnodes */
 1718 SYSCTL_INT(_debug, OID_AUTO, busyprt, CTLFLAG_RW, &busyprt, 0, "");
 1719 #endif
 1720 
 1721 int
 1722 vflush(mp, rootrefs, flags)
 1723         struct mount *mp;
 1724         int rootrefs;
 1725         int flags;
 1726 {
 1727         struct proc *p = curproc;       /* XXX */
 1728         struct vnode *vp, *nvp, *rootvp = NULL;
 1729         struct vattr vattr;
 1730         int busy = 0, error;
 1731 
 1732         if (rootrefs > 0) {
 1733                 KASSERT((flags & (SKIPSYSTEM | WRITECLOSE)) == 0,
 1734                     ("vflush: bad args"));
 1735                 /*
 1736                  * Get the filesystem root vnode. We can vput() it
 1737                  * immediately, since with rootrefs > 0, it won't go away.
 1738                  */
 1739                 if ((error = VFS_ROOT(mp, &rootvp)) != 0)
 1740                         return (error);
 1741                 vput(rootvp);
 1742         }
 1743         simple_lock(&mntvnode_slock);
 1744 loop:
 1745         for (vp = TAILQ_FIRST(&mp->mnt_nvnodelist); vp; vp = nvp) {
 1746                 /*
 1747                  * Make sure this vnode wasn't reclaimed in getnewvnode().
 1748                  * Start over if it has (it won't be on the list anymore).
 1749                  */
 1750                 if (vp->v_mount != mp)
 1751                         goto loop;
 1752                 nvp = TAILQ_NEXT(vp, v_nmntvnodes);
 1753 
 1754                 simple_lock(&vp->v_interlock);
 1755                 /*
 1756                  * Skip over any vnodes marked VSYSTEM.
 1757                  */
 1758                 if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
 1759                         simple_unlock(&vp->v_interlock);
 1760                         continue;
 1761                 }
 1762                 /*
 1763                  * If WRITECLOSE is set, flush out unlinked but still open
 1764                  * files (even if open only for reading) and regular file
 1765                  * vnodes open for writing. 
 1766                  */
 1767                 if ((flags & WRITECLOSE) &&
 1768                     (vp->v_type == VNON ||
 1769                     (VOP_GETATTR(vp, &vattr, p->p_ucred, p) == 0 &&
 1770                     vattr.va_nlink > 0)) &&
 1771                     (vp->v_writecount == 0 || vp->v_type != VREG)) {
 1772                         simple_unlock(&vp->v_interlock);
 1773                         continue;
 1774                 }
 1775 
 1776                 /*
 1777                  * With v_usecount == 0, all we need to do is clear out the
 1778                  * vnode data structures and we are done.
 1779                  */
 1780                 if (vp->v_usecount == 0) {
 1781                         simple_unlock(&mntvnode_slock);
 1782                         vgonel(vp, p);
 1783                         simple_lock(&mntvnode_slock);
 1784                         continue;
 1785                 }
 1786 
 1787                 /*
 1788                  * If FORCECLOSE is set, forcibly close the vnode. For block
 1789                  * or character devices, revert to an anonymous device. For
 1790                  * all other files, just kill them.
 1791                  */
 1792                 if (flags & FORCECLOSE) {
 1793                         simple_unlock(&mntvnode_slock);
 1794                         if (vp->v_type != VBLK && vp->v_type != VCHR) {
 1795                                 vgonel(vp, p);
 1796                         } else {
 1797                                 vclean(vp, 0, p);
 1798                                 vp->v_op = spec_vnodeop_p;
 1799                                 insmntque(vp, (struct mount *) 0);
 1800                         }
 1801                         simple_lock(&mntvnode_slock);
 1802                         continue;
 1803                 }
 1804 #ifdef DIAGNOSTIC
 1805                 if (busyprt)
 1806                         vprint("vflush: busy vnode", vp);
 1807 #endif
 1808                 simple_unlock(&vp->v_interlock);
 1809                 busy++;
 1810         }
 1811         simple_unlock(&mntvnode_slock);
 1812         if (rootrefs > 0 && (flags & FORCECLOSE) == 0) {
 1813                 /*
 1814                  * If just the root vnode is busy, and if its refcount
 1815                  * is equal to `rootrefs', then go ahead and kill it.
 1816                  */
 1817                 simple_lock(&rootvp->v_interlock);
 1818                 KASSERT(busy > 0, ("vflush: not busy"));
 1819                 KASSERT(rootvp->v_usecount >= rootrefs, ("vflush: rootrefs"));
 1820                 if (busy == 1 && rootvp->v_usecount == rootrefs) {
 1821                         vgonel(rootvp, p);
 1822                         busy = 0;
 1823                 } else
 1824                         simple_unlock(&rootvp->v_interlock);
 1825         }
 1826         if (busy)
 1827                 return (EBUSY);
 1828         for (; rootrefs > 0; rootrefs--)
 1829                 vrele(rootvp);
 1830         return (0);
 1831 }
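
/*
 * Illustrative sketch (not part of this file): how an unmount routine
 * might drive vflush() under the rootrefs contract described above.
 * It assumes the filesystem keeps exactly one long-lived reference on
 * its root vnode; filesystems that do not would pass rootrefs == 0.
 */
static int
example_unmount_flush(struct mount *mp, int mntflags)
{
        int flags;

        flags = 0;
        if (mntflags & MNT_FORCE)
                flags |= FORCECLOSE;
        /* The root vnode is only "busy" above its one cached reference. */
        return (vflush(mp, 1, flags));
}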
 1832 
 1833 /*
 1834  * We do not want to recycle the vnode too quickly.
 1835  *
 1836  * XXX we can't move vp's around the nvnodelist without really screwing
 1837  * up the efficiency of filesystem SYNC and friends.  This code is 
 1838  * disabled until we fix the syncing code's scanning algorithm.
 1839  */
 1840 static void
 1841 vlruvp(struct vnode *vp)
 1842 {
 1843 #if 0
 1844         struct mount *mp;
 1845 
 1846         if ((mp = vp->v_mount) != NULL) {
 1847                 simple_lock(&mntvnode_slock);
 1848                 TAILQ_REMOVE(&mp->mnt_nvnodelist, vp, v_nmntvnodes);
 1849                 TAILQ_INSERT_TAIL(&mp->mnt_nvnodelist, vp, v_nmntvnodes);
 1850                 simple_unlock(&mntvnode_slock);
 1851         }
 1852 #endif
 1853 }
 1854 
 1855 /*
 1856  * Disassociate the underlying file system from a vnode.
 1857  */
 1858 static void
 1859 vclean(vp, flags, p)
 1860         struct vnode *vp;
 1861         int flags;
 1862         struct proc *p;
 1863 {
 1864         int active;
 1865 
 1866         /*
 1867          * Check to see if the vnode is in use. If so we have to reference it
 1868          * before we clean it out so that its count cannot fall to zero and
 1869          * generate a race against ourselves to recycle it.
 1870          */
 1871         if ((active = vp->v_usecount))
 1872                 vp->v_usecount++;
 1873 
 1874         /*
 1875          * Prevent the vnode from being recycled or brought into use while we
 1876          * clean it out.
 1877          */
 1878         if (vp->v_flag & VXLOCK)
 1879                 panic("vclean: deadlock");
 1880         vp->v_flag |= VXLOCK;
 1881         vp->v_vxproc = curproc;
 1882         /*
 1883          * Even if the count is zero, the VOP_INACTIVE routine may still
 1884          * have the object locked while it cleans it out. The VOP_LOCK
 1885          * ensures that the VOP_INACTIVE routine is done with its work.
 1886          * For active vnodes, it ensures that no other activity can
 1887          * occur while the underlying object is being cleaned out.
 1888          */
 1889         VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p);
 1890 
 1891         /*
 1892          * Clean out any buffers associated with the vnode.
 1893          */
 1894         vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);
 1895 
 1896         VOP_DESTROYVOBJECT(vp);
 1897 
 1898         /*
 1899          * If purging an active vnode, it must be closed and
 1900          * deactivated before being reclaimed. Note that the
 1901          * VOP_INACTIVE will unlock the vnode.
 1902          */
 1903         if (active) {
 1904                 if (flags & DOCLOSE)
 1905                         VOP_CLOSE(vp, FNONBLOCK, NOCRED, p);
 1906                 VOP_INACTIVE(vp, p);
 1907         } else {
 1908                 /*
 1909                  * Any other processes trying to obtain this lock must first
 1910                  * wait for VXLOCK to clear, then call the new lock operation.
 1911                  */
 1912                 VOP_UNLOCK(vp, 0, p);
 1913         }
 1914         /*
 1915          * Reclaim the vnode.
 1916          */
 1917         if (VOP_RECLAIM(vp, p))
 1918                 panic("vclean: cannot reclaim");
 1919 
 1920         if (active) {
 1921                 /*
 1922                  * Inline copy of vrele() since VOP_INACTIVE
 1923                  * has already been called.
 1924                  */
 1925                 simple_lock(&vp->v_interlock);
 1926                 if (--vp->v_usecount <= 0) {
 1927 #ifdef DIAGNOSTIC
 1928                         if (vp->v_usecount < 0 || vp->v_writecount != 0) {
 1929                                 vprint("vclean: bad ref count", vp);
 1930                                 panic("vclean: ref cnt");
 1931                         }
 1932 #endif
 1933                         vfree(vp);
 1934                 }
 1935                 simple_unlock(&vp->v_interlock);
 1936         }
 1937 
 1938         cache_purge(vp);
 1939         vp->v_vnlock = NULL;
 1940 
 1941         if (VSHOULDFREE(vp))
 1942                 vfree(vp);
 1943         
 1944         /*
 1945          * Done with purge, notify sleepers of the grim news.
 1946          */
 1947         vp->v_op = dead_vnodeop_p;
 1948         vn_pollgone(vp);
 1949         vp->v_tag = VT_NON;
 1950         vp->v_flag &= ~VXLOCK;
 1951         vp->v_vxproc = NULL;
 1952         if (vp->v_flag & VXWANT) {
 1953                 vp->v_flag &= ~VXWANT;
 1954                 wakeup((caddr_t) vp);
 1955         }
 1956 }
 1957 
 1958 /*
 1959  * Eliminate all activity associated with the requested vnode
 1960  * and with all vnodes aliased to the requested vnode.
 1961  */
 1962 int
 1963 vop_revoke(ap)
 1964         struct vop_revoke_args /* {
 1965                 struct vnode *a_vp;
 1966                 int a_flags;
 1967         } */ *ap;
 1968 {
 1969         struct vnode *vp, *vq;
 1970         dev_t dev;
 1971 
 1972         KASSERT((ap->a_flags & REVOKEALL) != 0, ("vop_revoke"));
 1973 
 1974         vp = ap->a_vp;
 1975         /*
 1976          * If a vgone (or vclean) is already in progress,
 1977          * wait until it is done and return.
 1978          */
 1979         if (vp->v_flag & VXLOCK) {
 1980                 vp->v_flag |= VXWANT;
 1981                 simple_unlock(&vp->v_interlock);
 1982                 tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0);
 1983                 return (0);
 1984         }
 1985         dev = vp->v_rdev;
 1986         for (;;) {
 1987                 simple_lock(&spechash_slock);
 1988                 vq = SLIST_FIRST(&dev->si_hlist);
 1989                 simple_unlock(&spechash_slock);
 1990                 if (!vq)
 1991                         break;
 1992                 vgone(vq);
 1993         }
 1994         return (0);
 1995 }
 1996 
 1997 /*
 1998  * Recycle an unused vnode to the front of the free list.
 1999  * Release the passed interlock if the vnode will be recycled.
 2000  */
 2001 int
 2002 vrecycle(vp, inter_lkp, p)
 2003         struct vnode *vp;
 2004         struct simplelock *inter_lkp;
 2005         struct proc *p;
 2006 {
 2007 
 2008         simple_lock(&vp->v_interlock);
 2009         if (vp->v_usecount == 0) {
 2010                 if (inter_lkp) {
 2011                         simple_unlock(inter_lkp);
 2012                 }
 2013                 vgonel(vp, p);
 2014                 return (1);
 2015         }
 2016         simple_unlock(&vp->v_interlock);
 2017         return (0);
 2018 }
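
/*
 * Illustrative sketch (not part of this file): a filesystem inactive
 * routine using vrecycle().  Once the underlying file has no links
 * left, the vnode is recycled immediately rather than left to age on
 * the free list.  The nlink argument stands in for the caller's inode
 * field.
 */
static void
example_inactive_recycle(struct vnode *vp, struct proc *p, int nlink)
{

        VOP_UNLOCK(vp, 0, p);
        if (nlink <= 0)
                (void) vrecycle(vp, (struct simplelock *)0, p);
}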
 2019 
 2020 /*
 2021  * Eliminate all activity associated with a vnode
 2022  * in preparation for reuse.
 2023  */
 2024 void
 2025 vgone(vp)
 2026         register struct vnode *vp;
 2027 {
 2028         struct proc *p = curproc;       /* XXX */
 2029 
 2030         simple_lock(&vp->v_interlock);
 2031         vgonel(vp, p);
 2032 }
 2033 
 2034 /*
 2035  * vgone, with the vp interlock held.
 2036  */
 2037 void
 2038 vgonel(vp, p)
 2039         struct vnode *vp;
 2040         struct proc *p;
 2041 {
 2042         int s;
 2043 
 2044         /*
 2045          * If a vgone (or vclean) is already in progress,
 2046          * wait until it is done and return.
 2047          */
 2048         if (vp->v_flag & VXLOCK) {
 2049                 vp->v_flag |= VXWANT;
 2050                 simple_unlock(&vp->v_interlock);
 2051                 tsleep((caddr_t)vp, PINOD, "vgone", 0);
 2052                 return;
 2053         }
 2054 
 2055         /*
 2056          * Clean out the filesystem specific data.
 2057          */
 2058         vclean(vp, DOCLOSE, p);
 2059         simple_lock(&vp->v_interlock);
 2060 
 2061         /*
 2062          * Delete from old mount point vnode list, if on one.
 2063          */
 2064         if (vp->v_mount != NULL)
 2065                 insmntque(vp, (struct mount *)0);
 2066         /*
 2067          * If special device, remove it from special device alias list
 2068          * if it is on one.
 2069          */
 2070         if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_rdev != NULL) {
 2071                 simple_lock(&spechash_slock);
 2072                 SLIST_REMOVE(&vp->v_hashchain, vp, vnode, v_specnext);
 2073                 freedev(vp->v_rdev);
 2074                 simple_unlock(&spechash_slock);
 2075                 vp->v_rdev = NULL;
 2076         }
 2077 
 2078         /*
 2079          * If it is on the freelist and not already at the head,
 2080          * move it to the head of the list. The test of the
 2081          * VDOOMED flag and the reference count of zero is because
 2082          * it will be removed from the free list by getnewvnode,
 2083          * but will not have its reference count incremented until
 2084          * after calling vgone. If the reference count were
 2085          * incremented first, vgone would (incorrectly) try to
 2086          * close the previous instance of the underlying object.
 2087          */
 2088         if (vp->v_usecount == 0 && !(vp->v_flag & VDOOMED)) {
 2089                 s = splbio();
 2090                 simple_lock(&vnode_free_list_slock);
 2091                 if (vp->v_flag & VFREE)
 2092                         TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
 2093                 else
 2094                         freevnodes++;
 2095                 vp->v_flag |= VFREE;
 2096                 TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
 2097                 simple_unlock(&vnode_free_list_slock);
 2098                 splx(s);
 2099         }
 2100 
 2101         vp->v_type = VBAD;
 2102         simple_unlock(&vp->v_interlock);
 2103 }
 2104 
 2105 /*
 2106  * Lookup a vnode by device number.
 2107  */
 2108 int
 2109 vfinddev(dev, type, vpp)
 2110         dev_t dev;
 2111         enum vtype type;
 2112         struct vnode **vpp;
 2113 {
 2114         struct vnode *vp;
 2115 
 2116         simple_lock(&spechash_slock);
 2117         SLIST_FOREACH(vp, &dev->si_hlist, v_specnext) {
 2118                 if (type == vp->v_type) {
 2119                         *vpp = vp;
 2120                         simple_unlock(&spechash_slock);
 2121                         return (1);
 2122                 }
 2123         }
 2124         simple_unlock(&spechash_slock);
 2125         return (0);
 2126 }
 2127 
 2128 /*
 2129  * Calculate the total number of references to a special device.
 2130  */
 2131 int
 2132 vcount(vp)
 2133         struct vnode *vp;
 2134 {
 2135         struct vnode *vq;
 2136         int count;
 2137 
 2138         count = 0;
 2139         simple_lock(&spechash_slock);
 2140         SLIST_FOREACH(vq, &vp->v_hashchain, v_specnext)
 2141                 count += vq->v_usecount;
 2142         simple_unlock(&spechash_slock);
 2143         return (count);
 2144 }
 2145 
 2146 /*
 2147  * Same as vcount(), but takes the dev_t as the argument.
 2148  */
 2149 
 2150 int
 2151 count_dev(dev)
 2152         dev_t dev;
 2153 {
 2154         struct vnode *vp;
 2155 
 2156         vp = SLIST_FIRST(&dev->si_hlist);
 2157         if (vp == NULL)
 2158                 return (0);
 2159         return(vcount(vp));
 2160 }
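
/*
 * Illustrative sketch (not part of this file): the classic reason for
 * vcount().  A device close routine typically shuts the hardware down
 * only on the last close of *any* alias, so it must consult vcount()
 * rather than this particular vnode's v_usecount.
 */
static int
example_is_last_close(struct vnode *vp)
{

        return (vcount(vp) == 1);
}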
 2161 
 2162 /*
 2163  * Print out a description of a vnode.
 2164  */
 2165 static char *typename[] =
 2166 {"VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD"};
 2167 
 2168 void
 2169 vprint(label, vp)
 2170         char *label;
 2171         struct vnode *vp;
 2172 {
 2173         char buf[96];
 2174 
 2175         if (label != NULL)
 2176                 printf("%s: %p: ", label, (void *)vp);
 2177         else
 2178                 printf("%p: ", (void *)vp);
 2179         printf("type %s, usecount %d, writecount %d, refcount %d,",
 2180             typename[vp->v_type], vp->v_usecount, vp->v_writecount,
 2181             vp->v_holdcnt);
 2182         buf[0] = '\0';
 2183         if (vp->v_flag & VROOT)
 2184                 strcat(buf, "|VROOT");
 2185         if (vp->v_flag & VTEXT)
 2186                 strcat(buf, "|VTEXT");
 2187         if (vp->v_flag & VSYSTEM)
 2188                 strcat(buf, "|VSYSTEM");
 2189         if (vp->v_flag & VXLOCK)
 2190                 strcat(buf, "|VXLOCK");
 2191         if (vp->v_flag & VXWANT)
 2192                 strcat(buf, "|VXWANT");
 2193         if (vp->v_flag & VBWAIT)
 2194                 strcat(buf, "|VBWAIT");
 2195         if (vp->v_flag & VDOOMED)
 2196                 strcat(buf, "|VDOOMED");
 2197         if (vp->v_flag & VFREE)
 2198                 strcat(buf, "|VFREE");
 2199         if (vp->v_flag & VOBJBUF)
 2200                 strcat(buf, "|VOBJBUF");
 2201         if (buf[0] != '\0')
 2202                 printf(" flags (%s)", &buf[1]);
 2203         if (vp->v_data == NULL) {
 2204                 printf("\n");
 2205         } else {
 2206                 printf("\n\t");
 2207                 VOP_PRINT(vp);
 2208         }
 2209 }
 2210 
 2211 #ifdef DDB
 2212 #include <ddb/ddb.h>
 2213 /*
 2214  * List all of the locked vnodes in the system.
 2215  * Called when debugging the kernel.
 2216  */
 2217 DB_SHOW_COMMAND(lockedvnodes, lockedvnodes)
 2218 {
 2219         struct proc *p = curproc;       /* XXX */
 2220         struct mount *mp, *nmp;
 2221         struct vnode *vp;
 2222 
 2223         printf("Locked vnodes\n");
 2224         simple_lock(&mountlist_slock);
 2225         for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
 2226                 if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
 2227                         nmp = TAILQ_NEXT(mp, mnt_list);
 2228                         continue;
 2229                 }
 2230                 TAILQ_FOREACH(vp, &mp->mnt_nvnodelist, v_nmntvnodes) {
 2231                         if (VOP_ISLOCKED(vp, NULL))
 2232                                 vprint((char *)0, vp);
 2233                 }
 2234                 simple_lock(&mountlist_slock);
 2235                 nmp = TAILQ_NEXT(mp, mnt_list);
 2236                 vfs_unbusy(mp, p);
 2237         }
 2238         simple_unlock(&mountlist_slock);
 2239 }
 2240 #endif
 2241 
 2242 /*
 2243  * Top level filesystem related information gathering.
 2244  */
 2245 static int      sysctl_ovfs_conf __P((SYSCTL_HANDLER_ARGS));
 2246 
 2247 static int
 2248 vfs_sysctl(SYSCTL_HANDLER_ARGS)
 2249 {
 2250         int *name = (int *)arg1 - 1;    /* XXX */
 2251         u_int namelen = arg2 + 1;       /* XXX */
 2252         struct vfsconf *vfsp;
 2253 
 2254 #if 1 || defined(COMPAT_PRELITE2)
 2255         /* Resolve ambiguity between VFS_VFSCONF and VFS_GENERIC. */
 2256         if (namelen == 1)
 2257                 return (sysctl_ovfs_conf(oidp, arg1, arg2, req));
 2258 #endif
 2259 
 2260 #ifdef notyet
 2261         /* all sysctl names at this level are at least name and field */
 2262         if (namelen < 2)
 2263                 return (ENOTDIR);               /* overloaded */
 2264         if (name[0] != VFS_GENERIC) {
 2265                 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
 2266                         if (vfsp->vfc_typenum == name[0])
 2267                                 break;
 2268                 if (vfsp == NULL)
 2269                         return (EOPNOTSUPP);
 2270                 return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1,
 2271                     oldp, oldlenp, newp, newlen, p));
 2272         }
 2273 #endif
 2274         switch (name[1]) {
 2275         case VFS_MAXTYPENUM:
 2276                 if (namelen != 2)
 2277                         return (ENOTDIR);
 2278                 return (SYSCTL_OUT(req, &maxvfsconf, sizeof(int)));
 2279         case VFS_CONF:
 2280                 if (namelen != 3)
 2281                         return (ENOTDIR);       /* overloaded */
 2282                 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
 2283                         if (vfsp->vfc_typenum == name[2])
 2284                                 break;
 2285                 if (vfsp == NULL)
 2286                         return (EOPNOTSUPP);
 2287                 return (SYSCTL_OUT(req, vfsp, sizeof *vfsp));
 2288         }
 2289         return (EOPNOTSUPP);
 2290 }
 2291 
 2292 SYSCTL_NODE(_vfs, VFS_GENERIC, generic, CTLFLAG_RD, vfs_sysctl,
 2293         "Generic filesystem");
 2294 
 2295 #if 1 || defined(COMPAT_PRELITE2)
 2296 
 2297 static int
 2298 sysctl_ovfs_conf(SYSCTL_HANDLER_ARGS)
 2299 {
 2300         int error;
 2301         struct vfsconf *vfsp;
 2302         struct ovfsconf ovfs;
 2303 
 2304         for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
 2305                 bzero(&ovfs, sizeof(ovfs));
 2306                 ovfs.vfc_vfsops = vfsp->vfc_vfsops;     /* XXX used as flag */
 2307                 strcpy(ovfs.vfc_name, vfsp->vfc_name);
 2308                 ovfs.vfc_index = vfsp->vfc_typenum;
 2309                 ovfs.vfc_refcount = vfsp->vfc_refcount;
 2310                 ovfs.vfc_flags = vfsp->vfc_flags;
 2311                 error = SYSCTL_OUT(req, &ovfs, sizeof ovfs);
 2312                 if (error)
 2313                         return error;
 2314         }
 2315         return 0;
 2316 }
 2317 
 2318 #endif /* 1 || COMPAT_PRELITE2 */
 2319 
 2320 #if 0
 2321 #define KINFO_VNODESLOP 10
 2322 /*
 2323  * Dump vnode list (via sysctl).
 2324  * Copyout address of vnode followed by vnode.
 2325  */
 2326 /* ARGSUSED */
 2327 static int
 2328 sysctl_vnode(SYSCTL_HANDLER_ARGS)
 2329 {
 2330         struct proc *p = curproc;       /* XXX */
 2331         struct mount *mp, *nmp;
 2332         struct vnode *nvp, *vp;
 2333         int error;
 2334 
 2335 #define VPTRSZ  sizeof (struct vnode *)
 2336 #define VNODESZ sizeof (struct vnode)
 2337 
 2338         req->lock = 0;
 2339         if (!req->oldptr) /* Make an estimate */
 2340                 return (SYSCTL_OUT(req, 0,
 2341                         (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ)));
 2342 
 2343         simple_lock(&mountlist_slock);
 2344         for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
 2345                 if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
 2346                         nmp = TAILQ_NEXT(mp, mnt_list);
 2347                         continue;
 2348                 }
 2349 again:
 2350                 simple_lock(&mntvnode_slock);
 2351                 for (vp = TAILQ_FIRST(&mp->mnt_nvnodelist);
 2352                      vp != NULL;
 2353                      vp = nvp) {
 2354                         /*
 2355                          * Check that the vp is still associated with
 2356                          * this filesystem.  RACE: could have been
 2357                          * recycled onto the same filesystem.
 2358                          */
 2359                         if (vp->v_mount != mp) {
 2360                                 simple_unlock(&mntvnode_slock);
 2361                                 goto again;
 2362                         }
 2363                         nvp = TAILQ_NEXT(vp, v_nmntvnodes);
 2364                         simple_unlock(&mntvnode_slock);
 2365                         if ((error = SYSCTL_OUT(req, &vp, VPTRSZ)) ||
 2366                             (error = SYSCTL_OUT(req, vp, VNODESZ)))
 2367                                 return (error);
 2368                         simple_lock(&mntvnode_slock);
 2369                 }
 2370                 simple_unlock(&mntvnode_slock);
 2371                 simple_lock(&mountlist_slock);
 2372                 nmp = TAILQ_NEXT(mp, mnt_list);
 2373                 vfs_unbusy(mp, p);
 2374         }
 2375         simple_unlock(&mountlist_slock);
 2376 
 2377         return (0);
 2378 }
 2379 #endif
 2380 
 2381 /*
 2382  * XXX
 2383  * Exporting the vnode list on large systems causes them to crash.
 2384  * Exporting the vnode list on medium systems causes sysctl to coredump.
 2385  */
 2386 #if 0
 2387 SYSCTL_PROC(_kern, KERN_VNODE, vnode, CTLTYPE_OPAQUE|CTLFLAG_RD,
 2388         0, 0, sysctl_vnode, "S,vnode", "");
 2389 #endif
 2390 
 2391 /*
 2392  * Check to see if a filesystem is mounted on a block device.
 2393  */
 2394 int
 2395 vfs_mountedon(vp)
 2396         struct vnode *vp;
 2397 {
 2398 
 2399         if (vp->v_specmountpoint != NULL)
 2400                 return (EBUSY);
 2401         return (0);
 2402 }
 2403 
 2404 /*
 2405  * Unmount all filesystems. The list is traversed in reverse order
 2406  * of mounting to avoid dependencies.
 2407  */
 2408 void
 2409 vfs_unmountall()
 2410 {
 2411         struct mount *mp;
 2412         struct proc *p;
 2413         int error;
 2414 
 2415         if (curproc != NULL)
 2416                 p = curproc;
 2417         else
 2418                 p = initproc;   /* XXX XXX should this be proc0? */
 2419         /*
 2420          * Since this only runs when rebooting, it is not interlocked.
 2421          */
 2422         while(!TAILQ_EMPTY(&mountlist)) {
 2423                 mp = TAILQ_LAST(&mountlist, mntlist);
 2424                 error = dounmount(mp, MNT_FORCE, p);
 2425                 if (error) {
 2426                         TAILQ_REMOVE(&mountlist, mp, mnt_list);
 2427                         printf("unmount of %s failed (",
 2428                             mp->mnt_stat.f_mntonname);
 2429                         if (error == EBUSY)
 2430                                 printf("BUSY)\n");
 2431                         else
 2432                                 printf("%d)\n", error);
 2433                 } else {
 2434                         /* The unmount has removed mp from the mountlist */
 2435                 }
 2436         }
 2437 }
 2438 
 2439 /*
 2440  * Build hash lists of net addresses and hang them off the mount point.
 2441  * Called by ufs_mount() to set up the lists of export addresses.
 2442  */
 2443 static int
 2444 vfs_hang_addrlist(mp, nep, argp)
 2445         struct mount *mp;
 2446         struct netexport *nep;
 2447         struct export_args *argp;
 2448 {
 2449         register struct netcred *np;
 2450         register struct radix_node_head *rnh;
 2451         register int i;
 2452         struct radix_node *rn;
 2453         struct sockaddr *saddr, *smask = 0;
 2454         struct domain *dom;
 2455         int error;
 2456 
 2457         if (argp->ex_addrlen == 0) {
 2458                 if (mp->mnt_flag & MNT_DEFEXPORTED)
 2459                         return (EPERM);
 2460                 np = &nep->ne_defexported;
 2461                 np->netc_exflags = argp->ex_flags;
 2462                 np->netc_anon = argp->ex_anon;
 2463                 np->netc_anon.cr_ref = 1;
 2464                 mp->mnt_flag |= MNT_DEFEXPORTED;
 2465                 return (0);
 2466         }
 2467 
 2468         if (argp->ex_addrlen > MLEN)
 2469                 return (EINVAL);
 2470 
 2471         i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
 2472         np = (struct netcred *) malloc(i, M_NETADDR, M_WAITOK);
 2473         bzero((caddr_t) np, i);
 2474         saddr = (struct sockaddr *) (np + 1);
 2475         if ((error = copyin(argp->ex_addr, (caddr_t) saddr, argp->ex_addrlen)))
 2476                 goto out;
 2477         if (saddr->sa_len > argp->ex_addrlen)
 2478                 saddr->sa_len = argp->ex_addrlen;
 2479         if (argp->ex_masklen) {
 2480                 smask = (struct sockaddr *) ((caddr_t) saddr + argp->ex_addrlen);
 2481                 error = copyin(argp->ex_mask, (caddr_t) smask, argp->ex_masklen);
 2482                 if (error)
 2483                         goto out;
 2484                 if (smask->sa_len > argp->ex_masklen)
 2485                         smask->sa_len = argp->ex_masklen;
 2486         }
 2487         i = saddr->sa_family;
 2488         if ((rnh = nep->ne_rtable[i]) == 0) {
 2489                 /*
 2490                  * Seems silly to initialize every AF when most are not used,
 2491                  * so do it on demand here.
 2492                  */
 2493                 for (dom = domains; dom; dom = dom->dom_next)
 2494                         if (dom->dom_family == i && dom->dom_rtattach) {
 2495                                 dom->dom_rtattach((void **) &nep->ne_rtable[i],
 2496                                     dom->dom_rtoffset);
 2497                                 break;
 2498                         }
 2499                 if ((rnh = nep->ne_rtable[i]) == 0) {
 2500                         error = ENOBUFS;
 2501                         goto out;
 2502                 }
 2503         }
 2504         rn = (*rnh->rnh_addaddr) ((caddr_t) saddr, (caddr_t) smask, rnh,
 2505             np->netc_rnodes);
 2506         if (rn == 0 || np != (struct netcred *) rn) {   /* already exists */
 2507                 error = EPERM;
 2508                 goto out;
 2509         }
 2510         np->netc_exflags = argp->ex_flags;
 2511         np->netc_anon = argp->ex_anon;
 2512         np->netc_anon.cr_ref = 1;
 2513         return (0);
 2514 out:
 2515         free(np, M_NETADDR);
 2516         return (error);
 2517 }
 2518 
 2519 /* ARGSUSED */
 2520 static int
 2521 vfs_free_netcred(rn, w)
 2522         struct radix_node *rn;
 2523         void *w;
 2524 {
 2525         register struct radix_node_head *rnh = (struct radix_node_head *) w;
 2526 
 2527         (*rnh->rnh_deladdr) (rn->rn_key, rn->rn_mask, rnh);
 2528         free((caddr_t) rn, M_NETADDR);
 2529         return (0);
 2530 }
 2531 
 2532 /*
 2533  * Free the net address hash lists that are hanging off the mount points.
 2534  */
 2535 static void
 2536 vfs_free_addrlist(nep)
 2537         struct netexport *nep;
 2538 {
 2539         register int i;
 2540         register struct radix_node_head *rnh;
 2541 
 2542         for (i = 0; i <= AF_MAX; i++)
 2543                 if ((rnh = nep->ne_rtable[i])) {
 2544                         (*rnh->rnh_walktree) (rnh, vfs_free_netcred,
 2545                             (caddr_t) rnh);
 2546                         free((caddr_t) rnh, M_RTABLE);
 2547                         nep->ne_rtable[i] = 0;
 2548                 }
 2549 }
 2550 
 2551 int
 2552 vfs_export(mp, nep, argp)
 2553         struct mount *mp;
 2554         struct netexport *nep;
 2555         struct export_args *argp;
 2556 {
 2557         int error;
 2558 
 2559         if (argp->ex_flags & MNT_DELEXPORT) {
 2560                 if (mp->mnt_flag & MNT_EXPUBLIC) {
 2561                         vfs_setpublicfs(NULL, NULL, NULL);
 2562                         mp->mnt_flag &= ~MNT_EXPUBLIC;
 2563                 }
 2564                 vfs_free_addrlist(nep);
 2565                 mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
 2566         }
 2567         if (argp->ex_flags & MNT_EXPORTED) {
 2568                 if (argp->ex_flags & MNT_EXPUBLIC) {
 2569                         if ((error = vfs_setpublicfs(mp, nep, argp)) != 0)
 2570                                 return (error);
 2571                         mp->mnt_flag |= MNT_EXPUBLIC;
 2572                 }
 2573                 if ((error = vfs_hang_addrlist(mp, nep, argp)))
 2574                         return (error);
 2575                 mp->mnt_flag |= MNT_EXPORTED;
 2576         }
 2577         return (0);
 2578 }
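
/*
 * Illustrative sketch (not part of this file): the call a filesystem's
 * mount routine makes when an MNT_UPDATE request carries new export
 * information.  Where the struct netexport lives (here passed in by
 * the caller) is filesystem-specific and assumed.
 */
static int
example_update_exports(struct mount *mp, struct netexport *nep,
    struct export_args *argp)
{

        if (mp->mnt_flag & MNT_UPDATE)
                return (vfs_export(mp, nep, argp));
        return (EOPNOTSUPP);            /* placeholder for the non-update path */
}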
 2579 
 2580 
 2581 /*
 2582  * Set the publicly exported filesystem (WebNFS). Currently, only
 2583  * one public filesystem is possible in the spec (RFC 2054 and RFC 2055).
 2584  */
 2585 int
 2586 vfs_setpublicfs(mp, nep, argp)
 2587         struct mount *mp;
 2588         struct netexport *nep;
 2589         struct export_args *argp;
 2590 {
 2591         int error;
 2592         struct vnode *rvp;
 2593         char *cp;
 2594 
 2595         /*
 2596          * mp == NULL -> invalidate the current info, the FS is
 2597          * no longer exported. May be called from either vfs_export
 2598          * or unmount, so check if it hasn't already been done.
 2599          */
 2600         if (mp == NULL) {
 2601                 if (nfs_pub.np_valid) {
 2602                         nfs_pub.np_valid = 0;
 2603                         if (nfs_pub.np_index != NULL) {
 2604                                 FREE(nfs_pub.np_index, M_TEMP);
 2605                                 nfs_pub.np_index = NULL;
 2606                         }
 2607                 }
 2608                 return (0);
 2609         }
 2610 
 2611         /*
 2612          * Only one allowed at a time.
 2613          */
 2614         if (nfs_pub.np_valid != 0 && mp != nfs_pub.np_mount)
 2615                 return (EBUSY);
 2616 
 2617         /*
 2618          * Get real filehandle for root of exported FS.
 2619          */
 2620         bzero((caddr_t)&nfs_pub.np_handle, sizeof(nfs_pub.np_handle));
 2621         nfs_pub.np_handle.fh_fsid = mp->mnt_stat.f_fsid;
 2622 
 2623         if ((error = VFS_ROOT(mp, &rvp)))
 2624                 return (error);
 2625 
 2626         if ((error = VFS_VPTOFH(rvp, &nfs_pub.np_handle.fh_fid)))
 2627                 return (error);
 2628 
 2629         vput(rvp);
 2630 
 2631         /*
 2632          * If an indexfile was specified, pull it in.
 2633          */
 2634         if (argp->ex_indexfile != NULL) {
 2635                 MALLOC(nfs_pub.np_index, char *, MAXNAMLEN + 1, M_TEMP,
 2636                     M_WAITOK);
 2637                 error = copyinstr(argp->ex_indexfile, nfs_pub.np_index,
 2638                     MAXNAMLEN, (size_t *)0);
 2639                 if (!error) {
 2640                         /*
 2641                          * Check for illegal filenames.
 2642                          */
 2643                         for (cp = nfs_pub.np_index; *cp; cp++) {
 2644                                 if (*cp == '/') {
 2645                                         error = EINVAL;
 2646                                         break;
 2647                                 }
 2648                         }
 2649                 }
 2650                 if (error) {
 2651                         FREE(nfs_pub.np_index, M_TEMP);
 2652                         return (error);
 2653                 }
 2654         }
 2655 
 2656         nfs_pub.np_mount = mp;
 2657         nfs_pub.np_valid = 1;
 2658         return (0);
 2659 }
 2660 
 2661 struct netcred *
 2662 vfs_export_lookup(mp, nep, nam)
 2663         register struct mount *mp;
 2664         struct netexport *nep;
 2665         struct sockaddr *nam;
 2666 {
 2667         register struct netcred *np;
 2668         register struct radix_node_head *rnh;
 2669         struct sockaddr *saddr;
 2670 
 2671         np = NULL;
 2672         if (mp->mnt_flag & MNT_EXPORTED) {
 2673                 /*
 2674                  * Lookup in the export list first.
 2675                  */
 2676                 if (nam != NULL) {
 2677                         saddr = nam;
 2678                         rnh = nep->ne_rtable[saddr->sa_family];
 2679                         if (rnh != NULL) {
 2680                                 np = (struct netcred *)
 2681                                         (*rnh->rnh_matchaddr)((caddr_t)saddr,
 2682                                                               rnh);
 2683                                 if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
 2684                                         np = NULL;
 2685                         }
 2686                 }
 2687                 /*
 2688                  * If no address match, use the default if it exists.
 2689                  */
 2690                 if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
 2691                         np = &nep->ne_defexported;
 2692         }
 2693         return (np);
 2694 }
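
/*
 * Illustrative sketch (not part of this file): an NFS-style export
 * check built on vfs_export_lookup().  The client's socket address is
 * mapped to a netcred; if no export entry covers it, the request is
 * refused.  The out-parameters mirror the usual check-export idiom.
 */
static int
example_check_export(struct mount *mp, struct netexport *nep,
    struct sockaddr *nam, int *exflagsp, struct ucred **credanonp)
{
        struct netcred *np;

        np = vfs_export_lookup(mp, nep, nam);
        if (np == NULL)
                return (EACCES);        /* address not in the export lists */
        *exflagsp = np->netc_exflags;
        *credanonp = &np->netc_anon;
        return (0);
}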
 2695 
 2696 /*
 2697  * Perform msync on all vnodes under a mount point.
 2698  * The mount point must be locked.
 2699  */
 2700 void
 2701 vfs_msync(struct mount *mp, int flags) 
 2702 {
 2703         struct vnode *vp, *nvp;
 2704         struct vm_object *obj;
 2705         int tries;
 2706 
 2707         tries = 5;
 2708         simple_lock(&mntvnode_slock);
 2709 loop:
 2710         for (vp = TAILQ_FIRST(&mp->mnt_nvnodelist); vp != NULL; vp = nvp) {
 2711                 if (vp->v_mount != mp) {
 2712                         if (--tries > 0)
 2713                                 goto loop;
 2714                         break;
 2715                 }
 2716                 nvp = TAILQ_NEXT(vp, v_nmntvnodes);
 2717 
 2718                 if (vp->v_flag & VXLOCK)        /* XXX: what if MNT_WAIT? */
 2719                         continue;
 2720 
 2721                 /*
 2722                  * There could be hundreds of thousands of vnodes; we cannot
 2723                  * afford to do anything heavy-weight until we have a fairly
 2724                  * good indication that there is something to do.
 2725                  */
 2726                 if ((vp->v_flag & VOBJDIRTY) &&
 2727                     (flags == MNT_WAIT || VOP_ISLOCKED(vp, NULL) == 0)) {
 2728                         simple_unlock(&mntvnode_slock);
 2729                         if (!vget(vp,
 2730                             LK_EXCLUSIVE | LK_RETRY | LK_NOOBJ, curproc)) {
 2731                                 if (VOP_GETVOBJECT(vp, &obj) == 0) {
 2732                                         vm_object_page_clean(obj, 0, 0, flags == MNT_WAIT ? OBJPC_SYNC : OBJPC_NOSYNC);
 2733                                 }
 2734                                 vput(vp);
 2735                         }
 2736                         simple_lock(&mntvnode_slock);
 2737                         if (TAILQ_NEXT(vp, v_nmntvnodes) != nvp) {
 2738                                 if (--tries > 0)
 2739                                         goto loop;
 2740                                 break;
 2741                         }
 2742                 }
 2743         }
 2744         simple_unlock(&mntvnode_slock);
 2745 }
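
/*
 * Illustrative sketch (not part of this file): the usual ordering in a
 * per-mount sync pass.  Dirty mmap'd pages are pushed into the buffer
 * cache with vfs_msync() before the filesystem itself is asked to
 * flush; MNT_NOWAIT keeps the pass asynchronous.
 */
static int
example_sync_one_mount(struct mount *mp, struct proc *p)
{

        vfs_msync(mp, MNT_NOWAIT);
        return (VFS_SYNC(mp, MNT_NOWAIT, p->p_ucred, p));
}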
 2746 
 2747 /*
 2748  * Create the VM object needed for VMIO and mmap support.  This
 2749  * is done for all VREG files in the system.  Some filesystems can also
 2750  * take advantage of the additional metadata buffering capability of the
 2751  * VMIO code by making their device node VMIO-backed as well.
 2752  *
 2753  * vp must be locked when vfs_object_create is called.
 2754  */
 2755 int
 2756 vfs_object_create(vp, p, cred)
 2757         struct vnode *vp;
 2758         struct proc *p;
 2759         struct ucred *cred;
 2760 {
 2761         return (VOP_CREATEVOBJECT(vp, cred, p));
 2762 }
 2763 
 2764 void
 2765 vfree(vp)
 2766         struct vnode *vp;
 2767 {
 2768         int s;
 2769 
 2770         s = splbio();
 2771         simple_lock(&vnode_free_list_slock);
 2772         KASSERT((vp->v_flag & VFREE) == 0, ("vnode already free"));
 2773         if (vp->v_flag & VAGE) {
 2774                 TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
 2775         } else {
 2776                 TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
 2777         }
 2778         freevnodes++;
 2779         simple_unlock(&vnode_free_list_slock);
 2780         vp->v_flag &= ~VAGE;
 2781         vp->v_flag |= VFREE;
 2782         splx(s);
 2783 }
 2784 
 2785 void
 2786 vbusy(vp)
 2787         struct vnode *vp;
 2788 {
 2789         int s;
 2790 
 2791         s = splbio();
 2792         simple_lock(&vnode_free_list_slock);
 2793         KASSERT((vp->v_flag & VFREE) != 0, ("vnode not free"));
 2794         TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
 2795         freevnodes--;
 2796         simple_unlock(&vnode_free_list_slock);
 2797         vp->v_flag &= ~(VFREE|VAGE);
 2798         splx(s);
 2799 }
 2800 
 2801 /*
 2802  * Record a process's interest in events which might happen to
 2803  * a vnode.  Because poll uses the historic select-style interface
 2804  * internally, this routine serves as both the ``check for any
 2805  * pending events'' and the ``record my interest in future events''
 2806  * functions.  (These are done together, while the lock is held,
 2807  * to avoid race conditions.)
 2808  */
 2809 int
 2810 vn_pollrecord(vp, p, events)
 2811         struct vnode *vp;
 2812         struct proc *p;
 2813         short events;
 2814 {
 2815         simple_lock(&vp->v_pollinfo.vpi_lock);
 2816         if (vp->v_pollinfo.vpi_revents & events) {
 2817                 /*
 2818                  * This leaves events we are not interested
 2819                  * in available for the other process which
 2820                  * presumably had requested them
 2821                  * (otherwise they would never have been
 2822                  * recorded).
 2823                  */
 2824                 events &= vp->v_pollinfo.vpi_revents;
 2825                 vp->v_pollinfo.vpi_revents &= ~events;
 2826 
 2827                 simple_unlock(&vp->v_pollinfo.vpi_lock);
 2828                 return events;
 2829         }
 2830         vp->v_pollinfo.vpi_events |= events;
 2831         selrecord(p, &vp->v_pollinfo.vpi_selinfo);
 2832         simple_unlock(&vp->v_pollinfo.vpi_lock);
 2833         return 0;
 2834 }
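
/*
 * Illustrative sketch (not part of this file): a poll routine built on
 * vn_pollrecord().  Ordinary read/write events on a regular file never
 * block and are reported immediately; anything else is recorded for a
 * later vn_pollevent() wakeup.  POLLSTANDARD is assumed to be the
 * standard-event mask from <sys/poll.h>.
 */
static int
example_vop_poll(struct vnode *vp, int events, struct proc *p)
{

        if ((events & ~POLLSTANDARD) == 0)
                return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
        return (vn_pollrecord(vp, p, events));
}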
 2835 
 2836 /*
 2837  * Note the occurrence of an event.  If the VN_POLLEVENT macro is used,
 2838  * it is possible for us to miss an event due to race conditions, but
 2839  * that condition is expected to be rare, so for the moment it is the
 2840  * preferred interface.
 2841  */
 2842 void
 2843 vn_pollevent(vp, events)
 2844         struct vnode *vp;
 2845         short events;
 2846 {
 2847         simple_lock(&vp->v_pollinfo.vpi_lock);
 2848         if (vp->v_pollinfo.vpi_events & events) {
 2849                 /*
 2850                  * We clear vpi_events so that we don't
 2851                  * call selwakeup() twice if two events are
 2852                  * posted before the polling process(es) is
 2853                  * awakened.  This also ensures that we take at
 2854                  * most one selwakeup() if the polling process
 2855                  * is no longer interested.  However, it does
 2856                  * mean that only one event can be noticed at
 2857                  * a time.  (Perhaps we should only clear those
 2858                  * event bits which we note?) XXX
 2859                  */
 2860                 vp->v_pollinfo.vpi_events = 0;  /* &= ~events ??? */
 2861                 vp->v_pollinfo.vpi_revents |= events;
 2862                 selwakeup(&vp->v_pollinfo.vpi_selinfo);
 2863         }
 2864         simple_unlock(&vp->v_pollinfo.vpi_lock);
 2865 }
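/*
 * Hypothetical example of how a data-producing path notifies pollers:
 * the VN_POLLEVENT() macro mentioned above checks vpi_events first, so
 * vn_pollevent() is only called when somebody registered interest.
 * The helper name is illustrative only.
 */
static void
example_note_readable(vp)
        struct vnode *vp;
{

        /* New data is available on vp; wake up readers, if any. */
        VN_POLLEVENT(vp, POLLIN | POLLRDNORM);
}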
 2866 
 2867 /*
 2868  * Wake up anyone polling on vp because it is being revoked.
 2869  * This depends on dead_poll() returning POLLHUP for correct
 2870  * behavior.
 2871  */
 2872 void
 2873 vn_pollgone(vp)
 2874         struct vnode *vp;
 2875 {
 2876         simple_lock(&vp->v_pollinfo.vpi_lock);
 2877         if (vp->v_pollinfo.vpi_events) {
 2878                 vp->v_pollinfo.vpi_events = 0;
 2879                 selwakeup(&vp->v_pollinfo.vpi_selinfo);
 2880         }
 2881         simple_unlock(&vp->v_pollinfo.vpi_lock);
 2882 }
 2883 
 2884 
 2885 
 2886 /*
 2887  * Routine to create and manage a filesystem syncer vnode.
 2888  */
 2889 #define sync_close ((int (*) __P((struct  vop_close_args *)))nullop)
 2890 static int      sync_fsync __P((struct  vop_fsync_args *));
 2891 static int      sync_inactive __P((struct  vop_inactive_args *));
 2892 static int      sync_reclaim  __P((struct  vop_reclaim_args *));
 2893 #define sync_lock ((int (*) __P((struct  vop_lock_args *)))vop_nolock)
 2894 #define sync_unlock ((int (*) __P((struct  vop_unlock_args *)))vop_nounlock)
 2895 static int      sync_print __P((struct vop_print_args *));
 2896 #define sync_islocked ((int(*) __P((struct vop_islocked_args *)))vop_noislocked)
 2897 
 2898 static vop_t **sync_vnodeop_p;
 2899 static struct vnodeopv_entry_desc sync_vnodeop_entries[] = {
 2900         { &vop_default_desc,    (vop_t *) vop_eopnotsupp },
 2901         { &vop_close_desc,      (vop_t *) sync_close },         /* close */
 2902         { &vop_fsync_desc,      (vop_t *) sync_fsync },         /* fsync */
 2903         { &vop_inactive_desc,   (vop_t *) sync_inactive },      /* inactive */
 2904         { &vop_reclaim_desc,    (vop_t *) sync_reclaim },       /* reclaim */
 2905         { &vop_lock_desc,       (vop_t *) sync_lock },          /* lock */
 2906         { &vop_unlock_desc,     (vop_t *) sync_unlock },        /* unlock */
 2907         { &vop_print_desc,      (vop_t *) sync_print },         /* print */
 2908         { &vop_islocked_desc,   (vop_t *) sync_islocked },      /* islocked */
 2909         { NULL, NULL }
 2910 };
 2911 static struct vnodeopv_desc sync_vnodeop_opv_desc =
 2912         { &sync_vnodeop_p, sync_vnodeop_entries };
 2913 
 2914 VNODEOP_SET(sync_vnodeop_opv_desc);
 2915 
 2916 /*
 2917  * Create a new filesystem syncer vnode for the specified mount point.
 2918  */
 2919 int
 2920 vfs_allocate_syncvnode(mp)
 2921         struct mount *mp;
 2922 {
 2923         struct vnode *vp;
 2924         static long start, incr, next;
 2925         int error;
 2926 
 2927         /* Allocate a new vnode */
 2928         if ((error = getnewvnode(VT_VFS, mp, sync_vnodeop_p, &vp)) != 0) {
 2929                 mp->mnt_syncer = NULL;
 2930                 return (error);
 2931         }
 2932         vp->v_type = VNON;
 2933         /*
 2934          * Place the vnode onto the syncer worklist. We attempt to
 2935          * scatter them about on the list so that they will go off
 2936          * at evenly distributed times even if all the filesystems
 2937          * are mounted at once.
 2938          */
 2939         next += incr;
 2940         if (next == 0 || next > syncer_maxdelay) {
 2941                 start /= 2;
 2942                 incr /= 2;
 2943                 if (start == 0) {
 2944                         start = syncer_maxdelay / 2;
 2945                         incr = syncer_maxdelay;
 2946                 }
 2947                 next = start;
 2948         }
 2949         vn_syncer_add_to_worklist(vp, syncdelay > 0 ? next % syncdelay : 0);
 2950         mp->mnt_syncer = vp;
 2951         return (0);
 2952 }
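/*
 * Hypothetical example of when a syncer vnode is allocated: the
 * generic mount path gives every writable mount point a syncer vnode.
 * The helper name and the exact guard are illustrative.
 */
static int
example_after_mount(mp)
        struct mount *mp;
{

        if ((mp->mnt_flag & MNT_RDONLY) == 0 && mp->mnt_syncer == NULL)
                return (vfs_allocate_syncvnode(mp));
        return (0);
}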
 2953 
 2954 /*
 2955  * Do a lazy sync of the filesystem.
 2956  */
 2957 static int
 2958 sync_fsync(ap)
 2959         struct vop_fsync_args /* {
 2960                 struct vnode *a_vp;
 2961                 struct ucred *a_cred;
 2962                 int a_waitfor;
 2963                 struct proc *a_p;
 2964         } */ *ap;
 2965 {
 2966         struct vnode *syncvp = ap->a_vp;
 2967         struct mount *mp = syncvp->v_mount;
 2968         struct proc *p = ap->a_p;
 2969         int asyncflag;
 2970 
 2971         /*
 2972          * We only need to do something if this is a lazy evaluation.
 2973          */
 2974         if (ap->a_waitfor != MNT_LAZY)
 2975                 return (0);
 2976 
 2977         /*
 2978          * Move ourselves to the back of the sync list.
 2979          */
 2980         vn_syncer_add_to_worklist(syncvp, syncdelay);
 2981 
 2982         /*
 2983          * Walk the list of vnodes pushing all that are dirty and
 2984          * not already on the sync list.
 2985          */
 2986         simple_lock(&mountlist_slock);
 2987         if (vfs_busy(mp, LK_EXCLUSIVE | LK_NOWAIT, &mountlist_slock, p) != 0) {
 2988                 simple_unlock(&mountlist_slock);
 2989                 return (0);
 2990         }
 2991         asyncflag = mp->mnt_flag & MNT_ASYNC;
 2992         mp->mnt_flag &= ~MNT_ASYNC;
 2993         vfs_msync(mp, MNT_NOWAIT);
 2994         VFS_SYNC(mp, MNT_LAZY, ap->a_cred, p);
 2995         if (asyncflag)
 2996                 mp->mnt_flag |= MNT_ASYNC;
 2997         vfs_unbusy(mp, p);
 2998         return (0);
 2999 }
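/*
 * Hypothetical sketch of the per-vnode step the syncer daemon performs
 * when a worklist slot comes due; for a syncer vnode the VOP_FSYNC()
 * request ends up in sync_fsync() above.  The helper name is
 * illustrative and error handling is omitted.
 */
static void
example_sync_one(vp, p)
        struct vnode *vp;
        struct proc *p;
{

        vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
        (void) VOP_FSYNC(vp, p->p_ucred, MNT_LAZY, p);
        VOP_UNLOCK(vp, 0, p);
}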
 3000 
 3001 /*
 3002  * The syncer vnode is no longer referenced.
 3003  */
 3004 static int
 3005 sync_inactive(ap)
 3006         struct vop_inactive_args /* {
 3007                 struct vnode *a_vp;
 3008                 struct proc *a_p;
 3009         } */ *ap;
 3010 {
 3011 
 3012         vgone(ap->a_vp);
 3013         return (0);
 3014 }
 3015 
 3016 /*
 3017  * The syncer vnode is no longer needed and is being decommissioned.
 3018  *
 3019  * Modifications to the worklist must be protected at splbio().
 3020  */
 3021 static int
 3022 sync_reclaim(ap)
 3023         struct vop_reclaim_args /* {
 3024                 struct vnode *a_vp;
 3025         } */ *ap;
 3026 {
 3027         struct vnode *vp = ap->a_vp;
 3028         int s;
 3029 
 3030         s = splbio();
 3031         vp->v_mount->mnt_syncer = NULL;
 3032         if (vp->v_flag & VONWORKLST) {
 3033                 LIST_REMOVE(vp, v_synclist);
 3034                 vp->v_flag &= ~VONWORKLST;
 3035         }
 3036         splx(s);
 3037 
 3038         return (0);
 3039 }
 3040 
 3041 /*
 3042  * Print out a syncer vnode.
 3043  */
 3044 static int
 3045 sync_print(ap)
 3046         struct vop_print_args /* {
 3047                 struct vnode *a_vp;
 3048         } */ *ap;
 3049 {
 3050         struct vnode *vp = ap->a_vp;
 3051 
 3052         printf("syncer vnode");
 3053         if (vp->v_vnlock != NULL)
 3054                 lockmgr_printinfo(vp->v_vnlock);
 3055         printf("\n");
 3056         return (0);
 3057 }
 3058 
 3059 /*
 3060  * extract the dev_t from a VBLK or VCHR
 3061  */
 3062 dev_t
 3063 vn_todev(vp)
 3064         struct vnode *vp;
 3065 {
 3066         if (vp->v_type != VBLK && vp->v_type != VCHR)
 3067                 return (NODEV);
 3068         return (vp->v_rdev);
 3069 }
 3070 
 3071 /*
 3072  * Check if vnode represents a disk device
 3073  */
 3074 int
 3075 vn_isdisk(vp, errp)
 3076         struct vnode *vp;
 3077         int *errp;
 3078 {
 3079         if (vp->v_type != VBLK && vp->v_type != VCHR) {
 3080                 if (errp != NULL)
 3081                         *errp = ENOTBLK;
 3082                 return (0);
 3083         }
 3084         if (vp->v_rdev == NULL) {
 3085                 if (errp != NULL)
 3086                         *errp = ENXIO;
 3087                 return (0);
 3088         }
 3089         if (!devsw(vp->v_rdev)) {
 3090                 if (errp != NULL)
 3091                         *errp = ENXIO;
 3092                 return (0);
 3093         }
 3094         if (!(devsw(vp->v_rdev)->d_flags & D_DISK)) {
 3095                 if (errp != NULL)
 3096                         *errp = ENOTBLK;
 3097                 return (0);
 3098         }
 3099         if (errp != NULL)
 3100                 *errp = 0;
 3101         return (1);
 3102 }
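/*
 * Hypothetical example of a caller: a disk-based filesystem's mount
 * routine can use vn_isdisk() to reject a device vnode that does not
 * refer to a disk, passing back the errno chosen above.  The helper
 * name and the devvp argument are illustrative.
 */
static int
example_check_devvp(devvp)
        struct vnode *devvp;
{
        int error;

        if (!vn_isdisk(devvp, &error))
                return (error);
        return (0);
}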
 3103 
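/*
 * Free the resources still held in a nameidata structure after namei():
 * the pathname buffer, the lock and reference on the looked-up vnode
 * and on its parent, and the reference on the saved start directory.
 * Individual cleanups can be suppressed with the NDF_NO_* bits in flags.
 */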
 3104 void
 3105 NDFREE(ndp, flags)
 3106      struct nameidata *ndp;
 3107      const uint flags;
 3108 {
 3109         if (!(flags & NDF_NO_FREE_PNBUF) &&
 3110             (ndp->ni_cnd.cn_flags & HASBUF)) {
 3111                 zfree(namei_zone, ndp->ni_cnd.cn_pnbuf);
 3112                 ndp->ni_cnd.cn_flags &= ~HASBUF;
 3113         }
 3114         if (!(flags & NDF_NO_DVP_UNLOCK) &&
 3115             (ndp->ni_cnd.cn_flags & LOCKPARENT) &&
 3116             ndp->ni_dvp != ndp->ni_vp)
 3117                 VOP_UNLOCK(ndp->ni_dvp, 0, ndp->ni_cnd.cn_proc);
 3118         if (!(flags & NDF_NO_DVP_RELE) &&
 3119             (ndp->ni_cnd.cn_flags & (LOCKPARENT|WANTPARENT))) {
 3120                 vrele(ndp->ni_dvp);
 3121                 ndp->ni_dvp = NULL;
 3122         }
 3123         if (!(flags & NDF_NO_VP_UNLOCK) &&
 3124             (ndp->ni_cnd.cn_flags & LOCKLEAF) && ndp->ni_vp)
 3125                 VOP_UNLOCK(ndp->ni_vp, 0, ndp->ni_cnd.cn_proc);
 3126         if (!(flags & NDF_NO_VP_RELE) &&
 3127             ndp->ni_vp) {
 3128                 vrele(ndp->ni_vp);
 3129                 ndp->ni_vp = NULL;
 3130         }
 3131         if (!(flags & NDF_NO_STARTDIR_RELE) &&
 3132             (ndp->ni_cnd.cn_flags & SAVESTART)) {
 3133                 vrele(ndp->ni_startdir);
 3134                 ndp->ni_startdir = NULL;
 3135         }
 3136 }
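/*
 * Hypothetical example of a namei() consumer releasing everything in a
 * single NDFREE() call.  The helper name and lookup flags are
 * illustrative only.
 */
static int
example_lookup(path, p)
        char *path;
        struct proc *p;
{
        struct nameidata nd;
        int error;

        NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, path, p);
        if ((error = namei(&nd)) != 0)
                return (error);
        /* ... use the locked, referenced nd.ni_vp here ... */
        NDFREE(&nd, 0);         /* unlocks and releases nd.ni_vp too */
        return (0);
}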
