FreeBSD/Linux Kernel Cross Reference
sys/kern/vfs_subr.c

    1 /*      $NetBSD: vfs_subr.c,v 1.243.2.7 2007/08/26 18:44:17 bouyer Exp $        */
    2 
    3 /*-
    4  * Copyright (c) 1997, 1998, 2004, 2005 The NetBSD Foundation, Inc.
    5  * All rights reserved.
    6  *
    7  * This code is derived from software contributed to The NetBSD Foundation
    8  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
    9  * NASA Ames Research Center.
   10  * This code is derived from software contributed to The NetBSD Foundation
   11  * by Charles M. Hannum.
   12  *
   13  * Redistribution and use in source and binary forms, with or without
   14  * modification, are permitted provided that the following conditions
   15  * are met:
   16  * 1. Redistributions of source code must retain the above copyright
   17  *    notice, this list of conditions and the following disclaimer.
   18  * 2. Redistributions in binary form must reproduce the above copyright
   19  *    notice, this list of conditions and the following disclaimer in the
   20  *    documentation and/or other materials provided with the distribution.
   21  * 3. All advertising materials mentioning features or use of this software
   22  *    must display the following acknowledgement:
   23  *      This product includes software developed by the NetBSD
   24  *      Foundation, Inc. and its contributors.
   25  * 4. Neither the name of The NetBSD Foundation nor the names of its
   26  *    contributors may be used to endorse or promote products derived
   27  *    from this software without specific prior written permission.
   28  *
   29  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
   30  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
   31  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
   32  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
   33  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   34  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   35  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   36  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   37  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   38  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   39  * POSSIBILITY OF SUCH DAMAGE.
   40  */
   41 
   42 /*
   43  * Copyright (c) 1989, 1993
   44  *      The Regents of the University of California.  All rights reserved.
   45  * (c) UNIX System Laboratories, Inc.
   46  * All or some portions of this file are derived from material licensed
   47  * to the University of California by American Telephone and Telegraph
   48  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
   49  * the permission of UNIX System Laboratories, Inc.
   50  *
   51  * Redistribution and use in source and binary forms, with or without
   52  * modification, are permitted provided that the following conditions
   53  * are met:
   54  * 1. Redistributions of source code must retain the above copyright
   55  *    notice, this list of conditions and the following disclaimer.
   56  * 2. Redistributions in binary form must reproduce the above copyright
   57  *    notice, this list of conditions and the following disclaimer in the
   58  *    documentation and/or other materials provided with the distribution.
   59  * 3. Neither the name of the University nor the names of its contributors
   60  *    may be used to endorse or promote products derived from this software
   61  *    without specific prior written permission.
   62  *
   63  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   64  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   65  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   66  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   67  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   68  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   69  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   70  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   71  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   72  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   73  * SUCH DAMAGE.
   74  *
   75  *      @(#)vfs_subr.c  8.13 (Berkeley) 4/18/94
   76  */
   77 
   78 /*
   79  * External virtual filesystem routines
   80  */
   81 
   82 #include <sys/cdefs.h>
   83 __KERNEL_RCSID(0, "$NetBSD: vfs_subr.c,v 1.243.2.7 2007/08/26 18:44:17 bouyer Exp $");
   84 
   85 #include "opt_inet.h"
   86 #include "opt_ddb.h"
   87 #include "opt_compat_netbsd.h"
   88 #include "opt_compat_43.h"
   89 
   90 #include <sys/param.h>
   91 #include <sys/systm.h>
   92 #include <sys/proc.h>
   93 #include <sys/kernel.h>
   94 #include <sys/mount.h>
   95 #include <sys/time.h>
   96 #include <sys/event.h>
   97 #include <sys/fcntl.h>
   98 #include <sys/vnode.h>
   99 #include <sys/stat.h>
  100 #include <sys/namei.h>
  101 #include <sys/ucred.h>
  102 #include <sys/buf.h>
  103 #include <sys/errno.h>
  104 #include <sys/malloc.h>
  105 #include <sys/domain.h>
  106 #include <sys/mbuf.h>
  107 #include <sys/sa.h>
  108 #include <sys/syscallargs.h>
  109 #include <sys/device.h>
  110 #include <sys/extattr.h>
  111 #include <sys/dirent.h>
  112 #include <sys/filedesc.h>
  113 
  114 #include <miscfs/specfs/specdev.h>
  115 #include <miscfs/genfs/genfs.h>
  116 #include <miscfs/syncfs/syncfs.h>
  117 
  118 #include <netinet/in.h>
  119 
  120 #include <uvm/uvm.h>
  121 #include <uvm/uvm_ddb.h>
  122 
  125 #include <sys/sysctl.h>
  126 
  127 const enum vtype iftovt_tab[16] = {
  128         VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
  129         VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
  130 };
  131 const int       vttoif_tab[9] = {
  132         0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
  133         S_IFSOCK, S_IFIFO, S_IFMT,
  134 };
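
These two tables back the IFTOVT() and VTTOIF() macros from <sys/vnode.h>,
which convert between the S_IFMT bits of a mode_t and an enum vtype. A
minimal sketch of how they are indexed, assuming the macro shapes of this
vintage:

    #define IFTOVT(mode)    (iftovt_tab[((mode) & S_IFMT) >> 12])
    #define VTTOIF(indx)    (vttoif_tab[(int)(indx)])

    enum vtype vt  = IFTOVT(S_IFDIR);   /* slot 4  -> VDIR    */
    int        ifm = VTTOIF(VREG);      /* slot 1  -> S_IFREG */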
  135 
  136 int doforce = 1;                /* 1 => permit forcible unmounting */
  137 int prtactive = 0;              /* 1 => print out reclaim of active vnodes */
  138 
  139 extern int dovfsusermount;      /* 1 => permit any user to mount filesystems */
  140 
  141 /*
  142  * Insq/Remq for the vnode usage lists.
  143  */
  144 #define bufinsvn(bp, dp)        LIST_INSERT_HEAD(dp, bp, b_vnbufs)
  145 #define bufremvn(bp) {                                                  \
  146         LIST_REMOVE(bp, b_vnbufs);                                      \
  147         (bp)->b_vnbufs.le_next = NOLIST;                                \
  148 }
  149 /* TAILQ_HEAD(freelst, vnode) vnode_free_list = vnode free list (in vnode.h) */
  150 struct freelst vnode_free_list = TAILQ_HEAD_INITIALIZER(vnode_free_list);
  151 struct freelst vnode_hold_list = TAILQ_HEAD_INITIALIZER(vnode_hold_list);
  152 
  153 struct mntlist mountlist =                      /* mounted filesystem list */
  154     CIRCLEQ_HEAD_INITIALIZER(mountlist);
  155 struct vfs_list_head vfs_list =                 /* vfs list */
  156     LIST_HEAD_INITIALIZER(vfs_list);
  157 
  158 struct nfs_public nfs_pub;                      /* publicly exported FS */
  159 
  160 struct simplelock mountlist_slock = SIMPLELOCK_INITIALIZER;
  161 static struct simplelock mntid_slock = SIMPLELOCK_INITIALIZER;
  162 struct simplelock mntvnode_slock = SIMPLELOCK_INITIALIZER;
  163 struct simplelock vnode_free_list_slock = SIMPLELOCK_INITIALIZER;
  164 struct simplelock spechash_slock = SIMPLELOCK_INITIALIZER;
  165 
  166 /* XXX - gross; single global lock to protect v_numoutput */
  167 struct simplelock global_v_numoutput_slock = SIMPLELOCK_INITIALIZER;
  168 
  169 /*
  170  * These define the root filesystem and device.
  171  */
  172 struct mount *rootfs;
  173 struct vnode *rootvnode;
  174 struct device *root_device;                     /* root device */
  175 
  176 POOL_INIT(vnode_pool, sizeof(struct vnode), 0, 0, 0, "vnodepl",
  177     &pool_allocator_nointr);
  178 
  179 MALLOC_DEFINE(M_VNODE, "vnodes", "Dynamically allocated vnodes");
  180 
  181 /*
  182  * Local declarations.
  183  */
  184 void insmntque(struct vnode *, struct mount *);
  185 int getdevvp(dev_t, struct vnode **, enum vtype);
  186 
  187 void vclean(struct vnode *, int, struct proc *);
  188 
  189 static int vfs_hang_addrlist(struct mount *, struct netexport *,
  190                              struct export_args *);
  191 static int vfs_free_netcred(struct radix_node *, void *);
  192 static void vfs_free_addrlist(struct netexport *);
  193 static struct vnode *getcleanvnode(struct proc *);
  194 
  195 #ifdef DEBUG
  196 void printlockedvnodes(void);
  197 #endif
  198 
  199 /*
  200  * Initialize the vnode management data structures.
  201  */
  202 void
  203 vntblinit()
  204 {
  205 
  206         /*
  207          * Initialize the filesystem syncer.
  208          */
  209         vn_initialize_syncerd();
  210 }
  211 
  212 int
  213 vfs_drainvnodes(long target, struct proc *p)
  214 {
  215 
  216         simple_lock(&vnode_free_list_slock);
  217         while (numvnodes > target) {
  218                 struct vnode *vp;
  219 
  220                 vp = getcleanvnode(p);
  221                 if (vp == NULL)
  222                         return EBUSY; /* give up */
  223                 pool_put(&vnode_pool, vp);
  224                 simple_lock(&vnode_free_list_slock);
  225                 numvnodes--;
  226         }
  227         simple_unlock(&vnode_free_list_slock);
  228 
  229         return 0;
  230 }
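
vfs_drainvnodes() is the shrink path used when the vnode limit is lowered
at run time: it pops clean vnodes off the free lists and returns them to
vnode_pool until numvnodes reaches the target. A hedged sketch of a
hypothetical caller (the actual sysctl plumbing for kern.maxvnodes is
elided; newmax is an assumed name):

    /* Hypothetical kern.maxvnodes shrink path (sketch). */
    if (newmax < desiredvnodes) {
            error = vfs_drainvnodes(newmax, curproc);
            if (error)
                    return error;   /* EBUSY: could not reclaim enough */
    }
    desiredvnodes = newmax;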
  231 
  232 /*
   233  * Grab a vnode from the freelist and clean it.
  234  */
  235 struct vnode *
  236 getcleanvnode(p)
  237         struct proc *p;
  238 {
  239         struct vnode *vp;
  240         struct mount *mp;
  241         struct freelst *listhd;
  242 
  243         LOCK_ASSERT(simple_lock_held(&vnode_free_list_slock));
  244 
  245         listhd = &vnode_free_list;
  246 try_nextlist:
  247         TAILQ_FOREACH(vp, listhd, v_freelist) {
  248                 if (!simple_lock_try(&vp->v_interlock))
  249                         continue;
  250                 /*
   251                  * As our LWP might hold the underlying vnode locked,
   252                  * don't try to reclaim a VLAYER vnode if it's locked.
  253                  */
  254                 if ((vp->v_flag & VXLOCK) == 0 &&
  255                     ((vp->v_flag & VLAYER) == 0 || VOP_ISLOCKED(vp) == 0)) {
  256                         if (vn_start_write(vp, &mp, V_NOWAIT) == 0)
  257                                 break;
  258                 }
  259                 mp = NULL;
  260                 simple_unlock(&vp->v_interlock);
  261         }
  262 
  263         if (vp == NULLVP) {
  264                 if (listhd == &vnode_free_list) {
  265                         listhd = &vnode_hold_list;
  266                         goto try_nextlist;
  267                 }
  268                 simple_unlock(&vnode_free_list_slock);
  269                 return NULLVP;
  270         }
  271 
  272         if (vp->v_usecount)
  273                 panic("free vnode isn't, vp %p", vp);
  274         TAILQ_REMOVE(listhd, vp, v_freelist);
  275         /* see comment on why 0xdeadb is set at end of vgone (below) */
  276         vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb;
  277         simple_unlock(&vnode_free_list_slock);
  278         vp->v_lease = NULL;
  279 
  280         if (vp->v_type != VBAD)
  281                 vgonel(vp, p);
  282         else
  283                 simple_unlock(&vp->v_interlock);
  284         vn_finished_write(mp, 0);
  285 #ifdef DIAGNOSTIC
  286         if (vp->v_data || vp->v_uobj.uo_npages ||
  287             TAILQ_FIRST(&vp->v_uobj.memq))
  288                 panic("cleaned vnode isn't, vp %p", vp);
  289         if (vp->v_numoutput)
  290                 panic("clean vnode has pending I/O's, vp %p", vp);
  291 #endif
  292         KASSERT((vp->v_flag & VONWORKLST) == 0);
  293 
  294         return vp;
  295 }
  296 
  297 /*
  298  * Mark a mount point as busy. Used to synchronize access and to delay
  299  * unmounting. Interlock is not released on failure.
  300  */
  301 int
  302 vfs_busy(mp, flags, interlkp)
  303         struct mount *mp;
  304         int flags;
  305         struct simplelock *interlkp;
  306 {
  307         int lkflags;
  308 
  309         while (mp->mnt_iflag & IMNT_UNMOUNT) {
  310                 int gone, n;
  311 
  312                 if (flags & LK_NOWAIT)
  313                         return (ENOENT);
  314                 if ((flags & LK_RECURSEFAIL) && mp->mnt_unmounter != NULL
  315                     && mp->mnt_unmounter == curproc)
  316                         return (EDEADLK);
  317                 if (interlkp)
  318                         simple_unlock(interlkp);
  319                 /*
  320                  * Since all busy locks are shared except the exclusive
  321                  * lock granted when unmounting, the only place that a
  322                  * wakeup needs to be done is at the release of the
  323                  * exclusive lock at the end of dounmount.
  324                  */
  325                 simple_lock(&mp->mnt_slock);
  326                 mp->mnt_wcnt++;
  327                 ltsleep((caddr_t)mp, PVFS, "vfs_busy", 0, &mp->mnt_slock);
  328                 n = --mp->mnt_wcnt;
  329                 simple_unlock(&mp->mnt_slock);
  330                 gone = mp->mnt_iflag & IMNT_GONE;
  331 
  332                 if (n == 0)
  333                         wakeup(&mp->mnt_wcnt);
  334                 if (interlkp)
  335                         simple_lock(interlkp);
  336                 if (gone)
  337                         return (ENOENT);
  338         }
  339         lkflags = LK_SHARED;
  340         if (interlkp)
  341                 lkflags |= LK_INTERLOCK;
  342         if (lockmgr(&mp->mnt_lock, lkflags, interlkp))
  343                 panic("vfs_busy: unexpected lock failure");
  344         return (0);
  345 }
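
A common caller pattern (compare sys_sync() and the sysctl code): walk the
mount list under mountlist_slock and busy each mount so that it cannot be
unmounted while it is being examined. Condensed sketch; the per-mount work
and error handling are elided:

    struct mount *mp, *nmp;

    simple_lock(&mountlist_slock);
    for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
        mp = nmp) {
            if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
                    nmp = CIRCLEQ_NEXT(mp, mnt_list);  /* interlock kept */
                    continue;
            }
            /* ... operate on the busied mount (interlock released) ... */
            simple_lock(&mountlist_slock);
            nmp = CIRCLEQ_NEXT(mp, mnt_list);
            vfs_unbusy(mp);
    }
    simple_unlock(&mountlist_slock);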
  346 
  347 /*
  348  * Free a busy filesystem.
  349  */
  350 void
  351 vfs_unbusy(mp)
  352         struct mount *mp;
  353 {
  354 
  355         lockmgr(&mp->mnt_lock, LK_RELEASE, NULL);
  356 }
  357 
  358 /*
   359  * Look up a filesystem type and, if found, allocate and initialize
   360  * a mount structure for it.
   361  *
   362  * The device name (devname) is usually updated by mount(8) after booting.
  363  */
  364 int
  365 vfs_rootmountalloc(fstypename, devname, mpp)
  366         char *fstypename;
  367         char *devname;
  368         struct mount **mpp;
  369 {
  370         struct vfsops *vfsp = NULL;
  371         struct mount *mp;
  372 
  373         LIST_FOREACH(vfsp, &vfs_list, vfs_list)
  374                 if (!strncmp(vfsp->vfs_name, fstypename, MFSNAMELEN))
  375                         break;
  376 
  377         if (vfsp == NULL)
  378                 return (ENODEV);
  379         mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
  380         memset((char *)mp, 0, (u_long)sizeof(struct mount));
  381         lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
  382         simple_lock_init(&mp->mnt_slock);
  383         (void)vfs_busy(mp, LK_NOWAIT, 0);
  384         LIST_INIT(&mp->mnt_vnodelist);
  385         mp->mnt_op = vfsp;
  386         mp->mnt_flag = MNT_RDONLY;
  387         mp->mnt_vnodecovered = NULLVP;
  388         mp->mnt_leaf = mp;
  389         vfsp->vfs_refcount++;
  390         strncpy(mp->mnt_stat.f_fstypename, vfsp->vfs_name, MFSNAMELEN);
  391         mp->mnt_stat.f_mntonname[0] = '/';
  392         (void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
  393         *mpp = mp;
  394         return (0);
  395 }
  396 
  397 /*
   398  * Look up a mount point by filesystem identifier.
  399  */
  400 struct mount *
  401 vfs_getvfs(fsid)
  402         fsid_t *fsid;
  403 {
  404         struct mount *mp;
  405 
  406         simple_lock(&mountlist_slock);
  407         CIRCLEQ_FOREACH(mp, &mountlist, mnt_list) {
  408                 if (mp->mnt_stat.f_fsidx.__fsid_val[0] == fsid->__fsid_val[0] &&
  409                     mp->mnt_stat.f_fsidx.__fsid_val[1] == fsid->__fsid_val[1]) {
  410                         simple_unlock(&mountlist_slock);
  411                         return (mp);
  412                 }
  413         }
  414         simple_unlock(&mountlist_slock);
  415         return ((struct mount *)0);
  416 }
  417 
  418 /*
  419  * Get a new unique fsid
  420  */
  421 void
  422 vfs_getnewfsid(mp)
  423         struct mount *mp;
  424 {
  425         static u_short xxxfs_mntid;
  426         fsid_t tfsid;
  427         int mtype;
  428 
  429         simple_lock(&mntid_slock);
  430         mtype = makefstype(mp->mnt_op->vfs_name);
  431         mp->mnt_stat.f_fsidx.__fsid_val[0] = makedev(mtype, 0);
  432         mp->mnt_stat.f_fsidx.__fsid_val[1] = mtype;
  433         mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0];
  434         if (xxxfs_mntid == 0)
  435                 ++xxxfs_mntid;
  436         tfsid.__fsid_val[0] = makedev(mtype & 0xff, xxxfs_mntid);
  437         tfsid.__fsid_val[1] = mtype;
  438         if (!CIRCLEQ_EMPTY(&mountlist)) {
  439                 while (vfs_getvfs(&tfsid)) {
  440                         tfsid.__fsid_val[0]++;
  441                         xxxfs_mntid++;
  442                 }
  443         }
  444         mp->mnt_stat.f_fsidx.__fsid_val[0] = tfsid.__fsid_val[0];
  445         mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0];
  446         simple_unlock(&mntid_slock);
  447 }
  448 
  449 /*
  450  * Make a 'unique' number from a mount type name.
  451  */
  452 long
  453 makefstype(type)
  454         const char *type;
  455 {
  456         long rv;
  457 
  458         for (rv = 0; *type; type++) {
  459                 rv <<= 2;
  460                 rv ^= *type;
  461         }
  462         return rv;
  463 }
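
A worked example of the fold: each byte is mixed into the accumulator with
a shift-and-xor, so for makefstype("ffs"):

    rv = 0
    'f' (0x66):  rv = (0x000 << 2) ^ 0x66 = 0x066
    'f' (0x66):  rv = (0x066 << 2) ^ 0x66 = 0x1fe
    's' (0x73):  rv = (0x1fe << 2) ^ 0x73 = 0x78b

so makefstype("ffs") == 0x78b. The result is 'unique' only in a best-effort
sense; distinct type names can collide.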
  464 
  465 
  466 /*
  467  * Set vnode attributes to VNOVAL
  468  */
  469 void
  470 vattr_null(vap)
  471         struct vattr *vap;
  472 {
  473 
  474         vap->va_type = VNON;
  475 
  476         /*
   477          * Assign each member individually so the code stays correct
   478          * even if the members' sizes and signedness differ.
  479          */
  480         vap->va_mode = VNOVAL;
  481         vap->va_nlink = VNOVAL;
  482         vap->va_uid = VNOVAL;
  483         vap->va_gid = VNOVAL;
  484         vap->va_fsid = VNOVAL;
  485         vap->va_fileid = VNOVAL;
  486         vap->va_size = VNOVAL;
  487         vap->va_blocksize = VNOVAL;
  488         vap->va_atime.tv_sec =
  489             vap->va_mtime.tv_sec =
  490             vap->va_ctime.tv_sec =
  491             vap->va_birthtime.tv_sec = VNOVAL;
  492         vap->va_atime.tv_nsec =
  493             vap->va_mtime.tv_nsec =
  494             vap->va_ctime.tv_nsec =
  495             vap->va_birthtime.tv_nsec = VNOVAL;
  496         vap->va_gen = VNOVAL;
  497         vap->va_flags = VNOVAL;
  498         vap->va_rdev = VNOVAL;
  499         vap->va_bytes = VNOVAL;
  500         vap->va_vaflags = 0;
  501 }
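
The usual caller pattern: null out the whole vattr, set only the attributes
being changed, and hand the result to VOP_SETATTR(). A minimal sketch,
assuming a locked vnode vp, credentials cred, and process p in scope:

    struct vattr va;
    int error;

    vattr_null(&va);                    /* everything starts as VNOVAL */
    va.va_size = new_size;              /* the one attribute to change */
    error = VOP_SETATTR(vp, &va, cred, p);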
  502 
  503 /*
  504  * Routines having to do with the management of the vnode table.
  505  */
  506 extern int (**dead_vnodeop_p)(void *);
  507 long numvnodes;
  508 
  509 /*
  510  * Return the next vnode from the free list.
  511  */
  512 int
  513 getnewvnode(tag, mp, vops, vpp)
  514         enum vtagtype tag;
  515         struct mount *mp;
  516         int (**vops)(void *);
  517         struct vnode **vpp;
  518 {
  519         extern struct uvm_pagerops uvm_vnodeops;
  520         struct uvm_object *uobj;
  521         struct proc *p = curproc;       /* XXX */
  522         static int toggle;
  523         struct vnode *vp;
  524         int error = 0, tryalloc;
  525 
  526  try_again:
  527         if (mp) {
  528                 /*
  529                  * Mark filesystem busy while we're creating a vnode.
  530                  * If unmount is in progress, this will wait; if the
  531                  * unmount succeeds (only if umount -f), this will
  532                  * return an error.  If the unmount fails, we'll keep
  533                  * going afterwards.
  534                  * (This puts the per-mount vnode list logically under
  535                  * the protection of the vfs_busy lock).
  536                  */
  537                 error = vfs_busy(mp, LK_RECURSEFAIL, 0);
  538                 if (error && error != EDEADLK)
  539                         return error;
  540         }
  541 
  542         /*
  543          * We must choose whether to allocate a new vnode or recycle an
  544          * existing one. The criterion for allocating a new one is that
  545          * the total number of vnodes is less than the number desired or
  546          * there are no vnodes on either free list. Generally we only
  547          * want to recycle vnodes that have no buffers associated with
  548          * them, so we look first on the vnode_free_list. If it is empty,
  549          * we next consider vnodes with referencing buffers on the
  550          * vnode_hold_list. The toggle ensures that half the time we
   551          * will recycle a vnode from the vnode_hold_list, and half the
   552          * time we will allocate a new one unless the list has grown to
   553          * twice the desired size. We are reluctant to recycle vnodes
   554          * from the vnode_hold_list because that loses the identity of
   555          * all their referencing buffers.
  556          */
  557 
  558         vp = NULL;
  559 
  560         simple_lock(&vnode_free_list_slock);
  561 
  562         toggle ^= 1;
  563         if (numvnodes > 2 * desiredvnodes)
  564                 toggle = 0;
  565 
  566         tryalloc = numvnodes < desiredvnodes ||
  567             (TAILQ_FIRST(&vnode_free_list) == NULL &&
  568              (TAILQ_FIRST(&vnode_hold_list) == NULL || toggle));
  569 
  570         if (tryalloc &&
  571             (vp = pool_get(&vnode_pool, PR_NOWAIT)) != NULL) {
  572                 numvnodes++;
  573                 simple_unlock(&vnode_free_list_slock);
  574                 memset(vp, 0, sizeof(*vp));
  575                 simple_lock_init(&vp->v_interlock);
  576                 uobj = &vp->v_uobj;
  577                 uobj->pgops = &uvm_vnodeops;
  578                 TAILQ_INIT(&uobj->memq);
  579                 /*
  580                  * done by memset() above.
  581                  *      uobj->uo_npages = 0;
  582                  *      LIST_INIT(&vp->v_nclist);
  583                  *      LIST_INIT(&vp->v_dnclist);
  584                  */
  585         } else {
  586                 vp = getcleanvnode(p);
  587                 /*
  588                  * Unless this is a bad time of the month, at most
  589                  * the first NCPUS items on the free list are
  590                  * locked, so this is close enough to being empty.
  591                  */
  592                 if (vp == NULLVP) {
  593                         if (mp && error != EDEADLK)
  594                                 vfs_unbusy(mp);
  595                         if (tryalloc) {
  596                                 printf("WARNING: unable to allocate new "
  597                                     "vnode, retrying...\n");
  598                                 (void) tsleep(&lbolt, PRIBIO, "newvn", hz);
  599                                 goto try_again;
  600                         }
  601                         tablefull("vnode", "increase kern.maxvnodes or NVNODE");
  602                         *vpp = 0;
  603                         return (ENFILE);
  604                 }
  605                 vp->v_flag = 0;
  606                 vp->v_socket = NULL;
  607         }
  608         vp->v_type = VNON;
  609         vp->v_vnlock = &vp->v_lock;
  610         lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
  611         KASSERT(LIST_EMPTY(&vp->v_nclist));
  612         KASSERT(LIST_EMPTY(&vp->v_dnclist));
  613         vp->v_tag = tag;
  614         vp->v_op = vops;
  615         insmntque(vp, mp);
  616         *vpp = vp;
  617         vp->v_usecount = 1;
  618         vp->v_data = 0;
  619         simple_lock_init(&vp->v_interlock);
  620 
  621         /*
  622          * initialize uvm_object within vnode.
  623          */
  624 
  625         uobj = &vp->v_uobj;
  626         KASSERT(uobj->pgops == &uvm_vnodeops);
  627         KASSERT(uobj->uo_npages == 0);
  628         KASSERT(TAILQ_FIRST(&uobj->memq) == NULL);
  629         vp->v_size = VSIZENOTSET;
  630 
  631         if (mp && error != EDEADLK)
  632                 vfs_unbusy(mp);
  633         return (0);
  634 }
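
A typical filesystem caller (compare ffs_vget()): allocate a fresh vnode,
attach the filesystem's private per-vnode state, and finish initializing
it. Condensed sketch with ffs-style names; locking and hash insertion are
elided:

    struct vnode *vp;
    struct inode *ip;
    int error;

    error = getnewvnode(VT_UFS, mp, ffs_vnodeop_p, &vp);
    if (error)
            return error;
    ip = pool_get(&ffs_inode_pool, PR_WAITOK);
    memset(ip, 0, sizeof(*ip));
    vp->v_data = ip;                    /* fs-private state */
    ip->i_vnode = vp;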
  635 
  636 /*
  637  * This is really just the reverse of getnewvnode(). Needed for
   638  * VFS_VGET functions that may need to push back a vnode in case
  639  * of a locking race.
  640  */
  641 void
  642 ungetnewvnode(vp)
  643         struct vnode *vp;
  644 {
  645 #ifdef DIAGNOSTIC
  646         if (vp->v_usecount != 1)
  647                 panic("ungetnewvnode: busy vnode");
  648 #endif
  649         vp->v_usecount--;
  650         insmntque(vp, NULL);
  651         vp->v_type = VBAD;
  652 
  653         simple_lock(&vp->v_interlock);
  654         /*
  655          * Insert at head of LRU list
  656          */
  657         simple_lock(&vnode_free_list_slock);
  658         if (vp->v_holdcnt > 0)
  659                 TAILQ_INSERT_HEAD(&vnode_hold_list, vp, v_freelist);
  660         else
  661                 TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
  662         simple_unlock(&vnode_free_list_slock);
  663         simple_unlock(&vp->v_interlock);
  664 }
  665 
  666 /*
  667  * Move a vnode from one mount queue to another.
  668  */
  669 void
  670 insmntque(vp, mp)
  671         struct vnode *vp;
  672         struct mount *mp;
  673 {
  674 
  675 #ifdef DIAGNOSTIC
  676         if ((mp != NULL) &&
  677             (mp->mnt_iflag & IMNT_UNMOUNT) &&
  678             !(mp->mnt_flag & MNT_SOFTDEP) &&
  679             vp->v_tag != VT_VFS) {
  680                 panic("insmntque into dying filesystem");
  681         }
  682 #endif
  683 
  684         simple_lock(&mntvnode_slock);
  685         /*
  686          * Delete from old mount point vnode list, if on one.
  687          */
  688         if (vp->v_mount != NULL)
  689                 LIST_REMOVE(vp, v_mntvnodes);
  690         /*
  691          * Insert into list of vnodes for the new mount point, if available.
  692          */
  693         if ((vp->v_mount = mp) != NULL)
  694                 LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
  695         simple_unlock(&mntvnode_slock);
  696 }
  697 
  698 /*
  699  * Update outstanding I/O count and do wakeup if requested.
  700  */
  701 void
  702 vwakeup(bp)
  703         struct buf *bp;
  704 {
  705         struct vnode *vp;
  706 
  707         if ((vp = bp->b_vp) != NULL) {
  708                 /* XXX global lock hack
  709                  * can't use v_interlock here since this is called
  710                  * in interrupt context from biodone().
  711                  */
  712                 simple_lock(&global_v_numoutput_slock);
  713                 if (--vp->v_numoutput < 0)
  714                         panic("vwakeup: neg numoutput, vp %p", vp);
  715                 if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) {
  716                         vp->v_flag &= ~VBWAIT;
  717                         wakeup((caddr_t)&vp->v_numoutput);
  718                 }
  719                 simple_unlock(&global_v_numoutput_slock);
  720         }
  721 }
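
The matching increment side lives at the points that start writes (compare
bwrite() in vfs_bio.c): v_numoutput is bumped under the same global lock,
so the biodone() -> vwakeup() path above stays balanced. Sketch of that
pattern:

    int s;

    s = splbio();
    simple_lock(&global_v_numoutput_slock);
    vp->v_numoutput++;
    simple_unlock(&global_v_numoutput_slock);
    splx(s);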
  722 
  723 /*
  724  * Flush out and invalidate all buffers associated with a vnode.
  725  * Called with the underlying vnode locked, which should prevent new dirty
  726  * buffers from being queued.
  727  */
  728 int
  729 vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
  730         struct vnode *vp;
  731         int flags;
  732         struct ucred *cred;
  733         struct proc *p;
  734         int slpflag, slptimeo;
  735 {
  736         struct buf *bp, *nbp;
  737         int s, error;
  738         int flushflags = PGO_ALLPAGES | PGO_FREE | PGO_SYNCIO |
  739                 (flags & V_SAVE ? PGO_CLEANIT : 0);
  740 
  741         /* XXXUBC this doesn't look at flags or slp* */
  742         simple_lock(&vp->v_interlock);
  743         error = VOP_PUTPAGES(vp, 0, 0, flushflags);
  744         if (error) {
  745                 return error;
  746         }
  747 
  748         if (flags & V_SAVE) {
  749                 error = VOP_FSYNC(vp, cred, FSYNC_WAIT|FSYNC_RECLAIM, 0, 0, p);
  750                 if (error)
  751                         return (error);
  752 #ifdef DIAGNOSTIC
  753                 s = splbio();
  754                 if (vp->v_numoutput > 0 || !LIST_EMPTY(&vp->v_dirtyblkhd))
  755                         panic("vinvalbuf: dirty bufs, vp %p", vp);
  756                 splx(s);
  757 #endif
  758         }
  759 
  760         s = splbio();
  761 
  762 restart:
  763         for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
  764                 nbp = LIST_NEXT(bp, b_vnbufs);
  765                 simple_lock(&bp->b_interlock);
  766                 if (bp->b_flags & B_BUSY) {
  767                         bp->b_flags |= B_WANTED;
  768                         error = ltsleep((caddr_t)bp,
  769                                     slpflag | (PRIBIO + 1) | PNORELOCK,
  770                                     "vinvalbuf", slptimeo, &bp->b_interlock);
  771                         if (error) {
  772                                 splx(s);
  773                                 return (error);
  774                         }
  775                         goto restart;
  776                 }
  777                 bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
  778                 simple_unlock(&bp->b_interlock);
  779                 brelse(bp);
  780         }
  781 
  782         for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
  783                 nbp = LIST_NEXT(bp, b_vnbufs);
  784                 simple_lock(&bp->b_interlock);
  785                 if (bp->b_flags & B_BUSY) {
  786                         bp->b_flags |= B_WANTED;
  787                         error = ltsleep((caddr_t)bp,
  788                                     slpflag | (PRIBIO + 1) | PNORELOCK,
  789                                     "vinvalbuf", slptimeo, &bp->b_interlock);
  790                         if (error) {
  791                                 splx(s);
  792                                 return (error);
  793                         }
  794                         goto restart;
  795                 }
  796                 /*
  797                  * XXX Since there are no node locks for NFS, I believe
  798                  * there is a slight chance that a delayed write will
  799                  * occur while sleeping just above, so check for it.
  800                  */
  801                 if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
  802 #ifdef DEBUG
  803                         printf("buffer still DELWRI\n");
  804 #endif
  805                         bp->b_flags |= B_BUSY | B_VFLUSH;
  806                         simple_unlock(&bp->b_interlock);
  807                         VOP_BWRITE(bp);
  808                         goto restart;
  809                 }
  810                 bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
  811                 simple_unlock(&bp->b_interlock);
  812                 brelse(bp);
  813         }
  814 
  815 #ifdef DIAGNOSTIC
  816         if (!LIST_EMPTY(&vp->v_cleanblkhd) || !LIST_EMPTY(&vp->v_dirtyblkhd))
  817                 panic("vinvalbuf: flush failed, vp %p", vp);
  818 #endif
  819 
  820         splx(s);
  821 
  822         return (0);
  823 }
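
Typical call sites follow the vclean() pattern: first try to write dirty
buffers back, and if that fails during a forced reclaim, discard them
outright. Sketch:

    error = vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);    /* flush, saving data */
    if (error)
            error = vinvalbuf(vp, 0, NOCRED, p, 0, 0); /* then just discard */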
  824 
  825 /*
   826  * Destroy any in-core blocks past the truncation length.
  827  * Called with the underlying vnode locked, which should prevent new dirty
  828  * buffers from being queued.
  829  */
  830 int
  831 vtruncbuf(vp, lbn, slpflag, slptimeo)
  832         struct vnode *vp;
  833         daddr_t lbn;
  834         int slpflag, slptimeo;
  835 {
  836         struct buf *bp, *nbp;
  837         int s, error;
  838         voff_t off;
  839 
  840         off = round_page((voff_t)lbn << vp->v_mount->mnt_fs_bshift);
  841         simple_lock(&vp->v_interlock);
  842         error = VOP_PUTPAGES(vp, off, 0, PGO_FREE | PGO_SYNCIO);
  843         if (error) {
  844                 return error;
  845         }
  846 
  847         s = splbio();
  848 
  849 restart:
  850         for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
  851                 nbp = LIST_NEXT(bp, b_vnbufs);
  852                 if (bp->b_lblkno < lbn)
  853                         continue;
  854                 simple_lock(&bp->b_interlock);
  855                 if (bp->b_flags & B_BUSY) {
  856                         bp->b_flags |= B_WANTED;
  857                         error = ltsleep(bp, slpflag | (PRIBIO + 1) | PNORELOCK,
  858                             "vtruncbuf", slptimeo, &bp->b_interlock);
  859                         if (error) {
  860                                 splx(s);
  861                                 return (error);
  862                         }
  863                         goto restart;
  864                 }
  865                 bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
  866                 simple_unlock(&bp->b_interlock);
  867                 brelse(bp);
  868         }
  869 
  870         for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
  871                 nbp = LIST_NEXT(bp, b_vnbufs);
  872                 if (bp->b_lblkno < lbn)
  873                         continue;
  874                 simple_lock(&bp->b_interlock);
  875                 if (bp->b_flags & B_BUSY) {
  876                         bp->b_flags |= B_WANTED;
  877                         error = ltsleep(bp, slpflag | (PRIBIO + 1) | PNORELOCK,
  878                             "vtruncbuf", slptimeo, &bp->b_interlock);
  879                         if (error) {
  880                                 splx(s);
  881                                 return (error);
  882                         }
  883                         goto restart;
  884                 }
  885                 bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
  886                 simple_unlock(&bp->b_interlock);
  887                 brelse(bp);
  888         }
  889 
  890         splx(s);
  891 
  892         return (0);
  893 }
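
A typical caller (compare ffs_truncate()): after shrinking a file, destroy
any cached blocks wholly past the new end. Sketch, where lastblock is the
last logical block still part of the file:

    error = vtruncbuf(vp, lastblock + 1, 0, 0);
    if (error)
            return error;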
  894 
  895 void
  896 vflushbuf(vp, sync)
  897         struct vnode *vp;
  898         int sync;
  899 {
  900         struct buf *bp, *nbp;
  901         int flags = PGO_CLEANIT | PGO_ALLPAGES | (sync ? PGO_SYNCIO : 0);
  902         int s;
  903 
  904         simple_lock(&vp->v_interlock);
  905         (void) VOP_PUTPAGES(vp, 0, 0, flags);
  906 
  907 loop:
  908         s = splbio();
  909         for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
  910                 nbp = LIST_NEXT(bp, b_vnbufs);
  911                 simple_lock(&bp->b_interlock);
  912                 if ((bp->b_flags & B_BUSY)) {
  913                         simple_unlock(&bp->b_interlock);
  914                         continue;
  915                 }
  916                 if ((bp->b_flags & B_DELWRI) == 0)
  917                         panic("vflushbuf: not dirty, bp %p", bp);
  918                 bp->b_flags |= B_BUSY | B_VFLUSH;
  919                 simple_unlock(&bp->b_interlock);
  920                 splx(s);
  921                 /*
  922                  * Wait for I/O associated with indirect blocks to complete,
  923                  * since there is no way to quickly wait for them below.
  924                  */
  925                 if (bp->b_vp == vp || sync == 0)
  926                         (void) bawrite(bp);
  927                 else
  928                         (void) bwrite(bp);
  929                 goto loop;
  930         }
  931         if (sync == 0) {
  932                 splx(s);
  933                 return;
  934         }
  935         simple_lock(&global_v_numoutput_slock);
  936         while (vp->v_numoutput) {
  937                 vp->v_flag |= VBWAIT;
  938                 ltsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "vflushbuf", 0,
  939                         &global_v_numoutput_slock);
  940         }
  941         simple_unlock(&global_v_numoutput_slock);
  942         splx(s);
  943         if (!LIST_EMPTY(&vp->v_dirtyblkhd)) {
  944                 vprint("vflushbuf: dirty", vp);
  945                 goto loop;
  946         }
  947 }
  948 
  949 /*
  950  * Associate a buffer with a vnode.
  951  */
  952 void
  953 bgetvp(vp, bp)
  954         struct vnode *vp;
  955         struct buf *bp;
  956 {
  957         int s;
  958 
  959         if (bp->b_vp)
  960                 panic("bgetvp: not free, bp %p", bp);
  961         VHOLD(vp);
  962         s = splbio();
  963         bp->b_vp = vp;
  964         if (vp->v_type == VBLK || vp->v_type == VCHR)
  965                 bp->b_dev = vp->v_rdev;
  966         else
  967                 bp->b_dev = NODEV;
  968         /*
  969          * Insert onto list for new vnode.
  970          */
  971         bufinsvn(bp, &vp->v_cleanblkhd);
  972         splx(s);
  973 }
  974 
  975 /*
  976  * Disassociate a buffer from a vnode.
  977  */
  978 void
  979 brelvp(bp)
  980         struct buf *bp;
  981 {
  982         struct vnode *vp;
  983         int s;
  984 
  985         if (bp->b_vp == NULL)
  986                 panic("brelvp: vp NULL, bp %p", bp);
  987 
  988         s = splbio();
  989         vp = bp->b_vp;
  990         /*
  991          * Delete from old vnode list, if on one.
  992          */
  993         if (LIST_NEXT(bp, b_vnbufs) != NOLIST)
  994                 bufremvn(bp);
  995 
  996         if (TAILQ_EMPTY(&vp->v_uobj.memq) && (vp->v_flag & VONWORKLST) &&
  997             LIST_FIRST(&vp->v_dirtyblkhd) == NULL) {
  998                 vp->v_flag &= ~(VWRITEMAPDIRTY|VONWORKLST);
  999                 LIST_REMOVE(vp, v_synclist);
 1000         }
 1001 
 1002         bp->b_vp = NULL;
 1003         HOLDRELE(vp);
 1004         splx(s);
 1005 }
 1006 
 1007 /*
 1008  * Reassign a buffer from one vnode to another.
 1009  * Used to assign file specific control information
 1010  * (indirect blocks) to the vnode to which they belong.
 1011  *
 1012  * This function must be called at splbio().
 1013  */
 1014 void
 1015 reassignbuf(bp, newvp)
 1016         struct buf *bp;
 1017         struct vnode *newvp;
 1018 {
 1019         struct buflists *listheadp;
 1020         int delay;
 1021 
 1022         /*
 1023          * Delete from old vnode list, if on one.
 1024          */
 1025         if (LIST_NEXT(bp, b_vnbufs) != NOLIST)
 1026                 bufremvn(bp);
 1027         /*
 1028          * If dirty, put on list of dirty buffers;
 1029          * otherwise insert onto list of clean buffers.
 1030          */
 1031         if ((bp->b_flags & B_DELWRI) == 0) {
 1032                 listheadp = &newvp->v_cleanblkhd;
 1033                 if (TAILQ_EMPTY(&newvp->v_uobj.memq) &&
 1034                     (newvp->v_flag & VONWORKLST) &&
 1035                     LIST_FIRST(&newvp->v_dirtyblkhd) == NULL) {
 1036                         newvp->v_flag &= ~(VWRITEMAPDIRTY|VONWORKLST);
 1037                         LIST_REMOVE(newvp, v_synclist);
 1038                 }
 1039         } else {
 1040                 listheadp = &newvp->v_dirtyblkhd;
 1041                 if ((newvp->v_flag & VONWORKLST) == 0) {
 1042                         switch (newvp->v_type) {
 1043                         case VDIR:
 1044                                 delay = dirdelay;
 1045                                 break;
 1046                         case VBLK:
 1047                                 if (newvp->v_specmountpoint != NULL) {
 1048                                         delay = metadelay;
 1049                                         break;
 1050                                 }
 1051                                 /* fall through */
 1052                         default:
 1053                                 delay = filedelay;
 1054                                 break;
 1055                         }
 1056                         if (!newvp->v_mount ||
 1057                             (newvp->v_mount->mnt_flag & MNT_ASYNC) == 0)
 1058                                 vn_syncer_add_to_worklist(newvp, delay);
 1059                 }
 1060         }
 1061         bufinsvn(bp, listheadp);
 1062 }
 1063 
 1064 /*
 1065  * Create a vnode for a block device.
 1066  * Used for root filesystem and swap areas.
 1067  * Also used for memory file system special devices.
 1068  */
 1069 int
 1070 bdevvp(dev, vpp)
 1071         dev_t dev;
 1072         struct vnode **vpp;
 1073 {
 1074 
 1075         return (getdevvp(dev, vpp, VBLK));
 1076 }
 1077 
 1078 /*
 1079  * Create a vnode for a character device.
 1080  * Used for kernfs and some console handling.
 1081  */
 1082 int
 1083 cdevvp(dev, vpp)
 1084         dev_t dev;
 1085         struct vnode **vpp;
 1086 {
 1087 
 1088         return (getdevvp(dev, vpp, VCHR));
 1089 }
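
Typical boot-time usage (compare the root-mount setup path; the swap code
uses the same idiom): wrap a device number in a vnode so the rest of the
kernel can reach the device through the VFS layer. Sketch:

    if (bdevvp(rootdev, &rootvp))
            panic("can't set up root device vnode");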
 1090 
 1091 /*
 1092  * Create a vnode for a device.
 1093  * Used by bdevvp (block device) for root file system etc.,
 1094  * and by cdevvp (character device) for console and kernfs.
 1095  */
 1096 int
 1097 getdevvp(dev, vpp, type)
 1098         dev_t dev;
 1099         struct vnode **vpp;
 1100         enum vtype type;
 1101 {
 1102         struct vnode *vp;
 1103         struct vnode *nvp;
 1104         int error;
 1105 
 1106         if (dev == NODEV) {
 1107                 *vpp = NULLVP;
 1108                 return (0);
 1109         }
 1110         error = getnewvnode(VT_NON, NULL, spec_vnodeop_p, &nvp);
 1111         if (error) {
 1112                 *vpp = NULLVP;
 1113                 return (error);
 1114         }
 1115         vp = nvp;
 1116         vp->v_type = type;
 1117         if ((nvp = checkalias(vp, dev, NULL)) != 0) {
 1118                 vput(vp);
 1119                 vp = nvp;
 1120         }
 1121         *vpp = vp;
 1122         return (0);
 1123 }
 1124 
 1125 /*
 1126  * Check to see if the new vnode represents a special device
 1127  * for which we already have a vnode (either because of
 1128  * bdevvp() or because of a different vnode representing
 1129  * the same block device). If such an alias exists, deallocate
 1130  * the existing contents and return the aliased vnode. The
 1131  * caller is responsible for filling it with its new contents.
 1132  */
 1133 struct vnode *
 1134 checkalias(nvp, nvp_rdev, mp)
 1135         struct vnode *nvp;
 1136         dev_t nvp_rdev;
 1137         struct mount *mp;
 1138 {
 1139         struct proc *p = curproc;       /* XXX */
 1140         struct vnode *vp;
 1141         struct vnode **vpp;
 1142 
 1143         if (nvp->v_type != VBLK && nvp->v_type != VCHR)
 1144                 return (NULLVP);
 1145 
 1146         vpp = &speclisth[SPECHASH(nvp_rdev)];
 1147 loop:
 1148         simple_lock(&spechash_slock);
 1149         for (vp = *vpp; vp; vp = vp->v_specnext) {
 1150                 if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
 1151                         continue;
 1152                 /*
 1153                  * Alias, but not in use, so flush it out.
 1154                  */
 1155                 simple_lock(&vp->v_interlock);
 1156                 simple_unlock(&spechash_slock);
 1157                 if (vp->v_usecount == 0) {
 1158                         vgonel(vp, p);
 1159                         goto loop;
 1160                 }
 1161                 /*
  1162                  * What we want to know here is whether someone else has
  1163                  * removed this vnode from the device hash list while we
  1164                  * were waiting.  That can only happen via vclean(), which
  1165                  * requires the vnode to be locked.  Therefore, we use
  1166                  * LK_SLEEPFAIL and retry.
 1167                  */
 1168                 if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK | LK_SLEEPFAIL))
 1169                         goto loop;
 1170                 simple_lock(&spechash_slock);
 1171                 break;
 1172         }
 1173         if (vp == NULL || vp->v_tag != VT_NON || vp->v_type != VBLK) {
 1174                 MALLOC(nvp->v_specinfo, struct specinfo *,
 1175                         sizeof(struct specinfo), M_VNODE, M_NOWAIT);
 1176                 /* XXX Erg. */
 1177                 if (nvp->v_specinfo == NULL) {
 1178                         simple_unlock(&spechash_slock);
 1179                         uvm_wait("checkalias");
 1180                         goto loop;
 1181                 }
 1182 
 1183                 nvp->v_rdev = nvp_rdev;
 1184                 nvp->v_hashchain = vpp;
 1185                 nvp->v_specnext = *vpp;
 1186                 nvp->v_specmountpoint = NULL;
 1187                 simple_unlock(&spechash_slock);
 1188                 nvp->v_speclockf = NULL;
 1189                 simple_lock_init(&nvp->v_spec_cow_slock);
 1190                 SLIST_INIT(&nvp->v_spec_cow_head);
 1191                 nvp->v_spec_cow_req = 0;
 1192                 nvp->v_spec_cow_count = 0;
 1193 
 1194                 *vpp = nvp;
 1195                 if (vp != NULLVP) {
 1196                         nvp->v_flag |= VALIASED;
 1197                         vp->v_flag |= VALIASED;
 1198                         vput(vp);
 1199                 }
 1200                 return (NULLVP);
 1201         }
 1202         simple_unlock(&spechash_slock);
 1203         VOP_UNLOCK(vp, 0);
 1204         simple_lock(&vp->v_interlock);
 1205         vclean(vp, 0, p);
 1206         vp->v_op = nvp->v_op;
 1207         vp->v_tag = nvp->v_tag;
 1208         vp->v_vnlock = &vp->v_lock;
 1209         lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
 1210         nvp->v_type = VNON;
 1211         insmntque(vp, mp);
 1212         return (vp);
 1213 }
 1214 
 1215 /*
 1216  * Grab a particular vnode from the free list, increment its
 1217  * reference count and lock it. If the vnode lock bit is set the
  1218  * vnode is being eliminated in vgone. In that case, we cannot
 1219  * grab the vnode, so the process is awakened when the transition is
 1220  * completed, and an error returned to indicate that the vnode is no
 1221  * longer usable (possibly having been changed to a new file system type).
 1222  */
 1223 int
 1224 vget(vp, flags)
 1225         struct vnode *vp;
 1226         int flags;
 1227 {
 1228         int error;
 1229 
 1230         /*
 1231          * If the vnode is in the process of being cleaned out for
 1232          * another use, we wait for the cleaning to finish and then
 1233          * return failure. Cleaning is determined by checking that
 1234          * the VXLOCK flag is set.
 1235          */
 1236 
 1237         if ((flags & LK_INTERLOCK) == 0)
 1238                 simple_lock(&vp->v_interlock);
 1239         if (vp->v_flag & VXLOCK) {
 1240                 if (flags & LK_NOWAIT) {
 1241                         simple_unlock(&vp->v_interlock);
 1242                         return EBUSY;
 1243                 }
 1244                 vp->v_flag |= VXWANT;
 1245                 ltsleep(vp, PINOD|PNORELOCK, "vget", 0, &vp->v_interlock);
 1246                 return (ENOENT);
 1247         }
 1248         if (vp->v_usecount == 0) {
 1249                 simple_lock(&vnode_free_list_slock);
 1250                 if (vp->v_holdcnt > 0)
 1251                         TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
 1252                 else
 1253                         TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
 1254                 simple_unlock(&vnode_free_list_slock);
 1255         }
 1256         vp->v_usecount++;
 1257 #ifdef DIAGNOSTIC
 1258         if (vp->v_usecount == 0) {
 1259                 vprint("vget", vp);
 1260                 panic("vget: usecount overflow, vp %p", vp);
 1261         }
 1262 #endif
 1263         if (flags & LK_TYPE_MASK) {
 1264                 if ((error = vn_lock(vp, flags | LK_INTERLOCK))) {
 1265                         /*
 1266                          * must expand vrele here because we do not want
 1267                          * to call VOP_INACTIVE if the reference count
 1268                          * drops back to zero since it was never really
 1269                          * active. We must remove it from the free list
 1270                          * before sleeping so that multiple processes do
 1271                          * not try to recycle it.
 1272                          */
 1273                         simple_lock(&vp->v_interlock);
 1274                         vp->v_usecount--;
 1275                         if (vp->v_usecount > 0) {
 1276                                 simple_unlock(&vp->v_interlock);
 1277                                 return (error);
 1278                         }
 1279                         /*
 1280                          * insert at tail of LRU list
 1281                          */
 1282                         simple_lock(&vnode_free_list_slock);
 1283                         if (vp->v_holdcnt > 0)
 1284                                 TAILQ_INSERT_TAIL(&vnode_hold_list, vp,
 1285                                     v_freelist);
 1286                         else
 1287                                 TAILQ_INSERT_TAIL(&vnode_free_list, vp,
 1288                                     v_freelist);
 1289                         simple_unlock(&vnode_free_list_slock);
 1290                         simple_unlock(&vp->v_interlock);
 1291                 }
 1292                 return (error);
 1293         }
 1294         simple_unlock(&vp->v_interlock);
 1295         return (0);
 1296 }
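
A typical user (compare ufs_ihashget()): find a vnode on a hash chain, take
its interlock, then vget() it; a nonzero return means the vnode was
reclaimed or recycled while we slept, so the lookup must be retried.
Condensed sketch:

    struct inode *ip;
    struct vnode *vp;

    loop:
            simple_lock(&ufs_ihash_slock);
            LIST_FOREACH(ip, INOHASH(dev, inum), i_hash) {
                    if (inum == ip->i_number && dev == ip->i_dev) {
                            vp = ITOV(ip);
                            simple_lock(&vp->v_interlock);
                            simple_unlock(&ufs_ihash_slock);
                            if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK))
                                    goto loop;
                            return vp;
                    }
            }
            simple_unlock(&ufs_ihash_slock);
            return NULL;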
 1297 
 1298 /*
 1299  * vput(), just unlock and vrele()
 1300  */
 1301 void
 1302 vput(vp)
 1303         struct vnode *vp;
 1304 {
 1305         struct proc *p = curproc;       /* XXX */
 1306 
 1307 #ifdef DIAGNOSTIC
 1308         if (vp == NULL)
 1309                 panic("vput: null vp");
 1310 #endif
 1311         simple_lock(&vp->v_interlock);
 1312         vp->v_usecount--;
 1313         if (vp->v_usecount > 0) {
 1314                 simple_unlock(&vp->v_interlock);
 1315                 VOP_UNLOCK(vp, 0);
 1316                 return;
 1317         }
 1318 #ifdef DIAGNOSTIC
 1319         if (vp->v_usecount < 0 || vp->v_writecount != 0) {
 1320                 vprint("vput: bad ref count", vp);
 1321                 panic("vput: ref cnt");
 1322         }
 1323 #endif
 1324         /*
 1325          * Insert at tail of LRU list.
 1326          */
 1327         simple_lock(&vnode_free_list_slock);
 1328         if (vp->v_holdcnt > 0)
 1329                 TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
 1330         else
 1331                 TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
 1332         simple_unlock(&vnode_free_list_slock);
 1333         if (vp->v_flag & VEXECMAP) {
 1334                 uvmexp.execpages -= vp->v_uobj.uo_npages;
 1335                 uvmexp.filepages += vp->v_uobj.uo_npages;
 1336         }
 1337         vp->v_flag &= ~(VTEXT|VEXECMAP|VWRITEMAP);
 1338         simple_unlock(&vp->v_interlock);
 1339         VOP_INACTIVE(vp, p);
 1340 }
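
The canonical lifecycle (sketch): namei() with LOCKLEAF hands back a
locked, referenced vnode, and vput() is the single call that both unlocks
it and drops that reference:

    struct nameidata nd;
    struct vnode *vp;
    int error;

    NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, path, p);
    if ((error = namei(&nd)) != 0)
            return error;
    vp = nd.ni_vp;
    /* ... use the locked vnode ... */
    vput(vp);                   /* VOP_UNLOCK + vrele in one step */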
 1341 
 1342 /*
 1343  * Vnode release.
 1344  * If count drops to zero, call inactive routine and return to freelist.
 1345  */
 1346 void
 1347 vrele(vp)
 1348         struct vnode *vp;
 1349 {
 1350         struct proc *p = curproc;       /* XXX */
 1351 
 1352 #ifdef DIAGNOSTIC
 1353         if (vp == NULL)
 1354                 panic("vrele: null vp");
 1355 #endif
 1356         simple_lock(&vp->v_interlock);
 1357         vp->v_usecount--;
 1358         if (vp->v_usecount > 0) {
 1359                 simple_unlock(&vp->v_interlock);
 1360                 return;
 1361         }
 1362 #ifdef DIAGNOSTIC
 1363         if (vp->v_usecount < 0 || vp->v_writecount != 0) {
 1364                 vprint("vrele: bad ref count", vp);
 1365                 panic("vrele: ref cnt vp %p", vp);
 1366         }
 1367 #endif
 1368         /*
 1369          * Insert at tail of LRU list.
 1370          */
 1371         simple_lock(&vnode_free_list_slock);
 1372         if (vp->v_holdcnt > 0)
 1373                 TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
 1374         else
 1375                 TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
 1376         simple_unlock(&vnode_free_list_slock);
 1377         if (vp->v_flag & VEXECMAP) {
 1378                 uvmexp.execpages -= vp->v_uobj.uo_npages;
 1379                 uvmexp.filepages += vp->v_uobj.uo_npages;
 1380         }
 1381         vp->v_flag &= ~(VTEXT|VEXECMAP|VWRITEMAP);
 1382         if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK) == 0)
 1383                 VOP_INACTIVE(vp, p);
 1384 }
 1385 
 1386 #ifdef DIAGNOSTIC
 1387 /*
 1388  * Page or buffer structure gets a reference.
 1389  */
 1390 void
 1391 vholdl(vp)
 1392         struct vnode *vp;
 1393 {
 1394 
 1395         /*
 1396          * If it is on the freelist and the hold count is currently
 1397          * zero, move it to the hold list. The test of the back
 1398          * pointer and the use reference count of zero is because
 1399          * it will be removed from a free list by getnewvnode,
 1400          * but will not have its reference count incremented until
 1401          * after calling vgone. If the reference count were
 1402          * incremented first, vgone would (incorrectly) try to
 1403          * close the previous instance of the underlying object.
  1404          * So, the back pointer is explicitly set to `0xdeadb' in
  1405          * getcleanvnode() (on getnewvnode's behalf) after removing it
  1406          * from a freelist, to ensure that we do not try to move it here.
 1407          */
 1408         if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) &&
 1409             vp->v_holdcnt == 0 && vp->v_usecount == 0) {
 1410                 simple_lock(&vnode_free_list_slock);
 1411                 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
 1412                 TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
 1413                 simple_unlock(&vnode_free_list_slock);
 1414         }
 1415         vp->v_holdcnt++;
 1416 }
 1417 
 1418 /*
 1419  * Page or buffer structure frees a reference.
 1420  */
 1421 void
 1422 holdrelel(vp)
 1423         struct vnode *vp;
 1424 {
 1425 
 1426         if (vp->v_holdcnt <= 0)
 1427                 panic("holdrelel: holdcnt vp %p", vp);
 1428         vp->v_holdcnt--;
 1429 
 1430         /*
 1431          * If it is on the holdlist and the hold count drops to
 1432          * zero, move it to the free list. The test of the back
 1433          * pointer and the use reference count of zero is because
 1434          * it will be removed from a free list by getnewvnode,
 1435          * but will not have its reference count incremented until
 1436          * after calling vgone. If the reference count were
 1437          * incremented first, vgone would (incorrectly) try to
 1438          * close the previous instance of the underlying object.
  1439          * So, the back pointer is explicitly set to `0xdeadb' in
  1440          * getcleanvnode() (on getnewvnode's behalf) after removing it
  1441          * from a freelist, to ensure that we do not try to move it here.
 1442          */
 1443 
 1444         if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) &&
 1445             vp->v_holdcnt == 0 && vp->v_usecount == 0) {
 1446                 simple_lock(&vnode_free_list_slock);
 1447                 TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
 1448                 TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
 1449                 simple_unlock(&vnode_free_list_slock);
 1450         }
 1451 }
 1452 
 1453 /*
 1454  * Vnode reference.
 1455  */
 1456 void
 1457 vref(vp)
 1458         struct vnode *vp;
 1459 {
 1460 
 1461         simple_lock(&vp->v_interlock);
 1462         if (vp->v_usecount <= 0)
 1463                 panic("vref used where vget required, vp %p", vp);
 1464         vp->v_usecount++;
 1465 #ifdef DIAGNOSTIC
 1466         if (vp->v_usecount == 0) {
 1467                 vprint("vref", vp);
 1468                 panic("vref: usecount overflow, vp %p", vp);
 1469         }
 1470 #endif
 1471         simple_unlock(&vp->v_interlock);
 1472 }
 1473 #endif /* DIAGNOSTIC */
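/*
 * Editorial sketch, not part of the original source: the reference
 * discipline behind the counters above.  vref() is only legal on a
 * vnode that already has a use reference, and a hold (vholdl()/
 * holdrelel()) keeps the vnode off the free list without marking it
 * in use.  The function below is hypothetical and assumes the caller
 * already owns a use reference on vp.
 */
#ifdef notdef
void
example_ref_and_hold(struct vnode *vp)
{

        vref(vp);               /* take an extra use reference */
        vholdl(vp);             /* e.g. a buffer now points at vp */
        /* ... work that relies on vp staying resident ... */
        holdrelel(vp);          /* drop the hold */
        vrele(vp);              /* drop the extra use reference */
}
#endif /* notdef */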
 1474 
 1475 /*
 1476  * Remove any vnodes in the vnode table belonging to mount point mp.
 1477  *
 1478  * If FORCECLOSE is not specified, there should not be any active ones,
 1479  * return error if any are found (nb: this is a user error, not a
 1480  * system error). If FORCECLOSE is specified, detach any active vnodes
 1481  * that are found.
 1482  *
 1483  * If WRITECLOSE is set, only flush out regular file vnodes open for
 1484  * writing.
 1485  *
 1486  * SKIPSYSTEM causes any vnodes marked V_SYSTEM to be skipped.
 1487  */
 1488 #ifdef DEBUG
 1489 int busyprt = 0;        /* print out busy vnodes */
 1490 struct ctldebug debug1 = { "busyprt", &busyprt };
 1491 #endif
 1492 
 1493 int
 1494 vflush(mp, skipvp, flags)
 1495         struct mount *mp;
 1496         struct vnode *skipvp;
 1497         int flags;
 1498 {
 1499         struct proc *p = curproc;       /* XXX */
 1500         struct vnode *vp, *nvp;
 1501         int busy = 0;
 1502 
 1503         simple_lock(&mntvnode_slock);
 1504 loop:
 1505         for (vp = LIST_FIRST(&mp->mnt_vnodelist); vp; vp = nvp) {
 1506                 if (vp->v_mount != mp)
 1507                         goto loop;
 1508                 nvp = LIST_NEXT(vp, v_mntvnodes);
 1509                 /*
 1510                  * Skip over a selected vnode.
 1511                  */
 1512                 if (vp == skipvp)
 1513                         continue;
 1514                 simple_lock(&vp->v_interlock);
 1515                 /*
 1516                  * Skip over vnodes marked VSYSTEM.
 1517                  */
 1518                 if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
 1519                         simple_unlock(&vp->v_interlock);
 1520                         continue;
 1521                 }
 1522                 /*
 1523                  * If WRITECLOSE is set, only flush out regular file
 1524                  * vnodes open for writing.
 1525                  */
 1526                 if ((flags & WRITECLOSE) &&
 1527                     (vp->v_writecount == 0 || vp->v_type != VREG)) {
 1528                         simple_unlock(&vp->v_interlock);
 1529                         continue;
 1530                 }
 1531                 /*
 1532                  * With v_usecount == 0, all we need to do is clear
 1533                  * out the vnode data structures and we are done.
 1534                  */
 1535                 if (vp->v_usecount == 0) {
 1536                         simple_unlock(&mntvnode_slock);
 1537                         vgonel(vp, p);
 1538                         simple_lock(&mntvnode_slock);
 1539                         continue;
 1540                 }
 1541                 /*
 1542                  * If FORCECLOSE is set, forcibly close the vnode.
 1543                  * For block or character devices, revert to an
 1544                  * anonymous device. For all other files, just kill them.
 1545                  */
 1546                 if (flags & FORCECLOSE) {
 1547                         simple_unlock(&mntvnode_slock);
 1548                         if (vp->v_type != VBLK && vp->v_type != VCHR) {
 1549                                 vgonel(vp, p);
 1550                         } else {
 1551                                 vclean(vp, 0, p);
 1552                                 vp->v_op = spec_vnodeop_p;
 1553                                 insmntque(vp, (struct mount *)0);
 1554                         }
 1555                         simple_lock(&mntvnode_slock);
 1556                         continue;
 1557                 }
 1558 #ifdef DEBUG
 1559                 if (busyprt)
 1560                         vprint("vflush: busy vnode", vp);
 1561 #endif
 1562                 simple_unlock(&vp->v_interlock);
 1563                 busy++;
 1564         }
 1565         simple_unlock(&mntvnode_slock);
 1566         if (busy)
 1567                 return (EBUSY);
 1568         return (0);
 1569 }
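/*
 * Editorial sketch, not part of the original source: how a file
 * system's unmount entry point might drive vflush().  The function
 * name is hypothetical; the flag handling follows the comment above.
 */
#ifdef notdef
int
example_unmount(struct mount *mp, int mntflags)
{
        int error, flags = 0;

        if (mntflags & MNT_FORCE)
                flags |= FORCECLOSE;
        error = vflush(mp, NULL, flags);        /* skipvp == NULL */
        if (error)
                return (error);         /* EBUSY: active vnodes remain */
        /* ... tear down file-system private state here ... */
        return (0);
}
#endif /* notdef */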
 1570 
 1571 /*
 1572  * Disassociate the underlying file system from a vnode.
 1573  */
 1574 void
 1575 vclean(vp, flags, p)
 1576         struct vnode *vp;
 1577         int flags;
 1578         struct proc *p;
 1579 {
 1580         struct mount *mp;
 1581         int active;
 1582 
 1583         LOCK_ASSERT(simple_lock_held(&vp->v_interlock));
 1584 
 1585         /*
 1586          * Check to see if the vnode is in use.
 1587          * If so we have to reference it before we clean it out
 1588          * so that its count cannot fall to zero and generate a
 1589          * race against ourselves to recycle it.
 1590          */
 1591 
 1592         if ((active = vp->v_usecount) != 0) {
 1593                 vp->v_usecount++;
 1594 #ifdef DIAGNOSTIC
 1595                 if (vp->v_usecount == 0) {
 1596                         vprint("vclean", vp);
 1597                         panic("vclean: usecount overflow");
 1598                 }
 1599 #endif
 1600         }
 1601 
 1602         /*
 1603          * Prevent the vnode from being recycled or
 1604          * brought into use while we clean it out.
 1605          */
 1606         if (vp->v_flag & VXLOCK)
 1607                 panic("vclean: deadlock, vp %p", vp);
 1608         vp->v_flag |= VXLOCK;
 1609         if (vp->v_flag & VEXECMAP) {
 1610                 uvmexp.execpages -= vp->v_uobj.uo_npages;
 1611                 uvmexp.filepages += vp->v_uobj.uo_npages;
 1612         }
 1613         vp->v_flag &= ~(VTEXT|VEXECMAP);
 1614 
 1615         /*
 1616          * Even if the count is zero, the VOP_INACTIVE routine may still
 1617          * have the object locked while it cleans it out.  For
 1618          * active vnodes, it ensures that no other activity can
 1619          * occur while the underlying object is being cleaned out.
 1620          *
 1621          * We drain the lock to make sure we are the last one trying to
 1622          * get it and immediately resurrect the lock.  Future accesses
 1623          * for locking this _vnode_ will be protected by VXLOCK.  However,
 1624          * upper layers might be using the _lock_ in case the file system
 1625          * exported it and might access it while the vnode lingers in
 1626          * deadfs.
 1627          */
 1628         VOP_LOCK(vp, LK_DRAIN | LK_RESURRECT | LK_INTERLOCK);
 1629 
 1630         /*
 1631          * Clean out any cached data associated with the vnode.
 1632          * If special device, remove it from the special device alias list
 1633          * if it is on one.
 1634          */
 1635         if (flags & DOCLOSE) {
 1636                 int error;
 1637                 struct vnode *vq, *vx;
 1638 
 1639                 vn_start_write(vp, &mp, V_WAIT | V_LOWER);
 1640                 error = vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);
 1641                 vn_finished_write(mp, V_LOWER);
 1642                 if (error)
 1643                         error = vinvalbuf(vp, 0, NOCRED, p, 0, 0);
 1644                 KASSERT(error == 0);
 1645                 KASSERT((vp->v_flag & VONWORKLST) == 0);
 1646 
 1647                 if (active)
 1648                         VOP_CLOSE(vp, FNONBLOCK, NOCRED, NULL);
 1649 
 1650                 if ((vp->v_type == VBLK || vp->v_type == VCHR) &&
 1651                     vp->v_specinfo != 0) {
 1652                         simple_lock(&spechash_slock);
 1653                         if (vp->v_hashchain != NULL) {
 1654                                 if (*vp->v_hashchain == vp) {
 1655                                         *vp->v_hashchain = vp->v_specnext;
 1656                                 } else {
 1657                                         for (vq = *vp->v_hashchain; vq;
 1658                                              vq = vq->v_specnext) {
 1659                                                 if (vq->v_specnext != vp)
 1660                                                         continue;
 1661                                                 vq->v_specnext = vp->v_specnext;
 1662                                                 break;
 1663                                         }
 1664                                         if (vq == NULL)
 1665                                                 panic("missing bdev");
 1666                                 }
 1667                                 if (vp->v_flag & VALIASED) {
 1668                                         vx = NULL;
 1669                         for (vq = *vp->v_hashchain; vq;
 1670                              vq = vq->v_specnext) {
 1671                                                 if (vq->v_rdev != vp->v_rdev ||
 1672                                                     vq->v_type != vp->v_type)
 1673                                                         continue;
 1674                                                 if (vx)
 1675                                                         break;
 1676                                                 vx = vq;
 1677                                         }
 1678                                         if (vx == NULL)
 1679                                                 panic("missing alias");
 1680                                         if (vq == NULL)
 1681                                                 vx->v_flag &= ~VALIASED;
 1682                                         vp->v_flag &= ~VALIASED;
 1683                                 }
 1684                         }
 1685                         simple_unlock(&spechash_slock);
 1686                         FREE(vp->v_specinfo, M_VNODE);
 1687                         vp->v_specinfo = NULL;
 1688                 }
 1689         }
 1690         LOCK_ASSERT(!simple_lock_held(&vp->v_interlock));
 1691 
 1692         /*
 1693          * If purging an active vnode, it must be closed and
 1694          * deactivated before being reclaimed. Note that the
 1695          * VOP_INACTIVE will unlock the vnode.
 1696          */
 1697         if (active) {
 1698                 VOP_INACTIVE(vp, p);
 1699         } else {
 1700                 /*
 1701                  * Any other processes trying to obtain this lock must first
 1702                  * wait for VXLOCK to clear, then call the new lock operation.
 1703                  */
 1704                 VOP_UNLOCK(vp, 0);
 1705         }
 1706         /*
 1707          * Reclaim the vnode.
 1708          */
 1709         if (VOP_RECLAIM(vp, p))
 1710                 panic("vclean: cannot reclaim, vp %p", vp);
 1711         if (active) {
 1712                 /*
 1713                  * Inline copy of vrele() since VOP_INACTIVE
 1714                  * has already been called.
 1715                  */
 1716                 simple_lock(&vp->v_interlock);
 1717                 if (--vp->v_usecount <= 0) {
 1718 #ifdef DIAGNOSTIC
 1719                         if (vp->v_usecount < 0 || vp->v_writecount != 0) {
 1720                                 vprint("vclean: bad ref count", vp);
 1721                                 panic("vclean: ref cnt");
 1722                         }
 1723 #endif
 1724                         /*
 1725                          * Insert at tail of LRU list.
 1726                          */
 1727 
 1728                         simple_unlock(&vp->v_interlock);
 1729                         simple_lock(&vnode_free_list_slock);
 1730 #ifdef DIAGNOSTIC
 1731                         if (vp->v_holdcnt > 0)
 1732                                 panic("vclean: not clean, vp %p", vp);
 1733 #endif
 1734                         TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
 1735                         simple_unlock(&vnode_free_list_slock);
 1736                 } else
 1737                         simple_unlock(&vp->v_interlock);
 1738         }
 1739 
 1740         KASSERT(vp->v_uobj.uo_npages == 0);
 1741         cache_purge(vp);
 1742 
 1743         /*
 1744          * Done with purge, notify sleepers of the grim news.
 1745          */
 1746         vp->v_op = dead_vnodeop_p;
 1747         vp->v_tag = VT_NON;
 1748         vp->v_vnlock = NULL;
 1749         simple_lock(&vp->v_interlock);
 1750         VN_KNOTE(vp, NOTE_REVOKE);      /* FreeBSD has this in vn_pollgone() */
 1751         vp->v_flag &= ~(VXLOCK|VLOCKSWORK);
 1752         if (vp->v_flag & VXWANT) {
 1753                 vp->v_flag &= ~VXWANT;
 1754                 simple_unlock(&vp->v_interlock);
 1755                 wakeup((caddr_t)vp);
 1756         } else
 1757                 simple_unlock(&vp->v_interlock);
 1758 }
 1759 
 1760 /*
 1761  * Recycle an unused vnode to the front of the free list.
 1762  * Release the passed interlock if the vnode will be recycled.
 1763  */
 1764 int
 1765 vrecycle(vp, inter_lkp, p)
 1766         struct vnode *vp;
 1767         struct simplelock *inter_lkp;
 1768         struct proc *p;
 1769 {
 1770 
 1771         simple_lock(&vp->v_interlock);
 1772         if (vp->v_usecount == 0) {
 1773                 if (inter_lkp)
 1774                         simple_unlock(inter_lkp);
 1775                 vgonel(vp, p);
 1776                 return (1);
 1777         }
 1778         simple_unlock(&vp->v_interlock);
 1779         return (0);
 1780 }
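/*
 * Editorial sketch, not part of the original source: a file system's
 * inactive routine can use vrecycle() to discard a vnode whose
 * backing object has been removed.  The "deleted" argument stands in
 * for a file-system specific test and is hypothetical.
 */
#ifdef notdef
void
example_inactive_tail(struct vnode *vp, struct proc *p, int deleted)
{

        VOP_UNLOCK(vp, 0);
        if (deleted)
                (void) vrecycle(vp, NULL, p);   /* vgone it if unused */
}
#endif /* notdef */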
 1781 
 1782 /*
 1783  * Eliminate all activity associated with a vnode
 1784  * in preparation for reuse.
 1785  */
 1786 void
 1787 vgone(vp)
 1788         struct vnode *vp;
 1789 {
 1790         struct proc *p = curproc;       /* XXX */
 1791 
 1792         simple_lock(&vp->v_interlock);
 1793         vgonel(vp, p);
 1794 }
 1795 
 1796 /*
 1797  * vgone, with the vp interlock held.
 1798  */
 1799 void
 1800 vgonel(vp, p)
 1801         struct vnode *vp;
 1802         struct proc *p;
 1803 {
 1804 
 1805         LOCK_ASSERT(simple_lock_held(&vp->v_interlock));
 1806 
 1807         /*
 1808          * If a vgone (or vclean) is already in progress,
 1809          * wait until it is done and return.
 1810          */
 1811 
 1812         if (vp->v_flag & VXLOCK) {
 1813                 vp->v_flag |= VXWANT;
 1814                 ltsleep(vp, PINOD | PNORELOCK, "vgone", 0, &vp->v_interlock);
 1815                 return;
 1816         }
 1817 
 1818         /*
 1819          * Clean out the filesystem specific data.
 1820          */
 1821 
 1822         vclean(vp, DOCLOSE, p);
 1823         KASSERT((vp->v_flag & VONWORKLST) == 0);
 1824 
 1825         /*
 1826          * Delete from old mount point vnode list, if on one.
 1827          */
 1828 
 1829         if (vp->v_mount != NULL)
 1830                 insmntque(vp, (struct mount *)0);
 1831 
 1832         /*
 1833          * The test of the back pointer and the reference count of
 1834          * zero is because it will be removed from the free list by
 1835          * getcleanvnode, but will not have its reference count
 1836          * incremented until after calling vgone. If the reference
 1837          * count were incremented first, vgone would (incorrectly)
 1838          * try to close the previous instance of the underlying object.
 1839          * So, the back pointer is explicitly set to `0xdeadb' in
 1840          * getnewvnode after removing it from the freelist to ensure
 1841          * that we do not try to move it here.
 1842          */
 1843 
 1844         vp->v_type = VBAD;
 1845         if (vp->v_usecount == 0) {
 1846                 boolean_t dofree;
 1847 
 1848                 simple_lock(&vnode_free_list_slock);
 1849                 if (vp->v_holdcnt > 0)
 1850                         panic("vgonel: not clean, vp %p", vp);
 1851                 /*
 1852                  * If it isn't on the freelist, we're called by getcleanvnode
 1853                  * and the vnode is being re-used.  Otherwise, we'll free it.
 1854                  */
 1855                 dofree = vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb;
 1856                 if (dofree) {
 1857                         TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
 1858                         numvnodes--;
 1859                 }
 1860                 simple_unlock(&vnode_free_list_slock);
 1861                 if (dofree)
 1862                         pool_put(&vnode_pool, vp);
 1863         }
 1864 }
 1865 
 1866 /*
 1867  * Lookup a vnode by device number.
 1868  */
 1869 int
 1870 vfinddev(dev, type, vpp)
 1871         dev_t dev;
 1872         enum vtype type;
 1873         struct vnode **vpp;
 1874 {
 1875         struct vnode *vp;
 1876         int rc = 0;
 1877 
 1878         simple_lock(&spechash_slock);
 1879         for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
 1880                 if (dev != vp->v_rdev || type != vp->v_type)
 1881                         continue;
 1882                 *vpp = vp;
 1883                 rc = 1;
 1884                 break;
 1885         }
 1886         simple_unlock(&spechash_slock);
 1887         return (rc);
 1888 }
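/*
 * Editorial sketch, not part of the original source: checking whether
 * a block device already has a vnode.  The function is hypothetical.
 */
#ifdef notdef
int
example_have_bdev(dev_t dev)
{
        struct vnode *vp;

        return (vfinddev(dev, VBLK, &vp));      /* vp is valid only on 1 */
}
#endif /* notdef */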
 1889 
 1890 /*
 1891  * Revoke all the vnodes corresponding to the specified minor number
 1892  * range (endpoints inclusive) of the specified major.
 1893  */
 1894 void
 1895 vdevgone(maj, minl, minh, type)
 1896         int maj, minl, minh;
 1897         enum vtype type;
 1898 {
 1899         struct vnode *vp;
 1900         int mn;
 1901 
 1902         for (mn = minl; mn <= minh; mn++)
 1903                 if (vfinddev(makedev(maj, mn), type, &vp))
 1904                         VOP_REVOKE(vp, REVOKEALL);
 1905 }
 1906 
 1907 /*
 1908  * Calculate the total number of references to a special device.
 1909  */
 1910 int
 1911 vcount(vp)
 1912         struct vnode *vp;
 1913 {
 1914         struct vnode *vq, *vnext;
 1915         int count;
 1916 
 1917 loop:
 1918         if ((vp->v_flag & VALIASED) == 0)
 1919                 return (vp->v_usecount);
 1920         simple_lock(&spechash_slock);
 1921         for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
 1922                 vnext = vq->v_specnext;
 1923                 if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
 1924                         continue;
 1925                 /*
 1926                  * Alias, but not in use, so flush it out.
 1927                  */
 1928                 if (vq->v_usecount == 0 && vq != vp &&
 1929                     (vq->v_flag & VXLOCK) == 0) {
 1930                         simple_unlock(&spechash_slock);
 1931                         vgone(vq);
 1932                         goto loop;
 1933                 }
 1934                 count += vq->v_usecount;
 1935         }
 1936         simple_unlock(&spechash_slock);
 1937         return (count);
 1938 }
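/*
 * Editorial sketch, not part of the original source: device close
 * routines commonly use vcount() to detect the last close across
 * aliased special vnodes.  The function is hypothetical.
 */
#ifdef notdef
int
example_is_last_close(struct vnode *vp)
{

        return (vcount(vp) <= 1);       /* no other alias still in use */
}
#endif /* notdef */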
 1939 
 1940 #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0]))
 1941 #define ARRAY_PRINT(idx, arr) \
 1942     ((idx) >= 0 && (idx) < ARRAY_SIZE(arr) ? (arr)[(idx)] : "UNKNOWN")
 1943 
 1944 const char * const vnode_tags[] = { VNODE_TAGS };
 1945 const char * const vnode_types[] = { VNODE_TYPES };
 1946 const char vnode_flagbits[] = VNODE_FLAGBITS;
 1947 
 1948 /*
 1949  * Print out a description of a vnode.
 1950  */
 1951 void
 1952 vprint(label, vp)
 1953         char *label;
 1954         struct vnode *vp;
 1955 {
 1956         char buf[96];
 1957 
 1958         if (label != NULL)
 1959                 printf("%s: ", label);
 1960         printf("tag %s(%d) type %s(%d), usecount %d, writecount %ld, "
 1961             "refcount %ld,", ARRAY_PRINT(vp->v_tag, vnode_tags), vp->v_tag,
 1962             ARRAY_PRINT(vp->v_type, vnode_types), vp->v_type,
 1963             vp->v_usecount, vp->v_writecount, vp->v_holdcnt);
 1964         bitmask_snprintf(vp->v_flag, vnode_flagbits, buf, sizeof(buf));
 1965         if (buf[0] != '\0')
 1966                 printf(" flags (%s)", &buf[1]);
 1967         if (vp->v_data == NULL) {
 1968                 printf("\n");
 1969         } else {
 1970                 printf("\n\t");
 1971                 VOP_PRINT(vp);
 1972         }
 1973 }
 1974 
 1975 #ifdef DEBUG
 1976 /*
 1977  * List all of the locked vnodes in the system.
 1978  * Called when debugging the kernel.
 1979  */
 1980 void
 1981 printlockedvnodes()
 1982 {
 1983         struct mount *mp, *nmp;
 1984         struct vnode *vp;
 1985 
 1986         printf("Locked vnodes\n");
 1987         simple_lock(&mountlist_slock);
 1988         for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
 1989              mp = nmp) {
 1990                 if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
 1991                         nmp = CIRCLEQ_NEXT(mp, mnt_list);
 1992                         continue;
 1993                 }
 1994                 LIST_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
 1995                         if (VOP_ISLOCKED(vp))
 1996                                 vprint(NULL, vp);
 1997                 }
 1998                 simple_lock(&mountlist_slock);
 1999                 nmp = CIRCLEQ_NEXT(mp, mnt_list);
 2000                 vfs_unbusy(mp);
 2001         }
 2002         simple_unlock(&mountlist_slock);
 2003 }
 2004 #endif
 2005 
 2006 /*
 2007  * sysctl helper routine for vfs.generic.conf lookups.
 2008  */
 2009 #if defined(COMPAT_09) || defined(COMPAT_43) || defined(COMPAT_44)
 2010 static int
 2011 sysctl_vfs_generic_conf(SYSCTLFN_ARGS)
 2012 {
 2013         struct vfsconf vfc;
 2014         extern const char * const mountcompatnames[];
 2015         extern int nmountcompatnames;
 2016         struct sysctlnode node;
 2017         struct vfsops *vfsp;
 2018         u_int vfsnum;
 2019 
 2020         if (namelen != 1)
 2021                 return (ENOTDIR);
 2022         vfsnum = name[0];
 2023         if (vfsnum >= nmountcompatnames ||
 2024             mountcompatnames[vfsnum] == NULL)
 2025                 return (EOPNOTSUPP);
 2026         vfsp = vfs_getopsbyname(mountcompatnames[vfsnum]);
 2027         if (vfsp == NULL)
 2028                 return (EOPNOTSUPP);
 2029 
 2030         vfc.vfc_vfsops = vfsp;
 2031         strncpy(vfc.vfc_name, vfsp->vfs_name, MFSNAMELEN);
 2032         vfc.vfc_typenum = vfsnum;
 2033         vfc.vfc_refcount = vfsp->vfs_refcount;
 2034         vfc.vfc_flags = 0;
 2035         vfc.vfc_mountroot = vfsp->vfs_mountroot;
 2036         vfc.vfc_next = NULL;
 2037 
 2038         node = *rnode;
 2039         node.sysctl_data = &vfc;
 2040         return (sysctl_lookup(SYSCTLFN_CALL(&node)));
 2041 }
 2042 #endif
 2043 
 2044 /*
 2045  * sysctl helper routine to return list of supported fstypes
 2046  */
 2047 static int
 2048 sysctl_vfs_generic_fstypes(SYSCTLFN_ARGS)
 2049 {
 2050         char buf[MFSNAMELEN];
 2051         char *where = oldp;
 2052         struct vfsops *v;
 2053         size_t needed, left, slen;
 2054         int error, first;
 2055 
 2056         if (newp != NULL)
 2057                 return (EPERM);
 2058         if (namelen != 0)
 2059                 return (EINVAL);
 2060 
 2061         first = 1;
 2062         error = 0;
 2063         needed = 0;
 2064         left = *oldlenp;
 2065 
 2066         LIST_FOREACH(v, &vfs_list, vfs_list) {
 2067                 if (where == NULL)
 2068                         needed += strlen(v->vfs_name) + 1;
 2069                 else {
 2070                         memset(buf, 0, sizeof(buf));
 2071                         if (first) {
 2072                                 strncpy(buf, v->vfs_name, sizeof(buf));
 2073                                 first = 0;
 2074                         } else {
 2075                                 buf[0] = ' ';
 2076                                 strncpy(buf + 1, v->vfs_name, sizeof(buf) - 1);
 2077                         }
 2078                         buf[sizeof(buf)-1] = '\0';
 2079                         slen = strlen(buf);
 2080                         if (left < slen + 1)
 2081                                 break;
 2082                         /* +1 to copy out the trailing NUL byte */
 2083                         error = copyout(buf, where, slen + 1);
 2084                         if (error)
 2085                                 break;
 2086                         where += slen;
 2087                         needed += slen;
 2088                         left -= slen;
 2089                 }
 2090         }
 2091         *oldlenp = needed;
 2092         return (error);
 2093 }
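/*
 * Editorial sketch, not part of the original source: a userland
 * program reading the list built above via sysctlbyname(3), using
 * the usual two-call size probe.
 */
#ifdef notdef
#include <sys/sysctl.h>
#include <stdio.h>
#include <stdlib.h>

int
main(void)
{
        size_t len;
        char *buf;

        if (sysctlbyname("vfs.generic.fstypes", NULL, &len, NULL, 0) == -1)
                return (1);
        if ((buf = malloc(len)) == NULL ||
            sysctlbyname("vfs.generic.fstypes", buf, &len, NULL, 0) == -1)
                return (1);
        printf("%s\n", buf);    /* e.g. "ffs nfs kernfs ..." */
        free(buf);
        return (0);
}
#endif /* notdef */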
 2094 
 2095 /*
 2096  * Top level filesystem related information gathering.
 2097  */
 2098 SYSCTL_SETUP(sysctl_vfs_setup, "sysctl vfs subtree setup")
 2099 {
 2100 #if defined(COMPAT_09) || defined(COMPAT_43) || defined(COMPAT_44)
 2101         extern int nmountcompatnames;
 2102 #endif
 2103 
 2104         sysctl_createv(clog, 0, NULL, NULL,
 2105                        CTLFLAG_PERMANENT,
 2106                        CTLTYPE_NODE, "vfs", NULL,
 2107                        NULL, 0, NULL, 0,
 2108                        CTL_VFS, CTL_EOL);
 2109         sysctl_createv(clog, 0, NULL, NULL,
 2110                        CTLFLAG_PERMANENT,
 2111                        CTLTYPE_NODE, "generic",
 2112                        SYSCTL_DESCR("Non-specific vfs related information"),
 2113                        NULL, 0, NULL, 0,
 2114                        CTL_VFS, VFS_GENERIC, CTL_EOL);
 2115 
 2116 #if defined(COMPAT_09) || defined(COMPAT_43) || defined(COMPAT_44)
 2117         sysctl_createv(clog, 0, NULL, NULL,
 2118                        CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE,
 2119                        CTLTYPE_INT, "maxtypenum",
 2120                        SYSCTL_DESCR("Highest valid filesystem type number"),
 2121                        NULL, nmountcompatnames, NULL, 0,
 2122                        CTL_VFS, VFS_GENERIC, VFS_MAXTYPENUM, CTL_EOL);
 2123 #endif
 2124         sysctl_createv(clog, 0, NULL, NULL,
 2125                        CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
 2126                        CTLTYPE_INT, "usermount",
 2127                        SYSCTL_DESCR("Whether unprivileged users may mount "
 2128                                     "filesystems"),
 2129                        NULL, 0, &dovfsusermount, 0,
 2130                        CTL_VFS, VFS_GENERIC, VFS_USERMOUNT, CTL_EOL);
 2131         sysctl_createv(clog, 0, NULL, NULL,
 2132                        CTLFLAG_PERMANENT,
 2133                        CTLTYPE_STRING, "fstypes",
 2134                        SYSCTL_DESCR("List of file systems present"),
 2135                        sysctl_vfs_generic_fstypes, 0, NULL, 0,
 2136                        CTL_VFS, VFS_GENERIC, CTL_CREATE, CTL_EOL);
 2137 #if defined(COMPAT_09) || defined(COMPAT_43) || defined(COMPAT_44)
 2138         sysctl_createv(clog, 0, NULL, NULL,
 2139                        CTLFLAG_PERMANENT,
 2140                        CTLTYPE_STRUCT, "conf",
 2141                        SYSCTL_DESCR("Filesystem configuration information"),
 2142                        sysctl_vfs_generic_conf, 0, NULL,
 2143                        sizeof(struct vfsconf),
 2144                        CTL_VFS, VFS_GENERIC, VFS_CONF, CTL_EOL);
 2145 #endif
 2146 }
 2147 
 2148 
 2149 int kinfo_vdebug = 1;
 2150 int kinfo_vgetfailed;
 2151 #define KINFO_VNODESLOP 10
 2152 /*
 2153  * Dump vnode list (via sysctl).
 2154  * Copyout address of vnode followed by vnode.
 2155  */
 2156 /* ARGSUSED */
 2157 int
 2158 sysctl_kern_vnode(SYSCTLFN_ARGS)
 2159 {
 2160         char *where = oldp;
 2161         size_t *sizep = oldlenp;
 2162         struct mount *mp, *nmp;
 2163         struct vnode *nvp, *vp;
 2164         char *bp = where, *savebp;
 2165         char *ewhere;
 2166         int error;
 2167 
 2168         if (namelen != 0)
 2169                 return (EOPNOTSUPP);
 2170         if (newp != NULL)
 2171                 return (EPERM);
 2172 
 2173 #define VPTRSZ  sizeof(struct vnode *)
 2174 #define VNODESZ sizeof(struct vnode)
 2175         if (where == NULL) {
 2176                 *sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
 2177                 return (0);
 2178         }
 2179         ewhere = where + *sizep;
 2180 
 2181         simple_lock(&mountlist_slock);
 2182         for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
 2183              mp = nmp) {
 2184                 if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
 2185                         nmp = CIRCLEQ_NEXT(mp, mnt_list);
 2186                         continue;
 2187                 }
 2188                 savebp = bp;
 2189 again:
 2190                 simple_lock(&mntvnode_slock);
 2191                 for (vp = LIST_FIRST(&mp->mnt_vnodelist);
 2192                      vp != NULL;
 2193                      vp = nvp) {
 2194                         /*
 2195                          * Check that the vp is still associated with
 2196                          * this filesystem.  RACE: could have been
 2197                          * recycled onto the same filesystem.
 2198                          */
 2199                         if (vp->v_mount != mp) {
 2200                                 simple_unlock(&mntvnode_slock);
 2201                                 if (kinfo_vdebug)
 2202                                         printf("kinfo: vp changed\n");
 2203                                 bp = savebp;
 2204                                 goto again;
 2205                         }
 2206                         nvp = LIST_NEXT(vp, v_mntvnodes);
 2207                         if (bp + VPTRSZ + VNODESZ > ewhere) {
 2208                                 simple_unlock(&mntvnode_slock);
 2209                                 *sizep = bp - where;
 2210                                 return (ENOMEM);
 2211                         }
 2212                         simple_unlock(&mntvnode_slock);
 2213                         if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) ||
 2214                            (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ)))
 2215                                 return (error);
 2216                         bp += VPTRSZ + VNODESZ;
 2217                         simple_lock(&mntvnode_slock);
 2218                 }
 2219                 simple_unlock(&mntvnode_slock);
 2220                 simple_lock(&mountlist_slock);
 2221                 nmp = CIRCLEQ_NEXT(mp, mnt_list);
 2222                 vfs_unbusy(mp);
 2223         }
 2224         simple_unlock(&mountlist_slock);
 2225 
 2226         *sizep = bp - where;
 2227         return (0);
 2228 }
 2229 
 2230 /*
 2231  * Check to see if a filesystem is mounted on a block device.
 2232  */
 2233 int
 2234 vfs_mountedon(vp)
 2235         struct vnode *vp;
 2236 {
 2237         struct vnode *vq;
 2238         int error = 0;
 2239 
 2240         if (vp->v_specmountpoint != NULL)
 2241                 return (EBUSY);
 2242         if (vp->v_flag & VALIASED) {
 2243                 simple_lock(&spechash_slock);
 2244                 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
 2245                         if (vq->v_rdev != vp->v_rdev ||
 2246                             vq->v_type != vp->v_type)
 2247                                 continue;
 2248                         if (vq->v_specmountpoint != NULL) {
 2249                                 error = EBUSY;
 2250                                 break;
 2251                         }
 2252                 }
 2253                 simple_unlock(&spechash_slock);
 2254         }
 2255         return (error);
 2256 }
 2257 
 2258 static int
 2259 sacheck(struct sockaddr *sa)
 2260 {
 2261         switch (sa->sa_family) {
 2262 #ifdef INET
 2263         case AF_INET: {
 2264                 struct sockaddr_in *sin = (struct sockaddr_in *)sa;
 2265                 char *p = (char *)sin->sin_zero;
 2266                 size_t i;
 2267 
 2268                 if (sin->sin_len != sizeof(*sin))
 2269                         return -1;
 2270                 if (sin->sin_port != 0)
 2271                         return -1;
 2272                 for (i = 0; i < sizeof(sin->sin_zero); i++)
 2273                         if (*p++ != '\0')
 2274                                 return -1;
 2275                 return 0;
 2276         }
 2277 #endif
 2278 #ifdef INET6
 2279         case AF_INET6: {
 2280                 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sa;
 2281 
 2282                 if (sin6->sin6_len != sizeof(*sin6))
 2283                         return -1;
 2284                 if (sin6->sin6_port != 0)
 2285                         return -1;
 2286                 return 0;
 2287         }
 2288 #endif
 2289         default:
 2290                 return -1;
 2291         }
 2292 }
 2293 
 2294 /*
 2295  * Build hash lists of net addresses and hang them off the mount point.
 2296  * Called by ufs_mount() to set up the lists of export addresses.
 2297  */
 2298 static int
 2299 vfs_hang_addrlist(mp, nep, argp)
 2300         struct mount *mp;
 2301         struct netexport *nep;
 2302         struct export_args *argp;
 2303 {
 2304         struct netcred *np, *enp;
 2305         struct radix_node_head *rnh;
 2306         int i;
 2307         struct sockaddr *saddr, *smask = 0;
 2308         struct domain *dom;
 2309         int error;
 2310 
 2311         if (argp->ex_addrlen == 0) {
 2312                 if (mp->mnt_flag & MNT_DEFEXPORTED)
 2313                         return (EPERM);
 2314                 np = &nep->ne_defexported;
 2315                 np->netc_exflags = argp->ex_flags;
 2316                 crcvt(&np->netc_anon, &argp->ex_anon);
 2317                 np->netc_anon.cr_ref = 1;
 2318                 mp->mnt_flag |= MNT_DEFEXPORTED;
 2319                 return (0);
 2320         }
 2321 
 2322         if (argp->ex_addrlen > MLEN || argp->ex_masklen > MLEN)
 2323                 return (EINVAL);
 2324 
 2325         i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
 2326         np = (struct netcred *)malloc(i, M_NETADDR, M_WAITOK);
 2327         memset((caddr_t)np, 0, i);
 2328         saddr = (struct sockaddr *)(np + 1);
 2329         error = copyin(argp->ex_addr, (caddr_t)saddr, argp->ex_addrlen);
 2330         if (error)
 2331                 goto out;
 2332         if (saddr->sa_len > argp->ex_addrlen)
 2333                 saddr->sa_len = argp->ex_addrlen;
 2334         if (sacheck(saddr) == -1)
 2335                 { error = EINVAL; goto out; }   /* do not leak np */
 2336         if (argp->ex_masklen) {
 2337                 smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen);
 2338                 error = copyin(argp->ex_mask, (caddr_t)smask, argp->ex_masklen);
 2339                 if (error)
 2340                         goto out;
 2341                 if (smask->sa_len > argp->ex_masklen)
 2342                         smask->sa_len = argp->ex_masklen;
 2343                 if (smask->sa_family != saddr->sa_family)
 2344                         { error = EINVAL; goto out; }
 2345                 if (sacheck(smask) == -1)
 2346                         { error = EINVAL; goto out; }
 2347         }
 2348         i = saddr->sa_family;
 2349         if ((rnh = nep->ne_rtable[i]) == 0) {
 2350                 /*
 2351                  * It seems silly to initialize every AF when most are not
 2352                  * used, so do so on demand here.
 2353                  */
 2354                 DOMAIN_FOREACH(dom) {
 2355                         if (dom->dom_family == i && dom->dom_rtattach) {
 2356                                 dom->dom_rtattach((void **)&nep->ne_rtable[i],
 2357                                         dom->dom_rtoffset);
 2358                                 break;
 2359                         }
 2360                 }
 2361                 if ((rnh = nep->ne_rtable[i]) == 0) {
 2362                         error = ENOBUFS;
 2363                         goto out;
 2364                 }
 2365         }
 2366 
 2367         enp = (struct netcred *)(*rnh->rnh_addaddr)(saddr, smask, rnh,
 2368             np->netc_rnodes);
 2369         if (enp != np) {
 2370                 if (enp == NULL) {
 2371                         enp = (struct netcred *)(*rnh->rnh_lookup)(saddr,
 2372                             smask, rnh);
 2373                         if (enp == NULL) {
 2374                                 error = EPERM;
 2375                                 goto out;
 2376                         }
 2377                 } else
 2378                         enp->netc_refcnt++;
 2379 
 2380                 goto check;
 2381         } else
 2382                 enp->netc_refcnt = 1;
 2383 
 2384         np->netc_exflags = argp->ex_flags;
 2385         crcvt(&np->netc_anon, &argp->ex_anon);
 2386         np->netc_anon.cr_ref = 1;
 2387         return 0;
 2388 check:
 2389         if (enp->netc_exflags != argp->ex_flags ||
 2390             crcmp(&enp->netc_anon, &argp->ex_anon) != 0)
 2391                 error = EPERM;
 2392         else
 2393                 error = 0;
 2394 out:
 2395         free(np, M_NETADDR);
 2396         return error;
 2397 }
 2398 
 2399 /* ARGSUSED */
 2400 static int
 2401 vfs_free_netcred(rn, w)
 2402         struct radix_node *rn;
 2403         void *w;
 2404 {
 2405         struct radix_node_head *rnh = (struct radix_node_head *)w;
 2406         struct netcred *np = (struct netcred *)(void *)rn;
 2407 
 2408         (*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh);
 2409         if (--(np->netc_refcnt) <= 0)
 2410                 free(np, M_NETADDR);
 2411         return (0);
 2412 }
 2413 
 2414 /*
 2415  * Free the net address hash lists that are hanging off the mount points.
 2416  */
 2417 static void
 2418 vfs_free_addrlist(nep)
 2419         struct netexport *nep;
 2420 {
 2421         int i;
 2422         struct radix_node_head *rnh;
 2423 
 2424         for (i = 0; i <= AF_MAX; i++)
 2425                 if ((rnh = nep->ne_rtable[i]) != NULL) {
 2426                         (*rnh->rnh_walktree)(rnh, vfs_free_netcred, rnh);
 2427                         free((caddr_t)rnh, M_RTABLE);
 2428                         nep->ne_rtable[i] = 0;
 2429                 }
 2430 }
 2431 
 2432 int
 2433 vfs_export(mp, nep, argp)
 2434         struct mount *mp;
 2435         struct netexport *nep;
 2436         struct export_args *argp;
 2437 {
 2438         int error;
 2439 
 2440         if (argp->ex_flags & MNT_DELEXPORT) {
 2441                 if (mp->mnt_flag & MNT_EXPUBLIC) {
 2442                         vfs_setpublicfs(NULL, NULL, NULL);
 2443                         mp->mnt_flag &= ~MNT_EXPUBLIC;
 2444                 }
 2445                 vfs_free_addrlist(nep);
 2446                 mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
 2447         }
 2448         if (argp->ex_flags & MNT_EXPORTED) {
 2449                 if (argp->ex_flags & MNT_EXPUBLIC) {
 2450                         if ((error = vfs_setpublicfs(mp, nep, argp)) != 0)
 2451                                 return (error);
 2452                         mp->mnt_flag |= MNT_EXPUBLIC;
 2453                 }
 2454                 if ((error = vfs_hang_addrlist(mp, nep, argp)) != 0)
 2455                         return (error);
 2456                 mp->mnt_flag |= MNT_EXPORTED;
 2457         }
 2458         return (0);
 2459 }
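/*
 * Editorial sketch, not part of the original source: a file system's
 * mount-update path forwarding an export request.  The function is
 * hypothetical; nep is the netexport the caller keeps per mount.
 */
#ifdef notdef
int
example_update_exports(struct mount *mp, struct netexport *nep,
    struct export_args *aep)
{

        /* vfs_export() interprets MNT_DELEXPORT/MNT_EXPORTED/MNT_EXPUBLIC. */
        return (vfs_export(mp, nep, aep));
}
#endif /* notdef */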
 2460 
 2461 /*
 2462  * Set the publicly exported filesystem (WebNFS). Currently, only
 2463  * one public filesystem is possible in the spec (RFC 2054 and 2055).
 2464  */
 2465 int
 2466 vfs_setpublicfs(mp, nep, argp)
 2467         struct mount *mp;
 2468         struct netexport *nep;
 2469         struct export_args *argp;
 2470 {
 2471         int error;
 2472         struct vnode *rvp;
 2473         char *cp;
 2474 
 2475         /*
 2476          * mp == NULL -> invalidate the current info, the FS is
 2477          * no longer exported. May be called from either vfs_export
 2478          * or unmount, so check if it hasn't already been done.
 2479          */
 2480         if (mp == NULL) {
 2481                 if (nfs_pub.np_valid) {
 2482                         nfs_pub.np_valid = 0;
 2483                         if (nfs_pub.np_index != NULL) {
 2484                                 FREE(nfs_pub.np_index, M_TEMP);
 2485                                 nfs_pub.np_index = NULL;
 2486                         }
 2487                 }
 2488                 return (0);
 2489         }
 2490 
 2491         /*
 2492          * Only one allowed at a time.
 2493          */
 2494         if (nfs_pub.np_valid != 0 && mp != nfs_pub.np_mount)
 2495                 return (EBUSY);
 2496 
 2497         /*
 2498          * Get real filehandle for root of exported FS.
 2499          */
 2500         memset((caddr_t)&nfs_pub.np_handle, 0, sizeof(nfs_pub.np_handle));
 2501         nfs_pub.np_handle.fh_fsid = mp->mnt_stat.f_fsidx;
 2502 
 2503         if ((error = VFS_ROOT(mp, &rvp)))
 2504                 return (error);
 2505 
 2506         if ((error = VFS_VPTOFH(rvp, &nfs_pub.np_handle.fh_fid)))
 2507                 return (error);
 2508 
 2509         vput(rvp);
 2510 
 2511         /*
 2512          * If an indexfile was specified, pull it in.
 2513          */
 2514         if (argp->ex_indexfile != NULL) {
 2515                 MALLOC(nfs_pub.np_index, char *, MAXNAMLEN + 1, M_TEMP,
 2516                     M_WAITOK);
 2517                 error = copyinstr(argp->ex_indexfile, nfs_pub.np_index,
 2518                     MAXNAMLEN, (size_t *)0);
 2519                 if (!error) {
 2520                         /*
 2521                          * Check for illegal filenames.
 2522                          */
 2523                         for (cp = nfs_pub.np_index; *cp; cp++) {
 2524                                 if (*cp == '/') {
 2525                                         error = EINVAL;
 2526                                         break;
 2527                                 }
 2528                         }
 2529                 }
 2530                 if (error) {
 2531                         FREE(nfs_pub.np_index, M_TEMP);
 2532                         return (error);
 2533                 }
 2534         }
 2535 
 2536         nfs_pub.np_mount = mp;
 2537         nfs_pub.np_valid = 1;
 2538         return (0);
 2539 }
 2540 
 2541 struct netcred *
 2542 vfs_export_lookup(mp, nep, nam)
 2543         struct mount *mp;
 2544         struct netexport *nep;
 2545         struct mbuf *nam;
 2546 {
 2547         struct netcred *np;
 2548         struct radix_node_head *rnh;
 2549         struct sockaddr *saddr;
 2550 
 2551         np = NULL;
 2552         if (mp->mnt_flag & MNT_EXPORTED) {
 2553                 /*
 2554                  * Lookup in the export list first.
 2555                  */
 2556                 if (nam != NULL) {
 2557                         saddr = mtod(nam, struct sockaddr *);
 2558                         rnh = nep->ne_rtable[saddr->sa_family];
 2559                         if (rnh != NULL) {
 2560                                 np = (struct netcred *)
 2561                                         (*rnh->rnh_matchaddr)((caddr_t)saddr,
 2562                                                               rnh);
 2563                                 if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
 2564                                         np = NULL;
 2565                         }
 2566                 }
 2567                 /*
 2568                  * If no address match, use the default if it exists.
 2569                  */
 2570                 if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
 2571                         np = &nep->ne_defexported;
 2572         }
 2573         return (np);
 2574 }
 2575 
 2576 /*
 2577  * Do the usual access checking.
 2578  * file_mode, uid and gid are from the vnode in question,
 2579  * while acc_mode and cred are from the VOP_ACCESS parameter list
 2580  * while acc_mode and cred are from the VOP_ACCESS parameter list.
 2581 int
 2582 vaccess(type, file_mode, uid, gid, acc_mode, cred)
 2583         enum vtype type;
 2584         mode_t file_mode;
 2585         uid_t uid;
 2586         gid_t gid;
 2587         mode_t acc_mode;
 2588         struct ucred *cred;
 2589 {
 2590         mode_t mask;
 2591 
 2592         /*
 2593          * Super-user always gets read/write access, but execute access depends
 2594          * on at least one execute bit being set.
 2595          */
 2596         if (cred->cr_uid == 0) {
 2597                 if ((acc_mode & VEXEC) && type != VDIR &&
 2598                     (file_mode & (S_IXUSR|S_IXGRP|S_IXOTH)) == 0)
 2599                         return (EACCES);
 2600                 return (0);
 2601         }
 2602 
 2603         mask = 0;
 2604 
 2605         /* Otherwise, check the owner. */
 2606         if (cred->cr_uid == uid) {
 2607                 if (acc_mode & VEXEC)
 2608                         mask |= S_IXUSR;
 2609                 if (acc_mode & VREAD)
 2610                         mask |= S_IRUSR;
 2611                 if (acc_mode & VWRITE)
 2612                         mask |= S_IWUSR;
 2613                 return ((file_mode & mask) == mask ? 0 : EACCES);
 2614         }
 2615 
 2616         /* Otherwise, check the groups. */
 2617         if (cred->cr_gid == gid || groupmember(gid, cred)) {
 2618                 if (acc_mode & VEXEC)
 2619                         mask |= S_IXGRP;
 2620                 if (acc_mode & VREAD)
 2621                         mask |= S_IRGRP;
 2622                 if (acc_mode & VWRITE)
 2623                         mask |= S_IWGRP;
 2624                 return ((file_mode & mask) == mask ? 0 : EACCES);
 2625         }
 2626 
 2627         /* Otherwise, check everyone else. */
 2628         if (acc_mode & VEXEC)
 2629                 mask |= S_IXOTH;
 2630         if (acc_mode & VREAD)
 2631                 mask |= S_IROTH;
 2632         if (acc_mode & VWRITE)
 2633                 mask |= S_IWOTH;
 2634         return ((file_mode & mask) == mask ? 0 : EACCES);
 2635 }
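/*
 * Editorial sketch, not part of the original source: access routines
 * normally delegate the mode-bit check to vaccess().  struct
 * example_node and its fields are hypothetical stand-ins for a file
 * system's own attributes.
 */
#ifdef notdef
struct example_node {
        mode_t  en_mode;
        uid_t   en_uid;
        gid_t   en_gid;
};

int
example_access(struct vnode *vp, struct example_node *np,
    mode_t acc_mode, struct ucred *cred)
{

        return (vaccess(vp->v_type, np->en_mode & ALLPERMS,
            np->en_uid, np->en_gid, acc_mode, cred));
}
#endif /* notdef */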
 2636 
 2637 /*
 2638  * Unmount all file systems.
 2639  * We traverse the list in reverse order under the assumption that doing so
 2640  * will avoid needing to worry about dependencies.
 2641  */
 2642 void
 2643 vfs_unmountall(p)
 2644         struct proc *p;
 2645 {
 2646         struct mount *mp, *nmp;
 2647         int allerror, error;
 2648 
 2649         printf("unmounting file systems...");
 2650         for (allerror = 0,
 2651              mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
 2652                 nmp = mp->mnt_list.cqe_prev;
 2653 #ifdef DEBUG
 2654                 printf("\nunmounting %s (%s)...",
 2655                     mp->mnt_stat.f_mntonname, mp->mnt_stat.f_mntfromname);
 2656 #endif
 2657                 /*
 2658                  * XXX Freeze syncer.  Must do this before locking the
 2659                  * mount point.  See dounmount() for details.
 2660                  */
 2661                 lockmgr(&syncer_lock, LK_EXCLUSIVE, NULL);
 2662                 if (vfs_busy(mp, 0, 0)) {
 2663                         lockmgr(&syncer_lock, LK_RELEASE, NULL);
 2664                         continue;
 2665                 }
 2666                 if ((error = dounmount(mp, MNT_FORCE, p)) != 0) {
 2667                         printf("unmount of %s failed with error %d\n",
 2668                             mp->mnt_stat.f_mntonname, error);
 2669                         allerror = 1;
 2670                 }
 2671         }
 2672         printf(" done\n");
 2673         if (allerror)
 2674                 printf("WARNING: some file systems would not unmount\n");
 2675 }
 2676 
 2677 extern struct simplelock bqueue_slock; /* XXX */
 2678 
 2679 /*
 2680  * Sync and unmount file systems before shutting down.
 2681  */
 2682 void
 2683 vfs_shutdown()
 2684 {
 2685         struct lwp *l = curlwp;
 2686         struct proc *p;
 2687 
 2688         /* XXX we're certainly not running in proc0's context! */
 2689         if (l == NULL || (p = l->l_proc) == NULL)
 2690                 p = &proc0;
 2691 
 2692         printf("syncing disks... ");
 2693 
 2694         /* remove user process from run queue */
 2695         suspendsched();
 2696         (void) spl0();
 2697 
 2698         /* avoid coming back this way again if we panic. */
 2699         doing_shutdown = 1;
 2700 
 2701         sys_sync(l, NULL, NULL);
 2702 
 2703         /* Wait for sync to finish. */
 2704         if (buf_syncwait() != 0) {
 2705 #if defined(DDB) && defined(DEBUG_HALT_BUSY)
 2706                 Debugger();
 2707 #endif
 2708                 printf("giving up\n");
 2709                 return;
 2710         } else
 2711                 printf("done\n");
 2712 
 2713         /*
 2714          * If we've panic'd, don't make the situation potentially
 2715          * worse by unmounting the file systems.
 2716          */
 2717         if (panicstr != NULL)
 2718                 return;
 2719 
 2720         /* Release inodes held by texts before update. */
 2721 #ifdef notdef
 2722         vnshutdown();
 2723 #endif
 2724         /* Unmount file systems. */
 2725         vfs_unmountall(p);
 2726 }
 2727 
 2728 /*
 2729  * Mount the root file system.  If the operator didn't specify a
 2730  * file system to use, try all possible file systems until one
 2731  * succeeds.
 2732  */
 2733 int
 2734 vfs_mountroot()
 2735 {
 2736         struct vfsops *v;
 2737         int error = ENODEV;
 2738 
 2739         if (root_device == NULL)
 2740                 panic("vfs_mountroot: root device unknown");
 2741 
 2742         switch (root_device->dv_class) {
 2743         case DV_IFNET:
 2744                 if (rootdev != NODEV)
 2745                         panic("vfs_mountroot: rootdev set for DV_IFNET "
 2746                             "(0x%08x -> %d,%d)", rootdev,
 2747                             major(rootdev), minor(rootdev));
 2748                 break;
 2749 
 2750         case DV_DISK:
 2751                 if (rootdev == NODEV)
 2752                         panic("vfs_mountroot: rootdev not set for DV_DISK");
 2753                 if (bdevvp(rootdev, &rootvp))
 2754                         panic("vfs_mountroot: can't get vnode for rootdev");
 2755                 error = VOP_OPEN(rootvp, FREAD, FSCRED, curproc);
 2756                 if (error) {
 2757                         printf("vfs_mountroot: can't open root device\n");
 2758                         return (error);
 2759                 }
 2760                 break;
 2761 
 2762         default:
 2763                 printf("%s: inappropriate for root file system\n",
 2764                     root_device->dv_xname);
 2765                 return (ENODEV);
 2766         }
 2767 
 2768         /*
 2769          * If user specified a file system, use it.
 2770          */
 2771         if (mountroot != NULL) {
 2772                 error = (*mountroot)();
 2773                 goto done;
 2774         }
 2775 
 2776         /*
 2777          * Try each file system currently configured into the kernel.
 2778          */
 2779         LIST_FOREACH(v, &vfs_list, vfs_list) {
 2780                 if (v->vfs_mountroot == NULL)
 2781                         continue;
 2782 #ifdef DEBUG
 2783                 aprint_normal("mountroot: trying %s...\n", v->vfs_name);
 2784 #endif
 2785                 error = (*v->vfs_mountroot)();
 2786                 if (!error) {
 2787                         aprint_normal("root file system type: %s\n",
 2788                             v->vfs_name);
 2789                         break;
 2790                 }
 2791         }
 2792 
 2793         if (v == NULL) {
 2794                 printf("no file system for %s", root_device->dv_xname);
 2795                 if (root_device->dv_class == DV_DISK)
 2796                         printf(" (dev 0x%x)", rootdev);
 2797                 printf("\n");
 2798                 error = EFTYPE;
 2799         }
 2800 
 2801 done:
 2802         if (error && root_device->dv_class == DV_DISK) {
 2803                 VOP_CLOSE(rootvp, FREAD, FSCRED, curproc);
 2804                 vrele(rootvp);
 2805         }
 2806         return (error);
 2807 }
 2808 
 2809 /*
 2810  * Given a file system name, look up the vfsops for that
 2811  * file system, or return NULL if file system isn't present
 2812  * in the kernel.
 2813  */
 2814 struct vfsops *
 2815 vfs_getopsbyname(name)
 2816         const char *name;
 2817 {
 2818         struct vfsops *v;
 2819 
 2820         LIST_FOREACH(v, &vfs_list, vfs_list) {
 2821                 if (strcmp(v->vfs_name, name) == 0)
 2822                         break;
 2823         }
 2824 
 2825         return (v);
 2826 }
 2827 
 2828 /*
 2829  * Establish a file system and initialize it.
 2830  */
 2831 int
 2832 vfs_attach(vfs)
 2833         struct vfsops *vfs;
 2834 {
 2835         struct vfsops *v;
 2836         int error = 0;
 2837 
 2838 
 2839         /*
 2840          * Make sure this file system doesn't already exist.
 2841          */
 2842         LIST_FOREACH(v, &vfs_list, vfs_list) {
 2843                 if (strcmp(vfs->vfs_name, v->vfs_name) == 0) {
 2844                         error = EEXIST;
 2845                         goto out;
 2846                 }
 2847         }
 2848 
 2849         /*
 2850          * Initialize the vnode operations for this file system.
 2851          */
 2852         vfs_opv_init(vfs->vfs_opv_descs);
 2853 
 2854         /*
 2855          * Now initialize the file system itself.
 2856          */
 2857         (*vfs->vfs_init)();
 2858 
 2859         /*
 2860          * ...and link it into the kernel's list.
 2861          */
 2862         LIST_INSERT_HEAD(&vfs_list, vfs, vfs_list);
 2863 
 2864         /*
 2865          * Sanity: make sure the reference count is 0.
 2866          */
 2867         vfs->vfs_refcount = 0;
 2868 
 2869  out:
 2870         return (error);
 2871 }
 2872 
 2873 /*
 2874  * Remove a file system from the kernel.
 2875  */
 2876 int
 2877 vfs_detach(vfs)
 2878         struct vfsops *vfs;
 2879 {
 2880         struct vfsops *v;
 2881 
 2882         /*
 2883          * Make sure no one is using the filesystem.
 2884          */
 2885         if (vfs->vfs_refcount != 0)
 2886                 return (EBUSY);
 2887 
 2888         /*
 2889          * ...and remove it from the kernel's list.
 2890          */
 2891         LIST_FOREACH(v, &vfs_list, vfs_list) {
 2892                 if (v == vfs) {
 2893                         LIST_REMOVE(v, vfs_list);
 2894                         break;
 2895                 }
 2896         }
 2897 
 2898         if (v == NULL)
 2899                 return (ESRCH);
 2900 
 2901         /*
 2902          * Now run the file system-specific cleanups.
 2903          */
 2904         (*vfs->vfs_done)();
 2905 
 2906         /*
 2907          * Free the vnode operations vector.
 2908          */
 2909         vfs_opv_free(vfs->vfs_opv_descs);
 2910         return (0);
 2911 }
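/*
 * Editorial sketch, not part of the original source: registering and
 * removing a file system, e.g. from a loadable module.
 * example_vfsops is a hypothetical, fully initialized vfsops table.
 */
#ifdef notdef
extern struct vfsops example_vfsops;

int
example_register(void)
{

        return (vfs_attach(&example_vfsops));   /* EEXIST if name taken */
}

int
example_unregister(void)
{

        return (vfs_detach(&example_vfsops));   /* EBUSY while referenced */
}
#endif /* notdef */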
 2912 
 2913 void
 2914 vfs_reinit(void)
 2915 {
 2916         struct vfsops *vfs;
 2917 
 2918         LIST_FOREACH(vfs, &vfs_list, vfs_list) {
 2919                 if (vfs->vfs_reinit) {
 2920                         (*vfs->vfs_reinit)();
 2921                 }
 2922         }
 2923 }
 2924 
 2925 /*
 2926  * Request a filesystem to suspend write operations.
 2927  */
 2928 int
 2929 vfs_write_suspend(struct mount *mp, int slpflag, int slptimeo)
 2930 {
 2931         struct proc *p = curproc;       /* XXX */
 2932         int error;
 2933 
 2934         while ((mp->mnt_iflag & IMNT_SUSPEND)) {
 2935                 if (slptimeo < 0)
 2936                         return EWOULDBLOCK;
 2937                 error = tsleep(&mp->mnt_flag, slpflag, "suspwt1", slptimeo);
 2938                 if (error)
 2939                         return error;
 2940         }
 2941         mp->mnt_iflag |= IMNT_SUSPEND;
 2942 
 2943         simple_lock(&mp->mnt_slock);
 2944         if (mp->mnt_writeopcountupper > 0)
 2945                 ltsleep(&mp->mnt_writeopcountupper, PUSER - 1, "suspwt",
 2946                         0, &mp->mnt_slock);
 2947         simple_unlock(&mp->mnt_slock);
 2948 
 2949         error = VFS_SYNC(mp, MNT_WAIT, p->p_ucred, p);
 2950         if (error) {
 2951                 vfs_write_resume(mp);
 2952                 return error;
 2953         }
 2954         mp->mnt_iflag |= IMNT_SUSPENDLOW;
 2955 
 2956         simple_lock(&mp->mnt_slock);
 2957         if (mp->mnt_writeopcountlower > 0)
 2958                 ltsleep(&mp->mnt_writeopcountlower, PUSER - 1, "suspwt",
 2959                         0, &mp->mnt_slock);
 2960         mp->mnt_iflag |= IMNT_SUSPENDED;
 2961         simple_unlock(&mp->mnt_slock);
 2962 
 2963         return 0;
 2964 }
 2965 
 2966 /*
 2967  * Request a filesystem to resume write operations.
 2968  */
 2969 void
 2970 vfs_write_resume(struct mount *mp)
 2971 {
 2972 
 2973         if ((mp->mnt_iflag & IMNT_SUSPEND) == 0)
 2974                 return;
 2975         mp->mnt_iflag &= ~(IMNT_SUSPEND | IMNT_SUSPENDLOW | IMNT_SUSPENDED);
 2976         wakeup(&mp->mnt_flag);
 2977 }
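
      /*
       * A minimal sketch, with assumed details: bracket a
       * consistency-critical operation (such as taking a snapshot) with
       * the suspend/resume pair above.  The PUSER sleep priority and the
       * zero (no-timeout) argument are illustrative choices.
       */
      static int
      example_quiesce(struct mount *mp)
      {
              int error;

              error = vfs_write_suspend(mp, PUSER, 0);
              if (error)
                      return (error);
              /* Writes are drained and the file system is synced here. */
              vfs_write_resume(mp);
              return (0);
      }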
 2978 
 2979 void
 2980 copy_statvfs_info(struct statvfs *sbp, const struct mount *mp)
 2981 {
 2982         const struct statvfs *mbp;
 2983 
 2984         if (sbp == (mbp = &mp->mnt_stat))
 2985                 return;
 2986 
 2987         (void)memcpy(&sbp->f_fsidx, &mbp->f_fsidx, sizeof(sbp->f_fsidx));
 2988         sbp->f_fsid = mbp->f_fsid;
 2989         sbp->f_owner = mbp->f_owner;
 2990         sbp->f_flag = mbp->f_flag;
 2991         sbp->f_syncwrites = mbp->f_syncwrites;
 2992         sbp->f_asyncwrites = mbp->f_asyncwrites;
 2993         sbp->f_syncreads = mbp->f_syncreads;
 2994         sbp->f_asyncreads = mbp->f_asyncreads;
 2995         (void)memcpy(sbp->f_spare, mbp->f_spare, sizeof(mbp->f_spare));
 2996         (void)memcpy(sbp->f_fstypename, mbp->f_fstypename,
 2997             sizeof(sbp->f_fstypename));
 2998         (void)memcpy(sbp->f_mntonname, mbp->f_mntonname,
 2999             sizeof(sbp->f_mntonname));
 3000         (void)memcpy(sbp->f_mntfromname, mbp->f_mntfromname,
 3001             sizeof(sbp->f_mntfromname));
 3002         sbp->f_namemax = mbp->f_namemax;
 3003 }
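
      /*
       * A minimal sketch, under assumptions: a typical VFS_STATVFS
       * implementation fills in its file-system-specific counts and then
       * calls copy_statvfs_info() for the generic mount-layer fields.
       * The examplefs name and all numeric values are placeholders.
       */
      static int
      examplefs_statvfs(struct mount *mp, struct statvfs *sbp, struct proc *p)
      {

              sbp->f_bsize = 4096;
              sbp->f_frsize = 4096;
              sbp->f_iosize = 4096;
              sbp->f_blocks = 1024;
              sbp->f_bfree = 512;
              sbp->f_bavail = 512;
              sbp->f_files = 128;
              sbp->f_ffree = 64;
              copy_statvfs_info(sbp, mp);
              return (0);
      }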
 3004 
 3005 int
 3006 set_statvfs_info(const char *onp, int ukon, const char *fromp, int ukfrom,
 3007     struct mount *mp, struct proc *p)
 3008 {
 3009         int error;
 3010         size_t size;
 3011         struct statvfs *sfs = &mp->mnt_stat;
 3012         int (*fun)(const void *, void *, size_t, size_t *);
 3013 
 3014         (void)strncpy(mp->mnt_stat.f_fstypename, mp->mnt_op->vfs_name,
 3015             sizeof(mp->mnt_stat.f_fstypename));
 3016 
 3017         if (onp) {
 3018                 struct cwdinfo *cwdi = p->p_cwdi;
 3019                 fun = (ukon == UIO_SYSSPACE) ? copystr : copyinstr;
 3020                 if (cwdi->cwdi_rdir != NULL) {
 3021                         size_t len;
 3022                         char *bp;
 3023                         char *path = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
 3024 
 3025                         if (!path) /* XXX can't happen with M_WAITOK */
 3026                                 return ENOMEM;
 3027 
 3028                         bp = path + MAXPATHLEN;
 3029                         *--bp = '\0';
 3030                         error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp,
 3031                             path, MAXPATHLEN / 2, 0, p);
 3032                         if (error) {
 3033                                 free(path, M_TEMP);
 3034                                 return error;
 3035                         }
 3036 
 3037                         len = strlen(bp);
 3038                         if (len > sizeof(sfs->f_mntonname) - 1)
 3039                                 len = sizeof(sfs->f_mntonname) - 1;
 3040                         (void)strncpy(sfs->f_mntonname, bp, len);
 3041                         free(path, M_TEMP);
 3042 
 3043                         if (len < sizeof(sfs->f_mntonname) - 1) {
 3044                                 error = (*fun)(onp, &sfs->f_mntonname[len],
 3045                                     sizeof(sfs->f_mntonname) - len - 1, &size);
 3046                                 if (error)
 3047                                         return error;
 3048                                 size += len;
 3049                         } else {
 3050                                 size = len;
 3051                         }
 3052                 } else {
 3053                         error = (*fun)(onp, &sfs->f_mntonname,
 3054                             sizeof(sfs->f_mntonname) - 1, &size);
 3055                         if (error)
 3056                                 return error;
 3057                 }
 3058                 (void)memset(sfs->f_mntonname + size, 0,
 3059                     sizeof(sfs->f_mntonname) - size);
 3060         }
 3061 
 3062         if (fromp) {
 3063                 fun = (ukfrom == UIO_SYSSPACE) ? copystr : copyinstr;
 3064                 error = (*fun)(fromp, sfs->f_mntfromname,
 3065                     sizeof(sfs->f_mntfromname) - 1, &size);
 3066                 if (error)
 3067                         return error;
 3068                 (void)memset(sfs->f_mntfromname + size, 0,
 3069                     sizeof(sfs->f_mntfromname) - size);
 3070         }
 3071         return 0;
 3072 }
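
      /*
       * A minimal sketch, under assumptions: a mount routine records the
       * "mounted on" and "mounted from" names in the statvfs cache.  The
       * path and devpath arguments are hypothetical user-space strings
       * taken from the mount(2) call.
       */
      static int
      example_set_names(const char *path, const char *devpath,
          struct mount *mp, struct proc *p)
      {

              return (set_statvfs_info(path, UIO_USERSPACE, devpath,
                  UIO_USERSPACE, mp, p));
      }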
 3073 
 3074 /*
 3075  * Default vfs_extattrctl routine for file systems that do not support
 3076  * it.
 3077  */
 3078 /*ARGSUSED*/
 3079 int
 3080 vfs_stdextattrctl(struct mount *mp, int cmd, struct vnode *vp,
 3081     int attrnamespace, const char *attrname, struct proc *p)
 3082 {
 3083 
 3084         if (vp != NULL)
 3085                 VOP_UNLOCK(vp, 0);
 3086         return (EOPNOTSUPP);
 3087 }
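
      /*
       * A minimal sketch, under assumptions: a file system without
       * extended-attribute support delegates to the default above, so
       * callers get EOPNOTSUPP and the vnode lock is still released.
       */
      static int
      examplefs_extattrctl(struct mount *mp, int cmd, struct vnode *vp,
          int attrnamespace, const char *attrname, struct proc *p)
      {

              return (vfs_stdextattrctl(mp, cmd, vp, attrnamespace,
                  attrname, p));
      }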
 3088 
 3089 /*
 3090  * Check credentials, based on the process requesting the service and
 3091  * the per-attribute permissions.
 3092  *
 3093  * NOTE: Vnode must be locked.
 3094  */
 3095 int
 3096 extattr_check_cred(struct vnode *vp, int attrnamespace,
 3097     struct ucred *cred, struct proc *p, int access)
 3098 {
 3099 
 3100         if (cred == NOCRED)
 3101                 return (0);
 3102 
 3103         switch (attrnamespace) {
 3104         case EXTATTR_NAMESPACE_SYSTEM:
 3105                 /*
 3106                  * Do we really want to allow this, or just require that
 3107                  * these requests come from kernel code (NOCRED case above)?
 3108                  */
 3109                 return (suser(cred, &p->p_acflag));
 3110 
 3111         case EXTATTR_NAMESPACE_USER:
 3112                 return (VOP_ACCESS(vp, access, cred, p));
 3113 
 3114         default:
 3115                 return (EPERM);
 3116         }
 3117 }
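
      /*
       * A minimal sketch, under assumptions: an extended-attribute read
       * path checks the caller's credential before fetching the
       * attribute.  The vnode must already be locked, per the NOTE above;
       * VREAD requests read access.
       */
      static int
      example_read_check(struct vnode *vp, struct ucred *cred, struct proc *p)
      {
              int error;

              error = extattr_check_cred(vp, EXTATTR_NAMESPACE_USER, cred,
                  p, VREAD);
              if (error)
                      return (error);
              /* Proceed to fetch the attribute data. */
              return (0);
      }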
 3118 
 3119 #ifdef DDB
 3120 const char buf_flagbits[] = BUF_FLAGBITS;
 3121 
 3122 void
 3123 vfs_buf_print(bp, full, pr)
 3124         struct buf *bp;
 3125         int full;
 3126         void (*pr)(const char *, ...);
 3127 {
 3128         char buf[1024];
 3129 
 3130         (*pr)("  vp %p lblkno 0x%"PRIx64" blkno 0x%"PRIx64" dev 0x%x\n",
 3131                   bp->b_vp, bp->b_lblkno, bp->b_blkno, bp->b_dev);
 3132 
 3133         bitmask_snprintf(bp->b_flags, buf_flagbits, buf, sizeof(buf));
 3134         (*pr)("  error %d flags 0x%s\n", bp->b_error, buf);
 3135 
 3136         (*pr)("  bufsize 0x%lx bcount 0x%lx resid 0x%lx\n",
 3137                   bp->b_bufsize, bp->b_bcount, bp->b_resid);
 3138         (*pr)("  data %p saveaddr %p dep %p\n",
 3139                   bp->b_data, bp->b_saveaddr, LIST_FIRST(&bp->b_dep));
 3140         (*pr)("  iodone %p\n", bp->b_iodone);
 3141 }
 3142 
 3144 void
 3145 vfs_vnode_print(vp, full, pr)
 3146         struct vnode *vp;
 3147         int full;
 3148         void (*pr)(const char *, ...);
 3149 {
 3150         char buf[256];
 3151 
 3152         uvm_object_printit(&vp->v_uobj, full, pr);
 3153         bitmask_snprintf(vp->v_flag, vnode_flagbits, buf, sizeof(buf));
 3154         (*pr)("\nVNODE flags %s\n", buf);
 3155         (*pr)("mp %p numoutput %d size 0x%llx\n",
 3156               vp->v_mount, vp->v_numoutput, vp->v_size);
 3157 
 3158         (*pr)("data %p usecount %d writecount %ld holdcnt %ld numoutput %d\n",
 3159               vp->v_data, vp->v_usecount, vp->v_writecount,
 3160               vp->v_holdcnt, vp->v_numoutput);
 3161 
 3162         (*pr)("tag %s(%d) type %s(%d) mount %p typedata %p\n",
 3163               ARRAY_PRINT(vp->v_tag, vnode_tags), vp->v_tag,
 3164               ARRAY_PRINT(vp->v_type, vnode_types), vp->v_type,
 3165               vp->v_mount, vp->v_mountedhere);
 3166 
 3167         if (full) {
 3168                 struct buf *bp;
 3169 
 3170                 (*pr)("clean bufs:\n");
 3171                 LIST_FOREACH(bp, &vp->v_cleanblkhd, b_vnbufs) {
 3172                         (*pr)(" bp %p\n", bp);
 3173                         vfs_buf_print(bp, full, pr);
 3174                 }
 3175 
 3176                 (*pr)("dirty bufs:\n");
 3177                 LIST_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs) {
 3178                         (*pr)(" bp %p\n", bp);
 3179                         vfs_buf_print(bp, full, pr);
 3180                 }
 3181         }
 3182 }
 3183 
 3184 void
 3185 vfs_mount_print(mp, full, pr)
 3186         struct mount *mp;
 3187         int full;
 3188         void (*pr)(const char *, ...);
 3189 {
 3190         char sbuf[256];
 3191 
 3192         (*pr)("vnodecovered = %p syncer = %p data = %p\n",
 3193                         mp->mnt_vnodecovered,mp->mnt_syncer,mp->mnt_data);
 3194 
 3195         (*pr)("fs_bshift %d dev_bshift = %d\n",
 3196                         mp->mnt_fs_bshift,mp->mnt_dev_bshift);
 3197 
 3198         bitmask_snprintf(mp->mnt_flag, __MNT_FLAG_BITS, sbuf, sizeof(sbuf));
 3199         (*pr)("flag = %s\n", sbuf);
 3200 
 3201         bitmask_snprintf(mp->mnt_iflag, __IMNT_FLAG_BITS, sbuf, sizeof(sbuf));
 3202         (*pr)("iflag = %s\n", sbuf);
 3203 
 3204         /* XXX use lockmgr_printinfo */
 3205         if (mp->mnt_lock.lk_sharecount)
 3206                 (*pr)(" lock type %s: SHARED (count %d)", mp->mnt_lock.lk_wmesg,
 3207                     mp->mnt_lock.lk_sharecount);
 3208         else if (mp->mnt_lock.lk_flags & LK_HAVE_EXCL) {
 3209                 (*pr)(" lock type %s: EXCL (count %d) by ",
 3210                     mp->mnt_lock.lk_wmesg, mp->mnt_lock.lk_exclusivecount);
 3211                 if (mp->mnt_lock.lk_flags & LK_SPIN)
 3212                         (*pr)("processor %lu", mp->mnt_lock.lk_cpu);
 3213                 else
 3214                         (*pr)("pid %d.%d", mp->mnt_lock.lk_lockholder,
 3215                             mp->mnt_lock.lk_locklwp);
 3216         } else
 3217                 (*pr)(" not locked");
 3218         if ((mp->mnt_lock.lk_flags & LK_SPIN) == 0 && mp->mnt_lock.lk_waitcount > 0)
 3219                 (*pr)(" with %d pending", mp->mnt_lock.lk_waitcount);
 3220 
 3221         (*pr)("\n");
 3222 
 3223         if (mp->mnt_unmounter) {
 3224                 (*pr)("unmounter pid = %d ",mp->mnt_unmounter->p_pid);
 3225         }
 3226         (*pr)("wcnt = %d, writeopcountupper = %d, writeopcountlower = %d\n",
 3227                 mp->mnt_wcnt, mp->mnt_writeopcountupper, mp->mnt_writeopcountlower);
 3228 
 3229         (*pr)("statvfs cache:\n");
 3230         (*pr)("\tbsize = %lu\n",mp->mnt_stat.f_bsize);
 3231         (*pr)("\tfrsize = %lu\n",mp->mnt_stat.f_frsize);
 3232         (*pr)("\tiosize = %lu\n",mp->mnt_stat.f_iosize);
 3233 
 3234         (*pr)("\tblocks = %"PRIu64"\n",mp->mnt_stat.f_blocks);
 3235         (*pr)("\tbfree = %"PRIu64"\n",mp->mnt_stat.f_bfree);
 3236         (*pr)("\tbavail = %"PRIu64"\n",mp->mnt_stat.f_bavail);
 3237         (*pr)("\tbresvd = %"PRIu64"\n",mp->mnt_stat.f_bresvd);
 3238 
 3239         (*pr)("\tfiles = %"PRIu64"\n",mp->mnt_stat.f_files);
 3240         (*pr)("\tffree = %"PRIu64"\n",mp->mnt_stat.f_ffree);
 3241         (*pr)("\tfavail = %"PRIu64"\n",mp->mnt_stat.f_favail);
 3242         (*pr)("\tfresvd = %"PRIu64"\n",mp->mnt_stat.f_fresvd);
 3243 
 3244         (*pr)("\tf_fsidx = { 0x%"PRIx32", 0x%"PRIx32" }\n",
 3245                         mp->mnt_stat.f_fsidx.__fsid_val[0],
 3246                         mp->mnt_stat.f_fsidx.__fsid_val[1]);
 3247 
 3248         (*pr)("\towner = %"PRIu32"\n",mp->mnt_stat.f_owner);
 3249         (*pr)("\tnamemax = %lu\n",mp->mnt_stat.f_namemax);
 3250 
 3251         bitmask_snprintf(mp->mnt_stat.f_flag, __MNT_FLAG_BITS, sbuf,
 3252             sizeof(sbuf));
 3253         (*pr)("\tflag = %s\n",sbuf);
 3254         (*pr)("\tsyncwrites = %" PRIu64 "\n",mp->mnt_stat.f_syncwrites);
 3255         (*pr)("\tasyncwrites = %" PRIu64 "\n",mp->mnt_stat.f_asyncwrites);
 3256         (*pr)("\tsyncreads = %" PRIu64 "\n",mp->mnt_stat.f_syncreads);
 3257         (*pr)("\tasyncreads = %" PRIu64 "\n",mp->mnt_stat.f_asyncreads);
 3258         (*pr)("\tfstypename = %s\n",mp->mnt_stat.f_fstypename);
 3259         (*pr)("\tmntonname = %s\n",mp->mnt_stat.f_mntonname);
 3260         (*pr)("\tmntfromname = %s\n",mp->mnt_stat.f_mntfromname);
 3261 
 3262         {
 3263                 int cnt = 0;
 3264                 struct vnode *vp;
 3265                 (*pr)("locked vnodes =");
 3266                 /* XXX would take mountlist lock, except ddb may not have context */
 3267                 LIST_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
 3268                         if (VOP_ISLOCKED(vp)) {
 3269                                 if ((++cnt % 6) == 0) {
 3270                                         (*pr)(" %p,\n\t", vp);
 3271                                 } else {
 3272                                         (*pr)(" %p,", vp);
 3273                                 }
 3274                         }
 3275                 }
 3276                 (*pr)("\n");
 3277         }
 3278 
 3279         if (full) {
 3280                 int cnt = 0;
 3281                 struct vnode *vp;
 3282                 (*pr)("all vnodes =");
 3283                 /* XXX would take mountlist lock, except ddb may not have context */
 3284                 LIST_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
 3285                         if (!LIST_NEXT(vp, v_mntvnodes)) {
 3286                                 (*pr)(" %p", vp);
 3287                         } else if ((++cnt % 6) == 0) {
 3288                                 (*pr)(" %p,\n\t", vp);
 3289                         } else {
 3290                                 (*pr)(" %p,", vp);
 3291                         }
 3292                 }
 3293                 (*pr)("\n");
 3294         }
 3295 }
 3296 
 3297 #endif
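
      /*
       * A usage note, under assumptions: the printers above are reached
       * from the in-kernel debugger, for example:
       *
       *      db> show vnode <addr>
       *      db> show mount <addr>
       *
       * The exact command names depend on the ddb(4) command table that
       * wires these routines in.
       */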
