The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/vfs_subr.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*      $NetBSD: vfs_subr.c,v 1.218.2.5 2005/12/29 01:37:32 riz Exp $   */
    2 
    3 /*-
    4  * Copyright (c) 1997, 1998 The NetBSD Foundation, Inc.
    5  * All rights reserved.
    6  *
    7  * This code is derived from software contributed to The NetBSD Foundation
    8  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
    9  * NASA Ames Research Center.
   10  *
   11  * Redistribution and use in source and binary forms, with or without
   12  * modification, are permitted provided that the following conditions
   13  * are met:
   14  * 1. Redistributions of source code must retain the above copyright
   15  *    notice, this list of conditions and the following disclaimer.
   16  * 2. Redistributions in binary form must reproduce the above copyright
   17  *    notice, this list of conditions and the following disclaimer in the
   18  *    documentation and/or other materials provided with the distribution.
   19  * 3. All advertising materials mentioning features or use of this software
   20  *    must display the following acknowledgement:
   21  *      This product includes software developed by the NetBSD
   22  *      Foundation, Inc. and its contributors.
   23  * 4. Neither the name of The NetBSD Foundation nor the names of its
   24  *    contributors may be used to endorse or promote products derived
   25  *    from this software without specific prior written permission.
   26  *
   27  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
   28  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
   29  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
   30  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
   31  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   32  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   33  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   34  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   35  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   36  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   37  * POSSIBILITY OF SUCH DAMAGE.
   38  */
   39 
   40 /*
   41  * Copyright (c) 1989, 1993
   42  *      The Regents of the University of California.  All rights reserved.
   43  * (c) UNIX System Laboratories, Inc.
   44  * All or some portions of this file are derived from material licensed
   45  * to the University of California by American Telephone and Telegraph
   46  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
   47  * the permission of UNIX System Laboratories, Inc.
   48  *
   49  * Redistribution and use in source and binary forms, with or without
   50  * modification, are permitted provided that the following conditions
   51  * are met:
   52  * 1. Redistributions of source code must retain the above copyright
   53  *    notice, this list of conditions and the following disclaimer.
   54  * 2. Redistributions in binary form must reproduce the above copyright
   55  *    notice, this list of conditions and the following disclaimer in the
   56  *    documentation and/or other materials provided with the distribution.
   57  * 3. Neither the name of the University nor the names of its contributors
   58  *    may be used to endorse or promote products derived from this software
   59  *    without specific prior written permission.
   60  *
   61  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   62  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   63  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   64  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   65  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   66  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   67  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   68  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   69  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   70  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   71  * SUCH DAMAGE.
   72  *
   73  *      @(#)vfs_subr.c  8.13 (Berkeley) 4/18/94
   74  */
   75 
   76 /*
   77  * External virtual filesystem routines
   78  */
   79 
   80 #include <sys/cdefs.h>
   81 __KERNEL_RCSID(0, "$NetBSD: vfs_subr.c,v 1.218.2.5 2005/12/29 01:37:32 riz Exp $");
   82 
   83 #include "opt_inet.h"
   84 #include "opt_ddb.h"
   85 #include "opt_compat_netbsd.h"
   86 #include "opt_compat_43.h"
   87 
   88 #include <sys/param.h>
   89 #include <sys/systm.h>
   90 #include <sys/proc.h>
   91 #include <sys/kernel.h>
   92 #include <sys/mount.h>
   93 #include <sys/time.h>
   94 #include <sys/event.h>
   95 #include <sys/fcntl.h>
   96 #include <sys/vnode.h>
   97 #include <sys/stat.h>
   98 #include <sys/namei.h>
   99 #include <sys/ucred.h>
  100 #include <sys/buf.h>
  101 #include <sys/errno.h>
  102 #include <sys/malloc.h>
  103 #include <sys/domain.h>
  104 #include <sys/mbuf.h>
  105 #include <sys/sa.h>
  106 #include <sys/syscallargs.h>
  107 #include <sys/device.h>
  108 #include <sys/dirent.h>
  109 #include <sys/filedesc.h>
  110 
  111 #include <miscfs/specfs/specdev.h>
  112 #include <miscfs/genfs/genfs.h>
  113 #include <miscfs/syncfs/syncfs.h>
  114 
  115 #include <netinet/in.h>
  116 
  117 #include <uvm/uvm.h>
  118 #include <uvm/uvm_ddb.h>
  119 
  120 #include <netinet/in.h>
  121 
  122 #include <sys/sysctl.h>
  123 
  124 const enum vtype iftovt_tab[16] = {
        /*
         * Sixteen-entry table of vnode types.
         * NOTE(review): presumably indexed by the file-type bits of a
         * mode value ((mode & S_IFMT) >> 12) -- confirm against the
         * users of this table; the indexing is not visible in this file
         * section.
         */
  125         VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
  126         VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
  127 };
  128 const int       vttoif_tab[9] = {
        /*
         * Nine-entry table of S_IF* file-type constants (first entry 0,
         * last entry S_IFMT).
         * NOTE(review): presumably indexed by enum vtype in declaration
         * order -- confirm against the enum definition in sys/vnode.h.
         */
  129         0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
  130         S_IFSOCK, S_IFIFO, S_IFMT,
  131 };
  132 
  133 int doforce = 1;                /* 1 => permit forcible unmounting */
  134 int prtactive = 0;              /* 1 => print out reclaim of active vnodes */
  135 
  136 extern int dovfsusermount;      /* 1 => permit any user to mount filesystems */
  137 
  /*
   * Insq/Remq for the vnode usage lists.
   *
   * bufinsvn(bp, dp) links buffer bp at the head of list dp through its
   * b_vnbufs entry.
   *
   * bufremvn(bp) unlinks bp through b_vnbufs and then stores NOLIST in
   * b_vnbufs.le_next.  The body is wrapped in do { } while (0) so that
   * "bufremvn(bp);" expands to exactly one statement and is safe as the
   * body of an unbraced if/else.
   */
  #define bufinsvn(bp, dp)        LIST_INSERT_HEAD(dp, bp, b_vnbufs)
  #define bufremvn(bp) do {                                               \
          LIST_REMOVE(bp, b_vnbufs);                                      \
          (bp)->b_vnbufs.le_next = NOLIST;                                \
  } while (/*CONSTCOND*/0)
  146 /* TAILQ_HEAD(freelst, vnode) vnode_free_list = vnode free list (in vnode.h) */
  147 struct freelst vnode_free_list = TAILQ_HEAD_INITIALIZER(vnode_free_list);
  148 struct freelst vnode_hold_list = TAILQ_HEAD_INITIALIZER(vnode_hold_list);
  149 
  150 struct mntlist mountlist =                      /* mounted filesystem list */
  151     CIRCLEQ_HEAD_INITIALIZER(mountlist);
  152 struct vfs_list_head vfs_list =                 /* vfs list */
  153     LIST_HEAD_INITIALIZER(vfs_list);
  154 
  155 struct nfs_public nfs_pub;                      /* publicly exported FS */
  156 
  157 struct simplelock mountlist_slock = SIMPLELOCK_INITIALIZER;        /* held while walking mountlist (see vfs_getvfs) */
  158 static struct simplelock mntid_slock = SIMPLELOCK_INITIALIZER;     /* serializes fsid generation in vfs_getnewfsid */
  159 struct simplelock mntvnode_slock = SIMPLELOCK_INITIALIZER;         /* held while relinking a vnode on per-mount vnode lists (see insmntque) */
  160 struct simplelock vnode_free_list_slock = SIMPLELOCK_INITIALIZER;  /* held around free/hold list changes and numvnodes updates */
  161 struct simplelock spechash_slock = SIMPLELOCK_INITIALIZER;         /* NOTE(review): not used in this part of the file; presumably guards the special-device hash -- confirm */
  162 
  163 /* XXX - gross; single global lock to protect v_numoutput */
  164 struct simplelock global_v_numoutput_slock = SIMPLELOCK_INITIALIZER;
  165 
  166 /*
  167  * These define the root filesystem and device.
  168  */
  169 struct mount *rootfs;
  170 struct vnode *rootvnode;
  171 struct device *root_device;                     /* root device */
  172 
  173 struct pool vnode_pool;                         /* memory pool for vnodes */
  174 
  175 MALLOC_DEFINE(M_VNODE, "vnodes", "Dynamically allocated vnodes");
  176 
  177 /*
  178  * Local declarations.
  179  */
  180 void insmntque(struct vnode *, struct mount *);
  181 int getdevvp(dev_t, struct vnode **, enum vtype);
  182 void vgoneall(struct vnode *);
  183 
  184 void vclean(struct vnode *, int, struct proc *);
  185 
  186 static int vfs_hang_addrlist(struct mount *, struct netexport *,
  187                              struct export_args *);
  188 static int vfs_free_netcred(struct radix_node *, void *);
  189 static void vfs_free_addrlist(struct netexport *);
  190 static struct vnode *getcleanvnode(struct proc *);
  191 
  192 #ifdef DEBUG
  193 void printlockedvnodes(void);
  194 #endif
  195 
  196 /*
  197  * Initialize the vnode management data structures.
  198  */
  199 void
  200 vntblinit()
  201 {
  202 
  203         pool_init(&vnode_pool, sizeof(struct vnode), 0, 0, 0, "vnodepl",
  204             &pool_allocator_nointr);
  205 
  206         /*
  207          * Initialize the filesystem syncer.
  208          */
  209         vn_initialize_syncerd();
  210 }
  211 
  212 int
  213 vfs_drainvnodes(long target, struct proc *p)
  214 {
  215 
  216         simple_lock(&vnode_free_list_slock);
  217         while (numvnodes > target) {
  218                 struct vnode *vp;
  219 
  220                 vp = getcleanvnode(p);
  221                 if (vp == NULL)
  222                         return EBUSY; /* give up */
  223                 pool_put(&vnode_pool, vp);
  224                 simple_lock(&vnode_free_list_slock);
  225                 numvnodes--;
  226         }
  227         simple_unlock(&vnode_free_list_slock);
  228 
  229         return 0;
  230 }
  231 
  232 /*
  233  * grab a vnode from freelist and clean it.
  234  */
  235 struct vnode *
  236 getcleanvnode(p)
  237         struct proc *p;
  238 {
  239         struct vnode *vp;
  240         struct mount *mp;
  241         struct freelst *listhd;
  242 
  243         LOCK_ASSERT(simple_lock_held(&vnode_free_list_slock));
  244         if ((vp = TAILQ_FIRST(listhd = &vnode_free_list)) == NULL)
  245                 vp = TAILQ_FIRST(listhd = &vnode_hold_list);
  246         for (; vp != NULL; vp = TAILQ_NEXT(vp, v_freelist)) {
  247                 if (!simple_lock_try(&vp->v_interlock))
  248                         continue;
  249                 /*
  250                  * as our lwp might hold the underlying vnode locked,
  251                  * don't try to reclaim the VLAYER vnode if it's locked.
  252                  */
  253                 if ((vp->v_flag & VXLOCK) == 0 &&
  254                     ((vp->v_flag & VLAYER) == 0 || VOP_ISLOCKED(vp) == 0)) {
  255                         if (vn_start_write(vp, &mp, V_NOWAIT) == 0)
  256                                 break;
  257                 }
  258                 mp = NULL;
  259                 simple_unlock(&vp->v_interlock);
  260         }
  261 
  262         if (vp == NULLVP) {
  263                 simple_unlock(&vnode_free_list_slock);
  264                 return NULLVP;
  265         }
  266 
  267         if (vp->v_usecount)
  268                 panic("free vnode isn't, vp %p", vp);
  269         TAILQ_REMOVE(listhd, vp, v_freelist);
  270         /* see comment on why 0xdeadb is set at end of vgone (below) */
  271         vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb;
  272         simple_unlock(&vnode_free_list_slock);
  273         vp->v_lease = NULL;
  274 
  275         if (vp->v_type != VBAD)
  276                 vgonel(vp, p);
  277         else
  278                 simple_unlock(&vp->v_interlock);
  279         vn_finished_write(mp, 0);
  280 #ifdef DIAGNOSTIC
  281         if (vp->v_data || vp->v_uobj.uo_npages ||
  282             TAILQ_FIRST(&vp->v_uobj.memq))
  283                 panic("cleaned vnode isn't, vp %p", vp);
  284         if (vp->v_numoutput)
  285                 panic("clean vnode has pending I/O's, vp %p", vp);
  286 #endif
  287         KASSERT((vp->v_flag & VONWORKLST) == 0);
  288 
  289         return vp;
  290 }
  291 
  292 /*
  293  * Mark a mount point as busy. Used to synchronize access and to delay
  294  * unmounting. Interlock is not released on failure.
  295  */
  296 int
  297 vfs_busy(mp, flags, interlkp)
  298         struct mount *mp;
  299         int flags;
  300         struct simplelock *interlkp;
  301 {
  302         int lkflags;
  303 
  304         while (mp->mnt_iflag & IMNT_UNMOUNT) {
  305                 int gone;
  306 
  307                 if (flags & LK_NOWAIT)
  308                         return (ENOENT);
  309                 if ((flags & LK_RECURSEFAIL) && mp->mnt_unmounter != NULL
  310                     && mp->mnt_unmounter == curproc)
  311                         return (EDEADLK);
  312                 if (interlkp)
  313                         simple_unlock(interlkp);
  314                 /*
  315                  * Since all busy locks are shared except the exclusive
  316                  * lock granted when unmounting, the only place that a
  317                  * wakeup needs to be done is at the release of the
  318                  * exclusive lock at the end of dounmount.
  319                  *
  320                  * XXX MP: add spinlock protecting mnt_wcnt here once you
  321                  * can atomically unlock-and-sleep.
  322                  */
  323                 mp->mnt_wcnt++;
  324                 tsleep((caddr_t)mp, PVFS, "vfs_busy", 0);
  325                 mp->mnt_wcnt--;
  326                 gone = mp->mnt_iflag & IMNT_GONE;
  327 
  328                 if (mp->mnt_wcnt == 0)
  329                         wakeup(&mp->mnt_wcnt);
  330                 if (interlkp)
  331                         simple_lock(interlkp);
  332                 if (gone)
  333                         return (ENOENT);
  334         }
  335         lkflags = LK_SHARED;
  336         if (interlkp)
  337                 lkflags |= LK_INTERLOCK;
  338         if (lockmgr(&mp->mnt_lock, lkflags, interlkp))
  339                 panic("vfs_busy: unexpected lock failure");
  340         return (0);
  341 }
  342 
  343 /*
  344  * Free a busy filesystem.
  345  */
  346 void
  347 vfs_unbusy(mp)
  348         struct mount *mp;
  349 {
  350 
  351         lockmgr(&mp->mnt_lock, LK_RELEASE, NULL);
  352 }
  353 
  354 /*
  355  * Lookup a filesystem type, and if found allocate and initialize
  356  * a mount structure for it.
  357  *
  358  * Devname is usually updated by mount(8) after booting.
  359  */
  360 int
  361 vfs_rootmountalloc(fstypename, devname, mpp)
  362         char *fstypename;
  363         char *devname;
  364         struct mount **mpp;
  365 {
  366         struct vfsops *vfsp = NULL;
  367         struct mount *mp;
  368 
  369         LIST_FOREACH(vfsp, &vfs_list, vfs_list)
  370                 if (!strncmp(vfsp->vfs_name, fstypename, MFSNAMELEN))
  371                         break;
  372 
  373         if (vfsp == NULL)
  374                 return (ENODEV);
  375         mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
  376         memset((char *)mp, 0, (u_long)sizeof(struct mount));
  377         lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
  378         (void)vfs_busy(mp, LK_NOWAIT, 0);
  379         LIST_INIT(&mp->mnt_vnodelist);
  380         mp->mnt_op = vfsp;
  381         mp->mnt_flag = MNT_RDONLY;
  382         mp->mnt_vnodecovered = NULLVP;
  383         vfsp->vfs_refcount++;
  384         strncpy(mp->mnt_stat.f_fstypename, vfsp->vfs_name, MFSNAMELEN);
  385         mp->mnt_stat.f_mntonname[0] = '/';
  386         (void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
  387         *mpp = mp;
  388         return (0);
  389 }
  390 
  391 /*
  392  * Lookup a mount point by filesystem identifier.
  393  */
  394 struct mount *
  395 vfs_getvfs(fsid)
  396         fsid_t *fsid;
  397 {
  398         struct mount *mp;
  399 
  400         simple_lock(&mountlist_slock);
  401         CIRCLEQ_FOREACH(mp, &mountlist, mnt_list) {
  402                 if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
  403                     mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
  404                         simple_unlock(&mountlist_slock);
  405                         return (mp);
  406                 }
  407         }
  408         simple_unlock(&mountlist_slock);
  409         return ((struct mount *)0);
  410 }
  411 
  412 /*
  413  * Get a new unique fsid
  414  */
  415 void
  416 vfs_getnewfsid(mp)
  417         struct mount *mp;
  418 {
  419         static u_short xxxfs_mntid;
  420         fsid_t tfsid;
  421         int mtype;
  422 
  423         simple_lock(&mntid_slock);
  424         mtype = makefstype(mp->mnt_op->vfs_name);
  425         mp->mnt_stat.f_fsid.val[0] = makedev(mtype, 0);
  426         mp->mnt_stat.f_fsid.val[1] = mtype;
  427         if (xxxfs_mntid == 0)
  428                 ++xxxfs_mntid;
  429         tfsid.val[0] = makedev(mtype & 0xff, xxxfs_mntid);
  430         tfsid.val[1] = mtype;
  431         if (!CIRCLEQ_EMPTY(&mountlist)) {
  432                 while (vfs_getvfs(&tfsid)) {
  433                         tfsid.val[0]++;
  434                         xxxfs_mntid++;
  435                 }
  436         }
  437         mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
  438         simple_unlock(&mntid_slock);
  439 }
  440 
  /*
   * Make a 'unique' number from a mount type name.
   *
   * For each character of the NUL-terminated name, the running value is
   * shifted left by two bits and XORed with the character.  An empty
   * name yields 0.
   *
   * The value is accumulated in an unsigned long: shifting bits into or
   * past the sign bit of a signed long is undefined behaviour, which a
   * long enough name would trigger.  Each character is first widened
   * through long so a negative char contributes the same bit pattern as
   * it did with signed arithmetic.
   */
  long
  makefstype(const char *type)
  {
          unsigned long hash = 0;

          for (; *type != '\0'; type++) {
                  hash <<= 2;
                  hash ^= (unsigned long)(long)*type;
          }
          return (long)hash;
  }
  456 
  457 
  458 /*
  459  * Set vnode attributes to VNOVAL
  460  */
  461 void
  462 vattr_null(vap)
  463         struct vattr *vap;
  464 {
  465 
  466         vap->va_type = VNON;
  467 
  468         /*
  469          * Assign individually so that it is safe even if size and
  470          * sign of each member are varied.
  471          */
  472         vap->va_mode = VNOVAL;
  473         vap->va_nlink = VNOVAL;
  474         vap->va_uid = VNOVAL;
  475         vap->va_gid = VNOVAL;
  476         vap->va_fsid = VNOVAL;
  477         vap->va_fileid = VNOVAL;
  478         vap->va_size = VNOVAL;
  479         vap->va_blocksize = VNOVAL;
  480         vap->va_atime.tv_sec =
  481             vap->va_mtime.tv_sec =
  482             vap->va_ctime.tv_sec =
  483             vap->va_birthtime.tv_sec = VNOVAL;
  484         vap->va_atime.tv_nsec =
  485             vap->va_mtime.tv_nsec =
  486             vap->va_ctime.tv_nsec =
  487             vap->va_birthtime.tv_nsec = VNOVAL;
  488         vap->va_gen = VNOVAL;
  489         vap->va_flags = VNOVAL;
  490         vap->va_rdev = VNOVAL;
  491         vap->va_bytes = VNOVAL;
  492         vap->va_vaflags = 0;
  493 }
  494 
  495 /*
  496  * Routines having to do with the management of the vnode table.
  497  */
  498 extern int (**dead_vnodeop_p)(void *);
  499 long numvnodes;
  500 
  501 /*
  502  * Return the next vnode from the free list.
  503  */
  504 int
  505 getnewvnode(tag, mp, vops, vpp)
  506         enum vtagtype tag;
  507         struct mount *mp;
  508         int (**vops)(void *);
  509         struct vnode **vpp;
  510 {
  511         extern struct uvm_pagerops uvm_vnodeops;
  512         struct uvm_object *uobj;
  513         struct proc *p = curproc;       /* XXX */
  514         static int toggle;
  515         struct vnode *vp;
  516         int error = 0, tryalloc;
  517 
  518  try_again:
  519         if (mp) {
  520                 /*
  521                  * Mark filesystem busy while we're creating a vnode.
  522                  * If unmount is in progress, this will wait; if the
  523                  * unmount succeeds (only if umount -f), this will
  524                  * return an error.  If the unmount fails, we'll keep
  525                  * going afterwards.
  526                  * (This puts the per-mount vnode list logically under
  527                  * the protection of the vfs_busy lock).
  528                  */
  529                 error = vfs_busy(mp, LK_RECURSEFAIL, 0);
  530                 if (error && error != EDEADLK)
  531                         return error;
  532         }
  533 
  534         /*
  535          * We must choose whether to allocate a new vnode or recycle an
  536          * existing one. The criterion for allocating a new one is that
  537          * the total number of vnodes is less than the number desired or
  538          * there are no vnodes on either free list. Generally we only
  539          * want to recycle vnodes that have no buffers associated with
  540          * them, so we look first on the vnode_free_list. If it is empty,
  541          * we next consider vnodes with referencing buffers on the
  542          * vnode_hold_list. The toggle ensures that half the time we
  543          * will use a buffer from the vnode_hold_list, and half the time
  544          * we will allocate a new one unless the list has grown to twice
  545          * the desired size. We are reticent to recycle vnodes from the
  546          * vnode_hold_list because we will lose the identity of all its
  547          * referencing buffers.
  548          */
  549 
  550         vp = NULL;
  551 
  552         simple_lock(&vnode_free_list_slock);
  553 
  554         toggle ^= 1;
  555         if (numvnodes > 2 * desiredvnodes)
  556                 toggle = 0;
  557 
  558         tryalloc = numvnodes < desiredvnodes ||
  559             (TAILQ_FIRST(&vnode_free_list) == NULL &&
  560              (TAILQ_FIRST(&vnode_hold_list) == NULL || toggle));
  561 
  562         if (tryalloc &&
  563             (vp = pool_get(&vnode_pool, PR_NOWAIT)) != NULL) {
  564                 numvnodes++;
  565                 simple_unlock(&vnode_free_list_slock);
  566                 memset(vp, 0, sizeof(*vp));
  567                 simple_lock_init(&vp->v_interlock);
  568                 uobj = &vp->v_uobj;
  569                 uobj->pgops = &uvm_vnodeops;
  570                 uobj->uo_npages = 0;
  571                 TAILQ_INIT(&uobj->memq);
  572         } else {
  573                 vp = getcleanvnode(p);
  574                 /*
  575                  * Unless this is a bad time of the month, at most
  576                  * the first NCPUS items on the free list are
  577                  * locked, so this is close enough to being empty.
  578                  */
  579                 if (vp == NULLVP) {
  580                         if (mp && error != EDEADLK)
  581                                 vfs_unbusy(mp);
  582                         if (tryalloc) {
  583                                 printf("WARNING: unable to allocate new "
  584                                     "vnode, retrying...\n");
  585                                 (void) tsleep(&lbolt, PRIBIO, "newvn", hz);
  586                                 goto try_again;
  587                         }
  588                         tablefull("vnode", "increase kern.maxvnodes or NVNODE");
  589                         *vpp = 0;
  590                         return (ENFILE);
  591                 }
  592                 vp->v_flag = 0;
  593                 vp->v_socket = NULL;
  594 #ifdef VERIFIED_EXEC
  595                 vp->fp_status = FINGERPRINT_INVALID;
  596 #endif
  597         }
  598         vp->v_type = VNON;
  599         vp->v_vnlock = &vp->v_lock;
  600         lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
  601         cache_purge(vp);
  602         vp->v_tag = tag;
  603         vp->v_op = vops;
  604         insmntque(vp, mp);
  605         *vpp = vp;
  606         vp->v_usecount = 1;
  607         vp->v_data = 0;
  608         simple_lock_init(&vp->v_uobj.vmobjlock);
  609 
  610         /*
  611          * initialize uvm_object within vnode.
  612          */
  613 
  614         uobj = &vp->v_uobj;
  615         KASSERT(uobj->pgops == &uvm_vnodeops);
  616         KASSERT(uobj->uo_npages == 0);
  617         KASSERT(TAILQ_FIRST(&uobj->memq) == NULL);
  618         vp->v_size = VSIZENOTSET;
  619 
  620         if (mp && error != EDEADLK)
  621                 vfs_unbusy(mp);
  622         return (0);
  623 }
  624 
  625 /*
  626  * This is really just the reverse of getnewvnode(). Needed for
  627  * VFS_VGET functions who may need to push back a vnode in case
  628  * of a locking race.
  629  */
  630 void
  631 ungetnewvnode(vp)
  632         struct vnode *vp;
  633 {
  634 #ifdef DIAGNOSTIC
  635         if (vp->v_usecount != 1)
  636                 panic("ungetnewvnode: busy vnode");
  637 #endif
  638         vp->v_usecount--;
  639         insmntque(vp, NULL);
  640         vp->v_type = VBAD;
  641 
  642         simple_lock(&vp->v_interlock);
  643         /*
  644          * Insert at head of LRU list
  645          */
  646         simple_lock(&vnode_free_list_slock);
  647         if (vp->v_holdcnt > 0)
  648                 TAILQ_INSERT_HEAD(&vnode_hold_list, vp, v_freelist);
  649         else
  650                 TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
  651         simple_unlock(&vnode_free_list_slock);
  652         simple_unlock(&vp->v_interlock);
  653 }
  654 
  655 /*
  656  * Move a vnode from one mount queue to another.
  657  */
  658 void
  659 insmntque(vp, mp)
  660         struct vnode *vp;
  661         struct mount *mp;
  662 {
  663 
  664 #ifdef DIAGNOSTIC
  665         if ((mp != NULL) &&
  666             (mp->mnt_iflag & IMNT_UNMOUNT) &&
  667             !(mp->mnt_flag & MNT_SOFTDEP) &&
  668             vp->v_tag != VT_VFS) {
  669                 panic("insmntque into dying filesystem");
  670         }
  671 #endif
  672 
  673         simple_lock(&mntvnode_slock);
  674         /*
  675          * Delete from old mount point vnode list, if on one.
  676          */
  677         if (vp->v_mount != NULL)
  678                 LIST_REMOVE(vp, v_mntvnodes);
  679         /*
  680          * Insert into list of vnodes for the new mount point, if available.
  681          */
  682         if ((vp->v_mount = mp) != NULL)
  683                 LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
  684         simple_unlock(&mntvnode_slock);
  685 }
  686 
  687 /*
  688  * Update outstanding I/O count and do wakeup if requested.
  689  */
  690 void
  691 vwakeup(bp)
  692         struct buf *bp;
  693 {
  694         struct vnode *vp;
  695 
  696         if ((vp = bp->b_vp) != NULL) {
  697                 /* XXX global lock hack
  698                  * can't use v_interlock here since this is called
  699                  * in interrupt context from biodone().
  700                  */
  701                 simple_lock(&global_v_numoutput_slock);
  702                 if (--vp->v_numoutput < 0)
  703                         panic("vwakeup: neg numoutput, vp %p", vp);
  704                 if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) {
  705                         vp->v_flag &= ~VBWAIT;
  706                         wakeup((caddr_t)&vp->v_numoutput);
  707                 }
  708                 simple_unlock(&global_v_numoutput_slock);
  709         }
  710 }
  711 
  712 /*
  713  * Flush out and invalidate all buffers associated with a vnode.
  714  * Called with the underlying vnode locked, which should prevent new dirty
  715  * buffers from being queued.
  716  */
  717 int
  718 vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
  719         struct vnode *vp;
  720         int flags;
  721         struct ucred *cred;
  722         struct proc *p;
  723         int slpflag, slptimeo;
  724 {
  725         struct buf *bp, *nbp;
  726         int s, error;
  727         int flushflags = PGO_ALLPAGES | PGO_FREE | PGO_SYNCIO |
  728                 (flags & V_SAVE ? PGO_CLEANIT : 0);
  729 
  730         /* XXXUBC this doesn't look at flags or slp* */
  731         simple_lock(&vp->v_interlock);
  732         error = VOP_PUTPAGES(vp, 0, 0, flushflags);
  733         if (error) {
  734                 return error;
  735         }
  736 
  737         if (flags & V_SAVE) {
  738                 error = VOP_FSYNC(vp, cred, FSYNC_WAIT|FSYNC_RECLAIM, 0, 0, p);
  739                 if (error)
  740                         return (error);
  741 #ifdef DIAGNOSTIC
  742                 s = splbio();
  743                 if (vp->v_numoutput > 0 || !LIST_EMPTY(&vp->v_dirtyblkhd))
  744                         panic("vinvalbuf: dirty bufs, vp %p", vp);
  745                 splx(s);
  746 #endif
  747         }
  748 
  749         s = splbio();
  750 
  751 restart:
  752         for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
  753                 nbp = LIST_NEXT(bp, b_vnbufs);
  754                 simple_lock(&bp->b_interlock);
  755                 if (bp->b_flags & B_BUSY) {
  756                         bp->b_flags |= B_WANTED;
  757                         error = ltsleep((caddr_t)bp,
  758                                     slpflag | (PRIBIO + 1) | PNORELOCK,
  759                                     "vinvalbuf", slptimeo, &bp->b_interlock);
  760                         if (error) {
  761                                 splx(s);
  762                                 return (error);
  763                         }
  764                         goto restart;
  765                 }
  766                 bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
  767                 simple_unlock(&bp->b_interlock);
  768                 brelse(bp);
  769         }
  770 
  771         for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
  772                 nbp = LIST_NEXT(bp, b_vnbufs);
  773                 simple_lock(&bp->b_interlock);
  774                 if (bp->b_flags & B_BUSY) {
  775                         bp->b_flags |= B_WANTED;
  776                         error = ltsleep((caddr_t)bp,
  777                                     slpflag | (PRIBIO + 1) | PNORELOCK,
  778                                     "vinvalbuf", slptimeo, &bp->b_interlock);
  779                         if (error) {
  780                                 splx(s);
  781                                 return (error);
  782                         }
  783                         goto restart;
  784                 }
  785                 /*
  786                  * XXX Since there are no node locks for NFS, I believe
  787                  * there is a slight chance that a delayed write will
  788                  * occur while sleeping just above, so check for it.
  789                  */
  790                 if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
  791 #ifdef DEBUG
  792                         printf("buffer still DELWRI\n");
  793 #endif
  794                         bp->b_flags |= B_BUSY | B_VFLUSH;
  795                         simple_unlock(&bp->b_interlock);
  796                         VOP_BWRITE(bp);
  797                         goto restart;
  798                 }
  799                 bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
  800                 simple_unlock(&bp->b_interlock);
  801                 brelse(bp);
  802         }
  803 
  804 #ifdef DIAGNOSTIC
  805         if (!LIST_EMPTY(&vp->v_cleanblkhd) || !LIST_EMPTY(&vp->v_dirtyblkhd))
  806                 panic("vinvalbuf: flush failed, vp %p", vp);
  807 #endif
  808 
  809         splx(s);
  810 
  811         return (0);
  812 }
  813 
  814 /*
  815  * Destroy any in core blocks past the truncation length.
  816  * Called with the underlying vnode locked, which should prevent new dirty
  817  * buffers from being queued.
  818  */
  819 int
  820 vtruncbuf(vp, lbn, slpflag, slptimeo)
  821         struct vnode *vp;
  822         daddr_t lbn;
  823         int slpflag, slptimeo;
  824 {
  825         struct buf *bp, *nbp;
  826         int s, error;
  827         voff_t off;
  828 
  829         off = round_page((voff_t)lbn << vp->v_mount->mnt_fs_bshift);
  830         simple_lock(&vp->v_interlock);
  831         error = VOP_PUTPAGES(vp, off, 0, PGO_FREE | PGO_SYNCIO);
  832         if (error) {
  833                 return error;
  834         }
  835 
  836         s = splbio();
  837 
  838 restart:
  839         for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
  840                 nbp = LIST_NEXT(bp, b_vnbufs);
  841                 if (bp->b_lblkno < lbn)
  842                         continue;
  843                 simple_lock(&bp->b_interlock);
  844                 if (bp->b_flags & B_BUSY) {
  845                         bp->b_flags |= B_WANTED;
  846                         error = ltsleep(bp, slpflag | (PRIBIO + 1) | PNORELOCK,
  847                             "vtruncbuf", slptimeo, &bp->b_interlock);
  848                         if (error) {
  849                                 splx(s);
  850                                 return (error);
  851                         }
  852                         goto restart;
  853                 }
  854                 bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
  855                 simple_unlock(&bp->b_interlock);
  856                 brelse(bp);
  857         }
  858 
  859         for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
  860                 nbp = LIST_NEXT(bp, b_vnbufs);
  861                 if (bp->b_lblkno < lbn)
  862                         continue;
  863                 simple_lock(&bp->b_interlock);
  864                 if (bp->b_flags & B_BUSY) {
  865                         bp->b_flags |= B_WANTED;
  866                         error = ltsleep(bp, slpflag | (PRIBIO + 1) | PNORELOCK,
  867                             "vtruncbuf", slptimeo, &bp->b_interlock);
  868                         if (error) {
  869                                 splx(s);
  870                                 return (error);
  871                         }
  872                         goto restart;
  873                 }
  874                 bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
  875                 simple_unlock(&bp->b_interlock);
  876                 brelse(bp);
  877         }
  878 
  879         splx(s);
  880 
  881         return (0);
  882 }
  883 
  884 void
  885 vflushbuf(vp, sync)
  886         struct vnode *vp;
  887         int sync;
  888 {
  889         struct buf *bp, *nbp;
  890         int flags = PGO_CLEANIT | PGO_ALLPAGES | (sync ? PGO_SYNCIO : 0);
  891         int s;
  892 
  893         simple_lock(&vp->v_interlock);
  894         (void) VOP_PUTPAGES(vp, 0, 0, flags);
  895 
  896 loop:
  897         s = splbio();
  898         for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
  899                 nbp = LIST_NEXT(bp, b_vnbufs);
  900                 simple_lock(&bp->b_interlock);
  901                 if ((bp->b_flags & B_BUSY)) {
  902                         simple_unlock(&bp->b_interlock);
  903                         continue;
  904                 }
  905                 if ((bp->b_flags & B_DELWRI) == 0)
  906                         panic("vflushbuf: not dirty, bp %p", bp);
  907                 bp->b_flags |= B_BUSY | B_VFLUSH;
  908                 simple_unlock(&bp->b_interlock);
  909                 splx(s);
  910                 /*
  911                  * Wait for I/O associated with indirect blocks to complete,
  912                  * since there is no way to quickly wait for them below.
  913                  */
  914                 if (bp->b_vp == vp || sync == 0)
  915                         (void) bawrite(bp);
  916                 else
  917                         (void) bwrite(bp);
  918                 goto loop;
  919         }
  920         if (sync == 0) {
  921                 splx(s);
  922                 return;
  923         }
  924         simple_lock(&global_v_numoutput_slock);
  925         while (vp->v_numoutput) {
  926                 vp->v_flag |= VBWAIT;
  927                 ltsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "vflushbuf", 0,
  928                         &global_v_numoutput_slock);
  929         }
  930         simple_unlock(&global_v_numoutput_slock);
  931         splx(s);
  932         if (!LIST_EMPTY(&vp->v_dirtyblkhd)) {
  933                 vprint("vflushbuf: dirty", vp);
  934                 goto loop;
  935         }
  936 }
  937 
  938 /*
  939  * Associate a buffer with a vnode.
  940  */
  941 void
  942 bgetvp(vp, bp)
  943         struct vnode *vp;
  944         struct buf *bp;
  945 {
  946         int s;
  947 
  948         if (bp->b_vp)
  949                 panic("bgetvp: not free, bp %p", bp);
  950         VHOLD(vp);
  951         s = splbio();
  952         bp->b_vp = vp;
  953         if (vp->v_type == VBLK || vp->v_type == VCHR)
  954                 bp->b_dev = vp->v_rdev;
  955         else
  956                 bp->b_dev = NODEV;
  957         /*
  958          * Insert onto list for new vnode.
  959          */
  960         bufinsvn(bp, &vp->v_cleanblkhd);
  961         splx(s);
  962 }
  963 
  964 /*
  965  * Disassociate a buffer from a vnode.
  966  */
  967 void
  968 brelvp(bp)
  969         struct buf *bp;
  970 {
  971         struct vnode *vp;
  972         int s;
  973 
  974         if (bp->b_vp == NULL)
  975                 panic("brelvp: vp NULL, bp %p", bp);
  976 
  977         s = splbio();
  978         vp = bp->b_vp;
  979         /*
  980          * Delete from old vnode list, if on one.
  981          */
  982         if (LIST_NEXT(bp, b_vnbufs) != NOLIST)
  983                 bufremvn(bp);
  984 
  985         if (TAILQ_EMPTY(&vp->v_uobj.memq) && (vp->v_flag & VONWORKLST) &&
  986             LIST_FIRST(&vp->v_dirtyblkhd) == NULL) {
  987                 vp->v_flag &= ~VONWORKLST;
  988                 LIST_REMOVE(vp, v_synclist);
  989         }
  990 
  991         bp->b_vp = NULL;
  992         HOLDRELE(vp);
  993         splx(s);
  994 }
  995 
  996 /*
  997  * Reassign a buffer from one vnode to another.
  998  * Used to assign file specific control information
  999  * (indirect blocks) to the vnode to which they belong.
 1000  *
 1001  * This function must be called at splbio().
 1002  */
 1003 void
 1004 reassignbuf(bp, newvp)
 1005         struct buf *bp;
 1006         struct vnode *newvp;
 1007 {
 1008         struct buflists *listheadp;
 1009         int delay;
 1010 
 1011         /*
 1012          * Delete from old vnode list, if on one.
 1013          */
 1014         if (LIST_NEXT(bp, b_vnbufs) != NOLIST)
 1015                 bufremvn(bp);
 1016         /*
 1017          * If dirty, put on list of dirty buffers;
 1018          * otherwise insert onto list of clean buffers.
 1019          */
 1020         if ((bp->b_flags & B_DELWRI) == 0) {
 1021                 listheadp = &newvp->v_cleanblkhd;
 1022                 if (TAILQ_EMPTY(&newvp->v_uobj.memq) &&
 1023                     (newvp->v_flag & VONWORKLST) &&
 1024                     LIST_FIRST(&newvp->v_dirtyblkhd) == NULL) {
 1025                         newvp->v_flag &= ~VONWORKLST;
 1026                         LIST_REMOVE(newvp, v_synclist);
 1027                 }
 1028         } else {
 1029                 listheadp = &newvp->v_dirtyblkhd;
 1030                 if ((newvp->v_flag & VONWORKLST) == 0) {
 1031                         switch (newvp->v_type) {
 1032                         case VDIR:
 1033                                 delay = dirdelay;
 1034                                 break;
 1035                         case VBLK:
 1036                                 if (newvp->v_specmountpoint != NULL) {
 1037                                         delay = metadelay;
 1038                                         break;
 1039                                 }
 1040                                 /* fall through */
 1041                         default:
 1042                                 delay = filedelay;
 1043                                 break;
 1044                         }
 1045                         if (!newvp->v_mount ||
 1046                             (newvp->v_mount->mnt_flag & MNT_ASYNC) == 0)
 1047                                 vn_syncer_add_to_worklist(newvp, delay);
 1048                 }
 1049         }
 1050         bufinsvn(bp, listheadp);
 1051 }
 1052 
 1053 /*
 1054  * Create a vnode for a block device.
 1055  * Used for root filesystem and swap areas.
 1056  * Also used for memory file system special devices.
 1057  */
 1058 int
 1059 bdevvp(dev, vpp)
 1060         dev_t dev;
 1061         struct vnode **vpp;
 1062 {
 1063 
 1064         return (getdevvp(dev, vpp, VBLK));
 1065 }
 1066 
 1067 /*
 1068  * Create a vnode for a character device.
 1069  * Used for kernfs and some console handling.
 1070  */
 1071 int
 1072 cdevvp(dev, vpp)
 1073         dev_t dev;
 1074         struct vnode **vpp;
 1075 {
 1076 
 1077         return (getdevvp(dev, vpp, VCHR));
 1078 }
 1079 
 1080 /*
 1081  * Create a vnode for a device.
 1082  * Used by bdevvp (block device) for root file system etc.,
 1083  * and by cdevvp (character device) for console and kernfs.
 1084  */
 1085 int
 1086 getdevvp(dev, vpp, type)
 1087         dev_t dev;
 1088         struct vnode **vpp;
 1089         enum vtype type;
 1090 {
 1091         struct vnode *vp;
 1092         struct vnode *nvp;
 1093         int error;
 1094 
 1095         if (dev == NODEV) {
 1096                 *vpp = NULLVP;
 1097                 return (0);
 1098         }
 1099         error = getnewvnode(VT_NON, NULL, spec_vnodeop_p, &nvp);
 1100         if (error) {
 1101                 *vpp = NULLVP;
 1102                 return (error);
 1103         }
 1104         vp = nvp;
 1105         vp->v_type = type;
 1106         if ((nvp = checkalias(vp, dev, NULL)) != 0) {
 1107                 vput(vp);
 1108                 vp = nvp;
 1109         }
 1110         *vpp = vp;
 1111         return (0);
 1112 }
 1113 
 1114 /*
 1115  * Check to see if the new vnode represents a special device
 1116  * for which we already have a vnode (either because of
 1117  * bdevvp() or because of a different vnode representing
 1118  * the same block device). If such an alias exists, deallocate
 1119  * the existing contents and return the aliased vnode. The
 1120  * caller is responsible for filling it with its new contents.
 1121  */
 1122 struct vnode *
 1123 checkalias(nvp, nvp_rdev, mp)
 1124         struct vnode *nvp;
 1125         dev_t nvp_rdev;
 1126         struct mount *mp;
 1127 {
 1128         struct proc *p = curproc;       /* XXX */
 1129         struct vnode *vp;
 1130         struct vnode **vpp;
 1131 
 1132         if (nvp->v_type != VBLK && nvp->v_type != VCHR)
 1133                 return (NULLVP);
 1134 
 1135         vpp = &speclisth[SPECHASH(nvp_rdev)];
 1136 loop:
 1137         simple_lock(&spechash_slock);
 1138         for (vp = *vpp; vp; vp = vp->v_specnext) {
 1139                 if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
 1140                         continue;
 1141                 /*
 1142                  * Alias, but not in use, so flush it out.
 1143                  */
 1144                 simple_lock(&vp->v_interlock);
 1145                 simple_unlock(&spechash_slock);
 1146                 if (vp->v_usecount == 0) {
 1147                         vgonel(vp, p);
 1148                         goto loop;
 1149                 }
 1150                 /*
 1151                  * What we're interested to know here is if someone else has
 1152                  * removed this vnode from the device hash list while we were
 1153                  * waiting.  This can only happen if vclean() did it, and
 1154                  * this requires the vnode to be locked.  Therefore, we use
 1155                  * LK_SLEEPFAIL and retry.
 1156                  */
 1157                 if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK | LK_SLEEPFAIL))
 1158                         goto loop;
 1159                 simple_lock(&spechash_slock);
 1160                 break;
 1161         }
 1162         if (vp == NULL || vp->v_tag != VT_NON || vp->v_type != VBLK) {
 1163                 MALLOC(nvp->v_specinfo, struct specinfo *,
 1164                         sizeof(struct specinfo), M_VNODE, M_NOWAIT);
 1165                 /* XXX Erg. */
 1166                 if (nvp->v_specinfo == NULL) {
 1167                         simple_unlock(&spechash_slock);
 1168                         uvm_wait("checkalias");
 1169                         goto loop;
 1170                 }
 1171 
 1172                 nvp->v_rdev = nvp_rdev;
 1173                 nvp->v_hashchain = vpp;
 1174                 nvp->v_specnext = *vpp;
 1175                 nvp->v_specmountpoint = NULL;
 1176                 simple_unlock(&spechash_slock);
 1177                 nvp->v_speclockf = NULL;
 1178                 simple_lock_init(&nvp->v_spec_cow_slock);
 1179                 SLIST_INIT(&nvp->v_spec_cow_head);
 1180                 nvp->v_spec_cow_req = 0;
 1181                 nvp->v_spec_cow_count = 0;
 1182 
 1183                 *vpp = nvp;
 1184                 if (vp != NULLVP) {
 1185                         nvp->v_flag |= VALIASED;
 1186                         vp->v_flag |= VALIASED;
 1187                         vput(vp);
 1188                 }
 1189                 return (NULLVP);
 1190         }
 1191         simple_unlock(&spechash_slock);
 1192         VOP_UNLOCK(vp, 0);
 1193         simple_lock(&vp->v_interlock);
 1194         vclean(vp, 0, p);
 1195         vp->v_op = nvp->v_op;
 1196         vp->v_tag = nvp->v_tag;
 1197         vp->v_vnlock = &vp->v_lock;
 1198         lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
 1199         nvp->v_type = VNON;
 1200         insmntque(vp, mp);
 1201         return (vp);
 1202 }
 1203 
 1204 /*
 1205  * Grab a particular vnode from the free list, increment its
 1206  * reference count and lock it. If the vnode lock bit is set the
 1207  * vnode is being eliminated in vgone. In that case, we can not
 1208  * grab the vnode, so the process is awakened when the transition is
 1209  * completed, and an error returned to indicate that the vnode is no
 1210  * longer usable (possibly having been changed to a new file system type).
 1211  */
 1212 int
 1213 vget(vp, flags)
 1214         struct vnode *vp;
 1215         int flags;
 1216 {
 1217         int error;
 1218 
 1219         /*
 1220          * If the vnode is in the process of being cleaned out for
 1221          * another use, we wait for the cleaning to finish and then
 1222          * return failure. Cleaning is determined by checking that
 1223          * the VXLOCK flag is set.
 1224          */
 1225 
 1226         if ((flags & LK_INTERLOCK) == 0)
 1227                 simple_lock(&vp->v_interlock);
 1228         if (vp->v_flag & VXLOCK) {
 1229                 if (flags & LK_NOWAIT) {
 1230                         simple_unlock(&vp->v_interlock);
 1231                         return EBUSY;
 1232                 }
 1233                 vp->v_flag |= VXWANT;
 1234                 ltsleep(vp, PINOD|PNORELOCK, "vget", 0, &vp->v_interlock);
 1235                 return (ENOENT);
 1236         }
 1237         if (vp->v_usecount == 0) {
 1238                 simple_lock(&vnode_free_list_slock);
 1239                 if (vp->v_holdcnt > 0)
 1240                         TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
 1241                 else
 1242                         TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
 1243                 simple_unlock(&vnode_free_list_slock);
 1244         }
 1245         vp->v_usecount++;
 1246 #ifdef DIAGNOSTIC
 1247         if (vp->v_usecount == 0) {
 1248                 vprint("vget", vp);
 1249                 panic("vget: usecount overflow, vp %p", vp);
 1250         }
 1251 #endif
 1252         if (flags & LK_TYPE_MASK) {
 1253                 if ((error = vn_lock(vp, flags | LK_INTERLOCK))) {
 1254                         /*
 1255                          * must expand vrele here because we do not want
 1256                          * to call VOP_INACTIVE if the reference count
 1257                          * drops back to zero since it was never really
 1258                          * active. We must remove it from the free list
 1259                          * before sleeping so that multiple processes do
 1260                          * not try to recycle it.
 1261                          */
 1262                         simple_lock(&vp->v_interlock);
 1263                         vp->v_usecount--;
 1264                         if (vp->v_usecount > 0) {
 1265                                 simple_unlock(&vp->v_interlock);
 1266                                 return (error);
 1267                         }
 1268                         /*
 1269                          * insert at tail of LRU list
 1270                          */
 1271                         simple_lock(&vnode_free_list_slock);
 1272                         if (vp->v_holdcnt > 0)
 1273                                 TAILQ_INSERT_TAIL(&vnode_hold_list, vp,
 1274                                     v_freelist);
 1275                         else
 1276                                 TAILQ_INSERT_TAIL(&vnode_free_list, vp,
 1277                                     v_freelist);
 1278                         simple_unlock(&vnode_free_list_slock);
 1279                         simple_unlock(&vp->v_interlock);
 1280                 }
 1281                 return (error);
 1282         }
 1283         simple_unlock(&vp->v_interlock);
 1284         return (0);
 1285 }
 1286 
 1287 /*
 1288  * vput(), just unlock and vrele()
 1289  */
 1290 void
 1291 vput(vp)
 1292         struct vnode *vp;
 1293 {
 1294         struct proc *p = curproc;       /* XXX */
 1295 
 1296 #ifdef DIAGNOSTIC
 1297         if (vp == NULL)
 1298                 panic("vput: null vp");
 1299 #endif
 1300         simple_lock(&vp->v_interlock);
 1301         vp->v_usecount--;
 1302         if (vp->v_usecount > 0) {
 1303                 simple_unlock(&vp->v_interlock);
 1304                 VOP_UNLOCK(vp, 0);
 1305                 return;
 1306         }
 1307 #ifdef DIAGNOSTIC
 1308         if (vp->v_usecount < 0 || vp->v_writecount != 0) {
 1309                 vprint("vput: bad ref count", vp);
 1310                 panic("vput: ref cnt");
 1311         }
 1312 #endif
 1313         /*
 1314          * Insert at tail of LRU list.
 1315          */
 1316         simple_lock(&vnode_free_list_slock);
 1317         if (vp->v_holdcnt > 0)
 1318                 TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
 1319         else
 1320                 TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
 1321         simple_unlock(&vnode_free_list_slock);
 1322         if (vp->v_flag & VEXECMAP) {
 1323                 uvmexp.execpages -= vp->v_uobj.uo_npages;
 1324                 uvmexp.filepages += vp->v_uobj.uo_npages;
 1325         }
 1326         vp->v_flag &= ~(VTEXT|VEXECMAP);
 1327         simple_unlock(&vp->v_interlock);
 1328         VOP_INACTIVE(vp, p);
 1329 }
 1330 
 1331 /*
 1332  * Vnode release.
 1333  * If count drops to zero, call inactive routine and return to freelist.
 1334  */
 1335 void
 1336 vrele(vp)
 1337         struct vnode *vp;
 1338 {
 1339         struct proc *p = curproc;       /* XXX */
 1340 
 1341 #ifdef DIAGNOSTIC
 1342         if (vp == NULL)
 1343                 panic("vrele: null vp");
 1344 #endif
 1345         simple_lock(&vp->v_interlock);
 1346         vp->v_usecount--;
 1347         if (vp->v_usecount > 0) {
 1348                 simple_unlock(&vp->v_interlock);
 1349                 return;
 1350         }
 1351 #ifdef DIAGNOSTIC
 1352         if (vp->v_usecount < 0 || vp->v_writecount != 0) {
 1353                 vprint("vrele: bad ref count", vp);
 1354                 panic("vrele: ref cnt vp %p", vp);
 1355         }
 1356 #endif
 1357         /*
 1358          * Insert at tail of LRU list.
 1359          */
 1360         simple_lock(&vnode_free_list_slock);
 1361         if (vp->v_holdcnt > 0)
 1362                 TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
 1363         else
 1364                 TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
 1365         simple_unlock(&vnode_free_list_slock);
 1366         if (vp->v_flag & VEXECMAP) {
 1367                 uvmexp.execpages -= vp->v_uobj.uo_npages;
 1368                 uvmexp.filepages += vp->v_uobj.uo_npages;
 1369         }
 1370         vp->v_flag &= ~(VTEXT|VEXECMAP);
 1371         if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK) == 0)
 1372                 VOP_INACTIVE(vp, p);
 1373 }
 1374 
 1375 #ifdef DIAGNOSTIC
 1376 /*
 1377  * Page or buffer structure gets a reference.
 1378  */
 1379 void
 1380 vholdl(vp)
 1381         struct vnode *vp;
 1382 {
 1383 
 1384         /*
 1385          * If it is on the freelist and the hold count is currently
 1386          * zero, move it to the hold list. The test of the back
 1387          * pointer and the use reference count of zero is because
 1388          * it will be removed from a free list by getnewvnode,
 1389          * but will not have its reference count incremented until
 1390          * after calling vgone. If the reference count were
 1391          * incremented first, vgone would (incorrectly) try to
 1392          * close the previous instance of the underlying object.
 1393          * So, the back pointer is explicitly set to `0xdeadb' in
 1394          * getnewvnode after removing it from a freelist to ensure
 1395          * that we do not try to move it here.
 1396          */
 1397         if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) &&
 1398             vp->v_holdcnt == 0 && vp->v_usecount == 0) {
 1399                 simple_lock(&vnode_free_list_slock);
 1400                 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
 1401                 TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
 1402                 simple_unlock(&vnode_free_list_slock);
 1403         }
 1404         vp->v_holdcnt++;
 1405 }
 1406 
 1407 /*
 1408  * Page or buffer structure frees a reference.
 1409  */
 1410 void
 1411 holdrelel(vp)
 1412         struct vnode *vp;
 1413 {
 1414 
 1415         if (vp->v_holdcnt <= 0)
 1416                 panic("holdrelel: holdcnt vp %p", vp);
 1417         vp->v_holdcnt--;
 1418 
 1419         /*
 1420          * If it is on the holdlist and the hold count drops to
 1421          * zero, move it to the free list. The test of the back
 1422          * pointer and the use reference count of zero is because
 1423          * it will be removed from a free list by getnewvnode,
 1424          * but will not have its reference count incremented until
 1425          * after calling vgone. If the reference count were
 1426          * incremented first, vgone would (incorrectly) try to
 1427          * close the previous instance of the underlying object.
 1428          * So, the back pointer is explicitly set to `0xdeadb' in
 1429          * getnewvnode after removing it from a freelist to ensure
 1430          * that we do not try to move it here.
 1431          */
 1432 
 1433         if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) &&
 1434             vp->v_holdcnt == 0 && vp->v_usecount == 0) {
 1435                 simple_lock(&vnode_free_list_slock);
 1436                 TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
 1437                 TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
 1438                 simple_unlock(&vnode_free_list_slock);
 1439         }
 1440 }
 1441 
 1442 /*
 1443  * Vnode reference.
 1444  */
 1445 void
 1446 vref(vp)
 1447         struct vnode *vp;
 1448 {
 1449 
 1450         simple_lock(&vp->v_interlock);
 1451         if (vp->v_usecount <= 0)
 1452                 panic("vref used where vget required, vp %p", vp);
 1453         vp->v_usecount++;
 1454 #ifdef DIAGNOSTIC
 1455         if (vp->v_usecount == 0) {
 1456                 vprint("vref", vp);
 1457                 panic("vref: usecount overflow, vp %p", vp);
 1458         }
 1459 #endif
 1460         simple_unlock(&vp->v_interlock);
 1461 }
 1462 #endif /* DIAGNOSTIC */
 1463 
 1464 /*
 1465  * Remove any vnodes in the vnode table belonging to mount point mp.
 1466  *
 1467  * If FORCECLOSE is not specified, there should not be any active ones,
 1468  * return error if any are found (nb: this is a user error, not a
 1469  * system error). If FORCECLOSE is specified, detach any active vnodes
 1470  * that are found.
 1471  *
 1472  * If WRITECLOSE is set, only flush out regular file vnodes open for
 1473  * writing.
 1474  *
 1475  * SKIPSYSTEM causes any vnodes marked V_SYSTEM to be skipped.
 1476  */
 1477 #ifdef DEBUG
 1478 int busyprt = 0;        /* print out busy vnodes */
 1479 struct ctldebug debug1 = { "busyprt", &busyprt };
 1480 #endif
 1481 
 1482 int
 1483 vflush(mp, skipvp, flags)
 1484         struct mount *mp;
 1485         struct vnode *skipvp;
 1486         int flags;
 1487 {
 1488         struct proc *p = curproc;       /* XXX */
 1489         struct vnode *vp, *nvp;
 1490         int busy = 0;
 1491 
 1492         simple_lock(&mntvnode_slock);
 1493 loop:
 1494         for (vp = LIST_FIRST(&mp->mnt_vnodelist); vp; vp = nvp) {
 1495                 if (vp->v_mount != mp)
 1496                         goto loop;
 1497                 nvp = LIST_NEXT(vp, v_mntvnodes);
 1498                 /*
 1499                  * Skip over a selected vnode.
 1500                  */
 1501                 if (vp == skipvp)
 1502                         continue;
 1503                 simple_lock(&vp->v_interlock);
 1504                 /*
 1505                  * Skip over a vnodes marked VSYSTEM.
 1506                  */
 1507                 if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
 1508                         simple_unlock(&vp->v_interlock);
 1509                         continue;
 1510                 }
 1511                 /*
 1512                  * If WRITECLOSE is set, only flush out regular file
 1513                  * vnodes open for writing.
 1514                  */
 1515                 if ((flags & WRITECLOSE) &&
 1516                     (vp->v_writecount == 0 || vp->v_type != VREG)) {
 1517                         simple_unlock(&vp->v_interlock);
 1518                         continue;
 1519                 }
 1520                 /*
 1521                  * With v_usecount == 0, all we need to do is clear
 1522                  * out the vnode data structures and we are done.
 1523                  */
 1524                 if (vp->v_usecount == 0) {
 1525                         simple_unlock(&mntvnode_slock);
 1526                         vgonel(vp, p);
 1527                         simple_lock(&mntvnode_slock);
 1528                         continue;
 1529                 }
 1530                 /*
 1531                  * If FORCECLOSE is set, forcibly close the vnode.
 1532                  * For block or character devices, revert to an
 1533                  * anonymous device. For all other files, just kill them.
 1534                  */
 1535                 if (flags & FORCECLOSE) {
 1536                         simple_unlock(&mntvnode_slock);
 1537                         if (vp->v_type != VBLK && vp->v_type != VCHR) {
 1538                                 vgonel(vp, p);
 1539                         } else {
 1540                                 vclean(vp, 0, p);
 1541                                 vp->v_op = spec_vnodeop_p;
 1542                                 insmntque(vp, (struct mount *)0);
 1543                         }
 1544                         simple_lock(&mntvnode_slock);
 1545                         continue;
 1546                 }
 1547 #ifdef DEBUG
 1548                 if (busyprt)
 1549                         vprint("vflush: busy vnode", vp);
 1550 #endif
 1551                 simple_unlock(&vp->v_interlock);
 1552                 busy++;
 1553         }
 1554         simple_unlock(&mntvnode_slock);
 1555         if (busy)
 1556                 return (EBUSY);
 1557         return (0);
 1558 }
 1559 
 1560 /*
 1561  * Disassociate the underlying file system from a vnode.
 1562  */
 1563 void
 1564 vclean(vp, flags, p)
 1565         struct vnode *vp;
 1566         int flags;
 1567         struct proc *p;
 1568 {
 1569         struct mount *mp;
 1570         int active;
 1571 
 1572         LOCK_ASSERT(simple_lock_held(&vp->v_interlock));
 1573 
 1574         /*
 1575          * Check to see if the vnode is in use.
 1576          * If so we have to reference it before we clean it out
 1577          * so that its count cannot fall to zero and generate a
 1578          * race against ourselves to recycle it.
 1579          */
 1580 
 1581         if ((active = vp->v_usecount) != 0) {
 1582                 vp->v_usecount++;
 1583 #ifdef DIAGNOSTIC
 1584                 if (vp->v_usecount == 0) {
 1585                         vprint("vclean", vp);
 1586                         panic("vclean: usecount overflow");
 1587                 }
 1588 #endif
 1589         }
 1590 
 1591         /*
 1592          * Prevent the vnode from being recycled or
 1593          * brought into use while we clean it out.
 1594          */
 1595         if (vp->v_flag & VXLOCK)
 1596                 panic("vclean: deadlock, vp %p", vp);
 1597         vp->v_flag |= VXLOCK;
 1598         if (vp->v_flag & VEXECMAP) {
 1599                 uvmexp.execpages -= vp->v_uobj.uo_npages;
 1600                 uvmexp.filepages += vp->v_uobj.uo_npages;
 1601         }
 1602         vp->v_flag &= ~(VTEXT|VEXECMAP);
 1603 
 1604         /*
 1605          * Even if the count is zero, the VOP_INACTIVE routine may still
 1606          * have the object locked while it cleans it out. The VOP_LOCK
 1607          * ensures that the VOP_INACTIVE routine is done with its work.
 1608          * For active vnodes, it ensures that no other activity can
 1609          * occur while the underlying object is being cleaned out.
 1610          */
 1611         VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK);
 1612 
 1613         /*
 1614          * Clean out any cached data associated with the vnode.
 1615          * If special device, remove it from special device alias list.
 1616          * if it is on one.
 1617          */
 1618         if (flags & DOCLOSE) {
 1619                 int error;
 1620                 struct vnode *vq, *vx;
 1621 
 1622                 vn_start_write(vp, &mp, V_WAIT | V_LOWER);
 1623                 error = vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);
 1624                 vn_finished_write(mp, V_LOWER);
 1625                 if (error)
 1626                         error = vinvalbuf(vp, 0, NOCRED, p, 0, 0);
 1627                 KASSERT(error == 0);
 1628                 KASSERT((vp->v_flag & VONWORKLST) == 0);
 1629 
 1630                 if (active)
 1631                         VOP_CLOSE(vp, FNONBLOCK, NOCRED, NULL);
 1632 
 1633                 if ((vp->v_type == VBLK || vp->v_type == VCHR) &&
 1634                     vp->v_specinfo != 0) {
 1635                         simple_lock(&spechash_slock);
 1636                         if (vp->v_hashchain != NULL) {
 1637                                 if (*vp->v_hashchain == vp) {
 1638                                         *vp->v_hashchain = vp->v_specnext;
 1639                                 } else {
 1640                                         for (vq = *vp->v_hashchain; vq;
 1641                                              vq = vq->v_specnext) {
 1642                                                 if (vq->v_specnext != vp)
 1643                                                         continue;
 1644                                                 vq->v_specnext = vp->v_specnext;
 1645                                                 break;
 1646                                         }
 1647                                         if (vq == NULL)
 1648                                                 panic("missing bdev");
 1649                                 }
 1650                                 if (vp->v_flag & VALIASED) {
 1651                                         vx = NULL;
 1652                                                 for (vq = *vp->v_hashchain; vq;
 1653                                                      vq = vq->v_specnext) {
 1654                                                 if (vq->v_rdev != vp->v_rdev ||
 1655                                                     vq->v_type != vp->v_type)
 1656                                                         continue;
 1657                                                 if (vx)
 1658                                                         break;
 1659                                                 vx = vq;
 1660                                         }
 1661                                         if (vx == NULL)
 1662                                                 panic("missing alias");
 1663                                         if (vq == NULL)
 1664                                                 vx->v_flag &= ~VALIASED;
 1665                                         vp->v_flag &= ~VALIASED;
 1666                                 }
 1667                         }
 1668                         simple_unlock(&spechash_slock);
 1669                         FREE(vp->v_specinfo, M_VNODE);
 1670                         vp->v_specinfo = NULL;
 1671                 }
 1672         }
 1673         LOCK_ASSERT(!simple_lock_held(&vp->v_interlock));
 1674 
 1675         /*
 1676          * If purging an active vnode, it must be closed and
 1677          * deactivated before being reclaimed. Note that the
 1678          * VOP_INACTIVE will unlock the vnode.
 1679          */
 1680         if (active) {
 1681                 VOP_INACTIVE(vp, p);
 1682         } else {
 1683                 /*
 1684                  * Any other processes trying to obtain this lock must first
 1685                  * wait for VXLOCK to clear, then call the new lock operation.
 1686                  */
 1687                 VOP_UNLOCK(vp, 0);
 1688         }
 1689         /*
 1690          * Reclaim the vnode.
 1691          */
 1692         if (VOP_RECLAIM(vp, p))
 1693                 panic("vclean: cannot reclaim, vp %p", vp);
 1694         if (active) {
 1695                 /*
 1696                  * Inline copy of vrele() since VOP_INACTIVE
 1697                  * has already been called.
 1698                  */
 1699                 simple_lock(&vp->v_interlock);
 1700                 if (--vp->v_usecount <= 0) {
 1701 #ifdef DIAGNOSTIC
 1702                         if (vp->v_usecount < 0 || vp->v_writecount != 0) {
 1703                                 vprint("vclean: bad ref count", vp);
 1704                                 panic("vclean: ref cnt");
 1705                         }
 1706 #endif
 1707                         /*
 1708                          * Insert at tail of LRU list.
 1709                          */
 1710 
 1711                         simple_unlock(&vp->v_interlock);
 1712                         simple_lock(&vnode_free_list_slock);
 1713 #ifdef DIAGNOSTIC
 1714                         if (vp->v_holdcnt > 0)
 1715                                 panic("vclean: not clean, vp %p", vp);
 1716 #endif
 1717                         TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
 1718                         simple_unlock(&vnode_free_list_slock);
 1719                 } else
 1720                         simple_unlock(&vp->v_interlock);
 1721         }
 1722 
 1723         KASSERT(vp->v_uobj.uo_npages == 0);
 1724         cache_purge(vp);
 1725 
 1726         /*
 1727          * Done with purge, notify sleepers of the grim news.
 1728          */
 1729         vp->v_op = dead_vnodeop_p;
 1730         vp->v_tag = VT_NON;
 1731         simple_lock(&vp->v_interlock);
 1732         VN_KNOTE(vp, NOTE_REVOKE);      /* FreeBSD has this in vn_pollgone() */
 1733         vp->v_flag &= ~VXLOCK;
 1734         if (vp->v_flag & VXWANT) {
 1735                 vp->v_flag &= ~VXWANT;
 1736                 simple_unlock(&vp->v_interlock);
 1737                 wakeup((caddr_t)vp);
 1738         } else
 1739                 simple_unlock(&vp->v_interlock);
 1740 }
 1741 
 1742 /*
 1743  * Recycle a vnode that currently has no users.  When v_usecount is zero
 1744  * the caller's interlock (if one was passed) is released and the vnode
 1745  * is handed to vgonel(); returns 1 if that happened and 0 otherwise.
 1746  */
 1747 int
 1748 vrecycle(vp, inter_lkp, p)
 1749         struct vnode *vp;
 1750         struct simplelock *inter_lkp;
 1751         struct proc *p;
 1752 {
 1753         simple_lock(&vp->v_interlock);
 1754         if (vp->v_usecount != 0) {
 1755                 simple_unlock(&vp->v_interlock);
 1756                 return (0);
 1757         }
 1758         if (inter_lkp != NULL)
 1759                 simple_unlock(inter_lkp);
 1760         vgonel(vp, p);          /* entered with v_interlock held */
 1761         return (1);
 1762 }
 1763 
 1764 /*
 1765  * Eliminate all activity associated with a vnode in preparation for
 1766  * reuse: take the vnode interlock and let vgonel() do the work on
 1767  * behalf of the current process.
 1768  */
 1769 void
 1770 vgone(vp)
 1771         struct vnode *vp;
 1772 {
 1773
 1774         simple_lock(&vp->v_interlock);
 1775         vgonel(vp, curproc);            /* XXX curproc */
 1776 }
 1777 
 1778 /*
 1779  * vgone, with the vp interlock held.
       *
       * The caller must hold vp->v_interlock (asserted below).  If another
       * vgone/vclean already owns the vnode (VXLOCK is set), this marks
       * VXWANT, sleeps on the vnode and returns without doing anything else.
       * Otherwise the vnode is cleaned with vclean(DOCLOSE), taken off its
       * mount's vnode list, marked VBAD and, when it is unreferenced and
       * still on the free list, handed back to vnode_pool.
 1780  */
 1781 void
 1782 vgonel(vp, p)
 1783         struct vnode *vp;
 1784         struct proc *p;
 1785 {
 1786
 1787         LOCK_ASSERT(simple_lock_held(&vp->v_interlock));
 1788
 1789         /*
 1790          * If a vgone (or vclean) is already in progress,
 1791          * wait until it is done and return.
 1792          */
 1793
 1794         if (vp->v_flag & VXLOCK) {
 1795                 vp->v_flag |= VXWANT;
                      /*
                       * NOTE(review): PNORELOCK presumably means the interlock
                       * is not re-taken on wakeup, which would explain the
                       * bare return -- confirm against ltsleep().
                       */
 1796                 ltsleep(vp, PINOD | PNORELOCK, "vgone", 0, &vp->v_interlock);
 1797                 return;
 1798         }
 1799
 1800         /*
 1801          * Clean out the filesystem specific data.
 1802          */
 1803
 1804         vclean(vp, DOCLOSE, p);
 1805         KASSERT((vp->v_flag & VONWORKLST) == 0);
 1806
 1807         /*
 1808          * Delete from old mount point vnode list, if on one.
 1809          */
 1810
 1811         if (vp->v_mount != NULL)
 1812                 insmntque(vp, (struct mount *)0);
 1813
 1814         /*
 1815          * The test of the back pointer and the reference count of
 1816          * zero is because it will be removed from the free list by
 1817          * getcleanvnode, but will not have its reference count
 1818          * incremented until after calling vgone. If the reference
 1819          * count were incremented first, vgone would (incorrectly)
 1820          * try to close the previous instance of the underlying object.
 1821          * So, the back pointer is explicitly set to `0xdeadb' in
 1822          * getnewvnode after removing it from the freelist to ensure
 1823          * that we do not try to move it here.
 1824          */
 1825
 1826         vp->v_type = VBAD;
 1827         if (vp->v_usecount == 0) {
 1828                 boolean_t dofree;
 1829
 1830                 simple_lock(&vnode_free_list_slock);
 1831                 if (vp->v_holdcnt > 0)
 1832                         panic("vgonel: not clean, vp %p", vp);
 1833                 /*
 1834                  * if it isn't on the freelist, we're called by getcleanvnode
 1835                  * and vnode is being re-used.  otherwise, we'll free it.
                       * The 0xdeadb back pointer is the "not on the freelist"
                       * marker tested here.
 1836                  */
 1837                 dofree = vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb;
 1838                 if (dofree) {
 1839                         TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
 1840                         numvnodes--;
 1841                 }
 1842                 simple_unlock(&vnode_free_list_slock);
                      /* The pool release happens after the list lock is dropped. */
 1843                 if (dofree)
 1844                         pool_put(&vnode_pool, vp);
 1845         }
 1846 }
 1847 
 1848 /*
 1849  * Find the special-device vnode for (dev, type) on the spec hash chain.
 1850  * On a match, store it in *vpp and return 1; otherwise return 0 and
 1851  * leave *vpp untouched.  No reference is taken on the vnode.
 1852  */
 1853 int
 1854 vfinddev(dev, type, vpp)
 1855         dev_t dev;
 1856         enum vtype type;
 1857         struct vnode **vpp;
 1858 {
 1859         struct vnode *cand;
 1860
 1861         simple_lock(&spechash_slock);
 1862         cand = speclisth[SPECHASH(dev)];
 1863         while (cand != NULL &&
 1864             (cand->v_rdev != dev || cand->v_type != type))
 1865                 cand = cand->v_specnext;
 1866         if (cand != NULL)
 1867                 *vpp = cand;
 1868         simple_unlock(&spechash_slock);
 1869         return (cand != NULL);
 1870 }
 1871 
 1872 /*
 1873  * Revoke every vnode of the given type whose device has major `maj'
 1874  * and a minor number in the inclusive range [minl, minh].
 1875  */
 1876 void
 1877 vdevgone(maj, minl, minh, type)
 1878         int maj, minl, minh;
 1879         enum vtype type;
 1880 {
 1881         struct vnode *devvp;
 1882         int cur;
 1883
 1884         for (cur = minl; cur <= minh; cur++)
 1885                 if (vfinddev(makedev(maj, cur), type, &devvp) != 0)
 1886                         VOP_REVOKE(devvp, REVOKEALL);
 1887 }
 1888 
 1889 /*
 1890  * Calculate the total number of references to a special device.
       *
       * For an unaliased vnode this is just vp->v_usecount.  Otherwise the
       * use counts of every vnode on vp's spec hash chain with the same
       * device number and type are summed under spechash_slock.  Unused
       * aliases met on the way are destroyed with vgone() and the scan is
       * restarted from the top.
 1891  */
 1892 int
 1893 vcount(vp)
 1894         struct vnode *vp;
 1895 {
 1896         struct vnode *vq, *vnext;
 1897         int count;
 1898
 1899 loop:
 1900         if ((vp->v_flag & VALIASED) == 0)
 1901                 return (vp->v_usecount);
 1902         simple_lock(&spechash_slock);
 1903         for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
 1904                 vnext = vq->v_specnext;
 1905                 if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
 1906                         continue;
 1907                 /*
 1908                  * Alias, but not in use, so flush it out.
                       * vp itself and vnodes already being cleaned (VXLOCK)
                       * are left alone.  The hash lock is dropped before
                       * vgone() is called, so the walk starts over at `loop'.
 1909                  */
 1910                 if (vq->v_usecount == 0 && vq != vp &&
 1911                     (vq->v_flag & VXLOCK) == 0) {
 1912                         simple_unlock(&spechash_slock);
 1913                         vgone(vq);
 1914                         goto loop;
 1915                 }
 1916                 count += vq->v_usecount;
 1917         }
 1918         simple_unlock(&spechash_slock);
 1919         return (count);
 1920 }
 1921 
 1922 /*
 1923  * Printable names of the vnode types; vprint() indexes this by v_type.
 1924  */
 1925 const char * const vnode_types[] = {
 1926         "VNON",
 1927         "VREG",
 1928         "VDIR",
 1929         "VBLK",
 1930         "VCHR",
 1931         "VLNK",
 1932         "VSOCK",
 1933         "VFIFO",
 1934         "VBAD"
 1935 };
 1936
      /*
       * Print out a description of a vnode: an optional "label: " prefix,
       * then the tag, type name, use/write/hold counts and the names of the
       * flag bits listed below.  When v_data is set the line is continued
       * by the filesystem through VOP_PRINT().
       */
 1937 void
 1938 vprint(label, vp)
 1939         char *label;
 1940         struct vnode *vp;
 1941 {
 1942         static const struct {
 1943                 unsigned long bit;
 1944                 const char *name;
 1945         } shown[] = {
 1946                 { VROOT, "|VROOT" },
 1947                 { VTEXT, "|VTEXT" },
 1948                 { VEXECMAP, "|VEXECMAP" },
 1949                 { VSYSTEM, "|VSYSTEM" },
 1950                 { VXLOCK, "|VXLOCK" },
 1951                 { VXWANT, "|VXWANT" },
 1952                 { VBWAIT, "|VBWAIT" },
 1953                 { VALIASED, "|VALIASED" },
 1954         };
 1955         char buf[96];
 1956         size_t i;
 1957
 1958         if (label != NULL)
 1959                 printf("%s: ", label);
 1960         printf("tag %d type %s, usecount %d, writecount %ld, refcount %ld,",
 1961             vp->v_tag, vnode_types[vp->v_type],
 1962             vp->v_usecount, vp->v_writecount, vp->v_holdcnt);
              /* Collect "|NAME" per set flag; the leading '|' is skipped below. */
 1963         buf[0] = '\0';
 1964         for (i = 0; i < sizeof(shown) / sizeof(shown[0]); i++)
 1965                 if (vp->v_flag & shown[i].bit)
 1966                         strlcat(buf, shown[i].name, sizeof(buf));
 1967         if (buf[0] != '\0')
 1968                 printf(" flags (%s)", &buf[1]);
 1969         if (vp->v_data != NULL) {
 1970                 printf("\n\t");
 1971                 VOP_PRINT(vp);
 1972         } else
 1973                 printf("\n");
 1974 }
 1975 
 1976 #ifdef DEBUG
 1977 /*
 1978  * List all of the locked vnodes in the system.
 1979  * Called when debugging the kernel.
       *
       * Walks the mount list; a mount that cannot be marked busy without
       * waiting (LK_NOWAIT) is skipped.  Every vnode on a busied mount for
       * which VOP_ISLOCKED() is true is printed with vprint().
 1980  */
 1981 void
 1982 printlockedvnodes()
 1983 {
 1984         struct mount *mp, *nmp;
 1985         struct vnode *vp;
 1986
 1987         printf("Locked vnodes\n");
 1988         simple_lock(&mountlist_slock);
 1989         for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
 1990              mp = nmp) {
                      /*
                       * NOTE(review): the failure path keeps walking the list
                       * without re-locking, while the success path re-takes
                       * mountlist_slock below; presumably vfs_busy() drops the
                       * lock only on success -- confirm.
                       */
 1991                 if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
 1992                         nmp = CIRCLEQ_NEXT(mp, mnt_list);
 1993                         continue;
 1994                 }
 1995                 LIST_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
 1996                         if (VOP_ISLOCKED(vp))
 1997                                 vprint(NULL, vp);
 1998                 }
                      /* Fetch the successor under the list lock, then unbusy. */
 1999                 simple_lock(&mountlist_slock);
 2000                 nmp = CIRCLEQ_NEXT(mp, mnt_list);
 2001                 vfs_unbusy(mp);
 2002         }
 2003         simple_unlock(&mountlist_slock);
 2004 }
 2005 #endif
 2006 
 2007 /*
 2008  * sysctl helper routine for vfs.generic.conf lookups.
       *
       * name[0] selects a filesystem by its compat type number.  A struct
       * vfsconf describing it is built on the stack and passed to
       * sysctl_lookup() as the node's data.  Returns ENOTDIR unless exactly
       * one name component is left and EOPNOTSUPP for an unknown or
       * unconfigured type number; otherwise whatever sysctl_lookup() returns.
 2009  */
 2010 #if defined(COMPAT_09) || defined(COMPAT_43) || defined(COMPAT_44)
 2011 static int
 2012 sysctl_vfs_generic_conf(SYSCTLFN_ARGS)
 2013 {
 2014         struct vfsconf vfc;
 2015         extern const char * const mountcompatnames[];
 2016         extern int nmountcompatnames;
 2017         struct sysctlnode node;
 2018         struct vfsops *vfsp;
 2019         u_int vfsnum;
 2020
 2021         if (namelen != 1)
 2022                 return (ENOTDIR);
 2023         vfsnum = name[0];
 2024         if (vfsnum >= nmountcompatnames ||
 2025             mountcompatnames[vfsnum] == NULL)
 2026                 return (EOPNOTSUPP);
 2027         vfsp = vfs_getopsbyname(mountcompatnames[vfsnum]);
 2028         if (vfsp == NULL)
 2029                 return (EOPNOTSUPP);
 2030
              /*
               * Clear the whole structure first so that padding and unset
               * fields do not carry stale stack contents out through
               * sysctl_lookup(); vfc_flags simply stays 0.  Copying at most
               * MFSNAMELEN - 1 bytes into the zeroed buffer keeps vfc_name
               * NUL-terminated.
               */
 2031         memset(&vfc, 0, sizeof(vfc));
 2032         vfc.vfc_vfsops = vfsp;
 2033         strncpy(vfc.vfc_name, vfsp->vfs_name, MFSNAMELEN - 1);
 2034         vfc.vfc_typenum = vfsnum;
 2035         vfc.vfc_refcount = vfsp->vfs_refcount;
 2036         vfc.vfc_mountroot = vfsp->vfs_mountroot;
 2037         vfc.vfc_next = NULL;
 2038
 2039         node = *rnode;
 2040         node.sysctl_data = &vfc;
 2041         return (sysctl_lookup(SYSCTLFN_CALL(&node)));
 2042 }
 2043 #endif
 2044 
 2045 /*
 2046  * Top level filesystem related information gathering.
       *
       * Creates the "vfs" and "vfs.generic" nodes and the read-write
       * "usermount" integer backed by dovfsusermount.  When any of
       * COMPAT_09, COMPAT_43 or COMPAT_44 is configured it also creates
       * "maxtypenum" and "conf".  The return values of sysctl_createv()
       * are not checked here.
 2047  */
 2048 SYSCTL_SETUP(sysctl_vfs_setup, "sysctl vfs subtree setup")
 2049 {
 2050 #if defined(COMPAT_09) || defined(COMPAT_43) || defined(COMPAT_44)
 2051         extern int nmountcompatnames;
 2052 #endif
 2053
              /* The "vfs" top-level node. */
 2054         sysctl_createv(clog, 0, NULL, NULL,
 2055                        CTLFLAG_PERMANENT,
 2056                        CTLTYPE_NODE, "vfs", NULL,
 2057                        NULL, 0, NULL, 0,
 2058                        CTL_VFS, CTL_EOL);
              /* The "vfs.generic" node that holds the entries below. */
 2059         sysctl_createv(clog, 0, NULL, NULL,
 2060                        CTLFLAG_PERMANENT,
 2061                        CTLTYPE_NODE, "generic",
 2062                        SYSCTL_DESCR("Non-specific vfs related information"),
 2063                        NULL, 0, NULL, 0,
 2064                        CTL_VFS, VFS_GENERIC, CTL_EOL);
 2065
 2066 #if defined(COMPAT_09) || defined(COMPAT_43) || defined(COMPAT_44)
              /*
               * Immediate integer holding nmountcompatnames.
               * NOTE(review): sysctl_vfs_generic_conf() rejects type numbers
               * >= nmountcompatnames, so this value looks like one past the
               * highest valid number rather than the highest itself --
               * confirm what consumers expect.
               */
 2067         sysctl_createv(clog, 0, NULL, NULL,
 2068                        CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE,
 2069                        CTLTYPE_INT, "maxtypenum",
 2070                        SYSCTL_DESCR("Highest valid filesystem type number"),
 2071                        NULL, nmountcompatnames, NULL, 0,
 2072                        CTL_VFS, VFS_GENERIC, VFS_MAXTYPENUM, CTL_EOL);
 2073 #endif
              /* Read-write integer stored directly in dovfsusermount. */
 2074         sysctl_createv(clog, 0, NULL, NULL,
 2075                        CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
 2076                        CTLTYPE_INT, "usermount",
 2077                        SYSCTL_DESCR("Whether unprivileged users may mount "
 2078                                     "filesystems"),
 2079                        NULL, 0, &dovfsusermount, 0,
 2080                        CTL_VFS, VFS_GENERIC, VFS_USERMOUNT, CTL_EOL);
 2081 #if defined(COMPAT_09) || defined(COMPAT_43) || defined(COMPAT_44)
              /* struct vfsconf lookups, served by sysctl_vfs_generic_conf(). */
 2082         sysctl_createv(clog, 0, NULL, NULL,
 2083                        CTLFLAG_PERMANENT,
 2084                        CTLTYPE_STRUCT, "conf",
 2085                        SYSCTL_DESCR("Filesystem configuration information"),
 2086                        sysctl_vfs_generic_conf, 0, NULL,
 2087                        sizeof(struct vfsconf),
 2088                        CTL_VFS, VFS_GENERIC, VFS_CONF, CTL_EOL);
 2089 #endif
 2090 }
 2091 
 2092 
 2093 int kinfo_vdebug = 1;
 2094 int kinfo_vgetfailed;
 2095 #define KINFO_VNODESLOP 10
 2096 /*
 2097  * Dump vnode list (via sysctl).
 2098  * Copyout address of vnode followed by vnode.
       *
       * With a NULL output buffer only a size estimate (numvnodes plus
       * KINFO_VNODESLOP entries) is stored in *oldlenp.  Otherwise each
       * vnode of every mount that can be busied without waiting is copied
       * out as a (pointer, struct vnode) pair and *oldlenp is set to the
       * number of bytes written.
       *
       * Returns EOPNOTSUPP if name components remain, EPERM on an attempt
       * to set a value, ENOMEM when the buffer fills up, or the copyout()
       * error.  Every return taken while a mount is still marked busy
       * releases it with vfs_unbusy() first.
 2099  */
 2100 /* ARGSUSED */
 2101 int
 2102 sysctl_kern_vnode(SYSCTLFN_ARGS)
 2103 {
 2104         char *where = oldp;
 2105         size_t *sizep = oldlenp;
 2106         struct mount *mp, *nmp;
 2107         struct vnode *nvp, *vp;
 2108         char *bp = where, *savebp;
 2109         char *ewhere;
 2110         int error;
 2111
 2112         if (namelen != 0)
 2113                 return (EOPNOTSUPP);
 2114         if (newp != NULL)
 2115                 return (EPERM);
 2116
 2117 #define VPTRSZ  sizeof(struct vnode *)
 2118 #define VNODESZ sizeof(struct vnode)
 2119         if (where == NULL) {
 2120                 *sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
 2121                 return (0);
 2122         }
 2123         ewhere = where + *sizep;
 2124
 2125         simple_lock(&mountlist_slock);
 2126         for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
 2127              mp = nmp) {
 2128                 if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
 2129                         nmp = CIRCLEQ_NEXT(mp, mnt_list);
 2130                         continue;
 2131                 }
 2132                 savebp = bp;
 2133 again:
 2134                 simple_lock(&mntvnode_slock);
 2135                 for (vp = LIST_FIRST(&mp->mnt_vnodelist); vp != NULL;
 2136                      vp = nvp) {
 2137                         /*
 2138                          * Check that the vp is still associated with
                               * this filesystem.  RACE: could have been
                               * recycled onto the same filesystem.
 2139                          */
 2140                         if (vp->v_mount != mp) {
 2141                                 simple_unlock(&mntvnode_slock);
 2142                                 if (kinfo_vdebug)
 2143                                         printf("kinfo: vp changed\n");
 2144                                 bp = savebp;
 2145                                 goto again;
 2146                         }
 2147                         nvp = LIST_NEXT(vp, v_mntvnodes);
 2148                         if (bp + VPTRSZ + VNODESZ > ewhere) {
 2149                                 simple_unlock(&mntvnode_slock);
                                      /* Do not leave the mount busy on error. */
 2150                                 vfs_unbusy(mp);
 2151                                 *sizep = bp - where;
 2152                                 return (ENOMEM);
 2153                         }
 2154                         simple_unlock(&mntvnode_slock);
 2155                         if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) ||
 2156                            (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ))) {
 2157                                 vfs_unbusy(mp);
 2158                                 return (error);
 2159                         }
 2160                         bp += VPTRSZ + VNODESZ;
 2161                         simple_lock(&mntvnode_slock);
 2162                 }
 2163                 simple_unlock(&mntvnode_slock);
 2164                 simple_lock(&mountlist_slock);
 2165                 nmp = CIRCLEQ_NEXT(mp, mnt_list);
 2166                 vfs_unbusy(mp);
 2167         }
 2168         simple_unlock(&mountlist_slock);
 2169
 2170         *sizep = bp - where;
 2171         return (0);
 2172 }
 2173 
 2174 /*
 2175  * Report whether a filesystem is mounted on the device behind vp or,
 2176  * when vp is aliased, on any alias with the same device number and
 2177  * type.  Returns EBUSY if so and 0 otherwise.
 2178  */
 2179 int
 2180 vfs_mountedon(vp)
 2181         struct vnode *vp;
 2182 {
 2183         struct vnode *alias;
 2184         int busy;
 2185
 2186         if (vp->v_specmountpoint != NULL)
 2187                 return (EBUSY);
 2188         if ((vp->v_flag & VALIASED) == 0)
 2189                 return (0);
 2190
 2191         busy = 0;
 2192         simple_lock(&spechash_slock);
 2193         for (alias = *vp->v_hashchain; alias != NULL && !busy;
 2194             alias = alias->v_specnext)
 2195                 busy = (alias->v_rdev == vp->v_rdev &&
 2196                     alias->v_type == vp->v_type &&
 2197                     alias->v_specmountpoint != NULL);
 2198         simple_unlock(&spechash_slock);
 2199         return (busy ? EBUSY : 0);
 2200 }
 2201 
 2202 /*
 2203  * Validate a sockaddr: only AF_INET/AF_INET6 (when configured) pass, and
 2204  * only with the exact sockaddr length and a zero port; AF_INET must also
 2205  * have an all-zero sin_zero.  Returns 0 if acceptable, -1 otherwise.
 2206  */
 2207 static int
 2208 sacheck(struct sockaddr *sa)
 2209 {
 2210         switch (sa->sa_family) {
 2211 #ifdef INET
 2212         case AF_INET: {
 2213                 struct sockaddr_in *sin = (struct sockaddr_in *)sa;
 2214                 size_t idx;
 2215
 2216                 if (sin->sin_len != sizeof(*sin) || sin->sin_port != 0)
 2217                         return -1;
 2218                 for (idx = 0; idx < sizeof(sin->sin_zero); idx++)
 2219                         if (sin->sin_zero[idx] != '\0')
 2220                                 return -1;
 2221                 return 0;
 2222         }
 2223 #endif
 2224 #ifdef INET6
 2225         case AF_INET6: {
 2226                 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sa;
 2227
 2228                 if (sin6->sin6_len != sizeof(*sin6) || sin6->sin6_port != 0)
 2229                         return -1;
 2230                 return 0;
 2231         }
 2232 #endif
 2233         default:
 2234                 return -1;
 2235         }
 2236 }
 2237 
 2238 /*
 2239  * Build hash lists of net addresses and hang them off the mount point.
 2240  * Called by ufs_mount() to set up the lists of export addresses.
       *
       * With ex_addrlen == 0 the default export entry of `nep' is filled in
       * (EPERM if the mount already has one).  Otherwise a netcred holding
       * the address and optional mask is allocated, validated with sacheck()
       * and added to the radix tree of the address family; an entry that
       * already exists is accepted only if its flags and anonymous
       * credentials match the request.
       *
       * Returns 0 on success or an errno value.  Every failure after the
       * allocation leaves through `out' so that the netcred is freed.
 2241  */
 2242 static int
 2243 vfs_hang_addrlist(mp, nep, argp)
 2244         struct mount *mp;
 2245         struct netexport *nep;
 2246         struct export_args *argp;
 2247 {
 2248         struct netcred *np, *enp;
 2249         struct radix_node_head *rnh;
 2250         int i;
 2251         struct sockaddr *saddr, *smask = 0;
 2252         struct domain *dom;
 2253         int error;
 2254
 2255         if (argp->ex_addrlen == 0) {
 2256                 if (mp->mnt_flag & MNT_DEFEXPORTED)
 2257                         return (EPERM);
 2258                 np = &nep->ne_defexported;
 2259                 np->netc_exflags = argp->ex_flags;
 2260                 crcvt(&np->netc_anon, &argp->ex_anon);
 2261                 np->netc_anon.cr_ref = 1;
 2262                 mp->mnt_flag |= MNT_DEFEXPORTED;
 2263                 return (0);
 2264         }
 2265
 2266         if (argp->ex_addrlen > MLEN || argp->ex_masklen > MLEN)
 2267                 return (EINVAL);
 2268
 2269         i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
 2270         np = (struct netcred *)malloc(i, M_NETADDR, M_WAITOK);
 2271         memset((caddr_t)np, 0, i);
 2272         saddr = (struct sockaddr *)(np + 1);
 2273         error = copyin(argp->ex_addr, (caddr_t)saddr, argp->ex_addrlen);
 2274         if (error)
 2275                 goto out;
 2276         if (saddr->sa_len > argp->ex_addrlen)
 2277                 saddr->sa_len = argp->ex_addrlen;
              /* np is ours from here on: fail through `out', never return. */
 2278         if (sacheck(saddr) == -1) {
 2279                 error = EINVAL;
 2280                 goto out;
 2281         }
 2282         if (argp->ex_masklen) {
 2283                 smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen);
 2284                 error = copyin(argp->ex_mask, (caddr_t)smask, argp->ex_masklen);
 2285                 if (error)
 2286                         goto out;
 2287                 if (smask->sa_len > argp->ex_masklen)
 2288                         smask->sa_len = argp->ex_masklen;
 2289                 if (smask->sa_family != saddr->sa_family ||
 2290                     sacheck(smask) == -1) {
 2291                         error = EINVAL;
 2292                         goto out;
 2293                 }
 2294         }
 2295         i = saddr->sa_family;
 2296         if ((rnh = nep->ne_rtable[i]) == 0) {
 2297                 /*
                       * Seems silly to initialize every AF when most are not
                       * used, do so on demand here
                       */
 2298                 for (dom = domains; dom; dom = dom->dom_next)
 2299                         if (dom->dom_family == i && dom->dom_rtattach) {
 2300                                 dom->dom_rtattach((void **)&nep->ne_rtable[i],
 2301                                         dom->dom_rtoffset);
 2302                                 break;
 2303                         }
 2304                 if ((rnh = nep->ne_rtable[i]) == 0) {
 2305                         error = ENOBUFS;
 2306                         goto out;
 2307                 }
 2308         }
 2309
 2310         enp = (struct netcred *)(*rnh->rnh_addaddr)(saddr, smask, rnh,
 2311             np->netc_rnodes);
 2312         if (enp != np) {
 2313                 if (enp == NULL) {
 2314                         enp = (struct netcred *)(*rnh->rnh_lookup)(saddr,
 2315                             smask, rnh);
 2316                         if (enp == NULL) {
 2317                                 error = EPERM;
 2318                                 goto out;
 2319                         }
 2320                 } else
 2321                         enp->netc_refcnt++;
 2322
 2323                 goto check;
 2324         } else
 2325                 enp->netc_refcnt = 1;
 2326
 2327         np->netc_exflags = argp->ex_flags;
 2328         crcvt(&np->netc_anon, &argp->ex_anon);
 2329         np->netc_anon.cr_ref = 1;
 2330         return 0;
 2331 check:
 2332         if (enp->netc_exflags != argp->ex_flags ||
 2333             crcmp(&enp->netc_anon, &argp->ex_anon) != 0)
 2334                 error = EPERM;
 2335         else
 2336                 error = 0;
 2337 out:
 2338         free(np, M_NETADDR);
 2339         return error;
 2340 }
 2341 
 2342 /*
       * Radix-tree walker used by vfs_free_addrlist(): unhook `rn' from the
       * tree head passed in `w', then drop one reference on the netcred that
       * `rn' is cast to and free it once no references remain.
       * Always returns 0.
       */
      /* ARGSUSED */
 2343 static int
 2344 vfs_free_netcred(rn, w)
 2345         struct radix_node *rn;
 2346         void *w;
 2347 {
 2348         struct radix_node_head *head = w;
 2349         struct netcred *cred = (struct netcred *)(void *)rn;
 2350
 2351         (*head->rnh_deladdr)(rn->rn_key, rn->rn_mask, head);
 2352         if (--cred->netc_refcnt <= 0)
 2353                 free(cred, M_NETADDR);
 2354         return (0);
 2355 }
 2356 
 2357 /*
       * Free the net address tables hanging off `nep': every populated
       * per-address-family slot is walked with vfs_free_netcred(), its
       * table head is freed and the slot is cleared.
       */
 2358 static void
 2359 vfs_free_addrlist(nep)
 2360         struct netexport *nep;
 2361 {
 2362         struct radix_node_head *head;
 2363         int af;
 2364
 2365         for (af = 0; af <= AF_MAX; af++) {
 2366                 head = nep->ne_rtable[af];
 2367                 if (head == NULL)
 2368                         continue;
 2369                 (*head->rnh_walktree)(head, vfs_free_netcred, head);
 2370                 free((caddr_t)head, M_RTABLE);
 2371                 nep->ne_rtable[af] = 0;
 2372         }
 2373 }
 2374 
 2375 /*
       * Apply an export request to a mount.  MNT_DELEXPORT first drops the
       * public-filesystem registration (if this mount holds it) and all
       * address lists.  MNT_EXPORTED then registers the public filesystem
       * (when MNT_EXPUBLIC is also set) and hangs the new address list.
       * Returns 0 or the error of the step that failed.
       */
 2376 int
 2377 vfs_export(mp, nep, argp)
 2378         struct mount *mp;
 2379         struct netexport *nep;
 2380         struct export_args *argp;
 2381 {
 2382         int error;

 2383         if (argp->ex_flags & MNT_DELEXPORT) {
 2384                 if (mp->mnt_flag & MNT_EXPUBLIC) {
 2385                         vfs_setpublicfs(NULL, NULL, NULL);
 2386                         mp->mnt_flag &= ~MNT_EXPUBLIC;
 2387                 }
 2388                 vfs_free_addrlist(nep);
 2389                 mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
 2390         }
 2391         if ((argp->ex_flags & MNT_EXPORTED) == 0)
 2392                 return (0);
 2393         if (argp->ex_flags & MNT_EXPUBLIC) {
 2394                 if ((error = vfs_setpublicfs(mp, nep, argp)) != 0)
 2395                         return (error);
 2396                 mp->mnt_flag |= MNT_EXPUBLIC;
 2397         }
 2398         error = vfs_hang_addrlist(mp, nep, argp);
 2399         if (error == 0)
 2400                 mp->mnt_flag |= MNT_EXPORTED;
 2401         return (error);
 2402 }
 2403 
 2404 /*
 2405  * Set the publicly exported filesystem (WebNFS). Currently, only
 2406  * one public filesystem is possible in the spec (RFC 2054 and 2055)
       *
       * mp == NULL invalidates the current registration and frees its index
       * file name.  Otherwise the file handle of mp's root vnode is stored
       * in nfs_pub and, if argp names an index file, the name is copied in
       * and rejected with EINVAL when it contains a '/'.
       *
       * Returns 0 on success, EBUSY if another mount is already public, or
       * the error from VFS_ROOT(), VFS_VPTOFH() or copyinstr().
 2407  */
 2408 int
 2409 vfs_setpublicfs(mp, nep, argp)
 2410         struct mount *mp;
 2411         struct netexport *nep;
 2412         struct export_args *argp;
 2413 {
 2414         int error;
 2415         struct vnode *rvp;
 2416         char *cp, *newidx;
 2417
 2418         /*
 2419          * mp == NULL -> invalidate the current info, the FS is
 2420          * no longer exported. May be called from either vfs_export
 2421          * or unmount, so check if it hasn't already been done.
 2422          */
 2423         if (mp == NULL) {
 2424                 if (nfs_pub.np_valid) {
 2425                         nfs_pub.np_valid = 0;
 2426                         if (nfs_pub.np_index != NULL) {
 2427                                 FREE(nfs_pub.np_index, M_TEMP);
 2428                                 nfs_pub.np_index = NULL;
 2429                         }
 2430                 }
 2431                 return (0);
 2432         }
 2433
 2434         /*
 2435          * Only one allowed at a time.
 2436          */
 2437         if (nfs_pub.np_valid != 0 && mp != nfs_pub.np_mount)
 2438                 return (EBUSY);
 2439
 2440         /*
 2441          * Get real filehandle for root of exported FS.
 2442          */
 2443         memset((caddr_t)&nfs_pub.np_handle, 0, sizeof(nfs_pub.np_handle));
 2444         nfs_pub.np_handle.fh_fsid = mp->mnt_stat.f_fsid;
 2445
 2446         if ((error = VFS_ROOT(mp, &rvp)))
 2447                 return (error);
 2448
              /* Release the root vnode whether or not the handle was built. */
 2449         error = VFS_VPTOFH(rvp, &nfs_pub.np_handle.fh_fid);
 2450         vput(rvp);
 2451         if (error)
 2452                 return (error);
 2453
 2454         /*
 2455          * If an indexfile was specified, pull it in.
               * The name is built in a private buffer and only installed in
               * nfs_pub once it has been validated, so a failure never leaves
               * a freed pointer behind.
 2456          */
 2457         if (argp->ex_indexfile != NULL) {
 2458                 MALLOC(newidx, char *, MAXNAMLEN + 1, M_TEMP, M_WAITOK);
 2459                 error = copyinstr(argp->ex_indexfile, newidx,
 2460                     MAXNAMLEN, (size_t *)0);
 2461                 if (!error) {
 2462                         /* Check for illegal filenames. */
 2463                         for (cp = newidx; *cp; cp++) {
 2464                                 if (*cp == '/') {
 2465                                         error = EINVAL;
 2466                                         break;
 2467                                 }
 2468                         }
 2469                 }
 2470                 if (error) {
 2471                         FREE(newidx, M_TEMP);
 2472                         return (error);
 2473                 }
                      /* Drop the name left by an earlier export of this mount. */
 2474                 if (nfs_pub.np_valid && nfs_pub.np_index != NULL)
 2475                         FREE(nfs_pub.np_index, M_TEMP);
 2476                 nfs_pub.np_index = newidx;
 2477         }
 2478
 2479         nfs_pub.np_mount = mp;
 2480         nfs_pub.np_valid = 1;
 2481         return (0);
 2482 }
 2483 
 2484 struct netcred *
 2485 vfs_export_lookup(mp, nep, nam)
 2486         struct mount *mp;
 2487         struct netexport *nep;
 2488         struct mbuf *nam;
 2489 {
 2490         struct netcred *np;
 2491         struct radix_node_head *rnh;
 2492         struct sockaddr *saddr;
 2493 
 2494         np = NULL;
 2495         if (mp->mnt_flag & MNT_EXPORTED) {
 2496                 /*
 2497                  * Lookup in the export list first.
 2498                  */
 2499                 if (nam != NULL) {
 2500                         saddr = mtod(nam, struct sockaddr *);
 2501                         rnh = nep->ne_rtable[saddr->sa_family];
 2502                         if (rnh != NULL) {
 2503                                 np = (struct netcred *)
 2504                                         (*rnh->rnh_matchaddr)((caddr_t)saddr,
 2505                                                               rnh);
 2506                                 if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
 2507                                         np = NULL;
 2508                         }
 2509                 }
 2510                 /*
 2511                  * If no address match, use the default if it exists.
 2512                  */
 2513                 if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
 2514                         np = &nep->ne_defexported;
 2515         }
 2516         return (np);
 2517 }
 2518 
 2519 /*
 2520  * Do the usual access checking.
 2521  * file_mode, uid and gid are from the vnode in question,
 2522  * while acc_mode and cred are from the VOP_ACCESS parameter list
 2523  */
 2524 int
 2525 vaccess(type, file_mode, uid, gid, acc_mode, cred)
 2526         enum vtype type;
 2527         mode_t file_mode;
 2528         uid_t uid;
 2529         gid_t gid;
 2530         mode_t acc_mode;
 2531         struct ucred *cred;
 2532 {
 2533         mode_t mask;
 2534 
 2535         /*
 2536          * Super-user always gets read/write access, but execute access depends
 2537          * on at least one execute bit being set.
 2538          */
 2539         if (cred->cr_uid == 0) {
 2540                 if ((acc_mode & VEXEC) && type != VDIR &&
 2541                     (file_mode & (S_IXUSR|S_IXGRP|S_IXOTH)) == 0)
 2542                         return (EACCES);
 2543                 return (0);
 2544         }
 2545 
 2546         mask = 0;
 2547 
 2548         /* Otherwise, check the owner. */
 2549         if (cred->cr_uid == uid) {
 2550                 if (acc_mode & VEXEC)
 2551                         mask |= S_IXUSR;
 2552                 if (acc_mode & VREAD)
 2553                         mask |= S_IRUSR;
 2554                 if (acc_mode & VWRITE)
 2555                         mask |= S_IWUSR;
 2556                 return ((file_mode & mask) == mask ? 0 : EACCES);
 2557         }
 2558 
 2559         /* Otherwise, check the groups. */
 2560         if (cred->cr_gid == gid || groupmember(gid, cred)) {
 2561                 if (acc_mode & VEXEC)
 2562                         mask |= S_IXGRP;
 2563                 if (acc_mode & VREAD)
 2564                         mask |= S_IRGRP;
 2565                 if (acc_mode & VWRITE)
 2566                         mask |= S_IWGRP;
 2567                 return ((file_mode & mask) == mask ? 0 : EACCES);
 2568         }
 2569 
 2570         /* Otherwise, check everyone else. */
 2571         if (acc_mode & VEXEC)
 2572                 mask |= S_IXOTH;
 2573         if (acc_mode & VREAD)
 2574                 mask |= S_IROTH;
 2575         if (acc_mode & VWRITE)
 2576                 mask |= S_IWOTH;
 2577         return ((file_mode & mask) == mask ? 0 : EACCES);
 2578 }
 2579 
 2580 /*
 2581  * Unmount all file systems.
 2582  * We traverse the list in reverse order under the assumption that doing so
 2583  * will avoid needing to worry about dependencies.
 2584  */
 2585 void
 2586 vfs_unmountall(p)
 2587         struct proc *p;
 2588 {
 2589         struct mount *mp, *nmp;
 2590         int allerror, error;
 2591 
 2592         for (allerror = 0,
 2593              mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
 2594                 nmp = mp->mnt_list.cqe_prev;
 2595 #ifdef DEBUG
 2596                 printf("unmounting %s (%s)...\n",
 2597                     mp->mnt_stat.f_mntonname, mp->mnt_stat.f_mntfromname);
 2598 #endif
 2599                 /*
 2600                  * XXX Freeze syncer.  Must do this before locking the
 2601                  * mount point.  See dounmount() for details.
 2602                  */
 2603                 lockmgr(&syncer_lock, LK_EXCLUSIVE, NULL);
 2604                 if (vfs_busy(mp, 0, 0)) {
 2605                         lockmgr(&syncer_lock, LK_RELEASE, NULL);
 2606                         continue;
 2607                 }
 2608                 if ((error = dounmount(mp, MNT_FORCE, p)) != 0) {
 2609                         printf("unmount of %s failed with error %d\n",
 2610                             mp->mnt_stat.f_mntonname, error);
 2611                         allerror = 1;
 2612                 }
 2613         }
 2614         if (allerror)
 2615                 printf("WARNING: some file systems would not unmount\n");
 2616 }
 2617 
 2618 extern struct simplelock bqueue_slock; /* XXX */
 2619 
 2620 /*
 2621  * Sync and unmount file systems before shutting down.
 2622  */
 2623 void
 2624 vfs_shutdown()
 2625 {
 2626         struct lwp *l = curlwp;
 2627         struct proc *p;
 2628 
 2629         /* XXX we're certainly not running in proc0's context! */
 2630         if (l == NULL || (p = l->l_proc) == NULL)
 2631                 p = &proc0;
 2632 
 2633         printf("syncing disks... ");
 2634 
 2635         /* remove user process from run queue */
 2636         suspendsched();
 2637         (void) spl0();
 2638 
 2639         /* avoid coming back this way again if we panic. */
 2640         doing_shutdown = 1;
 2641 
 2642         sys_sync(l, NULL, NULL);
 2643 
 2644         /* Wait for sync to finish. */
 2645         if (buf_syncwait() != 0) {
 2646 #if defined(DDB) && defined(DEBUG_HALT_BUSY)
 2647                 Debugger();
 2648 #endif
 2649                 printf("giving up\n");
 2650                 return;
 2651         } else
 2652                 printf("done\n");
 2653 
 2654         /*
 2655          * If we've panic'd, don't make the situation potentially
 2656          * worse by unmounting the file systems.
 2657          */
 2658         if (panicstr != NULL)
 2659                 return;
 2660 
 2661         /* Release inodes held by texts before update. */
 2662 #ifdef notdef
 2663         vnshutdown();
 2664 #endif
 2665         /* Unmount file systems. */
 2666         vfs_unmountall(p);
 2667 }
 2668 
 2669 /*
 2670  * Mount the root file system.  If the operator didn't specify a
 2671  * file system to use, try all possible file systems until one
 2672  * succeeds.
 2673  */
 2674 int
 2675 vfs_mountroot()
 2676 {
 2677         struct vfsops *v;
 2678 
 2679         if (root_device == NULL)
 2680                 panic("vfs_mountroot: root device unknown");
 2681 
 2682         switch (root_device->dv_class) {
 2683         case DV_IFNET:
 2684                 if (rootdev != NODEV)
 2685                         panic("vfs_mountroot: rootdev set for DV_IFNET "
 2686                             "(0x%08x -> %d,%d)", rootdev,
 2687                             major(rootdev), minor(rootdev));
 2688                 break;
 2689 
 2690         case DV_DISK:
 2691                 if (rootdev == NODEV)
 2692                         panic("vfs_mountroot: rootdev not set for DV_DISK");
 2693                 break;
 2694 
 2695         default:
 2696                 printf("%s: inappropriate for root file system\n",
 2697                     root_device->dv_xname);
 2698                 return (ENODEV);
 2699         }
 2700 
 2701         /*
 2702          * If user specified a file system, use it.
 2703          */
 2704         if (mountroot != NULL)
 2705                 return ((*mountroot)());
 2706 
 2707         /*
 2708          * Try each file system currently configured into the kernel.
 2709          */
 2710         for (v = LIST_FIRST(&vfs_list); v != NULL; v = LIST_NEXT(v, vfs_list)) {
 2711                 if (v->vfs_mountroot == NULL)
 2712                         continue;
 2713 #ifdef DEBUG
 2714                 aprint_normal("mountroot: trying %s...\n", v->vfs_name);
 2715 #endif
 2716                 if ((*v->vfs_mountroot)() == 0) {
 2717                         aprint_normal("root file system type: %s\n",
 2718                             v->vfs_name);
 2719                         break;
 2720                 }
 2721         }
 2722 
 2723         if (v == NULL) {
 2724                 printf("no file system for %s", root_device->dv_xname);
 2725                 if (root_device->dv_class == DV_DISK)
 2726                         printf(" (dev 0x%x)", rootdev);
 2727                 printf("\n");
 2728                 return (EFTYPE);
 2729         }
 2730         return (0);
 2731 }
 2732 
 2733 /*
 2734  * Given a file system name, look up the vfsops for that
 2735  * file system, or return NULL if file system isn't present
 2736  * in the kernel.
 2737  */
 2738 struct vfsops *
 2739 vfs_getopsbyname(name)
 2740         const char *name;
 2741 {
 2742         struct vfsops *v;
 2743 
 2744         for (v = LIST_FIRST(&vfs_list); v != NULL; v = LIST_NEXT(v, vfs_list)) {
 2745                 if (strcmp(v->vfs_name, name) == 0)
 2746                         break;
 2747         }
 2748 
 2749         return (v);
 2750 }
 2751 
 2752 /*
 2753  * Establish a file system and initialize it.
 2754  */
 2755 int
 2756 vfs_attach(vfs)
 2757         struct vfsops *vfs;
 2758 {
 2759         struct vfsops *v;
 2760         int error = 0;
 2761 
 2762 
 2763         /*
 2764          * Make sure this file system doesn't already exist.
 2765          */
 2766         LIST_FOREACH(v, &vfs_list, vfs_list) {
 2767                 if (strcmp(vfs->vfs_name, v->vfs_name) == 0) {
 2768                         error = EEXIST;
 2769                         goto out;
 2770                 }
 2771         }
 2772 
 2773         /*
 2774          * Initialize the vnode operations for this file system.
 2775          */
 2776         vfs_opv_init(vfs->vfs_opv_descs);
 2777 
 2778         /*
 2779          * Now initialize the file system itself.
 2780          */
 2781         (*vfs->vfs_init)();
 2782 
 2783         /*
 2784          * ...and link it into the kernel's list.
 2785          */
 2786         LIST_INSERT_HEAD(&vfs_list, vfs, vfs_list);
 2787 
 2788         /*
 2789          * Sanity: make sure the reference count is 0.
 2790          */
 2791         vfs->vfs_refcount = 0;
 2792 
 2793  out:
 2794         return (error);
 2795 }
 2796 
 2797 /*
 2798  * Remove a file system from the kernel.
 2799  */
 2800 int
 2801 vfs_detach(vfs)
 2802         struct vfsops *vfs;
 2803 {
 2804         struct vfsops *v;
 2805 
 2806         /*
 2807          * Make sure no one is using the filesystem.
 2808          */
 2809         if (vfs->vfs_refcount != 0)
 2810                 return (EBUSY);
 2811 
 2812         /*
 2813          * ...and remove it from the kernel's list.
 2814          */
 2815         LIST_FOREACH(v, &vfs_list, vfs_list) {
 2816                 if (v == vfs) {
 2817                         LIST_REMOVE(v, vfs_list);
 2818                         break;
 2819                 }
 2820         }
 2821 
 2822         if (v == NULL)
 2823                 return (ESRCH);
 2824 
 2825         /*
 2826          * Now run the file system-specific cleanups.
 2827          */
 2828         (*vfs->vfs_done)();
 2829 
 2830         /*
 2831          * Free the vnode operations vector.
 2832          */
 2833         vfs_opv_free(vfs->vfs_opv_descs);
 2834         return (0);
 2835 }
 2836 
 2837 void
 2838 vfs_reinit(void)
 2839 {
 2840         struct vfsops *vfs;
 2841 
 2842         LIST_FOREACH(vfs, &vfs_list, vfs_list) {
 2843                 if (vfs->vfs_reinit) {
 2844                         (*vfs->vfs_reinit)();
 2845                 }
 2846         }
 2847 }
 2848 
 2849 /*
 2850  * Request a filesystem to suspend write operations.
 2851  */
 2852 int
 2853 vfs_write_suspend(struct mount *mp, int slpflag, int slptimeo)
 2854 {
 2855         struct proc *p = curproc;       /* XXX */
 2856         int error;
 2857 
 2858         while ((mp->mnt_iflag & IMNT_SUSPEND)) {
 2859                 if (slptimeo < 0)
 2860                         return EWOULDBLOCK;
 2861                 error = tsleep(&mp->mnt_flag, slpflag, "suspwt1", slptimeo);
 2862                 if (error)
 2863                         return error;
 2864         }
 2865         mp->mnt_iflag |= IMNT_SUSPEND;
 2866 
 2867         if (mp->mnt_writeopcountupper > 0)
 2868                 tsleep(&mp->mnt_writeopcountupper, PUSER - 1, "suspwt", 0);
 2869 
 2870         error = VFS_SYNC(mp, MNT_WAIT, p->p_ucred, p);
 2871         if (error) {
 2872                 vfs_write_resume(mp);
 2873                 return error;
 2874         }
 2875         mp->mnt_iflag |= IMNT_SUSPENDLOW;
 2876 
 2877         if (mp->mnt_writeopcountlower > 0)
 2878                 tsleep(&mp->mnt_writeopcountlower, PUSER - 1, "suspwt", 0);
 2879         mp->mnt_iflag |= IMNT_SUSPENDED;
 2880 
 2881         return 0;
 2882 }
 2883 
 2884 /*
 2885  * Request a filesystem to resume write operations.
 2886  */
 2887 void
 2888 vfs_write_resume(struct mount *mp)
 2889 {
 2890 
 2891         if ((mp->mnt_iflag & IMNT_SUSPEND) == 0)
 2892                 return;
 2893         mp->mnt_iflag &= ~(IMNT_SUSPEND | IMNT_SUSPENDLOW | IMNT_SUSPENDED);
 2894         wakeup(&mp->mnt_flag);
 2895 }
 2896 
 2897 void
 2898 copy_statfs_info(struct statfs *sbp, const struct mount *mp)
 2899 {
 2900         const struct statfs *mbp;
 2901 
 2902         if (sbp == (mbp = &mp->mnt_stat))
 2903                 return;
 2904 
 2905         sbp->f_oflags = mbp->f_oflags;
 2906         sbp->f_type = mbp->f_type;
 2907         (void)memcpy(&sbp->f_fsid, &mbp->f_fsid, sizeof(sbp->f_fsid));
 2908         sbp->f_owner = mbp->f_owner;
 2909         sbp->f_flags = mbp->f_flags;
 2910         sbp->f_syncwrites = mbp->f_syncwrites;
 2911         sbp->f_asyncwrites = mbp->f_asyncwrites;
 2912         sbp->f_spare[0] = mbp->f_spare[0];
 2913         (void)memcpy(sbp->f_fstypename, mbp->f_fstypename,
 2914             sizeof(sbp->f_fstypename));
 2915         (void)memcpy(sbp->f_mntonname, mbp->f_mntonname,
 2916             sizeof(sbp->f_mntonname));
 2917         (void)memcpy(sbp->f_mntfromname, mp->mnt_stat.f_mntfromname,
 2918             sizeof(sbp->f_mntfromname));
 2919 }
 2920 
 2921 int
 2922 set_statfs_info(const char *onp, int ukon, const char *fromp, int ukfrom,
 2923     struct mount *mp, struct proc *p)
 2924 {
 2925         int error;
 2926         size_t size;
 2927         struct statfs *sfs = &mp->mnt_stat;
 2928         int (*fun)(const void *, void *, size_t, size_t *);
 2929 
 2930         (void)strncpy(mp->mnt_stat.f_fstypename, mp->mnt_op->vfs_name,
 2931             sizeof(mp->mnt_stat.f_fstypename));
 2932 
 2933         if (onp) {
 2934                 struct cwdinfo *cwdi = p->p_cwdi;
 2935                 fun = (ukon == UIO_SYSSPACE) ? copystr : copyinstr;
 2936                 if (cwdi->cwdi_rdir != NULL) {
 2937                         size_t len;
 2938                         char *bp;
 2939                         char *path = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
 2940 
 2941                         if (!path) /* XXX can't happen with M_WAITOK */
 2942                                 return ENOMEM;
 2943 
 2944                         bp = path + MAXPATHLEN;
 2945                         *--bp = '\0';
 2946                         error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp,
 2947                             path, MAXPATHLEN / 2, 0, p);
 2948                         if (error) {
 2949                                 free(path, M_TEMP);
 2950                                 return error;
 2951                         }
 2952 
 2953                         len = strlen(bp);
 2954                         if (len > sizeof(sfs->f_mntonname) - 1)
 2955                                 len = sizeof(sfs->f_mntonname) - 1;
 2956                         (void)strncpy(sfs->f_mntonname, bp, len);
 2957                         free(path, M_TEMP);
 2958 
 2959                         if (len < sizeof(sfs->f_mntonname) - 1) {
 2960                                 error = (*fun)(onp, &sfs->f_mntonname[len],
 2961                                     sizeof(sfs->f_mntonname) - len - 1, &size);
 2962                                 if (error)
 2963                                         return error;
 2964                                 size += len;
 2965                         } else {
 2966                                 size = len;
 2967                         }
 2968                 } else {
 2969                         error = (*fun)(onp, &sfs->f_mntonname,
 2970                             sizeof(sfs->f_mntonname) - 1, &size);
 2971                         if (error)
 2972                                 return error;
 2973                 }
 2974                 (void)memset(sfs->f_mntonname + size, 0,
 2975                     sizeof(sfs->f_mntonname) - size);
 2976         }
 2977 
 2978         if (fromp) {
 2979                 fun = (ukfrom == UIO_SYSSPACE) ? copystr : copyinstr;
 2980                 error = (*fun)(fromp, sfs->f_mntfromname,
 2981                     sizeof(sfs->f_mntfromname) - 1, &size);
 2982                 if (error)
 2983                         return error;
 2984                 (void)memset(sfs->f_mntfromname + size, 0,
 2985                     sizeof(sfs->f_mntfromname) - size);
 2986         }
 2987         return 0;
 2988 }
 2989 
 2990 #ifdef DDB
 2991 const char buf_flagbits[] =
 2992         "\2\1AGE\2NEEDCOMMIT\3ASYNC\4BAD\5BUSY\6SCANNED\7CALL\10DELWRI"
 2993         "\11DIRTY\12DONE\13EINTR\14ERROR\15GATHERED\16INVAL\17LOCKED\20NOCACHE"
 2994         "\21ORDERED\22CACHE\23PHYS\24RAW\25READ\26TAPE\30WANTED"
 2995         "\32XXX\33VFLUSH";
 2996 
 2997 void
 2998 vfs_buf_print(bp, full, pr)
 2999         struct buf *bp;
 3000         int full;
 3001         void (*pr)(const char *, ...);
 3002 {
 3003         char buf[1024];
 3004 
 3005         (*pr)("  vp %p lblkno 0x%"PRIx64" blkno 0x%"PRIx64" dev 0x%x\n",
 3006                   bp->b_vp, bp->b_lblkno, bp->b_blkno, bp->b_dev);
 3007 
 3008         bitmask_snprintf(bp->b_flags, buf_flagbits, buf, sizeof(buf));
 3009         (*pr)("  error %d flags 0x%s\n", bp->b_error, buf);
 3010 
 3011         (*pr)("  bufsize 0x%lx bcount 0x%lx resid 0x%lx\n",
 3012                   bp->b_bufsize, bp->b_bcount, bp->b_resid);
 3013         (*pr)("  data %p saveaddr %p dep %p\n",
 3014                   bp->b_data, bp->b_saveaddr, LIST_FIRST(&bp->b_dep));
 3015         (*pr)("  iodone %p\n", bp->b_iodone);
 3016 }
 3017 
 3018 
 3019 const char vnode_flagbits[] =
 3020         "\2\1ROOT\2TEXT\3SYSTEM\4ISTTY\5EXECMAP"
 3021         "\11XLOCK\12XWANT\13BWAIT\14ALIASED"
 3022         "\15DIROP\16LAYER\17ONWORKLIST\20DIRTY";
 3023 
 3024 const char * const vnode_tags[] = {
 3025         "VT_NON",
 3026         "VT_UFS",
 3027         "VT_NFS",
 3028         "VT_MFS",
 3029         "VT_MSDOSFS",
 3030         "VT_LFS",
 3031         "VT_LOFS",
 3032         "VT_FDESC",
 3033         "VT_PORTAL",
 3034         "VT_NULL",
 3035         "VT_UMAP",
 3036         "VT_KERNFS",
 3037         "VT_PROCFS",
 3038         "VT_AFS",
 3039         "VT_ISOFS",
 3040         "VT_UNION",
 3041         "VT_ADOSFS",
 3042         "VT_EXT2FS",
 3043         "VT_CODA",
 3044         "VT_FILECORE",
 3045         "VT_NTFS",
 3046         "VT_VFS",
 3047         "VT_OVERLAY",
 3048         "VT_SMBFS"
 3049 };
 3050 
 3051 void
 3052 vfs_vnode_print(vp, full, pr)
 3053         struct vnode *vp;
 3054         int full;
 3055         void (*pr)(const char *, ...);
 3056 {
 3057         char buf[256];
 3058         const char *vtype, *vtag;
 3059 
 3060         uvm_object_printit(&vp->v_uobj, full, pr);
 3061         bitmask_snprintf(vp->v_flag, vnode_flagbits, buf, sizeof(buf));
 3062         (*pr)("\nVNODE flags %s\n", buf);
 3063         (*pr)("mp %p numoutput %d size 0x%llx\n",
 3064               vp->v_mount, vp->v_numoutput, vp->v_size);
 3065 
 3066         (*pr)("data %p usecount %d writecount %ld holdcnt %ld numoutput %d\n",
 3067               vp->v_data, vp->v_usecount, vp->v_writecount,
 3068               vp->v_holdcnt, vp->v_numoutput);
 3069 
 3070         vtype = (vp->v_type >= 0 &&
 3071                  vp->v_type < sizeof(vnode_types) / sizeof(vnode_types[0])) ?
 3072                 vnode_types[vp->v_type] : "UNKNOWN";
 3073         vtag = (vp->v_tag >= 0 &&
 3074                 vp->v_tag < sizeof(vnode_tags) / sizeof(vnode_tags[0])) ?
 3075                 vnode_tags[vp->v_tag] : "UNKNOWN";
 3076 
 3077         (*pr)("type %s(%d) tag %s(%d) mount %p typedata %p\n",
 3078               vtype, vp->v_type, vtag, vp->v_tag,
 3079               vp->v_mount, vp->v_mountedhere);
 3080 
 3081         if (full) {
 3082                 struct buf *bp;
 3083 
 3084                 (*pr)("clean bufs:\n");
 3085                 LIST_FOREACH(bp, &vp->v_cleanblkhd, b_vnbufs) {
 3086                         (*pr)(" bp %p\n", bp);
 3087                         vfs_buf_print(bp, full, pr);
 3088                 }
 3089 
 3090                 (*pr)("dirty bufs:\n");
 3091                 LIST_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs) {
 3092                         (*pr)(" bp %p\n", bp);
 3093                         vfs_buf_print(bp, full, pr);
 3094                 }
 3095         }
 3096 }
 3097 
 3098 void
 3099 vfs_mount_print(mp, full, pr)
 3100         struct mount *mp;
 3101         int full;
 3102         void (*pr)(const char *, ...);
 3103 {
 3104         char sbuf[256];
 3105 
 3106         (*pr)("vnodecovered = %p syncer = %p data = %p\n",
 3107                         mp->mnt_vnodecovered,mp->mnt_syncer,mp->mnt_data);
 3108 
 3109         (*pr)("fs_bshift %d dev_bshift = %d maxsymlinklen = %d\n",
 3110                         mp->mnt_fs_bshift,mp->mnt_dev_bshift,mp->mnt_maxsymlinklen);
 3111 
 3112         bitmask_snprintf(mp->mnt_flag, __MNT_FLAG_BITS, sbuf, sizeof(sbuf));
 3113         (*pr)("flag = %s\n", sbuf);
 3114 
 3115         bitmask_snprintf(mp->mnt_iflag, __IMNT_FLAG_BITS, sbuf, sizeof(sbuf));
 3116         (*pr)("iflag = %s\n", sbuf);
 3117 
 3118         /* XXX use lockmgr_printinfo */
 3119         if (mp->mnt_lock.lk_sharecount)
 3120                 (*pr)(" lock type %s: SHARED (count %d)", mp->mnt_lock.lk_wmesg,
 3121                     mp->mnt_lock.lk_sharecount);
 3122         else if (mp->mnt_lock.lk_flags & LK_HAVE_EXCL) {
 3123                 (*pr)(" lock type %s: EXCL (count %d) by ",
 3124                     mp->mnt_lock.lk_wmesg, mp->mnt_lock.lk_exclusivecount);
 3125                 if (mp->mnt_lock.lk_flags & LK_SPIN)
 3126                         (*pr)("processor %lu", mp->mnt_lock.lk_cpu);
 3127                 else
 3128                         (*pr)("pid %d.%d", mp->mnt_lock.lk_lockholder,
 3129                             mp->mnt_lock.lk_locklwp);
 3130         } else
 3131                 (*pr)(" not locked");
 3132         if ((mp->mnt_lock.lk_flags & LK_SPIN) == 0 && mp->mnt_lock.lk_waitcount > 0)
 3133                 (*pr)(" with %d pending", mp->mnt_lock.lk_waitcount);
 3134 
 3135         (*pr)("\n");
 3136 
 3137         if (mp->mnt_unmounter) {
 3138                 (*pr)("unmounter pid = %d ",mp->mnt_unmounter->p_pid);
 3139         }
 3140         (*pr)("wcnt = %d, writeopcountupper = %d, writeopcountupper = %d\n",
 3141                 mp->mnt_wcnt,mp->mnt_writeopcountupper,mp->mnt_writeopcountlower);
 3142 
 3143         (*pr)("statfs cache:\n");
 3144         (*pr)("\ttype = %d\n",mp->mnt_stat.f_type);
 3145         (*pr)("\toflags = 0x%04x\n",mp->mnt_stat.f_oflags);
 3146         (*pr)("\tbsize = %d\n",mp->mnt_stat.f_bsize);
 3147         (*pr)("\tiosize = %d\n",mp->mnt_stat.f_iosize);
 3148         (*pr)("\tblocks = %d\n",mp->mnt_stat.f_blocks);
 3149         (*pr)("\tbfree = %d\n",mp->mnt_stat.f_bfree);
 3150         (*pr)("\tbavail = %d\n",mp->mnt_stat.f_bavail);
 3151         (*pr)("\tfiles = %d\n",mp->mnt_stat.f_files);
 3152         (*pr)("\tffree = %d\n",mp->mnt_stat.f_ffree);
 3153         (*pr)("\tf_fsid = { 0x%"PRIx32", 0x%"PRIx32" }\n",
 3154                         mp->mnt_stat.f_fsid.val[0],mp->mnt_stat.f_fsid.val[1]);
 3155         (*pr)("\towner = %"PRIu32"\n",mp->mnt_stat.f_owner);
 3156         bitmask_snprintf(mp->mnt_stat.f_flags, __MNT_FLAG_BITS, sbuf, sizeof(sbuf));
 3157         (*pr)("\tflags = %s\n",sbuf);
 3158         (*pr)("\tsyncwrites = %d\n",mp->mnt_stat.f_syncwrites);
 3159         (*pr)("\tasyncwrites = %d\n",mp->mnt_stat.f_asyncwrites);
 3160         (*pr)("\tfstypename = %s\n",mp->mnt_stat.f_fstypename);
 3161         (*pr)("\tmntonname = %s\n",mp->mnt_stat.f_mntonname);
 3162         (*pr)("\tmntfromname = %s\n",mp->mnt_stat.f_mntfromname);
 3163 
 3164         {
 3165                 int cnt = 0;
 3166                 struct vnode *vp;
 3167                 (*pr)("locked vnodes =");
 3168                 /* XXX would take mountlist lock, except ddb may not have context */
 3169                 LIST_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
 3170                         if (VOP_ISLOCKED(vp)) {
 3171                                 if ((++cnt % 6) == 0) {
 3172                                         (*pr)(" %p,\n\t", vp);
 3173                                 } else {
 3174                                         (*pr)(" %p,", vp);
 3175                                 }
 3176                         }
 3177                 }
 3178                 (*pr)("\n");
 3179         }
 3180 
 3181         if (full) {
 3182                 int cnt = 0;
 3183                 struct vnode *vp;
 3184                 (*pr)("all vnodes =");
 3185                 /* XXX would take mountlist lock, except ddb may not have context */
 3186                 LIST_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
 3187                         if (!LIST_NEXT(vp, v_mntvnodes)) {
 3188                                 (*pr)(" %p", vp);
 3189                         } else if ((++cnt % 6) == 0) {
 3190                                 (*pr)(" %p,\n\t", vp);
 3191                         } else {
 3192                                 (*pr)(" %p,", vp);
 3193                         }
 3194                 }
 3195                 (*pr)("\n", vp);
 3196         }
 3197 }
 3198 
 3199 #endif

Cache object: 156f814c972efc8a5a706f3172e6c08c


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.