FreeBSD/Linux Kernel Cross Reference
sys/kern/vfs_subr.c


    1 /*      $NetBSD: vfs_subr.c,v 1.357.4.9 2010/01/11 00:02:09 snj Exp $   */
    2 
    3 /*-
    4  * Copyright (c) 1997, 1998, 2004, 2005, 2007, 2008 The NetBSD Foundation, Inc.
    5  * All rights reserved.
    6  *
    7  * This code is derived from software contributed to The NetBSD Foundation
    8  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
    9  * NASA Ames Research Center, by Charles M. Hannum, and by Andrew Doran.
   10  *
   11  * Redistribution and use in source and binary forms, with or without
   12  * modification, are permitted provided that the following conditions
   13  * are met:
   14  * 1. Redistributions of source code must retain the above copyright
   15  *    notice, this list of conditions and the following disclaimer.
   16  * 2. Redistributions in binary form must reproduce the above copyright
   17  *    notice, this list of conditions and the following disclaimer in the
   18  *    documentation and/or other materials provided with the distribution.
   19  *
   20  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
   21  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
   22  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
   23  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
   24  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   30  * POSSIBILITY OF SUCH DAMAGE.
   31  */
   32 
   33 /*
   34  * Copyright (c) 1989, 1993
   35  *      The Regents of the University of California.  All rights reserved.
   36  * (c) UNIX System Laboratories, Inc.
   37  * All or some portions of this file are derived from material licensed
   38  * to the University of California by American Telephone and Telegraph
   39  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
   40  * the permission of UNIX System Laboratories, Inc.
   41  *
   42  * Redistribution and use in source and binary forms, with or without
   43  * modification, are permitted provided that the following conditions
   44  * are met:
   45  * 1. Redistributions of source code must retain the above copyright
   46  *    notice, this list of conditions and the following disclaimer.
   47  * 2. Redistributions in binary form must reproduce the above copyright
   48  *    notice, this list of conditions and the following disclaimer in the
   49  *    documentation and/or other materials provided with the distribution.
   50  * 3. Neither the name of the University nor the names of its contributors
   51  *    may be used to endorse or promote products derived from this software
   52  *    without specific prior written permission.
   53  *
   54  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   55  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   56  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   57  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   58  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   59  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   60  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   61  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   62  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   63  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   64  * SUCH DAMAGE.
   65  *
   66  *      @(#)vfs_subr.c  8.13 (Berkeley) 4/18/94
   67  */
   68 
   69 /*
   70  * Note on v_usecount and locking:
   71  *
    72  * At nearly all points where it is known that v_usecount could be
    73  * zero, the vnode interlock will be held.
   74  *
   75  * To change v_usecount away from zero, the interlock must be held.  To
   76  * change from a non-zero value to zero, again the interlock must be
   77  * held.
   78  *
    79  * There is a flag bit, VC_XLOCK, embedded in v_usecount.
    80  * To raise v_usecount while the VC_XLOCK bit is set, the interlock
    81  * must be held.
    82  * To modify the VC_XLOCK bit itself, the interlock must also be held.
    83  * We always keep the usecount (v_usecount & VC_MASK) non-zero while the
    84  * VC_XLOCK bit is set.
   85  *
   86  * Unless the VC_XLOCK bit is set, changing the usecount from a non-zero
   87  * value to a non-zero value can safely be done using atomic operations,
   88  * without the interlock held.
   89  * Even if the VC_XLOCK bit is set, decreasing the usecount to a non-zero
   90  * value can be done using atomic operations, without the interlock held.
   91  */
   92 
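/*
 * A minimal sketch of the rule above (illustration only, mirroring
 * vtryget() later in this file): v_usecount may be raised from
 * non-zero to non-zero with a CAS loop, but if the count is zero or
 * VC_XLOCK is set, the caller must fall back to the vnode interlock:
 *
 *	u_int use, next;
 *
 *	for (use = vp->v_usecount;; use = next) {
 *		if (use == 0 || (use & VC_XLOCK) != 0)
 *			return false;	(take v_interlock instead)
 *		next = atomic_cas_uint(&vp->v_usecount, use, use + 1);
 *		if (next == use)
 *			return true;	(reference safely taken)
 *	}
 */
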
   93 #include <sys/cdefs.h>
   94 __KERNEL_RCSID(0, "$NetBSD: vfs_subr.c,v 1.357.4.9 2010/01/11 00:02:09 snj Exp $");
   95 
   96 #include "opt_ddb.h"
   97 #include "opt_compat_netbsd.h"
   98 #include "opt_compat_43.h"
   99 
  100 #include <sys/param.h>
  101 #include <sys/systm.h>
  102 #include <sys/proc.h>
  103 #include <sys/kernel.h>
  104 #include <sys/mount.h>
  105 #include <sys/fcntl.h>
  106 #include <sys/vnode.h>
  107 #include <sys/stat.h>
  108 #include <sys/namei.h>
  109 #include <sys/ucred.h>
  110 #include <sys/buf.h>
  111 #include <sys/errno.h>
  112 #include <sys/malloc.h>
  113 #include <sys/syscallargs.h>
  114 #include <sys/device.h>
  115 #include <sys/filedesc.h>
  116 #include <sys/kauth.h>
  117 #include <sys/atomic.h>
  118 #include <sys/kthread.h>
  119 #include <sys/wapbl.h>
  120 
  121 #include <miscfs/specfs/specdev.h>
  122 #include <miscfs/syncfs/syncfs.h>
  123 
  124 #include <uvm/uvm.h>
  125 #include <uvm/uvm_readahead.h>
  126 #include <uvm/uvm_ddb.h>
  127 
  128 #include <sys/sysctl.h>
  129 
  130 const enum vtype iftovt_tab[16] = {
  131         VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
  132         VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
  133 };
  134 const int       vttoif_tab[9] = {
  135         0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
  136         S_IFSOCK, S_IFIFO, S_IFMT,
  137 };
  138 
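/*
 * For illustration: these tables back the IFTOVT() and VTTOIF()
 * macros in <sys/vnode.h>, which convert between stat(2) file-type
 * bits and vnode types by table lookup:
 *
 *	enum vtype vt = iftovt_tab[((mode) & S_IFMT) >> 12];
 *	int ifmt = vttoif_tab[(int)vt];
 *
 * For example, S_IFREG is 0100000, so S_IFREG >> 12 == 8 and
 * iftovt_tab[8] == VREG; in the other direction,
 * vttoif_tab[VREG] == S_IFREG.
 */
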
  139 /*
  140  * Insq/Remq for the vnode usage lists.
  141  */
  142 #define bufinsvn(bp, dp)        LIST_INSERT_HEAD(dp, bp, b_vnbufs)
  143 #define bufremvn(bp) {                                                  \
  144         LIST_REMOVE(bp, b_vnbufs);                                      \
  145         (bp)->b_vnbufs.le_next = NOLIST;                                \
  146 }
  147 
  148 int doforce = 1;                /* 1 => permit forcible unmounting */
  149 int prtactive = 0;              /* 1 => print out reclaim of active vnodes */
  150 
  151 extern int dovfsusermount;      /* 1 => permit any user to mount filesystems */
  152 extern int vfs_magiclinks;      /* 1 => expand "magic" symlinks */
  153 
  154 static vnodelst_t vnode_free_list = TAILQ_HEAD_INITIALIZER(vnode_free_list);
  155 static vnodelst_t vnode_hold_list = TAILQ_HEAD_INITIALIZER(vnode_hold_list);
  156 static vnodelst_t vrele_list = TAILQ_HEAD_INITIALIZER(vrele_list);
  157 
  158 struct mntlist mountlist =                      /* mounted filesystem list */
  159     CIRCLEQ_HEAD_INITIALIZER(mountlist);
  160 
  161 u_int numvnodes;
  162 static specificdata_domain_t mount_specificdata_domain;
  163 
  164 static int vrele_pending;
  165 static int vrele_gen;
  166 static kmutex_t vrele_lock;
  167 static kcondvar_t vrele_cv;
  168 static lwp_t *vrele_lwp;
  169 
  170 kmutex_t mountlist_lock;
  171 kmutex_t mntid_lock;
  172 kmutex_t mntvnode_lock;
  173 kmutex_t vnode_free_list_lock;
  174 kmutex_t vfs_list_lock;
  175 
  176 static pool_cache_t vnode_cache;
  177 
  178 MALLOC_DEFINE(M_VNODE, "vnodes", "Dynamically allocated vnodes");
  179 
  180 /*
  181  * These define the root filesystem and device.
  182  */
  183 struct vnode *rootvnode;
  184 struct device *root_device;                     /* root device */
  185 
  186 /*
  187  * Local declarations.
  188  */
  189 
  190 static void vrele_thread(void *);
  191 static void insmntque(vnode_t *, struct mount *);
  192 static int getdevvp(dev_t, vnode_t **, enum vtype);
   193 static vnode_t *getcleanvnode(void);
  194 void vpanic(vnode_t *, const char *);
  195 
  196 #ifdef DEBUG 
  197 void printlockedvnodes(void);
  198 #endif
  199 
  200 #ifdef DIAGNOSTIC
  201 void
  202 vpanic(vnode_t *vp, const char *msg)
  203 {
  204 
  205         vprint(NULL, vp);
  206         panic("%s\n", msg);
  207 }
  208 #else
  209 #define vpanic(vp, msg) /* nothing */
  210 #endif
  211 
  212 void
  213 vn_init1(void)
  214 {
  215 
  216         vnode_cache = pool_cache_init(sizeof(struct vnode), 0, 0, 0, "vnodepl",
  217             NULL, IPL_NONE, NULL, NULL, NULL);
  218         KASSERT(vnode_cache != NULL);
  219 
  220         /* Create deferred release thread. */
  221         mutex_init(&vrele_lock, MUTEX_DEFAULT, IPL_NONE);
  222         cv_init(&vrele_cv, "vrele");
  223         if (kthread_create(PRI_VM, KTHREAD_MPSAFE, NULL, vrele_thread,
  224             NULL, &vrele_lwp, "vrele"))
  225                 panic("fork vrele");
  226 }
  227 
  228 /*
  229  * Initialize the vnode management data structures.
  230  */
  231 void
  232 vntblinit(void)
  233 {
  234 
  235         mutex_init(&mountlist_lock, MUTEX_DEFAULT, IPL_NONE);
  236         mutex_init(&mntid_lock, MUTEX_DEFAULT, IPL_NONE);
  237         mutex_init(&mntvnode_lock, MUTEX_DEFAULT, IPL_NONE);
  238         mutex_init(&vnode_free_list_lock, MUTEX_DEFAULT, IPL_NONE);
  239         mutex_init(&specfs_lock, MUTEX_DEFAULT, IPL_NONE);
  240         mutex_init(&vfs_list_lock, MUTEX_DEFAULT, IPL_NONE);
  241 
  242         mount_specificdata_domain = specificdata_domain_create();
  243 
  244         /* Initialize the filesystem syncer. */
  245         vn_initialize_syncerd();
  246         vn_init1();
  247 }
  248 
  249 int
  250 vfs_drainvnodes(long target, struct lwp *l)
  251 {
  252 
  253         while (numvnodes > target) {
  254                 vnode_t *vp;
  255 
  256                 mutex_enter(&vnode_free_list_lock);
  257                 vp = getcleanvnode();
  258                 if (vp == NULL)
  259                         return EBUSY; /* give up */
  260                 ungetnewvnode(vp);
  261         }
  262 
  263         return 0;
  264 }
  265 
  266 /*
  267  * Lookup a mount point by filesystem identifier.
  268  *
  269  * XXX Needs to add a reference to the mount point.
  270  */
  271 struct mount *
  272 vfs_getvfs(fsid_t *fsid)
  273 {
  274         struct mount *mp;
  275 
  276         mutex_enter(&mountlist_lock);
  277         CIRCLEQ_FOREACH(mp, &mountlist, mnt_list) {
  278                 if (mp->mnt_stat.f_fsidx.__fsid_val[0] == fsid->__fsid_val[0] &&
  279                     mp->mnt_stat.f_fsidx.__fsid_val[1] == fsid->__fsid_val[1]) {
  280                         mutex_exit(&mountlist_lock);
  281                         return (mp);
  282                 }
  283         }
  284         mutex_exit(&mountlist_lock);
  285         return ((struct mount *)0);
  286 }
  287 
  288 /*
  289  * Drop a reference to a mount structure, freeing if the last reference.
  290  */
  291 void
  292 vfs_destroy(struct mount *mp)
  293 {
  294 
  295         if (__predict_true((int)atomic_dec_uint_nv(&mp->mnt_refcnt) > 0)) {
  296                 return;
  297         }
  298 
  299         /*
  300          * Nothing else has visibility of the mount: we can now
  301          * free the data structures.
  302          */
  303         KASSERT(mp->mnt_refcnt == 0);
  304         specificdata_fini(mount_specificdata_domain, &mp->mnt_specdataref);
  305         rw_destroy(&mp->mnt_unmounting);
  306         mutex_destroy(&mp->mnt_updating);
  307         mutex_destroy(&mp->mnt_renamelock);
  308         if (mp->mnt_op != NULL) {
  309                 vfs_delref(mp->mnt_op);
  310         }
  311         kmem_free(mp, sizeof(*mp));
  312 }
  313 
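/*
 * A minimal sketch of the mnt_refcnt protocol (illustration only):
 * a holder of one reference may hand out another with an atomic
 * increment, as vfs_busy() does below, and every reference is
 * eventually dropped through vfs_destroy(), which frees the mount
 * on the last release:
 *
 *	atomic_inc_uint(&mp->mnt_refcnt);	(gain a reference)
 *	...
 *	vfs_destroy(mp);			(drop it again)
 */
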
  314 /*
  315  * grab a vnode from freelist and clean it.
  316  */
  317 vnode_t *
  318 getcleanvnode(void)
  319 {
  320         vnode_t *vp;
  321         vnodelst_t *listhd;
  322 
  323         KASSERT(mutex_owned(&vnode_free_list_lock));
  324 
  325 retry:
  326         listhd = &vnode_free_list;
  327 try_nextlist:
  328         TAILQ_FOREACH(vp, listhd, v_freelist) {
  329                 /*
   330                  * It's safe to test v_usecount and v_iflag
   331                  * without holding the interlock here, since
   332                  * referenced or clean vnodes should never
   333                  * appear on these lists.
  334                  */
  335                 if (vp->v_usecount != 0) {
  336                         vpanic(vp, "free vnode isn't");
  337                 }
  338                 if ((vp->v_iflag & VI_CLEAN) != 0) {
  339                         vpanic(vp, "clean vnode on freelist");
  340                 }
  341                 if (vp->v_freelisthd != listhd) {
  342                         printf("vnode sez %p, listhd %p\n", vp->v_freelisthd, listhd);
  343                         vpanic(vp, "list head mismatch");
  344                 }
  345                 if (!mutex_tryenter(&vp->v_interlock))
  346                         continue;
  347                 /*
  348                  * Our lwp might hold the underlying vnode
  349                  * locked, so don't try to reclaim a VI_LAYER
  350                  * node if it's locked.
  351                  */
  352                 if ((vp->v_iflag & VI_XLOCK) == 0 &&
  353                     ((vp->v_iflag & VI_LAYER) == 0 || VOP_ISLOCKED(vp) == 0)) {
  354                         break;
  355                 }
  356                 mutex_exit(&vp->v_interlock);
  357         }
  358 
  359         if (vp == NULL) {
  360                 if (listhd == &vnode_free_list) {
  361                         listhd = &vnode_hold_list;
  362                         goto try_nextlist;
  363                 }
  364                 mutex_exit(&vnode_free_list_lock);
  365                 return NULL;
  366         }
  367 
  368         /* Remove it from the freelist. */
  369         TAILQ_REMOVE(listhd, vp, v_freelist);
  370         vp->v_freelisthd = NULL;
  371         mutex_exit(&vnode_free_list_lock);
  372 
  373         if (vp->v_usecount != 0) {
  374                 /*
   375                  * It was referenced again before we got the interlock.
   376                  * Don't return it to the freelist - the holder of the last
  377                  * reference will destroy it.
  378                  */
  379                 mutex_exit(&vp->v_interlock);
  380                 mutex_enter(&vnode_free_list_lock);
  381                 goto retry;
  382         }
  383 
  384         /*
  385          * The vnode is still associated with a file system, so we must
  386          * clean it out before reusing it.  We need to add a reference
  387          * before doing this.  If the vnode gains another reference while
  388          * being cleaned out then we lose - retry.
  389          */
  390         atomic_add_int(&vp->v_usecount, 1 + VC_XLOCK);
  391         vclean(vp, DOCLOSE);
  392         KASSERT(vp->v_usecount >= 1 + VC_XLOCK);
  393         atomic_add_int(&vp->v_usecount, -VC_XLOCK);
  394         if (vp->v_usecount == 1) {
  395                 /* We're about to dirty it. */
  396                 vp->v_iflag &= ~VI_CLEAN;
  397                 mutex_exit(&vp->v_interlock);
  398                 if (vp->v_type == VBLK || vp->v_type == VCHR) {
  399                         spec_node_destroy(vp);
  400                 }
  401                 vp->v_type = VNON;
  402         } else {
  403                 /*
  404                  * Don't return to freelist - the holder of the last
  405                  * reference will destroy it.
  406                  */
  407                 vrelel(vp, 0); /* releases vp->v_interlock */
  408                 mutex_enter(&vnode_free_list_lock);
  409                 goto retry;
  410         }
  411 
  412         if (vp->v_data != NULL || vp->v_uobj.uo_npages != 0 ||
  413             !TAILQ_EMPTY(&vp->v_uobj.memq)) {
  414                 vpanic(vp, "cleaned vnode isn't");
  415         }
  416         if (vp->v_numoutput != 0) {
  417                 vpanic(vp, "clean vnode has pending I/O's");
  418         }
  419         if ((vp->v_iflag & VI_ONWORKLST) != 0) {
  420                 vpanic(vp, "clean vnode on syncer list");
  421         }
  422 
  423         return vp;
  424 }
  425 
  426 /*
  427  * Mark a mount point as busy, and gain a new reference to it.  Used to
  428  * prevent the file system from being unmounted during critical sections.
  429  *
  430  * => The caller must hold a pre-existing reference to the mount.
  431  * => Will fail if the file system is being unmounted, or is unmounted.
  432  */
  433 int
  434 vfs_busy(struct mount *mp, struct mount **nextp)
  435 {
  436 
  437         KASSERT(mp->mnt_refcnt > 0);
  438 
  439         if (__predict_false(!rw_tryenter(&mp->mnt_unmounting, RW_READER))) {
  440                 if (nextp != NULL) {
  441                         KASSERT(mutex_owned(&mountlist_lock));
  442                         *nextp = CIRCLEQ_NEXT(mp, mnt_list);
  443                 }
  444                 return EBUSY;
  445         }
  446         if (__predict_false((mp->mnt_iflag & IMNT_GONE) != 0)) {
  447                 rw_exit(&mp->mnt_unmounting);
  448                 if (nextp != NULL) {
  449                         KASSERT(mutex_owned(&mountlist_lock));
  450                         *nextp = CIRCLEQ_NEXT(mp, mnt_list);
  451                 }
  452                 return ENOENT;
  453         }
  454         if (nextp != NULL) {
  455                 mutex_exit(&mountlist_lock);
  456         }
  457         atomic_inc_uint(&mp->mnt_refcnt);
  458         return 0;
  459 }
  460 
  461 /*
  462  * Unbusy a busy filesystem.
  463  *
  464  * => If keepref is true, preserve reference added by vfs_busy().
  465  * => If nextp != NULL, acquire mountlist_lock.
  466  */
  467 void
  468 vfs_unbusy(struct mount *mp, bool keepref, struct mount **nextp)
  469 {
  470 
  471         KASSERT(mp->mnt_refcnt > 0);
  472 
  473         if (nextp != NULL) {
  474                 mutex_enter(&mountlist_lock);
  475         }
  476         rw_exit(&mp->mnt_unmounting);
  477         if (!keepref) {
  478                 vfs_destroy(mp);
  479         }
  480         if (nextp != NULL) {
  481                 KASSERT(mutex_owned(&mountlist_lock));
  482                 *nextp = CIRCLEQ_NEXT(mp, mnt_list);
  483         }
  484 }
  485 
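/*
 * A sketch of how vfs_busy()/vfs_unbusy() combine with the nextp
 * argument to walk the mount list safely (hypothetical caller, for
 * illustration).  On success vfs_busy() releases mountlist_lock;
 * vfs_unbusy() with a non-NULL nextp reacquires it and fetches the
 * next mount:
 *
 *	struct mount *mp, *nmp;
 *
 *	mutex_enter(&mountlist_lock);
 *	for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
 *	    mp = nmp) {
 *		if (vfs_busy(mp, &nmp) != 0)
 *			continue;	(lock still held, nmp set)
 *		... operate on the busied file system ...
 *		vfs_unbusy(mp, false, &nmp);
 *	}
 *	mutex_exit(&mountlist_lock);
 */
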
  486 /*
  487  * Lookup a filesystem type, and if found allocate and initialize
  488  * a mount structure for it.
  489  *
  490  * Devname is usually updated by mount(8) after booting.
  491  */
  492 int
  493 vfs_rootmountalloc(const char *fstypename, const char *devname,
  494     struct mount **mpp)
  495 {
  496         struct vfsops *vfsp = NULL;
  497         struct mount *mp;
  498 
  499         mutex_enter(&vfs_list_lock);
  500         LIST_FOREACH(vfsp, &vfs_list, vfs_list)
  501                 if (!strncmp(vfsp->vfs_name, fstypename, 
  502                     sizeof(mp->mnt_stat.f_fstypename)))
  503                         break;
  504         if (vfsp == NULL) {
  505                 mutex_exit(&vfs_list_lock);
  506                 return (ENODEV);
  507         }
  508         vfsp->vfs_refcount++;
  509         mutex_exit(&vfs_list_lock);
  510 
  511         mp = kmem_zalloc(sizeof(*mp), KM_SLEEP);
  512         if (mp == NULL)
  513                 return ENOMEM;
  514         mp->mnt_refcnt = 1;
  515         rw_init(&mp->mnt_unmounting);
  516         mutex_init(&mp->mnt_updating, MUTEX_DEFAULT, IPL_NONE);
  517         mutex_init(&mp->mnt_renamelock, MUTEX_DEFAULT, IPL_NONE);
  518         (void)vfs_busy(mp, NULL);
  519         TAILQ_INIT(&mp->mnt_vnodelist);
  520         mp->mnt_op = vfsp;
  521         mp->mnt_flag = MNT_RDONLY;
  522         mp->mnt_vnodecovered = NULL;
  523         (void)strlcpy(mp->mnt_stat.f_fstypename, vfsp->vfs_name,
  524             sizeof(mp->mnt_stat.f_fstypename));
  525         mp->mnt_stat.f_mntonname[0] = '/';
  526         mp->mnt_stat.f_mntonname[1] = '\0';
  527         mp->mnt_stat.f_mntfromname[sizeof(mp->mnt_stat.f_mntfromname) - 1] =
  528             '\0';
  529         (void)copystr(devname, mp->mnt_stat.f_mntfromname,
  530             sizeof(mp->mnt_stat.f_mntfromname) - 1, 0);
  531         mount_initspecific(mp);
  532         *mpp = mp;
  533         return (0);
  534 }
  535 
  536 /*
  537  * Routines having to do with the management of the vnode table.
  538  */
  539 extern int (**dead_vnodeop_p)(void *);
  540 
  541 /*
  542  * Return the next vnode from the free list.
  543  */
  544 int
  545 getnewvnode(enum vtagtype tag, struct mount *mp, int (**vops)(void *),
  546             vnode_t **vpp)
  547 {
  548         struct uvm_object *uobj;
  549         static int toggle;
  550         vnode_t *vp;
  551         int error = 0, tryalloc;
  552 
  553  try_again:
  554         if (mp != NULL) {
  555                 /*
  556                  * Mark filesystem busy while we're creating a
  557                  * vnode.  If unmount is in progress, this will
  558                  * fail.
  559                  */
  560                 error = vfs_busy(mp, NULL);
  561                 if (error)
  562                         return error;
  563         }
  564 
  565         /*
  566          * We must choose whether to allocate a new vnode or recycle an
  567          * existing one. The criterion for allocating a new one is that
  568          * the total number of vnodes is less than the number desired or
  569          * there are no vnodes on either free list. Generally we only
  570          * want to recycle vnodes that have no buffers associated with
  571          * them, so we look first on the vnode_free_list. If it is empty,
  572          * we next consider vnodes with referencing buffers on the
   573          * vnode_hold_list. The toggle ensures that half the time we
   574          * will recycle a vnode from the vnode_hold_list, and half the time
   575          * we will allocate a new one unless the list has grown to twice
   576          * the desired size. We are reluctant to recycle vnodes from the
   577          * vnode_hold_list because we will lose the identity of all their
   578          * referencing buffers.
  579          */
  580 
  581         vp = NULL;
  582 
  583         mutex_enter(&vnode_free_list_lock);
  584 
  585         toggle ^= 1;
  586         if (numvnodes > 2 * desiredvnodes)
  587                 toggle = 0;
  588 
  589         tryalloc = numvnodes < desiredvnodes ||
  590             (TAILQ_FIRST(&vnode_free_list) == NULL &&
  591              (TAILQ_FIRST(&vnode_hold_list) == NULL || toggle));
  592 
  593         if (tryalloc) {
  594                 numvnodes++;
  595                 mutex_exit(&vnode_free_list_lock);
  596                 if ((vp = vnalloc(NULL)) == NULL) {
  597                         mutex_enter(&vnode_free_list_lock);
  598                         numvnodes--;
  599                 } else
  600                         vp->v_usecount = 1;
  601         }
  602 
  603         if (vp == NULL) {
  604                 vp = getcleanvnode();
  605                 if (vp == NULL) {
  606                         if (mp != NULL) {
  607                                 vfs_unbusy(mp, false, NULL);
  608                         }
  609                         if (tryalloc) {
  610                                 printf("WARNING: unable to allocate new "
  611                                     "vnode, retrying...\n");
  612                                 kpause("newvn", false, hz, NULL);
  613                                 goto try_again;
  614                         }
  615                         tablefull("vnode", "increase kern.maxvnodes or NVNODE");
  616                         *vpp = 0;
  617                         return (ENFILE);
  618                 }
  619                 vp->v_iflag = 0;
  620                 vp->v_vflag = 0;
  621                 vp->v_uflag = 0;
  622                 vp->v_socket = NULL;
  623         }
  624 
  625         KASSERT(vp->v_usecount == 1);
  626         KASSERT(vp->v_freelisthd == NULL);
  627         KASSERT(LIST_EMPTY(&vp->v_nclist));
  628         KASSERT(LIST_EMPTY(&vp->v_dnclist));
  629 
  630         vp->v_type = VNON;
  631         vp->v_vnlock = &vp->v_lock;
  632         vp->v_tag = tag;
  633         vp->v_op = vops;
  634         insmntque(vp, mp);
  635         *vpp = vp;
  636         vp->v_data = 0;
  637 
  638         /*
  639          * initialize uvm_object within vnode.
  640          */
  641 
  642         uobj = &vp->v_uobj;
  643         KASSERT(uobj->pgops == &uvm_vnodeops);
  644         KASSERT(uobj->uo_npages == 0);
  645         KASSERT(TAILQ_FIRST(&uobj->memq) == NULL);
  646         vp->v_size = vp->v_writesize = VSIZENOTSET;
  647 
  648         if (mp != NULL) {
  649                 if ((mp->mnt_iflag & IMNT_MPSAFE) != 0)
  650                         vp->v_vflag |= VV_MPSAFE;
  651                 vfs_unbusy(mp, true, NULL);
  652         }
  653 
  654         return (0);
  655 }
  656 
  657 /*
  658  * This is really just the reverse of getnewvnode(). Needed for
   659  * VFS_VGET functions that may need to push back a vnode in case
  660  * of a locking race.
  661  */
  662 void
  663 ungetnewvnode(vnode_t *vp)
  664 {
  665 
  666         KASSERT(vp->v_usecount == 1);
  667         KASSERT(vp->v_data == NULL);
  668         KASSERT(vp->v_freelisthd == NULL);
  669 
  670         mutex_enter(&vp->v_interlock);
  671         vp->v_iflag |= VI_CLEAN;
  672         vrelel(vp, 0);
  673 }
  674 
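/*
 * A sketch of the race ungetnewvnode() exists for (illustration only;
 * vget_cached() stands in for a file system's hash lookup and is a
 * hypothetical helper, not a real function).  Two threads may race
 * to create the vnode for one inode, and the loser pushes its fresh
 * vnode back:
 *
 *	error = getnewvnode(VT_UFS, mp, ufs_vnodeop_p, &vp);
 *	if (error)
 *		return error;
 *	if ((ovp = vget_cached(dev, ino)) != NULL) {
 *		(lost the race; another thread installed a vnode)
 *		ungetnewvnode(vp);
 *		vp = ovp;
 *	}
 */
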
  675 /*
  676  * Allocate a new, uninitialized vnode.  If 'mp' is non-NULL, this is a
  677  * marker vnode and we are prepared to wait for the allocation.
  678  */
  679 vnode_t *
  680 vnalloc(struct mount *mp)
  681 {
  682         vnode_t *vp;
  683 
  684         vp = pool_cache_get(vnode_cache, (mp != NULL ? PR_WAITOK : PR_NOWAIT));
  685         if (vp == NULL) {
  686                 return NULL;
  687         }
  688 
  689         memset(vp, 0, sizeof(*vp));
  690         UVM_OBJ_INIT(&vp->v_uobj, &uvm_vnodeops, 0);
  691         cv_init(&vp->v_cv, "vnode");
  692         /*
  693          * done by memset() above.
  694          *      LIST_INIT(&vp->v_nclist);
  695          *      LIST_INIT(&vp->v_dnclist);
  696          */
  697 
  698         if (mp != NULL) {
  699                 vp->v_mount = mp;
  700                 vp->v_type = VBAD;
  701                 vp->v_iflag = VI_MARKER;
  702         } else {
  703                 rw_init(&vp->v_lock.vl_lock);
  704         }
  705 
  706         return vp;
  707 }
  708 
  709 /*
  710  * Free an unused, unreferenced vnode.
  711  */
  712 void
  713 vnfree(vnode_t *vp)
  714 {
  715 
  716         KASSERT(vp->v_usecount == 0);
  717 
  718         if ((vp->v_iflag & VI_MARKER) == 0) {
  719                 rw_destroy(&vp->v_lock.vl_lock);
  720                 mutex_enter(&vnode_free_list_lock);
  721                 numvnodes--;
  722                 mutex_exit(&vnode_free_list_lock);
  723         }
  724 
  725         UVM_OBJ_DESTROY(&vp->v_uobj);
  726         cv_destroy(&vp->v_cv);
  727         pool_cache_put(vnode_cache, vp);
  728 }
  729 
  730 /*
  731  * Remove a vnode from its freelist.
  732  */
  733 static inline void
  734 vremfree(vnode_t *vp)
  735 {
  736 
  737         KASSERT(mutex_owned(&vp->v_interlock));
  738         KASSERT(vp->v_usecount == 0);
  739 
  740         /*
  741          * Note that the reference count must not change until
  742          * the vnode is removed.
  743          */
  744         mutex_enter(&vnode_free_list_lock);
  745         if (vp->v_holdcnt > 0) {
  746                 KASSERT(vp->v_freelisthd == &vnode_hold_list);
  747         } else {
  748                 KASSERT(vp->v_freelisthd == &vnode_free_list);
  749         }
  750         TAILQ_REMOVE(vp->v_freelisthd, vp, v_freelist);
  751         vp->v_freelisthd = NULL;
  752         mutex_exit(&vnode_free_list_lock);
  753 }
  754 
  755 /*
  756  * Move a vnode from one mount queue to another.
  757  */
  758 static void
  759 insmntque(vnode_t *vp, struct mount *mp)
  760 {
  761         struct mount *omp;
  762 
  763 #ifdef DIAGNOSTIC
  764         if ((mp != NULL) &&
  765             (mp->mnt_iflag & IMNT_UNMOUNT) &&
  766             !(mp->mnt_flag & MNT_SOFTDEP) &&
  767             vp->v_tag != VT_VFS) {
  768                 panic("insmntque into dying filesystem");
  769         }
  770 #endif
  771 
  772         mutex_enter(&mntvnode_lock);
  773         /*
  774          * Delete from old mount point vnode list, if on one.
  775          */
  776         if ((omp = vp->v_mount) != NULL)
  777                 TAILQ_REMOVE(&vp->v_mount->mnt_vnodelist, vp, v_mntvnodes);
  778         /*
  779          * Insert into list of vnodes for the new mount point, if
  780          * available.  The caller must take a reference on the mount
  781          * structure and donate to the vnode.
  782          */
  783         if ((vp->v_mount = mp) != NULL)
  784                 TAILQ_INSERT_TAIL(&mp->mnt_vnodelist, vp, v_mntvnodes);
  785         mutex_exit(&mntvnode_lock);
  786 
  787         if (omp != NULL) {
  788                 /* Release reference to old mount. */
  789                 vfs_destroy(omp);
  790         }
  791 }
  792 
  793 /*
  794  * Wait for a vnode (typically with VI_XLOCK set) to be cleaned or
  795  * recycled.
  796  */
  797 void
  798 vwait(vnode_t *vp, int flags)
  799 {
  800 
  801         KASSERT(mutex_owned(&vp->v_interlock));
  802         KASSERT(vp->v_usecount != 0);
  803 
  804         while ((vp->v_iflag & flags) != 0)
  805                 cv_wait(&vp->v_cv, &vp->v_interlock);
  806 }
  807 
  808 /*
  809  * Insert a marker vnode into a mount's vnode list, after the
  810  * specified vnode.  mntvnode_lock must be held.
  811  */
  812 void
  813 vmark(vnode_t *mvp, vnode_t *vp)
  814 {
  815         struct mount *mp;
  816 
  817         mp = mvp->v_mount;
  818 
  819         KASSERT(mutex_owned(&mntvnode_lock));
  820         KASSERT((mvp->v_iflag & VI_MARKER) != 0);
  821         KASSERT(vp->v_mount == mp);
  822 
  823         TAILQ_INSERT_AFTER(&mp->mnt_vnodelist, vp, mvp, v_mntvnodes);
  824 }
  825 
  826 /*
  827  * Remove a marker vnode from a mount's vnode list, and return
  828  * a pointer to the next vnode in the list.  mntvnode_lock must
  829  * be held.
  830  */
  831 vnode_t *
  832 vunmark(vnode_t *mvp)
  833 {
  834         vnode_t *vp;
  835         struct mount *mp;
  836 
  837         mp = mvp->v_mount;
  838 
  839         KASSERT(mutex_owned(&mntvnode_lock));
  840         KASSERT((mvp->v_iflag & VI_MARKER) != 0);
  841 
  842         vp = TAILQ_NEXT(mvp, v_mntvnodes);
  843         TAILQ_REMOVE(&mp->mnt_vnodelist, mvp, v_mntvnodes); 
  844 
  845         KASSERT(vp == NULL || vp->v_mount == mp);
  846 
  847         return vp;
  848 }
  849 
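/*
 * A sketch of the marker idiom vmark()/vunmark() support (illustration
 * only; compare vflush()).  The marker holds our place in the mount's
 * vnode list while mntvnode_lock is dropped to work on a vnode:
 *
 *	mvp = vnalloc(mp);	(allocate a marker for this mount)
 *	mutex_enter(&mntvnode_lock);
 *	for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp != NULL;
 *	    vp = vunmark(mvp)) {
 *		vmark(mvp, vp);
 *		if ((vp->v_iflag & VI_MARKER) != 0)
 *			continue;
 *		... drop mntvnode_lock, process vp, retake the lock ...
 *	}
 *	mutex_exit(&mntvnode_lock);
 *	vnfree(mvp);
 */
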
  850 /*
  851  * Update outstanding I/O count and do wakeup if requested.
  852  */
  853 void
  854 vwakeup(struct buf *bp)
  855 {
  856         struct vnode *vp;
  857 
  858         if ((vp = bp->b_vp) == NULL)
  859                 return;
  860 
  861         KASSERT(bp->b_objlock == &vp->v_interlock);
  862         KASSERT(mutex_owned(bp->b_objlock));
  863 
  864         if (--vp->v_numoutput < 0)
  865                 panic("vwakeup: neg numoutput, vp %p", vp);
  866         if (vp->v_numoutput == 0)
  867                 cv_broadcast(&vp->v_cv);
  868 }
  869 
  870 /*
  871  * Flush out and invalidate all buffers associated with a vnode.
  872  * Called with the underlying vnode locked, which should prevent new dirty
  873  * buffers from being queued.
  874  */
  875 int
  876 vinvalbuf(struct vnode *vp, int flags, kauth_cred_t cred, struct lwp *l,
  877           bool catch, int slptimeo)
  878 {
  879         struct buf *bp, *nbp;
  880         int error;
  881         int flushflags = PGO_ALLPAGES | PGO_FREE | PGO_SYNCIO |
  882             (flags & V_SAVE ? PGO_CLEANIT | PGO_RECLAIM : 0);
  883 
  884         /* XXXUBC this doesn't look at flags or slp* */
  885         mutex_enter(&vp->v_interlock);
  886         error = VOP_PUTPAGES(vp, 0, 0, flushflags);
  887         if (error) {
  888                 return error;
  889         }
  890 
  891         if (flags & V_SAVE) {
  892                 error = VOP_FSYNC(vp, cred, FSYNC_WAIT|FSYNC_RECLAIM, 0, 0);
  893                 if (error)
  894                         return (error);
  895                 KASSERT(LIST_EMPTY(&vp->v_dirtyblkhd));
  896         }
  897 
  898         mutex_enter(&bufcache_lock);
  899 restart:
  900         for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
  901                 nbp = LIST_NEXT(bp, b_vnbufs);
  902                 error = bbusy(bp, catch, slptimeo, NULL);
  903                 if (error != 0) {
  904                         if (error == EPASSTHROUGH)
  905                                 goto restart;
  906                         mutex_exit(&bufcache_lock);
  907                         return (error);
  908                 }
  909                 brelsel(bp, BC_INVAL | BC_VFLUSH);
  910         }
  911 
  912         for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
  913                 nbp = LIST_NEXT(bp, b_vnbufs);
  914                 error = bbusy(bp, catch, slptimeo, NULL);
  915                 if (error != 0) {
  916                         if (error == EPASSTHROUGH)
  917                                 goto restart;
  918                         mutex_exit(&bufcache_lock);
  919                         return (error);
  920                 }
  921                 /*
  922                  * XXX Since there are no node locks for NFS, I believe
  923                  * there is a slight chance that a delayed write will
  924                  * occur while sleeping just above, so check for it.
  925                  */
  926                 if ((bp->b_oflags & BO_DELWRI) && (flags & V_SAVE)) {
  927 #ifdef DEBUG
  928                         printf("buffer still DELWRI\n");
  929 #endif
  930                         bp->b_cflags |= BC_BUSY | BC_VFLUSH;
  931                         mutex_exit(&bufcache_lock);
  932                         VOP_BWRITE(bp);
  933                         mutex_enter(&bufcache_lock);
  934                         goto restart;
  935                 }
  936                 brelsel(bp, BC_INVAL | BC_VFLUSH);
  937         }
  938 
  939 #ifdef DIAGNOSTIC
  940         if (!LIST_EMPTY(&vp->v_cleanblkhd) || !LIST_EMPTY(&vp->v_dirtyblkhd))
  941                 panic("vinvalbuf: flush failed, vp %p", vp);
  942 #endif
  943 
  944         mutex_exit(&bufcache_lock);
  945 
  946         return (0);
  947 }
  948 
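/*
 * Typical use, sketched for illustration: when a device vnode is
 * revoked or finally closed, its buffers are flushed and invalidated
 * with any delayed writes saved first (cred here is the caller's
 * credential):
 *
 *	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 *	error = vinvalbuf(vp, V_SAVE, cred, curlwp, false, 0);
 *	VOP_UNLOCK(vp, 0);
 */
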
  949 /*
  950  * Destroy any in core blocks past the truncation length.
  951  * Called with the underlying vnode locked, which should prevent new dirty
  952  * buffers from being queued.
  953  */
  954 int
  955 vtruncbuf(struct vnode *vp, daddr_t lbn, bool catch, int slptimeo)
  956 {
  957         struct buf *bp, *nbp;
  958         int error;
  959         voff_t off;
  960 
  961         off = round_page((voff_t)lbn << vp->v_mount->mnt_fs_bshift);
  962         mutex_enter(&vp->v_interlock);
  963         error = VOP_PUTPAGES(vp, off, 0, PGO_FREE | PGO_SYNCIO);
  964         if (error) {
  965                 return error;
  966         }
  967 
  968         mutex_enter(&bufcache_lock);
  969 restart:
  970         for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
  971                 nbp = LIST_NEXT(bp, b_vnbufs);
  972                 if (bp->b_lblkno < lbn)
  973                         continue;
  974                 error = bbusy(bp, catch, slptimeo, NULL);
  975                 if (error != 0) {
  976                         if (error == EPASSTHROUGH)
  977                                 goto restart;
  978                         mutex_exit(&bufcache_lock);
  979                         return (error);
  980                 }
  981                 brelsel(bp, BC_INVAL | BC_VFLUSH);
  982         }
  983 
  984         for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
  985                 nbp = LIST_NEXT(bp, b_vnbufs);
  986                 if (bp->b_lblkno < lbn)
  987                         continue;
  988                 error = bbusy(bp, catch, slptimeo, NULL);
  989                 if (error != 0) {
  990                         if (error == EPASSTHROUGH)
  991                                 goto restart;
  992                         mutex_exit(&bufcache_lock);
  993                         return (error);
  994                 }
  995                 brelsel(bp, BC_INVAL | BC_VFLUSH);
  996         }
  997         mutex_exit(&bufcache_lock);
  998 
  999         return (0);
 1000 }
 1001 
 1002 /*
 1003  * Flush all dirty buffers from a vnode.
 1004  * Called with the underlying vnode locked, which should prevent new dirty
 1005  * buffers from being queued.
 1006  */
 1007 void
 1008 vflushbuf(struct vnode *vp, int sync)
 1009 {
 1010         struct buf *bp, *nbp;
 1011         int flags = PGO_CLEANIT | PGO_ALLPAGES | (sync ? PGO_SYNCIO : 0);
 1012         bool dirty;
 1013 
 1014         mutex_enter(&vp->v_interlock);
 1015         (void) VOP_PUTPAGES(vp, 0, 0, flags);
 1016 
 1017 loop:
 1018         mutex_enter(&bufcache_lock);
 1019         for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
 1020                 nbp = LIST_NEXT(bp, b_vnbufs);
 1021                 if ((bp->b_cflags & BC_BUSY))
 1022                         continue;
 1023                 if ((bp->b_oflags & BO_DELWRI) == 0)
 1024                         panic("vflushbuf: not dirty, bp %p", bp);
 1025                 bp->b_cflags |= BC_BUSY | BC_VFLUSH;
 1026                 mutex_exit(&bufcache_lock);
 1027                 /*
 1028                  * Wait for I/O associated with indirect blocks to complete,
 1029                  * since there is no way to quickly wait for them below.
 1030                  */
 1031                 if (bp->b_vp == vp || sync == 0)
 1032                         (void) bawrite(bp);
 1033                 else
 1034                         (void) bwrite(bp);
 1035                 goto loop;
 1036         }
 1037         mutex_exit(&bufcache_lock);
 1038 
 1039         if (sync == 0)
 1040                 return;
 1041 
 1042         mutex_enter(&vp->v_interlock);
 1043         while (vp->v_numoutput != 0)
 1044                 cv_wait(&vp->v_cv, &vp->v_interlock);
 1045         dirty = !LIST_EMPTY(&vp->v_dirtyblkhd);
 1046         mutex_exit(&vp->v_interlock);
 1047 
 1048         if (dirty) {
 1049                 vprint("vflushbuf: dirty", vp);
 1050                 goto loop;
 1051         }
 1052 }
 1053 
 1054 /*
 1055  * Create a vnode for a block device.
 1056  * Used for root filesystem and swap areas.
 1057  * Also used for memory file system special devices.
 1058  */
 1059 int
 1060 bdevvp(dev_t dev, vnode_t **vpp)
 1061 {
 1062 
 1063         return (getdevvp(dev, vpp, VBLK));
 1064 }
 1065 
 1066 /*
 1067  * Create a vnode for a character device.
 1068  * Used for kernfs and some console handling.
 1069  */
 1070 int
 1071 cdevvp(dev_t dev, vnode_t **vpp)
 1072 {
 1073 
 1074         return (getdevvp(dev, vpp, VCHR));
 1075 }
 1076 
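/*
 * For illustration, a sketch of the boot-time pattern: the root file
 * system code obtains a vnode for the root device this way before
 * mounting:
 *
 *	if (bdevvp(rootdev, &rootvp))
 *		panic("cannot set up root device vnode");
 */
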
 1077 /*
 1078  * Associate a buffer with a vnode.  There must already be a hold on
 1079  * the vnode.
 1080  */
 1081 void
 1082 bgetvp(struct vnode *vp, struct buf *bp)
 1083 {
 1084 
 1085         KASSERT(bp->b_vp == NULL);
 1086         KASSERT(bp->b_objlock == &buffer_lock);
 1087         KASSERT(mutex_owned(&vp->v_interlock));
 1088         KASSERT(mutex_owned(&bufcache_lock));
 1089         KASSERT((bp->b_cflags & BC_BUSY) != 0);
 1090         KASSERT(!cv_has_waiters(&bp->b_done));
 1091 
 1092         vholdl(vp);
 1093         bp->b_vp = vp;
 1094         if (vp->v_type == VBLK || vp->v_type == VCHR)
 1095                 bp->b_dev = vp->v_rdev;
 1096         else
 1097                 bp->b_dev = NODEV;
 1098 
 1099         /*
 1100          * Insert onto list for new vnode.
 1101          */
 1102         bufinsvn(bp, &vp->v_cleanblkhd);
 1103         bp->b_objlock = &vp->v_interlock;
 1104 }
 1105 
 1106 /*
 1107  * Disassociate a buffer from a vnode.
 1108  */
 1109 void
 1110 brelvp(struct buf *bp)
 1111 {
 1112         struct vnode *vp = bp->b_vp;
 1113 
 1114         KASSERT(vp != NULL);
 1115         KASSERT(bp->b_objlock == &vp->v_interlock);
 1116         KASSERT(mutex_owned(&vp->v_interlock));
 1117         KASSERT(mutex_owned(&bufcache_lock));
 1118         KASSERT((bp->b_cflags & BC_BUSY) != 0);
 1119         KASSERT(!cv_has_waiters(&bp->b_done));
 1120 
 1121         /*
 1122          * Delete from old vnode list, if on one.
 1123          */
 1124         if (LIST_NEXT(bp, b_vnbufs) != NOLIST)
 1125                 bufremvn(bp);
 1126 
 1127         if (TAILQ_EMPTY(&vp->v_uobj.memq) && (vp->v_iflag & VI_ONWORKLST) &&
 1128             LIST_FIRST(&vp->v_dirtyblkhd) == NULL) {
 1129                 vp->v_iflag &= ~VI_WRMAPDIRTY;
 1130                 vn_syncer_remove_from_worklist(vp);
 1131         }
 1132 
 1133         bp->b_objlock = &buffer_lock;
 1134         bp->b_vp = NULL;
 1135         holdrelel(vp);
 1136 }
 1137 
 1138 /*
 1139  * Reassign a buffer from one vnode list to another.
 1140  * The list reassignment must be within the same vnode.
 1141  * Used to assign file specific control information
 1142  * (indirect blocks) to the list to which they belong.
 1143  */
 1144 void
 1145 reassignbuf(struct buf *bp, struct vnode *vp)
 1146 {
 1147         struct buflists *listheadp;
 1148         int delayx;
 1149 
 1150         KASSERT(mutex_owned(&bufcache_lock));
 1151         KASSERT(bp->b_objlock == &vp->v_interlock);
 1152         KASSERT(mutex_owned(&vp->v_interlock));
 1153         KASSERT((bp->b_cflags & BC_BUSY) != 0);
 1154 
 1155         /*
 1156          * Delete from old vnode list, if on one.
 1157          */
 1158         if (LIST_NEXT(bp, b_vnbufs) != NOLIST)
 1159                 bufremvn(bp);
 1160 
 1161         /*
 1162          * If dirty, put on list of dirty buffers;
 1163          * otherwise insert onto list of clean buffers.
 1164          */
 1165         if ((bp->b_oflags & BO_DELWRI) == 0) {
 1166                 listheadp = &vp->v_cleanblkhd;
 1167                 if (TAILQ_EMPTY(&vp->v_uobj.memq) &&
 1168                     (vp->v_iflag & VI_ONWORKLST) &&
 1169                     LIST_FIRST(&vp->v_dirtyblkhd) == NULL) {
 1170                         vp->v_iflag &= ~VI_WRMAPDIRTY;
 1171                         vn_syncer_remove_from_worklist(vp);
 1172                 }
 1173         } else {
 1174                 listheadp = &vp->v_dirtyblkhd;
 1175                 if ((vp->v_iflag & VI_ONWORKLST) == 0) {
 1176                         switch (vp->v_type) {
 1177                         case VDIR:
 1178                                 delayx = dirdelay;
 1179                                 break;
 1180                         case VBLK:
 1181                                 if (vp->v_specmountpoint != NULL) {
 1182                                         delayx = metadelay;
 1183                                         break;
 1184                                 }
 1185                                 /* fall through */
 1186                         default:
 1187                                 delayx = filedelay;
 1188                                 break;
 1189                         }
 1190                         if (!vp->v_mount ||
 1191                             (vp->v_mount->mnt_flag & MNT_ASYNC) == 0)
 1192                                 vn_syncer_add_to_worklist(vp, delayx);
 1193                 }
 1194         }
 1195         bufinsvn(bp, listheadp);
 1196 }
 1197 
 1198 /*
 1199  * Create a vnode for a device.
 1200  * Used by bdevvp (block device) for root file system etc.,
 1201  * and by cdevvp (character device) for console and kernfs.
 1202  */
 1203 static int
 1204 getdevvp(dev_t dev, vnode_t **vpp, enum vtype type)
 1205 {
 1206         vnode_t *vp;
 1207         vnode_t *nvp;
 1208         int error;
 1209 
 1210         if (dev == NODEV) {
 1211                 *vpp = NULL;
 1212                 return (0);
 1213         }
 1214         error = getnewvnode(VT_NON, NULL, spec_vnodeop_p, &nvp);
 1215         if (error) {
 1216                 *vpp = NULL;
 1217                 return (error);
 1218         }
 1219         vp = nvp;
 1220         vp->v_type = type;
 1221         vp->v_vflag |= VV_MPSAFE;
 1222         uvm_vnp_setsize(vp, 0);
 1223         spec_node_init(vp, dev);
 1224         *vpp = vp;
 1225         return (0);
 1226 }
 1227 
 1228 /*
 1229  * Try to gain a reference to a vnode, without acquiring its interlock.
 1230  * The caller must hold a lock that will prevent the vnode from being
 1231  * recycled or freed.
 1232  */
 1233 bool
 1234 vtryget(vnode_t *vp)
 1235 {
 1236         u_int use, next;
 1237 
 1238         /*
 1239          * If the vnode is being freed, don't make life any harder
 1240          * for vclean() by adding another reference without waiting.
 1241          * This is not strictly necessary, but we'll do it anyway.
 1242          */
 1243         if (__predict_false((vp->v_iflag & (VI_XLOCK | VI_FREEING)) != 0)) {
 1244                 return false;
 1245         }
 1246         for (use = vp->v_usecount;; use = next) {
 1247                 if (use == 0 || __predict_false((use & VC_XLOCK) != 0)) {
 1248                         /* Need interlock held if first reference. */
 1249                         return false;
 1250                 }
 1251                 next = atomic_cas_uint(&vp->v_usecount, use, use + 1);
 1252                 if (__predict_true(next == use)) {
 1253                         return true;
 1254                 }
 1255         }
 1256 }
 1257 
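/*
 * A sketch of a vtryget() caller (illustration only, modelled on the
 * name cache lookup path): try the lock-free grab first, and fall
 * back to the interlock plus vget() only when it fails:
 *
 *	if (!vtryget(vp)) {
 *		mutex_enter(&vp->v_interlock);
 *		error = vget(vp, LK_NOWAIT | LK_INTERLOCK);
 *		if (error)
 *			return error;	(could not gain a reference)
 *	}
 */
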
 1258 /*
 1259  * Grab a particular vnode from the free list, increment its
 1260  * reference count and lock it. If the vnode lock bit is set the
 1261  * vnode is being eliminated in vgone. In that case, we can not
 1262  * grab the vnode, so the process is awakened when the transition is
 1263  * completed, and an error returned to indicate that the vnode is no
 1264  * longer usable (possibly having been changed to a new file system type).
 1265  */
 1266 int
 1267 vget(vnode_t *vp, int flags)
 1268 {
 1269         int error;
 1270 
 1271         KASSERT((vp->v_iflag & VI_MARKER) == 0);
 1272 
 1273         if ((flags & LK_INTERLOCK) == 0)
 1274                 mutex_enter(&vp->v_interlock);
 1275 
 1276         /*
 1277          * Before adding a reference, we must remove the vnode
 1278          * from its freelist.
 1279          */
 1280         if (vp->v_usecount == 0) {
 1281                 vremfree(vp);
 1282                 vp->v_usecount = 1;
 1283         } else {
 1284                 atomic_inc_uint(&vp->v_usecount);
 1285         }
 1286 
 1287         /*
 1288          * If the vnode is in the process of being cleaned out for
 1289          * another use, we wait for the cleaning to finish and then
 1290          * return failure.  Cleaning is determined by checking if
 1291          * the VI_XLOCK or VI_FREEING flags are set.
 1292          */
 1293         if ((vp->v_iflag & (VI_XLOCK | VI_FREEING)) != 0) {
 1294                 if ((flags & LK_NOWAIT) != 0) {
 1295                         vrelel(vp, 0);
 1296                         return EBUSY;
 1297                 }
 1298                 vwait(vp, VI_XLOCK | VI_FREEING);
 1299                 vrelel(vp, 0);
 1300                 return ENOENT;
 1301         }
 1302 
 1303         if ((vp->v_iflag & VI_INACTNOW) != 0) {
 1304                 /*
  1305                  * If it's being deactivated, wait for it to complete.
  1306                  * Make sure not to return a clean vnode.
 1307                  */
 1308                  if ((flags & LK_NOWAIT) != 0) {
 1309                         vrelel(vp, 0);
 1310                         return EBUSY;
 1311                 }
 1312                 vwait(vp, VI_INACTNOW);
 1313                 if ((vp->v_iflag & VI_CLEAN) != 0) {
 1314                         vrelel(vp, 0);
 1315                         return ENOENT;
 1316                 }
 1317         }
 1318         if (flags & LK_TYPE_MASK) {
 1319                 error = vn_lock(vp, flags | LK_INTERLOCK);
 1320                 if (error != 0) {
 1321                         vrele(vp);
 1322                 }
 1323                 return error;
 1324         }
 1325         mutex_exit(&vp->v_interlock);
 1326         return 0;
 1327 }
 1328 
 1329 /*
 1330  * vput(), just unlock and vrele()
 1331  */
 1332 void
 1333 vput(vnode_t *vp)
 1334 {
 1335 
 1336         KASSERT((vp->v_iflag & VI_MARKER) == 0);
 1337 
 1338         VOP_UNLOCK(vp, 0);
 1339         vrele(vp);
 1340 }
 1341 
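/*
 * The usual pairing, sketched for illustration: vget() with a lock
 * type returns a locked, referenced vnode, and vput() undoes both in
 * a single call:
 *
 *	mutex_enter(&vp->v_interlock);
 *	error = vget(vp, LK_EXCLUSIVE | LK_INTERLOCK);
 *	if (error == 0) {
 *		... use the locked vnode ...
 *		vput(vp);	(unlock and release)
 *	}
 */
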
 1342 /*
 1343  * Try to drop reference on a vnode.  Abort if we are releasing the
 1344  * last reference.  Note: this _must_ succeed if not the last reference.
 1345  */
 1346 static inline bool
 1347 vtryrele(vnode_t *vp)
 1348 {
 1349         u_int use, next;
 1350 
 1351         for (use = vp->v_usecount;; use = next) {
 1352                 if (use == 1) {
 1353                         return false;
 1354                 }
 1355                 KASSERT((use & VC_MASK) > 1);
 1356                 next = atomic_cas_uint(&vp->v_usecount, use, use - 1);
 1357                 if (__predict_true(next == use)) {
 1358                         return true;
 1359                 }
 1360         }
 1361 }
 1362 
 1363 /*
 1364  * Vnode release.  If reference count drops to zero, call inactive
 1365  * routine and either return to freelist or free to the pool.
 1366  */
 1367 void
 1368 vrelel(vnode_t *vp, int flags)
 1369 {
 1370         bool recycle, defer;
 1371         int error;
 1372 
 1373         KASSERT(mutex_owned(&vp->v_interlock));
 1374         KASSERT((vp->v_iflag & VI_MARKER) == 0);
 1375         KASSERT(vp->v_freelisthd == NULL);
 1376 
 1377         if (__predict_false(vp->v_op == dead_vnodeop_p &&
 1378             (vp->v_iflag & (VI_CLEAN|VI_XLOCK)) == 0)) {
 1379                 vpanic(vp, "dead but not clean");
 1380         }
 1381 
 1382         /*
 1383          * If not the last reference, just drop the reference count
 1384          * and unlock.
 1385          */
 1386         if (vtryrele(vp)) {
 1387                 vp->v_iflag |= VI_INACTREDO;
 1388                 mutex_exit(&vp->v_interlock);
 1389                 return;
 1390         }
 1391         if (vp->v_usecount <= 0 || vp->v_writecount != 0) {
 1392                 vpanic(vp, "vrelel: bad ref count");
 1393         }
 1394 
 1395         KASSERT((vp->v_iflag & VI_XLOCK) == 0);
 1396 
 1397         /*
 1398          * If not clean, deactivate the vnode, but preserve
 1399          * our reference across the call to VOP_INACTIVE().
 1400          */
 1401  retry:
 1402         if ((vp->v_iflag & VI_CLEAN) == 0) {
 1403                 recycle = false;
 1404                 vp->v_iflag |= VI_INACTNOW;
 1405 
 1406                 /*
 1407                  * XXX This ugly block can be largely eliminated if
 1408                  * locking is pushed down into the file systems.
 1409                  */
 1410                 if (curlwp == uvm.pagedaemon_lwp) {
 1411                         /* The pagedaemon can't wait around; defer. */
 1412                         defer = true;
 1413                 } else if (curlwp == vrele_lwp) {
 1414                         /*
 1415                          * We have to try harder. But we can't sleep
 1416                          * with VI_INACTNOW as vget() may be waiting on it.
 1417                          */
 1418                         vp->v_iflag &= ~(VI_INACTREDO|VI_INACTNOW);
 1419                         cv_broadcast(&vp->v_cv);
 1420                         error = vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK |
 1421                             LK_RETRY);
 1422                         if (error != 0) {
 1423                                 /* XXX */
  1424                                 vpanic(vp, "vrelel: unable to lock vnode");
 1425                         }
 1426                         mutex_enter(&vp->v_interlock);
 1427                         /*
 1428                          * if we did get another reference while
 1429                          * sleeping, don't try to inactivate it yet.
 1430                          */
 1431                         if (__predict_false(vtryrele(vp))) {
 1432                                 VOP_UNLOCK(vp, 0);
 1433                                 mutex_exit(&vp->v_interlock);
 1434                                 return;
 1435                         }
 1436                         vp->v_iflag |= VI_INACTNOW;
 1437                         mutex_exit(&vp->v_interlock);
 1438                         defer = false;
 1439                 } else if ((vp->v_iflag & VI_LAYER) != 0) {
 1440                         /* 
 1441                          * Acquiring the stack's lock in vclean() even
 1442                          * for an honest vput/vrele is dangerous because
 1443                          * our caller may hold other vnode locks; defer.
 1444                          */
 1445                         defer = true;
 1446                 } else {                
 1447                         /* If we can't acquire the lock, then defer. */
 1448                         vp->v_iflag &= ~VI_INACTREDO;
 1449                         error = vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK |
 1450                             LK_NOWAIT);
 1451                         if (error != 0) {
 1452                                 defer = true;
 1453                                 mutex_enter(&vp->v_interlock);
 1454                         } else {
 1455                                 defer = false;
 1456                         }
 1457                 }
 1458 
 1459                 if (defer) {
 1460                         /*
 1461                          * Defer reclaim to the kthread; it's not safe to
 1462                          * clean it here.  We donate it our last reference.
 1463                          */
 1464                         KASSERT(mutex_owned(&vp->v_interlock));
 1465                         KASSERT((vp->v_iflag & VI_INACTPEND) == 0);
 1466                         vp->v_iflag &= ~VI_INACTNOW;
 1467                         vp->v_iflag |= VI_INACTPEND;
 1468                         mutex_enter(&vrele_lock);
 1469                         TAILQ_INSERT_TAIL(&vrele_list, vp, v_freelist);
 1470                         if (++vrele_pending > (desiredvnodes >> 8))
 1471                                 cv_signal(&vrele_cv); 
 1472                         mutex_exit(&vrele_lock);
 1473                         cv_broadcast(&vp->v_cv);
 1474                         mutex_exit(&vp->v_interlock);
 1475                         return;
 1476                 }
 1477 
 1478 #ifdef DIAGNOSTIC
 1479                 if ((vp->v_type == VBLK || vp->v_type == VCHR) &&
 1480                     vp->v_specnode != NULL && vp->v_specnode->sn_opencnt != 0) {
 1481                         vprint("vrelel: missing VOP_CLOSE()", vp);
 1482                 }
 1483 #endif
 1484 
 1485                 /*
 1486                  * The vnode can gain another reference while being
 1487                  * deactivated.  If VOP_INACTIVE() indicates that
 1488                  * the described file has been deleted, then recycle
 1489                  * the vnode irrespective of additional references.
 1490                  * Another thread may be waiting to re-use the on-disk
 1491                  * inode.
 1492                  *
 1493                  * Note that VOP_INACTIVE() will drop the vnode lock.
 1494                  */
 1495                 VOP_INACTIVE(vp, &recycle);
 1496                 mutex_enter(&vp->v_interlock);
 1497                 vp->v_iflag &= ~VI_INACTNOW;
 1498                 cv_broadcast(&vp->v_cv);
 1499                 if (!recycle) {
 1500                         if (vtryrele(vp)) {
 1501                                 mutex_exit(&vp->v_interlock);
 1502                                 return;
 1503                         }
 1504 
 1505                         /*
 1506                          * If we grew another reference while
 1507                          * VOP_INACTIVE() was underway, retry.
 1508                          */
 1509                         if ((vp->v_iflag & VI_INACTREDO) != 0) {
 1510                                 goto retry;
 1511                         }
 1512                 }
 1513 
 1514                 /* Take care of space accounting. */
 1515                 if (vp->v_iflag & VI_EXECMAP) {
 1516                         atomic_add_int(&uvmexp.execpages,
 1517                             -vp->v_uobj.uo_npages);
 1518                         atomic_add_int(&uvmexp.filepages,
 1519                             vp->v_uobj.uo_npages);
 1520                 }
 1521                 vp->v_iflag &= ~(VI_TEXT|VI_EXECMAP|VI_WRMAP);
 1522                 vp->v_vflag &= ~VV_MAPPED;
 1523 
 1524                 /*
 1525                  * Recycle the vnode if the file is now unused (unlinked),
 1526                  * otherwise just free it.
 1527                  */
 1528                 if (recycle) {
 1529                         vclean(vp, DOCLOSE);
 1530                 }
 1531                 KASSERT(vp->v_usecount > 0);
 1532         }
 1533 
 1534         if (atomic_dec_uint_nv(&vp->v_usecount) != 0) {
 1535                 /* Gained another reference while being reclaimed. */
 1536                 mutex_exit(&vp->v_interlock);
 1537                 return;
 1538         }
 1539 
 1540         if ((vp->v_iflag & VI_CLEAN) != 0) {
 1541                 /*
 1542                  * It's clean so destroy it.  It isn't referenced
 1543                  * anywhere since it has been reclaimed.
 1544                  */
 1545                 KASSERT(vp->v_holdcnt == 0);
 1546                 KASSERT(vp->v_writecount == 0);
 1547                 mutex_exit(&vp->v_interlock);
 1548                 insmntque(vp, NULL);
 1549                 if (vp->v_type == VBLK || vp->v_type == VCHR) {
 1550                         spec_node_destroy(vp);
 1551                 }
 1552                 vnfree(vp);
 1553         } else {
 1554                 /*
 1555                  * Otherwise, put it back onto the freelist.  It
 1556                  * can't be destroyed while still associated with
 1557                  * a file system.
 1558                  */
 1559                 mutex_enter(&vnode_free_list_lock);
 1560                 if (vp->v_holdcnt > 0) {
 1561                         vp->v_freelisthd = &vnode_hold_list;
 1562                 } else {
 1563                         vp->v_freelisthd = &vnode_free_list;
 1564                 }
 1565                 TAILQ_INSERT_TAIL(vp->v_freelisthd, vp, v_freelist);
 1566                 mutex_exit(&vnode_free_list_lock);
 1567                 mutex_exit(&vp->v_interlock);
 1568         }
 1569 }
 1570 
 1571 void
 1572 vrele(vnode_t *vp)
 1573 {
 1574 
 1575         KASSERT((vp->v_iflag & VI_MARKER) == 0);
 1576 
 1577         if ((vp->v_iflag & VI_INACTNOW) == 0 && vtryrele(vp)) {
 1578                 return;
 1579         }
 1580         mutex_enter(&vp->v_interlock);
 1581         vrelel(vp, 0);
 1582 }
 1583 
 1584 static void
 1585 vrele_thread(void *cookie)
 1586 {
 1587         vnode_t *vp;
 1588 
 1589         for (;;) {
 1590                 mutex_enter(&vrele_lock);
 1591                 while (TAILQ_EMPTY(&vrele_list)) {
 1592                         vrele_gen++;
 1593                         cv_broadcast(&vrele_cv);
 1594                         cv_timedwait(&vrele_cv, &vrele_lock, hz);
 1595                 }
 1596                 vp = TAILQ_FIRST(&vrele_list);
 1597                 TAILQ_REMOVE(&vrele_list, vp, v_freelist);
 1598                 vrele_pending--;
 1599                 mutex_exit(&vrele_lock);
 1600 
 1601                 /*
 1602                  * If not the last reference, then ignore the vnode
 1603                  * and look for more work.
 1604                  */
 1605                 mutex_enter(&vp->v_interlock);
 1606                 KASSERT((vp->v_iflag & VI_INACTPEND) != 0);
 1607                 vp->v_iflag &= ~VI_INACTPEND;
 1608                 vrelel(vp, 0);
 1609         }
 1610 }
 1611 
 1612 /*
 1613  * Page or buffer structure gets a reference.
 1614  * Called with v_interlock held.
 1615  */
 1616 void
 1617 vholdl(vnode_t *vp)
 1618 {
 1619 
 1620         KASSERT(mutex_owned(&vp->v_interlock));
 1621         KASSERT((vp->v_iflag & VI_MARKER) == 0);
 1622 
 1623         if (vp->v_holdcnt++ == 0 && vp->v_usecount == 0) {
 1624                 mutex_enter(&vnode_free_list_lock);
 1625                 KASSERT(vp->v_freelisthd == &vnode_free_list);
 1626                 TAILQ_REMOVE(vp->v_freelisthd, vp, v_freelist);
 1627                 vp->v_freelisthd = &vnode_hold_list;
 1628                 TAILQ_INSERT_TAIL(vp->v_freelisthd, vp, v_freelist);
 1629                 mutex_exit(&vnode_free_list_lock);
 1630         }
 1631 }
 1632 
 1633 /*
 1634  * Page or buffer structure frees a reference.
 1635  * Called with v_interlock held.
 1636  */
 1637 void
 1638 holdrelel(vnode_t *vp)
 1639 {
 1640 
 1641         KASSERT(mutex_owned(&vp->v_interlock));
 1642         KASSERT((vp->v_iflag & VI_MARKER) == 0);
 1643 
 1644         if (vp->v_holdcnt <= 0) {
 1645                 vpanic(vp, "holdrelel: holdcnt vp %p");
 1646         }
 1647 
 1648         vp->v_holdcnt--;
 1649         if (vp->v_holdcnt == 0 && vp->v_usecount == 0) {
 1650                 mutex_enter(&vnode_free_list_lock);
 1651                 KASSERT(vp->v_freelisthd == &vnode_hold_list);
 1652                 TAILQ_REMOVE(vp->v_freelisthd, vp, v_freelist);
 1653                 vp->v_freelisthd = &vnode_free_list;
 1654                 TAILQ_INSERT_TAIL(vp->v_freelisthd, vp, v_freelist);
 1655                 mutex_exit(&vnode_free_list_lock);
 1656         }
 1657 }
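
/*
 * Editorial note: a minimal usage sketch, not part of the original
 * source.  vholdl() and holdrelel() both require v_interlock to be
 * held; the hold count keeps an otherwise unused vnode on
 * vnode_hold_list instead of vnode_free_list.
 */
#if 0   /* illustrative only */
        mutex_enter(&vp->v_interlock);
        vholdl(vp);             /* moves an unused vp to vnode_hold_list */
        mutex_exit(&vp->v_interlock);

        /* ... a page or buffer structure now references vp ... */

        mutex_enter(&vp->v_interlock);
        holdrelel(vp);          /* may move vp back to vnode_free_list */
        mutex_exit(&vp->v_interlock);
#endif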
 1658 
 1659 /*
 1660  * Vnode reference, where a reference is already held by some other
 1661  * object (for example, a file structure).
 1662  */
 1663 void
 1664 vref(vnode_t *vp)
 1665 {
 1666 
 1667         KASSERT((vp->v_iflag & VI_MARKER) == 0);
 1668         KASSERT(vp->v_usecount != 0);
 1669 
 1670         atomic_inc_uint(&vp->v_usecount);
 1671 }
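
/*
 * Editorial note: an illustrative sketch, not part of the original
 * source.  vref() is only legal while some other reference is already
 * held (v_usecount != 0); the matching vrele() may deactivate the
 * vnode via vrelel() when the last reference is dropped.
 */
#if 0   /* illustrative only */
        vref(vp);               /* piggy-back on an existing reference */
        /* ... hand vp to another subsystem, possibly blocking ... */
        vrele(vp);
#endif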
 1672 
 1673 /*
 1674  * Remove any vnodes in the vnode table belonging to mount point mp.
 1675  *
 1676  * If FORCECLOSE is not specified, there should not be any active ones,
 1677  * return error if any are found (nb: this is a user error, not a
 1678  * system error). If FORCECLOSE is specified, detach any active vnodes
 1679  * that are found.
 1680  *
 1681  * If WRITECLOSE is set, only flush out regular file vnodes open for
 1682  * writing.
 1683  *
 1684  * SKIPSYSTEM causes any vnodes marked V_SYSTEM to be skipped.
 1685  */
 1686 #ifdef DEBUG
 1687 int busyprt = 0;        /* print out busy vnodes */
 1688 struct ctldebug debug1 = { "busyprt", &busyprt };
 1689 #endif
 1690 
 1691 static vnode_t *
 1692 vflushnext(vnode_t *mvp, int *when)
 1693 {
 1694 
 1695         if (hardclock_ticks > *when) {
 1696                 mutex_exit(&mntvnode_lock);
 1697                 yield();
 1698                 mutex_enter(&mntvnode_lock);
 1699                 *when = hardclock_ticks + hz / 10;
 1700         }
 1701 
 1702         return vunmark(mvp);
 1703 }
 1704 
 1705 int
 1706 vflush(struct mount *mp, vnode_t *skipvp, int flags)
 1707 {
 1708         vnode_t *vp, *mvp;
 1709         int busy = 0, when = 0, gen;
 1710 
 1711         /*
 1712          * First, flush out any vnode references from vrele_list.
 1713          */
 1714         mutex_enter(&vrele_lock);
 1715         gen = vrele_gen;
 1716         while (vrele_pending && gen == vrele_gen) {
 1717                 cv_broadcast(&vrele_cv);
 1718                 cv_wait(&vrele_cv, &vrele_lock);
 1719         }
 1720         mutex_exit(&vrele_lock);
 1721 
 1722         /* Allocate a marker vnode. */
 1723         if ((mvp = vnalloc(mp)) == NULL)
 1724                 return (ENOMEM);
 1725 
 1726         /*
 1727          * NOTE: we do not use TAILQ_FOREACH here, since vgone() and
 1728          * vclean() are called within the loop and may remove entries.
 1729          */
 1730         mutex_enter(&mntvnode_lock);
 1731         for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp != NULL;
 1732             vp = vflushnext(mvp, &when)) {
 1733                 vmark(mvp, vp);
 1734                 if (vp->v_mount != mp || vismarker(vp))
 1735                         continue;
 1736                 /*
 1737                  * Skip over a selected vnode.
 1738                  */
 1739                 if (vp == skipvp)
 1740                         continue;
 1741                 mutex_enter(&vp->v_interlock);
 1742                 /*
 1743                  * Ignore clean but still referenced vnodes.
 1744                  */
 1745                 if ((vp->v_iflag & VI_CLEAN) != 0) {
 1746                         mutex_exit(&vp->v_interlock);
 1747                         continue;
 1748                 }
 1749                 /*
 1750          * Skip over vnodes marked VV_SYSTEM.
 1751                  */
 1752                 if ((flags & SKIPSYSTEM) && (vp->v_vflag & VV_SYSTEM)) {
 1753                         mutex_exit(&vp->v_interlock);
 1754                         continue;
 1755                 }
 1756                 /*
 1757                  * If WRITECLOSE is set, only flush out regular file
 1758                  * vnodes open for writing.
 1759                  */
 1760                 if ((flags & WRITECLOSE) &&
 1761                     (vp->v_writecount == 0 || vp->v_type != VREG)) {
 1762                         mutex_exit(&vp->v_interlock);
 1763                         continue;
 1764                 }
 1765                 /*
 1766                  * With v_usecount == 0, all we need to do is clear
 1767                  * out the vnode data structures and we are done.
 1768                  */
 1769                 if (vp->v_usecount == 0) {
 1770                         mutex_exit(&mntvnode_lock);
 1771                         vremfree(vp);
 1772                         vp->v_usecount = 1;
 1773                         vclean(vp, DOCLOSE);
 1774                         vrelel(vp, 0);
 1775                         mutex_enter(&mntvnode_lock);
 1776                         continue;
 1777                 }
 1778                 /*
 1779                  * If FORCECLOSE is set, forcibly close the vnode.
 1780                  * For block or character devices, revert to an
 1781                  * anonymous device.  For all other files, just
 1782                  * kill them.
 1783                  */
 1784                 if (flags & FORCECLOSE) {
 1785                         mutex_exit(&mntvnode_lock);
 1786                         atomic_inc_uint(&vp->v_usecount);
 1787                         if (vp->v_type != VBLK && vp->v_type != VCHR) {
 1788                                 vclean(vp, DOCLOSE);
 1789                                 vrelel(vp, 0);
 1790                         } else {
 1791                                 vclean(vp, 0);
 1792                                 vp->v_op = spec_vnodeop_p; /* XXXSMP */
 1793                                 mutex_exit(&vp->v_interlock);
 1794                                 /*
 1795                                  * The vnode isn't clean, but still resides
 1796                                  * on the mount list.  Remove it. XXX This
 1797                                  * is a bit dodgy.
 1798                                  */
 1799                                 insmntque(vp, NULL);
 1800                                 vrele(vp);
 1801                         }
 1802                         mutex_enter(&mntvnode_lock);
 1803                         continue;
 1804                 }
 1805 #ifdef DEBUG
 1806                 if (busyprt)
 1807                         vprint("vflush: busy vnode", vp);
 1808 #endif
 1809                 mutex_exit(&vp->v_interlock);
 1810                 busy++;
 1811         }
 1812         mutex_exit(&mntvnode_lock);
 1813         vnfree(mvp);
 1814         if (busy)
 1815                 return (EBUSY);
 1816         return (0);
 1817 }
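
/*
 * Editorial note: a hedged sketch, not part of the original source, of
 * how a file system's unmount path might drive vflush().  The function
 * and variable names are hypothetical; only the vflush() call and its
 * EBUSY result are taken from the code above.
 */
#if 0   /* illustrative only */
static int
examplefs_unmount(struct mount *mp, int mntflags)
{
        int flags, error;

        flags = (mntflags & MNT_FORCE) ? FORCECLOSE : 0;
        error = vflush(mp, NULLVP, flags);
        if (error != 0)
                return error;   /* active vnodes remain: EBUSY */
        /* ... release file-system-private state ... */
        return 0;
}
#endif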
 1818 
 1819 /*
 1820  * Disassociate the underlying file system from a vnode.
 1821  *
 1822  * Must be called with the interlock held, and will return with it held.
 1823  */
 1824 void
 1825 vclean(vnode_t *vp, int flags)
 1826 {
 1827         lwp_t *l = curlwp;
 1828         bool recycle, active;
 1829         int error;
 1830 
 1831         KASSERT(mutex_owned(&vp->v_interlock));
 1832         KASSERT((vp->v_iflag & VI_MARKER) == 0);
 1833         KASSERT(vp->v_usecount != 0);
 1834 
 1835         /* If cleaning is already in progress, wait until done and return. */
 1836         if (vp->v_iflag & VI_XLOCK) {
 1837                 vwait(vp, VI_XLOCK);
 1838                 return;
 1839         }
 1840 
 1841         /* If already clean, nothing to do. */
 1842         if ((vp->v_iflag & VI_CLEAN) != 0) {
 1843                 return;
 1844         }
 1845 
 1846         /*
 1847          * Prevent the vnode from being recycled or brought into use
 1848          * while we clean it out.
 1849          */
 1850         vp->v_iflag |= VI_XLOCK;
 1851         if (vp->v_iflag & VI_EXECMAP) {
 1852                 atomic_add_int(&uvmexp.execpages, -vp->v_uobj.uo_npages);
 1853                 atomic_add_int(&uvmexp.filepages, vp->v_uobj.uo_npages);
 1854         }
 1855         vp->v_iflag &= ~(VI_TEXT|VI_EXECMAP);
 1856         active = (vp->v_usecount > 1);
 1857 
 1858         /* XXXAD should not lock vnode under layer */
 1859         VOP_LOCK(vp, LK_EXCLUSIVE | LK_INTERLOCK);
 1860 
 1861         /*
 1862          * Clean out any cached data associated with the vnode.
 1863          * If purging an active vnode, it must be closed and
 1864          * deactivated before being reclaimed. Note that the
 1865          * VOP_INACTIVE will unlock the vnode.
 1866          */
 1867         if (flags & DOCLOSE) {
 1868                 error = vinvalbuf(vp, V_SAVE, NOCRED, l, 0, 0);
 1869                 if (error != 0) {
 1870                         /* XXX, fix vn_start_write's grab of mp and use that. */
 1871 
 1872                         if (wapbl_vphaswapbl(vp))
 1873                                 WAPBL_DISCARD(wapbl_vptomp(vp));
 1874                         error = vinvalbuf(vp, 0, NOCRED, l, 0, 0);
 1875                 }
 1876                 KASSERT(error == 0);
 1877                 KASSERT((vp->v_iflag & VI_ONWORKLST) == 0);
 1878                 if (active && (vp->v_type == VBLK || vp->v_type == VCHR)) {
 1879                          spec_node_revoke(vp);
 1880                 }
 1881         }
 1882         if (active) {
 1883                 VOP_INACTIVE(vp, &recycle);
 1884         } else {
 1885                 /*
 1886                  * Any other processes trying to obtain this lock must first
 1887                  * wait for VI_XLOCK to clear, then call the new lock operation.
 1888                  */
 1889                 VOP_UNLOCK(vp, 0);
 1890         }
 1891 
 1892         /* Disassociate the underlying file system from the vnode. */
 1893         if (VOP_RECLAIM(vp)) {
 1894                 vpanic(vp, "vclean: cannot reclaim");
 1895         }
 1896 
 1897         KASSERT(vp->v_uobj.uo_npages == 0);
 1898         if (vp->v_type == VREG && vp->v_ractx != NULL) {
 1899                 uvm_ra_freectx(vp->v_ractx);
 1900                 vp->v_ractx = NULL;
 1901         }
 1902         cache_purge(vp);
 1903 
 1904         /* Done with purge, notify sleepers of the grim news. */
 1905         mutex_enter(&vp->v_interlock);
 1906         vp->v_op = dead_vnodeop_p;
 1907         vp->v_tag = VT_NON;
 1908         vp->v_vnlock = &vp->v_lock;
 1909         KNOTE(&vp->v_klist, NOTE_REVOKE);
 1910         vp->v_iflag &= ~(VI_XLOCK | VI_FREEING);
 1911         vp->v_vflag &= ~VV_LOCKSWORK;
 1912         if ((flags & DOCLOSE) != 0) {
 1913                 vp->v_iflag |= VI_CLEAN;
 1914         }
 1915         cv_broadcast(&vp->v_cv);
 1916 
 1917         KASSERT((vp->v_iflag & VI_ONWORKLST) == 0);
 1918 }
 1919 
 1920 /*
 1921  * Recycle an unused vnode to the front of the free list.
 1922  * Release the passed interlock if the vnode will be recycled.
 1923  */
 1924 int
 1925 vrecycle(vnode_t *vp, kmutex_t *inter_lkp, struct lwp *l)
 1926 {
 1927 
 1928         KASSERT((vp->v_iflag & VI_MARKER) == 0);
 1929 
 1930         mutex_enter(&vp->v_interlock);
 1931         if (vp->v_usecount != 0) {
 1932                 mutex_exit(&vp->v_interlock);
 1933                 return (0);
 1934         }
 1935         if (inter_lkp)
 1936                 mutex_exit(inter_lkp);
 1937         vremfree(vp);
 1938         vp->v_usecount = 1;
 1939         vclean(vp, DOCLOSE);
 1940         vrelel(vp, 0);
 1941         return (1);
 1942 }
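
/*
 * Editorial note: an illustrative sketch, not part of the original
 * source.  A file system's lookup path can hand an unreferenced vnode
 * to vrecycle() while still holding its own hash lock; the names
 * hash_lock and retry_lookup are hypothetical.
 */
#if 0   /* illustrative only */
        /* hash_lock held, vp found unreferenced in the fs hash: */
        if (vrecycle(vp, &hash_lock, curlwp)) {
                /* vp was cleaned; hash_lock was released for us */
                goto retry_lookup;
        }
#endif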
 1943 
 1944 /*
 1945  * Eliminate all activity associated with a vnode in preparation for
 1946  * reuse.  Drops a reference from the vnode.
 1947  */
 1948 void
 1949 vgone(vnode_t *vp)
 1950 {
 1951 
 1952         mutex_enter(&vp->v_interlock);
 1953         vclean(vp, DOCLOSE);
 1954         vrelel(vp, 0);
 1955 }
 1956 
 1957 /*
 1958  * Lookup a vnode by device number.
 1959  */
 1960 int
 1961 vfinddev(dev_t dev, enum vtype type, vnode_t **vpp)
 1962 {
 1963         vnode_t *vp;
 1964         int rc = 0;
 1965 
 1966         mutex_enter(&specfs_lock);
 1967         for (vp = specfs_hash[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
 1968                 if (dev != vp->v_rdev || type != vp->v_type)
 1969                         continue;
 1970                 *vpp = vp;
 1971                 rc = 1;
 1972                 break;
 1973         }
 1974         mutex_exit(&specfs_lock);
 1975         return (rc);
 1976 }
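
/*
 * Editorial note: an illustrative sketch, not part of the original
 * source.  vfinddev() returns an unreferenced vnode, so a caller that
 * intends to use it must still obtain a reference, e.g. with vget().
 */
#if 0   /* illustrative only */
        vnode_t *vp;

        if (vfinddev(dev, VBLK, &vp)) {
                /* a block-device vnode exists; vp is not yet referenced */
        }
#endif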
 1977 
 1978 /*
 1979  * Revoke all the vnodes corresponding to the specified minor number
 1980  * range (endpoints inclusive) of the specified major.
 1981  */
 1982 void
 1983 vdevgone(int maj, int minl, int minh, enum vtype type)
 1984 {
 1985         vnode_t *vp, **vpp;
 1986         dev_t dev;
 1987         int mn;
 1988 
 1989         vp = NULL;      /* XXX gcc */
 1990 
 1991         mutex_enter(&specfs_lock);
 1992         for (mn = minl; mn <= minh; mn++) {
 1993                 dev = makedev(maj, mn);
 1994                 vpp = &specfs_hash[SPECHASH(dev)];
 1995                 for (vp = *vpp; vp != NULL;) {
 1996                         mutex_enter(&vp->v_interlock);
 1997                         if ((vp->v_iflag & VI_CLEAN) != 0 ||
 1998                             dev != vp->v_rdev || type != vp->v_type) {
 1999                                 mutex_exit(&vp->v_interlock);
 2000                                 vp = vp->v_specnext;
 2001                                 continue;
 2002                         }
 2003                         mutex_exit(&specfs_lock);
 2004                         if (vget(vp, LK_INTERLOCK) == 0) {
 2005                                 VOP_REVOKE(vp, REVOKEALL);
 2006                                 vrele(vp);
 2007                         }
 2008                         mutex_enter(&specfs_lock);
 2009                         vp = *vpp;
 2010                 }
 2011         }
 2012         mutex_exit(&specfs_lock);
 2013 }
 2014 
 2015 /*
 2016  * Calculate the total number of references to a special device.
 2017  */
 2018 int
 2019 vcount(vnode_t *vp)
 2020 {
 2021         int count;
 2022 
 2023         mutex_enter(&specfs_lock);
 2024         mutex_enter(&vp->v_interlock);
 2025         if (vp->v_specnode == NULL) {
 2026                 count = vp->v_usecount - ((vp->v_iflag & VI_INACTPEND) != 0);
 2027                 mutex_exit(&vp->v_interlock);
 2028                 mutex_exit(&specfs_lock);
 2029                 return (count);
 2030         }
 2031         mutex_exit(&vp->v_interlock);
 2032         count = vp->v_specnode->sn_dev->sd_opencnt;
 2033         mutex_exit(&specfs_lock);
 2034         return (count);
 2035 }
 2036 
 2037 /*
 2038  * Eliminate all activity associated with the requested vnode
 2039  * and with all vnodes aliased to the requested vnode.
 2040  */
 2041 void
 2042 vrevoke(vnode_t *vp)
 2043 {
 2044         vnode_t *vq, **vpp;
 2045         enum vtype type;
 2046         dev_t dev;
 2047 
 2048         KASSERT(vp->v_usecount > 0);
 2049 
 2050         mutex_enter(&vp->v_interlock);
 2051         if ((vp->v_iflag & VI_CLEAN) != 0) {
 2052                 mutex_exit(&vp->v_interlock);
 2053                 return;
 2054         } else if (vp->v_type != VBLK && vp->v_type != VCHR) {
 2055                 atomic_inc_uint(&vp->v_usecount);
 2056                 vclean(vp, DOCLOSE);
 2057                 vrelel(vp, 0);
 2058                 return;
 2059         } else {
 2060                 dev = vp->v_rdev;
 2061                 type = vp->v_type;
 2062                 mutex_exit(&vp->v_interlock);
 2063         }
 2064 
 2065         vpp = &specfs_hash[SPECHASH(dev)];
 2066         mutex_enter(&specfs_lock);
 2067         for (vq = *vpp; vq != NULL;) {
 2068                 /* If clean or being cleaned, then ignore it. */
 2069                 mutex_enter(&vq->v_interlock);
 2070                 if ((vq->v_iflag & (VI_CLEAN | VI_XLOCK)) != 0 ||
 2071                     vq->v_rdev != dev || vq->v_type != type) {
 2072                         mutex_exit(&vq->v_interlock);
 2073                         vq = vq->v_specnext;
 2074                         continue;
 2075                 }
 2076                 mutex_exit(&specfs_lock);
 2077                 if (vq->v_usecount == 0) {
 2078                         vremfree(vq);
 2079                         vq->v_usecount = 1;
 2080                 } else {
 2081                         atomic_inc_uint(&vq->v_usecount);
 2082                 }
 2083                 vclean(vq, DOCLOSE);
 2084                 vrelel(vq, 0);
 2085                 mutex_enter(&specfs_lock);
 2086                 vq = *vpp;
 2087         }
 2088         mutex_exit(&specfs_lock);
 2089 }
 2090 
 2091 /*
 2092  * sysctl helper routine to return the list of supported fstypes
 2093  */
 2094 static int
 2095 sysctl_vfs_generic_fstypes(SYSCTLFN_ARGS)
 2096 {
 2097         char bf[sizeof(((struct statvfs *)NULL)->f_fstypename)];
 2098         char *where = oldp;
 2099         struct vfsops *v;
 2100         size_t needed, left, slen;
 2101         int error, first;
 2102 
 2103         if (newp != NULL)
 2104                 return (EPERM);
 2105         if (namelen != 0)
 2106                 return (EINVAL);
 2107 
 2108         first = 1;
 2109         error = 0;
 2110         needed = 0;
 2111         left = *oldlenp;
 2112 
 2113         sysctl_unlock();
 2114         mutex_enter(&vfs_list_lock);
 2115         LIST_FOREACH(v, &vfs_list, vfs_list) {
 2116                 if (where == NULL)
 2117                         needed += strlen(v->vfs_name) + 1;
 2118                 else {
 2119                         memset(bf, 0, sizeof(bf));
 2120                         if (first) {
 2121                                 strncpy(bf, v->vfs_name, sizeof(bf));
 2122                                 first = 0;
 2123                         } else {
 2124                                 bf[0] = ' ';
 2125                                 strncpy(bf + 1, v->vfs_name, sizeof(bf) - 1);
 2126                         }
 2127                         bf[sizeof(bf)-1] = '\0';
 2128                         slen = strlen(bf);
 2129                         if (left < slen + 1)
 2130                                 break;
 2131                         v->vfs_refcount++;
 2132                         mutex_exit(&vfs_list_lock);
 2133                         /* +1 to copy out the trailing NUL byte */
 2134                         error = copyout(bf, where, slen + 1);
 2135                         mutex_enter(&vfs_list_lock);
 2136                         v->vfs_refcount--;
 2137                         if (error)
 2138                                 break;
 2139                         where += slen;
 2140                         needed += slen;
 2141                         left -= slen;
 2142                 }
 2143         }
 2144         mutex_exit(&vfs_list_lock);
 2145         sysctl_relock();
 2146         *oldlenp = needed;
 2147         return (error);
 2148 }
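
/*
 * Editorial note: a hedged userland sketch, not part of the original
 * source, of reading this node.  It assumes the dynamically created
 * entry is reachable by name as "vfs.generic.fstypes", matching the
 * CTL_CREATE registration in the setup routine below.
 */
#if 0   /* illustrative only, userland code */
#include <sys/sysctl.h>
#include <stdio.h>
#include <stdlib.h>

int
main(void)
{
        size_t len;
        char *buf;

        if (sysctlbyname("vfs.generic.fstypes", NULL, &len, NULL, 0) == -1)
                return 1;
        if ((buf = malloc(len)) == NULL)
                return 1;
        if (sysctlbyname("vfs.generic.fstypes", buf, &len, NULL, 0) == -1)
                return 1;
        printf("%s\n", buf);    /* space-separated file system names */
        free(buf);
        return 0;
}
#endif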
 2149 
 2150 /*
 2151  * Top level filesystem related information gathering.
 2152  */
 2153 SYSCTL_SETUP(sysctl_vfs_setup, "sysctl vfs subtree setup")
 2154 {
 2155         sysctl_createv(clog, 0, NULL, NULL,
 2156                        CTLFLAG_PERMANENT,
 2157                        CTLTYPE_NODE, "vfs", NULL,
 2158                        NULL, 0, NULL, 0,
 2159                        CTL_VFS, CTL_EOL);
 2160         sysctl_createv(clog, 0, NULL, NULL,
 2161                        CTLFLAG_PERMANENT,
 2162                        CTLTYPE_NODE, "generic",
 2163                        SYSCTL_DESCR("Non-specific vfs related information"),
 2164                        NULL, 0, NULL, 0,
 2165                        CTL_VFS, VFS_GENERIC, CTL_EOL);
 2166         sysctl_createv(clog, 0, NULL, NULL,
 2167                        CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
 2168                        CTLTYPE_INT, "usermount",
 2169                        SYSCTL_DESCR("Whether unprivileged users may mount "
 2170                                     "filesystems"),
 2171                        NULL, 0, &dovfsusermount, 0,
 2172                        CTL_VFS, VFS_GENERIC, VFS_USERMOUNT, CTL_EOL);
 2173         sysctl_createv(clog, 0, NULL, NULL,
 2174                        CTLFLAG_PERMANENT,
 2175                        CTLTYPE_STRING, "fstypes",
 2176                        SYSCTL_DESCR("List of file systems present"),
 2177                        sysctl_vfs_generic_fstypes, 0, NULL, 0,
 2178                        CTL_VFS, VFS_GENERIC, CTL_CREATE, CTL_EOL);
 2179         sysctl_createv(clog, 0, NULL, NULL,
 2180                        CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
 2181                        CTLTYPE_INT, "magiclinks",
 2182                        SYSCTL_DESCR("Whether \"magic\" symlinks are expanded"),
 2183                        NULL, 0, &vfs_magiclinks, 0,
 2184                        CTL_VFS, VFS_GENERIC, VFS_MAGICLINKS, CTL_EOL);
 2185 }
 2186 
 2187 
 2188 int kinfo_vdebug = 1;
 2189 int kinfo_vgetfailed;
 2190 #define KINFO_VNODESLOP 10
 2191 /*
 2192  * Dump vnode list (via sysctl).
 2193  * Copyout address of vnode followed by vnode.
 2194  */
 2195 /* ARGSUSED */
 2196 int
 2197 sysctl_kern_vnode(SYSCTLFN_ARGS)
 2198 {
 2199         char *where = oldp;
 2200         size_t *sizep = oldlenp;
 2201         struct mount *mp, *nmp;
 2202         vnode_t *vp, *mvp, vbuf;
 2203         char *bp = where, *savebp;
 2204         char *ewhere;
 2205         int error;
 2206 
 2207         if (namelen != 0)
 2208                 return (EOPNOTSUPP);
 2209         if (newp != NULL)
 2210                 return (EPERM);
 2211 
 2212 #define VPTRSZ  sizeof(vnode_t *)
 2213 #define VNODESZ sizeof(vnode_t)
 2214         if (where == NULL) {
 2215                 *sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
 2216                 return (0);
 2217         }
 2218         ewhere = where + *sizep;
 2219 
 2220         sysctl_unlock();
 2221         mutex_enter(&mountlist_lock);
 2222         for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
 2223              mp = nmp) {
 2224                 if (vfs_busy(mp, &nmp)) {
 2225                         continue;
 2226                 }
 2227                 savebp = bp;
 2228                 /* Allocate a marker vnode. */
 2229                 if ((mvp = vnalloc(mp)) == NULL) {
 2230                         sysctl_relock();
 2231                         return (ENOMEM);
 2232                 }
 2233                 mutex_enter(&mntvnode_lock);
 2234                 for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; vp = vunmark(mvp)) {
 2235                         vmark(mvp, vp);
 2236                         /*
 2237                          * Check that the vp is still associated with
 2238                          * this filesystem.  RACE: could have been
 2239                          * recycled onto the same filesystem.
 2240                          */
 2241                         if (vp->v_mount != mp || vismarker(vp))
 2242                                 continue;
 2243                         if (bp + VPTRSZ + VNODESZ > ewhere) {
 2244                                 (void)vunmark(mvp);
 2245                                 mutex_exit(&mntvnode_lock);
 2246                                 vnfree(mvp);
 2247                                 sysctl_relock();
 2248                                 *sizep = bp - where;
 2249                                 return (ENOMEM);
 2250                         }
 2251                         memcpy(&vbuf, vp, VNODESZ);
 2252                         mutex_exit(&mntvnode_lock);
 2253                         if ((error = copyout(&vp, bp, VPTRSZ)) ||
 2254                            (error = copyout(&vbuf, bp + VPTRSZ, VNODESZ))) {
 2255                                 mutex_enter(&mntvnode_lock);
 2256                                 (void)vunmark(mvp);
 2257                                 mutex_exit(&mntvnode_lock);
 2258                                 vnfree(mvp);
 2259                                 sysctl_relock();
 2260                                 return (error);
 2261                         }
 2262                         bp += VPTRSZ + VNODESZ;
 2263                         mutex_enter(&mntvnode_lock);
 2264                 }
 2265                 mutex_exit(&mntvnode_lock);
 2266                 vnfree(mvp);
 2267                 vfs_unbusy(mp, false, &nmp);
 2268         }
 2269         mutex_exit(&mountlist_lock);
 2270         sysctl_relock();
 2271 
 2272         *sizep = bp - where;
 2273         return (0);
 2274 }
 2275 
 2276 /*
 2277  * Remove clean vnodes from a mountpoint's vnode list.
 2278  */
 2279 void
 2280 vfs_scrubvnlist(struct mount *mp)
 2281 {
 2282         vnode_t *vp, *nvp;
 2283 
 2284  retry:
 2285         mutex_enter(&mntvnode_lock);
 2286         for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; vp = nvp) {
 2287                 nvp = TAILQ_NEXT(vp, v_mntvnodes);
 2288                 mutex_enter(&vp->v_interlock);
 2289                 if ((vp->v_iflag & VI_CLEAN) != 0) {
 2290                         TAILQ_REMOVE(&mp->mnt_vnodelist, vp, v_mntvnodes);
 2291                         vp->v_mount = NULL;
 2292                         mutex_exit(&mntvnode_lock);
 2293                         mutex_exit(&vp->v_interlock);
 2294                         vfs_destroy(mp);
 2295                         goto retry;
 2296                 }
 2297                 mutex_exit(&vp->v_interlock);
 2298         }
 2299         mutex_exit(&mntvnode_lock);
 2300 }
 2301 
 2302 /*
 2303  * Check to see if a filesystem is mounted on a block device.
 2304  */
 2305 int
 2306 vfs_mountedon(vnode_t *vp)
 2307 {
 2308         vnode_t *vq;
 2309         int error = 0;
 2310 
 2311         if (vp->v_type != VBLK)
 2312                 return ENOTBLK;
 2313         if (vp->v_specmountpoint != NULL)
 2314                 return (EBUSY);
 2315         mutex_enter(&specfs_lock);
 2316         for (vq = specfs_hash[SPECHASH(vp->v_rdev)]; vq != NULL;
 2317             vq = vq->v_specnext) {
 2318                 if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
 2319                         continue;
 2320                 if (vq->v_specmountpoint != NULL) {
 2321                         error = EBUSY;
 2322                         break;
 2323                 }
 2324         }
 2325         mutex_exit(&specfs_lock);
 2326         return (error);
 2327 }
 2328 
 2329 /*
 2330  * Unmount all file systems.
 2331  * We traverse the list in reverse order under the assumption that doing so
 2332  * will avoid needing to worry about dependencies.
 2333  */
 2334 void
 2335 vfs_unmountall(struct lwp *l)
 2336 {
 2337         struct mount *mp, *nmp;
 2338         int allerror, error;
 2339 
 2340         printf("unmounting file systems...");
 2341         for (allerror = 0, mp = CIRCLEQ_LAST(&mountlist);
 2342              !CIRCLEQ_EMPTY(&mountlist);
 2343              mp = nmp) {
 2344                 nmp = CIRCLEQ_PREV(mp, mnt_list);
 2345 #ifdef DEBUG
 2346                 printf("\nunmounting %s (%s)...",
 2347                     mp->mnt_stat.f_mntonname, mp->mnt_stat.f_mntfromname);
 2348 #endif
 2349                 atomic_inc_uint(&mp->mnt_refcnt);
 2350                 if ((error = dounmount(mp, MNT_FORCE, l)) != 0) {
 2351                         printf("unmount of %s failed with error %d\n",
 2352                             mp->mnt_stat.f_mntonname, error);
 2353                         allerror = 1;
 2354                 }
 2355         }
 2356         printf(" done\n");
 2357         if (allerror)
 2358                 printf("WARNING: some file systems would not unmount\n");
 2359 }
 2360 
 2361 /*
 2362  * Sync and unmount file systems before shutting down.
 2363  */
 2364 void
 2365 vfs_shutdown(void)
 2366 {
 2367         struct lwp *l;
 2368 
 2369         /* XXX we're certainly not running in lwp0's context! */
 2370         l = curlwp;
 2371         if (l == NULL)
 2372                 l = &lwp0;
 2373 
 2374         printf("syncing disks... ");
 2375 
 2376         /* remove user processes from run queue */
 2377         suspendsched();
 2378         (void) spl0();
 2379 
 2380         /* avoid coming back this way again if we panic. */
 2381         doing_shutdown = 1;
 2382 
 2383         sys_sync(l, NULL, NULL);
 2384 
 2385         /* Wait for sync to finish. */
 2386         if (buf_syncwait() != 0) {
 2387 #if defined(DDB) && defined(DEBUG_HALT_BUSY)
 2388                 Debugger();
 2389 #endif
 2390                 printf("giving up\n");
 2391                 return;
 2392         } else
 2393                 printf("done\n");
 2394 
 2395         /*
 2396          * If we've panic'd, don't make the situation potentially
 2397          * worse by unmounting the file systems.
 2398          */
 2399         if (panicstr != NULL)
 2400                 return;
 2401 
 2402         /* Release inodes held by texts before update. */
 2403 #ifdef notdef
 2404         vnshutdown();
 2405 #endif
 2406         /* Unmount file systems. */
 2407         vfs_unmountall(l);
 2408 }
 2409 
 2410 /*
 2411  * Mount the root file system.  If the operator didn't specify a
 2412  * file system to use, try all possible file systems until one
 2413  * succeeds.
 2414  */
 2415 int
 2416 vfs_mountroot(void)
 2417 {
 2418         struct vfsops *v;
 2419         int error = ENODEV;
 2420 
 2421         if (root_device == NULL)
 2422                 panic("vfs_mountroot: root device unknown");
 2423 
 2424         switch (device_class(root_device)) {
 2425         case DV_IFNET:
 2426                 if (rootdev != NODEV)
 2427                         panic("vfs_mountroot: rootdev set for DV_IFNET "
 2428                             "(0x%08x -> %d,%d)", rootdev,
 2429                             major(rootdev), minor(rootdev));
 2430                 break;
 2431 
 2432         case DV_DISK:
 2433                 if (rootdev == NODEV)
 2434                         panic("vfs_mountroot: rootdev not set for DV_DISK");
 2435                 if (bdevvp(rootdev, &rootvp))
 2436                         panic("vfs_mountroot: can't get vnode for rootdev");
 2437                 error = VOP_OPEN(rootvp, FREAD, FSCRED);
 2438                 if (error) {
 2439                         printf("vfs_mountroot: can't open root device\n");
 2440                         return (error);
 2441                 }
 2442                 break;
 2443 
 2444         default:
 2445                 printf("%s: inappropriate for root file system\n",
 2446                     device_xname(root_device));
 2447                 return (ENODEV);
 2448         }
 2449 
 2450         /*
 2451          * If user specified a file system, use it.
 2452          */
 2453         if (mountroot != NULL) {
 2454                 error = (*mountroot)();
 2455                 goto done;
 2456         }
 2457 
 2458         /*
 2459          * Try each file system currently configured into the kernel.
 2460          */
 2461         mutex_enter(&vfs_list_lock);
 2462         LIST_FOREACH(v, &vfs_list, vfs_list) {
 2463                 if (v->vfs_mountroot == NULL)
 2464                         continue;
 2465 #ifdef DEBUG
 2466                 aprint_normal("mountroot: trying %s...\n", v->vfs_name);
 2467 #endif
 2468                 v->vfs_refcount++;
 2469                 mutex_exit(&vfs_list_lock);
 2470                 error = (*v->vfs_mountroot)();
 2471                 mutex_enter(&vfs_list_lock);
 2472                 v->vfs_refcount--;
 2473                 if (!error) {
 2474                         aprint_normal("root file system type: %s\n",
 2475                             v->vfs_name);
 2476                         break;
 2477                 }
 2478         }
 2479         mutex_exit(&vfs_list_lock);
 2480 
 2481         if (v == NULL) {
 2482                 printf("no file system for %s", device_xname(root_device));
 2483                 if (device_class(root_device) == DV_DISK)
 2484                         printf(" (dev 0x%x)", rootdev);
 2485                 printf("\n");
 2486                 error = EFTYPE;
 2487         }
 2488 
 2489 done:
 2490         if (error && device_class(root_device) == DV_DISK) {
 2491                 VOP_CLOSE(rootvp, FREAD, FSCRED);
 2492                 vrele(rootvp);
 2493         }
 2494         return (error);
 2495 }
 2496 
 2497 /*
 2498  * Get a new unique fsid
 2499  */
 2500 void
 2501 vfs_getnewfsid(struct mount *mp)
 2502 {
 2503         static u_short xxxfs_mntid;
 2504         fsid_t tfsid;
 2505         int mtype;
 2506 
 2507         mutex_enter(&mntid_lock);
 2508         mtype = makefstype(mp->mnt_op->vfs_name);
 2509         mp->mnt_stat.f_fsidx.__fsid_val[0] = makedev(mtype, 0);
 2510         mp->mnt_stat.f_fsidx.__fsid_val[1] = mtype;
 2511         mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0];
 2512         if (xxxfs_mntid == 0)
 2513                 ++xxxfs_mntid;
 2514         tfsid.__fsid_val[0] = makedev(mtype & 0xff, xxxfs_mntid);
 2515         tfsid.__fsid_val[1] = mtype;
 2516         if (!CIRCLEQ_EMPTY(&mountlist)) {
 2517                 while (vfs_getvfs(&tfsid)) {
 2518                         tfsid.__fsid_val[0]++;
 2519                         xxxfs_mntid++;
 2520                 }
 2521         }
 2522         mp->mnt_stat.f_fsidx.__fsid_val[0] = tfsid.__fsid_val[0];
 2523         mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0];
 2524         mutex_exit(&mntid_lock);
 2525 }
 2526 
 2527 /*
 2528  * Make a 'unique' number from a mount type name.
 2529  */
 2530 long
 2531 makefstype(const char *type)
 2532 {
 2533         long rv;
 2534 
 2535         for (rv = 0; *type; type++) {
 2536                 rv <<= 2;
 2537                 rv ^= *type;
 2538         }
 2539         return rv;
 2540 }
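
/*
 * Editorial note: a worked example, not part of the original source.
 * For type = "ffs" (ASCII 'f' = 0x66, 's' = 0x73) the loop computes:
 *
 *      rv = (0x000 << 2) ^ 0x66 = 0x066
 *      rv = (0x066 << 2) ^ 0x66 = 0x1fe
 *      rv = (0x1fe << 2) ^ 0x73 = 0x78b
 *
 * so makefstype("ffs") == 0x78b.  The result is 'unique' only in the
 * quoted sense: distinct names can collide.
 */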
 2541 
 2542 /*
 2543  * Set vnode attributes to VNOVAL
 2544  */
 2545 void
 2546 vattr_null(struct vattr *vap)
 2547 {
 2548 
 2549         memset(vap, 0, sizeof(*vap));
 2550 
 2551         vap->va_type = VNON;
 2552 
 2553         /*
 2554          * Assign each member individually so that this remains correct
 2555          * even if the size and signedness of the members vary.
 2556          */
 2557         vap->va_mode = VNOVAL;
 2558         vap->va_nlink = VNOVAL;
 2559         vap->va_uid = VNOVAL;
 2560         vap->va_gid = VNOVAL;
 2561         vap->va_fsid = VNOVAL;
 2562         vap->va_fileid = VNOVAL;
 2563         vap->va_size = VNOVAL;
 2564         vap->va_blocksize = VNOVAL;
 2565         vap->va_atime.tv_sec =
 2566             vap->va_mtime.tv_sec =
 2567             vap->va_ctime.tv_sec =
 2568             vap->va_birthtime.tv_sec = VNOVAL;
 2569         vap->va_atime.tv_nsec =
 2570             vap->va_mtime.tv_nsec =
 2571             vap->va_ctime.tv_nsec =
 2572             vap->va_birthtime.tv_nsec = VNOVAL;
 2573         vap->va_gen = VNOVAL;
 2574         vap->va_flags = VNOVAL;
 2575         vap->va_rdev = VNOVAL;
 2576         vap->va_bytes = VNOVAL;
 2577 }
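
/*
 * Editorial note: an illustrative sketch, not part of the original
 * source, of the usual pattern: null out every attribute, set just the
 * ones to change, then hand the result to VOP_SETATTR().
 */
#if 0   /* illustrative only */
        struct vattr va;

        vattr_null(&va);
        va.va_size = 0;                 /* truncate to zero length */
        error = VOP_SETATTR(vp, &va, cred);
#endif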
 2578 
 2579 #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0]))
 2580 #define ARRAY_PRINT(idx, arr) \
 2581     ((idx) > 0 && (idx) < ARRAY_SIZE(arr) ? (arr)[(idx)] : "UNKNOWN")
 2582 
 2583 const char * const vnode_tags[] = { VNODE_TAGS };
 2584 const char * const vnode_types[] = { VNODE_TYPES };
 2585 const char vnode_flagbits[] = VNODE_FLAGBITS;
 2586 
 2587 /*
 2588  * Print out a description of a vnode.
 2589  */
 2590 void
 2591 vprint(const char *label, struct vnode *vp)
 2592 {
 2593         struct vnlock *vl;
 2594         char bf[96];
 2595         int flag;
 2596 
 2597         vl = (vp->v_vnlock != NULL ? vp->v_vnlock : &vp->v_lock);
 2598         flag = vp->v_iflag | vp->v_vflag | vp->v_uflag;
 2599         bitmask_snprintf(flag, vnode_flagbits, bf, sizeof(bf));
 2600 
 2601         if (label != NULL)
 2602                 printf("%s: ", label);
 2603         printf("vnode @ %p, flags (%s)\n\ttag %s(%d), type %s(%d), "
 2604             "usecount %d, writecount %d, holdcount %d\n"
 2605             "\tfreelisthd %p, mount %p, data %p lock %p recursecnt %d\n",
 2606             vp, bf, ARRAY_PRINT(vp->v_tag, vnode_tags), vp->v_tag,
 2607             ARRAY_PRINT(vp->v_type, vnode_types), vp->v_type,
 2608             vp->v_usecount, vp->v_writecount, vp->v_holdcnt,
 2609             vp->v_freelisthd, vp->v_mount, vp->v_data, vl, vl->vl_recursecnt);
 2610         if (vp->v_data != NULL) {
 2611                 printf("\t");
 2612                 VOP_PRINT(vp);
 2613         }
 2614 }
 2615 
 2616 #ifdef DEBUG
 2617 /*
 2618  * List all of the locked vnodes in the system.
 2619  * Called when debugging the kernel.
 2620  */
 2621 void
 2622 printlockedvnodes(void)
 2623 {
 2624         struct mount *mp, *nmp;
 2625         struct vnode *vp;
 2626 
 2627         printf("Locked vnodes\n");
 2628         mutex_enter(&mountlist_lock);
 2629         for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
 2630              mp = nmp) {
 2631                 if (vfs_busy(mp, &nmp)) {
 2632                         continue;
 2633                 }
 2634                 TAILQ_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
 2635                         if (VOP_ISLOCKED(vp))
 2636                                 vprint(NULL, vp);
 2637                 }
 2638                 mutex_enter(&mountlist_lock);
 2639                 vfs_unbusy(mp, false, &nmp);
 2640         }
 2641         mutex_exit(&mountlist_lock);
 2642 }
 2643 #endif
 2644 
 2645 /*
 2646  * Do the usual access checking.
 2647  * file_mode, uid and gid are from the vnode in question,
 2648  * while acc_mode and cred are from the VOP_ACCESS parameter list
 2649  * while acc_mode and cred are from the VOP_ACCESS parameter list.
 2650 int
 2651 vaccess(enum vtype type, mode_t file_mode, uid_t uid, gid_t gid,
 2652     mode_t acc_mode, kauth_cred_t cred)
 2653 {
 2654         mode_t mask;
 2655         int error, ismember;
 2656 
 2657         /*
 2658          * Super-user always gets read/write access, but execute access depends
 2659          * on at least one execute bit being set.
 2660          */
 2661         if (kauth_authorize_generic(cred, KAUTH_GENERIC_ISSUSER, NULL) == 0) {
 2662                 if ((acc_mode & VEXEC) && type != VDIR &&
 2663                     (file_mode & (S_IXUSR|S_IXGRP|S_IXOTH)) == 0)
 2664                         return (EACCES);
 2665                 return (0);
 2666         }
 2667 
 2668         mask = 0;
 2669 
 2670         /* Otherwise, check the owner. */
 2671         if (kauth_cred_geteuid(cred) == uid) {
 2672                 if (acc_mode & VEXEC)
 2673                         mask |= S_IXUSR;
 2674                 if (acc_mode & VREAD)
 2675                         mask |= S_IRUSR;
 2676                 if (acc_mode & VWRITE)
 2677                         mask |= S_IWUSR;
 2678                 return ((file_mode & mask) == mask ? 0 : EACCES);
 2679         }
 2680 
 2681         /* Otherwise, check the groups. */
 2682         error = kauth_cred_ismember_gid(cred, gid, &ismember);
 2683         if (error)
 2684                 return (error);
 2685         if (kauth_cred_getegid(cred) == gid || ismember) {
 2686                 if (acc_mode & VEXEC)
 2687                         mask |= S_IXGRP;
 2688                 if (acc_mode & VREAD)
 2689                         mask |= S_IRGRP;
 2690                 if (acc_mode & VWRITE)
 2691                         mask |= S_IWGRP;
 2692                 return ((file_mode & mask) == mask ? 0 : EACCES);
 2693         }
 2694 
 2695         /* Otherwise, check everyone else. */
 2696         if (acc_mode & VEXEC)
 2697                 mask |= S_IXOTH;
 2698         if (acc_mode & VREAD)
 2699                 mask |= S_IROTH;
 2700         if (acc_mode & VWRITE)
 2701                 mask |= S_IWOTH;
 2702         return ((file_mode & mask) == mask ? 0 : EACCES);
 2703 }
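
/*
 * Editorial note: a hedged sketch, not part of the original source, of
 * how a file system's access method typically delegates to vaccess().
 * The structure exampleinode and its i_* fields are hypothetical.
 */
#if 0   /* illustrative only */
static int
examplefs_access(struct vnode *vp, mode_t acc_mode, kauth_cred_t cred)
{
        struct exampleinode *ip = vp->v_data;   /* hypothetical */

        return vaccess(vp->v_type, ip->i_mode & ALLPERMS, ip->i_uid,
            ip->i_gid, acc_mode, cred);
}
#endif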
 2704 
 2705 /*
 2706  * Given a file system name, look up the vfsops for that
 2707  * file system, or return NULL if file system isn't present
 2708  * in the kernel.
 2709  */
 2710 struct vfsops *
 2711 vfs_getopsbyname(const char *name)
 2712 {
 2713         struct vfsops *v;
 2714 
 2715         mutex_enter(&vfs_list_lock);
 2716         LIST_FOREACH(v, &vfs_list, vfs_list) {
 2717                 if (strcmp(v->vfs_name, name) == 0)
 2718                         break;
 2719         }
 2720         if (v != NULL)
 2721                 v->vfs_refcount++;
 2722         mutex_exit(&vfs_list_lock);
 2723 
 2724         return (v);
 2725 }
 2726 
 2727 void
 2728 copy_statvfs_info(struct statvfs *sbp, const struct mount *mp)
 2729 {
 2730         const struct statvfs *mbp;
 2731 
 2732         if (sbp == (mbp = &mp->mnt_stat))
 2733                 return;
 2734 
 2735         (void)memcpy(&sbp->f_fsidx, &mbp->f_fsidx, sizeof(sbp->f_fsidx));
 2736         sbp->f_fsid = mbp->f_fsid;
 2737         sbp->f_owner = mbp->f_owner;
 2738         sbp->f_flag = mbp->f_flag;
 2739         sbp->f_syncwrites = mbp->f_syncwrites;
 2740         sbp->f_asyncwrites = mbp->f_asyncwrites;
 2741         sbp->f_syncreads = mbp->f_syncreads;
 2742         sbp->f_asyncreads = mbp->f_asyncreads;
 2743         (void)memcpy(sbp->f_spare, mbp->f_spare, sizeof(mbp->f_spare));
 2744         (void)memcpy(sbp->f_fstypename, mbp->f_fstypename,
 2745             sizeof(sbp->f_fstypename));
 2746         (void)memcpy(sbp->f_mntonname, mbp->f_mntonname,
 2747             sizeof(sbp->f_mntonname));
 2748         (void)memcpy(sbp->f_mntfromname, mp->mnt_stat.f_mntfromname,
 2749             sizeof(sbp->f_mntfromname));
 2750         sbp->f_namemax = mbp->f_namemax;
 2751 }
 2752 
 2753 int
 2754 set_statvfs_info(const char *onp, int ukon, const char *fromp, int ukfrom,
 2755     const char *vfsname, struct mount *mp, struct lwp *l)
 2756 {
 2757         int error;
 2758         size_t size;
 2759         struct statvfs *sfs = &mp->mnt_stat;
 2760         int (*fun)(const void *, void *, size_t, size_t *);
 2761 
 2762         (void)strlcpy(mp->mnt_stat.f_fstypename, vfsname,
 2763             sizeof(mp->mnt_stat.f_fstypename));
 2764 
 2765         if (onp) {
 2766                 struct cwdinfo *cwdi = l->l_proc->p_cwdi;
 2767                 fun = (ukon == UIO_SYSSPACE) ? copystr : copyinstr;
 2768                 if (cwdi->cwdi_rdir != NULL) {
 2769                         size_t len;
 2770                         char *bp;
 2771                         char *path = PNBUF_GET();
 2772 
 2773                         bp = path + MAXPATHLEN;
 2774                         *--bp = '\0';
 2775                         rw_enter(&cwdi->cwdi_lock, RW_READER);
 2776                         error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp,
 2777                             path, MAXPATHLEN / 2, 0, l);
 2778                         rw_exit(&cwdi->cwdi_lock);
 2779                         if (error) {
 2780                                 PNBUF_PUT(path);
 2781                                 return error;
 2782                         }
 2783 
 2784                         len = strlen(bp);
 2785                         if (len > sizeof(sfs->f_mntonname) - 1)
 2786                                 len = sizeof(sfs->f_mntonname) - 1;
 2787                         (void)strncpy(sfs->f_mntonname, bp, len);
 2788                         PNBUF_PUT(path);
 2789 
 2790                         if (len < sizeof(sfs->f_mntonname) - 1) {
 2791                                 error = (*fun)(onp, &sfs->f_mntonname[len],
 2792                                     sizeof(sfs->f_mntonname) - len - 1, &size);
 2793                                 if (error)
 2794                                         return error;
 2795                                 size += len;
 2796                         } else {
 2797                                 size = len;
 2798                         }
 2799                 } else {
 2800                         error = (*fun)(onp, &sfs->f_mntonname,
 2801                             sizeof(sfs->f_mntonname) - 1, &size);
 2802                         if (error)
 2803                                 return error;
 2804                 }
 2805                 (void)memset(sfs->f_mntonname + size, 0,
 2806                     sizeof(sfs->f_mntonname) - size);
 2807         }
 2808 
 2809         if (fromp) {
 2810                 fun = (ukfrom == UIO_SYSSPACE) ? copystr : copyinstr;
 2811                 error = (*fun)(fromp, sfs->f_mntfromname,
 2812                     sizeof(sfs->f_mntfromname) - 1, &size);
 2813                 if (error)
 2814                         return error;
 2815                 (void)memset(sfs->f_mntfromname + size, 0,
 2816                     sizeof(sfs->f_mntfromname) - size);
 2817         }
 2818         return 0;
 2819 }
 2820 
 2821 void
 2822 vfs_timestamp(struct timespec *ts)
 2823 {
 2824 
 2825         nanotime(ts);
 2826 }
 2827 
 2828 time_t  rootfstime;                     /* recorded root fs time, if known */
 2829 void
 2830 setrootfstime(time_t t)
 2831 {
 2832         rootfstime = t;
 2833 }
 2834 
 2835 /*
 2836  * Sham lock manager for vnodes.  This is a temporary measure.
 2837  */
 2838 int
 2839 vlockmgr(struct vnlock *vl, int flags)
 2840 {
 2841 
 2842         KASSERT((flags & ~(LK_CANRECURSE | LK_NOWAIT | LK_TYPE_MASK)) == 0);
 2843 
 2844         switch (flags & LK_TYPE_MASK) {
 2845         case LK_SHARED:
 2846                 if (rw_tryenter(&vl->vl_lock, RW_READER)) {
 2847                         return 0;
 2848                 }
 2849                 if ((flags & LK_NOWAIT) != 0) {
 2850                         return EBUSY;
 2851                 }
 2852                 rw_enter(&vl->vl_lock, RW_READER);
 2853                 return 0;
 2854 
 2855         case LK_EXCLUSIVE:
 2856                 if (rw_tryenter(&vl->vl_lock, RW_WRITER)) {
 2857                         return 0;
 2858                 }
 2859                 if ((vl->vl_canrecurse || (flags & LK_CANRECURSE) != 0) &&
 2860                     rw_write_held(&vl->vl_lock)) {
 2861                         vl->vl_recursecnt++;
 2862                         return 0;
 2863                 }
 2864                 if ((flags & LK_NOWAIT) != 0) {
 2865                         return EBUSY;
 2866                 }
 2867                 rw_enter(&vl->vl_lock, RW_WRITER);
 2868                 return 0;
 2869 
 2870         case LK_RELEASE:
 2871                 if (vl->vl_recursecnt != 0) {
 2872                         KASSERT(rw_write_held(&vl->vl_lock));
 2873                         vl->vl_recursecnt--;
 2874                         return 0;
 2875                 }
 2876                 rw_exit(&vl->vl_lock);
 2877                 return 0;
 2878 
 2879         default:
 2880                 panic("vlockmgr: flags %x", flags);
 2881         }
 2882 }
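
/*
 * Editorial note: an illustrative sketch, not part of the original
 * source, of an exclusive acquire/release cycle through the sham lock
 * manager, first trying not to block.
 */
#if 0   /* illustrative only */
        if (vlockmgr(&vp->v_lock, LK_EXCLUSIVE | LK_NOWAIT) == EBUSY) {
                /* contended; now block until the lock is available */
                (void)vlockmgr(&vp->v_lock, LK_EXCLUSIVE);
        }
        /* ... vnode is held exclusively ... */
        (void)vlockmgr(&vp->v_lock, LK_RELEASE);
#endif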
 2883 
 2884 int
 2885 vlockstatus(struct vnlock *vl)
 2886 {
 2887 
 2888         if (rw_write_held(&vl->vl_lock)) {
 2889                 return LK_EXCLUSIVE;
 2890         }
 2891         if (rw_read_held(&vl->vl_lock)) {
 2892                 return LK_SHARED;
 2893         }
 2894         return 0;
 2895 }
 2896 
 2897 /*
 2898  * mount_specific_key_create --
 2899  *      Create a key for subsystem mount-specific data.
 2900  */
 2901 int
 2902 mount_specific_key_create(specificdata_key_t *keyp, specificdata_dtor_t dtor)
 2903 {
 2904 
 2905         return (specificdata_key_create(mount_specificdata_domain, keyp, dtor));
 2906 }
 2907 
 2908 /*
 2909  * mount_specific_key_delete --
 2910  *      Delete a key for subsystem mount-specific data.
 2911  */
 2912 void
 2913 mount_specific_key_delete(specificdata_key_t key)
 2914 {
 2915 
 2916         specificdata_key_delete(mount_specificdata_domain, key);
 2917 }
 2918 
 2919 /*
 2920  * mount_initspecific --
 2921  *      Initialize a mount's specificdata container.
 2922  */
 2923 void
 2924 mount_initspecific(struct mount *mp)
 2925 {
 2926         int error;
 2927 
 2928         error = specificdata_init(mount_specificdata_domain,
 2929                                   &mp->mnt_specdataref);
 2930         KASSERT(error == 0);
 2931 }
 2932 
 2933 /*
 2934  * mount_finispecific --
 2935  *      Finalize a mount's specificdata container.
 2936  */
 2937 void
 2938 mount_finispecific(struct mount *mp)
 2939 {
 2940 
 2941         specificdata_fini(mount_specificdata_domain, &mp->mnt_specdataref);
 2942 }
 2943 
 2944 /*
 2945  * mount_getspecific --
 2946  *      Return mount-specific data corresponding to the specified key.
 2947  */
 2948 void *
 2949 mount_getspecific(struct mount *mp, specificdata_key_t key)
 2950 {
 2951 
 2952         return (specificdata_getspecific(mount_specificdata_domain,
 2953                                          &mp->mnt_specdataref, key));
 2954 }
 2955 
 2956 /*
 2957  * mount_setspecific --
 2958  *      Set mount-specific data corresponding to the specified key.
 2959  */
 2960 void
 2961 mount_setspecific(struct mount *mp, specificdata_key_t key, void *data)
 2962 {
 2963 
 2964         specificdata_setspecific(mount_specificdata_domain,
 2965                                  &mp->mnt_specdataref, key, data);
 2966 }
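
/*
 * Editorial note: a hedged sketch, not part of the original source, of
 * the mount-specificdata lifecycle.  The names example_key,
 * example_dtor and data are hypothetical.
 */
#if 0   /* illustrative only */
static specificdata_key_t example_key;

        /* once, at subsystem initialization: */
        error = mount_specific_key_create(&example_key, example_dtor);

        /* per mount, attach and later retrieve private data: */
        mount_setspecific(mp, example_key, data);
        data = mount_getspecific(mp, example_key);

        /* at subsystem teardown: */
        mount_specific_key_delete(example_key);
#endif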
 2967 
 2968 int
 2969 VFS_MOUNT(struct mount *mp, const char *a, void *b, size_t *c)
 2970 {
 2971         int error;
 2972 
 2973         KERNEL_LOCK(1, NULL);
 2974         error = (*(mp->mnt_op->vfs_mount))(mp, a, b, c);
 2975         KERNEL_UNLOCK_ONE(NULL);
 2976 
 2977         return error;
 2978 }
 2979         
 2980 int
 2981 VFS_START(struct mount *mp, int a)
 2982 {
 2983         int error;
 2984 
 2985         if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
 2986                 KERNEL_LOCK(1, NULL);
 2987         }
 2988         error = (*(mp->mnt_op->vfs_start))(mp, a);
 2989         if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
 2990                 KERNEL_UNLOCK_ONE(NULL);
 2991         }
 2992 
 2993         return error;
 2994 }
 2995         
 2996 int
 2997 VFS_UNMOUNT(struct mount *mp, int a)
 2998 {
 2999         int error;
 3000 
 3001         KERNEL_LOCK(1, NULL);
 3002         error = (*(mp->mnt_op->vfs_unmount))(mp, a);
 3003         KERNEL_UNLOCK_ONE(NULL);
 3004 
 3005         return error;
 3006 }
 3007 
 3008 int
 3009 VFS_ROOT(struct mount *mp, struct vnode **a)
 3010 {
 3011         int error;
 3012 
 3013         if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
 3014                 KERNEL_LOCK(1, NULL);
 3015         }
 3016         error = (*(mp->mnt_op->vfs_root))(mp, a);
 3017         if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
 3018                 KERNEL_UNLOCK_ONE(NULL);
 3019         }
 3020 
 3021         return error;
 3022 }
 3023 
 3024 int
 3025 VFS_QUOTACTL(struct mount *mp, int a, uid_t b, void *c)
 3026 {
 3027         int error;
 3028 
 3029         if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
 3030                 KERNEL_LOCK(1, NULL);
 3031         }
 3032         error = (*(mp->mnt_op->vfs_quotactl))(mp, a, b, c);
 3033         if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
 3034                 KERNEL_UNLOCK_ONE(NULL);
 3035         }
 3036 
 3037         return error;
 3038 }
 3039 
 3040 int
 3041 VFS_STATVFS(struct mount *mp, struct statvfs *a)
 3042 {
 3043         int error;
 3044 
 3045         if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
 3046                 KERNEL_LOCK(1, NULL);
 3047         }
 3048         error = (*(mp->mnt_op->vfs_statvfs))(mp, a);
 3049         if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
 3050                 KERNEL_UNLOCK_ONE(NULL);
 3051         }
 3052 
 3053         return error;
 3054 }
 3055 
 3056 int
 3057 VFS_SYNC(struct mount *mp, int a, struct kauth_cred *b)
 3058 {
 3059         int error;
 3060 
 3061         if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
 3062                 KERNEL_LOCK(1, NULL);
 3063         }
 3064         error = (*(mp->mnt_op->vfs_sync))(mp, a, b);
 3065         if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
 3066                 KERNEL_UNLOCK_ONE(NULL);
 3067         }
 3068 
 3069         return error;
 3070 }
 3071 
 3072 int
 3073 VFS_FHTOVP(struct mount *mp, struct fid *a, struct vnode **b)
 3074 {
 3075         int error;
 3076 
 3077         if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
 3078                 KERNEL_LOCK(1, NULL);
 3079         }
 3080         error = (*(mp->mnt_op->vfs_fhtovp))(mp, a, b);
 3081         if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
 3082                 KERNEL_UNLOCK_ONE(NULL);
 3083         }
 3084 
 3085         return error;
 3086 }
 3087 
 3088 int
 3089 VFS_VPTOFH(struct vnode *vp, struct fid *a, size_t *b)
 3090 {
 3091         int error;
 3092 
 3093         if ((vp->v_vflag & VV_MPSAFE) == 0) {
 3094                 KERNEL_LOCK(1, NULL);
 3095         }
 3096         error = (*(vp->v_mount->mnt_op->vfs_vptofh))(vp, a, b);
 3097         if ((vp->v_vflag & VV_MPSAFE) == 0) {
 3098                 KERNEL_UNLOCK_ONE(NULL);
 3099         }
 3100 
 3101         return error;
 3102 }
 3103 
 3104 int
 3105 VFS_SNAPSHOT(struct mount *mp, struct vnode *a, struct timespec *b)
 3106 {
 3107         int error;
 3108 
 3109         if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
 3110                 KERNEL_LOCK(1, NULL);
 3111         }
 3112         error = (*(mp->mnt_op->vfs_snapshot))(mp, a, b);
 3113         if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
 3114                 KERNEL_UNLOCK_ONE(NULL);
 3115         }
 3116 
 3117         return error;
 3118 }
 3119 
 3120 int
 3121 VFS_EXTATTRCTL(struct mount *mp, int a, struct vnode *b, int c, const char *d)
 3122 {
 3123         int error;
 3124 
 3125         KERNEL_LOCK(1, NULL);           /* XXXSMP check ffs */
 3126         error = (*(mp->mnt_op->vfs_extattrctl))(mp, a, b, c, d);
 3127         KERNEL_UNLOCK_ONE(NULL);        /* XXX */
 3128 
 3129         return error;
 3130 }
 3131 
 3132 int
 3133 VFS_SUSPENDCTL(struct mount *mp, int a)
 3134 {
 3135         int error;
 3136 
 3137         if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
 3138                 KERNEL_LOCK(1, NULL);
 3139         }
 3140         error = (*(mp->mnt_op->vfs_suspendctl))(mp, a);
 3141         if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) {
 3142                 KERNEL_UNLOCK_ONE(NULL);
 3143         }
 3144 
 3145         return error;
 3146 }
 3147 
 3148 #ifdef DDB
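/*
 * Pretty-printers for the in-kernel debugger, ddb(4).  In NetBSD of this
 * vintage they back commands such as "show vnode", "show mount" and
 * "show buf"; a non-zero "full" argument additionally dumps the attached
 * buffers or vnodes.
 */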
 3149 static const char buf_flagbits[] = BUF_FLAGBITS;
 3150 
 3151 void
 3152 vfs_buf_print(struct buf *bp, int full, void (*pr)(const char *, ...))
 3153 {
 3154         char bf[1024];
 3155 
 3156         (*pr)("  vp %p lblkno 0x%"PRIx64" blkno 0x%"PRIx64" rawblkno 0x%"
 3157             PRIx64 " dev 0x%x\n",
 3158             bp->b_vp, bp->b_lblkno, bp->b_blkno, bp->b_rawblkno, bp->b_dev);
 3159 
 3160         bitmask_snprintf(bp->b_flags | bp->b_oflags | bp->b_cflags,
 3161             buf_flagbits, bf, sizeof(bf));
 3162         (*pr)("  error %d flags 0x%s\n", bp->b_error, bf);
 3163 
 3164         (*pr)("  bufsize 0x%lx bcount 0x%lx resid 0x%lx\n",
 3165                   bp->b_bufsize, bp->b_bcount, bp->b_resid);
 3166         (*pr)("  data %p saveaddr %p dep %p\n",
 3167                   bp->b_data, bp->b_saveaddr, LIST_FIRST(&bp->b_dep));
 3168         (*pr)("  iodone %p objlock %p\n", bp->b_iodone, bp->b_objlock);
 3169 }
 3170 
 3171 
 3172 void
 3173 vfs_vnode_print(struct vnode *vp, int full, void (*pr)(const char *, ...))
 3174 {
 3175         char bf[256];
 3176 
 3177         uvm_object_printit(&vp->v_uobj, full, pr);
 3178         bitmask_snprintf(vp->v_iflag | vp->v_vflag | vp->v_uflag,
 3179             vnode_flagbits, bf, sizeof(bf));
 3180         (*pr)("\nVNODE flags %s\n", bf);
 3181         (*pr)("mp %p numoutput %d size 0x%llx writesize 0x%llx\n",
 3182               vp->v_mount, vp->v_numoutput, vp->v_size, vp->v_writesize);
 3183 
 3184         (*pr)("data %p writecount %ld holdcnt %ld\n",
 3185               vp->v_data, vp->v_writecount, vp->v_holdcnt);
 3186 
 3187         (*pr)("tag %s(%d) type %s(%d) mount %p typedata %p\n",
 3188               ARRAY_PRINT(vp->v_tag, vnode_tags), vp->v_tag,
 3189               ARRAY_PRINT(vp->v_type, vnode_types), vp->v_type,
 3190               vp->v_mount, vp->v_mountedhere);
 3191 
 3192         (*pr)("v_lock %p v_vnlock %p\n", &vp->v_lock, vp->v_vnlock);
 3193 
 3194         if (full) {
 3195                 struct buf *bp;
 3196 
 3197                 (*pr)("clean bufs:\n");
 3198                 LIST_FOREACH(bp, &vp->v_cleanblkhd, b_vnbufs) {
 3199                         (*pr)(" bp %p\n", bp);
 3200                         vfs_buf_print(bp, full, pr);
 3201                 }
 3202 
 3203                 (*pr)("dirty bufs:\n");
 3204                 LIST_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs) {
 3205                         (*pr)(" bp %p\n", bp);
 3206                         vfs_buf_print(bp, full, pr);
 3207                 }
 3208         }
 3209 }
 3210 
 3211 void
 3212 vfs_mount_print(struct mount *mp, int full, void (*pr)(const char *, ...))
 3213 {
 3214         char sbuf[256];
 3215 
 3216         (*pr)("vnodecovered = %p syncer = %p data = %p\n",
 3217                         mp->mnt_vnodecovered, mp->mnt_syncer, mp->mnt_data);
 3218 
 3219         (*pr)("fs_bshift = %d dev_bshift = %d\n",
 3220                         mp->mnt_fs_bshift, mp->mnt_dev_bshift);
 3221 
 3222         bitmask_snprintf(mp->mnt_flag, __MNT_FLAG_BITS, sbuf, sizeof(sbuf));
 3223         (*pr)("flag = %s\n", sbuf);
 3224 
 3225         bitmask_snprintf(mp->mnt_iflag, __IMNT_FLAG_BITS, sbuf, sizeof(sbuf));
 3226         (*pr)("iflag = %s\n", sbuf);
 3227 
 3228         (*pr)("refcnt = %d unmounting @ %p updating @ %p\n", mp->mnt_refcnt,
 3229             &mp->mnt_unmounting, &mp->mnt_updating);
 3230 
 3231         (*pr)("statvfs cache:\n");
 3232         (*pr)("\tbsize = %lu\n", mp->mnt_stat.f_bsize);
 3233         (*pr)("\tfrsize = %lu\n", mp->mnt_stat.f_frsize);
 3234         (*pr)("\tiosize = %lu\n", mp->mnt_stat.f_iosize);
 3235 
 3236         (*pr)("\tblocks = %"PRIu64"\n", mp->mnt_stat.f_blocks);
 3237         (*pr)("\tbfree = %"PRIu64"\n", mp->mnt_stat.f_bfree);
 3238         (*pr)("\tbavail = %"PRIu64"\n", mp->mnt_stat.f_bavail);
 3239         (*pr)("\tbresvd = %"PRIu64"\n", mp->mnt_stat.f_bresvd);
 3240 
 3241         (*pr)("\tfiles = %"PRIu64"\n", mp->mnt_stat.f_files);
 3242         (*pr)("\tffree = %"PRIu64"\n", mp->mnt_stat.f_ffree);
 3243         (*pr)("\tfavail = %"PRIu64"\n", mp->mnt_stat.f_favail);
 3244         (*pr)("\tfresvd = %"PRIu64"\n", mp->mnt_stat.f_fresvd);
 3245 
 3246         (*pr)("\tf_fsidx = { 0x%"PRIx32", 0x%"PRIx32" }\n",
 3247                         mp->mnt_stat.f_fsidx.__fsid_val[0],
 3248                         mp->mnt_stat.f_fsidx.__fsid_val[1]);
 3249 
 3250         (*pr)("\towner = %"PRIu32"\n", mp->mnt_stat.f_owner);
 3251         (*pr)("\tnamemax = %lu\n", mp->mnt_stat.f_namemax);
 3252 
 3253         bitmask_snprintf(mp->mnt_stat.f_flag, __MNT_FLAG_BITS, sbuf,
 3254             sizeof(sbuf));
 3255         (*pr)("\tflag = %s\n", sbuf);
 3256         (*pr)("\tsyncwrites = %" PRIu64 "\n", mp->mnt_stat.f_syncwrites);
 3257         (*pr)("\tasyncwrites = %" PRIu64 "\n", mp->mnt_stat.f_asyncwrites);
 3258         (*pr)("\tsyncreads = %" PRIu64 "\n", mp->mnt_stat.f_syncreads);
 3259         (*pr)("\tasyncreads = %" PRIu64 "\n", mp->mnt_stat.f_asyncreads);
 3260         (*pr)("\tfstypename = %s\n", mp->mnt_stat.f_fstypename);
 3261         (*pr)("\tmntonname = %s\n", mp->mnt_stat.f_mntonname);
 3262         (*pr)("\tmntfromname = %s\n", mp->mnt_stat.f_mntfromname);
 3263 
 3264         {
 3265                 int cnt = 0;
 3266                 struct vnode *vp;
 3267                 (*pr)("locked vnodes =");
 3268                 TAILQ_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
 3269                         if (VOP_ISLOCKED(vp)) {
 3270                                 if ((++cnt % 6) == 0) {
 3271                                         (*pr)(" %p,\n\t", vp);
 3272                                 } else {
 3273                                         (*pr)(" %p,", vp);
 3274                                 }
 3275                         }
 3276                 }
 3277                 (*pr)("\n");
 3278         }
 3279 
 3280         if (full) {
 3281                 int cnt = 0;
 3282                 struct vnode *vp;
 3283                 (*pr)("all vnodes =");
 3284                 TAILQ_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
 3285                         if (!TAILQ_NEXT(vp, v_mntvnodes)) {
 3286                                 (*pr)(" %p", vp);
 3287                         } else if ((++cnt % 6) == 0) {
 3288                                 (*pr)(" %p,\n\t", vp);
 3289                         } else {
 3290                                 (*pr)(" %p,", vp);
 3291                         }
 3292                 }
 3293                 (*pr)("\n");
 3294         }
 3295 }
 3296 #endif /* DDB */
