vfs_subr.c

Version: - FREEBSD - FREEBSD-13-STABLE - FREEBSD-13-0 - FREEBSD-12-STABLE - FREEBSD-12-0 - FREEBSD-11-STABLE - FREEBSD-11-0 - FREEBSD-10-STABLE - FREEBSD-10-0 - FREEBSD-9-STABLE - FREEBSD-9-0 - FREEBSD-8-STABLE - FREEBSD-8-0 - FREEBSD-7-STABLE - FREEBSD-7-0 - FREEBSD-6-STABLE - FREEBSD-6-0 - FREEBSD-5-STABLE - FREEBSD-5-0 - FREEBSD-4-STABLE - FREEBSD-3-STABLE - FREEBSD22 - l41 - OPENBSD - linux-2.6 - MK84 - PLAN9 - xnu-8792
SearchContext: - none - 3 - 10
    1 /*
    2  * Copyright (c) 1989, 1993
    3  *      The Regents of the University of California.  All rights reserved.
    4  * (c) UNIX System Laboratories, Inc.
    5  * All or some portions of this file are derived from material licensed
    6  * to the University of California by American Telephone and Telegraph
    7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
    8  * the permission of UNIX System Laboratories, Inc.
    9  *
   10  * Redistribution and use in source and binary forms, with or without
   11  * modification, are permitted provided that the following conditions
   12  * are met:
   13  * 1. Redistributions of source code must retain the above copyright
   14  *    notice, this list of conditions and the following disclaimer.
   15  * 2. Redistributions in binary form must reproduce the above copyright
   16  *    notice, this list of conditions and the following disclaimer in the
   17  *    documentation and/or other materials provided with the distribution.
   18  * 3. All advertising materials mentioning features or use of this software
   19  *    must display the following acknowledgement:
   20  *      This product includes software developed by the University of
   21  *      California, Berkeley and its contributors.
   22  * 4. Neither the name of the University nor the names of its contributors
   23  *    may be used to endorse or promote products derived from this software
   24  *    without specific prior written permission.
   25  *
   26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   36  * SUCH DAMAGE.
   37  *
   38  *      @(#)vfs_subr.c  8.31 (Berkeley) 5/26/95
   39  * $FreeBSD$
   40  */
   41 
   42 /*
   43  * External virtual filesystem routines
   44  */
   45 #include "opt_ddb.h"
   46 
   47 #include <sys/param.h>
   48 #include <sys/systm.h>
   49 #include <sys/conf.h>
   50 #include <sys/fcntl.h>
   51 #include <sys/kernel.h>
   52 #include <sys/proc.h>
   53 #include <sys/malloc.h>
   54 #include <sys/mount.h>
   55 #include <sys/socket.h>
   56 #include <sys/vnode.h>
   57 #include <sys/stat.h>
   58 #include <sys/buf.h>
   59 #include <sys/domain.h>
   60 #include <sys/dirent.h>
   61 #include <sys/vmmeter.h>
   62 
   63 #include <machine/limits.h>
   64 
   65 #include <vm/vm.h>
   66 #include <vm/vm_object.h>
   67 #include <vm/vm_extern.h>
   68 #include <vm/pmap.h>
   69 #include <vm/vm_map.h>
   70 #include <vm/vm_pager.h>
   71 #include <vm/vnode_pager.h>
   72 #include <vm/vm_zone.h>
   73 #include <sys/sysctl.h>
   74 
   75 #include <miscfs/specfs/specdev.h>
   76 
      /* Malloc type described by its own strings as "Export host address structure". */
   77 static MALLOC_DEFINE(M_NETADDR, "Export Host", "Export host address structure");
   78 
      /* Forward declarations of helpers private to this file. */
   79 static void     insmntque __P((struct vnode *vp, struct mount *mp));
   80 static void     vclean __P((struct vnode *vp, int flags, struct proc *p));
   81 static void     vfree __P((struct vnode *));
   82 static void     vgonel __P((struct vnode *vp, struct proc *p));
      /*
       * Count of vnodes allocated from vnode_zone; bumped in getnewvnode()
       * each time a fresh vnode is created.
       * NOTE(review): this counter (and wantfreevnodes/freevnodes below) is
       * an unsigned long but is exported with SYSCTL_INT -- confirm the
       * widths agree on every supported platform.
       */
   83 static unsigned long    numvnodes;
   84 SYSCTL_INT(_debug, OID_AUTO, numvnodes, CTLFLAG_RD, &numvnodes, 0, "");
   85 
      /*
       * Translation tables between file-mode type values and enum vtype.
       * Presumably indexed by the S_IFMT bits of a mode (iftovt_tab) and by
       * enum vtype (vttoif_tab); the lookup macros are not in this file --
       * TODO confirm.
       */
   86 enum vtype iftovt_tab[16] = {
   87         VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
   88         VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
   89 };
   90 int vttoif_tab[9] = {
   91         0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
   92         S_IFSOCK, S_IFIFO, S_IFMT,
   93 };
   94 
   95 static TAILQ_HEAD(freelst, vnode) vnode_free_list;      /* vnode free list */
   96 struct tobefreelist vnode_tobefree_list;        /* vnodes waiting to be moved onto the free list */
   97 
      /*
       * getnewvnode() allocates a fresh vnode rather than recycling one
       * while freevnodes is below wantfreevnodes.
       */
   98 static u_long wantfreevnodes = 25;
   99 SYSCTL_INT(_debug, OID_AUTO, wantfreevnodes, CTLFLAG_RW, &wantfreevnodes, 0, "");
      /* Number of vnodes currently on vnode_free_list. */
  100 static u_long freevnodes = 0;
  101 SYSCTL_INT(_debug, OID_AUTO, freevnodes, CTLFLAG_RD, &freevnodes, 0, "");
  102 
      /* The sysctl knob for vfs_ioopt only exists when ENABLE_VFS_IOOPT is defined. */
  103 int vfs_ioopt = 0;
  104 #ifdef ENABLE_VFS_IOOPT
  105 SYSCTL_INT(_vfs, OID_AUTO, ioopt, CTLFLAG_RW, &vfs_ioopt, 0, "");
  106 #endif
  107 
  108 struct mntlist mountlist;       /* mounted filesystem list */
      /* mountlist_slock is held while walking mountlist (see vfs_getvfs()). */
  109 struct simplelock mountlist_slock;
      /* mntvnode_slock is held while editing a mount's vnode list (see insmntque()). */
  110 struct simplelock mntvnode_slock;
  111 int     nfs_mount_type = -1;
  112 #ifndef NULL_SIMPLELOCKS
  113 static struct simplelock mntid_slock;
  114 static struct simplelock vnode_free_list_slock;
  115 static struct simplelock spechash_slock;
  116 #endif
  117 struct nfs_public nfs_pub;      /* publicly exported FS */
      /* Allocation zone for struct vnode, created in vntblinit(). */
  118 static vm_zone_t vnode_zone;
  119 
  120 /*
  121  * The workitem queue.
       *
       * syncer_workitem_pending is created by hashinit() in vntblinit(), which
       * also resets syncer_maxdelay to syncer_mask + 1.
  122  */
  123 #define SYNCER_MAXDELAY         32
  124 static int syncer_maxdelay = SYNCER_MAXDELAY;   /* maximum delay time */
  125 time_t syncdelay =              30;
  126 int rushjob;                            /* number of slots to run ASAP */
  127 
  128 static int syncer_delayno = 0;
  129 static long syncer_mask; 
  130 LIST_HEAD(synclist, vnode);
  131 static struct synclist *syncer_workitem_pending;
  132 
      /* Set in vntblinit() to maxproc + cnt.v_page_count / 4; tunable via sysctl. */
  133 int desiredvnodes;
  134 SYSCTL_INT(_kern, KERN_MAXVNODES, maxvnodes, CTLFLAG_RW, &desiredvnodes, 0, "");
  135 
      /* Forward declarations for the export-list helpers. */
  136 static void     vfs_free_addrlist __P((struct netexport *nep));
  137 static int      vfs_free_netcred __P((struct radix_node *rn, void *w));
  138 static int      vfs_hang_addrlist __P((struct mount *mp, struct netexport *nep,
  139                                        struct export_args *argp));
  140 
  141 /*
  142  * Initialize the vnode management data structures.
  143  */
  144 void
  145 vntblinit()
  146 {
  147 
  148         desiredvnodes = maxproc + cnt.v_page_count / 4;
  149         simple_lock_init(&mntvnode_slock);
  150         simple_lock_init(&mntid_slock);
  151         simple_lock_init(&spechash_slock);
  152         TAILQ_INIT(&vnode_free_list);
  153         TAILQ_INIT(&vnode_tobefree_list);
  154         simple_lock_init(&vnode_free_list_slock);
  155         CIRCLEQ_INIT(&mountlist);
  156         vnode_zone = zinit("VNODE", sizeof (struct vnode), 0, 0, 5);
  157         /*
  158          * Initialize the filesystem syncer.
  159          */     
  160         syncer_workitem_pending = hashinit(syncer_maxdelay, M_VNODE, 
  161                 &syncer_mask);
  162         syncer_maxdelay = syncer_mask + 1;
  163 }
  164 
  165 /*
  166  * Mark a mount point as busy. Used to synchronize access and to delay
  167  * unmounting. Interlock is not released on failure.
  168  */
  169 int
  170 vfs_busy(mp, flags, interlkp, p)
  171         struct mount *mp;
  172         int flags;
  173         struct simplelock *interlkp;
  174         struct proc *p;
  175 {
  176         int lkflags;
  177 
  178         if (mp->mnt_kern_flag & MNTK_UNMOUNT) {
  179                 if (flags & LK_NOWAIT)
  180                         return (ENOENT);
  181                 mp->mnt_kern_flag |= MNTK_MWAIT;
  182                 if (interlkp) {
  183                         simple_unlock(interlkp);
  184                 }
  185                 /*
  186                  * Since all busy locks are shared except the exclusive
  187                  * lock granted when unmounting, the only place that a
  188                  * wakeup needs to be done is at the release of the
  189                  * exclusive lock at the end of dounmount.
  190                  */
  191                 tsleep((caddr_t)mp, PVFS, "vfs_busy", 0);
  192                 if (interlkp) {
  193                         simple_lock(interlkp);
  194                 }
  195                 return (ENOENT);
  196         }
  197         lkflags = LK_SHARED | LK_NOPAUSE;
  198         if (interlkp)
  199                 lkflags |= LK_INTERLOCK;
  200         if (lockmgr(&mp->mnt_lock, lkflags, interlkp, p))
  201                 panic("vfs_busy: unexpected lock failure");
  202         return (0);
  203 }
  204 
  205 /*
  206  * Free a busy filesystem.
  207  */
  208 void
  209 vfs_unbusy(mp, p)
  210         struct mount *mp;
  211         struct proc *p;
  212 {
  213 
  214         lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, p);
  215 }
  216 
  217 /*
  218  * Lookup a filesystem type, and if found allocate and initialize
  219  * a mount structure for it.
  220  *
  221  * Devname is usually updated by mount(8) after booting.
  222  */
  223 int
  224 vfs_rootmountalloc(fstypename, devname, mpp)
  225         char *fstypename;
  226         char *devname;
  227         struct mount **mpp;
  228 {
  229         struct proc *p = curproc;       /* XXX */
  230         struct vfsconf *vfsp;
  231         struct mount *mp;
  232 
  233         if (fstypename == NULL)
  234                 return (ENODEV);
  235         for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
  236                 if (!strcmp(vfsp->vfc_name, fstypename))
  237                         break;
  238         if (vfsp == NULL)
  239                 return (ENODEV);
  240         mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
  241         bzero((char *)mp, (u_long)sizeof(struct mount));
  242         lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, LK_NOPAUSE);
  243         (void)vfs_busy(mp, LK_NOWAIT, 0, p);
  244         LIST_INIT(&mp->mnt_vnodelist);
  245         mp->mnt_vfc = vfsp;
  246         mp->mnt_op = vfsp->vfc_vfsops;
  247         mp->mnt_flag = MNT_RDONLY;
  248         mp->mnt_vnodecovered = NULLVP;
  249         vfsp->vfc_refcount++;
  250         mp->mnt_stat.f_type = vfsp->vfc_typenum;
  251         mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
  252         strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
  253         mp->mnt_stat.f_mntonname[0] = '/';
  254         mp->mnt_stat.f_mntonname[1] = 0;
  255         (void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
  256         *mpp = mp;
  257         return (0);
  258 }
  259 
  260 /*
  261  * Find an appropriate filesystem to use for the root. If a filesystem
  262  * has not been preselected, walk through the list of known filesystems
  263  * trying those that have mountroot routines, and try them until one
  264  * works or we have tried them all.
  265  */
  266 #ifdef notdef   /* XXX JH */
  267 int
  268 lite2_vfs_mountroot()
  269 {
  270         struct vfsconf *vfsp;
  271         extern int (*lite2_mountroot) __P((void));
  272         int error;
  273 
  274         if (lite2_mountroot != NULL)
  275                 return ((*lite2_mountroot)());
  276         for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
  277                 if (vfsp->vfc_mountroot == NULL)
  278                         continue;
  279                 if ((error = (*vfsp->vfc_mountroot)()) == 0)
  280                         return (0);
  281                 printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error);
  282         }
  283         return (ENODEV);
  284 }
  285 #endif
  286 
  287 /*
  288  * Lookup a mount point by filesystem identifier.
  289  */
  290 struct mount *
  291 vfs_getvfs(fsid)
  292         fsid_t *fsid;
  293 {
  294         register struct mount *mp;
  295 
  296         simple_lock(&mountlist_slock);
  297         for (mp = mountlist.cqh_first; mp != (void *)&mountlist;
  298             mp = mp->mnt_list.cqe_next) {
  299                 if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
  300                     mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
  301                         simple_unlock(&mountlist_slock);
  302                         return (mp);
  303             }
  304         }
  305         simple_unlock(&mountlist_slock);
  306         return ((struct mount *) 0);
  307 }
  308 
  309 /*
  310  * Get a new unique fsid
  311  */
  312 void
  313 vfs_getnewfsid(mp)
  314         struct mount *mp;
  315 {
  316         static u_short xxxfs_mntid;
  317 
  318         fsid_t tfsid;
  319         int mtype;
  320 
  321         simple_lock(&mntid_slock); 
  322         mtype = mp->mnt_vfc->vfc_typenum;
  323         mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
  324         mp->mnt_stat.f_fsid.val[1] = mtype;
  325         if (xxxfs_mntid == 0)
  326                 ++xxxfs_mntid;
  327         tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid);
  328         tfsid.val[1] = mtype;
  329         if (mountlist.cqh_first != (void *)&mountlist) {
  330                 while (vfs_getvfs(&tfsid)) {
  331                         tfsid.val[0]++;
  332                         xxxfs_mntid++;
  333                 }
  334         }
  335         mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
  336         simple_unlock(&mntid_slock);
  337 }
  338 
  339 /*
  340  * Set vnode attributes to VNOVAL
  341  */
  342 void
  343 vattr_null(vap)
  344         register struct vattr *vap;
  345 {
  346 
  347         vap->va_type = VNON;
  348         vap->va_size = VNOVAL;
  349         vap->va_bytes = VNOVAL;
  350         vap->va_mode = VNOVAL;
  351         vap->va_nlink = VNOVAL;
  352         vap->va_uid = VNOVAL;
  353         vap->va_gid = VNOVAL;
  354         vap->va_fsid = VNOVAL;
  355         vap->va_fileid = VNOVAL;
  356         vap->va_blocksize = VNOVAL;
  357         vap->va_rdev = VNOVAL;
  358         vap->va_atime.tv_sec = VNOVAL;
  359         vap->va_atime.tv_nsec = VNOVAL;
  360         vap->va_mtime.tv_sec = VNOVAL;
  361         vap->va_mtime.tv_nsec = VNOVAL;
  362         vap->va_ctime.tv_sec = VNOVAL;
  363         vap->va_ctime.tv_nsec = VNOVAL;
  364         vap->va_flags = VNOVAL;
  365         vap->va_gen = VNOVAL;
  366         vap->va_vaflags = 0;
  367 }
  368 
  369 /*
  370  * Routines having to do with the management of the vnode table.
  371  */
  372 extern vop_t **dead_vnodeop_p;
  373 
  374 /*
  375  * Return the next vnode from the free list.
       *
       * The vnode handed back through *vpp is either recycled from
       * vnode_free_list or freshly allocated from vnode_zone.  Either way it
       * comes back with v_type VNON, v_tag set to tag, v_op set to vops,
       * v_usecount 1 and v_data cleared, and it has been put on mp's vnode
       * list by insmntque().  The function always returns 0.
  376  */
  377 int
  378 getnewvnode(tag, mp, vops, vpp)
  379         enum vtagtype tag;
  380         struct mount *mp;
  381         vop_t **vops;
  382         struct vnode **vpp;
  383 {
  384         int s;
  385         struct proc *p = curproc;       /* XXX */
  386         struct vnode *vp, *tvp, *nvp;
  387         vm_object_t object;
              /* Holding list for free vnodes that were examined but must not be recycled. */
  388         TAILQ_HEAD(freelst, vnode) vnode_tmp_list;
  389 
  390         /*
  391          * We take the least recently used vnode from the freelist
  392          * if we can get it and it has no cached pages, and no
  393          * namecache entries are relative to it.
  394          * Otherwise we allocate a new vnode
  395          */
  396 
  397         s = splbio();
  398         simple_lock(&vnode_free_list_slock);
  399         TAILQ_INIT(&vnode_tmp_list);
  400 
              /*
               * Drain vnode_tobefree_list onto the real free list: VAGE
               * vnodes go to the head, all others to the tail.
               */
  401         for (vp = TAILQ_FIRST(&vnode_tobefree_list); vp; vp = nvp) {
  402                 nvp = TAILQ_NEXT(vp, v_freelist);
  403                 TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist);
  404                 if (vp->v_flag & VAGE) {
  405                         TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
  406                 } else {
  407                         TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
  408                 }
  409                 vp->v_flag &= ~(VTBFREE|VAGE);
  410                 vp->v_flag |= VFREE;
  411                 if (vp->v_usecount)
  412                         panic("tobe free vnode isn't");
  413                 freevnodes++;
  414         }
  415 
              /*
               * Decide between recycling and allocating.  vp == NULL after
               * this chain means "allocate a new vnode".
               */
  416         if (wantfreevnodes && freevnodes < wantfreevnodes) {
  417                 vp = NULL;
  418         } else if (!wantfreevnodes && freevnodes <= desiredvnodes) {
  419                 /* 
  420                  * XXX: this is only here to be backwards compatible
  421                  */
  422                 vp = NULL;
  423         } else {
                      /*
                       * Walk the free list from its head.  A vnode whose
                       * interlock cannot be taken without waiting is skipped.
                       * The interlock taken here stays held on the vnode that
                       * is finally chosen and on every vnode parked on
                       * vnode_tmp_list.
                       */
  424                 for (vp = TAILQ_FIRST(&vnode_free_list); vp; vp = nvp) {
  425                         nvp = TAILQ_NEXT(vp, v_freelist);
  426                         if (!simple_lock_try(&vp->v_interlock)) 
  427                                 continue;
  428                         if (vp->v_usecount)
  429                                 panic("free vnode isn't");
  430 
  431                         object = vp->v_object;
  432                         if (object && (object->resident_page_count || object->ref_count)) {
  433                                 printf("object inconsistant state: RPC: %d, RC: %d\n",
  434                                         object->resident_page_count, object->ref_count);
  435                                 /* Don't recycle if it's caching some pages */
  436                                 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
  437                                 TAILQ_INSERT_TAIL(&vnode_tmp_list, vp, v_freelist);
  438                                 continue;
  439                         } else if (LIST_FIRST(&vp->v_cache_src)) {
  440                                 /* Don't recycle if active in the namecache */
  441                                 simple_unlock(&vp->v_interlock);
  442                                 continue;
  443                         } else {
  444                                 break;
  445                         }
  446                 }
  447         }
  448 
              /*
               * Return the parked vnodes to the tail of the free list and
               * drop the interlocks taken on them during the scan.
               */
  449         for (tvp = TAILQ_FIRST(&vnode_tmp_list); tvp; tvp = nvp) {
  450                 nvp = TAILQ_NEXT(tvp, v_freelist);
  451                 TAILQ_REMOVE(&vnode_tmp_list, tvp, v_freelist);
  452                 TAILQ_INSERT_TAIL(&vnode_free_list, tvp, v_freelist);
  453                 simple_unlock(&tvp->v_interlock);
  454         }
  455 
  456         if (vp) {
                      /* Recycle path: take vp off the free list and clean it out. */
  457                 vp->v_flag |= VDOOMED;
  458                 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
  459                 freevnodes--;
  460                 simple_unlock(&vnode_free_list_slock);
  461                 cache_purge(vp);
  462                 vp->v_lease = NULL;
                      /*
                       * NOTE(review): vgonel() is presumably responsible for
                       * releasing the interlock itself, since only the VBAD
                       * branch unlocks it here -- confirm against vgonel().
                       */
  463                 if (vp->v_type != VBAD) {
  464                         vgonel(vp, p);
  465                 } else {
  466                         simple_unlock(&vp->v_interlock);
  467                 }
  468 
  469 #ifdef INVARIANTS
  470                 {
                              /* This s deliberately shadows the outer spl cookie. */
  471                         int s;
  472 
  473                         if (vp->v_data)
  474                                 panic("cleaned vnode isn't");
  475                         s = splbio();
  476                         if (vp->v_numoutput)
  477                                 panic("Clean vnode has pending I/O's");
  478                         splx(s);
  479                 }
  480 #endif
                      /* Reset the per-use state left over from the previous identity. */
  481                 vp->v_flag = 0;
  482                 vp->v_lastr = 0;
  483                 vp->v_lastw = 0;
  484                 vp->v_lasta = 0;
  485                 vp->v_cstart = 0;
  486                 vp->v_clen = 0;
  487                 vp->v_socket = 0;
  488                 vp->v_writecount = 0;   /* XXX */
  489                 vp->v_maxio = 0;
  490                 vp->v_cache_dst_count = 0;
  491         } else {
                      /* Allocation path: zeroed vnode from the zone, counted in numvnodes. */
  492                 simple_unlock(&vnode_free_list_slock);
  493                 vp = (struct vnode *) zalloc(vnode_zone);
  494                 bzero((char *) vp, sizeof *vp);
  495                 simple_lock_init(&vp->v_interlock);
  496                 vp->v_dd = vp;
  497                 cache_purge(vp);
  498                 LIST_INIT(&vp->v_cache_src);
  499                 TAILQ_INIT(&vp->v_cache_dst);
  500                 numvnodes++;
  501         }
  502 
              /* Common initialisation for both recycled and new vnodes. */
  503         TAILQ_INIT(&vp->v_cleanblkhd);
  504         TAILQ_INIT(&vp->v_dirtyblkhd);
  505         vp->v_type = VNON;
  506         vp->v_tag = tag;
  507         vp->v_op = vops;
  508         insmntque(vp, mp);
  509         *vpp = vp;
  510         vp->v_usecount = 1;
  511         vp->v_data = 0;
  512         splx(s);
  513 
  514         vfs_object_create(vp, p, p->p_ucred);
  515         return (0);
  516 }
  517 
  518 /*
  519  * Move a vnode from one mount queue to another.
  520  */
  521 static void
  522 insmntque(vp, mp)
  523         register struct vnode *vp;
  524         register struct mount *mp;
  525 {
  526 
  527         simple_lock(&mntvnode_slock);
  528         /*
  529          * Delete from old mount point vnode list, if on one.
  530          */
  531         if (vp->v_mount != NULL)
  532                 LIST_REMOVE(vp, v_mntvnodes);
  533         /*
  534          * Insert into list of vnodes for the new mount point, if available.
  535          */
  536         if ((vp->v_mount = mp) == NULL) {
  537                 simple_unlock(&mntvnode_slock);
  538                 return;
  539         }
  540         LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
  541         simple_unlock(&mntvnode_slock);
  542 }
  543 
  544 /*
  545  * Update outstanding I/O count and do wakeup if requested.
  546  */
  547 void
  548 vwakeup(bp)
  549         register struct buf *bp;
  550 {
  551         register struct vnode *vp;
  552 
  553         bp->b_flags &= ~B_WRITEINPROG;
  554         if ((vp = bp->b_vp)) {
  555                 vp->v_numoutput--;
  556                 if (vp->v_numoutput < 0)
  557                         panic("vwakeup: neg numoutput");
  558                 if ((vp->v_numoutput == 0) && (vp->v_flag & VBWAIT)) {
  559                         vp->v_flag &= ~VBWAIT;
  560                         wakeup((caddr_t) &vp->v_numoutput);
  561                 }
  562         }
  563 }
  564 
  565 /*
  566  * Flush out and invalidate all buffers associated with a vnode.
  567  * Called with the underlying object locked.
       *
       * flags:    when V_SAVE is set, dirty data is written out (VOP_FSYNC
       *           and per-buffer writes) before the buffers are discarded.
       * cred, p:  passed through to VOP_FSYNC.
       * slpflag, slptimeo: combined into the priority / used as the timeout
       *           of the interruptible tsleep() calls below.
       *
       * Returns 0 on success, or the error from tsleep() or VOP_FSYNC.
       * Panics if buffers are still attached to the vnode when it finishes.
  568  */
  569 int
  570 vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
  571         register struct vnode *vp;
  572         int flags;
  573         struct ucred *cred;
  574         struct proc *p;
  575         int slpflag, slptimeo;
  576 {
  577         register struct buf *bp;
  578         struct buf *nbp, *blist;
  579         int s, error;
  580         vm_object_t object;
  581 
              /*
               * Phase 1 (V_SAVE only): wait for writes already in flight,
               * then push any dirty buffers out with a synchronous fsync.
               */
  582         if (flags & V_SAVE) {
  583                 s = splbio();
  584                 while (vp->v_numoutput) {
  585                         vp->v_flag |= VBWAIT;
  586                         error = tsleep((caddr_t)&vp->v_numoutput,
  587                             slpflag | (PRIBIO + 1), "vinvlbuf", slptimeo);
  588                         if (error) {
  589                                 splx(s);
  590                                 return (error);
  591                         }
  592                 }
  593                 if (!TAILQ_EMPTY(&vp->v_dirtyblkhd)) {
  594                         splx(s);
  595                         if ((error = VOP_FSYNC(vp, cred, MNT_WAIT, p)) != 0)
  596                                 return (error);
  597                         s = splbio();
  598                         if (vp->v_numoutput > 0 ||
  599                             !TAILQ_EMPTY(&vp->v_dirtyblkhd))
  600                                 panic("vinvalbuf: dirty bufs");
  601                 }
  602                 splx(s);
  603         }
              /*
               * Phase 2: keep rescanning until both buffer lists are empty.
               * Each pass starts from the head of the clean list, or of the
               * dirty list once the clean list is empty.
               */
  604         s = splbio();
  605         for (;;) {
  606                 blist = TAILQ_FIRST(&vp->v_cleanblkhd);
  607                 if (!blist)
  608                         blist = TAILQ_FIRST(&vp->v_dirtyblkhd);
  609                 if (!blist)
  610                         break;
  611 
  612                 for (bp = blist; bp; bp = nbp) {
  613                         nbp = TAILQ_NEXT(bp, b_vnbufs);
                              /*
                               * Busy buffer: ask to be woken, sleep, then break
                               * out so the outer loop rescans from a list head.
                               */
  614                         if (bp->b_flags & B_BUSY) {
  615                                 bp->b_flags |= B_WANTED;
  616                                 error = tsleep((caddr_t) bp,
  617                                     slpflag | (PRIBIO + 4), "vinvalbuf",
  618                                     slptimeo);
  619                                 if (error) {
  620                                         splx(s);
  621                                         return (error);
  622                                 }
  623                                 break;
  624                         }
  625                         /*
  626                          * XXX Since there are no node locks for NFS, I
  627                          * believe there is a slight chance that a delayed
  628                          * write will occur while sleeping just above, so
  629                          * check for it.  Note that vfs_bio_awrite expects
  630                          * buffers to reside on a queue, while VOP_BWRITE and
  631                          * brelse do not.
  632                          */
  633                         if (((bp->b_flags & (B_DELWRI | B_INVAL)) == B_DELWRI) &&
  634                                 (flags & V_SAVE)) {
  635 
                                      /*
                                       * Buffers attached to this vnode are
                                       * written asynchronously; a buffer whose
                                       * b_vp is some other vnode gets a plain
                                       * VOP_BWRITE without B_ASYNC.  Either way
                                       * the scan restarts afterwards.
                                       */
  636                                 if (bp->b_vp == vp) {
  637                                         if (bp->b_flags & B_CLUSTEROK) {
  638                                                 vfs_bio_awrite(bp);
  639                                         } else {
  640                                                 bremfree(bp);
  641                                                 bp->b_flags |= (B_BUSY | B_ASYNC);
  642                                                 VOP_BWRITE(bp);
  643                                         }
  644                                 } else {
  645                                         bremfree(bp);
  646                                         bp->b_flags |= B_BUSY;
  647                                         (void) VOP_BWRITE(bp);
  648                                 }
  649                                 break;
  650                         }
                              /* Nothing to save: mark the buffer invalid and release it. */
  651                         bremfree(bp);
  652                         bp->b_flags |= (B_INVAL | B_NOCACHE | B_RELBUF | B_BUSY);
  653                         bp->b_flags &= ~B_ASYNC;
  654                         brelse(bp);
  655                 }
  656         }
  657 
              /*
               * Wait for any writes started above to complete.  Unlike the
               * sleeps earlier in this function, this one ignores slpflag and
               * slptimeo and its return value.
               */
  658         while (vp->v_numoutput > 0) {
  659                 vp->v_flag |= VBWAIT;
  660                 tsleep(&vp->v_numoutput, PVM, "vnvlbv", 0);
  661         }
  662 
  663         splx(s);
  664 
  665         /*
  666          * Destroy the copy in the VM cache, too.
               * The last argument to vm_object_page_remove() is TRUE exactly
               * when V_SAVE was requested (presumably "remove clean pages
               * only" -- confirm against vm_object_page_remove()).
  667          */
  668         simple_lock(&vp->v_interlock);
  669         object = vp->v_object;
  670         if (object != NULL) {
  671                 vm_object_page_remove(object, 0, 0,
  672                         (flags & V_SAVE) ? TRUE : FALSE);
  673         }
  674         simple_unlock(&vp->v_interlock);
  675 
  676         if (!TAILQ_EMPTY(&vp->v_dirtyblkhd) || !TAILQ_EMPTY(&vp->v_cleanblkhd))
  677                 panic("vinvalbuf: flush failed");
  678         return (0);
  679 }
  680 
  681 /*
  682  * Truncate a file's buffer and pages to a specified length.  This
  683  * is in lieu of the old vinvalbuf mechanism, which performed unneeded
  684  * sync activity.
       *
       * length:  new file length in bytes.
       * blksize: block size used to turn length into a logical block number.
       * cred, p: accepted but not referenced in this function body.
       *
       * Always returns 0.
  685  */
  686 int
  687 vtruncbuf(vp, cred, p, length, blksize)
  688         register struct vnode *vp;
  689         struct ucred *cred;
  690         struct proc *p;
  691         off_t length;
  692         int blksize;
  693 {
  694         register struct buf *bp;
  695         struct buf *nbp;
  696         int s, anyfreed;
  697         int trunclbn;
  698 
  699         /*
  700          * Round up to the *next* lbn.
               * Buffers with b_lblkno >= trunclbn are discarded below.
  701          */
  702         trunclbn = (length + blksize - 1) / blksize;
  703 
  704         s = splbio();
      /* Re-entered whenever a list may have changed under us. */
  705 restart:
  706         anyfreed = 1;
              /* Repeat full passes over both lists until a pass frees nothing. */
  707         for (;anyfreed;) {
  708                 anyfreed = 0;
                      /* Pass over the clean list. */
  709                 for (bp = TAILQ_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
  710                         nbp = TAILQ_NEXT(bp, b_vnbufs);
  711                         if (bp->b_lblkno >= trunclbn) {
  712                                 if (bp->b_flags & B_BUSY) {
  713                                         bp->b_flags |= B_WANTED;
  714                                         tsleep(bp, PRIBIO + 4, "vtrb1", 0);
  715                                         goto restart;
  716                                 } else {
  717                                         bremfree(bp);
  718                                         bp->b_flags |= (B_BUSY | B_INVAL | B_RELBUF);
  719                                         bp->b_flags &= ~B_ASYNC;
  720                                         brelse(bp);
  721                                         anyfreed = 1;
  722                                 }
                                      /*
                                       * Start over if the saved successor no
                                       * longer looks like a clean buffer of this
                                       * vnode (presumably it can move while
                                       * brelse() runs -- confirm).
                                       */
  723                                 if (nbp && (((nbp->b_xflags & B_VNCLEAN) == 0)||
  724                                          (nbp->b_vp != vp) ||
  725                                          (nbp->b_flags & B_DELWRI))) {
  726                                         goto restart;
  727                                 }
  728                         }
  729                 }
  730 
                      /* Same pass over the dirty list. */
  731                 for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
  732                         nbp = TAILQ_NEXT(bp, b_vnbufs);
  733                         if (bp->b_lblkno >= trunclbn) {
  734                                 if (bp->b_flags & B_BUSY) {
  735                                         bp->b_flags |= B_WANTED;
  736                                         tsleep(bp, PRIBIO + 4, "vtrb2", 0);
  737                                         goto restart;
  738                                 } else {
  739                                         bremfree(bp);
  740                                         bp->b_flags |= (B_BUSY | B_INVAL | B_RELBUF);
  741                                         bp->b_flags &= ~B_ASYNC;
  742                                         brelse(bp);
  743                                         anyfreed = 1;
  744                                 }
                                      /*
                                       * Start over if the saved successor no
                                       * longer looks like a dirty, delayed-write
                                       * buffer of this vnode.
                                       */
  745                                 if (nbp && (((nbp->b_xflags & B_VNDIRTY) == 0)||
  746                                          (nbp->b_vp != vp) ||
  747                                          (nbp->b_flags & B_DELWRI) == 0)) {
  748                                         goto restart;
  749                                 }
  750                         }
  751                 }
  752         }
  753 
              /*
               * For a non-zero length, write out every delayed-write buffer
               * with a negative logical block number (presumably metadata
               * such as indirect blocks -- confirm with the filesystems).
               * The dirty list is rescanned from its head after each buffer
               * handled.
               */
  754         if (length > 0) {
  755 restartsync:
  756                 for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
  757                         nbp = TAILQ_NEXT(bp, b_vnbufs);
  758                         if ((bp->b_flags & B_DELWRI) && (bp->b_lblkno < 0)) {
  759                                 if (bp->b_flags & B_BUSY) {
  760                                         bp->b_flags |= B_WANTED;
  761                                         tsleep(bp, PRIBIO, "vtrb3", 0);
  762                                 } else {
  763                                         bremfree(bp);
  764                                         bp->b_flags |= B_BUSY;
                                              /* Asynchronous only for this vnode's own buffers. */
  765                                         if (bp->b_vp == vp) {
  766                                                 bp->b_flags |= B_ASYNC;
  767                                         } else {
  768                                                 bp->b_flags &= ~B_ASYNC;
  769                                         }
  770                                         VOP_BWRITE(bp);
  771                                 }
  772                                 goto restartsync;
  773                         }
  774 
  775                 }
  776         }
  777 
              /* Wait until all outstanding writes on the vnode have completed. */
  778         while (vp->v_numoutput > 0) {
  779                 vp->v_flag |= VBWAIT;
  780                 tsleep(&vp->v_numoutput, PVM, "vbtrunc", 0);
  781         }
  782 
  783         splx(s);
  784 
              /* Finally tell the VM pager about the new size. */
  785         vnode_pager_setsize(vp, length);
  786 
  787         return (0);
  788 }
  789 
  790 /*
  791  * Associate a buffer with a vnode.
  792  */
  793 void
  794 bgetvp(vp, bp)
  795         register struct vnode *vp;
  796         register struct buf *bp;
  797 {
  798         int s;
  799 
  800         KASSERT(bp->b_vp == NULL, ("bgetvp: not free"));
  801 
  802         vhold(vp);
  803         bp->b_vp = vp;
  804         if (vp->v_type == VBLK || vp->v_type == VCHR)
  805                 bp->b_dev = vp->v_rdev;
  806         else
  807                 bp->b_dev = NODEV;
  808         /*
  809          * Insert onto list for new vnode.
  810          */
  811         s = splbio();
  812         bp->b_xflags |= B_VNCLEAN;
  813         bp->b_xflags &= ~B_VNDIRTY;
  814         TAILQ_INSERT_TAIL(&vp->v_cleanblkhd, bp, b_vnbufs);
  815         splx(s);
  816 }
  817 
  818 /*
  819  * Disassociate a buffer from a vnode.
  820  */
  821 void
  822 brelvp(bp)
  823         register struct buf *bp;
  824 {
  825         struct vnode *vp;
  826         struct buflists *listheadp;
  827         int s;
  828 
  829         KASSERT(bp->b_vp != NULL, ("brelvp: NULL"));
  830 
  831         /*
  832          * Delete from old vnode list, if on one.
  833          */
  834         vp = bp->b_vp;
  835         s = splbio();
  836         if (bp->b_xflags & (B_VNDIRTY|B_VNCLEAN)) {
  837                 if (bp->b_xflags & B_VNDIRTY)
  838                         listheadp = &vp->v_dirtyblkhd;
  839                 else 
  840                         listheadp = &vp->v_cleanblkhd;
  841                 TAILQ_REMOVE(listheadp, bp, b_vnbufs);
  842                 bp->b_xflags &= ~(B_VNDIRTY|B_VNCLEAN);
  843         }
  844         if ((vp->v_flag & VONWORKLST) && TAILQ_EMPTY(&vp->v_dirtyblkhd)) {
  845                 vp->v_flag &= ~VONWORKLST;
  846                 LIST_REMOVE(vp, v_synclist);
  847         }
  848         splx(s);
  849         bp->b_vp = (struct vnode *) 0;
  850         vdrop(vp);
  851 }
  852 
  853 /*
  854  * The workitem queue.
  855  * 
  856  * It is useful to delay writes of file data and filesystem metadata
  857  * for tens of seconds so that quickly created and deleted files need
  858  * not waste disk bandwidth being created and removed. To realize this,
  859  * we append vnodes to a "workitem" queue. When running with a soft
  860  * updates implementation, most pending metadata dependencies should
  861  * not wait for more than a few seconds. Thus, writes to mounted block
  862  * devices are delayed only about half the time that file data is delayed.
  863  * Similarly, directory updates are more critical, so are only delayed
  864  * about a third the time that file data is delayed. Thus, there are
  865  * SYNCER_MAXDELAY queues that are processed round-robin at a rate of
  866  * one each second (driven off the filesystem syncer process). The
  867  * syncer_delayno variable indicates the next queue that is to be processed.
  868  * Items that need to be processed soon are placed in this queue:
  869  *
  870  *      syncer_workitem_pending[syncer_delayno]
  871  *
  872  * A delay of fifteen seconds is done by placing the request fifteen
  873  * entries later in the queue:
  874  *
  875  *      syncer_workitem_pending[(syncer_delayno + 15) & syncer_mask]
  876  *
  877  */
  878 
  879 /*
  880  * Add an item to the syncer work queue.
  881  */
  882 void
  883 vn_syncer_add_to_worklist(vp, delay)
  884         struct vnode *vp;
  885         int delay;
  886 {
  887         int s, slot;
  888 
  889         s = splbio();
  890 
  891         if (vp->v_flag & VONWORKLST) {
  892                 LIST_REMOVE(vp, v_synclist);
  893         }
  894 
  895         if (delay > syncer_maxdelay - 2)
  896                 delay = syncer_maxdelay - 2;
  897         slot = (syncer_delayno + delay) & syncer_mask;
  898 
  899         LIST_INSERT_HEAD(&syncer_workitem_pending[slot], vp, v_synclist);
  900         vp->v_flag |= VONWORKLST;
  901         splx(s);
  902 }
  903 
  904 static void sched_sync __P((void));
  905 static struct   proc *updateproc;
  906 static struct kproc_desc up_kp = {
  907         "syncer",
  908         sched_sync,
  909         &updateproc
  910 };
  911 SYSINIT_KT(syncer, SI_SUB_KTHREAD_UPDATE, SI_ORDER_FIRST, kproc_start, &up_kp)
  912 
  913 /*
  914  * System filesystem synchronizer daemon.
  915  */
  916 void 
  917 sched_sync(void)
  918 {
  919         struct synclist *slp;
  920         struct vnode *vp;
  921         long starttime;
  922         int s;
  923         struct proc *p = updateproc;
  924 
  925         for (;;) {
  926                 starttime = time_second;
  927 
  928                 /*
  929                  * Push files whose dirty time has expired.  Be careful
  930                  * of interrupt race on slp queue
  931                  */
  932                 s = splbio();
  933                 slp = &syncer_workitem_pending[syncer_delayno];
  934                 syncer_delayno += 1;
  935                 if (syncer_delayno == syncer_maxdelay)
  936                         syncer_delayno = 0;
  937                 splx(s);
  938 
  939                 while ((vp = LIST_FIRST(slp)) != NULL) {
  940                         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
  941                         (void) VOP_FSYNC(vp, p->p_ucred, MNT_LAZY, p);
  942                         VOP_UNLOCK(vp, 0, p);
  943                         s = splbio();
  944                         if (LIST_FIRST(slp) == vp) {
  945                                 if (TAILQ_EMPTY(&vp->v_dirtyblkhd) &&
  946                                     vp->v_type != VBLK)
  947                                         panic("sched_sync: fsync failed");
  948                                 /*
  949                                  * Move ourselves to the back of the sync list.
  950                                  * Put us back on the worklist.  The worklist
  951                                  * routine will remove us from our current
  952                                  * position and then add us back in at a later
  953                                  * position.
  954                                  */
  955                                 vn_syncer_add_to_worklist(vp, syncdelay);
  956                         }
  957                         splx(s);
  958                 }
  959 
  960                 /*
  961                  * Do soft update processing.
  962                  */
  963                 if (bioops.io_sync)
  964                         (*bioops.io_sync)(NULL);
  965 
  966                 /*
  967                  * The variable rushjob allows the kernel to speed up the
  968                  * processing of the filesystem syncer process. A rushjob
  969                  * value of N tells the filesystem syncer to process the next
  970                  * N seconds worth of work on its queue ASAP. Currently rushjob
  971                  * is used by the soft update code to speed up the filesystem
  972                  * syncer process when the incore state is getting so far
  973                  * ahead of the disk that the kernel memory pool is being
  974                  * threatened with exhaustion.
  975                  */
  976                 if (rushjob > 0) {
  977                         rushjob -= 1;
  978                         continue;
  979                 }
  980                 /*
  981                  * If it has taken us less than a second to process the
  982                  * current work, then wait. Otherwise start right over
  983                  * again. We can still lose time if any single round
  984                  * takes more than two seconds, but it does not really
  985                  * matter as we are just trying to generally pace the
  986                  * filesystem activity.
  987                  */
  988                 if (time_second == starttime)
  989                         tsleep(&lbolt, PPAUSE, "syncer", 0);
  990         }
  991 }
  992 
  993 /*
  994  * Associate a p-buffer with a vnode.
  995  */
  996 void
  997 pbgetvp(vp, bp)
  998         register struct vnode *vp;
  999         register struct buf *bp;
 1000 {
 1001 
 1002         KASSERT(bp->b_vp == NULL, ("pbgetvp: not free"));
 1003 
 1004         bp->b_vp = vp;
 1005         if (vp->v_type == VBLK || vp->v_type == VCHR)
 1006                 bp->b_dev = vp->v_rdev;
 1007         else
 1008                 bp->b_dev = NODEV;
 1009 }
 1010 
 1011 /*
 1012  * Disassociate a p-buffer from a vnode.
 1013  */
 1014 void
 1015 pbrelvp(bp)
 1016         register struct buf *bp;
 1017 {
 1018 
 1019         KASSERT(bp->b_vp != NULL, ("pbrelvp: NULL"));
 1020 
 1021         bp->b_vp = (struct vnode *) 0;
 1022 }
 1023 
 1024 /*
 1025  * Reassign a buffer from one vnode to another.
 1026  * Used to assign file specific control information
 1027  * (indirect blocks) to the vnode to which they belong.
 1028  */
 1029 void
 1030 reassignbuf(bp, newvp)
 1031         register struct buf *bp;
 1032         register struct vnode *newvp;
 1033 {
 1034         struct buflists *listheadp;
 1035         struct vnode *oldvp;
 1036         int delay;
 1037         int s;
 1038 
 1039         if (newvp == NULL) {
 1040                 printf("reassignbuf: NULL");
 1041                 return;
 1042         }
 1043 
 1044         s = splbio();
 1045         /*
 1046          * Delete from old vnode list, if on one.
 1047          */
 1048         if (bp->b_xflags & (B_VNDIRTY|B_VNCLEAN)) {
 1049                 oldvp = bp->b_vp;
 1050                 if (bp->b_xflags & B_VNDIRTY)
 1051                         listheadp = &oldvp->v_dirtyblkhd;
 1052                 else 
 1053                         listheadp = &oldvp->v_cleanblkhd;
 1054                 TAILQ_REMOVE(listheadp, bp, b_vnbufs);
 1055                 bp->b_xflags &= ~(B_VNDIRTY|B_VNCLEAN);
 1056                 vdrop(oldvp);
 1057         }
 1058         /*
 1059          * If dirty, put on list of dirty buffers; otherwise insert onto list
 1060          * of clean buffers.
 1061          */
 1062         if (bp->b_flags & B_DELWRI) {
 1063                 struct buf *tbp;
 1064 
 1065                 listheadp = &newvp->v_dirtyblkhd;
 1066                 if ((newvp->v_flag & VONWORKLST) == 0) {
 1067                         switch (newvp->v_type) {
 1068                         case VDIR:
 1069                                 delay = syncdelay / 3;
 1070                                 break;
 1071                         case VBLK:
 1072                                 if (newvp->v_specmountpoint != NULL) {
 1073                                         delay = syncdelay / 2;
 1074                                         break;
 1075                                 }
 1076                                 /* fall through */
 1077                         default:
 1078                                 delay = syncdelay;
 1079                         }
 1080                         vn_syncer_add_to_worklist(newvp, delay);
 1081                 }
 1082                 bp->b_xflags |= B_VNDIRTY;
 1083                 tbp = TAILQ_FIRST(listheadp);
 1084                 if (tbp == NULL ||
 1085                     (bp->b_lblkno >= 0 && tbp->b_lblkno > bp->b_lblkno)) {
 1086                         TAILQ_INSERT_HEAD(listheadp, bp, b_vnbufs);
 1087                 } else {
 1088                         if (bp->b_lblkno >= 0) {
 1089                                 struct buf *ttbp;
 1090                                 while ((ttbp = TAILQ_NEXT(tbp, b_vnbufs)) &&
 1091                                     (ttbp->b_lblkno < bp->b_lblkno)) {
 1092                                         tbp = ttbp;
 1093                                 }
 1094                                 TAILQ_INSERT_AFTER(listheadp, tbp, bp, b_vnbufs);
 1095                         } else {
 1096                                 TAILQ_INSERT_TAIL(listheadp, bp, b_vnbufs);
 1097                         }
 1098                 }
 1099         } else {
 1100                 bp->b_xflags |= B_VNCLEAN;
 1101                 TAILQ_INSERT_TAIL(&newvp->v_cleanblkhd, bp, b_vnbufs);
 1102                 if ((newvp->v_flag & VONWORKLST) &&
 1103                     TAILQ_EMPTY(&newvp->v_dirtyblkhd)) {
 1104                         newvp->v_flag &= ~VONWORKLST;
 1105                         LIST_REMOVE(newvp, v_synclist);
 1106                 }
 1107         }
 1108         bp->b_vp = newvp;
 1109         vhold(bp->b_vp);
 1110         splx(s);
 1111 }
 1112 
 1113 /*
 1114  * Create a vnode for a block device.
 1115  * Used for mounting the root file system.
 1116  */
 1117 int
 1118 bdevvp(dev, vpp)
 1119         dev_t dev;
 1120         struct vnode **vpp;
 1121 {
 1122         register struct vnode *vp;
 1123         struct vnode *nvp;
 1124         int error;
 1125 
 1126         /* XXX 255 is for mfs. */
 1127         if (dev == NODEV || (major(dev) != 255 && (major(dev) >= nblkdev ||
 1128             bdevsw[major(dev)] == NULL))) {
 1129                 *vpp = NULLVP;
 1130                 return (ENXIO);
 1131         }
 1132         error = getnewvnode(VT_NON, (struct mount *)0, spec_vnodeop_p, &nvp);
 1133         if (error) {
 1134                 *vpp = NULLVP;
 1135                 return (error);
 1136         }
 1137         vp = nvp;
 1138         vp->v_type = VBLK;
 1139         if ((nvp = checkalias(vp, dev, (struct mount *)0)) != NULL) {
 1140                 vput(vp);
 1141                 vp = nvp;
 1142         }
 1143         *vpp = vp;
 1144         return (0);
 1145 }
 1146 
 1147 /*
 1148  * Check to see if the new vnode represents a special device
 1149  * for which we already have a vnode (either because of
 1150  * bdevvp() or because of a different vnode representing
 1151  * the same block device). If such an alias exists, deallocate
 1152  * the existing contents and return the aliased vnode. The
 1153  * caller is responsible for filling it with its new contents.
 1154  */
 1155 struct vnode *
 1156 checkalias(nvp, nvp_rdev, mp)
 1157         register struct vnode *nvp;
 1158         dev_t nvp_rdev;
 1159         struct mount *mp;
 1160 {
 1161         struct proc *p = curproc;       /* XXX */
 1162         struct vnode *vp;
 1163         struct vnode **vpp;
 1164 
 1165         if (nvp->v_type != VBLK && nvp->v_type != VCHR)
 1166                 return (NULLVP);
 1167 
 1168         vpp = &speclisth[SPECHASH(nvp_rdev)];
 1169 loop:
 1170         simple_lock(&spechash_slock);
 1171         for (vp = *vpp; vp; vp = vp->v_specnext) {
 1172                 if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
 1173                         continue;
 1174                 /*
 1175                  * Alias, but not in use, so flush it out.
 1176                  * Only alias active device nodes.
 1177                  * Not sure why we don't re-use this like we do below.
 1178                  */
 1179                 simple_lock(&vp->v_interlock);
 1180                 if (vp->v_usecount == 0) {
 1181                         simple_unlock(&spechash_slock);
 1182                         vgonel(vp, p);
 1183                         goto loop;
 1184                 }
 1185                 if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) {
 1186                         /*
 1187                          * It dissappeared, and we may have slept.
 1188                          * Restart from the beginning
 1189                          */
 1190                         simple_unlock(&spechash_slock);
 1191                         goto loop;
 1192                 }
 1193                 break;
 1194         }
 1195         /*
 1196          * It would be a lot clearer what is going on here if
 1197          * this had been expressed as:
 1198          * if ( vp && (vp->v_tag == VT_NULL))
 1199          * and the clauses had been swapped.
 1200          */
 1201         if (vp == NULL || vp->v_tag != VT_NON) {
 1202                 /*
 1203                  * Put the new vnode into the hash chain.
 1204                  * and if there was an alias, connect them.
 1205                  */
 1206                 MALLOC(nvp->v_specinfo, struct specinfo *,
 1207                     sizeof(struct specinfo), M_VNODE, M_WAITOK);
 1208                 nvp->v_rdev = nvp_rdev;
 1209                 nvp->v_hashchain = vpp;
 1210                 nvp->v_specnext = *vpp;
 1211                 nvp->v_specmountpoint = NULL;
 1212                 simple_unlock(&spechash_slock);
 1213                 *vpp = nvp;
 1214                 if (vp != NULLVP) {
 1215                         nvp->v_flag |= VALIASED;
 1216                         vp->v_flag |= VALIASED;
 1217                         vput(vp);
 1218                 }
 1219                 return (NULLVP);
 1220         }
 1221         /*
 1222          * if ( vp && (vp->v_tag == VT_NULL))
 1223          * We have a vnode alias, but it is a trashed.
 1224          * Make it look like it's newley allocated. (by getnewvnode())
 1225          * The caller should use this instead.
 1226          */
 1227         simple_unlock(&spechash_slock);
 1228         VOP_UNLOCK(vp, 0, p);
 1229         simple_lock(&vp->v_interlock);
 1230         vclean(vp, 0, p);
 1231         vp->v_op = nvp->v_op;
 1232         vp->v_tag = nvp->v_tag;
 1233         nvp->v_type = VNON;
 1234         insmntque(vp, mp);
 1235         return (vp);
 1236 }
 1237 
 1238 /*
 1239  * Grab a particular vnode from the free list, increment its
 1240  * reference count and lock it. The vnode lock bit is set the
 1241  * vnode is being eliminated in vgone. The process is awakened
 1242  * when the transition is completed, and an error returned to
 1243  * indicate that the vnode is no longer usable (possibly having
 1244  * been changed to a new file system type).
 1245  */
 1246 int
 1247 vget(vp, flags, p)
 1248         register struct vnode *vp;
 1249         int flags;
 1250         struct proc *p;
 1251 {
 1252         int error;
 1253 
 1254         /*
 1255          * If the vnode is in the process of being cleaned out for
 1256          * another use, we wait for the cleaning to finish and then
 1257          * return failure. Cleaning is determined by checking that
 1258          * the VXLOCK flag is set.
 1259          */
 1260         if ((flags & LK_INTERLOCK) == 0) {
 1261                 simple_lock(&vp->v_interlock);
 1262         }
 1263         if (vp->v_flag & VXLOCK) {
 1264                 vp->v_flag |= VXWANT;
 1265                 simple_unlock(&vp->v_interlock);
 1266                 tsleep((caddr_t)vp, PINOD, "vget", 0);
 1267                 return (ENOENT);
 1268         }
 1269 
 1270         vp->v_usecount++;
 1271 
 1272         if (VSHOULDBUSY(vp))
 1273                 vbusy(vp);
 1274         if (flags & LK_TYPE_MASK) {
 1275                 if ((error = vn_lock(vp, flags | LK_INTERLOCK, p)) != 0) {
 1276                         /*
 1277                          * must expand vrele here because we do not want
 1278                          * to call VOP_INACTIVE if the reference count
 1279                          * drops back to zero since it was never really
 1280                          * active. We must remove it from the free list
 1281                          * before sleeping so that multiple processes do
 1282                          * not try to recycle it.
 1283                          */
 1284                         simple_lock(&vp->v_interlock);
 1285                         vp->v_usecount--;
 1286                         if (VSHOULDFREE(vp))
 1287                                 vfree(vp);
 1288                         simple_unlock(&vp->v_interlock);
 1289                 }
 1290                 return (error);
 1291         }
 1292         simple_unlock(&vp->v_interlock);
 1293         return (0);
 1294 }
 1295 
 1296 void
 1297 vref(struct vnode *vp)
 1298 {
 1299         simple_lock(&vp->v_interlock);
 1300         vp->v_usecount++;
 1301         simple_unlock(&vp->v_interlock);
 1302 }
 1303 
 1304 /*
 1305  * Vnode put/release.
 1306  * If count drops to zero, call inactive routine and return to freelist.
 1307  */
 1308 void
 1309 vrele(vp)
 1310         struct vnode *vp;
 1311 {
 1312         struct proc *p = curproc;       /* XXX */
 1313 
 1314         KASSERT(vp != NULL, ("vrele: null vp"));
 1315 
 1316         simple_lock(&vp->v_interlock);
 1317 
 1318         if (vp->v_usecount > 1) {
 1319 
 1320                 vp->v_usecount--;
 1321                 simple_unlock(&vp->v_interlock);
 1322 
 1323                 return;
 1324         }
 1325 
 1326         if (vp->v_usecount == 1) {
 1327 
 1328                 vp->v_usecount--;
 1329                 if (VSHOULDFREE(vp))
 1330                         vfree(vp);
 1331         /*
 1332          * If we are doing a vput, the node is already locked, and we must
 1333          * call VOP_INACTIVE with the node locked.  So, in the case of
 1334          * vrele, we explicitly lock the vnode before calling VOP_INACTIVE.
 1335          */
 1336                 if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK, p) == 0) {
 1337                         VOP_INACTIVE(vp, p);
 1338                 }
 1339 
 1340         } else {
 1341 #ifdef DIAGNOSTIC
 1342                 vprint("vrele: negative ref count", vp);
 1343                 simple_unlock(&vp->v_interlock);
 1344 #endif
 1345                 panic("vrele: negative ref cnt");
 1346         }
 1347 }
 1348 
 1349 void
 1350 vput(vp)
 1351         struct vnode *vp;
 1352 {
 1353         struct proc *p = curproc;       /* XXX */
 1354 
 1355         KASSERT(vp != NULL, ("vput: null vp"));
 1356 
 1357         simple_lock(&vp->v_interlock);
 1358 
 1359         if (vp->v_usecount > 1) {
 1360 
 1361                 vp->v_usecount--;
 1362                 VOP_UNLOCK(vp, LK_INTERLOCK, p);
 1363                 return;
 1364 
 1365         }
 1366 
 1367         if (vp->v_usecount == 1) {
 1368 
 1369                 vp->v_usecount--;
 1370                 if (VSHOULDFREE(vp))
 1371                         vfree(vp);
 1372         /*
 1373          * If we are doing a vput, the node is already locked, and we must
 1374          * call VOP_INACTIVE with the node locked.  So, in the case of
 1375          * vrele, we explicitly lock the vnode before calling VOP_INACTIVE.
 1376          */
 1377                 simple_unlock(&vp->v_interlock);
 1378                 VOP_INACTIVE(vp, p);
 1379 
 1380         } else {
 1381 #ifdef DIAGNOSTIC
 1382                 vprint("vput: negative ref count", vp);
 1383 #endif
 1384                 panic("vput: negative ref cnt");
 1385         }
 1386 }
 1387 
 1388 /*
 1389  * Somebody doesn't want the vnode recycled.
 1390  */
 1391 void
 1392 vhold(vp)
 1393         register struct vnode *vp;
 1394 {
 1395         int s;
 1396 
 1397         s = splbio();
 1398         vp->v_holdcnt++;
 1399         if (VSHOULDBUSY(vp))
 1400                 vbusy(vp);
 1401         splx(s);
 1402 }
 1403 
 1404 /*
 1405  * One less who cares about this vnode.
 1406  */
 1407 void
 1408 vdrop(vp)
 1409         register struct vnode *vp;
 1410 {
 1411         int s;
 1412 
 1413         s = splbio();
 1414         if (vp->v_holdcnt <= 0)
 1415                 panic("vdrop: holdcnt");
 1416         vp->v_holdcnt--;
 1417         if (VSHOULDFREE(vp))
 1418                 vfree(vp);
 1419         splx(s);
 1420 }
 1421 
 1422 /*
 1423  * Remove any vnodes in the vnode table belonging to mount point mp.
 1424  *
 1425  * If MNT_NOFORCE is specified, there should not be any active ones,
 1426  * return error if any are found (nb: this is a user error, not a
 1427  * system error). If MNT_FORCE is specified, detach any active vnodes
 1428  * that are found.
 1429  */
 1430 #ifdef DIAGNOSTIC
 1431 static int busyprt = 0;         /* print out busy vnodes */
 1432 SYSCTL_INT(_debug, OID_AUTO, busyprt, CTLFLAG_RW, &busyprt, 0, "");
 1433 #endif
 1434 
 1435 int
 1436 vflush(mp, skipvp, flags)
 1437         struct mount *mp;
 1438         struct vnode *skipvp;
 1439         int flags;
 1440 {
 1441         struct proc *p = curproc;       /* XXX */
 1442         struct vnode *vp, *nvp;
 1443         int busy = 0;
 1444 
 1445         simple_lock(&mntvnode_slock);
 1446 loop:
 1447         for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
 1448                 /*
 1449                  * Make sure this vnode wasn't reclaimed in getnewvnode().
 1450                  * Start over if it has (it won't be on the list anymore).
 1451                  */
 1452                 if (vp->v_mount != mp)
 1453                         goto loop;
 1454                 nvp = vp->v_mntvnodes.le_next;
 1455                 /*
 1456                  * Skip over a selected vnode.
 1457                  */
 1458                 if (vp == skipvp)
 1459                         continue;
 1460 
 1461                 simple_lock(&vp->v_interlock);
 1462                 /*
 1463                  * Skip over a vnodes marked VSYSTEM.
 1464                  */
 1465                 if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
 1466                         simple_unlock(&vp->v_interlock);
 1467                         continue;
 1468                 }
 1469                 /*
 1470                  * If WRITECLOSE is set, only flush out regular file vnodes
 1471                  * open for writing.
 1472                  */
 1473                 if ((flags & WRITECLOSE) &&
 1474                     (vp->v_writecount == 0 || vp->v_type != VREG)) {
 1475                         simple_unlock(&vp->v_interlock);
 1476                         continue;
 1477                 }
 1478 
 1479                 /*
 1480                  * With v_usecount == 0, all we need to do is clear out the
 1481                  * vnode data structures and we are done.
 1482                  */
 1483                 if (vp->v_usecount == 0) {
 1484                         simple_unlock(&mntvnode_slock);
 1485                         vgonel(vp, p);
 1486                         simple_lock(&mntvnode_slock);
 1487                         continue;
 1488                 }
 1489 
 1490                 /*
 1491                  * If FORCECLOSE is set, forcibly close the vnode. For block
 1492                  * or character devices, revert to an anonymous device. For
 1493                  * all other files, just kill them.
 1494                  */
 1495                 if (flags & FORCECLOSE) {
 1496                         simple_unlock(&mntvnode_slock);
 1497                         if (vp->v_type != VBLK && vp->v_type != VCHR) {
 1498                                 vgonel(vp, p);
 1499                         } else {
 1500                                 vclean(vp, 0, p);
 1501                                 vp->v_op = spec_vnodeop_p;
 1502                                 insmntque(vp, (struct mount *) 0);
 1503                         }
 1504                         simple_lock(&mntvnode_slock);
 1505                         continue;
 1506                 }
 1507 #ifdef DIAGNOSTIC
 1508                 if (busyprt)
 1509                         vprint("vflush: busy vnode", vp);
 1510 #endif
 1511                 simple_unlock(&vp->v_interlock);
 1512                 busy++;
 1513         }
 1514         simple_unlock(&mntvnode_slock);
 1515         if (busy)
 1516                 return (EBUSY);
 1517         return (0);
 1518 }
 1519 
 1520 /*
 1521  * Disassociate the underlying file system from a vnode.
 1522  */
 1523 static void
 1524 vclean(vp, flags, p)
 1525         struct vnode *vp;
 1526         int flags;
 1527         struct proc *p;
 1528 {
 1529         int active;
 1530         vm_object_t obj;
 1531 
 1532         /*
 1533          * Check to see if the vnode is in use. If so we have to reference it
 1534          * before we clean it out so that its count cannot fall to zero and
 1535          * generate a race against ourselves to recycle it.
 1536          */
 1537         if ((active = vp->v_usecount))
 1538                 vp->v_usecount++;
 1539 
 1540         /*
 1541          * Prevent the vnode from being recycled or brought into use while we
 1542          * clean it out.
 1543          */
 1544         if (vp->v_flag & VXLOCK)
 1545                 panic("vclean: deadlock");
 1546         vp->v_flag |= VXLOCK;
 1547         /*
 1548          * Even if the count is zero, the VOP_INACTIVE routine may still
 1549          * have the object locked while it cleans it out. The VOP_LOCK
 1550          * ensures that the VOP_INACTIVE routine is done with its work.
 1551          * For active vnodes, it ensures that no other activity can
 1552          * occur while the underlying object is being cleaned out.
 1553          */
 1554         VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p);
 1555 
 1556         /*
 1557          * Clean out any buffers associated with the vnode.
 1558          */
 1559         vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);
 1560         if (obj = vp->v_object) {
 1561                 if (obj->ref_count == 0) {
 1562                         /*
 1563                          * This is a normal way of shutting down the object/vnode
 1564                          * association.
 1565                          */
 1566                         vm_object_terminate(obj);
 1567                 } else {
 1568                         /*
 1569                          * Woe to the process that tries to page now :-).
 1570                          */
 1571                         vm_pager_deallocate(obj);
 1572                 }
 1573         }
 1574 
 1575         /*
 1576          * If purging an active vnode, it must be closed and
 1577          * deactivated before being reclaimed. Note that the
 1578          * VOP_INACTIVE will unlock the vnode.
 1579          */
 1580         if (active) {
 1581                 if (flags & DOCLOSE)
 1582                         VOP_CLOSE(vp, FNONBLOCK, NOCRED, p);
 1583                 VOP_INACTIVE(vp, p);
 1584         } else {
 1585                 /*
 1586                  * Any other processes trying to obtain this lock must first
 1587                  * wait for VXLOCK to clear, then call the new lock operation.
 1588                  */
 1589                 VOP_UNLOCK(vp, 0, p);
 1590         }
 1591         /*
 1592          * Reclaim the vnode.
 1593          */
 1594         if (VOP_RECLAIM(vp, p))
 1595                 panic("vclean: cannot reclaim");
 1596 
 1597         if (active)
 1598                 vrele(vp);
 1599 
 1600         cache_purge(vp);
 1601         if (vp->v_vnlock) {
 1602 #if 0 /* This is the only place we have LK_DRAINED in the entire kernel ??? */
 1603 #ifdef DIAGNOSTIC
 1604                 if ((vp->v_vnlock->lk_flags & LK_DRAINED) == 0)
 1605                         vprint("vclean: lock not drained", vp);
 1606 #endif
 1607 #endif
 1608                 FREE(vp->v_vnlock, M_VNODE);
 1609                 vp->v_vnlock = NULL;
 1610         }
 1611 
 1612         if (VSHOULDFREE(vp))
 1613                 vfree(vp);
 1614 
 1615         /*
 1616          * Done with purge, notify sleepers of the grim news.
 1617          */
 1618         vp->v_op = dead_vnodeop_p;
 1619         vn_pollgone(vp);
 1620         vp->v_tag = VT_NON;
 1621         vp->v_flag &= ~VXLOCK;
 1622         if (vp->v_flag & VXWANT) {
 1623                 vp->v_flag &= ~VXWANT;
 1624                 wakeup((caddr_t) vp);
 1625         }
 1626 }
 1627 
 1628 /*
 1629  * Eliminate all activity associated with the requested vnode
 1630  * and with all vnodes aliased to the requested vnode.
 1631  */
 1632 int
 1633 vop_revoke(ap)
 1634         struct vop_revoke_args /* {
 1635                 struct vnode *a_vp;
 1636                 int a_flags;
 1637         } */ *ap;
 1638 {
 1639         struct vnode *vp, *vq;
 1640         struct proc *p = curproc;       /* XXX */
 1641 
 1642         KASSERT((ap->a_flags & REVOKEALL) != 0, ("vop_revoke"));
 1643 
 1644         vp = ap->a_vp;
 1645         simple_lock(&vp->v_interlock);
 1646 
 1647         if (vp->v_flag & VALIASED) {
 1648                 /*
 1649                  * If a vgone (or vclean) is already in progress,
 1650                  * wait until it is done and return.
 1651                  */
 1652                 if (vp->v_flag & VXLOCK) {
 1653                         vp->v_flag |= VXWANT;
 1654                         simple_unlock(&vp->v_interlock);
 1655                         tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0);
 1656                         return (0);
 1657                 }
 1658                 /*
 1659                  * Ensure that vp will not be vgone'd while we
 1660                  * are eliminating its aliases.
 1661                  */
 1662                 vp->v_flag |= VXLOCK;
 1663                 simple_unlock(&vp->v_interlock);
 1664                 while (vp->v_flag & VALIASED) {
 1665                         simple_lock(&spechash_slock);
 1666                         for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
 1667                                 if (vq->v_rdev != vp->v_rdev ||
 1668                                     vq->v_type != vp->v_type || vp == vq)
 1669                                         continue;
 1670                                 simple_unlock(&spechash_slock);
 1671                                 vgone(vq);
 1672                                 break;
 1673                         }
 1674                         if (vq == NULLVP) {
 1675                                 simple_unlock(&spechash_slock);
 1676                         }
 1677                 }
 1678                 /*
 1679                  * Remove the lock so that vgone below will
 1680                  * really eliminate the vnode after which time
 1681                  * vgone will awaken any sleepers.
 1682                  */
 1683                 simple_lock(&vp->v_interlock);
 1684                 vp->v_flag &= ~VXLOCK;
 1685                 if (vp->v_flag & VXWANT) {
 1686                         vp->v_flag &= ~VXWANT;
 1687                         wakeup(vp);
 1688                 }
 1689         }
 1690         vgonel(vp, p);
 1691         return (0);
 1692 }
 1693 
 1694 /*
 1695  * Recycle an unused vnode to the front of the free list.
 1696  * Release the passed interlock if the vnode will be recycled.
 1697  */
 1698 int
 1699 vrecycle(vp, inter_lkp, p)
 1700         struct vnode *vp;
 1701         struct simplelock *inter_lkp;
 1702         struct proc *p;
 1703 {
 1704 
 1705         simple_lock(&vp->v_interlock);
 1706         if (vp->v_usecount == 0) {
 1707                 if (inter_lkp) {
 1708                         simple_unlock(inter_lkp);
 1709                 }
 1710                 vgonel(vp, p);
 1711                 return (1);
 1712         }
 1713         simple_unlock(&vp->v_interlock);
 1714         return (0);
 1715 }
 1716 
 1717 /*
 1718  * Eliminate all activity associated with a vnode
 1719  * in preparation for reuse.
 1720  */
 1721 void
 1722 vgone(vp)
 1723         register struct vnode *vp;
 1724 {
 1725         struct proc *p = curproc;       /* XXX */
 1726 
 1727         simple_lock(&vp->v_interlock);
 1728         vgonel(vp, p);
 1729 }
 1730 
 1731 /*
 1732  * vgone, with the vp interlock held.
 1733  */
 1734 static void
 1735 vgonel(vp, p)
 1736         struct vnode *vp;
 1737         struct proc *p;
 1738 {
 1739         int s;
 1740         struct vnode *vq;
 1741         struct vnode *vx;
 1742 
 1743         /*
 1744          * If a vgone (or vclean) is already in progress,
 1745          * wait until it is done and return.
 1746          */
 1747         if (vp->v_flag & VXLOCK) {
 1748                 vp->v_flag |= VXWANT;
 1749                 simple_unlock(&vp->v_interlock);
 1750                 tsleep((caddr_t)vp, PINOD, "vgone", 0);
 1751                 return;
 1752         }
 1753 
 1754         /*
 1755          * Clean out the filesystem specific data.
 1756          */
 1757         vclean(vp, DOCLOSE, p);
 1758         simple_lock(&vp->v_interlock);
 1759 
 1760         /*
 1761          * Delete from old mount point vnode list, if on one.
 1762          */
 1763         if (vp->v_mount != NULL)
 1764                 insmntque(vp, (struct mount *)0);
 1765         /*
 1766          * If special device, remove it from special device alias list
 1767          * if it is on one.
 1768          */
 1769         if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) {
 1770                 simple_lock(&spechash_slock);
 1771                 if (*vp->v_hashchain == vp) {
 1772                         *vp->v_hashchain = vp->v_specnext;
 1773                 } else {
 1774                         for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
 1775                                 if (vq->v_specnext != vp)
 1776                                         continue;
 1777                                 vq->v_specnext = vp->v_specnext;
 1778                                 break;
 1779                         }
 1780                         if (vq == NULL)
 1781                                 panic("missing bdev");
 1782                 }
 1783                 if (vp->v_flag & VALIASED) {
 1784                         vx = NULL;
 1785                         for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
 1786                                 if (vq->v_rdev != vp->v_rdev ||
 1787                                     vq->v_type != vp->v_type)
 1788                                         continue;
 1789                                 if (vx)
 1790                                         break;
 1791                                 vx = vq;
 1792                         }
 1793                         if (vx == NULL)
 1794                                 panic("missing alias");
 1795                         if (vq == NULL)
 1796                                 vx->v_flag &= ~VALIASED;
 1797                         vp->v_flag &= ~VALIASED;
 1798                 }
 1799                 simple_unlock(&spechash_slock);
 1800                 FREE(vp->v_specinfo, M_VNODE);
 1801                 vp->v_specinfo = NULL;
 1802         }
 1803 
 1804         /*
 1805          * If it is on the freelist and not already at the head,
 1806          * move it to the head of the list. The test of the back
 1807          * pointer and the reference count of zero is because
 1808          * it will be removed from the free list by getnewvnode,
 1809          * but will not have its reference count incremented until
 1810          * after calling vgone. If the reference count were
 1811          * incremented first, vgone would (incorrectly) try to
 1812          * close the previous instance of the underlying object.
 1813          */
 1814         if (vp->v_usecount == 0 && !(vp->v_flag & VDOOMED)) {
 1815                 s = splbio();
 1816                 simple_lock(&vnode_free_list_slock);
 1817                 if (vp->v_flag & VFREE) {
 1818                         TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
 1819                 } else if (vp->v_flag & VTBFREE) {
 1820                         TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist);
 1821                         vp->v_flag &= ~VTBFREE;
 1822                         freevnodes++;
 1823                 } else
 1824                         freevnodes++;
 1825                 vp->v_flag |= VFREE;
 1826                 TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
 1827                 simple_unlock(&vnode_free_list_slock);
 1828                 splx(s);
 1829         }
 1830 
 1831         vp->v_type = VBAD;
 1832         simple_unlock(&vp->v_interlock);
 1833 }
 1834 
 1835 /*
 1836  * Lookup a vnode by device number.
 1837  */
 1838 int
 1839 vfinddev(dev, type, vpp)
 1840         dev_t dev;
 1841         enum vtype type;
 1842         struct vnode **vpp;
 1843 {
 1844         register struct vnode *vp;
 1845         int rc = 0;
 1846 
 1847         simple_lock(&spechash_slock);
 1848         for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
 1849                 if (dev != vp->v_rdev || type != vp->v_type)
 1850                         continue;
 1851                 *vpp = vp;
 1852                 rc = 1;
 1853                 break;
 1854         }
 1855         simple_unlock(&spechash_slock);
 1856         return (rc);
 1857 }
 1858 
 1859 /*
 1860  * Calculate the total number of references to a special device.
 1861  */
 1862 int
 1863 vcount(vp)
 1864         register struct vnode *vp;
 1865 {
 1866         struct vnode *vq, *vnext;
 1867         int count;
 1868 
 1869 loop:
 1870         if ((vp->v_flag & VALIASED) == 0)
 1871                 return (vp->v_usecount);
 1872         simple_lock(&spechash_slock);
 1873         for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
 1874                 vnext = vq->v_specnext;
 1875                 if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
 1876                         continue;
 1877                 /*
 1878                  * Alias, but not in use, so flush it out.
 1879                  */
 1880                 if (vq->v_usecount == 0 && vq != vp) {
 1881                         simple_unlock(&spechash_slock);
 1882                         vgone(vq);
 1883                         goto loop;
 1884                 }
 1885                 count += vq->v_usecount;
 1886         }
 1887         simple_unlock(&spechash_slock);
 1888         return (count);
 1889 }
 1890 /*
 1891  * Print out a description of a vnode.
 1892  */
 1893 static char *typename[] =
 1894 {"VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD"};
 1895 
 1896 void
 1897 vprint(label, vp)
 1898         char *label;
 1899         register struct vnode *vp;
 1900 {
 1901         char buf[96];
 1902 
 1903         if (label != NULL)
 1904                 printf("%s: %p: ", label, (void *)vp);
 1905         else
 1906                 printf("%p: ", (void *)vp);
 1907         printf("type %s, usecount %d, writecount %d, refcount %d,",
 1908             typename[vp->v_type], vp->v_usecount, vp->v_writecount,
 1909             vp->v_holdcnt);
 1910         buf[0] = '\0';
 1911         if (vp->v_flag & VROOT)
 1912                 strcat(buf, "|VROOT");
 1913         if (vp->v_flag & VTEXT)
 1914                 strcat(buf, "|VTEXT");
 1915         if (vp->v_flag & VSYSTEM)
 1916                 strcat(buf, "|VSYSTEM");
 1917         if (vp->v_flag & VXLOCK)
 1918                 strcat(buf, "|VXLOCK");
 1919         if (vp->v_flag & VXWANT)
 1920                 strcat(buf, "|VXWANT");
 1921         if (vp->v_flag & VBWAIT)
 1922                 strcat(buf, "|VBWAIT");
 1923         if (vp->v_flag & VALIASED)
 1924                 strcat(buf, "|VALIASED");
 1925         if (vp->v_flag & VDOOMED)
 1926                 strcat(buf, "|VDOOMED");
 1927         if (vp->v_flag & VFREE)
 1928                 strcat(buf, "|VFREE");
 1929         if (vp->v_flag & VOBJBUF)
 1930                 strcat(buf, "|VOBJBUF");
 1931         if (buf[0] != '\0')
 1932                 printf(" flags (%s)", &buf[1]);
 1933         if (vp->v_data == NULL) {
 1934                 printf("\n");
 1935         } else {
 1936                 printf("\n\t");
 1937                 VOP_PRINT(vp);
 1938         }
 1939 }
 1940 
 1941 #ifdef DDB
 1942 #include <ddb/ddb.h>
 1943 /*
 1944  * List all of the locked vnodes in the system.
 1945  * Called when debugging the kernel.
 1946  */
 1947 DB_SHOW_COMMAND(lockedvnodes, lockedvnodes)
 1948 {
 1949         struct proc *p = curproc;       /* XXX */
 1950         struct mount *mp, *nmp;
 1951         struct vnode *vp;
 1952 
 1953         printf("Locked vnodes\n");
 1954         simple_lock(&mountlist_slock);
 1955         for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
 1956                 if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
 1957                         nmp = mp->mnt_list.cqe_next;
 1958                         continue;
 1959                 }
 1960                 for (vp = mp->mnt_vnodelist.lh_first;
 1961                      vp != NULL;
 1962                      vp = vp->v_mntvnodes.le_next) {
 1963                         if (VOP_ISLOCKED(vp))
 1964                                 vprint((char *)0, vp);
 1965                 }
 1966                 simple_lock(&mountlist_slock);
 1967                 nmp = mp->mnt_list.cqe_next;
 1968                 vfs_unbusy(mp, p);
 1969         }
 1970         simple_unlock(&mountlist_slock);
 1971 }
 1972 #endif
 1973 
 1974 /*
 1975  * Top level filesystem related information gathering.
 1976  */
 1977 static int      sysctl_ovfs_conf __P(SYSCTL_HANDLER_ARGS);
 1978 
 1979 static int
 1980 vfs_sysctl SYSCTL_HANDLER_ARGS
 1981 {
 1982         int *name = (int *)arg1 - 1;    /* XXX */
 1983         u_int namelen = arg2 + 1;       /* XXX */
 1984         struct vfsconf *vfsp;
 1985 
 1986 #if 1 || defined(COMPAT_PRELITE2)
 1987         /* Resolve ambiguity between VFS_VFSCONF and VFS_GENERIC. */
 1988         if (namelen == 1)
 1989                 return (sysctl_ovfs_conf(oidp, arg1, arg2, req));
 1990 #endif
 1991 
 1992 #ifdef notyet
 1993         /* all sysctl names at this level are at least name and field */
 1994         if (namelen < 2)
 1995                 return (ENOTDIR);               /* overloaded */
 1996         if (name[0] != VFS_GENERIC) {
 1997                 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
 1998                         if (vfsp->vfc_typenum == name[0])
 1999                                 break;
 2000                 if (vfsp == NULL)
 2001                         return (EOPNOTSUPP);
 2002                 return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1,
 2003                     oldp, oldlenp, newp, newlen, p));
 2004         }
 2005 #endif
 2006         switch (name[1]) {
 2007         case VFS_MAXTYPENUM:
 2008                 if (namelen != 2)
 2009                         return (ENOTDIR);
 2010                 return (SYSCTL_OUT(req, &maxvfsconf, sizeof(int)));
 2011         case VFS_CONF:
 2012                 if (namelen != 3)
 2013                         return (ENOTDIR);       /* overloaded */
 2014                 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
 2015                         if (vfsp->vfc_typenum == name[2])
 2016                                 break;
 2017                 if (vfsp == NULL)
 2018                         return (EOPNOTSUPP);
 2019                 return (SYSCTL_OUT(req, vfsp, sizeof *vfsp));
 2020         }
 2021         return (EOPNOTSUPP);
 2022 }
 2023 
 2024 SYSCTL_NODE(_vfs, VFS_GENERIC, generic, CTLFLAG_RD, vfs_sysctl,
 2025         "Generic filesystem");
 2026 
 2027 #if 1 || defined(COMPAT_PRELITE2)
 2028 
 2029 static int
 2030 sysctl_ovfs_conf SYSCTL_HANDLER_ARGS
 2031 {
 2032         int error;
 2033         struct vfsconf *vfsp;
 2034         struct ovfsconf ovfs;
 2035 
 2036         for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
 2037                 ovfs.vfc_vfsops = vfsp->vfc_vfsops;     /* XXX used as flag */
 2038                 strcpy(ovfs.vfc_name, vfsp->vfc_name);
 2039                 ovfs.vfc_index = vfsp->vfc_typenum;
 2040                 ovfs.vfc_refcount = vfsp->vfc_refcount;
 2041                 ovfs.vfc_flags = vfsp->vfc_flags;
 2042                 error = SYSCTL_OUT(req, &ovfs, sizeof ovfs);
 2043                 if (error)
 2044                         return error;
 2045         }
 2046         return 0;
 2047 }
 2048 
 2049 #endif /* 1 || COMPAT_PRELITE2 */
 2050 
 2051 #if 0
 2052 #define KINFO_VNODESLOP 10
 2053 /*
 2054  * Dump vnode list (via sysctl).
 2055  * Copyout address of vnode followed by vnode.
 2056  */
 2057 /* ARGSUSED */
 2058 static int
 2059 sysctl_vnode SYSCTL_HANDLER_ARGS
 2060 {
 2061         struct proc *p = curproc;       /* XXX */
 2062         struct mount *mp, *nmp;
 2063         struct vnode *nvp, *vp;
 2064         int error;
 2065 
 2066 #define VPTRSZ  sizeof (struct vnode *)
 2067 #define VNODESZ sizeof (struct vnode)
 2068 
 2069         req->lock = 0;
 2070         if (!req->oldptr) /* Make an estimate */
 2071                 return (SYSCTL_OUT(req, 0,
 2072                         (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ)));
 2073 
 2074         simple_lock(&mountlist_slock);
 2075         for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
 2076                 if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
 2077                         nmp = mp->mnt_list.cqe_next;
 2078                         continue;
 2079                 }
 2080 again:
 2081                 simple_lock(&mntvnode_slock);
 2082                 for (vp = mp->mnt_vnodelist.lh_first;
 2083                      vp != NULL;
 2084                      vp = nvp) {
 2085                         /*
 2086                          * Check that the vp is still associated with
 2087                          * this filesystem.  RACE: could have been
 2088                          * recycled onto the same filesystem.
 2089                          */
 2090                         if (vp->v_mount != mp) {
 2091                                 simple_unlock(&mntvnode_slock);
 2092                                 goto again;
 2093                         }
 2094                         nvp = vp->v_mntvnodes.le_next;
 2095                         simple_unlock(&mntvnode_slock);
 2096                         if ((error = SYSCTL_OUT(req, &vp, VPTRSZ)) ||
 2097                             (error = SYSCTL_OUT(req, vp, VNODESZ)))
 2098                                 return (error);
 2099                         simple_lock(&mntvnode_slock);
 2100                 }
 2101                 simple_unlock(&mntvnode_slock);
 2102                 simple_lock(&mountlist_slock);
 2103                 nmp = mp->mnt_list.cqe_next;
 2104                 vfs_unbusy(mp, p);
 2105         }
 2106         simple_unlock(&mountlist_slock);
 2107 
 2108         return (0);
 2109 }
 2110 #endif
 2111 
 2112 /*
 2113  * XXX
 2114  * Exporting the vnode list on large systems causes them to crash.
 2115  * Exporting the vnode list on medium systems causes sysctl to coredump.
 2116  */
 2117 #if 0
 2118 SYSCTL_PROC(_kern, KERN_VNODE, vnode, CTLTYPE_OPAQUE|CTLFLAG_RD,
 2119         0, 0, sysctl_vnode, "S,vnode", "");
 2120 #endif
 2121 
 2122 /*
 2123  * Check to see if a filesystem is mounted on a block device.
 2124  */
 2125 int
 2126 vfs_mountedon(vp)
 2127         struct vnode *vp;
 2128 {
 2129         struct vnode *vq;
 2130         int error = 0;
 2131 
 2132         if (vp->v_specmountpoint != NULL)
 2133                 return (EBUSY);
 2134         if (vp->v_flag & VALIASED) {
 2135                 simple_lock(&spechash_slock);
 2136                 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
 2137                         if (vq->v_rdev != vp->v_rdev ||
 2138                             vq->v_type != vp->v_type)
 2139                                 continue;
 2140                         if (vq->v_specmountpoint != NULL) {
 2141                                 error = EBUSY;
 2142                                 break;
 2143                         }
 2144                 }
 2145                 simple_unlock(&spechash_slock);
 2146         }
 2147         return (error);
 2148 }
 2149 
 2150 /*
 2151  * Unmount all filesystems. The list is traversed in reverse order
 2152  * of mounting to avoid dependencies.
 2153  */
 2154 void
 2155 vfs_unmountall()
 2156 {
 2157         struct mount *mp, *nmp;
 2158         struct proc *p;
 2159         int error;
 2160 
 2161         if (curproc != NULL)
 2162                 p = curproc;
 2163         else
 2164                 p = initproc;   /* XXX XXX should this be proc0? */
 2165         /*
 2166          * Since this only runs when rebooting, it is not interlocked.
 2167          */
 2168         for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
 2169                 nmp = mp->mnt_list.cqe_prev;
 2170                 error = dounmount(mp, MNT_FORCE, p);
 2171                 if (error) {
 2172                         printf("unmount of %s failed (",
 2173                             mp->mnt_stat.f_mntonname);
 2174                         if (error == EBUSY)
 2175                                 printf("BUSY)\n");
 2176                         else
 2177                                 printf("%d)\n", error);
 2178                 }
 2179         }
 2180 }
 2181 
 2182 /*
 2183  * Build hash lists of net addresses and hang them off the mount point.
 2184  * Called by ufs_mount() to set up the lists of export addresses.
 2185  */
 2186 static int
 2187 vfs_hang_addrlist(mp, nep, argp)
 2188         struct mount *mp;
 2189         struct netexport *nep;
 2190         struct export_args *argp;
 2191 {
 2192         register struct netcred *np;
 2193         register struct radix_node_head *rnh;
 2194         register int i;
 2195         struct radix_node *rn;
 2196         struct sockaddr *saddr, *smask = 0;
 2197         struct domain *dom;
 2198         int error;
 2199 
 2200         if (argp->ex_addrlen == 0) {
 2201                 if (mp->mnt_flag & MNT_DEFEXPORTED)
 2202                         return (EPERM);
 2203                 np = &nep->ne_defexported;
 2204                 np->netc_exflags = argp->ex_flags;
 2205                 np->netc_anon = argp->ex_anon;
 2206                 np->netc_anon.cr_ref = 1;
 2207                 mp->mnt_flag |= MNT_DEFEXPORTED;
 2208                 return (0);
 2209         }
 2210         i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
 2211         np = (struct netcred *) malloc(i, M_NETADDR, M_WAITOK);
 2212         bzero((caddr_t) np, i);
 2213         saddr = (struct sockaddr *) (np + 1);
 2214         if ((error = copyin(argp->ex_addr, (caddr_t) saddr, argp->ex_addrlen)))
 2215                 goto out;
 2216         if (saddr->sa_len > argp->ex_addrlen)
 2217                 saddr->sa_len = argp->ex_addrlen;
 2218         if (argp->ex_masklen) {
 2219                 smask = (struct sockaddr *) ((caddr_t) saddr + argp->ex_addrlen);
 2220                 error = copyin(argp->ex_mask, (caddr_t) smask, argp->ex_masklen);
 2221                 if (error)
 2222                         goto out;
 2223                 if (smask->sa_len > argp->ex_masklen)
 2224                         smask->sa_len = argp->ex_masklen;
 2225         }
 2226         i = saddr->sa_family;
 2227         if ((rnh = nep->ne_rtable[i]) == 0) {
 2228                 /*
 2229                  * Seems silly to initialize every AF when most are not used,
 2230                  * do so on demand here
 2231                  */
 2232                 for (dom = domains; dom; dom = dom->dom_next)
 2233                         if (dom->dom_family == i && dom->dom_rtattach) {
 2234                                 dom->dom_rtattach((void **) &nep->ne_rtable[i],
 2235                                     dom->dom_rtoffset);
 2236                                 break;
 2237                         }
 2238                 if ((rnh = nep->ne_rtable[i]) == 0) {
 2239                         error = ENOBUFS;
 2240                         goto out;
 2241                 }
 2242         }
 2243         rn = (*rnh->rnh_addaddr) ((caddr_t) saddr, (caddr_t) smask, rnh,
 2244             np->netc_rnodes);
 2245         if (rn == 0 || np != (struct netcred *) rn) {   /* already exists */
 2246                 error = EPERM;
 2247                 goto out;
 2248         }
 2249         np->netc_exflags = argp->ex_flags;
 2250         np->netc_anon = argp->ex_anon;
 2251         np->netc_anon.cr_ref = 1;
 2252         return (0);
 2253 out:
 2254         free(np, M_NETADDR);
 2255         return (error);
 2256 }
 2257 
 2258 /* ARGSUSED */
 2259 static int
 2260 vfs_free_netcred(rn, w)
 2261         struct radix_node *rn;
 2262         void *w;
 2263 {
 2264         register struct radix_node_head *rnh = (struct radix_node_head *) w;
 2265 
 2266         (*rnh->rnh_deladdr) (rn->rn_key, rn->rn_mask, rnh);
 2267         free((caddr_t) rn, M_NETADDR);
 2268         return (0);
 2269 }
 2270 
 2271 /*
 2272  * Free the net address hash lists that are hanging off the mount points.
 2273  */
 2274 static void
 2275 vfs_free_addrlist(nep)
 2276         struct netexport *nep;
 2277 {
 2278         register int i;
 2279         register struct radix_node_head *rnh;
 2280 
 2281         for (i = 0; i <= AF_MAX; i++)
 2282                 if ((rnh = nep->ne_rtable[i])) {
 2283                         (*rnh->rnh_walktree) (rnh, vfs_free_netcred,
 2284                             (caddr_t) rnh);
 2285                         free((caddr_t) rnh, M_RTABLE);
 2286                         nep->ne_rtable[i] = 0;
 2287                 }
 2288 }
 2289 
 2290 int
 2291 vfs_export(mp, nep, argp)
 2292         struct mount *mp;
 2293         struct netexport *nep;
 2294         struct export_args *argp;
 2295 {
 2296         int error;
 2297 
 2298         if (argp->ex_flags & MNT_DELEXPORT) {
 2299                 if (mp->mnt_flag & MNT_EXPUBLIC) {
 2300                         vfs_setpublicfs(NULL, NULL, NULL);
 2301                         mp->mnt_flag &= ~MNT_EXPUBLIC;
 2302                 }
 2303                 vfs_free_addrlist(nep);
 2304                 mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
 2305         }
 2306         if (argp->ex_flags & MNT_EXPORTED) {
 2307                 if (argp->ex_flags & MNT_EXPUBLIC) {
 2308                         if ((error = vfs_setpublicfs(mp, nep, argp)) != 0)
 2309                                 return (error);
 2310                         mp->mnt_flag |= MNT_EXPUBLIC;
 2311                 }
 2312                 if ((error = vfs_hang_addrlist(mp, nep, argp)))
 2313                         return (error);
 2314                 mp->mnt_flag |= MNT_EXPORTED;
 2315         }
 2316         return (0);
 2317 }
 2318 
 2319 
 2320 /*
 2321  * Set the publicly exported filesystem (WebNFS). Currently, only
 2322  * one public filesystem is possible in the spec (RFC 2054 and 2055)
 2323  */
 2324 int
 2325 vfs_setpublicfs(mp, nep, argp)
 2326         struct mount *mp;
 2327         struct netexport *nep;
 2328         struct export_args *argp;
 2329 {
 2330         int error;
 2331         struct vnode *rvp;
 2332         char *cp;
 2333 
 2334         /*
 2335          * mp == NULL -> invalidate the current info, the FS is
 2336          * no longer exported. May be called from either vfs_export
 2337          * or unmount, so check if it hasn't already been done.
 2338          */
 2339         if (mp == NULL) {
 2340                 if (nfs_pub.np_valid) {
 2341                         nfs_pub.np_valid = 0;
 2342                         if (nfs_pub.np_index != NULL) {
 2343                                 FREE(nfs_pub.np_index, M_TEMP);
 2344                                 nfs_pub.np_index = NULL;
 2345                         }
 2346                 }
 2347                 return (0);
 2348         }
 2349 
 2350         /*
 2351          * Only one allowed at a time.
 2352          */
 2353         if (nfs_pub.np_valid != 0 && mp != nfs_pub.np_mount)
 2354                 return (EBUSY);
 2355 
 2356         /*
 2357          * Get real filehandle for root of exported FS.
 2358          */
 2359         bzero((caddr_t)&nfs_pub.np_handle, sizeof(nfs_pub.np_handle));
 2360         nfs_pub.np_handle.fh_fsid = mp->mnt_stat.f_fsid;
 2361 
 2362         if ((error = VFS_ROOT(mp, &rvp)))
 2363                 return (error);
 2364 
 2365         if ((error = VFS_VPTOFH(rvp, &nfs_pub.np_handle.fh_fid)))
 2366                 return (error);
 2367 
 2368         vput(rvp);
 2369 
 2370         /*
 2371          * If an indexfile was specified, pull it in.
 2372          */
 2373         if (argp->ex_indexfile != NULL) {
 2374                 MALLOC(nfs_pub.np_index, char *, MAXNAMLEN + 1, M_TEMP,
 2375                     M_WAITOK);
 2376                 error = copyinstr(argp->ex_indexfile, nfs_pub.np_index,
 2377                     MAXNAMLEN, (size_t *)0);
 2378                 if (!error) {
 2379                         /*
 2380                          * Check for illegal filenames.
 2381                          */
 2382                         for (cp = nfs_pub.np_index; *cp; cp++) {
 2383                                 if (*cp == '/') {
 2384                                         error = EINVAL;
 2385                                         break;
 2386                                 }
 2387                         }
 2388                 }
 2389                 if (error) {
 2390                         FREE(nfs_pub.np_index, M_TEMP);
 2391                         return (error);
 2392                 }
 2393         }
 2394 
 2395         nfs_pub.np_mount = mp;
 2396         nfs_pub.np_valid = 1;
 2397         return (0);
 2398 }
 2399 
 2400 struct netcred *
 2401 vfs_export_lookup(mp, nep, nam)
 2402         register struct mount *mp;
 2403         struct netexport *nep;
 2404         struct sockaddr *nam;
 2405 {
 2406         register struct netcred *np;
 2407         register struct radix_node_head *rnh;
 2408         struct sockaddr *saddr;
 2409 
 2410         np = NULL;
 2411         if (mp->mnt_flag & MNT_EXPORTED) {
 2412                 /*
 2413                  * Lookup in the export list first.
 2414                  */
 2415                 if (nam != NULL) {
 2416                         saddr = nam;
 2417                         rnh = nep->ne_rtable[saddr->sa_family];
 2418                         if (rnh != NULL) {
 2419                                 np = (struct netcred *)
 2420                                         (*rnh->rnh_matchaddr)((caddr_t)saddr,
 2421                                                               rnh);
 2422                                 if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
 2423                                         np = NULL;
 2424                         }
 2425                 }
 2426                 /*
 2427                  * If no address match, use the default if it exists.
 2428                  */
 2429                 if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
 2430                         np = &nep->ne_defexported;
 2431         }
 2432         return (np);
 2433 }
 2434 
 2435 /*
 2436  * perform msync on all vnodes under a mount point
 2437  * the mount point must be locked.
 2438  */
 2439 void
 2440 vfs_msync(struct mount *mp, int flags) {
 2441         struct vnode *vp, *nvp;
 2442         struct vm_object *obj;
 2443         int anyio, tries;
 2444 
 2445         tries = 5;
 2446 loop:
 2447         anyio = 0;
 2448         for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) {
 2449 
 2450                 nvp = vp->v_mntvnodes.le_next;
 2451 
 2452                 if (vp->v_mount != mp) {
 2453                         goto loop;
 2454                 }
 2455 
 2456                 if (vp->v_flag & VXLOCK)        /* XXX: what if MNT_WAIT? */
 2457                         continue;
 2458 
 2459                 if (flags != MNT_WAIT) {
 2460                         obj = vp->v_object;
 2461                         if (obj == NULL || (obj->flags & OBJ_MIGHTBEDIRTY) == 0)
 2462                                 continue;
 2463                         if (VOP_ISLOCKED(vp))
 2464                                 continue;
 2465                 }
 2466 
 2467                 simple_lock(&vp->v_interlock);
 2468                 if (vp->v_object &&
 2469                    (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
 2470                         if (!vget(vp,
 2471                                 LK_INTERLOCK | LK_EXCLUSIVE | LK_RETRY | LK_NOOBJ, curproc)) {
 2472                                 if (vp->v_object) {
 2473                                         vm_object_page_clean(vp->v_object, 0, 0, flags == MNT_WAIT ? OBJPC_SYNC : 0);
 2474                                         anyio = 1;
 2475                                 }
 2476                                 vput(vp);
 2477                         }
 2478                 } else {
 2479                         simple_unlock(&vp->v_interlock);
 2480                 }
 2481         }
 2482         if (anyio && (--tries > 0))
 2483                 goto loop;
 2484 }
 2485 
 2486 /*
 2487  * Create the VM object needed for VMIO and mmap support.  This
 2488  * is done for all VREG files in the system.  Some filesystems might
 2489  * afford the additional metadata buffering capability of the
 2490  * VMIO code by making the device node be VMIO mode also.
 2491  *
 2492  * vp must be locked when vfs_object_create is called.
 2493  */
 2494 int
 2495 vfs_object_create(vp, p, cred)
 2496         struct vnode *vp;
 2497         struct proc *p;
 2498         struct ucred *cred;
 2499 {
 2500         struct vattr vat;
 2501         vm_object_t object;
 2502         int error = 0;
 2503 
 2504         if ((vp->v_type != VREG) && (vp->v_type != VBLK))
 2505                 return 0;
 2506 
 2507 retry:
 2508         if ((object = vp->v_object) == NULL) {
 2509                 if (vp->v_type == VREG) {
 2510                         if ((error = VOP_GETATTR(vp, &vat, cred, p)) != 0)
 2511                                 goto retn;
 2512                         object = vnode_pager_alloc(vp, vat.va_size, 0, 0);
 2513                 } else if (major(vp->v_rdev) < nblkdev &&
 2514                     bdevsw[major(vp->v_rdev)] != NULL) {
 2515                         /*
 2516                          * This simply allocates the biggest object possible
 2517                          * for a VBLK vnode.  This should be fixed, but doesn't
 2518                          * cause any problems (yet).
 2519                          */
 2520                         object = vnode_pager_alloc(vp, IDX_TO_OFF(INT_MAX), 0, 0);
 2521                 }
 2522                 object->ref_count--;
 2523                 vp->v_usecount--;
 2524         } else {
 2525                 if (object->flags & OBJ_DEAD) {
 2526                         VOP_UNLOCK(vp, 0, p);
 2527                         tsleep(object, PVM, "vodead", 0);
 2528                         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
 2529                         goto retry;
 2530                 }
 2531         }
 2532 
 2533         if (vp->v_object)
 2534                 vp->v_flag |= VOBJBUF;
 2535 
 2536 retn:
 2537         return error;
 2538 }
 2539 
 2540 static void
 2541 vfree(vp)
 2542         struct vnode *vp;
 2543 {
 2544         int s;
 2545 
 2546         s = splbio();
 2547         simple_lock(&vnode_free_list_slock);
 2548         if (vp->v_flag & VTBFREE) {
 2549                 TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist);
 2550                 vp->v_flag &= ~VTBFREE;
 2551         }
 2552         if (vp->v_flag & VAGE) {
 2553                 TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
 2554         } else {
 2555                 TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
 2556         }
 2557         freevnodes++;
 2558         simple_unlock(&vnode_free_list_slock);
 2559         vp->v_flag &= ~VAGE;
 2560         vp->v_flag |= VFREE;
 2561         splx(s);
 2562 }
 2563 
 2564 void
 2565 vbusy(vp)
 2566         struct vnode *vp;
 2567 {
 2568         int s;
 2569 
 2570         s = splbio();
 2571         simple_lock(&vnode_free_list_slock);
 2572         if (vp->v_flag & VTBFREE) {
 2573                 TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist);
 2574                 vp->v_flag &= ~VTBFREE;
 2575         } else {
 2576                 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
 2577                 freevnodes--;
 2578         }
 2579         simple_unlock(&vnode_free_list_slock);
 2580         vp->v_flag &= ~(VFREE|VAGE);
 2581         splx(s);
 2582 }
 2583 
 2584 /*
 2585  * Record a process's interest in events which might happen to
 2586  * a vnode.  Because poll uses the historic select-style interface
 2587  * internally, this routine serves as both the ``check for any
 2588  * pending events'' and the ``record my interest in future events''
 2589  * functions.  (These are done together, while the lock is held,
 2590  * to avoid race conditions.)
 2591  */
 2592 int
 2593 vn_pollrecord(vp, p, events)
 2594         struct vnode *vp;
 2595         struct proc *p;
 2596         short events;
 2597 {
 2598         simple_lock(&vp->v_pollinfo.vpi_lock);
 2599         if (vp->v_pollinfo.vpi_revents & events) {
 2600                 /*
 2601                  * This leaves events we are not interested
 2602                  * in available for the other process which
 2603                  * which presumably had requested them
 2604                  * (otherwise they would never have been
 2605                  * recorded).
 2606                  */
 2607                 events &= vp->v_pollinfo.vpi_revents;
 2608                 vp->v_pollinfo.vpi_revents &= ~events;
 2609 
 2610                 simple_unlock(&vp->v_pollinfo.vpi_lock);
 2611                 return events;
 2612         }
 2613         vp->v_pollinfo.vpi_events |= events;
 2614         selrecord(p, &vp->v_pollinfo.vpi_selinfo);
 2615         simple_unlock(&vp->v_pollinfo.vpi_lock);
 2616         return 0;
 2617 }
 2618 
 2619 /*
 2620  * Note the occurrence of an event.  If the VN_POLLEVENT macro is used,
 2621  * it is possible for us to miss an event due to race conditions, but
 2622  * that condition is expected to be rare, so for the moment it is the
 2623  * preferred interface.
 2624  */
 2625 void
 2626 vn_pollevent(vp, events)
 2627         struct vnode *vp;
 2628         short events;
 2629 {
 2630         simple_lock(&vp->v_pollinfo.vpi_lock);
 2631         if (vp->v_pollinfo.vpi_events & events) {
 2632                 /*
 2633                  * We clear vpi_events so that we don't
 2634                  * call selwakeup() twice if two events are
 2635                  * posted before the polling process(es) is
 2636                  * awakened.  This also ensures that we take at
 2637                  * most one selwakeup() if the polling process
 2638                  * is no longer interested.  However, it does
 2639                  * mean that only one event can be noticed at
 2640                  * a time.  (Perhaps we should only clear those
 2641                  * event bits which we note?) XXX
 2642                  */
 2643                 vp->v_pollinfo.vpi_events = 0;  /* &= ~events ??? */
 2644                 vp->v_pollinfo.vpi_revents |= events;
 2645                 selwakeup(&vp->v_pollinfo.vpi_selinfo);
 2646         }
 2647         simple_unlock(&vp->v_pollinfo.vpi_lock);
 2648 }
 2649 
 2650 /*
 2651  * Wake up anyone polling on vp because it is being revoked.
 2652  * This depends on dead_poll() returning POLLHUP for correct
 2653  * behavior.
 2654  */
 2655 void
 2656 vn_pollgone(vp)
 2657         struct vnode *vp;
 2658 {
 2659         simple_lock(&vp->v_pollinfo.vpi_lock);
 2660         if (vp->v_pollinfo.vpi_events) {
 2661                 vp->v_pollinfo.vpi_events = 0;
 2662                 selwakeup(&vp->v_pollinfo.vpi_selinfo);
 2663         }
 2664         simple_unlock(&vp->v_pollinfo.vpi_lock);
 2665 }
 2666 
 2667 
 2668 
 2669 /*
 2670  * Routine to create and manage a filesystem syncer vnode.
 2671  */
 2672 #define sync_close ((int (*) __P((struct  vop_close_args *)))nullop)
 2673 static int      sync_fsync __P((struct  vop_fsync_args *));
 2674 static int      sync_inactive __P((struct  vop_inactive_args *));
 2675 static int      sync_reclaim  __P((struct  vop_reclaim_args *));
 2676 #define sync_lock ((int (*) __P((struct  vop_lock_args *)))vop_nolock)
 2677 #define sync_unlock ((int (*) __P((struct  vop_unlock_args *)))vop_nounlock)
 2678 static int      sync_print __P((struct vop_print_args *));
 2679 #define sync_islocked ((int(*) __P((struct vop_islocked_args *)))vop_noislocked)
 2680 
 2681 static vop_t **sync_vnodeop_p;
 2682 static struct vnodeopv_entry_desc sync_vnodeop_entries[] = {
 2683         { &vop_default_desc,    (vop_t *) vop_eopnotsupp },
 2684         { &vop_close_desc,      (vop_t *) sync_close },         /* close */
 2685         { &vop_fsync_desc,      (vop_t *) sync_fsync },         /* fsync */
 2686         { &vop_inactive_desc,   (vop_t *) sync_inactive },      /* inactive */
 2687         { &vop_reclaim_desc,    (vop_t *) sync_reclaim },       /* reclaim */
 2688         { &vop_lock_desc,       (vop_t *) sync_lock },          /* lock */
 2689         { &vop_unlock_desc,     (vop_t *) sync_unlock },        /* unlock */
 2690         { &vop_print_desc,      (vop_t *) sync_print },         /* print */
 2691         { &vop_islocked_desc,   (vop_t *) sync_islocked },      /* islocked */
 2692         { NULL, NULL }
 2693 };
 2694 static struct vnodeopv_desc sync_vnodeop_opv_desc =
 2695         { &sync_vnodeop_p, sync_vnodeop_entries };
 2696 
 2697 VNODEOP_SET(sync_vnodeop_opv_desc);
 2698 
 2699 /*
 2700  * Create a new filesystem syncer vnode for the specified mount point.
 2701  */
 2702 int
 2703 vfs_allocate_syncvnode(mp)
 2704         struct mount *mp;
 2705 {
 2706         struct vnode *vp;
 2707         static long start, incr, next;
 2708         int error;
 2709 
 2710         /* Allocate a new vnode */
 2711         if ((error = getnewvnode(VT_VFS, mp, sync_vnodeop_p, &vp)) != 0) {
 2712                 mp->mnt_syncer = NULL;
 2713                 return (error);
 2714         }
 2715         vp->v_type = VNON;
 2716         /*
 2717          * Place the vnode onto the syncer worklist. We attempt to
 2718          * scatter them about on the list so that they will go off
 2719          * at evenly distributed times even if all the filesystems
 2720          * are mounted at once.
 2721          */
 2722         next += incr;
 2723         if (next == 0 || next > syncer_maxdelay) {
 2724                 start /= 2;
 2725                 incr /= 2;
 2726                 if (start == 0) {
 2727                         start = syncer_maxdelay / 2;
 2728                         incr = syncer_maxdelay;
 2729                 }
 2730                 next = start;
 2731         }
 2732         vn_syncer_add_to_worklist(vp, syncdelay > 0 ? next % syncdelay : 0);
 2733         mp->mnt_syncer = vp;
 2734         return (0);
 2735 }
 2736 
 2737 /*
 2738  * Do a lazy sync of the filesystem.
 2739  */
 2740 static int
 2741 sync_fsync(ap)
 2742         struct vop_fsync_args /* {
 2743                 struct vnode *a_vp;
 2744                 struct ucred *a_cred;
 2745                 int a_waitfor;
 2746                 struct proc *a_p;
 2747         } */ *ap;
 2748 {
 2749         struct vnode *syncvp = ap->a_vp;
 2750         struct mount *mp = syncvp->v_mount;
 2751         struct proc *p = ap->a_p;
 2752         int asyncflag;
 2753 
 2754         /*
 2755          * We only need to do something if this is a lazy evaluation.
 2756          */
 2757         if (ap->a_waitfor != MNT_LAZY)
 2758                 return (0);
 2759 
 2760         /*
 2761          * Move ourselves to the back of the sync list.
 2762          */
 2763         vn_syncer_add_to_worklist(syncvp, syncdelay);
 2764 
 2765         /*
 2766          * Walk the list of vnodes pushing all that are dirty and
 2767          * not already on the sync list.
 2768          */
 2769         simple_lock(&mountlist_slock);
 2770         if (vfs_busy(mp, LK_EXCLUSIVE | LK_NOWAIT, &mountlist_slock, p) != 0) {
 2771                 simple_unlock(&mountlist_slock);
 2772                 return (0);
 2773         }
 2774         asyncflag = mp->mnt_flag & MNT_ASYNC;
 2775         mp->mnt_flag &= ~MNT_ASYNC;
 2776         vfs_msync(mp, MNT_NOWAIT);
 2777         VFS_SYNC(mp, MNT_LAZY, ap->a_cred, p);
 2778         if (asyncflag)
 2779                 mp->mnt_flag |= MNT_ASYNC;
 2780         vfs_unbusy(mp, p);
 2781         return (0);
 2782 }
 2783 
 2784 /*
 2785  * The syncer vnode is no referenced.
 2786  */
 2787 static int
 2788 sync_inactive(ap)
 2789         struct vop_inactive_args /* {
 2790                 struct vnode *a_vp;
 2791                 struct proc *a_p;
 2792         } */ *ap;
 2793 {
 2794 
 2795         vgone(ap->a_vp);
 2796         return (0);
 2797 }
 2798 
 2799 /*
 2800  * The syncer vnode is no longer needed and is being decommissioned.
 2801  *
 2802  * Modifications to the worklist must be protected at splbio().
 2803  */
 2804 static int
 2805 sync_reclaim(ap)
 2806         struct vop_reclaim_args /* {
 2807                 struct vnode *a_vp;
 2808         } */ *ap;
 2809 {
 2810         struct vnode *vp = ap->a_vp;
 2811         int s;
 2812 
 2813         s = splbio();
 2814         vp->v_mount->mnt_syncer = NULL;
 2815         if (vp->v_flag & VONWORKLST) {
 2816                 LIST_REMOVE(vp, v_synclist);
 2817                 vp->v_flag &= ~VONWORKLST;
 2818         }
 2819         splx(s);
 2820 
 2821         return (0);
 2822 }
 2823 
 2824 /*
 2825  * Print out a syncer vnode.
 2826  */
 2827 static int
 2828 sync_print(ap)
 2829         struct vop_print_args /* {
 2830                 struct vnode *a_vp;
 2831         } */ *ap;
 2832 {
 2833         struct vnode *vp = ap->a_vp;
 2834 
 2835         printf("syncer vnode");
 2836         if (vp->v_vnlock != NULL)
 2837                 lockmgr_printinfo(vp->v_vnlock);
 2838         printf("\n");
 2839         return (0);
 2840 }
Cache object: 812a3518dfb04fb21f75765b5abbd107
[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]
This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.
FreeBSD/Linux Kernel Cross Reference sys/kern/vfs_subr.c

FreeBSD/Linux Kernel Cross Reference
sys/kern/vfs_subr.c