FreeBSD/Linux Kernel Cross Reference
sys/kern/vfs_subr.c
1 /*
2 * Copyright (c) 1989, 1993
3 * The Regents of the University of California. All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the University of
21 * California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 * may be used to endorse or promote products derived from this software
24 * without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 * @(#)vfs_subr.c 8.13 (Berkeley) 4/18/94
39 * $FreeBSD: src/sys/kern/vfs_subr.c,v 1.64.2.6 1999/09/05 08:15:44 peter Exp $
40 */
41
42 /*
43 * External virtual filesystem routines
44 */
45 #include "opt_ddb.h"
46 #include "opt_devfs.h"
47
48 #include <sys/param.h>
49 #include <sys/systm.h>
50 #include <sys/kernel.h>
51 #include <sys/file.h>
52 #include <sys/proc.h>
53 #include <sys/mount.h>
54 #include <sys/time.h>
55 #include <sys/vnode.h>
56 #include <sys/stat.h>
57 #include <sys/namei.h>
58 #include <sys/ucred.h>
59 #include <sys/buf.h>
60 #include <sys/errno.h>
61 #include <sys/malloc.h>
62 #include <sys/domain.h>
63 #include <sys/mbuf.h>
64
65 #include <vm/vm.h>
66 #include <vm/vm_param.h>
67 #include <vm/vm_object.h>
68 #include <vm/vm_extern.h>
69 #include <vm/vm_pager.h>
70 #include <vm/vnode_pager.h>
71 #include <sys/sysctl.h>
72
73 #include <miscfs/specfs/specdev.h>
74
75 #ifdef DDB
76 extern void printlockedvnodes __P((void));
77 #endif
78 extern void vclean __P((struct vnode *vp, int flags));
79 extern void vfs_unmountroot __P((struct mount *rootfs));
80
81 enum vtype iftovt_tab[16] = {
82 VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
83 VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
84 };
85 int vttoif_tab[9] = {
86 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
87 S_IFSOCK, S_IFIFO, S_IFMT,
88 };
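/*
 * Illustration (not part of vfs_subr.c): the two tables above back the
 * IFTOVT()/VTTOIF() conversions between the S_IFMT bits of an inode
 * mode and the vnode type enum.  A minimal user-space sketch; the two
 * macros mirror their usual sys/vnode.h definitions (an assumption,
 * check the header for your release):
 */
#include <stdio.h>
#include <sys/stat.h>

enum vtype { VNON, VREG, VDIR, VBLK, VCHR, VLNK, VSOCK, VFIFO, VBAD };

static enum vtype ex_iftovt_tab[16] = {
	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
};
static int ex_vttoif_tab[9] = {
	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
	S_IFSOCK, S_IFIFO, S_IFMT,
};

#define IFTOVT(mode)	(ex_iftovt_tab[((mode) & S_IFMT) >> 12])
#define VTTOIF(indx)	(ex_vttoif_tab[(int)(indx)])

int
main(void)
{
	mode_t m = S_IFDIR | 0755;

	/* S_IFDIR is 0040000, so (m & S_IFMT) >> 12 == 4, i.e. VDIR. */
	printf("vtype %d, ifmt %07o\n", IFTOVT(m), VTTOIF(IFTOVT(m)));
	return (0);
}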
89
90 /*
91 * Insq/Remq for the vnode usage lists.
92 */
93 #define bufinsvn(bp, dp) LIST_INSERT_HEAD(dp, bp, b_vnbufs)
94 #define bufremvn(bp) { \
95 LIST_REMOVE(bp, b_vnbufs); \
96 (bp)->b_vnbufs.le_next = NOLIST; \
97 }
98
99 TAILQ_HEAD(freelst, vnode) vnode_free_list; /* vnode free list */
100 static u_long freevnodes = 0;
101
102 struct mntlist mountlist; /* mounted filesystem list */
103
104 int desiredvnodes;
105 SYSCTL_INT(_kern, KERN_MAXVNODES, maxvnodes, CTLFLAG_RW, &desiredvnodes, 0, "");
106
107 static void vfs_free_addrlist __P((struct netexport *nep));
108 static int vfs_free_netcred __P((struct radix_node *rn, void *w));
109 static int vfs_hang_addrlist __P((struct mount *mp, struct netexport *nep,
110 struct export_args *argp));
111
112 /*
113 * Initialize the vnode management data structures.
114 */
115 void
116 vntblinit()
117 {
118
119 desiredvnodes = maxproc + vm_object_cache_max;
120 TAILQ_INIT(&vnode_free_list);
121 CIRCLEQ_INIT(&mountlist);
122 }
123
124 /*
125 * Lock a filesystem.
126 * Used to prevent access to it while mounting and unmounting.
127 */
128 int
129 vfs_lock(mp)
130 register struct mount *mp;
131 {
132
133 while (mp->mnt_flag & MNT_MLOCK) {
134 mp->mnt_flag |= MNT_MWAIT;
135 (void) tsleep((caddr_t) mp, PVFS, "vfslck", 0);
136 }
137 mp->mnt_flag |= MNT_MLOCK;
138 return (0);
139 }
140
141 /*
142 * Unlock a locked filesystem.
143 * Panic if filesystem is not locked.
144 */
145 void
146 vfs_unlock(mp)
147 register struct mount *mp;
148 {
149
150 if ((mp->mnt_flag & MNT_MLOCK) == 0)
151 panic("vfs_unlock: not locked");
152 mp->mnt_flag &= ~MNT_MLOCK;
153 if (mp->mnt_flag & MNT_MWAIT) {
154 mp->mnt_flag &= ~MNT_MWAIT;
155 wakeup((caddr_t) mp);
156 }
157 }
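/*
 * Illustration (not part of vfs_subr.c): vfs_lock()/vfs_unlock() above
 * build a sleep lock out of two flag bits plus tsleep()/wakeup() keyed
 * on the mount address.  A rough user-space analogue, with a pthread
 * condition variable standing in for the sleep channel (the fakemount
 * type and function names are invented for this sketch):
 */
#include <pthread.h>

#define FM_MLOCK	0x1		/* like MNT_MLOCK */
#define FM_MWAIT	0x2		/* like MNT_MWAIT */

struct fakemount {
	int		flag;
	pthread_mutex_t	mtx;
	pthread_cond_t	cv;		/* stands in for tsleep/wakeup */
};

static void
fake_vfs_lock(struct fakemount *mp)
{
	pthread_mutex_lock(&mp->mtx);
	while (mp->flag & FM_MLOCK) {
		mp->flag |= FM_MWAIT;		/* advertise a waiter */
		pthread_cond_wait(&mp->cv, &mp->mtx);
	}
	mp->flag |= FM_MLOCK;
	pthread_mutex_unlock(&mp->mtx);
}

static void
fake_vfs_unlock(struct fakemount *mp)
{
	pthread_mutex_lock(&mp->mtx);
	mp->flag &= ~FM_MLOCK;
	if (mp->flag & FM_MWAIT) {	/* wake only if someone slept */
		mp->flag &= ~FM_MWAIT;
		pthread_cond_broadcast(&mp->cv);
	}
	pthread_mutex_unlock(&mp->mtx);
}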
158
159 /*
160 * Mark a mount point as busy.
161 * Used to synchronize access and to delay unmounting.
162 */
163 int
164 vfs_busy(mp)
165 register struct mount *mp;
166 {
167
168 while (mp->mnt_flag & MNT_MPBUSY) {
169 mp->mnt_flag |= MNT_MPWANT;
170 (void) tsleep((caddr_t) &mp->mnt_flag, PVFS, "vfsbsy", 0);
171 }
172 if (mp->mnt_flag & MNT_UNMOUNT)
173 return (1);
174 mp->mnt_flag |= MNT_MPBUSY;
175 return (0);
176 }
177
178 /*
179 * Free a busy filesystem.
180 * Panic if filesystem is not busy.
181 */
182 void
183 vfs_unbusy(mp)
184 register struct mount *mp;
185 {
186
187 if ((mp->mnt_flag & MNT_MPBUSY) == 0)
188 panic("vfs_unbusy: not busy");
189 mp->mnt_flag &= ~MNT_MPBUSY;
190 if (mp->mnt_flag & MNT_MPWANT) {
191 mp->mnt_flag &= ~MNT_MPWANT;
192 wakeup((caddr_t) &mp->mnt_flag);
193 }
194 }
195
196 void
197 vfs_unmountroot(struct mount *rootfs)
198 {
199 struct mount *mp = rootfs;
200 int error;
201
202 if (vfs_busy(mp)) {
203 printf("failed to unmount root\n");
204 return;
205 }
206 mp->mnt_flag |= MNT_UNMOUNT;
207 if ((error = vfs_lock(mp))) {
208 printf("lock of root filesystem failed (%d)\n", error);
209 return;
210 }
211 vnode_pager_umount(mp); /* release cached vnodes */
212 cache_purgevfs(mp); /* remove cache entries for this file sys */
213
214 if ((error = VFS_SYNC(mp, MNT_WAIT, initproc->p_ucred, initproc)))
215 printf("sync of root filesystem failed (%d)\n", error);
216
217 if ((error = VFS_UNMOUNT(mp, MNT_FORCE, initproc))) {
218 printf("unmount of root filesystem failed (");
219 if (error == EBUSY)
220 printf("BUSY)\n");
221 else
222 printf("%d)\n", error);
223 }
224 mp->mnt_flag &= ~MNT_UNMOUNT;
225 vfs_unbusy(mp);
226 }
227
228 /*
229 * Unmount all filesystems. Should only be called by halt().
230 */
231 void
232 vfs_unmountall()
233 {
234 struct mount *mp, *nmp, *rootfs = NULL;
235 int error;
236
237 /* unmount all but rootfs */
238 for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
239 nmp = mp->mnt_list.cqe_prev;
240
241 if (mp->mnt_flag & MNT_ROOTFS) {
242 rootfs = mp;
243 continue;
244 }
245 error = dounmount(mp, MNT_FORCE, initproc);
246 if (error) {
247 printf("unmount of %s failed (", mp->mnt_stat.f_mntonname);
248 if (error == EBUSY)
249 printf("BUSY)\n");
250 else
251 printf("%d)\n", error);
252 }
253 }
254
255 /* and finally... */
256 if (rootfs) {
257 vfs_unmountroot(rootfs);
258 } else {
259 printf("no root filesystem\n");
260 }
261 }
262
263 /*
264 * Lookup a mount point by filesystem identifier.
265 */
266 struct mount *
267 getvfs(fsid)
268 fsid_t *fsid;
269 {
270 register struct mount *mp;
271
272 for (mp = mountlist.cqh_first; mp != (void *)&mountlist;
273 mp = mp->mnt_list.cqe_next) {
274 if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
275 mp->mnt_stat.f_fsid.val[1] == fsid->val[1])
276 return (mp);
277 }
278 return ((struct mount *) 0);
279 }
280
281 /*
282 * Get a new unique fsid
283 */
284 void
285 getnewfsid(mp, mtype)
286 struct mount *mp;
287 int mtype;
288 {
289 static u_short xxxfs_mntid;
290
291 fsid_t tfsid;
292
293 mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
294 mp->mnt_stat.f_fsid.val[1] = mtype;
295 if (xxxfs_mntid == 0)
296 ++xxxfs_mntid;
297 tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid);
298 tfsid.val[1] = mtype;
299 if (mountlist.cqh_first != (void *)&mountlist) {
300 while (getvfs(&tfsid)) {
301 tfsid.val[0]++;
302 xxxfs_mntid++;
303 }
304 }
305 mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
306 }
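/*
 * Illustration (not part of vfs_subr.c): getnewfsid() above builds a
 * candidate fsid and then probes getvfs() in a loop, bumping the value
 * until no mounted filesystem claims it.  The same linear-probe shape
 * in a self-contained sketch, with an invented id_in_use() predicate
 * standing in for getvfs():
 */
#include <stdio.h>

static int taken[8] = { 1, 1, 0, 1, 0, 0, 0, 0 };

static int
id_in_use(int id)			/* getvfs(&tfsid) stand-in */
{
	return (id >= 0 && id < 8 && taken[id]);
}

static int
pick_unique_id(int candidate)
{
	while (id_in_use(candidate))	/* same loop shape as above */
		candidate++;
	return (candidate);
}

int
main(void)
{
	printf("first free id: %d\n", pick_unique_id(0));	/* prints 2 */
	return (0);
}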
307
308 /*
309 * Set vnode attributes to VNOVAL
310 */
311 void
312 vattr_null(vap)
313 register struct vattr *vap;
314 {
315
316 vap->va_type = VNON;
317 vap->va_size = VNOVAL;
318 vap->va_bytes = VNOVAL;
319 vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid =
320 vap->va_fsid = vap->va_fileid =
321 vap->va_blocksize = vap->va_rdev =
322 vap->va_atime.tv_sec = vap->va_atime.tv_nsec =
323 vap->va_mtime.tv_sec = vap->va_mtime.tv_nsec =
324 vap->va_ctime.tv_sec = vap->va_ctime.tv_nsec =
325 vap->va_flags = vap->va_gen = VNOVAL;
326 vap->va_vaflags = 0;
327 }
328
329 /*
330 * Routines having to do with the management of the vnode table.
331 */
332 extern vop_t **dead_vnodeop_p;
333
334 /*
335 * Return the next vnode from the free list.
336 */
337 int
338 getnewvnode(tag, mp, vops, vpp)
339 enum vtagtype tag;
340 struct mount *mp;
341 vop_t **vops;
342 struct vnode **vpp;
343 {
344 register struct vnode *vp;
345
346 retry:
347 vp = vnode_free_list.tqh_first;
348 /*
349 * we allocate a new vnode if
350 * 1. we don't have any free
351 * Pretty obvious, we actually used to panic, but that
352 * is a silly thing to do.
 353  *	2. we haven't filled our pool yet
 354  *	   We don't want to trash the incore (VM-)vnodecache.
 355  *	3. if less than 1/4th of our vnodes are free.
356 * We don't want to trash the namei cache either.
357 */
358 if (freevnodes < (numvnodes >> 2) ||
359 numvnodes < desiredvnodes ||
360 vp == NULL) {
361 vp = (struct vnode *) malloc((u_long) sizeof *vp,
362 M_VNODE, M_WAITOK);
363 bzero((char *) vp, sizeof *vp);
364 numvnodes++;
365 } else {
366 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
367 if (vp->v_usage > 0) {
368 --vp->v_usage;
369 TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
370 goto retry;
371 }
372 freevnodes--;
373 if (vp->v_usecount)
374 panic("free vnode isn't");
375
376 /* see comment on why 0xdeadb is set at end of vgone (below) */
377 vp->v_freelist.tqe_prev = (struct vnode **) 0xdeadb;
378 vp->v_lease = NULL;
379 if (vp->v_type != VBAD)
380 vgone(vp);
381
382 #ifdef DIAGNOSTIC
383 {
384 int s;
385
386 if (vp->v_data)
387 panic("cleaned vnode isn't");
388 s = splbio();
389 if (vp->v_numoutput)
390 panic("Clean vnode has pending I/O's");
391 splx(s);
392 }
393 #endif
394 vp->v_flag = 0;
395 vp->v_lastr = 0;
396 vp->v_ralen = 0;
397 vp->v_maxra = 0;
398 vp->v_lastw = 0;
399 vp->v_lasta = 0;
400 vp->v_cstart = 0;
401 vp->v_clen = 0;
402 vp->v_socket = 0;
403 vp->v_writecount = 0; /* XXX */
404 vp->v_usage = 0;
405 }
406 vp->v_type = VNON;
407 cache_purge(vp);
408 vp->v_tag = tag;
409 vp->v_op = vops;
410 insmntque(vp, mp);
411 *vpp = vp;
412 vp->v_usecount = 1;
413 vp->v_data = 0;
414 return (0);
415 }
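/*
 * Illustration (not part of vfs_subr.c): the heart of getnewvnode() is
 * the three-way policy spelled out in the comment above: allocate a
 * fresh vnode while the free list is empty, the pool is below target,
 * or too few vnodes are free; otherwise recycle the head of the LRU
 * free list.  The decision in isolation (names invented for the sketch):
 */
static int
should_allocate_fresh(unsigned long nfree, unsigned long ntotal,
    unsigned long target, int freelist_empty)
{
	if (freelist_empty)		/* 1. nothing free: must allocate */
		return (1);
	if (ntotal < target)		/* 2. pool not yet filled */
		return (1);
	if (nfree < (ntotal >> 2))	/* 3. < 1/4 free: spare namei cache */
		return (1);
	return (0);			/* otherwise recycle from the LRU */
}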
416
417 /*
418 * Move a vnode from one mount queue to another.
419 */
420 void
421 insmntque(vp, mp)
422 register struct vnode *vp;
423 register struct mount *mp;
424 {
425
426 /*
427 * Delete from old mount point vnode list, if on one.
428 */
429 if (vp->v_mount != NULL)
430 LIST_REMOVE(vp, v_mntvnodes);
431 /*
432 * Insert into list of vnodes for the new mount point, if available.
433 */
434 if ((vp->v_mount = mp) == NULL)
435 return;
436 LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
437 }
438
439 /*
440 * Update outstanding I/O count and do wakeup if requested.
441 */
442 void
443 vwakeup(bp)
444 register struct buf *bp;
445 {
446 register struct vnode *vp;
447
448 bp->b_flags &= ~B_WRITEINPROG;
449 if ((vp = bp->b_vp)) {
450 vp->v_numoutput--;
451 if (vp->v_numoutput < 0)
452 panic("vwakeup: neg numoutput");
453 if ((vp->v_numoutput == 0) && (vp->v_flag & VBWAIT)) {
454 vp->v_flag &= ~VBWAIT;
455 wakeup((caddr_t) &vp->v_numoutput);
456 }
457 }
458 }
459
460 /*
461 * Flush out and invalidate all buffers associated with a vnode.
462 * Called with the underlying object locked.
463 */
464 int
465 vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
466 register struct vnode *vp;
467 int flags;
468 struct ucred *cred;
469 struct proc *p;
470 int slpflag, slptimeo;
471 {
472 register struct buf *bp;
473 struct buf *nbp, *blist;
474 int s, error;
475 vm_object_t object;
476
477 if (flags & V_SAVE) {
478 if ((error = VOP_FSYNC(vp, cred, MNT_WAIT, p)))
479 return (error);
480 if (vp->v_dirtyblkhd.lh_first != NULL)
481 panic("vinvalbuf: dirty bufs");
482 }
483
484 s = splbio();
485 for (;;) {
486 if ((blist = vp->v_cleanblkhd.lh_first) && (flags & V_SAVEMETA))
487 while (blist && blist->b_lblkno < 0)
488 blist = blist->b_vnbufs.le_next;
489 if (!blist && (blist = vp->v_dirtyblkhd.lh_first) &&
490 (flags & V_SAVEMETA))
491 while (blist && blist->b_lblkno < 0)
492 blist = blist->b_vnbufs.le_next;
493 if (!blist)
494 break;
495
496 for (bp = blist; bp; bp = nbp) {
497 nbp = bp->b_vnbufs.le_next;
498 if ((flags & V_SAVEMETA) && bp->b_lblkno < 0)
499 continue;
500 if (bp->b_flags & B_BUSY) {
501 bp->b_flags |= B_WANTED;
502 error = tsleep((caddr_t) bp,
503 slpflag | (PRIBIO + 1), "vinvalbuf",
504 slptimeo);
505 if (error) {
506 splx(s);
507 return (error);
508 }
509 break;
510 }
511 bremfree(bp);
512 bp->b_flags |= B_BUSY;
513 /*
514 * XXX Since there are no node locks for NFS, I
515 * believe there is a slight chance that a delayed
516 * write will occur while sleeping just above, so
517 * check for it.
518 */
519 if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
520 (void) VOP_BWRITE(bp);
521 break;
522 }
523 bp->b_flags |= (B_INVAL|B_NOCACHE|B_RELBUF);
524 brelse(bp);
525 }
526 }
527
528 while (vp->v_numoutput > 0) {
529 vp->v_flag |= VBWAIT;
530 tsleep(&vp->v_numoutput, PVM, "vnvlbv", 0);
531 }
532
533 splx(s);
534
535 /*
536 * Destroy the copy in the VM cache, too.
537 */
538 object = vp->v_object;
539 if (object != NULL) {
540 vm_object_page_remove(object, 0, object->size,
541 (flags & V_SAVE) ? TRUE : FALSE);
542 }
543 if (!(flags & V_SAVEMETA) &&
544 (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first))
545 panic("vinvalbuf: flush failed");
546 return (0);
547 }
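/*
 * Illustration (not part of vfs_subr.c): vinvalbuf() above uses the
 * classic sleep-and-rescan idiom: walk the buffer list, and on finding
 * a busy buffer, sleep until it is released and restart the scan from
 * the top, because the list may have changed while sleeping.  Skeleton
 * of that control flow (types and helpers invented for the sketch):
 */
#include <stddef.h>

struct fbuf {
	struct fbuf	*next;
	int		busy;		/* stands in for B_BUSY */
};

static void
wait_until_released(struct fbuf *bp)	/* tsleep() stand-in */
{
	bp->busy = 0;
}

static void
discard(struct fbuf **head, struct fbuf *bp)	/* brelse() stand-in */
{
	struct fbuf **pp;

	for (pp = head; *pp != NULL; pp = &(*pp)->next)
		if (*pp == bp) {
			*pp = bp->next;
			break;
		}
}

static void
flush_all(struct fbuf **head)
{
	struct fbuf *bp, *nbp;

	while (*head != NULL) {
		for (bp = *head; bp != NULL; bp = nbp) {
			nbp = bp->next;
			if (bp->busy) {
				wait_until_released(bp);
				break;	/* rescan: list may have changed */
			}
			discard(head, bp);
		}
	}
}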
548
549 /*
550 * Associate a buffer with a vnode.
551 */
552 void
553 bgetvp(vp, bp)
554 register struct vnode *vp;
555 register struct buf *bp;
556 {
557 int s;
558
559 if (bp->b_vp)
560 panic("bgetvp: not free");
561 VHOLD(vp);
562 bp->b_vp = vp;
563 if (vp->v_type == VBLK || vp->v_type == VCHR)
564 bp->b_dev = vp->v_rdev;
565 else
566 bp->b_dev = NODEV;
567 /*
568 * Insert onto list for new vnode.
569 */
570 s = splbio();
571 bufinsvn(bp, &vp->v_cleanblkhd);
572 splx(s);
573 }
574
575 /*
576 * Disassociate a buffer from a vnode.
577 */
578 void
579 brelvp(bp)
580 register struct buf *bp;
581 {
582 struct vnode *vp;
583 int s;
584
585 if (bp->b_vp == (struct vnode *) 0)
586 panic("brelvp: NULL");
587 /*
588 * Delete from old vnode list, if on one.
589 */
590 s = splbio();
591 if (bp->b_vnbufs.le_next != NOLIST)
592 bufremvn(bp);
593 splx(s);
594
595 vp = bp->b_vp;
596 bp->b_vp = (struct vnode *) 0;
597 HOLDRELE(vp);
598 }
599
600 /*
601 * Associate a p-buffer with a vnode.
602 */
603 void
604 pbgetvp(vp, bp)
605 register struct vnode *vp;
606 register struct buf *bp;
607 {
608 if (bp->b_vp)
609 panic("pbgetvp: not free");
610 VHOLD(vp);
611 bp->b_vp = vp;
612 if (vp->v_type == VBLK || vp->v_type == VCHR)
613 bp->b_dev = vp->v_rdev;
614 else
615 bp->b_dev = NODEV;
616 }
617
618 /*
619 * Disassociate a p-buffer from a vnode.
620 */
621 void
622 pbrelvp(bp)
623 register struct buf *bp;
624 {
625 struct vnode *vp;
626
627 if (bp->b_vp == (struct vnode *) 0)
628 panic("brelvp: NULL");
629
630 vp = bp->b_vp;
631 bp->b_vp = (struct vnode *) 0;
632 HOLDRELE(vp);
633 }
634
635 /*
636 * Reassign a buffer from one vnode to another.
637 * Used to assign file specific control information
638 * (indirect blocks) to the vnode to which they belong.
639 */
640 void
641 reassignbuf(bp, newvp)
642 register struct buf *bp;
643 register struct vnode *newvp;
644 {
645 int s;
646
647 if (newvp == NULL) {
648 printf("reassignbuf: NULL");
649 return;
650 }
651
652 s = splbio();
653 /*
654 * Delete from old vnode list, if on one.
655 */
656 if (bp->b_vnbufs.le_next != NOLIST)
657 bufremvn(bp);
658 /*
659 * If dirty, put on list of dirty buffers; otherwise insert onto list
660 * of clean buffers.
661 */
662 if (bp->b_flags & B_DELWRI) {
663 struct buf *tbp;
664
665 tbp = newvp->v_dirtyblkhd.lh_first;
666 if (!tbp || (tbp->b_lblkno > bp->b_lblkno)) {
667 bufinsvn(bp, &newvp->v_dirtyblkhd);
668 } else {
669 while (tbp->b_vnbufs.le_next &&
670 (tbp->b_vnbufs.le_next->b_lblkno < bp->b_lblkno)) {
671 tbp = tbp->b_vnbufs.le_next;
672 }
673 LIST_INSERT_AFTER(tbp, bp, b_vnbufs);
674 }
675 } else {
676 bufinsvn(bp, &newvp->v_cleanblkhd);
677 }
678 splx(s);
679 }
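/*
 * Illustration (not part of vfs_subr.c): reassignbuf() above keeps the
 * dirty list sorted by logical block number (b_lblkno) so later writes
 * go out in roughly ascending disk order.  The same sorted insert into
 * a singly-linked list, self-contained (types invented for the sketch):
 */
#include <stddef.h>

struct dbuf {
	struct dbuf	*next;
	long		lblkno;		/* sort key, like b_lblkno */
};

static void
insert_sorted(struct dbuf **head, struct dbuf *bp)
{
	struct dbuf *tbp = *head;

	if (tbp == NULL || tbp->lblkno > bp->lblkno) {
		bp->next = *head;		/* new list head */
		*head = bp;
		return;
	}
	while (tbp->next != NULL && tbp->next->lblkno < bp->lblkno)
		tbp = tbp->next;
	bp->next = tbp->next;			/* splice in after tbp */
	tbp->next = bp;
}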
680
681 #ifndef DEVFS_ROOT
682 /*
683 * Create a vnode for a block device.
684 * Used for root filesystem, argdev, and swap areas.
685 * Also used for memory file system special devices.
686 */
687 int
688 bdevvp(dev, vpp)
689 dev_t dev;
690 struct vnode **vpp;
691 {
692 register struct vnode *vp;
693 struct vnode *nvp;
694 int error;
695
696 if (dev == NODEV)
697 return (0);
698 error = getnewvnode(VT_NON, (struct mount *) 0, spec_vnodeop_p, &nvp);
699 if (error) {
700 *vpp = 0;
701 return (error);
702 }
703 vp = nvp;
704 vp->v_type = VBLK;
705 if ((nvp = checkalias(vp, dev, (struct mount *) 0))) {
706 vput(vp);
707 vp = nvp;
708 }
709 *vpp = vp;
710 return (0);
711 }
712 #endif /* !DEVFS_ROOT */
713
714 /*
715 * Check to see if the new vnode represents a special device
716 * for which we already have a vnode (either because of
717 * bdevvp() or because of a different vnode representing
718 * the same block device). If such an alias exists, deallocate
719 * the existing contents and return the aliased vnode. The
720 * caller is responsible for filling it with its new contents.
721 */
722 struct vnode *
723 checkalias(nvp, nvp_rdev, mp)
724 register struct vnode *nvp;
725 dev_t nvp_rdev;
726 struct mount *mp;
727 {
728 register struct vnode *vp;
729 struct vnode **vpp;
730
731 if (nvp->v_type != VBLK && nvp->v_type != VCHR)
732 return (NULLVP);
733
734 vpp = &speclisth[SPECHASH(nvp_rdev)];
735 loop:
736 for (vp = *vpp; vp; vp = vp->v_specnext) {
737 if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
738 continue;
739 /*
740 * Alias, but not in use, so flush it out.
741 */
742 if (vp->v_usecount == 0) {
743 vgone(vp);
744 goto loop;
745 }
746 if (vget(vp, 1))
747 goto loop;
748 break;
749 }
750
751 if (vp == NULL || vp->v_tag != VT_NON) {
752 MALLOC(nvp->v_specinfo, struct specinfo *,
753 sizeof(struct specinfo), M_VNODE, M_WAITOK);
754 nvp->v_rdev = nvp_rdev;
755 nvp->v_hashchain = vpp;
756 nvp->v_specnext = *vpp;
757 nvp->v_specflags = 0;
758 *vpp = nvp;
759 if (vp != NULL) {
760 nvp->v_flag |= VALIASED;
761 vp->v_flag |= VALIASED;
762 vput(vp);
763 }
764 return (NULLVP);
765 }
766 VOP_UNLOCK(vp);
767 vclean(vp, 0);
768 vp->v_op = nvp->v_op;
769 vp->v_tag = nvp->v_tag;
770 nvp->v_type = VNON;
771 insmntque(vp, mp);
772 return (vp);
773 }
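/*
 * Illustration (not part of vfs_subr.c): checkalias() above hangs every
 * special-device vnode on a hash chain keyed by device number, so a new
 * vnode for the same dev_t can find any existing alias.  A minimal
 * sketch of that chain (types and the hash invented for the sketch; the
 * real SPECHASH macro lives in specdev.h):
 */
#include <stddef.h>

#define NSPEC		64
#define SPEC_HASH(rdev)	((unsigned)(rdev) % NSPEC)

struct snode {
	struct snode	*specnext;	/* same-hash device chain */
	unsigned	rdev;
	int		type;		/* VBLK/VCHR analogue */
};

static struct snode *speclist[NSPEC];

static struct snode *
find_alias(unsigned rdev, int type)
{
	struct snode *vp;

	for (vp = speclist[SPEC_HASH(rdev)]; vp != NULL; vp = vp->specnext)
		if (vp->rdev == rdev && vp->type == type)
			return (vp);	/* existing vnode for this device */
	return (NULL);
}

static void
insert_alias(struct snode *nvp)
{
	struct snode **vpp = &speclist[SPEC_HASH(nvp->rdev)];

	nvp->specnext = *vpp;		/* push onto the hash chain */
	*vpp = nvp;
}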
774
775 /*
776 * Grab a particular vnode from the free list, increment its
 777  * reference count and lock it. The vnode lock bit is set if the
 778  * vnode is being eliminated in vgone. The process is awakened
779 * when the transition is completed, and an error returned to
780 * indicate that the vnode is no longer usable (possibly having
781 * been changed to a new file system type).
782 */
783 int
784 vget(vp, lockflag)
785 register struct vnode *vp;
786 int lockflag;
787 {
788
789 /*
790 * If the vnode is in the process of being cleaned out for another
791 * use, we wait for the cleaning to finish and then return failure.
792 * Cleaning is determined either by checking that the VXLOCK flag is
793 * set, or that the use count is zero with the back pointer set to
794 * show that it has been removed from the free list by getnewvnode.
795 * The VXLOCK flag may not have been set yet because vclean is blocked
796 * in the VOP_LOCK call waiting for the VOP_INACTIVE to complete.
797 */
798 if ((vp->v_flag & VXLOCK) ||
799 (vp->v_usecount == 0 &&
800 vp->v_freelist.tqe_prev == (struct vnode **) 0xdeadb)) {
801 vp->v_flag |= VXWANT;
802 (void) tsleep((caddr_t) vp, PINOD, "vget", 0);
803 return (1);
804 }
805 if (vp->v_usecount == 0) {
806 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
807 freevnodes--;
808 }
809 vp->v_usecount++;
810
811 /*
812 * Create the VM object, if needed
813 */
814 if ((vp->v_type == VREG) &&
815 ((vp->v_object == NULL) ||
816 (vp->v_object->flags & OBJ_VFS_REF) == 0)) {
817 vfs_object_create(vp, curproc, curproc->p_ucred, 0);
818 }
819 if (lockflag)
820 VOP_LOCK(vp);
821
822 return (0);
823 }
824
825 /*
826 * Vnode reference, just increment the count
827 */
828 void
829 vref(vp)
830 struct vnode *vp;
831 {
832 if (vp->v_usecount <= 0)
833 panic("vref used where vget required");
834
835 vp->v_usecount++;
836
837 if ((vp->v_type == VREG) &&
838 ((vp->v_object == NULL) ||
839 ((vp->v_object->flags & OBJ_VFS_REF) == 0)) ) {
840 /*
 841 		 * We need to lock the VP during the time that
842 * the object is created. This is necessary to
843 * keep the system from re-entrantly doing it
844 * multiple times.
845 */
846 vfs_object_create(vp, curproc, curproc->p_ucred, 0);
847 }
848 }
849
850 /*
851 * vput(), just unlock and vrele()
852 */
853 void
854 vput(vp)
855 register struct vnode *vp;
856 {
857 VOP_UNLOCK(vp);
858 vrele(vp);
859 }
860
861 /*
862 * Vnode release.
863 * If count drops to zero, call inactive routine and return to freelist.
864 */
865 void
866 vrele(vp)
867 register struct vnode *vp;
868 {
869
870 #ifdef DIAGNOSTIC
871 if (vp == NULL)
872 panic("vrele: null vp");
873 #endif
874
875 vp->v_usecount--;
876
877 if ((vp->v_usecount == 1) &&
878 vp->v_object &&
879 (vp->v_object->flags & OBJ_VFS_REF)) {
880 vp->v_object->flags &= ~OBJ_VFS_REF;
881 vm_object_deallocate(vp->v_object);
882 return;
883 }
884
885 if (vp->v_usecount > 0)
886 return;
887
888 if (vp->v_usecount < 0) {
889 #ifdef DIAGNOSTIC
890 vprint("vrele: negative ref count", vp);
891 #endif
892 panic("vrele: negative reference cnt");
893 }
894 if (vp->v_flag & VAGE) {
895 if(vp->v_tag != VT_TFS)
896 TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
897 vp->v_flag &= ~VAGE;
898 vp->v_usage = 0;
899 } else {
900 if(vp->v_tag != VT_TFS)
901 TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
902 }
903 freevnodes++;
904
905 VOP_INACTIVE(vp);
906 }
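/*
 * Illustration (not part of vfs_subr.c): the v_usecount protocol above
 * in miniature: vref()/vget() take references, and the vrele() that
 * drops the last one parks the object on the free list and calls the
 * inactive hook.  A stripped-down analogue (all names invented):
 */
struct fobj {
	int	usecount;
};

static void
fobj_inactive(struct fobj *op)		/* VOP_INACTIVE() stand-in */
{
	(void)op;
}

static void
fobj_to_freelist(struct fobj *op)	/* free-list insert stand-in */
{
	(void)op;
}

static void
fobj_ref(struct fobj *op)
{
	op->usecount++;		/* like vref(): object must be live */
}

static void
fobj_rele(struct fobj *op)
{
	if (--op->usecount > 0)
		return;			/* other holders remain */
	fobj_to_freelist(op);		/* reusable, but still cached */
	fobj_inactive(op);
}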
907
908 #ifdef DIAGNOSTIC
909 /*
910 * Page or buffer structure gets a reference.
911 */
912 void
913 vhold(vp)
914 register struct vnode *vp;
915 {
916
917 vp->v_holdcnt++;
918 }
919
920 /*
921 * Page or buffer structure frees a reference.
922 */
923 void
924 holdrele(vp)
925 register struct vnode *vp;
926 {
927
928 if (vp->v_holdcnt <= 0)
929 panic("holdrele: holdcnt");
930 vp->v_holdcnt--;
931 }
932 #endif /* DIAGNOSTIC */
933
934 /*
935 * Remove any vnodes in the vnode table belonging to mount point mp.
936 *
937 * If MNT_NOFORCE is specified, there should not be any active ones,
938 * return error if any are found (nb: this is a user error, not a
939 * system error). If MNT_FORCE is specified, detach any active vnodes
940 * that are found.
941 */
942 #ifdef DIAGNOSTIC
943 static int busyprt = 0; /* print out busy vnodes */
944 SYSCTL_INT(_debug, 1, busyprt, CTLFLAG_RW, &busyprt, 0, "");
945 #endif
946
947 int
948 vflush(mp, skipvp, flags)
949 struct mount *mp;
950 struct vnode *skipvp;
951 int flags;
952 {
953 register struct vnode *vp, *nvp;
954 int busy = 0;
955
956 if ((mp->mnt_flag & MNT_MPBUSY) == 0)
957 panic("vflush: not busy");
958 loop:
959 for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
960 /*
961 * Make sure this vnode wasn't reclaimed in getnewvnode().
962 * Start over if it has (it won't be on the list anymore).
963 */
964 if (vp->v_mount != mp)
965 goto loop;
966 nvp = vp->v_mntvnodes.le_next;
967 /*
968 * Skip over a selected vnode.
969 */
970 if (vp == skipvp)
971 continue;
972 /*
 973 		 * Skip over vnodes marked VSYSTEM.
974 */
975 if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM))
976 continue;
977 /*
978 * If WRITECLOSE is set, only flush out regular file vnodes
979 * open for writing.
980 */
981 if ((flags & WRITECLOSE) &&
982 (vp->v_writecount == 0 || vp->v_type != VREG))
983 continue;
984
985 if (vp->v_object && (vp->v_object->flags & OBJ_VFS_REF)) {
986 vm_object_reference(vp->v_object);
987 pager_cache(vp->v_object, FALSE);
988 vp->v_object->flags &= ~OBJ_VFS_REF;
989 vm_object_deallocate(vp->v_object);
990 }
991
992 /*
993 * With v_usecount == 0, all we need to do is clear out the
994 * vnode data structures and we are done.
995 */
996 if (vp->v_usecount == 0) {
997 vgone(vp);
998 continue;
999 }
1000
1001 /*
1002 * If FORCECLOSE is set, forcibly close the vnode. For block
1003 * or character devices, revert to an anonymous device. For
1004 * all other files, just kill them.
1005 */
1006 if (flags & FORCECLOSE) {
1007 if (vp->v_type != VBLK && vp->v_type != VCHR) {
1008 vgone(vp);
1009 } else {
1010 vclean(vp, 0);
1011 vp->v_op = spec_vnodeop_p;
1012 insmntque(vp, (struct mount *) 0);
1013 }
1014 continue;
1015 }
1016 #ifdef DIAGNOSTIC
1017 if (busyprt)
1018 vprint("vflush: busy vnode", vp);
1019 #endif
1020 busy++;
1021 }
1022 if (busy)
1023 return (EBUSY);
1024 return (0);
1025 }
1026
1027 /*
1028 * Disassociate the underlying file system from a vnode.
1029 */
1030 void
1031 vclean(struct vnode *vp, int flags)
1032 {
1033 int active;
1034
1035 /*
1036 * Check to see if the vnode is in use. If so we have to reference it
1037 * before we clean it out so that its count cannot fall to zero and
1038 * generate a race against ourselves to recycle it.
1039 */
1040 if ((active = vp->v_usecount))
1041 VREF(vp);
1042 /*
1043 * Even if the count is zero, the VOP_INACTIVE routine may still have
1044 * the object locked while it cleans it out. The VOP_LOCK ensures that
1045 * the VOP_INACTIVE routine is done with its work. For active vnodes,
1046 * it ensures that no other activity can occur while the underlying
1047 * object is being cleaned out.
1048 */
1049 VOP_LOCK(vp);
1050 /*
1051 * Prevent the vnode from being recycled or brought into use while we
1052 * clean it out.
1053 */
1054 if (vp->v_flag & VXLOCK)
1055 panic("vclean: deadlock");
1056 vp->v_flag |= VXLOCK;
1057 /*
1058 * Clean out any buffers associated with the vnode.
1059 */
1060 if (flags & DOCLOSE)
1061 vinvalbuf(vp, V_SAVE, NOCRED, NULL, 0, 0);
1062 /*
1063 * Any other processes trying to obtain this lock must first wait for
1064 * VXLOCK to clear, then call the new lock operation.
1065 */
1066 VOP_UNLOCK(vp);
1067 /*
1068 * If purging an active vnode, it must be closed and deactivated
1069 * before being reclaimed.
1070 */
1071 if (active) {
1072 if (flags & DOCLOSE)
1073 VOP_CLOSE(vp, FNONBLOCK, NOCRED, NULL);
1074 VOP_INACTIVE(vp);
1075 }
1076 /*
1077 * Reclaim the vnode.
1078 */
1079 if (VOP_RECLAIM(vp))
1080 panic("vclean: cannot reclaim");
1081 if (active)
1082 vrele(vp);
1083
1084 /*
1085 * Done with purge, notify sleepers of the grim news.
1086 */
1087 vp->v_op = dead_vnodeop_p;
1088 vp->v_tag = VT_NON;
1089 vp->v_flag &= ~VXLOCK;
1090 if (vp->v_flag & VXWANT) {
1091 vp->v_flag &= ~VXWANT;
1092 wakeup((caddr_t) vp);
1093 }
1094 }
1095
1096 /*
1097 * Eliminate all activity associated with the requested vnode
1098 * and with all vnodes aliased to the requested vnode.
1099 */
1100 void
1101 vgoneall(vp)
1102 register struct vnode *vp;
1103 {
1104 register struct vnode *vq;
1105
1106 if (vp->v_flag & VALIASED) {
1107 /*
1108 * If a vgone (or vclean) is already in progress, wait until
1109 * it is done and return.
1110 */
1111 if (vp->v_flag & VXLOCK) {
1112 vp->v_flag |= VXWANT;
1113 (void) tsleep((caddr_t) vp, PINOD, "vgall", 0);
1114 return;
1115 }
1116 /*
1117 * Ensure that vp will not be vgone'd while we are eliminating
1118 * its aliases.
1119 */
1120 vp->v_flag |= VXLOCK;
1121 while (vp->v_flag & VALIASED) {
1122 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1123 if (vq->v_rdev != vp->v_rdev ||
1124 vq->v_type != vp->v_type || vp == vq)
1125 continue;
1126 vgone(vq);
1127 break;
1128 }
1129 }
1130 /*
1131 * Remove the lock so that vgone below will really eliminate
1132 * the vnode after which time vgone will awaken any sleepers.
1133 */
1134 vp->v_flag &= ~VXLOCK;
1135 }
1136 vgone(vp);
1137 }
1138
1139 /*
1140 * Eliminate all activity associated with a vnode
1141 * in preparation for reuse.
1142 */
1143 void
1144 vgone(vp)
1145 register struct vnode *vp;
1146 {
1147 register struct vnode *vq;
1148 struct vnode *vx;
1149
1150 /*
1151 * If a vgone (or vclean) is already in progress, wait until it is
1152 * done and return.
1153 */
1154 if (vp->v_flag & VXLOCK) {
1155 vp->v_flag |= VXWANT;
1156 (void) tsleep((caddr_t) vp, PINOD, "vgone", 0);
1157 return;
1158 }
1159
1160 if (vp->v_object) {
1161 vp->v_object->flags |= OBJ_VNODE_GONE;
1162 }
1163
1164 /*
1165 * Clean out the filesystem specific data.
1166 */
1167 vclean(vp, DOCLOSE);
1168 /*
1169 * Delete from old mount point vnode list, if on one.
1170 */
1171 if (vp->v_mount != NULL) {
1172 LIST_REMOVE(vp, v_mntvnodes);
1173 vp->v_mount = NULL;
1174 }
1175 /*
1176 * If special device, remove it from special device alias list.
1177 */
1178 if (vp->v_type == VBLK || vp->v_type == VCHR) {
1179 if (*vp->v_hashchain == vp) {
1180 *vp->v_hashchain = vp->v_specnext;
1181 } else {
1182 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1183 if (vq->v_specnext != vp)
1184 continue;
1185 vq->v_specnext = vp->v_specnext;
1186 break;
1187 }
1188 if (vq == NULL)
1189 panic("missing bdev");
1190 }
1191 if (vp->v_flag & VALIASED) {
1192 vx = NULL;
1193 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1194 if (vq->v_rdev != vp->v_rdev ||
1195 vq->v_type != vp->v_type)
1196 continue;
1197 if (vx)
1198 break;
1199 vx = vq;
1200 }
1201 if (vx == NULL)
1202 panic("missing alias");
1203 if (vq == NULL)
1204 vx->v_flag &= ~VALIASED;
1205 vp->v_flag &= ~VALIASED;
1206 }
1207 FREE(vp->v_specinfo, M_VNODE);
1208 vp->v_specinfo = NULL;
1209 }
1210 /*
1211 * If it is on the freelist and not already at the head, move it to
1212 * the head of the list. The test of the back pointer and the
1213 * reference count of zero is because it will be removed from the free
1214 * list by getnewvnode, but will not have its reference count
1215 * incremented until after calling vgone. If the reference count were
1216 * incremented first, vgone would (incorrectly) try to close the
1217 * previous instance of the underlying object. So, the back pointer is
1218 * explicitly set to `0xdeadb' in getnewvnode after removing it from
1219 * the freelist to ensure that we do not try to move it here.
1220 */
1221 if (vp->v_usecount == 0 &&
1222 vp->v_freelist.tqe_prev != (struct vnode **) 0xdeadb &&
1223 vnode_free_list.tqh_first != vp) {
1224 if(vp->v_tag != VT_TFS) {
1225 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
1226 TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
1227 }
1228 }
1229 vp->v_type = VBAD;
1230 }
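/*
 * Illustration (not part of vfs_subr.c): the 0xdeadb value discussed in
 * the long comment above is a sentinel stored in the free-list back
 * pointer so code can distinguish "taken off the free list by the
 * recycler" from "legitimately on the free list".  The trick in
 * miniature (names invented for the sketch):
 */
#include <stdio.h>

#define RECYCLING	((void *)0xdeadb)	/* never a real address */

struct fnode {
	void	*freelist_prev;		/* linkage or the sentinel */
	int	usecount;
};

static int
being_recycled(struct fnode *np)
{
	/* usecount 0 plus the sentinel: grabbed by the recycler */
	return (np->usecount == 0 && np->freelist_prev == RECYCLING);
}

int
main(void)
{
	struct fnode n;

	n.freelist_prev = RECYCLING;
	n.usecount = 0;
	printf("recycling: %d\n", being_recycled(&n));
	return (0);
}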
1231
1232 /*
1233 * Lookup a vnode by device number.
1234 */
1235 int
1236 vfinddev(dev, type, vpp)
1237 dev_t dev;
1238 enum vtype type;
1239 struct vnode **vpp;
1240 {
1241 register struct vnode *vp;
1242
1243 for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
1244 if (dev != vp->v_rdev || type != vp->v_type)
1245 continue;
1246 *vpp = vp;
1247 return (1);
1248 }
1249 return (0);
1250 }
1251
1252 /*
1253 * Calculate the total number of references to a special device.
1254 */
1255 int
1256 vcount(vp)
1257 register struct vnode *vp;
1258 {
1259 register struct vnode *vq, *vnext;
1260 int count;
1261
1262 loop:
1263 if ((vp->v_flag & VALIASED) == 0)
1264 return (vp->v_usecount);
1265 for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
1266 vnext = vq->v_specnext;
1267 if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
1268 continue;
1269 /*
1270 * Alias, but not in use, so flush it out.
1271 */
1272 if (vq->v_usecount == 0 && vq != vp) {
1273 vgone(vq);
1274 goto loop;
1275 }
1276 count += vq->v_usecount;
1277 }
1278 return (count);
1279 }
1280
1281 /*
1282 * Print out a description of a vnode.
1283 */
1284 static char *typename[] =
1285 {"VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD"};
1286
1287 void
1288 vprint(label, vp)
1289 char *label;
1290 register struct vnode *vp;
1291 {
1292 char buf[64];
1293
1294 if (label != NULL)
1295 printf("%s: ", label);
1296 printf("type %s, usecount %d, writecount %d, refcount %ld,",
1297 typename[vp->v_type], vp->v_usecount, vp->v_writecount,
1298 vp->v_holdcnt);
1299 buf[0] = '\0';
1300 if (vp->v_flag & VROOT)
1301 strcat(buf, "|VROOT");
1302 if (vp->v_flag & VTEXT)
1303 strcat(buf, "|VTEXT");
1304 if (vp->v_flag & VSYSTEM)
1305 strcat(buf, "|VSYSTEM");
1306 if (vp->v_flag & VXLOCK)
1307 strcat(buf, "|VXLOCK");
1308 if (vp->v_flag & VXWANT)
1309 strcat(buf, "|VXWANT");
1310 if (vp->v_flag & VBWAIT)
1311 strcat(buf, "|VBWAIT");
1312 if (vp->v_flag & VALIASED)
1313 strcat(buf, "|VALIASED");
1314 if (buf[0] != '\0')
1315 printf(" flags (%s)", &buf[1]);
1316 if (vp->v_data == NULL) {
1317 printf("\n");
1318 } else {
1319 printf("\n\t");
1320 VOP_PRINT(vp);
1321 }
1322 }
1323
1324 #ifdef DDB
1325 /*
1326 * List all of the locked vnodes in the system.
1327 * Called when debugging the kernel.
1328 */
1329 void
1330 printlockedvnodes(void)
1331 {
1332 register struct mount *mp;
1333 register struct vnode *vp;
1334
1335 printf("Locked vnodes\n");
1336 for (mp = mountlist.cqh_first; mp != (void *)&mountlist;
1337 mp = mp->mnt_list.cqe_next) {
1338 for (vp = mp->mnt_vnodelist.lh_first;
1339 vp != NULL;
1340 vp = vp->v_mntvnodes.le_next)
1341 if (VOP_ISLOCKED(vp))
1342 vprint((char *) 0, vp);
1343 }
1344 }
1345 #endif
1346
1347 int kinfo_vdebug = 1;
1348 int kinfo_vgetfailed;
1349
1350 #define KINFO_VNODESLOP 10
1351 /*
1352 * Dump vnode list (via sysctl).
1353 * Copyout address of vnode followed by vnode.
1354 */
1355 /* ARGSUSED */
1356 static int
1357 sysctl_vnode SYSCTL_HANDLER_ARGS
1358 {
1359 register struct mount *mp, *nmp;
1360 struct vnode *vp;
1361 int error;
1362
1363 #define VPTRSZ sizeof (struct vnode *)
1364 #define VNODESZ sizeof (struct vnode)
1365
1366 req->lock = 0;
1367 if (!req->oldptr) /* Make an estimate */
1368 return (SYSCTL_OUT(req, 0,
1369 (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ)));
1370
1371 for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
1372 nmp = mp->mnt_list.cqe_next;
1373 if (vfs_busy(mp))
1374 continue;
1375 again:
1376 for (vp = mp->mnt_vnodelist.lh_first;
1377 vp != NULL;
1378 vp = vp->v_mntvnodes.le_next) {
1379 /*
1380 * Check that the vp is still associated with this
1381 * filesystem. RACE: could have been recycled onto
1382 * the same filesystem.
1383 */
1384 if (vp->v_mount != mp) {
1385 if (kinfo_vdebug)
1386 printf("kinfo: vp changed\n");
1387 goto again;
1388 }
1389 if ((error = SYSCTL_OUT(req, &vp, VPTRSZ)) ||
1390 (error = SYSCTL_OUT(req, vp, VNODESZ))) {
1391 vfs_unbusy(mp);
1392 return (error);
1393 }
1394 }
1395 vfs_unbusy(mp);
1396 }
1397
1398 return (0);
1399 }
1400
1401 /*
1402 * XXX
1403 * Exporting the vnode list on large systems causes them to crash.
1404 * Exporting the vnode list on medium systems causes sysctl to coredump.
1405 */
1406 #if 0
1407 SYSCTL_PROC(_kern, KERN_VNODE, vnode, CTLTYPE_OPAQUE|CTLFLAG_RD,
1408 0, 0, sysctl_vnode, "S,vnode", "");
1409 #endif
1410
1411 /*
1412 * Check to see if a filesystem is mounted on a block device.
1413 */
1414 int
1415 vfs_mountedon(vp)
1416 register struct vnode *vp;
1417 {
1418 register struct vnode *vq;
1419
1420 if (vp->v_specflags & SI_MOUNTEDON)
1421 return (EBUSY);
1422 if (vp->v_flag & VALIASED) {
1423 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
1424 if (vq->v_rdev != vp->v_rdev ||
1425 vq->v_type != vp->v_type)
1426 continue;
1427 if (vq->v_specflags & SI_MOUNTEDON)
1428 return (EBUSY);
1429 }
1430 }
1431 return (0);
1432 }
1433
1434 /*
1435 * Build hash lists of net addresses and hang them off the mount point.
1436 * Called by ufs_mount() to set up the lists of export addresses.
1437 */
1438 static int
1439 vfs_hang_addrlist(struct mount *mp, struct netexport *nep,
1440 struct export_args *argp)
1441 {
1442 register struct netcred *np;
1443 register struct radix_node_head *rnh;
1444 register int i;
1445 struct radix_node *rn;
1446 struct sockaddr *saddr, *smask = 0;
1447 struct domain *dom;
1448 int error;
1449
1450 if (argp->ex_addrlen == 0) {
1451 if (mp->mnt_flag & MNT_DEFEXPORTED)
1452 return (EPERM);
1453 np = &nep->ne_defexported;
1454 np->netc_exflags = argp->ex_flags;
1455 np->netc_anon = argp->ex_anon;
1456 np->netc_anon.cr_ref = 1;
1457 mp->mnt_flag |= MNT_DEFEXPORTED;
1458 return (0);
1459 }
1460 i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
1461 np = (struct netcred *) malloc(i, M_NETADDR, M_WAITOK);
1462 bzero((caddr_t) np, i);
1463 saddr = (struct sockaddr *) (np + 1);
1464 if ((error = copyin(argp->ex_addr, (caddr_t) saddr, argp->ex_addrlen)))
1465 goto out;
1466 if (saddr->sa_len > argp->ex_addrlen)
1467 saddr->sa_len = argp->ex_addrlen;
1468 if (argp->ex_masklen) {
1469 smask = (struct sockaddr *) ((caddr_t) saddr + argp->ex_addrlen);
1470 error = copyin(argp->ex_mask, (caddr_t) smask, argp->ex_masklen);
1471 if (error)
1472 goto out;
1473 if (smask->sa_len > argp->ex_masklen)
1474 smask->sa_len = argp->ex_masklen;
1475 }
1476 i = saddr->sa_family;
1477 if ((rnh = nep->ne_rtable[i]) == 0) {
1478 /*
1479 * Seems silly to initialize every AF when most are not used,
1480 * do so on demand here
1481 */
1482 for (dom = domains; dom; dom = dom->dom_next)
1483 if (dom->dom_family == i && dom->dom_rtattach) {
1484 dom->dom_rtattach((void **) &nep->ne_rtable[i],
1485 dom->dom_rtoffset);
1486 break;
1487 }
1488 if ((rnh = nep->ne_rtable[i]) == 0) {
1489 error = ENOBUFS;
1490 goto out;
1491 }
1492 }
1493 rn = (*rnh->rnh_addaddr) ((caddr_t) saddr, (caddr_t) smask, rnh,
1494 np->netc_rnodes);
1495 if (rn == 0 || np != (struct netcred *) rn) { /* already exists */
1496 error = EPERM;
1497 goto out;
1498 }
1499 np->netc_exflags = argp->ex_flags;
1500 np->netc_anon = argp->ex_anon;
1501 np->netc_anon.cr_ref = 1;
1502 return (0);
1503 out:
1504 free(np, M_NETADDR);
1505 return (error);
1506 }
1507
1508 /* ARGSUSED */
1509 static int
1510 vfs_free_netcred(struct radix_node *rn, void *w)
1511 {
1512 register struct radix_node_head *rnh = (struct radix_node_head *) w;
1513
1514 (*rnh->rnh_deladdr) (rn->rn_key, rn->rn_mask, rnh);
1515 free((caddr_t) rn, M_NETADDR);
1516 return (0);
1517 }
1518
1519 /*
1520 * Free the net address hash lists that are hanging off the mount points.
1521 */
1522 static void
1523 vfs_free_addrlist(struct netexport *nep)
1524 {
1525 register int i;
1526 register struct radix_node_head *rnh;
1527
1528 for (i = 0; i <= AF_MAX; i++)
1529 if ((rnh = nep->ne_rtable[i])) {
1530 (*rnh->rnh_walktree) (rnh, vfs_free_netcred,
1531 (caddr_t) rnh);
1532 free((caddr_t) rnh, M_RTABLE);
1533 nep->ne_rtable[i] = 0;
1534 }
1535 }
1536
1537 int
1538 vfs_export(mp, nep, argp)
1539 struct mount *mp;
1540 struct netexport *nep;
1541 struct export_args *argp;
1542 {
1543 int error;
1544
1545 if (argp->ex_flags & MNT_DELEXPORT) {
1546 vfs_free_addrlist(nep);
1547 mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
1548 }
1549 if (argp->ex_flags & MNT_EXPORTED) {
1550 if ((error = vfs_hang_addrlist(mp, nep, argp)))
1551 return (error);
1552 mp->mnt_flag |= MNT_EXPORTED;
1553 }
1554 return (0);
1555 }
1556
1557 struct netcred *
1558 vfs_export_lookup(mp, nep, nam)
1559 register struct mount *mp;
1560 struct netexport *nep;
1561 struct mbuf *nam;
1562 {
1563 register struct netcred *np;
1564 register struct radix_node_head *rnh;
1565 struct sockaddr *saddr;
1566
1567 np = NULL;
1568 if (mp->mnt_flag & MNT_EXPORTED) {
1569 /*
1570 * Lookup in the export list first.
1571 */
1572 if (nam != NULL) {
1573 saddr = mtod(nam, struct sockaddr *);
1574 rnh = nep->ne_rtable[saddr->sa_family];
1575 if (rnh != NULL) {
1576 np = (struct netcred *)
1577 (*rnh->rnh_matchaddr) ((caddr_t) saddr,
1578 rnh);
1579 if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
1580 np = NULL;
1581 }
1582 }
1583 /*
1584 * If no address match, use the default if it exists.
1585 */
1586 if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
1587 np = &nep->ne_defexported;
1588 }
1589 return (np);
1590 }
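/*
 * Illustration (not part of vfs_subr.c): vfs_export_lookup() above
 * tries a per-address-family radix-tree match first and only then falls
 * back to the default export.  The two-step lookup shape, with an
 * invented prefix match standing in for rnh_matchaddr():
 */
#include <stddef.h>
#include <string.h>

struct xcred {
	const char	*tag;
};

static struct xcred defexport = { "default export" };
static struct xcred netexport = { "10/8 export" };

static struct xcred *
match_addr(const char *addr)		/* rnh_matchaddr() stand-in */
{
	if (addr != NULL && strncmp(addr, "10.", 3) == 0)
		return (&netexport);
	return (NULL);
}

static struct xcred *
export_lookup(const char *addr, int have_default)
{
	struct xcred *np = NULL;

	if (addr != NULL)
		np = match_addr(addr);	/* specific match first */
	if (np == NULL && have_default)
		np = &defexport;	/* then the default, if any */
	return (np);
}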
1591
1592
1593 /*
1594 * perform msync on all vnodes under a mount point
1595 * the mount point must be locked.
1596 */
1597 void
1598 vfs_msync(struct mount *mp, int flags) {
1599 struct vnode *vp, *nvp;
1600 loop:
1601 for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) {
1602
1603 if (vp->v_mount != mp)
1604 goto loop;
1605 nvp = vp->v_mntvnodes.le_next;
1606 if (VOP_ISLOCKED(vp) && (flags != MNT_WAIT))
1607 continue;
1608 if (vp->v_object &&
1609 (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
1610 vm_object_page_clean(vp->v_object, 0, 0, TRUE, TRUE);
1611 }
1612 }
1613 }
1614
1615 /*
1616 * Create the VM object needed for VMIO and mmap support. This
 1617  * is done for all VREG files in the system. Some filesystems may
 1618  * take advantage of the additional metadata buffering capability
 1619  * of the VMIO code by making the device node VMIO mode as well.
1620 */
1621 int
1622 vfs_object_create(vp, p, cred, waslocked)
1623 struct vnode *vp;
1624 struct proc *p;
1625 struct ucred *cred;
1626 int waslocked;
1627 {
1628 struct vattr vat;
1629 vm_object_t object;
1630 int error = 0;
1631
1632 retry:
1633 if ((object = vp->v_object) == NULL) {
1634 if (vp->v_type == VREG) {
1635 if ((error = VOP_GETATTR(vp, &vat, cred, p)) != 0)
1636 goto retn;
1637 (void) vnode_pager_alloc(vp,
1638 OFF_TO_IDX(round_page(vat.va_size)), 0, 0);
1639 } else {
1640 /*
1641 * This simply allocates the biggest object possible
1642 * for a VBLK vnode. This should be fixed, but doesn't
1643 * cause any problems (yet).
1644 */
1645 (void) vnode_pager_alloc(vp, INT_MAX, 0, 0);
1646 }
1647 vp->v_object->flags |= OBJ_VFS_REF;
1648 } else {
1649 if (object->flags & OBJ_DEAD) {
1650 if (waslocked)
1651 VOP_UNLOCK(vp);
1652 tsleep(object, PVM, "vodead", 0);
1653 if (waslocked)
1654 VOP_LOCK(vp);
1655 goto retry;
1656 }
1657 if ((object->flags & OBJ_VFS_REF) == 0) {
1658 object->flags |= OBJ_VFS_REF;
1659 vm_object_reference(object);
1660 }
1661 }
1662 if (vp->v_object)
1663 vp->v_flag |= VVMIO;
1664
1665 retn:
1666 return error;
1667 }