
FreeBSD/Linux Kernel Cross Reference
sys/kern/vfs_lock.c


    1 /*
    2  * Copyright (c) 2004,2013 The DragonFly Project.  All rights reserved.
    3  * 
    4  * This code is derived from software contributed to The DragonFly Project
    5  * by Matthew Dillon <dillon@backplane.com>
    6  * 
    7  * Redistribution and use in source and binary forms, with or without
    8  * modification, are permitted provided that the following conditions
    9  * are met:
   10  * 
   11  * 1. Redistributions of source code must retain the above copyright
   12  *    notice, this list of conditions and the following disclaimer.
   13  * 2. Redistributions in binary form must reproduce the above copyright
   14  *    notice, this list of conditions and the following disclaimer in
   15  *    the documentation and/or other materials provided with the
   16  *    distribution.
   17  * 3. Neither the name of The DragonFly Project nor the names of its
   18  *    contributors may be used to endorse or promote products derived
   19  *    from this software without specific, prior written permission.
   20  * 
   21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
   25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
   27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
   29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
   30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
   31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   32  * SUCH DAMAGE.
   33  */
   34 
   35 /*
   36  * External lock/ref-related vnode functions
   37  *
   38  * vs_state transition locking requirements:
   39  *
   40  *      INACTIVE -> CACHED|DYING        vx_lock(excl) + vfs_spin
   41  *      DYING    -> CACHED              vx_lock(excl)
   42  *      ACTIVE   -> INACTIVE            (none)       + v_spin + vfs_spin
   43  *      INACTIVE -> ACTIVE              vn_lock(any) + v_spin + vfs_spin
   44  *      CACHED   -> ACTIVE              vn_lock(any) + v_spin + vfs_spin
   45  *
   46  * NOTE: Switching to/from ACTIVE/INACTIVE requires v_spin and vfs_spin.
   47  *
   48  *       Switching into ACTIVE also requires a vref and a vnode lock;
   49  *       however, the vnode lock is allowed to be SHARED.
   50  *
   51  *       Switching into a CACHED or DYING state requires an exclusive vnode
   52  *       lock or vx_lock (which is almost the same thing).
   53  */
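/*
 * Illustrative sketch: roughly how vget() below performs the
 * CACHED -> ACTIVE transition (abridged; error handling and the
 * VS_INACTIVE case are omitted).  The vnode is referenced, a vnode
 * lock of any type is acquired, and v_spin is then taken so that
 * _vactivate() can move the vnode onto the active list under vfs_spin:
 *
 *      atomic_fetchadd_int(&vp->v_refcnt, 1);
 *      vn_lock(vp, LK_SHARED | LK_FAILRECLAIM);
 *      spin_lock(&vp->v_spin);
 *      _vactivate(vp);                 (takes vfs_spin internally)
 *      spin_unlock(&vp->v_spin);
 */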
   54 
   55 #include <sys/param.h>
   56 #include <sys/systm.h>
   57 #include <sys/kernel.h>
   58 #include <sys/malloc.h>
   59 #include <sys/mount.h>
   60 #include <sys/proc.h>
   61 #include <sys/vnode.h>
   62 #include <sys/buf.h>
   63 #include <sys/sysctl.h>
   64 
   65 #include <machine/limits.h>
   66 
   67 #include <vm/vm.h>
   68 #include <vm/vm_object.h>
   69 
   70 #include <sys/buf2.h>
   71 #include <sys/thread2.h>
   72 
   73 #define VACT_MAX        10
   74 #define VACT_INC        2
   75 
   76 static void vnode_terminate(struct vnode *vp);
   77 
   78 static MALLOC_DEFINE(M_VNODE, "vnodes", "vnode structures");
   79 
   80 /*
   81  * The vnode free (inactive) list holds inactive vnodes.  Reclaimed
   82  * (basically dead) vnodes are inserted at the head, other inactive
   83  * vnodes are inserted at the tail.
   84  */
   85 TAILQ_HEAD(freelst, vnode);
   86 static struct freelst   vnode_active_list;
   87 static struct freelst   vnode_inactive_list;
   88 static struct vnode     vnode_active_rover;
   89 static struct spinlock  vfs_spin = SPINLOCK_INITIALIZER(vfs_spin);
   90 
   91 int  activevnodes = 0;
   92 SYSCTL_INT(_debug, OID_AUTO, activevnodes, CTLFLAG_RD,
   93         &activevnodes, 0, "Number of active nodes");
   94 int  cachedvnodes = 0;
   95 SYSCTL_INT(_debug, OID_AUTO, cachedvnodes, CTLFLAG_RD,
   96         &cachedvnodes, 0, "Number of total cached nodes");
   97 int  inactivevnodes = 0;
   98 SYSCTL_INT(_debug, OID_AUTO, inactivevnodes, CTLFLAG_RD,
   99         &inactivevnodes, 0, "Number of inactive nodes");
  100 static int batchfreevnodes = 5;
  101 SYSCTL_INT(_debug, OID_AUTO, batchfreevnodes, CTLFLAG_RW,
  102         &batchfreevnodes, 0, "Number of vnodes to free at once");
  103 #ifdef TRACKVNODE
  104 static ulong trackvnode;
  105 SYSCTL_ULONG(_debug, OID_AUTO, trackvnode, CTLFLAG_RW,
  106                 &trackvnode, 0, "");
  107 #endif
  108 
  109 /*
  110  * Called from vfsinit()
  111  */
  112 void
  113 vfs_lock_init(void)
  114 {
  115         TAILQ_INIT(&vnode_inactive_list);
  116         TAILQ_INIT(&vnode_active_list);
  117         TAILQ_INSERT_TAIL(&vnode_active_list, &vnode_active_rover, v_list);
  118         spin_init(&vfs_spin);
  119         kmalloc_raise_limit(M_VNODE, 0);        /* unlimited */
  120 }
  121 
  122 /*
  123  * Misc functions
  124  */
  125 static __inline
  126 void
  127 _vsetflags(struct vnode *vp, int flags)
  128 {
  129         atomic_set_int(&vp->v_flag, flags);
  130 }
  131 
  132 static __inline
  133 void
  134 _vclrflags(struct vnode *vp, int flags)
  135 {
  136         atomic_clear_int(&vp->v_flag, flags);
  137 }
  138 
  139 void
  140 vsetflags(struct vnode *vp, int flags)
  141 {
  142         _vsetflags(vp, flags);
  143 }
  144 
  145 void
  146 vclrflags(struct vnode *vp, int flags)
  147 {
  148         _vclrflags(vp, flags);
  149 }
  150 
  151 /*
  152  * Place the vnode on the active list.
  153  *
  154  * Caller must hold vp->v_spin
  155  */
  156 static __inline 
  157 void
  158 _vactivate(struct vnode *vp)
  159 {
  160 #ifdef TRACKVNODE
  161         if ((ulong)vp == trackvnode)
  162                 kprintf("_vactivate %p %08x\n", vp, vp->v_flag);
  163 #endif
  164         spin_lock(&vfs_spin);
  165 
  166         switch(vp->v_state) {
  167         case VS_ACTIVE:
  168                 panic("_vactivate: already active");
  169                 /* NOT REACHED */
  170                 spin_unlock(&vfs_spin);
  171                 return;
  172         case VS_INACTIVE:
  173                 TAILQ_REMOVE(&vnode_inactive_list, vp, v_list);
  174                 --inactivevnodes;
  175                 break;
  176         case VS_CACHED:
  177         case VS_DYING:
  178                 break;
  179         }
  180         TAILQ_INSERT_TAIL(&vnode_active_list, vp, v_list);
  181         vp->v_state = VS_ACTIVE;
  182         ++activevnodes;
  183 
  184         spin_unlock(&vfs_spin);
  185 }
  186 
  187 /*
  188  * Put a vnode on the inactive list.
  189  *
  190  * Caller must hold v_spin
  191  */
  192 static __inline
  193 void
  194 _vinactive(struct vnode *vp)
  195 {
  196 #ifdef TRACKVNODE
  197         if ((ulong)vp == trackvnode) {
  198                 kprintf("_vinactive %p %08x\n", vp, vp->v_flag);
  199                 print_backtrace(-1);
  200         }
  201 #endif
  202         spin_lock(&vfs_spin);
  203 
  204         /*
  205          * Remove from active list if it is sitting on it
  206          */
  207         switch(vp->v_state) {
  208         case VS_ACTIVE:
  209                 TAILQ_REMOVE(&vnode_active_list, vp, v_list);
  210                 --activevnodes;
  211                 break;
  212         case VS_INACTIVE:
  213                 panic("_vinactive: already inactive");
  214                 /* NOT REACHED */
  215                 spin_unlock(&vfs_spin);
  216                 return;
  217         case VS_CACHED:
  218         case VS_DYING:
  219                 break;
  220         }
  221 
  222         /*
  223          * Distinguish between basically dead vnodes, vnodes with cached
  224          * data, and vnodes without cached data.  A rover will shift the
  225          * vnodes around as their cache status is lost.
  226          */
  227         if (vp->v_flag & VRECLAIMED) {
  228                 TAILQ_INSERT_HEAD(&vnode_inactive_list, vp, v_list);
  229         } else {
  230                 TAILQ_INSERT_TAIL(&vnode_inactive_list, vp, v_list);
  231         }
  232         ++inactivevnodes;
  233         vp->v_state = VS_INACTIVE;
  234 
  235         spin_unlock(&vfs_spin);
  236 }
  237 
  238 static __inline
  239 void
  240 _vinactive_tail(struct vnode *vp)
  241 {
  242         spin_lock(&vfs_spin);
  243 
  244         /*
  245          * Remove from active list if it is sitting on it
  246          */
  247         switch(vp->v_state) {
  248         case VS_ACTIVE:
  249                 TAILQ_REMOVE(&vnode_active_list, vp, v_list);
  250                 --activevnodes;
  251                 break;
  252         case VS_INACTIVE:
  253                 panic("_vinactive_tail: already inactive");
  254                 /* NOT REACHED */
  255                 spin_unlock(&vfs_spin);
  256                 return;
  257         case VS_CACHED:
  258         case VS_DYING:
  259                 break;
  260         }
  261 
  262         TAILQ_INSERT_TAIL(&vnode_inactive_list, vp, v_list);
  263         ++inactivevnodes;
  264         vp->v_state = VS_INACTIVE;
  265 
  266         spin_unlock(&vfs_spin);
  267 }
  268 
  269 /*
  270  * Add a ref to an active vnode.  This function should never be called
  271  * with an inactive vnode (use vget() instead), but might be called
  272  * with other states.
  273  */
  274 void
  275 vref(struct vnode *vp)
  276 {
  277         KASSERT((VREFCNT(vp) > 0 && vp->v_state != VS_INACTIVE),
  278                 ("vref: bad refcnt %08x %d", vp->v_refcnt, vp->v_state));
  279         atomic_add_int(&vp->v_refcnt, 1);
  280 }
  281 
  282 /*
  283  * Release a ref on an active or inactive vnode.
  284  *
  285  * Caller has no other requirements.
  286  *
  287  * If VREF_FINALIZE is set this will deactivate the vnode on the 1->0
  288  * transition, otherwise we leave the vnode in the active list and
  289  * do a lockless transition to 0, which is very important for the
  290  * critical path.
  291  *
  292  * (vrele() is not called when a vnode is being destroyed w/kfree)
  293  */
  294 void
  295 vrele(struct vnode *vp)
  296 {
  297         for (;;) {
  298                 int count = vp->v_refcnt;
  299                 cpu_ccfence();
  300                 KKASSERT((count & VREF_MASK) > 0);
  301                 KKASSERT(vp->v_state == VS_ACTIVE ||
  302                          vp->v_state == VS_INACTIVE);
  303 
  304                 /*
  305                  * 2+ case
  306                  */
  307                 if ((count & VREF_MASK) > 1) {
  308                         if (atomic_cmpset_int(&vp->v_refcnt, count, count - 1))
  309                                 break;
  310                         continue;
  311                 }
  312 
  313                 /*
  314                  * 1->0 transition case must handle possible finalization.
  315                  * When finalizing we transition 1->0x40000000.  Note that
  316                  * cachedvnodes is only adjusted on transitions to ->0.
  317                  *
  318                  * WARNING! VREF_TERMINATE can be cleared at any point
  319                  *          when the refcnt is non-zero (by vget()) and
  320                  *          the vnode has not been reclaimed.  Thus
  321                  *          transitions out of VREF_TERMINATE do not have
  322                  *          to mess with cachedvnodes.
  323                  */
  324                 if (count & VREF_FINALIZE) {
  325                         vx_lock(vp);
  326                         if (atomic_cmpset_int(&vp->v_refcnt,
  327                                               count, VREF_TERMINATE)) {
  328                                 vnode_terminate(vp);
  329                                 break;
  330                         }
  331                         vx_unlock(vp);
  332                 } else {
  333                         if (atomic_cmpset_int(&vp->v_refcnt, count, 0)) {
  334                                 atomic_add_int(&cachedvnodes, 1);
  335                                 break;
  336                         }
  337                 }
  338                 /* retry */
  339         }
  340 }
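/*
 * Illustrative usage sketch (hypothetical helper; abridged): a caller
 * that already holds a reference on an active vnode may take an
 * additional reference with vref() and later drop it with vrele().
 */
#if 0
static void
example_extra_ref(struct vnode *vp)
{
        /* caller guarantees vp is referenced and not VS_INACTIVE */
        vref(vp);               /* lockless additional reference */
        /* ... hand vp off or use it asynchronously ... */
        vrele(vp);              /* may deactivate on the final 1->0 drop */
}
#endif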
  341 
  342 /*
  343  * Add an auxiliary data structure reference to the vnode.  Auxiliary
  344  * references do not change the state of the vnode or prevent deactivation
  345  * or reclamation of the vnode, but will prevent the vnode from being
  346  * destroyed (kfree()'d).
  347  *
  348  * WARNING!  vhold() must not acquire v_spin.  The spinlock may or may not
  349  *           already be held by the caller.  vdrop() will clean up the
  350  *           free list state.
  351  */
  352 void
  353 vhold(struct vnode *vp)
  354 {
  355         atomic_add_int(&vp->v_auxrefs, 1);
  356 }
  357 
  358 /*
  359  * Remove an auxiliary reference from the vnode.
  360  */
  361 void
  362 vdrop(struct vnode *vp)
  363 {
  364         atomic_add_int(&vp->v_auxrefs, -1);
  365 }
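/*
 * Illustrative usage sketch (hypothetical helper): an auxiliary
 * reference prevents the vnode structure from being kfree()'d while a
 * side structure points at it, but does not prevent deactivation or
 * reclamation.
 */
#if 0
static void
example_aux_ref(struct vnode *vp)
{
        vhold(vp);              /* vp cannot be destroyed */
        /* ... record vp in an auxiliary structure ... */
        vdrop(vp);              /* drop the auxiliary reference */
}
#endif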
  366 
  367 /*
  368  * This function is called on the 1->0 transition (which is actually
  369  * 1->VREF_TERMINATE) when VREF_FINALIZE is set, forcing deactivation
  370  * of the vnode.
  371  *
  372  * Additional vrefs are allowed to race but will not result in a reentrant
  373  * call to vnode_terminate() due to refcnt being VREF_TERMINATE.  This
  374  * prevents additional 1->0 transitions.
  375  *
  376  * ONLY A VGET() CAN REACTIVATE THE VNODE.
  377  *
  378  * Caller must hold the VX lock.
  379  *
  380  * NOTE: v_mount may be NULL due to assignment to dead_vnode_vops
  381  *
  382  * NOTE: The vnode may be marked inactive with dirty buffers
  383  *       or dirty pages in its cached VM object still present.
  384  *
  385  * NOTE: VS_FREE should not be set on entry (the vnode was expected to
  386  *       previously be active).  We lose control of the vnode the instant
  387  *       it is placed on the free list.
  388  *
  389  *       The VX lock is required when transitioning to VS_CACHED but is
  390  *       not sufficient for the vshouldfree() interlocked test or when
  391  *       transitioning away from VS_CACHED.  v_spin is also required for
  392  *       those cases.
  393  */
  394 static
  395 void
  396 vnode_terminate(struct vnode *vp)
  397 {
  398         KKASSERT(vp->v_state == VS_ACTIVE);
  399 
  400         if ((vp->v_flag & VINACTIVE) == 0) {
  401                 _vsetflags(vp, VINACTIVE);
  402                 if (vp->v_mount)
  403                         VOP_INACTIVE(vp);
  404                 /* might deactivate page */
  405         }
  406         spin_lock(&vp->v_spin);
  407         _vinactive(vp);
  408         spin_unlock(&vp->v_spin);
  409 
  410         vx_unlock(vp);
  411 }
  412 
  413 /****************************************************************
  414  *                      VX LOCKING FUNCTIONS                    *
  415  ****************************************************************
  416  *
  417  * These functions lock vnodes for reclamation and deactivation related
  418  * activities.  The caller must already be holding some sort of reference
  419  * on the vnode.
  420  */
  421 void
  422 vx_lock(struct vnode *vp)
  423 {
  424         lockmgr(&vp->v_lock, LK_EXCLUSIVE);
  425 }
  426 
  427 void
  428 vx_unlock(struct vnode *vp)
  429 {
  430         lockmgr(&vp->v_lock, LK_RELEASE);
  431 }
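/*
 * Illustrative sketch (abridged from vrele() above): the VX lock
 * interlocks the final 1->0 transition when VREF_FINALIZE is set:
 *
 *      vx_lock(vp);
 *      if (atomic_cmpset_int(&vp->v_refcnt, count, VREF_TERMINATE))
 *              vnode_terminate(vp);    (releases the VX lock itself)
 *      else
 *              vx_unlock(vp);
 */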
  432 
  433 /****************************************************************
  434  *                      VNODE ACQUISITION FUNCTIONS             *
  435  ****************************************************************
  436  *
  437  * These functions must be used when accessing a vnode that has no
  438  * chance of being destroyed in an SMP race.  That means the caller will
  439  * usually either hold an auxiliary reference (such as the namecache)
  440  * or hold some other lock that ensures that the vnode cannot be destroyed.
  441  *
  442  * These functions are MANDATORY for any code chain accessing a vnode
  443  * whose activation state is not known.
  444  *
  445  * vget() can be called with LK_NOWAIT and will return EBUSY if the
  446  * lock cannot be immediately acquired.
  447  *
  448  * vget()/vput() are used when reactivation is desired.
  449  *
  450  * vx_get() and vx_put() are used when reactivation is not desired.
  451  */
  452 int
  453 vget(struct vnode *vp, int flags)
  454 {
  455         int error;
  456 
  457         /*
  458          * A lock type must be passed
  459          */
  460         if ((flags & LK_TYPE_MASK) == 0) {
  461                 panic("vget() called with no lock specified!");
  462                 /* NOT REACHED */
  463         }
  464 
  465         /*
  466          * Reference the structure and then acquire the lock.
  467          *
  468          * NOTE: The requested lock might be a shared lock and does
  469          *       not protect our access to the refcnt or other fields.
  470          */
  471         if ((atomic_fetchadd_int(&vp->v_refcnt, 1) & VREF_MASK) == 0)
  472                 atomic_add_int(&cachedvnodes, -1);
  473 
  474         if ((error = vn_lock(vp, flags | LK_FAILRECLAIM)) != 0) {
  475                 /*
  476                  * The lock failed, undo and return an error.  This will not
  477                  * normally trigger a termination.
  478                  */
  479                 vrele(vp);
  480         } else if (vp->v_flag & VRECLAIMED) {
  481                 /*
  482                  * The node is being reclaimed and cannot be reactivated
  483                  * any more, undo and return ENOENT.
  484                  */
  485                 vn_unlock(vp);
  486                 vrele(vp);
  487                 error = ENOENT;
  488         } else if (vp->v_state == VS_ACTIVE) {
  489                 /*
  490                  * A VS_ACTIVE vnode coupled with the fact that we have
  491                  * a vnode lock (even if shared) prevents v_state from
  492                  * changing.  Since the vnode is not in a VRECLAIMED state,
  493                  * we can safely clear VINACTIVE.
  494                  *
  495                  * NOTE! Multiple threads may clear VINACTIVE if this is
  496                  *       shared lock.  This race is allowed.
  497                  */
  498                 _vclrflags(vp, VINACTIVE);      /* SMP race ok */
  499                 vp->v_act += VACT_INC;
  500                 if (vp->v_act > VACT_MAX)       /* SMP race ok */
  501                         vp->v_act = VACT_MAX;
  502                 error = 0;
  503         } else {
  504                 /*
  505                  * If the vnode is not VS_ACTIVE it must be reactivated
  506                  * in addition to clearing VINACTIVE.  An exclusive spin_lock
  507                  * is needed to manipulate the vnode's list.
  508                  *
  509                  * Because the lockmgr lock might be shared, we might race
  510                  * another reactivation, which we handle.  In this situation,
  511                  * however, the refcnt prevents other v_state races.
  512                  *
  513                  * As with above, clearing VINACTIVE is allowed to race other
  514                  * clearings of VINACTIVE.
  515                  *
  516                  * VREF_TERMINATE and VREF_FINALIZE can only be cleared when
  517                  * the refcnt is non-zero and the vnode has not been
  518                  * reclaimed.  This also means that the transitions do
  519                  * not affect cachedvnodes.
  520                  */
  521                 _vclrflags(vp, VINACTIVE);
  522                 vp->v_act += VACT_INC;
  523                 if (vp->v_act > VACT_MAX)       /* SMP race ok */
  524                         vp->v_act = VACT_MAX;
  525                 spin_lock(&vp->v_spin);
  526 
  527                 switch(vp->v_state) {
  528                 case VS_INACTIVE:
  529                         _vactivate(vp);
  530                         atomic_clear_int(&vp->v_refcnt, VREF_TERMINATE |
  531                                                         VREF_FINALIZE);
  532                         spin_unlock(&vp->v_spin);
  533                         break;
  534                 case VS_CACHED:
  535                         _vactivate(vp);
  536                         atomic_clear_int(&vp->v_refcnt, VREF_TERMINATE |
  537                                                         VREF_FINALIZE);
  538                         spin_unlock(&vp->v_spin);
  539                         break;
  540                 case VS_ACTIVE:
  541                         atomic_clear_int(&vp->v_refcnt, VREF_FINALIZE);
  542                         spin_unlock(&vp->v_spin);
  543                         break;
  544                 case VS_DYING:
  545                         spin_unlock(&vp->v_spin);
  546                         panic("Impossible VS_DYING state");
  547                         break;
  548                 }
  549                 error = 0;
  550         }
  551         return(error);
  552 }
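/*
 * Illustrative usage sketch (hypothetical helper): typical access that
 * refs, locks, and reactivates a vnode.  A lock type must be passed;
 * LK_SHARED or LK_EXCLUSIVE are the usual choices, optionally with
 * LK_NOWAIT.
 */
#if 0
static int
example_vget(struct vnode *vp)
{
        int error;

        /* caller holds an aux ref or other guarantee against destruction */
        error = vget(vp, LK_SHARED);
        if (error)
                return (error);         /* e.g. ENOENT if being reclaimed */
        /* ... operate on the locked, active vnode ... */
        vput(vp);                       /* vn_unlock() + vrele() */
        return (0);
}
#endif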
  553 
  554 #ifdef DEBUG_VPUT
  555 
  556 void
  557 debug_vput(struct vnode *vp, const char *filename, int line)
  558 {
  559         kprintf("vput(%p) %s:%d\n", vp, filename, line);
  560         vn_unlock(vp);
  561         vrele(vp);
  562 }
  563 
  564 #else
  565 
  566 void
  567 vput(struct vnode *vp)
  568 {
  569         vn_unlock(vp);
  570         vrele(vp);
  571 }
  572 
  573 #endif
  574 
  575 /*
  576  * Acquire the vnode lock unguarded.
  577  *
  578  * The non-blocking version also uses a slightly different mechanic.
  579  * This function will explicitly fail not only if it cannot acquire
  580  * the lock normally, but also if the caller already holds a lock.
  581  *
  582  * The adjusted mechanic is used to close a loophole where complex
  583  * VOP_RECLAIM code can circle around recursively and allocate the
  584  * same vnode it is trying to destroy from the freelist.
  585  *
  586  * Any filesystem (e.g. UFS) which puts LK_CANRECURSE in lk_flags can
  587  * cause the incorrect behavior to occur.  If not for that lockmgr()
  588  * would do the right thing.
  589  *
  590  * XXX The vx_*() locks should use auxrefs, not the main reference counter.
  591  */
  592 void
  593 vx_get(struct vnode *vp)
  594 {
  595         if ((atomic_fetchadd_int(&vp->v_refcnt, 1) & VREF_MASK) == 0)
  596                 atomic_add_int(&cachedvnodes, -1);
  597         lockmgr(&vp->v_lock, LK_EXCLUSIVE);
  598 }
  599 
  600 int
  601 vx_get_nonblock(struct vnode *vp)
  602 {
  603         int error;
  604 
  605         if (lockcountnb(&vp->v_lock))
  606                 return(EBUSY);
  607         error = lockmgr(&vp->v_lock, LK_EXCLUSIVE | LK_NOWAIT);
  608         if (error == 0) {
  609                 if ((atomic_fetchadd_int(&vp->v_refcnt, 1) & VREF_MASK) == 0)
  610                         atomic_add_int(&cachedvnodes, -1);
  611         }
  612         return(error);
  613 }
  614 
  615 /*
  616  * Release a VX lock that also held a ref on the vnode.  vrele() will handle
  617  * any needed state transitions.
  618  *
  619  * However, filesystems use this function to get rid of unwanted new
  620  * vnodes, so try to get the vnode onto the correct queue in that case.
  621  */
  622 void
  623 vx_put(struct vnode *vp)
  624 {
  625         if (vp->v_type == VNON || vp->v_type == VBAD)
  626                 atomic_set_int(&vp->v_refcnt, VREF_FINALIZE);
  627         lockmgr(&vp->v_lock, LK_RELEASE);
  628         vrele(vp);
  629 }
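/*
 * Illustrative usage sketch (hypothetical helper): vx_get()/vx_put()
 * are used for deactivation/reclamation-related work where reactivating
 * the vnode is not desired.
 */
#if 0
static void
example_vx_access(struct vnode *vp)
{
        /* caller holds an aux ref or other guarantee against destruction */
        vx_get(vp);             /* ref + exclusive v_lock, no reactivation */
        /* ... inspect or tear down vnode-related state ... */
        vx_put(vp);             /* unlock + vrele */
}
#endif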
  630 
  631 /*
  632  * Try to reuse a vnode from the free list.  This function is somewhat
  633  * advisory in that NULL can be returned as a normal case, even if free
  634  * vnodes are present.
  635  *
  636  * The scan is limited because it can result in excessive CPU use during
  637  * periods of extreme vnode use.
  638  *
  639  * NOTE: The returned vnode is not completely initialized.
  640  */
  641 static
  642 struct vnode *
  643 cleanfreevnode(int maxcount)
  644 {
  645         struct vnode *vp;
  646         int count;
  647         int trigger = (long)vmstats.v_page_count / (activevnodes * 2 + 1);
  648 
  649         /*
  650          * Try to deactivate some vnodes cached on the active list.
  651          */
  652         if (cachedvnodes < inactivevnodes)
  653                 goto skip;
  654 
  655         for (count = 0; count < maxcount * 2; count++) {
  656                 spin_lock(&vfs_spin);
  657 
  658                 vp = TAILQ_NEXT(&vnode_active_rover, v_list);
  659                 TAILQ_REMOVE(&vnode_active_list, &vnode_active_rover, v_list);
  660                 if (vp == NULL) {
  661                         TAILQ_INSERT_HEAD(&vnode_active_list,
  662                                           &vnode_active_rover, v_list);
  663                 } else {
  664                         TAILQ_INSERT_AFTER(&vnode_active_list, vp,
  665                                            &vnode_active_rover, v_list);
  666                 }
  667                 if (vp == NULL) {
  668                         spin_unlock(&vfs_spin);
  669                         continue;
  670                 }
  671                 if ((vp->v_refcnt & VREF_MASK) != 0) {
  672                         spin_unlock(&vfs_spin);
  673                         vp->v_act += VACT_INC;
  674                         if (vp->v_act > VACT_MAX)       /* SMP race ok */
  675                                 vp->v_act = VACT_MAX;
  676                         continue;
  677                 }
  678 
  679                 /*
  680  * Decrement v_act by less if the vnode's object has a lot of
  681  * resident VM pages.  XXX possible SMP races.
  682                  */
  683                 if (vp->v_act > 0) {
  684                         vm_object_t obj;
  685                         if ((obj = vp->v_object) != NULL &&
  686                             obj->resident_page_count >= trigger) {
  687                                 vp->v_act -= 1;
  688                         } else {
  689                                 vp->v_act -= VACT_INC;
  690                         }
  691                         if (vp->v_act < 0)
  692                                 vp->v_act = 0;
  693                         spin_unlock(&vfs_spin);
  694                         continue;
  695                 }
  696 
  697                 /*
  698                  * Try to deactivate the vnode.
  699                  */
  700                 if ((atomic_fetchadd_int(&vp->v_refcnt, 1) & VREF_MASK) == 0)
  701                         atomic_add_int(&cachedvnodes, -1);
  702                 atomic_set_int(&vp->v_refcnt, VREF_FINALIZE);
  703 
  704                 spin_unlock(&vfs_spin);
  705                 vrele(vp);
  706         }
  707 
  708 skip:
  709         /*
  710          * Loop trying to lock the first vnode on the free list.
  711          * Cycle if we can't.
  712          */
  713         for (count = 0; count < maxcount; count++) {
  714                 spin_lock(&vfs_spin);
  715 
  716                 vp = TAILQ_FIRST(&vnode_inactive_list);
  717                 if (vp == NULL) {
  718                         spin_unlock(&vfs_spin);
  719                         break;
  720                 }
  721 
  722                 /*
  723                  * non-blocking vx_get will also ref the vnode on success.
  724                  */
  725                 if (vx_get_nonblock(vp)) {
  726                         KKASSERT(vp->v_state == VS_INACTIVE);
  727                         TAILQ_REMOVE(&vnode_inactive_list, vp, v_list);
  728                         TAILQ_INSERT_TAIL(&vnode_inactive_list, vp, v_list);
  729                         spin_unlock(&vfs_spin);
  730                         continue;
  731                 }
  732 
  733                 /*
  734                  * Because we are holding vfs_spin the vnode should currently
  735                  * be inactive and VREF_TERMINATE should still be set.
  736                  *
  737                  * Once vfs_spin is released the vnode's state should remain
  738                  * unmodified due to both the lock and ref on it.
  739                  */
  740                 KKASSERT(vp->v_state == VS_INACTIVE);
  741                 spin_unlock(&vfs_spin);
  742 #ifdef TRACKVNODE
  743                 if ((ulong)vp == trackvnode)
  744                         kprintf("cleanfreevnode %p %08x\n", vp, vp->v_flag);
  745 #endif
  746 
  747                 /*
  748  * Do not reclaim/reuse a vnode while auxiliary refs exist.  This
  749                  * This includes namecache refs due to a related ncp being
  750                  * locked or having children, a VM object association, or
  751                  * other hold users.
  752                  *
  753                  * Do not reclaim/reuse a vnode if someone else has a real
  754                  * ref on it.  This can occur if a filesystem temporarily
  755                  * releases the vnode lock during VOP_RECLAIM.
  756                  */
  757                 if (vp->v_auxrefs ||
  758                     (vp->v_refcnt & ~VREF_FINALIZE) != VREF_TERMINATE + 1) {
  759 failed:
  760                         if (vp->v_state == VS_INACTIVE) {
  761                                 spin_lock(&vfs_spin);
  762                                 if (vp->v_state == VS_INACTIVE) {
  763                                         TAILQ_REMOVE(&vnode_inactive_list,
  764                                                      vp, v_list);
  765                                         TAILQ_INSERT_TAIL(&vnode_inactive_list,
  766                                                           vp, v_list);
  767                                 }
  768                                 spin_unlock(&vfs_spin);
  769                         }
  770                         vx_put(vp);
  771                         continue;
  772                 }
  773 
  774                 /*
  775                  * VINACTIVE and VREF_TERMINATE are expected to both be set
  776                  * for vnodes pulled from the inactive list, and cannot be
  777                  * changed while we hold the vx lock.
  778                  *
  779                  * Try to reclaim the vnode.
  780                  */
  781                 KKASSERT(vp->v_flag & VINACTIVE);
  782                 KKASSERT(vp->v_refcnt & VREF_TERMINATE);
  783 
  784                 if ((vp->v_flag & VRECLAIMED) == 0) {
  785                         if (cache_inval_vp_nonblock(vp))
  786                                 goto failed;
  787                         vgone_vxlocked(vp);
  788                         /* vnode is still VX locked */
  789                 }
  790 
  791                 /*
  792                  * At this point if there are no other refs or auxrefs on
  793                  * the vnode with the inactive list locked, and we remove
  794                  * the vnode from the inactive list, it should not be
  795                  * possible for anyone else to access the vnode any more.
  796                  *
  797                  * Since the vnode is in a VRECLAIMED state, no new
  798                  * namecache associations could have been made and the
  799                  * vnode should have already been removed from its mountlist.
  800                  *
  801                  * Since we hold a VX lock on the vnode it cannot have been
  802                  * reactivated (moved out of the inactive list).
  803                  */
  804                 KKASSERT(TAILQ_EMPTY(&vp->v_namecache));
  805                 spin_lock(&vfs_spin);
  806                 if (vp->v_auxrefs ||
  807                     (vp->v_refcnt & ~VREF_FINALIZE) != VREF_TERMINATE + 1) {
  808                         spin_unlock(&vfs_spin);
  809                         goto failed;
  810                 }
  811                 KKASSERT(vp->v_state == VS_INACTIVE);
  812                 TAILQ_REMOVE(&vnode_inactive_list, vp, v_list);
  813                 --inactivevnodes;
  814                 vp->v_state = VS_DYING;
  815                 spin_unlock(&vfs_spin);
  816 
  817                 /*
  818                  * Nothing should have been able to access this vp.  Only
  819                  * our ref should remain now.
  820                  */
  821                 atomic_clear_int(&vp->v_refcnt, VREF_TERMINATE|VREF_FINALIZE);
  822                 KASSERT(vp->v_refcnt == 1,
  823                         ("vp %p badrefs %08x", vp, vp->v_refcnt));
  824 
  825                 /*
  826                  * Return a VX locked vnode suitable for reuse.
  827                  */
  828                 return(vp);
  829         }
  830         return(NULL);
  831 }
  832 
  833 /*
  834  * Obtain a new vnode.  The returned vnode is VX locked & vrefd.
  835  *
  836  * All new vnodes set the VAGE flags.  An open() of the vnode will
  837  * decrement the (2-bit) flags.  Vnodes which are opened several times
  838  * are thus retained in the cache over vnodes which are merely stat()d.
  839  *
  840  * We always allocate the vnode.  Attempting to recycle existing vnodes
  841  * here can lead to numerous deadlocks, particularly with softupdates.
  842  */
  843 struct vnode *
  844 allocvnode(int lktimeout, int lkflags)
  845 {
  846         struct vnode *vp;
  847 
  848         /*
  849          * Do not flag for synchronous recyclement unless there are enough
  850          * freeable vnodes to recycle and the number of vnodes has
  851          * significantly exceeded our target.  We want the normal vnlru
  852          * process to handle the cleaning (at 9/10's) before we are forced
  853          * to flag it here at 11/10's for userexit path processing.
  854          */
  855         if (numvnodes >= desiredvnodes * 11 / 10 &&
  856             cachedvnodes + inactivevnodes >= desiredvnodes * 5 / 10) {
  857                 struct thread *td = curthread;
  858                 if (td->td_lwp)
  859                         atomic_set_int(&td->td_lwp->lwp_mpflags, LWP_MP_VNLRU);
  860         }
  861 
  862         /*
  863          * lktimeout only applies when LK_TIMELOCK is used, and only
  864          * the pageout daemon uses it.  The timeout may not be zero
  865          * or the pageout daemon can deadlock in low-VM situations.
  866          */
  867         if (lktimeout == 0)
  868                 lktimeout = hz / 10;
  869 
  870         vp = kmalloc(sizeof(*vp), M_VNODE, M_ZERO | M_WAITOK);
  871 
  872         lwkt_token_init(&vp->v_token, "vnode");
  873         lockinit(&vp->v_lock, "vnode", lktimeout, lkflags);
  874         TAILQ_INIT(&vp->v_namecache);
  875         RB_INIT(&vp->v_rbclean_tree);
  876         RB_INIT(&vp->v_rbdirty_tree);
  877         RB_INIT(&vp->v_rbhash_tree);
  878         spin_init(&vp->v_spin);
  879 
  880         lockmgr(&vp->v_lock, LK_EXCLUSIVE);
  881         atomic_add_int(&numvnodes, 1);
  882         vp->v_refcnt = 1;
  883         vp->v_flag = VAGE0 | VAGE1;
  884 
  885         KKASSERT(TAILQ_EMPTY(&vp->v_namecache));
  886         /* exclusive lock still held */
  887 
  888         vp->v_filesize = NOOFFSET;
  889         vp->v_type = VNON;
  890         vp->v_tag = 0;
  891         vp->v_state = VS_CACHED;
  892         _vactivate(vp);
  893 
  894         return (vp);
  895 }
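/*
 * Illustrative usage sketch (hypothetical helper; the v_data usage is
 * an assumption about the calling filesystem): a filesystem allocates a
 * vnode, fills in its type and private data, and returns it still VX
 * locked and vrefd.  An unwanted new vnode (still VNON) would instead
 * be discarded with vx_put(), which flags it for finalization.
 */
#if 0
static struct vnode *
example_alloc(void *fsprivate)
{
        struct vnode *vp;

        vp = allocvnode(0, 0);          /* VX locked + vrefd, type VNON */
        vp->v_type = VREG;
        vp->v_data = fsprivate;         /* per-filesystem private data */
        /* ... hook vp into the filesystem's own tables ... */
        return (vp);                    /* still VX locked */
}
#endif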
  896 
  897 /*
  898  * Called after a process has allocated a vnode via allocvnode()
  899  * and we detected that too many vnodes were present.
  900  *
  901  * This function is called just prior to a return to userland if the
  902  * process at some point had to allocate a new vnode during the last
  903  * system call and the vnode count was found to be excessive.
  904  *
  905  * This is a synchronous path that we do not normally want to execute.
  906  *
  907  * Flagged at >= 11/10's, runs if >= 10/10, vnlru runs at 9/10.
  908  *
  909  * WARNING: Sometimes numvnodes can blow out due to children being
  910  *          present under directory vnodes in the namecache.  For the
  911  *          moment use an if() instead of a while() and note that if
  912  *          we were to use a while() we would still have to break out
  913  *          if freesomevnodes() returned 0.  vnlru will also be trying
  914  *          hard to free vnodes at the same time (with a lower trigger
  915  *          point).
  916  */
  917 void
  918 allocvnode_gc(void)
  919 {
  920         if (numvnodes >= desiredvnodes &&
  921             cachedvnodes + inactivevnodes >= desiredvnodes * 5 / 10) {
  922                 freesomevnodes(batchfreevnodes);
  923         }
  924 }
  925 
  926 int
  927 freesomevnodes(int n)
  928 {
  929         struct vnode *vp;
  930         int count = 0;
  931 
  932         while (n) {
  933                 if ((vp = cleanfreevnode(n)) == NULL)
  934                         break;
  935                 vx_unlock(vp);
  936                 --n;
  937                 ++count;
  938                 kfree(vp, M_VNODE);
  939                 atomic_add_int(&numvnodes, -1);
  940         }
  941         return(count);
  942 }
