The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/ufs/ufs/ufs_dirhash.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
    3  *
    4  * Copyright (c) 2001, 2002 Ian Dowse.  All rights reserved.
    5  *
    6  * Redistribution and use in source and binary forms, with or without
    7  * modification, are permitted provided that the following conditions
    8  * are met:
    9  * 1. Redistributions of source code must retain the above copyright
   10  *    notice, this list of conditions and the following disclaimer.
   11  * 2. Redistributions in binary form must reproduce the above copyright
   12  *    notice, this list of conditions and the following disclaimer in the
   13  *    documentation and/or other materials provided with the distribution.
   14  *
   15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   25  * SUCH DAMAGE.
   26  */
   27 
   28 /*
   29  * This implements a hash-based lookup scheme for UFS directories.
   30  */
   31 
   32 #include <sys/cdefs.h>
   33 __FBSDID("$FreeBSD$");
   34 
   35 #include "opt_ufs.h"
   36 
   37 #ifdef UFS_DIRHASH
   38 
   39 #include <sys/param.h>
   40 #include <sys/systm.h>
   41 #include <sys/kernel.h>
   42 #include <sys/lock.h>
   43 #include <sys/mutex.h>
   44 #include <sys/malloc.h>
   45 #include <sys/fnv_hash.h>
   46 #include <sys/proc.h>
   47 #include <sys/bio.h>
   48 #include <sys/buf.h>
   49 #include <sys/vnode.h>
   50 #include <sys/mount.h>
   51 #include <sys/refcount.h>
   52 #include <sys/sysctl.h>
   53 #include <sys/sx.h>
   54 #include <sys/eventhandler.h>
   55 #include <sys/time.h>
   56 #include <vm/uma.h>
   57 
   58 #include <ufs/ufs/quota.h>
   59 #include <ufs/ufs/inode.h>
   60 #include <ufs/ufs/dir.h>
   61 #include <ufs/ufs/dirhash.h>
   62 #include <ufs/ufs/extattr.h>
   63 #include <ufs/ufs/ufsmount.h>
   64 #include <ufs/ufs/ufs_extern.h>
   65 
   66 #define WRAPINCR(val, limit)    (((val) + 1 == (limit)) ? 0 : ((val) + 1))
   67 #define WRAPDECR(val, limit)    (((val) == 0) ? ((limit) - 1) : ((val) - 1))
   68 #define BLKFREE2IDX(n)          ((n) > DH_NFSTATS ? DH_NFSTATS : (n))
   69 
   70 static MALLOC_DEFINE(M_DIRHASH, "ufs_dirhash", "UFS directory hash tables");
   71 
   72 static int ufs_mindirhashsize = DIRBLKSIZ * 5;
   73 SYSCTL_INT(_vfs_ufs, OID_AUTO, dirhash_minsize, CTLFLAG_RW,
   74     &ufs_mindirhashsize,
   75     0, "minimum directory size in bytes for which to use hashed lookup");
   76 static int ufs_dirhashmaxmem = 2 * 1024 * 1024; /* NOTE: initial value. It is
   77                                                    tuned in ufsdirhash_init() */
   78 SYSCTL_INT(_vfs_ufs, OID_AUTO, dirhash_maxmem, CTLFLAG_RW, &ufs_dirhashmaxmem,
   79     0, "maximum allowed dirhash memory usage");
   80 static int ufs_dirhashmem;
   81 SYSCTL_INT(_vfs_ufs, OID_AUTO, dirhash_mem, CTLFLAG_RD, &ufs_dirhashmem,
   82     0, "current dirhash memory usage");
   83 static int ufs_dirhashcheck = 0;
   84 SYSCTL_INT(_vfs_ufs, OID_AUTO, dirhash_docheck, CTLFLAG_RW, &ufs_dirhashcheck,
   85     0, "enable extra sanity tests");
   86 static int ufs_dirhashlowmemcount = 0;
   87 SYSCTL_INT(_vfs_ufs, OID_AUTO, dirhash_lowmemcount, CTLFLAG_RD, 
   88     &ufs_dirhashlowmemcount, 0, "number of times low memory hook called");
   89 static int ufs_dirhashreclaimpercent = 10;
   90 static int ufsdirhash_set_reclaimpercent(SYSCTL_HANDLER_ARGS);
   91 SYSCTL_PROC(_vfs_ufs, OID_AUTO, dirhash_reclaimpercent,
   92     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT,
   93     0, 0, ufsdirhash_set_reclaimpercent, "I",
   94     "set percentage of dirhash cache to be removed in low VM events");
   95 
   96 static int ufsdirhash_hash(struct dirhash *dh, char *name, int namelen);
   97 static void ufsdirhash_adjfree(struct dirhash *dh, doff_t offset, int diff);
   98 static void ufsdirhash_delslot(struct dirhash *dh, int slot);
   99 static int ufsdirhash_findslot(struct dirhash *dh, char *name, int namelen,
  100            doff_t offset);
  101 static doff_t ufsdirhash_getprev(struct direct *dp, doff_t offset);
  102 static int ufsdirhash_recycle(int wanted);
  103 static void ufsdirhash_lowmem(void);
  104 static void ufsdirhash_free_locked(struct inode *ip);
  105 
  106 static uma_zone_t       ufsdirhash_zone;
  107 
  108 #define DIRHASHLIST_LOCK()              mtx_lock(&ufsdirhash_mtx)
  109 #define DIRHASHLIST_UNLOCK()            mtx_unlock(&ufsdirhash_mtx)
  110 #define DIRHASH_BLKALLOC()              uma_zalloc(ufsdirhash_zone, M_NOWAIT)
  111 #define DIRHASH_BLKFREE(ptr)            uma_zfree(ufsdirhash_zone, (ptr))
  112 #define DIRHASH_ASSERT_LOCKED(dh)                                       \
  113     sx_assert(&(dh)->dh_lock, SA_LOCKED)
  114 
  115 /* Dirhash list; recently-used entries are near the tail. */
  116 static TAILQ_HEAD(, dirhash) ufsdirhash_list;
  117 
  118 /* Protects: ufsdirhash_list, `dh_list' field, ufs_dirhashmem. */
  119 static struct mtx       ufsdirhash_mtx;
  120 
  121 /*
  122  * Locking:
  123  *
  124  * The relationship between inode and dirhash is protected either by an
  125  * exclusive vnode lock or the vnode interlock where a shared vnode lock
  126  * may be used.  The dirhash_mtx is acquired after the dirhash lock.  To
  127  * handle teardown races, code wishing to lock the dirhash for an inode
  128  * when using a shared vnode lock must obtain a private reference on the
  129  * dirhash while holding the vnode interlock.  They can drop it once they
  130  * have obtained the dirhash lock and verified that the dirhash wasn't
  131  * recycled while they waited for the dirhash lock.
  132  *
  133  * ufsdirhash_build() acquires a shared lock on the dirhash when it is
  134  * successful.  This lock is released after a call to ufsdirhash_lookup().
  135  *
  136  * Functions requiring exclusive access use ufsdirhash_acquire() which may
  137  * free a dirhash structure that was recycled by ufsdirhash_recycle().
  138  *
  139  * The dirhash lock may be held across io operations.
  140  *
  141  * WITNESS reports a lock order reversal between the "bufwait" lock
  142  * and the "dirhash" lock.  However, this specific reversal will not
  143  * cause a deadlock.  To get a deadlock, one would have to lock a
  144  * buffer followed by the dirhash while a second thread locked a
  145  * buffer while holding the dirhash lock.  The second order can happen
  146  * under a shared or exclusive vnode lock for the associated directory
  147  * in lookup().  The first order, however, can only happen under an
  148  * exclusive vnode lock (e.g. unlink(), rename(), etc.).  Thus, for
  149  * a thread to be doing a "bufwait" -> "dirhash" order, it has to hold
  150  * an exclusive vnode lock.  That exclusive vnode lock will prevent
  151  * any other threads from doing a "dirhash" -> "bufwait" order.
  152  */
  153 
  154 static void
  155 ufsdirhash_hold(struct dirhash *dh)
  156 {
  157 
  158         refcount_acquire(&dh->dh_refcount);
  159 }
  160 
  161 static void
  162 ufsdirhash_drop(struct dirhash *dh)
  163 {
  164 
  165         if (refcount_release(&dh->dh_refcount)) {
  166                 sx_destroy(&dh->dh_lock);
  167                 free(dh, M_DIRHASH);
  168         }
  169 }
  170 
  171 /*
  172  * Release the lock on a dirhash.
  173  */
  174 static void
  175 ufsdirhash_release(struct dirhash *dh)
  176 {
  177 
  178         sx_unlock(&dh->dh_lock);
  179 }
  180 
  181 /*
  182  * Either acquire an existing hash locked shared or create a new hash and
  183  * return it exclusively locked.  May return NULL if the allocation fails.
  184  *
  185  * The vnode interlock is used to protect the i_dirhash pointer from
  186  * simultaneous access while only a shared vnode lock is held.
  187  */
  188 static struct dirhash *
  189 ufsdirhash_create(struct inode *ip)
  190 {
  191         struct dirhash *ndh;
  192         struct dirhash *dh;
  193         struct vnode *vp;
  194         bool excl;
  195 
  196         ndh = dh = NULL;
  197         vp = ip->i_vnode;
  198         excl = false;
  199         for (;;) {
  200                 /* Racy check for i_dirhash to prefetch a dirhash structure. */
  201                 if (ip->i_dirhash == NULL && ndh == NULL) {
  202                         ndh = malloc(sizeof *dh, M_DIRHASH,
  203                             M_NOWAIT | M_ZERO);
  204                         if (ndh == NULL)
  205                                 return (NULL);
  206                         refcount_init(&ndh->dh_refcount, 1);
  207 
  208                         /*
  209                          * The DUPOK is to prevent warnings from the
  210                          * sx_slock() a few lines down which is safe
  211                          * since the duplicate lock in that case is
  212                          * the one for this dirhash we are creating
  213                          * now which has no external references until
  214                          * after this function returns.
  215                          */
  216                         sx_init_flags(&ndh->dh_lock, "dirhash", SX_DUPOK);
  217                         sx_xlock(&ndh->dh_lock);
  218                 }
  219                 /*
  220                  * Check i_dirhash.  If it's NULL just try to use a
  221                  * preallocated structure.  If none exists loop and try again.
  222                  */
  223                 VI_LOCK(vp);
  224                 dh = ip->i_dirhash;
  225                 if (dh == NULL) {
  226                         ip->i_dirhash = ndh;
  227                         VI_UNLOCK(vp);
  228                         if (ndh == NULL)
  229                                 continue;
  230                         return (ndh);
  231                 }
  232                 ufsdirhash_hold(dh);
  233                 VI_UNLOCK(vp);
  234 
  235                 /* Acquire a lock on existing hashes. */
  236                 if (excl)
  237                         sx_xlock(&dh->dh_lock);
  238                 else
  239                         sx_slock(&dh->dh_lock);
  240 
  241                 /* The hash could've been recycled while we were waiting. */
  242                 VI_LOCK(vp);
  243                 if (ip->i_dirhash != dh) {
  244                         VI_UNLOCK(vp);
  245                         ufsdirhash_release(dh);
  246                         ufsdirhash_drop(dh);
  247                         continue;
  248                 }
  249                 VI_UNLOCK(vp);
  250                 ufsdirhash_drop(dh);
  251 
  252                 /* If the hash is still valid we've succeeded. */
  253                 if (dh->dh_hash != NULL)
  254                         break;
  255                 /*
  256                  * If the hash is NULL it has been recycled.  Try to upgrade
  257                  * so we can recreate it.  If we fail the upgrade, drop our
  258                  * lock and try again.
  259                  */
  260                 if (excl || sx_try_upgrade(&dh->dh_lock))
  261                         break;
  262                 sx_sunlock(&dh->dh_lock);
  263                 excl = true;
  264         }
  265         /* Free the preallocated structure if it was not necessary. */
  266         if (ndh) {
  267                 ufsdirhash_release(ndh);
  268                 ufsdirhash_drop(ndh);
  269         }
  270         return (dh);
  271 }
  272 
  273 /*
  274  * Acquire an exclusive lock on an existing hash.  Requires an exclusive
  275  * vnode lock to protect the i_dirhash pointer.  hashes that have been
  276  * recycled are reclaimed here and NULL is returned.
  277  */
  278 static struct dirhash *
  279 ufsdirhash_acquire(struct inode *ip)
  280 {
  281         struct dirhash *dh;
  282 
  283         ASSERT_VOP_ELOCKED(ip->i_vnode, __FUNCTION__);
  284 
  285         dh = ip->i_dirhash;
  286         if (dh == NULL)
  287                 return (NULL);
  288         sx_xlock(&dh->dh_lock);
  289         if (dh->dh_hash != NULL)
  290                 return (dh);
  291         ufsdirhash_free_locked(ip);
  292         return (NULL);
  293 }
  294 
  295 /*
  296  * Acquire exclusively and free the hash pointed to by ip.  Works with a
  297  * shared or exclusive vnode lock.
  298  */
  299 void
  300 ufsdirhash_free(struct inode *ip)
  301 {
  302         struct dirhash *dh;
  303         struct vnode *vp;
  304 
  305         vp = ip->i_vnode;
  306         for (;;) {
  307                 /* Grab a reference on this inode's dirhash if it has one. */
  308                 VI_LOCK(vp);
  309                 dh = ip->i_dirhash;
  310                 if (dh == NULL) {
  311                         VI_UNLOCK(vp);
  312                         return;
  313                 }
  314                 ufsdirhash_hold(dh);
  315                 VI_UNLOCK(vp);
  316 
  317                 /* Exclusively lock the dirhash. */
  318                 sx_xlock(&dh->dh_lock);
  319 
  320                 /* If this dirhash still belongs to this inode, then free it. */
  321                 VI_LOCK(vp);
  322                 if (ip->i_dirhash == dh) {
  323                         VI_UNLOCK(vp);
  324                         ufsdirhash_drop(dh);
  325                         break;
  326                 }
  327                 VI_UNLOCK(vp);
  328 
  329                 /*
  330                  * This inode's dirhash has changed while we were
  331                  * waiting for the dirhash lock, so try again.
  332                  */
  333                 ufsdirhash_release(dh);
  334                 ufsdirhash_drop(dh);
  335         }
  336         ufsdirhash_free_locked(ip);
  337 }
  338 
  339 /*
  340  * Attempt to build up a hash table for the directory contents in
  341  * inode 'ip'. Returns 0 on success, or -1 of the operation failed.
  342  */
  343 int
  344 ufsdirhash_build(struct inode *ip)
  345 {
  346         struct dirhash *dh;
  347         struct buf *bp = NULL;
  348         struct direct *ep;
  349         struct vnode *vp;
  350         doff_t bmask, pos;
  351         u_int dirblocks, i, narrays, nblocks, nslots;
  352         int j, memreqd, slot;
  353 
  354         /* Take care of a decreased sysctl value. */
  355         while (ufs_dirhashmem > ufs_dirhashmaxmem) {
  356                 if (ufsdirhash_recycle(0) != 0)
  357                         return (-1);
  358                 /* Recycled enough memory, so unlock the list. */
  359                 DIRHASHLIST_UNLOCK();
  360         }
  361 
  362         /* Check if we can/should use dirhash. */
  363         if (ip->i_size < ufs_mindirhashsize || OFSFMT(ip->i_vnode) ||
  364             ip->i_effnlink == 0) {
  365                 if (ip->i_dirhash)
  366                         ufsdirhash_free(ip);
  367                 return (-1);
  368         }
  369         dh = ufsdirhash_create(ip);
  370         if (dh == NULL)
  371                 return (-1);
  372         if (dh->dh_hash != NULL)
  373                 return (0);
  374 
  375         vp = ip->i_vnode;
  376         /* Allocate 50% more entries than this dir size could ever need. */
  377         KASSERT(ip->i_size >= DIRBLKSIZ, ("ufsdirhash_build size"));
  378         nslots = ip->i_size / DIRECTSIZ(1);
  379         nslots = (nslots * 3 + 1) / 2;
  380         narrays = howmany(nslots, DH_NBLKOFF);
  381         nslots = narrays * DH_NBLKOFF;
  382         dirblocks = howmany(ip->i_size, DIRBLKSIZ);
  383         nblocks = (dirblocks * 3 + 1) / 2;
  384         memreqd = sizeof(*dh) + narrays * sizeof(*dh->dh_hash) +
  385             narrays * DH_NBLKOFF * sizeof(**dh->dh_hash) +
  386             nblocks * sizeof(*dh->dh_blkfree);
  387         DIRHASHLIST_LOCK();
  388         if (memreqd + ufs_dirhashmem > ufs_dirhashmaxmem) {
  389                 DIRHASHLIST_UNLOCK();
  390                 if (memreqd > ufs_dirhashmaxmem / 2)
  391                         goto fail;
  392                 /* Try to free some space. */
  393                 if (ufsdirhash_recycle(memreqd) != 0)
  394                         goto fail;
  395                 /* Enough was freed, and list has been locked. */
  396         }
  397         ufs_dirhashmem += memreqd;
  398         DIRHASHLIST_UNLOCK();
  399 
  400         /* Initialise the hash table and block statistics. */
  401         dh->dh_memreq = memreqd;
  402         dh->dh_narrays = narrays;
  403         dh->dh_hlen = nslots;
  404         dh->dh_nblk = nblocks;
  405         dh->dh_dirblks = dirblocks;
  406         for (i = 0; i < DH_NFSTATS; i++)
  407                 dh->dh_firstfree[i] = -1;
  408         dh->dh_firstfree[DH_NFSTATS] = 0;
  409         dh->dh_hused = 0;
  410         dh->dh_seqoff = -1;
  411         dh->dh_score = DH_SCOREINIT;
  412         dh->dh_lastused = time_second;
  413 
  414         /*
  415          * Use non-blocking mallocs so that we will revert to a linear
  416          * lookup on failure rather than potentially blocking forever.
  417          */
  418         dh->dh_hash = malloc(narrays * sizeof(dh->dh_hash[0]),
  419             M_DIRHASH, M_NOWAIT | M_ZERO);
  420         if (dh->dh_hash == NULL)
  421                 goto fail;
  422         dh->dh_blkfree = malloc(nblocks * sizeof(dh->dh_blkfree[0]),
  423             M_DIRHASH, M_NOWAIT);
  424         if (dh->dh_blkfree == NULL)
  425                 goto fail;
  426         for (i = 0; i < narrays; i++) {
  427                 if ((dh->dh_hash[i] = DIRHASH_BLKALLOC()) == NULL)
  428                         goto fail;
  429                 for (j = 0; j < DH_NBLKOFF; j++)
  430                         dh->dh_hash[i][j] = DIRHASH_EMPTY;
  431         }
  432         for (i = 0; i < dirblocks; i++)
  433                 dh->dh_blkfree[i] = DIRBLKSIZ / DIRALIGN;
  434         bmask = vp->v_mount->mnt_stat.f_iosize - 1;
  435         pos = 0;
  436         while (pos < ip->i_size) {
  437                 /* If necessary, get the next directory block. */
  438                 if ((pos & bmask) == 0) {
  439                         if (bp != NULL)
  440                                 brelse(bp);
  441                         if (UFS_BLKATOFF(vp, (off_t)pos, NULL, &bp) != 0)
  442                                 goto fail;
  443                 }
  444 
  445                 /* Add this entry to the hash. */
  446                 ep = (struct direct *)((char *)bp->b_data + (pos & bmask));
  447                 if (ep->d_reclen == 0 || ep->d_reclen >
  448                     DIRBLKSIZ - (pos & (DIRBLKSIZ - 1))) {
  449                         /* Corrupted directory. */
  450                         brelse(bp);
  451                         goto fail;
  452                 }
  453                 if (ep->d_ino != 0) {
  454                         /* Add the entry (simplified ufsdirhash_add). */
  455                         slot = ufsdirhash_hash(dh, ep->d_name, ep->d_namlen);
  456                         while (DH_ENTRY(dh, slot) != DIRHASH_EMPTY)
  457                                 slot = WRAPINCR(slot, dh->dh_hlen);
  458                         dh->dh_hused++;
  459                         DH_ENTRY(dh, slot) = pos;
  460                         ufsdirhash_adjfree(dh, pos, -DIRSIZ(0, ep));
  461                 }
  462                 pos += ep->d_reclen;
  463         }
  464 
  465         if (bp != NULL)
  466                 brelse(bp);
  467         DIRHASHLIST_LOCK();
  468         TAILQ_INSERT_TAIL(&ufsdirhash_list, dh, dh_list);
  469         dh->dh_onlist = 1;
  470         DIRHASHLIST_UNLOCK();
  471         sx_downgrade(&dh->dh_lock);
  472         return (0);
  473 
  474 fail:
  475         ufsdirhash_free_locked(ip);
  476         return (-1);
  477 }
  478 
  479 /*
  480  * Free any hash table associated with inode 'ip'.
  481  */
  482 static void
  483 ufsdirhash_free_locked(struct inode *ip)
  484 {
  485         struct dirhash *dh;
  486         struct vnode *vp;
  487         int i;
  488 
  489         DIRHASH_ASSERT_LOCKED(ip->i_dirhash);
  490 
  491         /*
  492          * Clear the pointer in the inode to prevent new threads from
  493          * finding the dead structure.
  494          */
  495         vp = ip->i_vnode;
  496         VI_LOCK(vp);
  497         dh = ip->i_dirhash;
  498         ip->i_dirhash = NULL;
  499         VI_UNLOCK(vp);
  500 
  501         /*
  502          * Remove the hash from the list since we are going to free its
  503          * memory.
  504          */
  505         DIRHASHLIST_LOCK();
  506         if (dh->dh_onlist)
  507                 TAILQ_REMOVE(&ufsdirhash_list, dh, dh_list);
  508         ufs_dirhashmem -= dh->dh_memreq;
  509         DIRHASHLIST_UNLOCK();
  510 
  511         /*
  512          * At this point, any waiters for the lock should hold their
  513          * own reference on the dirhash structure.  They will drop
  514          * that reference once they grab the vnode interlock and see
  515          * that ip->i_dirhash is NULL.
  516          */
  517         sx_xunlock(&dh->dh_lock);
  518 
  519         /*
  520          * Handle partially recycled as well as fully constructed hashes.
  521          */
  522         if (dh->dh_hash != NULL) {
  523                 for (i = 0; i < dh->dh_narrays; i++)
  524                         if (dh->dh_hash[i] != NULL)
  525                                 DIRHASH_BLKFREE(dh->dh_hash[i]);
  526                 free(dh->dh_hash, M_DIRHASH);
  527                 if (dh->dh_blkfree != NULL)
  528                         free(dh->dh_blkfree, M_DIRHASH);
  529         }
  530 
  531         /*
  532          * Drop the inode's reference to the data structure.
  533          */
  534         ufsdirhash_drop(dh);
  535 }
  536 
  537 /*
  538  * Find the offset of the specified name within the given inode.
  539  * Returns 0 on success, ENOENT if the entry does not exist, or
  540  * EJUSTRETURN if the caller should revert to a linear search.
  541  *
  542  * If successful, the directory offset is stored in *offp, and a
  543  * pointer to a struct buf containing the entry is stored in *bpp. If
  544  * prevoffp is non-NULL, the offset of the previous entry within
  545  * the DIRBLKSIZ-sized block is stored in *prevoffp (if the entry
  546  * is the first in a block, the start of the block is used).
  547  *
  548  * Must be called with the hash locked.  Returns with the hash unlocked.
  549  */
  550 int
  551 ufsdirhash_lookup(struct inode *ip, char *name, int namelen, doff_t *offp,
  552     struct buf **bpp, doff_t *prevoffp)
  553 {
  554         struct dirhash *dh, *dh_next;
  555         struct direct *dp;
  556         struct vnode *vp;
  557         struct buf *bp;
  558         doff_t blkoff, bmask, offset, prevoff, seqoff;
  559         int i, slot;
  560         int error;
  561 
  562         dh = ip->i_dirhash;
  563         KASSERT(dh != NULL && dh->dh_hash != NULL,
  564             ("ufsdirhash_lookup: Invalid dirhash %p\n", dh));
  565         DIRHASH_ASSERT_LOCKED(dh);
  566         /*
  567          * Move this dirhash towards the end of the list if it has a
  568          * score higher than the next entry, and acquire the dh_lock.
  569          */
  570         DIRHASHLIST_LOCK();
  571         if (TAILQ_NEXT(dh, dh_list) != NULL) {
  572                 /*
  573                  * If the new score will be greater than that of the next
  574                  * entry, then move this entry past it. With both mutexes
  575                  * held, dh_next won't go away, but its dh_score could
  576                  * change; that's not important since it is just a hint.
  577                  */
  578                 if ((dh_next = TAILQ_NEXT(dh, dh_list)) != NULL &&
  579                     dh->dh_score >= dh_next->dh_score) {
  580                         KASSERT(dh->dh_onlist, ("dirhash: not on list"));
  581                         TAILQ_REMOVE(&ufsdirhash_list, dh, dh_list);
  582                         TAILQ_INSERT_AFTER(&ufsdirhash_list, dh_next, dh,
  583                             dh_list);
  584                 }
  585         }
  586         /* Update the score. */
  587         if (dh->dh_score < DH_SCOREMAX)
  588                 dh->dh_score++;
  589 
  590         /* Update last used time. */
  591         dh->dh_lastused = time_second;
  592         DIRHASHLIST_UNLOCK();
  593 
  594         vp = ip->i_vnode;
  595         bmask = vp->v_mount->mnt_stat.f_iosize - 1;
  596         blkoff = -1;
  597         bp = NULL;
  598         seqoff = dh->dh_seqoff;
  599 restart:
  600         slot = ufsdirhash_hash(dh, name, namelen);
  601 
  602         if (seqoff != -1) {
  603                 /*
  604                  * Sequential access optimisation. seqoff contains the
  605                  * offset of the directory entry immediately following
  606                  * the last entry that was looked up. Check if this offset
  607                  * appears in the hash chain for the name we are looking for.
  608                  */
  609                 for (i = slot; (offset = DH_ENTRY(dh, i)) != DIRHASH_EMPTY;
  610                     i = WRAPINCR(i, dh->dh_hlen))
  611                         if (offset == seqoff)
  612                                 break;
  613                 if (offset == seqoff) {
  614                         /*
  615                          * We found an entry with the expected offset. This
  616                          * is probably the entry we want, but if not, the
  617                          * code below will retry.
  618                          */ 
  619                         slot = i;
  620                 } else
  621                         seqoff = -1;
  622         }
  623 
  624         for (; (offset = DH_ENTRY(dh, slot)) != DIRHASH_EMPTY;
  625             slot = WRAPINCR(slot, dh->dh_hlen)) {
  626                 if (offset == DIRHASH_DEL)
  627                         continue;
  628                 if (offset < 0 || offset >= ip->i_size)
  629                         panic("ufsdirhash_lookup: bad offset in hash array");
  630                 if ((offset & ~bmask) != blkoff) {
  631                         if (bp != NULL)
  632                                 brelse(bp);
  633                         blkoff = offset & ~bmask;
  634                         if (UFS_BLKATOFF(vp, (off_t)blkoff, NULL, &bp) != 0) {
  635                                 error = EJUSTRETURN;
  636                                 goto fail;
  637                         }
  638                 }
  639                 KASSERT(bp != NULL, ("no buffer allocated"));
  640                 dp = (struct direct *)(bp->b_data + (offset & bmask));
  641                 if (dp->d_reclen == 0 || dp->d_reclen >
  642                     DIRBLKSIZ - (offset & (DIRBLKSIZ - 1))) {
  643                         /* Corrupted directory. */
  644                         error = EJUSTRETURN;
  645                         goto fail;
  646                 }
  647                 if (dp->d_namlen == namelen &&
  648                     bcmp(dp->d_name, name, namelen) == 0) {
  649                         /* Found. Get the prev offset if needed. */
  650                         if (prevoffp != NULL) {
  651                                 if (offset & (DIRBLKSIZ - 1)) {
  652                                         prevoff = ufsdirhash_getprev(dp,
  653                                             offset);
  654                                         if (prevoff == -1) {
  655                                                 error = EJUSTRETURN;
  656                                                 goto fail;
  657                                         }
  658                                 } else
  659                                         prevoff = offset;
  660                                 *prevoffp = prevoff;
  661                         }
  662 
  663                         /* Update offset. */
  664                         dh->dh_seqoff = offset + DIRSIZ(0, dp);
  665                         *bpp = bp;
  666                         *offp = offset;
  667                         ufsdirhash_release(dh);
  668                         return (0);
  669                 }
  670 
  671                 /*
  672                  * When the name doesn't match in the sequential
  673                  * optimization case, go back and search normally.
  674                  */
  675                 if (seqoff != -1) {
  676                         seqoff = -1;
  677                         goto restart;
  678                 }
  679         }
  680         error = ENOENT;
  681 fail:
  682         ufsdirhash_release(dh);
  683         if (bp != NULL)
  684                 brelse(bp);
  685         return (error);
  686 }
  687 
  688 /*
  689  * Find a directory block with room for 'slotneeded' bytes. Returns
  690  * the offset of the directory entry that begins the free space.
  691  * This will either be the offset of an existing entry that has free
  692  * space at the end, or the offset of an entry with d_ino == 0 at
  693  * the start of a DIRBLKSIZ block.
  694  *
  695  * To use the space, the caller may need to compact existing entries in
  696  * the directory. The total number of bytes in all of the entries involved
  697  * in the compaction is stored in *slotsize. In other words, all of
  698  * the entries that must be compacted are exactly contained in the
  699  * region beginning at the returned offset and spanning *slotsize bytes.
  700  *
  701  * Returns -1 if no space was found, indicating that the directory
  702  * must be extended.
  703  */
  704 doff_t
  705 ufsdirhash_findfree(struct inode *ip, int slotneeded, int *slotsize)
  706 {
  707         struct direct *dp;
  708         struct dirhash *dh;
  709         struct buf *bp;
  710         doff_t pos, slotstart;
  711         int dirblock, error, freebytes, i;
  712 
  713         dh = ip->i_dirhash;
  714         KASSERT(dh != NULL && dh->dh_hash != NULL,
  715             ("ufsdirhash_findfree: Invalid dirhash %p\n", dh));
  716         DIRHASH_ASSERT_LOCKED(dh);
  717 
  718         /* Find a directory block with the desired free space. */
  719         dirblock = -1;
  720         for (i = howmany(slotneeded, DIRALIGN); i <= DH_NFSTATS; i++)
  721                 if ((dirblock = dh->dh_firstfree[i]) != -1)
  722                         break;
  723         if (dirblock == -1)
  724                 return (-1);
  725 
  726         KASSERT(dirblock < dh->dh_nblk &&
  727             dh->dh_blkfree[dirblock] >= howmany(slotneeded, DIRALIGN),
  728             ("ufsdirhash_findfree: bad stats"));
  729         pos = dirblock * DIRBLKSIZ;
  730         error = UFS_BLKATOFF(ip->i_vnode, (off_t)pos, (char **)&dp, &bp);
  731         if (error)
  732                 return (-1);
  733 
  734         /* Find the first entry with free space. */
  735         for (i = 0; i < DIRBLKSIZ; ) {
  736                 if (dp->d_reclen == 0) {
  737                         brelse(bp);
  738                         return (-1);
  739                 }
  740                 if (dp->d_ino == 0 || dp->d_reclen > DIRSIZ(0, dp))
  741                         break;
  742                 i += dp->d_reclen;
  743                 dp = (struct direct *)((char *)dp + dp->d_reclen);
  744         }
  745         if (i > DIRBLKSIZ) {
  746                 brelse(bp);
  747                 return (-1);
  748         }
  749         slotstart = pos + i;
  750 
  751         /* Find the range of entries needed to get enough space */
  752         freebytes = 0;
  753         while (i < DIRBLKSIZ && freebytes < slotneeded) {
  754                 freebytes += dp->d_reclen;
  755                 if (dp->d_ino != 0)
  756                         freebytes -= DIRSIZ(0, dp);
  757                 if (dp->d_reclen == 0) {
  758                         brelse(bp);
  759                         return (-1);
  760                 }
  761                 i += dp->d_reclen;
  762                 dp = (struct direct *)((char *)dp + dp->d_reclen);
  763         }
  764         if (i > DIRBLKSIZ) {
  765                 brelse(bp);
  766                 return (-1);
  767         }
  768         if (freebytes < slotneeded)
  769                 panic("ufsdirhash_findfree: free mismatch");
  770         brelse(bp);
  771         *slotsize = pos + i - slotstart;
  772         return (slotstart);
  773 }
  774 
  775 /*
  776  * Return the start of the unused space at the end of a directory, or
  777  * -1 if there are no trailing unused blocks.
  778  */
  779 doff_t
  780 ufsdirhash_enduseful(struct inode *ip)
  781 {
  782 
  783         struct dirhash *dh;
  784         int i;
  785 
  786         dh = ip->i_dirhash;
  787         DIRHASH_ASSERT_LOCKED(dh);
  788         KASSERT(dh != NULL && dh->dh_hash != NULL,
  789             ("ufsdirhash_enduseful: Invalid dirhash %p\n", dh));
  790 
  791         if (dh->dh_blkfree[dh->dh_dirblks - 1] != DIRBLKSIZ / DIRALIGN)
  792                 return (-1);
  793 
  794         for (i = dh->dh_dirblks - 1; i >= 0; i--)
  795                 if (dh->dh_blkfree[i] != DIRBLKSIZ / DIRALIGN)
  796                         break;
  797 
  798         return ((doff_t)(i + 1) * DIRBLKSIZ);
  799 }
  800 
  801 /*
  802  * Insert information into the hash about a new directory entry. dirp
  803  * points to a struct direct containing the entry, and offset specifies
  804  * the offset of this entry.
  805  */
  806 void
  807 ufsdirhash_add(struct inode *ip, struct direct *dirp, doff_t offset)
  808 {
  809         struct dirhash *dh;
  810         int slot;
  811 
  812         if ((dh = ufsdirhash_acquire(ip)) == NULL)
  813                 return;
  814 
  815         KASSERT(offset < dh->dh_dirblks * DIRBLKSIZ,
  816             ("ufsdirhash_add: bad offset"));
  817         /*
  818          * Normal hash usage is < 66%. If the usage gets too high then
  819          * remove the hash entirely and let it be rebuilt later.
  820          */
  821         if (dh->dh_hused >= (dh->dh_hlen * 3) / 4) {
  822                 ufsdirhash_free_locked(ip);
  823                 return;
  824         }
  825 
  826         /* Find a free hash slot (empty or deleted), and add the entry. */
  827         slot = ufsdirhash_hash(dh, dirp->d_name, dirp->d_namlen);
  828         while (DH_ENTRY(dh, slot) >= 0)
  829                 slot = WRAPINCR(slot, dh->dh_hlen);
  830         if (DH_ENTRY(dh, slot) == DIRHASH_EMPTY)
  831                 dh->dh_hused++;
  832         DH_ENTRY(dh, slot) = offset;
  833 
  834         /* Update last used time. */
  835         dh->dh_lastused = time_second;
  836 
  837         /* Update the per-block summary info. */
  838         ufsdirhash_adjfree(dh, offset, -DIRSIZ(0, dirp));
  839         ufsdirhash_release(dh);
  840 }
  841 
  842 /*
  843  * Remove the specified directory entry from the hash. The entry to remove
  844  * is defined by the name in `dirp', which must exist at the specified
  845  * `offset' within the directory.
  846  */
  847 void
  848 ufsdirhash_remove(struct inode *ip, struct direct *dirp, doff_t offset)
  849 {
  850         struct dirhash *dh;
  851         int slot;
  852 
  853         if ((dh = ufsdirhash_acquire(ip)) == NULL)
  854                 return;
  855 
  856         KASSERT(offset < dh->dh_dirblks * DIRBLKSIZ,
  857             ("ufsdirhash_remove: bad offset"));
  858         /* Find the entry */
  859         slot = ufsdirhash_findslot(dh, dirp->d_name, dirp->d_namlen, offset);
  860 
  861         /* Remove the hash entry. */
  862         ufsdirhash_delslot(dh, slot);
  863 
  864         /* Update the per-block summary info. */
  865         ufsdirhash_adjfree(dh, offset, DIRSIZ(0, dirp));
  866         ufsdirhash_release(dh);
  867 }
  868 
  869 /*
  870  * Change the offset associated with a directory entry in the hash. Used
  871  * when compacting directory blocks.
  872  */
  873 void
  874 ufsdirhash_move(struct inode *ip, struct direct *dirp, doff_t oldoff,
  875     doff_t newoff)
  876 {
  877         struct dirhash *dh;
  878         int slot;
  879 
  880         if ((dh = ufsdirhash_acquire(ip)) == NULL)
  881                 return;
  882 
  883         KASSERT(oldoff < dh->dh_dirblks * DIRBLKSIZ &&
  884             newoff < dh->dh_dirblks * DIRBLKSIZ,
  885             ("ufsdirhash_move: bad offset"));
  886         /* Find the entry, and update the offset. */
  887         slot = ufsdirhash_findslot(dh, dirp->d_name, dirp->d_namlen, oldoff);
  888         DH_ENTRY(dh, slot) = newoff;
  889         ufsdirhash_release(dh);
  890 }
  891 
  892 /*
  893  * Inform dirhash that the directory has grown by one block that
  894  * begins at offset (i.e. the new length is offset + DIRBLKSIZ).
  895  */
  896 void
  897 ufsdirhash_newblk(struct inode *ip, doff_t offset)
  898 {
  899         struct dirhash *dh;
  900         int block;
  901 
  902         if ((dh = ufsdirhash_acquire(ip)) == NULL)
  903                 return;
  904 
  905         KASSERT(offset == dh->dh_dirblks * DIRBLKSIZ,
  906             ("ufsdirhash_newblk: bad offset"));
  907         block = offset / DIRBLKSIZ;
  908         if (block >= dh->dh_nblk) {
  909                 /* Out of space; must rebuild. */
  910                 ufsdirhash_free_locked(ip);
  911                 return;
  912         }
  913         dh->dh_dirblks = block + 1;
  914 
  915         /* Account for the new free block. */
  916         dh->dh_blkfree[block] = DIRBLKSIZ / DIRALIGN;
  917         if (dh->dh_firstfree[DH_NFSTATS] == -1)
  918                 dh->dh_firstfree[DH_NFSTATS] = block;
  919         ufsdirhash_release(dh);
  920 }
  921 
  922 /*
  923  * Inform dirhash that the directory is being truncated.
  924  */
  925 void
  926 ufsdirhash_dirtrunc(struct inode *ip, doff_t offset)
  927 {
  928         struct dirhash *dh;
  929         int block, i;
  930 
  931         if ((dh = ufsdirhash_acquire(ip)) == NULL)
  932                 return;
  933 
  934         KASSERT(offset <= dh->dh_dirblks * DIRBLKSIZ,
  935             ("ufsdirhash_dirtrunc: bad offset"));
  936         block = howmany(offset, DIRBLKSIZ);
  937         /*
  938          * If the directory shrinks to less than 1/8 of dh_nblk blocks
  939          * (about 20% of its original size due to the 50% extra added in
  940          * ufsdirhash_build) then free it, and let the caller rebuild
  941          * if necessary.
  942          */
  943         if (block < dh->dh_nblk / 8 && dh->dh_narrays > 1) {
  944                 ufsdirhash_free_locked(ip);
  945                 return;
  946         }
  947 
  948         /*
  949          * Remove any `first free' information pertaining to the
  950          * truncated blocks. All blocks we're removing should be
  951          * completely unused.
  952          */
  953         if (dh->dh_firstfree[DH_NFSTATS] >= block)
  954                 dh->dh_firstfree[DH_NFSTATS] = -1;
  955         for (i = block; i < dh->dh_dirblks; i++)
  956                 if (dh->dh_blkfree[i] != DIRBLKSIZ / DIRALIGN)
  957                         panic("ufsdirhash_dirtrunc: blocks in use");
  958         for (i = 0; i < DH_NFSTATS; i++)
  959                 if (dh->dh_firstfree[i] >= block)
  960                         panic("ufsdirhash_dirtrunc: first free corrupt");
  961         dh->dh_dirblks = block;
  962         ufsdirhash_release(dh);
  963 }
  964 
  965 /*
  966  * Debugging function to check that the dirhash information about
  967  * a directory block matches its actual contents. Panics if a mismatch
  968  * is detected.
  969  *
  970  * On entry, `buf' should point to the start of an in-core
  971  * DIRBLKSIZ-sized directory block, and `offset' should contain the
  972  * offset from the start of the directory of that block.
  973  */
  974 void
  975 ufsdirhash_checkblock(struct inode *ip, char *buf, doff_t offset)
  976 {
  977         struct dirhash *dh;
  978         struct direct *dp;
  979         int block, ffslot, i, nfree;
  980 
  981         if (!ufs_dirhashcheck)
  982                 return;
  983         if ((dh = ufsdirhash_acquire(ip)) == NULL)
  984                 return;
  985 
  986         block = offset / DIRBLKSIZ;
  987         if ((offset & (DIRBLKSIZ - 1)) != 0 || block >= dh->dh_dirblks)
  988                 panic("ufsdirhash_checkblock: bad offset");
  989 
  990         nfree = 0;
  991         for (i = 0; i < DIRBLKSIZ; i += dp->d_reclen) {
  992                 dp = (struct direct *)(buf + i);
  993                 if (dp->d_reclen == 0 || i + dp->d_reclen > DIRBLKSIZ)
  994                         panic("ufsdirhash_checkblock: bad dir");
  995 
  996                 if (dp->d_ino == 0) {
  997 #if 0
  998                         /*
  999                          * XXX entries with d_ino == 0 should only occur
 1000                          * at the start of a DIRBLKSIZ block. However the
 1001                          * ufs code is tolerant of such entries at other
 1002                          * offsets, and fsck does not fix them.
 1003                          */
 1004                         if (i != 0)
 1005                                 panic("ufsdirhash_checkblock: bad dir inode");
 1006 #endif
 1007                         nfree += dp->d_reclen;
 1008                         continue;
 1009                 }
 1010 
 1011                 /* Check that the entry exists (will panic if it doesn't). */
 1012                 ufsdirhash_findslot(dh, dp->d_name, dp->d_namlen, offset + i);
 1013 
 1014                 nfree += dp->d_reclen - DIRSIZ(0, dp);
 1015         }
 1016         if (i != DIRBLKSIZ)
 1017                 panic("ufsdirhash_checkblock: bad dir end");
 1018 
 1019         if (dh->dh_blkfree[block] * DIRALIGN != nfree)
 1020                 panic("ufsdirhash_checkblock: bad free count");
 1021 
 1022         ffslot = BLKFREE2IDX(nfree / DIRALIGN);
 1023         for (i = 0; i <= DH_NFSTATS; i++)
 1024                 if (dh->dh_firstfree[i] == block && i != ffslot)
 1025                         panic("ufsdirhash_checkblock: bad first-free");
 1026         if (dh->dh_firstfree[ffslot] == -1)
 1027                 panic("ufsdirhash_checkblock: missing first-free entry");
 1028         ufsdirhash_release(dh);
 1029 }
 1030 
 1031 /*
 1032  * Hash the specified filename into a dirhash slot.
 1033  */
 1034 static int
 1035 ufsdirhash_hash(struct dirhash *dh, char *name, int namelen)
 1036 {
 1037         u_int32_t hash;
 1038 
 1039         /*
 1040          * We hash the name and then some other bit of data that is
 1041          * invariant over the dirhash's lifetime. Otherwise names
 1042          * differing only in the last byte are placed close to one
 1043          * another in the table, which is bad for linear probing.
 1044          */
 1045         hash = fnv_32_buf(name, namelen, FNV1_32_INIT);
 1046         hash = fnv_32_buf(&dh, sizeof(dh), hash);
 1047         return (hash % dh->dh_hlen);
 1048 }
 1049 
 1050 /*
 1051  * Adjust the number of free bytes in the block containing `offset'
 1052  * by the value specified by `diff'.
 1053  *
 1054  * The caller must ensure we have exclusive access to `dh'; normally
 1055  * that means that dh_lock should be held, but this is also called
 1056  * from ufsdirhash_build() where exclusive access can be assumed.
 1057  */
 1058 static void
 1059 ufsdirhash_adjfree(struct dirhash *dh, doff_t offset, int diff)
 1060 {
 1061         int block, i, nfidx, ofidx;
 1062 
 1063         /* Update the per-block summary info. */
 1064         block = offset / DIRBLKSIZ;
 1065         KASSERT(block < dh->dh_nblk && block < dh->dh_dirblks,
 1066              ("dirhash bad offset"));
 1067         ofidx = BLKFREE2IDX(dh->dh_blkfree[block]);
 1068         dh->dh_blkfree[block] = (int)dh->dh_blkfree[block] + (diff / DIRALIGN);
 1069         nfidx = BLKFREE2IDX(dh->dh_blkfree[block]);
 1070 
 1071         /* Update the `first free' list if necessary. */
 1072         if (ofidx != nfidx) {
 1073                 /* If removing, scan forward for the next block. */
 1074                 if (dh->dh_firstfree[ofidx] == block) {
 1075                         for (i = block + 1; i < dh->dh_dirblks; i++)
 1076                                 if (BLKFREE2IDX(dh->dh_blkfree[i]) == ofidx)
 1077                                         break;
 1078                         dh->dh_firstfree[ofidx] = (i < dh->dh_dirblks) ? i : -1;
 1079                 }
 1080 
 1081                 /* Make this the new `first free' if necessary */
 1082                 if (dh->dh_firstfree[nfidx] > block ||
 1083                     dh->dh_firstfree[nfidx] == -1)
 1084                         dh->dh_firstfree[nfidx] = block;
 1085         }
 1086 }
 1087 
 1088 /*
 1089  * Find the specified name which should have the specified offset.
 1090  * Returns a slot number, and panics on failure.
 1091  *
 1092  * `dh' must be locked on entry and remains so on return.
 1093  */
 1094 static int
 1095 ufsdirhash_findslot(struct dirhash *dh, char *name, int namelen, doff_t offset)
 1096 {
 1097         int slot;
 1098 
 1099         DIRHASH_ASSERT_LOCKED(dh);
 1100 
 1101         /* Find the entry. */
 1102         KASSERT(dh->dh_hused < dh->dh_hlen, ("dirhash find full"));
 1103         slot = ufsdirhash_hash(dh, name, namelen);
 1104         while (DH_ENTRY(dh, slot) != offset &&
 1105             DH_ENTRY(dh, slot) != DIRHASH_EMPTY)
 1106                 slot = WRAPINCR(slot, dh->dh_hlen);
 1107         if (DH_ENTRY(dh, slot) != offset)
 1108                 panic("ufsdirhash_findslot: '%.*s' not found", namelen, name);
 1109 
 1110         return (slot);
 1111 }
 1112 
 1113 /*
 1114  * Remove the entry corresponding to the specified slot from the hash array.
 1115  *
 1116  * `dh' must be locked on entry and remains so on return.
 1117  */
 1118 static void
 1119 ufsdirhash_delslot(struct dirhash *dh, int slot)
 1120 {
 1121         int i;
 1122 
 1123         DIRHASH_ASSERT_LOCKED(dh);
 1124 
 1125         /* Mark the entry as deleted. */
 1126         DH_ENTRY(dh, slot) = DIRHASH_DEL;
 1127 
 1128         /* If this is the end of a chain of DIRHASH_DEL slots, remove them. */
 1129         for (i = slot; DH_ENTRY(dh, i) == DIRHASH_DEL; )
 1130                 i = WRAPINCR(i, dh->dh_hlen);
 1131         if (DH_ENTRY(dh, i) == DIRHASH_EMPTY) {
 1132                 i = WRAPDECR(i, dh->dh_hlen);
 1133                 while (DH_ENTRY(dh, i) == DIRHASH_DEL) {
 1134                         DH_ENTRY(dh, i) = DIRHASH_EMPTY;
 1135                         dh->dh_hused--;
 1136                         i = WRAPDECR(i, dh->dh_hlen);
 1137                 }
 1138                 KASSERT(dh->dh_hused >= 0, ("ufsdirhash_delslot neg hlen"));
 1139         }
 1140 }
 1141 
 1142 /*
 1143  * Given a directory entry and its offset, find the offset of the
 1144  * previous entry in the same DIRBLKSIZ-sized block. Returns an
 1145  * offset, or -1 if there is no previous entry in the block or some
 1146  * other problem occurred.
 1147  */
 1148 static doff_t
 1149 ufsdirhash_getprev(struct direct *dirp, doff_t offset)
 1150 {
 1151         struct direct *dp;
 1152         char *blkbuf;
 1153         doff_t blkoff, prevoff;
 1154         int entrypos, i;
 1155 
 1156         blkoff = rounddown2(offset, DIRBLKSIZ); /* offset of start of block */
 1157         entrypos = offset & (DIRBLKSIZ - 1);    /* entry relative to block */
 1158         blkbuf = (char *)dirp - entrypos;
 1159         prevoff = blkoff;
 1160 
 1161         /* If `offset' is the start of a block, there is no previous entry. */
 1162         if (entrypos == 0)
 1163                 return (-1);
 1164 
 1165         /* Scan from the start of the block until we get to the entry. */
 1166         for (i = 0; i < entrypos; i += dp->d_reclen) {
 1167                 dp = (struct direct *)(blkbuf + i);
 1168                 if (dp->d_reclen == 0 || i + dp->d_reclen > entrypos)
 1169                         return (-1);    /* Corrupted directory. */
 1170                 prevoff = blkoff + i;
 1171         }
 1172         return (prevoff);
 1173 }
 1174 
 1175 /*
 1176  * Delete the given dirhash and reclaim its memory. Assumes that 
 1177  * ufsdirhash_list is locked, and leaves it locked. Also assumes 
 1178  * that dh is locked. Returns the amount of memory freed.
 1179  */
 1180 static int
 1181 ufsdirhash_destroy(struct dirhash *dh)
 1182 {
 1183         doff_t **hash;
 1184         u_int8_t *blkfree;
 1185         int i, mem, narrays;
 1186 
 1187         KASSERT(dh->dh_hash != NULL, ("dirhash: NULL hash on list"));
 1188 
 1189         /* Remove it from the list and detach its memory. */
 1190         TAILQ_REMOVE(&ufsdirhash_list, dh, dh_list);
 1191         dh->dh_onlist = 0;
 1192         hash = dh->dh_hash;
 1193         dh->dh_hash = NULL;
 1194         blkfree = dh->dh_blkfree;
 1195         dh->dh_blkfree = NULL;
 1196         narrays = dh->dh_narrays;
 1197         mem = dh->dh_memreq;
 1198         dh->dh_memreq = 0;
 1199 
 1200         /* Unlock dirhash and free the detached memory. */
 1201         ufsdirhash_release(dh);
 1202         for (i = 0; i < narrays; i++)
 1203                 DIRHASH_BLKFREE(hash[i]);
 1204         free(hash, M_DIRHASH);
 1205         free(blkfree, M_DIRHASH);
 1206 
 1207         /* Account for the returned memory. */
 1208         ufs_dirhashmem -= mem;
 1209 
 1210         return (mem);
 1211 }
 1212 
 1213 /*
 1214  * Try to free up `wanted' bytes by stealing memory from existing
 1215  * dirhashes. Returns zero with list locked if successful.
 1216  */
 1217 static int
 1218 ufsdirhash_recycle(int wanted)
 1219 {
 1220         struct dirhash *dh;
 1221 
 1222         DIRHASHLIST_LOCK();
 1223         dh = TAILQ_FIRST(&ufsdirhash_list);
 1224         while (wanted + ufs_dirhashmem > ufs_dirhashmaxmem) {
 1225                 /* Decrement the score; only recycle if it becomes zero. */
 1226                 if (dh == NULL || --dh->dh_score > 0) {
 1227                         DIRHASHLIST_UNLOCK();
 1228                         return (-1);
 1229                 }
 1230                 /*
 1231                  * If we can't lock it it's in use and we don't want to
 1232                  * recycle it anyway.
 1233                  */
 1234                 if (!sx_try_xlock(&dh->dh_lock)) {
 1235                         dh = TAILQ_NEXT(dh, dh_list);
 1236                         continue;
 1237                 }
 1238 
 1239                 ufsdirhash_destroy(dh);
 1240 
 1241                 /* Repeat if necessary. */
 1242                 dh = TAILQ_FIRST(&ufsdirhash_list);
 1243         }
 1244         /* Success; return with list locked. */
 1245         return (0);
 1246 }
 1247 
 1248 /*
 1249  * Callback that frees some dirhashes when the system is low on virtual memory.
 1250  */
 1251 static void
 1252 ufsdirhash_lowmem(void)
 1253 {
 1254         struct dirhash *dh, *dh_temp;
 1255         int memfreed, memwanted;
 1256 
 1257         ufs_dirhashlowmemcount++;
 1258         memfreed = 0;
 1259         memwanted = ufs_dirhashmem * ufs_dirhashreclaimpercent / 100;
 1260 
 1261         DIRHASHLIST_LOCK();
 1262 
 1263         /*
 1264          * Reclaim up to memwanted from the oldest dirhashes. This will allow
 1265          * us to make some progress when the system is running out of memory
 1266          * without compromising the dinamicity of maximum age. If the situation
 1267          * does not improve lowmem will be eventually retriggered and free some
 1268          * other entry in the cache. The entries on the head of the list should
 1269          * be the oldest. If during list traversal we can't get a lock on the
 1270          * dirhash, it will be skipped.
 1271          */
 1272         TAILQ_FOREACH_SAFE(dh, &ufsdirhash_list, dh_list, dh_temp) {
 1273                 if (sx_try_xlock(&dh->dh_lock))
 1274                         memfreed += ufsdirhash_destroy(dh);
 1275                 if (memfreed >= memwanted)
 1276                         break;
 1277         }
 1278         DIRHASHLIST_UNLOCK();
 1279 }
 1280 
 1281 static int
 1282 ufsdirhash_set_reclaimpercent(SYSCTL_HANDLER_ARGS)
 1283 {
 1284         int error, v;
 1285 
 1286         v = ufs_dirhashreclaimpercent;
 1287         error = sysctl_handle_int(oidp, &v, v, req);
 1288         if (error)
 1289                 return (error);
 1290         if (req->newptr == NULL)
 1291                 return (error);
 1292         if (v == ufs_dirhashreclaimpercent)
 1293                 return (0);
 1294 
 1295         /* Refuse invalid percentages */
 1296         if (v < 0 || v > 100)
 1297                 return (EINVAL);
 1298         ufs_dirhashreclaimpercent = v;
 1299         return (0);
 1300 }
 1301 
 1302 void
 1303 ufsdirhash_init(void)
 1304 {
 1305         ufs_dirhashmaxmem = lmax(roundup(hibufspace / 64, PAGE_SIZE),
 1306             2 * 1024 * 1024);
 1307 
 1308         ufsdirhash_zone = uma_zcreate("DIRHASH", DH_NBLKOFF * sizeof(doff_t),
 1309             NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
 1310         mtx_init(&ufsdirhash_mtx, "dirhash list", NULL, MTX_DEF);
 1311         TAILQ_INIT(&ufsdirhash_list);
 1312 
 1313         /* Register a callback function to handle low memory signals */
 1314         EVENTHANDLER_REGISTER(vm_lowmem, ufsdirhash_lowmem, NULL, 
 1315             EVENTHANDLER_PRI_FIRST);
 1316 }
 1317 
 1318 void
 1319 ufsdirhash_uninit(void)
 1320 {
 1321         KASSERT(TAILQ_EMPTY(&ufsdirhash_list), ("ufsdirhash_uninit"));
 1322         uma_zdestroy(ufsdirhash_zone);
 1323         mtx_destroy(&ufsdirhash_mtx);
 1324 }
 1325 
 1326 #endif /* UFS_DIRHASH */

Cache object: efcf97592dc2c27990ca5f688dba7ce3


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.