The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/ufs/ufs/ufs_dirhash.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 2001, 2002 Ian Dowse.  All rights reserved.
    3  *
    4  * Redistribution and use in source and binary forms, with or without
    5  * modification, are permitted provided that the following conditions
    6  * are met:
    7  * 1. Redistributions of source code must retain the above copyright
    8  *    notice, this list of conditions and the following disclaimer.
    9  * 2. Redistributions in binary form must reproduce the above copyright
   10  *    notice, this list of conditions and the following disclaimer in the
   11  *    documentation and/or other materials provided with the distribution.
   12  *
   13  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   14  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   16  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   17  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   18  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   19  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   20  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   21  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   22  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   23  * SUCH DAMAGE.
   24  */
   25 
   26 /*
   27  * This implements a hash-based lookup scheme for UFS directories.
   28  */
   29 
   30 #include <sys/cdefs.h>
   31 __FBSDID("$FreeBSD$");
   32 
   33 #include "opt_ufs.h"
   34 
   35 #ifdef UFS_DIRHASH
   36 
   37 #include <sys/param.h>
   38 #include <sys/systm.h>
   39 #include <sys/kernel.h>
   40 #include <sys/lock.h>
   41 #include <sys/mutex.h>
   42 #include <sys/malloc.h>
   43 #include <sys/fnv_hash.h>
   44 #include <sys/proc.h>
   45 #include <sys/bio.h>
   46 #include <sys/buf.h>
   47 #include <sys/vnode.h>
   48 #include <sys/mount.h>
   49 #include <sys/refcount.h>
   50 #include <sys/sysctl.h>
   51 #include <sys/sx.h>
   52 #include <sys/eventhandler.h>
   53 #include <sys/time.h>
   54 #include <vm/uma.h>
   55 
   56 #include <ufs/ufs/quota.h>
   57 #include <ufs/ufs/inode.h>
   58 #include <ufs/ufs/dir.h>
   59 #include <ufs/ufs/dirhash.h>
   60 #include <ufs/ufs/extattr.h>
   61 #include <ufs/ufs/ufsmount.h>
   62 #include <ufs/ufs/ufs_extern.h>
   63 
   64 #define WRAPINCR(val, limit)    (((val) + 1 == (limit)) ? 0 : ((val) + 1))
   65 #define WRAPDECR(val, limit)    (((val) == 0) ? ((limit) - 1) : ((val) - 1))
   66 #define OFSFMT(vp)              ((vp)->v_mount->mnt_maxsymlinklen <= 0)
   67 #define BLKFREE2IDX(n)          ((n) > DH_NFSTATS ? DH_NFSTATS : (n))
   68 
   69 static MALLOC_DEFINE(M_DIRHASH, "ufs_dirhash", "UFS directory hash tables");
   70 
   71 static int ufs_mindirhashsize = DIRBLKSIZ * 5;
   72 SYSCTL_INT(_vfs_ufs, OID_AUTO, dirhash_minsize, CTLFLAG_RW,
   73     &ufs_mindirhashsize,
   74     0, "minimum directory size in bytes for which to use hashed lookup");
   75 static int ufs_dirhashmaxmem = 2 * 1024 * 1024; /* NOTE: initial value. It is
   76                                                    tuned in ufsdirhash_init() */
   77 SYSCTL_INT(_vfs_ufs, OID_AUTO, dirhash_maxmem, CTLFLAG_RW, &ufs_dirhashmaxmem,
   78     0, "maximum allowed dirhash memory usage");
   79 static int ufs_dirhashmem;
   80 SYSCTL_INT(_vfs_ufs, OID_AUTO, dirhash_mem, CTLFLAG_RD, &ufs_dirhashmem,
   81     0, "current dirhash memory usage");
   82 static int ufs_dirhashcheck = 0;
   83 SYSCTL_INT(_vfs_ufs, OID_AUTO, dirhash_docheck, CTLFLAG_RW, &ufs_dirhashcheck,
   84     0, "enable extra sanity tests");
   85 static int ufs_dirhashlowmemcount = 0;
   86 SYSCTL_INT(_vfs_ufs, OID_AUTO, dirhash_lowmemcount, CTLFLAG_RD, 
   87     &ufs_dirhashlowmemcount, 0, "number of times low memory hook called");
   88 static int ufs_dirhashreclaimage = 5;
   89 SYSCTL_INT(_vfs_ufs, OID_AUTO, dirhash_reclaimage, CTLFLAG_RW, 
   90     &ufs_dirhashreclaimage, 0, 
   91     "max time in seconds of hash inactivity before deletion in low VM events");
   92 
   93 
   94 static int ufsdirhash_hash(struct dirhash *dh, char *name, int namelen);
   95 static void ufsdirhash_adjfree(struct dirhash *dh, doff_t offset, int diff);
   96 static void ufsdirhash_delslot(struct dirhash *dh, int slot);
   97 static int ufsdirhash_findslot(struct dirhash *dh, char *name, int namelen,
   98            doff_t offset);
   99 static doff_t ufsdirhash_getprev(struct direct *dp, doff_t offset);
  100 static int ufsdirhash_recycle(int wanted);
  101 static void ufsdirhash_lowmem(void);
  102 static void ufsdirhash_free_locked(struct inode *ip);
  103 
  104 static uma_zone_t       ufsdirhash_zone;
  105 
  106 #define DIRHASHLIST_LOCK()              mtx_lock(&ufsdirhash_mtx)
  107 #define DIRHASHLIST_UNLOCK()            mtx_unlock(&ufsdirhash_mtx)
  108 #define DIRHASH_BLKALLOC_WAITOK()       uma_zalloc(ufsdirhash_zone, M_WAITOK)
  109 #define DIRHASH_BLKFREE(ptr)            uma_zfree(ufsdirhash_zone, (ptr))
  110 #define DIRHASH_ASSERT_LOCKED(dh)                                       \
  111     sx_assert(&(dh)->dh_lock, SA_LOCKED)
  112 
  113 /* Dirhash list; recently-used entries are near the tail. */
  114 static TAILQ_HEAD(, dirhash) ufsdirhash_list;
  115 
  116 /* Protects: ufsdirhash_list, `dh_list' field, ufs_dirhashmem. */
  117 static struct mtx       ufsdirhash_mtx;
  118 
  119 /*
  120  * Locking:
  121  *
  122  * The relationship between inode and dirhash is protected either by an
  123  * exclusive vnode lock or the vnode interlock where a shared vnode lock
  124  * may be used.  The dirhash_mtx is acquired after the dirhash lock.  To
  125  * handle teardown races, code wishing to lock the dirhash for an inode
  126  * when using a shared vnode lock must obtain a private reference on the
  127  * dirhash while holding the vnode interlock.  They can drop it once they
  128  * have obtained the dirhash lock and verified that the dirhash wasn't
  129  * recycled while they waited for the dirhash lock.
  130  *
  131  * ufsdirhash_build() acquires a shared lock on the dirhash when it is
  132  * successful.  This lock is released after a call to ufsdirhash_lookup().
  133  *
  134  * Functions requiring exclusive access use ufsdirhash_acquire() which may
  135  * free a dirhash structure that was recycled by ufsdirhash_recycle().
  136  *
  137  * The dirhash lock may be held across io operations.
  138  *
  139  * WITNESS reports a lock order reversal between the "bufwait" lock
  140  * and the "dirhash" lock.  However, this specific reversal will not
  141  * cause a deadlock.  To get a deadlock, one would have to lock a
  142  * buffer followed by the dirhash while a second thread locked a
  143  * buffer while holding the dirhash lock.  The second order can happen
  144  * under a shared or exclusive vnode lock for the associated directory
  145  * in lookup().  The first order, however, can only happen under an
  146  * exclusive vnode lock (e.g. unlink(), rename(), etc.).  Thus, for
  147  * a thread to be doing a "bufwait" -> "dirhash" order, it has to hold
  148  * an exclusive vnode lock.  That exclusive vnode lock will prevent
  149  * any other threads from doing a "dirhash" -> "bufwait" order.
  150  */
  151 
  152 static void
  153 ufsdirhash_hold(struct dirhash *dh)
  154 {
  155 
  156         refcount_acquire(&dh->dh_refcount);
  157 }
  158 
  159 static void
  160 ufsdirhash_drop(struct dirhash *dh)
  161 {
  162 
  163         if (refcount_release(&dh->dh_refcount)) {
  164                 sx_destroy(&dh->dh_lock);
  165                 free(dh, M_DIRHASH);
  166         }
  167 }
  168 
  169 /*
  170  * Release the lock on a dirhash.
  171  */
  172 static void
  173 ufsdirhash_release(struct dirhash *dh)
  174 {
  175 
  176         sx_unlock(&dh->dh_lock);
  177 }
  178 
  179 /*
  180  * Either acquire an existing hash locked shared or create a new hash and
  181  * return it exclusively locked.  May return NULL if the allocation fails.
  182  *
  183  * The vnode interlock is used to protect the i_dirhash pointer from
  184  * simultaneous access while only a shared vnode lock is held.
  185  */
  186 static struct dirhash *
  187 ufsdirhash_create(struct inode *ip)
  188 {
  189         struct dirhash *ndh;
  190         struct dirhash *dh;
  191         struct vnode *vp;
  192         int error;
  193 
  194         error = 0;
  195         ndh = dh = NULL;
  196         vp = ip->i_vnode;
  197         for (;;) {
  198                 /* Racy check for i_dirhash to prefetch a dirhash structure. */
  199                 if (ip->i_dirhash == NULL && ndh == NULL) {
  200                         ndh = malloc(sizeof *dh, M_DIRHASH,
  201                             M_NOWAIT | M_ZERO);
  202                         if (ndh == NULL)
  203                                 return (NULL);
  204                         refcount_init(&ndh->dh_refcount, 1);
  205 
  206                         /*
  207                          * The DUPOK is to prevent warnings from the
  208                          * sx_slock() a few lines down which is safe
  209                          * since the duplicate lock in that case is
  210                          * the one for this dirhash we are creating
  211                          * now which has no external references until
  212                          * after this function returns.
  213                          */
  214                         sx_init_flags(&ndh->dh_lock, "dirhash", SX_DUPOK);
  215                         sx_xlock(&ndh->dh_lock);
  216                 }
  217                 /*
  218                  * Check i_dirhash.  If it's NULL just try to use a
  219                  * preallocated structure.  If none exists loop and try again.
  220                  */
  221                 VI_LOCK(vp);
  222                 dh = ip->i_dirhash;
  223                 if (dh == NULL) {
  224                         ip->i_dirhash = ndh;
  225                         VI_UNLOCK(vp);
  226                         if (ndh == NULL)
  227                                 continue;
  228                         return (ndh);
  229                 }
  230                 ufsdirhash_hold(dh);
  231                 VI_UNLOCK(vp);
  232 
  233                 /* Acquire a shared lock on existing hashes. */
  234                 sx_slock(&dh->dh_lock);
  235 
  236                 /* The hash could've been recycled while we were waiting. */
  237                 VI_LOCK(vp);
  238                 if (ip->i_dirhash != dh) {
  239                         VI_UNLOCK(vp);
  240                         ufsdirhash_release(dh);
  241                         ufsdirhash_drop(dh);
  242                         continue;
  243                 }
  244                 VI_UNLOCK(vp);
  245                 ufsdirhash_drop(dh);
  246 
  247                 /* If the hash is still valid we've succeeded. */
  248                 if (dh->dh_hash != NULL)
  249                         break;
  250                 /*
  251                  * If the hash is NULL it has been recycled.  Try to upgrade
  252                  * so we can recreate it.  If we fail the upgrade, drop our
  253                  * lock and try again.
  254                  */
  255                 if (sx_try_upgrade(&dh->dh_lock))
  256                         break;
  257                 sx_sunlock(&dh->dh_lock);
  258         }
  259         /* Free the preallocated structure if it was not necessary. */
  260         if (ndh) {
  261                 ufsdirhash_release(ndh);
  262                 ufsdirhash_drop(ndh);
  263         }
  264         return (dh);
  265 }
  266 
  267 /*
  268  * Acquire an exclusive lock on an existing hash.  Requires an exclusive
  269  * vnode lock to protect the i_dirhash pointer.  hashes that have been
  270  * recycled are reclaimed here and NULL is returned.
  271  */
  272 static struct dirhash *
  273 ufsdirhash_acquire(struct inode *ip)
  274 {
  275         struct dirhash *dh;
  276         struct vnode *vp;
  277 
  278         ASSERT_VOP_ELOCKED(ip->i_vnode, __FUNCTION__);
  279 
  280         vp = ip->i_vnode;
  281         dh = ip->i_dirhash;
  282         if (dh == NULL)
  283                 return (NULL);
  284         sx_xlock(&dh->dh_lock);
  285         if (dh->dh_hash != NULL)
  286                 return (dh);
  287         ufsdirhash_free_locked(ip);
  288         return (NULL);
  289 }
  290 
  291 /*
  292  * Acquire exclusively and free the hash pointed to by ip.  Works with a
  293  * shared or exclusive vnode lock.
  294  */
  295 void
  296 ufsdirhash_free(struct inode *ip)
  297 {
  298         struct dirhash *dh;
  299         struct vnode *vp;
  300 
  301         vp = ip->i_vnode;
  302         for (;;) {
  303                 /* Grab a reference on this inode's dirhash if it has one. */
  304                 VI_LOCK(vp);
  305                 dh = ip->i_dirhash;
  306                 if (dh == NULL) {
  307                         VI_UNLOCK(vp);
  308                         return;
  309                 }
  310                 ufsdirhash_hold(dh);
  311                 VI_UNLOCK(vp);
  312 
  313                 /* Exclusively lock the dirhash. */
  314                 sx_xlock(&dh->dh_lock);
  315 
  316                 /* If this dirhash still belongs to this inode, then free it. */
  317                 VI_LOCK(vp);
  318                 if (ip->i_dirhash == dh) {
  319                         VI_UNLOCK(vp);
  320                         ufsdirhash_drop(dh);
  321                         break;
  322                 }
  323                 VI_UNLOCK(vp);
  324 
  325                 /*
  326                  * This inode's dirhash has changed while we were
  327                  * waiting for the dirhash lock, so try again.
  328                  */
  329                 ufsdirhash_release(dh);
  330                 ufsdirhash_drop(dh);
  331         }
  332         ufsdirhash_free_locked(ip);
  333 }
  334 
  335 /*
  336  * Attempt to build up a hash table for the directory contents in
  337  * inode 'ip'. Returns 0 on success, or -1 of the operation failed.
  338  */
  339 int
  340 ufsdirhash_build(struct inode *ip)
  341 {
  342         struct dirhash *dh;
  343         struct buf *bp = NULL;
  344         struct direct *ep;
  345         struct vnode *vp;
  346         doff_t bmask, pos;
  347         int dirblocks, i, j, memreqd, nblocks, narrays, nslots, slot;
  348 
  349         /* Take care of a decreased sysctl value. */
  350         while (ufs_dirhashmem > ufs_dirhashmaxmem) {
  351                 if (ufsdirhash_recycle(0) != 0)
  352                         return (-1);
  353                 /* Recycled enough memory, so unlock the list. */
  354                 DIRHASHLIST_UNLOCK();
  355         }
  356 
  357         /* Check if we can/should use dirhash. */
  358         if (ip->i_size < ufs_mindirhashsize || OFSFMT(ip->i_vnode) ||
  359             ip->i_effnlink == 0) {
  360                 if (ip->i_dirhash)
  361                         ufsdirhash_free(ip);
  362                 return (-1);
  363         }
  364         dh = ufsdirhash_create(ip);
  365         if (dh == NULL)
  366                 return (-1);
  367         if (dh->dh_hash != NULL)
  368                 return (0);
  369 
  370         vp = ip->i_vnode;
  371         /* Allocate 50% more entries than this dir size could ever need. */
  372         KASSERT(ip->i_size >= DIRBLKSIZ, ("ufsdirhash_build size"));
  373         nslots = ip->i_size / DIRECTSIZ(1);
  374         nslots = (nslots * 3 + 1) / 2;
  375         narrays = howmany(nslots, DH_NBLKOFF);
  376         nslots = narrays * DH_NBLKOFF;
  377         dirblocks = howmany(ip->i_size, DIRBLKSIZ);
  378         nblocks = (dirblocks * 3 + 1) / 2;
  379         memreqd = sizeof(*dh) + narrays * sizeof(*dh->dh_hash) +
  380             narrays * DH_NBLKOFF * sizeof(**dh->dh_hash) +
  381             nblocks * sizeof(*dh->dh_blkfree);
  382         DIRHASHLIST_LOCK();
  383         if (memreqd + ufs_dirhashmem > ufs_dirhashmaxmem) {
  384                 DIRHASHLIST_UNLOCK();
  385                 if (memreqd > ufs_dirhashmaxmem / 2)
  386                         goto fail;
  387                 /* Try to free some space. */
  388                 if (ufsdirhash_recycle(memreqd) != 0)
  389                         goto fail;
  390                 /* Enough was freed, and list has been locked. */
  391         }
  392         ufs_dirhashmem += memreqd;
  393         DIRHASHLIST_UNLOCK();
  394 
  395         /* Initialise the hash table and block statistics. */
  396         dh->dh_memreq = memreqd;
  397         dh->dh_narrays = narrays;
  398         dh->dh_hlen = nslots;
  399         dh->dh_nblk = nblocks;
  400         dh->dh_dirblks = dirblocks;
  401         for (i = 0; i < DH_NFSTATS; i++)
  402                 dh->dh_firstfree[i] = -1;
  403         dh->dh_firstfree[DH_NFSTATS] = 0;
  404         dh->dh_hused = 0;
  405         dh->dh_seqoff = -1;
  406         dh->dh_score = DH_SCOREINIT;
  407         dh->dh_lastused = time_second;
  408 
  409         /*
  410          * Use non-blocking mallocs so that we will revert to a linear
  411          * lookup on failure rather than potentially blocking forever.
  412          */
  413         dh->dh_hash = malloc(narrays * sizeof(dh->dh_hash[0]),
  414             M_DIRHASH, M_NOWAIT | M_ZERO);
  415         if (dh->dh_hash == NULL)
  416                 goto fail;
  417         dh->dh_blkfree = malloc(nblocks * sizeof(dh->dh_blkfree[0]),
  418             M_DIRHASH, M_NOWAIT);
  419         if (dh->dh_blkfree == NULL)
  420                 goto fail;
  421         for (i = 0; i < narrays; i++) {
  422                 if ((dh->dh_hash[i] = DIRHASH_BLKALLOC_WAITOK()) == NULL)
  423                         goto fail;
  424                 for (j = 0; j < DH_NBLKOFF; j++)
  425                         dh->dh_hash[i][j] = DIRHASH_EMPTY;
  426         }
  427         for (i = 0; i < dirblocks; i++)
  428                 dh->dh_blkfree[i] = DIRBLKSIZ / DIRALIGN;
  429         bmask = vp->v_mount->mnt_stat.f_iosize - 1;
  430         pos = 0;
  431         while (pos < ip->i_size) {
  432                 /* If necessary, get the next directory block. */
  433                 if ((pos & bmask) == 0) {
  434                         if (bp != NULL)
  435                                 brelse(bp);
  436                         if (UFS_BLKATOFF(vp, (off_t)pos, NULL, &bp) != 0)
  437                                 goto fail;
  438                 }
  439 
  440                 /* Add this entry to the hash. */
  441                 ep = (struct direct *)((char *)bp->b_data + (pos & bmask));
  442                 if (ep->d_reclen == 0 || ep->d_reclen >
  443                     DIRBLKSIZ - (pos & (DIRBLKSIZ - 1))) {
  444                         /* Corrupted directory. */
  445                         brelse(bp);
  446                         goto fail;
  447                 }
  448                 if (ep->d_ino != 0) {
  449                         /* Add the entry (simplified ufsdirhash_add). */
  450                         slot = ufsdirhash_hash(dh, ep->d_name, ep->d_namlen);
  451                         while (DH_ENTRY(dh, slot) != DIRHASH_EMPTY)
  452                                 slot = WRAPINCR(slot, dh->dh_hlen);
  453                         dh->dh_hused++;
  454                         DH_ENTRY(dh, slot) = pos;
  455                         ufsdirhash_adjfree(dh, pos, -DIRSIZ(0, ep));
  456                 }
  457                 pos += ep->d_reclen;
  458         }
  459 
  460         if (bp != NULL)
  461                 brelse(bp);
  462         DIRHASHLIST_LOCK();
  463         TAILQ_INSERT_TAIL(&ufsdirhash_list, dh, dh_list);
  464         dh->dh_onlist = 1;
  465         DIRHASHLIST_UNLOCK();
  466         sx_downgrade(&dh->dh_lock);
  467         return (0);
  468 
  469 fail:
  470         ufsdirhash_free_locked(ip);
  471         return (-1);
  472 }
  473 
  474 /*
  475  * Free any hash table associated with inode 'ip'.
  476  */
  477 static void
  478 ufsdirhash_free_locked(struct inode *ip)
  479 {
  480         struct dirhash *dh;
  481         struct vnode *vp;
  482         int i;
  483 
  484         DIRHASH_ASSERT_LOCKED(ip->i_dirhash);
  485 
  486         /*
  487          * Clear the pointer in the inode to prevent new threads from
  488          * finding the dead structure.
  489          */
  490         vp = ip->i_vnode;
  491         VI_LOCK(vp);
  492         dh = ip->i_dirhash;
  493         ip->i_dirhash = NULL;
  494         VI_UNLOCK(vp);
  495 
  496         /*
  497          * Remove the hash from the list since we are going to free its
  498          * memory.
  499          */
  500         DIRHASHLIST_LOCK();
  501         if (dh->dh_onlist)
  502                 TAILQ_REMOVE(&ufsdirhash_list, dh, dh_list);
  503         ufs_dirhashmem -= dh->dh_memreq;
  504         DIRHASHLIST_UNLOCK();
  505 
  506         /*
  507          * At this point, any waiters for the lock should hold their
  508          * own reference on the dirhash structure.  They will drop
  509          * that reference once they grab the vnode interlock and see
  510          * that ip->i_dirhash is NULL.
  511          */
  512         sx_xunlock(&dh->dh_lock);
  513 
  514         /*
  515          * Handle partially recycled as well as fully constructed hashes.
  516          */
  517         if (dh->dh_hash != NULL) {
  518                 for (i = 0; i < dh->dh_narrays; i++)
  519                         if (dh->dh_hash[i] != NULL)
  520                                 DIRHASH_BLKFREE(dh->dh_hash[i]);
  521                 free(dh->dh_hash, M_DIRHASH);
  522                 if (dh->dh_blkfree != NULL)
  523                         free(dh->dh_blkfree, M_DIRHASH);
  524         }
  525 
  526         /*
  527          * Drop the inode's reference to the data structure.
  528          */
  529         ufsdirhash_drop(dh);
  530 }
  531 
  532 /*
  533  * Find the offset of the specified name within the given inode.
  534  * Returns 0 on success, ENOENT if the entry does not exist, or
  535  * EJUSTRETURN if the caller should revert to a linear search.
  536  *
  537  * If successful, the directory offset is stored in *offp, and a
  538  * pointer to a struct buf containing the entry is stored in *bpp. If
  539  * prevoffp is non-NULL, the offset of the previous entry within
  540  * the DIRBLKSIZ-sized block is stored in *prevoffp (if the entry
  541  * is the first in a block, the start of the block is used).
  542  *
  543  * Must be called with the hash locked.  Returns with the hash unlocked.
  544  */
  545 int
  546 ufsdirhash_lookup(struct inode *ip, char *name, int namelen, doff_t *offp,
  547     struct buf **bpp, doff_t *prevoffp)
  548 {
  549         struct dirhash *dh, *dh_next;
  550         struct direct *dp;
  551         struct vnode *vp;
  552         struct buf *bp;
  553         doff_t blkoff, bmask, offset, prevoff, seqoff;
  554         int i, slot;
  555         int error;
  556 
  557         dh = ip->i_dirhash;
  558         KASSERT(dh != NULL && dh->dh_hash != NULL,
  559             ("ufsdirhash_lookup: Invalid dirhash %p\n", dh));
  560         DIRHASH_ASSERT_LOCKED(dh);
  561         /*
  562          * Move this dirhash towards the end of the list if it has a
  563          * score higher than the next entry, and acquire the dh_lock.
  564          */
  565         DIRHASHLIST_LOCK();
  566         if (TAILQ_NEXT(dh, dh_list) != NULL) {
  567                 /*
  568                  * If the new score will be greater than that of the next
  569                  * entry, then move this entry past it. With both mutexes
  570                  * held, dh_next won't go away, but its dh_score could
  571                  * change; that's not important since it is just a hint.
  572                  */
  573                 if ((dh_next = TAILQ_NEXT(dh, dh_list)) != NULL &&
  574                     dh->dh_score >= dh_next->dh_score) {
  575                         KASSERT(dh->dh_onlist, ("dirhash: not on list"));
  576                         TAILQ_REMOVE(&ufsdirhash_list, dh, dh_list);
  577                         TAILQ_INSERT_AFTER(&ufsdirhash_list, dh_next, dh,
  578                             dh_list);
  579                 }
  580         }
  581         /* Update the score. */
  582         if (dh->dh_score < DH_SCOREMAX)
  583                 dh->dh_score++;
  584 
  585         /* Update last used time. */
  586         dh->dh_lastused = time_second;
  587         DIRHASHLIST_UNLOCK();
  588 
  589         vp = ip->i_vnode;
  590         bmask = vp->v_mount->mnt_stat.f_iosize - 1;
  591         blkoff = -1;
  592         bp = NULL;
  593         seqoff = dh->dh_seqoff;
  594 restart:
  595         slot = ufsdirhash_hash(dh, name, namelen);
  596 
  597         if (seqoff != -1) {
  598                 /*
  599                  * Sequential access optimisation. seqoff contains the
  600                  * offset of the directory entry immediately following
  601                  * the last entry that was looked up. Check if this offset
  602                  * appears in the hash chain for the name we are looking for.
  603                  */
  604                 for (i = slot; (offset = DH_ENTRY(dh, i)) != DIRHASH_EMPTY;
  605                     i = WRAPINCR(i, dh->dh_hlen))
  606                         if (offset == seqoff)
  607                                 break;
  608                 if (offset == seqoff) {
  609                         /*
  610                          * We found an entry with the expected offset. This
  611                          * is probably the entry we want, but if not, the
  612                          * code below will retry.
  613                          */ 
  614                         slot = i;
  615                 } else
  616                         seqoff = -1;
  617         }
  618 
  619         for (; (offset = DH_ENTRY(dh, slot)) != DIRHASH_EMPTY;
  620             slot = WRAPINCR(slot, dh->dh_hlen)) {
  621                 if (offset == DIRHASH_DEL)
  622                         continue;
  623                 if (offset < 0 || offset >= ip->i_size)
  624                         panic("ufsdirhash_lookup: bad offset in hash array");
  625                 if ((offset & ~bmask) != blkoff) {
  626                         if (bp != NULL)
  627                                 brelse(bp);
  628                         blkoff = offset & ~bmask;
  629                         if (UFS_BLKATOFF(vp, (off_t)blkoff, NULL, &bp) != 0) {
  630                                 error = EJUSTRETURN;
  631                                 goto fail;
  632                         }
  633                 }
  634                 KASSERT(bp != NULL, ("no buffer allocated"));
  635                 dp = (struct direct *)(bp->b_data + (offset & bmask));
  636                 if (dp->d_reclen == 0 || dp->d_reclen >
  637                     DIRBLKSIZ - (offset & (DIRBLKSIZ - 1))) {
  638                         /* Corrupted directory. */
  639                         error = EJUSTRETURN;
  640                         goto fail;
  641                 }
  642                 if (dp->d_namlen == namelen &&
  643                     bcmp(dp->d_name, name, namelen) == 0) {
  644                         /* Found. Get the prev offset if needed. */
  645                         if (prevoffp != NULL) {
  646                                 if (offset & (DIRBLKSIZ - 1)) {
  647                                         prevoff = ufsdirhash_getprev(dp,
  648                                             offset);
  649                                         if (prevoff == -1) {
  650                                                 error = EJUSTRETURN;
  651                                                 goto fail;
  652                                         }
  653                                 } else
  654                                         prevoff = offset;
  655                                 *prevoffp = prevoff;
  656                         }
  657 
  658                         /* Update offset. */
  659                         dh->dh_seqoff = offset + DIRSIZ(0, dp);
  660                         *bpp = bp;
  661                         *offp = offset;
  662                         ufsdirhash_release(dh);
  663                         return (0);
  664                 }
  665 
  666                 /*
  667                  * When the name doesn't match in the sequential
  668                  * optimization case, go back and search normally.
  669                  */
  670                 if (seqoff != -1) {
  671                         seqoff = -1;
  672                         goto restart;
  673                 }
  674         }
  675         error = ENOENT;
  676 fail:
  677         ufsdirhash_release(dh);
  678         if (bp != NULL)
  679                 brelse(bp);
  680         return (error);
  681 }
  682 
  683 /*
  684  * Find a directory block with room for 'slotneeded' bytes. Returns
  685  * the offset of the directory entry that begins the free space.
  686  * This will either be the offset of an existing entry that has free
  687  * space at the end, or the offset of an entry with d_ino == 0 at
  688  * the start of a DIRBLKSIZ block.
  689  *
  690  * To use the space, the caller may need to compact existing entries in
  691  * the directory. The total number of bytes in all of the entries involved
  692  * in the compaction is stored in *slotsize. In other words, all of
  693  * the entries that must be compacted are exactly contained in the
  694  * region beginning at the returned offset and spanning *slotsize bytes.
  695  *
  696  * Returns -1 if no space was found, indicating that the directory
  697  * must be extended.
  698  */
  699 doff_t
  700 ufsdirhash_findfree(struct inode *ip, int slotneeded, int *slotsize)
  701 {
  702         struct direct *dp;
  703         struct dirhash *dh;
  704         struct buf *bp;
  705         doff_t pos, slotstart;
  706         int dirblock, error, freebytes, i;
  707 
  708         dh = ip->i_dirhash;
  709         KASSERT(dh != NULL && dh->dh_hash != NULL,
  710             ("ufsdirhash_findfree: Invalid dirhash %p\n", dh));
  711         DIRHASH_ASSERT_LOCKED(dh);
  712 
  713         /* Find a directory block with the desired free space. */
  714         dirblock = -1;
  715         for (i = howmany(slotneeded, DIRALIGN); i <= DH_NFSTATS; i++)
  716                 if ((dirblock = dh->dh_firstfree[i]) != -1)
  717                         break;
  718         if (dirblock == -1)
  719                 return (-1);
  720 
  721         KASSERT(dirblock < dh->dh_nblk &&
  722             dh->dh_blkfree[dirblock] >= howmany(slotneeded, DIRALIGN),
  723             ("ufsdirhash_findfree: bad stats"));
  724         pos = dirblock * DIRBLKSIZ;
  725         error = UFS_BLKATOFF(ip->i_vnode, (off_t)pos, (char **)&dp, &bp);
  726         if (error)
  727                 return (-1);
  728 
  729         /* Find the first entry with free space. */
  730         for (i = 0; i < DIRBLKSIZ; ) {
  731                 if (dp->d_reclen == 0) {
  732                         brelse(bp);
  733                         return (-1);
  734                 }
  735                 if (dp->d_ino == 0 || dp->d_reclen > DIRSIZ(0, dp))
  736                         break;
  737                 i += dp->d_reclen;
  738                 dp = (struct direct *)((char *)dp + dp->d_reclen);
  739         }
  740         if (i > DIRBLKSIZ) {
  741                 brelse(bp);
  742                 return (-1);
  743         }
  744         slotstart = pos + i;
  745 
  746         /* Find the range of entries needed to get enough space */
  747         freebytes = 0;
  748         while (i < DIRBLKSIZ && freebytes < slotneeded) {
  749                 freebytes += dp->d_reclen;
  750                 if (dp->d_ino != 0)
  751                         freebytes -= DIRSIZ(0, dp);
  752                 if (dp->d_reclen == 0) {
  753                         brelse(bp);
  754                         return (-1);
  755                 }
  756                 i += dp->d_reclen;
  757                 dp = (struct direct *)((char *)dp + dp->d_reclen);
  758         }
  759         if (i > DIRBLKSIZ) {
  760                 brelse(bp);
  761                 return (-1);
  762         }
  763         if (freebytes < slotneeded)
  764                 panic("ufsdirhash_findfree: free mismatch");
  765         brelse(bp);
  766         *slotsize = pos + i - slotstart;
  767         return (slotstart);
  768 }
  769 
  770 /*
  771  * Return the start of the unused space at the end of a directory, or
  772  * -1 if there are no trailing unused blocks.
  773  */
  774 doff_t
  775 ufsdirhash_enduseful(struct inode *ip)
  776 {
  777 
  778         struct dirhash *dh;
  779         int i;
  780 
  781         dh = ip->i_dirhash;
  782         DIRHASH_ASSERT_LOCKED(dh);
  783         KASSERT(dh != NULL && dh->dh_hash != NULL,
  784             ("ufsdirhash_enduseful: Invalid dirhash %p\n", dh));
  785 
  786         if (dh->dh_blkfree[dh->dh_dirblks - 1] != DIRBLKSIZ / DIRALIGN)
  787                 return (-1);
  788 
  789         for (i = dh->dh_dirblks - 1; i >= 0; i--)
  790                 if (dh->dh_blkfree[i] != DIRBLKSIZ / DIRALIGN)
  791                         break;
  792 
  793         return ((doff_t)(i + 1) * DIRBLKSIZ);
  794 }
  795 
  796 /*
  797  * Insert information into the hash about a new directory entry. dirp
  798  * points to a struct direct containing the entry, and offset specifies
  799  * the offset of this entry.
  800  */
  801 void
  802 ufsdirhash_add(struct inode *ip, struct direct *dirp, doff_t offset)
  803 {
  804         struct dirhash *dh;
  805         int slot;
  806 
  807         if ((dh = ufsdirhash_acquire(ip)) == NULL)
  808                 return;
  809         
  810         KASSERT(offset < dh->dh_dirblks * DIRBLKSIZ,
  811             ("ufsdirhash_add: bad offset"));
  812         /*
  813          * Normal hash usage is < 66%. If the usage gets too high then
  814          * remove the hash entirely and let it be rebuilt later.
  815          */
  816         if (dh->dh_hused >= (dh->dh_hlen * 3) / 4) {
  817                 ufsdirhash_free_locked(ip);
  818                 return;
  819         }
  820 
  821         /* Find a free hash slot (empty or deleted), and add the entry. */
  822         slot = ufsdirhash_hash(dh, dirp->d_name, dirp->d_namlen);
  823         while (DH_ENTRY(dh, slot) >= 0)
  824                 slot = WRAPINCR(slot, dh->dh_hlen);
  825         if (DH_ENTRY(dh, slot) == DIRHASH_EMPTY)
  826                 dh->dh_hused++;
  827         DH_ENTRY(dh, slot) = offset;
  828 
  829         /* Update last used time. */
  830         dh->dh_lastused = time_second;
  831 
  832         /* Update the per-block summary info. */
  833         ufsdirhash_adjfree(dh, offset, -DIRSIZ(0, dirp));
  834         ufsdirhash_release(dh);
  835 }
  836 
  837 /*
  838  * Remove the specified directory entry from the hash. The entry to remove
  839  * is defined by the name in `dirp', which must exist at the specified
  840  * `offset' within the directory.
  841  */
  842 void
  843 ufsdirhash_remove(struct inode *ip, struct direct *dirp, doff_t offset)
  844 {
  845         struct dirhash *dh;
  846         int slot;
  847 
  848         if ((dh = ufsdirhash_acquire(ip)) == NULL)
  849                 return;
  850 
  851         KASSERT(offset < dh->dh_dirblks * DIRBLKSIZ,
  852             ("ufsdirhash_remove: bad offset"));
  853         /* Find the entry */
  854         slot = ufsdirhash_findslot(dh, dirp->d_name, dirp->d_namlen, offset);
  855 
  856         /* Remove the hash entry. */
  857         ufsdirhash_delslot(dh, slot);
  858 
  859         /* Update the per-block summary info. */
  860         ufsdirhash_adjfree(dh, offset, DIRSIZ(0, dirp));
  861         ufsdirhash_release(dh);
  862 }
  863 
  864 /*
  865  * Change the offset associated with a directory entry in the hash. Used
  866  * when compacting directory blocks.
  867  */
  868 void
  869 ufsdirhash_move(struct inode *ip, struct direct *dirp, doff_t oldoff,
  870     doff_t newoff)
  871 {
  872         struct dirhash *dh;
  873         int slot;
  874 
  875         if ((dh = ufsdirhash_acquire(ip)) == NULL)
  876                 return;
  877 
  878         KASSERT(oldoff < dh->dh_dirblks * DIRBLKSIZ &&
  879             newoff < dh->dh_dirblks * DIRBLKSIZ,
  880             ("ufsdirhash_move: bad offset"));
  881         /* Find the entry, and update the offset. */
  882         slot = ufsdirhash_findslot(dh, dirp->d_name, dirp->d_namlen, oldoff);
  883         DH_ENTRY(dh, slot) = newoff;
  884         ufsdirhash_release(dh);
  885 }
  886 
  887 /*
  888  * Inform dirhash that the directory has grown by one block that
  889  * begins at offset (i.e. the new length is offset + DIRBLKSIZ).
  890  */
  891 void
  892 ufsdirhash_newblk(struct inode *ip, doff_t offset)
  893 {
  894         struct dirhash *dh;
  895         int block;
  896 
  897         if ((dh = ufsdirhash_acquire(ip)) == NULL)
  898                 return;
  899 
  900         KASSERT(offset == dh->dh_dirblks * DIRBLKSIZ,
  901             ("ufsdirhash_newblk: bad offset"));
  902         block = offset / DIRBLKSIZ;
  903         if (block >= dh->dh_nblk) {
  904                 /* Out of space; must rebuild. */
  905                 ufsdirhash_free_locked(ip);
  906                 return;
  907         }
  908         dh->dh_dirblks = block + 1;
  909 
  910         /* Account for the new free block. */
  911         dh->dh_blkfree[block] = DIRBLKSIZ / DIRALIGN;
  912         if (dh->dh_firstfree[DH_NFSTATS] == -1)
  913                 dh->dh_firstfree[DH_NFSTATS] = block;
  914         ufsdirhash_release(dh);
  915 }
  916 
  917 /*
  918  * Inform dirhash that the directory is being truncated.
  919  */
  920 void
  921 ufsdirhash_dirtrunc(struct inode *ip, doff_t offset)
  922 {
  923         struct dirhash *dh;
  924         int block, i;
  925 
  926         if ((dh = ufsdirhash_acquire(ip)) == NULL)
  927                 return;
  928 
  929         KASSERT(offset <= dh->dh_dirblks * DIRBLKSIZ,
  930             ("ufsdirhash_dirtrunc: bad offset"));
  931         block = howmany(offset, DIRBLKSIZ);
  932         /*
  933          * If the directory shrinks to less than 1/8 of dh_nblk blocks
  934          * (about 20% of its original size due to the 50% extra added in
  935          * ufsdirhash_build) then free it, and let the caller rebuild
  936          * if necessary.
  937          */
  938         if (block < dh->dh_nblk / 8 && dh->dh_narrays > 1) {
  939                 ufsdirhash_free_locked(ip);
  940                 return;
  941         }
  942 
  943         /*
  944          * Remove any `first free' information pertaining to the
  945          * truncated blocks. All blocks we're removing should be
  946          * completely unused.
  947          */
  948         if (dh->dh_firstfree[DH_NFSTATS] >= block)
  949                 dh->dh_firstfree[DH_NFSTATS] = -1;
  950         for (i = block; i < dh->dh_dirblks; i++)
  951                 if (dh->dh_blkfree[i] != DIRBLKSIZ / DIRALIGN)
  952                         panic("ufsdirhash_dirtrunc: blocks in use");
  953         for (i = 0; i < DH_NFSTATS; i++)
  954                 if (dh->dh_firstfree[i] >= block)
  955                         panic("ufsdirhash_dirtrunc: first free corrupt");
  956         dh->dh_dirblks = block;
  957         ufsdirhash_release(dh);
  958 }
  959 
  960 /*
  961  * Debugging function to check that the dirhash information about
  962  * a directory block matches its actual contents. Panics if a mismatch
  963  * is detected.
  964  *
  965  * On entry, `buf' should point to the start of an in-core
  966  * DIRBLKSIZ-sized directory block, and `offset' should contain the
  967  * offset from the start of the directory of that block.
  968  */
  969 void
  970 ufsdirhash_checkblock(struct inode *ip, char *buf, doff_t offset)
  971 {
  972         struct dirhash *dh;
  973         struct direct *dp;
  974         int block, ffslot, i, nfree;
  975 
  976         if (!ufs_dirhashcheck)
  977                 return;
  978         if ((dh = ufsdirhash_acquire(ip)) == NULL)
  979                 return;
  980 
  981         block = offset / DIRBLKSIZ;
  982         if ((offset & (DIRBLKSIZ - 1)) != 0 || block >= dh->dh_dirblks)
  983                 panic("ufsdirhash_checkblock: bad offset");
  984 
  985         nfree = 0;
  986         for (i = 0; i < DIRBLKSIZ; i += dp->d_reclen) {
  987                 dp = (struct direct *)(buf + i);
  988                 if (dp->d_reclen == 0 || i + dp->d_reclen > DIRBLKSIZ)
  989                         panic("ufsdirhash_checkblock: bad dir");
  990 
  991                 if (dp->d_ino == 0) {
  992 #if 0
  993                         /*
  994                          * XXX entries with d_ino == 0 should only occur
  995                          * at the start of a DIRBLKSIZ block. However the
  996                          * ufs code is tolerant of such entries at other
  997                          * offsets, and fsck does not fix them.
  998                          */
  999                         if (i != 0)
 1000                                 panic("ufsdirhash_checkblock: bad dir inode");
 1001 #endif
 1002                         nfree += dp->d_reclen;
 1003                         continue;
 1004                 }
 1005 
 1006                 /* Check that the entry exists (will panic if it doesn't). */
 1007                 ufsdirhash_findslot(dh, dp->d_name, dp->d_namlen, offset + i);
 1008 
 1009                 nfree += dp->d_reclen - DIRSIZ(0, dp);
 1010         }
 1011         if (i != DIRBLKSIZ)
 1012                 panic("ufsdirhash_checkblock: bad dir end");
 1013 
 1014         if (dh->dh_blkfree[block] * DIRALIGN != nfree)
 1015                 panic("ufsdirhash_checkblock: bad free count");
 1016 
 1017         ffslot = BLKFREE2IDX(nfree / DIRALIGN);
 1018         for (i = 0; i <= DH_NFSTATS; i++)
 1019                 if (dh->dh_firstfree[i] == block && i != ffslot)
 1020                         panic("ufsdirhash_checkblock: bad first-free");
 1021         if (dh->dh_firstfree[ffslot] == -1)
 1022                 panic("ufsdirhash_checkblock: missing first-free entry");
 1023         ufsdirhash_release(dh);
 1024 }
 1025 
 1026 /*
 1027  * Hash the specified filename into a dirhash slot.
 1028  */
 1029 static int
 1030 ufsdirhash_hash(struct dirhash *dh, char *name, int namelen)
 1031 {
 1032         u_int32_t hash;
 1033 
 1034         /*
 1035          * We hash the name and then some other bit of data that is
 1036          * invariant over the dirhash's lifetime. Otherwise names
 1037          * differing only in the last byte are placed close to one
 1038          * another in the table, which is bad for linear probing.
 1039          */
 1040         hash = fnv_32_buf(name, namelen, FNV1_32_INIT);
 1041         hash = fnv_32_buf(&dh, sizeof(dh), hash);
 1042         return (hash % dh->dh_hlen);
 1043 }
 1044 
 1045 /*
 1046  * Adjust the number of free bytes in the block containing `offset'
 1047  * by the value specified by `diff'.
 1048  *
 1049  * The caller must ensure we have exclusive access to `dh'; normally
 1050  * that means that dh_lock should be held, but this is also called
 1051  * from ufsdirhash_build() where exclusive access can be assumed.
 1052  */
 1053 static void
 1054 ufsdirhash_adjfree(struct dirhash *dh, doff_t offset, int diff)
 1055 {
 1056         int block, i, nfidx, ofidx;
 1057 
 1058         /* Update the per-block summary info. */
 1059         block = offset / DIRBLKSIZ;
 1060         KASSERT(block < dh->dh_nblk && block < dh->dh_dirblks,
 1061              ("dirhash bad offset"));
 1062         ofidx = BLKFREE2IDX(dh->dh_blkfree[block]);
 1063         dh->dh_blkfree[block] = (int)dh->dh_blkfree[block] + (diff / DIRALIGN);
 1064         nfidx = BLKFREE2IDX(dh->dh_blkfree[block]);
 1065 
 1066         /* Update the `first free' list if necessary. */
 1067         if (ofidx != nfidx) {
 1068                 /* If removing, scan forward for the next block. */
 1069                 if (dh->dh_firstfree[ofidx] == block) {
 1070                         for (i = block + 1; i < dh->dh_dirblks; i++)
 1071                                 if (BLKFREE2IDX(dh->dh_blkfree[i]) == ofidx)
 1072                                         break;
 1073                         dh->dh_firstfree[ofidx] = (i < dh->dh_dirblks) ? i : -1;
 1074                 }
 1075 
 1076                 /* Make this the new `first free' if necessary */
 1077                 if (dh->dh_firstfree[nfidx] > block ||
 1078                     dh->dh_firstfree[nfidx] == -1)
 1079                         dh->dh_firstfree[nfidx] = block;
 1080         }
 1081 }
 1082 
 1083 /*
 1084  * Find the specified name which should have the specified offset.
 1085  * Returns a slot number, and panics on failure.
 1086  *
 1087  * `dh' must be locked on entry and remains so on return.
 1088  */
 1089 static int
 1090 ufsdirhash_findslot(struct dirhash *dh, char *name, int namelen, doff_t offset)
 1091 {
 1092         int slot;
 1093 
 1094         DIRHASH_ASSERT_LOCKED(dh);
 1095 
 1096         /* Find the entry. */
 1097         KASSERT(dh->dh_hused < dh->dh_hlen, ("dirhash find full"));
 1098         slot = ufsdirhash_hash(dh, name, namelen);
 1099         while (DH_ENTRY(dh, slot) != offset &&
 1100             DH_ENTRY(dh, slot) != DIRHASH_EMPTY)
 1101                 slot = WRAPINCR(slot, dh->dh_hlen);
 1102         if (DH_ENTRY(dh, slot) != offset)
 1103                 panic("ufsdirhash_findslot: '%.*s' not found", namelen, name);
 1104 
 1105         return (slot);
 1106 }
 1107 
 1108 /*
 1109  * Remove the entry corresponding to the specified slot from the hash array.
 1110  *
 1111  * `dh' must be locked on entry and remains so on return.
 1112  */
 1113 static void
 1114 ufsdirhash_delslot(struct dirhash *dh, int slot)
 1115 {
 1116         int i;
 1117 
 1118         DIRHASH_ASSERT_LOCKED(dh);
 1119 
 1120         /* Mark the entry as deleted. */
 1121         DH_ENTRY(dh, slot) = DIRHASH_DEL;
 1122 
 1123         /* If this is the end of a chain of DIRHASH_DEL slots, remove them. */
 1124         for (i = slot; DH_ENTRY(dh, i) == DIRHASH_DEL; )
 1125                 i = WRAPINCR(i, dh->dh_hlen);
 1126         if (DH_ENTRY(dh, i) == DIRHASH_EMPTY) {
 1127                 i = WRAPDECR(i, dh->dh_hlen);
 1128                 while (DH_ENTRY(dh, i) == DIRHASH_DEL) {
 1129                         DH_ENTRY(dh, i) = DIRHASH_EMPTY;
 1130                         dh->dh_hused--;
 1131                         i = WRAPDECR(i, dh->dh_hlen);
 1132                 }
 1133                 KASSERT(dh->dh_hused >= 0, ("ufsdirhash_delslot neg hlen"));
 1134         }
 1135 }
 1136 
 1137 /*
 1138  * Given a directory entry and its offset, find the offset of the
 1139  * previous entry in the same DIRBLKSIZ-sized block. Returns an
 1140  * offset, or -1 if there is no previous entry in the block or some
 1141  * other problem occurred.
 1142  */
 1143 static doff_t
 1144 ufsdirhash_getprev(struct direct *dirp, doff_t offset)
 1145 {
 1146         struct direct *dp;
 1147         char *blkbuf;
 1148         doff_t blkoff, prevoff;
 1149         int entrypos, i;
 1150 
 1151         blkoff = offset & ~(DIRBLKSIZ - 1);     /* offset of start of block */
 1152         entrypos = offset & (DIRBLKSIZ - 1);    /* entry relative to block */
 1153         blkbuf = (char *)dirp - entrypos;
 1154         prevoff = blkoff;
 1155 
 1156         /* If `offset' is the start of a block, there is no previous entry. */
 1157         if (entrypos == 0)
 1158                 return (-1);
 1159 
 1160         /* Scan from the start of the block until we get to the entry. */
 1161         for (i = 0; i < entrypos; i += dp->d_reclen) {
 1162                 dp = (struct direct *)(blkbuf + i);
 1163                 if (dp->d_reclen == 0 || i + dp->d_reclen > entrypos)
 1164                         return (-1);    /* Corrupted directory. */
 1165                 prevoff = blkoff + i;
 1166         }
 1167         return (prevoff);
 1168 }
 1169 
 1170 /*
 1171  * Delete the given dirhash and reclaim its memory. Assumes that 
 1172  * ufsdirhash_list is locked, and leaves it locked. Also assumes 
 1173  * that dh is locked. Returns the amount of memory freed.
 1174  */
 1175 static int
 1176 ufsdirhash_destroy(struct dirhash *dh)
 1177 {
 1178         doff_t **hash;
 1179         u_int8_t *blkfree;
 1180         int i, mem, narrays;
 1181 
 1182         KASSERT(dh->dh_hash != NULL, ("dirhash: NULL hash on list"));
 1183         
 1184         /* Remove it from the list and detach its memory. */
 1185         TAILQ_REMOVE(&ufsdirhash_list, dh, dh_list);
 1186         dh->dh_onlist = 0;
 1187         hash = dh->dh_hash;
 1188         dh->dh_hash = NULL;
 1189         blkfree = dh->dh_blkfree;
 1190         dh->dh_blkfree = NULL;
 1191         narrays = dh->dh_narrays;
 1192         mem = dh->dh_memreq;
 1193         dh->dh_memreq = 0;
 1194 
 1195         /* Unlock dirhash and free the detached memory. */
 1196         ufsdirhash_release(dh);
 1197         for (i = 0; i < narrays; i++)
 1198                 DIRHASH_BLKFREE(hash[i]);
 1199         free(hash, M_DIRHASH);
 1200         free(blkfree, M_DIRHASH);
 1201 
 1202         /* Account for the returned memory. */
 1203         ufs_dirhashmem -= mem;
 1204 
 1205         return (mem);
 1206 }
 1207 
 1208 /*
 1209  * Try to free up `wanted' bytes by stealing memory from existing
 1210  * dirhashes. Returns zero with list locked if successful.
 1211  */
 1212 static int
 1213 ufsdirhash_recycle(int wanted)
 1214 {
 1215         struct dirhash *dh;
 1216 
 1217         DIRHASHLIST_LOCK();
 1218         dh = TAILQ_FIRST(&ufsdirhash_list);
 1219         while (wanted + ufs_dirhashmem > ufs_dirhashmaxmem) {
 1220                 /* Decrement the score; only recycle if it becomes zero. */
 1221                 if (dh == NULL || --dh->dh_score > 0) {
 1222                         DIRHASHLIST_UNLOCK();
 1223                         return (-1);
 1224                 }
 1225                 /*
 1226                  * If we can't lock it it's in use and we don't want to
 1227                  * recycle it anyway.
 1228                  */
 1229                 if (!sx_try_xlock(&dh->dh_lock)) {
 1230                         dh = TAILQ_NEXT(dh, dh_list);
 1231                         continue;
 1232                 }
 1233 
 1234                 ufsdirhash_destroy(dh);
 1235 
 1236                 /* Repeat if necessary. */
 1237                 dh = TAILQ_FIRST(&ufsdirhash_list);
 1238         }
 1239         /* Success; return with list locked. */
 1240         return (0);
 1241 }
 1242 
 1243 /*
 1244  * Callback that frees some dirhashes when the system is low on virtual memory.
 1245  */
 1246 static void
 1247 ufsdirhash_lowmem()
 1248 {
 1249         struct dirhash *dh, *dh_temp;
 1250         int memfreed = 0;
 1251         /* XXX: this 10% may need to be adjusted */
 1252         int memwanted = ufs_dirhashmem / 10;
 1253 
 1254         ufs_dirhashlowmemcount++;
 1255 
 1256         DIRHASHLIST_LOCK();
 1257         /* 
 1258          * Delete dirhashes not used for more than ufs_dirhashreclaimage 
 1259          * seconds. If we can't get a lock on the dirhash, it will be skipped.
 1260          */
 1261         TAILQ_FOREACH_SAFE(dh, &ufsdirhash_list, dh_list, dh_temp) {
 1262                 if (!sx_try_xlock(&dh->dh_lock))
 1263                         continue;
 1264                 if (time_second - dh->dh_lastused > ufs_dirhashreclaimage)
 1265                         memfreed += ufsdirhash_destroy(dh);
 1266                 /* Unlock if we didn't delete the dirhash */
 1267                 else
 1268                         ufsdirhash_release(dh);
 1269         }
 1270 
 1271         /* 
 1272          * If not enough memory was freed, keep deleting hashes from the head 
 1273          * of the dirhash list. The ones closest to the head should be the 
 1274          * oldest. 
 1275          */
 1276         if (memfreed < memwanted) {
 1277                 TAILQ_FOREACH_SAFE(dh, &ufsdirhash_list, dh_list, dh_temp) {
 1278                         if (!sx_try_xlock(&dh->dh_lock))
 1279                                 continue;
 1280                         memfreed += ufsdirhash_destroy(dh);
 1281                         if (memfreed >= memwanted)
 1282                                 break;
 1283                 }
 1284         }
 1285         DIRHASHLIST_UNLOCK();
 1286 }
 1287 
 1288 
 1289 void
 1290 ufsdirhash_init()
 1291 {
 1292         ufs_dirhashmaxmem = lmax(roundup(hibufspace / 64, PAGE_SIZE),
 1293             2 * 1024 * 1024);
 1294 
 1295         ufsdirhash_zone = uma_zcreate("DIRHASH", DH_NBLKOFF * sizeof(doff_t),
 1296             NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
 1297         mtx_init(&ufsdirhash_mtx, "dirhash list", NULL, MTX_DEF);
 1298         TAILQ_INIT(&ufsdirhash_list);
 1299 
 1300         /* Register a callback function to handle low memory signals */
 1301         EVENTHANDLER_REGISTER(vm_lowmem, ufsdirhash_lowmem, NULL, 
 1302             EVENTHANDLER_PRI_FIRST);
 1303 }
 1304 
 1305 void
 1306 ufsdirhash_uninit()
 1307 {
 1308         KASSERT(TAILQ_EMPTY(&ufsdirhash_list), ("ufsdirhash_uninit"));
 1309         uma_zdestroy(ufsdirhash_zone);
 1310         mtx_destroy(&ufsdirhash_mtx);
 1311 }
 1312 
 1313 #endif /* UFS_DIRHASH */

Cache object: 58cf36b24fa2aa9069a7c1501ccea890


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.