The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/fs/dcache.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*
    2  * fs/dcache.c
    3  *
    4  * Complete reimplementation
    5  * (C) 1997 Thomas Schoebel-Theuer,
    6  * with heavy changes by Linus Torvalds
    7  */
    8 
    9 /*
   10  * Notes on the allocation strategy:
   11  *
   12  * The dcache is a master of the icache - whenever a dcache entry
   13  * exists, the inode will always exist. "iput()" is done either when
   14  * the dcache entry is deleted or garbage collected.
   15  */
   16 
   17 #include <linux/syscalls.h>
   18 #include <linux/string.h>
   19 #include <linux/mm.h>
   20 #include <linux/fs.h>
   21 #include <linux/fsnotify.h>
   22 #include <linux/slab.h>
   23 #include <linux/init.h>
   24 #include <linux/hash.h>
   25 #include <linux/cache.h>
   26 #include <linux/export.h>
   27 #include <linux/mount.h>
   28 #include <linux/file.h>
   29 #include <asm/uaccess.h>
   30 #include <linux/security.h>
   31 #include <linux/seqlock.h>
   32 #include <linux/swap.h>
   33 #include <linux/bootmem.h>
   34 #include <linux/fs_struct.h>
   35 #include <linux/hardirq.h>
   36 #include <linux/bit_spinlock.h>
   37 #include <linux/rculist_bl.h>
   38 #include <linux/prefetch.h>
   39 #include <linux/ratelimit.h>
   40 #include "internal.h"
   41 #include "mount.h"
   42 
   43 /*
   44  * Usage:
   45  * dcache->d_inode->i_lock protects:
   46  *   - i_dentry, d_alias, d_inode of aliases
   47  * dcache_hash_bucket lock protects:
   48  *   - the dcache hash table
   49  * s_anon bl list spinlock protects:
   50  *   - the s_anon list (see __d_drop)
   51  * dcache_lru_lock protects:
   52  *   - the dcache lru lists and counters
   53  * d_lock protects:
   54  *   - d_flags
   55  *   - d_name
   56  *   - d_lru
   57  *   - d_count
   58  *   - d_unhashed()
   59  *   - d_parent and d_subdirs
   60  *   - childrens' d_child and d_parent
   61  *   - d_alias, d_inode
   62  *
   63  * Ordering:
   64  * dentry->d_inode->i_lock
   65  *   dentry->d_lock
   66  *     dcache_lru_lock
   67  *     dcache_hash_bucket lock
   68  *     s_anon lock
   69  *
   70  * If there is an ancestor relationship:
   71  * dentry->d_parent->...->d_parent->d_lock
   72  *   ...
   73  *     dentry->d_parent->d_lock
   74  *       dentry->d_lock
   75  *
   76  * If no ancestor relationship:
   77  * if (dentry1 < dentry2)
   78  *   dentry1->d_lock
   79  *     dentry2->d_lock
   80  */
   81 int sysctl_vfs_cache_pressure __read_mostly = 100;
   82 EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure);
   83 
   84 static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lru_lock);
   85 __cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock);
   86 
   87 EXPORT_SYMBOL(rename_lock);
   88 
   89 static struct kmem_cache *dentry_cache __read_mostly;
   90 
   91 /*
   92  * This is the single most critical data structure when it comes
   93  * to the dcache: the hashtable for lookups. Somebody should try
   94  * to make this good - I've just made it work.
   95  *
   96  * This hash-function tries to avoid losing too many bits of hash
   97  * information, yet avoid using a prime hash-size or similar.
   98  */
   99 #define D_HASHBITS     d_hash_shift
  100 #define D_HASHMASK     d_hash_mask
  101 
  102 static unsigned int d_hash_mask __read_mostly;
  103 static unsigned int d_hash_shift __read_mostly;
  104 
  105 static struct hlist_bl_head *dentry_hashtable __read_mostly;
  106 
  107 static inline struct hlist_bl_head *d_hash(const struct dentry *parent,
  108                                         unsigned int hash)
  109 {
  110         hash += (unsigned long) parent / L1_CACHE_BYTES;
  111         hash = hash + (hash >> D_HASHBITS);
  112         return dentry_hashtable + (hash & D_HASHMASK);
  113 }
  114 
  115 /* Statistics gathering. */
  116 struct dentry_stat_t dentry_stat = {
  117         .age_limit = 45,
  118 };
  119 
  120 static DEFINE_PER_CPU(unsigned int, nr_dentry);
  121 
  122 #if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS)
  123 static int get_nr_dentry(void)
  124 {
  125         int i;
  126         int sum = 0;
  127         for_each_possible_cpu(i)
  128                 sum += per_cpu(nr_dentry, i);
  129         return sum < 0 ? 0 : sum;
  130 }
  131 
  132 int proc_nr_dentry(ctl_table *table, int write, void __user *buffer,
  133                    size_t *lenp, loff_t *ppos)
  134 {
  135         dentry_stat.nr_dentry = get_nr_dentry();
  136         return proc_dointvec(table, write, buffer, lenp, ppos);
  137 }
  138 #endif
  139 
  140 /*
  141  * Compare 2 name strings, return 0 if they match, otherwise non-zero.
  142  * The strings are both count bytes long, and count is non-zero.
  143  */
  144 #ifdef CONFIG_DCACHE_WORD_ACCESS
  145 
  146 #include <asm/word-at-a-time.h>
  147 /*
  148  * NOTE! 'cs' and 'scount' come from a dentry, so it has a
  149  * aligned allocation for this particular component. We don't
  150  * strictly need the load_unaligned_zeropad() safety, but it
  151  * doesn't hurt either.
  152  *
  153  * In contrast, 'ct' and 'tcount' can be from a pathname, and do
  154  * need the careful unaligned handling.
  155  */
  156 static inline int dentry_string_cmp(const unsigned char *cs, const unsigned char *ct, unsigned tcount)
  157 {
  158         unsigned long a,b,mask;
  159 
  160         for (;;) {
  161                 a = *(unsigned long *)cs;
  162                 b = load_unaligned_zeropad(ct);
  163                 if (tcount < sizeof(unsigned long))
  164                         break;
  165                 if (unlikely(a != b))
  166                         return 1;
  167                 cs += sizeof(unsigned long);
  168                 ct += sizeof(unsigned long);
  169                 tcount -= sizeof(unsigned long);
  170                 if (!tcount)
  171                         return 0;
  172         }
  173         mask = ~(~0ul << tcount*8);
  174         return unlikely(!!((a ^ b) & mask));
  175 }
  176 
  177 #else
  178 
  179 static inline int dentry_string_cmp(const unsigned char *cs, const unsigned char *ct, unsigned tcount)
  180 {
  181         do {
  182                 if (*cs != *ct)
  183                         return 1;
  184                 cs++;
  185                 ct++;
  186                 tcount--;
  187         } while (tcount);
  188         return 0;
  189 }
  190 
  191 #endif
  192 
  193 static inline int dentry_cmp(const struct dentry *dentry, const unsigned char *ct, unsigned tcount)
  194 {
  195         const unsigned char *cs;
  196         /*
  197          * Be careful about RCU walk racing with rename:
  198          * use ACCESS_ONCE to fetch the name pointer.
  199          *
  200          * NOTE! Even if a rename will mean that the length
  201          * was not loaded atomically, we don't care. The
  202          * RCU walk will check the sequence count eventually,
  203          * and catch it. And we won't overrun the buffer,
  204          * because we're reading the name pointer atomically,
  205          * and a dentry name is guaranteed to be properly
  206          * terminated with a NUL byte.
  207          *
  208          * End result: even if 'len' is wrong, we'll exit
  209          * early because the data cannot match (there can
  210          * be no NUL in the ct/tcount data)
  211          */
  212         cs = ACCESS_ONCE(dentry->d_name.name);
  213         smp_read_barrier_depends();
  214         return dentry_string_cmp(cs, ct, tcount);
  215 }
  216 
  217 static void __d_free(struct rcu_head *head)
  218 {
  219         struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu);
  220 
  221         WARN_ON(!hlist_unhashed(&dentry->d_alias));
  222         if (dname_external(dentry))
  223                 kfree(dentry->d_name.name);
  224         kmem_cache_free(dentry_cache, dentry); 
  225 }
  226 
  227 /*
  228  * no locks, please.
  229  */
  230 static void d_free(struct dentry *dentry)
  231 {
  232         BUG_ON(dentry->d_count);
  233         this_cpu_dec(nr_dentry);
  234         if (dentry->d_op && dentry->d_op->d_release)
  235                 dentry->d_op->d_release(dentry);
  236 
  237         /* if dentry was never visible to RCU, immediate free is OK */
  238         if (!(dentry->d_flags & DCACHE_RCUACCESS))
  239                 __d_free(&dentry->d_u.d_rcu);
  240         else
  241                 call_rcu(&dentry->d_u.d_rcu, __d_free);
  242 }
  243 
  244 /**
  245  * dentry_rcuwalk_barrier - invalidate in-progress rcu-walk lookups
  246  * @dentry: the target dentry
  247  * After this call, in-progress rcu-walk path lookup will fail. This
  248  * should be called after unhashing, and after changing d_inode (if
  249  * the dentry has not already been unhashed).
  250  */
  251 static inline void dentry_rcuwalk_barrier(struct dentry *dentry)
  252 {
  253         assert_spin_locked(&dentry->d_lock);
  254         /* Go through a barrier */
  255         write_seqcount_barrier(&dentry->d_seq);
  256 }
  257 
  258 /*
  259  * Release the dentry's inode, using the filesystem
  260  * d_iput() operation if defined. Dentry has no refcount
  261  * and is unhashed.
  262  */
  263 static void dentry_iput(struct dentry * dentry)
  264         __releases(dentry->d_lock)
  265         __releases(dentry->d_inode->i_lock)
  266 {
  267         struct inode *inode = dentry->d_inode;
  268         if (inode) {
  269                 dentry->d_inode = NULL;
  270                 hlist_del_init(&dentry->d_alias);
  271                 spin_unlock(&dentry->d_lock);
  272                 spin_unlock(&inode->i_lock);
  273                 if (!inode->i_nlink)
  274                         fsnotify_inoderemove(inode);
  275                 if (dentry->d_op && dentry->d_op->d_iput)
  276                         dentry->d_op->d_iput(dentry, inode);
  277                 else
  278                         iput(inode);
  279         } else {
  280                 spin_unlock(&dentry->d_lock);
  281         }
  282 }
  283 
  284 /*
  285  * Release the dentry's inode, using the filesystem
  286  * d_iput() operation if defined. dentry remains in-use.
  287  */
  288 static void dentry_unlink_inode(struct dentry * dentry)
  289         __releases(dentry->d_lock)
  290         __releases(dentry->d_inode->i_lock)
  291 {
  292         struct inode *inode = dentry->d_inode;
  293         dentry->d_inode = NULL;
  294         hlist_del_init(&dentry->d_alias);
  295         dentry_rcuwalk_barrier(dentry);
  296         spin_unlock(&dentry->d_lock);
  297         spin_unlock(&inode->i_lock);
  298         if (!inode->i_nlink)
  299                 fsnotify_inoderemove(inode);
  300         if (dentry->d_op && dentry->d_op->d_iput)
  301                 dentry->d_op->d_iput(dentry, inode);
  302         else
  303                 iput(inode);
  304 }
  305 
  306 /*
  307  * dentry_lru_(add|del|prune|move_tail) must be called with d_lock held.
  308  */
  309 static void dentry_lru_add(struct dentry *dentry)
  310 {
  311         if (list_empty(&dentry->d_lru)) {
  312                 spin_lock(&dcache_lru_lock);
  313                 list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
  314                 dentry->d_sb->s_nr_dentry_unused++;
  315                 dentry_stat.nr_unused++;
  316                 spin_unlock(&dcache_lru_lock);
  317         }
  318 }
  319 
  320 static void __dentry_lru_del(struct dentry *dentry)
  321 {
  322         list_del_init(&dentry->d_lru);
  323         dentry->d_flags &= ~DCACHE_SHRINK_LIST;
  324         dentry->d_sb->s_nr_dentry_unused--;
  325         dentry_stat.nr_unused--;
  326 }
  327 
  328 /*
  329  * Remove a dentry with references from the LRU.
  330  */
  331 static void dentry_lru_del(struct dentry *dentry)
  332 {
  333         if (!list_empty(&dentry->d_lru)) {
  334                 spin_lock(&dcache_lru_lock);
  335                 __dentry_lru_del(dentry);
  336                 spin_unlock(&dcache_lru_lock);
  337         }
  338 }
  339 
  340 /*
  341  * Remove a dentry that is unreferenced and about to be pruned
  342  * (unhashed and destroyed) from the LRU, and inform the file system.
  343  * This wrapper should be called _prior_ to unhashing a victim dentry.
  344  */
  345 static void dentry_lru_prune(struct dentry *dentry)
  346 {
  347         if (!list_empty(&dentry->d_lru)) {
  348                 if (dentry->d_flags & DCACHE_OP_PRUNE)
  349                         dentry->d_op->d_prune(dentry);
  350 
  351                 spin_lock(&dcache_lru_lock);
  352                 __dentry_lru_del(dentry);
  353                 spin_unlock(&dcache_lru_lock);
  354         }
  355 }
  356 
  357 static void dentry_lru_move_list(struct dentry *dentry, struct list_head *list)
  358 {
  359         spin_lock(&dcache_lru_lock);
  360         if (list_empty(&dentry->d_lru)) {
  361                 list_add_tail(&dentry->d_lru, list);
  362                 dentry->d_sb->s_nr_dentry_unused++;
  363                 dentry_stat.nr_unused++;
  364         } else {
  365                 list_move_tail(&dentry->d_lru, list);
  366         }
  367         spin_unlock(&dcache_lru_lock);
  368 }
  369 
  370 /**
  371  * d_kill - kill dentry and return parent
  372  * @dentry: dentry to kill
  373  * @parent: parent dentry
  374  *
  375  * The dentry must already be unhashed and removed from the LRU.
  376  *
  377  * If this is the root of the dentry tree, return NULL.
  378  *
  379  * dentry->d_lock and parent->d_lock must be held by caller, and are dropped by
  380  * d_kill.
  381  */
  382 static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent)
  383         __releases(dentry->d_lock)
  384         __releases(parent->d_lock)
  385         __releases(dentry->d_inode->i_lock)
  386 {
  387         list_del(&dentry->d_u.d_child);
  388         /*
  389          * Inform try_to_ascend() that we are no longer attached to the
  390          * dentry tree
  391          */
  392         dentry->d_flags |= DCACHE_DENTRY_KILLED;
  393         if (parent)
  394                 spin_unlock(&parent->d_lock);
  395         dentry_iput(dentry);
  396         /*
  397          * dentry_iput drops the locks, at which point nobody (except
  398          * transient RCU lookups) can reach this dentry.
  399          */
  400         d_free(dentry);
  401         return parent;
  402 }
  403 
  404 /*
  405  * Unhash a dentry without inserting an RCU walk barrier or checking that
  406  * dentry->d_lock is locked.  The caller must take care of that, if
  407  * appropriate.
  408  */
  409 static void __d_shrink(struct dentry *dentry)
  410 {
  411         if (!d_unhashed(dentry)) {
  412                 struct hlist_bl_head *b;
  413                 if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED))
  414                         b = &dentry->d_sb->s_anon;
  415                 else
  416                         b = d_hash(dentry->d_parent, dentry->d_name.hash);
  417 
  418                 hlist_bl_lock(b);
  419                 __hlist_bl_del(&dentry->d_hash);
  420                 dentry->d_hash.pprev = NULL;
  421                 hlist_bl_unlock(b);
  422         }
  423 }
  424 
  425 /**
  426  * d_drop - drop a dentry
  427  * @dentry: dentry to drop
  428  *
  429  * d_drop() unhashes the entry from the parent dentry hashes, so that it won't
  430  * be found through a VFS lookup any more. Note that this is different from
  431  * deleting the dentry - d_delete will try to mark the dentry negative if
  432  * possible, giving a successful _negative_ lookup, while d_drop will
  433  * just make the cache lookup fail.
  434  *
  435  * d_drop() is used mainly for stuff that wants to invalidate a dentry for some
  436  * reason (NFS timeouts or autofs deletes).
  437  *
  438  * __d_drop requires dentry->d_lock.
  439  */
  440 void __d_drop(struct dentry *dentry)
  441 {
  442         if (!d_unhashed(dentry)) {
  443                 __d_shrink(dentry);
  444                 dentry_rcuwalk_barrier(dentry);
  445         }
  446 }
  447 EXPORT_SYMBOL(__d_drop);
  448 
  449 void d_drop(struct dentry *dentry)
  450 {
  451         spin_lock(&dentry->d_lock);
  452         __d_drop(dentry);
  453         spin_unlock(&dentry->d_lock);
  454 }
  455 EXPORT_SYMBOL(d_drop);
  456 
  457 /*
  458  * Finish off a dentry we've decided to kill.
  459  * dentry->d_lock must be held, returns with it unlocked.
  460  * If ref is non-zero, then decrement the refcount too.
  461  * Returns dentry requiring refcount drop, or NULL if we're done.
  462  */
  463 static inline struct dentry *dentry_kill(struct dentry *dentry, int ref)
  464         __releases(dentry->d_lock)
  465 {
  466         struct inode *inode;
  467         struct dentry *parent;
  468 
  469         inode = dentry->d_inode;
  470         if (inode && !spin_trylock(&inode->i_lock)) {
  471 relock:
  472                 spin_unlock(&dentry->d_lock);
  473                 cpu_relax();
  474                 return dentry; /* try again with same dentry */
  475         }
  476         if (IS_ROOT(dentry))
  477                 parent = NULL;
  478         else
  479                 parent = dentry->d_parent;
  480         if (parent && !spin_trylock(&parent->d_lock)) {
  481                 if (inode)
  482                         spin_unlock(&inode->i_lock);
  483                 goto relock;
  484         }
  485 
  486         if (ref)
  487                 dentry->d_count--;
  488         /*
  489          * if dentry was on the d_lru list delete it from there.
  490          * inform the fs via d_prune that this dentry is about to be
  491          * unhashed and destroyed.
  492          */
  493         dentry_lru_prune(dentry);
  494         /* if it was on the hash then remove it */
  495         __d_drop(dentry);
  496         return d_kill(dentry, parent);
  497 }
  498 
  499 /* 
  500  * This is dput
  501  *
  502  * This is complicated by the fact that we do not want to put
  503  * dentries that are no longer on any hash chain on the unused
  504  * list: we'd much rather just get rid of them immediately.
  505  *
  506  * However, that implies that we have to traverse the dentry
  507  * tree upwards to the parents which might _also_ now be
  508  * scheduled for deletion (it may have been only waiting for
  509  * its last child to go away).
  510  *
  511  * This tail recursion is done by hand as we don't want to depend
  512  * on the compiler to always get this right (gcc generally doesn't).
  513  * Real recursion would eat up our stack space.
  514  */
  515 
  516 /*
  517  * dput - release a dentry
  518  * @dentry: dentry to release 
  519  *
  520  * Release a dentry. This will drop the usage count and if appropriate
  521  * call the dentry unlink method as well as removing it from the queues and
  522  * releasing its resources. If the parent dentries were scheduled for release
  523  * they too may now get deleted.
  524  */
  525 void dput(struct dentry *dentry)
  526 {
  527         if (!dentry)
  528                 return;
  529 
  530 repeat:
  531         if (dentry->d_count == 1)
  532                 might_sleep();
  533         spin_lock(&dentry->d_lock);
  534         BUG_ON(!dentry->d_count);
  535         if (dentry->d_count > 1) {
  536                 dentry->d_count--;
  537                 spin_unlock(&dentry->d_lock);
  538                 return;
  539         }
  540 
  541         if (dentry->d_flags & DCACHE_OP_DELETE) {
  542                 if (dentry->d_op->d_delete(dentry))
  543                         goto kill_it;
  544         }
  545 
  546         /* Unreachable? Get rid of it */
  547         if (d_unhashed(dentry))
  548                 goto kill_it;
  549 
  550         dentry->d_flags |= DCACHE_REFERENCED;
  551         dentry_lru_add(dentry);
  552 
  553         dentry->d_count--;
  554         spin_unlock(&dentry->d_lock);
  555         return;
  556 
  557 kill_it:
  558         dentry = dentry_kill(dentry, 1);
  559         if (dentry)
  560                 goto repeat;
  561 }
  562 EXPORT_SYMBOL(dput);
  563 
  564 /**
  565  * d_invalidate - invalidate a dentry
  566  * @dentry: dentry to invalidate
  567  *
  568  * Try to invalidate the dentry if it turns out to be
  569  * possible. If there are other dentries that can be
  570  * reached through this one we can't delete it and we
  571  * return -EBUSY. On success we return 0.
  572  *
  573  * no dcache lock.
  574  */
  575  
  576 int d_invalidate(struct dentry * dentry)
  577 {
  578         /*
  579          * If it's already been dropped, return OK.
  580          */
  581         spin_lock(&dentry->d_lock);
  582         if (d_unhashed(dentry)) {
  583                 spin_unlock(&dentry->d_lock);
  584                 return 0;
  585         }
  586         /*
  587          * Check whether to do a partial shrink_dcache
  588          * to get rid of unused child entries.
  589          */
  590         if (!list_empty(&dentry->d_subdirs)) {
  591                 spin_unlock(&dentry->d_lock);
  592                 shrink_dcache_parent(dentry);
  593                 spin_lock(&dentry->d_lock);
  594         }
  595 
  596         /*
  597          * Somebody else still using it?
  598          *
  599          * If it's a directory, we can't drop it
  600          * for fear of somebody re-populating it
  601          * with children (even though dropping it
  602          * would make it unreachable from the root,
  603          * we might still populate it if it was a
  604          * working directory or similar).
  605          * We also need to leave mountpoints alone,
  606          * directory or not.
  607          */
  608         if (dentry->d_count > 1 && dentry->d_inode) {
  609                 if (S_ISDIR(dentry->d_inode->i_mode) || d_mountpoint(dentry)) {
  610                         spin_unlock(&dentry->d_lock);
  611                         return -EBUSY;
  612                 }
  613         }
  614 
  615         __d_drop(dentry);
  616         spin_unlock(&dentry->d_lock);
  617         return 0;
  618 }
  619 EXPORT_SYMBOL(d_invalidate);
  620 
  621 /* This must be called with d_lock held */
  622 static inline void __dget_dlock(struct dentry *dentry)
  623 {
  624         dentry->d_count++;
  625 }
  626 
  627 static inline void __dget(struct dentry *dentry)
  628 {
  629         spin_lock(&dentry->d_lock);
  630         __dget_dlock(dentry);
  631         spin_unlock(&dentry->d_lock);
  632 }
  633 
  634 struct dentry *dget_parent(struct dentry *dentry)
  635 {
  636         struct dentry *ret;
  637 
  638 repeat:
  639         /*
  640          * Don't need rcu_dereference because we re-check it was correct under
  641          * the lock.
  642          */
  643         rcu_read_lock();
  644         ret = dentry->d_parent;
  645         spin_lock(&ret->d_lock);
  646         if (unlikely(ret != dentry->d_parent)) {
  647                 spin_unlock(&ret->d_lock);
  648                 rcu_read_unlock();
  649                 goto repeat;
  650         }
  651         rcu_read_unlock();
  652         BUG_ON(!ret->d_count);
  653         ret->d_count++;
  654         spin_unlock(&ret->d_lock);
  655         return ret;
  656 }
  657 EXPORT_SYMBOL(dget_parent);
  658 
  659 /**
  660  * d_find_alias - grab a hashed alias of inode
  661  * @inode: inode in question
  662  * @want_discon:  flag, used by d_splice_alias, to request
  663  *          that only a DISCONNECTED alias be returned.
  664  *
  665  * If inode has a hashed alias, or is a directory and has any alias,
  666  * acquire the reference to alias and return it. Otherwise return NULL.
  667  * Notice that if inode is a directory there can be only one alias and
  668  * it can be unhashed only if it has no children, or if it is the root
  669  * of a filesystem.
  670  *
  671  * If the inode has an IS_ROOT, DCACHE_DISCONNECTED alias, then prefer
  672  * any other hashed alias over that one unless @want_discon is set,
  673  * in which case only return an IS_ROOT, DCACHE_DISCONNECTED alias.
  674  */
  675 static struct dentry *__d_find_alias(struct inode *inode, int want_discon)
  676 {
  677         struct dentry *alias, *discon_alias;
  678         struct hlist_node *p;
  679 
  680 again:
  681         discon_alias = NULL;
  682         hlist_for_each_entry(alias, p, &inode->i_dentry, d_alias) {
  683                 spin_lock(&alias->d_lock);
  684                 if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
  685                         if (IS_ROOT(alias) &&
  686                             (alias->d_flags & DCACHE_DISCONNECTED)) {
  687                                 discon_alias = alias;
  688                         } else if (!want_discon) {
  689                                 __dget_dlock(alias);
  690                                 spin_unlock(&alias->d_lock);
  691                                 return alias;
  692                         }
  693                 }
  694                 spin_unlock(&alias->d_lock);
  695         }
  696         if (discon_alias) {
  697                 alias = discon_alias;
  698                 spin_lock(&alias->d_lock);
  699                 if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
  700                         if (IS_ROOT(alias) &&
  701                             (alias->d_flags & DCACHE_DISCONNECTED)) {
  702                                 __dget_dlock(alias);
  703                                 spin_unlock(&alias->d_lock);
  704                                 return alias;
  705                         }
  706                 }
  707                 spin_unlock(&alias->d_lock);
  708                 goto again;
  709         }
  710         return NULL;
  711 }
  712 
  713 struct dentry *d_find_alias(struct inode *inode)
  714 {
  715         struct dentry *de = NULL;
  716 
  717         if (!hlist_empty(&inode->i_dentry)) {
  718                 spin_lock(&inode->i_lock);
  719                 de = __d_find_alias(inode, 0);
  720                 spin_unlock(&inode->i_lock);
  721         }
  722         return de;
  723 }
  724 EXPORT_SYMBOL(d_find_alias);
  725 
  726 /*
  727  *      Try to kill dentries associated with this inode.
  728  * WARNING: you must own a reference to inode.
  729  */
  730 void d_prune_aliases(struct inode *inode)
  731 {
  732         struct dentry *dentry;
  733         struct hlist_node *p;
  734 restart:
  735         spin_lock(&inode->i_lock);
  736         hlist_for_each_entry(dentry, p, &inode->i_dentry, d_alias) {
  737                 spin_lock(&dentry->d_lock);
  738                 if (!dentry->d_count) {
  739                         __dget_dlock(dentry);
  740                         __d_drop(dentry);
  741                         spin_unlock(&dentry->d_lock);
  742                         spin_unlock(&inode->i_lock);
  743                         dput(dentry);
  744                         goto restart;
  745                 }
  746                 spin_unlock(&dentry->d_lock);
  747         }
  748         spin_unlock(&inode->i_lock);
  749 }
  750 EXPORT_SYMBOL(d_prune_aliases);
  751 
  752 /*
  753  * Try to throw away a dentry - free the inode, dput the parent.
  754  * Requires dentry->d_lock is held, and dentry->d_count == 0.
  755  * Releases dentry->d_lock.
  756  *
  757  * This may fail if locks cannot be acquired no problem, just try again.
  758  */
  759 static void try_prune_one_dentry(struct dentry *dentry)
  760         __releases(dentry->d_lock)
  761 {
  762         struct dentry *parent;
  763 
  764         parent = dentry_kill(dentry, 0);
  765         /*
  766          * If dentry_kill returns NULL, we have nothing more to do.
  767          * if it returns the same dentry, trylocks failed. In either
  768          * case, just loop again.
  769          *
  770          * Otherwise, we need to prune ancestors too. This is necessary
  771          * to prevent quadratic behavior of shrink_dcache_parent(), but
  772          * is also expected to be beneficial in reducing dentry cache
  773          * fragmentation.
  774          */
  775         if (!parent)
  776                 return;
  777         if (parent == dentry)
  778                 return;
  779 
  780         /* Prune ancestors. */
  781         dentry = parent;
  782         while (dentry) {
  783                 spin_lock(&dentry->d_lock);
  784                 if (dentry->d_count > 1) {
  785                         dentry->d_count--;
  786                         spin_unlock(&dentry->d_lock);
  787                         return;
  788                 }
  789                 dentry = dentry_kill(dentry, 1);
  790         }
  791 }
  792 
  793 static void shrink_dentry_list(struct list_head *list)
  794 {
  795         struct dentry *dentry;
  796 
  797         rcu_read_lock();
  798         for (;;) {
  799                 dentry = list_entry_rcu(list->prev, struct dentry, d_lru);
  800                 if (&dentry->d_lru == list)
  801                         break; /* empty */
  802                 spin_lock(&dentry->d_lock);
  803                 if (dentry != list_entry(list->prev, struct dentry, d_lru)) {
  804                         spin_unlock(&dentry->d_lock);
  805                         continue;
  806                 }
  807 
  808                 /*
  809                  * We found an inuse dentry which was not removed from
  810                  * the LRU because of laziness during lookup.  Do not free
  811                  * it - just keep it off the LRU list.
  812                  */
  813                 if (dentry->d_count) {
  814                         dentry_lru_del(dentry);
  815                         spin_unlock(&dentry->d_lock);
  816                         continue;
  817                 }
  818 
  819                 rcu_read_unlock();
  820 
  821                 try_prune_one_dentry(dentry);
  822 
  823                 rcu_read_lock();
  824         }
  825         rcu_read_unlock();
  826 }
  827 
  828 /**
  829  * prune_dcache_sb - shrink the dcache
  830  * @sb: superblock
  831  * @count: number of entries to try to free
  832  *
  833  * Attempt to shrink the superblock dcache LRU by @count entries. This is
  834  * done when we need more memory an called from the superblock shrinker
  835  * function.
  836  *
  837  * This function may fail to free any resources if all the dentries are in
  838  * use.
  839  */
  840 void prune_dcache_sb(struct super_block *sb, int count)
  841 {
  842         struct dentry *dentry;
  843         LIST_HEAD(referenced);
  844         LIST_HEAD(tmp);
  845 
  846 relock:
  847         spin_lock(&dcache_lru_lock);
  848         while (!list_empty(&sb->s_dentry_lru)) {
  849                 dentry = list_entry(sb->s_dentry_lru.prev,
  850                                 struct dentry, d_lru);
  851                 BUG_ON(dentry->d_sb != sb);
  852 
  853                 if (!spin_trylock(&dentry->d_lock)) {
  854                         spin_unlock(&dcache_lru_lock);
  855                         cpu_relax();
  856                         goto relock;
  857                 }
  858 
  859                 if (dentry->d_flags & DCACHE_REFERENCED) {
  860                         dentry->d_flags &= ~DCACHE_REFERENCED;
  861                         list_move(&dentry->d_lru, &referenced);
  862                         spin_unlock(&dentry->d_lock);
  863                 } else {
  864                         list_move_tail(&dentry->d_lru, &tmp);
  865                         dentry->d_flags |= DCACHE_SHRINK_LIST;
  866                         spin_unlock(&dentry->d_lock);
  867                         if (!--count)
  868                                 break;
  869                 }
  870                 cond_resched_lock(&dcache_lru_lock);
  871         }
  872         if (!list_empty(&referenced))
  873                 list_splice(&referenced, &sb->s_dentry_lru);
  874         spin_unlock(&dcache_lru_lock);
  875 
  876         shrink_dentry_list(&tmp);
  877 }
  878 
  879 /**
  880  * shrink_dcache_sb - shrink dcache for a superblock
  881  * @sb: superblock
  882  *
  883  * Shrink the dcache for the specified super block. This is used to free
  884  * the dcache before unmounting a file system.
  885  */
  886 void shrink_dcache_sb(struct super_block *sb)
  887 {
  888         LIST_HEAD(tmp);
  889 
  890         spin_lock(&dcache_lru_lock);
  891         while (!list_empty(&sb->s_dentry_lru)) {
  892                 list_splice_init(&sb->s_dentry_lru, &tmp);
  893                 spin_unlock(&dcache_lru_lock);
  894                 shrink_dentry_list(&tmp);
  895                 spin_lock(&dcache_lru_lock);
  896         }
  897         spin_unlock(&dcache_lru_lock);
  898 }
  899 EXPORT_SYMBOL(shrink_dcache_sb);
  900 
  901 /*
  902  * destroy a single subtree of dentries for unmount
  903  * - see the comments on shrink_dcache_for_umount() for a description of the
  904  *   locking
  905  */
  906 static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
  907 {
  908         struct dentry *parent;
  909 
  910         BUG_ON(!IS_ROOT(dentry));
  911 
  912         for (;;) {
  913                 /* descend to the first leaf in the current subtree */
  914                 while (!list_empty(&dentry->d_subdirs))
  915                         dentry = list_entry(dentry->d_subdirs.next,
  916                                             struct dentry, d_u.d_child);
  917 
  918                 /* consume the dentries from this leaf up through its parents
  919                  * until we find one with children or run out altogether */
  920                 do {
  921                         struct inode *inode;
  922 
  923                         /*
  924                          * remove the dentry from the lru, and inform
  925                          * the fs that this dentry is about to be
  926                          * unhashed and destroyed.
  927                          */
  928                         dentry_lru_prune(dentry);
  929                         __d_shrink(dentry);
  930 
  931                         if (dentry->d_count != 0) {
  932                                 printk(KERN_ERR
  933                                        "BUG: Dentry %p{i=%lx,n=%s}"
  934                                        " still in use (%d)"
  935                                        " [unmount of %s %s]\n",
  936                                        dentry,
  937                                        dentry->d_inode ?
  938                                        dentry->d_inode->i_ino : 0UL,
  939                                        dentry->d_name.name,
  940                                        dentry->d_count,
  941                                        dentry->d_sb->s_type->name,
  942                                        dentry->d_sb->s_id);
  943                                 BUG();
  944                         }
  945 
  946                         if (IS_ROOT(dentry)) {
  947                                 parent = NULL;
  948                                 list_del(&dentry->d_u.d_child);
  949                         } else {
  950                                 parent = dentry->d_parent;
  951                                 parent->d_count--;
  952                                 list_del(&dentry->d_u.d_child);
  953                         }
  954 
  955                         inode = dentry->d_inode;
  956                         if (inode) {
  957                                 dentry->d_inode = NULL;
  958                                 hlist_del_init(&dentry->d_alias);
  959                                 if (dentry->d_op && dentry->d_op->d_iput)
  960                                         dentry->d_op->d_iput(dentry, inode);
  961                                 else
  962                                         iput(inode);
  963                         }
  964 
  965                         d_free(dentry);
  966 
  967                         /* finished when we fall off the top of the tree,
  968                          * otherwise we ascend to the parent and move to the
  969                          * next sibling if there is one */
  970                         if (!parent)
  971                                 return;
  972                         dentry = parent;
  973                 } while (list_empty(&dentry->d_subdirs));
  974 
  975                 dentry = list_entry(dentry->d_subdirs.next,
  976                                     struct dentry, d_u.d_child);
  977         }
  978 }
  979 
  980 /*
  981  * destroy the dentries attached to a superblock on unmounting
  982  * - we don't need to use dentry->d_lock because:
  983  *   - the superblock is detached from all mountings and open files, so the
  984  *     dentry trees will not be rearranged by the VFS
  985  *   - s_umount is write-locked, so the memory pressure shrinker will ignore
  986  *     any dentries belonging to this superblock that it comes across
  987  *   - the filesystem itself is no longer permitted to rearrange the dentries
  988  *     in this superblock
  989  */
  990 void shrink_dcache_for_umount(struct super_block *sb)
  991 {
  992         struct dentry *dentry;
  993 
  994         if (down_read_trylock(&sb->s_umount))
  995                 BUG();
  996 
  997         dentry = sb->s_root;
  998         sb->s_root = NULL;
  999         dentry->d_count--;
 1000         shrink_dcache_for_umount_subtree(dentry);
 1001 
 1002         while (!hlist_bl_empty(&sb->s_anon)) {
 1003                 dentry = hlist_bl_entry(hlist_bl_first(&sb->s_anon), struct dentry, d_hash);
 1004                 shrink_dcache_for_umount_subtree(dentry);
 1005         }
 1006 }
 1007 
 1008 /*
 1009  * This tries to ascend one level of parenthood, but
 1010  * we can race with renaming, so we need to re-check
 1011  * the parenthood after dropping the lock and check
 1012  * that the sequence number still matches.
 1013  */
 1014 static struct dentry *try_to_ascend(struct dentry *old, int locked, unsigned seq)
 1015 {
 1016         struct dentry *new = old->d_parent;
 1017 
 1018         rcu_read_lock();
 1019         spin_unlock(&old->d_lock);
 1020         spin_lock(&new->d_lock);
 1021 
 1022         /*
 1023          * might go back up the wrong parent if we have had a rename
 1024          * or deletion
 1025          */
 1026         if (new != old->d_parent ||
 1027                  (old->d_flags & DCACHE_DENTRY_KILLED) ||
 1028                  (!locked && read_seqretry(&rename_lock, seq))) {
 1029                 spin_unlock(&new->d_lock);
 1030                 new = NULL;
 1031         }
 1032         rcu_read_unlock();
 1033         return new;
 1034 }
 1035 
 1036 
 1037 /*
 1038  * Search for at least 1 mount point in the dentry's subdirs.
 1039  * We descend to the next level whenever the d_subdirs
 1040  * list is non-empty and continue searching.
 1041  */
 1042  
 1043 /**
 1044  * have_submounts - check for mounts over a dentry
 1045  * @parent: dentry to check.
 1046  *
 1047  * Return true if the parent or its subdirectories contain
 1048  * a mount point
 1049  */
 1050 int have_submounts(struct dentry *parent)
 1051 {
 1052         struct dentry *this_parent;
 1053         struct list_head *next;
 1054         unsigned seq;
 1055         int locked = 0;
 1056 
 1057         seq = read_seqbegin(&rename_lock);
 1058 again:
 1059         this_parent = parent;
 1060 
 1061         if (d_mountpoint(parent))
 1062                 goto positive;
 1063         spin_lock(&this_parent->d_lock);
 1064 repeat:
 1065         next = this_parent->d_subdirs.next;
 1066 resume:
 1067         while (next != &this_parent->d_subdirs) {
 1068                 struct list_head *tmp = next;
 1069                 struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
 1070                 next = tmp->next;
 1071 
 1072                 spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
 1073                 /* Have we found a mount point ? */
 1074                 if (d_mountpoint(dentry)) {
 1075                         spin_unlock(&dentry->d_lock);
 1076                         spin_unlock(&this_parent->d_lock);
 1077                         goto positive;
 1078                 }
 1079                 if (!list_empty(&dentry->d_subdirs)) {
 1080                         spin_unlock(&this_parent->d_lock);
 1081                         spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
 1082                         this_parent = dentry;
 1083                         spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
 1084                         goto repeat;
 1085                 }
 1086                 spin_unlock(&dentry->d_lock);
 1087         }
 1088         /*
 1089          * All done at this level ... ascend and resume the search.
 1090          */
 1091         if (this_parent != parent) {
 1092                 struct dentry *child = this_parent;
 1093                 this_parent = try_to_ascend(this_parent, locked, seq);
 1094                 if (!this_parent)
 1095                         goto rename_retry;
 1096                 next = child->d_u.d_child.next;
 1097                 goto resume;
 1098         }
 1099         spin_unlock(&this_parent->d_lock);
 1100         if (!locked && read_seqretry(&rename_lock, seq))
 1101                 goto rename_retry;
 1102         if (locked)
 1103                 write_sequnlock(&rename_lock);
 1104         return 0; /* No mount points found in tree */
 1105 positive:
 1106         if (!locked && read_seqretry(&rename_lock, seq))
 1107                 goto rename_retry;
 1108         if (locked)
 1109                 write_sequnlock(&rename_lock);
 1110         return 1;
 1111 
 1112 rename_retry:
 1113         if (locked)
 1114                 goto again;
 1115         locked = 1;
 1116         write_seqlock(&rename_lock);
 1117         goto again;
 1118 }
 1119 EXPORT_SYMBOL(have_submounts);
 1120 
 1121 /*
 1122  * Search the dentry child list of the specified parent,
 1123  * and move any unused dentries to the end of the unused
 1124  * list for prune_dcache(). We descend to the next level
 1125  * whenever the d_subdirs list is non-empty and continue
 1126  * searching.
 1127  *
 1128  * It returns zero iff there are no unused children,
 1129  * otherwise  it returns the number of children moved to
 1130  * the end of the unused list. This may not be the total
 1131  * number of unused children, because select_parent can
 1132  * drop the lock and return early due to latency
 1133  * constraints.
 1134  */
 1135 static int select_parent(struct dentry *parent, struct list_head *dispose)
 1136 {
 1137         struct dentry *this_parent;
 1138         struct list_head *next;
 1139         unsigned seq;
 1140         int found = 0;
 1141         int locked = 0;
 1142 
 1143         seq = read_seqbegin(&rename_lock);
 1144 again:
 1145         this_parent = parent;
 1146         spin_lock(&this_parent->d_lock);
 1147 repeat:
 1148         next = this_parent->d_subdirs.next;
 1149 resume:
 1150         while (next != &this_parent->d_subdirs) {
 1151                 struct list_head *tmp = next;
 1152                 struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
 1153                 next = tmp->next;
 1154 
 1155                 spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
 1156 
 1157                 /*
 1158                  * move only zero ref count dentries to the dispose list.
 1159                  *
 1160                  * Those which are presently on the shrink list, being processed
 1161                  * by shrink_dentry_list(), shouldn't be moved.  Otherwise the
 1162                  * loop in shrink_dcache_parent() might not make any progress
 1163                  * and loop forever.
 1164                  */
 1165                 if (dentry->d_count) {
 1166                         dentry_lru_del(dentry);
 1167                 } else if (!(dentry->d_flags & DCACHE_SHRINK_LIST)) {
 1168                         dentry_lru_move_list(dentry, dispose);
 1169                         dentry->d_flags |= DCACHE_SHRINK_LIST;
 1170                         found++;
 1171                 }
 1172                 /*
 1173                  * We can return to the caller if we have found some (this
 1174                  * ensures forward progress). We'll be coming back to find
 1175                  * the rest.
 1176                  */
 1177                 if (found && need_resched()) {
 1178                         spin_unlock(&dentry->d_lock);
 1179                         goto out;
 1180                 }
 1181 
 1182                 /*
 1183                  * Descend a level if the d_subdirs list is non-empty.
 1184                  */
 1185                 if (!list_empty(&dentry->d_subdirs)) {
 1186                         spin_unlock(&this_parent->d_lock);
 1187                         spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
 1188                         this_parent = dentry;
 1189                         spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
 1190                         goto repeat;
 1191                 }
 1192 
 1193                 spin_unlock(&dentry->d_lock);
 1194         }
 1195         /*
 1196          * All done at this level ... ascend and resume the search.
 1197          */
 1198         if (this_parent != parent) {
 1199                 struct dentry *child = this_parent;
 1200                 this_parent = try_to_ascend(this_parent, locked, seq);
 1201                 if (!this_parent)
 1202                         goto rename_retry;
 1203                 next = child->d_u.d_child.next;
 1204                 goto resume;
 1205         }
 1206 out:
 1207         spin_unlock(&this_parent->d_lock);
 1208         if (!locked && read_seqretry(&rename_lock, seq))
 1209                 goto rename_retry;
 1210         if (locked)
 1211                 write_sequnlock(&rename_lock);
 1212         return found;
 1213 
 1214 rename_retry:
 1215         if (found)
 1216                 return found;
 1217         if (locked)
 1218                 goto again;
 1219         locked = 1;
 1220         write_seqlock(&rename_lock);
 1221         goto again;
 1222 }
 1223 
 1224 /**
 1225  * shrink_dcache_parent - prune dcache
 1226  * @parent: parent of entries to prune
 1227  *
 1228  * Prune the dcache to remove unused children of the parent dentry.
 1229  */
 1230 void shrink_dcache_parent(struct dentry * parent)
 1231 {
 1232         LIST_HEAD(dispose);
 1233         int found;
 1234 
 1235         while ((found = select_parent(parent, &dispose)) != 0)
 1236                 shrink_dentry_list(&dispose);
 1237 }
 1238 EXPORT_SYMBOL(shrink_dcache_parent);
 1239 
 1240 /**
 1241  * __d_alloc    -       allocate a dcache entry
 1242  * @sb: filesystem it will belong to
 1243  * @name: qstr of the name
 1244  *
 1245  * Allocates a dentry. It returns %NULL if there is insufficient memory
 1246  * available. On a success the dentry is returned. The name passed in is
 1247  * copied and the copy passed in may be reused after this call.
 1248  */
 1249  
 1250 struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
 1251 {
 1252         struct dentry *dentry;
 1253         char *dname;
 1254 
 1255         dentry = kmem_cache_alloc(dentry_cache, GFP_KERNEL);
 1256         if (!dentry)
 1257                 return NULL;
 1258 
 1259         /*
 1260          * We guarantee that the inline name is always NUL-terminated.
 1261          * This way the memcpy() done by the name switching in rename
 1262          * will still always have a NUL at the end, even if we might
 1263          * be overwriting an internal NUL character
 1264          */
 1265         dentry->d_iname[DNAME_INLINE_LEN-1] = 0;
 1266         if (name->len > DNAME_INLINE_LEN-1) {
 1267                 dname = kmalloc(name->len + 1, GFP_KERNEL);
 1268                 if (!dname) {
 1269                         kmem_cache_free(dentry_cache, dentry); 
 1270                         return NULL;
 1271                 }
 1272         } else  {
 1273                 dname = dentry->d_iname;
 1274         }       
 1275 
 1276         dentry->d_name.len = name->len;
 1277         dentry->d_name.hash = name->hash;
 1278         memcpy(dname, name->name, name->len);
 1279         dname[name->len] = 0;
 1280 
 1281         /* Make sure we always see the terminating NUL character */
 1282         smp_wmb();
 1283         dentry->d_name.name = dname;
 1284 
 1285         dentry->d_count = 1;
 1286         dentry->d_flags = 0;
 1287         spin_lock_init(&dentry->d_lock);
 1288         seqcount_init(&dentry->d_seq);
 1289         dentry->d_inode = NULL;
 1290         dentry->d_parent = dentry;
 1291         dentry->d_sb = sb;
 1292         dentry->d_op = NULL;
 1293         dentry->d_fsdata = NULL;
 1294         INIT_HLIST_BL_NODE(&dentry->d_hash);
 1295         INIT_LIST_HEAD(&dentry->d_lru);
 1296         INIT_LIST_HEAD(&dentry->d_subdirs);
 1297         INIT_HLIST_NODE(&dentry->d_alias);
 1298         INIT_LIST_HEAD(&dentry->d_u.d_child);
 1299         d_set_d_op(dentry, dentry->d_sb->s_d_op);
 1300 
 1301         this_cpu_inc(nr_dentry);
 1302 
 1303         return dentry;
 1304 }
 1305 
 1306 /**
 1307  * d_alloc      -       allocate a dcache entry
 1308  * @parent: parent of entry to allocate
 1309  * @name: qstr of the name
 1310  *
 1311  * Allocates a dentry. It returns %NULL if there is insufficient memory
 1312  * available. On a success the dentry is returned. The name passed in is
 1313  * copied and the copy passed in may be reused after this call.
 1314  */
 1315 struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
 1316 {
 1317         struct dentry *dentry = __d_alloc(parent->d_sb, name);
 1318         if (!dentry)
 1319                 return NULL;
 1320 
 1321         spin_lock(&parent->d_lock);
 1322         /*
 1323          * don't need child lock because it is not subject
 1324          * to concurrency here
 1325          */
 1326         __dget_dlock(parent);
 1327         dentry->d_parent = parent;
 1328         list_add(&dentry->d_u.d_child, &parent->d_subdirs);
 1329         spin_unlock(&parent->d_lock);
 1330 
 1331         return dentry;
 1332 }
 1333 EXPORT_SYMBOL(d_alloc);
 1334 
 1335 struct dentry *d_alloc_pseudo(struct super_block *sb, const struct qstr *name)
 1336 {
 1337         struct dentry *dentry = __d_alloc(sb, name);
 1338         if (dentry)
 1339                 dentry->d_flags |= DCACHE_DISCONNECTED;
 1340         return dentry;
 1341 }
 1342 EXPORT_SYMBOL(d_alloc_pseudo);
 1343 
 1344 struct dentry *d_alloc_name(struct dentry *parent, const char *name)
 1345 {
 1346         struct qstr q;
 1347 
 1348         q.name = name;
 1349         q.len = strlen(name);
 1350         q.hash = full_name_hash(q.name, q.len);
 1351         return d_alloc(parent, &q);
 1352 }
 1353 EXPORT_SYMBOL(d_alloc_name);
 1354 
 1355 void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op)
 1356 {
 1357         WARN_ON_ONCE(dentry->d_op);
 1358         WARN_ON_ONCE(dentry->d_flags & (DCACHE_OP_HASH  |
 1359                                 DCACHE_OP_COMPARE       |
 1360                                 DCACHE_OP_REVALIDATE    |
 1361                                 DCACHE_OP_DELETE ));
 1362         dentry->d_op = op;
 1363         if (!op)
 1364                 return;
 1365         if (op->d_hash)
 1366                 dentry->d_flags |= DCACHE_OP_HASH;
 1367         if (op->d_compare)
 1368                 dentry->d_flags |= DCACHE_OP_COMPARE;
 1369         if (op->d_revalidate)
 1370                 dentry->d_flags |= DCACHE_OP_REVALIDATE;
 1371         if (op->d_delete)
 1372                 dentry->d_flags |= DCACHE_OP_DELETE;
 1373         if (op->d_prune)
 1374                 dentry->d_flags |= DCACHE_OP_PRUNE;
 1375 
 1376 }
 1377 EXPORT_SYMBOL(d_set_d_op);
 1378 
 1379 static void __d_instantiate(struct dentry *dentry, struct inode *inode)
 1380 {
 1381         spin_lock(&dentry->d_lock);
 1382         if (inode) {
 1383                 if (unlikely(IS_AUTOMOUNT(inode)))
 1384                         dentry->d_flags |= DCACHE_NEED_AUTOMOUNT;
 1385                 hlist_add_head(&dentry->d_alias, &inode->i_dentry);
 1386         }
 1387         dentry->d_inode = inode;
 1388         dentry_rcuwalk_barrier(dentry);
 1389         spin_unlock(&dentry->d_lock);
 1390         fsnotify_d_instantiate(dentry, inode);
 1391 }
 1392 
 1393 /**
 1394  * d_instantiate - fill in inode information for a dentry
 1395  * @entry: dentry to complete
 1396  * @inode: inode to attach to this dentry
 1397  *
 1398  * Fill in inode information in the entry.
 1399  *
 1400  * This turns negative dentries into productive full members
 1401  * of society.
 1402  *
 1403  * NOTE! This assumes that the inode count has been incremented
 1404  * (or otherwise set) by the caller to indicate that it is now
 1405  * in use by the dcache.
 1406  */
 1407  
 1408 void d_instantiate(struct dentry *entry, struct inode * inode)
 1409 {
 1410         BUG_ON(!hlist_unhashed(&entry->d_alias));
 1411         if (inode)
 1412                 spin_lock(&inode->i_lock);
 1413         __d_instantiate(entry, inode);
 1414         if (inode)
 1415                 spin_unlock(&inode->i_lock);
 1416         security_d_instantiate(entry, inode);
 1417 }
 1418 EXPORT_SYMBOL(d_instantiate);
 1419 
 1420 /**
 1421  * d_instantiate_unique - instantiate a non-aliased dentry
 1422  * @entry: dentry to instantiate
 1423  * @inode: inode to attach to this dentry
 1424  *
 1425  * Fill in inode information in the entry. On success, it returns NULL.
 1426  * If an unhashed alias of "entry" already exists, then we return the
 1427  * aliased dentry instead and drop one reference to inode.
 1428  *
 1429  * Note that in order to avoid conflicts with rename() etc, the caller
 1430  * had better be holding the parent directory semaphore.
 1431  *
 1432  * This also assumes that the inode count has been incremented
 1433  * (or otherwise set) by the caller to indicate that it is now
 1434  * in use by the dcache.
 1435  */
 1436 static struct dentry *__d_instantiate_unique(struct dentry *entry,
 1437                                              struct inode *inode)
 1438 {
 1439         struct dentry *alias;
 1440         int len = entry->d_name.len;
 1441         const char *name = entry->d_name.name;
 1442         unsigned int hash = entry->d_name.hash;
 1443         struct hlist_node *p;
 1444 
 1445         if (!inode) {
 1446                 __d_instantiate(entry, NULL);
 1447                 return NULL;
 1448         }
 1449 
 1450         hlist_for_each_entry(alias, p, &inode->i_dentry, d_alias) {
 1451                 /*
 1452                  * Don't need alias->d_lock here, because aliases with
 1453                  * d_parent == entry->d_parent are not subject to name or
 1454                  * parent changes, because the parent inode i_mutex is held.
 1455                  */
 1456                 if (alias->d_name.hash != hash)
 1457                         continue;
 1458                 if (alias->d_parent != entry->d_parent)
 1459                         continue;
 1460                 if (alias->d_name.len != len)
 1461                         continue;
 1462                 if (dentry_cmp(alias, name, len))
 1463                         continue;
 1464                 __dget(alias);
 1465                 return alias;
 1466         }
 1467 
 1468         __d_instantiate(entry, inode);
 1469         return NULL;
 1470 }
 1471 
 1472 struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
 1473 {
 1474         struct dentry *result;
 1475 
 1476         BUG_ON(!hlist_unhashed(&entry->d_alias));
 1477 
 1478         if (inode)
 1479                 spin_lock(&inode->i_lock);
 1480         result = __d_instantiate_unique(entry, inode);
 1481         if (inode)
 1482                 spin_unlock(&inode->i_lock);
 1483 
 1484         if (!result) {
 1485                 security_d_instantiate(entry, inode);
 1486                 return NULL;
 1487         }
 1488 
 1489         BUG_ON(!d_unhashed(result));
 1490         iput(inode);
 1491         return result;
 1492 }
 1493 
 1494 EXPORT_SYMBOL(d_instantiate_unique);
 1495 
 1496 struct dentry *d_make_root(struct inode *root_inode)
 1497 {
 1498         struct dentry *res = NULL;
 1499 
 1500         if (root_inode) {
 1501                 static const struct qstr name = QSTR_INIT("/", 1);
 1502 
 1503                 res = __d_alloc(root_inode->i_sb, &name);
 1504                 if (res)
 1505                         d_instantiate(res, root_inode);
 1506                 else
 1507                         iput(root_inode);
 1508         }
 1509         return res;
 1510 }
 1511 EXPORT_SYMBOL(d_make_root);
 1512 
 1513 static struct dentry * __d_find_any_alias(struct inode *inode)
 1514 {
 1515         struct dentry *alias;
 1516 
 1517         if (hlist_empty(&inode->i_dentry))
 1518                 return NULL;
 1519         alias = hlist_entry(inode->i_dentry.first, struct dentry, d_alias);
 1520         __dget(alias);
 1521         return alias;
 1522 }
 1523 
 1524 /**
 1525  * d_find_any_alias - find any alias for a given inode
 1526  * @inode: inode to find an alias for
 1527  *
 1528  * If any aliases exist for the given inode, take and return a
 1529  * reference for one of them.  If no aliases exist, return %NULL.
 1530  */
 1531 struct dentry *d_find_any_alias(struct inode *inode)
 1532 {
 1533         struct dentry *de;
 1534 
 1535         spin_lock(&inode->i_lock);
 1536         de = __d_find_any_alias(inode);
 1537         spin_unlock(&inode->i_lock);
 1538         return de;
 1539 }
 1540 EXPORT_SYMBOL(d_find_any_alias);
 1541 
 1542 /**
 1543  * d_obtain_alias - find or allocate a dentry for a given inode
 1544  * @inode: inode to allocate the dentry for
 1545  *
 1546  * Obtain a dentry for an inode resulting from NFS filehandle conversion or
 1547  * similar open by handle operations.  The returned dentry may be anonymous,
 1548  * or may have a full name (if the inode was already in the cache).
 1549  *
 1550  * When called on a directory inode, we must ensure that the inode only ever
 1551  * has one dentry.  If a dentry is found, that is returned instead of
 1552  * allocating a new one.
 1553  *
 1554  * On successful return, the reference to the inode has been transferred
 1555  * to the dentry.  In case of an error the reference on the inode is released.
 1556  * To make it easier to use in export operations a %NULL or IS_ERR inode may
 1557  * be passed in and will be the error will be propagate to the return value,
 1558  * with a %NULL @inode replaced by ERR_PTR(-ESTALE).
 1559  */
 1560 struct dentry *d_obtain_alias(struct inode *inode)
 1561 {
 1562         static const struct qstr anonstring = QSTR_INIT("/", 1);
 1563         struct dentry *tmp;
 1564         struct dentry *res;
 1565 
 1566         if (!inode)
 1567                 return ERR_PTR(-ESTALE);
 1568         if (IS_ERR(inode))
 1569                 return ERR_CAST(inode);
 1570 
 1571         res = d_find_any_alias(inode);
 1572         if (res)
 1573                 goto out_iput;
 1574 
 1575         tmp = __d_alloc(inode->i_sb, &anonstring);
 1576         if (!tmp) {
 1577                 res = ERR_PTR(-ENOMEM);
 1578                 goto out_iput;
 1579         }
 1580 
 1581         spin_lock(&inode->i_lock);
 1582         res = __d_find_any_alias(inode);
 1583         if (res) {
 1584                 spin_unlock(&inode->i_lock);
 1585                 dput(tmp);
 1586                 goto out_iput;
 1587         }
 1588 
 1589         /* attach a disconnected dentry */
 1590         spin_lock(&tmp->d_lock);
 1591         tmp->d_inode = inode;
 1592         tmp->d_flags |= DCACHE_DISCONNECTED;
 1593         hlist_add_head(&tmp->d_alias, &inode->i_dentry);
 1594         hlist_bl_lock(&tmp->d_sb->s_anon);
 1595         hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_anon);
 1596         hlist_bl_unlock(&tmp->d_sb->s_anon);
 1597         spin_unlock(&tmp->d_lock);
 1598         spin_unlock(&inode->i_lock);
 1599         security_d_instantiate(tmp, inode);
 1600 
 1601         return tmp;
 1602 
 1603  out_iput:
 1604         if (res && !IS_ERR(res))
 1605                 security_d_instantiate(res, inode);
 1606         iput(inode);
 1607         return res;
 1608 }
 1609 EXPORT_SYMBOL(d_obtain_alias);
 1610 
 1611 /**
 1612  * d_splice_alias - splice a disconnected dentry into the tree if one exists
 1613  * @inode:  the inode which may have a disconnected dentry
 1614  * @dentry: a negative dentry which we want to point to the inode.
 1615  *
 1616  * If inode is a directory and has a 'disconnected' dentry (i.e. IS_ROOT and
 1617  * DCACHE_DISCONNECTED), then d_move that in place of the given dentry
 1618  * and return it, else simply d_add the inode to the dentry and return NULL.
 1619  *
 1620  * This is needed in the lookup routine of any filesystem that is exportable
 1621  * (via knfsd) so that we can build dcache paths to directories effectively.
 1622  *
 1623  * If a dentry was found and moved, then it is returned.  Otherwise NULL
 1624  * is returned.  This matches the expected return value of ->lookup.
 1625  *
 1626  */
 1627 struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
 1628 {
 1629         struct dentry *new = NULL;
 1630 
 1631         if (IS_ERR(inode))
 1632                 return ERR_CAST(inode);
 1633 
 1634         if (inode && S_ISDIR(inode->i_mode)) {
 1635                 spin_lock(&inode->i_lock);
 1636                 new = __d_find_alias(inode, 1);
 1637                 if (new) {
 1638                         BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED));
 1639                         spin_unlock(&inode->i_lock);
 1640                         security_d_instantiate(new, inode);
 1641                         d_move(new, dentry);
 1642                         iput(inode);
 1643                 } else {
 1644                         /* already taking inode->i_lock, so d_add() by hand */
 1645                         __d_instantiate(dentry, inode);
 1646                         spin_unlock(&inode->i_lock);
 1647                         security_d_instantiate(dentry, inode);
 1648                         d_rehash(dentry);
 1649                 }
 1650         } else
 1651                 d_add(dentry, inode);
 1652         return new;
 1653 }
 1654 EXPORT_SYMBOL(d_splice_alias);
 1655 
 1656 /**
 1657  * d_add_ci - lookup or allocate new dentry with case-exact name
 1658  * @inode:  the inode case-insensitive lookup has found
 1659  * @dentry: the negative dentry that was passed to the parent's lookup func
 1660  * @name:   the case-exact name to be associated with the returned dentry
 1661  *
 1662  * This is to avoid filling the dcache with case-insensitive names to the
 1663  * same inode, only the actual correct case is stored in the dcache for
 1664  * case-insensitive filesystems.
 1665  *
 1666  * For a case-insensitive lookup match and if the the case-exact dentry
 1667  * already exists in in the dcache, use it and return it.
 1668  *
 1669  * If no entry exists with the exact case name, allocate new dentry with
 1670  * the exact case, and return the spliced entry.
 1671  */
 1672 struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode,
 1673                         struct qstr *name)
 1674 {
 1675         int error;
 1676         struct dentry *found;
 1677         struct dentry *new;
 1678 
 1679         /*
 1680          * First check if a dentry matching the name already exists,
 1681          * if not go ahead and create it now.
 1682          */
 1683         found = d_hash_and_lookup(dentry->d_parent, name);
 1684         if (!found) {
 1685                 new = d_alloc(dentry->d_parent, name);
 1686                 if (!new) {
 1687                         error = -ENOMEM;
 1688                         goto err_out;
 1689                 }
 1690 
 1691                 found = d_splice_alias(inode, new);
 1692                 if (found) {
 1693                         dput(new);
 1694                         return found;
 1695                 }
 1696                 return new;
 1697         }
 1698 
 1699         /*
 1700          * If a matching dentry exists, and it's not negative use it.
 1701          *
 1702          * Decrement the reference count to balance the iget() done
 1703          * earlier on.
 1704          */
 1705         if (found->d_inode) {
 1706                 if (unlikely(found->d_inode != inode)) {
 1707                         /* This can't happen because bad inodes are unhashed. */
 1708                         BUG_ON(!is_bad_inode(inode));
 1709                         BUG_ON(!is_bad_inode(found->d_inode));
 1710                 }
 1711                 iput(inode);
 1712                 return found;
 1713         }
 1714 
 1715         /*
 1716          * Negative dentry: instantiate it unless the inode is a directory and
 1717          * already has a dentry.
 1718          */
 1719         new = d_splice_alias(inode, found);
 1720         if (new) {
 1721                 dput(found);
 1722                 found = new;
 1723         }
 1724         return found;
 1725 
 1726 err_out:
 1727         iput(inode);
 1728         return ERR_PTR(error);
 1729 }
 1730 EXPORT_SYMBOL(d_add_ci);
 1731 
 1732 /*
 1733  * Do the slow-case of the dentry name compare.
 1734  *
 1735  * Unlike the dentry_cmp() function, we need to atomically
 1736  * load the name, length and inode information, so that the
 1737  * filesystem can rely on them, and can use the 'name' and
 1738  * 'len' information without worrying about walking off the
 1739  * end of memory etc.
 1740  *
 1741  * Thus the read_seqcount_retry() and the "duplicate" info
 1742  * in arguments (the low-level filesystem should not look
 1743  * at the dentry inode or name contents directly, since
 1744  * rename can change them while we're in RCU mode).
 1745  */
 1746 enum slow_d_compare {
 1747         D_COMP_OK,
 1748         D_COMP_NOMATCH,
 1749         D_COMP_SEQRETRY,
 1750 };
 1751 
 1752 static noinline enum slow_d_compare slow_dentry_cmp(
 1753                 const struct dentry *parent,
 1754                 struct inode *inode,
 1755                 struct dentry *dentry,
 1756                 unsigned int seq,
 1757                 const struct qstr *name)
 1758 {
 1759         int tlen = dentry->d_name.len;
 1760         const char *tname = dentry->d_name.name;
 1761         struct inode *i = dentry->d_inode;
 1762 
 1763         if (read_seqcount_retry(&dentry->d_seq, seq)) {
 1764                 cpu_relax();
 1765                 return D_COMP_SEQRETRY;
 1766         }
 1767         if (parent->d_op->d_compare(parent, inode,
 1768                                 dentry, i,
 1769                                 tlen, tname, name))
 1770                 return D_COMP_NOMATCH;
 1771         return D_COMP_OK;
 1772 }
 1773 
 1774 /**
 1775  * __d_lookup_rcu - search for a dentry (racy, store-free)
 1776  * @parent: parent dentry
 1777  * @name: qstr of name we wish to find
 1778  * @seqp: returns d_seq value at the point where the dentry was found
 1779  * @inode: returns dentry->d_inode when the inode was found valid.
 1780  * Returns: dentry, or NULL
 1781  *
 1782  * __d_lookup_rcu is the dcache lookup function for rcu-walk name
 1783  * resolution (store-free path walking) design described in
 1784  * Documentation/filesystems/path-lookup.txt.
 1785  *
 1786  * This is not to be used outside core vfs.
 1787  *
 1788  * __d_lookup_rcu must only be used in rcu-walk mode, ie. with vfsmount lock
 1789  * held, and rcu_read_lock held. The returned dentry must not be stored into
 1790  * without taking d_lock and checking d_seq sequence count against @seq
 1791  * returned here.
 1792  *
 1793  * A refcount may be taken on the found dentry with the __d_rcu_to_refcount
 1794  * function.
 1795  *
 1796  * Alternatively, __d_lookup_rcu may be called again to look up the child of
 1797  * the returned dentry, so long as its parent's seqlock is checked after the
 1798  * child is looked up. Thus, an interlocking stepping of sequence lock checks
 1799  * is formed, giving integrity down the path walk.
 1800  *
 1801  * NOTE! The caller *has* to check the resulting dentry against the sequence
 1802  * number we've returned before using any of the resulting dentry state!
 1803  */
 1804 struct dentry *__d_lookup_rcu(const struct dentry *parent,
 1805                                 const struct qstr *name,
 1806                                 unsigned *seqp, struct inode *inode)
 1807 {
 1808         u64 hashlen = name->hash_len;
 1809         const unsigned char *str = name->name;
 1810         struct hlist_bl_head *b = d_hash(parent, hashlen_hash(hashlen));
 1811         struct hlist_bl_node *node;
 1812         struct dentry *dentry;
 1813 
 1814         /*
 1815          * Note: There is significant duplication with __d_lookup_rcu which is
 1816          * required to prevent single threaded performance regressions
 1817          * especially on architectures where smp_rmb (in seqcounts) are costly.
 1818          * Keep the two functions in sync.
 1819          */
 1820 
 1821         /*
 1822          * The hash list is protected using RCU.
 1823          *
 1824          * Carefully use d_seq when comparing a candidate dentry, to avoid
 1825          * races with d_move().
 1826          *
 1827          * It is possible that concurrent renames can mess up our list
 1828          * walk here and result in missing our dentry, resulting in the
 1829          * false-negative result. d_lookup() protects against concurrent
 1830          * renames using rename_lock seqlock.
 1831          *
 1832          * See Documentation/filesystems/path-lookup.txt for more details.
 1833          */
 1834         hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) {
 1835                 unsigned seq;
 1836 
 1837 seqretry:
 1838                 /*
 1839                  * The dentry sequence count protects us from concurrent
 1840                  * renames, and thus protects inode, parent and name fields.
 1841                  *
 1842                  * The caller must perform a seqcount check in order
 1843                  * to do anything useful with the returned dentry,
 1844                  * including using the 'd_inode' pointer.
 1845                  *
 1846                  * NOTE! We do a "raw" seqcount_begin here. That means that
 1847                  * we don't wait for the sequence count to stabilize if it
 1848                  * is in the middle of a sequence change. If we do the slow
 1849                  * dentry compare, we will do seqretries until it is stable,
 1850                  * and if we end up with a successful lookup, we actually
 1851                  * want to exit RCU lookup anyway.
 1852                  */
 1853                 seq = raw_seqcount_begin(&dentry->d_seq);
 1854                 if (dentry->d_parent != parent)
 1855                         continue;
 1856                 if (d_unhashed(dentry))
 1857                         continue;
 1858                 *seqp = seq;
 1859 
 1860                 if (unlikely(parent->d_flags & DCACHE_OP_COMPARE)) {
 1861                         if (dentry->d_name.hash != hashlen_hash(hashlen))
 1862                                 continue;
 1863                         switch (slow_dentry_cmp(parent, inode, dentry, seq, name)) {
 1864                         case D_COMP_OK:
 1865                                 return dentry;
 1866                         case D_COMP_NOMATCH:
 1867                                 continue;
 1868                         default:
 1869                                 goto seqretry;
 1870                         }
 1871                 }
 1872 
 1873                 if (dentry->d_name.hash_len != hashlen)
 1874                         continue;
 1875                 if (!dentry_cmp(dentry, str, hashlen_len(hashlen)))
 1876                         return dentry;
 1877         }
 1878         return NULL;
 1879 }
 1880 
 1881 /**
 1882  * d_lookup - search for a dentry
 1883  * @parent: parent dentry
 1884  * @name: qstr of name we wish to find
 1885  * Returns: dentry, or NULL
 1886  *
 1887  * d_lookup searches the children of the parent dentry for the name in
 1888  * question. If the dentry is found its reference count is incremented and the
 1889  * dentry is returned. The caller must use dput to free the entry when it has
 1890  * finished using it. %NULL is returned if the dentry does not exist.
 1891  */
 1892 struct dentry *d_lookup(struct dentry *parent, struct qstr *name)
 1893 {
 1894         struct dentry *dentry;
 1895         unsigned seq;
 1896 
 1897         do {
 1898                 seq = read_seqbegin(&rename_lock);
 1899                 dentry = __d_lookup(parent, name);
 1900                 if (dentry)
 1901                         break;
 1902         } while (read_seqretry(&rename_lock, seq));
 1903         return dentry;
 1904 }
 1905 EXPORT_SYMBOL(d_lookup);
 1906 
 1907 /**
 1908  * __d_lookup - search for a dentry (racy)
 1909  * @parent: parent dentry
 1910  * @name: qstr of name we wish to find
 1911  * Returns: dentry, or NULL
 1912  *
 1913  * __d_lookup is like d_lookup, however it may (rarely) return a
 1914  * false-negative result due to unrelated rename activity.
 1915  *
 1916  * __d_lookup is slightly faster by avoiding rename_lock read seqlock,
 1917  * however it must be used carefully, eg. with a following d_lookup in
 1918  * the case of failure.
 1919  *
 1920  * __d_lookup callers must be commented.
 1921  */
 1922 struct dentry *__d_lookup(struct dentry *parent, struct qstr *name)
 1923 {
 1924         unsigned int len = name->len;
 1925         unsigned int hash = name->hash;
 1926         const unsigned char *str = name->name;
 1927         struct hlist_bl_head *b = d_hash(parent, hash);
 1928         struct hlist_bl_node *node;
 1929         struct dentry *found = NULL;
 1930         struct dentry *dentry;
 1931 
 1932         /*
 1933          * Note: There is significant duplication with __d_lookup_rcu which is
 1934          * required to prevent single threaded performance regressions
 1935          * especially on architectures where smp_rmb (in seqcounts) are costly.
 1936          * Keep the two functions in sync.
 1937          */
 1938 
 1939         /*
 1940          * The hash list is protected using RCU.
 1941          *
 1942          * Take d_lock when comparing a candidate dentry, to avoid races
 1943          * with d_move().
 1944          *
 1945          * It is possible that concurrent renames can mess up our list
 1946          * walk here and result in missing our dentry, resulting in the
 1947          * false-negative result. d_lookup() protects against concurrent
 1948          * renames using rename_lock seqlock.
 1949          *
 1950          * See Documentation/filesystems/path-lookup.txt for more details.
 1951          */
 1952         rcu_read_lock();
 1953         
 1954         hlist_bl_for_each_entry_rcu(dentry, node, b, d_hash) {
 1955 
 1956                 if (dentry->d_name.hash != hash)
 1957                         continue;
 1958 
 1959                 spin_lock(&dentry->d_lock);
 1960                 if (dentry->d_parent != parent)
 1961                         goto next;
 1962                 if (d_unhashed(dentry))
 1963                         goto next;
 1964 
 1965                 /*
 1966                  * It is safe to compare names since d_move() cannot
 1967                  * change the qstr (protected by d_lock).
 1968                  */
 1969                 if (parent->d_flags & DCACHE_OP_COMPARE) {
 1970                         int tlen = dentry->d_name.len;
 1971                         const char *tname = dentry->d_name.name;
 1972                         if (parent->d_op->d_compare(parent, parent->d_inode,
 1973                                                 dentry, dentry->d_inode,
 1974                                                 tlen, tname, name))
 1975                                 goto next;
 1976                 } else {
 1977                         if (dentry->d_name.len != len)
 1978                                 goto next;
 1979                         if (dentry_cmp(dentry, str, len))
 1980                                 goto next;
 1981                 }
 1982 
 1983                 dentry->d_count++;
 1984                 found = dentry;
 1985                 spin_unlock(&dentry->d_lock);
 1986                 break;
 1987 next:
 1988                 spin_unlock(&dentry->d_lock);
 1989         }
 1990         rcu_read_unlock();
 1991 
 1992         return found;
 1993 }
 1994 
 1995 /**
 1996  * d_hash_and_lookup - hash the qstr then search for a dentry
 1997  * @dir: Directory to search in
 1998  * @name: qstr of name we wish to find
 1999  *
 2000  * On hash failure or on lookup failure NULL is returned.
 2001  */
 2002 struct dentry *d_hash_and_lookup(struct dentry *dir, struct qstr *name)
 2003 {
 2004         struct dentry *dentry = NULL;
 2005 
 2006         /*
 2007          * Check for a fs-specific hash function. Note that we must
 2008          * calculate the standard hash first, as the d_op->d_hash()
 2009          * routine may choose to leave the hash value unchanged.
 2010          */
 2011         name->hash = full_name_hash(name->name, name->len);
 2012         if (dir->d_flags & DCACHE_OP_HASH) {
 2013                 if (dir->d_op->d_hash(dir, dir->d_inode, name) < 0)
 2014                         goto out;
 2015         }
 2016         dentry = d_lookup(dir, name);
 2017 out:
 2018         return dentry;
 2019 }
 2020 
 2021 /**
 2022  * d_validate - verify dentry provided from insecure source (deprecated)
 2023  * @dentry: The dentry alleged to be valid child of @dparent
 2024  * @dparent: The parent dentry (known to be valid)
 2025  *
 2026  * An insecure source has sent us a dentry, here we verify it and dget() it.
 2027  * This is used by ncpfs in its readdir implementation.
 2028  * Zero is returned in the dentry is invalid.
 2029  *
 2030  * This function is slow for big directories, and deprecated, do not use it.
 2031  */
 2032 int d_validate(struct dentry *dentry, struct dentry *dparent)
 2033 {
 2034         struct dentry *child;
 2035 
 2036         spin_lock(&dparent->d_lock);
 2037         list_for_each_entry(child, &dparent->d_subdirs, d_u.d_child) {
 2038                 if (dentry == child) {
 2039                         spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
 2040                         __dget_dlock(dentry);
 2041                         spin_unlock(&dentry->d_lock);
 2042                         spin_unlock(&dparent->d_lock);
 2043                         return 1;
 2044                 }
 2045         }
 2046         spin_unlock(&dparent->d_lock);
 2047 
 2048         return 0;
 2049 }
 2050 EXPORT_SYMBOL(d_validate);
 2051 
 2052 /*
 2053  * When a file is deleted, we have two options:
 2054  * - turn this dentry into a negative dentry
 2055  * - unhash this dentry and free it.
 2056  *
 2057  * Usually, we want to just turn this into
 2058  * a negative dentry, but if anybody else is
 2059  * currently using the dentry or the inode
 2060  * we can't do that and we fall back on removing
 2061  * it from the hash queues and waiting for
 2062  * it to be deleted later when it has no users
 2063  */
 2064  
 2065 /**
 2066  * d_delete - delete a dentry
 2067  * @dentry: The dentry to delete
 2068  *
 2069  * Turn the dentry into a negative dentry if possible, otherwise
 2070  * remove it from the hash queues so it can be deleted later
 2071  */
 2072  
 2073 void d_delete(struct dentry * dentry)
 2074 {
 2075         struct inode *inode;
 2076         int isdir = 0;
 2077         /*
 2078          * Are we the only user?
 2079          */
 2080 again:
 2081         spin_lock(&dentry->d_lock);
 2082         inode = dentry->d_inode;
 2083         isdir = S_ISDIR(inode->i_mode);
 2084         if (dentry->d_count == 1) {
 2085                 if (!spin_trylock(&inode->i_lock)) {
 2086                         spin_unlock(&dentry->d_lock);
 2087                         cpu_relax();
 2088                         goto again;
 2089                 }
 2090                 dentry->d_flags &= ~DCACHE_CANT_MOUNT;
 2091                 dentry_unlink_inode(dentry);
 2092                 fsnotify_nameremove(dentry, isdir);
 2093                 return;
 2094         }
 2095 
 2096         if (!d_unhashed(dentry))
 2097                 __d_drop(dentry);
 2098 
 2099         spin_unlock(&dentry->d_lock);
 2100 
 2101         fsnotify_nameremove(dentry, isdir);
 2102 }
 2103 EXPORT_SYMBOL(d_delete);
 2104 
 2105 static void __d_rehash(struct dentry * entry, struct hlist_bl_head *b)
 2106 {
 2107         BUG_ON(!d_unhashed(entry));
 2108         hlist_bl_lock(b);
 2109         entry->d_flags |= DCACHE_RCUACCESS;
 2110         hlist_bl_add_head_rcu(&entry->d_hash, b);
 2111         hlist_bl_unlock(b);
 2112 }
 2113 
 2114 static void _d_rehash(struct dentry * entry)
 2115 {
 2116         __d_rehash(entry, d_hash(entry->d_parent, entry->d_name.hash));
 2117 }
 2118 
 2119 /**
 2120  * d_rehash     - add an entry back to the hash
 2121  * @entry: dentry to add to the hash
 2122  *
 2123  * Adds a dentry to the hash according to its name.
 2124  */
 2125  
 2126 void d_rehash(struct dentry * entry)
 2127 {
 2128         spin_lock(&entry->d_lock);
 2129         _d_rehash(entry);
 2130         spin_unlock(&entry->d_lock);
 2131 }
 2132 EXPORT_SYMBOL(d_rehash);
 2133 
 2134 /**
 2135  * dentry_update_name_case - update case insensitive dentry with a new name
 2136  * @dentry: dentry to be updated
 2137  * @name: new name
 2138  *
 2139  * Update a case insensitive dentry with new case of name.
 2140  *
 2141  * dentry must have been returned by d_lookup with name @name. Old and new
 2142  * name lengths must match (ie. no d_compare which allows mismatched name
 2143  * lengths).
 2144  *
 2145  * Parent inode i_mutex must be held over d_lookup and into this call (to
 2146  * keep renames and concurrent inserts, and readdir(2) away).
 2147  */
 2148 void dentry_update_name_case(struct dentry *dentry, struct qstr *name)
 2149 {
 2150         BUG_ON(!mutex_is_locked(&dentry->d_parent->d_inode->i_mutex));
 2151         BUG_ON(dentry->d_name.len != name->len); /* d_lookup gives this */
 2152 
 2153         spin_lock(&dentry->d_lock);
 2154         write_seqcount_begin(&dentry->d_seq);
 2155         memcpy((unsigned char *)dentry->d_name.name, name->name, name->len);
 2156         write_seqcount_end(&dentry->d_seq);
 2157         spin_unlock(&dentry->d_lock);
 2158 }
 2159 EXPORT_SYMBOL(dentry_update_name_case);
 2160 
 2161 static void switch_names(struct dentry *dentry, struct dentry *target)
 2162 {
 2163         if (dname_external(target)) {
 2164                 if (dname_external(dentry)) {
 2165                         /*
 2166                          * Both external: swap the pointers
 2167                          */
 2168                         swap(target->d_name.name, dentry->d_name.name);
 2169                 } else {
 2170                         /*
 2171                          * dentry:internal, target:external.  Steal target's
 2172                          * storage and make target internal.
 2173                          */
 2174                         memcpy(target->d_iname, dentry->d_name.name,
 2175                                         dentry->d_name.len + 1);
 2176                         dentry->d_name.name = target->d_name.name;
 2177                         target->d_name.name = target->d_iname;
 2178                 }
 2179         } else {
 2180                 if (dname_external(dentry)) {
 2181                         /*
 2182                          * dentry:external, target:internal.  Give dentry's
 2183                          * storage to target and make dentry internal
 2184                          */
 2185                         memcpy(dentry->d_iname, target->d_name.name,
 2186                                         target->d_name.len + 1);
 2187                         target->d_name.name = dentry->d_name.name;
 2188                         dentry->d_name.name = dentry->d_iname;
 2189                 } else {
 2190                         /*
 2191                          * Both are internal.  Just copy target to dentry
 2192                          */
 2193                         memcpy(dentry->d_iname, target->d_name.name,
 2194                                         target->d_name.len + 1);
 2195                         dentry->d_name.len = target->d_name.len;
 2196                         return;
 2197                 }
 2198         }
 2199         swap(dentry->d_name.len, target->d_name.len);
 2200 }
 2201 
 2202 static void dentry_lock_for_move(struct dentry *dentry, struct dentry *target)
 2203 {
 2204         /*
 2205          * XXXX: do we really need to take target->d_lock?
 2206          */
 2207         if (IS_ROOT(dentry) || dentry->d_parent == target->d_parent)
 2208                 spin_lock(&target->d_parent->d_lock);
 2209         else {
 2210                 if (d_ancestor(dentry->d_parent, target->d_parent)) {
 2211                         spin_lock(&dentry->d_parent->d_lock);
 2212                         spin_lock_nested(&target->d_parent->d_lock,
 2213                                                 DENTRY_D_LOCK_NESTED);
 2214                 } else {
 2215                         spin_lock(&target->d_parent->d_lock);
 2216                         spin_lock_nested(&dentry->d_parent->d_lock,
 2217                                                 DENTRY_D_LOCK_NESTED);
 2218                 }
 2219         }
 2220         if (target < dentry) {
 2221                 spin_lock_nested(&target->d_lock, 2);
 2222                 spin_lock_nested(&dentry->d_lock, 3);
 2223         } else {
 2224                 spin_lock_nested(&dentry->d_lock, 2);
 2225                 spin_lock_nested(&target->d_lock, 3);
 2226         }
 2227 }
 2228 
 2229 static void dentry_unlock_parents_for_move(struct dentry *dentry,
 2230                                         struct dentry *target)
 2231 {
 2232         if (target->d_parent != dentry->d_parent)
 2233                 spin_unlock(&dentry->d_parent->d_lock);
 2234         if (target->d_parent != target)
 2235                 spin_unlock(&target->d_parent->d_lock);
 2236 }
 2237 
 2238 /*
 2239  * When switching names, the actual string doesn't strictly have to
 2240  * be preserved in the target - because we're dropping the target
 2241  * anyway. As such, we can just do a simple memcpy() to copy over
 2242  * the new name before we switch.
 2243  *
 2244  * Note that we have to be a lot more careful about getting the hash
 2245  * switched - we have to switch the hash value properly even if it
 2246  * then no longer matches the actual (corrupted) string of the target.
 2247  * The hash value has to match the hash queue that the dentry is on..
 2248  */
 2249 /*
 2250  * __d_move - move a dentry
 2251  * @dentry: entry to move
 2252  * @target: new dentry
 2253  *
 2254  * Update the dcache to reflect the move of a file name. Negative
 2255  * dcache entries should not be moved in this way. Caller must hold
 2256  * rename_lock, the i_mutex of the source and target directories,
 2257  * and the sb->s_vfs_rename_mutex if they differ. See lock_rename().
 2258  */
 2259 static void __d_move(struct dentry * dentry, struct dentry * target)
 2260 {
 2261         if (!dentry->d_inode)
 2262                 printk(KERN_WARNING "VFS: moving negative dcache entry\n");
 2263 
 2264         BUG_ON(d_ancestor(dentry, target));
 2265         BUG_ON(d_ancestor(target, dentry));
 2266 
 2267         dentry_lock_for_move(dentry, target);
 2268 
 2269         write_seqcount_begin(&dentry->d_seq);
 2270         write_seqcount_begin(&target->d_seq);
 2271 
 2272         /* __d_drop does write_seqcount_barrier, but they're OK to nest. */
 2273 
 2274         /*
 2275          * Move the dentry to the target hash queue. Don't bother checking
 2276          * for the same hash queue because of how unlikely it is.
 2277          */
 2278         __d_drop(dentry);
 2279         __d_rehash(dentry, d_hash(target->d_parent, target->d_name.hash));
 2280 
 2281         /* Unhash the target: dput() will then get rid of it */
 2282         __d_drop(target);
 2283 
 2284         list_del(&dentry->d_u.d_child);
 2285         list_del(&target->d_u.d_child);
 2286 
 2287         /* Switch the names.. */
 2288         switch_names(dentry, target);
 2289         swap(dentry->d_name.hash, target->d_name.hash);
 2290 
 2291         /* ... and switch the parents */
 2292         if (IS_ROOT(dentry)) {
 2293                 dentry->d_parent = target->d_parent;
 2294                 target->d_parent = target;
 2295                 INIT_LIST_HEAD(&target->d_u.d_child);
 2296         } else {
 2297                 swap(dentry->d_parent, target->d_parent);
 2298 
 2299                 /* And add them back to the (new) parent lists */
 2300                 list_add(&target->d_u.d_child, &target->d_parent->d_subdirs);
 2301         }
 2302 
 2303         list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs);
 2304 
 2305         write_seqcount_end(&target->d_seq);
 2306         write_seqcount_end(&dentry->d_seq);
 2307 
 2308         dentry_unlock_parents_for_move(dentry, target);
 2309         spin_unlock(&target->d_lock);
 2310         fsnotify_d_move(dentry);
 2311         spin_unlock(&dentry->d_lock);
 2312 }
 2313 
 2314 /*
 2315  * d_move - move a dentry
 2316  * @dentry: entry to move
 2317  * @target: new dentry
 2318  *
 2319  * Update the dcache to reflect the move of a file name. Negative
 2320  * dcache entries should not be moved in this way. See the locking
 2321  * requirements for __d_move.
 2322  */
 2323 void d_move(struct dentry *dentry, struct dentry *target)
 2324 {
 2325         write_seqlock(&rename_lock);
 2326         __d_move(dentry, target);
 2327         write_sequnlock(&rename_lock);
 2328 }
 2329 EXPORT_SYMBOL(d_move);
 2330 
 2331 /**
 2332  * d_ancestor - search for an ancestor
 2333  * @p1: ancestor dentry
 2334  * @p2: child dentry
 2335  *
 2336  * Returns the ancestor dentry of p2 which is a child of p1, if p1 is
 2337  * an ancestor of p2, else NULL.
 2338  */
 2339 struct dentry *d_ancestor(struct dentry *p1, struct dentry *p2)
 2340 {
 2341         struct dentry *p;
 2342 
 2343         for (p = p2; !IS_ROOT(p); p = p->d_parent) {
 2344                 if (p->d_parent == p1)
 2345                         return p;
 2346         }
 2347         return NULL;
 2348 }
 2349 
 2350 /*
 2351  * This helper attempts to cope with remotely renamed directories
 2352  *
 2353  * It assumes that the caller is already holding
 2354  * dentry->d_parent->d_inode->i_mutex, inode->i_lock and rename_lock
 2355  *
 2356  * Note: If ever the locking in lock_rename() changes, then please
 2357  * remember to update this too...
 2358  */
 2359 static struct dentry *__d_unalias(struct inode *inode,
 2360                 struct dentry *dentry, struct dentry *alias)
 2361 {
 2362         struct mutex *m1 = NULL, *m2 = NULL;
 2363         struct dentry *ret = ERR_PTR(-EBUSY);
 2364 
 2365         /* If alias and dentry share a parent, then no extra locks required */
 2366         if (alias->d_parent == dentry->d_parent)
 2367                 goto out_unalias;
 2368 
 2369         /* See lock_rename() */
 2370         if (!mutex_trylock(&dentry->d_sb->s_vfs_rename_mutex))
 2371                 goto out_err;
 2372         m1 = &dentry->d_sb->s_vfs_rename_mutex;
 2373         if (!mutex_trylock(&alias->d_parent->d_inode->i_mutex))
 2374                 goto out_err;
 2375         m2 = &alias->d_parent->d_inode->i_mutex;
 2376 out_unalias:
 2377         if (likely(!d_mountpoint(alias))) {
 2378                 __d_move(alias, dentry);
 2379                 ret = alias;
 2380         }
 2381 out_err:
 2382         spin_unlock(&inode->i_lock);
 2383         if (m2)
 2384                 mutex_unlock(m2);
 2385         if (m1)
 2386                 mutex_unlock(m1);
 2387         return ret;
 2388 }
 2389 
 2390 /*
 2391  * Prepare an anonymous dentry for life in the superblock's dentry tree as a
 2392  * named dentry in place of the dentry to be replaced.
 2393  * returns with anon->d_lock held!
 2394  */
 2395 static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon)
 2396 {
 2397         struct dentry *dparent, *aparent;
 2398 
 2399         dentry_lock_for_move(anon, dentry);
 2400 
 2401         write_seqcount_begin(&dentry->d_seq);
 2402         write_seqcount_begin(&anon->d_seq);
 2403 
 2404         dparent = dentry->d_parent;
 2405         aparent = anon->d_parent;
 2406 
 2407         switch_names(dentry, anon);
 2408         swap(dentry->d_name.hash, anon->d_name.hash);
 2409 
 2410         dentry->d_parent = (aparent == anon) ? dentry : aparent;
 2411         list_del(&dentry->d_u.d_child);
 2412         if (!IS_ROOT(dentry))
 2413                 list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs);
 2414         else
 2415                 INIT_LIST_HEAD(&dentry->d_u.d_child);
 2416 
 2417         anon->d_parent = (dparent == dentry) ? anon : dparent;
 2418         list_del(&anon->d_u.d_child);
 2419         if (!IS_ROOT(anon))
 2420                 list_add(&anon->d_u.d_child, &anon->d_parent->d_subdirs);
 2421         else
 2422                 INIT_LIST_HEAD(&anon->d_u.d_child);
 2423 
 2424         write_seqcount_end(&dentry->d_seq);
 2425         write_seqcount_end(&anon->d_seq);
 2426 
 2427         dentry_unlock_parents_for_move(anon, dentry);
 2428         spin_unlock(&dentry->d_lock);
 2429 
 2430         /* anon->d_lock still locked, returns locked */
 2431         anon->d_flags &= ~DCACHE_DISCONNECTED;
 2432 }
 2433 
 2434 /**
 2435  * d_materialise_unique - introduce an inode into the tree
 2436  * @dentry: candidate dentry
 2437  * @inode: inode to bind to the dentry, to which aliases may be attached
 2438  *
 2439  * Introduces an dentry into the tree, substituting an extant disconnected
 2440  * root directory alias in its place if there is one. Caller must hold the
 2441  * i_mutex of the parent directory.
 2442  */
 2443 struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
 2444 {
 2445         struct dentry *actual;
 2446 
 2447         BUG_ON(!d_unhashed(dentry));
 2448 
 2449         if (!inode) {
 2450                 actual = dentry;
 2451                 __d_instantiate(dentry, NULL);
 2452                 d_rehash(actual);
 2453                 goto out_nolock;
 2454         }
 2455 
 2456         spin_lock(&inode->i_lock);
 2457 
 2458         if (S_ISDIR(inode->i_mode)) {
 2459                 struct dentry *alias;
 2460 
 2461                 /* Does an aliased dentry already exist? */
 2462                 alias = __d_find_alias(inode, 0);
 2463                 if (alias) {
 2464                         actual = alias;
 2465                         write_seqlock(&rename_lock);
 2466 
 2467                         if (d_ancestor(alias, dentry)) {
 2468                                 /* Check for loops */
 2469                                 actual = ERR_PTR(-ELOOP);
 2470                                 spin_unlock(&inode->i_lock);
 2471                         } else if (IS_ROOT(alias)) {
 2472                                 /* Is this an anonymous mountpoint that we
 2473                                  * could splice into our tree? */
 2474                                 __d_materialise_dentry(dentry, alias);
 2475                                 write_sequnlock(&rename_lock);
 2476                                 __d_drop(alias);
 2477                                 goto found;
 2478                         } else {
 2479                                 /* Nope, but we must(!) avoid directory
 2480                                  * aliasing. This drops inode->i_lock */
 2481                                 actual = __d_unalias(inode, dentry, alias);
 2482                         }
 2483                         write_sequnlock(&rename_lock);
 2484                         if (IS_ERR(actual)) {
 2485                                 if (PTR_ERR(actual) == -ELOOP)
 2486                                         pr_warn_ratelimited(
 2487                                                 "VFS: Lookup of '%s' in %s %s"
 2488                                                 " would have caused loop\n",
 2489                                                 dentry->d_name.name,
 2490                                                 inode->i_sb->s_type->name,
 2491                                                 inode->i_sb->s_id);
 2492                                 dput(alias);
 2493                         }
 2494                         goto out_nolock;
 2495                 }
 2496         }
 2497 
 2498         /* Add a unique reference */
 2499         actual = __d_instantiate_unique(dentry, inode);
 2500         if (!actual)
 2501                 actual = dentry;
 2502         else
 2503                 BUG_ON(!d_unhashed(actual));
 2504 
 2505         spin_lock(&actual->d_lock);
 2506 found:
 2507         _d_rehash(actual);
 2508         spin_unlock(&actual->d_lock);
 2509         spin_unlock(&inode->i_lock);
 2510 out_nolock:
 2511         if (actual == dentry) {
 2512                 security_d_instantiate(dentry, inode);
 2513                 return NULL;
 2514         }
 2515 
 2516         iput(inode);
 2517         return actual;
 2518 }
 2519 EXPORT_SYMBOL_GPL(d_materialise_unique);
 2520 
 2521 static int prepend(char **buffer, int *buflen, const char *str, int namelen)
 2522 {
 2523         *buflen -= namelen;
 2524         if (*buflen < 0)
 2525                 return -ENAMETOOLONG;
 2526         *buffer -= namelen;
 2527         memcpy(*buffer, str, namelen);
 2528         return 0;
 2529 }
 2530 
 2531 static int prepend_name(char **buffer, int *buflen, struct qstr *name)
 2532 {
 2533         return prepend(buffer, buflen, name->name, name->len);
 2534 }
 2535 
 2536 /**
 2537  * prepend_path - Prepend path string to a buffer
 2538  * @path: the dentry/vfsmount to report
 2539  * @root: root vfsmnt/dentry
 2540  * @buffer: pointer to the end of the buffer
 2541  * @buflen: pointer to buffer length
 2542  *
 2543  * Caller holds the rename_lock.
 2544  */
 2545 static int prepend_path(const struct path *path,
 2546                         const struct path *root,
 2547                         char **buffer, int *buflen)
 2548 {
 2549         struct dentry *dentry = path->dentry;
 2550         struct vfsmount *vfsmnt = path->mnt;
 2551         struct mount *mnt = real_mount(vfsmnt);
 2552         bool slash = false;
 2553         int error = 0;
 2554 
 2555         br_read_lock(&vfsmount_lock);
 2556         while (dentry != root->dentry || vfsmnt != root->mnt) {
 2557                 struct dentry * parent;
 2558 
 2559                 if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) {
 2560                         /* Global root? */
 2561                         if (!mnt_has_parent(mnt))
 2562                                 goto global_root;
 2563                         dentry = mnt->mnt_mountpoint;
 2564                         mnt = mnt->mnt_parent;
 2565                         vfsmnt = &mnt->mnt;
 2566                         continue;
 2567                 }
 2568                 parent = dentry->d_parent;
 2569                 prefetch(parent);
 2570                 spin_lock(&dentry->d_lock);
 2571                 error = prepend_name(buffer, buflen, &dentry->d_name);
 2572                 spin_unlock(&dentry->d_lock);
 2573                 if (!error)
 2574                         error = prepend(buffer, buflen, "/", 1);
 2575                 if (error)
 2576                         break;
 2577 
 2578                 slash = true;
 2579                 dentry = parent;
 2580         }
 2581 
 2582         if (!error && !slash)
 2583                 error = prepend(buffer, buflen, "/", 1);
 2584 
 2585 out:
 2586         br_read_unlock(&vfsmount_lock);
 2587         return error;
 2588 
 2589 global_root:
 2590         /*
 2591          * Filesystems needing to implement special "root names"
 2592          * should do so with ->d_dname()
 2593          */
 2594         if (IS_ROOT(dentry) &&
 2595             (dentry->d_name.len != 1 || dentry->d_name.name[0] != '/')) {
 2596                 WARN(1, "Root dentry has weird name <%.*s>\n",
 2597                      (int) dentry->d_name.len, dentry->d_name.name);
 2598         }
 2599         if (!slash)
 2600                 error = prepend(buffer, buflen, "/", 1);
 2601         if (!error)
 2602                 error = is_mounted(vfsmnt) ? 1 : 2;
 2603         goto out;
 2604 }
 2605 
 2606 /**
 2607  * __d_path - return the path of a dentry
 2608  * @path: the dentry/vfsmount to report
 2609  * @root: root vfsmnt/dentry
 2610  * @buf: buffer to return value in
 2611  * @buflen: buffer length
 2612  *
 2613  * Convert a dentry into an ASCII path name.
 2614  *
 2615  * Returns a pointer into the buffer or an error code if the
 2616  * path was too long.
 2617  *
 2618  * "buflen" should be positive.
 2619  *
 2620  * If the path is not reachable from the supplied root, return %NULL.
 2621  */
 2622 char *__d_path(const struct path *path,
 2623                const struct path *root,
 2624                char *buf, int buflen)
 2625 {
 2626         char *res = buf + buflen;
 2627         int error;
 2628 
 2629         prepend(&res, &buflen, "\0", 1);
 2630         write_seqlock(&rename_lock);
 2631         error = prepend_path(path, root, &res, &buflen);
 2632         write_sequnlock(&rename_lock);
 2633 
 2634         if (error < 0)
 2635                 return ERR_PTR(error);
 2636         if (error > 0)
 2637                 return NULL;
 2638         return res;
 2639 }
 2640 
 2641 char *d_absolute_path(const struct path *path,
 2642                char *buf, int buflen)
 2643 {
 2644         struct path root = {};
 2645         char *res = buf + buflen;
 2646         int error;
 2647 
 2648         prepend(&res, &buflen, "\0", 1);
 2649         write_seqlock(&rename_lock);
 2650         error = prepend_path(path, &root, &res, &buflen);
 2651         write_sequnlock(&rename_lock);
 2652 
 2653         if (error > 1)
 2654                 error = -EINVAL;
 2655         if (error < 0)
 2656                 return ERR_PTR(error);
 2657         return res;
 2658 }
 2659 
 2660 /*
 2661  * same as __d_path but appends "(deleted)" for unlinked files.
 2662  */
 2663 static int path_with_deleted(const struct path *path,
 2664                              const struct path *root,
 2665                              char **buf, int *buflen)
 2666 {
 2667         prepend(buf, buflen, "\0", 1);
 2668         if (d_unlinked(path->dentry)) {
 2669                 int error = prepend(buf, buflen, " (deleted)", 10);
 2670                 if (error)
 2671                         return error;
 2672         }
 2673 
 2674         return prepend_path(path, root, buf, buflen);
 2675 }
 2676 
 2677 static int prepend_unreachable(char **buffer, int *buflen)
 2678 {
 2679         return prepend(buffer, buflen, "(unreachable)", 13);
 2680 }
 2681 
 2682 /**
 2683  * d_path - return the path of a dentry
 2684  * @path: path to report
 2685  * @buf: buffer to return value in
 2686  * @buflen: buffer length
 2687  *
 2688  * Convert a dentry into an ASCII path name. If the entry has been deleted
 2689  * the string " (deleted)" is appended. Note that this is ambiguous.
 2690  *
 2691  * Returns a pointer into the buffer or an error code if the path was
 2692  * too long. Note: Callers should use the returned pointer, not the passed
 2693  * in buffer, to use the name! The implementation often starts at an offset
 2694  * into the buffer, and may leave 0 bytes at the start.
 2695  *
 2696  * "buflen" should be positive.
 2697  */
 2698 char *d_path(const struct path *path, char *buf, int buflen)
 2699 {
 2700         char *res = buf + buflen;
 2701         struct path root;
 2702         int error;
 2703 
 2704         /*
 2705          * We have various synthetic filesystems that never get mounted.  On
 2706          * these filesystems dentries are never used for lookup purposes, and
 2707          * thus don't need to be hashed.  They also don't need a name until a
 2708          * user wants to identify the object in /proc/pid/fd/.  The little hack
 2709          * below allows us to generate a name for these objects on demand:
 2710          */
 2711         if (path->dentry->d_op && path->dentry->d_op->d_dname)
 2712                 return path->dentry->d_op->d_dname(path->dentry, buf, buflen);
 2713 
 2714         get_fs_root(current->fs, &root);
 2715         write_seqlock(&rename_lock);
 2716         error = path_with_deleted(path, &root, &res, &buflen);
 2717         if (error < 0)
 2718                 res = ERR_PTR(error);
 2719         write_sequnlock(&rename_lock);
 2720         path_put(&root);
 2721         return res;
 2722 }
 2723 EXPORT_SYMBOL(d_path);
 2724 
 2725 /**
 2726  * d_path_with_unreachable - return the path of a dentry
 2727  * @path: path to report
 2728  * @buf: buffer to return value in
 2729  * @buflen: buffer length
 2730  *
 2731  * The difference from d_path() is that this prepends "(unreachable)"
 2732  * to paths which are unreachable from the current process' root.
 2733  */
 2734 char *d_path_with_unreachable(const struct path *path, char *buf, int buflen)
 2735 {
 2736         char *res = buf + buflen;
 2737         struct path root;
 2738         int error;
 2739 
 2740         if (path->dentry->d_op && path->dentry->d_op->d_dname)
 2741                 return path->dentry->d_op->d_dname(path->dentry, buf, buflen);
 2742 
 2743         get_fs_root(current->fs, &root);
 2744         write_seqlock(&rename_lock);
 2745         error = path_with_deleted(path, &root, &res, &buflen);
 2746         if (error > 0)
 2747                 error = prepend_unreachable(&res, &buflen);
 2748         write_sequnlock(&rename_lock);
 2749         path_put(&root);
 2750         if (error)
 2751                 res =  ERR_PTR(error);
 2752 
 2753         return res;
 2754 }
 2755 
 2756 /*
 2757  * Helper function for dentry_operations.d_dname() members
 2758  */
 2759 char *dynamic_dname(struct dentry *dentry, char *buffer, int buflen,
 2760                         const char *fmt, ...)
 2761 {
 2762         va_list args;
 2763         char temp[64];
 2764         int sz;
 2765 
 2766         va_start(args, fmt);
 2767         sz = vsnprintf(temp, sizeof(temp), fmt, args) + 1;
 2768         va_end(args);
 2769 
 2770         if (sz > sizeof(temp) || sz > buflen)
 2771                 return ERR_PTR(-ENAMETOOLONG);
 2772 
 2773         buffer += buflen - sz;
 2774         return memcpy(buffer, temp, sz);
 2775 }
 2776 
 2777 /*
 2778  * Write full pathname from the root of the filesystem into the buffer.
 2779  */
 2780 static char *__dentry_path(struct dentry *dentry, char *buf, int buflen)
 2781 {
 2782         char *end = buf + buflen;
 2783         char *retval;
 2784 
 2785         prepend(&end, &buflen, "\0", 1);
 2786         if (buflen < 1)
 2787                 goto Elong;
 2788         /* Get '/' right */
 2789         retval = end-1;
 2790         *retval = '/';
 2791 
 2792         while (!IS_ROOT(dentry)) {
 2793                 struct dentry *parent = dentry->d_parent;
 2794                 int error;
 2795 
 2796                 prefetch(parent);
 2797                 spin_lock(&dentry->d_lock);
 2798                 error = prepend_name(&end, &buflen, &dentry->d_name);
 2799                 spin_unlock(&dentry->d_lock);
 2800                 if (error != 0 || prepend(&end, &buflen, "/", 1) != 0)
 2801                         goto Elong;
 2802 
 2803                 retval = end;
 2804                 dentry = parent;
 2805         }
 2806         return retval;
 2807 Elong:
 2808         return ERR_PTR(-ENAMETOOLONG);
 2809 }
 2810 
 2811 char *dentry_path_raw(struct dentry *dentry, char *buf, int buflen)
 2812 {
 2813         char *retval;
 2814 
 2815         write_seqlock(&rename_lock);
 2816         retval = __dentry_path(dentry, buf, buflen);
 2817         write_sequnlock(&rename_lock);
 2818 
 2819         return retval;
 2820 }
 2821 EXPORT_SYMBOL(dentry_path_raw);
 2822 
 2823 char *dentry_path(struct dentry *dentry, char *buf, int buflen)
 2824 {
 2825         char *p = NULL;
 2826         char *retval;
 2827 
 2828         write_seqlock(&rename_lock);
 2829         if (d_unlinked(dentry)) {
 2830                 p = buf + buflen;
 2831                 if (prepend(&p, &buflen, "//deleted", 10) != 0)
 2832                         goto Elong;
 2833                 buflen++;
 2834         }
 2835         retval = __dentry_path(dentry, buf, buflen);
 2836         write_sequnlock(&rename_lock);
 2837         if (!IS_ERR(retval) && p)
 2838                 *p = '/';       /* restore '/' overriden with '\0' */
 2839         return retval;
 2840 Elong:
 2841         return ERR_PTR(-ENAMETOOLONG);
 2842 }
 2843 
 2844 /*
 2845  * NOTE! The user-level library version returns a
 2846  * character pointer. The kernel system call just
 2847  * returns the length of the buffer filled (which
 2848  * includes the ending '\0' character), or a negative
 2849  * error value. So libc would do something like
 2850  *
 2851  *      char *getcwd(char * buf, size_t size)
 2852  *      {
 2853  *              int retval;
 2854  *
 2855  *              retval = sys_getcwd(buf, size);
 2856  *              if (retval >= 0)
 2857  *                      return buf;
 2858  *              errno = -retval;
 2859  *              return NULL;
 2860  *      }
 2861  */
 2862 SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
 2863 {
 2864         int error;
 2865         struct path pwd, root;
 2866         char *page = (char *) __get_free_page(GFP_USER);
 2867 
 2868         if (!page)
 2869                 return -ENOMEM;
 2870 
 2871         get_fs_root_and_pwd(current->fs, &root, &pwd);
 2872 
 2873         error = -ENOENT;
 2874         write_seqlock(&rename_lock);
 2875         if (!d_unlinked(pwd.dentry)) {
 2876                 unsigned long len;
 2877                 char *cwd = page + PAGE_SIZE;
 2878                 int buflen = PAGE_SIZE;
 2879 
 2880                 prepend(&cwd, &buflen, "\0", 1);
 2881                 error = prepend_path(&pwd, &root, &cwd, &buflen);
 2882                 write_sequnlock(&rename_lock);
 2883 
 2884                 if (error < 0)
 2885                         goto out;
 2886 
 2887                 /* Unreachable from current root */
 2888                 if (error > 0) {
 2889                         error = prepend_unreachable(&cwd, &buflen);
 2890                         if (error)
 2891                                 goto out;
 2892                 }
 2893 
 2894                 error = -ERANGE;
 2895                 len = PAGE_SIZE + page - cwd;
 2896                 if (len <= size) {
 2897                         error = len;
 2898                         if (copy_to_user(buf, cwd, len))
 2899                                 error = -EFAULT;
 2900                 }
 2901         } else {
 2902                 write_sequnlock(&rename_lock);
 2903         }
 2904 
 2905 out:
 2906         path_put(&pwd);
 2907         path_put(&root);
 2908         free_page((unsigned long) page);
 2909         return error;
 2910 }
 2911 
 2912 /*
 2913  * Test whether new_dentry is a subdirectory of old_dentry.
 2914  *
 2915  * Trivially implemented using the dcache structure
 2916  */
 2917 
 2918 /**
 2919  * is_subdir - is new dentry a subdirectory of old_dentry
 2920  * @new_dentry: new dentry
 2921  * @old_dentry: old dentry
 2922  *
 2923  * Returns 1 if new_dentry is a subdirectory of the parent (at any depth).
 2924  * Returns 0 otherwise.
 2925  * Caller must ensure that "new_dentry" is pinned before calling is_subdir()
 2926  */
 2927   
 2928 int is_subdir(struct dentry *new_dentry, struct dentry *old_dentry)
 2929 {
 2930         int result;
 2931         unsigned seq;
 2932 
 2933         if (new_dentry == old_dentry)
 2934                 return 1;
 2935 
 2936         do {
 2937                 /* for restarting inner loop in case of seq retry */
 2938                 seq = read_seqbegin(&rename_lock);
 2939                 /*
 2940                  * Need rcu_readlock to protect against the d_parent trashing
 2941                  * due to d_move
 2942                  */
 2943                 rcu_read_lock();
 2944                 if (d_ancestor(old_dentry, new_dentry))
 2945                         result = 1;
 2946                 else
 2947                         result = 0;
 2948                 rcu_read_unlock();
 2949         } while (read_seqretry(&rename_lock, seq));
 2950 
 2951         return result;
 2952 }
 2953 
 2954 void d_genocide(struct dentry *root)
 2955 {
 2956         struct dentry *this_parent;
 2957         struct list_head *next;
 2958         unsigned seq;
 2959         int locked = 0;
 2960 
 2961         seq = read_seqbegin(&rename_lock);
 2962 again:
 2963         this_parent = root;
 2964         spin_lock(&this_parent->d_lock);
 2965 repeat:
 2966         next = this_parent->d_subdirs.next;
 2967 resume:
 2968         while (next != &this_parent->d_subdirs) {
 2969                 struct list_head *tmp = next;
 2970                 struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
 2971                 next = tmp->next;
 2972 
 2973                 spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
 2974                 if (d_unhashed(dentry) || !dentry->d_inode) {
 2975                         spin_unlock(&dentry->d_lock);
 2976                         continue;
 2977                 }
 2978                 if (!list_empty(&dentry->d_subdirs)) {
 2979                         spin_unlock(&this_parent->d_lock);
 2980                         spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
 2981                         this_parent = dentry;
 2982                         spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
 2983                         goto repeat;
 2984                 }
 2985                 if (!(dentry->d_flags & DCACHE_GENOCIDE)) {
 2986                         dentry->d_flags |= DCACHE_GENOCIDE;
 2987                         dentry->d_count--;
 2988                 }
 2989                 spin_unlock(&dentry->d_lock);
 2990         }
 2991         if (this_parent != root) {
 2992                 struct dentry *child = this_parent;
 2993                 if (!(this_parent->d_flags & DCACHE_GENOCIDE)) {
 2994                         this_parent->d_flags |= DCACHE_GENOCIDE;
 2995                         this_parent->d_count--;
 2996                 }
 2997                 this_parent = try_to_ascend(this_parent, locked, seq);
 2998                 if (!this_parent)
 2999                         goto rename_retry;
 3000                 next = child->d_u.d_child.next;
 3001                 goto resume;
 3002         }
 3003         spin_unlock(&this_parent->d_lock);
 3004         if (!locked && read_seqretry(&rename_lock, seq))
 3005                 goto rename_retry;
 3006         if (locked)
 3007                 write_sequnlock(&rename_lock);
 3008         return;
 3009 
 3010 rename_retry:
 3011         if (locked)
 3012                 goto again;
 3013         locked = 1;
 3014         write_seqlock(&rename_lock);
 3015         goto again;
 3016 }
 3017 
 3018 /**
 3019  * find_inode_number - check for dentry with name
 3020  * @dir: directory to check
 3021  * @name: Name to find.
 3022  *
 3023  * Check whether a dentry already exists for the given name,
 3024  * and return the inode number if it has an inode. Otherwise
 3025  * 0 is returned.
 3026  *
 3027  * This routine is used to post-process directory listings for
 3028  * filesystems using synthetic inode numbers, and is necessary
 3029  * to keep getcwd() working.
 3030  */
 3031  
 3032 ino_t find_inode_number(struct dentry *dir, struct qstr *name)
 3033 {
 3034         struct dentry * dentry;
 3035         ino_t ino = 0;
 3036 
 3037         dentry = d_hash_and_lookup(dir, name);
 3038         if (dentry) {
 3039                 if (dentry->d_inode)
 3040                         ino = dentry->d_inode->i_ino;
 3041                 dput(dentry);
 3042         }
 3043         return ino;
 3044 }
 3045 EXPORT_SYMBOL(find_inode_number);
 3046 
 3047 static __initdata unsigned long dhash_entries;
 3048 static int __init set_dhash_entries(char *str)
 3049 {
 3050         if (!str)
 3051                 return 0;
 3052         dhash_entries = simple_strtoul(str, &str, 0);
 3053         return 1;
 3054 }
 3055 __setup("dhash_entries=", set_dhash_entries);
 3056 
 3057 static void __init dcache_init_early(void)
 3058 {
 3059         unsigned int loop;
 3060 
 3061         /* If hashes are distributed across NUMA nodes, defer
 3062          * hash allocation until vmalloc space is available.
 3063          */
 3064         if (hashdist)
 3065                 return;
 3066 
 3067         dentry_hashtable =
 3068                 alloc_large_system_hash("Dentry cache",
 3069                                         sizeof(struct hlist_bl_head),
 3070                                         dhash_entries,
 3071                                         13,
 3072                                         HASH_EARLY,
 3073                                         &d_hash_shift,
 3074                                         &d_hash_mask,
 3075                                         0,
 3076                                         0);
 3077 
 3078         for (loop = 0; loop < (1U << d_hash_shift); loop++)
 3079                 INIT_HLIST_BL_HEAD(dentry_hashtable + loop);
 3080 }
 3081 
 3082 static void __init dcache_init(void)
 3083 {
 3084         unsigned int loop;
 3085 
 3086         /* 
 3087          * A constructor could be added for stable state like the lists,
 3088          * but it is probably not worth it because of the cache nature
 3089          * of the dcache. 
 3090          */
 3091         dentry_cache = KMEM_CACHE(dentry,
 3092                 SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD);
 3093 
 3094         /* Hash may have been set up in dcache_init_early */
 3095         if (!hashdist)
 3096                 return;
 3097 
 3098         dentry_hashtable =
 3099                 alloc_large_system_hash("Dentry cache",
 3100                                         sizeof(struct hlist_bl_head),
 3101                                         dhash_entries,
 3102                                         13,
 3103                                         0,
 3104                                         &d_hash_shift,
 3105                                         &d_hash_mask,
 3106                                         0,
 3107                                         0);
 3108 
 3109         for (loop = 0; loop < (1U << d_hash_shift); loop++)
 3110                 INIT_HLIST_BL_HEAD(dentry_hashtable + loop);
 3111 }
 3112 
 3113 /* SLAB cache for __getname() consumers */
 3114 struct kmem_cache *names_cachep __read_mostly;
 3115 EXPORT_SYMBOL(names_cachep);
 3116 
 3117 EXPORT_SYMBOL(d_genocide);
 3118 
 3119 void __init vfs_caches_init_early(void)
 3120 {
 3121         dcache_init_early();
 3122         inode_init_early();
 3123 }
 3124 
 3125 void __init vfs_caches_init(unsigned long mempages)
 3126 {
 3127         unsigned long reserve;
 3128 
 3129         /* Base hash sizes on available memory, with a reserve equal to
 3130            150% of current kernel size */
 3131 
 3132         reserve = min((mempages - nr_free_pages()) * 3/2, mempages - 1);
 3133         mempages -= reserve;
 3134 
 3135         names_cachep = kmem_cache_create("names_cache", PATH_MAX, 0,
 3136                         SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
 3137 
 3138         dcache_init();
 3139         inode_init();
 3140         files_init(mempages);
 3141         mnt_init();
 3142         bdev_cache_init();
 3143         chrdev_init();
 3144 }

Cache object: 1cf3c49adf883dc591480d7dd9c91d47


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.