FreeBSD/Linux Kernel Cross Reference
sys/fs/inode.c


    1 /*
    2  * linux/fs/inode.c
    3  *
    4  * (C) 1997 Linus Torvalds
    5  */
    6 
    7 #include <linux/config.h>
    8 #include <linux/fs.h>
    9 #include <linux/string.h>
   10 #include <linux/mm.h>
   11 #include <linux/dcache.h>
   12 #include <linux/init.h>
   13 #include <linux/quotaops.h>
   14 #include <linux/slab.h>
   15 #include <linux/cache.h>
   16 #include <linux/swap.h>
   17 #include <linux/swapctl.h>
   18 #include <linux/prefetch.h>
   19 #include <linux/locks.h>
   20 
   21 /*
   22  * New inode.c implementation.
   23  *
   24  * This implementation has the basic premise of trying
   25  * to be extremely low-overhead and SMP-safe, yet be
   26  * simple enough to be "obviously correct".
   27  *
   28  * Famous last words.
   29  */
   30 
   31 /* inode dynamic allocation 1999, Andrea Arcangeli <andrea@suse.de> */
   32 
   33 /* #define INODE_PARANOIA 1 */
   34 /* #define INODE_DEBUG 1 */
   35 
   36 /*
   37  * Inode lookup is no longer as critical as it used to be:
   38  * most of the lookups are going to be through the dcache.
   39  */
   40 #define I_HASHBITS      i_hash_shift
   41 #define I_HASHMASK      i_hash_mask
   42 
   43 static unsigned int i_hash_mask;
   44 static unsigned int i_hash_shift;
   45 
   46 /*
   47  * Each inode can be on two separate lists. One is
   48  * the hash list of the inode, used for lookups. The
   49  * other linked list is the "type" list:
   50  *  "in_use" - valid inode, i_count > 0, i_nlink > 0
   51  *  "dirty"  - as "in_use" but also dirty
   52  *  "unused" - valid inode, i_count = 0
   53  *
   54  * A "dirty" list is maintained for each super block,
   55  * allowing for low-overhead inode sync() operations.
   56  */
   57 
   58 static LIST_HEAD(inode_in_use);
   59 static LIST_HEAD(inode_unused);
   60 static struct list_head *inode_hashtable;
   61 static LIST_HEAD(anon_hash_chain); /* for inodes with NULL i_sb */
   62 
   63 /*
   64  * A simple spinlock to protect the list manipulations.
   65  *
   66  * NOTE! You also have to own the lock if you change
   67  * the i_state of an inode while it is in use..
   68  */
   69 static spinlock_t inode_lock = SPIN_LOCK_UNLOCKED;
   70 
   71 /*
   72  * Statistics gathering..
   73  */
   74 struct inodes_stat_t inodes_stat;
   75 
   76 static kmem_cache_t * inode_cachep;
   77 
   78 static struct inode *alloc_inode(struct super_block *sb)
   79 {
   80         static struct address_space_operations empty_aops;
   81         static struct inode_operations empty_iops;
   82         static struct file_operations empty_fops;
   83         struct inode *inode;
   84 
   85         if (sb->s_op->alloc_inode)
   86                 inode = sb->s_op->alloc_inode(sb);
   87         else {
   88                 inode = (struct inode *) kmem_cache_alloc(inode_cachep, SLAB_KERNEL);
   89                 /* will die */
   90                 if (inode)
   91                         memset(&inode->u, 0, sizeof(inode->u));
   92         }
   93 
   94         if (inode) {
   95                 struct address_space * const mapping = &inode->i_data;
   96 
   97                 inode->i_sb = sb;
   98                 inode->i_dev = sb->s_dev;
   99                 inode->i_blkbits = sb->s_blocksize_bits;
  100                 inode->i_flags = 0;
  101                 atomic_set(&inode->i_count, 1);
  102                 inode->i_sock = 0;
  103                 inode->i_op = &empty_iops;
  104                 inode->i_fop = &empty_fops;
  105                 inode->i_nlink = 1;
  106                 atomic_set(&inode->i_writecount, 0);
  107                 inode->i_size = 0;
  108                 inode->i_blocks = 0;
  109                 inode->i_bytes = 0;
  110                 inode->i_generation = 0;
  111                 memset(&inode->i_dquot, 0, sizeof(inode->i_dquot));
  112                 inode->i_pipe = NULL;
  113                 inode->i_bdev = NULL;
  114                 inode->i_cdev = NULL;
  115 
  116                 mapping->a_ops = &empty_aops;
  117                 mapping->host = inode;
  118                 mapping->gfp_mask = GFP_HIGHUSER;
  119                 inode->i_mapping = mapping;
  120         }
  121         return inode;
  122 }
  123 
  124 static void destroy_inode(struct inode *inode) 
  125 {
  126         if (inode_has_buffers(inode))
  127                 BUG();
  128         if (inode->i_sb->s_op->destroy_inode)
  129                 inode->i_sb->s_op->destroy_inode(inode);
  130         else
  131                 kmem_cache_free(inode_cachep, inode);
  132 }
  133 
  134 
  135 /*
  136  * These are initializations that only need to be done
  137  * once, because the fields are idempotent across use
   138  * of the inode, so we let the slab allocator know about that.
  139  */
  140 void inode_init_once(struct inode *inode)
  141 {
  142         memset(inode, 0, sizeof(*inode));
  143         init_waitqueue_head(&inode->i_wait);
  144         INIT_LIST_HEAD(&inode->i_hash);
  145         INIT_LIST_HEAD(&inode->i_data.clean_pages);
  146         INIT_LIST_HEAD(&inode->i_data.dirty_pages);
  147         INIT_LIST_HEAD(&inode->i_data.locked_pages);
  148         INIT_LIST_HEAD(&inode->i_dentry);
  149         INIT_LIST_HEAD(&inode->i_dirty_buffers);
  150         INIT_LIST_HEAD(&inode->i_dirty_data_buffers);
  151         INIT_LIST_HEAD(&inode->i_devices);
  152         sema_init(&inode->i_sem, 1);
  153         sema_init(&inode->i_zombie, 1);
  154         init_rwsem(&inode->i_alloc_sem);
  155         spin_lock_init(&inode->i_data.i_shared_lock);
  156 }
  157 
  158 static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
  159 {
  160         struct inode * inode = (struct inode *) foo;
  161 
  162         if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
  163             SLAB_CTOR_CONSTRUCTOR)
  164                 inode_init_once(inode);
  165 }
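
/*
 * Editor-added illustrative sketch, not part of the original file: a
 * filesystem that supplies its own alloc_inode()/destroy_inode() methods
 * typically embeds the VFS inode in a larger per-filesystem structure and
 * reuses inode_init_once() from its own slab constructor.  All
 * "examplefs_*" names are hypothetical.
 */
struct examplefs_inode_info {
        unsigned long   i_private_state;        /* per-fs bookkeeping */
        struct inode    vfs_inode;              /* the embedded VFS inode */
};

static void examplefs_init_once(void *foo, kmem_cache_t *cachep,
                                unsigned long flags)
{
        struct examplefs_inode_info *ei = (struct examplefs_inode_info *) foo;

        if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
            SLAB_CTOR_CONSTRUCTOR)
                inode_init_once(&ei->vfs_inode);
}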
  166 
  167 /*
  168  * Put the inode on the super block's dirty list.
  169  *
  170  * CAREFUL! We mark it dirty unconditionally, but
  171  * move it onto the dirty list only if it is hashed.
  172  * If it was not hashed, it will never be added to
  173  * the dirty list even if it is later hashed, as it
  174  * will have been marked dirty already.
  175  *
  176  * In short, make sure you hash any inodes _before_
  177  * you start marking them dirty..
  178  */
  179  
  180 /**
  181  *      __mark_inode_dirty -    internal function
  182  *      @inode: inode to mark
   183  *      @flags: what kind of dirty (e.g. I_DIRTY_SYNC)
  184  *      Mark an inode as dirty. Callers should use mark_inode_dirty or
  185  *      mark_inode_dirty_sync.
  186  */
  187  
  188 void __mark_inode_dirty(struct inode *inode, int flags)
  189 {
  190         struct super_block * sb = inode->i_sb;
  191 
  192         if (!sb)
  193                 return;
  194 
  195         /* Don't do this for I_DIRTY_PAGES - that doesn't actually dirty the inode itself */
  196         if (flags & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) {
  197                 if (sb->s_op && sb->s_op->dirty_inode)
  198                         sb->s_op->dirty_inode(inode);
  199         }
  200 
  201         /* avoid the locking if we can */
  202         if ((inode->i_state & flags) == flags)
  203                 return;
  204 
  205         spin_lock(&inode_lock);
  206         if ((inode->i_state & flags) != flags) {
  207                 inode->i_state |= flags;
  208                 /* Only add valid (ie hashed) inodes to the dirty list */
  209                 if (!(inode->i_state & I_LOCK) && !list_empty(&inode->i_hash)) {
  210                         list_del(&inode->i_list);
  211                         list_add(&inode->i_list, &sb->s_dirty);
  212                 }
  213         }
  214         spin_unlock(&inode_lock);
  215 }
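
/*
 * Editor-added illustrative sketch, not part of the original file: a
 * filesystem that changes inode metadata calls the mark_inode_dirty()
 * wrapper (which passes I_DIRTY to __mark_inode_dirty() above) so the
 * inode ends up on its superblock's s_dirty list.  The helper name is
 * hypothetical.
 */
static void examplefs_extend_file(struct inode *inode, loff_t new_size)
{
        inode->i_size = new_size;
        inode->i_mtime = inode->i_ctime = CURRENT_TIME;
        mark_inode_dirty(inode);        /* ends up in __mark_inode_dirty() */
}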
  216 
  217 static void __wait_on_inode(struct inode * inode)
  218 {
  219         DECLARE_WAITQUEUE(wait, current);
  220 
  221         add_wait_queue(&inode->i_wait, &wait);
  222 repeat:
  223         set_current_state(TASK_UNINTERRUPTIBLE);
  224         if (inode->i_state & I_LOCK) {
  225                 schedule();
  226                 goto repeat;
  227         }
  228         remove_wait_queue(&inode->i_wait, &wait);
  229         current->state = TASK_RUNNING;
  230 }
  231 
  232 static inline void wait_on_inode(struct inode *inode)
  233 {
  234         if (inode->i_state & I_LOCK)
  235                 __wait_on_inode(inode);
  236 }
  237 
  238 
  239 static inline void write_inode(struct inode *inode, int sync)
  240 {
  241         if (inode->i_sb && inode->i_sb->s_op && inode->i_sb->s_op->write_inode && !is_bad_inode(inode))
  242                 inode->i_sb->s_op->write_inode(inode, sync);
  243 }
  244 
  245 static inline void __iget(struct inode * inode)
  246 {
  247         if (atomic_read(&inode->i_count)) {
  248                 atomic_inc(&inode->i_count);
  249                 return;
  250         }
  251         atomic_inc(&inode->i_count);
  252         if (!(inode->i_state & (I_DIRTY|I_LOCK))) {
  253                 list_del(&inode->i_list);
  254                 list_add(&inode->i_list, &inode_in_use);
  255         }
  256         inodes_stat.nr_unused--;
  257 }
  258 
  259 static inline void __sync_one(struct inode *inode, int sync)
  260 {
  261         unsigned dirty;
  262 
  263         list_del(&inode->i_list);
  264         list_add(&inode->i_list, &inode->i_sb->s_locked_inodes);
  265 
  266         if (inode->i_state & I_LOCK)
  267                 BUG();
  268 
  269         /* Set I_LOCK, reset I_DIRTY */
  270         dirty = inode->i_state & I_DIRTY;
  271         inode->i_state |= I_LOCK;
  272         inode->i_state &= ~I_DIRTY;
  273         spin_unlock(&inode_lock);
  274 
  275         filemap_fdatasync(inode->i_mapping);
  276 
  277         /* Don't write the inode if only I_DIRTY_PAGES was set */
  278         if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC))
  279                 write_inode(inode, sync);
  280 
  281         filemap_fdatawait(inode->i_mapping);
  282 
  283         spin_lock(&inode_lock);
  284         inode->i_state &= ~I_LOCK;
  285         if (!(inode->i_state & I_FREEING)) {
  286                 struct list_head *to;
  287                 if (inode->i_state & I_DIRTY)
  288                         to = &inode->i_sb->s_dirty;
  289                 else if (atomic_read(&inode->i_count))
  290                         to = &inode_in_use;
  291                 else
  292                         to = &inode_unused;
  293                 list_del(&inode->i_list);
  294                 list_add(&inode->i_list, to);
  295         }
  296         wake_up(&inode->i_wait);
  297 }
  298 
  299 static inline void sync_one(struct inode *inode, int sync)
  300 {
  301         while (inode->i_state & I_LOCK) {
  302                 __iget(inode);
  303                 spin_unlock(&inode_lock);
  304                 __wait_on_inode(inode);
  305                 iput(inode);
  306                 spin_lock(&inode_lock);
  307         }
  308 
  309         __sync_one(inode, sync);
  310 }
  311 
  312 static inline void sync_list(struct list_head *head)
  313 {
  314         struct list_head * tmp;
  315 
  316         while ((tmp = head->prev) != head) 
  317                 __sync_one(list_entry(tmp, struct inode, i_list), 0);
  318 }
  319 
  320 static inline void wait_on_locked(struct list_head *head)
  321 {
  322         struct list_head * tmp;
  323         while ((tmp = head->prev) != head) {
  324                 struct inode *inode = list_entry(tmp, struct inode, i_list);
  325                 __iget(inode);
  326                 spin_unlock(&inode_lock);
  327                 __wait_on_inode(inode);
  328                 iput(inode);
  329                 spin_lock(&inode_lock);
  330         }
  331 }
  332 
  333 static inline int try_to_sync_unused_list(struct list_head *head, int nr_inodes)
  334 {
  335         struct list_head *tmp = head;
  336         struct inode *inode;
  337 
  338         while (nr_inodes && (tmp = tmp->prev) != head) {
  339                 inode = list_entry(tmp, struct inode, i_list);
  340 
  341                 if (!atomic_read(&inode->i_count)) {
  342                         __sync_one(inode, 0);
  343                         nr_inodes--;
  344 
  345                         /* 
  346                          * __sync_one moved the inode to another list,
  347                          * so we have to start looking from the list head.
  348                          */
  349                         tmp = head;
  350                 }
  351         }
  352 
  353         return nr_inodes;
  354 }
  355 
  356 void sync_inodes_sb(struct super_block *sb)
  357 {
  358         spin_lock(&inode_lock);
  359         while (!list_empty(&sb->s_dirty)||!list_empty(&sb->s_locked_inodes)) {
  360                 sync_list(&sb->s_dirty);
  361                 wait_on_locked(&sb->s_locked_inodes);
  362         }
  363         spin_unlock(&inode_lock);
  364 }
  365 
  366 /*
  367  * Note:
   368  * We don't need to grab a reference to the superblock here. If its
   369  * ->s_dirty list is non-empty it hasn't been killed yet and kill_super() won't proceed
  370  * past sync_inodes_sb() until both ->s_dirty and ->s_locked_inodes are
  371  * empty. Since __sync_one() regains inode_lock before it finally moves
  372  * inode from superblock lists we are OK.
  373  */
  374 
  375 void sync_unlocked_inodes(void)
  376 {
  377         struct super_block * sb;
  378         spin_lock(&inode_lock);
  379         spin_lock(&sb_lock);
  380         sb = sb_entry(super_blocks.next);
  381         for (; sb != sb_entry(&super_blocks); sb = sb_entry(sb->s_list.next)) {
  382                 if (!list_empty(&sb->s_dirty)) {
  383                         spin_unlock(&sb_lock);
  384                         sync_list(&sb->s_dirty);
  385                         spin_lock(&sb_lock);
  386                 }
  387         }
  388         spin_unlock(&sb_lock);
  389         spin_unlock(&inode_lock);
  390 }
  391 
  392 /*
  393  * Find a superblock with inodes that need to be synced
  394  */
  395 
  396 static struct super_block *get_super_to_sync(void)
  397 {
  398         struct list_head *p;
  399 restart:
  400         spin_lock(&inode_lock);
  401         spin_lock(&sb_lock);
  402         list_for_each(p, &super_blocks) {
  403                 struct super_block *s = list_entry(p,struct super_block,s_list);
  404                 if (list_empty(&s->s_dirty) && list_empty(&s->s_locked_inodes))
  405                         continue;
  406                 s->s_count++;
  407                 spin_unlock(&sb_lock);
  408                 spin_unlock(&inode_lock);
  409                 down_read(&s->s_umount);
  410                 if (!s->s_root) {
  411                         drop_super(s);
  412                         goto restart;
  413                 }
  414                 return s;
  415         }
  416         spin_unlock(&sb_lock);
  417         spin_unlock(&inode_lock);
  418         return NULL;
  419 }
  420 
  421 /**
  422  *      sync_inodes
  423  *      @dev: device to sync the inodes from.
  424  *
   425  *      sync_inodes goes through each super block's list of dirty inodes,
   426  *      writes them out, and puts them back on the normal (clean) lists.
  427  */
  428 
  429 void sync_inodes(kdev_t dev)
  430 {
  431         struct super_block * s;
  432 
  433         /*
  434          * Search the super_blocks array for the device(s) to sync.
  435          */
  436         if (dev) {
  437                 if ((s = get_super(dev)) != NULL) {
  438                         sync_inodes_sb(s);
  439                         drop_super(s);
  440                 }
  441         } else {
  442                 while ((s = get_super_to_sync()) != NULL) {
  443                         sync_inodes_sb(s);
  444                         drop_super(s);
  445                 }
  446         }
  447 }
  448 
  449 static void try_to_sync_unused_inodes(void * arg)
  450 {
  451         struct super_block * sb;
  452         int nr_inodes = inodes_stat.nr_unused;
  453 
  454         spin_lock(&inode_lock);
  455         spin_lock(&sb_lock);
  456         sb = sb_entry(super_blocks.next);
  457         for (; nr_inodes && sb != sb_entry(&super_blocks); sb = sb_entry(sb->s_list.next)) {
  458                 if (list_empty(&sb->s_dirty))
  459                         continue;
  460                 spin_unlock(&sb_lock);
  461                 nr_inodes = try_to_sync_unused_list(&sb->s_dirty, nr_inodes);
  462                 spin_lock(&sb_lock);
  463         }
  464         spin_unlock(&sb_lock);
  465         spin_unlock(&inode_lock);
  466 }
  467 
  468 static struct tq_struct unused_inodes_flush_task;
  469 
  470 /**
  471  *      write_inode_now -       write an inode to disk
  472  *      @inode: inode to write to disk
  473  *      @sync: whether the write should be synchronous or not
  474  *
  475  *      This function commits an inode to disk immediately if it is
  476  *      dirty. This is primarily needed by knfsd.
  477  */
  478  
  479 void write_inode_now(struct inode *inode, int sync)
  480 {
  481         struct super_block * sb = inode->i_sb;
  482 
  483         if (sb) {
  484                 spin_lock(&inode_lock);
  485                 while (inode->i_state & I_DIRTY)
  486                         sync_one(inode, sync);
  487                 spin_unlock(&inode_lock);
  488                 if (sync)
  489                         wait_on_inode(inode);
  490         }
  491         else
  492                 printk(KERN_ERR "write_inode_now: no super block\n");
  493 }
  494 
  495 /**
  496  * generic_osync_inode - flush all dirty data for a given inode to disk
  497  * @inode: inode to write
   498  * @what: which classes of data to flush (OSYNC_INODE, OSYNC_METADATA and/or OSYNC_DATA)
  499  *
  500  * This can be called by file_write functions for files which have the
  501  * O_SYNC flag set, to flush dirty writes to disk.  
  502  */
  503 
  504 int generic_osync_inode(struct inode *inode, int what)
  505 {
  506         int err = 0, err2 = 0, need_write_inode_now = 0;
  507         
  508         /* 
  509          * WARNING
  510          *
  511          * Currently, the filesystem write path does not pass the
  512          * filp down to the low-level write functions.  Therefore it
  513          * is impossible for (say) __block_commit_write to know if
  514          * the operation is O_SYNC or not.
  515          *
  516          * Ideally, O_SYNC writes would have the filesystem call
  517          * ll_rw_block as it went to kick-start the writes, and we
  518          * could call osync_inode_buffers() here to wait only for
  519          * those IOs which have already been submitted to the device
  520          * driver layer.  As it stands, if we did this we'd not write
  521          * anything to disk since our writes have not been queued by
  522          * this point: they are still on the dirty LRU.
  523          * 
  524          * So, currently we will call fsync_inode_buffers() instead,
  525          * to flush _all_ dirty buffers for this inode to disk on 
  526          * every O_SYNC write, not just the synchronous I/Os.  --sct
  527          */
  528 
  529         if (what & OSYNC_METADATA)
  530                 err = fsync_inode_buffers(inode);
  531         if (what & OSYNC_DATA)
  532                 err2 = fsync_inode_data_buffers(inode);
  533         if (!err)
  534                 err = err2;
  535 
  536         spin_lock(&inode_lock);
  537         if ((inode->i_state & I_DIRTY) &&
  538             ((what & OSYNC_INODE) || (inode->i_state & I_DIRTY_DATASYNC)))
  539                 need_write_inode_now = 1;
  540         spin_unlock(&inode_lock);
  541 
  542         if (need_write_inode_now)
  543                 write_inode_now(inode, 1);
  544         else
  545                 wait_on_inode(inode);
  546 
  547         return err;
  548 }
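
/*
 * Editor-added illustrative sketch, not part of the original file: one
 * plausible way a write path could use generic_osync_inode() once data
 * has been copied into the page cache.  Which OSYNC_* flags a particular
 * filesystem passes is an assumption here, not a statement about any
 * specific in-tree caller.
 */
static int examplefs_osync_after_write(struct file *file, struct inode *inode)
{
        if ((file->f_flags & O_SYNC) || IS_SYNC(inode))
                return generic_osync_inode(inode, OSYNC_METADATA | OSYNC_DATA);
        return 0;
}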
  549 
  550 /**
  551  * clear_inode - clear an inode
  552  * @inode: inode to clear
  553  *
  554  * This is called by the filesystem to tell us
  555  * that the inode is no longer useful. We just
  556  * terminate it with extreme prejudice.
  557  */
  558  
  559 void clear_inode(struct inode *inode)
  560 {
  561         invalidate_inode_buffers(inode);
  562        
  563         if (inode->i_data.nrpages)
  564                 BUG();
  565         if (!(inode->i_state & I_FREEING))
  566                 BUG();
  567         if (inode->i_state & I_CLEAR)
  568                 BUG();
  569         wait_on_inode(inode);
  570         DQUOT_DROP(inode);
  571         if (inode->i_sb && inode->i_sb->s_op && inode->i_sb->s_op->clear_inode)
  572                 inode->i_sb->s_op->clear_inode(inode);
  573         if (inode->i_bdev)
  574                 bd_forget(inode);
  575         else if (inode->i_cdev) {
  576                 cdput(inode->i_cdev);
  577                 inode->i_cdev = NULL;
  578         }
  579         inode->i_state = I_CLEAR;
  580 }
  581 
  582 /*
   583  * dispose_list() is handed a private list of inodes, so it doesn't
  584  * need to worry about list corruption and SMP locks.
  585  */
  586 static void dispose_list(struct list_head * head)
  587 {
  588         struct list_head * inode_entry;
  589         struct inode * inode;
  590 
  591         while ((inode_entry = head->next) != head)
  592         {
  593                 list_del(inode_entry);
  594 
  595                 inode = list_entry(inode_entry, struct inode, i_list);
  596                 if (inode->i_data.nrpages)
  597                         truncate_inode_pages(&inode->i_data, 0);
  598                 clear_inode(inode);
  599                 destroy_inode(inode);
  600                 inodes_stat.nr_inodes--;
  601         }
  602 }
  603 
  604 /*
  605  * Invalidate all inodes for a device.
  606  */
  607 static int invalidate_list(struct list_head *head, struct super_block * sb, struct list_head * dispose)
  608 {
  609         struct list_head *next;
  610         int busy = 0, count = 0;
  611 
  612         next = head->next;
  613         for (;;) {
  614                 struct list_head * tmp = next;
  615                 struct inode * inode;
  616 
  617                 next = next->next;
  618                 if (tmp == head)
  619                         break;
  620                 inode = list_entry(tmp, struct inode, i_list);
  621                 if (inode->i_sb != sb)
  622                         continue;
  623                 invalidate_inode_buffers(inode);
  624                 if (!atomic_read(&inode->i_count)) {
  625                         list_del_init(&inode->i_hash);
  626                         list_del(&inode->i_list);
  627                         list_add(&inode->i_list, dispose);
  628                         inode->i_state |= I_FREEING;
  629                         count++;
  630                         continue;
  631                 }
  632                 busy = 1;
  633         }
  634         /* only unused inodes may be cached with i_count zero */
  635         inodes_stat.nr_unused -= count;
  636         return busy;
  637 }
  638 
  639 /*
  640  * This is a two-stage process. First we collect all
  641  * offending inodes onto the throw-away list, and in
  642  * the second stage we actually dispose of them. This
  643  * is because we don't want to sleep while messing
  644  * with the global lists..
  645  */
  646  
  647 /**
  648  *      invalidate_inodes       - discard the inodes on a device
  649  *      @sb: superblock
  650  *
  651  *      Discard all of the inodes for a given superblock. If the discard
   652  *      fails because there are busy inodes then a non-zero value is returned.
  653  *      If the discard is successful all the inodes have been discarded.
  654  */
  655  
  656 int invalidate_inodes(struct super_block * sb)
  657 {
  658         int busy;
  659         LIST_HEAD(throw_away);
  660 
  661         spin_lock(&inode_lock);
  662         busy = invalidate_list(&inode_in_use, sb, &throw_away);
  663         busy |= invalidate_list(&inode_unused, sb, &throw_away);
  664         busy |= invalidate_list(&sb->s_dirty, sb, &throw_away);
  665         busy |= invalidate_list(&sb->s_locked_inodes, sb, &throw_away);
  666         spin_unlock(&inode_lock);
  667 
  668         dispose_list(&throw_away);
  669 
  670         return busy;
  671 }
  672  
  673 int invalidate_device(kdev_t dev, int do_sync)
  674 {
  675         struct super_block *sb;
  676         int res;
  677 
  678         if (do_sync)
  679                 fsync_dev(dev);
  680 
  681         res = 0;
  682         sb = get_super(dev);
  683         if (sb) {
  684                 /*
  685                  * no need to lock the super, get_super holds the
  686                  * read semaphore so the filesystem cannot go away
  687                  * under us (->put_super runs with the write lock
   688                  * held).
  689                  */
  690                 shrink_dcache_sb(sb);
  691                 res = invalidate_inodes(sb);
  692                 drop_super(sb);
  693         }
  694         invalidate_buffers(dev);
  695         return res;
  696 }
  697 
  698 
  699 /*
   700  * prune_icache() scans the unused list for freeable inodes.
   701  * Candidates are collected on a temporary throw-away list
   702  * and are then destroyed by dispose_list() once the scan
   703  * is complete.
  704  *
  705  * We don't expect to have to call this very often.
  706  *
  707  * N.B. The spinlock is released during the call to
  708  *      dispose_list.
  709  */
  710 #define CAN_UNUSE(inode) \
  711         ((((inode)->i_state | (inode)->i_data.nrpages) == 0)  && \
  712          !inode_has_buffers(inode))
  713 #define INODE(entry)    (list_entry(entry, struct inode, i_list))
  714 
  715 void prune_icache(int goal)
  716 {
  717         LIST_HEAD(list);
  718         struct list_head *entry, *freeable = &list;
  719         int count;
  720         struct inode * inode;
  721 
  722         spin_lock(&inode_lock);
  723 
  724         count = 0;
  725         entry = inode_unused.prev;
  726         while (entry != &inode_unused)
  727         {
  728                 struct list_head *tmp = entry;
  729 
  730                 entry = entry->prev;
  731                 inode = INODE(tmp);
  732                 if (inode->i_state & (I_FREEING|I_CLEAR|I_LOCK))
  733                         continue;
  734                 if (!CAN_UNUSE(inode))
  735                         continue;
  736                 if (atomic_read(&inode->i_count))
  737                         continue;
  738                 list_del(tmp);
  739                 list_del(&inode->i_hash);
  740                 INIT_LIST_HEAD(&inode->i_hash);
  741                 list_add(tmp, freeable);
  742                 inode->i_state |= I_FREEING;
  743                 count++;
  744                 if (!--goal)
  745                         break;
  746         }
  747         inodes_stat.nr_unused -= count;
  748         spin_unlock(&inode_lock);
  749 
  750         dispose_list(freeable);
  751 
  752         /* 
   753          * If we didn't free enough clean inodes, schedule a
   754          * sync of the dirty inodes.  We cannot do it from
   755          * here: either we would be synchronously dog-slow,
   756          * or we would deadlock with the out-of-memory code.
  757          */
  758         if (goal)
  759                 schedule_task(&unused_inodes_flush_task);
  760 }
  761 
  762 int shrink_icache_memory(int priority, int gfp_mask)
  763 {
  764         int count = 0;
  765 
  766         /*
  767          * Nasty deadlock avoidance..
  768          *
  769          * We may hold various FS locks, and we don't
  770          * want to recurse into the FS that called us
  771          * in clear_inode() and friends..
  772          */
  773         if (!(gfp_mask & __GFP_FS))
  774                 return 0;
  775 
  776         count = inodes_stat.nr_unused / priority;
  777 
  778         prune_icache(count);
  779         return kmem_cache_shrink(inode_cachep);
  780 }
  781 
  782 /*
  783  * Called with the inode lock held.
  784  * NOTE: we are not increasing the inode-refcount, you must call __iget()
  785  * by hand after calling find_inode now! This simplifies iunique and won't
  786  * add any additional branch in the common code.
  787  */
  788 static struct inode * find_inode(struct super_block * sb, unsigned long ino, struct list_head *head, find_inode_t find_actor, void *opaque)
  789 {
  790         struct list_head *tmp;
  791         struct inode * inode;
  792 
  793         tmp = head;
  794         for (;;) {
  795                 tmp = tmp->next;
  796                 inode = NULL;
  797                 if (tmp == head)
  798                         break;
  799                 inode = list_entry(tmp, struct inode, i_hash);
  800                 if (inode->i_ino != ino)
  801                         continue;
  802                 if (inode->i_sb != sb)
  803                         continue;
  804                 if (find_actor && !find_actor(inode, ino, opaque))
  805                         continue;
  806                 break;
  807         }
  808         return inode;
  809 }
  810 
  811 /**
  812  *      new_inode       - obtain an inode
  813  *      @sb: superblock
  814  *
  815  *      Allocates a new inode for given superblock.
  816  */
  817  
  818 struct inode * new_inode(struct super_block *sb)
  819 {
  820         static unsigned long last_ino;
  821         struct inode * inode;
  822 
  823         spin_lock_prefetch(&inode_lock);
  824         
  825         inode = alloc_inode(sb);
  826         if (inode) {
  827                 spin_lock(&inode_lock);
  828                 inodes_stat.nr_inodes++;
  829                 list_add(&inode->i_list, &inode_in_use);
  830                 inode->i_ino = ++last_ino;
  831                 inode->i_state = 0;
  832                 spin_unlock(&inode_lock);
  833         }
  834         return inode;
  835 }
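
/*
 * Editor-added illustrative sketch, not part of the original file:
 * in-memory objects such as pipes and sockets obtain an anonymous inode
 * with new_inode() and never hash it, so it is reachable only through its
 * dentry/file.  Error handling is minimal and the helper is hypothetical.
 */
static struct inode *examplefs_get_anon_inode(struct super_block *sb)
{
        struct inode *inode = new_inode(sb);

        if (inode) {
                inode->i_mode = S_IFIFO | S_IRUSR | S_IWUSR;
                inode->i_uid = current->fsuid;
                inode->i_gid = current->fsgid;
                inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
        }
        return inode;
}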
  836 
  837 /*
  838  * This is called without the inode lock held.. Be careful.
  839  *
  840  * We no longer cache the sb_flags in i_flags - see fs.h
  841  *      -- rmk@arm.uk.linux.org
  842  */
  843 static struct inode * get_new_inode(struct super_block *sb, unsigned long ino, struct list_head *head, find_inode_t find_actor, void *opaque)
  844 {
  845         struct inode * inode;
  846 
  847         inode = alloc_inode(sb);
  848         if (inode) {
  849                 struct inode * old;
  850 
  851                 spin_lock(&inode_lock);
  852                 /* We released the lock, so.. */
  853                 old = find_inode(sb, ino, head, find_actor, opaque);
  854                 if (!old) {
  855                         inodes_stat.nr_inodes++;
  856                         list_add(&inode->i_list, &inode_in_use);
  857                         list_add(&inode->i_hash, head);
  858                         inode->i_ino = ino;
  859                         inode->i_state = I_LOCK;
  860                         spin_unlock(&inode_lock);
  861 
  862                         /* reiserfs specific hack right here.  We don't
  863                         ** want this to last, and are looking for VFS changes
  864                         ** that will allow us to get rid of it.
  865                         ** -- mason@suse.com 
  866                         */
  867                         if (sb->s_op->read_inode2) {
  868                                 sb->s_op->read_inode2(inode, opaque) ;
  869                         } else {
  870                                 sb->s_op->read_inode(inode);
  871                         }
  872 
  873                         /*
  874                          * This is special!  We do not need the spinlock
  875                          * when clearing I_LOCK, because we're guaranteed
  876                          * that nobody else tries to do anything about the
  877                          * state of the inode when it is locked, as we
  878                          * just created it (so there can be no old holders
  879                          * that haven't tested I_LOCK).
  880                          */
  881                         inode->i_state &= ~I_LOCK;
  882                         wake_up(&inode->i_wait);
  883 
  884                         return inode;
  885                 }
  886 
  887                 /*
  888                  * Uhhuh, somebody else created the same inode under
  889                  * us. Use the old inode instead of the one we just
  890                  * allocated.
  891                  */
  892                 __iget(old);
  893                 spin_unlock(&inode_lock);
  894                 destroy_inode(inode);
  895                 inode = old;
  896                 wait_on_inode(inode);
  897         }
  898         return inode;
  899 }
  900 
  901 static inline unsigned long hash(struct super_block *sb, unsigned long i_ino)
  902 {
  903         unsigned long tmp = i_ino + ((unsigned long) sb / L1_CACHE_BYTES);
  904         tmp = tmp + (tmp >> I_HASHBITS);
  905         return tmp & I_HASHMASK;
  906 }
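
/*
 * Editor-added worked example (illustrative numbers): with 8192 hash
 * chains, i_hash_shift is 13 and i_hash_mask is 8191.  For i_ino = 0x12345
 * and a superblock at address 0xc1a2c000 with L1_CACHE_BYTES = 32, tmp
 * starts as 0x12345 + (0xc1a2c000 / 32); adding "tmp >> 13" folds the high
 * bits of that sum back in, and masking with 8191 selects one of the 8192
 * chains in inode_hashtable.
 */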
  907 
  908 /* Yeah, I know about quadratic hash. Maybe, later. */
  909 
  910 /**
  911  *      iunique - get a unique inode number
  912  *      @sb: superblock
  913  *      @max_reserved: highest reserved inode number
  914  *
  915  *      Obtain an inode number that is unique on the system for a given
  916  *      superblock. This is used by file systems that have no natural
  917  *      permanent inode numbering system. An inode number is returned that
  918  *      is higher than the reserved limit but unique.
  919  *
  920  *      BUGS:
  921  *      With a large number of inodes live on the file system this function
  922  *      currently becomes quite slow.
  923  */
  924  
  925 ino_t iunique(struct super_block *sb, ino_t max_reserved)
  926 {
  927         static ino_t counter = 0;
  928         struct inode *inode;
  929         struct list_head * head;
  930         ino_t res;
  931         spin_lock(&inode_lock);
  932 retry:
  933         if (counter > max_reserved) {
  934                 head = inode_hashtable + hash(sb,counter);
  935                 inode = find_inode(sb, res = counter++, head, NULL, NULL);
  936                 if (!inode) {
  937                         spin_unlock(&inode_lock);
  938                         return res;
  939                 }
  940         } else {
  941                 counter = max_reserved + 1;
  942         }
  943         goto retry;
  944         
  945 }
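
/*
 * Editor-added illustrative sketch, not part of the original file: a
 * filesystem without stable on-disk inode numbers can stamp each new
 * inode with iunique().  The reserved range of 256 is an arbitrary
 * example value and the helper name is hypothetical.
 */
static void examplefs_stamp_ino(struct inode *inode)
{
        inode->i_ino = iunique(inode->i_sb, 256);
}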
  946 
  947 struct inode *igrab(struct inode *inode)
  948 {
  949         spin_lock(&inode_lock);
  950         if (!(inode->i_state & I_FREEING))
  951                 __iget(inode);
  952         else
  953                 /*
   954                  * Handle the case where s_op->clear_inode has not been
   955                  * called yet, and somebody is calling igrab()
   956                  * while the inode is being freed.
  957                  */
  958                 inode = NULL;
  959         spin_unlock(&inode_lock);
  960         return inode;
  961 }
  962 
  963 
  964 struct inode *iget4(struct super_block *sb, unsigned long ino, find_inode_t find_actor, void *opaque)
  965 {
  966         struct list_head * head = inode_hashtable + hash(sb,ino);
  967         struct inode * inode;
  968 
  969         spin_lock(&inode_lock);
  970         inode = find_inode(sb, ino, head, find_actor, opaque);
  971         if (inode) {
  972                 __iget(inode);
  973                 spin_unlock(&inode_lock);
  974                 wait_on_inode(inode);
  975                 return inode;
  976         }
  977         spin_unlock(&inode_lock);
  978 
  979         /*
  980          * get_new_inode() will do the right thing, re-trying the search
  981          * in case it had to block at any point.
  982          */
  983         return get_new_inode(sb, ino, head, find_actor, opaque);
  984 }
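
/*
 * Editor-added illustrative sketch, not part of the original file: a
 * find_actor lets a filesystem distinguish between objects that share an
 * inode number, the way NFS distinguishes file handles.  The cookie
 * layout, the use of i_generation and the "examplefs_*" names are all
 * hypothetical.
 */
struct examplefs_cookie {
        unsigned long generation;
};

static int examplefs_find_actor(struct inode *inode, unsigned long ino,
                                void *opaque)
{
        struct examplefs_cookie *cookie = opaque;

        return inode->i_generation == cookie->generation;
}

/*
 * A lookup path would then do something like:
 *
 *      struct examplefs_cookie cookie = { gen };
 *      inode = iget4(sb, ino, examplefs_find_actor, &cookie);
 */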
  985 
  986 /**
  987  *      insert_inode_hash - hash an inode
  988  *      @inode: unhashed inode
  989  *
  990  *      Add an inode to the inode hash for this superblock. If the inode
  991  *      has no superblock it is added to a separate anonymous chain.
  992  */
  993  
  994 void insert_inode_hash(struct inode *inode)
  995 {
  996         struct list_head *head = &anon_hash_chain;
  997         if (inode->i_sb)
  998                 head = inode_hashtable + hash(inode->i_sb, inode->i_ino);
  999         spin_lock(&inode_lock);
 1000         list_add(&inode->i_hash, head);
 1001         spin_unlock(&inode_lock);
 1002 }
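
/*
 * Editor-added illustrative sketch, not part of the original file: an
 * on-disk filesystem's create path allocates a VFS inode, assigns the
 * permanent inode number, and hashes it so that later iget() calls can
 * find it.  The "examplefs_*" helper is hypothetical.
 */
static struct inode *examplefs_new_disk_inode(struct super_block *sb,
                                              unsigned long ino, int mode)
{
        struct inode *inode = new_inode(sb);

        if (inode) {
                inode->i_ino = ino;             /* permanent on-disk number */
                inode->i_mode = mode;
                insert_inode_hash(inode);
                mark_inode_dirty(inode);
        }
        return inode;
}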
 1003 
 1004 /**
 1005  *      remove_inode_hash - remove an inode from the hash
 1006  *      @inode: inode to unhash
 1007  *
 1008  *      Remove an inode from the superblock or anonymous hash.
 1009  */
 1010  
 1011 void remove_inode_hash(struct inode *inode)
 1012 {
 1013         spin_lock(&inode_lock);
 1014         list_del(&inode->i_hash);
 1015         INIT_LIST_HEAD(&inode->i_hash);
 1016         spin_unlock(&inode_lock);
 1017 }
 1018 
 1019 /**
 1020  *      iput    - put an inode 
 1021  *      @inode: inode to put
 1022  *
 1023  *      Puts an inode, dropping its usage count. If the inode use count hits
 1024  *      zero the inode is also then freed and may be destroyed.
 1025  */
 1026  
 1027 void iput(struct inode *inode)
 1028 {
 1029         if (inode) {
 1030                 struct super_block *sb = inode->i_sb;
 1031                 struct super_operations *op = NULL;
 1032 
 1033                 if (inode->i_state == I_CLEAR)
 1034                         BUG();
 1035 
 1036                 if (sb && sb->s_op)
 1037                         op = sb->s_op;
 1038                 if (op && op->put_inode)
 1039                         op->put_inode(inode);
 1040 
 1041                 if (!atomic_dec_and_lock(&inode->i_count, &inode_lock))
 1042                         return;
 1043 
 1044                 if (!inode->i_nlink) {
 1045                         list_del(&inode->i_hash);
 1046                         INIT_LIST_HEAD(&inode->i_hash);
 1047                         list_del(&inode->i_list);
 1048                         INIT_LIST_HEAD(&inode->i_list);
 1049                         inode->i_state|=I_FREEING;
 1050                         inodes_stat.nr_inodes--;
 1051                         spin_unlock(&inode_lock);
 1052 
 1053                         if (inode->i_data.nrpages)
 1054                                 truncate_inode_pages(&inode->i_data, 0);
 1055 
 1056                         if (op && op->delete_inode) {
 1057                                 void (*delete)(struct inode *) = op->delete_inode;
 1058                                 if (!is_bad_inode(inode))
 1059                                         DQUOT_INIT(inode);
  1060                         /* s_op->delete_inode is expected to call clear_inode() itself */
 1061                                 delete(inode);
 1062                         } else
 1063                                 clear_inode(inode);
 1064                         if (inode->i_state != I_CLEAR)
 1065                                 BUG();
 1066                 } else {
 1067                         if (!list_empty(&inode->i_hash)) {
 1068                                 if (!(inode->i_state & (I_DIRTY|I_LOCK))) {
 1069                                         list_del(&inode->i_list);
 1070                                         list_add(&inode->i_list, &inode_unused);
 1071                                 }
 1072                                 inodes_stat.nr_unused++;
 1073                                 spin_unlock(&inode_lock);
 1074                                 if (!sb || (sb->s_flags & MS_ACTIVE))
 1075                                         return;
 1076                                 write_inode_now(inode, 1);
 1077                                 spin_lock(&inode_lock);
 1078                                 inodes_stat.nr_unused--;
 1079                                 list_del_init(&inode->i_hash);
 1080                         }
 1081                         list_del_init(&inode->i_list);
 1082                         inode->i_state|=I_FREEING;
 1083                         inodes_stat.nr_inodes--;
 1084                         spin_unlock(&inode_lock);
 1085                         if (inode->i_data.nrpages)
 1086                                 truncate_inode_pages(&inode->i_data, 0);
 1087                         clear_inode(inode);
 1088                 }
 1089                 destroy_inode(inode);
 1090         }
 1091 }
 1092 
 1093 void force_delete(struct inode *inode)
 1094 {
 1095         /*
 1096          * Kill off unused inodes ... iput() will unhash and
 1097          * delete the inode if we set i_nlink to zero.
 1098          */
 1099         if (atomic_read(&inode->i_count) == 1)
 1100                 inode->i_nlink = 0;
 1101 }
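
/*
 * Editor-added illustrative sketch, not part of the original file: simple
 * in-memory filesystems of this era can wire force_delete() in as their
 * put_inode operation so that unreferenced inodes are torn down
 * immediately rather than cached.  Treat this as a pattern sketch, not a
 * claim about any specific in-tree filesystem.
 */
static struct super_operations examplefs_ops = {
        put_inode:      force_delete,   /* drop unused inodes right away */
};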
 1102 
 1103 /**
 1104  *      bmap    - find a block number in a file
 1105  *      @inode: inode of file
 1106  *      @block: block to find
 1107  *
  1108  *      Returns the disk block number, relative to the start of the
  1109  *      device holding the inode, that stores the requested block of
  1110  *      the file.  For example, asking for block 4 of inode 1 returns
  1111  *      the number of the disk block that holds that part of the
  1112  *      file.
 1113  */
 1114  
 1115 int bmap(struct inode * inode, int block)
 1116 {
 1117         int res = 0;
 1118         if (inode->i_mapping->a_ops->bmap)
 1119                 res = inode->i_mapping->a_ops->bmap(inode->i_mapping, block);
 1120         return res;
 1121 }
 1122 
 1123 /*
 1124  * Initialize the hash tables.
 1125  */
 1126 void __init inode_init(unsigned long mempages)
 1127 {
 1128         struct list_head *head;
 1129         unsigned long order;
 1130         unsigned int nr_hash;
 1131         int i;
 1132 
 1133         mempages >>= (14 - PAGE_SHIFT);
 1134         mempages *= sizeof(struct list_head);
 1135         for (order = 0; ((1UL << order) << PAGE_SHIFT) < mempages; order++)
 1136                 ;
 1137 
 1138         do {
 1139                 unsigned long tmp;
 1140 
 1141                 nr_hash = (1UL << order) * PAGE_SIZE /
 1142                         sizeof(struct list_head);
 1143                 i_hash_mask = (nr_hash - 1);
 1144 
 1145                 tmp = nr_hash;
 1146                 i_hash_shift = 0;
 1147                 while ((tmp >>= 1UL) != 0UL)
 1148                         i_hash_shift++;
 1149 
 1150                 inode_hashtable = (struct list_head *)
 1151                         __get_free_pages(GFP_ATOMIC, order);
 1152         } while (inode_hashtable == NULL && --order >= 0);
 1153 
 1154         printk(KERN_INFO "Inode cache hash table entries: %d (order: %ld, %ld bytes)\n",
 1155                         nr_hash, order, (PAGE_SIZE << order));
 1156 
 1157         if (!inode_hashtable)
 1158                 panic("Failed to allocate inode hash table\n");
 1159 
 1160         head = inode_hashtable;
 1161         i = nr_hash;
 1162         do {
 1163                 INIT_LIST_HEAD(head);
 1164                 head++;
 1165                 i--;
 1166         } while (i);
 1167 
 1168         /* inode slab cache */
 1169         inode_cachep = kmem_cache_create("inode_cache", sizeof(struct inode),
 1170                                          0, SLAB_HWCACHE_ALIGN, init_once,
 1171                                          NULL);
 1172         if (!inode_cachep)
 1173                 panic("cannot create inode slab cache");
 1174 
 1175         unused_inodes_flush_task.routine = try_to_sync_unused_inodes;
 1176 }
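
/*
 * Editor-added worked example of the sizing logic above (assumes 4 KiB
 * pages and an 8-byte struct list_head): with mempages = 32768 (128 MiB),
 * mempages >>= 2 gives 8192, and 8192 * 8 = 65536 bytes, so the order
 * loop stops at order 4 (16 pages).  That gives nr_hash = 16 * 4096 / 8 =
 * 8192 chains, i_hash_mask = 8191 and i_hash_shift = 13.
 */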
 1177 
 1178 /**
 1179  *      update_atime    -       update the access time
 1180  *      @inode: inode accessed
 1181  *
 1182  *      Update the accessed time on an inode and mark it for writeback.
 1183  *      This function automatically handles read only file systems and media,
 1184  *      as well as the "noatime" flag and inode specific "noatime" markers.
 1185  */
 1186  
 1187 void update_atime (struct inode *inode)
 1188 {
 1189         if (inode->i_atime == CURRENT_TIME)
 1190                 return;
 1191         if (IS_NOATIME(inode))
 1192                 return;
 1193         if (IS_NODIRATIME(inode) && S_ISDIR(inode->i_mode)) 
 1194                 return;
 1195         if (IS_RDONLY(inode)) 
 1196                 return;
 1197         inode->i_atime = CURRENT_TIME;
 1198         mark_inode_dirty_sync (inode);
 1199 }
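
/*
 * Editor-added illustrative sketch, not part of the original file: a read
 * path typically touches the access time after a successful transfer;
 * update_atime() itself decides whether anything actually needs to be
 * marked dirty.  The helper name is hypothetical.
 */
static void examplefs_after_read(struct file *file, ssize_t copied)
{
        if (copied > 0)
                update_atime(file->f_dentry->d_inode);
}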
 1200 
 1201 /**
 1202  *      update_mctime   -       update the mtime and ctime
 1203  *      @inode: inode accessed
 1204  *
 1205  *      Update the modified and changed times on an inode for writes to special
 1206  *      files such as fifos.  No change is forced if the timestamps are already
 1207  *      up-to-date or if the filesystem is readonly.
 1208  */
 1209  
 1210 void update_mctime (struct inode *inode)
 1211 {
 1212         if (inode->i_mtime == CURRENT_TIME && inode->i_ctime == CURRENT_TIME)
 1213                 return;
 1214         if (IS_RDONLY(inode))
 1215                 return;
 1216         inode->i_ctime = inode->i_mtime = CURRENT_TIME;
 1217         mark_inode_dirty (inode);
 1218 }
 1219 
 1220 
 1221 /*
 1222  *      Quota functions that want to walk the inode lists..
 1223  */
 1224 #ifdef CONFIG_QUOTA
 1225 
 1226 /* Functions back in dquot.c */
 1227 void put_dquot_list(struct list_head *);
 1228 int remove_inode_dquot_ref(struct inode *, short, struct list_head *);
 1229 
 1230 void remove_dquot_ref(struct super_block *sb, short type)
 1231 {
 1232         struct inode *inode;
 1233         struct list_head *act_head;
 1234         LIST_HEAD(tofree_head);
 1235 
 1236         if (!sb->dq_op)
 1237                 return; /* nothing to do */
 1238         /* We have to be protected against other CPUs */
 1239         lock_kernel();          /* This lock is for quota code */
 1240         spin_lock(&inode_lock); /* This lock is for inodes code */
 1241  
 1242         list_for_each(act_head, &inode_in_use) {
 1243                 inode = list_entry(act_head, struct inode, i_list);
 1244                 if (inode->i_sb == sb && IS_QUOTAINIT(inode))
 1245                         remove_inode_dquot_ref(inode, type, &tofree_head);
 1246         }
 1247         list_for_each(act_head, &inode_unused) {
 1248                 inode = list_entry(act_head, struct inode, i_list);
 1249                 if (inode->i_sb == sb && IS_QUOTAINIT(inode))
 1250                         remove_inode_dquot_ref(inode, type, &tofree_head);
 1251         }
 1252         list_for_each(act_head, &sb->s_dirty) {
 1253                 inode = list_entry(act_head, struct inode, i_list);
 1254                 if (IS_QUOTAINIT(inode))
 1255                         remove_inode_dquot_ref(inode, type, &tofree_head);
 1256         }
 1257         list_for_each(act_head, &sb->s_locked_inodes) {
 1258                 inode = list_entry(act_head, struct inode, i_list);
 1259                 if (IS_QUOTAINIT(inode))
 1260                         remove_inode_dquot_ref(inode, type, &tofree_head);
 1261         }
 1262         spin_unlock(&inode_lock);
 1263         unlock_kernel();
 1264 
 1265         put_dquot_list(&tofree_head);
 1266 }
 1267 
 1268 #endif
