FreeBSD/Linux Kernel Cross Reference
sys/mm/shmem.c


    1 /*
    2  * Resizable virtual memory filesystem for Linux.
    3  *
    4  * Copyright (C) 2000 Linus Torvalds.
    5  *               2000 Transmeta Corp.
    6  *               2000-2001 Christoph Rohland
    7  *               2000-2001 SAP AG
    8  *               2002 Red Hat Inc.
    9  * Copyright (C) 2002-2003 Hugh Dickins.
   10  * Copyright (C) 2002-2003 VERITAS Software Corporation.
   11  *
   12  * This file is released under the GPL.
   13  */
   14 
   15 /*
   16  * This virtual memory filesystem is heavily based on the ramfs. It
    17  * extends ramfs by the ability to use swap and honor resource limits,
   18  * which makes it a completely usable filesystem.
   19  */
   20 
   21 #include <linux/config.h>
   22 #include <linux/module.h>
   23 #include <linux/init.h>
   24 #include <linux/devfs_fs_kernel.h>
   25 #include <linux/fs.h>
   26 #include <linux/mm.h>
   27 #include <linux/file.h>
   28 #include <linux/swap.h>
   29 #include <linux/pagemap.h>
   30 #include <linux/string.h>
   31 #include <linux/locks.h>
   32 #include <linux/smp_lock.h>
   33 
   34 #include <asm/uaccess.h>
   35 #include <asm/div64.h>
   36 
   37 /* This magic number is used in glibc for POSIX shared memory */
   38 #define TMPFS_MAGIC     0x01021994
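
/*
 * Illustrative userspace sketch (not part of this file): a program can
 * recognize a tmpfs mount by comparing statfs()'s f_type against the
 * same magic number, which is how glibc decides whether a mount point
 * is suitable for POSIX shared memory.  The /dev/shm path below is an
 * assumption for illustration.
 */
#if 0   /* userspace example */
#include <sys/vfs.h>
#include <stdio.h>

int main(void)
{
        struct statfs buf;

        if (statfs("/dev/shm", &buf) == 0 && buf.f_type == 0x01021994)
                printf("/dev/shm is tmpfs\n");
        return 0;
}
#endif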
   39 
   40 #define ENTRIES_PER_PAGE (PAGE_CACHE_SIZE/sizeof(unsigned long))
   41 #define ENTRIES_PER_PAGEPAGE (ENTRIES_PER_PAGE*ENTRIES_PER_PAGE)
   42 #define BLOCKS_PER_PAGE  (PAGE_CACHE_SIZE/512)
   43 
   44 #define SHMEM_MAX_INDEX  (SHMEM_NR_DIRECT + (ENTRIES_PER_PAGEPAGE/2) * (ENTRIES_PER_PAGE+1))
   45 #define SHMEM_MAX_BYTES  ((unsigned long long)SHMEM_MAX_INDEX << PAGE_CACHE_SHIFT)
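
/*
 * Worked example: with 4kB pages and a 4-byte swp_entry_t (a 32-bit
 * build, SHMEM_NR_DIRECT == 16 as in the layout comment below),
 * ENTRIES_PER_PAGE is 1024 and ENTRIES_PER_PAGEPAGE is 1024*1024, so
 * SHMEM_MAX_INDEX is 16 + 524288*1025 = 537395216 pages and
 * SHMEM_MAX_BYTES is roughly 2TB per file.
 */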
   46 
   47 #define VM_ACCT(size)    (PAGE_CACHE_ALIGN(size) >> PAGE_SHIFT)
   48 
   49 /* Pretend that each entry is of this size in a directory's i_size */
   50 #define BOGO_DIRENT_SIZE 20
   51 
   52 #define SHMEM_SB(sb) (&sb->u.shmem_sb)
   53 
   54 /* Flag allocation requirements to shmem_getpage and shmem_swp_alloc */
   55 enum sgp_type {
   56         SGP_READ,       /* don't exceed i_size, don't allocate page */
   57         SGP_CACHE,      /* don't exceed i_size, may allocate page */
   58         SGP_WRITE,      /* may exceed i_size, may allocate page */
   59 };
   60 
   61 static int shmem_getpage(struct inode *inode, unsigned long idx,
   62                          struct page **pagep, enum sgp_type sgp);
   63 
   64 static struct super_operations shmem_ops;
   65 static struct address_space_operations shmem_aops;
   66 static struct file_operations shmem_file_operations;
   67 static struct inode_operations shmem_inode_operations;
   68 static struct inode_operations shmem_dir_inode_operations;
   69 static struct vm_operations_struct shmem_vm_ops;
   70 
   71 LIST_HEAD(shmem_inodes);
   72 static spinlock_t shmem_ilock = SPIN_LOCK_UNLOCKED;
   73 
   74 static void shmem_free_block(struct inode *inode)
   75 {
   76         struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
   77         spin_lock(&sbinfo->stat_lock);
   78         sbinfo->free_blocks++;
   79         inode->i_blocks -= BLOCKS_PER_PAGE;
   80         spin_unlock(&sbinfo->stat_lock);
   81 }
   82 
   83 static void shmem_removepage(struct page *page)
   84 {
   85         if (!PageLaunder(page))
   86                 shmem_free_block(page->mapping->host);
   87 }
   88 
   89 /*
   90  * shmem_swp_entry - find the swap vector position in the info structure
   91  *
   92  * @info:  info structure for the inode
   93  * @index: index of the page to find
   94  * @page:  optional page to add to the structure. Has to be preset to
   95  *         all zeros
   96  *
   97  * If there is no space allocated yet, it will return NULL when
   98  * page is 0; otherwise it will use the page for the needed block,
   99  * setting it to 0 on return to indicate that it has been used.
  100  *
  101  * The swap vector is organized the following way:
  102  *
  103  * There are SHMEM_NR_DIRECT entries directly stored in the
  104  * shmem_inode_info structure. So small files do not need an additional
  105  * allocation.
  106  *
  107  * For pages with index > SHMEM_NR_DIRECT there is the pointer
  108  * i_indirect which points to a page which holds in the first half
  109  * doubly indirect blocks, in the second half triple indirect blocks:
  110  *
  111  * For an artificial ENTRIES_PER_PAGE = 4 this would lead to the
  112  * following layout (for SHMEM_NR_DIRECT == 16):
  113  *
  114  * i_indirect -> dir --> 16-19
  115  *            |      +-> 20-23
  116  *            |
  117  *            +-->dir2 --> 24-27
  118  *            |        +-> 28-31
  119  *            |        +-> 32-35
  120  *            |        +-> 36-39
  121  *            |
  122  *            +-->dir3 --> 40-43
  123  *                     +-> 44-47
  124  *                     +-> 48-51
  125  *                     +-> 52-55
  126  */
  127 static swp_entry_t *shmem_swp_entry(struct shmem_inode_info *info, unsigned long index, unsigned long *page)
  128 {
  129         unsigned long offset;
  130         void **dir;
  131 
  132         if (index < SHMEM_NR_DIRECT)
  133                 return info->i_direct+index;
  134         if (!info->i_indirect) {
  135                 if (page) {
  136                         info->i_indirect = (void **) *page;
  137                         *page = 0;
  138                 }
  139                 return NULL;                    /* need another page */
  140         }
  141 
  142         index -= SHMEM_NR_DIRECT;
  143         offset = index % ENTRIES_PER_PAGE;
  144         index /= ENTRIES_PER_PAGE;
  145         dir = info->i_indirect;
  146 
  147         if (index >= ENTRIES_PER_PAGE/2) {
  148                 index -= ENTRIES_PER_PAGE/2;
  149                 dir += ENTRIES_PER_PAGE/2 + index/ENTRIES_PER_PAGE;
  150                 index %= ENTRIES_PER_PAGE;
  151                 if (!*dir) {
  152                         if (page) {
  153                                 *dir = (void *) *page;
  154                                 *page = 0;
  155                         }
  156                         return NULL;            /* need another page */
  157                 }
  158                 dir = (void **) *dir;
  159         }
  160 
  161         dir += index;
  162         if (!*dir) {
  163                 if (!page || !*page)
  164                         return NULL;            /* need a page */
  165                 *dir = (void *) *page;
  166                 *page = 0;
  167         }
  168         return (swp_entry_t *) *dir + offset;
  169 }
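
/*
 * Minimal userspace sketch (not kernel code) of the index arithmetic
 * above, using the artificial ENTRIES_PER_PAGE == 4 and
 * SHMEM_NR_DIRECT == 16 from the layout comment: it prints which
 * indirect page and slot a file page index lands in.
 */
#if 0   /* userspace example */
#include <stdio.h>

#define NR_DIRECT 16
#define PER_PAGE   4

int main(void)
{
        unsigned long index;

        for (index = 16; index <= 55; index += 7) {
                unsigned long i = index - NR_DIRECT;
                unsigned long offset = i % PER_PAGE;

                i /= PER_PAGE;
                if (i < PER_PAGE/2) {
                        /* first half of i_indirect: doubly indirect */
                        printf("index %2lu -> dir, slot %lu, entry %lu\n",
                               index, i, offset);
                } else {
                        /* second half: triply indirect */
                        i -= PER_PAGE/2;
                        printf("index %2lu -> dir%lu, slot %lu, entry %lu\n",
                               index, 2 + i/PER_PAGE, i % PER_PAGE, offset);
                }
        }
        return 0;
}
#endif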
  170 
  171 /*
  172  * shmem_swp_alloc - get the position of the swap entry for the page.
  173  *                   If it does not exist allocate the entry.
  174  *
  175  * @info:       info structure for the inode
  176  * @index:      index of the page to find
  177  * @sgp:        check and recheck i_size? skip allocation?
  178  */
  179 static swp_entry_t *shmem_swp_alloc(struct shmem_inode_info *info, unsigned long index, enum sgp_type sgp)
  180 {
  181         struct inode *inode = info->inode;
  182         struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
  183         unsigned long page = 0;
  184         swp_entry_t *entry;
  185         static const swp_entry_t unswapped = {0};
  186 
  187         if (sgp != SGP_WRITE &&
  188             ((loff_t) index << PAGE_CACHE_SHIFT) >= inode->i_size)
  189                 return ERR_PTR(-EINVAL);
  190 
  191         while (!(entry = shmem_swp_entry(info, index, &page))) {
  192                 if (sgp == SGP_READ)
  193                         return (swp_entry_t *) &unswapped;
  194                 /*
  195                  * Test free_blocks against 1 not 0, since we have 1 data
  196                  * page (and perhaps indirect index pages) yet to allocate:
  197                  * a waste to allocate index if we cannot allocate data.
  198                  */
  199                 spin_lock(&sbinfo->stat_lock);
  200                 if (sbinfo->free_blocks <= 1) {
  201                         spin_unlock(&sbinfo->stat_lock);
  202                         return ERR_PTR(-ENOSPC);
  203                 }
  204                 sbinfo->free_blocks--;
  205                 inode->i_blocks += BLOCKS_PER_PAGE;
  206                 spin_unlock(&sbinfo->stat_lock);
  207 
  208                 spin_unlock(&info->lock);
  209                 page = get_zeroed_page(GFP_USER);
  210                 spin_lock(&info->lock);
  211 
  212                 if (!page) {
  213                         shmem_free_block(inode);
  214                         return ERR_PTR(-ENOMEM);
  215                 }
  216                 if (sgp != SGP_WRITE &&
  217                     ((loff_t) index << PAGE_CACHE_SHIFT) >= inode->i_size) {
  218                         entry = ERR_PTR(-EINVAL);
  219                         break;
  220                 }
  221                 if (info->next_index <= index)
  222                         info->next_index = index + 1;
  223         }
  224         if (page) {
  225                 /* another task gave its page, or truncated the file */
  226                 shmem_free_block(inode);
  227                 free_page(page);
  228         }
  229         if (info->next_index <= index && !IS_ERR(entry))
  230                 info->next_index = index + 1;
  231         return entry;
  232 }
  233 
  234 /*
  235  * shmem_free_swp - free some swap entries in a directory
  236  *
  237  * @dir:   pointer to the directory
  238  * @edir:  pointer after last entry of the directory
  239  */
  240 static int shmem_free_swp(swp_entry_t *dir, swp_entry_t *edir)
  241 {
  242         swp_entry_t *ptr;
  243         int freed = 0;
  244 
  245         for (ptr = dir; ptr < edir; ptr++) {
  246                 if (ptr->val) {
  247                         free_swap_and_cache(*ptr);
  248                         *ptr = (swp_entry_t){0};
  249                         freed++;
  250                 }
  251         }
  252         return freed;
  253 }
  254 
  255 /*
  256  * shmem_truncate_direct - free the swap entries of a whole doubly
  257  *                         indirect block
  258  *
  259  * @info:       the info structure of the inode
  260  * @dir:        pointer to the pointer to the block
  261  * @start:      offset to start from (in pages)
  262  * @len:        how many pages are stored in this block
  263  */
  264 static inline unsigned long
  265 shmem_truncate_direct(struct shmem_inode_info *info, swp_entry_t ***dir, unsigned long start, unsigned long len)
  266 {
  267         swp_entry_t **last, **ptr;
  268         unsigned long off, freed_swp, freed = 0;
  269 
  270         last = *dir + (len + ENTRIES_PER_PAGE - 1) / ENTRIES_PER_PAGE;
  271         off = start % ENTRIES_PER_PAGE;
  272 
  273         for (ptr = *dir + start/ENTRIES_PER_PAGE; ptr < last; ptr++, off = 0) {
  274                 if (!*ptr)
  275                         continue;
  276 
  277                 if (info->swapped) {
  278                         freed_swp = shmem_free_swp(*ptr + off,
  279                                                 *ptr + ENTRIES_PER_PAGE);
  280                         info->swapped -= freed_swp;
  281                         freed += freed_swp;
  282                 }
  283 
  284                 if (!off) {
  285                         freed++;
  286                         free_page((unsigned long) *ptr);
  287                         *ptr = 0;
  288                 }
  289         }
  290 
  291         if (!start) {
  292                 freed++;
  293                 free_page((unsigned long) *dir);
  294                 *dir = 0;
  295         }
  296         return freed;
  297 }
  298 
  299 /*
  300  * shmem_truncate_indirect - truncate an inode
  301  *
  302  * @info:  the info structure of the inode
  303  * @index: the index to truncate
  304  *
  305  * This function locates the last doubly indirect block and then
  306  * calls shmem_truncate_direct to do the real work.
  307  */
  308 static inline unsigned long
  309 shmem_truncate_indirect(struct shmem_inode_info *info, unsigned long index)
  310 {
  311         swp_entry_t ***base;
  312         unsigned long baseidx, start;
  313         unsigned long len = info->next_index;
  314         unsigned long freed;
  315 
  316         if (len <= SHMEM_NR_DIRECT) {
  317                 info->next_index = index;
  318                 if (!info->swapped)
  319                         return 0;
  320                 freed = shmem_free_swp(info->i_direct + index,
  321                                         info->i_direct + len);
  322                 info->swapped -= freed;
  323                 return freed;
  324         }
  325 
  326         if (len <= ENTRIES_PER_PAGEPAGE/2 + SHMEM_NR_DIRECT) {
  327                 len -= SHMEM_NR_DIRECT;
  328                 base = (swp_entry_t ***) &info->i_indirect;
  329                 baseidx = SHMEM_NR_DIRECT;
  330         } else {
  331                 len -= ENTRIES_PER_PAGEPAGE/2 + SHMEM_NR_DIRECT;
  332                 BUG_ON(len > ENTRIES_PER_PAGEPAGE*ENTRIES_PER_PAGE/2);
  333                 baseidx = len - 1;
  334                 baseidx -= baseidx % ENTRIES_PER_PAGEPAGE;
  335                 base = (swp_entry_t ***) info->i_indirect +
  336                         ENTRIES_PER_PAGE/2 + baseidx/ENTRIES_PER_PAGEPAGE;
  337                 len -= baseidx;
  338                 baseidx += ENTRIES_PER_PAGEPAGE/2 + SHMEM_NR_DIRECT;
  339         }
  340 
  341         if (index > baseidx) {
  342                 info->next_index = index;
  343                 start = index - baseidx;
  344         } else {
  345                 info->next_index = baseidx;
  346                 start = 0;
  347         }
  348         return *base? shmem_truncate_direct(info, base, start, len): 0;
  349 }
  350 
  351 static void shmem_truncate(struct inode *inode)
  352 {
  353         struct shmem_inode_info *info = SHMEM_I(inode);
  354         struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
  355         unsigned long freed = 0;
  356         unsigned long index;
  357 
  358         inode->i_ctime = inode->i_mtime = CURRENT_TIME;
  359         index = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
  360         if (index >= info->next_index)
  361                 return;
  362 
  363         spin_lock(&info->lock);
  364         while (index < info->next_index)
  365                 freed += shmem_truncate_indirect(info, index);
  366         BUG_ON(info->swapped > info->next_index);
  367         spin_unlock(&info->lock);
  368 
  369         spin_lock(&sbinfo->stat_lock);
  370         sbinfo->free_blocks += freed;
  371         inode->i_blocks -= freed*BLOCKS_PER_PAGE;
  372         spin_unlock(&sbinfo->stat_lock);
  373 }
  374 
  375 static int shmem_notify_change(struct dentry *dentry, struct iattr *attr)
  376 {
  377         struct inode *inode = dentry->d_inode;
  378         struct page *page = NULL;
  379         int error;
  380 
  381         if (attr->ia_valid & ATTR_SIZE) {
  382                 if (attr->ia_size < inode->i_size) {
  383                         /*
  384                          * If truncating down to a partial page, then
  385                          * if that page is already allocated, hold it
  386                          * in memory until the truncation is over, so
  387                          * truncate_partial_page cannot miss it if
  388                          * it has been assigned to swap.
  389                          */
  390                         if (attr->ia_size & (PAGE_CACHE_SIZE-1)) {
  391                                 (void) shmem_getpage(inode,
  392                                         attr->ia_size>>PAGE_CACHE_SHIFT,
  393                                                 &page, SGP_READ);
  394                         }
  395                 }
  396         }
  397 
  398         error = inode_change_ok(inode, attr);
  399         if (!error)
  400                 error = inode_setattr(inode, attr);
  401         if (page)
  402                 page_cache_release(page);
  403         return error;
  404 }
  405 
  406 static void shmem_delete_inode(struct inode *inode)
  407 {
  408         struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
  409         struct shmem_inode_info *info = SHMEM_I(inode);
  410 
  411         if (inode->i_op->truncate == shmem_truncate) {
  412                 spin_lock(&shmem_ilock);
  413                 list_del(&info->list);
  414                 spin_unlock(&shmem_ilock);
  415                 inode->i_size = 0;
  416                 shmem_truncate(inode);
  417         }
  418         BUG_ON(inode->i_blocks);
  419         spin_lock(&sbinfo->stat_lock);
  420         sbinfo->free_inodes++;
  421         spin_unlock(&sbinfo->stat_lock);
  422         clear_inode(inode);
  423 }
  424 
  425 static inline int shmem_find_swp(swp_entry_t entry, swp_entry_t *dir, swp_entry_t *edir)
  426 {
  427         swp_entry_t *ptr;
  428 
  429         for (ptr = dir; ptr < edir; ptr++) {
  430                 if (ptr->val == entry.val)
  431                         return ptr - dir;
  432         }
  433         return -1;
  434 }
  435 
  436 static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, struct page *page)
  437 {
  438         struct inode *inode;
  439         struct address_space *mapping;
  440         swp_entry_t *ptr;
  441         unsigned long idx;
  442         unsigned long limit;
  443         int offset;
  444 
  445         idx = 0;
  446         ptr = info->i_direct;
  447         spin_lock(&info->lock);
  448         offset = info->next_index;
  449         if (offset > SHMEM_NR_DIRECT)
  450                 offset = SHMEM_NR_DIRECT;
  451         offset = shmem_find_swp(entry, ptr, ptr + offset);
  452         if (offset >= 0)
  453                 goto found;
  454 
  455         for (idx = SHMEM_NR_DIRECT; idx < info->next_index;
  456              idx += ENTRIES_PER_PAGE) {
  457                 ptr = shmem_swp_entry(info, idx, NULL);
  458                 if (!ptr)
  459                         continue;
  460                 offset = info->next_index - idx;
  461                 if (offset > ENTRIES_PER_PAGE)
  462                         offset = ENTRIES_PER_PAGE;
  463                 offset = shmem_find_swp(entry, ptr, ptr + offset);
  464                 if (offset >= 0)
  465                         goto found;
  466         }
  467         spin_unlock(&info->lock);
  468         return 0;
  469 found:
  470         idx += offset;
  471         inode = info->inode;
  472         mapping = inode->i_mapping;
  473         delete_from_swap_cache(page);
  474 
  475         /* Racing against delete or truncate? Must leave out of page cache */
  476         limit = (inode->i_state & I_FREEING)? 0:
  477                 (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
  478 
  479         if (idx >= limit || add_to_page_cache_unique(page,
  480                         mapping, idx, page_hash(mapping, idx)) == 0) {
  481                 ptr[offset].val = 0;
  482                 info->swapped--;
  483         } else if (add_to_swap_cache(page, entry) != 0)
  484                 BUG();
  485         spin_unlock(&info->lock);
  486         SetPageUptodate(page);
  487         /*
  488          * Decrement swap count even when the entry is left behind:
  489          * try_to_unuse will skip over mms, then reincrement count.
  490          */
  491         swap_free(entry);
  492         return 1;
  493 }
  494 
  495 /*
  496  * shmem_unuse() - search for a possibly swapped-out shmem page.
  497  */
  498 int shmem_unuse(swp_entry_t entry, struct page *page)
  499 {
  500         struct list_head *p;
  501         struct shmem_inode_info *info;
  502         int found = 0;
  503 
  504         spin_lock(&shmem_ilock);
  505         list_for_each(p, &shmem_inodes) {
  506                 info = list_entry(p, struct shmem_inode_info, list);
  507 
  508                 if (info->swapped && shmem_unuse_inode(info, entry, page)) {
  509                         /* move head to start search for next from here */
  510                         list_move_tail(&shmem_inodes, &info->list);
  511                         found = 1;
  512                         break;
  513                 }
  514         }
  515         spin_unlock(&shmem_ilock);
  516         return found;
  517 }
  518 
  519 /*
  520  * Move the page from the page cache to the swap cache.
  521  */
  522 static int shmem_writepage(struct page *page)
  523 {
  524         struct shmem_inode_info *info;
  525         swp_entry_t *entry, swap;
  526         struct address_space *mapping;
  527         unsigned long index;
  528         struct inode *inode;
  529 
  530         BUG_ON(!PageLocked(page));
  531         if (!PageLaunder(page))
  532                 return fail_writepage(page);
  533 
  534         mapping = page->mapping;
  535         index = page->index;
  536         inode = mapping->host;
  537         info = SHMEM_I(inode);
  538         if (info->flags & VM_LOCKED)
  539                 return fail_writepage(page);
  540 getswap:
  541         swap = get_swap_page();
  542         if (!swap.val)
  543                 return fail_writepage(page);
  544 
  545         spin_lock(&info->lock);
  546         BUG_ON(index >= info->next_index);
  547         entry = shmem_swp_entry(info, index, NULL);
  548         BUG_ON(!entry);
  549         BUG_ON(entry->val);
  550 
  551         /* Remove it from the page cache */
  552         remove_inode_page(page);
  553         page_cache_release(page);
  554 
  555         /* Add it to the swap cache */
  556         if (add_to_swap_cache(page, swap) != 0) {
  557                 /*
  558                  * Raced with "speculative" read_swap_cache_async.
  559                  * Add page back to page cache, unref swap, try again.
  560                  */
  561                 add_to_page_cache_locked(page, mapping, index);
  562                 spin_unlock(&info->lock);
  563                 swap_free(swap);
  564                 goto getswap;
  565         }
  566 
  567         *entry = swap;
  568         info->swapped++;
  569         spin_unlock(&info->lock);
  570         SetPageUptodate(page);
  571         set_page_dirty(page);
  572         UnlockPage(page);
  573         return 0;
  574 }
  575 
  576 /*
  577  * shmem_getpage - either get the page from swap or allocate a new one
  578  *
  579  * If we allocate a new one we do not mark it dirty. That's up to the
  580  * vm. If we swap it in we mark it dirty, since we also free the swap
  581  * entry: a page cannot live in both the swap cache and the page cache.
  582  */
  583 static int shmem_getpage(struct inode *inode, unsigned long idx, struct page **pagep, enum sgp_type sgp)
  584 {
  585         struct address_space *mapping = inode->i_mapping;
  586         struct shmem_inode_info *info = SHMEM_I(inode);
  587         struct shmem_sb_info *sbinfo;
  588         struct page *filepage = *pagep;
  589         struct page *swappage;
  590         swp_entry_t *entry;
  591         swp_entry_t swap;
  592         int error = 0;
  593 
  594         if (idx >= SHMEM_MAX_INDEX)
  595                 return -EFBIG;
  596         /*
  597          * Normally, filepage is NULL on entry, and either found
  598          * uptodate immediately, or allocated and zeroed, or read
  599          * in under swappage, which is then assigned to filepage.
  600          * But shmem_readpage and shmem_prepare_write pass in a locked
  601          * filepage, which may be found not uptodate by other callers
  602          * too, and may need to be copied from the swappage read in.
  603          */
  604 repeat:
  605         if (!filepage)
  606                 filepage = find_lock_page(mapping, idx);
  607         if (filepage && Page_Uptodate(filepage))
  608                 goto done;
  609 
  610         spin_lock(&info->lock);
  611         entry = shmem_swp_alloc(info, idx, sgp);
  612         if (IS_ERR(entry)) {
  613                 spin_unlock(&info->lock);
  614                 error = PTR_ERR(entry);
  615                 goto failed;
  616         }
  617         swap = *entry;
  618 
  619         if (swap.val) {
  620                 /* Look it up and read it in.. */
  621                 swappage = lookup_swap_cache(swap);
  622                 if (!swappage) {
  623                         spin_unlock(&info->lock);
  624                         swapin_readahead(swap);
  625                         swappage = read_swap_cache_async(swap);
  626                         if (!swappage) {
  627                                 spin_lock(&info->lock);
  628                                 entry = shmem_swp_alloc(info, idx, sgp);
  629                                 if (IS_ERR(entry))
  630                                         error = PTR_ERR(entry);
  631                                 else if (entry->val == swap.val)
  632                                         error = -ENOMEM;
  633                                 spin_unlock(&info->lock);
  634                                 if (error)
  635                                         goto failed;
  636                                 goto repeat;
  637                         }
  638                         wait_on_page(swappage);
  639                         page_cache_release(swappage);
  640                         goto repeat;
  641                 }
  642 
  643                 /* We have to do this with page locked to prevent races */
  644                 if (TryLockPage(swappage)) {
  645                         spin_unlock(&info->lock);
  646                         wait_on_page(swappage);
  647                         page_cache_release(swappage);
  648                         goto repeat;
  649                 }
  650                 if (!Page_Uptodate(swappage)) {
  651                         spin_unlock(&info->lock);
  652                         UnlockPage(swappage);
  653                         page_cache_release(swappage);
  654                         error = -EIO;
  655                         goto failed;
  656                 }
  657 
  658                 delete_from_swap_cache(swappage);
  659                 if (filepage) {
  660                         entry->val = 0;
  661                         info->swapped--;
  662                         spin_unlock(&info->lock);
  663                         flush_page_to_ram(swappage);
  664                         copy_highpage(filepage, swappage);
  665                         UnlockPage(swappage);
  666                         page_cache_release(swappage);
  667                         flush_dcache_page(filepage);
  668                         SetPageUptodate(filepage);
  669                         SetPageDirty(filepage);
  670                         swap_free(swap);
  671                 } else if (add_to_page_cache_unique(swappage,
  672                         mapping, idx, page_hash(mapping, idx)) == 0) {
  673                         entry->val = 0;
  674                         info->swapped--;
  675                         spin_unlock(&info->lock);
  676                         filepage = swappage;
  677                         SetPageUptodate(filepage);
  678                         SetPageDirty(filepage);
  679                         swap_free(swap);
  680                 } else {
  681                         if (add_to_swap_cache(swappage, swap) != 0)
  682                                 BUG();
  683                         spin_unlock(&info->lock);
  684                         SetPageUptodate(swappage);
  685                         SetPageDirty(swappage);
  686                         UnlockPage(swappage);
  687                         page_cache_release(swappage);
  688                         goto repeat;
  689                 }
  690         } else if (sgp == SGP_READ && !filepage) {
  691                 filepage = find_get_page(mapping, idx);
  692                 if (filepage &&
  693                     (!Page_Uptodate(filepage) || TryLockPage(filepage))) {
  694                         spin_unlock(&info->lock);
  695                         wait_on_page(filepage);
  696                         page_cache_release(filepage);
  697                         filepage = NULL;
  698                         goto repeat;
  699                 }
  700                 spin_unlock(&info->lock);
  701         } else {
  702                 sbinfo = SHMEM_SB(inode->i_sb);
  703                 spin_lock(&sbinfo->stat_lock);
  704                 if (sbinfo->free_blocks == 0) {
  705                         spin_unlock(&sbinfo->stat_lock);
  706                         spin_unlock(&info->lock);
  707                         error = -ENOSPC;
  708                         goto failed;
  709                 }
  710                 sbinfo->free_blocks--;
  711                 inode->i_blocks += BLOCKS_PER_PAGE;
  712                 spin_unlock(&sbinfo->stat_lock);
  713 
  714                 if (!filepage) {
  715                         spin_unlock(&info->lock);
  716                         filepage = page_cache_alloc(mapping);
  717                         if (!filepage) {
  718                                 shmem_free_block(inode);
  719                                 error = -ENOMEM;
  720                                 goto failed;
  721                         }
  722 
  723                         spin_lock(&info->lock);
  724                         entry = shmem_swp_alloc(info, idx, sgp);
  725                         if (IS_ERR(entry))
  726                                 error = PTR_ERR(entry);
  727                         if (error || entry->val ||
  728                             add_to_page_cache_unique(filepage,
  729                             mapping, idx, page_hash(mapping, idx)) != 0) {
  730                                 spin_unlock(&info->lock);
  731                                 page_cache_release(filepage);
  732                                 shmem_free_block(inode);
  733                                 filepage = NULL;
  734                                 if (error)
  735                                         goto failed;
  736                                 goto repeat;
  737                         }
  738                 }
  739 
  740                 spin_unlock(&info->lock);
  741                 clear_highpage(filepage);
  742                 flush_dcache_page(filepage);
  743                 SetPageUptodate(filepage);
  744         }
  745 done:
  746         if (!*pagep) {
  747                 if (filepage) {
  748                         UnlockPage(filepage);
  749                         *pagep = filepage;
  750                 } else
  751                         *pagep = ZERO_PAGE(0);
  752         }
  753         return 0;
  754 
  755 failed:
  756         if (*pagep != filepage) {
  757                 UnlockPage(filepage);
  758                 page_cache_release(filepage);
  759         }
  760         return error;
  761 }
  762 
  763 struct page *shmem_nopage(struct vm_area_struct *vma, unsigned long address, int unused)
  764 {
  765         struct inode *inode = vma->vm_file->f_dentry->d_inode;
  766         struct page *page = NULL;
  767         unsigned long idx;
  768         int error;
  769 
  770         idx = (address - vma->vm_start) >> PAGE_SHIFT;
  771         idx += vma->vm_pgoff;
  772         idx >>= PAGE_CACHE_SHIFT - PAGE_SHIFT;
  773 
  774         error = shmem_getpage(inode, idx, &page, SGP_CACHE);
  775         if (error)
  776                 return (error == -ENOMEM)? NOPAGE_OOM: NOPAGE_SIGBUS;
  777 
  778         mark_page_accessed(page);
  779         flush_page_to_ram(page);
  780         return page;
  781 }
  782 
  783 void shmem_lock(struct file *file, int lock)
  784 {
  785         struct inode *inode = file->f_dentry->d_inode;
  786         struct shmem_inode_info *info = SHMEM_I(inode);
  787 
  788         spin_lock(&info->lock);
  789         if (lock)
  790                 info->flags |= VM_LOCKED;
  791         else
  792                 info->flags &= ~VM_LOCKED;
  793         spin_unlock(&info->lock);
  794 }
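
/*
 * Hedged userspace sketch: one caller of shmem_lock() above is the
 * System V ipc layer, when shmctl(SHM_LOCK) pins a segment so that
 * shmem_writepage() refuses to push its pages out to swap.  The size
 * and permissions below are assumptions for illustration.
 */
#if 0   /* userspace example */
#include <sys/ipc.h>
#include <sys/shm.h>

int lock_segment(void)
{
        int id = shmget(IPC_PRIVATE, 4096, IPC_CREAT | 0600);

        if (id < 0)
                return -1;
        /* ends up setting VM_LOCKED in the segment's shmem_inode_info */
        return shmctl(id, SHM_LOCK, NULL);
}
#endif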
  795 
  796 static int shmem_mmap(struct file *file, struct vm_area_struct *vma)
  797 {
  798         struct vm_operations_struct *ops;
  799         struct inode *inode = file->f_dentry->d_inode;
  800 
  801         ops = &shmem_vm_ops;
  802         if (!S_ISREG(inode->i_mode))
  803                 return -EACCES;
  804         UPDATE_ATIME(inode);
  805         vma->vm_ops = ops;
  806         return 0;
  807 }
  808 
  809 static struct inode *shmem_get_inode(struct super_block *sb, int mode, int dev)
  810 {
  811         struct inode *inode;
  812         struct shmem_inode_info *info;
  813         struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
  814 
  815         spin_lock(&sbinfo->stat_lock);
  816         if (!sbinfo->free_inodes) {
  817                 spin_unlock(&sbinfo->stat_lock);
  818                 return NULL;
  819         }
  820         sbinfo->free_inodes--;
  821         spin_unlock(&sbinfo->stat_lock);
  822 
  823         inode = new_inode(sb);
  824         if (inode) {
  825                 inode->i_mode = mode;
  826                 inode->i_uid = current->fsuid;
  827                 inode->i_gid = current->fsgid;
  828                 inode->i_blksize = PAGE_CACHE_SIZE;
  829                 inode->i_blocks = 0;
  830                 inode->i_rdev = NODEV;
  831                 inode->i_mapping->a_ops = &shmem_aops;
  832                 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
  833                 info = SHMEM_I(inode);
  834                 info->inode = inode;
  835                 spin_lock_init(&info->lock);
  836                 switch (mode & S_IFMT) {
  837                 default:
  838                         init_special_inode(inode, mode, dev);
  839                         break;
  840                 case S_IFREG:
  841                         inode->i_op = &shmem_inode_operations;
  842                         inode->i_fop = &shmem_file_operations;
  843                         spin_lock(&shmem_ilock);
  844                         list_add_tail(&info->list, &shmem_inodes);
  845                         spin_unlock(&shmem_ilock);
  846                         break;
  847                 case S_IFDIR:
  848                         inode->i_nlink++;
  849                         /* Some things misbehave if size == 0 on a directory */
  850                         inode->i_size = 2 * BOGO_DIRENT_SIZE;
  851                         inode->i_op = &shmem_dir_inode_operations;
  852                         inode->i_fop = &dcache_dir_ops;
  853                         break;
  854                 case S_IFLNK:
  855                         break;
  856                 }
  857         }
  858         return inode;
  859 }
  860 
  861 static int shmem_set_size(struct shmem_sb_info *info,
  862                           unsigned long max_blocks, unsigned long max_inodes)
  863 {
  864         int error;
  865         unsigned long blocks, inodes;
  866 
  867         spin_lock(&info->stat_lock);
  868         blocks = info->max_blocks - info->free_blocks;
  869         inodes = info->max_inodes - info->free_inodes;
  870         error = -EINVAL;
  871         if (max_blocks < blocks)
  872                 goto out;
  873         if (max_inodes < inodes)
  874                 goto out;
  875         error = 0;
  876         info->max_blocks  = max_blocks;
  877         info->free_blocks = max_blocks - blocks;
  878         info->max_inodes  = max_inodes;
  879         info->free_inodes = max_inodes - inodes;
  880 out:
  881         spin_unlock(&info->stat_lock);
  882         return error;
  883 }
  884 
  885 #ifdef CONFIG_TMPFS
  886 
  887 static struct inode_operations shmem_symlink_inode_operations;
  888 static struct inode_operations shmem_symlink_inline_operations;
  889 
  890 /*
  891  * tmpfs itself makes no use of generic_file_read, generic_file_mmap
  892  * or generic_file_write; but shmem_readpage, shmem_prepare_write and
  893  * shmem_commit_write let a tmpfs file be used below the loop driver,
  894  * and shmem_readpage lets a tmpfs file be used by sendfile.
  895  */
  896 static int
  897 shmem_readpage(struct file *file, struct page *page)
  898 {
  899         struct inode *inode = page->mapping->host;
  900         int error = shmem_getpage(inode, page->index, &page, SGP_CACHE);
  901         UnlockPage(page);
  902         return error;
  903 }
  904 
  905 static int
  906 shmem_prepare_write(struct file *file, struct page *page, unsigned offset, unsigned to)
  907 {
  908         struct inode *inode = page->mapping->host;
  909         return shmem_getpage(inode, page->index, &page, SGP_WRITE);
  910 }
  911 
  912 static int
  913 shmem_commit_write(struct file *file, struct page *page, unsigned offset, unsigned to)
  914 {
  915         struct inode *inode = page->mapping->host;
  916         loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
  917 
  918         if (pos > inode->i_size)
  919                 inode->i_size = pos;
  920         SetPageDirty(page);
  921         return 0;
  922 }
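
/*
 * Hedged userspace sketch of the sendfile path mentioned in the
 * comment above shmem_readpage: with readpage implemented, a tmpfs
 * file can be the in_fd of sendfile(2).  The path and descriptor
 * handling here are assumptions for illustration.
 */
#if 0   /* userspace example */
#include <sys/sendfile.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>

ssize_t send_tmpfs_file(int sockfd)
{
        int fd = open("/dev/shm/data", O_RDONLY);  /* file on a tmpfs mount */
        struct stat st;
        off_t off = 0;
        ssize_t n = -1;

        if (fd >= 0 && fstat(fd, &st) == 0)
                n = sendfile(sockfd, fd, &off, st.st_size);
        if (fd >= 0)
                close(fd);
        return n;
}
#endif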
  923 
  924 static ssize_t
  925 shmem_file_write(struct file *file, const char *buf, size_t count, loff_t *ppos)
  926 {
  927         struct inode    *inode = file->f_dentry->d_inode;
  928         loff_t          pos;
  929         unsigned long   written;
  930         int             err;
  931 
  932         if ((ssize_t) count < 0)
  933                 return -EINVAL;
  934 
  935         if (!access_ok(VERIFY_READ, buf, count))
  936                 return -EFAULT;
  937 
  938         down(&inode->i_sem);
  939 
  940         pos = *ppos;
  941         written = 0;
  942 
  943         err = precheck_file_write(file, inode, &count, &pos);
  944         if (err || !count)
  945                 goto out;
  946 
  947         remove_suid(inode);
  948         inode->i_ctime = inode->i_mtime = CURRENT_TIME;
  949 
  950         do {
  951                 struct page *page = NULL;
  952                 unsigned long bytes, index, offset;
  953                 char *kaddr;
  954                 int left;
  955 
  956                 offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */
  957                 index = pos >> PAGE_CACHE_SHIFT;
  958                 bytes = PAGE_CACHE_SIZE - offset;
  959                 if (bytes > count)
  960                         bytes = count;
  961 
  962                 /*
  963                  * We don't hold page lock across copy from user -
  964                  * what would it guard against? - so no deadlock here.
  965                  */
  966 
  967                 err = shmem_getpage(inode, index, &page, SGP_WRITE);
  968                 if (err)
  969                         break;
  970 
  971                 kaddr = kmap(page);
  972                 left = __copy_from_user(kaddr + offset, buf, bytes);
  973                 kunmap(page);
  974 
  975                 written += bytes;
  976                 count -= bytes;
  977                 pos += bytes;
  978                 buf += bytes;
  979                 if (pos > inode->i_size)
  980                         inode->i_size = pos;
  981 
  982                 flush_dcache_page(page);
  983                 SetPageDirty(page);
  984                 SetPageReferenced(page);
  985                 page_cache_release(page);
  986 
  987                 if (left) {
  988                         pos -= left;
  989                         written -= left;
  990                         err = -EFAULT;
  991                         break;
  992                 }
  993         } while (count);
  994 
  995         *ppos = pos;
  996         if (written)
  997                 err = written;
  998 out:
  999         up(&inode->i_sem);
 1000         return err;
 1001 }
 1002 
 1003 static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_t *desc)
 1004 {
 1005         struct inode *inode = filp->f_dentry->d_inode;
 1006         struct address_space *mapping = inode->i_mapping;
 1007         unsigned long index, offset;
 1008 
 1009         index = *ppos >> PAGE_CACHE_SHIFT;
 1010         offset = *ppos & ~PAGE_CACHE_MASK;
 1011 
 1012         for (;;) {
 1013                 struct page *page = NULL;
 1014                 unsigned long end_index, nr, ret;
 1015 
 1016                 end_index = inode->i_size >> PAGE_CACHE_SHIFT;
 1017                 if (index > end_index)
 1018                         break;
 1019                 if (index == end_index) {
 1020                         nr = inode->i_size & ~PAGE_CACHE_MASK;
 1021                         if (nr <= offset)
 1022                                 break;
 1023                 }
 1024 
 1025                 desc->error = shmem_getpage(inode, index, &page, SGP_READ);
 1026                 if (desc->error) {
 1027                         if (desc->error == -EINVAL)
 1028                                 desc->error = 0;
 1029                         break;
 1030                 }
 1031 
 1032                 /*
 1033                  * We must evaluate after, since reads (unlike writes)
 1034                  * are called without i_sem protection against truncate
 1035                  */
 1036                 nr = PAGE_CACHE_SIZE;
 1037                 end_index = inode->i_size >> PAGE_CACHE_SHIFT;
 1038                 if (index == end_index) {
 1039                         nr = inode->i_size & ~PAGE_CACHE_MASK;
 1040                         if (nr <= offset) {
 1041                                 page_cache_release(page);
 1042                                 break;
 1043                         }
 1044                 }
 1045                 nr -= offset;
 1046 
 1047                 if (page != ZERO_PAGE(0)) {
 1048                         /*
 1049                          * If users can be writing to this page using arbitrary
 1050                          * virtual addresses, take care about potential aliasing
 1051                          * before reading the page on the kernel side.
 1052                          */
 1053                         if (mapping->i_mmap_shared != NULL)
 1054                                 flush_dcache_page(page);
 1055                         /*
 1056                          * Mark the page accessed if we read the
 1057                          * beginning or we just did an lseek.
 1058                          */
 1059                         if (!offset || !filp->f_reada)
 1060                                 mark_page_accessed(page);
 1061                 }
 1062 
 1063                 /*
 1064                  * Ok, we have the page, and it's up-to-date, so
 1065                  * now we can copy it to user space...
 1066                  *
 1067                  * The actor routine returns how many bytes were actually used..
 1068                  * NOTE! This may not be the same as how much of a user buffer
 1069                  * we filled up (we may be padding etc), so we can only update
 1070                  * "pos" here (the actor routine has to update the user buffer
 1071                  * pointers and the remaining count).
 1072                  */
 1073                 ret = file_read_actor(desc, page, offset, nr);
 1074                 offset += ret;
 1075                 index += offset >> PAGE_CACHE_SHIFT;
 1076                 offset &= ~PAGE_CACHE_MASK;
 1077 
 1078                 page_cache_release(page);
 1079                 if (ret != nr || !desc->count)
 1080                         break;
 1081         }
 1082 
 1083         *ppos = ((loff_t) index << PAGE_CACHE_SHIFT) + offset;
 1084         filp->f_reada = 1;
 1085         UPDATE_ATIME(inode);
 1086 }
 1087 
 1088 static ssize_t shmem_file_read(struct file *filp, char *buf, size_t count, loff_t *ppos)
 1089 {
 1090         read_descriptor_t desc;
 1091 
 1092         if ((ssize_t) count < 0)
 1093                 return -EINVAL;
 1094         if (!access_ok(VERIFY_WRITE, buf, count))
 1095                 return -EFAULT;
 1096         if (!count)
 1097                 return 0;
 1098 
 1099         desc.written = 0;
 1100         desc.count = count;
 1101         desc.buf = buf;
 1102         desc.error = 0;
 1103 
 1104         do_shmem_file_read(filp, ppos, &desc);
 1105         if (desc.written)
 1106                 return desc.written;
 1107         return desc.error;
 1108 }
 1109 
 1110 static int shmem_statfs(struct super_block *sb, struct statfs *buf)
 1111 {
 1112         struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
 1113 
 1114         buf->f_type = TMPFS_MAGIC;
 1115         buf->f_bsize = PAGE_CACHE_SIZE;
 1116         spin_lock(&sbinfo->stat_lock);
 1117         buf->f_blocks = sbinfo->max_blocks;
 1118         buf->f_bavail = buf->f_bfree = sbinfo->free_blocks;
 1119         buf->f_files = sbinfo->max_inodes;
 1120         buf->f_ffree = sbinfo->free_inodes;
 1121         spin_unlock(&sbinfo->stat_lock);
 1122         buf->f_namelen = NAME_MAX;
 1123         return 0;
 1124 }
 1125 
 1126 /*
 1127  * Lookup the data. This is trivial - if the dentry didn't already
 1128  * exist, we know it is negative.
 1129  */
 1130 static struct dentry *shmem_lookup(struct inode *dir, struct dentry *dentry)
 1131 {
 1132         d_add(dentry, NULL);
 1133         return NULL;
 1134 }
 1135 
 1136 /*
 1137  * File creation. Allocate an inode, and we're done..
 1138  */
 1139 static int shmem_mknod(struct inode *dir, struct dentry *dentry, int mode, int dev)
 1140 {
 1141         struct inode *inode = shmem_get_inode(dir->i_sb, mode, dev);
 1142         int error = -ENOSPC;
 1143 
 1144         if (inode) {
 1145                 dir->i_size += BOGO_DIRENT_SIZE;
 1146                 dir->i_ctime = dir->i_mtime = CURRENT_TIME;
 1147                 d_instantiate(dentry, inode);
 1148                 dget(dentry); /* Extra count - pin the dentry in core */
 1149                 error = 0;
 1150         }
 1151         return error;
 1152 }
 1153 
 1154 static int shmem_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 1155 {
 1156         int error;
 1157 
 1158         if ((error = shmem_mknod(dir, dentry, mode | S_IFDIR, 0)))
 1159                 return error;
 1160         dir->i_nlink++;
 1161         return 0;
 1162 }
 1163 
 1164 static int shmem_create(struct inode *dir, struct dentry *dentry, int mode)
 1165 {
 1166         return shmem_mknod(dir, dentry, mode | S_IFREG, 0);
 1167 }
 1168 
 1169 /*
 1170  * Link a file..
 1171  */
 1172 static int shmem_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
 1173 {
 1174         struct inode *inode = old_dentry->d_inode;
 1175 
 1176         if (S_ISDIR(inode->i_mode))
 1177                 return -EPERM;
 1178 
 1179         dir->i_size += BOGO_DIRENT_SIZE;
 1180         inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
 1181         inode->i_nlink++;
 1182         atomic_inc(&inode->i_count);    /* New dentry reference */
 1183         dget(dentry);           /* Extra pinning count for the created dentry */
 1184         d_instantiate(dentry, inode);
 1185         return 0;
 1186 }
 1187 
 1188 static inline int shmem_positive(struct dentry *dentry)
 1189 {
 1190         return dentry->d_inode && !d_unhashed(dentry);
 1191 }
 1192 
 1193 /*
 1194  * Check that a directory is empty (this works
 1195  * for regular files too, they'll just always be
 1196  * considered empty..).
 1197  *
 1198  * Note that an empty directory can still have
 1199  * children, they just all have to be negative..
 1200  */
 1201 static int shmem_empty(struct dentry *dentry)
 1202 {
 1203         struct list_head *list;
 1204 
 1205         spin_lock(&dcache_lock);
 1206         list = dentry->d_subdirs.next;
 1207 
 1208         while (list != &dentry->d_subdirs) {
 1209                 struct dentry *de = list_entry(list, struct dentry, d_child);
 1210 
 1211                 if (shmem_positive(de)) {
 1212                         spin_unlock(&dcache_lock);
 1213                         return 0;
 1214                 }
 1215                 list = list->next;
 1216         }
 1217         spin_unlock(&dcache_lock);
 1218         return 1;
 1219 }
 1220 
 1221 static int shmem_unlink(struct inode *dir, struct dentry *dentry)
 1222 {
 1223         struct inode *inode = dentry->d_inode;
 1224 
 1225         dir->i_size -= BOGO_DIRENT_SIZE;
 1226         inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
 1227         inode->i_nlink--;
 1228         dput(dentry);   /* Undo the count from "create" - this does all the work */
 1229         return 0;
 1230 }
 1231 
 1232 static int shmem_rmdir(struct inode *dir, struct dentry *dentry)
 1233 {
 1234         if (!shmem_empty(dentry))
 1235                 return -ENOTEMPTY;
 1236 
 1237         dir->i_nlink--;
 1238         return shmem_unlink(dir, dentry);
 1239 }
 1240 
 1241 /*
 1242  * The VFS layer already does all the dentry stuff for rename,
 1243  * we just have to decrement the usage count for the target if
 1244  * it exists so that the VFS layer correctly frees it when it
 1245  * gets overwritten.
 1246  */
 1247 static int shmem_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry)
 1248 {
 1249         struct inode *inode = old_dentry->d_inode;
 1250         int they_are_dirs = S_ISDIR(inode->i_mode);
 1251 
 1252         if (!shmem_empty(new_dentry))
 1253                 return -ENOTEMPTY;
 1254 
 1255         if (new_dentry->d_inode) {
 1256                 (void) shmem_unlink(new_dir, new_dentry);
 1257                 if (they_are_dirs)
 1258                         old_dir->i_nlink--;
 1259         } else if (they_are_dirs) {
 1260                 old_dir->i_nlink--;
 1261                 new_dir->i_nlink++;
 1262         }
 1263 
 1264         old_dir->i_size -= BOGO_DIRENT_SIZE;
 1265         new_dir->i_size += BOGO_DIRENT_SIZE;
 1266         old_dir->i_ctime = old_dir->i_mtime =
 1267         new_dir->i_ctime = new_dir->i_mtime =
 1268         inode->i_ctime = CURRENT_TIME;
 1269         return 0;
 1270 }
 1271 
 1272 static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
 1273 {
 1274         int error;
 1275         int len;
 1276         struct inode *inode;
 1277         struct page *page = NULL;
 1278         char *kaddr;
 1279         struct shmem_inode_info *info;
 1280 
 1281         len = strlen(symname) + 1;
 1282         if (len > PAGE_CACHE_SIZE)
 1283                 return -ENAMETOOLONG;
 1284 
 1285         inode = shmem_get_inode(dir->i_sb, S_IFLNK|S_IRWXUGO, 0);
 1286         if (!inode)
 1287                 return -ENOSPC;
 1288 
 1289         info = SHMEM_I(inode);
 1290         inode->i_size = len-1;
 1291         if (len <= sizeof(struct shmem_inode_info)) {
 1292                 /* do it inline */
 1293                 memcpy(info, symname, len);
 1294                 inode->i_op = &shmem_symlink_inline_operations;
 1295         } else {
 1296                 error = shmem_getpage(inode, 0, &page, SGP_WRITE);
 1297                 if (error) {
 1298                         iput(inode);
 1299                         return error;
 1300                 }
 1301                 inode->i_op = &shmem_symlink_inode_operations;
 1302                 spin_lock(&shmem_ilock);
 1303                 list_add_tail(&info->list, &shmem_inodes);
 1304                 spin_unlock(&shmem_ilock);
 1305                 kaddr = kmap(page);
 1306                 memcpy(kaddr, symname, len);
 1307                 kunmap(page);
 1308                 SetPageDirty(page);
 1309                 page_cache_release(page);
 1310         }
 1311         dir->i_size += BOGO_DIRENT_SIZE;
 1312         dir->i_ctime = dir->i_mtime = CURRENT_TIME;
 1313         d_instantiate(dentry, inode);
 1314         dget(dentry);
 1315         return 0;
 1316 }
 1317 
 1318 static int shmem_readlink_inline(struct dentry *dentry, char *buffer, int buflen)
 1319 {
 1320         return vfs_readlink(dentry, buffer, buflen, (const char *)SHMEM_I(dentry->d_inode));
 1321 }
 1322 
 1323 static int shmem_follow_link_inline(struct dentry *dentry, struct nameidata *nd)
 1324 {
 1325         return vfs_follow_link(nd, (const char *)SHMEM_I(dentry->d_inode));
 1326 }
 1327 
 1328 static int shmem_readlink(struct dentry *dentry, char *buffer, int buflen)
 1329 {
 1330         struct page *page = NULL;
 1331         int res = shmem_getpage(dentry->d_inode, 0, &page, SGP_READ);
 1332         if (res)
 1333                 return res;
 1334         res = vfs_readlink(dentry, buffer, buflen, kmap(page));
 1335         kunmap(page);
 1336         mark_page_accessed(page);
 1337         page_cache_release(page);
 1338         return res;
 1339 }
 1340 
 1341 static int shmem_follow_link(struct dentry *dentry, struct nameidata *nd)
 1342 {
 1343         struct page *page = NULL;
 1344         int res = shmem_getpage(dentry->d_inode, 0, &page, SGP_READ);
 1345         if (res)
 1346                 return res;
 1347         res = vfs_follow_link(nd, kmap(page));
 1348         kunmap(page);
 1349         mark_page_accessed(page);
 1350         page_cache_release(page);
 1351         return res;
 1352 }
 1353 
 1354 static struct inode_operations shmem_symlink_inline_operations = {
 1355         readlink:       shmem_readlink_inline,
 1356         follow_link:    shmem_follow_link_inline,
 1357 };
 1358 
 1359 static struct inode_operations shmem_symlink_inode_operations = {
 1360         truncate:       shmem_truncate,
 1361         readlink:       shmem_readlink,
 1362         follow_link:    shmem_follow_link,
 1363 };
 1364 
 1365 static int shmem_parse_options(char *options, int *mode, uid_t *uid, gid_t *gid, unsigned long *blocks, unsigned long *inodes)
 1366 {
 1367         char *this_char, *value, *rest;
 1368 
 1369         while ((this_char = strsep(&options, ",")) != NULL) {
 1370                 if (!*this_char)
 1371                         continue;
 1372                 if ((value = strchr(this_char,'=')) != NULL) {
 1373                         *value++ = 0;
 1374                 } else {
 1375                         printk(KERN_ERR
 1376                             "tmpfs: No value for mount option '%s'\n",
 1377                             this_char);
 1378                         return 1;
 1379                 }
 1380 
 1381                 if (!strcmp(this_char,"size")) {
 1382                         unsigned long long size;
 1383                         size = memparse(value,&rest);
 1384                         if (*rest == '%') {
 1385                                 struct sysinfo si;
 1386                                 si_meminfo(&si);
 1387                                 size <<= PAGE_SHIFT;
 1388                                 size *= si.totalram;
 1389                                 do_div(size, 100);
 1390                                 rest++;
 1391                         }
 1392                         if (*rest)
 1393                                 goto bad_val;
 1394                         *blocks = size >> PAGE_CACHE_SHIFT;
 1395                 } else if (!strcmp(this_char,"nr_blocks")) {
 1396                         *blocks = memparse(value,&rest);
 1397                         if (*rest)
 1398                                 goto bad_val;
 1399                 } else if (!strcmp(this_char,"nr_inodes")) {
 1400                         *inodes = memparse(value,&rest);
 1401                         if (*rest)
 1402                                 goto bad_val;
 1403                 } else if (!strcmp(this_char,"mode")) {
 1404                         if (!mode)
 1405                                 continue;
 1406                         *mode = simple_strtoul(value,&rest,8);
 1407                         if (*rest)
 1408                                 goto bad_val;
 1409                 } else if (!strcmp(this_char,"uid")) {
 1410                         if (!uid)
 1411                                 continue;
 1412                         *uid = simple_strtoul(value,&rest,0);
 1413                         if (*rest)
 1414                                 goto bad_val;
 1415                 } else if (!strcmp(this_char,"gid")) {
 1416                         if (!gid)
 1417                                 continue;
 1418                         *gid = simple_strtoul(value,&rest,0);
 1419                         if (*rest)
 1420                                 goto bad_val;
 1421                 } else {
 1422                         printk(KERN_ERR "tmpfs: Bad mount option %s\n",
 1423                                this_char);
 1424                         return 1;
 1425                 }
 1426         }
 1427         return 0;
 1428 
 1429 bad_val:
 1430         printk(KERN_ERR "tmpfs: Bad value '%s' for mount option '%s'\n",
 1431                value, this_char);
 1432         return 1;
 1433 }
 1434 
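/*
 * Editor's note: the options parsed above arrive as the comma-separated
 * data string of mount(2).  "size" takes the memparse() suffixes (k/m/g)
 * or a trailing '%' meaning a percentage of total RAM; "mode" is octal.
 * A minimal userspace sketch (mount point and values are illustrative):
 *
 *      #include <sys/mount.h>
 *
 *      int main(void)
 *      {
 *              return mount("tmpfs", "/mnt/tmp", "tmpfs", 0,
 *                           "size=50%,nr_inodes=8192,mode=1777");
 *      }
 */
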
 1435 static int shmem_remount_fs(struct super_block *sb, int *flags, char *data)
 1436 {
 1437         struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
 1438         unsigned long max_blocks = sbinfo->max_blocks;
 1439         unsigned long max_inodes = sbinfo->max_inodes;
 1440 
 1441         if (shmem_parse_options(data, NULL, NULL, NULL, &max_blocks, &max_inodes))
 1442                 return -EINVAL;
 1443         return shmem_set_size(sbinfo, max_blocks, max_inodes);
 1444 }
 1445 
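/*
 * Editor's note: on remount the mode, uid and gid pointers are passed as
 * NULL, so shmem_parse_options() silently skips those options; only the
 * block and inode limits can change.  A hedged sketch (path illustrative):
 *
 *      #include <sys/mount.h>
 *
 *      int main(void)
 *      {
 *              return mount("tmpfs", "/mnt/tmp", "tmpfs",
 *                           MS_REMOUNT, "size=256m");
 *      }
 */
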
 1446 static int shmem_sync_file(struct file *file, struct dentry *dentry, int datasync)
 1447 {
 1448         return 0;
 1449 }
 1450 #endif
 1451 
 1452 static struct super_block *shmem_read_super(struct super_block *sb, void *data, int silent)
 1453 {
 1454         struct inode *inode;
 1455         struct dentry *root;
 1456         unsigned long blocks, inodes;
 1457         int mode   = S_IRWXUGO | S_ISVTX;
 1458         uid_t uid = current->fsuid;
 1459         gid_t gid = current->fsgid;
 1460         struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
 1461         struct sysinfo si;
 1462 
 1463         /*
 1464          * By default, each tmpfs instance is limited to half of
 1465          * physical RAM, in both blocks and inodes.
 1466          */
 1467         si_meminfo(&si);
 1468         blocks = inodes = si.totalram / 2;
 1469 
 1470 #ifdef CONFIG_TMPFS
 1471         if (shmem_parse_options(data, &mode, &uid, &gid, &blocks, &inodes))
 1472                 return NULL;
 1473 #endif
 1474 
 1475         spin_lock_init(&sbinfo->stat_lock);
 1476         sbinfo->max_blocks = blocks;
 1477         sbinfo->free_blocks = blocks;
 1478         sbinfo->max_inodes = inodes;
 1479         sbinfo->free_inodes = inodes;
 1480         sb->s_maxbytes = SHMEM_MAX_BYTES;
 1481         sb->s_blocksize = PAGE_CACHE_SIZE;
 1482         sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
 1483         sb->s_magic = TMPFS_MAGIC;
 1484         sb->s_op = &shmem_ops;
 1485         inode = shmem_get_inode(sb, S_IFDIR | mode, 0);
 1486         if (!inode)
 1487                 return NULL;
 1488 
 1489         inode->i_uid = uid;
 1490         inode->i_gid = gid;
 1491         root = d_alloc_root(inode);
 1492         if (!root) {
 1493                 iput(inode);
 1494                 return NULL;
 1495         }
 1496         sb->s_root = root;
 1497         return sb;
 1498 }
 1499 
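/*
 * Editor's note: the superblock set up above is observable from userspace
 * via statfs(2): f_type carries TMPFS_MAGIC (0x01021994) and f_bsize the
 * PAGE_CACHE_SIZE block size.  A minimal sketch (the path is illustrative):
 *
 *      #include <stdio.h>
 *      #include <sys/vfs.h>
 *
 *      int main(void)
 *      {
 *              struct statfs st;
 *
 *              if (statfs("/dev/shm", &st) == 0 && st.f_type == 0x01021994)
 *                      printf("tmpfs: %ld blocks of %ld bytes\n",
 *                             (long)st.f_blocks, (long)st.f_bsize);
 *              return 0;
 *      }
 */
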
 1500 static struct address_space_operations shmem_aops = {
 1501         removepage:     shmem_removepage,
 1502         writepage:      shmem_writepage,
 1503 #ifdef CONFIG_TMPFS
 1504         readpage:       shmem_readpage,
 1505         prepare_write:  shmem_prepare_write,
 1506         commit_write:   shmem_commit_write,
 1507 #endif
 1508 };
 1509 
 1510 static struct file_operations shmem_file_operations = {
 1511         mmap:           shmem_mmap,
 1512 #ifdef CONFIG_TMPFS
 1513         read:           shmem_file_read,
 1514         write:          shmem_file_write,
 1515         fsync:          shmem_sync_file,
 1516 #endif
 1517 };
 1518 
 1519 static struct inode_operations shmem_inode_operations = {
 1520         truncate:       shmem_truncate,
 1521         setattr:        shmem_notify_change,
 1522 };
 1523 
 1524 static struct inode_operations shmem_dir_inode_operations = {
 1525 #ifdef CONFIG_TMPFS
 1526         create:         shmem_create,
 1527         lookup:         shmem_lookup,
 1528         link:           shmem_link,
 1529         unlink:         shmem_unlink,
 1530         symlink:        shmem_symlink,
 1531         mkdir:          shmem_mkdir,
 1532         rmdir:          shmem_rmdir,
 1533         mknod:          shmem_mknod,
 1534         rename:         shmem_rename,
 1535 #endif
 1536 };
 1537 
 1538 static struct super_operations shmem_ops = {
 1539 #ifdef CONFIG_TMPFS
 1540         statfs:         shmem_statfs,
 1541         remount_fs:     shmem_remount_fs,
 1542 #endif
 1543         delete_inode:   shmem_delete_inode,
 1544         put_inode:      force_delete,
 1545 };
 1546 
 1547 static struct vm_operations_struct shmem_vm_ops = {
 1548         nopage:         shmem_nopage,
 1549 };
 1550 
 1551 #ifdef CONFIG_TMPFS
 1552 /* type "shm" will be tagged obsolete in 2.5 */
 1553 static DECLARE_FSTYPE(shmem_fs_type, "shm", shmem_read_super, FS_LITTER);
 1554 static DECLARE_FSTYPE(tmpfs_fs_type, "tmpfs", shmem_read_super, FS_LITTER);
 1555 #else
 1556 static DECLARE_FSTYPE(tmpfs_fs_type, "tmpfs", shmem_read_super, FS_LITTER|FS_NOMOUNT);
 1557 #endif
 1558 static struct vfsmount *shm_mnt;
 1559 
 1560 static int __init init_tmpfs(void)
 1561 {
 1562         int error;
 1563 
 1564         error = register_filesystem(&tmpfs_fs_type);
 1565         if (error) {
 1566                 printk(KERN_ERR "Could not register tmpfs\n");
 1567                 goto out3;
 1568         }
 1569 #ifdef CONFIG_TMPFS
 1570         error = register_filesystem(&shmem_fs_type);
 1571         if (error) {
 1572                 printk(KERN_ERR "Could not register shm fs\n");
 1573                 goto out2;
 1574         }
 1575         devfs_mk_dir(NULL, "shm", NULL);
 1576 #endif
 1577         shm_mnt = kern_mount(&tmpfs_fs_type);
 1578         if (IS_ERR(shm_mnt)) {
 1579                 error = PTR_ERR(shm_mnt);
 1580                 printk(KERN_ERR "Could not kern_mount tmpfs\n");
 1581                 goto out1;
 1582         }
 1583 
 1584         /* The internal instance should not do size checking */
 1585         shmem_set_size(SHMEM_SB(shm_mnt->mnt_sb), ULONG_MAX, ULONG_MAX);
 1586         return 0;
 1587 
 1588 out1:
 1589 #ifdef CONFIG_TMPFS
 1590         unregister_filesystem(&shmem_fs_type);
 1591 out2:
 1592 #endif
 1593         unregister_filesystem(&tmpfs_fs_type);
 1594 out3:
 1595         shm_mnt = ERR_PTR(error);
 1596         return error;
 1597 }
 1598 module_init(init_tmpfs)
 1599 
 1600 /*
 1601  * shmem_file_setup - get an unlinked file living in tmpfs
 1602  *
 1603  * @name: name for dentry (to be seen in /proc/<pid>/maps)
 1604  * @size: size to be set for the file
 1605  *
 1606  */
 1607 struct file *shmem_file_setup(char *name, loff_t size)
 1608 {
 1609         int error;
 1610         struct file *file;
 1611         struct inode *inode;
 1612         struct dentry *dentry, *root;
 1613         struct qstr this;
 1614         int vm_enough_memory(long pages);
 1615 
 1616         if (IS_ERR(shm_mnt))
 1617                 return (void *)shm_mnt;
 1618 
 1619         if (size > SHMEM_MAX_BYTES)
 1620                 return ERR_PTR(-EINVAL);
 1621 
 1622         if (!vm_enough_memory(VM_ACCT(size)))
 1623                 return ERR_PTR(-ENOMEM);
 1624 
 1625         this.name = name;
 1626         this.len = strlen(name);
 1627         this.hash = 0; /* unused: this dentry is never hashed */
 1628         root = shm_mnt->mnt_root;
 1629         dentry = d_alloc(root, &this);
 1630         if (!dentry)
 1631                 return ERR_PTR(-ENOMEM);
 1632 
 1633         error = -ENFILE;
 1634         file = get_empty_filp();
 1635         if (!file)
 1636                 goto put_dentry;
 1637 
 1638         error = -ENOSPC;
 1639         inode = shmem_get_inode(root->d_sb, S_IFREG | S_IRWXUGO, 0);
 1640         if (!inode)
 1641                 goto close_file;
 1642 
 1643         d_instantiate(dentry, inode);
 1644         inode->i_size = size;
 1645         inode->i_nlink = 0;     /* It is unlinked */
 1646         file->f_vfsmnt = mntget(shm_mnt);
 1647         file->f_dentry = dentry;
 1648         file->f_op = &shmem_file_operations;
 1649         file->f_mode = FMODE_WRITE | FMODE_READ;
 1650         return file;
 1651 
 1652 close_file:
 1653         put_filp(file);
 1654 put_dentry:
 1655         dput(dentry);
 1656         return ERR_PTR(error);
 1657 }
 1658 
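/*
 * Editor's note: callers get back a fully instantiated but unlinked file
 * (i_nlink == 0), so the backing store vanishes with the last reference.
 * A hedged kernel-side sketch of typical use; the name below is
 * illustrative, though the in-tree SysV shm code (ipc/shm.c) uses names
 * of this shape:
 *
 *      struct file *file = shmem_file_setup("SYSV00000000", size);
 *      if (IS_ERR(file))
 *              return PTR_ERR(file);
 *      ... keep a reference, map it into tasks, etc. ...
 *      fput(file);     dropping the last reference frees the inode
 */
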
 1659 /*
 1660  * shmem_zero_setup - setup a shared anonymous mapping
 1661  *
 1662  * @vma: the vma to be mapped, already prepared by do_mmap_pgoff
 1663  */
 1664 int shmem_zero_setup(struct vm_area_struct *vma)
 1665 {
 1666         struct file *file;
 1667         loff_t size = vma->vm_end - vma->vm_start;
 1668 
 1669         file = shmem_file_setup("dev/zero", size);
 1670         if (IS_ERR(file))
 1671                 return PTR_ERR(file);
 1672 
 1673         if (vma->vm_file)
 1674                 fput(vma->vm_file);
 1675         vma->vm_file = file;
 1676         vma->vm_ops = &shmem_vm_ops;
 1677         return 0;
 1678 }
 1679 
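/*
 * Editor's note: shmem_zero_setup() is reached when userspace creates a
 * shared anonymous mapping; the vma is quietly backed by an unlinked tmpfs
 * file named "dev/zero".  A minimal userspace sketch:
 *
 *      #include <sys/mman.h>
 *
 *      int main(void)
 *      {
 *              char *p = mmap(0, 4096, PROT_READ | PROT_WRITE,
 *                             MAP_SHARED | MAP_ANONYMOUS, -1, 0);
 *              if (p == MAP_FAILED)
 *                      return 1;
 *              p[0] = 'x';     pages are zero-filled shmem pages on fault
 *              munmap(p, 4096);
 *              return 0;
 *      }
 */
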
 1680 EXPORT_SYMBOL(shmem_file_setup);
