FreeBSD/Linux Kernel Cross Reference
sys/mm/swap.c


    1 /*
    2  *  linux/mm/swap.c
    3  *
    4  *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
    5  */
    6 
    7 /*
    8  * This file contains the default values for the operation of the
    9  * Linux VM subsystem. Fine-tuning documentation can be found in
   10  * Documentation/sysctl/vm.txt.
   11  * Started 18.12.91
   12  * Swap aging added 23.2.95, Stephen Tweedie.
   13  * Buffermem limits added 12.3.98, Rik van Riel.
   14  */
   15 
   16 #include <linux/mm.h>
   17 #include <linux/sched.h>
   18 #include <linux/kernel_stat.h>
   19 #include <linux/swap.h>
   20 #include <linux/mman.h>
   21 #include <linux/pagemap.h>
   22 #include <linux/pagevec.h>
   23 #include <linux/init.h>
   24 #include <linux/export.h>
   25 #include <linux/mm_inline.h>
   26 #include <linux/percpu_counter.h>
   27 #include <linux/percpu.h>
   28 #include <linux/cpu.h>
   29 #include <linux/notifier.h>
   30 #include <linux/backing-dev.h>
   31 #include <linux/memcontrol.h>
   32 #include <linux/gfp.h>
   33 
   34 #include "internal.h"
   35 
   36 /* How many pages do we try to swap or page in/out together? */
   37 int page_cluster;
   38 
   39 static DEFINE_PER_CPU(struct pagevec[NR_LRU_LISTS], lru_add_pvecs);
   40 static DEFINE_PER_CPU(struct pagevec, lru_rotate_pvecs);
   41 static DEFINE_PER_CPU(struct pagevec, lru_deactivate_pvecs);
   42 
   43 /*
   44  * This path almost never happens for VM activity - pages are normally
   45  * freed via pagevecs.  But it gets used by networking.
   46  */
   47 static void __page_cache_release(struct page *page)
   48 {
   49         if (PageLRU(page)) {
   50                 struct zone *zone = page_zone(page);
   51                 struct lruvec *lruvec;
   52                 unsigned long flags;
   53 
   54                 spin_lock_irqsave(&zone->lru_lock, flags);
   55                 lruvec = mem_cgroup_page_lruvec(page, zone);
   56                 VM_BUG_ON(!PageLRU(page));
   57                 __ClearPageLRU(page);
   58                 del_page_from_lru_list(page, lruvec, page_off_lru(page));
   59                 spin_unlock_irqrestore(&zone->lru_lock, flags);
   60         }
   61 }
   62 
   63 static void __put_single_page(struct page *page)
   64 {
   65         __page_cache_release(page);
   66         free_hot_cold_page(page, 0);
   67 }
   68 
   69 static void __put_compound_page(struct page *page)
   70 {
   71         compound_page_dtor *dtor;
   72 
   73         __page_cache_release(page);
   74         dtor = get_compound_page_dtor(page);
   75         (*dtor)(page);
   76 }
   77 
   78 static void put_compound_page(struct page *page)
   79 {
   80         if (unlikely(PageTail(page))) {
   81                 /* __split_huge_page_refcount can run under us */
   82                 struct page *page_head = compound_trans_head(page);
   83 
   84                 if (likely(page != page_head &&
   85                            get_page_unless_zero(page_head))) {
   86                         unsigned long flags;
   87 
   88                         /*
   89                          * THP can not break up slab pages so avoid taking
   90                          * compound_lock().  Slab performs non-atomic bit ops
   91                          * on page->flags for better performance.  In particular
   92                          * slab_unlock() in slub used to be a hot path.  It is
   93                          * still hot on arches that do not support
   94                          * this_cpu_cmpxchg_double().
   95                          */
   96                         if (PageSlab(page_head)) {
   97                                 if (PageTail(page)) {
   98                                         if (put_page_testzero(page_head))
   99                                                 VM_BUG_ON(1);
  100 
  101                                         atomic_dec(&page->_mapcount);
  102                                         goto skip_lock_tail;
  103                                 } else
  104                                         goto skip_lock;
  105                         }
  106                         /*
  107                          * page_head wasn't a dangling pointer but it
  108                          * may not be a head page anymore by the time
  109                          * we obtain the lock. That is ok as long as it
  110                          * can't be freed from under us.
  111                          */
  112                         flags = compound_lock_irqsave(page_head);
  113                         if (unlikely(!PageTail(page))) {
  114                                 /* __split_huge_page_refcount run before us */
  115                                 compound_unlock_irqrestore(page_head, flags);
  116 skip_lock:
  117                                 if (put_page_testzero(page_head))
  118                                         __put_single_page(page_head);
  119 out_put_single:
  120                                 if (put_page_testzero(page))
  121                                         __put_single_page(page);
  122                                 return;
  123                         }
  124                         VM_BUG_ON(page_head != page->first_page);
  125                         /*
  126                          * We can release the refcount taken by
  127                          * get_page_unless_zero() now that
  128                          * __split_huge_page_refcount() is blocked on
  129                          * the compound_lock.
  130                          */
  131                         if (put_page_testzero(page_head))
  132                                 VM_BUG_ON(1);
  133                         /* __split_huge_page_refcount will wait now */
  134                         VM_BUG_ON(page_mapcount(page) <= 0);
  135                         atomic_dec(&page->_mapcount);
  136                         VM_BUG_ON(atomic_read(&page_head->_count) <= 0);
  137                         VM_BUG_ON(atomic_read(&page->_count) != 0);
  138                         compound_unlock_irqrestore(page_head, flags);
  139 
  140 skip_lock_tail:
  141                         if (put_page_testzero(page_head)) {
  142                                 if (PageHead(page_head))
  143                                         __put_compound_page(page_head);
  144                                 else
  145                                         __put_single_page(page_head);
  146                         }
  147                 } else {
  148                         /* page_head is a dangling pointer */
  149                         VM_BUG_ON(PageTail(page));
  150                         goto out_put_single;
  151                 }
  152         } else if (put_page_testzero(page)) {
  153                 if (PageHead(page))
  154                         __put_compound_page(page);
  155                 else
  156                         __put_single_page(page);
  157         }
  158 }
  159 
  160 void put_page(struct page *page)
  161 {
  162         if (unlikely(PageCompound(page)))
  163                 put_compound_page(page);
  164         else if (put_page_testzero(page))
  165                 __put_single_page(page);
  166 }
  167 EXPORT_SYMBOL(put_page);
  168 
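/*
 * Illustrative sketch, not part of mm/swap.c: the usual reference-counting
 * pattern around put_page().  alloc_page() hands back a page with a
 * reference count of one; the final put_page() frees it through
 * __put_single_page()/__put_compound_page() above.
 */
static void example_put_page_usage(void)
{
        struct page *page = alloc_page(GFP_KERNEL);     /* refcount == 1 */

        if (!page)
                return;
        get_page(page);         /* a second user takes its own reference */
        put_page(page);         /* second user is done, page still live */
        put_page(page);         /* last reference dropped, page is freed */
}
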
  169 /*
  170  * This function is exported but must not be called by anything other
  171  * than get_page(). It implements the slow path of get_page().
  172  */
  173 bool __get_page_tail(struct page *page)
  174 {
  175         /*
  176          * This takes care of get_page() if run on a tail page
  177          * returned by one of the get_user_pages/follow_page variants.
  178          * get_user_pages/follow_page itself doesn't need the compound
  179          * lock because it runs __get_page_tail_foll() under the
  180          * proper PT lock that already serializes against
  181          * split_huge_page().
  182          */
  183         unsigned long flags;
  184         bool got = false;
  185         struct page *page_head = compound_trans_head(page);
  186 
  187         if (likely(page != page_head && get_page_unless_zero(page_head))) {
  188 
  189                 /* See the corresponding comment in put_compound_page(). */
  190                 if (PageSlab(page_head)) {
  191                         if (likely(PageTail(page))) {
  192                                 __get_page_tail_foll(page, false);
  193                                 return true;
  194                         } else {
  195                                 put_page(page_head);
  196                                 return false;
  197                         }
  198                 }
  199 
  200                 /*
  201                  * page_head wasn't a dangling pointer but it
  202                  * may not be a head page anymore by the time
  203                  * we obtain the lock. That is ok as long as it
  204                  * can't be freed from under us.
  205                  */
  206                 flags = compound_lock_irqsave(page_head);
  207                 /* here __split_huge_page_refcount won't run anymore */
  208                 if (likely(PageTail(page))) {
  209                         __get_page_tail_foll(page, false);
  210                         got = true;
  211                 }
  212                 compound_unlock_irqrestore(page_head, flags);
  213                 if (unlikely(!got))
  214                         put_page(page_head);
  215         }
  216         return got;
  217 }
  218 EXPORT_SYMBOL(__get_page_tail);
  219 
  220 /**
  221  * put_pages_list() - release a list of pages
  222  * @pages: list of pages threaded on page->lru
  223  *
  224  * Release a list of pages which are strung together on page.lru.  Currently
  225  * used by read_cache_pages() and related error recovery code.
  226  */
  227 void put_pages_list(struct list_head *pages)
  228 {
  229         while (!list_empty(pages)) {
  230                 struct page *victim;
  231 
  232                 victim = list_entry(pages->prev, struct page, lru);
  233                 list_del(&victim->lru);
  234                 page_cache_release(victim);
  235         }
  236 }
  237 EXPORT_SYMBOL(put_pages_list);
  238 
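/*
 * Illustrative sketch, not part of mm/swap.c: collecting pages on a local
 * list through page->lru and dropping every reference with a single
 * put_pages_list() call, as the readahead error paths do.
 */
static void example_put_pages_list_usage(void)
{
        LIST_HEAD(pages);
        int i;

        for (i = 0; i < 4; i++) {
                struct page *page = alloc_page(GFP_KERNEL);

                if (!page)
                        break;
                list_add(&page->lru, &pages);
        }
        put_pages_list(&pages);         /* drops each page's reference */
}
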
  239 /*
  240  * get_kernel_pages() - pin kernel pages in memory
  241  * @kiov:       An array of struct kvec structures
  242  * @nr_segs:    number of segments to pin
  243  * @write:      pinning for read/write, currently ignored
  244  * @pages:      array that receives pointers to the pages pinned.
  245  *              Should be at least nr_segs long.
  246  *
  247  * Returns number of pages pinned. This may be fewer than the number
  248  * requested. If nr_segs is 0 or negative, returns 0. If no pages
  249  * were pinned, returns -errno. Each page returned must be released
  250  * with a put_page() call when it is finished with.
  251  */
  252 int get_kernel_pages(const struct kvec *kiov, int nr_segs, int write,
  253                 struct page **pages)
  254 {
  255         int seg;
  256 
  257         for (seg = 0; seg < nr_segs; seg++) {
  258                 if (WARN_ON(kiov[seg].iov_len != PAGE_SIZE))
  259                         return seg;
  260 
  261                 pages[seg] = kmap_to_page(kiov[seg].iov_base);
  262                 page_cache_get(pages[seg]);
  263         }
  264 
  265         return seg;
  266 }
  267 EXPORT_SYMBOL_GPL(get_kernel_pages);
  268 
  269 /*
  270  * get_kernel_page() - pin a kernel page in memory
  271  * @start:      starting kernel address
  272  * @write:      pinning for read/write, currently ignored
  273  * @pages:      array that receives pointer to the page pinned.
  274  *              Must have room for one page pointer.
  275  *
  276  * Returns 1 if page is pinned. If the page was not pinned, returns
  277  * -errno. The page returned must be released with a put_page() call
  278  * when it is finished with.
  279  */
  280 int get_kernel_page(unsigned long start, int write, struct page **pages)
  281 {
  282         const struct kvec kiov = {
  283                 .iov_base = (void *)start,
  284                 .iov_len = PAGE_SIZE
  285         };
  286 
  287         return get_kernel_pages(&kiov, 1, write, pages);
  288 }
  289 EXPORT_SYMBOL_GPL(get_kernel_page);
  290 
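/*
 * Illustrative sketch, not part of mm/swap.c: pinning one page of a
 * page-aligned kernel buffer so it can be handed to code that wants a
 * struct page (for example when building a bio).  "buf" is an assumed
 * parameter: it must be page aligned and at least PAGE_SIZE bytes long.
 */
static int example_pin_kernel_buffer(void *buf)
{
        struct page *page;
        int ret;

        ret = get_kernel_page((unsigned long)buf, 0, &page);
        if (ret < 1)
                return ret < 0 ? ret : -EFAULT;

        /* ... use "page" ... */

        put_page(page);                 /* drop the reference taken above */
        return 0;
}
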
  291 static void pagevec_lru_move_fn(struct pagevec *pvec,
  292         void (*move_fn)(struct page *page, struct lruvec *lruvec, void *arg),
  293         void *arg)
  294 {
  295         int i;
  296         struct zone *zone = NULL;
  297         struct lruvec *lruvec;
  298         unsigned long flags = 0;
  299 
  300         for (i = 0; i < pagevec_count(pvec); i++) {
  301                 struct page *page = pvec->pages[i];
  302                 struct zone *pagezone = page_zone(page);
  303 
  304                 if (pagezone != zone) {
  305                         if (zone)
  306                                 spin_unlock_irqrestore(&zone->lru_lock, flags);
  307                         zone = pagezone;
  308                         spin_lock_irqsave(&zone->lru_lock, flags);
  309                 }
  310 
  311                 lruvec = mem_cgroup_page_lruvec(page, zone);
  312                 (*move_fn)(page, lruvec, arg);
  313         }
  314         if (zone)
  315                 spin_unlock_irqrestore(&zone->lru_lock, flags);
  316         release_pages(pvec->pages, pvec->nr, pvec->cold);
  317         pagevec_reinit(pvec);
  318 }
  319 
  320 static void pagevec_move_tail_fn(struct page *page, struct lruvec *lruvec,
  321                                  void *arg)
  322 {
  323         int *pgmoved = arg;
  324 
  325         if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
  326                 enum lru_list lru = page_lru_base_type(page);
  327                 list_move_tail(&page->lru, &lruvec->lists[lru]);
  328                 (*pgmoved)++;
  329         }
  330 }
  331 
  332 /*
  333  * pagevec_move_tail() must be called with IRQ disabled.
  334  * Otherwise this may cause nasty races.
  335  */
  336 static void pagevec_move_tail(struct pagevec *pvec)
  337 {
  338         int pgmoved = 0;
  339 
  340         pagevec_lru_move_fn(pvec, pagevec_move_tail_fn, &pgmoved);
  341         __count_vm_events(PGROTATED, pgmoved);
  342 }
  343 
  344 /*
  345  * Writeback is about to end against a page which has been marked for immediate
  346  * reclaim.  If it still appears to be reclaimable, move it to the tail of the
  347  * inactive list.
  348  */
  349 void rotate_reclaimable_page(struct page *page)
  350 {
  351         if (!PageLocked(page) && !PageDirty(page) && !PageActive(page) &&
  352             !PageUnevictable(page) && PageLRU(page)) {
  353                 struct pagevec *pvec;
  354                 unsigned long flags;
  355 
  356                 page_cache_get(page);
  357                 local_irq_save(flags);
  358                 pvec = &__get_cpu_var(lru_rotate_pvecs);
  359                 if (!pagevec_add(pvec, page))
  360                         pagevec_move_tail(pvec);
  361                 local_irq_restore(flags);
  362         }
  363 }
  364 
  365 static void update_page_reclaim_stat(struct lruvec *lruvec,
  366                                      int file, int rotated)
  367 {
  368         struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat;
  369 
  370         reclaim_stat->recent_scanned[file]++;
  371         if (rotated)
  372                 reclaim_stat->recent_rotated[file]++;
  373 }
  374 
  375 static void __activate_page(struct page *page, struct lruvec *lruvec,
  376                             void *arg)
  377 {
  378         if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
  379                 int file = page_is_file_cache(page);
  380                 int lru = page_lru_base_type(page);
  381 
  382                 del_page_from_lru_list(page, lruvec, lru);
  383                 SetPageActive(page);
  384                 lru += LRU_ACTIVE;
  385                 add_page_to_lru_list(page, lruvec, lru);
  386 
  387                 __count_vm_event(PGACTIVATE);
  388                 update_page_reclaim_stat(lruvec, file, 1);
  389         }
  390 }
  391 
  392 #ifdef CONFIG_SMP
  393 static DEFINE_PER_CPU(struct pagevec, activate_page_pvecs);
  394 
  395 static void activate_page_drain(int cpu)
  396 {
  397         struct pagevec *pvec = &per_cpu(activate_page_pvecs, cpu);
  398 
  399         if (pagevec_count(pvec))
  400                 pagevec_lru_move_fn(pvec, __activate_page, NULL);
  401 }
  402 
  403 void activate_page(struct page *page)
  404 {
  405         if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
  406                 struct pagevec *pvec = &get_cpu_var(activate_page_pvecs);
  407 
  408                 page_cache_get(page);
  409                 if (!pagevec_add(pvec, page))
  410                         pagevec_lru_move_fn(pvec, __activate_page, NULL);
  411                 put_cpu_var(activate_page_pvecs);
  412         }
  413 }
  414 
  415 #else
  416 static inline void activate_page_drain(int cpu)
  417 {
  418 }
  419 
  420 void activate_page(struct page *page)
  421 {
  422         struct zone *zone = page_zone(page);
  423 
  424         spin_lock_irq(&zone->lru_lock);
  425         __activate_page(page, mem_cgroup_page_lruvec(page, zone), NULL);
  426         spin_unlock_irq(&zone->lru_lock);
  427 }
  428 #endif
  429 
  430 /*
  431  * Mark a page as having seen activity.
  432  *
  433  * inactive,unreferenced        ->      inactive,referenced
  434  * inactive,referenced          ->      active,unreferenced
  435  * active,unreferenced          ->      active,referenced
  436  */
  437 void mark_page_accessed(struct page *page)
  438 {
  439         if (!PageActive(page) && !PageUnevictable(page) &&
  440                         PageReferenced(page) && PageLRU(page)) {
  441                 activate_page(page);
  442                 ClearPageReferenced(page);
  443         } else if (!PageReferenced(page)) {
  444                 SetPageReferenced(page);
  445         }
  446 }
  447 EXPORT_SYMBOL(mark_page_accessed);
  448 
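/*
 * Illustrative sketch, not part of mm/swap.c: a read-side lookup that
 * records the access, so that a second touch of the same page promotes it
 * from the inactive to the active list as described in the state
 * transitions above.
 */
static struct page *example_lookup_and_touch(struct address_space *mapping,
                                             pgoff_t index)
{
        struct page *page = find_get_page(mapping, index);

        if (page)
                mark_page_accessed(page);
        return page;    /* caller drops the reference with page_cache_release() */
}
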
  449 /*
  450  * Order of operations is important: flush the pagevec when it's already
  451  * full, not when adding the last page, to make sure that last page is
  452  * not added to the LRU directly when passed to this function. Because
  453  * mark_page_accessed() (called after this when writing) only activates
  454  * pages that are on the LRU, linear writes in subpage chunks would see
  455  * every PAGEVEC_SIZE page activated, which is unexpected.
  456  */
  457 void __lru_cache_add(struct page *page, enum lru_list lru)
  458 {
  459         struct pagevec *pvec = &get_cpu_var(lru_add_pvecs)[lru];
  460 
  461         page_cache_get(page);
  462         if (!pagevec_space(pvec))
  463                 __pagevec_lru_add(pvec, lru);
  464         pagevec_add(pvec, page);
  465         put_cpu_var(lru_add_pvecs);
  466 }
  467 EXPORT_SYMBOL(__lru_cache_add);
  468 
  469 /**
  470  * lru_cache_add_lru - add a page to a page list
  471  * @page: the page to be added to the LRU.
  472  * @lru: the LRU list to which the page is added.
  473  */
  474 void lru_cache_add_lru(struct page *page, enum lru_list lru)
  475 {
  476         if (PageActive(page)) {
  477                 VM_BUG_ON(PageUnevictable(page));
  478                 ClearPageActive(page);
  479         } else if (PageUnevictable(page)) {
  480                 VM_BUG_ON(PageActive(page));
  481                 ClearPageUnevictable(page);
  482         }
  483 
  484         VM_BUG_ON(PageLRU(page) || PageActive(page) || PageUnevictable(page));
  485         __lru_cache_add(page, lru);
  486 }
  487 
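/*
 * Illustrative sketch, not part of mm/swap.c: placing a freshly created
 * page-cache page on the inactive file LRU.  In the real kernel this is
 * what add_to_page_cache_lru() ends up doing; the explicit call is shown
 * here only to make the interface visible.
 */
static void example_add_new_page_to_lru(struct page *page)
{
        /* page must not yet be on any LRU, active or unevictable */
        lru_cache_add_lru(page, LRU_INACTIVE_FILE);
}
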
  488 /**
  489  * add_page_to_unevictable_list - add a page to the unevictable list
  490  * @page:  the page to be added to the unevictable list
  491  *
  492  * Add page directly to its zone's unevictable list.  To avoid races with
  493  * tasks that might be making the page evictable, through eg. munlock,
  494  * munmap or exit, while it's not on the lru, we want to add the page
  495  * while it's locked or otherwise "invisible" to other tasks.  This is
  496  * difficult to do when using the pagevec cache, so bypass that.
  497  */
  498 void add_page_to_unevictable_list(struct page *page)
  499 {
  500         struct zone *zone = page_zone(page);
  501         struct lruvec *lruvec;
  502 
  503         spin_lock_irq(&zone->lru_lock);
  504         lruvec = mem_cgroup_page_lruvec(page, zone);
  505         SetPageUnevictable(page);
  506         SetPageLRU(page);
  507         add_page_to_lru_list(page, lruvec, LRU_UNEVICTABLE);
  508         spin_unlock_irq(&zone->lru_lock);
  509 }
  510 
  511 /*
  512  * If the page can not be invalidated, it is moved to the
  513  * inactive list to speed up its reclaim.  It is moved to the
  514  * head of the list, rather than the tail, to give the flusher
  515  * threads some time to write it out, as this is much more
  516  * effective than the single-page writeout from reclaim.
  517  *
  518  * If the page is not mapped and is dirty or under writeback, it can be
  519  * reclaimed as soon as possible by using PG_reclaim.
  520  *
  521  * 1. active, mapped page -> none
  522  * 2. active, dirty/writeback page -> inactive, head, PG_reclaim
  523  * 3. inactive, mapped page -> none
  524  * 4. inactive, dirty/writeback page -> inactive, head, PG_reclaim
  525  * 5. inactive, clean -> inactive, tail
  526  * 6. Others -> none
  527  *
  528  * In case 4, the page is moved to the head of the inactive list because
  529  * the VM expects it to be written out by the flusher threads, which is
  530  * much more effective than the single-page writeout from reclaim.
  531  */
  532 static void lru_deactivate_fn(struct page *page, struct lruvec *lruvec,
  533                               void *arg)
  534 {
  535         int lru, file;
  536         bool active;
  537 
  538         if (!PageLRU(page))
  539                 return;
  540 
  541         if (PageUnevictable(page))
  542                 return;
  543 
  544         /* Some processes are using the page */
  545         if (page_mapped(page))
  546                 return;
  547 
  548         active = PageActive(page);
  549         file = page_is_file_cache(page);
  550         lru = page_lru_base_type(page);
  551 
  552         del_page_from_lru_list(page, lruvec, lru + active);
  553         ClearPageActive(page);
  554         ClearPageReferenced(page);
  555         add_page_to_lru_list(page, lruvec, lru);
  556 
  557         if (PageWriteback(page) || PageDirty(page)) {
  558                 /*
  559                  * Setting PG_reclaim can race with end_page_writeback(),
  560                  * which can confuse readahead.  But the race window is
  561                  * _really_ small and it is a non-critical problem.
  562                  */
  563                 SetPageReclaim(page);
  564         } else {
  565                 /*
  566                  * The page's writeback ended while it was on the pagevec:
  567                  * move the page to the tail of the inactive list.
  568                  */
  569                 list_move_tail(&page->lru, &lruvec->lists[lru]);
  570                 __count_vm_event(PGROTATED);
  571         }
  572 
  573         if (active)
  574                 __count_vm_event(PGDEACTIVATE);
  575         update_page_reclaim_stat(lruvec, file, 0);
  576 }
  577 
  578 /*
  579  * Drain pages out of the cpu's pagevecs.
  580  * Either "cpu" is the current CPU, and preemption has already been
  581  * disabled; or "cpu" is being hot-unplugged, and is already dead.
  582  */
  583 void lru_add_drain_cpu(int cpu)
  584 {
  585         struct pagevec *pvecs = per_cpu(lru_add_pvecs, cpu);
  586         struct pagevec *pvec;
  587         int lru;
  588 
  589         for_each_lru(lru) {
  590                 pvec = &pvecs[lru - LRU_BASE];
  591                 if (pagevec_count(pvec))
  592                         __pagevec_lru_add(pvec, lru);
  593         }
  594 
  595         pvec = &per_cpu(lru_rotate_pvecs, cpu);
  596         if (pagevec_count(pvec)) {
  597                 unsigned long flags;
  598 
  599                 /* No harm done if a racing interrupt already did this */
  600                 local_irq_save(flags);
  601                 pagevec_move_tail(pvec);
  602                 local_irq_restore(flags);
  603         }
  604 
  605         pvec = &per_cpu(lru_deactivate_pvecs, cpu);
  606         if (pagevec_count(pvec))
  607                 pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL);
  608 
  609         activate_page_drain(cpu);
  610 }
  611 
  612 /**
  613  * deactivate_page - forcefully deactivate a page
  614  * @page: page to deactivate
  615  *
  616  * This function hints the VM that @page is a good reclaim candidate,
  617  * for example if its invalidation fails due to the page being dirty
  618  * or under writeback.
  619  */
  620 void deactivate_page(struct page *page)
  621 {
  622         /*
  623          * In a workload with many unevictable pages, such as mprotect, deactivating
  624          * unevictable pages to accelerate reclaim is pointless.
  625          */
  626         if (PageUnevictable(page))
  627                 return;
  628 
  629         if (likely(get_page_unless_zero(page))) {
  630                 struct pagevec *pvec = &get_cpu_var(lru_deactivate_pvecs);
  631 
  632                 if (!pagevec_add(pvec, page))
  633                         pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL);
  634                 put_cpu_var(lru_deactivate_pvecs);
  635         }
  636 }
  637 
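/*
 * Illustrative sketch, not part of mm/swap.c: a pattern in the style of
 * invalidate_mapping_pages() - if a page cannot be dropped from the page
 * cache, hint the VM with deactivate_page() so it is reclaimed earlier.
 */
static void example_try_to_drop(struct page *page)
{
        int invalidated = 0;

        if (trylock_page(page)) {
                invalidated = invalidate_inode_page(page);
                unlock_page(page);
        }
        if (!invalidated)
                deactivate_page(page);  /* still cached: good reclaim candidate */
}
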
  638 void lru_add_drain(void)
  639 {
  640         lru_add_drain_cpu(get_cpu());
  641         put_cpu();
  642 }
  643 
  644 static void lru_add_drain_per_cpu(struct work_struct *dummy)
  645 {
  646         lru_add_drain();
  647 }
  648 
  649 /*
  650  * Returns 0 for success
  651  */
  652 int lru_add_drain_all(void)
  653 {
  654         return schedule_on_each_cpu(lru_add_drain_per_cpu);
  655 }
  656 
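/*
 * Illustrative sketch, not part of mm/swap.c: code that is about to search
 * the LRU lists for particular pages (as the mlock and memory hot-remove
 * paths do) drains the per-CPU pagevecs first, so that pages still sitting
 * in lru_add_pvecs become visible on the lists.
 */
static void example_prepare_lru_scan(void)
{
        lru_add_drain_all();    /* flush lru_add/rotate/deactivate pagevecs */

        /* ... now isolate pages under zone->lru_lock ... */
}
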
  657 /*
  658  * Batched page_cache_release().  Decrement the reference count on all the
  659  * passed pages.  If it fell to zero then remove the page from the LRU and
  660  * free it.
  661  *
  662  * Avoid taking zone->lru_lock if possible, but if it is taken, retain it
  663  * for the remainder of the operation.
  664  *
  665  * The locking in this function is against shrink_inactive_list(): we recheck
  666  * the page count inside the lock to see whether shrink_inactive_list()
  667  * grabbed the page via the LRU.  If it did, give up: shrink_inactive_list()
  668  * will free it.
  669  */
  670 void release_pages(struct page **pages, int nr, int cold)
  671 {
  672         int i;
  673         LIST_HEAD(pages_to_free);
  674         struct zone *zone = NULL;
  675         struct lruvec *lruvec;
  676         unsigned long uninitialized_var(flags);
  677 
  678         for (i = 0; i < nr; i++) {
  679                 struct page *page = pages[i];
  680 
  681                 if (unlikely(PageCompound(page))) {
  682                         if (zone) {
  683                                 spin_unlock_irqrestore(&zone->lru_lock, flags);
  684                                 zone = NULL;
  685                         }
  686                         put_compound_page(page);
  687                         continue;
  688                 }
  689 
  690                 if (!put_page_testzero(page))
  691                         continue;
  692 
  693                 if (PageLRU(page)) {
  694                         struct zone *pagezone = page_zone(page);
  695 
  696                         if (pagezone != zone) {
  697                                 if (zone)
  698                                         spin_unlock_irqrestore(&zone->lru_lock,
  699                                                                         flags);
  700                                 zone = pagezone;
  701                                 spin_lock_irqsave(&zone->lru_lock, flags);
  702                         }
  703 
  704                         lruvec = mem_cgroup_page_lruvec(page, zone);
  705                         VM_BUG_ON(!PageLRU(page));
  706                         __ClearPageLRU(page);
  707                         del_page_from_lru_list(page, lruvec, page_off_lru(page));
  708                 }
  709 
  710                 list_add(&page->lru, &pages_to_free);
  711         }
  712         if (zone)
  713                 spin_unlock_irqrestore(&zone->lru_lock, flags);
  714 
  715         free_hot_cold_page_list(&pages_to_free, cold);
  716 }
  717 EXPORT_SYMBOL(release_pages);
  718 
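/*
 * Illustrative sketch, not part of mm/swap.c: dropping the references taken
 * by a gang lookup with one batched release_pages() call instead of a
 * put_page() loop, so that zone->lru_lock is taken at most once per zone.
 */
static void example_drop_gang_lookup(struct address_space *mapping,
                                     pgoff_t start)
{
        struct page *pages[16];
        unsigned nr;

        nr = find_get_pages(mapping, start, ARRAY_SIZE(pages), pages);

        /* ... inspect the pages ... */

        release_pages(pages, nr, 0);    /* 0 == treat the pages as cache-hot */
}
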
  719 /*
  720  * The pages which we're about to release may be in the deferred lru-addition
  721  * queues.  That would prevent them from really being freed right now.  That's
  722  * OK from a correctness point of view but is inefficient - those pages may be
  723  * cache-warm and we want to give them back to the page allocator ASAP.
  724  *
  725  * So __pagevec_release() will drain those queues here.  __pagevec_lru_add()
  726  * and __pagevec_lru_add_active() call release_pages() directly to avoid
  727  * mutual recursion.
  728  */
  729 void __pagevec_release(struct pagevec *pvec)
  730 {
  731         lru_add_drain();
  732         release_pages(pvec->pages, pagevec_count(pvec), pvec->cold);
  733         pagevec_reinit(pvec);
  734 }
  735 EXPORT_SYMBOL(__pagevec_release);
  736 
  737 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
  738 /* used by __split_huge_page_refcount() */
  739 void lru_add_page_tail(struct page *page, struct page *page_tail,
  740                        struct lruvec *lruvec)
  741 {
  742         int uninitialized_var(active);
  743         enum lru_list lru;
  744         const int file = 0;
  745 
  746         VM_BUG_ON(!PageHead(page));
  747         VM_BUG_ON(PageCompound(page_tail));
  748         VM_BUG_ON(PageLRU(page_tail));
  749         VM_BUG_ON(NR_CPUS != 1 &&
  750                   !spin_is_locked(&lruvec_zone(lruvec)->lru_lock));
  751 
  752         SetPageLRU(page_tail);
  753 
  754         if (page_evictable(page_tail)) {
  755                 if (PageActive(page)) {
  756                         SetPageActive(page_tail);
  757                         active = 1;
  758                         lru = LRU_ACTIVE_ANON;
  759                 } else {
  760                         active = 0;
  761                         lru = LRU_INACTIVE_ANON;
  762                 }
  763         } else {
  764                 SetPageUnevictable(page_tail);
  765                 lru = LRU_UNEVICTABLE;
  766         }
  767 
  768         if (likely(PageLRU(page)))
  769                 list_add_tail(&page_tail->lru, &page->lru);
  770         else {
  771                 struct list_head *list_head;
  772                 /*
  773                  * Head page has not yet been counted, as an hpage,
  774                  * so we must account for each subpage individually.
  775                  *
  776                  * Use the standard add function to put page_tail on the list,
  777                  * but then correct its position so they all end up in order.
  778                  */
  779                 add_page_to_lru_list(page_tail, lruvec, lru);
  780                 list_head = page_tail->lru.prev;
  781                 list_move_tail(&page_tail->lru, list_head);
  782         }
  783 
  784         if (!PageUnevictable(page))
  785                 update_page_reclaim_stat(lruvec, file, active);
  786 }
  787 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
  788 
  789 static void __pagevec_lru_add_fn(struct page *page, struct lruvec *lruvec,
  790                                  void *arg)
  791 {
  792         enum lru_list lru = (enum lru_list)arg;
  793         int file = is_file_lru(lru);
  794         int active = is_active_lru(lru);
  795 
  796         VM_BUG_ON(PageActive(page));
  797         VM_BUG_ON(PageUnevictable(page));
  798         VM_BUG_ON(PageLRU(page));
  799 
  800         SetPageLRU(page);
  801         if (active)
  802                 SetPageActive(page);
  803         add_page_to_lru_list(page, lruvec, lru);
  804         update_page_reclaim_stat(lruvec, file, active);
  805 }
  806 
  807 /*
  808  * Add the passed pages to the LRU, then drop the caller's refcount
  809  * on them.  Reinitialises the caller's pagevec.
  810  */
  811 void __pagevec_lru_add(struct pagevec *pvec, enum lru_list lru)
  812 {
  813         VM_BUG_ON(is_unevictable_lru(lru));
  814 
  815         pagevec_lru_move_fn(pvec, __pagevec_lru_add_fn, (void *)lru);
  816 }
  817 EXPORT_SYMBOL(__pagevec_lru_add);
  818 
  819 /**
  820  * pagevec_lookup - gang pagecache lookup
  821  * @pvec:       Where the resulting pages are placed
  822  * @mapping:    The address_space to search
  823  * @start:      The starting page index
  824  * @nr_pages:   The maximum number of pages
  825  *
  826  * pagevec_lookup() will search for and return a group of up to @nr_pages pages
  827  * in the mapping.  The pages are placed in @pvec.  pagevec_lookup() takes a
  828  * reference against the pages in @pvec.
  829  *
  830  * The search returns a group of mapping-contiguous pages with ascending
  831  * indexes.  There may be holes in the indices due to not-present pages.
  832  *
  833  * pagevec_lookup() returns the number of pages which were found.
  834  */
  835 unsigned pagevec_lookup(struct pagevec *pvec, struct address_space *mapping,
  836                 pgoff_t start, unsigned nr_pages)
  837 {
  838         pvec->nr = find_get_pages(mapping, start, nr_pages, pvec->pages);
  839         return pagevec_count(pvec);
  840 }
  841 EXPORT_SYMBOL(pagevec_lookup);
  842 
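/*
 * Illustrative sketch, not part of mm/swap.c: the usual batched walk over a
 * mapping, as done by truncation and writeback.  pagevec_release() drops
 * the references taken by pagevec_lookup() via release_pages() above.
 */
static void example_walk_mapping(struct address_space *mapping)
{
        struct pagevec pvec;
        pgoff_t index = 0;
        int i;

        pagevec_init(&pvec, 0);
        while (pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE)) {
                for (i = 0; i < pagevec_count(&pvec); i++) {
                        struct page *page = pvec.pages[i];

                        index = page->index + 1;        /* resume point */
                        /* ... examine the page ... */
                }
                pagevec_release(&pvec);
                cond_resched();
        }
}
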
  843 unsigned pagevec_lookup_tag(struct pagevec *pvec, struct address_space *mapping,
  844                 pgoff_t *index, int tag, unsigned nr_pages)
  845 {
  846         pvec->nr = find_get_pages_tag(mapping, index, tag,
  847                                         nr_pages, pvec->pages);
  848         return pagevec_count(pvec);
  849 }
  850 EXPORT_SYMBOL(pagevec_lookup_tag);
  851 
  852 /*
  853  * Perform any setup for the swap system
  854  */
  855 void __init swap_setup(void)
  856 {
  857         unsigned long megs = totalram_pages >> (20 - PAGE_SHIFT);
  858 
  859 #ifdef CONFIG_SWAP
  860         bdi_init(swapper_space.backing_dev_info);
  861 #endif
  862 
  863         /* Use a smaller cluster for small-memory machines */
  864         if (megs < 16)
  865                 page_cluster = 2;
  866         else
  867                 page_cluster = 3;
  868         /*
  869          * Right now other parts of the system mean that we
  870          * _really_ don't want to cluster much more.
  871          */
  872 }
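
A note on the defaults above: page_cluster is consumed as a power of two by the swap readahead code, so a value of 3 means up to 1 << 3 = 8 pages (32 KB with 4 KB pages) are read around a faulting swap entry on machines with 16 MB of RAM or more, and a value of 2 means up to 4 pages (16 KB) on smaller machines. The value can be tuned at run time through the vm.page-cluster sysctl described in Documentation/sysctl/vm.txt.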
